ParallelGravity.cpp
/**
* @mainpage ChaNGa: Charm++ N-body Gravity
*
* For internal documentation of the operation of this code please see the
* following classes:
*
* Main: Overall control flow of the program.
*
* TreePiece: Structure that holds the particle data and tree
* structure in each object.
*
* Sorter: Organize domain decomposition.
*
* DataManager: Organize data across processors.
*
* Tree::GenericTreeNode: Interface for the tree data structure.
*
* GravityParticle: Per particle data structure.
*/
#include <stdint.h>
#include <iostream>
#include <unistd.h>
#include <sys/param.h>
#include <sys/stat.h>
// Debug floating point problems
// #include <fenv.h>
#include "BaseLB.h"
#include "CkLoopAPI.h"
#include "Sorter.h"
#include "ParallelGravity.h"
#include "DataManager.h"
#include "IntraNodeLBManager.h"
#include "TipsyFile.h"
#include "param.h"
#include "smooth.h"
#include "Sph.h"
#include "starform.h"
#include "feedback.h"
#include "SIDM.h"
#include "externalGravity.h"
#include "formatted_string.h"
#include "PETreeMerger.h"
#ifdef CUDA
// for default per-list parameters
#include "cuda_typedef.h"
#endif
extern char *optarg;
extern int optind, opterr, optopt;
extern const char * const Cha_CommitID;
using namespace std;
/// @brief Proxy for Charm Main Chare
CProxy_Main mainChare;
/// @brief verbosity level. Higher is more verbose.
int verbosity;
int bVDetails;
CProxy_TreePiece treeProxy; ///< Proxy for the TreePiece chare array
#ifdef REDUCTION_HELPER
CProxy_ReductionHelper reductionHelperProxy;
#endif
CProxy_LvArray lvProxy; ///< Proxy for the liveViz array
CProxy_LvArray smoothProxy; ///< Proxy for smooth reductions
CProxy_LvArray gravityProxy; ///< Proxy for gravity reductions
/// @brief Proxy for the gravity particle cache group.
CProxy_CkCacheManager<KeyType> cacheGravPart;
/// @brief Proxy for the smooth particle cache group.
CProxy_CkCacheManager<KeyType> cacheSmoothPart;
/// @brief Proxy for the tree node cache group.
CProxy_CkCacheManager<KeyType> cacheNode;
/// @brief Proxy for the DataManager
CProxy_DataManager dMProxy;
/// @brief Proxy for Managing IntraNode load balancing with ckloop.
CProxy_IntraNodeLBManager nodeLBMgrProxy;
/// @brief Proxy for the dumpframe image data (DumpFrameData).
CProxy_DumpFrameData dfDataProxy;
/// @brief Proxy for the PETreeMerger group.
CProxy_PETreeMerger peTreeMergerProxy;
/// @brief Use the cache (always on)
bool _cache;
/// @brief Disable the cache (always off)
int _nocache;
/// @brief Size of a Node Cache line, specified by how deep in the
/// tree it goes.
int _cacheLineDepth;
/// @brief The number of buckets to process in the local gravity walk
/// before yielding the processor.
unsigned int _yieldPeriod;
/// @brief The type of domain decomposition to use.
DomainsDec domainDecomposition;
double dExtraStore; ///< fraction of extra particle storage
double dMaxBalance; ///< Max piece imbalance for load balancing
double dFracLoadBalance; ///< Min fraction of particles active
/// for doing load balancing.
double dGlassDamper; // Damping inverse timescale for making glasses
int iGasModel; ///< For backward compatibility
int peanoKey;
/// @brief type of tree to use.
GenericTrees useTree;
/// @brief A potentially optimized proxy for the tree pieces. Its use
/// is deprecated.
CProxy_TreePiece streamingProxy;
/// @brief Number of pieces into which to divide the tree.
unsigned int numTreePieces;
/// @brief Number of particles per TreePiece. Used to determine the
/// number of TreePieces.
unsigned int particlesPerChare;
int nIOProcessor; ///< Number of pieces to be doing I/O at once
int _prefetch; ///< Prefetch nodes for the remote walk
int _numChunks; ///< number of chunks into which to
/// split the remote walk.
int _randChunks; ///< Randomize the chunks for the
/// remote walk.
unsigned int bucketSize; ///< Maximum number of particles in a bucket.
/// @brief Use Ckloop for node parallelization.
int bUseCkLoopPar;
//jetley
/// GPU related settings.
int localNodesPerReq;
int remoteNodesPerReq;
int remoteResumeNodesPerReq;
int localPartsPerReq;
int remotePartsPerReq;
int remoteResumePartsPerReq;
// multi-stepping particle transfer strategy
// switch threshold
double largePhaseThreshold;
cosmoType theta; ///< BH-like opening criterion
cosmoType thetaMono; ///< Criterion of excepting monopole
/// only cells.
/// @brief Boundary evaluation user event (for Projections tracing).
int boundaryEvaluationUE;
/// @brief Weight balancing during Oct decomposition user event (for Projections tracing).
int weightBalanceUE;
int networkProgressUE;
int nodeForceUE;
int partForceUE;
int tbFlushRequestsUE;
int prefetchDoneUE;
CkGroupID dataManagerID;
CkArrayID treePieceID;
#ifdef PUSH_GRAVITY
#include "ckmulticast.h"
CkGroupID ckMulticastGrpId;
#endif
CProxy_ProjectionsControl prjgrp;
/* The following is for backward compatibility and deprecated */
#define GASMODEL_UNSET -1
enum GasModel {
GASMODEL_ADIABATIC,
GASMODEL_ISOTHERMAL,
GASMODEL_COOLING,
GASMODEL_GLASS
};
int doDumpLB;
int lbDumpIteration;
int doSimulateLB;
/// Number of bins to use for the first iteration
/// of every Oct decomposition step
int numInitDecompBins;
/// Specifies the number of sub-bins a bin is split into
/// for Oct decomposition
int octRefineLevel;
void _Leader(void) {
puts("USAGE: ChaNGa [SETTINGS | FLAGS] [PARAM_FILE]");
puts("PARAM_FILE: Configuration file of a particular simulation, which");
puts(" includes desired settings and relevant input and");
puts(" output files. Settings specified in this file override");
puts(" the default settings.");
puts("SETTINGS");
puts("or FLAGS: Command line settings or flags for a simulation which");
puts(" will override any defaults and any settings or flags");
puts(" specified in the PARAM_FILE.");
}
void _Trailer(void) {
puts("(see the web page at\nhttps://github.com/N-BodyShop/changa/wiki\nfor more information)");
}
int killAt;
int cacheSize;
///
/// @brief Main routine to start simulation.
///
/// This routine parses the command line and file specified parameters,
/// and allocates the charm structures. The charm "readonly" variables
/// are writable in this method, and are broadcast globally once this
/// method exits. Note that the method finishes with an asynchronous
/// call to setupICs().
///
Main::Main(CkArgMsg* m) {
args = m;
_cache = true;
mainChare = thishandle;
bIsRestarting = 0;
bHaveAlpha = 0;
bChkFirst = 1;
dSimStartTime = CkWallTimer();
int threadNum = CkMyNodeSize();
// Floating point exceptions.
// feenableexcept(FE_OVERFLOW | FE_DIVBYZERO | FE_INVALID);
boundaryEvaluationUE = traceRegisterUserEvent("Evaluating Boudaries");
weightBalanceUE = traceRegisterUserEvent("Weight Balancer");
networkProgressUE = traceRegisterUserEvent("CmiNetworkProgress");
nodeForceUE = traceRegisterUserEvent("Node interaction");
partForceUE = traceRegisterUserEvent("Particle interaction");
#ifdef HAPI_TRACE
traceRegisterUserEvent("Tree Serialization", CUDA_SER_TREE);
traceRegisterUserEvent("List Serialization", CUDA_SER_LIST);
traceRegisterUserEvent("Local Node", CUDA_LOCAL_NODE_KERNEL);
traceRegisterUserEvent("Remote Node", CUDA_REMOTE_NODE_KERNEL);
traceRegisterUserEvent("Remote Resume Node", CUDA_REMOTE_RESUME_NODE_KERNEL);
traceRegisterUserEvent("Local Particle", CUDA_LOCAL_PART_KERNEL);
traceRegisterUserEvent("Remote Particle", CUDA_REMOTE_PART_KERNEL);
traceRegisterUserEvent("Remote Resume Particle", CUDA_REMOTE_RESUME_PART_KERNEL);
#endif
tbFlushRequestsUE = traceRegisterUserEvent("TreeBuild::buildOctTree::flushRequests");
prefetchDoneUE = traceRegisterUserEvent("PrefetchDone");
prmInitialize(&prm,_Leader,_Trailer);
csmInitialize(¶m.csm);
param.dDelta = 0.0;
prmAddParam(prm, "dDelta", paramDouble, ¶m.dDelta,
sizeof(double),"dt", "Base Timestep for integration");
param.iMaxRung = MAXRUNG;
prmAddParam(prm, "iMaxRung", paramInt, ¶m.iMaxRung,
sizeof(int),"mrung", "Maximum timestep rung (IGNORED)");
param.nSteps = 1;
prmAddParam(prm, "nSteps", paramInt, ¶m.nSteps,
sizeof(int),"n", "Number of Timesteps");
param.iStartStep = 0;
prmAddParam(prm, "iStartStep", paramInt, ¶m.iStartStep,
sizeof(int),"nstart", "Initial step numbering");
param.iWallRunTime = 0;
prmAddParam(prm,"iWallRunTime",paramInt,¶m.iWallRunTime,
sizeof(int),"wall",
"<Maximum Wallclock time (in minutes) to run> = 0 = infinite");
killAt = 0;
prmAddParam(prm, "killAt", paramInt, &killAt,
sizeof(int),"killat", "Stop the simulation after this step");
param.bEpsAccStep = 1;
prmAddParam(prm, "bEpsAccStep", paramBool, ¶m.bEpsAccStep,
sizeof(int),"epsacc", "Use sqrt(eps/a) timestepping");
param.bGravStep = 0;
prmAddParam(prm, "bGravStep", paramBool, ¶m.bGravStep,
sizeof(int),"gravstep",
"Use gravity interaction timestepping");
param.dEta = 0.03;
prmAddParam(prm, "dEta", paramDouble, ¶m.dEta,
sizeof(double),"eta", "Time integration accuracy");
param.bCannonical = 1;
prmAddParam(prm, "bCannonical", paramBool, ¶m.bCannonical,
sizeof(int),"can", "Cannonical Comoving eqns (IGNORED)");
param.bKDK = 1;
prmAddParam(prm, "bKDK", paramBool, ¶m.bKDK,
sizeof(int),"kdk", "KDK timestepping (IGNORED)");
#ifdef DTADJUST
param.bDtAdjust = 1;
#else
param.bDtAdjust = 0;
#endif
prmAddParam(prm, "bDtAdjust", paramBool, ¶m.bDtAdjust,
sizeof(int),"dtadj", "Emergency adjust of timesteps");
param.bBenchmark = 0;
prmAddParam(prm, "bBenchmark", paramBool, ¶m.bBenchmark,
sizeof(int),"bench", "Benchmark only; no output or checkpoints");
param.dGlassDamper = 0.0;
prmAddParam(prm,"dGlassDamper",paramDouble,¶m.dGlassDamper,
sizeof(double), "dGlassDamper",
"<Damping force inverse timescale> = 0.0");
//
// Output flags
//
param.iOutInterval = 10;
prmAddParam(prm, "iOutInterval", paramInt, ¶m.iOutInterval,
sizeof(int),"oi", "Output Interval");
param.iLogInterval = 1;
prmAddParam(prm, "iLogInterval", paramInt, ¶m.iLogInterval,
sizeof(int),"ol", "Log Interval");
param.iCheckInterval = 10;
prmAddParam(prm, "iCheckInterval", paramInt, ¶m.iCheckInterval,
sizeof(int),"oc", "Checkpoint Interval");
param.iOrbitOutInterval = 1;
prmAddParam(prm,"iOrbitOutInterval", paramInt,
¶m.iOrbitOutInterval,sizeof(int), "ooi",
"<number of timsteps between orbit outputs> = 1");
param.iBinaryOut = 0;
prmAddParam(prm, "iBinaryOutput", paramInt, ¶m.iBinaryOut,
sizeof(int), "binout",
"<array outputs 0 ascii, 1 float, 2 double, 3 FLOAT(internal)> = 0");
param.bDoDensity = 1;
prmAddParam(prm, "bDoDensity", paramBool, ¶m.bDoDensity,
sizeof(int),"den", "Enable Density outputs");
param.bDoIOrderOutput = 0;
prmAddParam(prm,"bDoIOrderOutput",paramBool,¶m.bDoIOrderOutput,
sizeof(int), "iordout","enable/disable iOrder outputs = -iordout");
#ifdef SPLITGAS
param.bDoIOrderOutput = 1; //This becomes very important if particles can split.
#endif
param.bDoSoftOutput = 0;
prmAddParam(prm,"bDoSoftOutput",paramBool,¶m.bDoSoftOutput,
sizeof(int),
"softout","enable/disable soft outputs = -softout");
param.bDohOutput = 0;
prmAddParam(prm,"bDohOutput",paramBool,¶m.bDohOutput,sizeof(int),
"hout","enable/disable h outputs = -hout");
param.bDoCSound = 0;
prmAddParam(prm,"bDoCSound",paramBool,¶m.bDoCSound,sizeof(int),
"csound","enable/disable sound speed outputs = -csound");
param.bDoStellarLW = 1; /*CC */
prmAddParam(prm,"bDoStellarLW",paramBool,¶m.bDoStellarLW,sizeof(int),
"LWout","enable/disable Lyman Werner outputs = -LWout");
param.nSmooth = 32;
prmAddParam(prm, "nSmooth", paramInt, ¶m.nSmooth,
sizeof(int),"s", "Number of neighbors for smooth");
param.bDoGravity = 1;
prmAddParam(prm, "bDoGravity", paramBool, ¶m.bDoGravity,
sizeof(int),"g", "Enable Gravity");
param.dSoft = 0.0;
prmAddParam(prm, "dSoft", paramDouble, ¶m.dSoft,
sizeof(double),"e", "Gravitational softening");
param.dSoftMax = 0.0;
prmAddParam(prm,"dSoftMax",paramDouble, ¶m.dSoftMax,
sizeof(double),"eMax", "maximum comoving gravitational softening length (abs or multiplier)");
param.bPhysicalSoft = 0;
prmAddParam(prm,"bPhysicalSoft", paramBool, ¶m.bPhysicalSoft,
sizeof(int),"PhysSoft", "Physical gravitational softening length");
param.bSoftMaxMul = 1;
prmAddParam(prm,"bSoftMaxMul", paramBool, ¶m.bSoftMaxMul,
sizeof(int),"SMM", "Use maximum comoving gravitational softening length as a multiplier +SMM");
param.dTheta = 0.7;
prmAddParam(prm, "dTheta", paramDouble, ¶m.dTheta,
sizeof(double), "theta", "Opening angle");
param.dTheta2 = 0.7;
prmAddParam(prm, "dTheta2", paramDouble, ¶m.dTheta2,
sizeof(double),"theta2",
"Opening angle after switchTheta");
param.daSwitchTheta = 1./3.;
prmAddParam(prm,"daSwitchTheta",paramDouble,¶m.daSwitchTheta,
sizeof(double),"aSwitchTheta",
"<a to switch theta at> = 1./3.");
#ifdef HEXADECAPOLE
param.iOrder = 4;
#else
param.iOrder = 2;
#endif
prmAddParam(prm, "iOrder", paramInt, ¶m.iOrder,
sizeof(int), "or", "Multipole expansion order(IGNORED)");
//
// Cosmology parameters
//
param.bPeriodic = 0;
prmAddParam(prm, "bPeriodic", paramBool, ¶m.bPeriodic,
sizeof(int),"per", "Periodic Boundaries");
param.nReplicas = 0;
prmAddParam(prm, "nReplicas", paramInt, ¶m.nReplicas,
sizeof(int),"nrep", "Number of periodic replicas");
param.fPeriod = 1.0;
prmAddParam(prm, "dPeriod", paramDouble, ¶m.fPeriod,
sizeof(double),"L", "Periodic size");
param.vPeriod.x = 1.0;
prmAddParam(prm,"dxPeriod",paramDouble,¶m.vPeriod.x,
sizeof(double),"Lx",
"<periodic box length in x-dimension> = 1.0");
param.vPeriod.y = 1.0;
prmAddParam(prm, "dyPeriod",paramDouble,¶m.vPeriod.y,
sizeof(double),"Ly",
"<periodic box length in y-dimension> = 1.0");
param.vPeriod.z = 1.0;
prmAddParam(prm,"dzPeriod",paramDouble,¶m.vPeriod.z,
sizeof(double),"Lz",
"<periodic box length in z-dimension> = 1.0");
param.bEwald = 1;
prmAddParam(prm,"bEwald",paramBool, ¶m.bEwald, sizeof(int),
"ewald", "enable/disable Ewald correction = +ewald");
param.dEwCut = 2.6;
prmAddParam(prm,"dEwCut", paramDouble, ¶m.dEwCut, sizeof(double),
"ewc", "<dEwCut> = 2.6");
param.dEwhCut = 2.8;
prmAddParam(prm,"dEwhCut", paramDouble, ¶m.dEwhCut, sizeof(double),
"ewh", "<dEwhCut> = 2.8");
param.csm->bComove = 0;
prmAddParam(prm, "bComove", paramBool, ¶m.csm->bComove,
sizeof(int),"cm", "Comoving coordinates");
param.csm->dHubble0 = 0.0;
prmAddParam(prm,"dHubble0",paramDouble,¶m.csm->dHubble0,
sizeof(double),"Hub", "<dHubble0> = 0.0");
param.csm->dOmega0 = 1.0;
prmAddParam(prm,"dOmega0",paramDouble,¶m.csm->dOmega0,
sizeof(double),"Om", "<dOmega0> = 1.0");
param.csm->dLambda = 0.0;
prmAddParam(prm,"dLambda",paramDouble,¶m.csm->dLambda,
sizeof(double),"Lambda", "<dLambda> = 0.0");
param.csm->dOmegaRad = 0.0;
prmAddParam(prm,"dOmegaRad",paramDouble,¶m.csm->dOmegaRad,
sizeof(double),"Omrad", "<dOmegaRad> = 0.0");
param.csm->dOmegab = 0.0;
prmAddParam(prm,"dOmegab",paramDouble,¶m.csm->dOmegab,
sizeof(double),"Omb", "<dOmegab> = 0.0");
param.csm->dQuintess = 0.0;
prmAddParam(prm,"dQuintess",paramDouble,¶m.csm->dQuintess,
sizeof(double),"Quint",
"<dQuintessence (constant w = -1/2) > = 0.0");
param.dRedTo = 0.0;
prmAddParam(prm,"dRedTo",paramDouble,¶m.dRedTo,sizeof(double),
"zto", "specifies final redshift for the simulation");
//
// Parameters for GrowMass: slowly growing mass of particles.
//
param.bDynGrowMass = 1;
prmAddParam(prm,"bDynGrowMass",paramBool,¶m.bDynGrowMass,
sizeof(int),"gmd","<dynamic growmass particles>");
param.nGrowMass = 0;
prmAddParam(prm,"nGrowMass",paramInt,¶m.nGrowMass,sizeof(int),
"gmn","<number of particles to increase mass> = 0");
param.dGrowDeltaM = 0.0;
prmAddParam(prm,"dGrowDeltaM",paramDouble,¶m.dGrowDeltaM,
sizeof(double),"gmdm",
"<Total growth in mass/particle> = 0.0");
param.dGrowStartT = 0.0;
prmAddParam(prm,"dGrowStartT",paramDouble,¶m.dGrowStartT,
sizeof(double),"gmst",
"<Start time for growing mass> = 0.0");
param.dGrowEndT = 1.0;
prmAddParam(prm,"dGrowEndT",paramDouble,¶m.dGrowEndT,
sizeof(double),"gmet","<End time for growing mass> = 1.0");
//
// Gas parameters
//
param.bDoGas = 0;
prmAddParam(prm, "bDoGas", paramBool, ¶m.bDoGas,
sizeof(int),"gas", "Enable Gas Calculation");
param.bFastGas = 1;
prmAddParam(prm, "bFastGas", paramBool, ¶m.bFastGas,
sizeof(int),"Fgas", "Fast Gas Method");
param.dFracFastGas = 0.1;
prmAddParam(prm,"dFracFastGas",paramDouble,¶m.dFracFastGas,
sizeof(double),"ffg",
"<Fraction of Active Particles for Fast Gas>");
param.dhMinOverSoft = 0.0;
prmAddParam(prm,"dhMinOverSoft",paramDouble,¶m.dhMinOverSoft,
sizeof(double),"hmin",
"<Minimum h as a fraction of Softening> = 0.0");
param.dResolveJeans = 0.0;
prmAddParam(prm,"dResolveJeans",paramDouble,¶m.dResolveJeans,
sizeof(double),"resjeans",
"<Fraction of pressure to resolve minimum Jeans mass> = 0.0");
param.bViscosityLimiter = 1;
prmAddParam(prm,"bViscosityLimiter",paramBool,¶m.bViscosityLimiter,
sizeof(int), "vlim","<Viscosity Limiter> = 1");
param.iViscosityLimiter = 1;
prmAddParam(prm,"iViscosityLimiter",paramInt,¶m.iViscosityLimiter,
sizeof(int), "ivlim","<Viscosity Limiter Type> = 1");
param.bGeometric = 0;
prmAddParam(prm,"bGeometric",paramBool,¶m.bGeometric,sizeof(int),
"geo","geometric/arithmetic mean to calc Grad(P/rho) = +geo");
param.bGasAdiabatic = 0;
prmAddParam(prm,"bGasAdiabatic",paramBool,¶m.bGasAdiabatic,
sizeof(int),"GasAdiabatic",
"<Gas is Adiabatic> = +GasAdiabatic");
param.bGasIsothermal = 0;
prmAddParam(prm,"bGasIsothermal",paramBool,¶m.bGasIsothermal,
sizeof(int),"GasIsothermal",
"<Gas is Isothermal> = -GasIsothermal");
param.bGasCooling = 0;
prmAddParam(prm,"bGasCooling",paramBool,¶m.bGasCooling,
sizeof(int),"GasCooling",
"<Gas is Cooling> = +GasCooling");
iGasModel = GASMODEL_UNSET; /* Deprecated in for backwards
compatibility */
prmAddParam(prm,"iGasModel", paramInt, &iGasModel, sizeof(int),
"GasModel", "<Gas model employed> = 0 (Adiabatic)");
CoolAddParams(¶m.CoolParam, prm);
param.dMaxEnergy = 1e300;
prmAddParam(prm,"dMaxTemperature",paramDouble,¶m.dMaxEnergy,
sizeof(double),"maxTemp", "<Maximum allowed gas temperature = 1e300>");
param.dMsolUnit = 1.0;
prmAddParam(prm,"dMsolUnit",paramDouble,¶m.dMsolUnit,
sizeof(double),"msu", "<Solar mass/system mass unit>");
param.dKpcUnit = 1000.0;
prmAddParam(prm,"dKpcUnit",paramDouble,¶m.dKpcUnit,
sizeof(double),"kpcu", "<Kiloparsec/system length unit>");
param.ddHonHLimit = 0.1;
prmAddParam(prm,"ddHonHLimit",paramDouble,¶m.ddHonHLimit,
sizeof(double),"dhonh", "<|dH|/H Limiter> = 0.1");
param.dConstAlpha = 1.0;
prmAddParam(prm,"dConstAlpha",paramDouble,¶m.dConstAlpha,
sizeof(double),"alpha",
"<Alpha constant in viscosity> = 1.0");
param.dConstBeta = 2.0;
prmAddParam(prm,"dConstBeta",paramDouble,¶m.dConstBeta,
sizeof(double),"beta",
"<Beta constant in viscosity> = 2.0");
#ifdef CULLENALPHA
param.dConstAlphaMax = 4.0;
prmAddParam(prm, "dConstAlphaMax", paramDouble, ¶m.dConstAlphaMax,
sizeof(double), "AlphaMax",
"< Cullen and Dehnen Alpha Max constant in viscosity> = 1.0");
#endif
param.dConstGamma = 5.0/3.0;
prmAddParam(prm,"dConstGamma",paramDouble,¶m.dConstGamma,
sizeof(double),"gamma", "<Ratio of specific heats> = 5/3");
param.dMeanMolWeight = 1.0;
prmAddParam(prm,"dMeanMolWeight",paramDouble,¶m.dMeanMolWeight,
sizeof(double),"mmw",
"<Mean molecular weight in amu> = 1.0");
param.dGasConst = 1.0;
prmAddParam(prm,"dGasConst",paramDouble,¶m.dGasConst,
sizeof(double),"gcnst", "<Gas Constant>");
param.bBulkViscosity = 0;
prmAddParam(prm,"bBulkViscosity",paramBool,¶m.bBulkViscosity,
sizeof(int), "bulk","<Bulk Viscosity> = 0");
param.dMetalDiffusionCoeff = 0;
prmAddParam(prm,"dMetalDiffusionCoeff",paramDouble,
¶m.dMetalDiffusionCoeff, sizeof(double),"metaldiff",
"<Coefficient in Metal Diffusion> = 0.0");
#ifdef DIFFUSIONPRICE
param.dThermalDiffusionCoeff = 1;
#else
param.dThermalDiffusionCoeff = 0;
#endif
prmAddParam(prm,"dThermalDiffusionCoeff",paramDouble,
¶m.dThermalDiffusionCoeff, sizeof(double),"thermaldiff",
"<Coefficient in Thermal Diffusion> = 0.0");
param.bConstantDiffusion = 0;
prmAddParam(prm,"bConstantDiffusion",paramBool,¶m.bConstantDiffusion,
sizeof(int),"constdiff", "<Constant Diffusion BC> = +constdiff");
param.dEtaDiffusion = 0.1;
prmAddParam(prm,"dEtaDiffusion",paramDouble,¶m.dEtaDiffusion,sizeof(double),
"etadiff", "<Diffusion dt criterion> = 0.1");
// SPH timestepping
param.bSphStep = 1;
prmAddParam(prm,"bSphStep",paramBool,¶m.bSphStep,sizeof(int),
"ss","<SPH timestepping>");
param.bViscosityLimitdt = 0;
prmAddParam(prm,"bViscosityLimitdt",paramBool,
¶m.bViscosityLimitdt,sizeof(int),
"vlim","<Balsara Viscosity Limit dt> = 0");
param.dEtaCourant = 0.4;
prmAddParam(prm,"dEtaCourant",paramDouble,¶m.dEtaCourant,
sizeof(double),"etaC", "<Courant criterion> = 0.4");
param.dEtauDot = 0.25;
prmAddParam(prm,"dEtauDot",paramDouble,¶m.dEtauDot,
sizeof(double),"etau", "<uDot criterion> = 0.25");
param.nTruncateRung = 0;
prmAddParam(prm,"nTruncateRung",paramInt,¶m.nTruncateRung,
sizeof(int),"nTR",
"<number of MaxRung particles to delete MaxRung> = 0");
param.bStarForm = 0;
prmAddParam(prm,"bStarForm",paramBool,¶m.bStarForm,sizeof(int),
"stfm","<Star Forming> = 0");
param.stfm = new Stfm();
param.stfm->AddParams(prm);
param.bFeedback = 0;
prmAddParam(prm,"bFeedBack",paramBool,¶m.bFeedback,sizeof(int),
"fdbk","<Stars provide feedback> = 0");
param.feedback = new Fdbk();
param.feedback->AddParams(prm);
param.dEvapMinTemp = 1e5;
prmAddParam(prm,"dEvapMinTemp",paramDouble,¶m.dEvapMinTemp,
sizeof(double),"evaptmin",
"<Minimumum temperature for evaporation > = 1e5");
param.dEvapCoeff = 1.2e-7;
prmAddParam(prm,"dEvapCoeff",paramDouble,¶m.dEvapCoeff,
sizeof(double),"evap",
"<Evap Coefficient due to thermal Conductivity (electrons), e.g. 1.2e-7 > = 0");
param.dThermalCondCoeff = 1.2e-7;
prmAddParam(prm,"dThermalCondCoeff",paramDouble,¶m.dThermalCondCoeff,
sizeof(double),"thermalcond",
"<Coefficient in Thermal Conductivity (electrons), e.g. 1.2e-7 > = 0");
param.dThermalCondSatCoeff = 17;
prmAddParam(prm,"dThermalCondSatCoeff",paramDouble,¶m.dThermalCondSatCoeff,
sizeof(double),"thermalcondsat",
"<Coefficient in Saturated Thermal Conductivity, e.g. 17 > = 17");
param.dThermalCond2Coeff = 5.0e2;
prmAddParam(prm,"dThermalCond2Coeff",paramDouble,¶m.dThermalCond2Coeff,
sizeof(double),"thermalcond2",
"<Coefficient in Thermal Conductivity 2 (atoms), e.g. 5.0e2 > = 0");
param.dThermalCond2SatCoeff = 0.5;
prmAddParam(prm,"dThermalCond2SatCoeff",paramDouble,¶m.dThermalCond2SatCoeff,
sizeof(double),"thermalcond2sat",
"<Coefficient in Saturated Thermal Conductivity 2, e.g. 0.5 > = 0.5");
param.bDoExternalGravity = 0;
prmAddParam(prm, "bDoExternalGravity", paramBool, ¶m.bDoExternalGravity,
sizeof(int), "bDoExternalGravity", "<Apply external gravity field to particles> = 0");
param.externalGravity.AddParams(prm);
param.iRandomSeed = 1;
prmAddParam(prm,"iRandomSeed", paramInt, ¶m.iRandomSeed,
sizeof(int), "iRand", "<Feedback random Seed> = 1");
param.sinks.AddParams(prm, param);
param.dSIDMSigma=0;
prmAddParam(prm,"dSIDMSigma",paramDouble,¶m.dSIDMSigma,sizeof(double),
"dSIDMSigma","DM cross section in cm^2/g (NA,constant,knee value, or norm)");
param.dSIDMVariable=0;
prmAddParam(prm,"dSIDMVariable",paramDouble,¶m.dSIDMVariable,sizeof(double),
"dSIDMVariable","(NA,NA, break value in km/s, exponent value)");
param.iSIDMSelect=0;
prmAddParam(prm,"iSIDMSelect",paramInt, ¶m.iSIDMSelect, sizeof(int),
"iSIDMSelect","SIDM version (0 off, 1 constant, 2 classical, 3 resonant)");
//
// Output parameters
//
param.bStandard = 1;
prmAddParam(prm, "bStandard", paramBool, ¶m.bStandard,sizeof(int),
"std", "output in standard TIPSY binary format (IGNORED)");
param.bDoublePos = 0;
prmAddParam(prm, "bDoublePos", paramBool, ¶m.bDoublePos,
sizeof(int), "dp",
"input/output double precision positions = -dp");
param.bDoubleVel = 0;
prmAddParam(prm,"bDoubleVel", paramBool, ¶m.bDoubleVel,sizeof(int),
"dv", "input/output double precision velocities = -dv");
param.bOverwrite = 1;
prmAddParam(prm, "bOverwrite", paramBool, ¶m.bOverwrite,sizeof(int),
"overwrite", "overwrite outputs (IGNORED)");
param.bParaRead = 1;
prmAddParam(prm, "bParaRead", paramBool, ¶m.bParaRead,sizeof(int),
"par", "enable/disable parallel reading of files (IGNORED)");
param.bParaWrite = 1;
prmAddParam(prm, "bParaWrite", paramBool, ¶m.bParaWrite,sizeof(int),
"paw", "enable/disable parallel writing of files");
param.nIOProcessor = 0;
prmAddParam(prm,"nIOProcessor",paramInt,¶m.nIOProcessor,
sizeof(int), "npio",
"number of simultaneous I/O processors = 0 (all)");
param.achInFile[0] = '\0';
prmAddParam(prm,"achInFile",paramString,param.achInFile,
256, "I", "input file name (or base file name)");
strcpy(param.achOutName,"pargrav");
prmAddParam(prm,"achOutName",paramString,param.achOutName,256,"o",
"output name for snapshots and logfile");
param.dExtraStore = 0.01;
prmAddParam(prm,"dExtraStore",paramDouble,¶m.dExtraStore,
sizeof(double), "estore",
"Extra memory for new particles");
param.dMaxBalance = 1e10;
prmAddParam(prm,"dMaxBalance",paramDouble,¶m.dMaxBalance,
sizeof(double), "maxbal",
"Maximum piece ratio for load balancing");
param.dFracLoadBalance = 0.0001;
prmAddParam(prm,"dFracLoadBalance",paramDouble,¶m.dFracLoadBalance,
sizeof(double), "fraclb",
"Minimum active particles for load balancing");
bDumpFrame = 0;
df = NULL;
param.dDumpFrameStep = -1.0;
prmAddParam(prm,"dDumpFrameStep",paramDouble, ¶m.dDumpFrameStep,
sizeof(double), "dfi",
"<number of steps between dumped frames> = -1 (disabled)");
param.dDumpFrameTime = -1.0;
prmAddParam(prm,"dDumpFrameTime",paramDouble,¶m.dDumpFrameTime,
sizeof(double), "dft",
"<time interval between dumped frames> = -1 (disabled)");
param.iDirector = 1;
prmAddParam(prm,"iDirector",paramInt,¶m.iDirector,sizeof(int),
"idr","<number of director files: 1, 2, 3> = 1");
param.bLiveViz = 0;
prmAddParam(prm, "bLiveViz", paramInt,¶m.bLiveViz, sizeof(int),
"liveviz", "enable real-time simulation render support (disabled)");
param.bUseCkLoopPar = 0;
prmAddParam(prm, "bUseCkLoopPar", paramBool,¶m.bUseCkLoopPar, sizeof(int),
"useckloop", "enable CkLoop to parallelize within node");
param.bStaticTest = 0;
prmAddParam(prm, "bStaticTest", paramBool, ¶m.bStaticTest,
sizeof(int),"st", "Static test of performance");
_randChunks = 1;
prmAddParam(prm, "bRandChunks", paramBool, &_randChunks,
sizeof(int),"rand", "Randomize the order of remote chunk computation (default: ON)");
_nocache = 0;
// prmAddParam(prm, "bNoCache", paramBool, &_nocache,
// sizeof(int),"nc", "Disable the CacheManager caching behaviour");
param.iVerbosity = 0;
prmAddParam(prm, "iVerbosity", paramInt, ¶m.iVerbosity,
sizeof(int),"v", "Verbosity");
bVDetails = 0;
prmAddParam(prm, "bVDetails", paramBool, &bVDetails,
sizeof(int),"vdetails", "Verbosity");
numTreePieces = 8 * CkNumPes();
prmAddParam(prm, "nTreePieces", paramInt, &numTreePieces,
sizeof(int),"p", "Number of TreePieces (default: 8*procs)");
particlesPerChare = 0;
prmAddParam(prm, "nPartPerChare", paramInt, &particlesPerChare,
sizeof(int),"ppc", "Average number of particles per TreePiece");
bucketSize = 12;
prmAddParam(prm, "nBucket", paramInt, &bucketSize,
sizeof(int),"b", "Particles per Bucket (default: 12)");
_numChunks = 1;
prmAddParam(prm, "nChunks", paramInt, &_numChunks,
sizeof(int),"c", "Chunks per TreePiece (default: 1)");
_yieldPeriod=5;
prmAddParam(prm, "nYield", paramInt, &_yieldPeriod,
sizeof(int),"y", "Yield Period (default: 5)");
param.cacheLineDepth=4;
prmAddParam(prm, "nCacheDepth", paramInt, ¶m.cacheLineDepth,
sizeof(int),"d", "Cache Line Depth (default: 4)");
_prefetch=true;
prmAddParam(prm, "bPrefetch", paramBool, &_prefetch,
sizeof(int),"f", "Enable prefetching in the cache (default: ON)");
cacheSize = 100000000;
prmAddParam(prm, "nCacheSize", paramInt, &cacheSize,
sizeof(int),"cs", "Size of cache (IGNORED)");
domainDecomposition=SFC_peano_dec;
peanoKey=0;
prmAddParam(prm, "nDomainDecompose", paramInt, &domainDecomposition,
sizeof(int),"D", "Kind of domain decomposition of particles");
param.dFracNoDomainDecomp = 0.0;
prmAddParam(prm, "dFracNoDomainDecomp", paramDouble,
¶m.dFracNoDomainDecomp, sizeof(double),"fndd",
"Fraction of active particles for no new DD = 0.0");
param.bConcurrentSph = 1;
prmAddParam(prm, "bConcurrentSph", paramBool, ¶m.bConcurrentSph,
sizeof(int),"consph", "Enable SPH running concurrently with Gravity");
// Recognize (and ignore) gasoline compatibility parameters.
static int iDummy = 0;
prmAddParam(prm, "bRestart", paramBool, &iDummy,
sizeof(int),"restartg", "(IGNORED) gasoline compatible restart");
prmAddParam(prm, "bDoSelfGravity", paramBool, &iDummy,
sizeof(int),"sg", "(IGNORED) Use bDoGravity");
prmAddParam(prm, "bVStart", paramBool, &iDummy,
sizeof(int), "vstart","(IGNORED)");
prmAddParam(prm, "bVStep", paramBool, &iDummy,
sizeof(int), "vstep", "(IGNORED)");
prmAddParam(prm, "bVRungStat", paramBool, &iDummy,
sizeof(int), "vrungstat", "(IGNORED)");
#ifdef PUSH_GRAVITY
param.dFracPushParticles = 0.0;
prmAddParam(prm, "dFracPush", paramDouble,
¶m.dFracPushParticles, sizeof(double),"fPush",
"Maximum proportion of active to total particles for push-based force evaluation = 0.0");
#endif
#ifdef SELECTIVE_TRACING
/* tracing control */
monitorRung = 12;
prmAddParam(prm, "iTraceRung", paramInt, &monitorRung,
sizeof(int),"traceRung", "Gravity starting rung to trace selectively");
monitorStart = 0;
prmAddParam(prm, "iTraceStart", paramInt, &monitorStart,
sizeof(int),"traceStart", "When to start selective tracing");
numTraceIterations = 3;
prmAddParam(prm, "iTraceFor", paramInt, &numTraceIterations,
sizeof(int),"traceFor", "Trace this many instances of the selected rungs");
numSkipIterations = 400;
prmAddParam(prm, "iTraceSkip", paramInt, &numSkipIterations,
sizeof(int),"traceSkip", "Skip tracing for these many iterations");
numMaxTrace = 5;
prmAddParam(prm, "iTraceMax", paramInt, &numMaxTrace,
sizeof(int),"traceMax", "Max. num. iterations traced");
traceIteration = 0;
traceState = TraceNormal;
projectionsOn = false;
#endif
numInitDecompBins = (1<<11);
prmAddParam(prm, "iInitDecompBins", paramInt, &numInitDecompBins,
sizeof(int),"initDecompBins", "Number of bins to use for the first iteration of every Oct decomposition step");
octRefineLevel = 1;
prmAddParam(prm, "iOctRefineLevel", paramInt, &octRefineLevel,
sizeof(int),"octRefineLevel", "Binary logarithm of the number of sub-bins a bin is split into for Oct decomposition (e.g. octRefineLevel 3 splits into 8 sub-bins) (default: 1)");
doDumpLB = false;
prmAddParam(prm, "bdoDumpLB", paramBool, &doDumpLB,
sizeof(int),"doDumpLB", "Should Orb3dLB dump LB database to text file and stop?");
lbDumpIteration = 0;
prmAddParam(prm, "ilbDumpIteration", paramInt, &lbDumpIteration,
sizeof(int),"lbDumpIteration", "Load balancing iteration for which to dump database");
doSimulateLB = false;
prmAddParam(prm, "bDoSimulateLB", paramBool, &doSimulateLB,
sizeof(int),"doSimulateLB", "Should Orb3dLB simulate LB decisions from dumped text file and stop?");
CkAssert(!(doDumpLB && doSimulateLB));
// jetley - cuda parameters
#ifdef CUDA
localNodesPerReqDouble = NODE_INTERACTIONS_PER_REQUEST_L;
prmAddParam(prm, "localNodesPerReq", paramDouble, &localNodesPerReqDouble,
sizeof(double),"localnodes", "Num. local node interactions allowed per CUDA request (in millions)");
remoteNodesPerReqDouble = NODE_INTERACTIONS_PER_REQUEST_RNR;
prmAddParam(prm, "remoteNodesPerReq", paramDouble, &remoteNodesPerReqDouble,
sizeof(double),"remotenodes", "Num. remote node interactions allowed per CUDA request (in millions)");
remoteResumeNodesPerReqDouble = NODE_INTERACTIONS_PER_REQUEST_RR;
prmAddParam(prm, "remoteResumeNodesPerReq", paramDouble, &remoteResumeNodesPerReqDouble,
sizeof(double),"remoteresumenodes", "Num. remote resume node interactions allowed per CUDA request (in millions)");
localPartsPerReqDouble = PART_INTERACTIONS_PER_REQUEST_L;
prmAddParam(prm, "localPartsPerReq", paramDouble, &localPartsPerReqDouble,
sizeof(double),"localparts", "Num. local particle interactions allowed per CUDA request (in millions)");
remotePartsPerReqDouble = PART_INTERACTIONS_PER_REQUEST_RNR;
prmAddParam(prm, "remotePartsPerReq", paramDouble, &remotePartsPerReqDouble,
sizeof(double),"remoteparts", "Num. remote particle interactions allowed per CUDA request (in millions)");
remoteResumePartsPerReqDouble = PART_INTERACTIONS_PER_REQUEST_RR;
prmAddParam(prm, "remoteResumePartsPerReq", paramDouble, &remoteResumePartsPerReqDouble,
sizeof(double),"remoteresumeparts", "Num. remote resume particle interactions allowed per CUDA request (in millions)");
largePhaseThreshold = TP_LARGE_PHASE_THRESHOLD_DEFAULT;
// prmAddParam(prm, "largePhaseThreshold", paramDouble, &largePhaseThreshold,
// sizeof(double),"largephasethresh", "Ratio of active to total particles at which all particles (not just active ones) are sent to gpu in the target buffer (No source particles are sent.)");
#endif
int processSimfile = 1;
if(!prmArgProc(prm,m->argc,m->argv, processSimfile)) {
CkExit();
}
// ensure that number of bins is a power of 2
if (numInitDecompBins > numTreePieces) {
unsigned int numBins = 1;
while (numBins < numTreePieces) {
numBins <<= 1;
}
numInitDecompBins = numBins;
}
if(bVDetails && !param.iVerbosity)
param.iVerbosity = 1;
if(prmSpecified(prm, "iMaxRung")) {
ckerr << "WARNING: ";
ckerr << "iMaxRung parameter ignored. MaxRung is " << MAXRUNG
<< endl;
}
if(prmSpecified(prm, "bKDK")) {
ckerr << "WARNING: ";
ckerr << "bKDK parameter ignored; KDK is always used." << endl;
}
if(prmSpecified(prm, "bStandard")) {
ckerr << "WARNING: ";
ckerr << "bStandard parameter ignored; Output is always standard."
<< endl;
}
if(prmSpecified(prm, "iOrder")) {
ckerr << "WARNING: ";
#ifdef HEXADECAPOLE
ckerr << "iOrder parameter ignored; expansion order is 4."
#else
ckerr << "iOrder parameter ignored; expansion order is 2."
#endif
<< endl;
}
if(prmSpecified(prm, "bRestart")) {
ckerr << "WARNING: ";
ckerr << "bRestart parameter ignored; "
<< "restart from checkpoint is done with ++restart."
<< endl;
}
if(prmSpecified(prm, "bDoSelfGravity")) {
ckerr << "WARNING: ";
ckerr << "bDoSelfGravity parameter ignored; "
<< "Use bDoGravity to turn gravity on or off."
<< endl;
}
if(prmSpecified(prm, "bVStart")) {
ckerr << "WARNING: ";
ckerr << "bVStart parameter ignored"
<< "use -v # to increase verbosity"
<< endl;
}
if(prmSpecified(prm, "bVStep")) {
ckerr << "WARNING: ";
ckerr << "bVStep parameter ignored"
<< "use -v # to increase verbosity"
<< endl;
}
if(prmSpecified(prm, "bVRungStat")) {
ckerr << "WARNING: ";
ckerr << "bVRungStat parameter ignored"
<< "use -v # to increase verbosity"
<< endl;
}
if(!prmSpecified(prm, "dTheta2")) {
param.dTheta2 = param.dTheta;
}
/* set readonly/global variables */
theta = param.dTheta;
thetaMono = theta*theta*theta*theta;
dExtraStore = param.dExtraStore;
dMaxBalance = param.dMaxBalance;
dFracLoadBalance = param.dFracLoadBalance;
dGlassDamper = param.dGlassDamper;
_cacheLineDepth = param.cacheLineDepth;
verbosity = param.iVerbosity;
nIOProcessor = param.nIOProcessor;
#if CMK_SMP
bUseCkLoopPar = param.bUseCkLoopPar;
#else
bUseCkLoopPar = 0;
#endif
if (bUseCkLoopPar) {
CkPrintf("Using CkLoop %d\n", param.bUseCkLoopPar);
} else {
CkPrintf("Not Using CkLoop %d\n", param.bUseCkLoopPar);
}
if(prmSpecified(prm, "bCannonical")) {
ckerr << "WARNING: ";
ckerr << "bCannonical parameter ignored; integration is always cannonical"
<< endl;
}
if(prmSpecified(prm, "bOverwrite")) {
ckerr << "WARNING: ";
ckerr << "bOverwrite parameter ignored."
<< endl;
}
if(param.iOutInterval <= 0) {
ckerr << "WARNING: ";
ckerr << "iOutInterval is non-positive; setting to 1."
<< endl;
param.iOutInterval = 1;
}
if(param.iLogInterval <= 0) {
ckerr << "WARNING: ";
ckerr << "iLogInterval is non-positive; setting to 1."
<< endl;
param.iLogInterval = 1;
}
if (param.bPhysicalSoft) {
if (!param.csm->bComove) {
ckerr << "WARNING: bPhysicalSoft reset to 0 for non-comoving (bComove == 0)\n";
param.bPhysicalSoft = 0;
}
#ifndef CHANGESOFT
ckerr << "ERROR: You must compile with -DCHANGESOFT to use changing softening options\n";
CkAbort("CHANGESOFT needed");
#endif
}
dTimeOld = 0.0; // Reset if this is restarting from an output.
/*
** Make sure that we have some setting for nReplicas if
** bPeriodic is set.
*/
if (param.bPeriodic && !prmSpecified(prm,"nReplicas")) {
param.nReplicas = 1;
}
/*
** Warn that we have a setting for nReplicas if bPeriodic NOT set.
*/
if (!param.bPeriodic && param.nReplicas != 0) {
CkPrintf("WARNING: nReplicas set to non-zero value for non-periodic!\n");
}
/*
** Warn that nReplicas is set to 0 for bPeriodic.
*/
if (param.bPeriodic && param.nReplicas == 0) {
CkPrintf("WARNING: nReplicas set to zero value for periodic!\n");
}
/*
** Determine the period of the box that we are using.
** Set the new d[xyz]Period parameters which are now used instead
** of a single dPeriod, but we still want to have compatibility
** with the old method of setting dPeriod.
*/
if (prmSpecified(prm,"dPeriod") && !prmSpecified(prm,"dxPeriod")) {
param.vPeriod.x = param.fPeriod;
}
if (prmSpecified(prm,"dPeriod") && !prmSpecified(prm,"dyPeriod")) {
param.vPeriod.y = param.fPeriod;
}
if (prmSpecified(prm,"dPeriod") && !prmSpecified(prm,"dzPeriod")) {
param.vPeriod.z = param.fPeriod;
}
/*
** Periodic boundary conditions can be disabled along any of the
** x,y,z axes by specifying a period of zero for the given axis.
** Internally, the period is set to infinity (Cf. pkdBucketWalk()
** and pkdDrift(); also the INTERSECT() macro in smooth.h).
*/
if (param.fPeriod == 0) param.fPeriod = 1.0e38;
if (param.vPeriod.x == 0) param.vPeriod.x = 1.0e38;
if (param.vPeriod.y == 0) param.vPeriod.y = 1.0e38;
if (param.vPeriod.z == 0) param.vPeriod.z = 1.0e38;
if(!param.bPeriodic) {
param.nReplicas = 0;
param.fPeriod = 1.0e38;
param.vPeriod = Vector3D<double>(1.0e38);
param.bEwald = 0;
}
#ifdef CUDA
double mil = 1e6;
localNodesPerReq = (int) (localNodesPerReqDouble * mil);
remoteNodesPerReq = (int) (remoteNodesPerReqDouble * mil);
remoteResumeNodesPerReq = (int) (remoteResumeNodesPerReqDouble * mil);
localPartsPerReq = (int) (localPartsPerReqDouble * mil);
remotePartsPerReq = (int) (remotePartsPerReqDouble * mil);
remoteResumePartsPerReq = (int) (remoteResumePartsPerReqDouble * mil);
ckout << "INFO: ";
ckout << "localNodes: " << localNodesPerReqDouble << endl;
ckout << "INFO: ";
ckout << "remoteNodes: " << remoteNodesPerReqDouble << endl;
ckout << "INFO: ";
ckout << "remoteResumeNodes: " << remoteResumeNodesPerReqDouble << endl;
ckout << "INFO: ";
ckout << "localParts: " << localPartsPerReqDouble << endl;
ckout << "INFO: ";
ckout << "remoteParts: " << remotePartsPerReqDouble << endl;
ckout << "INFO: ";
ckout << "remoteResumeParts: " << remoteResumePartsPerReqDouble << endl;
ckout << "INFO: largePhaseThreshold: " << largePhaseThreshold << endl;
#endif
if(prmSpecified(prm, "bGeometric")) {
ckerr << "WARNING: ";
ckerr << "bGeometric parameter ignored." << endl;
}
if (prmSpecified(prm,"bViscosityLimiter")) {
if (!param.bViscosityLimiter) param.iViscosityLimiter=0;
}
if(prmSpecified(prm, "iGasModel")) {
switch(iGasModel) {
case GASMODEL_ADIABATIC:
param.bGasAdiabatic = 1;
break;
case GASMODEL_ISOTHERMAL:
param.bGasIsothermal = 1;
break;
case GASMODEL_COOLING:
param.bGasCooling = 1;
break;
}
}
if(param.bGasCooling && (param.bGasIsothermal || param.bGasAdiabatic)) {
ckerr << "WARNING: ";
ckerr << "More than one gas model set. ";
ckerr << "Defaulting to Cooling.";
ckerr << endl;
param.bGasCooling = 1;
param.bGasAdiabatic = 0;
param.bGasIsothermal = 0;
}
if(param.bGasAdiabatic && param.bGasIsothermal) {
ckerr << "WARNING: ";
ckerr << "Both adiabatic and isothermal set.";
ckerr << "Defaulting to Adiabatic.";
ckerr << endl;
param.bGasAdiabatic = 1;
param.bGasIsothermal = 0;
}
if(param.dConstGamma <= 1.0) {
ckerr << "dConstGamma relates pressure to internal energy and must be greater than 1.0";
ckerr << endl;
CkAbort("Bad value for dConstGamma");
}
if(prmSpecified(prm, "bBulkViscosity")) {
ckerr << "WARNING: ";
ckerr << "bBulkViscosity parameter ignored." << endl;
}
#ifdef COOLING_NONE
CkMustAssert(!param.bGasCooling, "Gas cooling requested but not compiled in");
#endif
if(param.bGasCooling) {
CkAssert(prmSpecified(prm, "dMsolUnit")
&& prmSpecified(prm, "dKpcUnit"));
}
if((param.bFeedback || param.bStarForm) && !param.bDoGas) {
ckerr << "WARNING: star formation set without enabling SPH" << endl;
ckerr << "Enabling SPH" << endl;
param.bDoGas = 1;
}
if(!param.bStarForm && !prmSpecified(prm, "bDoStellarLW")) {
// Stellar LW output is meaningless if we are not forming stars.
param.bDoStellarLW = 0;
}
if(param.bDoGas && !(param.bGasCooling || param.bGasAdiabatic
|| param.bGasIsothermal)) {
ckerr << "Defaulting to Adiabatic Gas Model." << endl;
param.bGasAdiabatic = 1;
}
if(!param.bDoGas) {
param.bSphStep = 0;
param.bDtAdjust = 0; // DtAdjust only affects gas
}
#if WENDLAND == 1
if(param.bDoGas && param.nSmooth < 32) {
ckerr << "WARNING: nSmooth < 32 with WENDLAND kernel." << endl;
ckerr << "WARNING: M4 kernel with be used for smoothing." << endl;
}
#endif
#include "physconst.h"
/*
** Convert kboltz/mhydrogen to system units, assuming that
** G == 1.
*/
if(prmSpecified(prm, "dMsolUnit") &&
prmSpecified(prm, "dKpcUnit")) {
param.dGasConst = param.dKpcUnit*KPCCM*KBOLTZ
/MHYDR/GCGS/param.dMsolUnit/MSOLG;
double dTuFac = param.dGasConst/(param.dConstGamma-1)/param.dMeanMolWeight;
param.dMaxEnergy *= dTuFac;
/* code energy per unit mass --> erg per g */
param.dErgPerGmUnit = GCGS*param.dMsolUnit*MSOLG
/(param.dKpcUnit*KPCCM);
/* code density --> g per cc */
param.dGmPerCcUnit = (param.dMsolUnit*MSOLG)
/pow(param.dKpcUnit*KPCCM,3.0);
/* code time --> seconds */
param.dSecUnit = sqrt(1/(param.dGmPerCcUnit*GCGS));
/* code comoving density --> g per cc = param.dGmPerCcUnit (1+z)^3 */
param.dComovingGmPerCcUnit = param.dGmPerCcUnit;
#ifdef SUPERBUBBLE
/* Thermal conductivity, c.f. Tielens p. 448
Heat flux = K(T) grad T, dE/dt = div K(T) grad T E = rho u
K(T) = 6.1e-7 T^2.5 erg s^-1 deg^-7/2 cm^-1
Silich: B-field reduces K(T) by ~ factor 10
we use du/dt = div k(u)/rho grad u
u erg/gm = 1.5 k_B / (0.6*m_H) T (ionized) */
/* Heat flux is k(u) u^2.5 grad u saturating at n_e 3/2 k T_e v_e
k(u) u^2.5 has units of erg (erg/g)^-1 s^-1 cm^-1
saturation value ~ 17 rho c_s h (g s^-1 cm^-1)
Assuming primordial v_e ~ 33 c_s, n_e ~ 0.52 n, grad u ~ u/h
So dThermalCondSatCoeff ~ 17, so time is 1/17 Courant time */
/* Convert K(T) to k(u) erg s^-1 (erg/gm)^-7/2 cm^-1 */
param.dThermalCondCoeffCode = param.dThermalCondCoeff
*pow(1.5*KBOLTZ/(param.dMeanMolWeight*MHYDR),-3.5);
/* Convert K2(T) to k2(u) erg s^-1 (erg/gm)^-3/2 cm^-1 */
param.dThermalCond2CoeffCode = param.dThermalCond2Coeff
*pow(1.5*KBOLTZ/(param.dMeanMolWeight*MHYDR),-1.5);
/* Convert to code units */
param.dThermalCondCoeffCode /=
pow(param.dErgPerGmUnit,-3.5)
*param.dErgPerGmUnit*param.dGmPerCcUnit
*pow(param.dKpcUnit*KPCCM,2.0)
/param.dSecUnit;
param.dThermalCond2CoeffCode /=
pow(param.dErgPerGmUnit,-1.5)
*param.dErgPerGmUnit*param.dGmPerCcUnit
*pow(param.dKpcUnit*KPCCM,2.0)
/param.dSecUnit;
/* You enter effective thermal coefficient e.g. kappa0=6.1d-7 erg s^-1 K^-7/2 cm^-1
Code then multiplies by prefactors to 4/25 kappa0 mmw/k (1.5k/mmw)^-5/2 */
param.dEvapCoeffCode = param.dEvapCoeff*pow(32./param.nSmooth,.3333333333)*
4/25.*(param.dMeanMolWeight*MHYDR)/KBOLTZ*pow(1.5*KBOLTZ/(param.dMeanMolWeight*MHYDR),-2.5);
/* Convert final units: g/cm/s (erg/g)^-2.5 to code units
[Later: Multiply by u^5/2 and multiply a length gives mdot units g/s] */
param.dEvapCoeffCode /=
pow(param.dErgPerGmUnit,-2.5)
*param.dGmPerCcUnit*pow(param.dKpcUnit*KPCCM,2.0)
/param.dSecUnit;
#else
param.dThermalCondCoeffCode = 0;
param.dThermalCond2CoeffCode = 0;
param.dEvapCoeffCode = 0;
#endif
}
#ifndef DIFFUSION
CkMustAssert(!prmSpecified(prm,"dMetalDiffusionCoeff"), "Metal Diffusion Rate specified but not compiled for\nUse -DDIFFUSION during compilation\n");
#endif
#ifdef NODIFFUSIONTHERMAL
CkMustAssert(!prmSpecified(prm, "dThermalDiffusionCoeff"), "Thermal Diffusion Rate specified but not compiled for\n");
#endif
if (domainDecomposition == SFC_peano_dec) peanoKey = 3;
if (domainDecomposition == SFC_peano_dec_2D) peanoKey = 2;
if (domainDecomposition == SFC_peano_dec_3D) peanoKey = 3;
// hardcoding some parameters, later may be full options
if(domainDecomposition==ORB_dec || domainDecomposition==ORB_space_dec){
useTree = Binary_ORB;
// CkAbort("ORB decomposition known to be bad and not implemented");
}
else { useTree = Binary_Oct; }
#define xstr(s) str(s)
#define str(s) #s
ckerr << "ChaNGa version " << xstr(NBODY_PACKAGE_VERSION) << ", commit "
<< Cha_CommitID << endl;
#undef str
#undef xstr
ckerr << "Running on " << CkNumPes() << " processors/ "<< CkNumNodes() <<" nodes with " << numTreePieces << " TreePieces" << endl;
if (verbosity)
ckerr << "yieldPeriod set to " << _yieldPeriod << endl;
CkMustAssert(_cacheLineDepth >= 0, "Cache Line depth must be greater than or equal to 0");
if(verbosity)
ckerr << "Prefetching..." << (_prefetch?"ON":"OFF") << endl;
if (verbosity)
ckerr << "Number of chunks for remote tree walk set to " << _numChunks << endl;
CkMustAssert(_numChunks > 0, "Number of chunks for remote tree walk must be greater than 0");
if(verbosity)
ckerr << "Chunk Randomization..." << (_randChunks?"ON":"OFF") << endl;
if(param.achInFile[0]) {
basefilename = param.achInFile;
}else{
ckerr<<"Base file name missing\n";
CkExit();
}
if (_nocache) _cacheLineDepth = 0;
if(verbosity) {
ckerr<<"cache "<<_cache << (_nocache?" (disabled)":"") <<endl;
ckerr<<"cacheLineDepth "<<_cacheLineDepth<<endl;
}
if (verbosity) {
ckerr << "Verbosity level " << verbosity << endl;
}
if(verbosity) {
switch(domainDecomposition){
case SFC_dec:
ckerr << "Domain decomposition...SFC Morton" << endl;
break;
case Oct_dec:
ckerr << "Domain decomposition...Oct" << endl;
break;
case ORB_dec:
ckerr << "Domain decomposition...ORB" << endl;
break;
case ORB_space_dec:
ckerr << "Domain decomposition...ORB space" << endl;
break;
case SFC_peano_dec:
ckerr << "Domain decomposition...SFC Peano-Hilbert" << endl;
break;
case SFC_peano_dec_3D:
ckerr << "Domain decomposition... new SFC Peano-Hilbert" << endl;
break;
case SFC_peano_dec_2D:
ckerr << "Domain decomposition... 2D SFC Peano-Hilbert" << endl;
break;
default:
CkAbort("None of the implemented decompositions specified");
}
}
CkArrayOptions opts(numTreePieces);
#ifdef ROUND_ROBIN_WITH_OCT_DECOMP
if (domainDecomposition == Oct_dec) {
CProxy_RRMap myMap=CProxy_RRMap::ckNew();
opts.setMap(myMap);
} else {
#endif
CProxy_DefaultArrayMap myMap = CProxy_DefaultArrayMap::ckNew();
opts.setMap(myMap);
#ifdef ROUND_ROBIN_WITH_OCT_DECOMP
}
#endif
CProxy_TreePiece pieces = CProxy_TreePiece::ckNew(opts);
prjgrp = CProxy_ProjectionsControl::ckNew();
treeProxy = pieces;
#ifdef REDUCTION_HELPER
reductionHelperProxy = CProxy_ReductionHelper::ckNew();
#endif
opts.bindTo(treeProxy);
lvProxy = CProxy_LvArray::ckNew(opts);
// Create an array for the smooth reductions
smoothProxy = CProxy_LvArray::ckNew(opts);
// Create an array for the gravity reductions
gravityProxy = CProxy_LvArray::ckNew(opts);
#ifdef PUSH_GRAVITY
ckMulticastGrpId = CProxy_CkMulticastMgr::ckNew();
#endif
peTreeMergerProxy = CProxy_PETreeMerger::ckNew();
dfDataProxy = CProxy_DumpFrameData::ckNew();
// create CacheManagers
// Gravity particles
cacheGravPart = CProxy_CkCacheManager<KeyType>::ckNew(cacheSize, pieces.ckLocMgr()->getGroupID());
// Smooth particles
cacheSmoothPart = CProxy_CkCacheManager<KeyType>::ckNew(cacheSize, pieces.ckLocMgr()->getGroupID());
// Nodes
cacheNode = CProxy_CkCacheManager<KeyType>::ckNew(cacheSize, pieces.ckLocMgr()->getGroupID());
//create the DataManager
CProxy_DataManager dataManager = CProxy_DataManager::ckNew(pieces);
dataManagerID = dataManager;
dMProxy = dataManager;
nodeLBMgrProxy = CProxy_IntraNodeLBManager::ckNew(1,pieces.ckLocMgr()->getGroupID());
streamingProxy = pieces;
//create the Sorter
sorter = CProxy_Sorter::ckNew(0);
CkLoop_Init(threadNum);
if(verbosity)
ckerr << "Created " << numTreePieces << " pieces of tree" << endl;
CProxy_Main(thishandle).setupICs();
}
///
/// @brief Restart Main constructor
///
/// This is only called when restarting from a checkpoint.
///
Main::Main(CkMigrateMessage* m) : CBase_Main(m) {
args = (CkArgMsg *)malloc(sizeof(*args));
args->argv = CmiCopyArgs(((CkArgMsg *) m)->argv);
mainChare = thishandle;
bIsRestarting = 1;
bHaveAlpha = 1;
CkPrintf("Main(CkMigrateMessage) called\n");
sorter = CProxy_Sorter::ckNew(0);
}
/// @brief entry method to cleanly shutdown. Only used for debugging.
void Main::niceExit() {
static unsigned int count = 0;
if (++count == numTreePieces) CkExit();
}
/// @brief determine start time of simulation
///
/// Function to determine the start time of the simulation.
/// Modifies dTime member of main.
void Main::getStartTime()
{
// Grab starting time.
if(param.csm->bComove) {
if(param.csm->dHubble0 == 0.0) {
ckerr << "No hubble constant specified" << endl;
CkExit();
return;
}
/* input time is expansion factor. Convert */
double dAStart = treeProxy[0].ckLocal()->dStartTime;
if(dAStart <= 0.0) {
ckerr << "Attempt to start at infinite redshift" << endl;
CkExit();
return;
}
double z = 1.0/dAStart - 1.0;
double aTo, tTo;
dTime = csmExp2Time(param.csm, dAStart);
if (verbosity > 0)
ckout << "Input file, Time:" << dTime << " Redshift:" << z
<< " Expansion factor:" << dAStart << endl;
if (prmSpecified(prm,"dRedTo")) {
aTo = 1.0/(param.dRedTo + 1.0);
tTo = csmExp2Time(param.csm,aTo);
if (verbosity > 0)
ckout << "Simulation to Time:" << tTo << " Redshift:"
<< 1.0/aTo - 1.0 << " Expansion factor:" << aTo
<< endl;
if (tTo < dTime) {
ckerr << "Badly specified final redshift, check -zto parameter."
<< endl;
CkExit();
}
if (!prmArgSpecified(prm,"nSteps") &&
prmArgSpecified(prm,"dDelta")) {
param.nSteps = (int)ceil((tTo-dTime)/param.dDelta)+param.iStartStep;
}
else if (!prmArgSpecified(prm,"dDelta") &&
prmArgSpecified(prm,"nSteps")) {
if(param.nSteps != 0)
param.dDelta =
(tTo-dTime)/(param.nSteps - param.iStartStep);
else
/* set dDelta to a non-zero value so timestep
* adjustment works. */
param.dDelta = 1.0;
}
else if (!prmSpecified(prm,"nSteps") &&
prmFileSpecified(prm,"dDelta")) {
param.nSteps = (int)ceil((tTo-dTime)/param.dDelta) + param.iStartStep;
}
else if (!prmSpecified(prm,"dDelta") &&
prmFileSpecified(prm,"nSteps")) {
if(param.nSteps != 0)
param.dDelta = (tTo-dTime)/(param.nSteps
- param.iStartStep);
else
/* set dDelta to a non-zero value so timestep
* adjustment works. */
param.dDelta = 1.0;
}
}
else {
tTo = dTime + (param.nSteps-param.iStartStep)*param.dDelta;
aTo = csmTime2Exp(param.csm,tTo);
if (verbosity > 0)
ckout << "Simulation to Time:" << tTo << " Redshift:"
<< 1.0/aTo - 1.0 << " Expansion factor:"
<< aTo << endl;
}
// convert to canonical comoving coordinates.
treeProxy.velScale(dAStart*dAStart, CkCallbackResumeThread());
}
else { // not Comove
dTime = treeProxy[0].ckLocal()->dStartTime;
if(verbosity > 0)
ckout << "Input file, Time:" << dTime << endl;
double tTo = dTime + (param.nSteps-param.iStartStep)*param.dDelta;
if (verbosity > 0) {
ckout << "Simulation to Time:" << tTo << endl;
}
}
if(param.nSteps == 0 && !isfinite(1.0/param.dDelta)) {
/* set dDelta to a non-zero value so timestep
* adjustment works. */
param.dDelta = 1.0;
}
}
/// @brief Return true if we need to write an output
///
/// Advances iOut attribute, therefore this can only be called once
/// per timestep.
///
int Main::bOutTime()
{
if (iOut < vdOutTime.size()) {
if (dTime >= vdOutTime[iOut]) {
++iOut;
return(1);
}
else return(0);
}
else return(0);
}
///
/// @brief Read in desired output times and reshifts from a file
///
/// Fills in the vdOutTime vector by reading the .red file
///
void Main::getOutTimes()
{
FILE *fp;
int ret;
double z,a,n,t;
char achIn[80];
string achFileName = string(param.achOutName) + ".red";
fp = fopen(achFileName.c_str(),"r");
if (!fp) {
if (verbosity && param.csm->bComove)
cerr << "WARNING: Could not open redshift input file: "
<< achFileName << endl;
return;
}
while (1) {
if (!fgets(achIn,80,fp))
break;
ret = 1; // default return if redshift is invalid
switch (achIn[0]) {
case 'z':
if(!param.csm->bComove) {
cerr << "WARNING: output redshift invalid in non-comoving coordinates" << endl;
break;
}
ret = sscanf(&achIn[1],"%lf",&z);
if (ret != 1) break;
a = 1.0/(z+1.0);
vdOutTime.push_back(csmExp2Time(param.csm,a));
break;
case 'a':
if(!param.csm->bComove) {
cerr << "WARNING: output expansion invalid in non-comoving coordinates" << endl;
break;
}
ret = sscanf(&achIn[1],"%lf",&a);
if (ret != 1) break;
vdOutTime.push_back(csmExp2Time(param.csm,a));
break;
case 't':
ret = sscanf(&achIn[1],"%lf",&t);
if (ret != 1) break;
vdOutTime.push_back(t);
break;
case 'n':
ret = sscanf(&achIn[1],"%lf",&n);
if (ret != 1) break;
vdOutTime.push_back(dTime + (n-0.5)*param.dDelta);
break;
default:
if(!param.csm->bComove) {
cerr << "WARNING: output redshift invalid in non-comoving coordinates" << endl;
break;
}
ret = sscanf(achIn,"%lf",&z);
if (ret != 1) break;
a = 1.0/(z+1.0);
vdOutTime.push_back(csmExp2Time(param.csm,a));
}
if (ret != 1)
break;
}
/*
** Now sort the array of output times into ascending order.
*/
vdOutTime.quickSort();
fclose(fp);
}
// determine if we need a smaller step for dumping frames
inline int Main::nextMaxRungIncDF(int nextMaxRung)
{
if (bDumpFrame && nextMaxRung < df[0]->iMaxRung)
nextMaxRung = df[0]->iMaxRung;
return nextMaxRung;
}
/// @brief wait for gravity in the case of concurrent SPH
inline void Main::waitForGravity(const CkCallback &cb, double startTime,
int activeRung)
{
if(param.bConcurrentSph && param.bDoGravity) {
#ifdef PUSH_GRAVITY
if(bDoPush){
CkWaitQD();
}
else{
#endif
CkFreeMsg(cb.thread_delay());
#ifdef PUSH_GRAVITY
}
#endif
}
double tGrav = CkWallTimer()-startTime;
timings[activeRung].tGrav += tGrav;
CkPrintf("Calculating gravity and SPH took %g seconds.\n", tGrav);
#ifdef SELECTIVE_TRACING
turnProjectionsOff();
#endif
}
/// @brief Perform domain decomposition
/// @param Active rung (or phase).
void Main::domainDecomp(int iPhase)
{
double startTime = CkWallTimer();
bool bDoDD; // determine new domains (true) or use existing
// domains (false)
if(iPhase == PHASE_FEEDBACK) {
CkPrintf("Domain decomposition for star formation/feedback... ");
bDoDD = true;
}
else {
CkPrintf("Domain decomposition ... ");
bDoDD = param.dFracNoDomainDecomp*nTotalParticles < nActiveGrav;
}
if (bDoDD) {
sorter.startSorting(dataManagerID, ddTolerance,
CkCallbackResumeThread(), bDoDD);
} else {
CkReductionMsg *isTPEmpty;
treeProxy.unshuffleParticlesWoDD(CkCallbackResumeThread((void*&)isTPEmpty));
// After shuffling of particles based on the previous boundaries, if any
// TreePiece ends up with no particles, then startSorting needs to be
// called as we cannot handle the case where there are empty TreePieces in
// the middle.
if (*((int*)isTPEmpty->getData())) {
sorter.startSorting(dataManagerID, ddTolerance,
CkCallbackResumeThread(), bDoDD);
}
delete isTPEmpty;
}
double tDD = CkWallTimer()-startTime;
timings[iPhase].tDD += tDD;
CkPrintf("total %g seconds.\n", tDD);
if(verbosity && !bDoDD)
CkPrintf("Skipped DD\n");
}
/// @brief Perform load balance.
/// @param iPhase Active rung (or phase). -1 indicates initial LB.
void
Main::loadBalance(int iPhase)
{
if(iPhase == -1) {
CkPrintf("Initial load balancing ... ");
treeProxy.balanceBeforeInitialForces(CkCallbackResumeThread());
}
else {
double startTime = CkWallTimer();
if(iPhase == PHASE_FEEDBACK) {
CkPrintf("Load balancer for star formation/feedback... ");
}
else {
CkPrintf("Load balancer ... ");
}
treeProxy.startlb(CkCallbackResumeThread(), iPhase);
double tLB = CkWallTimer()-startTime;
timings[iPhase].tLoadB += tLB;
CkPrintf("took %g seconds.\n", tLB);
}
}
/// @brief Build Tree
/// @param iPhase Active rung (or phase).
void Main::buildTree(int iPhase)
{
#ifdef PUSH_GRAVITY
bool bDoPush = param.dFracPushParticles*nTotalParticles > nActiveGrav;
if(bDoPush) CkPrintf("[main] fracActive %f PUSH_GRAVITY\n", 1.0*nActiveGrav/nTotalParticles);
#endif
CkPrintf("Building trees ... ");
double startTime = CkWallTimer();
#ifdef PUSH_GRAVITY
treeProxy.buildTree(bucketSize, CkCallbackResumeThread(),!bDoPush);
#else
treeProxy.buildTree(bucketSize, CkCallbackResumeThread());
#endif
double tTB = CkWallTimer()-startTime;
timings[iPhase].tTBuild += tTB;
CkPrintf("took %g seconds.\n", tTB);
}
/// @brief Routine to start self gravity; if gravity is not being
/// calculated, then clear the accelerations.
/// @param cbGravity Callback if we overlapping gravity with SPH.
/// @param iActiveRung Rung (and higher) on which to calculate forces.
/// @param pointer to start time
void Main::startGravity(const CkCallback& cbGravity, int iActiveRung,
double *startTime)
{
if(param.bDoGravity) {
#ifdef PUSH_GRAVITY
bool bDoPush = param.dFracPushParticles*nTotalParticles > nActiveGrav;
#endif
updateSoft();
if(param.csm->bComove) {
double a = csmTime2Exp(param.csm,dTime);
if (a >= param.daSwitchTheta) theta = param.dTheta2;
}
/******** Force Computation ********/
#ifdef SELECTIVE_TRACING
turnProjectionsOn(iActiveRung);
#endif
CkPrintf("Calculating gravity (tree bucket, theta = %f) ... ", theta);
*startTime = CkWallTimer();
if(param.bConcurrentSph) {
#ifdef PUSH_GRAVITY
if(bDoPush){
treeProxy.startPushGravity(iActiveRung, theta);
}
else{
#endif
treeProxy.startGravity(iActiveRung, theta, cbGravity);
#ifdef PUSH_GRAVITY
}
#endif
#ifdef HAPI_INSTRUMENT_WRS
// XXX this is probably broken (cbGravity gets called too soon.)
dMProxy.clearInstrument(cbGravity);
#endif
}
else {
#ifdef PUSH_GRAVITY
if(bDoPush){
treeProxy.startPushGravity(iActiveRung, theta);
CkWaitQD();
}
else{
#endif
treeProxy.startGravity(iActiveRung, theta, CkCallbackResumeThread());
#ifdef PUSH_GRAVITY
}
#endif
#ifdef HAPI_INSTRUMENT_WRS
dMProxy.clearInstrument(CkCallbackResumeThread());
#endif
double tGrav = CkWallTimer() - *startTime;
timings[iActiveRung].tGrav += tGrav;
CkPrintf("took %g seconds\n", tGrav);
#ifdef SELECTIVE_TRACING
turnProjectionsOff();
#endif
if(verbosity)
memoryStatsCache();
}
}
else {
*startTime = CkWallTimer();
treeProxy.initAccel(iActiveRung, CkCallbackResumeThread());
#ifdef CUDA
// We didn't do gravity where the registered TreePieces on the
// DataManager normally get cleared. Clear them here instead.
dMProxy.clearRegisteredPieces(CkCallbackResumeThread());
#endif
}
}
/// @brief Apply external gravitational field.
/// @param iActiveRung Rung on which to apply forces.
void Main::externalGravity(int iActiveRung)
{
CkReductionMsg *msgFrameAcc;
treeProxy.externalGravity(iActiveRung, param.externalGravity,
CkCallbackResumeThread((void*&)msgFrameAcc));
// External gravity may accelerate the coordinate frame
// (e.g. heliocentric coordinates.) Retrieve any such acceleration
// and apply it to the coordinate frame.
double *frameAcc = (double *)msgFrameAcc->getData();
Vector3D<double> frameAccVec(frameAcc[0], frameAcc[1], frameAcc[2]);
treeProxy.applyFrameAcc(iActiveRung, frameAccVec, CkCallbackResumeThread());
delete msgFrameAcc;
}
/// @brief Update time derivative of thermal energy
/// @param iActiveRung Rung (and higher) which to update
/// @param duKick Array of timesteps per rung
/// @param dStartTime Array of beginning times (simulation units) per rung
/// @param bUpdateState Whether to update the ionization fractions
/// @param bAll Whether to update all rungs below iActiveRung
void Main::updateuDot(int iActiveRung, const double duKick[],
const double dStartTime[], int bUpdateState, int bAll)
{
double startTime = CkWallTimer();
if(verbosity)
CkPrintf("uDot update: Rung %d ... ", iActiveRung);
double z = 1.0/csmTime2Exp(param.csm,dTime) - 1.0;
double a = csmTime2Exp(param.csm,dTime);
if(param.bGasCooling)
dMProxy.CoolingSetTime(z, dTime, CkCallbackResumeThread());
treeProxy.updateuDot(iActiveRung, duKick, dStartTime,
param.bGasCooling, bUpdateState, bAll,
(param.dConstGamma-1),
param.dResolveJeans/a,
CkCallbackResumeThread());
double tuDot = CkWallTimer() - startTime;
timings[iActiveRung].tuDot += tuDot;
if(verbosity)
CkPrintf("took %g seconds.\n", tuDot);
}
/// @brief Update velocities
/// @param bClosing Is this a closing kick?
/// @param iActiveRung Rung (and higher) which to update
/// @param nextMaxRung Rung of smallest timestep to be taken
/// @param cbGravity Callback to wait for gravity (close only)
/// @param gravStartTime Timing start for gravity
///
/// Based on bClosing the velocities of particles on rung iActiveRung
/// through nextMaxRung are either moved to a 1/2 step based
/// on their rung (open) or moved from the 1/2 step to the end of the
/// step (close).
///
void Main::kick(bool bClosing, int iActiveRung, int nextMaxRung,
const CkCallback &cbGravity, double gravStartTime)
{
if(verbosity) {
if(bClosing)
CkPrintf("Kick Close:\n");
else
CkPrintf("Kick Open:\n");
}
double dKickFac[MAXRUNG+1];
double duKick[MAXRUNG+1];
double dStartTime[MAXRUNG+1];
for (int iRung=0; iRung<=MAXRUNG; iRung++) {
duKick[iRung]=dKickFac[iRung]=0;
}
for(int iRung = iActiveRung; iRung <= nextMaxRung; iRung++) {
double dTimeSub = RungToDt(param.dDelta, iRung);
if(verbosity) {
CkPrintf(" Rung %d: %g\n", iRung, 0.5*dTimeSub);
}
duKick[iRung] = 0.5*dTimeSub;
if(bClosing) {
dStartTime[iRung] = dTime - 0.5*dTimeSub;
dKickFac[iRung] = csmComoveKickFac(param.csm, dTime - 0.5*dTimeSub,
0.5*dTimeSub);
} else {
dStartTime[iRung] = dTime;
dKickFac[iRung] = csmComoveKickFac(param.csm, dTime, 0.5*dTimeSub);
}
}
if(param.bDoGas) {
updateuDot(iActiveRung, duKick, dStartTime, 1, 1);
}
if(bClosing) // For a closing kick, gravity may not have finished
waitForGravity(cbGravity, gravStartTime, iActiveRung);
double a = csmTime2Exp(param.csm,dTime);
double startTime = CkWallTimer();
treeProxy.kick(iActiveRung, dKickFac, bClosing, param.bDoGas,
param.bGasIsothermal, param.dMaxEnergy, duKick,
(param.dConstGamma-1), param.dThermalCondSatCoeff/a,
param.feedback->dMultiPhaseMaxTime,
param.feedback->dMultiPhaseMinTemp,
param.dEvapCoeffCode*a, CkCallbackResumeThread());
double tKick = CkWallTimer() - startTime;
timings[iActiveRung].tKick += tKick;
if(verbosity)
CkPrintf("Kick took %g seconds.\n", tKick);
if(bClosing) {
// 1/2 step uDot update to end of step
if(iActiveRung > 0 && param.bDoGas) {
duKick[iActiveRung-1] = 0.5*RungToDt(param.dDelta, iActiveRung-1);
dStartTime[iActiveRung-1] = dTime;
updateuDot(iActiveRung-1, duKick, dStartTime, 0, 0);
}
}
}
///
/// @brief Take one base timestep of the simulation.
/// @param iStep The current step number.
///
/// This method implements the standard "Kick Drift Kick" (Quinn et al
/// 1997) hierarchical timestepping algorithm. It assumes that the
/// forces for the first opening kick have already been calculated.
///
void Main::advanceBigStep(int iStep) {
int currentStep = 0; // the current timestep within the big step
int activeRung = 0; // the minimum rung that is active
int nextMaxRung = 0; // the rung that determines the smallest time for advancing
while (currentStep < MAXSUBSTEPS) {
if(!param.bStaticTest) {
CkAssert(param.dDelta != 0.0);
timings[activeRung].count++;
emergencyAdjust(activeRung);
// Find new rung for active particles
nextMaxRung = adjust(activeRung);
if((param.bStarForm || param.bFeedback)
&& param.stfm->iStarFormRung > nextMaxRung)
nextMaxRung = param.stfm->iStarFormRung; // Force stepping at star
// formation/feedback
// interval.
if(currentStep == 0) rungStats();
if(verbosity) ckout << "MaxRung: " << nextMaxRung << endl;
CkAssert(nextMaxRung <= MAXRUNG); // timesteps WAY too short
if(verbosity)
memoryStats();
// Opening Kick
CkCallback cbNull(CkCallback::invalid); // Nothing to wait for
// in opening Kick
kick(false, activeRung, nextMaxRung, cbNull, 0.0);
if(verbosity > 1)
memoryStats();
// Dump frame may require a smaller step
int driftRung = nextMaxRungIncDF(nextMaxRung);
// Get number of drift steps from driftRung
int driftSteps = 1;
while(driftRung > nextMaxRung) {
driftRung--;
driftSteps <<= 1;
}
driftRung = nextMaxRungIncDF(nextMaxRung);
if(param.bDoGas && (driftRung == nextMaxRung)) {
driftRung++;
driftSteps <<=1;
}
double dTimeSub = RungToDt(param.dDelta, driftRung);
// Drift of smallest step
for(int iSub = 0; iSub < driftSteps; iSub++)
{
if(verbosity)
CkPrintf("Drift: Rung %d Delta %g\n", driftRung, dTimeSub);
double startTime = CkWallTimer();
// Only effective if growmass parameters have been set.
growMass(dTime, dTimeSub);
// Are the GrowMass particles locked in place?
int nGrowMassDrift = param.nGrowMass;
if(param.bDynGrowMass) nGrowMassDrift = 0;
double dDriftFac = csmComoveDriftFac(param.csm, dTime, dTimeSub);
double dKickFac = csmComoveKickFac(param.csm, dTime, dTimeSub);
bool bBuildTree = (iSub + 1 == driftSteps);
treeProxy.drift(dDriftFac, param.bDoGas, param.bGasIsothermal,
dKickFac, dTimeSub, nGrowMassDrift, bBuildTree,
param.dMaxEnergy,
CkCallbackResumeThread());
double tDrift = CkWallTimer() - startTime;
timings[activeRung].tDrift += tDrift;
if(verbosity)
CkPrintf("Drift took %g seconds.\n", tDrift);
// Advance time to end of smallest step
dTime += dTimeSub;
currentStep += RungToSubsteps(driftRung);
/*
** Dump Frame
*/
double dStep = iStep + ((double) currentStep)/MAXSUBSTEPS;
if (param.dDumpFrameTime > 0 && dTime >= df[0]->dTime)
DumpFrame(dTime, dStep );
else if(param.dDumpFrameStep > 0 && dStep >= df[0]->dStep)
DumpFrame(dTime, dStep );
// Update uDot at the 1/2 way point.
if(param.bDoGas && (iSub+1 == driftSteps >> 1)) {
double duKick[MAXRUNG+1] = {};
double dStartTime[MAXRUNG+1] = {};
duKick[nextMaxRung] = 0.5*RungToDt(param.dDelta, nextMaxRung);
dStartTime[nextMaxRung] = dTime; // only updating this rung
updateuDot(nextMaxRung, duKick, dStartTime, 0, 0);
}
}
}
// determine largest timestep that needs a kick
activeRung = 0;
int tmpRung = currentStep;
while (tmpRung &= ~MAXSUBSTEPS) {
activeRung++;
tmpRung <<= 1;
}
/*
* Form stars at user defined intervals
*/
if((param.bStarForm || param.bFeedback)
&& param.stfm->isStarFormRung(activeRung)) {
timings[PHASE_FEEDBACK].count++;
domainDecomp(PHASE_FEEDBACK);
loadBalance(PHASE_FEEDBACK);
if(param.bStarForm)
FormStars(dTime, param.stfm->dDeltaStarForm);
if(param.bFeedback)
StellarFeedback(dTime, param.stfm->dDeltaStarForm);
}
if(param.sinks.bDoSinks) {
CkReductionMsg *msgCnt;
treeProxy.countType(TYPE_SINK,
CkCallbackResumeThread((void *&)msgCnt));
nSink = *(int *) msgCnt->getData();
delete msgCnt;
if(nSink != 0)
CkPrintf("Sink number of Sinks: nSink = %ld\n", nSink);
}
ckout << "\nStep: " << (iStep + ((double) currentStep)/MAXSUBSTEPS)
<< " Time: " << dTime
<< " Rungs " << activeRung << " to "
<< nextMaxRung << ". ";
countActive(activeRung);
if(verbosity > 1)
memoryStats();
/***** Resorting of particles and Domain Decomposition *****/
domainDecomp(activeRung);
if(verbosity > 1)
memoryStats();
/********* Load balancer ********/
loadBalance(activeRung);
if(verbosity > 1)
memoryStats();
/******** Tree Build *******/
buildTree(activeRung);
CkCallback cbGravity(CkCallback::resumeThread);
#ifdef COOLING_MOLECULARH
// XXX should this go inside buildTree()?
treeProxy.distribLymanWerner(CkCallbackResumeThread());
#endif /*COOLING_MOLECULARH*/
if(verbosity > 1)
memoryStats();
double gravStartTime;
startGravity(cbGravity, activeRung, &gravStartTime);
if(param.bDoExternalGravity)
externalGravity(activeRung);
if(verbosity > 1)
memoryStats();
if(param.bDoGas) {
doSph(activeRung);
if(verbosity && !param.bConcurrentSph)
memoryStatsCache();
}
if(!param.bStaticTest) {
// Closing Kick
kick(true, activeRung, nextMaxRung, cbGravity, gravStartTime);
//SIDM needs to check that it is on on active rung?
if (activeRung == 0 ) {
doSIDM(dTime,RungToDt(param.dDelta, activeRung), activeRung);
}
doSinks(dTime, RungToDt(param.dDelta, activeRung), activeRung);
}
else
waitForGravity(cbGravity, gravStartTime, activeRung);
#if COSMO_STATS > 0
/********* TreePiece Statistics ********/
ckerr << "Total statistics iteration " << iStep << "/";
int printingStep = currentStep;
while (printingStep) {
ckerr << ((printingStep & MAXSUBSTEPS) ? "1" : "0");
printingStep = (printingStep & ~MAXSUBSTEPS) << 1;
}
//ckerr << " (rungs " << activeRung << "-" << nextMaxRung << ")" << ":" << endl;
CkReductionMsg *tps;
treeProxy.collectStatistics(CkCallbackResumeThread((void*&)tps));
((TreePieceStatistics*)tps->getData())->printTo(ckerr);
/********* Cache Statistics ********/
CkReductionMsg *cs;
cacheNode.collectStatistics(CkCallbackResumeThread((void*&)cs));
((CkCacheStatistics*)cs->getData())->printTo(ckerr);
cacheGravPart.collectStatistics(CkCallbackResumeThread((void*&)cs));
((CkCacheStatistics*)cs->getData())->printTo(ckerr);
cacheSmoothPart.collectStatistics(CkCallbackResumeThread((void*&)cs));
((CkCacheStatistics*)cs->getData())->printTo(ckerr);
#endif
if(param.feedback->bAGORAFeedback) {
// Reduce the time step size for AGORA feedback-receiving particles
int SFsubsteps = RungToSubsteps(activeRung);
int lastSFstep = ~(SFsubsteps - 1) & currentStep;
int nextSFstep = lastSFstep + SFsubsteps;
double dTimeToSF = (nextSFstep - currentStep)*(param.dDelta/MAXSUBSTEPS);
double dTimeSub = RungToDt(param.dDelta, activeRung);
if (dTimeToSF <= dTimeSub) {
if (verbosity) {
CkPrintf("AGORA feedback event occuring in next timestep...running pre-check\n");
}
AGORAfeedbackPreCheck(dTime+dTimeSub, dTimeSub, dTimeToSF);
}
}
double startTime = CkWallTimer();
treeProxy.finishNodeCache(CkCallbackResumeThread());
double tCache = CkWallTimer() - startTime;
timings[activeRung].tCache += tCache;
if(verbosity)
CkPrintf("Finish NodeCache took %g seconds.\n", tCache);
#ifdef CHECK_TIME_WITHIN_BIGSTEP
if(param.iWallRunTime > 0 && ((CkWallTimer()-wallTimeStart) > param.iWallRunTime*60.)){
CkPrintf("wall time %g exceeded limit %g within advancestep\n", CkWallTimer()-wallTimeStart, param.iWallRunTime*60.);
CkExit();
}
#endif
}
}
///
/// @brief Load particles into pieces
///
/// Reads the particle data in from a file. Since the full
/// information about the run (including starting time) isn't known
/// until the particles are loaded, this routine also completes the
/// specification of the run details and writes out the log file
/// entry. It concludes by calling initialForces()
void Main::setupICs() {
double startTime;
treeProxy.setPeriodic(param.nReplicas, param.vPeriod, param.bEwald,
param.dEwCut, param.dEwhCut, param.bPeriodic,
param.csm->bComove,
0.5*param.csm->dHubble0*param.csm->dHubble0*param.csm->dOmega0);
/******** Particles Loading ********/
CkPrintf("Loading particles ...");
startTime = CkWallTimer();
try {
struct stat s;
int err = stat(basefilename.c_str(), &s);
if(err == -1) {
char *achErr = strerror(errno);
ckerr << "File error: " << basefilename.c_str() << ": " << achErr
<< endl;
CkExit();
return;
}
double dTuFac = param.dGasConst/(param.dConstGamma-1)/param.dMeanMolWeight;
if(S_ISDIR(s.st_mode)) {
// Assume its an NChilada directory
treeProxy.loadNChilada(basefilename, dTuFac,
CkCallbackResumeThread());
}
else {
// Assume Tipsy format
// Try loading Tipsy format; first just grab the header.
CkPrintf(" trying Tipsy ...");
bool bInDPos = false;
bool bInDVel = false;
Tipsy::PartialTipsyFile ptf(basefilename, 0, 0);
if(!ptf.loadedSuccessfully()) {
ckerr << endl << "Couldn't load the tipsy file \""
<< basefilename.c_str()
<< "\". Maybe it's not a tipsy file?" << endl;
CkExit();
return;
}
bInDPos = ptf.isDoublePos();
bInDVel = ptf.isDoubleVel();
if(bInDPos) CkPrintf("assumed double positions...");
if(bInDVel) CkPrintf("assumed double velocities...");
treeProxy.loadTipsy(basefilename, dTuFac, bInDPos, bInDVel,
CkCallbackResumeThread());
}
}
catch (std::ios_base::failure e) {
ckerr << "File read: " << basefilename.c_str() << ": " << e.what()
<< endl;
CkExit();
return;
}
ckout << " took " << (CkWallTimer() - startTime) << " seconds."
<< endl;
nTotalParticles = treeProxy[0].ckLocal()->nTotalParticles;
nTotalSPH = treeProxy[0].ckLocal()->nTotalSPH;
nTotalDark = treeProxy[0].ckLocal()->nTotalDark;
nTotalStar = treeProxy[0].ckLocal()->nTotalStar;
nMaxOrderGas = nTotalSPH - 1;
nMaxOrderDark = nTotalSPH + nTotalDark - 1;
nMaxOrder = nTotalParticles - 1;
#ifdef SPLITGAS
nMaxOrder += nTotalSPH;
nMaxOrderDark += nTotalSPH;
#endif
nActiveGrav = nTotalParticles;
nActiveSPH = nTotalSPH;
ckout << "N: " << nTotalParticles << endl;
if(particlesPerChare == 0)
particlesPerChare = nTotalParticles/numTreePieces;
if(nTotalSPH == 0 && param.bDoGas) {
ckerr << "WARNING: no SPH particles and bDoGas is set\n";
param.bDoGas = 0;
}
if(nTotalSPH > 0 && !param.bDoGas) {
if(prmSpecified(prm, "bDoGas"))
ckerr << "WARNING: SPH particles present and bDoGas is set off\n";
else {
param.bDoGas = 1;
if(!prmSpecified(prm, "bSphStep"))
param.bSphStep = 1;
if(!prmSpecified(prm, "bDtAdjust"))
param.bDtAdjust = 1;
}
}
getStartTime();
/* The following is used to help restart DumpFrame. */
dTime0 = dTime - param.dDelta*param.iStartStep;
if(param.nSteps > 0) getOutTimes();
for(iOut = 0; iOut < vdOutTime.size(); iOut++) {
if(dTime < vdOutTime[iOut]) break;
}
if(param.bGasCooling || param.bStarForm) {
initCooling();
if(param.iStartStep) restartGas();
}
if(param.iStartStep) bIsRestarting = true;
if(param.bStarForm || param.bFeedback) {
param.stfm->CheckParams(prm, param);
if(param.sinks.bBHSink)
param.sinks.dDeltaStarForm = param.stfm->dDeltaStarForm;
treeProxy.initRand(param.stfm->iRandomSeed, CkCallbackResumeThread());
}
if(param.bStarForm)
initStarLog();
if(param.bFeedback)
param.feedback->CheckParams(prm, param);
else
param.feedback->NullFeedback();
param.sinks.CheckParams(prm, param);
SetSink();
//SIDM
if (param.iSIDMSelect!=0){
CkAssert (prmSpecified(prm, "dMsolUnit") &&
prmSpecified(prm, "dKpcUnit"));
param.dSIDMSigma=param.dSIDMSigma*param.dMsolUnit*MSOLG/(param.dKpcUnit*KPCCM*param.dKpcUnit*KPCCM); //converts input cross section in cm^2 g^-1 to simulation units in len_unit^2 / mass_unit
if (param.iSIDMSelect==2){ //only for classical regime do this
param.dSIDMVariable=param.dSIDMVariable/(pow(param.dMsolUnit*MSOLG*GCGS/(KPCCM*param.dKpcUnit),.5)/100000.0) ; //converts from km/s to sim units
}
}
param.externalGravity.CheckParams(prm, param);
string achLogFileName = string(param.achOutName) + ".log";
ofstream ofsLog;
if(bIsRestarting)
ofsLog.open(achLogFileName.c_str(), ios_base::app);
else
ofsLog.open(achLogFileName.c_str(), ios_base::trunc);
CkMustAssert(bool(ofsLog), "Error opening log file.");
#define xstr(s) str(s)
#define str(s) #s
ofsLog << "# Starting ChaNGa version " << xstr(NBODY_PACKAGE_VERSION)
<< " commit " << Cha_CommitID << endl;
#undef str
#undef xstr
ofsLog << "#"; // Output command line
for (int i = 0; i < args->argc; i++)
ofsLog << " " << args->argv[i];
ofsLog << endl;
ofsLog << "# Running on " << CkNumPes() << " processors/ "
<< CkNumNodes() << " nodes" << endl;
#ifdef __DATE__
#ifdef __TIME__
ofsLog <<"# Code compiled: " << __DATE__ << " " << __TIME__ << endl;
#endif
#endif
ofsLog << "# Charm defines:";
#if CMK_ERROR_CHECKING
ofsLog << " CMK_ERROR_CHECKING";
#endif
#if CMK_WITH_STATS
ofsLog << " CMK_WITH_STATS";
#endif
#if CMK_TRACE_ENABLED
ofsLog << " CMK_TRACE_ENABLED";
#endif
#if CMK_CHARMDEBUG
ofsLog << " CMK_CHARMDEBUG";
#endif
ofsLog << endl << "# Preprocessor macros:";
#ifdef CMK_USE_SSE2
ofsLog << " CMK_USE_SSE2";
#endif
#ifdef CMK_USE_AVX
ofsLog << " CMK_USE_AVX";
#endif
#ifdef COSMO_FLOAT
ofsLog << " COSMO_FLOAT";
#endif
#ifdef CHANGESOFT
ofsLog << " CHANGESOFT";
#endif
#ifdef EPSACCH
ofsLog << " EPSACCH";
#endif
#ifdef COOLING_NONE
ofsLog << " COOLING_NONE";
#endif
#ifdef COOLING_COSMO
ofsLog << " COOLING_COSMO";
#endif
#ifdef COOLING_PLANET
ofsLog << " COOLING_PLANET";
#endif
#ifdef COOLING_METAL
ofsLog << " COOLING_METAL";
#endif
#ifdef NO_LOWT_METAL
ofsLog << " NO_LOWT_METAL";
#endif
#ifdef COOLING_MOLECULARH
ofsLog << " COOLING_MOLECULARH";
#endif
#ifdef COOLING_GRACKLE
ofsLog << " COOLING_GRACKLE";
#endif
#ifdef DIFFUSION
ofsLog << " DIFFUSION";
#endif
#ifdef NODIFFUSIONTHERMAL
ofsLog " NODIFFUSIONTHERMAL";
#endif
#ifdef DIFFUSIONHARMONIC
ofsLog << " DIFFUSIONHARMONIC";
#endif
#ifdef FEEDBACKDIFFLIMIT
ofsLog << " FEEDBACKDIFFLIMIT";
#endif
#ifdef GDFORCE
ofsLog << " GDFORCE";
#endif
#ifdef CULLENALPHA
ofsLog << " CULLENALPHA";
#endif
#ifdef VSIGVISC
ofsLog << " VSIGVISC";
#endif
#ifdef HEXADECAPOLE
ofsLog << " HEXADECAPOLE";
#endif
#ifdef INTERLIST_VER
ofsLog << " INTERLIST_VER:" << INTERLIST_VER;
#endif
#ifdef BIGKEYS
ofsLog << " BIGKEYS";
#endif
#ifdef DTADJUST
ofsLog << " DTADJUST";
#endif
#if WENDLAND == 1
ofsLog << " WENDLAND";
#endif
#if M4KERNEL == 1
ofsLog << " M4KERNEL";
#endif
#if M6KERNEL == 1
ofsLog << " M6KERNEL";
#endif
#ifdef SPLITGAS
ofsLog << " SPLITGAS";
#endif
#ifdef SUPERBUBBLE
ofsLog << " SUPERBUBBLE";
#endif
#ifdef JEANSSOFT
ofsLog << " JEANSSOFT";
#endif
#ifdef JEANSSOFTONLY
ofsLog << " JEANSSOFTONLY";
#endif
#ifdef DAMPING
ofsLog << " DAMPING";
#endif
ofsLog << endl;
ofsLog << "# Key sizes: " << sizeof(KeyType) << " bytes particle "
<< sizeof(NodeKey) << " bytes node" << endl;
// Print out load balance information
#ifdef LB_MANAGER_VERSION
LBManager *lbMgr = LBManagerObj();
#else
LBDatabase *lbMgr = LBDatabaseObj();
#endif
int nlbs = lbMgr->getNLoadBalancers();
if(nlbs == 0) {
ofsLog << "# No load balancer in use" << endl;
}
else {
int ilb;
BaseLB **lbs = lbMgr->getLoadBalancers();
ofsLog << "# Load balancers:";
for(ilb = 0; ilb < nlbs; ilb++){
ofsLog << " " << lbs[ilb]->lbName();
}
ofsLog << endl;
}
ofsLog.close();
prmLogParam(prm, achLogFileName.c_str());
ofsLog.open(achLogFileName.c_str(), ios_base::app);
if(param.bStarForm)
StarLog::logMetaData(ofsLog);
if(param.csm->bComove) {
ofsLog << "# RedOut:";
if(vdOutTime.size() == 0) ofsLog << " none";
for(int i = 0; i < vdOutTime.size(); i++) {
double z = 1.0/csmTime2Exp(param.csm, vdOutTime[i]) - 1.0;
ofsLog << " " << z;
}
}
else {
ofsLog << "# TimeOut:";
if(vdOutTime.size() == 0) ofsLog << " none";
for(int i = 0; i < vdOutTime.size(); i++) {
ofsLog << " " << vdOutTime[i];
}
}
ofsLog << endl;
ofsLog.close();
CkMustAssert(bool(ofsLog), "Error closing log file");
if(prmSpecified(prm,"dSoft")) {
ckout << "Set Softening...\n";
treeProxy.setSoft(param.dSoft, CkCallbackResumeThread());
if(param.dSoft <= 0.0 && param.bEpsAccStep) {
ckerr << "WARNING: bEpsAccStep does not work with dSoft == 0; disabling\n";
param.bEpsAccStep = 0;
}
}
// for periodic, puts all particles within the boundary
// Also assigns keys and sorts.
treeProxy.drift(0.0, 0, 0, 0.0, 0.0, 0, true, param.dMaxEnergy,
CkCallbackResumeThread());
initialForces();
}
int CheckForStop()
{
/*
** Checks for existence of STOPFILE in run directory. If
** found, the file is removed and the return status is set
** to 1, otherwise 0.
*/
FILE *fp = NULL;
const char *achFile = "STOP";
if ((fp = fopen(achFile,"r")) != NULL) {
(void) printf("User interrupt detected.\n");
(void) fclose(fp);
(void) unlink(achFile);
return 1;
}
return 0;
}
/// @brief Callback to restart simulation after a checkpoint or after writing
/// a checkpoint.
///
/// A restart looks like we've just finished writing a checkpoint. We
/// can tell the difference by the bIsRestarting flag set in the Main
/// CkMigrate constructor. In that case we do some simple parameter
/// parsing and go to InitialForces. Otherwise we return to the
/// doSimulation() loop.
void
Main::restart(CkCheckpointStatusMsg *msg)
{
if(bIsRestarting) {
dSimStartTime = CkWallTimer();
#define xstr(s) str(s)
#define str(s) #s
ckout << "ChaNGa version " << xstr(NBODY_PACKAGE_VERSION)
<< ", commit " << Cha_CommitID << endl;
#undef str
#undef xstr
ckout << "Restarting at " << param.iStartStep << endl;
string achLogFileName = string(param.achOutName) + ".log";
ofstream ofsLog(achLogFileName.c_str(), ios_base::app);
ofsLog << "# ReStarting ChaNGa commit " << Cha_CommitID << endl;
ofsLog << "#";
for (int i = 0; i < CmiGetArgc(args->argv); i++)
ofsLog << " " << args->argv[i];
ofsLog << endl;
ofsLog << "# Running on " << CkNumPes() << " processors/ "
<< CkNumNodes() << " nodes" << endl;
ofsLog.close();
/*
* Parse command line parameters
*/
prmInitialize(&prm,_Leader,_Trailer);
/*
* Add a subset of the parameters.
*/
prmAddParam(prm, "dDelta", paramDouble, ¶m.dDelta,
sizeof(double),"dt", "Base Timestep for integration");
prmAddParam(prm, "nSteps", paramInt, ¶m.nSteps,
sizeof(int),"n", "Number of Timesteps");
prmAddParam(prm,"iWallRunTime",paramInt,¶m.iWallRunTime,
sizeof(int),"wall",
"<Maximum Wallclock time (in minutes) to run> = 0 = infinite");
prmAddParam(prm, "killAt", paramInt, &killAt,
sizeof(int),"killat", "Stop the simulation after this step");
prmAddParam(prm, "dTheta", paramDouble, ¶m.dTheta,
sizeof(double), "theta", "Opening angle");
prmAddParam(prm, "dTheta2", paramDouble, ¶m.dTheta2,
sizeof(double),"theta2",
"Opening angle after switchTheta");
prmAddParam(prm, "dEta", paramDouble, ¶m.dEta,
sizeof(double),"eta", "Time integration accuracy");
prmAddParam(prm,"dEtaCourant",paramDouble,¶m.dEtaCourant,
sizeof(double),"etaC", "<Courant criterion> = 0.4");
prmAddParam(prm,"dEtauDot",paramDouble,¶m.dEtauDot,
sizeof(double),"etau", "<uDot criterion> = 0.25");
prmAddParam(prm,"dEtaDiffusion",paramDouble,¶m.dEtaDiffusion,sizeof(double),
"etadiff", "<Diffusion dt criterion> = 0.1");
prmAddParam(prm,"nIOProcessor",paramInt,¶m.nIOProcessor,
sizeof(int), "npio",
"number of simultaneous I/O processors = 0 (all)");
prmAddParam(prm, "nBucket", paramInt, &bucketSize,
sizeof(int),"b", "Particles per Bucket (default: 12)");
prmAddParam(prm, "nCacheDepth", paramInt, ¶m.cacheLineDepth,
sizeof(int),"d", "Cache Line Depth (default: 4)");
prmAddParam(prm, "bConcurrentSph", paramBool, ¶m.bConcurrentSph,
sizeof(int),"consph",
"Enable SPH running concurrently with Gravity");
prmAddParam(prm, "bFastGas", paramBool, ¶m.bFastGas,
sizeof(int),"Fgas", "Fast Gas Method");
prmAddParam(prm,"dFracFastGas",paramDouble,¶m.dFracFastGas,
sizeof(double),"ffg",
"<Fraction of Active Particles for Fast Gas>");
prmAddParam(prm,"ddHonHLimit",paramDouble,¶m.ddHonHLimit,
sizeof(double),"dhonh", "<|dH|/H Limiter> = 0.1");
prmAddParam(prm, "iOutInterval", paramInt, ¶m.iOutInterval,
sizeof(int),"oi", "Output Interval");
prmAddParam(prm, "iBinaryOutput", paramInt, ¶m.iBinaryOut,
sizeof(int), "binout",
"<array outputs 0 ascii, 1 float, 2 double, 3 FLOAT(internal)> = 0");
prmAddParam(prm, "iCheckInterval", paramInt, ¶m.iCheckInterval,
sizeof(int),"oc", "Checkpoint Interval");
prmAddParam(prm, "iVerbosity", paramInt, ¶m.iVerbosity,
sizeof(int),"v", "Verbosity");
prmAddParam(prm,"dExtraStore",paramDouble,¶m.dExtraStore,
sizeof(double), "estore",
"Extra memory for new particles");
prmAddParam(prm,"dMaxBalance",paramDouble,¶m.dMaxBalance,
sizeof(double), "maxbal",
"Maximum piece ratio for load balancing");
prmAddParam(prm,"dFracLoadBalance",paramDouble,¶m.dFracLoadBalance,
sizeof(double), "fraclb",
"Minimum active particles for load balancing");
prmAddParam(prm, "dFracNoDomainDecomp", paramDouble,
¶m.dFracNoDomainDecomp, sizeof(double),"fndd",
"Fraction of active particles for no new DD = 0.0");
prmAddParam(prm, "bUseCkLoopPar", paramBool, ¶m.bUseCkLoopPar, sizeof(int),
"useckloop", "enable CkLoop to parallelize within node");
int processSimfile = 0;
if(!prmArgProc(prm,CmiGetArgc(args->argv),args->argv,processSimfile)) {
CkExit();
}
dMProxy.resetReadOnly(param, CkCallbackResumeThread());
if (bUseCkLoopPar) {
CkPrintf("Using CkLoop %d\n", param.bUseCkLoopPar);
} else {
CkPrintf("Not Using CkLoop %d\n", param.bUseCkLoopPar);
}
treeProxy.drift(0.0, 0, 0, 0.0, 0.0, 0, true, param.dMaxEnergy,
CkCallbackResumeThread());
if(param.bGasCooling || param.bStarForm)
initCooling();
if(param.bStarForm)
initStarLog();
if(param.bStarForm || param.bFeedback)
treeProxy.initRand(param.stfm->iRandomSeed, CkCallbackResumeThread());
DumpFrameInit(dTime0, 0.0, bIsRestarting);
timings.resize(PHASE_FEEDBACK+1);
nActiveGrav = nTotalParticles;
nActiveSPH = nTotalSPH;
treeProxy.resetObjectLoad(CkCallbackResumeThread());
/***** Initial sorting of particles and Domain Decomposition *****/
CkPrintf("Initial ");
domainDecomp(0);
doSimulation();
}
else {
CkMustAssert(msg->status == CK_CHECKPOINT_SUCCESS, "Checkpoint failed! Is there a disk problem?\n");
ofstream ofsCheck("lastcheckpoint", ios_base::trunc);
ofsCheck << bChkFirst << endl;
if(iStop)
CkExit();
else
mainChare.doSimulation();
}
delete msg;
}
///
/// @brief Initial calculation of forces.
///
/// This is called both when starting or restarting a run. It
/// concludes by calling doSimulation(), the main simulation loop.
///
void
Main::initialForces()
{
double startTime = CkWallTimer();
timings.resize(PHASE_FEEDBACK+1);
// DEBUGGING
// CkStartQD(CkCallback(CkIndex_TreePiece::quiescence(),treeProxy));
/***** Initial sorting of particles and Domain Decomposition *****/
CkPrintf("Initial ");
domainDecomp(0);
#ifdef PUSH_GRAVITY
treeProxy.findTotalMass(CkCallbackResumeThread());
#endif
// Balance load initially after decomposition
loadBalance(-1);
/******** Tree Build *******/
buildTree(0);
if(verbosity)
memoryStats();
#ifdef CUDA
ckout << "Init. Accel. ...";
double dInitAccelTime = CkWallTimer();
treeProxy.initAccel(0, CkCallbackResumeThread());
ckout << " took " << (CkWallTimer() - dInitAccelTime) << " seconds."
<< endl;
#endif
CkCallback cbGravity(CkCallback::resumeThread); // needed below to wait for gravity
double gravStartTime;
startGravity(cbGravity, 0, &gravStartTime);
if(param.bDoExternalGravity)
externalGravity(0);
if(param.bDoGas) {
// Get star center of mass
starCenterOfMass();
// Initialize SPH
initSph();
}
waitForGravity(cbGravity, gravStartTime, 0);
if (param.sinks.bDoSinksAtStart) doSinks(dTime, 0.0, 0);
treeProxy.finishNodeCache(CkCallbackResumeThread());
// Initial Log entry
string achLogFileName = string(param.achOutName) + ".log";
calcEnergy(dTime, CkWallTimer() - startTime, achLogFileName.c_str());
#if COSMO_STATS > 0
/********* TreePiece Statistics ********/
ckerr << "Total statistics initial iteration :" << endl;
CkReductionMsg *tps;
treeProxy.collectStatistics(CkCallbackResumeThread((void*&)tps));
((TreePieceStatistics*)tps->getData())->printTo(ckerr);
/********* Cache Statistics ********/
CkReductionMsg *cs;
cacheNode.collectStatistics(CkCallbackResumeThread((void*&)cs));
((CkCacheStatistics*)cs->getData())->printTo(ckerr);
cacheGravPart.collectStatistics(CkCallbackResumeThread((void*&)cs));
((CkCacheStatistics*)cs->getData())->printTo(ckerr);
cacheSmoothPart.collectStatistics(CkCallbackResumeThread((void*&)cs));
((CkCacheStatistics*)cs->getData())->printTo(ckerr);
#endif
/*
** Dump Frame Initialization
*/
DumpFrameInit(dTime0, 0.0, param.iStartStep > 0);
if (param.bLiveViz > 0) {
ckout << "Initializing liveViz module..." << endl;
/* Initialize the liveViz module */
liveVizConfig lvConfig(true,true);
int funcIndex = CkIndex_TreePiece::liveVizDumpFrameInit(NULL);
CkCallback* lvcb = new CkCallback(funcIndex, treeProxy);
liveVizInit(lvConfig, treeProxy, *lvcb);
}
doSimulation();
}
int safeMkdir(const char *achFile);
///
/// \brief Principal method which does all the coordination of the
/// simulation over timesteps.
///
/// This routine calls advanceBigStep() for each step, logs
/// statistics, determines if output is needed, and halts the
/// simulation when done.
///
void
Main::doSimulation()
{
double startTime;
string achLogFileName = string(param.achOutName) + ".log";
#ifdef CHECK_TIME_WITHIN_BIGSTEP
wallTimeStart = CkWallTimer();
#endif
for(int iStep = param.iStartStep+1; iStep <= param.nSteps; iStep++){
if (killAt > 0 && killAt == iStep) {
ckout << "KillAT: Stopping after " << (CkWallTimer()-dSimStartTime) << " seconds\n";
break;
}
if (verbosity) ckout << "Starting big step " << iStep << endl;
startTime = CkWallTimer();
starCenterOfMass();
for(int iRung = 0; iRung < timings.size(); iRung++) {
timings[iRung].clear();
}
advanceBigStep(iStep-1);
double stepTime = CkWallTimer() - startTime;
ckout << "Big step " << iStep << " took " << stepTime << " seconds."
<< endl;
writeTimings(iStep);
if(iStep%param.iOrbitOutInterval == 0) {
outputBlackHoles(dTime);
}
if(iStep%param.iLogInterval == 0) {
calcEnergy(dTime, stepTime, achLogFileName.c_str());
}
iStop = CheckForStop();
/*
* Writing of intermediate outputs can be done here.
*/
if((param.bBenchmark == 0)
&& (bOutTime() || iStep == param.nSteps || iStop
|| iStep%param.iOutInterval == 0)) {
writeOutput(iStep);
treeProxy[0].flushStarLog(CkCallbackResumeThread());
}
if(!iStop && param.iWallRunTime > 0) {
if (param.iWallRunTime*60. - (CkWallTimer()-dSimStartTime)
< stepTime*1.5) {
ckout << "RunTime limit exceeded. Writing checkpoint and exiting.\n";
ckout << " iWallRunTime(sec): " << param.iWallRunTime*60
<< " Time running: " << CkWallTimer() - dSimStartTime
<< " Last step: " << stepTime << endl;
iStop = 1;
}
}
if((param.bBenchmark == 0)
&& (iStop || (param.iCheckInterval && iStep%param.iCheckInterval == 0))) {
string achCheckFileName(param.achOutName);
if(bChkFirst) {
achCheckFileName += ".chk0";
bChkFirst = 0;
}
else {
achCheckFileName += ".chk1";
bChkFirst = 1;
}
// The following drift is called because it deletes the tree
// so it won't be saved on disk.
treeProxy.drift(0.0, 0, 0, 0.0, 0.0, 0, false, param.dMaxEnergy,
CkCallbackResumeThread());
treeProxy[0].flushStarLog(CkCallbackResumeThread());
param.iStartStep = iStep; // update so that restart continues on
bIsRestarting = 0;
CkCallback cb(CkIndex_Main::restart(0), mainChare);
CkStartCheckpoint(achCheckFileName.c_str(), cb, true);
return;
}
if (iStop) break;
} // End of main computation loop
/******** Shutdown process ********/
if(param.nSteps == 0 && param.bBenchmark == 0) {
string achFile = string(param.achOutName) + ".000000";
// assign each particle its domain for diagnostic.
treeProxy.assignDomain(CkCallbackResumeThread());
if((!param.bDoGas) && param.bDoDensity) {
// If gas isn't being calculated, we can do the total
// densities before we start the output.
ckout << "Calculating total densities ...";
DensitySmoothParams pDen(TYPE_GAS|TYPE_DARK|TYPE_STAR, 0);
startTime = CkWallTimer();
double dfBall2OverSoft2 = 0.0;
treeProxy.startSmooth(&pDen, 1, param.nSmooth, dfBall2OverSoft2,
CkCallbackResumeThread());
ckout << " took " << (CkWallTimer() - startTime) << " seconds."
<< endl;
#if 0
// For testing concurrent Sph, we don't want to do the
// resmooth.
ckout << "Recalculating densities ...";
startTime = CkWallTimer();
treeProxy.startIterationReSmooth(&pDen, CkCallbackResumeThread());
ckout << " took " << (CkWallTimer() - startTime) << " seconds." << endl;
#endif
treeProxy.finishNodeCache(CkCallbackResumeThread());
ckout << "Reordering ...";
startTime = CkWallTimer();
treeProxy.reOrder(nMaxOrder, CkCallbackResumeThread());
ckout << " took " << (CkWallTimer() - startTime) << " seconds." << endl;
if(param.iBinaryOut == 6) {
// Set up N-Chilada directory structure
CkMustAssert(safeMkdir(achFile.c_str()) == 0, "Can't create N-Chilada directories\n");
if(nTotalSPH > 0) {
string dirname(string(achFile) + "/gas");
CkMustAssert(safeMkdir(dirname.c_str()) == 0, "Can't create N-Chilada directories\n");
}
if(nTotalDark > 0) {
string dirname(string(achFile) + "/dark");
CkMustAssert(safeMkdir(dirname.c_str()) == 0, "Can't create N-Chilada directories\n");
}
if(nTotalStar > 0) {
string dirname(string(achFile) + "/star");
CkMustAssert(safeMkdir(dirname.c_str()) == 0, "Can't create N-Chilada directories\n");
}
}
ckout << "Outputting densities ...";
startTime = CkWallTimer();
DenOutputParams pDenOut(achFile, param.iBinaryOut, 0.0);
if(param.iBinaryOut)
outputBinary(pDenOut, param.bParaWrite, CkCallbackResumeThread());
else
treeProxy[0].outputASCII(pDenOut, param.bParaWrite, CkCallbackResumeThread());
ckout << " took " << (CkWallTimer() - startTime) << " seconds."
<< endl;
ckout << "Outputting hsmooth ...";
HsmOutputParams pHsmOut(achFile, param.iBinaryOut, 0.0);
if(param.iBinaryOut)
outputBinary(pHsmOut, param.bParaWrite, CkCallbackResumeThread());
else
treeProxy[0].outputASCII(pHsmOut, param.bParaWrite, CkCallbackResumeThread());
}
else {
treeProxy.reOrder(nMaxOrder, CkCallbackResumeThread());
}
writeOutput(0);
if(param.bDoGas) {
ckout << "Outputting gas properties ...";
#ifdef SUPERBUBBLE
/*
* Write out additional variables when using superbubble
* (multiphase properties and effective temperature)
*/
uHotOutputParams puHotOut(achFile, param.iBinaryOut, 0.0);
uOutputParams puOut(achFile, param.iBinaryOut, 0.0);
MassHotOutputParams pmHotOut(achFile, param.iBinaryOut, 0.0);
double dTuFac = param.dGasConst/(param.dConstGamma-1)/param.dMeanMolWeight;
TempEffOutputParams pTeffOut(achFile, param.iBinaryOut, 0.0, param.bGasCooling, dTuFac);
#endif
GasDenOutputParams pDenOut(achFile, param.iBinaryOut, 0.0);
PresOutputParams pPresOut(achFile, param.iBinaryOut, 0.0);
HsmOutputParams pSphHOut(achFile, param.iBinaryOut, 0.0);
DivVOutputParams pDivVOut(achFile, param.iBinaryOut, 0.0);
PDVOutputParams pPDVOut(achFile, param.iBinaryOut, 0.0);
MuMaxOutputParams pMuMaxOut(achFile, param.iBinaryOut, 0.0);
BSwOutputParams pBSwOut(achFile, param.iBinaryOut, 0.0);
CsOutputParams pCsOut(achFile, param.iBinaryOut, 0.0);
if (param.iBinaryOut) {
#ifdef SUPERBUBBLE
outputBinary(puHotOut, param.bParaWrite,
CkCallbackResumeThread());
outputBinary(puOut, param.bParaWrite,
CkCallbackResumeThread());
outputBinary(pmHotOut, param.bParaWrite,
CkCallbackResumeThread());
outputBinary(pTeffOut, param.bParaWrite,
CkCallbackResumeThread());
#endif
outputBinary(pPresOut, param.bParaWrite,
CkCallbackResumeThread());
outputBinary(pDivVOut, param.bParaWrite,
CkCallbackResumeThread());
outputBinary(pPDVOut, param.bParaWrite,
CkCallbackResumeThread());
outputBinary(pMuMaxOut, param.bParaWrite,
CkCallbackResumeThread());
outputBinary(pBSwOut, param.bParaWrite,
CkCallbackResumeThread());
outputBinary(pCsOut, param.bParaWrite,
CkCallbackResumeThread());
#ifndef COOLING_NONE
if(param.bGasCooling) {
EDotOutputParams pEDotOut(achFile, param.iBinaryOut, 0.0);
outputBinary(pEDotOut, param.bParaWrite,
CkCallbackResumeThread());
}
#endif
}
else {
#ifdef SUPERBUBBLE
treeProxy[0].outputASCII(pmHotOut, param.bParaWrite, CkCallbackResumeThread());
treeProxy[0].outputASCII(puHotOut, param.bParaWrite, CkCallbackResumeThread());
treeProxy[0].outputASCII(puOut, param.bParaWrite, CkCallbackResumeThread());
treeProxy[0].outputASCII(pTeffOut, param.bParaWrite,CkCallbackResumeThread());
#endif
treeProxy[0].outputASCII(pDenOut, param.bParaWrite, CkCallbackResumeThread());
treeProxy[0].outputASCII(pPresOut, param.bParaWrite, CkCallbackResumeThread());
treeProxy[0].outputASCII(pSphHOut, param.bParaWrite, CkCallbackResumeThread());
treeProxy[0].outputASCII(pDivVOut, param.bParaWrite, CkCallbackResumeThread());
treeProxy[0].outputASCII(pPDVOut, param.bParaWrite, CkCallbackResumeThread());
treeProxy[0].outputASCII(pMuMaxOut, param.bParaWrite, CkCallbackResumeThread());
treeProxy[0].outputASCII(pBSwOut, param.bParaWrite, CkCallbackResumeThread());
treeProxy[0].outputASCII(pCsOut, param.bParaWrite, CkCallbackResumeThread());
#ifndef COOLING_NONE
if(param.bGasCooling) {
EDotOutputParams pEDotOut(achFile, 0, 0.0);
treeProxy[0].outputASCII(pEDotOut, param.bParaWrite,
CkCallbackResumeThread());
}
#endif
}
}
ckout << "Outputting accelerations ...";
AccOutputParams pAcc(achFile, param.iBinaryOut, 0.0);
if(param.iBinaryOut)
outputBinary(pAcc, param.bParaWrite, CkCallbackResumeThread());
else
treeProxy[0].outputASCII(pAcc, param.bParaWrite, CkCallbackResumeThread());
#ifdef NEED_DT
ckout << "Outputting dt ...";
adjust(0);
DtOutputParams pDt(achFile, param.iBinaryOut, 0.0);
if(param.iBinaryOut)
outputBinary(pDt, param.bParaWrite, CkCallbackResumeThread());
else
treeProxy[0].outputASCII(pDt, param.bParaWrite, CkCallbackResumeThread());
#endif
RungOutputParams pRung(achFile, param.iBinaryOut, 0.0);
KeyOutputParams pKey(achFile, param.iBinaryOut, 0.0);
DomainOutputParams pDomain(achFile, param.iBinaryOut, 0.0);
if(param.iBinaryOut) {
outputBinary(pRung, param.bParaWrite, CkCallbackResumeThread());
outputBinary(pKey, param.bParaWrite, CkCallbackResumeThread());
outputBinary(pDomain, param.bParaWrite, CkCallbackResumeThread());
}
else {
treeProxy[0].outputASCII(pRung, param.bParaWrite, CkCallbackResumeThread());
treeProxy[0].outputASCII(pKey, param.bParaWrite, CkCallbackResumeThread());
treeProxy[0].outputASCII(pDomain, param.bParaWrite, CkCallbackResumeThread());
}
if(param.bDoGas && param.bDoDensity) {
// The following call is to get the particles in key order
// before the sort.
treeProxy.drift(0.0, 0, 0, 0.0, 0.0, 0, true, param.dMaxEnergy,
CkCallbackResumeThread());
domainDecomp(0);
buildTree(0);
ckout << "Calculating total densities ...";
DensitySmoothParams pDen(TYPE_GAS|TYPE_DARK|TYPE_STAR, 0);
startTime = CkWallTimer();
double dfBall2OverSoft2 = 0.0;
treeProxy.startSmooth(&pDen, 1, param.nSmooth, dfBall2OverSoft2,
CkCallbackResumeThread());
ckout << " took " << (CkWallTimer() - startTime) << " seconds."
<< endl;
ckout << "Recalculating densities ...";
startTime = CkWallTimer();
treeProxy.startReSmooth(&pDen, CkCallbackResumeThread());
ckout << " took " << (CkWallTimer() - startTime) << " seconds." << endl;
treeProxy.finishNodeCache(CkCallbackResumeThread());
ckout << "Reodering ...";
startTime = CkWallTimer();
treeProxy.reOrder(nMaxOrder, CkCallbackResumeThread());
ckout << " took " << (CkWallTimer() - startTime) << " seconds." << endl;
ckout << "Outputting densities ...";
startTime = CkWallTimer();
DenOutputParams pDenOut(achFile, param.iBinaryOut, 0.0);
if(param.iBinaryOut)
outputBinary(pDenOut, param.bParaWrite, CkCallbackResumeThread());
else
treeProxy[0].outputASCII(pDenOut, param.bParaWrite, CkCallbackResumeThread());
ckout << " took " << (CkWallTimer() - startTime) << " seconds."
<< endl;
ckout << "Outputting hsmooth ...";
HsmOutputParams pHsmOut(achFile, param.iBinaryOut, 0.0);
if(param.iBinaryOut)
outputBinary(pHsmOut, param.bParaWrite, CkCallbackResumeThread());
else
treeProxy[0].outputASCII(pHsmOut, param.bParaWrite, CkCallbackResumeThread());
}
}
treeProxy[0].flushStarLog(CkCallbackResumeThread());
#if COSMO_STATS > 0
ckerr << "Outputting statistics ...";
startTime = CkWallTimer();
//Interval<unsigned int> dummy;
treeProxy[0].outputStatistics(CkCallbackResumeThread());
ckerr << " took " << (CkWallTimer() - startTime) << " seconds." << endl;
#endif
ckout << "Done." << endl;
#ifdef HPM_COUNTER
cacheNode.stopHPM(CkCallbackResumeThread());
cacheGravPart.stopHPM(CkCallbackResumeThread());
cacheSmoothPart.stopHPM(CkCallbackResumeThread());
#endif
ckout << endl << "******************" << endl << endl;
// Some memory cleanup
// This is just for debugging memory problems, so comment it out for
// now to avoid tickling QD bugs.
// delete param.stfm;
// treeProxy.ckDestroy();
// CkWaitQD();
CkExit();
}
/**
* @brief Main::starCenterOfMass Calculates the total mass and center of mass
* of all the star particles and saves them to all the available COOL structs.
*
* Requires that cooling for planets be enabled at compile time.
*/
void Main::starCenterOfMass()
{
#ifndef COOLING_NONE
#ifdef COOLING_PLANET
CkReductionMsg *msg;
// Get the sums of (mass * pos) and (mass) of all the star particles
treeProxy.starCenterOfMass(CkCallbackResumeThread((void*&)msg));
double *dMassPos = (double *) msg->getData();
// Calculate the center of mass
double dCenterOfMass[4];
for (int i=0; i<3; ++i) {
dCenterOfMass[i] = dMassPos[i]/dMassPos[3];
}
// Record the total mass
dCenterOfMass[3] = dMassPos[3];
// Clean up
delete msg;
// Send center of mass to everyone
dMProxy.SetStarCM(dCenterOfMass, CkCallbackResumeThread());
#endif
#endif
}
///
/// @brief Write out the timing information
/// @param iStep Step number
///
void
Main::writeTimings(int iStep)
{
string achTimeFileName = string(param.achOutName) + ".timings";
timing_fields tTotal;
tTotal.clear();
FILE *fpTime = fopen(achTimeFileName.c_str(), "a");
CkAssert(fpTime != NULL);
fprintf(fpTime, "# Timings for step %d\n", iStep);
fprintf(fpTime, "# Rung Count Grav uDot DD LoadB TBuild Adjust EAdjust Kick Drift Cache\n");
for(int i = 0; i < timings.size(); i++) {
if(timings[i].count) {
if(i == PHASE_FEEDBACK) {
fprintf(fpTime, "# SF/Feedback: count StarForm, FeedB, DistFeedB, DD, LoadB, TBuild\n");
fprintf(fpTime, " %d %f %f %f %f %f %f\n", timings[i].count,
timings[i].tGrav, timings[i].tAdjust, timings[i].tuDot,
timings[i].tDD, timings[i].tLoadB, timings[i].tTBuild);
}
else {
fprintf(fpTime, " %d %d %f %f %f %f %f %f %f %f %f %f\n", i,
timings[i].count, timings[i].tGrav,
timings[i].tuDot, timings[i].tDD,
timings[i].tLoadB, timings[i].tTBuild,
timings[i].tAdjust, timings[i].tEmergAdjust,
timings[i].tKick, timings[i].tDrift,
timings[i].tCache);
tTotal.tGrav += timings[i].tGrav;
tTotal.tuDot += timings[i].tuDot;
tTotal.tDD += timings[i].tDD;
tTotal.tLoadB += timings[i].tLoadB;
tTotal.tTBuild += timings[i].tTBuild;
tTotal.tAdjust += timings[i].tAdjust;
tTotal.tEmergAdjust += timings[i].tEmergAdjust;
tTotal.tKick += timings[i].tKick;
tTotal.tDrift += timings[i].tDrift;
tTotal.tCache += timings[i].tCache;
}
}
}
fprintf(fpTime, "Totals: %f %f %f %f %f %f %f %f %f %f\n",
tTotal.tGrav,
tTotal.tuDot, tTotal.tDD,
tTotal.tLoadB, tTotal.tTBuild,
tTotal.tAdjust, tTotal.tEmergAdjust,
tTotal.tKick, tTotal.tDrift,
tTotal.tCache);
fclose(fpTime);
}
///
/// \brief Calculate various energy and momentum quantities, and output them
/// to a log file.
///
void
Main::calcEnergy(double dTime, double wallTime, const char *achLogFileName)
{
CkReductionMsg *msg;
treeProxy.calcEnergy(CkCallbackResumeThread((void*&)msg));
double *dEnergy = (double *) msg->getData();
static int first = 1;
FILE *fpLog = fopen(achLogFileName, "a");
double a = csmTime2Exp(param.csm, dTime);
if(first && (!bIsRestarting || dTimeOld == 0.0)) {
fprintf(fpLog, "# time redshift TotalEVir TotalE Kinetic Virial Potential TotalECosmo Ethermal Lx Ly Lz Wallclock\n");
dEcosmo = 0.0;
first = 0;
}
else {
/*
* Estimate integral (\\dot a*U*dt) over the interval.
* Note that this is equal to integral (W*da) and the latter
* is more accurate when a is changing rapidly.
*/
if(param.csm->bComove) {
dEcosmo += 0.5*(a - csmTime2Exp(param.csm, dTimeOld))
*(dEnergy[2] + dUOld);
}
first = 0;
}
dUOld = dEnergy[2];
dEnergy[2] *= a;
dEnergy[1] *= a;
dTimeOld = dTime;
double z = 1.0/a - 1.0;
double dEtotal = dEnergy[0] + dEnergy[2] - dEcosmo + a*a*dEnergy[6];
// Energy using the virial of Clausius
double dEtotalVir = dEnergy[0] + dEnergy[1] - dEcosmo + a*a*dEnergy[6];
fprintf(fpLog, "%g %g %g %g %g %g %g %g %g %g %g %g %g\n", dTime, z,
dEtotalVir, dEtotal, dEnergy[0], dEnergy[1], dEnergy[2], dEcosmo,
dEnergy[6], dEnergy[3], dEnergy[4],
dEnergy[5], wallTime);
fclose(fpLog);
delete msg;
}
#include <errno.h>
/// @brief mkdir with error checking
inline int safeMkdir(const char *achFile) {
struct stat buf;
int ret = stat(achFile, &buf);
if(ret != 0 && errno == ENOENT)
return mkdir(achFile, 0755);
CkError("WARNING: overwriting existing directory or file\n");
if(S_ISDIR(buf.st_mode))
return 0;
unlink(achFile);
return mkdir(achFile, 0755);
}
/// @brief determine if directory
int path_is_directory (const char* path) {
struct stat s_buf;
if (stat(path, &s_buf))
return 0;
return S_ISDIR(s_buf.st_mode);
}
#include <dirent.h>
/// @brief function to remove directory tree on Unix
void delete_dir_tree (const char* achDir) {
DIR* dp;
struct dirent* ep;
dp = opendir(achDir);
while ((ep = readdir(dp)) != NULL) {
if(strcmp(ep->d_name, ".") == 0 || strcmp(ep->d_name, "..") == 0)
continue;
auto file_name = make_formatted_string("%s/%s", achDir, ep->d_name);
char const* p_buf = file_name.c_str();
if (path_is_directory(p_buf))
delete_dir_tree(p_buf);
else
unlink(p_buf);
}
closedir(dp);
rmdir(achDir);
}
///
/// @brief Output a snapshot
/// @param iStep Timestep we are outputting, used for file name.
///
void Main::writeOutput(int iStep)
{
double dOutTime;
double dvFac;
double startTime = 0.0;
auto file_name = make_formatted_string("%s.%06i",param.achOutName,iStep);
char const* achFile = file_name.c_str();
if(param.csm->bComove) {
dOutTime = csmTime2Exp(param.csm, dTime);
dvFac = 1.0/(dOutTime*dOutTime);
}
else {
dOutTime = dTime;
dvFac = 1.0;
}
if(verbosity) {
ckout << "ReOrder particles ...";
startTime = CkWallTimer();
}
treeProxy.reOrder(nMaxOrder, CkCallbackResumeThread());
if(verbosity)
ckout << " took " << (CkWallTimer() - startTime) << " seconds."
<< endl;
double duTFac = (param.dConstGamma-1)*param.dMeanMolWeight/param.dGasConst;
if(verbosity) {
ckout << "Writing binary file ...";
startTime = CkWallTimer();
}
if(param.iBinaryOut != 6)
{
if(path_is_directory(achFile)) {
CkError("WARNING: overwriting existing directory\n");
delete_dir_tree(achFile);
}
if(param.bParaWrite)
treeProxy.setupWrite(0, 0, achFile, dOutTime, dvFac, duTFac,
param.bDoublePos, param.bDoubleVel,
param.bGasCooling, CkCallbackResumeThread());
else
treeProxy[0].serialWrite(0, achFile, dOutTime, dvFac, duTFac,
param.bDoublePos, param.bDoubleVel,
param.bGasCooling, CkCallbackResumeThread());
}
else { // N-Chilada output
// Set up N-Chilada directory structure
CkMustAssert(safeMkdir(achFile) == 0, "Can't create N-Chilada directories\n");
if(nTotalSPH > 0) {
string dirname(string(achFile) + "/gas");
CkMustAssert(safeMkdir(dirname.c_str()) == 0, "Can't create N-Chilada directories\n");
}
if(nTotalDark > 0) {
string dirname(string(achFile) + "/dark");
CkMustAssert(safeMkdir(dirname.c_str()) == 0, "Can't create N-Chilada directories\n");
}
if(nTotalStar > 0) {
string dirname(string(achFile) + "/star");
CkMustAssert(safeMkdir(dirname.c_str()) == 0, "Can't create N-Chilada directories\n");
}
NCgasNames = new CkVec<std::string>;
NCdarkNames = new CkVec<std::string>;
NCstarNames = new CkVec<std::string>;
MassOutputParams pMassOut(achFile, param.iBinaryOut, dOutTime);
outputBinary(pMassOut, param.bParaWrite, CkCallbackResumeThread());
PosOutputParams pPosOut(achFile, param.iBinaryOut, dOutTime);
outputBinary(pPosOut, param.bParaWrite, CkCallbackResumeThread());
VelOutputParams pVelOut(achFile, param.iBinaryOut, dOutTime, dvFac);
outputBinary(pVelOut, param.bParaWrite, CkCallbackResumeThread());
SoftOutputParams pSoftOut(achFile, param.iBinaryOut, dOutTime);
outputBinary(pSoftOut, param.bParaWrite, CkCallbackResumeThread());
PotOutputParams pPotOut(achFile, param.iBinaryOut, dOutTime);
outputBinary(pPotOut, param.bParaWrite, CkCallbackResumeThread());
if(nTotalSPH > 0) {
GasDenOutputParams pGasDenOut(achFile, param.iBinaryOut, dOutTime);
outputBinary(pGasDenOut, param.bParaWrite,
CkCallbackResumeThread());
TempOutputParams pTempOut(achFile, param.iBinaryOut, dOutTime,
param.bGasCooling, duTFac);
outputBinary(pTempOut, param.bParaWrite, CkCallbackResumeThread());
HsmOutputParams pHsmOut(achFile, param.iBinaryOut, dOutTime);
outputBinary(pHsmOut, param.bParaWrite, CkCallbackResumeThread());
}
if(nTotalSPH > 0 || nTotalStar > 0) {
MetalsOutputParams pMetalsOut(achFile, param.iBinaryOut, dOutTime);
outputBinary(pMetalsOut, param.bParaWrite, CkCallbackResumeThread());
}
if(nTotalStar > 0) {
TimeFormOutputParams pTimeFormOut(achFile, param.iBinaryOut, dOutTime);
outputBinary(pTimeFormOut, param.bParaWrite, CkCallbackResumeThread());
}
}
if(verbosity)
ckout << " took " << (CkWallTimer() - startTime) << " seconds."
<< endl;
if(verbosity) {
ckout << "Writing arrays ...";
startTime = CkWallTimer();
}
OxOutputParams pOxOut(achFile, param.iBinaryOut, dOutTime);
FeOutputParams pFeOut(achFile, param.iBinaryOut, dOutTime);
MFormOutputParams pMFormOut(achFile, param.iBinaryOut, dOutTime);
coolontimeOutputParams pcoolontimeOut(achFile, param.iBinaryOut, dOutTime);
ESNRateOutputParams pESNRateOut(achFile, param.iBinaryOut, dOutTime);
#ifdef CULLENALPHA
AlphaOutputParams pAlphaOut(achFile, param.iBinaryOut, dOutTime);
DvDsOutputParams pDvDsOut(achFile, param.iBinaryOut, dOutTime);
#endif
#ifndef COOLING_NONE
Cool0OutputParams pCool0Out(achFile, param.iBinaryOut, dOutTime);
Cool1OutputParams pCool1Out(achFile, param.iBinaryOut, dOutTime);
Cool2OutputParams pCool2Out(achFile, param.iBinaryOut, dOutTime);
#ifdef COOLING_MOLECULARH
Cool3OutputParams pCool3Out(achFile, param.iBinaryOut, dOutTime);
LWOutputParams pLWOut(achFile, param.iBinaryOut, dOutTime);
#endif /*COOLING_MOLECULARH*/
#endif
#ifdef SUPERBUBBLE
/*
* Write out additional variables when using superbubble
* (multiphase properties and effective temperature)
*/
MassHotOutputParams pmHotOut(achFile, param.iBinaryOut, 0.0);
uHotOutputParams puHotOut(achFile, param.iBinaryOut, 0.0);
uOutputParams puOut(achFile, param.iBinaryOut, 0.0);
double dTuFac = param.dGasConst/(param.dConstGamma-1)/param.dMeanMolWeight;
TempEffOutputParams pTeffOut(achFile, param.iBinaryOut, 0.0, param.bGasCooling, dTuFac);
#endif
#ifdef DIFFUSION
MetalsDotOutputParams pMetalsDotOut(achFile, param.iBinaryOut, dOutTime);
OxygenMassFracDotOutputParams pOxDotOut(achFile, param.iBinaryOut, dOutTime);
IronMassFracDotOutputParams pFeDotOut(achFile, param.iBinaryOut, dOutTime);
#endif
SoftOutputParams pSoftOut(achFile, param.iBinaryOut, dOutTime);
HsmOutputParams pHsmOut(achFile, param.iBinaryOut, dOutTime);
CsOutputParams pCSOut(achFile, param.iBinaryOut, dOutTime);
RungOutputParams pRung(achFile, param.iBinaryOut, dOutTime);
if (param.iBinaryOut) {
outputBinary(pRung, param.bParaWrite, CkCallbackResumeThread());
#ifdef CULLENALPHA
outputBinary(pAlphaOut, param.bParaWrite, CkCallbackResumeThread());
outputBinary(pDvDsOut, param.bParaWrite, CkCallbackResumeThread());
#endif
if (param.bStarForm || param.bFeedback) {
#ifdef SUPERBUBBLE
outputBinary(puHotOut, param.bParaWrite, CkCallbackResumeThread());
outputBinary(puOut, param.bParaWrite, CkCallbackResumeThread());
outputBinary(pmHotOut, param.bParaWrite, CkCallbackResumeThread());
outputBinary(pTeffOut, param.bParaWrite, CkCallbackResumeThread());
#endif
outputBinary(pOxOut, param.bParaWrite, CkCallbackResumeThread());
outputBinary(pFeOut, param.bParaWrite, CkCallbackResumeThread());
outputBinary(pMFormOut, param.bParaWrite, CkCallbackResumeThread());
outputBinary(pcoolontimeOut, param.bParaWrite, CkCallbackResumeThread());
outputBinary(pESNRateOut, param.bParaWrite, CkCallbackResumeThread());
}
#ifndef COOLING_NONE
if(param.bGasCooling) {
outputBinary(pCool0Out, param.bParaWrite, CkCallbackResumeThread());
outputBinary(pCool1Out, param.bParaWrite, CkCallbackResumeThread());
outputBinary(pCool2Out, param.bParaWrite, CkCallbackResumeThread());
#ifdef COOLING_MOLECULARH
outputBinary(pCool3Out, param.bParaWrite, CkCallbackResumeThread());
#endif /*COOLING_MOLECULARH*/
}
#ifdef COOLING_MOLECULARH
if(param.bDoStellarLW)
outputBinary(pLWOut, param.bParaWrite, CkCallbackResumeThread());
#endif /*COOLING_MOLECULARH*/
#endif
if(param.bDoSoftOutput && param.iBinaryOut != 6) {
outputBinary(pSoftOut, param.bParaWrite, CkCallbackResumeThread());
}
if(param.bDohOutput && param.iBinaryOut != 6) {
outputBinary(pHsmOut, param.bParaWrite, CkCallbackResumeThread());
}
if(param.bDoCSound)
outputBinary(pCSOut, param.bParaWrite, CkCallbackResumeThread());
#ifdef DIFFUSION
if(param.bDoGas)
outputBinary(pMetalsDotOut, param.bParaWrite,
CkCallbackResumeThread());
if (param.bStarForm || param.bFeedback) {
outputBinary(pOxDotOut, param.bParaWrite, CkCallbackResumeThread());
outputBinary(pFeDotOut, param.bParaWrite, CkCallbackResumeThread());
}
#endif
if(param.bDoIOrderOutput || param.bStarForm || param.bFeedback) {
IOrderOutputParams pIOrdOut(achFile, param.iBinaryOut, dOutTime);
outputBinary(pIOrdOut, param.bParaWrite, CkCallbackResumeThread());
if(param.bDoGas
#ifndef SPLITGAS
&& param.bStarForm
#endif
)
{
IGasOrderOutputParams pIGasOrdOut(achFile, param.iBinaryOut,
dOutTime);
outputBinary(pIGasOrdOut, param.bParaWrite,
CkCallbackResumeThread());
}
}
//SIDM
if(param.iSIDMSelect!=0) {
#ifdef SIDMINTERACT
iNSIDMOutputParams pNSIDMOut(achFile, param.iBinaryOut, dOutTime);
outputBinary(pNSIDMOut, param.bParaWrite,
CkCallbackResumeThread());
#endif
}
} else {
#ifdef CULLENALPHA
treeProxy[0].outputASCII(pAlphaOut, param.bParaWrite,
CkCallbackResumeThread());
treeProxy[0].outputASCII(pDvDsOut, param.bParaWrite,
CkCallbackResumeThread());
#endif
if (param.bStarForm || param.bFeedback) {
#ifdef SUPERBUBBLE
treeProxy[0].outputASCII(pmHotOut, param.bParaWrite, CkCallbackResumeThread());
treeProxy[0].outputASCII(puHotOut, param.bParaWrite, CkCallbackResumeThread());
treeProxy[0].outputASCII(puOut, param.bParaWrite, CkCallbackResumeThread());
treeProxy[0].outputASCII(pTeffOut, param.bParaWrite,CkCallbackResumeThread());
#endif
treeProxy[0].outputASCII(pRung, param.bParaWrite,
CkCallbackResumeThread());
treeProxy[0].outputASCII(pOxOut, param.bParaWrite,
CkCallbackResumeThread());
treeProxy[0].outputASCII(pFeOut, param.bParaWrite,
CkCallbackResumeThread());
treeProxy[0].outputASCII(pMFormOut, param.bParaWrite,
CkCallbackResumeThread());
treeProxy[0].outputASCII(pcoolontimeOut, param.bParaWrite,
CkCallbackResumeThread());
treeProxy[0].outputASCII(pESNRateOut, param.bParaWrite,
CkCallbackResumeThread());
}
#ifndef COOLING_NONE
if(param.bGasCooling) {
treeProxy[0].outputASCII(pCool0Out, param.bParaWrite,
CkCallbackResumeThread());
treeProxy[0].outputASCII(pCool1Out, param.bParaWrite,
CkCallbackResumeThread());
treeProxy[0].outputASCII(pCool2Out, param.bParaWrite,
CkCallbackResumeThread());
#ifdef COOLING_MOLECULARH
treeProxy[0].outputASCII(pCool3Out, param.bParaWrite,
CkCallbackResumeThread());
#endif /*COOLING_MOLECULARH*/
}
#ifdef COOLING_MOLECULARH
if(param.bDoStellarLW)
treeProxy[0].outputASCII(pLWOut, param.bParaWrite,
CkCallbackResumeThread());
#endif /*COOLING_MOLECULARH*/
#endif
#ifdef DIFFUSION
if (param.bStarForm || param.bFeedback) {
treeProxy[0].outputASCII(pMetalsDotOut, param.bParaWrite,
CkCallbackResumeThread());
treeProxy[0].outputASCII(pOxDotOut, param.bParaWrite,
CkCallbackResumeThread());
treeProxy[0].outputASCII(pFeDotOut, param.bParaWrite,
CkCallbackResumeThread());
}
#endif
if(param.bDoSoftOutput)
treeProxy[0].outputASCII(pSoftOut, param.bParaWrite,
CkCallbackResumeThread());
if(param.bDohOutput)
treeProxy[0].outputASCII(pHsmOut, param.bParaWrite,
CkCallbackResumeThread());
if(param.bDoCSound)
treeProxy[0].outputASCII(pCSOut, param.bParaWrite,
CkCallbackResumeThread());
if(param.bDoIOrderOutput || param.bStarForm || param.bFeedback) {
IOrderOutputParams pIOrdOut(achFile, param.iBinaryOut, dOutTime);
treeProxy[0].outputASCII(pIOrdOut, param.bParaWrite,
CkCallbackResumeThread());
if(param.bDoGas
#ifndef SPLITGAS
&& param.bStarForm
#endif
)
{
IGasOrderOutputParams pIGasOrdOut(achFile, param.iBinaryOut,
dOutTime);
treeProxy[0].outputASCII(pIGasOrdOut, param.bParaWrite,
CkCallbackResumeThread());
}
}
//SIDM
if(param.iSIDMSelect!=0) {
#ifdef SIDMINTERACT
iNSIDMOutputParams pNSIDMOut(achFile, param.iBinaryOut, dOutTime);
treeProxy[0].outputASCII(pNSIDMOut, param.bParaWrite,
CkCallbackResumeThread());
#endif
}
}
if(verbosity)
ckout << " took " << (CkWallTimer() - startTime) << " seconds."
<< endl;
if(param.nSteps != 0 && param.bDoDensity) {
// The following call is to get the particles in key order
// before the sort.
treeProxy.drift(0.0, 0, 0, 0.0, 0.0, 0, true, param.dMaxEnergy,
CkCallbackResumeThread());
domainDecomp(0);
buildTree(0);
if(verbosity)
ckout << "Calculating total densities ...";
DensitySmoothParams pDen(TYPE_GAS|TYPE_DARK|TYPE_STAR, 0);
startTime = CkWallTimer();
double dfBall2OverSoft2 = 0.0;
treeProxy.startSmooth(&pDen, 1, param.nSmooth, dfBall2OverSoft2,
CkCallbackResumeThread());
treeProxy.finishNodeCache(CkCallbackResumeThread());
#ifdef CUDA
// We didn't do gravity where the registered TreePieces on the
// DataManager normally get cleared. Clear them here instead.
dMProxy.clearRegisteredPieces(CkCallbackResumeThread());
#endif
if(verbosity) {
ckout << " took " << (CkWallTimer() - startTime) << " seconds."
<< endl;
ckout << "Reodering ...";
}
startTime = CkWallTimer();
treeProxy.reOrder(nMaxOrder, CkCallbackResumeThread());
if(verbosity) {
ckout << " took " << (CkWallTimer() - startTime) << " seconds."
<< endl;
ckout << "Outputting densities ...";
}
startTime = CkWallTimer();
DenOutputParams pDenOut(string(achFile), param.iBinaryOut, dOutTime);
if (param.iBinaryOut)
outputBinary(pDenOut, param.bParaWrite, CkCallbackResumeThread());
else
treeProxy[0].outputASCII(pDenOut, param.bParaWrite, CkCallbackResumeThread());
ckout << " took " << (CkWallTimer() - startTime) << " seconds."
<< endl;
if(param.bDoGas) { // recalculate gas densities for timestep
if(verbosity)
ckout << "Recalculating gas densities ...";
startTime = CkWallTimer();
// The following call is to get the particles in key order
// before the sort.
treeProxy.drift(0.0, 0, 0, 0.0, 0.0, 0, true, param.dMaxEnergy,
CkCallbackResumeThread());
domainDecomp(0);
buildTree(0);
DensitySmoothParams pDenGas(TYPE_GAS, 0);
dfBall2OverSoft2 = 4.0*param.dhMinOverSoft*param.dhMinOverSoft;
treeProxy.startSmooth(&pDenGas, 1, param.nSmooth, dfBall2OverSoft2,
CkCallbackResumeThread());
treeProxy.finishNodeCache(CkCallbackResumeThread());
#ifdef CUDA
// We didn't do gravity where the registered TreePieces on the
// DataManager normally get cleared. Clear them here instead.
dMProxy.clearRegisteredPieces(CkCallbackResumeThread());
#endif
if(verbosity)
ckout << " took " << (CkWallTimer() - startTime) << " seconds."
<< endl;
}
}
if(param.nSteps != 0) { // Get particles back to home
// processors for continuing the simulation.
// The following call is to get the particles in key order
// before the sort.
treeProxy.drift(0.0, 0, 0, 0.0, 0.0, 0, true, param.dMaxEnergy,
CkCallbackResumeThread());
domainDecomp(0);
}
if(param.iBinaryOut == 6)
writeNCXML(achFile);
}
///
/// @brief Calculate timesteps of particles.
///
/// Particles on the KickRung and shorter have their timesteps
/// adjusted. Calls TreePiece::adjust().
///
/// @param iKickRung Rung (and above) about to be kicked.
///
int Main::adjust(int iKickRung)
{
CkReductionMsg *msg;
double a = csmTime2Exp(param.csm,dTime);
double dDiffCoeff = (param.dMetalDiffusionCoeff > param.dThermalDiffusionCoeff ?
param.dMetalDiffusionCoeff : param.dThermalDiffusionCoeff);
double startTime = CkWallTimer();
treeProxy.adjust(iKickRung, param.bEpsAccStep, param.bGravStep,
param.bSphStep, param.bViscosityLimitdt,
param.dEta, param.dEtaCourant, param.dEtauDot,
dDiffCoeff, param.dEtaDiffusion,
param.dDelta, 1.0/(a*a*a), a,
0.0, /* set to dhMinOverSoft if we implement
Gasoline's LowerSoundSpeed. */
param.dResolveJeans/a,
param.bDoGas,
CkCallbackResumeThread((void*&)msg));
int iCurrMaxRung = ((int64_t *)msg->getData())[0];
int64_t nMaxRung = ((int64_t *)msg->getData())[1];
if(nMaxRung <= param.nTruncateRung && iCurrMaxRung > iKickRung) {
if(verbosity)
CkPrintf("n_CurrMaxRung = %ld, iCurrMaxRung = %d: promoting particles\n", nMaxRung, iCurrMaxRung);
iCurrMaxRung--;
treeProxy.truncateRung(iCurrMaxRung, CkCallbackResumeThread());
}
if(param.sinks.bDoSinks) {
int iCurrMaxRungGas = ((int64_t *)msg->getData())[2];
int iCurrSinkRung = param.sinks.iSinkRung;
if(iCurrMaxRungGas > iCurrSinkRung)
iCurrSinkRung = iCurrMaxRungGas;
treeProxy.SinkStep(iCurrSinkRung, iKickRung, CkCallbackResumeThread());
}
delete msg;
double tAdjust = CkWallTimer() - startTime;
timings[iKickRung].tAdjust += tAdjust;
if(verbosity)
CkPrintf("Adjust took %g seconds.\n", tAdjust);
return iCurrMaxRung;
}
///
/// @brief Count and print out the number of particles in each
/// timestep bin.
///
/// This routine is for information only.
///
void Main::rungStats()
{
CkReductionMsg *msg;
treeProxy.rungStats(CkCallbackResumeThread((void*&)msg));
int64_t *nInRung = (int64_t *)msg->getData();
ckout << "Rung distribution: (";
for(int iRung = 0; iRung <= MAXRUNG; iRung++)
ckout << " " << nInRung[iRung] << ",";
ckout << ")" << endl;
delete msg;
}
///
/// @brief determine number of active particles in the given rung
///
/// Calls TreePiece::countActive() and sets Main::nActiveGrav and
/// Main::nActiveSPH.
///
void Main::countActive(int activeRung)
{
CkReductionMsg *msg;
treeProxy.countActive(activeRung, CkCallbackResumeThread((void*&)msg));
int64_t *nActive = (int64_t *)msg->getData();
nActiveGrav = nActive[0];
nActiveSPH = nActive[1];
ckout << "Gravity Active: " << nActive[0]
<< ", Gas Active: " << nActive[1] << endl ;
delete msg;
}
///
/// @brief Change timesteps of particles experiencing sudden gas
/// forces.
/// @param iRung The rung on which we are calculating forces.
///
/// For gas simulations, find particles who are are in the middle of
/// too large a timestep and adjust their velocities to a smaller
/// timestep.
///
void Main::emergencyAdjust(int iRung)
{
if(!param.bDtAdjust || iRung == 0) return;
double startTime = CkWallTimer();
if(verbosity) CkPrintf("Check for Emergency Adjust, Rung: %d\n", iRung);
double dDelta = RungToDt(param.dDelta, iRung);
double dDeltaThresh = 0.5*dDelta;
CkReductionMsg *msg;
treeProxy.emergencyAdjust(iRung, param.dDelta, dDeltaThresh,
CkCallbackResumeThread((void*&)msg));
int *nUnKicked = (int *)msg->getData();
if(*nUnKicked) {
CkPrintf("WARNING, %d particles needed emergency rung changes\n",
*nUnKicked);
}
delete msg;
timings[iRung].tEmergAdjust += CkWallTimer() - startTime;
}
/**
* \brief Change the softening in comoving coordinates
*
* When compiled with -DCHANGESOFT, and bPhysicalSoft is set, the
* (comoving) softening is changed so that it is constant in physical units.
*/
void Main::updateSoft()
{
#ifdef CHANGESOFT
if (param.bPhysicalSoft) {
double dFac = 1./csmTime2Exp(param.csm,dTime);
if (param.bSoftMaxMul && dFac > param.dSoftMax) dFac = param.dSoftMax;
treeProxy.physicalSoft(param.dSoftMax, dFac, param.bSoftMaxMul,
CkCallbackResumeThread());
}
#endif
}
///
/// @brief Slowly increase mass of a subset of particles.
///
void Main::growMass(double dTime, double dDelta)
{
if (param.nGrowMass > 0 && dTime > param.dGrowStartT
&& dTime <= param.dGrowEndT) {
double dDeltaM = param.dGrowDeltaM*dDelta
/(param.dGrowEndT - param.dGrowStartT);
treeProxy.growMass(param.nGrowMass, dDeltaM, CkCallbackResumeThread());
}
}
/*
* Taken from PKDGRAV msrDumpFrameInit()
*/
int
Main::DumpFrameInit(double dTime, double dStep, int bRestart) {
if (param.dDumpFrameStep > 0 || param.dDumpFrameTime > 0) {
if(param.iDirector < 1) {
CkError("WARNING: DumpFrame parameters set, but iDirector is %d; DumpFrame is disabled\n", param.iDirector);
param.dDumpFrameStep = -1.0;
param.dDumpFrameTime = -1.0;
return 0;
}
bDumpFrame = 1;
int i;
df = (DumpFrameContext **) malloc(param.iDirector*sizeof(*df));
for(i = 0; i < param.iDirector; i++) {
df[i] = NULL;
auto file_name = make_formatted_string("%s.director%d", param.achOutName, i+1);
if(i == 0) {
file_name = std::move(make_formatted_string("%s.director", param.achOutName));
}
char const* achFile = file_name.c_str();
dfInitialize( &df[i], param.dSecUnit/SECONDSPERYEAR,
dTime, param.dDumpFrameTime, dStep,
param.dDumpFrameStep, param.dDelta,
param.iMaxRung, verbosity, const_cast<char*>(achFile),
param.bPeriodic, param.vPeriod);
df[i]->duTFac = (param.dConstGamma-1)*param.dMeanMolWeight/param.dGasConst;
}
/* Read in photogenic particle list */
if (df[0]->bGetPhotogenic) {
auto file_name = make_formatted_string("%s.photogenic", param.achOutName);
char const* achFile = file_name.c_str();
FILE *fp = fopen(achFile, "r" );
CkMustAssert(!(fp == NULL), "DumpFrame: photogenic specified, but no photogenic file\n");
fclose(fp);
CkReductionMsg *msg;
treeProxy.setTypeFromFile(TYPE_PHOTOGENIC, achFile, CkCallbackResumeThread((void*&)msg));
int64_t *nSet = (int64_t *)msg->getData();
if (verbosity)
ckout << nSet[0] << " iOrder numbers read. " << nSet[1]
<<" direct iOrder photogenic particles selected."
<< endl;
delete msg;
}
if(!bRestart)
DumpFrame(dTime, dStep );
if(df[0]->dDumpFrameTime > 0) {
/* Bring frame count up to correct place for restart. */
while(df[0]->dTime + df[0]->dDumpFrameTime
< dTime0 + param.dDelta*param.iStartStep) {
df[0]->dTime += df[0]->dDumpFrameTime;
df[0]->nFrame++;
}
// initialize the rest of the dumpframes
if (param.iDirector > 1) {
int j;
for(j=0; j < param.iDirector; j++) {
df[j]->dStep = df[0]->dStep;
df[j]->dDumpFrameTime = df[0]->dDumpFrameTime;
df[j]->nFrame = df[0]->nFrame;
}
}
}
else if(df[0]->dDumpFrameStep > 0) {
/* Bring frame count up to correct place for restart. */
while(df[0]->dStep + df[0]->dDumpFrameStep < param.iStartStep) {
df[0]->dStep += df[0]->dDumpFrameStep;
df[0]->nFrame++;
}
// initialize the rest of the dumpframes
if (param.iDirector > 1) {
int j;
for(j=0; j < param.iDirector; j++) {
df[j]->dStep = df[0]->dStep;
df[j]->dDumpFrameStep = df[0]->dDumpFrameStep;
df[j]->nFrame = df[0]->nFrame;
}
}
}
return 1;
}
else { return 0; }
}
#include "DumpFrameData.h"
void Main::DumpFrame(double dTime, double dStep)
{
double dExp;
int i;
for(i = 0; i < param.iDirector; i++) {
if (df[i]->iDimension == DF_3D) {
#ifdef VOXEL
assert(0); // Unimplemented
/* 3D Voxel Projection */
struct inDumpVoxel in;
assert(0);
dfSetupVoxel( msr->df, dTime, dStep, &in );
pstDumpVoxel(msr->pst, &in, sizeof(struct inDumpVoxel), NULL, NULL );
dsec1 = msrTime() - sec;
dfFinishVoxel( msr->df, dTime, dStep, &in );
dsec2 = msrTime() - sec;
printf("DF Dumped Voxel %i at %g (Wallclock: Render %f tot %f secs).\n",
df->nFrame-1,dTime,dsec1,dsec2);
#endif
}
else {
/* 2D Projection */
struct inDumpFrame in;
double *com = NULL;
CkReductionMsg *msgCOM;
CkReductionMsg *msgCOMbyType;
if (df[i]->bGetCentreOfMass) {
treeProxy.getCOM(CkCallbackResumeThread((void*&)msgCOM), 0);
com = (double *)msgCOM->getData();
}
if (df[i]->bGetPhotogenic) {
int iType = TYPE_PHOTOGENIC;
treeProxy.getCOMByType(iType, CkCallbackResumeThread((void*&)msgCOMbyType), 0);
com = (double *)msgCOMbyType->getData();
}
#if (0)
if (df[i]->bGetOldestStar) {
pstOldestStar(msr->pst, NULL, 0, &com[0], NULL);
}
#endif
dExp = csmTime2Exp(param.csm,dTime);
in.dMinGasMass = param.stfm->dMinGasMass;
dfSetupFrame(df[i], dTime, dStep, dExp, com, &in, 0, 0 );
CkReductionMsg *msgDF;
dfDataProxy.clearFrame(in, CkCallbackResumeThread());
treeProxy.DumpFrame(in, CkCallbackResumeThread(), false);
dfDataProxy.combineFrame(in, CkCallbackResumeThread((void*&)msgDF));
// N.B. Beginning of message contains the DumpFrame
// parameters needed for proper merging.
void *Image = ((char *)msgDF->getData())
+ sizeof(struct inDumpFrame);
unsigned char *gray;
dfFinishFrame(df[i], dTime, dStep, &in, Image, false, &gray);
delete msgDF;
if (df[i]->bGetCentreOfMass) {
delete msgCOM;
}
if (df[i]->bGetPhotogenic) {
delete msgCOMbyType;
}
}
}
}
/**
* \brief Coalesce all added and deleted particles and update global
* quantities.
*/
void Main::addDelParticles()
{
CkReductionMsg *msg;
if(verbosity)
CkPrintf("Changing Particle number\n");
treeProxy.colNParts(CkCallbackResumeThread((void*&)msg));
// an array of one element per treepiece produced by CkReduction::concat
// @TODO: replace this with a proper scan
CountSetPart *counts = (CountSetPart *) msg->getData();
CkAssert(msg->getSize() == numTreePieces*sizeof(*counts));
int iPiece;
/// This is large, but no larger than the counts message
NewMaxOrder *nMaxOrders = new NewMaxOrder[numTreePieces];
for(iPiece = 0; iPiece < numTreePieces; iPiece++) {
nMaxOrders[counts[iPiece].index].nMaxOrderGas = nMaxOrderGas+1;
nMaxOrders[counts[iPiece].index].nMaxOrderDark = nMaxOrderDark+1;
nMaxOrders[counts[iPiece].index].nMaxOrder = nMaxOrder+1;
nMaxOrderGas += counts[iPiece].nAddGas;
nMaxOrderDark += counts[iPiece].nAddDark;
nMaxOrder += counts[iPiece].nAddStar;
nTotalSPH += counts[iPiece].nAddGas - counts[iPiece].nDelGas;
nTotalDark += counts[iPiece].nAddDark - counts[iPiece].nDelDark;
nTotalStar += counts[iPiece].nAddStar - counts[iPiece].nDelStar;
}
delete msg;
treeProxy.newOrder(nMaxOrders, numTreePieces, CkCallbackResumeThread());
delete[] nMaxOrders;
nTotalParticles = nTotalSPH + nTotalDark + nTotalStar;
if (verbosity)
CkPrintf("New numbers of particles: %ld gas %ld dark %ld star\n",
nTotalSPH, nTotalDark, nTotalStar);
treeProxy.setNParts(nTotalSPH, nTotalDark, nTotalStar,
CkCallbackResumeThread());
}
/**
* Diagnostic function to summmarize memory usage across all
* processors
*/
void Main::memoryStats()
{
CkReductionMsg *msg;
dMProxy.memoryStats(CkCallbackResumeThread((void*&)msg));
// simple maximum for now.
int *memMax = (int *)msg->getData();
CkPrintf("Maximum memory: %d MB\n", *memMax);
delete msg;
}
/**
* Diagnostic function to summmarize cache memory usage across all
* processors
*/
void Main::memoryStatsCache()
{
CkReductionMsg *msg;
treeProxy.memCacheStats(CkCallbackResumeThread((void*&)msg));
// simple maximum for now.
int *memMax = (int *)msg->getData();
CkPrintf("Maximum memory with cache: %d MB, after: %d MB\n", memMax[0],
memMax[1]);
CkPrintf("Maximum cache entries: node: %d , particle: %d\n", memMax[2],
memMax[3]);
delete msg;
}
void registerStatistics() {
#if COSMO_STATS > 0
CkCacheStatistics::sum = CkReduction::addReducer(CkCacheStatistics::sumFn);
TreePieceStatistics::sum = CkReduction::addReducer(TreePieceStatistics::sumFn);
#endif
#ifdef HPM_COUNTER
hpmInit(1,"ChaNGa");
#endif
}
void Main::pup(PUP::er& p)
{
CBase_Main::pup(p);
p | basefilename;
p | nTotalParticles;
p | nTotalSPH;
p | nTotalDark;
p | nTotalStar;
p | nSink;
p | nMaxOrderGas;
p | nMaxOrderDark;
p | nMaxOrder;
p | theta;
p | dTime;
p | dTime0;
p | dEcosmo;
p | dUOld;
p | dTimeOld;
p | param;
p | vdOutTime;
p | iOut;
p | bDumpFrame;
p | bChkFirst;
p | killAt;
}
void Main::liveVizImagePrep(liveVizRequestMsg *msg)
{
double dExp;
struct inDumpFrame in;
double *com = NULL;
CkReductionMsg *msgCOM;
CkReductionMsg *msgCOMbyType;
struct DumpFrameContext df_temp;
// only do the first Director
df_temp = *df[0];
if (df_temp.bGetCentreOfMass) {
treeProxy.getCOM(CkCallbackResumeThread((void*&)msgCOM), 1);
com = (double *)msgCOM->getData();
}
if (df_temp.bGetPhotogenic) {
int iType = TYPE_PHOTOGENIC;
treeProxy.getCOMByType(iType,
CkCallbackResumeThread((void*&)msgCOMbyType), 1);
com = (double *)msgCOMbyType->getData();
}
#if (0)
if (df_temp.bGetOldestStar) {
pstOldestStar(msr->pst, NULL, 0, &com[0], NULL);
}
#endif
dExp = csmTime2Exp(param.csm,dTime);
dfSetupFrame(&df_temp, dTime, 0.0, dExp, com, &in,
msg->req.wid, msg->req.ht );
CkCallback cb(CkCallback::ignore);
treeProxy.DumpFrame(in, cb, true);
if (df_temp.bGetCentreOfMass)
delete msgCOM;
if (df_temp.bGetPhotogenic)
delete msgCOMbyType;
}
#ifdef SELECTIVE_TRACING
void Main::turnProjectionsOn(int activeRung){
CkAssert(!projectionsOn);
if(numMaxTrace <= 0) return;
if(traceState == TraceNormal){
if(activeRung != monitorRung){
}
else if(traceIteration < numTraceIterations){
if(monitorStart == 0){
prjgrp.on(CkCallbackResumeThread());
projectionsOn = true;
traceIteration++;
numMaxTrace--;
}
else{
monitorStart--;
}
}
else{
traceState = TraceSkip;
traceIteration = 1;
}
}
else if(traceState == TraceSkip){
if(activeRung != monitorRung){
}
else if(traceIteration < numSkipIterations){
traceIteration++;
}
else{
traceState = TraceNormal;
if(monitorStart == 0){
prjgrp.on(CkCallbackResumeThread());
projectionsOn = true;
traceIteration = 1;
numMaxTrace--;
}
else{
monitorStart--;
}
}
}
}
void Main::turnProjectionsOff(){
if(projectionsOn){
prjgrp.off(CkCallbackResumeThread());
projectionsOn = false;
}
}
#endif
const char *typeString(NodeType type);
void GenericTreeNode::getGraphViz(std::ostream &out){
#ifdef BIGKEYS
uint64_t lower = getKey();
uint64_t upper = getKey() >> 64;
out << upper << lower
<< "[label=\"" << upper << lower
#else
out << getKey()
<< "[label=\"" << getKey()
#endif
<< " "
<< typeString(getType())
<< "\\n"
<< moments.totalMass << " "
<< "(" << moments.cm.x << ","
<< moments.cm.y << ","
<< moments.cm.z << ")"
<< "\"];"
<< std::endl;
if(getType() == Boundary){
for(int i = 0; i < numChildren(); i++){
GenericTreeNode *child = getChildren(i);
#ifdef BIGKEYS
uint64_t ch_lower = getKey();
uint64_t ch_upper = getKey() >> 64;
out << upper << lower << "->" << ch_upper << ch_lower << ";" << std::endl;
#else
out << getKey() << "->" << child->getKey() << ";" << std::endl;
#endif
}
}
}
void printTreeGraphVizRecursive(GenericTreeNode *node, ostream &out){
node->getGraphViz(out);
out << endl;
if(node->getType() == Boundary){
for(int i = 0; i < node->numChildren(); i++){
printTreeGraphVizRecursive(node->getChildren(i),out);
}
}
}
/// @brief Print a visualization of a tree.
void printTreeGraphViz(GenericTreeNode *node, ostream &out, const string &name){
out << "digraph " << name << " {" << endl;
printTreeGraphVizRecursive(node,out);
out << "}" << endl;
}
#include "ParallelGravity.def.h"
/*
#define CK_TEMPLATES_ONLY
#include "CacheManager.def.h"
#undef CK_TEMPLATES_ONLY
#include "CacheManager.def.h"
*/