Content - 6cd1983b491f576d0a732e3b62a2b1dca96acba1 - df112fd/cuda_typedef.h

visit type:
Tip revision: a0ea58ef1ba3e8fcfe29d265226dd11cfe0ce40b authored by Harshitha on 20 January 2015, 04:44:48 UTC
Fix the crash caused by DD without Sorter. This crash happens because some TreePiece ends up with no particles. This fix calls the Sorter if any TreePiece ends with no particles.
Tip revision: a0ea58e
cuda_typedef.h
#ifndef CUDA_TYPEDEF_H_
#define CUDA_TYPEDEF_H_

/** @file cuda_typedef.h
 *
 * Definitions of types for the CUDA port.
 */

/** @brief floating point type on the GPU
 *
 * Single precision (float). The host-side helpers in this header assign
 * hosttype values into cudatype fields, which narrows them.
 */
typedef float cudatype;
/** @brief floating point type on the host
 *
 * Double precision (double).
 */
typedef double hosttype;

// set these to appropriate values (in millions)
// One NODE_* / PART_* pair is provided for each of the three request
// categories below; all six currently share the same value.
// NOTE(review): judging by the names these cap the number of node and
// particle interactions batched into one request -- how the "millions"
// scaling is applied is not visible in this header; confirm at the use site.
// local
#define NODE_INTERACTIONS_PER_REQUEST_L 0.1
#define PART_INTERACTIONS_PER_REQUEST_L 0.1
// remote, no-resume
#define NODE_INTERACTIONS_PER_REQUEST_RNR 0.1
#define PART_INTERACTIONS_PER_REQUEST_RNR 0.1
// remote, resume
#define NODE_INTERACTIONS_PER_REQUEST_RR 0.1
#define PART_INTERACTIONS_PER_REQUEST_RR 0.1

// Numeric identifiers that exist only when CUDA_STATS is defined.
// NOTE(review): presumably event IDs used to tag timing/tracing records for
// the serialization steps and the six kernel flavours -- confirm against the
// code that consumes them.
#ifdef CUDA_STATS
// serialization steps (tree / interaction list)
#define CUDA_SER_TREE 9900
#define CUDA_SER_LIST 9901

// kernels: {local, remote, remote-resume} x {node, particle}
#define CUDA_LOCAL_NODE_KERNEL 9910
#define CUDA_REMOTE_NODE_KERNEL 9911
#define CUDA_REMOTE_RESUME_NODE_KERNEL 9912
#define CUDA_LOCAL_PART_KERNEL 9913
#define CUDA_REMOTE_PART_KERNEL 9914
#define CUDA_REMOTE_RESUME_PART_KERNEL 9915

#endif

// Tuning defaults consumed elsewhere in the CUDA port.
// NOTE(review): the exact meaning of each value (a fraction for the
// "large phase" threshold, a per-active-particle source count estimate)
// is inferred from the names only -- confirm at the use site.
#define TP_LARGE_PHASE_THRESHOLD_DEFAULT 0.3
#define AVG_SOURCE_PARTICLES_PER_ACTIVE 10 

/** @brief 3D vector of cudatype.
 */
typedef struct CudaVector3D{
  cudatype x,y,z;
#if __cplusplus && !defined __CUDACC__
  inline CudaVector3D& operator=(Vector3D<hosttype> &a){
    x = a.x;
    y = a.y;
    z = a.z;
    return *this;
  }
  
  inline Vector3D<hosttype> operator+(Vector3D<hosttype> &v){
    return Vector3D<hosttype>(x + v.x, y + v.y, z + v.z);
  }

  CudaVector3D(Vector3D<hosttype> &o){
    x = o.x;
    y = o.y;
    z = o.z;
  }
  
  CudaVector3D(){}
#endif
}CudaVector3D;

/** @brief Version of MultipoleMoments using cudatype
 */
typedef struct CudaMultipoleMoments{
  cudatype radius;
  cudatype soft;
  cudatype totalMass;
  CudaVector3D cm;
  cudatype xx, xy, xz, yy, yz, zz;
#if __cplusplus && !defined __CUDACC__
  CudaMultipoleMoments(){}
  CudaMultipoleMoments(MultipoleMoments &mom){
    *this = mom;
  }
  inline CudaMultipoleMoments& operator=(MultipoleMoments &m){
    radius = m.radius;
    soft = m.soft;
    totalMass = m.totalMass;

    cm = m.cm;
    xx = m.xx;
    xy = m.xy;
    xz = m.xz;
    yy = m.yy;
    yz = m.yz;
    zz = m.zz;

    return *this;
  }
#endif
}CudaMultipoleMoments;

/** @brief Bucket of particles on the interaction list for the GPU.
 *
 * Three plain ints; the constructors exist only for host C++ builds
 * (not under nvcc).
 */
typedef struct ILPart{
  /** Index of particles on GPU. */
  int index;
  /** Encodes the replica for periodic boundaries */
  int off;
  /** Number of particles in the bucket. */
  int num;

#if defined(__cplusplus) && !defined(__CUDACC__)
  /** Default constructor; index, off and num are left uninitialized. */
  ILPart() {}

  /** Construct with all three fields given explicitly.
   *
   * @param partIndex value stored in index
   * @param offset    value stored in off
   * @param count     value stored in num
   */
  ILPart(int partIndex, int offset, int count)
    : index(partIndex),
      off(offset),
      num(count)
  {}
#endif
}ILPart;

/** @brief Cell on the interaction list for the GPU
 *
 * Two plain ints; the constructors exist only for host C++ builds
 * (not under nvcc).
 */
typedef struct ILCell{
  /** Index of this cell's moments on the GPU. */
  int index;
  /** Encodes the replica for periodic boundaries */
  int offsetID;
#if defined(__cplusplus) && !defined(__CUDACC__)
  /** Default constructor; index and offsetID are left uninitialized. */
  ILCell() {}

  /** Construct with both fields given explicitly.
   *
   * @param cellIndex value stored in index
   * @param replica   value stored in offsetID
   */
  ILCell(int cellIndex, int replica)
    : index(cellIndex),
      offsetID(replica)
  {}
#endif
}ILCell;

typedef struct CompactPartData{
  cudatype mass;
  cudatype soft;
  CudaVector3D position;
#if defined CUDA_EMU_KERNEL_NODE_PRINTS || defined CUDA_EMU_KERNEL_PART_PRINTS
  int tp, id;
#endif

#if __cplusplus && !defined __CUDACC__
  CompactPartData(){}
  CompactPartData(ExternalGravityParticle &egp){
    *this = egp;
  }
  CompactPartData(cudatype m, cudatype s, Vector3D<hosttype> &rr) : mass(m), soft(s), position(rr){}

  inline CompactPartData& operator=(ExternalGravityParticle &gp){
    mass = gp.mass;
    soft = gp.soft;
    position = gp.position;
    return *this;
  }
#endif
}CompactPartData;

/** @brief A CudaVector3D `a` plus a cudatype `potential`.
 *
 * Plain data with no constructors, so it is usable from both C and C++.
 * NOTE(review): presumably the per-particle acceleration and potential
 * produced on the GPU (the same two names appear in PartData) -- confirm.
 */
typedef struct VariablePartData{
  CudaVector3D a;
  cudatype potential;
}VariablePartData;

/** @brief CompactPartData plus the fields a, potential and dtGrav.
 *
 * Assigning from a GravityParticle copies the compact part and resets the
 * remaining fields to zero.
 */
typedef struct PartData{
  CompactPartData core;
  CudaVector3D a;
  cudatype potential;
  cudatype dtGrav;

#if defined(__cplusplus) && !defined(__CUDACC__)
  /** Default constructor; leaves every field uninitialized. */
  PartData(){}

  /** Build from a host particle by delegating to the assignment operator. */
  PartData(GravityParticle &gp){
    operator=(gp);
  }

  /** Build from explicit values.
   *
   * @param cpd value copied into core
   * @param _a  host vector converted into a
   * @param p   value stored in potential
   * @param dtg value stored in dtGrav
   */
  PartData(CompactPartData &cpd, Vector3D<hosttype> &_a, cudatype p, cudatype dtg)
    : core(cpd),
      a(_a),
      potential(p),
      dtGrav(dtg)
  {}

  /** Take the compact data from a host particle and zero everything else.
   *
   * @param particle host particle to read from
   * @return reference to this object
   */
  inline PartData& operator=(GravityParticle &particle){
    // NOTE(review): relies on GravityParticle being accepted by
    // CompactPartData's assignment from ExternalGravityParticle -- confirm.
    core = particle;
    a.x = a.y = a.z = 0.0;
    potential = dtGrav = 0.0;
    return *this;
  }
#endif
}PartData;

#if 0
#ifdef __cplusplus
// work request data structures

template <class T>
struct CudaGroupRequest{
	T *intlist;
	/*
	CkVec<int> bucketMarkers;
	CkVec<int> bucketStarts;
	CkVec<int> buckets;
	CkVec<int> bucketSizes;
	*/

	int *bucketMarkers;
	int *bucketStarts;
	int *buckets;
	int *bucketSizes;

	int numInteractions; // number of interactions in intlist
	int numBucketsPlusOne; // number of buckets involved in the work request
	bool lastIsPartial; // is the last bucket partially computed?

	TreePiece *tp;
	State *state;

	virtual void cleanUp(){
		delete [] intlist;
	}

	CudaGroupRequest(int tpBuckets){
		bucketMarkers = new int[tpBuckets+1];
		bucketStarts = new int [tpBuckets];
		buckets = new int [tpBuckets];
		bucketSizes  = new int[tpBuckets];
	}
};

/**
 * CudaGroupRequest<T> extended with a fixed-size array of `size` elements
 * of type S and a counter for it.
 *
 * This definition currently sits inside an `#if 0` region and is not
 * compiled.
 */
template <class S, class T, int size>
struct CudaGroupMissedRequest : public CudaGroupRequest <T> {
	S *missed;
	int numMissed;

	/** Run the base-class clean-up, then release the missed array. The
	 *  pointer is reset so a repeated call is harmless. */
	void cleanUp(){
		// The base is a dependent type, so it must be named with its
		// template argument.
		CudaGroupRequest<T>::cleanUp();
		delete [] missed;
		missed = 0;
	}

	/**
	 * @param b bucket count forwarded to the CudaGroupRequest<T> constructor
	 */
	CudaGroupMissedRequest(int b)
	  : CudaGroupRequest<T>(b), // the base has no default constructor
	    missed(new S[size]),
	    numMissed(0)
	{}
};

#endif // __cplusplus
#endif

#endif /* CUDA_TYPEDEF_H_*/
Browse the archive

https://github.com/N-BodyShop/changa