https://github.com/jmeloranta/libgrid
Raw File
Tip revision: ced4d9906bc8d32ac610097399fdfdf7a5e40a0a authored by Jussi Eloranta on 20 December 2021, 02:30:57 UTC
Update README
Tip revision: ced4d99
make.conf
#
# libgrid configuration. If you have compilation issues related to CUDA gcc, you may need to modify src/get_cc.sh.
#
# Compiler selection (only gcc supported):
# gcc          - GCC optimized
# gcc-debug    - GCC debug
#
COMPILER := gcc
#
# Numeric types used to build the library:
#   REAL = float, double or quad (quad = long double)
#   INT  = int or long (use long)
# NOTE: quad precision is not supported by GPUs (only float and double).
#
REAL := double
INT := long
#
# Install root (headers are read from $(ROOT)/include, libraries from $(ROOT)/lib)
ROOT := /usr
#
# Use CUDA? (enables ?grid3d_cufft* routines). yes or no. Defaults to auto-detect.
# If you are on Fedora, install cuda-gcc and cuda-gcc-c++. Otherwise, the best bet is to replace cuda-gcc with gcc (gcc 8.x will not work).
# CUDA_TPB = threads per block per dimension (e.g., 8 will yield 8 * 8 * 8 = 512 threads per block). Should be a multiple of Warp size (32).
# CUDA_THRADJ = how to distribute 3-D threads to 2-D (multiplicative factor 2 - 3).
# CUDA_FASTMATH seems to have almost no effect, so better leave it as no.
#
# CUDA auto-detection. Unless CUDA is already set (command line, environment or
# an including makefile), its value is read from $(ROOT)/include/grid/cuda.status.
# Simple (:=) assignment is used for every $(shell ...) lookup so that the file
# is read once at parse time instead of on each expansion of the variable.
ifndef CUDA
CUDA := $(shell /bin/cat $(ROOT)/include/grid/cuda.status)
endif
ifeq ($(CUDA),yes)
# Locations of the CUDA headers and libraries.
CUDAINC = /usr/include/cuda
CUDALIB = /usr/lib64
# Host compiler passed to nvcc through -ccbin (see NVCFLAGS), unless preset.
ifndef CUDAHOST_CC
CUDAHOST_CC := $(shell /bin/cat $(ROOT)/include/grid/cuda.hostcc)
endif
# GPU architecture passed to nvcc through -arch (see NVCFLAGS), unless preset.
ifndef CUDA_ARCH
CUDA_ARCH := $(shell /bin/cat $(ROOT)/include/grid/cuda.arch)
endif
# Tunables; yes selects the relaxed-precision nvcc math flags.
CUDA_FASTMATH = no
# Threads per block per dimension (exported as -DCUDA_THREADS_PER_BLOCK).
CUDA_TPB = 8
# 3-D to 2-D thread distribution factor (exported as -DCUDA_CN_THRADJ).
CUDA_THRADJ = 3
# yes adds -DCUDA_DEBUG to CFLAGS.
CUDA_DEBUG = no
endif
#
# Baseline flags for the C compiler, nvcc and the linker. They are kept
# recursively expanded (=) so that $(ROOT), $(CUDA_ARCH) and $(CUDAHOST_CC)
# are resolved when the flags are actually used.
CFLAGS = -DHBAR=1.0 \
         -I$(ROOT)/include
NVCFLAGS = -DHBAR=1.0 -g \
           --optimize 3 -Xptxas -O3 \
           -arch=$(CUDA_ARCH) -ccbin $(CUDAHOST_CC) \
           --compiler-options -Wall \
           -I$(ROOT)/include
LDFLAGS = -L$(ROOT)/lib

# COMPILER = gcc: optimized GCC build with OpenMP enabled.
ifeq ($(COMPILER),gcc)
  CC = gcc
  # -Ofast is disabled because of gcc 8.3.1 bugs; plain -O is used instead.
  # Previous flags: -std=gnu89 -g -Wall -Wconversion -march=native -mtune=native -Ofast -fopenmp
  CFLAGS += -std=gnu89 -g -Wall -Wconversion -march=native -mtune=native -O -fopenmp
  # Link the static library from the configured install root (matches -L$(ROOT)/lib)
  # instead of a hard-coded /usr/lib, so a non-default ROOT is honoured.
  LDFLAGS += $(ROOT)/lib/libgrid.a -lm -lgomp
endif

# COMPILER = gcc-debug: GCC build with debug symbols and no optimization or OpenMP compile flags.
ifeq ($(COMPILER),gcc-debug)
  CC = gcc
  CFLAGS += -std=gnu89 -g -Wall -Wconversion
  # Link the static library from the configured install root (matches -L$(ROOT)/lib)
  # instead of a hard-coded /usr/lib, so a non-default ROOT is honoured.
  LDFLAGS += $(ROOT)/lib/libgrid.a -lm -lgomp
  NVCFLAGS += -g
endif

# Select the floating point precision. Each branch sets DTYPE to the macro
# definitions mapping the generic math names (SQRT, CEXP, ...) to the C library
# functions for that precision, and adds the matching FFTW libraries to LDFLAGS.
# An unsupported setting aborts make with $(error ...); shell commands such as
# echo/exit are not valid outside a recipe.
ifeq ($(REAL),float)
# -Wdouble-promotion
  DTYPE = -DREAL=float -DSINGLE_PREC -DCREAL=crealf -DCIMAG=cimagf -DCONJ=conjf -DPOW=powf -DSQRT=sqrtf -DCSQRT=csqrtf -DEXP=expf -DCEXP=cexpf -DLOG=logf -DCLOG=clogf -DCOS=cosf -DSIN=sinf -DTAN=tanf -DATAN2=atan2f -DFABS=fabsf -DCABS=cabsf -DACOS=acosf -DASIN=asinf -DATAN=atanf -DTANH=tanhf -DCOSH=coshf -DSINH=sinhf -DCPOW=cpowf -DCARG=cargf
  CFLAGS += -fsingle-precision-constant
  LDFLAGS += -lfftw3f_omp -lfftw3f
else ifeq ($(REAL),double)
  DTYPE = -DREAL=double -DDOUBLE_PREC -DCREAL=creal -DCIMAG=cimag -DCONJ=conj -DPOW=pow -DSQRT=sqrt -DCSQRT=csqrt -DEXP=exp -DCEXP=cexp -DLOG=log -DCLOG=clog -DCOS=cos -DSIN=sin -DTAN=tan -DATAN2=atan2 -DFABS=fabs -DCABS=cabs -DACOS=acos -DASIN=asin -DATAN=atan -DTANH=tanh -DCOSH=cosh -DSINH=sinh -DCPOW=cpow -DCARG=carg
  LDFLAGS += -lfftw3_omp -lfftw3
else ifeq ($(REAL),quad)
  # Compare the VALUE of CUDA; the bare word CUDA would never equal "yes".
  ifeq ($(CUDA),yes)
    $(error Quad precision not supported with CUFFT.)
  endif
  DTYPE = -DREAL="long double" -DQUAD_PREC -DCREAL=creall -DCIMAG=cimagl -DCONJ=conjl -DPOW=powl -DSQRT=sqrtl -DCSQRT=csqrtl -DEXP=expl -DCEXP=cexpl -DLOG=logl -DCLOG=clogl -DCOS=cosl -DSIN=sinl -DTAN=tanl -DATAN2=atan2l -DFABS=fabsl -DCABS=cabsl -DACOS=acosl -DASIN=asinl -DATAN=atanl -DTANH=tanhl -DCOSH=coshl -DSINH=sinhl -DCPOW=cpowl -DCARG=cargl
  LDFLAGS += -lfftw3l_omp -lfftw3l
else
  $(error Unknown floating point precision: REAL=$(REAL))
endif

# Integer type selection: anything other than INT = int is treated as long.
# The resulting macros are appended to DTYPE, which is then added to CFLAGS.
ifneq ($(INT),int)
  DTYPE += -DINT=long -DLONG_INT -DABS=labs
else
  DTYPE += -DINT=int -DSHORT_INT -DABS=abs
endif
CFLAGS += $(DTYPE)

# Extra compiler and linker flags applied only when CUDA support is enabled.
ifeq ($(CUDA),yes)
  # Host (C compiler) flags: CUDA headers, USE_CUDA and the thread layout macros.
  CFLAGS += -I$(CUDAINC) -DUSE_CUDA -DCUDA_THREADS_PER_BLOCK=$(CUDA_TPB) -DCUDA_CN_THRADJ=$(CUDA_THRADJ) -Wall -DHBAR=1.0
  ifeq ($(CUDA_DEBUG),yes)
    CFLAGS += -DCUDA_DEBUG
  endif
  # nvcc flags: same header path ($(CUDAINC) rather than a hard-coded directory),
  # the precision/integer macros from DTYPE and the thread layout macros.
  NVCFLAGS += -I$(CUDAINC) $(DTYPE) -DCUDA_THREADS_PER_BLOCK=$(CUDA_TPB) -DCUDA_CN_THRADJ=$(CUDA_THRADJ)
  # Debug builds additionally request debug information from nvcc.
  ifeq ($(COMPILER),gcc-debug)
    NVCFLAGS += -g
  endif
  # Fast math trades accuracy (flush-to-zero, approximate div/sqrt) for speed.
  ifeq ($(CUDA_FASTMATH),yes)
    NVCFLAGS += --ftz=true --prec-div=false --prec-sqrt=false --fmad=true
  else
    NVCFLAGS += --ftz=false --prec-div=true --prec-sqrt=true --fmad=true
  endif
  LDFLAGS += -L$(CUDALIB) -lcufft -lcurand -lcuda -lcudart -lstdc++
endif
back to top