# https://github.com/N-BodyShop/changa
# Raw File
# Tip revision: 58c1aa31c2f0a7cbaf92160410dabaeabc15dd1d authored by Tom Quinn on 18 November 2018, 15:38:00 UTC
# Cleaned up prefetch requests.
# Tip revision: 58c1aa3
# cuda.mk.in
# optional CUDA flags: 
# memory:
# -DHAPI_USE_CUDAMALLOCHOST
# -DHAPI_MEMPOOL
#
# verbosity, debugging:
# -DCUDA_DM_PRINT_TREES
# -DCUDA_PRINT_TRANSFERRED_INTERACTIONS
# -DCUDA_PRINT_TRANSFER_BACK_PARTICLES
# -DCUDA_NOTIFY_DATA_TRANSFER_DONE
# -DCUDA_VERBOSE_KERNEL_ENQUEUE
# -DCUDA_NO_KERNELS
# -DCUDA_NO_ACC_UPDATES
# -DCUDA_UNIT_TEST
#
# print errors returned by CUDA calls:
# -DCUDA_PRINT_ERRORS
#
# for performance monitoring via projections/stats
# -DCUDA_STATS
# -DHAPI_TRACE
# -DHAPI_INSTRUMENT_WRS: instrument the time taken by each phase of a request;
#                        prints the average transfer, kernel and cleanup times
#                        for each kind of request.
###############################################################################

# CUDA_DEBUG and CUDA_VERBOSE follow the outer DEBUG and VERBOSE variables;
# either one can also be given explicitly, e.g. `make CUDA_DEBUG=1`.
CUDA_DEBUG   = ${DEBUG}
CUDA_VERBOSE = ${VERBOSE}

# Defines that are always appended to cuda_defines.
cuda_defines += -DCUDA -DSPCUDA \
                -DHAPI_MEMPOOL -DHAPI_USE_CUDAMALLOCHOST \
                -DCUDA_2D_TB_KERNEL
# Disabled:
#cuda_defines += -DCUDA_2D_FLAT

# CUDA_DEBUG=1 adds one of the verbosity/debugging defines.
ifeq ($(CUDA_DEBUG),1)
  cuda_defines += -DCUDA_VERBOSE_KERNEL_ENQUEUE
endif

# @PROJECTIONS@ is filled in when this template is configured; "yes" turns on
# the HAPI_TRACE performance-monitoring define.
ifeq (yes,@PROJECTIONS@)
  cuda_defines += -DHAPI_TRACE
endif

# Header search paths for the CUDA build: the include directory under
# @CUDA_DIR@ and the tmp/hybridAPI directory under @CHARM_PATH@.
# The hybridAPI path is listed once; it used to be appended a second time by
# a redundant "+=" line.
cuda_includes := -I@CUDA_DIR@/include -I@CHARM_PATH@/tmp/hybridAPI

# CUDA translation units, and the libraries / library search path they need
# at link time.
cuda_srcs     := $(source_dir)/HostCUDA.cu
cuda_libs     += -lcuda -lcudart
cuda_ldflags  += -L@CUDA_DIR@/lib64

# Map each $(source_dir)/NAME.cu to $(build_dir)/NAME.o, matching the
# $(build_dir)/%.o: $(source_dir)/%.cu pattern rule in this file.
# patsubst anchors the rewrite to the leading directory; a plain
# $(subst $(source_dir),$(build_dir),...) would also replace any other
# occurrence of the source_dir text in the path (e.g. every "." when
# source_dir is ".").
cuda_objs     := $(patsubst $(source_dir)/%.cu,$(build_dir)/%.o,$(cuda_srcs))

# Target architecture, taken from the configured @CUDA_LEVEL@ for both
# -arch (compute_NN) and -code (sm_NN). Architecture list:
# http://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html#virtual-architecture-feature-list
nvcc_flags += -arch=compute_@CUDA_LEVEL@ -code=sm_@CUDA_LEVEL@

# Workaround: nvcc includes are broken when the host compiler is gcc >= 5.0.
nvcc_flags += -D_FORCE_INLINES

# Configure-time flag for doing the local treewalk on the GPU.
nvcc_flags += @FLAG_GPU_LOCAL_TREE_WALK@

# CUDA_VERBOSE=1 hands -v to ptxas.
ifeq ($(CUDA_VERBOSE),1)
  nvcc_flags += --ptxas-options=-v
endif

# Every build uses -use_fast_math except CUDA_DEBUG=1, which passes
# --device-debug instead.
ifneq ($(CUDA_DEBUG),1)
  nvcc_flags += -use_fast_math
else
  nvcc_flags += --device-debug
endif

# Make the outer show-config target also print the CUDA settings.
.PHONY: show-config
show-config: cuda-show-config

# Print each CUDA build variable, followed by a blank line.
# printf is used rather than `echo ..."\n"`: whether echo interprets "\n" is
# implementation-defined, so some /bin/sh shells print a literal backslash-n
# instead of the intended blank line. $(strip ...) collapses the extra
# whitespace left behind by empty "+=" appends.
.PHONY: cuda-show-config
cuda-show-config:
	@ printf 'NVCC_FLAGS = %s\n\n' "$(strip $(nvcc_flags))"
	@ printf 'CUDA_DEFINES = %s\n\n' "$(strip $(cuda_defines))"
	@ printf 'CUDA_INCLUDES = %s\n\n' "$(strip $(cuda_includes))"
	@ printf 'CUDA_LDFLAGS = %s\n\n' "$(strip $(cuda_ldflags))"
	@ printf 'CUDA_LIBS = %s\n\n' "$(strip $(cuda_libs))"

# Chain the CUDA cleanup onto the outer dist-clean target.
.PHONY: dist-clean cuda-dist-clean
dist-clean: cuda-dist-clean

# Delete cuda.mk (presumably the configured copy of this template).
cuda-dist-clean:
	@$(RM) cuda.mk

# Pattern rule: build $(build_dir)/NAME.o from $(source_dir)/NAME.cu with the
# compiler at @NVCC_PATH@.
#   - The quoted -Xcompiler string carries $(cxx_flags) and $(depend_flags),
#     followed by the path $(depend_dir)/NAME$(depend_suffix) ($* is the
#     pattern stem).
#     NOTE(review): depend_flags presumably ends with an option that takes
#     that path as its argument; it is defined outside this file -- confirm.
#   - $(nvcc_flags) is passed to nvcc directly.
#   - $(quiet) is defined outside this file; presumably it controls whether
#     the command line is echoed -- confirm.
$(build_dir)/%.o: $(source_dir)/%.cu
	@ echo Compiling $<...
	$(quiet) @NVCC_PATH@ -Xcompiler "$(cxx_flags) $(depend_flags) $(depend_dir)/$*$(depend_suffix)" $(nvcc_flags) -c $< -o $@
# back to top