From 47a9063e270bc6c8bc766560b1c304f1ba8bf9c4 Mon Sep 17 00:00:00 2001
From: psychocrypt
Date: Sun, 24 Sep 2017 21:23:33 +0200
Subject: add CMake flags to disable backends

- add `CUDA_ENABLE`
- add `OpenCL_ENABLE`

Both options default to ON. If a backend is enabled but its toolkit cannot
be found, configuration now stops with a FATAL_ERROR that names the flag to
switch the backend off (`-DCUDA_ENABLE=OFF` / `-DOpenCL_ENABLE=OFF`).
Previously a missing toolkit only resulted in `CONF_NO_CUDA` /
`CONF_NO_OPENCL` being defined; these definitions are now added only when
the corresponding option is set to OFF.

The `CUDA_ARCH` validation checks each list element on its own, so the
error message names the element that is actually malformed.
---
 CMakeLists.txt | 166 ++++++++++++++++++++++++++++++---------------------------
 1 file changed, 88 insertions(+), 78 deletions(-)

(limited to 'CMakeLists.txt')

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 3412b3c..e9cbddd 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -43,99 +43,104 @@ option(CMAKE_LINK_STATIC "link as much as possible libraries static" OFF)
 #option(CUDA_USE_STATIC_CUDA_RUNTIME "Use the static version of the CUDA runtime library if available" OFF)
 #set(CUDA_USE_STATIC_CUDA_RUNTIME OFF CACHE BOOL "Use the static version of the CUDA runtime library if available" FORCE)
 
-find_package(CUDA 7.5 QUIET)
+option(CUDA_ENABLE "Enable or disable NVIDIA CUDA support" ON)
+if(CUDA_ENABLE)
+    find_package(CUDA 7.5 QUIET)
 
-if(CUDA_FOUND)
-
-    option(XMR-STAK_LARGEGRID "Support large CUDA block count > 128" ON)
-    if(XMR-STAK_LARGEGRID)
-        add_definitions("-DXMR_STAK_LARGEGRID=${XMR-STAK_LARGEGRID}")
-    endif()
-
-    set(DEVICE_COMPILER "nvcc")
-    set(CUDA_COMPILER "${DEVICE_COMPILER}" CACHE STRING "Select the device compiler")
+    if(CUDA_FOUND)
 
-    if(${CMAKE_CXX_COMPILER_ID} STREQUAL "Clang")
-        list(APPEND DEVICE_COMPILER "clang")
-    endif()
+        option(XMR-STAK_LARGEGRID "Support large CUDA block count > 128" ON)
+        if(XMR-STAK_LARGEGRID)
+            add_definitions("-DXMR_STAK_LARGEGRID=${XMR-STAK_LARGEGRID}")
+        endif()
 
-    set_property(CACHE CUDA_COMPILER PROPERTY STRINGS "${DEVICE_COMPILER}")
+        set(DEVICE_COMPILER "nvcc")
+        set(CUDA_COMPILER "${DEVICE_COMPILER}" CACHE STRING "Select the device compiler")
 
-    set(XMR-STAK_THREADS 0 CACHE STRING "Set maximum number of threads (for compile time optimization)")
-    if(NOT XMR-STAK_THREADS EQUAL 0)
-        message(STATUS "xmr-stak-nvidia: set max threads per block to ${XMR-STAK_THREADS}")
-        add_definitions("-DXMR_STAK_THREADS=${XMR-STAK_THREADS}")
-    endif()
+        if(${CMAKE_CXX_COMPILER_ID} STREQUAL "Clang")
+            list(APPEND DEVICE_COMPILER "clang")
+        endif()
 
-    set(CUDA_ARCH "20;30;35;37;50;52;60;61;62" CACHE STRING "Set GPU architecture (semicolon separated list, e.g. '-DCUDA_ARCH=20;35;60')")
+        set_property(CACHE CUDA_COMPILER PROPERTY STRINGS "${DEVICE_COMPILER}")
 
-    # validate architectures (only numbers are allowed)
-    foreach(CUDA_ARCH_ELEM ${CUDA_ARCH})
-        string(REGEX MATCH "^[0-9]+$" IS_NUMBER ${CUDA_ARCH})
-        if(NOT IS_NUMBER)
-            message(FATAL_ERROR "Defined compute architecture '${CUDA_ARCH_ELEM}' in "
-                                "'${CUDA_ARCH}' is not an integral number, use e.g. '30' (for compute architecture 3.0).")
+        set(XMR-STAK_THREADS 0 CACHE STRING "Set maximum number of threads (for compile time optimization)")
+        if(NOT XMR-STAK_THREADS EQUAL 0)
+            message(STATUS "xmr-stak-nvidia: set max threads per block to ${XMR-STAK_THREADS}")
+            add_definitions("-DXMR_STAK_THREADS=${XMR-STAK_THREADS}")
         endif()
-        unset(IS_NUMBER)
 
-        if(${CUDA_ARCH_ELEM} LESS 20)
-            message(FATAL_ERROR "Unsupported CUDA architecture '${CUDA_ARCH_ELEM}' specified. "
-                                "Use '20' (for compute architecture 2.0) or higher.")
-        endif()
-    endforeach()
+        set(CUDA_ARCH "20;30;35;37;50;52;60;61;62" CACHE STRING "Set GPU architecture (semicolon separated list, e.g. '-DCUDA_ARCH=20;35;60')")
 
-    option(CUDA_SHOW_REGISTER "Show registers used for each kernel and compute architecture" OFF)
-    option(CUDA_KEEP_FILES "Keep all intermediate files that are generated during internal compilation steps" OFF)
+        # validate architectures (only numbers are allowed)
+        foreach(CUDA_ARCH_ELEM ${CUDA_ARCH})
+            string(REGEX MATCH "^[0-9]+$" IS_NUMBER "${CUDA_ARCH_ELEM}")
+            if(NOT IS_NUMBER)
+                message(FATAL_ERROR "Defined compute architecture '${CUDA_ARCH_ELEM}' in "
+                                    "'${CUDA_ARCH}' is not an integral number, use e.g. '30' (for compute architecture 3.0).")
+            endif()
+            unset(IS_NUMBER)
+
+            if(${CUDA_ARCH_ELEM} LESS 20)
+                message(FATAL_ERROR "Unsupported CUDA architecture '${CUDA_ARCH_ELEM}' specified. "
+                                    "Use '20' (for compute architecture 2.0) or higher.")
+            endif()
+        endforeach()
 
-    if("${CUDA_COMPILER}" STREQUAL "clang")
-        set(CLANG_BUILD_FLAGS "-O3 -x cuda --cuda-path=${CUDA_TOOLKIT_ROOT_DIR}")
-        # activation usage of FMA
-        set(CLANG_BUILD_FLAGS "${CLANG_BUILD_FLAGS} -ffp-contract=fast")
+        option(CUDA_SHOW_REGISTER "Show registers used for each kernel and compute architecture" OFF)
+        option(CUDA_KEEP_FILES "Keep all intermediate files that are generated during internal compilation steps" OFF)
 
-        if(CUDA_SHOW_REGISTER)
-            set(CLANG_BUILD_FLAGS "${CLANG_BUILD_FLAGS} -Xcuda-ptxas -v")
-        endif(CUDA_SHOW_REGISTER)
+        if("${CUDA_COMPILER}" STREQUAL "clang")
+            set(CLANG_BUILD_FLAGS "-O3 -x cuda --cuda-path=${CUDA_TOOLKIT_ROOT_DIR}")
+            # activation usage of FMA
+            set(CLANG_BUILD_FLAGS "${CLANG_BUILD_FLAGS} -ffp-contract=fast")
 
-        if(CUDA_KEEP_FILES)
-            set(CLANG_BUILD_FLAGS "${CLANG_BUILD_FLAGS} -save-temps=${PROJECT_BINARY_DIR}")
-        endif(CUDA_KEEP_FILES)
+            if(CUDA_SHOW_REGISTER)
+                set(CLANG_BUILD_FLAGS "${CLANG_BUILD_FLAGS} -Xcuda-ptxas -v")
+            endif(CUDA_SHOW_REGISTER)
 
-        foreach(CUDA_ARCH_ELEM ${CUDA_ARCH})
-            # set flags to create device code for the given architectures
-            set(CLANG_BUILD_FLAGS "${CLANG_BUILD_FLAGS} --cuda-gpu-arch=sm_${CUDA_ARCH_ELEM}")
-        endforeach()
+            if(CUDA_KEEP_FILES)
+                set(CLANG_BUILD_FLAGS "${CLANG_BUILD_FLAGS} -save-temps=${PROJECT_BINARY_DIR}")
+            endif(CUDA_KEEP_FILES)
 
-    elseif("${CUDA_COMPILER}" STREQUAL "nvcc")
-        # avoid that nvcc in CUDA < 8 tries to use libc `memcpy` within the kernel
-        if(CUDA_VERSION VERSION_LESS 8.0)
-            add_definitions(-D_FORCE_INLINES)
-        endif()
-        foreach(CUDA_ARCH_ELEM ${CUDA_ARCH})
-            # set flags to create device code for the given architecture
-            set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS}
-                "--generate-code arch=compute_${CUDA_ARCH_ELEM},code=sm_${CUDA_ARCH_ELEM} --generate-code arch=compute_${CUDA_ARCH_ELEM},code=compute_${CUDA_ARCH_ELEM}")
-        endforeach()
+            foreach(CUDA_ARCH_ELEM ${CUDA_ARCH})
+                # set flags to create device code for the given architectures
+                set(CLANG_BUILD_FLAGS "${CLANG_BUILD_FLAGS} --cuda-gpu-arch=sm_${CUDA_ARCH_ELEM}")
+            endforeach()
+
+        elseif("${CUDA_COMPILER}" STREQUAL "nvcc")
+            # avoid that nvcc in CUDA < 8 tries to use libc `memcpy` within the kernel
+            if(CUDA_VERSION VERSION_LESS 8.0)
+                add_definitions(-D_FORCE_INLINES)
+            endif()
+            foreach(CUDA_ARCH_ELEM ${CUDA_ARCH})
+                # set flags to create device code for the given architecture
+                set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS}
+                    "--generate-code arch=compute_${CUDA_ARCH_ELEM},code=sm_${CUDA_ARCH_ELEM} --generate-code arch=compute_${CUDA_ARCH_ELEM},code=compute_${CUDA_ARCH_ELEM}")
+            endforeach()
 
-        # give each thread an independent default stream
-        set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} --default-stream per-thread")
+            # give each thread an independent default stream
+            set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} --default-stream per-thread")
 
-        option(CUDA_SHOW_CODELINES "Show kernel lines in cuda-gdb and cuda-memcheck" OFF)
+            option(CUDA_SHOW_CODELINES "Show kernel lines in cuda-gdb and cuda-memcheck" OFF)
 
-        if(CUDA_SHOW_CODELINES)
-            set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS}" --source-in-ptx -lineinfo)
-            set(CUDA_KEEP_FILES ON CACHE BOOL "activate keep files" FORCE)
-        endif(CUDA_SHOW_CODELINES)
+            if(CUDA_SHOW_CODELINES)
+                set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS}" --source-in-ptx -lineinfo)
+                set(CUDA_KEEP_FILES ON CACHE BOOL "activate keep files" FORCE)
+            endif(CUDA_SHOW_CODELINES)
 
-        if(CUDA_SHOW_REGISTER)
-            set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS}" -Xptxas=-v)
-        endif(CUDA_SHOW_REGISTER)
+            if(CUDA_SHOW_REGISTER)
+                set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS}" -Xptxas=-v)
+            endif(CUDA_SHOW_REGISTER)
 
-        if(CUDA_KEEP_FILES)
-            set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS}" --keep --keep-dir "${PROJECT_BINARY_DIR}")
-        endif(CUDA_KEEP_FILES)
+            if(CUDA_KEEP_FILES)
+                set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS}" --keep --keep-dir "${PROJECT_BINARY_DIR}")
+            endif(CUDA_KEEP_FILES)
 
+        else()
+            message(FATAL_ERROR "selected CUDA compiler '${CUDA_COMPILER}' is not supported")
+        endif()
     else()
-        message(FATAL_ERROR "selected CUDA compiler '${CUDA_COMPILER}' is not supported")
+        message(FATAL_ERROR "CUDA NOT found: use `-DCUDA_ENABLE=OFF` to build without NVIDIA GPU support")
     endif()
 else()
     add_definitions("-DCONF_NO_CUDA")
@@ -150,12 +155,17 @@ list(APPEND CMAKE_PREFIX_PATH "$ENV{CMAKE_PREFIX_PATH}")
 # Find OpenCL
 ###############################################################################
 
-find_package(OpenCL)
-include_directories(SYSTEM ${OpenCL_INCLUDE_DIRS})
-#set(LIBS ${LIBS} ${OpenCL_LIBRARY})
-link_directories(${OpenCL_LIBRARY})
-
-if(NOT OpenCL_FOUND)
+option(OpenCL_ENABLE "Enable or disable OpenCL support (AMD GPU backend)" ON)
+if(OpenCL_ENABLE)
+    find_package(OpenCL QUIET)
+    if(OpenCL_FOUND)
+        include_directories(SYSTEM ${OpenCL_INCLUDE_DIRS})
+        #set(LIBS ${LIBS} ${OpenCL_LIBRARY})
+        link_directories(${OpenCL_LIBRARY})
+    else()
+        message(FATAL_ERROR "OpenCL NOT found: use `-DOpenCL_ENABLE=OFF` to build without OpenCL support for AMD gpu's")
+    endif()
+else()
     add_definitions("-DCONF_NO_OPENCL")
 endif()
 
-- 
cgit v1.1