diff options
Diffstat (limited to 'xmrstak/backend/amd/amd_gpu/gpu.cpp')
-rw-r--r-- | xmrstak/backend/amd/amd_gpu/gpu.cpp | 263 |
1 files changed, 225 insertions, 38 deletions
diff --git a/xmrstak/backend/amd/amd_gpu/gpu.cpp b/xmrstak/backend/amd/amd_gpu/gpu.cpp index d9bc962..79afa00 100644 --- a/xmrstak/backend/amd/amd_gpu/gpu.cpp +++ b/xmrstak/backend/amd/amd_gpu/gpu.cpp @@ -15,6 +15,7 @@ #include "xmrstak/backend/cryptonight.hpp" #include "xmrstak/jconf.hpp" +#include "xmrstak/picosha2/picosha2.hpp" #include <stdio.h> #include <string.h> @@ -25,8 +26,41 @@ #include <regex> #include <cassert> +#include <fstream> +#include <sstream> +#include <vector> +#include <string> +#include <iostream> + +#if defined _MSC_VER +#include <direct.h> +#elif defined __GNUC__ +#include <sys/types.h> +#include <sys/stat.h> +#endif + + + #ifdef _WIN32 #include <windows.h> +#include <Shlobj.h> + +static inline void create_directory(std::string dirname) +{ + _mkdir(dirname.data()); +} + +static inline std::string get_home() +{ + char path[MAX_PATH + 1]; + // get folder "appdata\local" + if (SHGetSpecialFolderPathA(HWND_DESKTOP, path, CSIDL_LOCAL_APPDATA, FALSE)) + { + return path; + } + else + return "."; +} static inline void port_sleep(size_t sec) { @@ -34,6 +68,22 @@ static inline void port_sleep(size_t sec) } #else #include <unistd.h> +#include <pwd.h> + +static inline void create_directory(std::string dirname) +{ + mkdir(dirname.data(), 0744); +} + +static inline std::string get_home() +{ + const char *home = getenv("HOME"); + /* getpwuid() yields NULL when the uid has no passwd entry, so its result is checked before use */ + const struct passwd *pw = (home == nullptr) ? getpwuid(getuid()) : nullptr; + if (pw != nullptr && pw->pw_dir != nullptr) + home = pw->pw_dir; + + return (home != nullptr) ? home : "."; /* same last-resort value as the Windows variant */ +} static inline void port_sleep(size_t sec) { @@ -84,6 +134,7 @@ const char* err_to_str(cl_int ret) return "CL_MISALIGNED_SUB_BUFFER_OFFSET"; case CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST: return "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST"; +#ifdef CL_VERSION_1_2 case CL_COMPILE_PROGRAM_FAILURE: return "CL_COMPILE_PROGRAM_FAILURE"; case CL_LINKER_NOT_AVAILABLE: @@ -94,6 +145,7 @@ const char* err_to_str(cl_int ret) return "CL_DEVICE_PARTITION_FAILED"; case CL_KERNEL_ARG_INFO_NOT_AVAILABLE: return "CL_KERNEL_ARG_INFO_NOT_AVAILABLE"; +#endif 
case CL_INVALID_VALUE: return "CL_INVALID_VALUE"; case CL_INVALID_DEVICE_TYPE: @@ -164,6 +216,7 @@ const char* err_to_str(cl_int ret) return "CL_INVALID_GLOBAL_WORK_SIZE"; case CL_INVALID_PROPERTY: return "CL_INVALID_PROPERTY"; +#ifdef CL_VERSION_1_2 case CL_INVALID_IMAGE_DESCRIPTOR: return "CL_INVALID_IMAGE_DESCRIPTOR"; case CL_INVALID_COMPILER_OPTIONS: @@ -172,6 +225,7 @@ const char* err_to_str(cl_int ret) return "CL_INVALID_LINKER_OPTIONS"; case CL_INVALID_DEVICE_PARTITION_COUNT: return "CL_INVALID_DEVICE_PARTITION_COUNT"; +#endif #if defined(CL_VERSION_2_0) && !defined(CONF_ENFORCE_OpenCL_1_2) case CL_INVALID_PIPE_SIZE: return "CL_INVALID_PIPE_SIZE"; @@ -323,57 +377,157 @@ size_t InitOpenCLGpu(cl_context opencl_ctx, GpuContext* ctx, const char* source_ return ERR_OCL_API; } - ctx->Program = clCreateProgramWithSource(opencl_ctx, 1, (const char**)&source_code, NULL, &ret); - if(ret != CL_SUCCESS) + std::vector<char> devNameVec(1024); + if((ret = clGetDeviceInfo(ctx->DeviceID, CL_DEVICE_NAME, devNameVec.size(), devNameVec.data(), NULL)) != CL_SUCCESS) { - printer::inst()->print_msg(L1,"Error %s when calling clCreateProgramWithSource on the contents of cryptonight.cl", err_to_str(ret)); + printer::inst()->print_msg(L1,"WARNING: %s when calling clGetDeviceInfo to get CL_DEVICE_NAME for device %u.", err_to_str(ret),ctx->deviceIdx ); return ERR_OCL_API; } char options[256]; - snprintf(options, sizeof(options), - "-DITERATIONS=%d -DMASK=%d -DWORKSIZE=%llu -DSTRIDED_INDEX=%d", - hasIterations, threadMemMask, int_port(ctx->workSize), ctx->stridedIndex ? 1 : 0); - ret = clBuildProgram(ctx->Program, 1, &ctx->DeviceID, options, NULL, NULL); - if(ret != CL_SUCCESS) + snprintf(options, sizeof(options), + "-DITERATIONS=%d -DMASK=%d -DWORKSIZE=%llu -DSTRIDED_INDEX=%d -DMEM_CHUNK_EXPONENT=%d -DCOMP_MODE=%d", + hasIterations, threadMemMask, int_port(ctx->workSize), ctx->stridedIndex, int(1u<<ctx->memChunk), ctx->compMode ? 
1 : 0); + + /* create a hash for the compile time cache + * used data: + * - source code + * - device name + * - compile parameters (the options string) + */ + std::string src_str(source_code); + src_str += options; + src_str += devNameVec.data(); + std::string hash_hex_str; + picosha2::hash256_hex_string(src_str, hash_hex_str); + + std::string cache_file = get_home() + "/.openclcache/" + hash_hex_str + ".openclbin"; + std::ifstream clBinFile(cache_file, std::ofstream::in | std::ofstream::binary); + if(!clBinFile.good()) { - size_t len; - printer::inst()->print_msg(L1,"Error %s when calling clBuildProgram.", err_to_str(ret)); + printer::inst()->print_msg(L1,"WARNING: OpenCL device %u - OpenCL binary %s not found.",ctx->deviceIdx, cache_file.c_str()); + ctx->Program = clCreateProgramWithSource(opencl_ctx, 1, (const char**)&source_code, NULL, &ret); + if(ret != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clCreateProgramWithSource on the OpenCL miner code", err_to_str(ret)); + return ERR_OCL_API; + } - if((ret = clGetProgramBuildInfo(ctx->Program, ctx->DeviceID, CL_PROGRAM_BUILD_LOG, 0, NULL, &len)) != CL_SUCCESS) + ret = clBuildProgram(ctx->Program, 1, &ctx->DeviceID, options, NULL, NULL); + if(ret != CL_SUCCESS) { - printer::inst()->print_msg(L1,"Error %s when calling clGetProgramBuildInfo for length of build log output.", err_to_str(ret)); + size_t len; + printer::inst()->print_msg(L1,"Error %s when calling clBuildProgram.", err_to_str(ret)); + + if((ret = clGetProgramBuildInfo(ctx->Program, ctx->DeviceID, CL_PROGRAM_BUILD_LOG, 0, NULL, &len)) != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clGetProgramBuildInfo for length of build log output.", err_to_str(ret)); + return ERR_OCL_API; + } + + char* BuildLog = (char*)malloc(len + 1); /* NOTE(review): the malloc result is not checked before BuildLog[0] is written */ + BuildLog[0] = '\0'; + + if((ret = clGetProgramBuildInfo(ctx->Program, ctx->DeviceID, CL_PROGRAM_BUILD_LOG, len, BuildLog, NULL)) != CL_SUCCESS) + { + free(BuildLog); + 
printer::inst()->print_msg(L1,"Error %s when calling clGetProgramBuildInfo for build log.", err_to_str(ret)); + return ERR_OCL_API; + } + + printer::inst()->print_str("Build log:\n"); + std::cerr<<BuildLog<<std::endl; + + free(BuildLog); return ERR_OCL_API; } - char* BuildLog = (char*)malloc(len + 1); - BuildLog[0] = '\0'; + cl_uint num_devices; + clGetProgramInfo(ctx->Program, CL_PROGRAM_NUM_DEVICES, sizeof(cl_uint), &num_devices,NULL); - if((ret = clGetProgramBuildInfo(ctx->Program, ctx->DeviceID, CL_PROGRAM_BUILD_LOG, len, BuildLog, NULL)) != CL_SUCCESS) + + std::vector<cl_device_id> devices_ids(num_devices); + clGetProgramInfo(ctx->Program, CL_PROGRAM_DEVICES, sizeof(cl_device_id)* devices_ids.size(), devices_ids.data(),NULL); + int dev_id = 0; + /* Search for the gpu within the program context. + * The id can be different to ctx->DeviceID. + */ + for(auto & ocl_device : devices_ids) { - free(BuildLog); - printer::inst()->print_msg(L1,"Error %s when calling clGetProgramBuildInfo for build log.", err_to_str(ret)); + if(ocl_device == ctx->DeviceID) + break; + dev_id++; + } + + cl_build_status status; + do + { + if((ret = clGetProgramBuildInfo(ctx->Program, ctx->DeviceID, CL_PROGRAM_BUILD_STATUS, sizeof(cl_build_status), &status, NULL)) != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clGetProgramBuildInfo for status of build.", err_to_str(ret)); + return ERR_OCL_API; + } + port_sleep(1); + } + while(status == CL_BUILD_IN_PROGRESS); + + std::vector<size_t> binary_sizes(num_devices); + clGetProgramInfo (ctx->Program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t) * binary_sizes.size(), binary_sizes.data(), NULL); + + std::vector<char*> all_programs(num_devices); + std::vector<std::vector<char>> program_storage; + + int p_id = 0; + size_t mem_size = 0; + // create memory structure to query all OpenCL program binaries + for(auto & p : all_programs) + { + program_storage.emplace_back(std::vector<char>(binary_sizes[p_id])); + all_programs[p_id] = 
program_storage[p_id].data(); + mem_size += binary_sizes[p_id]; + p_id++; + } + + if( ret = clGetProgramInfo(ctx->Program, CL_PROGRAM_BINARIES, num_devices * sizeof(char*), all_programs.data(),NULL) != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clGetProgramInfo.", err_to_str(ret)); return ERR_OCL_API; } - - printer::inst()->print_str("Build log:\n"); - std::cerr<<BuildLog<<std::endl; - free(BuildLog); - return ERR_OCL_API; + std::ofstream file_stream; + std::cout<<get_home() + "/.openclcache/" + hash_hex_str + ".openclbin"<<std::endl; + file_stream.open(cache_file, std::ofstream::out | std::ofstream::binary); + file_stream.write(all_programs[dev_id], binary_sizes[dev_id]); + file_stream.close(); + printer::inst()->print_msg(L1, "OpenCL device %u - OpenCL binary file stored in file %s.",ctx->deviceIdx, cache_file.c_str()); } - - cl_build_status status; - do + else { - if((ret = clGetProgramBuildInfo(ctx->Program, ctx->DeviceID, CL_PROGRAM_BUILD_STATUS, sizeof(cl_build_status), &status, NULL)) != CL_SUCCESS) + printer::inst()->print_msg(L1, "OpenCL device %u - Load OpenCL binary file %s",ctx->deviceIdx, cache_file.c_str()); + std::ostringstream ss; + ss << clBinFile.rdbuf(); + std::string s = ss.str(); + + size_t bin_size = s.size(); + auto data_ptr = s.data(); + + cl_int clStatus; + ctx->Program = clCreateProgramWithBinary( + opencl_ctx, 1, &ctx->DeviceID, &bin_size, + (const unsigned char **)&data_ptr, &clStatus, &ret + ); + if(ret != CL_SUCCESS) { - printer::inst()->print_msg(L1,"Error %s when calling clGetProgramBuildInfo for status of build.", err_to_str(ret)); + printer::inst()->print_msg(L1,"Error %s when calling clCreateProgramWithBinary. Try to delete file %s", err_to_str(ret), cache_file.c_str()); + return ERR_OCL_API; + } + ret = clBuildProgram(ctx->Program, 1, &ctx->DeviceID, NULL, NULL, NULL); + if(ret != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clBuildProgram. 
Try to delete file %s", err_to_str(ret), cache_file.c_str()); return ERR_OCL_API; } - port_sleep(1); } - while(status == CL_BUILD_IN_PROGRESS); const char *KernelNames[] = { "cn0", "cn1", "cn2", "Blake", "Groestl", "JH", "Skein" }; for(int i = 0; i < 7; ++i) @@ -487,7 +641,7 @@ std::vector<GpuContext> getAMDDevices(int index) printer::inst()->print_msg(L1,"WARNING: %s when calling clGetDeviceInfo to get the device vendor name for device %u.", err_to_str(clStatus), k); continue; } - + std::string devVendor(devVendorVec.data()); if( devVendor.find("Advanced Micro Devices") != std::string::npos || devVendor.find("AMD") != std::string::npos) { @@ -518,13 +672,13 @@ std::vector<GpuContext> getAMDDevices(int index) printer::inst()->print_msg(L1,"WARNING: %s when calling clGetDeviceInfo to get CL_DEVICE_NAME for device %u.", err_to_str(clStatus), k); continue; } - printer::inst()->print_msg(L0,"Found OpenCL GPU %s.",ctx.name.c_str()); // if environment variable GPU_SINGLE_ALLOC_PERCENT is not set we can not allocate the full memory ctx.deviceIdx = k; ctx.freeMem = std::min(ctx.freeMem, maxMem); ctx.name = std::string(devNameVec.data()); ctx.DeviceID = device_list[k]; + printer::inst()->print_msg(L0,"Found OpenCL GPU %s.",ctx.name.c_str()); ctxVec.push_back(ctx); } } @@ -549,6 +703,8 @@ int getAMDPlatformIdx() clStatus = clGetPlatformIDs(numPlatforms, platforms, NULL); int platformIndex = -1; + // Mesa OpenCL is the fallback if no AMD or Apple OpenCL is found + int mesaPlatform = -1; if(clStatus == CL_SUCCESS) { @@ -559,13 +715,29 @@ int getAMDPlatformIdx() clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR, infoSize, platformNameVec.data(), NULL); std::string platformName(platformNameVec.data()); - if( platformName.find("Advanced Micro Devices") != std::string::npos || platformName.find("Apple") != std::string::npos) + if( platformName.find("Advanced Micro Devices") != std::string::npos || + platformName.find("Apple") != std::string::npos || + platformName.find("Mesa") != 
std::string::npos + ) { - platformIndex = i; + printer::inst()->print_msg(L0,"Found AMD platform index id = %i, name = %s",i , platformName.c_str()); - break; + if(platformName.find("Mesa") != std::string::npos) + mesaPlatform = i; + else + { + // exit if AMD or Apple platform is found + platformIndex = i; + break; + } } } + // fall back to Mesa OpenCL + if(platformIndex == -1 && mesaPlatform != -1) + { + printer::inst()->print_msg(L0,"No AMD platform found select Mesa as OpenCL platform"); + platformIndex = mesaPlatform; + } } else printer::inst()->print_msg(L1,"WARNING: %s when calling clGetPlatformIDs for platform information.", err_to_str(clStatus)); @@ -694,8 +866,18 @@ size_t InitOpenCL(GpuContext* ctx, size_t num_gpus, size_t platform_idx) source_code = std::regex_replace(source_code, std::regex("XMRSTAK_INCLUDE_BLAKE256"), blake256CL); source_code = std::regex_replace(source_code, std::regex("XMRSTAK_INCLUDE_GROESTL256"), groestl256CL); + // create a directory for the OpenCL compile cache + create_directory(get_home() + "/.openclcache"); + for(int i = 0; i < num_gpus; ++i) { + if(ctx[i].stridedIndex == 2 && (ctx[i].rawIntensity % ctx[i].workSize) != 0) + { + size_t reduced_intensity = (ctx[i].rawIntensity / ctx[i].workSize) * ctx[i].workSize; + ctx[i].rawIntensity = reduced_intensity; + printer::inst()->print_msg(L0, "WARNING AMD: gpu %d intensity is not a multiple of 'worksize', auto reduce intensity to %d", ctx[i].deviceIdx, int(reduced_intensity)); + } + if((ret = InitOpenCLGpu(opencl_ctx, &ctx[i], source_code.c_str())) != ERR_SUCCESS) { return ret; @@ -866,10 +1048,15 @@ size_t XMRRunJob(GpuContext* ctx, cl_uint* HashOutput) size_t g_intensity = ctx->rawIntensity; size_t w_size = ctx->workSize; - // round up to next multiple of w_size - size_t g_thd = ((g_intensity + w_size - 1u) / w_size) * w_size; - // number of global threads must be a multiple of the work group size (w_size) - assert(g_thd%w_size == 0); + size_t g_thd = g_intensity; + + 
if(ctx->compMode) + { + // round up to next multiple of w_size; assign to the outer g_thd, a fresh declaration here would shadow it and discard the rounding + g_thd = ((g_intensity + w_size - 1u) / w_size) * w_size; + // number of global threads must be a multiple of the work group size (w_size) + assert(g_thd%w_size == 0); + } for(int i = 2; i < 6; ++i) { |