diff options
Diffstat (limited to 'xmrstak')
126 files changed, 33127 insertions, 0 deletions
diff --git a/xmrstak/backend/amd/amd_gpu/gpu.cpp b/xmrstak/backend/amd/amd_gpu/gpu.cpp new file mode 100644 index 0000000..04e442a --- /dev/null +++ b/xmrstak/backend/amd/amd_gpu/gpu.cpp @@ -0,0 +1,889 @@ +/* + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <stdio.h> +#include <string.h> +#include <math.h> +#include <iostream> +#include <vector> +#include <algorithm> +#include <regex> + +#ifdef _WIN32 +#include <windows.h> +const char* sSourcePath = "opencl\\cryptonight.cl"; + +static inline void port_sleep(size_t sec) +{ + Sleep(sec * 1000); +} +#else +#include <unistd.h> +const char* sSourcePath = "opencl/cryptonight.cl"; + +static inline void port_sleep(size_t sec) +{ + sleep(sec); +} +#endif // _WIN32 + +#if 0 +static inline long long unsigned int int_port(size_t i) +{ + return i; +} +#endif + +#include "gpu.h" + +const char* err_to_str(cl_int ret) +{ + switch(ret) + { + case CL_SUCCESS: + return "CL_SUCCESS"; + case CL_DEVICE_NOT_FOUND: + return "CL_DEVICE_NOT_FOUND"; + case CL_DEVICE_NOT_AVAILABLE: + return "CL_DEVICE_NOT_AVAILABLE"; + case CL_COMPILER_NOT_AVAILABLE: + return "CL_COMPILER_NOT_AVAILABLE"; + case CL_MEM_OBJECT_ALLOCATION_FAILURE: + return "CL_MEM_OBJECT_ALLOCATION_FAILURE"; + case CL_OUT_OF_RESOURCES: + return "CL_OUT_OF_RESOURCES"; + case CL_OUT_OF_HOST_MEMORY: + return "CL_OUT_OF_HOST_MEMORY"; + case CL_PROFILING_INFO_NOT_AVAILABLE: + return "CL_PROFILING_INFO_NOT_AVAILABLE"; + case CL_MEM_COPY_OVERLAP: + return "CL_MEM_COPY_OVERLAP"; + case CL_IMAGE_FORMAT_MISMATCH: + return "CL_IMAGE_FORMAT_MISMATCH"; + case CL_IMAGE_FORMAT_NOT_SUPPORTED: + return "CL_IMAGE_FORMAT_NOT_SUPPORTED"; + case CL_BUILD_PROGRAM_FAILURE: + return "CL_BUILD_PROGRAM_FAILURE"; + case CL_MAP_FAILURE: + return "CL_MAP_FAILURE"; + case CL_MISALIGNED_SUB_BUFFER_OFFSET: + return "CL_MISALIGNED_SUB_BUFFER_OFFSET"; + case CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST: + return "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST"; + case CL_COMPILE_PROGRAM_FAILURE: + return "CL_COMPILE_PROGRAM_FAILURE"; + case CL_LINKER_NOT_AVAILABLE: + return "CL_LINKER_NOT_AVAILABLE"; + case CL_LINK_PROGRAM_FAILURE: + return "CL_LINK_PROGRAM_FAILURE"; + case CL_DEVICE_PARTITION_FAILED: + return "CL_DEVICE_PARTITION_FAILED"; + case CL_KERNEL_ARG_INFO_NOT_AVAILABLE: + return "CL_KERNEL_ARG_INFO_NOT_AVAILABLE"; + case CL_INVALID_VALUE: + return "CL_INVALID_VALUE"; + case CL_INVALID_DEVICE_TYPE: + return "CL_INVALID_DEVICE_TYPE"; + case CL_INVALID_PLATFORM: + return "CL_INVALID_PLATFORM"; + case CL_INVALID_DEVICE: + return "CL_INVALID_DEVICE"; + case CL_INVALID_CONTEXT: + return "CL_INVALID_CONTEXT"; + case CL_INVALID_QUEUE_PROPERTIES: + return "CL_INVALID_QUEUE_PROPERTIES"; + case CL_INVALID_COMMAND_QUEUE: + return "CL_INVALID_COMMAND_QUEUE"; + case CL_INVALID_HOST_PTR: + return "CL_INVALID_HOST_PTR"; + case CL_INVALID_MEM_OBJECT: + return "CL_INVALID_MEM_OBJECT"; + case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR: + return "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR"; + case CL_INVALID_IMAGE_SIZE: + return "CL_INVALID_IMAGE_SIZE"; + case CL_INVALID_SAMPLER: + return "CL_INVALID_SAMPLER"; + case CL_INVALID_BINARY: + return "CL_INVALID_BINARY"; + case CL_INVALID_BUILD_OPTIONS: + return "CL_INVALID_BUILD_OPTIONS"; + case CL_INVALID_PROGRAM: + return "CL_INVALID_PROGRAM"; + case CL_INVALID_PROGRAM_EXECUTABLE: + return "CL_INVALID_PROGRAM_EXECUTABLE"; + case CL_INVALID_KERNEL_NAME: + return "CL_INVALID_KERNEL_NAME"; + case CL_INVALID_KERNEL_DEFINITION: + return "CL_INVALID_KERNEL_DEFINITION"; + case CL_INVALID_KERNEL: + return "CL_INVALID_KERNEL"; + case CL_INVALID_ARG_INDEX: + return "CL_INVALID_ARG_INDEX"; + case CL_INVALID_ARG_VALUE: + return "CL_INVALID_ARG_VALUE"; + case CL_INVALID_ARG_SIZE: + return "CL_INVALID_ARG_SIZE"; + case CL_INVALID_KERNEL_ARGS: + return "CL_INVALID_KERNEL_ARGS"; + case CL_INVALID_WORK_DIMENSION: + return "CL_INVALID_WORK_DIMENSION"; + case CL_INVALID_WORK_GROUP_SIZE: + return "CL_INVALID_WORK_GROUP_SIZE"; + case CL_INVALID_WORK_ITEM_SIZE: + return "CL_INVALID_WORK_ITEM_SIZE"; + case CL_INVALID_GLOBAL_OFFSET: + return "CL_INVALID_GLOBAL_OFFSET"; + case CL_INVALID_EVENT_WAIT_LIST: + return "CL_INVALID_EVENT_WAIT_LIST"; + case CL_INVALID_EVENT: + return "CL_INVALID_EVENT"; + case CL_INVALID_OPERATION: + return "CL_INVALID_OPERATION"; + case CL_INVALID_GL_OBJECT: + return "CL_INVALID_GL_OBJECT"; + case CL_INVALID_BUFFER_SIZE: + return "CL_INVALID_BUFFER_SIZE"; + case CL_INVALID_MIP_LEVEL: + return "CL_INVALID_MIP_LEVEL"; + case CL_INVALID_GLOBAL_WORK_SIZE: + return "CL_INVALID_GLOBAL_WORK_SIZE"; + case CL_INVALID_PROPERTY: + return "CL_INVALID_PROPERTY"; + case CL_INVALID_IMAGE_DESCRIPTOR: + return "CL_INVALID_IMAGE_DESCRIPTOR"; + case CL_INVALID_COMPILER_OPTIONS: + return "CL_INVALID_COMPILER_OPTIONS"; + case CL_INVALID_LINKER_OPTIONS: + return "CL_INVALID_LINKER_OPTIONS"; + case CL_INVALID_DEVICE_PARTITION_COUNT: + return "CL_INVALID_DEVICE_PARTITION_COUNT"; +#ifdef CL_VERSION_2_0 + case CL_INVALID_PIPE_SIZE: + return "CL_INVALID_PIPE_SIZE"; + case CL_INVALID_DEVICE_QUEUE: + return "CL_INVALID_DEVICE_QUEUE"; +#endif + default: + return "UNKNOWN_ERROR"; + } +} + +#if 0 +void printer::inst()->print_msg(L1,const char* fmt, ...); +void printer::inst()->print_str(const char* str); +#endif + +char* LoadTextFile(const char* filename) +{ + size_t flen; + char* out; + FILE* kernel = fopen(filename, "rb"); + + if(kernel == NULL) + return NULL; + + fseek(kernel, 0, SEEK_END); + flen = ftell(kernel); + fseek(kernel, 0, SEEK_SET); + + out = (char*)malloc(flen+1); + size_t r = fread(out, flen, 1, kernel); + fclose(kernel); + + if(r != 1) + { + free(out); + return NULL; + } + + out[flen] = '\0'; + return out; +} + +size_t InitOpenCLGpu(cl_context opencl_ctx, GpuContext* ctx, const char* source_code) +{ + size_t MaximumWorkSize; + cl_int ret; + + if((ret = clGetDeviceInfo(ctx->DeviceID, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), &MaximumWorkSize, NULL)) != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when querying a device's max worksize using clGetDeviceInfo.", err_to_str(ret)); + return ERR_OCL_API; + } + + printer::inst()->print_msg(L1,"Device %lu work size %lu / %lu.", ctx->deviceIdx, ctx->workSize, MaximumWorkSize); +#ifdef CL_VERSION_2_0 + const cl_queue_properties CommandQueueProperties[] = { 0, 0, 0 }; + ctx->CommandQueues = clCreateCommandQueueWithProperties(opencl_ctx, ctx->DeviceID, CommandQueueProperties, &ret); +#else + const cl_command_queue_properties CommandQueueProperties = { 0 }; + ctx->CommandQueues = clCreateCommandQueue(opencl_ctx, ctx->DeviceID, CommandQueueProperties, &ret); +#endif + + if(ret != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clCreateCommandQueueWithProperties.", err_to_str(ret)); + return ERR_OCL_API; + } + + ctx->InputBuffer = clCreateBuffer(opencl_ctx, CL_MEM_READ_ONLY, 88, NULL, &ret); + if(ret != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clCreateBuffer to create input buffer.", err_to_str(ret)); + return ERR_OCL_API; + } + + size_t g_thd = ctx->rawIntensity; + ctx->ExtraBuffers[0] = clCreateBuffer(opencl_ctx, CL_MEM_READ_WRITE, (1 << 21) * g_thd, NULL, &ret); + if(ret != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clCreateBuffer to create hash scratchpads buffer.", err_to_str(ret)); + return ERR_OCL_API; + } + + ctx->ExtraBuffers[1] = clCreateBuffer(opencl_ctx, CL_MEM_READ_WRITE, 200 * g_thd, NULL, &ret); + if(ret != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clCreateBuffer to create hash states buffer.", err_to_str(ret)); + return ERR_OCL_API; + } + + // Blake-256 branches + ctx->ExtraBuffers[2] = clCreateBuffer(opencl_ctx, CL_MEM_READ_WRITE, sizeof(cl_uint) * (g_thd + 2), NULL, &ret); + if(ret != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clCreateBuffer to create Branch 0 buffer.", err_to_str(ret)); + return ERR_OCL_API; + } + + // Groestl-256 branches + ctx->ExtraBuffers[3] = clCreateBuffer(opencl_ctx, CL_MEM_READ_WRITE, sizeof(cl_uint) * (g_thd + 2), NULL, &ret); + if(ret != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clCreateBuffer to create Branch 1 buffer.", err_to_str(ret)); + return ERR_OCL_API; + } + + // JH-256 branches + ctx->ExtraBuffers[4] = clCreateBuffer(opencl_ctx, CL_MEM_READ_WRITE, sizeof(cl_uint) * (g_thd + 2), NULL, &ret); + if(ret != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clCreateBuffer to create Branch 2 buffer.", err_to_str(ret)); + return ERR_OCL_API; + } + + // Skein-512 branches + ctx->ExtraBuffers[5] = clCreateBuffer(opencl_ctx, CL_MEM_READ_WRITE, sizeof(cl_uint) * (g_thd + 2), NULL, &ret); + if(ret != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clCreateBuffer to create Branch 3 buffer.", err_to_str(ret)); + return ERR_OCL_API; + } + + // Assume we may find up to 0xFF nonces in one run - it's reasonable + ctx->OutputBuffer = clCreateBuffer(opencl_ctx, CL_MEM_READ_WRITE, sizeof(cl_uint) * 0x100, NULL, &ret); + if(ret != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clCreateBuffer to create output buffer.", err_to_str(ret)); + return ERR_OCL_API; + } + + ctx->Program = clCreateProgramWithSource(opencl_ctx, 1, (const char**)&source_code, NULL, &ret); + if(ret != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clCreateProgramWithSource on the contents of cryptonight.cl", err_to_str(ret)); + return ERR_OCL_API; + } + + char options[32]; + snprintf(options, sizeof(options), "-I. -DWORKSIZE=%llu", int_port(ctx->workSize)); + ret = clBuildProgram(ctx->Program, 1, &ctx->DeviceID, options, NULL, NULL); + if(ret != CL_SUCCESS) + { + size_t len; + printer::inst()->print_msg(L1,"Error %s when calling clBuildProgram.", err_to_str(ret)); + + if((ret = clGetProgramBuildInfo(ctx->Program, ctx->DeviceID, CL_PROGRAM_BUILD_LOG, 0, NULL, &len)) != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clGetProgramBuildInfo for length of build log output.", err_to_str(ret)); + return ERR_OCL_API; + } + + char* BuildLog = (char*)malloc(len + 1); + BuildLog[0] = '\0'; + + if((ret = clGetProgramBuildInfo(ctx->Program, ctx->DeviceID, CL_PROGRAM_BUILD_LOG, len, BuildLog, NULL)) != CL_SUCCESS) + { + free(BuildLog); + printer::inst()->print_msg(L1,"Error %s when calling clGetProgramBuildInfo for build log.", err_to_str(ret)); + return ERR_OCL_API; + } + + printer::inst()->print_str("Build log:\n"); + std::cerr<<BuildLog<<std::endl; + + free(BuildLog); + return ERR_OCL_API; + } + + cl_build_status status; + do + { + if((ret = clGetProgramBuildInfo(ctx->Program, ctx->DeviceID, CL_PROGRAM_BUILD_STATUS, sizeof(cl_build_status), &status, NULL)) != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clGetProgramBuildInfo for status of build.", err_to_str(ret)); + return ERR_OCL_API; + } + port_sleep(1); + } + while(status == CL_BUILD_IN_PROGRESS); + + const char *KernelNames[] = { "cn0", "cn1", "cn2", "Blake", "Groestl", "JH", "Skein" }; + for(int i = 0; i < 7; ++i) + { + ctx->Kernels[i] = clCreateKernel(ctx->Program, KernelNames[i], &ret); + if(ret != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clCreateKernel for kernel %s.", err_to_str(ret), KernelNames[i]); + return ERR_OCL_API; + } + } + + ctx->Nonce = 0; + return 0; +} + +const cl_platform_info attributeTypes[5] = { + CL_PLATFORM_NAME, + CL_PLATFORM_VENDOR, + CL_PLATFORM_VERSION, + CL_PLATFORM_PROFILE, + CL_PLATFORM_EXTENSIONS +}; + +const char* const attributeNames[] = { + "CL_PLATFORM_NAME", + "CL_PLATFORM_VENDOR", + "CL_PLATFORM_VERSION", + "CL_PLATFORM_PROFILE", + "CL_PLATFORM_EXTENSIONS" +}; + +#define NELEMS(x) (sizeof(x) / sizeof((x)[0])) + +void PrintDeviceInfo(cl_device_id device) +{ + char queryBuffer[1024]; + int queryInt; + cl_int clError; + clError = clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(queryBuffer), &queryBuffer, NULL); + printf(" CL_DEVICE_NAME: %s\n", queryBuffer); + queryBuffer[0] = '\0'; + clError = clGetDeviceInfo(device, CL_DEVICE_VENDOR, sizeof(queryBuffer), &queryBuffer, NULL); + printf(" CL_DEVICE_VENDOR: %s\n", queryBuffer); + queryBuffer[0] = '\0'; + clError = clGetDeviceInfo(device, CL_DRIVER_VERSION, sizeof(queryBuffer), &queryBuffer, NULL); + printf(" CL_DRIVER_VERSION: %s\n", queryBuffer); + queryBuffer[0] = '\0'; + clError = clGetDeviceInfo(device, CL_DEVICE_VERSION, sizeof(queryBuffer), &queryBuffer, NULL); + printf(" CL_DEVICE_VERSION: %s\n", queryBuffer); + queryBuffer[0] = '\0'; + clError = clGetDeviceInfo(device, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(int), &queryInt, NULL); + printf(" CL_DEVICE_MAX_COMPUTE_UNITS: %d\n", queryInt); +} + +uint32_t getNumPlatforms() +{ + cl_uint num_platforms = 0; + cl_platform_id * platforms = NULL; + cl_int clStatus; + + // Get platform and device information + clStatus = clGetPlatformIDs(0, NULL, &num_platforms); + platforms = (cl_platform_id *) malloc(sizeof(cl_platform_id) * num_platforms); + clStatus = clGetPlatformIDs(num_platforms, platforms, NULL); + + return num_platforms; +} + +std::vector<GpuContext> getAMDDevices(int index) +{ + std::vector<GpuContext> ctxVec; + cl_platform_id * platforms = NULL; + cl_int clStatus; + cl_uint num_devices; + cl_device_id *device_list = NULL; + + uint32_t numPlatforms = getNumPlatforms(); + + + platforms = (cl_platform_id *) malloc(sizeof(cl_platform_id) * numPlatforms); + clStatus = clGetPlatformIDs(numPlatforms, platforms, NULL); + + clStatus = clGetDeviceIDs( platforms[index], CL_DEVICE_TYPE_GPU, 0, NULL, &num_devices); + device_list = (cl_device_id *) malloc(sizeof(cl_device_id)*num_devices); + clStatus = clGetDeviceIDs( platforms[index], CL_DEVICE_TYPE_GPU, num_devices, device_list, NULL); + for (int k = 0; k < num_devices; k++) { + cl_int clError; + std::vector<char> devVendorVec(1024); + clError = clGetDeviceInfo(device_list[k], CL_DEVICE_VENDOR, devVendorVec.size(), devVendorVec.data(), NULL); + std::string devVendor(devVendorVec.data()); + if( devVendor.find("Advanced Micro Devices") != std::string::npos) + { + GpuContext ctx; + ctx.deviceIdx = k; + clError = clGetDeviceInfo(device_list[k], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(int), &(ctx.computeUnits), NULL); + size_t maxMem; + clError = clGetDeviceInfo(device_list[k], CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(size_t), &(maxMem), NULL); + clError = clGetDeviceInfo(device_list[k], CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(size_t), &(ctx.freeMem), NULL); + // if environment variable GPU_SINGLE_ALLOC_PERCENT is not set we can not allocate the full memory + ctx.freeMem = std::min(ctx.freeMem, maxMem); + std::vector<char> devNameVec(1024); + clError = clGetDeviceInfo(device_list[k], CL_DEVICE_NAME, devNameVec.size(), devNameVec.data(), NULL); + ctx.name = std::string(devNameVec.data()); + printer::inst()->print_msg(L0,"Found OpenCL GPU %s.",ctx.name.c_str()); + ctx.DeviceID = device_list[k]; + ctxVec.push_back(ctx); + } + } + + + free(device_list); + free(platforms); + + return ctxVec; +} + +int getAMDPlatformIdx() +{ + + uint32_t numPlatforms = getNumPlatforms(); + + if(numPlatforms == 0) + { + printer::inst()->print_msg(L0,"WARNING: No OpenCL platform found."); + return -1; + } + cl_platform_id * platforms = NULL; + cl_int clStatus; + + platforms = (cl_platform_id *) malloc(sizeof(cl_platform_id) * numPlatforms); + clStatus = clGetPlatformIDs(numPlatforms, platforms, NULL); + + int platformIndex = -1; + + for (int i = 0; i < numPlatforms; i++) { + size_t infoSize; + clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR, 0, NULL, &infoSize); + std::vector<char> platformNameVec(infoSize); + + clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR, infoSize, platformNameVec.data(), NULL); + std::string platformName(platformNameVec.data()); + if( platformName.find("Advanced Micro Devices") != std::string::npos) + { + platformIndex = i; + printer::inst()->print_msg(L0,"Found AMD platform index id = %i, name = %s",i , platformName.c_str()); + break; + } + } + + free(platforms); + return platformIndex; +} + +// RequestedDeviceIdxs is a list of OpenCL device indexes +// NumDevicesRequested is number of devices in RequestedDeviceIdxs list +// Returns 0 on success, -1 on stupid params, -2 on OpenCL API error +size_t InitOpenCL(GpuContext* ctx, size_t num_gpus, size_t platform_idx) +{ + + cl_context opencl_ctx; + cl_int ret; + cl_uint entries; + + if((ret = clGetPlatformIDs(0, NULL, &entries)) != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clGetPlatformIDs for number of platforms.", err_to_str(ret)); + return ERR_OCL_API; + } + + + // The number of platforms naturally is the index of the last platform plus one. + if(entries <= platform_idx) + { + printer::inst()->print_msg(L1,"Selected OpenCL platform index %d doesn't exist.", platform_idx); + return ERR_STUPID_PARAMS; + } + + + + cl_platform_id * platforms = NULL; + cl_int clStatus; + uint32_t numPlatforms = getNumPlatforms(); + + platforms = (cl_platform_id *) malloc(sizeof(cl_platform_id) * numPlatforms); + clStatus = clGetPlatformIDs(numPlatforms, platforms, NULL); + + size_t infoSize; + clGetPlatformInfo(platforms[platform_idx], CL_PLATFORM_VENDOR, 0, NULL, &infoSize); + std::vector<char> platformNameVec(infoSize); + clGetPlatformInfo(platforms[platform_idx], CL_PLATFORM_VENDOR, infoSize, platformNameVec.data(), NULL); + std::string platformName(platformNameVec.data()); + if( platformName.find("Advanced Micro Devices") == std::string::npos) + { + printer::inst()->print_msg(L1,"WARNING: using non AMD device: %s", platformName.c_str()); + } + + free(platforms); + + /*MSVC skimping on devel costs by shoehorning C99 to be a subset of C++? Noooo... can't be.*/ +#ifdef __GNUC__ + cl_platform_id PlatformIDList[entries]; +#else + cl_platform_id* PlatformIDList = (cl_platform_id*)_alloca(entries * sizeof(cl_platform_id)); +#endif + if((ret = clGetPlatformIDs(entries, PlatformIDList, NULL)) != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clGetPlatformIDs for platform ID information.", err_to_str(ret)); + return ERR_OCL_API; + } + + if((ret = clGetDeviceIDs(PlatformIDList[platform_idx], CL_DEVICE_TYPE_GPU, 0, NULL, &entries)) != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clGetDeviceIDs for number of devices.", err_to_str(ret)); + return ERR_OCL_API; + } + + // Same as the platform index sanity check, except we must check all requested device indexes + for(int i = 0; i < num_gpus; ++i) + { + if(entries <= ctx[i].deviceIdx) + { + printer::inst()->print_msg(L1,"Selected OpenCL device index %lu doesn't exist.\n", ctx[i].deviceIdx); + return ERR_STUPID_PARAMS; + } + } + +#ifdef __GNUC__ + cl_device_id DeviceIDList[entries]; +#else + cl_device_id* DeviceIDList = (cl_device_id*)_alloca(entries * sizeof(cl_device_id)); +#endif + if((ret = clGetDeviceIDs(PlatformIDList[platform_idx], CL_DEVICE_TYPE_GPU, entries, DeviceIDList, NULL)) != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clGetDeviceIDs for device ID information.", err_to_str(ret)); + return ERR_OCL_API; + } + + // Indexes sanity checked above +#ifdef __GNUC__ + cl_device_id TempDeviceList[num_gpus]; +#else + cl_device_id* TempDeviceList = (cl_device_id*)_alloca(entries * sizeof(cl_device_id)); +#endif + for(int i = 0; i < num_gpus; ++i) + { + ctx[i].DeviceID = DeviceIDList[ctx[i].deviceIdx]; + TempDeviceList[i] = DeviceIDList[ctx[i].deviceIdx]; + } + + opencl_ctx = clCreateContext(NULL, num_gpus, TempDeviceList, NULL, NULL, &ret); + if(ret != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clCreateContext.", err_to_str(ret)); + return ERR_OCL_API; + } + + //char* source_code = LoadTextFile(sSourcePath); + + const char *cryptonightCL = + #include "./opencl/cryptonight.cl" + ; + const char *blake256CL = + #include "./opencl/blake256.cl" + ; + const char *groestl256CL = + #include "./opencl/groestl256.cl" + ; + const char *jhCL = + #include "./opencl/jh.cl" + ; + const char *wolfAesCL = + #include "./opencl/wolf-aes.cl" + ; + const char *wolfSkeinCL = + #include "./opencl/wolf-skein.cl" + ; + + std::string source_code(cryptonightCL); + source_code = std::regex_replace(source_code, std::regex("XMRSTAK_INCLUDE_WOLF_AES"), wolfAesCL); + source_code = std::regex_replace(source_code, std::regex("XMRSTAK_INCLUDE_WOLF_SKEIN"), wolfSkeinCL); + source_code = std::regex_replace(source_code, std::regex("XMRSTAK_INCLUDE_JH"), jhCL); + source_code = std::regex_replace(source_code, std::regex("XMRSTAK_INCLUDE_BLAKE256"), blake256CL); + source_code = std::regex_replace(source_code, std::regex("XMRSTAK_INCLUDE_GROESTL256"), groestl256CL); + + for(int i = 0; i < num_gpus; ++i) + { + if((ret = InitOpenCLGpu(opencl_ctx, &ctx[i], source_code.c_str())) != ERR_SUCCESS) + { + return ret; + } + } + + return ERR_SUCCESS; +} + +size_t XMRSetJob(GpuContext* ctx, uint8_t* input, size_t input_len, uint32_t target) +{ + cl_int ret; + + if(input_len > 84) + return ERR_STUPID_PARAMS; + + input[input_len] = 0x01; + memset(input + input_len + 1, 0, 88 - input_len - 1); + + if((ret = clEnqueueWriteBuffer(ctx->CommandQueues, ctx->InputBuffer, CL_TRUE, 0, 88, input, 0, NULL, NULL)) != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clEnqueueWriteBuffer to fill input buffer.", err_to_str(ret)); + return ERR_OCL_API; + } + + if((ret = clSetKernelArg(ctx->Kernels[0], 0, sizeof(cl_mem), &ctx->InputBuffer)) != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 0, argument 0.", err_to_str(ret)); + return ERR_OCL_API; + } + + // Scratchpads + if((ret = clSetKernelArg(ctx->Kernels[0], 1, sizeof(cl_mem), ctx->ExtraBuffers + 0)) != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 0, argument 1.", err_to_str(ret)); + return ERR_OCL_API; + } + + // States + if((ret = clSetKernelArg(ctx->Kernels[0], 2, sizeof(cl_mem), ctx->ExtraBuffers + 1)) != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 0, argument 2.", err_to_str(ret)); + return ERR_OCL_API; + } + + // CN2 Kernel + + // Scratchpads + if((ret = clSetKernelArg(ctx->Kernels[1], 0, sizeof(cl_mem), ctx->ExtraBuffers + 0)) != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 1, argument 0.", err_to_str(ret)); + return ERR_OCL_API; + } + + // States + if((ret = clSetKernelArg(ctx->Kernels[1], 1, sizeof(cl_mem), ctx->ExtraBuffers + 1)) != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 1, argument 1.", err_to_str(ret)); + return ERR_OCL_API; + } + + // CN3 Kernel + // Scratchpads + if((ret = clSetKernelArg(ctx->Kernels[2], 0, sizeof(cl_mem), ctx->ExtraBuffers + 0)) != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 2, argument 0.", err_to_str(ret)); + return ERR_OCL_API; + } + + // States + if((ret = clSetKernelArg(ctx->Kernels[2], 1, sizeof(cl_mem), ctx->ExtraBuffers + 1)) != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 2, argument 1.", err_to_str(ret)); + return ERR_OCL_API; + } + + // Branch 0 + if((ret = clSetKernelArg(ctx->Kernels[2], 2, sizeof(cl_mem), ctx->ExtraBuffers + 2)) != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 2, argument 2.", err_to_str(ret)); + return ERR_OCL_API; + } + + // Branch 1 + if((ret = clSetKernelArg(ctx->Kernels[2], 3, sizeof(cl_mem), ctx->ExtraBuffers + 3)) != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 2, argument 3.", err_to_str(ret)); + return ERR_OCL_API; + } + + // Branch 2 + if((ret = clSetKernelArg(ctx->Kernels[2], 4, sizeof(cl_mem), ctx->ExtraBuffers + 4)) != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 2, argument 4.", err_to_str(ret)); + return ERR_OCL_API; + } + + // Branch 3 + if((ret = clSetKernelArg(ctx->Kernels[2], 5, sizeof(cl_mem), ctx->ExtraBuffers + 5)) != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 2, argument 5.", err_to_str(ret)); + return ERR_OCL_API; + } + + for(int i = 0; i < 4; ++i) + { + // States + if((ret = clSetKernelArg(ctx->Kernels[i + 3], 0, sizeof(cl_mem), ctx->ExtraBuffers + 1)) != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel %d, argument %d.", err_to_str(ret), i + 3, 0); + return ERR_OCL_API; + } + + // Nonce buffer + if((ret = clSetKernelArg(ctx->Kernels[i + 3], 1, sizeof(cl_mem), ctx->ExtraBuffers + (i + 2))) != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel %d, argument %d.", err_to_str(ret), i + 3, 1); + return ERR_OCL_API; + } + + // Output + if((ret = clSetKernelArg(ctx->Kernels[i + 3], 2, sizeof(cl_mem), &ctx->OutputBuffer)) != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel %d, argument %d.", err_to_str(ret), i + 3, 2); + return ERR_OCL_API; + } + + // Target + if((ret = clSetKernelArg(ctx->Kernels[i + 3], 3, sizeof(cl_uint), &target)) != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel %d, argument %d.", err_to_str(ret), i + 3, 3); + return ERR_OCL_API; + } + } + + return ERR_SUCCESS; +} + +size_t XMRRunJob(GpuContext* ctx, cl_uint* HashOutput) +{ + cl_int ret; + cl_uint zero = 0; + size_t BranchNonces[4]; + memset(BranchNonces,0,sizeof(size_t)*4); + + size_t g_thd = ctx->rawIntensity; + size_t w_size = ctx->workSize; + + for(int i = 2; i < 6; ++i) + { + if((ret = clEnqueueWriteBuffer(ctx->CommandQueues, ctx->ExtraBuffers[i], CL_FALSE, sizeof(cl_uint) * g_thd, sizeof(cl_uint), &zero, 0, NULL, NULL)) != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clEnqueueWriteBuffer to zero branch buffer counter %d.", err_to_str(ret), i - 2); + return ERR_OCL_API; + } + } + + if((ret = clEnqueueWriteBuffer(ctx->CommandQueues, ctx->OutputBuffer, CL_FALSE, sizeof(cl_uint) * 0xFF, sizeof(cl_uint), &zero, 0, NULL, NULL)) != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clEnqueueReadBuffer to fetch results.", err_to_str(ret)); + return ERR_OCL_API; + } + + clFinish(ctx->CommandQueues); + + size_t Nonce[2] = {ctx->Nonce, 1}, gthreads[2] = { g_thd, 8 }, lthreads[2] = { w_size, 8 }; + if((ret = clEnqueueNDRangeKernel(ctx->CommandQueues, ctx->Kernels[0], 2, Nonce, gthreads, lthreads, 0, NULL, NULL)) != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clEnqueueNDRangeKernel for kernel %d.", err_to_str(ret), 0); + return ERR_OCL_API; + } + + /*for(int i = 1; i < 3; ++i) + { + if((ret = clEnqueueNDRangeKernel(*ctx->CommandQueues, ctx->Kernels[i], 1, &ctx->Nonce, &g_thd, &w_size, 0, NULL, NULL)) != CL_SUCCESS) + { + Log(LOG_CRITICAL, "Error %s when calling clEnqueueNDRangeKernel for kernel %d.", err_to_str(ret), i); + return(ERR_OCL_API); + } + }*/ + + if((ret = clEnqueueNDRangeKernel(ctx->CommandQueues, ctx->Kernels[1], 1, &ctx->Nonce, &g_thd, &w_size, 0, NULL, NULL)) != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clEnqueueNDRangeKernel for kernel %d.", err_to_str(ret), 1); + return ERR_OCL_API; + } + + if((ret = clEnqueueNDRangeKernel(ctx->CommandQueues, ctx->Kernels[2], 2, Nonce, gthreads, lthreads, 0, NULL, NULL)) != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clEnqueueNDRangeKernel for kernel %d.", err_to_str(ret), 2); + return ERR_OCL_API; + } + + if((ret = clEnqueueReadBuffer(ctx->CommandQueues, ctx->ExtraBuffers[2], CL_FALSE, sizeof(cl_uint) * g_thd, sizeof(cl_uint), BranchNonces, 0, NULL, NULL)) != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clEnqueueReadBuffer to fetch results.", err_to_str(ret)); + return ERR_OCL_API; + } + + if((ret = clEnqueueReadBuffer(ctx->CommandQueues, ctx->ExtraBuffers[3], CL_FALSE, sizeof(cl_uint) * g_thd, sizeof(cl_uint), BranchNonces + 1, 0, NULL, NULL)) != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clEnqueueReadBuffer to fetch results.", err_to_str(ret)); + return ERR_OCL_API; + } + + if((ret = clEnqueueReadBuffer(ctx->CommandQueues, ctx->ExtraBuffers[4], CL_FALSE, sizeof(cl_uint) * g_thd, sizeof(cl_uint), BranchNonces + 2, 0, NULL, NULL)) != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clEnqueueReadBuffer to fetch results.", err_to_str(ret)); + return ERR_OCL_API; + } + + if((ret = clEnqueueReadBuffer(ctx->CommandQueues, ctx->ExtraBuffers[5], CL_FALSE, sizeof(cl_uint) * g_thd, sizeof(cl_uint), BranchNonces + 3, 0, NULL, NULL)) != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clEnqueueReadBuffer to fetch results.", err_to_str(ret)); + return ERR_OCL_API; + } + + clFinish(ctx->CommandQueues); + + for(int i = 0; i < 4; ++i) + { + if(BranchNonces[i]) + { + // Threads + if((clSetKernelArg(ctx->Kernels[i + 3], 4, sizeof(cl_ulong), BranchNonces + i)) != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel %d, argument %d.", err_to_str(ret), i + 3, 4); + return(ERR_OCL_API); + } + + BranchNonces[i] = ((size_t)ceil( (double)BranchNonces[i] / (double)w_size) ) * w_size; + if((ret = clEnqueueNDRangeKernel(ctx->CommandQueues, ctx->Kernels[i + 3], 1, &ctx->Nonce, BranchNonces + i, &w_size, 0, NULL, NULL)) != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clEnqueueNDRangeKernel for kernel %d.", err_to_str(ret), i + 3); + return ERR_OCL_API; + } + } + } + + if((ret = clEnqueueReadBuffer(ctx->CommandQueues, ctx->OutputBuffer, CL_TRUE, 0, sizeof(cl_uint) * 0x100, HashOutput, 0, NULL, NULL)) != CL_SUCCESS) + { + printer::inst()->print_msg(L1,"Error %s when calling clEnqueueReadBuffer to fetch results.", err_to_str(ret)); + return ERR_OCL_API; + } + + clFinish(ctx->CommandQueues); + ctx->Nonce += g_thd; + + return ERR_SUCCESS; +} diff --git a/xmrstak/backend/amd/amd_gpu/gpu.hpp b/xmrstak/backend/amd/amd_gpu/gpu.hpp new file mode 100644 index 0000000..8a71cfa --- /dev/null +++ b/xmrstak/backend/amd/amd_gpu/gpu.hpp @@ -0,0 +1,50 @@ +#pragma once + +#if defined(__APPLE__) +#include <OpenCL/cl.h> +#else +#include <CL/cl.h> +#endif + +#include <stdint.h> +#include <vector> +#include "../../../console.h" + +#define ERR_SUCCESS (0) +#define ERR_OCL_API (2) +#define ERR_STUPID_PARAMS (1) + + + +struct GpuContext +{ + /*Input vars*/ + size_t deviceIdx; + size_t rawIntensity; + size_t workSize; + + /*Output vars*/ + cl_device_id DeviceID; + cl_command_queue CommandQueues; + cl_mem InputBuffer; + cl_mem OutputBuffer; + cl_mem ExtraBuffers[6]; + cl_program Program; + cl_kernel Kernels[7]; + size_t freeMem; + int computeUnits; + std::string name; + + size_t Nonce; + +}; + +uint32_t getNumPlatforms(); +int getAMDPlatformIdx(); +std::vector<GpuContext> getAMDDevices(int index); + +size_t InitOpenCL(GpuContext* ctx, size_t num_gpus, size_t platform_idx); +size_t XMRSetJob(GpuContext* ctx, uint8_t* input, size_t input_len, uint32_t target); +size_t XMRRunJob(GpuContext* ctx, cl_uint* HashOutput); + + diff --git a/xmrstak/backend/amd/amd_gpu/opencl/blake256.cl b/xmrstak/backend/amd/amd_gpu/opencl/blake256.cl new file mode 100644 index 0000000..3d5fe3e --- /dev/null +++ b/xmrstak/backend/amd/amd_gpu/opencl/blake256.cl @@ -0,0 +1,93 @@ +R"===( +/* +* blake256 kernel implementation. +* +* ==========================(LICENSE BEGIN)============================ +* Copyright (c) 2014 djm34 +* Copyright (c) 2014 tpruvot +* Permission is hereby granted, free of charge, to any person obtaining +* a copy of this software and associated documentation files (the +* "Software"), to deal in the Software without restriction, including +* without limitation the rights to use, copy, modify, merge, publish, +* distribute, sublicense, and/or sell copies of the Software, and to +* permit persons to whom the Software is furnished to do so, subject to +* the following conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +* +* ===========================(LICENSE END)============================= +* +* @author djm34 +*/ +__constant static const int sigma[16][16] = { + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, + { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 }, + { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 }, + { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 }, + { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 }, + { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 }, + { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 }, + { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 }, + { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 }, + { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0 }, + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, + { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 }, + { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 }, + { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 }, + { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 }, + { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 } +}; + + +__constant static const sph_u32 c_IV256[8] = { + 0x6A09E667, 0xBB67AE85, + 0x3C6EF372, 0xA54FF53A, + 0x510E527F, 0x9B05688C, + 0x1F83D9AB, 0x5BE0CD19 +}; + +/* Second part (64-80) msg never change, store it */ +__constant static const sph_u32 c_Padding[16] = { + 0, 0, 0, 0, + 0x80000000, 0, 0, 0, + 0, 0, 0, 0, + 0, 1, 0, 640, +}; +__constant static const sph_u32 c_u256[16] = { + 0x243F6A88, 0x85A308D3, + 0x13198A2E, 0x03707344, + 0xA4093822, 0x299F31D0, + 0x082EFA98, 0xEC4E6C89, + 0x452821E6, 0x38D01377, + 0xBE5466CF, 0x34E90C6C, + 0xC0AC29B7, 0xC97C50DD, + 0x3F84D5B5, 0xB5470917 +}; + +#define GS(a,b,c,d,x) { \ + const sph_u32 idx1 = sigma[r][x]; \ + const sph_u32 idx2 = sigma[r][x+1]; \ + v[a] += (m[idx1] ^ c_u256[idx2]) + v[b]; \ + v[d] ^= v[a]; \ + v[d] = rotate(v[d], 16U); \ + v[c] += v[d]; \ + v[b] ^= v[c]; \ + v[b] = rotate(v[b], 20U); \ +\ + v[a] += (m[idx2] ^ c_u256[idx1]) + v[b]; \ + v[d] ^= v[a]; \ + v[d] = rotate(v[d], 24U); \ + v[c] += v[d]; \ + v[b] ^= v[c]; \ + v[b] = rotate(v[b], 25U); \ +} +)===" diff --git a/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl b/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl new file mode 100644 index 0000000..1bb334a --- /dev/null +++ b/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl @@ -0,0 +1,859 @@ +R"===( +/* + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifdef cl_amd_media_ops +#pragma OPENCL EXTENSION cl_amd_media_ops : enable +#else +/* taken from https://www.khronos.org/registry/OpenCL/extensions/amd/cl_amd_media_ops.txt + * Build-in Function + * uintn amd_bitalign (uintn src0, uintn src1, uintn src2) + * Description + * dst.s0 = (uint) (((((long)src0.s0) << 32) | (long)src1.s0) >> (src2.s0 & 31)) + * similar operation applied to other components of the vectors. + * + * The implemented function is modified because the last is in our case always a scalar. + * We can ignore the bitwise AND operation. + */ +inline uint2 amd_bitalign( const uint2 src0, const uint2 src1, const uint src2) +{ + uint2 result; + result.s0 = (uint) (((((long)src0.s0) << 32) | (long)src1.s0) >> (src2)); + result.s1 = (uint) (((((long)src0.s1) << 32) | (long)src1.s1) >> (src2)); + return result; +} +#endif + +#ifdef cl_amd_media_ops2 +#pragma OPENCL EXTENSION cl_amd_media_ops2 : enable +#else +/* taken from: https://www.khronos.org/registry/OpenCL/extensions/amd/cl_amd_media_ops2.txt + * Built-in Function: + * uintn amd_bfe (uintn src0, uintn src1, uintn src2) + * Description + * NOTE: operator >> below represent logical right shift + * offset = src1.s0 & 31; + * width = src2.s0 & 31; + * if width = 0 + * dst.s0 = 0; + * else if (offset + width) < 32 + * dst.s0 = (src0.s0 << (32 - offset - width)) >> (32 - width); + * else + * dst.s0 = src0.s0 >> offset; + * similar operation applied to other components of the vectors + */ +inline int amd_bfe(const uint src0, const uint offset, const uint width) +{ + /* casts are removed because we can implement everything as uint + * int offset = src1; + * int width = src2; + * remove check for edge case, this function is always called with + * `width==8` + * @code + * if ( width == 0 ) + * return 0; + * @endcode + */ + if ( (offset + width) < 32u ) + return (src0 << (32u - offset - width)) >> (32u - width); + + return src0 >> offset; +} +#endif + +//#include "opencl/wolf-aes.cl" +XMRSTAK_INCLUDE_WOLF_AES +//#include "opencl/wolf-skein.cl" +XMRSTAK_INCLUDE_WOLF_SKEIN +//#include "opencl/jh.cl" +XMRSTAK_INCLUDE_JH +//#include "opencl/blake256.cl" +XMRSTAK_INCLUDE_BLAKE256 +//#include "opencl/groestl256.cl" +XMRSTAK_INCLUDE_GROESTL256 + +static const __constant ulong keccakf_rndc[24] = +{ + 0x0000000000000001, 0x0000000000008082, 0x800000000000808a, + 0x8000000080008000, 0x000000000000808b, 0x0000000080000001, + 0x8000000080008081, 0x8000000000008009, 0x000000000000008a, + 0x0000000000000088, 0x0000000080008009, 0x000000008000000a, + 0x000000008000808b, 0x800000000000008b, 0x8000000000008089, + 0x8000000000008003, 0x8000000000008002, 0x8000000000000080, + 0x000000000000800a, 0x800000008000000a, 0x8000000080008081, + 0x8000000000008080, 0x0000000080000001, 0x8000000080008008 +}; + +static const __constant uchar sbox[256] = +{ + 0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5, 0x30, 0x01, 0x67, 0x2B, 0xFE, 0xD7, 0xAB, 0x76, + 0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59, 0x47, 0xF0, 0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, 0xC0, + 0xB7, 0xFD, 0x93, 0x26, 0x36, 0x3F, 0xF7, 0xCC, 0x34, 0xA5, 0xE5, 0xF1, 0x71, 0xD8, 0x31, 0x15, + 0x04, 0xC7, 0x23, 0xC3, 0x18, 0x96, 0x05, 0x9A, 0x07, 0x12, 0x80, 0xE2, 0xEB, 0x27, 0xB2, 0x75, + 0x09, 0x83, 0x2C, 0x1A, 0x1B, 0x6E, 0x5A, 0xA0, 0x52, 0x3B, 0xD6, 0xB3, 0x29, 0xE3, 0x2F, 0x84, + 0x53, 0xD1, 0x00, 0xED, 0x20, 0xFC, 0xB1, 0x5B, 0x6A, 0xCB, 0xBE, 0x39, 0x4A, 0x4C, 0x58, 0xCF, + 0xD0, 0xEF, 0xAA, 0xFB, 0x43, 0x4D, 0x33, 0x85, 0x45, 0xF9, 0x02, 0x7F, 0x50, 0x3C, 0x9F, 0xA8, + 0x51, 0xA3, 0x40, 0x8F, 0x92, 0x9D, 0x38, 0xF5, 0xBC, 0xB6, 0xDA, 0x21, 0x10, 0xFF, 0xF3, 0xD2, + 0xCD, 0x0C, 0x13, 0xEC, 0x5F, 0x97, 0x44, 0x17, 0xC4, 0xA7, 0x7E, 0x3D, 0x64, 0x5D, 0x19, 0x73, + 0x60, 0x81, 0x4F, 0xDC, 0x22, 0x2A, 0x90, 0x88, 0x46, 0xEE, 0xB8, 0x14, 0xDE, 0x5E, 0x0B, 0xDB, + 0xE0, 0x32, 0x3A, 0x0A, 0x49, 0x06, 0x24, 0x5C, 0xC2, 0xD3, 0xAC, 0x62, 0x91, 0x95, 0xE4, 0x79, + 0xE7, 0xC8, 0x37, 0x6D, 0x8D, 0xD5, 0x4E, 0xA9, 0x6C, 0x56, 0xF4, 0xEA, 0x65, 0x7A, 0xAE, 0x08, + 0xBA, 0x78, 0x25, 0x2E, 0x1C, 0xA6, 0xB4, 0xC6, 0xE8, 0xDD, 0x74, 0x1F, 0x4B, 0xBD, 0x8B, 0x8A, + 0x70, 0x3E, 0xB5, 0x66, 0x48, 0x03, 0xF6, 0x0E, 0x61, 0x35, 0x57, 0xB9, 0x86, 0xC1, 0x1D, 0x9E, + 0xE1, 0xF8, 0x98, 0x11, 0x69, 0xD9, 0x8E, 0x94, 0x9B, 0x1E, 0x87, 0xE9, 0xCE, 0x55, 0x28, 0xDF, + 0x8C, 0xA1, 0x89, 0x0D, 0xBF, 0xE6, 0x42, 0x68, 0x41, 0x99, 0x2D, 0x0F, 0xB0, 0x54, 0xBB, 0x16 +}; + + +void keccakf1600(ulong *s) +{ + for(int i = 0; i < 24; ++i) + { + ulong bc[5], tmp1, tmp2; + bc[0] = s[0] ^ s[5] ^ s[10] ^ s[15] ^ s[20] ^ rotate(s[2] ^ s[7] ^ s[12] ^ s[17] ^ s[22], 1UL); + bc[1] = s[1] ^ s[6] ^ s[11] ^ s[16] ^ s[21] ^ rotate(s[3] ^ s[8] ^ s[13] ^ s[18] ^ s[23], 1UL); + bc[2] = s[2] ^ s[7] ^ s[12] ^ s[17] ^ s[22] ^ rotate(s[4] ^ s[9] ^ s[14] ^ s[19] ^ s[24], 1UL); + bc[3] = s[3] ^ s[8] ^ s[13] ^ s[18] ^ s[23] ^ rotate(s[0] ^ s[5] ^ s[10] ^ s[15] ^ s[20], 1UL); + bc[4] = s[4] ^ s[9] ^ s[14] ^ s[19] ^ s[24] ^ rotate(s[1] ^ s[6] ^ s[11] ^ s[16] ^ s[21], 1UL); + + tmp1 = s[1] ^ bc[0]; + + s[0] ^= bc[4]; + s[1] = rotate(s[6] ^ bc[0], 44UL); + s[6] = rotate(s[9] ^ bc[3], 20UL); + s[9] = rotate(s[22] ^ bc[1], 61UL); + s[22] = rotate(s[14] ^ bc[3], 39UL); + s[14] = rotate(s[20] ^ bc[4], 18UL); + s[20] = rotate(s[2] ^ bc[1], 62UL); + s[2] = rotate(s[12] ^ bc[1], 43UL); + s[12] = rotate(s[13] ^ bc[2], 25UL); + s[13] = rotate(s[19] ^ bc[3], 8UL); + s[19] = rotate(s[23] ^ bc[2], 56UL); + s[23] = rotate(s[15] ^ bc[4], 41UL); + s[15] = rotate(s[4] ^ bc[3], 27UL); + s[4] = rotate(s[24] ^ bc[3], 14UL); + s[24] = rotate(s[21] ^ bc[0], 2UL); + s[21] = rotate(s[8] ^ bc[2], 55UL); + s[8] = rotate(s[16] ^ bc[0], 35UL); + s[16] = rotate(s[5] ^ bc[4], 36UL); + s[5] = rotate(s[3] ^ bc[2], 28UL); + s[3] = rotate(s[18] ^ bc[2], 21UL); + s[18] = rotate(s[17] ^ bc[1], 15UL); + s[17] = rotate(s[11] ^ bc[0], 10UL); + s[11] = rotate(s[7] ^ bc[1], 6UL); + s[7] = rotate(s[10] ^ bc[4], 3UL); + s[10] = rotate(tmp1, 1UL); + + tmp1 = s[0]; tmp2 = s[1]; s[0] = bitselect(s[0] ^ s[2], s[0], s[1]); s[1] = bitselect(s[1] ^ s[3], s[1], s[2]); s[2] = bitselect(s[2] ^ s[4], s[2], s[3]); s[3] = bitselect(s[3] ^ tmp1, s[3], s[4]); s[4] = bitselect(s[4] ^ tmp2, s[4], tmp1); + tmp1 = s[5]; tmp2 = s[6]; s[5] = bitselect(s[5] ^ s[7], s[5], s[6]); s[6] = bitselect(s[6] ^ s[8], s[6], s[7]); s[7] = bitselect(s[7] ^ s[9], s[7], s[8]); s[8] = bitselect(s[8] ^ tmp1, s[8], s[9]); s[9] = bitselect(s[9] ^ tmp2, s[9], tmp1); + tmp1 = s[10]; tmp2 = s[11]; s[10] = bitselect(s[10] ^ s[12], s[10], s[11]); s[11] = bitselect(s[11] ^ s[13], s[11], s[12]); s[12] = bitselect(s[12] ^ s[14], s[12], s[13]); s[13] = bitselect(s[13] ^ tmp1, s[13], s[14]); s[14] = bitselect(s[14] ^ tmp2, s[14], tmp1); + tmp1 = s[15]; tmp2 = s[16]; s[15] = bitselect(s[15] ^ s[17], s[15], s[16]); s[16] = bitselect(s[16] ^ s[18], s[16], s[17]); s[17] = bitselect(s[17] ^ s[19], s[17], s[18]); s[18] = bitselect(s[18] ^ tmp1, s[18], s[19]); s[19] = bitselect(s[19] ^ tmp2, s[19], tmp1); + tmp1 = s[20]; tmp2 = s[21]; s[20] = bitselect(s[20] ^ s[22], s[20], s[21]); s[21] = bitselect(s[21] ^ s[23], s[21], s[22]); s[22] = bitselect(s[22] ^ s[24], s[22], s[23]); s[23] = bitselect(s[23] ^ tmp1, s[23], s[24]); s[24] = bitselect(s[24] ^ tmp2, s[24], tmp1); + s[0] ^= keccakf_rndc[i]; + } +} + +static const __constant uint keccakf_rotc[24] = +{ + 1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 2, 14, + 27, 41, 56, 8, 25, 43, 62, 18, 39, 61, 20, 44 +}; + +static const __constant uint keccakf_piln[24] = +{ + 10, 7, 11, 17, 18, 3, 5, 16, 8, 21, 24, 4, + 15, 23, 19, 13, 12, 2, 20, 14, 22, 9, 6, 1 +}; + +void keccakf1600_1(ulong *st) +{ + int i, round; + ulong t, bc[5]; + + #pragma unroll 1 + for(round = 0; round < 24; ++round) + { + + // Theta + bc[0] = st[0] ^ st[5] ^ st[10] ^ st[15] ^ st[20]; + bc[1] = st[1] ^ st[6] ^ st[11] ^ st[16] ^ st[21]; + bc[2] = st[2] ^ st[7] ^ st[12] ^ st[17] ^ st[22]; + bc[3] = st[3] ^ st[8] ^ st[13] ^ st[18] ^ st[23]; + bc[4] = st[4] ^ st[9] ^ st[14] ^ st[19] ^ st[24]; + + #pragma unroll 1 + for (i = 0; i < 5; ++i) { + t = bc[(i + 4) % 5] ^ rotate(bc[(i + 1) % 5], 1UL); + st[i ] ^= t; + st[i + 5] ^= t; + st[i + 10] ^= t; + st[i + 15] ^= t; + st[i + 20] ^= t; + } + + // Rho Pi + t = st[1]; + #pragma unroll + for (i = 0; i < 24; ++i) { + bc[0] = st[keccakf_piln[i]]; + st[keccakf_piln[i]] = rotate(t, (ulong)keccakf_rotc[i]); + t = bc[0]; + } + + //ulong tmp1 = st[0]; ulong tmp2 = st[1]; st[0] = bitselect(st[0] ^ st[2], st[0], st[1]); st[1] = bitselect(st[1] ^ st[3], st[1], st[2]); st[2] = bitselect(st[2] ^ st[4], st[2], st[3]); st[3] = bitselect(st[3] ^ tmp1, st[3], st[4]); st[4] = bitselect(st[4] ^ tmp2, st[4], tmp1); + //tmp1 = st[5]; tmp2 = st[6]; st[5] = bitselect(st[5] ^ st[7], st[5], st[6]); st[6] = bitselect(st[6] ^ st[8], st[6], st[7]); st[7] = bitselect(st[7] ^ st[9], st[7], st[8]); st[8] = bitselect(st[8] ^ tmp1, st[8], st[9]); st[9] = bitselect(st[9] ^ tmp2, st[9], tmp1); + //tmp1 = st[10]; tmp2 = st[11]; st[10] = bitselect(st[10] ^ st[12], st[10], st[11]); st[11] = bitselect(st[11] ^ st[13], st[11], st[12]); st[12] = bitselect(st[12] ^ st[14], st[12], st[13]); st[13] = bitselect(st[13] ^ tmp1, st[13], st[14]); st[14] = bitselect(st[14] ^ tmp2, st[14], tmp1); + //tmp1 = st[15]; tmp2 = st[16]; st[15] = bitselect(st[15] ^ st[17], st[15], st[16]); st[16] = bitselect(st[16] ^ st[18], st[16], st[17]); st[17] = bitselect(st[17] ^ st[19], st[17], st[18]); st[18] = bitselect(st[18] ^ tmp1, st[18], st[19]); st[19] = bitselect(st[19] ^ tmp2, st[19], tmp1); + //tmp1 = st[20]; tmp2 = st[21]; st[20] = bitselect(st[20] ^ st[22], st[20], st[21]); st[21] = bitselect(st[21] ^ st[23], st[21], st[22]); st[22] = bitselect(st[22] ^ st[24], st[22], st[23]); st[23] = bitselect(st[23] ^ tmp1, st[23], st[24]); st[24] = bitselect(st[24] ^ tmp2, st[24], tmp1); + + #pragma unroll 1 + for(int i = 0; i < 25; i += 5) + { + ulong tmp[5]; + + #pragma unroll 1 + for(int x = 0; x < 5; ++x) + tmp[x] = bitselect(st[i + x] ^ st[i + ((x + 2) % 5)], st[i + x], st[i + ((x + 1) % 5)]); + + #pragma unroll 1 + for(int x = 0; x < 5; ++x) st[i + x] = tmp[x]; + } + + // Iota + st[0] ^= keccakf_rndc[round]; + } +} + +void keccakf1600_2(ulong *st) +{ + int i, round; + ulong t, bc[5]; + + #pragma unroll 1 + for(round = 0; round < 24; ++round) + { + + // Theta + //bc[0] = st[0] ^ st[5] ^ st[10] ^ st[15] ^ st[20]; + //bc[1] = st[1] ^ st[6] ^ st[11] ^ st[16] ^ st[21]; + //bc[2] = st[2] ^ st[7] ^ st[12] ^ st[17] ^ st[22]; + //bc[3] = st[3] ^ st[8] ^ st[13] ^ st[18] ^ st[23]; + //bc[4] = st[4] ^ st[9] ^ st[14] ^ st[19] ^ st[24]; + + /* + #pragma unroll + for (i = 0; i < 5; ++i) { + t = bc[(i + 4) % 5] ^ rotate(bc[(i + 1) % 5], 1UL); + st[i ] ^= t; + st[i + 5] ^= t; + st[i + 10] ^= t; + st[i + 15] ^= t; + st[i + 20] ^= t; + } + */ + + bc[0] = st[0] ^ st[5] ^ st[10] ^ st[15] ^ st[20] ^ rotate(st[2] ^ st[7] ^ st[12] ^ st[17] ^ st[22], 1UL); + bc[1] = st[1] ^ st[6] ^ st[11] ^ st[16] ^ st[21] ^ rotate(st[3] ^ st[8] ^ st[13] ^ st[18] ^ st[23], 1UL); + bc[2] = st[2] ^ st[7] ^ st[12] ^ st[17] ^ st[22] ^ rotate(st[4] ^ st[9] ^ st[14] ^ st[19] ^ st[24], 1UL); + bc[3] = st[3] ^ st[8] ^ st[13] ^ st[18] ^ st[23] ^ rotate(st[0] ^ st[5] ^ st[10] ^ st[15] ^ st[20], 1UL); + bc[4] = st[4] ^ st[9] ^ st[14] ^ st[19] ^ st[24] ^ rotate(st[1] ^ st[6] ^ st[11] ^ st[16] ^ st[21], 1UL); + + st[0] ^= bc[4]; + st[5] ^= bc[4]; + st[10] ^= bc[4]; + st[15] ^= bc[4]; + st[20] ^= bc[4]; + + st[1] ^= bc[0]; + st[6] ^= bc[0]; + st[11] ^= bc[0]; + st[16] ^= bc[0]; + st[21] ^= bc[0]; + + st[2] ^= bc[1]; + st[7] ^= bc[1]; + st[12] ^= bc[1]; + st[17] ^= bc[1]; + st[22] ^= bc[1]; + + st[3] ^= bc[2]; + st[8] ^= bc[2]; + st[13] ^= bc[2]; + st[18] ^= bc[2]; + st[23] ^= bc[2]; + + st[4] ^= bc[3]; + st[9] ^= bc[3]; + st[14] ^= bc[3]; + st[19] ^= bc[3]; + st[24] ^= bc[3]; + + // Rho Pi + t = st[1]; + #pragma unroll + for (i = 0; i < 24; ++i) { + bc[0] = st[keccakf_piln[i]]; + st[keccakf_piln[i]] = rotate(t, (ulong)keccakf_rotc[i]); + t = bc[0]; + } + + + + /*ulong tmp1 = st[1] ^ bc[0]; + + st[0] ^= bc[4]; + st[1] = rotate(st[6] ^ bc[0], 44UL); + st[6] = rotate(st[9] ^ bc[3], 20UL); + st[9] = rotate(st[22] ^ bc[1], 61UL); + st[22] = rotate(st[14] ^ bc[3], 39UL); + st[14] = rotate(st[20] ^ bc[4], 18UL); + st[20] = rotate(st[2] ^ bc[1], 62UL); + st[2] = rotate(st[12] ^ bc[1], 43UL); + st[12] = rotate(st[13] ^ bc[2], 25UL); + st[13] = rotate(st[19] ^ bc[3], 8UL); + st[19] = rotate(st[23] ^ bc[2], 56UL); + st[23] = rotate(st[15] ^ bc[4], 41UL); + st[15] = rotate(st[4] ^ bc[3], 27UL); + st[4] = rotate(st[24] ^ bc[3], 14UL); + st[24] = rotate(st[21] ^ bc[0], 2UL); + st[21] = rotate(st[8] ^ bc[2], 55UL); + st[8] = rotate(st[16] ^ bc[0], 35UL); + st[16] = rotate(st[5] ^ bc[4], 36UL); + st[5] = rotate(st[3] ^ bc[2], 28UL); + st[3] = rotate(st[18] ^ bc[2], 21UL); + st[18] = rotate(st[17] ^ bc[1], 15UL); + st[17] = rotate(st[11] ^ bc[0], 10UL); + st[11] = rotate(st[7] ^ bc[1], 6UL); + st[7] = rotate(st[10] ^ bc[4], 3UL); + st[10] = rotate(tmp1, 1UL); + */ + + + //ulong tmp1 = st[0]; ulong tmp2 = st[1]; st[0] = bitselect(st[0] ^ st[2], st[0], st[1]); st[1] = bitselect(st[1] ^ st[3], st[1], st[2]); st[2] = bitselect(st[2] ^ st[4], st[2], st[3]); st[3] = bitselect(st[3] ^ tmp1, st[3], st[4]); st[4] = bitselect(st[4] ^ tmp2, st[4], tmp1); + //tmp1 = st[5]; tmp2 = st[6]; st[5] = bitselect(st[5] ^ st[7], st[5], st[6]); st[6] = bitselect(st[6] ^ st[8], st[6], st[7]); st[7] = bitselect(st[7] ^ st[9], st[7], st[8]); st[8] = bitselect(st[8] ^ tmp1, st[8], st[9]); st[9] = bitselect(st[9] ^ tmp2, st[9], tmp1); + //tmp1 = st[10]; tmp2 = st[11]; st[10] = bitselect(st[10] ^ st[12], st[10], st[11]); st[11] = bitselect(st[11] ^ st[13], st[11], st[12]); st[12] = bitselect(st[12] ^ st[14], st[12], st[13]); st[13] = bitselect(st[13] ^ tmp1, st[13], st[14]); st[14] = bitselect(st[14] ^ tmp2, st[14], tmp1); + //tmp1 = st[15]; tmp2 = st[16]; st[15] = bitselect(st[15] ^ st[17], st[15], st[16]); st[16] = bitselect(st[16] ^ st[18], st[16], st[17]); st[17] = bitselect(st[17] ^ st[19], st[17], st[18]); st[18] = bitselect(st[18] ^ tmp1, st[18], st[19]); st[19] = bitselect(st[19] ^ tmp2, st[19], tmp1); + //tmp1 = st[20]; tmp2 = st[21]; st[20] = bitselect(st[20] ^ st[22], st[20], st[21]); st[21] = bitselect(st[21] ^ st[23], st[21], st[22]); st[22] = bitselect(st[22] ^ st[24], st[22], st[23]); st[23] = bitselect(st[23] ^ tmp1, st[23], st[24]); st[24] = bitselect(st[24] ^ tmp2, st[24], tmp1); + + #pragma unroll + for(int i = 0; i < 25; i += 5) + { + ulong tmp1 = st[i], tmp2 = st[i + 1]; + + st[i] = bitselect(st[i] ^ st[i + 2], st[i], st[i + 1]); + st[i + 1] = bitselect(st[i + 1] ^ st[i + 3], st[i + 1], st[i + 2]); + st[i + 2] = bitselect(st[i + 2] ^ st[i + 4], st[i + 2], st[i + 3]); + st[i + 3] = bitselect(st[i + 3] ^ tmp1, st[i + 3], st[i + 4]); + st[i + 4] = bitselect(st[i + 4] ^ tmp2, st[i + 4], tmp1); + } + + // Iota + st[0] ^= keccakf_rndc[round]; + } +} + +)===" +R"===( + +void CNKeccak(ulong *output, ulong *input) +{ + ulong st[25]; + + // Copy 72 bytes + for(int i = 0; i < 9; ++i) st[i] = input[i]; + + // Last four and '1' bit for padding + //st[9] = as_ulong((uint2)(((uint *)input)[18], 0x00000001U)); + + st[9] = (input[9] & 0x00000000FFFFFFFFUL) | 0x0000000100000000UL; + + for(int i = 10; i < 25; ++i) st[i] = 0x00UL; + + // Last bit of padding + st[16] = 0x8000000000000000UL; + + keccakf1600_1(st); + + for(int i = 0; i < 25; ++i) output[i] = st[i]; +} + +static const __constant uchar rcon[8] = { 0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40 }; + +#define BYTE(x, y) (amd_bfe((x), (y) << 3U, 8U)) + +#define SubWord(inw) ((sbox[BYTE(inw, 3)] << 24) | (sbox[BYTE(inw, 2)] << 16) | (sbox[BYTE(inw, 1)] << 8) | sbox[BYTE(inw, 0)]) + +void AESExpandKey256(uint *keybuf) +{ + //#pragma unroll 4 + for(uint c = 8, i = 1; c < 60; ++c) + { + // For 256-bit keys, an sbox permutation is done every other 4th uint generated, AND every 8th + uint t = ((!(c & 7)) || ((c & 7) == 4)) ? SubWord(keybuf[c - 1]) : keybuf[c - 1]; + + // If the uint we're generating has an index that is a multiple of 8, rotate and XOR with the round constant, + // then XOR this with previously generated uint. If it's 4 after a multiple of 8, only the sbox permutation + // is done, followed by the XOR. If neither are true, only the XOR with the previously generated uint is done. + keybuf[c] = keybuf[c - 8] ^ ((!(c & 7)) ? rotate(t, 24U) ^ as_uint((uchar4)(rcon[i++], 0U, 0U, 0U)) : t); + } +} + +#define IDX(x) (x) + +__attribute__((reqd_work_group_size(WORKSIZE, 8, 1))) +__kernel void cn0(__global ulong *input, __global uint4 *Scratchpad, __global ulong *states) +{ + ulong State[25]; + uint ExpandedKey1[256]; + __local uint AES0[256], AES1[256], AES2[256], AES3[256]; + uint4 text; + + states += (25 * (get_global_id(0) - get_global_offset(0))); + Scratchpad += ((get_global_id(0) - get_global_offset(0))) * (0x80000 >> 2); + + for(int i = get_local_id(0); i < 256; i += WORKSIZE) + { + const uint tmp = AES0_C[i]; + AES0[i] = tmp; + AES1[i] = rotate(tmp, 8U); + AES2[i] = rotate(tmp, 16U); + AES3[i] = rotate(tmp, 24U); + } + barrier(CLK_LOCAL_MEM_FENCE); + + ((ulong8 *)State)[0] = vload8(0, input); + State[8] = input[8]; + State[9] = input[9]; + State[10] = input[10]; + + ((uint *)State)[9] &= 0x00FFFFFFU; + ((uint *)State)[9] |= ((get_global_id(0)) & 0xFF) << 24; + ((uint *)State)[10] &= 0xFF000000U; + ((uint *)State)[10] |= ((get_global_id(0) >> 8)); + + for(int i = 11; i < 25; ++i) State[i] = 0x00UL; + + // Last bit of padding + State[16] = 0x8000000000000000UL; + + keccakf1600_2(State); + + mem_fence(CLK_GLOBAL_MEM_FENCE); + + #pragma unroll + for(int i = 0; i < 25; ++i) states[i] = State[i]; + + text = vload4(get_local_id(1) + 4, (__global uint *)(states)); + + #pragma unroll + for(int i = 0; i < 4; ++i) ((ulong *)ExpandedKey1)[i] = states[i]; + + AESExpandKey256(ExpandedKey1); + + mem_fence(CLK_LOCAL_MEM_FENCE); + + #pragma unroll 2 + for(int i = 0; i < 0x4000; ++i) + { + #pragma unroll + for(int j = 0; j < 10; ++j) + text = AES_Round(AES0, AES1, AES2, AES3, text, ((uint4 *)ExpandedKey1)[j]); + + Scratchpad[IDX((i << 3) + get_local_id(1))] = text; + } + + mem_fence(CLK_GLOBAL_MEM_FENCE); +} + +__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +__kernel void cn1(__global uint4 *Scratchpad, __global ulong *states) +{ + ulong a[2], b[2]; + __local uint AES0[256], AES1[256], AES2[256], AES3[256]; + + Scratchpad += ((get_global_id(0) - get_global_offset(0))) * (0x80000 >> 2); + states += (25 * (get_global_id(0) - get_global_offset(0))); + + for(int i = get_local_id(0); i < 256; i += WORKSIZE) + { + const uint tmp = AES0_C[i]; + AES0[i] = tmp; + AES1[i] = rotate(tmp, 8U); + AES2[i] = rotate(tmp, 16U); + AES3[i] = rotate(tmp, 24U); + } + barrier(CLK_LOCAL_MEM_FENCE); + + a[0] = states[0] ^ states[4]; + b[0] = states[2] ^ states[6]; + a[1] = states[1] ^ states[5]; + b[1] = states[3] ^ states[7]; + + uint4 b_x = ((uint4 *)b)[0]; + + mem_fence(CLK_LOCAL_MEM_FENCE); + + #pragma unroll 8 + for(int i = 0; i < 0x80000; ++i) + { + ulong c[2]; + + ((uint4 *)c)[0] = Scratchpad[IDX((a[0] & 0x1FFFF0) >> 4)]; + ((uint4 *)c)[0] = AES_Round(AES0, AES1, AES2, AES3, ((uint4 *)c)[0], ((uint4 *)a)[0]); + //b_x ^= ((uint4 *)c)[0]; + + Scratchpad[IDX((a[0] & 0x1FFFF0) >> 4)] = b_x ^ ((uint4 *)c)[0]; + + uint4 tmp; + tmp = Scratchpad[IDX((c[0] & 0x1FFFF0) >> 4)]; + + a[1] += c[0] * as_ulong2(tmp).s0; + a[0] += mul_hi(c[0], as_ulong2(tmp).s0); + + Scratchpad[IDX((c[0] & 0x1FFFF0) >> 4)] = ((uint4 *)a)[0]; + + ((uint4 *)a)[0] ^= tmp; + + b_x = ((uint4 *)c)[0]; + } + + mem_fence(CLK_GLOBAL_MEM_FENCE); +} + +__attribute__((reqd_work_group_size(WORKSIZE, 8, 1))) +__kernel void cn2(__global uint4 *Scratchpad, __global ulong *states, __global uint *Branch0, __global uint *Branch1, __global uint *Branch2, __global uint *Branch3) +{ + __local uint AES0[256], AES1[256], AES2[256], AES3[256]; + uint ExpandedKey2[256]; + ulong State[25]; + uint4 text; + + Scratchpad += ((get_global_id(0) - get_global_offset(0))) * (0x80000 >> 2); + states += (25 * (get_global_id(0) - get_global_offset(0))); + + for(int i = get_local_id(0); i < 256; i += WORKSIZE) + { + const uint tmp = AES0_C[i]; + AES0[i] = tmp; + AES1[i] = rotate(tmp, 8U); + AES2[i] = rotate(tmp, 16U); + AES3[i] = rotate(tmp, 24U); + } + barrier(CLK_LOCAL_MEM_FENCE); + + #if defined(__Tahiti__) || defined(__Pitcairn__) + + for(int i = 0; i < 4; ++i) ((ulong *)ExpandedKey2)[i] = states[i + 4]; + text = vload4(get_local_id(1) + 4, (__global uint *)states); + + #else + + text = vload4(get_local_id(1) + 4, (__global uint *)states); + ((uint8 *)ExpandedKey2)[0] = vload8(1, (__global uint *)states); + + #endif + + AESExpandKey256(ExpandedKey2); + + barrier(CLK_LOCAL_MEM_FENCE); + + #pragma unroll 2 + for(int i = 0; i < 0x4000; ++i) + { + text ^= Scratchpad[IDX((i << 3) + get_local_id(1))]; + + #pragma unroll + for(int j = 0; j < 10; ++j) + text = AES_Round(AES0, AES1, AES2, AES3, text, ((uint4 *)ExpandedKey2)[j]); + } + + vstore2(as_ulong2(text), get_local_id(1) + 4, states); + + barrier(CLK_GLOBAL_MEM_FENCE); + + if(!get_local_id(1)) + { + for(int i = 0; i < 25; ++i) State[i] = states[i]; + + keccakf1600_2(State); + + for(int i = 0; i < 25; ++i) states[i] = State[i]; + + switch(State[0] & 3) + { + case 0: + Branch0[atomic_inc(Branch0 + get_global_size(0))] = get_global_id(0) - get_global_offset(0); + break; + case 1: + Branch1[atomic_inc(Branch1 + get_global_size(0))] = get_global_id(0) - get_global_offset(0); + break; + case 2: + Branch2[atomic_inc(Branch2 + get_global_size(0))] = get_global_id(0) - get_global_offset(0); + break; + case 3: + Branch3[atomic_inc(Branch3 + get_global_size(0))] = get_global_id(0) - get_global_offset(0); + break; + } + } + + mem_fence(CLK_GLOBAL_MEM_FENCE); +} + +)===" +R"===( + +#define VSWAP8(x) (((x) >> 56) | (((x) >> 40) & 0x000000000000FF00UL) | (((x) >> 24) & 0x0000000000FF0000UL) \ + | (((x) >> 8) & 0x00000000FF000000UL) | (((x) << 8) & 0x000000FF00000000UL) \ + | (((x) << 24) & 0x0000FF0000000000UL) | (((x) << 40) & 0x00FF000000000000UL) | (((x) << 56) & 0xFF00000000000000UL)) + +#define VSWAP4(x) ((((x) >> 24) & 0xFFU) | (((x) >> 8) & 0xFF00U) | (((x) << 8) & 0xFF0000U) | (((x) << 24) & 0xFF000000U)) + +__kernel void Skein(__global ulong *states, __global uint *BranchBuf, __global uint *output, uint Target, ulong Threads) +{ + const ulong idx = get_global_id(0) - get_global_offset(0); + + if(idx >= Threads) return; + + states += 25 * BranchBuf[idx]; + + // skein + ulong8 h = vload8(0, SKEIN512_256_IV); + + // Type field begins with final bit, first bit, then six bits of type; the last 96 + // bits are input processed (including in the block to be processed with that tweak) + // The output transform is only one run of UBI, since we need only 256 bits of output + // The tweak for the output transform is Type = Output with the Final bit set + // T[0] for the output is 8, and I don't know why - should be message size... + ulong t[3] = { 0x00UL, 0x7000000000000000UL, 0x00UL }; + ulong8 p, m; + + for(uint i = 0; i < 4; ++i) + { + if(i < 3) t[0] += 0x40UL; + else t[0] += 0x08UL; + + t[2] = t[0] ^ t[1]; + + m = (i < 3) ? vload8(i, states) : (ulong8)(states[24], 0UL, 0UL, 0UL, 0UL, 0UL, 0UL, 0UL); + const ulong h8 = h.s0 ^ h.s1 ^ h.s2 ^ h.s3 ^ h.s4 ^ h.s5 ^ h.s6 ^ h.s7 ^ SKEIN_KS_PARITY; + p = Skein512Block(m, h, h8, t); + + h = m ^ p; + + if(i < 2) t[1] = 0x3000000000000000UL; + else t[1] = 0xB000000000000000UL; + } + + t[0] = 0x08UL; + t[1] = 0xFF00000000000000UL; + t[2] = t[0] ^ t[1]; + + p = (ulong8)(0); + const ulong h8 = h.s0 ^ h.s1 ^ h.s2 ^ h.s3 ^ h.s4 ^ h.s5 ^ h.s6 ^ h.s7 ^ SKEIN_KS_PARITY; + + p = Skein512Block(p, h, h8, t); + + //vstore8(p, 0, output); + + if(as_uint16(p).s7 <= Target) output[atomic_inc(output + 0xFF)] = BranchBuf[idx] + get_global_offset(0); + + mem_fence(CLK_GLOBAL_MEM_FENCE); +} + +#define SWAP8(x) as_ulong(as_uchar8(x).s76543210) + +__kernel void JH(__global ulong *states, __global uint *BranchBuf, __global uint *output, uint Target, ulong Threads) +{ + const uint idx = get_global_id(0) - get_global_offset(0); + + if(idx >= Threads) return; + + states += 25 * BranchBuf[idx]; + + sph_u64 h0h = 0xEBD3202C41A398EBUL, h0l = 0xC145B29C7BBECD92UL, h1h = 0xFAC7D4609151931CUL, h1l = 0x038A507ED6820026UL, h2h = 0x45B92677269E23A4UL, h2l = 0x77941AD4481AFBE0UL, h3h = 0x7A176B0226ABB5CDUL, h3l = 0xA82FFF0F4224F056UL; + sph_u64 h4h = 0x754D2E7F8996A371UL, h4l = 0x62E27DF70849141DUL, h5h = 0x948F2476F7957627UL, h5l = 0x6C29804757B6D587UL, h6h = 0x6C0D8EAC2D275E5CUL, h6l = 0x0F7A0557C6508451UL, h7h = 0xEA12247067D3E47BUL, h7l = 0x69D71CD313ABE389UL; + sph_u64 tmp; + + for(int i = 0; i < 5; ++i) + { + ulong input[8]; + + if(i < 3) + { + for(int x = 0; x < 8; ++x) input[x] = (states[(i << 3) + x]); + } + else if(i == 3) + { + input[0] = (states[24]); + input[1] = 0x80UL; + for(int x = 2; x < 8; ++x) input[x] = 0x00UL; + } + else + { + input[7] = 0x4006000000000000UL; + + for(int x = 0; x < 7; ++x) input[x] = 0x00UL; + } + + h0h ^= input[0]; + h0l ^= input[1]; + h1h ^= input[2]; + h1l ^= input[3]; + h2h ^= input[4]; + h2l ^= input[5]; + h3h ^= input[6]; + h3l ^= input[7]; + + E8; + + h4h ^= input[0]; + h4l ^= input[1]; + h5h ^= input[2]; + h5l ^= input[3]; + h6h ^= input[4]; + h6l ^= input[5]; + h7h ^= input[6]; + h7l ^= input[7]; + } + + //output[0] = h6h; + //output[1] = h6l; + //output[2] = h7h; + //output[3] = h7l; + + if(as_uint2(h7l).s1 <= Target) output[atomic_inc(output + 0xFF)] = BranchBuf[idx] + get_global_offset(0); +} + +#define SWAP4(x) as_uint(as_uchar4(x).s3210) + +__kernel void Blake(__global ulong *states, __global uint *BranchBuf, __global uint *output, uint Target, ulong Threads) +{ + const uint idx = get_global_id(0) - get_global_offset(0); + + if(idx >= Threads) return; + + states += 25 * BranchBuf[idx]; + + unsigned int m[16]; + unsigned int v[16]; + uint h[8]; + + ((uint8 *)h)[0] = vload8(0U, c_IV256); + + for(uint i = 0, bitlen = 0; i < 4; ++i) + { + if(i < 3) + { + ((uint16 *)m)[0] = vload16(i, (__global uint *)states); + for(int i = 0; i < 16; ++i) m[i] = SWAP4(m[i]); + bitlen += 512; + } + else + { + m[0] = SWAP4(((__global uint *)states)[48]); + m[1] = SWAP4(((__global uint *)states)[49]); + m[2] = 0x80000000U; + + for(int i = 3; i < 13; ++i) m[i] = 0x00U; + + m[13] = 1U; + m[14] = 0U; + m[15] = 0x640; + bitlen += 64; + } + + ((uint16 *)v)[0].lo = ((uint8 *)h)[0]; + ((uint16 *)v)[0].hi = vload8(0U, c_u256); + + //v[12] ^= (i < 3) ? (i + 1) << 9 : 1600U; + //v[13] ^= (i < 3) ? (i + 1) << 9 : 1600U; + + v[12] ^= bitlen; + v[13] ^= bitlen; + + for(int r = 0; r < 14; r++) + { + GS(0, 4, 0x8, 0xC, 0x0); + GS(1, 5, 0x9, 0xD, 0x2); + GS(2, 6, 0xA, 0xE, 0x4); + GS(3, 7, 0xB, 0xF, 0x6); + GS(0, 5, 0xA, 0xF, 0x8); + GS(1, 6, 0xB, 0xC, 0xA); + GS(2, 7, 0x8, 0xD, 0xC); + GS(3, 4, 0x9, 0xE, 0xE); + } + + ((uint8 *)h)[0] ^= ((uint8 *)v)[0] ^ ((uint8 *)v)[1]; + } + + for(int i = 0; i < 8; ++i) h[i] = SWAP4(h[i]); + + //for(int i = 0; i < 4; ++i) output[i] = ((ulong *)h)[i]; + if(h[7] <= Target) output[atomic_inc(output + 0xFF)] = BranchBuf[idx] + get_global_offset(0); +} + +__kernel void Groestl(__global ulong *states, __global uint *BranchBuf, __global uint *output, uint Target, ulong Threads) +{ + const uint idx = get_global_id(0) - get_global_offset(0); + + if(idx >= Threads) return; + + states += 25 * BranchBuf[idx]; + + ulong State[8]; + + for(int i = 0; i < 7; ++i) State[i] = 0UL; + + State[7] = 0x0001000000000000UL; + + for(uint i = 0; i < 4; ++i) + { + ulong H[8], M[8]; + + if(i < 3) + { + ((ulong8 *)M)[0] = vload8(i, states); + } + else + { + M[0] = states[24]; + M[1] = 0x80UL; + + for(int x = 2; x < 7; ++x) M[x] = 0UL; + + M[7] = 0x0400000000000000UL; + } + + for(int x = 0; x < 8; ++x) H[x] = M[x] ^ State[x]; + + PERM_SMALL_P(H); + PERM_SMALL_Q(M); + + for(int x = 0; x < 8; ++x) State[x] ^= H[x] ^ M[x]; + } + + ulong tmp[8]; + + for(int i = 0; i < 8; ++i) tmp[i] = State[i]; + + PERM_SMALL_P(State); + + for(int i = 0; i < 8; ++i) State[i] ^= tmp[i]; + + //for(int i = 0; i < 4; ++i) output[i] = State[i + 4]; + if(as_uint2(State[7]).s1 <= Target) output[atomic_inc(output + 0xFF)] = BranchBuf[idx] + get_global_offset(0); +} + +)==="
\ No newline at end of file diff --git a/xmrstak/backend/amd/amd_gpu/opencl/groestl256.cl b/xmrstak/backend/amd/amd_gpu/opencl/groestl256.cl new file mode 100644 index 0000000..1a7c96f --- /dev/null +++ b/xmrstak/backend/amd/amd_gpu/opencl/groestl256.cl @@ -0,0 +1,295 @@ +R"===( +/* $Id: groestl.c 260 2011-07-21 01:02:38Z tp $ */ +/* + * Groestl256 + * + * ==========================(LICENSE BEGIN)============================ + * Copyright (c) 2014 djm34 + * Copyright (c) 2007-2010 Projet RNRT SAPHIR + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * ===========================(LICENSE END)============================= + * + * @author Thomas Pornin <thomas.pornin@cryptolog.com> + */ + +#define SPH_C64(x) x +#define SPH_ROTL64(x, y) rotate((x), (ulong)(y)) + + +#define C64e(x) ((SPH_C64(x) >> 56) \ + | ((SPH_C64(x) >> 40) & SPH_C64(0x000000000000FF00)) \ + | ((SPH_C64(x) >> 24) & SPH_C64(0x0000000000FF0000)) \ + | ((SPH_C64(x) >> 8) & SPH_C64(0x00000000FF000000)) \ + | ((SPH_C64(x) << 8) & SPH_C64(0x000000FF00000000)) \ + | ((SPH_C64(x) << 24) & SPH_C64(0x0000FF0000000000)) \ + | ((SPH_C64(x) << 40) & SPH_C64(0x00FF000000000000)) \ + | ((SPH_C64(x) << 56) & SPH_C64(0xFF00000000000000))) + +#define B64_0(x) ((x) & 0xFF) +#define B64_1(x) (((x) >> 8) & 0xFF) +#define B64_2(x) (((x) >> 16) & 0xFF) +#define B64_3(x) (((x) >> 24) & 0xFF) +#define B64_4(x) (((x) >> 32) & 0xFF) +#define B64_5(x) (((x) >> 40) & 0xFF) +#define B64_6(x) (((x) >> 48) & 0xFF) +#define B64_7(x) ((x) >> 56) +#define R64 SPH_ROTL64 +#define PC64(j, r) ((sph_u64)((j) + (r))) +#define QC64(j, r) (((sph_u64)(r) << 56) ^ (~((sph_u64)(j) << 56))) + +static const __constant ulong T0_G[] = +{ + 0xc6a597f4a5f432c6UL, 0xf884eb9784976ff8UL, 0xee99c7b099b05eeeUL, 0xf68df78c8d8c7af6UL, + 0xff0de5170d17e8ffUL, 0xd6bdb7dcbddc0ad6UL, 0xdeb1a7c8b1c816deUL, 0x915439fc54fc6d91UL, + 0x6050c0f050f09060UL, 0x0203040503050702UL, 0xcea987e0a9e02eceUL, 0x567dac877d87d156UL, + 0xe719d52b192bcce7UL, 0xb56271a662a613b5UL, 0x4de69a31e6317c4dUL, 0xec9ac3b59ab559ecUL, + 0x8f4505cf45cf408fUL, 0x1f9d3ebc9dbca31fUL, 0x894009c040c04989UL, 0xfa87ef92879268faUL, + 0xef15c53f153fd0efUL, 0xb2eb7f26eb2694b2UL, 0x8ec90740c940ce8eUL, 0xfb0bed1d0b1de6fbUL, + 0x41ec822fec2f6e41UL, 0xb3677da967a91ab3UL, 0x5ffdbe1cfd1c435fUL, 0x45ea8a25ea256045UL, + 0x23bf46dabfdaf923UL, 0x53f7a602f7025153UL, 0xe496d3a196a145e4UL, 0x9b5b2ded5bed769bUL, + 0x75c2ea5dc25d2875UL, 0xe11cd9241c24c5e1UL, 0x3dae7ae9aee9d43dUL, 0x4c6a98be6abef24cUL, + 0x6c5ad8ee5aee826cUL, 0x7e41fcc341c3bd7eUL, 0xf502f1060206f3f5UL, 0x834f1dd14fd15283UL, + 0x685cd0e45ce48c68UL, 0x51f4a207f4075651UL, 0xd134b95c345c8dd1UL, 0xf908e9180818e1f9UL, + 0xe293dfae93ae4ce2UL, 0xab734d9573953eabUL, 0x6253c4f553f59762UL, 0x2a3f54413f416b2aUL, + 0x080c10140c141c08UL, 0x955231f652f66395UL, 0x46658caf65afe946UL, 0x9d5e21e25ee27f9dUL, + 0x3028607828784830UL, 0x37a16ef8a1f8cf37UL, 0x0a0f14110f111b0aUL, 0x2fb55ec4b5c4eb2fUL, + 0x0e091c1b091b150eUL, 0x2436485a365a7e24UL, 0x1b9b36b69bb6ad1bUL, 0xdf3da5473d4798dfUL, + 0xcd26816a266aa7cdUL, 0x4e699cbb69bbf54eUL, 0x7fcdfe4ccd4c337fUL, 0xea9fcfba9fba50eaUL, + 0x121b242d1b2d3f12UL, 0x1d9e3ab99eb9a41dUL, 0x5874b09c749cc458UL, 0x342e68722e724634UL, + 0x362d6c772d774136UL, 0xdcb2a3cdb2cd11dcUL, 0xb4ee7329ee299db4UL, 0x5bfbb616fb164d5bUL, + 0xa4f65301f601a5a4UL, 0x764decd74dd7a176UL, 0xb76175a361a314b7UL, 0x7dcefa49ce49347dUL, + 0x527ba48d7b8ddf52UL, 0xdd3ea1423e429fddUL, 0x5e71bc937193cd5eUL, 0x139726a297a2b113UL, + 0xa6f55704f504a2a6UL, 0xb96869b868b801b9UL, 0x0000000000000000UL, 0xc12c99742c74b5c1UL, + 0x406080a060a0e040UL, 0xe31fdd211f21c2e3UL, 0x79c8f243c8433a79UL, 0xb6ed772ced2c9ab6UL, + 0xd4beb3d9bed90dd4UL, 0x8d4601ca46ca478dUL, 0x67d9ce70d9701767UL, 0x724be4dd4bddaf72UL, + 0x94de3379de79ed94UL, 0x98d42b67d467ff98UL, 0xb0e87b23e82393b0UL, 0x854a11de4ade5b85UL, + 0xbb6b6dbd6bbd06bbUL, 0xc52a917e2a7ebbc5UL, 0x4fe59e34e5347b4fUL, 0xed16c13a163ad7edUL, + 0x86c51754c554d286UL, 0x9ad72f62d762f89aUL, 0x6655ccff55ff9966UL, 0x119422a794a7b611UL, + 0x8acf0f4acf4ac08aUL, 0xe910c9301030d9e9UL, 0x0406080a060a0e04UL, 0xfe81e798819866feUL, + 0xa0f05b0bf00baba0UL, 0x7844f0cc44ccb478UL, 0x25ba4ad5bad5f025UL, 0x4be3963ee33e754bUL, + 0xa2f35f0ef30eaca2UL, 0x5dfeba19fe19445dUL, 0x80c01b5bc05bdb80UL, 0x058a0a858a858005UL, + 0x3fad7eecadecd33fUL, 0x21bc42dfbcdffe21UL, 0x7048e0d848d8a870UL, 0xf104f90c040cfdf1UL, + 0x63dfc67adf7a1963UL, 0x77c1ee58c1582f77UL, 0xaf75459f759f30afUL, 0x426384a563a5e742UL, + 0x2030405030507020UL, 0xe51ad12e1a2ecbe5UL, 0xfd0ee1120e12effdUL, 0xbf6d65b76db708bfUL, + 0x814c19d44cd45581UL, 0x1814303c143c2418UL, 0x26354c5f355f7926UL, 0xc32f9d712f71b2c3UL, + 0xbee16738e13886beUL, 0x35a26afda2fdc835UL, 0x88cc0b4fcc4fc788UL, 0x2e395c4b394b652eUL, + 0x93573df957f96a93UL, 0x55f2aa0df20d5855UL, 0xfc82e39d829d61fcUL, 0x7a47f4c947c9b37aUL, + 0xc8ac8befacef27c8UL, 0xbae76f32e73288baUL, 0x322b647d2b7d4f32UL, 0xe695d7a495a442e6UL, + 0xc0a09bfba0fb3bc0UL, 0x199832b398b3aa19UL, 0x9ed12768d168f69eUL, 0xa37f5d817f8122a3UL, + 0x446688aa66aaee44UL, 0x547ea8827e82d654UL, 0x3bab76e6abe6dd3bUL, 0x0b83169e839e950bUL, + 0x8cca0345ca45c98cUL, 0xc729957b297bbcc7UL, 0x6bd3d66ed36e056bUL, 0x283c50443c446c28UL, + 0xa779558b798b2ca7UL, 0xbce2633de23d81bcUL, 0x161d2c271d273116UL, 0xad76419a769a37adUL, + 0xdb3bad4d3b4d96dbUL, 0x6456c8fa56fa9e64UL, 0x744ee8d24ed2a674UL, 0x141e28221e223614UL, + 0x92db3f76db76e492UL, 0x0c0a181e0a1e120cUL, 0x486c90b46cb4fc48UL, 0xb8e46b37e4378fb8UL, + 0x9f5d25e75de7789fUL, 0xbd6e61b26eb20fbdUL, 0x43ef862aef2a6943UL, 0xc4a693f1a6f135c4UL, + 0x39a872e3a8e3da39UL, 0x31a462f7a4f7c631UL, 0xd337bd5937598ad3UL, 0xf28bff868b8674f2UL, + 0xd532b156325683d5UL, 0x8b430dc543c54e8bUL, 0x6e59dceb59eb856eUL, 0xdab7afc2b7c218daUL, + 0x018c028f8c8f8e01UL, 0xb16479ac64ac1db1UL, 0x9cd2236dd26df19cUL, 0x49e0923be03b7249UL, + 0xd8b4abc7b4c71fd8UL, 0xacfa4315fa15b9acUL, 0xf307fd090709faf3UL, 0xcf25856f256fa0cfUL, + 0xcaaf8feaafea20caUL, 0xf48ef3898e897df4UL, 0x47e98e20e9206747UL, 0x1018202818283810UL, + 0x6fd5de64d5640b6fUL, 0xf088fb83888373f0UL, 0x4a6f94b16fb1fb4aUL, 0x5c72b8967296ca5cUL, + 0x3824706c246c5438UL, 0x57f1ae08f1085f57UL, 0x73c7e652c7522173UL, 0x975135f351f36497UL, + 0xcb238d652365aecbUL, 0xa17c59847c8425a1UL, 0xe89ccbbf9cbf57e8UL, 0x3e217c6321635d3eUL, + 0x96dd377cdd7cea96UL, 0x61dcc27fdc7f1e61UL, 0x0d861a9186919c0dUL, 0x0f851e9485949b0fUL, + 0xe090dbab90ab4be0UL, 0x7c42f8c642c6ba7cUL, 0x71c4e257c4572671UL, 0xccaa83e5aae529ccUL, + 0x90d83b73d873e390UL, 0x06050c0f050f0906UL, 0xf701f5030103f4f7UL, 0x1c12383612362a1cUL, + 0xc2a39ffea3fe3cc2UL, 0x6a5fd4e15fe18b6aUL, 0xaef94710f910beaeUL, 0x69d0d26bd06b0269UL, + 0x17912ea891a8bf17UL, 0x995829e858e87199UL, 0x3a2774692769533aUL, 0x27b94ed0b9d0f727UL, + 0xd938a948384891d9UL, 0xeb13cd351335deebUL, 0x2bb356ceb3cee52bUL, 0x2233445533557722UL, + 0xd2bbbfd6bbd604d2UL, 0xa9704990709039a9UL, 0x07890e8089808707UL, 0x33a766f2a7f2c133UL, + 0x2db65ac1b6c1ec2dUL, 0x3c22786622665a3cUL, 0x15922aad92adb815UL, 0xc92089602060a9c9UL, + 0x874915db49db5c87UL, 0xaaff4f1aff1ab0aaUL, 0x5078a0887888d850UL, 0xa57a518e7a8e2ba5UL, + 0x038f068a8f8a8903UL, 0x59f8b213f8134a59UL, 0x0980129b809b9209UL, 0x1a1734391739231aUL, + 0x65daca75da751065UL, 0xd731b553315384d7UL, 0x84c61351c651d584UL, 0xd0b8bbd3b8d303d0UL, + 0x82c31f5ec35edc82UL, 0x29b052cbb0cbe229UL, 0x5a77b4997799c35aUL, 0x1e113c3311332d1eUL, + 0x7bcbf646cb463d7bUL, 0xa8fc4b1ffc1fb7a8UL, 0x6dd6da61d6610c6dUL, 0x2c3a584e3a4e622cUL +}; + +)===" +R"===( + +static const __constant ulong T4_G[] = +{ + 0xA5F432C6C6A597F4UL, 0x84976FF8F884EB97UL, 0x99B05EEEEE99C7B0UL, 0x8D8C7AF6F68DF78CUL, + 0x0D17E8FFFF0DE517UL, 0xBDDC0AD6D6BDB7DCUL, 0xB1C816DEDEB1A7C8UL, 0x54FC6D91915439FCUL, + 0x50F090606050C0F0UL, 0x0305070202030405UL, 0xA9E02ECECEA987E0UL, 0x7D87D156567DAC87UL, + 0x192BCCE7E719D52BUL, 0x62A613B5B56271A6UL, 0xE6317C4D4DE69A31UL, 0x9AB559ECEC9AC3B5UL, + 0x45CF408F8F4505CFUL, 0x9DBCA31F1F9D3EBCUL, 0x40C04989894009C0UL, 0x879268FAFA87EF92UL, + 0x153FD0EFEF15C53FUL, 0xEB2694B2B2EB7F26UL, 0xC940CE8E8EC90740UL, 0x0B1DE6FBFB0BED1DUL, + 0xEC2F6E4141EC822FUL, 0x67A91AB3B3677DA9UL, 0xFD1C435F5FFDBE1CUL, 0xEA25604545EA8A25UL, + 0xBFDAF92323BF46DAUL, 0xF702515353F7A602UL, 0x96A145E4E496D3A1UL, 0x5BED769B9B5B2DEDUL, + 0xC25D287575C2EA5DUL, 0x1C24C5E1E11CD924UL, 0xAEE9D43D3DAE7AE9UL, 0x6ABEF24C4C6A98BEUL, + 0x5AEE826C6C5AD8EEUL, 0x41C3BD7E7E41FCC3UL, 0x0206F3F5F502F106UL, 0x4FD15283834F1DD1UL, + 0x5CE48C68685CD0E4UL, 0xF407565151F4A207UL, 0x345C8DD1D134B95CUL, 0x0818E1F9F908E918UL, + 0x93AE4CE2E293DFAEUL, 0x73953EABAB734D95UL, 0x53F597626253C4F5UL, 0x3F416B2A2A3F5441UL, + 0x0C141C08080C1014UL, 0x52F66395955231F6UL, 0x65AFE94646658CAFUL, 0x5EE27F9D9D5E21E2UL, + 0x2878483030286078UL, 0xA1F8CF3737A16EF8UL, 0x0F111B0A0A0F1411UL, 0xB5C4EB2F2FB55EC4UL, + 0x091B150E0E091C1BUL, 0x365A7E242436485AUL, 0x9BB6AD1B1B9B36B6UL, 0x3D4798DFDF3DA547UL, + 0x266AA7CDCD26816AUL, 0x69BBF54E4E699CBBUL, 0xCD4C337F7FCDFE4CUL, 0x9FBA50EAEA9FCFBAUL, + 0x1B2D3F12121B242DUL, 0x9EB9A41D1D9E3AB9UL, 0x749CC4585874B09CUL, 0x2E724634342E6872UL, + 0x2D774136362D6C77UL, 0xB2CD11DCDCB2A3CDUL, 0xEE299DB4B4EE7329UL, 0xFB164D5B5BFBB616UL, + 0xF601A5A4A4F65301UL, 0x4DD7A176764DECD7UL, 0x61A314B7B76175A3UL, 0xCE49347D7DCEFA49UL, + 0x7B8DDF52527BA48DUL, 0x3E429FDDDD3EA142UL, 0x7193CD5E5E71BC93UL, 0x97A2B113139726A2UL, + 0xF504A2A6A6F55704UL, 0x68B801B9B96869B8UL, 0x0000000000000000UL, 0x2C74B5C1C12C9974UL, + 0x60A0E040406080A0UL, 0x1F21C2E3E31FDD21UL, 0xC8433A7979C8F243UL, 0xED2C9AB6B6ED772CUL, + 0xBED90DD4D4BEB3D9UL, 0x46CA478D8D4601CAUL, 0xD970176767D9CE70UL, 0x4BDDAF72724BE4DDUL, + 0xDE79ED9494DE3379UL, 0xD467FF9898D42B67UL, 0xE82393B0B0E87B23UL, 0x4ADE5B85854A11DEUL, + 0x6BBD06BBBB6B6DBDUL, 0x2A7EBBC5C52A917EUL, 0xE5347B4F4FE59E34UL, 0x163AD7EDED16C13AUL, + 0xC554D28686C51754UL, 0xD762F89A9AD72F62UL, 0x55FF99666655CCFFUL, 0x94A7B611119422A7UL, + 0xCF4AC08A8ACF0F4AUL, 0x1030D9E9E910C930UL, 0x060A0E040406080AUL, 0x819866FEFE81E798UL, + 0xF00BABA0A0F05B0BUL, 0x44CCB4787844F0CCUL, 0xBAD5F02525BA4AD5UL, 0xE33E754B4BE3963EUL, + 0xF30EACA2A2F35F0EUL, 0xFE19445D5DFEBA19UL, 0xC05BDB8080C01B5BUL, 0x8A858005058A0A85UL, + 0xADECD33F3FAD7EECUL, 0xBCDFFE2121BC42DFUL, 0x48D8A8707048E0D8UL, 0x040CFDF1F104F90CUL, + 0xDF7A196363DFC67AUL, 0xC1582F7777C1EE58UL, 0x759F30AFAF75459FUL, 0x63A5E742426384A5UL, + 0x3050702020304050UL, 0x1A2ECBE5E51AD12EUL, 0x0E12EFFDFD0EE112UL, 0x6DB708BFBF6D65B7UL, + 0x4CD45581814C19D4UL, 0x143C24181814303CUL, 0x355F792626354C5FUL, 0x2F71B2C3C32F9D71UL, + 0xE13886BEBEE16738UL, 0xA2FDC83535A26AFDUL, 0xCC4FC78888CC0B4FUL, 0x394B652E2E395C4BUL, + 0x57F96A9393573DF9UL, 0xF20D585555F2AA0DUL, 0x829D61FCFC82E39DUL, 0x47C9B37A7A47F4C9UL, + 0xACEF27C8C8AC8BEFUL, 0xE73288BABAE76F32UL, 0x2B7D4F32322B647DUL, 0x95A442E6E695D7A4UL, + 0xA0FB3BC0C0A09BFBUL, 0x98B3AA19199832B3UL, 0xD168F69E9ED12768UL, 0x7F8122A3A37F5D81UL, + 0x66AAEE44446688AAUL, 0x7E82D654547EA882UL, 0xABE6DD3B3BAB76E6UL, 0x839E950B0B83169EUL, + 0xCA45C98C8CCA0345UL, 0x297BBCC7C729957BUL, 0xD36E056B6BD3D66EUL, 0x3C446C28283C5044UL, + 0x798B2CA7A779558BUL, 0xE23D81BCBCE2633DUL, 0x1D273116161D2C27UL, 0x769A37ADAD76419AUL, + 0x3B4D96DBDB3BAD4DUL, 0x56FA9E646456C8FAUL, 0x4ED2A674744EE8D2UL, 0x1E223614141E2822UL, + 0xDB76E49292DB3F76UL, 0x0A1E120C0C0A181EUL, 0x6CB4FC48486C90B4UL, 0xE4378FB8B8E46B37UL, + 0x5DE7789F9F5D25E7UL, 0x6EB20FBDBD6E61B2UL, 0xEF2A694343EF862AUL, 0xA6F135C4C4A693F1UL, + 0xA8E3DA3939A872E3UL, 0xA4F7C63131A462F7UL, 0x37598AD3D337BD59UL, 0x8B8674F2F28BFF86UL, + 0x325683D5D532B156UL, 0x43C54E8B8B430DC5UL, 0x59EB856E6E59DCEBUL, 0xB7C218DADAB7AFC2UL, + 0x8C8F8E01018C028FUL, 0x64AC1DB1B16479ACUL, 0xD26DF19C9CD2236DUL, 0xE03B724949E0923BUL, + 0xB4C71FD8D8B4ABC7UL, 0xFA15B9ACACFA4315UL, 0x0709FAF3F307FD09UL, 0x256FA0CFCF25856FUL, + 0xAFEA20CACAAF8FEAUL, 0x8E897DF4F48EF389UL, 0xE920674747E98E20UL, 0x1828381010182028UL, + 0xD5640B6F6FD5DE64UL, 0x888373F0F088FB83UL, 0x6FB1FB4A4A6F94B1UL, 0x7296CA5C5C72B896UL, + 0x246C54383824706CUL, 0xF1085F5757F1AE08UL, 0xC752217373C7E652UL, 0x51F36497975135F3UL, + 0x2365AECBCB238D65UL, 0x7C8425A1A17C5984UL, 0x9CBF57E8E89CCBBFUL, 0x21635D3E3E217C63UL, + 0xDD7CEA9696DD377CUL, 0xDC7F1E6161DCC27FUL, 0x86919C0D0D861A91UL, 0x85949B0F0F851E94UL, + 0x90AB4BE0E090DBABUL, 0x42C6BA7C7C42F8C6UL, 0xC457267171C4E257UL, 0xAAE529CCCCAA83E5UL, + 0xD873E39090D83B73UL, 0x050F090606050C0FUL, 0x0103F4F7F701F503UL, 0x12362A1C1C123836UL, + 0xA3FE3CC2C2A39FFEUL, 0x5FE18B6A6A5FD4E1UL, 0xF910BEAEAEF94710UL, 0xD06B026969D0D26BUL, + 0x91A8BF1717912EA8UL, 0x58E87199995829E8UL, 0x2769533A3A277469UL, 0xB9D0F72727B94ED0UL, + 0x384891D9D938A948UL, 0x1335DEEBEB13CD35UL, 0xB3CEE52B2BB356CEUL, 0x3355772222334455UL, + 0xBBD604D2D2BBBFD6UL, 0x709039A9A9704990UL, 0x8980870707890E80UL, 0xA7F2C13333A766F2UL, + 0xB6C1EC2D2DB65AC1UL, 0x22665A3C3C227866UL, 0x92ADB81515922AADUL, 0x2060A9C9C9208960UL, + 0x49DB5C87874915DBUL, 0xFF1AB0AAAAFF4F1AUL, 0x7888D8505078A088UL, 0x7A8E2BA5A57A518EUL, + 0x8F8A8903038F068AUL, 0xF8134A5959F8B213UL, 0x809B92090980129BUL, 0x1739231A1A173439UL, + 0xDA75106565DACA75UL, 0x315384D7D731B553UL, 0xC651D58484C61351UL, 0xB8D303D0D0B8BBD3UL, + 0xC35EDC8282C31F5EUL, 0xB0CBE22929B052CBUL, 0x7799C35A5A77B499UL, 0x11332D1E1E113C33UL, + 0xCB463D7B7BCBF646UL, 0xFC1FB7A8A8FC4B1FUL, 0xD6610C6D6DD6DA61UL, 0x3A4E622C2C3A584EUL +}; + +#define RSTT(d, a, b0, b1, b2, b3, b4, b5, b6, b7) do { \ + t[d] = T0_G[B64_0(a[b0])] \ + ^ R64(T0_G[B64_1(a[b1])], 8) \ + ^ R64(T0_G[B64_2(a[b2])], 16) \ + ^ R64(T0_G[B64_3(a[b3])], 24) \ + ^ T4_G[B64_4(a[b4])] \ + ^ R64(T4_G[B64_5(a[b5])], 8) \ + ^ R64(T4_G[B64_6(a[b6])], 16) \ + ^ R64(T4_G[B64_7(a[b7])], 24); \ + } while (0) + +#define ROUND_SMALL_P(a, r) do { \ + ulong t[8]; \ + a[0] ^= PC64(0x00, r); \ + a[1] ^= PC64(0x10, r); \ + a[2] ^= PC64(0x20, r); \ + a[3] ^= PC64(0x30, r); \ + a[4] ^= PC64(0x40, r); \ + a[5] ^= PC64(0x50, r); \ + a[6] ^= PC64(0x60, r); \ + a[7] ^= PC64(0x70, r); \ + RSTT(0, a, 0, 1, 2, 3, 4, 5, 6, 7); \ + RSTT(1, a, 1, 2, 3, 4, 5, 6, 7, 0); \ + RSTT(2, a, 2, 3, 4, 5, 6, 7, 0, 1); \ + RSTT(3, a, 3, 4, 5, 6, 7, 0, 1, 2); \ + RSTT(4, a, 4, 5, 6, 7, 0, 1, 2, 3); \ + RSTT(5, a, 5, 6, 7, 0, 1, 2, 3, 4); \ + RSTT(6, a, 6, 7, 0, 1, 2, 3, 4, 5); \ + RSTT(7, a, 7, 0, 1, 2, 3, 4, 5, 6); \ + a[0] = t[0]; \ + a[1] = t[1]; \ + a[2] = t[2]; \ + a[3] = t[3]; \ + a[4] = t[4]; \ + a[5] = t[5]; \ + a[6] = t[6]; \ + a[7] = t[7]; \ + } while (0) + +#define ROUND_SMALL_Pf(a,r) do { \ + a[0] ^= PC64(0x00, r); \ + a[1] ^= PC64(0x10, r); \ + a[2] ^= PC64(0x20, r); \ + a[3] ^= PC64(0x30, r); \ + a[4] ^= PC64(0x40, r); \ + a[5] ^= PC64(0x50, r); \ + a[6] ^= PC64(0x60, r); \ + a[7] ^= PC64(0x70, r); \ + RSTT(7, a, 7, 0, 1, 2, 3, 4, 5, 6); \ + a[7] = t[7]; \ + } while (0) + +#define ROUND_SMALL_Q(a, r) do { \ + ulong t[8]; \ + a[0] ^= QC64(0x00, r); \ + a[1] ^= QC64(0x10, r); \ + a[2] ^= QC64(0x20, r); \ + a[3] ^= QC64(0x30, r); \ + a[4] ^= QC64(0x40, r); \ + a[5] ^= QC64(0x50, r); \ + a[6] ^= QC64(0x60, r); \ + a[7] ^= QC64(0x70, r); \ + RSTT(0, a, 1, 3, 5, 7, 0, 2, 4, 6); \ + RSTT(1, a, 2, 4, 6, 0, 1, 3, 5, 7); \ + RSTT(2, a, 3, 5, 7, 1, 2, 4, 6, 0); \ + RSTT(3, a, 4, 6, 0, 2, 3, 5, 7, 1); \ + RSTT(4, a, 5, 7, 1, 3, 4, 6, 0, 2); \ + RSTT(5, a, 6, 0, 2, 4, 5, 7, 1, 3); \ + RSTT(6, a, 7, 1, 3, 5, 6, 0, 2, 4); \ + RSTT(7, a, 0, 2, 4, 6, 7, 1, 3, 5); \ + a[0] = t[0]; \ + a[1] = t[1]; \ + a[2] = t[2]; \ + a[3] = t[3]; \ + a[4] = t[4]; \ + a[5] = t[5]; \ + a[6] = t[6]; \ + a[7] = t[7]; \ + } while (0) + +#define PERM_SMALL_P(a) do { \ + for (int r = 0; r < 10; r ++) \ + ROUND_SMALL_P(a, r); \ + } while (0) + +#define PERM_SMALL_Pf(a) do { \ + for (int r = 0; r < 9; r ++) { \ + ROUND_SMALL_P(a, r);} \ + ROUND_SMALL_Pf(a,9); \ + } while (0) + +#define PERM_SMALL_Q(a) do { \ + for (int r = 0; r < 10; r ++) \ + ROUND_SMALL_Q(a, r); \ + } while (0) + +)===" +
\ No newline at end of file diff --git a/xmrstak/backend/amd/amd_gpu/opencl/jh.cl b/xmrstak/backend/amd/amd_gpu/opencl/jh.cl new file mode 100644 index 0000000..fe70ea3 --- /dev/null +++ b/xmrstak/backend/amd/amd_gpu/opencl/jh.cl @@ -0,0 +1,274 @@ +R"===( +/* $Id: jh.c 255 2011-06-07 19:50:20Z tp $ */ +/* + * JH implementation. + * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2007-2010 Projet RNRT SAPHIR + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * ===========================(LICENSE END)============================= + * + * @author Thomas Pornin <thomas.pornin@cryptolog.com> + */ + +#define SPH_JH_64 1 +#define SPH_LITTLE_ENDIAN 1 + +#define SPH_C32(x) x +#define SPH_C64(x) x +typedef uint sph_u32; +typedef ulong sph_u64; + +/* + * The internal bitslice representation may use either big-endian or + * little-endian (true bitslice operations do not care about the bit + * ordering, and the bit-swapping linear operations in JH happen to + * be invariant through endianness-swapping). The constants must be + * defined according to the chosen endianness; we use some + * byte-swapping macros for that. + */ + +#if SPH_LITTLE_ENDIAN + +#define C32e(x) ((SPH_C32(x) >> 24) \ + | ((SPH_C32(x) >> 8) & SPH_C32(0x0000FF00)) \ + | ((SPH_C32(x) << 8) & SPH_C32(0x00FF0000)) \ + | ((SPH_C32(x) << 24) & SPH_C32(0xFF000000))) +#define dec32e_aligned sph_dec32le_aligned +#define enc32e sph_enc32le + +#define C64e(x) ((SPH_C64(x) >> 56) \ + | ((SPH_C64(x) >> 40) & SPH_C64(0x000000000000FF00)) \ + | ((SPH_C64(x) >> 24) & SPH_C64(0x0000000000FF0000)) \ + | ((SPH_C64(x) >> 8) & SPH_C64(0x00000000FF000000)) \ + | ((SPH_C64(x) << 8) & SPH_C64(0x000000FF00000000)) \ + | ((SPH_C64(x) << 24) & SPH_C64(0x0000FF0000000000)) \ + | ((SPH_C64(x) << 40) & SPH_C64(0x00FF000000000000)) \ + | ((SPH_C64(x) << 56) & SPH_C64(0xFF00000000000000))) +#define dec64e_aligned sph_dec64le_aligned +#define enc64e sph_enc64le + +#else + +#define C32e(x) SPH_C32(x) +#define dec32e_aligned sph_dec32be_aligned +#define enc32e sph_enc32be +#define C64e(x) SPH_C64(x) +#define dec64e_aligned sph_dec64be_aligned +#define enc64e sph_enc64be + +#endif + +#define Sb(x0, x1, x2, x3, c) do { \ + x3 = ~x3; \ + x0 ^= (c) & ~x2; \ + tmp = (c) ^ (x0 & x1); \ + x0 ^= x2 & x3; \ + x3 ^= ~x1 & x2; \ + x1 ^= x0 & x2; \ + x2 ^= x0 & ~x3; \ + x0 ^= x1 | x3; \ + x3 ^= x1 & x2; \ + x1 ^= tmp & x0; \ + x2 ^= tmp; \ + } while (0) + +#define Lb(x0, x1, x2, x3, x4, x5, x6, x7) do { \ + x4 ^= x1; \ + x5 ^= x2; \ + x6 ^= x3 ^ x0; \ + x7 ^= x0; \ + x0 ^= x5; \ + x1 ^= x6; \ + x2 ^= x7 ^ x4; \ + x3 ^= x4; \ + } while (0) + +static const __constant ulong C[] = +{ + 0x67F815DFA2DED572UL, 0x571523B70A15847BUL, 0xF6875A4D90D6AB81UL, 0x402BD1C3C54F9F4EUL, + 0x9CFA455CE03A98EAUL, 0x9A99B26699D2C503UL, 0x8A53BBF2B4960266UL, 0x31A2DB881A1456B5UL, + 0xDB0E199A5C5AA303UL, 0x1044C1870AB23F40UL, 0x1D959E848019051CUL, 0xDCCDE75EADEB336FUL, + 0x416BBF029213BA10UL, 0xD027BBF7156578DCUL, 0x5078AA3739812C0AUL, 0xD3910041D2BF1A3FUL, + 0x907ECCF60D5A2D42UL, 0xCE97C0929C9F62DDUL, 0xAC442BC70BA75C18UL, 0x23FCC663D665DFD1UL, + 0x1AB8E09E036C6E97UL, 0xA8EC6C447E450521UL, 0xFA618E5DBB03F1EEUL, 0x97818394B29796FDUL, + 0x2F3003DB37858E4AUL, 0x956A9FFB2D8D672AUL, 0x6C69B8F88173FE8AUL, 0x14427FC04672C78AUL, + 0xC45EC7BD8F15F4C5UL, 0x80BB118FA76F4475UL, 0xBC88E4AEB775DE52UL, 0xF4A3A6981E00B882UL, + 0x1563A3A9338FF48EUL, 0x89F9B7D524565FAAUL, 0xFDE05A7C20EDF1B6UL, 0x362C42065AE9CA36UL, + 0x3D98FE4E433529CEUL, 0xA74B9A7374F93A53UL, 0x86814E6F591FF5D0UL, 0x9F5AD8AF81AD9D0EUL, + 0x6A6234EE670605A7UL, 0x2717B96EBE280B8BUL, 0x3F1080C626077447UL, 0x7B487EC66F7EA0E0UL, + 0xC0A4F84AA50A550DUL, 0x9EF18E979FE7E391UL, 0xD48D605081727686UL, 0x62B0E5F3415A9E7EUL, + 0x7A205440EC1F9FFCUL, 0x84C9F4CE001AE4E3UL, 0xD895FA9DF594D74FUL, 0xA554C324117E2E55UL, + 0x286EFEBD2872DF5BUL, 0xB2C4A50FE27FF578UL, 0x2ED349EEEF7C8905UL, 0x7F5928EB85937E44UL, + 0x4A3124B337695F70UL, 0x65E4D61DF128865EUL, 0xE720B95104771BC7UL, 0x8A87D423E843FE74UL, + 0xF2947692A3E8297DUL, 0xC1D9309B097ACBDDUL, 0xE01BDC5BFB301B1DUL, 0xBF829CF24F4924DAUL, + 0xFFBF70B431BAE7A4UL, 0x48BCF8DE0544320DUL, 0x39D3BB5332FCAE3BUL, 0xA08B29E0C1C39F45UL, + 0x0F09AEF7FD05C9E5UL, 0x34F1904212347094UL, 0x95ED44E301B771A2UL, 0x4A982F4F368E3BE9UL, + 0x15F66CA0631D4088UL, 0xFFAF52874B44C147UL, 0x30C60AE2F14ABB7EUL, 0xE68C6ECCC5B67046UL, + 0x00CA4FBD56A4D5A4UL, 0xAE183EC84B849DDAUL, 0xADD1643045CE5773UL, 0x67255C1468CEA6E8UL, + 0x16E10ECBF28CDAA3UL, 0x9A99949A5806E933UL, 0x7B846FC220B2601FUL, 0x1885D1A07FACCED1UL, + 0xD319DD8DA15B5932UL, 0x46B4A5AAC01C9A50UL, 0xBA6B04E467633D9FUL, 0x7EEE560BAB19CAF6UL, + 0x742128A9EA79B11FUL, 0xEE51363B35F7BDE9UL, 0x76D350755AAC571DUL, 0x01707DA3FEC2463AUL, + 0x42D8A498AFC135F7UL, 0x79676B9E20ECED78UL, 0xA8DB3AEA15638341UL, 0x832C83324D3BC3FAUL, + 0xF347271C1F3B40A7UL, 0x9A762DB734F04059UL, 0xFD4F21D26C4E3EE7UL, 0xEF5957DC398DFDB8UL, + 0xDAEB492B490C9B8DUL, 0x0D70F36849D7A25BUL, 0x84558D7AD0AE3B7DUL, 0x658EF8E4F0E9A5F5UL, + 0x533B1036F4A2B8A0UL, 0x5AEC3E759E07A80CUL, 0x4F88E85692946891UL, 0x4CBCBAF8555CB05BUL, + 0x7B9487F3993BBBE3UL, 0x5D1C6B72D6F4DA75UL, 0x6DB334DC28ACAE64UL, 0x71DB28B850A5346CUL, + 0x2A518D10F2E261F8UL, 0xFC75DD593364DBE3UL, 0xA23FCE43F1BCAC1CUL, 0xB043E8023CD1BB67UL, + 0x75A12988CA5B0A33UL, 0x5C5316B44D19347FUL, 0x1E4D790EC3943B92UL, 0x3FAFEEB6D7757479UL, + 0x21391ABEF7D4A8EAUL, 0x5127234C097EF45CUL, 0xD23C32BA5324A326UL, 0xADD5A66D4A17A344UL, + 0x08C9F2AFA63E1DB5UL, 0x563C6B91983D5983UL, 0x4D608672A17CF84CUL, 0xF6C76E08CC3EE246UL, + 0x5E76BCB1B333982FUL, 0x2AE6C4EFA566D62BUL, 0x36D4C1BEE8B6F406UL, 0x6321EFBC1582EE74UL, + 0x69C953F40D4EC1FDUL, 0x26585806C45A7DA7UL, 0x16FAE0061614C17EUL, 0x3F9D63283DAF907EUL, + 0x0CD29B00E3F2C9D2UL, 0x300CD4B730CEAA5FUL, 0x9832E0F216512A74UL, 0x9AF8CEE3D830EB0DUL, + 0x9279F1B57B9EC54BUL, 0xD36886046EE651FFUL, 0x316796E6574D239BUL, 0x05750A17F3A6E6CCUL, + 0xCE6C3213D98176B1UL, 0x62A205F88452173CUL, 0x47154778B3CB2BF4UL, 0x486A9323825446FFUL, + 0x65655E4E0758DF38UL, 0x8E5086FC897CFCF2UL, 0x86CA0BD0442E7031UL, 0x4E477830A20940F0UL, + 0x8338F7D139EEA065UL, 0xBD3A2CE437E95EF7UL, 0x6FF8130126B29721UL, 0xE7DE9FEFD1ED44A3UL, + 0xD992257615DFA08BUL, 0xBE42DC12F6F7853CUL, 0x7EB027AB7CECA7D8UL, 0xDEA83EAADA7D8D53UL, + 0xD86902BD93CE25AAUL, 0xF908731AFD43F65AUL, 0xA5194A17DAEF5FC0UL, 0x6A21FD4C33664D97UL, + 0x701541DB3198B435UL, 0x9B54CDEDBB0F1EEAUL, 0x72409751A163D09AUL, 0xE26F4791BF9D75F6UL +}; + +#define Ceven_hi(r) (C[((r) << 2) + 0]) +#define Ceven_lo(r) (C[((r) << 2) + 1]) +#define Codd_hi(r) (C[((r) << 2) + 2]) +#define Codd_lo(r) (C[((r) << 2) + 3]) + +#define S(x0, x1, x2, x3, cb, r) do { \ + Sb(x0 ## h, x1 ## h, x2 ## h, x3 ## h, cb ## hi(r)); \ + Sb(x0 ## l, x1 ## l, x2 ## l, x3 ## l, cb ## lo(r)); \ + } while (0) + +#define L(x0, x1, x2, x3, x4, x5, x6, x7) do { \ + Lb(x0 ## h, x1 ## h, x2 ## h, x3 ## h, \ + x4 ## h, x5 ## h, x6 ## h, x7 ## h); \ + Lb(x0 ## l, x1 ## l, x2 ## l, x3 ## l, \ + x4 ## l, x5 ## l, x6 ## l, x7 ## l); \ + } while (0) + +#define Wz(x, c, n) do { \ + sph_u64 t = (x ## h & (c)) << (n); \ + x ## h = ((x ## h >> (n)) & (c)) | t; \ + t = (x ## l & (c)) << (n); \ + x ## l = ((x ## l >> (n)) & (c)) | t; \ + } while (0) + +#define W0(x) Wz(x, SPH_C64(0x5555555555555555), 1) +#define W1(x) Wz(x, SPH_C64(0x3333333333333333), 2) +#define W2(x) Wz(x, SPH_C64(0x0F0F0F0F0F0F0F0F), 4) +#define W3(x) Wz(x, SPH_C64(0x00FF00FF00FF00FF), 8) +#define W4(x) Wz(x, SPH_C64(0x0000FFFF0000FFFF), 16) +#define W5(x) Wz(x, SPH_C64(0x00000000FFFFFFFF), 32) +#define W6(x) do { \ + sph_u64 t = x ## h; \ + x ## h = x ## l; \ + x ## l = t; \ + } while (0) + +#define SL(ro) SLu(r + ro, ro) + +#define SLu(r, ro) do { \ + S(h0, h2, h4, h6, Ceven_, r); \ + S(h1, h3, h5, h7, Codd_, r); \ + L(h0, h2, h4, h6, h1, h3, h5, h7); \ + W ## ro(h1); \ + W ## ro(h3); \ + W ## ro(h5); \ + W ## ro(h7); \ + } while (0) + +#if SPH_SMALL_FOOTPRINT_JH + +/* + * The "small footprint" 64-bit version just uses a partially unrolled + * loop. + */ + +#define E8 do { \ + unsigned r; \ + for (r = 0; r < 42; r += 7) { \ + SL(0); \ + SL(1); \ + SL(2); \ + SL(3); \ + SL(4); \ + SL(5); \ + SL(6); \ + } \ + } while (0) + +#else + +/* + * On a "true 64-bit" architecture, we can unroll at will. + */ + +#define E8 do { \ + SLu( 0, 0); \ + SLu( 1, 1); \ + SLu( 2, 2); \ + SLu( 3, 3); \ + SLu( 4, 4); \ + SLu( 5, 5); \ + SLu( 6, 6); \ + SLu( 7, 0); \ + SLu( 8, 1); \ + SLu( 9, 2); \ + SLu(10, 3); \ + SLu(11, 4); \ + SLu(12, 5); \ + SLu(13, 6); \ + SLu(14, 0); \ + SLu(15, 1); \ + SLu(16, 2); \ + SLu(17, 3); \ + SLu(18, 4); \ + SLu(19, 5); \ + SLu(20, 6); \ + SLu(21, 0); \ + SLu(22, 1); \ + SLu(23, 2); \ + SLu(24, 3); \ + SLu(25, 4); \ + SLu(26, 5); \ + SLu(27, 6); \ + SLu(28, 0); \ + SLu(29, 1); \ + SLu(30, 2); \ + SLu(31, 3); \ + SLu(32, 4); \ + SLu(33, 5); \ + SLu(34, 6); \ + SLu(35, 0); \ + SLu(36, 1); \ + SLu(37, 2); \ + SLu(38, 3); \ + SLu(39, 4); \ + SLu(40, 5); \ + SLu(41, 6); \ + } while (0) + +#endif + +)===" diff --git a/xmrstak/backend/amd/amd_gpu/opencl/wolf-aes.cl b/xmrstak/backend/amd/amd_gpu/opencl/wolf-aes.cl new file mode 100644 index 0000000..996944b --- /dev/null +++ b/xmrstak/backend/amd/amd_gpu/opencl/wolf-aes.cl @@ -0,0 +1,90 @@ +R"===( +#ifndef WOLF_AES_CL +#define WOLF_AES_CL + +// AES table - the other three are generated on the fly + +static const __constant uint AES0_C[256] = +{ + 0xA56363C6U, 0x847C7CF8U, 0x997777EEU, 0x8D7B7BF6U, + 0x0DF2F2FFU, 0xBD6B6BD6U, 0xB16F6FDEU, 0x54C5C591U, + 0x50303060U, 0x03010102U, 0xA96767CEU, 0x7D2B2B56U, + 0x19FEFEE7U, 0x62D7D7B5U, 0xE6ABAB4DU, 0x9A7676ECU, + 0x45CACA8FU, 0x9D82821FU, 0x40C9C989U, 0x877D7DFAU, + 0x15FAFAEFU, 0xEB5959B2U, 0xC947478EU, 0x0BF0F0FBU, + 0xECADAD41U, 0x67D4D4B3U, 0xFDA2A25FU, 0xEAAFAF45U, + 0xBF9C9C23U, 0xF7A4A453U, 0x967272E4U, 0x5BC0C09BU, + 0xC2B7B775U, 0x1CFDFDE1U, 0xAE93933DU, 0x6A26264CU, + 0x5A36366CU, 0x413F3F7EU, 0x02F7F7F5U, 0x4FCCCC83U, + 0x5C343468U, 0xF4A5A551U, 0x34E5E5D1U, 0x08F1F1F9U, + 0x937171E2U, 0x73D8D8ABU, 0x53313162U, 0x3F15152AU, + 0x0C040408U, 0x52C7C795U, 0x65232346U, 0x5EC3C39DU, + 0x28181830U, 0xA1969637U, 0x0F05050AU, 0xB59A9A2FU, + 0x0907070EU, 0x36121224U, 0x9B80801BU, 0x3DE2E2DFU, + 0x26EBEBCDU, 0x6927274EU, 0xCDB2B27FU, 0x9F7575EAU, + 0x1B090912U, 0x9E83831DU, 0x742C2C58U, 0x2E1A1A34U, + 0x2D1B1B36U, 0xB26E6EDCU, 0xEE5A5AB4U, 0xFBA0A05BU, + 0xF65252A4U, 0x4D3B3B76U, 0x61D6D6B7U, 0xCEB3B37DU, + 0x7B292952U, 0x3EE3E3DDU, 0x712F2F5EU, 0x97848413U, + 0xF55353A6U, 0x68D1D1B9U, 0x00000000U, 0x2CEDEDC1U, + 0x60202040U, 0x1FFCFCE3U, 0xC8B1B179U, 0xED5B5BB6U, + 0xBE6A6AD4U, 0x46CBCB8DU, 0xD9BEBE67U, 0x4B393972U, + 0xDE4A4A94U, 0xD44C4C98U, 0xE85858B0U, 0x4ACFCF85U, + 0x6BD0D0BBU, 0x2AEFEFC5U, 0xE5AAAA4FU, 0x16FBFBEDU, + 0xC5434386U, 0xD74D4D9AU, 0x55333366U, 0x94858511U, + 0xCF45458AU, 0x10F9F9E9U, 0x06020204U, 0x817F7FFEU, + 0xF05050A0U, 0x443C3C78U, 0xBA9F9F25U, 0xE3A8A84BU, + 0xF35151A2U, 0xFEA3A35DU, 0xC0404080U, 0x8A8F8F05U, + 0xAD92923FU, 0xBC9D9D21U, 0x48383870U, 0x04F5F5F1U, + 0xDFBCBC63U, 0xC1B6B677U, 0x75DADAAFU, 0x63212142U, + 0x30101020U, 0x1AFFFFE5U, 0x0EF3F3FDU, 0x6DD2D2BFU, + 0x4CCDCD81U, 0x140C0C18U, 0x35131326U, 0x2FECECC3U, + 0xE15F5FBEU, 0xA2979735U, 0xCC444488U, 0x3917172EU, + 0x57C4C493U, 0xF2A7A755U, 0x827E7EFCU, 0x473D3D7AU, + 0xAC6464C8U, 0xE75D5DBAU, 0x2B191932U, 0x957373E6U, + 0xA06060C0U, 0x98818119U, 0xD14F4F9EU, 0x7FDCDCA3U, + 0x66222244U, 0x7E2A2A54U, 0xAB90903BU, 0x8388880BU, + 0xCA46468CU, 0x29EEEEC7U, 0xD3B8B86BU, 0x3C141428U, + 0x79DEDEA7U, 0xE25E5EBCU, 0x1D0B0B16U, 0x76DBDBADU, + 0x3BE0E0DBU, 0x56323264U, 0x4E3A3A74U, 0x1E0A0A14U, + 0xDB494992U, 0x0A06060CU, 0x6C242448U, 0xE45C5CB8U, + 0x5DC2C29FU, 0x6ED3D3BDU, 0xEFACAC43U, 0xA66262C4U, + 0xA8919139U, 0xA4959531U, 0x37E4E4D3U, 0x8B7979F2U, + 0x32E7E7D5U, 0x43C8C88BU, 0x5937376EU, 0xB76D6DDAU, + 0x8C8D8D01U, 0x64D5D5B1U, 0xD24E4E9CU, 0xE0A9A949U, + 0xB46C6CD8U, 0xFA5656ACU, 0x07F4F4F3U, 0x25EAEACFU, + 0xAF6565CAU, 0x8E7A7AF4U, 0xE9AEAE47U, 0x18080810U, + 0xD5BABA6FU, 0x887878F0U, 0x6F25254AU, 0x722E2E5CU, + 0x241C1C38U, 0xF1A6A657U, 0xC7B4B473U, 0x51C6C697U, + 0x23E8E8CBU, 0x7CDDDDA1U, 0x9C7474E8U, 0x211F1F3EU, + 0xDD4B4B96U, 0xDCBDBD61U, 0x868B8B0DU, 0x858A8A0FU, + 0x907070E0U, 0x423E3E7CU, 0xC4B5B571U, 0xAA6666CCU, + 0xD8484890U, 0x05030306U, 0x01F6F6F7U, 0x120E0E1CU, + 0xA36161C2U, 0x5F35356AU, 0xF95757AEU, 0xD0B9B969U, + 0x91868617U, 0x58C1C199U, 0x271D1D3AU, 0xB99E9E27U, + 0x38E1E1D9U, 0x13F8F8EBU, 0xB398982BU, 0x33111122U, + 0xBB6969D2U, 0x70D9D9A9U, 0x898E8E07U, 0xA7949433U, + 0xB69B9B2DU, 0x221E1E3CU, 0x92878715U, 0x20E9E9C9U, + 0x49CECE87U, 0xFF5555AAU, 0x78282850U, 0x7ADFDFA5U, + 0x8F8C8C03U, 0xF8A1A159U, 0x80898909U, 0x170D0D1AU, + 0xDABFBF65U, 0x31E6E6D7U, 0xC6424284U, 0xB86868D0U, + 0xC3414182U, 0xB0999929U, 0x772D2D5AU, 0x110F0F1EU, + 0xCBB0B07BU, 0xFC5454A8U, 0xD6BBBB6DU, 0x3A16162CU +}; + +#define BYTE(x, y) (amd_bfe((x), (y) << 3U, 8U)) + +uint4 AES_Round(const __local uint *AES0, const __local uint *AES1, const __local uint *AES2, const __local uint *AES3, const uint4 X, const uint4 key) +{ + uint4 Y; + Y.s0 = AES0[BYTE(X.s0, 0)] ^ AES1[BYTE(X.s1, 1)] ^ AES2[BYTE(X.s2, 2)] ^ AES3[BYTE(X.s3, 3)]; + Y.s1 = AES0[BYTE(X.s1, 0)] ^ AES1[BYTE(X.s2, 1)] ^ AES2[BYTE(X.s3, 2)] ^ AES3[BYTE(X.s0, 3)]; + Y.s2 = AES0[BYTE(X.s2, 0)] ^ AES1[BYTE(X.s3, 1)] ^ AES2[BYTE(X.s0, 2)] ^ AES3[BYTE(X.s1, 3)]; + Y.s3 = AES0[BYTE(X.s3, 0)] ^ AES1[BYTE(X.s0, 1)] ^ AES2[BYTE(X.s1, 2)] ^ AES3[BYTE(X.s2, 3)]; + Y ^= key; + return(Y); +} + +#endif + +)===" diff --git a/xmrstak/backend/amd/amd_gpu/opencl/wolf-skein.cl b/xmrstak/backend/amd/amd_gpu/opencl/wolf-skein.cl new file mode 100644 index 0000000..868757b --- /dev/null +++ b/xmrstak/backend/amd/amd_gpu/opencl/wolf-skein.cl @@ -0,0 +1,114 @@ +R"===( +#ifndef WOLF_SKEIN_CL +#define WOLF_SKEIN_CL + +// Vectorized Skein implementation macros and functions by Wolf + +#define SKEIN_KS_PARITY 0x1BD11BDAA9FC1A22 + +static const __constant ulong SKEIN256_IV[8] = +{ + 0xCCD044A12FDB3E13UL, 0xE83590301A79A9EBUL, + 0x55AEA0614F816E6FUL, 0x2A2767A4AE9B94DBUL, + 0xEC06025E74DD7683UL, 0xE7A436CDC4746251UL, + 0xC36FBAF9393AD185UL, 0x3EEDBA1833EDFC13UL +}; + +static const __constant ulong SKEIN512_256_IV[8] = +{ + 0xCCD044A12FDB3E13UL, 0xE83590301A79A9EBUL, + 0x55AEA0614F816E6FUL, 0x2A2767A4AE9B94DBUL, + 0xEC06025E74DD7683UL, 0xE7A436CDC4746251UL, + 0xC36FBAF9393AD185UL, 0x3EEDBA1833EDFC13UL +}; + +#define SKEIN_INJECT_KEY(p, s) do { \ + p += h; \ + p.s5 += t[s % 3]; \ + p.s6 += t[(s + 1) % 3]; \ + p.s7 += s; \ +} while(0) + +ulong SKEIN_ROT(const uint2 x, const uint y) +{ + if(y < 32) return(as_ulong(amd_bitalign(x, x.s10, 32 - y))); + else return(as_ulong(amd_bitalign(x.s10, x, 32 - (y - 32)))); +} + +void SkeinMix8(ulong4 *pv0, ulong4 *pv1, const uint rc0, const uint rc1, const uint rc2, const uint rc3) +{ + *pv0 += *pv1; + (*pv1).s0 = SKEIN_ROT(as_uint2((*pv1).s0), rc0); + (*pv1).s1 = SKEIN_ROT(as_uint2((*pv1).s1), rc1); + (*pv1).s2 = SKEIN_ROT(as_uint2((*pv1).s2), rc2); + (*pv1).s3 = SKEIN_ROT(as_uint2((*pv1).s3), rc3); + *pv1 ^= *pv0; +} + +ulong8 SkeinEvenRound(ulong8 p, const ulong8 h, const ulong *t, const uint s) +{ + SKEIN_INJECT_KEY(p, s); + ulong4 pv0 = p.even, pv1 = p.odd; + + SkeinMix8(&pv0, &pv1, 46, 36, 19, 37); + pv0 = shuffle(pv0, (ulong4)(1, 2, 3, 0)); + pv1 = shuffle(pv1, (ulong4)(0, 3, 2, 1)); + + SkeinMix8(&pv0, &pv1, 33, 27, 14, 42); + pv0 = shuffle(pv0, (ulong4)(1, 2, 3, 0)); + pv1 = shuffle(pv1, (ulong4)(0, 3, 2, 1)); + + SkeinMix8(&pv0, &pv1, 17, 49, 36, 39); + pv0 = shuffle(pv0, (ulong4)(1, 2, 3, 0)); + pv1 = shuffle(pv1, (ulong4)(0, 3, 2, 1)); + + SkeinMix8(&pv0, &pv1, 44, 9, 54, 56); + return(shuffle2(pv0, pv1, (ulong8)(1, 4, 2, 7, 3, 6, 0, 5))); +} + +ulong8 SkeinOddRound(ulong8 p, const ulong8 h, const ulong *t, const uint s) +{ + SKEIN_INJECT_KEY(p, s); + ulong4 pv0 = p.even, pv1 = p.odd; + + SkeinMix8(&pv0, &pv1, 39, 30, 34, 24); + pv0 = shuffle(pv0, (ulong4)(1, 2, 3, 0)); + pv1 = shuffle(pv1, (ulong4)(0, 3, 2, 1)); + + SkeinMix8(&pv0, &pv1, 13, 50, 10, 17); + pv0 = shuffle(pv0, (ulong4)(1, 2, 3, 0)); + pv1 = shuffle(pv1, (ulong4)(0, 3, 2, 1)); + + SkeinMix8(&pv0, &pv1, 25, 29, 39, 43); + pv0 = shuffle(pv0, (ulong4)(1, 2, 3, 0)); + pv1 = shuffle(pv1, (ulong4)(0, 3, 2, 1)); + + SkeinMix8(&pv0, &pv1, 8, 35, 56, 22); + return(shuffle2(pv0, pv1, (ulong8)(1, 4, 2, 7, 3, 6, 0, 5))); +} + +ulong8 Skein512Block(ulong8 p, ulong8 h, ulong h8, const ulong *t) +{ + #pragma unroll + for(int i = 0; i < 18; ++i) + { + p = SkeinEvenRound(p, h, t, i); + ++i; + ulong tmp = h.s0; + h = shuffle(h, (ulong8)(1, 2, 3, 4, 5, 6, 7, 0)); + h.s7 = h8; + h8 = tmp; + p = SkeinOddRound(p, h, t, i); + tmp = h.s0; + h = shuffle(h, (ulong8)(1, 2, 3, 4, 5, 6, 7, 0)); + h.s7 = h8; + h8 = tmp; + } + + SKEIN_INJECT_KEY(p, 18); + return(p); +} + +#endif + +)===" diff --git a/xmrstak/backend/amd/autoAdjust.hpp b/xmrstak/backend/amd/autoAdjust.hpp new file mode 100644 index 0000000..442d8f1 --- /dev/null +++ b/xmrstak/backend/amd/autoAdjust.hpp @@ -0,0 +1,114 @@ + +#pragma once + +#include "autoAdjust.hpp" + + +#include "jconf.h" +#include "../../console.h" +#include "../../ConfigEditor.hpp" +#include "amd_gpu/gpu.h" +#include "../../Params.hpp" + +#include <vector> +#include <cstdio> +#include <sstream> +#include <string> +#include <iostream> +#include <algorithm> + +#if defined(__APPLE__) +#include <OpenCL/cl.h> +#else +#include <CL/cl.h> +#endif + + +namespace xmrstak +{ +namespace amd +{ + +class autoAdjust +{ +public: + + autoAdjust() + { + + } + + /** print the adjusted values if needed + * + * Routine exit the application and print the adjusted values if needed else + * nothing is happened. + */ + bool printConfig() + { + + int platformIndex = getAMDPlatformIdx(); + + if(platformIndex == -1) + { + printer::inst()->print_msg(L0,"WARNING: No AMD OpenCL platform found. Possible driver issues or wrong vendor driver."); + return false; + } + + devVec = getAMDDevices(0); + + + int deviceCount = devVec.size(); + + if(deviceCount == 0) + return false; + + + generateThreadConfig(platformIndex); + return true; + + } + +private: + + void generateThreadConfig(const int platformIndex) + { + // load the template of the backend config into a char variable + const char *tpl = + #include "./config.tpl" + ; + + ConfigEditor configTpl{}; + configTpl.set( std::string(tpl) ); + + std::string conf; + int i = 0; + for(auto& ctx : devVec) + { + // keep 64MiB memory free (value is randomly chosen) + size_t availableMem = ctx.freeMem - (64u * 1024 * 1024); + // 224byte extra memory is used per thread for meta data + size_t perThread = (size_t(1u)<<21) + 224u; + size_t max_intensity = availableMem / perThread; + // 1000 is a magic selected limit \todo select max intensity depending of the gpu type + size_t intensity = std::min( size_t(1000u) , max_intensity ); + conf += std::string(" // gpu: ") + ctx.name + "\n"; + // set 8 threads per block (this is a good value for the most gpus) + conf += std::string(" { \"index\" : ") + std::to_string(ctx.deviceIdx) + ",\n" + + " \"intensity\" : " + std::to_string(intensity) + ", \"worksize\" : " + std::to_string(8) + ",\n" + + " \"affine_to_cpu\" : false, \n" + " },\n"; + ++i; + } + + configTpl.replace("PLATFORMINDEX",std::to_string(platformIndex)); + configTpl.replace("NUMGPUS",std::to_string(devVec.size())); + configTpl.replace("GPUCONFIG",conf); + configTpl.write(Params::inst().configFileAMD); + printer::inst()->print_msg(L0, "AMD: GPU configuration stored in file '%s'", Params::inst().configFileAMD.c_str()); + } + + std::vector<GpuContext> devVec; +}; + +} // namespace amd +} // namepsace xmrstak diff --git a/xmrstak/backend/amd/config.tpl b/xmrstak/backend/amd/config.tpl new file mode 100644 index 0000000..b8b6dc4 --- /dev/null +++ b/xmrstak/backend/amd/config.tpl @@ -0,0 +1,29 @@ +R"===( + +/* + * Number of GPUs that you have in your system. Each GPU will get its own CPU thread. + */ +"gpu_thread_num" : NUMGPUS, + +/* + * GPU configuration. You should play around with intensity and worksize as the fastest settings will vary. + * index - GPU index number usually starts from 0 + * intensity - Number of parallel GPU threads (nothing to do with CPU threads) + * worksize - Number of local GPU threads (nothing to do with CPU threads) + * affine_to_cpu - This will affine the thread to a CPU. This can make a GPU miner play along nicer with a CPU miner. + * "gpu_threads_conf" : + * [ + * { "index" : 0, "intensity" : 1000, "worksize" : 8, "affine_to_cpu" : false }, + * ], + */ + +"gpu_threads_conf" : [ +GPUCONFIG +], + +/* + * Platform index. This will be 0 unless you have different OpenCL platform - eg. AMD and Intel. + */ +"platform_index" : PLATFORMINDEX, + +)===" diff --git a/xmrstak/backend/amd/jconf.cpp b/xmrstak/backend/amd/jconf.cpp new file mode 100644 index 0000000..f8a551e --- /dev/null +++ b/xmrstak/backend/amd/jconf.cpp @@ -0,0 +1,261 @@ +/* + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * Additional permission under GNU GPL version 3 section 7 + * + * If you modify this Program, or any covered work, by linking or combining + * it with OpenSSL (or a modified version of that library), containing parts + * covered by the terms of OpenSSL License and SSLeay License, the licensors + * of this Program grant you additional permission to convey the resulting work. + * + */ + + +#include "jconf.h" + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#ifdef _WIN32 +#define strcasecmp _stricmp +#include <intrin.h> +#else +#include <cpuid.h> +#endif + +#include "../../rapidjson/document.h" +#include "../../rapidjson/error/en.h" +#include "../../jext.h" +#include "../../console.h" + +namespace xmrstak +{ +namespace amd +{ + +using namespace rapidjson; + +/* + * This enum needs to match index in oConfigValues, otherwise we will get a runtime error + */ +enum configEnum { iGpuThreadNum, aGpuThreadsConf, iPlatformIdx }; + +struct configVal { + configEnum iName; + const char* sName; + Type iType; +}; + +//Same order as in configEnum, as per comment above +configVal oConfigValues[] = { + { iGpuThreadNum, "gpu_thread_num", kNumberType }, + { aGpuThreadsConf, "gpu_threads_conf", kArrayType }, + { iPlatformIdx, "platform_index", kNumberType } +}; + +constexpr size_t iConfigCnt = (sizeof(oConfigValues)/sizeof(oConfigValues[0])); + +inline bool checkType(Type have, Type want) +{ + if(want == have) + return true; + else if(want == kTrueType && have == kFalseType) + return true; + else if(want == kFalseType && have == kTrueType) + return true; + else + return false; +} + +struct jconf::opaque_private +{ + Document jsonDoc; + const Value* configValues[iConfigCnt]; //Compile time constant + + opaque_private() + { + } +}; + +jconf* jconf::oInst = nullptr; + +jconf::jconf() +{ + prv = new opaque_private(); +} + +bool jconf::GetThreadConfig(size_t id, thd_cfg &cfg) +{ + if(id >= prv->configValues[aGpuThreadsConf]->Size()) + return false; + + const Value& oThdConf = prv->configValues[aGpuThreadsConf]->GetArray()[id]; + + if(!oThdConf.IsObject()) + return false; + + const Value *idx, *intensity, *w_size, *aff; + idx = GetObjectMember(oThdConf, "index"); + intensity = GetObjectMember(oThdConf, "intensity"); + w_size = GetObjectMember(oThdConf, "worksize"); + aff = GetObjectMember(oThdConf, "affine_to_cpu"); + + if(idx == nullptr || intensity == nullptr || w_size == nullptr || aff == nullptr) + return false; + + if(!idx->IsUint64() || !intensity->IsUint64() || !w_size->IsUint64()) + return false; + + if(!aff->IsUint64() && !aff->IsBool()) + return false; + + cfg.index = idx->GetUint64(); + cfg.intensity = intensity->GetUint64(); + cfg.w_size = w_size->GetUint64(); + + if(aff->IsNumber()) + cfg.cpu_aff = aff->GetInt64(); + else + cfg.cpu_aff = -1; + + return true; +} + +size_t jconf::GetPlatformIdx() +{ + return prv->configValues[iPlatformIdx]->GetUint64(); +} + +size_t jconf::GetThreadCount() +{ + return prv->configValues[aGpuThreadsConf]->Size(); +} + +bool jconf::parse_config(const char* sFilename) +{ + FILE * pFile; + char * buffer; + size_t flen; + + pFile = fopen(sFilename, "rb"); + if (pFile == NULL) + { + printer::inst()->print_msg(L0, "Failed to open config file %s.", sFilename); + return false; + } + + fseek(pFile,0,SEEK_END); + flen = ftell(pFile); + rewind(pFile); + + if(flen >= 64*1024) + { + fclose(pFile); + printer::inst()->print_msg(L0, "Oversized config file - %s.", sFilename); + return false; + } + + if(flen <= 16) + { + printer::inst()->print_msg(L0, "File is empty or too short - %s.", sFilename); + return false; + } + + buffer = (char*)malloc(flen + 3); + if(fread(buffer+1, flen, 1, pFile) != 1) + { + free(buffer); + fclose(pFile); + printer::inst()->print_msg(L0, "Read error while reading %s.", sFilename); + return false; + } + fclose(pFile); + + //Replace Unicode BOM with spaces - we always use UTF-8 + unsigned char* ubuffer = (unsigned char*)buffer; + if(ubuffer[1] == 0xEF && ubuffer[2] == 0xBB && ubuffer[3] == 0xBF) + { + buffer[1] = ' '; + buffer[2] = ' '; + buffer[3] = ' '; + } + + buffer[0] = '{'; + buffer[flen] = '}'; + buffer[flen + 1] = '\0'; + + prv->jsonDoc.Parse<kParseCommentsFlag|kParseTrailingCommasFlag>(buffer, flen+2); + free(buffer); + + if(prv->jsonDoc.HasParseError()) + { + printer::inst()->print_msg(L0, "JSON config parse error(offset %llu): %s", + int_port(prv->jsonDoc.GetErrorOffset()), GetParseError_En(prv->jsonDoc.GetParseError())); + return false; + } + + + if(!prv->jsonDoc.IsObject()) + { //This should never happen as we created the root ourselves + printer::inst()->print_msg(L0, "Invalid config file. No root?\n"); + return false; + } + + for(size_t i = 0; i < iConfigCnt; i++) + { + if(oConfigValues[i].iName != i) + { + printer::inst()->print_msg(L0, "Code error. oConfigValues are not in order."); + return false; + } + + prv->configValues[i] = GetObjectMember(prv->jsonDoc, oConfigValues[i].sName); + + if(prv->configValues[i] == nullptr) + { + printer::inst()->print_msg(L0, "Invalid config file. Missing value \"%s\".", oConfigValues[i].sName); + return false; + } + + if(!checkType(prv->configValues[i]->GetType(), oConfigValues[i].iType)) + { + printer::inst()->print_msg(L0, "Invalid config file. Value \"%s\" has unexpected type.", oConfigValues[i].sName); + return false; + } + } + + size_t n_thd = prv->configValues[aGpuThreadsConf]->Size(); + if(prv->configValues[iGpuThreadNum]->GetUint64() != n_thd) + { + printer::inst()->print_msg(L0, + "Invalid config file. Your GPU config array has %llu members, while you want to use %llu threads.", + int_port(n_thd), int_port(prv->configValues[iGpuThreadNum]->GetUint64())); + return false; + } + + thd_cfg c; + for(size_t i=0; i < n_thd; i++) + { + if(!GetThreadConfig(i, c)) + { + printer::inst()->print_msg(L0, "Thread %llu has invalid config.", int_port(i)); + return false; + } + } + +} + +} // namespace amd +} // namespace xmrstak diff --git a/xmrstak/backend/amd/jconf.hpp b/xmrstak/backend/amd/jconf.hpp new file mode 100644 index 0000000..9fef331 --- /dev/null +++ b/xmrstak/backend/amd/jconf.hpp @@ -0,0 +1,44 @@ +#pragma once +#include <stdlib.h> +#include <string> +#include "../../Params.hpp" + +namespace xmrstak +{ +namespace amd +{ + +class jconf +{ +public: + static jconf* inst() + { + if (oInst == nullptr) oInst = new jconf; + return oInst; + }; + + bool parse_config(const char* sFilename = Params::inst().configFileAMD.c_str()); + + struct thd_cfg { + size_t index; + size_t intensity; + size_t w_size; + long long cpu_aff; + }; + + size_t GetThreadCount(); + bool GetThreadConfig(size_t id, thd_cfg &cfg); + + size_t GetPlatformIdx(); + +private: + jconf(); + static jconf* oInst; + + struct opaque_private; + opaque_private* prv; + +}; + +} // namespace amd +} // namespace xmrstak diff --git a/xmrstak/backend/amd/minethd.cpp b/xmrstak/backend/amd/minethd.cpp new file mode 100644 index 0000000..f2f5ff4 --- /dev/null +++ b/xmrstak/backend/amd/minethd.cpp @@ -0,0 +1,237 @@ +/* + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * Additional permission under GNU GPL version 3 section 7 + * + * If you modify this Program, or any covered work, by linking or combining + * it with OpenSSL (or a modified version of that library), containing parts + * covered by the terms of OpenSSL License and SSLeay License, the licensors + * of this Program grant you additional permission to convey the resulting work. + * + */ + +#include <assert.h> +#include <cmath> +#include <chrono> +#include <thread> + +#include "../../ConfigEditor.hpp" +#include "autoAdjust.hpp" + +#include <vector> +#include "../../console.h" +#include "../../crypto/cryptonight_aesni.h" +#include "../cpu/minethd.h" +#include "../../jconf.h" + +#include "../../executor.h" +#include "minethd.h" +#include "../../jconf.h" +#include "../../crypto/cryptonight.h" +#include "../../Environment.hpp" +#include "../../Params.hpp" +#include "amd_gpu/gpu.h" + + +namespace xmrstak +{ +namespace amd +{ + +minethd::minethd(miner_work& pWork, size_t iNo, GpuContext* ctx) +{ + oWork = pWork; + bQuit = 0; + iThreadNo = (uint8_t)iNo; + iJobNo = 0; + iHashCount = 0; + iTimestamp = 0; + pGpuCtx = ctx; + + oWorkThd = std::thread(&minethd::work_main, this); +} + +extern "C" { +#ifdef WIN32 +__declspec(dllexport) +#endif +std::vector<IBackend*>* xmrstak_start_backend(uint32_t threadOffset, miner_work& pWork, Environment& env) +{ + Environment::inst() = env; + return amd::minethd::thread_starter(threadOffset, pWork); +} +} // extern "C" + +bool minethd::init_gpus() +{ + size_t i, n = jconf::inst()->GetThreadCount(); + + printer::inst()->print_msg(L1, "Compiling code and initializing GPUs. This will take a while..."); + vGpuData.resize(n); + + jconf::thd_cfg cfg; + for(i = 0; i < n; i++) + { + jconf::inst()->GetThreadConfig(i, cfg); + vGpuData[i].deviceIdx = cfg.index; + vGpuData[i].rawIntensity = cfg.intensity; + vGpuData[i].workSize = cfg.w_size; + } + + return InitOpenCL(vGpuData.data(), n, jconf::inst()->GetPlatformIdx()) == ERR_SUCCESS; +} + +std::vector<GpuContext> minethd::vGpuData; + +std::vector<IBackend*>* minethd::thread_starter(uint32_t threadOffset, miner_work& pWork) +{ + std::vector<IBackend*>* pvThreads = new std::vector<IBackend*>(); + + if(!ConfigEditor::file_exist(Params::inst().configFileAMD)) + { + autoAdjust adjust; + if(!adjust.printConfig()) + return pvThreads; + } + + if(!jconf::inst()->parse_config()) + { + win_exit(); + } + + // \ todo get device count and exit if no opencl device + + if(!init_gpus()) + { + printer::inst()->print_msg(L1, "WARNING: AMD device not found"); + return pvThreads; + } + + size_t i, n = jconf::inst()->GetThreadCount(); + pvThreads->reserve(n); + + jconf::thd_cfg cfg; + for (i = 0; i < n; i++) + { + jconf::inst()->GetThreadConfig(i, cfg); + minethd* thd = new minethd(pWork, i + threadOffset, &vGpuData[i]); + + if(cfg.cpu_aff >= 0) + { +#if defined(__APPLE__) + printer::inst()->print_msg(L1, "WARNING on MacOS thread affinity is only advisory."); +#endif + cpu::minethd::thd_setaffinity(thd->oWorkThd.native_handle(), cfg.cpu_aff); + } + + pvThreads->push_back(thd); + if(cfg.cpu_aff >= 0) + printer::inst()->print_msg(L1, "Starting GPU thread, affinity: %d.", (int)cfg.cpu_aff); + else + printer::inst()->print_msg(L1, "Starting GPU thread, no affinity."); + } + + return pvThreads; +} + +void minethd::switch_work(miner_work& pWork) +{ + // iConsumeCnt is a basic lock-like polling mechanism just in case we happen to push work + // faster than threads can consume them. This should never happen in real life. + // Pool cant physically send jobs faster than every 250ms or so due to net latency. + + while (GlobalStates::inst().iConsumeCnt.load(std::memory_order_seq_cst) < GlobalStates::inst().iThreadCount) + std::this_thread::sleep_for(std::chrono::milliseconds(100)); + + GlobalStates::inst().oGlobalWork = pWork; + GlobalStates::inst().iConsumeCnt.store(0, std::memory_order_seq_cst); + GlobalStates::inst().iGlobalJobNo++; +} + +void minethd::consume_work() +{ + memcpy(&oWork, &GlobalStates::inst().oGlobalWork, sizeof(miner_work)); + iJobNo++; + GlobalStates::inst().iConsumeCnt++; + +} + +void minethd::work_main() +{ + uint64_t iCount = 0; + + cryptonight_ctx* cpu_ctx; + cpu_ctx = cpu::minethd::minethd_alloc_ctx(); + cn_hash_fun hash_fun = cpu::minethd::func_selector(::jconf::inst()->HaveHardwareAes(), true /*bNoPrefetch*/); + + GlobalStates::inst().iConsumeCnt++; + + while (bQuit == 0) + { + if (oWork.bStall) + { + /* We are stalled here because the executor didn't find a job for us yet, + either because of network latency, or a socket problem. Since we are + raison d'etre of this software it us sensible to just wait until we have something*/ + + while (GlobalStates::inst().iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo) + std::this_thread::sleep_for(std::chrono::milliseconds(100)); + + consume_work(); + continue; + } + + assert(sizeof(job_result::sJobID) == sizeof(pool_job::sJobID)); + pGpuCtx->Nonce = calc_start_nonce(oWork.iResumeCnt); + uint32_t target = oWork.iTarget32; + XMRSetJob(pGpuCtx, oWork.bWorkBlob, oWork.iWorkSize, target); + + while(GlobalStates::inst().iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo) + { + cl_uint results[0x100]; + memset(results,0,sizeof(cl_uint)*(0x100)); + + XMRRunJob(pGpuCtx, results); + + for(size_t i = 0; i < results[0xFF]; i++) + { + uint8_t bWorkBlob[112]; + uint8_t bResult[32]; + + memcpy(bWorkBlob, oWork.bWorkBlob, oWork.iWorkSize); + memset(bResult, 0, sizeof(job_result::bResult)); + + *(uint32_t*)(bWorkBlob + 39) = results[i]; + + hash_fun(bWorkBlob, oWork.iWorkSize, bResult, cpu_ctx); + if ( (*((uint64_t*)(bResult + 24))) < oWork.iTarget) + executor::inst()->push_event(ex_event(job_result(oWork.sJobID, results[i], bResult), oWork.iPoolId)); + else + executor::inst()->log_result_error("AMD Invalid Result"); + } + + iCount += pGpuCtx->rawIntensity; + using namespace std::chrono; + uint64_t iStamp = time_point_cast<milliseconds>(high_resolution_clock::now()).time_since_epoch().count(); + iHashCount.store(iCount, std::memory_order_relaxed); + iTimestamp.store(iStamp, std::memory_order_relaxed); + std::this_thread::yield(); + } + + consume_work(); + } +} + +} // namespace amd +} // namespace xmrstak diff --git a/xmrstak/backend/amd/minethd.hpp b/xmrstak/backend/amd/minethd.hpp new file mode 100644 index 0000000..4fb3b13 --- /dev/null +++ b/xmrstak/backend/amd/minethd.hpp @@ -0,0 +1,61 @@ +#pragma once +#include <thread> +#include <atomic> +#include "./jconf.h" +#include "../IBackend.hpp" +#include "../../Environment.hpp" + +#include "amd_gpu/gpu.h" + +namespace xmrstak +{ +namespace amd +{ + +class minethd : public IBackend +{ +public: + + static void switch_work(miner_work& pWork); + static std::vector<IBackend*>* thread_starter(uint32_t threadOffset, miner_work& pWork); + static bool init_gpus(); + +private: + typedef void (*cn_hash_fun)(const void*, size_t, void*, cryptonight_ctx*); + + minethd(miner_work& pWork, size_t iNo, GpuContext* ctx); + + // We use the top 8 bits of the nonce for thread and resume + // This allows us to resume up to 64 threads 4 times before + // we get nonce collisions + // Bottom 24 bits allow for an hour of work at 4000 H/s + inline uint32_t calc_start_nonce(uint32_t resume) + { + return reverseBits<uint32_t>(static_cast<uint32_t>(iThreadNo + GlobalStates::inst().iThreadCount * resume)); + } + + void work_main(); + void double_work_main(); + void consume_work(); + + uint64_t iJobNo; + + static miner_work oGlobalWork; + miner_work oWork; + + std::thread oWorkThd; + uint8_t iThreadNo; + + bool bQuit; + bool bNoPrefetch; + + //Mutable ptr to vector below, different for each thread + GpuContext* pGpuCtx; + + // WARNING - this vector (but not its contents) must be immutable + // once the threads are started + static std::vector<GpuContext> vGpuData; +}; + +} // namespace amd +} // namespace xmrstak diff --git a/xmrstak/backend/backendConnector.cpp b/xmrstak/backend/backendConnector.cpp new file mode 100644 index 0000000..1013f79 --- /dev/null +++ b/xmrstak/backend/backendConnector.cpp @@ -0,0 +1,104 @@ +/* + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * Additional permission under GNU GPL version 3 section 7 + * + * If you modify this Program, or any covered work, by linking or combining + * it with OpenSSL (or a modified version of that library), containing parts + * covered by the terms of OpenSSL License and SSLeay License, the licensors + * of this Program grant you additional permission to convey the resulting work. + * + */ + +#include <assert.h> +#include <cmath> +#include <chrono> +#include <cstring> +#include <thread> +#include <bitset> + +#include "IBackend.hpp" +#include "BackendConnector.hpp" + +#include "cpu/minethd.h" +#ifndef CONF_NO_CUDA +# include "nvidia/minethd.h" +#endif +#ifndef CONF_NO_OPENCL +# include "amd/minethd.h" +#endif +#include "miner_work.h" +#include "GlobalStates.hpp" + #include <cstdlib> + +#include "Plugin.hpp" +#include "../Environment.hpp" +#include "../console.h" +#include "../Params.hpp" + +namespace xmrstak +{ + + +bool BackendConnector::self_test() +{ + + return true; +} + +std::vector<IBackend*>* BackendConnector::thread_starter(miner_work& pWork) +{ + GlobalStates::inst().iGlobalJobNo = 0; + GlobalStates::inst().iConsumeCnt = 0; + + + std::vector<IBackend*>* pvThreads = new std::vector<IBackend*>; + +#ifndef CONF_NO_CUDA + if(Params::inst().useNVIDIA) + { + Plugin nvidiaPlugin("NVIDIA", "xmrstak_cuda_backend"); + std::vector<IBackend*>* nvidiaThreads = nvidiaPlugin.startBackend(static_cast<uint32_t>(pvThreads->size()), pWork, Environment::inst()); + pvThreads->insert(std::end(*pvThreads), std::begin(*nvidiaThreads), std::end(*nvidiaThreads)); + if(nvidiaThreads->size() == 0) + printer::inst()->print_msg(L0, "WARNING: backend NVIDIA disabled."); + } +#endif + +#ifndef CONF_NO_OPENCL + if(Params::inst().useAMD) + { + Plugin amdPlugin("AMD", "xmrstak_opencl_backend"); + std::vector<IBackend*>* amdThreads = amdPlugin.startBackend(static_cast<uint32_t>(pvThreads->size()), pWork, Environment::inst()); + pvThreads->insert(std::end(*pvThreads), std::begin(*amdThreads), std::end(*amdThreads)); + if(amdThreads->size() == 0) + printer::inst()->print_msg(L0, "WARNING: backend AMD disabled."); + } +#endif + +#ifndef CONF_NO_CPU + if(Params::inst().useCPU) + { + auto cpuThreads = cpu::minethd::thread_starter(static_cast<uint32_t>(pvThreads->size()), pWork); + pvThreads->insert(std::end(*pvThreads), std::begin(cpuThreads), std::end(cpuThreads)); + if(cpuThreads.size() == 0) + printer::inst()->print_msg(L0, "WARNING: backend CPU disabled."); + } +#endif + + GlobalStates::inst().iThreadCount = pvThreads->size(); + return pvThreads; +} + +} // namepsace xmrstak diff --git a/xmrstak/backend/backendConnector.hpp b/xmrstak/backend/backendConnector.hpp new file mode 100644 index 0000000..024d9b1 --- /dev/null +++ b/xmrstak/backend/backendConnector.hpp @@ -0,0 +1,18 @@ +#pragma once +#include <thread> +#include <vector> +#include <atomic> +#include <mutex> +#include "IBackend.hpp" +#include "miner_work.h" + +namespace xmrstak +{ + + struct BackendConnector + { + static std::vector<IBackend*>* thread_starter(miner_work& pWork); + static bool self_test(); + }; + +} // namepsace xmrstak diff --git a/xmrstak/backend/cpu/autoAdjust.hpp b/xmrstak/backend/cpu/autoAdjust.hpp new file mode 100644 index 0000000..32c8576 --- /dev/null +++ b/xmrstak/backend/cpu/autoAdjust.hpp @@ -0,0 +1,172 @@ +#pragma once +#include "jconf.h" +#include "../../console.h" +#include "../../jconf.h" +#include "../../ConfigEditor.hpp" +#include "../../Params.hpp" +#include <string> + +#ifdef _WIN32 +#include <windows.h> +#else +#include <unistd.h> +#endif // _WIN32 + + +namespace xmrstak +{ +namespace cpu +{ +// Mask bits between h and l and return the value +// This enables us to put in values exactly like in the manual +// For example EBX[31:22] is get_masked(cpu_info[1], 31, 22) +inline int32_t get_masked(int32_t val, int32_t h, int32_t l) +{ + val &= (0x7FFFFFFF >> (31-(h-l))) << l; + return val >> l; +} + +class autoAdjust +{ +public: + + autoAdjust() + { + } + + bool printConfig() + { + + ConfigEditor configTpl{}; + + // load the template of the backend config into a char variable + const char *tpl = + #include "./config.tpl" + ; + configTpl.set( std::string(tpl) ); + + std::string conf; + + if(!detectL3Size() || L3KB_size < 1024 || L3KB_size > 102400) + { + if(L3KB_size < 1024 || L3KB_size > 102400) + printer::inst()->print_msg(L0, "Autoconf failed: L3 size sanity check failed - %u KB.", L3KB_size); + + conf += std::string(" { \"low_power_mode\" : false, \"no_prefetch\" : true, \"affine_to_cpu\" : false },\n"); + printer::inst()->print_msg(L0, "Autoconf FAILED. Create config for a single thread. Please try to add new ones until the hashrate slows down."); + } + else + { + printer::inst()->print_msg(L0, "Autoconf L3 size detected at %u KB.", L3KB_size); + + detectCPUConf(); + + printer::inst()->print_msg(L0, "Autoconf core count detected as %u on %s.", corecnt, + linux_layout ? "Linux" : "Windows"); + + uint32_t aff_id = 0; + for(uint32_t i=0; i < corecnt; i++) + { + bool double_mode; + + if(L3KB_size <= 0) + break; + + double_mode = L3KB_size / 2048 > (int32_t)(corecnt-i); + + conf += std::string(" { \"low_power_mode\" : "); + conf += std::string(double_mode ? "true" : "false"); + conf += std::string(", \"no_prefetch\" : true, \"affine_to_cpu\" : "); + conf += std::to_string(aff_id); + conf += std::string(" },\n"); + + if(!linux_layout || old_amd) + { + aff_id += 2; + + if(aff_id >= corecnt) + aff_id = 1; + } + else + aff_id++; + + if(double_mode) + L3KB_size -= 4096; + else + L3KB_size -= 2048; + } + } + + configTpl.replace("CPUCONFIG",conf); + configTpl.write(Params::inst().configFileCPU); + printer::inst()->print_msg(L0, "CPU configuration stored in file '%s'", Params::inst().configFileCPU.c_str()); + + return true; + } + +private: + bool detectL3Size() + { + int32_t cpu_info[4]; + char cpustr[13] = {0}; + + ::jconf::cpuid(0, 0, cpu_info); + memcpy(cpustr, &cpu_info[1], 4); + memcpy(cpustr+4, &cpu_info[3], 4); + memcpy(cpustr+8, &cpu_info[2], 4); + + if(strcmp(cpustr, "GenuineIntel") == 0) + { + ::jconf::cpuid(4, 3, cpu_info); + + if(get_masked(cpu_info[0], 7, 5) != 3) + { + printer::inst()->print_msg(L0, "Autoconf failed: Couln't find L3 cache page."); + return false; + } + + L3KB_size = ((get_masked(cpu_info[1], 31, 22) + 1) * (get_masked(cpu_info[1], 21, 12) + 1) * + (get_masked(cpu_info[1], 11, 0) + 1) * (cpu_info[2] + 1)) / 1024; + + return true; + } + else if(strcmp(cpustr, "AuthenticAMD") == 0) + { + ::jconf::cpuid(0x80000006, 0, cpu_info); + + L3KB_size = get_masked(cpu_info[3], 31, 18) * 512; + + ::jconf::cpuid(1, 0, cpu_info); + if(get_masked(cpu_info[0], 11, 8) < 0x17) //0x17h is Zen + old_amd = true; + + return true; + } + else + { + printer::inst()->print_msg(L0, "Autoconf failed: Unknown CPU type: %s.", cpustr); + return false; + } + } + + void detectCPUConf() + { +#ifdef _WIN32 + SYSTEM_INFO info; + GetSystemInfo(&info); + corecnt = info.dwNumberOfProcessors; + linux_layout = false; +#else + corecnt = sysconf(_SC_NPROCESSORS_ONLN); + linux_layout = true; +#endif // _WIN32 + } + + int32_t L3KB_size = 0; + uint32_t corecnt; + bool old_amd = false; + bool linux_layout; +}; + +} // namespace cpu +} // namepsace xmrstak diff --git a/xmrstak/backend/cpu/autoAdjustHwloc.hpp b/xmrstak/backend/cpu/autoAdjustHwloc.hpp new file mode 100644 index 0000000..e1916e0 --- /dev/null +++ b/xmrstak/backend/cpu/autoAdjustHwloc.hpp @@ -0,0 +1,210 @@ +#pragma once + +#include "../../console.h" +#include <hwloc.h> +#include <stdio.h> +#include "../../Params.hpp" + +#ifdef _WIN32 +#include <windows.h> +#else +#include <unistd.h> +#endif // _WIN32 + +#include <string> +#include "../../ConfigEditor.hpp" + +namespace xmrstak +{ +namespace cpu +{ + +class autoAdjust +{ +public: + + autoAdjust() + { + } + + bool printConfig() + { + + hwloc_topology_t topology; + hwloc_topology_init(&topology); + hwloc_topology_load(topology); + + std::string conf; + ConfigEditor configTpl{}; + + // load the template of the backend config into a char variable + const char *tpl = + #include "./config.tpl" + ; + configTpl.set( std::string(tpl) ); + + try + { + std::vector<hwloc_obj_t> tlcs; + tlcs.reserve(16); + results.reserve(16); + + findChildrenCaches(hwloc_get_root_obj(topology), + [&tlcs](hwloc_obj_t found) { tlcs.emplace_back(found); } ); + + if(tlcs.size() == 0) + throw(std::runtime_error("The CPU doesn't seem to have a cache.")); + + for(hwloc_obj_t obj : tlcs) + proccessTopLevelCache(obj); + + for(uint32_t id : results) + { + conf += std::string(" { \"low_power_mode\" : "); + conf += std::string((id & 0x8000000) != 0 ? "true" : "false"); + conf += std::string(", \"no_prefetch\" : true, \"affine_to_cpu\" : "); + conf += std::to_string(id & 0x7FFFFFF); + conf += std::string(" },\n"); + } + } + catch(const std::runtime_error& err) + { + // \todo add fallback to default auto adjust + conf += std::string(" { \"low_power_mode\" : false, \"no_prefetch\" : true, \"affine_to_cpu\" : false },\n"); + printer::inst()->print_msg(L0, "Autoconf FAILED: %s. Create config for a single thread.", err.what()); + } + + configTpl.replace("CPUCONFIG",conf); + configTpl.write(Params::inst().configFileCPU); + printer::inst()->print_msg(L0, "CPU configuration stored in file '%s'", Params::inst().configFileCPU.c_str()); + /* Destroy topology object. */ + hwloc_topology_destroy(topology); + + return true; + } + +private: + static constexpr size_t hashSize = 2 * 1024 * 1024; + std::vector<uint32_t> results; + + template<typename func> + inline void findChildrenByType(hwloc_obj_t obj, hwloc_obj_type_t type, func lambda) + { + for(size_t i=0; i < obj->arity; i++) + { + if(obj->children[i]->type == type) + lambda(obj->children[i]); + else + findChildrenByType(obj->children[i], type, lambda); + } + } + + inline bool isCacheObject(hwloc_obj_t obj) + { +#if HWLOC_API_VERSION >= 0x20000 + return hwloc_obj_type_is_cache(obj->type); +#else + return obj->type == HWLOC_OBJ_CACHE; +#endif // HWLOC_API_VERSION + } + + template<typename func> + inline void findChildrenCaches(hwloc_obj_t obj, func lambda) + { + for(size_t i=0; i < obj->arity; i++) + { + if(isCacheObject(obj->children[i])) + lambda(obj->children[i]); + else + findChildrenCaches(obj->children[i], lambda); + } + } + + inline bool isCacheExclusive(hwloc_obj_t obj) + { + const char* value = hwloc_obj_get_info_by_name(obj, "Inclusive"); + return value == nullptr || value[0] != '1'; + } + + // Top level cache isn't shared with other cores on the same package + // This will usually be 1 x L3, but can be 2 x L2 per package + void proccessTopLevelCache(hwloc_obj_t obj) + { + if(obj->attr == nullptr) + throw(std::runtime_error("Cache object hasn't got attributes.")); + + size_t PUs = 0; + findChildrenByType(obj, HWLOC_OBJ_PU, [&PUs](hwloc_obj_t found) { PUs++; } ); + + //Strange case, but we will handle it silently, surely there must be one PU somewhere? + if(PUs == 0) + return; + + if(obj->attr->cache.size == 0) + { + //We will always have one child if PUs > 0 + if(!isCacheObject(obj->children[0])) + throw(std::runtime_error("The CPU doesn't seem to have a cache.")); + + //Try our luck with lower level caches + for(size_t i=0; i < obj->arity; i++) + proccessTopLevelCache(obj->children[i]); + return; + } + + size_t cacheSize = obj->attr->cache.size; + if(isCacheExclusive(obj)) + { + for(size_t i=0; i < obj->arity; i++) + { + hwloc_obj_t l2obj = obj->children[i]; + //If L2 is exclusive and greater or equal to 2MB add room for one more hash + if(isCacheObject(l2obj) && l2obj->attr != nullptr && l2obj->attr->cache.size >= hashSize) + cacheSize += hashSize; + } + } + + std::vector<hwloc_obj_t> cores; + cores.reserve(16); + findChildrenByType(obj, HWLOC_OBJ_CORE, [&cores](hwloc_obj_t found) { cores.emplace_back(found); } ); + + size_t cacheHashes = (cacheSize + hashSize/2) / hashSize; + + //Firstly allocate PU 0 of every CORE, then PU 1 etc. + size_t pu_id = 0; + while(cacheHashes > 0 && PUs > 0) + { + bool allocated_pu = false; + for(hwloc_obj_t core : cores) + { + if(core->arity <= pu_id || core->children[pu_id]->type != HWLOC_OBJ_PU) + continue; + + size_t os_id = core->children[pu_id]->os_index; + + if(cacheHashes > PUs) + { + cacheHashes -= 2; + os_id |= 0x8000000; //double hash marker bit + } + else + cacheHashes--; + PUs--; + + allocated_pu = true; + results.emplace_back(os_id); + + if(cacheHashes == 0) + break; + } + + if(!allocated_pu) + throw(std::runtime_error("Failed to allocate a PU.")); + + pu_id++; + } + } +}; + +} // namespace cpu +} // namepsace xmrstak diff --git a/xmrstak/backend/cpu/config.tpl b/xmrstak/backend/cpu/config.tpl new file mode 100644 index 0000000..990a31d --- /dev/null +++ b/xmrstak/backend/cpu/config.tpl @@ -0,0 +1,32 @@ +R"===( +/* + * Thread configuration for each thread. Make sure it matches the number above. + * low_power_mode - This mode will double the cache usage, and double the single thread performance. It will + * consume much less power (as less cores are working), but will max out at around 80-85% of + * the maximum performance. + * + * no_prefetch - Some sytems can gain up to extra 5% here, but sometimes it will have no difference or make + * things slower. + * + * affine_to_cpu - This can be either false (no affinity), or the CPU core number. Note that on hyperthreading + * systems it is better to assign threads to physical cores. On Windows this usually means selecting + * even or odd numbered cpu numbers. For Linux it will be usually the lower CPU numbers, so for a 4 + * physical core CPU you should select cpu numbers 0-3. + * + * On the first run the miner will look at your system and suggest a basic configuration that will work, + * you can try to tweak it from there to get the best performance. + * + * A filled out configuration should look like this: + * "cpu_threads_conf" : + * [ + * { "low_power_mode" : false, "no_prefetch" : true, "affine_to_cpu" : 0 }, + * { "low_power_mode" : false, "no_prefetch" : true, "affine_to_cpu" : 1 }, + * ], + */ + +"cpu_threads_conf" : +[ +CPUCONFIG +], + +)===" diff --git a/xmrstak/backend/cpu/crypto/c_blake256.c b/xmrstak/backend/cpu/crypto/c_blake256.c new file mode 100644 index 0000000..ff623dd --- /dev/null +++ b/xmrstak/backend/cpu/crypto/c_blake256.c @@ -0,0 +1,326 @@ +/* + * The blake256_* and blake224_* functions are largely copied from + * blake256_light.c and blake224_light.c from the BLAKE website: + * + * http://131002.net/blake/ + * + * The hmac_* functions implement HMAC-BLAKE-256 and HMAC-BLAKE-224. + * HMAC is specified by RFC 2104. + */ + +#include <string.h> +#include <stdio.h> +#include <stdint.h> +#include "c_blake256.h" + +#define U8TO32(p) \ + (((uint32_t)((p)[0]) << 24) | ((uint32_t)((p)[1]) << 16) | \ + ((uint32_t)((p)[2]) << 8) | ((uint32_t)((p)[3]) )) +#define U32TO8(p, v) \ + (p)[0] = (uint8_t)((v) >> 24); (p)[1] = (uint8_t)((v) >> 16); \ + (p)[2] = (uint8_t)((v) >> 8); (p)[3] = (uint8_t)((v) ); + +const uint8_t sigma[][16] = { + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15}, + {14,10, 4, 8, 9,15,13, 6, 1,12, 0, 2,11, 7, 5, 3}, + {11, 8,12, 0, 5, 2,15,13,10,14, 3, 6, 7, 1, 9, 4}, + { 7, 9, 3, 1,13,12,11,14, 2, 6, 5,10, 4, 0,15, 8}, + { 9, 0, 5, 7, 2, 4,10,15,14, 1,11,12, 6, 8, 3,13}, + { 2,12, 6,10, 0,11, 8, 3, 4,13, 7, 5,15,14, 1, 9}, + {12, 5, 1,15,14,13, 4,10, 0, 7, 6, 3, 9, 2, 8,11}, + {13,11, 7,14,12, 1, 3, 9, 5, 0,15, 4, 8, 6, 2,10}, + { 6,15,14, 9,11, 3, 0, 8,12, 2,13, 7, 1, 4,10, 5}, + {10, 2, 8, 4, 7, 6, 1, 5,15,11, 9,14, 3,12,13, 0}, + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15}, + {14,10, 4, 8, 9,15,13, 6, 1,12, 0, 2,11, 7, 5, 3}, + {11, 8,12, 0, 5, 2,15,13,10,14, 3, 6, 7, 1, 9, 4}, + { 7, 9, 3, 1,13,12,11,14, 2, 6, 5,10, 4, 0,15, 8} +}; + +const uint32_t cst[16] = { + 0x243F6A88, 0x85A308D3, 0x13198A2E, 0x03707344, + 0xA4093822, 0x299F31D0, 0x082EFA98, 0xEC4E6C89, + 0x452821E6, 0x38D01377, 0xBE5466CF, 0x34E90C6C, + 0xC0AC29B7, 0xC97C50DD, 0x3F84D5B5, 0xB5470917 +}; + +static const uint8_t padding[] = { + 0x80,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +}; + + +void blake256_compress(state *S, const uint8_t *block) { + uint32_t v[16], m[16], i; + +#define ROT(x,n) (((x)<<(32-n))|((x)>>(n))) +#define G(a,b,c,d,e) \ + v[a] += (m[sigma[i][e]] ^ cst[sigma[i][e+1]]) + v[b]; \ + v[d] = ROT(v[d] ^ v[a],16); \ + v[c] += v[d]; \ + v[b] = ROT(v[b] ^ v[c],12); \ + v[a] += (m[sigma[i][e+1]] ^ cst[sigma[i][e]])+v[b]; \ + v[d] = ROT(v[d] ^ v[a], 8); \ + v[c] += v[d]; \ + v[b] = ROT(v[b] ^ v[c], 7); + + for (i = 0; i < 16; ++i) m[i] = U8TO32(block + i * 4); + for (i = 0; i < 8; ++i) v[i] = S->h[i]; + v[ 8] = S->s[0] ^ 0x243F6A88; + v[ 9] = S->s[1] ^ 0x85A308D3; + v[10] = S->s[2] ^ 0x13198A2E; + v[11] = S->s[3] ^ 0x03707344; + v[12] = 0xA4093822; + v[13] = 0x299F31D0; + v[14] = 0x082EFA98; + v[15] = 0xEC4E6C89; + + if (S->nullt == 0) { + v[12] ^= S->t[0]; + v[13] ^= S->t[0]; + v[14] ^= S->t[1]; + v[15] ^= S->t[1]; + } + + for (i = 0; i < 14; ++i) { + G(0, 4, 8, 12, 0); + G(1, 5, 9, 13, 2); + G(2, 6, 10, 14, 4); + G(3, 7, 11, 15, 6); + G(3, 4, 9, 14, 14); + G(2, 7, 8, 13, 12); + G(0, 5, 10, 15, 8); + G(1, 6, 11, 12, 10); + } + + for (i = 0; i < 16; ++i) S->h[i % 8] ^= v[i]; + for (i = 0; i < 8; ++i) S->h[i] ^= S->s[i % 4]; +} + +void blake256_init(state *S) { + S->h[0] = 0x6A09E667; + S->h[1] = 0xBB67AE85; + S->h[2] = 0x3C6EF372; + S->h[3] = 0xA54FF53A; + S->h[4] = 0x510E527F; + S->h[5] = 0x9B05688C; + S->h[6] = 0x1F83D9AB; + S->h[7] = 0x5BE0CD19; + S->t[0] = S->t[1] = S->buflen = S->nullt = 0; + S->s[0] = S->s[1] = S->s[2] = S->s[3] = 0; +} + +void blake224_init(state *S) { + S->h[0] = 0xC1059ED8; + S->h[1] = 0x367CD507; + S->h[2] = 0x3070DD17; + S->h[3] = 0xF70E5939; + S->h[4] = 0xFFC00B31; + S->h[5] = 0x68581511; + S->h[6] = 0x64F98FA7; + S->h[7] = 0xBEFA4FA4; + S->t[0] = S->t[1] = S->buflen = S->nullt = 0; + S->s[0] = S->s[1] = S->s[2] = S->s[3] = 0; +} + +// datalen = number of bits +void blake256_update(state *S, const uint8_t *data, uint64_t datalen) { + int left = S->buflen >> 3; + int fill = 64 - left; + + if (left && (((datalen >> 3) & 0x3F) >= (unsigned) fill)) { + memcpy((void *) (S->buf + left), (void *) data, fill); + S->t[0] += 512; + if (S->t[0] == 0) S->t[1]++; + blake256_compress(S, S->buf); + data += fill; + datalen -= (fill << 3); + left = 0; + } + + while (datalen >= 512) { + S->t[0] += 512; + if (S->t[0] == 0) S->t[1]++; + blake256_compress(S, data); + data += 64; + datalen -= 512; + } + + if (datalen > 0) { + memcpy((void *) (S->buf + left), (void *) data, datalen >> 3); + S->buflen = (left << 3) + datalen; + } else { + S->buflen = 0; + } +} + +// datalen = number of bits +void blake224_update(state *S, const uint8_t *data, uint64_t datalen) { + blake256_update(S, data, datalen); +} + +void blake256_final_h(state *S, uint8_t *digest, uint8_t pa, uint8_t pb) { + uint8_t msglen[8]; + uint32_t lo = S->t[0] + S->buflen, hi = S->t[1]; + if (lo < (unsigned) S->buflen) hi++; + U32TO8(msglen + 0, hi); + U32TO8(msglen + 4, lo); + + if (S->buflen == 440) { /* one padding byte */ + S->t[0] -= 8; + blake256_update(S, &pa, 8); + } else { + if (S->buflen < 440) { /* enough space to fill the block */ + if (S->buflen == 0) S->nullt = 1; + S->t[0] -= 440 - S->buflen; + blake256_update(S, padding, 440 - S->buflen); + } else { /* need 2 compressions */ + S->t[0] -= 512 - S->buflen; + blake256_update(S, padding, 512 - S->buflen); + S->t[0] -= 440; + blake256_update(S, padding + 1, 440); + S->nullt = 1; + } + blake256_update(S, &pb, 8); + S->t[0] -= 8; + } + S->t[0] -= 64; + blake256_update(S, msglen, 64); + + U32TO8(digest + 0, S->h[0]); + U32TO8(digest + 4, S->h[1]); + U32TO8(digest + 8, S->h[2]); + U32TO8(digest + 12, S->h[3]); + U32TO8(digest + 16, S->h[4]); + U32TO8(digest + 20, S->h[5]); + U32TO8(digest + 24, S->h[6]); + U32TO8(digest + 28, S->h[7]); +} + +void blake256_final(state *S, uint8_t *digest) { + blake256_final_h(S, digest, 0x81, 0x01); +} + +void blake224_final(state *S, uint8_t *digest) { + blake256_final_h(S, digest, 0x80, 0x00); +} + +// inlen = number of bytes +void blake256_hash(uint8_t *out, const uint8_t *in, uint64_t inlen) { + state S; + blake256_init(&S); + blake256_update(&S, in, inlen * 8); + blake256_final(&S, out); +} + +// inlen = number of bytes +void blake224_hash(uint8_t *out, const uint8_t *in, uint64_t inlen) { + state S; + blake224_init(&S); + blake224_update(&S, in, inlen * 8); + blake224_final(&S, out); +} + +// keylen = number of bytes +void hmac_blake256_init(hmac_state *S, const uint8_t *_key, uint64_t keylen) { + const uint8_t *key = _key; + uint8_t keyhash[32]; + uint8_t pad[64]; + uint64_t i; + + if (keylen > 64) { + blake256_hash(keyhash, key, keylen); + key = keyhash; + keylen = 32; + } + + blake256_init(&S->inner); + memset(pad, 0x36, 64); + for (i = 0; i < keylen; ++i) { + pad[i] ^= key[i]; + } + blake256_update(&S->inner, pad, 512); + + blake256_init(&S->outer); + memset(pad, 0x5c, 64); + for (i = 0; i < keylen; ++i) { + pad[i] ^= key[i]; + } + blake256_update(&S->outer, pad, 512); + + memset(keyhash, 0, 32); +} + +// keylen = number of bytes +void hmac_blake224_init(hmac_state *S, const uint8_t *_key, uint64_t keylen) { + const uint8_t *key = _key; + uint8_t keyhash[32]; + uint8_t pad[64]; + uint64_t i; + + if (keylen > 64) { + blake256_hash(keyhash, key, keylen); + key = keyhash; + keylen = 28; + } + + blake224_init(&S->inner); + memset(pad, 0x36, 64); + for (i = 0; i < keylen; ++i) { + pad[i] ^= key[i]; + } + blake224_update(&S->inner, pad, 512); + + blake224_init(&S->outer); + memset(pad, 0x5c, 64); + for (i = 0; i < keylen; ++i) { + pad[i] ^= key[i]; + } + blake224_update(&S->outer, pad, 512); + + memset(keyhash, 0, 32); +} + +// datalen = number of bits +void hmac_blake256_update(hmac_state *S, const uint8_t *data, uint64_t datalen) { + // update the inner state + blake256_update(&S->inner, data, datalen); +} + +// datalen = number of bits +void hmac_blake224_update(hmac_state *S, const uint8_t *data, uint64_t datalen) { + // update the inner state + blake224_update(&S->inner, data, datalen); +} + +void hmac_blake256_final(hmac_state *S, uint8_t *digest) { + uint8_t ihash[32]; + blake256_final(&S->inner, ihash); + blake256_update(&S->outer, ihash, 256); + blake256_final(&S->outer, digest); + memset(ihash, 0, 32); +} + +void hmac_blake224_final(hmac_state *S, uint8_t *digest) { + uint8_t ihash[32]; + blake224_final(&S->inner, ihash); + blake224_update(&S->outer, ihash, 224); + blake224_final(&S->outer, digest); + memset(ihash, 0, 32); +} + +// keylen = number of bytes; inlen = number of bytes +void hmac_blake256_hash(uint8_t *out, const uint8_t *key, uint64_t keylen, const uint8_t *in, uint64_t inlen) { + hmac_state S; + hmac_blake256_init(&S, key, keylen); + hmac_blake256_update(&S, in, inlen * 8); + hmac_blake256_final(&S, out); +} + +// keylen = number of bytes; inlen = number of bytes +void hmac_blake224_hash(uint8_t *out, const uint8_t *key, uint64_t keylen, const uint8_t *in, uint64_t inlen) { + hmac_state S; + hmac_blake224_init(&S, key, keylen); + hmac_blake224_update(&S, in, inlen * 8); + hmac_blake224_final(&S, out); +} diff --git a/xmrstak/backend/cpu/crypto/c_blake256.h b/xmrstak/backend/cpu/crypto/c_blake256.h new file mode 100644 index 0000000..b9c2aad --- /dev/null +++ b/xmrstak/backend/cpu/crypto/c_blake256.h @@ -0,0 +1,43 @@ +#ifndef _BLAKE256_H_ +#define _BLAKE256_H_ + +#include <stdint.h> + +typedef struct { + uint32_t h[8], s[4], t[2]; + int buflen, nullt; + uint8_t buf[64]; +} state; + +typedef struct { + state inner; + state outer; +} hmac_state; + +void blake256_init(state *); +void blake224_init(state *); + +void blake256_update(state *, const uint8_t *, uint64_t); +void blake224_update(state *, const uint8_t *, uint64_t); + +void blake256_final(state *, uint8_t *); +void blake224_final(state *, uint8_t *); + +void blake256_hash(uint8_t *, const uint8_t *, uint64_t); +void blake224_hash(uint8_t *, const uint8_t *, uint64_t); + +/* HMAC functions: */ + +void hmac_blake256_init(hmac_state *, const uint8_t *, uint64_t); +void hmac_blake224_init(hmac_state *, const uint8_t *, uint64_t); + +void hmac_blake256_update(hmac_state *, const uint8_t *, uint64_t); +void hmac_blake224_update(hmac_state *, const uint8_t *, uint64_t); + +void hmac_blake256_final(hmac_state *, uint8_t *); +void hmac_blake224_final(hmac_state *, uint8_t *); + +void hmac_blake256_hash(uint8_t *, const uint8_t *, uint64_t, const uint8_t *, uint64_t); +void hmac_blake224_hash(uint8_t *, const uint8_t *, uint64_t, const uint8_t *, uint64_t); + +#endif /* _BLAKE256_H_ */ diff --git a/xmrstak/backend/cpu/crypto/c_groestl.c b/xmrstak/backend/cpu/crypto/c_groestl.c new file mode 100644 index 0000000..1318d5a --- /dev/null +++ b/xmrstak/backend/cpu/crypto/c_groestl.c @@ -0,0 +1,360 @@ +/* hash.c April 2012 + * Groestl ANSI C code optimised for 32-bit machines + * Author: Thomas Krinninger + * + * This work is based on the implementation of + * Soeren S. Thomsen and Krystian Matusiewicz + * + * + */ + +#include "c_groestl.h" +#include "groestl_tables.h" + +#define P_TYPE 0 +#define Q_TYPE 1 + +const uint8_t shift_Values[2][8] = {{0,1,2,3,4,5,6,7},{1,3,5,7,0,2,4,6}}; + +const uint8_t indices_cyclic[15] = {0,1,2,3,4,5,6,7,0,1,2,3,4,5,6}; + + +#define ROTATE_COLUMN_DOWN(v1, v2, amount_bytes, temp_var) {temp_var = (v1<<(8*amount_bytes))|(v2>>(8*(4-amount_bytes))); \ + v2 = (v2<<(8*amount_bytes))|(v1>>(8*(4-amount_bytes))); \ + v1 = temp_var;} + + +#define COLUMN(x,y,i,c0,c1,c2,c3,c4,c5,c6,c7,tv1,tv2,tu,tl,t) \ + tu = T[2*(uint32_t)x[4*c0+0]]; \ + tl = T[2*(uint32_t)x[4*c0+0]+1]; \ + tv1 = T[2*(uint32_t)x[4*c1+1]]; \ + tv2 = T[2*(uint32_t)x[4*c1+1]+1]; \ + ROTATE_COLUMN_DOWN(tv1,tv2,1,t) \ + tu ^= tv1; \ + tl ^= tv2; \ + tv1 = T[2*(uint32_t)x[4*c2+2]]; \ + tv2 = T[2*(uint32_t)x[4*c2+2]+1]; \ + ROTATE_COLUMN_DOWN(tv1,tv2,2,t) \ + tu ^= tv1; \ + tl ^= tv2; \ + tv1 = T[2*(uint32_t)x[4*c3+3]]; \ + tv2 = T[2*(uint32_t)x[4*c3+3]+1]; \ + ROTATE_COLUMN_DOWN(tv1,tv2,3,t) \ + tu ^= tv1; \ + tl ^= tv2; \ + tl ^= T[2*(uint32_t)x[4*c4+0]]; \ + tu ^= T[2*(uint32_t)x[4*c4+0]+1]; \ + tv1 = T[2*(uint32_t)x[4*c5+1]]; \ + tv2 = T[2*(uint32_t)x[4*c5+1]+1]; \ + ROTATE_COLUMN_DOWN(tv1,tv2,1,t) \ + tl ^= tv1; \ + tu ^= tv2; \ + tv1 = T[2*(uint32_t)x[4*c6+2]]; \ + tv2 = T[2*(uint32_t)x[4*c6+2]+1]; \ + ROTATE_COLUMN_DOWN(tv1,tv2,2,t) \ + tl ^= tv1; \ + tu ^= tv2; \ + tv1 = T[2*(uint32_t)x[4*c7+3]]; \ + tv2 = T[2*(uint32_t)x[4*c7+3]+1]; \ + ROTATE_COLUMN_DOWN(tv1,tv2,3,t) \ + tl ^= tv1; \ + tu ^= tv2; \ + y[i] = tu; \ + y[i+1] = tl; + + +/* compute one round of P (short variants) */ +static void RND512P(uint8_t *x, uint32_t *y, uint32_t r) { + uint32_t temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp; + uint32_t* x32 = (uint32_t*)x; + x32[ 0] ^= 0x00000000^r; + x32[ 2] ^= 0x00000010^r; + x32[ 4] ^= 0x00000020^r; + x32[ 6] ^= 0x00000030^r; + x32[ 8] ^= 0x00000040^r; + x32[10] ^= 0x00000050^r; + x32[12] ^= 0x00000060^r; + x32[14] ^= 0x00000070^r; + COLUMN(x,y, 0, 0, 2, 4, 6, 9, 11, 13, 15, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp); + COLUMN(x,y, 2, 2, 4, 6, 8, 11, 13, 15, 1, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp); + COLUMN(x,y, 4, 4, 6, 8, 10, 13, 15, 1, 3, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp); + COLUMN(x,y, 6, 6, 8, 10, 12, 15, 1, 3, 5, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp); + COLUMN(x,y, 8, 8, 10, 12, 14, 1, 3, 5, 7, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp); + COLUMN(x,y,10, 10, 12, 14, 0, 3, 5, 7, 9, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp); + COLUMN(x,y,12, 12, 14, 0, 2, 5, 7, 9, 11, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp); + COLUMN(x,y,14, 14, 0, 2, 4, 7, 9, 11, 13, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp); +} + +/* compute one round of Q (short variants) */ +static void RND512Q(uint8_t *x, uint32_t *y, uint32_t r) { + uint32_t temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp; + uint32_t* x32 = (uint32_t*)x; + x32[ 0] = ~x32[ 0]; + x32[ 1] ^= 0xffffffff^r; + x32[ 2] = ~x32[ 2]; + x32[ 3] ^= 0xefffffff^r; + x32[ 4] = ~x32[ 4]; + x32[ 5] ^= 0xdfffffff^r; + x32[ 6] = ~x32[ 6]; + x32[ 7] ^= 0xcfffffff^r; + x32[ 8] = ~x32[ 8]; + x32[ 9] ^= 0xbfffffff^r; + x32[10] = ~x32[10]; + x32[11] ^= 0xafffffff^r; + x32[12] = ~x32[12]; + x32[13] ^= 0x9fffffff^r; + x32[14] = ~x32[14]; + x32[15] ^= 0x8fffffff^r; + COLUMN(x,y, 0, 2, 6, 10, 14, 1, 5, 9, 13, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp); + COLUMN(x,y, 2, 4, 8, 12, 0, 3, 7, 11, 15, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp); + COLUMN(x,y, 4, 6, 10, 14, 2, 5, 9, 13, 1, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp); + COLUMN(x,y, 6, 8, 12, 0, 4, 7, 11, 15, 3, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp); + COLUMN(x,y, 8, 10, 14, 2, 6, 9, 13, 1, 5, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp); + COLUMN(x,y,10, 12, 0, 4, 8, 11, 15, 3, 7, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp); + COLUMN(x,y,12, 14, 2, 6, 10, 13, 1, 5, 9, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp); + COLUMN(x,y,14, 0, 4, 8, 12, 15, 3, 7, 11, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp); +} + +/* compute compression function (short variants) */ +static void F512(uint32_t *h, const uint32_t *m) { + int i; + uint32_t Ptmp[2*COLS512]; + uint32_t Qtmp[2*COLS512]; + uint32_t y[2*COLS512]; + uint32_t z[2*COLS512]; + + for (i = 0; i < 2*COLS512; i++) { + z[i] = m[i]; + Ptmp[i] = h[i]^m[i]; + } + + /* compute Q(m) */ + RND512Q((uint8_t*)z, y, 0x00000000); + RND512Q((uint8_t*)y, z, 0x01000000); + RND512Q((uint8_t*)z, y, 0x02000000); + RND512Q((uint8_t*)y, z, 0x03000000); + RND512Q((uint8_t*)z, y, 0x04000000); + RND512Q((uint8_t*)y, z, 0x05000000); + RND512Q((uint8_t*)z, y, 0x06000000); + RND512Q((uint8_t*)y, z, 0x07000000); + RND512Q((uint8_t*)z, y, 0x08000000); + RND512Q((uint8_t*)y, Qtmp, 0x09000000); + + /* compute P(h+m) */ + RND512P((uint8_t*)Ptmp, y, 0x00000000); + RND512P((uint8_t*)y, z, 0x00000001); + RND512P((uint8_t*)z, y, 0x00000002); + RND512P((uint8_t*)y, z, 0x00000003); + RND512P((uint8_t*)z, y, 0x00000004); + RND512P((uint8_t*)y, z, 0x00000005); + RND512P((uint8_t*)z, y, 0x00000006); + RND512P((uint8_t*)y, z, 0x00000007); + RND512P((uint8_t*)z, y, 0x00000008); + RND512P((uint8_t*)y, Ptmp, 0x00000009); + + /* compute P(h+m) + Q(m) + h */ + for (i = 0; i < 2*COLS512; i++) { + h[i] ^= Ptmp[i]^Qtmp[i]; + } +} + + +/* digest up to msglen bytes of input (full blocks only) */ +static void Transform(groestlHashState *ctx, + const uint8_t *input, + int msglen) { + + /* digest message, one block at a time */ + for (; msglen >= SIZE512; + msglen -= SIZE512, input += SIZE512) { + F512(ctx->chaining,(uint32_t*)input); + + /* increment block counter */ + ctx->block_counter1++; + if (ctx->block_counter1 == 0) ctx->block_counter2++; + } +} + +/* given state h, do h <- P(h)+h */ +static void OutputTransformation(groestlHashState *ctx) { + int j; + uint32_t temp[2*COLS512]; + uint32_t y[2*COLS512]; + uint32_t z[2*COLS512]; + + + + for (j = 0; j < 2*COLS512; j++) { + temp[j] = ctx->chaining[j]; + } + RND512P((uint8_t*)temp, y, 0x00000000); + RND512P((uint8_t*)y, z, 0x00000001); + RND512P((uint8_t*)z, y, 0x00000002); + RND512P((uint8_t*)y, z, 0x00000003); + RND512P((uint8_t*)z, y, 0x00000004); + RND512P((uint8_t*)y, z, 0x00000005); + RND512P((uint8_t*)z, y, 0x00000006); + RND512P((uint8_t*)y, z, 0x00000007); + RND512P((uint8_t*)z, y, 0x00000008); + RND512P((uint8_t*)y, temp, 0x00000009); + for (j = 0; j < 2*COLS512; j++) { + ctx->chaining[j] ^= temp[j]; + } +} + +/* initialise context */ +static void Init(groestlHashState* ctx) { + int i = 0; + /* allocate memory for state and data buffer */ + + for(;i<(SIZE512/sizeof(uint32_t));i++) + { + ctx->chaining[i] = 0; + } + + /* set initial value */ + ctx->chaining[2*COLS512-1] = u32BIG((uint32_t)HASH_BIT_LEN); + + /* set other variables */ + ctx->buf_ptr = 0; + ctx->block_counter1 = 0; + ctx->block_counter2 = 0; + ctx->bits_in_last_byte = 0; +} + +/* update state with databitlen bits of input */ +static void Update(groestlHashState* ctx, + const BitSequence* input, + DataLength databitlen) { + int index = 0; + int msglen = (int)(databitlen/8); + int rem = (int)(databitlen%8); + + /* if the buffer contains data that has not yet been digested, first + add data to buffer until full */ + if (ctx->buf_ptr) { + while (ctx->buf_ptr < SIZE512 && index < msglen) { + ctx->buffer[(int)ctx->buf_ptr++] = input[index++]; + } + if (ctx->buf_ptr < SIZE512) { + /* buffer still not full, return */ + if (rem) { + ctx->bits_in_last_byte = rem; + ctx->buffer[(int)ctx->buf_ptr++] = input[index]; + } + return; + } + + /* digest buffer */ + ctx->buf_ptr = 0; + Transform(ctx, ctx->buffer, SIZE512); + } + + /* digest bulk of message */ + Transform(ctx, input+index, msglen-index); + index += ((msglen-index)/SIZE512)*SIZE512; + + /* store remaining data in buffer */ + while (index < msglen) { + ctx->buffer[(int)ctx->buf_ptr++] = input[index++]; + } + + /* if non-integral number of bytes have been supplied, store + remaining bits in last byte, together with information about + number of bits */ + if (rem) { + ctx->bits_in_last_byte = rem; + ctx->buffer[(int)ctx->buf_ptr++] = input[index]; + } +} + +#define BILB ctx->bits_in_last_byte + +/* finalise: process remaining data (including padding), perform + output transformation, and write hash result to 'output' */ +static void Final(groestlHashState* ctx, + BitSequence* output) { + int i, j = 0, hashbytelen = HASH_BIT_LEN/8; + uint8_t *s = (BitSequence*)ctx->chaining; + + /* pad with '1'-bit and first few '0'-bits */ + if (BILB) { + ctx->buffer[(int)ctx->buf_ptr-1] &= ((1<<BILB)-1)<<(8-BILB); + ctx->buffer[(int)ctx->buf_ptr-1] ^= 0x1<<(7-BILB); + BILB = 0; + } + else ctx->buffer[(int)ctx->buf_ptr++] = 0x80; + + /* pad with '0'-bits */ + if (ctx->buf_ptr > SIZE512-LENGTHFIELDLEN) { + /* padding requires two blocks */ + while (ctx->buf_ptr < SIZE512) { + ctx->buffer[(int)ctx->buf_ptr++] = 0; + } + /* digest first padding block */ + Transform(ctx, ctx->buffer, SIZE512); + ctx->buf_ptr = 0; + } + while (ctx->buf_ptr < SIZE512-LENGTHFIELDLEN) { + ctx->buffer[(int)ctx->buf_ptr++] = 0; + } + + /* length padding */ + ctx->block_counter1++; + if (ctx->block_counter1 == 0) ctx->block_counter2++; + ctx->buf_ptr = SIZE512; + + while (ctx->buf_ptr > SIZE512-(int)sizeof(uint32_t)) { + ctx->buffer[(int)--ctx->buf_ptr] = (uint8_t)ctx->block_counter1; + ctx->block_counter1 >>= 8; + } + while (ctx->buf_ptr > SIZE512-LENGTHFIELDLEN) { + ctx->buffer[(int)--ctx->buf_ptr] = (uint8_t)ctx->block_counter2; + ctx->block_counter2 >>= 8; + } + /* digest final padding block */ + Transform(ctx, ctx->buffer, SIZE512); + /* perform output transformation */ + OutputTransformation(ctx); + + /* store hash result in output */ + for (i = SIZE512-hashbytelen; i < SIZE512; i++,j++) { + output[j] = s[i]; + } + + /* zeroise relevant variables and deallocate memory */ + for (i = 0; i < COLS512; i++) { + ctx->chaining[i] = 0; + } + for (i = 0; i < SIZE512; i++) { + ctx->buffer[i] = 0; + } +} + +/* hash bit sequence */ +void groestl(const BitSequence* data, + DataLength databitlen, + BitSequence* hashval) { + + groestlHashState context; + + /* initialise */ + Init(&context); + + + /* process message */ + Update(&context, data, databitlen); + + /* finalise */ + Final(&context, hashval); +} +/* +static int crypto_hash(unsigned char *out, + const unsigned char *in, + unsigned long long len) +{ + groestl(in, 8*len, out); + return 0; +} + +*/ diff --git a/xmrstak/backend/cpu/crypto/c_groestl.h b/xmrstak/backend/cpu/crypto/c_groestl.h new file mode 100644 index 0000000..2b51339 --- /dev/null +++ b/xmrstak/backend/cpu/crypto/c_groestl.h @@ -0,0 +1,60 @@ +#ifndef __hash_h +#define __hash_h +/* +#include "crypto_uint8.h" +#include "crypto_uint32.h" +#include "crypto_uint64.h" +#include "crypto_hash.h" + +typedef crypto_uint8 uint8_t; +typedef crypto_uint32 uint32_t; +typedef crypto_uint64 uint64_t; +*/ +#include <stdint.h> + +#include "hash.h" + +/* some sizes (number of bytes) */ +#define ROWS 8 +#define LENGTHFIELDLEN ROWS +#define COLS512 8 + +#define SIZE512 (ROWS*COLS512) + +#define ROUNDS512 10 +#define HASH_BIT_LEN 256 + +#define ROTL32(v, n) ((((v)<<(n))|((v)>>(32-(n))))&li_32(ffffffff)) + + +#define li_32(h) 0x##h##u +#define EXT_BYTE(var,n) ((uint8_t)((uint32_t)(var) >> (8*n))) +#define u32BIG(a) \ + ((ROTL32(a,8) & li_32(00FF00FF)) | \ + (ROTL32(a,24) & li_32(FF00FF00))) + + +/* NIST API begin */ +typedef struct { + uint32_t chaining[SIZE512/sizeof(uint32_t)]; /* actual state */ + uint32_t block_counter1, + block_counter2; /* message block counter(s) */ + BitSequence buffer[SIZE512]; /* data buffer */ + int buf_ptr; /* data buffer pointer */ + int bits_in_last_byte; /* no. of message bits in last byte of + data buffer */ +} groestlHashState; + +/*void Init(hashState*); +void Update(hashState*, const BitSequence*, DataLength); +void Final(hashState*, BitSequence*); */ +void groestl(const BitSequence*, DataLength, BitSequence*); +/* NIST API end */ + +/* +int crypto_hash(unsigned char *out, + const unsigned char *in, + unsigned long long len); +*/ + +#endif /* __hash_h */ diff --git a/xmrstak/backend/cpu/crypto/c_jh.c b/xmrstak/backend/cpu/crypto/c_jh.c new file mode 100644 index 0000000..9d685a0 --- /dev/null +++ b/xmrstak/backend/cpu/crypto/c_jh.c @@ -0,0 +1,367 @@ +/*This program gives the 64-bit optimized bitslice implementation of JH using ANSI C + + -------------------------------- + Performance + + Microprocessor: Intel CORE 2 processor (Core 2 Duo Mobile T6600 2.2GHz) + Operating System: 64-bit Ubuntu 10.04 (Linux kernel 2.6.32-22-generic) + Speed for long message: + 1) 45.8 cycles/byte compiler: Intel C++ Compiler 11.1 compilation option: icc -O2 + 2) 56.8 cycles/byte compiler: gcc 4.4.3 compilation option: gcc -O3 + + -------------------------------- + Last Modified: January 16, 2011 +*/ + +#include "c_jh.h" + +#include <stdint.h> +#include <string.h> + +/*typedef unsigned long long uint64;*/ +typedef uint64_t uint64; + +/*define data alignment for different C compilers*/ +#if defined(__GNUC__) + #define DATA_ALIGN16(x) x __attribute__ ((aligned(16))) +#else + #define DATA_ALIGN16(x) __declspec(align(16)) x +#endif + + +typedef struct { + int hashbitlen; /*the message digest size*/ + unsigned long long databitlen; /*the message size in bits*/ + unsigned long long datasize_in_buffer; /*the size of the message remained in buffer; assumed to be multiple of 8bits except for the last partial block at the end of the message*/ + DATA_ALIGN16(uint64 x[8][2]); /*the 1024-bit state, ( x[i][0] || x[i][1] ) is the ith row of the state in the pseudocode*/ + unsigned char buffer[64]; /*the 512-bit message block to be hashed;*/ +} hashState; + + +/*The initial hash value H(0)*/ +const unsigned char JH224_H0[128]={0x2d,0xfe,0xdd,0x62,0xf9,0x9a,0x98,0xac,0xae,0x7c,0xac,0xd6,0x19,0xd6,0x34,0xe7,0xa4,0x83,0x10,0x5,0xbc,0x30,0x12,0x16,0xb8,0x60,0x38,0xc6,0xc9,0x66,0x14,0x94,0x66,0xd9,0x89,0x9f,0x25,0x80,0x70,0x6f,0xce,0x9e,0xa3,0x1b,0x1d,0x9b,0x1a,0xdc,0x11,0xe8,0x32,0x5f,0x7b,0x36,0x6e,0x10,0xf9,0x94,0x85,0x7f,0x2,0xfa,0x6,0xc1,0x1b,0x4f,0x1b,0x5c,0xd8,0xc8,0x40,0xb3,0x97,0xf6,0xa1,0x7f,0x6e,0x73,0x80,0x99,0xdc,0xdf,0x93,0xa5,0xad,0xea,0xa3,0xd3,0xa4,0x31,0xe8,0xde,0xc9,0x53,0x9a,0x68,0x22,0xb4,0xa9,0x8a,0xec,0x86,0xa1,0xe4,0xd5,0x74,0xac,0x95,0x9c,0xe5,0x6c,0xf0,0x15,0x96,0xd,0xea,0xb5,0xab,0x2b,0xbf,0x96,0x11,0xdc,0xf0,0xdd,0x64,0xea,0x6e}; +const unsigned char JH256_H0[128]={0xeb,0x98,0xa3,0x41,0x2c,0x20,0xd3,0xeb,0x92,0xcd,0xbe,0x7b,0x9c,0xb2,0x45,0xc1,0x1c,0x93,0x51,0x91,0x60,0xd4,0xc7,0xfa,0x26,0x0,0x82,0xd6,0x7e,0x50,0x8a,0x3,0xa4,0x23,0x9e,0x26,0x77,0x26,0xb9,0x45,0xe0,0xfb,0x1a,0x48,0xd4,0x1a,0x94,0x77,0xcd,0xb5,0xab,0x26,0x2,0x6b,0x17,0x7a,0x56,0xf0,0x24,0x42,0xf,0xff,0x2f,0xa8,0x71,0xa3,0x96,0x89,0x7f,0x2e,0x4d,0x75,0x1d,0x14,0x49,0x8,0xf7,0x7d,0xe2,0x62,0x27,0x76,0x95,0xf7,0x76,0x24,0x8f,0x94,0x87,0xd5,0xb6,0x57,0x47,0x80,0x29,0x6c,0x5c,0x5e,0x27,0x2d,0xac,0x8e,0xd,0x6c,0x51,0x84,0x50,0xc6,0x57,0x5,0x7a,0xf,0x7b,0xe4,0xd3,0x67,0x70,0x24,0x12,0xea,0x89,0xe3,0xab,0x13,0xd3,0x1c,0xd7,0x69}; +const unsigned char JH384_H0[128]={0x48,0x1e,0x3b,0xc6,0xd8,0x13,0x39,0x8a,0x6d,0x3b,0x5e,0x89,0x4a,0xde,0x87,0x9b,0x63,0xfa,0xea,0x68,0xd4,0x80,0xad,0x2e,0x33,0x2c,0xcb,0x21,0x48,0xf,0x82,0x67,0x98,0xae,0xc8,0x4d,0x90,0x82,0xb9,0x28,0xd4,0x55,0xea,0x30,0x41,0x11,0x42,0x49,0x36,0xf5,0x55,0xb2,0x92,0x48,0x47,0xec,0xc7,0x25,0xa,0x93,0xba,0xf4,0x3c,0xe1,0x56,0x9b,0x7f,0x8a,0x27,0xdb,0x45,0x4c,0x9e,0xfc,0xbd,0x49,0x63,0x97,0xaf,0xe,0x58,0x9f,0xc2,0x7d,0x26,0xaa,0x80,0xcd,0x80,0xc0,0x8b,0x8c,0x9d,0xeb,0x2e,0xda,0x8a,0x79,0x81,0xe8,0xf8,0xd5,0x37,0x3a,0xf4,0x39,0x67,0xad,0xdd,0xd1,0x7a,0x71,0xa9,0xb4,0xd3,0xbd,0xa4,0x75,0xd3,0x94,0x97,0x6c,0x3f,0xba,0x98,0x42,0x73,0x7f}; +const unsigned char JH512_H0[128]={0x6f,0xd1,0x4b,0x96,0x3e,0x0,0xaa,0x17,0x63,0x6a,0x2e,0x5,0x7a,0x15,0xd5,0x43,0x8a,0x22,0x5e,0x8d,0xc,0x97,0xef,0xb,0xe9,0x34,0x12,0x59,0xf2,0xb3,0xc3,0x61,0x89,0x1d,0xa0,0xc1,0x53,0x6f,0x80,0x1e,0x2a,0xa9,0x5,0x6b,0xea,0x2b,0x6d,0x80,0x58,0x8e,0xcc,0xdb,0x20,0x75,0xba,0xa6,0xa9,0xf,0x3a,0x76,0xba,0xf8,0x3b,0xf7,0x1,0x69,0xe6,0x5,0x41,0xe3,0x4a,0x69,0x46,0xb5,0x8a,0x8e,0x2e,0x6f,0xe6,0x5a,0x10,0x47,0xa7,0xd0,0xc1,0x84,0x3c,0x24,0x3b,0x6e,0x71,0xb1,0x2d,0x5a,0xc1,0x99,0xcf,0x57,0xf6,0xec,0x9d,0xb1,0xf8,0x56,0xa7,0x6,0x88,0x7c,0x57,0x16,0xb1,0x56,0xe3,0xc2,0xfc,0xdf,0xe6,0x85,0x17,0xfb,0x54,0x5a,0x46,0x78,0xcc,0x8c,0xdd,0x4b}; + +/*42 round constants, each round constant is 32-byte (256-bit)*/ +const unsigned char E8_bitslice_roundconstant[42][32]={ +{0x72,0xd5,0xde,0xa2,0xdf,0x15,0xf8,0x67,0x7b,0x84,0x15,0xa,0xb7,0x23,0x15,0x57,0x81,0xab,0xd6,0x90,0x4d,0x5a,0x87,0xf6,0x4e,0x9f,0x4f,0xc5,0xc3,0xd1,0x2b,0x40}, +{0xea,0x98,0x3a,0xe0,0x5c,0x45,0xfa,0x9c,0x3,0xc5,0xd2,0x99,0x66,0xb2,0x99,0x9a,0x66,0x2,0x96,0xb4,0xf2,0xbb,0x53,0x8a,0xb5,0x56,0x14,0x1a,0x88,0xdb,0xa2,0x31}, +{0x3,0xa3,0x5a,0x5c,0x9a,0x19,0xe,0xdb,0x40,0x3f,0xb2,0xa,0x87,0xc1,0x44,0x10,0x1c,0x5,0x19,0x80,0x84,0x9e,0x95,0x1d,0x6f,0x33,0xeb,0xad,0x5e,0xe7,0xcd,0xdc}, +{0x10,0xba,0x13,0x92,0x2,0xbf,0x6b,0x41,0xdc,0x78,0x65,0x15,0xf7,0xbb,0x27,0xd0,0xa,0x2c,0x81,0x39,0x37,0xaa,0x78,0x50,0x3f,0x1a,0xbf,0xd2,0x41,0x0,0x91,0xd3}, +{0x42,0x2d,0x5a,0xd,0xf6,0xcc,0x7e,0x90,0xdd,0x62,0x9f,0x9c,0x92,0xc0,0x97,0xce,0x18,0x5c,0xa7,0xb,0xc7,0x2b,0x44,0xac,0xd1,0xdf,0x65,0xd6,0x63,0xc6,0xfc,0x23}, +{0x97,0x6e,0x6c,0x3,0x9e,0xe0,0xb8,0x1a,0x21,0x5,0x45,0x7e,0x44,0x6c,0xec,0xa8,0xee,0xf1,0x3,0xbb,0x5d,0x8e,0x61,0xfa,0xfd,0x96,0x97,0xb2,0x94,0x83,0x81,0x97}, +{0x4a,0x8e,0x85,0x37,0xdb,0x3,0x30,0x2f,0x2a,0x67,0x8d,0x2d,0xfb,0x9f,0x6a,0x95,0x8a,0xfe,0x73,0x81,0xf8,0xb8,0x69,0x6c,0x8a,0xc7,0x72,0x46,0xc0,0x7f,0x42,0x14}, +{0xc5,0xf4,0x15,0x8f,0xbd,0xc7,0x5e,0xc4,0x75,0x44,0x6f,0xa7,0x8f,0x11,0xbb,0x80,0x52,0xde,0x75,0xb7,0xae,0xe4,0x88,0xbc,0x82,0xb8,0x0,0x1e,0x98,0xa6,0xa3,0xf4}, +{0x8e,0xf4,0x8f,0x33,0xa9,0xa3,0x63,0x15,0xaa,0x5f,0x56,0x24,0xd5,0xb7,0xf9,0x89,0xb6,0xf1,0xed,0x20,0x7c,0x5a,0xe0,0xfd,0x36,0xca,0xe9,0x5a,0x6,0x42,0x2c,0x36}, +{0xce,0x29,0x35,0x43,0x4e,0xfe,0x98,0x3d,0x53,0x3a,0xf9,0x74,0x73,0x9a,0x4b,0xa7,0xd0,0xf5,0x1f,0x59,0x6f,0x4e,0x81,0x86,0xe,0x9d,0xad,0x81,0xaf,0xd8,0x5a,0x9f}, +{0xa7,0x5,0x6,0x67,0xee,0x34,0x62,0x6a,0x8b,0xb,0x28,0xbe,0x6e,0xb9,0x17,0x27,0x47,0x74,0x7,0x26,0xc6,0x80,0x10,0x3f,0xe0,0xa0,0x7e,0x6f,0xc6,0x7e,0x48,0x7b}, +{0xd,0x55,0xa,0xa5,0x4a,0xf8,0xa4,0xc0,0x91,0xe3,0xe7,0x9f,0x97,0x8e,0xf1,0x9e,0x86,0x76,0x72,0x81,0x50,0x60,0x8d,0xd4,0x7e,0x9e,0x5a,0x41,0xf3,0xe5,0xb0,0x62}, +{0xfc,0x9f,0x1f,0xec,0x40,0x54,0x20,0x7a,0xe3,0xe4,0x1a,0x0,0xce,0xf4,0xc9,0x84,0x4f,0xd7,0x94,0xf5,0x9d,0xfa,0x95,0xd8,0x55,0x2e,0x7e,0x11,0x24,0xc3,0x54,0xa5}, +{0x5b,0xdf,0x72,0x28,0xbd,0xfe,0x6e,0x28,0x78,0xf5,0x7f,0xe2,0xf,0xa5,0xc4,0xb2,0x5,0x89,0x7c,0xef,0xee,0x49,0xd3,0x2e,0x44,0x7e,0x93,0x85,0xeb,0x28,0x59,0x7f}, +{0x70,0x5f,0x69,0x37,0xb3,0x24,0x31,0x4a,0x5e,0x86,0x28,0xf1,0x1d,0xd6,0xe4,0x65,0xc7,0x1b,0x77,0x4,0x51,0xb9,0x20,0xe7,0x74,0xfe,0x43,0xe8,0x23,0xd4,0x87,0x8a}, +{0x7d,0x29,0xe8,0xa3,0x92,0x76,0x94,0xf2,0xdd,0xcb,0x7a,0x9,0x9b,0x30,0xd9,0xc1,0x1d,0x1b,0x30,0xfb,0x5b,0xdc,0x1b,0xe0,0xda,0x24,0x49,0x4f,0xf2,0x9c,0x82,0xbf}, +{0xa4,0xe7,0xba,0x31,0xb4,0x70,0xbf,0xff,0xd,0x32,0x44,0x5,0xde,0xf8,0xbc,0x48,0x3b,0xae,0xfc,0x32,0x53,0xbb,0xd3,0x39,0x45,0x9f,0xc3,0xc1,0xe0,0x29,0x8b,0xa0}, +{0xe5,0xc9,0x5,0xfd,0xf7,0xae,0x9,0xf,0x94,0x70,0x34,0x12,0x42,0x90,0xf1,0x34,0xa2,0x71,0xb7,0x1,0xe3,0x44,0xed,0x95,0xe9,0x3b,0x8e,0x36,0x4f,0x2f,0x98,0x4a}, +{0x88,0x40,0x1d,0x63,0xa0,0x6c,0xf6,0x15,0x47,0xc1,0x44,0x4b,0x87,0x52,0xaf,0xff,0x7e,0xbb,0x4a,0xf1,0xe2,0xa,0xc6,0x30,0x46,0x70,0xb6,0xc5,0xcc,0x6e,0x8c,0xe6}, +{0xa4,0xd5,0xa4,0x56,0xbd,0x4f,0xca,0x0,0xda,0x9d,0x84,0x4b,0xc8,0x3e,0x18,0xae,0x73,0x57,0xce,0x45,0x30,0x64,0xd1,0xad,0xe8,0xa6,0xce,0x68,0x14,0x5c,0x25,0x67}, +{0xa3,0xda,0x8c,0xf2,0xcb,0xe,0xe1,0x16,0x33,0xe9,0x6,0x58,0x9a,0x94,0x99,0x9a,0x1f,0x60,0xb2,0x20,0xc2,0x6f,0x84,0x7b,0xd1,0xce,0xac,0x7f,0xa0,0xd1,0x85,0x18}, +{0x32,0x59,0x5b,0xa1,0x8d,0xdd,0x19,0xd3,0x50,0x9a,0x1c,0xc0,0xaa,0xa5,0xb4,0x46,0x9f,0x3d,0x63,0x67,0xe4,0x4,0x6b,0xba,0xf6,0xca,0x19,0xab,0xb,0x56,0xee,0x7e}, +{0x1f,0xb1,0x79,0xea,0xa9,0x28,0x21,0x74,0xe9,0xbd,0xf7,0x35,0x3b,0x36,0x51,0xee,0x1d,0x57,0xac,0x5a,0x75,0x50,0xd3,0x76,0x3a,0x46,0xc2,0xfe,0xa3,0x7d,0x70,0x1}, +{0xf7,0x35,0xc1,0xaf,0x98,0xa4,0xd8,0x42,0x78,0xed,0xec,0x20,0x9e,0x6b,0x67,0x79,0x41,0x83,0x63,0x15,0xea,0x3a,0xdb,0xa8,0xfa,0xc3,0x3b,0x4d,0x32,0x83,0x2c,0x83}, +{0xa7,0x40,0x3b,0x1f,0x1c,0x27,0x47,0xf3,0x59,0x40,0xf0,0x34,0xb7,0x2d,0x76,0x9a,0xe7,0x3e,0x4e,0x6c,0xd2,0x21,0x4f,0xfd,0xb8,0xfd,0x8d,0x39,0xdc,0x57,0x59,0xef}, +{0x8d,0x9b,0xc,0x49,0x2b,0x49,0xeb,0xda,0x5b,0xa2,0xd7,0x49,0x68,0xf3,0x70,0xd,0x7d,0x3b,0xae,0xd0,0x7a,0x8d,0x55,0x84,0xf5,0xa5,0xe9,0xf0,0xe4,0xf8,0x8e,0x65}, +{0xa0,0xb8,0xa2,0xf4,0x36,0x10,0x3b,0x53,0xc,0xa8,0x7,0x9e,0x75,0x3e,0xec,0x5a,0x91,0x68,0x94,0x92,0x56,0xe8,0x88,0x4f,0x5b,0xb0,0x5c,0x55,0xf8,0xba,0xbc,0x4c}, +{0xe3,0xbb,0x3b,0x99,0xf3,0x87,0x94,0x7b,0x75,0xda,0xf4,0xd6,0x72,0x6b,0x1c,0x5d,0x64,0xae,0xac,0x28,0xdc,0x34,0xb3,0x6d,0x6c,0x34,0xa5,0x50,0xb8,0x28,0xdb,0x71}, +{0xf8,0x61,0xe2,0xf2,0x10,0x8d,0x51,0x2a,0xe3,0xdb,0x64,0x33,0x59,0xdd,0x75,0xfc,0x1c,0xac,0xbc,0xf1,0x43,0xce,0x3f,0xa2,0x67,0xbb,0xd1,0x3c,0x2,0xe8,0x43,0xb0}, +{0x33,0xa,0x5b,0xca,0x88,0x29,0xa1,0x75,0x7f,0x34,0x19,0x4d,0xb4,0x16,0x53,0x5c,0x92,0x3b,0x94,0xc3,0xe,0x79,0x4d,0x1e,0x79,0x74,0x75,0xd7,0xb6,0xee,0xaf,0x3f}, +{0xea,0xa8,0xd4,0xf7,0xbe,0x1a,0x39,0x21,0x5c,0xf4,0x7e,0x9,0x4c,0x23,0x27,0x51,0x26,0xa3,0x24,0x53,0xba,0x32,0x3c,0xd2,0x44,0xa3,0x17,0x4a,0x6d,0xa6,0xd5,0xad}, +{0xb5,0x1d,0x3e,0xa6,0xaf,0xf2,0xc9,0x8,0x83,0x59,0x3d,0x98,0x91,0x6b,0x3c,0x56,0x4c,0xf8,0x7c,0xa1,0x72,0x86,0x60,0x4d,0x46,0xe2,0x3e,0xcc,0x8,0x6e,0xc7,0xf6}, +{0x2f,0x98,0x33,0xb3,0xb1,0xbc,0x76,0x5e,0x2b,0xd6,0x66,0xa5,0xef,0xc4,0xe6,0x2a,0x6,0xf4,0xb6,0xe8,0xbe,0xc1,0xd4,0x36,0x74,0xee,0x82,0x15,0xbc,0xef,0x21,0x63}, +{0xfd,0xc1,0x4e,0xd,0xf4,0x53,0xc9,0x69,0xa7,0x7d,0x5a,0xc4,0x6,0x58,0x58,0x26,0x7e,0xc1,0x14,0x16,0x6,0xe0,0xfa,0x16,0x7e,0x90,0xaf,0x3d,0x28,0x63,0x9d,0x3f}, +{0xd2,0xc9,0xf2,0xe3,0x0,0x9b,0xd2,0xc,0x5f,0xaa,0xce,0x30,0xb7,0xd4,0xc,0x30,0x74,0x2a,0x51,0x16,0xf2,0xe0,0x32,0x98,0xd,0xeb,0x30,0xd8,0xe3,0xce,0xf8,0x9a}, +{0x4b,0xc5,0x9e,0x7b,0xb5,0xf1,0x79,0x92,0xff,0x51,0xe6,0x6e,0x4,0x86,0x68,0xd3,0x9b,0x23,0x4d,0x57,0xe6,0x96,0x67,0x31,0xcc,0xe6,0xa6,0xf3,0x17,0xa,0x75,0x5}, +{0xb1,0x76,0x81,0xd9,0x13,0x32,0x6c,0xce,0x3c,0x17,0x52,0x84,0xf8,0x5,0xa2,0x62,0xf4,0x2b,0xcb,0xb3,0x78,0x47,0x15,0x47,0xff,0x46,0x54,0x82,0x23,0x93,0x6a,0x48}, +{0x38,0xdf,0x58,0x7,0x4e,0x5e,0x65,0x65,0xf2,0xfc,0x7c,0x89,0xfc,0x86,0x50,0x8e,0x31,0x70,0x2e,0x44,0xd0,0xb,0xca,0x86,0xf0,0x40,0x9,0xa2,0x30,0x78,0x47,0x4e}, +{0x65,0xa0,0xee,0x39,0xd1,0xf7,0x38,0x83,0xf7,0x5e,0xe9,0x37,0xe4,0x2c,0x3a,0xbd,0x21,0x97,0xb2,0x26,0x1,0x13,0xf8,0x6f,0xa3,0x44,0xed,0xd1,0xef,0x9f,0xde,0xe7}, +{0x8b,0xa0,0xdf,0x15,0x76,0x25,0x92,0xd9,0x3c,0x85,0xf7,0xf6,0x12,0xdc,0x42,0xbe,0xd8,0xa7,0xec,0x7c,0xab,0x27,0xb0,0x7e,0x53,0x8d,0x7d,0xda,0xaa,0x3e,0xa8,0xde}, +{0xaa,0x25,0xce,0x93,0xbd,0x2,0x69,0xd8,0x5a,0xf6,0x43,0xfd,0x1a,0x73,0x8,0xf9,0xc0,0x5f,0xef,0xda,0x17,0x4a,0x19,0xa5,0x97,0x4d,0x66,0x33,0x4c,0xfd,0x21,0x6a}, +{0x35,0xb4,0x98,0x31,0xdb,0x41,0x15,0x70,0xea,0x1e,0xf,0xbb,0xed,0xcd,0x54,0x9b,0x9a,0xd0,0x63,0xa1,0x51,0x97,0x40,0x72,0xf6,0x75,0x9d,0xbf,0x91,0x47,0x6f,0xe2}}; + + +static void E8(hashState *state); /*The bijective function E8, in bitslice form*/ +static void F8(hashState *state); /*The compression function F8 */ + +/*The API functions*/ +static HashReturn Init(hashState *state, int hashbitlen); +static HashReturn Update(hashState *state, const BitSequence *data, DataLength databitlen); +static HashReturn Final(hashState *state, BitSequence *hashval); +HashReturn jh_hash(int hashbitlen, const BitSequence *data,DataLength databitlen, BitSequence *hashval); + +/*swapping bit 2i with bit 2i+1 of 64-bit x*/ +#define SWAP1(x) (x) = ((((x) & 0x5555555555555555ULL) << 1) | (((x) & 0xaaaaaaaaaaaaaaaaULL) >> 1)); +/*swapping bits 4i||4i+1 with bits 4i+2||4i+3 of 64-bit x*/ +#define SWAP2(x) (x) = ((((x) & 0x3333333333333333ULL) << 2) | (((x) & 0xccccccccccccccccULL) >> 2)); +/*swapping bits 8i||8i+1||8i+2||8i+3 with bits 8i+4||8i+5||8i+6||8i+7 of 64-bit x*/ +#define SWAP4(x) (x) = ((((x) & 0x0f0f0f0f0f0f0f0fULL) << 4) | (((x) & 0xf0f0f0f0f0f0f0f0ULL) >> 4)); +/*swapping bits 16i||16i+1||......||16i+7 with bits 16i+8||16i+9||......||16i+15 of 64-bit x*/ +#define SWAP8(x) (x) = ((((x) & 0x00ff00ff00ff00ffULL) << 8) | (((x) & 0xff00ff00ff00ff00ULL) >> 8)); +/*swapping bits 32i||32i+1||......||32i+15 with bits 32i+16||32i+17||......||32i+31 of 64-bit x*/ +#define SWAP16(x) (x) = ((((x) & 0x0000ffff0000ffffULL) << 16) | (((x) & 0xffff0000ffff0000ULL) >> 16)); +/*swapping bits 64i||64i+1||......||64i+31 with bits 64i+32||64i+33||......||64i+63 of 64-bit x*/ +#define SWAP32(x) (x) = (((x) << 32) | ((x) >> 32)); + +/*The MDS transform*/ +#define L(m0,m1,m2,m3,m4,m5,m6,m7) \ + (m4) ^= (m1); \ + (m5) ^= (m2); \ + (m6) ^= (m0) ^ (m3); \ + (m7) ^= (m0); \ + (m0) ^= (m5); \ + (m1) ^= (m6); \ + (m2) ^= (m4) ^ (m7); \ + (m3) ^= (m4); + +/*Two Sboxes are computed in parallel, each Sbox implements S0 and S1, selected by a constant bit*/ +/*The reason to compute two Sboxes in parallel is to try to fully utilize the parallel processing power*/ +#define SS(m0,m1,m2,m3,m4,m5,m6,m7,cc0,cc1) \ + m3 = ~(m3); \ + m7 = ~(m7); \ + m0 ^= ((~(m2)) & (cc0)); \ + m4 ^= ((~(m6)) & (cc1)); \ + temp0 = (cc0) ^ ((m0) & (m1));\ + temp1 = (cc1) ^ ((m4) & (m5));\ + m0 ^= ((m2) & (m3)); \ + m4 ^= ((m6) & (m7)); \ + m3 ^= ((~(m1)) & (m2)); \ + m7 ^= ((~(m5)) & (m6)); \ + m1 ^= ((m0) & (m2)); \ + m5 ^= ((m4) & (m6)); \ + m2 ^= ((m0) & (~(m3))); \ + m6 ^= ((m4) & (~(m7))); \ + m0 ^= ((m1) | (m3)); \ + m4 ^= ((m5) | (m7)); \ + m3 ^= ((m1) & (m2)); \ + m7 ^= ((m5) & (m6)); \ + m1 ^= (temp0 & (m0)); \ + m5 ^= (temp1 & (m4)); \ + m2 ^= temp0; \ + m6 ^= temp1; + +/*The bijective function E8, in bitslice form*/ +static void E8(hashState *state) +{ + uint64 i,roundnumber,temp0,temp1; + + for (roundnumber = 0; roundnumber < 42; roundnumber = roundnumber+7) { + /*round 7*roundnumber+0: Sbox, MDS and Swapping layers*/ + for (i = 0; i < 2; i++) { + SS(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i],((uint64*)E8_bitslice_roundconstant[roundnumber+0])[i],((uint64*)E8_bitslice_roundconstant[roundnumber+0])[i+2] ); + L(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i]); + SWAP1(state->x[1][i]); SWAP1(state->x[3][i]); SWAP1(state->x[5][i]); SWAP1(state->x[7][i]); + } + + /*round 7*roundnumber+1: Sbox, MDS and Swapping layers*/ + for (i = 0; i < 2; i++) { + SS(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i],((uint64*)E8_bitslice_roundconstant[roundnumber+1])[i],((uint64*)E8_bitslice_roundconstant[roundnumber+1])[i+2] ); + L(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i]); + SWAP2(state->x[1][i]); SWAP2(state->x[3][i]); SWAP2(state->x[5][i]); SWAP2(state->x[7][i]); + } + + /*round 7*roundnumber+2: Sbox, MDS and Swapping layers*/ + for (i = 0; i < 2; i++) { + SS(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i],((uint64*)E8_bitslice_roundconstant[roundnumber+2])[i],((uint64*)E8_bitslice_roundconstant[roundnumber+2])[i+2] ); + L(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i]); + SWAP4(state->x[1][i]); SWAP4(state->x[3][i]); SWAP4(state->x[5][i]); SWAP4(state->x[7][i]); + } + + /*round 7*roundnumber+3: Sbox, MDS and Swapping layers*/ + for (i = 0; i < 2; i++) { + SS(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i],((uint64*)E8_bitslice_roundconstant[roundnumber+3])[i],((uint64*)E8_bitslice_roundconstant[roundnumber+3])[i+2] ); + L(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i]); + SWAP8(state->x[1][i]); SWAP8(state->x[3][i]); SWAP8(state->x[5][i]); SWAP8(state->x[7][i]); + } + + /*round 7*roundnumber+4: Sbox, MDS and Swapping layers*/ + for (i = 0; i < 2; i++) { + SS(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i],((uint64*)E8_bitslice_roundconstant[roundnumber+4])[i],((uint64*)E8_bitslice_roundconstant[roundnumber+4])[i+2] ); + L(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i]); + SWAP16(state->x[1][i]); SWAP16(state->x[3][i]); SWAP16(state->x[5][i]); SWAP16(state->x[7][i]); + } + + /*round 7*roundnumber+5: Sbox, MDS and Swapping layers*/ + for (i = 0; i < 2; i++) { + SS(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i],((uint64*)E8_bitslice_roundconstant[roundnumber+5])[i],((uint64*)E8_bitslice_roundconstant[roundnumber+5])[i+2] ); + L(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i]); + SWAP32(state->x[1][i]); SWAP32(state->x[3][i]); SWAP32(state->x[5][i]); SWAP32(state->x[7][i]); + } + + /*round 7*roundnumber+6: Sbox and MDS layers*/ + for (i = 0; i < 2; i++) { + SS(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i],((uint64*)E8_bitslice_roundconstant[roundnumber+6])[i],((uint64*)E8_bitslice_roundconstant[roundnumber+6])[i+2] ); + L(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i]); + } + /*round 7*roundnumber+6: swapping layer*/ + for (i = 1; i < 8; i = i+2) { + temp0 = state->x[i][0]; state->x[i][0] = state->x[i][1]; state->x[i][1] = temp0; + } + } + +} + +/*The compression function F8 */ +static void F8(hashState *state) +{ + uint64 i; + + /*xor the 512-bit message with the fist half of the 1024-bit hash state*/ + for (i = 0; i < 8; i++) state->x[i >> 1][i & 1] ^= ((uint64*)state->buffer)[i]; + + /*the bijective function E8 */ + E8(state); + + /*xor the 512-bit message with the second half of the 1024-bit hash state*/ + for (i = 0; i < 8; i++) state->x[(8+i) >> 1][(8+i) & 1] ^= ((uint64*)state->buffer)[i]; +} + +/*before hashing a message, initialize the hash state as H0 */ +static HashReturn Init(hashState *state, int hashbitlen) +{ + state->databitlen = 0; + state->datasize_in_buffer = 0; + + /*initialize the initial hash value of JH*/ + state->hashbitlen = hashbitlen; + + /*load the intital hash value into state*/ + switch (hashbitlen) + { + case 224: memcpy(state->x,JH224_H0,128); break; + case 256: memcpy(state->x,JH256_H0,128); break; + case 384: memcpy(state->x,JH384_H0,128); break; + case 512: memcpy(state->x,JH512_H0,128); break; + } + + return(SUCCESS); +} + + +/*hash each 512-bit message block, except the last partial block*/ +static HashReturn Update(hashState *state, const BitSequence *data, DataLength databitlen) +{ + DataLength index; /*the starting address of the data to be compressed*/ + + state->databitlen += databitlen; + index = 0; + + /*if there is remaining data in the buffer, fill it to a full message block first*/ + /*we assume that the size of the data in the buffer is the multiple of 8 bits if it is not at the end of a message*/ + + /*There is data in the buffer, but the incoming data is insufficient for a full block*/ + if ( (state->datasize_in_buffer > 0 ) && (( state->datasize_in_buffer + databitlen) < 512) ) { + if ( (databitlen & 7) == 0 ) { + memcpy(state->buffer + (state->datasize_in_buffer >> 3), data, 64-(state->datasize_in_buffer >> 3)) ; + } + else memcpy(state->buffer + (state->datasize_in_buffer >> 3), data, 64-(state->datasize_in_buffer >> 3)+1) ; + state->datasize_in_buffer += databitlen; + databitlen = 0; + } + + /*There is data in the buffer, and the incoming data is sufficient for a full block*/ + if ( (state->datasize_in_buffer > 0 ) && (( state->datasize_in_buffer + databitlen) >= 512) ) { + memcpy( state->buffer + (state->datasize_in_buffer >> 3), data, 64-(state->datasize_in_buffer >> 3) ) ; + index = 64-(state->datasize_in_buffer >> 3); + databitlen = databitlen - (512 - state->datasize_in_buffer); + F8(state); + state->datasize_in_buffer = 0; + } + + /*hash the remaining full message blocks*/ + for ( ; databitlen >= 512; index = index+64, databitlen = databitlen - 512) { + memcpy(state->buffer, data+index, 64); + F8(state); + } + + /*store the partial block into buffer, assume that -- if part of the last byte is not part of the message, then that part consists of 0 bits*/ + if ( databitlen > 0) { + if ((databitlen & 7) == 0) + memcpy(state->buffer, data+index, (databitlen & 0x1ff) >> 3); + else + memcpy(state->buffer, data+index, ((databitlen & 0x1ff) >> 3)+1); + state->datasize_in_buffer = databitlen; + } + + return(SUCCESS); +} + +/*pad the message, process the padded block(s), truncate the hash value H to obtain the message digest*/ +static HashReturn Final(hashState *state, BitSequence *hashval) +{ + unsigned int i; + + if ( (state->databitlen & 0x1ff) == 0 ) { + /*pad the message when databitlen is multiple of 512 bits, then process the padded block*/ + memset(state->buffer, 0, 64); + state->buffer[0] = 0x80; + state->buffer[63] = state->databitlen & 0xff; + state->buffer[62] = (state->databitlen >> 8) & 0xff; + state->buffer[61] = (state->databitlen >> 16) & 0xff; + state->buffer[60] = (state->databitlen >> 24) & 0xff; + state->buffer[59] = (state->databitlen >> 32) & 0xff; + state->buffer[58] = (state->databitlen >> 40) & 0xff; + state->buffer[57] = (state->databitlen >> 48) & 0xff; + state->buffer[56] = (state->databitlen >> 56) & 0xff; + F8(state); + } + else { + /*set the rest of the bytes in the buffer to 0*/ + if ( (state->datasize_in_buffer & 7) == 0) + for (i = (state->databitlen & 0x1ff) >> 3; i < 64; i++) state->buffer[i] = 0; + else + for (i = ((state->databitlen & 0x1ff) >> 3)+1; i < 64; i++) state->buffer[i] = 0; + + /*pad and process the partial block when databitlen is not multiple of 512 bits, then hash the padded blocks*/ + state->buffer[((state->databitlen & 0x1ff) >> 3)] |= 1 << (7- (state->databitlen & 7)); + + F8(state); + memset(state->buffer, 0, 64); + state->buffer[63] = state->databitlen & 0xff; + state->buffer[62] = (state->databitlen >> 8) & 0xff; + state->buffer[61] = (state->databitlen >> 16) & 0xff; + state->buffer[60] = (state->databitlen >> 24) & 0xff; + state->buffer[59] = (state->databitlen >> 32) & 0xff; + state->buffer[58] = (state->databitlen >> 40) & 0xff; + state->buffer[57] = (state->databitlen >> 48) & 0xff; + state->buffer[56] = (state->databitlen >> 56) & 0xff; + F8(state); + } + + /*truncating the final hash value to generate the message digest*/ + switch(state->hashbitlen) { + case 224: memcpy(hashval,(unsigned char*)state->x+64+36,28); break; + case 256: memcpy(hashval,(unsigned char*)state->x+64+32,32); break; + case 384: memcpy(hashval,(unsigned char*)state->x+64+16,48); break; + case 512: memcpy(hashval,(unsigned char*)state->x+64,64); break; + } + + return(SUCCESS); +} + +/* hash a message, + three inputs: message digest size in bits (hashbitlen); message (data); message length in bits (databitlen) + one output: message digest (hashval) +*/ +HashReturn jh_hash(int hashbitlen, const BitSequence *data,DataLength databitlen, BitSequence *hashval) +{ + hashState state; + + if ( hashbitlen == 224 || hashbitlen == 256 || hashbitlen == 384 || hashbitlen == 512 ) { + Init(&state, hashbitlen); + Update(&state, data, databitlen); + Final(&state, hashval); + return SUCCESS; + } + else + return(BAD_HASHLEN); +} diff --git a/xmrstak/backend/cpu/crypto/c_jh.h b/xmrstak/backend/cpu/crypto/c_jh.h new file mode 100644 index 0000000..d10d40f --- /dev/null +++ b/xmrstak/backend/cpu/crypto/c_jh.h @@ -0,0 +1,19 @@ +/*This program gives the 64-bit optimized bitslice implementation of JH using ANSI C + + -------------------------------- + Performance + + Microprocessor: Intel CORE 2 processor (Core 2 Duo Mobile T6600 2.2GHz) + Operating System: 64-bit Ubuntu 10.04 (Linux kernel 2.6.32-22-generic) + Speed for long message: + 1) 45.8 cycles/byte compiler: Intel C++ Compiler 11.1 compilation option: icc -O2 + 2) 56.8 cycles/byte compiler: gcc 4.4.3 compilation option: gcc -O3 + + -------------------------------- + Last Modified: January 16, 2011 +*/ +#pragma once + +#include "hash.h" + +HashReturn jh_hash(int hashbitlen, const BitSequence *data, DataLength databitlen, BitSequence *hashval); diff --git a/xmrstak/backend/cpu/crypto/c_keccak.c b/xmrstak/backend/cpu/crypto/c_keccak.c new file mode 100644 index 0000000..eadb85b --- /dev/null +++ b/xmrstak/backend/cpu/crypto/c_keccak.c @@ -0,0 +1,176 @@ +// keccak.c +// 19-Nov-11 Markku-Juhani O. Saarinen <mjos@iki.fi> +// A baseline Keccak (3rd round) implementation. + +#include <stdint.h> +#include <memory.h> + +#define HASH_DATA_AREA 136 +#define KECCAK_ROUNDS 24 + +#ifndef ROTL64 +#define ROTL64(x, y) (((x) << (y)) | ((x) >> (64 - (y)))) +#endif + +const uint64_t keccakf_rndc[24] = +{ + 0x0000000000000001, 0x0000000000008082, 0x800000000000808a, + 0x8000000080008000, 0x000000000000808b, 0x0000000080000001, + 0x8000000080008081, 0x8000000000008009, 0x000000000000008a, + 0x0000000000000088, 0x0000000080008009, 0x000000008000000a, + 0x000000008000808b, 0x800000000000008b, 0x8000000000008089, + 0x8000000000008003, 0x8000000000008002, 0x8000000000000080, + 0x000000000000800a, 0x800000008000000a, 0x8000000080008081, + 0x8000000000008080, 0x0000000080000001, 0x8000000080008008 +}; + +// update the state with given number of rounds + +void keccakf(uint64_t st[25], int rounds) +{ + int i, j, round; + uint64_t t, bc[5]; + + for (round = 0; round < rounds; ++round) { + + // Theta + bc[0] = st[0] ^ st[5] ^ st[10] ^ st[15] ^ st[20]; + bc[1] = st[1] ^ st[6] ^ st[11] ^ st[16] ^ st[21]; + bc[2] = st[2] ^ st[7] ^ st[12] ^ st[17] ^ st[22]; + bc[3] = st[3] ^ st[8] ^ st[13] ^ st[18] ^ st[23]; + bc[4] = st[4] ^ st[9] ^ st[14] ^ st[19] ^ st[24]; + + for (i = 0; i < 5; ++i) { + t = bc[(i + 4) % 5] ^ ROTL64(bc[(i + 1) % 5], 1); + st[i ] ^= t; + st[i + 5] ^= t; + st[i + 10] ^= t; + st[i + 15] ^= t; + st[i + 20] ^= t; + } + + // Rho Pi + t = st[1]; + st[ 1] = ROTL64(st[ 6], 44); + st[ 6] = ROTL64(st[ 9], 20); + st[ 9] = ROTL64(st[22], 61); + st[22] = ROTL64(st[14], 39); + st[14] = ROTL64(st[20], 18); + st[20] = ROTL64(st[ 2], 62); + st[ 2] = ROTL64(st[12], 43); + st[12] = ROTL64(st[13], 25); + st[13] = ROTL64(st[19], 8); + st[19] = ROTL64(st[23], 56); + st[23] = ROTL64(st[15], 41); + st[15] = ROTL64(st[ 4], 27); + st[ 4] = ROTL64(st[24], 14); + st[24] = ROTL64(st[21], 2); + st[21] = ROTL64(st[ 8], 55); + st[ 8] = ROTL64(st[16], 45); + st[16] = ROTL64(st[ 5], 36); + st[ 5] = ROTL64(st[ 3], 28); + st[ 3] = ROTL64(st[18], 21); + st[18] = ROTL64(st[17], 15); + st[17] = ROTL64(st[11], 10); + st[11] = ROTL64(st[ 7], 6); + st[ 7] = ROTL64(st[10], 3); + st[10] = ROTL64(t, 1); + + // Chi + // unrolled loop, where only last iteration is different + j = 0; + bc[0] = st[j ]; + bc[1] = st[j + 1]; + + st[j ] ^= (~st[j + 1]) & st[j + 2]; + st[j + 1] ^= (~st[j + 2]) & st[j + 3]; + st[j + 2] ^= (~st[j + 3]) & st[j + 4]; + st[j + 3] ^= (~st[j + 4]) & bc[0]; + st[j + 4] ^= (~bc[0]) & bc[1]; + + j = 5; + bc[0] = st[j ]; + bc[1] = st[j + 1]; + + st[j ] ^= (~st[j + 1]) & st[j + 2]; + st[j + 1] ^= (~st[j + 2]) & st[j + 3]; + st[j + 2] ^= (~st[j + 3]) & st[j + 4]; + st[j + 3] ^= (~st[j + 4]) & bc[0]; + st[j + 4] ^= (~bc[0]) & bc[1]; + + j = 10; + bc[0] = st[j ]; + bc[1] = st[j + 1]; + + st[j ] ^= (~st[j + 1]) & st[j + 2]; + st[j + 1] ^= (~st[j + 2]) & st[j + 3]; + st[j + 2] ^= (~st[j + 3]) & st[j + 4]; + st[j + 3] ^= (~st[j + 4]) & bc[0]; + st[j + 4] ^= (~bc[0]) & bc[1]; + + j = 15; + bc[0] = st[j ]; + bc[1] = st[j + 1]; + + st[j ] ^= (~st[j + 1]) & st[j + 2]; + st[j + 1] ^= (~st[j + 2]) & st[j + 3]; + st[j + 2] ^= (~st[j + 3]) & st[j + 4]; + st[j + 3] ^= (~st[j + 4]) & bc[0]; + st[j + 4] ^= (~bc[0]) & bc[1]; + + j = 20; + bc[0] = st[j ]; + bc[1] = st[j + 1]; + bc[2] = st[j + 2]; + bc[3] = st[j + 3]; + bc[4] = st[j + 4]; + + st[j ] ^= (~bc[1]) & bc[2]; + st[j + 1] ^= (~bc[2]) & bc[3]; + st[j + 2] ^= (~bc[3]) & bc[4]; + st[j + 3] ^= (~bc[4]) & bc[0]; + st[j + 4] ^= (~bc[0]) & bc[1]; + + // Iota + st[0] ^= keccakf_rndc[round]; + } +} + +// compute a keccak hash (md) of given byte length from "in" +typedef uint64_t state_t[25]; + +void keccak(const uint8_t *in, int inlen, uint8_t *md, int mdlen) +{ + state_t st; + uint8_t temp[144]; + int i, rsiz, rsizw; + + rsiz = sizeof(state_t) == mdlen ? HASH_DATA_AREA : 200 - 2 * mdlen; + rsizw = rsiz / 8; + + memset(st, 0, sizeof(st)); + + for ( ; inlen >= rsiz; inlen -= rsiz, in += rsiz) { + for (i = 0; i < rsizw; i++) + st[i] ^= ((uint64_t *) in)[i]; + keccakf(st, KECCAK_ROUNDS); + } + + // last block and padding + memcpy(temp, in, inlen); + temp[inlen++] = 1; + memset(temp + inlen, 0, rsiz - inlen); + temp[rsiz - 1] |= 0x80; + + for (i = 0; i < rsizw; i++) + st[i] ^= ((uint64_t *) temp)[i]; + + keccakf(st, KECCAK_ROUNDS); + + memcpy(md, st, mdlen); +} + +void keccak1600(const uint8_t *in, int inlen, uint8_t *md) +{ + keccak(in, inlen, md, sizeof(state_t)); +}
\ No newline at end of file diff --git a/xmrstak/backend/cpu/crypto/c_keccak.h b/xmrstak/backend/cpu/crypto/c_keccak.h new file mode 100644 index 0000000..4f7f857 --- /dev/null +++ b/xmrstak/backend/cpu/crypto/c_keccak.h @@ -0,0 +1,26 @@ +// keccak.h +// 19-Nov-11 Markku-Juhani O. Saarinen <mjos@iki.fi> + +#ifndef KECCAK_H +#define KECCAK_H + +#include <stdint.h> +#include <string.h> + +#ifndef KECCAK_ROUNDS +#define KECCAK_ROUNDS 24 +#endif + +#ifndef ROTL64 +#define ROTL64(x, y) (((x) << (y)) | ((x) >> (64 - (y)))) +#endif + +// compute a keccak hash (md) of given byte length from "in" +int keccak(const uint8_t *in, int inlen, uint8_t *md, int mdlen); + +// update the state +void keccakf(uint64_t st[25], int norounds); + +void keccak1600(const uint8_t *in, int inlen, uint8_t *md); + +#endif diff --git a/xmrstak/backend/cpu/crypto/c_skein.c b/xmrstak/backend/cpu/crypto/c_skein.c new file mode 100644 index 0000000..2453713 --- /dev/null +++ b/xmrstak/backend/cpu/crypto/c_skein.c @@ -0,0 +1,2036 @@ +/*********************************************************************** +** +** Implementation of the Skein hash function. +** +** Source code author: Doug Whiting, 2008. +** +** This algorithm and source code is released to the public domain. +** +************************************************************************/ + +#define SKEIN_PORT_CODE /* instantiate any code in skein_port.h */ + +#include <stddef.h> /* get size_t definition */ +#include <string.h> /* get the memcpy/memset functions */ +#include "c_skein.h" /* get the Skein API definitions */ + +#define DISABLE_UNUSED 0 + +#ifndef SKEIN_256_NIST_MAX_HASHBITS +#define SKEIN_256_NIST_MAX_HASHBITS (0) +#endif + +#ifndef SKEIN_512_NIST_MAX_HASHBITS +#define SKEIN_512_NIST_MAX_HASHBITS (512) +#endif + +#define SKEIN_MODIFIER_WORDS ( 2) /* number of modifier (tweak) words */ + +#define SKEIN_256_STATE_WORDS ( 4) +#define SKEIN_512_STATE_WORDS ( 8) +#define SKEIN1024_STATE_WORDS (16) +#define SKEIN_MAX_STATE_WORDS (16) + +#define SKEIN_256_STATE_BYTES ( 8*SKEIN_256_STATE_WORDS) +#define SKEIN_512_STATE_BYTES ( 8*SKEIN_512_STATE_WORDS) +#define SKEIN1024_STATE_BYTES ( 8*SKEIN1024_STATE_WORDS) + +#define SKEIN_256_STATE_BITS (64*SKEIN_256_STATE_WORDS) +#define SKEIN_512_STATE_BITS (64*SKEIN_512_STATE_WORDS) +#define SKEIN1024_STATE_BITS (64*SKEIN1024_STATE_WORDS) + +#define SKEIN_256_BLOCK_BYTES ( 8*SKEIN_256_STATE_WORDS) +#define SKEIN_512_BLOCK_BYTES ( 8*SKEIN_512_STATE_WORDS) +#define SKEIN1024_BLOCK_BYTES ( 8*SKEIN1024_STATE_WORDS) + +#define SKEIN_RND_SPECIAL (1000u) +#define SKEIN_RND_KEY_INITIAL (SKEIN_RND_SPECIAL+0u) +#define SKEIN_RND_KEY_INJECT (SKEIN_RND_SPECIAL+1u) +#define SKEIN_RND_FEED_FWD (SKEIN_RND_SPECIAL+2u) + +typedef struct +{ + size_t hashBitLen; /* size of hash result, in bits */ + size_t bCnt; /* current byte count in buffer b[] */ + u64b_t T[SKEIN_MODIFIER_WORDS]; /* tweak words: T[0]=byte cnt, T[1]=flags */ +} Skein_Ctxt_Hdr_t; + +typedef struct /* 256-bit Skein hash context structure */ +{ + Skein_Ctxt_Hdr_t h; /* common header context variables */ + u64b_t X[SKEIN_256_STATE_WORDS]; /* chaining variables */ + u08b_t b[SKEIN_256_BLOCK_BYTES]; /* partial block buffer (8-byte aligned) */ +} Skein_256_Ctxt_t; + +typedef struct /* 512-bit Skein hash context structure */ +{ + Skein_Ctxt_Hdr_t h; /* common header context variables */ + u64b_t X[SKEIN_512_STATE_WORDS]; /* chaining variables */ + u08b_t b[SKEIN_512_BLOCK_BYTES]; /* partial block buffer (8-byte aligned) */ +} Skein_512_Ctxt_t; + +typedef struct /* 1024-bit Skein hash context structure */ +{ + Skein_Ctxt_Hdr_t h; /* common header context variables */ + u64b_t X[SKEIN1024_STATE_WORDS]; /* chaining variables */ + u08b_t b[SKEIN1024_BLOCK_BYTES]; /* partial block buffer (8-byte aligned) */ +} Skein1024_Ctxt_t; + +/* Skein APIs for (incremental) "straight hashing" */ +#if SKEIN_256_NIST_MAX_HASH_BITS +static int Skein_256_Init (Skein_256_Ctxt_t *ctx, size_t hashBitLen); +#endif +static int Skein_512_Init (Skein_512_Ctxt_t *ctx, size_t hashBitLen); +static int Skein1024_Init (Skein1024_Ctxt_t *ctx, size_t hashBitLen); + +static int Skein_256_Update(Skein_256_Ctxt_t *ctx, const u08b_t *msg, size_t msgByteCnt); +static int Skein_512_Update(Skein_512_Ctxt_t *ctx, const u08b_t *msg, size_t msgByteCnt); +static int Skein1024_Update(Skein1024_Ctxt_t *ctx, const u08b_t *msg, size_t msgByteCnt); + +static int Skein_256_Final (Skein_256_Ctxt_t *ctx, u08b_t * hashVal); +static int Skein_512_Final (Skein_512_Ctxt_t *ctx, u08b_t * hashVal); +static int Skein1024_Final (Skein1024_Ctxt_t *ctx, u08b_t * hashVal); + +/* +** Skein APIs for "extended" initialization: MAC keys, tree hashing. +** After an InitExt() call, just use Update/Final calls as with Init(). +** +** Notes: Same parameters as _Init() calls, plus treeInfo/key/keyBytes. +** When keyBytes == 0 and treeInfo == SKEIN_SEQUENTIAL, +** the results of InitExt() are identical to calling Init(). +** The function Init() may be called once to "precompute" the IV for +** a given hashBitLen value, then by saving a copy of the context +** the IV computation may be avoided in later calls. +** Similarly, the function InitExt() may be called once per MAC key +** to precompute the MAC IV, then a copy of the context saved and +** reused for each new MAC computation. +**/ +#if 0 +static int Skein_256_InitExt(Skein_256_Ctxt_t *ctx, size_t hashBitLen, u64b_t treeInfo, const u08b_t *key, size_t keyBytes); +static int Skein_512_InitExt(Skein_512_Ctxt_t *ctx, size_t hashBitLen, u64b_t treeInfo, const u08b_t *key, size_t keyBytes); +static int Skein1024_InitExt(Skein1024_Ctxt_t *ctx, size_t hashBitLen, u64b_t treeInfo, const u08b_t *key, size_t keyBytes); +#endif + +/* +** Skein APIs for MAC and tree hash: +** Final_Pad: pad, do final block, but no OUTPUT type +** Output: do just the output stage +*/ +#if 0 +static int Skein_256_Final_Pad(Skein_256_Ctxt_t *ctx, u08b_t * hashVal); +static int Skein_512_Final_Pad(Skein_512_Ctxt_t *ctx, u08b_t * hashVal); +static int Skein1024_Final_Pad(Skein1024_Ctxt_t *ctx, u08b_t * hashVal); +#endif + +#ifndef SKEIN_TREE_HASH +#define SKEIN_TREE_HASH (1) +#endif +#if 0 +#if SKEIN_TREE_HASH +static int Skein_256_Output (Skein_256_Ctxt_t *ctx, u08b_t * hashVal); +static int Skein_512_Output (Skein_512_Ctxt_t *ctx, u08b_t * hashVal); +static int Skein1024_Output (Skein1024_Ctxt_t *ctx, u08b_t * hashVal); +#endif +#endif + +/***************************************************************** +** "Internal" Skein definitions +** -- not needed for sequential hashing API, but will be +** helpful for other uses of Skein (e.g., tree hash mode). +** -- included here so that they can be shared between +** reference and optimized code. +******************************************************************/ + +/* tweak word T[1]: bit field starting positions */ +#define SKEIN_T1_BIT(BIT) ((BIT) - 64) /* offset 64 because it's the second word */ + +#define SKEIN_T1_POS_TREE_LVL SKEIN_T1_BIT(112) /* bits 112..118: level in hash tree */ +#define SKEIN_T1_POS_BIT_PAD SKEIN_T1_BIT(119) /* bit 119 : partial final input byte */ +#define SKEIN_T1_POS_BLK_TYPE SKEIN_T1_BIT(120) /* bits 120..125: type field */ +#define SKEIN_T1_POS_FIRST SKEIN_T1_BIT(126) /* bits 126 : first block flag */ +#define SKEIN_T1_POS_FINAL SKEIN_T1_BIT(127) /* bit 127 : final block flag */ + +/* tweak word T[1]: flag bit definition(s) */ +#define SKEIN_T1_FLAG_FIRST (((u64b_t) 1 ) << SKEIN_T1_POS_FIRST) +#define SKEIN_T1_FLAG_FINAL (((u64b_t) 1 ) << SKEIN_T1_POS_FINAL) +#define SKEIN_T1_FLAG_BIT_PAD (((u64b_t) 1 ) << SKEIN_T1_POS_BIT_PAD) + +/* tweak word T[1]: tree level bit field mask */ +#define SKEIN_T1_TREE_LVL_MASK (((u64b_t)0x7F) << SKEIN_T1_POS_TREE_LVL) +#define SKEIN_T1_TREE_LEVEL(n) (((u64b_t) (n)) << SKEIN_T1_POS_TREE_LVL) + +/* tweak word T[1]: block type field */ +#define SKEIN_BLK_TYPE_KEY ( 0) /* key, for MAC and KDF */ +#define SKEIN_BLK_TYPE_CFG ( 4) /* configuration block */ +#define SKEIN_BLK_TYPE_PERS ( 8) /* personalization string */ +#define SKEIN_BLK_TYPE_PK (12) /* public key (for digital signature hashing) */ +#define SKEIN_BLK_TYPE_KDF (16) /* key identifier for KDF */ +#define SKEIN_BLK_TYPE_NONCE (20) /* nonce for PRNG */ +#define SKEIN_BLK_TYPE_MSG (48) /* message processing */ +#define SKEIN_BLK_TYPE_OUT (63) /* output stage */ +#define SKEIN_BLK_TYPE_MASK (63) /* bit field mask */ + +#define SKEIN_T1_BLK_TYPE(T) (((u64b_t) (SKEIN_BLK_TYPE_##T)) << SKEIN_T1_POS_BLK_TYPE) +#define SKEIN_T1_BLK_TYPE_KEY SKEIN_T1_BLK_TYPE(KEY) /* key, for MAC and KDF */ +#define SKEIN_T1_BLK_TYPE_CFG SKEIN_T1_BLK_TYPE(CFG) /* configuration block */ +#define SKEIN_T1_BLK_TYPE_PERS SKEIN_T1_BLK_TYPE(PERS) /* personalization string */ +#define SKEIN_T1_BLK_TYPE_PK SKEIN_T1_BLK_TYPE(PK) /* public key (for digital signature hashing) */ +#define SKEIN_T1_BLK_TYPE_KDF SKEIN_T1_BLK_TYPE(KDF) /* key identifier for KDF */ +#define SKEIN_T1_BLK_TYPE_NONCE SKEIN_T1_BLK_TYPE(NONCE)/* nonce for PRNG */ +#define SKEIN_T1_BLK_TYPE_MSG SKEIN_T1_BLK_TYPE(MSG) /* message processing */ +#define SKEIN_T1_BLK_TYPE_OUT SKEIN_T1_BLK_TYPE(OUT) /* output stage */ +#define SKEIN_T1_BLK_TYPE_MASK SKEIN_T1_BLK_TYPE(MASK) /* field bit mask */ + +#define SKEIN_T1_BLK_TYPE_CFG_FINAL (SKEIN_T1_BLK_TYPE_CFG | SKEIN_T1_FLAG_FINAL) +#define SKEIN_T1_BLK_TYPE_OUT_FINAL (SKEIN_T1_BLK_TYPE_OUT | SKEIN_T1_FLAG_FINAL) + +#define SKEIN_VERSION (1) + +#ifndef SKEIN_ID_STRING_LE /* allow compile-time personalization */ +#define SKEIN_ID_STRING_LE (0x33414853) /* "SHA3" (little-endian)*/ +#endif + +#define SKEIN_MK_64(hi32,lo32) ((lo32) + (((u64b_t) (hi32)) << 32)) +#define SKEIN_SCHEMA_VER SKEIN_MK_64(SKEIN_VERSION,SKEIN_ID_STRING_LE) +#define SKEIN_KS_PARITY SKEIN_MK_64(0x1BD11BDA,0xA9FC1A22) + +#define SKEIN_CFG_STR_LEN (4*8) + +/* bit field definitions in config block treeInfo word */ +#define SKEIN_CFG_TREE_LEAF_SIZE_POS ( 0) +#define SKEIN_CFG_TREE_NODE_SIZE_POS ( 8) +#define SKEIN_CFG_TREE_MAX_LEVEL_POS (16) + +#define SKEIN_CFG_TREE_LEAF_SIZE_MSK (((u64b_t) 0xFF) << SKEIN_CFG_TREE_LEAF_SIZE_POS) +#define SKEIN_CFG_TREE_NODE_SIZE_MSK (((u64b_t) 0xFF) << SKEIN_CFG_TREE_NODE_SIZE_POS) +#define SKEIN_CFG_TREE_MAX_LEVEL_MSK (((u64b_t) 0xFF) << SKEIN_CFG_TREE_MAX_LEVEL_POS) + +#define SKEIN_CFG_TREE_INFO(leaf,node,maxLvl) \ + ( (((u64b_t)(leaf )) << SKEIN_CFG_TREE_LEAF_SIZE_POS) | \ + (((u64b_t)(node )) << SKEIN_CFG_TREE_NODE_SIZE_POS) | \ + (((u64b_t)(maxLvl)) << SKEIN_CFG_TREE_MAX_LEVEL_POS) ) + +#define SKEIN_CFG_TREE_INFO_SEQUENTIAL SKEIN_CFG_TREE_INFO(0,0,0) /* use as treeInfo in InitExt() call for sequential processing */ + +/* +** Skein macros for getting/setting tweak words, etc. +** These are useful for partial input bytes, hash tree init/update, etc. +**/ +#define Skein_Get_Tweak(ctxPtr,TWK_NUM) ((ctxPtr)->h.T[TWK_NUM]) +#define Skein_Set_Tweak(ctxPtr,TWK_NUM,tVal) {(ctxPtr)->h.T[TWK_NUM] = (tVal);} + +#define Skein_Get_T0(ctxPtr) Skein_Get_Tweak(ctxPtr,0) +#define Skein_Get_T1(ctxPtr) Skein_Get_Tweak(ctxPtr,1) +#define Skein_Set_T0(ctxPtr,T0) Skein_Set_Tweak(ctxPtr,0,T0) +#define Skein_Set_T1(ctxPtr,T1) Skein_Set_Tweak(ctxPtr,1,T1) + +/* set both tweak words at once */ +#define Skein_Set_T0_T1(ctxPtr,T0,T1) \ +{ \ + Skein_Set_T0(ctxPtr,(T0)); \ + Skein_Set_T1(ctxPtr,(T1)); \ +} + +#define Skein_Set_Type(ctxPtr,BLK_TYPE) \ + Skein_Set_T1(ctxPtr,SKEIN_T1_BLK_TYPE_##BLK_TYPE) + +/* set up for starting with a new type: h.T[0]=0; h.T[1] = NEW_TYPE; h.bCnt=0; */ +#define Skein_Start_New_Type(ctxPtr,BLK_TYPE) \ +{ Skein_Set_T0_T1(ctxPtr,0,SKEIN_T1_FLAG_FIRST | SKEIN_T1_BLK_TYPE_##BLK_TYPE); (ctxPtr)->h.bCnt=0; } + +#define Skein_Clear_First_Flag(hdr) { (hdr).T[1] &= ~SKEIN_T1_FLAG_FIRST; } +#define Skein_Set_Bit_Pad_Flag(hdr) { (hdr).T[1] |= SKEIN_T1_FLAG_BIT_PAD; } + +#define Skein_Set_Tree_Level(hdr,height) { (hdr).T[1] |= SKEIN_T1_TREE_LEVEL(height);} + +/***************************************************************** +** "Internal" Skein definitions for debugging and error checking +******************************************************************/ +#define Skein_Show_Block(bits,ctx,X,blkPtr,wPtr,ksEvenPtr,ksOddPtr) +#define Skein_Show_Round(bits,ctx,r,X) +#define Skein_Show_R_Ptr(bits,ctx,r,X_ptr) +#define Skein_Show_Final(bits,ctx,cnt,outPtr) +#define Skein_Show_Key(bits,ctx,key,keyBytes) + + +#ifndef SKEIN_ERR_CHECK /* run-time checks (e.g., bad params, uninitialized context)? */ +#define Skein_Assert(x,retCode)/* default: ignore all Asserts, for performance */ +#define Skein_assert(x) +#elif defined(SKEIN_ASSERT) +#include <assert.h> +#define Skein_Assert(x,retCode) assert(x) +#define Skein_assert(x) assert(x) +#else +#include <assert.h> +#define Skein_Assert(x,retCode) { if (!(x)) return retCode; } /* caller error */ +#define Skein_assert(x) assert(x) /* internal error */ +#endif + +/***************************************************************** +** Skein block function constants (shared across Ref and Opt code) +******************************************************************/ +enum +{ + /* Skein_256 round rotation constants */ + R_256_0_0=14, R_256_0_1=16, + R_256_1_0=52, R_256_1_1=57, + R_256_2_0=23, R_256_2_1=40, + R_256_3_0= 5, R_256_3_1=37, + R_256_4_0=25, R_256_4_1=33, + R_256_5_0=46, R_256_5_1=12, + R_256_6_0=58, R_256_6_1=22, + R_256_7_0=32, R_256_7_1=32, + + /* Skein_512 round rotation constants */ + R_512_0_0=46, R_512_0_1=36, R_512_0_2=19, R_512_0_3=37, + R_512_1_0=33, R_512_1_1=27, R_512_1_2=14, R_512_1_3=42, + R_512_2_0=17, R_512_2_1=49, R_512_2_2=36, R_512_2_3=39, + R_512_3_0=44, R_512_3_1= 9, R_512_3_2=54, R_512_3_3=56, + R_512_4_0=39, R_512_4_1=30, R_512_4_2=34, R_512_4_3=24, + R_512_5_0=13, R_512_5_1=50, R_512_5_2=10, R_512_5_3=17, + R_512_6_0=25, R_512_6_1=29, R_512_6_2=39, R_512_6_3=43, + R_512_7_0= 8, R_512_7_1=35, R_512_7_2=56, R_512_7_3=22, + + /* Skein1024 round rotation constants */ + R1024_0_0=24, R1024_0_1=13, R1024_0_2= 8, R1024_0_3=47, R1024_0_4= 8, R1024_0_5=17, R1024_0_6=22, R1024_0_7=37, + R1024_1_0=38, R1024_1_1=19, R1024_1_2=10, R1024_1_3=55, R1024_1_4=49, R1024_1_5=18, R1024_1_6=23, R1024_1_7=52, + R1024_2_0=33, R1024_2_1= 4, R1024_2_2=51, R1024_2_3=13, R1024_2_4=34, R1024_2_5=41, R1024_2_6=59, R1024_2_7=17, + R1024_3_0= 5, R1024_3_1=20, R1024_3_2=48, R1024_3_3=41, R1024_3_4=47, R1024_3_5=28, R1024_3_6=16, R1024_3_7=25, + R1024_4_0=41, R1024_4_1= 9, R1024_4_2=37, R1024_4_3=31, R1024_4_4=12, R1024_4_5=47, R1024_4_6=44, R1024_4_7=30, + R1024_5_0=16, R1024_5_1=34, R1024_5_2=56, R1024_5_3=51, R1024_5_4= 4, R1024_5_5=53, R1024_5_6=42, R1024_5_7=41, + R1024_6_0=31, R1024_6_1=44, R1024_6_2=47, R1024_6_3=46, R1024_6_4=19, R1024_6_5=42, R1024_6_6=44, R1024_6_7=25, + R1024_7_0= 9, R1024_7_1=48, R1024_7_2=35, R1024_7_3=52, R1024_7_4=23, R1024_7_5=31, R1024_7_6=37, R1024_7_7=20 +}; + +#ifndef SKEIN_ROUNDS +#define SKEIN_256_ROUNDS_TOTAL (72) /* number of rounds for the different block sizes */ +#define SKEIN_512_ROUNDS_TOTAL (72) +#define SKEIN1024_ROUNDS_TOTAL (80) +#else /* allow command-line define in range 8*(5..14) */ +#define SKEIN_256_ROUNDS_TOTAL (8*((((SKEIN_ROUNDS/100) + 5) % 10) + 5)) +#define SKEIN_512_ROUNDS_TOTAL (8*((((SKEIN_ROUNDS/ 10) + 5) % 10) + 5)) +#define SKEIN1024_ROUNDS_TOTAL (8*((((SKEIN_ROUNDS ) + 5) % 10) + 5)) +#endif + + +/* +***************** Pre-computed Skein IVs ******************* +** +** NOTE: these values are not "magic" constants, but +** are generated using the Threefish block function. +** They are pre-computed here only for speed; i.e., to +** avoid the need for a Threefish call during Init(). +** +** The IV for any fixed hash length may be pre-computed. +** Only the most common values are included here. +** +************************************************************ +**/ + +#define MK_64 SKEIN_MK_64 + +/* blkSize = 256 bits. hashSize = 128 bits */ +const u64b_t SKEIN_256_IV_128[] = + { + MK_64(0xE1111906,0x964D7260), + MK_64(0x883DAAA7,0x7C8D811C), + MK_64(0x10080DF4,0x91960F7A), + MK_64(0xCCF7DDE5,0xB45BC1C2) + }; + +/* blkSize = 256 bits. hashSize = 160 bits */ +const u64b_t SKEIN_256_IV_160[] = + { + MK_64(0x14202314,0x72825E98), + MK_64(0x2AC4E9A2,0x5A77E590), + MK_64(0xD47A5856,0x8838D63E), + MK_64(0x2DD2E496,0x8586AB7D) + }; + +/* blkSize = 256 bits. hashSize = 224 bits */ +const u64b_t SKEIN_256_IV_224[] = + { + MK_64(0xC6098A8C,0x9AE5EA0B), + MK_64(0x876D5686,0x08C5191C), + MK_64(0x99CB88D7,0xD7F53884), + MK_64(0x384BDDB1,0xAEDDB5DE) + }; + +/* blkSize = 256 bits. hashSize = 256 bits */ +const u64b_t SKEIN_256_IV_256[] = + { + MK_64(0xFC9DA860,0xD048B449), + MK_64(0x2FCA6647,0x9FA7D833), + MK_64(0xB33BC389,0x6656840F), + MK_64(0x6A54E920,0xFDE8DA69) + }; + +/* blkSize = 512 bits. hashSize = 128 bits */ +const u64b_t SKEIN_512_IV_128[] = + { + MK_64(0xA8BC7BF3,0x6FBF9F52), + MK_64(0x1E9872CE,0xBD1AF0AA), + MK_64(0x309B1790,0xB32190D3), + MK_64(0xBCFBB854,0x3F94805C), + MK_64(0x0DA61BCD,0x6E31B11B), + MK_64(0x1A18EBEA,0xD46A32E3), + MK_64(0xA2CC5B18,0xCE84AA82), + MK_64(0x6982AB28,0x9D46982D) + }; + +/* blkSize = 512 bits. hashSize = 160 bits */ +const u64b_t SKEIN_512_IV_160[] = + { + MK_64(0x28B81A2A,0xE013BD91), + MK_64(0xC2F11668,0xB5BDF78F), + MK_64(0x1760D8F3,0xF6A56F12), + MK_64(0x4FB74758,0x8239904F), + MK_64(0x21EDE07F,0x7EAF5056), + MK_64(0xD908922E,0x63ED70B8), + MK_64(0xB8EC76FF,0xECCB52FA), + MK_64(0x01A47BB8,0xA3F27A6E) + }; + +/* blkSize = 512 bits. hashSize = 224 bits */ +const u64b_t SKEIN_512_IV_224[] = + { + MK_64(0xCCD06162,0x48677224), + MK_64(0xCBA65CF3,0xA92339EF), + MK_64(0x8CCD69D6,0x52FF4B64), + MK_64(0x398AED7B,0x3AB890B4), + MK_64(0x0F59D1B1,0x457D2BD0), + MK_64(0x6776FE65,0x75D4EB3D), + MK_64(0x99FBC70E,0x997413E9), + MK_64(0x9E2CFCCF,0xE1C41EF7) + }; + +/* blkSize = 512 bits. hashSize = 256 bits */ +const u64b_t SKEIN_512_IV_256[] = + { + MK_64(0xCCD044A1,0x2FDB3E13), + MK_64(0xE8359030,0x1A79A9EB), + MK_64(0x55AEA061,0x4F816E6F), + MK_64(0x2A2767A4,0xAE9B94DB), + MK_64(0xEC06025E,0x74DD7683), + MK_64(0xE7A436CD,0xC4746251), + MK_64(0xC36FBAF9,0x393AD185), + MK_64(0x3EEDBA18,0x33EDFC13) + }; + +/* blkSize = 512 bits. hashSize = 384 bits */ +const u64b_t SKEIN_512_IV_384[] = + { + MK_64(0xA3F6C6BF,0x3A75EF5F), + MK_64(0xB0FEF9CC,0xFD84FAA4), + MK_64(0x9D77DD66,0x3D770CFE), + MK_64(0xD798CBF3,0xB468FDDA), + MK_64(0x1BC4A666,0x8A0E4465), + MK_64(0x7ED7D434,0xE5807407), + MK_64(0x548FC1AC,0xD4EC44D6), + MK_64(0x266E1754,0x6AA18FF8) + }; + +/* blkSize = 512 bits. hashSize = 512 bits */ +const u64b_t SKEIN_512_IV_512[] = + { + MK_64(0x4903ADFF,0x749C51CE), + MK_64(0x0D95DE39,0x9746DF03), + MK_64(0x8FD19341,0x27C79BCE), + MK_64(0x9A255629,0xFF352CB1), + MK_64(0x5DB62599,0xDF6CA7B0), + MK_64(0xEABE394C,0xA9D5C3F4), + MK_64(0x991112C7,0x1A75B523), + MK_64(0xAE18A40B,0x660FCC33) + }; + +/* blkSize = 1024 bits. hashSize = 384 bits */ +const u64b_t SKEIN1024_IV_384[] = + { + MK_64(0x5102B6B8,0xC1894A35), + MK_64(0xFEEBC9E3,0xFE8AF11A), + MK_64(0x0C807F06,0xE32BED71), + MK_64(0x60C13A52,0xB41A91F6), + MK_64(0x9716D35D,0xD4917C38), + MK_64(0xE780DF12,0x6FD31D3A), + MK_64(0x797846B6,0xC898303A), + MK_64(0xB172C2A8,0xB3572A3B), + MK_64(0xC9BC8203,0xA6104A6C), + MK_64(0x65909338,0xD75624F4), + MK_64(0x94BCC568,0x4B3F81A0), + MK_64(0x3EBBF51E,0x10ECFD46), + MK_64(0x2DF50F0B,0xEEB08542), + MK_64(0x3B5A6530,0x0DBC6516), + MK_64(0x484B9CD2,0x167BBCE1), + MK_64(0x2D136947,0xD4CBAFEA) + }; + +/* blkSize = 1024 bits. hashSize = 512 bits */ +const u64b_t SKEIN1024_IV_512[] = + { + MK_64(0xCAEC0E5D,0x7C1B1B18), + MK_64(0xA01B0E04,0x5F03E802), + MK_64(0x33840451,0xED912885), + MK_64(0x374AFB04,0xEAEC2E1C), + MK_64(0xDF25A0E2,0x813581F7), + MK_64(0xE4004093,0x8B12F9D2), + MK_64(0xA662D539,0xC2ED39B6), + MK_64(0xFA8B85CF,0x45D8C75A), + MK_64(0x8316ED8E,0x29EDE796), + MK_64(0x053289C0,0x2E9F91B8), + MK_64(0xC3F8EF1D,0x6D518B73), + MK_64(0xBDCEC3C4,0xD5EF332E), + MK_64(0x549A7E52,0x22974487), + MK_64(0x67070872,0x5B749816), + MK_64(0xB9CD28FB,0xF0581BD1), + MK_64(0x0E2940B8,0x15804974) + }; + +/* blkSize = 1024 bits. hashSize = 1024 bits */ +const u64b_t SKEIN1024_IV_1024[] = + { + MK_64(0xD593DA07,0x41E72355), + MK_64(0x15B5E511,0xAC73E00C), + MK_64(0x5180E5AE,0xBAF2C4F0), + MK_64(0x03BD41D3,0xFCBCAFAF), + MK_64(0x1CAEC6FD,0x1983A898), + MK_64(0x6E510B8B,0xCDD0589F), + MK_64(0x77E2BDFD,0xC6394ADA), + MK_64(0xC11E1DB5,0x24DCB0A3), + MK_64(0xD6D14AF9,0xC6329AB5), + MK_64(0x6A9B0BFC,0x6EB67E0D), + MK_64(0x9243C60D,0xCCFF1332), + MK_64(0x1A1F1DDE,0x743F02D4), + MK_64(0x0996753C,0x10ED0BB8), + MK_64(0x6572DD22,0xF2B4969A), + MK_64(0x61FD3062,0xD00A579A), + MK_64(0x1DE0536E,0x8682E539) + }; + + +#ifndef SKEIN_USE_ASM +#define SKEIN_USE_ASM (0) /* default is all C code (no ASM) */ +#endif + +#ifndef SKEIN_LOOP +#define SKEIN_LOOP 001 /* default: unroll 256 and 512, but not 1024 */ +#endif + +#define BLK_BITS (WCNT*64) /* some useful definitions for code here */ +#define KW_TWK_BASE (0) +#define KW_KEY_BASE (3) +#define ks (kw + KW_KEY_BASE) +#define ts (kw + KW_TWK_BASE) + +#ifdef SKEIN_DEBUG +#define DebugSaveTweak(ctx) { ctx->h.T[0] = ts[0]; ctx->h.T[1] = ts[1]; } +#else +#define DebugSaveTweak(ctx) +#endif + +/***************************** Skein_256 ******************************/ +#if !(SKEIN_USE_ASM & 256) +static void Skein_256_Process_Block(Skein_256_Ctxt_t *ctx,const u08b_t *blkPtr,size_t blkCnt,size_t byteCntAdd) + { /* do it in C */ + enum + { + WCNT = SKEIN_256_STATE_WORDS + }; +#undef RCNT +#define RCNT (SKEIN_256_ROUNDS_TOTAL/8) + +#ifdef SKEIN_LOOP /* configure how much to unroll the loop */ +#define SKEIN_UNROLL_256 (((SKEIN_LOOP)/100)%10) +#else +#define SKEIN_UNROLL_256 (0) +#endif + +#if SKEIN_UNROLL_256 +#if (RCNT % SKEIN_UNROLL_256) +#error "Invalid SKEIN_UNROLL_256" /* sanity check on unroll count */ +#endif + size_t r; + u64b_t kw[WCNT+4+RCNT*2]; /* key schedule words : chaining vars + tweak + "rotation"*/ +#else + u64b_t kw[WCNT+4]; /* key schedule words : chaining vars + tweak */ +#endif + u64b_t X0,X1,X2,X3; /* local copy of context vars, for speed */ + u64b_t w [WCNT]; /* local copy of input block */ +#ifdef SKEIN_DEBUG + const u64b_t *Xptr[4]; /* use for debugging (help compiler put Xn in registers) */ + Xptr[0] = &X0; Xptr[1] = &X1; Xptr[2] = &X2; Xptr[3] = &X3; +#endif + Skein_assert(blkCnt != 0); /* never call with blkCnt == 0! */ + ts[0] = ctx->h.T[0]; + ts[1] = ctx->h.T[1]; + do { + /* this implementation only supports 2**64 input bytes (no carry out here) */ + ts[0] += byteCntAdd; /* update processed length */ + + /* precompute the key schedule for this block */ + ks[0] = ctx->X[0]; + ks[1] = ctx->X[1]; + ks[2] = ctx->X[2]; + ks[3] = ctx->X[3]; + ks[4] = ks[0] ^ ks[1] ^ ks[2] ^ ks[3] ^ SKEIN_KS_PARITY; + + ts[2] = ts[0] ^ ts[1]; + + Skein_Get64_LSB_First(w,blkPtr,WCNT); /* get input block in little-endian format */ + DebugSaveTweak(ctx); + Skein_Show_Block(BLK_BITS,&ctx->h,ctx->X,blkPtr,w,ks,ts); + + X0 = w[0] + ks[0]; /* do the first full key injection */ + X1 = w[1] + ks[1] + ts[0]; + X2 = w[2] + ks[2] + ts[1]; + X3 = w[3] + ks[3]; + + Skein_Show_R_Ptr(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INITIAL,Xptr); /* show starting state values */ + + blkPtr += SKEIN_256_BLOCK_BYTES; + + /* run the rounds */ + +#define Round256(p0,p1,p2,p3,ROT,rNum) \ + X##p0 += X##p1; X##p1 = RotL_64(X##p1,ROT##_0); X##p1 ^= X##p0; \ + X##p2 += X##p3; X##p3 = RotL_64(X##p3,ROT##_1); X##p3 ^= X##p2; \ + +#if SKEIN_UNROLL_256 == 0 +#define R256(p0,p1,p2,p3,ROT,rNum) /* fully unrolled */ \ + Round256(p0,p1,p2,p3,ROT,rNum) \ + Skein_Show_R_Ptr(BLK_BITS,&ctx->h,rNum,Xptr); + +#define I256(R) \ + X0 += ks[((R)+1) % 5]; /* inject the key schedule value */ \ + X1 += ks[((R)+2) % 5] + ts[((R)+1) % 3]; \ + X2 += ks[((R)+3) % 5] + ts[((R)+2) % 3]; \ + X3 += ks[((R)+4) % 5] + (R)+1; \ + Skein_Show_R_Ptr(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INJECT,Xptr); +#else /* looping version */ +#define R256(p0,p1,p2,p3,ROT,rNum) \ + Round256(p0,p1,p2,p3,ROT,rNum) \ + Skein_Show_R_Ptr(BLK_BITS,&ctx->h,4*(r-1)+rNum,Xptr); + +#define I256(R) \ + X0 += ks[r+(R)+0]; /* inject the key schedule value */ \ + X1 += ks[r+(R)+1] + ts[r+(R)+0]; \ + X2 += ks[r+(R)+2] + ts[r+(R)+1]; \ + X3 += ks[r+(R)+3] + r+(R) ; \ + ks[r + (R)+4 ] = ks[r+(R)-1]; /* rotate key schedule */\ + ts[r + (R)+2 ] = ts[r+(R)-1]; \ + Skein_Show_R_Ptr(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INJECT,Xptr); + + for (r=1;r < 2*RCNT;r+=2*SKEIN_UNROLL_256) /* loop thru it */ +#endif + { +#define R256_8_rounds(R) \ + R256(0,1,2,3,R_256_0,8*(R) + 1); \ + R256(0,3,2,1,R_256_1,8*(R) + 2); \ + R256(0,1,2,3,R_256_2,8*(R) + 3); \ + R256(0,3,2,1,R_256_3,8*(R) + 4); \ + I256(2*(R)); \ + R256(0,1,2,3,R_256_4,8*(R) + 5); \ + R256(0,3,2,1,R_256_5,8*(R) + 6); \ + R256(0,1,2,3,R_256_6,8*(R) + 7); \ + R256(0,3,2,1,R_256_7,8*(R) + 8); \ + I256(2*(R)+1); + + R256_8_rounds( 0); + +#define R256_Unroll_R(NN) ((SKEIN_UNROLL_256 == 0 && SKEIN_256_ROUNDS_TOTAL/8 > (NN)) || (SKEIN_UNROLL_256 > (NN))) + + #if R256_Unroll_R( 1) + R256_8_rounds( 1); + #endif + #if R256_Unroll_R( 2) + R256_8_rounds( 2); + #endif + #if R256_Unroll_R( 3) + R256_8_rounds( 3); + #endif + #if R256_Unroll_R( 4) + R256_8_rounds( 4); + #endif + #if R256_Unroll_R( 5) + R256_8_rounds( 5); + #endif + #if R256_Unroll_R( 6) + R256_8_rounds( 6); + #endif + #if R256_Unroll_R( 7) + R256_8_rounds( 7); + #endif + #if R256_Unroll_R( 8) + R256_8_rounds( 8); + #endif + #if R256_Unroll_R( 9) + R256_8_rounds( 9); + #endif + #if R256_Unroll_R(10) + R256_8_rounds(10); + #endif + #if R256_Unroll_R(11) + R256_8_rounds(11); + #endif + #if R256_Unroll_R(12) + R256_8_rounds(12); + #endif + #if R256_Unroll_R(13) + R256_8_rounds(13); + #endif + #if R256_Unroll_R(14) + R256_8_rounds(14); + #endif + #if (SKEIN_UNROLL_256 > 14) +#error "need more unrolling in Skein_256_Process_Block" + #endif + } + /* do the final "feedforward" xor, update context chaining vars */ + ctx->X[0] = X0 ^ w[0]; + ctx->X[1] = X1 ^ w[1]; + ctx->X[2] = X2 ^ w[2]; + ctx->X[3] = X3 ^ w[3]; + + Skein_Show_Round(BLK_BITS,&ctx->h,SKEIN_RND_FEED_FWD,ctx->X); + + ts[1] &= ~SKEIN_T1_FLAG_FIRST; + } + while (--blkCnt); + ctx->h.T[0] = ts[0]; + ctx->h.T[1] = ts[1]; + } + +#if defined(SKEIN_CODE_SIZE) || defined(SKEIN_PERF) +static size_t Skein_256_Process_Block_CodeSize(void) + { + return ((u08b_t *) Skein_256_Process_Block_CodeSize) - + ((u08b_t *) Skein_256_Process_Block); + } +static uint_t Skein_256_Unroll_Cnt(void) + { + return SKEIN_UNROLL_256; + } +#endif +#endif + +/***************************** Skein_512 ******************************/ +#if !(SKEIN_USE_ASM & 512) +static void Skein_512_Process_Block(Skein_512_Ctxt_t *ctx,const u08b_t *blkPtr,size_t blkCnt,size_t byteCntAdd) + { /* do it in C */ + enum + { + WCNT = SKEIN_512_STATE_WORDS + }; +#undef RCNT +#define RCNT (SKEIN_512_ROUNDS_TOTAL/8) + +#ifdef SKEIN_LOOP /* configure how much to unroll the loop */ +#define SKEIN_UNROLL_512 (((SKEIN_LOOP)/10)%10) +#else +#define SKEIN_UNROLL_512 (0) +#endif + +#if SKEIN_UNROLL_512 +#if (RCNT % SKEIN_UNROLL_512) +#error "Invalid SKEIN_UNROLL_512" /* sanity check on unroll count */ +#endif + size_t r; + u64b_t kw[WCNT+4+RCNT*2]; /* key schedule words : chaining vars + tweak + "rotation"*/ +#else + u64b_t kw[WCNT+4]; /* key schedule words : chaining vars + tweak */ +#endif + u64b_t X0,X1,X2,X3,X4,X5,X6,X7; /* local copy of vars, for speed */ + u64b_t w [WCNT]; /* local copy of input block */ +#ifdef SKEIN_DEBUG + const u64b_t *Xptr[8]; /* use for debugging (help compiler put Xn in registers) */ + Xptr[0] = &X0; Xptr[1] = &X1; Xptr[2] = &X2; Xptr[3] = &X3; + Xptr[4] = &X4; Xptr[5] = &X5; Xptr[6] = &X6; Xptr[7] = &X7; +#endif + + Skein_assert(blkCnt != 0); /* never call with blkCnt == 0! */ + ts[0] = ctx->h.T[0]; + ts[1] = ctx->h.T[1]; + do { + /* this implementation only supports 2**64 input bytes (no carry out here) */ + ts[0] += byteCntAdd; /* update processed length */ + + /* precompute the key schedule for this block */ + ks[0] = ctx->X[0]; + ks[1] = ctx->X[1]; + ks[2] = ctx->X[2]; + ks[3] = ctx->X[3]; + ks[4] = ctx->X[4]; + ks[5] = ctx->X[5]; + ks[6] = ctx->X[6]; + ks[7] = ctx->X[7]; + ks[8] = ks[0] ^ ks[1] ^ ks[2] ^ ks[3] ^ + ks[4] ^ ks[5] ^ ks[6] ^ ks[7] ^ SKEIN_KS_PARITY; + + ts[2] = ts[0] ^ ts[1]; + + Skein_Get64_LSB_First(w,blkPtr,WCNT); /* get input block in little-endian format */ + DebugSaveTweak(ctx); + Skein_Show_Block(BLK_BITS,&ctx->h,ctx->X,blkPtr,w,ks,ts); + + X0 = w[0] + ks[0]; /* do the first full key injection */ + X1 = w[1] + ks[1]; + X2 = w[2] + ks[2]; + X3 = w[3] + ks[3]; + X4 = w[4] + ks[4]; + X5 = w[5] + ks[5] + ts[0]; + X6 = w[6] + ks[6] + ts[1]; + X7 = w[7] + ks[7]; + + blkPtr += SKEIN_512_BLOCK_BYTES; + + Skein_Show_R_Ptr(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INITIAL,Xptr); + /* run the rounds */ +#define Round512(p0,p1,p2,p3,p4,p5,p6,p7,ROT,rNum) \ + X##p0 += X##p1; X##p1 = RotL_64(X##p1,ROT##_0); X##p1 ^= X##p0; \ + X##p2 += X##p3; X##p3 = RotL_64(X##p3,ROT##_1); X##p3 ^= X##p2; \ + X##p4 += X##p5; X##p5 = RotL_64(X##p5,ROT##_2); X##p5 ^= X##p4; \ + X##p6 += X##p7; X##p7 = RotL_64(X##p7,ROT##_3); X##p7 ^= X##p6; \ + +#if SKEIN_UNROLL_512 == 0 +#define R512(p0,p1,p2,p3,p4,p5,p6,p7,ROT,rNum) /* unrolled */ \ + Round512(p0,p1,p2,p3,p4,p5,p6,p7,ROT,rNum) \ + Skein_Show_R_Ptr(BLK_BITS,&ctx->h,rNum,Xptr); + +#define I512(R) \ + X0 += ks[((R)+1) % 9]; /* inject the key schedule value */ \ + X1 += ks[((R)+2) % 9]; \ + X2 += ks[((R)+3) % 9]; \ + X3 += ks[((R)+4) % 9]; \ + X4 += ks[((R)+5) % 9]; \ + X5 += ks[((R)+6) % 9] + ts[((R)+1) % 3]; \ + X6 += ks[((R)+7) % 9] + ts[((R)+2) % 3]; \ + X7 += ks[((R)+8) % 9] + (R)+1; \ + Skein_Show_R_Ptr(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INJECT,Xptr); +#else /* looping version */ +#define R512(p0,p1,p2,p3,p4,p5,p6,p7,ROT,rNum) \ + Round512(p0,p1,p2,p3,p4,p5,p6,p7,ROT,rNum) \ + Skein_Show_R_Ptr(BLK_BITS,&ctx->h,4*(r-1)+rNum,Xptr); + +#define I512(R) \ + X0 += ks[r+(R)+0]; /* inject the key schedule value */ \ + X1 += ks[r+(R)+1]; \ + X2 += ks[r+(R)+2]; \ + X3 += ks[r+(R)+3]; \ + X4 += ks[r+(R)+4]; \ + X5 += ks[r+(R)+5] + ts[r+(R)+0]; \ + X6 += ks[r+(R)+6] + ts[r+(R)+1]; \ + X7 += ks[r+(R)+7] + r+(R) ; \ + ks[r + (R)+8] = ks[r+(R)-1]; /* rotate key schedule */ \ + ts[r + (R)+2] = ts[r+(R)-1]; \ + Skein_Show_R_Ptr(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INJECT,Xptr); + + for (r=1;r < 2*RCNT;r+=2*SKEIN_UNROLL_512) /* loop thru it */ +#endif /* end of looped code definitions */ + { +#define R512_8_rounds(R) /* do 8 full rounds */ \ + R512(0,1,2,3,4,5,6,7,R_512_0,8*(R)+ 1); \ + R512(2,1,4,7,6,5,0,3,R_512_1,8*(R)+ 2); \ + R512(4,1,6,3,0,5,2,7,R_512_2,8*(R)+ 3); \ + R512(6,1,0,7,2,5,4,3,R_512_3,8*(R)+ 4); \ + I512(2*(R)); \ + R512(0,1,2,3,4,5,6,7,R_512_4,8*(R)+ 5); \ + R512(2,1,4,7,6,5,0,3,R_512_5,8*(R)+ 6); \ + R512(4,1,6,3,0,5,2,7,R_512_6,8*(R)+ 7); \ + R512(6,1,0,7,2,5,4,3,R_512_7,8*(R)+ 8); \ + I512(2*(R)+1); /* and key injection */ + + R512_8_rounds( 0); + +#define R512_Unroll_R(NN) ((SKEIN_UNROLL_512 == 0 && SKEIN_512_ROUNDS_TOTAL/8 > (NN)) || (SKEIN_UNROLL_512 > (NN))) + + #if R512_Unroll_R( 1) + R512_8_rounds( 1); + #endif + #if R512_Unroll_R( 2) + R512_8_rounds( 2); + #endif + #if R512_Unroll_R( 3) + R512_8_rounds( 3); + #endif + #if R512_Unroll_R( 4) + R512_8_rounds( 4); + #endif + #if R512_Unroll_R( 5) + R512_8_rounds( 5); + #endif + #if R512_Unroll_R( 6) + R512_8_rounds( 6); + #endif + #if R512_Unroll_R( 7) + R512_8_rounds( 7); + #endif + #if R512_Unroll_R( 8) + R512_8_rounds( 8); + #endif + #if R512_Unroll_R( 9) + R512_8_rounds( 9); + #endif + #if R512_Unroll_R(10) + R512_8_rounds(10); + #endif + #if R512_Unroll_R(11) + R512_8_rounds(11); + #endif + #if R512_Unroll_R(12) + R512_8_rounds(12); + #endif + #if R512_Unroll_R(13) + R512_8_rounds(13); + #endif + #if R512_Unroll_R(14) + R512_8_rounds(14); + #endif + #if (SKEIN_UNROLL_512 > 14) +#error "need more unrolling in Skein_512_Process_Block" + #endif + } + + /* do the final "feedforward" xor, update context chaining vars */ + ctx->X[0] = X0 ^ w[0]; + ctx->X[1] = X1 ^ w[1]; + ctx->X[2] = X2 ^ w[2]; + ctx->X[3] = X3 ^ w[3]; + ctx->X[4] = X4 ^ w[4]; + ctx->X[5] = X5 ^ w[5]; + ctx->X[6] = X6 ^ w[6]; + ctx->X[7] = X7 ^ w[7]; + Skein_Show_Round(BLK_BITS,&ctx->h,SKEIN_RND_FEED_FWD,ctx->X); + + ts[1] &= ~SKEIN_T1_FLAG_FIRST; + } + while (--blkCnt); + ctx->h.T[0] = ts[0]; + ctx->h.T[1] = ts[1]; + } + +#if defined(SKEIN_CODE_SIZE) || defined(SKEIN_PERF) +static size_t Skein_512_Process_Block_CodeSize(void) + { + return ((u08b_t *) Skein_512_Process_Block_CodeSize) - + ((u08b_t *) Skein_512_Process_Block); + } +static uint_t Skein_512_Unroll_Cnt(void) + { + return SKEIN_UNROLL_512; + } +#endif +#endif + +/***************************** Skein1024 ******************************/ +#if !(SKEIN_USE_ASM & 1024) +static void Skein1024_Process_Block(Skein1024_Ctxt_t *ctx,const u08b_t *blkPtr,size_t blkCnt,size_t byteCntAdd) + { /* do it in C, always looping (unrolled is bigger AND slower!) */ + enum + { + WCNT = SKEIN1024_STATE_WORDS + }; +#undef RCNT +#define RCNT (SKEIN1024_ROUNDS_TOTAL/8) + +#ifdef SKEIN_LOOP /* configure how much to unroll the loop */ +#define SKEIN_UNROLL_1024 ((SKEIN_LOOP)%10) +#else +#define SKEIN_UNROLL_1024 (0) +#endif + +#if (SKEIN_UNROLL_1024 != 0) +#if (RCNT % SKEIN_UNROLL_1024) +#error "Invalid SKEIN_UNROLL_1024" /* sanity check on unroll count */ +#endif + size_t r; + u64b_t kw[WCNT+4+RCNT*2]; /* key schedule words : chaining vars + tweak + "rotation"*/ +#else + u64b_t kw[WCNT+4]; /* key schedule words : chaining vars + tweak */ +#endif + + u64b_t X00,X01,X02,X03,X04,X05,X06,X07, /* local copy of vars, for speed */ + X08,X09,X10,X11,X12,X13,X14,X15; + u64b_t w [WCNT]; /* local copy of input block */ +#ifdef SKEIN_DEBUG + const u64b_t *Xptr[16]; /* use for debugging (help compiler put Xn in registers) */ + Xptr[ 0] = &X00; Xptr[ 1] = &X01; Xptr[ 2] = &X02; Xptr[ 3] = &X03; + Xptr[ 4] = &X04; Xptr[ 5] = &X05; Xptr[ 6] = &X06; Xptr[ 7] = &X07; + Xptr[ 8] = &X08; Xptr[ 9] = &X09; Xptr[10] = &X10; Xptr[11] = &X11; + Xptr[12] = &X12; Xptr[13] = &X13; Xptr[14] = &X14; Xptr[15] = &X15; +#endif + + Skein_assert(blkCnt != 0); /* never call with blkCnt == 0! */ + ts[0] = ctx->h.T[0]; + ts[1] = ctx->h.T[1]; + do { + /* this implementation only supports 2**64 input bytes (no carry out here) */ + ts[0] += byteCntAdd; /* update processed length */ + + /* precompute the key schedule for this block */ + ks[ 0] = ctx->X[ 0]; + ks[ 1] = ctx->X[ 1]; + ks[ 2] = ctx->X[ 2]; + ks[ 3] = ctx->X[ 3]; + ks[ 4] = ctx->X[ 4]; + ks[ 5] = ctx->X[ 5]; + ks[ 6] = ctx->X[ 6]; + ks[ 7] = ctx->X[ 7]; + ks[ 8] = ctx->X[ 8]; + ks[ 9] = ctx->X[ 9]; + ks[10] = ctx->X[10]; + ks[11] = ctx->X[11]; + ks[12] = ctx->X[12]; + ks[13] = ctx->X[13]; + ks[14] = ctx->X[14]; + ks[15] = ctx->X[15]; + ks[16] = ks[ 0] ^ ks[ 1] ^ ks[ 2] ^ ks[ 3] ^ + ks[ 4] ^ ks[ 5] ^ ks[ 6] ^ ks[ 7] ^ + ks[ 8] ^ ks[ 9] ^ ks[10] ^ ks[11] ^ + ks[12] ^ ks[13] ^ ks[14] ^ ks[15] ^ SKEIN_KS_PARITY; + + ts[2] = ts[0] ^ ts[1]; + + Skein_Get64_LSB_First(w,blkPtr,WCNT); /* get input block in little-endian format */ + DebugSaveTweak(ctx); + Skein_Show_Block(BLK_BITS,&ctx->h,ctx->X,blkPtr,w,ks,ts); + + X00 = w[ 0] + ks[ 0]; /* do the first full key injection */ + X01 = w[ 1] + ks[ 1]; + X02 = w[ 2] + ks[ 2]; + X03 = w[ 3] + ks[ 3]; + X04 = w[ 4] + ks[ 4]; + X05 = w[ 5] + ks[ 5]; + X06 = w[ 6] + ks[ 6]; + X07 = w[ 7] + ks[ 7]; + X08 = w[ 8] + ks[ 8]; + X09 = w[ 9] + ks[ 9]; + X10 = w[10] + ks[10]; + X11 = w[11] + ks[11]; + X12 = w[12] + ks[12]; + X13 = w[13] + ks[13] + ts[0]; + X14 = w[14] + ks[14] + ts[1]; + X15 = w[15] + ks[15]; + + Skein_Show_R_Ptr(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INITIAL,Xptr); + +#define Round1024(p0,p1,p2,p3,p4,p5,p6,p7,p8,p9,pA,pB,pC,pD,pE,pF,ROT,rNum) \ + X##p0 += X##p1; X##p1 = RotL_64(X##p1,ROT##_0); X##p1 ^= X##p0; \ + X##p2 += X##p3; X##p3 = RotL_64(X##p3,ROT##_1); X##p3 ^= X##p2; \ + X##p4 += X##p5; X##p5 = RotL_64(X##p5,ROT##_2); X##p5 ^= X##p4; \ + X##p6 += X##p7; X##p7 = RotL_64(X##p7,ROT##_3); X##p7 ^= X##p6; \ + X##p8 += X##p9; X##p9 = RotL_64(X##p9,ROT##_4); X##p9 ^= X##p8; \ + X##pA += X##pB; X##pB = RotL_64(X##pB,ROT##_5); X##pB ^= X##pA; \ + X##pC += X##pD; X##pD = RotL_64(X##pD,ROT##_6); X##pD ^= X##pC; \ + X##pE += X##pF; X##pF = RotL_64(X##pF,ROT##_7); X##pF ^= X##pE; \ + +#if SKEIN_UNROLL_1024 == 0 +#define R1024(p0,p1,p2,p3,p4,p5,p6,p7,p8,p9,pA,pB,pC,pD,pE,pF,ROT,rn) \ + Round1024(p0,p1,p2,p3,p4,p5,p6,p7,p8,p9,pA,pB,pC,pD,pE,pF,ROT,rn) \ + Skein_Show_R_Ptr(BLK_BITS,&ctx->h,rn,Xptr); + +#define I1024(R) \ + X00 += ks[((R)+ 1) % 17]; /* inject the key schedule value */ \ + X01 += ks[((R)+ 2) % 17]; \ + X02 += ks[((R)+ 3) % 17]; \ + X03 += ks[((R)+ 4) % 17]; \ + X04 += ks[((R)+ 5) % 17]; \ + X05 += ks[((R)+ 6) % 17]; \ + X06 += ks[((R)+ 7) % 17]; \ + X07 += ks[((R)+ 8) % 17]; \ + X08 += ks[((R)+ 9) % 17]; \ + X09 += ks[((R)+10) % 17]; \ + X10 += ks[((R)+11) % 17]; \ + X11 += ks[((R)+12) % 17]; \ + X12 += ks[((R)+13) % 17]; \ + X13 += ks[((R)+14) % 17] + ts[((R)+1) % 3]; \ + X14 += ks[((R)+15) % 17] + ts[((R)+2) % 3]; \ + X15 += ks[((R)+16) % 17] + (R)+1; \ + Skein_Show_R_Ptr(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INJECT,Xptr); +#else /* looping version */ +#define R1024(p0,p1,p2,p3,p4,p5,p6,p7,p8,p9,pA,pB,pC,pD,pE,pF,ROT,rn) \ + Round1024(p0,p1,p2,p3,p4,p5,p6,p7,p8,p9,pA,pB,pC,pD,pE,pF,ROT,rn) \ + Skein_Show_R_Ptr(BLK_BITS,&ctx->h,4*(r-1)+rn,Xptr); + +#define I1024(R) \ + X00 += ks[r+(R)+ 0]; /* inject the key schedule value */ \ + X01 += ks[r+(R)+ 1]; \ + X02 += ks[r+(R)+ 2]; \ + X03 += ks[r+(R)+ 3]; \ + X04 += ks[r+(R)+ 4]; \ + X05 += ks[r+(R)+ 5]; \ + X06 += ks[r+(R)+ 6]; \ + X07 += ks[r+(R)+ 7]; \ + X08 += ks[r+(R)+ 8]; \ + X09 += ks[r+(R)+ 9]; \ + X10 += ks[r+(R)+10]; \ + X11 += ks[r+(R)+11]; \ + X12 += ks[r+(R)+12]; \ + X13 += ks[r+(R)+13] + ts[r+(R)+0]; \ + X14 += ks[r+(R)+14] + ts[r+(R)+1]; \ + X15 += ks[r+(R)+15] + r+(R) ; \ + ks[r + (R)+16] = ks[r+(R)-1]; /* rotate key schedule */ \ + ts[r + (R)+ 2] = ts[r+(R)-1]; \ + Skein_Show_R_Ptr(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INJECT,Xptr); + + for (r=1;r <= 2*RCNT;r+=2*SKEIN_UNROLL_1024) /* loop thru it */ +#endif + { +#define R1024_8_rounds(R) /* do 8 full rounds */ \ + R1024(00,01,02,03,04,05,06,07,08,09,10,11,12,13,14,15,R1024_0,8*(R) + 1); \ + R1024(00,09,02,13,06,11,04,15,10,07,12,03,14,05,08,01,R1024_1,8*(R) + 2); \ + R1024(00,07,02,05,04,03,06,01,12,15,14,13,08,11,10,09,R1024_2,8*(R) + 3); \ + R1024(00,15,02,11,06,13,04,09,14,01,08,05,10,03,12,07,R1024_3,8*(R) + 4); \ + I1024(2*(R)); \ + R1024(00,01,02,03,04,05,06,07,08,09,10,11,12,13,14,15,R1024_4,8*(R) + 5); \ + R1024(00,09,02,13,06,11,04,15,10,07,12,03,14,05,08,01,R1024_5,8*(R) + 6); \ + R1024(00,07,02,05,04,03,06,01,12,15,14,13,08,11,10,09,R1024_6,8*(R) + 7); \ + R1024(00,15,02,11,06,13,04,09,14,01,08,05,10,03,12,07,R1024_7,8*(R) + 8); \ + I1024(2*(R)+1); + + R1024_8_rounds( 0); + +#define R1024_Unroll_R(NN) ((SKEIN_UNROLL_1024 == 0 && SKEIN1024_ROUNDS_TOTAL/8 > (NN)) || (SKEIN_UNROLL_1024 > (NN))) + + #if R1024_Unroll_R( 1) + R1024_8_rounds( 1); + #endif + #if R1024_Unroll_R( 2) + R1024_8_rounds( 2); + #endif + #if R1024_Unroll_R( 3) + R1024_8_rounds( 3); + #endif + #if R1024_Unroll_R( 4) + R1024_8_rounds( 4); + #endif + #if R1024_Unroll_R( 5) + R1024_8_rounds( 5); + #endif + #if R1024_Unroll_R( 6) + R1024_8_rounds( 6); + #endif + #if R1024_Unroll_R( 7) + R1024_8_rounds( 7); + #endif + #if R1024_Unroll_R( 8) + R1024_8_rounds( 8); + #endif + #if R1024_Unroll_R( 9) + R1024_8_rounds( 9); + #endif + #if R1024_Unroll_R(10) + R1024_8_rounds(10); + #endif + #if R1024_Unroll_R(11) + R1024_8_rounds(11); + #endif + #if R1024_Unroll_R(12) + R1024_8_rounds(12); + #endif + #if R1024_Unroll_R(13) + R1024_8_rounds(13); + #endif + #if R1024_Unroll_R(14) + R1024_8_rounds(14); + #endif + #if (SKEIN_UNROLL_1024 > 14) +#error "need more unrolling in Skein_1024_Process_Block" + #endif + } + /* do the final "feedforward" xor, update context chaining vars */ + + ctx->X[ 0] = X00 ^ w[ 0]; + ctx->X[ 1] = X01 ^ w[ 1]; + ctx->X[ 2] = X02 ^ w[ 2]; + ctx->X[ 3] = X03 ^ w[ 3]; + ctx->X[ 4] = X04 ^ w[ 4]; + ctx->X[ 5] = X05 ^ w[ 5]; + ctx->X[ 6] = X06 ^ w[ 6]; + ctx->X[ 7] = X07 ^ w[ 7]; + ctx->X[ 8] = X08 ^ w[ 8]; + ctx->X[ 9] = X09 ^ w[ 9]; + ctx->X[10] = X10 ^ w[10]; + ctx->X[11] = X11 ^ w[11]; + ctx->X[12] = X12 ^ w[12]; + ctx->X[13] = X13 ^ w[13]; + ctx->X[14] = X14 ^ w[14]; + ctx->X[15] = X15 ^ w[15]; + + Skein_Show_Round(BLK_BITS,&ctx->h,SKEIN_RND_FEED_FWD,ctx->X); + + ts[1] &= ~SKEIN_T1_FLAG_FIRST; + blkPtr += SKEIN1024_BLOCK_BYTES; + } + while (--blkCnt); + ctx->h.T[0] = ts[0]; + ctx->h.T[1] = ts[1]; + } + +#if defined(SKEIN_CODE_SIZE) || defined(SKEIN_PERF) +static size_t Skein1024_Process_Block_CodeSize(void) + { + return ((u08b_t *) Skein1024_Process_Block_CodeSize) - + ((u08b_t *) Skein1024_Process_Block); + } +static uint_t Skein1024_Unroll_Cnt(void) + { + return SKEIN_UNROLL_1024; + } +#endif +#endif + + +#if 0 +/*****************************************************************/ +/* 256-bit Skein */ +/*****************************************************************/ + +/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ +/* init the context for a straight hashing operation */ +static int Skein_256_Init(Skein_256_Ctxt_t *ctx, size_t hashBitLen) + { + union + { + u08b_t b[SKEIN_256_STATE_BYTES]; + u64b_t w[SKEIN_256_STATE_WORDS]; + } cfg; /* config block */ + + Skein_Assert(hashBitLen > 0,SKEIN_BAD_HASHLEN); + ctx->h.hashBitLen = hashBitLen; /* output hash bit count */ + + switch (hashBitLen) + { /* use pre-computed values, where available */ +#ifndef SKEIN_NO_PRECOMP + case 256: memcpy(ctx->X,SKEIN_256_IV_256,sizeof(ctx->X)); break; + case 224: memcpy(ctx->X,SKEIN_256_IV_224,sizeof(ctx->X)); break; + case 160: memcpy(ctx->X,SKEIN_256_IV_160,sizeof(ctx->X)); break; + case 128: memcpy(ctx->X,SKEIN_256_IV_128,sizeof(ctx->X)); break; +#endif + default: + /* here if there is no precomputed IV value available */ + /* build/process the config block, type == CONFIG (could be precomputed) */ + Skein_Start_New_Type(ctx,CFG_FINAL); /* set tweaks: T0=0; T1=CFG | FINAL */ + + cfg.w[0] = Skein_Swap64(SKEIN_SCHEMA_VER); /* set the schema, version */ + cfg.w[1] = Skein_Swap64(hashBitLen); /* hash result length in bits */ + cfg.w[2] = Skein_Swap64(SKEIN_CFG_TREE_INFO_SEQUENTIAL); + memset(&cfg.w[3],0,sizeof(cfg) - 3*sizeof(cfg.w[0])); /* zero pad config block */ + + /* compute the initial chaining values from config block */ + memset(ctx->X,0,sizeof(ctx->X)); /* zero the chaining variables */ + Skein_256_Process_Block(ctx,cfg.b,1,SKEIN_CFG_STR_LEN); + break; + } + /* The chaining vars ctx->X are now initialized for the given hashBitLen. */ + /* Set up to process the data message portion of the hash (default) */ + Skein_Start_New_Type(ctx,MSG); /* T0=0, T1= MSG type */ + + return SKEIN_SUCCESS; + } + +/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ +/* init the context for a MAC and/or tree hash operation */ +/* [identical to Skein_256_Init() when keyBytes == 0 && treeInfo == SKEIN_CFG_TREE_INFO_SEQUENTIAL] */ +static int Skein_256_InitExt(Skein_256_Ctxt_t *ctx,size_t hashBitLen,u64b_t treeInfo, const u08b_t *key, size_t keyBytes) + { + union + { + u08b_t b[SKEIN_256_STATE_BYTES]; + u64b_t w[SKEIN_256_STATE_WORDS]; + } cfg; /* config block */ + + Skein_Assert(hashBitLen > 0,SKEIN_BAD_HASHLEN); + Skein_Assert(keyBytes == 0 || key != NULL,SKEIN_FAIL); + + /* compute the initial chaining values ctx->X[], based on key */ + if (keyBytes == 0) /* is there a key? */ + { + memset(ctx->X,0,sizeof(ctx->X)); /* no key: use all zeroes as key for config block */ + } + else /* here to pre-process a key */ + { + Skein_assert(sizeof(cfg.b) >= sizeof(ctx->X)); + /* do a mini-Init right here */ + ctx->h.hashBitLen=8*sizeof(ctx->X); /* set output hash bit count = state size */ + Skein_Start_New_Type(ctx,KEY); /* set tweaks: T0 = 0; T1 = KEY type */ + memset(ctx->X,0,sizeof(ctx->X)); /* zero the initial chaining variables */ + Skein_256_Update(ctx,key,keyBytes); /* hash the key */ + Skein_256_Final_Pad(ctx,cfg.b); /* put result into cfg.b[] */ + memcpy(ctx->X,cfg.b,sizeof(cfg.b)); /* copy over into ctx->X[] */ +#if SKEIN_NEED_SWAP + { + uint_t i; + for (i=0;i<SKEIN_256_STATE_WORDS;i++) /* convert key bytes to context words */ + ctx->X[i] = Skein_Swap64(ctx->X[i]); + } +#endif + } + /* build/process the config block, type == CONFIG (could be precomputed for each key) */ + ctx->h.hashBitLen = hashBitLen; /* output hash bit count */ + Skein_Start_New_Type(ctx,CFG_FINAL); + + memset(&cfg.w,0,sizeof(cfg.w)); /* pre-pad cfg.w[] with zeroes */ + cfg.w[0] = Skein_Swap64(SKEIN_SCHEMA_VER); + cfg.w[1] = Skein_Swap64(hashBitLen); /* hash result length in bits */ + cfg.w[2] = Skein_Swap64(treeInfo); /* tree hash config info (or SKEIN_CFG_TREE_INFO_SEQUENTIAL) */ + + Skein_Show_Key(256,&ctx->h,key,keyBytes); + + /* compute the initial chaining values from config block */ + Skein_256_Process_Block(ctx,cfg.b,1,SKEIN_CFG_STR_LEN); + + /* The chaining vars ctx->X are now initialized */ + /* Set up to process the data message portion of the hash (default) */ + ctx->h.bCnt = 0; /* buffer b[] starts out empty */ + Skein_Start_New_Type(ctx,MSG); + + return SKEIN_SUCCESS; + } +#endif + +/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ +/* process the input bytes */ +static int Skein_256_Update(Skein_256_Ctxt_t *ctx, const u08b_t *msg, size_t msgByteCnt) + { + size_t n; + + Skein_Assert(ctx->h.bCnt <= SKEIN_256_BLOCK_BYTES,SKEIN_FAIL); /* catch uninitialized context */ + + /* process full blocks, if any */ + if (msgByteCnt + ctx->h.bCnt > SKEIN_256_BLOCK_BYTES) + { + if (ctx->h.bCnt) /* finish up any buffered message data */ + { + n = SKEIN_256_BLOCK_BYTES - ctx->h.bCnt; /* # bytes free in buffer b[] */ + if (n) + { + Skein_assert(n < msgByteCnt); /* check on our logic here */ + memcpy(&ctx->b[ctx->h.bCnt],msg,n); + msgByteCnt -= n; + msg += n; + ctx->h.bCnt += n; + } + Skein_assert(ctx->h.bCnt == SKEIN_256_BLOCK_BYTES); + Skein_256_Process_Block(ctx,ctx->b,1,SKEIN_256_BLOCK_BYTES); + ctx->h.bCnt = 0; + } + /* now process any remaining full blocks, directly from input message data */ + if (msgByteCnt > SKEIN_256_BLOCK_BYTES) + { + n = (msgByteCnt-1) / SKEIN_256_BLOCK_BYTES; /* number of full blocks to process */ + Skein_256_Process_Block(ctx,msg,n,SKEIN_256_BLOCK_BYTES); + msgByteCnt -= n * SKEIN_256_BLOCK_BYTES; + msg += n * SKEIN_256_BLOCK_BYTES; + } + Skein_assert(ctx->h.bCnt == 0); + } + + /* copy any remaining source message data bytes into b[] */ + if (msgByteCnt) + { + Skein_assert(msgByteCnt + ctx->h.bCnt <= SKEIN_256_BLOCK_BYTES); + memcpy(&ctx->b[ctx->h.bCnt],msg,msgByteCnt); + ctx->h.bCnt += msgByteCnt; + } + + return SKEIN_SUCCESS; + } + +/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ +/* finalize the hash computation and output the result */ +static int Skein_256_Final(Skein_256_Ctxt_t *ctx, u08b_t *hashVal) + { + size_t i,n,byteCnt; + u64b_t X[SKEIN_256_STATE_WORDS]; + Skein_Assert(ctx->h.bCnt <= SKEIN_256_BLOCK_BYTES,SKEIN_FAIL); /* catch uninitialized context */ + + ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL; /* tag as the final block */ + if (ctx->h.bCnt < SKEIN_256_BLOCK_BYTES) /* zero pad b[] if necessary */ + memset(&ctx->b[ctx->h.bCnt],0,SKEIN_256_BLOCK_BYTES - ctx->h.bCnt); + + Skein_256_Process_Block(ctx,ctx->b,1,ctx->h.bCnt); /* process the final block */ + + /* now output the result */ + byteCnt = (ctx->h.hashBitLen + 7) >> 3; /* total number of output bytes */ + + /* run Threefish in "counter mode" to generate output */ + memset(ctx->b,0,sizeof(ctx->b)); /* zero out b[], so it can hold the counter */ + memcpy(X,ctx->X,sizeof(X)); /* keep a local copy of counter mode "key" */ + for (i=0;i < byteCnt;i += SKEIN_256_BLOCK_BYTES) + { + ((u64b_t *)ctx->b)[0]= Skein_Swap64((u64b_t) i); /* build the counter block */ + Skein_Start_New_Type(ctx,OUT_FINAL); + Skein_256_Process_Block(ctx,ctx->b,1,sizeof(u64b_t)); /* run "counter mode" */ + n = byteCnt - i; /* number of output bytes left to go */ + if (n >= SKEIN_256_BLOCK_BYTES) + n = SKEIN_256_BLOCK_BYTES; + Skein_Put64_LSB_First(hashVal+i,ctx->X,n); /* "output" the ctr mode bytes */ + Skein_Show_Final(256,&ctx->h,n,hashVal+i*SKEIN_256_BLOCK_BYTES); + memcpy(ctx->X,X,sizeof(X)); /* restore the counter mode key for next time */ + } + return SKEIN_SUCCESS; + } + +#if defined(SKEIN_CODE_SIZE) || defined(SKEIN_PERF) +static size_t Skein_256_API_CodeSize(void) + { + return ((u08b_t *) Skein_256_API_CodeSize) - + ((u08b_t *) Skein_256_Init); + } +#endif + +/*****************************************************************/ +/* 512-bit Skein */ +/*****************************************************************/ + +/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ +/* init the context for a straight hashing operation */ +static int Skein_512_Init(Skein_512_Ctxt_t *ctx, size_t hashBitLen) + { + union + { + u08b_t b[SKEIN_512_STATE_BYTES]; + u64b_t w[SKEIN_512_STATE_WORDS]; + } cfg; /* config block */ + + Skein_Assert(hashBitLen > 0,SKEIN_BAD_HASHLEN); + ctx->h.hashBitLen = hashBitLen; /* output hash bit count */ + + switch (hashBitLen) + { /* use pre-computed values, where available */ +#ifndef SKEIN_NO_PRECOMP + case 512: memcpy(ctx->X,SKEIN_512_IV_512,sizeof(ctx->X)); break; + case 384: memcpy(ctx->X,SKEIN_512_IV_384,sizeof(ctx->X)); break; + case 256: memcpy(ctx->X,SKEIN_512_IV_256,sizeof(ctx->X)); break; + case 224: memcpy(ctx->X,SKEIN_512_IV_224,sizeof(ctx->X)); break; +#endif + default: + /* here if there is no precomputed IV value available */ + /* build/process the config block, type == CONFIG (could be precomputed) */ + Skein_Start_New_Type(ctx,CFG_FINAL); /* set tweaks: T0=0; T1=CFG | FINAL */ + + cfg.w[0] = Skein_Swap64(SKEIN_SCHEMA_VER); /* set the schema, version */ + cfg.w[1] = Skein_Swap64(hashBitLen); /* hash result length in bits */ + cfg.w[2] = Skein_Swap64(SKEIN_CFG_TREE_INFO_SEQUENTIAL); + memset(&cfg.w[3],0,sizeof(cfg) - 3*sizeof(cfg.w[0])); /* zero pad config block */ + + /* compute the initial chaining values from config block */ + memset(ctx->X,0,sizeof(ctx->X)); /* zero the chaining variables */ + Skein_512_Process_Block(ctx,cfg.b,1,SKEIN_CFG_STR_LEN); + break; + } + + /* The chaining vars ctx->X are now initialized for the given hashBitLen. */ + /* Set up to process the data message portion of the hash (default) */ + Skein_Start_New_Type(ctx,MSG); /* T0=0, T1= MSG type */ + + return SKEIN_SUCCESS; + } + +#if 0 +/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ +/* init the context for a MAC and/or tree hash operation */ +/* [identical to Skein_512_Init() when keyBytes == 0 && treeInfo == SKEIN_CFG_TREE_INFO_SEQUENTIAL] */ +static int Skein_512_InitExt(Skein_512_Ctxt_t *ctx,size_t hashBitLen,u64b_t treeInfo, const u08b_t *key, size_t keyBytes) + { + union + { + u08b_t b[SKEIN_512_STATE_BYTES]; + u64b_t w[SKEIN_512_STATE_WORDS]; + } cfg; /* config block */ + + Skein_Assert(hashBitLen > 0,SKEIN_BAD_HASHLEN); + Skein_Assert(keyBytes == 0 || key != NULL,SKEIN_FAIL); + + /* compute the initial chaining values ctx->X[], based on key */ + if (keyBytes == 0) /* is there a key? */ + { + memset(ctx->X,0,sizeof(ctx->X)); /* no key: use all zeroes as key for config block */ + } + else /* here to pre-process a key */ + { + Skein_assert(sizeof(cfg.b) >= sizeof(ctx->X)); + /* do a mini-Init right here */ + ctx->h.hashBitLen=8*sizeof(ctx->X); /* set output hash bit count = state size */ + Skein_Start_New_Type(ctx,KEY); /* set tweaks: T0 = 0; T1 = KEY type */ + memset(ctx->X,0,sizeof(ctx->X)); /* zero the initial chaining variables */ + Skein_512_Update(ctx,key,keyBytes); /* hash the key */ + Skein_512_Final_Pad(ctx,cfg.b); /* put result into cfg.b[] */ + memcpy(ctx->X,cfg.b,sizeof(cfg.b)); /* copy over into ctx->X[] */ +#if SKEIN_NEED_SWAP + { + uint_t i; + for (i=0;i<SKEIN_512_STATE_WORDS;i++) /* convert key bytes to context words */ + ctx->X[i] = Skein_Swap64(ctx->X[i]); + } +#endif + } + /* build/process the config block, type == CONFIG (could be precomputed for each key) */ + ctx->h.hashBitLen = hashBitLen; /* output hash bit count */ + Skein_Start_New_Type(ctx,CFG_FINAL); + + memset(&cfg.w,0,sizeof(cfg.w)); /* pre-pad cfg.w[] with zeroes */ + cfg.w[0] = Skein_Swap64(SKEIN_SCHEMA_VER); + cfg.w[1] = Skein_Swap64(hashBitLen); /* hash result length in bits */ + cfg.w[2] = Skein_Swap64(treeInfo); /* tree hash config info (or SKEIN_CFG_TREE_INFO_SEQUENTIAL) */ + + Skein_Show_Key(512,&ctx->h,key,keyBytes); + + /* compute the initial chaining values from config block */ + Skein_512_Process_Block(ctx,cfg.b,1,SKEIN_CFG_STR_LEN); + + /* The chaining vars ctx->X are now initialized */ + /* Set up to process the data message portion of the hash (default) */ + ctx->h.bCnt = 0; /* buffer b[] starts out empty */ + Skein_Start_New_Type(ctx,MSG); + + return SKEIN_SUCCESS; + } +#endif + +/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ +/* process the input bytes */ +static int Skein_512_Update(Skein_512_Ctxt_t *ctx, const u08b_t *msg, size_t msgByteCnt) + { + size_t n; + + Skein_Assert(ctx->h.bCnt <= SKEIN_512_BLOCK_BYTES,SKEIN_FAIL); /* catch uninitialized context */ + + /* process full blocks, if any */ + if (msgByteCnt + ctx->h.bCnt > SKEIN_512_BLOCK_BYTES) + { + if (ctx->h.bCnt) /* finish up any buffered message data */ + { + n = SKEIN_512_BLOCK_BYTES - ctx->h.bCnt; /* # bytes free in buffer b[] */ + if (n) + { + Skein_assert(n < msgByteCnt); /* check on our logic here */ + memcpy(&ctx->b[ctx->h.bCnt],msg,n); + msgByteCnt -= n; + msg += n; + ctx->h.bCnt += n; + } + Skein_assert(ctx->h.bCnt == SKEIN_512_BLOCK_BYTES); + Skein_512_Process_Block(ctx,ctx->b,1,SKEIN_512_BLOCK_BYTES); + ctx->h.bCnt = 0; + } + /* now process any remaining full blocks, directly from input message data */ + if (msgByteCnt > SKEIN_512_BLOCK_BYTES) + { + n = (msgByteCnt-1) / SKEIN_512_BLOCK_BYTES; /* number of full blocks to process */ + Skein_512_Process_Block(ctx,msg,n,SKEIN_512_BLOCK_BYTES); + msgByteCnt -= n * SKEIN_512_BLOCK_BYTES; + msg += n * SKEIN_512_BLOCK_BYTES; + } + Skein_assert(ctx->h.bCnt == 0); + } + + /* copy any remaining source message data bytes into b[] */ + if (msgByteCnt) + { + Skein_assert(msgByteCnt + ctx->h.bCnt <= SKEIN_512_BLOCK_BYTES); + memcpy(&ctx->b[ctx->h.bCnt],msg,msgByteCnt); + ctx->h.bCnt += msgByteCnt; + } + + return SKEIN_SUCCESS; + } + +/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ +/* finalize the hash computation and output the result */ +static int Skein_512_Final(Skein_512_Ctxt_t *ctx, u08b_t *hashVal) + { + size_t i,n,byteCnt; + u64b_t X[SKEIN_512_STATE_WORDS]; + Skein_Assert(ctx->h.bCnt <= SKEIN_512_BLOCK_BYTES,SKEIN_FAIL); /* catch uninitialized context */ + + ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL; /* tag as the final block */ + if (ctx->h.bCnt < SKEIN_512_BLOCK_BYTES) /* zero pad b[] if necessary */ + memset(&ctx->b[ctx->h.bCnt],0,SKEIN_512_BLOCK_BYTES - ctx->h.bCnt); + + Skein_512_Process_Block(ctx,ctx->b,1,ctx->h.bCnt); /* process the final block */ + + /* now output the result */ + byteCnt = (ctx->h.hashBitLen + 7) >> 3; /* total number of output bytes */ + + /* run Threefish in "counter mode" to generate output */ + memset(ctx->b,0,sizeof(ctx->b)); /* zero out b[], so it can hold the counter */ + memcpy(X,ctx->X,sizeof(X)); /* keep a local copy of counter mode "key" */ + for (i=0;i*SKEIN_512_BLOCK_BYTES < byteCnt;i++) + { + ((u64b_t *)ctx->b)[0]= Skein_Swap64((u64b_t) i); /* build the counter block */ + Skein_Start_New_Type(ctx,OUT_FINAL); + Skein_512_Process_Block(ctx,ctx->b,1,sizeof(u64b_t)); /* run "counter mode" */ + n = byteCnt - i*SKEIN_512_BLOCK_BYTES; /* number of output bytes left to go */ + if (n >= SKEIN_512_BLOCK_BYTES) + n = SKEIN_512_BLOCK_BYTES; + Skein_Put64_LSB_First(hashVal+i*SKEIN_512_BLOCK_BYTES,ctx->X,n); /* "output" the ctr mode bytes */ + Skein_Show_Final(512,&ctx->h,n,hashVal+i*SKEIN_512_BLOCK_BYTES); + memcpy(ctx->X,X,sizeof(X)); /* restore the counter mode key for next time */ + } + return SKEIN_SUCCESS; + } + +#if defined(SKEIN_CODE_SIZE) || defined(SKEIN_PERF) +static size_t Skein_512_API_CodeSize(void) + { + return ((u08b_t *) Skein_512_API_CodeSize) - + ((u08b_t *) Skein_512_Init); + } +#endif + +/*****************************************************************/ +/* 1024-bit Skein */ +/*****************************************************************/ +/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ +/* init the context for a straight hashing operation */ +static int Skein1024_Init(Skein1024_Ctxt_t *ctx, size_t hashBitLen) + { + union + { + u08b_t b[SKEIN1024_STATE_BYTES]; + u64b_t w[SKEIN1024_STATE_WORDS]; + } cfg; /* config block */ + + Skein_Assert(hashBitLen > 0,SKEIN_BAD_HASHLEN); + ctx->h.hashBitLen = hashBitLen; /* output hash bit count */ + + switch (hashBitLen) + { /* use pre-computed values, where available */ +#ifndef SKEIN_NO_PRECOMP + case 512: memcpy(ctx->X,SKEIN1024_IV_512 ,sizeof(ctx->X)); break; + case 384: memcpy(ctx->X,SKEIN1024_IV_384 ,sizeof(ctx->X)); break; + case 1024: memcpy(ctx->X,SKEIN1024_IV_1024,sizeof(ctx->X)); break; +#endif + default: + /* here if there is no precomputed IV value available */ + /* build/process the config block, type == CONFIG (could be precomputed) */ + Skein_Start_New_Type(ctx,CFG_FINAL); /* set tweaks: T0=0; T1=CFG | FINAL */ + + cfg.w[0] = Skein_Swap64(SKEIN_SCHEMA_VER); /* set the schema, version */ + cfg.w[1] = Skein_Swap64(hashBitLen); /* hash result length in bits */ + cfg.w[2] = Skein_Swap64(SKEIN_CFG_TREE_INFO_SEQUENTIAL); + memset(&cfg.w[3],0,sizeof(cfg) - 3*sizeof(cfg.w[0])); /* zero pad config block */ + + /* compute the initial chaining values from config block */ + memset(ctx->X,0,sizeof(ctx->X)); /* zero the chaining variables */ + Skein1024_Process_Block(ctx,cfg.b,1,SKEIN_CFG_STR_LEN); + break; + } + + /* The chaining vars ctx->X are now initialized for the given hashBitLen. */ + /* Set up to process the data message portion of the hash (default) */ + Skein_Start_New_Type(ctx,MSG); /* T0=0, T1= MSG type */ + + return SKEIN_SUCCESS; + } + +#if 0 +/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ +/* init the context for a MAC and/or tree hash operation */ +/* [identical to Skein1024_Init() when keyBytes == 0 && treeInfo == SKEIN_CFG_TREE_INFO_SEQUENTIAL] */ +static int Skein1024_InitExt(Skein1024_Ctxt_t *ctx,size_t hashBitLen,u64b_t treeInfo, const u08b_t *key, size_t keyBytes) + { + union + { + u08b_t b[SKEIN1024_STATE_BYTES]; + u64b_t w[SKEIN1024_STATE_WORDS]; + } cfg; /* config block */ + + Skein_Assert(hashBitLen > 0,SKEIN_BAD_HASHLEN); + Skein_Assert(keyBytes == 0 || key != NULL,SKEIN_FAIL); + + /* compute the initial chaining values ctx->X[], based on key */ + if (keyBytes == 0) /* is there a key? */ + { + memset(ctx->X,0,sizeof(ctx->X)); /* no key: use all zeroes as key for config block */ + } + else /* here to pre-process a key */ + { + Skein_assert(sizeof(cfg.b) >= sizeof(ctx->X)); + /* do a mini-Init right here */ + ctx->h.hashBitLen=8*sizeof(ctx->X); /* set output hash bit count = state size */ + Skein_Start_New_Type(ctx,KEY); /* set tweaks: T0 = 0; T1 = KEY type */ + memset(ctx->X,0,sizeof(ctx->X)); /* zero the initial chaining variables */ + Skein1024_Update(ctx,key,keyBytes); /* hash the key */ + Skein1024_Final_Pad(ctx,cfg.b); /* put result into cfg.b[] */ + memcpy(ctx->X,cfg.b,sizeof(cfg.b)); /* copy over into ctx->X[] */ +#if SKEIN_NEED_SWAP + { + uint_t i; + for (i=0;i<SKEIN1024_STATE_WORDS;i++) /* convert key bytes to context words */ + ctx->X[i] = Skein_Swap64(ctx->X[i]); + } +#endif + } + /* build/process the config block, type == CONFIG (could be precomputed for each key) */ + ctx->h.hashBitLen = hashBitLen; /* output hash bit count */ + Skein_Start_New_Type(ctx,CFG_FINAL); + + memset(&cfg.w,0,sizeof(cfg.w)); /* pre-pad cfg.w[] with zeroes */ + cfg.w[0] = Skein_Swap64(SKEIN_SCHEMA_VER); + cfg.w[1] = Skein_Swap64(hashBitLen); /* hash result length in bits */ + cfg.w[2] = Skein_Swap64(treeInfo); /* tree hash config info (or SKEIN_CFG_TREE_INFO_SEQUENTIAL) */ + + Skein_Show_Key(1024,&ctx->h,key,keyBytes); + + /* compute the initial chaining values from config block */ + Skein1024_Process_Block(ctx,cfg.b,1,SKEIN_CFG_STR_LEN); + + /* The chaining vars ctx->X are now initialized */ + /* Set up to process the data message portion of the hash (default) */ + ctx->h.bCnt = 0; /* buffer b[] starts out empty */ + Skein_Start_New_Type(ctx,MSG); + + return SKEIN_SUCCESS; + } +#endif + +/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ +/* process the input bytes */ +static int Skein1024_Update(Skein1024_Ctxt_t *ctx, const u08b_t *msg, size_t msgByteCnt) + { + size_t n; + + Skein_Assert(ctx->h.bCnt <= SKEIN1024_BLOCK_BYTES,SKEIN_FAIL); /* catch uninitialized context */ + + /* process full blocks, if any */ + if (msgByteCnt + ctx->h.bCnt > SKEIN1024_BLOCK_BYTES) + { + if (ctx->h.bCnt) /* finish up any buffered message data */ + { + n = SKEIN1024_BLOCK_BYTES - ctx->h.bCnt; /* # bytes free in buffer b[] */ + if (n) + { + Skein_assert(n < msgByteCnt); /* check on our logic here */ + memcpy(&ctx->b[ctx->h.bCnt],msg,n); + msgByteCnt -= n; + msg += n; + ctx->h.bCnt += n; + } + Skein_assert(ctx->h.bCnt == SKEIN1024_BLOCK_BYTES); + Skein1024_Process_Block(ctx,ctx->b,1,SKEIN1024_BLOCK_BYTES); + ctx->h.bCnt = 0; + } + /* now process any remaining full blocks, directly from input message data */ + if (msgByteCnt > SKEIN1024_BLOCK_BYTES) + { + n = (msgByteCnt-1) / SKEIN1024_BLOCK_BYTES; /* number of full blocks to process */ + Skein1024_Process_Block(ctx,msg,n,SKEIN1024_BLOCK_BYTES); + msgByteCnt -= n * SKEIN1024_BLOCK_BYTES; + msg += n * SKEIN1024_BLOCK_BYTES; + } + Skein_assert(ctx->h.bCnt == 0); + } + + /* copy any remaining source message data bytes into b[] */ + if (msgByteCnt) + { + Skein_assert(msgByteCnt + ctx->h.bCnt <= SKEIN1024_BLOCK_BYTES); + memcpy(&ctx->b[ctx->h.bCnt],msg,msgByteCnt); + ctx->h.bCnt += msgByteCnt; + } + + return SKEIN_SUCCESS; + } + +/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ +/* finalize the hash computation and output the result */ +static int Skein1024_Final(Skein1024_Ctxt_t *ctx, u08b_t *hashVal) + { + size_t i,n,byteCnt; + u64b_t X[SKEIN1024_STATE_WORDS]; + Skein_Assert(ctx->h.bCnt <= SKEIN1024_BLOCK_BYTES,SKEIN_FAIL); /* catch uninitialized context */ + + ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL; /* tag as the final block */ + if (ctx->h.bCnt < SKEIN1024_BLOCK_BYTES) /* zero pad b[] if necessary */ + memset(&ctx->b[ctx->h.bCnt],0,SKEIN1024_BLOCK_BYTES - ctx->h.bCnt); + + Skein1024_Process_Block(ctx,ctx->b,1,ctx->h.bCnt); /* process the final block */ + + /* now output the result */ + byteCnt = (ctx->h.hashBitLen + 7) >> 3; /* total number of output bytes */ + + /* run Threefish in "counter mode" to generate output */ + memset(ctx->b,0,sizeof(ctx->b)); /* zero out b[], so it can hold the counter */ + memcpy(X,ctx->X,sizeof(X)); /* keep a local copy of counter mode "key" */ + for (i=0;i*SKEIN1024_BLOCK_BYTES < byteCnt;i++) + { + ((u64b_t *)ctx->b)[0]= Skein_Swap64((u64b_t) i); /* build the counter block */ + Skein_Start_New_Type(ctx,OUT_FINAL); + Skein1024_Process_Block(ctx,ctx->b,1,sizeof(u64b_t)); /* run "counter mode" */ + n = byteCnt - i*SKEIN1024_BLOCK_BYTES; /* number of output bytes left to go */ + if (n >= SKEIN1024_BLOCK_BYTES) + n = SKEIN1024_BLOCK_BYTES; + Skein_Put64_LSB_First(hashVal+i*SKEIN1024_BLOCK_BYTES,ctx->X,n); /* "output" the ctr mode bytes */ + Skein_Show_Final(1024,&ctx->h,n,hashVal+i*SKEIN1024_BLOCK_BYTES); + memcpy(ctx->X,X,sizeof(X)); /* restore the counter mode key for next time */ + } + return SKEIN_SUCCESS; + } + +#if defined(SKEIN_CODE_SIZE) || defined(SKEIN_PERF) +static size_t Skein1024_API_CodeSize(void) + { + return ((u08b_t *) Skein1024_API_CodeSize) - + ((u08b_t *) Skein1024_Init); + } +#endif + +/**************** Functions to support MAC/tree hashing ***************/ +/* (this code is identical for Optimized and Reference versions) */ + +#if 0 +/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ +/* finalize the hash computation and output the block, no OUTPUT stage */ +static int Skein_256_Final_Pad(Skein_256_Ctxt_t *ctx, u08b_t *hashVal) + { + Skein_Assert(ctx->h.bCnt <= SKEIN_256_BLOCK_BYTES,SKEIN_FAIL); /* catch uninitialized context */ + + ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL; /* tag as the final block */ + if (ctx->h.bCnt < SKEIN_256_BLOCK_BYTES) /* zero pad b[] if necessary */ + memset(&ctx->b[ctx->h.bCnt],0,SKEIN_256_BLOCK_BYTES - ctx->h.bCnt); + Skein_256_Process_Block(ctx,ctx->b,1,ctx->h.bCnt); /* process the final block */ + + Skein_Put64_LSB_First(hashVal,ctx->X,SKEIN_256_BLOCK_BYTES); /* "output" the state bytes */ + + return SKEIN_SUCCESS; + } + +/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ +/* finalize the hash computation and output the block, no OUTPUT stage */ +static int Skein_512_Final_Pad(Skein_512_Ctxt_t *ctx, u08b_t *hashVal) + { + Skein_Assert(ctx->h.bCnt <= SKEIN_512_BLOCK_BYTES,SKEIN_FAIL); /* catch uninitialized context */ + + ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL; /* tag as the final block */ + if (ctx->h.bCnt < SKEIN_512_BLOCK_BYTES) /* zero pad b[] if necessary */ + memset(&ctx->b[ctx->h.bCnt],0,SKEIN_512_BLOCK_BYTES - ctx->h.bCnt); + Skein_512_Process_Block(ctx,ctx->b,1,ctx->h.bCnt); /* process the final block */ + + Skein_Put64_LSB_First(hashVal,ctx->X,SKEIN_512_BLOCK_BYTES); /* "output" the state bytes */ + + return SKEIN_SUCCESS; + } + +/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ +/* finalize the hash computation and output the block, no OUTPUT stage */ +static int Skein1024_Final_Pad(Skein1024_Ctxt_t *ctx, u08b_t *hashVal) + { + Skein_Assert(ctx->h.bCnt <= SKEIN1024_BLOCK_BYTES,SKEIN_FAIL); /* catch uninitialized context */ + + ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL; /* tag as the final block */ + if (ctx->h.bCnt < SKEIN1024_BLOCK_BYTES) /* zero pad b[] if necessary */ + memset(&ctx->b[ctx->h.bCnt],0,SKEIN1024_BLOCK_BYTES - ctx->h.bCnt); + Skein1024_Process_Block(ctx,ctx->b,1,ctx->h.bCnt); /* process the final block */ + + Skein_Put64_LSB_First(hashVal,ctx->X,SKEIN1024_BLOCK_BYTES); /* "output" the state bytes */ + + return SKEIN_SUCCESS; + } + + +#if SKEIN_TREE_HASH +/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ +/* just do the OUTPUT stage */ +static int Skein_256_Output(Skein_256_Ctxt_t *ctx, u08b_t *hashVal) + { + size_t i,n,byteCnt; + u64b_t X[SKEIN_256_STATE_WORDS]; + Skein_Assert(ctx->h.bCnt <= SKEIN_256_BLOCK_BYTES,SKEIN_FAIL); /* catch uninitialized context */ + + /* now output the result */ + byteCnt = (ctx->h.hashBitLen + 7) >> 3; /* total number of output bytes */ + + /* run Threefish in "counter mode" to generate output */ + memset(ctx->b,0,sizeof(ctx->b)); /* zero out b[], so it can hold the counter */ + memcpy(X,ctx->X,sizeof(X)); /* keep a local copy of counter mode "key" */ + for (i=0;i*SKEIN_256_BLOCK_BYTES < byteCnt;i++) + { + ((u64b_t *)ctx->b)[0]= Skein_Swap64((u64b_t) i); /* build the counter block */ + Skein_Start_New_Type(ctx,OUT_FINAL); + Skein_256_Process_Block(ctx,ctx->b,1,sizeof(u64b_t)); /* run "counter mode" */ + n = byteCnt - i*SKEIN_256_BLOCK_BYTES; /* number of output bytes left to go */ + if (n >= SKEIN_256_BLOCK_BYTES) + n = SKEIN_256_BLOCK_BYTES; + Skein_Put64_LSB_First(hashVal+i*SKEIN_256_BLOCK_BYTES,ctx->X,n); /* "output" the ctr mode bytes */ + Skein_Show_Final(256,&ctx->h,n,hashVal+i*SKEIN_256_BLOCK_BYTES); + memcpy(ctx->X,X,sizeof(X)); /* restore the counter mode key for next time */ + } + return SKEIN_SUCCESS; + } + +/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ +/* just do the OUTPUT stage */ +static int Skein_512_Output(Skein_512_Ctxt_t *ctx, u08b_t *hashVal) + { + size_t i,n,byteCnt; + u64b_t X[SKEIN_512_STATE_WORDS]; + Skein_Assert(ctx->h.bCnt <= SKEIN_512_BLOCK_BYTES,SKEIN_FAIL); /* catch uninitialized context */ + + /* now output the result */ + byteCnt = (ctx->h.hashBitLen + 7) >> 3; /* total number of output bytes */ + + /* run Threefish in "counter mode" to generate output */ + memset(ctx->b,0,sizeof(ctx->b)); /* zero out b[], so it can hold the counter */ + memcpy(X,ctx->X,sizeof(X)); /* keep a local copy of counter mode "key" */ + for (i=0;i*SKEIN_512_BLOCK_BYTES < byteCnt;i++) + { + ((u64b_t *)ctx->b)[0]= Skein_Swap64((u64b_t) i); /* build the counter block */ + Skein_Start_New_Type(ctx,OUT_FINAL); + Skein_512_Process_Block(ctx,ctx->b,1,sizeof(u64b_t)); /* run "counter mode" */ + n = byteCnt - i*SKEIN_512_BLOCK_BYTES; /* number of output bytes left to go */ + if (n >= SKEIN_512_BLOCK_BYTES) + n = SKEIN_512_BLOCK_BYTES; + Skein_Put64_LSB_First(hashVal+i*SKEIN_512_BLOCK_BYTES,ctx->X,n); /* "output" the ctr mode bytes */ + Skein_Show_Final(256,&ctx->h,n,hashVal+i*SKEIN_512_BLOCK_BYTES); + memcpy(ctx->X,X,sizeof(X)); /* restore the counter mode key for next time */ + } + return SKEIN_SUCCESS; + } + +/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ +/* just do the OUTPUT stage */ +static int Skein1024_Output(Skein1024_Ctxt_t *ctx, u08b_t *hashVal) + { + size_t i,n,byteCnt; + u64b_t X[SKEIN1024_STATE_WORDS]; + Skein_Assert(ctx->h.bCnt <= SKEIN1024_BLOCK_BYTES,SKEIN_FAIL); /* catch uninitialized context */ + + /* now output the result */ + byteCnt = (ctx->h.hashBitLen + 7) >> 3; /* total number of output bytes */ + + /* run Threefish in "counter mode" to generate output */ + memset(ctx->b,0,sizeof(ctx->b)); /* zero out b[], so it can hold the counter */ + memcpy(X,ctx->X,sizeof(X)); /* keep a local copy of counter mode "key" */ + for (i=0;i*SKEIN1024_BLOCK_BYTES < byteCnt;i++) + { + ((u64b_t *)ctx->b)[0]= Skein_Swap64((u64b_t) i); /* build the counter block */ + Skein_Start_New_Type(ctx,OUT_FINAL); + Skein1024_Process_Block(ctx,ctx->b,1,sizeof(u64b_t)); /* run "counter mode" */ + n = byteCnt - i*SKEIN1024_BLOCK_BYTES; /* number of output bytes left to go */ + if (n >= SKEIN1024_BLOCK_BYTES) + n = SKEIN1024_BLOCK_BYTES; + Skein_Put64_LSB_First(hashVal+i*SKEIN1024_BLOCK_BYTES,ctx->X,n); /* "output" the ctr mode bytes */ + Skein_Show_Final(256,&ctx->h,n,hashVal+i*SKEIN1024_BLOCK_BYTES); + memcpy(ctx->X,X,sizeof(X)); /* restore the counter mode key for next time */ + } + return SKEIN_SUCCESS; + } +#endif +#endif + +typedef struct +{ + uint_t statebits; /* 256, 512, or 1024 */ + union + { + Skein_Ctxt_Hdr_t h; /* common header "overlay" */ + Skein_256_Ctxt_t ctx_256; + Skein_512_Ctxt_t ctx_512; + Skein1024_Ctxt_t ctx1024; + } u; +} +hashState; + +/* "incremental" hashing API */ +static SkeinHashReturn Init (hashState *state, int hashbitlen); +static SkeinHashReturn Update(hashState *state, const SkeinBitSequence *data, SkeinDataLength databitlen); +static SkeinHashReturn Final (hashState *state, SkeinBitSequence *hashval); + +/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ +/* select the context size and init the context */ +static SkeinHashReturn Init(hashState *state, int hashbitlen) +{ +#if SKEIN_256_NIST_MAX_HASH_BITS + if (hashbitlen <= SKEIN_256_NIST_MAX_HASHBITS) + { + Skein_Assert(hashbitlen > 0,BAD_HASHLEN); + state->statebits = 64*SKEIN_256_STATE_WORDS; + return Skein_256_Init(&state->u.ctx_256,(size_t) hashbitlen); + } +#endif + if (hashbitlen <= SKEIN_512_NIST_MAX_HASHBITS) + { + state->statebits = 64*SKEIN_512_STATE_WORDS; + return Skein_512_Init(&state->u.ctx_512,(size_t) hashbitlen); + } + else + { + state->statebits = 64*SKEIN1024_STATE_WORDS; + return Skein1024_Init(&state->u.ctx1024,(size_t) hashbitlen); + } +} + +/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ +/* process data to be hashed */ +static SkeinHashReturn Update(hashState *state, const SkeinBitSequence *data, SkeinDataLength databitlen) +{ + /* only the final Update() call is allowed do partial bytes, else assert an error */ + Skein_Assert((state->u.h.T[1] & SKEIN_T1_FLAG_BIT_PAD) == 0 || databitlen == 0, SKEIN_FAIL); + + Skein_Assert(state->statebits % 256 == 0 && (state->statebits-256) < 1024,SKEIN_FAIL); + if ((databitlen & 7) == 0) /* partial bytes? */ + { + switch ((state->statebits >> 8) & 3) + { + case 2: return Skein_512_Update(&state->u.ctx_512,data,databitlen >> 3); + case 1: return Skein_256_Update(&state->u.ctx_256,data,databitlen >> 3); + case 0: return Skein1024_Update(&state->u.ctx1024,data,databitlen >> 3); + default: return SKEIN_FAIL; + } + } + else + { /* handle partial final byte */ + size_t bCnt = (databitlen >> 3) + 1; /* number of bytes to handle (nonzero here!) */ + u08b_t b,mask; + + mask = (u08b_t) (1u << (7 - (databitlen & 7))); /* partial byte bit mask */ + b = (u08b_t) ((data[bCnt-1] & (0-mask)) | mask); /* apply bit padding on final byte */ + + switch ((state->statebits >> 8) & 3) + { + case 2: Skein_512_Update(&state->u.ctx_512,data,bCnt-1); /* process all but the final byte */ + Skein_512_Update(&state->u.ctx_512,&b , 1 ); /* process the (masked) partial byte */ + break; + case 1: Skein_256_Update(&state->u.ctx_256,data,bCnt-1); /* process all but the final byte */ + Skein_256_Update(&state->u.ctx_256,&b , 1 ); /* process the (masked) partial byte */ + break; + case 0: Skein1024_Update(&state->u.ctx1024,data,bCnt-1); /* process all but the final byte */ + Skein1024_Update(&state->u.ctx1024,&b , 1 ); /* process the (masked) partial byte */ + break; + default: return SKEIN_FAIL; + } + Skein_Set_Bit_Pad_Flag(state->u.h); /* set tweak flag for the final call */ + + return SKEIN_SUCCESS; + } +} + +/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ +/* finalize hash computation and output the result (hashbitlen bits) */ +static SkeinHashReturn Final(hashState *state, SkeinBitSequence *hashval) +{ + Skein_Assert(state->statebits % 256 == 0 && (state->statebits-256) < 1024,FAIL); + switch ((state->statebits >> 8) & 3) + { + case 2: return Skein_512_Final(&state->u.ctx_512,hashval); + case 1: return Skein_256_Final(&state->u.ctx_256,hashval); + case 0: return Skein1024_Final(&state->u.ctx1024,hashval); + default: return SKEIN_FAIL; + } +} + +/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ +/* all-in-one hash function */ +SkeinHashReturn skein_hash(int hashbitlen, const SkeinBitSequence *data, /* all-in-one call */ + SkeinDataLength databitlen,SkeinBitSequence *hashval) +{ + hashState state; + SkeinHashReturn r = Init(&state,hashbitlen); + if (r == SKEIN_SUCCESS) + { /* these calls do not fail when called properly */ + r = Update(&state,data,databitlen); + Final(&state,hashval); + } + return r; +} diff --git a/xmrstak/backend/cpu/crypto/c_skein.h b/xmrstak/backend/cpu/crypto/c_skein.h new file mode 100644 index 0000000..6165a2a --- /dev/null +++ b/xmrstak/backend/cpu/crypto/c_skein.h @@ -0,0 +1,47 @@ +#ifndef _SKEIN_H_ +#define _SKEIN_H_ 1 +/************************************************************************** +** +** Interface declarations and internal definitions for Skein hashing. +** +** Source code author: Doug Whiting, 2008. +** +** This algorithm and source code is released to the public domain. +** +*************************************************************************** +** +** The following compile-time switches may be defined to control some +** tradeoffs between speed, code size, error checking, and security. +** +** The "default" note explains what happens when the switch is not defined. +** +** SKEIN_DEBUG -- make callouts from inside Skein code +** to examine/display intermediate values. +** [default: no callouts (no overhead)] +** +** SKEIN_ERR_CHECK -- how error checking is handled inside Skein +** code. If not defined, most error checking +** is disabled (for performance). Otherwise, +** the switch value is interpreted as: +** 0: use assert() to flag errors +** 1: return SKEIN_FAIL to flag errors +** +***************************************************************************/ +#include "skein_port.h" /* get platform-specific definitions */ + +typedef enum +{ + SKEIN_SUCCESS = 0, /* return codes from Skein calls */ + SKEIN_FAIL = 1, + SKEIN_BAD_HASHLEN = 2 +} +SkeinHashReturn; + +typedef size_t SkeinDataLength; /* bit count type */ +typedef u08b_t SkeinBitSequence; /* bit stream type */ + +/* "all-in-one" call */ +SkeinHashReturn skein_hash(int hashbitlen, const SkeinBitSequence *data, + SkeinDataLength databitlen, SkeinBitSequence *hashval); + +#endif /* ifndef _SKEIN_H_ */ diff --git a/xmrstak/backend/cpu/crypto/cryptonight.h b/xmrstak/backend/cpu/crypto/cryptonight.h new file mode 100644 index 0000000..978c798 --- /dev/null +++ b/xmrstak/backend/cpu/crypto/cryptonight.h @@ -0,0 +1,31 @@ +#ifndef __CRYPTONIGHT_H_INCLUDED +#define __CRYPTONIGHT_H_INCLUDED + +#ifdef __cplusplus +extern "C" { +#endif + +#include <stddef.h> +#include <inttypes.h> + +#define MEMORY 2097152 + +typedef struct { + uint8_t hash_state[224]; // Need only 200, explicit align + uint8_t* long_state; + uint8_t ctx_info[24]; //Use some of the extra memory for flags +} cryptonight_ctx; + +typedef struct { + const char* warning; +} alloc_msg; + +size_t cryptonight_init(size_t use_fast_mem, size_t use_mlock, alloc_msg* msg); +cryptonight_ctx* cryptonight_alloc_ctx(size_t use_fast_mem, size_t use_mlock, alloc_msg* msg); +void cryptonight_free_ctx(cryptonight_ctx* ctx); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/xmrstak/backend/cpu/crypto/cryptonight_aesni.h b/xmrstak/backend/cpu/crypto/cryptonight_aesni.h new file mode 100644 index 0000000..8bbb27c --- /dev/null +++ b/xmrstak/backend/cpu/crypto/cryptonight_aesni.h @@ -0,0 +1,457 @@ +/* + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + */ +#pragma once + +#include "cryptonight.h" +#include <memory.h> +#include <stdio.h> + +#ifdef __GNUC__ +#include <x86intrin.h> +static inline uint64_t _umul128(uint64_t a, uint64_t b, uint64_t* hi) +{ + unsigned __int128 r = (unsigned __int128)a * (unsigned __int128)b; + *hi = r >> 64; + return (uint64_t)r; +} +#define _mm256_set_m128i(v0, v1) _mm256_insertf128_si256(_mm256_castsi128_si256(v1), (v0), 1) +#else +#include <intrin.h> +#endif // __GNUC__ + +#if !defined(_LP64) && !defined(_WIN64) +#error You are trying to do a 32-bit build. This will all end in tears. I know it. +#endif + +extern "C" +{ + void keccak(const uint8_t *in, int inlen, uint8_t *md, int mdlen); + void keccakf(uint64_t st[25], int rounds); + extern void(*const extra_hashes[4])(const void *, size_t, char *); + + __m128i soft_aesenc(__m128i in, __m128i key); + __m128i soft_aeskeygenassist(__m128i key, uint8_t rcon); +} + +// This will shift and xor tmp1 into itself as 4 32-bit vals such as +// sl_xor(a1 a2 a3 a4) = a1 (a2^a1) (a3^a2^a1) (a4^a3^a2^a1) +static inline __m128i sl_xor(__m128i tmp1) +{ + __m128i tmp4; + tmp4 = _mm_slli_si128(tmp1, 0x04); + tmp1 = _mm_xor_si128(tmp1, tmp4); + tmp4 = _mm_slli_si128(tmp4, 0x04); + tmp1 = _mm_xor_si128(tmp1, tmp4); + tmp4 = _mm_slli_si128(tmp4, 0x04); + tmp1 = _mm_xor_si128(tmp1, tmp4); + return tmp1; +} + +template<uint8_t rcon> +static inline void aes_genkey_sub(__m128i* xout0, __m128i* xout2) +{ + __m128i xout1 = _mm_aeskeygenassist_si128(*xout2, rcon); + xout1 = _mm_shuffle_epi32(xout1, 0xFF); // see PSHUFD, set all elems to 4th elem + *xout0 = sl_xor(*xout0); + *xout0 = _mm_xor_si128(*xout0, xout1); + xout1 = _mm_aeskeygenassist_si128(*xout0, 0x00); + xout1 = _mm_shuffle_epi32(xout1, 0xAA); // see PSHUFD, set all elems to 3rd elem + *xout2 = sl_xor(*xout2); + *xout2 = _mm_xor_si128(*xout2, xout1); +} + +static inline void soft_aes_genkey_sub(__m128i* xout0, __m128i* xout2, uint8_t rcon) +{ + __m128i xout1 = soft_aeskeygenassist(*xout2, rcon); + xout1 = _mm_shuffle_epi32(xout1, 0xFF); // see PSHUFD, set all elems to 4th elem + *xout0 = sl_xor(*xout0); + *xout0 = _mm_xor_si128(*xout0, xout1); + xout1 = soft_aeskeygenassist(*xout0, 0x00); + xout1 = _mm_shuffle_epi32(xout1, 0xAA); // see PSHUFD, set all elems to 3rd elem + *xout2 = sl_xor(*xout2); + *xout2 = _mm_xor_si128(*xout2, xout1); +} + +template<bool SOFT_AES> +static inline void aes_genkey(const __m128i* memory, __m128i* k0, __m128i* k1, __m128i* k2, __m128i* k3, + __m128i* k4, __m128i* k5, __m128i* k6, __m128i* k7, __m128i* k8, __m128i* k9) +{ + __m128i xout0, xout2; + + xout0 = _mm_load_si128(memory); + xout2 = _mm_load_si128(memory+1); + *k0 = xout0; + *k1 = xout2; + + if(SOFT_AES) + soft_aes_genkey_sub(&xout0, &xout2, 0x01); + else + aes_genkey_sub<0x01>(&xout0, &xout2); + *k2 = xout0; + *k3 = xout2; + + if(SOFT_AES) + soft_aes_genkey_sub(&xout0, &xout2, 0x02); + else + aes_genkey_sub<0x02>(&xout0, &xout2); + *k4 = xout0; + *k5 = xout2; + + if(SOFT_AES) + soft_aes_genkey_sub(&xout0, &xout2, 0x04); + else + aes_genkey_sub<0x04>(&xout0, &xout2); + *k6 = xout0; + *k7 = xout2; + + if(SOFT_AES) + soft_aes_genkey_sub(&xout0, &xout2, 0x08); + else + aes_genkey_sub<0x08>(&xout0, &xout2); + *k8 = xout0; + *k9 = xout2; +} + +static inline void aes_round(__m128i key, __m128i* x0, __m128i* x1, __m128i* x2, __m128i* x3, __m128i* x4, __m128i* x5, __m128i* x6, __m128i* x7) +{ + *x0 = _mm_aesenc_si128(*x0, key); + *x1 = _mm_aesenc_si128(*x1, key); + *x2 = _mm_aesenc_si128(*x2, key); + *x3 = _mm_aesenc_si128(*x3, key); + *x4 = _mm_aesenc_si128(*x4, key); + *x5 = _mm_aesenc_si128(*x5, key); + *x6 = _mm_aesenc_si128(*x6, key); + *x7 = _mm_aesenc_si128(*x7, key); +} + +static inline void soft_aes_round(__m128i key, __m128i* x0, __m128i* x1, __m128i* x2, __m128i* x3, __m128i* x4, __m128i* x5, __m128i* x6, __m128i* x7) +{ + *x0 = soft_aesenc(*x0, key); + *x1 = soft_aesenc(*x1, key); + *x2 = soft_aesenc(*x2, key); + *x3 = soft_aesenc(*x3, key); + *x4 = soft_aesenc(*x4, key); + *x5 = soft_aesenc(*x5, key); + *x6 = soft_aesenc(*x6, key); + *x7 = soft_aesenc(*x7, key); +} + +template<size_t MEM, bool SOFT_AES, bool PREFETCH> +void cn_explode_scratchpad(const __m128i* input, __m128i* output) +{ + // This is more than we have registers, compiler will assign 2 keys on the stack + __m128i xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7; + __m128i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9; + + aes_genkey<SOFT_AES>(input, &k0, &k1, &k2, &k3, &k4, &k5, &k6, &k7, &k8, &k9); + + xin0 = _mm_load_si128(input + 4); + xin1 = _mm_load_si128(input + 5); + xin2 = _mm_load_si128(input + 6); + xin3 = _mm_load_si128(input + 7); + xin4 = _mm_load_si128(input + 8); + xin5 = _mm_load_si128(input + 9); + xin6 = _mm_load_si128(input + 10); + xin7 = _mm_load_si128(input + 11); + + for (size_t i = 0; i < MEM / sizeof(__m128i); i += 8) + { + if(SOFT_AES) + { + soft_aes_round(k0, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); + soft_aes_round(k1, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); + soft_aes_round(k2, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); + soft_aes_round(k3, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); + soft_aes_round(k4, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); + soft_aes_round(k5, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); + soft_aes_round(k6, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); + soft_aes_round(k7, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); + soft_aes_round(k8, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); + soft_aes_round(k9, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); + } + else + { + aes_round(k0, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); + aes_round(k1, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); + aes_round(k2, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); + aes_round(k3, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); + aes_round(k4, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); + aes_round(k5, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); + aes_round(k6, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); + aes_round(k7, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); + aes_round(k8, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); + aes_round(k9, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); + } + + _mm_store_si128(output + i + 0, xin0); + _mm_store_si128(output + i + 1, xin1); + _mm_store_si128(output + i + 2, xin2); + _mm_store_si128(output + i + 3, xin3); + + if(PREFETCH) + _mm_prefetch((const char*)output + i + 0, _MM_HINT_T2); + + _mm_store_si128(output + i + 4, xin4); + _mm_store_si128(output + i + 5, xin5); + _mm_store_si128(output + i + 6, xin6); + _mm_store_si128(output + i + 7, xin7); + + if(PREFETCH) + _mm_prefetch((const char*)output + i + 4, _MM_HINT_T2); + } +} + +template<size_t MEM, bool SOFT_AES, bool PREFETCH> +void cn_implode_scratchpad(const __m128i* input, __m128i* output) +{ + // This is more than we have registers, compiler will assign 2 keys on the stack + __m128i xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7; + __m128i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9; + + aes_genkey<SOFT_AES>(output + 2, &k0, &k1, &k2, &k3, &k4, &k5, &k6, &k7, &k8, &k9); + + xout0 = _mm_load_si128(output + 4); + xout1 = _mm_load_si128(output + 5); + xout2 = _mm_load_si128(output + 6); + xout3 = _mm_load_si128(output + 7); + xout4 = _mm_load_si128(output + 8); + xout5 = _mm_load_si128(output + 9); + xout6 = _mm_load_si128(output + 10); + xout7 = _mm_load_si128(output + 11); + + for (size_t i = 0; i < MEM / sizeof(__m128i); i += 8) + { + if(PREFETCH) + _mm_prefetch((const char*)input + i + 0, _MM_HINT_NTA); + + xout0 = _mm_xor_si128(_mm_load_si128(input + i + 0), xout0); + xout1 = _mm_xor_si128(_mm_load_si128(input + i + 1), xout1); + xout2 = _mm_xor_si128(_mm_load_si128(input + i + 2), xout2); + xout3 = _mm_xor_si128(_mm_load_si128(input + i + 3), xout3); + + if(PREFETCH) + _mm_prefetch((const char*)input + i + 4, _MM_HINT_NTA); + + xout4 = _mm_xor_si128(_mm_load_si128(input + i + 4), xout4); + xout5 = _mm_xor_si128(_mm_load_si128(input + i + 5), xout5); + xout6 = _mm_xor_si128(_mm_load_si128(input + i + 6), xout6); + xout7 = _mm_xor_si128(_mm_load_si128(input + i + 7), xout7); + + if(SOFT_AES) + { + soft_aes_round(k0, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); + soft_aes_round(k1, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); + soft_aes_round(k2, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); + soft_aes_round(k3, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); + soft_aes_round(k4, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); + soft_aes_round(k5, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); + soft_aes_round(k6, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); + soft_aes_round(k7, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); + soft_aes_round(k8, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); + soft_aes_round(k9, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); + } + else + { + aes_round(k0, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); + aes_round(k1, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); + aes_round(k2, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); + aes_round(k3, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); + aes_round(k4, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); + aes_round(k5, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); + aes_round(k6, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); + aes_round(k7, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); + aes_round(k8, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); + aes_round(k9, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); + } + } + + _mm_store_si128(output + 4, xout0); + _mm_store_si128(output + 5, xout1); + _mm_store_si128(output + 6, xout2); + _mm_store_si128(output + 7, xout3); + _mm_store_si128(output + 8, xout4); + _mm_store_si128(output + 9, xout5); + _mm_store_si128(output + 10, xout6); + _mm_store_si128(output + 11, xout7); +} + +template<size_t ITERATIONS, size_t MEM, bool SOFT_AES, bool PREFETCH> +void cryptonight_hash(const void* input, size_t len, void* output, cryptonight_ctx* ctx0) +{ + keccak((const uint8_t *)input, len, ctx0->hash_state, 200); + + // Optim - 99% time boundary + cn_explode_scratchpad<MEM, SOFT_AES, PREFETCH>((__m128i*)ctx0->hash_state, (__m128i*)ctx0->long_state); + + uint8_t* l0 = ctx0->long_state; + uint64_t* h0 = (uint64_t*)ctx0->hash_state; + + uint64_t al0 = h0[0] ^ h0[4]; + uint64_t ah0 = h0[1] ^ h0[5]; + __m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]); + + uint64_t idx0 = h0[0] ^ h0[4]; + + // Optim - 90% time boundary + for(size_t i = 0; i < ITERATIONS; i++) + { + __m128i cx; + cx = _mm_load_si128((__m128i *)&l0[idx0 & 0x1FFFF0]); + + if(SOFT_AES) + cx = soft_aesenc(cx, _mm_set_epi64x(ah0, al0)); + else + cx = _mm_aesenc_si128(cx, _mm_set_epi64x(ah0, al0)); + + _mm_store_si128((__m128i *)&l0[idx0 & 0x1FFFF0], _mm_xor_si128(bx0, cx)); + idx0 = _mm_cvtsi128_si64(cx); + bx0 = cx; + + if(PREFETCH) + _mm_prefetch((const char*)&l0[idx0 & 0x1FFFF0], _MM_HINT_T0); + + uint64_t hi, lo, cl, ch; + cl = ((uint64_t*)&l0[idx0 & 0x1FFFF0])[0]; + ch = ((uint64_t*)&l0[idx0 & 0x1FFFF0])[1]; + + lo = _umul128(idx0, cl, &hi); + + al0 += hi; + ah0 += lo; + ((uint64_t*)&l0[idx0 & 0x1FFFF0])[0] = al0; + ((uint64_t*)&l0[idx0 & 0x1FFFF0])[1] = ah0; + ah0 ^= ch; + al0 ^= cl; + idx0 = al0; + + if(PREFETCH) + _mm_prefetch((const char*)&l0[idx0 & 0x1FFFF0], _MM_HINT_T0); + } + + // Optim - 90% time boundary + cn_implode_scratchpad<MEM, SOFT_AES, PREFETCH>((__m128i*)ctx0->long_state, (__m128i*)ctx0->hash_state); + + // Optim - 99% time boundary + + keccakf((uint64_t*)ctx0->hash_state, 24); + extra_hashes[ctx0->hash_state[0] & 3](ctx0->hash_state, 200, (char*)output); +} + +// This lovely creation will do 2 cn hashes at a time. We have plenty of space on silicon +// to fit temporary vars for two contexts. Function will read len*2 from input and write 64 bytes to output +// We are still limited by L3 cache, so doubling will only work with CPUs where we have more than 2MB to core (Xeons) +template<size_t ITERATIONS, size_t MEM, bool SOFT_AES, bool PREFETCH> +void cryptonight_double_hash(const void* input, size_t len, void* output, cryptonight_ctx* __restrict ctx0, cryptonight_ctx* __restrict ctx1) +{ + keccak((const uint8_t *)input, len, ctx0->hash_state, 200); + keccak((const uint8_t *)input+len, len, ctx1->hash_state, 200); + + // Optim - 99% time boundary + cn_explode_scratchpad<MEM, SOFT_AES, PREFETCH>((__m128i*)ctx0->hash_state, (__m128i*)ctx0->long_state); + cn_explode_scratchpad<MEM, SOFT_AES, PREFETCH>((__m128i*)ctx1->hash_state, (__m128i*)ctx1->long_state); + + uint8_t* l0 = ctx0->long_state; + uint64_t* h0 = (uint64_t*)ctx0->hash_state; + uint8_t* l1 = ctx1->long_state; + uint64_t* h1 = (uint64_t*)ctx1->hash_state; + + uint64_t axl0 = h0[0] ^ h0[4]; + uint64_t axh0 = h0[1] ^ h0[5]; + __m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]); + uint64_t axl1 = h1[0] ^ h1[4]; + uint64_t axh1 = h1[1] ^ h1[5]; + __m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]); + + uint64_t idx0 = h0[0] ^ h0[4]; + uint64_t idx1 = h1[0] ^ h1[4]; + + // Optim - 90% time boundary + for (size_t i = 0; i < ITERATIONS; i++) + { + __m128i cx; + cx = _mm_load_si128((__m128i *)&l0[idx0 & 0x1FFFF0]); + + if(SOFT_AES) + cx = soft_aesenc(cx, _mm_set_epi64x(axh0, axl0)); + else + cx = _mm_aesenc_si128(cx, _mm_set_epi64x(axh0, axl0)); + + _mm_store_si128((__m128i *)&l0[idx0 & 0x1FFFF0], _mm_xor_si128(bx0, cx)); + idx0 = _mm_cvtsi128_si64(cx); + bx0 = cx; + + if(PREFETCH) + _mm_prefetch((const char*)&l0[idx0 & 0x1FFFF0], _MM_HINT_T0); + + cx = _mm_load_si128((__m128i *)&l1[idx1 & 0x1FFFF0]); + + if(SOFT_AES) + cx = soft_aesenc(cx, _mm_set_epi64x(axh1, axl1)); + else + cx = _mm_aesenc_si128(cx, _mm_set_epi64x(axh1, axl1)); + + _mm_store_si128((__m128i *)&l1[idx1 & 0x1FFFF0], _mm_xor_si128(bx1, cx)); + idx1 = _mm_cvtsi128_si64(cx); + bx1 = cx; + + if(PREFETCH) + _mm_prefetch((const char*)&l1[idx1 & 0x1FFFF0], _MM_HINT_T0); + + uint64_t hi, lo, cl, ch; + cl = ((uint64_t*)&l0[idx0 & 0x1FFFF0])[0]; + ch = ((uint64_t*)&l0[idx0 & 0x1FFFF0])[1]; + + lo = _umul128(idx0, cl, &hi); + + axl0 += hi; + axh0 += lo; + ((uint64_t*)&l0[idx0 & 0x1FFFF0])[0] = axl0; + ((uint64_t*)&l0[idx0 & 0x1FFFF0])[1] = axh0; + axh0 ^= ch; + axl0 ^= cl; + idx0 = axl0; + + if(PREFETCH) + _mm_prefetch((const char*)&l0[idx0 & 0x1FFFF0], _MM_HINT_T0); + + cl = ((uint64_t*)&l1[idx1 & 0x1FFFF0])[0]; + ch = ((uint64_t*)&l1[idx1 & 0x1FFFF0])[1]; + + lo = _umul128(idx1, cl, &hi); + + axl1 += hi; + axh1 += lo; + ((uint64_t*)&l1[idx1 & 0x1FFFF0])[0] = axl1; + ((uint64_t*)&l1[idx1 & 0x1FFFF0])[1] = axh1; + axh1 ^= ch; + axl1 ^= cl; + idx1 = axl1; + + if(PREFETCH) + _mm_prefetch((const char*)&l1[idx1 & 0x1FFFF0], _MM_HINT_T0); + } + + // Optim - 90% time boundary + cn_implode_scratchpad<MEM, SOFT_AES, PREFETCH>((__m128i*)ctx0->long_state, (__m128i*)ctx0->hash_state); + cn_implode_scratchpad<MEM, SOFT_AES, PREFETCH>((__m128i*)ctx1->long_state, (__m128i*)ctx1->hash_state); + + // Optim - 99% time boundary + + keccakf((uint64_t*)ctx0->hash_state, 24); + extra_hashes[ctx0->hash_state[0] & 3](ctx0->hash_state, 200, (char*)output); + keccakf((uint64_t*)ctx1->hash_state, 24); + extra_hashes[ctx1->hash_state[0] & 3](ctx1->hash_state, 200, (char*)output + 32); +} diff --git a/xmrstak/backend/cpu/crypto/cryptonight_common.cpp b/xmrstak/backend/cpu/crypto/cryptonight_common.cpp new file mode 100644 index 0000000..9d03ed7 --- /dev/null +++ b/xmrstak/backend/cpu/crypto/cryptonight_common.cpp @@ -0,0 +1,195 @@ +/* + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * Additional permission under GNU GPL version 3 section 7 + * + * If you modify this Program, or any covered work, by linking or combining + * it with OpenSSL (or a modified version of that library), containing parts + * covered by the terms of OpenSSL License and SSLeay License, the licensors + * of this Program grant you additional permission to convey the resulting work. + * + */ + +extern "C" +{ +#include "c_groestl.h" +#include "c_blake256.h" +#include "c_jh.h" +#include "c_skein.h" +} +#include "cryptonight.h" +#include "cryptonight_aesni.h" +#include <stdio.h> +#include <stdlib.h> + +#ifdef __GNUC__ +#include <mm_malloc.h> +#else +#include <malloc.h> +#endif // __GNUC__ + +#if defined(__APPLE__) +#include <mach/vm_statistics.h> +#endif + +#ifdef _WIN32 +#include <windows.h> +#else +#include <sys/mman.h> +#include <errno.h> +#include <string.h> +#endif // _WIN32 + +void do_blake_hash(const void* input, size_t len, char* output) { + blake256_hash((uint8_t*)output, (const uint8_t*)input, len); +} + +void do_groestl_hash(const void* input, size_t len, char* output) { + groestl((const uint8_t*)input, len * 8, (uint8_t*)output); +} + +void do_jh_hash(const void* input, size_t len, char* output) { + jh_hash(32 * 8, (const uint8_t*)input, 8 * len, (uint8_t*)output); +} + +void do_skein_hash(const void* input, size_t len, char* output) { + skein_hash(8 * 32, (const uint8_t*)input, 8 * len, (uint8_t*)output); +} + +void (* const extra_hashes[4])(const void *, size_t, char *) = {do_blake_hash, do_groestl_hash, do_jh_hash, do_skein_hash}; + +#ifdef _WIN32 +BOOL AddPrivilege(TCHAR* pszPrivilege) +{ + HANDLE hToken; + TOKEN_PRIVILEGES tp; + BOOL status; + + if (!OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &hToken)) + return FALSE; + + if (!LookupPrivilegeValue(NULL, pszPrivilege, &tp.Privileges[0].Luid)) + return FALSE; + + tp.PrivilegeCount = 1; + tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED; + status = AdjustTokenPrivileges(hToken, FALSE, &tp, 0, (PTOKEN_PRIVILEGES)NULL, 0); + + if (!status || (GetLastError() != ERROR_SUCCESS)) + return FALSE; + + CloseHandle(hToken); + return TRUE; +} +#endif + +size_t cryptonight_init(size_t use_fast_mem, size_t use_mlock, alloc_msg* msg) +{ +#ifdef _WIN32 + if (AddPrivilege(TEXT("SeLockMemoryPrivilege")) == 0) + { + msg->warning = "Obtaning SeLockMemoryPrivilege failed."; + return 0; + } + return 1; +#else + return 1; +#endif // _WIN32 +} + +cryptonight_ctx* cryptonight_alloc_ctx(size_t use_fast_mem, size_t use_mlock, alloc_msg* msg) +{ + cryptonight_ctx* ptr = (cryptonight_ctx*)_mm_malloc(sizeof(cryptonight_ctx), 4096); + + if(use_fast_mem == 0) + { + // use 2MiB aligned memory + ptr->long_state = (uint8_t*)_mm_malloc(MEMORY, 2*1024*1024); + ptr->ctx_info[0] = 0; + ptr->ctx_info[1] = 0; + return ptr; + } + +#ifdef _WIN32 + SIZE_T iLargePageMin = GetLargePageMinimum(); + + if(MEMORY > iLargePageMin) + iLargePageMin *= 2; + + ptr->long_state = (uint8_t*)VirtualAlloc(NULL, iLargePageMin, + MEM_COMMIT | MEM_RESERVE | MEM_LARGE_PAGES, PAGE_READWRITE); + + if(ptr->long_state == NULL) + { + _mm_free(ptr); + msg->warning = "VirtualAlloc failed."; + return NULL; + } + else + { + ptr->ctx_info[0] = 1; + return ptr; + } +#else + +#if defined(__APPLE__) + ptr->long_state = (uint8_t*)mmap(0, MEMORY, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANON, VM_FLAGS_SUPERPAGE_SIZE_2MB, 0); +#elif defined(__FreeBSD__) + ptr->long_state = (uint8_t*)mmap(0, MEMORY, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_ALIGNED_SUPER | MAP_PREFAULT_READ, -1, 0); +#else + ptr->long_state = (uint8_t*)mmap(0, MEMORY, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_POPULATE, 0, 0); +#endif + + if (ptr->long_state == MAP_FAILED) + { + _mm_free(ptr); + msg->warning = "mmap failed"; + return NULL; + } + + ptr->ctx_info[0] = 1; + + if(madvise(ptr->long_state, MEMORY, MADV_RANDOM|MADV_WILLNEED) != 0) + msg->warning = "madvise failed"; + + ptr->ctx_info[1] = 0; + if(use_mlock != 0 && mlock(ptr->long_state, MEMORY) != 0) + msg->warning = "mlock failed"; + else + ptr->ctx_info[1] = 1; + + return ptr; +#endif // _WIN32 +} + +void cryptonight_free_ctx(cryptonight_ctx* ctx) +{ + if(ctx->ctx_info[0] != 0) + { +#ifdef _WIN32 + VirtualFree(ctx->long_state, 0, MEM_RELEASE); +#else + if(ctx->ctx_info[1] != 0) + munlock(ctx->long_state, MEMORY); + munmap(ctx->long_state, MEMORY); +#endif // _WIN32 + } + else + _mm_free(ctx->long_state); + + _mm_free(ctx); +} diff --git a/xmrstak/backend/cpu/crypto/groestl_tables.h b/xmrstak/backend/cpu/crypto/groestl_tables.h new file mode 100644 index 0000000..a23295c --- /dev/null +++ b/xmrstak/backend/cpu/crypto/groestl_tables.h @@ -0,0 +1,38 @@ +#ifndef __tables_h +#define __tables_h + + +const uint32_t T[512] = {0xa5f432c6, 0xc6a597f4, 0x84976ff8, 0xf884eb97, 0x99b05eee, 0xee99c7b0, 0x8d8c7af6, 0xf68df78c, 0xd17e8ff, 0xff0de517, 0xbddc0ad6, 0xd6bdb7dc, 0xb1c816de, 0xdeb1a7c8, 0x54fc6d91, 0x915439fc +, 0x50f09060, 0x6050c0f0, 0x3050702, 0x2030405, 0xa9e02ece, 0xcea987e0, 0x7d87d156, 0x567dac87, 0x192bcce7, 0xe719d52b, 0x62a613b5, 0xb56271a6, 0xe6317c4d, 0x4de69a31, 0x9ab559ec, 0xec9ac3b5 +, 0x45cf408f, 0x8f4505cf, 0x9dbca31f, 0x1f9d3ebc, 0x40c04989, 0x894009c0, 0x879268fa, 0xfa87ef92, 0x153fd0ef, 0xef15c53f, 0xeb2694b2, 0xb2eb7f26, 0xc940ce8e, 0x8ec90740, 0xb1de6fb, 0xfb0bed1d +, 0xec2f6e41, 0x41ec822f, 0x67a91ab3, 0xb3677da9, 0xfd1c435f, 0x5ffdbe1c, 0xea256045, 0x45ea8a25, 0xbfdaf923, 0x23bf46da, 0xf7025153, 0x53f7a602, 0x96a145e4, 0xe496d3a1, 0x5bed769b, 0x9b5b2ded +, 0xc25d2875, 0x75c2ea5d, 0x1c24c5e1, 0xe11cd924, 0xaee9d43d, 0x3dae7ae9, 0x6abef24c, 0x4c6a98be, 0x5aee826c, 0x6c5ad8ee, 0x41c3bd7e, 0x7e41fcc3, 0x206f3f5, 0xf502f106, 0x4fd15283, 0x834f1dd1 +, 0x5ce48c68, 0x685cd0e4, 0xf4075651, 0x51f4a207, 0x345c8dd1, 0xd134b95c, 0x818e1f9, 0xf908e918, 0x93ae4ce2, 0xe293dfae, 0x73953eab, 0xab734d95, 0x53f59762, 0x6253c4f5, 0x3f416b2a, 0x2a3f5441 +, 0xc141c08, 0x80c1014, 0x52f66395, 0x955231f6, 0x65afe946, 0x46658caf, 0x5ee27f9d, 0x9d5e21e2, 0x28784830, 0x30286078, 0xa1f8cf37, 0x37a16ef8, 0xf111b0a, 0xa0f1411, 0xb5c4eb2f, 0x2fb55ec4 +, 0x91b150e, 0xe091c1b, 0x365a7e24, 0x2436485a, 0x9bb6ad1b, 0x1b9b36b6, 0x3d4798df, 0xdf3da547, 0x266aa7cd, 0xcd26816a, 0x69bbf54e, 0x4e699cbb, 0xcd4c337f, 0x7fcdfe4c, 0x9fba50ea, 0xea9fcfba +, 0x1b2d3f12, 0x121b242d, 0x9eb9a41d, 0x1d9e3ab9, 0x749cc458, 0x5874b09c, 0x2e724634, 0x342e6872, 0x2d774136, 0x362d6c77, 0xb2cd11dc, 0xdcb2a3cd, 0xee299db4, 0xb4ee7329, 0xfb164d5b, 0x5bfbb616 +, 0xf601a5a4, 0xa4f65301, 0x4dd7a176, 0x764decd7, 0x61a314b7, 0xb76175a3, 0xce49347d, 0x7dcefa49, 0x7b8ddf52, 0x527ba48d, 0x3e429fdd, 0xdd3ea142, 0x7193cd5e, 0x5e71bc93, 0x97a2b113, 0x139726a2 +, 0xf504a2a6, 0xa6f55704, 0x68b801b9, 0xb96869b8, 0x0, 0x0, 0x2c74b5c1, 0xc12c9974, 0x60a0e040, 0x406080a0, 0x1f21c2e3, 0xe31fdd21, 0xc8433a79, 0x79c8f243, 0xed2c9ab6, 0xb6ed772c +, 0xbed90dd4, 0xd4beb3d9, 0x46ca478d, 0x8d4601ca, 0xd9701767, 0x67d9ce70, 0x4bddaf72, 0x724be4dd, 0xde79ed94, 0x94de3379, 0xd467ff98, 0x98d42b67, 0xe82393b0, 0xb0e87b23, 0x4ade5b85, 0x854a11de +, 0x6bbd06bb, 0xbb6b6dbd, 0x2a7ebbc5, 0xc52a917e, 0xe5347b4f, 0x4fe59e34, 0x163ad7ed, 0xed16c13a, 0xc554d286, 0x86c51754, 0xd762f89a, 0x9ad72f62, 0x55ff9966, 0x6655ccff, 0x94a7b611, 0x119422a7 +, 0xcf4ac08a, 0x8acf0f4a, 0x1030d9e9, 0xe910c930, 0x60a0e04, 0x406080a, 0x819866fe, 0xfe81e798, 0xf00baba0, 0xa0f05b0b, 0x44ccb478, 0x7844f0cc, 0xbad5f025, 0x25ba4ad5, 0xe33e754b, 0x4be3963e +, 0xf30eaca2, 0xa2f35f0e, 0xfe19445d, 0x5dfeba19, 0xc05bdb80, 0x80c01b5b, 0x8a858005, 0x58a0a85, 0xadecd33f, 0x3fad7eec, 0xbcdffe21, 0x21bc42df, 0x48d8a870, 0x7048e0d8, 0x40cfdf1, 0xf104f90c +, 0xdf7a1963, 0x63dfc67a, 0xc1582f77, 0x77c1ee58, 0x759f30af, 0xaf75459f, 0x63a5e742, 0x426384a5, 0x30507020, 0x20304050, 0x1a2ecbe5, 0xe51ad12e, 0xe12effd, 0xfd0ee112, 0x6db708bf, 0xbf6d65b7 +, 0x4cd45581, 0x814c19d4, 0x143c2418, 0x1814303c, 0x355f7926, 0x26354c5f, 0x2f71b2c3, 0xc32f9d71, 0xe13886be, 0xbee16738, 0xa2fdc835, 0x35a26afd, 0xcc4fc788, 0x88cc0b4f, 0x394b652e, 0x2e395c4b +, 0x57f96a93, 0x93573df9, 0xf20d5855, 0x55f2aa0d, 0x829d61fc, 0xfc82e39d, 0x47c9b37a, 0x7a47f4c9, 0xacef27c8, 0xc8ac8bef, 0xe73288ba, 0xbae76f32, 0x2b7d4f32, 0x322b647d, 0x95a442e6, 0xe695d7a4 +, 0xa0fb3bc0, 0xc0a09bfb, 0x98b3aa19, 0x199832b3, 0xd168f69e, 0x9ed12768, 0x7f8122a3, 0xa37f5d81, 0x66aaee44, 0x446688aa, 0x7e82d654, 0x547ea882, 0xabe6dd3b, 0x3bab76e6, 0x839e950b, 0xb83169e +, 0xca45c98c, 0x8cca0345, 0x297bbcc7, 0xc729957b, 0xd36e056b, 0x6bd3d66e, 0x3c446c28, 0x283c5044, 0x798b2ca7, 0xa779558b, 0xe23d81bc, 0xbce2633d, 0x1d273116, 0x161d2c27, 0x769a37ad, 0xad76419a +, 0x3b4d96db, 0xdb3bad4d, 0x56fa9e64, 0x6456c8fa, 0x4ed2a674, 0x744ee8d2, 0x1e223614, 0x141e2822, 0xdb76e492, 0x92db3f76, 0xa1e120c, 0xc0a181e, 0x6cb4fc48, 0x486c90b4, 0xe4378fb8, 0xb8e46b37 +, 0x5de7789f, 0x9f5d25e7, 0x6eb20fbd, 0xbd6e61b2, 0xef2a6943, 0x43ef862a, 0xa6f135c4, 0xc4a693f1, 0xa8e3da39, 0x39a872e3, 0xa4f7c631, 0x31a462f7, 0x37598ad3, 0xd337bd59, 0x8b8674f2, 0xf28bff86 +, 0x325683d5, 0xd532b156, 0x43c54e8b, 0x8b430dc5, 0x59eb856e, 0x6e59dceb, 0xb7c218da, 0xdab7afc2, 0x8c8f8e01, 0x18c028f, 0x64ac1db1, 0xb16479ac, 0xd26df19c, 0x9cd2236d, 0xe03b7249, 0x49e0923b +, 0xb4c71fd8, 0xd8b4abc7, 0xfa15b9ac, 0xacfa4315, 0x709faf3, 0xf307fd09, 0x256fa0cf, 0xcf25856f, 0xafea20ca, 0xcaaf8fea, 0x8e897df4, 0xf48ef389, 0xe9206747, 0x47e98e20, 0x18283810, 0x10182028 +, 0xd5640b6f, 0x6fd5de64, 0x888373f0, 0xf088fb83, 0x6fb1fb4a, 0x4a6f94b1, 0x7296ca5c, 0x5c72b896, 0x246c5438, 0x3824706c, 0xf1085f57, 0x57f1ae08, 0xc7522173, 0x73c7e652, 0x51f36497, 0x975135f3 +, 0x2365aecb, 0xcb238d65, 0x7c8425a1, 0xa17c5984, 0x9cbf57e8, 0xe89ccbbf, 0x21635d3e, 0x3e217c63, 0xdd7cea96, 0x96dd377c, 0xdc7f1e61, 0x61dcc27f, 0x86919c0d, 0xd861a91, 0x85949b0f, 0xf851e94 +, 0x90ab4be0, 0xe090dbab, 0x42c6ba7c, 0x7c42f8c6, 0xc4572671, 0x71c4e257, 0xaae529cc, 0xccaa83e5, 0xd873e390, 0x90d83b73, 0x50f0906, 0x6050c0f, 0x103f4f7, 0xf701f503, 0x12362a1c, 0x1c123836 +, 0xa3fe3cc2, 0xc2a39ffe, 0x5fe18b6a, 0x6a5fd4e1, 0xf910beae, 0xaef94710, 0xd06b0269, 0x69d0d26b, 0x91a8bf17, 0x17912ea8, 0x58e87199, 0x995829e8, 0x2769533a, 0x3a277469, 0xb9d0f727, 0x27b94ed0 +, 0x384891d9, 0xd938a948, 0x1335deeb, 0xeb13cd35, 0xb3cee52b, 0x2bb356ce, 0x33557722, 0x22334455, 0xbbd604d2, 0xd2bbbfd6, 0x709039a9, 0xa9704990, 0x89808707, 0x7890e80, 0xa7f2c133, 0x33a766f2 +, 0xb6c1ec2d, 0x2db65ac1, 0x22665a3c, 0x3c227866, 0x92adb815, 0x15922aad, 0x2060a9c9, 0xc9208960, 0x49db5c87, 0x874915db, 0xff1ab0aa, 0xaaff4f1a, 0x7888d850, 0x5078a088, 0x7a8e2ba5, 0xa57a518e +, 0x8f8a8903, 0x38f068a, 0xf8134a59, 0x59f8b213, 0x809b9209, 0x980129b, 0x1739231a, 0x1a173439, 0xda751065, 0x65daca75, 0x315384d7, 0xd731b553, 0xc651d584, 0x84c61351, 0xb8d303d0, 0xd0b8bbd3 +, 0xc35edc82, 0x82c31f5e, 0xb0cbe229, 0x29b052cb, 0x7799c35a, 0x5a77b499, 0x11332d1e, 0x1e113c33, 0xcb463d7b, 0x7bcbf646, 0xfc1fb7a8, 0xa8fc4b1f, 0xd6610c6d, 0x6dd6da61, 0x3a4e622c, 0x2c3a584e}; + +#endif /* __tables_h */ diff --git a/xmrstak/backend/cpu/crypto/hash.h b/xmrstak/backend/cpu/crypto/hash.h new file mode 100644 index 0000000..c12d355 --- /dev/null +++ b/xmrstak/backend/cpu/crypto/hash.h @@ -0,0 +1,5 @@ +#pragma once + +typedef unsigned char BitSequence; +typedef unsigned long long DataLength; +typedef enum {SUCCESS = 0, FAIL = 1, BAD_HASHLEN = 2} HashReturn; diff --git a/xmrstak/backend/cpu/crypto/int-util.h b/xmrstak/backend/cpu/crypto/int-util.h new file mode 100644 index 0000000..8748976 --- /dev/null +++ b/xmrstak/backend/cpu/crypto/int-util.h @@ -0,0 +1,153 @@ +// Copyright(c) 2012 - 2013 The Cryptonote developers +// Distributed under the MIT/X11 software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php. + +#pragma once + +#include <assert.h> +#include <stdbool.h> +#include <stdint.h> +#include <string.h> + +#if defined(_MSC_VER) +#include <stdlib.h> + +static inline uint32_t rol32(uint32_t x, int r) { + static_assert(sizeof(uint32_t) == sizeof(unsigned int), "this code assumes 32-bit integers"); + return _rotl(x, r); +} + +static inline uint64_t rol64(uint64_t x, int r) { + return _rotl64(x, r); +} + +#else + +static inline uint32_t rol32(uint32_t x, int r) { + return (x << (r & 31)) | (x >> (-r & 31)); +} + +static inline uint64_t rol64(uint64_t x, int r) { + return (x << (r & 63)) | (x >> (-r & 63)); +} + +#endif + +static inline uint64_t hi_dword(uint64_t val) { + return val >> 32; +} + +static inline uint64_t lo_dword(uint64_t val) { + return val & 0xFFFFFFFF; +} + +static inline uint64_t div_with_reminder(uint64_t dividend, uint32_t divisor, uint32_t* remainder) { + dividend |= ((uint64_t)*remainder) << 32; + *remainder = dividend % divisor; + return dividend / divisor; +} + +// Long division with 2^32 base +static inline uint32_t div128_32(uint64_t dividend_hi, uint64_t dividend_lo, uint32_t divisor, uint64_t* quotient_hi, uint64_t* quotient_lo) { + uint64_t dividend_dwords[4]; + uint32_t remainder = 0; + + dividend_dwords[3] = hi_dword(dividend_hi); + dividend_dwords[2] = lo_dword(dividend_hi); + dividend_dwords[1] = hi_dword(dividend_lo); + dividend_dwords[0] = lo_dword(dividend_lo); + + *quotient_hi = div_with_reminder(dividend_dwords[3], divisor, &remainder) << 32; + *quotient_hi |= div_with_reminder(dividend_dwords[2], divisor, &remainder); + *quotient_lo = div_with_reminder(dividend_dwords[1], divisor, &remainder) << 32; + *quotient_lo |= div_with_reminder(dividend_dwords[0], divisor, &remainder); + + return remainder; +} + +#define IDENT32(x) ((uint32_t) (x)) +#define IDENT64(x) ((uint64_t) (x)) + +#define SWAP32(x) ((((uint32_t) (x) & 0x000000ff) << 24) | \ + (((uint32_t) (x) & 0x0000ff00) << 8) | \ + (((uint32_t) (x) & 0x00ff0000) >> 8) | \ + (((uint32_t) (x) & 0xff000000) >> 24)) +#define SWAP64(x) ((((uint64_t) (x) & 0x00000000000000ff) << 56) | \ + (((uint64_t) (x) & 0x000000000000ff00) << 40) | \ + (((uint64_t) (x) & 0x0000000000ff0000) << 24) | \ + (((uint64_t) (x) & 0x00000000ff000000) << 8) | \ + (((uint64_t) (x) & 0x000000ff00000000) >> 8) | \ + (((uint64_t) (x) & 0x0000ff0000000000) >> 24) | \ + (((uint64_t) (x) & 0x00ff000000000000) >> 40) | \ + (((uint64_t) (x) & 0xff00000000000000) >> 56)) + +static inline uint32_t ident32(uint32_t x) { return x; } +static inline uint64_t ident64(uint64_t x) { return x; } + +static inline uint32_t swap32(uint32_t x) { + x = ((x & 0x00ff00ff) << 8) | ((x & 0xff00ff00) >> 8); + return (x << 16) | (x >> 16); +} +static inline uint64_t swap64(uint64_t x) { + x = ((x & 0x00ff00ff00ff00ff) << 8) | ((x & 0xff00ff00ff00ff00) >> 8); + x = ((x & 0x0000ffff0000ffff) << 16) | ((x & 0xffff0000ffff0000) >> 16); + return (x << 32) | (x >> 32); +} + +#if defined(__GNUC__) +#define UNUSED __attribute__((unused)) +#else +#define UNUSED +#endif +static inline void mem_inplace_ident(void *mem UNUSED, size_t n UNUSED) { } +#undef UNUSED + +static inline void mem_inplace_swap32(void *mem, size_t n) { + size_t i; + for (i = 0; i < n; i++) { + ((uint32_t *)mem)[i] = swap32(((const uint32_t *)mem)[i]); + } +} +static inline void mem_inplace_swap64(void *mem, size_t n) { + size_t i; + for (i = 0; i < n; i++) { + ((uint64_t *)mem)[i] = swap64(((const uint64_t *)mem)[i]); + } +} + +static inline void memcpy_ident32(void *dst, const void *src, size_t n) { + memcpy(dst, src, 4 * n); +} +static inline void memcpy_ident64(void *dst, const void *src, size_t n) { + memcpy(dst, src, 8 * n); +} + +static inline void memcpy_swap32(void *dst, const void *src, size_t n) { + size_t i; + for (i = 0; i < n; i++) { + ((uint32_t *)dst)[i] = swap32(((const uint32_t *)src)[i]); + } +} +static inline void memcpy_swap64(void *dst, const void *src, size_t n) { + size_t i; + for (i = 0; i < n; i++) { + ((uint64_t *)dst)[i] = swap64(((const uint64_t *)src)[i]); + } +} + +#define SWAP32LE IDENT32 +#define SWAP32BE SWAP32 +#define swap32le ident32 +#define swap32be swap32 +#define mem_inplace_swap32le mem_inplace_ident +#define mem_inplace_swap32be mem_inplace_swap32 +#define memcpy_swap32le memcpy_ident32 +#define memcpy_swap32be memcpy_swap32 +#define SWAP64LE IDENT64 +#define SWAP64BE SWAP64 +#define swap64le ident64 +#define swap64be swap64 +#define mem_inplace_swap64le mem_inplace_ident +#define mem_inplace_swap64be mem_inplace_swap64 +#define memcpy_swap64le memcpy_ident64 +#define memcpy_swap64be memcpy_swap64
\ No newline at end of file diff --git a/xmrstak/backend/cpu/crypto/skein_port.h b/xmrstak/backend/cpu/crypto/skein_port.h new file mode 100644 index 0000000..9cbefcb --- /dev/null +++ b/xmrstak/backend/cpu/crypto/skein_port.h @@ -0,0 +1,179 @@ +#ifndef _SKEIN_PORT_H_ +#define _SKEIN_PORT_H_ + +#include <limits.h> +#include <stdint.h> +#include <stddef.h> + +#ifndef RETURN_VALUES +# define RETURN_VALUES +# if defined( DLL_EXPORT ) +# if defined( _MSC_VER ) || defined ( __INTEL_COMPILER ) +# define VOID_RETURN __declspec( dllexport ) void __stdcall +# define INT_RETURN __declspec( dllexport ) int __stdcall +# elif defined( __GNUC__ ) +# define VOID_RETURN __declspec( __dllexport__ ) void +# define INT_RETURN __declspec( __dllexport__ ) int +# else +# error Use of the DLL is only available on the Microsoft, Intel and GCC compilers +# endif +# elif defined( DLL_IMPORT ) +# if defined( _MSC_VER ) || defined ( __INTEL_COMPILER ) +# define VOID_RETURN __declspec( dllimport ) void __stdcall +# define INT_RETURN __declspec( dllimport ) int __stdcall +# elif defined( __GNUC__ ) +# define VOID_RETURN __declspec( __dllimport__ ) void +# define INT_RETURN __declspec( __dllimport__ ) int +# else +# error Use of the DLL is only available on the Microsoft, Intel and GCC compilers +# endif +# elif defined( __WATCOMC__ ) +# define VOID_RETURN void __cdecl +# define INT_RETURN int __cdecl +# else +# define VOID_RETURN void +# define INT_RETURN int +# endif +#endif + +/* These defines are used to declare buffers in a way that allows + faster operations on longer variables to be used. In all these + defines 'size' must be a power of 2 and >= 8 + + dec_unit_type(size,x) declares a variable 'x' of length + 'size' bits + + dec_bufr_type(size,bsize,x) declares a buffer 'x' of length 'bsize' + bytes defined as an array of variables + each of 'size' bits (bsize must be a + multiple of size / 8) + + ptr_cast(x,size) casts a pointer to a pointer to a + varaiable of length 'size' bits +*/ + +#define ui_type(size) uint##size##_t +#define dec_unit_type(size,x) typedef ui_type(size) x +#define dec_bufr_type(size,bsize,x) typedef ui_type(size) x[bsize / (size >> 3)] +#define ptr_cast(x,size) ((ui_type(size)*)(x)) + +typedef unsigned int uint_t; /* native unsigned integer */ +typedef uint8_t u08b_t; /* 8-bit unsigned integer */ +typedef uint64_t u64b_t; /* 64-bit unsigned integer */ + +#ifndef RotL_64 +#define RotL_64(x,N) (((x) << (N)) | ((x) >> (64-(N)))) +#endif + +/* + * Skein is "natively" little-endian (unlike SHA-xxx), for optimal + * performance on x86 CPUs. The Skein code requires the following + * definitions for dealing with endianness: + * + * SKEIN_NEED_SWAP: 0 for little-endian, 1 for big-endian + * Skein_Put64_LSB_First + * Skein_Get64_LSB_First + * Skein_Swap64 + * + * If SKEIN_NEED_SWAP is defined at compile time, it is used here + * along with the portable versions of Put64/Get64/Swap64, which + * are slow in general. + * + * Otherwise, an "auto-detect" of endianness is attempted below. + * If the default handling doesn't work well, the user may insert + * platform-specific code instead (e.g., for big-endian CPUs). + * + */ +#ifndef SKEIN_NEED_SWAP /* compile-time "override" for endianness? */ + +#define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN + +/* special handler for IA64, which may be either endianness (?) */ +/* here we assume little-endian, but this may need to be changed */ +#if defined(__ia64) || defined(__ia64__) || defined(_M_IA64) +# define PLATFORM_MUST_ALIGN (1) +#ifndef PLATFORM_BYTE_ORDER +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +#endif +#endif + +#ifndef PLATFORM_MUST_ALIGN +# define PLATFORM_MUST_ALIGN (0) +#endif + + +#if PLATFORM_BYTE_ORDER == IS_BIG_ENDIAN + /* here for big-endian CPUs */ +#define SKEIN_NEED_SWAP (1) +#elif PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN + /* here for x86 and x86-64 CPUs (and other detected little-endian CPUs) */ +#define SKEIN_NEED_SWAP (0) +#if PLATFORM_MUST_ALIGN == 0 /* ok to use "fast" versions? */ +#define Skein_Put64_LSB_First(dst08,src64,bCnt) memcpy(dst08,src64,bCnt) +#define Skein_Get64_LSB_First(dst64,src08,wCnt) memcpy(dst64,src08,8*(wCnt)) +#endif +#else +#error "Skein needs endianness setting!" +#endif + +#endif /* ifndef SKEIN_NEED_SWAP */ + +/* + ****************************************************************** + * Provide any definitions still needed. + ****************************************************************** + */ +#ifndef Skein_Swap64 /* swap for big-endian, nop for little-endian */ +#if SKEIN_NEED_SWAP +#define Skein_Swap64(w64) \ + ( (( ((u64b_t)(w64)) & 0xFF) << 56) | \ + (((((u64b_t)(w64)) >> 8) & 0xFF) << 48) | \ + (((((u64b_t)(w64)) >>16) & 0xFF) << 40) | \ + (((((u64b_t)(w64)) >>24) & 0xFF) << 32) | \ + (((((u64b_t)(w64)) >>32) & 0xFF) << 24) | \ + (((((u64b_t)(w64)) >>40) & 0xFF) << 16) | \ + (((((u64b_t)(w64)) >>48) & 0xFF) << 8) | \ + (((((u64b_t)(w64)) >>56) & 0xFF) ) ) +#else +#define Skein_Swap64(w64) (w64) +#endif +#endif /* ifndef Skein_Swap64 */ + + +#ifndef Skein_Put64_LSB_First +void Skein_Put64_LSB_First(u08b_t *dst,const u64b_t *src,size_t bCnt) +#ifdef SKEIN_PORT_CODE /* instantiate the function code here? */ + { /* this version is fully portable (big-endian or little-endian), but slow */ + size_t n; + + for (n=0;n<bCnt;n++) + dst[n] = (u08b_t) (src[n>>3] >> (8*(n&7))); + } +#else + ; /* output only the function prototype */ +#endif +#endif /* ifndef Skein_Put64_LSB_First */ + + +#ifndef Skein_Get64_LSB_First +void Skein_Get64_LSB_First(u64b_t *dst,const u08b_t *src,size_t wCnt) +#ifdef SKEIN_PORT_CODE /* instantiate the function code here? */ + { /* this version is fully portable (big-endian or little-endian), but slow */ + size_t n; + + for (n=0;n<8*wCnt;n+=8) + dst[n/8] = (((u64b_t) src[n ]) ) + + (((u64b_t) src[n+1]) << 8) + + (((u64b_t) src[n+2]) << 16) + + (((u64b_t) src[n+3]) << 24) + + (((u64b_t) src[n+4]) << 32) + + (((u64b_t) src[n+5]) << 40) + + (((u64b_t) src[n+6]) << 48) + + (((u64b_t) src[n+7]) << 56) ; + } +#else + ; /* output only the function prototype */ +#endif +#endif /* ifndef Skein_Get64_LSB_First */ + +#endif /* ifndef _SKEIN_PORT_H_ */ diff --git a/xmrstak/backend/cpu/crypto/soft_aes.c b/xmrstak/backend/cpu/crypto/soft_aes.c new file mode 100644 index 0000000..aba7c20 --- /dev/null +++ b/xmrstak/backend/cpu/crypto/soft_aes.c @@ -0,0 +1,212 @@ +/* + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * Additional permission under GNU GPL version 3 section 7 + * + * If you modify this Program, or any covered work, by linking or combining + * it with OpenSSL (or a modified version of that library), containing parts + * covered by the terms of OpenSSL License and SSLeay License, the licensors + * of this Program grant you additional permission to convey the resulting work. + * + */ + +/* + * The orginal author of this AES implementation is Karl Malbrain. + */ + +#ifdef __GNUC__ +#include <x86intrin.h> +#else +#include <intrin.h> +#endif // __GNUC__ + +#include <inttypes.h> + +#define TABLE_ALIGN 32 +#define WPOLY 0x011b +#define N_COLS 4 +#define AES_BLOCK_SIZE 16 +#define RC_LENGTH (5 * (AES_BLOCK_SIZE / 4 - 2)) + +#if defined(_MSC_VER) +#define ALIGN __declspec(align(TABLE_ALIGN)) +#elif defined(__GNUC__) +#define ALIGN __attribute__ ((aligned(16))) +#else +#define ALIGN +#endif + +#define rf1(r,c) (r) +#define word_in(x,c) (*((uint32_t*)(x)+(c))) +#define word_out(x,c,v) (*((uint32_t*)(x)+(c)) = (v)) + +#define s(x,c) x[c] +#define si(y,x,c) (s(y,c) = word_in(x, c)) +#define so(y,x,c) word_out(y, c, s(x,c)) +#define state_in(y,x) si(y,x,0); si(y,x,1); si(y,x,2); si(y,x,3) +#define state_out(y,x) so(y,x,0); so(y,x,1); so(y,x,2); so(y,x,3) +#define round(y,x,k) \ +y[0] = (k)[0] ^ (t_fn[0][x[0] & 0xff] ^ t_fn[1][(x[1] >> 8) & 0xff] ^ t_fn[2][(x[2] >> 16) & 0xff] ^ t_fn[3][x[3] >> 24]); \ +y[1] = (k)[1] ^ (t_fn[0][x[1] & 0xff] ^ t_fn[1][(x[2] >> 8) & 0xff] ^ t_fn[2][(x[3] >> 16) & 0xff] ^ t_fn[3][x[0] >> 24]); \ +y[2] = (k)[2] ^ (t_fn[0][x[2] & 0xff] ^ t_fn[1][(x[3] >> 8) & 0xff] ^ t_fn[2][(x[0] >> 16) & 0xff] ^ t_fn[3][x[1] >> 24]); \ +y[3] = (k)[3] ^ (t_fn[0][x[3] & 0xff] ^ t_fn[1][(x[0] >> 8) & 0xff] ^ t_fn[2][(x[1] >> 16) & 0xff] ^ t_fn[3][x[2] >> 24]); +#define to_byte(x) ((x) & 0xff) +#define bval(x,n) to_byte((x) >> (8 * (n))) + +#define fwd_var(x,r,c)\ + ( r == 0 ? ( c == 0 ? s(x,0) : c == 1 ? s(x,1) : c == 2 ? s(x,2) : s(x,3))\ + : r == 1 ? ( c == 0 ? s(x,1) : c == 1 ? s(x,2) : c == 2 ? s(x,3) : s(x,0))\ + : r == 2 ? ( c == 0 ? s(x,2) : c == 1 ? s(x,3) : c == 2 ? s(x,0) : s(x,1))\ + : ( c == 0 ? s(x,3) : c == 1 ? s(x,0) : c == 2 ? s(x,1) : s(x,2))) + +#define fwd_rnd(y,x,k,c) (s(y,c) = (k)[c] ^ four_tables(x,t_use(f,n),fwd_var,rf1,c)) + +#define sb_data(w) {\ + w(0x63), w(0x7c), w(0x77), w(0x7b), w(0xf2), w(0x6b), w(0x6f), w(0xc5),\ + w(0x30), w(0x01), w(0x67), w(0x2b), w(0xfe), w(0xd7), w(0xab), w(0x76),\ + w(0xca), w(0x82), w(0xc9), w(0x7d), w(0xfa), w(0x59), w(0x47), w(0xf0),\ + w(0xad), w(0xd4), w(0xa2), w(0xaf), w(0x9c), w(0xa4), w(0x72), w(0xc0),\ + w(0xb7), w(0xfd), w(0x93), w(0x26), w(0x36), w(0x3f), w(0xf7), w(0xcc),\ + w(0x34), w(0xa5), w(0xe5), w(0xf1), w(0x71), w(0xd8), w(0x31), w(0x15),\ + w(0x04), w(0xc7), w(0x23), w(0xc3), w(0x18), w(0x96), w(0x05), w(0x9a),\ + w(0x07), w(0x12), w(0x80), w(0xe2), w(0xeb), w(0x27), w(0xb2), w(0x75),\ + w(0x09), w(0x83), w(0x2c), w(0x1a), w(0x1b), w(0x6e), w(0x5a), w(0xa0),\ + w(0x52), w(0x3b), w(0xd6), w(0xb3), w(0x29), w(0xe3), w(0x2f), w(0x84),\ + w(0x53), w(0xd1), w(0x00), w(0xed), w(0x20), w(0xfc), w(0xb1), w(0x5b),\ + w(0x6a), w(0xcb), w(0xbe), w(0x39), w(0x4a), w(0x4c), w(0x58), w(0xcf),\ + w(0xd0), w(0xef), w(0xaa), w(0xfb), w(0x43), w(0x4d), w(0x33), w(0x85),\ + w(0x45), w(0xf9), w(0x02), w(0x7f), w(0x50), w(0x3c), w(0x9f), w(0xa8),\ + w(0x51), w(0xa3), w(0x40), w(0x8f), w(0x92), w(0x9d), w(0x38), w(0xf5),\ + w(0xbc), w(0xb6), w(0xda), w(0x21), w(0x10), w(0xff), w(0xf3), w(0xd2),\ + w(0xcd), w(0x0c), w(0x13), w(0xec), w(0x5f), w(0x97), w(0x44), w(0x17),\ + w(0xc4), w(0xa7), w(0x7e), w(0x3d), w(0x64), w(0x5d), w(0x19), w(0x73),\ + w(0x60), w(0x81), w(0x4f), w(0xdc), w(0x22), w(0x2a), w(0x90), w(0x88),\ + w(0x46), w(0xee), w(0xb8), w(0x14), w(0xde), w(0x5e), w(0x0b), w(0xdb),\ + w(0xe0), w(0x32), w(0x3a), w(0x0a), w(0x49), w(0x06), w(0x24), w(0x5c),\ + w(0xc2), w(0xd3), w(0xac), w(0x62), w(0x91), w(0x95), w(0xe4), w(0x79),\ + w(0xe7), w(0xc8), w(0x37), w(0x6d), w(0x8d), w(0xd5), w(0x4e), w(0xa9),\ + w(0x6c), w(0x56), w(0xf4), w(0xea), w(0x65), w(0x7a), w(0xae), w(0x08),\ + w(0xba), w(0x78), w(0x25), w(0x2e), w(0x1c), w(0xa6), w(0xb4), w(0xc6),\ + w(0xe8), w(0xdd), w(0x74), w(0x1f), w(0x4b), w(0xbd), w(0x8b), w(0x8a),\ + w(0x70), w(0x3e), w(0xb5), w(0x66), w(0x48), w(0x03), w(0xf6), w(0x0e),\ + w(0x61), w(0x35), w(0x57), w(0xb9), w(0x86), w(0xc1), w(0x1d), w(0x9e),\ + w(0xe1), w(0xf8), w(0x98), w(0x11), w(0x69), w(0xd9), w(0x8e), w(0x94),\ + w(0x9b), w(0x1e), w(0x87), w(0xe9), w(0xce), w(0x55), w(0x28), w(0xdf),\ + w(0x8c), w(0xa1), w(0x89), w(0x0d), w(0xbf), w(0xe6), w(0x42), w(0x68),\ + w(0x41), w(0x99), w(0x2d), w(0x0f), w(0xb0), w(0x54), w(0xbb), w(0x16) } + +#define rc_data(w) {\ + w(0x01), w(0x02), w(0x04), w(0x08), w(0x10),w(0x20), w(0x40), w(0x80),\ + w(0x1b), w(0x36) } + +#define bytes2word(b0, b1, b2, b3) (((uint32_t)(b3) << 24) | \ + ((uint32_t)(b2) << 16) | ((uint32_t)(b1) << 8) | (b0)) + +#define h0(x) (x) +#define w0(p) bytes2word(p, 0, 0, 0) +#define w1(p) bytes2word(0, p, 0, 0) +#define w2(p) bytes2word(0, 0, p, 0) +#define w3(p) bytes2word(0, 0, 0, p) + +#define u0(p) bytes2word(f2(p), p, p, f3(p)) +#define u1(p) bytes2word(f3(p), f2(p), p, p) +#define u2(p) bytes2word(p, f3(p), f2(p), p) +#define u3(p) bytes2word(p, p, f3(p), f2(p)) + +#define v0(p) bytes2word(fe(p), f9(p), fd(p), fb(p)) +#define v1(p) bytes2word(fb(p), fe(p), f9(p), fd(p)) +#define v2(p) bytes2word(fd(p), fb(p), fe(p), f9(p)) +#define v3(p) bytes2word(f9(p), fd(p), fb(p), fe(p)) + +#define f2(x) ((x<<1) ^ (((x>>7) & 1) * WPOLY)) +#define f4(x) ((x<<2) ^ (((x>>6) & 1) * WPOLY) ^ (((x>>6) & 2) * WPOLY)) +#define f8(x) ((x<<3) ^ (((x>>5) & 1) * WPOLY) ^ (((x>>5) & 2) * WPOLY) ^ (((x>>5) & 4) * WPOLY)) +#define f3(x) (f2(x) ^ x) +#define f9(x) (f8(x) ^ x) +#define fb(x) (f8(x) ^ f2(x) ^ x) +#define fd(x) (f8(x) ^ f4(x) ^ x) +#define fe(x) (f8(x) ^ f4(x) ^ f2(x)) + +#define t_dec(m,n) t_##m##n +#define t_set(m,n) t_##m##n +#define t_use(m,n) t_##m##n + +#define d_4(t,n,b,e,f,g,h) ALIGN const t n[4][256] = { b(e), b(f), b(g), b(h) } + +#define four_tables(x,tab,vf,rf,c) \ + (tab[0][bval(vf(x,0,c),rf(0,c))] \ + ^ tab[1][bval(vf(x,1,c),rf(1,c))] \ + ^ tab[2][bval(vf(x,2,c),rf(2,c))] \ + ^ tab[3][bval(vf(x,3,c),rf(3,c))]) + +d_4(uint32_t, t_dec(f,n), sb_data, u0, u1, u2, u3); + +__m128i soft_aesenc(__m128i in, __m128i key) +{ + uint32_t x0, x1, x2, x3; + x0 = _mm_cvtsi128_si32(in); + x1 = _mm_cvtsi128_si32(_mm_shuffle_epi32(in, 0x55)); + x2 = _mm_cvtsi128_si32(_mm_shuffle_epi32(in, 0xAA)); + x3 = _mm_cvtsi128_si32(_mm_shuffle_epi32(in, 0xFF)); + + __m128i out = _mm_set_epi32( + (t_fn[0][x3 & 0xff] ^ t_fn[1][(x0 >> 8) & 0xff] ^ t_fn[2][(x1 >> 16) & 0xff] ^ t_fn[3][x2 >> 24]), + (t_fn[0][x2 & 0xff] ^ t_fn[1][(x3 >> 8) & 0xff] ^ t_fn[2][(x0 >> 16) & 0xff] ^ t_fn[3][x1 >> 24]), + (t_fn[0][x1 & 0xff] ^ t_fn[1][(x2 >> 8) & 0xff] ^ t_fn[2][(x3 >> 16) & 0xff] ^ t_fn[3][x0 >> 24]), + (t_fn[0][x0 & 0xff] ^ t_fn[1][(x1 >> 8) & 0xff] ^ t_fn[2][(x2 >> 16) & 0xff] ^ t_fn[3][x3 >> 24])); + + return _mm_xor_si128(out, key); +} + +uint8_t Sbox[256] = { // forward s-box +0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76, +0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0, +0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15, +0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75, +0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84, +0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf, +0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8, +0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2, +0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73, +0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb, +0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79, +0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08, +0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a, +0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e, +0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf, +0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16}; + +static inline void sub_word(uint8_t* key) +{ + key[0] = Sbox[key[0]]; + key[1] = Sbox[key[1]]; + key[2] = Sbox[key[2]]; + key[3] = Sbox[key[3]]; +} + +#ifdef __clang__ +uint32_t _rotr(uint32_t value, uint32_t amount) +{ + return (value >> amount) | (value << ((32 - amount) & 31)); +} +#endif + +__m128i soft_aeskeygenassist(__m128i key, uint8_t rcon) +{ + uint32_t X1 = _mm_cvtsi128_si32(_mm_shuffle_epi32(key, 0x55)); + uint32_t X3 = _mm_cvtsi128_si32(_mm_shuffle_epi32(key, 0xFF)); + sub_word((uint8_t*)&X1); + sub_word((uint8_t*)&X3); + return _mm_set_epi32(_rotr(X3, 8) ^ rcon, X3,_rotr(X1, 8) ^ rcon, X1); +} diff --git a/xmrstak/backend/cpu/hwlocMemory.hpp b/xmrstak/backend/cpu/hwlocMemory.hpp new file mode 100644 index 0000000..f471951 --- /dev/null +++ b/xmrstak/backend/cpu/hwlocMemory.hpp @@ -0,0 +1,55 @@ +#pragma once + +#include "console.h" + +#ifndef CONF_NO_HWLOC + +#include <hwloc.h> + +/** pin memory to NUMA node + * + * Set the default memory policy for the current thread to bind memory to the + * NUMA node. + * + * @param puId core id + */ +void bindMemoryToNUMANode( size_t puId ) +{ + int depth; + hwloc_topology_t topology; + + hwloc_topology_init(&topology); + hwloc_topology_load(topology); + + depth = hwloc_get_type_depth(topology, HWLOC_OBJ_PU); + + for( size_t i = 0; + i < hwloc_get_nbobjs_by_depth(topology, depth); + i++ ) + { + hwloc_obj_t pu = hwloc_get_obj_by_depth(topology, depth, i); + if( pu->os_index == puId ) + { + if( 0 > hwloc_set_membind_nodeset( + topology, + pu->nodeset, + HWLOC_MEMBIND_BIND, + HWLOC_MEMBIND_THREAD)) + { + printer::inst()->print_msg(L0, "hwloc: can't bind memory"); + } + else + { + printer::inst()->print_msg(L0, "hwloc: memory pinned"); + break; + } + } + } +} +#else + +void bindMemoryToNUMANode( size_t ) +{ +} + +#endif diff --git a/xmrstak/backend/cpu/jconf.cpp b/xmrstak/backend/cpu/jconf.cpp new file mode 100644 index 0000000..021d607 --- /dev/null +++ b/xmrstak/backend/cpu/jconf.cpp @@ -0,0 +1,257 @@ +/* + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * Additional permission under GNU GPL version 3 section 7 + * + * If you modify this Program, or any covered work, by linking or combining + * it with OpenSSL (or a modified version of that library), containing parts + * covered by the terms of OpenSSL License and SSLeay License, the licensors + * of this Program grant you additional permission to convey the resulting work. + * + */ + +#include "jconf.h" +#include "../../console.h" +#include <iostream> + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#ifdef _WIN32 +#define strcasecmp _stricmp +#include <intrin.h> +#else +#include <cpuid.h> +#endif + +#include "../../rapidjson/document.h" +#include "../../rapidjson/error/en.h" +#include "../../jext.h" + +namespace xmrstak +{ +namespace cpu +{ + +using namespace rapidjson; + +/* + * This enum needs to match index in oConfigValues, otherwise we will get a runtime error + */ +enum configEnum { aCpuThreadsConf, sUseSlowMem }; + +struct configVal { + configEnum iName; + const char* sName; + Type iType; +}; + +// Same order as in configEnum, as per comment above +// kNullType means any type +configVal oConfigValues[] = { + { aCpuThreadsConf, "cpu_threads_conf", kNullType } +}; + +constexpr size_t iConfigCnt = (sizeof(oConfigValues)/sizeof(oConfigValues[0])); + +inline bool checkType(Type have, Type want) +{ + if(want == have) + return true; + else if(want == kNullType) + return true; + else if(want == kTrueType && have == kFalseType) + return true; + else if(want == kFalseType && have == kTrueType) + return true; + else + return false; +} + +struct jconf::opaque_private +{ + Document jsonDoc; + const Value* configValues[iConfigCnt]; //Compile time constant + + opaque_private() + { + } +}; + +jconf* jconf::oInst = nullptr; + +jconf::jconf() +{ + prv = new opaque_private(); +} + +bool jconf::GetThreadConfig(size_t id, thd_cfg &cfg) +{ + if(!prv->configValues[aCpuThreadsConf]->IsArray()) + return false; + + if(id >= prv->configValues[aCpuThreadsConf]->Size()) + return false; + + const Value& oThdConf = prv->configValues[aCpuThreadsConf]->GetArray()[id]; + + if(!oThdConf.IsObject()) + return false; + + const Value *mode, *no_prefetch, *aff; + mode = GetObjectMember(oThdConf, "low_power_mode"); + no_prefetch = GetObjectMember(oThdConf, "no_prefetch"); + aff = GetObjectMember(oThdConf, "affine_to_cpu"); + + if(mode == nullptr || no_prefetch == nullptr || aff == nullptr) + return false; + + if(!mode->IsBool() || !no_prefetch->IsBool()) + return false; + + if(!aff->IsNumber() && !aff->IsBool()) + return false; + + if(aff->IsNumber() && aff->GetInt64() < 0) + return false; + + cfg.bDoubleMode = mode->GetBool(); + cfg.bNoPrefetch = no_prefetch->GetBool(); + + if(aff->IsNumber()) + cfg.iCpuAff = aff->GetInt64(); + else + cfg.iCpuAff = -1; + + return true; +} + + +size_t jconf::GetThreadCount() +{ + if(prv->configValues[aCpuThreadsConf]->IsArray()) + return prv->configValues[aCpuThreadsConf]->Size(); + else + return 0; +} + +bool jconf::parse_config(const char* sFilename) +{ + FILE * pFile; + char * buffer; + size_t flen; + + pFile = fopen(sFilename, "rb"); + if (pFile == NULL) + { + printer::inst()->print_msg(L0, "Failed to open config file %s.", sFilename); + return false; + } + + fseek(pFile,0,SEEK_END); + flen = ftell(pFile); + rewind(pFile); + + if(flen >= 64*1024) + { + fclose(pFile); + printer::inst()->print_msg(L0, "Oversized config file - %s.", sFilename); + return false; + } + + if(flen <= 16) + { + fclose(pFile); + printer::inst()->print_msg(L0, "File is empty or too short - %s.", sFilename); + return false; + } + + buffer = (char*)malloc(flen + 3); + if(fread(buffer+1, flen, 1, pFile) != 1) + { + free(buffer); + fclose(pFile); + printer::inst()->print_msg(L0, "Read error while reading %s.", sFilename); + return false; + } + fclose(pFile); + + //Replace Unicode BOM with spaces - we always use UTF-8 + unsigned char* ubuffer = (unsigned char*)buffer; + if(ubuffer[1] == 0xEF && ubuffer[2] == 0xBB && ubuffer[3] == 0xBF) + { + buffer[1] = ' '; + buffer[2] = ' '; + buffer[3] = ' '; + } + + buffer[0] = '{'; + buffer[flen] = '}'; + buffer[flen + 1] = '\0'; + + prv->jsonDoc.Parse<kParseCommentsFlag|kParseTrailingCommasFlag>(buffer, flen+2); + free(buffer); + + if(prv->jsonDoc.HasParseError()) + { + printer::inst()->print_msg(L0, "JSON config parse error(offset %llu): %s", + int_port(prv->jsonDoc.GetErrorOffset()), GetParseError_En(prv->jsonDoc.GetParseError())); + return false; + } + + if(!prv->jsonDoc.IsObject()) + { //This should never happen as we created the root ourselves + printer::inst()->print_msg(L0, "Invalid config file. No root?\n"); + return false; + } + + for(size_t i = 0; i < iConfigCnt; i++) + { + if(oConfigValues[i].iName != i) + { + printer::inst()->print_msg(L0, "Code error. oConfigValues are not in order."); + return false; + } + + prv->configValues[i] = GetObjectMember(prv->jsonDoc, oConfigValues[i].sName); + + if(prv->configValues[i] == nullptr) + { + printer::inst()->print_msg(L0, "Invalid config file. Missing value \"%s\".", oConfigValues[i].sName); + return false; + } + + if(!checkType(prv->configValues[i]->GetType(), oConfigValues[i].iType)) + { + printer::inst()->print_msg(L0, "Invalid config file. Value \"%s\" has unexpected type.", oConfigValues[i].sName); + return false; + } + } + + thd_cfg c; + for(size_t i=0; i < GetThreadCount(); i++) + { + if(!GetThreadConfig(i, c)) + { + printer::inst()->print_msg(L0, "Thread %llu has invalid config.", int_port(i)); + return false; + } + } + + return true; +} + +} // namespace cpu +} // namepsace xmrstak diff --git a/xmrstak/backend/cpu/jconf.hpp b/xmrstak/backend/cpu/jconf.hpp new file mode 100644 index 0000000..1f92765 --- /dev/null +++ b/xmrstak/backend/cpu/jconf.hpp @@ -0,0 +1,44 @@ +#pragma once +#include <stdlib.h> +#include <string> +#include "../../Params.hpp" + +namespace xmrstak +{ +namespace cpu +{ + +class jconf +{ +public: + static jconf* inst() + { + if (oInst == nullptr) oInst = new jconf; + return oInst; + }; + + bool parse_config(const char* sFilename = Params::inst().configFileCPU.c_str()); + + struct thd_cfg { + bool bDoubleMode; + bool bNoPrefetch; + long long iCpuAff; + }; + + size_t GetThreadCount(); + bool GetThreadConfig(size_t id, thd_cfg &cfg); + bool NeedsAutoconf(); + + + + +private: + jconf(); + static jconf* oInst; + + struct opaque_private; + opaque_private* prv; +}; + +} // namespace cpu +} // namepsace xmrstak diff --git a/xmrstak/backend/cpu/minethd.cpp b/xmrstak/backend/cpu/minethd.cpp new file mode 100644 index 0000000..3ffdf99 --- /dev/null +++ b/xmrstak/backend/cpu/minethd.cpp @@ -0,0 +1,508 @@ +/* + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * Additional permission under GNU GPL version 3 section 7 + * + * If you modify this Program, or any covered work, by linking or combining + * it with OpenSSL (or a modified version of that library), containing parts + * covered by the terms of OpenSSL License and SSLeay License, the licensors + * of this Program grant you additional permission to convey the resulting work. + * + */ + +#include <assert.h> +#include <cmath> +#include <chrono> +#include <cstring> +#include <thread> +#include <bitset> +#include "../../console.h" +#include "../IBackend.hpp" +#include "../GlobalStates.hpp" +#include "../../ConfigEditor.hpp" +#include "../../Params.hpp" +#include "../../jconf.h" + +#include "../../executor.h" +#include "minethd.h" +#include "./jconf.h" +#include "../../crypto/cryptonight_aesni.h" +#include "../../hwlocMemory.hpp" +#include "../miner_work.h" + +#ifndef CONF_NO_HWLOC +# include "autoAdjustHwloc.hpp" +#else +# include "autoAdjust.hpp" +#endif + + +#ifdef _WIN32 +#include <windows.h> + +namespace xmrstak +{ +namespace cpu +{ +void minethd::thd_setaffinity(std::thread::native_handle_type h, uint64_t cpu_id) +{ + SetThreadAffinityMask(h, 1ULL << cpu_id); +} + +} // namespace cpu +} // namespace xmrstak + +#else +#include <pthread.h> + +#if defined(__APPLE__) +#include <mach/thread_policy.h> +#include <mach/thread_act.h> +#define SYSCTL_CORE_COUNT "machdep.cpu.core_count" +#elif defined(__FreeBSD__) +#include <pthread_np.h> +#endif + +namespace xmrstak +{ +namespace cpu +{ + +void minethd::thd_setaffinity(std::thread::native_handle_type h, uint64_t cpu_id) +{ +#if defined(__APPLE__) + thread_port_t mach_thread; + thread_affinity_policy_data_t policy = { static_cast<integer_t>(cpu_id) }; + mach_thread = pthread_mach_thread_np(h); + thread_policy_set(mach_thread, THREAD_AFFINITY_POLICY, (thread_policy_t)&policy, 1); +#elif defined(__FreeBSD__) + cpuset_t mn; + CPU_ZERO(&mn); + CPU_SET(cpu_id, &mn); + pthread_setaffinity_np(h, sizeof(cpuset_t), &mn); +#else + cpu_set_t mn; + CPU_ZERO(&mn); + CPU_SET(cpu_id, &mn); + pthread_setaffinity_np(h, sizeof(cpu_set_t), &mn); +#endif +} + +} // namespace cpu +} // namespace xmrstak + +#endif // _WIN32 + + +namespace xmrstak +{ +namespace cpu +{ + +minethd::minethd(miner_work& pWork, size_t iNo, bool double_work, bool no_prefetch, int64_t affinity) +{ + oWork = pWork; + bQuit = 0; + iThreadNo = (uint8_t)iNo; + iJobNo = 0; + bNoPrefetch = no_prefetch; + this->affinity = affinity; + + std::lock_guard<std::mutex> lock(work_thd_mtx); + if(double_work) + oWorkThd = std::thread(&minethd::double_work_main, this); + else + oWorkThd = std::thread(&minethd::work_main, this); +} + +cryptonight_ctx* minethd::minethd_alloc_ctx() +{ + cryptonight_ctx* ctx; + alloc_msg msg = { 0 }; + + switch (::jconf::inst()->GetSlowMemSetting()) + { + case ::jconf::never_use: + ctx = cryptonight_alloc_ctx(1, 1, &msg); + if (ctx == NULL) + printer::inst()->print_msg(L0, "MEMORY ALLOC FAILED: %s", msg.warning); + return ctx; + + case ::jconf::no_mlck: + ctx = cryptonight_alloc_ctx(1, 0, &msg); + if (ctx == NULL) + printer::inst()->print_msg(L0, "MEMORY ALLOC FAILED: %s", msg.warning); + return ctx; + + case ::jconf::print_warning: + ctx = cryptonight_alloc_ctx(1, 1, &msg); + if (msg.warning != NULL) + printer::inst()->print_msg(L0, "MEMORY ALLOC FAILED: %s", msg.warning); + if (ctx == NULL) + ctx = cryptonight_alloc_ctx(0, 0, NULL); + return ctx; + + case ::jconf::always_use: + return cryptonight_alloc_ctx(0, 0, NULL); + + case ::jconf::unknown_value: + return NULL; //Shut up compiler + } + + return nullptr; //Should never happen +} + +bool minethd::self_test() +{ + alloc_msg msg = { 0 }; + size_t res; + bool fatal = false; + + switch (::jconf::inst()->GetSlowMemSetting()) + { + case ::jconf::never_use: + res = cryptonight_init(1, 1, &msg); + fatal = true; + break; + + case ::jconf::no_mlck: + res = cryptonight_init(1, 0, &msg); + fatal = true; + break; + + case ::jconf::print_warning: + res = cryptonight_init(1, 1, &msg); + break; + + case ::jconf::always_use: + res = cryptonight_init(0, 0, &msg); + break; + + case ::jconf::unknown_value: + default: + return false; //Shut up compiler + } + + if(msg.warning != nullptr) + printer::inst()->print_msg(L0, "MEMORY INIT ERROR: %s", msg.warning); + + if(res == 0 && fatal) + return false; + + cryptonight_ctx *ctx0, *ctx1; + if((ctx0 = minethd_alloc_ctx()) == nullptr) + return false; + + if((ctx1 = minethd_alloc_ctx()) == nullptr) + { + cryptonight_free_ctx(ctx0); + return false; + } + + unsigned char out[64]; + bool bResult; + + cn_hash_fun hashf; + cn_hash_fun_dbl hashdf; + + hashf = func_selector(::jconf::inst()->HaveHardwareAes(), false); + hashf("This is a test", 14, out, ctx0); + bResult = memcmp(out, "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05", 32) == 0; + + hashf = func_selector(::jconf::inst()->HaveHardwareAes(), true); + hashf("This is a test", 14, out, ctx0); + bResult &= memcmp(out, "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05", 32) == 0; + + hashdf = func_dbl_selector(::jconf::inst()->HaveHardwareAes(), false); + hashdf("The quick brown fox jumps over the lazy dogThe quick brown fox jumps over the lazy log", 43, out, ctx0, ctx1); + bResult &= memcmp(out, "\x3e\xbb\x7f\x9f\x7d\x27\x3d\x7c\x31\x8d\x86\x94\x77\x55\x0c\xc8\x00\xcf\xb1\x1b\x0c\xad\xb7\xff\xbd\xf6\xf8\x9f\x3a\x47\x1c\x59" + "\xb4\x77\xd5\x02\xe4\xd8\x48\x7f\x42\xdf\xe3\x8e\xed\x73\x81\x7a\xda\x91\xb7\xe2\x63\xd2\x91\x71\xb6\x5c\x44\x3a\x01\x2a\x41\x22", 64) == 0; + + hashdf = func_dbl_selector(::jconf::inst()->HaveHardwareAes(), true); + hashdf("The quick brown fox jumps over the lazy dogThe quick brown fox jumps over the lazy log", 43, out, ctx0, ctx1); + bResult &= memcmp(out, "\x3e\xbb\x7f\x9f\x7d\x27\x3d\x7c\x31\x8d\x86\x94\x77\x55\x0c\xc8\x00\xcf\xb1\x1b\x0c\xad\xb7\xff\xbd\xf6\xf8\x9f\x3a\x47\x1c\x59" + "\xb4\x77\xd5\x02\xe4\xd8\x48\x7f\x42\xdf\xe3\x8e\xed\x73\x81\x7a\xda\x91\xb7\xe2\x63\xd2\x91\x71\xb6\x5c\x44\x3a\x01\x2a\x41\x22", 64) == 0; + + cryptonight_free_ctx(ctx0); + cryptonight_free_ctx(ctx1); + + if(!bResult) + printer::inst()->print_msg(L0, + "Cryptonight hash self-test failed. This might be caused by bad compiler optimizations."); + + return bResult; +} + +std::vector<IBackend*> minethd::thread_starter(uint32_t threadOffset, miner_work& pWork) +{ + std::vector<IBackend*> pvThreads; + + if(!ConfigEditor::file_exist(Params::inst().configFileCPU)) + { + autoAdjust adjust; + if(!adjust.printConfig()) + return pvThreads; + } + + if(!jconf::inst()->parse_config()) + { + win_exit(); + } + + + //Launch the requested number of single and double threads, to distribute + //load evenly we need to alternate single and double threads + size_t i, n = jconf::inst()->GetThreadCount(); + pvThreads.reserve(n); + + jconf::thd_cfg cfg; + for (i = 0; i < n; i++) + { + jconf::inst()->GetThreadConfig(i, cfg); + + // \todo need thread offset + minethd* thd = new minethd(pWork, i + threadOffset, cfg.bDoubleMode, cfg.bNoPrefetch, cfg.iCpuAff); + pvThreads.push_back(thd); + + if(cfg.iCpuAff >= 0) + printer::inst()->print_msg(L1, "Starting %s thread, affinity: %d.", cfg.bDoubleMode ? "double" : "single", (int)cfg.iCpuAff); + else + printer::inst()->print_msg(L1, "Starting %s thread, no affinity.", cfg.bDoubleMode ? "double" : "single"); + } + + return pvThreads; +} + +void minethd::consume_work() +{ + memcpy(&oWork, &GlobalStates::inst().inst().oGlobalWork, sizeof(miner_work)); + iJobNo++; + GlobalStates::inst().inst().iConsumeCnt++; +} + +minethd::cn_hash_fun minethd::func_selector(bool bHaveAes, bool bNoPrefetch) +{ + // We have two independent flag bits in the functions + // therefore we will build a binary digit and select the + // function as a two digit binary + // Digit order SOFT_AES, NO_PREFETCH + + static const cn_hash_fun func_table[4] = { + cryptonight_hash<0x80000, MEMORY, false, false>, + cryptonight_hash<0x80000, MEMORY, false, true>, + cryptonight_hash<0x80000, MEMORY, true, false>, + cryptonight_hash<0x80000, MEMORY, true, true> + }; + + std::bitset<2> digit; + digit.set(0, !bNoPrefetch); + digit.set(1, !bHaveAes); + + return func_table[digit.to_ulong()]; +} + +void minethd::pin_thd_affinity() +{ + //Lock is needed because we need to use oWorkThd + std::lock_guard<std::mutex> lock(work_thd_mtx); + + // pin memory to NUMA node + bindMemoryToNUMANode(affinity); + +#if defined(__APPLE__) + printer::inst()->print_msg(L1, "WARNING on MacOS thread affinity is only advisory."); +#endif + thd_setaffinity(oWorkThd.native_handle(), affinity); +} + +void minethd::work_main() +{ + if(affinity >= 0) //-1 means no affinity + pin_thd_affinity(); + + cn_hash_fun hash_fun; + cryptonight_ctx* ctx; + uint64_t iCount = 0; + uint64_t* piHashVal; + uint32_t* piNonce; + job_result result; + + hash_fun = func_selector(::jconf::inst()->HaveHardwareAes(), bNoPrefetch); + ctx = minethd_alloc_ctx(); + + piHashVal = (uint64_t*)(result.bResult + 24); + piNonce = (uint32_t*)(oWork.bWorkBlob + 39); + GlobalStates::inst().inst().iConsumeCnt++; + + while (bQuit == 0) + { + if (oWork.bStall) + { + /* We are stalled here because the executor didn't find a job for us yet, + either because of network latency, or a socket problem. Since we are + raison d'etre of this software it us sensible to just wait until we have something*/ + + while (GlobalStates::inst().inst().iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo) + std::this_thread::sleep_for(std::chrono::milliseconds(100)); + + consume_work(); + continue; + } + + if(oWork.bNiceHash) + result.iNonce = calc_nicehash_nonce(*piNonce, oWork.iResumeCnt); + else + result.iNonce = calc_start_nonce(oWork.iResumeCnt); + + assert(sizeof(job_result::sJobID) == sizeof(pool_job::sJobID)); + memcpy(result.sJobID, oWork.sJobID, sizeof(job_result::sJobID)); + + while(GlobalStates::inst().inst().iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo) + { + if ((iCount & 0xF) == 0) //Store stats every 16 hashes + { + using namespace std::chrono; + uint64_t iStamp = time_point_cast<milliseconds>(high_resolution_clock::now()).time_since_epoch().count(); + iHashCount.store(iCount, std::memory_order_relaxed); + iTimestamp.store(iStamp, std::memory_order_relaxed); + } + iCount++; + + *piNonce = ++result.iNonce; + + hash_fun(oWork.bWorkBlob, oWork.iWorkSize, result.bResult, ctx); + + if (*piHashVal < oWork.iTarget) + executor::inst()->push_event(ex_event(result, oWork.iPoolId)); + + std::this_thread::yield(); + } + + consume_work(); + } + + cryptonight_free_ctx(ctx); +} + +minethd::cn_hash_fun_dbl minethd::func_dbl_selector(bool bHaveAes, bool bNoPrefetch) +{ + // We have two independent flag bits in the functions + // therefore we will build a binary digit and select the + // function as a two digit binary + // Digit order SOFT_AES, NO_PREFETCH + + static const cn_hash_fun_dbl func_table[4] = { + cryptonight_double_hash<0x80000, MEMORY, false, false>, + cryptonight_double_hash<0x80000, MEMORY, false, true>, + cryptonight_double_hash<0x80000, MEMORY, true, false>, + cryptonight_double_hash<0x80000, MEMORY, true, true> + }; + + std::bitset<2> digit; + digit.set(0, !bNoPrefetch); + digit.set(1, !bHaveAes); + + return func_table[digit.to_ulong()]; +} + +void minethd::double_work_main() +{ + if(affinity >= 0) //-1 means no affinity + pin_thd_affinity(); + + cn_hash_fun_dbl hash_fun; + cryptonight_ctx* ctx0; + cryptonight_ctx* ctx1; + uint64_t iCount = 0; + uint64_t *piHashVal0, *piHashVal1; + uint32_t *piNonce0, *piNonce1; + uint8_t bDoubleHashOut[64]; + uint8_t bDoubleWorkBlob[sizeof(miner_work::bWorkBlob) * 2]; + uint32_t iNonce; + job_result res; + + hash_fun = func_dbl_selector(::jconf::inst()->HaveHardwareAes(), bNoPrefetch); + ctx0 = minethd_alloc_ctx(); + ctx1 = minethd_alloc_ctx(); + + piHashVal0 = (uint64_t*)(bDoubleHashOut + 24); + piHashVal1 = (uint64_t*)(bDoubleHashOut + 32 + 24); + piNonce0 = (uint32_t*)(bDoubleWorkBlob + 39); + piNonce1 = nullptr; + + GlobalStates::inst().inst().iConsumeCnt++; + + while (bQuit == 0) + { + if (oWork.bStall) + { + /* We are stalled here because the executor didn't find a job for us yet, + either because of network latency, or a socket problem. Since we are + raison d'etre of this software it us sensible to just wait until we have something*/ + + while (GlobalStates::inst().inst().iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo) + std::this_thread::sleep_for(std::chrono::milliseconds(100)); + + consume_work(); + memcpy(bDoubleWorkBlob, oWork.bWorkBlob, oWork.iWorkSize); + memcpy(bDoubleWorkBlob + oWork.iWorkSize, oWork.bWorkBlob, oWork.iWorkSize); + piNonce1 = (uint32_t*)(bDoubleWorkBlob + oWork.iWorkSize + 39); + continue; + } + + if(oWork.bNiceHash) + iNonce = calc_nicehash_nonce(*piNonce0, oWork.iResumeCnt); + else + iNonce = calc_start_nonce(oWork.iResumeCnt); + + assert(sizeof(job_result::sJobID) == sizeof(pool_job::sJobID)); + + while (GlobalStates::inst().inst().iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo) + { + if ((iCount & 0x7) == 0) //Store stats every 16 hashes + { + using namespace std::chrono; + uint64_t iStamp = time_point_cast<milliseconds>(high_resolution_clock::now()).time_since_epoch().count(); + iHashCount.store(iCount, std::memory_order_relaxed); + iTimestamp.store(iStamp, std::memory_order_relaxed); + } + + iCount += 2; + + *piNonce0 = ++iNonce; + *piNonce1 = ++iNonce; + + hash_fun(bDoubleWorkBlob, oWork.iWorkSize, bDoubleHashOut, ctx0, ctx1); + + if (*piHashVal0 < oWork.iTarget) + executor::inst()->push_event(ex_event(job_result(oWork.sJobID, iNonce-1, bDoubleHashOut), oWork.iPoolId)); + + if (*piHashVal1 < oWork.iTarget) + executor::inst()->push_event(ex_event(job_result(oWork.sJobID, iNonce, bDoubleHashOut + 32), oWork.iPoolId)); + + std::this_thread::yield(); + } + + consume_work(); + memcpy(bDoubleWorkBlob, oWork.bWorkBlob, oWork.iWorkSize); + memcpy(bDoubleWorkBlob + oWork.iWorkSize, oWork.bWorkBlob, oWork.iWorkSize); + piNonce1 = (uint32_t*)(bDoubleWorkBlob + oWork.iWorkSize + 39); + } + + cryptonight_free_ctx(ctx0); + cryptonight_free_ctx(ctx1); +} + +} // namespace cpu +} // namepsace xmrstak diff --git a/xmrstak/backend/cpu/minethd.hpp b/xmrstak/backend/cpu/minethd.hpp new file mode 100644 index 0000000..40383cf --- /dev/null +++ b/xmrstak/backend/cpu/minethd.hpp @@ -0,0 +1,74 @@ +#pragma once +#include <thread> +#include <vector> +#include <atomic> +#include <mutex> +#include "../../crypto/cryptonight.h" +#include "../miner_work.h" +#include "../IBackend.hpp" +#include "../GlobalStates.hpp" +#include <iostream> + +namespace xmrstak +{ +namespace cpu +{ + +class minethd : public IBackend +{ +public: + static std::vector<IBackend*> thread_starter(uint32_t threadOffset, miner_work& pWork); + static bool self_test(); + + typedef void (*cn_hash_fun)(const void*, size_t, void*, cryptonight_ctx*); + + static cn_hash_fun func_selector(bool bHaveAes, bool bNoPrefetch); + static void thd_setaffinity(std::thread::native_handle_type h, uint64_t cpu_id); + + static cryptonight_ctx* minethd_alloc_ctx(); + +private: + + typedef void (*cn_hash_fun_dbl)(const void*, size_t, void*, cryptonight_ctx* __restrict, cryptonight_ctx* __restrict); + static cn_hash_fun_dbl func_dbl_selector(bool bHaveAes, bool bNoPrefetch); + + minethd(miner_work& pWork, size_t iNo, bool double_work, bool no_prefetch, int64_t affinity); + + // We use the top 10 bits of the nonce for thread and resume + // This allows us to resume up to 128 threads 4 times before + // we get nonce collisions + // Bottom 22 bits allow for an hour of work at 1000 H/s + inline uint32_t calc_start_nonce(uint32_t resume) + { + return reverseBits<uint32_t>(static_cast<uint32_t>(iThreadNo + GlobalStates::inst().iThreadCount * resume)); + } + + // Limited version of the nonce calc above + inline uint32_t calc_nicehash_nonce(uint32_t start, uint32_t resume) + { + return start | ( ( reverseBits<uint32_t>(static_cast<uint32_t>(iThreadNo + GlobalStates::inst().iThreadCount * resume)) >> 4u ) ); + } + + void work_main(); + void double_work_main(); + void consume_work(); + + uint64_t iJobNo; + + static miner_work oGlobalWork; + miner_work oWork; + + void pin_thd_affinity(); + // Held by the creating context to prevent a race cond with oWorkThd = std::thread(...) + std::mutex work_thd_mtx; + + std::thread oWorkThd; + uint8_t iThreadNo; + int64_t affinity; + + bool bQuit; + bool bNoPrefetch; +}; + +} // namespace cpu +} // namepsace xmrstak diff --git a/xmrstak/backend/globalStates.cpp b/xmrstak/backend/globalStates.cpp new file mode 100644 index 0000000..5251da8 --- /dev/null +++ b/xmrstak/backend/globalStates.cpp @@ -0,0 +1,51 @@ +/* + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * Additional permission under GNU GPL version 3 section 7 + * + * If you modify this Program, or any covered work, by linking or combining + * it with OpenSSL (or a modified version of that library), containing parts + * covered by the terms of OpenSSL License and SSLeay License, the licensors + * of this Program grant you additional permission to convey the resulting work. + * + */ + +#include <assert.h> +#include <cmath> +#include <chrono> +#include <cstring> + + +#include "miner_work.h" +#include "GlobalStates.hpp" + +namespace xmrstak +{ + + +void GlobalStates::switch_work(miner_work& pWork) +{ + // iConsumeCnt is a basic lock-like polling mechanism just in case we happen to push work + // faster than threads can consume them. This should never happen in real life. + // Pool cant physically send jobs faster than every 250ms or so due to net latency. + + while (iConsumeCnt.load(std::memory_order_seq_cst) < iThreadCount) + std::this_thread::sleep_for(std::chrono::milliseconds(100)); + + oGlobalWork = pWork; + iConsumeCnt.store(0, std::memory_order_seq_cst); + iGlobalJobNo++; +} + +} // namepsace xmrstak diff --git a/xmrstak/backend/globalStates.hpp b/xmrstak/backend/globalStates.hpp new file mode 100644 index 0000000..a9818ba --- /dev/null +++ b/xmrstak/backend/globalStates.hpp @@ -0,0 +1,35 @@ +#pragma once +#include <atomic> +#include "miner_work.h" +#include "../Environment.hpp" + +namespace xmrstak +{ + +struct GlobalStates +{ + + static inline GlobalStates& inst() + { + auto& env = Environment::inst(); + if(env.pGlobalStates == nullptr) + env.pGlobalStates = new GlobalStates; + return *env.pGlobalStates; + } + + void switch_work(miner_work& pWork); + + miner_work oGlobalWork; + std::atomic<uint64_t> iGlobalJobNo; + std::atomic<uint64_t> iConsumeCnt; + uint64_t iThreadCount; + + private: + + GlobalStates() : iThreadCount(0) + { + } + +}; + +} // namepsace xmrstak diff --git a/xmrstak/backend/iBackend.hpp b/xmrstak/backend/iBackend.hpp new file mode 100644 index 0000000..5037028 --- /dev/null +++ b/xmrstak/backend/iBackend.hpp @@ -0,0 +1,20 @@ +#pragma once + +#include <atomic> +#include <cstdint> + + +namespace xmrstak +{ + + struct IBackend + { + std::atomic<uint64_t> iHashCount; + std::atomic<uint64_t> iTimestamp; + + IBackend() : iHashCount(0), iTimestamp(0) + { + } + }; + +} // namepsace xmrstak diff --git a/xmrstak/backend/miner_work.hpp b/xmrstak/backend/miner_work.hpp new file mode 100644 index 0000000..8349fda --- /dev/null +++ b/xmrstak/backend/miner_work.hpp @@ -0,0 +1,103 @@ +#pragma once +#include <thread> +#include <atomic> +#include <mutex> +#include <cstdint> +#include <climits> +#include <iostream> +#include <cassert> + +namespace xmrstak +{ + // only allowed for unsigned value \todo add static assert + template<typename T> + T reverseBits(T value) + { + /* init with value (to get LSB) */ + T result = value; + /* extra shift needed at end */ + int s = sizeof(T) * CHAR_BIT - 1; + for (value >>= 1; value; value >>= 1) + { + result <<= 1; + result |= value & 1; + s--; + } + /* shift when values highest bits are zero */ + result <<= s; + return result; + } + + struct miner_work + { + char sJobID[64]; + uint8_t bWorkBlob[112]; + uint32_t iWorkSize; + uint32_t iResumeCnt; + uint64_t iTarget; + // \todo remove workaround needed for amd + uint32_t iTarget32; + bool bNiceHash; + bool bStall; + size_t iPoolId; + + miner_work() : iWorkSize(0), bNiceHash(false), bStall(true), iPoolId(0) { } + + miner_work(const char* sJobID, const uint8_t* bWork, uint32_t iWorkSize, uint32_t iResumeCnt, + uint64_t iTarget, size_t iPoolId) : iWorkSize(iWorkSize), iResumeCnt(iResumeCnt), + iTarget(iTarget), bNiceHash(false), bStall(false), iPoolId(iPoolId) + { + assert(iWorkSize <= sizeof(bWorkBlob)); + memcpy(this->sJobID, sJobID, sizeof(miner_work::sJobID)); + memcpy(this->bWorkBlob, bWork, iWorkSize); + } + + miner_work(miner_work const&) = delete; + + miner_work& operator=(miner_work const& from) + { + assert(this != &from); + + iWorkSize = from.iWorkSize; + iResumeCnt = from.iResumeCnt; + iTarget = from.iTarget; + iTarget32 = from.iTarget32; + bNiceHash = from.bNiceHash; + bStall = from.bStall; + iPoolId = from.iPoolId; + + assert(iWorkSize <= sizeof(bWorkBlob)); + memcpy(sJobID, from.sJobID, sizeof(sJobID)); + memcpy(bWorkBlob, from.bWorkBlob, iWorkSize); + + return *this; + } + + miner_work(miner_work&& from) : iWorkSize(from.iWorkSize), iTarget(from.iTarget),iTarget32(from.iTarget32), + bStall(from.bStall), iPoolId(from.iPoolId) + { + assert(iWorkSize <= sizeof(bWorkBlob)); + memcpy(sJobID, from.sJobID, sizeof(sJobID)); + memcpy(bWorkBlob, from.bWorkBlob, iWorkSize); + } + + miner_work& operator=(miner_work&& from) + { + assert(this != &from); + + iWorkSize = from.iWorkSize; + iResumeCnt = from.iResumeCnt; + iTarget = from.iTarget; + iTarget32 = from.iTarget32; + bNiceHash = from.bNiceHash; + bStall = from.bStall; + iPoolId = from.iPoolId; + + assert(iWorkSize <= sizeof(bWorkBlob)); + memcpy(sJobID, from.sJobID, sizeof(sJobID)); + memcpy(bWorkBlob, from.bWorkBlob, iWorkSize); + + return *this; + } + }; +} // namepsace xmrstak diff --git a/xmrstak/backend/nvidia/autoAdjust.hpp b/xmrstak/backend/nvidia/autoAdjust.hpp new file mode 100644 index 0000000..84c6dfc --- /dev/null +++ b/xmrstak/backend/nvidia/autoAdjust.hpp @@ -0,0 +1,113 @@ + +#pragma once + +#include "autoAdjust.hpp" + +#include "nvcc_code/cryptonight.h" +#include "jconf.h" +#include "../../console.h" +#include "../../ConfigEditor.hpp" +#include "../../Params.hpp" + +#include <vector> +#include <cstdio> +#include <sstream> +#include <string> + + +namespace xmrstak +{ +namespace nvidia +{ + +class autoAdjust +{ +public: + + autoAdjust() + { + + } + + /** print the adjusted values if needed + * + * Routine exit the application and print the adjusted values if needed else + * nothing is happened. + */ + bool printConfig() + { + int deviceCount = 0; + if(cuda_get_devicecount(&deviceCount) == 0) + std::exit(0); + // evaluate config parameter for if auto adjustment is needed + // evaluate config parameter for if auto adjustment is needed + for(int i = 0; i < deviceCount; i++) + { + + nvid_ctx ctx; + + ctx.device_id = i; + // -1 trigger auto adjustment + ctx.device_blocks = -1; + ctx.device_threads = -1; + + // set all evice option those marked as auto (-1) to a valid value +#ifndef _WIN32 + ctx.device_bfactor = 0; + ctx.device_bsleep = 0; +#else + // windows pass, try to avoid that windows kills the miner if the gpu is blocked for 2 seconds + ctx.device_bfactor = 6; + ctx.device_bsleep = 25; +#endif + if( cuda_get_deviceinfo(&ctx) != 1 ) + { + printer::inst()->print_msg(L0, "Setup failed for GPU %d. Exitting.\n", i); + std::exit(0); + } + nvidCtxVec.push_back(ctx); + + } + + generateThreadConfig(); + return true; + + } + +private: + + void generateThreadConfig() + { + // load the template of the backend config into a char variable + const char *tpl = + #include "./config.tpl" + ; + + ConfigEditor configTpl{}; + configTpl.set( std::string(tpl) ); + + constexpr size_t byte2mib = 1024u * 1024u; + std::string conf; + int i = 0; + for(auto& ctx : nvidCtxVec) + { + conf += std::string(" // gpu: ") + ctx.name + " architecture: " + std::to_string(ctx.device_arch[0] * 10 + ctx.device_arch[1]) + "\n"; + conf += std::string(" // memory: ") + std::to_string(ctx.free_device_memory / byte2mib) + "/" + std::to_string(ctx.total_device_memory / byte2mib) + " MiB\n"; + conf += std::string(" { \"index\" : ") + std::to_string(ctx.device_id) + ",\n" + + " \"threads\" : " + std::to_string(ctx.device_threads) + ", \"blocks\" : " + std::to_string(ctx.device_blocks) + ",\n" + + " \"bfactor\" : " + std::to_string(ctx.device_bfactor) + ", \"bsleep\" : " + std::to_string(ctx.device_bsleep) + ",\n" + + " \"affine_to_cpu\" : false,\n" + + " },\n"; + ++i; + } + + configTpl.replace("GPUCONFIG",conf); + configTpl.write(Params::inst().configFileNVIDIA); + printer::inst()->print_msg(L0, "NVIDIA: GPU configuration stored in file '%s'", Params::inst().configFileNVIDIA.c_str()); + } + + std::vector<nvid_ctx> nvidCtxVec; +}; + +} // namespace nvidia +} // namepsace xmrstak diff --git a/xmrstak/backend/nvidia/config.tpl b/xmrstak/backend/nvidia/config.tpl new file mode 100644 index 0000000..99dc023 --- /dev/null +++ b/xmrstak/backend/nvidia/config.tpl @@ -0,0 +1,28 @@ +R"===( +/* + * GPU configuration. You should play around with threads and blocks as the fastest settings will vary. + * index - GPU index number usually starts from 0. + * threads - Number of GPU threads (nothing to do with CPU threads). + * blocks - Number of GPU blocks (nothing to do with CPU threads). + * bfactor - Enables running the Cryptonight kernel in smaller pieces. + * Increase if you want to reduce GPU lag. Recommended setting on GUI systems - 8 + * bsleep - Insert a delay of X microseconds between kernel launches. + * Increase if you want to reduce GPU lag. Recommended setting on GUI systems - 100 + * affine_to_cpu - This will affine the thread to a CPU. This can make a GPU miner play along nicer with a CPU miner. + * + * On the first run the miner will look at your system and suggest a basic configuration that will work, + * you can try to tweak it from there to get the best performance. + * + * A filled out configuration should look like this: + * "gpu_threads_conf" : + * [ + * { "index" : 0, "threads" : 17, "blocks" : 60, "bfactor" : 0, "bsleep" : 0, "affine_to_cpu" : false}, + * ], + */ + +"gpu_threads_conf" : +[ +GPUCONFIG +], + +)===" diff --git a/xmrstak/backend/nvidia/jconf.cpp b/xmrstak/backend/nvidia/jconf.cpp new file mode 100644 index 0000000..2184acd --- /dev/null +++ b/xmrstak/backend/nvidia/jconf.cpp @@ -0,0 +1,274 @@ +/* + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * Additional permission under GNU GPL version 3 section 7 + * + * If you modify this Program, or any covered work, by linking or combining + * it with OpenSSL (or a modified version of that library), containing parts + * covered by the terms of OpenSSL License and SSLeay License, the licensors + * of this Program grant you additional permission to convey the resulting work. + * + */ + +#include "jconf.h" + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#ifdef _WIN32 +#define strcasecmp _stricmp +#include <intrin.h> +#else +#include <cpuid.h> +#endif + +#include "../../rapidjson/document.h" +#include "../../rapidjson/error/en.h" +#include "../../jext.h" +#include "../../console.h" + +namespace xmrstak +{ +namespace nvidia +{ + +using namespace rapidjson; + +/* + * This enum needs to match index in oConfigValues, otherwise we will get a runtime error + */ +enum configEnum { aGpuThreadsConf }; + +struct configVal { + configEnum iName; + const char* sName; + Type iType; +}; + +// Same order as in configEnum, as per comment above +// kNullType means any type +configVal oConfigValues[] = { + { aGpuThreadsConf, "gpu_threads_conf", kNullType } +}; + +inline bool checkType(Type have, Type want) +{ + if(want == have) + return true; + else if(want == kNullType) + return true; + else if(want == kTrueType && have == kFalseType) + return true; + else if(want == kFalseType && have == kTrueType) + return true; + else + return false; +} + +constexpr size_t iConfigCnt = (sizeof(oConfigValues)/sizeof(oConfigValues[0])); + + + +struct jconf::opaque_private +{ + Document jsonDoc; + const Value* configValues[iConfigCnt]; //Compile time constant + + opaque_private() + { + } +}; + + +bool jconf::NeedsAutoconf() +{ + return !prv->configValues[aGpuThreadsConf]->IsArray(); +} + +jconf* jconf::oInst = nullptr; + +jconf::jconf() +{ + prv = new opaque_private(); +} + +size_t jconf::GetGPUThreadCount() +{ + if(prv->configValues[aGpuThreadsConf]->IsArray()) + return prv->configValues[aGpuThreadsConf]->Size(); + else + return 0; +} + +bool jconf::GetGPUThreadConfig(size_t id, thd_cfg &cfg) +{ + if(!prv->configValues[aGpuThreadsConf]->IsArray()) + return false; + + if(id >= prv->configValues[aGpuThreadsConf]->Size()) + return false; + + const Value& oThdConf = prv->configValues[aGpuThreadsConf]->GetArray()[id]; + + if(!oThdConf.IsObject()) + return false; + + const Value *gid, *blocks, *threads, *bfactor, *bsleep, *aff; + gid = GetObjectMember(oThdConf, "index"); + blocks = GetObjectMember(oThdConf, "blocks"); + threads = GetObjectMember(oThdConf, "threads"); + bfactor = GetObjectMember(oThdConf, "bfactor"); + bsleep = GetObjectMember(oThdConf, "bsleep"); + aff = GetObjectMember(oThdConf, "affine_to_cpu"); + + if(gid == nullptr || blocks == nullptr || threads == nullptr || + bfactor == nullptr || bsleep == nullptr || aff == nullptr) + { + return false; + } + + if(!gid->IsNumber() || gid->GetInt() < 0) + return false; + + if(!blocks->IsNumber() || blocks->GetInt() < 0) + return false; + + if(!threads->IsNumber() || threads->GetInt() < 0) + return false; + + if(!bfactor->IsNumber() || bfactor->GetInt() < 0) + return false; + + if(!bsleep->IsNumber() || bsleep->GetInt() < 0) + return false; + + if(!aff->IsUint64() && !aff->IsBool()) + return false; + + cfg.id = gid->GetInt(); + cfg.blocks = blocks->GetInt(); + cfg.threads = threads->GetInt(); + cfg.bfactor = bfactor->GetInt(); + cfg.bsleep = bsleep->GetInt(); + + if(aff->IsNumber()) + cfg.cpu_aff = aff->GetInt(); + else + cfg.cpu_aff = -1; + + return true; +} + +bool jconf::parse_config(const char* sFilename) +{ + FILE * pFile; + char * buffer; + size_t flen; + + pFile = fopen(sFilename, "rb"); + if (pFile == NULL) + { + printer::inst()->print_msg(L0, "Failed to open config file %s.", sFilename); + return false; + } + + fseek(pFile,0,SEEK_END); + flen = ftell(pFile); + rewind(pFile); + + if(flen >= 64*1024) + { + fclose(pFile); + printer::inst()->print_msg(L0, "Oversized config file - %s.", sFilename); + return false; + } + + if(flen <= 16) + { + fclose(pFile); + printer::inst()->print_msg(L0, "File is empty or too short - %s.", sFilename); + return false; + } + + buffer = (char*)malloc(flen + 3); + if(fread(buffer+1, flen, 1, pFile) != 1) + { + free(buffer); + fclose(pFile); + printer::inst()->print_msg(L0, "Read error while reading %s.", sFilename); + return false; + } + fclose(pFile); + + //Replace Unicode BOM with spaces - we always use UTF-8 + unsigned char* ubuffer = (unsigned char*)buffer; + if(ubuffer[1] == 0xEF && ubuffer[2] == 0xBB && ubuffer[3] == 0xBF) + { + buffer[1] = ' '; + buffer[2] = ' '; + buffer[3] = ' '; + } + + buffer[0] = '{'; + buffer[flen] = '}'; + buffer[flen + 1] = '\0'; + + prv->jsonDoc.Parse<kParseCommentsFlag|kParseTrailingCommasFlag>(buffer, flen+2); + free(buffer); + + if(prv->jsonDoc.HasParseError()) + { + printer::inst()->print_msg(L0, "JSON config parse error(offset %llu): %s", + int_port(prv->jsonDoc.GetErrorOffset()), GetParseError_En(prv->jsonDoc.GetParseError())); + return false; + } + + + if(!prv->jsonDoc.IsObject()) + { //This should never happen as we created the root ourselves + printer::inst()->print_msg(L0, "Invalid config file. No root?\n"); + return false; + } + + for(size_t i = 0; i < iConfigCnt; i++) + { + if(oConfigValues[i].iName != i) + { + printer::inst()->print_msg(L0, "Code error. oConfigValues are not in order. %s",oConfigValues[i].sName); + return false; + } + + prv->configValues[i] = GetObjectMember(prv->jsonDoc, oConfigValues[i].sName); + + if(prv->configValues[i] == nullptr) + { + printer::inst()->print_msg(L0, "Invalid config file. Missing value \"%s\".", oConfigValues[i].sName); + return false; + } + + if(!checkType(prv->configValues[i]->GetType(), oConfigValues[i].iType)) + { + printer::inst()->print_msg(L0, "Invalid config file. Value \"%s\" has unexpected type.", oConfigValues[i].sName); + return false; + } + } + if(NeedsAutoconf()) + return true; + + return true; +} + +} // namespace nvidia +} // namespace xmrstak
\ No newline at end of file diff --git a/xmrstak/backend/nvidia/jconf.hpp b/xmrstak/backend/nvidia/jconf.hpp new file mode 100644 index 0000000..8959088 --- /dev/null +++ b/xmrstak/backend/nvidia/jconf.hpp @@ -0,0 +1,51 @@ +#pragma once +#include <stdlib.h> +#include <string> +#include "../../Params.hpp" + +namespace xmrstak +{ +namespace nvidia +{ + +class jconf +{ +public: + static jconf* inst() + { + if (oInst == nullptr) oInst = new jconf; + return oInst; + }; + + bool parse_config(const char* sFilename = Params::inst().configFileNVIDIA.c_str()); + + struct thd_cfg { + uint32_t id; + uint32_t blocks; + uint32_t threads; + uint32_t bfactor; + uint32_t bsleep; + bool bDoubleMode; + bool bNoPrefetch; + int32_t cpu_aff; + + long long iCpuAff; + }; + + size_t GetGPUThreadCount(); + + bool GetGPUThreadConfig(size_t id, thd_cfg &cfg); + + bool NeedsAutoconf(); + +private: + jconf(); + static jconf* oInst; + + struct opaque_private; + opaque_private* prv; + +}; + +} // namespace nvidia +} // namepsace xmrstak diff --git a/xmrstak/backend/nvidia/minethd.cpp b/xmrstak/backend/nvidia/minethd.cpp new file mode 100644 index 0000000..cbee219 --- /dev/null +++ b/xmrstak/backend/nvidia/minethd.cpp @@ -0,0 +1,273 @@ +/* + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * Additional permission under GNU GPL version 3 section 7 + * + * If you modify this Program, or any covered work, by linking or combining + * it with OpenSSL (or a modified version of that library), containing parts + * covered by the terms of OpenSSL License and SSLeay License, the licensors + * of this Program grant you additional permission to convey the resulting work. + * + */ + +#include <assert.h> +#include <cmath> +#include <chrono> +#include <thread> +#include <bitset> +#include <vector> +#include "../../console.h" +#include "../../crypto/cryptonight_aesni.h" +#include "../cpu/minethd.h" +#include "../../Params.hpp" + +#include "../../executor.h" +#include "minethd.h" +#include "../../jconf.h" +#include "../../crypto/cryptonight.h" +#include "../../Environment.hpp" +#include "autoAdjust.hpp" + + +#ifndef USE_PRECOMPILED_HEADERS +#ifdef WIN32 +#include <direct.h> +#include <windows.h> +#else +#include <sys/types.h> +#include <dlfcn.h> +#endif +#include <iostream> +#endif + +namespace xmrstak +{ +namespace nvidia +{ + +#ifdef WIN32 + HINSTANCE lib_handle; +#else + void *lib_handle; +#endif + +minethd::minethd(miner_work& pWork, size_t iNo, const jconf::thd_cfg& cfg) +{ + oWork = pWork; + bQuit = 0; + iThreadNo = (uint8_t)iNo; + iJobNo = 0; + + ctx.device_id = (int)cfg.id; + ctx.device_blocks = (int)cfg.blocks; + ctx.device_threads = (int)cfg.threads; + ctx.device_bfactor = (int)cfg.bfactor; + ctx.device_bsleep = (int)cfg.bsleep; + + oWorkThd = std::thread(&minethd::work_main, this); +} + + +bool minethd::self_test() +{ + cryptonight_ctx* ctx0; + unsigned char out[32]; + bool bResult = true; + + ctx0 = new cryptonight_ctx; + if(::jconf::inst()->HaveHardwareAes()) + { + //cryptonight_hash_ctx("This is a test", 14, out, ctx0); + bResult = memcmp(out, "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05", 32) == 0; + } + else + { + //cryptonight_hash_ctx_soft("This is a test", 14, out, ctx0); + bResult = memcmp(out, "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05", 32) == 0; + } + delete ctx0; + + //if(!bResult) + // printer::inst()->print_msg(L0, + // "Cryptonight hash self-test failed. This might be caused by bad compiler optimizations."); + + return bResult; +} + + +extern "C" +{ +#ifdef WIN32 +__declspec(dllexport) +#endif +std::vector<IBackend*>* xmrstak_start_backend(uint32_t threadOffset, miner_work& pWork, Environment& env) +{ + Environment::inst() = env; + return nvidia::minethd::thread_starter(threadOffset, pWork); +} +} // extern "C" + +std::vector<IBackend*>* minethd::thread_starter(uint32_t threadOffset, miner_work& pWork) +{ + std::vector<IBackend*>* pvThreads = new std::vector<IBackend*>(); + + if(!ConfigEditor::file_exist(Params::inst().configFileNVIDIA)) + { + autoAdjust adjust; + if(!adjust.printConfig()) + return pvThreads; + } + + if(!jconf::inst()->parse_config()) + { + win_exit(); + } + + int deviceCount = 0; + if(cuda_get_devicecount(&deviceCount) != 1) + { + std::cout<<"WARNING: NVIDIA no device found"<<std::endl; + return pvThreads; + } + + size_t i, n = jconf::inst()->GetGPUThreadCount(); + pvThreads->reserve(n); + + jconf::thd_cfg cfg; + for (i = 0; i < n; i++) + { + jconf::inst()->GetGPUThreadConfig(i, cfg); + minethd* thd = new minethd(pWork, i + threadOffset, cfg); + + if(cfg.cpu_aff >= 0) + { +#if defined(__APPLE__) + printer::inst()->print_msg(L1, "WARNING on MacOS thread affinity is only advisory."); +#endif + cpu::minethd::thd_setaffinity(thd->oWorkThd.native_handle(), cfg.cpu_aff); + } + + pvThreads->push_back(thd); + + if(cfg.cpu_aff >= 0) + printer::inst()->print_msg(L1, "Starting GPU thread, affinity: %d.", (int)cfg.cpu_aff); + else + printer::inst()->print_msg(L1, "Starting GPU thread, no affinity."); + } + + return pvThreads; +} + +void minethd::switch_work(miner_work& pWork) +{ + // iConsumeCnt is a basic lock-like polling mechanism just in case we happen to push work + // faster than threads can consume them. This should never happen in real life. + // Pool cant physically send jobs faster than every 250ms or so due to net latency. + + while (GlobalStates::inst().iConsumeCnt.load(std::memory_order_seq_cst) < GlobalStates::inst().iThreadCount) + std::this_thread::sleep_for(std::chrono::milliseconds(100)); + + GlobalStates::inst().oGlobalWork = pWork; + GlobalStates::inst().iConsumeCnt.store(0, std::memory_order_seq_cst); + GlobalStates::inst().iGlobalJobNo++; +} + +void minethd::consume_work() +{ + memcpy(&oWork, &GlobalStates::inst().oGlobalWork, sizeof(miner_work)); + iJobNo++; + GlobalStates::inst().iConsumeCnt++; +} + +void minethd::work_main() +{ + uint64_t iCount = 0; + uint32_t iNonce; + cryptonight_ctx* cpu_ctx; + cpu_ctx = cpu::minethd::minethd_alloc_ctx(); + cn_hash_fun hash_fun = cpu::minethd::func_selector(::jconf::inst()->HaveHardwareAes(), true /*bNoPrefetch*/); + + GlobalStates::inst().iConsumeCnt++; + + if(/*cuda_get_deviceinfo(&ctx) != 1 ||*/ cryptonight_extra_cpu_init(&ctx) != 1) + { + printer::inst()->print_msg(L0, "Setup failed for GPU %d. Exitting.\n", (int)iThreadNo); + std::exit(0); + } + + while (bQuit == 0) + { + if (oWork.bStall) + { + /* We are stalled here because the executor didn't find a job for us yet, + either because of network latency, or a socket problem. Since we are + raison d'etre of this software it us sensible to just wait until we have something*/ + + while (GlobalStates::inst().iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo) + std::this_thread::sleep_for(std::chrono::milliseconds(100)); + + consume_work(); + continue; + } + + cryptonight_extra_cpu_set_data(&ctx, oWork.bWorkBlob, oWork.iWorkSize); + iNonce = calc_start_nonce(oWork.iResumeCnt); + + assert(sizeof(job_result::sJobID) == sizeof(pool_job::sJobID)); + + while(GlobalStates::inst().iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo) + { + + uint32_t foundNonce[10]; + uint32_t foundCount; + + cryptonight_extra_cpu_prepare(&ctx, iNonce); + cryptonight_core_cpu_hash(&ctx); + cryptonight_extra_cpu_final(&ctx, iNonce, oWork.iTarget, &foundCount, foundNonce); + + for(size_t i = 0; i < foundCount; i++) + { + + uint8_t bWorkBlob[112]; + uint8_t bResult[32]; + + memcpy(bWorkBlob, oWork.bWorkBlob, oWork.iWorkSize); + memset(bResult, 0, sizeof(job_result::bResult)); + + *(uint32_t*)(bWorkBlob + 39) = foundNonce[i]; + + hash_fun(bWorkBlob, oWork.iWorkSize, bResult, cpu_ctx); + if ( (*((uint64_t*)(bResult + 24))) < oWork.iTarget) + executor::inst()->push_event(ex_event(job_result(oWork.sJobID, foundNonce[i], bResult), oWork.iPoolId)); + else + executor::inst()->log_result_error("NVIDIA Invalid Result"); + } + + iCount += ctx.device_blocks * ctx.device_threads; + iNonce += ctx.device_blocks * ctx.device_threads; + + using namespace std::chrono; + uint64_t iStamp = time_point_cast<milliseconds>(high_resolution_clock::now()).time_since_epoch().count(); + iHashCount.store(iCount, std::memory_order_relaxed); + iTimestamp.store(iStamp, std::memory_order_relaxed); + std::this_thread::yield(); + } + + consume_work(); + } +} + +} // namespace xmrstak + +} //namespace nvidia diff --git a/xmrstak/backend/nvidia/minethd.hpp b/xmrstak/backend/nvidia/minethd.hpp new file mode 100644 index 0000000..9f3993e --- /dev/null +++ b/xmrstak/backend/nvidia/minethd.hpp @@ -0,0 +1,66 @@ +#pragma once +#include <thread> +#include <atomic> +#include <vector> +#include "nvcc_code/cryptonight.h" +#include "../../crypto/cryptonight.h" +#include "../../jconf.h" +#include "./jconf.h" +#include "../IBackend.hpp" +#include "../../Environment.hpp" +#include <iostream> + +namespace xmrstak +{ +namespace nvidia +{ + +class minethd : public IBackend +{ +public: + + static void switch_work(miner_work& pWork); + static std::vector<IBackend*>* thread_starter(uint32_t threadOffset, miner_work& pWork); + static bool self_test(); + +private: + typedef void (*cn_hash_fun)(const void*, size_t, void*, cryptonight_ctx*); + + minethd(miner_work& pWork, size_t iNo, const jconf::thd_cfg& cfg); + + // We use the top 10 bits of the nonce for thread and resume + // This allows us to resume up to 128 threads 4 times before + // we get nonce collisions + // Bottom 22 bits allow for an hour of work at 1000 H/s + inline uint32_t calc_start_nonce(uint32_t resume) + { + return reverseBits<uint32_t>(iThreadNo + GlobalStates::inst().iThreadCount * resume); + } + + // Limited version of the nonce calc above + inline uint32_t calc_nicehash_nonce(uint32_t start, uint32_t resume) + { + return start | ( ( reverseBits(iThreadNo + GlobalStates::inst().iThreadCount * resume) >> 4u ) ); + } + + void work_main(); + void consume_work(); + + static std::atomic<uint64_t> iGlobalJobNo; + static std::atomic<uint64_t> iConsumeCnt; + static uint64_t iThreadCount; + uint64_t iJobNo; + + static miner_work oGlobalWork; + miner_work oWork; + + std::thread oWorkThd; + uint8_t iThreadNo; + + nvid_ctx ctx; + + bool bQuit; +}; + +} // namespace nvidia +} // namepsace xmrstak diff --git a/xmrstak/backend/nvidia/nvcc_code/cryptonight.hpp b/xmrstak/backend/nvidia/nvcc_code/cryptonight.hpp new file mode 100644 index 0000000..784c38d --- /dev/null +++ b/xmrstak/backend/nvidia/nvcc_code/cryptonight.hpp @@ -0,0 +1,48 @@ +#pragma once + +#include <stdint.h> +#include <string> + +typedef struct { + int device_id; + const char *device_name; + int device_arch[2]; + int device_mpcount; + int device_blocks; + int device_threads; + int device_bfactor; + int device_bsleep; + + uint32_t *d_input; + uint32_t inputlen; + uint32_t *d_result_count; + uint32_t *d_result_nonce; + uint32_t *d_long_state; + uint32_t *d_ctx_state; + uint32_t *d_ctx_a; + uint32_t *d_ctx_b; + uint32_t *d_ctx_key1; + uint32_t *d_ctx_key2; + uint32_t *d_ctx_text; + std::string name; + size_t free_device_memory; + size_t total_device_memory; +} nvid_ctx; + +extern "C" { + +/** get device count + * + * @param deviceCount[out] cuda device count + * @return error code: 0 == error is occurred, 1 == no error + */ +int cuda_get_devicecount( int* deviceCount); +int cuda_get_deviceinfo(nvid_ctx *ctx); +int cryptonight_extra_cpu_init(nvid_ctx *ctx); +void cryptonight_extra_cpu_set_data( nvid_ctx* ctx, const void *data, uint32_t len); +void cryptonight_extra_cpu_prepare(nvid_ctx* ctx, uint32_t startNonce); +void cryptonight_core_cpu_hash(nvid_ctx* ctx); +void cryptonight_extra_cpu_final(nvid_ctx* ctx, uint32_t startNonce, uint64_t target, uint32_t* rescount, uint32_t *resnonce); + +} + diff --git a/xmrstak/backend/nvidia/nvcc_code/cuda_aes.hpp b/xmrstak/backend/nvidia/nvcc_code/cuda_aes.hpp new file mode 100644 index 0000000..e478600 --- /dev/null +++ b/xmrstak/backend/nvidia/nvcc_code/cuda_aes.hpp @@ -0,0 +1,305 @@ + +#pragma once + +#include <stdint.h> + +#define N_COLS 4 +#define WPOLY 0x011b + +static __constant__ uint32_t d_t_fn[1024] = +{ + 0xa56363c6U, 0x847c7cf8U, 0x997777eeU, 0x8d7b7bf6U, + 0x0df2f2ffU, 0xbd6b6bd6U, 0xb16f6fdeU, 0x54c5c591U, + 0x50303060U, 0x03010102U, 0xa96767ceU, 0x7d2b2b56U, + 0x19fefee7U, 0x62d7d7b5U, 0xe6abab4dU, 0x9a7676ecU, + 0x45caca8fU, 0x9d82821fU, 0x40c9c989U, 0x877d7dfaU, + 0x15fafaefU, 0xeb5959b2U, 0xc947478eU, 0x0bf0f0fbU, + 0xecadad41U, 0x67d4d4b3U, 0xfda2a25fU, 0xeaafaf45U, + 0xbf9c9c23U, 0xf7a4a453U, 0x967272e4U, 0x5bc0c09bU, + 0xc2b7b775U, 0x1cfdfde1U, 0xae93933dU, 0x6a26264cU, + 0x5a36366cU, 0x413f3f7eU, 0x02f7f7f5U, 0x4fcccc83U, + 0x5c343468U, 0xf4a5a551U, 0x34e5e5d1U, 0x08f1f1f9U, + 0x937171e2U, 0x73d8d8abU, 0x53313162U, 0x3f15152aU, + 0x0c040408U, 0x52c7c795U, 0x65232346U, 0x5ec3c39dU, + 0x28181830U, 0xa1969637U, 0x0f05050aU, 0xb59a9a2fU, + 0x0907070eU, 0x36121224U, 0x9b80801bU, 0x3de2e2dfU, + 0x26ebebcdU, 0x6927274eU, 0xcdb2b27fU, 0x9f7575eaU, + 0x1b090912U, 0x9e83831dU, 0x742c2c58U, 0x2e1a1a34U, + 0x2d1b1b36U, 0xb26e6edcU, 0xee5a5ab4U, 0xfba0a05bU, + 0xf65252a4U, 0x4d3b3b76U, 0x61d6d6b7U, 0xceb3b37dU, + 0x7b292952U, 0x3ee3e3ddU, 0x712f2f5eU, 0x97848413U, + 0xf55353a6U, 0x68d1d1b9U, 0x00000000U, 0x2cededc1U, + 0x60202040U, 0x1ffcfce3U, 0xc8b1b179U, 0xed5b5bb6U, + 0xbe6a6ad4U, 0x46cbcb8dU, 0xd9bebe67U, 0x4b393972U, + 0xde4a4a94U, 0xd44c4c98U, 0xe85858b0U, 0x4acfcf85U, + 0x6bd0d0bbU, 0x2aefefc5U, 0xe5aaaa4fU, 0x16fbfbedU, + 0xc5434386U, 0xd74d4d9aU, 0x55333366U, 0x94858511U, + 0xcf45458aU, 0x10f9f9e9U, 0x06020204U, 0x817f7ffeU, + 0xf05050a0U, 0x443c3c78U, 0xba9f9f25U, 0xe3a8a84bU, + 0xf35151a2U, 0xfea3a35dU, 0xc0404080U, 0x8a8f8f05U, + 0xad92923fU, 0xbc9d9d21U, 0x48383870U, 0x04f5f5f1U, + 0xdfbcbc63U, 0xc1b6b677U, 0x75dadaafU, 0x63212142U, + 0x30101020U, 0x1affffe5U, 0x0ef3f3fdU, 0x6dd2d2bfU, + 0x4ccdcd81U, 0x140c0c18U, 0x35131326U, 0x2fececc3U, + 0xe15f5fbeU, 0xa2979735U, 0xcc444488U, 0x3917172eU, + 0x57c4c493U, 0xf2a7a755U, 0x827e7efcU, 0x473d3d7aU, + 0xac6464c8U, 0xe75d5dbaU, 0x2b191932U, 0x957373e6U, + 0xa06060c0U, 0x98818119U, 0xd14f4f9eU, 0x7fdcdca3U, + 0x66222244U, 0x7e2a2a54U, 0xab90903bU, 0x8388880bU, + 0xca46468cU, 0x29eeeec7U, 0xd3b8b86bU, 0x3c141428U, + 0x79dedea7U, 0xe25e5ebcU, 0x1d0b0b16U, 0x76dbdbadU, + 0x3be0e0dbU, 0x56323264U, 0x4e3a3a74U, 0x1e0a0a14U, + 0xdb494992U, 0x0a06060cU, 0x6c242448U, 0xe45c5cb8U, + 0x5dc2c29fU, 0x6ed3d3bdU, 0xefacac43U, 0xa66262c4U, + 0xa8919139U, 0xa4959531U, 0x37e4e4d3U, 0x8b7979f2U, + 0x32e7e7d5U, 0x43c8c88bU, 0x5937376eU, 0xb76d6ddaU, + 0x8c8d8d01U, 0x64d5d5b1U, 0xd24e4e9cU, 0xe0a9a949U, + 0xb46c6cd8U, 0xfa5656acU, 0x07f4f4f3U, 0x25eaeacfU, + 0xaf6565caU, 0x8e7a7af4U, 0xe9aeae47U, 0x18080810U, + 0xd5baba6fU, 0x887878f0U, 0x6f25254aU, 0x722e2e5cU, + 0x241c1c38U, 0xf1a6a657U, 0xc7b4b473U, 0x51c6c697U, + 0x23e8e8cbU, 0x7cdddda1U, 0x9c7474e8U, 0x211f1f3eU, + 0xdd4b4b96U, 0xdcbdbd61U, 0x868b8b0dU, 0x858a8a0fU, + 0x907070e0U, 0x423e3e7cU, 0xc4b5b571U, 0xaa6666ccU, + 0xd8484890U, 0x05030306U, 0x01f6f6f7U, 0x120e0e1cU, + 0xa36161c2U, 0x5f35356aU, 0xf95757aeU, 0xd0b9b969U, + 0x91868617U, 0x58c1c199U, 0x271d1d3aU, 0xb99e9e27U, + 0x38e1e1d9U, 0x13f8f8ebU, 0xb398982bU, 0x33111122U, + 0xbb6969d2U, 0x70d9d9a9U, 0x898e8e07U, 0xa7949433U, + 0xb69b9b2dU, 0x221e1e3cU, 0x92878715U, 0x20e9e9c9U, + 0x49cece87U, 0xff5555aaU, 0x78282850U, 0x7adfdfa5U, + 0x8f8c8c03U, 0xf8a1a159U, 0x80898909U, 0x170d0d1aU, + 0xdabfbf65U, 0x31e6e6d7U, 0xc6424284U, 0xb86868d0U, + 0xc3414182U, 0xb0999929U, 0x772d2d5aU, 0x110f0f1eU, + 0xcbb0b07bU, 0xfc5454a8U, 0xd6bbbb6dU, 0x3a16162cU, + 0x6363c6a5U, 0x7c7cf884U, 0x7777ee99U, 0x7b7bf68dU, + 0xf2f2ff0dU, 0x6b6bd6bdU, 0x6f6fdeb1U, 0xc5c59154U, + 0x30306050U, 0x01010203U, 0x6767cea9U, 0x2b2b567dU, + 0xfefee719U, 0xd7d7b562U, 0xabab4de6U, 0x7676ec9aU, + 0xcaca8f45U, 0x82821f9dU, 0xc9c98940U, 0x7d7dfa87U, + 0xfafaef15U, 0x5959b2ebU, 0x47478ec9U, 0xf0f0fb0bU, + 0xadad41ecU, 0xd4d4b367U, 0xa2a25ffdU, 0xafaf45eaU, + 0x9c9c23bfU, 0xa4a453f7U, 0x7272e496U, 0xc0c09b5bU, + 0xb7b775c2U, 0xfdfde11cU, 0x93933daeU, 0x26264c6aU, + 0x36366c5aU, 0x3f3f7e41U, 0xf7f7f502U, 0xcccc834fU, + 0x3434685cU, 0xa5a551f4U, 0xe5e5d134U, 0xf1f1f908U, + 0x7171e293U, 0xd8d8ab73U, 0x31316253U, 0x15152a3fU, + 0x0404080cU, 0xc7c79552U, 0x23234665U, 0xc3c39d5eU, + 0x18183028U, 0x969637a1U, 0x05050a0fU, 0x9a9a2fb5U, + 0x07070e09U, 0x12122436U, 0x80801b9bU, 0xe2e2df3dU, + 0xebebcd26U, 0x27274e69U, 0xb2b27fcdU, 0x7575ea9fU, + 0x0909121bU, 0x83831d9eU, 0x2c2c5874U, 0x1a1a342eU, + 0x1b1b362dU, 0x6e6edcb2U, 0x5a5ab4eeU, 0xa0a05bfbU, + 0x5252a4f6U, 0x3b3b764dU, 0xd6d6b761U, 0xb3b37dceU, + 0x2929527bU, 0xe3e3dd3eU, 0x2f2f5e71U, 0x84841397U, + 0x5353a6f5U, 0xd1d1b968U, 0x00000000U, 0xededc12cU, + 0x20204060U, 0xfcfce31fU, 0xb1b179c8U, 0x5b5bb6edU, + 0x6a6ad4beU, 0xcbcb8d46U, 0xbebe67d9U, 0x3939724bU, + 0x4a4a94deU, 0x4c4c98d4U, 0x5858b0e8U, 0xcfcf854aU, + 0xd0d0bb6bU, 0xefefc52aU, 0xaaaa4fe5U, 0xfbfbed16U, + 0x434386c5U, 0x4d4d9ad7U, 0x33336655U, 0x85851194U, + 0x45458acfU, 0xf9f9e910U, 0x02020406U, 0x7f7ffe81U, + 0x5050a0f0U, 0x3c3c7844U, 0x9f9f25baU, 0xa8a84be3U, + 0x5151a2f3U, 0xa3a35dfeU, 0x404080c0U, 0x8f8f058aU, + 0x92923fadU, 0x9d9d21bcU, 0x38387048U, 0xf5f5f104U, + 0xbcbc63dfU, 0xb6b677c1U, 0xdadaaf75U, 0x21214263U, + 0x10102030U, 0xffffe51aU, 0xf3f3fd0eU, 0xd2d2bf6dU, + 0xcdcd814cU, 0x0c0c1814U, 0x13132635U, 0xececc32fU, + 0x5f5fbee1U, 0x979735a2U, 0x444488ccU, 0x17172e39U, + 0xc4c49357U, 0xa7a755f2U, 0x7e7efc82U, 0x3d3d7a47U, + 0x6464c8acU, 0x5d5dbae7U, 0x1919322bU, 0x7373e695U, + 0x6060c0a0U, 0x81811998U, 0x4f4f9ed1U, 0xdcdca37fU, + 0x22224466U, 0x2a2a547eU, 0x90903babU, 0x88880b83U, + 0x46468ccaU, 0xeeeec729U, 0xb8b86bd3U, 0x1414283cU, + 0xdedea779U, 0x5e5ebce2U, 0x0b0b161dU, 0xdbdbad76U, + 0xe0e0db3bU, 0x32326456U, 0x3a3a744eU, 0x0a0a141eU, + 0x494992dbU, 0x06060c0aU, 0x2424486cU, 0x5c5cb8e4U, + 0xc2c29f5dU, 0xd3d3bd6eU, 0xacac43efU, 0x6262c4a6U, + 0x919139a8U, 0x959531a4U, 0xe4e4d337U, 0x7979f28bU, + 0xe7e7d532U, 0xc8c88b43U, 0x37376e59U, 0x6d6ddab7U, + 0x8d8d018cU, 0xd5d5b164U, 0x4e4e9cd2U, 0xa9a949e0U, + 0x6c6cd8b4U, 0x5656acfaU, 0xf4f4f307U, 0xeaeacf25U, + 0x6565caafU, 0x7a7af48eU, 0xaeae47e9U, 0x08081018U, + 0xbaba6fd5U, 0x7878f088U, 0x25254a6fU, 0x2e2e5c72U, + 0x1c1c3824U, 0xa6a657f1U, 0xb4b473c7U, 0xc6c69751U, + 0xe8e8cb23U, 0xdddda17cU, 0x7474e89cU, 0x1f1f3e21U, + 0x4b4b96ddU, 0xbdbd61dcU, 0x8b8b0d86U, 0x8a8a0f85U, + 0x7070e090U, 0x3e3e7c42U, 0xb5b571c4U, 0x6666ccaaU, + 0x484890d8U, 0x03030605U, 0xf6f6f701U, 0x0e0e1c12U, + 0x6161c2a3U, 0x35356a5fU, 0x5757aef9U, 0xb9b969d0U, + 0x86861791U, 0xc1c19958U, 0x1d1d3a27U, 0x9e9e27b9U, + 0xe1e1d938U, 0xf8f8eb13U, 0x98982bb3U, 0x11112233U, + 0x6969d2bbU, 0xd9d9a970U, 0x8e8e0789U, 0x949433a7U, + 0x9b9b2db6U, 0x1e1e3c22U, 0x87871592U, 0xe9e9c920U, + 0xcece8749U, 0x5555aaffU, 0x28285078U, 0xdfdfa57aU, + 0x8c8c038fU, 0xa1a159f8U, 0x89890980U, 0x0d0d1a17U, + 0xbfbf65daU, 0xe6e6d731U, 0x424284c6U, 0x6868d0b8U, + 0x414182c3U, 0x999929b0U, 0x2d2d5a77U, 0x0f0f1e11U, + 0xb0b07bcbU, 0x5454a8fcU, 0xbbbb6dd6U, 0x16162c3aU, + 0x63c6a563U, 0x7cf8847cU, 0x77ee9977U, 0x7bf68d7bU, + 0xf2ff0df2U, 0x6bd6bd6bU, 0x6fdeb16fU, 0xc59154c5U, + 0x30605030U, 0x01020301U, 0x67cea967U, 0x2b567d2bU, + 0xfee719feU, 0xd7b562d7U, 0xab4de6abU, 0x76ec9a76U, + 0xca8f45caU, 0x821f9d82U, 0xc98940c9U, 0x7dfa877dU, + 0xfaef15faU, 0x59b2eb59U, 0x478ec947U, 0xf0fb0bf0U, + 0xad41ecadU, 0xd4b367d4U, 0xa25ffda2U, 0xaf45eaafU, + 0x9c23bf9cU, 0xa453f7a4U, 0x72e49672U, 0xc09b5bc0U, + 0xb775c2b7U, 0xfde11cfdU, 0x933dae93U, 0x264c6a26U, + 0x366c5a36U, 0x3f7e413fU, 0xf7f502f7U, 0xcc834fccU, + 0x34685c34U, 0xa551f4a5U, 0xe5d134e5U, 0xf1f908f1U, + 0x71e29371U, 0xd8ab73d8U, 0x31625331U, 0x152a3f15U, + 0x04080c04U, 0xc79552c7U, 0x23466523U, 0xc39d5ec3U, + 0x18302818U, 0x9637a196U, 0x050a0f05U, 0x9a2fb59aU, + 0x070e0907U, 0x12243612U, 0x801b9b80U, 0xe2df3de2U, + 0xebcd26ebU, 0x274e6927U, 0xb27fcdb2U, 0x75ea9f75U, + 0x09121b09U, 0x831d9e83U, 0x2c58742cU, 0x1a342e1aU, + 0x1b362d1bU, 0x6edcb26eU, 0x5ab4ee5aU, 0xa05bfba0U, + 0x52a4f652U, 0x3b764d3bU, 0xd6b761d6U, 0xb37dceb3U, + 0x29527b29U, 0xe3dd3ee3U, 0x2f5e712fU, 0x84139784U, + 0x53a6f553U, 0xd1b968d1U, 0x00000000U, 0xedc12cedU, + 0x20406020U, 0xfce31ffcU, 0xb179c8b1U, 0x5bb6ed5bU, + 0x6ad4be6aU, 0xcb8d46cbU, 0xbe67d9beU, 0x39724b39U, + 0x4a94de4aU, 0x4c98d44cU, 0x58b0e858U, 0xcf854acfU, + 0xd0bb6bd0U, 0xefc52aefU, 0xaa4fe5aaU, 0xfbed16fbU, + 0x4386c543U, 0x4d9ad74dU, 0x33665533U, 0x85119485U, + 0x458acf45U, 0xf9e910f9U, 0x02040602U, 0x7ffe817fU, + 0x50a0f050U, 0x3c78443cU, 0x9f25ba9fU, 0xa84be3a8U, + 0x51a2f351U, 0xa35dfea3U, 0x4080c040U, 0x8f058a8fU, + 0x923fad92U, 0x9d21bc9dU, 0x38704838U, 0xf5f104f5U, + 0xbc63dfbcU, 0xb677c1b6U, 0xdaaf75daU, 0x21426321U, + 0x10203010U, 0xffe51affU, 0xf3fd0ef3U, 0xd2bf6dd2U, + 0xcd814ccdU, 0x0c18140cU, 0x13263513U, 0xecc32fecU, + 0x5fbee15fU, 0x9735a297U, 0x4488cc44U, 0x172e3917U, + 0xc49357c4U, 0xa755f2a7U, 0x7efc827eU, 0x3d7a473dU, + 0x64c8ac64U, 0x5dbae75dU, 0x19322b19U, 0x73e69573U, + 0x60c0a060U, 0x81199881U, 0x4f9ed14fU, 0xdca37fdcU, + 0x22446622U, 0x2a547e2aU, 0x903bab90U, 0x880b8388U, + 0x468cca46U, 0xeec729eeU, 0xb86bd3b8U, 0x14283c14U, + 0xdea779deU, 0x5ebce25eU, 0x0b161d0bU, 0xdbad76dbU, + 0xe0db3be0U, 0x32645632U, 0x3a744e3aU, 0x0a141e0aU, + 0x4992db49U, 0x060c0a06U, 0x24486c24U, 0x5cb8e45cU, + 0xc29f5dc2U, 0xd3bd6ed3U, 0xac43efacU, 0x62c4a662U, + 0x9139a891U, 0x9531a495U, 0xe4d337e4U, 0x79f28b79U, + 0xe7d532e7U, 0xc88b43c8U, 0x376e5937U, 0x6ddab76dU, + 0x8d018c8dU, 0xd5b164d5U, 0x4e9cd24eU, 0xa949e0a9U, + 0x6cd8b46cU, 0x56acfa56U, 0xf4f307f4U, 0xeacf25eaU, + 0x65caaf65U, 0x7af48e7aU, 0xae47e9aeU, 0x08101808U, + 0xba6fd5baU, 0x78f08878U, 0x254a6f25U, 0x2e5c722eU, + 0x1c38241cU, 0xa657f1a6U, 0xb473c7b4U, 0xc69751c6U, + 0xe8cb23e8U, 0xdda17cddU, 0x74e89c74U, 0x1f3e211fU, + 0x4b96dd4bU, 0xbd61dcbdU, 0x8b0d868bU, 0x8a0f858aU, + 0x70e09070U, 0x3e7c423eU, 0xb571c4b5U, 0x66ccaa66U, + 0x4890d848U, 0x03060503U, 0xf6f701f6U, 0x0e1c120eU, + 0x61c2a361U, 0x356a5f35U, 0x57aef957U, 0xb969d0b9U, + 0x86179186U, 0xc19958c1U, 0x1d3a271dU, 0x9e27b99eU, + 0xe1d938e1U, 0xf8eb13f8U, 0x982bb398U, 0x11223311U, + 0x69d2bb69U, 0xd9a970d9U, 0x8e07898eU, 0x9433a794U, + 0x9b2db69bU, 0x1e3c221eU, 0x87159287U, 0xe9c920e9U, + 0xce8749ceU, 0x55aaff55U, 0x28507828U, 0xdfa57adfU, + 0x8c038f8cU, 0xa159f8a1U, 0x89098089U, 0x0d1a170dU, + 0xbf65dabfU, 0xe6d731e6U, 0x4284c642U, 0x68d0b868U, + 0x4182c341U, 0x9929b099U, 0x2d5a772dU, 0x0f1e110fU, + 0xb07bcbb0U, 0x54a8fc54U, 0xbb6dd6bbU, 0x162c3a16U, + 0xc6a56363U, 0xf8847c7cU, 0xee997777U, 0xf68d7b7bU, + 0xff0df2f2U, 0xd6bd6b6bU, 0xdeb16f6fU, 0x9154c5c5U, + 0x60503030U, 0x02030101U, 0xcea96767U, 0x567d2b2bU, + 0xe719fefeU, 0xb562d7d7U, 0x4de6ababU, 0xec9a7676U, + 0x8f45cacaU, 0x1f9d8282U, 0x8940c9c9U, 0xfa877d7dU, + 0xef15fafaU, 0xb2eb5959U, 0x8ec94747U, 0xfb0bf0f0U, + 0x41ecadadU, 0xb367d4d4U, 0x5ffda2a2U, 0x45eaafafU, + 0x23bf9c9cU, 0x53f7a4a4U, 0xe4967272U, 0x9b5bc0c0U, + 0x75c2b7b7U, 0xe11cfdfdU, 0x3dae9393U, 0x4c6a2626U, + 0x6c5a3636U, 0x7e413f3fU, 0xf502f7f7U, 0x834fccccU, + 0x685c3434U, 0x51f4a5a5U, 0xd134e5e5U, 0xf908f1f1U, + 0xe2937171U, 0xab73d8d8U, 0x62533131U, 0x2a3f1515U, + 0x080c0404U, 0x9552c7c7U, 0x46652323U, 0x9d5ec3c3U, + 0x30281818U, 0x37a19696U, 0x0a0f0505U, 0x2fb59a9aU, + 0x0e090707U, 0x24361212U, 0x1b9b8080U, 0xdf3de2e2U, + 0xcd26ebebU, 0x4e692727U, 0x7fcdb2b2U, 0xea9f7575U, + 0x121b0909U, 0x1d9e8383U, 0x58742c2cU, 0x342e1a1aU, + 0x362d1b1bU, 0xdcb26e6eU, 0xb4ee5a5aU, 0x5bfba0a0U, + 0xa4f65252U, 0x764d3b3bU, 0xb761d6d6U, 0x7dceb3b3U, + 0x527b2929U, 0xdd3ee3e3U, 0x5e712f2fU, 0x13978484U, + 0xa6f55353U, 0xb968d1d1U, 0x00000000U, 0xc12cededU, + 0x40602020U, 0xe31ffcfcU, 0x79c8b1b1U, 0xb6ed5b5bU, + 0xd4be6a6aU, 0x8d46cbcbU, 0x67d9bebeU, 0x724b3939U, + 0x94de4a4aU, 0x98d44c4cU, 0xb0e85858U, 0x854acfcfU, + 0xbb6bd0d0U, 0xc52aefefU, 0x4fe5aaaaU, 0xed16fbfbU, + 0x86c54343U, 0x9ad74d4dU, 0x66553333U, 0x11948585U, + 0x8acf4545U, 0xe910f9f9U, 0x04060202U, 0xfe817f7fU, + 0xa0f05050U, 0x78443c3cU, 0x25ba9f9fU, 0x4be3a8a8U, + 0xa2f35151U, 0x5dfea3a3U, 0x80c04040U, 0x058a8f8fU, + 0x3fad9292U, 0x21bc9d9dU, 0x70483838U, 0xf104f5f5U, + 0x63dfbcbcU, 0x77c1b6b6U, 0xaf75dadaU, 0x42632121U, + 0x20301010U, 0xe51affffU, 0xfd0ef3f3U, 0xbf6dd2d2U, + 0x814ccdcdU, 0x18140c0cU, 0x26351313U, 0xc32fececU, + 0xbee15f5fU, 0x35a29797U, 0x88cc4444U, 0x2e391717U, + 0x9357c4c4U, 0x55f2a7a7U, 0xfc827e7eU, 0x7a473d3dU, + 0xc8ac6464U, 0xbae75d5dU, 0x322b1919U, 0xe6957373U, + 0xc0a06060U, 0x19988181U, 0x9ed14f4fU, 0xa37fdcdcU, + 0x44662222U, 0x547e2a2aU, 0x3bab9090U, 0x0b838888U, + 0x8cca4646U, 0xc729eeeeU, 0x6bd3b8b8U, 0x283c1414U, + 0xa779dedeU, 0xbce25e5eU, 0x161d0b0bU, 0xad76dbdbU, + 0xdb3be0e0U, 0x64563232U, 0x744e3a3aU, 0x141e0a0aU, + 0x92db4949U, 0x0c0a0606U, 0x486c2424U, 0xb8e45c5cU, + 0x9f5dc2c2U, 0xbd6ed3d3U, 0x43efacacU, 0xc4a66262U, + 0x39a89191U, 0x31a49595U, 0xd337e4e4U, 0xf28b7979U, + 0xd532e7e7U, 0x8b43c8c8U, 0x6e593737U, 0xdab76d6dU, + 0x018c8d8dU, 0xb164d5d5U, 0x9cd24e4eU, 0x49e0a9a9U, + 0xd8b46c6cU, 0xacfa5656U, 0xf307f4f4U, 0xcf25eaeaU, + 0xcaaf6565U, 0xf48e7a7aU, 0x47e9aeaeU, 0x10180808U, + 0x6fd5babaU, 0xf0887878U, 0x4a6f2525U, 0x5c722e2eU, + 0x38241c1cU, 0x57f1a6a6U, 0x73c7b4b4U, 0x9751c6c6U, + 0xcb23e8e8U, 0xa17cddddU, 0xe89c7474U, 0x3e211f1fU, + 0x96dd4b4bU, 0x61dcbdbdU, 0x0d868b8bU, 0x0f858a8aU, + 0xe0907070U, 0x7c423e3eU, 0x71c4b5b5U, 0xccaa6666U, + 0x90d84848U, 0x06050303U, 0xf701f6f6U, 0x1c120e0eU, + 0xc2a36161U, 0x6a5f3535U, 0xaef95757U, 0x69d0b9b9U, + 0x17918686U, 0x9958c1c1U, 0x3a271d1dU, 0x27b99e9eU, + 0xd938e1e1U, 0xeb13f8f8U, 0x2bb39898U, 0x22331111U, + 0xd2bb6969U, 0xa970d9d9U, 0x07898e8eU, 0x33a79494U, + 0x2db69b9bU, 0x3c221e1eU, 0x15928787U, 0xc920e9e9U, + 0x8749ceceU, 0xaaff5555U, 0x50782828U, 0xa57adfdfU, + 0x038f8c8cU, 0x59f8a1a1U, 0x09808989U, 0x1a170d0dU, + 0x65dabfbfU, 0xd731e6e6U, 0x84c64242U, 0xd0b86868U, + 0x82c34141U, 0x29b09999U, 0x5a772d2dU, 0x1e110f0fU, + 0x7bcbb0b0U, 0xa8fc5454U, 0x6dd6bbbbU, 0x2c3a1616U +}; + +#define t_fn0(x) (sharedMemory[ (x)]) +#define t_fn1(x) (sharedMemory[256 + (x)]) +#define t_fn2(x) (sharedMemory[512 + (x)]) +#define t_fn3(x) (sharedMemory[768 + (x)]) + + +#define round(dummy,y,x,k) \ + y[0] = (k)[0] ^ (t_fn0(x[0] & 0xff) ^ t_fn1((x[1] >> 8) & 0xff) ^ t_fn2((x[2] >> 16) & 0xff) ^ t_fn3((x[3] >> 24))); \ + y[1] = (k)[1] ^ (t_fn0(x[1] & 0xff) ^ t_fn1((x[2] >> 8) & 0xff) ^ t_fn2((x[3] >> 16) & 0xff) ^ t_fn3((x[0] >> 24))); \ + y[2] = (k)[2] ^ (t_fn0(x[2] & 0xff) ^ t_fn1((x[3] >> 8) & 0xff) ^ t_fn2((x[0] >> 16) & 0xff) ^ t_fn3((x[1] >> 24))); \ + y[3] = (k)[3] ^ (t_fn0(x[3] & 0xff) ^ t_fn1((x[0] >> 8) & 0xff) ^ t_fn2((x[1] >> 16) & 0xff) ^ t_fn3((x[2] >> 24) )); + +__device__ __forceinline__ static void cn_aes_single_round(uint32_t * __restrict__ sharedMemory, const uint32_t * __restrict__ in, uint32_t * __restrict__ out, const uint32_t * __restrict__ expandedKey) +{ + round(sharedMemory, out, in, expandedKey); +} + +__device__ __forceinline__ static void cn_aes_pseudo_round_mut(const uint32_t * __restrict__ sharedMemory, uint32_t * __restrict__ val, const uint32_t * __restrict__ expandedKey) +{ + uint32_t b1[4]; + round(sharedMemory, b1, val, expandedKey); + round(sharedMemory, val, b1, expandedKey + 1 * N_COLS); + round(sharedMemory, b1, val, expandedKey + 2 * N_COLS); + round(sharedMemory, val, b1, expandedKey + 3 * N_COLS); + round(sharedMemory, b1, val, expandedKey + 4 * N_COLS); + round(sharedMemory, val, b1, expandedKey + 5 * N_COLS); + round(sharedMemory, b1, val, expandedKey + 6 * N_COLS); + round(sharedMemory, val, b1, expandedKey + 7 * N_COLS); + round(sharedMemory, b1, val, expandedKey + 8 * N_COLS); + round(sharedMemory, val, b1, expandedKey + 9 * N_COLS); +} + +__device__ __forceinline__ static void cn_aes_gpu_init(uint32_t *sharedMemory) +{ + for(int i = threadIdx.x; i < 1024; i += blockDim.x) + sharedMemory[i] = d_t_fn[i]; +} diff --git a/xmrstak/backend/nvidia/nvcc_code/cuda_blake.hpp b/xmrstak/backend/nvidia/nvcc_code/cuda_blake.hpp new file mode 100644 index 0000000..07ae169 --- /dev/null +++ b/xmrstak/backend/nvidia/nvcc_code/cuda_blake.hpp @@ -0,0 +1,193 @@ +#pragma once + +typedef struct { + uint32_t h[8], s[4], t[2]; + int buflen, nullt; + uint8_t buf[64]; +} blake_state; + +#define U8TO32(p) \ + (((uint32_t)((p)[0]) << 24) | ((uint32_t)((p)[1]) << 16) | \ + ((uint32_t)((p)[2]) << 8) | ((uint32_t)((p)[3]) )) + +#define U32TO8(p, v) \ + (p)[0] = (uint8_t)((v) >> 24); (p)[1] = (uint8_t)((v) >> 16); \ + (p)[2] = (uint8_t)((v) >> 8); (p)[3] = (uint8_t)((v) ); + +#define BLAKE_ROT(x,n) ROTR32(x, n) +#define BLAKE_G(a,b,c,d,e) \ + v[a] += (m[d_blake_sigma[i][e]] ^ d_blake_cst[d_blake_sigma[i][e+1]]) + v[b]; \ + v[d] = BLAKE_ROT(v[d] ^ v[a],16); \ + v[c] += v[d]; \ + v[b] = BLAKE_ROT(v[b] ^ v[c],12); \ + v[a] += (m[d_blake_sigma[i][e+1]] ^ d_blake_cst[d_blake_sigma[i][e]])+v[b]; \ + v[d] = BLAKE_ROT(v[d] ^ v[a], 8); \ + v[c] += v[d]; \ + v[b] = BLAKE_ROT(v[b] ^ v[c], 7); + +__constant__ uint8_t d_blake_sigma[14][16] = +{ + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, + {14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3}, + {11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4}, + {7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8}, + {9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13}, + {2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9}, + {12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11}, + {13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10}, + {6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5}, + {10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0}, + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, + {14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3}, + {11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4}, + {7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8} +}; +__constant__ uint32_t d_blake_cst[16] += { + 0x243F6A88, 0x85A308D3, 0x13198A2E, 0x03707344, + 0xA4093822, 0x299F31D0, 0x082EFA98, 0xEC4E6C89, + 0x452821E6, 0x38D01377, 0xBE5466CF, 0x34E90C6C, + 0xC0AC29B7, 0xC97C50DD, 0x3F84D5B5, 0xB5470917 +}; + +__device__ void cn_blake_compress(blake_state * __restrict__ S, const uint8_t * __restrict__ block) +{ + uint32_t v[16], m[16], i; + + for (i = 0; i < 16; ++i) m[i] = U8TO32(block + i * 4); + for (i = 0; i < 8; ++i) v[i] = S->h[i]; + v[ 8] = S->s[0] ^ 0x243F6A88; + v[ 9] = S->s[1] ^ 0x85A308D3; + v[10] = S->s[2] ^ 0x13198A2E; + v[11] = S->s[3] ^ 0x03707344; + v[12] = 0xA4093822; + v[13] = 0x299F31D0; + v[14] = 0x082EFA98; + v[15] = 0xEC4E6C89; + + if (S->nullt == 0) + { + v[12] ^= S->t[0]; + v[13] ^= S->t[0]; + v[14] ^= S->t[1]; + v[15] ^= S->t[1]; + } + + for (i = 0; i < 14; ++i) + { + BLAKE_G(0, 4, 8, 12, 0); + BLAKE_G(1, 5, 9, 13, 2); + BLAKE_G(2, 6, 10, 14, 4); + BLAKE_G(3, 7, 11, 15, 6); + BLAKE_G(3, 4, 9, 14, 14); + BLAKE_G(2, 7, 8, 13, 12); + BLAKE_G(0, 5, 10, 15, 8); + BLAKE_G(1, 6, 11, 12, 10); + } + + for (i = 0; i < 16; ++i) S->h[i % 8] ^= v[i]; + for (i = 0; i < 8; ++i) S->h[i] ^= S->s[i % 4]; +} + +__device__ void cn_blake_update(blake_state * __restrict__ S, const uint8_t * __restrict__ data, uint64_t datalen) +{ + int left = S->buflen >> 3; + int fill = 64 - left; + + if (left && (((datalen >> 3) & 0x3F) >= (unsigned) fill)) + { + memcpy((void *) (S->buf + left), (void *) data, fill); + S->t[0] += 512; + if (S->t[0] == 0) S->t[1]++; + cn_blake_compress(S, S->buf); + data += fill; + datalen -= (fill << 3); + left = 0; + } + + while (datalen >= 512) + { + S->t[0] += 512; + if (S->t[0] == 0) S->t[1]++; + cn_blake_compress(S, data); + data += 64; + datalen -= 512; + } + + if (datalen > 0) + { + memcpy((void *) (S->buf + left), (void *) data, datalen >> 3); + S->buflen = (left << 3) + datalen; + } + else + { + S->buflen = 0; + } +} + +__device__ void cn_blake_final(blake_state * __restrict__ S, uint8_t * __restrict__ digest) +{ + const uint8_t padding[] = + { + 0x80,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 + }; + + uint8_t pa = 0x81, pb = 0x01; + uint8_t msglen[8]; + uint32_t lo = S->t[0] + S->buflen, hi = S->t[1]; + if (lo < (unsigned) S->buflen) hi++; + U32TO8(msglen + 0, hi); + U32TO8(msglen + 4, lo); + + if (S->buflen == 440) + { + S->t[0] -= 8; + cn_blake_update(S, &pa, 8); + } + else + { + if (S->buflen < 440) + { + if (S->buflen == 0) S->nullt = 1; + S->t[0] -= 440 - S->buflen; + cn_blake_update(S, padding, 440 - S->buflen); + } + else + { + S->t[0] -= 512 - S->buflen; + cn_blake_update(S, padding, 512 - S->buflen); + S->t[0] -= 440; + cn_blake_update(S, padding + 1, 440); + S->nullt = 1; + } + cn_blake_update(S, &pb, 8); + S->t[0] -= 8; + } + S->t[0] -= 64; + cn_blake_update(S, msglen, 64); + + U32TO8(digest + 0, S->h[0]); + U32TO8(digest + 4, S->h[1]); + U32TO8(digest + 8, S->h[2]); + U32TO8(digest + 12, S->h[3]); + U32TO8(digest + 16, S->h[4]); + U32TO8(digest + 20, S->h[5]); + U32TO8(digest + 24, S->h[6]); + U32TO8(digest + 28, S->h[7]); +} + +__device__ void cn_blake(const uint8_t * __restrict__ in, uint64_t inlen, uint8_t * __restrict__ out) +{ + blake_state bs; + blake_state *S = (blake_state *)&bs; + + S->h[0] = 0x6A09E667; S->h[1] = 0xBB67AE85; S->h[2] = 0x3C6EF372; + S->h[3] = 0xA54FF53A; S->h[4] = 0x510E527F; S->h[5] = 0x9B05688C; + S->h[6] = 0x1F83D9AB; S->h[7] = 0x5BE0CD19; + S->t[0] = S->t[1] = S->buflen = S->nullt = 0; + S->s[0] = S->s[1] = S->s[2] = S->s[3] = 0; + + cn_blake_update(S, (uint8_t *)in, inlen * 8); + cn_blake_final(S, (uint8_t *)out); +} diff --git a/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu b/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu new file mode 100644 index 0000000..7590cf5 --- /dev/null +++ b/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu @@ -0,0 +1,343 @@ +#include <stdio.h> +#include <stdint.h> +#include <string.h> +#include <cuda.h> +#include <cuda_runtime.h> + +#ifdef _WIN32 +#include <windows.h> +extern "C" void compat_usleep(uint64_t waitTime) +{ + if (waitTime > 0) + { + if (waitTime > 100) + { + // use a waitable timer for larger intervals > 0.1ms + + HANDLE timer; + LARGE_INTEGER ft; + + ft.QuadPart = -(10*waitTime); // Convert to 100 nanosecond interval, negative value indicates relative time + + timer = CreateWaitableTimer(NULL, TRUE, NULL); + SetWaitableTimer(timer, &ft, 0, NULL, NULL, 0); + WaitForSingleObject(timer, INFINITE); + CloseHandle(timer); + } + else + { + // use a polling loop for short intervals <= 100ms + + LARGE_INTEGER perfCnt, start, now; + __int64 elapsed; + + QueryPerformanceFrequency(&perfCnt); + QueryPerformanceCounter(&start); + do { + SwitchToThread(); + QueryPerformanceCounter((LARGE_INTEGER*) &now); + elapsed = (__int64)((now.QuadPart - start.QuadPart) / (float)perfCnt.QuadPart * 1000 * 1000); + } while ( elapsed < waitTime ); + } + } +} +#else +#include <unistd.h> +extern "C" void compat_usleep(uint64_t waitTime) +{ + usleep(waitTime); +} +#endif + +#include "cryptonight.h" +#include "cuda_extra.h" +#include "cuda_aes.hpp" +#include "cuda_device.hpp" + +/* sm_2X is limited to 2GB due to the small TLB + * therefore we never use 64bit indices + */ +#if defined(XMR_STAK_LARGEGRID) && (__CUDA_ARCH__ >= 300) +typedef uint64_t IndexType; +#else +typedef int IndexType; +#endif + +__device__ __forceinline__ uint64_t cuda_mul128( uint64_t multiplier, uint64_t multiplicand, uint64_t* product_hi ) +{ + *product_hi = __umul64hi( multiplier, multiplicand ); + return (multiplier * multiplicand ); +} + +template< typename T > +__device__ __forceinline__ T loadGlobal64( T * const addr ) +{ + T x; + asm volatile( "ld.global.cg.u64 %0, [%1];" : "=l"( x ) : "l"( addr ) ); + return x; +} + +template< typename T > +__device__ __forceinline__ T loadGlobal32( T * const addr ) +{ + T x; + asm volatile( "ld.global.cg.u32 %0, [%1];" : "=r"( x ) : "l"( addr ) ); + return x; +} + + +template< typename T > +__device__ __forceinline__ void storeGlobal32( T* addr, T const & val ) +{ + asm volatile( "st.global.cg.u32 [%0], %1;" : : "l"( addr ), "r"( val ) ); +} + +__global__ void cryptonight_core_gpu_phase1( int threads, int bfactor, int partidx, uint32_t * __restrict__ long_state, uint32_t * __restrict__ ctx_state, uint32_t * __restrict__ ctx_key1 ) +{ + __shared__ uint32_t sharedMemory[1024]; + + cn_aes_gpu_init( sharedMemory ); + __syncthreads( ); + + const int thread = ( blockDim.x * blockIdx.x + threadIdx.x ) >> 3; + const int sub = ( threadIdx.x & 7 ) << 2; + + const int batchsize = 0x80000 >> bfactor; + const int start = partidx * batchsize; + const int end = start + batchsize; + + if ( thread >= threads ) + return; + + uint32_t key[40], text[4]; + + MEMCPY8( key, ctx_key1 + thread * 40, 20 ); + + if( partidx == 0 ) + { + // first round + MEMCPY8( text, ctx_state + thread * 50 + sub + 16, 2 ); + } + else + { + // load previous text data + MEMCPY8( text, &long_state[( (uint64_t) thread << 19 ) + sub + start - 32], 2 ); + } + __syncthreads( ); + for ( int i = start; i < end; i += 32 ) + { + cn_aes_pseudo_round_mut( sharedMemory, text, key ); + MEMCPY8(&long_state[((uint64_t) thread << 19) + (sub + i)], text, 2); + } +} + +/** avoid warning `unused parameter` */ +template< typename T > +__forceinline__ __device__ void unusedVar( const T& ) +{ +} + +/** shuffle data for + * + * - this method can be used with all compute architectures + * - for <sm_30 shared memory is needed + * + * @param ptr pointer to shared memory, size must be `threadIdx.x * sizeof(uint32_t)` + * value can be NULL for compute architecture >=sm_30 + * @param sub thread number within the group, range [0;4) + * @param value value to share with other threads within the group + * @param src thread number within the group from where the data is read, range [0;4) + */ +__forceinline__ __device__ uint32_t shuffle(volatile uint32_t* ptr,const uint32_t sub,const int val,const uint32_t src) +{ +#if( __CUDA_ARCH__ < 300 ) + ptr[sub] = val; + return ptr[src&3]; +#else + unusedVar( ptr ); + unusedVar( sub ); + return __shfl( val, src, 4 ); +#endif +} + +#ifdef XMR_STAK_THREADS +__launch_bounds__( XMR_STAK_THREADS * 4 ) +#endif +__global__ void cryptonight_core_gpu_phase2( int threads, int bfactor, int partidx, uint32_t * d_long_state, uint32_t * d_ctx_a, uint32_t * d_ctx_b ) +{ + __shared__ uint32_t sharedMemory[1024]; + + cn_aes_gpu_init( sharedMemory ); + + __syncthreads( ); + + const int thread = ( blockDim.x * blockIdx.x + threadIdx.x ) >> 2; + const int sub = threadIdx.x & 3; + const int sub2 = sub & 2; + +#if( __CUDA_ARCH__ < 300 ) + extern __shared__ uint32_t shuffleMem[]; + volatile uint32_t* sPtr = (volatile uint32_t*)(shuffleMem + (threadIdx.x& 0xFFFFFFFC)); +#else + volatile uint32_t* sPtr = NULL; +#endif + if ( thread >= threads ) + return; + + int i, k; + uint32_t j; + const int batchsize = ITER >> ( 2 + bfactor ); + const int start = partidx * batchsize; + const int end = start + batchsize; + uint32_t * long_state = &d_long_state[(IndexType) thread << 19]; + uint32_t * ctx_a = d_ctx_a + thread * 4; + uint32_t * ctx_b = d_ctx_b + thread * 4; + uint32_t a, d[2]; + uint32_t t1[2], t2[2], res; + + a = ctx_a[sub]; + d[1] = ctx_b[sub]; + #pragma unroll 2 + for ( i = start; i < end; ++i ) + { + #pragma unroll 2 + for ( int x = 0; x < 2; ++x ) + { + j = ( ( shuffle(sPtr,sub, a, 0) & 0x1FFFF0 ) >> 2 ) + sub; + + const uint32_t x_0 = loadGlobal32<uint32_t>( long_state + j ); + const uint32_t x_1 = shuffle(sPtr,sub, x_0, sub + 1); + const uint32_t x_2 = shuffle(sPtr,sub, x_0, sub + 2); + const uint32_t x_3 = shuffle(sPtr,sub, x_0, sub + 3); + d[x] = a ^ + t_fn0( x_0 & 0xff ) ^ + t_fn1( (x_1 >> 8) & 0xff ) ^ + t_fn2( (x_2 >> 16) & 0xff ) ^ + t_fn3( ( x_3 >> 24 ) ); + + + //XOR_BLOCKS_DST(c, b, &long_state[j]); + t1[0] = shuffle(sPtr,sub, d[x], 0); + //long_state[j] = d[0] ^ d[1]; + storeGlobal32( long_state + j, d[0] ^ d[1] ); + + //MUL_SUM_XOR_DST(c, a, &long_state[((uint32_t *)c)[0] & 0x1FFFF0]); + j = ( ( *t1 & 0x1FFFF0 ) >> 2 ) + sub; + + uint32_t yy[2]; + *( (uint64_t*) yy ) = loadGlobal64<uint64_t>( ( (uint64_t *) long_state )+( j >> 1 ) ); + uint32_t zz[2]; + zz[0] = shuffle(sPtr,sub, yy[0], 0); + zz[1] = shuffle(sPtr,sub, yy[1], 0); + + t1[1] = shuffle(sPtr,sub, d[x], 1); + #pragma unroll + for ( k = 0; k < 2; k++ ) + t2[k] = shuffle(sPtr,sub, a, k + sub2); + + *( (uint64_t *) t2 ) += sub2 ? ( *( (uint64_t *) t1 ) * *( (uint64_t*) zz ) ) : __umul64hi( *( (uint64_t *) t1 ), *( (uint64_t*) zz ) ); + + res = *( (uint64_t *) t2 ) >> ( sub & 1 ? 32 : 0 ); + + storeGlobal32( long_state + j, res ); + a = ( sub & 1 ? yy[1] : yy[0] ) ^ res; + } + } + + if ( bfactor > 0 ) + { + ctx_a[sub] = a; + ctx_b[sub] = d[1]; + } +} + +__global__ void cryptonight_core_gpu_phase3( int threads, int bfactor, int partidx, const uint32_t * __restrict__ long_state, uint32_t * __restrict__ d_ctx_state, uint32_t * __restrict__ d_ctx_key2 ) +{ + __shared__ uint32_t sharedMemory[1024]; + + cn_aes_gpu_init( sharedMemory ); + __syncthreads( ); + + int thread = ( blockDim.x * blockIdx.x + threadIdx.x ) >> 3; + int sub = ( threadIdx.x & 7 ) << 2; + + const int batchsize = 0x80000 >> bfactor; + const int start = partidx * batchsize; + const int end = start + batchsize; + + if ( thread >= threads ) + return; + + uint32_t key[40], text[4]; + MEMCPY8( key, d_ctx_key2 + thread * 40, 20 ); + MEMCPY8( text, d_ctx_state + thread * 50 + sub + 16, 2 ); + + __syncthreads( ); + for ( int i = start; i < end; i += 32 ) + { +#pragma unroll + for ( int j = 0; j < 4; ++j ) + text[j] ^= long_state[((IndexType) thread << 19) + (sub + i + j)]; + + cn_aes_pseudo_round_mut( sharedMemory, text, key ); + } + + MEMCPY8( d_ctx_state + thread * 50 + sub + 16, text, 2 ); +} + +extern "C" void cryptonight_core_cpu_hash(nvid_ctx* ctx) +{ + dim3 grid( ctx->device_blocks ); + dim3 block( ctx->device_threads ); + dim3 block4( ctx->device_threads << 2 ); + dim3 block8( ctx->device_threads << 3 ); + + int partcount = 1 << ctx->device_bfactor; + + /* bfactor for phase 1 and 3 + * + * phase 1 and 3 consume less time than phase 2, therefore we begin with the + * kernel splitting if the user defined a `bfactor >= 5` + */ + int bfactorOneThree = ctx->device_bfactor - 4; + if( bfactorOneThree < 0 ) + bfactorOneThree = 0; + + int partcountOneThree = 1 << bfactorOneThree; + + for ( int i = 0; i < partcountOneThree; i++ ) + { + CUDA_CHECK_KERNEL(ctx->device_id, cryptonight_core_gpu_phase1<<< grid, block8 >>>( ctx->device_blocks*ctx->device_threads, + bfactorOneThree, i, + ctx->d_long_state, ctx->d_ctx_state, ctx->d_ctx_key1 )); + + if ( partcount > 1 && ctx->device_bsleep > 0) compat_usleep( ctx->device_bsleep ); + } + if ( partcount > 1 && ctx->device_bsleep > 0) compat_usleep( ctx->device_bsleep ); + + for ( int i = 0; i < partcount; i++ ) + { + CUDA_CHECK_KERNEL(ctx->device_id, cryptonight_core_gpu_phase2<<< + grid, + block4, + block4.x * sizeof(uint32_t) * static_cast< int >( ctx->device_arch[0] < 3 ) + >>>( + ctx->device_blocks*ctx->device_threads, + ctx->device_bfactor, + i, + ctx->d_long_state, + ctx->d_ctx_a, + ctx->d_ctx_b + )); + + if ( partcount > 1 && ctx->device_bsleep > 0) compat_usleep( ctx->device_bsleep ); + } + + for ( int i = 0; i < partcountOneThree; i++ ) + { + CUDA_CHECK_KERNEL(ctx->device_id, cryptonight_core_gpu_phase3<<< grid, block8 >>>( ctx->device_blocks*ctx->device_threads, + bfactorOneThree, i, + ctx->d_long_state, + ctx->d_ctx_state, ctx->d_ctx_key2 )); + } +} diff --git a/xmrstak/backend/nvidia/nvcc_code/cuda_device.hpp b/xmrstak/backend/nvidia/nvcc_code/cuda_device.hpp new file mode 100644 index 0000000..078c165 --- /dev/null +++ b/xmrstak/backend/nvidia/nvcc_code/cuda_device.hpp @@ -0,0 +1,30 @@ + +#pragma once + +#include <cuda_runtime.h> +#include <stdexcept> +#include <iostream> +#include <string> + +/** execute and check a CUDA api command + * + * @param id gpu id (thread id) + * @param ... CUDA api command + */ +#define CUDA_CHECK(id, ...) { \ + cudaError_t error = __VA_ARGS__; \ + if(error!=cudaSuccess){ \ + std::cerr << "[CUDA] Error gpu " << id << ": <" << __FILE__ << ">:" << __LINE__ << std::endl; \ + throw std::runtime_error(std::string("[CUDA] Error: ") + std::string(cudaGetErrorString(error))); \ + } \ +} \ +( (void) 0 ) + +/** execute and check a CUDA kernel + * + * @param id gpu id (thread id) + * @param ... CUDA kernel call + */ +#define CUDA_CHECK_KERNEL(id, ...) \ + __VA_ARGS__; \ + CUDA_CHECK(id, cudaGetLastError()) diff --git a/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu b/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu new file mode 100644 index 0000000..7052bc8 --- /dev/null +++ b/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu @@ -0,0 +1,367 @@ +#include <stdio.h> +#include <stdint.h> +#include <string.h> +#include <cuda.h> +#include <cuda_runtime.h> +#include <device_functions.hpp> +#include <algorithm> + +#ifdef __CUDACC__ +__constant__ +#else +const +#endif +uint64_t keccakf_rndc[24] ={ + 0x0000000000000001, 0x0000000000008082, 0x800000000000808a, + 0x8000000080008000, 0x000000000000808b, 0x0000000080000001, + 0x8000000080008081, 0x8000000000008009, 0x000000000000008a, + 0x0000000000000088, 0x0000000080008009, 0x000000008000000a, + 0x000000008000808b, 0x800000000000008b, 0x8000000000008089, + 0x8000000000008003, 0x8000000000008002, 0x8000000000000080, + 0x000000000000800a, 0x800000008000000a, 0x8000000080008081, + 0x8000000000008080, 0x0000000080000001, 0x8000000080008008 +}; + +typedef unsigned char BitSequence; +typedef unsigned long long DataLength; + +#include "cryptonight.h" +#include "cuda_extra.h" +#include "cuda_keccak.hpp" +#include "cuda_blake.hpp" +#include "cuda_groestl.hpp" +#include "cuda_jh.hpp" +#include "cuda_skein.hpp" +#include "cuda_device.hpp" + +__constant__ uint8_t d_sub_byte[16][16] ={ + {0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76 }, + {0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0 }, + {0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15 }, + {0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75 }, + {0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84 }, + {0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf }, + {0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8 }, + {0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2 }, + {0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73 }, + {0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb }, + {0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79 }, + {0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08 }, + {0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a }, + {0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e }, + {0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf }, + {0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 } +}; + +__device__ __forceinline__ void cryptonight_aes_set_key( uint32_t * __restrict__ key, const uint32_t * __restrict__ data ) +{ + int i, j; + uint8_t temp[4]; + const uint32_t aes_gf[] = { 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36 }; + + MEMSET4( key, 0, 40 ); + MEMCPY4( key, data, 8 ); + +#pragma unroll + for ( i = 8; i < 40; i++ ) + { + *(uint32_t *) temp = key[i - 1]; + if ( i % 8 == 0 ) + { + *(uint32_t *) temp = ROTR32( *(uint32_t *) temp, 8 ); + for ( j = 0; j < 4; j++ ) + temp[j] = d_sub_byte[( temp[j] >> 4 ) & 0x0f][temp[j] & 0x0f]; + *(uint32_t *) temp ^= aes_gf[i / 8 - 1]; + } + else + { + if ( i % 8 == 4 ) + { +#pragma unroll + for ( j = 0; j < 4; j++ ) + temp[j] = d_sub_byte[( temp[j] >> 4 ) & 0x0f][temp[j] & 0x0f]; + } + } + + key[i] = key[( i - 8 )] ^ *(uint32_t *) temp; + } +} + +__global__ void cryptonight_extra_gpu_prepare( int threads, uint32_t * __restrict__ d_input, uint32_t len, uint32_t startNonce, uint32_t * __restrict__ d_ctx_state, uint32_t * __restrict__ d_ctx_a, uint32_t * __restrict__ d_ctx_b, uint32_t * __restrict__ d_ctx_key1, uint32_t * __restrict__ d_ctx_key2 ) +{ + int thread = ( blockDim.x * blockIdx.x + threadIdx.x ); + + if ( thread >= threads ) + return; + + uint32_t ctx_state[50]; + uint32_t ctx_a[4]; + uint32_t ctx_b[4]; + uint32_t ctx_key1[40]; + uint32_t ctx_key2[40]; + uint32_t input[21]; + + memcpy( input, d_input, len ); + //*((uint32_t *)(((char *)input) + 39)) = startNonce + thread; + uint32_t nonce = startNonce + thread; + for ( int i = 0; i < sizeof (uint32_t ); ++i ) + ( ( (char *) input ) + 39 )[i] = ( (char*) ( &nonce ) )[i]; //take care of pointer alignment + + cn_keccak( (uint8_t *) input, len, (uint8_t *) ctx_state ); + cryptonight_aes_set_key( ctx_key1, ctx_state ); + cryptonight_aes_set_key( ctx_key2, ctx_state + 8 ); + XOR_BLOCKS_DST( ctx_state, ctx_state + 8, ctx_a ); + XOR_BLOCKS_DST( ctx_state + 4, ctx_state + 12, ctx_b ); + + memcpy( d_ctx_state + thread * 50, ctx_state, 50 * 4 ); + memcpy( d_ctx_a + thread * 4, ctx_a, 4 * 4 ); + memcpy( d_ctx_b + thread * 4, ctx_b, 4 * 4 ); + memcpy( d_ctx_key1 + thread * 40, ctx_key1, 40 * 4 ); + memcpy( d_ctx_key2 + thread * 40, ctx_key2, 40 * 4 ); +} + +__global__ void cryptonight_extra_gpu_final( int threads, uint64_t target, uint32_t* __restrict__ d_res_count, uint32_t * __restrict__ d_res_nonce, uint32_t * __restrict__ d_ctx_state ) +{ + const int thread = blockDim.x * blockIdx.x + threadIdx.x; + + if ( thread >= threads ) + return; + + int i; + uint32_t * __restrict__ ctx_state = d_ctx_state + thread * 50; + uint64_t hash[4]; + uint32_t state[50]; + +#pragma unroll + for ( i = 0; i < 50; i++ ) + state[i] = ctx_state[i]; + + cn_keccakf2( (uint64_t *) state ); + + switch ( ( (uint8_t *) state )[0] & 0x03 ) + { + case 0: + cn_blake( (const uint8_t *) state, 200, (uint8_t *) hash ); + break; + case 1: + cn_groestl( (const BitSequence *) state, 200, (BitSequence *) hash ); + break; + case 2: + cn_jh( (const BitSequence *) state, 200, (BitSequence *) hash ); + break; + case 3: + cn_skein( (const BitSequence *) state, 200, (BitSequence *) hash ); + break; + default: + break; + } + + // Note that comparison is equivalent to subtraction - we can't just compare 8 32-bit values + // and expect an accurate result for target > 32-bit without implementing carries + + if ( hash[3] < target ) + { + uint32_t idx = atomicInc( d_res_count, 0xFFFFFFFF ); + + if(idx < 10) + d_res_nonce[idx] = thread; + } +} + +extern "C" void cryptonight_extra_cpu_set_data( nvid_ctx* ctx, const void *data, uint32_t len ) +{ + ctx->inputlen = len; + CUDA_CHECK(ctx->device_id, cudaMemcpy( ctx->d_input, data, len, cudaMemcpyHostToDevice )); +} + +extern "C" int cryptonight_extra_cpu_init(nvid_ctx* ctx) +{ + cudaError_t err; + err = cudaSetDevice(ctx->device_id); + if(err != cudaSuccess) + { + printf("GPU %d: %s", ctx->device_id, cudaGetErrorString(err)); + return 0; + } + + cudaDeviceReset(); + cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync); + cudaDeviceSetCacheConfig(cudaFuncCachePreferL1); + + size_t wsize = ctx->device_blocks * ctx->device_threads; + CUDA_CHECK(ctx->device_id, cudaMalloc(&ctx->d_long_state, (size_t)MEMORY * wsize)); + CUDA_CHECK(ctx->device_id, cudaMalloc(&ctx->d_ctx_state, 50 * sizeof(uint32_t) * wsize)); + CUDA_CHECK(ctx->device_id, cudaMalloc(&ctx->d_ctx_key1, 40 * sizeof(uint32_t) * wsize)); + CUDA_CHECK(ctx->device_id, cudaMalloc(&ctx->d_ctx_key2, 40 * sizeof(uint32_t) * wsize)); + CUDA_CHECK(ctx->device_id, cudaMalloc(&ctx->d_ctx_text, 32 * sizeof(uint32_t) * wsize)); + CUDA_CHECK(ctx->device_id, cudaMalloc(&ctx->d_ctx_a, 4 * sizeof(uint32_t) * wsize)); + CUDA_CHECK(ctx->device_id, cudaMalloc(&ctx->d_ctx_b, 4 * sizeof(uint32_t) * wsize)); + CUDA_CHECK(ctx->device_id, cudaMalloc(&ctx->d_input, 21 * sizeof (uint32_t ) )); + CUDA_CHECK(ctx->device_id, cudaMalloc(&ctx->d_result_count, sizeof (uint32_t ) )); + CUDA_CHECK(ctx->device_id, cudaMalloc(&ctx->d_result_nonce, 10 * sizeof (uint32_t ) )); + return 1; +} + +extern "C" void cryptonight_extra_cpu_prepare(nvid_ctx* ctx, uint32_t startNonce) +{ + int threadsperblock = 128; + uint32_t wsize = ctx->device_blocks * ctx->device_threads; + + dim3 grid( ( wsize + threadsperblock - 1 ) / threadsperblock ); + dim3 block( threadsperblock ); + + CUDA_CHECK_KERNEL(ctx->device_id, cryptonight_extra_gpu_prepare<<<grid, block >>>( wsize, ctx->d_input, ctx->inputlen, startNonce, + ctx->d_ctx_state, ctx->d_ctx_a, ctx->d_ctx_b, ctx->d_ctx_key1, ctx->d_ctx_key2 )); +} + +extern "C" void cryptonight_extra_cpu_final(nvid_ctx* ctx, uint32_t startNonce, uint64_t target, uint32_t* rescount, uint32_t *resnonce) +{ + int threadsperblock = 128; + uint32_t wsize = ctx->device_blocks * ctx->device_threads; + + dim3 grid( ( wsize + threadsperblock - 1 ) / threadsperblock ); + dim3 block( threadsperblock ); + + CUDA_CHECK(ctx->device_id, cudaMemset( ctx->d_result_nonce, 0xFF, 10 * sizeof (uint32_t ) )); + CUDA_CHECK(ctx->device_id, cudaMemset( ctx->d_result_count, 0, sizeof (uint32_t ) )); + + CUDA_CHECK_KERNEL(ctx->device_id, cryptonight_extra_gpu_final<<<grid, block >>>( wsize, target, ctx->d_result_count, ctx->d_result_nonce, ctx->d_ctx_state )); + + CUDA_CHECK(ctx->device_id, cudaMemcpy( rescount, ctx->d_result_count, sizeof (uint32_t ), cudaMemcpyDeviceToHost )); + CUDA_CHECK(ctx->device_id, cudaMemcpy( resnonce, ctx->d_result_nonce, 10 * sizeof (uint32_t ), cudaMemcpyDeviceToHost )); + + /* There is only a 32bit limit for the counter on the device side + * therefore this value can be greater than 10, in that case limit rescount + * to 10 entries. + */ + if(*rescount > 10) + *rescount = 10; + for(int i=0; i < *rescount; i++) + resnonce[i] += startNonce; +} + +extern "C" int cuda_get_devicecount( int* deviceCount) +{ + cudaError_t err; + *deviceCount = 0; + err = cudaGetDeviceCount(deviceCount); + if(err != cudaSuccess) + { + if(err == cudaErrorNoDevice) + printf("ERROR: NVIDIA no CUDA device found!\n"); + else if(err == cudaErrorInsufficientDriver) + printf("WARNING: NVIDIA Insufficient driver!\n"); + else + printf("WARNING: NVIDIA Unable to query number of CUDA devices!\n"); + return 0; + } + + return 1; +} + +extern "C" int cuda_get_deviceinfo(nvid_ctx* ctx) +{ + cudaError_t err; + int version; + + err = cudaDriverGetVersion(&version); + if(err != cudaSuccess) + { + printf("Unable to query CUDA driver version! Is an nVidia driver installed?\n"); + return 0; + } + + if(version < CUDART_VERSION) + { + printf("Driver does not support CUDA %d.%d API! Update your nVidia driver!\n", CUDART_VERSION / 1000, (CUDART_VERSION % 1000) / 10); + return 0; + } + + int GPU_N; + if(cuda_get_devicecount(&GPU_N) == 0) + { + return 0; + } + + if(ctx->device_id >= GPU_N) + { + printf("Invalid device ID!\n"); + return 0; + } + + cudaDeviceProp props; + err = cudaGetDeviceProperties(&props, ctx->device_id); + if(err != cudaSuccess) + { + printf("\nGPU %d: %s\n%s line %d\n", ctx->device_id, cudaGetErrorString(err), __FILE__, __LINE__); + return 0; + } + + ctx->device_name = strdup(props.name); + ctx->device_mpcount = props.multiProcessorCount; + ctx->device_arch[0] = props.major; + ctx->device_arch[1] = props.minor; + + ctx->name = std::string(props.name); + + // set all evice option those marked as auto (-1) to a valid value + if(ctx->device_blocks == -1) + { + /* good values based of my experience + * - 3 * SMX count >=sm_30 + * - 2 * SMX count for <sm_30 + */ + ctx->device_blocks = props.multiProcessorCount * + ( props.major < 3 ? 2 : 3 ); + } + if(ctx->device_threads == -1) + { + /* sm_20 devices can only run 512 threads per cuda block + * `cryptonight_core_gpu_phase1` and `cryptonight_core_gpu_phase3` starts + * `8 * ctx->device_threads` threads per block + */ + ctx->device_threads = 64; + constexpr size_t byte2mib = 1024u * 1024u; + + // no limit by default 1TiB + size_t maxMemUsage = byte2mib * byte2mib; + if(props.major < 6) + { + // limit memory usage for GPUs before pascal + maxMemUsage = size_t(2048u) * byte2mib; + } + if(props.major == 2) + { + // limit memory usage for sm 20 GPUs + maxMemUsage = size_t(1024u) * byte2mib; + } + + size_t freeMemory = 0; + size_t totalMemory = 0; + CUDA_CHECK(ctx->device_id, cudaMemGetInfo(&freeMemory, &totalMemory)); + + ctx->total_device_memory = totalMemory; + ctx->free_device_memory = freeMemory; + + // keep 64MiB memory free (value is randomly chosen) + // 200byte are meta data memory (result nonce, ...) + size_t availableMem = freeMemory - (64u * 1024 * 1024) - 200u; + size_t limitedMemory = std::min(availableMem, maxMemUsage); + // up to 920bytes extra memory is used per thread for some kernel (lmem/local memory) + // 680bytes are extra meta data memory per hash + size_t perThread = size_t(MEMORY) + 740u + 680u; + size_t max_intensity = limitedMemory / perThread; + ctx->device_threads = max_intensity / ctx->device_blocks; + // use only odd number of threads + ctx->device_threads = ctx->device_threads & 0xFFFFFFFE; + + if(props.major == 2 && ctx->device_threads > 64) + { + // Fermi gpus only support 512 threads per block (we need start 4 * configured threads) + ctx->device_threads = 64; + } + + } + + return 1; +} diff --git a/xmrstak/backend/nvidia/nvcc_code/cuda_extra.hpp b/xmrstak/backend/nvidia/nvcc_code/cuda_extra.hpp new file mode 100644 index 0000000..3ccdcd6 --- /dev/null +++ b/xmrstak/backend/nvidia/nvcc_code/cuda_extra.hpp @@ -0,0 +1,104 @@ +#pragma once + +#ifdef __INTELLISENSE__ +#define __CUDA_ARCH__ 520 +/* avoid red underlining */ + +struct uint3 +{ + unsigned int x, y, z; +}; + +struct uint3 threadIdx; +struct uint3 blockIdx; +struct uint3 blockDim; +#define __funnelshift_r(a,b,c) 1 +#define __syncthreads() +#define asm(x) +#define __shfl(a,b,c) 1 +#endif + +#define MEMORY (1 << 21) // 2 MiB / 2097152 B +#define ITER (1 << 20) // 1048576 +#define AES_BLOCK_SIZE 16 +#define AES_KEY_SIZE 32 +#define INIT_SIZE_BLK 8 +#define INIT_SIZE_BYTE (INIT_SIZE_BLK * AES_BLOCK_SIZE) // 128 B + +#define C32(x) ((uint32_t)(x ## U)) +#define T32(x) ((x) & C32(0xFFFFFFFF)) + +#if __CUDA_ARCH__ >= 350 +__forceinline__ __device__ uint64_t cuda_ROTL64(const uint64_t value, const int offset) +{ + uint2 result; + if(offset >= 32) + { + asm("shf.l.wrap.b32 %0, %1, %2, %3;" : "=r"(result.x) : "r"(__double2loint(__longlong_as_double(value))), "r"(__double2hiint(__longlong_as_double(value))), "r"(offset)); + asm("shf.l.wrap.b32 %0, %1, %2, %3;" : "=r"(result.y) : "r"(__double2hiint(__longlong_as_double(value))), "r"(__double2loint(__longlong_as_double(value))), "r"(offset)); + } + else + { + asm("shf.l.wrap.b32 %0, %1, %2, %3;" : "=r"(result.x) : "r"(__double2hiint(__longlong_as_double(value))), "r"(__double2loint(__longlong_as_double(value))), "r"(offset)); + asm("shf.l.wrap.b32 %0, %1, %2, %3;" : "=r"(result.y) : "r"(__double2loint(__longlong_as_double(value))), "r"(__double2hiint(__longlong_as_double(value))), "r"(offset)); + } + return __double_as_longlong(__hiloint2double(result.y, result.x)); +} +#define ROTL64(x, n) (cuda_ROTL64(x, n)) +#else +#define ROTL64(x, n) (((x) << (n)) | ((x) >> (64 - (n)))) +#endif + +#if __CUDA_ARCH__ < 350 +#define ROTL32(x, n) T32(((x) << (n)) | ((x) >> (32 - (n)))) +#define ROTR32(x, n) (((x) >> (n)) | ((x) << (32 - (n)))) +#else +#define ROTL32(x, n) __funnelshift_l( (x), (x), (n) ) +#define ROTR32(x, n) __funnelshift_r( (x), (x), (n) ) +#endif + +#define MEMSET8(dst,what,cnt) { \ + int i_memset8; \ + uint64_t *out_memset8 = (uint64_t *)(dst); \ + for( i_memset8 = 0; i_memset8 < cnt; i_memset8++ ) \ + out_memset8[i_memset8] = (what); } + +#define MEMSET4(dst,what,cnt) { \ + int i_memset4; \ + uint32_t *out_memset4 = (uint32_t *)(dst); \ + for( i_memset4 = 0; i_memset4 < cnt; i_memset4++ ) \ + out_memset4[i_memset4] = (what); } + +#define MEMCPY8(dst,src,cnt) { \ + int i_memcpy8; \ + uint64_t *in_memcpy8 = (uint64_t *)(src); \ + uint64_t *out_memcpy8 = (uint64_t *)(dst); \ + for( i_memcpy8 = 0; i_memcpy8 < cnt; i_memcpy8++ ) \ + out_memcpy8[i_memcpy8] = in_memcpy8[i_memcpy8]; } + +#define MEMCPY4(dst,src,cnt) { \ + int i_memcpy4; \ + uint32_t *in_memcpy4 = (uint32_t *)(src); \ + uint32_t *out_memcpy4 = (uint32_t *)(dst); \ + for( i_memcpy4 = 0; i_memcpy4 < cnt; i_memcpy4++ ) \ + out_memcpy4[i_memcpy4] = in_memcpy4[i_memcpy4]; } + +#define XOR_BLOCKS(a,b) { \ + ((uint64_t *)a)[0] ^= ((uint64_t *)b)[0]; \ + ((uint64_t *)a)[1] ^= ((uint64_t *)b)[1]; } + +#define XOR_BLOCKS_DST(x,y,z) { \ + ((uint64_t *)z)[0] = ((uint64_t *)(x))[0] ^ ((uint64_t *)(y))[0]; \ + ((uint64_t *)z)[1] = ((uint64_t *)(x))[1] ^ ((uint64_t *)(y))[1]; } + +#define MUL_SUM_XOR_DST(a,c,dst) { \ + const uint64_t dst0 = ((uint64_t *)dst)[0]; \ + uint64_t hi, lo = cuda_mul128(((uint64_t *)a)[0], dst0, &hi) + ((uint64_t *)c)[1]; \ + hi += ((uint64_t *)c)[0]; \ + ((uint64_t *)c)[0] = dst0 ^ hi; \ + ((uint64_t *)dst)[0] = hi; \ + ((uint64_t *)c)[1] = atomicExch(((unsigned long long int *)dst) + 1, (unsigned long long int)lo) ^ lo; \ + } + +#define E2I(x) ((size_t)(((*((uint64_t*)(x)) >> 4) & 0x1ffff))) + diff --git a/xmrstak/backend/nvidia/nvcc_code/cuda_groestl.hpp b/xmrstak/backend/nvidia/nvcc_code/cuda_groestl.hpp new file mode 100644 index 0000000..a37934c --- /dev/null +++ b/xmrstak/backend/nvidia/nvcc_code/cuda_groestl.hpp @@ -0,0 +1,357 @@ +#pragma once + +#define GROESTL_ROWS 8 +#define GROESTL_LENGTHFIELDLEN GROESTL_ROWS +#define GROESTL_COLS512 8 + +#define GROESTL_SIZE512 (GROESTL_ROWS*GROESTL_COLS512) + +#define GROESTL_ROUNDS512 10 +#define GROESTL_HASH_BIT_LEN 256 + +#define GROESTL_ROTL32(v, n) ROTL32(v, n) + + +#define li_32(h) 0x##h##u +#define GROESTL_EXT_BYTE(var,n) ((uint8_t)((uint32_t)(var) >> (8*n))) + +#define u32BIG(a) \ + ((GROESTL_ROTL32(a,8) & li_32(00FF00FF)) | (GROESTL_ROTL32(a,24) & li_32(FF00FF00))) + +typedef struct { + uint32_t chaining[GROESTL_SIZE512/sizeof(uint32_t)]; /* actual state */ + uint32_t block_counter1, + block_counter2; /* message block counter(s) */ + BitSequence buffer[GROESTL_SIZE512]; /* data buffer */ + int buf_ptr; /* data buffer pointer */ + int bits_in_last_byte; /* no. of message bits in last byte of data buffer */ +} groestlHashState; + + +__constant__ uint32_t d_groestl_T[512] = +{ + 0xa5f432c6, 0xc6a597f4, 0x84976ff8, 0xf884eb97, 0x99b05eee, 0xee99c7b0, 0x8d8c7af6, 0xf68df78c, 0xd17e8ff, 0xff0de517, 0xbddc0ad6, 0xd6bdb7dc, 0xb1c816de, 0xdeb1a7c8, 0x54fc6d91, 0x915439fc +, 0x50f09060, 0x6050c0f0, 0x3050702, 0x2030405, 0xa9e02ece, 0xcea987e0, 0x7d87d156, 0x567dac87, 0x192bcce7, 0xe719d52b, 0x62a613b5, 0xb56271a6, 0xe6317c4d, 0x4de69a31, 0x9ab559ec, 0xec9ac3b5 +, 0x45cf408f, 0x8f4505cf, 0x9dbca31f, 0x1f9d3ebc, 0x40c04989, 0x894009c0, 0x879268fa, 0xfa87ef92, 0x153fd0ef, 0xef15c53f, 0xeb2694b2, 0xb2eb7f26, 0xc940ce8e, 0x8ec90740, 0xb1de6fb, 0xfb0bed1d +, 0xec2f6e41, 0x41ec822f, 0x67a91ab3, 0xb3677da9, 0xfd1c435f, 0x5ffdbe1c, 0xea256045, 0x45ea8a25, 0xbfdaf923, 0x23bf46da, 0xf7025153, 0x53f7a602, 0x96a145e4, 0xe496d3a1, 0x5bed769b, 0x9b5b2ded +, 0xc25d2875, 0x75c2ea5d, 0x1c24c5e1, 0xe11cd924, 0xaee9d43d, 0x3dae7ae9, 0x6abef24c, 0x4c6a98be, 0x5aee826c, 0x6c5ad8ee, 0x41c3bd7e, 0x7e41fcc3, 0x206f3f5, 0xf502f106, 0x4fd15283, 0x834f1dd1 +, 0x5ce48c68, 0x685cd0e4, 0xf4075651, 0x51f4a207, 0x345c8dd1, 0xd134b95c, 0x818e1f9, 0xf908e918, 0x93ae4ce2, 0xe293dfae, 0x73953eab, 0xab734d95, 0x53f59762, 0x6253c4f5, 0x3f416b2a, 0x2a3f5441 +, 0xc141c08, 0x80c1014, 0x52f66395, 0x955231f6, 0x65afe946, 0x46658caf, 0x5ee27f9d, 0x9d5e21e2, 0x28784830, 0x30286078, 0xa1f8cf37, 0x37a16ef8, 0xf111b0a, 0xa0f1411, 0xb5c4eb2f, 0x2fb55ec4 +, 0x91b150e, 0xe091c1b, 0x365a7e24, 0x2436485a, 0x9bb6ad1b, 0x1b9b36b6, 0x3d4798df, 0xdf3da547, 0x266aa7cd, 0xcd26816a, 0x69bbf54e, 0x4e699cbb, 0xcd4c337f, 0x7fcdfe4c, 0x9fba50ea, 0xea9fcfba +, 0x1b2d3f12, 0x121b242d, 0x9eb9a41d, 0x1d9e3ab9, 0x749cc458, 0x5874b09c, 0x2e724634, 0x342e6872, 0x2d774136, 0x362d6c77, 0xb2cd11dc, 0xdcb2a3cd, 0xee299db4, 0xb4ee7329, 0xfb164d5b, 0x5bfbb616 +, 0xf601a5a4, 0xa4f65301, 0x4dd7a176, 0x764decd7, 0x61a314b7, 0xb76175a3, 0xce49347d, 0x7dcefa49, 0x7b8ddf52, 0x527ba48d, 0x3e429fdd, 0xdd3ea142, 0x7193cd5e, 0x5e71bc93, 0x97a2b113, 0x139726a2 +, 0xf504a2a6, 0xa6f55704, 0x68b801b9, 0xb96869b8, 0x0, 0x0, 0x2c74b5c1, 0xc12c9974, 0x60a0e040, 0x406080a0, 0x1f21c2e3, 0xe31fdd21, 0xc8433a79, 0x79c8f243, 0xed2c9ab6, 0xb6ed772c +, 0xbed90dd4, 0xd4beb3d9, 0x46ca478d, 0x8d4601ca, 0xd9701767, 0x67d9ce70, 0x4bddaf72, 0x724be4dd, 0xde79ed94, 0x94de3379, 0xd467ff98, 0x98d42b67, 0xe82393b0, 0xb0e87b23, 0x4ade5b85, 0x854a11de +, 0x6bbd06bb, 0xbb6b6dbd, 0x2a7ebbc5, 0xc52a917e, 0xe5347b4f, 0x4fe59e34, 0x163ad7ed, 0xed16c13a, 0xc554d286, 0x86c51754, 0xd762f89a, 0x9ad72f62, 0x55ff9966, 0x6655ccff, 0x94a7b611, 0x119422a7 +, 0xcf4ac08a, 0x8acf0f4a, 0x1030d9e9, 0xe910c930, 0x60a0e04, 0x406080a, 0x819866fe, 0xfe81e798, 0xf00baba0, 0xa0f05b0b, 0x44ccb478, 0x7844f0cc, 0xbad5f025, 0x25ba4ad5, 0xe33e754b, 0x4be3963e +, 0xf30eaca2, 0xa2f35f0e, 0xfe19445d, 0x5dfeba19, 0xc05bdb80, 0x80c01b5b, 0x8a858005, 0x58a0a85, 0xadecd33f, 0x3fad7eec, 0xbcdffe21, 0x21bc42df, 0x48d8a870, 0x7048e0d8, 0x40cfdf1, 0xf104f90c +, 0xdf7a1963, 0x63dfc67a, 0xc1582f77, 0x77c1ee58, 0x759f30af, 0xaf75459f, 0x63a5e742, 0x426384a5, 0x30507020, 0x20304050, 0x1a2ecbe5, 0xe51ad12e, 0xe12effd, 0xfd0ee112, 0x6db708bf, 0xbf6d65b7 +, 0x4cd45581, 0x814c19d4, 0x143c2418, 0x1814303c, 0x355f7926, 0x26354c5f, 0x2f71b2c3, 0xc32f9d71, 0xe13886be, 0xbee16738, 0xa2fdc835, 0x35a26afd, 0xcc4fc788, 0x88cc0b4f, 0x394b652e, 0x2e395c4b +, 0x57f96a93, 0x93573df9, 0xf20d5855, 0x55f2aa0d, 0x829d61fc, 0xfc82e39d, 0x47c9b37a, 0x7a47f4c9, 0xacef27c8, 0xc8ac8bef, 0xe73288ba, 0xbae76f32, 0x2b7d4f32, 0x322b647d, 0x95a442e6, 0xe695d7a4 +, 0xa0fb3bc0, 0xc0a09bfb, 0x98b3aa19, 0x199832b3, 0xd168f69e, 0x9ed12768, 0x7f8122a3, 0xa37f5d81, 0x66aaee44, 0x446688aa, 0x7e82d654, 0x547ea882, 0xabe6dd3b, 0x3bab76e6, 0x839e950b, 0xb83169e +, 0xca45c98c, 0x8cca0345, 0x297bbcc7, 0xc729957b, 0xd36e056b, 0x6bd3d66e, 0x3c446c28, 0x283c5044, 0x798b2ca7, 0xa779558b, 0xe23d81bc, 0xbce2633d, 0x1d273116, 0x161d2c27, 0x769a37ad, 0xad76419a +, 0x3b4d96db, 0xdb3bad4d, 0x56fa9e64, 0x6456c8fa, 0x4ed2a674, 0x744ee8d2, 0x1e223614, 0x141e2822, 0xdb76e492, 0x92db3f76, 0xa1e120c, 0xc0a181e, 0x6cb4fc48, 0x486c90b4, 0xe4378fb8, 0xb8e46b37 +, 0x5de7789f, 0x9f5d25e7, 0x6eb20fbd, 0xbd6e61b2, 0xef2a6943, 0x43ef862a, 0xa6f135c4, 0xc4a693f1, 0xa8e3da39, 0x39a872e3, 0xa4f7c631, 0x31a462f7, 0x37598ad3, 0xd337bd59, 0x8b8674f2, 0xf28bff86 +, 0x325683d5, 0xd532b156, 0x43c54e8b, 0x8b430dc5, 0x59eb856e, 0x6e59dceb, 0xb7c218da, 0xdab7afc2, 0x8c8f8e01, 0x18c028f, 0x64ac1db1, 0xb16479ac, 0xd26df19c, 0x9cd2236d, 0xe03b7249, 0x49e0923b +, 0xb4c71fd8, 0xd8b4abc7, 0xfa15b9ac, 0xacfa4315, 0x709faf3, 0xf307fd09, 0x256fa0cf, 0xcf25856f, 0xafea20ca, 0xcaaf8fea, 0x8e897df4, 0xf48ef389, 0xe9206747, 0x47e98e20, 0x18283810, 0x10182028 +, 0xd5640b6f, 0x6fd5de64, 0x888373f0, 0xf088fb83, 0x6fb1fb4a, 0x4a6f94b1, 0x7296ca5c, 0x5c72b896, 0x246c5438, 0x3824706c, 0xf1085f57, 0x57f1ae08, 0xc7522173, 0x73c7e652, 0x51f36497, 0x975135f3 +, 0x2365aecb, 0xcb238d65, 0x7c8425a1, 0xa17c5984, 0x9cbf57e8, 0xe89ccbbf, 0x21635d3e, 0x3e217c63, 0xdd7cea96, 0x96dd377c, 0xdc7f1e61, 0x61dcc27f, 0x86919c0d, 0xd861a91, 0x85949b0f, 0xf851e94 +, 0x90ab4be0, 0xe090dbab, 0x42c6ba7c, 0x7c42f8c6, 0xc4572671, 0x71c4e257, 0xaae529cc, 0xccaa83e5, 0xd873e390, 0x90d83b73, 0x50f0906, 0x6050c0f, 0x103f4f7, 0xf701f503, 0x12362a1c, 0x1c123836 +, 0xa3fe3cc2, 0xc2a39ffe, 0x5fe18b6a, 0x6a5fd4e1, 0xf910beae, 0xaef94710, 0xd06b0269, 0x69d0d26b, 0x91a8bf17, 0x17912ea8, 0x58e87199, 0x995829e8, 0x2769533a, 0x3a277469, 0xb9d0f727, 0x27b94ed0 +, 0x384891d9, 0xd938a948, 0x1335deeb, 0xeb13cd35, 0xb3cee52b, 0x2bb356ce, 0x33557722, 0x22334455, 0xbbd604d2, 0xd2bbbfd6, 0x709039a9, 0xa9704990, 0x89808707, 0x7890e80, 0xa7f2c133, 0x33a766f2 +, 0xb6c1ec2d, 0x2db65ac1, 0x22665a3c, 0x3c227866, 0x92adb815, 0x15922aad, 0x2060a9c9, 0xc9208960, 0x49db5c87, 0x874915db, 0xff1ab0aa, 0xaaff4f1a, 0x7888d850, 0x5078a088, 0x7a8e2ba5, 0xa57a518e +, 0x8f8a8903, 0x38f068a, 0xf8134a59, 0x59f8b213, 0x809b9209, 0x980129b, 0x1739231a, 0x1a173439, 0xda751065, 0x65daca75, 0x315384d7, 0xd731b553, 0xc651d584, 0x84c61351, 0xb8d303d0, 0xd0b8bbd3 +, 0xc35edc82, 0x82c31f5e, 0xb0cbe229, 0x29b052cb, 0x7799c35a, 0x5a77b499, 0x11332d1e, 0x1e113c33, 0xcb463d7b, 0x7bcbf646, 0xfc1fb7a8, 0xa8fc4b1f, 0xd6610c6d, 0x6dd6da61, 0x3a4e622c, 0x2c3a584e +}; + +#define GROESTL_ROTATE_COLUMN_DOWN(v1, v2, amount_bytes, temp_var) \ + { temp_var = (v1<<(8*amount_bytes))|(v2>>(8*(4-amount_bytes))); \ + v2 = (v2<<(8*amount_bytes))|(v1>>(8*(4-amount_bytes))); \ + v1 = temp_var; } + +#define GROESTL_COLUMN(x,y,i,c0,c1,c2,c3,c4,c5,c6,c7,tv1,tv2,tu,tl,t) \ + tu = d_groestl_T[2*(uint32_t)x[4*c0+0]]; \ + tl = d_groestl_T[2*(uint32_t)x[4*c0+0]+1]; \ + tv1 = d_groestl_T[2*(uint32_t)x[4*c1+1]]; \ + tv2 = d_groestl_T[2*(uint32_t)x[4*c1+1]+1]; \ + GROESTL_ROTATE_COLUMN_DOWN(tv1,tv2,1,t) \ + tu ^= tv1; \ + tl ^= tv2; \ + tv1 = d_groestl_T[2*(uint32_t)x[4*c2+2]]; \ + tv2 = d_groestl_T[2*(uint32_t)x[4*c2+2]+1]; \ + GROESTL_ROTATE_COLUMN_DOWN(tv1,tv2,2,t) \ + tu ^= tv1; \ + tl ^= tv2; \ + tv1 = d_groestl_T[2*(uint32_t)x[4*c3+3]]; \ + tv2 = d_groestl_T[2*(uint32_t)x[4*c3+3]+1]; \ + GROESTL_ROTATE_COLUMN_DOWN(tv1,tv2,3,t) \ + tu ^= tv1; \ + tl ^= tv2; \ + tl ^= d_groestl_T[2*(uint32_t)x[4*c4+0]]; \ + tu ^= d_groestl_T[2*(uint32_t)x[4*c4+0]+1]; \ + tv1 = d_groestl_T[2*(uint32_t)x[4*c5+1]]; \ + tv2 = d_groestl_T[2*(uint32_t)x[4*c5+1]+1]; \ + GROESTL_ROTATE_COLUMN_DOWN(tv1,tv2,1,t) \ + tl ^= tv1; \ + tu ^= tv2; \ + tv1 = d_groestl_T[2*(uint32_t)x[4*c6+2]]; \ + tv2 = d_groestl_T[2*(uint32_t)x[4*c6+2]+1]; \ + GROESTL_ROTATE_COLUMN_DOWN(tv1,tv2,2,t) \ + tl ^= tv1; \ + tu ^= tv2; \ + tv1 = d_groestl_T[2*(uint32_t)x[4*c7+3]]; \ + tv2 = d_groestl_T[2*(uint32_t)x[4*c7+3]+1]; \ + GROESTL_ROTATE_COLUMN_DOWN(tv1,tv2,3,t) \ + tl ^= tv1; \ + tu ^= tv2; \ + y[i] = tu; \ + y[i+1] = tl; + +__device__ void cn_groestl_RND512P(uint8_t * __restrict__ x, uint32_t * __restrict__ y, uint32_t r) +{ + uint32_t temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp; + uint32_t* x32 = (uint32_t*)x; + x32[ 0] ^= 0x00000000^r; + x32[ 2] ^= 0x00000010^r; + x32[ 4] ^= 0x00000020^r; + x32[ 6] ^= 0x00000030^r; + x32[ 8] ^= 0x00000040^r; + x32[10] ^= 0x00000050^r; + x32[12] ^= 0x00000060^r; + x32[14] ^= 0x00000070^r; + GROESTL_COLUMN(x,y, 0, 0, 2, 4, 6, 9, 11, 13, 15, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp); + GROESTL_COLUMN(x,y, 2, 2, 4, 6, 8, 11, 13, 15, 1, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp); + GROESTL_COLUMN(x,y, 4, 4, 6, 8, 10, 13, 15, 1, 3, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp); + GROESTL_COLUMN(x,y, 6, 6, 8, 10, 12, 15, 1, 3, 5, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp); + GROESTL_COLUMN(x,y, 8, 8, 10, 12, 14, 1, 3, 5, 7, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp); + GROESTL_COLUMN(x,y,10, 10, 12, 14, 0, 3, 5, 7, 9, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp); + GROESTL_COLUMN(x,y,12, 12, 14, 0, 2, 5, 7, 9, 11, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp); + GROESTL_COLUMN(x,y,14, 14, 0, 2, 4, 7, 9, 11, 13, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp); +} + +__device__ void cn_groestl_RND512Q(uint8_t * __restrict__ x, uint32_t * __restrict__ y, uint32_t r) +{ + uint32_t temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp; + uint32_t* x32 = (uint32_t*)x; + x32[ 0] = ~x32[ 0]; + x32[ 1] ^= 0xffffffff^r; + x32[ 2] = ~x32[ 2]; + x32[ 3] ^= 0xefffffff^r; + x32[ 4] = ~x32[ 4]; + x32[ 5] ^= 0xdfffffff^r; + x32[ 6] = ~x32[ 6]; + x32[ 7] ^= 0xcfffffff^r; + x32[ 8] = ~x32[ 8]; + x32[ 9] ^= 0xbfffffff^r; + x32[10] = ~x32[10]; + x32[11] ^= 0xafffffff^r; + x32[12] = ~x32[12]; + x32[13] ^= 0x9fffffff^r; + x32[14] = ~x32[14]; + x32[15] ^= 0x8fffffff^r; + GROESTL_COLUMN(x,y, 0, 2, 6, 10, 14, 1, 5, 9, 13, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp); + GROESTL_COLUMN(x,y, 2, 4, 8, 12, 0, 3, 7, 11, 15, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp); + GROESTL_COLUMN(x,y, 4, 6, 10, 14, 2, 5, 9, 13, 1, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp); + GROESTL_COLUMN(x,y, 6, 8, 12, 0, 4, 7, 11, 15, 3, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp); + GROESTL_COLUMN(x,y, 8, 10, 14, 2, 6, 9, 13, 1, 5, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp); + GROESTL_COLUMN(x,y,10, 12, 0, 4, 8, 11, 15, 3, 7, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp); + GROESTL_COLUMN(x,y,12, 14, 2, 6, 10, 13, 1, 5, 9, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp); + GROESTL_COLUMN(x,y,14, 0, 4, 8, 12, 15, 3, 7, 11, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp); +} + +__device__ void cn_groestl_F512(uint32_t * __restrict__ h, const uint32_t * __restrict__ m) +{ + int i; + uint32_t Ptmp[2*GROESTL_COLS512]; + uint32_t Qtmp[2*GROESTL_COLS512]; + uint32_t y[2*GROESTL_COLS512]; + uint32_t z[2*GROESTL_COLS512]; + + for (i = 0; i < 2*GROESTL_COLS512; i++) + { + z[i] = m[i]; + Ptmp[i] = h[i]^m[i]; + } + + cn_groestl_RND512Q((uint8_t*)z, y, 0x00000000); + cn_groestl_RND512Q((uint8_t*)y, z, 0x01000000); + cn_groestl_RND512Q((uint8_t*)z, y, 0x02000000); + cn_groestl_RND512Q((uint8_t*)y, z, 0x03000000); + cn_groestl_RND512Q((uint8_t*)z, y, 0x04000000); + cn_groestl_RND512Q((uint8_t*)y, z, 0x05000000); + cn_groestl_RND512Q((uint8_t*)z, y, 0x06000000); + cn_groestl_RND512Q((uint8_t*)y, z, 0x07000000); + cn_groestl_RND512Q((uint8_t*)z, y, 0x08000000); + cn_groestl_RND512Q((uint8_t*)y, Qtmp, 0x09000000); + + cn_groestl_RND512P((uint8_t*)Ptmp, y, 0x00000000); + cn_groestl_RND512P((uint8_t*)y, z, 0x00000001); + cn_groestl_RND512P((uint8_t*)z, y, 0x00000002); + cn_groestl_RND512P((uint8_t*)y, z, 0x00000003); + cn_groestl_RND512P((uint8_t*)z, y, 0x00000004); + cn_groestl_RND512P((uint8_t*)y, z, 0x00000005); + cn_groestl_RND512P((uint8_t*)z, y, 0x00000006); + cn_groestl_RND512P((uint8_t*)y, z, 0x00000007); + cn_groestl_RND512P((uint8_t*)z, y, 0x00000008); + cn_groestl_RND512P((uint8_t*)y, Ptmp, 0x00000009); + + for (i = 0; i < 2*GROESTL_COLS512; i++) + h[i] ^= Ptmp[i]^Qtmp[i]; +} + +__device__ void cn_groestl_outputtransformation(groestlHashState *ctx) +{ + int j; + uint32_t temp[2*GROESTL_COLS512]; + uint32_t y[2*GROESTL_COLS512]; + uint32_t z[2*GROESTL_COLS512]; + + for (j = 0; j < 2*GROESTL_COLS512; j++) + temp[j] = ctx->chaining[j]; + + cn_groestl_RND512P((uint8_t*)temp, y, 0x00000000); + cn_groestl_RND512P((uint8_t*)y, z, 0x00000001); + cn_groestl_RND512P((uint8_t*)z, y, 0x00000002); + cn_groestl_RND512P((uint8_t*)y, z, 0x00000003); + cn_groestl_RND512P((uint8_t*)z, y, 0x00000004); + cn_groestl_RND512P((uint8_t*)y, z, 0x00000005); + cn_groestl_RND512P((uint8_t*)z, y, 0x00000006); + cn_groestl_RND512P((uint8_t*)y, z, 0x00000007); + cn_groestl_RND512P((uint8_t*)z, y, 0x00000008); + cn_groestl_RND512P((uint8_t*)y, temp, 0x00000009); + + for (j = 0; j < 2*GROESTL_COLS512; j++) + ctx->chaining[j] ^= temp[j]; +} + +__device__ void cn_groestl_transform(groestlHashState * __restrict__ ctx, + const uint8_t * __restrict__ input, int msglen) +{ + for (; msglen >= GROESTL_SIZE512; msglen -= GROESTL_SIZE512, input += GROESTL_SIZE512) + { + cn_groestl_F512(ctx->chaining,(uint32_t*)input); + ctx->block_counter1++; + + if (ctx->block_counter1 == 0) + ctx->block_counter2++; + } +} + +__device__ void cn_groestl_final(groestlHashState* __restrict__ ctx, + BitSequence* __restrict__ output) +{ + int i, j = 0, hashbytelen = GROESTL_HASH_BIT_LEN/8; + uint8_t *s = (BitSequence*)ctx->chaining; + + if (ctx->bits_in_last_byte) + { + ctx->buffer[(int)ctx->buf_ptr-1] &= ((1<<ctx->bits_in_last_byte)-1)<<(8-ctx->bits_in_last_byte); + ctx->buffer[(int)ctx->buf_ptr-1] ^= 0x1<<(7-ctx->bits_in_last_byte); + ctx->bits_in_last_byte = 0; + } + else + { + ctx->buffer[(int)ctx->buf_ptr++] = 0x80; + } + + if (ctx->buf_ptr > GROESTL_SIZE512-GROESTL_LENGTHFIELDLEN) + { + while (ctx->buf_ptr < GROESTL_SIZE512) + ctx->buffer[(int)ctx->buf_ptr++] = 0; + + cn_groestl_transform(ctx, ctx->buffer, GROESTL_SIZE512); + ctx->buf_ptr = 0; + } + + while (ctx->buf_ptr < GROESTL_SIZE512-GROESTL_LENGTHFIELDLEN) + ctx->buffer[(int)ctx->buf_ptr++] = 0; + + ctx->block_counter1++; + if (ctx->block_counter1 == 0) + ctx->block_counter2++; + ctx->buf_ptr = GROESTL_SIZE512; + + while (ctx->buf_ptr > GROESTL_SIZE512-(int)sizeof(uint32_t)) + { + ctx->buffer[(int)--ctx->buf_ptr] = (uint8_t)ctx->block_counter1; + ctx->block_counter1 >>= 8; + } + while (ctx->buf_ptr > GROESTL_SIZE512-GROESTL_LENGTHFIELDLEN) + { + ctx->buffer[(int)--ctx->buf_ptr] = (uint8_t)ctx->block_counter2; + ctx->block_counter2 >>= 8; + } + cn_groestl_transform(ctx, ctx->buffer, GROESTL_SIZE512); + cn_groestl_outputtransformation(ctx); + + for (i = GROESTL_SIZE512-hashbytelen; i < GROESTL_SIZE512; i++,j++) + output[j] = s[i]; + + for (i = 0; i < GROESTL_COLS512; i++) + ctx->chaining[i] = 0; + for (i = 0; i < GROESTL_SIZE512; i++) + ctx->buffer[i] = 0; +} + +__device__ void cn_groestl_update(groestlHashState* __restrict__ ctx, + const BitSequence* __restrict__ input, DataLength databitlen) +{ + int index = 0; + int msglen = (int)(databitlen/8); + int rem = (int)(databitlen%8); + + if (ctx->buf_ptr) + { + while (ctx->buf_ptr < GROESTL_SIZE512 && index < msglen) + ctx->buffer[(int)ctx->buf_ptr++] = input[index++]; + + if (ctx->buf_ptr < GROESTL_SIZE512) + { + if (rem) + { + ctx->bits_in_last_byte = rem; + ctx->buffer[(int)ctx->buf_ptr++] = input[index]; + } + return; + } + + ctx->buf_ptr = 0; + cn_groestl_transform(ctx, ctx->buffer, GROESTL_SIZE512); + } + + cn_groestl_transform(ctx, input+index, msglen-index); + index += ((msglen-index)/GROESTL_SIZE512)*GROESTL_SIZE512; + + while (index < msglen) + ctx->buffer[(int)ctx->buf_ptr++] = input[index++]; + + if (rem) + { + ctx->bits_in_last_byte = rem; + ctx->buffer[(int)ctx->buf_ptr++] = input[index]; + } +} + +__device__ void cn_groestl_init(groestlHashState* ctx) +{ + int i = 0; + + for(;i<(GROESTL_SIZE512/sizeof(uint32_t));i++) + ctx->chaining[i] = 0; + + ctx->chaining[2*GROESTL_COLS512-1] = u32BIG((uint32_t)GROESTL_HASH_BIT_LEN); + ctx->buf_ptr = 0; + ctx->block_counter1 = 0; + ctx->block_counter2 = 0; + ctx->bits_in_last_byte = 0; +} + +__device__ void cn_groestl(const BitSequence * __restrict__ data, DataLength len, BitSequence * __restrict__ hashval) +{ + DataLength databitlen = len << 3; + groestlHashState context; + + cn_groestl_init(&context); + cn_groestl_update(&context, data, databitlen); + cn_groestl_final(&context, hashval); +} diff --git a/xmrstak/backend/nvidia/nvcc_code/cuda_jh.hpp b/xmrstak/backend/nvidia/nvcc_code/cuda_jh.hpp new file mode 100644 index 0000000..679046e --- /dev/null +++ b/xmrstak/backend/nvidia/nvcc_code/cuda_jh.hpp @@ -0,0 +1,301 @@ +#include <stdint.h> + +typedef struct { + int hashbitlen; + unsigned long long databitlen; + unsigned long long datasize_in_buffer; + uint64_t x[8][2]; + unsigned char buffer[64]; +} jhHashState; + +__constant__ unsigned char d_JH256_H0[512] = +{ + 0xeb, 0x98, 0xa3, 0x41, 0x2c, 0x20, 0xd3, 0xeb, 0x92, 0xcd, 0xbe, 0x7b, 0x9c, 0xb2, 0x45, 0xc1, + 0x1c, 0x93, 0x51, 0x91, 0x60, 0xd4, 0xc7, 0xfa, 0x26, 0x0, 0x82, 0xd6, 0x7e, 0x50, 0x8a, 0x3, + 0xa4, 0x23, 0x9e, 0x26, 0x77, 0x26, 0xb9, 0x45, 0xe0, 0xfb, 0x1a, 0x48, 0xd4, 0x1a, 0x94, 0x77, + 0xcd, 0xb5, 0xab, 0x26, 0x2, 0x6b, 0x17, 0x7a, 0x56, 0xf0, 0x24, 0x42, 0xf, 0xff, 0x2f, 0xa8, + 0x71, 0xa3, 0x96, 0x89, 0x7f, 0x2e, 0x4d, 0x75, 0x1d, 0x14, 0x49, 0x8, 0xf7, 0x7d, 0xe2, 0x62, + 0x27, 0x76, 0x95, 0xf7, 0x76, 0x24, 0x8f, 0x94, 0x87, 0xd5, 0xb6, 0x57, 0x47, 0x80, 0x29, 0x6c, + 0x5c, 0x5e, 0x27, 0x2d, 0xac, 0x8e, 0xd, 0x6c, 0x51, 0x84, 0x50, 0xc6, 0x57, 0x5, 0x7a, 0xf, + 0x7b, 0xe4, 0xd3, 0x67, 0x70, 0x24, 0x12, 0xea, 0x89, 0xe3, 0xab, 0x13, 0xd3, 0x1c, 0xd7, 0x69 +}; + +__constant__ unsigned char d_E8_rc[42][32] = +{ + {0x72, 0xd5, 0xde, 0xa2, 0xdf, 0x15, 0xf8, 0x67, 0x7b, 0x84, 0x15, 0xa, 0xb7, 0x23, 0x15, 0x57, 0x81, 0xab, 0xd6, 0x90, 0x4d, 0x5a, 0x87, 0xf6, 0x4e, 0x9f, 0x4f, 0xc5, 0xc3, 0xd1, 0x2b, 0x40}, + {0xea, 0x98, 0x3a, 0xe0, 0x5c, 0x45, 0xfa, 0x9c, 0x3, 0xc5, 0xd2, 0x99, 0x66, 0xb2, 0x99, 0x9a, 0x66, 0x2, 0x96, 0xb4, 0xf2, 0xbb, 0x53, 0x8a, 0xb5, 0x56, 0x14, 0x1a, 0x88, 0xdb, 0xa2, 0x31}, + {0x3, 0xa3, 0x5a, 0x5c, 0x9a, 0x19, 0xe, 0xdb, 0x40, 0x3f, 0xb2, 0xa, 0x87, 0xc1, 0x44, 0x10, 0x1c, 0x5, 0x19, 0x80, 0x84, 0x9e, 0x95, 0x1d, 0x6f, 0x33, 0xeb, 0xad, 0x5e, 0xe7, 0xcd, 0xdc}, + {0x10, 0xba, 0x13, 0x92, 0x2, 0xbf, 0x6b, 0x41, 0xdc, 0x78, 0x65, 0x15, 0xf7, 0xbb, 0x27, 0xd0, 0xa, 0x2c, 0x81, 0x39, 0x37, 0xaa, 0x78, 0x50, 0x3f, 0x1a, 0xbf, 0xd2, 0x41, 0x0, 0x91, 0xd3}, + {0x42, 0x2d, 0x5a, 0xd, 0xf6, 0xcc, 0x7e, 0x90, 0xdd, 0x62, 0x9f, 0x9c, 0x92, 0xc0, 0x97, 0xce, 0x18, 0x5c, 0xa7, 0xb, 0xc7, 0x2b, 0x44, 0xac, 0xd1, 0xdf, 0x65, 0xd6, 0x63, 0xc6, 0xfc, 0x23}, + {0x97, 0x6e, 0x6c, 0x3, 0x9e, 0xe0, 0xb8, 0x1a, 0x21, 0x5, 0x45, 0x7e, 0x44, 0x6c, 0xec, 0xa8, 0xee, 0xf1, 0x3, 0xbb, 0x5d, 0x8e, 0x61, 0xfa, 0xfd, 0x96, 0x97, 0xb2, 0x94, 0x83, 0x81, 0x97}, + {0x4a, 0x8e, 0x85, 0x37, 0xdb, 0x3, 0x30, 0x2f, 0x2a, 0x67, 0x8d, 0x2d, 0xfb, 0x9f, 0x6a, 0x95, 0x8a, 0xfe, 0x73, 0x81, 0xf8, 0xb8, 0x69, 0x6c, 0x8a, 0xc7, 0x72, 0x46, 0xc0, 0x7f, 0x42, 0x14}, + {0xc5, 0xf4, 0x15, 0x8f, 0xbd, 0xc7, 0x5e, 0xc4, 0x75, 0x44, 0x6f, 0xa7, 0x8f, 0x11, 0xbb, 0x80, 0x52, 0xde, 0x75, 0xb7, 0xae, 0xe4, 0x88, 0xbc, 0x82, 0xb8, 0x0, 0x1e, 0x98, 0xa6, 0xa3, 0xf4}, + {0x8e, 0xf4, 0x8f, 0x33, 0xa9, 0xa3, 0x63, 0x15, 0xaa, 0x5f, 0x56, 0x24, 0xd5, 0xb7, 0xf9, 0x89, 0xb6, 0xf1, 0xed, 0x20, 0x7c, 0x5a, 0xe0, 0xfd, 0x36, 0xca, 0xe9, 0x5a, 0x6, 0x42, 0x2c, 0x36}, + {0xce, 0x29, 0x35, 0x43, 0x4e, 0xfe, 0x98, 0x3d, 0x53, 0x3a, 0xf9, 0x74, 0x73, 0x9a, 0x4b, 0xa7, 0xd0, 0xf5, 0x1f, 0x59, 0x6f, 0x4e, 0x81, 0x86, 0xe, 0x9d, 0xad, 0x81, 0xaf, 0xd8, 0x5a, 0x9f}, + {0xa7, 0x5, 0x6, 0x67, 0xee, 0x34, 0x62, 0x6a, 0x8b, 0xb, 0x28, 0xbe, 0x6e, 0xb9, 0x17, 0x27, 0x47, 0x74, 0x7, 0x26, 0xc6, 0x80, 0x10, 0x3f, 0xe0, 0xa0, 0x7e, 0x6f, 0xc6, 0x7e, 0x48, 0x7b}, + {0xd, 0x55, 0xa, 0xa5, 0x4a, 0xf8, 0xa4, 0xc0, 0x91, 0xe3, 0xe7, 0x9f, 0x97, 0x8e, 0xf1, 0x9e, 0x86, 0x76, 0x72, 0x81, 0x50, 0x60, 0x8d, 0xd4, 0x7e, 0x9e, 0x5a, 0x41, 0xf3, 0xe5, 0xb0, 0x62}, + {0xfc, 0x9f, 0x1f, 0xec, 0x40, 0x54, 0x20, 0x7a, 0xe3, 0xe4, 0x1a, 0x0, 0xce, 0xf4, 0xc9, 0x84, 0x4f, 0xd7, 0x94, 0xf5, 0x9d, 0xfa, 0x95, 0xd8, 0x55, 0x2e, 0x7e, 0x11, 0x24, 0xc3, 0x54, 0xa5}, + {0x5b, 0xdf, 0x72, 0x28, 0xbd, 0xfe, 0x6e, 0x28, 0x78, 0xf5, 0x7f, 0xe2, 0xf, 0xa5, 0xc4, 0xb2, 0x5, 0x89, 0x7c, 0xef, 0xee, 0x49, 0xd3, 0x2e, 0x44, 0x7e, 0x93, 0x85, 0xeb, 0x28, 0x59, 0x7f}, + {0x70, 0x5f, 0x69, 0x37, 0xb3, 0x24, 0x31, 0x4a, 0x5e, 0x86, 0x28, 0xf1, 0x1d, 0xd6, 0xe4, 0x65, 0xc7, 0x1b, 0x77, 0x4, 0x51, 0xb9, 0x20, 0xe7, 0x74, 0xfe, 0x43, 0xe8, 0x23, 0xd4, 0x87, 0x8a}, + {0x7d, 0x29, 0xe8, 0xa3, 0x92, 0x76, 0x94, 0xf2, 0xdd, 0xcb, 0x7a, 0x9, 0x9b, 0x30, 0xd9, 0xc1, 0x1d, 0x1b, 0x30, 0xfb, 0x5b, 0xdc, 0x1b, 0xe0, 0xda, 0x24, 0x49, 0x4f, 0xf2, 0x9c, 0x82, 0xbf}, + {0xa4, 0xe7, 0xba, 0x31, 0xb4, 0x70, 0xbf, 0xff, 0xd, 0x32, 0x44, 0x5, 0xde, 0xf8, 0xbc, 0x48, 0x3b, 0xae, 0xfc, 0x32, 0x53, 0xbb, 0xd3, 0x39, 0x45, 0x9f, 0xc3, 0xc1, 0xe0, 0x29, 0x8b, 0xa0}, + {0xe5, 0xc9, 0x5, 0xfd, 0xf7, 0xae, 0x9, 0xf, 0x94, 0x70, 0x34, 0x12, 0x42, 0x90, 0xf1, 0x34, 0xa2, 0x71, 0xb7, 0x1, 0xe3, 0x44, 0xed, 0x95, 0xe9, 0x3b, 0x8e, 0x36, 0x4f, 0x2f, 0x98, 0x4a}, + {0x88, 0x40, 0x1d, 0x63, 0xa0, 0x6c, 0xf6, 0x15, 0x47, 0xc1, 0x44, 0x4b, 0x87, 0x52, 0xaf, 0xff, 0x7e, 0xbb, 0x4a, 0xf1, 0xe2, 0xa, 0xc6, 0x30, 0x46, 0x70, 0xb6, 0xc5, 0xcc, 0x6e, 0x8c, 0xe6}, + {0xa4, 0xd5, 0xa4, 0x56, 0xbd, 0x4f, 0xca, 0x0, 0xda, 0x9d, 0x84, 0x4b, 0xc8, 0x3e, 0x18, 0xae, 0x73, 0x57, 0xce, 0x45, 0x30, 0x64, 0xd1, 0xad, 0xe8, 0xa6, 0xce, 0x68, 0x14, 0x5c, 0x25, 0x67}, + {0xa3, 0xda, 0x8c, 0xf2, 0xcb, 0xe, 0xe1, 0x16, 0x33, 0xe9, 0x6, 0x58, 0x9a, 0x94, 0x99, 0x9a, 0x1f, 0x60, 0xb2, 0x20, 0xc2, 0x6f, 0x84, 0x7b, 0xd1, 0xce, 0xac, 0x7f, 0xa0, 0xd1, 0x85, 0x18}, + {0x32, 0x59, 0x5b, 0xa1, 0x8d, 0xdd, 0x19, 0xd3, 0x50, 0x9a, 0x1c, 0xc0, 0xaa, 0xa5, 0xb4, 0x46, 0x9f, 0x3d, 0x63, 0x67, 0xe4, 0x4, 0x6b, 0xba, 0xf6, 0xca, 0x19, 0xab, 0xb, 0x56, 0xee, 0x7e}, + {0x1f, 0xb1, 0x79, 0xea, 0xa9, 0x28, 0x21, 0x74, 0xe9, 0xbd, 0xf7, 0x35, 0x3b, 0x36, 0x51, 0xee, 0x1d, 0x57, 0xac, 0x5a, 0x75, 0x50, 0xd3, 0x76, 0x3a, 0x46, 0xc2, 0xfe, 0xa3, 0x7d, 0x70, 0x1}, + {0xf7, 0x35, 0xc1, 0xaf, 0x98, 0xa4, 0xd8, 0x42, 0x78, 0xed, 0xec, 0x20, 0x9e, 0x6b, 0x67, 0x79, 0x41, 0x83, 0x63, 0x15, 0xea, 0x3a, 0xdb, 0xa8, 0xfa, 0xc3, 0x3b, 0x4d, 0x32, 0x83, 0x2c, 0x83}, + {0xa7, 0x40, 0x3b, 0x1f, 0x1c, 0x27, 0x47, 0xf3, 0x59, 0x40, 0xf0, 0x34, 0xb7, 0x2d, 0x76, 0x9a, 0xe7, 0x3e, 0x4e, 0x6c, 0xd2, 0x21, 0x4f, 0xfd, 0xb8, 0xfd, 0x8d, 0x39, 0xdc, 0x57, 0x59, 0xef}, + {0x8d, 0x9b, 0xc, 0x49, 0x2b, 0x49, 0xeb, 0xda, 0x5b, 0xa2, 0xd7, 0x49, 0x68, 0xf3, 0x70, 0xd, 0x7d, 0x3b, 0xae, 0xd0, 0x7a, 0x8d, 0x55, 0x84, 0xf5, 0xa5, 0xe9, 0xf0, 0xe4, 0xf8, 0x8e, 0x65}, + {0xa0, 0xb8, 0xa2, 0xf4, 0x36, 0x10, 0x3b, 0x53, 0xc, 0xa8, 0x7, 0x9e, 0x75, 0x3e, 0xec, 0x5a, 0x91, 0x68, 0x94, 0x92, 0x56, 0xe8, 0x88, 0x4f, 0x5b, 0xb0, 0x5c, 0x55, 0xf8, 0xba, 0xbc, 0x4c}, + {0xe3, 0xbb, 0x3b, 0x99, 0xf3, 0x87, 0x94, 0x7b, 0x75, 0xda, 0xf4, 0xd6, 0x72, 0x6b, 0x1c, 0x5d, 0x64, 0xae, 0xac, 0x28, 0xdc, 0x34, 0xb3, 0x6d, 0x6c, 0x34, 0xa5, 0x50, 0xb8, 0x28, 0xdb, 0x71}, + {0xf8, 0x61, 0xe2, 0xf2, 0x10, 0x8d, 0x51, 0x2a, 0xe3, 0xdb, 0x64, 0x33, 0x59, 0xdd, 0x75, 0xfc, 0x1c, 0xac, 0xbc, 0xf1, 0x43, 0xce, 0x3f, 0xa2, 0x67, 0xbb, 0xd1, 0x3c, 0x2, 0xe8, 0x43, 0xb0}, + {0x33, 0xa, 0x5b, 0xca, 0x88, 0x29, 0xa1, 0x75, 0x7f, 0x34, 0x19, 0x4d, 0xb4, 0x16, 0x53, 0x5c, 0x92, 0x3b, 0x94, 0xc3, 0xe, 0x79, 0x4d, 0x1e, 0x79, 0x74, 0x75, 0xd7, 0xb6, 0xee, 0xaf, 0x3f}, + {0xea, 0xa8, 0xd4, 0xf7, 0xbe, 0x1a, 0x39, 0x21, 0x5c, 0xf4, 0x7e, 0x9, 0x4c, 0x23, 0x27, 0x51, 0x26, 0xa3, 0x24, 0x53, 0xba, 0x32, 0x3c, 0xd2, 0x44, 0xa3, 0x17, 0x4a, 0x6d, 0xa6, 0xd5, 0xad}, + {0xb5, 0x1d, 0x3e, 0xa6, 0xaf, 0xf2, 0xc9, 0x8, 0x83, 0x59, 0x3d, 0x98, 0x91, 0x6b, 0x3c, 0x56, 0x4c, 0xf8, 0x7c, 0xa1, 0x72, 0x86, 0x60, 0x4d, 0x46, 0xe2, 0x3e, 0xcc, 0x8, 0x6e, 0xc7, 0xf6}, + {0x2f, 0x98, 0x33, 0xb3, 0xb1, 0xbc, 0x76, 0x5e, 0x2b, 0xd6, 0x66, 0xa5, 0xef, 0xc4, 0xe6, 0x2a, 0x6, 0xf4, 0xb6, 0xe8, 0xbe, 0xc1, 0xd4, 0x36, 0x74, 0xee, 0x82, 0x15, 0xbc, 0xef, 0x21, 0x63}, + {0xfd, 0xc1, 0x4e, 0xd, 0xf4, 0x53, 0xc9, 0x69, 0xa7, 0x7d, 0x5a, 0xc4, 0x6, 0x58, 0x58, 0x26, 0x7e, 0xc1, 0x14, 0x16, 0x6, 0xe0, 0xfa, 0x16, 0x7e, 0x90, 0xaf, 0x3d, 0x28, 0x63, 0x9d, 0x3f}, + {0xd2, 0xc9, 0xf2, 0xe3, 0x0, 0x9b, 0xd2, 0xc, 0x5f, 0xaa, 0xce, 0x30, 0xb7, 0xd4, 0xc, 0x30, 0x74, 0x2a, 0x51, 0x16, 0xf2, 0xe0, 0x32, 0x98, 0xd, 0xeb, 0x30, 0xd8, 0xe3, 0xce, 0xf8, 0x9a}, + {0x4b, 0xc5, 0x9e, 0x7b, 0xb5, 0xf1, 0x79, 0x92, 0xff, 0x51, 0xe6, 0x6e, 0x4, 0x86, 0x68, 0xd3, 0x9b, 0x23, 0x4d, 0x57, 0xe6, 0x96, 0x67, 0x31, 0xcc, 0xe6, 0xa6, 0xf3, 0x17, 0xa, 0x75, 0x5}, + {0xb1, 0x76, 0x81, 0xd9, 0x13, 0x32, 0x6c, 0xce, 0x3c, 0x17, 0x52, 0x84, 0xf8, 0x5, 0xa2, 0x62, 0xf4, 0x2b, 0xcb, 0xb3, 0x78, 0x47, 0x15, 0x47, 0xff, 0x46, 0x54, 0x82, 0x23, 0x93, 0x6a, 0x48}, + {0x38, 0xdf, 0x58, 0x7, 0x4e, 0x5e, 0x65, 0x65, 0xf2, 0xfc, 0x7c, 0x89, 0xfc, 0x86, 0x50, 0x8e, 0x31, 0x70, 0x2e, 0x44, 0xd0, 0xb, 0xca, 0x86, 0xf0, 0x40, 0x9, 0xa2, 0x30, 0x78, 0x47, 0x4e}, + {0x65, 0xa0, 0xee, 0x39, 0xd1, 0xf7, 0x38, 0x83, 0xf7, 0x5e, 0xe9, 0x37, 0xe4, 0x2c, 0x3a, 0xbd, 0x21, 0x97, 0xb2, 0x26, 0x1, 0x13, 0xf8, 0x6f, 0xa3, 0x44, 0xed, 0xd1, 0xef, 0x9f, 0xde, 0xe7}, + {0x8b, 0xa0, 0xdf, 0x15, 0x76, 0x25, 0x92, 0xd9, 0x3c, 0x85, 0xf7, 0xf6, 0x12, 0xdc, 0x42, 0xbe, 0xd8, 0xa7, 0xec, 0x7c, 0xab, 0x27, 0xb0, 0x7e, 0x53, 0x8d, 0x7d, 0xda, 0xaa, 0x3e, 0xa8, 0xde}, + {0xaa, 0x25, 0xce, 0x93, 0xbd, 0x2, 0x69, 0xd8, 0x5a, 0xf6, 0x43, 0xfd, 0x1a, 0x73, 0x8, 0xf9, 0xc0, 0x5f, 0xef, 0xda, 0x17, 0x4a, 0x19, 0xa5, 0x97, 0x4d, 0x66, 0x33, 0x4c, 0xfd, 0x21, 0x6a}, + {0x35, 0xb4, 0x98, 0x31, 0xdb, 0x41, 0x15, 0x70, 0xea, 0x1e, 0xf, 0xbb, 0xed, 0xcd, 0x54, 0x9b, 0x9a, 0xd0, 0x63, 0xa1, 0x51, 0x97, 0x40, 0x72, 0xf6, 0x75, 0x9d, 0xbf, 0x91, 0x47, 0x6f, 0xe2} +}; + +#define JH_SWAP1(x) (x) = ((((x) & 0x5555555555555555ULL) << 1) | (((x) & 0xaaaaaaaaaaaaaaaaULL) >> 1)); +#define JH_SWAP2(x) (x) = ((((x) & 0x3333333333333333ULL) << 2) | (((x) & 0xccccccccccccccccULL) >> 2)); +#define JH_SWAP4(x) (x) = ((((x) & 0x0f0f0f0f0f0f0f0fULL) << 4) | (((x) & 0xf0f0f0f0f0f0f0f0ULL) >> 4)); +#define JH_SWAP8(x) (x) = ((((x) & 0x00ff00ff00ff00ffULL) << 8) | (((x) & 0xff00ff00ff00ff00ULL) >> 8)); +#define JH_SWAP16(x) (x) = ((((x) & 0x0000ffff0000ffffULL) << 16) | (((x) & 0xffff0000ffff0000ULL) >> 16)); +#define JH_SWAP32(x) (x) = (((x) << 32) | ((x) >> 32)); + +#define JH_L(m0,m1,m2,m3,m4,m5,m6,m7) \ + (m4) ^= (m1); \ + (m5) ^= (m2); \ + (m6) ^= (m0) ^ (m3); \ + (m7) ^= (m0); \ + (m0) ^= (m5); \ + (m1) ^= (m6); \ + (m2) ^= (m4) ^ (m7); \ + (m3) ^= (m4); + +#define JH_SS(m0,m1,m2,m3,m4,m5,m6,m7,cc0,cc1) \ + m3 = ~(m3); \ + m7 = ~(m7); \ + m0 ^= ((~(m2)) & (cc0)); \ + m4 ^= ((~(m6)) & (cc1)); \ + temp0 = (cc0) ^ ((m0) & (m1));\ + temp1 = (cc1) ^ ((m4) & (m5));\ + m0 ^= ((m2) & (m3)); \ + m4 ^= ((m6) & (m7)); \ + m3 ^= ((~(m1)) & (m2)); \ + m7 ^= ((~(m5)) & (m6)); \ + m1 ^= ((m0) & (m2)); \ + m5 ^= ((m4) & (m6)); \ + m2 ^= ((m0) & (~(m3))); \ + m6 ^= ((m4) & (~(m7))); \ + m0 ^= ((m1) | (m3)); \ + m4 ^= ((m5) | (m7)); \ + m3 ^= ((m1) & (m2)); \ + m7 ^= ((m5) & (m6)); \ + m1 ^= (temp0 & (m0)); \ + m5 ^= (temp1 & (m4)); \ + m2 ^= temp0; \ + m6 ^= temp1; + +__device__ void cn_jh_E8(jhHashState *state) +{ + uint64_t i,roundnumber,temp0,temp1; + + for (roundnumber = 0; roundnumber < 42; roundnumber = roundnumber+7) + { + for (i = 0; i < 2; i++) + { + JH_SS(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i],((uint64_t *)d_E8_rc[roundnumber+0])[i],((uint64_t *)d_E8_rc[roundnumber+0])[i+2] ); + JH_L(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i]); + JH_SWAP1(state->x[1][i]); JH_SWAP1(state->x[3][i]); JH_SWAP1(state->x[5][i]); JH_SWAP1(state->x[7][i]); + } + + for (i = 0; i < 2; i++) + { + JH_SS(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i],((uint64_t *)d_E8_rc[roundnumber+1])[i],((uint64_t *)d_E8_rc[roundnumber+1])[i+2] ); + JH_L(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i]); + JH_SWAP2(state->x[1][i]); JH_SWAP2(state->x[3][i]); JH_SWAP2(state->x[5][i]); JH_SWAP2(state->x[7][i]); + } + + for (i = 0; i < 2; i++) + { + JH_SS(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i],((uint64_t *)d_E8_rc[roundnumber+2])[i],((uint64_t *)d_E8_rc[roundnumber+2])[i+2] ); + JH_L(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i]); + JH_SWAP4(state->x[1][i]); JH_SWAP4(state->x[3][i]); JH_SWAP4(state->x[5][i]); JH_SWAP4(state->x[7][i]); + } + + for (i = 0; i < 2; i++) + { + JH_SS(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i],((uint64_t *)d_E8_rc[roundnumber+3])[i],((uint64_t *)d_E8_rc[roundnumber+3])[i+2] ); + JH_L(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i]); + JH_SWAP8(state->x[1][i]); JH_SWAP8(state->x[3][i]); JH_SWAP8(state->x[5][i]); JH_SWAP8(state->x[7][i]); + } + + for (i = 0; i < 2; i++) + { + JH_SS(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i],((uint64_t *)d_E8_rc[roundnumber+4])[i],((uint64_t *)d_E8_rc[roundnumber+4])[i+2] ); + JH_L(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i]); + JH_SWAP16(state->x[1][i]); JH_SWAP16(state->x[3][i]); JH_SWAP16(state->x[5][i]); JH_SWAP16(state->x[7][i]); + } + + for (i = 0; i < 2; i++) + { + JH_SS(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i],((uint64_t *)d_E8_rc[roundnumber+5])[i],((uint64_t *)d_E8_rc[roundnumber+5])[i+2] ); + JH_L(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i]); + JH_SWAP32(state->x[1][i]); JH_SWAP32(state->x[3][i]); JH_SWAP32(state->x[5][i]); JH_SWAP32(state->x[7][i]); + } + + for (i = 0; i < 2; i++) + { + JH_SS(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i],((uint64_t *)d_E8_rc[roundnumber+6])[i],((uint64_t *)d_E8_rc[roundnumber+6])[i+2] ); + JH_L(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i]); + } + + for (i = 1; i < 8; i = i+2) + { + temp0 = state->x[i][0]; + state->x[i][0] = state->x[i][1]; + state->x[i][1] = temp0; + } + } +} + +__device__ void cn_jh_F8(jhHashState *state) +{ + uint64_t i; + + for (i = 0; i < 8; i++) + state->x[i >> 1][i & 1] ^= ((uint64_t *)state->buffer)[i]; + + cn_jh_E8(state); + + for (i = 0; i < 8; i++) + state->x[(8+i) >> 1][(8+i) & 1] ^= ((uint64_t *)state->buffer)[i]; +} + +__device__ void cn_jh_update(jhHashState * __restrict__ state, const BitSequence * __restrict__ data, DataLength databitlen) +{ + DataLength index; + + state->databitlen += databitlen; + index = 0; + + if ( (state->datasize_in_buffer > 0 ) && (( state->datasize_in_buffer + databitlen) < 512) ) + { + if ( (databitlen & 7) == 0 ) + memcpy(state->buffer + (state->datasize_in_buffer >> 3), data, 64-(state->datasize_in_buffer >> 3)); + else + memcpy(state->buffer + (state->datasize_in_buffer >> 3), data, 64-(state->datasize_in_buffer >> 3)+1); + state->datasize_in_buffer += databitlen; + databitlen = 0; + } + + if ( (state->datasize_in_buffer > 0 ) && (( state->datasize_in_buffer + databitlen) >= 512) ) + { + memcpy( state->buffer + (state->datasize_in_buffer >> 3), data, 64-(state->datasize_in_buffer >> 3) ); + index = 64-(state->datasize_in_buffer >> 3); + databitlen = databitlen - (512 - state->datasize_in_buffer); + cn_jh_F8(state); + state->datasize_in_buffer = 0; + } + + for ( ; databitlen >= 512; index = index+64, databitlen = databitlen - 512) + { + memcpy(state->buffer, data+index, 64); + cn_jh_F8(state); + } + + if ( databitlen > 0) + { + if ((databitlen & 7) == 0) + memcpy(state->buffer, data+index, (databitlen & 0x1ff) >> 3); + else + memcpy(state->buffer, data+index, ((databitlen & 0x1ff) >> 3)+1); + state->datasize_in_buffer = databitlen; + } +} + +/*pad the message, process the padded block(s), truncate the hash value H to obtain the message digest*/ +__device__ void cn_jh_final(jhHashState * __restrict__ state, BitSequence * __restrict__ hashval) +{ + unsigned int i; + //uint32_t *bufptr = (uint32_t *)state->buffer; + + if ( (state->databitlen & 0x1ff) == 0 ) + { + /*pad the message when databitlen is multiple of 512 bits, then process the padded block*/ + memset(state->buffer, 0, 64); + //for( i = 0; i < 16; i++ ) *(bufptr+i) = 0x00000000; + state->buffer[0] = 0x80; + state->buffer[63] = state->databitlen & 0xff; + state->buffer[62] = (state->databitlen >> 8) & 0xff; + state->buffer[61] = (state->databitlen >> 16) & 0xff; + state->buffer[60] = (state->databitlen >> 24) & 0xff; + state->buffer[59] = (state->databitlen >> 32) & 0xff; + state->buffer[58] = (state->databitlen >> 40) & 0xff; + state->buffer[57] = (state->databitlen >> 48) & 0xff; + state->buffer[56] = (state->databitlen >> 56) & 0xff; + cn_jh_F8(state); + } + else + { + /*set the rest of the bytes in the buffer to 0*/ + if ( (state->datasize_in_buffer & 7) == 0) + { + for (i = (state->databitlen & 0x1ff) >> 3; i < 64; i++) + state->buffer[i] = 0; + } + else + { + for (i = ((state->databitlen & 0x1ff) >> 3)+1; i < 64; i++) + state->buffer[i] = 0; + } + + /*pad and process the partial block when databitlen is not multiple of 512 bits, then hash the padded blocks*/ + state->buffer[((state->databitlen & 0x1ff) >> 3)] |= 1 << (7- (state->databitlen & 7)); + + cn_jh_F8(state); + memset(state->buffer, 0, 64); + //for( i = 0; i < 16; i++ ) *(bufptr+i) = 0x00000000; + state->buffer[63] = state->databitlen & 0xff; + state->buffer[62] = (state->databitlen >> 8) & 0xff; + state->buffer[61] = (state->databitlen >> 16) & 0xff; + state->buffer[60] = (state->databitlen >> 24) & 0xff; + state->buffer[59] = (state->databitlen >> 32) & 0xff; + state->buffer[58] = (state->databitlen >> 40) & 0xff; + state->buffer[57] = (state->databitlen >> 48) & 0xff; + state->buffer[56] = (state->databitlen >> 56) & 0xff; + cn_jh_F8(state); + } + + memcpy(hashval,(unsigned char*)state->x+64+32,32); +} + +__device__ void cn_jh_init(jhHashState *state, int hashbitlen) +{ + state->databitlen = 0; + state->datasize_in_buffer = 0; + state->hashbitlen = hashbitlen; + memcpy(state->x, d_JH256_H0, 128); +} + +__device__ void cn_jh(const BitSequence * __restrict__ data, DataLength len, BitSequence * __restrict__ hashval) +{ + int hashbitlen = 256; + DataLength databitlen = len << 3; + jhHashState state; + + cn_jh_init(&state, hashbitlen); + cn_jh_update(&state, data, databitlen); + cn_jh_final(&state, hashval); +} diff --git a/xmrstak/backend/nvidia/nvcc_code/cuda_keccak.hpp b/xmrstak/backend/nvidia/nvcc_code/cuda_keccak.hpp new file mode 100644 index 0000000..99c6516 --- /dev/null +++ b/xmrstak/backend/nvidia/nvcc_code/cuda_keccak.hpp @@ -0,0 +1,197 @@ +#if __CUDA_ARCH__ >= 350 + __forceinline__ __device__ uint64_t cuda_rotl64(const uint64_t value, const int offset) + { + uint2 result; + if(offset >= 32) + { + asm("shf.l.wrap.b32 %0, %1, %2, %3;" : "=r"(result.x) : "r"(__double2loint(__longlong_as_double(value))), "r"(__double2hiint(__longlong_as_double(value))), "r"(offset)); + asm("shf.l.wrap.b32 %0, %1, %2, %3;" : "=r"(result.y) : "r"(__double2hiint(__longlong_as_double(value))), "r"(__double2loint(__longlong_as_double(value))), "r"(offset)); + } + else + { + asm("shf.l.wrap.b32 %0, %1, %2, %3;" : "=r"(result.x) : "r"(__double2hiint(__longlong_as_double(value))), "r"(__double2loint(__longlong_as_double(value))), "r"(offset)); + asm("shf.l.wrap.b32 %0, %1, %2, %3;" : "=r"(result.y) : "r"(__double2loint(__longlong_as_double(value))), "r"(__double2hiint(__longlong_as_double(value))), "r"(offset)); + } + return __double_as_longlong(__hiloint2double(result.y, result.x)); + } + #define rotl64_1(x, y) (cuda_rotl64((x), (y))) +#else + #define rotl64_1(x, y) ((x) << (y) | ((x) >> (64 - (y)))) +#endif + +#define rotl64_2(x, y) rotl64_1(((x) >> 32) | ((x) << 32), (y)) +#define bitselect(a, b, c) ((a) ^ ((c) & ((b) ^ (a)))) + +__device__ __forceinline__ void cn_keccakf2(uint64_t *s) +{ + uint8_t i; + + for(i = 0; i < 24; ++i) + { + uint64_t bc[5], tmpxor[5], tmp1, tmp2; + + tmpxor[0] = s[0] ^ s[5] ^ s[10] ^ s[15] ^ s[20]; + tmpxor[1] = s[1] ^ s[6] ^ s[11] ^ s[16] ^ s[21]; + tmpxor[2] = s[2] ^ s[7] ^ s[12] ^ s[17] ^ s[22]; + tmpxor[3] = s[3] ^ s[8] ^ s[13] ^ s[18] ^ s[23]; + tmpxor[4] = s[4] ^ s[9] ^ s[14] ^ s[19] ^ s[24]; + + bc[0] = tmpxor[0] ^ rotl64_1(tmpxor[2], 1); + bc[1] = tmpxor[1] ^ rotl64_1(tmpxor[3], 1); + bc[2] = tmpxor[2] ^ rotl64_1(tmpxor[4], 1); + bc[3] = tmpxor[3] ^ rotl64_1(tmpxor[0], 1); + bc[4] = tmpxor[4] ^ rotl64_1(tmpxor[1], 1); + + tmp1 = s[1] ^ bc[0]; + + s[0] ^= bc[4]; + s[1] = rotl64_2(s[6] ^ bc[0], 12); + s[6] = rotl64_1(s[9] ^ bc[3], 20); + s[9] = rotl64_2(s[22] ^ bc[1], 29); + s[22] = rotl64_2(s[14] ^ bc[3], 7); + s[14] = rotl64_1(s[20] ^ bc[4], 18); + s[20] = rotl64_2(s[2] ^ bc[1], 30); + s[2] = rotl64_2(s[12] ^ bc[1], 11); + s[12] = rotl64_1(s[13] ^ bc[2], 25); + s[13] = rotl64_1(s[19] ^ bc[3], 8); + s[19] = rotl64_2(s[23] ^ bc[2], 24); + s[23] = rotl64_2(s[15] ^ bc[4], 9); + s[15] = rotl64_1(s[4] ^ bc[3], 27); + s[4] = rotl64_1(s[24] ^ bc[3], 14); + s[24] = rotl64_1(s[21] ^ bc[0], 2); + s[21] = rotl64_2(s[8] ^ bc[2], 23); + s[8] = rotl64_2(s[16] ^ bc[0], 13); + s[16] = rotl64_2(s[5] ^ bc[4], 4); + s[5] = rotl64_1(s[3] ^ bc[2], 28); + s[3] = rotl64_1(s[18] ^ bc[2], 21); + s[18] = rotl64_1(s[17] ^ bc[1], 15); + s[17] = rotl64_1(s[11] ^ bc[0], 10); + s[11] = rotl64_1(s[7] ^ bc[1], 6); + s[7] = rotl64_1(s[10] ^ bc[4], 3); + s[10] = rotl64_1(tmp1, 1); + + tmp1 = s[0]; tmp2 = s[1]; s[0] = bitselect(s[0] ^ s[2], s[0], s[1]); s[1] = bitselect(s[1] ^ s[3], s[1], s[2]); s[2] = bitselect(s[2] ^ s[4], s[2], s[3]); s[3] = bitselect(s[3] ^ tmp1, s[3], s[4]); s[4] = bitselect(s[4] ^ tmp2, s[4], tmp1); + tmp1 = s[5]; tmp2 = s[6]; s[5] = bitselect(s[5] ^ s[7], s[5], s[6]); s[6] = bitselect(s[6] ^ s[8], s[6], s[7]); s[7] = bitselect(s[7] ^ s[9], s[7], s[8]); s[8] = bitselect(s[8] ^ tmp1, s[8], s[9]); s[9] = bitselect(s[9] ^ tmp2, s[9], tmp1); + tmp1 = s[10]; tmp2 = s[11]; s[10] = bitselect(s[10] ^ s[12], s[10], s[11]); s[11] = bitselect(s[11] ^ s[13], s[11], s[12]); s[12] = bitselect(s[12] ^ s[14], s[12], s[13]); s[13] = bitselect(s[13] ^ tmp1, s[13], s[14]); s[14] = bitselect(s[14] ^ tmp2, s[14], tmp1); + tmp1 = s[15]; tmp2 = s[16]; s[15] = bitselect(s[15] ^ s[17], s[15], s[16]); s[16] = bitselect(s[16] ^ s[18], s[16], s[17]); s[17] = bitselect(s[17] ^ s[19], s[17], s[18]); s[18] = bitselect(s[18] ^ tmp1, s[18], s[19]); s[19] = bitselect(s[19] ^ tmp2, s[19], tmp1); + tmp1 = s[20]; tmp2 = s[21]; s[20] = bitselect(s[20] ^ s[22], s[20], s[21]); s[21] = bitselect(s[21] ^ s[23], s[21], s[22]); s[22] = bitselect(s[22] ^ s[24], s[22], s[23]); s[23] = bitselect(s[23] ^ tmp1, s[23], s[24]); s[24] = bitselect(s[24] ^ tmp2, s[24], tmp1); + s[0] ^= keccakf_rndc[i]; + } +} + +__device__ __forceinline__ void cn_keccakf(uint64_t *s) +{ + uint64_t bc[5], tmpxor[5], tmp1, tmp2; + + tmpxor[0] = s[0] ^ s[5]; + tmpxor[1] = s[1] ^ s[6] ^ 0x8000000000000000ULL; + tmpxor[2] = s[2] ^ s[7]; + tmpxor[3] = s[3] ^ s[8]; + tmpxor[4] = s[4] ^ s[9]; + + bc[0] = tmpxor[0] ^ rotl64_1(tmpxor[2], 1); + bc[1] = tmpxor[1] ^ rotl64_1(tmpxor[3], 1); + bc[2] = tmpxor[2] ^ rotl64_1(tmpxor[4], 1); + bc[3] = tmpxor[3] ^ rotl64_1(tmpxor[0], 1); + bc[4] = tmpxor[4] ^ rotl64_1(tmpxor[1], 1); + + tmp1 = s[1] ^ bc[0]; + + s[0] ^= bc[4]; + s[1] = rotl64_2(s[6] ^ bc[0], 12); + s[6] = rotl64_1(s[9] ^ bc[3], 20); + s[9] = rotl64_2(bc[1], 29); + s[22] = rotl64_2(bc[3], 7); + s[14] = rotl64_1(bc[4], 18); + s[20] = rotl64_2(s[2] ^ bc[1], 30); + s[2] = rotl64_2(bc[1], 11); + s[12] = rotl64_1(bc[2], 25); + s[13] = rotl64_1(bc[3], 8); + s[19] = rotl64_2(bc[2], 24); + s[23] = rotl64_2(bc[4], 9); + s[15] = rotl64_1(s[4] ^ bc[3], 27); + s[4] = rotl64_1(bc[3], 14); + s[24] = rotl64_1(bc[0], 2); + s[21] = rotl64_2(s[8] ^ bc[2], 23); + s[8] = rotl64_2(0x8000000000000000ULL ^ bc[0], 13); + s[16] = rotl64_2(s[5] ^ bc[4], 4); + s[5] = rotl64_1(s[3] ^ bc[2], 28); + s[3] = rotl64_1(bc[2], 21); + s[18] = rotl64_1(bc[1], 15); + s[17] = rotl64_1(bc[0], 10); + s[11] = rotl64_1(s[7] ^ bc[1], 6); + s[7] = rotl64_1(bc[4], 3); + s[10] = rotl64_1(tmp1, 1); + + tmp1 = s[0]; tmp2 = s[1]; s[0] = bitselect(s[0] ^ s[2], s[0], s[1]); s[1] = bitselect(s[1] ^ s[3], s[1], s[2]); s[2] = bitselect(s[2] ^ s[4], s[2], s[3]); s[3] = bitselect(s[3] ^ tmp1, s[3], s[4]); s[4] = bitselect(s[4] ^ tmp2, s[4], tmp1); + tmp1 = s[5]; tmp2 = s[6]; s[5] = bitselect(s[5] ^ s[7], s[5], s[6]); s[6] = bitselect(s[6] ^ s[8], s[6], s[7]); s[7] = bitselect(s[7] ^ s[9], s[7], s[8]); s[8] = bitselect(s[8] ^ tmp1, s[8], s[9]); s[9] = bitselect(s[9] ^ tmp2, s[9], tmp1); + tmp1 = s[10]; tmp2 = s[11]; s[10] = bitselect(s[10] ^ s[12], s[10], s[11]); s[11] = bitselect(s[11] ^ s[13], s[11], s[12]); s[12] = bitselect(s[12] ^ s[14], s[12], s[13]); s[13] = bitselect(s[13] ^ tmp1, s[13], s[14]); s[14] = bitselect(s[14] ^ tmp2, s[14], tmp1); + tmp1 = s[15]; tmp2 = s[16]; s[15] = bitselect(s[15] ^ s[17], s[15], s[16]); s[16] = bitselect(s[16] ^ s[18], s[16], s[17]); s[17] = bitselect(s[17] ^ s[19], s[17], s[18]); s[18] = bitselect(s[18] ^ tmp1, s[18], s[19]); s[19] = bitselect(s[19] ^ tmp2, s[19], tmp1); + tmp1 = s[20]; tmp2 = s[21]; s[20] = bitselect(s[20] ^ s[22], s[20], s[21]); s[21] = bitselect(s[21] ^ s[23], s[21], s[22]); s[22] = bitselect(s[22] ^ s[24], s[22], s[23]); s[23] = bitselect(s[23] ^ tmp1, s[23], s[24]); s[24] = bitselect(s[24] ^ tmp2, s[24], tmp1); + s[0] ^= 0x0000000000000001; + + for(int i = 1; i < 24; ++i) + { + tmpxor[0] = s[0] ^ s[5] ^ s[10] ^ s[15] ^ s[20]; + tmpxor[1] = s[1] ^ s[6] ^ s[11] ^ s[16] ^ s[21]; + tmpxor[2] = s[2] ^ s[7] ^ s[12] ^ s[17] ^ s[22]; + tmpxor[3] = s[3] ^ s[8] ^ s[13] ^ s[18] ^ s[23]; + tmpxor[4] = s[4] ^ s[9] ^ s[14] ^ s[19] ^ s[24]; + + bc[0] = tmpxor[0] ^ rotl64_1(tmpxor[2], 1); + bc[1] = tmpxor[1] ^ rotl64_1(tmpxor[3], 1); + bc[2] = tmpxor[2] ^ rotl64_1(tmpxor[4], 1); + bc[3] = tmpxor[3] ^ rotl64_1(tmpxor[0], 1); + bc[4] = tmpxor[4] ^ rotl64_1(tmpxor[1], 1); + + tmp1 = s[1] ^ bc[0]; + + s[0] ^= bc[4]; + s[1] = rotl64_2(s[6] ^ bc[0], 12); + s[6] = rotl64_1(s[9] ^ bc[3], 20); + s[9] = rotl64_2(s[22] ^ bc[1], 29); + s[22] = rotl64_2(s[14] ^ bc[3], 7); + s[14] = rotl64_1(s[20] ^ bc[4], 18); + s[20] = rotl64_2(s[2] ^ bc[1], 30); + s[2] = rotl64_2(s[12] ^ bc[1], 11); + s[12] = rotl64_1(s[13] ^ bc[2], 25); + s[13] = rotl64_1(s[19] ^ bc[3], 8); + s[19] = rotl64_2(s[23] ^ bc[2], 24); + s[23] = rotl64_2(s[15] ^ bc[4], 9); + s[15] = rotl64_1(s[4] ^ bc[3], 27); + s[4] = rotl64_1(s[24] ^ bc[3], 14); + s[24] = rotl64_1(s[21] ^ bc[0], 2); + s[21] = rotl64_2(s[8] ^ bc[2], 23); + s[8] = rotl64_2(s[16] ^ bc[0], 13); + s[16] = rotl64_2(s[5] ^ bc[4], 4); + s[5] = rotl64_1(s[3] ^ bc[2], 28); + s[3] = rotl64_1(s[18] ^ bc[2], 21); + s[18] = rotl64_1(s[17] ^ bc[1], 15); + s[17] = rotl64_1(s[11] ^ bc[0], 10); + s[11] = rotl64_1(s[7] ^ bc[1], 6); + s[7] = rotl64_1(s[10] ^ bc[4], 3); + s[10] = rotl64_1(tmp1, 1); + + tmp1 = s[0]; tmp2 = s[1]; s[0] = bitselect(s[0] ^ s[2], s[0], s[1]); s[1] = bitselect(s[1] ^ s[3], s[1], s[2]); s[2] = bitselect(s[2] ^ s[4], s[2], s[3]); s[3] = bitselect(s[3] ^ tmp1, s[3], s[4]); s[4] = bitselect(s[4] ^ tmp2, s[4], tmp1); + tmp1 = s[5]; tmp2 = s[6]; s[5] = bitselect(s[5] ^ s[7], s[5], s[6]); s[6] = bitselect(s[6] ^ s[8], s[6], s[7]); s[7] = bitselect(s[7] ^ s[9], s[7], s[8]); s[8] = bitselect(s[8] ^ tmp1, s[8], s[9]); s[9] = bitselect(s[9] ^ tmp2, s[9], tmp1); + tmp1 = s[10]; tmp2 = s[11]; s[10] = bitselect(s[10] ^ s[12], s[10], s[11]); s[11] = bitselect(s[11] ^ s[13], s[11], s[12]); s[12] = bitselect(s[12] ^ s[14], s[12], s[13]); s[13] = bitselect(s[13] ^ tmp1, s[13], s[14]); s[14] = bitselect(s[14] ^ tmp2, s[14], tmp1); + tmp1 = s[15]; tmp2 = s[16]; s[15] = bitselect(s[15] ^ s[17], s[15], s[16]); s[16] = bitselect(s[16] ^ s[18], s[16], s[17]); s[17] = bitselect(s[17] ^ s[19], s[17], s[18]); s[18] = bitselect(s[18] ^ tmp1, s[18], s[19]); s[19] = bitselect(s[19] ^ tmp2, s[19], tmp1); + tmp1 = s[20]; tmp2 = s[21]; s[20] = bitselect(s[20] ^ s[22], s[20], s[21]); s[21] = bitselect(s[21] ^ s[23], s[21], s[22]); s[22] = bitselect(s[22] ^ s[24], s[22], s[23]); s[23] = bitselect(s[23] ^ tmp1, s[23], s[24]); s[24] = bitselect(s[24] ^ tmp2, s[24], tmp1); + s[0] ^= keccakf_rndc[i]; + } +} + +__device__ __forceinline__ void cn_keccak(const uint8_t * __restrict__ in, uint32_t len, uint8_t * __restrict__ md) +{ + uint64_t st[25]; + + MEMSET8(st + 8, 0x00, 25 - 8); + memcpy(st, in, len); + ((uint8_t*)st)[len] = 0x01; + st[16] = 0x8000000000000000ULL; + + cn_keccakf(st); + + MEMCPY8(md, st, 25); + return; +} diff --git a/xmrstak/backend/nvidia/nvcc_code/cuda_skein.hpp b/xmrstak/backend/nvidia/nvcc_code/cuda_skein.hpp new file mode 100644 index 0000000..041a593 --- /dev/null +++ b/xmrstak/backend/nvidia/nvcc_code/cuda_skein.hpp @@ -0,0 +1,347 @@ +#pragma once + +typedef unsigned int uint_t; /* native unsigned integer */ + +#define SKEIN_MODIFIER_WORDS ( 2) /* number of modifier (tweak) words */ + +#define SKEIN_256_STATE_WORDS ( 4) +#define SKEIN_512_STATE_WORDS ( 8) +#define SKEIN1024_STATE_WORDS (16) + +#define SKEIN_256_STATE_BYTES ( 8*SKEIN_256_STATE_WORDS) +#define SKEIN_512_STATE_BYTES ( 8*SKEIN_512_STATE_WORDS) +#define SKEIN1024_STATE_BYTES ( 8*SKEIN1024_STATE_WORDS) + +#define SKEIN_256_STATE_BITS (64*SKEIN_256_STATE_WORDS) +#define SKEIN_512_STATE_BITS (64*SKEIN_512_STATE_WORDS) +#define SKEIN1024_STATE_BITS (64*SKEIN1024_STATE_WORDS) + +#define SKEIN_256_BLOCK_BYTES ( 8*SKEIN_256_STATE_WORDS) +#define SKEIN_512_BLOCK_BYTES ( 8*SKEIN_512_STATE_WORDS) +#define SKEIN1024_BLOCK_BYTES ( 8*SKEIN1024_STATE_WORDS) + +#define SKEIN_MK_64(hi32,lo32) ((lo32) + (((uint64_t) (hi32)) << 32)) +#define SKEIN_KS_PARITY SKEIN_MK_64(0x1BD11BDA,0xA9FC1A22) + +#define SKEIN_T1_BIT(BIT) ((BIT) - 64) /* offset 64 because it's the second word */ + +#define SKEIN_T1_POS_FIRST SKEIN_T1_BIT(126) /* bits 126 : first block flag */ +#define SKEIN_T1_POS_BIT_PAD SKEIN_T1_BIT(119) /* bit 119 : partial final input byte */ +#define SKEIN_T1_POS_FINAL SKEIN_T1_BIT(127) /* bit 127 : final block flag */ +#define SKEIN_T1_POS_BLK_TYPE SKEIN_T1_BIT(120) /* bits 120..125: type field */ + +#define SKEIN_T1_FLAG_FIRST (((uint64_t) 1 ) << SKEIN_T1_POS_FIRST) +#define SKEIN_T1_FLAG_BIT_PAD (((uint64_t) 1 ) << SKEIN_T1_POS_BIT_PAD) +#define SKEIN_T1_FLAG_FINAL (((uint64_t) 1 ) << SKEIN_T1_POS_FINAL) + +#define SKEIN_BLK_TYPE_MSG (48) /* message processing */ +#define SKEIN_BLK_TYPE_OUT (63) /* output stage */ + +#define SKEIN_T1_BLK_TYPE(T) (((uint64_t) (SKEIN_BLK_TYPE_##T)) << SKEIN_T1_POS_BLK_TYPE) + +#define SKEIN_T1_BLK_TYPE_MSG SKEIN_T1_BLK_TYPE(MSG) /* message processing */ +#define SKEIN_T1_BLK_TYPE_OUT SKEIN_T1_BLK_TYPE(OUT) /* output stage */ + +#define SKEIN_T1_BLK_TYPE_OUT_FINAL (SKEIN_T1_BLK_TYPE_OUT | SKEIN_T1_FLAG_FINAL) + +#define Skein_Set_Tweak(ctxPtr,TWK_NUM,tVal) {(ctxPtr)->h.T[TWK_NUM] = (tVal);} + +#define Skein_Set_T0(ctxPtr,T0) Skein_Set_Tweak(ctxPtr,0,T0) +#define Skein_Set_T1(ctxPtr,T1) Skein_Set_Tweak(ctxPtr,1,T1) + +#define Skein_Set_T0_T1(ctxPtr,T0,T1) { \ + Skein_Set_T0(ctxPtr,(T0)); \ + Skein_Set_T1(ctxPtr,(T1)); } + +#define Skein_Start_New_Type(ctxPtr,BLK_TYPE) \ +{ Skein_Set_T0_T1(ctxPtr,0,SKEIN_T1_FLAG_FIRST | SKEIN_T1_BLK_TYPE_##BLK_TYPE); (ctxPtr)->h.bCnt=0; } + +#define Skein_Set_Bit_Pad_Flag(hdr) { (hdr).T[1] |= SKEIN_T1_FLAG_BIT_PAD; } + +#define KW_TWK_BASE (0) +#define KW_KEY_BASE (3) +#define ks (kw + KW_KEY_BASE) +#define ts (kw + KW_TWK_BASE) + +#define R512(p0,p1,p2,p3,p4,p5,p6,p7,R512ROT,rNum) \ + X##p0 += X##p1; X##p1 = ROTL64(X##p1,R512ROT##_0); X##p1 ^= X##p0; \ + X##p2 += X##p3; X##p3 = ROTL64(X##p3,R512ROT##_1); X##p3 ^= X##p2; \ + X##p4 += X##p5; X##p5 = ROTL64(X##p5,R512ROT##_2); X##p5 ^= X##p4; \ + X##p6 += X##p7; X##p7 = ROTL64(X##p7,R512ROT##_3); X##p7 ^= X##p6; + +#define I512(R) \ + X0 += ks[((R)+1) % 9]; \ + X1 += ks[((R)+2) % 9]; \ + X2 += ks[((R)+3) % 9]; \ + X3 += ks[((R)+4) % 9]; \ + X4 += ks[((R)+5) % 9]; \ + X5 += ks[((R)+6) % 9] + ts[((R)+1) % 3]; \ + X6 += ks[((R)+7) % 9] + ts[((R)+2) % 3]; \ + X7 += ks[((R)+8) % 9] + (R)+1; + + +#define R512_8_rounds(R) \ + R512(0,1,2,3,4,5,6,7,R_512_0,8*(R)+ 1); \ + R512(2,1,4,7,6,5,0,3,R_512_1,8*(R)+ 2); \ + R512(4,1,6,3,0,5,2,7,R_512_2,8*(R)+ 3); \ + R512(6,1,0,7,2,5,4,3,R_512_3,8*(R)+ 4); \ + I512(2*(R)); \ + R512(0,1,2,3,4,5,6,7,R_512_4,8*(R)+ 5); \ + R512(2,1,4,7,6,5,0,3,R_512_5,8*(R)+ 6); \ + R512(4,1,6,3,0,5,2,7,R_512_6,8*(R)+ 7); \ + R512(6,1,0,7,2,5,4,3,R_512_7,8*(R)+ 8); \ + I512(2*(R)+1); + +typedef struct +{ + size_t hashBitLen; + size_t bCnt; + uint64_t T[SKEIN_MODIFIER_WORDS]; +} Skein_Ctxt_Hdr_t; + +typedef struct { + Skein_Ctxt_Hdr_t h; + uint64_t X[SKEIN_256_STATE_WORDS]; + uint8_t b[SKEIN_256_BLOCK_BYTES]; +} Skein_256_Ctxt_t; + +typedef struct { + Skein_Ctxt_Hdr_t h; + uint64_t X[SKEIN_512_STATE_WORDS]; + uint8_t b[SKEIN_512_BLOCK_BYTES]; +} Skein_512_Ctxt_t; + +typedef struct { + Skein_Ctxt_Hdr_t h; + uint64_t X[SKEIN1024_STATE_WORDS]; + uint8_t b[SKEIN1024_BLOCK_BYTES]; +} Skein1024_Ctxt_t; + +typedef struct { + uint_t statebits; + union { + Skein_Ctxt_Hdr_t h; + Skein_256_Ctxt_t ctx_256; + Skein_512_Ctxt_t ctx_512; + Skein1024_Ctxt_t ctx1024; + } u; +} skeinHashState; + +__device__ void cn_skein_init(skeinHashState *state, size_t hashBitLen) +{ + const uint64_t SKEIN_512_IV_256[] = + { + SKEIN_MK_64(0xCCD044A1,0x2FDB3E13), + SKEIN_MK_64(0xE8359030,0x1A79A9EB), + SKEIN_MK_64(0x55AEA061,0x4F816E6F), + SKEIN_MK_64(0x2A2767A4,0xAE9B94DB), + SKEIN_MK_64(0xEC06025E,0x74DD7683), + SKEIN_MK_64(0xE7A436CD,0xC4746251), + SKEIN_MK_64(0xC36FBAF9,0x393AD185), + SKEIN_MK_64(0x3EEDBA18,0x33EDFC13) + }; + + Skein_512_Ctxt_t *ctx = &state->u.ctx_512; + + ctx->h.hashBitLen = hashBitLen; + + memcpy(ctx->X, SKEIN_512_IV_256, sizeof(ctx->X)); + + Skein_Start_New_Type(ctx, MSG); +} + +__device__ void cn_skein512_processblock(Skein_512_Ctxt_t * __restrict__ ctx, const uint8_t * __restrict__ blkPtr, size_t blkCnt, size_t byteCntAdd) +{ + enum { + R_512_0_0=46, R_512_0_1=36, R_512_0_2=19, R_512_0_3=37, + R_512_1_0=33, R_512_1_1=27, R_512_1_2=14, R_512_1_3=42, + R_512_2_0=17, R_512_2_1=49, R_512_2_2=36, R_512_2_3=39, + R_512_3_0=44, R_512_3_1= 9, R_512_3_2=54, R_512_3_3=56, + R_512_4_0=39, R_512_4_1=30, R_512_4_2=34, R_512_4_3=24, + R_512_5_0=13, R_512_5_1=50, R_512_5_2=10, R_512_5_3=17, + R_512_6_0=25, R_512_6_1=29, R_512_6_2=39, R_512_6_3=43, + R_512_7_0= 8, R_512_7_1=35, R_512_7_2=56, R_512_7_3=22 + }; + + uint64_t X0,X1,X2,X3,X4,X5,X6,X7; + uint64_t w[SKEIN_512_STATE_WORDS]; + uint64_t kw[SKEIN_512_STATE_WORDS+4]; + + ts[0] = ctx->h.T[0]; + ts[1] = ctx->h.T[1]; + + do + { + + ts[0] += byteCntAdd; + + ks[0] = ctx->X[0]; + ks[1] = ctx->X[1]; + ks[2] = ctx->X[2]; + ks[3] = ctx->X[3]; + ks[4] = ctx->X[4]; + ks[5] = ctx->X[5]; + ks[6] = ctx->X[6]; + ks[7] = ctx->X[7]; + ks[8] = ks[0] ^ ks[1] ^ ks[2] ^ ks[3] ^ + ks[4] ^ ks[5] ^ ks[6] ^ ks[7] ^ SKEIN_KS_PARITY; + + ts[2] = ts[0] ^ ts[1]; + + memcpy(w, blkPtr, SKEIN_512_STATE_WORDS << 3); + + X0 = w[0] + ks[0]; + X1 = w[1] + ks[1]; + X2 = w[2] + ks[2]; + X3 = w[3] + ks[3]; + X4 = w[4] + ks[4]; + X5 = w[5] + ks[5] + ts[0]; + X6 = w[6] + ks[6] + ts[1]; + X7 = w[7] + ks[7]; + + blkPtr += SKEIN_512_BLOCK_BYTES; + + R512_8_rounds( 0); + R512_8_rounds( 1); + R512_8_rounds( 2); + R512_8_rounds( 3); + R512_8_rounds( 4); + R512_8_rounds( 5); + R512_8_rounds( 6); + R512_8_rounds( 7); + R512_8_rounds( 8); + + ctx->X[0] = X0 ^ w[0]; + ctx->X[1] = X1 ^ w[1]; + ctx->X[2] = X2 ^ w[2]; + ctx->X[3] = X3 ^ w[3]; + ctx->X[4] = X4 ^ w[4]; + ctx->X[5] = X5 ^ w[5]; + ctx->X[6] = X6 ^ w[6]; + ctx->X[7] = X7 ^ w[7]; + + ts[1] &= ~SKEIN_T1_FLAG_FIRST; + } + while (--blkCnt); + + ctx->h.T[0] = ts[0]; + ctx->h.T[1] = ts[1]; +} + +__device__ void cn_skein_final(skeinHashState * __restrict__ state, uint8_t * __restrict__ hashVal) +{ + size_t i,n,byteCnt; + uint64_t X[SKEIN_512_STATE_WORDS]; + Skein_512_Ctxt_t *ctx = (Skein_512_Ctxt_t *)&state->u.ctx_512; + //size_t tmp; + //uint8_t *p8; + //uint64_t *p64; + + ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL; + + if (ctx->h.bCnt < SKEIN_512_BLOCK_BYTES) + { + memset(&ctx->b[ctx->h.bCnt],0,SKEIN_512_BLOCK_BYTES - ctx->h.bCnt); + //p8 = &ctx->b[ctx->h.bCnt]; + //tmp = SKEIN_512_BLOCK_BYTES - ctx->h.bCnt; + //for( i = 0; i < tmp; i++ ) *(p8+i) = 0; + } + + cn_skein512_processblock(ctx,ctx->b,1,ctx->h.bCnt); + + byteCnt = (ctx->h.hashBitLen + 7) >> 3; + + //uint8_t b[SKEIN_512_BLOCK_BYTES] == 64 + memset(ctx->b,0,sizeof(ctx->b)); + //p64 = (uint64_t *)ctx->b; + //for( i = 0; i < 8; i++ ) *(p64+i) = 0; + + memcpy(X,ctx->X,sizeof(X)); + + for (i=0;i*SKEIN_512_BLOCK_BYTES < byteCnt;i++) + { + ((uint64_t *)ctx->b)[0]= (uint64_t)i; + Skein_Start_New_Type(ctx,OUT_FINAL); + cn_skein512_processblock(ctx,ctx->b,1,sizeof(uint64_t)); + n = byteCnt - i*SKEIN_512_BLOCK_BYTES; + if (n >= SKEIN_512_BLOCK_BYTES) + n = SKEIN_512_BLOCK_BYTES; + memcpy(hashVal+i*SKEIN_512_BLOCK_BYTES,ctx->X,n); + memcpy(ctx->X,X,sizeof(X)); /* restore the counter mode key for next time */ + } +} + +__device__ void cn_skein512_update(Skein_512_Ctxt_t * __restrict__ ctx, const uint8_t * __restrict__ msg, size_t msgByteCnt) +{ + size_t n; + + if (msgByteCnt + ctx->h.bCnt > SKEIN_512_BLOCK_BYTES) + { + + if (ctx->h.bCnt) + { + + n = SKEIN_512_BLOCK_BYTES - ctx->h.bCnt; + + if (n) + { + memcpy(&ctx->b[ctx->h.bCnt],msg,n); + msgByteCnt -= n; + msg += n; + ctx->h.bCnt += n; + } + + cn_skein512_processblock(ctx,ctx->b,1,SKEIN_512_BLOCK_BYTES); + ctx->h.bCnt = 0; + } + + if (msgByteCnt > SKEIN_512_BLOCK_BYTES) + { + n = (msgByteCnt-1) / SKEIN_512_BLOCK_BYTES; + cn_skein512_processblock(ctx,msg,n,SKEIN_512_BLOCK_BYTES); + msgByteCnt -= n * SKEIN_512_BLOCK_BYTES; + msg += n * SKEIN_512_BLOCK_BYTES; + } + } + + if (msgByteCnt) + { + memcpy(&ctx->b[ctx->h.bCnt],msg,msgByteCnt); + ctx->h.bCnt += msgByteCnt; + } +} + +__device__ void cn_skein_update(skeinHashState * __restrict__ state, const BitSequence * __restrict__ data, DataLength databitlen) +{ + if ((databitlen & 7) == 0) + { + cn_skein512_update(&state->u.ctx_512,data,databitlen >> 3); + } + else + { + + size_t bCnt = (databitlen >> 3) + 1; + uint8_t b,mask; + + mask = (uint8_t) (1u << (7 - (databitlen & 7))); + b = (uint8_t) ((data[bCnt-1] & (0-mask)) | mask); + + cn_skein512_update(&state->u.ctx_512,data,bCnt-1); + cn_skein512_update(&state->u.ctx_512,&b , 1 ); + + Skein_Set_Bit_Pad_Flag(state->u.h); + } +} + +__device__ void cn_skein(const BitSequence * __restrict__ data, DataLength len, BitSequence * __restrict__ hashval) +{ + int hashbitlen = 256; + DataLength databitlen = len << 3; + skeinHashState state; + + state.statebits = 64*SKEIN_512_STATE_WORDS; + + cn_skein_init(&state, hashbitlen); + cn_skein_update(&state, data, databitlen); + cn_skein_final(&state, hashval); +} diff --git a/xmrstak/backend/plugin.hpp b/xmrstak/backend/plugin.hpp new file mode 100644 index 0000000..9ba9716 --- /dev/null +++ b/xmrstak/backend/plugin.hpp @@ -0,0 +1,96 @@ +#pragma once +#include <thread> +#include <atomic> +#include <vector> +#include <string> +#include "IBackend.hpp" +#include <iostream> +#include "../Environment.hpp" + +#ifndef USE_PRECOMPILED_HEADERS +#ifdef WIN32 +#include <direct.h> +#include <windows.h> +#else +#include <sys/types.h> +#include <dlfcn.h> +#endif +#include <iostream> +#endif + +namespace xmrstak +{ + +struct Plugin +{ + + Plugin(const std::string backendName, const std::string libName) : fn_starterBackend(nullptr), m_backendName(backendName) + { +#ifdef WIN32 + libBackend = LoadLibrary(TEXT((libName + ".dll").c_str())); + if(!libBackend) + { + std::cerr << "WARNING: "<< m_backendName <<" cannot load backend library: " << (libName + ".dll") << std::endl; + return; + } +#else + libBackend = dlopen((std::string("./lib") + libName + ".so").c_str(), RTLD_LAZY); + if(!libBackend) + { + std::cerr << "WARNING: "<< m_backendName <<" cannot load backend library: " << dlerror() << std::endl; + return; + } +#endif + +#ifdef WIN32 + fn_starterBackend = (starterBackend_t) GetProcAddress(libBackend, "xmrstak_start_backend"); + if (!fn_starterBackend) + { + std::cerr << "WARNING: backend plugin " << libName << " contains no entry 'xmrstak_start_backend': " <<GetLastError()<< std::endl; + } +#else + // reset last error + dlerror(); + fn_starterBackend = (starterBackend_t) dlsym(libBackend, "xmrstak_start_backend"); + const char* dlsym_error = dlerror(); + if(dlsym_error) + { + std::cerr << "WARNING: backend plugin " << libName << " contains no entry 'xmrstak_start_backend': " << dlsym_error << std::endl; + } +#endif + } + + std::vector<IBackend*>* startBackend(uint32_t threadOffset, miner_work& pWork, Environment& env) + { + if(fn_starterBackend == nullptr) + { + std::vector<IBackend*>* pvThreads = new std::vector<IBackend*>(); + std::cerr << "WARNING: " << m_backendName << " Backend disabled"<< std::endl; + return pvThreads; + } + + return fn_starterBackend(threadOffset, pWork, env); + } + + std::string m_backendName; + + typedef std::vector<IBackend*>* (*starterBackend_t)(uint32_t threadOffset, miner_work& pWork, Environment& env); + + starterBackend_t fn_starterBackend; + +#ifdef WIN32 + HINSTANCE libBackend; +#else + void *libBackend; +#endif + +/* \todo add unload to destructor and change usage of Plugin that libs keeped open until the miner endss +#ifdef WIN32 + FreeLibrary(libBackend); +#else + dlclose(libBackend); +#endif + * */ +}; + +} // namepsace xmrstak diff --git a/xmrstak/cli/cli-miner.cpp b/xmrstak/cli/cli-miner.cpp new file mode 100644 index 0000000..f2c5c1d --- /dev/null +++ b/xmrstak/cli/cli-miner.cpp @@ -0,0 +1,321 @@ + /* + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * Additional permission under GNU GPL version 3 section 7 + * + * If you modify this Program, or any covered work, by linking or combining + * it with OpenSSL (or a modified version of that library), containing parts + * covered by the terms of OpenSSL License and SSLeay License, the licensors + * of this Program grant you additional permission to convey the resulting work. + * + */ + +#include "../executor.h" +#include "../backend/miner_work.h" +#include "../backend/GlobalStates.hpp" +#include "../backend/BackendConnector.hpp" +#include "../jconf.h" +#include "../console.h" +#include "../donate-level.h" +#include "../Params.hpp" +#include "../ConfigEditor.hpp" + +#include "../version.h" + +#ifndef CONF_NO_HTTPD +# include "../httpd.h" +#endif + +#include <stdlib.h> +#include <stdio.h> +#include <string> +#include <iostream> + +#include <time.h> + +#ifndef CONF_NO_TLS +#include <openssl/ssl.h> +#include <openssl/err.h> +#endif + + +#ifdef _WIN32 +# define strcasecmp _stricmp +#endif // _WIN32 + +void do_benchmark(); + +int main(int argc, char *argv[]) +{ +#ifndef CONF_NO_TLS + SSL_library_init(); + SSL_load_error_strings(); + ERR_load_BIO_strings(); + ERR_load_crypto_strings(); + SSL_load_error_strings(); + OpenSSL_add_all_digests(); +#endif + + srand(time(0)); + + using namespace xmrstak; + + std::string pathWithName(argv[0]); + auto pos = pathWithName.rfind("/"); + if(pos == std::string::npos) + { + // try windows "\" + pos = pathWithName.rfind("\\"); + } + + Params::inst().executablePrefix = std::string(pathWithName, 0, pos); + + for(int i = 1; i < argc; ++i) + { + std::string opName(argv[i]); + if(opName.compare("-h") == 0 || opName.compare("--help") == 0) + { + printer::inst()->print_msg(L0, "Usage: %s [--help|-h] [--benchmark] [-c CONFIGFILE] [CONFIG FILE]", argv[0]); + win_exit(); + return 0; + } + else if(opName.compare("--noCPU") == 0) + { + Params::inst().useCPU = false; + } + else if(opName.compare("--noAMD") == 0) + { + Params::inst().useAMD = false; + } + else if(opName.compare("--noAMD") == 0) + { + Params::inst().useNVIDIA = false; + } + else if(opName.compare("--cpu") == 0) + { + ++i; + if( i >=argc ) + { + printer::inst()->print_msg(L0, "No argument for opName '--cpu' given"); + win_exit(); + return 1; + } + Params::inst().configFileCPU = argv[i]; + } + else if(opName.compare("--amd") == 0) + { + ++i; + if( i >=argc ) + { + printer::inst()->print_msg(L0, "No argument for opName '--amd' given"); + win_exit(); + return 1; + } + Params::inst().configFileAMD = argv[i]; + } + else if(opName.compare("--nvidia") == 0) + { + ++i; + if( i >=argc ) + { + printer::inst()->print_msg(L0, "No argument for opName '--nvidia' given"); + win_exit(); + return 1; + } + Params::inst().configFileNVIDIA = argv[i]; + } + else if(opName.compare("-o") == 0 || opName.compare("--url") == 0) + { + ++i; + if( i >=argc ) + { + printer::inst()->print_msg(L0, "No argument for opName '-o/--url' given"); + win_exit(); + return 1; + } + Params::inst().poolURL = argv[i]; + } + else if(opName.compare("-u") == 0 || opName.compare("--user") == 0) + { + ++i; + if( i >=argc ) + { + printer::inst()->print_msg(L0, "No argument for opName '-u/--user' given"); + win_exit(); + return 1; + } + Params::inst().poolUsername = argv[i]; + } + else if(opName.compare("-p") == 0 || opName.compare("--pass") == 0) + { + ++i; + if( i >=argc ) + { + printer::inst()->print_msg(L0, "No argument for opName '-p/--pass' given"); + win_exit(); + return 1; + } + Params::inst().poolPasswd = argv[i]; + } + else + Params::inst().configFile = argv[i]; + } + + // check if we need a guided start + if(!ConfigEditor::file_exist(Params::inst().configFile)) + { + // load the template of the backend config into a char variable + const char *tpl = + #include "../config.tpl" + ; + ConfigEditor configTpl{}; + configTpl.set(std::string(tpl)); + auto& pool = Params::inst().poolURL; + if(pool.empty()) + { + std::cout<<"Please enter:\n- pool address: e.g. pool.usxmrpool.com:3333"<<std::endl; + std::cin >> pool; + } + auto& userName = Params::inst().poolUsername; + if(userName.empty()) + { + std::cout<<"- user name (wallet address or pool login):"<<std::endl; + std::cin >> userName; + } + auto& passwd = Params::inst().poolPasswd; + if(passwd.empty()) + { + // clear everything from stdin to allow an empty password + std::cin.clear(); std::cin.ignore(INT_MAX,'\n'); + std::cout<<"- password (mostly empty or x):"<<std::endl; + getline(std::cin, passwd); + } + configTpl.replace("POOLURL", pool); + configTpl.replace("POOLUSER", userName); + configTpl.replace("POOLPASSWD", passwd); + configTpl.write(Params::inst().configFile); + std::cout<<"Configuration stored in file '"<<Params::inst().configFile<<"'"<<std::endl; + } + + if(!jconf::inst()->parse_config(Params::inst().configFile.c_str())) + { + win_exit(); + return 0; + } + + if (!BackendConnector::self_test()) + { + win_exit(); + return 0; + } + +#ifndef CONF_NO_HTTPD + if(jconf::inst()->GetHttpdPort() != 0) + { + if (!httpd::inst()->start_daemon()) + { + win_exit(); + return 0; + } + } +#endif + + printer::inst()->print_str("-------------------------------------------------------------------\n"); + printer::inst()->print_str( XMR_STAK_NAME" " XMR_STAK_VERSION " mining software.\n"); + printer::inst()->print_str("Based on CPU mining code by wolf9466 (heavily optimized by fireice_uk).\n"); +#ifndef CONF_NO_CUDA + printer::inst()->print_str("NVIDIA mining code was written by KlausT and psychocrypt.\n"); +#endif +#ifndef CONF_NO_OPENCL + printer::inst()->print_str("AMD mining code was written by wolf9466.\n"); +#endif + printer::inst()->print_str("Brought to you by fireice_uk and psychocrypt under GPLv3.\n\n"); + char buffer[64]; + snprintf(buffer, sizeof(buffer), "Configurable dev donation level is set to %.1f %%\n\n", fDevDonationLevel * 100.0); + printer::inst()->print_str(buffer); + printer::inst()->print_str("You can use following keys to display reports:\n"); + printer::inst()->print_str("'h' - hashrate\n"); + printer::inst()->print_str("'r' - results\n"); + printer::inst()->print_str("'c' - connection\n"); + printer::inst()->print_str("-------------------------------------------------------------------\n"); + + if(strlen(jconf::inst()->GetOutputFile()) != 0) + printer::inst()->open_logfile(jconf::inst()->GetOutputFile()); + + executor::inst()->ex_start(jconf::inst()->DaemonMode()); + + using namespace std::chrono; + uint64_t lastTime = time_point_cast<milliseconds>(high_resolution_clock::now()).time_since_epoch().count(); + + int key; + while(true) + { + key = get_key(); + + switch(key) + { + case 'h': + executor::inst()->push_event(ex_event(EV_USR_HASHRATE)); + break; + case 'r': + executor::inst()->push_event(ex_event(EV_USR_RESULTS)); + break; + case 'c': + executor::inst()->push_event(ex_event(EV_USR_CONNSTAT)); + break; + default: + break; + } + + uint64_t currentTime = time_point_cast<milliseconds>(high_resolution_clock::now()).time_since_epoch().count(); + + /* Hard guard to make sure we never get called more than twice per second */ + if( currentTime - lastTime < 500) + std::this_thread::sleep_for(std::chrono::milliseconds(500 - (currentTime - lastTime))); + lastTime = currentTime; + } + + return 0; +} + +void do_benchmark() +{ + using namespace std::chrono; + std::vector<xmrstak::IBackend*>* pvThreads; + + printer::inst()->print_msg(L0, "Running a 60 second benchmark..."); + + uint8_t work[76] = {0}; + xmrstak::miner_work oWork = xmrstak::miner_work("", work, sizeof(work), 0, 0, 0); + pvThreads = xmrstak::BackendConnector::thread_starter(oWork); + + uint64_t iStartStamp = time_point_cast<milliseconds>(high_resolution_clock::now()).time_since_epoch().count(); + + std::this_thread::sleep_for(std::chrono::seconds(60)); + + oWork = xmrstak::miner_work(); + xmrstak::GlobalStates::inst().switch_work(oWork); + + double fTotalHps = 0.0; + for (uint32_t i = 0; i < pvThreads->size(); i++) + { + double fHps = pvThreads->at(i)->iHashCount; + fHps /= (pvThreads->at(i)->iTimestamp - iStartStamp) / 1000.0; + + printer::inst()->print_msg(L0, "Thread %u: %.1f H/S", i, fHps); + fTotalHps += fHps; + } + + printer::inst()->print_msg(L0, "Total: %.1f H/S", fTotalHps); +} diff --git a/xmrstak/config.tpl b/xmrstak/config.tpl new file mode 100644 index 0000000..60d85cd --- /dev/null +++ b/xmrstak/config.tpl @@ -0,0 +1,161 @@ +R"===( +/* + * pool_address - Pool address should be in the form "pool.supportxmr.com:3333". Only stratum pools are supported. + * wallet_address - Your wallet, or pool login. + * pool_password - Can be empty in most cases or "x". + * + * We feature pools up to 1MH/s. For a more complete list see M5M400's pool list at www.moneropools.com + */ +"pool_address" : "POOLURL", +"wallet_address" : "POOLUSER", +"pool_password" : "POOLPASSWD", + +/* + * Network timeouts. + * Because of the way this client is written it doesn't need to constantly talk (keep-alive) to the server to make + * sure it is there. We detect a buggy / overloaded server by the call timeout. The default values will be ok for + * nearly all cases. If they aren't the pool has most likely overload issues. Low call timeout values are preferable - + * long timeouts mean that we waste hashes on potentially stale jobs. Connection report will tell you how long the + * server usually takes to process our calls. + * + * call_timeout - How long should we wait for a response from the server before we assume it is dead and drop the connection. + * retry_time - How long should we wait before another connection attempt. + * Both values are in seconds. + * giveup_limit - Limit how many times we try to reconnect to the pool. Zero means no limit. Note that stak miners + * don't mine while the connection is lost, so your computer's power usage goes down to idle. + */ +"call_timeout" : 10, +"retry_time" : 10, +"giveup_limit" : 0, + +/* + * Output control. + * Since most people are used to miners printing all the time, that's what we do by default too. This is suboptimal + * really, since you cannot see errors under pages and pages of text and performance stats. Given that we have internal + * performance monitors, there is very little reason to spew out pages of text instead of concise reports. + * Press 'h' (hashrate), 'r' (results) or 'c' (connection) to print reports. + * + * verbose_level - 0 - Don't print anything. + * 1 - Print intro, connection event, disconnect event + * 2 - All of level 1, and new job (block) event if the difficulty is different from the last job + * 3 - All of level 1, and new job (block) event in all cases, result submission event. + * 4 - All of level 3, and automatic hashrate report printing + */ +"verbose_level" : 3, + +/* + * Automatic hashrate report + * + * h_print_time - How often, in seconds, should we print a hashrate report if verbose_level is set to 4. + * This option has no effect if verbose_level is not 4. + */ +"h_print_time" : 60, + +/* + * Manual hardware AES override + * + * Some VMs don't report AES capability correctly. You can set this value to true to enforce hardware AES or + * to false to force disable AES or null to let the miner decide if AES is used. + * + * WARNING: setting this to true on a CPU that doesn't support hardware AES will crash the miner. + */ +"aes_override" : null, + +/* + * LARGE PAGE SUPPORT + * Large pages need a properly set up OS. It can be difficult if you are not used to systems administration, + * but the performance results are worth the trouble - you will get around 20% boost. Slow memory mode is + * meant as a backup, you won't get stellar results there. If you are running into trouble, especially + * on Windows, please read the common issues in the README. + * + * By default we will try to allocate large pages. This means you need to "Run As Administrator" on Windows. + * You need to edit your system's group policies to enable locking large pages. Here are the steps from MSDN + * + * 1. On the Start menu, click Run. In the Open box, type gpedit.msc. + * 2. On the Local Group Policy Editor console, expand Computer Configuration, and then expand Windows Settings. + * 3. Expand Security Settings, and then expand Local Policies. + * 4. Select the User Rights Assignment folder. + * 5. The policies will be displayed in the details pane. + * 6. In the pane, double-click Lock pages in memory. + * 7. In the Local Security Setting – Lock pages in memory dialog box, click Add User or Group. + * 8. In the Select Users, Service Accounts, or Groups dialog box, add an account that you will run the miner on + * 9. Reboot for change to take effect. + * + * Windows also tends to fragment memory a lot. If you are running on a system with 4-8GB of RAM you might need + * to switch off all the auto-start applications and reboot to have a large enough chunk of contiguous memory. + * + * On Linux you will need to configure large page support "sudo sysctl -w vm.nr_hugepages=128" and increase your + * ulimit -l. To do do this you need to add following lines to /etc/security/limits.conf - "* soft memlock 262144" + * and "* hard memlock 262144". You can also do it Windows-style and simply run-as-root, but this is NOT + * recommended for security reasons. + * + * Memory locking means that the kernel can't swap out the page to disk - something that is unlikely to happen on a + * command line system that isn't starved of memory. I haven't observed any difference on a CLI Linux system between + * locked and unlocked memory. If that is your setup see option "no_mlck". + */ + +/* + * use_slow_memory defines our behavior with regards to large pages. There are three possible options here: + * always - Don't even try to use large pages. Always use slow memory. + * warn - We will try to use large pages, but fall back to slow memory if that fails. + * no_mlck - This option is only relevant on Linux, where we can use large pages without locking memory. + * It will never use slow memory, but it won't attempt to mlock + * never - If we fail to allocate large pages we will print an error and exit. + */ +"use_slow_memory" : "warn", + +/* + * NiceHash mode + * nicehash_nonce - Limit the nonce to 3 bytes as required by nicehash. This cuts all the safety margins, and + * if a block isn't found within 30 minutes then you might run into nonce collisions. Number + * of threads in this mode is hard-limited to 32. + */ +"nicehash_nonce" : false, + +/* + * TLS Settings + * If you need real security, make sure tls_secure_algo is enabled (otherwise MITM attack can downgrade encryption + * to trivially breakable stuff like DES and MD5), and verify the server's fingerprint through a trusted channel. + * + * use_tls - This option will make us connect using Transport Layer Security. + * tls_secure_algo - Use only secure algorithms. This will make us quit with an error if we can't negotiate a secure algo. + * tls_fingerprint - Server's SHA256 fingerprint. If this string is non-empty then we will check the server's cert against it. + */ +"use_tls" : false, +"tls_secure_algo" : true, +"tls_fingerprint" : "", + +/* + * Daemon mode + * + * If you are running the process in the background and you don't need the keyboard reports, set this to true. + * This should solve the hashrate problems on some emulated terminals. + */ +"daemon_mode" : false, + +/* + * Output file + * + * output_file - This option will log all output to a file. + * + */ +"output_file" : "", + +/* + * Built-in web server + * I like checking my hashrate on my phone. Don't you? + * Keep in mind that you will need to set up port forwarding on your router if you want to access it from + * outside of your home network. Ports lower than 1024 on Linux systems will require root. + * + * httpd_port - Port we should listen on. Default, 0, will switch off the server. + */ +"httpd_port" : 0, + +/* + * prefer_ipv4 - IPv6 preference. If the host is available on both IPv4 and IPv6 net, which one should be choose? + * This setting will only be needed in 2020's. No need to worry about it now. + */ +"prefer_ipv4" : true, + +)===" +
\ No newline at end of file diff --git a/xmrstak/donate-level.hpp b/xmrstak/donate-level.hpp new file mode 100644 index 0000000..71b7962 --- /dev/null +++ b/xmrstak/donate-level.hpp @@ -0,0 +1,18 @@ +#pragma once + +/* + * Dev donation. + * Percentage of your hashing power that you want to donate to the developer, can be 0.0 if you don't want to do that. + * Example of how it works for the default setting of 2.0: + * You miner will mine into your usual pool for 98 minutes, then switch to the developer's pool for 2.0 minute. + * Switching is instant, and only happens after a successful connection, so you never loose any hashes. + * + * If you plan on changing this setting to 0.0 please consider making a one off donation to our wallets: + * fireice-uk: + * 4581HhZkQHgZrZjKeCfCJxZff9E3xCgHGF25zABZz7oR71TnbbgiS7sK9jveE6Dx6uMs2LwszDuvQJgRZQotdpHt1fTdDhk + * psychocrypt: + * 43NoJVEXo21hGZ6tDG6Z3g4qimiGdJPE6GRxAmiWwm26gwr62Lqo7zRiCJFSBmbkwTGNuuES9ES5TgaVHceuYc4Y75txCTU + * + */ + +constexpr double fDevDonationLevel = 2.0 / 100.0; diff --git a/xmrstak/http/httpd.cpp b/xmrstak/http/httpd.cpp new file mode 100644 index 0000000..53b73f1 --- /dev/null +++ b/xmrstak/http/httpd.cpp @@ -0,0 +1,153 @@ +/* + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * Additional permission under GNU GPL version 3 section 7 + * + * If you modify this Program, or any covered work, by linking or combining + * it with OpenSSL (or a modified version of that library), containing parts + * covered by the terms of OpenSSL License and SSLeay License, the licensors + * of this Program grant you additional permission to convey the resulting work. + * + */ + +#ifndef CONF_NO_HTTPD + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <string> + +#include "msgstruct.h" +#include "httpd.h" +#include "console.h" +#include "executor.h" +#include "jconf.h" + +#include "webdesign.h" + +#include <microhttpd.h> +#ifdef _WIN32 +#define strcasecmp _stricmp +#endif // _WIN32 + +httpd* httpd::oInst = nullptr; + +httpd::httpd() +{ + +} + +int httpd::req_handler(void * cls, + MHD_Connection* connection, + const char* url, + const char* method, + const char* version, + const char* upload_data, + size_t* upload_data_size, + void ** ptr) +{ + struct MHD_Response * rsp; + + if (strcmp(method, "GET") != 0) + return MHD_NO; + + *ptr = nullptr; + + std::string str; + if(strcasecmp(url, "/style.css") == 0) + { + const char* req_etag = MHD_lookup_connection_value(connection, MHD_HEADER_KIND, "If-None-Match"); + + if(req_etag != NULL && strcmp(req_etag, sHtmlCssEtag) == 0) + { //Cache hit + rsp = MHD_create_response_from_buffer(0, nullptr, MHD_RESPMEM_PERSISTENT); + + int ret = MHD_queue_response(connection, MHD_HTTP_NOT_MODIFIED, rsp); + MHD_destroy_response(rsp); + return ret; + } + + rsp = MHD_create_response_from_buffer(sHtmlCssSize, (void*)sHtmlCssFile, MHD_RESPMEM_PERSISTENT); + MHD_add_response_header(rsp, "ETag", sHtmlCssEtag); + MHD_add_response_header(rsp, "Content-Type", "text/css; charset=utf-8"); + } + else if(strcasecmp(url, "/api.json") == 0) + { + executor::inst()->get_http_report(EV_HTML_JSON, str); + + rsp = MHD_create_response_from_buffer(str.size(), (void*)str.c_str(), MHD_RESPMEM_MUST_COPY); + MHD_add_response_header(rsp, "Content-Type", "application/json; charset=utf-8"); + } + else if(strcasecmp(url, "/h") == 0 || strcasecmp(url, "/hashrate") == 0) + { + executor::inst()->get_http_report(EV_HTML_HASHRATE, str); + + rsp = MHD_create_response_from_buffer(str.size(), (void*)str.c_str(), MHD_RESPMEM_MUST_COPY); + MHD_add_response_header(rsp, "Content-Type", "text/html; charset=utf-8"); + } + else if(strcasecmp(url, "/c") == 0 || strcasecmp(url, "/connection") == 0) + { + executor::inst()->get_http_report(EV_HTML_CONNSTAT, str); + + rsp = MHD_create_response_from_buffer(str.size(), (void*)str.c_str(), MHD_RESPMEM_MUST_COPY); + MHD_add_response_header(rsp, "Content-Type", "text/html; charset=utf-8"); + } + else if(strcasecmp(url, "/r") == 0 || strcasecmp(url, "/results") == 0) + { + executor::inst()->get_http_report(EV_HTML_RESULTS, str); + + rsp = MHD_create_response_from_buffer(str.size(), (void*)str.c_str(), MHD_RESPMEM_MUST_COPY); + MHD_add_response_header(rsp, "Content-Type", "text/html; charset=utf-8"); + } + else + { + //Do a 302 redirect to /h + char loc_path[256]; + const char* host_val = MHD_lookup_connection_value(connection, MHD_HEADER_KIND, "Host"); + + if(host_val != nullptr) + snprintf(loc_path, sizeof(loc_path), "http://%s/h", host_val); + else + snprintf(loc_path, sizeof(loc_path), "/h"); + + rsp = MHD_create_response_from_buffer(0, nullptr, MHD_RESPMEM_PERSISTENT); + int ret = MHD_queue_response(connection, MHD_HTTP_TEMPORARY_REDIRECT, rsp); + MHD_add_response_header(rsp, "Location", loc_path); + MHD_destroy_response(rsp); + return ret; + } + + int ret = MHD_queue_response(connection, MHD_HTTP_OK, rsp); + MHD_destroy_response(rsp); + return ret; +} + +bool httpd::start_daemon() +{ + d = MHD_start_daemon(MHD_USE_THREAD_PER_CONNECTION, + jconf::inst()->GetHttpdPort(), NULL, NULL, + &httpd::req_handler, + NULL, MHD_OPTION_END); + + if(d == nullptr) + { + printer::inst()->print_str("HTTP Daemon failed to start."); + return false; + } + + return true; +} + +#endif + diff --git a/xmrstak/http/httpd.hpp b/xmrstak/http/httpd.hpp new file mode 100644 index 0000000..bc8bcf6 --- /dev/null +++ b/xmrstak/http/httpd.hpp @@ -0,0 +1,31 @@ +#pragma once + +struct MHD_Daemon; +struct MHD_Connection; + +class httpd +{ +public: + static httpd* inst() + { + if (oInst == nullptr) oInst = new httpd; + return oInst; + }; + + bool start_daemon(); + +private: + httpd(); + static httpd* oInst; + + static int req_handler(void * cls, + MHD_Connection* connection, + const char* url, + const char* method, + const char* version, + const char* upload_data, + size_t* upload_data_size, + void ** ptr); + + MHD_Daemon *d; +}; diff --git a/xmrstak/http/webdesign.cpp b/xmrstak/http/webdesign.cpp new file mode 100644 index 0000000..4dfd3c2 --- /dev/null +++ b/xmrstak/http/webdesign.cpp @@ -0,0 +1,207 @@ +#include <stdlib.h> + +extern const char sHtmlCssEtag [] = "00000006"; +extern const char sHtmlCssFile [] = + "body {" + "font-family: Tahoma, Arial, sans-serif;" + "font-size: 80%;" + "background-color: rgb(240, 240, 240);" + "}" + + "a {" + "color: rgb(44, 55, 66);" + "}" + + "a:link {" + "text-decoration: none;" + "}" + + "a:visited {" + "color: rgb(44, 55, 66);" + "}" + + "a:hover {" + "color: rgb(255, 153, 0);" + "}" + + "a:active {" + "color: rgb(204, 122, 0);" + "}" + + ".all {" + "max-width:600px;" + "margin: auto;" + "}" + + ".header {" + "background-color: rgb(30, 30, 30);" + "color: white;" + "padding: 10px;" + "font-weight: bold;" + "margin: 10px 0px;" + "}" + + ".links {" + "padding: 7px;" + "text-align: center;" + "background-color: rgb(215, 215, 215);" + "box-shadow: 0px 1px 3px 0px rgba(0, 0, 0, 0.2), 0px 1px 1px 0px rgba(0, 0, 0, 0.14), 0px 2px 1px -1px rgba(0, 0, 0, 0.12);" + "}" + + ".data th, td {" + "padding: 5px 12px;" + "text-align: right;" + "border-bottom: 1px solid #ccc;" + "}" + + ".data tr:nth-child(even) {" + "background-color: #ddd;" + "}" + + ".data th {" + "background-color: #ccc;" + "}" + + ".data table {" + "width: 100%;" + "max-width: 600px;" + "}" + + ".letter {" + "font-weight: bold;" + "}" + + "h4 {" + "background-color: rgb(0, 130, 130);" + "color: white;" + "padding: 10px;" + "margin: 10px 0px;" + "}" + + ".flex-container {" + "display: -webkit-flex;" + "display: flex;" + "}" + + ".flex-item {" + "width: 33%;" + "margin: 3px;" + "}"; + +size_t sHtmlCssSize = sizeof(sHtmlCssFile) - 1; + +extern const char sHtmlCommonHeader [] = + "<!DOCTYPE html>" + "<html>" + "<head><meta name='viewport' content='width=device-width' />" + "<link rel='stylesheet' href='style.css' /><title>%s</title></head>" + "<body>" + "<div class='all'>" + "<div class='header'><span style='color: rgb(255, 160, 0)'>XMR</span>-Stak-CPU</div>" + + "<div class='flex-container'>" + "<div class='links flex-item'>" + "<a href='/h'><div><span class='letter'>H</span>ashrate</div></a>" + "</div>" + "<div class='links flex-item'>" + "<a href='/r'><div><span class='letter'>R</span>esults</div></a>" + "</div>" + "<div class='links flex-item'>" + "<a href='/c'><div><span class='letter'>C</span>onnection</div></a>" + "</div>" + "</div>" + "<h4>%s</h4>"; + +extern const char sHtmlHashrateBodyHigh [] = + "<div class=data>" + "<table>" + "<tr><th>Thread ID</th><th>10s</th><th>60s</th><th>15m</th><th rowspan='%u'>H/s</td></tr>"; + +extern const char sHtmlHashrateTableRow [] = + "<tr><th>%u</th><td>%s</td><td>%s</td><td>%s</td></tr>"; + +extern const char sHtmlHashrateBodyLow [] = + "<tr><th>Totals:</th><td>%s</td><td>%s</td><td>%s</td></tr>" + "<tr><th>Highest:</th><td>%s</td><td colspan='2'></td></tr>" + "</table>" + "</div></div></body></html>"; + +extern const char sHtmlConnectionBodyHigh [] = + "<div class=data>" + "<table>" + "<tr><th>Pool address</th><td>%s</td></tr>" + "<tr><th>Connected since</th><td>%s</td></tr>" + "<tr><th>Pool ping time</th><td>%u ms</td></tr>" + "</table>" + "<h4>Network error log</h4>" + "<table>" + "<tr><th style='width: 20%; min-width: 10em;'>Date</th><th>Error</th></tr>"; + +extern const char sHtmlConnectionTableRow [] = + "<tr><td>%s</td><td>%s</td></tr>"; + +extern const char sHtmlConnectionBodyLow [] = + "</table></div></div></body></html>"; + +extern const char sHtmlResultBodyHigh [] = + "<div class=data>" + "<table>" + "<tr><th>Difficulty</th><td>%u</td></tr>" + "<tr><th>Good results</th><td>%u / %u (%.1f %%)</td></tr>" + "<tr><th>Avg result time</th><td>%.1f sec</td></tr>" + "<tr><th>Pool-side hashes</th><td>%u</td></tr>" + "</table>" + "<h4>Top 10 best results found</h4>" + "<table>" + "<tr><th style='width: 2em;'>1</th><td>%llu</td><th style='width: 2em;'>2</th><td>%llu</td></tr>" + "<tr><th>3</th><td>%llu</td><th>4</th><td>%llu</td></tr>" + "<tr><th>5</th><td>%llu</td><th>6</th><td>%llu</td></tr>" + "<tr><th>7</th><td>%llu</td><th>8</th><td>%llu</td></tr>" + "<tr><th>9</th><td>%llu</td><th>10</th><td>%llu</td></tr>" + "</table>" + "<h4>Error details</h4>" + "<table>" + "<tr><th colspan='2'>Error text</th></tr>" + "<tr><th style='width: 5em;'>Count</th><th>Last seen</th></tr>"; + +extern const char sHtmlResultTableRow [] = + "<tr><td colspan='2'>%s</td></tr><tr><td>%llu</td><td>%s</td></tr>"; + +extern const char sHtmlResultBodyLow[] = + "</table></div></div></body></html>"; + +extern const char sJsonApiThdHashrate[] = + "[%s,%s,%s]"; + +extern const char sJsonApiResultError[] = + "{\"count\":%llu,\"last_seen\":%llu,\"text\":\"%s\"}"; + +extern const char sJsonApiConnectionError[] = + "{\"last_seen\":%llu,\"text\":\"%s\"}"; + +extern const char sJsonApiFormat [] = +"{" + "\"hashrate\":{" + "\"threads\":[%s]," + "\"total\":%s," + "\"highest\":%s" + "}," + + "\"results\":{" + "\"diff_current\":%llu," + "\"shares_good\":%llu," + "\"shares_total\":%llu," + "\"avg_time\":%.1f," + "\"hashes_total\":%llu," + "\"best\":[%llu,%llu,%llu,%llu,%llu,%llu,%llu,%llu,%llu,%llu]," + "\"error_log\":[%s]" + "}," + + "\"connection\":{" + "\"pool\": \"%s\"," + "\"uptime\":%llu," + "\"ping\":%llu," + "\"error_log\":[%s]" + "}" +"}"; + diff --git a/xmrstak/http/webdesign.hpp b/xmrstak/http/webdesign.hpp new file mode 100644 index 0000000..92639a0 --- /dev/null +++ b/xmrstak/http/webdesign.hpp @@ -0,0 +1,24 @@ +#pragma once + +extern const char sHtmlCssEtag[]; +extern const char sHtmlCssFile[]; +extern size_t sHtmlCssSize; + +extern const char sHtmlCommonHeader[]; + +extern const char sHtmlHashrateBodyHigh[]; +extern const char sHtmlHashrateTableRow[]; +extern const char sHtmlHashrateBodyLow[]; + +extern const char sHtmlConnectionBodyHigh[]; +extern const char sHtmlConnectionTableRow[]; +extern const char sHtmlConnectionBodyLow[]; + +extern const char sHtmlResultBodyHigh[]; +extern const char sHtmlResultTableRow[]; +extern const char sHtmlResultBodyLow[]; + +extern const char sJsonApiThdHashrate[]; +extern const char sJsonApiResultError[]; +extern const char sJsonApiConnectionError[]; +extern const char sJsonApiFormat[]; diff --git a/xmrstak/jconf.cpp b/xmrstak/jconf.cpp new file mode 100644 index 0000000..bfa2626 --- /dev/null +++ b/xmrstak/jconf.cpp @@ -0,0 +1,412 @@ +/* + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * Additional permission under GNU GPL version 3 section 7 + * + * If you modify this Program, or any covered work, by linking or combining + * it with OpenSSL (or a modified version of that library), containing parts + * covered by the terms of OpenSSL License and SSLeay License, the licensors + * of this Program grant you additional permission to convey the resulting work. + * + */ + +#include "jconf.h" +#include "console.h" + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#ifdef _WIN32 +#define strcasecmp _stricmp +#include <intrin.h> +#else +#include <cpuid.h> +#endif + +#include "rapidjson/document.h" +#include "rapidjson/error/en.h" +#include "jext.h" +#include "console.h" +#include "Params.hpp" + +using namespace rapidjson; + +/* + * This enum needs to match index in oConfigValues, otherwise we will get a runtime error + */ +enum configEnum { + bTlsMode, bTlsSecureAlgo, sTlsFingerprint, sPoolAddr, sWalletAddr, sPoolPwd, + iCallTimeout, iNetRetry, iGiveUpLimit, iVerboseLevel, iAutohashTime, + bDaemonMode, sOutputFile, iHttpdPort, bPreferIpv4, bNiceHashMode, bAesOverride, sUseSlowMem }; + +struct configVal { + configEnum iName; + const char* sName; + Type iType; +}; + +// Same order as in configEnum, as per comment above +// kNullType means any type +configVal oConfigValues[] = { + { bTlsMode, "use_tls", kTrueType }, + { bTlsSecureAlgo, "tls_secure_algo", kTrueType }, + { sTlsFingerprint, "tls_fingerprint", kStringType }, + { sPoolAddr, "pool_address", kStringType }, + { sWalletAddr, "wallet_address", kStringType }, + { sPoolPwd, "pool_password", kStringType }, + { iCallTimeout, "call_timeout", kNumberType }, + { iNetRetry, "retry_time", kNumberType }, + { iGiveUpLimit, "giveup_limit", kNumberType }, + { iVerboseLevel, "verbose_level", kNumberType }, + { iAutohashTime, "h_print_time", kNumberType }, + { bDaemonMode, "daemon_mode", kTrueType }, + { sOutputFile, "output_file", kStringType }, + { iHttpdPort, "httpd_port", kNumberType }, + { bPreferIpv4, "prefer_ipv4", kTrueType }, + { bNiceHashMode, "nicehash_nonce", kTrueType }, + { bAesOverride, "aes_override", kNullType }, + { sUseSlowMem, "use_slow_memory", kStringType } +}; + +constexpr size_t iConfigCnt = (sizeof(oConfigValues)/sizeof(oConfigValues[0])); + +inline bool checkType(Type have, Type want) +{ + if(want == have) + return true; + else if(want == kNullType) + return true; + else if(want == kTrueType && have == kFalseType) + return true; + else if(want == kFalseType && have == kTrueType) + return true; + else + return false; +} + +struct jconf::opaque_private +{ + Document jsonDoc; + const Value* configValues[iConfigCnt]; //Compile time constant + + opaque_private() + { + } +}; + +jconf::jconf() +{ + prv = new opaque_private(); +} + +bool jconf::GetTlsSetting() +{ + return prv->configValues[bTlsMode]->GetBool(); +} + +bool jconf::TlsSecureAlgos() +{ + return prv->configValues[bTlsSecureAlgo]->GetBool(); +} + +const char* jconf::GetTlsFingerprint() +{ + return prv->configValues[sTlsFingerprint]->GetString(); +} + +const char* jconf::GetPoolAddress() +{ + auto& poolURL = xmrstak::Params::inst().poolURL; + if(poolURL.empty()) + poolURL = prv->configValues[sPoolAddr]->GetString(); + return poolURL.c_str(); +} + +const char* jconf::GetPoolPwd() +{ + auto& poolPasswd = xmrstak::Params::inst().poolPasswd; + if(poolPasswd.empty()) + poolPasswd = prv->configValues[sPoolPwd]->GetString(); + return poolPasswd.c_str(); + +} + +const char* jconf::GetWalletAddress() +{ + auto& poolUsername = xmrstak::Params::inst().poolUsername; + if(poolUsername.empty()) + poolUsername = prv->configValues[sWalletAddr]->GetString(); + return poolUsername.c_str(); +} + +bool jconf::PreferIpv4() +{ + return prv->configValues[bPreferIpv4]->GetBool(); +} + +uint64_t jconf::GetCallTimeout() +{ + return prv->configValues[iCallTimeout]->GetUint64(); +} + +uint64_t jconf::GetNetRetry() +{ + return prv->configValues[iNetRetry]->GetUint64(); +} + +uint64_t jconf::GetGiveUpLimit() +{ + return prv->configValues[iGiveUpLimit]->GetUint64(); +} + +uint64_t jconf::GetVerboseLevel() +{ + return prv->configValues[iVerboseLevel]->GetUint64(); +} + +uint64_t jconf::GetAutohashTime() +{ + return prv->configValues[iAutohashTime]->GetUint64(); +} + +uint16_t jconf::GetHttpdPort() +{ + return prv->configValues[iHttpdPort]->GetUint(); +} + +bool jconf::DaemonMode() +{ + return prv->configValues[bDaemonMode]->GetBool(); +} + +const char* jconf::GetOutputFile() +{ + return prv->configValues[sOutputFile]->GetString(); +} + +bool jconf::NiceHashMode() +{ + return prv->configValues[bNiceHashMode]->GetBool(); +} + + +void jconf::cpuid(uint32_t eax, int32_t ecx, int32_t val[4]) +{ + memset(val, 0, sizeof(int32_t)*4); + +#ifdef _WIN32 + __cpuidex(val, eax, ecx); +#else + __cpuid_count(eax, ecx, val[0], val[1], val[2], val[3]); +#endif +} + +bool jconf::check_cpu_features() +{ + constexpr int AESNI_BIT = 1 << 25; + constexpr int SSE2_BIT = 1 << 26; + int32_t cpu_info[4]; + bool bHaveSse2; + + cpuid(1, 0, cpu_info); + + bHaveAes = (cpu_info[2] & AESNI_BIT) != 0; + bHaveSse2 = (cpu_info[3] & SSE2_BIT) != 0; + + return bHaveSse2; +} + +jconf::slow_mem_cfg jconf::GetSlowMemSetting() +{ + const char* opt = prv->configValues[sUseSlowMem]->GetString(); + + if(strcasecmp(opt, "always") == 0) + return always_use; + else if(strcasecmp(opt, "no_mlck") == 0) + return no_mlck; + else if(strcasecmp(opt, "warn") == 0) + return print_warning; + else if(strcasecmp(opt, "never") == 0) + return never_use; + else + return unknown_value; +} + +bool jconf::parse_config(const char* sFilename) +{ + FILE * pFile; + char * buffer; + size_t flen; + + if(!check_cpu_features()) + { + printer::inst()->print_msg(L0, "CPU support of SSE2 is required."); + return false; + } + + pFile = fopen(sFilename, "rb"); + if (pFile == NULL) + { + printer::inst()->print_msg(L0, "Failed to open config file %s.", sFilename); + return false; + } + + fseek(pFile,0,SEEK_END); + flen = ftell(pFile); + rewind(pFile); + + if(flen >= 64*1024) + { + fclose(pFile); + printer::inst()->print_msg(L0, "Oversized config file - %s.", sFilename); + return false; + } + + if(flen <= 16) + { + fclose(pFile); + printer::inst()->print_msg(L0, "File is empty or too short - %s.", sFilename); + return false; + } + + buffer = (char*)malloc(flen + 3); + if(fread(buffer+1, flen, 1, pFile) != 1) + { + free(buffer); + fclose(pFile); + printer::inst()->print_msg(L0, "Read error while reading %s.", sFilename); + return false; + } + fclose(pFile); + + //Replace Unicode BOM with spaces - we always use UTF-8 + unsigned char* ubuffer = (unsigned char*)buffer; + if(ubuffer[1] == 0xEF && ubuffer[2] == 0xBB && ubuffer[3] == 0xBF) + { + buffer[1] = ' '; + buffer[2] = ' '; + buffer[3] = ' '; + } + + buffer[0] = '{'; + buffer[flen] = '}'; + buffer[flen + 1] = '\0'; + + prv->jsonDoc.Parse<kParseCommentsFlag|kParseTrailingCommasFlag>(buffer, flen+2); + free(buffer); + + if(prv->jsonDoc.HasParseError()) + { + printer::inst()->print_msg(L0, "JSON config parse error(offset %llu): %s", + int_port(prv->jsonDoc.GetErrorOffset()), GetParseError_En(prv->jsonDoc.GetParseError())); + return false; + } + + + if(!prv->jsonDoc.IsObject()) + { //This should never happen as we created the root ourselves + printer::inst()->print_msg(L0, "Invalid config file. No root?\n"); + return false; + } + + for(size_t i = 0; i < iConfigCnt; i++) + { + if(oConfigValues[i].iName != i) + { + printer::inst()->print_msg(L0, "Code error. oConfigValues are not in order."); + return false; + } + + prv->configValues[i] = GetObjectMember(prv->jsonDoc, oConfigValues[i].sName); + + if(prv->configValues[i] == nullptr) + { + printer::inst()->print_msg(L0, "Invalid config file. Missing value \"%s\".", oConfigValues[i].sName); + return false; + } + + if(!checkType(prv->configValues[i]->GetType(), oConfigValues[i].iType)) + { + printer::inst()->print_msg(L0, "Invalid config file. Value \"%s\" has unexpected type.", oConfigValues[i].sName); + return false; + } + } + + if(!prv->configValues[iCallTimeout]->IsUint64() || + !prv->configValues[iNetRetry]->IsUint64() || + !prv->configValues[iGiveUpLimit]->IsUint64()) + { + printer::inst()->print_msg(L0, + "Invalid config file. call_timeout, retry_time and giveup_limit need to be positive integers."); + return false; + } + + if(!prv->configValues[iVerboseLevel]->IsUint64() || !prv->configValues[iAutohashTime]->IsUint64()) + { + printer::inst()->print_msg(L0, + "Invalid config file. verbose_level and h_print_time need to be positive integers."); + return false; + } + + if(!prv->configValues[iHttpdPort]->IsUint() || prv->configValues[iHttpdPort]->GetUint() > 0xFFFF) + { + printer::inst()->print_msg(L0, + "Invalid config file. httpd_port has to be in the range 0 to 65535."); + return false; + } + +#ifdef CONF_NO_TLS + if(prv->configValues[bTlsMode]->GetBool()) + { + printer::inst()->print_msg(L0, + "Invalid config file. TLS enabled while the application has been compiled without TLS support."); + return false; + } +#endif // CONF_NO_TLS + + /* \todo check in the cpu backend if we have more than 32 worker + * keep in mined that we have change the why how the nonce is calculated (reverse thread index) + if(NiceHashMode() && GetThreadCount() >= 32) + { + printer::inst()->print_msg(L0, "You need to use less than 32 threads in NiceHash mode."); + return false; + } + */ + + if(prv->configValues[bAesOverride]->IsBool()) + bHaveAes = prv->configValues[bAesOverride]->GetBool(); + + if(!bHaveAes) + printer::inst()->print_msg(L0, "Your CPU doesn't support hardware AES. Don't expect high hashrates."); + + printer::inst()->set_verbose_level(prv->configValues[iVerboseLevel]->GetUint64()); + + if(GetSlowMemSetting() == unknown_value) + { + printer::inst()->print_msg(L0, + "Invalid config file. use_slow_memory must be \"always\", \"no_mlck\", \"warn\" or \"never\""); + return false; + } + +#ifdef _WIN32 + if(GetSlowMemSetting() == no_mlck) + { + printer::inst()->print_msg(L0, "On Windows large pages need mlock. Please use another option."); + return false; + } +#endif // _WIN32 + + return true; +} diff --git a/xmrstak/jconf.hpp b/xmrstak/jconf.hpp new file mode 100644 index 0000000..934d6f8 --- /dev/null +++ b/xmrstak/jconf.hpp @@ -0,0 +1,74 @@ +#pragma once +#include <stdlib.h> +#include <string> +#include "Environment.hpp" +#include "Params.hpp" + +class jconf +{ +public: + static jconf* inst() + { + auto& env = xmrstak::Environment::inst(); + if(env.pJconfConfig == nullptr) + env.pJconfConfig = new jconf; + return env.pJconfConfig; + }; + + bool parse_config(const char* sFilename = xmrstak::Params::inst().configFile.c_str()); + + struct thd_cfg { + bool bDoubleMode; + bool bNoPrefetch; + long long iCpuAff; + }; + + enum slow_mem_cfg { + always_use, + no_mlck, + print_warning, + never_use, + unknown_value + }; + + bool GetTlsSetting(); + bool TlsSecureAlgos(); + const char* GetTlsFingerprint(); + + const char* GetPoolAddress(); + const char* GetPoolPwd(); + const char* GetWalletAddress(); + + uint64_t GetVerboseLevel(); + uint64_t GetAutohashTime(); + + const char* GetOutputFile(); + + uint64_t GetCallTimeout(); + uint64_t GetNetRetry(); + uint64_t GetGiveUpLimit(); + + uint16_t GetHttpdPort(); + + bool DaemonMode(); + + bool PreferIpv4(); + + + bool NiceHashMode(); + + inline bool HaveHardwareAes() { return bHaveAes; } + + static void cpuid(uint32_t eax, int32_t ecx, int32_t val[4]); + + slow_mem_cfg GetSlowMemSetting(); + +private: + jconf(); + + bool check_cpu_features(); + struct opaque_private; + opaque_private* prv; + + bool bHaveAes; +}; diff --git a/xmrstak/misc/configEditor.hpp b/xmrstak/misc/configEditor.hpp new file mode 100644 index 0000000..80607ff --- /dev/null +++ b/xmrstak/misc/configEditor.hpp @@ -0,0 +1,57 @@ +#pragma once + +#include <atomic> +#include <string> +#include <fstream> +#include <streambuf> +#include <regex> + + +namespace xmrstak +{ + +struct ConfigEditor +{ + std::string m_fileContent; + + ConfigEditor() + { + + } + + static bool file_exist( const std::string filename) + { + std::ifstream fstream(filename); + return fstream.good(); + } + + void set( const std::string && content) + { + m_fileContent = content; + } + + bool load(const std::string filename) + { + std::ifstream fstream(filename); + m_fileContent = std::string( + (std::istreambuf_iterator<char>(fstream)), + std::istreambuf_iterator<char>() + ); + return fstream.good(); + } + + void write(const std::string filename) + { + std::ofstream out(filename); + out << m_fileContent; + out.close(); + } + + void replace(const std::string search, const std::string substring) + { + m_fileContent = std::regex_replace(m_fileContent, std::regex(search), substring); + } + +}; + +} // namepsace xmrstak diff --git a/xmrstak/misc/console.cpp b/xmrstak/misc/console.cpp new file mode 100644 index 0000000..0c73b1d --- /dev/null +++ b/xmrstak/misc/console.cpp @@ -0,0 +1,227 @@ +/* + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * Additional permission under GNU GPL version 3 section 7 + * + * If you modify this Program, or any covered work, by linking or combining + * it with OpenSSL (or a modified version of that library), containing parts + * covered by the terms of OpenSSL License and SSLeay License, the licensors + * of this Program grant you additional permission to convey the resulting work. + * + */ + +#include "console.h" +#include <time.h> +#include <stdio.h> +#include <string.h> +#include <stdarg.h> +#include <cstdlib> + +#ifdef _WIN32 +#include <windows.h> + +int get_key() +{ + DWORD mode, rd; + HANDLE h; + + if ((h = GetStdHandle(STD_INPUT_HANDLE)) == NULL) + return -1; + + GetConsoleMode( h, &mode ); + SetConsoleMode( h, mode & ~(ENABLE_LINE_INPUT | ENABLE_ECHO_INPUT) ); + + int c = 0; + ReadConsole( h, &c, 1, &rd, NULL ); + SetConsoleMode( h, mode ); + + return c; +} + +void set_colour(out_colours cl) +{ + WORD attr = 0; + + switch(cl) + { + case K_RED: + attr = FOREGROUND_RED | FOREGROUND_INTENSITY; + break; + case K_GREEN: + attr = FOREGROUND_GREEN | FOREGROUND_INTENSITY; + break; + case K_BLUE: + attr = FOREGROUND_BLUE | FOREGROUND_INTENSITY; + break; + case K_YELLOW: + attr = FOREGROUND_RED | FOREGROUND_GREEN | FOREGROUND_INTENSITY; + break; + case K_CYAN: + attr = FOREGROUND_BLUE | FOREGROUND_GREEN | FOREGROUND_INTENSITY; + break; + case K_MAGENTA: + attr = FOREGROUND_BLUE | FOREGROUND_RED | FOREGROUND_INTENSITY; + break; + case K_WHITE: + attr = FOREGROUND_BLUE | FOREGROUND_RED | FOREGROUND_GREEN | FOREGROUND_INTENSITY; + break; + default: + break; + } + + SetConsoleTextAttribute(GetStdHandle(STD_OUTPUT_HANDLE), attr); +} + +void reset_colour() +{ + SetConsoleTextAttribute(GetStdHandle(STD_OUTPUT_HANDLE), FOREGROUND_RED | FOREGROUND_GREEN | FOREGROUND_BLUE); +} + +#else +#include <termios.h> +#include <unistd.h> +#include <stdio.h> + +int get_key() +{ + struct termios oldattr, newattr; + int ch; + tcgetattr( STDIN_FILENO, &oldattr ); + newattr = oldattr; + newattr.c_lflag &= ~( ICANON | ECHO ); + tcsetattr( STDIN_FILENO, TCSANOW, &newattr ); + ch = getchar(); + tcsetattr( STDIN_FILENO, TCSANOW, &oldattr ); + return ch; +} + +void set_colour(out_colours cl) +{ + switch(cl) + { + case K_RED: + fputs("\x1B[1;31m", stdout); + break; + case K_GREEN: + fputs("\x1B[1;32m", stdout); + break; + case K_BLUE: + fputs("\x1B[1;34m", stdout); + break; + case K_YELLOW: + fputs("\x1B[1;33m", stdout); + break; + case K_CYAN: + fputs("\x1B[1;36m", stdout); + break; + case K_MAGENTA: + fputs("\x1B[1;35m", stdout); + break; + case K_WHITE: + fputs("\x1B[1;37m", stdout); + break; + default: + break; + } +} + +void reset_colour() +{ + fputs("\x1B[0m", stdout); +} +#endif // _WIN32 + +inline void comp_localtime(const time_t* ctime, tm* stime) +{ +#ifdef _WIN32 + localtime_s(stime, ctime); +#else + localtime_r(ctime, stime); +#endif // __WIN32 +} + +printer::printer() +{ + verbose_level = LINF; + logfile = nullptr; +} + +bool printer::open_logfile(const char* file) +{ + logfile = fopen(file, "ab+"); + return logfile != nullptr; +} + +void printer::print_msg(verbosity verbose, const char* fmt, ...) +{ + if(verbose > verbose_level) + return; + + char buf[1024]; + size_t bpos; + tm stime; + + time_t now = time(nullptr); + comp_localtime(&now, &stime); + strftime(buf, sizeof(buf), "[%F %T] : ", &stime); + bpos = strlen(buf); + + va_list args; + va_start(args, fmt); + vsnprintf(buf+bpos, sizeof(buf)-bpos, fmt, args); + va_end(args); + bpos = strlen(buf); + + if(bpos+2 >= sizeof(buf)) + return; + + buf[bpos] = '\n'; + buf[bpos+1] = '\0'; + + std::unique_lock<std::mutex> lck(print_mutex); + fputs(buf, stdout); + + if(logfile != nullptr) + { + fputs(buf, logfile); + fflush(logfile); + } +} + +void printer::print_str(const char* str) +{ + std::unique_lock<std::mutex> lck(print_mutex); + fputs(str, stdout); + + if(logfile != nullptr) + { + fputs(str, logfile); + fflush(logfile); + } +} + +//Do a press any key for the windows folk. *insert any key joke here* +#ifdef _WIN32 +void win_exit() +{ + printer::inst()->print_str("Press any key to exit."); + get_key(); + std::exit(1); +} + +#else +void win_exit() { + std::exit(1); +} +#endif // _WIN32 diff --git a/xmrstak/misc/console.hpp b/xmrstak/misc/console.hpp new file mode 100644 index 0000000..ac2ed3c --- /dev/null +++ b/xmrstak/misc/console.hpp @@ -0,0 +1,47 @@ +#pragma once +#include <mutex> +#include "Environment.hpp" + +enum out_colours { K_RED, K_GREEN, K_BLUE, K_YELLOW, K_CYAN, K_MAGENTA, K_WHITE, K_NONE }; + +// Warning - on Linux get_key will detect control keys, but not on Windows. +// We will only use it for alphanum keys anyway. +int get_key(); + +void set_colour(out_colours cl); +void reset_colour(); + +// on MSVC sizeof(long int) = 4, gcc sizeof(long int) = 8, this is the workaround +// now we can use %llu on both compilers +inline long long unsigned int int_port(size_t i) +{ + return i; +} + +enum verbosity : size_t { L0 = 0, L1 = 1, L2 = 2, L3 = 3, L4 = 4, LINF = 100}; + +class printer +{ +public: + static inline printer* inst() + { + auto& env = xmrstak::Environment::inst(); + if(env.pPrinter == nullptr) + env.pPrinter = new printer; + return env.pPrinter; + }; + + inline void set_verbose_level(size_t level) { verbose_level = (verbosity)level; } + void print_msg(verbosity verbose, const char* fmt, ...); + void print_str(const char* str); + bool open_logfile(const char* file); + +private: + printer(); + + std::mutex print_mutex; + verbosity verbose_level; + FILE* logfile; +}; + +void win_exit(); diff --git a/xmrstak/misc/environment.hpp b/xmrstak/misc/environment.hpp new file mode 100644 index 0000000..15f8cce --- /dev/null +++ b/xmrstak/misc/environment.hpp @@ -0,0 +1,46 @@ +#pragma once + +class printer; +class jconf; +class executor; + +namespace xmrstak +{ + +class GlobalStates; +class Params; + +struct Environment +{ + + static Environment& inst() + { + static Environment env; + return env; + } + + Environment& operator=(const Environment& env) + { + this->pPrinter = env.pPrinter; + this->pGlobalStates = env.pGlobalStates; + this->pJconfConfig = env.pJconfConfig; + this->pExecutor = env.pExecutor; + this->pParams = env.pParams; + return *this; + } + + + Environment() : pPrinter(nullptr), pGlobalStates(nullptr) + { + } + + + printer* pPrinter; + GlobalStates* pGlobalStates; + jconf* pJconfConfig; + executor* pExecutor; + Params* pParams; + +}; + +} // namepsace xmrstak diff --git a/xmrstak/misc/executor.cpp b/xmrstak/misc/executor.cpp new file mode 100644 index 0000000..64f3d40 --- /dev/null +++ b/xmrstak/misc/executor.cpp @@ -0,0 +1,1005 @@ +/* + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * Additional permission under GNU GPL version 3 section 7 + * + * If you modify this Program, or any covered work, by linking or combining + * it with OpenSSL (or a modified version of that library), containing parts + * covered by the terms of OpenSSL License and SSLeay License, the licensors + * of this Program grant you additional permission to convey the resulting work. + * + */ + +#include <thread> +#include <string> +#include <cmath> +#include <algorithm> +#include <assert.h> +#include <time.h> +#include "executor.h" +#include "jpsock.h" + +#include "telemetry.h" +#include "backend/miner_work.h" +#include "backend/GlobalStates.hpp" +#include "backend/BackendConnector.hpp" + +#include "jconf.h" +#include "console.h" +#include "donate-level.h" +#include "webdesign.h" + +#ifdef _WIN32 +#define strncasecmp _strnicmp +#endif // _WIN32 + +executor::executor() +{ +} + +void executor::push_timed_event(ex_event&& ev, size_t sec) +{ + std::unique_lock<std::mutex> lck(timed_event_mutex); + lTimedEvents.emplace_back(std::move(ev), sec_to_ticks(sec)); +} + +void executor::ex_clock_thd() +{ + size_t iSwitchPeriod = sec_to_ticks(iDevDonatePeriod); + size_t iDevPortion = (size_t)floor(((double)iSwitchPeriod) * fDevDonationLevel); + + //No point in bothering with less than 10 sec + if(iDevPortion < sec_to_ticks(10)) + iDevPortion = 0; + + //Add 2 seconds to compensate for connect + if(iDevPortion != 0) + iDevPortion += sec_to_ticks(2); + + while (true) + { + std::this_thread::sleep_for(std::chrono::milliseconds(size_t(iTickTime))); + + push_event(ex_event(EV_PERF_TICK)); + + // Service timed events + std::unique_lock<std::mutex> lck(timed_event_mutex); + std::list<timed_event>::iterator ev = lTimedEvents.begin(); + while (ev != lTimedEvents.end()) + { + ev->ticks_left--; + if(ev->ticks_left == 0) + { + push_event(std::move(ev->event)); + ev = lTimedEvents.erase(ev); + } + else + ev++; + } + lck.unlock(); + + if(iDevPortion == 0) + continue; + + iSwitchPeriod--; + if(iSwitchPeriod == 0) + { + push_event(ex_event(EV_SWITCH_POOL, usr_pool_id)); + iSwitchPeriod = sec_to_ticks(iDevDonatePeriod); + } + else if(iSwitchPeriod == iDevPortion) + { + push_event(ex_event(EV_SWITCH_POOL, dev_pool_id)); + } + } +} + +void executor::sched_reconnect() +{ + iReconnectAttempts++; + size_t iLimit = jconf::inst()->GetGiveUpLimit(); + if(iLimit != 0 && iReconnectAttempts > iLimit) + { + printer::inst()->print_msg(L0, "Give up limit reached. Exitting."); + exit(0); + } + + long long unsigned int rt = jconf::inst()->GetNetRetry(); + printer::inst()->print_msg(L1, "Pool connection lost. Waiting %lld s before retry (attempt %llu).", + rt, int_port(iReconnectAttempts)); + + auto work = xmrstak::miner_work(); + xmrstak::GlobalStates::inst().switch_work(work); + + push_timed_event(ex_event(EV_RECONNECT, usr_pool_id), rt); +} + +void executor::log_socket_error(std::string&& sError) +{ + vSocketLog.emplace_back(std::move(sError)); + printer::inst()->print_msg(L1, "SOCKET ERROR - %s", vSocketLog.back().msg.c_str()); +} + +void executor::log_result_error(std::string&& sError) +{ + size_t i = 1, ln = vMineResults.size(); + for(; i < ln; i++) + { + if(vMineResults[i].compare(sError)) + { + vMineResults[i].increment(); + break; + } + } + + if(i == ln) //Not found + vMineResults.emplace_back(std::move(sError)); + else + sError.clear(); +} + +void executor::log_result_ok(uint64_t iActualDiff) +{ + iPoolHashes += iPoolDiff; + + size_t ln = iTopDiff.size() - 1; + if(iActualDiff > iTopDiff[ln]) + { + iTopDiff[ln] = iActualDiff; + std::sort(iTopDiff.rbegin(), iTopDiff.rend()); + } + + vMineResults[0].increment(); +} + +jpsock* executor::pick_pool_by_id(size_t pool_id) +{ + assert(pool_id != invalid_pool_id); + + if(pool_id == dev_pool_id) + return dev_pool; + else + return usr_pool; +} + +void executor::on_sock_ready(size_t pool_id) +{ + jpsock* pool = pick_pool_by_id(pool_id); + + if(pool_id == dev_pool_id) + { + if(!pool->cmd_login("", "")) + pool->disconnect(); + + current_pool_id = dev_pool_id; + printer::inst()->print_msg(L1, "Dev pool logged in. Switching work."); + return; + } + + printer::inst()->print_msg(L1, "Connected. Logging in..."); + + if (!pool->cmd_login(jconf::inst()->GetWalletAddress(), jconf::inst()->GetPoolPwd())) + { + if(!pool->have_sock_error()) + { + log_socket_error(pool->get_call_error()); + pool->disconnect(); + } + } + else + { + iReconnectAttempts = 0; + reset_stats(); + } +} + +void executor::on_sock_error(size_t pool_id, std::string&& sError) +{ + jpsock* pool = pick_pool_by_id(pool_id); + + if(pool_id == dev_pool_id) + { + pool->disconnect(); + + if(current_pool_id != dev_pool_id) + return; + + printer::inst()->print_msg(L1, "Dev pool connection error. Switching work."); + on_switch_pool(usr_pool_id); + return; + } + + log_socket_error(std::move(sError)); + pool->disconnect(); + sched_reconnect(); +} + +void executor::on_pool_have_job(size_t pool_id, pool_job& oPoolJob) +{ + if(pool_id != current_pool_id) + return; + + jpsock* pool = pick_pool_by_id(pool_id); + + xmrstak::miner_work oWork(oPoolJob.sJobID, oPoolJob.bWorkBlob, + oPoolJob.iWorkLen, oPoolJob.iResumeCnt, oPoolJob.iTarget, + pool_id); + + oWork.iTarget32 = oPoolJob.iTarget32; + + xmrstak::GlobalStates::inst().switch_work(oWork); + + if(pool_id == dev_pool_id) + return; + + if(iPoolDiff != pool->get_current_diff()) + { + iPoolDiff = pool->get_current_diff(); + printer::inst()->print_msg(L2, "Difficulty changed. Now: %llu.", int_port(iPoolDiff)); + } + + printer::inst()->print_msg(L3, "New block detected."); +} + +void executor::on_miner_result(size_t pool_id, job_result& oResult) +{ + jpsock* pool = pick_pool_by_id(pool_id); + + if(pool_id == dev_pool_id) + { + //Ignore errors silently + if(pool->is_running() && pool->is_logged_in()) + pool->cmd_submit(oResult.sJobID, oResult.iNonce, oResult.bResult); + + return; + } + + if (!pool->is_running() || !pool->is_logged_in()) + { + log_result_error("[NETWORK ERROR]"); + return; + } + + using namespace std::chrono; + size_t t_start = time_point_cast<milliseconds>(high_resolution_clock::now()).time_since_epoch().count(); + bool bResult = pool->cmd_submit(oResult.sJobID, oResult.iNonce, oResult.bResult); + size_t t_len = time_point_cast<milliseconds>(high_resolution_clock::now()).time_since_epoch().count() - t_start; + + if(t_len > 0xFFFF) + t_len = 0xFFFF; + iPoolCallTimes.push_back((uint16_t)t_len); + + if(bResult) + { + uint64_t* targets = (uint64_t*)oResult.bResult; + log_result_ok(jpsock::t64_to_diff(targets[3])); + printer::inst()->print_msg(L3, "Result accepted by the pool."); + } + else + { + if(!pool->have_sock_error()) + { + printer::inst()->print_msg(L3, "Result rejected by the pool."); + + std::string error = pool->get_call_error(); + + if(strncasecmp(error.c_str(), "Unauthenticated", 15) == 0) + { + printer::inst()->print_msg(L2, "Your miner was unable to find a share in time. Either the pool difficulty is too high, or the pool timeout is too low."); + pool->disconnect(); + } + + log_result_error(std::move(error)); + } + else + log_result_error("[NETWORK ERROR]"); + } +} + +void executor::on_reconnect(size_t pool_id) +{ + jpsock* pool = pick_pool_by_id(pool_id); + + std::string error; + if(pool_id == dev_pool_id) + return; + + printer::inst()->print_msg(L1, "Connecting to pool %s ...", jconf::inst()->GetPoolAddress()); + + if(!pool->connect(jconf::inst()->GetPoolAddress(), error)) + { + log_socket_error(std::move(error)); + sched_reconnect(); + } +} + +void executor::on_switch_pool(size_t pool_id) +{ + if(pool_id == current_pool_id) + return; + + jpsock* pool = pick_pool_by_id(pool_id); + if(pool_id == dev_pool_id) + { + std::string error; + + // If it fails, it fails, we carry on on the usr pool + // as we never receive further events + printer::inst()->print_msg(L1, "Connecting to dev pool..."); + const char* dev_pool_addr = jconf::inst()->GetTlsSetting() ? "donate.xmr-stak.net:6666" : "donate.xmr-stak.net:3333"; + if(!pool->connect(dev_pool_addr, error)) + printer::inst()->print_msg(L1, "Error connecting to dev pool. Staying with user pool."); + } + else + { + printer::inst()->print_msg(L1, "Switching back to user pool."); + + current_pool_id = pool_id; + pool_job oPoolJob; + + if(!pool->get_current_job(oPoolJob)) + { + pool->disconnect(); + return; + } + + xmrstak::miner_work oWork(oPoolJob.sJobID, oPoolJob.bWorkBlob, + oPoolJob.iWorkLen, oPoolJob.iResumeCnt, oPoolJob.iTarget, + pool_id); + + oWork.iTarget32 = oPoolJob.iTarget32; + + xmrstak::GlobalStates::inst().switch_work(oWork); + + if(dev_pool->is_running()) + push_timed_event(ex_event(EV_DEV_POOL_EXIT), 5); + } +} + +void executor::ex_main() +{ + assert(1000 % iTickTime == 0); + + xmrstak::miner_work oWork = xmrstak::miner_work(); + + // \todo collect all backend threads + pvThreads = xmrstak::BackendConnector::thread_starter(oWork); + + if(pvThreads->size()==0) + { + printer::inst()->print_msg(L1, "ERROR: No miner backend enabled."); + win_exit(); + } + + telem = new xmrstak::telemetry(pvThreads->size()); + + current_pool_id = usr_pool_id; + usr_pool = new jpsock(usr_pool_id, jconf::inst()->GetTlsSetting()); + dev_pool = new jpsock(dev_pool_id, jconf::inst()->GetTlsSetting()); + + ex_event ev; + std::thread clock_thd(&executor::ex_clock_thd, this); + + //This will connect us to the pool for the first time + push_event(ex_event(EV_RECONNECT, usr_pool_id)); + + // Place the default success result at position 0, it needs to + // be here even if our first result is a failure + vMineResults.emplace_back(); + + // If the user requested it, start the autohash printer + if(jconf::inst()->GetVerboseLevel() >= 4) + push_timed_event(ex_event(EV_HASHRATE_LOOP), jconf::inst()->GetAutohashTime()); + + size_t cnt = 0, i; + while (true) + { + ev = oEventQ.pop(); + switch (ev.iName) + { + case EV_SOCK_READY: + on_sock_ready(ev.iPoolId); + break; + + case EV_SOCK_ERROR: + on_sock_error(ev.iPoolId, std::move(ev.sSocketError)); + break; + + case EV_POOL_HAVE_JOB: + on_pool_have_job(ev.iPoolId, ev.oPoolJob); + break; + + case EV_MINER_HAVE_RESULT: + on_miner_result(ev.iPoolId, ev.oJobResult); + break; + + case EV_RECONNECT: + on_reconnect(ev.iPoolId); + break; + + case EV_SWITCH_POOL: + on_switch_pool(ev.iPoolId); + break; + + case EV_DEV_POOL_EXIT: + dev_pool->disconnect(); + break; + + case EV_PERF_TICK: + for (i = 0; i < pvThreads->size(); i++) + telem->push_perf_value(i, pvThreads->at(i)->iHashCount.load(std::memory_order_relaxed), + pvThreads->at(i)->iTimestamp.load(std::memory_order_relaxed)); + + if((cnt++ & 0xF) == 0) //Every 16 ticks + { + double fHps = 0.0; + double fTelem; + bool normal = true; + + for (i = 0; i < pvThreads->size(); i++) + { + fTelem = telem->calc_telemetry_data(2500, i); + if(std::isnormal(fTelem)) + { + fHps += fTelem; + } + else + { + normal = false; + break; + } + } + + if(normal && fHighestHps < fHps) + fHighestHps = fHps; + } + break; + + case EV_USR_HASHRATE: + case EV_USR_RESULTS: + case EV_USR_CONNSTAT: + print_report(ev.iName); + break; + + case EV_HTML_HASHRATE: + case EV_HTML_RESULTS: + case EV_HTML_CONNSTAT: + case EV_HTML_JSON: + http_report(ev.iName); + break; + + case EV_HASHRATE_LOOP: + print_report(EV_USR_HASHRATE); + push_timed_event(ex_event(EV_HASHRATE_LOOP), jconf::inst()->GetAutohashTime()); + break; + + case EV_INVALID_VAL: + default: + assert(false); + break; + } + } +} + +inline const char* hps_format(double h, char* buf, size_t l) +{ + if(std::isnormal(h) || h == 0.0) + { + snprintf(buf, l, " %03.1f", h); + return buf; + } + else + return " (na)"; +} + +void executor::hashrate_report(std::string& out) +{ + char num[32]; + size_t nthd = pvThreads->size(); + + out.reserve(256 + nthd * 64); + + double fTotal[3] = { 0.0, 0.0, 0.0}; + size_t i; + + out.append("HASHRATE REPORT\n"); + out.append("| ID | 10s | 60s | 15m |"); + if(nthd != 1) + out.append(" ID | 10s | 60s | 15m |\n"); + else + out.append(1, '\n'); + + for (i = 0; i < nthd; i++) + { + double fHps[3]; + + fHps[0] = telem->calc_telemetry_data(10000, i); + fHps[1] = telem->calc_telemetry_data(60000, i); + fHps[2] = telem->calc_telemetry_data(900000, i); + + snprintf(num, sizeof(num), "| %2u |", (unsigned int)i); + out.append(num); + out.append(hps_format(fHps[0], num, sizeof(num))).append(" |"); + out.append(hps_format(fHps[1], num, sizeof(num))).append(" |"); + out.append(hps_format(fHps[2], num, sizeof(num))).append(1, ' '); + + fTotal[0] += fHps[0]; + fTotal[1] += fHps[1]; + fTotal[2] += fHps[2]; + + if((i & 0x1) == 1) //Odd i's + out.append("|\n"); + } + + if((i & 0x1) == 1) //We had odd number of threads + out.append("|\n"); + + if(nthd != 1) + out.append("-----------------------------------------------------\n"); + else + out.append("---------------------------\n"); + + out.append("Totals: "); + out.append(hps_format(fTotal[0], num, sizeof(num))); + out.append(hps_format(fTotal[1], num, sizeof(num))); + out.append(hps_format(fTotal[2], num, sizeof(num))); + out.append(" H/s\nHighest: "); + out.append(hps_format(fHighestHps, num, sizeof(num))); + out.append(" H/s\n"); +} + +char* time_format(char* buf, size_t len, std::chrono::system_clock::time_point time) +{ + time_t ctime = std::chrono::system_clock::to_time_t(time); + tm stime; + + /* + * Oh for god's sake... this feels like we are back to the 90's... + * and don't get me started on lack strcpy_s because NIH - use non-standard strlcpy... + * And of course C++ implements unsafe version because... reasons + */ + +#ifdef _WIN32 + localtime_s(&stime, &ctime); +#else + localtime_r(&ctime, &stime); +#endif // __WIN32 + strftime(buf, len, "%F %T", &stime); + + return buf; +} + +void executor::result_report(std::string& out) +{ + char num[128]; + char date[32]; + + out.reserve(1024); + + size_t iGoodRes = vMineResults[0].count, iTotalRes = iGoodRes; + size_t ln = vMineResults.size(); + + for(size_t i=1; i < ln; i++) + iTotalRes += vMineResults[i].count; + + out.append("RESULT REPORT\n"); + if(iTotalRes == 0) + { + out.append("You haven't found any results yet.\n"); + return; + } + + double dConnSec; + { + using namespace std::chrono; + dConnSec = (double)duration_cast<seconds>(system_clock::now() - tPoolConnTime).count(); + } + + snprintf(num, sizeof(num), " (%.1f %%)\n", 100.0 * iGoodRes / iTotalRes); + + out.append("Difficulty : ").append(std::to_string(iPoolDiff)).append(1, '\n'); + out.append("Good results : ").append(std::to_string(iGoodRes)).append(" / "). + append(std::to_string(iTotalRes)).append(num); + + if(iPoolCallTimes.size() != 0) + { + // Here we use iPoolCallTimes since it also gets reset when we disconnect + snprintf(num, sizeof(num), "%.1f sec\n", dConnSec / iPoolCallTimes.size()); + out.append("Avg result time : ").append(num); + } + out.append("Pool-side hashes : ").append(std::to_string(iPoolHashes)).append(2, '\n'); + out.append("Top 10 best results found:\n"); + + for(size_t i=0; i < 10; i += 2) + { + snprintf(num, sizeof(num), "| %2llu | %16llu | %2llu | %16llu |\n", + int_port(i), int_port(iTopDiff[i]), int_port(i+1), int_port(iTopDiff[i+1])); + out.append(num); + } + + out.append("\nError details:\n"); + if(ln > 1) + { + out.append("| Count | Error text | Last seen |\n"); + for(size_t i=1; i < ln; i++) + { + snprintf(num, sizeof(num), "| %5llu | %-32.32s | %s |\n", int_port(vMineResults[i].count), + vMineResults[i].msg.c_str(), time_format(date, sizeof(date), vMineResults[i].time)); + out.append(num); + } + } + else + out.append("Yay! No errors.\n"); +} + +void executor::connection_report(std::string& out) +{ + char num[128]; + char date[32]; + + out.reserve(512); + + jpsock* pool = pick_pool_by_id(dev_pool_id + 1); + + out.append("CONNECTION REPORT\n"); + out.append("Pool address : ").append(jconf::inst()->GetPoolAddress()).append(1, '\n'); + if (pool->is_running() && pool->is_logged_in()) + out.append("Connected since : ").append(time_format(date, sizeof(date), tPoolConnTime)).append(1, '\n'); + else + out.append("Connected since : <not connected>\n"); + + size_t n_calls = iPoolCallTimes.size(); + if (n_calls > 1) + { + //Not-really-but-good-enough median + std::nth_element(iPoolCallTimes.begin(), iPoolCallTimes.begin() + n_calls/2, iPoolCallTimes.end()); + out.append("Pool ping time : ").append(std::to_string(iPoolCallTimes[n_calls/2])).append(" ms\n"); + } + else + out.append("Pool ping time : (n/a)\n"); + + out.append("\nNetwork error log:\n"); + size_t ln = vSocketLog.size(); + if(ln > 0) + { + out.append("| Date | Error text |\n"); + for(size_t i=0; i < ln; i++) + { + snprintf(num, sizeof(num), "| %s | %-54.54s |\n", + time_format(date, sizeof(date), vSocketLog[i].time), vSocketLog[i].msg.c_str()); + out.append(num); + } + } + else + out.append("Yay! No errors.\n"); +} + +void executor::print_report(ex_event_name ev) +{ + std::string out; + switch(ev) + { + case EV_USR_HASHRATE: + hashrate_report(out); + break; + + case EV_USR_RESULTS: + result_report(out); + break; + + case EV_USR_CONNSTAT: + connection_report(out); + break; + default: + assert(false); + break; + } + + printer::inst()->print_str(out.c_str()); +} + +void executor::http_hashrate_report(std::string& out) +{ + char num_a[32], num_b[32], num_c[32], num_d[32]; + char buffer[4096]; + size_t nthd = pvThreads->size(); + + out.reserve(4096); + + snprintf(buffer, sizeof(buffer), sHtmlCommonHeader, "Hashrate Report", "Hashrate Report"); + out.append(buffer); + + snprintf(buffer, sizeof(buffer), sHtmlHashrateBodyHigh, (unsigned int)nthd + 3); + out.append(buffer); + + double fTotal[3] = { 0.0, 0.0, 0.0}; + for(size_t i=0; i < nthd; i++) + { + double fHps[3]; + + fHps[0] = telem->calc_telemetry_data(2500, i); + fHps[1] = telem->calc_telemetry_data(60000, i); + fHps[2] = telem->calc_telemetry_data(900000, i); + + num_a[0] = num_b[0] = num_c[0] ='\0'; + hps_format(fHps[0], num_a, sizeof(num_a)); + hps_format(fHps[1], num_b, sizeof(num_b)); + hps_format(fHps[2], num_c, sizeof(num_c)); + + fTotal[0] += fHps[0]; + fTotal[1] += fHps[1]; + fTotal[2] += fHps[2]; + + snprintf(buffer, sizeof(buffer), sHtmlHashrateTableRow, (unsigned int)i, num_a, num_b, num_c); + out.append(buffer); + } + + num_a[0] = num_b[0] = num_c[0] = num_d[0] ='\0'; + hps_format(fTotal[0], num_a, sizeof(num_a)); + hps_format(fTotal[1], num_b, sizeof(num_b)); + hps_format(fTotal[2], num_c, sizeof(num_c)); + hps_format(fHighestHps, num_d, sizeof(num_d)); + + snprintf(buffer, sizeof(buffer), sHtmlHashrateBodyLow, num_a, num_b, num_c, num_d); + out.append(buffer); +} + +void executor::http_result_report(std::string& out) +{ + char date[128]; + char buffer[4096]; + + out.reserve(4096); + + snprintf(buffer, sizeof(buffer), sHtmlCommonHeader, "Result Report", "Result Report"); + out.append(buffer); + + size_t iGoodRes = vMineResults[0].count, iTotalRes = iGoodRes; + size_t ln = vMineResults.size(); + + for(size_t i=1; i < ln; i++) + iTotalRes += vMineResults[i].count; + + double fGoodResPrc = 0.0; + if(iTotalRes > 0) + fGoodResPrc = 100.0 * iGoodRes / iTotalRes; + + double fAvgResTime = 0.0; + if(iPoolCallTimes.size() > 0) + { + using namespace std::chrono; + fAvgResTime = ((double)duration_cast<seconds>(system_clock::now() - tPoolConnTime).count()) + / iPoolCallTimes.size(); + } + + snprintf(buffer, sizeof(buffer), sHtmlResultBodyHigh, + iPoolDiff, iGoodRes, iTotalRes, fGoodResPrc, fAvgResTime, iPoolHashes, + int_port(iTopDiff[0]), int_port(iTopDiff[1]), int_port(iTopDiff[2]), int_port(iTopDiff[3]), + int_port(iTopDiff[4]), int_port(iTopDiff[5]), int_port(iTopDiff[6]), int_port(iTopDiff[7]), + int_port(iTopDiff[8]), int_port(iTopDiff[9])); + + out.append(buffer); + + for(size_t i=1; i < vMineResults.size(); i++) + { + snprintf(buffer, sizeof(buffer), sHtmlResultTableRow, vMineResults[i].msg.c_str(), + int_port(vMineResults[i].count), time_format(date, sizeof(date), vMineResults[i].time)); + out.append(buffer); + } + + out.append(sHtmlResultBodyLow); +} + +void executor::http_connection_report(std::string& out) +{ + char date[128]; + char buffer[4096]; + + out.reserve(4096); + + snprintf(buffer, sizeof(buffer), sHtmlCommonHeader, "Connection Report", "Connection Report"); + out.append(buffer); + + jpsock* pool = pick_pool_by_id(dev_pool_id + 1); + const char* cdate = "not connected"; + if (pool->is_running() && pool->is_logged_in()) + cdate = time_format(date, sizeof(date), tPoolConnTime); + + size_t n_calls = iPoolCallTimes.size(); + unsigned int ping_time = 0; + if (n_calls > 1) + { + //Not-really-but-good-enough median + std::nth_element(iPoolCallTimes.begin(), iPoolCallTimes.begin() + n_calls/2, iPoolCallTimes.end()); + ping_time = iPoolCallTimes[n_calls/2]; + } + + snprintf(buffer, sizeof(buffer), sHtmlConnectionBodyHigh, + jconf::inst()->GetPoolAddress(), + cdate, ping_time); + out.append(buffer); + + + for(size_t i=0; i < vSocketLog.size(); i++) + { + snprintf(buffer, sizeof(buffer), sHtmlConnectionTableRow, + time_format(date, sizeof(date), vSocketLog[i].time), vSocketLog[i].msg.c_str()); + out.append(buffer); + } + + out.append(sHtmlConnectionBodyLow); +} + +inline const char* hps_format_json(double h, char* buf, size_t l) +{ + if(std::isnormal(h) || h == 0.0) + { + snprintf(buf, l, "%.1f", h); + return buf; + } + else + return "null"; +} + +void executor::http_json_report(std::string& out) +{ + const char *a, *b, *c; + char num_a[32], num_b[32], num_c[32]; + char hr_buffer[64]; + std::string hr_thds, res_error, cn_error; + + size_t nthd = pvThreads->size(); + double fTotal[3] = { 0.0, 0.0, 0.0}; + hr_thds.reserve(nthd * 32); + + for(size_t i=0; i < nthd; i++) + { + if(i != 0) hr_thds.append(1, ','); + + double fHps[3]; + fHps[0] = telem->calc_telemetry_data(2500, i); + fHps[1] = telem->calc_telemetry_data(60000, i); + fHps[2] = telem->calc_telemetry_data(900000, i); + + fTotal[0] += fHps[0]; + fTotal[1] += fHps[1]; + fTotal[2] += fHps[2]; + + a = hps_format_json(fHps[0], num_a, sizeof(num_a)); + b = hps_format_json(fHps[1], num_b, sizeof(num_b)); + c = hps_format_json(fHps[2], num_c, sizeof(num_c)); + snprintf(hr_buffer, sizeof(hr_buffer), sJsonApiThdHashrate, a, b, c); + hr_thds.append(hr_buffer); + } + + a = hps_format_json(fTotal[0], num_a, sizeof(num_a)); + b = hps_format_json(fTotal[1], num_b, sizeof(num_b)); + c = hps_format_json(fTotal[2], num_c, sizeof(num_c)); + snprintf(hr_buffer, sizeof(hr_buffer), sJsonApiThdHashrate, a, b, c); + + a = hps_format_json(fHighestHps, num_a, sizeof(num_a)); + + size_t iGoodRes = vMineResults[0].count, iTotalRes = iGoodRes; + size_t ln = vMineResults.size(); + + for(size_t i=1; i < ln; i++) + iTotalRes += vMineResults[i].count; + + jpsock* pool = pick_pool_by_id(dev_pool_id + 1); + + size_t iConnSec = 0; + if(pool->is_running() && pool->is_logged_in()) + { + using namespace std::chrono; + iConnSec = duration_cast<seconds>(system_clock::now() - tPoolConnTime).count(); + } + + double fAvgResTime = 0.0; + if(iPoolCallTimes.size() > 0) + fAvgResTime = double(iConnSec) / iPoolCallTimes.size(); + + res_error.reserve((vMineResults.size() - 1) * 128); + char buffer[256]; + for(size_t i=1; i < vMineResults.size(); i++) + { + using namespace std::chrono; + if(i != 1) res_error.append(1, ','); + + snprintf(buffer, sizeof(buffer), sJsonApiResultError, int_port(vMineResults[i].count), + int_port(duration_cast<seconds>(vMineResults[i].time.time_since_epoch()).count()), + vMineResults[i].msg.c_str()); + res_error.append(buffer); + } + + size_t n_calls = iPoolCallTimes.size(); + size_t iPoolPing = 0; + if (n_calls > 1) + { + //Not-really-but-good-enough median + std::nth_element(iPoolCallTimes.begin(), iPoolCallTimes.begin() + n_calls/2, iPoolCallTimes.end()); + iPoolPing = iPoolCallTimes[n_calls/2]; + } + + cn_error.reserve(vSocketLog.size() * 128); + for(size_t i=0; i < vSocketLog.size(); i++) + { + using namespace std::chrono; + if(i != 0) cn_error.append(1, ','); + + snprintf(buffer, sizeof(buffer), sJsonApiConnectionError, + int_port(duration_cast<seconds>(vMineResults[i].time.time_since_epoch()).count()), + vSocketLog[i].msg.c_str()); + cn_error.append(buffer); + } + + size_t bb_size = 1024 + hr_thds.size() + res_error.size() + cn_error.size(); + std::unique_ptr<char[]> bigbuf( new char[ bb_size ] ); + + int bb_len = snprintf(bigbuf.get(), bb_size, sJsonApiFormat, + hr_thds.c_str(), hr_buffer, a, + int_port(iPoolDiff), int_port(iGoodRes), int_port(iTotalRes), fAvgResTime, int_port(iPoolHashes), + int_port(iTopDiff[0]), int_port(iTopDiff[1]), int_port(iTopDiff[2]), int_port(iTopDiff[3]), int_port(iTopDiff[4]), + int_port(iTopDiff[5]), int_port(iTopDiff[6]), int_port(iTopDiff[7]), int_port(iTopDiff[8]), int_port(iTopDiff[9]), + res_error.c_str(), jconf::inst()->GetPoolAddress(), int_port(iConnSec), int_port(iPoolPing), cn_error.c_str()); + + out = std::string(bigbuf.get(), bigbuf.get() + bb_len); +} + +void executor::http_report(ex_event_name ev) +{ + assert(pHttpString != nullptr); + + switch(ev) + { + case EV_HTML_HASHRATE: + http_hashrate_report(*pHttpString); + break; + + case EV_HTML_RESULTS: + http_result_report(*pHttpString); + break; + + case EV_HTML_CONNSTAT: + http_connection_report(*pHttpString); + break; + + case EV_HTML_JSON: + http_json_report(*pHttpString); + break; + + default: + assert(false); + break; + } + + httpReady.set_value(); +} + +void executor::get_http_report(ex_event_name ev_id, std::string& data) +{ + std::lock_guard<std::mutex> lck(httpMutex); + + assert(pHttpString == nullptr); + assert(ev_id == EV_HTML_HASHRATE || ev_id == EV_HTML_RESULTS + || ev_id == EV_HTML_CONNSTAT || ev_id == EV_HTML_JSON); + + pHttpString = &data; + httpReady = std::promise<void>(); + std::future<void> ready = httpReady.get_future(); + + push_event(ex_event(ev_id)); + + ready.wait(); + pHttpString = nullptr; +} diff --git a/xmrstak/misc/executor.hpp b/xmrstak/misc/executor.hpp new file mode 100644 index 0000000..a3a0828 --- /dev/null +++ b/xmrstak/misc/executor.hpp @@ -0,0 +1,186 @@ +#pragma once +#include "thdq.hpp" +#include "msgstruct.h" +#include <atomic> +#include <array> +#include <list> +#include <future> +#include "telemetry.h" +#include "backend/IBackend.hpp" +#include "Environment.hpp" + +class jpsock; + + +namespace xmrstak +{ +namespace cpu +{ +class minethd; + +} // namespace cpu +} // namepsace xmrstak + +class executor +{ +public: + static executor* inst() + { + auto& env = xmrstak::Environment::inst(); + if(env.pExecutor == nullptr) + env.pExecutor = new executor; + return env.pExecutor; + }; + + void ex_start(bool daemon) { daemon ? ex_main() : std::thread(&executor::ex_main, this).detach(); } + + void get_http_report(ex_event_name ev_id, std::string& data); + + inline void push_event(ex_event&& ev) { oEventQ.push(std::move(ev)); } + void push_timed_event(ex_event&& ev, size_t sec); + void log_result_error(std::string&& sError); + + constexpr static size_t invalid_pool_id = 0; + constexpr static size_t dev_pool_id = 1; + constexpr static size_t usr_pool_id = 2; + +private: + struct timed_event + { + ex_event event; + size_t ticks_left; + + timed_event(ex_event&& ev, size_t ticks) : event(std::move(ev)), ticks_left(ticks) {} + }; + + // In miliseconds, has to divide a second (1000ms) into an integer number + constexpr static size_t iTickTime = 500; + + // Dev donation time period in seconds. 100 minutes by default. + // We will divide up this period according to the config setting + constexpr static size_t iDevDonatePeriod = 100 * 60; + + std::list<timed_event> lTimedEvents; + std::mutex timed_event_mutex; + thdq<ex_event> oEventQ; + + xmrstak::telemetry* telem; + std::vector<xmrstak::IBackend*>* pvThreads; + + size_t current_pool_id; + + jpsock* usr_pool; + jpsock* dev_pool; + + jpsock* pick_pool_by_id(size_t pool_id); + + bool is_dev_time; + + executor(); + + void ex_main(); + + void ex_clock_thd(); + void pool_connect(jpsock* pool); + + void hashrate_report(std::string& out); + void result_report(std::string& out); + void connection_report(std::string& out); + + void http_hashrate_report(std::string& out); + void http_result_report(std::string& out); + void http_connection_report(std::string& out); + void http_json_report(std::string& out); + + void http_report(ex_event_name ev); + void print_report(ex_event_name ev); + + std::string* pHttpString = nullptr; + std::promise<void> httpReady; + std::mutex httpMutex; + + size_t iReconnectAttempts = 0; + + struct sck_error_log + { + std::chrono::system_clock::time_point time; + std::string msg; + + sck_error_log(std::string&& err) : msg(std::move(err)) + { + time = std::chrono::system_clock::now(); + } + }; + std::vector<sck_error_log> vSocketLog; + + // Element zero is always the success element. + // Keep in mind that this is a tally and not a log like above + struct result_tally + { + std::chrono::system_clock::time_point time; + std::string msg; + size_t count; + + result_tally() : msg("[OK]"), count(0) + { + time = std::chrono::system_clock::now(); + } + + result_tally(std::string&& err) : msg(std::move(err)), count(1) + { + time = std::chrono::system_clock::now(); + } + + void increment() + { + count++; + time = std::chrono::system_clock::now(); + } + + bool compare(std::string& err) + { + if(msg == err) + return true; + else + return false; + } + }; + std::vector<result_tally> vMineResults; + + //More result statistics + std::array<size_t, 10> iTopDiff { { } }; //Initialize to zero + + std::chrono::system_clock::time_point tPoolConnTime; + size_t iPoolHashes = 0; + uint64_t iPoolDiff = 0; + + // Set it to 16 bit so that we can just let it grow + // Maximum realistic growth rate - 5MB / month + std::vector<uint16_t> iPoolCallTimes; + + //Those stats are reset if we disconnect + inline void reset_stats() + { + iPoolCallTimes.clear(); + tPoolConnTime = std::chrono::system_clock::now(); + iPoolHashes = 0; + iPoolDiff = 0; + } + + double fHighestHps = 0.0; + + void log_socket_error(std::string&& sError); + void log_result_ok(uint64_t iActualDiff); + + void sched_reconnect(); + + void on_sock_ready(size_t pool_id); + void on_sock_error(size_t pool_id, std::string&& sError); + void on_pool_have_job(size_t pool_id, pool_job& oPoolJob); + void on_miner_result(size_t pool_id, job_result& oResult); + void on_reconnect(size_t pool_id); + void on_switch_pool(size_t pool_id); + + inline size_t sec_to_ticks(size_t sec) { return sec * (1000 / iTickTime); } +}; + diff --git a/xmrstak/misc/jext.hpp b/xmrstak/misc/jext.hpp new file mode 100644 index 0000000..dce73a0 --- /dev/null +++ b/xmrstak/misc/jext.hpp @@ -0,0 +1,13 @@ +#pragma once + +using namespace rapidjson; + +/* This macro brings rapidjson more in line with other libs */ +inline const Value* GetObjectMember(const Value& obj, const char* key) +{ + Value::ConstMemberIterator itr = obj.FindMember(key); + if (itr != obj.MemberEnd()) + return &itr->value; + else + return nullptr; +} diff --git a/xmrstak/misc/telemetry.cpp b/xmrstak/misc/telemetry.cpp new file mode 100644 index 0000000..fafccd5 --- /dev/null +++ b/xmrstak/misc/telemetry.cpp @@ -0,0 +1,107 @@ +/* + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * Additional permission under GNU GPL version 3 section 7 + * + * If you modify this Program, or any covered work, by linking or combining + * it with OpenSSL (or a modified version of that library), containing parts + * covered by the terms of OpenSSL License and SSLeay License, the licensors + * of this Program grant you additional permission to convey the resulting work. + * + */ + +#include <cmath> +#include <cstring> +#include <chrono> +#include "telemetry.h" + +namespace xmrstak +{ + +telemetry::telemetry(size_t iThd) +{ + ppHashCounts = new uint64_t*[iThd]; + ppTimestamps = new uint64_t*[iThd]; + iBucketTop = new uint32_t[iThd]; + + for (size_t i = 0; i < iThd; i++) + { + ppHashCounts[i] = new uint64_t[iBucketSize]; + ppTimestamps[i] = new uint64_t[iBucketSize]; + iBucketTop[i] = 0; + memset(ppHashCounts[i], 0, sizeof(uint64_t) * iBucketSize); + memset(ppTimestamps[i], 0, sizeof(uint64_t) * iBucketSize); + } +} + +double telemetry::calc_telemetry_data(size_t iLastMilisec, size_t iThread) +{ + using namespace std::chrono; + uint64_t iTimeNow = time_point_cast<milliseconds>(high_resolution_clock::now()).time_since_epoch().count(); + + uint64_t iEarliestHashCnt = 0; + uint64_t iEarliestStamp = 0; + uint64_t iLastestStamp = 0; + uint64_t iLastestHashCnt = 0; + bool bHaveFullSet = false; + + //Start at 1, buckettop points to next empty + for (size_t i = 1; i < iBucketSize; i++) + { + size_t idx = (iBucketTop[iThread] - i) & iBucketMask; //overflow expected here + + if (ppTimestamps[iThread][idx] == 0) + break; //That means we don't have the data yet + + if (iLastestStamp == 0) + { + iLastestStamp = ppTimestamps[iThread][idx]; + iLastestHashCnt = ppHashCounts[iThread][idx]; + } + + if (iTimeNow - ppTimestamps[iThread][idx] > iLastMilisec) + { + bHaveFullSet = true; + break; //We are out of the requested time period + } + + iEarliestStamp = ppTimestamps[iThread][idx]; + iEarliestHashCnt = ppHashCounts[iThread][idx]; + } + + if (!bHaveFullSet || iEarliestStamp == 0 || iLastestStamp == 0) + return nan(""); + + //Don't think that can happen, but just in case + if (iLastestStamp - iEarliestStamp == 0) + return nan(""); + + double fHashes, fTime; + fHashes = iLastestHashCnt - iEarliestHashCnt; + fTime = iLastestStamp - iEarliestStamp; + fTime /= 1000.0; + + return fHashes / fTime; +} + +void telemetry::push_perf_value(size_t iThd, uint64_t iHashCount, uint64_t iTimestamp) +{ + size_t iTop = iBucketTop[iThd]; + ppHashCounts[iThd][iTop] = iHashCount; + ppTimestamps[iThd][iTop] = iTimestamp; + + iBucketTop[iThd] = (iTop + 1) & iBucketMask; +} + +} // namepsace xmrstak diff --git a/xmrstak/misc/telemetry.hpp b/xmrstak/misc/telemetry.hpp new file mode 100644 index 0000000..0538090 --- /dev/null +++ b/xmrstak/misc/telemetry.hpp @@ -0,0 +1,23 @@ +#pragma once + +#include <cstdint> + +namespace xmrstak +{ + +class telemetry +{ +public: + telemetry(size_t iThd); + void push_perf_value(size_t iThd, uint64_t iHashCount, uint64_t iTimestamp); + double calc_telemetry_data(size_t iLastMilisec, size_t iThread); + +private: + constexpr static size_t iBucketSize = 2 << 11; //Power of 2 to simplify calculations + constexpr static size_t iBucketMask = iBucketSize - 1; + uint32_t* iBucketTop; + uint64_t** ppHashCounts; + uint64_t** ppTimestamps; +}; + +} // namepsace xmrstak diff --git a/xmrstak/misc/thdq.hpp b/xmrstak/misc/thdq.hpp new file mode 100644 index 0000000..248c807 --- /dev/null +++ b/xmrstak/misc/thdq.hpp @@ -0,0 +1,49 @@ +#pragma once
+
+#include <queue>
+#include <thread>
+#include <mutex>
+#include <condition_variable>
+
+template <typename T>
+class thdq
+{
+public:
+ T pop()
+ {
+ std::unique_lock<std::mutex> mlock(mutex_);
+ while (queue_.empty()) { cond_.wait(mlock); }
+ auto item = std::move(queue_.front());
+ queue_.pop();
+ return item;
+ }
+
+ void pop(T& item)
+ {
+ std::unique_lock<std::mutex> mlock(mutex_);
+ while (queue_.empty()) { cond_.wait(mlock); }
+ item = queue_.front();
+ queue_.pop();
+ }
+
+ void push(const T& item)
+ {
+ std::unique_lock<std::mutex> mlock(mutex_);
+ queue_.push(item);
+ mlock.unlock();
+ cond_.notify_one();
+ }
+
+ void push(T&& item)
+ {
+ std::unique_lock<std::mutex> mlock(mutex_);
+ queue_.push(std::move(item));
+ mlock.unlock();
+ cond_.notify_one();
+ }
+
+private:
+ std::queue<T> queue_;
+ std::mutex mutex_;
+ std::condition_variable cond_;
+}; diff --git a/xmrstak/net/jpsock.cpp b/xmrstak/net/jpsock.cpp new file mode 100644 index 0000000..d287375 --- /dev/null +++ b/xmrstak/net/jpsock.cpp @@ -0,0 +1,620 @@ +/* + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * Additional permission under GNU GPL version 3 section 7 + * + * If you modify this Program, or any covered work, by linking or combining + * it with OpenSSL (or a modified version of that library), containing parts + * covered by the terms of OpenSSL License and SSLeay License, the licensors + * of this Program grant you additional permission to convey the resulting work. + * + */ + +#include <stdarg.h> +#include <assert.h> + +#include "jpsock.h" +#include "executor.h" +#include "jconf.h" + +#include "rapidjson/document.h" +#include "jext.h" +#include "socks.h" +#include "socket.h" +#include "version.h" + +#define AGENTID_STR XMR_STAK_NAME "/" XMR_STAK_VERSION + +using namespace rapidjson; + +struct jpsock::call_rsp +{ + bool bHaveResponse; + uint64_t iCallId; + Value* pCallData; + std::string sCallErr; + + call_rsp(Value* val) : pCallData(val) + { + bHaveResponse = false; + iCallId = 0; + sCallErr.clear(); + } +}; + +typedef GenericDocument<UTF8<>, MemoryPoolAllocator<>, MemoryPoolAllocator<>> MemDocument; + +/* + * + * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + * ASSUMPTION - only one calling thread. Multiple calling threads would require better + * thread safety. The calling thread is assumed to be the executor thread. + * If there is a reason to call the pool outside of the executor context, consider + * doing it via an executor event. + * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + * + * Call values and allocators are for the calling thread (executor). When processing + * a call, the recv thread will make a copy of the call response and then erase its copy. + */ + +struct jpsock::opaque_private +{ + Value oCallValue; + + MemoryPoolAllocator<> callAllocator; + MemoryPoolAllocator<> recvAllocator; + MemoryPoolAllocator<> parseAllocator; + MemDocument jsonDoc; + call_rsp oCallRsp; + + opaque_private(uint8_t* bCallMem, uint8_t* bRecvMem, uint8_t* bParseMem) : + callAllocator(bCallMem, jpsock::iJsonMemSize), + recvAllocator(bRecvMem, jpsock::iJsonMemSize), + parseAllocator(bParseMem, jpsock::iJsonMemSize), + jsonDoc(&recvAllocator, jpsock::iJsonMemSize, &parseAllocator), + oCallRsp(nullptr) + { + } +}; + +struct jpsock::opq_json_val +{ + const Value* val; + opq_json_val(const Value* val) : val(val) {} +}; + +jpsock::jpsock(size_t id, bool tls) : pool_id(id) +{ + sock_init(); + + bJsonCallMem = (uint8_t*)malloc(iJsonMemSize); + bJsonRecvMem = (uint8_t*)malloc(iJsonMemSize); + bJsonParseMem = (uint8_t*)malloc(iJsonMemSize); + + prv = new opaque_private(bJsonCallMem, bJsonRecvMem, bJsonParseMem); + +#ifndef CONF_NO_TLS + if(tls) + sck = new tls_socket(this); + else + sck = new plain_socket(this); +#else + sck = new plain_socket(this); +#endif + + oRecvThd = nullptr; + bRunning = false; + bLoggedIn = false; + iJobDiff = 0; + + memset(&oCurrentJob, 0, sizeof(oCurrentJob)); +} + +jpsock::~jpsock() +{ + delete prv; + prv = nullptr; + + free(bJsonCallMem); + free(bJsonRecvMem); + free(bJsonParseMem); +} + +std::string&& jpsock::get_call_error() +{ + return std::move(prv->oCallRsp.sCallErr); +} + +bool jpsock::set_socket_error(const char* a) +{ + if(!bHaveSocketError) + { + bHaveSocketError = true; + sSocketError.assign(a); + } + + return false; +} + +bool jpsock::set_socket_error(const char* a, const char* b) +{ + if(!bHaveSocketError) + { + bHaveSocketError = true; + size_t ln_a = strlen(a); + size_t ln_b = strlen(b); + + sSocketError.reserve(ln_a + ln_b + 2); + sSocketError.assign(a, ln_a); + sSocketError.append(b, ln_b); + } + + return false; +} + +bool jpsock::set_socket_error(const char* a, size_t len) +{ + if(!bHaveSocketError) + { + bHaveSocketError = true; + sSocketError.assign(a, len); + } + + return false; +} + +bool jpsock::set_socket_error_strerr(const char* a) +{ + char sSockErrText[512]; + return set_socket_error(a, sock_strerror(sSockErrText, sizeof(sSockErrText))); +} + +bool jpsock::set_socket_error_strerr(const char* a, int res) +{ + char sSockErrText[512]; + return set_socket_error(a, sock_gai_strerror(res, sSockErrText, sizeof(sSockErrText))); +} + +void jpsock::jpsock_thread() +{ + jpsock_thd_main(); + executor::inst()->push_event(ex_event(std::move(sSocketError), pool_id)); + + // If a call is wating, send an error to end it + bool bCallWaiting = false; + std::unique_lock<std::mutex> mlock(call_mutex); + if(prv->oCallRsp.pCallData != nullptr) + { + prv->oCallRsp.bHaveResponse = true; + prv->oCallRsp.iCallId = 0; + prv->oCallRsp.pCallData = nullptr; + bCallWaiting = true; + } + mlock.unlock(); + + if(bCallWaiting) + call_cond.notify_one(); + + bRunning = false; + bLoggedIn = false; + + std::unique_lock<std::mutex>(job_mutex); + memset(&oCurrentJob, 0, sizeof(oCurrentJob)); +} + +bool jpsock::jpsock_thd_main() +{ + if(!sck->connect()) + return false; + + executor::inst()->push_event(ex_event(EV_SOCK_READY, pool_id)); + + char buf[iSockBufferSize]; + size_t datalen = 0; + while (true) + { + int ret = sck->recv(buf + datalen, sizeof(buf) - datalen); + + if(ret <= 0) + return false; + + datalen += ret; + + if (datalen >= sizeof(buf)) + { + sck->close(false); + return set_socket_error("RECEIVE error: data overflow"); + } + + char* lnend; + char* lnstart = buf; + while ((lnend = (char*)memchr(lnstart, '\n', datalen)) != nullptr) + { + lnend++; + int lnlen = lnend - lnstart; + + if (!process_line(lnstart, lnlen)) + { + sck->close(false); + return false; + } + + datalen -= lnlen; + lnstart = lnend; + } + + //Got leftover data? Move it to the front + if (datalen > 0 && buf != lnstart) + memmove(buf, lnstart, datalen); + } +} + +bool jpsock::process_line(char* line, size_t len) +{ + prv->jsonDoc.SetNull(); + prv->parseAllocator.Clear(); + prv->callAllocator.Clear(); + + /*NULL terminate the line instead of '\n', parsing will add some more NULLs*/ + line[len-1] = '\0'; + + //printf("RECV: %s\n", line); + + if (prv->jsonDoc.ParseInsitu(line).HasParseError()) + return set_socket_error("PARSE error: Invalid JSON"); + + if (!prv->jsonDoc.IsObject()) + return set_socket_error("PARSE error: Invalid root"); + + const Value* mt; + if (prv->jsonDoc.HasMember("method")) + { + mt = GetObjectMember(prv->jsonDoc, "method"); + + if(!mt->IsString()) + return set_socket_error("PARSE error: Protocol error 1"); + + if(strcmp(mt->GetString(), "job") != 0) + return set_socket_error("PARSE error: Unsupported server method ", mt->GetString()); + + mt = GetObjectMember(prv->jsonDoc, "params"); + if(mt == nullptr || !mt->IsObject()) + return set_socket_error("PARSE error: Protocol error 2"); + + opq_json_val v(mt); + return process_pool_job(&v); + } + else + { + uint64_t iCallId; + mt = GetObjectMember(prv->jsonDoc, "id"); + if (mt == nullptr || !mt->IsUint64()) + return set_socket_error("PARSE error: Protocol error 3"); + + iCallId = mt->GetUint64(); + + mt = GetObjectMember(prv->jsonDoc, "error"); + + const char* sError = nullptr; + size_t iErrorLn = 0; + if (mt == nullptr || mt->IsNull()) + { + /* If there was no error we need a result */ + if ((mt = GetObjectMember(prv->jsonDoc, "result")) == nullptr) + return set_socket_error("PARSE error: Protocol error 7"); + } + else + { + if(!mt->IsObject()) + return set_socket_error("PARSE error: Protocol error 5"); + + const Value* msg = GetObjectMember(*mt, "message"); + + if(msg == nullptr || !msg->IsString()) + return set_socket_error("PARSE error: Protocol error 6"); + + iErrorLn = msg->GetStringLength(); + sError = msg->GetString(); + } + + std::unique_lock<std::mutex> mlock(call_mutex); + if (prv->oCallRsp.pCallData == nullptr) + { + /*Server sent us a call reply without us making a call*/ + mlock.unlock(); + return set_socket_error("PARSE error: Unexpected call response"); + } + + prv->oCallRsp.bHaveResponse = true; + prv->oCallRsp.iCallId = iCallId; + + if(sError != nullptr) + { + prv->oCallRsp.pCallData = nullptr; + prv->oCallRsp.sCallErr.assign(sError, iErrorLn); + } + else + prv->oCallRsp.pCallData->CopyFrom(*mt, prv->callAllocator); + + mlock.unlock(); + call_cond.notify_one(); + + return true; + } +} + +bool jpsock::process_pool_job(const opq_json_val* params) +{ + if (!params->val->IsObject()) + return set_socket_error("PARSE error: Job error 1"); + + const Value * blob, *jobid, *target; + jobid = GetObjectMember(*params->val, "job_id"); + blob = GetObjectMember(*params->val, "blob"); + target = GetObjectMember(*params->val, "target"); + + if (jobid == nullptr || blob == nullptr || target == nullptr || + !jobid->IsString() || !blob->IsString() || !target->IsString()) + { + return set_socket_error("PARSE error: Job error 2"); + } + + if (jobid->GetStringLength() >= sizeof(pool_job::sJobID)) // Note >= + return set_socket_error("PARSE error: Job error 3"); + + uint32_t iWorkLn = blob->GetStringLength() / 2; + if (iWorkLn > sizeof(pool_job::bWorkBlob)) + return set_socket_error("PARSE error: Invalid job legth. Are you sure you are mining the correct coin?"); + + pool_job oPoolJob; + if (!hex2bin(blob->GetString(), iWorkLn * 2, oPoolJob.bWorkBlob)) + return set_socket_error("PARSE error: Job error 4"); + + oPoolJob.iWorkLen = iWorkLn; + memset(oPoolJob.sJobID, 0, sizeof(pool_job::sJobID)); + memcpy(oPoolJob.sJobID, jobid->GetString(), jobid->GetStringLength()); //Bounds checking at proto error 3 + + size_t target_slen = target->GetStringLength(); + if(target_slen <= 8) + { + uint32_t iTempInt = 0; + char sTempStr[] = "00000000"; // Little-endian CPU FTW + memcpy(sTempStr, target->GetString(), target_slen); + if(!hex2bin(sTempStr, 8, (unsigned char*)&iTempInt) || iTempInt == 0) + return set_socket_error("PARSE error: Invalid target"); + + + oPoolJob.iTarget = t32_to_t64(iTempInt); + oPoolJob.iTarget32 = iTempInt; + + } + else if(target_slen <= 16) + { + oPoolJob.iTarget = 0; + char sTempStr[] = "0000000000000000"; + memcpy(sTempStr, target->GetString(), target_slen); + if(!hex2bin(sTempStr, 16, (unsigned char*)&oPoolJob.iTarget) || oPoolJob.iTarget == 0) + return set_socket_error("PARSE error: Invalid target"); + } + else + return set_socket_error("PARSE error: Job error 5"); + + iJobDiff = t64_to_diff(oPoolJob.iTarget); + + executor::inst()->push_event(ex_event(oPoolJob, pool_id)); + + std::unique_lock<std::mutex>(job_mutex); + oCurrentJob = oPoolJob; + return true; +} + +bool jpsock::connect(const char* sAddr, std::string& sConnectError) +{ + bHaveSocketError = false; + sSocketError.clear(); + iJobDiff = 0; + + if(sck->set_hostname(sAddr)) + { + bRunning = true; + oRecvThd = new std::thread(&jpsock::jpsock_thread, this); + return true; + } + + sConnectError = std::move(sSocketError); + return false; +} + +void jpsock::disconnect() +{ + sck->close(false); + + if(oRecvThd != nullptr) + { + oRecvThd->join(); + delete oRecvThd; + oRecvThd = nullptr; + } + + sck->close(true); +} + +bool jpsock::cmd_ret_wait(const char* sPacket, opq_json_val& poResult) +{ + //printf("SEND: %s\n", sPacket); + + /*Set up the call rsp for the call reply*/ + prv->oCallValue.SetNull(); + prv->callAllocator.Clear(); + + std::unique_lock<std::mutex> mlock(call_mutex); + prv->oCallRsp = call_rsp(&prv->oCallValue); + mlock.unlock(); + + if(!sck->send(sPacket)) + { + disconnect(); //This will join the other thread; + return false; + } + + //Success is true if the server approves, result is true if there was no socket error + bool bSuccess; + mlock.lock(); + bool bResult = call_cond.wait_for(mlock, std::chrono::seconds(jconf::inst()->GetCallTimeout()), + [&]() { return prv->oCallRsp.bHaveResponse; }); + + bSuccess = prv->oCallRsp.pCallData != nullptr; + prv->oCallRsp.pCallData = nullptr; + mlock.unlock(); + + if(bHaveSocketError) + return false; + + //This means that there was no socket error, but the server is not taking to us + if(!bResult) + { + set_socket_error("CALL error: Timeout while waiting for a reply"); + disconnect(); + return false; + } + + if(bSuccess) + poResult.val = &prv->oCallValue; + + return bSuccess; +} + +bool jpsock::cmd_login(const char* sLogin, const char* sPassword) +{ + char cmd_buffer[1024]; + + snprintf(cmd_buffer, sizeof(cmd_buffer), "{\"method\":\"login\",\"params\":{\"login\":\"%s\",\"pass\":\"%s\",\"agent\":\"" AGENTID_STR "\"},\"id\":1}\n", + sLogin, sPassword); + + opq_json_val oResult(nullptr); + + /*Normal error conditions (failed login etc..) will end here*/ + if (!cmd_ret_wait(cmd_buffer, oResult)) + return false; + + if (!oResult.val->IsObject()) + { + set_socket_error("PARSE error: Login protocol error 1"); + disconnect(); + return false; + } + + const Value* id = GetObjectMember(*oResult.val, "id"); + const Value* job = GetObjectMember(*oResult.val, "job"); + + if (id == nullptr || job == nullptr || !id->IsString()) + { + set_socket_error("PARSE error: Login protocol error 2"); + disconnect(); + return false; + } + + if (id->GetStringLength() >= sizeof(sMinerId)) + { + set_socket_error("PARSE error: Login protocol error 3"); + disconnect(); + return false; + } + + memset(sMinerId, 0, sizeof(sMinerId)); + memcpy(sMinerId, id->GetString(), id->GetStringLength()); + + opq_json_val v(job); + if(!process_pool_job(&v)) + { + disconnect(); + return false; + } + + bLoggedIn = true; + + return true; +} + +bool jpsock::cmd_submit(const char* sJobId, uint32_t iNonce, const uint8_t* bResult) +{ + char cmd_buffer[1024]; + char sNonce[9]; + char sResult[65]; + + bin2hex((unsigned char*)&iNonce, 4, sNonce); + sNonce[8] = '\0'; + + bin2hex(bResult, 32, sResult); + sResult[64] = '\0'; + + snprintf(cmd_buffer, sizeof(cmd_buffer), "{\"method\":\"submit\",\"params\":{\"id\":\"%s\",\"job_id\":\"%s\",\"nonce\":\"%s\",\"result\":\"%s\"},\"id\":1}\n", + sMinerId, sJobId, sNonce, sResult); + + opq_json_val oResult(nullptr); + return cmd_ret_wait(cmd_buffer, oResult); +} + +bool jpsock::get_current_job(pool_job& job) +{ + std::unique_lock<std::mutex>(job_mutex); + + if(oCurrentJob.iWorkLen == 0) + return false; + + oCurrentJob.iResumeCnt++; + job = oCurrentJob; + return true; +} + +inline unsigned char hf_hex2bin(char c, bool &err) +{ + if (c >= '0' && c <= '9') + return c - '0'; + else if (c >= 'a' && c <= 'f') + return c - 'a' + 0xA; + else if (c >= 'A' && c <= 'F') + return c - 'A' + 0xA; + + err = true; + return 0; +} + +bool jpsock::hex2bin(const char* in, unsigned int len, unsigned char* out) +{ + bool error = false; + for (unsigned int i = 0; i < len; i += 2) + { + out[i / 2] = (hf_hex2bin(in[i], error) << 4) | hf_hex2bin(in[i + 1], error); + if (error) return false; + } + return true; +} + +inline char hf_bin2hex(unsigned char c) +{ + if (c <= 0x9) + return '0' + c; + else + return 'a' - 0xA + c; +} + +void jpsock::bin2hex(const unsigned char* in, unsigned int len, char* out) +{ + for (unsigned int i = 0; i < len; i++) + { + out[i * 2] = hf_bin2hex((in[i] & 0xF0) >> 4); + out[i * 2 + 1] = hf_bin2hex(in[i] & 0x0F); + } +} diff --git a/xmrstak/net/jpsock.hpp b/xmrstak/net/jpsock.hpp new file mode 100644 index 0000000..4baaade --- /dev/null +++ b/xmrstak/net/jpsock.hpp @@ -0,0 +1,98 @@ +#pragma once +#include <mutex> +#include <atomic> +#include <condition_variable> +#include <thread> +#include <string> + +#include "msgstruct.h" + +/* Our pool can have two kinds of errors: + - Parsing or connection error + Those are fatal errors (we drop the connection if we encounter them). + After they are constructed from const char* strings from various places. + (can be from read-only mem), we passs them in an exectutor message + once the recv thread expires. + - Call error + This error happens when the "server says no". Usually because the job was + outdated, or we somehow got the hash wrong. It isn't fatal. + We parse it in-situ in the network buffer, after that we copy it to a + std::string. Executor will move the buffer via an r-value ref. +*/ +class base_socket; + +class jpsock +{ +public: + jpsock(size_t id, bool tls); + ~jpsock(); + + bool connect(const char* sAddr, std::string& sConnectError); + void disconnect(); + + bool cmd_login(const char* sLogin, const char* sPassword); + bool cmd_submit(const char* sJobId, uint32_t iNonce, const uint8_t* bResult); + + static bool hex2bin(const char* in, unsigned int len, unsigned char* out); + static void bin2hex(const unsigned char* in, unsigned int len, char* out); + + inline bool is_running() { return bRunning; } + inline bool is_logged_in() { return bLoggedIn; } + + std::string&& get_call_error(); + bool have_sock_error() { return bHaveSocketError; } + + inline static uint64_t t32_to_t64(uint32_t t) { return 0xFFFFFFFFFFFFFFFFULL / (0xFFFFFFFFULL / ((uint64_t)t)); } + inline static uint64_t t64_to_diff(uint64_t t) { return 0xFFFFFFFFFFFFFFFFULL / t; } + inline static uint64_t diff_to_t64(uint64_t d) { return 0xFFFFFFFFFFFFFFFFULL / d; } + + inline uint64_t get_current_diff() { return iJobDiff; } + + bool get_current_job(pool_job& job); + + size_t pool_id; + + bool set_socket_error(const char* a); + bool set_socket_error(const char* a, const char* b); + bool set_socket_error(const char* a, size_t len); + bool set_socket_error_strerr(const char* a); + bool set_socket_error_strerr(const char* a, int res); + +private: + std::atomic<bool> bRunning; + std::atomic<bool> bLoggedIn; + + uint8_t* bJsonRecvMem; + uint8_t* bJsonParseMem; + uint8_t* bJsonCallMem; + + static constexpr size_t iJsonMemSize = 4096; + static constexpr size_t iSockBufferSize = 4096; + + struct call_rsp; + struct opaque_private; + struct opq_json_val; + + void jpsock_thread(); + bool jpsock_thd_main(); + bool process_line(char* line, size_t len); + bool process_pool_job(const opq_json_val* params); + bool cmd_ret_wait(const char* sPacket, opq_json_val& poResult); + + char sMinerId[64]; + std::atomic<uint64_t> iJobDiff; + + std::string sSocketError; + std::atomic<bool> bHaveSocketError; + + std::mutex call_mutex; + std::condition_variable call_cond; + std::thread* oRecvThd; + + std::mutex job_mutex; + pool_job oCurrentJob; + + opaque_private* prv; + base_socket* sck; +}; + diff --git a/xmrstak/net/msgstruct.hpp b/xmrstak/net/msgstruct.hpp new file mode 100644 index 0000000..f3a39b2 --- /dev/null +++ b/xmrstak/net/msgstruct.hpp @@ -0,0 +1,136 @@ +#pragma once +#include <string> +#include <string.h> +#include <assert.h> + +// Structures that we use to pass info between threads constructors are here just to make +// the stack allocation take up less space, heap is a shared resouce that needs locks too of course + +struct pool_job +{ + char sJobID[64]; + uint8_t bWorkBlob[112]; + uint64_t iTarget; + // \todo remove workaround needed for amd + uint32_t iTarget32; + uint32_t iWorkLen; + uint32_t iResumeCnt; + + pool_job() : iWorkLen(0), iResumeCnt(0) {} + pool_job(const char* sJobID, uint64_t iTarget, const uint8_t* bWorkBlob, uint32_t iWorkLen) : + iTarget(iTarget), iWorkLen(iWorkLen), iResumeCnt(0) + { + assert(iWorkLen <= sizeof(pool_job::bWorkBlob)); + memcpy(this->sJobID, sJobID, sizeof(pool_job::sJobID)); + memcpy(this->bWorkBlob, bWorkBlob, iWorkLen); + } +}; + +struct job_result +{ + uint8_t bResult[32]; + char sJobID[64]; + uint32_t iNonce; + + job_result() {} + job_result(const char* sJobID, uint32_t iNonce, const uint8_t* bResult) : iNonce(iNonce) + { + memcpy(this->sJobID, sJobID, sizeof(job_result::sJobID)); + memcpy(this->bResult, bResult, sizeof(job_result::bResult)); + } +}; + + +enum ex_event_name { EV_INVALID_VAL, EV_SOCK_READY, EV_SOCK_ERROR, + EV_POOL_HAVE_JOB, EV_MINER_HAVE_RESULT, EV_PERF_TICK, EV_RECONNECT, + EV_SWITCH_POOL, EV_DEV_POOL_EXIT, EV_USR_HASHRATE, EV_USR_RESULTS, EV_USR_CONNSTAT, + EV_HASHRATE_LOOP, EV_HTML_HASHRATE, EV_HTML_RESULTS, EV_HTML_CONNSTAT, EV_HTML_JSON }; + +/* + This is how I learned to stop worrying and love c++11 =). + Ghosts of endless heap allocations have finally been exorcised. Thanks + to the nifty magic of move semantics, string will only be allocated + once on the heap. Considering that it makes a jorney across stack, + heap alloced queue, to another stack before being finally processed + I think it is kind of nifty, don't you? + Also note that for non-arg events we only copy two qwords +*/ + +struct ex_event +{ + ex_event_name iName; + size_t iPoolId; + + union + { + pool_job oPoolJob; + job_result oJobResult; + std::string sSocketError; + }; + + ex_event() { iName = EV_INVALID_VAL; iPoolId = 0;} + ex_event(std::string&& err, size_t id) : iName(EV_SOCK_ERROR), iPoolId(id), sSocketError(std::move(err)) { } + ex_event(job_result dat, size_t id) : iName(EV_MINER_HAVE_RESULT), iPoolId(id), oJobResult(dat) {} + ex_event(pool_job dat, size_t id) : iName(EV_POOL_HAVE_JOB), iPoolId(id), oPoolJob(dat) {} + ex_event(ex_event_name ev, size_t id = 0) : iName(ev), iPoolId(id) {} + + // Delete the copy operators to make sure we are moving only what is needed + ex_event(ex_event const&) = delete; + ex_event& operator=(ex_event const&) = delete; + + ex_event(ex_event&& from) + { + iName = from.iName; + iPoolId = from.iPoolId; + + switch(iName) + { + case EV_SOCK_ERROR: + new (&sSocketError) std::string(std::move(from.sSocketError)); + break; + case EV_MINER_HAVE_RESULT: + oJobResult = from.oJobResult; + break; + case EV_POOL_HAVE_JOB: + oPoolJob = from.oPoolJob; + break; + default: + break; + } + } + + ex_event& operator=(ex_event&& from) + { + assert(this != &from); + + if(iName == EV_SOCK_ERROR) + sSocketError.~basic_string(); + + iName = from.iName; + iPoolId = from.iPoolId; + + switch(iName) + { + case EV_SOCK_ERROR: + new (&sSocketError) std::string(); + sSocketError = std::move(from.sSocketError); + break; + case EV_MINER_HAVE_RESULT: + oJobResult = from.oJobResult; + break; + case EV_POOL_HAVE_JOB: + oPoolJob = from.oPoolJob; + break; + default: + break; + } + + return *this; + } + + ~ex_event() + { + if(iName == EV_SOCK_ERROR) + sSocketError.~basic_string(); + } +}; diff --git a/xmrstak/net/socket.cpp b/xmrstak/net/socket.cpp new file mode 100644 index 0000000..52f46b5 --- /dev/null +++ b/xmrstak/net/socket.cpp @@ -0,0 +1,366 @@ +/* + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * Additional permission under GNU GPL version 3 section 7 + * + * If you modify this Program, or any covered work, by linking or combining + * it with OpenSSL (or a modified version of that library), containing parts + * covered by the terms of OpenSSL License and SSLeay License, the licensors + * of this Program grant you additional permission to convey the resulting work. + * + */ + +#include "socket.h" +#include "jpsock.h" +#include "jconf.h" +#include "console.h" +#include "executor.h" + +#ifndef CONF_NO_TLS +#include <openssl/ssl.h> +#include <openssl/err.h> +#include <openssl/opensslconf.h> + +#ifndef OPENSSL_THREADS +#error OpenSSL was compiled without thread support +#endif +#endif + +plain_socket::plain_socket(jpsock* err_callback) : pCallback(err_callback) +{ + hSocket = INVALID_SOCKET; + pSockAddr = nullptr; +} + +bool plain_socket::set_hostname(const char* sAddr) +{ + char sAddrMb[256]; + char *sTmp, *sPort; + + size_t ln = strlen(sAddr); + if (ln >= sizeof(sAddrMb)) + return pCallback->set_socket_error("CONNECT error: Pool address overflow."); + + memcpy(sAddrMb, sAddr, ln); + sAddrMb[ln] = '\0'; + + if ((sTmp = strstr(sAddrMb, "//")) != nullptr) + { + sTmp += 2; + memmove(sAddrMb, sTmp, strlen(sTmp) + 1); + } + + if ((sPort = strchr(sAddrMb, ':')) == nullptr) + return pCallback->set_socket_error("CONNECT error: Pool port number not specified, please use format <hostname>:<port>."); + + sPort[0] = '\0'; + sPort++; + + addrinfo hints = { 0 }; + hints.ai_family = AF_UNSPEC; + hints.ai_socktype = SOCK_STREAM; + hints.ai_protocol = IPPROTO_TCP; + + pAddrRoot = nullptr; + int err; + if ((err = getaddrinfo(sAddrMb, sPort, &hints, &pAddrRoot)) != 0) + return pCallback->set_socket_error_strerr("CONNECT error: GetAddrInfo: ", err); + + addrinfo *ptr = pAddrRoot; + std::vector<addrinfo*> ipv4; + std::vector<addrinfo*> ipv6; + + while (ptr != nullptr) + { + if (ptr->ai_family == AF_INET) + ipv4.push_back(ptr); + if (ptr->ai_family == AF_INET6) + ipv6.push_back(ptr); + ptr = ptr->ai_next; + } + + if (ipv4.empty() && ipv6.empty()) + { + freeaddrinfo(pAddrRoot); + pAddrRoot = nullptr; + return pCallback->set_socket_error("CONNECT error: I found some DNS records but no IPv4 or IPv6 addresses."); + } + else if (!ipv4.empty() && ipv6.empty()) + pSockAddr = ipv4[rand() % ipv4.size()]; + else if (ipv4.empty() && !ipv6.empty()) + pSockAddr = ipv6[rand() % ipv6.size()]; + else if (!ipv4.empty() && !ipv6.empty()) + { + if(jconf::inst()->PreferIpv4()) + pSockAddr = ipv4[rand() % ipv4.size()]; + else + pSockAddr = ipv6[rand() % ipv6.size()]; + } + + hSocket = socket(pSockAddr->ai_family, pSockAddr->ai_socktype, pSockAddr->ai_protocol); + + if (hSocket == INVALID_SOCKET) + { + freeaddrinfo(pAddrRoot); + pAddrRoot = nullptr; + return pCallback->set_socket_error_strerr("CONNECT error: Socket creation failed "); + } + + return true; +} + +bool plain_socket::connect() +{ + int ret = ::connect(hSocket, pSockAddr->ai_addr, (int)pSockAddr->ai_addrlen); + + freeaddrinfo(pAddrRoot); + pAddrRoot = nullptr; + + if (ret != 0) + return pCallback->set_socket_error_strerr("CONNECT error: "); + else + return true; +} + +int plain_socket::recv(char* buf, unsigned int len) +{ + int ret = ::recv(hSocket, buf, len, 0); + + if(ret == 0) + pCallback->set_socket_error("RECEIVE error: socket closed"); + if(ret == SOCKET_ERROR || ret < 0) + pCallback->set_socket_error_strerr("RECEIVE error: "); + + return ret; +} + +bool plain_socket::send(const char* buf) +{ + int pos = 0, slen = strlen(buf); + while (pos != slen) + { + int ret = ::send(hSocket, buf + pos, slen - pos, 0); + if (ret == SOCKET_ERROR) + { + pCallback->set_socket_error_strerr("SEND error: "); + return false; + } + else + pos += ret; + } + + return true; +} + +void plain_socket::close(bool free) +{ + if(hSocket != INVALID_SOCKET) + { + sock_close(hSocket); + hSocket = INVALID_SOCKET; + } +} + +#ifndef CONF_NO_TLS +tls_socket::tls_socket(jpsock* err_callback) : pCallback(err_callback) +{ +} + +void tls_socket::print_error() +{ + BIO* err_bio = BIO_new(BIO_s_mem()); + ERR_print_errors(err_bio); + + char *buf = nullptr; + size_t len = BIO_get_mem_data(err_bio, &buf); + + pCallback->set_socket_error(buf, len); + + BIO_free(err_bio); +} + +void tls_socket::init_ctx() +{ + const SSL_METHOD* method = SSLv23_method(); + + if(method == nullptr) + return; + + ctx = SSL_CTX_new(method); + if(ctx == nullptr) + return; + + if(jconf::inst()->TlsSecureAlgos()) + { + SSL_CTX_set_options(ctx, SSL_OP_NO_SSLv2 | SSL_OP_NO_SSLv3 | SSL_OP_NO_TLSv1 | SSL_OP_NO_COMPRESSION); + } +} + +bool tls_socket::set_hostname(const char* sAddr) +{ + if(ctx == nullptr) + { + init_ctx(); + if(ctx == nullptr) + { + print_error(); + return false; + } + } + + if((bio = BIO_new_ssl_connect(ctx)) == nullptr) + { + print_error(); + return false; + } + + if(BIO_set_conn_hostname(bio, sAddr) != 1) + { + print_error(); + return false; + } + + BIO_get_ssl(bio, &ssl); + if(ssl == nullptr) + { + print_error(); + return false; + } + + if(jconf::inst()->TlsSecureAlgos()) + { + if(SSL_set_cipher_list(ssl, "HIGH:!aNULL:!kRSA:!PSK:!SRP:!MD5:!RC4:!SHA1") != 1) + { + print_error(); + return false; + } + } + + return true; +} + +bool tls_socket::connect() +{ + if(BIO_do_connect(bio) != 1) + { + print_error(); + return false; + } + + if(BIO_do_handshake(bio) != 1) + { + print_error(); + return false; + } + + /* Step 1: verify a server certificate was presented during the negotiation */ + X509* cert = SSL_get_peer_certificate(ssl); + if(cert == nullptr) + { + print_error(); + return false; + } + + const EVP_MD* digest; + unsigned char md[EVP_MAX_MD_SIZE]; + unsigned int dlen; + + digest = EVP_get_digestbyname("sha256"); + if(digest == nullptr) + { + print_error(); + return false; + } + + if(X509_digest(cert, digest, md, &dlen) != 1) + { + X509_free(cert); + print_error(); + return false; + } + + if(pCallback->pool_id != executor::dev_pool_id) + { + //Base64 encode digest + BIO *bmem, *b64; + b64 = BIO_new(BIO_f_base64()); + bmem = BIO_new(BIO_s_mem()); + + BIO_puts(bmem, "SHA256:"); + b64 = BIO_push(b64, bmem); + BIO_set_flags(b64, BIO_FLAGS_BASE64_NO_NL); + BIO_write(b64, md, dlen); + BIO_flush(b64); + + const char* conf_md = jconf::inst()->GetTlsFingerprint(); + char *b64_md = nullptr; + size_t b64_len = BIO_get_mem_data(bmem, &b64_md); + + if(strlen(conf_md) == 0) + { + printer::inst()->print_msg(L1, "Server fingerprint: %.*s", (int)b64_len, b64_md); + } + else if(strncmp(b64_md, conf_md, b64_len) != 0) + { + printer::inst()->print_msg(L0, "FINGERPRINT FAILED CHECK: %.*s was given, %s was configured", + (int)b64_len, b64_md, conf_md); + + pCallback->set_socket_error("FINGERPRINT FAILED CHECK"); + BIO_free_all(b64); + X509_free(cert); + return false; + } + + BIO_free_all(b64); + } + + X509_free(cert); + return true; +} + +int tls_socket::recv(char* buf, unsigned int len) +{ + int ret = BIO_read(bio, buf, len); + + if(ret == 0) + pCallback->set_socket_error("RECEIVE error: socket closed"); + if(ret < 0) + print_error(); + + return ret; +} + +bool tls_socket::send(const char* buf) +{ + return BIO_puts(bio, buf) > 0; +} + +void tls_socket::close(bool free) +{ + if(bio == nullptr || ssl == nullptr) + return; + + if(!free) + { + sock_close(BIO_get_fd(bio, nullptr)); + } + else + { + BIO_free_all(bio); + ssl = nullptr; + bio = nullptr; + } +} +#endif + diff --git a/xmrstak/net/socket.hpp b/xmrstak/net/socket.hpp new file mode 100644 index 0000000..94bbf03 --- /dev/null +++ b/xmrstak/net/socket.hpp @@ -0,0 +1,57 @@ +#pragma once +#include "socks.h" +class jpsock; + +class base_socket +{ +public: + virtual bool set_hostname(const char* sAddr) = 0; + virtual bool connect() = 0; + virtual int recv(char* buf, unsigned int len) = 0; + virtual bool send(const char* buf) = 0; + virtual void close(bool free) = 0; +}; + +class plain_socket : public base_socket +{ +public: + plain_socket(jpsock* err_callback); + + bool set_hostname(const char* sAddr); + bool connect(); + int recv(char* buf, unsigned int len); + bool send(const char* buf); + void close(bool free); + +private: + jpsock* pCallback; + addrinfo *pSockAddr; + addrinfo *pAddrRoot; + SOCKET hSocket; +}; + +typedef struct ssl_ctx_st SSL_CTX; +typedef struct bio_st BIO; +typedef struct ssl_st SSL; + +class tls_socket : public base_socket +{ +public: + tls_socket(jpsock* err_callback); + + bool set_hostname(const char* sAddr); + bool connect(); + int recv(char* buf, unsigned int len); + bool send(const char* buf); + void close(bool free); + +private: + void init_ctx(); + void print_error(); + + jpsock* pCallback; + + SSL_CTX* ctx = nullptr; + BIO* bio = nullptr; + SSL* ssl = nullptr; +}; diff --git a/xmrstak/net/socks.hpp b/xmrstak/net/socks.hpp new file mode 100644 index 0000000..82bfa2f --- /dev/null +++ b/xmrstak/net/socks.hpp @@ -0,0 +1,97 @@ +#pragma once +#ifdef _WIN32 +#ifndef _WIN32_WINNT +#define _WIN32_WINNT 0x0601 /* Windows 7 */ +#endif +#include <winsock2.h> +#include <ws2tcpip.h> +#include <windows.h> + +inline void sock_init() +{ + static bool bWSAInit = false; + + if (!bWSAInit) + { + WSADATA wsaData; + WSAStartup(MAKEWORD(2, 2), &wsaData); + bWSAInit = true; + } +} + +inline void sock_close(SOCKET s) +{ + shutdown(s, SD_BOTH); + closesocket(s); +} + +inline const char* sock_strerror(char* buf, size_t len) +{ + buf[0] = '\0'; + + FormatMessageA( + FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS | FORMAT_MESSAGE_MAX_WIDTH_MASK, + NULL, WSAGetLastError(), + MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), + (LPSTR)buf, len, NULL); + + return buf; +} + +inline const char* sock_gai_strerror(int err, char* buf, size_t len) +{ + buf[0] = '\0'; + + FormatMessageA( + FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS | FORMAT_MESSAGE_MAX_WIDTH_MASK, + NULL, (DWORD)err, + MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), + (LPSTR)buf, len, NULL); + + return buf; +} + +#else + +/* Assume that any non-Windows platform uses POSIX-style sockets instead. */ +#include <sys/socket.h> +#include <arpa/inet.h> +#include <netdb.h> /* Needed for getaddrinfo() and freeaddrinfo() */ +#include <unistd.h> /* Needed for close() */ +#include <errno.h> +#include <string.h> +#if defined(__FreeBSD__) +#include <netinet/in.h> /* Needed for IPPROTO_TCP */ +#endif + +inline void sock_init() {} +typedef int SOCKET; + +#define INVALID_SOCKET (-1) +#define SOCKET_ERROR (-1) + +inline void sock_close(SOCKET s) +{ + shutdown(s, SHUT_RDWR); + close(s); +} + +inline const char* sock_strerror(char* buf, size_t len) +{ + buf[0] = '\0'; + +#if defined(__APPLE__) || defined(__FreeBSD__) || !defined(_GNU_SOURCE) || !defined(__GLIBC__) + + strerror_r(errno, buf, len); + return buf; +#else + return strerror_r(errno, buf, len); +#endif +} + +inline const char* sock_gai_strerror(int err, char* buf, size_t len) +{ + buf[0] = '\0'; + return gai_strerror(err); +} +#endif diff --git a/xmrstak/params.hpp b/xmrstak/params.hpp new file mode 100644 index 0000000..92d0da0 --- /dev/null +++ b/xmrstak/params.hpp @@ -0,0 +1,45 @@ +#pragma once +#include <string> +#include "Environment.hpp" + +namespace xmrstak +{ + +struct Params +{ + + static inline Params& inst() + { + auto& env = Environment::inst(); + if(env.pParams == nullptr) + env.pParams = new Params; + return *env.pParams; + } + + std::string executablePrefix; + bool useAMD; + bool useNVIDIA; + bool useCPU; + + std::string poolURL; + std::string poolPasswd; + std::string poolUsername; + + std::string configFile; + std::string configFileAMD; + std::string configFileNVIDIA; + std::string configFileCPU; + + Params() : + useAMD(true), + useNVIDIA(true), + useCPU(true), + configFile("config.txt"), + configFileAMD("amd.txt"), + configFileCPU("cpu.txt"), + configFileNVIDIA("nvidia.txt") + {} + +}; + +} // namepsace xmrstak diff --git a/xmrstak/rapidjson/allocators.h b/xmrstak/rapidjson/allocators.h new file mode 100644 index 0000000..98affe0 --- /dev/null +++ b/xmrstak/rapidjson/allocators.h @@ -0,0 +1,271 @@ +// Tencent is pleased to support the open source community by making RapidJSON available. +// +// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. +// +// Licensed under the MIT License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// http://opensource.org/licenses/MIT +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef RAPIDJSON_ALLOCATORS_H_ +#define RAPIDJSON_ALLOCATORS_H_ + +#include "rapidjson.h" + +RAPIDJSON_NAMESPACE_BEGIN + +/////////////////////////////////////////////////////////////////////////////// +// Allocator + +/*! \class rapidjson::Allocator + \brief Concept for allocating, resizing and freeing memory block. + + Note that Malloc() and Realloc() are non-static but Free() is static. + + So if an allocator need to support Free(), it needs to put its pointer in + the header of memory block. + +\code +concept Allocator { + static const bool kNeedFree; //!< Whether this allocator needs to call Free(). + + // Allocate a memory block. + // \param size of the memory block in bytes. + // \returns pointer to the memory block. + void* Malloc(size_t size); + + // Resize a memory block. + // \param originalPtr The pointer to current memory block. Null pointer is permitted. + // \param originalSize The current size in bytes. (Design issue: since some allocator may not book-keep this, explicitly pass to it can save memory.) + // \param newSize the new size in bytes. + void* Realloc(void* originalPtr, size_t originalSize, size_t newSize); + + // Free a memory block. + // \param pointer to the memory block. Null pointer is permitted. + static void Free(void *ptr); +}; +\endcode +*/ + +/////////////////////////////////////////////////////////////////////////////// +// CrtAllocator + +//! C-runtime library allocator. +/*! This class is just wrapper for standard C library memory routines. + \note implements Allocator concept +*/ +class CrtAllocator { +public: + static const bool kNeedFree = true; + void* Malloc(size_t size) { + if (size) // behavior of malloc(0) is implementation defined. + return std::malloc(size); + else + return NULL; // standardize to returning NULL. + } + void* Realloc(void* originalPtr, size_t originalSize, size_t newSize) { + (void)originalSize; + if (newSize == 0) { + std::free(originalPtr); + return NULL; + } + return std::realloc(originalPtr, newSize); + } + static void Free(void *ptr) { std::free(ptr); } +}; + +/////////////////////////////////////////////////////////////////////////////// +// MemoryPoolAllocator + +//! Default memory allocator used by the parser and DOM. +/*! This allocator allocate memory blocks from pre-allocated memory chunks. + + It does not free memory blocks. And Realloc() only allocate new memory. + + The memory chunks are allocated by BaseAllocator, which is CrtAllocator by default. + + User may also supply a buffer as the first chunk. + + If the user-buffer is full then additional chunks are allocated by BaseAllocator. + + The user-buffer is not deallocated by this allocator. + + \tparam BaseAllocator the allocator type for allocating memory chunks. Default is CrtAllocator. + \note implements Allocator concept +*/ +template <typename BaseAllocator = CrtAllocator> +class MemoryPoolAllocator { +public: + static const bool kNeedFree = false; //!< Tell users that no need to call Free() with this allocator. (concept Allocator) + + //! Constructor with chunkSize. + /*! \param chunkSize The size of memory chunk. The default is kDefaultChunkSize. + \param baseAllocator The allocator for allocating memory chunks. + */ + MemoryPoolAllocator(size_t chunkSize = kDefaultChunkCapacity, BaseAllocator* baseAllocator = 0) : + chunkHead_(0), chunk_capacity_(chunkSize), userBuffer_(0), baseAllocator_(baseAllocator), ownBaseAllocator_(0) + { + } + + //! Constructor with user-supplied buffer. + /*! The user buffer will be used firstly. When it is full, memory pool allocates new chunk with chunk size. + + The user buffer will not be deallocated when this allocator is destructed. + + \param buffer User supplied buffer. + \param size Size of the buffer in bytes. It must at least larger than sizeof(ChunkHeader). + \param chunkSize The size of memory chunk. The default is kDefaultChunkSize. + \param baseAllocator The allocator for allocating memory chunks. + */ + MemoryPoolAllocator(void *buffer, size_t size, size_t chunkSize = kDefaultChunkCapacity, BaseAllocator* baseAllocator = 0) : + chunkHead_(0), chunk_capacity_(chunkSize), userBuffer_(buffer), baseAllocator_(baseAllocator), ownBaseAllocator_(0) + { + RAPIDJSON_ASSERT(buffer != 0); + RAPIDJSON_ASSERT(size > sizeof(ChunkHeader)); + chunkHead_ = reinterpret_cast<ChunkHeader*>(buffer); + chunkHead_->capacity = size - sizeof(ChunkHeader); + chunkHead_->size = 0; + chunkHead_->next = 0; + } + + //! Destructor. + /*! This deallocates all memory chunks, excluding the user-supplied buffer. + */ + ~MemoryPoolAllocator() { + Clear(); + RAPIDJSON_DELETE(ownBaseAllocator_); + } + + //! Deallocates all memory chunks, excluding the user-supplied buffer. + void Clear() { + while (chunkHead_ && chunkHead_ != userBuffer_) { + ChunkHeader* next = chunkHead_->next; + baseAllocator_->Free(chunkHead_); + chunkHead_ = next; + } + if (chunkHead_ && chunkHead_ == userBuffer_) + chunkHead_->size = 0; // Clear user buffer + } + + //! Computes the total capacity of allocated memory chunks. + /*! \return total capacity in bytes. + */ + size_t Capacity() const { + size_t capacity = 0; + for (ChunkHeader* c = chunkHead_; c != 0; c = c->next) + capacity += c->capacity; + return capacity; + } + + //! Computes the memory blocks allocated. + /*! \return total used bytes. + */ + size_t Size() const { + size_t size = 0; + for (ChunkHeader* c = chunkHead_; c != 0; c = c->next) + size += c->size; + return size; + } + + //! Allocates a memory block. (concept Allocator) + void* Malloc(size_t size) { + if (!size) + return NULL; + + size = RAPIDJSON_ALIGN(size); + if (chunkHead_ == 0 || chunkHead_->size + size > chunkHead_->capacity) + if (!AddChunk(chunk_capacity_ > size ? chunk_capacity_ : size)) + return NULL; + + void *buffer = reinterpret_cast<char *>(chunkHead_) + RAPIDJSON_ALIGN(sizeof(ChunkHeader)) + chunkHead_->size; + chunkHead_->size += size; + return buffer; + } + + //! Resizes a memory block (concept Allocator) + void* Realloc(void* originalPtr, size_t originalSize, size_t newSize) { + if (originalPtr == 0) + return Malloc(newSize); + + if (newSize == 0) + return NULL; + + originalSize = RAPIDJSON_ALIGN(originalSize); + newSize = RAPIDJSON_ALIGN(newSize); + + // Do not shrink if new size is smaller than original + if (originalSize >= newSize) + return originalPtr; + + // Simply expand it if it is the last allocation and there is sufficient space + if (originalPtr == reinterpret_cast<char *>(chunkHead_) + RAPIDJSON_ALIGN(sizeof(ChunkHeader)) + chunkHead_->size - originalSize) { + size_t increment = static_cast<size_t>(newSize - originalSize); + if (chunkHead_->size + increment <= chunkHead_->capacity) { + chunkHead_->size += increment; + return originalPtr; + } + } + + // Realloc process: allocate and copy memory, do not free original buffer. + if (void* newBuffer = Malloc(newSize)) { + if (originalSize) + std::memcpy(newBuffer, originalPtr, originalSize); + return newBuffer; + } + else + return NULL; + } + + //! Frees a memory block (concept Allocator) + static void Free(void *ptr) { (void)ptr; } // Do nothing + +private: + //! Copy constructor is not permitted. + MemoryPoolAllocator(const MemoryPoolAllocator& rhs) /* = delete */; + //! Copy assignment operator is not permitted. + MemoryPoolAllocator& operator=(const MemoryPoolAllocator& rhs) /* = delete */; + + //! Creates a new chunk. + /*! \param capacity Capacity of the chunk in bytes. + \return true if success. + */ + bool AddChunk(size_t capacity) { + if (!baseAllocator_) + ownBaseAllocator_ = baseAllocator_ = RAPIDJSON_NEW(BaseAllocator()); + if (ChunkHeader* chunk = reinterpret_cast<ChunkHeader*>(baseAllocator_->Malloc(RAPIDJSON_ALIGN(sizeof(ChunkHeader)) + capacity))) { + chunk->capacity = capacity; + chunk->size = 0; + chunk->next = chunkHead_; + chunkHead_ = chunk; + return true; + } + else + return false; + } + + static const int kDefaultChunkCapacity = 64 * 1024; //!< Default chunk capacity. + + //! Chunk header for perpending to each chunk. + /*! Chunks are stored as a singly linked list. + */ + struct ChunkHeader { + size_t capacity; //!< Capacity of the chunk in bytes (excluding the header itself). + size_t size; //!< Current size of allocated memory in bytes. + ChunkHeader *next; //!< Next chunk in the linked list. + }; + + ChunkHeader *chunkHead_; //!< Head of the chunk linked-list. Only the head chunk serves allocation. + size_t chunk_capacity_; //!< The minimum capacity of chunk when they are allocated. + void *userBuffer_; //!< User supplied buffer. + BaseAllocator* baseAllocator_; //!< base allocator for allocating memory chunks. + BaseAllocator* ownBaseAllocator_; //!< base allocator created by this object. +}; + +RAPIDJSON_NAMESPACE_END + +#endif // RAPIDJSON_ENCODINGS_H_ diff --git a/xmrstak/rapidjson/document.h b/xmrstak/rapidjson/document.h new file mode 100644 index 0000000..895af88 --- /dev/null +++ b/xmrstak/rapidjson/document.h @@ -0,0 +1,2602 @@ +// Tencent is pleased to support the open source community by making RapidJSON available. +// +// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. +// +// Licensed under the MIT License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// http://opensource.org/licenses/MIT +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef RAPIDJSON_DOCUMENT_H_ +#define RAPIDJSON_DOCUMENT_H_ + +/*! \file document.h */ + +#include "reader.h" +#include "internal/meta.h" +#include "internal/strfunc.h" +#include "memorystream.h" +#include "encodedstream.h" +#include <new> // placement new +#include <limits> + +RAPIDJSON_DIAG_PUSH +#ifdef _MSC_VER +RAPIDJSON_DIAG_OFF(4127) // conditional expression is constant +RAPIDJSON_DIAG_OFF(4244) // conversion from kXxxFlags to 'uint16_t', possible loss of data +#ifdef _MINWINDEF_ // see: http://stackoverflow.com/questions/22744262/cant-call-stdmax-because-minwindef-h-defines-max +#ifndef NOMINMAX +#pragma push_macro("min") +#pragma push_macro("max") +#undef min +#undef max +#endif +#endif +#endif + +#ifdef __clang__ +RAPIDJSON_DIAG_OFF(padded) +RAPIDJSON_DIAG_OFF(switch-enum) +RAPIDJSON_DIAG_OFF(c++98-compat) +#endif + +#ifdef __GNUC__ +RAPIDJSON_DIAG_OFF(effc++) +#if __GNUC__ >= 6 +RAPIDJSON_DIAG_OFF(terminate) // ignore throwing RAPIDJSON_ASSERT in RAPIDJSON_NOEXCEPT functions +#endif +#endif // __GNUC__ + +#ifndef RAPIDJSON_NOMEMBERITERATORCLASS +#include <iterator> // std::iterator, std::random_access_iterator_tag +#endif + +#if RAPIDJSON_HAS_CXX11_RVALUE_REFS +#include <utility> // std::move +#endif + +RAPIDJSON_NAMESPACE_BEGIN + +// Forward declaration. +template <typename Encoding, typename Allocator> +class GenericValue; + +template <typename Encoding, typename Allocator, typename StackAllocator> +class GenericDocument; + +//! Name-value pair in a JSON object value. +/*! + This class was internal to GenericValue. It used to be a inner struct. + But a compiler (IBM XL C/C++ for AIX) have reported to have problem with that so it moved as a namespace scope struct. + https://code.google.com/p/rapidjson/issues/detail?id=64 +*/ +template <typename Encoding, typename Allocator> +struct GenericMember { + GenericValue<Encoding, Allocator> name; //!< name of member (must be a string) + GenericValue<Encoding, Allocator> value; //!< value of member. +}; + +/////////////////////////////////////////////////////////////////////////////// +// GenericMemberIterator + +#ifndef RAPIDJSON_NOMEMBERITERATORCLASS + +//! (Constant) member iterator for a JSON object value +/*! + \tparam Const Is this a constant iterator? + \tparam Encoding Encoding of the value. (Even non-string values need to have the same encoding in a document) + \tparam Allocator Allocator type for allocating memory of object, array and string. + + This class implements a Random Access Iterator for GenericMember elements + of a GenericValue, see ISO/IEC 14882:2003(E) C++ standard, 24.1 [lib.iterator.requirements]. + + \note This iterator implementation is mainly intended to avoid implicit + conversions from iterator values to \c NULL, + e.g. from GenericValue::FindMember. + + \note Define \c RAPIDJSON_NOMEMBERITERATORCLASS to fall back to a + pointer-based implementation, if your platform doesn't provide + the C++ <iterator> header. + + \see GenericMember, GenericValue::MemberIterator, GenericValue::ConstMemberIterator + */ +template <bool Const, typename Encoding, typename Allocator> +class GenericMemberIterator + : public std::iterator<std::random_access_iterator_tag + , typename internal::MaybeAddConst<Const,GenericMember<Encoding,Allocator> >::Type> { + + friend class GenericValue<Encoding,Allocator>; + template <bool, typename, typename> friend class GenericMemberIterator; + + typedef GenericMember<Encoding,Allocator> PlainType; + typedef typename internal::MaybeAddConst<Const,PlainType>::Type ValueType; + typedef std::iterator<std::random_access_iterator_tag,ValueType> BaseType; + +public: + //! Iterator type itself + typedef GenericMemberIterator Iterator; + //! Constant iterator type + typedef GenericMemberIterator<true,Encoding,Allocator> ConstIterator; + //! Non-constant iterator type + typedef GenericMemberIterator<false,Encoding,Allocator> NonConstIterator; + + //! Pointer to (const) GenericMember + typedef typename BaseType::pointer Pointer; + //! Reference to (const) GenericMember + typedef typename BaseType::reference Reference; + //! Signed integer type (e.g. \c ptrdiff_t) + typedef typename BaseType::difference_type DifferenceType; + + //! Default constructor (singular value) + /*! Creates an iterator pointing to no element. + \note All operations, except for comparisons, are undefined on such values. + */ + GenericMemberIterator() : ptr_() {} + + //! Iterator conversions to more const + /*! + \param it (Non-const) iterator to copy from + + Allows the creation of an iterator from another GenericMemberIterator + that is "less const". Especially, creating a non-constant iterator + from a constant iterator are disabled: + \li const -> non-const (not ok) + \li const -> const (ok) + \li non-const -> const (ok) + \li non-const -> non-const (ok) + + \note If the \c Const template parameter is already \c false, this + constructor effectively defines a regular copy-constructor. + Otherwise, the copy constructor is implicitly defined. + */ + GenericMemberIterator(const NonConstIterator & it) : ptr_(it.ptr_) {} + Iterator& operator=(const NonConstIterator & it) { ptr_ = it.ptr_; return *this; } + + //! @name stepping + //@{ + Iterator& operator++(){ ++ptr_; return *this; } + Iterator& operator--(){ --ptr_; return *this; } + Iterator operator++(int){ Iterator old(*this); ++ptr_; return old; } + Iterator operator--(int){ Iterator old(*this); --ptr_; return old; } + //@} + + //! @name increment/decrement + //@{ + Iterator operator+(DifferenceType n) const { return Iterator(ptr_+n); } + Iterator operator-(DifferenceType n) const { return Iterator(ptr_-n); } + + Iterator& operator+=(DifferenceType n) { ptr_+=n; return *this; } + Iterator& operator-=(DifferenceType n) { ptr_-=n; return *this; } + //@} + + //! @name relations + //@{ + bool operator==(ConstIterator that) const { return ptr_ == that.ptr_; } + bool operator!=(ConstIterator that) const { return ptr_ != that.ptr_; } + bool operator<=(ConstIterator that) const { return ptr_ <= that.ptr_; } + bool operator>=(ConstIterator that) const { return ptr_ >= that.ptr_; } + bool operator< (ConstIterator that) const { return ptr_ < that.ptr_; } + bool operator> (ConstIterator that) const { return ptr_ > that.ptr_; } + //@} + + //! @name dereference + //@{ + Reference operator*() const { return *ptr_; } + Pointer operator->() const { return ptr_; } + Reference operator[](DifferenceType n) const { return ptr_[n]; } + //@} + + //! Distance + DifferenceType operator-(ConstIterator that) const { return ptr_-that.ptr_; } + +private: + //! Internal constructor from plain pointer + explicit GenericMemberIterator(Pointer p) : ptr_(p) {} + + Pointer ptr_; //!< raw pointer +}; + +#else // RAPIDJSON_NOMEMBERITERATORCLASS + +// class-based member iterator implementation disabled, use plain pointers + +template <bool Const, typename Encoding, typename Allocator> +struct GenericMemberIterator; + +//! non-const GenericMemberIterator +template <typename Encoding, typename Allocator> +struct GenericMemberIterator<false,Encoding,Allocator> { + //! use plain pointer as iterator type + typedef GenericMember<Encoding,Allocator>* Iterator; +}; +//! const GenericMemberIterator +template <typename Encoding, typename Allocator> +struct GenericMemberIterator<true,Encoding,Allocator> { + //! use plain const pointer as iterator type + typedef const GenericMember<Encoding,Allocator>* Iterator; +}; + +#endif // RAPIDJSON_NOMEMBERITERATORCLASS + +/////////////////////////////////////////////////////////////////////////////// +// GenericStringRef + +//! Reference to a constant string (not taking a copy) +/*! + \tparam CharType character type of the string + + This helper class is used to automatically infer constant string + references for string literals, especially from \c const \b (!) + character arrays. + + The main use is for creating JSON string values without copying the + source string via an \ref Allocator. This requires that the referenced + string pointers have a sufficient lifetime, which exceeds the lifetime + of the associated GenericValue. + + \b Example + \code + Value v("foo"); // ok, no need to copy & calculate length + const char foo[] = "foo"; + v.SetString(foo); // ok + + const char* bar = foo; + // Value x(bar); // not ok, can't rely on bar's lifetime + Value x(StringRef(bar)); // lifetime explicitly guaranteed by user + Value y(StringRef(bar, 3)); // ok, explicitly pass length + \endcode + + \see StringRef, GenericValue::SetString +*/ +template<typename CharType> +struct GenericStringRef { + typedef CharType Ch; //!< character type of the string + + //! Create string reference from \c const character array +#ifndef __clang__ // -Wdocumentation + /*! + This constructor implicitly creates a constant string reference from + a \c const character array. It has better performance than + \ref StringRef(const CharType*) by inferring the string \ref length + from the array length, and also supports strings containing null + characters. + + \tparam N length of the string, automatically inferred + + \param str Constant character array, lifetime assumed to be longer + than the use of the string in e.g. a GenericValue + + \post \ref s == str + + \note Constant complexity. + \note There is a hidden, private overload to disallow references to + non-const character arrays to be created via this constructor. + By this, e.g. function-scope arrays used to be filled via + \c snprintf are excluded from consideration. + In such cases, the referenced string should be \b copied to the + GenericValue instead. + */ +#endif + template<SizeType N> + GenericStringRef(const CharType (&str)[N]) RAPIDJSON_NOEXCEPT + : s(str), length(N-1) {} + + //! Explicitly create string reference from \c const character pointer +#ifndef __clang__ // -Wdocumentation + /*! + This constructor can be used to \b explicitly create a reference to + a constant string pointer. + + \see StringRef(const CharType*) + + \param str Constant character pointer, lifetime assumed to be longer + than the use of the string in e.g. a GenericValue + + \post \ref s == str + + \note There is a hidden, private overload to disallow references to + non-const character arrays to be created via this constructor. + By this, e.g. function-scope arrays used to be filled via + \c snprintf are excluded from consideration. + In such cases, the referenced string should be \b copied to the + GenericValue instead. + */ +#endif + explicit GenericStringRef(const CharType* str) + : s(str), length(internal::StrLen(str)){ RAPIDJSON_ASSERT(s != 0); } + + //! Create constant string reference from pointer and length +#ifndef __clang__ // -Wdocumentation + /*! \param str constant string, lifetime assumed to be longer than the use of the string in e.g. a GenericValue + \param len length of the string, excluding the trailing NULL terminator + + \post \ref s == str && \ref length == len + \note Constant complexity. + */ +#endif + GenericStringRef(const CharType* str, SizeType len) + : s(str), length(len) { RAPIDJSON_ASSERT(s != 0); } + + GenericStringRef(const GenericStringRef& rhs) : s(rhs.s), length(rhs.length) {} + + //! implicit conversion to plain CharType pointer + operator const Ch *() const { return s; } + + const Ch* const s; //!< plain CharType pointer + const SizeType length; //!< length of the string (excluding the trailing NULL terminator) + +private: + //! Disallow construction from non-const array + template<SizeType N> + GenericStringRef(CharType (&str)[N]) /* = delete */; + //! Copy assignment operator not permitted - immutable type + GenericStringRef& operator=(const GenericStringRef& rhs) /* = delete */; +}; + +//! Mark a character pointer as constant string +/*! Mark a plain character pointer as a "string literal". This function + can be used to avoid copying a character string to be referenced as a + value in a JSON GenericValue object, if the string's lifetime is known + to be valid long enough. + \tparam CharType Character type of the string + \param str Constant string, lifetime assumed to be longer than the use of the string in e.g. a GenericValue + \return GenericStringRef string reference object + \relatesalso GenericStringRef + + \see GenericValue::GenericValue(StringRefType), GenericValue::operator=(StringRefType), GenericValue::SetString(StringRefType), GenericValue::PushBack(StringRefType, Allocator&), GenericValue::AddMember +*/ +template<typename CharType> +inline GenericStringRef<CharType> StringRef(const CharType* str) { + return GenericStringRef<CharType>(str, internal::StrLen(str)); +} + +//! Mark a character pointer as constant string +/*! Mark a plain character pointer as a "string literal". This function + can be used to avoid copying a character string to be referenced as a + value in a JSON GenericValue object, if the string's lifetime is known + to be valid long enough. + + This version has better performance with supplied length, and also + supports string containing null characters. + + \tparam CharType character type of the string + \param str Constant string, lifetime assumed to be longer than the use of the string in e.g. a GenericValue + \param length The length of source string. + \return GenericStringRef string reference object + \relatesalso GenericStringRef +*/ +template<typename CharType> +inline GenericStringRef<CharType> StringRef(const CharType* str, size_t length) { + return GenericStringRef<CharType>(str, SizeType(length)); +} + +#if RAPIDJSON_HAS_STDSTRING +//! Mark a string object as constant string +/*! Mark a string object (e.g. \c std::string) as a "string literal". + This function can be used to avoid copying a string to be referenced as a + value in a JSON GenericValue object, if the string's lifetime is known + to be valid long enough. + + \tparam CharType character type of the string + \param str Constant string, lifetime assumed to be longer than the use of the string in e.g. a GenericValue + \return GenericStringRef string reference object + \relatesalso GenericStringRef + \note Requires the definition of the preprocessor symbol \ref RAPIDJSON_HAS_STDSTRING. +*/ +template<typename CharType> +inline GenericStringRef<CharType> StringRef(const std::basic_string<CharType>& str) { + return GenericStringRef<CharType>(str.data(), SizeType(str.size())); +} +#endif + +/////////////////////////////////////////////////////////////////////////////// +// GenericValue type traits +namespace internal { + +template <typename T, typename Encoding = void, typename Allocator = void> +struct IsGenericValueImpl : FalseType {}; + +// select candidates according to nested encoding and allocator types +template <typename T> struct IsGenericValueImpl<T, typename Void<typename T::EncodingType>::Type, typename Void<typename T::AllocatorType>::Type> + : IsBaseOf<GenericValue<typename T::EncodingType, typename T::AllocatorType>, T>::Type {}; + +// helper to match arbitrary GenericValue instantiations, including derived classes +template <typename T> struct IsGenericValue : IsGenericValueImpl<T>::Type {}; + +} // namespace internal + +/////////////////////////////////////////////////////////////////////////////// +// TypeHelper + +namespace internal { + +template <typename ValueType, typename T> +struct TypeHelper {}; + +template<typename ValueType> +struct TypeHelper<ValueType, bool> { + static bool Is(const ValueType& v) { return v.IsBool(); } + static bool Get(const ValueType& v) { return v.GetBool(); } + static ValueType& Set(ValueType& v, bool data) { return v.SetBool(data); } + static ValueType& Set(ValueType& v, bool data, typename ValueType::AllocatorType&) { return v.SetBool(data); } +}; + +template<typename ValueType> +struct TypeHelper<ValueType, int> { + static bool Is(const ValueType& v) { return v.IsInt(); } + static int Get(const ValueType& v) { return v.GetInt(); } + static ValueType& Set(ValueType& v, int data) { return v.SetInt(data); } + static ValueType& Set(ValueType& v, int data, typename ValueType::AllocatorType&) { return v.SetInt(data); } +}; + +template<typename ValueType> +struct TypeHelper<ValueType, unsigned> { + static bool Is(const ValueType& v) { return v.IsUint(); } + static unsigned Get(const ValueType& v) { return v.GetUint(); } + static ValueType& Set(ValueType& v, unsigned data) { return v.SetUint(data); } + static ValueType& Set(ValueType& v, unsigned data, typename ValueType::AllocatorType&) { return v.SetUint(data); } +}; + +template<typename ValueType> +struct TypeHelper<ValueType, int64_t> { + static bool Is(const ValueType& v) { return v.IsInt64(); } + static int64_t Get(const ValueType& v) { return v.GetInt64(); } + static ValueType& Set(ValueType& v, int64_t data) { return v.SetInt64(data); } + static ValueType& Set(ValueType& v, int64_t data, typename ValueType::AllocatorType&) { return v.SetInt64(data); } +}; + +template<typename ValueType> +struct TypeHelper<ValueType, uint64_t> { + static bool Is(const ValueType& v) { return v.IsUint64(); } + static uint64_t Get(const ValueType& v) { return v.GetUint64(); } + static ValueType& Set(ValueType& v, uint64_t data) { return v.SetUint64(data); } + static ValueType& Set(ValueType& v, uint64_t data, typename ValueType::AllocatorType&) { return v.SetUint64(data); } +}; + +template<typename ValueType> +struct TypeHelper<ValueType, double> { + static bool Is(const ValueType& v) { return v.IsDouble(); } + static double Get(const ValueType& v) { return v.GetDouble(); } + static ValueType& Set(ValueType& v, double data) { return v.SetDouble(data); } + static ValueType& Set(ValueType& v, double data, typename ValueType::AllocatorType&) { return v.SetDouble(data); } +}; + +template<typename ValueType> +struct TypeHelper<ValueType, float> { + static bool Is(const ValueType& v) { return v.IsFloat(); } + static float Get(const ValueType& v) { return v.GetFloat(); } + static ValueType& Set(ValueType& v, float data) { return v.SetFloat(data); } + static ValueType& Set(ValueType& v, float data, typename ValueType::AllocatorType&) { return v.SetFloat(data); } +}; + +template<typename ValueType> +struct TypeHelper<ValueType, const typename ValueType::Ch*> { + typedef const typename ValueType::Ch* StringType; + static bool Is(const ValueType& v) { return v.IsString(); } + static StringType Get(const ValueType& v) { return v.GetString(); } + static ValueType& Set(ValueType& v, const StringType data) { return v.SetString(typename ValueType::StringRefType(data)); } + static ValueType& Set(ValueType& v, const StringType data, typename ValueType::AllocatorType& a) { return v.SetString(data, a); } +}; + +#if RAPIDJSON_HAS_STDSTRING +template<typename ValueType> +struct TypeHelper<ValueType, std::basic_string<typename ValueType::Ch> > { + typedef std::basic_string<typename ValueType::Ch> StringType; + static bool Is(const ValueType& v) { return v.IsString(); } + static StringType Get(const ValueType& v) { return StringType(v.GetString(), v.GetStringLength()); } + static ValueType& Set(ValueType& v, const StringType& data, typename ValueType::AllocatorType& a) { return v.SetString(data, a); } +}; +#endif + +template<typename ValueType> +struct TypeHelper<ValueType, typename ValueType::Array> { + typedef typename ValueType::Array ArrayType; + static bool Is(const ValueType& v) { return v.IsArray(); } + static ArrayType Get(ValueType& v) { return v.GetArray(); } + static ValueType& Set(ValueType& v, ArrayType data) { return v = data; } + static ValueType& Set(ValueType& v, ArrayType data, typename ValueType::AllocatorType&) { return v = data; } +}; + +template<typename ValueType> +struct TypeHelper<ValueType, typename ValueType::ConstArray> { + typedef typename ValueType::ConstArray ArrayType; + static bool Is(const ValueType& v) { return v.IsArray(); } + static ArrayType Get(const ValueType& v) { return v.GetArray(); } +}; + +template<typename ValueType> +struct TypeHelper<ValueType, typename ValueType::Object> { + typedef typename ValueType::Object ObjectType; + static bool Is(const ValueType& v) { return v.IsObject(); } + static ObjectType Get(ValueType& v) { return v.GetObject(); } + static ValueType& Set(ValueType& v, ObjectType data) { return v = data; } + static ValueType& Set(ValueType& v, ObjectType data, typename ValueType::AllocatorType&) { v = data; } +}; + +template<typename ValueType> +struct TypeHelper<ValueType, typename ValueType::ConstObject> { + typedef typename ValueType::ConstObject ObjectType; + static bool Is(const ValueType& v) { return v.IsObject(); } + static ObjectType Get(const ValueType& v) { return v.GetObject(); } +}; + +} // namespace internal + +// Forward declarations +template <bool, typename> class GenericArray; +template <bool, typename> class GenericObject; + +/////////////////////////////////////////////////////////////////////////////// +// GenericValue + +//! Represents a JSON value. Use Value for UTF8 encoding and default allocator. +/*! + A JSON value can be one of 7 types. This class is a variant type supporting + these types. + + Use the Value if UTF8 and default allocator + + \tparam Encoding Encoding of the value. (Even non-string values need to have the same encoding in a document) + \tparam Allocator Allocator type for allocating memory of object, array and string. +*/ +template <typename Encoding, typename Allocator = MemoryPoolAllocator<> > +class GenericValue { +public: + //! Name-value pair in an object. + typedef GenericMember<Encoding, Allocator> Member; + typedef Encoding EncodingType; //!< Encoding type from template parameter. + typedef Allocator AllocatorType; //!< Allocator type from template parameter. + typedef typename Encoding::Ch Ch; //!< Character type derived from Encoding. + typedef GenericStringRef<Ch> StringRefType; //!< Reference to a constant string + typedef typename GenericMemberIterator<false,Encoding,Allocator>::Iterator MemberIterator; //!< Member iterator for iterating in object. + typedef typename GenericMemberIterator<true,Encoding,Allocator>::Iterator ConstMemberIterator; //!< Constant member iterator for iterating in object. + typedef GenericValue* ValueIterator; //!< Value iterator for iterating in array. + typedef const GenericValue* ConstValueIterator; //!< Constant value iterator for iterating in array. + typedef GenericValue<Encoding, Allocator> ValueType; //!< Value type of itself. + typedef GenericArray<false, ValueType> Array; + typedef GenericArray<true, ValueType> ConstArray; + typedef GenericObject<false, ValueType> Object; + typedef GenericObject<true, ValueType> ConstObject; + + //!@name Constructors and destructor. + //@{ + + //! Default constructor creates a null value. + GenericValue() RAPIDJSON_NOEXCEPT : data_() { data_.f.flags = kNullFlag; } + +#if RAPIDJSON_HAS_CXX11_RVALUE_REFS + //! Move constructor in C++11 + GenericValue(GenericValue&& rhs) RAPIDJSON_NOEXCEPT : data_(rhs.data_) { + rhs.data_.f.flags = kNullFlag; // give up contents + } +#endif + +private: + //! Copy constructor is not permitted. + GenericValue(const GenericValue& rhs); + +#if RAPIDJSON_HAS_CXX11_RVALUE_REFS + //! Moving from a GenericDocument is not permitted. + template <typename StackAllocator> + GenericValue(GenericDocument<Encoding,Allocator,StackAllocator>&& rhs); + + //! Move assignment from a GenericDocument is not permitted. + template <typename StackAllocator> + GenericValue& operator=(GenericDocument<Encoding,Allocator,StackAllocator>&& rhs); +#endif + +public: + + //! Constructor with JSON value type. + /*! This creates a Value of specified type with default content. + \param type Type of the value. + \note Default content for number is zero. + */ + explicit GenericValue(Type type) RAPIDJSON_NOEXCEPT : data_() { + static const uint16_t defaultFlags[7] = { + kNullFlag, kFalseFlag, kTrueFlag, kObjectFlag, kArrayFlag, kShortStringFlag, + kNumberAnyFlag + }; + RAPIDJSON_ASSERT(type <= kNumberType); + data_.f.flags = defaultFlags[type]; + + // Use ShortString to store empty string. + if (type == kStringType) + data_.ss.SetLength(0); + } + + //! Explicit copy constructor (with allocator) + /*! Creates a copy of a Value by using the given Allocator + \tparam SourceAllocator allocator of \c rhs + \param rhs Value to copy from (read-only) + \param allocator Allocator for allocating copied elements and buffers. Commonly use GenericDocument::GetAllocator(). + \see CopyFrom() + */ + template <typename SourceAllocator> + GenericValue(const GenericValue<Encoding,SourceAllocator>& rhs, Allocator& allocator) { + switch (rhs.GetType()) { + case kObjectType: { + SizeType count = rhs.data_.o.size; + Member* lm = reinterpret_cast<Member*>(allocator.Malloc(count * sizeof(Member))); + const typename GenericValue<Encoding,SourceAllocator>::Member* rm = rhs.GetMembersPointer(); + for (SizeType i = 0; i < count; i++) { + new (&lm[i].name) GenericValue(rm[i].name, allocator); + new (&lm[i].value) GenericValue(rm[i].value, allocator); + } + data_.f.flags = kObjectFlag; + data_.o.size = data_.o.capacity = count; + SetMembersPointer(lm); + } + break; + case kArrayType: { + SizeType count = rhs.data_.a.size; + GenericValue* le = reinterpret_cast<GenericValue*>(allocator.Malloc(count * sizeof(GenericValue))); + const GenericValue<Encoding,SourceAllocator>* re = rhs.GetElementsPointer(); + for (SizeType i = 0; i < count; i++) + new (&le[i]) GenericValue(re[i], allocator); + data_.f.flags = kArrayFlag; + data_.a.size = data_.a.capacity = count; + SetElementsPointer(le); + } + break; + case kStringType: + if (rhs.data_.f.flags == kConstStringFlag) { + data_.f.flags = rhs.data_.f.flags; + data_ = *reinterpret_cast<const Data*>(&rhs.data_); + } + else + SetStringRaw(StringRef(rhs.GetString(), rhs.GetStringLength()), allocator); + break; + default: + data_.f.flags = rhs.data_.f.flags; + data_ = *reinterpret_cast<const Data*>(&rhs.data_); + break; + } + } + + //! Constructor for boolean value. + /*! \param b Boolean value + \note This constructor is limited to \em real boolean values and rejects + implicitly converted types like arbitrary pointers. Use an explicit cast + to \c bool, if you want to construct a boolean JSON value in such cases. + */ +#ifndef RAPIDJSON_DOXYGEN_RUNNING // hide SFINAE from Doxygen + template <typename T> + explicit GenericValue(T b, RAPIDJSON_ENABLEIF((internal::IsSame<bool, T>))) RAPIDJSON_NOEXCEPT // See #472 +#else + explicit GenericValue(bool b) RAPIDJSON_NOEXCEPT +#endif + : data_() { + // safe-guard against failing SFINAE + RAPIDJSON_STATIC_ASSERT((internal::IsSame<bool,T>::Value)); + data_.f.flags = b ? kTrueFlag : kFalseFlag; + } + + //! Constructor for int value. + explicit GenericValue(int i) RAPIDJSON_NOEXCEPT : data_() { + data_.n.i64 = i; + data_.f.flags = (i >= 0) ? (kNumberIntFlag | kUintFlag | kUint64Flag) : kNumberIntFlag; + } + + //! Constructor for unsigned value. + explicit GenericValue(unsigned u) RAPIDJSON_NOEXCEPT : data_() { + data_.n.u64 = u; + data_.f.flags = (u & 0x80000000) ? kNumberUintFlag : (kNumberUintFlag | kIntFlag | kInt64Flag); + } + + //! Constructor for int64_t value. + explicit GenericValue(int64_t i64) RAPIDJSON_NOEXCEPT : data_() { + data_.n.i64 = i64; + data_.f.flags = kNumberInt64Flag; + if (i64 >= 0) { + data_.f.flags |= kNumberUint64Flag; + if (!(static_cast<uint64_t>(i64) & RAPIDJSON_UINT64_C2(0xFFFFFFFF, 0x00000000))) + data_.f.flags |= kUintFlag; + if (!(static_cast<uint64_t>(i64) & RAPIDJSON_UINT64_C2(0xFFFFFFFF, 0x80000000))) + data_.f.flags |= kIntFlag; + } + else if (i64 >= static_cast<int64_t>(RAPIDJSON_UINT64_C2(0xFFFFFFFF, 0x80000000))) + data_.f.flags |= kIntFlag; + } + + //! Constructor for uint64_t value. + explicit GenericValue(uint64_t u64) RAPIDJSON_NOEXCEPT : data_() { + data_.n.u64 = u64; + data_.f.flags = kNumberUint64Flag; + if (!(u64 & RAPIDJSON_UINT64_C2(0x80000000, 0x00000000))) + data_.f.flags |= kInt64Flag; + if (!(u64 & RAPIDJSON_UINT64_C2(0xFFFFFFFF, 0x00000000))) + data_.f.flags |= kUintFlag; + if (!(u64 & RAPIDJSON_UINT64_C2(0xFFFFFFFF, 0x80000000))) + data_.f.flags |= kIntFlag; + } + + //! Constructor for double value. + explicit GenericValue(double d) RAPIDJSON_NOEXCEPT : data_() { data_.n.d = d; data_.f.flags = kNumberDoubleFlag; } + + //! Constructor for float value. + explicit GenericValue(float f) RAPIDJSON_NOEXCEPT : data_() { data_.n.d = static_cast<double>(f); data_.f.flags = kNumberDoubleFlag; } + + //! Constructor for constant string (i.e. do not make a copy of string) + GenericValue(const Ch* s, SizeType length) RAPIDJSON_NOEXCEPT : data_() { SetStringRaw(StringRef(s, length)); } + + //! Constructor for constant string (i.e. do not make a copy of string) + explicit GenericValue(StringRefType s) RAPIDJSON_NOEXCEPT : data_() { SetStringRaw(s); } + + //! Constructor for copy-string (i.e. do make a copy of string) + GenericValue(const Ch* s, SizeType length, Allocator& allocator) : data_() { SetStringRaw(StringRef(s, length), allocator); } + + //! Constructor for copy-string (i.e. do make a copy of string) + GenericValue(const Ch*s, Allocator& allocator) : data_() { SetStringRaw(StringRef(s), allocator); } + +#if RAPIDJSON_HAS_STDSTRING + //! Constructor for copy-string from a string object (i.e. do make a copy of string) + /*! \note Requires the definition of the preprocessor symbol \ref RAPIDJSON_HAS_STDSTRING. + */ + GenericValue(const std::basic_string<Ch>& s, Allocator& allocator) : data_() { SetStringRaw(StringRef(s), allocator); } +#endif + + //! Constructor for Array. + /*! + \param a An array obtained by \c GetArray(). + \note \c Array is always pass-by-value. + \note the source array is moved into this value and the sourec array becomes empty. + */ + GenericValue(Array a) RAPIDJSON_NOEXCEPT : data_(a.value_.data_) { + a.value_.data_ = Data(); + a.value_.data_.f.flags = kArrayFlag; + } + + //! Constructor for Object. + /*! + \param o An object obtained by \c GetObject(). + \note \c Object is always pass-by-value. + \note the source object is moved into this value and the sourec object becomes empty. + */ + GenericValue(Object o) RAPIDJSON_NOEXCEPT : data_(o.value_.data_) { + o.value_.data_ = Data(); + o.value_.data_.f.flags = kObjectFlag; + } + + //! Destructor. + /*! Need to destruct elements of array, members of object, or copy-string. + */ + ~GenericValue() { + if (Allocator::kNeedFree) { // Shortcut by Allocator's trait + switch(data_.f.flags) { + case kArrayFlag: + { + GenericValue* e = GetElementsPointer(); + for (GenericValue* v = e; v != e + data_.a.size; ++v) + v->~GenericValue(); + Allocator::Free(e); + } + break; + + case kObjectFlag: + for (MemberIterator m = MemberBegin(); m != MemberEnd(); ++m) + m->~Member(); + Allocator::Free(GetMembersPointer()); + break; + + case kCopyStringFlag: + Allocator::Free(const_cast<Ch*>(GetStringPointer())); + break; + + default: + break; // Do nothing for other types. + } + } + } + + //@} + + //!@name Assignment operators + //@{ + + //! Assignment with move semantics. + /*! \param rhs Source of the assignment. It will become a null value after assignment. + */ + GenericValue& operator=(GenericValue& rhs) RAPIDJSON_NOEXCEPT { + RAPIDJSON_ASSERT(this != &rhs); + this->~GenericValue(); + RawAssign(rhs); + return *this; + } + +#if RAPIDJSON_HAS_CXX11_RVALUE_REFS + //! Move assignment in C++11 + GenericValue& operator=(GenericValue&& rhs) RAPIDJSON_NOEXCEPT { + return *this = rhs.Move(); + } +#endif + + //! Assignment of constant string reference (no copy) + /*! \param str Constant string reference to be assigned + \note This overload is needed to avoid clashes with the generic primitive type assignment overload below. + \see GenericStringRef, operator=(T) + */ + GenericValue& operator=(StringRefType str) RAPIDJSON_NOEXCEPT { + GenericValue s(str); + return *this = s; + } + + //! Assignment with primitive types. + /*! \tparam T Either \ref Type, \c int, \c unsigned, \c int64_t, \c uint64_t + \param value The value to be assigned. + + \note The source type \c T explicitly disallows all pointer types, + especially (\c const) \ref Ch*. This helps avoiding implicitly + referencing character strings with insufficient lifetime, use + \ref SetString(const Ch*, Allocator&) (for copying) or + \ref StringRef() (to explicitly mark the pointer as constant) instead. + All other pointer types would implicitly convert to \c bool, + use \ref SetBool() instead. + */ + template <typename T> + RAPIDJSON_DISABLEIF_RETURN((internal::IsPointer<T>), (GenericValue&)) + operator=(T value) { + GenericValue v(value); + return *this = v; + } + + //! Deep-copy assignment from Value + /*! Assigns a \b copy of the Value to the current Value object + \tparam SourceAllocator Allocator type of \c rhs + \param rhs Value to copy from (read-only) + \param allocator Allocator to use for copying + */ + template <typename SourceAllocator> + GenericValue& CopyFrom(const GenericValue<Encoding, SourceAllocator>& rhs, Allocator& allocator) { + RAPIDJSON_ASSERT(static_cast<void*>(this) != static_cast<void const*>(&rhs)); + this->~GenericValue(); + new (this) GenericValue(rhs, allocator); + return *this; + } + + //! Exchange the contents of this value with those of other. + /*! + \param other Another value. + \note Constant complexity. + */ + GenericValue& Swap(GenericValue& other) RAPIDJSON_NOEXCEPT { + GenericValue temp; + temp.RawAssign(*this); + RawAssign(other); + other.RawAssign(temp); + return *this; + } + + //! free-standing swap function helper + /*! + Helper function to enable support for common swap implementation pattern based on \c std::swap: + \code + void swap(MyClass& a, MyClass& b) { + using std::swap; + swap(a.value, b.value); + // ... + } + \endcode + \see Swap() + */ + friend inline void swap(GenericValue& a, GenericValue& b) RAPIDJSON_NOEXCEPT { a.Swap(b); } + + //! Prepare Value for move semantics + /*! \return *this */ + GenericValue& Move() RAPIDJSON_NOEXCEPT { return *this; } + //@} + + //!@name Equal-to and not-equal-to operators + //@{ + //! Equal-to operator + /*! + \note If an object contains duplicated named member, comparing equality with any object is always \c false. + \note Linear time complexity (number of all values in the subtree and total lengths of all strings). + */ + template <typename SourceAllocator> + bool operator==(const GenericValue<Encoding, SourceAllocator>& rhs) const { + typedef GenericValue<Encoding, SourceAllocator> RhsType; + if (GetType() != rhs.GetType()) + return false; + + switch (GetType()) { + case kObjectType: // Warning: O(n^2) inner-loop + if (data_.o.size != rhs.data_.o.size) + return false; + for (ConstMemberIterator lhsMemberItr = MemberBegin(); lhsMemberItr != MemberEnd(); ++lhsMemberItr) { + typename RhsType::ConstMemberIterator rhsMemberItr = rhs.FindMember(lhsMemberItr->name); + if (rhsMemberItr == rhs.MemberEnd() || lhsMemberItr->value != rhsMemberItr->value) + return false; + } + return true; + + case kArrayType: + if (data_.a.size != rhs.data_.a.size) + return false; + for (SizeType i = 0; i < data_.a.size; i++) + if ((*this)[i] != rhs[i]) + return false; + return true; + + case kStringType: + return StringEqual(rhs); + + case kNumberType: + if (IsDouble() || rhs.IsDouble()) { + double a = GetDouble(); // May convert from integer to double. + double b = rhs.GetDouble(); // Ditto + return a >= b && a <= b; // Prevent -Wfloat-equal + } + else + return data_.n.u64 == rhs.data_.n.u64; + + default: + return true; + } + } + + //! Equal-to operator with const C-string pointer + bool operator==(const Ch* rhs) const { return *this == GenericValue(StringRef(rhs)); } + +#if RAPIDJSON_HAS_STDSTRING + //! Equal-to operator with string object + /*! \note Requires the definition of the preprocessor symbol \ref RAPIDJSON_HAS_STDSTRING. + */ + bool operator==(const std::basic_string<Ch>& rhs) const { return *this == GenericValue(StringRef(rhs)); } +#endif + + //! Equal-to operator with primitive types + /*! \tparam T Either \ref Type, \c int, \c unsigned, \c int64_t, \c uint64_t, \c double, \c true, \c false + */ + template <typename T> RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr<internal::IsPointer<T>,internal::IsGenericValue<T> >), (bool)) operator==(const T& rhs) const { return *this == GenericValue(rhs); } + + //! Not-equal-to operator + /*! \return !(*this == rhs) + */ + template <typename SourceAllocator> + bool operator!=(const GenericValue<Encoding, SourceAllocator>& rhs) const { return !(*this == rhs); } + + //! Not-equal-to operator with const C-string pointer + bool operator!=(const Ch* rhs) const { return !(*this == rhs); } + + //! Not-equal-to operator with arbitrary types + /*! \return !(*this == rhs) + */ + template <typename T> RAPIDJSON_DISABLEIF_RETURN((internal::IsGenericValue<T>), (bool)) operator!=(const T& rhs) const { return !(*this == rhs); } + + //! Equal-to operator with arbitrary types (symmetric version) + /*! \return (rhs == lhs) + */ + template <typename T> friend RAPIDJSON_DISABLEIF_RETURN((internal::IsGenericValue<T>), (bool)) operator==(const T& lhs, const GenericValue& rhs) { return rhs == lhs; } + + //! Not-Equal-to operator with arbitrary types (symmetric version) + /*! \return !(rhs == lhs) + */ + template <typename T> friend RAPIDJSON_DISABLEIF_RETURN((internal::IsGenericValue<T>), (bool)) operator!=(const T& lhs, const GenericValue& rhs) { return !(rhs == lhs); } + //@} + + //!@name Type + //@{ + + Type GetType() const { return static_cast<Type>(data_.f.flags & kTypeMask); } + bool IsNull() const { return data_.f.flags == kNullFlag; } + bool IsFalse() const { return data_.f.flags == kFalseFlag; } + bool IsTrue() const { return data_.f.flags == kTrueFlag; } + bool IsBool() const { return (data_.f.flags & kBoolFlag) != 0; } + bool IsObject() const { return data_.f.flags == kObjectFlag; } + bool IsArray() const { return data_.f.flags == kArrayFlag; } + bool IsNumber() const { return (data_.f.flags & kNumberFlag) != 0; } + bool IsInt() const { return (data_.f.flags & kIntFlag) != 0; } + bool IsUint() const { return (data_.f.flags & kUintFlag) != 0; } + bool IsInt64() const { return (data_.f.flags & kInt64Flag) != 0; } + bool IsUint64() const { return (data_.f.flags & kUint64Flag) != 0; } + bool IsDouble() const { return (data_.f.flags & kDoubleFlag) != 0; } + bool IsString() const { return (data_.f.flags & kStringFlag) != 0; } + + // Checks whether a number can be losslessly converted to a double. + bool IsLosslessDouble() const { + if (!IsNumber()) return false; + if (IsUint64()) { + uint64_t u = GetUint64(); + volatile double d = static_cast<double>(u); + return (d >= 0.0) + && (d < static_cast<double>(std::numeric_limits<uint64_t>::max())) + && (u == static_cast<uint64_t>(d)); + } + if (IsInt64()) { + int64_t i = GetInt64(); + volatile double d = static_cast<double>(i); + return (d >= static_cast<double>(std::numeric_limits<int64_t>::min())) + && (d < static_cast<double>(std::numeric_limits<int64_t>::max())) + && (i == static_cast<int64_t>(d)); + } + return true; // double, int, uint are always lossless + } + + // Checks whether a number is a float (possible lossy). + bool IsFloat() const { + if ((data_.f.flags & kDoubleFlag) == 0) + return false; + double d = GetDouble(); + return d >= -3.4028234e38 && d <= 3.4028234e38; + } + // Checks whether a number can be losslessly converted to a float. + bool IsLosslessFloat() const { + if (!IsNumber()) return false; + double a = GetDouble(); + if (a < static_cast<double>(-std::numeric_limits<float>::max()) + || a > static_cast<double>(std::numeric_limits<float>::max())) + return false; + double b = static_cast<double>(static_cast<float>(a)); + return a >= b && a <= b; // Prevent -Wfloat-equal + } + + //@} + + //!@name Null + //@{ + + GenericValue& SetNull() { this->~GenericValue(); new (this) GenericValue(); return *this; } + + //@} + + //!@name Bool + //@{ + + bool GetBool() const { RAPIDJSON_ASSERT(IsBool()); return data_.f.flags == kTrueFlag; } + //!< Set boolean value + /*! \post IsBool() == true */ + GenericValue& SetBool(bool b) { this->~GenericValue(); new (this) GenericValue(b); return *this; } + + //@} + + //!@name Object + //@{ + + //! Set this value as an empty object. + /*! \post IsObject() == true */ + GenericValue& SetObject() { this->~GenericValue(); new (this) GenericValue(kObjectType); return *this; } + + //! Get the number of members in the object. + SizeType MemberCount() const { RAPIDJSON_ASSERT(IsObject()); return data_.o.size; } + + //! Check whether the object is empty. + bool ObjectEmpty() const { RAPIDJSON_ASSERT(IsObject()); return data_.o.size == 0; } + + //! Get a value from an object associated with the name. + /*! \pre IsObject() == true + \tparam T Either \c Ch or \c const \c Ch (template used for disambiguation with \ref operator[](SizeType)) + \note In version 0.1x, if the member is not found, this function returns a null value. This makes issue 7. + Since 0.2, if the name is not correct, it will assert. + If user is unsure whether a member exists, user should use HasMember() first. + A better approach is to use FindMember(). + \note Linear time complexity. + */ + template <typename T> + RAPIDJSON_DISABLEIF_RETURN((internal::NotExpr<internal::IsSame<typename internal::RemoveConst<T>::Type, Ch> >),(GenericValue&)) operator[](T* name) { + GenericValue n(StringRef(name)); + return (*this)[n]; + } + template <typename T> + RAPIDJSON_DISABLEIF_RETURN((internal::NotExpr<internal::IsSame<typename internal::RemoveConst<T>::Type, Ch> >),(const GenericValue&)) operator[](T* name) const { return const_cast<GenericValue&>(*this)[name]; } + + //! Get a value from an object associated with the name. + /*! \pre IsObject() == true + \tparam SourceAllocator Allocator of the \c name value + + \note Compared to \ref operator[](T*), this version is faster because it does not need a StrLen(). + And it can also handle strings with embedded null characters. + + \note Linear time complexity. + */ + template <typename SourceAllocator> + GenericValue& operator[](const GenericValue<Encoding, SourceAllocator>& name) { + MemberIterator member = FindMember(name); + if (member != MemberEnd()) + return member->value; + else { + RAPIDJSON_ASSERT(false); // see above note + + // This will generate -Wexit-time-destructors in clang + // static GenericValue NullValue; + // return NullValue; + + // Use static buffer and placement-new to prevent destruction + static char buffer[sizeof(GenericValue)]; + return *new (buffer) GenericValue(); + } + } + template <typename SourceAllocator> + const GenericValue& operator[](const GenericValue<Encoding, SourceAllocator>& name) const { return const_cast<GenericValue&>(*this)[name]; } + +#if RAPIDJSON_HAS_STDSTRING + //! Get a value from an object associated with name (string object). + GenericValue& operator[](const std::basic_string<Ch>& name) { return (*this)[GenericValue(StringRef(name))]; } + const GenericValue& operator[](const std::basic_string<Ch>& name) const { return (*this)[GenericValue(StringRef(name))]; } +#endif + + //! Const member iterator + /*! \pre IsObject() == true */ + ConstMemberIterator MemberBegin() const { RAPIDJSON_ASSERT(IsObject()); return ConstMemberIterator(GetMembersPointer()); } + //! Const \em past-the-end member iterator + /*! \pre IsObject() == true */ + ConstMemberIterator MemberEnd() const { RAPIDJSON_ASSERT(IsObject()); return ConstMemberIterator(GetMembersPointer() + data_.o.size); } + //! Member iterator + /*! \pre IsObject() == true */ + MemberIterator MemberBegin() { RAPIDJSON_ASSERT(IsObject()); return MemberIterator(GetMembersPointer()); } + //! \em Past-the-end member iterator + /*! \pre IsObject() == true */ + MemberIterator MemberEnd() { RAPIDJSON_ASSERT(IsObject()); return MemberIterator(GetMembersPointer() + data_.o.size); } + + //! Check whether a member exists in the object. + /*! + \param name Member name to be searched. + \pre IsObject() == true + \return Whether a member with that name exists. + \note It is better to use FindMember() directly if you need the obtain the value as well. + \note Linear time complexity. + */ + bool HasMember(const Ch* name) const { return FindMember(name) != MemberEnd(); } + +#if RAPIDJSON_HAS_STDSTRING + //! Check whether a member exists in the object with string object. + /*! + \param name Member name to be searched. + \pre IsObject() == true + \return Whether a member with that name exists. + \note It is better to use FindMember() directly if you need the obtain the value as well. + \note Linear time complexity. + */ + bool HasMember(const std::basic_string<Ch>& name) const { return FindMember(name) != MemberEnd(); } +#endif + + //! Check whether a member exists in the object with GenericValue name. + /*! + This version is faster because it does not need a StrLen(). It can also handle string with null character. + \param name Member name to be searched. + \pre IsObject() == true + \return Whether a member with that name exists. + \note It is better to use FindMember() directly if you need the obtain the value as well. + \note Linear time complexity. + */ + template <typename SourceAllocator> + bool HasMember(const GenericValue<Encoding, SourceAllocator>& name) const { return FindMember(name) != MemberEnd(); } + + //! Find member by name. + /*! + \param name Member name to be searched. + \pre IsObject() == true + \return Iterator to member, if it exists. + Otherwise returns \ref MemberEnd(). + + \note Earlier versions of Rapidjson returned a \c NULL pointer, in case + the requested member doesn't exist. For consistency with e.g. + \c std::map, this has been changed to MemberEnd() now. + \note Linear time complexity. + */ + MemberIterator FindMember(const Ch* name) { + GenericValue n(StringRef(name)); + return FindMember(n); + } + + ConstMemberIterator FindMember(const Ch* name) const { return const_cast<GenericValue&>(*this).FindMember(name); } + + //! Find member by name. + /*! + This version is faster because it does not need a StrLen(). It can also handle string with null character. + \param name Member name to be searched. + \pre IsObject() == true + \return Iterator to member, if it exists. + Otherwise returns \ref MemberEnd(). + + \note Earlier versions of Rapidjson returned a \c NULL pointer, in case + the requested member doesn't exist. For consistency with e.g. + \c std::map, this has been changed to MemberEnd() now. + \note Linear time complexity. + */ + template <typename SourceAllocator> + MemberIterator FindMember(const GenericValue<Encoding, SourceAllocator>& name) { + RAPIDJSON_ASSERT(IsObject()); + RAPIDJSON_ASSERT(name.IsString()); + MemberIterator member = MemberBegin(); + for ( ; member != MemberEnd(); ++member) + if (name.StringEqual(member->name)) + break; + return member; + } + template <typename SourceAllocator> ConstMemberIterator FindMember(const GenericValue<Encoding, SourceAllocator>& name) const { return const_cast<GenericValue&>(*this).FindMember(name); } + +#if RAPIDJSON_HAS_STDSTRING + //! Find member by string object name. + /*! + \param name Member name to be searched. + \pre IsObject() == true + \return Iterator to member, if it exists. + Otherwise returns \ref MemberEnd(). + */ + MemberIterator FindMember(const std::basic_string<Ch>& name) { return FindMember(GenericValue(StringRef(name))); } + ConstMemberIterator FindMember(const std::basic_string<Ch>& name) const { return FindMember(GenericValue(StringRef(name))); } +#endif + + //! Add a member (name-value pair) to the object. + /*! \param name A string value as name of member. + \param value Value of any type. + \param allocator Allocator for reallocating memory. It must be the same one as used before. Commonly use GenericDocument::GetAllocator(). + \return The value itself for fluent API. + \note The ownership of \c name and \c value will be transferred to this object on success. + \pre IsObject() && name.IsString() + \post name.IsNull() && value.IsNull() + \note Amortized Constant time complexity. + */ + GenericValue& AddMember(GenericValue& name, GenericValue& value, Allocator& allocator) { + RAPIDJSON_ASSERT(IsObject()); + RAPIDJSON_ASSERT(name.IsString()); + + ObjectData& o = data_.o; + if (o.size >= o.capacity) { + if (o.capacity == 0) { + o.capacity = kDefaultObjectCapacity; + SetMembersPointer(reinterpret_cast<Member*>(allocator.Malloc(o.capacity * sizeof(Member)))); + } + else { + SizeType oldCapacity = o.capacity; + o.capacity += (oldCapacity + 1) / 2; // grow by factor 1.5 + SetMembersPointer(reinterpret_cast<Member*>(allocator.Realloc(GetMembersPointer(), oldCapacity * sizeof(Member), o.capacity * sizeof(Member)))); + } + } + Member* members = GetMembersPointer(); + members[o.size].name.RawAssign(name); + members[o.size].value.RawAssign(value); + o.size++; + return *this; + } + + //! Add a constant string value as member (name-value pair) to the object. + /*! \param name A string value as name of member. + \param value constant string reference as value of member. + \param allocator Allocator for reallocating memory. It must be the same one as used before. Commonly use GenericDocument::GetAllocator(). + \return The value itself for fluent API. + \pre IsObject() + \note This overload is needed to avoid clashes with the generic primitive type AddMember(GenericValue&,T,Allocator&) overload below. + \note Amortized Constant time complexity. + */ + GenericValue& AddMember(GenericValue& name, StringRefType value, Allocator& allocator) { + GenericValue v(value); + return AddMember(name, v, allocator); + } + +#if RAPIDJSON_HAS_STDSTRING + //! Add a string object as member (name-value pair) to the object. + /*! \param name A string value as name of member. + \param value constant string reference as value of member. + \param allocator Allocator for reallocating memory. It must be the same one as used before. Commonly use GenericDocument::GetAllocator(). + \return The value itself for fluent API. + \pre IsObject() + \note This overload is needed to avoid clashes with the generic primitive type AddMember(GenericValue&,T,Allocator&) overload below. + \note Amortized Constant time complexity. + */ + GenericValue& AddMember(GenericValue& name, std::basic_string<Ch>& value, Allocator& allocator) { + GenericValue v(value, allocator); + return AddMember(name, v, allocator); + } +#endif + + //! Add any primitive value as member (name-value pair) to the object. + /*! \tparam T Either \ref Type, \c int, \c unsigned, \c int64_t, \c uint64_t + \param name A string value as name of member. + \param value Value of primitive type \c T as value of member + \param allocator Allocator for reallocating memory. Commonly use GenericDocument::GetAllocator(). + \return The value itself for fluent API. + \pre IsObject() + + \note The source type \c T explicitly disallows all pointer types, + especially (\c const) \ref Ch*. This helps avoiding implicitly + referencing character strings with insufficient lifetime, use + \ref AddMember(StringRefType, GenericValue&, Allocator&) or \ref + AddMember(StringRefType, StringRefType, Allocator&). + All other pointer types would implicitly convert to \c bool, + use an explicit cast instead, if needed. + \note Amortized Constant time complexity. + */ + template <typename T> + RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr<internal::IsPointer<T>, internal::IsGenericValue<T> >), (GenericValue&)) + AddMember(GenericValue& name, T value, Allocator& allocator) { + GenericValue v(value); + return AddMember(name, v, allocator); + } + +#if RAPIDJSON_HAS_CXX11_RVALUE_REFS + GenericValue& AddMember(GenericValue&& name, GenericValue&& value, Allocator& allocator) { + return AddMember(name, value, allocator); + } + GenericValue& AddMember(GenericValue&& name, GenericValue& value, Allocator& allocator) { + return AddMember(name, value, allocator); + } + GenericValue& AddMember(GenericValue& name, GenericValue&& value, Allocator& allocator) { + return AddMember(name, value, allocator); + } + GenericValue& AddMember(StringRefType name, GenericValue&& value, Allocator& allocator) { + GenericValue n(name); + return AddMember(n, value, allocator); + } +#endif // RAPIDJSON_HAS_CXX11_RVALUE_REFS + + + //! Add a member (name-value pair) to the object. + /*! \param name A constant string reference as name of member. + \param value Value of any type. + \param allocator Allocator for reallocating memory. It must be the same one as used before. Commonly use GenericDocument::GetAllocator(). + \return The value itself for fluent API. + \note The ownership of \c value will be transferred to this object on success. + \pre IsObject() + \post value.IsNull() + \note Amortized Constant time complexity. + */ + GenericValue& AddMember(StringRefType name, GenericValue& value, Allocator& allocator) { + GenericValue n(name); + return AddMember(n, value, allocator); + } + + //! Add a constant string value as member (name-value pair) to the object. + /*! \param name A constant string reference as name of member. + \param value constant string reference as value of member. + \param allocator Allocator for reallocating memory. It must be the same one as used before. Commonly use GenericDocument::GetAllocator(). + \return The value itself for fluent API. + \pre IsObject() + \note This overload is needed to avoid clashes with the generic primitive type AddMember(StringRefType,T,Allocator&) overload below. + \note Amortized Constant time complexity. + */ + GenericValue& AddMember(StringRefType name, StringRefType value, Allocator& allocator) { + GenericValue v(value); + return AddMember(name, v, allocator); + } + + //! Add any primitive value as member (name-value pair) to the object. + /*! \tparam T Either \ref Type, \c int, \c unsigned, \c int64_t, \c uint64_t + \param name A constant string reference as name of member. + \param value Value of primitive type \c T as value of member + \param allocator Allocator for reallocating memory. Commonly use GenericDocument::GetAllocator(). + \return The value itself for fluent API. + \pre IsObject() + + \note The source type \c T explicitly disallows all pointer types, + especially (\c const) \ref Ch*. This helps avoiding implicitly + referencing character strings with insufficient lifetime, use + \ref AddMember(StringRefType, GenericValue&, Allocator&) or \ref + AddMember(StringRefType, StringRefType, Allocator&). + All other pointer types would implicitly convert to \c bool, + use an explicit cast instead, if needed. + \note Amortized Constant time complexity. + */ + template <typename T> + RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr<internal::IsPointer<T>, internal::IsGenericValue<T> >), (GenericValue&)) + AddMember(StringRefType name, T value, Allocator& allocator) { + GenericValue n(name); + return AddMember(n, value, allocator); + } + + //! Remove all members in the object. + /*! This function do not deallocate memory in the object, i.e. the capacity is unchanged. + \note Linear time complexity. + */ + void RemoveAllMembers() { + RAPIDJSON_ASSERT(IsObject()); + for (MemberIterator m = MemberBegin(); m != MemberEnd(); ++m) + m->~Member(); + data_.o.size = 0; + } + + //! Remove a member in object by its name. + /*! \param name Name of member to be removed. + \return Whether the member existed. + \note This function may reorder the object members. Use \ref + EraseMember(ConstMemberIterator) if you need to preserve the + relative order of the remaining members. + \note Linear time complexity. + */ + bool RemoveMember(const Ch* name) { + GenericValue n(StringRef(name)); + return RemoveMember(n); + } + +#if RAPIDJSON_HAS_STDSTRING + bool RemoveMember(const std::basic_string<Ch>& name) { return RemoveMember(GenericValue(StringRef(name))); } +#endif + + template <typename SourceAllocator> + bool RemoveMember(const GenericValue<Encoding, SourceAllocator>& name) { + MemberIterator m = FindMember(name); + if (m != MemberEnd()) { + RemoveMember(m); + return true; + } + else + return false; + } + + //! Remove a member in object by iterator. + /*! \param m member iterator (obtained by FindMember() or MemberBegin()). + \return the new iterator after removal. + \note This function may reorder the object members. Use \ref + EraseMember(ConstMemberIterator) if you need to preserve the + relative order of the remaining members. + \note Constant time complexity. + */ + MemberIterator RemoveMember(MemberIterator m) { + RAPIDJSON_ASSERT(IsObject()); + RAPIDJSON_ASSERT(data_.o.size > 0); + RAPIDJSON_ASSERT(GetMembersPointer() != 0); + RAPIDJSON_ASSERT(m >= MemberBegin() && m < MemberEnd()); + + MemberIterator last(GetMembersPointer() + (data_.o.size - 1)); + if (data_.o.size > 1 && m != last) + *m = *last; // Move the last one to this place + else + m->~Member(); // Only one left, just destroy + --data_.o.size; + return m; + } + + //! Remove a member from an object by iterator. + /*! \param pos iterator to the member to remove + \pre IsObject() == true && \ref MemberBegin() <= \c pos < \ref MemberEnd() + \return Iterator following the removed element. + If the iterator \c pos refers to the last element, the \ref MemberEnd() iterator is returned. + \note This function preserves the relative order of the remaining object + members. If you do not need this, use the more efficient \ref RemoveMember(MemberIterator). + \note Linear time complexity. + */ + MemberIterator EraseMember(ConstMemberIterator pos) { + return EraseMember(pos, pos +1); + } + + //! Remove members in the range [first, last) from an object. + /*! \param first iterator to the first member to remove + \param last iterator following the last member to remove + \pre IsObject() == true && \ref MemberBegin() <= \c first <= \c last <= \ref MemberEnd() + \return Iterator following the last removed element. + \note This function preserves the relative order of the remaining object + members. + \note Linear time complexity. + */ + MemberIterator EraseMember(ConstMemberIterator first, ConstMemberIterator last) { + RAPIDJSON_ASSERT(IsObject()); + RAPIDJSON_ASSERT(data_.o.size > 0); + RAPIDJSON_ASSERT(GetMembersPointer() != 0); + RAPIDJSON_ASSERT(first >= MemberBegin()); + RAPIDJSON_ASSERT(first <= last); + RAPIDJSON_ASSERT(last <= MemberEnd()); + + MemberIterator pos = MemberBegin() + (first - MemberBegin()); + for (MemberIterator itr = pos; itr != last; ++itr) + itr->~Member(); + std::memmove(&*pos, &*last, static_cast<size_t>(MemberEnd() - last) * sizeof(Member)); + data_.o.size -= static_cast<SizeType>(last - first); + return pos; + } + + //! Erase a member in object by its name. + /*! \param name Name of member to be removed. + \return Whether the member existed. + \note Linear time complexity. + */ + bool EraseMember(const Ch* name) { + GenericValue n(StringRef(name)); + return EraseMember(n); + } + +#if RAPIDJSON_HAS_STDSTRING + bool EraseMember(const std::basic_string<Ch>& name) { return EraseMember(GenericValue(StringRef(name))); } +#endif + + template <typename SourceAllocator> + bool EraseMember(const GenericValue<Encoding, SourceAllocator>& name) { + MemberIterator m = FindMember(name); + if (m != MemberEnd()) { + EraseMember(m); + return true; + } + else + return false; + } + + Object GetObject() { RAPIDJSON_ASSERT(IsObject()); return Object(*this); } + ConstObject GetObject() const { RAPIDJSON_ASSERT(IsObject()); return ConstObject(*this); } + + //@} + + //!@name Array + //@{ + + //! Set this value as an empty array. + /*! \post IsArray == true */ + GenericValue& SetArray() { this->~GenericValue(); new (this) GenericValue(kArrayType); return *this; } + + //! Get the number of elements in array. + SizeType Size() const { RAPIDJSON_ASSERT(IsArray()); return data_.a.size; } + + //! Get the capacity of array. + SizeType Capacity() const { RAPIDJSON_ASSERT(IsArray()); return data_.a.capacity; } + + //! Check whether the array is empty. + bool Empty() const { RAPIDJSON_ASSERT(IsArray()); return data_.a.size == 0; } + + //! Remove all elements in the array. + /*! This function do not deallocate memory in the array, i.e. the capacity is unchanged. + \note Linear time complexity. + */ + void Clear() { + RAPIDJSON_ASSERT(IsArray()); + GenericValue* e = GetElementsPointer(); + for (GenericValue* v = e; v != e + data_.a.size; ++v) + v->~GenericValue(); + data_.a.size = 0; + } + + //! Get an element from array by index. + /*! \pre IsArray() == true + \param index Zero-based index of element. + \see operator[](T*) + */ + GenericValue& operator[](SizeType index) { + RAPIDJSON_ASSERT(IsArray()); + RAPIDJSON_ASSERT(index < data_.a.size); + return GetElementsPointer()[index]; + } + const GenericValue& operator[](SizeType index) const { return const_cast<GenericValue&>(*this)[index]; } + + //! Element iterator + /*! \pre IsArray() == true */ + ValueIterator Begin() { RAPIDJSON_ASSERT(IsArray()); return GetElementsPointer(); } + //! \em Past-the-end element iterator + /*! \pre IsArray() == true */ + ValueIterator End() { RAPIDJSON_ASSERT(IsArray()); return GetElementsPointer() + data_.a.size; } + //! Constant element iterator + /*! \pre IsArray() == true */ + ConstValueIterator Begin() const { return const_cast<GenericValue&>(*this).Begin(); } + //! Constant \em past-the-end element iterator + /*! \pre IsArray() == true */ + ConstValueIterator End() const { return const_cast<GenericValue&>(*this).End(); } + + //! Request the array to have enough capacity to store elements. + /*! \param newCapacity The capacity that the array at least need to have. + \param allocator Allocator for reallocating memory. It must be the same one as used before. Commonly use GenericDocument::GetAllocator(). + \return The value itself for fluent API. + \note Linear time complexity. + */ + GenericValue& Reserve(SizeType newCapacity, Allocator &allocator) { + RAPIDJSON_ASSERT(IsArray()); + if (newCapacity > data_.a.capacity) { + SetElementsPointer(reinterpret_cast<GenericValue*>(allocator.Realloc(GetElementsPointer(), data_.a.capacity * sizeof(GenericValue), newCapacity * sizeof(GenericValue)))); + data_.a.capacity = newCapacity; + } + return *this; + } + + //! Append a GenericValue at the end of the array. + /*! \param value Value to be appended. + \param allocator Allocator for reallocating memory. It must be the same one as used before. Commonly use GenericDocument::GetAllocator(). + \pre IsArray() == true + \post value.IsNull() == true + \return The value itself for fluent API. + \note The ownership of \c value will be transferred to this array on success. + \note If the number of elements to be appended is known, calls Reserve() once first may be more efficient. + \note Amortized constant time complexity. + */ + GenericValue& PushBack(GenericValue& value, Allocator& allocator) { + RAPIDJSON_ASSERT(IsArray()); + if (data_.a.size >= data_.a.capacity) + Reserve(data_.a.capacity == 0 ? kDefaultArrayCapacity : (data_.a.capacity + (data_.a.capacity + 1) / 2), allocator); + GetElementsPointer()[data_.a.size++].RawAssign(value); + return *this; + } + +#if RAPIDJSON_HAS_CXX11_RVALUE_REFS + GenericValue& PushBack(GenericValue&& value, Allocator& allocator) { + return PushBack(value, allocator); + } +#endif // RAPIDJSON_HAS_CXX11_RVALUE_REFS + + //! Append a constant string reference at the end of the array. + /*! \param value Constant string reference to be appended. + \param allocator Allocator for reallocating memory. It must be the same one used previously. Commonly use GenericDocument::GetAllocator(). + \pre IsArray() == true + \return The value itself for fluent API. + \note If the number of elements to be appended is known, calls Reserve() once first may be more efficient. + \note Amortized constant time complexity. + \see GenericStringRef + */ + GenericValue& PushBack(StringRefType value, Allocator& allocator) { + return (*this).template PushBack<StringRefType>(value, allocator); + } + + //! Append a primitive value at the end of the array. + /*! \tparam T Either \ref Type, \c int, \c unsigned, \c int64_t, \c uint64_t + \param value Value of primitive type T to be appended. + \param allocator Allocator for reallocating memory. It must be the same one as used before. Commonly use GenericDocument::GetAllocator(). + \pre IsArray() == true + \return The value itself for fluent API. + \note If the number of elements to be appended is known, calls Reserve() once first may be more efficient. + + \note The source type \c T explicitly disallows all pointer types, + especially (\c const) \ref Ch*. This helps avoiding implicitly + referencing character strings with insufficient lifetime, use + \ref PushBack(GenericValue&, Allocator&) or \ref + PushBack(StringRefType, Allocator&). + All other pointer types would implicitly convert to \c bool, + use an explicit cast instead, if needed. + \note Amortized constant time complexity. + */ + template <typename T> + RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr<internal::IsPointer<T>, internal::IsGenericValue<T> >), (GenericValue&)) + PushBack(T value, Allocator& allocator) { + GenericValue v(value); + return PushBack(v, allocator); + } + + //! Remove the last element in the array. + /*! + \note Constant time complexity. + */ + GenericValue& PopBack() { + RAPIDJSON_ASSERT(IsArray()); + RAPIDJSON_ASSERT(!Empty()); + GetElementsPointer()[--data_.a.size].~GenericValue(); + return *this; + } + + //! Remove an element of array by iterator. + /*! + \param pos iterator to the element to remove + \pre IsArray() == true && \ref Begin() <= \c pos < \ref End() + \return Iterator following the removed element. If the iterator pos refers to the last element, the End() iterator is returned. + \note Linear time complexity. + */ + ValueIterator Erase(ConstValueIterator pos) { + return Erase(pos, pos + 1); + } + + //! Remove elements in the range [first, last) of the array. + /*! + \param first iterator to the first element to remove + \param last iterator following the last element to remove + \pre IsArray() == true && \ref Begin() <= \c first <= \c last <= \ref End() + \return Iterator following the last removed element. + \note Linear time complexity. + */ + ValueIterator Erase(ConstValueIterator first, ConstValueIterator last) { + RAPIDJSON_ASSERT(IsArray()); + RAPIDJSON_ASSERT(data_.a.size > 0); + RAPIDJSON_ASSERT(GetElementsPointer() != 0); + RAPIDJSON_ASSERT(first >= Begin()); + RAPIDJSON_ASSERT(first <= last); + RAPIDJSON_ASSERT(last <= End()); + ValueIterator pos = Begin() + (first - Begin()); + for (ValueIterator itr = pos; itr != last; ++itr) + itr->~GenericValue(); + std::memmove(pos, last, static_cast<size_t>(End() - last) * sizeof(GenericValue)); + data_.a.size -= static_cast<SizeType>(last - first); + return pos; + } + + Array GetArray() { RAPIDJSON_ASSERT(IsArray()); return Array(*this); } + ConstArray GetArray() const { RAPIDJSON_ASSERT(IsArray()); return ConstArray(*this); } + + //@} + + //!@name Number + //@{ + + int GetInt() const { RAPIDJSON_ASSERT(data_.f.flags & kIntFlag); return data_.n.i.i; } + unsigned GetUint() const { RAPIDJSON_ASSERT(data_.f.flags & kUintFlag); return data_.n.u.u; } + int64_t GetInt64() const { RAPIDJSON_ASSERT(data_.f.flags & kInt64Flag); return data_.n.i64; } + uint64_t GetUint64() const { RAPIDJSON_ASSERT(data_.f.flags & kUint64Flag); return data_.n.u64; } + + //! Get the value as double type. + /*! \note If the value is 64-bit integer type, it may lose precision. Use \c IsLosslessDouble() to check whether the converison is lossless. + */ + double GetDouble() const { + RAPIDJSON_ASSERT(IsNumber()); + if ((data_.f.flags & kDoubleFlag) != 0) return data_.n.d; // exact type, no conversion. + if ((data_.f.flags & kIntFlag) != 0) return data_.n.i.i; // int -> double + if ((data_.f.flags & kUintFlag) != 0) return data_.n.u.u; // unsigned -> double + if ((data_.f.flags & kInt64Flag) != 0) return static_cast<double>(data_.n.i64); // int64_t -> double (may lose precision) + RAPIDJSON_ASSERT((data_.f.flags & kUint64Flag) != 0); return static_cast<double>(data_.n.u64); // uint64_t -> double (may lose precision) + } + + //! Get the value as float type. + /*! \note If the value is 64-bit integer type, it may lose precision. Use \c IsLosslessFloat() to check whether the converison is lossless. + */ + float GetFloat() const { + return static_cast<float>(GetDouble()); + } + + GenericValue& SetInt(int i) { this->~GenericValue(); new (this) GenericValue(i); return *this; } + GenericValue& SetUint(unsigned u) { this->~GenericValue(); new (this) GenericValue(u); return *this; } + GenericValue& SetInt64(int64_t i64) { this->~GenericValue(); new (this) GenericValue(i64); return *this; } + GenericValue& SetUint64(uint64_t u64) { this->~GenericValue(); new (this) GenericValue(u64); return *this; } + GenericValue& SetDouble(double d) { this->~GenericValue(); new (this) GenericValue(d); return *this; } + GenericValue& SetFloat(float f) { this->~GenericValue(); new (this) GenericValue(static_cast<double>(f)); return *this; } + + //@} + + //!@name String + //@{ + + const Ch* GetString() const { RAPIDJSON_ASSERT(IsString()); return (data_.f.flags & kInlineStrFlag) ? data_.ss.str : GetStringPointer(); } + + //! Get the length of string. + /*! Since rapidjson permits "\\u0000" in the json string, strlen(v.GetString()) may not equal to v.GetStringLength(). + */ + SizeType GetStringLength() const { RAPIDJSON_ASSERT(IsString()); return ((data_.f.flags & kInlineStrFlag) ? (data_.ss.GetLength()) : data_.s.length); } + + //! Set this value as a string without copying source string. + /*! This version has better performance with supplied length, and also support string containing null character. + \param s source string pointer. + \param length The length of source string, excluding the trailing null terminator. + \return The value itself for fluent API. + \post IsString() == true && GetString() == s && GetStringLength() == length + \see SetString(StringRefType) + */ + GenericValue& SetString(const Ch* s, SizeType length) { return SetString(StringRef(s, length)); } + + //! Set this value as a string without copying source string. + /*! \param s source string reference + \return The value itself for fluent API. + \post IsString() == true && GetString() == s && GetStringLength() == s.length + */ + GenericValue& SetString(StringRefType s) { this->~GenericValue(); SetStringRaw(s); return *this; } + + //! Set this value as a string by copying from source string. + /*! This version has better performance with supplied length, and also support string containing null character. + \param s source string. + \param length The length of source string, excluding the trailing null terminator. + \param allocator Allocator for allocating copied buffer. Commonly use GenericDocument::GetAllocator(). + \return The value itself for fluent API. + \post IsString() == true && GetString() != s && strcmp(GetString(),s) == 0 && GetStringLength() == length + */ + GenericValue& SetString(const Ch* s, SizeType length, Allocator& allocator) { this->~GenericValue(); SetStringRaw(StringRef(s, length), allocator); return *this; } + + //! Set this value as a string by copying from source string. + /*! \param s source string. + \param allocator Allocator for allocating copied buffer. Commonly use GenericDocument::GetAllocator(). + \return The value itself for fluent API. + \post IsString() == true && GetString() != s && strcmp(GetString(),s) == 0 && GetStringLength() == length + */ + GenericValue& SetString(const Ch* s, Allocator& allocator) { return SetString(s, internal::StrLen(s), allocator); } + +#if RAPIDJSON_HAS_STDSTRING + //! Set this value as a string by copying from source string. + /*! \param s source string. + \param allocator Allocator for allocating copied buffer. Commonly use GenericDocument::GetAllocator(). + \return The value itself for fluent API. + \post IsString() == true && GetString() != s.data() && strcmp(GetString(),s.data() == 0 && GetStringLength() == s.size() + \note Requires the definition of the preprocessor symbol \ref RAPIDJSON_HAS_STDSTRING. + */ + GenericValue& SetString(const std::basic_string<Ch>& s, Allocator& allocator) { return SetString(s.data(), SizeType(s.size()), allocator); } +#endif + + //@} + + //!@name Array + //@{ + + //! Templated version for checking whether this value is type T. + /*! + \tparam T Either \c bool, \c int, \c unsigned, \c int64_t, \c uint64_t, \c double, \c float, \c const \c char*, \c std::basic_string<Ch> + */ + template <typename T> + bool Is() const { return internal::TypeHelper<ValueType, T>::Is(*this); } + + template <typename T> + T Get() const { return internal::TypeHelper<ValueType, T>::Get(*this); } + + template <typename T> + T Get() { return internal::TypeHelper<ValueType, T>::Get(*this); } + + template<typename T> + ValueType& Set(const T& data) { return internal::TypeHelper<ValueType, T>::Set(*this, data); } + + template<typename T> + ValueType& Set(const T& data, AllocatorType& allocator) { return internal::TypeHelper<ValueType, T>::Set(*this, data, allocator); } + + //@} + + //! Generate events of this value to a Handler. + /*! This function adopts the GoF visitor pattern. + Typical usage is to output this JSON value as JSON text via Writer, which is a Handler. + It can also be used to deep clone this value via GenericDocument, which is also a Handler. + \tparam Handler type of handler. + \param handler An object implementing concept Handler. + */ + template <typename Handler> + bool Accept(Handler& handler) const { + switch(GetType()) { + case kNullType: return handler.Null(); + case kFalseType: return handler.Bool(false); + case kTrueType: return handler.Bool(true); + + case kObjectType: + if (RAPIDJSON_UNLIKELY(!handler.StartObject())) + return false; + for (ConstMemberIterator m = MemberBegin(); m != MemberEnd(); ++m) { + RAPIDJSON_ASSERT(m->name.IsString()); // User may change the type of name by MemberIterator. + if (RAPIDJSON_UNLIKELY(!handler.Key(m->name.GetString(), m->name.GetStringLength(), (m->name.data_.f.flags & kCopyFlag) != 0))) + return false; + if (RAPIDJSON_UNLIKELY(!m->value.Accept(handler))) + return false; + } + return handler.EndObject(data_.o.size); + + case kArrayType: + if (RAPIDJSON_UNLIKELY(!handler.StartArray())) + return false; + for (const GenericValue* v = Begin(); v != End(); ++v) + if (RAPIDJSON_UNLIKELY(!v->Accept(handler))) + return false; + return handler.EndArray(data_.a.size); + + case kStringType: + return handler.String(GetString(), GetStringLength(), (data_.f.flags & kCopyFlag) != 0); + + default: + RAPIDJSON_ASSERT(GetType() == kNumberType); + if (IsDouble()) return handler.Double(data_.n.d); + else if (IsInt()) return handler.Int(data_.n.i.i); + else if (IsUint()) return handler.Uint(data_.n.u.u); + else if (IsInt64()) return handler.Int64(data_.n.i64); + else return handler.Uint64(data_.n.u64); + } + } + +private: + template <typename, typename> friend class GenericValue; + template <typename, typename, typename> friend class GenericDocument; + + enum { + kBoolFlag = 0x0008, + kNumberFlag = 0x0010, + kIntFlag = 0x0020, + kUintFlag = 0x0040, + kInt64Flag = 0x0080, + kUint64Flag = 0x0100, + kDoubleFlag = 0x0200, + kStringFlag = 0x0400, + kCopyFlag = 0x0800, + kInlineStrFlag = 0x1000, + + // Initial flags of different types. + kNullFlag = kNullType, + kTrueFlag = kTrueType | kBoolFlag, + kFalseFlag = kFalseType | kBoolFlag, + kNumberIntFlag = kNumberType | kNumberFlag | kIntFlag | kInt64Flag, + kNumberUintFlag = kNumberType | kNumberFlag | kUintFlag | kUint64Flag | kInt64Flag, + kNumberInt64Flag = kNumberType | kNumberFlag | kInt64Flag, + kNumberUint64Flag = kNumberType | kNumberFlag | kUint64Flag, + kNumberDoubleFlag = kNumberType | kNumberFlag | kDoubleFlag, + kNumberAnyFlag = kNumberType | kNumberFlag | kIntFlag | kInt64Flag | kUintFlag | kUint64Flag | kDoubleFlag, + kConstStringFlag = kStringType | kStringFlag, + kCopyStringFlag = kStringType | kStringFlag | kCopyFlag, + kShortStringFlag = kStringType | kStringFlag | kCopyFlag | kInlineStrFlag, + kObjectFlag = kObjectType, + kArrayFlag = kArrayType, + + kTypeMask = 0x07 + }; + + static const SizeType kDefaultArrayCapacity = 16; + static const SizeType kDefaultObjectCapacity = 16; + + struct Flag { +#if RAPIDJSON_48BITPOINTER_OPTIMIZATION + char payload[sizeof(SizeType) * 2 + 6]; // 2 x SizeType + lower 48-bit pointer +#elif RAPIDJSON_64BIT + char payload[sizeof(SizeType) * 2 + sizeof(void*) + 6]; // 6 padding bytes +#else + char payload[sizeof(SizeType) * 2 + sizeof(void*) + 2]; // 2 padding bytes +#endif + uint16_t flags; + }; + + struct String { + SizeType length; + SizeType hashcode; //!< reserved + const Ch* str; + }; // 12 bytes in 32-bit mode, 16 bytes in 64-bit mode + + // implementation detail: ShortString can represent zero-terminated strings up to MaxSize chars + // (excluding the terminating zero) and store a value to determine the length of the contained + // string in the last character str[LenPos] by storing "MaxSize - length" there. If the string + // to store has the maximal length of MaxSize then str[LenPos] will be 0 and therefore act as + // the string terminator as well. For getting the string length back from that value just use + // "MaxSize - str[LenPos]". + // This allows to store 13-chars strings in 32-bit mode, 21-chars strings in 64-bit mode, + // 13-chars strings for RAPIDJSON_48BITPOINTER_OPTIMIZATION=1 inline (for `UTF8`-encoded strings). + struct ShortString { + enum { MaxChars = sizeof(static_cast<Flag*>(0)->payload) / sizeof(Ch), MaxSize = MaxChars - 1, LenPos = MaxSize }; + Ch str[MaxChars]; + + inline static bool Usable(SizeType len) { return (MaxSize >= len); } + inline void SetLength(SizeType len) { str[LenPos] = static_cast<Ch>(MaxSize - len); } + inline SizeType GetLength() const { return static_cast<SizeType>(MaxSize - str[LenPos]); } + }; // at most as many bytes as "String" above => 12 bytes in 32-bit mode, 16 bytes in 64-bit mode + + // By using proper binary layout, retrieval of different integer types do not need conversions. + union Number { +#if RAPIDJSON_ENDIAN == RAPIDJSON_LITTLEENDIAN + struct I { + int i; + char padding[4]; + }i; + struct U { + unsigned u; + char padding2[4]; + }u; +#else + struct I { + char padding[4]; + int i; + }i; + struct U { + char padding2[4]; + unsigned u; + }u; +#endif + int64_t i64; + uint64_t u64; + double d; + }; // 8 bytes + + struct ObjectData { + SizeType size; + SizeType capacity; + Member* members; + }; // 12 bytes in 32-bit mode, 16 bytes in 64-bit mode + + struct ArrayData { + SizeType size; + SizeType capacity; + GenericValue* elements; + }; // 12 bytes in 32-bit mode, 16 bytes in 64-bit mode + + union Data { + String s; + ShortString ss; + Number n; + ObjectData o; + ArrayData a; + Flag f; + }; // 16 bytes in 32-bit mode, 24 bytes in 64-bit mode, 16 bytes in 64-bit with RAPIDJSON_48BITPOINTER_OPTIMIZATION + + RAPIDJSON_FORCEINLINE const Ch* GetStringPointer() const { return RAPIDJSON_GETPOINTER(Ch, data_.s.str); } + RAPIDJSON_FORCEINLINE const Ch* SetStringPointer(const Ch* str) { return RAPIDJSON_SETPOINTER(Ch, data_.s.str, str); } + RAPIDJSON_FORCEINLINE GenericValue* GetElementsPointer() const { return RAPIDJSON_GETPOINTER(GenericValue, data_.a.elements); } + RAPIDJSON_FORCEINLINE GenericValue* SetElementsPointer(GenericValue* elements) { return RAPIDJSON_SETPOINTER(GenericValue, data_.a.elements, elements); } + RAPIDJSON_FORCEINLINE Member* GetMembersPointer() const { return RAPIDJSON_GETPOINTER(Member, data_.o.members); } + RAPIDJSON_FORCEINLINE Member* SetMembersPointer(Member* members) { return RAPIDJSON_SETPOINTER(Member, data_.o.members, members); } + + // Initialize this value as array with initial data, without calling destructor. + void SetArrayRaw(GenericValue* values, SizeType count, Allocator& allocator) { + data_.f.flags = kArrayFlag; + if (count) { + GenericValue* e = static_cast<GenericValue*>(allocator.Malloc(count * sizeof(GenericValue))); + SetElementsPointer(e); + std::memcpy(e, values, count * sizeof(GenericValue)); + } + else + SetElementsPointer(0); + data_.a.size = data_.a.capacity = count; + } + + //! Initialize this value as object with initial data, without calling destructor. + void SetObjectRaw(Member* members, SizeType count, Allocator& allocator) { + data_.f.flags = kObjectFlag; + if (count) { + Member* m = static_cast<Member*>(allocator.Malloc(count * sizeof(Member))); + SetMembersPointer(m); + std::memcpy(m, members, count * sizeof(Member)); + } + else + SetMembersPointer(0); + data_.o.size = data_.o.capacity = count; + } + + //! Initialize this value as constant string, without calling destructor. + void SetStringRaw(StringRefType s) RAPIDJSON_NOEXCEPT { + data_.f.flags = kConstStringFlag; + SetStringPointer(s); + data_.s.length = s.length; + } + + //! Initialize this value as copy string with initial data, without calling destructor. + void SetStringRaw(StringRefType s, Allocator& allocator) { + Ch* str = 0; + if (ShortString::Usable(s.length)) { + data_.f.flags = kShortStringFlag; + data_.ss.SetLength(s.length); + str = data_.ss.str; + } else { + data_.f.flags = kCopyStringFlag; + data_.s.length = s.length; + str = static_cast<Ch *>(allocator.Malloc((s.length + 1) * sizeof(Ch))); + SetStringPointer(str); + } + std::memcpy(str, s, s.length * sizeof(Ch)); + str[s.length] = '\0'; + } + + //! Assignment without calling destructor + void RawAssign(GenericValue& rhs) RAPIDJSON_NOEXCEPT { + data_ = rhs.data_; + // data_.f.flags = rhs.data_.f.flags; + rhs.data_.f.flags = kNullFlag; + } + + template <typename SourceAllocator> + bool StringEqual(const GenericValue<Encoding, SourceAllocator>& rhs) const { + RAPIDJSON_ASSERT(IsString()); + RAPIDJSON_ASSERT(rhs.IsString()); + + const SizeType len1 = GetStringLength(); + const SizeType len2 = rhs.GetStringLength(); + if(len1 != len2) { return false; } + + const Ch* const str1 = GetString(); + const Ch* const str2 = rhs.GetString(); + if(str1 == str2) { return true; } // fast path for constant string + + return (std::memcmp(str1, str2, sizeof(Ch) * len1) == 0); + } + + Data data_; +}; + +//! GenericValue with UTF8 encoding +typedef GenericValue<UTF8<> > Value; + +/////////////////////////////////////////////////////////////////////////////// +// GenericDocument + +//! A document for parsing JSON text as DOM. +/*! + \note implements Handler concept + \tparam Encoding Encoding for both parsing and string storage. + \tparam Allocator Allocator for allocating memory for the DOM + \tparam StackAllocator Allocator for allocating memory for stack during parsing. + \warning Although GenericDocument inherits from GenericValue, the API does \b not provide any virtual functions, especially no virtual destructor. To avoid memory leaks, do not \c delete a GenericDocument object via a pointer to a GenericValue. +*/ +template <typename Encoding, typename Allocator = MemoryPoolAllocator<>, typename StackAllocator = CrtAllocator> +class GenericDocument : public GenericValue<Encoding, Allocator> { +public: + typedef typename Encoding::Ch Ch; //!< Character type derived from Encoding. + typedef GenericValue<Encoding, Allocator> ValueType; //!< Value type of the document. + typedef Allocator AllocatorType; //!< Allocator type from template parameter. + + //! Constructor + /*! Creates an empty document of specified type. + \param type Mandatory type of object to create. + \param allocator Optional allocator for allocating memory. + \param stackCapacity Optional initial capacity of stack in bytes. + \param stackAllocator Optional allocator for allocating memory for stack. + */ + explicit GenericDocument(Type type, Allocator* allocator = 0, size_t stackCapacity = kDefaultStackCapacity, StackAllocator* stackAllocator = 0) : + GenericValue<Encoding, Allocator>(type), allocator_(allocator), ownAllocator_(0), stack_(stackAllocator, stackCapacity), parseResult_() + { + if (!allocator_) + ownAllocator_ = allocator_ = RAPIDJSON_NEW(Allocator()); + } + + //! Constructor + /*! Creates an empty document which type is Null. + \param allocator Optional allocator for allocating memory. + \param stackCapacity Optional initial capacity of stack in bytes. + \param stackAllocator Optional allocator for allocating memory for stack. + */ + GenericDocument(Allocator* allocator = 0, size_t stackCapacity = kDefaultStackCapacity, StackAllocator* stackAllocator = 0) : + allocator_(allocator), ownAllocator_(0), stack_(stackAllocator, stackCapacity), parseResult_() + { + if (!allocator_) + ownAllocator_ = allocator_ = RAPIDJSON_NEW(Allocator()); + } + +#if RAPIDJSON_HAS_CXX11_RVALUE_REFS + //! Move constructor in C++11 + GenericDocument(GenericDocument&& rhs) RAPIDJSON_NOEXCEPT + : ValueType(std::forward<ValueType>(rhs)), // explicit cast to avoid prohibited move from Document + allocator_(rhs.allocator_), + ownAllocator_(rhs.ownAllocator_), + stack_(std::move(rhs.stack_)), + parseResult_(rhs.parseResult_) + { + rhs.allocator_ = 0; + rhs.ownAllocator_ = 0; + rhs.parseResult_ = ParseResult(); + } +#endif + + ~GenericDocument() { + Destroy(); + } + +#if RAPIDJSON_HAS_CXX11_RVALUE_REFS + //! Move assignment in C++11 + GenericDocument& operator=(GenericDocument&& rhs) RAPIDJSON_NOEXCEPT + { + // The cast to ValueType is necessary here, because otherwise it would + // attempt to call GenericValue's templated assignment operator. + ValueType::operator=(std::forward<ValueType>(rhs)); + + // Calling the destructor here would prematurely call stack_'s destructor + Destroy(); + + allocator_ = rhs.allocator_; + ownAllocator_ = rhs.ownAllocator_; + stack_ = std::move(rhs.stack_); + parseResult_ = rhs.parseResult_; + + rhs.allocator_ = 0; + rhs.ownAllocator_ = 0; + rhs.parseResult_ = ParseResult(); + + return *this; + } +#endif + + //! Exchange the contents of this document with those of another. + /*! + \param rhs Another document. + \note Constant complexity. + \see GenericValue::Swap + */ + GenericDocument& Swap(GenericDocument& rhs) RAPIDJSON_NOEXCEPT { + ValueType::Swap(rhs); + stack_.Swap(rhs.stack_); + internal::Swap(allocator_, rhs.allocator_); + internal::Swap(ownAllocator_, rhs.ownAllocator_); + internal::Swap(parseResult_, rhs.parseResult_); + return *this; + } + + //! free-standing swap function helper + /*! + Helper function to enable support for common swap implementation pattern based on \c std::swap: + \code + void swap(MyClass& a, MyClass& b) { + using std::swap; + swap(a.doc, b.doc); + // ... + } + \endcode + \see Swap() + */ + friend inline void swap(GenericDocument& a, GenericDocument& b) RAPIDJSON_NOEXCEPT { a.Swap(b); } + + //! Populate this document by a generator which produces SAX events. + /*! \tparam Generator A functor with <tt>bool f(Handler)</tt> prototype. + \param g Generator functor which sends SAX events to the parameter. + \return The document itself for fluent API. + */ + template <typename Generator> + GenericDocument& Populate(Generator& g) { + ClearStackOnExit scope(*this); + if (g(*this)) { + RAPIDJSON_ASSERT(stack_.GetSize() == sizeof(ValueType)); // Got one and only one root object + ValueType::operator=(*stack_.template Pop<ValueType>(1));// Move value from stack to document + } + return *this; + } + + //!@name Parse from stream + //!@{ + + //! Parse JSON text from an input stream (with Encoding conversion) + /*! \tparam parseFlags Combination of \ref ParseFlag. + \tparam SourceEncoding Encoding of input stream + \tparam InputStream Type of input stream, implementing Stream concept + \param is Input stream to be parsed. + \return The document itself for fluent API. + */ + template <unsigned parseFlags, typename SourceEncoding, typename InputStream> + GenericDocument& ParseStream(InputStream& is) { + GenericReader<SourceEncoding, Encoding, StackAllocator> reader( + stack_.HasAllocator() ? &stack_.GetAllocator() : 0); + ClearStackOnExit scope(*this); + parseResult_ = reader.template Parse<parseFlags>(is, *this); + if (parseResult_) { + RAPIDJSON_ASSERT(stack_.GetSize() == sizeof(ValueType)); // Got one and only one root object + ValueType::operator=(*stack_.template Pop<ValueType>(1));// Move value from stack to document + } + return *this; + } + + //! Parse JSON text from an input stream + /*! \tparam parseFlags Combination of \ref ParseFlag. + \tparam InputStream Type of input stream, implementing Stream concept + \param is Input stream to be parsed. + \return The document itself for fluent API. + */ + template <unsigned parseFlags, typename InputStream> + GenericDocument& ParseStream(InputStream& is) { + return ParseStream<parseFlags, Encoding, InputStream>(is); + } + + //! Parse JSON text from an input stream (with \ref kParseDefaultFlags) + /*! \tparam InputStream Type of input stream, implementing Stream concept + \param is Input stream to be parsed. + \return The document itself for fluent API. + */ + template <typename InputStream> + GenericDocument& ParseStream(InputStream& is) { + return ParseStream<kParseDefaultFlags, Encoding, InputStream>(is); + } + //!@} + + //!@name Parse in-place from mutable string + //!@{ + + //! Parse JSON text from a mutable string + /*! \tparam parseFlags Combination of \ref ParseFlag. + \param str Mutable zero-terminated string to be parsed. + \return The document itself for fluent API. + */ + template <unsigned parseFlags> + GenericDocument& ParseInsitu(Ch* str) { + GenericInsituStringStream<Encoding> s(str); + return ParseStream<parseFlags | kParseInsituFlag>(s); + } + + //! Parse JSON text from a mutable string (with \ref kParseDefaultFlags) + /*! \param str Mutable zero-terminated string to be parsed. + \return The document itself for fluent API. + */ + GenericDocument& ParseInsitu(Ch* str) { + return ParseInsitu<kParseDefaultFlags>(str); + } + //!@} + + //!@name Parse from read-only string + //!@{ + + //! Parse JSON text from a read-only string (with Encoding conversion) + /*! \tparam parseFlags Combination of \ref ParseFlag (must not contain \ref kParseInsituFlag). + \tparam SourceEncoding Transcoding from input Encoding + \param str Read-only zero-terminated string to be parsed. + */ + template <unsigned parseFlags, typename SourceEncoding> + GenericDocument& Parse(const typename SourceEncoding::Ch* str) { + RAPIDJSON_ASSERT(!(parseFlags & kParseInsituFlag)); + GenericStringStream<SourceEncoding> s(str); + return ParseStream<parseFlags, SourceEncoding>(s); + } + + //! Parse JSON text from a read-only string + /*! \tparam parseFlags Combination of \ref ParseFlag (must not contain \ref kParseInsituFlag). + \param str Read-only zero-terminated string to be parsed. + */ + template <unsigned parseFlags> + GenericDocument& Parse(const Ch* str) { + return Parse<parseFlags, Encoding>(str); + } + + //! Parse JSON text from a read-only string (with \ref kParseDefaultFlags) + /*! \param str Read-only zero-terminated string to be parsed. + */ + GenericDocument& Parse(const Ch* str) { + return Parse<kParseDefaultFlags>(str); + } + + template <unsigned parseFlags, typename SourceEncoding> + GenericDocument& Parse(const typename SourceEncoding::Ch* str, size_t length) { + RAPIDJSON_ASSERT(!(parseFlags & kParseInsituFlag)); + MemoryStream ms(static_cast<const char*>(str), length * sizeof(typename SourceEncoding::Ch)); + EncodedInputStream<SourceEncoding, MemoryStream> is(ms); + ParseStream<parseFlags, SourceEncoding>(is); + return *this; + } + + template <unsigned parseFlags> + GenericDocument& Parse(const Ch* str, size_t length) { + return Parse<parseFlags, Encoding>(str, length); + } + + GenericDocument& Parse(const Ch* str, size_t length) { + return Parse<kParseDefaultFlags>(str, length); + } + +#if RAPIDJSON_HAS_STDSTRING + template <unsigned parseFlags, typename SourceEncoding> + GenericDocument& Parse(const std::basic_string<typename SourceEncoding::Ch>& str) { + // c_str() is constant complexity according to standard. Should be faster than Parse(const char*, size_t) + return Parse<parseFlags, SourceEncoding>(str.c_str()); + } + + template <unsigned parseFlags> + GenericDocument& Parse(const std::basic_string<Ch>& str) { + return Parse<parseFlags, Encoding>(str.c_str()); + } + + GenericDocument& Parse(const std::basic_string<Ch>& str) { + return Parse<kParseDefaultFlags>(str); + } +#endif // RAPIDJSON_HAS_STDSTRING + + //!@} + + //!@name Handling parse errors + //!@{ + + //! Whether a parse error has occured in the last parsing. + bool HasParseError() const { return parseResult_.IsError(); } + + //! Get the \ref ParseErrorCode of last parsing. + ParseErrorCode GetParseError() const { return parseResult_.Code(); } + + //! Get the position of last parsing error in input, 0 otherwise. + size_t GetErrorOffset() const { return parseResult_.Offset(); } + + //! Implicit conversion to get the last parse result +#ifndef __clang // -Wdocumentation + /*! \return \ref ParseResult of the last parse operation + + \code + Document doc; + ParseResult ok = doc.Parse(json); + if (!ok) + printf( "JSON parse error: %s (%u)\n", GetParseError_En(ok.Code()), ok.Offset()); + \endcode + */ +#endif + operator ParseResult() const { return parseResult_; } + //!@} + + //! Get the allocator of this document. + Allocator& GetAllocator() { + RAPIDJSON_ASSERT(allocator_); + return *allocator_; + } + + //! Get the capacity of stack in bytes. + size_t GetStackCapacity() const { return stack_.GetCapacity(); } + +private: + // clear stack on any exit from ParseStream, e.g. due to exception + struct ClearStackOnExit { + explicit ClearStackOnExit(GenericDocument& d) : d_(d) {} + ~ClearStackOnExit() { d_.ClearStack(); } + private: + ClearStackOnExit(const ClearStackOnExit&); + ClearStackOnExit& operator=(const ClearStackOnExit&); + GenericDocument& d_; + }; + + // callers of the following private Handler functions + // template <typename,typename,typename> friend class GenericReader; // for parsing + template <typename, typename> friend class GenericValue; // for deep copying + +public: + // Implementation of Handler + bool Null() { new (stack_.template Push<ValueType>()) ValueType(); return true; } + bool Bool(bool b) { new (stack_.template Push<ValueType>()) ValueType(b); return true; } + bool Int(int i) { new (stack_.template Push<ValueType>()) ValueType(i); return true; } + bool Uint(unsigned i) { new (stack_.template Push<ValueType>()) ValueType(i); return true; } + bool Int64(int64_t i) { new (stack_.template Push<ValueType>()) ValueType(i); return true; } + bool Uint64(uint64_t i) { new (stack_.template Push<ValueType>()) ValueType(i); return true; } + bool Double(double d) { new (stack_.template Push<ValueType>()) ValueType(d); return true; } + + bool RawNumber(const Ch* str, SizeType length, bool copy) { + if (copy) + new (stack_.template Push<ValueType>()) ValueType(str, length, GetAllocator()); + else + new (stack_.template Push<ValueType>()) ValueType(str, length); + return true; + } + + bool String(const Ch* str, SizeType length, bool copy) { + if (copy) + new (stack_.template Push<ValueType>()) ValueType(str, length, GetAllocator()); + else + new (stack_.template Push<ValueType>()) ValueType(str, length); + return true; + } + + bool StartObject() { new (stack_.template Push<ValueType>()) ValueType(kObjectType); return true; } + + bool Key(const Ch* str, SizeType length, bool copy) { return String(str, length, copy); } + + bool EndObject(SizeType memberCount) { + typename ValueType::Member* members = stack_.template Pop<typename ValueType::Member>(memberCount); + stack_.template Top<ValueType>()->SetObjectRaw(members, memberCount, GetAllocator()); + return true; + } + + bool StartArray() { new (stack_.template Push<ValueType>()) ValueType(kArrayType); return true; } + + bool EndArray(SizeType elementCount) { + ValueType* elements = stack_.template Pop<ValueType>(elementCount); + stack_.template Top<ValueType>()->SetArrayRaw(elements, elementCount, GetAllocator()); + return true; + } + +private: + //! Prohibit copying + GenericDocument(const GenericDocument&); + //! Prohibit assignment + GenericDocument& operator=(const GenericDocument&); + + void ClearStack() { + if (Allocator::kNeedFree) + while (stack_.GetSize() > 0) // Here assumes all elements in stack array are GenericValue (Member is actually 2 GenericValue objects) + (stack_.template Pop<ValueType>(1))->~ValueType(); + else + stack_.Clear(); + stack_.ShrinkToFit(); + } + + void Destroy() { + RAPIDJSON_DELETE(ownAllocator_); + } + + static const size_t kDefaultStackCapacity = 1024; + Allocator* allocator_; + Allocator* ownAllocator_; + internal::Stack<StackAllocator> stack_; + ParseResult parseResult_; +}; + +//! GenericDocument with UTF8 encoding +typedef GenericDocument<UTF8<> > Document; + +//! Helper class for accessing Value of array type. +/*! + Instance of this helper class is obtained by \c GenericValue::GetArray(). + In addition to all APIs for array type, it provides range-based for loop if \c RAPIDJSON_HAS_CXX11_RANGE_FOR=1. +*/ +template <bool Const, typename ValueT> +class GenericArray { +public: + typedef GenericArray<true, ValueT> ConstArray; + typedef GenericArray<false, ValueT> Array; + typedef ValueT PlainType; + typedef typename internal::MaybeAddConst<Const,PlainType>::Type ValueType; + typedef ValueType* ValueIterator; // This may be const or non-const iterator + typedef const ValueT* ConstValueIterator; + typedef typename ValueType::AllocatorType AllocatorType; + typedef typename ValueType::StringRefType StringRefType; + + template <typename, typename> + friend class GenericValue; + + GenericArray(const GenericArray& rhs) : value_(rhs.value_) {} + GenericArray& operator=(const GenericArray& rhs) { value_ = rhs.value_; return *this; } + ~GenericArray() {} + + SizeType Size() const { return value_.Size(); } + SizeType Capacity() const { return value_.Capacity(); } + bool Empty() const { return value_.Empty(); } + void Clear() const { value_.Clear(); } + ValueType& operator[](SizeType index) const { return value_[index]; } + ValueIterator Begin() const { return value_.Begin(); } + ValueIterator End() const { return value_.End(); } + GenericArray Reserve(SizeType newCapacity, AllocatorType &allocator) const { value_.Reserve(newCapacity, allocator); return *this; } + GenericArray PushBack(ValueType& value, AllocatorType& allocator) const { value_.PushBack(value, allocator); return *this; } +#if RAPIDJSON_HAS_CXX11_RVALUE_REFS + GenericArray PushBack(ValueType&& value, AllocatorType& allocator) const { value_.PushBack(value, allocator); return *this; } +#endif // RAPIDJSON_HAS_CXX11_RVALUE_REFS + GenericArray PushBack(StringRefType value, AllocatorType& allocator) const { value_.PushBack(value, allocator); return *this; } + template <typename T> RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr<internal::IsPointer<T>, internal::IsGenericValue<T> >), (const GenericArray&)) PushBack(T value, AllocatorType& allocator) const { value_.PushBack(value, allocator); return *this; } + GenericArray PopBack() const { value_.PopBack(); return *this; } + ValueIterator Erase(ConstValueIterator pos) const { return value_.Erase(pos); } + ValueIterator Erase(ConstValueIterator first, ConstValueIterator last) const { return value_.Erase(first, last); } + +#if RAPIDJSON_HAS_CXX11_RANGE_FOR + ValueIterator begin() const { return value_.Begin(); } + ValueIterator end() const { return value_.End(); } +#endif + +private: + GenericArray(); + GenericArray(ValueType& value) : value_(value) {} + ValueType& value_; +}; + +//! Helper class for accessing Value of object type. +/*! + Instance of this helper class is obtained by \c GenericValue::GetObject(). + In addition to all APIs for array type, it provides range-based for loop if \c RAPIDJSON_HAS_CXX11_RANGE_FOR=1. +*/ +template <bool Const, typename ValueT> +class GenericObject { +public: + typedef GenericObject<true, ValueT> ConstObject; + typedef GenericObject<false, ValueT> Object; + typedef ValueT PlainType; + typedef typename internal::MaybeAddConst<Const,PlainType>::Type ValueType; + typedef GenericMemberIterator<Const, typename ValueT::EncodingType, typename ValueT::AllocatorType> MemberIterator; // This may be const or non-const iterator + typedef GenericMemberIterator<true, typename ValueT::EncodingType, typename ValueT::AllocatorType> ConstMemberIterator; + typedef typename ValueType::AllocatorType AllocatorType; + typedef typename ValueType::StringRefType StringRefType; + typedef typename ValueType::EncodingType EncodingType; + typedef typename ValueType::Ch Ch; + + template <typename, typename> + friend class GenericValue; + + GenericObject(const GenericObject& rhs) : value_(rhs.value_) {} + GenericObject& operator=(const GenericObject& rhs) { value_ = rhs.value_; return *this; } + ~GenericObject() {} + + SizeType MemberCount() const { return value_.MemberCount(); } + bool ObjectEmpty() const { return value_.ObjectEmpty(); } + template <typename T> ValueType& operator[](T* name) const { return value_[name]; } + template <typename SourceAllocator> ValueType& operator[](const GenericValue<EncodingType, SourceAllocator>& name) const { return value_[name]; } +#if RAPIDJSON_HAS_STDSTRING + ValueType& operator[](const std::basic_string<Ch>& name) const { return value_[name]; } +#endif + MemberIterator MemberBegin() const { return value_.MemberBegin(); } + MemberIterator MemberEnd() const { return value_.MemberEnd(); } + bool HasMember(const Ch* name) const { return value_.HasMember(name); } +#if RAPIDJSON_HAS_STDSTRING + bool HasMember(const std::basic_string<Ch>& name) const { return value_.HasMember(name); } +#endif + template <typename SourceAllocator> bool HasMember(const GenericValue<EncodingType, SourceAllocator>& name) const { return value_.HasMember(name); } + MemberIterator FindMember(const Ch* name) const { return value_.FindMember(name); } + template <typename SourceAllocator> MemberIterator FindMember(const GenericValue<EncodingType, SourceAllocator>& name) const { return value_.FindMember(name); } +#if RAPIDJSON_HAS_STDSTRING + MemberIterator FindMember(const std::basic_string<Ch>& name) const { return value_.FindMember(name); } +#endif + GenericObject AddMember(ValueType& name, ValueType& value, AllocatorType& allocator) const { value_.AddMember(name, value, allocator); return *this; } + GenericObject AddMember(ValueType& name, StringRefType value, AllocatorType& allocator) const { value_.AddMember(name, value, allocator); return *this; } +#if RAPIDJSON_HAS_STDSTRING + GenericObject AddMember(ValueType& name, std::basic_string<Ch>& value, AllocatorType& allocator) const { value_.AddMember(name, value, allocator); return *this; } +#endif + template <typename T> RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr<internal::IsPointer<T>, internal::IsGenericValue<T> >), (ValueType&)) AddMember(ValueType& name, T value, AllocatorType& allocator) const { value_.AddMember(name, value, allocator); return *this; } +#if RAPIDJSON_HAS_CXX11_RVALUE_REFS + GenericObject AddMember(ValueType&& name, ValueType&& value, AllocatorType& allocator) const { value_.AddMember(name, value, allocator); return *this; } + GenericObject AddMember(ValueType&& name, ValueType& value, AllocatorType& allocator) const { value_.AddMember(name, value, allocator); return *this; } + GenericObject AddMember(ValueType& name, ValueType&& value, AllocatorType& allocator) const { value_.AddMember(name, value, allocator); return *this; } + GenericObject AddMember(StringRefType name, ValueType&& value, AllocatorType& allocator) const { value_.AddMember(name, value, allocator); return *this; } +#endif // RAPIDJSON_HAS_CXX11_RVALUE_REFS + GenericObject AddMember(StringRefType name, ValueType& value, AllocatorType& allocator) const { value_.AddMember(name, value, allocator); return *this; } + GenericObject AddMember(StringRefType name, StringRefType value, AllocatorType& allocator) const { value_.AddMember(name, value, allocator); return *this; } + template <typename T> RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr<internal::IsPointer<T>, internal::IsGenericValue<T> >), (GenericObject)) AddMember(StringRefType name, T value, AllocatorType& allocator) const { value_.AddMember(name, value, allocator); return *this; } + void RemoveAllMembers() { return value_.RemoveAllMembers(); } + bool RemoveMember(const Ch* name) const { return value_.RemoveMember(name); } +#if RAPIDJSON_HAS_STDSTRING + bool RemoveMember(const std::basic_string<Ch>& name) const { return value_.RemoveMember(name); } +#endif + template <typename SourceAllocator> bool RemoveMember(const GenericValue<EncodingType, SourceAllocator>& name) const { return value_.RemoveMember(name); } + MemberIterator RemoveMember(MemberIterator m) const { return value_.RemoveMember(m); } + MemberIterator EraseMember(ConstMemberIterator pos) const { return value_.EraseMember(pos); } + MemberIterator EraseMember(ConstMemberIterator first, ConstMemberIterator last) const { return value_.EraseMember(first, last); } + bool EraseMember(const Ch* name) const { return value_.EraseMember(name); } +#if RAPIDJSON_HAS_STDSTRING + bool EraseMember(const std::basic_string<Ch>& name) const { return EraseMember(ValueType(StringRef(name))); } +#endif + template <typename SourceAllocator> bool EraseMember(const GenericValue<EncodingType, SourceAllocator>& name) const { return value_.EraseMember(name); } + +#if RAPIDJSON_HAS_CXX11_RANGE_FOR + MemberIterator begin() const { return value_.MemberBegin(); } + MemberIterator end() const { return value_.MemberEnd(); } +#endif + +private: + GenericObject(); + GenericObject(ValueType& value) : value_(value) {} + ValueType& value_; +}; + +RAPIDJSON_NAMESPACE_END +#ifdef _MINWINDEF_ // see: http://stackoverflow.com/questions/22744262/cant-call-stdmax-because-minwindef-h-defines-max +#ifndef NOMINMAX +#pragma pop_macro("min") +#pragma pop_macro("max") +#endif +#endif +RAPIDJSON_DIAG_POP + +#endif // RAPIDJSON_DOCUMENT_H_ diff --git a/xmrstak/rapidjson/encodedstream.h b/xmrstak/rapidjson/encodedstream.h new file mode 100644 index 0000000..1450683 --- /dev/null +++ b/xmrstak/rapidjson/encodedstream.h @@ -0,0 +1,299 @@ +// Tencent is pleased to support the open source community by making RapidJSON available. +// +// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. +// +// Licensed under the MIT License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// http://opensource.org/licenses/MIT +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef RAPIDJSON_ENCODEDSTREAM_H_ +#define RAPIDJSON_ENCODEDSTREAM_H_ + +#include "stream.h" +#include "memorystream.h" + +#ifdef __GNUC__ +RAPIDJSON_DIAG_PUSH +RAPIDJSON_DIAG_OFF(effc++) +#endif + +#ifdef __clang__ +RAPIDJSON_DIAG_PUSH +RAPIDJSON_DIAG_OFF(padded) +#endif + +RAPIDJSON_NAMESPACE_BEGIN + +//! Input byte stream wrapper with a statically bound encoding. +/*! + \tparam Encoding The interpretation of encoding of the stream. Either UTF8, UTF16LE, UTF16BE, UTF32LE, UTF32BE. + \tparam InputByteStream Type of input byte stream. For example, FileReadStream. +*/ +template <typename Encoding, typename InputByteStream> +class EncodedInputStream { + RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); +public: + typedef typename Encoding::Ch Ch; + + EncodedInputStream(InputByteStream& is) : is_(is) { + current_ = Encoding::TakeBOM(is_); + } + + Ch Peek() const { return current_; } + Ch Take() { Ch c = current_; current_ = Encoding::Take(is_); return c; } + size_t Tell() const { return is_.Tell(); } + + // Not implemented + void Put(Ch) { RAPIDJSON_ASSERT(false); } + void Flush() { RAPIDJSON_ASSERT(false); } + Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; } + size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; } + +private: + EncodedInputStream(const EncodedInputStream&); + EncodedInputStream& operator=(const EncodedInputStream&); + + InputByteStream& is_; + Ch current_; +}; + +//! Specialized for UTF8 MemoryStream. +template <> +class EncodedInputStream<UTF8<>, MemoryStream> { +public: + typedef UTF8<>::Ch Ch; + + EncodedInputStream(MemoryStream& is) : is_(is) { + if (static_cast<unsigned char>(is_.Peek()) == 0xEFu) is_.Take(); + if (static_cast<unsigned char>(is_.Peek()) == 0xBBu) is_.Take(); + if (static_cast<unsigned char>(is_.Peek()) == 0xBFu) is_.Take(); + } + Ch Peek() const { return is_.Peek(); } + Ch Take() { return is_.Take(); } + size_t Tell() const { return is_.Tell(); } + + // Not implemented + void Put(Ch) {} + void Flush() {} + Ch* PutBegin() { return 0; } + size_t PutEnd(Ch*) { return 0; } + + MemoryStream& is_; + +private: + EncodedInputStream(const EncodedInputStream&); + EncodedInputStream& operator=(const EncodedInputStream&); +}; + +//! Output byte stream wrapper with statically bound encoding. +/*! + \tparam Encoding The interpretation of encoding of the stream. Either UTF8, UTF16LE, UTF16BE, UTF32LE, UTF32BE. + \tparam OutputByteStream Type of input byte stream. For example, FileWriteStream. +*/ +template <typename Encoding, typename OutputByteStream> +class EncodedOutputStream { + RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); +public: + typedef typename Encoding::Ch Ch; + + EncodedOutputStream(OutputByteStream& os, bool putBOM = true) : os_(os) { + if (putBOM) + Encoding::PutBOM(os_); + } + + void Put(Ch c) { Encoding::Put(os_, c); } + void Flush() { os_.Flush(); } + + // Not implemented + Ch Peek() const { RAPIDJSON_ASSERT(false); return 0;} + Ch Take() { RAPIDJSON_ASSERT(false); return 0;} + size_t Tell() const { RAPIDJSON_ASSERT(false); return 0; } + Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; } + size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; } + +private: + EncodedOutputStream(const EncodedOutputStream&); + EncodedOutputStream& operator=(const EncodedOutputStream&); + + OutputByteStream& os_; +}; + +#define RAPIDJSON_ENCODINGS_FUNC(x) UTF8<Ch>::x, UTF16LE<Ch>::x, UTF16BE<Ch>::x, UTF32LE<Ch>::x, UTF32BE<Ch>::x + +//! Input stream wrapper with dynamically bound encoding and automatic encoding detection. +/*! + \tparam CharType Type of character for reading. + \tparam InputByteStream type of input byte stream to be wrapped. +*/ +template <typename CharType, typename InputByteStream> +class AutoUTFInputStream { + RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); +public: + typedef CharType Ch; + + //! Constructor. + /*! + \param is input stream to be wrapped. + \param type UTF encoding type if it is not detected from the stream. + */ + AutoUTFInputStream(InputByteStream& is, UTFType type = kUTF8) : is_(&is), type_(type), hasBOM_(false) { + RAPIDJSON_ASSERT(type >= kUTF8 && type <= kUTF32BE); + DetectType(); + static const TakeFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Take) }; + takeFunc_ = f[type_]; + current_ = takeFunc_(*is_); + } + + UTFType GetType() const { return type_; } + bool HasBOM() const { return hasBOM_; } + + Ch Peek() const { return current_; } + Ch Take() { Ch c = current_; current_ = takeFunc_(*is_); return c; } + size_t Tell() const { return is_->Tell(); } + + // Not implemented + void Put(Ch) { RAPIDJSON_ASSERT(false); } + void Flush() { RAPIDJSON_ASSERT(false); } + Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; } + size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; } + +private: + AutoUTFInputStream(const AutoUTFInputStream&); + AutoUTFInputStream& operator=(const AutoUTFInputStream&); + + // Detect encoding type with BOM or RFC 4627 + void DetectType() { + // BOM (Byte Order Mark): + // 00 00 FE FF UTF-32BE + // FF FE 00 00 UTF-32LE + // FE FF UTF-16BE + // FF FE UTF-16LE + // EF BB BF UTF-8 + + const unsigned char* c = reinterpret_cast<const unsigned char *>(is_->Peek4()); + if (!c) + return; + + unsigned bom = static_cast<unsigned>(c[0] | (c[1] << 8) | (c[2] << 16) | (c[3] << 24)); + hasBOM_ = false; + if (bom == 0xFFFE0000) { type_ = kUTF32BE; hasBOM_ = true; is_->Take(); is_->Take(); is_->Take(); is_->Take(); } + else if (bom == 0x0000FEFF) { type_ = kUTF32LE; hasBOM_ = true; is_->Take(); is_->Take(); is_->Take(); is_->Take(); } + else if ((bom & 0xFFFF) == 0xFFFE) { type_ = kUTF16BE; hasBOM_ = true; is_->Take(); is_->Take(); } + else if ((bom & 0xFFFF) == 0xFEFF) { type_ = kUTF16LE; hasBOM_ = true; is_->Take(); is_->Take(); } + else if ((bom & 0xFFFFFF) == 0xBFBBEF) { type_ = kUTF8; hasBOM_ = true; is_->Take(); is_->Take(); is_->Take(); } + + // RFC 4627: Section 3 + // "Since the first two characters of a JSON text will always be ASCII + // characters [RFC0020], it is possible to determine whether an octet + // stream is UTF-8, UTF-16 (BE or LE), or UTF-32 (BE or LE) by looking + // at the pattern of nulls in the first four octets." + // 00 00 00 xx UTF-32BE + // 00 xx 00 xx UTF-16BE + // xx 00 00 00 UTF-32LE + // xx 00 xx 00 UTF-16LE + // xx xx xx xx UTF-8 + + if (!hasBOM_) { + unsigned pattern = (c[0] ? 1 : 0) | (c[1] ? 2 : 0) | (c[2] ? 4 : 0) | (c[3] ? 8 : 0); + switch (pattern) { + case 0x08: type_ = kUTF32BE; break; + case 0x0A: type_ = kUTF16BE; break; + case 0x01: type_ = kUTF32LE; break; + case 0x05: type_ = kUTF16LE; break; + case 0x0F: type_ = kUTF8; break; + default: break; // Use type defined by user. + } + } + + // Runtime check whether the size of character type is sufficient. It only perform checks with assertion. + if (type_ == kUTF16LE || type_ == kUTF16BE) RAPIDJSON_ASSERT(sizeof(Ch) >= 2); + if (type_ == kUTF32LE || type_ == kUTF32BE) RAPIDJSON_ASSERT(sizeof(Ch) >= 4); + } + + typedef Ch (*TakeFunc)(InputByteStream& is); + InputByteStream* is_; + UTFType type_; + Ch current_; + TakeFunc takeFunc_; + bool hasBOM_; +}; + +//! Output stream wrapper with dynamically bound encoding and automatic encoding detection. +/*! + \tparam CharType Type of character for writing. + \tparam OutputByteStream type of output byte stream to be wrapped. +*/ +template <typename CharType, typename OutputByteStream> +class AutoUTFOutputStream { + RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); +public: + typedef CharType Ch; + + //! Constructor. + /*! + \param os output stream to be wrapped. + \param type UTF encoding type. + \param putBOM Whether to write BOM at the beginning of the stream. + */ + AutoUTFOutputStream(OutputByteStream& os, UTFType type, bool putBOM) : os_(&os), type_(type) { + RAPIDJSON_ASSERT(type >= kUTF8 && type <= kUTF32BE); + + // Runtime check whether the size of character type is sufficient. It only perform checks with assertion. + if (type_ == kUTF16LE || type_ == kUTF16BE) RAPIDJSON_ASSERT(sizeof(Ch) >= 2); + if (type_ == kUTF32LE || type_ == kUTF32BE) RAPIDJSON_ASSERT(sizeof(Ch) >= 4); + + static const PutFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Put) }; + putFunc_ = f[type_]; + + if (putBOM) + PutBOM(); + } + + UTFType GetType() const { return type_; } + + void Put(Ch c) { putFunc_(*os_, c); } + void Flush() { os_->Flush(); } + + // Not implemented + Ch Peek() const { RAPIDJSON_ASSERT(false); return 0;} + Ch Take() { RAPIDJSON_ASSERT(false); return 0;} + size_t Tell() const { RAPIDJSON_ASSERT(false); return 0; } + Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; } + size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; } + +private: + AutoUTFOutputStream(const AutoUTFOutputStream&); + AutoUTFOutputStream& operator=(const AutoUTFOutputStream&); + + void PutBOM() { + typedef void (*PutBOMFunc)(OutputByteStream&); + static const PutBOMFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(PutBOM) }; + f[type_](*os_); + } + + typedef void (*PutFunc)(OutputByteStream&, Ch); + + OutputByteStream* os_; + UTFType type_; + PutFunc putFunc_; +}; + +#undef RAPIDJSON_ENCODINGS_FUNC + +RAPIDJSON_NAMESPACE_END + +#ifdef __clang__ +RAPIDJSON_DIAG_POP +#endif + +#ifdef __GNUC__ +RAPIDJSON_DIAG_POP +#endif + +#endif // RAPIDJSON_FILESTREAM_H_ diff --git a/xmrstak/rapidjson/encodings.h b/xmrstak/rapidjson/encodings.h new file mode 100644 index 0000000..baa7c2b --- /dev/null +++ b/xmrstak/rapidjson/encodings.h @@ -0,0 +1,716 @@ +// Tencent is pleased to support the open source community by making RapidJSON available. +// +// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. +// +// Licensed under the MIT License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// http://opensource.org/licenses/MIT +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef RAPIDJSON_ENCODINGS_H_ +#define RAPIDJSON_ENCODINGS_H_ + +#include "rapidjson.h" + +#ifdef _MSC_VER +RAPIDJSON_DIAG_PUSH +RAPIDJSON_DIAG_OFF(4244) // conversion from 'type1' to 'type2', possible loss of data +RAPIDJSON_DIAG_OFF(4702) // unreachable code +#elif defined(__GNUC__) +RAPIDJSON_DIAG_PUSH +RAPIDJSON_DIAG_OFF(effc++) +RAPIDJSON_DIAG_OFF(overflow) +#endif + +RAPIDJSON_NAMESPACE_BEGIN + +/////////////////////////////////////////////////////////////////////////////// +// Encoding + +/*! \class rapidjson::Encoding + \brief Concept for encoding of Unicode characters. + +\code +concept Encoding { + typename Ch; //! Type of character. A "character" is actually a code unit in unicode's definition. + + enum { supportUnicode = 1 }; // or 0 if not supporting unicode + + //! \brief Encode a Unicode codepoint to an output stream. + //! \param os Output stream. + //! \param codepoint An unicode codepoint, ranging from 0x0 to 0x10FFFF inclusively. + template<typename OutputStream> + static void Encode(OutputStream& os, unsigned codepoint); + + //! \brief Decode a Unicode codepoint from an input stream. + //! \param is Input stream. + //! \param codepoint Output of the unicode codepoint. + //! \return true if a valid codepoint can be decoded from the stream. + template <typename InputStream> + static bool Decode(InputStream& is, unsigned* codepoint); + + //! \brief Validate one Unicode codepoint from an encoded stream. + //! \param is Input stream to obtain codepoint. + //! \param os Output for copying one codepoint. + //! \return true if it is valid. + //! \note This function just validating and copying the codepoint without actually decode it. + template <typename InputStream, typename OutputStream> + static bool Validate(InputStream& is, OutputStream& os); + + // The following functions are deal with byte streams. + + //! Take a character from input byte stream, skip BOM if exist. + template <typename InputByteStream> + static CharType TakeBOM(InputByteStream& is); + + //! Take a character from input byte stream. + template <typename InputByteStream> + static Ch Take(InputByteStream& is); + + //! Put BOM to output byte stream. + template <typename OutputByteStream> + static void PutBOM(OutputByteStream& os); + + //! Put a character to output byte stream. + template <typename OutputByteStream> + static void Put(OutputByteStream& os, Ch c); +}; +\endcode +*/ + +/////////////////////////////////////////////////////////////////////////////// +// UTF8 + +//! UTF-8 encoding. +/*! http://en.wikipedia.org/wiki/UTF-8 + http://tools.ietf.org/html/rfc3629 + \tparam CharType Code unit for storing 8-bit UTF-8 data. Default is char. + \note implements Encoding concept +*/ +template<typename CharType = char> +struct UTF8 { + typedef CharType Ch; + + enum { supportUnicode = 1 }; + + template<typename OutputStream> + static void Encode(OutputStream& os, unsigned codepoint) { + if (codepoint <= 0x7F) + os.Put(static_cast<Ch>(codepoint & 0xFF)); + else if (codepoint <= 0x7FF) { + os.Put(static_cast<Ch>(0xC0 | ((codepoint >> 6) & 0xFF))); + os.Put(static_cast<Ch>(0x80 | ((codepoint & 0x3F)))); + } + else if (codepoint <= 0xFFFF) { + os.Put(static_cast<Ch>(0xE0 | ((codepoint >> 12) & 0xFF))); + os.Put(static_cast<Ch>(0x80 | ((codepoint >> 6) & 0x3F))); + os.Put(static_cast<Ch>(0x80 | (codepoint & 0x3F))); + } + else { + RAPIDJSON_ASSERT(codepoint <= 0x10FFFF); + os.Put(static_cast<Ch>(0xF0 | ((codepoint >> 18) & 0xFF))); + os.Put(static_cast<Ch>(0x80 | ((codepoint >> 12) & 0x3F))); + os.Put(static_cast<Ch>(0x80 | ((codepoint >> 6) & 0x3F))); + os.Put(static_cast<Ch>(0x80 | (codepoint & 0x3F))); + } + } + + template<typename OutputStream> + static void EncodeUnsafe(OutputStream& os, unsigned codepoint) { + if (codepoint <= 0x7F) + PutUnsafe(os, static_cast<Ch>(codepoint & 0xFF)); + else if (codepoint <= 0x7FF) { + PutUnsafe(os, static_cast<Ch>(0xC0 | ((codepoint >> 6) & 0xFF))); + PutUnsafe(os, static_cast<Ch>(0x80 | ((codepoint & 0x3F)))); + } + else if (codepoint <= 0xFFFF) { + PutUnsafe(os, static_cast<Ch>(0xE0 | ((codepoint >> 12) & 0xFF))); + PutUnsafe(os, static_cast<Ch>(0x80 | ((codepoint >> 6) & 0x3F))); + PutUnsafe(os, static_cast<Ch>(0x80 | (codepoint & 0x3F))); + } + else { + RAPIDJSON_ASSERT(codepoint <= 0x10FFFF); + PutUnsafe(os, static_cast<Ch>(0xF0 | ((codepoint >> 18) & 0xFF))); + PutUnsafe(os, static_cast<Ch>(0x80 | ((codepoint >> 12) & 0x3F))); + PutUnsafe(os, static_cast<Ch>(0x80 | ((codepoint >> 6) & 0x3F))); + PutUnsafe(os, static_cast<Ch>(0x80 | (codepoint & 0x3F))); + } + } + + template <typename InputStream> + static bool Decode(InputStream& is, unsigned* codepoint) { +#define COPY() c = is.Take(); *codepoint = (*codepoint << 6) | (static_cast<unsigned char>(c) & 0x3Fu) +#define TRANS(mask) result &= ((GetRange(static_cast<unsigned char>(c)) & mask) != 0) +#define TAIL() COPY(); TRANS(0x70) + typename InputStream::Ch c = is.Take(); + if (!(c & 0x80)) { + *codepoint = static_cast<unsigned char>(c); + return true; + } + + unsigned char type = GetRange(static_cast<unsigned char>(c)); + if (type >= 32) { + *codepoint = 0; + } else { + *codepoint = (0xFF >> type) & static_cast<unsigned char>(c); + } + bool result = true; + switch (type) { + case 2: TAIL(); return result; + case 3: TAIL(); TAIL(); return result; + case 4: COPY(); TRANS(0x50); TAIL(); return result; + case 5: COPY(); TRANS(0x10); TAIL(); TAIL(); return result; + case 6: TAIL(); TAIL(); TAIL(); return result; + case 10: COPY(); TRANS(0x20); TAIL(); return result; + case 11: COPY(); TRANS(0x60); TAIL(); TAIL(); return result; + default: return false; + } +#undef COPY +#undef TRANS +#undef TAIL + } + + template <typename InputStream, typename OutputStream> + static bool Validate(InputStream& is, OutputStream& os) { +#define COPY() os.Put(c = is.Take()) +#define TRANS(mask) result &= ((GetRange(static_cast<unsigned char>(c)) & mask) != 0) +#define TAIL() COPY(); TRANS(0x70) + Ch c; + COPY(); + if (!(c & 0x80)) + return true; + + bool result = true; + switch (GetRange(static_cast<unsigned char>(c))) { + case 2: TAIL(); return result; + case 3: TAIL(); TAIL(); return result; + case 4: COPY(); TRANS(0x50); TAIL(); return result; + case 5: COPY(); TRANS(0x10); TAIL(); TAIL(); return result; + case 6: TAIL(); TAIL(); TAIL(); return result; + case 10: COPY(); TRANS(0x20); TAIL(); return result; + case 11: COPY(); TRANS(0x60); TAIL(); TAIL(); return result; + default: return false; + } +#undef COPY +#undef TRANS +#undef TAIL + } + + static unsigned char GetRange(unsigned char c) { + // Referring to DFA of http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ + // With new mapping 1 -> 0x10, 7 -> 0x20, 9 -> 0x40, such that AND operation can test multiple types. + static const unsigned char type[] = { + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10, + 0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40, + 0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20, + 0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20, + 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8, + }; + return type[c]; + } + + template <typename InputByteStream> + static CharType TakeBOM(InputByteStream& is) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); + typename InputByteStream::Ch c = Take(is); + if (static_cast<unsigned char>(c) != 0xEFu) return c; + c = is.Take(); + if (static_cast<unsigned char>(c) != 0xBBu) return c; + c = is.Take(); + if (static_cast<unsigned char>(c) != 0xBFu) return c; + c = is.Take(); + return c; + } + + template <typename InputByteStream> + static Ch Take(InputByteStream& is) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); + return static_cast<Ch>(is.Take()); + } + + template <typename OutputByteStream> + static void PutBOM(OutputByteStream& os) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); + os.Put(static_cast<typename OutputByteStream::Ch>(0xEFu)); + os.Put(static_cast<typename OutputByteStream::Ch>(0xBBu)); + os.Put(static_cast<typename OutputByteStream::Ch>(0xBFu)); + } + + template <typename OutputByteStream> + static void Put(OutputByteStream& os, Ch c) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); + os.Put(static_cast<typename OutputByteStream::Ch>(c)); + } +}; + +/////////////////////////////////////////////////////////////////////////////// +// UTF16 + +//! UTF-16 encoding. +/*! http://en.wikipedia.org/wiki/UTF-16 + http://tools.ietf.org/html/rfc2781 + \tparam CharType Type for storing 16-bit UTF-16 data. Default is wchar_t. C++11 may use char16_t instead. + \note implements Encoding concept + + \note For in-memory access, no need to concern endianness. The code units and code points are represented by CPU's endianness. + For streaming, use UTF16LE and UTF16BE, which handle endianness. +*/ +template<typename CharType = wchar_t> +struct UTF16 { + typedef CharType Ch; + RAPIDJSON_STATIC_ASSERT(sizeof(Ch) >= 2); + + enum { supportUnicode = 1 }; + + template<typename OutputStream> + static void Encode(OutputStream& os, unsigned codepoint) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 2); + if (codepoint <= 0xFFFF) { + RAPIDJSON_ASSERT(codepoint < 0xD800 || codepoint > 0xDFFF); // Code point itself cannot be surrogate pair + os.Put(static_cast<typename OutputStream::Ch>(codepoint)); + } + else { + RAPIDJSON_ASSERT(codepoint <= 0x10FFFF); + unsigned v = codepoint - 0x10000; + os.Put(static_cast<typename OutputStream::Ch>((v >> 10) | 0xD800)); + os.Put((v & 0x3FF) | 0xDC00); + } + } + + + template<typename OutputStream> + static void EncodeUnsafe(OutputStream& os, unsigned codepoint) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 2); + if (codepoint <= 0xFFFF) { + RAPIDJSON_ASSERT(codepoint < 0xD800 || codepoint > 0xDFFF); // Code point itself cannot be surrogate pair + PutUnsafe(os, static_cast<typename OutputStream::Ch>(codepoint)); + } + else { + RAPIDJSON_ASSERT(codepoint <= 0x10FFFF); + unsigned v = codepoint - 0x10000; + PutUnsafe(os, static_cast<typename OutputStream::Ch>((v >> 10) | 0xD800)); + PutUnsafe(os, (v & 0x3FF) | 0xDC00); + } + } + + template <typename InputStream> + static bool Decode(InputStream& is, unsigned* codepoint) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename InputStream::Ch) >= 2); + typename InputStream::Ch c = is.Take(); + if (c < 0xD800 || c > 0xDFFF) { + *codepoint = static_cast<unsigned>(c); + return true; + } + else if (c <= 0xDBFF) { + *codepoint = (static_cast<unsigned>(c) & 0x3FF) << 10; + c = is.Take(); + *codepoint |= (static_cast<unsigned>(c) & 0x3FF); + *codepoint += 0x10000; + return c >= 0xDC00 && c <= 0xDFFF; + } + return false; + } + + template <typename InputStream, typename OutputStream> + static bool Validate(InputStream& is, OutputStream& os) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename InputStream::Ch) >= 2); + RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 2); + typename InputStream::Ch c; + os.Put(static_cast<typename OutputStream::Ch>(c = is.Take())); + if (c < 0xD800 || c > 0xDFFF) + return true; + else if (c <= 0xDBFF) { + os.Put(c = is.Take()); + return c >= 0xDC00 && c <= 0xDFFF; + } + return false; + } +}; + +//! UTF-16 little endian encoding. +template<typename CharType = wchar_t> +struct UTF16LE : UTF16<CharType> { + template <typename InputByteStream> + static CharType TakeBOM(InputByteStream& is) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); + CharType c = Take(is); + return static_cast<uint16_t>(c) == 0xFEFFu ? Take(is) : c; + } + + template <typename InputByteStream> + static CharType Take(InputByteStream& is) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); + unsigned c = static_cast<uint8_t>(is.Take()); + c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 8; + return static_cast<CharType>(c); + } + + template <typename OutputByteStream> + static void PutBOM(OutputByteStream& os) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); + os.Put(static_cast<typename OutputByteStream::Ch>(0xFFu)); + os.Put(static_cast<typename OutputByteStream::Ch>(0xFEu)); + } + + template <typename OutputByteStream> + static void Put(OutputByteStream& os, CharType c) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); + os.Put(static_cast<typename OutputByteStream::Ch>(static_cast<unsigned>(c) & 0xFFu)); + os.Put(static_cast<typename OutputByteStream::Ch>((static_cast<unsigned>(c) >> 8) & 0xFFu)); + } +}; + +//! UTF-16 big endian encoding. +template<typename CharType = wchar_t> +struct UTF16BE : UTF16<CharType> { + template <typename InputByteStream> + static CharType TakeBOM(InputByteStream& is) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); + CharType c = Take(is); + return static_cast<uint16_t>(c) == 0xFEFFu ? Take(is) : c; + } + + template <typename InputByteStream> + static CharType Take(InputByteStream& is) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); + unsigned c = static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 8; + c |= static_cast<uint8_t>(is.Take()); + return static_cast<CharType>(c); + } + + template <typename OutputByteStream> + static void PutBOM(OutputByteStream& os) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); + os.Put(static_cast<typename OutputByteStream::Ch>(0xFEu)); + os.Put(static_cast<typename OutputByteStream::Ch>(0xFFu)); + } + + template <typename OutputByteStream> + static void Put(OutputByteStream& os, CharType c) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); + os.Put(static_cast<typename OutputByteStream::Ch>((static_cast<unsigned>(c) >> 8) & 0xFFu)); + os.Put(static_cast<typename OutputByteStream::Ch>(static_cast<unsigned>(c) & 0xFFu)); + } +}; + +/////////////////////////////////////////////////////////////////////////////// +// UTF32 + +//! UTF-32 encoding. +/*! http://en.wikipedia.org/wiki/UTF-32 + \tparam CharType Type for storing 32-bit UTF-32 data. Default is unsigned. C++11 may use char32_t instead. + \note implements Encoding concept + + \note For in-memory access, no need to concern endianness. The code units and code points are represented by CPU's endianness. + For streaming, use UTF32LE and UTF32BE, which handle endianness. +*/ +template<typename CharType = unsigned> +struct UTF32 { + typedef CharType Ch; + RAPIDJSON_STATIC_ASSERT(sizeof(Ch) >= 4); + + enum { supportUnicode = 1 }; + + template<typename OutputStream> + static void Encode(OutputStream& os, unsigned codepoint) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 4); + RAPIDJSON_ASSERT(codepoint <= 0x10FFFF); + os.Put(codepoint); + } + + template<typename OutputStream> + static void EncodeUnsafe(OutputStream& os, unsigned codepoint) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 4); + RAPIDJSON_ASSERT(codepoint <= 0x10FFFF); + PutUnsafe(os, codepoint); + } + + template <typename InputStream> + static bool Decode(InputStream& is, unsigned* codepoint) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename InputStream::Ch) >= 4); + Ch c = is.Take(); + *codepoint = c; + return c <= 0x10FFFF; + } + + template <typename InputStream, typename OutputStream> + static bool Validate(InputStream& is, OutputStream& os) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename InputStream::Ch) >= 4); + Ch c; + os.Put(c = is.Take()); + return c <= 0x10FFFF; + } +}; + +//! UTF-32 little endian enocoding. +template<typename CharType = unsigned> +struct UTF32LE : UTF32<CharType> { + template <typename InputByteStream> + static CharType TakeBOM(InputByteStream& is) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); + CharType c = Take(is); + return static_cast<uint32_t>(c) == 0x0000FEFFu ? Take(is) : c; + } + + template <typename InputByteStream> + static CharType Take(InputByteStream& is) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); + unsigned c = static_cast<uint8_t>(is.Take()); + c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 8; + c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 16; + c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 24; + return static_cast<CharType>(c); + } + + template <typename OutputByteStream> + static void PutBOM(OutputByteStream& os) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); + os.Put(static_cast<typename OutputByteStream::Ch>(0xFFu)); + os.Put(static_cast<typename OutputByteStream::Ch>(0xFEu)); + os.Put(static_cast<typename OutputByteStream::Ch>(0x00u)); + os.Put(static_cast<typename OutputByteStream::Ch>(0x00u)); + } + + template <typename OutputByteStream> + static void Put(OutputByteStream& os, CharType c) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); + os.Put(static_cast<typename OutputByteStream::Ch>(c & 0xFFu)); + os.Put(static_cast<typename OutputByteStream::Ch>((c >> 8) & 0xFFu)); + os.Put(static_cast<typename OutputByteStream::Ch>((c >> 16) & 0xFFu)); + os.Put(static_cast<typename OutputByteStream::Ch>((c >> 24) & 0xFFu)); + } +}; + +//! UTF-32 big endian encoding. +template<typename CharType = unsigned> +struct UTF32BE : UTF32<CharType> { + template <typename InputByteStream> + static CharType TakeBOM(InputByteStream& is) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); + CharType c = Take(is); + return static_cast<uint32_t>(c) == 0x0000FEFFu ? Take(is) : c; + } + + template <typename InputByteStream> + static CharType Take(InputByteStream& is) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); + unsigned c = static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 24; + c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 16; + c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 8; + c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take())); + return static_cast<CharType>(c); + } + + template <typename OutputByteStream> + static void PutBOM(OutputByteStream& os) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); + os.Put(static_cast<typename OutputByteStream::Ch>(0x00u)); + os.Put(static_cast<typename OutputByteStream::Ch>(0x00u)); + os.Put(static_cast<typename OutputByteStream::Ch>(0xFEu)); + os.Put(static_cast<typename OutputByteStream::Ch>(0xFFu)); + } + + template <typename OutputByteStream> + static void Put(OutputByteStream& os, CharType c) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); + os.Put(static_cast<typename OutputByteStream::Ch>((c >> 24) & 0xFFu)); + os.Put(static_cast<typename OutputByteStream::Ch>((c >> 16) & 0xFFu)); + os.Put(static_cast<typename OutputByteStream::Ch>((c >> 8) & 0xFFu)); + os.Put(static_cast<typename OutputByteStream::Ch>(c & 0xFFu)); + } +}; + +/////////////////////////////////////////////////////////////////////////////// +// ASCII + +//! ASCII encoding. +/*! http://en.wikipedia.org/wiki/ASCII + \tparam CharType Code unit for storing 7-bit ASCII data. Default is char. + \note implements Encoding concept +*/ +template<typename CharType = char> +struct ASCII { + typedef CharType Ch; + + enum { supportUnicode = 0 }; + + template<typename OutputStream> + static void Encode(OutputStream& os, unsigned codepoint) { + RAPIDJSON_ASSERT(codepoint <= 0x7F); + os.Put(static_cast<Ch>(codepoint & 0xFF)); + } + + template<typename OutputStream> + static void EncodeUnsafe(OutputStream& os, unsigned codepoint) { + RAPIDJSON_ASSERT(codepoint <= 0x7F); + PutUnsafe(os, static_cast<Ch>(codepoint & 0xFF)); + } + + template <typename InputStream> + static bool Decode(InputStream& is, unsigned* codepoint) { + uint8_t c = static_cast<uint8_t>(is.Take()); + *codepoint = c; + return c <= 0X7F; + } + + template <typename InputStream, typename OutputStream> + static bool Validate(InputStream& is, OutputStream& os) { + uint8_t c = static_cast<uint8_t>(is.Take()); + os.Put(static_cast<typename OutputStream::Ch>(c)); + return c <= 0x7F; + } + + template <typename InputByteStream> + static CharType TakeBOM(InputByteStream& is) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); + uint8_t c = static_cast<uint8_t>(Take(is)); + return static_cast<Ch>(c); + } + + template <typename InputByteStream> + static Ch Take(InputByteStream& is) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); + return static_cast<Ch>(is.Take()); + } + + template <typename OutputByteStream> + static void PutBOM(OutputByteStream& os) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); + (void)os; + } + + template <typename OutputByteStream> + static void Put(OutputByteStream& os, Ch c) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); + os.Put(static_cast<typename OutputByteStream::Ch>(c)); + } +}; + +/////////////////////////////////////////////////////////////////////////////// +// AutoUTF + +//! Runtime-specified UTF encoding type of a stream. +enum UTFType { + kUTF8 = 0, //!< UTF-8. + kUTF16LE = 1, //!< UTF-16 little endian. + kUTF16BE = 2, //!< UTF-16 big endian. + kUTF32LE = 3, //!< UTF-32 little endian. + kUTF32BE = 4 //!< UTF-32 big endian. +}; + +//! Dynamically select encoding according to stream's runtime-specified UTF encoding type. +/*! \note This class can be used with AutoUTFInputtStream and AutoUTFOutputStream, which provides GetType(). +*/ +template<typename CharType> +struct AutoUTF { + typedef CharType Ch; + + enum { supportUnicode = 1 }; + +#define RAPIDJSON_ENCODINGS_FUNC(x) UTF8<Ch>::x, UTF16LE<Ch>::x, UTF16BE<Ch>::x, UTF32LE<Ch>::x, UTF32BE<Ch>::x + + template<typename OutputStream> + RAPIDJSON_FORCEINLINE static void Encode(OutputStream& os, unsigned codepoint) { + typedef void (*EncodeFunc)(OutputStream&, unsigned); + static const EncodeFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Encode) }; + (*f[os.GetType()])(os, codepoint); + } + + template<typename OutputStream> + RAPIDJSON_FORCEINLINE static void EncodeUnsafe(OutputStream& os, unsigned codepoint) { + typedef void (*EncodeFunc)(OutputStream&, unsigned); + static const EncodeFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(EncodeUnsafe) }; + (*f[os.GetType()])(os, codepoint); + } + + template <typename InputStream> + RAPIDJSON_FORCEINLINE static bool Decode(InputStream& is, unsigned* codepoint) { + typedef bool (*DecodeFunc)(InputStream&, unsigned*); + static const DecodeFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Decode) }; + return (*f[is.GetType()])(is, codepoint); + } + + template <typename InputStream, typename OutputStream> + RAPIDJSON_FORCEINLINE static bool Validate(InputStream& is, OutputStream& os) { + typedef bool (*ValidateFunc)(InputStream&, OutputStream&); + static const ValidateFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Validate) }; + return (*f[is.GetType()])(is, os); + } + +#undef RAPIDJSON_ENCODINGS_FUNC +}; + +/////////////////////////////////////////////////////////////////////////////// +// Transcoder + +//! Encoding conversion. +template<typename SourceEncoding, typename TargetEncoding> +struct Transcoder { + //! Take one Unicode codepoint from source encoding, convert it to target encoding and put it to the output stream. + template<typename InputStream, typename OutputStream> + RAPIDJSON_FORCEINLINE static bool Transcode(InputStream& is, OutputStream& os) { + unsigned codepoint; + if (!SourceEncoding::Decode(is, &codepoint)) + return false; + TargetEncoding::Encode(os, codepoint); + return true; + } + + template<typename InputStream, typename OutputStream> + RAPIDJSON_FORCEINLINE static bool TranscodeUnsafe(InputStream& is, OutputStream& os) { + unsigned codepoint; + if (!SourceEncoding::Decode(is, &codepoint)) + return false; + TargetEncoding::EncodeUnsafe(os, codepoint); + return true; + } + + //! Validate one Unicode codepoint from an encoded stream. + template<typename InputStream, typename OutputStream> + RAPIDJSON_FORCEINLINE static bool Validate(InputStream& is, OutputStream& os) { + return Transcode(is, os); // Since source/target encoding is different, must transcode. + } +}; + +// Forward declaration. +template<typename Stream> +inline void PutUnsafe(Stream& stream, typename Stream::Ch c); + +//! Specialization of Transcoder with same source and target encoding. +template<typename Encoding> +struct Transcoder<Encoding, Encoding> { + template<typename InputStream, typename OutputStream> + RAPIDJSON_FORCEINLINE static bool Transcode(InputStream& is, OutputStream& os) { + os.Put(is.Take()); // Just copy one code unit. This semantic is different from primary template class. + return true; + } + + template<typename InputStream, typename OutputStream> + RAPIDJSON_FORCEINLINE static bool TranscodeUnsafe(InputStream& is, OutputStream& os) { + PutUnsafe(os, is.Take()); // Just copy one code unit. This semantic is different from primary template class. + return true; + } + + template<typename InputStream, typename OutputStream> + RAPIDJSON_FORCEINLINE static bool Validate(InputStream& is, OutputStream& os) { + return Encoding::Validate(is, os); // source/target encoding are the same + } +}; + +RAPIDJSON_NAMESPACE_END + +#if defined(__GNUC__) || defined(_MSC_VER) +RAPIDJSON_DIAG_POP +#endif + +#endif // RAPIDJSON_ENCODINGS_H_ diff --git a/xmrstak/rapidjson/error/en.h b/xmrstak/rapidjson/error/en.h new file mode 100644 index 0000000..2db838b --- /dev/null +++ b/xmrstak/rapidjson/error/en.h @@ -0,0 +1,74 @@ +// Tencent is pleased to support the open source community by making RapidJSON available. +// +// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. +// +// Licensed under the MIT License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// http://opensource.org/licenses/MIT +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef RAPIDJSON_ERROR_EN_H_ +#define RAPIDJSON_ERROR_EN_H_ + +#include "error.h" + +#ifdef __clang__ +RAPIDJSON_DIAG_PUSH +RAPIDJSON_DIAG_OFF(switch-enum) +RAPIDJSON_DIAG_OFF(covered-switch-default) +#endif + +RAPIDJSON_NAMESPACE_BEGIN + +//! Maps error code of parsing into error message. +/*! + \ingroup RAPIDJSON_ERRORS + \param parseErrorCode Error code obtained in parsing. + \return the error message. + \note User can make a copy of this function for localization. + Using switch-case is safer for future modification of error codes. +*/ +inline const RAPIDJSON_ERROR_CHARTYPE* GetParseError_En(ParseErrorCode parseErrorCode) { + switch (parseErrorCode) { + case kParseErrorNone: return RAPIDJSON_ERROR_STRING("No error."); + + case kParseErrorDocumentEmpty: return RAPIDJSON_ERROR_STRING("The document is empty."); + case kParseErrorDocumentRootNotSingular: return RAPIDJSON_ERROR_STRING("The document root must not be followed by other values."); + + case kParseErrorValueInvalid: return RAPIDJSON_ERROR_STRING("Invalid value."); + + case kParseErrorObjectMissName: return RAPIDJSON_ERROR_STRING("Missing a name for object member."); + case kParseErrorObjectMissColon: return RAPIDJSON_ERROR_STRING("Missing a colon after a name of object member."); + case kParseErrorObjectMissCommaOrCurlyBracket: return RAPIDJSON_ERROR_STRING("Missing a comma or '}' after an object member."); + + case kParseErrorArrayMissCommaOrSquareBracket: return RAPIDJSON_ERROR_STRING("Missing a comma or ']' after an array element."); + + case kParseErrorStringUnicodeEscapeInvalidHex: return RAPIDJSON_ERROR_STRING("Incorrect hex digit after \\u escape in string."); + case kParseErrorStringUnicodeSurrogateInvalid: return RAPIDJSON_ERROR_STRING("The surrogate pair in string is invalid."); + case kParseErrorStringEscapeInvalid: return RAPIDJSON_ERROR_STRING("Invalid escape character in string."); + case kParseErrorStringMissQuotationMark: return RAPIDJSON_ERROR_STRING("Missing a closing quotation mark in string."); + case kParseErrorStringInvalidEncoding: return RAPIDJSON_ERROR_STRING("Invalid encoding in string."); + + case kParseErrorNumberTooBig: return RAPIDJSON_ERROR_STRING("Number too big to be stored in double."); + case kParseErrorNumberMissFraction: return RAPIDJSON_ERROR_STRING("Miss fraction part in number."); + case kParseErrorNumberMissExponent: return RAPIDJSON_ERROR_STRING("Miss exponent in number."); + + case kParseErrorTermination: return RAPIDJSON_ERROR_STRING("Terminate parsing due to Handler error."); + case kParseErrorUnspecificSyntaxError: return RAPIDJSON_ERROR_STRING("Unspecific syntax error."); + + default: return RAPIDJSON_ERROR_STRING("Unknown error."); + } +} + +RAPIDJSON_NAMESPACE_END + +#ifdef __clang__ +RAPIDJSON_DIAG_POP +#endif + +#endif // RAPIDJSON_ERROR_EN_H_ diff --git a/xmrstak/rapidjson/error/error.h b/xmrstak/rapidjson/error/error.h new file mode 100644 index 0000000..95cb31a --- /dev/null +++ b/xmrstak/rapidjson/error/error.h @@ -0,0 +1,155 @@ +// Tencent is pleased to support the open source community by making RapidJSON available. +// +// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. +// +// Licensed under the MIT License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// http://opensource.org/licenses/MIT +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef RAPIDJSON_ERROR_ERROR_H_ +#define RAPIDJSON_ERROR_ERROR_H_ + +#include "../rapidjson.h" + +#ifdef __clang__ +RAPIDJSON_DIAG_PUSH +RAPIDJSON_DIAG_OFF(padded) +#endif + +/*! \file error.h */ + +/*! \defgroup RAPIDJSON_ERRORS RapidJSON error handling */ + +/////////////////////////////////////////////////////////////////////////////// +// RAPIDJSON_ERROR_CHARTYPE + +//! Character type of error messages. +/*! \ingroup RAPIDJSON_ERRORS + The default character type is \c char. + On Windows, user can define this macro as \c TCHAR for supporting both + unicode/non-unicode settings. +*/ +#ifndef RAPIDJSON_ERROR_CHARTYPE +#define RAPIDJSON_ERROR_CHARTYPE char +#endif + +/////////////////////////////////////////////////////////////////////////////// +// RAPIDJSON_ERROR_STRING + +//! Macro for converting string literial to \ref RAPIDJSON_ERROR_CHARTYPE[]. +/*! \ingroup RAPIDJSON_ERRORS + By default this conversion macro does nothing. + On Windows, user can define this macro as \c _T(x) for supporting both + unicode/non-unicode settings. +*/ +#ifndef RAPIDJSON_ERROR_STRING +#define RAPIDJSON_ERROR_STRING(x) x +#endif + +RAPIDJSON_NAMESPACE_BEGIN + +/////////////////////////////////////////////////////////////////////////////// +// ParseErrorCode + +//! Error code of parsing. +/*! \ingroup RAPIDJSON_ERRORS + \see GenericReader::Parse, GenericReader::GetParseErrorCode +*/ +enum ParseErrorCode { + kParseErrorNone = 0, //!< No error. + + kParseErrorDocumentEmpty, //!< The document is empty. + kParseErrorDocumentRootNotSingular, //!< The document root must not follow by other values. + + kParseErrorValueInvalid, //!< Invalid value. + + kParseErrorObjectMissName, //!< Missing a name for object member. + kParseErrorObjectMissColon, //!< Missing a colon after a name of object member. + kParseErrorObjectMissCommaOrCurlyBracket, //!< Missing a comma or '}' after an object member. + + kParseErrorArrayMissCommaOrSquareBracket, //!< Missing a comma or ']' after an array element. + + kParseErrorStringUnicodeEscapeInvalidHex, //!< Incorrect hex digit after \\u escape in string. + kParseErrorStringUnicodeSurrogateInvalid, //!< The surrogate pair in string is invalid. + kParseErrorStringEscapeInvalid, //!< Invalid escape character in string. + kParseErrorStringMissQuotationMark, //!< Missing a closing quotation mark in string. + kParseErrorStringInvalidEncoding, //!< Invalid encoding in string. + + kParseErrorNumberTooBig, //!< Number too big to be stored in double. + kParseErrorNumberMissFraction, //!< Miss fraction part in number. + kParseErrorNumberMissExponent, //!< Miss exponent in number. + + kParseErrorTermination, //!< Parsing was terminated. + kParseErrorUnspecificSyntaxError //!< Unspecific syntax error. +}; + +//! Result of parsing (wraps ParseErrorCode) +/*! + \ingroup RAPIDJSON_ERRORS + \code + Document doc; + ParseResult ok = doc.Parse("[42]"); + if (!ok) { + fprintf(stderr, "JSON parse error: %s (%u)", + GetParseError_En(ok.Code()), ok.Offset()); + exit(EXIT_FAILURE); + } + \endcode + \see GenericReader::Parse, GenericDocument::Parse +*/ +struct ParseResult { +public: + //! Default constructor, no error. + ParseResult() : code_(kParseErrorNone), offset_(0) {} + //! Constructor to set an error. + ParseResult(ParseErrorCode code, size_t offset) : code_(code), offset_(offset) {} + + //! Get the error code. + ParseErrorCode Code() const { return code_; } + //! Get the error offset, if \ref IsError(), 0 otherwise. + size_t Offset() const { return offset_; } + + //! Conversion to \c bool, returns \c true, iff !\ref IsError(). + operator bool() const { return !IsError(); } + //! Whether the result is an error. + bool IsError() const { return code_ != kParseErrorNone; } + + bool operator==(const ParseResult& that) const { return code_ == that.code_; } + bool operator==(ParseErrorCode code) const { return code_ == code; } + friend bool operator==(ParseErrorCode code, const ParseResult & err) { return code == err.code_; } + + //! Reset error code. + void Clear() { Set(kParseErrorNone); } + //! Update error code and offset. + void Set(ParseErrorCode code, size_t offset = 0) { code_ = code; offset_ = offset; } + +private: + ParseErrorCode code_; + size_t offset_; +}; + +//! Function pointer type of GetParseError(). +/*! \ingroup RAPIDJSON_ERRORS + + This is the prototype for \c GetParseError_X(), where \c X is a locale. + User can dynamically change locale in runtime, e.g.: +\code + GetParseErrorFunc GetParseError = GetParseError_En; // or whatever + const RAPIDJSON_ERROR_CHARTYPE* s = GetParseError(document.GetParseErrorCode()); +\endcode +*/ +typedef const RAPIDJSON_ERROR_CHARTYPE* (*GetParseErrorFunc)(ParseErrorCode); + +RAPIDJSON_NAMESPACE_END + +#ifdef __clang__ +RAPIDJSON_DIAG_POP +#endif + +#endif // RAPIDJSON_ERROR_ERROR_H_ diff --git a/xmrstak/rapidjson/filereadstream.h b/xmrstak/rapidjson/filereadstream.h new file mode 100644 index 0000000..b56ea13 --- /dev/null +++ b/xmrstak/rapidjson/filereadstream.h @@ -0,0 +1,99 @@ +// Tencent is pleased to support the open source community by making RapidJSON available. +// +// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. +// +// Licensed under the MIT License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// http://opensource.org/licenses/MIT +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef RAPIDJSON_FILEREADSTREAM_H_ +#define RAPIDJSON_FILEREADSTREAM_H_ + +#include "stream.h" +#include <cstdio> + +#ifdef __clang__ +RAPIDJSON_DIAG_PUSH +RAPIDJSON_DIAG_OFF(padded) +RAPIDJSON_DIAG_OFF(unreachable-code) +RAPIDJSON_DIAG_OFF(missing-noreturn) +#endif + +RAPIDJSON_NAMESPACE_BEGIN + +//! File byte stream for input using fread(). +/*! + \note implements Stream concept +*/ +class FileReadStream { +public: + typedef char Ch; //!< Character type (byte). + + //! Constructor. + /*! + \param fp File pointer opened for read. + \param buffer user-supplied buffer. + \param bufferSize size of buffer in bytes. Must >=4 bytes. + */ + FileReadStream(std::FILE* fp, char* buffer, size_t bufferSize) : fp_(fp), buffer_(buffer), bufferSize_(bufferSize), bufferLast_(0), current_(buffer_), readCount_(0), count_(0), eof_(false) { + RAPIDJSON_ASSERT(fp_ != 0); + RAPIDJSON_ASSERT(bufferSize >= 4); + Read(); + } + + Ch Peek() const { return *current_; } + Ch Take() { Ch c = *current_; Read(); return c; } + size_t Tell() const { return count_ + static_cast<size_t>(current_ - buffer_); } + + // Not implemented + void Put(Ch) { RAPIDJSON_ASSERT(false); } + void Flush() { RAPIDJSON_ASSERT(false); } + Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; } + size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; } + + // For encoding detection only. + const Ch* Peek4() const { + return (current_ + 4 <= bufferLast_) ? current_ : 0; + } + +private: + void Read() { + if (current_ < bufferLast_) + ++current_; + else if (!eof_) { + count_ += readCount_; + readCount_ = fread(buffer_, 1, bufferSize_, fp_); + bufferLast_ = buffer_ + readCount_ - 1; + current_ = buffer_; + + if (readCount_ < bufferSize_) { + buffer_[readCount_] = '\0'; + ++bufferLast_; + eof_ = true; + } + } + } + + std::FILE* fp_; + Ch *buffer_; + size_t bufferSize_; + Ch *bufferLast_; + Ch *current_; + size_t readCount_; + size_t count_; //!< Number of characters read + bool eof_; +}; + +RAPIDJSON_NAMESPACE_END + +#ifdef __clang__ +RAPIDJSON_DIAG_POP +#endif + +#endif // RAPIDJSON_FILESTREAM_H_ diff --git a/xmrstak/rapidjson/filewritestream.h b/xmrstak/rapidjson/filewritestream.h new file mode 100644 index 0000000..6378dd6 --- /dev/null +++ b/xmrstak/rapidjson/filewritestream.h @@ -0,0 +1,104 @@ +// Tencent is pleased to support the open source community by making RapidJSON available. +// +// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. +// +// Licensed under the MIT License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// http://opensource.org/licenses/MIT +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef RAPIDJSON_FILEWRITESTREAM_H_ +#define RAPIDJSON_FILEWRITESTREAM_H_ + +#include "stream.h" +#include <cstdio> + +#ifdef __clang__ +RAPIDJSON_DIAG_PUSH +RAPIDJSON_DIAG_OFF(unreachable-code) +#endif + +RAPIDJSON_NAMESPACE_BEGIN + +//! Wrapper of C file stream for input using fread(). +/*! + \note implements Stream concept +*/ +class FileWriteStream { +public: + typedef char Ch; //!< Character type. Only support char. + + FileWriteStream(std::FILE* fp, char* buffer, size_t bufferSize) : fp_(fp), buffer_(buffer), bufferEnd_(buffer + bufferSize), current_(buffer_) { + RAPIDJSON_ASSERT(fp_ != 0); + } + + void Put(char c) { + if (current_ >= bufferEnd_) + Flush(); + + *current_++ = c; + } + + void PutN(char c, size_t n) { + size_t avail = static_cast<size_t>(bufferEnd_ - current_); + while (n > avail) { + std::memset(current_, c, avail); + current_ += avail; + Flush(); + n -= avail; + avail = static_cast<size_t>(bufferEnd_ - current_); + } + + if (n > 0) { + std::memset(current_, c, n); + current_ += n; + } + } + + void Flush() { + if (current_ != buffer_) { + size_t result = fwrite(buffer_, 1, static_cast<size_t>(current_ - buffer_), fp_); + if (result < static_cast<size_t>(current_ - buffer_)) { + // failure deliberately ignored at this time + // added to avoid warn_unused_result build errors + } + current_ = buffer_; + } + } + + // Not implemented + char Peek() const { RAPIDJSON_ASSERT(false); return 0; } + char Take() { RAPIDJSON_ASSERT(false); return 0; } + size_t Tell() const { RAPIDJSON_ASSERT(false); return 0; } + char* PutBegin() { RAPIDJSON_ASSERT(false); return 0; } + size_t PutEnd(char*) { RAPIDJSON_ASSERT(false); return 0; } + +private: + // Prohibit copy constructor & assignment operator. + FileWriteStream(const FileWriteStream&); + FileWriteStream& operator=(const FileWriteStream&); + + std::FILE* fp_; + char *buffer_; + char *bufferEnd_; + char *current_; +}; + +//! Implement specialized version of PutN() with memset() for better performance. +template<> +inline void PutN(FileWriteStream& stream, char c, size_t n) { + stream.PutN(c, n); +} + +RAPIDJSON_NAMESPACE_END + +#ifdef __clang__ +RAPIDJSON_DIAG_POP +#endif + +#endif // RAPIDJSON_FILESTREAM_H_ diff --git a/xmrstak/rapidjson/fwd.h b/xmrstak/rapidjson/fwd.h new file mode 100644 index 0000000..e8104e8 --- /dev/null +++ b/xmrstak/rapidjson/fwd.h @@ -0,0 +1,151 @@ +// Tencent is pleased to support the open source community by making RapidJSON available. +// +// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. +// +// Licensed under the MIT License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// http://opensource.org/licenses/MIT +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef RAPIDJSON_FWD_H_ +#define RAPIDJSON_FWD_H_ + +#include "rapidjson.h" + +RAPIDJSON_NAMESPACE_BEGIN + +// encodings.h + +template<typename CharType> struct UTF8; +template<typename CharType> struct UTF16; +template<typename CharType> struct UTF16BE; +template<typename CharType> struct UTF16LE; +template<typename CharType> struct UTF32; +template<typename CharType> struct UTF32BE; +template<typename CharType> struct UTF32LE; +template<typename CharType> struct ASCII; +template<typename CharType> struct AutoUTF; + +template<typename SourceEncoding, typename TargetEncoding> +struct Transcoder; + +// allocators.h + +class CrtAllocator; + +template <typename BaseAllocator> +class MemoryPoolAllocator; + +// stream.h + +template <typename Encoding> +struct GenericStringStream; + +typedef GenericStringStream<UTF8<char> > StringStream; + +template <typename Encoding> +struct GenericInsituStringStream; + +typedef GenericInsituStringStream<UTF8<char> > InsituStringStream; + +// stringbuffer.h + +template <typename Encoding, typename Allocator> +class GenericStringBuffer; + +typedef GenericStringBuffer<UTF8<char>, CrtAllocator> StringBuffer; + +// filereadstream.h + +class FileReadStream; + +// filewritestream.h + +class FileWriteStream; + +// memorybuffer.h + +template <typename Allocator> +struct GenericMemoryBuffer; + +typedef GenericMemoryBuffer<CrtAllocator> MemoryBuffer; + +// memorystream.h + +struct MemoryStream; + +// reader.h + +template<typename Encoding, typename Derived> +struct BaseReaderHandler; + +template <typename SourceEncoding, typename TargetEncoding, typename StackAllocator> +class GenericReader; + +typedef GenericReader<UTF8<char>, UTF8<char>, CrtAllocator> Reader; + +// writer.h + +template<typename OutputStream, typename SourceEncoding, typename TargetEncoding, typename StackAllocator, unsigned writeFlags> +class Writer; + +// prettywriter.h + +template<typename OutputStream, typename SourceEncoding, typename TargetEncoding, typename StackAllocator, unsigned writeFlags> +class PrettyWriter; + +// document.h + +template <typename Encoding, typename Allocator> +struct GenericMember; + +template <bool Const, typename Encoding, typename Allocator> +class GenericMemberIterator; + +template<typename CharType> +struct GenericStringRef; + +template <typename Encoding, typename Allocator> +class GenericValue; + +typedef GenericValue<UTF8<char>, MemoryPoolAllocator<CrtAllocator> > Value; + +template <typename Encoding, typename Allocator, typename StackAllocator> +class GenericDocument; + +typedef GenericDocument<UTF8<char>, MemoryPoolAllocator<CrtAllocator>, CrtAllocator> Document; + +// pointer.h + +template <typename ValueType, typename Allocator> +class GenericPointer; + +typedef GenericPointer<Value, CrtAllocator> Pointer; + +// schema.h + +template <typename SchemaDocumentType> +class IGenericRemoteSchemaDocumentProvider; + +template <typename ValueT, typename Allocator> +class GenericSchemaDocument; + +typedef GenericSchemaDocument<Value, CrtAllocator> SchemaDocument; +typedef IGenericRemoteSchemaDocumentProvider<SchemaDocument> IRemoteSchemaDocumentProvider; + +template < + typename SchemaDocumentType, + typename OutputHandler, + typename StateAllocator> +class GenericSchemaValidator; + +typedef GenericSchemaValidator<SchemaDocument, BaseReaderHandler<UTF8<char>, void>, CrtAllocator> SchemaValidator; + +RAPIDJSON_NAMESPACE_END + +#endif // RAPIDJSON_RAPIDJSONFWD_H_ diff --git a/xmrstak/rapidjson/internal/biginteger.h b/xmrstak/rapidjson/internal/biginteger.h new file mode 100644 index 0000000..9d3e88c --- /dev/null +++ b/xmrstak/rapidjson/internal/biginteger.h @@ -0,0 +1,290 @@ +// Tencent is pleased to support the open source community by making RapidJSON available. +// +// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. +// +// Licensed under the MIT License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// http://opensource.org/licenses/MIT +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef RAPIDJSON_BIGINTEGER_H_ +#define RAPIDJSON_BIGINTEGER_H_ + +#include "../rapidjson.h" + +#if defined(_MSC_VER) && defined(_M_AMD64) +#include <intrin.h> // for _umul128 +#pragma intrinsic(_umul128) +#endif + +RAPIDJSON_NAMESPACE_BEGIN +namespace internal { + +class BigInteger { +public: + typedef uint64_t Type; + + BigInteger(const BigInteger& rhs) : count_(rhs.count_) { + std::memcpy(digits_, rhs.digits_, count_ * sizeof(Type)); + } + + explicit BigInteger(uint64_t u) : count_(1) { + digits_[0] = u; + } + + BigInteger(const char* decimals, size_t length) : count_(1) { + RAPIDJSON_ASSERT(length > 0); + digits_[0] = 0; + size_t i = 0; + const size_t kMaxDigitPerIteration = 19; // 2^64 = 18446744073709551616 > 10^19 + while (length >= kMaxDigitPerIteration) { + AppendDecimal64(decimals + i, decimals + i + kMaxDigitPerIteration); + length -= kMaxDigitPerIteration; + i += kMaxDigitPerIteration; + } + + if (length > 0) + AppendDecimal64(decimals + i, decimals + i + length); + } + + BigInteger& operator=(const BigInteger &rhs) + { + if (this != &rhs) { + count_ = rhs.count_; + std::memcpy(digits_, rhs.digits_, count_ * sizeof(Type)); + } + return *this; + } + + BigInteger& operator=(uint64_t u) { + digits_[0] = u; + count_ = 1; + return *this; + } + + BigInteger& operator+=(uint64_t u) { + Type backup = digits_[0]; + digits_[0] += u; + for (size_t i = 0; i < count_ - 1; i++) { + if (digits_[i] >= backup) + return *this; // no carry + backup = digits_[i + 1]; + digits_[i + 1] += 1; + } + + // Last carry + if (digits_[count_ - 1] < backup) + PushBack(1); + + return *this; + } + + BigInteger& operator*=(uint64_t u) { + if (u == 0) return *this = 0; + if (u == 1) return *this; + if (*this == 1) return *this = u; + + uint64_t k = 0; + for (size_t i = 0; i < count_; i++) { + uint64_t hi; + digits_[i] = MulAdd64(digits_[i], u, k, &hi); + k = hi; + } + + if (k > 0) + PushBack(k); + + return *this; + } + + BigInteger& operator*=(uint32_t u) { + if (u == 0) return *this = 0; + if (u == 1) return *this; + if (*this == 1) return *this = u; + + uint64_t k = 0; + for (size_t i = 0; i < count_; i++) { + const uint64_t c = digits_[i] >> 32; + const uint64_t d = digits_[i] & 0xFFFFFFFF; + const uint64_t uc = u * c; + const uint64_t ud = u * d; + const uint64_t p0 = ud + k; + const uint64_t p1 = uc + (p0 >> 32); + digits_[i] = (p0 & 0xFFFFFFFF) | (p1 << 32); + k = p1 >> 32; + } + + if (k > 0) + PushBack(k); + + return *this; + } + + BigInteger& operator<<=(size_t shift) { + if (IsZero() || shift == 0) return *this; + + size_t offset = shift / kTypeBit; + size_t interShift = shift % kTypeBit; + RAPIDJSON_ASSERT(count_ + offset <= kCapacity); + + if (interShift == 0) { + std::memmove(&digits_[count_ - 1 + offset], &digits_[count_ - 1], count_ * sizeof(Type)); + count_ += offset; + } + else { + digits_[count_] = 0; + for (size_t i = count_; i > 0; i--) + digits_[i + offset] = (digits_[i] << interShift) | (digits_[i - 1] >> (kTypeBit - interShift)); + digits_[offset] = digits_[0] << interShift; + count_ += offset; + if (digits_[count_]) + count_++; + } + + std::memset(digits_, 0, offset * sizeof(Type)); + + return *this; + } + + bool operator==(const BigInteger& rhs) const { + return count_ == rhs.count_ && std::memcmp(digits_, rhs.digits_, count_ * sizeof(Type)) == 0; + } + + bool operator==(const Type rhs) const { + return count_ == 1 && digits_[0] == rhs; + } + + BigInteger& MultiplyPow5(unsigned exp) { + static const uint32_t kPow5[12] = { + 5, + 5 * 5, + 5 * 5 * 5, + 5 * 5 * 5 * 5, + 5 * 5 * 5 * 5 * 5, + 5 * 5 * 5 * 5 * 5 * 5, + 5 * 5 * 5 * 5 * 5 * 5 * 5, + 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5, + 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5, + 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5, + 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5, + 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 + }; + if (exp == 0) return *this; + for (; exp >= 27; exp -= 27) *this *= RAPIDJSON_UINT64_C2(0X6765C793, 0XFA10079D); // 5^27 + for (; exp >= 13; exp -= 13) *this *= static_cast<uint32_t>(1220703125u); // 5^13 + if (exp > 0) *this *= kPow5[exp - 1]; + return *this; + } + + // Compute absolute difference of this and rhs. + // Assume this != rhs + bool Difference(const BigInteger& rhs, BigInteger* out) const { + int cmp = Compare(rhs); + RAPIDJSON_ASSERT(cmp != 0); + const BigInteger *a, *b; // Makes a > b + bool ret; + if (cmp < 0) { a = &rhs; b = this; ret = true; } + else { a = this; b = &rhs; ret = false; } + + Type borrow = 0; + for (size_t i = 0; i < a->count_; i++) { + Type d = a->digits_[i] - borrow; + if (i < b->count_) + d -= b->digits_[i]; + borrow = (d > a->digits_[i]) ? 1 : 0; + out->digits_[i] = d; + if (d != 0) + out->count_ = i + 1; + } + + return ret; + } + + int Compare(const BigInteger& rhs) const { + if (count_ != rhs.count_) + return count_ < rhs.count_ ? -1 : 1; + + for (size_t i = count_; i-- > 0;) + if (digits_[i] != rhs.digits_[i]) + return digits_[i] < rhs.digits_[i] ? -1 : 1; + + return 0; + } + + size_t GetCount() const { return count_; } + Type GetDigit(size_t index) const { RAPIDJSON_ASSERT(index < count_); return digits_[index]; } + bool IsZero() const { return count_ == 1 && digits_[0] == 0; } + +private: + void AppendDecimal64(const char* begin, const char* end) { + uint64_t u = ParseUint64(begin, end); + if (IsZero()) + *this = u; + else { + unsigned exp = static_cast<unsigned>(end - begin); + (MultiplyPow5(exp) <<= exp) += u; // *this = *this * 10^exp + u + } + } + + void PushBack(Type digit) { + RAPIDJSON_ASSERT(count_ < kCapacity); + digits_[count_++] = digit; + } + + static uint64_t ParseUint64(const char* begin, const char* end) { + uint64_t r = 0; + for (const char* p = begin; p != end; ++p) { + RAPIDJSON_ASSERT(*p >= '0' && *p <= '9'); + r = r * 10u + static_cast<unsigned>(*p - '0'); + } + return r; + } + + // Assume a * b + k < 2^128 + static uint64_t MulAdd64(uint64_t a, uint64_t b, uint64_t k, uint64_t* outHigh) { +#if defined(_MSC_VER) && defined(_M_AMD64) + uint64_t low = _umul128(a, b, outHigh) + k; + if (low < k) + (*outHigh)++; + return low; +#elif (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)) && defined(__x86_64__) + __extension__ typedef unsigned __int128 uint128; + uint128 p = static_cast<uint128>(a) * static_cast<uint128>(b); + p += k; + *outHigh = static_cast<uint64_t>(p >> 64); + return static_cast<uint64_t>(p); +#else + const uint64_t a0 = a & 0xFFFFFFFF, a1 = a >> 32, b0 = b & 0xFFFFFFFF, b1 = b >> 32; + uint64_t x0 = a0 * b0, x1 = a0 * b1, x2 = a1 * b0, x3 = a1 * b1; + x1 += (x0 >> 32); // can't give carry + x1 += x2; + if (x1 < x2) + x3 += (static_cast<uint64_t>(1) << 32); + uint64_t lo = (x1 << 32) + (x0 & 0xFFFFFFFF); + uint64_t hi = x3 + (x1 >> 32); + + lo += k; + if (lo < k) + hi++; + *outHigh = hi; + return lo; +#endif + } + + static const size_t kBitCount = 3328; // 64bit * 54 > 10^1000 + static const size_t kCapacity = kBitCount / sizeof(Type); + static const size_t kTypeBit = sizeof(Type) * 8; + + Type digits_[kCapacity]; + size_t count_; +}; + +} // namespace internal +RAPIDJSON_NAMESPACE_END + +#endif // RAPIDJSON_BIGINTEGER_H_ diff --git a/xmrstak/rapidjson/internal/diyfp.h b/xmrstak/rapidjson/internal/diyfp.h new file mode 100644 index 0000000..c9fefdc --- /dev/null +++ b/xmrstak/rapidjson/internal/diyfp.h @@ -0,0 +1,258 @@ +// Tencent is pleased to support the open source community by making RapidJSON available. +// +// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. +// +// Licensed under the MIT License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// http://opensource.org/licenses/MIT +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +// This is a C++ header-only implementation of Grisu2 algorithm from the publication: +// Loitsch, Florian. "Printing floating-point numbers quickly and accurately with +// integers." ACM Sigplan Notices 45.6 (2010): 233-243. + +#ifndef RAPIDJSON_DIYFP_H_ +#define RAPIDJSON_DIYFP_H_ + +#include "../rapidjson.h" + +#if defined(_MSC_VER) && defined(_M_AMD64) +#include <intrin.h> +#pragma intrinsic(_BitScanReverse64) +#pragma intrinsic(_umul128) +#endif + +RAPIDJSON_NAMESPACE_BEGIN +namespace internal { + +#ifdef __GNUC__ +RAPIDJSON_DIAG_PUSH +RAPIDJSON_DIAG_OFF(effc++) +#endif + +#ifdef __clang__ +RAPIDJSON_DIAG_PUSH +RAPIDJSON_DIAG_OFF(padded) +#endif + +struct DiyFp { + DiyFp() : f(), e() {} + + DiyFp(uint64_t fp, int exp) : f(fp), e(exp) {} + + explicit DiyFp(double d) { + union { + double d; + uint64_t u64; + } u = { d }; + + int biased_e = static_cast<int>((u.u64 & kDpExponentMask) >> kDpSignificandSize); + uint64_t significand = (u.u64 & kDpSignificandMask); + if (biased_e != 0) { + f = significand + kDpHiddenBit; + e = biased_e - kDpExponentBias; + } + else { + f = significand; + e = kDpMinExponent + 1; + } + } + + DiyFp operator-(const DiyFp& rhs) const { + return DiyFp(f - rhs.f, e); + } + + DiyFp operator*(const DiyFp& rhs) const { +#if defined(_MSC_VER) && defined(_M_AMD64) + uint64_t h; + uint64_t l = _umul128(f, rhs.f, &h); + if (l & (uint64_t(1) << 63)) // rounding + h++; + return DiyFp(h, e + rhs.e + 64); +#elif (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)) && defined(__x86_64__) + __extension__ typedef unsigned __int128 uint128; + uint128 p = static_cast<uint128>(f) * static_cast<uint128>(rhs.f); + uint64_t h = static_cast<uint64_t>(p >> 64); + uint64_t l = static_cast<uint64_t>(p); + if (l & (uint64_t(1) << 63)) // rounding + h++; + return DiyFp(h, e + rhs.e + 64); +#else + const uint64_t M32 = 0xFFFFFFFF; + const uint64_t a = f >> 32; + const uint64_t b = f & M32; + const uint64_t c = rhs.f >> 32; + const uint64_t d = rhs.f & M32; + const uint64_t ac = a * c; + const uint64_t bc = b * c; + const uint64_t ad = a * d; + const uint64_t bd = b * d; + uint64_t tmp = (bd >> 32) + (ad & M32) + (bc & M32); + tmp += 1U << 31; /// mult_round + return DiyFp(ac + (ad >> 32) + (bc >> 32) + (tmp >> 32), e + rhs.e + 64); +#endif + } + + DiyFp Normalize() const { +#if defined(_MSC_VER) && defined(_M_AMD64) + unsigned long index; + _BitScanReverse64(&index, f); + return DiyFp(f << (63 - index), e - (63 - index)); +#elif defined(__GNUC__) && __GNUC__ >= 4 + int s = __builtin_clzll(f); + return DiyFp(f << s, e - s); +#else + DiyFp res = *this; + while (!(res.f & (static_cast<uint64_t>(1) << 63))) { + res.f <<= 1; + res.e--; + } + return res; +#endif + } + + DiyFp NormalizeBoundary() const { + DiyFp res = *this; + while (!(res.f & (kDpHiddenBit << 1))) { + res.f <<= 1; + res.e--; + } + res.f <<= (kDiySignificandSize - kDpSignificandSize - 2); + res.e = res.e - (kDiySignificandSize - kDpSignificandSize - 2); + return res; + } + + void NormalizedBoundaries(DiyFp* minus, DiyFp* plus) const { + DiyFp pl = DiyFp((f << 1) + 1, e - 1).NormalizeBoundary(); + DiyFp mi = (f == kDpHiddenBit) ? DiyFp((f << 2) - 1, e - 2) : DiyFp((f << 1) - 1, e - 1); + mi.f <<= mi.e - pl.e; + mi.e = pl.e; + *plus = pl; + *minus = mi; + } + + double ToDouble() const { + union { + double d; + uint64_t u64; + }u; + const uint64_t be = (e == kDpDenormalExponent && (f & kDpHiddenBit) == 0) ? 0 : + static_cast<uint64_t>(e + kDpExponentBias); + u.u64 = (f & kDpSignificandMask) | (be << kDpSignificandSize); + return u.d; + } + + static const int kDiySignificandSize = 64; + static const int kDpSignificandSize = 52; + static const int kDpExponentBias = 0x3FF + kDpSignificandSize; + static const int kDpMaxExponent = 0x7FF - kDpExponentBias; + static const int kDpMinExponent = -kDpExponentBias; + static const int kDpDenormalExponent = -kDpExponentBias + 1; + static const uint64_t kDpExponentMask = RAPIDJSON_UINT64_C2(0x7FF00000, 0x00000000); + static const uint64_t kDpSignificandMask = RAPIDJSON_UINT64_C2(0x000FFFFF, 0xFFFFFFFF); + static const uint64_t kDpHiddenBit = RAPIDJSON_UINT64_C2(0x00100000, 0x00000000); + + uint64_t f; + int e; +}; + +inline DiyFp GetCachedPowerByIndex(size_t index) { + // 10^-348, 10^-340, ..., 10^340 + static const uint64_t kCachedPowers_F[] = { + RAPIDJSON_UINT64_C2(0xfa8fd5a0, 0x081c0288), RAPIDJSON_UINT64_C2(0xbaaee17f, 0xa23ebf76), + RAPIDJSON_UINT64_C2(0x8b16fb20, 0x3055ac76), RAPIDJSON_UINT64_C2(0xcf42894a, 0x5dce35ea), + RAPIDJSON_UINT64_C2(0x9a6bb0aa, 0x55653b2d), RAPIDJSON_UINT64_C2(0xe61acf03, 0x3d1a45df), + RAPIDJSON_UINT64_C2(0xab70fe17, 0xc79ac6ca), RAPIDJSON_UINT64_C2(0xff77b1fc, 0xbebcdc4f), + RAPIDJSON_UINT64_C2(0xbe5691ef, 0x416bd60c), RAPIDJSON_UINT64_C2(0x8dd01fad, 0x907ffc3c), + RAPIDJSON_UINT64_C2(0xd3515c28, 0x31559a83), RAPIDJSON_UINT64_C2(0x9d71ac8f, 0xada6c9b5), + RAPIDJSON_UINT64_C2(0xea9c2277, 0x23ee8bcb), RAPIDJSON_UINT64_C2(0xaecc4991, 0x4078536d), + RAPIDJSON_UINT64_C2(0x823c1279, 0x5db6ce57), RAPIDJSON_UINT64_C2(0xc2109436, 0x4dfb5637), + RAPIDJSON_UINT64_C2(0x9096ea6f, 0x3848984f), RAPIDJSON_UINT64_C2(0xd77485cb, 0x25823ac7), + RAPIDJSON_UINT64_C2(0xa086cfcd, 0x97bf97f4), RAPIDJSON_UINT64_C2(0xef340a98, 0x172aace5), + RAPIDJSON_UINT64_C2(0xb23867fb, 0x2a35b28e), RAPIDJSON_UINT64_C2(0x84c8d4df, 0xd2c63f3b), + RAPIDJSON_UINT64_C2(0xc5dd4427, 0x1ad3cdba), RAPIDJSON_UINT64_C2(0x936b9fce, 0xbb25c996), + RAPIDJSON_UINT64_C2(0xdbac6c24, 0x7d62a584), RAPIDJSON_UINT64_C2(0xa3ab6658, 0x0d5fdaf6), + RAPIDJSON_UINT64_C2(0xf3e2f893, 0xdec3f126), RAPIDJSON_UINT64_C2(0xb5b5ada8, 0xaaff80b8), + RAPIDJSON_UINT64_C2(0x87625f05, 0x6c7c4a8b), RAPIDJSON_UINT64_C2(0xc9bcff60, 0x34c13053), + RAPIDJSON_UINT64_C2(0x964e858c, 0x91ba2655), RAPIDJSON_UINT64_C2(0xdff97724, 0x70297ebd), + RAPIDJSON_UINT64_C2(0xa6dfbd9f, 0xb8e5b88f), RAPIDJSON_UINT64_C2(0xf8a95fcf, 0x88747d94), + RAPIDJSON_UINT64_C2(0xb9447093, 0x8fa89bcf), RAPIDJSON_UINT64_C2(0x8a08f0f8, 0xbf0f156b), + RAPIDJSON_UINT64_C2(0xcdb02555, 0x653131b6), RAPIDJSON_UINT64_C2(0x993fe2c6, 0xd07b7fac), + RAPIDJSON_UINT64_C2(0xe45c10c4, 0x2a2b3b06), RAPIDJSON_UINT64_C2(0xaa242499, 0x697392d3), + RAPIDJSON_UINT64_C2(0xfd87b5f2, 0x8300ca0e), RAPIDJSON_UINT64_C2(0xbce50864, 0x92111aeb), + RAPIDJSON_UINT64_C2(0x8cbccc09, 0x6f5088cc), RAPIDJSON_UINT64_C2(0xd1b71758, 0xe219652c), + RAPIDJSON_UINT64_C2(0x9c400000, 0x00000000), RAPIDJSON_UINT64_C2(0xe8d4a510, 0x00000000), + RAPIDJSON_UINT64_C2(0xad78ebc5, 0xac620000), RAPIDJSON_UINT64_C2(0x813f3978, 0xf8940984), + RAPIDJSON_UINT64_C2(0xc097ce7b, 0xc90715b3), RAPIDJSON_UINT64_C2(0x8f7e32ce, 0x7bea5c70), + RAPIDJSON_UINT64_C2(0xd5d238a4, 0xabe98068), RAPIDJSON_UINT64_C2(0x9f4f2726, 0x179a2245), + RAPIDJSON_UINT64_C2(0xed63a231, 0xd4c4fb27), RAPIDJSON_UINT64_C2(0xb0de6538, 0x8cc8ada8), + RAPIDJSON_UINT64_C2(0x83c7088e, 0x1aab65db), RAPIDJSON_UINT64_C2(0xc45d1df9, 0x42711d9a), + RAPIDJSON_UINT64_C2(0x924d692c, 0xa61be758), RAPIDJSON_UINT64_C2(0xda01ee64, 0x1a708dea), + RAPIDJSON_UINT64_C2(0xa26da399, 0x9aef774a), RAPIDJSON_UINT64_C2(0xf209787b, 0xb47d6b85), + RAPIDJSON_UINT64_C2(0xb454e4a1, 0x79dd1877), RAPIDJSON_UINT64_C2(0x865b8692, 0x5b9bc5c2), + RAPIDJSON_UINT64_C2(0xc83553c5, 0xc8965d3d), RAPIDJSON_UINT64_C2(0x952ab45c, 0xfa97a0b3), + RAPIDJSON_UINT64_C2(0xde469fbd, 0x99a05fe3), RAPIDJSON_UINT64_C2(0xa59bc234, 0xdb398c25), + RAPIDJSON_UINT64_C2(0xf6c69a72, 0xa3989f5c), RAPIDJSON_UINT64_C2(0xb7dcbf53, 0x54e9bece), + RAPIDJSON_UINT64_C2(0x88fcf317, 0xf22241e2), RAPIDJSON_UINT64_C2(0xcc20ce9b, 0xd35c78a5), + RAPIDJSON_UINT64_C2(0x98165af3, 0x7b2153df), RAPIDJSON_UINT64_C2(0xe2a0b5dc, 0x971f303a), + RAPIDJSON_UINT64_C2(0xa8d9d153, 0x5ce3b396), RAPIDJSON_UINT64_C2(0xfb9b7cd9, 0xa4a7443c), + RAPIDJSON_UINT64_C2(0xbb764c4c, 0xa7a44410), RAPIDJSON_UINT64_C2(0x8bab8eef, 0xb6409c1a), + RAPIDJSON_UINT64_C2(0xd01fef10, 0xa657842c), RAPIDJSON_UINT64_C2(0x9b10a4e5, 0xe9913129), + RAPIDJSON_UINT64_C2(0xe7109bfb, 0xa19c0c9d), RAPIDJSON_UINT64_C2(0xac2820d9, 0x623bf429), + RAPIDJSON_UINT64_C2(0x80444b5e, 0x7aa7cf85), RAPIDJSON_UINT64_C2(0xbf21e440, 0x03acdd2d), + RAPIDJSON_UINT64_C2(0x8e679c2f, 0x5e44ff8f), RAPIDJSON_UINT64_C2(0xd433179d, 0x9c8cb841), + RAPIDJSON_UINT64_C2(0x9e19db92, 0xb4e31ba9), RAPIDJSON_UINT64_C2(0xeb96bf6e, 0xbadf77d9), + RAPIDJSON_UINT64_C2(0xaf87023b, 0x9bf0ee6b) + }; + static const int16_t kCachedPowers_E[] = { + -1220, -1193, -1166, -1140, -1113, -1087, -1060, -1034, -1007, -980, + -954, -927, -901, -874, -847, -821, -794, -768, -741, -715, + -688, -661, -635, -608, -582, -555, -529, -502, -475, -449, + -422, -396, -369, -343, -316, -289, -263, -236, -210, -183, + -157, -130, -103, -77, -50, -24, 3, 30, 56, 83, + 109, 136, 162, 189, 216, 242, 269, 295, 322, 348, + 375, 402, 428, 455, 481, 508, 534, 561, 588, 614, + 641, 667, 694, 720, 747, 774, 800, 827, 853, 880, + 907, 933, 960, 986, 1013, 1039, 1066 + }; + return DiyFp(kCachedPowers_F[index], kCachedPowers_E[index]); +} + +inline DiyFp GetCachedPower(int e, int* K) { + + //int k = static_cast<int>(ceil((-61 - e) * 0.30102999566398114)) + 374; + double dk = (-61 - e) * 0.30102999566398114 + 347; // dk must be positive, so can do ceiling in positive + int k = static_cast<int>(dk); + if (dk - k > 0.0) + k++; + + unsigned index = static_cast<unsigned>((k >> 3) + 1); + *K = -(-348 + static_cast<int>(index << 3)); // decimal exponent no need lookup table + + return GetCachedPowerByIndex(index); +} + +inline DiyFp GetCachedPower10(int exp, int *outExp) { + unsigned index = (static_cast<unsigned>(exp) + 348u) / 8u; + *outExp = -348 + static_cast<int>(index) * 8; + return GetCachedPowerByIndex(index); + } + +#ifdef __GNUC__ +RAPIDJSON_DIAG_POP +#endif + +#ifdef __clang__ +RAPIDJSON_DIAG_POP +RAPIDJSON_DIAG_OFF(padded) +#endif + +} // namespace internal +RAPIDJSON_NAMESPACE_END + +#endif // RAPIDJSON_DIYFP_H_ diff --git a/xmrstak/rapidjson/internal/dtoa.h b/xmrstak/rapidjson/internal/dtoa.h new file mode 100644 index 0000000..8d6350e --- /dev/null +++ b/xmrstak/rapidjson/internal/dtoa.h @@ -0,0 +1,245 @@ +// Tencent is pleased to support the open source community by making RapidJSON available. +// +// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. +// +// Licensed under the MIT License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// http://opensource.org/licenses/MIT +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +// This is a C++ header-only implementation of Grisu2 algorithm from the publication: +// Loitsch, Florian. "Printing floating-point numbers quickly and accurately with +// integers." ACM Sigplan Notices 45.6 (2010): 233-243. + +#ifndef RAPIDJSON_DTOA_ +#define RAPIDJSON_DTOA_ + +#include "itoa.h" // GetDigitsLut() +#include "diyfp.h" +#include "ieee754.h" + +RAPIDJSON_NAMESPACE_BEGIN +namespace internal { + +#ifdef __GNUC__ +RAPIDJSON_DIAG_PUSH +RAPIDJSON_DIAG_OFF(effc++) +RAPIDJSON_DIAG_OFF(array-bounds) // some gcc versions generate wrong warnings https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59124 +#endif + +inline void GrisuRound(char* buffer, int len, uint64_t delta, uint64_t rest, uint64_t ten_kappa, uint64_t wp_w) { + while (rest < wp_w && delta - rest >= ten_kappa && + (rest + ten_kappa < wp_w || /// closer + wp_w - rest > rest + ten_kappa - wp_w)) { + buffer[len - 1]--; + rest += ten_kappa; + } +} + +inline unsigned CountDecimalDigit32(uint32_t n) { + // Simple pure C++ implementation was faster than __builtin_clz version in this situation. + if (n < 10) return 1; + if (n < 100) return 2; + if (n < 1000) return 3; + if (n < 10000) return 4; + if (n < 100000) return 5; + if (n < 1000000) return 6; + if (n < 10000000) return 7; + if (n < 100000000) return 8; + // Will not reach 10 digits in DigitGen() + //if (n < 1000000000) return 9; + //return 10; + return 9; +} + +inline void DigitGen(const DiyFp& W, const DiyFp& Mp, uint64_t delta, char* buffer, int* len, int* K) { + static const uint32_t kPow10[] = { 1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000 }; + const DiyFp one(uint64_t(1) << -Mp.e, Mp.e); + const DiyFp wp_w = Mp - W; + uint32_t p1 = static_cast<uint32_t>(Mp.f >> -one.e); + uint64_t p2 = Mp.f & (one.f - 1); + unsigned kappa = CountDecimalDigit32(p1); // kappa in [0, 9] + *len = 0; + + while (kappa > 0) { + uint32_t d = 0; + switch (kappa) { + case 9: d = p1 / 100000000; p1 %= 100000000; break; + case 8: d = p1 / 10000000; p1 %= 10000000; break; + case 7: d = p1 / 1000000; p1 %= 1000000; break; + case 6: d = p1 / 100000; p1 %= 100000; break; + case 5: d = p1 / 10000; p1 %= 10000; break; + case 4: d = p1 / 1000; p1 %= 1000; break; + case 3: d = p1 / 100; p1 %= 100; break; + case 2: d = p1 / 10; p1 %= 10; break; + case 1: d = p1; p1 = 0; break; + default:; + } + if (d || *len) + buffer[(*len)++] = static_cast<char>('0' + static_cast<char>(d)); + kappa--; + uint64_t tmp = (static_cast<uint64_t>(p1) << -one.e) + p2; + if (tmp <= delta) { + *K += kappa; + GrisuRound(buffer, *len, delta, tmp, static_cast<uint64_t>(kPow10[kappa]) << -one.e, wp_w.f); + return; + } + } + + // kappa = 0 + for (;;) { + p2 *= 10; + delta *= 10; + char d = static_cast<char>(p2 >> -one.e); + if (d || *len) + buffer[(*len)++] = static_cast<char>('0' + d); + p2 &= one.f - 1; + kappa--; + if (p2 < delta) { + *K += kappa; + int index = -static_cast<int>(kappa); + GrisuRound(buffer, *len, delta, p2, one.f, wp_w.f * (index < 9 ? kPow10[-static_cast<int>(kappa)] : 0)); + return; + } + } +} + +inline void Grisu2(double value, char* buffer, int* length, int* K) { + const DiyFp v(value); + DiyFp w_m, w_p; + v.NormalizedBoundaries(&w_m, &w_p); + + const DiyFp c_mk = GetCachedPower(w_p.e, K); + const DiyFp W = v.Normalize() * c_mk; + DiyFp Wp = w_p * c_mk; + DiyFp Wm = w_m * c_mk; + Wm.f++; + Wp.f--; + DigitGen(W, Wp, Wp.f - Wm.f, buffer, length, K); +} + +inline char* WriteExponent(int K, char* buffer) { + if (K < 0) { + *buffer++ = '-'; + K = -K; + } + + if (K >= 100) { + *buffer++ = static_cast<char>('0' + static_cast<char>(K / 100)); + K %= 100; + const char* d = GetDigitsLut() + K * 2; + *buffer++ = d[0]; + *buffer++ = d[1]; + } + else if (K >= 10) { + const char* d = GetDigitsLut() + K * 2; + *buffer++ = d[0]; + *buffer++ = d[1]; + } + else + *buffer++ = static_cast<char>('0' + static_cast<char>(K)); + + return buffer; +} + +inline char* Prettify(char* buffer, int length, int k, int maxDecimalPlaces) { + const int kk = length + k; // 10^(kk-1) <= v < 10^kk + + if (0 <= k && kk <= 21) { + // 1234e7 -> 12340000000 + for (int i = length; i < kk; i++) + buffer[i] = '0'; + buffer[kk] = '.'; + buffer[kk + 1] = '0'; + return &buffer[kk + 2]; + } + else if (0 < kk && kk <= 21) { + // 1234e-2 -> 12.34 + std::memmove(&buffer[kk + 1], &buffer[kk], static_cast<size_t>(length - kk)); + buffer[kk] = '.'; + if (0 > k + maxDecimalPlaces) { + // When maxDecimalPlaces = 2, 1.2345 -> 1.23, 1.102 -> 1.1 + // Remove extra trailing zeros (at least one) after truncation. + for (int i = kk + maxDecimalPlaces; i > kk + 1; i--) + if (buffer[i] != '0') + return &buffer[i + 1]; + return &buffer[kk + 2]; // Reserve one zero + } + else + return &buffer[length + 1]; + } + else if (-6 < kk && kk <= 0) { + // 1234e-6 -> 0.001234 + const int offset = 2 - kk; + std::memmove(&buffer[offset], &buffer[0], static_cast<size_t>(length)); + buffer[0] = '0'; + buffer[1] = '.'; + for (int i = 2; i < offset; i++) + buffer[i] = '0'; + if (length - kk > maxDecimalPlaces) { + // When maxDecimalPlaces = 2, 0.123 -> 0.12, 0.102 -> 0.1 + // Remove extra trailing zeros (at least one) after truncation. + for (int i = maxDecimalPlaces + 1; i > 2; i--) + if (buffer[i] != '0') + return &buffer[i + 1]; + return &buffer[3]; // Reserve one zero + } + else + return &buffer[length + offset]; + } + else if (kk < -maxDecimalPlaces) { + // Truncate to zero + buffer[0] = '0'; + buffer[1] = '.'; + buffer[2] = '0'; + return &buffer[3]; + } + else if (length == 1) { + // 1e30 + buffer[1] = 'e'; + return WriteExponent(kk - 1, &buffer[2]); + } + else { + // 1234e30 -> 1.234e33 + std::memmove(&buffer[2], &buffer[1], static_cast<size_t>(length - 1)); + buffer[1] = '.'; + buffer[length + 1] = 'e'; + return WriteExponent(kk - 1, &buffer[0 + length + 2]); + } +} + +inline char* dtoa(double value, char* buffer, int maxDecimalPlaces = 324) { + RAPIDJSON_ASSERT(maxDecimalPlaces >= 1); + Double d(value); + if (d.IsZero()) { + if (d.Sign()) + *buffer++ = '-'; // -0.0, Issue #289 + buffer[0] = '0'; + buffer[1] = '.'; + buffer[2] = '0'; + return &buffer[3]; + } + else { + if (value < 0) { + *buffer++ = '-'; + value = -value; + } + int length, K; + Grisu2(value, buffer, &length, &K); + return Prettify(buffer, length, K, maxDecimalPlaces); + } +} + +#ifdef __GNUC__ +RAPIDJSON_DIAG_POP +#endif + +} // namespace internal +RAPIDJSON_NAMESPACE_END + +#endif // RAPIDJSON_DTOA_ diff --git a/xmrstak/rapidjson/internal/ieee754.h b/xmrstak/rapidjson/internal/ieee754.h new file mode 100644 index 0000000..82bb0b9 --- /dev/null +++ b/xmrstak/rapidjson/internal/ieee754.h @@ -0,0 +1,78 @@ +// Tencent is pleased to support the open source community by making RapidJSON available. +// +// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. +// +// Licensed under the MIT License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// http://opensource.org/licenses/MIT +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef RAPIDJSON_IEEE754_ +#define RAPIDJSON_IEEE754_ + +#include "../rapidjson.h" + +RAPIDJSON_NAMESPACE_BEGIN +namespace internal { + +class Double { +public: + Double() {} + Double(double d) : d_(d) {} + Double(uint64_t u) : u_(u) {} + + double Value() const { return d_; } + uint64_t Uint64Value() const { return u_; } + + double NextPositiveDouble() const { + RAPIDJSON_ASSERT(!Sign()); + return Double(u_ + 1).Value(); + } + + bool Sign() const { return (u_ & kSignMask) != 0; } + uint64_t Significand() const { return u_ & kSignificandMask; } + int Exponent() const { return static_cast<int>(((u_ & kExponentMask) >> kSignificandSize) - kExponentBias); } + + bool IsNan() const { return (u_ & kExponentMask) == kExponentMask && Significand() != 0; } + bool IsInf() const { return (u_ & kExponentMask) == kExponentMask && Significand() == 0; } + bool IsNanOrInf() const { return (u_ & kExponentMask) == kExponentMask; } + bool IsNormal() const { return (u_ & kExponentMask) != 0 || Significand() == 0; } + bool IsZero() const { return (u_ & (kExponentMask | kSignificandMask)) == 0; } + + uint64_t IntegerSignificand() const { return IsNormal() ? Significand() | kHiddenBit : Significand(); } + int IntegerExponent() const { return (IsNormal() ? Exponent() : kDenormalExponent) - kSignificandSize; } + uint64_t ToBias() const { return (u_ & kSignMask) ? ~u_ + 1 : u_ | kSignMask; } + + static unsigned EffectiveSignificandSize(int order) { + if (order >= -1021) + return 53; + else if (order <= -1074) + return 0; + else + return static_cast<unsigned>(order) + 1074; + } + +private: + static const int kSignificandSize = 52; + static const int kExponentBias = 0x3FF; + static const int kDenormalExponent = 1 - kExponentBias; + static const uint64_t kSignMask = RAPIDJSON_UINT64_C2(0x80000000, 0x00000000); + static const uint64_t kExponentMask = RAPIDJSON_UINT64_C2(0x7FF00000, 0x00000000); + static const uint64_t kSignificandMask = RAPIDJSON_UINT64_C2(0x000FFFFF, 0xFFFFFFFF); + static const uint64_t kHiddenBit = RAPIDJSON_UINT64_C2(0x00100000, 0x00000000); + + union { + double d_; + uint64_t u_; + }; +}; + +} // namespace internal +RAPIDJSON_NAMESPACE_END + +#endif // RAPIDJSON_IEEE754_ diff --git a/xmrstak/rapidjson/internal/itoa.h b/xmrstak/rapidjson/internal/itoa.h new file mode 100644 index 0000000..01a4e7e --- /dev/null +++ b/xmrstak/rapidjson/internal/itoa.h @@ -0,0 +1,304 @@ +// Tencent is pleased to support the open source community by making RapidJSON available. +// +// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. +// +// Licensed under the MIT License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// http://opensource.org/licenses/MIT +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef RAPIDJSON_ITOA_ +#define RAPIDJSON_ITOA_ + +#include "../rapidjson.h" + +RAPIDJSON_NAMESPACE_BEGIN +namespace internal { + +inline const char* GetDigitsLut() { + static const char cDigitsLut[200] = { + '0','0','0','1','0','2','0','3','0','4','0','5','0','6','0','7','0','8','0','9', + '1','0','1','1','1','2','1','3','1','4','1','5','1','6','1','7','1','8','1','9', + '2','0','2','1','2','2','2','3','2','4','2','5','2','6','2','7','2','8','2','9', + '3','0','3','1','3','2','3','3','3','4','3','5','3','6','3','7','3','8','3','9', + '4','0','4','1','4','2','4','3','4','4','4','5','4','6','4','7','4','8','4','9', + '5','0','5','1','5','2','5','3','5','4','5','5','5','6','5','7','5','8','5','9', + '6','0','6','1','6','2','6','3','6','4','6','5','6','6','6','7','6','8','6','9', + '7','0','7','1','7','2','7','3','7','4','7','5','7','6','7','7','7','8','7','9', + '8','0','8','1','8','2','8','3','8','4','8','5','8','6','8','7','8','8','8','9', + '9','0','9','1','9','2','9','3','9','4','9','5','9','6','9','7','9','8','9','9' + }; + return cDigitsLut; +} + +inline char* u32toa(uint32_t value, char* buffer) { + const char* cDigitsLut = GetDigitsLut(); + + if (value < 10000) { + const uint32_t d1 = (value / 100) << 1; + const uint32_t d2 = (value % 100) << 1; + + if (value >= 1000) + *buffer++ = cDigitsLut[d1]; + if (value >= 100) + *buffer++ = cDigitsLut[d1 + 1]; + if (value >= 10) + *buffer++ = cDigitsLut[d2]; + *buffer++ = cDigitsLut[d2 + 1]; + } + else if (value < 100000000) { + // value = bbbbcccc + const uint32_t b = value / 10000; + const uint32_t c = value % 10000; + + const uint32_t d1 = (b / 100) << 1; + const uint32_t d2 = (b % 100) << 1; + + const uint32_t d3 = (c / 100) << 1; + const uint32_t d4 = (c % 100) << 1; + + if (value >= 10000000) + *buffer++ = cDigitsLut[d1]; + if (value >= 1000000) + *buffer++ = cDigitsLut[d1 + 1]; + if (value >= 100000) + *buffer++ = cDigitsLut[d2]; + *buffer++ = cDigitsLut[d2 + 1]; + + *buffer++ = cDigitsLut[d3]; + *buffer++ = cDigitsLut[d3 + 1]; + *buffer++ = cDigitsLut[d4]; + *buffer++ = cDigitsLut[d4 + 1]; + } + else { + // value = aabbbbcccc in decimal + + const uint32_t a = value / 100000000; // 1 to 42 + value %= 100000000; + + if (a >= 10) { + const unsigned i = a << 1; + *buffer++ = cDigitsLut[i]; + *buffer++ = cDigitsLut[i + 1]; + } + else + *buffer++ = static_cast<char>('0' + static_cast<char>(a)); + + const uint32_t b = value / 10000; // 0 to 9999 + const uint32_t c = value % 10000; // 0 to 9999 + + const uint32_t d1 = (b / 100) << 1; + const uint32_t d2 = (b % 100) << 1; + + const uint32_t d3 = (c / 100) << 1; + const uint32_t d4 = (c % 100) << 1; + + *buffer++ = cDigitsLut[d1]; + *buffer++ = cDigitsLut[d1 + 1]; + *buffer++ = cDigitsLut[d2]; + *buffer++ = cDigitsLut[d2 + 1]; + *buffer++ = cDigitsLut[d3]; + *buffer++ = cDigitsLut[d3 + 1]; + *buffer++ = cDigitsLut[d4]; + *buffer++ = cDigitsLut[d4 + 1]; + } + return buffer; +} + +inline char* i32toa(int32_t value, char* buffer) { + uint32_t u = static_cast<uint32_t>(value); + if (value < 0) { + *buffer++ = '-'; + u = ~u + 1; + } + + return u32toa(u, buffer); +} + +inline char* u64toa(uint64_t value, char* buffer) { + const char* cDigitsLut = GetDigitsLut(); + const uint64_t kTen8 = 100000000; + const uint64_t kTen9 = kTen8 * 10; + const uint64_t kTen10 = kTen8 * 100; + const uint64_t kTen11 = kTen8 * 1000; + const uint64_t kTen12 = kTen8 * 10000; + const uint64_t kTen13 = kTen8 * 100000; + const uint64_t kTen14 = kTen8 * 1000000; + const uint64_t kTen15 = kTen8 * 10000000; + const uint64_t kTen16 = kTen8 * kTen8; + + if (value < kTen8) { + uint32_t v = static_cast<uint32_t>(value); + if (v < 10000) { + const uint32_t d1 = (v / 100) << 1; + const uint32_t d2 = (v % 100) << 1; + + if (v >= 1000) + *buffer++ = cDigitsLut[d1]; + if (v >= 100) + *buffer++ = cDigitsLut[d1 + 1]; + if (v >= 10) + *buffer++ = cDigitsLut[d2]; + *buffer++ = cDigitsLut[d2 + 1]; + } + else { + // value = bbbbcccc + const uint32_t b = v / 10000; + const uint32_t c = v % 10000; + + const uint32_t d1 = (b / 100) << 1; + const uint32_t d2 = (b % 100) << 1; + + const uint32_t d3 = (c / 100) << 1; + const uint32_t d4 = (c % 100) << 1; + + if (value >= 10000000) + *buffer++ = cDigitsLut[d1]; + if (value >= 1000000) + *buffer++ = cDigitsLut[d1 + 1]; + if (value >= 100000) + *buffer++ = cDigitsLut[d2]; + *buffer++ = cDigitsLut[d2 + 1]; + + *buffer++ = cDigitsLut[d3]; + *buffer++ = cDigitsLut[d3 + 1]; + *buffer++ = cDigitsLut[d4]; + *buffer++ = cDigitsLut[d4 + 1]; + } + } + else if (value < kTen16) { + const uint32_t v0 = static_cast<uint32_t>(value / kTen8); + const uint32_t v1 = static_cast<uint32_t>(value % kTen8); + + const uint32_t b0 = v0 / 10000; + const uint32_t c0 = v0 % 10000; + + const uint32_t d1 = (b0 / 100) << 1; + const uint32_t d2 = (b0 % 100) << 1; + + const uint32_t d3 = (c0 / 100) << 1; + const uint32_t d4 = (c0 % 100) << 1; + + const uint32_t b1 = v1 / 10000; + const uint32_t c1 = v1 % 10000; + + const uint32_t d5 = (b1 / 100) << 1; + const uint32_t d6 = (b1 % 100) << 1; + + const uint32_t d7 = (c1 / 100) << 1; + const uint32_t d8 = (c1 % 100) << 1; + + if (value >= kTen15) + *buffer++ = cDigitsLut[d1]; + if (value >= kTen14) + *buffer++ = cDigitsLut[d1 + 1]; + if (value >= kTen13) + *buffer++ = cDigitsLut[d2]; + if (value >= kTen12) + *buffer++ = cDigitsLut[d2 + 1]; + if (value >= kTen11) + *buffer++ = cDigitsLut[d3]; + if (value >= kTen10) + *buffer++ = cDigitsLut[d3 + 1]; + if (value >= kTen9) + *buffer++ = cDigitsLut[d4]; + if (value >= kTen8) + *buffer++ = cDigitsLut[d4 + 1]; + + *buffer++ = cDigitsLut[d5]; + *buffer++ = cDigitsLut[d5 + 1]; + *buffer++ = cDigitsLut[d6]; + *buffer++ = cDigitsLut[d6 + 1]; + *buffer++ = cDigitsLut[d7]; + *buffer++ = cDigitsLut[d7 + 1]; + *buffer++ = cDigitsLut[d8]; + *buffer++ = cDigitsLut[d8 + 1]; + } + else { + const uint32_t a = static_cast<uint32_t>(value / kTen16); // 1 to 1844 + value %= kTen16; + + if (a < 10) + *buffer++ = static_cast<char>('0' + static_cast<char>(a)); + else if (a < 100) { + const uint32_t i = a << 1; + *buffer++ = cDigitsLut[i]; + *buffer++ = cDigitsLut[i + 1]; + } + else if (a < 1000) { + *buffer++ = static_cast<char>('0' + static_cast<char>(a / 100)); + + const uint32_t i = (a % 100) << 1; + *buffer++ = cDigitsLut[i]; + *buffer++ = cDigitsLut[i + 1]; + } + else { + const uint32_t i = (a / 100) << 1; + const uint32_t j = (a % 100) << 1; + *buffer++ = cDigitsLut[i]; + *buffer++ = cDigitsLut[i + 1]; + *buffer++ = cDigitsLut[j]; + *buffer++ = cDigitsLut[j + 1]; + } + + const uint32_t v0 = static_cast<uint32_t>(value / kTen8); + const uint32_t v1 = static_cast<uint32_t>(value % kTen8); + + const uint32_t b0 = v0 / 10000; + const uint32_t c0 = v0 % 10000; + + const uint32_t d1 = (b0 / 100) << 1; + const uint32_t d2 = (b0 % 100) << 1; + + const uint32_t d3 = (c0 / 100) << 1; + const uint32_t d4 = (c0 % 100) << 1; + + const uint32_t b1 = v1 / 10000; + const uint32_t c1 = v1 % 10000; + + const uint32_t d5 = (b1 / 100) << 1; + const uint32_t d6 = (b1 % 100) << 1; + + const uint32_t d7 = (c1 / 100) << 1; + const uint32_t d8 = (c1 % 100) << 1; + + *buffer++ = cDigitsLut[d1]; + *buffer++ = cDigitsLut[d1 + 1]; + *buffer++ = cDigitsLut[d2]; + *buffer++ = cDigitsLut[d2 + 1]; + *buffer++ = cDigitsLut[d3]; + *buffer++ = cDigitsLut[d3 + 1]; + *buffer++ = cDigitsLut[d4]; + *buffer++ = cDigitsLut[d4 + 1]; + *buffer++ = cDigitsLut[d5]; + *buffer++ = cDigitsLut[d5 + 1]; + *buffer++ = cDigitsLut[d6]; + *buffer++ = cDigitsLut[d6 + 1]; + *buffer++ = cDigitsLut[d7]; + *buffer++ = cDigitsLut[d7 + 1]; + *buffer++ = cDigitsLut[d8]; + *buffer++ = cDigitsLut[d8 + 1]; + } + + return buffer; +} + +inline char* i64toa(int64_t value, char* buffer) { + uint64_t u = static_cast<uint64_t>(value); + if (value < 0) { + *buffer++ = '-'; + u = ~u + 1; + } + + return u64toa(u, buffer); +} + +} // namespace internal +RAPIDJSON_NAMESPACE_END + +#endif // RAPIDJSON_ITOA_ diff --git a/xmrstak/rapidjson/internal/meta.h b/xmrstak/rapidjson/internal/meta.h new file mode 100644 index 0000000..5a9aaa4 --- /dev/null +++ b/xmrstak/rapidjson/internal/meta.h @@ -0,0 +1,181 @@ +// Tencent is pleased to support the open source community by making RapidJSON available. +// +// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. +// +// Licensed under the MIT License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// http://opensource.org/licenses/MIT +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef RAPIDJSON_INTERNAL_META_H_ +#define RAPIDJSON_INTERNAL_META_H_ + +#include "../rapidjson.h" + +#ifdef __GNUC__ +RAPIDJSON_DIAG_PUSH +RAPIDJSON_DIAG_OFF(effc++) +#endif +#if defined(_MSC_VER) +RAPIDJSON_DIAG_PUSH +RAPIDJSON_DIAG_OFF(6334) +#endif + +#if RAPIDJSON_HAS_CXX11_TYPETRAITS +#include <type_traits> +#endif + +//@cond RAPIDJSON_INTERNAL +RAPIDJSON_NAMESPACE_BEGIN +namespace internal { + +// Helper to wrap/convert arbitrary types to void, useful for arbitrary type matching +template <typename T> struct Void { typedef void Type; }; + +/////////////////////////////////////////////////////////////////////////////// +// BoolType, TrueType, FalseType +// +template <bool Cond> struct BoolType { + static const bool Value = Cond; + typedef BoolType Type; +}; +typedef BoolType<true> TrueType; +typedef BoolType<false> FalseType; + + +/////////////////////////////////////////////////////////////////////////////// +// SelectIf, BoolExpr, NotExpr, AndExpr, OrExpr +// + +template <bool C> struct SelectIfImpl { template <typename T1, typename T2> struct Apply { typedef T1 Type; }; }; +template <> struct SelectIfImpl<false> { template <typename T1, typename T2> struct Apply { typedef T2 Type; }; }; +template <bool C, typename T1, typename T2> struct SelectIfCond : SelectIfImpl<C>::template Apply<T1,T2> {}; +template <typename C, typename T1, typename T2> struct SelectIf : SelectIfCond<C::Value, T1, T2> {}; + +template <bool Cond1, bool Cond2> struct AndExprCond : FalseType {}; +template <> struct AndExprCond<true, true> : TrueType {}; +template <bool Cond1, bool Cond2> struct OrExprCond : TrueType {}; +template <> struct OrExprCond<false, false> : FalseType {}; + +template <typename C> struct BoolExpr : SelectIf<C,TrueType,FalseType>::Type {}; +template <typename C> struct NotExpr : SelectIf<C,FalseType,TrueType>::Type {}; +template <typename C1, typename C2> struct AndExpr : AndExprCond<C1::Value, C2::Value>::Type {}; +template <typename C1, typename C2> struct OrExpr : OrExprCond<C1::Value, C2::Value>::Type {}; + + +/////////////////////////////////////////////////////////////////////////////// +// AddConst, MaybeAddConst, RemoveConst +template <typename T> struct AddConst { typedef const T Type; }; +template <bool Constify, typename T> struct MaybeAddConst : SelectIfCond<Constify, const T, T> {}; +template <typename T> struct RemoveConst { typedef T Type; }; +template <typename T> struct RemoveConst<const T> { typedef T Type; }; + + +/////////////////////////////////////////////////////////////////////////////// +// IsSame, IsConst, IsMoreConst, IsPointer +// +template <typename T, typename U> struct IsSame : FalseType {}; +template <typename T> struct IsSame<T, T> : TrueType {}; + +template <typename T> struct IsConst : FalseType {}; +template <typename T> struct IsConst<const T> : TrueType {}; + +template <typename CT, typename T> +struct IsMoreConst + : AndExpr<IsSame<typename RemoveConst<CT>::Type, typename RemoveConst<T>::Type>, + BoolType<IsConst<CT>::Value >= IsConst<T>::Value> >::Type {}; + +template <typename T> struct IsPointer : FalseType {}; +template <typename T> struct IsPointer<T*> : TrueType {}; + +/////////////////////////////////////////////////////////////////////////////// +// IsBaseOf +// +#if RAPIDJSON_HAS_CXX11_TYPETRAITS + +template <typename B, typename D> struct IsBaseOf + : BoolType< ::std::is_base_of<B,D>::value> {}; + +#else // simplified version adopted from Boost + +template<typename B, typename D> struct IsBaseOfImpl { + RAPIDJSON_STATIC_ASSERT(sizeof(B) != 0); + RAPIDJSON_STATIC_ASSERT(sizeof(D) != 0); + + typedef char (&Yes)[1]; + typedef char (&No) [2]; + + template <typename T> + static Yes Check(const D*, T); + static No Check(const B*, int); + + struct Host { + operator const B*() const; + operator const D*(); + }; + + enum { Value = (sizeof(Check(Host(), 0)) == sizeof(Yes)) }; +}; + +template <typename B, typename D> struct IsBaseOf + : OrExpr<IsSame<B, D>, BoolExpr<IsBaseOfImpl<B, D> > >::Type {}; + +#endif // RAPIDJSON_HAS_CXX11_TYPETRAITS + + +////////////////////////////////////////////////////////////////////////// +// EnableIf / DisableIf +// +template <bool Condition, typename T = void> struct EnableIfCond { typedef T Type; }; +template <typename T> struct EnableIfCond<false, T> { /* empty */ }; + +template <bool Condition, typename T = void> struct DisableIfCond { typedef T Type; }; +template <typename T> struct DisableIfCond<true, T> { /* empty */ }; + +template <typename Condition, typename T = void> +struct EnableIf : EnableIfCond<Condition::Value, T> {}; + +template <typename Condition, typename T = void> +struct DisableIf : DisableIfCond<Condition::Value, T> {}; + +// SFINAE helpers +struct SfinaeTag {}; +template <typename T> struct RemoveSfinaeTag; +template <typename T> struct RemoveSfinaeTag<SfinaeTag&(*)(T)> { typedef T Type; }; + +#define RAPIDJSON_REMOVEFPTR_(type) \ + typename ::RAPIDJSON_NAMESPACE::internal::RemoveSfinaeTag \ + < ::RAPIDJSON_NAMESPACE::internal::SfinaeTag&(*) type>::Type + +#define RAPIDJSON_ENABLEIF(cond) \ + typename ::RAPIDJSON_NAMESPACE::internal::EnableIf \ + <RAPIDJSON_REMOVEFPTR_(cond)>::Type * = NULL + +#define RAPIDJSON_DISABLEIF(cond) \ + typename ::RAPIDJSON_NAMESPACE::internal::DisableIf \ + <RAPIDJSON_REMOVEFPTR_(cond)>::Type * = NULL + +#define RAPIDJSON_ENABLEIF_RETURN(cond,returntype) \ + typename ::RAPIDJSON_NAMESPACE::internal::EnableIf \ + <RAPIDJSON_REMOVEFPTR_(cond), \ + RAPIDJSON_REMOVEFPTR_(returntype)>::Type + +#define RAPIDJSON_DISABLEIF_RETURN(cond,returntype) \ + typename ::RAPIDJSON_NAMESPACE::internal::DisableIf \ + <RAPIDJSON_REMOVEFPTR_(cond), \ + RAPIDJSON_REMOVEFPTR_(returntype)>::Type + +} // namespace internal +RAPIDJSON_NAMESPACE_END +//@endcond + +#if defined(__GNUC__) || defined(_MSC_VER) +RAPIDJSON_DIAG_POP +#endif + +#endif // RAPIDJSON_INTERNAL_META_H_ diff --git a/xmrstak/rapidjson/internal/pow10.h b/xmrstak/rapidjson/internal/pow10.h new file mode 100644 index 0000000..02f475d --- /dev/null +++ b/xmrstak/rapidjson/internal/pow10.h @@ -0,0 +1,55 @@ +// Tencent is pleased to support the open source community by making RapidJSON available. +// +// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. +// +// Licensed under the MIT License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// http://opensource.org/licenses/MIT +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef RAPIDJSON_POW10_ +#define RAPIDJSON_POW10_ + +#include "../rapidjson.h" + +RAPIDJSON_NAMESPACE_BEGIN +namespace internal { + +//! Computes integer powers of 10 in double (10.0^n). +/*! This function uses lookup table for fast and accurate results. + \param n non-negative exponent. Must <= 308. + \return 10.0^n +*/ +inline double Pow10(int n) { + static const double e[] = { // 1e-0...1e308: 309 * 8 bytes = 2472 bytes + 1e+0, + 1e+1, 1e+2, 1e+3, 1e+4, 1e+5, 1e+6, 1e+7, 1e+8, 1e+9, 1e+10, 1e+11, 1e+12, 1e+13, 1e+14, 1e+15, 1e+16, 1e+17, 1e+18, 1e+19, 1e+20, + 1e+21, 1e+22, 1e+23, 1e+24, 1e+25, 1e+26, 1e+27, 1e+28, 1e+29, 1e+30, 1e+31, 1e+32, 1e+33, 1e+34, 1e+35, 1e+36, 1e+37, 1e+38, 1e+39, 1e+40, + 1e+41, 1e+42, 1e+43, 1e+44, 1e+45, 1e+46, 1e+47, 1e+48, 1e+49, 1e+50, 1e+51, 1e+52, 1e+53, 1e+54, 1e+55, 1e+56, 1e+57, 1e+58, 1e+59, 1e+60, + 1e+61, 1e+62, 1e+63, 1e+64, 1e+65, 1e+66, 1e+67, 1e+68, 1e+69, 1e+70, 1e+71, 1e+72, 1e+73, 1e+74, 1e+75, 1e+76, 1e+77, 1e+78, 1e+79, 1e+80, + 1e+81, 1e+82, 1e+83, 1e+84, 1e+85, 1e+86, 1e+87, 1e+88, 1e+89, 1e+90, 1e+91, 1e+92, 1e+93, 1e+94, 1e+95, 1e+96, 1e+97, 1e+98, 1e+99, 1e+100, + 1e+101,1e+102,1e+103,1e+104,1e+105,1e+106,1e+107,1e+108,1e+109,1e+110,1e+111,1e+112,1e+113,1e+114,1e+115,1e+116,1e+117,1e+118,1e+119,1e+120, + 1e+121,1e+122,1e+123,1e+124,1e+125,1e+126,1e+127,1e+128,1e+129,1e+130,1e+131,1e+132,1e+133,1e+134,1e+135,1e+136,1e+137,1e+138,1e+139,1e+140, + 1e+141,1e+142,1e+143,1e+144,1e+145,1e+146,1e+147,1e+148,1e+149,1e+150,1e+151,1e+152,1e+153,1e+154,1e+155,1e+156,1e+157,1e+158,1e+159,1e+160, + 1e+161,1e+162,1e+163,1e+164,1e+165,1e+166,1e+167,1e+168,1e+169,1e+170,1e+171,1e+172,1e+173,1e+174,1e+175,1e+176,1e+177,1e+178,1e+179,1e+180, + 1e+181,1e+182,1e+183,1e+184,1e+185,1e+186,1e+187,1e+188,1e+189,1e+190,1e+191,1e+192,1e+193,1e+194,1e+195,1e+196,1e+197,1e+198,1e+199,1e+200, + 1e+201,1e+202,1e+203,1e+204,1e+205,1e+206,1e+207,1e+208,1e+209,1e+210,1e+211,1e+212,1e+213,1e+214,1e+215,1e+216,1e+217,1e+218,1e+219,1e+220, + 1e+221,1e+222,1e+223,1e+224,1e+225,1e+226,1e+227,1e+228,1e+229,1e+230,1e+231,1e+232,1e+233,1e+234,1e+235,1e+236,1e+237,1e+238,1e+239,1e+240, + 1e+241,1e+242,1e+243,1e+244,1e+245,1e+246,1e+247,1e+248,1e+249,1e+250,1e+251,1e+252,1e+253,1e+254,1e+255,1e+256,1e+257,1e+258,1e+259,1e+260, + 1e+261,1e+262,1e+263,1e+264,1e+265,1e+266,1e+267,1e+268,1e+269,1e+270,1e+271,1e+272,1e+273,1e+274,1e+275,1e+276,1e+277,1e+278,1e+279,1e+280, + 1e+281,1e+282,1e+283,1e+284,1e+285,1e+286,1e+287,1e+288,1e+289,1e+290,1e+291,1e+292,1e+293,1e+294,1e+295,1e+296,1e+297,1e+298,1e+299,1e+300, + 1e+301,1e+302,1e+303,1e+304,1e+305,1e+306,1e+307,1e+308 + }; + RAPIDJSON_ASSERT(n >= 0 && n <= 308); + return e[n]; +} + +} // namespace internal +RAPIDJSON_NAMESPACE_END + +#endif // RAPIDJSON_POW10_ diff --git a/xmrstak/rapidjson/internal/regex.h b/xmrstak/rapidjson/internal/regex.h new file mode 100644 index 0000000..8530cd7 --- /dev/null +++ b/xmrstak/rapidjson/internal/regex.h @@ -0,0 +1,731 @@ +// Tencent is pleased to support the open source community by making RapidJSON available. +// +// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. +// +// Licensed under the MIT License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// http://opensource.org/licenses/MIT +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef RAPIDJSON_INTERNAL_REGEX_H_ +#define RAPIDJSON_INTERNAL_REGEX_H_ + +#include "../allocators.h" +#include "../stream.h" +#include "stack.h" + +#ifdef __clang__ +RAPIDJSON_DIAG_PUSH +RAPIDJSON_DIAG_OFF(padded) +RAPIDJSON_DIAG_OFF(switch-enum) +RAPIDJSON_DIAG_OFF(implicit-fallthrough) +#endif + +#ifdef __GNUC__ +RAPIDJSON_DIAG_PUSH +RAPIDJSON_DIAG_OFF(effc++) +#endif + +#ifdef _MSC_VER +RAPIDJSON_DIAG_PUSH +RAPIDJSON_DIAG_OFF(4512) // assignment operator could not be generated +#endif + +#ifndef RAPIDJSON_REGEX_VERBOSE +#define RAPIDJSON_REGEX_VERBOSE 0 +#endif + +RAPIDJSON_NAMESPACE_BEGIN +namespace internal { + +/////////////////////////////////////////////////////////////////////////////// +// DecodedStream + +template <typename SourceStream, typename Encoding> +class DecodedStream { +public: + DecodedStream(SourceStream& ss) : ss_(ss), codepoint_() { Decode(); } + unsigned Peek() { return codepoint_; } + unsigned Take() { + unsigned c = codepoint_; + if (c) // No further decoding when '\0' + Decode(); + return c; + } + +private: + void Decode() { + if (!Encoding::Decode(ss_, &codepoint_)) + codepoint_ = 0; + } + + SourceStream& ss_; + unsigned codepoint_; +}; + +/////////////////////////////////////////////////////////////////////////////// +// GenericRegex + +static const SizeType kRegexInvalidState = ~SizeType(0); //!< Represents an invalid index in GenericRegex::State::out, out1 +static const SizeType kRegexInvalidRange = ~SizeType(0); + +template <typename Encoding, typename Allocator> +class GenericRegexSearch; + +//! Regular expression engine with subset of ECMAscript grammar. +/*! + Supported regular expression syntax: + - \c ab Concatenation + - \c a|b Alternation + - \c a? Zero or one + - \c a* Zero or more + - \c a+ One or more + - \c a{3} Exactly 3 times + - \c a{3,} At least 3 times + - \c a{3,5} 3 to 5 times + - \c (ab) Grouping + - \c ^a At the beginning + - \c a$ At the end + - \c . Any character + - \c [abc] Character classes + - \c [a-c] Character class range + - \c [a-z0-9_] Character class combination + - \c [^abc] Negated character classes + - \c [^a-c] Negated character class range + - \c [\b] Backspace (U+0008) + - \c \\| \\\\ ... Escape characters + - \c \\f Form feed (U+000C) + - \c \\n Line feed (U+000A) + - \c \\r Carriage return (U+000D) + - \c \\t Tab (U+0009) + - \c \\v Vertical tab (U+000B) + + \note This is a Thompson NFA engine, implemented with reference to + Cox, Russ. "Regular Expression Matching Can Be Simple And Fast (but is slow in Java, Perl, PHP, Python, Ruby,...).", + https://swtch.com/~rsc/regexp/regexp1.html +*/ +template <typename Encoding, typename Allocator = CrtAllocator> +class GenericRegex { +public: + typedef Encoding EncodingType; + typedef typename Encoding::Ch Ch; + template <typename, typename> friend class GenericRegexSearch; + + GenericRegex(const Ch* source, Allocator* allocator = 0) : + states_(allocator, 256), ranges_(allocator, 256), root_(kRegexInvalidState), stateCount_(), rangeCount_(), + anchorBegin_(), anchorEnd_() + { + GenericStringStream<Encoding> ss(source); + DecodedStream<GenericStringStream<Encoding>, Encoding> ds(ss); + Parse(ds); + } + + ~GenericRegex() {} + + bool IsValid() const { + return root_ != kRegexInvalidState; + } + +private: + enum Operator { + kZeroOrOne, + kZeroOrMore, + kOneOrMore, + kConcatenation, + kAlternation, + kLeftParenthesis + }; + + static const unsigned kAnyCharacterClass = 0xFFFFFFFF; //!< For '.' + static const unsigned kRangeCharacterClass = 0xFFFFFFFE; + static const unsigned kRangeNegationFlag = 0x80000000; + + struct Range { + unsigned start; // + unsigned end; + SizeType next; + }; + + struct State { + SizeType out; //!< Equals to kInvalid for matching state + SizeType out1; //!< Equals to non-kInvalid for split + SizeType rangeStart; + unsigned codepoint; + }; + + struct Frag { + Frag(SizeType s, SizeType o, SizeType m) : start(s), out(o), minIndex(m) {} + SizeType start; + SizeType out; //!< link-list of all output states + SizeType minIndex; + }; + + State& GetState(SizeType index) { + RAPIDJSON_ASSERT(index < stateCount_); + return states_.template Bottom<State>()[index]; + } + + const State& GetState(SizeType index) const { + RAPIDJSON_ASSERT(index < stateCount_); + return states_.template Bottom<State>()[index]; + } + + Range& GetRange(SizeType index) { + RAPIDJSON_ASSERT(index < rangeCount_); + return ranges_.template Bottom<Range>()[index]; + } + + const Range& GetRange(SizeType index) const { + RAPIDJSON_ASSERT(index < rangeCount_); + return ranges_.template Bottom<Range>()[index]; + } + + template <typename InputStream> + void Parse(DecodedStream<InputStream, Encoding>& ds) { + Allocator allocator; + Stack<Allocator> operandStack(&allocator, 256); // Frag + Stack<Allocator> operatorStack(&allocator, 256); // Operator + Stack<Allocator> atomCountStack(&allocator, 256); // unsigned (Atom per parenthesis) + + *atomCountStack.template Push<unsigned>() = 0; + + unsigned codepoint; + while (ds.Peek() != 0) { + switch (codepoint = ds.Take()) { + case '^': + anchorBegin_ = true; + break; + + case '$': + anchorEnd_ = true; + break; + + case '|': + while (!operatorStack.Empty() && *operatorStack.template Top<Operator>() < kAlternation) + if (!Eval(operandStack, *operatorStack.template Pop<Operator>(1))) + return; + *operatorStack.template Push<Operator>() = kAlternation; + *atomCountStack.template Top<unsigned>() = 0; + break; + + case '(': + *operatorStack.template Push<Operator>() = kLeftParenthesis; + *atomCountStack.template Push<unsigned>() = 0; + break; + + case ')': + while (!operatorStack.Empty() && *operatorStack.template Top<Operator>() != kLeftParenthesis) + if (!Eval(operandStack, *operatorStack.template Pop<Operator>(1))) + return; + if (operatorStack.Empty()) + return; + operatorStack.template Pop<Operator>(1); + atomCountStack.template Pop<unsigned>(1); + ImplicitConcatenation(atomCountStack, operatorStack); + break; + + case '?': + if (!Eval(operandStack, kZeroOrOne)) + return; + break; + + case '*': + if (!Eval(operandStack, kZeroOrMore)) + return; + break; + + case '+': + if (!Eval(operandStack, kOneOrMore)) + return; + break; + + case '{': + { + unsigned n, m; + if (!ParseUnsigned(ds, &n)) + return; + + if (ds.Peek() == ',') { + ds.Take(); + if (ds.Peek() == '}') + m = kInfinityQuantifier; + else if (!ParseUnsigned(ds, &m) || m < n) + return; + } + else + m = n; + + if (!EvalQuantifier(operandStack, n, m) || ds.Peek() != '}') + return; + ds.Take(); + } + break; + + case '.': + PushOperand(operandStack, kAnyCharacterClass); + ImplicitConcatenation(atomCountStack, operatorStack); + break; + + case '[': + { + SizeType range; + if (!ParseRange(ds, &range)) + return; + SizeType s = NewState(kRegexInvalidState, kRegexInvalidState, kRangeCharacterClass); + GetState(s).rangeStart = range; + *operandStack.template Push<Frag>() = Frag(s, s, s); + } + ImplicitConcatenation(atomCountStack, operatorStack); + break; + + case '\\': // Escape character + if (!CharacterEscape(ds, &codepoint)) + return; // Unsupported escape character + // fall through to default + + default: // Pattern character + PushOperand(operandStack, codepoint); + ImplicitConcatenation(atomCountStack, operatorStack); + } + } + + while (!operatorStack.Empty()) + if (!Eval(operandStack, *operatorStack.template Pop<Operator>(1))) + return; + + // Link the operand to matching state. + if (operandStack.GetSize() == sizeof(Frag)) { + Frag* e = operandStack.template Pop<Frag>(1); + Patch(e->out, NewState(kRegexInvalidState, kRegexInvalidState, 0)); + root_ = e->start; + +#if RAPIDJSON_REGEX_VERBOSE + printf("root: %d\n", root_); + for (SizeType i = 0; i < stateCount_ ; i++) { + State& s = GetState(i); + printf("[%2d] out: %2d out1: %2d c: '%c'\n", i, s.out, s.out1, (char)s.codepoint); + } + printf("\n"); +#endif + } + } + + SizeType NewState(SizeType out, SizeType out1, unsigned codepoint) { + State* s = states_.template Push<State>(); + s->out = out; + s->out1 = out1; + s->codepoint = codepoint; + s->rangeStart = kRegexInvalidRange; + return stateCount_++; + } + + void PushOperand(Stack<Allocator>& operandStack, unsigned codepoint) { + SizeType s = NewState(kRegexInvalidState, kRegexInvalidState, codepoint); + *operandStack.template Push<Frag>() = Frag(s, s, s); + } + + void ImplicitConcatenation(Stack<Allocator>& atomCountStack, Stack<Allocator>& operatorStack) { + if (*atomCountStack.template Top<unsigned>()) + *operatorStack.template Push<Operator>() = kConcatenation; + (*atomCountStack.template Top<unsigned>())++; + } + + SizeType Append(SizeType l1, SizeType l2) { + SizeType old = l1; + while (GetState(l1).out != kRegexInvalidState) + l1 = GetState(l1).out; + GetState(l1).out = l2; + return old; + } + + void Patch(SizeType l, SizeType s) { + for (SizeType next; l != kRegexInvalidState; l = next) { + next = GetState(l).out; + GetState(l).out = s; + } + } + + bool Eval(Stack<Allocator>& operandStack, Operator op) { + switch (op) { + case kConcatenation: + RAPIDJSON_ASSERT(operandStack.GetSize() >= sizeof(Frag) * 2); + { + Frag e2 = *operandStack.template Pop<Frag>(1); + Frag e1 = *operandStack.template Pop<Frag>(1); + Patch(e1.out, e2.start); + *operandStack.template Push<Frag>() = Frag(e1.start, e2.out, Min(e1.minIndex, e2.minIndex)); + } + return true; + + case kAlternation: + if (operandStack.GetSize() >= sizeof(Frag) * 2) { + Frag e2 = *operandStack.template Pop<Frag>(1); + Frag e1 = *operandStack.template Pop<Frag>(1); + SizeType s = NewState(e1.start, e2.start, 0); + *operandStack.template Push<Frag>() = Frag(s, Append(e1.out, e2.out), Min(e1.minIndex, e2.minIndex)); + return true; + } + return false; + + case kZeroOrOne: + if (operandStack.GetSize() >= sizeof(Frag)) { + Frag e = *operandStack.template Pop<Frag>(1); + SizeType s = NewState(kRegexInvalidState, e.start, 0); + *operandStack.template Push<Frag>() = Frag(s, Append(e.out, s), e.minIndex); + return true; + } + return false; + + case kZeroOrMore: + if (operandStack.GetSize() >= sizeof(Frag)) { + Frag e = *operandStack.template Pop<Frag>(1); + SizeType s = NewState(kRegexInvalidState, e.start, 0); + Patch(e.out, s); + *operandStack.template Push<Frag>() = Frag(s, s, e.minIndex); + return true; + } + return false; + + default: + RAPIDJSON_ASSERT(op == kOneOrMore); + if (operandStack.GetSize() >= sizeof(Frag)) { + Frag e = *operandStack.template Pop<Frag>(1); + SizeType s = NewState(kRegexInvalidState, e.start, 0); + Patch(e.out, s); + *operandStack.template Push<Frag>() = Frag(e.start, s, e.minIndex); + return true; + } + return false; + } + } + + bool EvalQuantifier(Stack<Allocator>& operandStack, unsigned n, unsigned m) { + RAPIDJSON_ASSERT(n <= m); + RAPIDJSON_ASSERT(operandStack.GetSize() >= sizeof(Frag)); + + if (n == 0) { + if (m == 0) // a{0} not support + return false; + else if (m == kInfinityQuantifier) + Eval(operandStack, kZeroOrMore); // a{0,} -> a* + else { + Eval(operandStack, kZeroOrOne); // a{0,5} -> a? + for (unsigned i = 0; i < m - 1; i++) + CloneTopOperand(operandStack); // a{0,5} -> a? a? a? a? a? + for (unsigned i = 0; i < m - 1; i++) + Eval(operandStack, kConcatenation); // a{0,5} -> a?a?a?a?a? + } + return true; + } + + for (unsigned i = 0; i < n - 1; i++) // a{3} -> a a a + CloneTopOperand(operandStack); + + if (m == kInfinityQuantifier) + Eval(operandStack, kOneOrMore); // a{3,} -> a a a+ + else if (m > n) { + CloneTopOperand(operandStack); // a{3,5} -> a a a a + Eval(operandStack, kZeroOrOne); // a{3,5} -> a a a a? + for (unsigned i = n; i < m - 1; i++) + CloneTopOperand(operandStack); // a{3,5} -> a a a a? a? + for (unsigned i = n; i < m; i++) + Eval(operandStack, kConcatenation); // a{3,5} -> a a aa?a? + } + + for (unsigned i = 0; i < n - 1; i++) + Eval(operandStack, kConcatenation); // a{3} -> aaa, a{3,} -> aaa+, a{3.5} -> aaaa?a? + + return true; + } + + static SizeType Min(SizeType a, SizeType b) { return a < b ? a : b; } + + void CloneTopOperand(Stack<Allocator>& operandStack) { + const Frag src = *operandStack.template Top<Frag>(); // Copy constructor to prevent invalidation + SizeType count = stateCount_ - src.minIndex; // Assumes top operand contains states in [src->minIndex, stateCount_) + State* s = states_.template Push<State>(count); + memcpy(s, &GetState(src.minIndex), count * sizeof(State)); + for (SizeType j = 0; j < count; j++) { + if (s[j].out != kRegexInvalidState) + s[j].out += count; + if (s[j].out1 != kRegexInvalidState) + s[j].out1 += count; + } + *operandStack.template Push<Frag>() = Frag(src.start + count, src.out + count, src.minIndex + count); + stateCount_ += count; + } + + template <typename InputStream> + bool ParseUnsigned(DecodedStream<InputStream, Encoding>& ds, unsigned* u) { + unsigned r = 0; + if (ds.Peek() < '0' || ds.Peek() > '9') + return false; + while (ds.Peek() >= '0' && ds.Peek() <= '9') { + if (r >= 429496729 && ds.Peek() > '5') // 2^32 - 1 = 4294967295 + return false; // overflow + r = r * 10 + (ds.Take() - '0'); + } + *u = r; + return true; + } + + template <typename InputStream> + bool ParseRange(DecodedStream<InputStream, Encoding>& ds, SizeType* range) { + bool isBegin = true; + bool negate = false; + int step = 0; + SizeType start = kRegexInvalidRange; + SizeType current = kRegexInvalidRange; + unsigned codepoint; + while ((codepoint = ds.Take()) != 0) { + if (isBegin) { + isBegin = false; + if (codepoint == '^') { + negate = true; + continue; + } + } + + switch (codepoint) { + case ']': + if (start == kRegexInvalidRange) + return false; // Error: nothing inside [] + if (step == 2) { // Add trailing '-' + SizeType r = NewRange('-'); + RAPIDJSON_ASSERT(current != kRegexInvalidRange); + GetRange(current).next = r; + } + if (negate) + GetRange(start).start |= kRangeNegationFlag; + *range = start; + return true; + + case '\\': + if (ds.Peek() == 'b') { + ds.Take(); + codepoint = 0x0008; // Escape backspace character + } + else if (!CharacterEscape(ds, &codepoint)) + return false; + // fall through to default + + default: + switch (step) { + case 1: + if (codepoint == '-') { + step++; + break; + } + // fall through to step 0 for other characters + + case 0: + { + SizeType r = NewRange(codepoint); + if (current != kRegexInvalidRange) + GetRange(current).next = r; + if (start == kRegexInvalidRange) + start = r; + current = r; + } + step = 1; + break; + + default: + RAPIDJSON_ASSERT(step == 2); + GetRange(current).end = codepoint; + step = 0; + } + } + } + return false; + } + + SizeType NewRange(unsigned codepoint) { + Range* r = ranges_.template Push<Range>(); + r->start = r->end = codepoint; + r->next = kRegexInvalidRange; + return rangeCount_++; + } + + template <typename InputStream> + bool CharacterEscape(DecodedStream<InputStream, Encoding>& ds, unsigned* escapedCodepoint) { + unsigned codepoint; + switch (codepoint = ds.Take()) { + case '^': + case '$': + case '|': + case '(': + case ')': + case '?': + case '*': + case '+': + case '.': + case '[': + case ']': + case '{': + case '}': + case '\\': + *escapedCodepoint = codepoint; return true; + case 'f': *escapedCodepoint = 0x000C; return true; + case 'n': *escapedCodepoint = 0x000A; return true; + case 'r': *escapedCodepoint = 0x000D; return true; + case 't': *escapedCodepoint = 0x0009; return true; + case 'v': *escapedCodepoint = 0x000B; return true; + default: + return false; // Unsupported escape character + } + } + + Stack<Allocator> states_; + Stack<Allocator> ranges_; + SizeType root_; + SizeType stateCount_; + SizeType rangeCount_; + + static const unsigned kInfinityQuantifier = ~0u; + + // For SearchWithAnchoring() + bool anchorBegin_; + bool anchorEnd_; +}; + +template <typename RegexType, typename Allocator = CrtAllocator> +class GenericRegexSearch { +public: + typedef typename RegexType::EncodingType Encoding; + typedef typename Encoding::Ch Ch; + + GenericRegexSearch(const RegexType& regex, Allocator* allocator = 0) : + regex_(regex), allocator_(allocator), ownAllocator_(0), + state0_(allocator, 0), state1_(allocator, 0), stateSet_() + { + RAPIDJSON_ASSERT(regex_.IsValid()); + if (!allocator_) + ownAllocator_ = allocator_ = RAPIDJSON_NEW(Allocator()); + stateSet_ = static_cast<unsigned*>(allocator_->Malloc(GetStateSetSize())); + state0_.template Reserve<SizeType>(regex_.stateCount_); + state1_.template Reserve<SizeType>(regex_.stateCount_); + } + + ~GenericRegexSearch() { + Allocator::Free(stateSet_); + RAPIDJSON_DELETE(ownAllocator_); + } + + template <typename InputStream> + bool Match(InputStream& is) { + return SearchWithAnchoring(is, true, true); + } + + bool Match(const Ch* s) { + GenericStringStream<Encoding> is(s); + return Match(is); + } + + template <typename InputStream> + bool Search(InputStream& is) { + return SearchWithAnchoring(is, regex_.anchorBegin_, regex_.anchorEnd_); + } + + bool Search(const Ch* s) { + GenericStringStream<Encoding> is(s); + return Search(is); + } + +private: + typedef typename RegexType::State State; + typedef typename RegexType::Range Range; + + template <typename InputStream> + bool SearchWithAnchoring(InputStream& is, bool anchorBegin, bool anchorEnd) { + DecodedStream<InputStream, Encoding> ds(is); + + state0_.Clear(); + Stack<Allocator> *current = &state0_, *next = &state1_; + const size_t stateSetSize = GetStateSetSize(); + std::memset(stateSet_, 0, stateSetSize); + + bool matched = AddState(*current, regex_.root_); + unsigned codepoint; + while (!current->Empty() && (codepoint = ds.Take()) != 0) { + std::memset(stateSet_, 0, stateSetSize); + next->Clear(); + matched = false; + for (const SizeType* s = current->template Bottom<SizeType>(); s != current->template End<SizeType>(); ++s) { + const State& sr = regex_.GetState(*s); + if (sr.codepoint == codepoint || + sr.codepoint == RegexType::kAnyCharacterClass || + (sr.codepoint == RegexType::kRangeCharacterClass && MatchRange(sr.rangeStart, codepoint))) + { + matched = AddState(*next, sr.out) || matched; + if (!anchorEnd && matched) + return true; + } + if (!anchorBegin) + AddState(*next, regex_.root_); + } + internal::Swap(current, next); + } + + return matched; + } + + size_t GetStateSetSize() const { + return (regex_.stateCount_ + 31) / 32 * 4; + } + + // Return whether the added states is a match state + bool AddState(Stack<Allocator>& l, SizeType index) { + RAPIDJSON_ASSERT(index != kRegexInvalidState); + + const State& s = regex_.GetState(index); + if (s.out1 != kRegexInvalidState) { // Split + bool matched = AddState(l, s.out); + return AddState(l, s.out1) || matched; + } + else if (!(stateSet_[index >> 5] & (1 << (index & 31)))) { + stateSet_[index >> 5] |= (1 << (index & 31)); + *l.template PushUnsafe<SizeType>() = index; + } + return s.out == kRegexInvalidState; // by using PushUnsafe() above, we can ensure s is not validated due to reallocation. + } + + bool MatchRange(SizeType rangeIndex, unsigned codepoint) const { + bool yes = (regex_.GetRange(rangeIndex).start & RegexType::kRangeNegationFlag) == 0; + while (rangeIndex != kRegexInvalidRange) { + const Range& r = regex_.GetRange(rangeIndex); + if (codepoint >= (r.start & ~RegexType::kRangeNegationFlag) && codepoint <= r.end) + return yes; + rangeIndex = r.next; + } + return !yes; + } + + const RegexType& regex_; + Allocator* allocator_; + Allocator* ownAllocator_; + Stack<Allocator> state0_; + Stack<Allocator> state1_; + uint32_t* stateSet_; +}; + +typedef GenericRegex<UTF8<> > Regex; +typedef GenericRegexSearch<Regex> RegexSearch; + +} // namespace internal +RAPIDJSON_NAMESPACE_END + +#ifdef __clang__ +RAPIDJSON_DIAG_POP +#endif + +#ifdef _MSC_VER +RAPIDJSON_DIAG_POP +#endif + +#endif // RAPIDJSON_INTERNAL_REGEX_H_ diff --git a/xmrstak/rapidjson/internal/stack.h b/xmrstak/rapidjson/internal/stack.h new file mode 100644 index 0000000..022c9aa --- /dev/null +++ b/xmrstak/rapidjson/internal/stack.h @@ -0,0 +1,230 @@ +// Tencent is pleased to support the open source community by making RapidJSON available. +// +// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. +// +// Licensed under the MIT License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// http://opensource.org/licenses/MIT +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef RAPIDJSON_INTERNAL_STACK_H_ +#define RAPIDJSON_INTERNAL_STACK_H_ + +#include "../allocators.h" +#include "swap.h" + +#if defined(__clang__) +RAPIDJSON_DIAG_PUSH +RAPIDJSON_DIAG_OFF(c++98-compat) +#endif + +RAPIDJSON_NAMESPACE_BEGIN +namespace internal { + +/////////////////////////////////////////////////////////////////////////////// +// Stack + +//! A type-unsafe stack for storing different types of data. +/*! \tparam Allocator Allocator for allocating stack memory. +*/ +template <typename Allocator> +class Stack { +public: + // Optimization note: Do not allocate memory for stack_ in constructor. + // Do it lazily when first Push() -> Expand() -> Resize(). + Stack(Allocator* allocator, size_t stackCapacity) : allocator_(allocator), ownAllocator_(0), stack_(0), stackTop_(0), stackEnd_(0), initialCapacity_(stackCapacity) { + } + +#if RAPIDJSON_HAS_CXX11_RVALUE_REFS + Stack(Stack&& rhs) + : allocator_(rhs.allocator_), + ownAllocator_(rhs.ownAllocator_), + stack_(rhs.stack_), + stackTop_(rhs.stackTop_), + stackEnd_(rhs.stackEnd_), + initialCapacity_(rhs.initialCapacity_) + { + rhs.allocator_ = 0; + rhs.ownAllocator_ = 0; + rhs.stack_ = 0; + rhs.stackTop_ = 0; + rhs.stackEnd_ = 0; + rhs.initialCapacity_ = 0; + } +#endif + + ~Stack() { + Destroy(); + } + +#if RAPIDJSON_HAS_CXX11_RVALUE_REFS + Stack& operator=(Stack&& rhs) { + if (&rhs != this) + { + Destroy(); + + allocator_ = rhs.allocator_; + ownAllocator_ = rhs.ownAllocator_; + stack_ = rhs.stack_; + stackTop_ = rhs.stackTop_; + stackEnd_ = rhs.stackEnd_; + initialCapacity_ = rhs.initialCapacity_; + + rhs.allocator_ = 0; + rhs.ownAllocator_ = 0; + rhs.stack_ = 0; + rhs.stackTop_ = 0; + rhs.stackEnd_ = 0; + rhs.initialCapacity_ = 0; + } + return *this; + } +#endif + + void Swap(Stack& rhs) RAPIDJSON_NOEXCEPT { + internal::Swap(allocator_, rhs.allocator_); + internal::Swap(ownAllocator_, rhs.ownAllocator_); + internal::Swap(stack_, rhs.stack_); + internal::Swap(stackTop_, rhs.stackTop_); + internal::Swap(stackEnd_, rhs.stackEnd_); + internal::Swap(initialCapacity_, rhs.initialCapacity_); + } + + void Clear() { stackTop_ = stack_; } + + void ShrinkToFit() { + if (Empty()) { + // If the stack is empty, completely deallocate the memory. + Allocator::Free(stack_); + stack_ = 0; + stackTop_ = 0; + stackEnd_ = 0; + } + else + Resize(GetSize()); + } + + // Optimization note: try to minimize the size of this function for force inline. + // Expansion is run very infrequently, so it is moved to another (probably non-inline) function. + template<typename T> + RAPIDJSON_FORCEINLINE void Reserve(size_t count = 1) { + // Expand the stack if needed + if (RAPIDJSON_UNLIKELY(stackTop_ + sizeof(T) * count > stackEnd_)) + Expand<T>(count); + } + + template<typename T> + RAPIDJSON_FORCEINLINE T* Push(size_t count = 1) { + Reserve<T>(count); + return PushUnsafe<T>(count); + } + + template<typename T> + RAPIDJSON_FORCEINLINE T* PushUnsafe(size_t count = 1) { + RAPIDJSON_ASSERT(stackTop_ + sizeof(T) * count <= stackEnd_); + T* ret = reinterpret_cast<T*>(stackTop_); + stackTop_ += sizeof(T) * count; + return ret; + } + + template<typename T> + T* Pop(size_t count) { + RAPIDJSON_ASSERT(GetSize() >= count * sizeof(T)); + stackTop_ -= count * sizeof(T); + return reinterpret_cast<T*>(stackTop_); + } + + template<typename T> + T* Top() { + RAPIDJSON_ASSERT(GetSize() >= sizeof(T)); + return reinterpret_cast<T*>(stackTop_ - sizeof(T)); + } + + template<typename T> + const T* Top() const { + RAPIDJSON_ASSERT(GetSize() >= sizeof(T)); + return reinterpret_cast<T*>(stackTop_ - sizeof(T)); + } + + template<typename T> + T* End() { return reinterpret_cast<T*>(stackTop_); } + + template<typename T> + const T* End() const { return reinterpret_cast<T*>(stackTop_); } + + template<typename T> + T* Bottom() { return reinterpret_cast<T*>(stack_); } + + template<typename T> + const T* Bottom() const { return reinterpret_cast<T*>(stack_); } + + bool HasAllocator() const { + return allocator_ != 0; + } + + Allocator& GetAllocator() { + RAPIDJSON_ASSERT(allocator_); + return *allocator_; + } + + bool Empty() const { return stackTop_ == stack_; } + size_t GetSize() const { return static_cast<size_t>(stackTop_ - stack_); } + size_t GetCapacity() const { return static_cast<size_t>(stackEnd_ - stack_); } + +private: + template<typename T> + void Expand(size_t count) { + // Only expand the capacity if the current stack exists. Otherwise just create a stack with initial capacity. + size_t newCapacity; + if (stack_ == 0) { + if (!allocator_) + ownAllocator_ = allocator_ = RAPIDJSON_NEW(Allocator()); + newCapacity = initialCapacity_; + } else { + newCapacity = GetCapacity(); + newCapacity += (newCapacity + 1) / 2; + } + size_t newSize = GetSize() + sizeof(T) * count; + if (newCapacity < newSize) + newCapacity = newSize; + + Resize(newCapacity); + } + + void Resize(size_t newCapacity) { + const size_t size = GetSize(); // Backup the current size + stack_ = static_cast<char*>(allocator_->Realloc(stack_, GetCapacity(), newCapacity)); + stackTop_ = stack_ + size; + stackEnd_ = stack_ + newCapacity; + } + + void Destroy() { + Allocator::Free(stack_); + RAPIDJSON_DELETE(ownAllocator_); // Only delete if it is owned by the stack + } + + // Prohibit copy constructor & assignment operator. + Stack(const Stack&); + Stack& operator=(const Stack&); + + Allocator* allocator_; + Allocator* ownAllocator_; + char *stack_; + char *stackTop_; + char *stackEnd_; + size_t initialCapacity_; +}; + +} // namespace internal +RAPIDJSON_NAMESPACE_END + +#if defined(__clang__) +RAPIDJSON_DIAG_POP +#endif + +#endif // RAPIDJSON_STACK_H_ diff --git a/xmrstak/rapidjson/internal/strfunc.h b/xmrstak/rapidjson/internal/strfunc.h new file mode 100644 index 0000000..de41d8f --- /dev/null +++ b/xmrstak/rapidjson/internal/strfunc.h @@ -0,0 +1,58 @@ +// Tencent is pleased to support the open source community by making RapidJSON available. +// +// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. +// +// Licensed under the MIT License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// http://opensource.org/licenses/MIT +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef RAPIDJSON_INTERNAL_STRFUNC_H_ +#define RAPIDJSON_INTERNAL_STRFUNC_H_ + +#include "../stream.h" + +RAPIDJSON_NAMESPACE_BEGIN +namespace internal { + +//! Custom strlen() which works on different character types. +/*! \tparam Ch Character type (e.g. char, wchar_t, short) + \param s Null-terminated input string. + \return Number of characters in the string. + \note This has the same semantics as strlen(), the return value is not number of Unicode codepoints. +*/ +template <typename Ch> +inline SizeType StrLen(const Ch* s) { + RAPIDJSON_ASSERT(s != 0); + const Ch* p = s; + while (*p) ++p; + return SizeType(p - s); +} + +//! Returns number of code points in a encoded string. +template<typename Encoding> +bool CountStringCodePoint(const typename Encoding::Ch* s, SizeType length, SizeType* outCount) { + RAPIDJSON_ASSERT(s != 0); + RAPIDJSON_ASSERT(outCount != 0); + GenericStringStream<Encoding> is(s); + const typename Encoding::Ch* end = s + length; + SizeType count = 0; + while (is.src_ < end) { + unsigned codepoint; + if (!Encoding::Decode(is, &codepoint)) + return false; + count++; + } + *outCount = count; + return true; +} + +} // namespace internal +RAPIDJSON_NAMESPACE_END + +#endif // RAPIDJSON_INTERNAL_STRFUNC_H_ diff --git a/xmrstak/rapidjson/internal/strtod.h b/xmrstak/rapidjson/internal/strtod.h new file mode 100644 index 0000000..289c413 --- /dev/null +++ b/xmrstak/rapidjson/internal/strtod.h @@ -0,0 +1,269 @@ +// Tencent is pleased to support the open source community by making RapidJSON available. +// +// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. +// +// Licensed under the MIT License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// http://opensource.org/licenses/MIT +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef RAPIDJSON_STRTOD_ +#define RAPIDJSON_STRTOD_ + +#include "ieee754.h" +#include "biginteger.h" +#include "diyfp.h" +#include "pow10.h" + +RAPIDJSON_NAMESPACE_BEGIN +namespace internal { + +inline double FastPath(double significand, int exp) { + if (exp < -308) + return 0.0; + else if (exp >= 0) + return significand * internal::Pow10(exp); + else + return significand / internal::Pow10(-exp); +} + +inline double StrtodNormalPrecision(double d, int p) { + if (p < -308) { + // Prevent expSum < -308, making Pow10(p) = 0 + d = FastPath(d, -308); + d = FastPath(d, p + 308); + } + else + d = FastPath(d, p); + return d; +} + +template <typename T> +inline T Min3(T a, T b, T c) { + T m = a; + if (m > b) m = b; + if (m > c) m = c; + return m; +} + +inline int CheckWithinHalfULP(double b, const BigInteger& d, int dExp) { + const Double db(b); + const uint64_t bInt = db.IntegerSignificand(); + const int bExp = db.IntegerExponent(); + const int hExp = bExp - 1; + + int dS_Exp2 = 0, dS_Exp5 = 0, bS_Exp2 = 0, bS_Exp5 = 0, hS_Exp2 = 0, hS_Exp5 = 0; + + // Adjust for decimal exponent + if (dExp >= 0) { + dS_Exp2 += dExp; + dS_Exp5 += dExp; + } + else { + bS_Exp2 -= dExp; + bS_Exp5 -= dExp; + hS_Exp2 -= dExp; + hS_Exp5 -= dExp; + } + + // Adjust for binary exponent + if (bExp >= 0) + bS_Exp2 += bExp; + else { + dS_Exp2 -= bExp; + hS_Exp2 -= bExp; + } + + // Adjust for half ulp exponent + if (hExp >= 0) + hS_Exp2 += hExp; + else { + dS_Exp2 -= hExp; + bS_Exp2 -= hExp; + } + + // Remove common power of two factor from all three scaled values + int common_Exp2 = Min3(dS_Exp2, bS_Exp2, hS_Exp2); + dS_Exp2 -= common_Exp2; + bS_Exp2 -= common_Exp2; + hS_Exp2 -= common_Exp2; + + BigInteger dS = d; + dS.MultiplyPow5(static_cast<unsigned>(dS_Exp5)) <<= static_cast<unsigned>(dS_Exp2); + + BigInteger bS(bInt); + bS.MultiplyPow5(static_cast<unsigned>(bS_Exp5)) <<= static_cast<unsigned>(bS_Exp2); + + BigInteger hS(1); + hS.MultiplyPow5(static_cast<unsigned>(hS_Exp5)) <<= static_cast<unsigned>(hS_Exp2); + + BigInteger delta(0); + dS.Difference(bS, &delta); + + return delta.Compare(hS); +} + +inline bool StrtodFast(double d, int p, double* result) { + // Use fast path for string-to-double conversion if possible + // see http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/ + if (p > 22 && p < 22 + 16) { + // Fast Path Cases In Disguise + d *= internal::Pow10(p - 22); + p = 22; + } + + if (p >= -22 && p <= 22 && d <= 9007199254740991.0) { // 2^53 - 1 + *result = FastPath(d, p); + return true; + } + else + return false; +} + +// Compute an approximation and see if it is within 1/2 ULP +inline bool StrtodDiyFp(const char* decimals, size_t length, size_t decimalPosition, int exp, double* result) { + uint64_t significand = 0; + size_t i = 0; // 2^64 - 1 = 18446744073709551615, 1844674407370955161 = 0x1999999999999999 + for (; i < length; i++) { + if (significand > RAPIDJSON_UINT64_C2(0x19999999, 0x99999999) || + (significand == RAPIDJSON_UINT64_C2(0x19999999, 0x99999999) && decimals[i] > '5')) + break; + significand = significand * 10u + static_cast<unsigned>(decimals[i] - '0'); + } + + if (i < length && decimals[i] >= '5') // Rounding + significand++; + + size_t remaining = length - i; + const unsigned kUlpShift = 3; + const unsigned kUlp = 1 << kUlpShift; + int64_t error = (remaining == 0) ? 0 : kUlp / 2; + + DiyFp v(significand, 0); + v = v.Normalize(); + error <<= -v.e; + + const int dExp = static_cast<int>(decimalPosition) - static_cast<int>(i) + exp; + + int actualExp; + DiyFp cachedPower = GetCachedPower10(dExp, &actualExp); + if (actualExp != dExp) { + static const DiyFp kPow10[] = { + DiyFp(RAPIDJSON_UINT64_C2(0xa0000000, 00000000), -60), // 10^1 + DiyFp(RAPIDJSON_UINT64_C2(0xc8000000, 00000000), -57), // 10^2 + DiyFp(RAPIDJSON_UINT64_C2(0xfa000000, 00000000), -54), // 10^3 + DiyFp(RAPIDJSON_UINT64_C2(0x9c400000, 00000000), -50), // 10^4 + DiyFp(RAPIDJSON_UINT64_C2(0xc3500000, 00000000), -47), // 10^5 + DiyFp(RAPIDJSON_UINT64_C2(0xf4240000, 00000000), -44), // 10^6 + DiyFp(RAPIDJSON_UINT64_C2(0x98968000, 00000000), -40) // 10^7 + }; + int adjustment = dExp - actualExp - 1; + RAPIDJSON_ASSERT(adjustment >= 0 && adjustment < 7); + v = v * kPow10[adjustment]; + if (length + static_cast<unsigned>(adjustment)> 19u) // has more digits than decimal digits in 64-bit + error += kUlp / 2; + } + + v = v * cachedPower; + + error += kUlp + (error == 0 ? 0 : 1); + + const int oldExp = v.e; + v = v.Normalize(); + error <<= oldExp - v.e; + + const unsigned effectiveSignificandSize = Double::EffectiveSignificandSize(64 + v.e); + unsigned precisionSize = 64 - effectiveSignificandSize; + if (precisionSize + kUlpShift >= 64) { + unsigned scaleExp = (precisionSize + kUlpShift) - 63; + v.f >>= scaleExp; + v.e += scaleExp; + error = (error >> scaleExp) + 1 + static_cast<int>(kUlp); + precisionSize -= scaleExp; + } + + DiyFp rounded(v.f >> precisionSize, v.e + static_cast<int>(precisionSize)); + const uint64_t precisionBits = (v.f & ((uint64_t(1) << precisionSize) - 1)) * kUlp; + const uint64_t halfWay = (uint64_t(1) << (precisionSize - 1)) * kUlp; + if (precisionBits >= halfWay + static_cast<unsigned>(error)) { + rounded.f++; + if (rounded.f & (DiyFp::kDpHiddenBit << 1)) { // rounding overflows mantissa (issue #340) + rounded.f >>= 1; + rounded.e++; + } + } + + *result = rounded.ToDouble(); + + return halfWay - static_cast<unsigned>(error) >= precisionBits || precisionBits >= halfWay + static_cast<unsigned>(error); +} + +inline double StrtodBigInteger(double approx, const char* decimals, size_t length, size_t decimalPosition, int exp) { + const BigInteger dInt(decimals, length); + const int dExp = static_cast<int>(decimalPosition) - static_cast<int>(length) + exp; + Double a(approx); + int cmp = CheckWithinHalfULP(a.Value(), dInt, dExp); + if (cmp < 0) + return a.Value(); // within half ULP + else if (cmp == 0) { + // Round towards even + if (a.Significand() & 1) + return a.NextPositiveDouble(); + else + return a.Value(); + } + else // adjustment + return a.NextPositiveDouble(); +} + +inline double StrtodFullPrecision(double d, int p, const char* decimals, size_t length, size_t decimalPosition, int exp) { + RAPIDJSON_ASSERT(d >= 0.0); + RAPIDJSON_ASSERT(length >= 1); + + double result; + if (StrtodFast(d, p, &result)) + return result; + + // Trim leading zeros + while (*decimals == '0' && length > 1) { + length--; + decimals++; + decimalPosition--; + } + + // Trim trailing zeros + while (decimals[length - 1] == '0' && length > 1) { + length--; + decimalPosition--; + exp++; + } + + // Trim right-most digits + const int kMaxDecimalDigit = 780; + if (static_cast<int>(length) > kMaxDecimalDigit) { + int delta = (static_cast<int>(length) - kMaxDecimalDigit); + exp += delta; + decimalPosition -= static_cast<unsigned>(delta); + length = kMaxDecimalDigit; + } + + // If too small, underflow to zero + if (int(length) + exp < -324) + return 0.0; + + if (StrtodDiyFp(decimals, length, decimalPosition, exp, &result)) + return result; + + // Use approximation from StrtodDiyFp and make adjustment with BigInteger comparison + return StrtodBigInteger(result, decimals, length, decimalPosition, exp); +} + +} // namespace internal +RAPIDJSON_NAMESPACE_END + +#endif // RAPIDJSON_STRTOD_ diff --git a/xmrstak/rapidjson/internal/swap.h b/xmrstak/rapidjson/internal/swap.h new file mode 100644 index 0000000..666e49f --- /dev/null +++ b/xmrstak/rapidjson/internal/swap.h @@ -0,0 +1,46 @@ +// Tencent is pleased to support the open source community by making RapidJSON available. +// +// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. +// +// Licensed under the MIT License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// http://opensource.org/licenses/MIT +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef RAPIDJSON_INTERNAL_SWAP_H_ +#define RAPIDJSON_INTERNAL_SWAP_H_ + +#include "../rapidjson.h" + +#if defined(__clang__) +RAPIDJSON_DIAG_PUSH +RAPIDJSON_DIAG_OFF(c++98-compat) +#endif + +RAPIDJSON_NAMESPACE_BEGIN +namespace internal { + +//! Custom swap() to avoid dependency on C++ <algorithm> header +/*! \tparam T Type of the arguments to swap, should be instantiated with primitive C++ types only. + \note This has the same semantics as std::swap(). +*/ +template <typename T> +inline void Swap(T& a, T& b) RAPIDJSON_NOEXCEPT { + T tmp = a; + a = b; + b = tmp; +} + +} // namespace internal +RAPIDJSON_NAMESPACE_END + +#if defined(__clang__) +RAPIDJSON_DIAG_POP +#endif + +#endif // RAPIDJSON_INTERNAL_SWAP_H_ diff --git a/xmrstak/rapidjson/istreamwrapper.h b/xmrstak/rapidjson/istreamwrapper.h new file mode 100644 index 0000000..f5fe289 --- /dev/null +++ b/xmrstak/rapidjson/istreamwrapper.h @@ -0,0 +1,115 @@ +// Tencent is pleased to support the open source community by making RapidJSON available. +// +// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. +// +// Licensed under the MIT License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// http://opensource.org/licenses/MIT +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef RAPIDJSON_ISTREAMWRAPPER_H_ +#define RAPIDJSON_ISTREAMWRAPPER_H_ + +#include "stream.h" +#include <iosfwd> + +#ifdef __clang__ +RAPIDJSON_DIAG_PUSH +RAPIDJSON_DIAG_OFF(padded) +#endif + +#ifdef _MSC_VER +RAPIDJSON_DIAG_PUSH +RAPIDJSON_DIAG_OFF(4351) // new behavior: elements of array 'array' will be default initialized +#endif + +RAPIDJSON_NAMESPACE_BEGIN + +//! Wrapper of \c std::basic_istream into RapidJSON's Stream concept. +/*! + The classes can be wrapped including but not limited to: + + - \c std::istringstream + - \c std::stringstream + - \c std::wistringstream + - \c std::wstringstream + - \c std::ifstream + - \c std::fstream + - \c std::wifstream + - \c std::wfstream + + \tparam StreamType Class derived from \c std::basic_istream. +*/ + +template <typename StreamType> +class BasicIStreamWrapper { +public: + typedef typename StreamType::char_type Ch; + BasicIStreamWrapper(StreamType& stream) : stream_(stream), count_(), peekBuffer_() {} + + Ch Peek() const { + typename StreamType::int_type c = stream_.peek(); + return RAPIDJSON_LIKELY(c != StreamType::traits_type::eof()) ? static_cast<Ch>(c) : '\0'; + } + + Ch Take() { + typename StreamType::int_type c = stream_.get(); + if (RAPIDJSON_LIKELY(c != StreamType::traits_type::eof())) { + count_++; + return static_cast<Ch>(c); + } + else + return '\0'; + } + + // tellg() may return -1 when failed. So we count by ourself. + size_t Tell() const { return count_; } + + Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; } + void Put(Ch) { RAPIDJSON_ASSERT(false); } + void Flush() { RAPIDJSON_ASSERT(false); } + size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; } + + // For encoding detection only. + const Ch* Peek4() const { + RAPIDJSON_ASSERT(sizeof(Ch) == 1); // Only usable for byte stream. + int i; + bool hasError = false; + for (i = 0; i < 4; ++i) { + typename StreamType::int_type c = stream_.get(); + if (c == StreamType::traits_type::eof()) { + hasError = true; + stream_.clear(); + break; + } + peekBuffer_[i] = static_cast<Ch>(c); + } + for (--i; i >= 0; --i) + stream_.putback(peekBuffer_[i]); + return !hasError ? peekBuffer_ : 0; + } + +private: + BasicIStreamWrapper(const BasicIStreamWrapper&); + BasicIStreamWrapper& operator=(const BasicIStreamWrapper&); + + StreamType& stream_; + size_t count_; //!< Number of characters read. Note: + mutable Ch peekBuffer_[4]; +}; + +typedef BasicIStreamWrapper<std::istream> IStreamWrapper; +typedef BasicIStreamWrapper<std::wistream> WIStreamWrapper; + +#if defined(__clang__) || defined(_MSC_VER) +RAPIDJSON_DIAG_POP +#endif + +RAPIDJSON_NAMESPACE_END + +#endif // RAPIDJSON_ISTREAMWRAPPER_H_ diff --git a/xmrstak/rapidjson/memorybuffer.h b/xmrstak/rapidjson/memorybuffer.h new file mode 100644 index 0000000..39bee1d --- /dev/null +++ b/xmrstak/rapidjson/memorybuffer.h @@ -0,0 +1,70 @@ +// Tencent is pleased to support the open source community by making RapidJSON available. +// +// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. +// +// Licensed under the MIT License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// http://opensource.org/licenses/MIT +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef RAPIDJSON_MEMORYBUFFER_H_ +#define RAPIDJSON_MEMORYBUFFER_H_ + +#include "stream.h" +#include "internal/stack.h" + +RAPIDJSON_NAMESPACE_BEGIN + +//! Represents an in-memory output byte stream. +/*! + This class is mainly for being wrapped by EncodedOutputStream or AutoUTFOutputStream. + + It is similar to FileWriteBuffer but the destination is an in-memory buffer instead of a file. + + Differences between MemoryBuffer and StringBuffer: + 1. StringBuffer has Encoding but MemoryBuffer is only a byte buffer. + 2. StringBuffer::GetString() returns a null-terminated string. MemoryBuffer::GetBuffer() returns a buffer without terminator. + + \tparam Allocator type for allocating memory buffer. + \note implements Stream concept +*/ +template <typename Allocator = CrtAllocator> +struct GenericMemoryBuffer { + typedef char Ch; // byte + + GenericMemoryBuffer(Allocator* allocator = 0, size_t capacity = kDefaultCapacity) : stack_(allocator, capacity) {} + + void Put(Ch c) { *stack_.template Push<Ch>() = c; } + void Flush() {} + + void Clear() { stack_.Clear(); } + void ShrinkToFit() { stack_.ShrinkToFit(); } + Ch* Push(size_t count) { return stack_.template Push<Ch>(count); } + void Pop(size_t count) { stack_.template Pop<Ch>(count); } + + const Ch* GetBuffer() const { + return stack_.template Bottom<Ch>(); + } + + size_t GetSize() const { return stack_.GetSize(); } + + static const size_t kDefaultCapacity = 256; + mutable internal::Stack<Allocator> stack_; +}; + +typedef GenericMemoryBuffer<> MemoryBuffer; + +//! Implement specialized version of PutN() with memset() for better performance. +template<> +inline void PutN(MemoryBuffer& memoryBuffer, char c, size_t n) { + std::memset(memoryBuffer.stack_.Push<char>(n), c, n * sizeof(c)); +} + +RAPIDJSON_NAMESPACE_END + +#endif // RAPIDJSON_MEMORYBUFFER_H_ diff --git a/xmrstak/rapidjson/memorystream.h b/xmrstak/rapidjson/memorystream.h new file mode 100644 index 0000000..1d71d8a --- /dev/null +++ b/xmrstak/rapidjson/memorystream.h @@ -0,0 +1,71 @@ +// Tencent is pleased to support the open source community by making RapidJSON available. +// +// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. +// +// Licensed under the MIT License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// http://opensource.org/licenses/MIT +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef RAPIDJSON_MEMORYSTREAM_H_ +#define RAPIDJSON_MEMORYSTREAM_H_ + +#include "stream.h" + +#ifdef __clang__ +RAPIDJSON_DIAG_PUSH +RAPIDJSON_DIAG_OFF(unreachable-code) +RAPIDJSON_DIAG_OFF(missing-noreturn) +#endif + +RAPIDJSON_NAMESPACE_BEGIN + +//! Represents an in-memory input byte stream. +/*! + This class is mainly for being wrapped by EncodedInputStream or AutoUTFInputStream. + + It is similar to FileReadBuffer but the source is an in-memory buffer instead of a file. + + Differences between MemoryStream and StringStream: + 1. StringStream has encoding but MemoryStream is a byte stream. + 2. MemoryStream needs size of the source buffer and the buffer don't need to be null terminated. StringStream assume null-terminated string as source. + 3. MemoryStream supports Peek4() for encoding detection. StringStream is specified with an encoding so it should not have Peek4(). + \note implements Stream concept +*/ +struct MemoryStream { + typedef char Ch; // byte + + MemoryStream(const Ch *src, size_t size) : src_(src), begin_(src), end_(src + size), size_(size) {} + + Ch Peek() const { return RAPIDJSON_UNLIKELY(src_ == end_) ? '\0' : *src_; } + Ch Take() { return RAPIDJSON_UNLIKELY(src_ == end_) ? '\0' : *src_++; } + size_t Tell() const { return static_cast<size_t>(src_ - begin_); } + + Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; } + void Put(Ch) { RAPIDJSON_ASSERT(false); } + void Flush() { RAPIDJSON_ASSERT(false); } + size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; } + + // For encoding detection only. + const Ch* Peek4() const { + return Tell() + 4 <= size_ ? src_ : 0; + } + + const Ch* src_; //!< Current read position. + const Ch* begin_; //!< Original head of the string. + const Ch* end_; //!< End of stream. + size_t size_; //!< Size of the stream. +}; + +RAPIDJSON_NAMESPACE_END + +#ifdef __clang__ +RAPIDJSON_DIAG_POP +#endif + +#endif // RAPIDJSON_MEMORYBUFFER_H_ diff --git a/xmrstak/rapidjson/msinttypes/inttypes.h b/xmrstak/rapidjson/msinttypes/inttypes.h new file mode 100644 index 0000000..1811128 --- /dev/null +++ b/xmrstak/rapidjson/msinttypes/inttypes.h @@ -0,0 +1,316 @@ +// ISO C9x compliant inttypes.h for Microsoft Visual Studio +// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124 +// +// Copyright (c) 2006-2013 Alexander Chemeris +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the product nor the names of its contributors may +// be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED +// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO +// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +/////////////////////////////////////////////////////////////////////////////// + +// The above software in this distribution may have been modified by +// THL A29 Limited ("Tencent Modifications"). +// All Tencent Modifications are Copyright (C) 2015 THL A29 Limited. + +#ifndef _MSC_VER // [ +#error "Use this header only with Microsoft Visual C++ compilers!" +#endif // _MSC_VER ] + +#ifndef _MSC_INTTYPES_H_ // [ +#define _MSC_INTTYPES_H_ + +#if _MSC_VER > 1000 +#pragma once +#endif + +#include "stdint.h" + +// miloyip: VC supports inttypes.h since VC2013 +#if _MSC_VER >= 1800 +#include <inttypes.h> +#else + +// 7.8 Format conversion of integer types + +typedef struct { + intmax_t quot; + intmax_t rem; +} imaxdiv_t; + +// 7.8.1 Macros for format specifiers + +#if !defined(__cplusplus) || defined(__STDC_FORMAT_MACROS) // [ See footnote 185 at page 198 + +// The fprintf macros for signed integers are: +#define PRId8 "d" +#define PRIi8 "i" +#define PRIdLEAST8 "d" +#define PRIiLEAST8 "i" +#define PRIdFAST8 "d" +#define PRIiFAST8 "i" + +#define PRId16 "hd" +#define PRIi16 "hi" +#define PRIdLEAST16 "hd" +#define PRIiLEAST16 "hi" +#define PRIdFAST16 "hd" +#define PRIiFAST16 "hi" + +#define PRId32 "I32d" +#define PRIi32 "I32i" +#define PRIdLEAST32 "I32d" +#define PRIiLEAST32 "I32i" +#define PRIdFAST32 "I32d" +#define PRIiFAST32 "I32i" + +#define PRId64 "I64d" +#define PRIi64 "I64i" +#define PRIdLEAST64 "I64d" +#define PRIiLEAST64 "I64i" +#define PRIdFAST64 "I64d" +#define PRIiFAST64 "I64i" + +#define PRIdMAX "I64d" +#define PRIiMAX "I64i" + +#define PRIdPTR "Id" +#define PRIiPTR "Ii" + +// The fprintf macros for unsigned integers are: +#define PRIo8 "o" +#define PRIu8 "u" +#define PRIx8 "x" +#define PRIX8 "X" +#define PRIoLEAST8 "o" +#define PRIuLEAST8 "u" +#define PRIxLEAST8 "x" +#define PRIXLEAST8 "X" +#define PRIoFAST8 "o" +#define PRIuFAST8 "u" +#define PRIxFAST8 "x" +#define PRIXFAST8 "X" + +#define PRIo16 "ho" +#define PRIu16 "hu" +#define PRIx16 "hx" +#define PRIX16 "hX" +#define PRIoLEAST16 "ho" +#define PRIuLEAST16 "hu" +#define PRIxLEAST16 "hx" +#define PRIXLEAST16 "hX" +#define PRIoFAST16 "ho" +#define PRIuFAST16 "hu" +#define PRIxFAST16 "hx" +#define PRIXFAST16 "hX" + +#define PRIo32 "I32o" +#define PRIu32 "I32u" +#define PRIx32 "I32x" +#define PRIX32 "I32X" +#define PRIoLEAST32 "I32o" +#define PRIuLEAST32 "I32u" +#define PRIxLEAST32 "I32x" +#define PRIXLEAST32 "I32X" +#define PRIoFAST32 "I32o" +#define PRIuFAST32 "I32u" +#define PRIxFAST32 "I32x" +#define PRIXFAST32 "I32X" + +#define PRIo64 "I64o" +#define PRIu64 "I64u" +#define PRIx64 "I64x" +#define PRIX64 "I64X" +#define PRIoLEAST64 "I64o" +#define PRIuLEAST64 "I64u" +#define PRIxLEAST64 "I64x" +#define PRIXLEAST64 "I64X" +#define PRIoFAST64 "I64o" +#define PRIuFAST64 "I64u" +#define PRIxFAST64 "I64x" +#define PRIXFAST64 "I64X" + +#define PRIoMAX "I64o" +#define PRIuMAX "I64u" +#define PRIxMAX "I64x" +#define PRIXMAX "I64X" + +#define PRIoPTR "Io" +#define PRIuPTR "Iu" +#define PRIxPTR "Ix" +#define PRIXPTR "IX" + +// The fscanf macros for signed integers are: +#define SCNd8 "d" +#define SCNi8 "i" +#define SCNdLEAST8 "d" +#define SCNiLEAST8 "i" +#define SCNdFAST8 "d" +#define SCNiFAST8 "i" + +#define SCNd16 "hd" +#define SCNi16 "hi" +#define SCNdLEAST16 "hd" +#define SCNiLEAST16 "hi" +#define SCNdFAST16 "hd" +#define SCNiFAST16 "hi" + +#define SCNd32 "ld" +#define SCNi32 "li" +#define SCNdLEAST32 "ld" +#define SCNiLEAST32 "li" +#define SCNdFAST32 "ld" +#define SCNiFAST32 "li" + +#define SCNd64 "I64d" +#define SCNi64 "I64i" +#define SCNdLEAST64 "I64d" +#define SCNiLEAST64 "I64i" +#define SCNdFAST64 "I64d" +#define SCNiFAST64 "I64i" + +#define SCNdMAX "I64d" +#define SCNiMAX "I64i" + +#ifdef _WIN64 // [ +# define SCNdPTR "I64d" +# define SCNiPTR "I64i" +#else // _WIN64 ][ +# define SCNdPTR "ld" +# define SCNiPTR "li" +#endif // _WIN64 ] + +// The fscanf macros for unsigned integers are: +#define SCNo8 "o" +#define SCNu8 "u" +#define SCNx8 "x" +#define SCNX8 "X" +#define SCNoLEAST8 "o" +#define SCNuLEAST8 "u" +#define SCNxLEAST8 "x" +#define SCNXLEAST8 "X" +#define SCNoFAST8 "o" +#define SCNuFAST8 "u" +#define SCNxFAST8 "x" +#define SCNXFAST8 "X" + +#define SCNo16 "ho" +#define SCNu16 "hu" +#define SCNx16 "hx" +#define SCNX16 "hX" +#define SCNoLEAST16 "ho" +#define SCNuLEAST16 "hu" +#define SCNxLEAST16 "hx" +#define SCNXLEAST16 "hX" +#define SCNoFAST16 "ho" +#define SCNuFAST16 "hu" +#define SCNxFAST16 "hx" +#define SCNXFAST16 "hX" + +#define SCNo32 "lo" +#define SCNu32 "lu" +#define SCNx32 "lx" +#define SCNX32 "lX" +#define SCNoLEAST32 "lo" +#define SCNuLEAST32 "lu" +#define SCNxLEAST32 "lx" +#define SCNXLEAST32 "lX" +#define SCNoFAST32 "lo" +#define SCNuFAST32 "lu" +#define SCNxFAST32 "lx" +#define SCNXFAST32 "lX" + +#define SCNo64 "I64o" +#define SCNu64 "I64u" +#define SCNx64 "I64x" +#define SCNX64 "I64X" +#define SCNoLEAST64 "I64o" +#define SCNuLEAST64 "I64u" +#define SCNxLEAST64 "I64x" +#define SCNXLEAST64 "I64X" +#define SCNoFAST64 "I64o" +#define SCNuFAST64 "I64u" +#define SCNxFAST64 "I64x" +#define SCNXFAST64 "I64X" + +#define SCNoMAX "I64o" +#define SCNuMAX "I64u" +#define SCNxMAX "I64x" +#define SCNXMAX "I64X" + +#ifdef _WIN64 // [ +# define SCNoPTR "I64o" +# define SCNuPTR "I64u" +# define SCNxPTR "I64x" +# define SCNXPTR "I64X" +#else // _WIN64 ][ +# define SCNoPTR "lo" +# define SCNuPTR "lu" +# define SCNxPTR "lx" +# define SCNXPTR "lX" +#endif // _WIN64 ] + +#endif // __STDC_FORMAT_MACROS ] + +// 7.8.2 Functions for greatest-width integer types + +// 7.8.2.1 The imaxabs function +#define imaxabs _abs64 + +// 7.8.2.2 The imaxdiv function + +// This is modified version of div() function from Microsoft's div.c found +// in %MSVC.NET%\crt\src\div.c +#ifdef STATIC_IMAXDIV // [ +static +#else // STATIC_IMAXDIV ][ +_inline +#endif // STATIC_IMAXDIV ] +imaxdiv_t __cdecl imaxdiv(intmax_t numer, intmax_t denom) +{ + imaxdiv_t result; + + result.quot = numer / denom; + result.rem = numer % denom; + + if (numer < 0 && result.rem > 0) { + // did division wrong; must fix up + ++result.quot; + result.rem -= denom; + } + + return result; +} + +// 7.8.2.3 The strtoimax and strtoumax functions +#define strtoimax _strtoi64 +#define strtoumax _strtoui64 + +// 7.8.2.4 The wcstoimax and wcstoumax functions +#define wcstoimax _wcstoi64 +#define wcstoumax _wcstoui64 + +#endif // _MSC_VER >= 1800 + +#endif // _MSC_INTTYPES_H_ ] diff --git a/xmrstak/rapidjson/msinttypes/stdint.h b/xmrstak/rapidjson/msinttypes/stdint.h new file mode 100644 index 0000000..3d4477b --- /dev/null +++ b/xmrstak/rapidjson/msinttypes/stdint.h @@ -0,0 +1,300 @@ +// ISO C9x compliant stdint.h for Microsoft Visual Studio +// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124 +// +// Copyright (c) 2006-2013 Alexander Chemeris +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the product nor the names of its contributors may +// be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED +// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO +// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +/////////////////////////////////////////////////////////////////////////////// + +// The above software in this distribution may have been modified by +// THL A29 Limited ("Tencent Modifications"). +// All Tencent Modifications are Copyright (C) 2015 THL A29 Limited. + +#ifndef _MSC_VER // [ +#error "Use this header only with Microsoft Visual C++ compilers!" +#endif // _MSC_VER ] + +#ifndef _MSC_STDINT_H_ // [ +#define _MSC_STDINT_H_ + +#if _MSC_VER > 1000 +#pragma once +#endif + +// miloyip: Originally Visual Studio 2010 uses its own stdint.h. However it generates warning with INT64_C(), so change to use this file for vs2010. +#if _MSC_VER >= 1600 // [ +#include <stdint.h> + +#if !defined(__cplusplus) || defined(__STDC_CONSTANT_MACROS) // [ See footnote 224 at page 260 + +#undef INT8_C +#undef INT16_C +#undef INT32_C +#undef INT64_C +#undef UINT8_C +#undef UINT16_C +#undef UINT32_C +#undef UINT64_C + +// 7.18.4.1 Macros for minimum-width integer constants + +#define INT8_C(val) val##i8 +#define INT16_C(val) val##i16 +#define INT32_C(val) val##i32 +#define INT64_C(val) val##i64 + +#define UINT8_C(val) val##ui8 +#define UINT16_C(val) val##ui16 +#define UINT32_C(val) val##ui32 +#define UINT64_C(val) val##ui64 + +// 7.18.4.2 Macros for greatest-width integer constants +// These #ifndef's are needed to prevent collisions with <boost/cstdint.hpp>. +// Check out Issue 9 for the details. +#ifndef INTMAX_C // [ +# define INTMAX_C INT64_C +#endif // INTMAX_C ] +#ifndef UINTMAX_C // [ +# define UINTMAX_C UINT64_C +#endif // UINTMAX_C ] + +#endif // __STDC_CONSTANT_MACROS ] + +#else // ] _MSC_VER >= 1700 [ + +#include <limits.h> + +// For Visual Studio 6 in C++ mode and for many Visual Studio versions when +// compiling for ARM we have to wrap <wchar.h> include with 'extern "C++" {}' +// or compiler would give many errors like this: +// error C2733: second C linkage of overloaded function 'wmemchr' not allowed +#if defined(__cplusplus) && !defined(_M_ARM) +extern "C" { +#endif +# include <wchar.h> +#if defined(__cplusplus) && !defined(_M_ARM) +} +#endif + +// Define _W64 macros to mark types changing their size, like intptr_t. +#ifndef _W64 +# if !defined(__midl) && (defined(_X86_) || defined(_M_IX86)) && _MSC_VER >= 1300 +# define _W64 __w64 +# else +# define _W64 +# endif +#endif + + +// 7.18.1 Integer types + +// 7.18.1.1 Exact-width integer types + +// Visual Studio 6 and Embedded Visual C++ 4 doesn't +// realize that, e.g. char has the same size as __int8 +// so we give up on __intX for them. +#if (_MSC_VER < 1300) + typedef signed char int8_t; + typedef signed short int16_t; + typedef signed int int32_t; + typedef unsigned char uint8_t; + typedef unsigned short uint16_t; + typedef unsigned int uint32_t; +#else + typedef signed __int8 int8_t; + typedef signed __int16 int16_t; + typedef signed __int32 int32_t; + typedef unsigned __int8 uint8_t; + typedef unsigned __int16 uint16_t; + typedef unsigned __int32 uint32_t; +#endif +typedef signed __int64 int64_t; +typedef unsigned __int64 uint64_t; + + +// 7.18.1.2 Minimum-width integer types +typedef int8_t int_least8_t; +typedef int16_t int_least16_t; +typedef int32_t int_least32_t; +typedef int64_t int_least64_t; +typedef uint8_t uint_least8_t; +typedef uint16_t uint_least16_t; +typedef uint32_t uint_least32_t; +typedef uint64_t uint_least64_t; + +// 7.18.1.3 Fastest minimum-width integer types +typedef int8_t int_fast8_t; +typedef int16_t int_fast16_t; +typedef int32_t int_fast32_t; +typedef int64_t int_fast64_t; +typedef uint8_t uint_fast8_t; +typedef uint16_t uint_fast16_t; +typedef uint32_t uint_fast32_t; +typedef uint64_t uint_fast64_t; + +// 7.18.1.4 Integer types capable of holding object pointers +#ifdef _WIN64 // [ + typedef signed __int64 intptr_t; + typedef unsigned __int64 uintptr_t; +#else // _WIN64 ][ + typedef _W64 signed int intptr_t; + typedef _W64 unsigned int uintptr_t; +#endif // _WIN64 ] + +// 7.18.1.5 Greatest-width integer types +typedef int64_t intmax_t; +typedef uint64_t uintmax_t; + + +// 7.18.2 Limits of specified-width integer types + +#if !defined(__cplusplus) || defined(__STDC_LIMIT_MACROS) // [ See footnote 220 at page 257 and footnote 221 at page 259 + +// 7.18.2.1 Limits of exact-width integer types +#define INT8_MIN ((int8_t)_I8_MIN) +#define INT8_MAX _I8_MAX +#define INT16_MIN ((int16_t)_I16_MIN) +#define INT16_MAX _I16_MAX +#define INT32_MIN ((int32_t)_I32_MIN) +#define INT32_MAX _I32_MAX +#define INT64_MIN ((int64_t)_I64_MIN) +#define INT64_MAX _I64_MAX +#define UINT8_MAX _UI8_MAX +#define UINT16_MAX _UI16_MAX +#define UINT32_MAX _UI32_MAX +#define UINT64_MAX _UI64_MAX + +// 7.18.2.2 Limits of minimum-width integer types +#define INT_LEAST8_MIN INT8_MIN +#define INT_LEAST8_MAX INT8_MAX +#define INT_LEAST16_MIN INT16_MIN +#define INT_LEAST16_MAX INT16_MAX +#define INT_LEAST32_MIN INT32_MIN +#define INT_LEAST32_MAX INT32_MAX +#define INT_LEAST64_MIN INT64_MIN +#define INT_LEAST64_MAX INT64_MAX +#define UINT_LEAST8_MAX UINT8_MAX +#define UINT_LEAST16_MAX UINT16_MAX +#define UINT_LEAST32_MAX UINT32_MAX +#define UINT_LEAST64_MAX UINT64_MAX + +// 7.18.2.3 Limits of fastest minimum-width integer types +#define INT_FAST8_MIN INT8_MIN +#define INT_FAST8_MAX INT8_MAX +#define INT_FAST16_MIN INT16_MIN +#define INT_FAST16_MAX INT16_MAX +#define INT_FAST32_MIN INT32_MIN +#define INT_FAST32_MAX INT32_MAX +#define INT_FAST64_MIN INT64_MIN +#define INT_FAST64_MAX INT64_MAX +#define UINT_FAST8_MAX UINT8_MAX +#define UINT_FAST16_MAX UINT16_MAX +#define UINT_FAST32_MAX UINT32_MAX +#define UINT_FAST64_MAX UINT64_MAX + +// 7.18.2.4 Limits of integer types capable of holding object pointers +#ifdef _WIN64 // [ +# define INTPTR_MIN INT64_MIN +# define INTPTR_MAX INT64_MAX +# define UINTPTR_MAX UINT64_MAX +#else // _WIN64 ][ +# define INTPTR_MIN INT32_MIN +# define INTPTR_MAX INT32_MAX +# define UINTPTR_MAX UINT32_MAX +#endif // _WIN64 ] + +// 7.18.2.5 Limits of greatest-width integer types +#define INTMAX_MIN INT64_MIN +#define INTMAX_MAX INT64_MAX +#define UINTMAX_MAX UINT64_MAX + +// 7.18.3 Limits of other integer types + +#ifdef _WIN64 // [ +# define PTRDIFF_MIN _I64_MIN +# define PTRDIFF_MAX _I64_MAX +#else // _WIN64 ][ +# define PTRDIFF_MIN _I32_MIN +# define PTRDIFF_MAX _I32_MAX +#endif // _WIN64 ] + +#define SIG_ATOMIC_MIN INT_MIN +#define SIG_ATOMIC_MAX INT_MAX + +#ifndef SIZE_MAX // [ +# ifdef _WIN64 // [ +# define SIZE_MAX _UI64_MAX +# else // _WIN64 ][ +# define SIZE_MAX _UI32_MAX +# endif // _WIN64 ] +#endif // SIZE_MAX ] + +// WCHAR_MIN and WCHAR_MAX are also defined in <wchar.h> +#ifndef WCHAR_MIN // [ +# define WCHAR_MIN 0 +#endif // WCHAR_MIN ] +#ifndef WCHAR_MAX // [ +# define WCHAR_MAX _UI16_MAX +#endif // WCHAR_MAX ] + +#define WINT_MIN 0 +#define WINT_MAX _UI16_MAX + +#endif // __STDC_LIMIT_MACROS ] + + +// 7.18.4 Limits of other integer types + +#if !defined(__cplusplus) || defined(__STDC_CONSTANT_MACROS) // [ See footnote 224 at page 260 + +// 7.18.4.1 Macros for minimum-width integer constants + +#define INT8_C(val) val##i8 +#define INT16_C(val) val##i16 +#define INT32_C(val) val##i32 +#define INT64_C(val) val##i64 + +#define UINT8_C(val) val##ui8 +#define UINT16_C(val) val##ui16 +#define UINT32_C(val) val##ui32 +#define UINT64_C(val) val##ui64 + +// 7.18.4.2 Macros for greatest-width integer constants +// These #ifndef's are needed to prevent collisions with <boost/cstdint.hpp>. +// Check out Issue 9 for the details. +#ifndef INTMAX_C // [ +# define INTMAX_C INT64_C +#endif // INTMAX_C ] +#ifndef UINTMAX_C // [ +# define UINTMAX_C UINT64_C +#endif // UINTMAX_C ] + +#endif // __STDC_CONSTANT_MACROS ] + +#endif // _MSC_VER >= 1600 ] + +#endif // _MSC_STDINT_H_ ] diff --git a/xmrstak/rapidjson/ostreamwrapper.h b/xmrstak/rapidjson/ostreamwrapper.h new file mode 100644 index 0000000..6f4667c --- /dev/null +++ b/xmrstak/rapidjson/ostreamwrapper.h @@ -0,0 +1,81 @@ +// Tencent is pleased to support the open source community by making RapidJSON available. +// +// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. +// +// Licensed under the MIT License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// http://opensource.org/licenses/MIT +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef RAPIDJSON_OSTREAMWRAPPER_H_ +#define RAPIDJSON_OSTREAMWRAPPER_H_ + +#include "stream.h" +#include <iosfwd> + +#ifdef __clang__ +RAPIDJSON_DIAG_PUSH +RAPIDJSON_DIAG_OFF(padded) +#endif + +RAPIDJSON_NAMESPACE_BEGIN + +//! Wrapper of \c std::basic_ostream into RapidJSON's Stream concept. +/*! + The classes can be wrapped including but not limited to: + + - \c std::ostringstream + - \c std::stringstream + - \c std::wpstringstream + - \c std::wstringstream + - \c std::ifstream + - \c std::fstream + - \c std::wofstream + - \c std::wfstream + + \tparam StreamType Class derived from \c std::basic_ostream. +*/ + +template <typename StreamType> +class BasicOStreamWrapper { +public: + typedef typename StreamType::char_type Ch; + BasicOStreamWrapper(StreamType& stream) : stream_(stream) {} + + void Put(Ch c) { + stream_.put(c); + } + + void Flush() { + stream_.flush(); + } + + // Not implemented + char Peek() const { RAPIDJSON_ASSERT(false); return 0; } + char Take() { RAPIDJSON_ASSERT(false); return 0; } + size_t Tell() const { RAPIDJSON_ASSERT(false); return 0; } + char* PutBegin() { RAPIDJSON_ASSERT(false); return 0; } + size_t PutEnd(char*) { RAPIDJSON_ASSERT(false); return 0; } + +private: + BasicOStreamWrapper(const BasicOStreamWrapper&); + BasicOStreamWrapper& operator=(const BasicOStreamWrapper&); + + StreamType& stream_; +}; + +typedef BasicOStreamWrapper<std::ostream> OStreamWrapper; +typedef BasicOStreamWrapper<std::wostream> WOStreamWrapper; + +#ifdef __clang__ +RAPIDJSON_DIAG_POP +#endif + +RAPIDJSON_NAMESPACE_END + +#endif // RAPIDJSON_OSTREAMWRAPPER_H_ diff --git a/xmrstak/rapidjson/pointer.h b/xmrstak/rapidjson/pointer.h new file mode 100644 index 0000000..0206ac1 --- /dev/null +++ b/xmrstak/rapidjson/pointer.h @@ -0,0 +1,1358 @@ +// Tencent is pleased to support the open source community by making RapidJSON available. +// +// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. +// +// Licensed under the MIT License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// http://opensource.org/licenses/MIT +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef RAPIDJSON_POINTER_H_ +#define RAPIDJSON_POINTER_H_ + +#include "document.h" +#include "internal/itoa.h" + +#ifdef __clang__ +RAPIDJSON_DIAG_PUSH +RAPIDJSON_DIAG_OFF(switch-enum) +#endif + +#ifdef _MSC_VER +RAPIDJSON_DIAG_PUSH +RAPIDJSON_DIAG_OFF(4512) // assignment operator could not be generated +#endif + +RAPIDJSON_NAMESPACE_BEGIN + +static const SizeType kPointerInvalidIndex = ~SizeType(0); //!< Represents an invalid index in GenericPointer::Token + +//! Error code of parsing. +/*! \ingroup RAPIDJSON_ERRORS + \see GenericPointer::GenericPointer, GenericPointer::GetParseErrorCode +*/ +enum PointerParseErrorCode { + kPointerParseErrorNone = 0, //!< The parse is successful + + kPointerParseErrorTokenMustBeginWithSolidus, //!< A token must begin with a '/' + kPointerParseErrorInvalidEscape, //!< Invalid escape + kPointerParseErrorInvalidPercentEncoding, //!< Invalid percent encoding in URI fragment + kPointerParseErrorCharacterMustPercentEncode //!< A character must percent encoded in URI fragment +}; + +/////////////////////////////////////////////////////////////////////////////// +// GenericPointer + +//! Represents a JSON Pointer. Use Pointer for UTF8 encoding and default allocator. +/*! + This class implements RFC 6901 "JavaScript Object Notation (JSON) Pointer" + (https://tools.ietf.org/html/rfc6901). + + A JSON pointer is for identifying a specific value in a JSON document + (GenericDocument). It can simplify coding of DOM tree manipulation, because it + can access multiple-level depth of DOM tree with single API call. + + After it parses a string representation (e.g. "/foo/0" or URI fragment + representation (e.g. "#/foo/0") into its internal representation (tokens), + it can be used to resolve a specific value in multiple documents, or sub-tree + of documents. + + Contrary to GenericValue, Pointer can be copy constructed and copy assigned. + Apart from assignment, a Pointer cannot be modified after construction. + + Although Pointer is very convenient, please aware that constructing Pointer + involves parsing and dynamic memory allocation. A special constructor with user- + supplied tokens eliminates these. + + GenericPointer depends on GenericDocument and GenericValue. + + \tparam ValueType The value type of the DOM tree. E.g. GenericValue<UTF8<> > + \tparam Allocator The allocator type for allocating memory for internal representation. + + \note GenericPointer uses same encoding of ValueType. + However, Allocator of GenericPointer is independent of Allocator of Value. +*/ +template <typename ValueType, typename Allocator = CrtAllocator> +class GenericPointer { +public: + typedef typename ValueType::EncodingType EncodingType; //!< Encoding type from Value + typedef typename ValueType::Ch Ch; //!< Character type from Value + + //! A token is the basic units of internal representation. + /*! + A JSON pointer string representation "/foo/123" is parsed to two tokens: + "foo" and 123. 123 will be represented in both numeric form and string form. + They are resolved according to the actual value type (object or array). + + For token that are not numbers, or the numeric value is out of bound + (greater than limits of SizeType), they are only treated as string form + (i.e. the token's index will be equal to kPointerInvalidIndex). + + This struct is public so that user can create a Pointer without parsing and + allocation, using a special constructor. + */ + struct Token { + const Ch* name; //!< Name of the token. It has null character at the end but it can contain null character. + SizeType length; //!< Length of the name. + SizeType index; //!< A valid array index, if it is not equal to kPointerInvalidIndex. + }; + + //!@name Constructors and destructor. + //@{ + + //! Default constructor. + GenericPointer(Allocator* allocator = 0) : allocator_(allocator), ownAllocator_(), nameBuffer_(), tokens_(), tokenCount_(), parseErrorOffset_(), parseErrorCode_(kPointerParseErrorNone) {} + + //! Constructor that parses a string or URI fragment representation. + /*! + \param source A null-terminated, string or URI fragment representation of JSON pointer. + \param allocator User supplied allocator for this pointer. If no allocator is provided, it creates a self-owned one. + */ + explicit GenericPointer(const Ch* source, Allocator* allocator = 0) : allocator_(allocator), ownAllocator_(), nameBuffer_(), tokens_(), tokenCount_(), parseErrorOffset_(), parseErrorCode_(kPointerParseErrorNone) { + Parse(source, internal::StrLen(source)); + } + +#if RAPIDJSON_HAS_STDSTRING + //! Constructor that parses a string or URI fragment representation. + /*! + \param source A string or URI fragment representation of JSON pointer. + \param allocator User supplied allocator for this pointer. If no allocator is provided, it creates a self-owned one. + \note Requires the definition of the preprocessor symbol \ref RAPIDJSON_HAS_STDSTRING. + */ + explicit GenericPointer(const std::basic_string<Ch>& source, Allocator* allocator = 0) : allocator_(allocator), ownAllocator_(), nameBuffer_(), tokens_(), tokenCount_(), parseErrorOffset_(), parseErrorCode_(kPointerParseErrorNone) { + Parse(source.c_str(), source.size()); + } +#endif + + //! Constructor that parses a string or URI fragment representation, with length of the source string. + /*! + \param source A string or URI fragment representation of JSON pointer. + \param length Length of source. + \param allocator User supplied allocator for this pointer. If no allocator is provided, it creates a self-owned one. + \note Slightly faster than the overload without length. + */ + GenericPointer(const Ch* source, size_t length, Allocator* allocator = 0) : allocator_(allocator), ownAllocator_(), nameBuffer_(), tokens_(), tokenCount_(), parseErrorOffset_(), parseErrorCode_(kPointerParseErrorNone) { + Parse(source, length); + } + + //! Constructor with user-supplied tokens. + /*! + This constructor let user supplies const array of tokens. + This prevents the parsing process and eliminates allocation. + This is preferred for memory constrained environments. + + \param tokens An constant array of tokens representing the JSON pointer. + \param tokenCount Number of tokens. + + \b Example + \code + #define NAME(s) { s, sizeof(s) / sizeof(s[0]) - 1, kPointerInvalidIndex } + #define INDEX(i) { #i, sizeof(#i) - 1, i } + + static const Pointer::Token kTokens[] = { NAME("foo"), INDEX(123) }; + static const Pointer p(kTokens, sizeof(kTokens) / sizeof(kTokens[0])); + // Equivalent to static const Pointer p("/foo/123"); + + #undef NAME + #undef INDEX + \endcode + */ + GenericPointer(const Token* tokens, size_t tokenCount) : allocator_(), ownAllocator_(), nameBuffer_(), tokens_(const_cast<Token*>(tokens)), tokenCount_(tokenCount), parseErrorOffset_(), parseErrorCode_(kPointerParseErrorNone) {} + + //! Copy constructor. + GenericPointer(const GenericPointer& rhs, Allocator* allocator = 0) : allocator_(allocator), ownAllocator_(), nameBuffer_(), tokens_(), tokenCount_(), parseErrorOffset_(), parseErrorCode_(kPointerParseErrorNone) { + *this = rhs; + } + + //! Destructor. + ~GenericPointer() { + if (nameBuffer_) // If user-supplied tokens constructor is used, nameBuffer_ is nullptr and tokens_ are not deallocated. + Allocator::Free(tokens_); + RAPIDJSON_DELETE(ownAllocator_); + } + + //! Assignment operator. + GenericPointer& operator=(const GenericPointer& rhs) { + if (this != &rhs) { + // Do not delete ownAllcator + if (nameBuffer_) + Allocator::Free(tokens_); + + tokenCount_ = rhs.tokenCount_; + parseErrorOffset_ = rhs.parseErrorOffset_; + parseErrorCode_ = rhs.parseErrorCode_; + + if (rhs.nameBuffer_) + CopyFromRaw(rhs); // Normally parsed tokens. + else { + tokens_ = rhs.tokens_; // User supplied const tokens. + nameBuffer_ = 0; + } + } + return *this; + } + + //@} + + //!@name Append token + //@{ + + //! Append a token and return a new Pointer + /*! + \param token Token to be appended. + \param allocator Allocator for the newly return Pointer. + \return A new Pointer with appended token. + */ + GenericPointer Append(const Token& token, Allocator* allocator = 0) const { + GenericPointer r; + r.allocator_ = allocator; + Ch *p = r.CopyFromRaw(*this, 1, token.length + 1); + std::memcpy(p, token.name, (token.length + 1) * sizeof(Ch)); + r.tokens_[tokenCount_].name = p; + r.tokens_[tokenCount_].length = token.length; + r.tokens_[tokenCount_].index = token.index; + return r; + } + + //! Append a name token with length, and return a new Pointer + /*! + \param name Name to be appended. + \param length Length of name. + \param allocator Allocator for the newly return Pointer. + \return A new Pointer with appended token. + */ + GenericPointer Append(const Ch* name, SizeType length, Allocator* allocator = 0) const { + Token token = { name, length, kPointerInvalidIndex }; + return Append(token, allocator); + } + + //! Append a name token without length, and return a new Pointer + /*! + \param name Name (const Ch*) to be appended. + \param allocator Allocator for the newly return Pointer. + \return A new Pointer with appended token. + */ + template <typename T> + RAPIDJSON_DISABLEIF_RETURN((internal::NotExpr<internal::IsSame<typename internal::RemoveConst<T>::Type, Ch> >), (GenericPointer)) + Append(T* name, Allocator* allocator = 0) const { + return Append(name, StrLen(name), allocator); + } + +#if RAPIDJSON_HAS_STDSTRING + //! Append a name token, and return a new Pointer + /*! + \param name Name to be appended. + \param allocator Allocator for the newly return Pointer. + \return A new Pointer with appended token. + */ + GenericPointer Append(const std::basic_string<Ch>& name, Allocator* allocator = 0) const { + return Append(name.c_str(), static_cast<SizeType>(name.size()), allocator); + } +#endif + + //! Append a index token, and return a new Pointer + /*! + \param index Index to be appended. + \param allocator Allocator for the newly return Pointer. + \return A new Pointer with appended token. + */ + GenericPointer Append(SizeType index, Allocator* allocator = 0) const { + char buffer[21]; + char* end = sizeof(SizeType) == 4 ? internal::u32toa(index, buffer) : internal::u64toa(index, buffer); + SizeType length = static_cast<SizeType>(end - buffer); + buffer[length] = '\0'; + + if (sizeof(Ch) == 1) { + Token token = { reinterpret_cast<Ch*>(buffer), length, index }; + return Append(token, allocator); + } + else { + Ch name[21]; + for (size_t i = 0; i <= length; i++) + name[i] = buffer[i]; + Token token = { name, length, index }; + return Append(token, allocator); + } + } + + //! Append a token by value, and return a new Pointer + /*! + \param token token to be appended. + \param allocator Allocator for the newly return Pointer. + \return A new Pointer with appended token. + */ + GenericPointer Append(const ValueType& token, Allocator* allocator = 0) const { + if (token.IsString()) + return Append(token.GetString(), token.GetStringLength(), allocator); + else { + RAPIDJSON_ASSERT(token.IsUint64()); + RAPIDJSON_ASSERT(token.GetUint64() <= SizeType(~0)); + return Append(static_cast<SizeType>(token.GetUint64()), allocator); + } + } + + //!@name Handling Parse Error + //@{ + + //! Check whether this is a valid pointer. + bool IsValid() const { return parseErrorCode_ == kPointerParseErrorNone; } + + //! Get the parsing error offset in code unit. + size_t GetParseErrorOffset() const { return parseErrorOffset_; } + + //! Get the parsing error code. + PointerParseErrorCode GetParseErrorCode() const { return parseErrorCode_; } + + //@} + + //! Get the allocator of this pointer. + Allocator& GetAllocator() { return *allocator_; } + + //!@name Tokens + //@{ + + //! Get the token array (const version only). + const Token* GetTokens() const { return tokens_; } + + //! Get the number of tokens. + size_t GetTokenCount() const { return tokenCount_; } + + //@} + + //!@name Equality/inequality operators + //@{ + + //! Equality operator. + /*! + \note When any pointers are invalid, always returns false. + */ + bool operator==(const GenericPointer& rhs) const { + if (!IsValid() || !rhs.IsValid() || tokenCount_ != rhs.tokenCount_) + return false; + + for (size_t i = 0; i < tokenCount_; i++) { + if (tokens_[i].index != rhs.tokens_[i].index || + tokens_[i].length != rhs.tokens_[i].length || + (tokens_[i].length != 0 && std::memcmp(tokens_[i].name, rhs.tokens_[i].name, sizeof(Ch)* tokens_[i].length) != 0)) + { + return false; + } + } + + return true; + } + + //! Inequality operator. + /*! + \note When any pointers are invalid, always returns true. + */ + bool operator!=(const GenericPointer& rhs) const { return !(*this == rhs); } + + //@} + + //!@name Stringify + //@{ + + //! Stringify the pointer into string representation. + /*! + \tparam OutputStream Type of output stream. + \param os The output stream. + */ + template<typename OutputStream> + bool Stringify(OutputStream& os) const { + return Stringify<false, OutputStream>(os); + } + + //! Stringify the pointer into URI fragment representation. + /*! + \tparam OutputStream Type of output stream. + \param os The output stream. + */ + template<typename OutputStream> + bool StringifyUriFragment(OutputStream& os) const { + return Stringify<true, OutputStream>(os); + } + + //@} + + //!@name Create value + //@{ + + //! Create a value in a subtree. + /*! + If the value is not exist, it creates all parent values and a JSON Null value. + So it always succeed and return the newly created or existing value. + + Remind that it may change types of parents according to tokens, so it + potentially removes previously stored values. For example, if a document + was an array, and "/foo" is used to create a value, then the document + will be changed to an object, and all existing array elements are lost. + + \param root Root value of a DOM subtree to be resolved. It can be any value other than document root. + \param allocator Allocator for creating the values if the specified value or its parents are not exist. + \param alreadyExist If non-null, it stores whether the resolved value is already exist. + \return The resolved newly created (a JSON Null value), or already exists value. + */ + ValueType& Create(ValueType& root, typename ValueType::AllocatorType& allocator, bool* alreadyExist = 0) const { + RAPIDJSON_ASSERT(IsValid()); + ValueType* v = &root; + bool exist = true; + for (const Token *t = tokens_; t != tokens_ + tokenCount_; ++t) { + if (v->IsArray() && t->name[0] == '-' && t->length == 1) { + v->PushBack(ValueType().Move(), allocator); + v = &((*v)[v->Size() - 1]); + exist = false; + } + else { + if (t->index == kPointerInvalidIndex) { // must be object name + if (!v->IsObject()) + v->SetObject(); // Change to Object + } + else { // object name or array index + if (!v->IsArray() && !v->IsObject()) + v->SetArray(); // Change to Array + } + + if (v->IsArray()) { + if (t->index >= v->Size()) { + v->Reserve(t->index + 1, allocator); + while (t->index >= v->Size()) + v->PushBack(ValueType().Move(), allocator); + exist = false; + } + v = &((*v)[t->index]); + } + else { + typename ValueType::MemberIterator m = v->FindMember(GenericStringRef<Ch>(t->name, t->length)); + if (m == v->MemberEnd()) { + v->AddMember(ValueType(t->name, t->length, allocator).Move(), ValueType().Move(), allocator); + v = &(--v->MemberEnd())->value; // Assumes AddMember() appends at the end + exist = false; + } + else + v = &m->value; + } + } + } + + if (alreadyExist) + *alreadyExist = exist; + + return *v; + } + + //! Creates a value in a document. + /*! + \param document A document to be resolved. + \param alreadyExist If non-null, it stores whether the resolved value is already exist. + \return The resolved newly created, or already exists value. + */ + template <typename stackAllocator> + ValueType& Create(GenericDocument<EncodingType, typename ValueType::AllocatorType, stackAllocator>& document, bool* alreadyExist = 0) const { + return Create(document, document.GetAllocator(), alreadyExist); + } + + //@} + + //!@name Query value + //@{ + + //! Query a value in a subtree. + /*! + \param root Root value of a DOM sub-tree to be resolved. It can be any value other than document root. + \param unresolvedTokenIndex If the pointer cannot resolve a token in the pointer, this parameter can obtain the index of unresolved token. + \return Pointer to the value if it can be resolved. Otherwise null. + + \note + There are only 3 situations when a value cannot be resolved: + 1. A value in the path is not an array nor object. + 2. An object value does not contain the token. + 3. A token is out of range of an array value. + + Use unresolvedTokenIndex to retrieve the token index. + */ + ValueType* Get(ValueType& root, size_t* unresolvedTokenIndex = 0) const { + RAPIDJSON_ASSERT(IsValid()); + ValueType* v = &root; + for (const Token *t = tokens_; t != tokens_ + tokenCount_; ++t) { + switch (v->GetType()) { + case kObjectType: + { + typename ValueType::MemberIterator m = v->FindMember(GenericStringRef<Ch>(t->name, t->length)); + if (m == v->MemberEnd()) + break; + v = &m->value; + } + continue; + case kArrayType: + if (t->index == kPointerInvalidIndex || t->index >= v->Size()) + break; + v = &((*v)[t->index]); + continue; + default: + break; + } + + // Error: unresolved token + if (unresolvedTokenIndex) + *unresolvedTokenIndex = static_cast<size_t>(t - tokens_); + return 0; + } + return v; + } + + //! Query a const value in a const subtree. + /*! + \param root Root value of a DOM sub-tree to be resolved. It can be any value other than document root. + \return Pointer to the value if it can be resolved. Otherwise null. + */ + const ValueType* Get(const ValueType& root, size_t* unresolvedTokenIndex = 0) const { + return Get(const_cast<ValueType&>(root), unresolvedTokenIndex); + } + + //@} + + //!@name Query a value with default + //@{ + + //! Query a value in a subtree with default value. + /*! + Similar to Get(), but if the specified value do not exists, it creates all parents and clone the default value. + So that this function always succeed. + + \param root Root value of a DOM sub-tree to be resolved. It can be any value other than document root. + \param defaultValue Default value to be cloned if the value was not exists. + \param allocator Allocator for creating the values if the specified value or its parents are not exist. + \see Create() + */ + ValueType& GetWithDefault(ValueType& root, const ValueType& defaultValue, typename ValueType::AllocatorType& allocator) const { + bool alreadyExist; + Value& v = Create(root, allocator, &alreadyExist); + return alreadyExist ? v : v.CopyFrom(defaultValue, allocator); + } + + //! Query a value in a subtree with default null-terminated string. + ValueType& GetWithDefault(ValueType& root, const Ch* defaultValue, typename ValueType::AllocatorType& allocator) const { + bool alreadyExist; + Value& v = Create(root, allocator, &alreadyExist); + return alreadyExist ? v : v.SetString(defaultValue, allocator); + } + +#if RAPIDJSON_HAS_STDSTRING + //! Query a value in a subtree with default std::basic_string. + ValueType& GetWithDefault(ValueType& root, const std::basic_string<Ch>& defaultValue, typename ValueType::AllocatorType& allocator) const { + bool alreadyExist; + Value& v = Create(root, allocator, &alreadyExist); + return alreadyExist ? v : v.SetString(defaultValue, allocator); + } +#endif + + //! Query a value in a subtree with default primitive value. + /*! + \tparam T Either \ref Type, \c int, \c unsigned, \c int64_t, \c uint64_t, \c bool + */ + template <typename T> + RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr<internal::IsPointer<T>, internal::IsGenericValue<T> >), (ValueType&)) + GetWithDefault(ValueType& root, T defaultValue, typename ValueType::AllocatorType& allocator) const { + return GetWithDefault(root, ValueType(defaultValue).Move(), allocator); + } + + //! Query a value in a document with default value. + template <typename stackAllocator> + ValueType& GetWithDefault(GenericDocument<EncodingType, typename ValueType::AllocatorType, stackAllocator>& document, const ValueType& defaultValue) const { + return GetWithDefault(document, defaultValue, document.GetAllocator()); + } + + //! Query a value in a document with default null-terminated string. + template <typename stackAllocator> + ValueType& GetWithDefault(GenericDocument<EncodingType, typename ValueType::AllocatorType, stackAllocator>& document, const Ch* defaultValue) const { + return GetWithDefault(document, defaultValue, document.GetAllocator()); + } + +#if RAPIDJSON_HAS_STDSTRING + //! Query a value in a document with default std::basic_string. + template <typename stackAllocator> + ValueType& GetWithDefault(GenericDocument<EncodingType, typename ValueType::AllocatorType, stackAllocator>& document, const std::basic_string<Ch>& defaultValue) const { + return GetWithDefault(document, defaultValue, document.GetAllocator()); + } +#endif + + //! Query a value in a document with default primitive value. + /*! + \tparam T Either \ref Type, \c int, \c unsigned, \c int64_t, \c uint64_t, \c bool + */ + template <typename T, typename stackAllocator> + RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr<internal::IsPointer<T>, internal::IsGenericValue<T> >), (ValueType&)) + GetWithDefault(GenericDocument<EncodingType, typename ValueType::AllocatorType, stackAllocator>& document, T defaultValue) const { + return GetWithDefault(document, defaultValue, document.GetAllocator()); + } + + //@} + + //!@name Set a value + //@{ + + //! Set a value in a subtree, with move semantics. + /*! + It creates all parents if they are not exist or types are different to the tokens. + So this function always succeeds but potentially remove existing values. + + \param root Root value of a DOM sub-tree to be resolved. It can be any value other than document root. + \param value Value to be set. + \param allocator Allocator for creating the values if the specified value or its parents are not exist. + \see Create() + */ + ValueType& Set(ValueType& root, ValueType& value, typename ValueType::AllocatorType& allocator) const { + return Create(root, allocator) = value; + } + + //! Set a value in a subtree, with copy semantics. + ValueType& Set(ValueType& root, const ValueType& value, typename ValueType::AllocatorType& allocator) const { + return Create(root, allocator).CopyFrom(value, allocator); + } + + //! Set a null-terminated string in a subtree. + ValueType& Set(ValueType& root, const Ch* value, typename ValueType::AllocatorType& allocator) const { + return Create(root, allocator) = ValueType(value, allocator).Move(); + } + +#if RAPIDJSON_HAS_STDSTRING + //! Set a std::basic_string in a subtree. + ValueType& Set(ValueType& root, const std::basic_string<Ch>& value, typename ValueType::AllocatorType& allocator) const { + return Create(root, allocator) = ValueType(value, allocator).Move(); + } +#endif + + //! Set a primitive value in a subtree. + /*! + \tparam T Either \ref Type, \c int, \c unsigned, \c int64_t, \c uint64_t, \c bool + */ + template <typename T> + RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr<internal::IsPointer<T>, internal::IsGenericValue<T> >), (ValueType&)) + Set(ValueType& root, T value, typename ValueType::AllocatorType& allocator) const { + return Create(root, allocator) = ValueType(value).Move(); + } + + //! Set a value in a document, with move semantics. + template <typename stackAllocator> + ValueType& Set(GenericDocument<EncodingType, typename ValueType::AllocatorType, stackAllocator>& document, ValueType& value) const { + return Create(document) = value; + } + + //! Set a value in a document, with copy semantics. + template <typename stackAllocator> + ValueType& Set(GenericDocument<EncodingType, typename ValueType::AllocatorType, stackAllocator>& document, const ValueType& value) const { + return Create(document).CopyFrom(value, document.GetAllocator()); + } + + //! Set a null-terminated string in a document. + template <typename stackAllocator> + ValueType& Set(GenericDocument<EncodingType, typename ValueType::AllocatorType, stackAllocator>& document, const Ch* value) const { + return Create(document) = ValueType(value, document.GetAllocator()).Move(); + } + +#if RAPIDJSON_HAS_STDSTRING + //! Sets a std::basic_string in a document. + template <typename stackAllocator> + ValueType& Set(GenericDocument<EncodingType, typename ValueType::AllocatorType, stackAllocator>& document, const std::basic_string<Ch>& value) const { + return Create(document) = ValueType(value, document.GetAllocator()).Move(); + } +#endif + + //! Set a primitive value in a document. + /*! + \tparam T Either \ref Type, \c int, \c unsigned, \c int64_t, \c uint64_t, \c bool + */ + template <typename T, typename stackAllocator> + RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr<internal::IsPointer<T>, internal::IsGenericValue<T> >), (ValueType&)) + Set(GenericDocument<EncodingType, typename ValueType::AllocatorType, stackAllocator>& document, T value) const { + return Create(document) = value; + } + + //@} + + //!@name Swap a value + //@{ + + //! Swap a value with a value in a subtree. + /*! + It creates all parents if they are not exist or types are different to the tokens. + So this function always succeeds but potentially remove existing values. + + \param root Root value of a DOM sub-tree to be resolved. It can be any value other than document root. + \param value Value to be swapped. + \param allocator Allocator for creating the values if the specified value or its parents are not exist. + \see Create() + */ + ValueType& Swap(ValueType& root, ValueType& value, typename ValueType::AllocatorType& allocator) const { + return Create(root, allocator).Swap(value); + } + + //! Swap a value with a value in a document. + template <typename stackAllocator> + ValueType& Swap(GenericDocument<EncodingType, typename ValueType::AllocatorType, stackAllocator>& document, ValueType& value) const { + return Create(document).Swap(value); + } + + //@} + + //! Erase a value in a subtree. + /*! + \param root Root value of a DOM sub-tree to be resolved. It can be any value other than document root. + \return Whether the resolved value is found and erased. + + \note Erasing with an empty pointer \c Pointer(""), i.e. the root, always fail and return false. + */ + bool Erase(ValueType& root) const { + RAPIDJSON_ASSERT(IsValid()); + if (tokenCount_ == 0) // Cannot erase the root + return false; + + ValueType* v = &root; + const Token* last = tokens_ + (tokenCount_ - 1); + for (const Token *t = tokens_; t != last; ++t) { + switch (v->GetType()) { + case kObjectType: + { + typename ValueType::MemberIterator m = v->FindMember(GenericStringRef<Ch>(t->name, t->length)); + if (m == v->MemberEnd()) + return false; + v = &m->value; + } + break; + case kArrayType: + if (t->index == kPointerInvalidIndex || t->index >= v->Size()) + return false; + v = &((*v)[t->index]); + break; + default: + return false; + } + } + + switch (v->GetType()) { + case kObjectType: + return v->EraseMember(GenericStringRef<Ch>(last->name, last->length)); + case kArrayType: + if (last->index == kPointerInvalidIndex || last->index >= v->Size()) + return false; + v->Erase(v->Begin() + last->index); + return true; + default: + return false; + } + } + +private: + //! Clone the content from rhs to this. + /*! + \param rhs Source pointer. + \param extraToken Extra tokens to be allocated. + \param extraNameBufferSize Extra name buffer size (in number of Ch) to be allocated. + \return Start of non-occupied name buffer, for storing extra names. + */ + Ch* CopyFromRaw(const GenericPointer& rhs, size_t extraToken = 0, size_t extraNameBufferSize = 0) { + if (!allocator_) // allocator is independently owned. + ownAllocator_ = allocator_ = RAPIDJSON_NEW(Allocator()); + + size_t nameBufferSize = rhs.tokenCount_; // null terminators for tokens + for (Token *t = rhs.tokens_; t != rhs.tokens_ + rhs.tokenCount_; ++t) + nameBufferSize += t->length; + + tokenCount_ = rhs.tokenCount_ + extraToken; + tokens_ = static_cast<Token *>(allocator_->Malloc(tokenCount_ * sizeof(Token) + (nameBufferSize + extraNameBufferSize) * sizeof(Ch))); + nameBuffer_ = reinterpret_cast<Ch *>(tokens_ + tokenCount_); + if (rhs.tokenCount_ > 0) { + std::memcpy(tokens_, rhs.tokens_, rhs.tokenCount_ * sizeof(Token)); + } + if (nameBufferSize > 0) { + std::memcpy(nameBuffer_, rhs.nameBuffer_, nameBufferSize * sizeof(Ch)); + } + + // Adjust pointers to name buffer + std::ptrdiff_t diff = nameBuffer_ - rhs.nameBuffer_; + for (Token *t = tokens_; t != tokens_ + rhs.tokenCount_; ++t) + t->name += diff; + + return nameBuffer_ + nameBufferSize; + } + + //! Check whether a character should be percent-encoded. + /*! + According to RFC 3986 2.3 Unreserved Characters. + \param c The character (code unit) to be tested. + */ + bool NeedPercentEncode(Ch c) const { + return !((c >= '0' && c <= '9') || (c >= 'A' && c <='Z') || (c >= 'a' && c <= 'z') || c == '-' || c == '.' || c == '_' || c =='~'); + } + + //! Parse a JSON String or its URI fragment representation into tokens. +#ifndef __clang__ // -Wdocumentation + /*! + \param source Either a JSON Pointer string, or its URI fragment representation. Not need to be null terminated. + \param length Length of the source string. + \note Source cannot be JSON String Representation of JSON Pointer, e.g. In "/\u0000", \u0000 will not be unescaped. + */ +#endif + void Parse(const Ch* source, size_t length) { + RAPIDJSON_ASSERT(source != NULL); + RAPIDJSON_ASSERT(nameBuffer_ == 0); + RAPIDJSON_ASSERT(tokens_ == 0); + + // Create own allocator if user did not supply. + if (!allocator_) + ownAllocator_ = allocator_ = RAPIDJSON_NEW(Allocator()); + + // Count number of '/' as tokenCount + tokenCount_ = 0; + for (const Ch* s = source; s != source + length; s++) + if (*s == '/') + tokenCount_++; + + Token* token = tokens_ = static_cast<Token *>(allocator_->Malloc(tokenCount_ * sizeof(Token) + length * sizeof(Ch))); + Ch* name = nameBuffer_ = reinterpret_cast<Ch *>(tokens_ + tokenCount_); + size_t i = 0; + + // Detect if it is a URI fragment + bool uriFragment = false; + if (source[i] == '#') { + uriFragment = true; + i++; + } + + if (i != length && source[i] != '/') { + parseErrorCode_ = kPointerParseErrorTokenMustBeginWithSolidus; + goto error; + } + + while (i < length) { + RAPIDJSON_ASSERT(source[i] == '/'); + i++; // consumes '/' + + token->name = name; + bool isNumber = true; + + while (i < length && source[i] != '/') { + Ch c = source[i]; + if (uriFragment) { + // Decoding percent-encoding for URI fragment + if (c == '%') { + PercentDecodeStream is(&source[i], source + length); + GenericInsituStringStream<EncodingType> os(name); + Ch* begin = os.PutBegin(); + if (!Transcoder<UTF8<>, EncodingType>().Validate(is, os) || !is.IsValid()) { + parseErrorCode_ = kPointerParseErrorInvalidPercentEncoding; + goto error; + } + size_t len = os.PutEnd(begin); + i += is.Tell() - 1; + if (len == 1) + c = *name; + else { + name += len; + isNumber = false; + i++; + continue; + } + } + else if (NeedPercentEncode(c)) { + parseErrorCode_ = kPointerParseErrorCharacterMustPercentEncode; + goto error; + } + } + + i++; + + // Escaping "~0" -> '~', "~1" -> '/' + if (c == '~') { + if (i < length) { + c = source[i]; + if (c == '0') c = '~'; + else if (c == '1') c = '/'; + else { + parseErrorCode_ = kPointerParseErrorInvalidEscape; + goto error; + } + i++; + } + else { + parseErrorCode_ = kPointerParseErrorInvalidEscape; + goto error; + } + } + + // First check for index: all of characters are digit + if (c < '0' || c > '9') + isNumber = false; + + *name++ = c; + } + token->length = static_cast<SizeType>(name - token->name); + if (token->length == 0) + isNumber = false; + *name++ = '\0'; // Null terminator + + // Second check for index: more than one digit cannot have leading zero + if (isNumber && token->length > 1 && token->name[0] == '0') + isNumber = false; + + // String to SizeType conversion + SizeType n = 0; + if (isNumber) { + for (size_t j = 0; j < token->length; j++) { + SizeType m = n * 10 + static_cast<SizeType>(token->name[j] - '0'); + if (m < n) { // overflow detection + isNumber = false; + break; + } + n = m; + } + } + + token->index = isNumber ? n : kPointerInvalidIndex; + token++; + } + + RAPIDJSON_ASSERT(name <= nameBuffer_ + length); // Should not overflow buffer + parseErrorCode_ = kPointerParseErrorNone; + return; + + error: + Allocator::Free(tokens_); + nameBuffer_ = 0; + tokens_ = 0; + tokenCount_ = 0; + parseErrorOffset_ = i; + return; + } + + //! Stringify to string or URI fragment representation. + /*! + \tparam uriFragment True for stringifying to URI fragment representation. False for string representation. + \tparam OutputStream type of output stream. + \param os The output stream. + */ + template<bool uriFragment, typename OutputStream> + bool Stringify(OutputStream& os) const { + RAPIDJSON_ASSERT(IsValid()); + + if (uriFragment) + os.Put('#'); + + for (Token *t = tokens_; t != tokens_ + tokenCount_; ++t) { + os.Put('/'); + for (size_t j = 0; j < t->length; j++) { + Ch c = t->name[j]; + if (c == '~') { + os.Put('~'); + os.Put('0'); + } + else if (c == '/') { + os.Put('~'); + os.Put('1'); + } + else if (uriFragment && NeedPercentEncode(c)) { + // Transcode to UTF8 sequence + GenericStringStream<typename ValueType::EncodingType> source(&t->name[j]); + PercentEncodeStream<OutputStream> target(os); + if (!Transcoder<EncodingType, UTF8<> >().Validate(source, target)) + return false; + j += source.Tell() - 1; + } + else + os.Put(c); + } + } + return true; + } + + //! A helper stream for decoding a percent-encoded sequence into code unit. + /*! + This stream decodes %XY triplet into code unit (0-255). + If it encounters invalid characters, it sets output code unit as 0 and + mark invalid, and to be checked by IsValid(). + */ + class PercentDecodeStream { + public: + typedef typename ValueType::Ch Ch; + + //! Constructor + /*! + \param source Start of the stream + \param end Past-the-end of the stream. + */ + PercentDecodeStream(const Ch* source, const Ch* end) : src_(source), head_(source), end_(end), valid_(true) {} + + Ch Take() { + if (*src_ != '%' || src_ + 3 > end_) { // %XY triplet + valid_ = false; + return 0; + } + src_++; + Ch c = 0; + for (int j = 0; j < 2; j++) { + c = static_cast<Ch>(c << 4); + Ch h = *src_; + if (h >= '0' && h <= '9') c = static_cast<Ch>(c + h - '0'); + else if (h >= 'A' && h <= 'F') c = static_cast<Ch>(c + h - 'A' + 10); + else if (h >= 'a' && h <= 'f') c = static_cast<Ch>(c + h - 'a' + 10); + else { + valid_ = false; + return 0; + } + src_++; + } + return c; + } + + size_t Tell() const { return static_cast<size_t>(src_ - head_); } + bool IsValid() const { return valid_; } + + private: + const Ch* src_; //!< Current read position. + const Ch* head_; //!< Original head of the string. + const Ch* end_; //!< Past-the-end position. + bool valid_; //!< Whether the parsing is valid. + }; + + //! A helper stream to encode character (UTF-8 code unit) into percent-encoded sequence. + template <typename OutputStream> + class PercentEncodeStream { + public: + PercentEncodeStream(OutputStream& os) : os_(os) {} + void Put(char c) { // UTF-8 must be byte + unsigned char u = static_cast<unsigned char>(c); + static const char hexDigits[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' }; + os_.Put('%'); + os_.Put(hexDigits[u >> 4]); + os_.Put(hexDigits[u & 15]); + } + private: + OutputStream& os_; + }; + + Allocator* allocator_; //!< The current allocator. It is either user-supplied or equal to ownAllocator_. + Allocator* ownAllocator_; //!< Allocator owned by this Pointer. + Ch* nameBuffer_; //!< A buffer containing all names in tokens. + Token* tokens_; //!< A list of tokens. + size_t tokenCount_; //!< Number of tokens in tokens_. + size_t parseErrorOffset_; //!< Offset in code unit when parsing fail. + PointerParseErrorCode parseErrorCode_; //!< Parsing error code. +}; + +//! GenericPointer for Value (UTF-8, default allocator). +typedef GenericPointer<Value> Pointer; + +//!@name Helper functions for GenericPointer +//@{ + +////////////////////////////////////////////////////////////////////////////// + +template <typename T> +typename T::ValueType& CreateValueByPointer(T& root, const GenericPointer<typename T::ValueType>& pointer, typename T::AllocatorType& a) { + return pointer.Create(root, a); +} + +template <typename T, typename CharType, size_t N> +typename T::ValueType& CreateValueByPointer(T& root, const CharType(&source)[N], typename T::AllocatorType& a) { + return GenericPointer<typename T::ValueType>(source, N - 1).Create(root, a); +} + +// No allocator parameter + +template <typename DocumentType> +typename DocumentType::ValueType& CreateValueByPointer(DocumentType& document, const GenericPointer<typename DocumentType::ValueType>& pointer) { + return pointer.Create(document); +} + +template <typename DocumentType, typename CharType, size_t N> +typename DocumentType::ValueType& CreateValueByPointer(DocumentType& document, const CharType(&source)[N]) { + return GenericPointer<typename DocumentType::ValueType>(source, N - 1).Create(document); +} + +////////////////////////////////////////////////////////////////////////////// + +template <typename T> +typename T::ValueType* GetValueByPointer(T& root, const GenericPointer<typename T::ValueType>& pointer, size_t* unresolvedTokenIndex = 0) { + return pointer.Get(root, unresolvedTokenIndex); +} + +template <typename T> +const typename T::ValueType* GetValueByPointer(const T& root, const GenericPointer<typename T::ValueType>& pointer, size_t* unresolvedTokenIndex = 0) { + return pointer.Get(root, unresolvedTokenIndex); +} + +template <typename T, typename CharType, size_t N> +typename T::ValueType* GetValueByPointer(T& root, const CharType (&source)[N], size_t* unresolvedTokenIndex = 0) { + return GenericPointer<typename T::ValueType>(source, N - 1).Get(root, unresolvedTokenIndex); +} + +template <typename T, typename CharType, size_t N> +const typename T::ValueType* GetValueByPointer(const T& root, const CharType(&source)[N], size_t* unresolvedTokenIndex = 0) { + return GenericPointer<typename T::ValueType>(source, N - 1).Get(root, unresolvedTokenIndex); +} + +////////////////////////////////////////////////////////////////////////////// + +template <typename T> +typename T::ValueType& GetValueByPointerWithDefault(T& root, const GenericPointer<typename T::ValueType>& pointer, const typename T::ValueType& defaultValue, typename T::AllocatorType& a) { + return pointer.GetWithDefault(root, defaultValue, a); +} + +template <typename T> +typename T::ValueType& GetValueByPointerWithDefault(T& root, const GenericPointer<typename T::ValueType>& pointer, const typename T::Ch* defaultValue, typename T::AllocatorType& a) { + return pointer.GetWithDefault(root, defaultValue, a); +} + +#if RAPIDJSON_HAS_STDSTRING +template <typename T> +typename T::ValueType& GetValueByPointerWithDefault(T& root, const GenericPointer<typename T::ValueType>& pointer, const std::basic_string<typename T::Ch>& defaultValue, typename T::AllocatorType& a) { + return pointer.GetWithDefault(root, defaultValue, a); +} +#endif + +template <typename T, typename T2> +RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr<internal::IsPointer<T2>, internal::IsGenericValue<T2> >), (typename T::ValueType&)) +GetValueByPointerWithDefault(T& root, const GenericPointer<typename T::ValueType>& pointer, T2 defaultValue, typename T::AllocatorType& a) { + return pointer.GetWithDefault(root, defaultValue, a); +} + +template <typename T, typename CharType, size_t N> +typename T::ValueType& GetValueByPointerWithDefault(T& root, const CharType(&source)[N], const typename T::ValueType& defaultValue, typename T::AllocatorType& a) { + return GenericPointer<typename T::ValueType>(source, N - 1).GetWithDefault(root, defaultValue, a); +} + +template <typename T, typename CharType, size_t N> +typename T::ValueType& GetValueByPointerWithDefault(T& root, const CharType(&source)[N], const typename T::Ch* defaultValue, typename T::AllocatorType& a) { + return GenericPointer<typename T::ValueType>(source, N - 1).GetWithDefault(root, defaultValue, a); +} + +#if RAPIDJSON_HAS_STDSTRING +template <typename T, typename CharType, size_t N> +typename T::ValueType& GetValueByPointerWithDefault(T& root, const CharType(&source)[N], const std::basic_string<typename T::Ch>& defaultValue, typename T::AllocatorType& a) { + return GenericPointer<typename T::ValueType>(source, N - 1).GetWithDefault(root, defaultValue, a); +} +#endif + +template <typename T, typename CharType, size_t N, typename T2> +RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr<internal::IsPointer<T2>, internal::IsGenericValue<T2> >), (typename T::ValueType&)) +GetValueByPointerWithDefault(T& root, const CharType(&source)[N], T2 defaultValue, typename T::AllocatorType& a) { + return GenericPointer<typename T::ValueType>(source, N - 1).GetWithDefault(root, defaultValue, a); +} + +// No allocator parameter + +template <typename DocumentType> +typename DocumentType::ValueType& GetValueByPointerWithDefault(DocumentType& document, const GenericPointer<typename DocumentType::ValueType>& pointer, const typename DocumentType::ValueType& defaultValue) { + return pointer.GetWithDefault(document, defaultValue); +} + +template <typename DocumentType> +typename DocumentType::ValueType& GetValueByPointerWithDefault(DocumentType& document, const GenericPointer<typename DocumentType::ValueType>& pointer, const typename DocumentType::Ch* defaultValue) { + return pointer.GetWithDefault(document, defaultValue); +} + +#if RAPIDJSON_HAS_STDSTRING +template <typename DocumentType> +typename DocumentType::ValueType& GetValueByPointerWithDefault(DocumentType& document, const GenericPointer<typename DocumentType::ValueType>& pointer, const std::basic_string<typename DocumentType::Ch>& defaultValue) { + return pointer.GetWithDefault(document, defaultValue); +} +#endif + +template <typename DocumentType, typename T2> +RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr<internal::IsPointer<T2>, internal::IsGenericValue<T2> >), (typename DocumentType::ValueType&)) +GetValueByPointerWithDefault(DocumentType& document, const GenericPointer<typename DocumentType::ValueType>& pointer, T2 defaultValue) { + return pointer.GetWithDefault(document, defaultValue); +} + +template <typename DocumentType, typename CharType, size_t N> +typename DocumentType::ValueType& GetValueByPointerWithDefault(DocumentType& document, const CharType(&source)[N], const typename DocumentType::ValueType& defaultValue) { + return GenericPointer<typename DocumentType::ValueType>(source, N - 1).GetWithDefault(document, defaultValue); +} + +template <typename DocumentType, typename CharType, size_t N> +typename DocumentType::ValueType& GetValueByPointerWithDefault(DocumentType& document, const CharType(&source)[N], const typename DocumentType::Ch* defaultValue) { + return GenericPointer<typename DocumentType::ValueType>(source, N - 1).GetWithDefault(document, defaultValue); +} + +#if RAPIDJSON_HAS_STDSTRING +template <typename DocumentType, typename CharType, size_t N> +typename DocumentType::ValueType& GetValueByPointerWithDefault(DocumentType& document, const CharType(&source)[N], const std::basic_string<typename DocumentType::Ch>& defaultValue) { + return GenericPointer<typename DocumentType::ValueType>(source, N - 1).GetWithDefault(document, defaultValue); +} +#endif + +template <typename DocumentType, typename CharType, size_t N, typename T2> +RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr<internal::IsPointer<T2>, internal::IsGenericValue<T2> >), (typename DocumentType::ValueType&)) +GetValueByPointerWithDefault(DocumentType& document, const CharType(&source)[N], T2 defaultValue) { + return GenericPointer<typename DocumentType::ValueType>(source, N - 1).GetWithDefault(document, defaultValue); +} + +////////////////////////////////////////////////////////////////////////////// + +template <typename T> +typename T::ValueType& SetValueByPointer(T& root, const GenericPointer<typename T::ValueType>& pointer, typename T::ValueType& value, typename T::AllocatorType& a) { + return pointer.Set(root, value, a); +} + +template <typename T> +typename T::ValueType& SetValueByPointer(T& root, const GenericPointer<typename T::ValueType>& pointer, const typename T::ValueType& value, typename T::AllocatorType& a) { + return pointer.Set(root, value, a); +} + +template <typename T> +typename T::ValueType& SetValueByPointer(T& root, const GenericPointer<typename T::ValueType>& pointer, const typename T::Ch* value, typename T::AllocatorType& a) { + return pointer.Set(root, value, a); +} + +#if RAPIDJSON_HAS_STDSTRING +template <typename T> +typename T::ValueType& SetValueByPointer(T& root, const GenericPointer<typename T::ValueType>& pointer, const std::basic_string<typename T::Ch>& value, typename T::AllocatorType& a) { + return pointer.Set(root, value, a); +} +#endif + +template <typename T, typename T2> +RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr<internal::IsPointer<T2>, internal::IsGenericValue<T2> >), (typename T::ValueType&)) +SetValueByPointer(T& root, const GenericPointer<typename T::ValueType>& pointer, T2 value, typename T::AllocatorType& a) { + return pointer.Set(root, value, a); +} + +template <typename T, typename CharType, size_t N> +typename T::ValueType& SetValueByPointer(T& root, const CharType(&source)[N], typename T::ValueType& value, typename T::AllocatorType& a) { + return GenericPointer<typename T::ValueType>(source, N - 1).Set(root, value, a); +} + +template <typename T, typename CharType, size_t N> +typename T::ValueType& SetValueByPointer(T& root, const CharType(&source)[N], const typename T::ValueType& value, typename T::AllocatorType& a) { + return GenericPointer<typename T::ValueType>(source, N - 1).Set(root, value, a); +} + +template <typename T, typename CharType, size_t N> +typename T::ValueType& SetValueByPointer(T& root, const CharType(&source)[N], const typename T::Ch* value, typename T::AllocatorType& a) { + return GenericPointer<typename T::ValueType>(source, N - 1).Set(root, value, a); +} + +#if RAPIDJSON_HAS_STDSTRING +template <typename T, typename CharType, size_t N> +typename T::ValueType& SetValueByPointer(T& root, const CharType(&source)[N], const std::basic_string<typename T::Ch>& value, typename T::AllocatorType& a) { + return GenericPointer<typename T::ValueType>(source, N - 1).Set(root, value, a); +} +#endif + +template <typename T, typename CharType, size_t N, typename T2> +RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr<internal::IsPointer<T2>, internal::IsGenericValue<T2> >), (typename T::ValueType&)) +SetValueByPointer(T& root, const CharType(&source)[N], T2 value, typename T::AllocatorType& a) { + return GenericPointer<typename T::ValueType>(source, N - 1).Set(root, value, a); +} + +// No allocator parameter + +template <typename DocumentType> +typename DocumentType::ValueType& SetValueByPointer(DocumentType& document, const GenericPointer<typename DocumentType::ValueType>& pointer, typename DocumentType::ValueType& value) { + return pointer.Set(document, value); +} + +template <typename DocumentType> +typename DocumentType::ValueType& SetValueByPointer(DocumentType& document, const GenericPointer<typename DocumentType::ValueType>& pointer, const typename DocumentType::ValueType& value) { + return pointer.Set(document, value); +} + +template <typename DocumentType> +typename DocumentType::ValueType& SetValueByPointer(DocumentType& document, const GenericPointer<typename DocumentType::ValueType>& pointer, const typename DocumentType::Ch* value) { + return pointer.Set(document, value); +} + +#if RAPIDJSON_HAS_STDSTRING +template <typename DocumentType> +typename DocumentType::ValueType& SetValueByPointer(DocumentType& document, const GenericPointer<typename DocumentType::ValueType>& pointer, const std::basic_string<typename DocumentType::Ch>& value) { + return pointer.Set(document, value); +} +#endif + +template <typename DocumentType, typename T2> +RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr<internal::IsPointer<T2>, internal::IsGenericValue<T2> >), (typename DocumentType::ValueType&)) +SetValueByPointer(DocumentType& document, const GenericPointer<typename DocumentType::ValueType>& pointer, T2 value) { + return pointer.Set(document, value); +} + +template <typename DocumentType, typename CharType, size_t N> +typename DocumentType::ValueType& SetValueByPointer(DocumentType& document, const CharType(&source)[N], typename DocumentType::ValueType& value) { + return GenericPointer<typename DocumentType::ValueType>(source, N - 1).Set(document, value); +} + +template <typename DocumentType, typename CharType, size_t N> +typename DocumentType::ValueType& SetValueByPointer(DocumentType& document, const CharType(&source)[N], const typename DocumentType::ValueType& value) { + return GenericPointer<typename DocumentType::ValueType>(source, N - 1).Set(document, value); +} + +template <typename DocumentType, typename CharType, size_t N> +typename DocumentType::ValueType& SetValueByPointer(DocumentType& document, const CharType(&source)[N], const typename DocumentType::Ch* value) { + return GenericPointer<typename DocumentType::ValueType>(source, N - 1).Set(document, value); +} + +#if RAPIDJSON_HAS_STDSTRING +template <typename DocumentType, typename CharType, size_t N> +typename DocumentType::ValueType& SetValueByPointer(DocumentType& document, const CharType(&source)[N], const std::basic_string<typename DocumentType::Ch>& value) { + return GenericPointer<typename DocumentType::ValueType>(source, N - 1).Set(document, value); +} +#endif + +template <typename DocumentType, typename CharType, size_t N, typename T2> +RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr<internal::IsPointer<T2>, internal::IsGenericValue<T2> >), (typename DocumentType::ValueType&)) +SetValueByPointer(DocumentType& document, const CharType(&source)[N], T2 value) { + return GenericPointer<typename DocumentType::ValueType>(source, N - 1).Set(document, value); +} + +////////////////////////////////////////////////////////////////////////////// + +template <typename T> +typename T::ValueType& SwapValueByPointer(T& root, const GenericPointer<typename T::ValueType>& pointer, typename T::ValueType& value, typename T::AllocatorType& a) { + return pointer.Swap(root, value, a); +} + +template <typename T, typename CharType, size_t N> +typename T::ValueType& SwapValueByPointer(T& root, const CharType(&source)[N], typename T::ValueType& value, typename T::AllocatorType& a) { + return GenericPointer<typename T::ValueType>(source, N - 1).Swap(root, value, a); +} + +template <typename DocumentType> +typename DocumentType::ValueType& SwapValueByPointer(DocumentType& document, const GenericPointer<typename DocumentType::ValueType>& pointer, typename DocumentType::ValueType& value) { + return pointer.Swap(document, value); +} + +template <typename DocumentType, typename CharType, size_t N> +typename DocumentType::ValueType& SwapValueByPointer(DocumentType& document, const CharType(&source)[N], typename DocumentType::ValueType& value) { + return GenericPointer<typename DocumentType::ValueType>(source, N - 1).Swap(document, value); +} + +////////////////////////////////////////////////////////////////////////////// + +template <typename T> +bool EraseValueByPointer(T& root, const GenericPointer<typename T::ValueType>& pointer) { + return pointer.Erase(root); +} + +template <typename T, typename CharType, size_t N> +bool EraseValueByPointer(T& root, const CharType(&source)[N]) { + return GenericPointer<typename T::ValueType>(source, N - 1).Erase(root); +} + +//@} + +RAPIDJSON_NAMESPACE_END + +#ifdef __clang__ +RAPIDJSON_DIAG_POP +#endif + +#ifdef _MSC_VER +RAPIDJSON_DIAG_POP +#endif + +#endif // RAPIDJSON_POINTER_H_ diff --git a/xmrstak/rapidjson/prettywriter.h b/xmrstak/rapidjson/prettywriter.h new file mode 100644 index 0000000..abd964f --- /dev/null +++ b/xmrstak/rapidjson/prettywriter.h @@ -0,0 +1,275 @@ +// Tencent is pleased to support the open source community by making RapidJSON available. +// +// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. +// +// Licensed under the MIT License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// http://opensource.org/licenses/MIT +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef RAPIDJSON_PRETTYWRITER_H_ +#define RAPIDJSON_PRETTYWRITER_H_ + +#include "writer.h" + +#ifdef __GNUC__ +RAPIDJSON_DIAG_PUSH +RAPIDJSON_DIAG_OFF(effc++) +#endif + +#if defined(__clang__) +RAPIDJSON_DIAG_PUSH +RAPIDJSON_DIAG_OFF(c++98-compat) +#endif + +RAPIDJSON_NAMESPACE_BEGIN + +//! Combination of PrettyWriter format flags. +/*! \see PrettyWriter::SetFormatOptions + */ +enum PrettyFormatOptions { + kFormatDefault = 0, //!< Default pretty formatting. + kFormatSingleLineArray = 1 //!< Format arrays on a single line. +}; + +//! Writer with indentation and spacing. +/*! + \tparam OutputStream Type of ouptut os. + \tparam SourceEncoding Encoding of source string. + \tparam TargetEncoding Encoding of output stream. + \tparam StackAllocator Type of allocator for allocating memory of stack. +*/ +template<typename OutputStream, typename SourceEncoding = UTF8<>, typename TargetEncoding = UTF8<>, typename StackAllocator = CrtAllocator, unsigned writeFlags = kWriteDefaultFlags> +class PrettyWriter : public Writer<OutputStream, SourceEncoding, TargetEncoding, StackAllocator, writeFlags> { +public: + typedef Writer<OutputStream, SourceEncoding, TargetEncoding, StackAllocator> Base; + typedef typename Base::Ch Ch; + + //! Constructor + /*! \param os Output stream. + \param allocator User supplied allocator. If it is null, it will create a private one. + \param levelDepth Initial capacity of stack. + */ + explicit PrettyWriter(OutputStream& os, StackAllocator* allocator = 0, size_t levelDepth = Base::kDefaultLevelDepth) : + Base(os, allocator, levelDepth), indentChar_(' '), indentCharCount_(4), formatOptions_(kFormatDefault) {} + + + explicit PrettyWriter(StackAllocator* allocator = 0, size_t levelDepth = Base::kDefaultLevelDepth) : + Base(allocator, levelDepth), indentChar_(' '), indentCharCount_(4) {} + +#if RAPIDJSON_HAS_CXX11_RVALUE_REFS + PrettyWriter(PrettyWriter&& rhs) : + Base(std::forward<PrettyWriter>(rhs)), indentChar_(rhs.indentChar_), indentCharCount_(rhs.indentCharCount_), formatOptions_(rhs.formatOptions_) {} +#endif + + //! Set custom indentation. + /*! \param indentChar Character for indentation. Must be whitespace character (' ', '\\t', '\\n', '\\r'). + \param indentCharCount Number of indent characters for each indentation level. + \note The default indentation is 4 spaces. + */ + PrettyWriter& SetIndent(Ch indentChar, unsigned indentCharCount) { + RAPIDJSON_ASSERT(indentChar == ' ' || indentChar == '\t' || indentChar == '\n' || indentChar == '\r'); + indentChar_ = indentChar; + indentCharCount_ = indentCharCount; + return *this; + } + + //! Set pretty writer formatting options. + /*! \param options Formatting options. + */ + PrettyWriter& SetFormatOptions(PrettyFormatOptions options) { + formatOptions_ = options; + return *this; + } + + /*! @name Implementation of Handler + \see Handler + */ + //@{ + + bool Null() { PrettyPrefix(kNullType); return Base::WriteNull(); } + bool Bool(bool b) { PrettyPrefix(b ? kTrueType : kFalseType); return Base::WriteBool(b); } + bool Int(int i) { PrettyPrefix(kNumberType); return Base::WriteInt(i); } + bool Uint(unsigned u) { PrettyPrefix(kNumberType); return Base::WriteUint(u); } + bool Int64(int64_t i64) { PrettyPrefix(kNumberType); return Base::WriteInt64(i64); } + bool Uint64(uint64_t u64) { PrettyPrefix(kNumberType); return Base::WriteUint64(u64); } + bool Double(double d) { PrettyPrefix(kNumberType); return Base::WriteDouble(d); } + + bool RawNumber(const Ch* str, SizeType length, bool copy = false) { + RAPIDJSON_ASSERT(str != 0); + (void)copy; + PrettyPrefix(kNumberType); + return Base::WriteString(str, length); + } + + bool String(const Ch* str, SizeType length, bool copy = false) { + RAPIDJSON_ASSERT(str != 0); + (void)copy; + PrettyPrefix(kStringType); + return Base::WriteString(str, length); + } + +#if RAPIDJSON_HAS_STDSTRING + bool String(const std::basic_string<Ch>& str) { + return String(str.data(), SizeType(str.size())); + } +#endif + + bool StartObject() { + PrettyPrefix(kObjectType); + new (Base::level_stack_.template Push<typename Base::Level>()) typename Base::Level(false); + return Base::WriteStartObject(); + } + + bool Key(const Ch* str, SizeType length, bool copy = false) { return String(str, length, copy); } + +#if RAPIDJSON_HAS_STDSTRING + bool Key(const std::basic_string<Ch>& str) { + return Key(str.data(), SizeType(str.size())); + } +#endif + + bool EndObject(SizeType memberCount = 0) { + (void)memberCount; + RAPIDJSON_ASSERT(Base::level_stack_.GetSize() >= sizeof(typename Base::Level)); + RAPIDJSON_ASSERT(!Base::level_stack_.template Top<typename Base::Level>()->inArray); + bool empty = Base::level_stack_.template Pop<typename Base::Level>(1)->valueCount == 0; + + if (!empty) { + Base::os_->Put('\n'); + WriteIndent(); + } + bool ret = Base::WriteEndObject(); + (void)ret; + RAPIDJSON_ASSERT(ret == true); + if (Base::level_stack_.Empty()) // end of json text + Base::os_->Flush(); + return true; + } + + bool StartArray() { + PrettyPrefix(kArrayType); + new (Base::level_stack_.template Push<typename Base::Level>()) typename Base::Level(true); + return Base::WriteStartArray(); + } + + bool EndArray(SizeType memberCount = 0) { + (void)memberCount; + RAPIDJSON_ASSERT(Base::level_stack_.GetSize() >= sizeof(typename Base::Level)); + RAPIDJSON_ASSERT(Base::level_stack_.template Top<typename Base::Level>()->inArray); + bool empty = Base::level_stack_.template Pop<typename Base::Level>(1)->valueCount == 0; + + if (!empty && !(formatOptions_ & kFormatSingleLineArray)) { + Base::os_->Put('\n'); + WriteIndent(); + } + bool ret = Base::WriteEndArray(); + (void)ret; + RAPIDJSON_ASSERT(ret == true); + if (Base::level_stack_.Empty()) // end of json text + Base::os_->Flush(); + return true; + } + + //@} + + /*! @name Convenience extensions */ + //@{ + + //! Simpler but slower overload. + bool String(const Ch* str) { return String(str, internal::StrLen(str)); } + bool Key(const Ch* str) { return Key(str, internal::StrLen(str)); } + + //@} + + //! Write a raw JSON value. + /*! + For user to write a stringified JSON as a value. + + \param json A well-formed JSON value. It should not contain null character within [0, length - 1] range. + \param length Length of the json. + \param type Type of the root of json. + \note When using PrettyWriter::RawValue(), the result json may not be indented correctly. + */ + bool RawValue(const Ch* json, size_t length, Type type) { + RAPIDJSON_ASSERT(json != 0); + PrettyPrefix(type); + return Base::WriteRawValue(json, length); + } + +protected: + void PrettyPrefix(Type type) { + (void)type; + if (Base::level_stack_.GetSize() != 0) { // this value is not at root + typename Base::Level* level = Base::level_stack_.template Top<typename Base::Level>(); + + if (level->inArray) { + if (level->valueCount > 0) { + Base::os_->Put(','); // add comma if it is not the first element in array + if (formatOptions_ & kFormatSingleLineArray) + Base::os_->Put(' '); + } + + if (!(formatOptions_ & kFormatSingleLineArray)) { + Base::os_->Put('\n'); + WriteIndent(); + } + } + else { // in object + if (level->valueCount > 0) { + if (level->valueCount % 2 == 0) { + Base::os_->Put(','); + Base::os_->Put('\n'); + } + else { + Base::os_->Put(':'); + Base::os_->Put(' '); + } + } + else + Base::os_->Put('\n'); + + if (level->valueCount % 2 == 0) + WriteIndent(); + } + if (!level->inArray && level->valueCount % 2 == 0) + RAPIDJSON_ASSERT(type == kStringType); // if it's in object, then even number should be a name + level->valueCount++; + } + else { + RAPIDJSON_ASSERT(!Base::hasRoot_); // Should only has one and only one root. + Base::hasRoot_ = true; + } + } + + void WriteIndent() { + size_t count = (Base::level_stack_.GetSize() / sizeof(typename Base::Level)) * indentCharCount_; + PutN(*Base::os_, static_cast<typename TargetEncoding::Ch>(indentChar_), count); + } + + Ch indentChar_; + unsigned indentCharCount_; + PrettyFormatOptions formatOptions_; + +private: + // Prohibit copy constructor & assignment operator. + PrettyWriter(const PrettyWriter&); + PrettyWriter& operator=(const PrettyWriter&); +}; + +RAPIDJSON_NAMESPACE_END + +#if defined(__clang__) +RAPIDJSON_DIAG_POP +#endif + +#ifdef __GNUC__ +RAPIDJSON_DIAG_POP +#endif + +#endif // RAPIDJSON_RAPIDJSON_H_ diff --git a/xmrstak/rapidjson/rapidjson.h b/xmrstak/rapidjson/rapidjson.h new file mode 100644 index 0000000..053b2ce --- /dev/null +++ b/xmrstak/rapidjson/rapidjson.h @@ -0,0 +1,615 @@ +// Tencent is pleased to support the open source community by making RapidJSON available. +// +// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. +// +// Licensed under the MIT License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// http://opensource.org/licenses/MIT +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef RAPIDJSON_RAPIDJSON_H_ +#define RAPIDJSON_RAPIDJSON_H_ + +/*!\file rapidjson.h + \brief common definitions and configuration + + \see RAPIDJSON_CONFIG + */ + +/*! \defgroup RAPIDJSON_CONFIG RapidJSON configuration + \brief Configuration macros for library features + + Some RapidJSON features are configurable to adapt the library to a wide + variety of platforms, environments and usage scenarios. Most of the + features can be configured in terms of overriden or predefined + preprocessor macros at compile-time. + + Some additional customization is available in the \ref RAPIDJSON_ERRORS APIs. + + \note These macros should be given on the compiler command-line + (where applicable) to avoid inconsistent values when compiling + different translation units of a single application. + */ + +#include <cstdlib> // malloc(), realloc(), free(), size_t +#include <cstring> // memset(), memcpy(), memmove(), memcmp() + +/////////////////////////////////////////////////////////////////////////////// +// RAPIDJSON_VERSION_STRING +// +// ALWAYS synchronize the following 3 macros with corresponding variables in /CMakeLists.txt. +// + +//!@cond RAPIDJSON_HIDDEN_FROM_DOXYGEN +// token stringification +#define RAPIDJSON_STRINGIFY(x) RAPIDJSON_DO_STRINGIFY(x) +#define RAPIDJSON_DO_STRINGIFY(x) #x +//!@endcond + +/*! \def RAPIDJSON_MAJOR_VERSION + \ingroup RAPIDJSON_CONFIG + \brief Major version of RapidJSON in integer. +*/ +/*! \def RAPIDJSON_MINOR_VERSION + \ingroup RAPIDJSON_CONFIG + \brief Minor version of RapidJSON in integer. +*/ +/*! \def RAPIDJSON_PATCH_VERSION + \ingroup RAPIDJSON_CONFIG + \brief Patch version of RapidJSON in integer. +*/ +/*! \def RAPIDJSON_VERSION_STRING + \ingroup RAPIDJSON_CONFIG + \brief Version of RapidJSON in "<major>.<minor>.<patch>" string format. +*/ +#define RAPIDJSON_MAJOR_VERSION 1 +#define RAPIDJSON_MINOR_VERSION 1 +#define RAPIDJSON_PATCH_VERSION 0 +#define RAPIDJSON_VERSION_STRING \ + RAPIDJSON_STRINGIFY(RAPIDJSON_MAJOR_VERSION.RAPIDJSON_MINOR_VERSION.RAPIDJSON_PATCH_VERSION) + +/////////////////////////////////////////////////////////////////////////////// +// RAPIDJSON_NAMESPACE_(BEGIN|END) +/*! \def RAPIDJSON_NAMESPACE + \ingroup RAPIDJSON_CONFIG + \brief provide custom rapidjson namespace + + In order to avoid symbol clashes and/or "One Definition Rule" errors + between multiple inclusions of (different versions of) RapidJSON in + a single binary, users can customize the name of the main RapidJSON + namespace. + + In case of a single nesting level, defining \c RAPIDJSON_NAMESPACE + to a custom name (e.g. \c MyRapidJSON) is sufficient. If multiple + levels are needed, both \ref RAPIDJSON_NAMESPACE_BEGIN and \ref + RAPIDJSON_NAMESPACE_END need to be defined as well: + + \code + // in some .cpp file + #define RAPIDJSON_NAMESPACE my::rapidjson + #define RAPIDJSON_NAMESPACE_BEGIN namespace my { namespace rapidjson { + #define RAPIDJSON_NAMESPACE_END } } + #include "rapidjson/..." + \endcode + + \see rapidjson + */ +/*! \def RAPIDJSON_NAMESPACE_BEGIN + \ingroup RAPIDJSON_CONFIG + \brief provide custom rapidjson namespace (opening expression) + \see RAPIDJSON_NAMESPACE +*/ +/*! \def RAPIDJSON_NAMESPACE_END + \ingroup RAPIDJSON_CONFIG + \brief provide custom rapidjson namespace (closing expression) + \see RAPIDJSON_NAMESPACE +*/ +#ifndef RAPIDJSON_NAMESPACE +#define RAPIDJSON_NAMESPACE rapidjson +#endif +#ifndef RAPIDJSON_NAMESPACE_BEGIN +#define RAPIDJSON_NAMESPACE_BEGIN namespace RAPIDJSON_NAMESPACE { +#endif +#ifndef RAPIDJSON_NAMESPACE_END +#define RAPIDJSON_NAMESPACE_END } +#endif + +/////////////////////////////////////////////////////////////////////////////// +// RAPIDJSON_HAS_STDSTRING + +#ifndef RAPIDJSON_HAS_STDSTRING +#ifdef RAPIDJSON_DOXYGEN_RUNNING +#define RAPIDJSON_HAS_STDSTRING 1 // force generation of documentation +#else +#define RAPIDJSON_HAS_STDSTRING 0 // no std::string support by default +#endif +/*! \def RAPIDJSON_HAS_STDSTRING + \ingroup RAPIDJSON_CONFIG + \brief Enable RapidJSON support for \c std::string + + By defining this preprocessor symbol to \c 1, several convenience functions for using + \ref rapidjson::GenericValue with \c std::string are enabled, especially + for construction and comparison. + + \hideinitializer +*/ +#endif // !defined(RAPIDJSON_HAS_STDSTRING) + +#if RAPIDJSON_HAS_STDSTRING +#include <string> +#endif // RAPIDJSON_HAS_STDSTRING + +/////////////////////////////////////////////////////////////////////////////// +// RAPIDJSON_NO_INT64DEFINE + +/*! \def RAPIDJSON_NO_INT64DEFINE + \ingroup RAPIDJSON_CONFIG + \brief Use external 64-bit integer types. + + RapidJSON requires the 64-bit integer types \c int64_t and \c uint64_t types + to be available at global scope. + + If users have their own definition, define RAPIDJSON_NO_INT64DEFINE to + prevent RapidJSON from defining its own types. +*/ +#ifndef RAPIDJSON_NO_INT64DEFINE +//!@cond RAPIDJSON_HIDDEN_FROM_DOXYGEN +#if defined(_MSC_VER) && (_MSC_VER < 1800) // Visual Studio 2013 +#include "msinttypes/stdint.h" +#include "msinttypes/inttypes.h" +#else +// Other compilers should have this. +#include <stdint.h> +#include <inttypes.h> +#endif +//!@endcond +#ifdef RAPIDJSON_DOXYGEN_RUNNING +#define RAPIDJSON_NO_INT64DEFINE +#endif +#endif // RAPIDJSON_NO_INT64TYPEDEF + +/////////////////////////////////////////////////////////////////////////////// +// RAPIDJSON_FORCEINLINE + +#ifndef RAPIDJSON_FORCEINLINE +//!@cond RAPIDJSON_HIDDEN_FROM_DOXYGEN +#if defined(_MSC_VER) && defined(NDEBUG) +#define RAPIDJSON_FORCEINLINE __forceinline +#elif defined(__GNUC__) && __GNUC__ >= 4 && defined(NDEBUG) +#define RAPIDJSON_FORCEINLINE __attribute__((always_inline)) +#else +#define RAPIDJSON_FORCEINLINE +#endif +//!@endcond +#endif // RAPIDJSON_FORCEINLINE + +/////////////////////////////////////////////////////////////////////////////// +// RAPIDJSON_ENDIAN +#define RAPIDJSON_LITTLEENDIAN 0 //!< Little endian machine +#define RAPIDJSON_BIGENDIAN 1 //!< Big endian machine + +//! Endianness of the machine. +/*! + \def RAPIDJSON_ENDIAN + \ingroup RAPIDJSON_CONFIG + + GCC 4.6 provided macro for detecting endianness of the target machine. But other + compilers may not have this. User can define RAPIDJSON_ENDIAN to either + \ref RAPIDJSON_LITTLEENDIAN or \ref RAPIDJSON_BIGENDIAN. + + Default detection implemented with reference to + \li https://gcc.gnu.org/onlinedocs/gcc-4.6.0/cpp/Common-Predefined-Macros.html + \li http://www.boost.org/doc/libs/1_42_0/boost/detail/endian.hpp +*/ +#ifndef RAPIDJSON_ENDIAN +// Detect with GCC 4.6's macro +# ifdef __BYTE_ORDER__ +# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +# define RAPIDJSON_ENDIAN RAPIDJSON_LITTLEENDIAN +# elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +# define RAPIDJSON_ENDIAN RAPIDJSON_BIGENDIAN +# else +# error Unknown machine endianess detected. User needs to define RAPIDJSON_ENDIAN. +# endif // __BYTE_ORDER__ +// Detect with GLIBC's endian.h +# elif defined(__GLIBC__) +# include <endian.h> +# if (__BYTE_ORDER == __LITTLE_ENDIAN) +# define RAPIDJSON_ENDIAN RAPIDJSON_LITTLEENDIAN +# elif (__BYTE_ORDER == __BIG_ENDIAN) +# define RAPIDJSON_ENDIAN RAPIDJSON_BIGENDIAN +# else +# error Unknown machine endianess detected. User needs to define RAPIDJSON_ENDIAN. +# endif // __GLIBC__ +// Detect with _LITTLE_ENDIAN and _BIG_ENDIAN macro +# elif defined(_LITTLE_ENDIAN) && !defined(_BIG_ENDIAN) +# define RAPIDJSON_ENDIAN RAPIDJSON_LITTLEENDIAN +# elif defined(_BIG_ENDIAN) && !defined(_LITTLE_ENDIAN) +# define RAPIDJSON_ENDIAN RAPIDJSON_BIGENDIAN +// Detect with architecture macros +# elif defined(__sparc) || defined(__sparc__) || defined(_POWER) || defined(__powerpc__) || defined(__ppc__) || defined(__hpux) || defined(__hppa) || defined(_MIPSEB) || defined(_POWER) || defined(__s390__) +# define RAPIDJSON_ENDIAN RAPIDJSON_BIGENDIAN +# elif defined(__i386__) || defined(__alpha__) || defined(__ia64) || defined(__ia64__) || defined(_M_IX86) || defined(_M_IA64) || defined(_M_ALPHA) || defined(__amd64) || defined(__amd64__) || defined(_M_AMD64) || defined(__x86_64) || defined(__x86_64__) || defined(_M_X64) || defined(__bfin__) +# define RAPIDJSON_ENDIAN RAPIDJSON_LITTLEENDIAN +# elif defined(_MSC_VER) && defined(_M_ARM) +# define RAPIDJSON_ENDIAN RAPIDJSON_LITTLEENDIAN +# elif defined(RAPIDJSON_DOXYGEN_RUNNING) +# define RAPIDJSON_ENDIAN +# else +# error Unknown machine endianess detected. User needs to define RAPIDJSON_ENDIAN. +# endif +#endif // RAPIDJSON_ENDIAN + +/////////////////////////////////////////////////////////////////////////////// +// RAPIDJSON_64BIT + +//! Whether using 64-bit architecture +#ifndef RAPIDJSON_64BIT +#if defined(__LP64__) || (defined(__x86_64__) && defined(__ILP32__)) || defined(_WIN64) || defined(__EMSCRIPTEN__) +#define RAPIDJSON_64BIT 1 +#else +#define RAPIDJSON_64BIT 0 +#endif +#endif // RAPIDJSON_64BIT + +/////////////////////////////////////////////////////////////////////////////// +// RAPIDJSON_ALIGN + +//! Data alignment of the machine. +/*! \ingroup RAPIDJSON_CONFIG + \param x pointer to align + + Some machines require strict data alignment. Currently the default uses 4 bytes + alignment on 32-bit platforms and 8 bytes alignment for 64-bit platforms. + User can customize by defining the RAPIDJSON_ALIGN function macro. +*/ +#ifndef RAPIDJSON_ALIGN +#if RAPIDJSON_64BIT == 1 +#define RAPIDJSON_ALIGN(x) (((x) + static_cast<uint64_t>(7u)) & ~static_cast<uint64_t>(7u)) +#else +#define RAPIDJSON_ALIGN(x) (((x) + 3u) & ~3u) +#endif +#endif + +/////////////////////////////////////////////////////////////////////////////// +// RAPIDJSON_UINT64_C2 + +//! Construct a 64-bit literal by a pair of 32-bit integer. +/*! + 64-bit literal with or without ULL suffix is prone to compiler warnings. + UINT64_C() is C macro which cause compilation problems. + Use this macro to define 64-bit constants by a pair of 32-bit integer. +*/ +#ifndef RAPIDJSON_UINT64_C2 +#define RAPIDJSON_UINT64_C2(high32, low32) ((static_cast<uint64_t>(high32) << 32) | static_cast<uint64_t>(low32)) +#endif + +/////////////////////////////////////////////////////////////////////////////// +// RAPIDJSON_48BITPOINTER_OPTIMIZATION + +//! Use only lower 48-bit address for some pointers. +/*! + \ingroup RAPIDJSON_CONFIG + + This optimization uses the fact that current X86-64 architecture only implement lower 48-bit virtual address. + The higher 16-bit can be used for storing other data. + \c GenericValue uses this optimization to reduce its size form 24 bytes to 16 bytes in 64-bit architecture. +*/ +#ifndef RAPIDJSON_48BITPOINTER_OPTIMIZATION +#if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64) +#define RAPIDJSON_48BITPOINTER_OPTIMIZATION 1 +#else +#define RAPIDJSON_48BITPOINTER_OPTIMIZATION 0 +#endif +#endif // RAPIDJSON_48BITPOINTER_OPTIMIZATION + +#if RAPIDJSON_48BITPOINTER_OPTIMIZATION == 1 +#if RAPIDJSON_64BIT != 1 +#error RAPIDJSON_48BITPOINTER_OPTIMIZATION can only be set to 1 when RAPIDJSON_64BIT=1 +#endif +#define RAPIDJSON_SETPOINTER(type, p, x) (p = reinterpret_cast<type *>((reinterpret_cast<uintptr_t>(p) & static_cast<uintptr_t>(RAPIDJSON_UINT64_C2(0xFFFF0000, 0x00000000))) | reinterpret_cast<uintptr_t>(reinterpret_cast<const void*>(x)))) +#define RAPIDJSON_GETPOINTER(type, p) (reinterpret_cast<type *>(reinterpret_cast<uintptr_t>(p) & static_cast<uintptr_t>(RAPIDJSON_UINT64_C2(0x0000FFFF, 0xFFFFFFFF)))) +#else +#define RAPIDJSON_SETPOINTER(type, p, x) (p = (x)) +#define RAPIDJSON_GETPOINTER(type, p) (p) +#endif + +/////////////////////////////////////////////////////////////////////////////// +// RAPIDJSON_SSE2/RAPIDJSON_SSE42/RAPIDJSON_SIMD + +/*! \def RAPIDJSON_SIMD + \ingroup RAPIDJSON_CONFIG + \brief Enable SSE2/SSE4.2 optimization. + + RapidJSON supports optimized implementations for some parsing operations + based on the SSE2 or SSE4.2 SIMD extensions on modern Intel-compatible + processors. + + To enable these optimizations, two different symbols can be defined; + \code + // Enable SSE2 optimization. + #define RAPIDJSON_SSE2 + + // Enable SSE4.2 optimization. + #define RAPIDJSON_SSE42 + \endcode + + \c RAPIDJSON_SSE42 takes precedence, if both are defined. + + If any of these symbols is defined, RapidJSON defines the macro + \c RAPIDJSON_SIMD to indicate the availability of the optimized code. +*/ +#if defined(RAPIDJSON_SSE2) || defined(RAPIDJSON_SSE42) \ + || defined(RAPIDJSON_DOXYGEN_RUNNING) +#define RAPIDJSON_SIMD +#endif + +/////////////////////////////////////////////////////////////////////////////// +// RAPIDJSON_NO_SIZETYPEDEFINE + +#ifndef RAPIDJSON_NO_SIZETYPEDEFINE +/*! \def RAPIDJSON_NO_SIZETYPEDEFINE + \ingroup RAPIDJSON_CONFIG + \brief User-provided \c SizeType definition. + + In order to avoid using 32-bit size types for indexing strings and arrays, + define this preprocessor symbol and provide the type rapidjson::SizeType + before including RapidJSON: + \code + #define RAPIDJSON_NO_SIZETYPEDEFINE + namespace rapidjson { typedef ::std::size_t SizeType; } + #include "rapidjson/..." + \endcode + + \see rapidjson::SizeType +*/ +#ifdef RAPIDJSON_DOXYGEN_RUNNING +#define RAPIDJSON_NO_SIZETYPEDEFINE +#endif +RAPIDJSON_NAMESPACE_BEGIN +//! Size type (for string lengths, array sizes, etc.) +/*! RapidJSON uses 32-bit array/string indices even on 64-bit platforms, + instead of using \c size_t. Users may override the SizeType by defining + \ref RAPIDJSON_NO_SIZETYPEDEFINE. +*/ +typedef unsigned SizeType; +RAPIDJSON_NAMESPACE_END +#endif + +// always import std::size_t to rapidjson namespace +RAPIDJSON_NAMESPACE_BEGIN +using std::size_t; +RAPIDJSON_NAMESPACE_END + +/////////////////////////////////////////////////////////////////////////////// +// RAPIDJSON_ASSERT + +//! Assertion. +/*! \ingroup RAPIDJSON_CONFIG + By default, rapidjson uses C \c assert() for internal assertions. + User can override it by defining RAPIDJSON_ASSERT(x) macro. + + \note Parsing errors are handled and can be customized by the + \ref RAPIDJSON_ERRORS APIs. +*/ +#ifndef RAPIDJSON_ASSERT +#include <cassert> +#define RAPIDJSON_ASSERT(x) assert(x) +#endif // RAPIDJSON_ASSERT + +/////////////////////////////////////////////////////////////////////////////// +// RAPIDJSON_STATIC_ASSERT + +// Adopt from boost +#ifndef RAPIDJSON_STATIC_ASSERT +#ifndef __clang__ +//!@cond RAPIDJSON_HIDDEN_FROM_DOXYGEN +#endif +RAPIDJSON_NAMESPACE_BEGIN +template <bool x> struct STATIC_ASSERTION_FAILURE; +template <> struct STATIC_ASSERTION_FAILURE<true> { enum { value = 1 }; }; +template<int x> struct StaticAssertTest {}; +RAPIDJSON_NAMESPACE_END + +#define RAPIDJSON_JOIN(X, Y) RAPIDJSON_DO_JOIN(X, Y) +#define RAPIDJSON_DO_JOIN(X, Y) RAPIDJSON_DO_JOIN2(X, Y) +#define RAPIDJSON_DO_JOIN2(X, Y) X##Y + +#if defined(__GNUC__) +#define RAPIDJSON_STATIC_ASSERT_UNUSED_ATTRIBUTE __attribute__((unused)) +#else +#define RAPIDJSON_STATIC_ASSERT_UNUSED_ATTRIBUTE +#endif +#ifndef __clang__ +//!@endcond +#endif + +/*! \def RAPIDJSON_STATIC_ASSERT + \brief (Internal) macro to check for conditions at compile-time + \param x compile-time condition + \hideinitializer + */ +#define RAPIDJSON_STATIC_ASSERT(x) \ + typedef ::RAPIDJSON_NAMESPACE::StaticAssertTest< \ + sizeof(::RAPIDJSON_NAMESPACE::STATIC_ASSERTION_FAILURE<bool(x) >)> \ + RAPIDJSON_JOIN(StaticAssertTypedef, __LINE__) RAPIDJSON_STATIC_ASSERT_UNUSED_ATTRIBUTE +#endif + +/////////////////////////////////////////////////////////////////////////////// +// RAPIDJSON_LIKELY, RAPIDJSON_UNLIKELY + +//! Compiler branching hint for expression with high probability to be true. +/*! + \ingroup RAPIDJSON_CONFIG + \param x Boolean expression likely to be true. +*/ +#ifndef RAPIDJSON_LIKELY +#if defined(__GNUC__) || defined(__clang__) +#define RAPIDJSON_LIKELY(x) __builtin_expect(!!(x), 1) +#else +#define RAPIDJSON_LIKELY(x) (x) +#endif +#endif + +//! Compiler branching hint for expression with low probability to be true. +/*! + \ingroup RAPIDJSON_CONFIG + \param x Boolean expression unlikely to be true. +*/ +#ifndef RAPIDJSON_UNLIKELY +#if defined(__GNUC__) || defined(__clang__) +#define RAPIDJSON_UNLIKELY(x) __builtin_expect(!!(x), 0) +#else +#define RAPIDJSON_UNLIKELY(x) (x) +#endif +#endif + +/////////////////////////////////////////////////////////////////////////////// +// Helpers + +//!@cond RAPIDJSON_HIDDEN_FROM_DOXYGEN + +#define RAPIDJSON_MULTILINEMACRO_BEGIN do { +#define RAPIDJSON_MULTILINEMACRO_END \ +} while((void)0, 0) + +// adopted from Boost +#define RAPIDJSON_VERSION_CODE(x,y,z) \ + (((x)*100000) + ((y)*100) + (z)) + +/////////////////////////////////////////////////////////////////////////////// +// RAPIDJSON_DIAG_PUSH/POP, RAPIDJSON_DIAG_OFF + +#if defined(__GNUC__) +#define RAPIDJSON_GNUC \ + RAPIDJSON_VERSION_CODE(__GNUC__,__GNUC_MINOR__,__GNUC_PATCHLEVEL__) +#endif + +#if defined(__clang__) || (defined(RAPIDJSON_GNUC) && RAPIDJSON_GNUC >= RAPIDJSON_VERSION_CODE(4,2,0)) + +#define RAPIDJSON_PRAGMA(x) _Pragma(RAPIDJSON_STRINGIFY(x)) +#define RAPIDJSON_DIAG_PRAGMA(x) RAPIDJSON_PRAGMA(GCC diagnostic x) +#define RAPIDJSON_DIAG_OFF(x) \ + RAPIDJSON_DIAG_PRAGMA(ignored RAPIDJSON_STRINGIFY(RAPIDJSON_JOIN(-W,x))) + +// push/pop support in Clang and GCC>=4.6 +#if defined(__clang__) || (defined(RAPIDJSON_GNUC) && RAPIDJSON_GNUC >= RAPIDJSON_VERSION_CODE(4,6,0)) +#define RAPIDJSON_DIAG_PUSH RAPIDJSON_DIAG_PRAGMA(push) +#define RAPIDJSON_DIAG_POP RAPIDJSON_DIAG_PRAGMA(pop) +#else // GCC >= 4.2, < 4.6 +#define RAPIDJSON_DIAG_PUSH /* ignored */ +#define RAPIDJSON_DIAG_POP /* ignored */ +#endif + +#elif defined(_MSC_VER) + +// pragma (MSVC specific) +#define RAPIDJSON_PRAGMA(x) __pragma(x) +#define RAPIDJSON_DIAG_PRAGMA(x) RAPIDJSON_PRAGMA(warning(x)) + +#define RAPIDJSON_DIAG_OFF(x) RAPIDJSON_DIAG_PRAGMA(disable: x) +#define RAPIDJSON_DIAG_PUSH RAPIDJSON_DIAG_PRAGMA(push) +#define RAPIDJSON_DIAG_POP RAPIDJSON_DIAG_PRAGMA(pop) + +#else + +#define RAPIDJSON_DIAG_OFF(x) /* ignored */ +#define RAPIDJSON_DIAG_PUSH /* ignored */ +#define RAPIDJSON_DIAG_POP /* ignored */ + +#endif // RAPIDJSON_DIAG_* + +/////////////////////////////////////////////////////////////////////////////// +// C++11 features + +#ifndef RAPIDJSON_HAS_CXX11_RVALUE_REFS +#if defined(__clang__) +#if __has_feature(cxx_rvalue_references) && \ + (defined(_LIBCPP_VERSION) || defined(__GLIBCXX__) && __GLIBCXX__ >= 20080306) +#define RAPIDJSON_HAS_CXX11_RVALUE_REFS 1 +#else +#define RAPIDJSON_HAS_CXX11_RVALUE_REFS 0 +#endif +#elif (defined(RAPIDJSON_GNUC) && (RAPIDJSON_GNUC >= RAPIDJSON_VERSION_CODE(4,3,0)) && defined(__GXX_EXPERIMENTAL_CXX0X__)) || \ + (defined(_MSC_VER) && _MSC_VER >= 1600) + +#define RAPIDJSON_HAS_CXX11_RVALUE_REFS 1 +#else +#define RAPIDJSON_HAS_CXX11_RVALUE_REFS 0 +#endif +#endif // RAPIDJSON_HAS_CXX11_RVALUE_REFS + +#ifndef RAPIDJSON_HAS_CXX11_NOEXCEPT +#if defined(__clang__) +#define RAPIDJSON_HAS_CXX11_NOEXCEPT __has_feature(cxx_noexcept) +#elif (defined(RAPIDJSON_GNUC) && (RAPIDJSON_GNUC >= RAPIDJSON_VERSION_CODE(4,6,0)) && defined(__GXX_EXPERIMENTAL_CXX0X__)) +// (defined(_MSC_VER) && _MSC_VER >= ????) // not yet supported +#define RAPIDJSON_HAS_CXX11_NOEXCEPT 1 +#else +#define RAPIDJSON_HAS_CXX11_NOEXCEPT 0 +#endif +#endif +#if RAPIDJSON_HAS_CXX11_NOEXCEPT +#define RAPIDJSON_NOEXCEPT noexcept +#else +#define RAPIDJSON_NOEXCEPT /* noexcept */ +#endif // RAPIDJSON_HAS_CXX11_NOEXCEPT + +// no automatic detection, yet +#ifndef RAPIDJSON_HAS_CXX11_TYPETRAITS +#define RAPIDJSON_HAS_CXX11_TYPETRAITS 0 +#endif + +#ifndef RAPIDJSON_HAS_CXX11_RANGE_FOR +#if defined(__clang__) +#define RAPIDJSON_HAS_CXX11_RANGE_FOR __has_feature(cxx_range_for) +#elif (defined(RAPIDJSON_GNUC) && (RAPIDJSON_GNUC >= RAPIDJSON_VERSION_CODE(4,3,0)) && defined(__GXX_EXPERIMENTAL_CXX0X__)) || \ + (defined(_MSC_VER) && _MSC_VER >= 1700) +#define RAPIDJSON_HAS_CXX11_RANGE_FOR 1 +#else +#define RAPIDJSON_HAS_CXX11_RANGE_FOR 0 +#endif +#endif // RAPIDJSON_HAS_CXX11_RANGE_FOR + +//!@endcond + +/////////////////////////////////////////////////////////////////////////////// +// new/delete + +#ifndef RAPIDJSON_NEW +///! customization point for global \c new +#define RAPIDJSON_NEW(x) new x +#endif +#ifndef RAPIDJSON_DELETE +///! customization point for global \c delete +#define RAPIDJSON_DELETE(x) delete x +#endif + +/////////////////////////////////////////////////////////////////////////////// +// Type + +/*! \namespace rapidjson + \brief main RapidJSON namespace + \see RAPIDJSON_NAMESPACE +*/ +RAPIDJSON_NAMESPACE_BEGIN + +//! Type of JSON value +enum Type { + kNullType = 0, //!< null + kFalseType = 1, //!< false + kTrueType = 2, //!< true + kObjectType = 3, //!< object + kArrayType = 4, //!< array + kStringType = 5, //!< string + kNumberType = 6 //!< number +}; + +RAPIDJSON_NAMESPACE_END + +#endif // RAPIDJSON_RAPIDJSON_H_ diff --git a/xmrstak/rapidjson/reader.h b/xmrstak/rapidjson/reader.h new file mode 100644 index 0000000..71916c0 --- /dev/null +++ b/xmrstak/rapidjson/reader.h @@ -0,0 +1,1862 @@ +// Tencent is pleased to support the open source community by making RapidJSON available. +// +// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. +// +// Licensed under the MIT License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// http://opensource.org/licenses/MIT +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef RAPIDJSON_READER_H_ +#define RAPIDJSON_READER_H_ + +/*! \file reader.h */ + +#include "allocators.h" +#include "stream.h" +#include "encodedstream.h" +#include "internal/meta.h" +#include "internal/stack.h" +#include "internal/strtod.h" +#include <limits> + +#if defined(RAPIDJSON_SIMD) && defined(_MSC_VER) +#include <intrin.h> +#pragma intrinsic(_BitScanForward) +#endif +#ifdef RAPIDJSON_SSE42 +#include <nmmintrin.h> +#elif defined(RAPIDJSON_SSE2) +#include <emmintrin.h> +#endif + +#ifdef _MSC_VER +RAPIDJSON_DIAG_PUSH +RAPIDJSON_DIAG_OFF(4127) // conditional expression is constant +RAPIDJSON_DIAG_OFF(4702) // unreachable code +#endif + +#ifdef __clang__ +RAPIDJSON_DIAG_PUSH +RAPIDJSON_DIAG_OFF(old-style-cast) +RAPIDJSON_DIAG_OFF(padded) +RAPIDJSON_DIAG_OFF(switch-enum) +#endif + +#ifdef __GNUC__ +RAPIDJSON_DIAG_PUSH +RAPIDJSON_DIAG_OFF(effc++) +#endif + +//!@cond RAPIDJSON_HIDDEN_FROM_DOXYGEN +#define RAPIDJSON_NOTHING /* deliberately empty */ +#ifndef RAPIDJSON_PARSE_ERROR_EARLY_RETURN +#define RAPIDJSON_PARSE_ERROR_EARLY_RETURN(value) \ + RAPIDJSON_MULTILINEMACRO_BEGIN \ + if (RAPIDJSON_UNLIKELY(HasParseError())) { return value; } \ + RAPIDJSON_MULTILINEMACRO_END +#endif +#define RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID \ + RAPIDJSON_PARSE_ERROR_EARLY_RETURN(RAPIDJSON_NOTHING) +//!@endcond + +/*! \def RAPIDJSON_PARSE_ERROR_NORETURN + \ingroup RAPIDJSON_ERRORS + \brief Macro to indicate a parse error. + \param parseErrorCode \ref rapidjson::ParseErrorCode of the error + \param offset position of the error in JSON input (\c size_t) + + This macros can be used as a customization point for the internal + error handling mechanism of RapidJSON. + + A common usage model is to throw an exception instead of requiring the + caller to explicitly check the \ref rapidjson::GenericReader::Parse's + return value: + + \code + #define RAPIDJSON_PARSE_ERROR_NORETURN(parseErrorCode,offset) \ + throw ParseException(parseErrorCode, #parseErrorCode, offset) + + #include <stdexcept> // std::runtime_error + #include "rapidjson/error/error.h" // rapidjson::ParseResult + + struct ParseException : std::runtime_error, rapidjson::ParseResult { + ParseException(rapidjson::ParseErrorCode code, const char* msg, size_t offset) + : std::runtime_error(msg), ParseResult(code, offset) {} + }; + + #include "rapidjson/reader.h" + \endcode + + \see RAPIDJSON_PARSE_ERROR, rapidjson::GenericReader::Parse + */ +#ifndef RAPIDJSON_PARSE_ERROR_NORETURN +#define RAPIDJSON_PARSE_ERROR_NORETURN(parseErrorCode, offset) \ + RAPIDJSON_MULTILINEMACRO_BEGIN \ + RAPIDJSON_ASSERT(!HasParseError()); /* Error can only be assigned once */ \ + SetParseError(parseErrorCode, offset); \ + RAPIDJSON_MULTILINEMACRO_END +#endif + +/*! \def RAPIDJSON_PARSE_ERROR + \ingroup RAPIDJSON_ERRORS + \brief (Internal) macro to indicate and handle a parse error. + \param parseErrorCode \ref rapidjson::ParseErrorCode of the error + \param offset position of the error in JSON input (\c size_t) + + Invokes RAPIDJSON_PARSE_ERROR_NORETURN and stops the parsing. + + \see RAPIDJSON_PARSE_ERROR_NORETURN + \hideinitializer + */ +#ifndef RAPIDJSON_PARSE_ERROR +#define RAPIDJSON_PARSE_ERROR(parseErrorCode, offset) \ + RAPIDJSON_MULTILINEMACRO_BEGIN \ + RAPIDJSON_PARSE_ERROR_NORETURN(parseErrorCode, offset); \ + RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; \ + RAPIDJSON_MULTILINEMACRO_END +#endif + +#include "error/error.h" // ParseErrorCode, ParseResult + +RAPIDJSON_NAMESPACE_BEGIN + +/////////////////////////////////////////////////////////////////////////////// +// ParseFlag + +/*! \def RAPIDJSON_PARSE_DEFAULT_FLAGS + \ingroup RAPIDJSON_CONFIG + \brief User-defined kParseDefaultFlags definition. + + User can define this as any \c ParseFlag combinations. +*/ +#ifndef RAPIDJSON_PARSE_DEFAULT_FLAGS +#define RAPIDJSON_PARSE_DEFAULT_FLAGS kParseNoFlags +#endif + +//! Combination of parseFlags +/*! \see Reader::Parse, Document::Parse, Document::ParseInsitu, Document::ParseStream + */ +enum ParseFlag { + kParseNoFlags = 0, //!< No flags are set. + kParseInsituFlag = 1, //!< In-situ(destructive) parsing. + kParseValidateEncodingFlag = 2, //!< Validate encoding of JSON strings. + kParseIterativeFlag = 4, //!< Iterative(constant complexity in terms of function call stack size) parsing. + kParseStopWhenDoneFlag = 8, //!< After parsing a complete JSON root from stream, stop further processing the rest of stream. When this flag is used, parser will not generate kParseErrorDocumentRootNotSingular error. + kParseFullPrecisionFlag = 16, //!< Parse number in full precision (but slower). + kParseCommentsFlag = 32, //!< Allow one-line (//) and multi-line (/**/) comments. + kParseNumbersAsStringsFlag = 64, //!< Parse all numbers (ints/doubles) as strings. + kParseTrailingCommasFlag = 128, //!< Allow trailing commas at the end of objects and arrays. + kParseNanAndInfFlag = 256, //!< Allow parsing NaN, Inf, Infinity, -Inf and -Infinity as doubles. + kParseDefaultFlags = RAPIDJSON_PARSE_DEFAULT_FLAGS //!< Default parse flags. Can be customized by defining RAPIDJSON_PARSE_DEFAULT_FLAGS +}; + +/////////////////////////////////////////////////////////////////////////////// +// Handler + +/*! \class rapidjson::Handler + \brief Concept for receiving events from GenericReader upon parsing. + The functions return true if no error occurs. If they return false, + the event publisher should terminate the process. +\code +concept Handler { + typename Ch; + + bool Null(); + bool Bool(bool b); + bool Int(int i); + bool Uint(unsigned i); + bool Int64(int64_t i); + bool Uint64(uint64_t i); + bool Double(double d); + /// enabled via kParseNumbersAsStringsFlag, string is not null-terminated (use length) + bool RawNumber(const Ch* str, SizeType length, bool copy); + bool String(const Ch* str, SizeType length, bool copy); + bool StartObject(); + bool Key(const Ch* str, SizeType length, bool copy); + bool EndObject(SizeType memberCount); + bool StartArray(); + bool EndArray(SizeType elementCount); +}; +\endcode +*/ +/////////////////////////////////////////////////////////////////////////////// +// BaseReaderHandler + +//! Default implementation of Handler. +/*! This can be used as base class of any reader handler. + \note implements Handler concept +*/ +template<typename Encoding = UTF8<>, typename Derived = void> +struct BaseReaderHandler { + typedef typename Encoding::Ch Ch; + + typedef typename internal::SelectIf<internal::IsSame<Derived, void>, BaseReaderHandler, Derived>::Type Override; + + bool Default() { return true; } + bool Null() { return static_cast<Override&>(*this).Default(); } + bool Bool(bool) { return static_cast<Override&>(*this).Default(); } + bool Int(int) { return static_cast<Override&>(*this).Default(); } + bool Uint(unsigned) { return static_cast<Override&>(*this).Default(); } + bool Int64(int64_t) { return static_cast<Override&>(*this).Default(); } + bool Uint64(uint64_t) { return static_cast<Override&>(*this).Default(); } + bool Double(double) { return static_cast<Override&>(*this).Default(); } + /// enabled via kParseNumbersAsStringsFlag, string is not null-terminated (use length) + bool RawNumber(const Ch* str, SizeType len, bool copy) { return static_cast<Override&>(*this).String(str, len, copy); } + bool String(const Ch*, SizeType, bool) { return static_cast<Override&>(*this).Default(); } + bool StartObject() { return static_cast<Override&>(*this).Default(); } + bool Key(const Ch* str, SizeType len, bool copy) { return static_cast<Override&>(*this).String(str, len, copy); } + bool EndObject(SizeType) { return static_cast<Override&>(*this).Default(); } + bool StartArray() { return static_cast<Override&>(*this).Default(); } + bool EndArray(SizeType) { return static_cast<Override&>(*this).Default(); } +}; + +/////////////////////////////////////////////////////////////////////////////// +// StreamLocalCopy + +namespace internal { + +template<typename Stream, int = StreamTraits<Stream>::copyOptimization> +class StreamLocalCopy; + +//! Do copy optimization. +template<typename Stream> +class StreamLocalCopy<Stream, 1> { +public: + StreamLocalCopy(Stream& original) : s(original), original_(original) {} + ~StreamLocalCopy() { original_ = s; } + + Stream s; + +private: + StreamLocalCopy& operator=(const StreamLocalCopy&) /* = delete */; + + Stream& original_; +}; + +//! Keep reference. +template<typename Stream> +class StreamLocalCopy<Stream, 0> { +public: + StreamLocalCopy(Stream& original) : s(original) {} + + Stream& s; + +private: + StreamLocalCopy& operator=(const StreamLocalCopy&) /* = delete */; +}; + +} // namespace internal + +/////////////////////////////////////////////////////////////////////////////// +// SkipWhitespace + +//! Skip the JSON white spaces in a stream. +/*! \param is A input stream for skipping white spaces. + \note This function has SSE2/SSE4.2 specialization. +*/ +template<typename InputStream> +void SkipWhitespace(InputStream& is) { + internal::StreamLocalCopy<InputStream> copy(is); + InputStream& s(copy.s); + + typename InputStream::Ch c; + while ((c = s.Peek()) == ' ' || c == '\n' || c == '\r' || c == '\t') + s.Take(); +} + +inline const char* SkipWhitespace(const char* p, const char* end) { + while (p != end && (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')) + ++p; + return p; +} + +#ifdef RAPIDJSON_SSE42 +//! Skip whitespace with SSE 4.2 pcmpistrm instruction, testing 16 8-byte characters at once. +inline const char *SkipWhitespace_SIMD(const char* p) { + // Fast return for single non-whitespace + if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t') + ++p; + else + return p; + + // 16-byte align to the next boundary + const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15)); + while (p != nextAligned) + if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t') + ++p; + else + return p; + + // The rest of string using SIMD + static const char whitespace[16] = " \n\r\t"; + const __m128i w = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespace[0])); + + for (;; p += 16) { + const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p)); + const int r = _mm_cmpistri(w, s, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_LEAST_SIGNIFICANT | _SIDD_NEGATIVE_POLARITY); + if (r != 16) // some of characters is non-whitespace + return p + r; + } +} + +inline const char *SkipWhitespace_SIMD(const char* p, const char* end) { + // Fast return for single non-whitespace + if (p != end && (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')) + ++p; + else + return p; + + // The middle of string using SIMD + static const char whitespace[16] = " \n\r\t"; + const __m128i w = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespace[0])); + + for (; p <= end - 16; p += 16) { + const __m128i s = _mm_loadu_si128(reinterpret_cast<const __m128i *>(p)); + const int r = _mm_cmpistri(w, s, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_LEAST_SIGNIFICANT | _SIDD_NEGATIVE_POLARITY); + if (r != 16) // some of characters is non-whitespace + return p + r; + } + + return SkipWhitespace(p, end); +} + +#elif defined(RAPIDJSON_SSE2) + +//! Skip whitespace with SSE2 instructions, testing 16 8-byte characters at once. +inline const char *SkipWhitespace_SIMD(const char* p) { + // Fast return for single non-whitespace + if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t') + ++p; + else + return p; + + // 16-byte align to the next boundary + const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15)); + while (p != nextAligned) + if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t') + ++p; + else + return p; + + // The rest of string + #define C16(c) { c, c, c, c, c, c, c, c, c, c, c, c, c, c, c, c } + static const char whitespaces[4][16] = { C16(' '), C16('\n'), C16('\r'), C16('\t') }; + #undef C16 + + const __m128i w0 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[0][0])); + const __m128i w1 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[1][0])); + const __m128i w2 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[2][0])); + const __m128i w3 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[3][0])); + + for (;; p += 16) { + const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p)); + __m128i x = _mm_cmpeq_epi8(s, w0); + x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w1)); + x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w2)); + x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w3)); + unsigned short r = static_cast<unsigned short>(~_mm_movemask_epi8(x)); + if (r != 0) { // some of characters may be non-whitespace +#ifdef _MSC_VER // Find the index of first non-whitespace + unsigned long offset; + _BitScanForward(&offset, r); + return p + offset; +#else + return p + __builtin_ffs(r) - 1; +#endif + } + } +} + +inline const char *SkipWhitespace_SIMD(const char* p, const char* end) { + // Fast return for single non-whitespace + if (p != end && (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')) + ++p; + else + return p; + + // The rest of string + #define C16(c) { c, c, c, c, c, c, c, c, c, c, c, c, c, c, c, c } + static const char whitespaces[4][16] = { C16(' '), C16('\n'), C16('\r'), C16('\t') }; + #undef C16 + + const __m128i w0 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[0][0])); + const __m128i w1 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[1][0])); + const __m128i w2 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[2][0])); + const __m128i w3 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[3][0])); + + for (; p <= end - 16; p += 16) { + const __m128i s = _mm_loadu_si128(reinterpret_cast<const __m128i *>(p)); + __m128i x = _mm_cmpeq_epi8(s, w0); + x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w1)); + x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w2)); + x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w3)); + unsigned short r = static_cast<unsigned short>(~_mm_movemask_epi8(x)); + if (r != 0) { // some of characters may be non-whitespace +#ifdef _MSC_VER // Find the index of first non-whitespace + unsigned long offset; + _BitScanForward(&offset, r); + return p + offset; +#else + return p + __builtin_ffs(r) - 1; +#endif + } + } + + return SkipWhitespace(p, end); +} + +#endif // RAPIDJSON_SSE2 + +#ifdef RAPIDJSON_SIMD +//! Template function specialization for InsituStringStream +template<> inline void SkipWhitespace(InsituStringStream& is) { + is.src_ = const_cast<char*>(SkipWhitespace_SIMD(is.src_)); +} + +//! Template function specialization for StringStream +template<> inline void SkipWhitespace(StringStream& is) { + is.src_ = SkipWhitespace_SIMD(is.src_); +} + +template<> inline void SkipWhitespace(EncodedInputStream<UTF8<>, MemoryStream>& is) { + is.is_.src_ = SkipWhitespace_SIMD(is.is_.src_, is.is_.end_); +} +#endif // RAPIDJSON_SIMD + +/////////////////////////////////////////////////////////////////////////////// +// GenericReader + +//! SAX-style JSON parser. Use \ref Reader for UTF8 encoding and default allocator. +/*! GenericReader parses JSON text from a stream, and send events synchronously to an + object implementing Handler concept. + + It needs to allocate a stack for storing a single decoded string during + non-destructive parsing. + + For in-situ parsing, the decoded string is directly written to the source + text string, no temporary buffer is required. + + A GenericReader object can be reused for parsing multiple JSON text. + + \tparam SourceEncoding Encoding of the input stream. + \tparam TargetEncoding Encoding of the parse output. + \tparam StackAllocator Allocator type for stack. +*/ +template <typename SourceEncoding, typename TargetEncoding, typename StackAllocator = CrtAllocator> +class GenericReader { +public: + typedef typename SourceEncoding::Ch Ch; //!< SourceEncoding character type + + //! Constructor. + /*! \param stackAllocator Optional allocator for allocating stack memory. (Only use for non-destructive parsing) + \param stackCapacity stack capacity in bytes for storing a single decoded string. (Only use for non-destructive parsing) + */ + GenericReader(StackAllocator* stackAllocator = 0, size_t stackCapacity = kDefaultStackCapacity) : stack_(stackAllocator, stackCapacity), parseResult_() {} + + //! Parse JSON text. + /*! \tparam parseFlags Combination of \ref ParseFlag. + \tparam InputStream Type of input stream, implementing Stream concept. + \tparam Handler Type of handler, implementing Handler concept. + \param is Input stream to be parsed. + \param handler The handler to receive events. + \return Whether the parsing is successful. + */ + template <unsigned parseFlags, typename InputStream, typename Handler> + ParseResult Parse(InputStream& is, Handler& handler) { + if (parseFlags & kParseIterativeFlag) + return IterativeParse<parseFlags>(is, handler); + + parseResult_.Clear(); + + ClearStackOnExit scope(*this); + + SkipWhitespaceAndComments<parseFlags>(is); + RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_); + + if (RAPIDJSON_UNLIKELY(is.Peek() == '\0')) { + RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorDocumentEmpty, is.Tell()); + RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_); + } + else { + ParseValue<parseFlags>(is, handler); + RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_); + + if (!(parseFlags & kParseStopWhenDoneFlag)) { + SkipWhitespaceAndComments<parseFlags>(is); + RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_); + + if (RAPIDJSON_UNLIKELY(is.Peek() != '\0')) { + RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorDocumentRootNotSingular, is.Tell()); + RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_); + } + } + } + + return parseResult_; + } + + //! Parse JSON text (with \ref kParseDefaultFlags) + /*! \tparam InputStream Type of input stream, implementing Stream concept + \tparam Handler Type of handler, implementing Handler concept. + \param is Input stream to be parsed. + \param handler The handler to receive events. + \return Whether the parsing is successful. + */ + template <typename InputStream, typename Handler> + ParseResult Parse(InputStream& is, Handler& handler) { + return Parse<kParseDefaultFlags>(is, handler); + } + + //! Whether a parse error has occured in the last parsing. + bool HasParseError() const { return parseResult_.IsError(); } + + //! Get the \ref ParseErrorCode of last parsing. + ParseErrorCode GetParseErrorCode() const { return parseResult_.Code(); } + + //! Get the position of last parsing error in input, 0 otherwise. + size_t GetErrorOffset() const { return parseResult_.Offset(); } + +protected: + void SetParseError(ParseErrorCode code, size_t offset) { parseResult_.Set(code, offset); } + +private: + // Prohibit copy constructor & assignment operator. + GenericReader(const GenericReader&); + GenericReader& operator=(const GenericReader&); + + void ClearStack() { stack_.Clear(); } + + // clear stack on any exit from ParseStream, e.g. due to exception + struct ClearStackOnExit { + explicit ClearStackOnExit(GenericReader& r) : r_(r) {} + ~ClearStackOnExit() { r_.ClearStack(); } + private: + GenericReader& r_; + ClearStackOnExit(const ClearStackOnExit&); + ClearStackOnExit& operator=(const ClearStackOnExit&); + }; + + template<unsigned parseFlags, typename InputStream> + void SkipWhitespaceAndComments(InputStream& is) { + SkipWhitespace(is); + + if (parseFlags & kParseCommentsFlag) { + while (RAPIDJSON_UNLIKELY(Consume(is, '/'))) { + if (Consume(is, '*')) { + while (true) { + if (RAPIDJSON_UNLIKELY(is.Peek() == '\0')) + RAPIDJSON_PARSE_ERROR(kParseErrorUnspecificSyntaxError, is.Tell()); + else if (Consume(is, '*')) { + if (Consume(is, '/')) + break; + } + else + is.Take(); + } + } + else if (RAPIDJSON_LIKELY(Consume(is, '/'))) + while (is.Peek() != '\0' && is.Take() != '\n') {} + else + RAPIDJSON_PARSE_ERROR(kParseErrorUnspecificSyntaxError, is.Tell()); + + SkipWhitespace(is); + } + } + } + + // Parse object: { string : value, ... } + template<unsigned parseFlags, typename InputStream, typename Handler> + void ParseObject(InputStream& is, Handler& handler) { + RAPIDJSON_ASSERT(is.Peek() == '{'); + is.Take(); // Skip '{' + + if (RAPIDJSON_UNLIKELY(!handler.StartObject())) + RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell()); + + SkipWhitespaceAndComments<parseFlags>(is); + RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; + + if (Consume(is, '}')) { + if (RAPIDJSON_UNLIKELY(!handler.EndObject(0))) // empty object + RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell()); + return; + } + + for (SizeType memberCount = 0;;) { + if (RAPIDJSON_UNLIKELY(is.Peek() != '"')) + RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissName, is.Tell()); + + ParseString<parseFlags>(is, handler, true); + RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; + + SkipWhitespaceAndComments<parseFlags>(is); + RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; + + if (RAPIDJSON_UNLIKELY(!Consume(is, ':'))) + RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissColon, is.Tell()); + + SkipWhitespaceAndComments<parseFlags>(is); + RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; + + ParseValue<parseFlags>(is, handler); + RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; + + SkipWhitespaceAndComments<parseFlags>(is); + RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; + + ++memberCount; + + switch (is.Peek()) { + case ',': + is.Take(); + SkipWhitespaceAndComments<parseFlags>(is); + RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; + break; + case '}': + is.Take(); + if (RAPIDJSON_UNLIKELY(!handler.EndObject(memberCount))) + RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell()); + return; + default: + RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissCommaOrCurlyBracket, is.Tell()); break; // This useless break is only for making warning and coverage happy + } + + if (parseFlags & kParseTrailingCommasFlag) { + if (is.Peek() == '}') { + if (RAPIDJSON_UNLIKELY(!handler.EndObject(memberCount))) + RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell()); + is.Take(); + return; + } + } + } + } + + // Parse array: [ value, ... ] + template<unsigned parseFlags, typename InputStream, typename Handler> + void ParseArray(InputStream& is, Handler& handler) { + RAPIDJSON_ASSERT(is.Peek() == '['); + is.Take(); // Skip '[' + + if (RAPIDJSON_UNLIKELY(!handler.StartArray())) + RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell()); + + SkipWhitespaceAndComments<parseFlags>(is); + RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; + + if (Consume(is, ']')) { + if (RAPIDJSON_UNLIKELY(!handler.EndArray(0))) // empty array + RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell()); + return; + } + + for (SizeType elementCount = 0;;) { + ParseValue<parseFlags>(is, handler); + RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; + + ++elementCount; + SkipWhitespaceAndComments<parseFlags>(is); + RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; + + if (Consume(is, ',')) { + SkipWhitespaceAndComments<parseFlags>(is); + RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; + } + else if (Consume(is, ']')) { + if (RAPIDJSON_UNLIKELY(!handler.EndArray(elementCount))) + RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell()); + return; + } + else + RAPIDJSON_PARSE_ERROR(kParseErrorArrayMissCommaOrSquareBracket, is.Tell()); + + if (parseFlags & kParseTrailingCommasFlag) { + if (is.Peek() == ']') { + if (RAPIDJSON_UNLIKELY(!handler.EndArray(elementCount))) + RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell()); + is.Take(); + return; + } + } + } + } + + template<unsigned parseFlags, typename InputStream, typename Handler> + void ParseNull(InputStream& is, Handler& handler) { + RAPIDJSON_ASSERT(is.Peek() == 'n'); + is.Take(); + + if (RAPIDJSON_LIKELY(Consume(is, 'u') && Consume(is, 'l') && Consume(is, 'l'))) { + if (RAPIDJSON_UNLIKELY(!handler.Null())) + RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell()); + } + else + RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell()); + } + + template<unsigned parseFlags, typename InputStream, typename Handler> + void ParseTrue(InputStream& is, Handler& handler) { + RAPIDJSON_ASSERT(is.Peek() == 't'); + is.Take(); + + if (RAPIDJSON_LIKELY(Consume(is, 'r') && Consume(is, 'u') && Consume(is, 'e'))) { + if (RAPIDJSON_UNLIKELY(!handler.Bool(true))) + RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell()); + } + else + RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell()); + } + + template<unsigned parseFlags, typename InputStream, typename Handler> + void ParseFalse(InputStream& is, Handler& handler) { + RAPIDJSON_ASSERT(is.Peek() == 'f'); + is.Take(); + + if (RAPIDJSON_LIKELY(Consume(is, 'a') && Consume(is, 'l') && Consume(is, 's') && Consume(is, 'e'))) { + if (RAPIDJSON_UNLIKELY(!handler.Bool(false))) + RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell()); + } + else + RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell()); + } + + template<typename InputStream> + RAPIDJSON_FORCEINLINE static bool Consume(InputStream& is, typename InputStream::Ch expect) { + if (RAPIDJSON_LIKELY(is.Peek() == expect)) { + is.Take(); + return true; + } + else + return false; + } + + // Helper function to parse four hexidecimal digits in \uXXXX in ParseString(). + template<typename InputStream> + unsigned ParseHex4(InputStream& is, size_t escapeOffset) { + unsigned codepoint = 0; + for (int i = 0; i < 4; i++) { + Ch c = is.Peek(); + codepoint <<= 4; + codepoint += static_cast<unsigned>(c); + if (c >= '0' && c <= '9') + codepoint -= '0'; + else if (c >= 'A' && c <= 'F') + codepoint -= 'A' - 10; + else if (c >= 'a' && c <= 'f') + codepoint -= 'a' - 10; + else { + RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorStringUnicodeEscapeInvalidHex, escapeOffset); + RAPIDJSON_PARSE_ERROR_EARLY_RETURN(0); + } + is.Take(); + } + return codepoint; + } + + template <typename CharType> + class StackStream { + public: + typedef CharType Ch; + + StackStream(internal::Stack<StackAllocator>& stack) : stack_(stack), length_(0) {} + RAPIDJSON_FORCEINLINE void Put(Ch c) { + *stack_.template Push<Ch>() = c; + ++length_; + } + + RAPIDJSON_FORCEINLINE void* Push(SizeType count) { + length_ += count; + return stack_.template Push<Ch>(count); + } + + size_t Length() const { return length_; } + + Ch* Pop() { + return stack_.template Pop<Ch>(length_); + } + + private: + StackStream(const StackStream&); + StackStream& operator=(const StackStream&); + + internal::Stack<StackAllocator>& stack_; + SizeType length_; + }; + + // Parse string and generate String event. Different code paths for kParseInsituFlag. + template<unsigned parseFlags, typename InputStream, typename Handler> + void ParseString(InputStream& is, Handler& handler, bool isKey = false) { + internal::StreamLocalCopy<InputStream> copy(is); + InputStream& s(copy.s); + + RAPIDJSON_ASSERT(s.Peek() == '\"'); + s.Take(); // Skip '\"' + + bool success = false; + if (parseFlags & kParseInsituFlag) { + typename InputStream::Ch *head = s.PutBegin(); + ParseStringToStream<parseFlags, SourceEncoding, SourceEncoding>(s, s); + RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; + size_t length = s.PutEnd(head) - 1; + RAPIDJSON_ASSERT(length <= 0xFFFFFFFF); + const typename TargetEncoding::Ch* const str = reinterpret_cast<typename TargetEncoding::Ch*>(head); + success = (isKey ? handler.Key(str, SizeType(length), false) : handler.String(str, SizeType(length), false)); + } + else { + StackStream<typename TargetEncoding::Ch> stackStream(stack_); + ParseStringToStream<parseFlags, SourceEncoding, TargetEncoding>(s, stackStream); + RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; + SizeType length = static_cast<SizeType>(stackStream.Length()) - 1; + const typename TargetEncoding::Ch* const str = stackStream.Pop(); + success = (isKey ? handler.Key(str, length, true) : handler.String(str, length, true)); + } + if (RAPIDJSON_UNLIKELY(!success)) + RAPIDJSON_PARSE_ERROR(kParseErrorTermination, s.Tell()); + } + + // Parse string to an output is + // This function handles the prefix/suffix double quotes, escaping, and optional encoding validation. + template<unsigned parseFlags, typename SEncoding, typename TEncoding, typename InputStream, typename OutputStream> + RAPIDJSON_FORCEINLINE void ParseStringToStream(InputStream& is, OutputStream& os) { +//!@cond RAPIDJSON_HIDDEN_FROM_DOXYGEN +#define Z16 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 + static const char escape[256] = { + Z16, Z16, 0, 0,'\"', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,'/', + Z16, Z16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,'\\', 0, 0, 0, + 0, 0,'\b', 0, 0, 0,'\f', 0, 0, 0, 0, 0, 0, 0,'\n', 0, + 0, 0,'\r', 0,'\t', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + Z16, Z16, Z16, Z16, Z16, Z16, Z16, Z16 + }; +#undef Z16 +//!@endcond + + for (;;) { + // Scan and copy string before "\\\"" or < 0x20. This is an optional optimzation. + if (!(parseFlags & kParseValidateEncodingFlag)) + ScanCopyUnescapedString(is, os); + + Ch c = is.Peek(); + if (RAPIDJSON_UNLIKELY(c == '\\')) { // Escape + size_t escapeOffset = is.Tell(); // For invalid escaping, report the inital '\\' as error offset + is.Take(); + Ch e = is.Peek(); + if ((sizeof(Ch) == 1 || unsigned(e) < 256) && RAPIDJSON_LIKELY(escape[static_cast<unsigned char>(e)])) { + is.Take(); + os.Put(static_cast<typename TEncoding::Ch>(escape[static_cast<unsigned char>(e)])); + } + else if (RAPIDJSON_LIKELY(e == 'u')) { // Unicode + is.Take(); + unsigned codepoint = ParseHex4(is, escapeOffset); + RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; + if (RAPIDJSON_UNLIKELY(codepoint >= 0xD800 && codepoint <= 0xDBFF)) { + // Handle UTF-16 surrogate pair + if (RAPIDJSON_UNLIKELY(!Consume(is, '\\') || !Consume(is, 'u'))) + RAPIDJSON_PARSE_ERROR(kParseErrorStringUnicodeSurrogateInvalid, escapeOffset); + unsigned codepoint2 = ParseHex4(is, escapeOffset); + RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; + if (RAPIDJSON_UNLIKELY(codepoint2 < 0xDC00 || codepoint2 > 0xDFFF)) + RAPIDJSON_PARSE_ERROR(kParseErrorStringUnicodeSurrogateInvalid, escapeOffset); + codepoint = (((codepoint - 0xD800) << 10) | (codepoint2 - 0xDC00)) + 0x10000; + } + TEncoding::Encode(os, codepoint); + } + else + RAPIDJSON_PARSE_ERROR(kParseErrorStringEscapeInvalid, escapeOffset); + } + else if (RAPIDJSON_UNLIKELY(c == '"')) { // Closing double quote + is.Take(); + os.Put('\0'); // null-terminate the string + return; + } + else if (RAPIDJSON_UNLIKELY(static_cast<unsigned>(c) < 0x20)) { // RFC 4627: unescaped = %x20-21 / %x23-5B / %x5D-10FFFF + if (c == '\0') + RAPIDJSON_PARSE_ERROR(kParseErrorStringMissQuotationMark, is.Tell()); + else + RAPIDJSON_PARSE_ERROR(kParseErrorStringEscapeInvalid, is.Tell()); + } + else { + size_t offset = is.Tell(); + if (RAPIDJSON_UNLIKELY((parseFlags & kParseValidateEncodingFlag ? + !Transcoder<SEncoding, TEncoding>::Validate(is, os) : + !Transcoder<SEncoding, TEncoding>::Transcode(is, os)))) + RAPIDJSON_PARSE_ERROR(kParseErrorStringInvalidEncoding, offset); + } + } + } + + template<typename InputStream, typename OutputStream> + static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(InputStream&, OutputStream&) { + // Do nothing for generic version + } + +#if defined(RAPIDJSON_SSE2) || defined(RAPIDJSON_SSE42) + // StringStream -> StackStream<char> + static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(StringStream& is, StackStream<char>& os) { + const char* p = is.src_; + + // Scan one by one until alignment (unaligned load may cross page boundary and cause crash) + const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15)); + while (p != nextAligned) + if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) { + is.src_ = p; + return; + } + else + os.Put(*p++); + + // The rest of string using SIMD + static const char dquote[16] = { '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"' }; + static const char bslash[16] = { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' }; + static const char space[16] = { 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19 }; + const __m128i dq = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&dquote[0])); + const __m128i bs = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&bslash[0])); + const __m128i sp = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&space[0])); + + for (;; p += 16) { + const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p)); + const __m128i t1 = _mm_cmpeq_epi8(s, dq); + const __m128i t2 = _mm_cmpeq_epi8(s, bs); + const __m128i t3 = _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x19) == 0x19 + const __m128i x = _mm_or_si128(_mm_or_si128(t1, t2), t3); + unsigned short r = static_cast<unsigned short>(_mm_movemask_epi8(x)); + if (RAPIDJSON_UNLIKELY(r != 0)) { // some of characters is escaped + SizeType length; + #ifdef _MSC_VER // Find the index of first escaped + unsigned long offset; + _BitScanForward(&offset, r); + length = offset; + #else + length = static_cast<SizeType>(__builtin_ffs(r) - 1); + #endif + char* q = reinterpret_cast<char*>(os.Push(length)); + for (size_t i = 0; i < length; i++) + q[i] = p[i]; + + p += length; + break; + } + _mm_storeu_si128(reinterpret_cast<__m128i *>(os.Push(16)), s); + } + + is.src_ = p; + } + + // InsituStringStream -> InsituStringStream + static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(InsituStringStream& is, InsituStringStream& os) { + RAPIDJSON_ASSERT(&is == &os); + (void)os; + + if (is.src_ == is.dst_) { + SkipUnescapedString(is); + return; + } + + char* p = is.src_; + char *q = is.dst_; + + // Scan one by one until alignment (unaligned load may cross page boundary and cause crash) + const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15)); + while (p != nextAligned) + if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) { + is.src_ = p; + is.dst_ = q; + return; + } + else + *q++ = *p++; + + // The rest of string using SIMD + static const char dquote[16] = { '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"' }; + static const char bslash[16] = { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' }; + static const char space[16] = { 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19 }; + const __m128i dq = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&dquote[0])); + const __m128i bs = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&bslash[0])); + const __m128i sp = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&space[0])); + + for (;; p += 16, q += 16) { + const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p)); + const __m128i t1 = _mm_cmpeq_epi8(s, dq); + const __m128i t2 = _mm_cmpeq_epi8(s, bs); + const __m128i t3 = _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x19) == 0x19 + const __m128i x = _mm_or_si128(_mm_or_si128(t1, t2), t3); + unsigned short r = static_cast<unsigned short>(_mm_movemask_epi8(x)); + if (RAPIDJSON_UNLIKELY(r != 0)) { // some of characters is escaped + size_t length; +#ifdef _MSC_VER // Find the index of first escaped + unsigned long offset; + _BitScanForward(&offset, r); + length = offset; +#else + length = static_cast<size_t>(__builtin_ffs(r) - 1); +#endif + for (const char* pend = p + length; p != pend; ) + *q++ = *p++; + break; + } + _mm_storeu_si128(reinterpret_cast<__m128i *>(q), s); + } + + is.src_ = p; + is.dst_ = q; + } + + // When read/write pointers are the same for insitu stream, just skip unescaped characters + static RAPIDJSON_FORCEINLINE void SkipUnescapedString(InsituStringStream& is) { + RAPIDJSON_ASSERT(is.src_ == is.dst_); + char* p = is.src_; + + // Scan one by one until alignment (unaligned load may cross page boundary and cause crash) + const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15)); + for (; p != nextAligned; p++) + if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) { + is.src_ = is.dst_ = p; + return; + } + + // The rest of string using SIMD + static const char dquote[16] = { '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"' }; + static const char bslash[16] = { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' }; + static const char space[16] = { 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19 }; + const __m128i dq = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&dquote[0])); + const __m128i bs = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&bslash[0])); + const __m128i sp = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&space[0])); + + for (;; p += 16) { + const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p)); + const __m128i t1 = _mm_cmpeq_epi8(s, dq); + const __m128i t2 = _mm_cmpeq_epi8(s, bs); + const __m128i t3 = _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x19) == 0x19 + const __m128i x = _mm_or_si128(_mm_or_si128(t1, t2), t3); + unsigned short r = static_cast<unsigned short>(_mm_movemask_epi8(x)); + if (RAPIDJSON_UNLIKELY(r != 0)) { // some of characters is escaped + size_t length; +#ifdef _MSC_VER // Find the index of first escaped + unsigned long offset; + _BitScanForward(&offset, r); + length = offset; +#else + length = static_cast<size_t>(__builtin_ffs(r) - 1); +#endif + p += length; + break; + } + } + + is.src_ = is.dst_ = p; + } +#endif + + template<typename InputStream, bool backup, bool pushOnTake> + class NumberStream; + + template<typename InputStream> + class NumberStream<InputStream, false, false> { + public: + typedef typename InputStream::Ch Ch; + + NumberStream(GenericReader& reader, InputStream& s) : is(s) { (void)reader; } + + RAPIDJSON_FORCEINLINE Ch Peek() const { return is.Peek(); } + RAPIDJSON_FORCEINLINE Ch TakePush() { return is.Take(); } + RAPIDJSON_FORCEINLINE Ch Take() { return is.Take(); } + RAPIDJSON_FORCEINLINE void Push(char) {} + + size_t Tell() { return is.Tell(); } + size_t Length() { return 0; } + const char* Pop() { return 0; } + + protected: + NumberStream& operator=(const NumberStream&); + + InputStream& is; + }; + + template<typename InputStream> + class NumberStream<InputStream, true, false> : public NumberStream<InputStream, false, false> { + typedef NumberStream<InputStream, false, false> Base; + public: + NumberStream(GenericReader& reader, InputStream& is) : Base(reader, is), stackStream(reader.stack_) {} + + RAPIDJSON_FORCEINLINE Ch TakePush() { + stackStream.Put(static_cast<char>(Base::is.Peek())); + return Base::is.Take(); + } + + RAPIDJSON_FORCEINLINE void Push(char c) { + stackStream.Put(c); + } + + size_t Length() { return stackStream.Length(); } + + const char* Pop() { + stackStream.Put('\0'); + return stackStream.Pop(); + } + + private: + StackStream<char> stackStream; + }; + + template<typename InputStream> + class NumberStream<InputStream, true, true> : public NumberStream<InputStream, true, false> { + typedef NumberStream<InputStream, true, false> Base; + public: + NumberStream(GenericReader& reader, InputStream& is) : Base(reader, is) {} + + RAPIDJSON_FORCEINLINE Ch Take() { return Base::TakePush(); } + }; + + template<unsigned parseFlags, typename InputStream, typename Handler> + void ParseNumber(InputStream& is, Handler& handler) { + internal::StreamLocalCopy<InputStream> copy(is); + NumberStream<InputStream, + ((parseFlags & kParseNumbersAsStringsFlag) != 0) ? + ((parseFlags & kParseInsituFlag) == 0) : + ((parseFlags & kParseFullPrecisionFlag) != 0), + (parseFlags & kParseNumbersAsStringsFlag) != 0 && + (parseFlags & kParseInsituFlag) == 0> s(*this, copy.s); + + size_t startOffset = s.Tell(); + double d = 0.0; + bool useNanOrInf = false; + + // Parse minus + bool minus = Consume(s, '-'); + + // Parse int: zero / ( digit1-9 *DIGIT ) + unsigned i = 0; + uint64_t i64 = 0; + bool use64bit = false; + int significandDigit = 0; + if (RAPIDJSON_UNLIKELY(s.Peek() == '0')) { + i = 0; + s.TakePush(); + } + else if (RAPIDJSON_LIKELY(s.Peek() >= '1' && s.Peek() <= '9')) { + i = static_cast<unsigned>(s.TakePush() - '0'); + + if (minus) + while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) { + if (RAPIDJSON_UNLIKELY(i >= 214748364)) { // 2^31 = 2147483648 + if (RAPIDJSON_LIKELY(i != 214748364 || s.Peek() > '8')) { + i64 = i; + use64bit = true; + break; + } + } + i = i * 10 + static_cast<unsigned>(s.TakePush() - '0'); + significandDigit++; + } + else + while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) { + if (RAPIDJSON_UNLIKELY(i >= 429496729)) { // 2^32 - 1 = 4294967295 + if (RAPIDJSON_LIKELY(i != 429496729 || s.Peek() > '5')) { + i64 = i; + use64bit = true; + break; + } + } + i = i * 10 + static_cast<unsigned>(s.TakePush() - '0'); + significandDigit++; + } + } + // Parse NaN or Infinity here + else if ((parseFlags & kParseNanAndInfFlag) && RAPIDJSON_LIKELY((s.Peek() == 'I' || s.Peek() == 'N'))) { + useNanOrInf = true; + if (RAPIDJSON_LIKELY(Consume(s, 'N') && Consume(s, 'a') && Consume(s, 'N'))) { + d = std::numeric_limits<double>::quiet_NaN(); + } + else if (RAPIDJSON_LIKELY(Consume(s, 'I') && Consume(s, 'n') && Consume(s, 'f'))) { + d = (minus ? -std::numeric_limits<double>::infinity() : std::numeric_limits<double>::infinity()); + if (RAPIDJSON_UNLIKELY(s.Peek() == 'i' && !(Consume(s, 'i') && Consume(s, 'n') + && Consume(s, 'i') && Consume(s, 't') && Consume(s, 'y')))) + RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, s.Tell()); + } + else + RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, s.Tell()); + } + else + RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, s.Tell()); + + // Parse 64bit int + bool useDouble = false; + if (use64bit) { + if (minus) + while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) { + if (RAPIDJSON_UNLIKELY(i64 >= RAPIDJSON_UINT64_C2(0x0CCCCCCC, 0xCCCCCCCC))) // 2^63 = 9223372036854775808 + if (RAPIDJSON_LIKELY(i64 != RAPIDJSON_UINT64_C2(0x0CCCCCCC, 0xCCCCCCCC) || s.Peek() > '8')) { + d = static_cast<double>(i64); + useDouble = true; + break; + } + i64 = i64 * 10 + static_cast<unsigned>(s.TakePush() - '0'); + significandDigit++; + } + else + while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) { + if (RAPIDJSON_UNLIKELY(i64 >= RAPIDJSON_UINT64_C2(0x19999999, 0x99999999))) // 2^64 - 1 = 18446744073709551615 + if (RAPIDJSON_LIKELY(i64 != RAPIDJSON_UINT64_C2(0x19999999, 0x99999999) || s.Peek() > '5')) { + d = static_cast<double>(i64); + useDouble = true; + break; + } + i64 = i64 * 10 + static_cast<unsigned>(s.TakePush() - '0'); + significandDigit++; + } + } + + // Force double for big integer + if (useDouble) { + while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) { + if (RAPIDJSON_UNLIKELY(d >= 1.7976931348623157e307)) // DBL_MAX / 10.0 + RAPIDJSON_PARSE_ERROR(kParseErrorNumberTooBig, startOffset); + d = d * 10 + (s.TakePush() - '0'); + } + } + + // Parse frac = decimal-point 1*DIGIT + int expFrac = 0; + size_t decimalPosition; + if (Consume(s, '.')) { + decimalPosition = s.Length(); + + if (RAPIDJSON_UNLIKELY(!(s.Peek() >= '0' && s.Peek() <= '9'))) + RAPIDJSON_PARSE_ERROR(kParseErrorNumberMissFraction, s.Tell()); + + if (!useDouble) { +#if RAPIDJSON_64BIT + // Use i64 to store significand in 64-bit architecture + if (!use64bit) + i64 = i; + + while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) { + if (i64 > RAPIDJSON_UINT64_C2(0x1FFFFF, 0xFFFFFFFF)) // 2^53 - 1 for fast path + break; + else { + i64 = i64 * 10 + static_cast<unsigned>(s.TakePush() - '0'); + --expFrac; + if (i64 != 0) + significandDigit++; + } + } + + d = static_cast<double>(i64); +#else + // Use double to store significand in 32-bit architecture + d = static_cast<double>(use64bit ? i64 : i); +#endif + useDouble = true; + } + + while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) { + if (significandDigit < 17) { + d = d * 10.0 + (s.TakePush() - '0'); + --expFrac; + if (RAPIDJSON_LIKELY(d > 0.0)) + significandDigit++; + } + else + s.TakePush(); + } + } + else + decimalPosition = s.Length(); // decimal position at the end of integer. + + // Parse exp = e [ minus / plus ] 1*DIGIT + int exp = 0; + if (Consume(s, 'e') || Consume(s, 'E')) { + if (!useDouble) { + d = static_cast<double>(use64bit ? i64 : i); + useDouble = true; + } + + bool expMinus = false; + if (Consume(s, '+')) + ; + else if (Consume(s, '-')) + expMinus = true; + + if (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) { + exp = static_cast<int>(s.Take() - '0'); + if (expMinus) { + while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) { + exp = exp * 10 + static_cast<int>(s.Take() - '0'); + if (exp >= 214748364) { // Issue #313: prevent overflow exponent + while (RAPIDJSON_UNLIKELY(s.Peek() >= '0' && s.Peek() <= '9')) // Consume the rest of exponent + s.Take(); + } + } + } + else { // positive exp + int maxExp = 308 - expFrac; + while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) { + exp = exp * 10 + static_cast<int>(s.Take() - '0'); + if (RAPIDJSON_UNLIKELY(exp > maxExp)) + RAPIDJSON_PARSE_ERROR(kParseErrorNumberTooBig, startOffset); + } + } + } + else + RAPIDJSON_PARSE_ERROR(kParseErrorNumberMissExponent, s.Tell()); + + if (expMinus) + exp = -exp; + } + + // Finish parsing, call event according to the type of number. + bool cont = true; + + if (parseFlags & kParseNumbersAsStringsFlag) { + if (parseFlags & kParseInsituFlag) { + s.Pop(); // Pop stack no matter if it will be used or not. + typename InputStream::Ch* head = is.PutBegin(); + const size_t length = s.Tell() - startOffset; + RAPIDJSON_ASSERT(length <= 0xFFFFFFFF); + // unable to insert the \0 character here, it will erase the comma after this number + const typename TargetEncoding::Ch* const str = reinterpret_cast<typename TargetEncoding::Ch*>(head); + cont = handler.RawNumber(str, SizeType(length), false); + } + else { + SizeType numCharsToCopy = static_cast<SizeType>(s.Length()); + StringStream srcStream(s.Pop()); + StackStream<typename TargetEncoding::Ch> dstStream(stack_); + while (numCharsToCopy--) { + Transcoder<UTF8<>, TargetEncoding>::Transcode(srcStream, dstStream); + } + dstStream.Put('\0'); + const typename TargetEncoding::Ch* str = dstStream.Pop(); + const SizeType length = static_cast<SizeType>(dstStream.Length()) - 1; + cont = handler.RawNumber(str, SizeType(length), true); + } + } + else { + size_t length = s.Length(); + const char* decimal = s.Pop(); // Pop stack no matter if it will be used or not. + + if (useDouble) { + int p = exp + expFrac; + if (parseFlags & kParseFullPrecisionFlag) + d = internal::StrtodFullPrecision(d, p, decimal, length, decimalPosition, exp); + else + d = internal::StrtodNormalPrecision(d, p); + + cont = handler.Double(minus ? -d : d); + } + else if (useNanOrInf) { + cont = handler.Double(d); + } + else { + if (use64bit) { + if (minus) + cont = handler.Int64(static_cast<int64_t>(~i64 + 1)); + else + cont = handler.Uint64(i64); + } + else { + if (minus) + cont = handler.Int(static_cast<int32_t>(~i + 1)); + else + cont = handler.Uint(i); + } + } + } + if (RAPIDJSON_UNLIKELY(!cont)) + RAPIDJSON_PARSE_ERROR(kParseErrorTermination, startOffset); + } + + // Parse any JSON value + template<unsigned parseFlags, typename InputStream, typename Handler> + void ParseValue(InputStream& is, Handler& handler) { + switch (is.Peek()) { + case 'n': ParseNull <parseFlags>(is, handler); break; + case 't': ParseTrue <parseFlags>(is, handler); break; + case 'f': ParseFalse <parseFlags>(is, handler); break; + case '"': ParseString<parseFlags>(is, handler); break; + case '{': ParseObject<parseFlags>(is, handler); break; + case '[': ParseArray <parseFlags>(is, handler); break; + default : + ParseNumber<parseFlags>(is, handler); + break; + + } + } + + // Iterative Parsing + + // States + enum IterativeParsingState { + IterativeParsingStartState = 0, + IterativeParsingFinishState, + IterativeParsingErrorState, + + // Object states + IterativeParsingObjectInitialState, + IterativeParsingMemberKeyState, + IterativeParsingKeyValueDelimiterState, + IterativeParsingMemberValueState, + IterativeParsingMemberDelimiterState, + IterativeParsingObjectFinishState, + + // Array states + IterativeParsingArrayInitialState, + IterativeParsingElementState, + IterativeParsingElementDelimiterState, + IterativeParsingArrayFinishState, + + // Single value state + IterativeParsingValueState + }; + + enum { cIterativeParsingStateCount = IterativeParsingValueState + 1 }; + + // Tokens + enum Token { + LeftBracketToken = 0, + RightBracketToken, + + LeftCurlyBracketToken, + RightCurlyBracketToken, + + CommaToken, + ColonToken, + + StringToken, + FalseToken, + TrueToken, + NullToken, + NumberToken, + + kTokenCount + }; + + RAPIDJSON_FORCEINLINE Token Tokenize(Ch c) { + +//!@cond RAPIDJSON_HIDDEN_FROM_DOXYGEN +#define N NumberToken +#define N16 N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N + // Maps from ASCII to Token + static const unsigned char tokenMap[256] = { + N16, // 00~0F + N16, // 10~1F + N, N, StringToken, N, N, N, N, N, N, N, N, N, CommaToken, N, N, N, // 20~2F + N, N, N, N, N, N, N, N, N, N, ColonToken, N, N, N, N, N, // 30~3F + N16, // 40~4F + N, N, N, N, N, N, N, N, N, N, N, LeftBracketToken, N, RightBracketToken, N, N, // 50~5F + N, N, N, N, N, N, FalseToken, N, N, N, N, N, N, N, NullToken, N, // 60~6F + N, N, N, N, TrueToken, N, N, N, N, N, N, LeftCurlyBracketToken, N, RightCurlyBracketToken, N, N, // 70~7F + N16, N16, N16, N16, N16, N16, N16, N16 // 80~FF + }; +#undef N +#undef N16 +//!@endcond + + if (sizeof(Ch) == 1 || static_cast<unsigned>(c) < 256) + return static_cast<Token>(tokenMap[static_cast<unsigned char>(c)]); + else + return NumberToken; + } + + RAPIDJSON_FORCEINLINE IterativeParsingState Predict(IterativeParsingState state, Token token) { + // current state x one lookahead token -> new state + static const char G[cIterativeParsingStateCount][kTokenCount] = { + // Start + { + IterativeParsingArrayInitialState, // Left bracket + IterativeParsingErrorState, // Right bracket + IterativeParsingObjectInitialState, // Left curly bracket + IterativeParsingErrorState, // Right curly bracket + IterativeParsingErrorState, // Comma + IterativeParsingErrorState, // Colon + IterativeParsingValueState, // String + IterativeParsingValueState, // False + IterativeParsingValueState, // True + IterativeParsingValueState, // Null + IterativeParsingValueState // Number + }, + // Finish(sink state) + { + IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, + IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, + IterativeParsingErrorState + }, + // Error(sink state) + { + IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, + IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, + IterativeParsingErrorState + }, + // ObjectInitial + { + IterativeParsingErrorState, // Left bracket + IterativeParsingErrorState, // Right bracket + IterativeParsingErrorState, // Left curly bracket + IterativeParsingObjectFinishState, // Right curly bracket + IterativeParsingErrorState, // Comma + IterativeParsingErrorState, // Colon + IterativeParsingMemberKeyState, // String + IterativeParsingErrorState, // False + IterativeParsingErrorState, // True + IterativeParsingErrorState, // Null + IterativeParsingErrorState // Number + }, + // MemberKey + { + IterativeParsingErrorState, // Left bracket + IterativeParsingErrorState, // Right bracket + IterativeParsingErrorState, // Left curly bracket + IterativeParsingErrorState, // Right curly bracket + IterativeParsingErrorState, // Comma + IterativeParsingKeyValueDelimiterState, // Colon + IterativeParsingErrorState, // String + IterativeParsingErrorState, // False + IterativeParsingErrorState, // True + IterativeParsingErrorState, // Null + IterativeParsingErrorState // Number + }, + // KeyValueDelimiter + { + IterativeParsingArrayInitialState, // Left bracket(push MemberValue state) + IterativeParsingErrorState, // Right bracket + IterativeParsingObjectInitialState, // Left curly bracket(push MemberValue state) + IterativeParsingErrorState, // Right curly bracket + IterativeParsingErrorState, // Comma + IterativeParsingErrorState, // Colon + IterativeParsingMemberValueState, // String + IterativeParsingMemberValueState, // False + IterativeParsingMemberValueState, // True + IterativeParsingMemberValueState, // Null + IterativeParsingMemberValueState // Number + }, + // MemberValue + { + IterativeParsingErrorState, // Left bracket + IterativeParsingErrorState, // Right bracket + IterativeParsingErrorState, // Left curly bracket + IterativeParsingObjectFinishState, // Right curly bracket + IterativeParsingMemberDelimiterState, // Comma + IterativeParsingErrorState, // Colon + IterativeParsingErrorState, // String + IterativeParsingErrorState, // False + IterativeParsingErrorState, // True + IterativeParsingErrorState, // Null + IterativeParsingErrorState // Number + }, + // MemberDelimiter + { + IterativeParsingErrorState, // Left bracket + IterativeParsingErrorState, // Right bracket + IterativeParsingErrorState, // Left curly bracket + IterativeParsingObjectFinishState, // Right curly bracket + IterativeParsingErrorState, // Comma + IterativeParsingErrorState, // Colon + IterativeParsingMemberKeyState, // String + IterativeParsingErrorState, // False + IterativeParsingErrorState, // True + IterativeParsingErrorState, // Null + IterativeParsingErrorState // Number + }, + // ObjectFinish(sink state) + { + IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, + IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, + IterativeParsingErrorState + }, + // ArrayInitial + { + IterativeParsingArrayInitialState, // Left bracket(push Element state) + IterativeParsingArrayFinishState, // Right bracket + IterativeParsingObjectInitialState, // Left curly bracket(push Element state) + IterativeParsingErrorState, // Right curly bracket + IterativeParsingErrorState, // Comma + IterativeParsingErrorState, // Colon + IterativeParsingElementState, // String + IterativeParsingElementState, // False + IterativeParsingElementState, // True + IterativeParsingElementState, // Null + IterativeParsingElementState // Number + }, + // Element + { + IterativeParsingErrorState, // Left bracket + IterativeParsingArrayFinishState, // Right bracket + IterativeParsingErrorState, // Left curly bracket + IterativeParsingErrorState, // Right curly bracket + IterativeParsingElementDelimiterState, // Comma + IterativeParsingErrorState, // Colon + IterativeParsingErrorState, // String + IterativeParsingErrorState, // False + IterativeParsingErrorState, // True + IterativeParsingErrorState, // Null + IterativeParsingErrorState // Number + }, + // ElementDelimiter + { + IterativeParsingArrayInitialState, // Left bracket(push Element state) + IterativeParsingArrayFinishState, // Right bracket + IterativeParsingObjectInitialState, // Left curly bracket(push Element state) + IterativeParsingErrorState, // Right curly bracket + IterativeParsingErrorState, // Comma + IterativeParsingErrorState, // Colon + IterativeParsingElementState, // String + IterativeParsingElementState, // False + IterativeParsingElementState, // True + IterativeParsingElementState, // Null + IterativeParsingElementState // Number + }, + // ArrayFinish(sink state) + { + IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, + IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, + IterativeParsingErrorState + }, + // Single Value (sink state) + { + IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, + IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, + IterativeParsingErrorState + } + }; // End of G + + return static_cast<IterativeParsingState>(G[state][token]); + } + + // Make an advance in the token stream and state based on the candidate destination state which was returned by Transit(). + // May return a new state on state pop. + template <unsigned parseFlags, typename InputStream, typename Handler> + RAPIDJSON_FORCEINLINE IterativeParsingState Transit(IterativeParsingState src, Token token, IterativeParsingState dst, InputStream& is, Handler& handler) { + (void)token; + + switch (dst) { + case IterativeParsingErrorState: + return dst; + + case IterativeParsingObjectInitialState: + case IterativeParsingArrayInitialState: + { + // Push the state(Element or MemeberValue) if we are nested in another array or value of member. + // In this way we can get the correct state on ObjectFinish or ArrayFinish by frame pop. + IterativeParsingState n = src; + if (src == IterativeParsingArrayInitialState || src == IterativeParsingElementDelimiterState) + n = IterativeParsingElementState; + else if (src == IterativeParsingKeyValueDelimiterState) + n = IterativeParsingMemberValueState; + // Push current state. + *stack_.template Push<SizeType>(1) = n; + // Initialize and push the member/element count. + *stack_.template Push<SizeType>(1) = 0; + // Call handler + bool hr = (dst == IterativeParsingObjectInitialState) ? handler.StartObject() : handler.StartArray(); + // On handler short circuits the parsing. + if (!hr) { + RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorTermination, is.Tell()); + return IterativeParsingErrorState; + } + else { + is.Take(); + return dst; + } + } + + case IterativeParsingMemberKeyState: + ParseString<parseFlags>(is, handler, true); + if (HasParseError()) + return IterativeParsingErrorState; + else + return dst; + + case IterativeParsingKeyValueDelimiterState: + RAPIDJSON_ASSERT(token == ColonToken); + is.Take(); + return dst; + + case IterativeParsingMemberValueState: + // Must be non-compound value. Or it would be ObjectInitial or ArrayInitial state. + ParseValue<parseFlags>(is, handler); + if (HasParseError()) { + return IterativeParsingErrorState; + } + return dst; + + case IterativeParsingElementState: + // Must be non-compound value. Or it would be ObjectInitial or ArrayInitial state. + ParseValue<parseFlags>(is, handler); + if (HasParseError()) { + return IterativeParsingErrorState; + } + return dst; + + case IterativeParsingMemberDelimiterState: + case IterativeParsingElementDelimiterState: + is.Take(); + // Update member/element count. + *stack_.template Top<SizeType>() = *stack_.template Top<SizeType>() + 1; + return dst; + + case IterativeParsingObjectFinishState: + { + // Transit from delimiter is only allowed when trailing commas are enabled + if (!(parseFlags & kParseTrailingCommasFlag) && src == IterativeParsingMemberDelimiterState) { + RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorObjectMissName, is.Tell()); + return IterativeParsingErrorState; + } + // Get member count. + SizeType c = *stack_.template Pop<SizeType>(1); + // If the object is not empty, count the last member. + if (src == IterativeParsingMemberValueState) + ++c; + // Restore the state. + IterativeParsingState n = static_cast<IterativeParsingState>(*stack_.template Pop<SizeType>(1)); + // Transit to Finish state if this is the topmost scope. + if (n == IterativeParsingStartState) + n = IterativeParsingFinishState; + // Call handler + bool hr = handler.EndObject(c); + // On handler short circuits the parsing. + if (!hr) { + RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorTermination, is.Tell()); + return IterativeParsingErrorState; + } + else { + is.Take(); + return n; + } + } + + case IterativeParsingArrayFinishState: + { + // Transit from delimiter is only allowed when trailing commas are enabled + if (!(parseFlags & kParseTrailingCommasFlag) && src == IterativeParsingElementDelimiterState) { + RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorValueInvalid, is.Tell()); + return IterativeParsingErrorState; + } + // Get element count. + SizeType c = *stack_.template Pop<SizeType>(1); + // If the array is not empty, count the last element. + if (src == IterativeParsingElementState) + ++c; + // Restore the state. + IterativeParsingState n = static_cast<IterativeParsingState>(*stack_.template Pop<SizeType>(1)); + // Transit to Finish state if this is the topmost scope. + if (n == IterativeParsingStartState) + n = IterativeParsingFinishState; + // Call handler + bool hr = handler.EndArray(c); + // On handler short circuits the parsing. + if (!hr) { + RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorTermination, is.Tell()); + return IterativeParsingErrorState; + } + else { + is.Take(); + return n; + } + } + + default: + // This branch is for IterativeParsingValueState actually. + // Use `default:` rather than + // `case IterativeParsingValueState:` is for code coverage. + + // The IterativeParsingStartState is not enumerated in this switch-case. + // It is impossible for that case. And it can be caught by following assertion. + + // The IterativeParsingFinishState is not enumerated in this switch-case either. + // It is a "derivative" state which cannot triggered from Predict() directly. + // Therefore it cannot happen here. And it can be caught by following assertion. + RAPIDJSON_ASSERT(dst == IterativeParsingValueState); + + // Must be non-compound value. Or it would be ObjectInitial or ArrayInitial state. + ParseValue<parseFlags>(is, handler); + if (HasParseError()) { + return IterativeParsingErrorState; + } + return IterativeParsingFinishState; + } + } + + template <typename InputStream> + void HandleError(IterativeParsingState src, InputStream& is) { + if (HasParseError()) { + // Error flag has been set. + return; + } + + switch (src) { + case IterativeParsingStartState: RAPIDJSON_PARSE_ERROR(kParseErrorDocumentEmpty, is.Tell()); return; + case IterativeParsingFinishState: RAPIDJSON_PARSE_ERROR(kParseErrorDocumentRootNotSingular, is.Tell()); return; + case IterativeParsingObjectInitialState: + case IterativeParsingMemberDelimiterState: RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissName, is.Tell()); return; + case IterativeParsingMemberKeyState: RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissColon, is.Tell()); return; + case IterativeParsingMemberValueState: RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissCommaOrCurlyBracket, is.Tell()); return; + case IterativeParsingKeyValueDelimiterState: + case IterativeParsingArrayInitialState: + case IterativeParsingElementDelimiterState: RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell()); return; + default: RAPIDJSON_ASSERT(src == IterativeParsingElementState); RAPIDJSON_PARSE_ERROR(kParseErrorArrayMissCommaOrSquareBracket, is.Tell()); return; + } + } + + template <unsigned parseFlags, typename InputStream, typename Handler> + ParseResult IterativeParse(InputStream& is, Handler& handler) { + parseResult_.Clear(); + ClearStackOnExit scope(*this); + IterativeParsingState state = IterativeParsingStartState; + + SkipWhitespaceAndComments<parseFlags>(is); + RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_); + while (is.Peek() != '\0') { + Token t = Tokenize(is.Peek()); + IterativeParsingState n = Predict(state, t); + IterativeParsingState d = Transit<parseFlags>(state, t, n, is, handler); + + if (d == IterativeParsingErrorState) { + HandleError(state, is); + break; + } + + state = d; + + // Do not further consume streams if a root JSON has been parsed. + if ((parseFlags & kParseStopWhenDoneFlag) && state == IterativeParsingFinishState) + break; + + SkipWhitespaceAndComments<parseFlags>(is); + RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_); + } + + // Handle the end of file. + if (state != IterativeParsingFinishState) + HandleError(state, is); + + return parseResult_; + } + + static const size_t kDefaultStackCapacity = 256; //!< Default stack capacity in bytes for storing a single decoded string. + internal::Stack<StackAllocator> stack_; //!< A stack for storing decoded string temporarily during non-destructive parsing. + ParseResult parseResult_; +}; // class GenericReader + +//! Reader with UTF8 encoding and default allocator. +typedef GenericReader<UTF8<>, UTF8<> > Reader; + +RAPIDJSON_NAMESPACE_END + +#ifdef __clang__ +RAPIDJSON_DIAG_POP +#endif + + +#ifdef __GNUC__ +RAPIDJSON_DIAG_POP +#endif + +#ifdef _MSC_VER +RAPIDJSON_DIAG_POP +#endif + +#endif // RAPIDJSON_READER_H_ diff --git a/xmrstak/rapidjson/schema.h b/xmrstak/rapidjson/schema.h new file mode 100644 index 0000000..e7af3cf --- /dev/null +++ b/xmrstak/rapidjson/schema.h @@ -0,0 +1,2024 @@ +// Tencent is pleased to support the open source community by making RapidJSON available-> +// +// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip-> All rights reserved-> +// +// Licensed under the MIT License (the "License"); you may not use this file except +// in compliance with the License-> You may obtain a copy of the License at +// +// http://opensource->org/licenses/MIT +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied-> See the License for the +// specific language governing permissions and limitations under the License-> + +#ifndef RAPIDJSON_SCHEMA_H_ +#define RAPIDJSON_SCHEMA_H_ + +#include "document.h" +#include "pointer.h" +#include <cmath> // abs, floor + +#if !defined(RAPIDJSON_SCHEMA_USE_INTERNALREGEX) +#define RAPIDJSON_SCHEMA_USE_INTERNALREGEX 1 +#else +#define RAPIDJSON_SCHEMA_USE_INTERNALREGEX 0 +#endif + +#if !RAPIDJSON_SCHEMA_USE_INTERNALREGEX && !defined(RAPIDJSON_SCHEMA_USE_STDREGEX) && (__cplusplus >=201103L || (defined(_MSC_VER) && _MSC_VER >= 1800)) +#define RAPIDJSON_SCHEMA_USE_STDREGEX 1 +#else +#define RAPIDJSON_SCHEMA_USE_STDREGEX 0 +#endif + +#if RAPIDJSON_SCHEMA_USE_INTERNALREGEX +#include "internal/regex.h" +#elif RAPIDJSON_SCHEMA_USE_STDREGEX +#include <regex> +#endif + +#if RAPIDJSON_SCHEMA_USE_INTERNALREGEX || RAPIDJSON_SCHEMA_USE_STDREGEX +#define RAPIDJSON_SCHEMA_HAS_REGEX 1 +#else +#define RAPIDJSON_SCHEMA_HAS_REGEX 0 +#endif + +#ifndef RAPIDJSON_SCHEMA_VERBOSE +#define RAPIDJSON_SCHEMA_VERBOSE 0 +#endif + +#if RAPIDJSON_SCHEMA_VERBOSE +#include "stringbuffer.h" +#endif + +RAPIDJSON_DIAG_PUSH + +#if defined(__GNUC__) +RAPIDJSON_DIAG_OFF(effc++) +#endif + +#ifdef __clang__ +RAPIDJSON_DIAG_OFF(weak-vtables) +RAPIDJSON_DIAG_OFF(exit-time-destructors) +RAPIDJSON_DIAG_OFF(c++98-compat-pedantic) +RAPIDJSON_DIAG_OFF(variadic-macros) +#endif + +#ifdef _MSC_VER +RAPIDJSON_DIAG_OFF(4512) // assignment operator could not be generated +#endif + +RAPIDJSON_NAMESPACE_BEGIN + +/////////////////////////////////////////////////////////////////////////////// +// Verbose Utilities + +#if RAPIDJSON_SCHEMA_VERBOSE + +namespace internal { + +inline void PrintInvalidKeyword(const char* keyword) { + printf("Fail keyword: %s\n", keyword); +} + +inline void PrintInvalidKeyword(const wchar_t* keyword) { + wprintf(L"Fail keyword: %ls\n", keyword); +} + +inline void PrintInvalidDocument(const char* document) { + printf("Fail document: %s\n\n", document); +} + +inline void PrintInvalidDocument(const wchar_t* document) { + wprintf(L"Fail document: %ls\n\n", document); +} + +inline void PrintValidatorPointers(unsigned depth, const char* s, const char* d) { + printf("S: %*s%s\nD: %*s%s\n\n", depth * 4, " ", s, depth * 4, " ", d); +} + +inline void PrintValidatorPointers(unsigned depth, const wchar_t* s, const wchar_t* d) { + wprintf(L"S: %*ls%ls\nD: %*ls%ls\n\n", depth * 4, L" ", s, depth * 4, L" ", d); +} + +} // namespace internal + +#endif // RAPIDJSON_SCHEMA_VERBOSE + +/////////////////////////////////////////////////////////////////////////////// +// RAPIDJSON_INVALID_KEYWORD_RETURN + +#if RAPIDJSON_SCHEMA_VERBOSE +#define RAPIDJSON_INVALID_KEYWORD_VERBOSE(keyword) internal::PrintInvalidKeyword(keyword) +#else +#define RAPIDJSON_INVALID_KEYWORD_VERBOSE(keyword) +#endif + +#define RAPIDJSON_INVALID_KEYWORD_RETURN(keyword)\ +RAPIDJSON_MULTILINEMACRO_BEGIN\ + context.invalidKeyword = keyword.GetString();\ + RAPIDJSON_INVALID_KEYWORD_VERBOSE(keyword.GetString());\ + return false;\ +RAPIDJSON_MULTILINEMACRO_END + +/////////////////////////////////////////////////////////////////////////////// +// Forward declarations + +template <typename ValueType, typename Allocator> +class GenericSchemaDocument; + +namespace internal { + +template <typename SchemaDocumentType> +class Schema; + +/////////////////////////////////////////////////////////////////////////////// +// ISchemaValidator + +class ISchemaValidator { +public: + virtual ~ISchemaValidator() {} + virtual bool IsValid() const = 0; +}; + +/////////////////////////////////////////////////////////////////////////////// +// ISchemaStateFactory + +template <typename SchemaType> +class ISchemaStateFactory { +public: + virtual ~ISchemaStateFactory() {} + virtual ISchemaValidator* CreateSchemaValidator(const SchemaType&) = 0; + virtual void DestroySchemaValidator(ISchemaValidator* validator) = 0; + virtual void* CreateHasher() = 0; + virtual uint64_t GetHashCode(void* hasher) = 0; + virtual void DestroryHasher(void* hasher) = 0; + virtual void* MallocState(size_t size) = 0; + virtual void FreeState(void* p) = 0; +}; + +/////////////////////////////////////////////////////////////////////////////// +// Hasher + +// For comparison of compound value +template<typename Encoding, typename Allocator> +class Hasher { +public: + typedef typename Encoding::Ch Ch; + + Hasher(Allocator* allocator = 0, size_t stackCapacity = kDefaultSize) : stack_(allocator, stackCapacity) {} + + bool Null() { return WriteType(kNullType); } + bool Bool(bool b) { return WriteType(b ? kTrueType : kFalseType); } + bool Int(int i) { Number n; n.u.i = i; n.d = static_cast<double>(i); return WriteNumber(n); } + bool Uint(unsigned u) { Number n; n.u.u = u; n.d = static_cast<double>(u); return WriteNumber(n); } + bool Int64(int64_t i) { Number n; n.u.i = i; n.d = static_cast<double>(i); return WriteNumber(n); } + bool Uint64(uint64_t u) { Number n; n.u.u = u; n.d = static_cast<double>(u); return WriteNumber(n); } + bool Double(double d) { + Number n; + if (d < 0) n.u.i = static_cast<int64_t>(d); + else n.u.u = static_cast<uint64_t>(d); + n.d = d; + return WriteNumber(n); + } + + bool RawNumber(const Ch* str, SizeType len, bool) { + WriteBuffer(kNumberType, str, len * sizeof(Ch)); + return true; + } + + bool String(const Ch* str, SizeType len, bool) { + WriteBuffer(kStringType, str, len * sizeof(Ch)); + return true; + } + + bool StartObject() { return true; } + bool Key(const Ch* str, SizeType len, bool copy) { return String(str, len, copy); } + bool EndObject(SizeType memberCount) { + uint64_t h = Hash(0, kObjectType); + uint64_t* kv = stack_.template Pop<uint64_t>(memberCount * 2); + for (SizeType i = 0; i < memberCount; i++) + h ^= Hash(kv[i * 2], kv[i * 2 + 1]); // Use xor to achieve member order insensitive + *stack_.template Push<uint64_t>() = h; + return true; + } + + bool StartArray() { return true; } + bool EndArray(SizeType elementCount) { + uint64_t h = Hash(0, kArrayType); + uint64_t* e = stack_.template Pop<uint64_t>(elementCount); + for (SizeType i = 0; i < elementCount; i++) + h = Hash(h, e[i]); // Use hash to achieve element order sensitive + *stack_.template Push<uint64_t>() = h; + return true; + } + + bool IsValid() const { return stack_.GetSize() == sizeof(uint64_t); } + + uint64_t GetHashCode() const { + RAPIDJSON_ASSERT(IsValid()); + return *stack_.template Top<uint64_t>(); + } + +private: + static const size_t kDefaultSize = 256; + struct Number { + union U { + uint64_t u; + int64_t i; + }u; + double d; + }; + + bool WriteType(Type type) { return WriteBuffer(type, 0, 0); } + + bool WriteNumber(const Number& n) { return WriteBuffer(kNumberType, &n, sizeof(n)); } + + bool WriteBuffer(Type type, const void* data, size_t len) { + // FNV-1a from http://isthe.com/chongo/tech/comp/fnv/ + uint64_t h = Hash(RAPIDJSON_UINT64_C2(0x84222325, 0xcbf29ce4), type); + const unsigned char* d = static_cast<const unsigned char*>(data); + for (size_t i = 0; i < len; i++) + h = Hash(h, d[i]); + *stack_.template Push<uint64_t>() = h; + return true; + } + + static uint64_t Hash(uint64_t h, uint64_t d) { + static const uint64_t kPrime = RAPIDJSON_UINT64_C2(0x00000100, 0x000001b3); + h ^= d; + h *= kPrime; + return h; + } + + Stack<Allocator> stack_; +}; + +/////////////////////////////////////////////////////////////////////////////// +// SchemaValidationContext + +template <typename SchemaDocumentType> +struct SchemaValidationContext { + typedef Schema<SchemaDocumentType> SchemaType; + typedef ISchemaStateFactory<SchemaType> SchemaValidatorFactoryType; + typedef typename SchemaType::ValueType ValueType; + typedef typename ValueType::Ch Ch; + + enum PatternValidatorType { + kPatternValidatorOnly, + kPatternValidatorWithProperty, + kPatternValidatorWithAdditionalProperty + }; + + SchemaValidationContext(SchemaValidatorFactoryType& f, const SchemaType* s) : + factory(f), + schema(s), + valueSchema(), + invalidKeyword(), + hasher(), + arrayElementHashCodes(), + validators(), + validatorCount(), + patternPropertiesValidators(), + patternPropertiesValidatorCount(), + patternPropertiesSchemas(), + patternPropertiesSchemaCount(), + valuePatternValidatorType(kPatternValidatorOnly), + propertyExist(), + inArray(false), + valueUniqueness(false), + arrayUniqueness(false) + { + } + + ~SchemaValidationContext() { + if (hasher) + factory.DestroryHasher(hasher); + if (validators) { + for (SizeType i = 0; i < validatorCount; i++) + factory.DestroySchemaValidator(validators[i]); + factory.FreeState(validators); + } + if (patternPropertiesValidators) { + for (SizeType i = 0; i < patternPropertiesValidatorCount; i++) + factory.DestroySchemaValidator(patternPropertiesValidators[i]); + factory.FreeState(patternPropertiesValidators); + } + if (patternPropertiesSchemas) + factory.FreeState(patternPropertiesSchemas); + if (propertyExist) + factory.FreeState(propertyExist); + } + + SchemaValidatorFactoryType& factory; + const SchemaType* schema; + const SchemaType* valueSchema; + const Ch* invalidKeyword; + void* hasher; // Only validator access + void* arrayElementHashCodes; // Only validator access this + ISchemaValidator** validators; + SizeType validatorCount; + ISchemaValidator** patternPropertiesValidators; + SizeType patternPropertiesValidatorCount; + const SchemaType** patternPropertiesSchemas; + SizeType patternPropertiesSchemaCount; + PatternValidatorType valuePatternValidatorType; + PatternValidatorType objectPatternValidatorType; + SizeType arrayElementIndex; + bool* propertyExist; + bool inArray; + bool valueUniqueness; + bool arrayUniqueness; +}; + +/////////////////////////////////////////////////////////////////////////////// +// Schema + +template <typename SchemaDocumentType> +class Schema { +public: + typedef typename SchemaDocumentType::ValueType ValueType; + typedef typename SchemaDocumentType::AllocatorType AllocatorType; + typedef typename SchemaDocumentType::PointerType PointerType; + typedef typename ValueType::EncodingType EncodingType; + typedef typename EncodingType::Ch Ch; + typedef SchemaValidationContext<SchemaDocumentType> Context; + typedef Schema<SchemaDocumentType> SchemaType; + typedef GenericValue<EncodingType, AllocatorType> SValue; + friend class GenericSchemaDocument<ValueType, AllocatorType>; + + Schema(SchemaDocumentType* schemaDocument, const PointerType& p, const ValueType& value, const ValueType& document, AllocatorType* allocator) : + allocator_(allocator), + typeless_(schemaDocument->GetTypeless()), + enum_(), + enumCount_(), + not_(), + type_((1 << kTotalSchemaType) - 1), // typeless + validatorCount_(), + properties_(), + additionalPropertiesSchema_(), + patternProperties_(), + patternPropertyCount_(), + propertyCount_(), + minProperties_(), + maxProperties_(SizeType(~0)), + additionalProperties_(true), + hasDependencies_(), + hasRequired_(), + hasSchemaDependencies_(), + additionalItemsSchema_(), + itemsList_(), + itemsTuple_(), + itemsTupleCount_(), + minItems_(), + maxItems_(SizeType(~0)), + additionalItems_(true), + uniqueItems_(false), + pattern_(), + minLength_(0), + maxLength_(~SizeType(0)), + exclusiveMinimum_(false), + exclusiveMaximum_(false) + { + typedef typename SchemaDocumentType::ValueType ValueType; + typedef typename ValueType::ConstValueIterator ConstValueIterator; + typedef typename ValueType::ConstMemberIterator ConstMemberIterator; + + if (!value.IsObject()) + return; + + if (const ValueType* v = GetMember(value, GetTypeString())) { + type_ = 0; + if (v->IsString()) + AddType(*v); + else if (v->IsArray()) + for (ConstValueIterator itr = v->Begin(); itr != v->End(); ++itr) + AddType(*itr); + } + + if (const ValueType* v = GetMember(value, GetEnumString())) + if (v->IsArray() && v->Size() > 0) { + enum_ = static_cast<uint64_t*>(allocator_->Malloc(sizeof(uint64_t) * v->Size())); + for (ConstValueIterator itr = v->Begin(); itr != v->End(); ++itr) { + typedef Hasher<EncodingType, MemoryPoolAllocator<> > EnumHasherType; + char buffer[256 + 24]; + MemoryPoolAllocator<> hasherAllocator(buffer, sizeof(buffer)); + EnumHasherType h(&hasherAllocator, 256); + itr->Accept(h); + enum_[enumCount_++] = h.GetHashCode(); + } + } + + if (schemaDocument) { + AssignIfExist(allOf_, *schemaDocument, p, value, GetAllOfString(), document); + AssignIfExist(anyOf_, *schemaDocument, p, value, GetAnyOfString(), document); + AssignIfExist(oneOf_, *schemaDocument, p, value, GetOneOfString(), document); + } + + if (const ValueType* v = GetMember(value, GetNotString())) { + schemaDocument->CreateSchema(¬_, p.Append(GetNotString(), allocator_), *v, document); + notValidatorIndex_ = validatorCount_; + validatorCount_++; + } + + // Object + + const ValueType* properties = GetMember(value, GetPropertiesString()); + const ValueType* required = GetMember(value, GetRequiredString()); + const ValueType* dependencies = GetMember(value, GetDependenciesString()); + { + // Gather properties from properties/required/dependencies + SValue allProperties(kArrayType); + + if (properties && properties->IsObject()) + for (ConstMemberIterator itr = properties->MemberBegin(); itr != properties->MemberEnd(); ++itr) + AddUniqueElement(allProperties, itr->name); + + if (required && required->IsArray()) + for (ConstValueIterator itr = required->Begin(); itr != required->End(); ++itr) + if (itr->IsString()) + AddUniqueElement(allProperties, *itr); + + if (dependencies && dependencies->IsObject()) + for (ConstMemberIterator itr = dependencies->MemberBegin(); itr != dependencies->MemberEnd(); ++itr) { + AddUniqueElement(allProperties, itr->name); + if (itr->value.IsArray()) + for (ConstValueIterator i = itr->value.Begin(); i != itr->value.End(); ++i) + if (i->IsString()) + AddUniqueElement(allProperties, *i); + } + + if (allProperties.Size() > 0) { + propertyCount_ = allProperties.Size(); + properties_ = static_cast<Property*>(allocator_->Malloc(sizeof(Property) * propertyCount_)); + for (SizeType i = 0; i < propertyCount_; i++) { + new (&properties_[i]) Property(); + properties_[i].name = allProperties[i]; + properties_[i].schema = typeless_; + } + } + } + + if (properties && properties->IsObject()) { + PointerType q = p.Append(GetPropertiesString(), allocator_); + for (ConstMemberIterator itr = properties->MemberBegin(); itr != properties->MemberEnd(); ++itr) { + SizeType index; + if (FindPropertyIndex(itr->name, &index)) + schemaDocument->CreateSchema(&properties_[index].schema, q.Append(itr->name, allocator_), itr->value, document); + } + } + + if (const ValueType* v = GetMember(value, GetPatternPropertiesString())) { + PointerType q = p.Append(GetPatternPropertiesString(), allocator_); + patternProperties_ = static_cast<PatternProperty*>(allocator_->Malloc(sizeof(PatternProperty) * v->MemberCount())); + patternPropertyCount_ = 0; + + for (ConstMemberIterator itr = v->MemberBegin(); itr != v->MemberEnd(); ++itr) { + new (&patternProperties_[patternPropertyCount_]) PatternProperty(); + patternProperties_[patternPropertyCount_].pattern = CreatePattern(itr->name); + schemaDocument->CreateSchema(&patternProperties_[patternPropertyCount_].schema, q.Append(itr->name, allocator_), itr->value, document); + patternPropertyCount_++; + } + } + + if (required && required->IsArray()) + for (ConstValueIterator itr = required->Begin(); itr != required->End(); ++itr) + if (itr->IsString()) { + SizeType index; + if (FindPropertyIndex(*itr, &index)) { + properties_[index].required = true; + hasRequired_ = true; + } + } + + if (dependencies && dependencies->IsObject()) { + PointerType q = p.Append(GetDependenciesString(), allocator_); + hasDependencies_ = true; + for (ConstMemberIterator itr = dependencies->MemberBegin(); itr != dependencies->MemberEnd(); ++itr) { + SizeType sourceIndex; + if (FindPropertyIndex(itr->name, &sourceIndex)) { + if (itr->value.IsArray()) { + properties_[sourceIndex].dependencies = static_cast<bool*>(allocator_->Malloc(sizeof(bool) * propertyCount_)); + std::memset(properties_[sourceIndex].dependencies, 0, sizeof(bool)* propertyCount_); + for (ConstValueIterator targetItr = itr->value.Begin(); targetItr != itr->value.End(); ++targetItr) { + SizeType targetIndex; + if (FindPropertyIndex(*targetItr, &targetIndex)) + properties_[sourceIndex].dependencies[targetIndex] = true; + } + } + else if (itr->value.IsObject()) { + hasSchemaDependencies_ = true; + schemaDocument->CreateSchema(&properties_[sourceIndex].dependenciesSchema, q.Append(itr->name, allocator_), itr->value, document); + properties_[sourceIndex].dependenciesValidatorIndex = validatorCount_; + validatorCount_++; + } + } + } + } + + if (const ValueType* v = GetMember(value, GetAdditionalPropertiesString())) { + if (v->IsBool()) + additionalProperties_ = v->GetBool(); + else if (v->IsObject()) + schemaDocument->CreateSchema(&additionalPropertiesSchema_, p.Append(GetAdditionalPropertiesString(), allocator_), *v, document); + } + + AssignIfExist(minProperties_, value, GetMinPropertiesString()); + AssignIfExist(maxProperties_, value, GetMaxPropertiesString()); + + // Array + if (const ValueType* v = GetMember(value, GetItemsString())) { + PointerType q = p.Append(GetItemsString(), allocator_); + if (v->IsObject()) // List validation + schemaDocument->CreateSchema(&itemsList_, q, *v, document); + else if (v->IsArray()) { // Tuple validation + itemsTuple_ = static_cast<const Schema**>(allocator_->Malloc(sizeof(const Schema*) * v->Size())); + SizeType index = 0; + for (ConstValueIterator itr = v->Begin(); itr != v->End(); ++itr, index++) + schemaDocument->CreateSchema(&itemsTuple_[itemsTupleCount_++], q.Append(index, allocator_), *itr, document); + } + } + + AssignIfExist(minItems_, value, GetMinItemsString()); + AssignIfExist(maxItems_, value, GetMaxItemsString()); + + if (const ValueType* v = GetMember(value, GetAdditionalItemsString())) { + if (v->IsBool()) + additionalItems_ = v->GetBool(); + else if (v->IsObject()) + schemaDocument->CreateSchema(&additionalItemsSchema_, p.Append(GetAdditionalItemsString(), allocator_), *v, document); + } + + AssignIfExist(uniqueItems_, value, GetUniqueItemsString()); + + // String + AssignIfExist(minLength_, value, GetMinLengthString()); + AssignIfExist(maxLength_, value, GetMaxLengthString()); + + if (const ValueType* v = GetMember(value, GetPatternString())) + pattern_ = CreatePattern(*v); + + // Number + if (const ValueType* v = GetMember(value, GetMinimumString())) + if (v->IsNumber()) + minimum_.CopyFrom(*v, *allocator_); + + if (const ValueType* v = GetMember(value, GetMaximumString())) + if (v->IsNumber()) + maximum_.CopyFrom(*v, *allocator_); + + AssignIfExist(exclusiveMinimum_, value, GetExclusiveMinimumString()); + AssignIfExist(exclusiveMaximum_, value, GetExclusiveMaximumString()); + + if (const ValueType* v = GetMember(value, GetMultipleOfString())) + if (v->IsNumber() && v->GetDouble() > 0.0) + multipleOf_.CopyFrom(*v, *allocator_); + } + + ~Schema() { + AllocatorType::Free(enum_); + if (properties_) { + for (SizeType i = 0; i < propertyCount_; i++) + properties_[i].~Property(); + AllocatorType::Free(properties_); + } + if (patternProperties_) { + for (SizeType i = 0; i < patternPropertyCount_; i++) + patternProperties_[i].~PatternProperty(); + AllocatorType::Free(patternProperties_); + } + AllocatorType::Free(itemsTuple_); +#if RAPIDJSON_SCHEMA_HAS_REGEX + if (pattern_) { + pattern_->~RegexType(); + AllocatorType::Free(pattern_); + } +#endif + } + + bool BeginValue(Context& context) const { + if (context.inArray) { + if (uniqueItems_) + context.valueUniqueness = true; + + if (itemsList_) + context.valueSchema = itemsList_; + else if (itemsTuple_) { + if (context.arrayElementIndex < itemsTupleCount_) + context.valueSchema = itemsTuple_[context.arrayElementIndex]; + else if (additionalItemsSchema_) + context.valueSchema = additionalItemsSchema_; + else if (additionalItems_) + context.valueSchema = typeless_; + else + RAPIDJSON_INVALID_KEYWORD_RETURN(GetItemsString()); + } + else + context.valueSchema = typeless_; + + context.arrayElementIndex++; + } + return true; + } + + RAPIDJSON_FORCEINLINE bool EndValue(Context& context) const { + if (context.patternPropertiesValidatorCount > 0) { + bool otherValid = false; + SizeType count = context.patternPropertiesValidatorCount; + if (context.objectPatternValidatorType != Context::kPatternValidatorOnly) + otherValid = context.patternPropertiesValidators[--count]->IsValid(); + + bool patternValid = true; + for (SizeType i = 0; i < count; i++) + if (!context.patternPropertiesValidators[i]->IsValid()) { + patternValid = false; + break; + } + + if (context.objectPatternValidatorType == Context::kPatternValidatorOnly) { + if (!patternValid) + RAPIDJSON_INVALID_KEYWORD_RETURN(GetPatternPropertiesString()); + } + else if (context.objectPatternValidatorType == Context::kPatternValidatorWithProperty) { + if (!patternValid || !otherValid) + RAPIDJSON_INVALID_KEYWORD_RETURN(GetPatternPropertiesString()); + } + else if (!patternValid && !otherValid) // kPatternValidatorWithAdditionalProperty) + RAPIDJSON_INVALID_KEYWORD_RETURN(GetPatternPropertiesString()); + } + + if (enum_) { + const uint64_t h = context.factory.GetHashCode(context.hasher); + for (SizeType i = 0; i < enumCount_; i++) + if (enum_[i] == h) + goto foundEnum; + RAPIDJSON_INVALID_KEYWORD_RETURN(GetEnumString()); + foundEnum:; + } + + if (allOf_.schemas) + for (SizeType i = allOf_.begin; i < allOf_.begin + allOf_.count; i++) + if (!context.validators[i]->IsValid()) + RAPIDJSON_INVALID_KEYWORD_RETURN(GetAllOfString()); + + if (anyOf_.schemas) { + for (SizeType i = anyOf_.begin; i < anyOf_.begin + anyOf_.count; i++) + if (context.validators[i]->IsValid()) + goto foundAny; + RAPIDJSON_INVALID_KEYWORD_RETURN(GetAnyOfString()); + foundAny:; + } + + if (oneOf_.schemas) { + bool oneValid = false; + for (SizeType i = oneOf_.begin; i < oneOf_.begin + oneOf_.count; i++) + if (context.validators[i]->IsValid()) { + if (oneValid) + RAPIDJSON_INVALID_KEYWORD_RETURN(GetOneOfString()); + else + oneValid = true; + } + if (!oneValid) + RAPIDJSON_INVALID_KEYWORD_RETURN(GetOneOfString()); + } + + if (not_ && context.validators[notValidatorIndex_]->IsValid()) + RAPIDJSON_INVALID_KEYWORD_RETURN(GetNotString()); + + return true; + } + + bool Null(Context& context) const { + if (!(type_ & (1 << kNullSchemaType))) + RAPIDJSON_INVALID_KEYWORD_RETURN(GetTypeString()); + return CreateParallelValidator(context); + } + + bool Bool(Context& context, bool) const { + if (!(type_ & (1 << kBooleanSchemaType))) + RAPIDJSON_INVALID_KEYWORD_RETURN(GetTypeString()); + return CreateParallelValidator(context); + } + + bool Int(Context& context, int i) const { + if (!CheckInt(context, i)) + return false; + return CreateParallelValidator(context); + } + + bool Uint(Context& context, unsigned u) const { + if (!CheckUint(context, u)) + return false; + return CreateParallelValidator(context); + } + + bool Int64(Context& context, int64_t i) const { + if (!CheckInt(context, i)) + return false; + return CreateParallelValidator(context); + } + + bool Uint64(Context& context, uint64_t u) const { + if (!CheckUint(context, u)) + return false; + return CreateParallelValidator(context); + } + + bool Double(Context& context, double d) const { + if (!(type_ & (1 << kNumberSchemaType))) + RAPIDJSON_INVALID_KEYWORD_RETURN(GetTypeString()); + + if (!minimum_.IsNull() && !CheckDoubleMinimum(context, d)) + return false; + + if (!maximum_.IsNull() && !CheckDoubleMaximum(context, d)) + return false; + + if (!multipleOf_.IsNull() && !CheckDoubleMultipleOf(context, d)) + return false; + + return CreateParallelValidator(context); + } + + bool String(Context& context, const Ch* str, SizeType length, bool) const { + if (!(type_ & (1 << kStringSchemaType))) + RAPIDJSON_INVALID_KEYWORD_RETURN(GetTypeString()); + + if (minLength_ != 0 || maxLength_ != SizeType(~0)) { + SizeType count; + if (internal::CountStringCodePoint<EncodingType>(str, length, &count)) { + if (count < minLength_) + RAPIDJSON_INVALID_KEYWORD_RETURN(GetMinLengthString()); + if (count > maxLength_) + RAPIDJSON_INVALID_KEYWORD_RETURN(GetMaxLengthString()); + } + } + + if (pattern_ && !IsPatternMatch(pattern_, str, length)) + RAPIDJSON_INVALID_KEYWORD_RETURN(GetPatternString()); + + return CreateParallelValidator(context); + } + + bool StartObject(Context& context) const { + if (!(type_ & (1 << kObjectSchemaType))) + RAPIDJSON_INVALID_KEYWORD_RETURN(GetTypeString()); + + if (hasDependencies_ || hasRequired_) { + context.propertyExist = static_cast<bool*>(context.factory.MallocState(sizeof(bool) * propertyCount_)); + std::memset(context.propertyExist, 0, sizeof(bool) * propertyCount_); + } + + if (patternProperties_) { // pre-allocate schema array + SizeType count = patternPropertyCount_ + 1; // extra for valuePatternValidatorType + context.patternPropertiesSchemas = static_cast<const SchemaType**>(context.factory.MallocState(sizeof(const SchemaType*) * count)); + context.patternPropertiesSchemaCount = 0; + std::memset(context.patternPropertiesSchemas, 0, sizeof(SchemaType*) * count); + } + + return CreateParallelValidator(context); + } + + bool Key(Context& context, const Ch* str, SizeType len, bool) const { + if (patternProperties_) { + context.patternPropertiesSchemaCount = 0; + for (SizeType i = 0; i < patternPropertyCount_; i++) + if (patternProperties_[i].pattern && IsPatternMatch(patternProperties_[i].pattern, str, len)) + context.patternPropertiesSchemas[context.patternPropertiesSchemaCount++] = patternProperties_[i].schema; + } + + SizeType index; + if (FindPropertyIndex(ValueType(str, len).Move(), &index)) { + if (context.patternPropertiesSchemaCount > 0) { + context.patternPropertiesSchemas[context.patternPropertiesSchemaCount++] = properties_[index].schema; + context.valueSchema = typeless_; + context.valuePatternValidatorType = Context::kPatternValidatorWithProperty; + } + else + context.valueSchema = properties_[index].schema; + + if (context.propertyExist) + context.propertyExist[index] = true; + + return true; + } + + if (additionalPropertiesSchema_) { + if (additionalPropertiesSchema_ && context.patternPropertiesSchemaCount > 0) { + context.patternPropertiesSchemas[context.patternPropertiesSchemaCount++] = additionalPropertiesSchema_; + context.valueSchema = typeless_; + context.valuePatternValidatorType = Context::kPatternValidatorWithAdditionalProperty; + } + else + context.valueSchema = additionalPropertiesSchema_; + return true; + } + else if (additionalProperties_) { + context.valueSchema = typeless_; + return true; + } + + if (context.patternPropertiesSchemaCount == 0) // patternProperties are not additional properties + RAPIDJSON_INVALID_KEYWORD_RETURN(GetAdditionalPropertiesString()); + + return true; + } + + bool EndObject(Context& context, SizeType memberCount) const { + if (hasRequired_) + for (SizeType index = 0; index < propertyCount_; index++) + if (properties_[index].required) + if (!context.propertyExist[index]) + RAPIDJSON_INVALID_KEYWORD_RETURN(GetRequiredString()); + + if (memberCount < minProperties_) + RAPIDJSON_INVALID_KEYWORD_RETURN(GetMinPropertiesString()); + + if (memberCount > maxProperties_) + RAPIDJSON_INVALID_KEYWORD_RETURN(GetMaxPropertiesString()); + + if (hasDependencies_) { + for (SizeType sourceIndex = 0; sourceIndex < propertyCount_; sourceIndex++) + if (context.propertyExist[sourceIndex]) { + if (properties_[sourceIndex].dependencies) { + for (SizeType targetIndex = 0; targetIndex < propertyCount_; targetIndex++) + if (properties_[sourceIndex].dependencies[targetIndex] && !context.propertyExist[targetIndex]) + RAPIDJSON_INVALID_KEYWORD_RETURN(GetDependenciesString()); + } + else if (properties_[sourceIndex].dependenciesSchema) + if (!context.validators[properties_[sourceIndex].dependenciesValidatorIndex]->IsValid()) + RAPIDJSON_INVALID_KEYWORD_RETURN(GetDependenciesString()); + } + } + + return true; + } + + bool StartArray(Context& context) const { + if (!(type_ & (1 << kArraySchemaType))) + RAPIDJSON_INVALID_KEYWORD_RETURN(GetTypeString()); + + context.arrayElementIndex = 0; + context.inArray = true; + + return CreateParallelValidator(context); + } + + bool EndArray(Context& context, SizeType elementCount) const { + context.inArray = false; + + if (elementCount < minItems_) + RAPIDJSON_INVALID_KEYWORD_RETURN(GetMinItemsString()); + + if (elementCount > maxItems_) + RAPIDJSON_INVALID_KEYWORD_RETURN(GetMaxItemsString()); + + return true; + } + + // Generate functions for string literal according to Ch +#define RAPIDJSON_STRING_(name, ...) \ + static const ValueType& Get##name##String() {\ + static const Ch s[] = { __VA_ARGS__, '\0' };\ + static const ValueType v(s, sizeof(s) / sizeof(Ch) - 1);\ + return v;\ + } + + RAPIDJSON_STRING_(Null, 'n', 'u', 'l', 'l') + RAPIDJSON_STRING_(Boolean, 'b', 'o', 'o', 'l', 'e', 'a', 'n') + RAPIDJSON_STRING_(Object, 'o', 'b', 'j', 'e', 'c', 't') + RAPIDJSON_STRING_(Array, 'a', 'r', 'r', 'a', 'y') + RAPIDJSON_STRING_(String, 's', 't', 'r', 'i', 'n', 'g') + RAPIDJSON_STRING_(Number, 'n', 'u', 'm', 'b', 'e', 'r') + RAPIDJSON_STRING_(Integer, 'i', 'n', 't', 'e', 'g', 'e', 'r') + RAPIDJSON_STRING_(Type, 't', 'y', 'p', 'e') + RAPIDJSON_STRING_(Enum, 'e', 'n', 'u', 'm') + RAPIDJSON_STRING_(AllOf, 'a', 'l', 'l', 'O', 'f') + RAPIDJSON_STRING_(AnyOf, 'a', 'n', 'y', 'O', 'f') + RAPIDJSON_STRING_(OneOf, 'o', 'n', 'e', 'O', 'f') + RAPIDJSON_STRING_(Not, 'n', 'o', 't') + RAPIDJSON_STRING_(Properties, 'p', 'r', 'o', 'p', 'e', 'r', 't', 'i', 'e', 's') + RAPIDJSON_STRING_(Required, 'r', 'e', 'q', 'u', 'i', 'r', 'e', 'd') + RAPIDJSON_STRING_(Dependencies, 'd', 'e', 'p', 'e', 'n', 'd', 'e', 'n', 'c', 'i', 'e', 's') + RAPIDJSON_STRING_(PatternProperties, 'p', 'a', 't', 't', 'e', 'r', 'n', 'P', 'r', 'o', 'p', 'e', 'r', 't', 'i', 'e', 's') + RAPIDJSON_STRING_(AdditionalProperties, 'a', 'd', 'd', 'i', 't', 'i', 'o', 'n', 'a', 'l', 'P', 'r', 'o', 'p', 'e', 'r', 't', 'i', 'e', 's') + RAPIDJSON_STRING_(MinProperties, 'm', 'i', 'n', 'P', 'r', 'o', 'p', 'e', 'r', 't', 'i', 'e', 's') + RAPIDJSON_STRING_(MaxProperties, 'm', 'a', 'x', 'P', 'r', 'o', 'p', 'e', 'r', 't', 'i', 'e', 's') + RAPIDJSON_STRING_(Items, 'i', 't', 'e', 'm', 's') + RAPIDJSON_STRING_(MinItems, 'm', 'i', 'n', 'I', 't', 'e', 'm', 's') + RAPIDJSON_STRING_(MaxItems, 'm', 'a', 'x', 'I', 't', 'e', 'm', 's') + RAPIDJSON_STRING_(AdditionalItems, 'a', 'd', 'd', 'i', 't', 'i', 'o', 'n', 'a', 'l', 'I', 't', 'e', 'm', 's') + RAPIDJSON_STRING_(UniqueItems, 'u', 'n', 'i', 'q', 'u', 'e', 'I', 't', 'e', 'm', 's') + RAPIDJSON_STRING_(MinLength, 'm', 'i', 'n', 'L', 'e', 'n', 'g', 't', 'h') + RAPIDJSON_STRING_(MaxLength, 'm', 'a', 'x', 'L', 'e', 'n', 'g', 't', 'h') + RAPIDJSON_STRING_(Pattern, 'p', 'a', 't', 't', 'e', 'r', 'n') + RAPIDJSON_STRING_(Minimum, 'm', 'i', 'n', 'i', 'm', 'u', 'm') + RAPIDJSON_STRING_(Maximum, 'm', 'a', 'x', 'i', 'm', 'u', 'm') + RAPIDJSON_STRING_(ExclusiveMinimum, 'e', 'x', 'c', 'l', 'u', 's', 'i', 'v', 'e', 'M', 'i', 'n', 'i', 'm', 'u', 'm') + RAPIDJSON_STRING_(ExclusiveMaximum, 'e', 'x', 'c', 'l', 'u', 's', 'i', 'v', 'e', 'M', 'a', 'x', 'i', 'm', 'u', 'm') + RAPIDJSON_STRING_(MultipleOf, 'm', 'u', 'l', 't', 'i', 'p', 'l', 'e', 'O', 'f') + +#undef RAPIDJSON_STRING_ + +private: + enum SchemaValueType { + kNullSchemaType, + kBooleanSchemaType, + kObjectSchemaType, + kArraySchemaType, + kStringSchemaType, + kNumberSchemaType, + kIntegerSchemaType, + kTotalSchemaType + }; + +#if RAPIDJSON_SCHEMA_USE_INTERNALREGEX + typedef internal::GenericRegex<EncodingType> RegexType; +#elif RAPIDJSON_SCHEMA_USE_STDREGEX + typedef std::basic_regex<Ch> RegexType; +#else + typedef char RegexType; +#endif + + struct SchemaArray { + SchemaArray() : schemas(), count() {} + ~SchemaArray() { AllocatorType::Free(schemas); } + const SchemaType** schemas; + SizeType begin; // begin index of context.validators + SizeType count; + }; + + template <typename V1, typename V2> + void AddUniqueElement(V1& a, const V2& v) { + for (typename V1::ConstValueIterator itr = a.Begin(); itr != a.End(); ++itr) + if (*itr == v) + return; + V1 c(v, *allocator_); + a.PushBack(c, *allocator_); + } + + static const ValueType* GetMember(const ValueType& value, const ValueType& name) { + typename ValueType::ConstMemberIterator itr = value.FindMember(name); + return itr != value.MemberEnd() ? &(itr->value) : 0; + } + + static void AssignIfExist(bool& out, const ValueType& value, const ValueType& name) { + if (const ValueType* v = GetMember(value, name)) + if (v->IsBool()) + out = v->GetBool(); + } + + static void AssignIfExist(SizeType& out, const ValueType& value, const ValueType& name) { + if (const ValueType* v = GetMember(value, name)) + if (v->IsUint64() && v->GetUint64() <= SizeType(~0)) + out = static_cast<SizeType>(v->GetUint64()); + } + + void AssignIfExist(SchemaArray& out, SchemaDocumentType& schemaDocument, const PointerType& p, const ValueType& value, const ValueType& name, const ValueType& document) { + if (const ValueType* v = GetMember(value, name)) { + if (v->IsArray() && v->Size() > 0) { + PointerType q = p.Append(name, allocator_); + out.count = v->Size(); + out.schemas = static_cast<const Schema**>(allocator_->Malloc(out.count * sizeof(const Schema*))); + memset(out.schemas, 0, sizeof(Schema*)* out.count); + for (SizeType i = 0; i < out.count; i++) + schemaDocument.CreateSchema(&out.schemas[i], q.Append(i, allocator_), (*v)[i], document); + out.begin = validatorCount_; + validatorCount_ += out.count; + } + } + } + +#if RAPIDJSON_SCHEMA_USE_INTERNALREGEX + template <typename ValueType> + RegexType* CreatePattern(const ValueType& value) { + if (value.IsString()) { + RegexType* r = new (allocator_->Malloc(sizeof(RegexType))) RegexType(value.GetString()); + if (!r->IsValid()) { + r->~RegexType(); + AllocatorType::Free(r); + r = 0; + } + return r; + } + return 0; + } + + static bool IsPatternMatch(const RegexType* pattern, const Ch *str, SizeType) { + GenericRegexSearch<RegexType> rs(*pattern); + return rs.Search(str); + } +#elif RAPIDJSON_SCHEMA_USE_STDREGEX + template <typename ValueType> + RegexType* CreatePattern(const ValueType& value) { + if (value.IsString()) + try { + return new (allocator_->Malloc(sizeof(RegexType))) RegexType(value.GetString(), std::size_t(value.GetStringLength()), std::regex_constants::ECMAScript); + } + catch (const std::regex_error&) { + } + return 0; + } + + static bool IsPatternMatch(const RegexType* pattern, const Ch *str, SizeType length) { + std::match_results<const Ch*> r; + return std::regex_search(str, str + length, r, *pattern); + } +#else + template <typename ValueType> + RegexType* CreatePattern(const ValueType&) { return 0; } + + static bool IsPatternMatch(const RegexType*, const Ch *, SizeType) { return true; } +#endif // RAPIDJSON_SCHEMA_USE_STDREGEX + + void AddType(const ValueType& type) { + if (type == GetNullString() ) type_ |= 1 << kNullSchemaType; + else if (type == GetBooleanString()) type_ |= 1 << kBooleanSchemaType; + else if (type == GetObjectString() ) type_ |= 1 << kObjectSchemaType; + else if (type == GetArrayString() ) type_ |= 1 << kArraySchemaType; + else if (type == GetStringString() ) type_ |= 1 << kStringSchemaType; + else if (type == GetIntegerString()) type_ |= 1 << kIntegerSchemaType; + else if (type == GetNumberString() ) type_ |= (1 << kNumberSchemaType) | (1 << kIntegerSchemaType); + } + + bool CreateParallelValidator(Context& context) const { + if (enum_ || context.arrayUniqueness) + context.hasher = context.factory.CreateHasher(); + + if (validatorCount_) { + RAPIDJSON_ASSERT(context.validators == 0); + context.validators = static_cast<ISchemaValidator**>(context.factory.MallocState(sizeof(ISchemaValidator*) * validatorCount_)); + context.validatorCount = validatorCount_; + + if (allOf_.schemas) + CreateSchemaValidators(context, allOf_); + + if (anyOf_.schemas) + CreateSchemaValidators(context, anyOf_); + + if (oneOf_.schemas) + CreateSchemaValidators(context, oneOf_); + + if (not_) + context.validators[notValidatorIndex_] = context.factory.CreateSchemaValidator(*not_); + + if (hasSchemaDependencies_) { + for (SizeType i = 0; i < propertyCount_; i++) + if (properties_[i].dependenciesSchema) + context.validators[properties_[i].dependenciesValidatorIndex] = context.factory.CreateSchemaValidator(*properties_[i].dependenciesSchema); + } + } + + return true; + } + + void CreateSchemaValidators(Context& context, const SchemaArray& schemas) const { + for (SizeType i = 0; i < schemas.count; i++) + context.validators[schemas.begin + i] = context.factory.CreateSchemaValidator(*schemas.schemas[i]); + } + + // O(n) + bool FindPropertyIndex(const ValueType& name, SizeType* outIndex) const { + SizeType len = name.GetStringLength(); + const Ch* str = name.GetString(); + for (SizeType index = 0; index < propertyCount_; index++) + if (properties_[index].name.GetStringLength() == len && + (std::memcmp(properties_[index].name.GetString(), str, sizeof(Ch) * len) == 0)) + { + *outIndex = index; + return true; + } + return false; + } + + bool CheckInt(Context& context, int64_t i) const { + if (!(type_ & ((1 << kIntegerSchemaType) | (1 << kNumberSchemaType)))) + RAPIDJSON_INVALID_KEYWORD_RETURN(GetTypeString()); + + if (!minimum_.IsNull()) { + if (minimum_.IsInt64()) { + if (exclusiveMinimum_ ? i <= minimum_.GetInt64() : i < minimum_.GetInt64()) + RAPIDJSON_INVALID_KEYWORD_RETURN(GetMinimumString()); + } + else if (minimum_.IsUint64()) { + RAPIDJSON_INVALID_KEYWORD_RETURN(GetMinimumString()); // i <= max(int64_t) < minimum.GetUint64() + } + else if (!CheckDoubleMinimum(context, static_cast<double>(i))) + return false; + } + + if (!maximum_.IsNull()) { + if (maximum_.IsInt64()) { + if (exclusiveMaximum_ ? i >= maximum_.GetInt64() : i > maximum_.GetInt64()) + RAPIDJSON_INVALID_KEYWORD_RETURN(GetMaximumString()); + } + else if (maximum_.IsUint64()) + /* do nothing */; // i <= max(int64_t) < maximum_.GetUint64() + else if (!CheckDoubleMaximum(context, static_cast<double>(i))) + return false; + } + + if (!multipleOf_.IsNull()) { + if (multipleOf_.IsUint64()) { + if (static_cast<uint64_t>(i >= 0 ? i : -i) % multipleOf_.GetUint64() != 0) + RAPIDJSON_INVALID_KEYWORD_RETURN(GetMultipleOfString()); + } + else if (!CheckDoubleMultipleOf(context, static_cast<double>(i))) + return false; + } + + return true; + } + + bool CheckUint(Context& context, uint64_t i) const { + if (!(type_ & ((1 << kIntegerSchemaType) | (1 << kNumberSchemaType)))) + RAPIDJSON_INVALID_KEYWORD_RETURN(GetTypeString()); + + if (!minimum_.IsNull()) { + if (minimum_.IsUint64()) { + if (exclusiveMinimum_ ? i <= minimum_.GetUint64() : i < minimum_.GetUint64()) + RAPIDJSON_INVALID_KEYWORD_RETURN(GetMinimumString()); + } + else if (minimum_.IsInt64()) + /* do nothing */; // i >= 0 > minimum.Getint64() + else if (!CheckDoubleMinimum(context, static_cast<double>(i))) + return false; + } + + if (!maximum_.IsNull()) { + if (maximum_.IsUint64()) { + if (exclusiveMaximum_ ? i >= maximum_.GetUint64() : i > maximum_.GetUint64()) + RAPIDJSON_INVALID_KEYWORD_RETURN(GetMaximumString()); + } + else if (maximum_.IsInt64()) + RAPIDJSON_INVALID_KEYWORD_RETURN(GetMaximumString()); // i >= 0 > maximum_ + else if (!CheckDoubleMaximum(context, static_cast<double>(i))) + return false; + } + + if (!multipleOf_.IsNull()) { + if (multipleOf_.IsUint64()) { + if (i % multipleOf_.GetUint64() != 0) + RAPIDJSON_INVALID_KEYWORD_RETURN(GetMultipleOfString()); + } + else if (!CheckDoubleMultipleOf(context, static_cast<double>(i))) + return false; + } + + return true; + } + + bool CheckDoubleMinimum(Context& context, double d) const { + if (exclusiveMinimum_ ? d <= minimum_.GetDouble() : d < minimum_.GetDouble()) + RAPIDJSON_INVALID_KEYWORD_RETURN(GetMinimumString()); + return true; + } + + bool CheckDoubleMaximum(Context& context, double d) const { + if (exclusiveMaximum_ ? d >= maximum_.GetDouble() : d > maximum_.GetDouble()) + RAPIDJSON_INVALID_KEYWORD_RETURN(GetMaximumString()); + return true; + } + + bool CheckDoubleMultipleOf(Context& context, double d) const { + double a = std::abs(d), b = std::abs(multipleOf_.GetDouble()); + double q = std::floor(a / b); + double r = a - q * b; + if (r > 0.0) + RAPIDJSON_INVALID_KEYWORD_RETURN(GetMultipleOfString()); + return true; + } + + struct Property { + Property() : schema(), dependenciesSchema(), dependenciesValidatorIndex(), dependencies(), required(false) {} + ~Property() { AllocatorType::Free(dependencies); } + SValue name; + const SchemaType* schema; + const SchemaType* dependenciesSchema; + SizeType dependenciesValidatorIndex; + bool* dependencies; + bool required; + }; + + struct PatternProperty { + PatternProperty() : schema(), pattern() {} + ~PatternProperty() { + if (pattern) { + pattern->~RegexType(); + AllocatorType::Free(pattern); + } + } + const SchemaType* schema; + RegexType* pattern; + }; + + AllocatorType* allocator_; + const SchemaType* typeless_; + uint64_t* enum_; + SizeType enumCount_; + SchemaArray allOf_; + SchemaArray anyOf_; + SchemaArray oneOf_; + const SchemaType* not_; + unsigned type_; // bitmask of kSchemaType + SizeType validatorCount_; + SizeType notValidatorIndex_; + + Property* properties_; + const SchemaType* additionalPropertiesSchema_; + PatternProperty* patternProperties_; + SizeType patternPropertyCount_; + SizeType propertyCount_; + SizeType minProperties_; + SizeType maxProperties_; + bool additionalProperties_; + bool hasDependencies_; + bool hasRequired_; + bool hasSchemaDependencies_; + + const SchemaType* additionalItemsSchema_; + const SchemaType* itemsList_; + const SchemaType** itemsTuple_; + SizeType itemsTupleCount_; + SizeType minItems_; + SizeType maxItems_; + bool additionalItems_; + bool uniqueItems_; + + RegexType* pattern_; + SizeType minLength_; + SizeType maxLength_; + + SValue minimum_; + SValue maximum_; + SValue multipleOf_; + bool exclusiveMinimum_; + bool exclusiveMaximum_; +}; + +template<typename Stack, typename Ch> +struct TokenHelper { + RAPIDJSON_FORCEINLINE static void AppendIndexToken(Stack& documentStack, SizeType index) { + *documentStack.template Push<Ch>() = '/'; + char buffer[21]; + size_t length = static_cast<size_t>((sizeof(SizeType) == 4 ? u32toa(index, buffer) : u64toa(index, buffer)) - buffer); + for (size_t i = 0; i < length; i++) + *documentStack.template Push<Ch>() = buffer[i]; + } +}; + +// Partial specialized version for char to prevent buffer copying. +template <typename Stack> +struct TokenHelper<Stack, char> { + RAPIDJSON_FORCEINLINE static void AppendIndexToken(Stack& documentStack, SizeType index) { + if (sizeof(SizeType) == 4) { + char *buffer = documentStack.template Push<char>(1 + 10); // '/' + uint + *buffer++ = '/'; + const char* end = internal::u32toa(index, buffer); + documentStack.template Pop<char>(static_cast<size_t>(10 - (end - buffer))); + } + else { + char *buffer = documentStack.template Push<char>(1 + 20); // '/' + uint64 + *buffer++ = '/'; + const char* end = internal::u64toa(index, buffer); + documentStack.template Pop<char>(static_cast<size_t>(20 - (end - buffer))); + } + } +}; + +} // namespace internal + +/////////////////////////////////////////////////////////////////////////////// +// IGenericRemoteSchemaDocumentProvider + +template <typename SchemaDocumentType> +class IGenericRemoteSchemaDocumentProvider { +public: + typedef typename SchemaDocumentType::Ch Ch; + + virtual ~IGenericRemoteSchemaDocumentProvider() {} + virtual const SchemaDocumentType* GetRemoteDocument(const Ch* uri, SizeType length) = 0; +}; + +/////////////////////////////////////////////////////////////////////////////// +// GenericSchemaDocument + +//! JSON schema document. +/*! + A JSON schema document is a compiled version of a JSON schema. + It is basically a tree of internal::Schema. + + \note This is an immutable class (i.e. its instance cannot be modified after construction). + \tparam ValueT Type of JSON value (e.g. \c Value ), which also determine the encoding. + \tparam Allocator Allocator type for allocating memory of this document. +*/ +template <typename ValueT, typename Allocator = CrtAllocator> +class GenericSchemaDocument { +public: + typedef ValueT ValueType; + typedef IGenericRemoteSchemaDocumentProvider<GenericSchemaDocument> IRemoteSchemaDocumentProviderType; + typedef Allocator AllocatorType; + typedef typename ValueType::EncodingType EncodingType; + typedef typename EncodingType::Ch Ch; + typedef internal::Schema<GenericSchemaDocument> SchemaType; + typedef GenericPointer<ValueType, Allocator> PointerType; + friend class internal::Schema<GenericSchemaDocument>; + template <typename, typename, typename> + friend class GenericSchemaValidator; + + //! Constructor. + /*! + Compile a JSON document into schema document. + + \param document A JSON document as source. + \param remoteProvider An optional remote schema document provider for resolving remote reference. Can be null. + \param allocator An optional allocator instance for allocating memory. Can be null. + */ + explicit GenericSchemaDocument(const ValueType& document, IRemoteSchemaDocumentProviderType* remoteProvider = 0, Allocator* allocator = 0) : + remoteProvider_(remoteProvider), + allocator_(allocator), + ownAllocator_(), + root_(), + typeless_(), + schemaMap_(allocator, kInitialSchemaMapSize), + schemaRef_(allocator, kInitialSchemaRefSize) + { + if (!allocator_) + ownAllocator_ = allocator_ = RAPIDJSON_NEW(Allocator()); + + typeless_ = static_cast<SchemaType*>(allocator_->Malloc(sizeof(SchemaType))); + new (typeless_) SchemaType(this, PointerType(), ValueType(kObjectType).Move(), ValueType(kObjectType).Move(), 0); + + // Generate root schema, it will call CreateSchema() to create sub-schemas, + // And call AddRefSchema() if there are $ref. + CreateSchemaRecursive(&root_, PointerType(), document, document); + + // Resolve $ref + while (!schemaRef_.Empty()) { + SchemaRefEntry* refEntry = schemaRef_.template Pop<SchemaRefEntry>(1); + if (const SchemaType* s = GetSchema(refEntry->target)) { + if (refEntry->schema) + *refEntry->schema = s; + + // Create entry in map if not exist + if (!GetSchema(refEntry->source)) { + new (schemaMap_.template Push<SchemaEntry>()) SchemaEntry(refEntry->source, const_cast<SchemaType*>(s), false, allocator_); + } + } + else if (refEntry->schema) + *refEntry->schema = typeless_; + + refEntry->~SchemaRefEntry(); + } + + RAPIDJSON_ASSERT(root_ != 0); + + schemaRef_.ShrinkToFit(); // Deallocate all memory for ref + } + +#if RAPIDJSON_HAS_CXX11_RVALUE_REFS + //! Move constructor in C++11 + GenericSchemaDocument(GenericSchemaDocument&& rhs) RAPIDJSON_NOEXCEPT : + remoteProvider_(rhs.remoteProvider_), + allocator_(rhs.allocator_), + ownAllocator_(rhs.ownAllocator_), + root_(rhs.root_), + typeless_(rhs.typeless_), + schemaMap_(std::move(rhs.schemaMap_)), + schemaRef_(std::move(rhs.schemaRef_)) + { + rhs.remoteProvider_ = 0; + rhs.allocator_ = 0; + rhs.ownAllocator_ = 0; + rhs.typeless_ = 0; + } +#endif + + //! Destructor + ~GenericSchemaDocument() { + while (!schemaMap_.Empty()) + schemaMap_.template Pop<SchemaEntry>(1)->~SchemaEntry(); + + if (typeless_) { + typeless_->~SchemaType(); + Allocator::Free(typeless_); + } + + RAPIDJSON_DELETE(ownAllocator_); + } + + //! Get the root schema. + const SchemaType& GetRoot() const { return *root_; } + +private: + //! Prohibit copying + GenericSchemaDocument(const GenericSchemaDocument&); + //! Prohibit assignment + GenericSchemaDocument& operator=(const GenericSchemaDocument&); + + struct SchemaRefEntry { + SchemaRefEntry(const PointerType& s, const PointerType& t, const SchemaType** outSchema, Allocator *allocator) : source(s, allocator), target(t, allocator), schema(outSchema) {} + PointerType source; + PointerType target; + const SchemaType** schema; + }; + + struct SchemaEntry { + SchemaEntry(const PointerType& p, SchemaType* s, bool o, Allocator* allocator) : pointer(p, allocator), schema(s), owned(o) {} + ~SchemaEntry() { + if (owned) { + schema->~SchemaType(); + Allocator::Free(schema); + } + } + PointerType pointer; + SchemaType* schema; + bool owned; + }; + + void CreateSchemaRecursive(const SchemaType** schema, const PointerType& pointer, const ValueType& v, const ValueType& document) { + if (schema) + *schema = typeless_; + + if (v.GetType() == kObjectType) { + const SchemaType* s = GetSchema(pointer); + if (!s) + CreateSchema(schema, pointer, v, document); + + for (typename ValueType::ConstMemberIterator itr = v.MemberBegin(); itr != v.MemberEnd(); ++itr) + CreateSchemaRecursive(0, pointer.Append(itr->name, allocator_), itr->value, document); + } + else if (v.GetType() == kArrayType) + for (SizeType i = 0; i < v.Size(); i++) + CreateSchemaRecursive(0, pointer.Append(i, allocator_), v[i], document); + } + + void CreateSchema(const SchemaType** schema, const PointerType& pointer, const ValueType& v, const ValueType& document) { + RAPIDJSON_ASSERT(pointer.IsValid()); + if (v.IsObject()) { + if (!HandleRefSchema(pointer, schema, v, document)) { + SchemaType* s = new (allocator_->Malloc(sizeof(SchemaType))) SchemaType(this, pointer, v, document, allocator_); + new (schemaMap_.template Push<SchemaEntry>()) SchemaEntry(pointer, s, true, allocator_); + if (schema) + *schema = s; + } + } + } + + bool HandleRefSchema(const PointerType& source, const SchemaType** schema, const ValueType& v, const ValueType& document) { + static const Ch kRefString[] = { '$', 'r', 'e', 'f', '\0' }; + static const ValueType kRefValue(kRefString, 4); + + typename ValueType::ConstMemberIterator itr = v.FindMember(kRefValue); + if (itr == v.MemberEnd()) + return false; + + if (itr->value.IsString()) { + SizeType len = itr->value.GetStringLength(); + if (len > 0) { + const Ch* s = itr->value.GetString(); + SizeType i = 0; + while (i < len && s[i] != '#') // Find the first # + i++; + + if (i > 0) { // Remote reference, resolve immediately + if (remoteProvider_) { + if (const GenericSchemaDocument* remoteDocument = remoteProvider_->GetRemoteDocument(s, i)) { + PointerType pointer(&s[i], len - i, allocator_); + if (pointer.IsValid()) { + if (const SchemaType* sc = remoteDocument->GetSchema(pointer)) { + if (schema) + *schema = sc; + return true; + } + } + } + } + } + else if (s[i] == '#') { // Local reference, defer resolution + PointerType pointer(&s[i], len - i, allocator_); + if (pointer.IsValid()) { + if (const ValueType* nv = pointer.Get(document)) + if (HandleRefSchema(source, schema, *nv, document)) + return true; + + new (schemaRef_.template Push<SchemaRefEntry>()) SchemaRefEntry(source, pointer, schema, allocator_); + return true; + } + } + } + } + return false; + } + + const SchemaType* GetSchema(const PointerType& pointer) const { + for (const SchemaEntry* target = schemaMap_.template Bottom<SchemaEntry>(); target != schemaMap_.template End<SchemaEntry>(); ++target) + if (pointer == target->pointer) + return target->schema; + return 0; + } + + PointerType GetPointer(const SchemaType* schema) const { + for (const SchemaEntry* target = schemaMap_.template Bottom<SchemaEntry>(); target != schemaMap_.template End<SchemaEntry>(); ++target) + if (schema == target->schema) + return target->pointer; + return PointerType(); + } + + const SchemaType* GetTypeless() const { return typeless_; } + + static const size_t kInitialSchemaMapSize = 64; + static const size_t kInitialSchemaRefSize = 64; + + IRemoteSchemaDocumentProviderType* remoteProvider_; + Allocator *allocator_; + Allocator *ownAllocator_; + const SchemaType* root_; //!< Root schema. + SchemaType* typeless_; + internal::Stack<Allocator> schemaMap_; // Stores created Pointer -> Schemas + internal::Stack<Allocator> schemaRef_; // Stores Pointer from $ref and schema which holds the $ref +}; + +//! GenericSchemaDocument using Value type. +typedef GenericSchemaDocument<Value> SchemaDocument; +//! IGenericRemoteSchemaDocumentProvider using SchemaDocument. +typedef IGenericRemoteSchemaDocumentProvider<SchemaDocument> IRemoteSchemaDocumentProvider; + +/////////////////////////////////////////////////////////////////////////////// +// GenericSchemaValidator + +//! JSON Schema Validator. +/*! + A SAX style JSON schema validator. + It uses a \c GenericSchemaDocument to validate SAX events. + It delegates the incoming SAX events to an output handler. + The default output handler does nothing. + It can be reused multiple times by calling \c Reset(). + + \tparam SchemaDocumentType Type of schema document. + \tparam OutputHandler Type of output handler. Default handler does nothing. + \tparam StateAllocator Allocator for storing the internal validation states. +*/ +template < + typename SchemaDocumentType, + typename OutputHandler = BaseReaderHandler<typename SchemaDocumentType::SchemaType::EncodingType>, + typename StateAllocator = CrtAllocator> +class GenericSchemaValidator : + public internal::ISchemaStateFactory<typename SchemaDocumentType::SchemaType>, + public internal::ISchemaValidator +{ +public: + typedef typename SchemaDocumentType::SchemaType SchemaType; + typedef typename SchemaDocumentType::PointerType PointerType; + typedef typename SchemaType::EncodingType EncodingType; + typedef typename EncodingType::Ch Ch; + + //! Constructor without output handler. + /*! + \param schemaDocument The schema document to conform to. + \param allocator Optional allocator for storing internal validation states. + \param schemaStackCapacity Optional initial capacity of schema path stack. + \param documentStackCapacity Optional initial capacity of document path stack. + */ + GenericSchemaValidator( + const SchemaDocumentType& schemaDocument, + StateAllocator* allocator = 0, + size_t schemaStackCapacity = kDefaultSchemaStackCapacity, + size_t documentStackCapacity = kDefaultDocumentStackCapacity) + : + schemaDocument_(&schemaDocument), + root_(schemaDocument.GetRoot()), + stateAllocator_(allocator), + ownStateAllocator_(0), + schemaStack_(allocator, schemaStackCapacity), + documentStack_(allocator, documentStackCapacity), + outputHandler_(CreateNullHandler()), + valid_(true) +#if RAPIDJSON_SCHEMA_VERBOSE + , depth_(0) +#endif + { + } + + //! Constructor with output handler. + /*! + \param schemaDocument The schema document to conform to. + \param allocator Optional allocator for storing internal validation states. + \param schemaStackCapacity Optional initial capacity of schema path stack. + \param documentStackCapacity Optional initial capacity of document path stack. + */ + GenericSchemaValidator( + const SchemaDocumentType& schemaDocument, + OutputHandler& outputHandler, + StateAllocator* allocator = 0, + size_t schemaStackCapacity = kDefaultSchemaStackCapacity, + size_t documentStackCapacity = kDefaultDocumentStackCapacity) + : + schemaDocument_(&schemaDocument), + root_(schemaDocument.GetRoot()), + stateAllocator_(allocator), + ownStateAllocator_(0), + schemaStack_(allocator, schemaStackCapacity), + documentStack_(allocator, documentStackCapacity), + outputHandler_(outputHandler), + nullHandler_(0), + valid_(true) +#if RAPIDJSON_SCHEMA_VERBOSE + , depth_(0) +#endif + { + } + + //! Destructor. + ~GenericSchemaValidator() { + Reset(); + if (nullHandler_) { + nullHandler_->~OutputHandler(); + StateAllocator::Free(nullHandler_); + } + RAPIDJSON_DELETE(ownStateAllocator_); + } + + //! Reset the internal states. + void Reset() { + while (!schemaStack_.Empty()) + PopSchema(); + documentStack_.Clear(); + valid_ = true; + } + + //! Checks whether the current state is valid. + // Implementation of ISchemaValidator + virtual bool IsValid() const { return valid_; } + + //! Gets the JSON pointer pointed to the invalid schema. + PointerType GetInvalidSchemaPointer() const { + return schemaStack_.Empty() ? PointerType() : schemaDocument_->GetPointer(&CurrentSchema()); + } + + //! Gets the keyword of invalid schema. + const Ch* GetInvalidSchemaKeyword() const { + return schemaStack_.Empty() ? 0 : CurrentContext().invalidKeyword; + } + + //! Gets the JSON pointer pointed to the invalid value. + PointerType GetInvalidDocumentPointer() const { + return documentStack_.Empty() ? PointerType() : PointerType(documentStack_.template Bottom<Ch>(), documentStack_.GetSize() / sizeof(Ch)); + } + +#if RAPIDJSON_SCHEMA_VERBOSE +#define RAPIDJSON_SCHEMA_HANDLE_BEGIN_VERBOSE_() \ +RAPIDJSON_MULTILINEMACRO_BEGIN\ + *documentStack_.template Push<Ch>() = '\0';\ + documentStack_.template Pop<Ch>(1);\ + internal::PrintInvalidDocument(documentStack_.template Bottom<Ch>());\ +RAPIDJSON_MULTILINEMACRO_END +#else +#define RAPIDJSON_SCHEMA_HANDLE_BEGIN_VERBOSE_() +#endif + +#define RAPIDJSON_SCHEMA_HANDLE_BEGIN_(method, arg1)\ + if (!valid_) return false; \ + if (!BeginValue() || !CurrentSchema().method arg1) {\ + RAPIDJSON_SCHEMA_HANDLE_BEGIN_VERBOSE_();\ + return valid_ = false;\ + } + +#define RAPIDJSON_SCHEMA_HANDLE_PARALLEL_(method, arg2)\ + for (Context* context = schemaStack_.template Bottom<Context>(); context != schemaStack_.template End<Context>(); context++) {\ + if (context->hasher)\ + static_cast<HasherType*>(context->hasher)->method arg2;\ + if (context->validators)\ + for (SizeType i_ = 0; i_ < context->validatorCount; i_++)\ + static_cast<GenericSchemaValidator*>(context->validators[i_])->method arg2;\ + if (context->patternPropertiesValidators)\ + for (SizeType i_ = 0; i_ < context->patternPropertiesValidatorCount; i_++)\ + static_cast<GenericSchemaValidator*>(context->patternPropertiesValidators[i_])->method arg2;\ + } + +#define RAPIDJSON_SCHEMA_HANDLE_END_(method, arg2)\ + return valid_ = EndValue() && outputHandler_.method arg2 + +#define RAPIDJSON_SCHEMA_HANDLE_VALUE_(method, arg1, arg2) \ + RAPIDJSON_SCHEMA_HANDLE_BEGIN_ (method, arg1);\ + RAPIDJSON_SCHEMA_HANDLE_PARALLEL_(method, arg2);\ + RAPIDJSON_SCHEMA_HANDLE_END_ (method, arg2) + + bool Null() { RAPIDJSON_SCHEMA_HANDLE_VALUE_(Null, (CurrentContext() ), ( )); } + bool Bool(bool b) { RAPIDJSON_SCHEMA_HANDLE_VALUE_(Bool, (CurrentContext(), b), (b)); } + bool Int(int i) { RAPIDJSON_SCHEMA_HANDLE_VALUE_(Int, (CurrentContext(), i), (i)); } + bool Uint(unsigned u) { RAPIDJSON_SCHEMA_HANDLE_VALUE_(Uint, (CurrentContext(), u), (u)); } + bool Int64(int64_t i) { RAPIDJSON_SCHEMA_HANDLE_VALUE_(Int64, (CurrentContext(), i), (i)); } + bool Uint64(uint64_t u) { RAPIDJSON_SCHEMA_HANDLE_VALUE_(Uint64, (CurrentContext(), u), (u)); } + bool Double(double d) { RAPIDJSON_SCHEMA_HANDLE_VALUE_(Double, (CurrentContext(), d), (d)); } + bool RawNumber(const Ch* str, SizeType length, bool copy) + { RAPIDJSON_SCHEMA_HANDLE_VALUE_(String, (CurrentContext(), str, length, copy), (str, length, copy)); } + bool String(const Ch* str, SizeType length, bool copy) + { RAPIDJSON_SCHEMA_HANDLE_VALUE_(String, (CurrentContext(), str, length, copy), (str, length, copy)); } + + bool StartObject() { + RAPIDJSON_SCHEMA_HANDLE_BEGIN_(StartObject, (CurrentContext())); + RAPIDJSON_SCHEMA_HANDLE_PARALLEL_(StartObject, ()); + return valid_ = outputHandler_.StartObject(); + } + + bool Key(const Ch* str, SizeType len, bool copy) { + if (!valid_) return false; + AppendToken(str, len); + if (!CurrentSchema().Key(CurrentContext(), str, len, copy)) return valid_ = false; + RAPIDJSON_SCHEMA_HANDLE_PARALLEL_(Key, (str, len, copy)); + return valid_ = outputHandler_.Key(str, len, copy); + } + + bool EndObject(SizeType memberCount) { + if (!valid_) return false; + RAPIDJSON_SCHEMA_HANDLE_PARALLEL_(EndObject, (memberCount)); + if (!CurrentSchema().EndObject(CurrentContext(), memberCount)) return valid_ = false; + RAPIDJSON_SCHEMA_HANDLE_END_(EndObject, (memberCount)); + } + + bool StartArray() { + RAPIDJSON_SCHEMA_HANDLE_BEGIN_(StartArray, (CurrentContext())); + RAPIDJSON_SCHEMA_HANDLE_PARALLEL_(StartArray, ()); + return valid_ = outputHandler_.StartArray(); + } + + bool EndArray(SizeType elementCount) { + if (!valid_) return false; + RAPIDJSON_SCHEMA_HANDLE_PARALLEL_(EndArray, (elementCount)); + if (!CurrentSchema().EndArray(CurrentContext(), elementCount)) return valid_ = false; + RAPIDJSON_SCHEMA_HANDLE_END_(EndArray, (elementCount)); + } + +#undef RAPIDJSON_SCHEMA_HANDLE_BEGIN_VERBOSE_ +#undef RAPIDJSON_SCHEMA_HANDLE_BEGIN_ +#undef RAPIDJSON_SCHEMA_HANDLE_PARALLEL_ +#undef RAPIDJSON_SCHEMA_HANDLE_VALUE_ + + // Implementation of ISchemaStateFactory<SchemaType> + virtual ISchemaValidator* CreateSchemaValidator(const SchemaType& root) { + return new (GetStateAllocator().Malloc(sizeof(GenericSchemaValidator))) GenericSchemaValidator(*schemaDocument_, root, +#if RAPIDJSON_SCHEMA_VERBOSE + depth_ + 1, +#endif + &GetStateAllocator()); + } + + virtual void DestroySchemaValidator(ISchemaValidator* validator) { + GenericSchemaValidator* v = static_cast<GenericSchemaValidator*>(validator); + v->~GenericSchemaValidator(); + StateAllocator::Free(v); + } + + virtual void* CreateHasher() { + return new (GetStateAllocator().Malloc(sizeof(HasherType))) HasherType(&GetStateAllocator()); + } + + virtual uint64_t GetHashCode(void* hasher) { + return static_cast<HasherType*>(hasher)->GetHashCode(); + } + + virtual void DestroryHasher(void* hasher) { + HasherType* h = static_cast<HasherType*>(hasher); + h->~HasherType(); + StateAllocator::Free(h); + } + + virtual void* MallocState(size_t size) { + return GetStateAllocator().Malloc(size); + } + + virtual void FreeState(void* p) { + return StateAllocator::Free(p); + } + +private: + typedef typename SchemaType::Context Context; + typedef GenericValue<UTF8<>, StateAllocator> HashCodeArray; + typedef internal::Hasher<EncodingType, StateAllocator> HasherType; + + GenericSchemaValidator( + const SchemaDocumentType& schemaDocument, + const SchemaType& root, +#if RAPIDJSON_SCHEMA_VERBOSE + unsigned depth, +#endif + StateAllocator* allocator = 0, + size_t schemaStackCapacity = kDefaultSchemaStackCapacity, + size_t documentStackCapacity = kDefaultDocumentStackCapacity) + : + schemaDocument_(&schemaDocument), + root_(root), + stateAllocator_(allocator), + ownStateAllocator_(0), + schemaStack_(allocator, schemaStackCapacity), + documentStack_(allocator, documentStackCapacity), + outputHandler_(CreateNullHandler()), + valid_(true) +#if RAPIDJSON_SCHEMA_VERBOSE + , depth_(depth) +#endif + { + } + + StateAllocator& GetStateAllocator() { + if (!stateAllocator_) + stateAllocator_ = ownStateAllocator_ = RAPIDJSON_NEW(StateAllocator()); + return *stateAllocator_; + } + + bool BeginValue() { + if (schemaStack_.Empty()) + PushSchema(root_); + else { + if (CurrentContext().inArray) + internal::TokenHelper<internal::Stack<StateAllocator>, Ch>::AppendIndexToken(documentStack_, CurrentContext().arrayElementIndex); + + if (!CurrentSchema().BeginValue(CurrentContext())) + return false; + + SizeType count = CurrentContext().patternPropertiesSchemaCount; + const SchemaType** sa = CurrentContext().patternPropertiesSchemas; + typename Context::PatternValidatorType patternValidatorType = CurrentContext().valuePatternValidatorType; + bool valueUniqueness = CurrentContext().valueUniqueness; + RAPIDJSON_ASSERT(CurrentContext().valueSchema); + PushSchema(*CurrentContext().valueSchema); + + if (count > 0) { + CurrentContext().objectPatternValidatorType = patternValidatorType; + ISchemaValidator**& va = CurrentContext().patternPropertiesValidators; + SizeType& validatorCount = CurrentContext().patternPropertiesValidatorCount; + va = static_cast<ISchemaValidator**>(MallocState(sizeof(ISchemaValidator*) * count)); + for (SizeType i = 0; i < count; i++) + va[validatorCount++] = CreateSchemaValidator(*sa[i]); + } + + CurrentContext().arrayUniqueness = valueUniqueness; + } + return true; + } + + bool EndValue() { + if (!CurrentSchema().EndValue(CurrentContext())) + return false; + +#if RAPIDJSON_SCHEMA_VERBOSE + GenericStringBuffer<EncodingType> sb; + schemaDocument_->GetPointer(&CurrentSchema()).Stringify(sb); + + *documentStack_.template Push<Ch>() = '\0'; + documentStack_.template Pop<Ch>(1); + internal::PrintValidatorPointers(depth_, sb.GetString(), documentStack_.template Bottom<Ch>()); +#endif + + uint64_t h = CurrentContext().arrayUniqueness ? static_cast<HasherType*>(CurrentContext().hasher)->GetHashCode() : 0; + + PopSchema(); + + if (!schemaStack_.Empty()) { + Context& context = CurrentContext(); + if (context.valueUniqueness) { + HashCodeArray* a = static_cast<HashCodeArray*>(context.arrayElementHashCodes); + if (!a) + CurrentContext().arrayElementHashCodes = a = new (GetStateAllocator().Malloc(sizeof(HashCodeArray))) HashCodeArray(kArrayType); + for (typename HashCodeArray::ConstValueIterator itr = a->Begin(); itr != a->End(); ++itr) + if (itr->GetUint64() == h) + RAPIDJSON_INVALID_KEYWORD_RETURN(SchemaType::GetUniqueItemsString()); + a->PushBack(h, GetStateAllocator()); + } + } + + // Remove the last token of document pointer + while (!documentStack_.Empty() && *documentStack_.template Pop<Ch>(1) != '/') + ; + + return true; + } + + void AppendToken(const Ch* str, SizeType len) { + documentStack_.template Reserve<Ch>(1 + len * 2); // worst case all characters are escaped as two characters + *documentStack_.template PushUnsafe<Ch>() = '/'; + for (SizeType i = 0; i < len; i++) { + if (str[i] == '~') { + *documentStack_.template PushUnsafe<Ch>() = '~'; + *documentStack_.template PushUnsafe<Ch>() = '0'; + } + else if (str[i] == '/') { + *documentStack_.template PushUnsafe<Ch>() = '~'; + *documentStack_.template PushUnsafe<Ch>() = '1'; + } + else + *documentStack_.template PushUnsafe<Ch>() = str[i]; + } + } + + RAPIDJSON_FORCEINLINE void PushSchema(const SchemaType& schema) { new (schemaStack_.template Push<Context>()) Context(*this, &schema); } + + RAPIDJSON_FORCEINLINE void PopSchema() { + Context* c = schemaStack_.template Pop<Context>(1); + if (HashCodeArray* a = static_cast<HashCodeArray*>(c->arrayElementHashCodes)) { + a->~HashCodeArray(); + StateAllocator::Free(a); + } + c->~Context(); + } + + const SchemaType& CurrentSchema() const { return *schemaStack_.template Top<Context>()->schema; } + Context& CurrentContext() { return *schemaStack_.template Top<Context>(); } + const Context& CurrentContext() const { return *schemaStack_.template Top<Context>(); } + + OutputHandler& CreateNullHandler() { + return *(nullHandler_ = static_cast<OutputHandler*>(GetStateAllocator().Malloc(sizeof(OutputHandler)))); + } + + static const size_t kDefaultSchemaStackCapacity = 1024; + static const size_t kDefaultDocumentStackCapacity = 256; + const SchemaDocumentType* schemaDocument_; + const SchemaType& root_; + StateAllocator* stateAllocator_; + StateAllocator* ownStateAllocator_; + internal::Stack<StateAllocator> schemaStack_; //!< stack to store the current path of schema (BaseSchemaType *) + internal::Stack<StateAllocator> documentStack_; //!< stack to store the current path of validating document (Ch) + OutputHandler& outputHandler_; + OutputHandler* nullHandler_; + bool valid_; +#if RAPIDJSON_SCHEMA_VERBOSE + unsigned depth_; +#endif +}; + +typedef GenericSchemaValidator<SchemaDocument> SchemaValidator; + +/////////////////////////////////////////////////////////////////////////////// +// SchemaValidatingReader + +//! A helper class for parsing with validation. +/*! + This helper class is a functor, designed as a parameter of \ref GenericDocument::Populate(). + + \tparam parseFlags Combination of \ref ParseFlag. + \tparam InputStream Type of input stream, implementing Stream concept. + \tparam SourceEncoding Encoding of the input stream. + \tparam SchemaDocumentType Type of schema document. + \tparam StackAllocator Allocator type for stack. +*/ +template < + unsigned parseFlags, + typename InputStream, + typename SourceEncoding, + typename SchemaDocumentType = SchemaDocument, + typename StackAllocator = CrtAllocator> +class SchemaValidatingReader { +public: + typedef typename SchemaDocumentType::PointerType PointerType; + typedef typename InputStream::Ch Ch; + + //! Constructor + /*! + \param is Input stream. + \param sd Schema document. + */ + SchemaValidatingReader(InputStream& is, const SchemaDocumentType& sd) : is_(is), sd_(sd), invalidSchemaKeyword_(), isValid_(true) {} + + template <typename Handler> + bool operator()(Handler& handler) { + GenericReader<SourceEncoding, typename SchemaDocumentType::EncodingType, StackAllocator> reader; + GenericSchemaValidator<SchemaDocumentType, Handler> validator(sd_, handler); + parseResult_ = reader.template Parse<parseFlags>(is_, validator); + + isValid_ = validator.IsValid(); + if (isValid_) { + invalidSchemaPointer_ = PointerType(); + invalidSchemaKeyword_ = 0; + invalidDocumentPointer_ = PointerType(); + } + else { + invalidSchemaPointer_ = validator.GetInvalidSchemaPointer(); + invalidSchemaKeyword_ = validator.GetInvalidSchemaKeyword(); + invalidDocumentPointer_ = validator.GetInvalidDocumentPointer(); + } + + return parseResult_; + } + + const ParseResult& GetParseResult() const { return parseResult_; } + bool IsValid() const { return isValid_; } + const PointerType& GetInvalidSchemaPointer() const { return invalidSchemaPointer_; } + const Ch* GetInvalidSchemaKeyword() const { return invalidSchemaKeyword_; } + const PointerType& GetInvalidDocumentPointer() const { return invalidDocumentPointer_; } + +private: + InputStream& is_; + const SchemaDocumentType& sd_; + + ParseResult parseResult_; + PointerType invalidSchemaPointer_; + const Ch* invalidSchemaKeyword_; + PointerType invalidDocumentPointer_; + bool isValid_; +}; + +RAPIDJSON_NAMESPACE_END +RAPIDJSON_DIAG_POP + +#endif // RAPIDJSON_SCHEMA_H_ diff --git a/xmrstak/rapidjson/stream.h b/xmrstak/rapidjson/stream.h new file mode 100644 index 0000000..fef82c2 --- /dev/null +++ b/xmrstak/rapidjson/stream.h @@ -0,0 +1,179 @@ +// Tencent is pleased to support the open source community by making RapidJSON available. +// +// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. +// +// Licensed under the MIT License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// http://opensource.org/licenses/MIT +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#include "rapidjson.h" + +#ifndef RAPIDJSON_STREAM_H_ +#define RAPIDJSON_STREAM_H_ + +#include "encodings.h" + +RAPIDJSON_NAMESPACE_BEGIN + +/////////////////////////////////////////////////////////////////////////////// +// Stream + +/*! \class rapidjson::Stream + \brief Concept for reading and writing characters. + + For read-only stream, no need to implement PutBegin(), Put(), Flush() and PutEnd(). + + For write-only stream, only need to implement Put() and Flush(). + +\code +concept Stream { + typename Ch; //!< Character type of the stream. + + //! Read the current character from stream without moving the read cursor. + Ch Peek() const; + + //! Read the current character from stream and moving the read cursor to next character. + Ch Take(); + + //! Get the current read cursor. + //! \return Number of characters read from start. + size_t Tell(); + + //! Begin writing operation at the current read pointer. + //! \return The begin writer pointer. + Ch* PutBegin(); + + //! Write a character. + void Put(Ch c); + + //! Flush the buffer. + void Flush(); + + //! End the writing operation. + //! \param begin The begin write pointer returned by PutBegin(). + //! \return Number of characters written. + size_t PutEnd(Ch* begin); +} +\endcode +*/ + +//! Provides additional information for stream. +/*! + By using traits pattern, this type provides a default configuration for stream. + For custom stream, this type can be specialized for other configuration. + See TEST(Reader, CustomStringStream) in readertest.cpp for example. +*/ +template<typename Stream> +struct StreamTraits { + //! Whether to make local copy of stream for optimization during parsing. + /*! + By default, for safety, streams do not use local copy optimization. + Stream that can be copied fast should specialize this, like StreamTraits<StringStream>. + */ + enum { copyOptimization = 0 }; +}; + +//! Reserve n characters for writing to a stream. +template<typename Stream> +inline void PutReserve(Stream& stream, size_t count) { + (void)stream; + (void)count; +} + +//! Write character to a stream, presuming buffer is reserved. +template<typename Stream> +inline void PutUnsafe(Stream& stream, typename Stream::Ch c) { + stream.Put(c); +} + +//! Put N copies of a character to a stream. +template<typename Stream, typename Ch> +inline void PutN(Stream& stream, Ch c, size_t n) { + PutReserve(stream, n); + for (size_t i = 0; i < n; i++) + PutUnsafe(stream, c); +} + +/////////////////////////////////////////////////////////////////////////////// +// StringStream + +//! Read-only string stream. +/*! \note implements Stream concept +*/ +template <typename Encoding> +struct GenericStringStream { + typedef typename Encoding::Ch Ch; + + GenericStringStream(const Ch *src) : src_(src), head_(src) {} + + Ch Peek() const { return *src_; } + Ch Take() { return *src_++; } + size_t Tell() const { return static_cast<size_t>(src_ - head_); } + + Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; } + void Put(Ch) { RAPIDJSON_ASSERT(false); } + void Flush() { RAPIDJSON_ASSERT(false); } + size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; } + + const Ch* src_; //!< Current read position. + const Ch* head_; //!< Original head of the string. +}; + +template <typename Encoding> +struct StreamTraits<GenericStringStream<Encoding> > { + enum { copyOptimization = 1 }; +}; + +//! String stream with UTF8 encoding. +typedef GenericStringStream<UTF8<> > StringStream; + +/////////////////////////////////////////////////////////////////////////////// +// InsituStringStream + +//! A read-write string stream. +/*! This string stream is particularly designed for in-situ parsing. + \note implements Stream concept +*/ +template <typename Encoding> +struct GenericInsituStringStream { + typedef typename Encoding::Ch Ch; + + GenericInsituStringStream(Ch *src) : src_(src), dst_(0), head_(src) {} + + // Read + Ch Peek() { return *src_; } + Ch Take() { return *src_++; } + size_t Tell() { return static_cast<size_t>(src_ - head_); } + + // Write + void Put(Ch c) { RAPIDJSON_ASSERT(dst_ != 0); *dst_++ = c; } + + Ch* PutBegin() { return dst_ = src_; } + size_t PutEnd(Ch* begin) { return static_cast<size_t>(dst_ - begin); } + void Flush() {} + + Ch* Push(size_t count) { Ch* begin = dst_; dst_ += count; return begin; } + void Pop(size_t count) { dst_ -= count; } + + Ch* src_; + Ch* dst_; + Ch* head_; +}; + +template <typename Encoding> +struct StreamTraits<GenericInsituStringStream<Encoding> > { + enum { copyOptimization = 1 }; +}; + +//! Insitu string stream with UTF8 encoding. +typedef GenericInsituStringStream<UTF8<> > InsituStringStream; + +RAPIDJSON_NAMESPACE_END + +#endif // RAPIDJSON_STREAM_H_ diff --git a/xmrstak/rapidjson/stringbuffer.h b/xmrstak/rapidjson/stringbuffer.h new file mode 100644 index 0000000..4e38b82 --- /dev/null +++ b/xmrstak/rapidjson/stringbuffer.h @@ -0,0 +1,121 @@ +// Tencent is pleased to support the open source community by making RapidJSON available. +// +// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. +// +// Licensed under the MIT License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// http://opensource.org/licenses/MIT +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef RAPIDJSON_STRINGBUFFER_H_ +#define RAPIDJSON_STRINGBUFFER_H_ + +#include "stream.h" +#include "internal/stack.h" + +#if RAPIDJSON_HAS_CXX11_RVALUE_REFS +#include <utility> // std::move +#endif + +#include "internal/stack.h" + +#if defined(__clang__) +RAPIDJSON_DIAG_PUSH +RAPIDJSON_DIAG_OFF(c++98-compat) +#endif + +RAPIDJSON_NAMESPACE_BEGIN + +//! Represents an in-memory output stream. +/*! + \tparam Encoding Encoding of the stream. + \tparam Allocator type for allocating memory buffer. + \note implements Stream concept +*/ +template <typename Encoding, typename Allocator = CrtAllocator> +class GenericStringBuffer { +public: + typedef typename Encoding::Ch Ch; + + GenericStringBuffer(Allocator* allocator = 0, size_t capacity = kDefaultCapacity) : stack_(allocator, capacity) {} + +#if RAPIDJSON_HAS_CXX11_RVALUE_REFS + GenericStringBuffer(GenericStringBuffer&& rhs) : stack_(std::move(rhs.stack_)) {} + GenericStringBuffer& operator=(GenericStringBuffer&& rhs) { + if (&rhs != this) + stack_ = std::move(rhs.stack_); + return *this; + } +#endif + + void Put(Ch c) { *stack_.template Push<Ch>() = c; } + void PutUnsafe(Ch c) { *stack_.template PushUnsafe<Ch>() = c; } + void Flush() {} + + void Clear() { stack_.Clear(); } + void ShrinkToFit() { + // Push and pop a null terminator. This is safe. + *stack_.template Push<Ch>() = '\0'; + stack_.ShrinkToFit(); + stack_.template Pop<Ch>(1); + } + + void Reserve(size_t count) { stack_.template Reserve<Ch>(count); } + Ch* Push(size_t count) { return stack_.template Push<Ch>(count); } + Ch* PushUnsafe(size_t count) { return stack_.template PushUnsafe<Ch>(count); } + void Pop(size_t count) { stack_.template Pop<Ch>(count); } + + const Ch* GetString() const { + // Push and pop a null terminator. This is safe. + *stack_.template Push<Ch>() = '\0'; + stack_.template Pop<Ch>(1); + + return stack_.template Bottom<Ch>(); + } + + //! Get the size of string in bytes in the string buffer. + size_t GetSize() const { return stack_.GetSize(); } + + //! Get the length of string in Ch in the string buffer. + size_t GetLength() const { return stack_.GetSize() / sizeof(Ch); } + + static const size_t kDefaultCapacity = 256; + mutable internal::Stack<Allocator> stack_; + +private: + // Prohibit copy constructor & assignment operator. + GenericStringBuffer(const GenericStringBuffer&); + GenericStringBuffer& operator=(const GenericStringBuffer&); +}; + +//! String buffer with UTF8 encoding +typedef GenericStringBuffer<UTF8<> > StringBuffer; + +template<typename Encoding, typename Allocator> +inline void PutReserve(GenericStringBuffer<Encoding, Allocator>& stream, size_t count) { + stream.Reserve(count); +} + +template<typename Encoding, typename Allocator> +inline void PutUnsafe(GenericStringBuffer<Encoding, Allocator>& stream, typename Encoding::Ch c) { + stream.PutUnsafe(c); +} + +//! Implement specialized version of PutN() with memset() for better performance. +template<> +inline void PutN(GenericStringBuffer<UTF8<> >& stream, char c, size_t n) { + std::memset(stream.stack_.Push<char>(n), c, n * sizeof(c)); +} + +RAPIDJSON_NAMESPACE_END + +#if defined(__clang__) +RAPIDJSON_DIAG_POP +#endif + +#endif // RAPIDJSON_STRINGBUFFER_H_ diff --git a/xmrstak/rapidjson/writer.h b/xmrstak/rapidjson/writer.h new file mode 100644 index 0000000..8f6e174 --- /dev/null +++ b/xmrstak/rapidjson/writer.h @@ -0,0 +1,624 @@ +// Tencent is pleased to support the open source community by making RapidJSON available. +// +// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. +// +// Licensed under the MIT License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// http://opensource.org/licenses/MIT +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef RAPIDJSON_WRITER_H_ +#define RAPIDJSON_WRITER_H_ + +#include "stream.h" +#include "internal/stack.h" +#include "internal/strfunc.h" +#include "internal/dtoa.h" +#include "internal/itoa.h" +#include "stringbuffer.h" +#include <new> // placement new + +#if defined(RAPIDJSON_SIMD) && defined(_MSC_VER) +#include <intrin.h> +#pragma intrinsic(_BitScanForward) +#endif +#ifdef RAPIDJSON_SSE42 +#include <nmmintrin.h> +#elif defined(RAPIDJSON_SSE2) +#include <emmintrin.h> +#endif + +#ifdef _MSC_VER +RAPIDJSON_DIAG_PUSH +RAPIDJSON_DIAG_OFF(4127) // conditional expression is constant +#endif + +#ifdef __clang__ +RAPIDJSON_DIAG_PUSH +RAPIDJSON_DIAG_OFF(padded) +RAPIDJSON_DIAG_OFF(unreachable-code) +RAPIDJSON_DIAG_OFF(c++98-compat) +#endif + +RAPIDJSON_NAMESPACE_BEGIN + +/////////////////////////////////////////////////////////////////////////////// +// WriteFlag + +/*! \def RAPIDJSON_WRITE_DEFAULT_FLAGS + \ingroup RAPIDJSON_CONFIG + \brief User-defined kWriteDefaultFlags definition. + + User can define this as any \c WriteFlag combinations. +*/ +#ifndef RAPIDJSON_WRITE_DEFAULT_FLAGS +#define RAPIDJSON_WRITE_DEFAULT_FLAGS kWriteNoFlags +#endif + +//! Combination of writeFlags +enum WriteFlag { + kWriteNoFlags = 0, //!< No flags are set. + kWriteValidateEncodingFlag = 1, //!< Validate encoding of JSON strings. + kWriteNanAndInfFlag = 2, //!< Allow writing of Infinity, -Infinity and NaN. + kWriteDefaultFlags = RAPIDJSON_WRITE_DEFAULT_FLAGS //!< Default write flags. Can be customized by defining RAPIDJSON_WRITE_DEFAULT_FLAGS +}; + +//! JSON writer +/*! Writer implements the concept Handler. + It generates JSON text by events to an output os. + + User may programmatically calls the functions of a writer to generate JSON text. + + On the other side, a writer can also be passed to objects that generates events, + + for example Reader::Parse() and Document::Accept(). + + \tparam OutputStream Type of output stream. + \tparam SourceEncoding Encoding of source string. + \tparam TargetEncoding Encoding of output stream. + \tparam StackAllocator Type of allocator for allocating memory of stack. + \note implements Handler concept +*/ +template<typename OutputStream, typename SourceEncoding = UTF8<>, typename TargetEncoding = UTF8<>, typename StackAllocator = CrtAllocator, unsigned writeFlags = kWriteDefaultFlags> +class Writer { +public: + typedef typename SourceEncoding::Ch Ch; + + static const int kDefaultMaxDecimalPlaces = 324; + + //! Constructor + /*! \param os Output stream. + \param stackAllocator User supplied allocator. If it is null, it will create a private one. + \param levelDepth Initial capacity of stack. + */ + explicit + Writer(OutputStream& os, StackAllocator* stackAllocator = 0, size_t levelDepth = kDefaultLevelDepth) : + os_(&os), level_stack_(stackAllocator, levelDepth * sizeof(Level)), maxDecimalPlaces_(kDefaultMaxDecimalPlaces), hasRoot_(false) {} + + explicit + Writer(StackAllocator* allocator = 0, size_t levelDepth = kDefaultLevelDepth) : + os_(0), level_stack_(allocator, levelDepth * sizeof(Level)), maxDecimalPlaces_(kDefaultMaxDecimalPlaces), hasRoot_(false) {} + +#if RAPIDJSON_HAS_CXX11_RVALUE_REFS + Writer(Writer&& rhs) : + os_(rhs.os_), level_stack_(std::move(rhs.level_stack_)), maxDecimalPlaces_(rhs.maxDecimalPlaces_), hasRoot_(rhs.hasRoot_) { + rhs.os_ = 0; + } +#endif + + //! Reset the writer with a new stream. + /*! + This function reset the writer with a new stream and default settings, + in order to make a Writer object reusable for output multiple JSONs. + + \param os New output stream. + \code + Writer<OutputStream> writer(os1); + writer.StartObject(); + // ... + writer.EndObject(); + + writer.Reset(os2); + writer.StartObject(); + // ... + writer.EndObject(); + \endcode + */ + void Reset(OutputStream& os) { + os_ = &os; + hasRoot_ = false; + level_stack_.Clear(); + } + + //! Checks whether the output is a complete JSON. + /*! + A complete JSON has a complete root object or array. + */ + bool IsComplete() const { + return hasRoot_ && level_stack_.Empty(); + } + + int GetMaxDecimalPlaces() const { + return maxDecimalPlaces_; + } + + //! Sets the maximum number of decimal places for double output. + /*! + This setting truncates the output with specified number of decimal places. + + For example, + + \code + writer.SetMaxDecimalPlaces(3); + writer.StartArray(); + writer.Double(0.12345); // "0.123" + writer.Double(0.0001); // "0.0" + writer.Double(1.234567890123456e30); // "1.234567890123456e30" (do not truncate significand for positive exponent) + writer.Double(1.23e-4); // "0.0" (do truncate significand for negative exponent) + writer.EndArray(); + \endcode + + The default setting does not truncate any decimal places. You can restore to this setting by calling + \code + writer.SetMaxDecimalPlaces(Writer::kDefaultMaxDecimalPlaces); + \endcode + */ + void SetMaxDecimalPlaces(int maxDecimalPlaces) { + maxDecimalPlaces_ = maxDecimalPlaces; + } + + /*!@name Implementation of Handler + \see Handler + */ + //@{ + + bool Null() { Prefix(kNullType); return EndValue(WriteNull()); } + bool Bool(bool b) { Prefix(b ? kTrueType : kFalseType); return EndValue(WriteBool(b)); } + bool Int(int i) { Prefix(kNumberType); return EndValue(WriteInt(i)); } + bool Uint(unsigned u) { Prefix(kNumberType); return EndValue(WriteUint(u)); } + bool Int64(int64_t i64) { Prefix(kNumberType); return EndValue(WriteInt64(i64)); } + bool Uint64(uint64_t u64) { Prefix(kNumberType); return EndValue(WriteUint64(u64)); } + + //! Writes the given \c double value to the stream + /*! + \param d The value to be written. + \return Whether it is succeed. + */ + bool Double(double d) { Prefix(kNumberType); return EndValue(WriteDouble(d)); } + + bool RawNumber(const Ch* str, SizeType length, bool copy = false) { + RAPIDJSON_ASSERT(str != 0); + (void)copy; + Prefix(kNumberType); + return EndValue(WriteString(str, length)); + } + + bool String(const Ch* str, SizeType length, bool copy = false) { + RAPIDJSON_ASSERT(str != 0); + (void)copy; + Prefix(kStringType); + return EndValue(WriteString(str, length)); + } + +#if RAPIDJSON_HAS_STDSTRING + bool String(const std::basic_string<Ch>& str) { + return String(str.data(), SizeType(str.size())); + } +#endif + + bool StartObject() { + Prefix(kObjectType); + new (level_stack_.template Push<Level>()) Level(false); + return WriteStartObject(); + } + + bool Key(const Ch* str, SizeType length, bool copy = false) { return String(str, length, copy); } + + bool EndObject(SizeType memberCount = 0) { + (void)memberCount; + RAPIDJSON_ASSERT(level_stack_.GetSize() >= sizeof(Level)); + RAPIDJSON_ASSERT(!level_stack_.template Top<Level>()->inArray); + level_stack_.template Pop<Level>(1); + return EndValue(WriteEndObject()); + } + + bool StartArray() { + Prefix(kArrayType); + new (level_stack_.template Push<Level>()) Level(true); + return WriteStartArray(); + } + + bool EndArray(SizeType elementCount = 0) { + (void)elementCount; + RAPIDJSON_ASSERT(level_stack_.GetSize() >= sizeof(Level)); + RAPIDJSON_ASSERT(level_stack_.template Top<Level>()->inArray); + level_stack_.template Pop<Level>(1); + return EndValue(WriteEndArray()); + } + //@} + + /*! @name Convenience extensions */ + //@{ + + //! Simpler but slower overload. + bool String(const Ch* str) { return String(str, internal::StrLen(str)); } + bool Key(const Ch* str) { return Key(str, internal::StrLen(str)); } + + //@} + + //! Write a raw JSON value. + /*! + For user to write a stringified JSON as a value. + + \param json A well-formed JSON value. It should not contain null character within [0, length - 1] range. + \param length Length of the json. + \param type Type of the root of json. + */ + bool RawValue(const Ch* json, size_t length, Type type) { + RAPIDJSON_ASSERT(json != 0); + Prefix(type); + return EndValue(WriteRawValue(json, length)); + } + +protected: + //! Information for each nested level + struct Level { + Level(bool inArray_) : valueCount(0), inArray(inArray_) {} + size_t valueCount; //!< number of values in this level + bool inArray; //!< true if in array, otherwise in object + }; + + static const size_t kDefaultLevelDepth = 32; + + bool WriteNull() { + PutReserve(*os_, 4); + PutUnsafe(*os_, 'n'); PutUnsafe(*os_, 'u'); PutUnsafe(*os_, 'l'); PutUnsafe(*os_, 'l'); return true; + } + + bool WriteBool(bool b) { + if (b) { + PutReserve(*os_, 4); + PutUnsafe(*os_, 't'); PutUnsafe(*os_, 'r'); PutUnsafe(*os_, 'u'); PutUnsafe(*os_, 'e'); + } + else { + PutReserve(*os_, 5); + PutUnsafe(*os_, 'f'); PutUnsafe(*os_, 'a'); PutUnsafe(*os_, 'l'); PutUnsafe(*os_, 's'); PutUnsafe(*os_, 'e'); + } + return true; + } + + bool WriteInt(int i) { + char buffer[11]; + const char* end = internal::i32toa(i, buffer); + PutReserve(*os_, static_cast<size_t>(end - buffer)); + for (const char* p = buffer; p != end; ++p) + PutUnsafe(*os_, static_cast<typename TargetEncoding::Ch>(*p)); + return true; + } + + bool WriteUint(unsigned u) { + char buffer[10]; + const char* end = internal::u32toa(u, buffer); + PutReserve(*os_, static_cast<size_t>(end - buffer)); + for (const char* p = buffer; p != end; ++p) + PutUnsafe(*os_, static_cast<typename TargetEncoding::Ch>(*p)); + return true; + } + + bool WriteInt64(int64_t i64) { + char buffer[21]; + const char* end = internal::i64toa(i64, buffer); + PutReserve(*os_, static_cast<size_t>(end - buffer)); + for (const char* p = buffer; p != end; ++p) + PutUnsafe(*os_, static_cast<typename TargetEncoding::Ch>(*p)); + return true; + } + + bool WriteUint64(uint64_t u64) { + char buffer[20]; + char* end = internal::u64toa(u64, buffer); + PutReserve(*os_, static_cast<size_t>(end - buffer)); + for (char* p = buffer; p != end; ++p) + PutUnsafe(*os_, static_cast<typename TargetEncoding::Ch>(*p)); + return true; + } + + bool WriteDouble(double d) { + if (internal::Double(d).IsNanOrInf()) { + if (!(writeFlags & kWriteNanAndInfFlag)) + return false; + if (internal::Double(d).IsNan()) { + PutReserve(*os_, 3); + PutUnsafe(*os_, 'N'); PutUnsafe(*os_, 'a'); PutUnsafe(*os_, 'N'); + return true; + } + if (internal::Double(d).Sign()) { + PutReserve(*os_, 9); + PutUnsafe(*os_, '-'); + } + else + PutReserve(*os_, 8); + PutUnsafe(*os_, 'I'); PutUnsafe(*os_, 'n'); PutUnsafe(*os_, 'f'); + PutUnsafe(*os_, 'i'); PutUnsafe(*os_, 'n'); PutUnsafe(*os_, 'i'); PutUnsafe(*os_, 't'); PutUnsafe(*os_, 'y'); + return true; + } + + char buffer[25]; + char* end = internal::dtoa(d, buffer, maxDecimalPlaces_); + PutReserve(*os_, static_cast<size_t>(end - buffer)); + for (char* p = buffer; p != end; ++p) + PutUnsafe(*os_, static_cast<typename TargetEncoding::Ch>(*p)); + return true; + } + + bool WriteString(const Ch* str, SizeType length) { + static const typename TargetEncoding::Ch hexDigits[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' }; + static const char escape[256] = { +#define Z16 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 + //0 1 2 3 4 5 6 7 8 9 A B C D E F + 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'b', 't', 'n', 'u', 'f', 'r', 'u', 'u', // 00 + 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', // 10 + 0, 0, '"', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 20 + Z16, Z16, // 30~4F + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,'\\', 0, 0, 0, // 50 + Z16, Z16, Z16, Z16, Z16, Z16, Z16, Z16, Z16, Z16 // 60~FF +#undef Z16 + }; + + if (TargetEncoding::supportUnicode) + PutReserve(*os_, 2 + length * 6); // "\uxxxx..." + else + PutReserve(*os_, 2 + length * 12); // "\uxxxx\uyyyy..." + + PutUnsafe(*os_, '\"'); + GenericStringStream<SourceEncoding> is(str); + while (ScanWriteUnescapedString(is, length)) { + const Ch c = is.Peek(); + if (!TargetEncoding::supportUnicode && static_cast<unsigned>(c) >= 0x80) { + // Unicode escaping + unsigned codepoint; + if (RAPIDJSON_UNLIKELY(!SourceEncoding::Decode(is, &codepoint))) + return false; + PutUnsafe(*os_, '\\'); + PutUnsafe(*os_, 'u'); + if (codepoint <= 0xD7FF || (codepoint >= 0xE000 && codepoint <= 0xFFFF)) { + PutUnsafe(*os_, hexDigits[(codepoint >> 12) & 15]); + PutUnsafe(*os_, hexDigits[(codepoint >> 8) & 15]); + PutUnsafe(*os_, hexDigits[(codepoint >> 4) & 15]); + PutUnsafe(*os_, hexDigits[(codepoint ) & 15]); + } + else { + RAPIDJSON_ASSERT(codepoint >= 0x010000 && codepoint <= 0x10FFFF); + // Surrogate pair + unsigned s = codepoint - 0x010000; + unsigned lead = (s >> 10) + 0xD800; + unsigned trail = (s & 0x3FF) + 0xDC00; + PutUnsafe(*os_, hexDigits[(lead >> 12) & 15]); + PutUnsafe(*os_, hexDigits[(lead >> 8) & 15]); + PutUnsafe(*os_, hexDigits[(lead >> 4) & 15]); + PutUnsafe(*os_, hexDigits[(lead ) & 15]); + PutUnsafe(*os_, '\\'); + PutUnsafe(*os_, 'u'); + PutUnsafe(*os_, hexDigits[(trail >> 12) & 15]); + PutUnsafe(*os_, hexDigits[(trail >> 8) & 15]); + PutUnsafe(*os_, hexDigits[(trail >> 4) & 15]); + PutUnsafe(*os_, hexDigits[(trail ) & 15]); + } + } + else if ((sizeof(Ch) == 1 || static_cast<unsigned>(c) < 256) && RAPIDJSON_UNLIKELY(escape[static_cast<unsigned char>(c)])) { + is.Take(); + PutUnsafe(*os_, '\\'); + PutUnsafe(*os_, static_cast<typename TargetEncoding::Ch>(escape[static_cast<unsigned char>(c)])); + if (escape[static_cast<unsigned char>(c)] == 'u') { + PutUnsafe(*os_, '0'); + PutUnsafe(*os_, '0'); + PutUnsafe(*os_, hexDigits[static_cast<unsigned char>(c) >> 4]); + PutUnsafe(*os_, hexDigits[static_cast<unsigned char>(c) & 0xF]); + } + } + else if (RAPIDJSON_UNLIKELY(!(writeFlags & kWriteValidateEncodingFlag ? + Transcoder<SourceEncoding, TargetEncoding>::Validate(is, *os_) : + Transcoder<SourceEncoding, TargetEncoding>::TranscodeUnsafe(is, *os_)))) + return false; + } + PutUnsafe(*os_, '\"'); + return true; + } + + bool ScanWriteUnescapedString(GenericStringStream<SourceEncoding>& is, size_t length) { + return RAPIDJSON_LIKELY(is.Tell() < length); + } + + bool WriteStartObject() { os_->Put('{'); return true; } + bool WriteEndObject() { os_->Put('}'); return true; } + bool WriteStartArray() { os_->Put('['); return true; } + bool WriteEndArray() { os_->Put(']'); return true; } + + bool WriteRawValue(const Ch* json, size_t length) { + PutReserve(*os_, length); + for (size_t i = 0; i < length; i++) { + RAPIDJSON_ASSERT(json[i] != '\0'); + PutUnsafe(*os_, json[i]); + } + return true; + } + + void Prefix(Type type) { + (void)type; + if (RAPIDJSON_LIKELY(level_stack_.GetSize() != 0)) { // this value is not at root + Level* level = level_stack_.template Top<Level>(); + if (level->valueCount > 0) { + if (level->inArray) + os_->Put(','); // add comma if it is not the first element in array + else // in object + os_->Put((level->valueCount % 2 == 0) ? ',' : ':'); + } + if (!level->inArray && level->valueCount % 2 == 0) + RAPIDJSON_ASSERT(type == kStringType); // if it's in object, then even number should be a name + level->valueCount++; + } + else { + RAPIDJSON_ASSERT(!hasRoot_); // Should only has one and only one root. + hasRoot_ = true; + } + } + + // Flush the value if it is the top level one. + bool EndValue(bool ret) { + if (RAPIDJSON_UNLIKELY(level_stack_.Empty())) // end of json text + os_->Flush(); + return ret; + } + + OutputStream* os_; + internal::Stack<StackAllocator> level_stack_; + int maxDecimalPlaces_; + bool hasRoot_; + +private: + // Prohibit copy constructor & assignment operator. + Writer(const Writer&); + Writer& operator=(const Writer&); +}; + +// Full specialization for StringStream to prevent memory copying + +template<> +inline bool Writer<StringBuffer>::WriteInt(int i) { + char *buffer = os_->Push(11); + const char* end = internal::i32toa(i, buffer); + os_->Pop(static_cast<size_t>(11 - (end - buffer))); + return true; +} + +template<> +inline bool Writer<StringBuffer>::WriteUint(unsigned u) { + char *buffer = os_->Push(10); + const char* end = internal::u32toa(u, buffer); + os_->Pop(static_cast<size_t>(10 - (end - buffer))); + return true; +} + +template<> +inline bool Writer<StringBuffer>::WriteInt64(int64_t i64) { + char *buffer = os_->Push(21); + const char* end = internal::i64toa(i64, buffer); + os_->Pop(static_cast<size_t>(21 - (end - buffer))); + return true; +} + +template<> +inline bool Writer<StringBuffer>::WriteUint64(uint64_t u) { + char *buffer = os_->Push(20); + const char* end = internal::u64toa(u, buffer); + os_->Pop(static_cast<size_t>(20 - (end - buffer))); + return true; +} + +template<> +inline bool Writer<StringBuffer>::WriteDouble(double d) { + if (internal::Double(d).IsNanOrInf()) { + // Note: This code path can only be reached if (RAPIDJSON_WRITE_DEFAULT_FLAGS & kWriteNanAndInfFlag). + if (!(kWriteDefaultFlags & kWriteNanAndInfFlag)) + return false; + if (internal::Double(d).IsNan()) { + PutReserve(*os_, 3); + PutUnsafe(*os_, 'N'); PutUnsafe(*os_, 'a'); PutUnsafe(*os_, 'N'); + return true; + } + if (internal::Double(d).Sign()) { + PutReserve(*os_, 9); + PutUnsafe(*os_, '-'); + } + else + PutReserve(*os_, 8); + PutUnsafe(*os_, 'I'); PutUnsafe(*os_, 'n'); PutUnsafe(*os_, 'f'); + PutUnsafe(*os_, 'i'); PutUnsafe(*os_, 'n'); PutUnsafe(*os_, 'i'); PutUnsafe(*os_, 't'); PutUnsafe(*os_, 'y'); + return true; + } + + char *buffer = os_->Push(25); + char* end = internal::dtoa(d, buffer, maxDecimalPlaces_); + os_->Pop(static_cast<size_t>(25 - (end - buffer))); + return true; +} + +#if defined(RAPIDJSON_SSE2) || defined(RAPIDJSON_SSE42) +template<> +inline bool Writer<StringBuffer>::ScanWriteUnescapedString(StringStream& is, size_t length) { + if (length < 16) + return RAPIDJSON_LIKELY(is.Tell() < length); + + if (!RAPIDJSON_LIKELY(is.Tell() < length)) + return false; + + const char* p = is.src_; + const char* end = is.head_ + length; + const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15)); + const char* endAligned = reinterpret_cast<const char*>(reinterpret_cast<size_t>(end) & static_cast<size_t>(~15)); + if (nextAligned > end) + return true; + + while (p != nextAligned) + if (*p < 0x20 || *p == '\"' || *p == '\\') { + is.src_ = p; + return RAPIDJSON_LIKELY(is.Tell() < length); + } + else + os_->PutUnsafe(*p++); + + // The rest of string using SIMD + static const char dquote[16] = { '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"' }; + static const char bslash[16] = { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' }; + static const char space[16] = { 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19 }; + const __m128i dq = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&dquote[0])); + const __m128i bs = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&bslash[0])); + const __m128i sp = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&space[0])); + + for (; p != endAligned; p += 16) { + const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p)); + const __m128i t1 = _mm_cmpeq_epi8(s, dq); + const __m128i t2 = _mm_cmpeq_epi8(s, bs); + const __m128i t3 = _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x19) == 0x19 + const __m128i x = _mm_or_si128(_mm_or_si128(t1, t2), t3); + unsigned short r = static_cast<unsigned short>(_mm_movemask_epi8(x)); + if (RAPIDJSON_UNLIKELY(r != 0)) { // some of characters is escaped + SizeType len; +#ifdef _MSC_VER // Find the index of first escaped + unsigned long offset; + _BitScanForward(&offset, r); + len = offset; +#else + len = static_cast<SizeType>(__builtin_ffs(r) - 1); +#endif + char* q = reinterpret_cast<char*>(os_->PushUnsafe(len)); + for (size_t i = 0; i < len; i++) + q[i] = p[i]; + + p += len; + break; + } + _mm_storeu_si128(reinterpret_cast<__m128i *>(os_->PushUnsafe(16)), s); + } + + is.src_ = p; + return RAPIDJSON_LIKELY(is.Tell() < length); +} +#endif // defined(RAPIDJSON_SSE2) || defined(RAPIDJSON_SSE42) + +RAPIDJSON_NAMESPACE_END + +#ifdef _MSC_VER +RAPIDJSON_DIAG_POP +#endif + +#ifdef __clang__ +RAPIDJSON_DIAG_POP +#endif + +#endif // RAPIDJSON_RAPIDJSON_H_ diff --git a/xmrstak/version.hpp b/xmrstak/version.hpp new file mode 100644 index 0000000..44214c8 --- /dev/null +++ b/xmrstak/version.hpp @@ -0,0 +1,4 @@ +#pragma once + +#define XMR_STAK_NAME "xmr-stak" +#define XMR_STAK_VERSION "2.0.0-predev" |