summaryrefslogtreecommitdiffstats
path: root/xmrstak/backend/cpu/autoAdjustHwloc.hpp
diff options
context:
space:
mode:
authorpsychocrypt <psychocrypt@users.noreply.github.com>2017-09-29 20:32:31 +0200
committerpsychocrypt <psychocrypt@users.noreply.github.com>2017-09-30 23:46:08 +0200
commitcc429b68fadc502b981fd0acd64a5ff6e2ae1d15 (patch)
tree3fb23fc4db15dbdd08af4c7ea20134b9d82e58fd /xmrstak/backend/cpu/autoAdjustHwloc.hpp
parente5b0319d5a9f58762fa934ad700113908940cb31 (diff)
downloadxmr-stak-cc429b68fadc502b981fd0acd64a5ff6e2ae1d15.zip
xmr-stak-cc429b68fadc502b981fd0acd64a5ff6e2ae1d15.tar.gz
group files
- move source code to `src` - categorize files and move to group folder - change upper case class files to lower case - change C++ header to `*.hpp`
Diffstat (limited to 'xmrstak/backend/cpu/autoAdjustHwloc.hpp')
-rw-r--r--xmrstak/backend/cpu/autoAdjustHwloc.hpp210
1 files changed, 210 insertions, 0 deletions
diff --git a/xmrstak/backend/cpu/autoAdjustHwloc.hpp b/xmrstak/backend/cpu/autoAdjustHwloc.hpp
new file mode 100644
index 0000000..e1916e0
--- /dev/null
+++ b/xmrstak/backend/cpu/autoAdjustHwloc.hpp
@@ -0,0 +1,210 @@
+#pragma once
+
+#include "../../console.h"
+#include <hwloc.h>
+#include <stdio.h>
+#include "../../Params.hpp"
+
+#ifdef _WIN32
+#include <windows.h>
+#else
+#include <unistd.h>
+#endif // _WIN32
+
+#include <cstdint>
+#include <stdexcept>
+#include <string>
+#include <vector>
+#include "../../ConfigEditor.hpp"
+
+namespace xmrstak
+{
+namespace cpu
+{
+
+/**
+ * Creates the thread section of the CPU backend configuration from the
+ * hardware topology reported by hwloc.
+ *
+ * The topology is searched for the first cache objects below the root. For
+ * each of them the number of hashes fitting into the cache is estimated and
+ * spread over the processing units (PUs) located below that cache.
+ */
+class autoAdjust
+{
+public:
+
+	autoAdjust() = default;
+
+	/**
+	 * Detect the topology, build the thread configuration and write it,
+	 * embedded into the backend config template, to the CPU config file.
+	 *
+	 * If the topology can not be initialized, loaded or evaluated, the failure
+	 * is reported and a configuration with one single thread that is not bound
+	 * to a CPU is written instead.
+	 *
+	 * @return always true
+	 */
+	bool printConfig()
+	{
+		// drop entries of a previous run, otherwise a second call would emit them again
+		results.clear();
+
+		std::string conf;
+		ConfigEditor configTpl{};
+
+		// load the template of the backend config into a char variable
+		const char *tpl =
+			#include "./config.tpl"
+		;
+		configTpl.set( std::string(tpl) );
+
+		// releases the topology on every exit path, also for exceptions that are not handled below
+		topologyGuard topology;
+
+		try
+		{
+			if(hwloc_topology_init(&topology.handle) != 0)
+			{
+				// there is no topology to destroy after a failed init
+				topology.handle = nullptr;
+				throw std::runtime_error("hwloc could not initialize the topology");
+			}
+
+			// a topology whose load failed must not be queried, but it is still destroyed by the guard
+			if(hwloc_topology_load(topology.handle) != 0)
+				throw std::runtime_error("hwloc could not load the topology");
+
+			std::vector<hwloc_obj_t> tlcs;
+			tlcs.reserve(16);
+			results.reserve(16);
+
+			findChildrenCaches(hwloc_get_root_obj(topology.handle),
+				[&tlcs](hwloc_obj_t found) { tlcs.emplace_back(found); } );
+
+			if(tlcs.empty())
+				throw std::runtime_error("The CPU doesn't seem to have a cache.");
+
+			for(hwloc_obj_t obj : tlcs)
+				processTopLevelCache(obj);
+
+			// one config line per selected PU
+			for(uint32_t id : results)
+			{
+				conf += std::string(" { \"low_power_mode\" : ");
+				conf += std::string((id & doubleHashFlag) != 0 ? "true" : "false");
+				conf += std::string(", \"no_prefetch\" : true, \"affine_to_cpu\" : ");
+				conf += std::to_string(id & cpuIdMask);
+				conf += std::string(" },\n");
+			}
+		}
+		catch(const std::runtime_error& err)
+		{
+			// \todo add fallback to default auto adjust
+			conf += std::string(" { \"low_power_mode\" : false, \"no_prefetch\" : true, \"affine_to_cpu\" : false },\n");
+			printer::inst()->print_msg(L0, "Autoconf FAILED: %s. Create config for a single thread.", err.what());
+		}
+
+		configTpl.replace("CPUCONFIG",conf);
+		configTpl.write(Params::inst().configFileCPU);
+		printer::inst()->print_msg(L0, "CPU configuration stored in file '%s'", Params::inst().configFileCPU.c_str());
+
+		return true;
+	}
+
+private:
+	/// cache size that is budgeted for one hash (2 MiB)
+	static constexpr size_t hashSize = 2 * 1024 * 1024;
+	/// marker bit of a `results` entry: the PU is configured with "low_power_mode" (double hash)
+	static constexpr uint32_t doubleHashFlag = 0x8000000;
+	/// bits of a `results` entry that hold the OS index of the PU
+	static constexpr uint32_t cpuIdMask = 0x7FFFFFF;
+	/// one entry per configured thread: OS index of the PU, optionally combined with doubleHashFlag
+	std::vector<uint32_t> results;
+
+	/** Holds a hwloc topology handle and destroys it when the scope is left. */
+	struct topologyGuard
+	{
+		hwloc_topology_t handle = nullptr;
+
+		topologyGuard() = default;
+		topologyGuard(const topologyGuard&) = delete;
+		topologyGuard& operator=(const topologyGuard&) = delete;
+
+		~topologyGuard()
+		{
+			if(handle != nullptr)
+				hwloc_topology_destroy(handle);
+		}
+	};
+
+	/**
+	 * Call `lambda` for every object of the given type below `obj`.
+	 *
+	 * The search does not descend into an object that matched, and `obj`
+	 * itself is never tested.
+	 *
+	 * @param obj    object whose subtree is searched
+	 * @param type   hwloc object type to look for
+	 * @param lambda callable that receives each matching hwloc_obj_t
+	 */
+	template<typename func>
+	inline void findChildrenByType(hwloc_obj_t obj, hwloc_obj_type_t type, func lambda)
+	{
+		for(size_t i=0; i < obj->arity; i++)
+		{
+			hwloc_obj_t child = obj->children[i];
+			if(child->type == type)
+				lambda(child);
+			else
+				findChildrenByType(child, type, lambda);
+		}
+	}
+
+	/** @return true if `obj` is a cache object (the check depends on the hwloc API version) */
+	inline bool isCacheObject(hwloc_obj_t obj)
+	{
+#if HWLOC_API_VERSION >= 0x20000
+		return hwloc_obj_type_is_cache(obj->type);
+#else
+		return obj->type == HWLOC_OBJ_CACHE;
+#endif // HWLOC_API_VERSION
+	}
+
+	/**
+	 * Call `lambda` for the first cache object found on every path below `obj`.
+	 *
+	 * Caches located below an already reported cache are not visited.
+	 *
+	 * @param obj    object whose subtree is searched
+	 * @param lambda callable that receives each found hwloc_obj_t
+	 */
+	template<typename func>
+	inline void findChildrenCaches(hwloc_obj_t obj, func lambda)
+	{
+		for(size_t i=0; i < obj->arity; i++)
+		{
+			hwloc_obj_t child = obj->children[i];
+			if(isCacheObject(child))
+				lambda(child);
+			else
+				findChildrenCaches(child, lambda);
+		}
+	}
+
+	/**
+	 * @return false only if the object has an info entry "Inclusive" whose
+	 *         value starts with '1'; a missing entry counts as exclusive
+	 */
+	inline bool isCacheExclusive(hwloc_obj_t obj)
+	{
+		const char* value = hwloc_obj_get_info_by_name(obj, "Inclusive");
+		return value == nullptr || value[0] != '1';
+	}
+
+	/**
+	 * Select the PUs below one cache and append them to `results`.
+	 *
+	 * Top level cache isn't shared with other cores on the same package
+	 * This will usually be 1 x L3, but can be 2 x L2 per package
+	 *
+	 * @param obj cache object to evaluate
+	 * @throws std::runtime_error if the object has no attributes, if a cache
+	 *         without size has no cache as first child, or if no PU could be
+	 *         selected although hashes are left to distribute
+	 */
+	void processTopLevelCache(hwloc_obj_t obj)
+	{
+		if(obj->attr == nullptr)
+			throw std::runtime_error("Cache object hasn't got attributes.");
+
+		size_t PUs = 0;
+		findChildrenByType(obj, HWLOC_OBJ_PU, [&PUs](hwloc_obj_t) { PUs++; } );
+
+		//Strange case, but we will handle it silently, surely there must be one PU somewhere?
+		if(PUs == 0)
+			return;
+
+		if(obj->attr->cache.size == 0)
+		{
+			//We will always have one child if PUs > 0
+			if(!isCacheObject(obj->children[0]))
+				throw std::runtime_error("The CPU doesn't seem to have a cache.");
+
+			//Try our luck with lower level caches
+			// NOTE(review): only the first child is verified to be a cache, all children are processed as caches
+			for(size_t i=0; i < obj->arity; i++)
+				processTopLevelCache(obj->children[i]);
+			return;
+		}
+
+		size_t cacheSize = obj->attr->cache.size;
+		if(isCacheExclusive(obj))
+		{
+			for(size_t i=0; i < obj->arity; i++)
+			{
+				hwloc_obj_t l2obj = obj->children[i];
+				//If L2 is exclusive and greater or equal to 2MB add room for one more hash
+				if(isCacheObject(l2obj) && l2obj->attr != nullptr && l2obj->attr->cache.size >= hashSize)
+					cacheSize += hashSize;
+			}
+		}
+
+		std::vector<hwloc_obj_t> cores;
+		cores.reserve(16);
+		findChildrenByType(obj, HWLOC_OBJ_CORE, [&cores](hwloc_obj_t found) { cores.emplace_back(found); } );
+
+		// number of hashes fitting into the cache, rounded to the nearest integer
+		size_t cacheHashes = (cacheSize + hashSize/2) / hashSize;
+
+		//Firstly allocate PU 0 of every CORE, then PU 1 etc.
+		size_t pu_id = 0;
+		while(cacheHashes > 0 && PUs > 0)
+		{
+			bool allocated_pu = false;
+			for(hwloc_obj_t core : cores)
+			{
+				// skip cores without a PU as direct child at this position
+				if(core->arity <= pu_id || core->children[pu_id]->type != HWLOC_OBJ_PU)
+					continue;
+
+				uint32_t os_id = core->children[pu_id]->os_index;
+
+				if(cacheHashes > PUs)
+				{
+					// more hashes left than PUs: this PU gets two hashes
+					cacheHashes -= 2;
+					os_id |= doubleHashFlag; //double hash marker bit
+				}
+				else
+					cacheHashes--;
+				PUs--;
+
+				allocated_pu = true;
+				results.emplace_back(os_id);
+
+				if(cacheHashes == 0)
+					break;
+			}
+
+			// no core offers a PU at this position, the loop would never terminate
+			if(!allocated_pu)
+				throw std::runtime_error("Failed to allocate a PU.");
+
+			pu_id++;
+		}
+	}
+};
+
+} // namespace cpu
+} // namespace xmrstak
OpenPOWER on IntegriCloud