summaryrefslogtreecommitdiffstats
path: root/autoAdjustHwloc.hpp
blob: 90223304ed794aa1a8ed9800d98e264893e182de (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
#pragma once

#include "console.h"

#include <climits>
#include <cstddef>
#include <cstdio>

#include <hwloc.h>

#ifdef _WIN32
#include <windows.h>
#else
#include <unistd.h>
#endif // _WIN32

class autoAdjust
{
public:

	autoAdjust()
	{
	}

	void printConfig()
	{
		printer::inst()->print_str("The configuration for 'cpu_threads_conf' in your config file is 'null'.\n");
		printer::inst()->print_str("The miner evaluates your system and prints a suggestion for the section `cpu_threads_conf` to the terminal.\n");
		printer::inst()->print_str("Please copy & paste the block within the asterisks to your config.\n\n");
		printer::inst()->print_str("\n**************** Copy&Paste ****************\n\n");
		printer::inst()->print_str("\"cpu_threads_conf\" :\n[\n");

		int depth;
		hwloc_topology_t topology;
		hwloc_obj_t socket;


		hwloc_topology_init(&topology);
		hwloc_topology_load(topology);

		depth = hwloc_get_type_depth(topology, HWLOC_OBJ_SOCKET);
		if (depth == HWLOC_TYPE_DEPTH_UNKNOWN)
		{
			printf("*** The number of sockets is unknown\n");
		}

		for (int i = 0; i < hwloc_get_nbobjs_by_depth(topology, depth); i++)
		{
			socket = hwloc_get_obj_by_depth(topology, depth, i);

			// search cacheprinter::inst()->print_str("\n**************** Copy&Paste ****************\n\n");
			for (int j = 0; j < socket->arity; j++)
			{
				hwloc_obj_t nextLvl = socket->children[j];
				findCache(topology, nextLvl);
			}
		}

		/* Destroy topology object. */
		hwloc_topology_destroy(topology);
		
		printer::inst()->print_str("],\n\n**************** Copy&Paste ****************\n");
	}

private:

	int rightZeros(size_t v)
	{
		int c;
		if (v)
		{
			v = (v ^ (v - 1)) >> 1;
			for (c = 0; v; c++)
			{
				v >>= 1;
			}
		}
		else
		{
			c = CHAR_BIT * sizeof (v);
		}
		return c;
	}

	inline void getConfig(hwloc_topology_t topology, hwloc_obj_t obj, size_t& numHashes, int& numCachesLeft)
	{
		if (obj->type == HWLOC_OBJ_CORE)
		{
			if (obj)
			{
				hwloc_cpuset_t cpuset;
				/* Get a copy of its cpuset that we may modify. */
				cpuset = hwloc_bitmap_dup(obj->cpuset);
				size_t allcpu = hwloc_bitmap_to_ulong(cpuset);
				/* Get only one logical processor (in case the core is
				   SMT/hyperthreaded). */
				hwloc_bitmap_singlify(cpuset);


				size_t cpu = hwloc_bitmap_to_ulong(cpuset);
				// move bit mask to right to allow to compare always the first bit
				cpu >>= rightZeros(allcpu);


				int nativeCores = __builtin_popcount(cpu);
				int numPus = obj->arity;
				for (int i = 0; i < numPus && numHashes != 0; i++)
				{
					hwloc_obj_t pu = obj->children[i];
					// only use native pu's
					if (pu->type == HWLOC_OBJ_PU && cpu & 1)
					{
						// if no cache is available we give each native core a hash
						int numUnit = numCachesLeft != 0 ? numCachesLeft : nativeCores;

						// two hashes per native pu if number of hashes if larger than compute units
						int power = numHashes > numUnit ? 2 : 1;
						char strbuf[256];
						//printf("------------------------------core %i -> %i mpu=%i %lu\n", pu->os_index, power, nativeCores, numHashes);
						snprintf(strbuf, sizeof(strbuf), "   { \"low_power_mode\" : %s, \"no_prefetch\" : true, \"affine_to_cpu\" : %u },\n",
							power == 2 ? "true" : "false", pu->os_index);
						printer::inst()->print_str(strbuf);

						// update number of free hashes
						numHashes -= power;
						cpu >>= 1;

						// one cache is filled with hashes
						if (numCachesLeft != 0) numCachesLeft--;
					}
				}
			}
		}
		else
		{
			for (int i = 0; i < obj->arity; i++)
				getConfig(topology, obj->children[i], numHashes, numCachesLeft);
		}
	}

	inline void findCache(hwloc_topology_t topology, hwloc_obj_t obj)
	{
		if (obj->type == HWLOC_OBJ_CACHE)
		{
			size_t cacheSize = obj->attr->cache.size;
			size_t numHashL3 = ( cacheSize + m_scratchPadMemSize/ 2llu ) / m_scratchPadMemSize;

			/* check cache is exclusive or inclusive */
			const char* value = hwloc_obj_get_info_by_name(obj, "Inclusive");


			bool doL3 = true;
			if (value == NULL || value[0] != 49 || cacheSize == 0)
			{
				size_t numHashes = 0;
				int numL2 = obj->arity;
				for (int k = 0; k < numL2; k++)
				{
					hwloc_obj_t l3Cache = obj->children[k];
					size_t l2Cache = 0;

					if (obj->type == HWLOC_OBJ_CACHE)
						l2Cache = l3Cache->attr->cache.size;
					else
						break;

					if (l2Cache < m_scratchPadMemSize)
					{
						// we need to start from L3
						break;
					}

					// start from L2

					/* if more hashes available than objects in the current depth of the topology
					 * than divide with round down else round up
					 */
					int extraHash = numHashL3 > numL2 ? numHashL3 / numL2 : (numHashL3 + numL2 - 1) / numL2;
					numHashL3 -= extraHash;
					if (numHashL3 < 0)
						numHashL3 = 0;
					numHashes += extraHash;
					//add L2 hashes
					numHashes += ( l2Cache + m_scratchPadMemSize / 2llu ) / m_scratchPadMemSize;
					int numCachesLeft = numL2;
					getConfig(topology, l3Cache, numHashes, numCachesLeft);
					doL3 = false;
				}
			}
			if (doL3)
			{
				int numCachesLeft = obj->arity;
				getConfig(topology, obj, numHashL3, numCachesLeft);
			}
		}
		else
			for (int j = 0; j < obj->arity; j++)
				findCache(topology, obj->children[j]);
	}

	static constexpr size_t m_scratchPadMemSize = ( 2llu * 1024llu * 1024llu );
};
OpenPOWER on IntegriCloud