13 files changed, 142 insertions, 27 deletions
diff --git a/xmrstak/backend/amd/minethd.cpp b/xmrstak/backend/amd/minethd.cpp
index ce62565..de0b6e8 100644
--- a/xmrstak/backend/amd/minethd.cpp
+++ b/xmrstak/backend/amd/minethd.cpp
@@ -59,6 +59,7 @@ minethd::minethd(miner_work& pWork, size_t iNo, GpuContext* ctx, const jconf::th
 	pGpuCtx = ctx;
 	this->affinity = cfg.cpu_aff;
 
+	std::unique_lock<std::mutex> lck(thd_aff_set);
 	std::future<void> order_guard = order_fix.get_future();
 	
 	oWorkThd = std::thread(&minethd::work_main, this);
@@ -180,6 +181,9 @@ void minethd::work_main()
 		bindMemoryToNUMANode(affinity);
 
 	order_fix.set_value();
+	std::unique_lock<std::mutex> lck(thd_aff_set);
+	lck.release();
+	std::this_thread::yield();
 	
 	uint64_t iCount = 0;
 	cryptonight_ctx* cpu_ctx;
@@ -239,7 +243,7 @@ void minethd::work_main()
 				if ( (*((uint64_t*)(bResult + 24))) < oWork.iTarget)
 					executor::inst()->push_event(ex_event(job_result(oWork.sJobID, results[i], bResult, iThreadNo), oWork.iPoolId));
 				else
-					executor::inst()->log_result_error("AMD Invalid Result");
+					executor::inst()->push_event(ex_event("AMD Invalid Result", oWork.iPoolId));
 			}
 
 			iCount += pGpuCtx->rawIntensity;
diff --git a/xmrstak/backend/amd/minethd.hpp b/xmrstak/backend/amd/minethd.hpp
index c4c2ce6..c808192 100644
--- a/xmrstak/backend/amd/minethd.hpp
+++ b/xmrstak/backend/amd/minethd.hpp
@@ -38,6 +38,7 @@ private:
 	miner_work oWork;
 
 	std::promise<void> order_fix;
+	std::mutex thd_aff_set;
 
 	std::thread oWorkThd;
 	int64_t affinity;
diff --git a/xmrstak/backend/cpu/minethd.cpp b/xmrstak/backend/cpu/minethd.cpp
index 8b0eec1..03071c4 100644
--- a/xmrstak/backend/cpu/minethd.cpp
+++ b/xmrstak/backend/cpu/minethd.cpp
@@ -102,6 +102,7 @@ minethd::minethd(miner_work& pWork, size_t iNo, bool double_work, bool no_prefet
 	bNoPrefetch = no_prefetch;
 	this->affinity = affinity;
 
+	std::unique_lock<std::mutex> lck(thd_aff_set);
 	std::future<void> order_guard = order_fix.get_future();
 
 	if(double_work)
@@ -340,6 +341,9 @@ void minethd::work_main()
 		bindMemoryToNUMANode(affinity);
 
 	order_fix.set_value();
+	std::unique_lock<std::mutex> lck(thd_aff_set);
+	lck.release();
+	std::this_thread::yield();
 
 	cn_hash_fun hash_fun;
 	cryptonight_ctx* ctx;
@@ -468,6 +472,9 @@ void minethd::double_work_main()
 		bindMemoryToNUMANode(affinity);
 
 	order_fix.set_value();
+	std::unique_lock<std::mutex> lck(thd_aff_set);
+	lck.release();
+	std::this_thread::yield();
 
 	cn_hash_fun_dbl hash_fun;
 	cryptonight_ctx* ctx0;
diff --git a/xmrstak/backend/cpu/minethd.hpp b/xmrstak/backend/cpu/minethd.hpp
index 670ec8d..5520d9e 100644
--- a/xmrstak/backend/cpu/minethd.hpp
+++ b/xmrstak/backend/cpu/minethd.hpp
@@ -46,6 +46,7 @@ private:
 	miner_work oWork;
 
 	std::promise<void> order_fix;
+	std::mutex thd_aff_set;
 
 	std::thread oWorkThd;
 	int64_t affinity;
diff --git a/xmrstak/backend/nvidia/autoAdjust.hpp b/xmrstak/backend/nvidia/autoAdjust.hpp
index bf04518..d36a46a 100644
--- a/xmrstak/backend/nvidia/autoAdjust.hpp
+++ b/xmrstak/backend/nvidia/autoAdjust.hpp
@@ -60,17 +60,15 @@ public:
             ctx.device_bfactor = 6;
             ctx.device_bsleep = 25;
 #endif
-            if( cuda_get_deviceinfo(&ctx) != 1 )
-            {
-                printer::inst()->print_msg(L0, "Setup failed for GPU %d. Exitting.\n", i);
-                std::exit(0);
-            }
-            nvidCtxVec.push_back(ctx);
+            if(cuda_get_deviceinfo(&ctx) == 0)
+                nvidCtxVec.push_back(ctx);
+            else
+                printer::inst()->print_msg(L0, "WARNING: NVIDIA setup failed for GPU %d.\n", i);
 
         }
 
         generateThreadConfig();
-		return true;
+        return true;
 
     }
 
@@ -94,6 +92,7 @@ private:
 			{
 				conf += std::string("  // gpu: ") + ctx.name + " architecture: " + std::to_string(ctx.device_arch[0] * 10 + ctx.device_arch[1]) + "\n";
 				conf += std::string("  //      memory: ") + std::to_string(ctx.free_device_memory / byte2mib) + "/"  + std::to_string(ctx.total_device_memory / byte2mib) + " MiB\n";
+				conf += std::string("  //      smx: ") + std::to_string(ctx.device_mpcount) + "\n";
 				conf += std::string("  { \"index\" : ") + std::to_string(ctx.device_id) + ",\n" +
 					"    \"threads\" : " + std::to_string(ctx.device_threads) + ", \"blocks\" : " + std::to_string(ctx.device_blocks) + ",\n" +
 					"    \"bfactor\" : " + std::to_string(ctx.device_bfactor) + ", \"bsleep\" :  " + std::to_string(ctx.device_bsleep) + ",\n" +
diff --git a/xmrstak/backend/nvidia/minethd.cpp b/xmrstak/backend/nvidia/minethd.cpp
index e15fc56..9dbd83e 100644
--- a/xmrstak/backend/nvidia/minethd.cpp
+++ b/xmrstak/backend/nvidia/minethd.cpp
@@ -79,6 +79,7 @@ minethd::minethd(miner_work& pWork, size_t iNo, const jconf::thd_cfg& cfg)
 	ctx.device_bsleep = (int)cfg.bsleep;
 	this->affinity = cfg.cpu_aff;
 
+	std::unique_lock<std::mutex> lck(thd_aff_set);
 	std::future<void> order_guard = order_fix.get_future();
 	
 	oWorkThd = std::thread(&minethd::work_main, this);
@@ -207,6 +208,9 @@ void minethd::work_main()
 		bindMemoryToNUMANode(affinity);
 
 	order_fix.set_value();
+	std::unique_lock<std::mutex> lck(thd_aff_set);
+	lck.release();
+	std::this_thread::yield();
 	
 	uint64_t iCount = 0;
 	cryptonight_ctx* cpu_ctx;
@@ -216,7 +220,7 @@ void minethd::work_main()
 
 	globalStates::inst().iConsumeCnt++;
 
-	if(/*cuda_get_deviceinfo(&ctx) != 1 ||*/ cryptonight_extra_cpu_init(&ctx) != 1)
+	if(cuda_get_deviceinfo(&ctx) != 0 || cryptonight_extra_cpu_init(&ctx) != 1)
 	{
 		printer::inst()->print_msg(L0, "Setup failed for GPU %d. Exitting.\n", (int)iThreadNo);
 		std::exit(0);
@@ -281,7 +285,7 @@ void minethd::work_main()
 				if ( (*((uint64_t*)(bResult + 24))) < oWork.iTarget)
 					executor::inst()->push_event(ex_event(job_result(oWork.sJobID, foundNonce[i], bResult, iThreadNo), oWork.iPoolId));
 				else
-					executor::inst()->log_result_error("NVIDIA Invalid Result");
+					executor::inst()->push_event(ex_event("NVIDIA Invalid Result", oWork.iPoolId));
 			}
 
 			iCount += h_per_round;
diff --git a/xmrstak/backend/nvidia/minethd.hpp b/xmrstak/backend/nvidia/minethd.hpp
index f6d989c..ffcdab1 100644
--- a/xmrstak/backend/nvidia/minethd.hpp
+++ b/xmrstak/backend/nvidia/minethd.hpp
@@ -45,6 +45,7 @@ private:
 	miner_work oWork;
 
 	std::promise<void> order_fix;
+	std::mutex thd_aff_set;
 
 	std::thread oWorkThd;
 	int64_t affinity;
diff --git a/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu b/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu
index e18532f..9923cb2 100644
--- a/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu
+++ b/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu
@@ -1,6 +1,9 @@
 #include <stdio.h>
 #include <stdint.h>
 #include <string.h>
+#include <sstream>
+#include <algorithm>
+#include <vector>
 #include <cuda.h>
 #include <cuda_runtime.h>
 #include <device_functions.hpp>
@@ -270,6 +273,15 @@ extern "C" int cuda_get_devicecount( int* deviceCount)
 	return 1;
 }
 
+/** get device information
+ *
+ * @return 0 = all OK,
+ *         1 = something went wrong,
+ *         2 = gpu cannot be selected,
+ *         3 = context cannot be created
+ *         4 = not enough memory
+ *         5 = architecture not supported (not compiled for the gpu architecture)
+ */
 extern "C" int cuda_get_deviceinfo(nvid_ctx* ctx)
 {
 	cudaError_t err;
@@ -279,25 +291,25 @@ extern "C" int cuda_get_deviceinfo(nvid_ctx* ctx)
 	if(err != cudaSuccess)
 	{
 		printf("Unable to query CUDA driver version! Is an nVidia driver installed?\n");
-		return 0;
+		return 1;
 	}
 
 	if(version < CUDART_VERSION)
 	{
 		printf("Driver does not support CUDA %d.%d API! Update your nVidia driver!\n", CUDART_VERSION / 1000, (CUDART_VERSION % 1000) / 10);
-		return 0;
+		return 1;
 	}
 
 	int GPU_N;
 	if(cuda_get_devicecount(&GPU_N) == 0)
 	{
-		return 0;
+		return 1;
 	}
 
 	if(ctx->device_id >= GPU_N)
 	{
 		printf("Invalid device ID!\n");
-		return 0;
+		return 1;
 	}
 
 	cudaDeviceProp props;
@@ -305,7 +317,7 @@ extern "C" int cuda_get_deviceinfo(nvid_ctx* ctx)
 	if(err != cudaSuccess)
 	{
 		printf("\nGPU %d: %s\n%s line %d\n", ctx->device_id, cudaGetErrorString(err), __FILE__, __LINE__);
-		return 0;
+		return 1;
 	}
 
 	ctx->device_name = strdup(props.name);
@@ -313,8 +325,52 @@ extern "C" int cuda_get_deviceinfo(nvid_ctx* ctx)
 	ctx->device_arch[0] = props.major;
 	ctx->device_arch[1] = props.minor;
 
+	const int gpuArch = ctx->device_arch[0] * 10 + ctx->device_arch[1];
+
 	ctx->name = std::string(props.name);
 
+	std::vector<int> arch;
+#define XMRSTAK_PP_TOSTRING1(str) #str
+#define XMRSTAK_PP_TOSTRING(str) XMRSTAK_PP_TOSTRING1(str)
+	char const * archStringList = XMRSTAK_PP_TOSTRING(XMRSTAK_CUDA_ARCH_LIST);
+#undef XMRSTAK_PP_TOSTRING
+#undef XMRSTAK_PP_TOSTRING1
+	std::stringstream ss(archStringList);
+
+	//transform string list sperated with `+` into a vector of integers
+	int tmpArch;
+	while ( ss >> tmpArch )
+		arch.push_back( tmpArch );
+
+	if(gpuArch >= 20 && gpuArch < 30)
+	{
+		// compiled binary must support sm_20 for fermi
+		std::vector<int>::iterator it = std::find(arch.begin(), arch.end(), 20);
+		if(it == arch.end())
+		{
+			printf("WARNING: NVIDIA GPU %d: miner not compiled for the gpu architecture %d.\n", ctx->device_id, gpuArch);
+			return 5;
+		}
+	}
+	if(gpuArch >= 30)
+	{
+		// search the minimum architecture greater than sm_20
+		int minSupportedArch = 0;
+		/* - for newer architecture than fermi we need at least sm_30
+		 * or a architecture >= gpuArch
+		 * - it is not possible to use a gpu with a architecture >= 30
+		 *   with a sm_20 only compiled binary
+		 */
+		for(int i = 0; i < arch.size(); ++i)
+			if(minSupportedArch == 0 || (arch[i] >= 30 && arch[i] < minSupportedArch))
+				minSupportedArch = arch[i];
+		if(minSupportedArch >= 30 && gpuArch <= minSupportedArch)
+		{
+			printf("WARNING: NVIDIA GPU %d: miner not compiled for the gpu architecture %d.\n", ctx->device_id, gpuArch);
+			return 5;
+		}
+	}
+
 	// set all evice option those marked as auto (-1) to a valid value
 	if(ctx->device_blocks == -1)
 	{
@@ -347,9 +403,31 @@ extern "C" int cuda_get_deviceinfo(nvid_ctx* ctx)
 			maxMemUsage = size_t(1024u) * byteToMiB;
 		}
 
+		int* tmp;
+		cudaError_t err;
+		// a device must be selected to get the right memory usage later on
+		err = cudaSetDevice(ctx->device_id);
+		if(err != cudaSuccess)
+		{
+			printf("WARNING: NVIDIA GPU %d: cannot be selected.\n", ctx->device_id);
+			return 2;
+		}
+		// trigger that a context on the gpu will be allocated
+		err = cudaMalloc(&tmp, 256);
+		if(err != cudaSuccess)
+		{
+			printf("WARNING: NVIDIA GPU %d: context cannot be created.\n", ctx->device_id);
+			return 3;
+		}
+
+
 		size_t freeMemory = 0;
 		size_t totalMemory = 0;
 		CUDA_CHECK(ctx->device_id, cudaMemGetInfo(&freeMemory, &totalMemory));
+
+		cudaFree(tmp);
+		// delete created context on the gpu
+		cudaDeviceReset();
 		
 		ctx->total_device_memory = totalMemory;
 		ctx->free_device_memory = freeMemory;
@@ -379,6 +457,7 @@ extern "C" int cuda_get_deviceinfo(nvid_ctx* ctx)
 			printf("WARNING: NVIDIA GPU %d: already %s MiB memory in use, skip GPU.\n",
 				ctx->device_id,
 				std::to_string(usedMem/byteToMiB).c_str());
+			return 4;
 		}
 		else
 			maxMemUsage -= usedMem;
@@ -404,5 +483,5 @@ extern "C" int cuda_get_deviceinfo(nvid_ctx* ctx)
 
 	}
 
-	return 1;
+	return 0;
 }
diff --git a/xmrstak/cli/cli-miner.cpp b/xmrstak/cli/cli-miner.cpp
index 284cf4a..f0f3c3a 100644
--- a/xmrstak/cli/cli-miner.cpp
+++ b/xmrstak/cli/cli-miner.cpp
@@ -130,7 +130,11 @@ std::string get_multipool_entry(bool& final)
 	std::cout<<"- Password (mostly empty or x):"<<std::endl;
 	getline(std::cin, passwd);
 
+#ifdef CONF_NO_TLS
+	bool tls = false;
+#else
 	bool tls = read_yes_no("- Does this pool port support TLS/SSL? Use no if unknown. (y/N)");
+#endif
 	bool nicehash = read_yes_no("- Do you want to use nicehash on this pool? (y/n)");
 
 	int64_t pool_weight;
@@ -207,7 +211,11 @@ void do_guided_config(bool userSetPasswd)
 		getline(std::cin, passwd);
 	}
 
+#ifdef CONF_NO_TLS
+	bool tls = false;
+#else
 	bool tls = read_yes_no("- Does this pool port support TLS/SSL? Use no if unknown. (y/N)");
+#endif
 	bool nicehash = read_yes_no("- Do you want to use nicehash on this pool? (y/n)");
 	
 	bool multipool;
diff --git a/xmrstak/jconf.cpp b/xmrstak/jconf.cpp
index a9f484f..a617f96 100644
--- a/xmrstak/jconf.cpp
+++ b/xmrstak/jconf.cpp
@@ -490,15 +490,6 @@ bool jconf::parse_config(const char* sFilename)
 		return false;
 	}
 
-#ifdef CONF_NO_TLS
-	if(prv->configValues[bTlsMode]->GetBool())
-	{
-		printer::inst()->print_msg(L0,
-			"Invalid config file. TLS enabled while the application has been compiled without TLS support.");
-		return false;
-	}
-#endif // CONF_NO_TLS
-
 	if(prv->configValues[bAesOverride]->IsBool())
 		bHaveAes = prv->configValues[bAesOverride]->GetBool();
 
diff --git a/xmrstak/misc/executor.cpp b/xmrstak/misc/executor.cpp
index e3602c6..43e3d6b 100644
--- a/xmrstak/misc/executor.cpp
+++ b/xmrstak/misc/executor.cpp
@@ -495,6 +495,13 @@ void executor::ex_main()
 	{
 		jconf::pool_cfg cfg;
  		jconf::inst()->GetPoolConfig(i, cfg);
+#ifdef CONF_NO_TLS
+		if(cfg.tls)
+		{
+			printer::inst()->print_msg(L1, "ERROR: No miner was compiled without TLS support.");
+			win_exit();
+		}
+#endif
 		if(!cfg.tls) tls = false;
 		pools.emplace_back(i+1, cfg.sPoolAddr, cfg.sWalletAddr, cfg.sPasswd, cfg.weight, false, cfg.tls, cfg.tls_fingerprint, cfg.nicehash);
 	}
@@ -553,6 +560,10 @@ void executor::ex_main()
 			eval_pool_choice();
 			break;
 
+		case EV_GPU_RES_ERROR:
+			log_result_error(ev.oGpuError.error_str);
+			break;
+
 		case EV_PERF_TICK:
 			for (i = 0; i < pvThreads->size(); i++)
 				telem->push_perf_value(i, pvThreads->at(i)->iHashCount.load(std::memory_order_relaxed),
diff --git a/xmrstak/misc/executor.hpp b/xmrstak/misc/executor.hpp
index 27f89c4..c90e864 100644
--- a/xmrstak/misc/executor.hpp
+++ b/xmrstak/misc/executor.hpp
@@ -42,7 +42,6 @@ public:
 
 	inline void push_event(ex_event&& ev) { oEventQ.push(std::move(ev)); }
 	void push_timed_event(ex_event&& ev, size_t sec);
-	void log_result_error(std::string&& sError);
 
 private:
 	struct timed_event
@@ -184,6 +183,7 @@ private:
 	double fHighestHps = 0.0;
 
 	void log_socket_error(jpsock* pool, std::string&& sError);
+	void log_result_error(std::string&& sError);
 	void log_result_ok(uint64_t iActualDiff);
 
 	void on_sock_ready(size_t pool_id);
diff --git a/xmrstak/net/msgstruct.hpp b/xmrstak/net/msgstruct.hpp
index 305f40b..9065fe3 100644
--- a/xmrstak/net/msgstruct.hpp
+++ b/xmrstak/net/msgstruct.hpp
@@ -63,7 +63,14 @@ struct sock_err
 	sock_err& operator=(sock_err const&) = delete;
 };
 
-enum ex_event_name { EV_INVALID_VAL, EV_SOCK_READY, EV_SOCK_ERROR,
+// Unlike socket errors, GPU errors are read-only strings
+struct gpu_res_err
+{
+	const char* error_str;
+	gpu_res_err(const char* error_str) : error_str(error_str) {}
+};
+
+enum ex_event_name { EV_INVALID_VAL, EV_SOCK_READY, EV_SOCK_ERROR, EV_GPU_RES_ERROR,
 	EV_POOL_HAVE_JOB, EV_MINER_HAVE_RESULT, EV_PERF_TICK, EV_EVAL_POOL_CHOICE, 
 	EV_USR_HASHRATE, EV_USR_RESULTS, EV_USR_CONNSTAT, EV_HASHRATE_LOOP, 
 	EV_HTML_HASHRATE, EV_HTML_RESULTS, EV_HTML_CONNSTAT, EV_HTML_JSON };
@@ -88,9 +95,11 @@ struct ex_event
 		pool_job oPoolJob;
 		job_result oJobResult;
 		sock_err oSocketError;
+		gpu_res_err oGpuError;
 	};
 
 	ex_event() { iName = EV_INVALID_VAL; iPoolId = 0;}
+	ex_event(const char* gpu_err, size_t id) : iName(EV_GPU_RES_ERROR), iPoolId(id), oGpuError(gpu_err) {}
 	ex_event(std::string&& err, bool silent, size_t id) : iName(EV_SOCK_ERROR), iPoolId(id), oSocketError(std::move(err), silent) { }
 	ex_event(job_result dat, size_t id) : iName(EV_MINER_HAVE_RESULT), iPoolId(id), oJobResult(dat) {}
 	ex_event(pool_job dat, size_t id) : iName(EV_POOL_HAVE_JOB), iPoolId(id), oPoolJob(dat) {}