5 files changed, 33 insertions, 7 deletions
diff --git a/doc/FAQ.md b/doc/FAQ.md
index 641a50d..ffbc36f 100644
--- a/doc/FAQ.md
+++ b/doc/FAQ.md
@@ -41,13 +41,18 @@ Download and install this [runtime package](https://go.microsoft.com/fwlink/?Lin
 
 ## Error: MEMORY ALLOC FAILED: mmap failed
 
-On Linux you will need to configure large page support `sudo sysctl -w vm.nr_hugepages=128` and increase your
-ulimit -l. To do this you need to add following lines to /etc/security/limits.conf:
+On Linux you will need to configure large page support and increase your `ulimit -l`.
+
+To enable large page support, add the following line to /etc/sysctl.conf (run `sudo sysctl -p` to apply it without rebooting):
+    
+    vm.nr_hugepages=128
+
+To increase the ulimit, add the following lines to /etc/security/limits.conf:
 
     * soft memlock 262144
     * hard memlock 262144
 
-Save file.  You WILL need to log out and log back in for these settings to take affect on your user (no need to reboot, just relogin in your session).
+You WILL need to log out and log back in for these settings to take effect for your user (no need to reboot, just log in to your session again).
 
 You can also do it Windows-style and simply run-as-root, but this is NOT recommended for security reasons.
 
diff --git a/xmrstak/backend/amd/amd_gpu/gpu.cpp b/xmrstak/backend/amd/amd_gpu/gpu.cpp
index d9bc962..c39c567 100644
--- a/xmrstak/backend/amd/amd_gpu/gpu.cpp
+++ b/xmrstak/backend/amd/amd_gpu/gpu.cpp
@@ -518,13 +518,13 @@ std::vector<GpuContext> getAMDDevices(int index)
 				printer::inst()->print_msg(L1,"WARNING: %s when calling clGetDeviceInfo to get CL_DEVICE_NAME for device %u.", err_to_str(clStatus), k);
 				continue;
 			}
-			printer::inst()->print_msg(L0,"Found OpenCL GPU %s.",ctx.name.c_str());
 
 			// if environment variable GPU_SINGLE_ALLOC_PERCENT is not set we can not allocate the full memory
 			ctx.deviceIdx = k;
 			ctx.freeMem = std::min(ctx.freeMem, maxMem);
 			ctx.name = std::string(devNameVec.data());
 			ctx.DeviceID = device_list[k];
+			printer::inst()->print_msg(L0,"Found OpenCL GPU %s.",ctx.name.c_str());
 			ctxVec.push_back(ctx);
 		}
 	}
diff --git a/xmrstak/backend/amd/jconf.cpp b/xmrstak/backend/amd/jconf.cpp
index 07afb19..f126342 100644
--- a/xmrstak/backend/amd/jconf.cpp
+++ b/xmrstak/backend/amd/jconf.cpp
@@ -56,9 +56,10 @@ struct configVal {
 	Type iType;
 };
 
-//Same order as in configEnum, as per comment above
+// Same order as in configEnum, as per comment above
+// kNullType means any type
 configVal oConfigValues[] = {
-	{ aGpuThreadsConf, "gpu_threads_conf", kArrayType },
+	{ aGpuThreadsConf, "gpu_threads_conf", kNullType },
 	{ iPlatformIdx, "platform_index", kNumberType }
 };
 
@@ -68,6 +69,8 @@ inline bool checkType(Type have, Type want)
 {
 	if(want == have)
 		return true;
+	else if(want == kNullType)
+		return true;
 	else if(want == kTrueType && have == kFalseType)
 		return true;
 	else if(want == kFalseType && have == kTrueType)
diff --git a/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu b/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu
index 15a6f36..cc97274 100644
--- a/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu
+++ b/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu
@@ -74,24 +74,36 @@ __device__ __forceinline__ uint64_t cuda_mul128( uint64_t multiplier, uint64_t m
 template< typename T >
 __device__ __forceinline__ T loadGlobal64( T * const addr )
 {
+#if (__CUDA_ARCH__ < 700)
 	T x;
 	asm volatile( "ld.global.cg.u64 %0, [%1];" : "=l"( x ) : "l"( addr ) );
 	return x;
+#else
+	return *addr;
+#endif
 }
 
 template< typename T >
 __device__ __forceinline__ T loadGlobal32( T * const addr )
 {
+#if (__CUDA_ARCH__ < 700)
 	T x;
 	asm volatile( "ld.global.cg.u32 %0, [%1];" : "=r"( x ) : "l"( addr ) );
 	return x;
+#else
+	return *addr;
+#endif
 }
 
 
 template< typename T >
 __device__ __forceinline__ void storeGlobal32( T* addr, T const & val )
 {
+#if (__CUDA_ARCH__ < 700)
 	asm volatile( "st.global.cg.u32 [%0], %1;" : : "l"( addr ), "r"( val ) );
+#else
+	*addr = val;
+#endif
 }
 
 template<size_t ITERATIONS, uint32_t THREAD_SHIFT>
diff --git a/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu b/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu
index d5d0039..92259db 100644
--- a/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu
+++ b/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu
@@ -204,7 +204,13 @@ extern "C" int cryptonight_extra_cpu_init(nvid_ctx* ctx)
 		break;
 
 	};
-	CUDA_CHECK(ctx->device_id, cudaDeviceSetCacheConfig(cudaFuncCachePreferL1));
+	const int gpuArch = ctx->device_arch[0] * 10 + ctx->device_arch[1];
+
+	/* Request the L1-preferring cache configuration only for GPUs before Volta
+	 * (gpuArch < 70); the call is skipped on Volta and newer, where L1 speed is increased and latency reduced.
+	 */
+	if(gpuArch < 70)
+		CUDA_CHECK(ctx->device_id, cudaDeviceSetCacheConfig(cudaFuncCachePreferL1));
 
 	size_t hashMemSize;
 	if(::jconf::inst()->IsCurrencyMonero())