summaryrefslogtreecommitdiffstats
path: root/arch/arm64/kernel/fpsimd.c
diff options
context:
space:
mode:
authorDave Martin <Dave.Martin@arm.com>2017-10-31 15:51:10 +0000
committerWill Deacon <will.deacon@arm.com>2017-11-03 15:24:17 +0000
commit2e0f2478ea37eba945bee007884a2988b8f7d332 (patch)
treeeefdac015c0ee8702fefbdc99e3bf941d8456084 /arch/arm64/kernel/fpsimd.c
parent8f1eec57cdccd17faeaf7f593505124376340fd0 (diff)
downloadop-kernel-dev-2e0f2478ea37eba945bee007884a2988b8f7d332.zip
op-kernel-dev-2e0f2478ea37eba945bee007884a2988b8f7d332.tar.gz
arm64/sve: Probe SVE capabilities and usable vector lengths
This patch uses the cpufeatures framework to determine common SVE capabilities and vector lengths, and configures the runtime SVE support code appropriately. ZCR_ELx is not really a feature register, but it is convenient to use it as a template for recording the maximum vector length supported by a CPU, using the LEN field. This field is similar to a feature field in that it is a contiguous bitfield for which we want to determine the minimum system-wide value. This patch adds ZCR as a pseudo-register in cpuinfo/cpufeatures, with appropriate custom code to populate it. Finding the minimum supported value of the LEN field is left to the cpufeatures framework in the usual way. The meaning of ID_AA64ZFR0_EL1 is not architecturally defined yet, so for now we just require it to be zero. Note that much of this code is dormant and SVE still won't be used yet, since system_supports_sve() remains hardwired to false. Signed-off-by: Dave Martin <Dave.Martin@arm.com> Reviewed-by: Suzuki K Poulose <suzuki.poulose@arm.com> Reviewed-by: Catalin Marinas <catalin.marinas@arm.com> Cc: Alex Bennée <alex.bennee@linaro.org> Signed-off-by: Will Deacon <will.deacon@arm.com>
Diffstat (limited to 'arch/arm64/kernel/fpsimd.c')
-rw-r--r--arch/arm64/kernel/fpsimd.c114
1 files changed, 111 insertions, 3 deletions
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 667be34..715398e 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -115,19 +115,19 @@
static DEFINE_PER_CPU(struct fpsimd_state *, fpsimd_last_state);
/* Default VL for tasks that don't set it explicitly: */
-static int sve_default_vl = SVE_VL_MIN;
+static int sve_default_vl = -1;
#ifdef CONFIG_ARM64_SVE
/* Maximum supported vector length across all CPUs (initially poisoned) */
int __ro_after_init sve_max_vl = -1;
/* Set of available vector lengths, as vq_to_bit(vq): */
-static DECLARE_BITMAP(sve_vq_map, SVE_VQ_MAX);
+static __ro_after_init DECLARE_BITMAP(sve_vq_map, SVE_VQ_MAX);
#else /* ! CONFIG_ARM64_SVE */
/* Dummy declaration for code that will be optimised out: */
-extern DECLARE_BITMAP(sve_vq_map, SVE_VQ_MAX);
+extern __ro_after_init DECLARE_BITMAP(sve_vq_map, SVE_VQ_MAX);
#endif /* ! CONFIG_ARM64_SVE */
@@ -497,6 +497,111 @@ out:
}
/*
+ * Bitmap for temporary storage of the per-CPU set of supported vector lengths
+ * during secondary boot.
+ */
+static DECLARE_BITMAP(sve_secondary_vq_map, SVE_VQ_MAX);
+
+static void sve_probe_vqs(DECLARE_BITMAP(map, SVE_VQ_MAX))
+{
+ unsigned int vq, vl;
+ unsigned long zcr;
+
+ bitmap_zero(map, SVE_VQ_MAX);
+
+ zcr = ZCR_ELx_LEN_MASK;
+ zcr = read_sysreg_s(SYS_ZCR_EL1) & ~zcr;
+
+ for (vq = SVE_VQ_MAX; vq >= SVE_VQ_MIN; --vq) {
+ write_sysreg_s(zcr | (vq - 1), SYS_ZCR_EL1); /* self-syncing */
+ vl = sve_get_vl();
+ vq = sve_vq_from_vl(vl); /* skip intervening lengths */
+ set_bit(vq_to_bit(vq), map);
+ }
+}
+
+void __init sve_init_vq_map(void)
+{
+ sve_probe_vqs(sve_vq_map);
+}
+
+/*
+ * If we haven't committed to the set of supported VQs yet, filter out
+ * those not supported by the current CPU.
+ */
+void sve_update_vq_map(void)
+{
+ sve_probe_vqs(sve_secondary_vq_map);
+ bitmap_and(sve_vq_map, sve_vq_map, sve_secondary_vq_map, SVE_VQ_MAX);
+}
+
+/* Check whether the current CPU supports all VQs in the committed set */
+int sve_verify_vq_map(void)
+{
+ int ret = 0;
+
+ sve_probe_vqs(sve_secondary_vq_map);
+ bitmap_andnot(sve_secondary_vq_map, sve_vq_map, sve_secondary_vq_map,
+ SVE_VQ_MAX);
+ if (!bitmap_empty(sve_secondary_vq_map, SVE_VQ_MAX)) {
+ pr_warn("SVE: cpu%d: Required vector length(s) missing\n",
+ smp_processor_id());
+ ret = -EINVAL;
+ }
+
+ return ret;
+}
+
+/*
+ * Enable SVE for EL1.
+ * Intended for use by the cpufeatures code during CPU boot.
+ */
+int sve_kernel_enable(void *__always_unused p)
+{
+ write_sysreg(read_sysreg(CPACR_EL1) | CPACR_EL1_ZEN_EL1EN, CPACR_EL1);
+ isb();
+
+ return 0;
+}
+
+void __init sve_setup(void)
+{
+ u64 zcr;
+
+ if (!system_supports_sve())
+ return;
+
+ /*
+ * The SVE architecture mandates support for 128-bit vectors,
+ * so sve_vq_map must have at least SVE_VQ_MIN set.
+ * If something went wrong, at least try to patch it up:
+ */
+ if (WARN_ON(!test_bit(vq_to_bit(SVE_VQ_MIN), sve_vq_map)))
+ set_bit(vq_to_bit(SVE_VQ_MIN), sve_vq_map);
+
+ zcr = read_sanitised_ftr_reg(SYS_ZCR_EL1);
+ sve_max_vl = sve_vl_from_vq((zcr & ZCR_ELx_LEN_MASK) + 1);
+
+ /*
+ * Sanity-check that the max VL we determined through CPU features
+ * corresponds properly to sve_vq_map. If not, do our best:
+ */
+ if (WARN_ON(sve_max_vl != find_supported_vector_length(sve_max_vl)))
+ sve_max_vl = find_supported_vector_length(sve_max_vl);
+
+ /*
+ * For the default VL, pick the maximum supported value <= 64.
+ * VL == 64 is guaranteed not to grow the signal frame.
+ */
+ sve_default_vl = find_supported_vector_length(64);
+
+ pr_info("SVE: maximum available vector length %u bytes per vector\n",
+ sve_max_vl);
+ pr_info("SVE: default vector length %u bytes per vector\n",
+ sve_default_vl);
+}
+
+/*
* Called from the put_task_struct() path, which cannot get here
* unless dead_task is really dead and not schedulable.
*/
@@ -631,6 +736,9 @@ void fpsimd_flush_thread(void)
* This is where we ensure that all user tasks have a valid
* vector length configured: no kernel task can become a user
* task without an exec and hence a call to this function.
+ * By the time the first call to this function is made, all
+ * early hardware probing is complete, so sve_default_vl
+ * should be valid.
* If a bug causes this to go wrong, we make some noise and
* try to fudge thread.sve_vl to a safe value here.
*/
OpenPOWER on IntegriCloud