diff options
Diffstat (limited to 'arch/arm64/kernel')
26 files changed, 1304 insertions, 560 deletions
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 3793003..2173149 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -45,6 +45,7 @@ arm64-obj-$(CONFIG_ACPI) += acpi.o arm64-obj-$(CONFIG_ARM64_ACPI_PARKING_PROTOCOL) += acpi_parking_protocol.o arm64-obj-$(CONFIG_PARAVIRT) += paravirt.o arm64-obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o +arm64-obj-$(CONFIG_HIBERNATION) += hibernate.o hibernate-asm.o obj-y += $(arm64-obj-y) vdso/ obj-m += $(arm64-obj-m) diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c index d1ce8e2..3e4f1a4 100644 --- a/arch/arm64/kernel/acpi.c +++ b/arch/arm64/kernel/acpi.c @@ -42,6 +42,7 @@ int acpi_pci_disabled = 1; /* skip ACPI PCI scan and IRQ initialization */ EXPORT_SYMBOL(acpi_pci_disabled); static bool param_acpi_off __initdata; +static bool param_acpi_on __initdata; static bool param_acpi_force __initdata; static int __init parse_acpi(char *arg) @@ -52,6 +53,8 @@ static int __init parse_acpi(char *arg) /* "acpi=off" disables both ACPI table parsing and interpreter */ if (strcmp(arg, "off") == 0) param_acpi_off = true; + else if (strcmp(arg, "on") == 0) /* prefer ACPI over DT */ + param_acpi_on = true; else if (strcmp(arg, "force") == 0) /* force ACPI to be enabled */ param_acpi_force = true; else @@ -66,12 +69,24 @@ static int __init dt_scan_depth1_nodes(unsigned long node, void *data) { /* - * Return 1 as soon as we encounter a node at depth 1 that is - * not the /chosen node. + * Ignore anything not directly under the root node; we'll + * catch its parent instead. */ - if (depth == 1 && (strcmp(uname, "chosen") != 0)) - return 1; - return 0; + if (depth != 1) + return 0; + + if (strcmp(uname, "chosen") == 0) + return 0; + + if (strcmp(uname, "hypervisor") == 0 && + of_flat_dt_is_compatible(node, "xen,xen")) + return 0; + + /* + * This node at depth 1 is neither a chosen node nor a xen node, + * which we do not expect. + */ + return 1; } /* @@ -184,11 +199,13 @@ void __init acpi_boot_table_init(void) /* * Enable ACPI instead of device tree unless * - ACPI has been disabled explicitly (acpi=off), or - * - the device tree is not empty (it has more than just a /chosen node) - * and ACPI has not been force enabled (acpi=force) + * - the device tree is not empty (it has more than just a /chosen node, + * and a /hypervisor node when running on Xen) + * and ACPI has not been [force] enabled (acpi=on|force) */ if (param_acpi_off || - (!param_acpi_force && of_scan_flat_dt(dt_scan_depth1_nodes, NULL))) + (!param_acpi_on && !param_acpi_force && + of_scan_flat_dt(dt_scan_depth1_nodes, NULL))) return; /* diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 3ae6b31..f8e5d47 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -22,6 +22,7 @@ #include <linux/mm.h> #include <linux/dma-mapping.h> #include <linux/kvm_host.h> +#include <linux/suspend.h> #include <asm/thread_info.h> #include <asm/memory.h> #include <asm/smp_plat.h> @@ -119,11 +120,14 @@ int main(void) DEFINE(CPU_CTX_SP, offsetof(struct cpu_suspend_ctx, sp)); DEFINE(MPIDR_HASH_MASK, offsetof(struct mpidr_hash, mask)); DEFINE(MPIDR_HASH_SHIFTS, offsetof(struct mpidr_hash, shift_aff)); - DEFINE(SLEEP_SAVE_SP_SZ, sizeof(struct sleep_save_sp)); - DEFINE(SLEEP_SAVE_SP_PHYS, offsetof(struct sleep_save_sp, save_ptr_stash_phys)); - DEFINE(SLEEP_SAVE_SP_VIRT, offsetof(struct sleep_save_sp, save_ptr_stash)); + DEFINE(SLEEP_STACK_DATA_SYSTEM_REGS, offsetof(struct sleep_stack_data, system_regs)); + DEFINE(SLEEP_STACK_DATA_CALLEE_REGS, offsetof(struct sleep_stack_data, callee_saved_regs)); #endif DEFINE(ARM_SMCCC_RES_X0_OFFS, offsetof(struct arm_smccc_res, a0)); DEFINE(ARM_SMCCC_RES_X2_OFFS, offsetof(struct arm_smccc_res, a2)); + BLANK(); + DEFINE(HIBERN_PBE_ORIG, offsetof(struct pbe, orig_address)); + DEFINE(HIBERN_PBE_ADDR, offsetof(struct pbe, address)); + DEFINE(HIBERN_PBE_NEXT, offsetof(struct pbe, next)); return 0; } diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 06afd04..d427894 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -22,14 +22,16 @@ #include <asm/cpufeature.h> static bool __maybe_unused -is_affected_midr_range(const struct arm64_cpu_capabilities *entry) +is_affected_midr_range(const struct arm64_cpu_capabilities *entry, int scope) { + WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible()); return MIDR_IS_CPU_MODEL_RANGE(read_cpuid_id(), entry->midr_model, entry->midr_range_min, entry->midr_range_max); } #define MIDR_RANGE(model, min, max) \ + .def_scope = SCOPE_LOCAL_CPU, \ .matches = is_affected_midr_range, \ .midr_model = model, \ .midr_range_min = min, \ @@ -101,6 +103,26 @@ const struct arm64_cpu_capabilities arm64_errata[] = { } }; +/* + * The CPU Errata work arounds are detected and applied at boot time + * and the related information is freed soon after. If the new CPU requires + * an errata not detected at boot, fail this CPU. + */ +void verify_local_cpu_errata(void) +{ + const struct arm64_cpu_capabilities *caps = arm64_errata; + + for (; caps->matches; caps++) + if (!cpus_have_cap(caps->capability) && + caps->matches(caps, SCOPE_LOCAL_CPU)) { + pr_crit("CPU%d: Requires work around for %s, not detected" + " at boot time\n", + smp_processor_id(), + caps->desc ? : "an erratum"); + cpu_die_early(); + } +} + void check_local_cpu_errata(void) { update_cpu_capabilities(arm64_errata, "enabling workaround for"); diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 943f514..811773d 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -71,7 +71,8 @@ DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS); /* meta feature for alternatives */ static bool __maybe_unused -cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry); +cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry, int __unused); + static struct arm64_ftr_bits ftr_id_aa64isar0[] = { ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0), @@ -130,7 +131,11 @@ static struct arm64_ftr_bits ftr_id_aa64mmfr1[] = { }; static struct arm64_ftr_bits ftr_id_aa64mmfr2[] = { + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_LVA_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_IESB_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_LSM_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_UAO_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_CNP_SHIFT, 4, 0), ARM64_FTR_END, }; @@ -435,22 +440,26 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info) init_cpu_ftr_reg(SYS_ID_AA64MMFR2_EL1, info->reg_id_aa64mmfr2); init_cpu_ftr_reg(SYS_ID_AA64PFR0_EL1, info->reg_id_aa64pfr0); init_cpu_ftr_reg(SYS_ID_AA64PFR1_EL1, info->reg_id_aa64pfr1); - init_cpu_ftr_reg(SYS_ID_DFR0_EL1, info->reg_id_dfr0); - init_cpu_ftr_reg(SYS_ID_ISAR0_EL1, info->reg_id_isar0); - init_cpu_ftr_reg(SYS_ID_ISAR1_EL1, info->reg_id_isar1); - init_cpu_ftr_reg(SYS_ID_ISAR2_EL1, info->reg_id_isar2); - init_cpu_ftr_reg(SYS_ID_ISAR3_EL1, info->reg_id_isar3); - init_cpu_ftr_reg(SYS_ID_ISAR4_EL1, info->reg_id_isar4); - init_cpu_ftr_reg(SYS_ID_ISAR5_EL1, info->reg_id_isar5); - init_cpu_ftr_reg(SYS_ID_MMFR0_EL1, info->reg_id_mmfr0); - init_cpu_ftr_reg(SYS_ID_MMFR1_EL1, info->reg_id_mmfr1); - init_cpu_ftr_reg(SYS_ID_MMFR2_EL1, info->reg_id_mmfr2); - init_cpu_ftr_reg(SYS_ID_MMFR3_EL1, info->reg_id_mmfr3); - init_cpu_ftr_reg(SYS_ID_PFR0_EL1, info->reg_id_pfr0); - init_cpu_ftr_reg(SYS_ID_PFR1_EL1, info->reg_id_pfr1); - init_cpu_ftr_reg(SYS_MVFR0_EL1, info->reg_mvfr0); - init_cpu_ftr_reg(SYS_MVFR1_EL1, info->reg_mvfr1); - init_cpu_ftr_reg(SYS_MVFR2_EL1, info->reg_mvfr2); + + if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0)) { + init_cpu_ftr_reg(SYS_ID_DFR0_EL1, info->reg_id_dfr0); + init_cpu_ftr_reg(SYS_ID_ISAR0_EL1, info->reg_id_isar0); + init_cpu_ftr_reg(SYS_ID_ISAR1_EL1, info->reg_id_isar1); + init_cpu_ftr_reg(SYS_ID_ISAR2_EL1, info->reg_id_isar2); + init_cpu_ftr_reg(SYS_ID_ISAR3_EL1, info->reg_id_isar3); + init_cpu_ftr_reg(SYS_ID_ISAR4_EL1, info->reg_id_isar4); + init_cpu_ftr_reg(SYS_ID_ISAR5_EL1, info->reg_id_isar5); + init_cpu_ftr_reg(SYS_ID_MMFR0_EL1, info->reg_id_mmfr0); + init_cpu_ftr_reg(SYS_ID_MMFR1_EL1, info->reg_id_mmfr1); + init_cpu_ftr_reg(SYS_ID_MMFR2_EL1, info->reg_id_mmfr2); + init_cpu_ftr_reg(SYS_ID_MMFR3_EL1, info->reg_id_mmfr3); + init_cpu_ftr_reg(SYS_ID_PFR0_EL1, info->reg_id_pfr0); + init_cpu_ftr_reg(SYS_ID_PFR1_EL1, info->reg_id_pfr1); + init_cpu_ftr_reg(SYS_MVFR0_EL1, info->reg_mvfr0); + init_cpu_ftr_reg(SYS_MVFR1_EL1, info->reg_mvfr1); + init_cpu_ftr_reg(SYS_MVFR2_EL1, info->reg_mvfr2); + } + } static void update_cpu_ftr_reg(struct arm64_ftr_reg *reg, u64 new) @@ -555,47 +564,51 @@ void update_cpu_features(int cpu, info->reg_id_aa64pfr1, boot->reg_id_aa64pfr1); /* - * If we have AArch32, we care about 32-bit features for compat. These - * registers should be RES0 otherwise. + * If we have AArch32, we care about 32-bit features for compat. + * If the system doesn't support AArch32, don't update them. */ - taint |= check_update_ftr_reg(SYS_ID_DFR0_EL1, cpu, + if (id_aa64pfr0_32bit_el0(read_system_reg(SYS_ID_AA64PFR0_EL1)) && + id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0)) { + + taint |= check_update_ftr_reg(SYS_ID_DFR0_EL1, cpu, info->reg_id_dfr0, boot->reg_id_dfr0); - taint |= check_update_ftr_reg(SYS_ID_ISAR0_EL1, cpu, + taint |= check_update_ftr_reg(SYS_ID_ISAR0_EL1, cpu, info->reg_id_isar0, boot->reg_id_isar0); - taint |= check_update_ftr_reg(SYS_ID_ISAR1_EL1, cpu, + taint |= check_update_ftr_reg(SYS_ID_ISAR1_EL1, cpu, info->reg_id_isar1, boot->reg_id_isar1); - taint |= check_update_ftr_reg(SYS_ID_ISAR2_EL1, cpu, + taint |= check_update_ftr_reg(SYS_ID_ISAR2_EL1, cpu, info->reg_id_isar2, boot->reg_id_isar2); - taint |= check_update_ftr_reg(SYS_ID_ISAR3_EL1, cpu, + taint |= check_update_ftr_reg(SYS_ID_ISAR3_EL1, cpu, info->reg_id_isar3, boot->reg_id_isar3); - taint |= check_update_ftr_reg(SYS_ID_ISAR4_EL1, cpu, + taint |= check_update_ftr_reg(SYS_ID_ISAR4_EL1, cpu, info->reg_id_isar4, boot->reg_id_isar4); - taint |= check_update_ftr_reg(SYS_ID_ISAR5_EL1, cpu, + taint |= check_update_ftr_reg(SYS_ID_ISAR5_EL1, cpu, info->reg_id_isar5, boot->reg_id_isar5); - /* - * Regardless of the value of the AuxReg field, the AIFSR, ADFSR, and - * ACTLR formats could differ across CPUs and therefore would have to - * be trapped for virtualization anyway. - */ - taint |= check_update_ftr_reg(SYS_ID_MMFR0_EL1, cpu, + /* + * Regardless of the value of the AuxReg field, the AIFSR, ADFSR, and + * ACTLR formats could differ across CPUs and therefore would have to + * be trapped for virtualization anyway. + */ + taint |= check_update_ftr_reg(SYS_ID_MMFR0_EL1, cpu, info->reg_id_mmfr0, boot->reg_id_mmfr0); - taint |= check_update_ftr_reg(SYS_ID_MMFR1_EL1, cpu, + taint |= check_update_ftr_reg(SYS_ID_MMFR1_EL1, cpu, info->reg_id_mmfr1, boot->reg_id_mmfr1); - taint |= check_update_ftr_reg(SYS_ID_MMFR2_EL1, cpu, + taint |= check_update_ftr_reg(SYS_ID_MMFR2_EL1, cpu, info->reg_id_mmfr2, boot->reg_id_mmfr2); - taint |= check_update_ftr_reg(SYS_ID_MMFR3_EL1, cpu, + taint |= check_update_ftr_reg(SYS_ID_MMFR3_EL1, cpu, info->reg_id_mmfr3, boot->reg_id_mmfr3); - taint |= check_update_ftr_reg(SYS_ID_PFR0_EL1, cpu, + taint |= check_update_ftr_reg(SYS_ID_PFR0_EL1, cpu, info->reg_id_pfr0, boot->reg_id_pfr0); - taint |= check_update_ftr_reg(SYS_ID_PFR1_EL1, cpu, + taint |= check_update_ftr_reg(SYS_ID_PFR1_EL1, cpu, info->reg_id_pfr1, boot->reg_id_pfr1); - taint |= check_update_ftr_reg(SYS_MVFR0_EL1, cpu, + taint |= check_update_ftr_reg(SYS_MVFR0_EL1, cpu, info->reg_mvfr0, boot->reg_mvfr0); - taint |= check_update_ftr_reg(SYS_MVFR1_EL1, cpu, + taint |= check_update_ftr_reg(SYS_MVFR1_EL1, cpu, info->reg_mvfr1, boot->reg_mvfr1); - taint |= check_update_ftr_reg(SYS_MVFR2_EL1, cpu, + taint |= check_update_ftr_reg(SYS_MVFR2_EL1, cpu, info->reg_mvfr2, boot->reg_mvfr2); + } /* * Mismatched CPU features are a recipe for disaster. Don't even @@ -614,6 +627,49 @@ u64 read_system_reg(u32 id) return regp->sys_val; } +/* + * __raw_read_system_reg() - Used by a STARTING cpu before cpuinfo is populated. + * Read the system register on the current CPU + */ +static u64 __raw_read_system_reg(u32 sys_id) +{ + switch (sys_id) { + case SYS_ID_PFR0_EL1: return read_cpuid(ID_PFR0_EL1); + case SYS_ID_PFR1_EL1: return read_cpuid(ID_PFR1_EL1); + case SYS_ID_DFR0_EL1: return read_cpuid(ID_DFR0_EL1); + case SYS_ID_MMFR0_EL1: return read_cpuid(ID_MMFR0_EL1); + case SYS_ID_MMFR1_EL1: return read_cpuid(ID_MMFR1_EL1); + case SYS_ID_MMFR2_EL1: return read_cpuid(ID_MMFR2_EL1); + case SYS_ID_MMFR3_EL1: return read_cpuid(ID_MMFR3_EL1); + case SYS_ID_ISAR0_EL1: return read_cpuid(ID_ISAR0_EL1); + case SYS_ID_ISAR1_EL1: return read_cpuid(ID_ISAR1_EL1); + case SYS_ID_ISAR2_EL1: return read_cpuid(ID_ISAR2_EL1); + case SYS_ID_ISAR3_EL1: return read_cpuid(ID_ISAR3_EL1); + case SYS_ID_ISAR4_EL1: return read_cpuid(ID_ISAR4_EL1); + case SYS_ID_ISAR5_EL1: return read_cpuid(ID_ISAR4_EL1); + case SYS_MVFR0_EL1: return read_cpuid(MVFR0_EL1); + case SYS_MVFR1_EL1: return read_cpuid(MVFR1_EL1); + case SYS_MVFR2_EL1: return read_cpuid(MVFR2_EL1); + + case SYS_ID_AA64PFR0_EL1: return read_cpuid(ID_AA64PFR0_EL1); + case SYS_ID_AA64PFR1_EL1: return read_cpuid(ID_AA64PFR0_EL1); + case SYS_ID_AA64DFR0_EL1: return read_cpuid(ID_AA64DFR0_EL1); + case SYS_ID_AA64DFR1_EL1: return read_cpuid(ID_AA64DFR0_EL1); + case SYS_ID_AA64MMFR0_EL1: return read_cpuid(ID_AA64MMFR0_EL1); + case SYS_ID_AA64MMFR1_EL1: return read_cpuid(ID_AA64MMFR1_EL1); + case SYS_ID_AA64MMFR2_EL1: return read_cpuid(ID_AA64MMFR2_EL1); + case SYS_ID_AA64ISAR0_EL1: return read_cpuid(ID_AA64ISAR0_EL1); + case SYS_ID_AA64ISAR1_EL1: return read_cpuid(ID_AA64ISAR1_EL1); + + case SYS_CNTFRQ_EL0: return read_cpuid(CNTFRQ_EL0); + case SYS_CTR_EL0: return read_cpuid(CTR_EL0); + case SYS_DCZID_EL0: return read_cpuid(DCZID_EL0); + default: + BUG(); + return 0; + } +} + #include <linux/irqchip/arm-gic-v3.h> static bool @@ -625,19 +681,24 @@ feature_matches(u64 reg, const struct arm64_cpu_capabilities *entry) } static bool -has_cpuid_feature(const struct arm64_cpu_capabilities *entry) +has_cpuid_feature(const struct arm64_cpu_capabilities *entry, int scope) { u64 val; - val = read_system_reg(entry->sys_reg); + WARN_ON(scope == SCOPE_LOCAL_CPU && preemptible()); + if (scope == SCOPE_SYSTEM) + val = read_system_reg(entry->sys_reg); + else + val = __raw_read_system_reg(entry->sys_reg); + return feature_matches(val, entry); } -static bool has_useable_gicv3_cpuif(const struct arm64_cpu_capabilities *entry) +static bool has_useable_gicv3_cpuif(const struct arm64_cpu_capabilities *entry, int scope) { bool has_sre; - if (!has_cpuid_feature(entry)) + if (!has_cpuid_feature(entry, scope)) return false; has_sre = gic_enable_sre(); @@ -648,7 +709,7 @@ static bool has_useable_gicv3_cpuif(const struct arm64_cpu_capabilities *entry) return has_sre; } -static bool has_no_hw_prefetch(const struct arm64_cpu_capabilities *entry) +static bool has_no_hw_prefetch(const struct arm64_cpu_capabilities *entry, int __unused) { u32 midr = read_cpuid_id(); u32 rv_min, rv_max; @@ -660,7 +721,7 @@ static bool has_no_hw_prefetch(const struct arm64_cpu_capabilities *entry) return MIDR_IS_CPU_MODEL_RANGE(midr, MIDR_THUNDERX, rv_min, rv_max); } -static bool runs_at_el2(const struct arm64_cpu_capabilities *entry) +static bool runs_at_el2(const struct arm64_cpu_capabilities *entry, int __unused) { return is_kernel_in_hyp_mode(); } @@ -669,6 +730,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { { .desc = "GIC system register CPU interface", .capability = ARM64_HAS_SYSREG_GIC_CPUIF, + .def_scope = SCOPE_SYSTEM, .matches = has_useable_gicv3_cpuif, .sys_reg = SYS_ID_AA64PFR0_EL1, .field_pos = ID_AA64PFR0_GIC_SHIFT, @@ -679,6 +741,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { { .desc = "Privileged Access Never", .capability = ARM64_HAS_PAN, + .def_scope = SCOPE_SYSTEM, .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64MMFR1_EL1, .field_pos = ID_AA64MMFR1_PAN_SHIFT, @@ -691,6 +754,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { { .desc = "LSE atomic instructions", .capability = ARM64_HAS_LSE_ATOMICS, + .def_scope = SCOPE_SYSTEM, .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64ISAR0_EL1, .field_pos = ID_AA64ISAR0_ATOMICS_SHIFT, @@ -701,12 +765,14 @@ static const struct arm64_cpu_capabilities arm64_features[] = { { .desc = "Software prefetching using PRFM", .capability = ARM64_HAS_NO_HW_PREFETCH, + .def_scope = SCOPE_SYSTEM, .matches = has_no_hw_prefetch, }, #ifdef CONFIG_ARM64_UAO { .desc = "User Access Override", .capability = ARM64_HAS_UAO, + .def_scope = SCOPE_SYSTEM, .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64MMFR2_EL1, .field_pos = ID_AA64MMFR2_UAO_SHIFT, @@ -717,20 +783,33 @@ static const struct arm64_cpu_capabilities arm64_features[] = { #ifdef CONFIG_ARM64_PAN { .capability = ARM64_ALT_PAN_NOT_UAO, + .def_scope = SCOPE_SYSTEM, .matches = cpufeature_pan_not_uao, }, #endif /* CONFIG_ARM64_PAN */ { .desc = "Virtualization Host Extensions", .capability = ARM64_HAS_VIRT_HOST_EXTN, + .def_scope = SCOPE_SYSTEM, .matches = runs_at_el2, }, + { + .desc = "32-bit EL0 Support", + .capability = ARM64_HAS_32BIT_EL0, + .def_scope = SCOPE_SYSTEM, + .matches = has_cpuid_feature, + .sys_reg = SYS_ID_AA64PFR0_EL1, + .sign = FTR_UNSIGNED, + .field_pos = ID_AA64PFR0_EL0_SHIFT, + .min_field_value = ID_AA64PFR0_EL0_32BIT_64BIT, + }, {}, }; #define HWCAP_CAP(reg, field, s, min_value, type, cap) \ { \ .desc = #cap, \ + .def_scope = SCOPE_SYSTEM, \ .matches = has_cpuid_feature, \ .sys_reg = reg, \ .field_pos = field, \ @@ -740,7 +819,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .hwcap = cap, \ } -static const struct arm64_cpu_capabilities arm64_hwcaps[] = { +static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, HWCAP_PMULL), HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_AES), HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA1_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SHA1), @@ -751,6 +830,10 @@ static const struct arm64_cpu_capabilities arm64_hwcaps[] = { HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_FPHP), HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, HWCAP_ASIMD), HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_ASIMDHP), + {}, +}; + +static const struct arm64_cpu_capabilities compat_elf_hwcaps[] = { #ifdef CONFIG_COMPAT HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_PMULL), HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_AES), @@ -761,7 +844,7 @@ static const struct arm64_cpu_capabilities arm64_hwcaps[] = { {}, }; -static void __init cap_set_hwcap(const struct arm64_cpu_capabilities *cap) +static void __init cap_set_elf_hwcap(const struct arm64_cpu_capabilities *cap) { switch (cap->hwcap_type) { case CAP_HWCAP: @@ -782,7 +865,7 @@ static void __init cap_set_hwcap(const struct arm64_cpu_capabilities *cap) } /* Check if we have a particular HWCAP enabled */ -static bool __maybe_unused cpus_have_hwcap(const struct arm64_cpu_capabilities *cap) +static bool cpus_have_elf_hwcap(const struct arm64_cpu_capabilities *cap) { bool rc; @@ -806,28 +889,23 @@ static bool __maybe_unused cpus_have_hwcap(const struct arm64_cpu_capabilities * return rc; } -static void __init setup_cpu_hwcaps(void) +static void __init setup_elf_hwcaps(const struct arm64_cpu_capabilities *hwcaps) { - int i; - const struct arm64_cpu_capabilities *hwcaps = arm64_hwcaps; - - for (i = 0; hwcaps[i].matches; i++) - if (hwcaps[i].matches(&hwcaps[i])) - cap_set_hwcap(&hwcaps[i]); + for (; hwcaps->matches; hwcaps++) + if (hwcaps->matches(hwcaps, hwcaps->def_scope)) + cap_set_elf_hwcap(hwcaps); } void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps, const char *info) { - int i; - - for (i = 0; caps[i].matches; i++) { - if (!caps[i].matches(&caps[i])) + for (; caps->matches; caps++) { + if (!caps->matches(caps, caps->def_scope)) continue; - if (!cpus_have_cap(caps[i].capability) && caps[i].desc) - pr_info("%s %s\n", info, caps[i].desc); - cpus_set_cap(caps[i].capability); + if (!cpus_have_cap(caps->capability) && caps->desc) + pr_info("%s %s\n", info, caps->desc); + cpus_set_cap(caps->capability); } } @@ -838,11 +916,9 @@ void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps, static void __init enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps) { - int i; - - for (i = 0; caps[i].matches; i++) - if (caps[i].enable && cpus_have_cap(caps[i].capability)) - on_each_cpu(caps[i].enable, NULL, true); + for (; caps->matches; caps++) + if (caps->enable && cpus_have_cap(caps->capability)) + on_each_cpu(caps->enable, NULL, true); } /* @@ -861,54 +937,45 @@ static inline void set_sys_caps_initialised(void) } /* - * __raw_read_system_reg() - Used by a STARTING cpu before cpuinfo is populated. + * Check for CPU features that are used in early boot + * based on the Boot CPU value. */ -static u64 __raw_read_system_reg(u32 sys_id) +static void check_early_cpu_features(void) { - switch (sys_id) { - case SYS_ID_PFR0_EL1: return read_cpuid(ID_PFR0_EL1); - case SYS_ID_PFR1_EL1: return read_cpuid(ID_PFR1_EL1); - case SYS_ID_DFR0_EL1: return read_cpuid(ID_DFR0_EL1); - case SYS_ID_MMFR0_EL1: return read_cpuid(ID_MMFR0_EL1); - case SYS_ID_MMFR1_EL1: return read_cpuid(ID_MMFR1_EL1); - case SYS_ID_MMFR2_EL1: return read_cpuid(ID_MMFR2_EL1); - case SYS_ID_MMFR3_EL1: return read_cpuid(ID_MMFR3_EL1); - case SYS_ID_ISAR0_EL1: return read_cpuid(ID_ISAR0_EL1); - case SYS_ID_ISAR1_EL1: return read_cpuid(ID_ISAR1_EL1); - case SYS_ID_ISAR2_EL1: return read_cpuid(ID_ISAR2_EL1); - case SYS_ID_ISAR3_EL1: return read_cpuid(ID_ISAR3_EL1); - case SYS_ID_ISAR4_EL1: return read_cpuid(ID_ISAR4_EL1); - case SYS_ID_ISAR5_EL1: return read_cpuid(ID_ISAR4_EL1); - case SYS_MVFR0_EL1: return read_cpuid(MVFR0_EL1); - case SYS_MVFR1_EL1: return read_cpuid(MVFR1_EL1); - case SYS_MVFR2_EL1: return read_cpuid(MVFR2_EL1); + verify_cpu_run_el(); + verify_cpu_asid_bits(); +} - case SYS_ID_AA64PFR0_EL1: return read_cpuid(ID_AA64PFR0_EL1); - case SYS_ID_AA64PFR1_EL1: return read_cpuid(ID_AA64PFR0_EL1); - case SYS_ID_AA64DFR0_EL1: return read_cpuid(ID_AA64DFR0_EL1); - case SYS_ID_AA64DFR1_EL1: return read_cpuid(ID_AA64DFR0_EL1); - case SYS_ID_AA64MMFR0_EL1: return read_cpuid(ID_AA64MMFR0_EL1); - case SYS_ID_AA64MMFR1_EL1: return read_cpuid(ID_AA64MMFR1_EL1); - case SYS_ID_AA64MMFR2_EL1: return read_cpuid(ID_AA64MMFR2_EL1); - case SYS_ID_AA64ISAR0_EL1: return read_cpuid(ID_AA64ISAR0_EL1); - case SYS_ID_AA64ISAR1_EL1: return read_cpuid(ID_AA64ISAR1_EL1); +static void +verify_local_elf_hwcaps(const struct arm64_cpu_capabilities *caps) +{ - case SYS_CNTFRQ_EL0: return read_cpuid(CNTFRQ_EL0); - case SYS_CTR_EL0: return read_cpuid(CTR_EL0); - case SYS_DCZID_EL0: return read_cpuid(DCZID_EL0); - default: - BUG(); - return 0; - } + for (; caps->matches; caps++) + if (cpus_have_elf_hwcap(caps) && !caps->matches(caps, SCOPE_LOCAL_CPU)) { + pr_crit("CPU%d: missing HWCAP: %s\n", + smp_processor_id(), caps->desc); + cpu_die_early(); + } } -/* - * Check for CPU features that are used in early boot - * based on the Boot CPU value. - */ -static void check_early_cpu_features(void) +static void +verify_local_cpu_features(const struct arm64_cpu_capabilities *caps) { - verify_cpu_asid_bits(); + for (; caps->matches; caps++) { + if (!cpus_have_cap(caps->capability)) + continue; + /* + * If the new CPU misses an advertised feature, we cannot proceed + * further, park the cpu. + */ + if (!caps->matches(caps, SCOPE_LOCAL_CPU)) { + pr_crit("CPU%d: missing feature: %s\n", + smp_processor_id(), caps->desc); + cpu_die_early(); + } + if (caps->enable) + caps->enable(NULL); + } } /* @@ -921,8 +988,6 @@ static void check_early_cpu_features(void) */ void verify_local_cpu_capabilities(void) { - int i; - const struct arm64_cpu_capabilities *caps; check_early_cpu_features(); @@ -933,32 +998,11 @@ void verify_local_cpu_capabilities(void) if (!sys_caps_initialised) return; - caps = arm64_features; - for (i = 0; caps[i].matches; i++) { - if (!cpus_have_cap(caps[i].capability) || !caps[i].sys_reg) - continue; - /* - * If the new CPU misses an advertised feature, we cannot proceed - * further, park the cpu. - */ - if (!feature_matches(__raw_read_system_reg(caps[i].sys_reg), &caps[i])) { - pr_crit("CPU%d: missing feature: %s\n", - smp_processor_id(), caps[i].desc); - cpu_die_early(); - } - if (caps[i].enable) - caps[i].enable(NULL); - } - - for (i = 0, caps = arm64_hwcaps; caps[i].matches; i++) { - if (!cpus_have_hwcap(&caps[i])) - continue; - if (!feature_matches(__raw_read_system_reg(caps[i].sys_reg), &caps[i])) { - pr_crit("CPU%d: missing HWCAP: %s\n", - smp_processor_id(), caps[i].desc); - cpu_die_early(); - } - } + verify_local_cpu_errata(); + verify_local_cpu_features(arm64_features); + verify_local_elf_hwcaps(arm64_elf_hwcaps); + if (system_supports_32bit_el0()) + verify_local_elf_hwcaps(compat_elf_hwcaps); } static void __init setup_feature_capabilities(void) @@ -967,6 +1011,24 @@ static void __init setup_feature_capabilities(void) enable_cpu_capabilities(arm64_features); } +/* + * Check if the current CPU has a given feature capability. + * Should be called from non-preemptible context. + */ +bool this_cpu_has_cap(unsigned int cap) +{ + const struct arm64_cpu_capabilities *caps; + + if (WARN_ON(preemptible())) + return false; + + for (caps = arm64_features; caps->desc; caps++) + if (caps->capability == cap && caps->matches) + return caps->matches(caps, SCOPE_LOCAL_CPU); + + return false; +} + void __init setup_cpu_features(void) { u32 cwg; @@ -974,7 +1036,10 @@ void __init setup_cpu_features(void) /* Set the CPU feature capabilies */ setup_feature_capabilities(); - setup_cpu_hwcaps(); + setup_elf_hwcaps(arm64_elf_hwcaps); + + if (system_supports_32bit_el0()) + setup_elf_hwcaps(compat_elf_hwcaps); /* Advertise that we have computed the system capabilities */ set_sys_caps_initialised(); @@ -993,7 +1058,7 @@ void __init setup_cpu_features(void) } static bool __maybe_unused -cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry) +cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry, int __unused) { return (cpus_have_cap(ARM64_HAS_PAN) && !cpus_have_cap(ARM64_HAS_UAO)); } diff --git a/arch/arm64/kernel/cpuidle.c b/arch/arm64/kernel/cpuidle.c index 9047cab6..e11857f 100644 --- a/arch/arm64/kernel/cpuidle.c +++ b/arch/arm64/kernel/cpuidle.c @@ -19,7 +19,8 @@ int __init arm_cpuidle_init(unsigned int cpu) { int ret = -EOPNOTSUPP; - if (cpu_ops[cpu] && cpu_ops[cpu]->cpu_init_idle) + if (cpu_ops[cpu] && cpu_ops[cpu]->cpu_suspend && + cpu_ops[cpu]->cpu_init_idle) ret = cpu_ops[cpu]->cpu_init_idle(cpu); return ret; @@ -36,11 +37,5 @@ int arm_cpuidle_suspend(int index) { int cpu = smp_processor_id(); - /* - * If cpu_ops have not been registered or suspend - * has not been initialized, cpu_suspend call fails early. - */ - if (!cpu_ops[cpu] || !cpu_ops[cpu]->cpu_suspend) - return -EOPNOTSUPP; return cpu_ops[cpu]->cpu_suspend(index); } diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 84c8684..3808470 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -87,7 +87,8 @@ static const char *const compat_hwcap_str[] = { "idivt", "vfpd32", "lpae", - "evtstrm" + "evtstrm", + NULL }; static const char *const compat_hwcap2_str[] = { @@ -216,23 +217,26 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info) info->reg_id_aa64pfr0 = read_cpuid(ID_AA64PFR0_EL1); info->reg_id_aa64pfr1 = read_cpuid(ID_AA64PFR1_EL1); - info->reg_id_dfr0 = read_cpuid(ID_DFR0_EL1); - info->reg_id_isar0 = read_cpuid(ID_ISAR0_EL1); - info->reg_id_isar1 = read_cpuid(ID_ISAR1_EL1); - info->reg_id_isar2 = read_cpuid(ID_ISAR2_EL1); - info->reg_id_isar3 = read_cpuid(ID_ISAR3_EL1); - info->reg_id_isar4 = read_cpuid(ID_ISAR4_EL1); - info->reg_id_isar5 = read_cpuid(ID_ISAR5_EL1); - info->reg_id_mmfr0 = read_cpuid(ID_MMFR0_EL1); - info->reg_id_mmfr1 = read_cpuid(ID_MMFR1_EL1); - info->reg_id_mmfr2 = read_cpuid(ID_MMFR2_EL1); - info->reg_id_mmfr3 = read_cpuid(ID_MMFR3_EL1); - info->reg_id_pfr0 = read_cpuid(ID_PFR0_EL1); - info->reg_id_pfr1 = read_cpuid(ID_PFR1_EL1); - - info->reg_mvfr0 = read_cpuid(MVFR0_EL1); - info->reg_mvfr1 = read_cpuid(MVFR1_EL1); - info->reg_mvfr2 = read_cpuid(MVFR2_EL1); + /* Update the 32bit ID registers only if AArch32 is implemented */ + if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0)) { + info->reg_id_dfr0 = read_cpuid(ID_DFR0_EL1); + info->reg_id_isar0 = read_cpuid(ID_ISAR0_EL1); + info->reg_id_isar1 = read_cpuid(ID_ISAR1_EL1); + info->reg_id_isar2 = read_cpuid(ID_ISAR2_EL1); + info->reg_id_isar3 = read_cpuid(ID_ISAR3_EL1); + info->reg_id_isar4 = read_cpuid(ID_ISAR4_EL1); + info->reg_id_isar5 = read_cpuid(ID_ISAR5_EL1); + info->reg_id_mmfr0 = read_cpuid(ID_MMFR0_EL1); + info->reg_id_mmfr1 = read_cpuid(ID_MMFR1_EL1); + info->reg_id_mmfr2 = read_cpuid(ID_MMFR2_EL1); + info->reg_id_mmfr3 = read_cpuid(ID_MMFR3_EL1); + info->reg_id_pfr0 = read_cpuid(ID_PFR0_EL1); + info->reg_id_pfr1 = read_cpuid(ID_PFR1_EL1); + + info->reg_mvfr0 = read_cpuid(MVFR0_EL1); + info->reg_mvfr1 = read_cpuid(MVFR1_EL1); + info->reg_mvfr2 = read_cpuid(MVFR2_EL1); + } cpuinfo_detect_icache_policy(info); diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index c45f296..4fbf3c5 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -135,9 +135,8 @@ static void clear_os_lock(void *unused) static int os_lock_notify(struct notifier_block *self, unsigned long action, void *data) { - int cpu = (unsigned long)data; if ((action & ~CPU_TASKS_FROZEN) == CPU_ONLINE) - smp_call_function_single(cpu, clear_os_lock, NULL, 1); + clear_os_lock(NULL); return NOTIFY_OK; } diff --git a/arch/arm64/kernel/efi-entry.S b/arch/arm64/kernel/efi-entry.S index cae3112..e88c064 100644 --- a/arch/arm64/kernel/efi-entry.S +++ b/arch/arm64/kernel/efi-entry.S @@ -62,7 +62,7 @@ ENTRY(entry) */ mov x20, x0 // DTB address ldr x0, [sp, #16] // relocated _text address - movz x21, #:abs_g0:stext_offset + ldr w21, =stext_offset add x21, x0, x21 /* diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 85da0f5..2c6e598 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -25,6 +25,7 @@ #include <linux/irqchip/arm-gic-v3.h> #include <asm/assembler.h> +#include <asm/boot.h> #include <asm/ptrace.h> #include <asm/asm-offsets.h> #include <asm/cache.h> @@ -51,9 +52,6 @@ #error TEXT_OFFSET must be less than 2MB #endif -#define KERNEL_START _text -#define KERNEL_END _end - /* * Kernel startup entry point. * --------------------------- @@ -102,8 +100,6 @@ _head: #endif #ifdef CONFIG_EFI - .globl __efistub_stext_offset - .set __efistub_stext_offset, stext - _head .align 3 pe_header: .ascii "PE" @@ -123,11 +119,11 @@ optional_header: .short 0x20b // PE32+ format .byte 0x02 // MajorLinkerVersion .byte 0x14 // MinorLinkerVersion - .long _end - stext // SizeOfCode + .long _end - efi_header_end // SizeOfCode .long 0 // SizeOfInitializedData .long 0 // SizeOfUninitializedData .long __efistub_entry - _head // AddressOfEntryPoint - .long __efistub_stext_offset // BaseOfCode + .long efi_header_end - _head // BaseOfCode extra_header_fields: .quad 0 // ImageBase @@ -144,7 +140,7 @@ extra_header_fields: .long _end - _head // SizeOfImage // Everything before the kernel image is considered part of the header - .long __efistub_stext_offset // SizeOfHeaders + .long efi_header_end - _head // SizeOfHeaders .long 0 // CheckSum .short 0xa // Subsystem (EFI application) .short 0 // DllCharacteristics @@ -188,10 +184,10 @@ section_table: .byte 0 .byte 0 .byte 0 // end of 0 padding of section name - .long _end - stext // VirtualSize - .long __efistub_stext_offset // VirtualAddress - .long _edata - stext // SizeOfRawData - .long __efistub_stext_offset // PointerToRawData + .long _end - efi_header_end // VirtualSize + .long efi_header_end - _head // VirtualAddress + .long _edata - efi_header_end // SizeOfRawData + .long efi_header_end - _head // PointerToRawData .long 0 // PointerToRelocations (0 for executables) .long 0 // PointerToLineNumbers (0 for executables) @@ -200,20 +196,23 @@ section_table: .long 0xe0500020 // Characteristics (section flags) /* - * EFI will load stext onwards at the 4k section alignment + * EFI will load .text onwards at the 4k section alignment * described in the PE/COFF header. To ensure that instruction * sequences using an adrp and a :lo12: immediate will function - * correctly at this alignment, we must ensure that stext is + * correctly at this alignment, we must ensure that .text is * placed at a 4k boundary in the Image to begin with. */ .align 12 +efi_header_end: #endif + __INIT + ENTRY(stext) bl preserve_boot_args bl el2_setup // Drop to EL1, w20=cpu_boot_mode - mov x23, xzr // KASLR offset, defaults to 0 adrp x24, __PHYS_OFFSET + and x23, x24, MIN_KIMG_ALIGN - 1 // KASLR offset, defaults to 0 bl set_cpu_boot_mode_flag bl __create_page_tables // x25=TTBR0, x26=TTBR1 /* @@ -222,13 +221,11 @@ ENTRY(stext) * On return, the CPU will be ready for the MMU to be turned on and * the TCR will have been set. */ - ldr x27, 0f // address to jump to after + bl __cpu_setup // initialise processor + adr_l x27, __primary_switch // address to jump to after // MMU has been enabled - adr_l lr, __enable_mmu // return (PIC) address - b __cpu_setup // initialise processor + b __enable_mmu ENDPROC(stext) - .align 3 -0: .quad __mmap_switched - (_head - TEXT_OFFSET) + KIMAGE_VADDR /* * Preserve the arguments passed by the bootloader in x0 .. x3 @@ -338,7 +335,7 @@ __create_page_tables: cmp x0, x6 b.lo 1b - ldr x7, =SWAPPER_MM_MMUFLAGS + mov x7, SWAPPER_MM_MMUFLAGS /* * Create the identity mapping. @@ -394,12 +391,13 @@ __create_page_tables: * Map the kernel image (starting with PHYS_OFFSET). */ mov x0, x26 // swapper_pg_dir - ldr x5, =KIMAGE_VADDR + mov_q x5, KIMAGE_VADDR + TEXT_OFFSET // compile time __va(_text) add x5, x5, x23 // add KASLR displacement create_pgd_entry x0, x5, x3, x6 - ldr w6, kernel_img_size - add x6, x6, x5 - mov x3, x24 // phys offset + adrp x6, _end // runtime __pa(_end) + adrp x3, _text // runtime __pa(_text) + sub x6, x6, x3 // _end - _text + add x6, x6, x5 // runtime __va(_end) create_block_map x0, x7, x3, x5, x6 /* @@ -414,16 +412,13 @@ __create_page_tables: ret x28 ENDPROC(__create_page_tables) - -kernel_img_size: - .long _end - (_head - TEXT_OFFSET) .ltorg /* * The following fragment of code is executed with the MMU enabled. */ .set initial_sp, init_thread_union + THREAD_START_SP -__mmap_switched: +__primary_switched: mov x28, lr // preserve LR adr_l x8, vectors // load VBAR_EL1 with virtual msr vbar_el1, x8 // vector table address @@ -437,44 +432,6 @@ __mmap_switched: bl __pi_memset dsb ishst // Make zero page visible to PTW -#ifdef CONFIG_RELOCATABLE - - /* - * Iterate over each entry in the relocation table, and apply the - * relocations in place. - */ - adr_l x8, __dynsym_start // start of symbol table - adr_l x9, __reloc_start // start of reloc table - adr_l x10, __reloc_end // end of reloc table - -0: cmp x9, x10 - b.hs 2f - ldp x11, x12, [x9], #24 - ldr x13, [x9, #-8] - cmp w12, #R_AARCH64_RELATIVE - b.ne 1f - add x13, x13, x23 // relocate - str x13, [x11, x23] - b 0b - -1: cmp w12, #R_AARCH64_ABS64 - b.ne 0b - add x12, x12, x12, lsl #1 // symtab offset: 24x top word - add x12, x8, x12, lsr #(32 - 3) // ... shifted into bottom word - ldrsh w14, [x12, #6] // Elf64_Sym::st_shndx - ldr x15, [x12, #8] // Elf64_Sym::st_value - cmp w14, #-0xf // SHN_ABS (0xfff1) ? - add x14, x15, x23 // relocate - csel x15, x14, x15, ne - add x15, x13, x15 - str x15, [x11, x23] - b 0b - -2: adr_l x8, kimage_vaddr // make relocated kimage_vaddr - dc cvac, x8 // value visible to secondaries - dsb sy // with MMU off -#endif - adr_l sp, initial_sp, x4 mov x4, sp and x4, x4, #~(THREAD_SIZE - 1) @@ -490,17 +447,19 @@ __mmap_switched: bl kasan_early_init #endif #ifdef CONFIG_RANDOMIZE_BASE - cbnz x23, 0f // already running randomized? + tst x23, ~(MIN_KIMG_ALIGN - 1) // already running randomized? + b.ne 0f mov x0, x21 // pass FDT address in x0 + mov x1, x23 // pass modulo offset in x1 bl kaslr_early_init // parse FDT for KASLR options cbz x0, 0f // KASLR disabled? just proceed - mov x23, x0 // record KASLR offset + orr x23, x23, x0 // record KASLR offset ret x28 // we must enable KASLR, return // to __enable_mmu() 0: #endif b start_kernel -ENDPROC(__mmap_switched) +ENDPROC(__primary_switched) /* * end early head section, begin head code that is also used for @@ -650,7 +609,7 @@ ENDPROC(el2_setup) * Sets the __boot_cpu_mode flag depending on the CPU boot mode passed * in x20. See arch/arm64/include/asm/virt.h for more info. */ -ENTRY(set_cpu_boot_mode_flag) +set_cpu_boot_mode_flag: adr_l x1, __boot_cpu_mode cmp w20, #BOOT_CPU_MODE_EL2 b.ne 1f @@ -683,7 +642,7 @@ ENTRY(secondary_holding_pen) bl el2_setup // Drop to EL1, w20=cpu_boot_mode bl set_cpu_boot_mode_flag mrs x0, mpidr_el1 - ldr x1, =MPIDR_HWID_BITMASK + mov_q x1, MPIDR_HWID_BITMASK and x0, x0, x1 adr_l x3, secondary_holding_pen_release pen: ldr x4, [x3] @@ -703,7 +662,7 @@ ENTRY(secondary_entry) b secondary_startup ENDPROC(secondary_entry) -ENTRY(secondary_startup) +secondary_startup: /* * Common entry point for secondary CPUs. */ @@ -711,14 +670,11 @@ ENTRY(secondary_startup) adrp x26, swapper_pg_dir bl __cpu_setup // initialise processor - ldr x8, kimage_vaddr - ldr w9, 0f - sub x27, x8, w9, sxtw // address to jump to after enabling the MMU + adr_l x27, __secondary_switch // address to jump to after enabling the MMU b __enable_mmu ENDPROC(secondary_startup) -0: .long (_text - TEXT_OFFSET) - __secondary_switched -ENTRY(__secondary_switched) +__secondary_switched: adr_l x5, vectors msr vbar_el1, x5 isb @@ -768,7 +724,7 @@ ENTRY(__early_cpu_boot_status) * If it isn't, park the CPU */ .section ".idmap.text", "ax" -__enable_mmu: +ENTRY(__enable_mmu) mrs x22, sctlr_el1 // preserve old SCTLR_EL1 value mrs x1, ID_AA64MMFR0_EL1 ubfx x2, x1, #ID_AA64MMFR0_TGRAN_SHIFT, 4 @@ -806,7 +762,6 @@ __enable_mmu: ic iallu // flush instructions fetched dsb nsh // via old mapping isb - add x27, x27, x23 // relocated __mmap_switched #endif br x27 ENDPROC(__enable_mmu) @@ -819,3 +774,53 @@ __no_granule_support: wfi b 1b ENDPROC(__no_granule_support) + +__primary_switch: +#ifdef CONFIG_RELOCATABLE + /* + * Iterate over each entry in the relocation table, and apply the + * relocations in place. + */ + ldr w8, =__dynsym_offset // offset to symbol table + ldr w9, =__rela_offset // offset to reloc table + ldr w10, =__rela_size // size of reloc table + + mov_q x11, KIMAGE_VADDR // default virtual offset + add x11, x11, x23 // actual virtual offset + add x8, x8, x11 // __va(.dynsym) + add x9, x9, x11 // __va(.rela) + add x10, x9, x10 // __va(.rela) + sizeof(.rela) + +0: cmp x9, x10 + b.hs 2f + ldp x11, x12, [x9], #24 + ldr x13, [x9, #-8] + cmp w12, #R_AARCH64_RELATIVE + b.ne 1f + add x13, x13, x23 // relocate + str x13, [x11, x23] + b 0b + +1: cmp w12, #R_AARCH64_ABS64 + b.ne 0b + add x12, x12, x12, lsl #1 // symtab offset: 24x top word + add x12, x8, x12, lsr #(32 - 3) // ... shifted into bottom word + ldrsh w14, [x12, #6] // Elf64_Sym::st_shndx + ldr x15, [x12, #8] // Elf64_Sym::st_value + cmp w14, #-0xf // SHN_ABS (0xfff1) ? + add x14, x15, x23 // relocate + csel x15, x14, x15, ne + add x15, x13, x15 + str x15, [x11, x23] + b 0b + +2: +#endif + ldr x8, =__primary_switched + br x8 +ENDPROC(__primary_switch) + +__secondary_switch: + ldr x8, =__secondary_switched + br x8 +ENDPROC(__secondary_switch) diff --git a/arch/arm64/kernel/hibernate-asm.S b/arch/arm64/kernel/hibernate-asm.S new file mode 100644 index 0000000..46f29b6 --- /dev/null +++ b/arch/arm64/kernel/hibernate-asm.S @@ -0,0 +1,176 @@ +/* + * Hibernate low-level support + * + * Copyright (C) 2016 ARM Ltd. + * Author: James Morse <james.morse@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#include <linux/linkage.h> +#include <linux/errno.h> + +#include <asm/asm-offsets.h> +#include <asm/assembler.h> +#include <asm/cputype.h> +#include <asm/memory.h> +#include <asm/page.h> +#include <asm/virt.h> + +/* + * To prevent the possibility of old and new partial table walks being visible + * in the tlb, switch the ttbr to a zero page when we invalidate the old + * records. D4.7.1 'General TLB maintenance requirements' in ARM DDI 0487A.i + * Even switching to our copied tables will cause a changed output address at + * each stage of the walk. + */ +.macro break_before_make_ttbr_switch zero_page, page_table + msr ttbr1_el1, \zero_page + isb + tlbi vmalle1is + dsb ish + msr ttbr1_el1, \page_table + isb +.endm + + +/* + * Resume from hibernate + * + * Loads temporary page tables then restores the memory image. + * Finally branches to cpu_resume() to restore the state saved by + * swsusp_arch_suspend(). + * + * Because this code has to be copied to a 'safe' page, it can't call out to + * other functions by PC-relative address. Also remember that it may be + * mid-way through over-writing other functions. For this reason it contains + * code from flush_icache_range() and uses the copy_page() macro. + * + * This 'safe' page is mapped via ttbr0, and executed from there. This function + * switches to a copy of the linear map in ttbr1, performs the restore, then + * switches ttbr1 to the original kernel's swapper_pg_dir. + * + * All of memory gets written to, including code. We need to clean the kernel + * text to the Point of Coherence (PoC) before secondary cores can be booted. + * Because the kernel modules and executable pages mapped to user space are + * also written as data, we clean all pages we touch to the Point of + * Unification (PoU). + * + * x0: physical address of temporary page tables + * x1: physical address of swapper page tables + * x2: address of cpu_resume + * x3: linear map address of restore_pblist in the current kernel + * x4: physical address of __hyp_stub_vectors, or 0 + * x5: physical address of a zero page that remains zero after resume + */ +.pushsection ".hibernate_exit.text", "ax" +ENTRY(swsusp_arch_suspend_exit) + /* + * We execute from ttbr0, change ttbr1 to our copied linear map tables + * with a break-before-make via the zero page + */ + break_before_make_ttbr_switch x5, x0 + + mov x21, x1 + mov x30, x2 + mov x24, x4 + mov x25, x5 + + /* walk the restore_pblist and use copy_page() to over-write memory */ + mov x19, x3 + +1: ldr x10, [x19, #HIBERN_PBE_ORIG] + mov x0, x10 + ldr x1, [x19, #HIBERN_PBE_ADDR] + + copy_page x0, x1, x2, x3, x4, x5, x6, x7, x8, x9 + + add x1, x10, #PAGE_SIZE + /* Clean the copied page to PoU - based on flush_icache_range() */ + dcache_line_size x2, x3 + sub x3, x2, #1 + bic x4, x10, x3 +2: dc cvau, x4 /* clean D line / unified line */ + add x4, x4, x2 + cmp x4, x1 + b.lo 2b + + ldr x19, [x19, #HIBERN_PBE_NEXT] + cbnz x19, 1b + dsb ish /* wait for PoU cleaning to finish */ + + /* switch to the restored kernels page tables */ + break_before_make_ttbr_switch x25, x21 + + ic ialluis + dsb ish + isb + + cbz x24, 3f /* Do we need to re-initialise EL2? */ + hvc #0 +3: ret + + .ltorg +ENDPROC(swsusp_arch_suspend_exit) + +/* + * Restore the hyp stub. + * This must be done before the hibernate page is unmapped by _cpu_resume(), + * but happens before any of the hyp-stub's code is cleaned to PoC. + * + * x24: The physical address of __hyp_stub_vectors + */ +el1_sync: + msr vbar_el2, x24 + eret +ENDPROC(el1_sync) + +.macro invalid_vector label +\label: + b \label +ENDPROC(\label) +.endm + + invalid_vector el2_sync_invalid + invalid_vector el2_irq_invalid + invalid_vector el2_fiq_invalid + invalid_vector el2_error_invalid + invalid_vector el1_sync_invalid + invalid_vector el1_irq_invalid + invalid_vector el1_fiq_invalid + invalid_vector el1_error_invalid + +/* el2 vectors - switch el2 here while we restore the memory image. */ + .align 11 +ENTRY(hibernate_el2_vectors) + ventry el2_sync_invalid // Synchronous EL2t + ventry el2_irq_invalid // IRQ EL2t + ventry el2_fiq_invalid // FIQ EL2t + ventry el2_error_invalid // Error EL2t + + ventry el2_sync_invalid // Synchronous EL2h + ventry el2_irq_invalid // IRQ EL2h + ventry el2_fiq_invalid // FIQ EL2h + ventry el2_error_invalid // Error EL2h + + ventry el1_sync // Synchronous 64-bit EL1 + ventry el1_irq_invalid // IRQ 64-bit EL1 + ventry el1_fiq_invalid // FIQ 64-bit EL1 + ventry el1_error_invalid // Error 64-bit EL1 + + ventry el1_sync_invalid // Synchronous 32-bit EL1 + ventry el1_irq_invalid // IRQ 32-bit EL1 + ventry el1_fiq_invalid // FIQ 32-bit EL1 + ventry el1_error_invalid // Error 32-bit EL1 +END(hibernate_el2_vectors) + +.popsection diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c new file mode 100644 index 0000000..f8df75d --- /dev/null +++ b/arch/arm64/kernel/hibernate.c @@ -0,0 +1,487 @@ +/*: + * Hibernate support specific for ARM64 + * + * Derived from work on ARM hibernation support by: + * + * Ubuntu project, hibernation support for mach-dove + * Copyright (C) 2010 Nokia Corporation (Hiroshi Doyu) + * Copyright (C) 2010 Texas Instruments, Inc. (Teerth Reddy et al.) + * https://lkml.org/lkml/2010/6/18/4 + * https://lists.linux-foundation.org/pipermail/linux-pm/2010-June/027422.html + * https://patchwork.kernel.org/patch/96442/ + * + * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl> + * + * License terms: GNU General Public License (GPL) version 2 + */ +#define pr_fmt(x) "hibernate: " x +#include <linux/kvm_host.h> +#include <linux/mm.h> +#include <linux/notifier.h> +#include <linux/pm.h> +#include <linux/sched.h> +#include <linux/suspend.h> +#include <linux/utsname.h> +#include <linux/version.h> + +#include <asm/barrier.h> +#include <asm/cacheflush.h> +#include <asm/irqflags.h> +#include <asm/memory.h> +#include <asm/mmu_context.h> +#include <asm/pgalloc.h> +#include <asm/pgtable.h> +#include <asm/pgtable-hwdef.h> +#include <asm/sections.h> +#include <asm/suspend.h> +#include <asm/virt.h> + +/* + * Hibernate core relies on this value being 0 on resume, and marks it + * __nosavedata assuming it will keep the resume kernel's '0' value. This + * doesn't happen with either KASLR. + * + * defined as "__visible int in_suspend __nosavedata" in + * kernel/power/hibernate.c + */ +extern int in_suspend; + +/* Find a symbols alias in the linear map */ +#define LMADDR(x) phys_to_virt(virt_to_phys(x)) + +/* Do we need to reset el2? */ +#define el2_reset_needed() (is_hyp_mode_available() && !is_kernel_in_hyp_mode()) + +/* + * Start/end of the hibernate exit code, this must be copied to a 'safe' + * location in memory, and executed from there. + */ +extern char __hibernate_exit_text_start[], __hibernate_exit_text_end[]; + +/* temporary el2 vectors in the __hibernate_exit_text section. */ +extern char hibernate_el2_vectors[]; + +/* hyp-stub vectors, used to restore el2 during resume from hibernate. */ +extern char __hyp_stub_vectors[]; + +/* + * Values that may not change over hibernate/resume. We put the build number + * and date in here so that we guarantee not to resume with a different + * kernel. + */ +struct arch_hibernate_hdr_invariants { + char uts_version[__NEW_UTS_LEN + 1]; +}; + +/* These values need to be know across a hibernate/restore. */ +static struct arch_hibernate_hdr { + struct arch_hibernate_hdr_invariants invariants; + + /* These are needed to find the relocated kernel if built with kaslr */ + phys_addr_t ttbr1_el1; + void (*reenter_kernel)(void); + + /* + * We need to know where the __hyp_stub_vectors are after restore to + * re-configure el2. + */ + phys_addr_t __hyp_stub_vectors; +} resume_hdr; + +static inline void arch_hdr_invariants(struct arch_hibernate_hdr_invariants *i) +{ + memset(i, 0, sizeof(*i)); + memcpy(i->uts_version, init_utsname()->version, sizeof(i->uts_version)); +} + +int pfn_is_nosave(unsigned long pfn) +{ + unsigned long nosave_begin_pfn = virt_to_pfn(&__nosave_begin); + unsigned long nosave_end_pfn = virt_to_pfn(&__nosave_end - 1); + + return (pfn >= nosave_begin_pfn) && (pfn <= nosave_end_pfn); +} + +void notrace save_processor_state(void) +{ + WARN_ON(num_online_cpus() != 1); +} + +void notrace restore_processor_state(void) +{ +} + +int arch_hibernation_header_save(void *addr, unsigned int max_size) +{ + struct arch_hibernate_hdr *hdr = addr; + + if (max_size < sizeof(*hdr)) + return -EOVERFLOW; + + arch_hdr_invariants(&hdr->invariants); + hdr->ttbr1_el1 = virt_to_phys(swapper_pg_dir); + hdr->reenter_kernel = _cpu_resume; + + /* We can't use __hyp_get_vectors() because kvm may still be loaded */ + if (el2_reset_needed()) + hdr->__hyp_stub_vectors = virt_to_phys(__hyp_stub_vectors); + else + hdr->__hyp_stub_vectors = 0; + + return 0; +} +EXPORT_SYMBOL(arch_hibernation_header_save); + +int arch_hibernation_header_restore(void *addr) +{ + struct arch_hibernate_hdr_invariants invariants; + struct arch_hibernate_hdr *hdr = addr; + + arch_hdr_invariants(&invariants); + if (memcmp(&hdr->invariants, &invariants, sizeof(invariants))) { + pr_crit("Hibernate image not generated by this kernel!\n"); + return -EINVAL; + } + + resume_hdr = *hdr; + + return 0; +} +EXPORT_SYMBOL(arch_hibernation_header_restore); + +/* + * Copies length bytes, starting at src_start into an new page, + * perform cache maintentance, then maps it at the specified address low + * address as executable. + * + * This is used by hibernate to copy the code it needs to execute when + * overwriting the kernel text. This function generates a new set of page + * tables, which it loads into ttbr0. + * + * Length is provided as we probably only want 4K of data, even on a 64K + * page system. + */ +static int create_safe_exec_page(void *src_start, size_t length, + unsigned long dst_addr, + phys_addr_t *phys_dst_addr, + void *(*allocator)(gfp_t mask), + gfp_t mask) +{ + int rc = 0; + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + unsigned long dst = (unsigned long)allocator(mask); + + if (!dst) { + rc = -ENOMEM; + goto out; + } + + memcpy((void *)dst, src_start, length); + flush_icache_range(dst, dst + length); + + pgd = pgd_offset_raw(allocator(mask), dst_addr); + if (pgd_none(*pgd)) { + pud = allocator(mask); + if (!pud) { + rc = -ENOMEM; + goto out; + } + pgd_populate(&init_mm, pgd, pud); + } + + pud = pud_offset(pgd, dst_addr); + if (pud_none(*pud)) { + pmd = allocator(mask); + if (!pmd) { + rc = -ENOMEM; + goto out; + } + pud_populate(&init_mm, pud, pmd); + } + + pmd = pmd_offset(pud, dst_addr); + if (pmd_none(*pmd)) { + pte = allocator(mask); + if (!pte) { + rc = -ENOMEM; + goto out; + } + pmd_populate_kernel(&init_mm, pmd, pte); + } + + pte = pte_offset_kernel(pmd, dst_addr); + set_pte(pte, __pte(virt_to_phys((void *)dst) | + pgprot_val(PAGE_KERNEL_EXEC))); + + /* Load our new page tables */ + asm volatile("msr ttbr0_el1, %0;" + "isb;" + "tlbi vmalle1is;" + "dsb ish;" + "isb" : : "r"(virt_to_phys(pgd))); + + *phys_dst_addr = virt_to_phys((void *)dst); + +out: + return rc; +} + + +int swsusp_arch_suspend(void) +{ + int ret = 0; + unsigned long flags; + struct sleep_stack_data state; + + local_dbg_save(flags); + + if (__cpu_suspend_enter(&state)) { + ret = swsusp_save(); + } else { + /* Clean kernel to PoC for secondary core startup */ + __flush_dcache_area(LMADDR(KERNEL_START), KERNEL_END - KERNEL_START); + + /* + * Tell the hibernation core that we've just restored + * the memory + */ + in_suspend = 0; + + __cpu_suspend_exit(); + } + + local_dbg_restore(flags); + + return ret; +} + +static int copy_pte(pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long start, + unsigned long end) +{ + pte_t *src_pte; + pte_t *dst_pte; + unsigned long addr = start; + + dst_pte = (pte_t *)get_safe_page(GFP_ATOMIC); + if (!dst_pte) + return -ENOMEM; + pmd_populate_kernel(&init_mm, dst_pmd, dst_pte); + dst_pte = pte_offset_kernel(dst_pmd, start); + + src_pte = pte_offset_kernel(src_pmd, start); + do { + if (!pte_none(*src_pte)) + /* + * Resume will overwrite areas that may be marked + * read only (code, rodata). Clear the RDONLY bit from + * the temporary mappings we use during restore. + */ + set_pte(dst_pte, __pte(pte_val(*src_pte) & ~PTE_RDONLY)); + } while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end); + + return 0; +} + +static int copy_pmd(pud_t *dst_pud, pud_t *src_pud, unsigned long start, + unsigned long end) +{ + pmd_t *src_pmd; + pmd_t *dst_pmd; + unsigned long next; + unsigned long addr = start; + + if (pud_none(*dst_pud)) { + dst_pmd = (pmd_t *)get_safe_page(GFP_ATOMIC); + if (!dst_pmd) + return -ENOMEM; + pud_populate(&init_mm, dst_pud, dst_pmd); + } + dst_pmd = pmd_offset(dst_pud, start); + + src_pmd = pmd_offset(src_pud, start); + do { + next = pmd_addr_end(addr, end); + if (pmd_none(*src_pmd)) + continue; + if (pmd_table(*src_pmd)) { + if (copy_pte(dst_pmd, src_pmd, addr, next)) + return -ENOMEM; + } else { + set_pmd(dst_pmd, + __pmd(pmd_val(*src_pmd) & ~PMD_SECT_RDONLY)); + } + } while (dst_pmd++, src_pmd++, addr = next, addr != end); + + return 0; +} + +static int copy_pud(pgd_t *dst_pgd, pgd_t *src_pgd, unsigned long start, + unsigned long end) +{ + pud_t *dst_pud; + pud_t *src_pud; + unsigned long next; + unsigned long addr = start; + + if (pgd_none(*dst_pgd)) { + dst_pud = (pud_t *)get_safe_page(GFP_ATOMIC); + if (!dst_pud) + return -ENOMEM; + pgd_populate(&init_mm, dst_pgd, dst_pud); + } + dst_pud = pud_offset(dst_pgd, start); + + src_pud = pud_offset(src_pgd, start); + do { + next = pud_addr_end(addr, end); + if (pud_none(*src_pud)) + continue; + if (pud_table(*(src_pud))) { + if (copy_pmd(dst_pud, src_pud, addr, next)) + return -ENOMEM; + } else { + set_pud(dst_pud, + __pud(pud_val(*src_pud) & ~PMD_SECT_RDONLY)); + } + } while (dst_pud++, src_pud++, addr = next, addr != end); + + return 0; +} + +static int copy_page_tables(pgd_t *dst_pgd, unsigned long start, + unsigned long end) +{ + unsigned long next; + unsigned long addr = start; + pgd_t *src_pgd = pgd_offset_k(start); + + dst_pgd = pgd_offset_raw(dst_pgd, start); + do { + next = pgd_addr_end(addr, end); + if (pgd_none(*src_pgd)) + continue; + if (copy_pud(dst_pgd, src_pgd, addr, next)) + return -ENOMEM; + } while (dst_pgd++, src_pgd++, addr = next, addr != end); + + return 0; +} + +/* + * Setup then Resume from the hibernate image using swsusp_arch_suspend_exit(). + * + * Memory allocated by get_safe_page() will be dealt with by the hibernate code, + * we don't need to free it here. + */ +int swsusp_arch_resume(void) +{ + int rc = 0; + void *zero_page; + size_t exit_size; + pgd_t *tmp_pg_dir; + void *lm_restore_pblist; + phys_addr_t phys_hibernate_exit; + void __noreturn (*hibernate_exit)(phys_addr_t, phys_addr_t, void *, + void *, phys_addr_t, phys_addr_t); + + /* + * Locate the exit code in the bottom-but-one page, so that *NULL + * still has disastrous affects. + */ + hibernate_exit = (void *)PAGE_SIZE; + exit_size = __hibernate_exit_text_end - __hibernate_exit_text_start; + /* + * Copy swsusp_arch_suspend_exit() to a safe page. This will generate + * a new set of ttbr0 page tables and load them. + */ + rc = create_safe_exec_page(__hibernate_exit_text_start, exit_size, + (unsigned long)hibernate_exit, + &phys_hibernate_exit, + (void *)get_safe_page, GFP_ATOMIC); + if (rc) { + pr_err("Failed to create safe executable page for hibernate_exit code."); + goto out; + } + + /* + * The hibernate exit text contains a set of el2 vectors, that will + * be executed at el2 with the mmu off in order to reload hyp-stub. + */ + __flush_dcache_area(hibernate_exit, exit_size); + + /* + * Restoring the memory image will overwrite the ttbr1 page tables. + * Create a second copy of just the linear map, and use this when + * restoring. + */ + tmp_pg_dir = (pgd_t *)get_safe_page(GFP_ATOMIC); + if (!tmp_pg_dir) { + pr_err("Failed to allocate memory for temporary page tables."); + rc = -ENOMEM; + goto out; + } + rc = copy_page_tables(tmp_pg_dir, PAGE_OFFSET, 0); + if (rc) + goto out; + + /* + * Since we only copied the linear map, we need to find restore_pblist's + * linear map address. + */ + lm_restore_pblist = LMADDR(restore_pblist); + + /* + * KASLR will cause the el2 vectors to be in a different location in + * the resumed kernel. Load hibernate's temporary copy into el2. + * + * We can skip this step if we booted at EL1, or are running with VHE. + */ + if (el2_reset_needed()) { + phys_addr_t el2_vectors = phys_hibernate_exit; /* base */ + el2_vectors += hibernate_el2_vectors - + __hibernate_exit_text_start; /* offset */ + + __hyp_set_vectors(el2_vectors); + } + + /* + * We need a zero page that is zero before & after resume in order to + * to break before make on the ttbr1 page tables. + */ + zero_page = (void *)get_safe_page(GFP_ATOMIC); + + hibernate_exit(virt_to_phys(tmp_pg_dir), resume_hdr.ttbr1_el1, + resume_hdr.reenter_kernel, lm_restore_pblist, + resume_hdr.__hyp_stub_vectors, virt_to_phys(zero_page)); + +out: + return rc; +} + +static int check_boot_cpu_online_pm_callback(struct notifier_block *nb, + unsigned long action, void *ptr) +{ + if (action == PM_HIBERNATION_PREPARE && + cpumask_first(cpu_online_mask) != 0) { + pr_warn("CPU0 is offline.\n"); + return notifier_from_errno(-ENODEV); + } + + return NOTIFY_OK; +} + +static int __init check_boot_cpu_online_init(void) +{ + /* + * Set this pm_notifier callback with a lower priority than + * cpu_hotplug_pm_callback, so that cpu_hotplug_pm_callback will be + * called earlier to disable cpu hotplug before the cpu online check. + */ + pm_notifier(check_boot_cpu_online_pm_callback, -INT_MAX); + + return 0; +} +core_initcall(check_boot_cpu_online_init); diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c index 4ef5373..ce21aa8 100644 --- a/arch/arm64/kernel/hw_breakpoint.c +++ b/arch/arm64/kernel/hw_breakpoint.c @@ -886,9 +886,11 @@ static int hw_breakpoint_reset_notify(struct notifier_block *self, unsigned long action, void *hcpu) { - int cpu = (long)hcpu; - if ((action & ~CPU_TASKS_FROZEN) == CPU_ONLINE) - smp_call_function_single(cpu, hw_breakpoint_reset, NULL, 1); + if ((action & ~CPU_TASKS_FROZEN) == CPU_ONLINE) { + local_irq_disable(); + hw_breakpoint_reset(NULL); + local_irq_enable(); + } return NOTIFY_OK; } diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S index a272f33..8727f44 100644 --- a/arch/arm64/kernel/hyp-stub.S +++ b/arch/arm64/kernel/hyp-stub.S @@ -22,6 +22,8 @@ #include <linux/irqchip/arm-gic-v3.h> #include <asm/assembler.h> +#include <asm/kvm_arm.h> +#include <asm/kvm_asm.h> #include <asm/ptrace.h> #include <asm/virt.h> @@ -53,15 +55,26 @@ ENDPROC(__hyp_stub_vectors) .align 11 el1_sync: - mrs x1, esr_el2 - lsr x1, x1, #26 - cmp x1, #0x16 - b.ne 2f // Not an HVC trap - cbz x0, 1f - msr vbar_el2, x0 // Set vbar_el2 - b 2f -1: mrs x0, vbar_el2 // Return vbar_el2 -2: eret + mrs x30, esr_el2 + lsr x30, x30, #ESR_ELx_EC_SHIFT + + cmp x30, #ESR_ELx_EC_HVC64 + b.ne 9f // Not an HVC trap + + cmp x0, #HVC_GET_VECTORS + b.ne 1f + mrs x0, vbar_el2 + b 9f + +1: cmp x0, #HVC_SET_VECTORS + b.ne 2f + msr vbar_el2, x1 + b 9f + + /* Someone called kvm_call_hyp() against the hyp-stub... */ +2: mov x0, #ARM_EXCEPTION_HYP_GONE + +9: eret ENDPROC(el1_sync) .macro invalid_vector label @@ -101,10 +114,18 @@ ENDPROC(\label) */ ENTRY(__hyp_get_vectors) - mov x0, xzr - // fall through -ENTRY(__hyp_set_vectors) + str lr, [sp, #-16]! + mov x0, #HVC_GET_VECTORS hvc #0 + ldr lr, [sp], #16 ret ENDPROC(__hyp_get_vectors) + +ENTRY(__hyp_set_vectors) + str lr, [sp, #-16]! + mov x1, x0 + mov x0, #HVC_SET_VECTORS + hvc #0 + ldr lr, [sp], #16 + ret ENDPROC(__hyp_set_vectors) diff --git a/arch/arm64/kernel/image.h b/arch/arm64/kernel/image.h index 1428849a..c7fcb23 100644 --- a/arch/arm64/kernel/image.h +++ b/arch/arm64/kernel/image.h @@ -73,6 +73,8 @@ #ifdef CONFIG_EFI +__efistub_stext_offset = stext - _text; + /* * Prevent the symbol aliases below from being emitted into the kallsyms * table, by forcing them to be absolute symbols (which are conveniently diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c index 7371455..368c082 100644 --- a/arch/arm64/kernel/insn.c +++ b/arch/arm64/kernel/insn.c @@ -96,7 +96,7 @@ static void __kprobes *patch_map(void *addr, int fixmap) if (module && IS_ENABLED(CONFIG_DEBUG_SET_MODULE_RONX)) page = vmalloc_to_page(addr); else if (!module && IS_ENABLED(CONFIG_DEBUG_RODATA)) - page = virt_to_page(addr); + page = pfn_to_page(PHYS_PFN(__pa(addr))); else return addr; diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c index 5829839..b054691 100644 --- a/arch/arm64/kernel/kaslr.c +++ b/arch/arm64/kernel/kaslr.c @@ -74,7 +74,7 @@ extern void *__init __fixmap_remap_fdt(phys_addr_t dt_phys, int *size, * containing function pointers) to be reinitialized, and zero-initialized * .bss variables will be reset to 0. */ -u64 __init kaslr_early_init(u64 dt_phys) +u64 __init kaslr_early_init(u64 dt_phys, u64 modulo_offset) { void *fdt; u64 seed, offset, mask, module_range; @@ -132,8 +132,8 @@ u64 __init kaslr_early_init(u64 dt_phys) * boundary (for 4KB/16KB/64KB granule kernels, respectively). If this * happens, increase the KASLR offset by the size of the kernel image. */ - if ((((u64)_text + offset) >> SWAPPER_TABLE_SHIFT) != - (((u64)_end + offset) >> SWAPPER_TABLE_SHIFT)) + if ((((u64)_text + offset + modulo_offset) >> SWAPPER_TABLE_SHIFT) != + (((u64)_end + offset + modulo_offset) >> SWAPPER_TABLE_SHIFT)) offset = (offset + (u64)(_end - _text)) & mask; if (IS_ENABLED(CONFIG_KASAN)) diff --git a/arch/arm64/kernel/pci.c b/arch/arm64/kernel/pci.c index c72de66..3c4e308 100644 --- a/arch/arm64/kernel/pci.c +++ b/arch/arm64/kernel/pci.c @@ -74,6 +74,16 @@ int raw_pci_write(unsigned int domain, unsigned int bus, return -ENXIO; } +#ifdef CONFIG_NUMA + +int pcibus_to_node(struct pci_bus *bus) +{ + return dev_to_node(&bus->dev); +} +EXPORT_SYMBOL(pcibus_to_node); + +#endif + #ifdef CONFIG_ACPI /* Root bridge scanning */ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root) diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 8062482..48eea68 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -265,9 +265,6 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start, if (stack_start) { if (is_compat_thread(task_thread_info(p))) childregs->compat_sp = stack_start; - /* 16-byte aligned stack mandatory on AArch64 */ - else if (stack_start & 15) - return -EINVAL; else childregs->sp = stack_start; } @@ -382,13 +379,14 @@ unsigned long arch_align_stack(unsigned long sp) return sp & ~0xf; } -static unsigned long randomize_base(unsigned long base) -{ - unsigned long range_end = base + (STACK_RND_MASK << PAGE_SHIFT) + 1; - return randomize_range(base, range_end, 0) ? : base; -} - unsigned long arch_randomize_brk(struct mm_struct *mm) { - return randomize_base(mm->brk); + unsigned long range_end = mm->brk; + + if (is_compat_task()) + range_end += 0x02000000; + else + range_end += 0x40000000; + + return randomize_range(mm->brk, range_end, 0) ? : mm->brk; } diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 9dc6776..3279def 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -53,6 +53,7 @@ #include <asm/cpufeature.h> #include <asm/cpu_ops.h> #include <asm/kasan.h> +#include <asm/numa.h> #include <asm/sections.h> #include <asm/setup.h> #include <asm/smp_plat.h> @@ -175,7 +176,6 @@ static void __init smp_build_mpidr_hash(void) */ if (mpidr_hash_size() > 4 * num_possible_cpus()) pr_warn("Large number of MPIDR hash buckets detected\n"); - __flush_dcache_area(&mpidr_hash, sizeof(struct mpidr_hash)); } static void __init setup_machine_fdt(phys_addr_t dt_phys) @@ -224,69 +224,6 @@ static void __init request_standard_resources(void) } } -#ifdef CONFIG_BLK_DEV_INITRD -/* - * Relocate initrd if it is not completely within the linear mapping. - * This would be the case if mem= cuts out all or part of it. - */ -static void __init relocate_initrd(void) -{ - phys_addr_t orig_start = __virt_to_phys(initrd_start); - phys_addr_t orig_end = __virt_to_phys(initrd_end); - phys_addr_t ram_end = memblock_end_of_DRAM(); - phys_addr_t new_start; - unsigned long size, to_free = 0; - void *dest; - - if (orig_end <= ram_end) - return; - - /* - * Any of the original initrd which overlaps the linear map should - * be freed after relocating. - */ - if (orig_start < ram_end) - to_free = ram_end - orig_start; - - size = orig_end - orig_start; - if (!size) - return; - - /* initrd needs to be relocated completely inside linear mapping */ - new_start = memblock_find_in_range(0, PFN_PHYS(max_pfn), - size, PAGE_SIZE); - if (!new_start) - panic("Cannot relocate initrd of size %ld\n", size); - memblock_reserve(new_start, size); - - initrd_start = __phys_to_virt(new_start); - initrd_end = initrd_start + size; - - pr_info("Moving initrd from [%llx-%llx] to [%llx-%llx]\n", - orig_start, orig_start + size - 1, - new_start, new_start + size - 1); - - dest = (void *)initrd_start; - - if (to_free) { - memcpy(dest, (void *)__phys_to_virt(orig_start), to_free); - dest += to_free; - } - - copy_from_early_mem(dest, orig_start + to_free, size - to_free); - - if (to_free) { - pr_info("Freeing original RAMDISK from [%llx-%llx]\n", - orig_start, orig_start + to_free - 1); - memblock_free(orig_start, to_free); - } -} -#else -static inline void __init relocate_initrd(void) -{ -} -#endif - u64 __cpu_logical_map[NR_CPUS] = { [0 ... NR_CPUS-1] = INVALID_HWID }; void __init setup_arch(char **cmdline_p) @@ -327,7 +264,11 @@ void __init setup_arch(char **cmdline_p) acpi_boot_table_init(); paging_init(); - relocate_initrd(); + + if (acpi_disabled) + unflatten_device_tree(); + + bootmem_init(); kasan_init(); @@ -335,12 +276,11 @@ void __init setup_arch(char **cmdline_p) early_ioremap_reset(); - if (acpi_disabled) { - unflatten_device_tree(); + if (acpi_disabled) psci_dt_init(); - } else { + else psci_acpi_init(); - } + xen_early_init(); cpu_read_bootcpu_ops(); @@ -379,6 +319,9 @@ static int __init topology_init(void) { int i; + for_each_online_node(i) + register_one_node(i); + for_each_possible_cpu(i) { struct cpu *cpu = &per_cpu(cpu_data.cpu, i); cpu->hotpluggable = 1; diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S index fd10eb6..9a3aec9 100644 --- a/arch/arm64/kernel/sleep.S +++ b/arch/arm64/kernel/sleep.S @@ -49,39 +49,32 @@ orr \dst, \dst, \mask // dst|=(aff3>>rs3) .endm /* - * Save CPU state for a suspend and execute the suspend finisher. - * On success it will return 0 through cpu_resume - ie through a CPU - * soft/hard reboot from the reset vector. - * On failure it returns the suspend finisher return value or force - * -EOPNOTSUPP if the finisher erroneously returns 0 (the suspend finisher - * is not allowed to return, if it does this must be considered failure). - * It saves callee registers, and allocates space on the kernel stack - * to save the CPU specific registers + some other data for resume. + * Save CPU state in the provided sleep_stack_data area, and publish its + * location for cpu_resume()'s use in sleep_save_stash. * - * x0 = suspend finisher argument - * x1 = suspend finisher function pointer + * cpu_resume() will restore this saved state, and return. Because the + * link-register is saved and restored, it will appear to return from this + * function. So that the caller can tell the suspend/resume paths apart, + * __cpu_suspend_enter() will always return a non-zero value, whereas the + * path through cpu_resume() will return 0. + * + * x0 = struct sleep_stack_data area */ ENTRY(__cpu_suspend_enter) - stp x29, lr, [sp, #-96]! - stp x19, x20, [sp,#16] - stp x21, x22, [sp,#32] - stp x23, x24, [sp,#48] - stp x25, x26, [sp,#64] - stp x27, x28, [sp,#80] - /* - * Stash suspend finisher and its argument in x20 and x19 - */ - mov x19, x0 - mov x20, x1 + stp x29, lr, [x0, #SLEEP_STACK_DATA_CALLEE_REGS] + stp x19, x20, [x0,#SLEEP_STACK_DATA_CALLEE_REGS+16] + stp x21, x22, [x0,#SLEEP_STACK_DATA_CALLEE_REGS+32] + stp x23, x24, [x0,#SLEEP_STACK_DATA_CALLEE_REGS+48] + stp x25, x26, [x0,#SLEEP_STACK_DATA_CALLEE_REGS+64] + stp x27, x28, [x0,#SLEEP_STACK_DATA_CALLEE_REGS+80] + + /* save the sp in cpu_suspend_ctx */ mov x2, sp - sub sp, sp, #CPU_SUSPEND_SZ // allocate cpu_suspend_ctx - mov x0, sp - /* - * x0 now points to struct cpu_suspend_ctx allocated on the stack - */ - str x2, [x0, #CPU_CTX_SP] - ldr x1, =sleep_save_sp - ldr x1, [x1, #SLEEP_SAVE_SP_VIRT] + str x2, [x0, #SLEEP_STACK_DATA_SYSTEM_REGS + CPU_CTX_SP] + + /* find the mpidr_hash */ + ldr x1, =sleep_save_stash + ldr x1, [x1] mrs x7, mpidr_el1 ldr x9, =mpidr_hash ldr x10, [x9, #MPIDR_HASH_MASK] @@ -93,74 +86,28 @@ ENTRY(__cpu_suspend_enter) ldp w5, w6, [x9, #(MPIDR_HASH_SHIFTS + 8)] compute_mpidr_hash x8, x3, x4, x5, x6, x7, x10 add x1, x1, x8, lsl #3 - bl __cpu_suspend_save - /* - * Grab suspend finisher in x20 and its argument in x19 - */ - mov x0, x19 - mov x1, x20 - /* - * We are ready for power down, fire off the suspend finisher - * in x1, with argument in x0 - */ - blr x1 - /* - * Never gets here, unless suspend finisher fails. - * Successful cpu_suspend should return from cpu_resume, returning - * through this code path is considered an error - * If the return value is set to 0 force x0 = -EOPNOTSUPP - * to make sure a proper error condition is propagated - */ - cmp x0, #0 - mov x3, #-EOPNOTSUPP - csel x0, x3, x0, eq - add sp, sp, #CPU_SUSPEND_SZ // rewind stack pointer - ldp x19, x20, [sp, #16] - ldp x21, x22, [sp, #32] - ldp x23, x24, [sp, #48] - ldp x25, x26, [sp, #64] - ldp x27, x28, [sp, #80] - ldp x29, lr, [sp], #96 + + str x0, [x1] + add x0, x0, #SLEEP_STACK_DATA_SYSTEM_REGS + stp x29, lr, [sp, #-16]! + bl cpu_do_suspend + ldp x29, lr, [sp], #16 + mov x0, #1 ret ENDPROC(__cpu_suspend_enter) .ltorg -/* - * x0 must contain the sctlr value retrieved from restored context - */ - .pushsection ".idmap.text", "ax" -ENTRY(cpu_resume_mmu) - ldr x3, =cpu_resume_after_mmu - msr sctlr_el1, x0 // restore sctlr_el1 - isb - /* - * Invalidate the local I-cache so that any instructions fetched - * speculatively from the PoC are discarded, since they may have - * been dynamically patched at the PoU. - */ - ic iallu - dsb nsh - isb - br x3 // global jump to virtual address -ENDPROC(cpu_resume_mmu) - .popsection -cpu_resume_after_mmu: -#ifdef CONFIG_KASAN - mov x0, sp - bl kasan_unpoison_remaining_stack -#endif - mov x0, #0 // return zero on success - ldp x19, x20, [sp, #16] - ldp x21, x22, [sp, #32] - ldp x23, x24, [sp, #48] - ldp x25, x26, [sp, #64] - ldp x27, x28, [sp, #80] - ldp x29, lr, [sp], #96 - ret -ENDPROC(cpu_resume_after_mmu) - ENTRY(cpu_resume) bl el2_setup // if in EL2 drop to EL1 cleanly + /* enable the MMU early - so we can access sleep_save_stash by va */ + adr_l lr, __enable_mmu /* __cpu_setup will return here */ + ldr x27, =_cpu_resume /* __enable_mmu will branch here */ + adrp x25, idmap_pg_dir + adrp x26, swapper_pg_dir + b __cpu_setup +ENDPROC(cpu_resume) + +ENTRY(_cpu_resume) mrs x1, mpidr_el1 adrp x8, mpidr_hash add x8, x8, #:lo12:mpidr_hash // x8 = struct mpidr_hash phys address @@ -170,20 +117,32 @@ ENTRY(cpu_resume) ldp w5, w6, [x8, #(MPIDR_HASH_SHIFTS + 8)] compute_mpidr_hash x7, x3, x4, x5, x6, x1, x2 /* x7 contains hash index, let's use it to grab context pointer */ - ldr_l x0, sleep_save_sp + SLEEP_SAVE_SP_PHYS + ldr_l x0, sleep_save_stash ldr x0, [x0, x7, lsl #3] + add x29, x0, #SLEEP_STACK_DATA_CALLEE_REGS + add x0, x0, #SLEEP_STACK_DATA_SYSTEM_REGS /* load sp from context */ ldr x2, [x0, #CPU_CTX_SP] - /* load physical address of identity map page table in x1 */ - adrp x1, idmap_pg_dir mov sp, x2 /* save thread_info */ and x2, x2, #~(THREAD_SIZE - 1) msr sp_el0, x2 /* - * cpu_do_resume expects x0 to contain context physical address - * pointer and x1 to contain physical address of 1:1 page tables + * cpu_do_resume expects x0 to contain context address pointer */ - bl cpu_do_resume // PC relative jump, MMU off - b cpu_resume_mmu // Resume MMU, never returns -ENDPROC(cpu_resume) + bl cpu_do_resume + +#ifdef CONFIG_KASAN + mov x0, sp + bl kasan_unpoison_remaining_stack +#endif + + ldp x19, x20, [x29, #16] + ldp x21, x22, [x29, #32] + ldp x23, x24, [x29, #48] + ldp x25, x26, [x29, #64] + ldp x27, x28, [x29, #80] + ldp x29, lr, [x29] + mov x0, #0 + ret +ENDPROC(_cpu_resume) diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index b2d5f4e..678e084 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -45,6 +45,7 @@ #include <asm/cputype.h> #include <asm/cpu_ops.h> #include <asm/mmu_context.h> +#include <asm/numa.h> #include <asm/pgtable.h> #include <asm/pgalloc.h> #include <asm/processor.h> @@ -75,6 +76,43 @@ enum ipi_msg_type { IPI_WAKEUP }; +#ifdef CONFIG_ARM64_VHE + +/* Whether the boot CPU is running in HYP mode or not*/ +static bool boot_cpu_hyp_mode; + +static inline void save_boot_cpu_run_el(void) +{ + boot_cpu_hyp_mode = is_kernel_in_hyp_mode(); +} + +static inline bool is_boot_cpu_in_hyp_mode(void) +{ + return boot_cpu_hyp_mode; +} + +/* + * Verify that a secondary CPU is running the kernel at the same + * EL as that of the boot CPU. + */ +void verify_cpu_run_el(void) +{ + bool in_el2 = is_kernel_in_hyp_mode(); + bool boot_cpu_el2 = is_boot_cpu_in_hyp_mode(); + + if (in_el2 ^ boot_cpu_el2) { + pr_crit("CPU%d: mismatched Exception Level(EL%d) with boot CPU(EL%d)\n", + smp_processor_id(), + in_el2 ? 2 : 1, + boot_cpu_el2 ? 2 : 1); + cpu_panic_kernel(); + } +} + +#else +static inline void save_boot_cpu_run_el(void) {} +#endif + #ifdef CONFIG_HOTPLUG_CPU static int op_cpu_kill(unsigned int cpu); #else @@ -166,6 +204,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle) static void smp_store_cpu_info(unsigned int cpuid) { store_cpu_topology(cpuid); + numa_store_cpu_info(cpuid); } /* @@ -225,8 +264,6 @@ asmlinkage void secondary_start_kernel(void) pr_info("CPU%u: Booted secondary processor [%08x]\n", cpu, read_cpuid_id()); update_cpu_boot_status(CPU_BOOT_SUCCESS); - /* Make sure the status update is visible before we complete */ - smp_wmb(); set_cpu_online(cpu, true); complete(&cpu_running); @@ -401,6 +438,7 @@ void __init smp_cpus_done(unsigned int max_cpus) void __init smp_prepare_boot_cpu(void) { cpuinfo_store_boot_cpu(); + save_boot_cpu_run_el(); set_my_cpu_offset(per_cpu_offset(smp_processor_id())); } @@ -595,6 +633,8 @@ static void __init of_parse_and_init_cpus(void) pr_debug("cpu logical map 0x%llx\n", hwid); cpu_logical_map(cpu_count) = hwid; + + early_map_cpu_to_node(cpu_count, of_node_to_nid(dn)); next: cpu_count++; } @@ -647,33 +687,18 @@ void __init smp_init_cpus(void) void __init smp_prepare_cpus(unsigned int max_cpus) { int err; - unsigned int cpu, ncores = num_possible_cpus(); + unsigned int cpu; init_cpu_topology(); smp_store_cpu_info(smp_processor_id()); /* - * are we trying to boot more cores than exist? - */ - if (max_cpus > ncores) - max_cpus = ncores; - - /* Don't bother if we're effectively UP */ - if (max_cpus <= 1) - return; - - /* * Initialise the present map (which describes the set of CPUs * actually populated at the present time) and release the * secondaries from the bootloader. - * - * Make sure we online at most (max_cpus - 1) additional CPUs. */ - max_cpus--; for_each_possible_cpu(cpu) { - if (max_cpus == 0) - break; if (cpu == smp_processor_id()) continue; @@ -686,7 +711,6 @@ void __init smp_prepare_cpus(unsigned int max_cpus) continue; set_cpu_present(cpu, true); - max_cpus--; } } @@ -763,21 +787,11 @@ void arch_irq_work_raise(void) } #endif -static DEFINE_RAW_SPINLOCK(stop_lock); - /* * ipi_cpu_stop - handle IPI from smp_send_stop() */ static void ipi_cpu_stop(unsigned int cpu) { - if (system_state == SYSTEM_BOOTING || - system_state == SYSTEM_RUNNING) { - raw_spin_lock(&stop_lock); - pr_crit("CPU%u: stopping\n", cpu); - dump_stack(); - raw_spin_unlock(&stop_lock); - } - set_cpu_online(cpu, false); local_irq_disable(); @@ -872,6 +886,9 @@ void smp_send_stop(void) cpumask_copy(&mask, cpu_online_mask); cpumask_clear_cpu(smp_processor_id(), &mask); + if (system_state == SYSTEM_BOOTING || + system_state == SYSTEM_RUNNING) + pr_crit("SMP: stopping secondary CPUs\n"); smp_cross_call(&mask, IPI_CPU_STOP); } @@ -881,7 +898,8 @@ void smp_send_stop(void) udelay(1); if (num_online_cpus() > 1) - pr_warning("SMP: failed to stop secondary CPUs\n"); + pr_warning("SMP: failed to stop secondary CPUs %*pbl\n", + cpumask_pr_args(cpu_online_mask)); } /* diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c index 6605539..b616e36 100644 --- a/arch/arm64/kernel/suspend.c +++ b/arch/arm64/kernel/suspend.c @@ -10,30 +10,11 @@ #include <asm/suspend.h> #include <asm/tlbflush.h> -extern int __cpu_suspend_enter(unsigned long arg, int (*fn)(unsigned long)); /* - * This is called by __cpu_suspend_enter() to save the state, and do whatever - * flushing is required to ensure that when the CPU goes to sleep we have - * the necessary data available when the caches are not searched. - * - * ptr: CPU context virtual address - * save_ptr: address of the location where the context physical address - * must be saved + * This is allocated by cpu_suspend_init(), and used to store a pointer to + * the 'struct sleep_stack_data' the contains a particular CPUs state. */ -void notrace __cpu_suspend_save(struct cpu_suspend_ctx *ptr, - phys_addr_t *save_ptr) -{ - *save_ptr = virt_to_phys(ptr); - - cpu_do_suspend(ptr); - /* - * Only flush the context that must be retrieved with the MMU - * off. VA primitives ensure the flush is applied to all - * cache levels so context is pushed to DRAM. - */ - __flush_dcache_area(ptr, sizeof(*ptr)); - __flush_dcache_area(save_ptr, sizeof(*save_ptr)); -} +unsigned long *sleep_save_stash; /* * This hook is provided so that cpu_suspend code can restore HW @@ -51,6 +32,30 @@ void __init cpu_suspend_set_dbg_restorer(void (*hw_bp_restore)(void *)) hw_breakpoint_restore = hw_bp_restore; } +void notrace __cpu_suspend_exit(void) +{ + /* + * We are resuming from reset with the idmap active in TTBR0_EL1. + * We must uninstall the idmap and restore the expected MMU + * state before we can possibly return to userspace. + */ + cpu_uninstall_idmap(); + + /* + * Restore per-cpu offset before any kernel + * subsystem relying on it has a chance to run. + */ + set_my_cpu_offset(per_cpu_offset(smp_processor_id())); + + /* + * Restore HW breakpoint registers to sane values + * before debug exceptions are possibly reenabled + * through local_dbg_restore. + */ + if (hw_breakpoint_restore) + hw_breakpoint_restore(NULL); +} + /* * cpu_suspend * @@ -60,8 +65,9 @@ void __init cpu_suspend_set_dbg_restorer(void (*hw_bp_restore)(void *)) */ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) { - int ret; + int ret = 0; unsigned long flags; + struct sleep_stack_data state; /* * From this point debug exceptions are disabled to prevent @@ -77,34 +83,21 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) */ pause_graph_tracing(); - /* - * mm context saved on the stack, it will be restored when - * the cpu comes out of reset through the identity mapped - * page tables, so that the thread address space is properly - * set-up on function return. - */ - ret = __cpu_suspend_enter(arg, fn); - if (ret == 0) { - /* - * We are resuming from reset with the idmap active in TTBR0_EL1. - * We must uninstall the idmap and restore the expected MMU - * state before we can possibly return to userspace. - */ - cpu_uninstall_idmap(); + if (__cpu_suspend_enter(&state)) { + /* Call the suspend finisher */ + ret = fn(arg); /* - * Restore per-cpu offset before any kernel - * subsystem relying on it has a chance to run. + * Never gets here, unless the suspend finisher fails. + * Successful cpu_suspend() should return from cpu_resume(), + * returning through this code path is considered an error + * If the return value is set to 0 force ret = -EOPNOTSUPP + * to make sure a proper error condition is propagated */ - set_my_cpu_offset(per_cpu_offset(smp_processor_id())); - - /* - * Restore HW breakpoint registers to sane values - * before debug exceptions are possibly reenabled - * through local_dbg_restore. - */ - if (hw_breakpoint_restore) - hw_breakpoint_restore(NULL); + if (!ret) + ret = -EOPNOTSUPP; + } else { + __cpu_suspend_exit(); } unpause_graph_tracing(); @@ -119,22 +112,15 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) return ret; } -struct sleep_save_sp sleep_save_sp; - static int __init cpu_suspend_init(void) { - void *ctx_ptr; - /* ctx_ptr is an array of physical addresses */ - ctx_ptr = kcalloc(mpidr_hash_size(), sizeof(phys_addr_t), GFP_KERNEL); + sleep_save_stash = kcalloc(mpidr_hash_size(), sizeof(*sleep_save_stash), + GFP_KERNEL); - if (WARN_ON(!ctx_ptr)) + if (WARN_ON(!sleep_save_stash)) return -ENOMEM; - sleep_save_sp.save_ptr_stash = ctx_ptr; - sleep_save_sp.save_ptr_stash_phys = virt_to_phys(ctx_ptr); - __flush_dcache_area(&sleep_save_sp, sizeof(struct sleep_save_sp)); - return 0; } early_initcall(cpu_suspend_init); diff --git a/arch/arm64/kernel/sys.c b/arch/arm64/kernel/sys.c index 75151aa..26fe8ea 100644 --- a/arch/arm64/kernel/sys.c +++ b/arch/arm64/kernel/sys.c @@ -25,6 +25,7 @@ #include <linux/sched.h> #include <linux/slab.h> #include <linux/syscalls.h> +#include <asm/cpufeature.h> asmlinkage long sys_mmap(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, @@ -36,11 +37,20 @@ asmlinkage long sys_mmap(unsigned long addr, unsigned long len, return sys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT); } +SYSCALL_DEFINE1(arm64_personality, unsigned int, personality) +{ + if (personality(personality) == PER_LINUX32 && + !system_supports_32bit_el0()) + return -EINVAL; + return sys_personality(personality); +} + /* * Wrappers to pass the pt_regs argument. */ asmlinkage long sys_rt_sigreturn_wrapper(void); #define sys_rt_sigreturn sys_rt_sigreturn_wrapper +#define sys_personality sys_arm64_personality #undef __SYSCALL #define __SYSCALL(nr, sym) [nr] = sym, diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index 97bc68f..64fc030 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -131,11 +131,11 @@ static int __init vdso_init(void) return -ENOMEM; /* Grab the vDSO data page. */ - vdso_pagelist[0] = virt_to_page(vdso_data); + vdso_pagelist[0] = pfn_to_page(PHYS_PFN(__pa(vdso_data))); /* Grab the vDSO code pages. */ for (i = 0; i < vdso_pages; i++) - vdso_pagelist[i + 1] = virt_to_page(&vdso_start + i * PAGE_SIZE); + vdso_pagelist[i + 1] = pfn_to_page(PHYS_PFN(__pa(&vdso_start)) + i); /* Populate the special mapping structures */ vdso_spec[0] = (struct vm_special_mapping) { diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 5a1939a..435e820 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -46,6 +46,16 @@ jiffies = jiffies_64; *(.idmap.text) \ VMLINUX_SYMBOL(__idmap_text_end) = .; +#ifdef CONFIG_HIBERNATION +#define HIBERNATE_TEXT \ + . = ALIGN(SZ_4K); \ + VMLINUX_SYMBOL(__hibernate_exit_text_start) = .;\ + *(.hibernate_exit.text) \ + VMLINUX_SYMBOL(__hibernate_exit_text_end) = .; +#else +#define HIBERNATE_TEXT +#endif + /* * The size of the PE/COFF section that covers the kernel image, which * runs from stext to _edata, must be a round multiple of the PE/COFF @@ -63,14 +73,19 @@ PECOFF_FILE_ALIGNMENT = 0x200; #endif #if defined(CONFIG_DEBUG_ALIGN_RODATA) -#define ALIGN_DEBUG_RO . = ALIGN(1<<SECTION_SHIFT); -#define ALIGN_DEBUG_RO_MIN(min) ALIGN_DEBUG_RO -#elif defined(CONFIG_DEBUG_RODATA) -#define ALIGN_DEBUG_RO . = ALIGN(1<<PAGE_SHIFT); -#define ALIGN_DEBUG_RO_MIN(min) ALIGN_DEBUG_RO +/* + * 4 KB granule: 1 level 2 entry + * 16 KB granule: 128 level 3 entries, with contiguous bit + * 64 KB granule: 32 level 3 entries, with contiguous bit + */ +#define SEGMENT_ALIGN SZ_2M #else -#define ALIGN_DEBUG_RO -#define ALIGN_DEBUG_RO_MIN(min) . = ALIGN(min); +/* + * 4 KB granule: 16 level 3 entries, with contiguous bit + * 16 KB granule: 4 level 3 entries, without contiguous bit + * 64 KB granule: 1 level 3 entry + */ +#define SEGMENT_ALIGN SZ_64K #endif SECTIONS @@ -96,7 +111,6 @@ SECTIONS _text = .; HEAD_TEXT } - ALIGN_DEBUG_RO_MIN(PAGE_SIZE) .text : { /* Real text segment */ _stext = .; /* Text and read-only data */ __exception_text_start = .; @@ -109,18 +123,19 @@ SECTIONS LOCK_TEXT HYPERVISOR_TEXT IDMAP_TEXT + HIBERNATE_TEXT *(.fixup) *(.gnu.warning) . = ALIGN(16); *(.got) /* Global offset table */ } - ALIGN_DEBUG_RO_MIN(PAGE_SIZE) + . = ALIGN(SEGMENT_ALIGN); RO_DATA(PAGE_SIZE) /* everything from this point to */ EXCEPTION_TABLE(8) /* _etext will be marked RO NX */ NOTES - ALIGN_DEBUG_RO_MIN(PAGE_SIZE) + . = ALIGN(SEGMENT_ALIGN); _etext = .; /* End of text and rodata section */ __init_begin = .; @@ -154,12 +169,9 @@ SECTIONS *(.altinstr_replacement) } .rela : ALIGN(8) { - __reloc_start = .; *(.rela .rela*) - __reloc_end = .; } .dynsym : ALIGN(8) { - __dynsym_start = .; *(.dynsym) } .dynstr : { @@ -169,7 +181,11 @@ SECTIONS *(.hash) } - . = ALIGN(PAGE_SIZE); + __rela_offset = ADDR(.rela) - KIMAGE_VADDR; + __rela_size = SIZEOF(.rela); + __dynsym_offset = ADDR(.dynsym) - KIMAGE_VADDR; + + . = ALIGN(SEGMENT_ALIGN); __init_end = .; _data = .; @@ -201,6 +217,10 @@ ASSERT(__hyp_idmap_text_end - (__hyp_idmap_text_start & ~(SZ_4K - 1)) <= SZ_4K, "HYP init code too big or misaligned") ASSERT(__idmap_text_end - (__idmap_text_start & ~(SZ_4K - 1)) <= SZ_4K, "ID map text too big or misaligned") +#ifdef CONFIG_HIBERNATION +ASSERT(__hibernate_exit_text_end - (__hibernate_exit_text_start & ~(SZ_4K - 1)) + <= SZ_4K, "Hibernate exit text too big or misaligned") +#endif /* * If padding is applied before .head.text, virt<->phys conversions will fail. |