diff options
author | Luiz Souza <luiz@netgate.com> | 2018-02-21 14:21:29 -0300 |
---|---|---|
committer | Luiz Souza <luiz@netgate.com> | 2018-02-21 14:21:29 -0300 |
commit | d3d59b01294138e59995b31d2bcbbbdf45e26a3c (patch) | |
tree | 11fbc866b8a4436698a543bf2dc20a8360bcf479 /sys/amd64/amd64/pmap.c | |
parent | 63302e53ed4b3fe59711d939ba87433a9a12199d (diff) | |
download | FreeBSD-src-d3d59b01294138e59995b31d2bcbbbdf45e26a3c.zip FreeBSD-src-d3d59b01294138e59995b31d2bcbbbdf45e26a3c.tar.gz |
Revert "MFC r328083,328096,328116,328119,328120,328128,328135,328153,328157,"
This reverts commit 430a2bea3907149b30cc75fc722b6cf1f81da82a.
Diffstat (limited to 'sys/amd64/amd64/pmap.c')
-rw-r--r-- | sys/amd64/amd64/pmap.c | 574 |
1 files changed, 29 insertions, 545 deletions
diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c index c7317b1..a7ce847 100644 --- a/sys/amd64/amd64/pmap.c +++ b/sys/amd64/amd64/pmap.c @@ -9,17 +9,11 @@ * All rights reserved. * Copyright (c) 2005-2010 Alan L. Cox <alc@cs.rice.edu> * All rights reserved. - * Copyright (c) 2014-2018 The FreeBSD Foundation - * All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department and William Jolitz of UUNET Technologies Inc. * - * Portions of this software were developed by - * Konstantin Belousov <kib@FreeBSD.org> under sponsorship from - * the FreeBSD Foundation. - * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: @@ -153,7 +147,6 @@ __FBSDID("$FreeBSD$"); #ifdef SMP #include <machine/smp.h> #endif -#include <machine/tss.h> static __inline boolean_t pmap_type_guest(pmap_t pmap) @@ -215,8 +208,6 @@ pmap_rw_bit(pmap_t pmap) return (mask); } -static pt_entry_t pg_g; - static __inline pt_entry_t pmap_global_bit(pmap_t pmap) { @@ -224,7 +215,7 @@ pmap_global_bit(pmap_t pmap) switch (pmap->pm_type) { case PT_X86: - mask = pg_g; + mask = X86_PG_G; break; case PT_RVI: case PT_EPT: @@ -414,15 +405,6 @@ int invpcid_works = 0; SYSCTL_INT(_vm_pmap, OID_AUTO, invpcid_works, CTLFLAG_RD, &invpcid_works, 0, "Is the invpcid instruction available ?"); -int pti = 0; -SYSCTL_INT(_vm_pmap, OID_AUTO, pti, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, - &pti, 0, - "Page Table Isolation enabled"); -static vm_object_t pti_obj; -static pml4_entry_t *pti_pml4; -static vm_pindex_t pti_pg_idx; -static bool pti_finalized; - static int pmap_pcid_save_cnt_proc(SYSCTL_HANDLER_ARGS) { @@ -640,11 +622,6 @@ static void pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, static boolean_t pmap_protect_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t sva, vm_prot_t prot); static void pmap_pte_attr(pt_entry_t *pte, int cache_bits, int mask); -static void pmap_pti_add_kva_locked(vm_offset_t sva, vm_offset_t eva, - bool exec); -static pdp_entry_t *pmap_pti_pdpe(vm_offset_t va); -static pd_entry_t *pmap_pti_pde(vm_offset_t va); -static void pmap_pti_wire_pte(void *pte); static int pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva, struct spglist *free, struct rwlock **lockp); static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t sva, @@ -924,7 +901,7 @@ create_pagetables(vm_paddr_t *firstaddr) /* XXX not fully used, underneath 2M pages */ pt_p = (pt_entry_t *)KPTphys; for (i = 0; ptoa(i) < *firstaddr; i++) - pt_p[i] = ptoa(i) | X86_PG_RW | X86_PG_V | pg_g; + pt_p[i] = ptoa(i) | X86_PG_RW | X86_PG_V | X86_PG_G; /* Now map the page tables at their location within PTmap */ pd_p = (pd_entry_t *)KPDphys; @@ -935,7 +912,7 @@ create_pagetables(vm_paddr_t *firstaddr) /* This replaces some of the KPTphys entries above */ for (i = 0; (i << PDRSHIFT) < *firstaddr; i++) pd_p[i] = (i << PDRSHIFT) | X86_PG_RW | X86_PG_V | PG_PS | - pg_g; + X86_PG_G; /* And connect up the PD to the PDP (leaving room for L4 pages) */ pdp_p = (pdp_entry_t *)(KPDPphys + ptoa(KPML4I - KPML4BASE)); @@ -955,14 +932,14 @@ create_pagetables(vm_paddr_t *firstaddr) for (i = NPDEPG * ndm1g, j = 0; i < NPDEPG * ndmpdp; i++, j++) { pd_p[j] = (vm_paddr_t)i << PDRSHIFT; /* Preset PG_M and PG_A because demotion expects it. */ - pd_p[j] |= X86_PG_RW | X86_PG_V | PG_PS | pg_g | + pd_p[j] |= X86_PG_RW | X86_PG_V | PG_PS | X86_PG_G | X86_PG_M | X86_PG_A; } pdp_p = (pdp_entry_t *)DMPDPphys; for (i = 0; i < ndm1g; i++) { pdp_p[i] = (vm_paddr_t)i << PDPSHIFT; /* Preset PG_M and PG_A because demotion expects it. */ - pdp_p[i] |= X86_PG_RW | X86_PG_V | PG_PS | pg_g | + pdp_p[i] |= X86_PG_RW | X86_PG_V | PG_PS | X86_PG_G | X86_PG_M | X86_PG_A; } for (j = 0; i < ndmpdp; i++, j++) { @@ -1005,9 +982,6 @@ pmap_bootstrap(vm_paddr_t *firstaddr) pt_entry_t *pte; int i; - if (!pti) - pg_g = X86_PG_G; - /* * Create an initial set of page tables to run the kernel in. */ @@ -1040,7 +1014,6 @@ pmap_bootstrap(vm_paddr_t *firstaddr) PMAP_LOCK_INIT(kernel_pmap); kernel_pmap->pm_pml4 = (pdp_entry_t *)PHYS_TO_DMAP(KPML4phys); kernel_pmap->pm_cr3 = KPML4phys; - kernel_pmap->pm_ucr3 = PMAP_NO_CR3; CPU_FILL(&kernel_pmap->pm_active); /* don't allow deactivation */ TAILQ_INIT(&kernel_pmap->pm_pvchunk); kernel_pmap->pm_flags = pmap_flags; @@ -1555,9 +1528,6 @@ void pmap_invalidate_page(pmap_t pmap, vm_offset_t va) { cpuset_t *mask; - struct invpcid_descr d; - uint64_t kcr3, ucr3; - uint32_t pcid; u_int cpuid, i; if (pmap_type_guest(pmap)) { @@ -1574,32 +1544,9 @@ pmap_invalidate_page(pmap_t pmap, vm_offset_t va) mask = &all_cpus; } else { cpuid = PCPU_GET(cpuid); - if (pmap == PCPU_GET(curpmap)) { + if (pmap == PCPU_GET(curpmap)) invlpg(va); - if (pmap_pcid_enabled && pmap->pm_ucr3 != PMAP_NO_CR3) { - /* - * Disable context switching. pm_pcid - * is recalculated on switch, which - * might make us use wrong pcid below. - */ - critical_enter(); - pcid = pmap->pm_pcids[cpuid].pm_pcid; - - if (invpcid_works) { - d.pcid = pcid | PMAP_PCID_USER_PT; - d.pad = 0; - d.addr = va; - invpcid(&d, INVPCID_ADDR); - } else { - kcr3 = pmap->pm_cr3 | pcid | - CR3_PCID_SAVE; - ucr3 = pmap->pm_ucr3 | pcid | - PMAP_PCID_USER_PT | CR3_PCID_SAVE; - pmap_pti_pcid_invlpg(ucr3, kcr3, va); - } - critical_exit(); - } - } else if (pmap_pcid_enabled) + else if (pmap_pcid_enabled) pmap->pm_pcids[cpuid].pm_gen = 0; if (pmap_pcid_enabled) { CPU_FOREACH(i) { @@ -1609,7 +1556,7 @@ pmap_invalidate_page(pmap_t pmap, vm_offset_t va) } mask = &pmap->pm_active; } - smp_masked_invlpg(*mask, va, pmap); + smp_masked_invlpg(*mask, va); sched_unpin(); } @@ -1620,10 +1567,7 @@ void pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { cpuset_t *mask; - struct invpcid_descr d; vm_offset_t addr; - uint64_t kcr3, ucr3; - uint32_t pcid; u_int cpuid, i; if (eva - sva >= PMAP_INVLPG_THRESHOLD) { @@ -1649,26 +1593,6 @@ pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) if (pmap == PCPU_GET(curpmap)) { for (addr = sva; addr < eva; addr += PAGE_SIZE) invlpg(addr); - if (pmap_pcid_enabled && pmap->pm_ucr3 != PMAP_NO_CR3) { - critical_enter(); - pcid = pmap->pm_pcids[cpuid].pm_pcid; - if (invpcid_works) { - d.pcid = pcid | PMAP_PCID_USER_PT; - d.pad = 0; - d.addr = sva; - for (; d.addr < eva; d.addr += - PAGE_SIZE) - invpcid(&d, INVPCID_ADDR); - } else { - kcr3 = pmap->pm_cr3 | pcid | - CR3_PCID_SAVE; - ucr3 = pmap->pm_ucr3 | pcid | - PMAP_PCID_USER_PT | CR3_PCID_SAVE; - pmap_pti_pcid_invlrng(ucr3, kcr3, sva, - eva); - } - critical_exit(); - } } else if (pmap_pcid_enabled) { pmap->pm_pcids[cpuid].pm_gen = 0; } @@ -1680,7 +1604,7 @@ pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) } mask = &pmap->pm_active; } - smp_masked_invlpg_range(*mask, sva, eva, pmap); + smp_masked_invlpg_range(*mask, sva, eva); sched_unpin(); } @@ -1689,8 +1613,6 @@ pmap_invalidate_all(pmap_t pmap) { cpuset_t *mask; struct invpcid_descr d; - uint64_t kcr3, ucr3; - uint32_t pcid; u_int cpuid, i; if (pmap_type_guest(pmap)) { @@ -1714,29 +1636,15 @@ pmap_invalidate_all(pmap_t pmap) cpuid = PCPU_GET(cpuid); if (pmap == PCPU_GET(curpmap)) { if (pmap_pcid_enabled) { - critical_enter(); - pcid = pmap->pm_pcids[cpuid].pm_pcid; if (invpcid_works) { - d.pcid = pcid; + d.pcid = pmap->pm_pcids[cpuid].pm_pcid; d.pad = 0; d.addr = 0; invpcid(&d, INVPCID_CTX); - if (pmap->pm_ucr3 != PMAP_NO_CR3) { - d.pcid |= PMAP_PCID_USER_PT; - invpcid(&d, INVPCID_CTX); - } } else { - kcr3 = pmap->pm_cr3 | pcid; - ucr3 = pmap->pm_ucr3; - if (ucr3 != PMAP_NO_CR3) { - ucr3 |= pcid | PMAP_PCID_USER_PT; - pmap_pti_pcid_invalidate(ucr3, - kcr3); - } else { - load_cr3(kcr3); - } + load_cr3(pmap->pm_cr3 | pmap->pm_pcids + [PCPU_GET(cpuid)].pm_pcid); } - critical_exit(); } else { invltlb(); } @@ -1841,9 +1749,6 @@ pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde) void pmap_invalidate_page(pmap_t pmap, vm_offset_t va) { - struct invpcid_descr d; - uint64_t kcr3, ucr3; - uint32_t pcid; if (pmap->pm_type == PT_RVI || pmap->pm_type == PT_EPT) { pmap->pm_eptgen++; @@ -1852,35 +1757,16 @@ pmap_invalidate_page(pmap_t pmap, vm_offset_t va) KASSERT(pmap->pm_type == PT_X86, ("pmap_invalidate_range: unknown type %d", pmap->pm_type)); - if (pmap == kernel_pmap || pmap == PCPU_GET(curpmap)) { + if (pmap == kernel_pmap || pmap == PCPU_GET(curpmap)) invlpg(va); - if (pmap == PCPU_GET(curpmap) && pmap_pcid_enabled && - pmap->pm_ucr3 != PMAP_NO_CR3) { - critical_enter(); - pcid = pmap->pm_pcids[0].pm_pcid; - if (invpcid_works) { - d.pcid = pcid | PMAP_PCID_USER_PT; - d.pad = 0; - d.addr = va; - invpcid(&d, INVPCID_ADDR); - } else { - kcr3 = pmap->pm_cr3 | pcid | CR3_PCID_SAVE; - ucr3 = pmap->pm_ucr3 | pcid | - PMAP_PCID_USER_PT | CR3_PCID_SAVE; - pmap_pti_pcid_invlpg(ucr3, kcr3, va); - } - critical_exit(); - } - } else if (pmap_pcid_enabled) + else if (pmap_pcid_enabled) pmap->pm_pcids[0].pm_gen = 0; } void pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { - struct invpcid_descr d; vm_offset_t addr; - uint64_t kcr3, ucr3; if (pmap->pm_type == PT_RVI || pmap->pm_type == PT_EPT) { pmap->pm_eptgen++; @@ -1892,25 +1778,6 @@ pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) if (pmap == kernel_pmap || pmap == PCPU_GET(curpmap)) { for (addr = sva; addr < eva; addr += PAGE_SIZE) invlpg(addr); - if (pmap == PCPU_GET(curpmap) && pmap_pcid_enabled && - pmap->pm_ucr3 != PMAP_NO_CR3) { - critical_enter(); - if (invpcid_works) { - d.pcid = pmap->pm_pcids[0].pm_pcid | - PMAP_PCID_USER_PT; - d.pad = 0; - d.addr = sva; - for (; d.addr < eva; d.addr += PAGE_SIZE) - invpcid(&d, INVPCID_ADDR); - } else { - kcr3 = pmap->pm_cr3 | pmap->pm_pcids[0]. - pm_pcid | CR3_PCID_SAVE; - ucr3 = pmap->pm_ucr3 | pmap->pm_pcids[0]. - pm_pcid | PMAP_PCID_USER_PT | CR3_PCID_SAVE; - pmap_pti_pcid_invlrng(ucr3, kcr3, sva, eva); - } - critical_exit(); - } } else if (pmap_pcid_enabled) { pmap->pm_pcids[0].pm_gen = 0; } @@ -1920,7 +1787,6 @@ void pmap_invalidate_all(pmap_t pmap) { struct invpcid_descr d; - uint64_t kcr3, ucr3; if (pmap->pm_type == PT_RVI || pmap->pm_type == PT_EPT) { pmap->pm_eptgen++; @@ -1938,26 +1804,15 @@ pmap_invalidate_all(pmap_t pmap) } } else if (pmap == PCPU_GET(curpmap)) { if (pmap_pcid_enabled) { - critical_enter(); if (invpcid_works) { d.pcid = pmap->pm_pcids[0].pm_pcid; d.pad = 0; d.addr = 0; invpcid(&d, INVPCID_CTX); - if (pmap->pm_ucr3 != PMAP_NO_CR3) { - d.pcid |= PMAP_PCID_USER_PT; - invpcid(&d, INVPCID_CTX); - } } else { - kcr3 = pmap->pm_cr3 | pmap->pm_pcids[0].pm_pcid; - if (pmap->pm_ucr3 != PMAP_NO_CR3) { - ucr3 = pmap->pm_ucr3 | pmap->pm_pcids[ - 0].pm_pcid | PMAP_PCID_USER_PT; - pmap_pti_pcid_invalidate(ucr3, kcr3); - } else - load_cr3(kcr3); + load_cr3(pmap->pm_cr3 | pmap->pm_pcids[0]. + pm_pcid); } - critical_exit(); } else { invltlb(); } @@ -2239,7 +2094,7 @@ pmap_kenter(vm_offset_t va, vm_paddr_t pa) pt_entry_t *pte; pte = vtopte(va); - pte_store(pte, pa | X86_PG_RW | X86_PG_V | pg_g); + pte_store(pte, pa | X86_PG_RW | X86_PG_V | X86_PG_G); } static __inline void @@ -2250,7 +2105,7 @@ pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode) pte = vtopte(va); cache_bits = pmap_cache_bits(kernel_pmap, mode, 0); - pte_store(pte, pa | X86_PG_RW | X86_PG_V | pg_g | cache_bits); + pte_store(pte, pa | X86_PG_RW | X86_PG_V | X86_PG_G | cache_bits); } /* @@ -2310,7 +2165,7 @@ pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count) pa = VM_PAGE_TO_PHYS(m) | cache_bits; if ((*pte & (PG_FRAME | X86_PG_PTE_CACHE)) != pa) { oldpte |= *pte; - pte_store(pte, pa | pg_g | X86_PG_RW | X86_PG_V); + pte_store(pte, pa | X86_PG_G | X86_PG_RW | X86_PG_V); } pte++; } @@ -2429,10 +2284,6 @@ _pmap_unwire_ptp(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free) pml4_entry_t *pml4; pml4 = pmap_pml4e(pmap, va); *pml4 = 0; - if (pmap->pm_pml4u != NULL && va <= VM_MAXUSER_ADDRESS) { - pml4 = &pmap->pm_pml4u[pmap_pml4e_index(va)]; - *pml4 = 0; - } } else if (m->pindex >= NUPDE) { /* PD page */ pdp_entry_t *pdp; @@ -2498,10 +2349,7 @@ pmap_pinit0(pmap_t pmap) PMAP_LOCK_INIT(pmap); pmap->pm_pml4 = (pml4_entry_t *)PHYS_TO_DMAP(KPML4phys); - pmap->pm_pml4u = NULL; pmap->pm_cr3 = KPML4phys; - /* hack to keep pmap_pti_pcid_invalidate() alive */ - pmap->pm_ucr3 = PMAP_NO_CR3; pmap->pm_root.rt_root = 0; CPU_ZERO(&pmap->pm_active); TAILQ_INIT(&pmap->pm_pvchunk); @@ -2510,8 +2358,6 @@ pmap_pinit0(pmap_t pmap) CPU_FOREACH(i) { pmap->pm_pcids[i].pm_pcid = PMAP_PCID_NONE; pmap->pm_pcids[i].pm_gen = 0; - if (!pti) - __pcpu[i].pc_kcr3 = PMAP_NO_CR3; } PCPU_SET(curpmap, kernel_pmap); pmap_activate(curthread); @@ -2541,17 +2387,6 @@ pmap_pinit_pml4(vm_page_t pml4pg) X86_PG_A | X86_PG_M; } -static void -pmap_pinit_pml4_pti(vm_page_t pml4pg) -{ - pml4_entry_t *pm_pml4; - int i; - - pm_pml4 = (pml4_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pml4pg)); - for (i = 0; i < NPML4EPG; i++) - pm_pml4[i] = pti_pml4[i]; -} - /* * Initialize a preallocated and zeroed pmap structure, * such as one in a vmspace structure. @@ -2559,7 +2394,7 @@ pmap_pinit_pml4_pti(vm_page_t pml4pg) int pmap_pinit_type(pmap_t pmap, enum pmap_type pm_type, int flags) { - vm_page_t pml4pg, pml4pgu; + vm_page_t pml4pg; vm_paddr_t pml4phys; int i; @@ -2576,11 +2411,8 @@ pmap_pinit_type(pmap_t pmap, enum pmap_type pm_type, int flags) pmap->pm_pcids[i].pm_pcid = PMAP_PCID_NONE; pmap->pm_pcids[i].pm_gen = 0; } - pmap->pm_cr3 = PMAP_NO_CR3; /* initialize to an invalid value */ - pmap->pm_ucr3 = PMAP_NO_CR3; - pmap->pm_pml4u = NULL; + pmap->pm_cr3 = ~0; /* initialize to an invalid value */ - pmap->pm_type = pm_type; if ((pml4pg->flags & PG_ZERO) == 0) pagezero(pmap->pm_pml4); @@ -2588,19 +2420,10 @@ pmap_pinit_type(pmap_t pmap, enum pmap_type pm_type, int flags) * Do not install the host kernel mappings in the nested page * tables. These mappings are meaningless in the guest physical * address space. - * Install minimal kernel mappings in PTI case. */ - if (pm_type == PT_X86) { + if ((pmap->pm_type = pm_type) == PT_X86) { pmap->pm_cr3 = pml4phys; pmap_pinit_pml4(pml4pg); - if (pti) { - pml4pgu = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | - VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_WAITOK); - pmap->pm_pml4u = (pml4_entry_t *)PHYS_TO_DMAP( - VM_PAGE_TO_PHYS(pml4pgu)); - pmap_pinit_pml4_pti(pml4pgu); - pmap->pm_ucr3 = VM_PAGE_TO_PHYS(pml4pgu); - } } pmap->pm_root.rt_root = 0; @@ -2672,27 +2495,13 @@ _pmap_allocpte(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp) */ if (ptepindex >= (NUPDE + NUPDPE)) { - pml4_entry_t *pml4, *pml4u; + pml4_entry_t *pml4; vm_pindex_t pml4index; /* Wire up a new PDPE page */ pml4index = ptepindex - (NUPDE + NUPDPE); pml4 = &pmap->pm_pml4[pml4index]; *pml4 = VM_PAGE_TO_PHYS(m) | PG_U | PG_RW | PG_V | PG_A | PG_M; - if (pmap->pm_pml4u != NULL && pml4index < NUPML4E) { - /* - * PTI: Make all user-space mappings in the - * kernel-mode page table no-execute so that - * we detect any programming errors that leave - * the kernel-mode page table active on return - * to user space. - */ - *pml4 |= pg_nx; - - pml4u = &pmap->pm_pml4u[pml4index]; - *pml4u = VM_PAGE_TO_PHYS(m) | PG_U | PG_RW | PG_V | - PG_A | PG_M; - } } else if (ptepindex >= NUPDE) { vm_pindex_t pml4index; @@ -2893,13 +2702,6 @@ pmap_release(pmap_t pmap) m->wire_count--; atomic_subtract_int(&vm_cnt.v_wire_count, 1); vm_page_free_zero(m); - - if (pmap->pm_pml4u != NULL) { - m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pmap->pm_pml4u)); - m->wire_count--; - atomic_subtract_int(&vm_cnt.v_wire_count, 1); - vm_page_free(m); - } } static int @@ -7065,15 +6867,13 @@ pmap_pcid_alloc(pmap_t pmap, u_int cpuid) CRITICAL_ASSERT(curthread); gen = PCPU_GET(pcid_gen); - if (!pti && (pmap->pm_pcids[cpuid].pm_pcid == PMAP_PCID_KERN || - pmap->pm_pcids[cpuid].pm_gen == gen)) + if (pmap->pm_pcids[cpuid].pm_pcid == PMAP_PCID_KERN || + pmap->pm_pcids[cpuid].pm_gen == gen) return (CR3_PCID_SAVE); pcid_next = PCPU_GET(pcid_next); - KASSERT((!pti && pcid_next <= PMAP_PCID_OVERMAX) || - (pti && pcid_next <= PMAP_PCID_OVERMAX_KERN), - ("cpu %d pcid_next %#x", cpuid, pcid_next)); - if ((!pti && pcid_next == PMAP_PCID_OVERMAX) || - (pti && pcid_next == PMAP_PCID_OVERMAX_KERN)) { + KASSERT(pcid_next <= PMAP_PCID_OVERMAX, ("cpu %d pcid_next %#x", + cpuid, pcid_next)); + if (pcid_next == PMAP_PCID_OVERMAX) { new_gen = gen + 1; if (new_gen == 0) new_gen = 1; @@ -7092,8 +6892,7 @@ void pmap_activate_sw(struct thread *td) { pmap_t oldpmap, pmap; - struct invpcid_descr d; - uint64_t cached, cr3, kcr3, ucr3; + uint64_t cached, cr3; register_t rflags; u_int cpuid; @@ -7149,41 +6948,11 @@ pmap_activate_sw(struct thread *td) PCPU_INC(pm_save_cnt); } PCPU_SET(curpmap, pmap); - if (pti) { - kcr3 = pmap->pm_cr3 | pmap->pm_pcids[cpuid].pm_pcid; - ucr3 = pmap->pm_ucr3 | pmap->pm_pcids[cpuid].pm_pcid | - PMAP_PCID_USER_PT; - - /* - * Manually invalidate translations cached - * from the user page table, which are not - * flushed by reload of cr3 with the kernel - * page table pointer above. - */ - if (pmap->pm_ucr3 != PMAP_NO_CR3) { - if (invpcid_works) { - d.pcid = PMAP_PCID_USER_PT | - pmap->pm_pcids[cpuid].pm_pcid; - d.pad = 0; - d.addr = 0; - invpcid(&d, INVPCID_CTX); - } else { - pmap_pti_pcid_invalidate(ucr3, kcr3); - } - } - - PCPU_SET(kcr3, kcr3 | CR3_PCID_SAVE); - PCPU_SET(ucr3, ucr3 | CR3_PCID_SAVE); - } if (!invpcid_works) intr_restore(rflags); } else if (cr3 != pmap->pm_cr3) { load_cr3(pmap->pm_cr3); PCPU_SET(curpmap, pmap); - if (pti) { - PCPU_SET(kcr3, pmap->pm_cr3); - PCPU_SET(ucr3, pmap->pm_ucr3); - } } #ifdef SMP CPU_CLR_ATOMIC(cpuid, &oldpmap->pm_active); @@ -7502,291 +7271,6 @@ pmap_quick_remove_page(vm_offset_t addr) mtx_unlock_spin(&qframe_mtx); } -static vm_page_t -pmap_pti_alloc_page(void) -{ - vm_page_t m; - - VM_OBJECT_ASSERT_WLOCKED(pti_obj); - m = vm_page_grab(pti_obj, pti_pg_idx++, VM_ALLOC_NOBUSY | - VM_ALLOC_WIRED | VM_ALLOC_ZERO); - return (m); -} - -static bool -pmap_pti_free_page(vm_page_t m) -{ - - KASSERT(m->wire_count > 0, ("page %p not wired", m)); - m->wire_count--; - if (m->wire_count != 0) - return (false); - atomic_subtract_int(&vm_cnt.v_wire_count, 1); - vm_page_free_zero(m); - return (true); -} - -static void -pmap_pti_init(void) -{ - vm_page_t pml4_pg; - pdp_entry_t *pdpe; - vm_offset_t va; - int i; - - if (!pti) - return; - pti_obj = vm_pager_allocate(OBJT_PHYS, NULL, 0, VM_PROT_ALL, 0, NULL); - VM_OBJECT_WLOCK(pti_obj); - pml4_pg = pmap_pti_alloc_page(); - pti_pml4 = (pml4_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pml4_pg)); - for (va = VM_MIN_KERNEL_ADDRESS; va <= VM_MAX_KERNEL_ADDRESS && - va >= VM_MIN_KERNEL_ADDRESS && va > NBPML4; va += NBPML4) { - pdpe = pmap_pti_pdpe(va); - pmap_pti_wire_pte(pdpe); - } - pmap_pti_add_kva_locked((vm_offset_t)&__pcpu[0], - (vm_offset_t)&__pcpu[0] + sizeof(__pcpu[0]) * MAXCPU, false); - pmap_pti_add_kva_locked((vm_offset_t)gdt, (vm_offset_t)gdt + - sizeof(struct user_segment_descriptor) * NGDT * MAXCPU, false); - pmap_pti_add_kva_locked((vm_offset_t)idt, (vm_offset_t)idt + - sizeof(struct gate_descriptor) * NIDT, false); - pmap_pti_add_kva_locked((vm_offset_t)common_tss, - (vm_offset_t)common_tss + sizeof(struct amd64tss) * MAXCPU, false); - CPU_FOREACH(i) { - /* Doublefault stack IST 1 */ - va = common_tss[i].tss_ist1; - pmap_pti_add_kva_locked(va - PAGE_SIZE, va, false); - /* NMI stack IST 2 */ - va = common_tss[i].tss_ist2 + sizeof(struct nmi_pcpu); - pmap_pti_add_kva_locked(va - PAGE_SIZE, va, false); - /* MC# stack IST 3 */ - va = common_tss[i].tss_ist3 + sizeof(struct nmi_pcpu); - pmap_pti_add_kva_locked(va - PAGE_SIZE, va, false); - } - pmap_pti_add_kva_locked((vm_offset_t)kernphys + KERNBASE, - (vm_offset_t)etext, true); - pti_finalized = true; - VM_OBJECT_WUNLOCK(pti_obj); -} -SYSINIT(pmap_pti, SI_SUB_CPU + 1, SI_ORDER_ANY, pmap_pti_init, NULL); - -static pdp_entry_t * -pmap_pti_pdpe(vm_offset_t va) -{ - pml4_entry_t *pml4e; - pdp_entry_t *pdpe; - vm_page_t m; - vm_pindex_t pml4_idx; - vm_paddr_t mphys; - - VM_OBJECT_ASSERT_WLOCKED(pti_obj); - - pml4_idx = pmap_pml4e_index(va); - pml4e = &pti_pml4[pml4_idx]; - m = NULL; - if (*pml4e == 0) { - if (pti_finalized) - panic("pml4 alloc after finalization\n"); - m = pmap_pti_alloc_page(); - if (*pml4e != 0) { - pmap_pti_free_page(m); - mphys = *pml4e & ~PAGE_MASK; - } else { - mphys = VM_PAGE_TO_PHYS(m); - *pml4e = mphys | X86_PG_RW | X86_PG_V; - } - } else { - mphys = *pml4e & ~PAGE_MASK; - } - pdpe = (pdp_entry_t *)PHYS_TO_DMAP(mphys) + pmap_pdpe_index(va); - return (pdpe); -} - -static void -pmap_pti_wire_pte(void *pte) -{ - vm_page_t m; - - VM_OBJECT_ASSERT_WLOCKED(pti_obj); - m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((uintptr_t)pte)); - m->wire_count++; -} - -static void -pmap_pti_unwire_pde(void *pde, bool only_ref) -{ - vm_page_t m; - - VM_OBJECT_ASSERT_WLOCKED(pti_obj); - m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((uintptr_t)pde)); - MPASS(m->wire_count > 0); - MPASS(only_ref || m->wire_count > 1); - pmap_pti_free_page(m); -} - -static void -pmap_pti_unwire_pte(void *pte, vm_offset_t va) -{ - vm_page_t m; - pd_entry_t *pde; - - VM_OBJECT_ASSERT_WLOCKED(pti_obj); - m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((uintptr_t)pte)); - MPASS(m->wire_count > 0); - if (pmap_pti_free_page(m)) { - pde = pmap_pti_pde(va); - MPASS((*pde & (X86_PG_PS | X86_PG_V)) == X86_PG_V); - *pde = 0; - pmap_pti_unwire_pde(pde, false); - } -} - -static pd_entry_t * -pmap_pti_pde(vm_offset_t va) -{ - pdp_entry_t *pdpe; - pd_entry_t *pde; - vm_page_t m; - vm_pindex_t pd_idx; - vm_paddr_t mphys; - - VM_OBJECT_ASSERT_WLOCKED(pti_obj); - - pdpe = pmap_pti_pdpe(va); - if (*pdpe == 0) { - m = pmap_pti_alloc_page(); - if (*pdpe != 0) { - pmap_pti_free_page(m); - MPASS((*pdpe & X86_PG_PS) == 0); - mphys = *pdpe & ~PAGE_MASK; - } else { - mphys = VM_PAGE_TO_PHYS(m); - *pdpe = mphys | X86_PG_RW | X86_PG_V; - } - } else { - MPASS((*pdpe & X86_PG_PS) == 0); - mphys = *pdpe & ~PAGE_MASK; - } - - pde = (pd_entry_t *)PHYS_TO_DMAP(mphys); - pd_idx = pmap_pde_index(va); - pde += pd_idx; - return (pde); -} - -static pt_entry_t * -pmap_pti_pte(vm_offset_t va, bool *unwire_pde) -{ - pd_entry_t *pde; - pt_entry_t *pte; - vm_page_t m; - vm_paddr_t mphys; - - VM_OBJECT_ASSERT_WLOCKED(pti_obj); - - pde = pmap_pti_pde(va); - if (unwire_pde != NULL) { - *unwire_pde = true; - pmap_pti_wire_pte(pde); - } - if (*pde == 0) { - m = pmap_pti_alloc_page(); - if (*pde != 0) { - pmap_pti_free_page(m); - MPASS((*pde & X86_PG_PS) == 0); - mphys = *pde & ~(PAGE_MASK | pg_nx); - } else { - mphys = VM_PAGE_TO_PHYS(m); - *pde = mphys | X86_PG_RW | X86_PG_V; - if (unwire_pde != NULL) - *unwire_pde = false; - } - } else { - MPASS((*pde & X86_PG_PS) == 0); - mphys = *pde & ~(PAGE_MASK | pg_nx); - } - - pte = (pt_entry_t *)PHYS_TO_DMAP(mphys); - pte += pmap_pte_index(va); - - return (pte); -} - -static void -pmap_pti_add_kva_locked(vm_offset_t sva, vm_offset_t eva, bool exec) -{ - vm_paddr_t pa; - pd_entry_t *pde; - pt_entry_t *pte, ptev; - bool unwire_pde; - - VM_OBJECT_ASSERT_WLOCKED(pti_obj); - - sva = trunc_page(sva); - MPASS(sva > VM_MAXUSER_ADDRESS); - eva = round_page(eva); - MPASS(sva < eva); - for (; sva < eva; sva += PAGE_SIZE) { - pte = pmap_pti_pte(sva, &unwire_pde); - pa = pmap_kextract(sva); - ptev = pa | X86_PG_RW | X86_PG_V | X86_PG_A | - (exec ? 0 : pg_nx) | pmap_cache_bits(kernel_pmap, - VM_MEMATTR_DEFAULT, FALSE); - if (*pte == 0) { - pte_store(pte, ptev); - pmap_pti_wire_pte(pte); - } else { - KASSERT(!pti_finalized, - ("pti overlap after fin %#lx %#lx %#lx", - sva, *pte, ptev)); - KASSERT(*pte == ptev, - ("pti non-identical pte after fin %#lx %#lx %#lx", - sva, *pte, ptev)); - } - if (unwire_pde) { - pde = pmap_pti_pde(sva); - pmap_pti_unwire_pde(pde, true); - } - } -} - -void -pmap_pti_add_kva(vm_offset_t sva, vm_offset_t eva, bool exec) -{ - - if (!pti) - return; - VM_OBJECT_WLOCK(pti_obj); - pmap_pti_add_kva_locked(sva, eva, exec); - VM_OBJECT_WUNLOCK(pti_obj); -} - -void -pmap_pti_remove_kva(vm_offset_t sva, vm_offset_t eva) -{ - pt_entry_t *pte; - vm_offset_t va; - - if (!pti) - return; - sva = rounddown2(sva, PAGE_SIZE); - MPASS(sva > VM_MAXUSER_ADDRESS); - eva = roundup2(eva, PAGE_SIZE); - MPASS(sva < eva); - VM_OBJECT_WLOCK(pti_obj); - for (va = sva; va < eva; va += PAGE_SIZE) { - pte = pmap_pti_pte(va, NULL); - KASSERT((*pte & X86_PG_V) != 0, - ("invalid pte va %#lx pte %#lx pt %#lx", va, - (u_long)pte, *pte)); - pte_clear(pte); - pmap_pti_unwire_pte(pte, va); - } - pmap_invalidate_range(kernel_pmap, sva, eva); - VM_OBJECT_WUNLOCK(pti_obj); -} - #include "opt_ddb.h" #ifdef DDB #include <ddb/ddb.h> |