author    | royger <royger@FreeBSD.org> | 2014-10-24 09:48:58 +0000
committer | royger <royger@FreeBSD.org> | 2014-10-24 09:48:58 +0000
commit    | 0e3d9b8126c5ba0e3328ab6b3ba55b3d503d9715 (patch)
tree      | e2b9160b6c3a1969d72f8ff01f8e9f734b08863d /sys
parent    | 233b5312683de0c43509ba3e339e17452caa5eea (diff)
amd64: make uiomove_fromphys functional for pages not mapped by the DMAP
Place the code introduced in r268660 into a separate function that can be
called from uiomove_fromphys. Instead of pre-allocating two KVA pages, use
vmem_alloc to allocate KVA on demand. Since the lock that protected the
pre-allocated pages is gone, taking a page fault while physical addresses
outside the DMAP are in use no longer blocks.
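Condensed to a single page, the new allocate-on-demand path looks roughly like the sketch below. `map_one_transient` is a hypothetical name used only for illustration; the real implementation is the two-pass, array-based pmap_map_io_transient shown in the pmap.c hunk further down, and its teardown counterpart (pmap_qremove plus vmem_free) is omitted here.

```c
/*
 * Hypothetical single-page condensation of pmap_map_io_transient() from the
 * pmap.c diff below.  Returns TRUE when a transient mapping was created that
 * the caller must later undo and whose KVA must be returned with vmem_free().
 */
static boolean_t
map_one_transient(vm_page_t m, vm_offset_t *va, boolean_t can_fault)
{
	vm_paddr_t paddr = VM_PAGE_TO_PHYS(m);
	pt_entry_t *pte;
	int error;

	if (paddr < dmaplimit) {
		/* Fast path: the page is already covered by the DMAP. */
		*va = PHYS_TO_DMAP(paddr);
		return (FALSE);
	}

	/* Slow path: allocate KVA on demand instead of using cpage_a/cpage_b. */
	error = vmem_alloc(kernel_arena, PAGE_SIZE, M_BESTFIT | M_WAITOK, va);
	KASSERT(error == 0, ("vmem_alloc failed: %d", error));

	if (can_fault) {
		/* A global mapping; the thread may fault while it is live. */
		pmap_qenter(*va, &m, 1);
	} else {
		/*
		 * Pin the thread and install the PTE directly; invlpg on this
		 * CPU suffices because the freshly allocated KVA is private
		 * to this caller.
		 */
		sched_pin();
		pte = vtopte(*va);
		pte_store(pte, paddr | X86_PG_RW | X86_PG_V |
		    pmap_cache_bits(kernel_pmap, m->md.pat_mode, 0));
		invlpg(*va);
	}
	return (TRUE);
}
```

Because each caller gets its own KVA from kernel_arena, no global lock is needed, and the M_WAITOK allocation happens before the thread is pinned.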
Also introduce a safety catch in PHYS_TO_DMAP and DMAP_TO_PHYS.
Sponsored by: Citrix Systems R&D
Reviewed by: kib
Differential Revision: https://reviews.freebsd.org/D947
amd64/amd64/pmap.c:
- Factor out the code that deals with non-DMAP addresses from pmap_copy_pages
and place it in pmap_map_io_transient.
- Change the code to use vmem_alloc instead of a set of pre-allocated
pages.
- Use pmap_qenter and don't pin the thread if there can be page faults.
amd64/amd64/uio_machdep.c:
- Use pmap_map_io_transient to correctly deal with physical addresses not
covered by the DMAP (see the usage sketch after this list).
amd64/include/pmap.h:
- Add the prototypes for the new functions.
amd64/include/vmparam.h:
- Add safety catches to make sure PHYS_TO_DMAP and DMAP_TO_PHYS are only
used with addresses covered by the DMAP.
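Taken together, the new API gives callers a simple map/use/unmap pattern. The sketch below shows a hypothetical caller (`copy_from_page`, `off`, `dst`, and `len` are illustrative, not part of the change), condensed from how uiomove_fromphys and pmap_copy_pages use the functions in the diff below.

```c
/*
 * Hypothetical caller of the new interface, condensed from the
 * uiomove_fromphys and pmap_copy_pages changes below.  Passing TRUE for
 * can_fault means the access may fault, so the helper maps with
 * pmap_qenter instead of pinning the thread.
 */
static void
copy_from_page(vm_page_t m, vm_offset_t off, void *dst, size_t len)
{
	vm_offset_t vaddr;
	boolean_t mapped;

	/* TRUE is returned only when a transient (non-DMAP) mapping was set up. */
	mapped = pmap_map_io_transient(&m, &vaddr, 1, TRUE);
	bcopy((char *)vaddr + off, dst, len);
	if (__predict_false(mapped))
		pmap_unmap_io_transient(&m, &vaddr, 1, TRUE);
}
```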
Diffstat (limited to 'sys')
-rw-r--r-- | sys/amd64/amd64/pmap.c        | 173
-rw-r--r-- | sys/amd64/amd64/uio_machdep.c |  19
-rw-r--r-- | sys/amd64/include/pmap.h      |   2
-rw-r--r-- | sys/amd64/include/vmparam.h   |  14
4 files changed, 142 insertions, 66 deletions
```diff
diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index a2d1646..b57e5f1 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -115,6 +115,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/proc.h>
 #include <sys/rwlock.h>
 #include <sys/sx.h>
+#include <sys/vmem.h>
 #include <sys/vmmeter.h>
 #include <sys/sched.h>
 #include <sys/sysctl.h>
@@ -402,11 +403,6 @@ SYSCTL_PROC(_vm_pmap, OID_AUTO, pcid_save_cnt, CTLTYPE_U64 | CTLFLAG_RW |
     CTLFLAG_MPSAFE, NULL, 0, pmap_pcid_save_cnt_proc, "QU",
     "Count of saved TLB context on switch");
 
-/* pmap_copy_pages() over non-DMAP */
-static struct mtx cpage_lock;
-static vm_offset_t cpage_a;
-static vm_offset_t cpage_b;
-
 /*
  * Crashdump maps.
  */
@@ -1072,10 +1068,6 @@ pmap_init(void)
         M_WAITOK | M_ZERO);
     for (i = 0; i < pv_npg; i++)
         TAILQ_INIT(&pv_table[i].pv_list);
-
-    mtx_init(&cpage_lock, "cpage", NULL, MTX_DEF);
-    cpage_a = kva_alloc(PAGE_SIZE);
-    cpage_b = kva_alloc(PAGE_SIZE);
 }
 
 static SYSCTL_NODE(_vm_pmap, OID_AUTO, pde, CTLFLAG_RD, 0,
@@ -5056,66 +5048,24 @@ pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[],
     vm_offset_t b_offset, int xfersize)
 {
     void *a_cp, *b_cp;
-    vm_page_t m_a, m_b;
-    vm_paddr_t p_a, p_b;
-    pt_entry_t *pte;
-    vm_offset_t a_pg_offset, b_pg_offset;
+    vm_page_t pages[2];
+    vm_offset_t vaddr[2], a_pg_offset, b_pg_offset;
     int cnt;
-    boolean_t pinned;
+    boolean_t mapped;
 
-    /*
-     * NB: The sequence of updating a page table followed by accesses
-     * to the corresponding pages used in the !DMAP case is subject to
-     * the situation described in the "AMD64 Architecture Programmer's
-     * Manual Volume 2: System Programming" rev. 3.23, "7.3.1 Special
-     * Coherency Considerations". Therefore, issuing the INVLPG right
-     * after modifying the PTE bits is crucial.
-     */
-    pinned = FALSE;
     while (xfersize > 0) {
         a_pg_offset = a_offset & PAGE_MASK;
-        m_a = ma[a_offset >> PAGE_SHIFT];
-        p_a = m_a->phys_addr;
+        pages[0] = ma[a_offset >> PAGE_SHIFT];
         b_pg_offset = b_offset & PAGE_MASK;
-        m_b = mb[b_offset >> PAGE_SHIFT];
-        p_b = m_b->phys_addr;
+        pages[1] = mb[b_offset >> PAGE_SHIFT];
         cnt = min(xfersize, PAGE_SIZE - a_pg_offset);
         cnt = min(cnt, PAGE_SIZE - b_pg_offset);
-        if (__predict_false(p_a < DMAP_MIN_ADDRESS ||
-            p_a > DMAP_MIN_ADDRESS + dmaplimit)) {
-            mtx_lock(&cpage_lock);
-            sched_pin();
-            pinned = TRUE;
-            pte = vtopte(cpage_a);
-            *pte = p_a | X86_PG_A | X86_PG_V |
-                pmap_cache_bits(kernel_pmap, m_a->md.pat_mode, 0);
-            invlpg(cpage_a);
-            a_cp = (char *)cpage_a + a_pg_offset;
-        } else {
-            a_cp = (char *)PHYS_TO_DMAP(p_a) + a_pg_offset;
-        }
-        if (__predict_false(p_b < DMAP_MIN_ADDRESS ||
-            p_b > DMAP_MIN_ADDRESS + dmaplimit)) {
-            if (!pinned) {
-                mtx_lock(&cpage_lock);
-                sched_pin();
-                pinned = TRUE;
-            }
-            pte = vtopte(cpage_b);
-            *pte = p_b | X86_PG_A | X86_PG_M | X86_PG_RW |
-                X86_PG_V | pmap_cache_bits(kernel_pmap,
-                m_b->md.pat_mode, 0);
-            invlpg(cpage_b);
-            b_cp = (char *)cpage_b + b_pg_offset;
-        } else {
-            b_cp = (char *)PHYS_TO_DMAP(p_b) + b_pg_offset;
-        }
+        mapped = pmap_map_io_transient(pages, vaddr, 2, FALSE);
+        a_cp = (char *)vaddr[0] + a_pg_offset;
+        b_cp = (char *)vaddr[1] + b_pg_offset;
         bcopy(a_cp, b_cp, cnt);
-        if (__predict_false(pinned)) {
-            sched_unpin();
-            mtx_unlock(&cpage_lock);
-            pinned = FALSE;
-        }
+        if (__predict_false(mapped))
+            pmap_unmap_io_transient(pages, vaddr, 2, FALSE);
         a_offset += cnt;
         b_offset += cnt;
         xfersize -= cnt;
@@ -6901,6 +6851,107 @@ done:
     *num = idx;
 }
 
+/**
+ * Get the kernel virtual address of a set of physical pages. If there are
+ * physical addresses not covered by the DMAP perform a transient mapping
+ * that will be removed when calling pmap_unmap_io_transient.
+ *
+ * \param page        The pages the caller wishes to obtain the virtual
+ *                    address on the kernel memory map.
+ * \param vaddr       On return contains the kernel virtual memory address
+ *                    of the pages passed in the page parameter.
+ * \param count       Number of pages passed in.
+ * \param can_fault   TRUE if the thread using the mapped pages can take
+ *                    page faults, FALSE otherwise.
+ *
+ * \returns TRUE if the caller must call pmap_unmap_io_transient when
+ *          finished or FALSE otherwise.
+ *
+ */
+boolean_t
+pmap_map_io_transient(vm_page_t page[], vm_offset_t vaddr[], int count,
+    boolean_t can_fault)
+{
+    vm_paddr_t paddr;
+    boolean_t needs_mapping;
+    pt_entry_t *pte;
+    int cache_bits, error, i;
+
+    /*
+     * Allocate any KVA space that we need, this is done in a separate
+     * loop to prevent calling vmem_alloc while pinned.
+     */
+    needs_mapping = FALSE;
+    for (i = 0; i < count; i++) {
+        paddr = VM_PAGE_TO_PHYS(page[i]);
+        if (__predict_false(paddr >= dmaplimit)) {
+            error = vmem_alloc(kernel_arena, PAGE_SIZE,
+                M_BESTFIT | M_WAITOK, &vaddr[i]);
+            KASSERT(error == 0, ("vmem_alloc failed: %d", error));
+            needs_mapping = TRUE;
+        } else {
+            vaddr[i] = PHYS_TO_DMAP(paddr);
+        }
+    }
+
+    /* Exit early if everything is covered by the DMAP */
+    if (!needs_mapping)
+        return (FALSE);
+
+    /*
+     * NB: The sequence of updating a page table followed by accesses
+     * to the corresponding pages used in the !DMAP case is subject to
+     * the situation described in the "AMD64 Architecture Programmer's
+     * Manual Volume 2: System Programming" rev. 3.23, "7.3.1 Special
+     * Coherency Considerations". Therefore, issuing the INVLPG right
+     * after modifying the PTE bits is crucial.
+     */
+    if (!can_fault)
+        sched_pin();
+    for (i = 0; i < count; i++) {
+        paddr = VM_PAGE_TO_PHYS(page[i]);
+        if (paddr >= dmaplimit) {
+            if (can_fault) {
+                /*
+                 * Slow path, since we can get page faults
+                 * while mappings are active don't pin the
+                 * thread to the CPU and instead add a global
+                 * mapping visible to all CPUs.
+                 */
+                pmap_qenter(vaddr[i], &page[i], 1);
+            } else {
+                pte = vtopte(vaddr[i]);
+                cache_bits = pmap_cache_bits(kernel_pmap,
+                    page[i]->md.pat_mode, 0);
+                pte_store(pte, paddr | X86_PG_RW | X86_PG_V |
+                    cache_bits);
+                invlpg(vaddr[i]);
+            }
+        }
+    }
+
+    return (needs_mapping);
+}
+
+void
+pmap_unmap_io_transient(vm_page_t page[], vm_offset_t vaddr[], int count,
+    boolean_t can_fault)
+{
+    vm_paddr_t paddr;
+    int i;
+
+    if (!can_fault)
+        sched_unpin();
+    for (i = 0; i < count; i++) {
+        paddr = VM_PAGE_TO_PHYS(page[i]);
+        if (paddr >= dmaplimit) {
+            if (can_fault)
+                pmap_qremove(vaddr[i], 1);
+            vmem_free(kernel_arena, vaddr[i], PAGE_SIZE);
+        }
+    }
+}
+
 #include "opt_ddb.h"
 #ifdef DDB
 #include <ddb/ddb.h>
diff --git a/sys/amd64/amd64/uio_machdep.c b/sys/amd64/amd64/uio_machdep.c
index 41ac367..7565438 100644
--- a/sys/amd64/amd64/uio_machdep.c
+++ b/sys/amd64/amd64/uio_machdep.c
@@ -61,10 +61,11 @@ uiomove_fromphys(vm_page_t ma[], vm_offset_t offset, int n, struct uio *uio)
     struct thread *td = curthread;
     struct iovec *iov;
     void *cp;
-    vm_offset_t page_offset;
+    vm_offset_t page_offset, vaddr;
     size_t cnt;
     int error = 0;
    int save = 0;
+    boolean_t mapped;
 
     KASSERT(uio->uio_rw == UIO_READ || uio->uio_rw == UIO_WRITE,
         ("uiomove_fromphys: mode"));
@@ -72,6 +73,7 @@ uiomove_fromphys(vm_page_t ma[], vm_offset_t offset, int n, struct uio *uio)
         ("uiomove_fromphys proc"));
     save = td->td_pflags & TDP_DEADLKTREAT;
     td->td_pflags |= TDP_DEADLKTREAT;
+    mapped = FALSE;
     while (n > 0 && uio->uio_resid) {
         iov = uio->uio_iov;
         cnt = iov->iov_len;
@@ -84,8 +86,11 @@ uiomove_fromphys(vm_page_t ma[], vm_offset_t offset, int n, struct uio *uio)
             cnt = n;
         page_offset = offset & PAGE_MASK;
         cnt = min(cnt, PAGE_SIZE - page_offset);
-        cp = (char *)PHYS_TO_DMAP(ma[offset >> PAGE_SHIFT]->phys_addr) +
-            page_offset;
+        if (uio->uio_segflg != UIO_NOCOPY) {
+            mapped = pmap_map_io_transient(
+                &ma[offset >> PAGE_SHIFT], &vaddr, 1, TRUE);
+            cp = (char *)vaddr + page_offset;
+        }
         switch (uio->uio_segflg) {
         case UIO_USERSPACE:
             maybe_yield();
@@ -105,6 +110,11 @@ uiomove_fromphys(vm_page_t ma[], vm_offset_t offset, int n, struct uio *uio)
         case UIO_NOCOPY:
             break;
         }
+        if (__predict_false(mapped)) {
+            pmap_unmap_io_transient(&ma[offset >> PAGE_SHIFT],
+                &vaddr, 1, TRUE);
+            mapped = FALSE;
+        }
         iov->iov_base = (char *)iov->iov_base + cnt;
         iov->iov_len -= cnt;
         uio->uio_resid -= cnt;
@@ -113,6 +123,9 @@ uiomove_fromphys(vm_page_t ma[], vm_offset_t offset, int n, struct uio *uio)
         n -= cnt;
     }
 out:
+    if (__predict_false(mapped))
+        pmap_unmap_io_transient(&ma[offset >> PAGE_SHIFT], &vaddr, 1,
+            TRUE);
     if (save == 0)
         td->td_pflags &= ~TDP_DEADLKTREAT;
     return (error);
diff --git a/sys/amd64/include/pmap.h b/sys/amd64/include/pmap.h
index ebf32c6..868db7d 100644
--- a/sys/amd64/include/pmap.h
+++ b/sys/amd64/include/pmap.h
@@ -397,6 +397,8 @@ void pmap_invalidate_cache_pages(vm_page_t *pages, int count);
 void pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva,
     boolean_t force);
 void pmap_get_mapping(pmap_t pmap, vm_offset_t va, uint64_t *ptr, int *num);
+boolean_t pmap_map_io_transient(vm_page_t *, vm_offset_t *, int, boolean_t);
+void pmap_unmap_io_transient(vm_page_t *, vm_offset_t *, int, boolean_t);
 #endif /* _KERNEL */
 
 #endif /* !LOCORE */
diff --git a/sys/amd64/include/vmparam.h b/sys/amd64/include/vmparam.h
index 14c5134..b1b89b9 100644
--- a/sys/amd64/include/vmparam.h
+++ b/sys/amd64/include/vmparam.h
@@ -175,8 +175,18 @@
 #define VM_MAX_ADDRESS      UPT_MAX_ADDRESS
 #define VM_MIN_ADDRESS      (0)
 
-#define PHYS_TO_DMAP(x)     ((x) | DMAP_MIN_ADDRESS)
-#define DMAP_TO_PHYS(x)     ((x) & ~DMAP_MIN_ADDRESS)
+#define PHYS_TO_DMAP(x) ({                                              \
+    KASSERT((x) < dmaplimit,                                            \
+        ("physical address %#jx not covered by the DMAP",               \
+        (uintmax_t)x));                                                 \
+    (x) | DMAP_MIN_ADDRESS; })
+
+#define DMAP_TO_PHYS(x) ({                                              \
+    KASSERT((x) < (DMAP_MIN_ADDRESS + dmaplimit) &&                     \
+        (x) >= DMAP_MIN_ADDRESS,                                        \
+        ("virtual address %#jx not covered by the DMAP",                \
+        (uintmax_t)x));                                                 \
+    (x) & ~DMAP_MIN_ADDRESS; })
 
 /*
  * How many physical pages per kmem arena virtual page.
```