path: root/sys
author     royger <royger@FreeBSD.org>    2014-10-24 09:48:58 +0000
committer  royger <royger@FreeBSD.org>    2014-10-24 09:48:58 +0000
commit     0e3d9b8126c5ba0e3328ab6b3ba55b3d503d9715 (patch)
tree       e2b9160b6c3a1969d72f8ff01f8e9f734b08863d /sys
parent     233b5312683de0c43509ba3e339e17452caa5eea (diff)
amd64: make uiomove_fromphys functional for pages not mapped by the DMAP
Place the code introduced in r268660 into a separate function that can be
called from uiomove_fromphys. Instead of pre-allocating two KVA pages use
vmem_alloc to allocate them on demand when needed. This prevents blocking if
a page fault is taken while physical addresses from outside the DMAP are
used, since the lock is now removed.

Also introduce a safety catch in PHYS_TO_DMAP and DMAP_TO_PHYS.

Sponsored by:		Citrix Systems R&D
Reviewed by:		kib
Differential Revision:	https://reviews.freebsd.org/D947

amd64/amd64/pmap.c:
 - Factor out the code to deal with non-DMAP addresses from pmap_copy_pages
   and place it in pmap_map_io_transient.
 - Change the code to use vmem_alloc instead of a set of pre-allocated pages.
 - Use pmap_qenter and don't pin the thread if there can be page faults.

amd64/amd64/uio_machdep.c:
 - Use pmap_map_io_transient in order to correctly deal with physical
   addresses not covered by the DMAP.

amd64/include/pmap.h:
 - Add the prototypes for the new functions.

amd64/include/vmparam.h:
 - Add safety catches to make sure PHYS_TO_DMAP and DMAP_TO_PHYS are only
   used with addresses covered by the DMAP.
Diffstat (limited to 'sys')
-rw-r--r--  sys/amd64/amd64/pmap.c         173
-rw-r--r--  sys/amd64/amd64/uio_machdep.c   19
-rw-r--r--  sys/amd64/include/pmap.h         2
-rw-r--r--  sys/amd64/include/vmparam.h     14
4 files changed, 142 insertions, 66 deletions
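
For orientation before the diff itself: the patch replaces the pre-allocated
cpage_a/cpage_b scheme with a pair of helpers, pmap_map_io_transient() and
pmap_unmap_io_transient(), which hand back usable KVA whether or not a page
sits inside the DMAP. A minimal usage sketch follows; the wrapper function
and its name are hypothetical, only the two pmap calls come from the patch.

/*
 * Minimal sketch (hypothetical helper, not part of the commit): copy bytes
 * out of an arbitrary vm_page_t, mapping it transiently if it lies outside
 * the DMAP.  With can_fault = TRUE the helpers use pmap_qenter() and do not
 * pin the thread, so the copy may take page faults.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_page.h>

static void
copy_from_page(vm_page_t m, vm_offset_t off, void *dst, size_t len)
{
	vm_offset_t vaddr;
	boolean_t mapped;

	/* TRUE is returned only when a transient mapping was created. */
	mapped = pmap_map_io_transient(&m, &vaddr, 1, TRUE);
	bcopy((char *)vaddr + off, dst, len);
	if (__predict_false(mapped))
		pmap_unmap_io_transient(&m, &vaddr, 1, TRUE);
}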
diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index a2d1646..b57e5f1 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -115,6 +115,7 @@ __FBSDID("$FreeBSD$");
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/sx.h>
+#include <sys/vmem.h>
#include <sys/vmmeter.h>
#include <sys/sched.h>
#include <sys/sysctl.h>
@@ -402,11 +403,6 @@ SYSCTL_PROC(_vm_pmap, OID_AUTO, pcid_save_cnt, CTLTYPE_U64 | CTLFLAG_RW |
CTLFLAG_MPSAFE, NULL, 0, pmap_pcid_save_cnt_proc, "QU",
"Count of saved TLB context on switch");
-/* pmap_copy_pages() over non-DMAP */
-static struct mtx cpage_lock;
-static vm_offset_t cpage_a;
-static vm_offset_t cpage_b;
-
/*
* Crashdump maps.
*/
@@ -1072,10 +1068,6 @@ pmap_init(void)
M_WAITOK | M_ZERO);
for (i = 0; i < pv_npg; i++)
TAILQ_INIT(&pv_table[i].pv_list);
-
- mtx_init(&cpage_lock, "cpage", NULL, MTX_DEF);
- cpage_a = kva_alloc(PAGE_SIZE);
- cpage_b = kva_alloc(PAGE_SIZE);
}
static SYSCTL_NODE(_vm_pmap, OID_AUTO, pde, CTLFLAG_RD, 0,
@@ -5056,66 +5048,24 @@ pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[],
vm_offset_t b_offset, int xfersize)
{
void *a_cp, *b_cp;
- vm_page_t m_a, m_b;
- vm_paddr_t p_a, p_b;
- pt_entry_t *pte;
- vm_offset_t a_pg_offset, b_pg_offset;
+ vm_page_t pages[2];
+ vm_offset_t vaddr[2], a_pg_offset, b_pg_offset;
int cnt;
- boolean_t pinned;
+ boolean_t mapped;
- /*
- * NB: The sequence of updating a page table followed by accesses
- * to the corresponding pages used in the !DMAP case is subject to
- * the situation described in the "AMD64 Architecture Programmer's
- * Manual Volume 2: System Programming" rev. 3.23, "7.3.1 Special
- * Coherency Considerations". Therefore, issuing the INVLPG right
- * after modifying the PTE bits is crucial.
- */
- pinned = FALSE;
while (xfersize > 0) {
a_pg_offset = a_offset & PAGE_MASK;
- m_a = ma[a_offset >> PAGE_SHIFT];
- p_a = m_a->phys_addr;
+ pages[0] = ma[a_offset >> PAGE_SHIFT];
b_pg_offset = b_offset & PAGE_MASK;
- m_b = mb[b_offset >> PAGE_SHIFT];
- p_b = m_b->phys_addr;
+ pages[1] = mb[b_offset >> PAGE_SHIFT];
cnt = min(xfersize, PAGE_SIZE - a_pg_offset);
cnt = min(cnt, PAGE_SIZE - b_pg_offset);
- if (__predict_false(p_a < DMAP_MIN_ADDRESS ||
- p_a > DMAP_MIN_ADDRESS + dmaplimit)) {
- mtx_lock(&cpage_lock);
- sched_pin();
- pinned = TRUE;
- pte = vtopte(cpage_a);
- *pte = p_a | X86_PG_A | X86_PG_V |
- pmap_cache_bits(kernel_pmap, m_a->md.pat_mode, 0);
- invlpg(cpage_a);
- a_cp = (char *)cpage_a + a_pg_offset;
- } else {
- a_cp = (char *)PHYS_TO_DMAP(p_a) + a_pg_offset;
- }
- if (__predict_false(p_b < DMAP_MIN_ADDRESS ||
- p_b > DMAP_MIN_ADDRESS + dmaplimit)) {
- if (!pinned) {
- mtx_lock(&cpage_lock);
- sched_pin();
- pinned = TRUE;
- }
- pte = vtopte(cpage_b);
- *pte = p_b | X86_PG_A | X86_PG_M | X86_PG_RW |
- X86_PG_V | pmap_cache_bits(kernel_pmap,
- m_b->md.pat_mode, 0);
- invlpg(cpage_b);
- b_cp = (char *)cpage_b + b_pg_offset;
- } else {
- b_cp = (char *)PHYS_TO_DMAP(p_b) + b_pg_offset;
- }
+ mapped = pmap_map_io_transient(pages, vaddr, 2, FALSE);
+ a_cp = (char *)vaddr[0] + a_pg_offset;
+ b_cp = (char *)vaddr[1] + b_pg_offset;
bcopy(a_cp, b_cp, cnt);
- if (__predict_false(pinned)) {
- sched_unpin();
- mtx_unlock(&cpage_lock);
- pinned = FALSE;
- }
+ if (__predict_false(mapped))
+ pmap_unmap_io_transient(pages, vaddr, 2, FALSE);
a_offset += cnt;
b_offset += cnt;
xfersize -= cnt;
@@ -6901,6 +6851,107 @@ done:
*num = idx;
}
+/**
+ * Get the kernel virtual address of a set of physical pages. If there are
+ * physical addresses not covered by the DMAP perform a transient mapping
+ * that will be removed when calling pmap_unmap_io_transient.
+ *
+ * \param page The pages the caller wishes to obtain the virtual
+ * address on the kernel memory map.
+ * \param vaddr On return contains the kernel virtual memory address
+ * of the pages passed in the page parameter.
+ * \param count Number of pages passed in.
+ * \param can_fault TRUE if the thread using the mapped pages can take
+ * page faults, FALSE otherwise.
+ *
+ * \returns TRUE if the caller must call pmap_unmap_io_transient when
+ * finished or FALSE otherwise.
+ *
+ */
+boolean_t
+pmap_map_io_transient(vm_page_t page[], vm_offset_t vaddr[], int count,
+ boolean_t can_fault)
+{
+ vm_paddr_t paddr;
+ boolean_t needs_mapping;
+ pt_entry_t *pte;
+ int cache_bits, error, i;
+
+ /*
+ * Allocate any KVA space that we need, this is done in a separate
+ * loop to prevent calling vmem_alloc while pinned.
+ */
+ needs_mapping = FALSE;
+ for (i = 0; i < count; i++) {
+ paddr = VM_PAGE_TO_PHYS(page[i]);
+ if (__predict_false(paddr >= dmaplimit)) {
+ error = vmem_alloc(kernel_arena, PAGE_SIZE,
+ M_BESTFIT | M_WAITOK, &vaddr[i]);
+ KASSERT(error == 0, ("vmem_alloc failed: %d", error));
+ needs_mapping = TRUE;
+ } else {
+ vaddr[i] = PHYS_TO_DMAP(paddr);
+ }
+ }
+
+ /* Exit early if everything is covered by the DMAP */
+ if (!needs_mapping)
+ return (FALSE);
+
+ /*
+ * NB: The sequence of updating a page table followed by accesses
+ * to the corresponding pages used in the !DMAP case is subject to
+ * the situation described in the "AMD64 Architecture Programmer's
+ * Manual Volume 2: System Programming" rev. 3.23, "7.3.1 Special
+ * Coherency Considerations". Therefore, issuing the INVLPG right
+ * after modifying the PTE bits is crucial.
+ */
+ if (!can_fault)
+ sched_pin();
+ for (i = 0; i < count; i++) {
+ paddr = VM_PAGE_TO_PHYS(page[i]);
+ if (paddr >= dmaplimit) {
+ if (can_fault) {
+ /*
+ * Slow path, since we can get page faults
+ * while mappings are active don't pin the
+ * thread to the CPU and instead add a global
+ * mapping visible to all CPUs.
+ */
+ pmap_qenter(vaddr[i], &page[i], 1);
+ } else {
+ pte = vtopte(vaddr[i]);
+ cache_bits = pmap_cache_bits(kernel_pmap,
+ page[i]->md.pat_mode, 0);
+ pte_store(pte, paddr | X86_PG_RW | X86_PG_V |
+ cache_bits);
+ invlpg(vaddr[i]);
+ }
+ }
+ }
+
+ return (needs_mapping);
+}
+
+void
+pmap_unmap_io_transient(vm_page_t page[], vm_offset_t vaddr[], int count,
+ boolean_t can_fault)
+{
+ vm_paddr_t paddr;
+ int i;
+
+ if (!can_fault)
+ sched_unpin();
+ for (i = 0; i < count; i++) {
+ paddr = VM_PAGE_TO_PHYS(page[i]);
+ if (paddr >= dmaplimit) {
+ if (can_fault)
+ pmap_qremove(vaddr[i], 1);
+ vmem_free(kernel_arena, vaddr[i], PAGE_SIZE);
+ }
+ }
+}
+
#include "opt_ddb.h"
#ifdef DDB
#include <ddb/ddb.h>
diff --git a/sys/amd64/amd64/uio_machdep.c b/sys/amd64/amd64/uio_machdep.c
index 41ac367..7565438 100644
--- a/sys/amd64/amd64/uio_machdep.c
+++ b/sys/amd64/amd64/uio_machdep.c
@@ -61,10 +61,11 @@ uiomove_fromphys(vm_page_t ma[], vm_offset_t offset, int n, struct uio *uio)
struct thread *td = curthread;
struct iovec *iov;
void *cp;
- vm_offset_t page_offset;
+ vm_offset_t page_offset, vaddr;
size_t cnt;
int error = 0;
int save = 0;
+ boolean_t mapped;
KASSERT(uio->uio_rw == UIO_READ || uio->uio_rw == UIO_WRITE,
("uiomove_fromphys: mode"));
@@ -72,6 +73,7 @@ uiomove_fromphys(vm_page_t ma[], vm_offset_t offset, int n, struct uio *uio)
("uiomove_fromphys proc"));
save = td->td_pflags & TDP_DEADLKTREAT;
td->td_pflags |= TDP_DEADLKTREAT;
+ mapped = FALSE;
while (n > 0 && uio->uio_resid) {
iov = uio->uio_iov;
cnt = iov->iov_len;
@@ -84,8 +86,11 @@ uiomove_fromphys(vm_page_t ma[], vm_offset_t offset, int n, struct uio *uio)
cnt = n;
page_offset = offset & PAGE_MASK;
cnt = min(cnt, PAGE_SIZE - page_offset);
- cp = (char *)PHYS_TO_DMAP(ma[offset >> PAGE_SHIFT]->phys_addr) +
- page_offset;
+ if (uio->uio_segflg != UIO_NOCOPY) {
+ mapped = pmap_map_io_transient(
+ &ma[offset >> PAGE_SHIFT], &vaddr, 1, TRUE);
+ cp = (char *)vaddr + page_offset;
+ }
switch (uio->uio_segflg) {
case UIO_USERSPACE:
maybe_yield();
@@ -105,6 +110,11 @@ uiomove_fromphys(vm_page_t ma[], vm_offset_t offset, int n, struct uio *uio)
case UIO_NOCOPY:
break;
}
+ if (__predict_false(mapped)) {
+ pmap_unmap_io_transient(&ma[offset >> PAGE_SHIFT],
+ &vaddr, 1, TRUE);
+ mapped = FALSE;
+ }
iov->iov_base = (char *)iov->iov_base + cnt;
iov->iov_len -= cnt;
uio->uio_resid -= cnt;
@@ -113,6 +123,9 @@ uiomove_fromphys(vm_page_t ma[], vm_offset_t offset, int n, struct uio *uio)
n -= cnt;
}
out:
+ if (__predict_false(mapped))
+ pmap_unmap_io_transient(&ma[offset >> PAGE_SHIFT], &vaddr, 1,
+ TRUE);
if (save == 0)
td->td_pflags &= ~TDP_DEADLKTREAT;
return (error);
diff --git a/sys/amd64/include/pmap.h b/sys/amd64/include/pmap.h
index ebf32c6..868db7d 100644
--- a/sys/amd64/include/pmap.h
+++ b/sys/amd64/include/pmap.h
@@ -397,6 +397,8 @@ void pmap_invalidate_cache_pages(vm_page_t *pages, int count);
void pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva,
boolean_t force);
void pmap_get_mapping(pmap_t pmap, vm_offset_t va, uint64_t *ptr, int *num);
+boolean_t pmap_map_io_transient(vm_page_t *, vm_offset_t *, int, boolean_t);
+void pmap_unmap_io_transient(vm_page_t *, vm_offset_t *, int, boolean_t);
#endif /* _KERNEL */
#endif /* !LOCORE */
diff --git a/sys/amd64/include/vmparam.h b/sys/amd64/include/vmparam.h
index 14c5134..b1b89b9 100644
--- a/sys/amd64/include/vmparam.h
+++ b/sys/amd64/include/vmparam.h
@@ -175,8 +175,18 @@
#define VM_MAX_ADDRESS UPT_MAX_ADDRESS
#define VM_MIN_ADDRESS (0)
-#define PHYS_TO_DMAP(x) ((x) | DMAP_MIN_ADDRESS)
-#define DMAP_TO_PHYS(x) ((x) & ~DMAP_MIN_ADDRESS)
+#define PHYS_TO_DMAP(x) ({ \
+ KASSERT((x) < dmaplimit, \
+ ("physical address %#jx not covered by the DMAP", \
+ (uintmax_t)x)); \
+ (x) | DMAP_MIN_ADDRESS; })
+
+#define DMAP_TO_PHYS(x) ({ \
+ KASSERT((x) < (DMAP_MIN_ADDRESS + dmaplimit) && \
+ (x) >= DMAP_MIN_ADDRESS, \
+ ("virtual address %#jx not covered by the DMAP", \
+ (uintmax_t)x)); \
+ (x) & ~DMAP_MIN_ADDRESS; })
/*
* How many physical pages per kmem arena virtual page.
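
As a closing note on the vmparam.h hunk: on an INVARIANTS kernel the reworked
macros now panic when handed an address outside the direct map instead of
silently producing a wrong pointer. The conditions they assert amount to the
following (illustrative helpers only, not part of the commit):

/*
 * Illustrative only: the range conditions the new KASSERTs enforce.
 * dmaplimit is the amount of physical address space covered by the DMAP.
 */
static __inline boolean_t
dmap_covers_paddr(vm_paddr_t pa)
{

	return (pa < dmaplimit);
}

static __inline boolean_t
dmap_covers_vaddr(vm_offset_t va)
{

	return (va >= DMAP_MIN_ADDRESS && va < DMAP_MIN_ADDRESS + dmaplimit);
}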