diff options
Diffstat (limited to 'sys')
-rw-r--r-- | sys/conf/files | 2 | ||||
-rw-r--r-- | sys/kern/init_main.c | 5 | ||||
-rw-r--r-- | sys/kern/init_sysent.c | 2 | ||||
-rw-r--r-- | sys/kern/kern_exit.c | 6 | ||||
-rw-r--r-- | sys/kern/kern_fork.c | 10 | ||||
-rw-r--r-- | sys/kern/kern_numa.c | 170 | ||||
-rw-r--r-- | sys/kern/kern_thr.c | 9 | ||||
-rw-r--r-- | sys/kern/kern_thread.c | 3 | ||||
-rw-r--r-- | sys/sys/_vm_domain.h | 61 | ||||
-rw-r--r-- | sys/sys/numa.h | 41 | ||||
-rw-r--r-- | sys/sys/proc.h | 3 | ||||
-rw-r--r-- | sys/vm/vm_domain.c | 374 | ||||
-rw-r--r-- | sys/vm/vm_domain.h | 66 | ||||
-rw-r--r-- | sys/vm/vm_phys.c | 166 | ||||
-rw-r--r-- | sys/vm/vm_phys.h | 1 |
15 files changed, 908 insertions, 11 deletions
diff --git a/sys/conf/files b/sys/conf/files index e2fdfc8..0448ad4 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -3017,6 +3017,7 @@ kern/kern_module.c standard kern/kern_mtxpool.c standard kern/kern_mutex.c standard kern/kern_ntptime.c standard +kern/kern_numa.c standard kern/kern_osd.c standard kern/kern_physio.c standard kern/kern_pmc.c standard @@ -4043,6 +4044,7 @@ vm/vm_pager.c standard vm/vm_phys.c standard vm/vm_radix.c standard vm/vm_reserv.c standard +vm/vm_domain.c standard vm/vm_unix.c standard vm/vm_zeroidle.c standard vm/vnode_pager.c standard diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c index 37539c4..7552d51 100644 --- a/sys/kern/init_main.c +++ b/sys/kern/init_main.c @@ -87,6 +87,7 @@ __FBSDID("$FreeBSD$"); #include <vm/vm_param.h> #include <vm/pmap.h> #include <vm/vm_map.h> +#include <vm/vm_domain.h> #include <sys/copyright.h> #include <ddb/ddb.h> @@ -496,6 +497,10 @@ proc0_init(void *dummy __unused) td->td_flags = TDF_INMEM; td->td_pflags = TDP_KTHREAD; td->td_cpuset = cpuset_thread0(); + vm_domain_policy_init(&td->td_vm_dom_policy); + vm_domain_policy_set(&td->td_vm_dom_policy, VM_POLICY_NONE, -1); + vm_domain_policy_init(&p->p_vm_dom_policy); + vm_domain_policy_set(&p->p_vm_dom_policy, VM_POLICY_NONE, -1); prison0_init(); p->p_peers = 0; p->p_leader = p; diff --git a/sys/kern/init_sysent.c b/sys/kern/init_sysent.c index 14e8281..a328906 100644 --- a/sys/kern/init_sysent.c +++ b/sys/kern/init_sysent.c @@ -588,4 +588,6 @@ struct sysent sysent[] = { { AS(ppoll_args), (sy_call_t *)sys_ppoll, AUE_POLL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 545 = ppoll */ { AS(futimens_args), (sy_call_t *)sys_futimens, AUE_FUTIMES, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 546 = futimens */ { AS(utimensat_args), (sy_call_t *)sys_utimensat, AUE_FUTIMESAT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 547 = utimensat */ + { AS(numa_getaffinity_args), (sy_call_t *)sys_numa_getaffinity, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 548 = 
numa_getaffinity */ + { AS(numa_setaffinity_args), (sy_call_t *)sys_numa_setaffinity, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 549 = numa_setaffinity */ }; diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c index 60691f0..8fe968e 100644 --- a/sys/kern/kern_exit.c +++ b/sys/kern/kern_exit.c @@ -86,6 +86,7 @@ __FBSDID("$FreeBSD$"); #include <vm/vm_map.h> #include <vm/vm_page.h> #include <vm/uma.h> +#include <vm/vm_domain.h> #ifdef KDTRACE_HOOKS #include <sys/dtrace_bsd.h> @@ -950,6 +951,11 @@ proc_reap(struct thread *td, struct proc *p, int *status, int options) #ifdef MAC mac_proc_destroy(p); #endif + /* + * Free any domain policy that's still hiding around. + */ + vm_domain_policy_cleanup(&p->p_vm_dom_policy); + KASSERT(FIRST_THREAD_IN_PROC(p), ("proc_reap: no residual thread!")); uma_zfree(proc_zone, p); diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c index 3fd4f09..a031435 100644 --- a/sys/kern/kern_fork.c +++ b/sys/kern/kern_fork.c @@ -80,6 +80,7 @@ __FBSDID("$FreeBSD$"); #include <vm/vm_map.h> #include <vm/vm_extern.h> #include <vm/uma.h> +#include <vm/vm_domain.h> #ifdef KDTRACE_HOOKS #include <sys/dtrace_bsd.h> @@ -405,6 +406,7 @@ do_fork(struct thread *td, int flags, struct proc *p2, struct thread *td2, bcopy(&p1->p_startcopy, &p2->p_startcopy, __rangeof(struct proc, p_startcopy, p_endcopy)); pargs_hold(p2->p_args); + PROC_UNLOCK(p1); bzero(&p2->p_startzero, @@ -497,6 +499,14 @@ do_fork(struct thread *td, int flags, struct proc *p2, struct thread *td2, if (p1->p_flag & P_PROFIL) startprofclock(p2); + /* + * Whilst the proc lock is held, copy the VM domain data out + * using the VM domain method. 
+ */ + vm_domain_policy_init(&p2->p_vm_dom_policy); + vm_domain_policy_localcopy(&p2->p_vm_dom_policy, + &p1->p_vm_dom_policy); + if (flags & RFSIGSHARE) { p2->p_sigacts = sigacts_hold(p1->p_sigacts); } else { diff --git a/sys/kern/kern_numa.c b/sys/kern/kern_numa.c new file mode 100644 index 0000000..e3a5837 --- /dev/null +++ b/sys/kern/kern_numa.c @@ -0,0 +1,170 @@ +/*- + * Copyright (c) 2015, Adrian Chadd <adrian@FreeBSD.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/sysproto.h> +#include <sys/jail.h> +#include <sys/kernel.h> +#include <sys/lock.h> +#include <sys/malloc.h> +#include <sys/mutex.h> +#include <sys/priv.h> +#include <sys/proc.h> +#include <sys/refcount.h> +#include <sys/sched.h> +#include <sys/smp.h> +#include <sys/syscallsubr.h> +#include <sys/cpuset.h> +#include <sys/sx.h> +#include <sys/queue.h> +#include <sys/libkern.h> +#include <sys/limits.h> +#include <sys/bus.h> +#include <sys/interrupt.h> + +#include <vm/uma.h> +#include <vm/vm.h> +#include <vm/vm_page.h> +#include <vm/vm_param.h> +#include <vm/vm_phys.h> +#include <vm/vm_domain.h> + +int +sys_numa_setaffinity(struct thread *td, struct numa_setaffinity_args *uap) +{ + int error; + struct vm_domain_policy vp; + struct thread *ttd; + struct proc *p; + struct cpuset *set; + + set = NULL; + p = NULL; + + /* + * Copy in just the policy information into the policy + * struct. Userland only supplies vm_domain_policy_entry. + */ + error = copyin(uap->policy, &vp.p, sizeof(vp.p)); + if (error) + goto out; + + /* + * Ensure the seq number is zero - otherwise seq.h + * may get very confused. + */ + vp.seq = 0; + + /* + * Validate policy. + */ + if (vm_domain_policy_validate(&vp) != 0) { + error = EINVAL; + goto out; + } + + /* + * Go find the desired proc/tid for this operation. + */ + error = cpuset_which(uap->which, uap->id, &p, + &ttd, &set); + if (error) + goto out; + + /* Only handle CPU_WHICH_TID and CPU_WHICH_PID */ + /* + * XXX if cpuset_which is called with WHICH_CPUSET and NULL cpuset, + * it'll return ESRCH. We should just return EINVAL. 
+ */ + switch (uap->which) { + case CPU_WHICH_TID: + vm_domain_policy_copy(&ttd->td_vm_dom_policy, &vp); + break; + case CPU_WHICH_PID: + vm_domain_policy_copy(&p->p_vm_dom_policy, &vp); + break; + default: + error = EINVAL; + break; + } + + /* NOTE(review): p is initialised to NULL and may still be NULL here when 'which' is neither CPU_WHICH_TID nor CPU_WHICH_PID; sys_numa_getaffinity guards this unlock with if (p) - confirm and guard likewise. */ + PROC_UNLOCK(p); +out: + if (set) + cpuset_rel(set); + return (error); +} + +int +sys_numa_getaffinity(struct thread *td, struct numa_getaffinity_args *uap) +{ + int error; + struct vm_domain_policy vp; + struct thread *ttd; + struct proc *p; + struct cpuset *set; + + set = NULL; + p = NULL; + + error = cpuset_which(uap->which, uap->id, &p, + &ttd, &set); + if (error) + goto out; + + /* Only handle CPU_WHICH_TID and CPU_WHICH_PID */ + /* + * XXX if cpuset_which is called with WHICH_CPUSET and NULL cpuset, + * it'll return ESRCH. We should just return EINVAL. + */ + switch (uap->which) { + case CPU_WHICH_TID: + vm_domain_policy_localcopy(&vp, &ttd->td_vm_dom_policy); + break; + case CPU_WHICH_PID: + vm_domain_policy_localcopy(&vp, &p->p_vm_dom_policy); + break; + default: + error = EINVAL; + break; + } + if (p) + PROC_UNLOCK(p); + /* + * Copy out only the vm_domain_policy_entry part. + */ + if (error == 0) + error = copyout(&vp.p, uap->policy, sizeof(vp.p)); +out: + if (set) + cpuset_rel(set); + return (error); +} diff --git a/sys/kern/kern_thr.c b/sys/kern/kern_thr.c index e6b0a59..7e7b88f 100644 --- a/sys/kern/kern_thr.c +++ b/sys/kern/kern_thr.c @@ -54,6 +54,8 @@ __FBSDID("$FreeBSD$"); #include <sys/umtx.h> #include <sys/limits.h> +#include <vm/vm_domain.h> + #include <machine/frame.h> #include <security/audit/audit.h> @@ -254,6 +256,13 @@ create_thread(struct thread *td, mcontext_t *ctx, thread_unlock(td); if (P_SHOULDSTOP(p)) newtd->td_flags |= TDF_ASTPENDING | TDF_NEEDSUSPCHK; + + /* + * Copy the existing thread VM policy into the new thread. 
+ */ + vm_domain_policy_localcopy(&newtd->td_vm_dom_policy, + &td->td_vm_dom_policy); + PROC_UNLOCK(p); tidhash_add(newtd); diff --git a/sys/kern/kern_thread.c b/sys/kern/kern_thread.c index 4343b64..0f65403 100644 --- a/sys/kern/kern_thread.c +++ b/sys/kern/kern_thread.c @@ -60,6 +60,7 @@ __FBSDID("$FreeBSD$"); #include <vm/vm.h> #include <vm/vm_extern.h> #include <vm/uma.h> +#include <vm/vm_domain.h> #include <sys/eventhandler.h> SDT_PROVIDER_DECLARE(proc); @@ -351,6 +352,7 @@ thread_alloc(int pages) return (NULL); } cpu_thread_alloc(td); + vm_domain_policy_init(&td->td_vm_dom_policy); return (td); } @@ -380,6 +382,7 @@ thread_free(struct thread *td) cpu_thread_free(td); if (td->td_kstack != 0) vm_thread_dispose(td); + vm_domain_policy_cleanup(&td->td_vm_dom_policy); uma_zfree(thread_zone, td); } diff --git a/sys/sys/_vm_domain.h b/sys/sys/_vm_domain.h new file mode 100644 index 0000000..36d107a --- /dev/null +++ b/sys/sys/_vm_domain.h @@ -0,0 +1,61 @@ +/*- + * Copyright (c) 2015 Adrian Chadd <adrian@FreeBSD.org>. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer, + * without modification. + * 2. Redistributions in binary form must reproduce at minimum a disclaimer + * similar to the "NO WARRANTY" disclaimer below ("Disclaimer") and any + * redistribution must be conditioned upon including a substantially + * similar Disclaimer requirement for further binary redistribution. + * + * NO WARRANTY + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF NONINFRINGEMENT, MERCHANTIBILITY + * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, + * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGES. + * + * $FreeBSD$ + */ +#ifndef __SYS_VM_DOMAIN_H__ +#define __SYS_VM_DOMAIN_H__ + +#include <sys/seq.h> + +typedef enum { + VM_POLICY_NONE, + VM_POLICY_ROUND_ROBIN, + VM_POLICY_FIXED_DOMAIN, + VM_POLICY_FIXED_DOMAIN_ROUND_ROBIN, + VM_POLICY_FIRST_TOUCH, + VM_POLICY_FIRST_TOUCH_ROUND_ROBIN, + VM_POLICY_MAX +} vm_domain_policy_type_t; + +struct vm_domain_policy_entry { + vm_domain_policy_type_t policy; + int domain; +}; + +struct vm_domain_policy { + seq_t seq; + struct vm_domain_policy_entry p; +}; + +#define VM_DOMAIN_POLICY_STATIC_INITIALISER(vt, vd) \ + { .seq = 0, \ + .p.policy = vt, \ + .p.domain = vd } + +#endif /* __SYS_VM_DOMAIN_H__ */ diff --git a/sys/sys/numa.h b/sys/sys/numa.h new file mode 100644 index 0000000..982f9e7 --- /dev/null +++ b/sys/sys/numa.h @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2015 Adrian Chadd <adrian@FreeBSD.org>. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. 
Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ +#ifndef __SYS_NUMA_H__ +#define __SYS_NUMA_H__ + +#include <sys/_vm_domain.h> + +extern int numa_setaffinity(cpuwhich_t which, id_t id, + struct vm_domain_policy_entry *vd); +extern int numa_getaffinity(cpuwhich_t which, id_t id, + struct vm_domain_policy_entry *vd); + +#endif /* __SYS_NUMA_H__ */ diff --git a/sys/sys/proc.h b/sys/sys/proc.h index e6c83b4..95a4e041 100644 --- a/sys/sys/proc.h +++ b/sys/sys/proc.h @@ -63,6 +63,7 @@ #endif #include <sys/ucontext.h> #include <sys/ucred.h> +#include <sys/_vm_domain.h> #include <machine/proc.h> /* Machine-dependent proc substruct. */ /* @@ -217,6 +218,7 @@ struct thread { struct turnstile *td_turnstile; /* (k) Associated turnstile. */ struct rl_q_entry *td_rlqe; /* (k) Associated range lock entry. */ struct umtx_q *td_umtxq; /* (c?) Link for when we're blocked. */ + struct vm_domain_policy td_vm_dom_policy; /* (c) current numa domain policy */ lwpid_t td_tid; /* (b) Thread ID. */ sigqueue_t td_sigqueue; /* (c) Sigs arrived, not delivered. 
*/ #define td_siglist td_sigqueue.sq_signals @@ -606,6 +608,7 @@ struct proc { uint64_t p_prev_runtime; /* (c) Resource usage accounting. */ struct racct *p_racct; /* (b) Resource accounting. */ u_char p_throttled; /* (c) Flag for racct pcpu throttling */ + struct vm_domain_policy p_vm_dom_policy; /* (c) process default VM domain, or -1 */ /* * An orphan is the child that has beed re-parented to the * debugger as a result of attaching to it. Need to keep diff --git a/sys/vm/vm_domain.c b/sys/vm/vm_domain.c new file mode 100644 index 0000000..83814d5 --- /dev/null +++ b/sys/vm/vm_domain.c @@ -0,0 +1,374 @@ +/*- + * Copyright (c) 2015 Adrian Chadd <adrian@FreeBSD.org>. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer, + * without modification. + * 2. Redistributions in binary form must reproduce at minimum a disclaimer + * similar to the "NO WARRANTY" disclaimer below ("Disclaimer") and any + * redistribution must be conditioned upon including a substantially + * similar Disclaimer requirement for further binary redistribution. + * + * NO WARRANTY + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF NONINFRINGEMENT, MERCHANTIBILITY + * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, + * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGES. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include "opt_vm.h" +#include "opt_ddb.h" + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/lock.h> +#include <sys/kernel.h> +#include <sys/malloc.h> +#include <sys/mutex.h> +#if MAXMEMDOM > 1 +#include <sys/proc.h> +#endif +#include <sys/queue.h> +#include <sys/rwlock.h> +#include <sys/sbuf.h> +#include <sys/sysctl.h> +#include <sys/tree.h> +#include <sys/vmmeter.h> +#include <sys/seq.h> + +#include <ddb/ddb.h> + +#include <vm/vm.h> +#include <vm/vm_param.h> +#include <vm/vm_kern.h> +#include <vm/vm_object.h> +#include <vm/vm_page.h> +#include <vm/vm_phys.h> + +#include <vm/vm_domain.h> + +static __inline int +vm_domain_rr_selectdomain(void) +{ +#if MAXMEMDOM > 1 + struct thread *td; + + td = curthread; + + td->td_dom_rr_idx++; + td->td_dom_rr_idx %= vm_ndomains; + return (td->td_dom_rr_idx); +#else + return (0); +#endif +} + +/* + * This implements a very simple set of VM domain memory allocation + * policies and iterators. + */ + +/* + * A VM domain policy represents a desired VM domain policy. + * Iterators implement searching through VM domains in a specific + * order. + */ + +/* + * When setting a policy, the caller must establish their own + * exclusive write protection for the contents of the domain + * policy. 
+ */ +int +vm_domain_policy_init(struct vm_domain_policy *vp) +{ + + bzero(vp, sizeof(*vp)); + vp->p.policy = VM_POLICY_NONE; + vp->p.domain = -1; + return (0); +} + +int +vm_domain_policy_set(struct vm_domain_policy *vp, + vm_domain_policy_type_t vt, int domain) +{ + + seq_write_begin(&vp->seq); + vp->p.policy = vt; + vp->p.domain = domain; + seq_write_end(&vp->seq); + return (0); +} + +/* + * Take a local copy of a policy. + * + * The destination policy isn't write-barriered; this is used + * for doing local copies into something that isn't shared. + */ +void +vm_domain_policy_localcopy(struct vm_domain_policy *dst, + const struct vm_domain_policy *src) +{ + seq_t seq; + + for (;;) { + seq = seq_read(&src->seq); + *dst = *src; + if (seq_consistent(&src->seq, seq)) + return; + cpu_spinwait(); + } +} + +/* + * Take a write-barriered copy of a policy. + * + * The destination policy is write-barriered; this is used + * for doing copies into policies that may be read by other + * threads. + */ +void +vm_domain_policy_copy(struct vm_domain_policy *dst, + const struct vm_domain_policy *src) +{ + seq_t seq; + struct vm_domain_policy d; + + for (;;) { + seq = seq_read(&src->seq); + d = *src; + if (seq_consistent(&src->seq, seq)) { + seq_write_begin(&dst->seq); + dst->p.domain = d.p.domain; + dst->p.policy = d.p.policy; + seq_write_end(&dst->seq); + return; + } + cpu_spinwait(); + } +} + +int +vm_domain_policy_validate(const struct vm_domain_policy *vp) +{ + + switch (vp->p.policy) { + case VM_POLICY_NONE: + case VM_POLICY_ROUND_ROBIN: + case VM_POLICY_FIRST_TOUCH: + case VM_POLICY_FIRST_TOUCH_ROUND_ROBIN: + if (vp->p.domain == -1) + return (0); + return (-1); + case VM_POLICY_FIXED_DOMAIN: + case VM_POLICY_FIXED_DOMAIN_ROUND_ROBIN: + if (vp->p.domain >= 0 && vp->p.domain < vm_ndomains) + return (0); + return (-1); + default: + return (-1); + } + return (-1); +} + +int +vm_domain_policy_cleanup(struct vm_domain_policy *vp) +{ + + /* For now, empty */ + return (0); +} + +int 
+vm_domain_iterator_init(struct vm_domain_iterator *vi) +{ + + /* Nothing to do for now */ + return (0); +} + +/* + * Manually setup an iterator with the given details. + */ +int +vm_domain_iterator_set(struct vm_domain_iterator *vi, + vm_domain_policy_type_t vt, int domain) +{ + + switch (vt) { + case VM_POLICY_FIXED_DOMAIN: + vi->policy = VM_POLICY_FIXED_DOMAIN; + vi->domain = domain; + vi->n = 1; + break; + case VM_POLICY_FIXED_DOMAIN_ROUND_ROBIN: + vi->policy = VM_POLICY_FIXED_DOMAIN_ROUND_ROBIN; + vi->domain = domain; + vi->n = vm_ndomains; + break; + case VM_POLICY_FIRST_TOUCH: + vi->policy = VM_POLICY_FIRST_TOUCH; + vi->domain = PCPU_GET(domain); + vi->n = 1; + break; + case VM_POLICY_FIRST_TOUCH_ROUND_ROBIN: + vi->policy = VM_POLICY_FIRST_TOUCH_ROUND_ROBIN; + vi->domain = PCPU_GET(domain); + vi->n = vm_ndomains; + break; + case VM_POLICY_ROUND_ROBIN: + default: + vi->policy = VM_POLICY_ROUND_ROBIN; + vi->domain = -1; + vi->n = vm_ndomains; + break; + } + return (0); +} + +/* + * Setup an iterator based on the given policy. + */ +static inline void +_vm_domain_iterator_set_policy(struct vm_domain_iterator *vi, + const struct vm_domain_policy *vt) +{ + /* + * Initialise the iterator. + * + * For first-touch, the initial domain is set + * via the current thread CPU domain. + * + * For fixed-domain, it's assumed that the + * caller has initialised the specific domain + * it is after. 
+ */ + switch (vt->p.policy) { + case VM_POLICY_FIXED_DOMAIN: + vi->policy = vt->p.policy; + vi->domain = vt->p.domain; + vi->n = 1; + break; + case VM_POLICY_FIXED_DOMAIN_ROUND_ROBIN: + vi->policy = vt->p.policy; + vi->domain = vt->p.domain; + vi->n = vm_ndomains; + break; + case VM_POLICY_FIRST_TOUCH: + vi->policy = vt->p.policy; + vi->domain = PCPU_GET(domain); + vi->n = 1; + break; + case VM_POLICY_FIRST_TOUCH_ROUND_ROBIN: + vi->policy = vt->p.policy; + vi->domain = PCPU_GET(domain); + vi->n = vm_ndomains; + break; + case VM_POLICY_ROUND_ROBIN: + default: + /* + * Default to round-robin policy. + */ + vi->policy = VM_POLICY_ROUND_ROBIN; + vi->domain = -1; + vi->n = vm_ndomains; + break; + } +} + +void +vm_domain_iterator_set_policy(struct vm_domain_iterator *vi, + const struct vm_domain_policy *vt) +{ + seq_t seq; + struct vm_domain_policy vt_lcl; + + for (;;) { + seq = seq_read(&vt->seq); + vt_lcl = *vt; + if (seq_consistent(&vt->seq, seq)) { + _vm_domain_iterator_set_policy(vi, &vt_lcl); + return; + } + cpu_spinwait(); + } +} + +/* + * Return the next VM domain to use. + * + * Returns 0 w/ domain set to the next domain to use, or + * -1 to indicate no more domains are available. + */ +int +vm_domain_iterator_run(struct vm_domain_iterator *vi, int *domain) +{ + + /* General catch-all */ + if (vi->n <= 0) + return (-1); + + switch (vi->policy) { + case VM_POLICY_FIXED_DOMAIN: + case VM_POLICY_FIRST_TOUCH: + *domain = vi->domain; + vi->n--; + break; + case VM_POLICY_FIXED_DOMAIN_ROUND_ROBIN: + case VM_POLICY_FIRST_TOUCH_ROUND_ROBIN: + /* + * XXX TODO: skip over the rr'ed domain + * if it equals the one we started with. + */ + if (vi->n == vm_ndomains) + *domain = vi->domain; + else + *domain = vm_domain_rr_selectdomain(); + vi->n--; + break; + case VM_POLICY_ROUND_ROBIN: + default: + *domain = vm_domain_rr_selectdomain(); + vi->n--; + break; + } + + return (0); +} + +/* + * Returns 1 if the iteration is done, or 0 if it has not. 
+ * + * This can only be called after at least one loop through + * the iterator. I.e., it's designed to be used as a tail + * check of a loop, not the head check of a loop. + */ +int +vm_domain_iterator_isdone(struct vm_domain_iterator *vi) +{ + + return (vi->n <= 0); +} + +int +vm_domain_iterator_cleanup(struct vm_domain_iterator *vi) +{ + + return (0); +} diff --git a/sys/vm/vm_domain.h b/sys/vm/vm_domain.h new file mode 100644 index 0000000..7d9e07c --- /dev/null +++ b/sys/vm/vm_domain.h @@ -0,0 +1,66 @@ +/*- + * Copyright (c) 2015 Adrian Chadd <adrian@FreeBSD.org>. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer, + * without modification. + * 2. Redistributions in binary form must reproduce at minimum a disclaimer + * similar to the "NO WARRANTY" disclaimer below ("Disclaimer") and any + * redistribution must be conditioned upon including a substantially + * similar Disclaimer requirement for further binary redistribution. + * + * NO WARRANTY + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF NONINFRINGEMENT, MERCHANTIBILITY + * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, + * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGES. + * + * $FreeBSD$ + */ +#ifndef __VM_DOMAIN_H__ +#define __VM_DOMAIN_H__ + +#include <sys/_vm_domain.h> + +struct vm_domain_iterator { + vm_domain_policy_type_t policy; + int domain; + int n; +}; + +/* + * TODO: check to see if these should just become inline functions + * at some point. + */ +extern int vm_domain_policy_init(struct vm_domain_policy *vp); +extern int vm_domain_policy_set(struct vm_domain_policy *vp, + vm_domain_policy_type_t vt, int domain); +extern int vm_domain_policy_cleanup(struct vm_domain_policy *vp); +extern void vm_domain_policy_localcopy(struct vm_domain_policy *dst, + const struct vm_domain_policy *src); +extern void vm_domain_policy_copy(struct vm_domain_policy *dst, + const struct vm_domain_policy *src); +extern int vm_domain_policy_validate(const struct vm_domain_policy *vp); + +extern int vm_domain_iterator_init(struct vm_domain_iterator *vi); +extern int vm_domain_iterator_set(struct vm_domain_iterator *vi, + vm_domain_policy_type_t vt, int domain); +extern void vm_domain_iterator_set_policy(struct vm_domain_iterator *vi, + const struct vm_domain_policy *vt); +extern int vm_domain_iterator_run(struct vm_domain_iterator *vi, + int *domain); +extern int vm_domain_iterator_isdone(struct vm_domain_iterator *vi); +extern int vm_domain_iterator_cleanup(struct vm_domain_iterator *vi); + +#endif /* __VM_DOMAIN_H__ */ diff --git a/sys/vm/vm_phys.c b/sys/vm/vm_phys.c index 71fadd7..d26b8b5 100644 --- a/sys/vm/vm_phys.c +++ b/sys/vm/vm_phys.c 
@@ -57,6 +57,7 @@ __FBSDID("$FreeBSD$"); #include <sys/sysctl.h> #include <sys/tree.h> #include <sys/vmmeter.h> +#include <sys/seq.h> #include <ddb/ddb.h> @@ -67,6 +68,8 @@ __FBSDID("$FreeBSD$"); #include <vm/vm_page.h> #include <vm/vm_phys.h> +#include <vm/vm_domain.h> + _Static_assert(sizeof(long) * NBBY >= VM_PHYSSEG_MAX, "Too many physsegs."); @@ -141,13 +144,30 @@ static int sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS); SYSCTL_OID(_vm, OID_AUTO, phys_segs, CTLTYPE_STRING | CTLFLAG_RD, NULL, 0, sysctl_vm_phys_segs, "A", "Phys Seg Info"); +#if MAXMEMDOM > 1 static int sysctl_vm_phys_locality(SYSCTL_HANDLER_ARGS); SYSCTL_OID(_vm, OID_AUTO, phys_locality, CTLTYPE_STRING | CTLFLAG_RD, NULL, 0, sysctl_vm_phys_locality, "A", "Phys Locality Info"); +#endif SYSCTL_INT(_vm, OID_AUTO, ndomains, CTLFLAG_RD, &vm_ndomains, 0, "Number of physical memory domains available."); +/* + * Default to first-touch + round-robin. + */ +static struct mtx vm_default_policy_mtx; +MTX_SYSINIT(vm_default_policy, &vm_default_policy_mtx, "default policy mutex", + MTX_DEF); +#if MAXMEMDOM > 1 +static struct vm_domain_policy vm_default_policy = + VM_DOMAIN_POLICY_STATIC_INITIALISER(VM_POLICY_FIRST_TOUCH_ROUND_ROBIN, 0); +#else +/* Use round-robin so the domain policy code will only try once per allocation */ +static struct vm_domain_policy vm_default_policy = + VM_DOMAIN_POLICY_STATIC_INITIALISER(VM_POLICY_ROUND_ROBIN, 0); +#endif + static vm_page_t vm_phys_alloc_domain_pages(int domain, int flind, int pool, int order); static void _vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int domain); @@ -156,6 +176,60 @@ static int vm_phys_paddr_to_segind(vm_paddr_t pa); static void vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl, int order); +static int +sysctl_vm_default_policy(SYSCTL_HANDLER_ARGS) +{ + char policy_name[32]; + int error; + + mtx_lock(&vm_default_policy_mtx); + + /* Map policy to output string */ + switch (vm_default_policy.p.policy) { + case VM_POLICY_FIRST_TOUCH: 
+ strcpy(policy_name, "first-touch"); + break; + case VM_POLICY_FIRST_TOUCH_ROUND_ROBIN: + strcpy(policy_name, "first-touch-rr"); + break; + case VM_POLICY_ROUND_ROBIN: + default: + strcpy(policy_name, "rr"); + break; + } + mtx_unlock(&vm_default_policy_mtx); + + error = sysctl_handle_string(oidp, &policy_name[0], + sizeof(policy_name), req); + if (error != 0 || req->newptr == NULL) + return (error); + + mtx_lock(&vm_default_policy_mtx); + /* Set: match on the subset of policies that make sense as a default */ + if (strcmp("first-touch-rr", policy_name) == 0) { + vm_domain_policy_set(&vm_default_policy, + VM_POLICY_FIRST_TOUCH_ROUND_ROBIN, 0); + } else if (strcmp("first-touch", policy_name) == 0) { + vm_domain_policy_set(&vm_default_policy, + VM_POLICY_FIRST_TOUCH, 0); + } else if (strcmp("rr", policy_name) == 0) { + vm_domain_policy_set(&vm_default_policy, + VM_POLICY_ROUND_ROBIN, 0); + } else { + error = EINVAL; + goto finish; + } + + error = 0; +finish: + mtx_unlock(&vm_default_policy_mtx); + return (error); +} + +SYSCTL_PROC(_vm, OID_AUTO, default_policy, CTLTYPE_STRING | CTLFLAG_RW, + 0, 0, sysctl_vm_default_policy, "A", + "Default policy (rr, first-touch, first-touch-rr)"); + /* * Red-black tree helpers for vm fictitious range management. */ @@ -213,6 +287,53 @@ vm_rr_selectdomain(void) #endif } +/* + * Initialise a VM domain iterator. + * + * Check the thread policy, then the proc policy, + * then default to the system policy. + * + * Later on the various layers will have this logic + * plumbed into them and the phys code will be explicitly + * handed a VM domain policy to use. 
+ */ +static void +vm_policy_iterator_init(struct vm_domain_iterator *vi) +{ +#if MAXMEMDOM > 1 + struct vm_domain_policy lcl; +#endif + + vm_domain_iterator_init(vi); + +#if MAXMEMDOM > 1 + /* Copy out the thread policy */ + vm_domain_policy_localcopy(&lcl, &curthread->td_vm_dom_policy); + if (lcl.p.policy != VM_POLICY_NONE) { + /* Thread policy is present; use it */ + vm_domain_iterator_set_policy(vi, &lcl); + return; + } + + vm_domain_policy_localcopy(&lcl, + &curthread->td_proc->p_vm_dom_policy); + if (lcl.p.policy != VM_POLICY_NONE) { + /* Process policy is present; use it */ + vm_domain_iterator_set_policy(vi, &lcl); + return; + } +#endif + /* Use system default policy */ + vm_domain_iterator_set_policy(vi, &vm_default_policy); +} + +static void +vm_policy_iterator_finish(struct vm_domain_iterator *vi) +{ + + vm_domain_iterator_cleanup(vi); +} + boolean_t vm_phys_domain_intersects(long mask, vm_paddr_t low, vm_paddr_t high) { @@ -305,17 +426,22 @@ sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS) /* * Return affinity, or -1 if there's no affinity information. */ -static int +int vm_phys_mem_affinity(int f, int t) { +#if MAXMEMDOM > 1 if (mem_locality == NULL) return (-1); if (f >= vm_ndomains || t >= vm_ndomains) return (-1); return (mem_locality[f * vm_ndomains + t]); +#else + return (-1); +#endif } +#if MAXMEMDOM > 1 /* * Outputs the VM locality table. 
*/ @@ -343,6 +469,7 @@ sysctl_vm_phys_locality(SYSCTL_HANDLER_ARGS) sbuf_delete(&sbuf); return (error); } +#endif static void vm_freelist_add(struct vm_freelist *fl, vm_page_t m, int order, int tail) @@ -634,15 +761,17 @@ vm_page_t vm_phys_alloc_pages(int pool, int order) { vm_page_t m; - int dom, domain, flind; + int domain, flind; + struct vm_domain_iterator vi; KASSERT(pool < VM_NFREEPOOL, ("vm_phys_alloc_pages: pool %d is out of range", pool)); KASSERT(order < VM_NFREEORDER, ("vm_phys_alloc_pages: order %d is out of range", order)); - for (dom = 0; dom < vm_ndomains; dom++) { - domain = vm_rr_selectdomain(); + vm_policy_iterator_init(&vi); + + while ((vm_domain_iterator_run(&vi, &domain)) == 0) { for (flind = 0; flind < vm_nfreelists; flind++) { m = vm_phys_alloc_domain_pages(domain, flind, pool, order); @@ -650,6 +779,8 @@ vm_phys_alloc_pages(int pool, int order) return (m); } } + + vm_policy_iterator_finish(&vi); return (NULL); } @@ -664,7 +795,8 @@ vm_page_t vm_phys_alloc_freelist_pages(int freelist, int pool, int order) { vm_page_t m; - int dom, domain; + struct vm_domain_iterator vi; + int domain; KASSERT(freelist < VM_NFREELIST, ("vm_phys_alloc_freelist_pages: freelist %d is out of range", @@ -673,13 +805,17 @@ vm_phys_alloc_freelist_pages(int freelist, int pool, int order) ("vm_phys_alloc_freelist_pages: pool %d is out of range", pool)); KASSERT(order < VM_NFREEORDER, ("vm_phys_alloc_freelist_pages: order %d is out of range", order)); - for (dom = 0; dom < vm_ndomains; dom++) { - domain = vm_rr_selectdomain(); + + vm_policy_iterator_init(&vi); + + while ((vm_domain_iterator_run(&vi, &domain)) == 0) { m = vm_phys_alloc_domain_pages(domain, vm_freelist_to_flind[freelist], pool, order); if (m != NULL) return (m); } + + vm_policy_iterator_finish(&vi); return (NULL); } @@ -1169,7 +1305,8 @@ vm_phys_alloc_contig(u_long npages, vm_paddr_t low, vm_paddr_t high, vm_paddr_t pa, pa_last, size; vm_page_t m, m_ret; u_long npages_end; - int dom, domain, flind, oind, 
order, pind; + int domain, flind, oind, order, pind; + struct vm_domain_iterator vi; mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); size = npages << PAGE_SHIFT; @@ -1181,9 +1318,15 @@ vm_phys_alloc_contig(u_long npages, vm_paddr_t low, vm_paddr_t high, ("vm_phys_alloc_contig: boundary must be a power of 2")); /* Compute the queue that is the best fit for npages. */ for (order = 0; (1 << order) < npages; order++); - dom = 0; + + vm_policy_iterator_init(&vi); + restartdom: - domain = vm_rr_selectdomain(); + if (vm_domain_iterator_run(&vi, &domain) != 0) { + vm_policy_iterator_finish(&vi); + return (NULL); + } + for (flind = 0; flind < vm_nfreelists; flind++) { for (oind = min(order, VM_NFREEORDER - 1); oind < VM_NFREEORDER; oind++) { for (pind = 0; pind < VM_NFREEPOOL; pind++) { @@ -1241,8 +1384,9 @@ restartdom: } } } - if (++dom < vm_ndomains) + if (!vm_domain_iterator_isdone(&vi)) goto restartdom; + vm_policy_iterator_finish(&vi); return (NULL); done: for (m = m_ret; m < &m_ret[npages]; m = &m[1 << oind]) { diff --git a/sys/vm/vm_phys.h b/sys/vm/vm_phys.h index 575b93c..37864db 100644 --- a/sys/vm/vm_phys.h +++ b/sys/vm/vm_phys.h @@ -87,6 +87,7 @@ vm_page_t vm_phys_paddr_to_vm_page(vm_paddr_t pa); void vm_phys_set_pool(int pool, vm_page_t m, int order); boolean_t vm_phys_unfree_page(vm_page_t m); boolean_t vm_phys_zero_pages_idle(void); +int vm_phys_mem_affinity(int f, int t); /* * vm_phys_domain: |