summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r--UPDATING27
-rw-r--r--bin/sh/eval.c3
-rw-r--r--contrib/groff/tmac/doc-common12
-rw-r--r--contrib/groff/tmac/doc-syms4
-rw-r--r--contrib/groff/tmac/doc.tmac20
-rw-r--r--etc/network.subr28
-rw-r--r--gnu/usr.bin/gdb/kgdb/kthr.c18
-rw-r--r--gnu/usr.bin/groff/tmac/mdoc.local3
-rw-r--r--lib/libiconv/Makefile1
-rw-r--r--lib/libkvm/kvm_pcpu.c31
-rw-r--r--lib/libmemstat/memstat_uma.c15
-rw-r--r--libexec/comsat/comsat.c2
-rw-r--r--release/ia64/mkisoimages.sh18
-rw-r--r--sbin/geom/class/part/geom_part.c11
-rw-r--r--sbin/geom/class/part/gpart.874
-rw-r--r--sbin/geom/class/sched/Makefile2
-rw-r--r--sbin/ifconfig/af_inet6.c4
-rw-r--r--sbin/ifconfig/af_nd6.c3
-rw-r--r--sbin/ipfw/main.c5
-rw-r--r--sbin/mount/mount.813
-rw-r--r--sbin/mount/mount.c16
-rw-r--r--sbin/rcorder/rcorder.88
-rw-r--r--share/man/man4/amdsbwd.414
-rw-r--r--share/man/man4/atkbd.47
-rw-r--r--share/man/man5/fstab.538
-rw-r--r--share/misc/committers-ports.dot4
-rw-r--r--sys/amd64/acpica/acpi_wakeup.c21
-rw-r--r--sys/amd64/amd64/intr_machdep.c10
-rw-r--r--sys/amd64/amd64/machdep.c7
-rw-r--r--sys/amd64/amd64/mp_machdep.c342
-rw-r--r--sys/amd64/amd64/pmap.c84
-rw-r--r--sys/amd64/amd64/vm_machdep.c21
-rw-r--r--sys/amd64/include/_types.h1
-rw-r--r--sys/amd64/include/pmap.h3
-rw-r--r--sys/amd64/include/smp.h9
-rw-r--r--sys/arm/arm/pmap.c4
-rw-r--r--sys/arm/include/_types.h1
-rw-r--r--sys/arm/include/pmap.h3
-rw-r--r--sys/boot/ia64/common/Makefile2
-rw-r--r--sys/boot/ia64/common/exec.c2
-rw-r--r--sys/boot/ia64/common/icache.c51
-rw-r--r--sys/boot/ia64/common/libia64.h1
-rw-r--r--sys/boot/ia64/efi/efimd.c32
-rw-r--r--sys/boot/ia64/efi/main.c37
-rw-r--r--sys/boot/ia64/efi/version2
-rw-r--r--sys/cddl/compat/opensolaris/sys/atomic.h2
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_history.c2
-rw-r--r--sys/cddl/dev/cyclic/i386/cyclic_machdep.c4
-rw-r--r--sys/cddl/dev/dtrace/amd64/dtrace_subr.c9
-rw-r--r--sys/cddl/dev/dtrace/i386/dtrace_subr.c10
-rw-r--r--sys/conf/NOTES7
-rw-r--r--sys/conf/files2
-rw-r--r--sys/conf/options1
-rw-r--r--sys/ddb/db_command.c24
-rw-r--r--sys/dev/amdsbwd/amdsbwd.c184
-rw-r--r--sys/dev/ath/ath_hal/ah.h14
-rw-r--r--sys/dev/ath/ath_hal/ar5212/ar5212.h3
-rw-r--r--sys/dev/ath/ath_hal/ar5212/ar5212_attach.c1
-rw-r--r--sys/dev/ath/ath_hal/ar5212/ar5212_misc.c46
-rw-r--r--sys/dev/ath/ath_hal/ar5416/ar5416.h3
-rw-r--r--sys/dev/ath/ath_hal/ar5416/ar5416_attach.c1
-rw-r--r--sys/dev/ath/ath_hal/ar5416/ar5416_misc.c16
-rw-r--r--sys/dev/ath/if_athvar.h2
-rw-r--r--sys/dev/atkbdc/atkbd.c6
-rw-r--r--sys/dev/atkbdc/atkbdreg.h1
-rw-r--r--sys/dev/cardbus/cardbus_cis.c5
-rw-r--r--sys/dev/hwpmc/hwpmc_mod.c8
-rw-r--r--sys/dev/pccard/pccard.c8
-rw-r--r--sys/dev/pci/pci.c11
-rw-r--r--sys/dev/puc/pucdata.c10
-rw-r--r--sys/dev/sound/pcm/sound.c54
-rw-r--r--sys/dev/usb/usb_device.h2
-rw-r--r--sys/dev/usb/usb_freebsd.h1
-rw-r--r--sys/dev/usb/usb_generic.c6
-rw-r--r--sys/dev/usb/usb_hub.c27
-rw-r--r--sys/dev/usb/usb_request.c100
-rw-r--r--sys/dev/usb/usb_request.h4
-rw-r--r--sys/dev/usb/usb_transfer.c5
-rw-r--r--sys/dev/usb/usbdi.h1
-rw-r--r--sys/dev/xen/control/control.c21
-rw-r--r--sys/fs/nfs/nfs_var.h19
-rw-r--r--sys/fs/nfsclient/nfs_clport.c66
-rw-r--r--sys/fs/nfsclient/nfs_clrpcops.c18
-rw-r--r--sys/fs/nfsclient/nfs_clstate.c32
-rw-r--r--sys/fs/nfsclient/nfs_clvnops.c12
-rw-r--r--sys/geom/eli/g_eli.c2
-rw-r--r--sys/i386/i386/intr_machdep.c10
-rw-r--r--sys/i386/i386/machdep.c7
-rw-r--r--sys/i386/i386/mp_machdep.c332
-rw-r--r--sys/i386/i386/pmap.c118
-rw-r--r--sys/i386/i386/vm_machdep.c36
-rw-r--r--sys/i386/include/_types.h1
-rw-r--r--sys/i386/include/pmap.h3
-rw-r--r--sys/i386/include/sf_buf.h3
-rw-r--r--sys/i386/include/smp.h9
-rw-r--r--sys/i386/xen/mp_machdep.c117
-rw-r--r--sys/i386/xen/pmap.c81
-rw-r--r--sys/ia64/acpica/acpi_machdep.c7
-rw-r--r--sys/ia64/ia64/machdep.c35
-rw-r--r--sys/ia64/ia64/mp_machdep.c21
-rw-r--r--sys/ia64/ia64/pal.S25
-rw-r--r--sys/ia64/include/_types.h1
-rw-r--r--sys/ia64/include/smp.h4
-rw-r--r--sys/kern/kern_cpuset.c88
-rw-r--r--sys/kern/kern_ktr.c61
-rw-r--r--sys/kern/kern_pmc.c6
-rw-r--r--sys/kern/kern_rmlock.c15
-rw-r--r--sys/kern/kern_shutdown.c30
-rw-r--r--sys/kern/ksched.c2
-rw-r--r--sys/kern/sched_4bsd.c66
-rw-r--r--sys/kern/sched_ule.c9
-rw-r--r--sys/kern/subr_kdb.c3
-rw-r--r--sys/kern/subr_pcpu.c2
-rw-r--r--sys/kern/subr_prf.c3
-rw-r--r--sys/kern/subr_rman.c1
-rw-r--r--sys/kern/subr_smp.c87
-rw-r--r--sys/kern/uipc_syscalls.c4
-rw-r--r--sys/mips/cavium/octeon_mp.c14
-rw-r--r--sys/mips/include/_types.h1
-rw-r--r--sys/mips/include/hwfunc.h4
-rw-r--r--sys/mips/include/pmap.h3
-rw-r--r--sys/mips/include/smp.h4
-rw-r--r--sys/mips/mips/mp_machdep.c62
-rw-r--r--sys/mips/mips/pmap.c36
-rw-r--r--sys/mips/rmi/xlr_machdep.c10
-rw-r--r--sys/mips/sibyte/sb_scd.c11
-rw-r--r--sys/net/bridgestp.c5
-rw-r--r--sys/net/bridgestp.h1
-rw-r--r--sys/net/if_stf.c50
-rw-r--r--sys/netgraph/ng_nat.c20
-rw-r--r--sys/netinet/in_pcb.c229
-rw-r--r--sys/netinet/in_pcb.h87
-rw-r--r--sys/netinet/in_pcbgroup.c457
-rw-r--r--sys/netinet/ip_divert.c3
-rw-r--r--sys/netinet/ip_input.c2
-rw-r--r--sys/netinet/ip_ipsec.c2
-rw-r--r--sys/netinet/ipfw/ip_fw2.c4
-rw-r--r--sys/netinet/ipfw/ip_fw_nat.c22
-rw-r--r--sys/netinet/ipfw/ip_fw_sockopt.c7
-rw-r--r--sys/netinet/libalias/alias_sctp.h8
-rw-r--r--sys/netinet/raw_ip.c3
-rw-r--r--sys/netinet/tcp_subr.c3
-rw-r--r--sys/netinet/tcp_syncache.c9
-rw-r--r--sys/netinet/udp_usrreq.c3
-rw-r--r--sys/netinet6/in6_pcb.c167
-rw-r--r--sys/netinet6/in6_pcb.h10
-rw-r--r--sys/netinet6/in6_pcbgroup.c103
-rw-r--r--sys/netinet6/ip6_ipsec.c12
-rw-r--r--sys/ofed/include/linux/list.h1
-rw-r--r--sys/pc98/pc98/machdep.c7
-rw-r--r--sys/powerpc/aim/mmu_oea.c13
-rw-r--r--sys/powerpc/aim/mmu_oea64.c13
-rw-r--r--sys/powerpc/booke/platform_bare.c4
-rw-r--r--sys/powerpc/booke/pmap.c11
-rw-r--r--sys/powerpc/include/_types.h1
-rw-r--r--sys/powerpc/include/openpicvar.h2
-rw-r--r--sys/powerpc/include/pmap.h5
-rw-r--r--sys/powerpc/include/smp.h4
-rw-r--r--sys/powerpc/mpc85xx/openpic_fdt.c3
-rw-r--r--sys/powerpc/powerpc/intr_machdep.c7
-rw-r--r--sys/powerpc/powerpc/mp_machdep.c26
-rw-r--r--sys/powerpc/powerpc/openpic.c9
-rw-r--r--sys/powerpc/powerpc/pic_if.m3
-rw-r--r--sys/sparc64/include/_types.h1
-rw-r--r--sys/sparc64/include/ktr.h42
-rw-r--r--sys/sparc64/include/pmap.h3
-rw-r--r--sys/sparc64/include/smp.h47
-rw-r--r--sys/sparc64/sparc64/exception.S99
-rw-r--r--sys/sparc64/sparc64/genassym.c4
-rw-r--r--sys/sparc64/sparc64/intr_machdep.c10
-rw-r--r--sys/sparc64/sparc64/mp_exception.S34
-rw-r--r--sys/sparc64/sparc64/mp_locore.S8
-rw-r--r--sys/sparc64/sparc64/mp_machdep.c105
-rw-r--r--sys/sparc64/sparc64/pmap.c8
-rw-r--r--sys/sparc64/sparc64/swtch.S40
-rw-r--r--sys/sparc64/sparc64/tlb.c6
-rw-r--r--sys/sys/_cpuset.h52
-rw-r--r--sys/sys/_rmlock.h2
-rw-r--r--sys/sys/conf.h1
-rw-r--r--sys/sys/cpuset.h61
-rw-r--r--sys/sys/ktr.h5
-rw-r--r--sys/sys/pcpu.h15
-rw-r--r--sys/sys/pmckern.h4
-rw-r--r--sys/sys/smp.h26
-rw-r--r--sys/sys/soundcard.h3
-rw-r--r--sys/sys/types.h1
-rw-r--r--sys/ufs/ffs/ffs_alloc.c6
-rw-r--r--sys/x86/x86/local_apic.c2
-rw-r--r--tools/regression/bin/sh/expansion/heredoc1.025
-rw-r--r--tools/regression/bin/sh/expansion/heredoc2.015
-rw-r--r--usr.bin/calendar/calendars/calendar.freebsd1
-rw-r--r--usr.bin/calendar/io.c2
-rw-r--r--usr.bin/grep/Makefile2
-rw-r--r--usr.bin/iconv/Makefile2
-rw-r--r--usr.bin/kdump/mksubr2
-rw-r--r--usr.bin/rctl/Makefile2
-rw-r--r--usr.bin/su/su.116
-rw-r--r--usr.sbin/bluetooth/ath3kfw/Makefile1
-rwxr-xr-xusr.sbin/bsdinstall/scripts/netconfig_ipv61
-rw-r--r--usr.sbin/bsnmpd/modules/snmp_wlan/Makefile2
-rw-r--r--usr.sbin/lastlogin/lastlogin.820
-rw-r--r--usr.sbin/lastlogin/lastlogin.c58
-rw-r--r--usr.sbin/pmccontrol/pmccontrol.c25
-rw-r--r--usr.sbin/route6d/route6d.c12
-rw-r--r--usr.sbin/rtadvd/advcap.c3
-rw-r--r--usr.sbin/rtadvd/config.c18
-rw-r--r--usr.sbin/rtadvd/dump.c2
-rw-r--r--usr.sbin/rtadvd/rtadvd.c6
-rw-r--r--usr.sbin/rtsold/rtsol.c2
-rw-r--r--usr.sbin/rtsold/rtsold.c3
-rw-r--r--usr.sbin/tcpdrop/tcpdrop.c3
211 files changed, 3648 insertions, 1572 deletions
diff --git a/UPDATING b/UPDATING
index 1ff9e8f..c8c3582 100644
--- a/UPDATING
+++ b/UPDATING
@@ -22,6 +22,33 @@ NOTE TO PEOPLE WHO THINK THAT FreeBSD 9.x IS SLOW:
machines to maximize performance. (To disable malloc debugging, run
ln -s aj /etc/malloc.conf.)
+20110608:
+ The following sysctls and tunables are retired on x86 platforms:
+ machdep.hlt_cpus
+ machdep.hlt_logical_cpus
+ The following sysctl is retired:
+ machdep.hyperthreading_allowed
+ The sysctls were supposed to provide a way to dynamically offline and
+ online selected CPUs on x86 platforms, but the implementation has not
+ been reliable, especially with the SCHED_ULE scheduler.
+ machdep.hyperthreading_allowed tunable is still available to ignore
+ hyperthreading CPUs at OS level.
+ Individual CPUs can be disabled using hint.lapic.X.disabled tunable,
+ where X is an APIC ID of a CPU. Be advised, though, that disabling
+ CPUs in non-uniform fashion will result in non-uniform topology and
+ may lead to sub-optimal system performance with SCHED_ULE, which is
+ a default scheduler.
+
+20110607:
+ cpumask_t type is retired and cpuset_t is used in order to describe
+ a mask of CPUs.
+
+20110531:
+ Changes to ifconfig(8) for dynamic address family detection mandate
+ that you are running a kernel of 20110525 or later. Make sure to
+ follow the update procedure to boot a new kernel before installing
+ world.
+
20110513:
Support for sun4v architecture is officially dropped
diff --git a/bin/sh/eval.c b/bin/sh/eval.c
index 404de33..585f91e 100644
--- a/bin/sh/eval.c
+++ b/bin/sh/eval.c
@@ -409,6 +409,7 @@ evalsubshell(union node *n, int flags)
struct job *jp;
int backgnd = (n->type == NBACKGND);
+ oexitstatus = exitstatus;
expredir(n->nredir.redirect);
if ((!backgnd && flags & EV_EXIT && !have_traps()) ||
forkshell(jp = makejob(n, 1), n, backgnd) == 0) {
@@ -436,6 +437,7 @@ evalredir(union node *n, int flags)
struct jmploc *savehandler;
volatile int in_redirect = 1;
+ oexitstatus = exitstatus;
expredir(n->nredir.redirect);
savehandler = handler;
if (setjmp(jmploc.loc)) {
@@ -478,7 +480,6 @@ expredir(union node *n)
for (redir = n ; redir ; redir = redir->nfile.next) {
struct arglist fn;
fn.lastp = &fn.list;
- oexitstatus = exitstatus;
switch (redir->type) {
case NFROM:
case NTO:
diff --git a/contrib/groff/tmac/doc-common b/contrib/groff/tmac/doc-common
index 26dcc56..20d0cab 100644
--- a/contrib/groff/tmac/doc-common
+++ b/contrib/groff/tmac/doc-common
@@ -543,6 +543,7 @@
.ds doc-operating-system-FreeBSD-7.3 7.3
.ds doc-operating-system-FreeBSD-8.0 8.0
.ds doc-operating-system-FreeBSD-8.1 8.1
+.ds doc-operating-system-FreeBSD-8.2 8.2
.
.ds doc-operating-system-Darwin-8.0.0 8.0.0
.ds doc-operating-system-Darwin-8.1.0 8.1.0
@@ -563,6 +564,17 @@
.ds doc-operating-system-Darwin-9.4.0 9.4.0
.ds doc-operating-system-Darwin-9.5.0 9.5.0
.ds doc-operating-system-Darwin-9.6.0 9.6.0
+.ds doc-operating-system-Darwin-9.7.0 9.7.0
+.ds doc-operating-system-Darwin-9.8.0 9.8.0
+.ds doc-operating-system-Darwin-10.0.0 10.0.0
+.ds doc-operating-system-Darwin-10.1.0 10.1.0
+.ds doc-operating-system-Darwin-10.2.0 10.2.0
+.ds doc-operating-system-Darwin-10.3.0 10.3.0
+.ds doc-operating-system-Darwin-10.4.0 10.4.0
+.ds doc-operating-system-Darwin-10.5.0 10.5.0
+.ds doc-operating-system-Darwin-10.6.0 10.6.0
+.ds doc-operating-system-Darwin-10.7.0 10.7.0
+.ds doc-operating-system-Darwin-11.0.0 11.0.0
.
.ds doc-operating-system-DragonFly-1.0 1.0
.ds doc-operating-system-DragonFly-1.1 1.1
diff --git a/contrib/groff/tmac/doc-syms b/contrib/groff/tmac/doc-syms
index d2a070d..0e862ad 100644
--- a/contrib/groff/tmac/doc-syms
+++ b/contrib/groff/tmac/doc-syms
@@ -617,6 +617,8 @@
.\" POSIX Part 1: System API
.ds doc-str-St--p1003.1 \*[doc-Tn-font-size]\%IEEE\*[doc-str-St] Std 1003.1
.as doc-str-St--p1003.1 " (\*[Lq]\)\*[Px]\*[doc-str-St].1\*[Rq])
+.ds doc-str-St--p1003.1b \*[doc-Tn-font-size]\%IEEE\*[doc-str-St] Std 1003.1b
+.as doc-str-St--p1003.1b " (\*[Lq]\)\*[Px]\*[doc-str-St].1\*[Rq])
.ds doc-str-St--p1003.1-88 \*[doc-Tn-font-size]\%IEEE\*[doc-str-St] Std 1003.1-1988
.as doc-str-St--p1003.1-88 " (\*[Lq]\)\*[Px]\*[doc-str-St].1\*[Rq])
.ds doc-str-St--p1003.1-90 \*[doc-Tn-font-size]ISO/IEC\*[doc-str-St] 9945-1:1990
@@ -754,6 +756,7 @@
.
.ds doc-str-Lb-libarm ARM Architecture Library (libarm, \-larm)
.ds doc-str-Lb-libarm32 ARM32 Architecture Library (libarm32, \-larm32)
+.ds doc-str-Lb-libbsm Basic Security Module Library (libbsm, \-lbsm)
.ds doc-str-Lb-libc Standard C\~Library (libc, \-lc)
.ds doc-str-Lb-libcdk Curses Development Kit Library (libcdk, \-lcdk)
.ds doc-str-Lb-libcompat Compatibility Library (libcompat, \-lcompat)
@@ -779,6 +782,7 @@
.ds doc-str-Lb-libpthread \*[Px] \*[doc-str-Lb]Threads Library (libpthread, \-lpthread)
.ds doc-str-Lb-libresolv DNS Resolver Library (libresolv, \-lresolv)
.ds doc-str-Lb-librt \*[Px] \*[doc-str-Lb]Real-time Library (librt, \-lrt)
+.ds doc-str-Lb-libSystem System Library (libSystem, \-lSystem)
.ds doc-str-Lb-libtermcap Termcap Access Library (libtermcap, \-ltermcap)
.ds doc-str-Lb-libusbhid USB Human Interface Devices Library (libusbhid, \-lusbhid)
.ds doc-str-Lb-libutil System Utilities Library (libutil, \-lutil)
diff --git a/contrib/groff/tmac/doc.tmac b/contrib/groff/tmac/doc.tmac
index 7bdbf32..c1c32dc 100644
--- a/contrib/groff/tmac/doc.tmac
+++ b/contrib/groff/tmac/doc.tmac
@@ -1197,8 +1197,14 @@
. if !\n[doc-arg-limit] \
. ds doc-macro-name Aq
.
-. ds doc-quote-left <
-. ds doc-quote-right >
+. ie "\*[doc-macro-name]"An" \{\
+. ds doc-quote-left <
+. ds doc-quote-right >
+. \}
+. el \{\
+. ds doc-quote-left \[la]
+. ds doc-quote-right \[ra]
+. \}
.
. doc-enclose-string \$@
..
@@ -1527,7 +1533,10 @@
. if !\n[doc-arg-limit] \
. ds doc-macro-name Ao
.
-. ds doc-quote-left \[la]
+. ie "\*[doc-macro-name]"An" \
+. ds doc-quote-left <
+. el \
+. ds doc-quote-left \[la]
.
. doc-enclose-open \$@
..
@@ -1546,7 +1555,10 @@
. if !\n[doc-arg-limit] \
. ds doc-macro-name Ac
.
-. ds doc-quote-right \[ra]
+. ie "\*[doc-macro-name]"An" \
+. ds doc-quote-right >
+. el \
+. ds doc-quote-right \[ra]
.
. doc-enclose-close \$@
..
diff --git a/etc/network.subr b/etc/network.subr
index 64fb0fe..ce71b78 100644
--- a/etc/network.subr
+++ b/etc/network.subr
@@ -100,25 +100,19 @@ ifconfig_up()
# inet6 specific
if afexists inet6; then
- if ipv6if $1; then
- if checkyesno ipv6_gateway_enable; then
- _ipv6_opts="-accept_rtadv"
- fi
- else
- if checkyesno ipv6_activate_all_interfaces; then
- _ipv6_opts="-ifdisabled"
- else
- _ipv6_opts="ifdisabled"
- fi
-
- # backward compatibility: $ipv6_enable
- case $ipv6_enable in
- [Yy][Ee][Ss]|[Tt][Rr][Uu][Ee]|[Oo][Nn]|1)
- _ipv6_opts="${_ipv6_opts} accept_rtadv"
- ;;
- esac
+ if checkyesno ipv6_activate_all_interfaces; then
+ _ipv6_opts="-ifdisabled"
+ elif [ "$1" != "lo0" ]; then
+ _ipv6_opts="ifdisabled"
fi
+ # backward compatibility: $ipv6_enable
+ case $ipv6_enable in
+ [Yy][Ee][Ss]|[Tt][Rr][Uu][Ee]|[Oo][Nn]|1)
+ _ipv6_opts="${_ipv6_opts} accept_rtadv"
+ ;;
+ esac
+
if [ -n "${_ipv6_opts}" ]; then
ifconfig $1 inet6 ${_ipv6_opts}
fi
diff --git a/gnu/usr.bin/gdb/kgdb/kthr.c b/gnu/usr.bin/gdb/kgdb/kthr.c
index 5036c9c..461f408 100644
--- a/gnu/usr.bin/gdb/kgdb/kthr.c
+++ b/gnu/usr.bin/gdb/kgdb/kthr.c
@@ -28,6 +28,7 @@
__FBSDID("$FreeBSD$");
#include <sys/param.h>
+#include <sys/cpuset.h>
#include <sys/proc.h>
#include <sys/types.h>
#include <sys/signal.h>
@@ -37,6 +38,7 @@ __FBSDID("$FreeBSD$");
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <unistd.h>
#include <defs.h>
#include <frame-unwind.h>
@@ -48,7 +50,7 @@ static CORE_ADDR dumppcb;
static int dumptid;
static CORE_ADDR stoppcbs;
-static __cpumask_t stopped_cpus;
+static cpuset_t stopped_cpus;
static struct kthr *first;
struct kthr *curkthr;
@@ -76,6 +78,7 @@ kgdb_thr_init(void)
{
struct proc p;
struct thread td;
+ long cpusetsize;
struct kthr *kt;
CORE_ADDR addr;
uintptr_t paddr;
@@ -102,10 +105,11 @@ kgdb_thr_init(void)
dumptid = -1;
addr = kgdb_lookup("stopped_cpus");
- if (addr != 0)
- kvm_read(kvm, addr, &stopped_cpus, sizeof(stopped_cpus));
- else
- stopped_cpus = 0;
+ CPU_ZERO(&stopped_cpus);
+ cpusetsize = sysconf(_SC_CPUSET_SIZE);
+ if (cpusetsize != -1 && (u_long)cpusetsize <= sizeof(cpuset_t) &&
+ addr != 0)
+ kvm_read(kvm, addr, &stopped_cpus, cpusetsize);
stoppcbs = kgdb_lookup("stoppcbs");
@@ -126,8 +130,8 @@ kgdb_thr_init(void)
kt->kaddr = addr;
if (td.td_tid == dumptid)
kt->pcb = dumppcb;
- else if (td.td_state == TDS_RUNNING && ((1 << td.td_oncpu) & stopped_cpus)
- && stoppcbs != 0)
+ else if (td.td_state == TDS_RUNNING && stoppcbs != 0 &&
+ CPU_ISSET(td.td_oncpu, &stopped_cpus))
kt->pcb = (uintptr_t) stoppcbs + sizeof(struct pcb) * td.td_oncpu;
else
kt->pcb = (uintptr_t)td.td_pcb;
diff --git a/gnu/usr.bin/groff/tmac/mdoc.local b/gnu/usr.bin/groff/tmac/mdoc.local
index d46f5db..befc87d 100644
--- a/gnu/usr.bin/groff/tmac/mdoc.local
+++ b/gnu/usr.bin/groff/tmac/mdoc.local
@@ -34,7 +34,6 @@
.\" FreeBSD .Lb values
.ds doc-str-Lb-libarchive Streaming Archive Library (libarchive, \-larchive)
.ds doc-str-Lb-libbluetooth Bluetooth User Library (libbluetooth, \-lbluetooth)
-.ds doc-str-Lb-libbsm Basic Security Module User Library (libbsm, \-lbsm)
.ds doc-str-Lb-libc_r Reentrant C\~Library (libc_r, \-lc_r)
.ds doc-str-Lb-libcalendar Calendar Arithmetic Library (libcalendar, \-lcalendar)
.ds doc-str-Lb-libcam Common Access Method User Library (libcam, \-lcam)
@@ -75,7 +74,7 @@
.
.\" FreeBSD releases not found in doc-common
.ds doc-operating-system-FreeBSD-7.4 7.4
-.ds doc-operating-system-FreeBSD-8.2 8.2
+.ds doc-operating-system-FreeBSD-8.3 8.3
.ds doc-operating-system-FreeBSD-9.0 9.0
.
.\" Definitions not (yet) in doc-syms
diff --git a/lib/libiconv/Makefile b/lib/libiconv/Makefile
index 078771e..71c2885 100644
--- a/lib/libiconv/Makefile
+++ b/lib/libiconv/Makefile
@@ -19,7 +19,6 @@ SRCS= citrus_bcs.c citrus_bcs_strtol.c citrus_bcs_strtoul.c \
citrus_module.c citrus_none.c citrus_pivot_factory.c \
citrus_prop.c citrus_stdenc.c iconv.c
-WARNS?= 6
CFLAGS+= --param max-inline-insns-single=128 -I ${.CURDIR}/../../include -I${.CURDIR}/../libc/include
.include <bsd.lib.mk>
diff --git a/lib/libkvm/kvm_pcpu.c b/lib/libkvm/kvm_pcpu.c
index fd09fc8..bc73baf 100644
--- a/lib/libkvm/kvm_pcpu.c
+++ b/lib/libkvm/kvm_pcpu.c
@@ -39,11 +39,13 @@
__FBSDID("$FreeBSD$");
#include <sys/param.h>
+#include <sys/cpuset.h>
#include <sys/pcpu.h>
#include <sys/sysctl.h>
#include <kvm.h>
#include <limits.h>
#include <stdlib.h>
+#include <unistd.h>
#include "kvm_private.h"
@@ -118,6 +120,9 @@ _kvm_pcpu_clear(void)
void *
kvm_getpcpu(kvm_t *kd, int cpu)
{
+ long kcpusetsize;
+ ssize_t nbytes;
+ uintptr_t readptr;
char *buf;
if (kd == NULL) {
@@ -125,6 +130,10 @@ kvm_getpcpu(kvm_t *kd, int cpu)
return (NULL);
}
+ kcpusetsize = sysconf(_SC_CPUSET_SIZE);
+ if (kcpusetsize == -1 || (u_long)kcpusetsize > sizeof(cpuset_t))
+ return ((void *)-1);
+
if (maxcpu == 0)
if (_kvm_pcpu_init(kd) < 0)
return ((void *)-1);
@@ -137,8 +146,26 @@ kvm_getpcpu(kvm_t *kd, int cpu)
_kvm_err(kd, kd->program, "out of memory");
return ((void *)-1);
}
- if (kvm_read(kd, (uintptr_t)pcpu_data[cpu], buf, sizeof(struct pcpu)) !=
- sizeof(struct pcpu)) {
+ nbytes = sizeof(struct pcpu) - 2 * kcpusetsize;
+ readptr = (uintptr_t)pcpu_data[cpu];
+ if (kvm_read(kd, readptr, buf, nbytes) != nbytes) {
+ _kvm_err(kd, kd->program, "unable to read per-CPU data");
+ free(buf);
+ return ((void *)-1);
+ }
+
+ /* Fetch the valid cpuset_t objects. */
+ CPU_ZERO((cpuset_t *)(buf + nbytes));
+ CPU_ZERO((cpuset_t *)(buf + nbytes + sizeof(cpuset_t)));
+ readptr += nbytes;
+ if (kvm_read(kd, readptr, buf + nbytes, kcpusetsize) != kcpusetsize) {
+ _kvm_err(kd, kd->program, "unable to read per-CPU data");
+ free(buf);
+ return ((void *)-1);
+ }
+ readptr += kcpusetsize;
+ if (kvm_read(kd, readptr, buf + nbytes + sizeof(cpuset_t),
+ kcpusetsize) != kcpusetsize) {
_kvm_err(kd, kd->program, "unable to read per-CPU data");
free(buf);
return ((void *)-1);
diff --git a/lib/libmemstat/memstat_uma.c b/lib/libmemstat/memstat_uma.c
index 4aae61a..485a4f2 100644
--- a/lib/libmemstat/memstat_uma.c
+++ b/lib/libmemstat/memstat_uma.c
@@ -27,6 +27,7 @@
*/
#include <sys/param.h>
+#include <sys/cpuset.h>
#include <sys/sysctl.h>
#define LIBMEMSTAT /* Cause vm_page.h not to include opt_vmpage.h */
@@ -44,6 +45,7 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <unistd.h>
#include "memstat.h"
#include "memstat_internal.h"
@@ -313,7 +315,8 @@ memstat_kvm_uma(struct memory_type_list *list, void *kvm_handle)
struct uma_keg *kzp, kz;
int hint_dontsearch, i, mp_maxid, ret;
char name[MEMTYPE_MAXNAME];
- __cpumask_t all_cpus;
+ cpuset_t all_cpus;
+ long cpusetsize;
kvm_t *kvm;
kvm = (kvm_t *)kvm_handle;
@@ -337,7 +340,13 @@ memstat_kvm_uma(struct memory_type_list *list, void *kvm_handle)
list->mtl_error = ret;
return (-1);
}
- ret = kread_symbol(kvm, X_ALL_CPUS, &all_cpus, sizeof(all_cpus), 0);
+ cpusetsize = sysconf(_SC_CPUSET_SIZE);
+ if (cpusetsize == -1 || (u_long)cpusetsize > sizeof(cpuset_t)) {
+ list->mtl_error = MEMSTAT_ERROR_KVM_NOSYMBOL;
+ return (-1);
+ }
+ CPU_ZERO(&all_cpus);
+ ret = kread_symbol(kvm, X_ALL_CPUS, &all_cpus, cpusetsize, 0);
if (ret != 0) {
list->mtl_error = ret;
return (-1);
@@ -407,7 +416,7 @@ memstat_kvm_uma(struct memory_type_list *list, void *kvm_handle)
if (kz.uk_flags & UMA_ZFLAG_INTERNAL)
goto skip_percpu;
for (i = 0; i < mp_maxid + 1; i++) {
- if ((all_cpus & (1 << i)) == 0)
+ if (!CPU_ISSET(i, &all_cpus))
continue;
ucp = &ucp_array[i];
mtp->mt_numallocs += ucp->uc_allocs;
diff --git a/libexec/comsat/comsat.c b/libexec/comsat/comsat.c
index d0ff7a4..2a0fd3c 100644
--- a/libexec/comsat/comsat.c
+++ b/libexec/comsat/comsat.c
@@ -180,7 +180,7 @@ notify(struct utmpx *utp, char file[], off_t offset, int folder)
dsyslog(LOG_DEBUG, "%s: wrong mode on %s", utp->ut_user, tty);
return;
}
- dsyslog(LOG_DEBUG, "notify %s on %s\n", utp->ut_user, tty);
+ dsyslog(LOG_DEBUG, "notify %s on %s", utp->ut_user, tty);
switch (fork()) {
case -1:
syslog(LOG_NOTICE, "fork failed (%m)");
diff --git a/release/ia64/mkisoimages.sh b/release/ia64/mkisoimages.sh
index 33ba192..8709c12 100644
--- a/release/ia64/mkisoimages.sh
+++ b/release/ia64/mkisoimages.sh
@@ -48,28 +48,32 @@ EFIPART=efipart.sys
if [ $bootable = yes ]; then
EFISZ=65536
MNT=/mnt
- dd if=/dev/zero of=$BASE/$EFIPART count=$EFISZ
- md=`mdconfig -a -t vnode -f $BASE/$EFIPART`
+ dd if=/dev/zero of=$EFIPART count=$EFISZ
+ md=`mdconfig -a -t vnode -f $EFIPART`
newfs_msdos -F 12 -S 512 -h 4 -o 0 -s $EFISZ -u 16 $md
mount -t msdosfs /dev/$md $MNT
mkdir -p $MNT/efi/boot $MNT/boot $MNT/boot/kernel
cp -R $BASE/boot/defaults $MNT/boot
cp $BASE/boot/kernel/kernel $MNT/boot/kernel
- cp $BASE/boot/kernel/ispfw.ko $MNT/boot/kernel
+ if [ -s $BASE/boot/kernel/ispfw.ko ]; then
+ cp $BASE/boot/kernel/ispfw.ko $MNT/boot/kernel
+ fi
cp $BASE/boot/device.hints $MNT/boot
cp $BASE/boot/loader.* $MNT/boot
- cp $BASE/boot/mfsroot.gz $MNT/boot
+ if [ -s $BASE/boot/mfsroot.gz ]; then
+ cp $BASE/boot/mfsroot.gz $MNT/boot
+ fi
cp $BASE/boot/support.4th $MNT/boot
mv $MNT/boot/loader.efi $MNT/efi/boot/bootia64.efi
umount $MNT
mdconfig -d -u $md
- BOOTOPTS="-b bootimage=i386;$EFIPART -o no-emul-boot"
+ BOOTOPTS="-o bootimage=i386;$EFIPART -o no-emul-boot"
else
BOOTOPTS=""
fi
-echo "/dev/iso9660/$LABEL / cd9660 ro 0 0" > $1/etc/fstab
+echo "/dev/iso9660/$LABEL / cd9660 ro 0 0" > $BASE/etc/fstab
makefs -t cd9660 $BOOTOPTS -o rockridge -o label=$LABEL $NAME $BASE $*
-rm -f $BASE/$EFIPART
+rm -f $EFIPART
rm $1/etc/fstab
exit 0
diff --git a/sbin/geom/class/part/geom_part.c b/sbin/geom/class/part/geom_part.c
index ae9f4b7..55a055e 100644
--- a/sbin/geom/class/part/geom_part.c
+++ b/sbin/geom/class/part/geom_part.c
@@ -341,9 +341,10 @@ gpart_autofill_resize(struct gctl_req *req)
errc(EXIT_FAILURE, error, "Invalid alignment param");
if (alignment == 0)
errx(EXIT_FAILURE, "Invalid alignment param");
+ } else {
lba = pp->lg_stripesize / pp->lg_sectorsize;
if (lba > 0)
- alignment = g_lcm(lba, alignment);
+ alignment = lba;
}
error = gctl_delete_param(req, "alignment");
if (error)
@@ -491,13 +492,9 @@ gpart_autofill(struct gctl_req *req)
if (has_size && has_start && !has_alignment)
goto done;
- /*
- * If stripesize is not zero, then recalculate alignment value.
- * Use LCM from stripesize and user specified alignment.
- */
len = pp->lg_stripesize / pp->lg_sectorsize;
- if (len > 0 )
- alignment = g_lcm(len, alignment);
+ if (len > 0 && !has_alignment)
+ alignment = len;
/* Adjust parameters to stripeoffset */
offset = pp->lg_stripeoffset / pp->lg_sectorsize;
diff --git a/sbin/geom/class/part/gpart.8 b/sbin/geom/class/part/gpart.8
index 940620c..4365d6b 100644
--- a/sbin/geom/class/part/gpart.8
+++ b/sbin/geom/class/part/gpart.8
@@ -24,7 +24,7 @@
.\"
.\" $FreeBSD$
.\"
-.Dd May 30, 2011
+.Dd June 6, 2011
.Dt GPART 8
.Os
.Sh NAME
@@ -530,16 +530,17 @@ about its use.
.El
.\"
.Sh PARTITION TYPES
+Partition types are identified on disk by particular strings or magic
+values.
The
.Nm
-utility uses symbolic names for common partition types to avoid that the
-user needs to know what the partitioning scheme in question is and what
-the actual number or identification needs to be used for a particular
-type.
+utility uses symbolic names for common partition types to avoid the
+user needing to know these values or other details of the partitioning
+scheme in question.
The
.Nm
utility also allows the user to specify scheme-specific partition types
-for partition types that do not have symbol names.
+for partition types that do not have symbolic names.
The symbolic names currently understood are:
.Bl -tag -width ".Cm freebsd-vinum"
.It Cm bios-boot
@@ -740,30 +741,30 @@ action or reverted with the
.Cm undo
action.
.Sh RECOVERING
-The GEOM class PART supports recovering of partition tables only for GPT.
+The GEOM PART class supports recovering of partition tables only for GPT.
The GUID partition table has a primary and secondary (backup) copy of
-metadata for redundance.
-They are stored in the begining and in the end of device respectively.
-Therefore it is acceptable to have some corruptions in the metadata that
-are not fatal to work with GPT.
-When kernel detects corrupt metadata it marks this table as corrupt and
-reports about corruption.
-Any changes in corrupt table are prohibited except
+metadata for redundancy; these are stored at the beginning and the end
+of the device respectively.
+As a result of having two copies, it is acceptable to have some corruption
+within the metadata that is not fatal to the working of GPT.
+When the kernel detects corrupt metadata it marks this table as corrupt and
+reports the corruption.
+Any operations on corrupt tables are prohibited except for
.Cm destroy
and
.Cm recover .
.Pp
-In case when only first sector is corrupt kernel can not detect GPT even
-if partition table is not corrupt.
-You can write protective MBR with
+If the first sector of a provider is corrupt, the kernel cannot detect GPT
+even if the partition table itself is not corrupt.
+You can rewrite the protective MBR using the
.Xr dd 1
-command to restore ability of GPT detection.
-The copy of protective MBR is usually located in the
+command, to restore the ability to detect the GPT.
+The copy of the protective MBR is usually located in the
.Pa /boot/pmbr
file.
.Pp
-In case when some of metadata is corrupt you will get to know about this
-from kernel's messages like these:
+If one GPT header appears to be corrupt but the other copy remains intact,
+the kernel will log the following:
.Bd -literal -offset indent
GEOM: provider: the primary GPT table is corrupt or invalid.
GEOM: provider: using the secondary instead -- recovery strongly advised.
@@ -777,32 +778,31 @@ GEOM: provider: using the primary only -- recovery suggested.
.Pp
Also
.Nm
-commands like
+commands such as
.Cm show , status
and
.Cm list
-will report about corrupt table.
+will report about corrupt tables.
.Pp
-In case when the size of device has changed (e.g.\& volume expansion) the
-secondary GPT header will become located not in the last sector.
+If the size of the device has changed (e.g.\& volume expansion) the
+secondary GPT header will no longer be located in the last sector.
This is not a metadata corruption, but it is dangerous because any
-corruption of the primary GPT will lead to lost of partition table.
-Kernel reports about this problem with message:
+corruption of the primary GPT will lead to loss of the partition table.
+This problem is reported by the kernel with the message:
.Bd -literal -offset indent
GEOM: provider: the secondary GPT header is not in the last LBA.
.Ed
.Pp
-A corrupt table can be recovered with
+This situation can be recovered with the
.Cm recover
command.
-This command does reconstruction of corrupt metadata using
-known valid metadata.
-Also it can relocate secondary GPT to the end of device.
+This command reconstructs the corrupt metadata using known valid
+metadata and relocates the secondary GPT to the end of the device.
.Pp
.Em NOTE :
-The GEOM class PART can detect the same partition table on different GEOM
-providers and some of them will be marked as corrupt.
-Be careful when choosing a provider for recovering.
+The GEOM PART class can detect the same partition table visible through
+different GEOM providers, and some of them will be marked as corrupt.
+Be careful when choosing a provider for recovery.
If you choose incorrectly you can destroy the metadata of another GEOM class,
e.g.\& GEOM MIRROR or GEOM LABEL.
.Sh SYSCTL VARIABLES
@@ -815,11 +815,11 @@ The default value is shown next to each variable.
.Bl -tag -width indent
.It Va kern.geom.part.check_integrity : No 1
This variable controls the behaviour of metadata integrity checks.
-When integrity checks are enabled
+When integrity checks are enabled, the
.Nm PART
-GEOM class verifies all generic partition parameters that it gets from the
+GEOM class verifies all generic partition parameters obtained from the
disk metadata.
-If some inconsistency is detected, partition table will be
+If some inconsistency is detected, the partition table will be
rejected with a diagnostic message:
.Sy "GEOM_PART: Integrity check failed (provider, scheme)" .
.El
diff --git a/sbin/geom/class/sched/Makefile b/sbin/geom/class/sched/Makefile
index a6ccd58..6f54d3f 100644
--- a/sbin/geom/class/sched/Makefile
+++ b/sbin/geom/class/sched/Makefile
@@ -5,6 +5,4 @@
GEOM_CLASS= sched
-WARNS?= 6
-
.include <bsd.lib.mk>
diff --git a/sbin/ifconfig/af_inet6.c b/sbin/ifconfig/af_inet6.c
index 7fdca0d..585be05 100644
--- a/sbin/ifconfig/af_inet6.c
+++ b/sbin/ifconfig/af_inet6.c
@@ -499,8 +499,8 @@ static struct cmd inet6_cmds[] = {
DEF_CMD("-autoconf", -IN6_IFF_AUTOCONF, setip6flags),
DEF_CMD("accept_rtadv", ND6_IFF_ACCEPT_RTADV, setnd6flags),
DEF_CMD("-accept_rtadv",-ND6_IFF_ACCEPT_RTADV, setnd6flags),
- DEF_CMD("defroute_rtadv",ND6_IFF_DEFROUTE_RTADV,setnd6flags),
- DEF_CMD("-defroute_rtadv",-ND6_IFF_DEFROUTE_RTADV,setnd6flags),
+ DEF_CMD("no_radr", ND6_IFF_NO_RADR, setnd6flags),
+ DEF_CMD("-no_radr", -ND6_IFF_NO_RADR, setnd6flags),
DEF_CMD("defaultif", 1, setnd6defif),
DEF_CMD("-defaultif", -1, setnd6defif),
DEF_CMD("ifdisabled", ND6_IFF_IFDISABLED, setnd6flags),
diff --git a/sbin/ifconfig/af_nd6.c b/sbin/ifconfig/af_nd6.c
index 273e8ff..eed00ea 100644
--- a/sbin/ifconfig/af_nd6.c
+++ b/sbin/ifconfig/af_nd6.c
@@ -58,7 +58,7 @@ static const char rcsid[] =
#define MAX_SYSCTL_TRY 5
#define ND6BITS "\020\001PERFORMNUD\002ACCEPT_RTADV\003PREFER_SOURCE" \
"\004IFDISABLED\005DONT_SET_IFROUTE\006AUTO_LINKLOCAL" \
- "\007DEFROUTE_RTADV\020DEFAULTIF"
+ "\007NO_RADR\020DEFAULTIF"
static int isnd6defif(int);
void setnd6flags(const char *, int, int, const struct afswtch *);
@@ -159,7 +159,6 @@ nd6_status(int s)
}
isdefif = isnd6defif(s6);
close(s6);
-
if (nd.ndi.flags == 0 && !isdefif)
return;
printb("\tnd6 options",
diff --git a/sbin/ipfw/main.c b/sbin/ipfw/main.c
index fb3f3fb..109b62b 100644
--- a/sbin/ipfw/main.c
+++ b/sbin/ipfw/main.c
@@ -356,6 +356,7 @@ ipfw_main(int oldac, char **oldav)
*/
co.do_nat = 0;
co.do_pipe = 0;
+ co.use_set = 0;
if (!strncmp(*av, "nat", strlen(*av)))
co.do_nat = 1;
else if (!strncmp(*av, "pipe", strlen(*av)))
@@ -444,7 +445,7 @@ static void
ipfw_readfile(int ac, char *av[])
{
#define MAX_ARGS 32
- char buf[BUFSIZ];
+ char buf[4096];
char *progname = av[0]; /* original program name */
const char *cmd = NULL; /* preprocessor name, if any */
const char *filename = av[ac-1]; /* file to read */
@@ -552,7 +553,7 @@ ipfw_readfile(int ac, char *av[])
}
}
- while (fgets(buf, BUFSIZ, f)) { /* read commands */
+ while (fgets(buf, sizeof(buf), f)) { /* read commands */
char linename[20];
char *args[2];
diff --git a/sbin/mount/mount.8 b/sbin/mount/mount.8
index 2140b37..fdfd75c 100644
--- a/sbin/mount/mount.8
+++ b/sbin/mount/mount.8
@@ -28,7 +28,7 @@
.\" @(#)mount.8 8.8 (Berkeley) 6/16/94
.\" $FreeBSD$
.\"
-.Dd April 28, 2011
+.Dd June 6, 2011
.Dt MOUNT 8
.Os
.Sh NAME
@@ -348,7 +348,6 @@ option) may be passed as a comma separated list; these options are
distinguished by a leading
.Dq \&-
(dash).
-Options that take a value are specified using the syntax -option=value.
For example, the
.Nm
command:
@@ -363,6 +362,16 @@ to execute the equivalent of:
/sbin/mount_cd9660 -e /dev/cd0 /cdrom
.Ed
.Pp
+Options that take a value are specified using the -option=value syntax:
+.Bd -literal -offset indent
+mount -t msdosfs -o -u=fred,-g=wheel /dev/da0s1 /mnt
+.Ed
+.Pp
+is equivalent to:
+.Bd -literal -offset indent
+/sbin/mount_msdosfs -u fred -g wheel /dev/da0s1 /mnt
+.Ed
+.Pp
Additional options specific to file system types
which are not internally known
(see the description of the
diff --git a/sbin/mount/mount.c b/sbin/mount/mount.c
index acded1c..2229419 100644
--- a/sbin/mount/mount.c
+++ b/sbin/mount/mount.c
@@ -243,7 +243,7 @@ main(int argc, char *argv[])
const char *mntfromname, **vfslist, *vfstype;
struct fstab *fs;
struct statfs *mntbuf;
- int all, ch, i, init_flags, late, mntsize, rval, have_fstab, ro;
+ int all, ch, i, init_flags, late, failok, mntsize, rval, have_fstab, ro;
char *cp, *ep, *options;
all = init_flags = late = 0;
@@ -328,6 +328,10 @@ main(int argc, char *argv[])
continue;
if (hasopt(fs->fs_mntops, "late") && !late)
continue;
+ if (hasopt(fs->fs_mntops, "failok"))
+ failok = 1;
+ else
+ failok = 0;
if (!(init_flags & MNT_UPDATE) &&
ismounted(fs, mntbuf, mntsize))
continue;
@@ -335,7 +339,7 @@ main(int argc, char *argv[])
mntbuf->f_flags);
if (mountfs(fs->fs_vfstype, fs->fs_spec,
fs->fs_file, init_flags, options,
- fs->fs_mntops))
+ fs->fs_mntops) && !failok)
rval = 1;
}
} else if (fstab_style) {
@@ -717,6 +721,14 @@ mangle(char *options, struct cpa *a)
* before mountd starts.
*/
continue;
+ } else if (strcmp(p, "failok") == 0) {
+ /*
+ * "failok" is used to prevent certain file
+ * systems from causing the system to
+ * drop into single user mode in the boot
+ * cycle, and is not a real mount option.
+ */
+ continue;
} else if (strncmp(p, "mountprog", 9) == 0) {
/*
* "mountprog" is used to force the use of
diff --git a/sbin/rcorder/rcorder.8 b/sbin/rcorder/rcorder.8
index b17b0a3..a47f013 100644
--- a/sbin/rcorder/rcorder.8
+++ b/sbin/rcorder/rcorder.8
@@ -31,7 +31,7 @@
.\"
.\" $FreeBSD$
.\"
-.Dd June 9, 2008
+.Dd June 6, 2011
.Dt RCORDER 8
.Os
.Sh NAME
@@ -89,6 +89,12 @@ and
lines may appear, but all such lines must appear in a sequence without
any intervening lines, as once a line that does not follow the format
is reached, parsing stops.
+Note that for historical reasons,
+.Dq Li REQUIRES ,
+.Dq Li PROVIDES ,
+and
+.Dq Li KEYWORDS
+are also accepted in addition to the above.
.Pp
The options are as follows:
.Bl -tag -width indent
diff --git a/share/man/man4/amdsbwd.4 b/share/man/man4/amdsbwd.4
index 370cfa8..cfb2f79 100644
--- a/share/man/man4/amdsbwd.4
+++ b/share/man/man4/amdsbwd.4
@@ -25,12 +25,12 @@
.\"
.\" $FreeBSD$
.\"
-.Dd November 30, 2009
+.Dd June 7, 2011
.Dt AMDSBWD 4
.Os
.Sh NAME
.Nm amdsbwd
-.Nd device driver for the AMD SB600/SB700/SB710/SB750 watchdog timer
+.Nd device driver for the AMD SB600/SB7xx/SB8xx watchdog timers
.Sh SYNOPSIS
To compile this driver into the kernel,
place the following line in your
@@ -51,7 +51,7 @@ The
driver provides
.Xr watchdog 4
support for the watchdog timers present on
-AMD SB600 and SB7xx south bridge chips.
+AMD SB600, SB7xx and SB8xx southbridges.
.Sh SEE ALSO
.Xr watchdog 4 ,
.Xr watchdog 8 ,
@@ -61,12 +61,14 @@ AMD SB600 and SB7xx south bridge chips.
The
.Nm
driver first appeared in
-.Fx 9.0 .
+.Fx 7.3
+and
+.Fx 8.1 .
.Sh AUTHORS
.An -nosplit
The
.Nm
driver was written by
-.An Andiry Gapon Aq avg@FreeBSD.org .
+.An Andriy Gapon Aq avg@FreeBSD.org .
This manual page was written by
-.An Andiry Gapon Aq avg@FreeBSD.org .
+.An Andriy Gapon Aq avg@FreeBSD.org .
diff --git a/share/man/man4/atkbd.4 b/share/man/man4/atkbd.4
index 0c486e2..73831c2 100644
--- a/share/man/man4/atkbd.4
+++ b/share/man/man4/atkbd.4
@@ -26,7 +26,7 @@
.\"
.\" $FreeBSD$
.\"
-.Dd May 20, 2011
+.Dd January 29, 2008
.Dt ATKBD 4
.Os
.Sh NAME
@@ -176,11 +176,6 @@ When this option is given, the
.Nm
driver will not test the keyboard port during the probe routine.
Some machines hang during boot when this test is performed.
-.It bit 4 (PROBE_TYPEMATIC)
-When this option is given, the
-.Nm
-driver will try to probe the keyboard typematic rate on boot.
-Some machines hang during boot when this test is performed.
.El
.\".Sh FILES
.Sh EXAMPLES
diff --git a/share/man/man5/fstab.5 b/share/man/man5/fstab.5
index 1c6f17e..adbf489 100644
--- a/share/man/man5/fstab.5
+++ b/share/man/man5/fstab.5
@@ -32,7 +32,7 @@
.\" @(#)fstab.5 8.1 (Berkeley) 6/5/93
.\" $FreeBSD$
.\"
-.Dd November 23, 2008
+.Dd June 7, 2011
.Dt FSTAB 5
.Os
.Sh NAME
@@ -70,7 +70,8 @@ remote file system to be mounted.
The second field,
.Pq Fa fs_file ,
describes the mount point for the file system.
-For swap partitions, this field should be specified as ``none''.
+For swap partitions, this field should be specified as
+.Dq none .
.Pp
The third field,
.Pq Fa fs_vfstype ,
@@ -125,7 +126,11 @@ sync,noatime,-m=644,-M=755,-u=foo,-g=bar
in the option field of
.Nm .
.Pp
-If the options ``userquota'' and/or ``groupquota'' are specified,
+If the options
+.Dq userquota
+and/or
+.Dq groupquota
+are specified,
the file system is automatically processed by the
.Xr quotacheck 8
command, and user and/or group disk quotas are enabled with
@@ -147,7 +152,18 @@ this location can be specified as:
userquota=/var/quotas/tmp.user
.Ed
.Pp
-If the option ``noauto'' is specified, the file system will not be automatically
+If the option
+.Dq failok
+is specified,
+the system will ignore any error which happens during the mount of that filesystem,
+which would otherwise cause the system to drop into single user mode.
+This option is implemented by the
+.Xr mount 8
+command and will not be passed to the kernel.
+.Pp
+If the option
+.Dq noauto
+is specified, the file system will not be automatically
mounted at system startup.
Note that, for network file systems
of third party types
@@ -170,13 +186,19 @@ field (it is not deleted from the
field).
If
.Fa fs_type
-is ``rw'' or ``ro'' then the file system whose name is given in the
+is
+.Dq rw
+or
+.Dq ro
+then the file system whose name is given in the
.Fa fs_file
field is normally mounted read-write or read-only on the
specified special file.
If
.Fa fs_type
-is ``sw'' then the special file is made available as a piece of swap
+is
+.Dq sw
+then the special file is made available as a piece of swap
space by the
.Xr swapon 8
command at the end of the system reboot procedure.
@@ -187,7 +209,9 @@ and
are unused.
If
.Fa fs_type
-is specified as ``xx'' the entry is ignored.
+is specified as
+.Dq xx
+the entry is ignored.
This is useful to show disk partitions which are currently unused.
.Pp
The fifth field,
diff --git a/share/misc/committers-ports.dot b/share/misc/committers-ports.dot
index bfe9f4e..d97abcc 100644
--- a/share/misc/committers-ports.dot
+++ b/share/misc/committers-ports.dot
@@ -97,6 +97,7 @@ itetcu [label="Ion-Mihai Tetcu\nitetcu@FreeBSD.org\n2006/06/07"]
jacula [label="Giuseppe Pilichi\njacula@FreeBSD.org\n2010/04/05"]
jadawin [label="Philippe Audeoud\njadawin@FreeBSD.org\n2008/03/02"]
jkim [label="Jung-uk Kim\njkim@FreeBSD.org\n2007/09/12"]
+jlaffaye [label="Julien Laffaye\njlaffaye@FreeBSD.org\n2011/06/06"]
jmelo [label="Jean Milanez Melo\njmelo@FreeBSD.org\n2006/03/31"]
joerg [label="Joerg Wunsch\njoerg@FreeBSD.org\n1994/08/22"]
johans [label="Johan Selst\njohans@FreeBSD.org\n2006/04/01"]
@@ -204,6 +205,8 @@ arved -> stefan
asami -> obrien
+bapt -> jlaffaye
+
beat -> decke
beech -> glarkin
@@ -401,6 +404,7 @@ tabthorpe -> dhn
tabthorpe -> fluffy
tabthorpe -> jacula
tabthorpe -> jadawin
+tabthorpe -> jlaffaye
tabthorpe -> pgj
tabthorpe -> rene
diff --git a/sys/amd64/acpica/acpi_wakeup.c b/sys/amd64/acpica/acpi_wakeup.c
index 57341c9..29e66c5 100644
--- a/sys/amd64/acpica/acpi_wakeup.c
+++ b/sys/amd64/acpica/acpi_wakeup.c
@@ -78,7 +78,7 @@ static void acpi_stop_beep(void *);
#ifdef SMP
static int acpi_wakeup_ap(struct acpi_softc *, int);
-static void acpi_wakeup_cpus(struct acpi_softc *, cpumask_t);
+static void acpi_wakeup_cpus(struct acpi_softc *, const cpuset_t *);
#endif
#define WAKECODE_VADDR(sc) ((sc)->acpi_wakeaddr + (3 * PAGE_SIZE))
@@ -173,7 +173,7 @@ acpi_wakeup_ap(struct acpi_softc *sc, int cpu)
#define BIOS_WARM (0x0a)
static void
-acpi_wakeup_cpus(struct acpi_softc *sc, cpumask_t wakeup_cpus)
+acpi_wakeup_cpus(struct acpi_softc *sc, const cpuset_t *wakeup_cpus)
{
uint32_t mpbioswarmvec;
int cpu;
@@ -192,7 +192,7 @@ acpi_wakeup_cpus(struct acpi_softc *sc, cpumask_t wakeup_cpus)
/* Wake up each AP. */
for (cpu = 1; cpu < mp_ncpus; cpu++) {
- if ((wakeup_cpus & (1 << cpu)) == 0)
+ if (!CPU_ISSET(cpu, wakeup_cpus))
continue;
if (acpi_wakeup_ap(sc, cpu) == 0) {
/* restore the warmstart vector */
@@ -214,7 +214,7 @@ int
acpi_sleep_machdep(struct acpi_softc *sc, int state)
{
#ifdef SMP
- cpumask_t wakeup_cpus;
+ cpuset_t wakeup_cpus;
#endif
register_t cr3, rf;
ACPI_STATUS status;
@@ -244,10 +244,9 @@ acpi_sleep_machdep(struct acpi_softc *sc, int state)
if (savectx(susppcbs[0])) {
#ifdef SMP
- if (wakeup_cpus != 0 && suspend_cpus(wakeup_cpus) == 0) {
- device_printf(sc->acpi_dev,
- "Failed to suspend APs: CPU mask = 0x%jx\n",
- (uintmax_t)(wakeup_cpus & ~stopped_cpus));
+ if (!CPU_EMPTY(&wakeup_cpus) &&
+ suspend_cpus(wakeup_cpus) == 0) {
+ device_printf(sc->acpi_dev, "Failed to suspend APs\n");
goto out;
}
#endif
@@ -282,8 +281,8 @@ acpi_sleep_machdep(struct acpi_softc *sc, int state)
PCPU_SET(switchtime, 0);
PCPU_SET(switchticks, ticks);
#ifdef SMP
- if (wakeup_cpus != 0)
- acpi_wakeup_cpus(sc, wakeup_cpus);
+ if (!CPU_EMPTY(&wakeup_cpus))
+ acpi_wakeup_cpus(sc, &wakeup_cpus);
#endif
acpi_resync_clock(sc);
ret = 0;
@@ -291,7 +290,7 @@ acpi_sleep_machdep(struct acpi_softc *sc, int state)
out:
#ifdef SMP
- if (wakeup_cpus != 0)
+ if (!CPU_EMPTY(&wakeup_cpus))
restart_cpus(wakeup_cpus);
#endif
diff --git a/sys/amd64/amd64/intr_machdep.c b/sys/amd64/amd64/intr_machdep.c
index 4edef81..3a89531 100644
--- a/sys/amd64/amd64/intr_machdep.c
+++ b/sys/amd64/amd64/intr_machdep.c
@@ -443,8 +443,7 @@ DB_SHOW_COMMAND(irqs, db_show_irqs)
* allocate CPUs round-robin.
*/
-/* The BSP is always a valid target. */
-static cpumask_t intr_cpus = (1 << 0);
+static cpuset_t intr_cpus;
static int current_cpu;
/*
@@ -466,7 +465,7 @@ intr_next_cpu(void)
current_cpu++;
if (current_cpu > mp_maxid)
current_cpu = 0;
- } while (!(intr_cpus & (1 << current_cpu)));
+ } while (!CPU_ISSET(current_cpu, &intr_cpus));
mtx_unlock_spin(&icu_lock);
return (apic_id);
}
@@ -497,7 +496,7 @@ intr_add_cpu(u_int cpu)
printf("INTR: Adding local APIC %d as a target\n",
cpu_apic_ids[cpu]);
- intr_cpus |= (1 << cpu);
+ CPU_SET(cpu, &intr_cpus);
}
/*
@@ -510,6 +509,9 @@ intr_shuffle_irqs(void *arg __unused)
struct intsrc *isrc;
int i;
+ /* The BSP is always a valid target. */
+ CPU_SETOF(0, &intr_cpus);
+
/* Don't bother on UP. */
if (mp_ncpus == 1)
return;
diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c
index 94b4037..f90ad03 100644
--- a/sys/amd64/amd64/machdep.c
+++ b/sys/amd64/amd64/machdep.c
@@ -51,6 +51,7 @@ __FBSDID("$FreeBSD$");
#include "opt_isa.h"
#include "opt_kstack_pages.h"
#include "opt_maxmem.h"
+#include "opt_mp_watchdog.h"
#include "opt_perfmon.h"
#include "opt_sched.h"
#include "opt_kdtrace.h"
@@ -116,6 +117,7 @@ __FBSDID("$FreeBSD$");
#include <x86/mca.h>
#include <machine/md_var.h>
#include <machine/metadata.h>
+#include <machine/mp_watchdog.h>
#include <machine/pc/bios.h>
#include <machine/pcb.h>
#include <machine/proc.h>
@@ -734,9 +736,8 @@ cpu_idle(int busy)
CTR2(KTR_SPARE2, "cpu_idle(%d) at %d",
busy, curcpu);
-#ifdef SMP
- if (mp_grab_cpu_hlt())
- return;
+#ifdef MP_WATCHDOG
+ ap_watchdog(PCPU_GET(cpuid));
#endif
/* If we are busy - try to use fast methods. */
if (busy) {
diff --git a/sys/amd64/amd64/mp_machdep.c b/sys/amd64/amd64/mp_machdep.c
index 5c90034..53988e9 100644
--- a/sys/amd64/amd64/mp_machdep.c
+++ b/sys/amd64/amd64/mp_machdep.c
@@ -29,13 +29,13 @@ __FBSDID("$FreeBSD$");
#include "opt_cpu.h"
#include "opt_kstack_pages.h"
-#include "opt_mp_watchdog.h"
#include "opt_sched.h"
#include "opt_smp.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
+#include <sys/cpuset.h>
#ifdef GPROF
#include <sys/gmon.h>
#endif
@@ -63,7 +63,6 @@ __FBSDID("$FreeBSD$");
#include <machine/cpufunc.h>
#include <x86/mca.h>
#include <machine/md_var.h>
-#include <machine/mp_watchdog.h>
#include <machine/pcb.h>
#include <machine/psl.h>
#include <machine/smp.h>
@@ -125,7 +124,7 @@ extern inthand_t IDTVEC(fast_syscall), IDTVEC(fast_syscall32);
* Local data and functions.
*/
-static volatile cpumask_t ipi_nmi_pending;
+static volatile cpuset_t ipi_nmi_pending;
/* used to hold the AP's until we are ready to release them */
static struct mtx ap_boot_mtx;
@@ -159,11 +158,8 @@ static int start_all_aps(void);
static int start_ap(int apic_id);
static void release_aps(void *dummy);
-static int hlt_logical_cpus;
static u_int hyperthreading_cpus; /* logical cpus sharing L1 cache */
-static cpumask_t hyperthreading_cpus_mask;
static int hyperthreading_allowed = 1;
-static struct sysctl_ctx_list logical_cpu_clist;
static u_int bootMP_size;
static void
@@ -241,8 +237,11 @@ topo_probe_0x4(void)
* logical processors that belong to the same core
* as BSP thus deducing number of threads per core.
*/
- cpuid_count(0x04, 0, p);
- max_cores = ((p[0] >> 26) & 0x3f) + 1;
+ if (cpu_high >= 0x4) {
+ cpuid_count(0x04, 0, p);
+ max_cores = ((p[0] >> 26) & 0x3f) + 1;
+ } else
+ max_cores = 1;
core_id_bits = mask_width(max_logical/max_cores);
if (core_id_bits < 0)
return;
@@ -334,7 +333,7 @@ topo_probe(void)
if (cpu_topo_probed)
return;
- logical_cpus_mask = 0;
+ CPU_ZERO(&logical_cpus_mask);
if (mp_ncpus <= 1)
cpu_cores = cpu_logical = 1;
else if (cpu_vendor_id == CPU_VENDOR_AMD)
@@ -478,7 +477,7 @@ cpu_mp_probe(void)
* Always record BSP in CPU map so that the mbuf init code works
* correctly.
*/
- all_cpus = 1;
+ CPU_SETOF(0, &all_cpus);
if (mp_ncpus == 0) {
/*
* No CPUs were found, so this must be a UP system. Setup
@@ -605,6 +604,7 @@ cpu_mp_announce(void)
void
init_secondary(void)
{
+ cpuset_t tcpuset, tallcpus;
struct pcpu *pc;
struct nmi_pcpu *np;
u_int64_t msr, cr0;
@@ -736,19 +736,17 @@ init_secondary(void)
CTR1(KTR_SMP, "SMP: AP CPU #%d Launched", PCPU_GET(cpuid));
printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid));
+ tcpuset = PCPU_GET(cpumask);
/* Determine if we are a logical CPU. */
/* XXX Calculation depends on cpu_logical being a power of 2, e.g. 2 */
if (cpu_logical > 1 && PCPU_GET(apic_id) % cpu_logical != 0)
- logical_cpus_mask |= PCPU_GET(cpumask);
-
- /* Determine if we are a hyperthread. */
- if (hyperthreading_cpus > 1 &&
- PCPU_GET(apic_id) % hyperthreading_cpus != 0)
- hyperthreading_cpus_mask |= PCPU_GET(cpumask);
+ CPU_OR(&logical_cpus_mask, &tcpuset);
/* Build our map of 'other' CPUs. */
- PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask));
+ tallcpus = all_cpus;
+ CPU_NAND(&tallcpus, &tcpuset);
+ PCPU_SET(other_cpus, tallcpus);
if (bootverbose)
lapic_dump("AP");
@@ -835,7 +833,7 @@ assign_cpu_ids(void)
if (hyperthreading_cpus > 1 && i % hyperthreading_cpus != 0) {
cpu_info[i].cpu_hyperthread = 1;
-#if defined(SCHED_ULE)
+
/*
* Don't use HT CPU if it has been disabled by a
* tunable.
@@ -844,7 +842,6 @@ assign_cpu_ids(void)
cpu_info[i].cpu_disabled = 1;
continue;
}
-#endif
}
/* Don't use this CPU if it has been disabled by a tunable. */
@@ -854,6 +851,11 @@ assign_cpu_ids(void)
}
}
+ if (hyperthreading_allowed == 0 && hyperthreading_cpus > 1) {
+ hyperthreading_cpus = 0;
+ cpu_logical = 1;
+ }
+
/*
* Assign CPU IDs to local APIC IDs and disable any CPUs
* beyond MAXCPU. CPU 0 is always assigned to the BSP.
@@ -891,6 +893,7 @@ assign_cpu_ids(void)
static int
start_all_aps(void)
{
+ cpuset_t tallcpus, tcpuset;
vm_offset_t va = boot_address + KERNBASE;
u_int64_t *pt4, *pt3, *pt2;
u_int32_t mpbioswarmvec;
@@ -955,11 +958,14 @@ start_all_aps(void)
panic("AP #%d (PHY# %d) failed!", cpu, apic_id);
}
- all_cpus |= (1 << cpu); /* record AP in CPU map */
+ CPU_SET(cpu, &all_cpus); /* record AP in CPU map */
}
/* build our map of 'other' CPUs */
- PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask));
+ tallcpus = all_cpus;
+ tcpuset = PCPU_GET(cpumask);
+ CPU_NAND(&tallcpus, &tcpuset);
+ PCPU_SET(other_cpus, tallcpus);
/* restore the warmstart vector */
*(u_int32_t *) WARMBOOT_OFF = mpbioswarmvec;
@@ -1088,6 +1094,30 @@ SYSCTL_UINT(_debug_xhits, OID_AUTO, ipi_masked_range_size, CTLFLAG_RW,
#endif /* COUNT_XINVLTLB_HITS */
/*
+ * Send an IPI to specified CPU handling the bitmap logic.
+ */
+static void
+ipi_send_cpu(int cpu, u_int ipi)
+{
+ u_int bitmap, old_pending, new_pending;
+
+ KASSERT(cpu_apic_ids[cpu] != -1, ("IPI to non-existent CPU %d", cpu));
+
+ if (IPI_IS_BITMAPED(ipi)) {
+ bitmap = 1 << ipi;
+ ipi = IPI_BITMAP_VECTOR;
+ do {
+ old_pending = cpu_ipi_pending[cpu];
+ new_pending = old_pending | bitmap;
+ } while (!atomic_cmpset_int(&cpu_ipi_pending[cpu],
+ old_pending, new_pending));
+ if (old_pending)
+ return;
+ }
+ lapic_ipi_vectored(ipi, cpu_apic_ids[cpu]);
+}
+
+/*
* Flush the TLB on all other CPU's
*/
static void
@@ -1111,28 +1141,19 @@ smp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2)
}
static void
-smp_targeted_tlb_shootdown(cpumask_t mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2)
+smp_targeted_tlb_shootdown(cpuset_t mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2)
{
- int ncpu, othercpus;
+ int cpu, ncpu, othercpus;
othercpus = mp_ncpus - 1;
- if (mask == (cpumask_t)-1) {
- ncpu = othercpus;
- if (ncpu < 1)
+ if (CPU_ISFULLSET(&mask)) {
+ if (othercpus < 1)
return;
} else {
- mask &= ~PCPU_GET(cpumask);
- if (mask == 0)
- return;
- ncpu = bitcount32(mask);
- if (ncpu > othercpus) {
- /* XXX this should be a panic offence */
- printf("SMP: tlb shootdown to %d other cpus (only have %d)\n",
- ncpu, othercpus);
- ncpu = othercpus;
- }
- /* XXX should be a panic, implied by mask == 0 above */
- if (ncpu < 1)
+ sched_pin();
+ CPU_NAND(&mask, PCPU_PTR(cpumask));
+ sched_unpin();
+ if (CPU_EMPTY(&mask))
return;
}
if (!(read_rflags() & PSL_I))
@@ -1141,39 +1162,25 @@ smp_targeted_tlb_shootdown(cpumask_t mask, u_int vector, vm_offset_t addr1, vm_o
smp_tlb_addr1 = addr1;
smp_tlb_addr2 = addr2;
atomic_store_rel_int(&smp_tlb_wait, 0);
- if (mask == (cpumask_t)-1)
+ if (CPU_ISFULLSET(&mask)) {
+ ncpu = othercpus;
ipi_all_but_self(vector);
- else
- ipi_selected(mask, vector);
+ } else {
+ ncpu = 0;
+ while ((cpu = cpusetobj_ffs(&mask)) != 0) {
+ cpu--;
+ CPU_CLR(cpu, &mask);
+ CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__,
+ cpu, vector);
+ ipi_send_cpu(cpu, vector);
+ ncpu++;
+ }
+ }
while (smp_tlb_wait < ncpu)
ia32_pause();
mtx_unlock_spin(&smp_ipi_mtx);
}
-/*
- * Send an IPI to specified CPU handling the bitmap logic.
- */
-static void
-ipi_send_cpu(int cpu, u_int ipi)
-{
- u_int bitmap, old_pending, new_pending;
-
- KASSERT(cpu_apic_ids[cpu] != -1, ("IPI to non-existent CPU %d", cpu));
-
- if (IPI_IS_BITMAPED(ipi)) {
- bitmap = 1 << ipi;
- ipi = IPI_BITMAP_VECTOR;
- do {
- old_pending = cpu_ipi_pending[cpu];
- new_pending = old_pending | bitmap;
- } while (!atomic_cmpset_int(&cpu_ipi_pending[cpu],
- old_pending, new_pending));
- if (old_pending)
- return;
- }
- lapic_ipi_vectored(ipi, cpu_apic_ids[cpu]);
-}
-
void
smp_cache_flush(void)
{
@@ -1220,7 +1227,7 @@ smp_invlpg_range(vm_offset_t addr1, vm_offset_t addr2)
}
void
-smp_masked_invltlb(cpumask_t mask)
+smp_masked_invltlb(cpuset_t mask)
{
if (smp_started) {
@@ -1232,7 +1239,7 @@ smp_masked_invltlb(cpumask_t mask)
}
void
-smp_masked_invlpg(cpumask_t mask, vm_offset_t addr)
+smp_masked_invlpg(cpuset_t mask, vm_offset_t addr)
{
if (smp_started) {
@@ -1244,7 +1251,7 @@ smp_masked_invlpg(cpumask_t mask, vm_offset_t addr)
}
void
-smp_masked_invlpg_range(cpumask_t mask, vm_offset_t addr1, vm_offset_t addr2)
+smp_masked_invlpg_range(cpuset_t mask, vm_offset_t addr1, vm_offset_t addr2)
{
if (smp_started) {
@@ -1297,7 +1304,7 @@ ipi_bitmap_handler(struct trapframe frame)
* send an IPI to a set of cpus.
*/
void
-ipi_selected(cpumask_t cpus, u_int ipi)
+ipi_selected(cpuset_t cpus, u_int ipi)
{
int cpu;
@@ -1307,12 +1314,12 @@ ipi_selected(cpumask_t cpus, u_int ipi)
* Set the mask of receiving CPUs for this purpose.
*/
if (ipi == IPI_STOP_HARD)
- atomic_set_int(&ipi_nmi_pending, cpus);
+ CPU_OR_ATOMIC(&ipi_nmi_pending, &cpus);
- CTR3(KTR_SMP, "%s: cpus: %x ipi: %x", __func__, cpus, ipi);
- while ((cpu = ffs(cpus)) != 0) {
+ while ((cpu = cpusetobj_ffs(&cpus)) != 0) {
cpu--;
- cpus &= ~(1 << cpu);
+ CPU_CLR(cpu, &cpus);
+ CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi);
ipi_send_cpu(cpu, ipi);
}
}
@@ -1330,7 +1337,7 @@ ipi_cpu(int cpu, u_int ipi)
* Set the mask of receiving CPUs for this purpose.
*/
if (ipi == IPI_STOP_HARD)
- atomic_set_int(&ipi_nmi_pending, 1 << cpu);
+ CPU_SET_ATOMIC(cpu, &ipi_nmi_pending);
CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi);
ipi_send_cpu(cpu, ipi);
@@ -1343,8 +1350,10 @@ void
ipi_all_but_self(u_int ipi)
{
+ sched_pin();
if (IPI_IS_BITMAPED(ipi)) {
ipi_selected(PCPU_GET(other_cpus), ipi);
+ sched_unpin();
return;
}
@@ -1354,7 +1363,8 @@ ipi_all_but_self(u_int ipi)
* Set the mask of receiving CPUs for this purpose.
*/
if (ipi == IPI_STOP_HARD)
- atomic_set_int(&ipi_nmi_pending, PCPU_GET(other_cpus));
+ CPU_OR_ATOMIC(&ipi_nmi_pending, PCPU_PTR(other_cpus));
+ sched_unpin();
CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
lapic_ipi_vectored(ipi, APIC_IPI_DEST_OTHERS);
@@ -1363,7 +1373,7 @@ ipi_all_but_self(u_int ipi)
int
ipi_nmi_handler()
{
- cpumask_t cpumask;
+ cpuset_t cpumask;
/*
* As long as there is not a simple way to know about a NMI's
@@ -1371,11 +1381,13 @@ ipi_nmi_handler()
* the global pending bitword an IPI_STOP_HARD has been issued
* and should be handled.
*/
+ sched_pin();
cpumask = PCPU_GET(cpumask);
- if ((ipi_nmi_pending & cpumask) == 0)
+ sched_unpin();
+ if (!CPU_OVERLAP(&ipi_nmi_pending, &cpumask))
return (1);
- atomic_clear_int(&ipi_nmi_pending, cpumask);
+ CPU_NAND_ATOMIC(&ipi_nmi_pending, &cpumask);
cpustop_handler();
return (0);
}
@@ -1387,23 +1399,25 @@ ipi_nmi_handler()
void
cpustop_handler(void)
{
- cpumask_t cpumask;
+ cpuset_t cpumask;
u_int cpu;
+ sched_pin();
cpu = PCPU_GET(cpuid);
cpumask = PCPU_GET(cpumask);
+ sched_unpin();
savectx(&stoppcbs[cpu]);
/* Indicate that we are stopped */
- atomic_set_int(&stopped_cpus, cpumask);
+ CPU_OR_ATOMIC(&stopped_cpus, &cpumask);
/* Wait for restart */
- while (!(started_cpus & cpumask))
+ while (!CPU_OVERLAP(&started_cpus, &cpumask))
ia32_pause();
- atomic_clear_int(&started_cpus, cpumask);
- atomic_clear_int(&stopped_cpus, cpumask);
+ CPU_NAND_ATOMIC(&started_cpus, &cpumask);
+ CPU_NAND_ATOMIC(&stopped_cpus, &cpumask);
if (cpu == 0 && cpustop_restartfunc != NULL) {
cpustop_restartfunc();
@@ -1418,7 +1432,7 @@ cpustop_handler(void)
void
cpususpend_handler(void)
{
- cpumask_t cpumask;
+ cpuset_t cpumask;
register_t cr3, rf;
u_int cpu;
@@ -1430,7 +1444,7 @@ cpususpend_handler(void)
if (savectx(susppcbs[cpu])) {
wbinvd();
- atomic_set_int(&stopped_cpus, cpumask);
+ CPU_OR_ATOMIC(&stopped_cpus, &cpumask);
} else {
pmap_init_pat();
PCPU_SET(switchtime, 0);
@@ -1438,11 +1452,11 @@ cpususpend_handler(void)
}
/* Wait for resume */
- while (!(started_cpus & cpumask))
+ while (!CPU_OVERLAP(&started_cpus, &cpumask))
ia32_pause();
- atomic_clear_int(&started_cpus, cpumask);
- atomic_clear_int(&stopped_cpus, cpumask);
+ CPU_NAND_ATOMIC(&started_cpus, &cpumask);
+ CPU_NAND_ATOMIC(&stopped_cpus, &cpumask);
/* Restore CR3 and enable interrupts */
load_cr3(cr3);
@@ -1467,158 +1481,6 @@ release_aps(void *dummy __unused)
}
SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL);
-static int
-sysctl_hlt_cpus(SYSCTL_HANDLER_ARGS)
-{
- cpumask_t mask;
- int error;
-
- mask = hlt_cpus_mask;
- error = sysctl_handle_int(oidp, &mask, 0, req);
- if (error || !req->newptr)
- return (error);
-
- if (logical_cpus_mask != 0 &&
- (mask & logical_cpus_mask) == logical_cpus_mask)
- hlt_logical_cpus = 1;
- else
- hlt_logical_cpus = 0;
-
- if (! hyperthreading_allowed)
- mask |= hyperthreading_cpus_mask;
-
- if ((mask & all_cpus) == all_cpus)
- mask &= ~(1<<0);
- hlt_cpus_mask = mask;
- return (error);
-}
-SYSCTL_PROC(_machdep, OID_AUTO, hlt_cpus, CTLTYPE_INT|CTLFLAG_RW,
- 0, 0, sysctl_hlt_cpus, "IU",
- "Bitmap of CPUs to halt. 101 (binary) will halt CPUs 0 and 2.");
-
-static int
-sysctl_hlt_logical_cpus(SYSCTL_HANDLER_ARGS)
-{
- int disable, error;
-
- disable = hlt_logical_cpus;
- error = sysctl_handle_int(oidp, &disable, 0, req);
- if (error || !req->newptr)
- return (error);
-
- if (disable)
- hlt_cpus_mask |= logical_cpus_mask;
- else
- hlt_cpus_mask &= ~logical_cpus_mask;
-
- if (! hyperthreading_allowed)
- hlt_cpus_mask |= hyperthreading_cpus_mask;
-
- if ((hlt_cpus_mask & all_cpus) == all_cpus)
- hlt_cpus_mask &= ~(1<<0);
-
- hlt_logical_cpus = disable;
- return (error);
-}
-
-static int
-sysctl_hyperthreading_allowed(SYSCTL_HANDLER_ARGS)
-{
- int allowed, error;
-
- allowed = hyperthreading_allowed;
- error = sysctl_handle_int(oidp, &allowed, 0, req);
- if (error || !req->newptr)
- return (error);
-
-#ifdef SCHED_ULE
- /*
- * SCHED_ULE doesn't allow enabling/disabling HT cores at
- * run-time.
- */
- if (allowed != hyperthreading_allowed)
- return (ENOTSUP);
- return (error);
-#endif
-
- if (allowed)
- hlt_cpus_mask &= ~hyperthreading_cpus_mask;
- else
- hlt_cpus_mask |= hyperthreading_cpus_mask;
-
- if (logical_cpus_mask != 0 &&
- (hlt_cpus_mask & logical_cpus_mask) == logical_cpus_mask)
- hlt_logical_cpus = 1;
- else
- hlt_logical_cpus = 0;
-
- if ((hlt_cpus_mask & all_cpus) == all_cpus)
- hlt_cpus_mask &= ~(1<<0);
-
- hyperthreading_allowed = allowed;
- return (error);
-}
-
-static void
-cpu_hlt_setup(void *dummy __unused)
-{
-
- if (logical_cpus_mask != 0) {
- TUNABLE_INT_FETCH("machdep.hlt_logical_cpus",
- &hlt_logical_cpus);
- sysctl_ctx_init(&logical_cpu_clist);
- SYSCTL_ADD_PROC(&logical_cpu_clist,
- SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO,
- "hlt_logical_cpus", CTLTYPE_INT|CTLFLAG_RW, 0, 0,
- sysctl_hlt_logical_cpus, "IU", "");
- SYSCTL_ADD_UINT(&logical_cpu_clist,
- SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO,
- "logical_cpus_mask", CTLTYPE_INT|CTLFLAG_RD,
- &logical_cpus_mask, 0, "");
-
- if (hlt_logical_cpus)
- hlt_cpus_mask |= logical_cpus_mask;
-
- /*
- * If necessary for security purposes, force
- * hyperthreading off, regardless of the value
- * of hlt_logical_cpus.
- */
- if (hyperthreading_cpus_mask) {
- SYSCTL_ADD_PROC(&logical_cpu_clist,
- SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO,
- "hyperthreading_allowed", CTLTYPE_INT|CTLFLAG_RW,
- 0, 0, sysctl_hyperthreading_allowed, "IU", "");
- if (! hyperthreading_allowed)
- hlt_cpus_mask |= hyperthreading_cpus_mask;
- }
- }
-}
-SYSINIT(cpu_hlt, SI_SUB_SMP, SI_ORDER_ANY, cpu_hlt_setup, NULL);
-
-int
-mp_grab_cpu_hlt(void)
-{
- cpumask_t mask;
-#ifdef MP_WATCHDOG
- u_int cpuid;
-#endif
- int retval;
-
- mask = PCPU_GET(cpumask);
-#ifdef MP_WATCHDOG
- cpuid = PCPU_GET(cpuid);
- ap_watchdog(cpuid);
-#endif
-
- retval = 0;
- while (mask & hlt_cpus_mask) {
- retval = 1;
- __asm __volatile("sti; hlt" : : : "memory");
- }
- return (retval);
-}
-
#ifdef COUNT_IPIS
/*
* Setup interrupt counters for IPI handlers.
diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index c9ff9bc..025ca5f 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -123,6 +123,8 @@ __FBSDID("$FreeBSD$");
#include <sys/sysctl.h>
#ifdef SMP
#include <sys/smp.h>
+#else
+#include <sys/cpuset.h>
#endif
#include <vm/vm.h>
@@ -581,7 +583,7 @@ pmap_bootstrap(vm_paddr_t *firstaddr)
PMAP_LOCK_INIT(kernel_pmap);
kernel_pmap->pm_pml4 = (pdp_entry_t *)PHYS_TO_DMAP(KPML4phys);
kernel_pmap->pm_root = NULL;
- kernel_pmap->pm_active = -1; /* don't allow deactivation */
+ CPU_FILL(&kernel_pmap->pm_active); /* don't allow deactivation */
TAILQ_INIT(&kernel_pmap->pm_pvchunk);
/*
@@ -923,19 +925,20 @@ pmap_update_pde_invalidate(vm_offset_t va, pd_entry_t newpde)
void
pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
{
- cpumask_t cpumask, other_cpus;
+ cpuset_t cpumask, other_cpus;
sched_pin();
- if (pmap == kernel_pmap || pmap->pm_active == all_cpus) {
+ if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) {
invlpg(va);
smp_invlpg(va);
} else {
cpumask = PCPU_GET(cpumask);
other_cpus = PCPU_GET(other_cpus);
- if (pmap->pm_active & cpumask)
+ if (CPU_OVERLAP(&pmap->pm_active, &cpumask))
invlpg(va);
- if (pmap->pm_active & other_cpus)
- smp_masked_invlpg(pmap->pm_active & other_cpus, va);
+ CPU_AND(&other_cpus, &pmap->pm_active);
+ if (!CPU_EMPTY(&other_cpus))
+ smp_masked_invlpg(other_cpus, va);
}
sched_unpin();
}
@@ -943,23 +946,23 @@ pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
void
pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
- cpumask_t cpumask, other_cpus;
+ cpuset_t cpumask, other_cpus;
vm_offset_t addr;
sched_pin();
- if (pmap == kernel_pmap || pmap->pm_active == all_cpus) {
+ if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) {
for (addr = sva; addr < eva; addr += PAGE_SIZE)
invlpg(addr);
smp_invlpg_range(sva, eva);
} else {
cpumask = PCPU_GET(cpumask);
other_cpus = PCPU_GET(other_cpus);
- if (pmap->pm_active & cpumask)
+ if (CPU_OVERLAP(&pmap->pm_active, &cpumask))
for (addr = sva; addr < eva; addr += PAGE_SIZE)
invlpg(addr);
- if (pmap->pm_active & other_cpus)
- smp_masked_invlpg_range(pmap->pm_active & other_cpus,
- sva, eva);
+ CPU_AND(&other_cpus, &pmap->pm_active);
+ if (!CPU_EMPTY(&other_cpus))
+ smp_masked_invlpg_range(other_cpus, sva, eva);
}
sched_unpin();
}
@@ -967,19 +970,20 @@ pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
void
pmap_invalidate_all(pmap_t pmap)
{
- cpumask_t cpumask, other_cpus;
+ cpuset_t cpumask, other_cpus;
sched_pin();
- if (pmap == kernel_pmap || pmap->pm_active == all_cpus) {
+ if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) {
invltlb();
smp_invltlb();
} else {
cpumask = PCPU_GET(cpumask);
other_cpus = PCPU_GET(other_cpus);
- if (pmap->pm_active & cpumask)
+ if (CPU_OVERLAP(&pmap->pm_active, &cpumask))
invltlb();
- if (pmap->pm_active & other_cpus)
- smp_masked_invltlb(pmap->pm_active & other_cpus);
+ CPU_AND(&other_cpus, &pmap->pm_active);
+ if (!CPU_EMPTY(&other_cpus))
+ smp_masked_invltlb(other_cpus);
}
sched_unpin();
}
@@ -995,8 +999,8 @@ pmap_invalidate_cache(void)
}
struct pde_action {
- cpumask_t store; /* processor that updates the PDE */
- cpumask_t invalidate; /* processors that invalidate their TLB */
+ cpuset_t store; /* processor that updates the PDE */
+ cpuset_t invalidate; /* processors that invalidate their TLB */
vm_offset_t va;
pd_entry_t *pde;
pd_entry_t newpde;
@@ -1007,8 +1011,12 @@ pmap_update_pde_action(void *arg)
{
struct pde_action *act = arg;
- if (act->store == PCPU_GET(cpumask))
+ sched_pin();
+ if (!CPU_CMP(&act->store, PCPU_PTR(cpumask))) {
+ sched_unpin();
pde_store(act->pde, act->newpde);
+ } else
+ sched_unpin();
}
static void
@@ -1016,8 +1024,12 @@ pmap_update_pde_teardown(void *arg)
{
struct pde_action *act = arg;
- if ((act->invalidate & PCPU_GET(cpumask)) != 0)
+ sched_pin();
+ if (CPU_OVERLAP(&act->invalidate, PCPU_PTR(cpumask))) {
+ sched_unpin();
pmap_update_pde_invalidate(act->va, act->newpde);
+ } else
+ sched_unpin();
}
/*
@@ -1032,26 +1044,28 @@ static void
pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde)
{
struct pde_action act;
- cpumask_t active, cpumask;
+ cpuset_t active, cpumask, other_cpus;
sched_pin();
cpumask = PCPU_GET(cpumask);
+ other_cpus = PCPU_GET(other_cpus);
if (pmap == kernel_pmap)
active = all_cpus;
else
active = pmap->pm_active;
- if ((active & PCPU_GET(other_cpus)) != 0) {
+ if (CPU_OVERLAP(&active, &other_cpus)) {
act.store = cpumask;
act.invalidate = active;
act.va = va;
act.pde = pde;
act.newpde = newpde;
- smp_rendezvous_cpus(cpumask | active,
+ CPU_OR(&cpumask, &active);
+ smp_rendezvous_cpus(cpumask,
smp_no_rendevous_barrier, pmap_update_pde_action,
pmap_update_pde_teardown, &act);
} else {
pde_store(pde, newpde);
- if ((active & cpumask) != 0)
+ if (CPU_OVERLAP(&active, &cpumask))
pmap_update_pde_invalidate(va, newpde);
}
sched_unpin();
@@ -1065,7 +1079,7 @@ PMAP_INLINE void
pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
{
- if (pmap == kernel_pmap || pmap->pm_active)
+ if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
invlpg(va);
}
@@ -1074,7 +1088,7 @@ pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
vm_offset_t addr;
- if (pmap == kernel_pmap || pmap->pm_active)
+ if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
for (addr = sva; addr < eva; addr += PAGE_SIZE)
invlpg(addr);
}
@@ -1083,7 +1097,7 @@ PMAP_INLINE void
pmap_invalidate_all(pmap_t pmap)
{
- if (pmap == kernel_pmap || pmap->pm_active)
+ if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
invltlb();
}
@@ -1099,7 +1113,7 @@ pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde)
{
pde_store(pde, newpde);
- if (pmap == kernel_pmap || pmap->pm_active)
+ if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
pmap_update_pde_invalidate(va, newpde);
}
#endif /* !SMP */
@@ -1607,7 +1621,7 @@ pmap_pinit0(pmap_t pmap)
PMAP_LOCK_INIT(pmap);
pmap->pm_pml4 = (pml4_entry_t *)PHYS_TO_DMAP(KPML4phys);
pmap->pm_root = NULL;
- pmap->pm_active = 0;
+ CPU_ZERO(&pmap->pm_active);
PCPU_SET(curpmap, pmap);
TAILQ_INIT(&pmap->pm_pvchunk);
bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
@@ -1649,7 +1663,7 @@ pmap_pinit(pmap_t pmap)
pmap->pm_pml4[PML4PML4I] = VM_PAGE_TO_PHYS(pml4pg) | PG_V | PG_RW | PG_A | PG_M;
pmap->pm_root = NULL;
- pmap->pm_active = 0;
+ CPU_ZERO(&pmap->pm_active);
TAILQ_INIT(&pmap->pm_pvchunk);
bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
@@ -5087,11 +5101,11 @@ pmap_activate(struct thread *td)
pmap = vmspace_pmap(td->td_proc->p_vmspace);
oldpmap = PCPU_GET(curpmap);
#ifdef SMP
- atomic_clear_int(&oldpmap->pm_active, PCPU_GET(cpumask));
- atomic_set_int(&pmap->pm_active, PCPU_GET(cpumask));
+ CPU_NAND_ATOMIC(&oldpmap->pm_active, PCPU_PTR(cpumask));
+ CPU_OR_ATOMIC(&pmap->pm_active, PCPU_PTR(cpumask));
#else
- oldpmap->pm_active &= ~PCPU_GET(cpumask);
- pmap->pm_active |= PCPU_GET(cpumask);
+ CPU_NAND(&oldpmap->pm_active, PCPU_PTR(cpumask));
+ CPU_OR(&pmap->pm_active, PCPU_PTR(cpumask));
#endif
cr3 = DMAP_TO_PHYS((vm_offset_t)pmap->pm_pml4);
td->td_pcb->pcb_cr3 = cr3;
diff --git a/sys/amd64/amd64/vm_machdep.c b/sys/amd64/amd64/vm_machdep.c
index 972484a..13f5cd0 100644
--- a/sys/amd64/amd64/vm_machdep.c
+++ b/sys/amd64/amd64/vm_machdep.c
@@ -59,6 +59,7 @@ __FBSDID("$FreeBSD$");
#include <sys/mutex.h>
#include <sys/pioctl.h>
#include <sys/proc.h>
+#include <sys/sched.h>
#include <sys/sf_buf.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
@@ -70,6 +71,7 @@ __FBSDID("$FreeBSD$");
#include <machine/cpu.h>
#include <machine/md_var.h>
#include <machine/pcb.h>
+#include <machine/smp.h>
#include <machine/specialreg.h>
#include <machine/tss.h>
@@ -512,11 +514,13 @@ cpu_set_user_tls(struct thread *td, void *tls_base)
static void
cpu_reset_proxy()
{
+ cpuset_t tcrp;
cpu_reset_proxy_active = 1;
while (cpu_reset_proxy_active == 1)
; /* Wait for other cpu to see that we've started */
- stop_cpus((1<<cpu_reset_proxyid));
+ CPU_SETOF(cpu_reset_proxyid, &tcrp);
+ stop_cpus(tcrp);
printf("cpu_reset_proxy: Stopped CPU %d\n", cpu_reset_proxyid);
DELAY(1000000);
cpu_reset_real();
@@ -527,24 +531,28 @@ void
cpu_reset()
{
#ifdef SMP
- cpumask_t map;
+ cpuset_t map;
u_int cnt;
if (smp_active) {
- map = PCPU_GET(other_cpus) & ~stopped_cpus;
- if (map != 0) {
+ sched_pin();
+ map = PCPU_GET(other_cpus);
+ CPU_NAND(&map, &stopped_cpus);
+ if (!CPU_EMPTY(&map)) {
printf("cpu_reset: Stopping other CPUs\n");
stop_cpus(map);
}
if (PCPU_GET(cpuid) != 0) {
cpu_reset_proxyid = PCPU_GET(cpuid);
+ sched_unpin();
cpustop_restartfunc = cpu_reset_proxy;
cpu_reset_proxy_active = 0;
printf("cpu_reset: Restarting BSP\n");
/* Restart CPU #0. */
- atomic_store_rel_int(&started_cpus, 1 << 0);
+ CPU_SETOF(0, &started_cpus);
+ wmb();
cnt = 0;
while (cpu_reset_proxy_active == 0 && cnt < 10000000)
@@ -556,7 +564,8 @@ cpu_reset()
while (1);
/* NOTREACHED */
- }
+ } else
+ sched_unpin();
DELAY(1000000);
}
diff --git a/sys/amd64/include/_types.h b/sys/amd64/include/_types.h
index 89d2e86..13dc3ea 100644
--- a/sys/amd64/include/_types.h
+++ b/sys/amd64/include/_types.h
@@ -61,7 +61,6 @@ typedef unsigned long __uint64_t;
* Standard type definitions.
*/
typedef __int32_t __clock_t; /* clock()... */
-typedef unsigned int __cpumask_t;
typedef __int64_t __critical_t;
typedef double __double_t;
typedef float __float_t;
diff --git a/sys/amd64/include/pmap.h b/sys/amd64/include/pmap.h
index 7a62851..1b8108a 100644
--- a/sys/amd64/include/pmap.h
+++ b/sys/amd64/include/pmap.h
@@ -152,6 +152,7 @@
#ifndef LOCORE
#include <sys/queue.h>
+#include <sys/_cpuset.h>
#include <sys/_lock.h>
#include <sys/_mutex.h>
@@ -251,7 +252,7 @@ struct pmap {
struct mtx pm_mtx;
pml4_entry_t *pm_pml4; /* KVA of level 4 page table */
TAILQ_HEAD(,pv_chunk) pm_pvchunk; /* list of mappings in pmap */
- cpumask_t pm_active; /* active on cpus */
+ cpuset_t pm_active; /* active on cpus */
/* spare u_int here due to padding */
struct pmap_statistics pm_stats; /* pmap statistics */
vm_page_t pm_root; /* spare page table pages */
diff --git a/sys/amd64/include/smp.h b/sys/amd64/include/smp.h
index ec107f9..de686b7 100644
--- a/sys/amd64/include/smp.h
+++ b/sys/amd64/include/smp.h
@@ -63,17 +63,16 @@ void ipi_all_but_self(u_int ipi);
void ipi_bitmap_handler(struct trapframe frame);
void ipi_cpu(int cpu, u_int ipi);
int ipi_nmi_handler(void);
-void ipi_selected(cpumask_t cpus, u_int ipi);
+void ipi_selected(cpuset_t cpus, u_int ipi);
u_int mp_bootaddress(u_int);
-int mp_grab_cpu_hlt(void);
void smp_cache_flush(void);
void smp_invlpg(vm_offset_t addr);
-void smp_masked_invlpg(cpumask_t mask, vm_offset_t addr);
+void smp_masked_invlpg(cpuset_t mask, vm_offset_t addr);
void smp_invlpg_range(vm_offset_t startva, vm_offset_t endva);
-void smp_masked_invlpg_range(cpumask_t mask, vm_offset_t startva,
+void smp_masked_invlpg_range(cpuset_t mask, vm_offset_t startva,
vm_offset_t endva);
void smp_invltlb(void);
-void smp_masked_invltlb(cpumask_t mask);
+void smp_masked_invltlb(cpuset_t mask);
#endif /* !LOCORE */
#endif /* SMP */
diff --git a/sys/arm/arm/pmap.c b/sys/arm/arm/pmap.c
index 087a744..cecf363 100644
--- a/sys/arm/arm/pmap.c
+++ b/sys/arm/arm/pmap.c
@@ -2395,7 +2395,7 @@ pmap_bootstrap(vm_offset_t firstaddr, vm_offset_t lastaddr, struct pv_addr *l1pt
cpu_cpwait();
PMAP_LOCK_INIT(kernel_pmap);
- kernel_pmap->pm_active = -1;
+ CPU_FILL(&kernel_pmap->pm_active);
kernel_pmap->pm_domain = PMAP_DOMAIN_KERNEL;
TAILQ_INIT(&kernel_pmap->pm_pvlist);
@@ -3826,7 +3826,7 @@ pmap_pinit(pmap_t pmap)
pmap_alloc_l1(pmap);
bzero(pmap->pm_l2, sizeof(pmap->pm_l2));
- pmap->pm_active = 0;
+ CPU_ZERO(&pmap->pm_active);
TAILQ_INIT(&pmap->pm_pvlist);
bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
diff --git a/sys/arm/include/_types.h b/sys/arm/include/_types.h
index 48dd2a7..d8386f3 100644
--- a/sys/arm/include/_types.h
+++ b/sys/arm/include/_types.h
@@ -67,7 +67,6 @@ typedef unsigned long long __uint64_t;
* Standard type definitions.
*/
typedef __uint32_t __clock_t; /* clock()... */
-typedef unsigned int __cpumask_t;
typedef __int32_t __critical_t;
typedef double __double_t;
typedef double __float_t;
diff --git a/sys/arm/include/pmap.h b/sys/arm/include/pmap.h
index 701390a..3d63432 100644
--- a/sys/arm/include/pmap.h
+++ b/sys/arm/include/pmap.h
@@ -62,6 +62,7 @@
#ifndef LOCORE
#include <sys/queue.h>
+#include <sys/_cpuset.h>
#include <sys/_lock.h>
#include <sys/_mutex.h>
@@ -134,7 +135,7 @@ struct pmap {
struct l1_ttable *pm_l1;
struct l2_dtable *pm_l2[L2_SIZE];
pd_entry_t *pm_pdir; /* KVA of page directory */
- cpumask_t pm_active; /* active on cpus */
+ cpuset_t pm_active; /* active on cpus */
struct pmap_statistics pm_stats; /* pmap statictics */
TAILQ_HEAD(,pv_entry) pm_pvlist; /* list of mappings in pmap */
};
diff --git a/sys/boot/ia64/common/Makefile b/sys/boot/ia64/common/Makefile
index f16f13d..d90898f 100644
--- a/sys/boot/ia64/common/Makefile
+++ b/sys/boot/ia64/common/Makefile
@@ -6,7 +6,7 @@ MK_SSP= no
LIB= ia64
INTERNALLIB=
-SRCS= autoload.c bootinfo.c copy.c devicename.c exec.c
+SRCS= autoload.c bootinfo.c copy.c devicename.c exec.c icache.c
CFLAGS+= -I${.CURDIR}/../../efi/include
CFLAGS+= -I${.CURDIR}/../../efi/include/${MACHINE_CPUARCH}
diff --git a/sys/boot/ia64/common/exec.c b/sys/boot/ia64/common/exec.c
index dd9c9ba..65886fa 100644
--- a/sys/boot/ia64/common/exec.c
+++ b/sys/boot/ia64/common/exec.c
@@ -258,6 +258,8 @@ ia64_loadseg(Elf_Ehdr *eh, Elf_Phdr *ph, uint64_t delta)
if (ph->p_flags & PF_X) {
ia64_text_start = ph->p_vaddr + delta;
ia64_text_size = ph->p_memsz;
+
+ ia64_sync_icache(ia64_text_start, ia64_text_size);
} else {
ia64_data_start = ph->p_vaddr + delta;
ia64_data_size = ph->p_memsz;
diff --git a/sys/boot/ia64/common/icache.c b/sys/boot/ia64/common/icache.c
new file mode 100644
index 0000000..77a35d7
--- /dev/null
+++ b/sys/boot/ia64/common/icache.c
@@ -0,0 +1,51 @@
+/*-
+ * Copyright (c) 2011 Marcel Moolenaar
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <stand.h>
+#include <machine/ia64_cpu.h>
+
+#include "libia64.h"
+
+void
+ia64_sync_icache(vm_offset_t va, size_t sz)
+{
+ uintptr_t pa;
+ size_t cnt, max;
+
+ while (sz > 0) {
+ max = sz;
+ pa = (uintptr_t)ia64_va2pa(va, &max);
+ for (cnt = 0; cnt < max; cnt += 32)
+ ia64_fc_i(pa + cnt);
+ ia64_sync_i();
+ va += max;
+ sz -= max;
+ }
+ ia64_srlz_i();
+}
diff --git a/sys/boot/ia64/common/libia64.h b/sys/boot/ia64/common/libia64.h
index 29912f5..4bc7638 100644
--- a/sys/boot/ia64/common/libia64.h
+++ b/sys/boot/ia64/common/libia64.h
@@ -64,6 +64,7 @@ void ia64_loadseg(void *, void *, uint64_t);
ssize_t ia64_copyin(const void *, vm_offset_t, size_t);
ssize_t ia64_copyout(vm_offset_t, void *, size_t);
+void ia64_sync_icache(vm_offset_t, size_t);
ssize_t ia64_readin(int, vm_offset_t, size_t);
void *ia64_va2pa(vm_offset_t, size_t *);
diff --git a/sys/boot/ia64/efi/efimd.c b/sys/boot/ia64/efi/efimd.c
index 0f7f02a..0b29e12 100644
--- a/sys/boot/ia64/efi/efimd.c
+++ b/sys/boot/ia64/efi/efimd.c
@@ -230,3 +230,35 @@ ia64_platform_enter(const char *kernel)
return (0);
}
+
+COMMAND_SET(pbvm, "pbvm", "show PBVM details", command_pbvm);
+
+static int
+command_pbvm(int argc, char *argv[])
+{
+ uint64_t limit, pg, start;
+ u_int idx;
+
+ printf("Page table @ %p, size %x\n", ia64_pgtbl, ia64_pgtblsz);
+
+ if (ia64_pgtbl == NULL)
+ return (0);
+
+ limit = ~0;
+ start = ~0;
+ idx = 0;
+ while (ia64_pgtbl[idx] != 0) {
+ pg = ia64_pgtbl[idx];
+ if (pg != limit) {
+ if (start != ~0)
+ printf("%#lx-%#lx\n", start, limit);
+ start = pg;
+ }
+ limit = pg + IA64_PBVM_PAGE_SIZE;
+ idx++;
+ }
+ if (start != ~0)
+ printf("%#lx-%#lx\n", start, limit);
+
+ return (0);
+}
diff --git a/sys/boot/ia64/efi/main.c b/sys/boot/ia64/efi/main.c
index 485a26d..ec12b42 100644
--- a/sys/boot/ia64/efi/main.c
+++ b/sys/boot/ia64/efi/main.c
@@ -153,9 +153,7 @@ main(int argc, CHAR16 *argv[])
*/
cons_probe();
- printf("\n");
- printf("%s, Revision %s\n", bootprog_name, bootprog_rev);
- printf("(%s, %s)\n", bootprog_maker, bootprog_date);
+ printf("\n%s, Revision %s\n", bootprog_name, bootprog_rev);
find_pal_proc();
@@ -214,6 +212,18 @@ static int
command_quit(int argc, char *argv[])
{
exit(0);
+ /* NOTREACHED */
+ return (CMD_OK);
+}
+
+COMMAND_SET(reboot, "reboot", "reboot the system", command_reboot);
+
+static int
+command_reboot(int argc, char *argv[])
+{
+
+ RS->ResetSystem(EfiResetWarm, EFI_SUCCESS, 0, NULL);
+ /* NOTREACHED */
return (CMD_OK);
}
@@ -585,3 +595,34 @@ command_hcdp(int argc, char *argv[])
printf("<EOT>\n");
return (CMD_OK);
}
+
+COMMAND_SET(about, "about", "about the loader", command_about);
+
+extern uint64_t _start_plabel[];
+
+/*
+ * Loader command "about": print the loader's name, revision and build
+ * details, followed by the base address of the loaded EFI image and the
+ * first two entries of _start_plabel.  Always returns CMD_OK.
+ */
+static int
+command_about(int argc, char *argv[])
+{
+	EFI_LOADED_IMAGE *img;
+	EFI_STATUS status;
+
+	printf("%s\n", bootprog_name);
+	printf("revision %s\n", bootprog_rev);
+	printf("built by %s\n", bootprog_maker);
+	printf("built on %s\n", bootprog_date);
+
+	printf("\n");
+
+	/* img is only valid when the protocol lookup succeeds. */
+	status = BS->HandleProtocol(IH, &imgid, (VOID**)&img);
+	if (!EFI_ERROR(status))
+		printf("image loaded at %p\n", img->ImageBase);
+	printf("entry at %#lx (%#lx)\n", _start_plabel[0], _start_plabel[1]);
+
+	return (CMD_OK);
+}
diff --git a/sys/boot/ia64/efi/version b/sys/boot/ia64/efi/version
index 3a947c8..17d14ea 100644
--- a/sys/boot/ia64/efi/version
+++ b/sys/boot/ia64/efi/version
@@ -3,6 +3,8 @@ $FreeBSD$
NOTE ANY CHANGES YOU MAKE TO THE BOOTBLOCKS HERE. The format of this
file is important. Make sure the current version number is on line 6.
+3.1: Add the about, reboot and pbvm commands.
+ I-cache coherency is maintained.
3.0: Add support for PBVM.
2.2: Create direct mapping based on start address instead of mapping
first 256M.
diff --git a/sys/cddl/compat/opensolaris/sys/atomic.h b/sys/cddl/compat/opensolaris/sys/atomic.h
index af9cc5d..f34d77e 100644
--- a/sys/cddl/compat/opensolaris/sys/atomic.h
+++ b/sys/cddl/compat/opensolaris/sys/atomic.h
@@ -40,8 +40,6 @@
extern void atomic_add_64(volatile uint64_t *target, int64_t delta);
extern void atomic_dec_64(volatile uint64_t *target);
#endif
-#ifndef __LP64__
-#endif
#ifndef __sparc64__
extern uint32_t atomic_cas_32(volatile uint32_t *target, uint32_t cmp,
uint32_t newval);
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_history.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_history.c
index 942636b..130c918 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_history.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_history.c
@@ -500,9 +500,11 @@ spa_history_log_version(spa_t *spa, history_internal_events_t event)
utsname.nodename, utsname.release, utsname.version,
utsname.machine);
}
+#if 0
cmn_err(CE_CONT, "!%s version %llu pool %s using %llu",
event == LOG_POOL_IMPORT ? "imported" :
event == LOG_POOL_CREATE ? "created" : "accessed",
(u_longlong_t)current_vers, spa_name(spa), SPA_VERSION);
#endif
+#endif
}
diff --git a/sys/cddl/dev/cyclic/i386/cyclic_machdep.c b/sys/cddl/dev/cyclic/i386/cyclic_machdep.c
index 6f93663..9ba2fd3 100644
--- a/sys/cddl/dev/cyclic/i386/cyclic_machdep.c
+++ b/sys/cddl/dev/cyclic/i386/cyclic_machdep.c
@@ -123,7 +123,9 @@ reprogram(cyb_arg_t arg __unused, hrtime_t exp)
static void xcall(cyb_arg_t arg __unused, cpu_t *c, cyc_func_t func,
void *param)
{
+ cpuset_t cpus;
- smp_rendezvous_cpus((cpumask_t)1 << c->cpuid,
+ CPU_SETOF(c->cpuid, &cpus);
+ smp_rendezvous_cpus(cpus,
smp_no_rendevous_barrier, func, smp_no_rendevous_barrier, param);
}
diff --git a/sys/cddl/dev/dtrace/amd64/dtrace_subr.c b/sys/cddl/dev/dtrace/amd64/dtrace_subr.c
index a081f67..0b86eac 100644
--- a/sys/cddl/dev/dtrace/amd64/dtrace_subr.c
+++ b/sys/cddl/dev/dtrace/amd64/dtrace_subr.c
@@ -113,12 +113,12 @@ dtrace_toxic_ranges(void (*func)(uintptr_t base, uintptr_t limit))
void
dtrace_xcall(processorid_t cpu, dtrace_xcall_t func, void *arg)
{
- cpumask_t cpus;
+ cpuset_t cpus;
if (cpu == DTRACE_CPUALL)
cpus = all_cpus;
else
- cpus = (cpumask_t)1 << cpu;
+ CPU_SETOF(cpu, &cpus);
smp_rendezvous_cpus(cpus, smp_no_rendevous_barrier, func,
smp_no_rendevous_barrier, arg);
@@ -374,7 +374,7 @@ dtrace_gethrtime_init(void *arg)
{
struct pcpu *pc;
uint64_t tsc_f;
- cpumask_t map;
+ cpuset_t map;
int i;
/*
@@ -412,7 +412,8 @@ dtrace_gethrtime_init(void *arg)
continue;
pc = pcpu_find(i);
- map = PCPU_GET(cpumask) | pc->pc_cpumask;
+ map = PCPU_GET(cpumask);
+ CPU_OR(&map, &pc->pc_cpumask);
smp_rendezvous_cpus(map, NULL,
dtrace_gethrtime_init_cpu,
diff --git a/sys/cddl/dev/dtrace/i386/dtrace_subr.c b/sys/cddl/dev/dtrace/i386/dtrace_subr.c
index 2753ffc..412fc38 100644
--- a/sys/cddl/dev/dtrace/i386/dtrace_subr.c
+++ b/sys/cddl/dev/dtrace/i386/dtrace_subr.c
@@ -30,6 +30,7 @@
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/types.h>
+#include <sys/cpuset.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/kmem.h>
@@ -113,12 +114,12 @@ dtrace_toxic_ranges(void (*func)(uintptr_t base, uintptr_t limit))
void
dtrace_xcall(processorid_t cpu, dtrace_xcall_t func, void *arg)
{
- cpumask_t cpus;
+ cpuset_t cpus;
if (cpu == DTRACE_CPUALL)
cpus = all_cpus;
else
- cpus = (cpumask_t)1 << cpu;
+ CPU_SETOF(cpu, &cpus);
smp_rendezvous_cpus(cpus, smp_no_rendevous_barrier, func,
smp_no_rendevous_barrier, arg);
@@ -372,9 +373,9 @@ dtrace_gethrtime_init_cpu(void *arg)
static void
dtrace_gethrtime_init(void *arg)
{
+ cpuset_t map;
struct pcpu *pc;
uint64_t tsc_f;
- cpumask_t map;
int i;
/*
@@ -412,7 +413,8 @@ dtrace_gethrtime_init(void *arg)
continue;
pc = pcpu_find(i);
- map = PCPU_GET(cpumask) | pc->pc_cpumask;
+ map = PCPU_GET(cpumask);
+ CPU_OR(&map, &pc->pc_cpumask);
smp_rendezvous_cpus(map, NULL,
dtrace_gethrtime_init_cpu,
diff --git a/sys/conf/NOTES b/sys/conf/NOTES
index 94311c6..b84d0c5 100644
--- a/sys/conf/NOTES
+++ b/sys/conf/NOTES
@@ -432,7 +432,10 @@ options KTRACE_REQUEST_POOL=101
# defined by the KTR_* constants in <sys/ktr.h>. KTR_MASK defines the
# initial value of the ktr_mask variable which determines at runtime
# what events to trace. KTR_CPUMASK determines which CPU's log
-# events, with bit X corresponding to CPU X. KTR_VERBOSE enables
+# events, with bit X corresponding to CPU X. The layout of the string
+# passed as KTR_CPUMASK must match a series of bitmasks, each of them
+# separated by the ", " characters (i.e.:
+# KTR_CPUMASK=("0xAF, 0xFFFFFFFFFFFFFFFF")). KTR_VERBOSE enables
# dumping of KTR events to the console by default. This functionality
# can be toggled via the debug.ktr_verbose sysctl and defaults to off
# if KTR_VERBOSE is not defined. See ktr(4) and ktrdump(8) for details.
@@ -441,7 +444,7 @@ options KTR
options KTR_ENTRIES=1024
options KTR_COMPILE=(KTR_INTR|KTR_PROC)
options KTR_MASK=KTR_INTR
-options KTR_CPUMASK=0x3
+options KTR_CPUMASK=("0x3")
options KTR_VERBOSE
#
diff --git a/sys/conf/files b/sys/conf/files
index 59286a5..d654c6f 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -2748,6 +2748,7 @@ netinet/ip_gre.c optional gre inet
netinet/ip_id.c optional inet
netinet/in_mcast.c optional inet
netinet/in_pcb.c optional inet | inet6
+netinet/in_pcbgroup.c optional inet pcbgroup | inet6 pcbgroup
netinet/in_proto.c optional inet | inet6 \
compile-with "${NORMAL_C} -I$S/contrib/pf"
netinet/in_rmx.c optional inet
@@ -2825,6 +2826,7 @@ netinet6/in6_gif.c optional gif inet6 | netgraph_gif inet6
netinet6/in6_ifattach.c optional inet6
netinet6/in6_mcast.c optional inet6
netinet6/in6_pcb.c optional inet6
+netinet6/in6_pcbgroup.c optional inet6 pcbgroup
netinet6/in6_proto.c optional inet6
netinet6/in6_rmx.c optional inet6
netinet6/in6_src.c optional inet6
diff --git a/sys/conf/options b/sys/conf/options
index a608d86..ee696a8 100644
--- a/sys/conf/options
+++ b/sys/conf/options
@@ -419,6 +419,7 @@ MROUTING opt_mrouting.h
NCP
NETATALK opt_atalk.h
NFSLOCKD
+PCBGROUP opt_pcbgroup.h
RADIX_MPATH opt_mpath.h
ROUTETABLES opt_route.h
SLIP_IFF_OPTS opt_slip.h
diff --git a/sys/ddb/db_command.c b/sys/ddb/db_command.c
index 21cb7c5..f2e2c42 100644
--- a/sys/ddb/db_command.c
+++ b/sys/ddb/db_command.c
@@ -44,6 +44,7 @@ __FBSDID("$FreeBSD$");
#include <sys/signalvar.h>
#include <sys/systm.h>
#include <sys/cons.h>
+#include <sys/conf.h>
#include <sys/watchdog.h>
#include <sys/kernel.h>
@@ -64,6 +65,7 @@ db_addr_t db_last_addr;
db_addr_t db_prev;
db_addr_t db_next;
+static db_cmdfcn_t db_dump;
static db_cmdfcn_t db_fncall;
static db_cmdfcn_t db_gdb;
static db_cmdfcn_t db_halt;
@@ -102,6 +104,7 @@ static struct command db_cmds[] = {
{ "w", db_write_cmd, CS_MORE|CS_SET_DOT, 0 },
{ "delete", db_delete_cmd, 0, 0 },
{ "d", db_delete_cmd, 0, 0 },
+ { "dump", db_dump, 0, 0 },
{ "break", db_breakpoint_cmd, 0, 0 },
{ "b", db_breakpoint_cmd, 0, 0 },
{ "dwatch", db_deletewatch_cmd, 0, 0 },
@@ -526,6 +529,27 @@ db_error(s)
kdb_reenter();
}
+static void
+db_dump(db_expr_t dummy, boolean_t dummy2, db_expr_t dummy3, char *dummy4)
+{
+ int error;
+
+ error = doadump(FALSE);
+ if (error) {
+ db_printf("Cannot dump: ");
+ switch (error) {
+ case EBUSY:
+ db_printf("debugger got invoked while dumping.\n");
+ break;
+ case ENXIO:
+ db_printf("no dump device specified.\n");
+ break;
+ default:
+ db_printf("unknown error (error=%d).\n", error);
+ break;
+ }
+ }
+}
/*
* Call random function:
diff --git a/sys/dev/amdsbwd/amdsbwd.c b/sys/dev/amdsbwd/amdsbwd.c
index f5f0f87..4256381 100644
--- a/sys/dev/amdsbwd/amdsbwd.c
+++ b/sys/dev/amdsbwd/amdsbwd.c
@@ -25,8 +25,8 @@
*/
/*
- * This is a driver for watchdog timer present in AMD SB600/SB7xx
- * south bridges and other watchdog timers advertised via WDRT ACPI table.
+ * This is a driver for watchdog timer present in AMD SB600/SB7xx/SB8xx
+ * southbridges.
* Please see the following specifications for the descriptions of the
* registers and flags:
* - AMD SB600 Register Reference Guide, Public Version, Rev. 3.03 (SB600 RRG)
@@ -35,11 +35,13 @@
* http://developer.amd.com/assets/43009_sb7xx_rrg_pub_1.00.pdf
* - AMD SB700/710/750 Register Programming Requirements (RPR)
* http://developer.amd.com/assets/42413_sb7xx_rpr_pub_1.00.pdf
+ * - AMD SB800-Series Southbridges Register Reference Guide (RRG)
+ * http://support.amd.com/us/Embedded_TechDocs/45482.pdf
* Please see the following for Watchdog Resource Table specification:
* - Watchdog Timer Hardware Requirements for Windows Server 2003 (WDRT)
* http://www.microsoft.com/whdc/system/sysinternals/watchdog.mspx
- * AMD SB600/SB7xx watchdog hardware seems to conform to the above,
- * but my system doesn't provide the table.
+ * AMD SB600/SB7xx/SB8xx watchdog hardware seems to conform to the above
+ * specifications, but the table hasn't been spotted in the wild yet.
*/
#include <sys/cdefs.h>
@@ -59,15 +61,15 @@ __FBSDID("$FreeBSD$");
#include <dev/pci/pcivar.h>
#include <isa/isavar.h>
-/* RRG 2.3.3.1.1, page 161. */
+/* SB7xx RRG 2.3.3.1.1. */
#define AMDSB_PMIO_INDEX 0xcd6
#define AMDSB_PMIO_DATA (PMIO_INDEX + 1)
#define AMDSB_PMIO_WIDTH 2
-/* RRG 2.3.3.2, page 181. */
+/* SB7xx RRG 2.3.3.2. */
#define AMDSB_PM_RESET_STATUS0 0x44
#define AMDSB_PM_RESET_STATUS1 0x45
#define AMDSB_WD_RST_STS 0x02
-/* RRG 2.3.3.2, page 188; RPR 2.36, page 30. */
+/* SB7xx RRG 2.3.3.2, RPR 2.36. */
#define AMDSB_PM_WDT_CTRL 0x69
#define AMDSB_WDT_DISABLE 0x01
#define AMDSB_WDT_RES_MASK (0x02 | 0x04)
@@ -77,7 +79,18 @@ __FBSDID("$FreeBSD$");
#define AMDSB_WDT_RES_1S 0x06
#define AMDSB_PM_WDT_BASE_LSB 0x6c
#define AMDSB_PM_WDT_BASE_MSB 0x6f
-/* RRG 2.3.4, page 223, WDRT. */
+/* SB8xx RRG 2.3.3. */
+#define AMDSB8_PM_WDT_EN 0x48
+#define AMDSB8_WDT_DEC_EN 0x01
+#define AMDSB8_WDT_DISABLE 0x02
+#define AMDSB8_PM_WDT_CTRL 0x4c
+#define AMDSB8_WDT_32KHZ 0x00
+#define AMDSB8_WDT_1HZ 0x03
+#define AMDSB8_WDT_RES_MASK 0x03
+#define AMDSB8_PM_RESET_STATUS0 0xC0
+#define AMDSB8_PM_RESET_STATUS1 0xC1
+#define AMDSB8_WD_RST_STS 0x20
+/* SB7xx RRG 2.3.4, WDRT. */
#define AMDSB_WD_CTRL 0x00
#define AMDSB_WD_RUN 0x01
#define AMDSB_WD_FIRED 0x02
@@ -90,8 +103,9 @@ __FBSDID("$FreeBSD$");
#define AMDSB_WDIO_REG_WIDTH 4
/* WDRT */
#define MAXCOUNT_MIN_VALUE 511
-/* RRG 2.3.1.1, page 122; SB600 RRG 2.3.1.1, page 97. */
-#define AMDSB7xx_SMBUS_DEVID 0x43851002
+/* SB7xx RRG 2.3.1.1, SB600 RRG 2.3.1.1, SB8xx RRG 2.3.1. */
+#define AMDSB_SMBUS_DEVID 0x43851002
+#define AMDSB8_SMBUS_REVID 0x40
#define amdsbwd_verbose_printf(dev, ...) \
do { \
@@ -265,7 +279,7 @@ amdsbwd_identify(driver_t *driver, device_t parent)
smb_dev = pci_find_bsf(0, 20, 0);
if (smb_dev == NULL)
return;
- if (pci_get_devid(smb_dev) != AMDSB7xx_SMBUS_DEVID)
+ if (pci_get_devid(smb_dev) != AMDSB_SMBUS_DEVID)
return;
child = BUS_ADD_CHILD(parent, ISA_ORDER_SPECULATIVE, "amdsbwd", -1);
@@ -273,15 +287,102 @@ amdsbwd_identify(driver_t *driver, device_t parent)
device_printf(parent, "add amdsbwd child failed\n");
}
+
+static void
+amdsbwd_probe_sb7xx(device_t dev, struct resource *pmres, uint32_t *addr)
+{
+ uint32_t val;
+ int i;
+
+ /* Report cause of previous reset for user's convenience. */
+ val = pmio_read(pmres, AMDSB_PM_RESET_STATUS0);
+ if (val != 0)
+ amdsbwd_verbose_printf(dev, "ResetStatus0 = %#04x\n", val);
+ val = pmio_read(pmres, AMDSB_PM_RESET_STATUS1);
+ if (val != 0)
+ amdsbwd_verbose_printf(dev, "ResetStatus1 = %#04x\n", val);
+ if ((val & AMDSB_WD_RST_STS) != 0)
+ device_printf(dev, "Previous Reset was caused by Watchdog\n");
+
+ /* Find base address of memory mapped WDT registers. */
+ for (*addr = 0, i = 0; i < 4; i++) {
+ *addr <<= 8;
+ *addr |= pmio_read(pmres, AMDSB_PM_WDT_BASE_MSB - i);
+ }
+ /* Set watchdog timer tick to 10ms. */
+ val = pmio_read(pmres, AMDSB_PM_WDT_CTRL);
+ val &= ~AMDSB_WDT_RES_MASK;
+ val |= AMDSB_WDT_RES_10MS;
+ pmio_write(pmres, AMDSB_PM_WDT_CTRL, val);
+
+ /* Enable watchdog device (in stopped state). */
+ val = pmio_read(pmres, AMDSB_PM_WDT_CTRL);
+ val &= ~AMDSB_WDT_DISABLE;
+ pmio_write(pmres, AMDSB_PM_WDT_CTRL, val);
+
+ /*
+ * XXX TODO: Ensure that watchdog decode is enabled
+ * (register 0x41, bit 3).
+ */
+ device_set_desc(dev, "AMD SB600/SB7xx Watchdog Timer");
+}
+
+static void
+amdsbwd_probe_sb8xx(device_t dev, struct resource *pmres, uint32_t *addr)
+{
+ uint32_t val;
+ int i;
+
+ /* Report cause of previous reset for user's convenience. */
+ val = pmio_read(pmres, AMDSB8_PM_RESET_STATUS0);
+ if (val != 0)
+ amdsbwd_verbose_printf(dev, "ResetStatus0 = %#04x\n", val);
+ val = pmio_read(pmres, AMDSB8_PM_RESET_STATUS1);
+ if (val != 0)
+ amdsbwd_verbose_printf(dev, "ResetStatus1 = %#04x\n", val);
+ if ((val & AMDSB8_WD_RST_STS) != 0)
+ device_printf(dev, "Previous Reset was caused by Watchdog\n");
+
+ /* Find base address of memory mapped WDT registers. */
+ for (*addr = 0, i = 0; i < 4; i++) {
+ *addr <<= 8;
+ *addr |= pmio_read(pmres, AMDSB8_PM_WDT_EN + 3 - i);
+ }
+ *addr &= ~0x07u;
+
+ /* Set watchdog timer tick to 1s. */
+ val = pmio_read(pmres, AMDSB8_PM_WDT_CTRL);
+ val &= ~AMDSB8_WDT_RES_MASK;
+ val |= AMDSB8_WDT_1HZ;
+ pmio_write(pmres, AMDSB8_PM_WDT_CTRL, val);
+#ifdef AMDSBWD_DEBUG
+ val = pmio_read(pmres, AMDSB8_PM_WDT_CTRL);
+ amdsbwd_verbose_printf(dev, "AMDSB8_PM_WDT_CTRL value = %#02x\n", val);
+#endif
+
+ /*
+ * Enable watchdog device (in stopped state)
+ * and decoding of its address.
+ */
+ val = pmio_read(pmres, AMDSB8_PM_WDT_EN);
+ val &= ~AMDSB8_WDT_DISABLE;
+ val |= AMDSB8_WDT_DEC_EN;
+ pmio_write(pmres, AMDSB8_PM_WDT_EN, val);
+#ifdef AMDSBWD_DEBUG
+ val = pmio_read(pmres, AMDSB8_PM_WDT_EN);
+ device_printf(dev, "AMDSB8_PM_WDT_EN value = %#02x\n", val);
+#endif
+ device_set_desc(dev, "AMD SB8xx Watchdog Timer");
+}
+
static int
amdsbwd_probe(device_t dev)
{
struct resource *res;
+ device_t smb_dev;
uint32_t addr;
- uint32_t val;
int rid;
int rc;
- int i;
/* Do not claim some ISA PnP device by accident. */
if (isa_get_logicalid(dev) != 0)
@@ -301,21 +402,16 @@ amdsbwd_probe(device_t dev)
return (ENXIO);
}
- /* Report cause of previous reset for user's convenience. */
- val = pmio_read(res, AMDSB_PM_RESET_STATUS0);
- if (val != 0)
- amdsbwd_verbose_printf(dev, "ResetStatus0 = %#04x\n", val);
- val = pmio_read(res, AMDSB_PM_RESET_STATUS1);
- if (val != 0)
- amdsbwd_verbose_printf(dev, "ResetStatus1 = %#04x\n", val);
- if ((val & AMDSB_WD_RST_STS) != 0)
- device_printf(dev, "Previous Reset was caused by Watchdog\n");
+ smb_dev = pci_find_bsf(0, 20, 0);
+ KASSERT(smb_dev != NULL, ("can't find SMBus PCI device\n"));
+ if (pci_get_revid(smb_dev) < AMDSB8_SMBUS_REVID)
+ amdsbwd_probe_sb7xx(dev, res, &addr);
+ else
+ amdsbwd_probe_sb8xx(dev, res, &addr);
+
+ bus_release_resource(dev, SYS_RES_IOPORT, rid, res);
+ bus_delete_resource(dev, SYS_RES_IOPORT, rid);
- /* Find base address of memory mapped WDT registers. */
- for (addr = 0, i = 0; i < 4; i++) {
- addr <<= 8;
- addr |= pmio_read(res, AMDSB_PM_WDT_BASE_MSB - i);
- }
amdsbwd_verbose_printf(dev, "memory base address = %#010x\n", addr);
rc = bus_set_resource(dev, SYS_RES_MEMORY, 0, addr + AMDSB_WD_CTRL,
AMDSB_WDIO_REG_WIDTH);
@@ -330,36 +426,25 @@ amdsbwd_probe(device_t dev)
return (ENXIO);
}
- /* Set watchdog timer tick to 10ms. */
- val = pmio_read(res, AMDSB_PM_WDT_CTRL);
- val &= ~AMDSB_WDT_RES_MASK;
- val |= AMDSB_WDT_RES_10MS;
- pmio_write(res, AMDSB_PM_WDT_CTRL, val);
-
- /* Enable watchdog device (in stopped state). */
- val = pmio_read(res, AMDSB_PM_WDT_CTRL);
- val &= ~AMDSB_WDT_DISABLE;
- pmio_write(res, AMDSB_PM_WDT_CTRL, val);
-
- /*
- * XXX TODO: Ensure that watchdog decode is enabled
- * (register 0x41, bit 3).
- */
- bus_release_resource(dev, SYS_RES_IOPORT, rid, res);
- bus_delete_resource(dev, SYS_RES_IOPORT, rid);
-
- device_set_desc(dev, "AMD SB600/SB7xx Watchdog Timer");
return (0);
}
static int
amdsbwd_attach_sb(device_t dev, struct amdsbwd_softc *sc)
{
+ device_t smb_dev;
+
sc->max_ticks = UINT16_MAX;
- sc->ms_per_tick = 10;
sc->rid_ctrl = 0;
sc->rid_count = 1;
+ smb_dev = pci_find_bsf(0, 20, 0);
+ KASSERT(smb_dev != NULL, ("can't find SMBus PCI device\n"));
+ if (pci_get_revid(smb_dev) < AMDSB8_SMBUS_REVID)
+ sc->ms_per_tick = 10;
+ else
+ sc->ms_per_tick = 1000;
+
sc->res_ctrl = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
&sc->rid_ctrl, RF_ACTIVE);
if (sc->res_ctrl == NULL) {
@@ -388,6 +473,11 @@ amdsbwd_attach(device_t dev)
if (rc != 0)
goto fail;
+#ifdef AMDSBWD_DEBUG
+ device_printf(dev, "wd ctrl = %#04x\n", wdctrl_read(sc));
+ device_printf(dev, "wd count = %#04x\n", wdcount_read(sc));
+#endif
+
/* Setup initial state of Watchdog Control. */
wdctrl_write(sc, AMDSB_WD_FIRED);
diff --git a/sys/dev/ath/ath_hal/ah.h b/sys/dev/ath/ath_hal/ah.h
index 165d919..7a01be3 100644
--- a/sys/dev/ath/ath_hal/ah.h
+++ b/sys/dev/ath/ath_hal/ah.h
@@ -745,6 +745,17 @@ typedef enum {
HAL_QUIET_ADD_SWBA_RESP_TIME = 0x4, /* add beacon response time to next_start offset */
} HAL_QUIET_FLAG;
+#define HAL_DFS_EVENT_PRICH 0x0000001
+
+struct dfs_event {
+ uint64_t re_full_ts; /* 64-bit full timestamp from interrupt time */
+ uint32_t re_ts; /* Original 15 bit recv timestamp */
+ uint8_t re_rssi; /* rssi of radar event */
+ uint8_t re_dur; /* duration of radar pulse */
+ uint32_t re_flags; /* Flags (HAL_DFS_EVENT_* values) */
+};
+typedef struct dfs_event HAL_DFS_EVENT;
+
/*
* Hardware Access Layer (HAL) API.
*
@@ -928,6 +939,9 @@ struct ath_hal {
HAL_PHYERR_PARAM *pe);
void __ahdecl(*ah_getDfsThresh)(struct ath_hal *ah,
HAL_PHYERR_PARAM *pe);
+ HAL_BOOL __ahdecl(*ah_procRadarEvent)(struct ath_hal *ah,
+ struct ath_rx_status *rxs, uint64_t fulltsf,
+ const char *buf, HAL_DFS_EVENT *event);
/* Key Cache Functions */
uint32_t __ahdecl(*ah_getKeyCacheSize)(struct ath_hal*);
diff --git a/sys/dev/ath/ath_hal/ar5212/ar5212.h b/sys/dev/ath/ath_hal/ar5212/ar5212.h
index 16394a3..8503a62 100644
--- a/sys/dev/ath/ath_hal/ar5212/ar5212.h
+++ b/sys/dev/ath/ath_hal/ar5212/ar5212.h
@@ -622,5 +622,8 @@ extern HAL_BOOL ar5212IsNFCalInProgress(struct ath_hal *ah);
extern HAL_BOOL ar5212WaitNFCalComplete(struct ath_hal *ah, int i);
extern void ar5212EnableDfs(struct ath_hal *ah, HAL_PHYERR_PARAM *pe);
extern void ar5212GetDfsThresh(struct ath_hal *ah, HAL_PHYERR_PARAM *pe);
+extern HAL_BOOL ar5212ProcessRadarEvent(struct ath_hal *ah,
+ struct ath_rx_status *rxs, uint64_t fulltsf, const char *buf,
+ HAL_DFS_EVENT *event);
#endif /* _ATH_AR5212_H_ */
diff --git a/sys/dev/ath/ath_hal/ar5212/ar5212_attach.c b/sys/dev/ath/ath_hal/ar5212/ar5212_attach.c
index 5999a60..8e7f3cb 100644
--- a/sys/dev/ath/ath_hal/ar5212/ar5212_attach.c
+++ b/sys/dev/ath/ath_hal/ar5212/ar5212_attach.c
@@ -132,6 +132,7 @@ static const struct ath_hal_private ar5212hal = {{
/* DFS Functions */
.ah_enableDfs = ar5212EnableDfs,
.ah_getDfsThresh = ar5212GetDfsThresh,
+ .ah_procRadarEvent = ar5212ProcessRadarEvent,
/* Key Cache Functions */
.ah_getKeyCacheSize = ar5212GetKeyCacheSize,
diff --git a/sys/dev/ath/ath_hal/ar5212/ar5212_misc.c b/sys/dev/ath/ath_hal/ar5212/ar5212_misc.c
index 276671d..3a6019d 100644
--- a/sys/dev/ath/ath_hal/ar5212/ar5212_misc.c
+++ b/sys/dev/ath/ath_hal/ar5212/ar5212_misc.c
@@ -21,9 +21,7 @@
#include "ah.h"
#include "ah_internal.h"
#include "ah_devid.h"
-#ifdef AH_DEBUG
#include "ah_desc.h" /* NB: for HAL_PHYERR* */
-#endif
#include "ar5212/ar5212.h"
#include "ar5212/ar5212reg.h"
@@ -1180,3 +1178,47 @@ ar5212GetDfsThresh(struct ath_hal *ah, HAL_PHYERR_PARAM *pe)
pe->pe_extchannel = AH_FALSE;
}
+/*
+ * Decode a radar phy error into *event; AH_FALSE if it is not a radar event.
+ */
+HAL_BOOL
+ar5212ProcessRadarEvent(struct ath_hal *ah, struct ath_rx_status *rxs,
+ uint64_t fulltsf, const char *buf, HAL_DFS_EVENT *event)
+{
+ uint8_t dur;
+ uint8_t rssi;
+
+ /* Only RADAR and FALSE_RADAR_EXT phy errors count as radar events */
+ if ((rxs->rs_phyerr != HAL_PHYERR_RADAR) &&
+ (rxs->rs_phyerr != HAL_PHYERR_FALSE_RADAR_EXT))
+ return AH_FALSE;
+
+ /*
+ * The first data byte is the pulse width; when there is no
+ * data (rs_datalen < 1) buf is not read and the duration is 0.
+ */
+ if (rxs->rs_datalen >= 1)
+ /* The pulse width is byte 0 of the data */
+ dur = ((uint8_t) buf[0]) & 0xff;
+ else
+ dur = 0;
+
+ /* Pulse RSSI is the normal reported RSSI, truncated to 8 bits */
+ rssi = (uint8_t) rxs->rs_rssi;
+
+ /* Rejected only when BOTH duration and rssi are 0; *event is untouched */
+ if (dur == 0 && rssi == 0)
+ return AH_FALSE;
+
+ HALDEBUG(ah, HAL_DEBUG_DFS, "%s: rssi=%d, dur=%d\n",
+ __func__, rssi, dur);
+
+ /* Record the event; re_flags is always HAL_DFS_EVENT_PRICH here */
+ event->re_full_ts = fulltsf;
+ event->re_ts = rxs->rs_tstamp;
+ event->re_rssi = rssi;
+ event->re_dur = dur;
+ event->re_flags = HAL_DFS_EVENT_PRICH;
+
+ return AH_TRUE;
+}
diff --git a/sys/dev/ath/ath_hal/ar5416/ar5416.h b/sys/dev/ath/ath_hal/ar5416/ar5416.h
index 510afe0..e5294b0 100644
--- a/sys/dev/ath/ath_hal/ar5416/ar5416.h
+++ b/sys/dev/ath/ath_hal/ar5416/ar5416.h
@@ -205,6 +205,9 @@ extern HAL_BOOL ar5416SetRifsDelay(struct ath_hal *ah,
const struct ieee80211_channel *chan, HAL_BOOL enable);
extern void ar5416EnableDfs(struct ath_hal *ah, HAL_PHYERR_PARAM *pe);
extern void ar5416GetDfsThresh(struct ath_hal *ah, HAL_PHYERR_PARAM *pe);
+extern HAL_BOOL ar5416ProcessRadarEvent(struct ath_hal *ah,
+ struct ath_rx_status *rxs, uint64_t fulltsf, const char *buf,
+ HAL_DFS_EVENT *event);
extern HAL_BOOL ar5416SetPowerMode(struct ath_hal *ah, HAL_POWER_MODE mode,
int setChip);
diff --git a/sys/dev/ath/ath_hal/ar5416/ar5416_attach.c b/sys/dev/ath/ath_hal/ar5416/ar5416_attach.c
index 22d05ff..e636325 100644
--- a/sys/dev/ath/ath_hal/ar5416/ar5416_attach.c
+++ b/sys/dev/ath/ath_hal/ar5416/ar5416_attach.c
@@ -147,6 +147,7 @@ ar5416InitState(struct ath_hal_5416 *ahp5416, uint16_t devid, HAL_SOFTC sc,
/* DFS Functions */
ah->ah_enableDfs = ar5416EnableDfs;
ah->ah_getDfsThresh = ar5416GetDfsThresh;
+ ah->ah_procRadarEvent = ar5416ProcessRadarEvent;
/* Power Management Functions */
ah->ah_setPowerMode = ar5416SetPowerMode;
diff --git a/sys/dev/ath/ath_hal/ar5416/ar5416_misc.c b/sys/dev/ath/ath_hal/ar5416/ar5416_misc.c
index 2c08730..2332656 100644
--- a/sys/dev/ath/ath_hal/ar5416/ar5416_misc.c
+++ b/sys/dev/ath/ath_hal/ar5416/ar5416_misc.c
@@ -692,3 +692,19 @@ ar5416EnableDfs(struct ath_hal *ah, HAL_PHYERR_PARAM *pe)
OS_REG_WRITE(ah, AR_PHY_RADAR_1, val);
}
}
+
+/*
+ * Extract the radar event information from the given phy error.
+ *
+ * Returns AH_TRUE if the phy error was actually a radar event,
+ * AH_FALSE if it was not. *event is never written by this stub.
+ */
+HAL_BOOL
+ar5416ProcessRadarEvent(struct ath_hal *ah, struct ath_rx_status *rxs,
+ uint64_t fulltsf, const char *buf, HAL_DFS_EVENT *event)
+{
+ /*
+ * For now, this isn't implemented: AH_FALSE for every phy error.
+ */
+ return AH_FALSE;
+}
diff --git a/sys/dev/ath/if_athvar.h b/sys/dev/ath/if_athvar.h
index 97666c5..3bc8522 100644
--- a/sys/dev/ath/if_athvar.h
+++ b/sys/dev/ath/if_athvar.h
@@ -709,6 +709,8 @@ void ath_intr(void *);
((*(_ah)->ah_enableDfs)((_ah), (_param)))
#define ath_hal_getdfsthresh(_ah, _param) \
((*(_ah)->ah_getDfsThresh)((_ah), (_param)))
+#define ath_hal_procradarevent(_ah, _rxs, _fulltsf, _buf, _event) \
+ ((*(_ah)->ah_procRadarEvent)((_ah), (_rxs), (_fulltsf), (_buf), (_event)))
#define ath_hal_gpioCfgOutput(_ah, _gpio, _type) \
((*(_ah)->ah_gpioCfgOutput)((_ah), (_gpio), (_type)))
diff --git a/sys/dev/atkbdc/atkbd.c b/sys/dev/atkbdc/atkbd.c
index 643554d..b7156cf 100644
--- a/sys/dev/atkbdc/atkbd.c
+++ b/sys/dev/atkbdc/atkbd.c
@@ -1097,10 +1097,8 @@ get_typematic(keyboard_t *kbd)
x86regs_t regs;
uint8_t *p;
- if (!(kbd->kb_config & KB_CONF_PROBE_TYPEMATIC))
- return (ENODEV);
-
- if (x86bios_get_intr(0x15) == 0 || x86bios_get_intr(0x16) == 0)
+ if (x86bios_get_intr(0x15) != 0xf000f859 ||
+ x86bios_get_intr(0x16) != 0xf000e82e)
return (ENODEV);
/* Is BIOS system configuration table supported? */
diff --git a/sys/dev/atkbdc/atkbdreg.h b/sys/dev/atkbdc/atkbdreg.h
index 3d54b4d..cf7ee6b 100644
--- a/sys/dev/atkbdc/atkbdreg.h
+++ b/sys/dev/atkbdc/atkbdreg.h
@@ -36,7 +36,6 @@
#define KB_CONF_NO_RESET (1 << 1) /* don't reset the keyboard */
#define KB_CONF_ALT_SCANCODESET (1 << 2) /* assume the XT type keyboard */
#define KB_CONF_NO_PROBE_TEST (1 << 3) /* don't test keyboard during probe */
-#define KB_CONF_PROBE_TYPEMATIC (1 << 4) /* probe keyboard typematic */
#ifdef _KERNEL
diff --git a/sys/dev/cardbus/cardbus_cis.c b/sys/dev/cardbus/cardbus_cis.c
index 2cfea19..3352a56 100644
--- a/sys/dev/cardbus/cardbus_cis.c
+++ b/sys/dev/cardbus/cardbus_cis.c
@@ -324,7 +324,7 @@ decode_tuple_bar(device_t cbdev, device_t child, int id,
* hint when the cardbus bridge is a child of pci0 (the main
* bus). The PC Card spec seems to indicate that this should
* only be done on x86 based machines, which suggests that on
- * non-x86 machines the adddresses can be anywhere. Since the
+ * non-x86 machines the addresses can be anywhere. Since the
* hardware can do it on non-x86 machines, it should be able
* to do it on x86 machines too. Therefore, we can and should
* ignore this hint. Furthermore, the PC Card spec recommends
@@ -430,7 +430,6 @@ cardbus_read_tuple_finish(device_t cbdev, device_t child, int rid,
{
if (res != CIS_CONFIG_SPACE) {
bus_release_resource(child, SYS_RES_MEMORY, rid, res);
- bus_delete_resource(child, SYS_RES_MEMORY, rid);
}
}
@@ -467,7 +466,7 @@ cardbus_read_tuple_init(device_t cbdev, device_t child, uint32_t *start,
}
/* allocate the memory space to read CIS */
- res = bus_alloc_resource(child, SYS_RES_MEMORY, rid, 0, ~0, 1,
+ res = bus_alloc_resource_any(child, SYS_RES_MEMORY, rid,
rman_make_alignment_flags(4096) | RF_ACTIVE);
if (res == NULL) {
device_printf(cbdev, "Unable to allocate resource "
diff --git a/sys/dev/hwpmc/hwpmc_mod.c b/sys/dev/hwpmc/hwpmc_mod.c
index d6225d8..4cfcea8 100644
--- a/sys/dev/hwpmc/hwpmc_mod.c
+++ b/sys/dev/hwpmc/hwpmc_mod.c
@@ -1991,7 +1991,7 @@ pmc_hook_handler(struct thread *td, int function, void *arg)
* had already processed the interrupt). We don't
* lose the interrupt sample.
*/
- atomic_clear_int(&pmc_cpumask, (1 << PCPU_GET(cpuid)));
+ CPU_CLR_ATOMIC(PCPU_GET(cpuid), &pmc_cpumask);
pmc_process_samples(PCPU_GET(cpuid));
break;
@@ -4083,7 +4083,7 @@ pmc_process_interrupt(int cpu, struct pmc *pm, struct trapframe *tf,
done:
/* mark CPU as needing processing */
- atomic_set_int(&pmc_cpumask, (1 << cpu));
+ CPU_SET_ATOMIC(cpu, &pmc_cpumask);
return (error);
}
@@ -4193,7 +4193,7 @@ pmc_process_samples(int cpu)
break;
if (ps->ps_nsamples == PMC_SAMPLE_INUSE) {
/* Need a rescan at a later time. */
- atomic_set_int(&pmc_cpumask, (1 << cpu));
+ CPU_SET_ATOMIC(cpu, &pmc_cpumask);
break;
}
@@ -4782,7 +4782,7 @@ pmc_cleanup(void)
PMCDBG(MOD,INI,0, "%s", "cleanup");
/* switch off sampling */
- pmc_cpumask = 0;
+ CPU_ZERO(&pmc_cpumask);
pmc_intr = NULL;
sx_xlock(&pmc_sx);
diff --git a/sys/dev/pccard/pccard.c b/sys/dev/pccard/pccard.c
index 00cd1dc..1de571c 100644
--- a/sys/dev/pccard/pccard.c
+++ b/sys/dev/pccard/pccard.c
@@ -1405,8 +1405,8 @@ pccard_ccr_read_impl(device_t brdev, device_t child, uint32_t offset,
struct pccard_ivar *devi = PCCARD_IVAR(child);
*val = pccard_ccr_read(devi->pf, offset);
- device_printf(child, "ccr_read of %#x (%#x) is %#x\n", offset,
- devi->pf->pf_ccr_offset, *val);
+ DEVPRINTF((child, "ccr_read of %#x (%#x) is %#x\n", offset,
+ devi->pf->pf_ccr_offset, *val));
return 0;
}
@@ -1421,8 +1421,8 @@ pccard_ccr_write_impl(device_t brdev, device_t child, uint32_t offset,
* Can't use pccard_ccr_write since client drivers may access
* registers not contained in the 'mask' if they are non-standard.
*/
- device_printf(child, "ccr_write of %#x to %#x (%#x)\n", val, offset,
- devi->pf->pf_ccr_offset);
+ DEVPRINTF((child, "ccr_write of %#x to %#x (%#x)\n", val, offset,
+ devi->pf->pf_ccr_offset));
bus_space_write_1(pf->pf_ccrt, pf->pf_ccrh, pf->pf_ccr_offset + offset,
val);
return 0;
diff --git a/sys/dev/pci/pci.c b/sys/dev/pci/pci.c
index 22046c1..9cd5a1c 100644
--- a/sys/dev/pci/pci.c
+++ b/sys/dev/pci/pci.c
@@ -2576,6 +2576,17 @@ pci_add_map(device_t bus, device_t dev, int reg, struct resource_list *rl,
uint16_t cmd;
struct resource *res;
+ /*
+ * The BAR may already exist if the device is a CardBus card
+ * whose CIS is stored in this BAR.
+ */
+ pm = pci_find_bar(dev, reg);
+ if (pm != NULL) {
+ maprange = pci_maprange(pm->pm_value);
+ barlen = maprange == 64 ? 2 : 1;
+ return (barlen);
+ }
+
pci_read_bar(dev, reg, &map, &testval);
if (PCI_BAR_MEM(map)) {
type = SYS_RES_MEMORY;
diff --git a/sys/dev/puc/pucdata.c b/sys/dev/puc/pucdata.c
index a56971e..2b38d9b 100644
--- a/sys/dev/puc/pucdata.c
+++ b/sys/dev/puc/pucdata.c
@@ -51,12 +51,12 @@ static puc_config_f puc_config_amc;
static puc_config_f puc_config_diva;
static puc_config_f puc_config_exar;
static puc_config_f puc_config_icbook;
+static puc_config_f puc_config_oxford_pcie;
static puc_config_f puc_config_quatech;
static puc_config_f puc_config_syba;
static puc_config_f puc_config_siig;
static puc_config_f puc_config_timedia;
static puc_config_f puc_config_titan;
-static puc_config_f puc_config_oxford_pcie;
const struct puc_cfg puc_pci_devices[] = {
@@ -1366,14 +1366,12 @@ puc_config_oxford_pcie(struct puc_softc *sc, enum puc_cfg_cmd cmd, int port,
bar = puc_get_bar(sc, cfg->rid);
if (bar == NULL)
return (ENXIO);
-
for (idx = 0; idx < sc->sc_nports; idx++) {
- value = bus_read_1(bar->b_res, 0x1000 + (idx << 9)
- + 0x92);
+ value = bus_read_1(bar->b_res, 0x1000 + (idx << 9) +
+ 0x92);
bus_write_1(bar->b_res, 0x1000 + (idx << 9) + 0x92,
- value | 0x10);
+ value | 0x10);
}
-
return (0);
case PUC_CFG_GET_LEN:
*res = 0x200;
diff --git a/sys/dev/sound/pcm/sound.c b/sys/dev/sound/pcm/sound.c
index caa7841..958065f 100644
--- a/sys/dev/sound/pcm/sound.c
+++ b/sys/dev/sound/pcm/sound.c
@@ -51,7 +51,7 @@ int pcm_veto_load = 1;
int snd_unit = -1;
TUNABLE_INT("hw.snd.default_unit", &snd_unit);
-static int snd_unit_auto = 0;
+static int snd_unit_auto = -1;
TUNABLE_INT("hw.snd.default_auto", &snd_unit_auto);
SYSCTL_INT(_hw_snd, OID_AUTO, default_auto, CTLFLAG_RW,
&snd_unit_auto, 0, "assign default unit to a newly attached device");
@@ -443,6 +443,7 @@ sysctl_hw_snd_default_unit(SYSCTL_HANDLER_ARGS)
if (!PCM_REGISTERED(d) || CHN_EMPTY(d, channels.pcm))
return EINVAL;
snd_unit = unit;
+ snd_unit_auto = 0;
}
return (error);
}
@@ -737,6 +738,32 @@ pcm_killchan(device_t dev)
return (pcm_chn_destroy(ch));
}
+static int
+pcm_best_unit(int old)
+{
+ struct snddev_info *d;
+ int i, best, bestprio, prio;
+
+ best = -1; /* result when no unit is registered */
+ bestprio = -100; /* below the lowest score computed below (-12) */
+ for (i = 0; pcm_devclass != NULL &&
+ i < devclass_get_maxunit(pcm_devclass); i++) {
+ d = devclass_get_softc(pcm_devclass, i);
+ if (!PCM_REGISTERED(d))
+ continue;
+ prio = 0; /* score starts at 0 and is only ever lowered */
+ if (d->playcount == 0)
+ prio -= 10; /* presumably: no playback channels -- heavy penalty */
+ if (d->reccount == 0)
+ prio -= 2; /* presumably: no record channels -- light penalty */
+ if (prio > bestprio || (prio == bestprio && i == old)) {
+ best = i; /* on a tie prefer unit "old", else the lowest unit */
+ bestprio = prio;
+ }
+ }
+ return (best); /* chosen unit number, or -1 */
+}
+
int
pcm_setstatus(device_t dev, char *str)
{
@@ -770,8 +797,12 @@ pcm_setstatus(device_t dev, char *str)
PCM_UNLOCK(d);
- if (snd_unit < 0 || snd_unit_auto != 0)
+ if (snd_unit_auto < 0)
+ snd_unit_auto = (snd_unit < 0) ? 1 : 0;
+ if (snd_unit < 0 || snd_unit_auto > 1)
snd_unit = device_get_unit(dev);
+ else if (snd_unit_auto == 1)
+ snd_unit = pcm_best_unit(snd_unit);
return (0);
}
@@ -1113,7 +1144,6 @@ pcm_unregister(device_t dev)
struct snddev_info *d;
struct pcm_channel *ch;
struct thread *td;
- int i;
td = curthread;
d = device_get_softc(dev);
@@ -1216,21 +1246,9 @@ pcm_unregister(device_t dev)
sndstat_release(td);
if (snd_unit == device_get_unit(dev)) {
- /*
- * Reassign default unit to the next available dev, but
- * first, reset snd_unit to something ridiculous.
- */
- snd_unit = -1;
- for (i = 0; pcm_devclass != NULL &&
- i < devclass_get_maxunit(pcm_devclass); i++) {
- if (device_get_unit(dev) == i)
- continue;
- d = devclass_get_softc(pcm_devclass, i);
- if (PCM_REGISTERED(d)) {
- snd_unit = i;
- break;
- }
- }
+ snd_unit = pcm_best_unit(-1);
+ if (snd_unit_auto == 0)
+ snd_unit_auto = 1;
}
return (0);
diff --git a/sys/dev/usb/usb_device.h b/sys/dev/usb/usb_device.h
index c8bc5eb..bf41221 100644
--- a/sys/dev/usb/usb_device.h
+++ b/sys/dev/usb/usb_device.h
@@ -187,6 +187,8 @@ struct usb_device {
struct usb_host_endpoint *linux_endpoint_end;
uint16_t devnum;
#endif
+
+ uint32_t clear_stall_errors; /* number of clear-stall failures */
};
/* globals */
diff --git a/sys/dev/usb/usb_freebsd.h b/sys/dev/usb/usb_freebsd.h
index a44e530..ae69cdb 100644
--- a/sys/dev/usb/usb_freebsd.h
+++ b/sys/dev/usb/usb_freebsd.h
@@ -66,6 +66,7 @@
#define USB_HUB_MAX_DEPTH 5
#define USB_EP0_BUFSIZE 1024 /* bytes */
+#define USB_CS_RESET_LIMIT 20 /* failures = 20 * 50 ms = 1sec */
typedef uint32_t usb_timeout_t; /* milliseconds */
typedef uint32_t usb_frlength_t; /* bytes */
diff --git a/sys/dev/usb/usb_generic.c b/sys/dev/usb/usb_generic.c
index 714ee6f..d62f8f9 100644
--- a/sys/dev/usb/usb_generic.c
+++ b/sys/dev/usb/usb_generic.c
@@ -966,10 +966,8 @@ ugen_re_enumerate(struct usb_fifo *f)
/* ignore any errors */
DPRINTFN(6, "no FIFOs\n");
}
- if (udev->re_enumerate_wait == 0) {
- udev->re_enumerate_wait = 1;
- usb_needs_explore(udev->bus, 0);
- }
+ /* start re-enumeration of device */
+ usbd_start_re_enumerate(udev);
return (0);
}
diff --git a/sys/dev/usb/usb_hub.c b/sys/dev/usb/usb_hub.c
index ce8a4a5..351b134 100644
--- a/sys/dev/usb/usb_hub.c
+++ b/sys/dev/usb/usb_hub.c
@@ -242,9 +242,14 @@ uhub_explore_sub(struct uhub_softc *sc, struct usb_port *up)
if (child->flags.usb_mode == USB_MODE_HOST) {
usbd_enum_lock(child);
if (child->re_enumerate_wait) {
- err = usbd_set_config_index(child, USB_UNCONFIG_INDEX);
- if (err == 0)
- err = usbd_req_re_enumerate(child, NULL);
+ err = usbd_set_config_index(child,
+ USB_UNCONFIG_INDEX);
+ if (err != 0) {
+ DPRINTF("Unconfigure failed: "
+ "%s: Ignored.\n",
+ usbd_errstr(err));
+ }
+ err = usbd_req_re_enumerate(child, NULL);
if (err == 0)
err = usbd_set_config_index(child, 0);
if (err == 0) {
@@ -2471,3 +2476,19 @@ usbd_filter_power_mode(struct usb_device *udev, uint8_t power_mode)
/* use fixed power mode given by hardware driver */
return (temp);
}
+
+/*------------------------------------------------------------------------*
+ * usbd_start_re_enumerate
+ *
+ * Mark the given USB device for re-enumeration (re_enumerate_wait) and
+ * request a bus explore. Does not need to be called BUS-locked and does
+ * not wait until the re-enumeration is completed.
+ *------------------------------------------------------------------------*/
+void
+usbd_start_re_enumerate(struct usb_device *udev)
+{
+ if (udev->re_enumerate_wait == 0) { /* already pending: nothing to do */
+ udev->re_enumerate_wait = 1; /* flag tested by uhub_explore_sub() */
+ usb_needs_explore(udev->bus, 0);
+ }
+}
diff --git a/sys/dev/usb/usb_request.c b/sys/dev/usb/usb_request.c
index c099e71..4358ef4 100644
--- a/sys/dev/usb/usb_request.c
+++ b/sys/dev/usb/usb_request.c
@@ -238,6 +238,10 @@ usb_do_clear_stall_callback(struct usb_xfer *xfer, usb_error_t error)
switch (USB_GET_STATE(xfer)) {
case USB_ST_TRANSFERRED:
+
+ /* reset error counter */
+ udev->clear_stall_errors = 0;
+
if (ep == NULL)
goto tr_setup; /* device was unconfigured */
if (ep->edesc &&
@@ -289,8 +293,23 @@ tr_setup:
goto tr_setup;
default:
- if (xfer->error == USB_ERR_CANCELLED) {
+ if (error == USB_ERR_CANCELLED)
break;
+
+ DPRINTF("Clear stall failed.\n");
+ if (udev->clear_stall_errors == USB_CS_RESET_LIMIT)
+ goto tr_setup;
+
+ if (error == USB_ERR_TIMEOUT) {
+ udev->clear_stall_errors = USB_CS_RESET_LIMIT;
+ DPRINTF("Trying to re-enumerate.\n");
+ usbd_start_re_enumerate(udev);
+ } else {
+ udev->clear_stall_errors++;
+ if (udev->clear_stall_errors == USB_CS_RESET_LIMIT) {
+ DPRINTF("Trying to re-enumerate.\n");
+ usbd_start_re_enumerate(udev);
+ }
}
goto tr_setup;
}
@@ -1936,6 +1955,23 @@ usbd_req_re_enumerate(struct usb_device *udev, struct mtx *mtx)
return (USB_ERR_INVAL);
}
retry:
+ /*
+ * Try to reset the High Speed parent HUB of a LOW- or FULL-
+ * speed device, if any.
+ */
+ if (udev->parent_hs_hub != NULL &&
+ udev->speed != USB_SPEED_HIGH) {
+ DPRINTF("Trying to reset parent High Speed TT.\n");
+ err = usbd_req_reset_tt(udev->parent_hs_hub, NULL,
+ udev->hs_port_no);
+ if (err) {
+ DPRINTF("Resetting parent High "
+ "Speed TT failed (%s).\n",
+ usbd_errstr(err));
+ }
+ }
+
+ /* Try to reset the parent HUB port. */
err = usbd_req_reset_port(parent_hub, mtx, udev->port_no);
if (err) {
DPRINTFN(0, "addr=%d, port reset failed, %s\n",
@@ -2033,3 +2069,65 @@ usbd_req_set_device_feature(struct usb_device *udev, struct mtx *mtx,
USETW(req.wLength, 0);
return (usbd_do_request(udev, mtx, &req, 0));
}
+
+/*------------------------------------------------------------------------*
+ * usbd_req_reset_tt - send a UR_RESET_TT class request to hub "udev"
+ *
+ * Returns:
+ * 0: Success
+ * Else: Failure
+ *------------------------------------------------------------------------*/
+usb_error_t
+usbd_req_reset_tt(struct usb_device *udev, struct mtx *mtx,
+ uint8_t port)
+{
+ struct usb_device_request req;
+
+ /* For single TT HUBs the port should be 1; the caller's value is overridden */
+
+ if (udev->ddesc.bDeviceClass == UDCLASS_HUB &&
+ udev->ddesc.bDeviceProtocol == UDPROTO_HSHUBSTT)
+ port = 1;
+
+ req.bmRequestType = UT_WRITE_CLASS_OTHER;
+ req.bRequest = UR_RESET_TT;
+ USETW(req.wValue, 0);
+ req.wIndex[0] = port; /* wIndex carries the (possibly overridden) port */
+ req.wIndex[1] = 0;
+ USETW(req.wLength, 0); /* no data stage */
+ return (usbd_do_request(udev, mtx, &req, 0));
+}
+
+/*------------------------------------------------------------------------*
+ * usbd_req_clear_tt_buffer - send a UR_CLEAR_TT_BUFFER request to hub "udev"
+ *
+ * For single TT HUBs the port should be 1.
+ *
+ * Returns:
+ * 0: Success
+ * Else: Failure
+ *------------------------------------------------------------------------*/
+usb_error_t
+usbd_req_clear_tt_buffer(struct usb_device *udev, struct mtx *mtx,
+ uint8_t port, uint8_t addr, uint8_t type, uint8_t endpoint)
+{
+ struct usb_device_request req;
+ uint16_t wValue;
+
+ /* For single TT HUBs the port should be 1; the caller's value is overridden */
+
+ if (udev->ddesc.bDeviceClass == UDCLASS_HUB &&
+ udev->ddesc.bDeviceProtocol == UDPROTO_HSHUBSTT)
+ port = 1;
+
+ wValue = (endpoint & 0xF) | ((addr & 0x7F) << 4) | /* EP 3..0, addr 10..4 */
+ ((endpoint & 0x80) << 8) | ((type & 3) << 11); /* dir 15, EP type 12..11 */
+
+ req.bmRequestType = UT_WRITE_CLASS_OTHER;
+ req.bRequest = UR_CLEAR_TT_BUFFER;
+ USETW(req.wValue, wValue);
+ req.wIndex[0] = port; /* wIndex carries the (possibly overridden) port */
+ req.wIndex[1] = 0;
+ USETW(req.wLength, 0); /* no data stage */
+ return (usbd_do_request(udev, mtx, &req, 0));
+}
diff --git a/sys/dev/usb/usb_request.h b/sys/dev/usb/usb_request.h
index 12f373d..ac7a7c1 100644
--- a/sys/dev/usb/usb_request.h
+++ b/sys/dev/usb/usb_request.h
@@ -85,5 +85,9 @@ usb_error_t usbd_req_set_hub_u2_timeout(struct usb_device *udev,
struct mtx *mtx, uint8_t port, uint8_t timeout);
usb_error_t usbd_req_set_hub_depth(struct usb_device *udev,
struct mtx *mtx, uint16_t depth);
+usb_error_t usbd_req_reset_tt(struct usb_device *udev, struct mtx *mtx,
+ uint8_t port);
+usb_error_t usbd_req_clear_tt_buffer(struct usb_device *udev, struct mtx *mtx,
+ uint8_t port, uint8_t addr, uint8_t type, uint8_t endpoint);
#endif /* _USB_REQUEST_H_ */
diff --git a/sys/dev/usb/usb_transfer.c b/sys/dev/usb/usb_transfer.c
index 5fd4f5a..d4c2408 100644
--- a/sys/dev/usb/usb_transfer.c
+++ b/sys/dev/usb/usb_transfer.c
@@ -2928,6 +2928,11 @@ repeat:
usbd_transfer_unsetup(udev->ctrl_xfer, USB_CTRL_XFER_MAX);
/*
+ * Reset clear stall error counter.
+ */
+ udev->clear_stall_errors = 0;
+
+ /*
* Try to setup a new USB transfer for the
* default control endpoint:
*/
diff --git a/sys/dev/usb/usbdi.h b/sys/dev/usb/usbdi.h
index 8f6da7c..91cd3fa 100644
--- a/sys/dev/usb/usbdi.h
+++ b/sys/dev/usb/usbdi.h
@@ -542,6 +542,7 @@ void usbd_m_copy_in(struct usb_page_cache *cache, usb_frlength_t dst_offset,
struct mbuf *m, usb_size_t src_offset, usb_frlength_t src_len);
void usbd_frame_zero(struct usb_page_cache *cache, usb_frlength_t offset,
usb_frlength_t len);
+void usbd_start_re_enumerate(struct usb_device *udev);
int usb_fifo_attach(struct usb_device *udev, void *priv_sc,
struct mtx *priv_mtx, struct usb_fifo_methods *pm,
diff --git a/sys/dev/xen/control/control.c b/sys/dev/xen/control/control.c
index c03d536..0f44181 100644
--- a/sys/dev/xen/control/control.c
+++ b/sys/dev/xen/control/control.c
@@ -203,24 +203,29 @@ xctrl_suspend()
unsigned long max_pfn, start_info_mfn;
#ifdef SMP
- cpumask_t map;
+ struct thread *td;
+ cpuset_t map;
/*
* Bind us to CPU 0 and stop any other VCPUs.
*/
- thread_lock(curthread);
- sched_bind(curthread, 0);
- thread_unlock(curthread);
+ td = curthread;
+ thread_lock(td);
+ sched_bind(td, 0);
+ thread_unlock(td);
KASSERT(PCPU_GET(cpuid) == 0, ("xen_suspend: not running on cpu 0"));
- map = PCPU_GET(other_cpus) & ~stopped_cpus;
- if (map)
+ sched_pin();
+ map = PCPU_GET(other_cpus);
+ sched_unpin();
+ CPU_NAND(&map, &stopped_cpus);
+ if (!CPU_EMPTY(&map))
stop_cpus(map);
#endif
if (DEVICE_SUSPEND(root_bus) != 0) {
printf("xen_suspend: device_suspend failed\n");
#ifdef SMP
- if (map)
+ if (!CPU_EMPTY(&map))
restart_cpus(map);
#endif
return;
@@ -289,7 +294,7 @@ xctrl_suspend()
thread_lock(curthread);
sched_unbind(curthread);
thread_unlock(curthread);
- if (map)
+ if (!CPU_EMPTY(&map))
restart_cpus(map);
#endif
}
diff --git a/sys/fs/nfs/nfs_var.h b/sys/fs/nfs/nfs_var.h
index 8ed60a7..5f944b5 100644
--- a/sys/fs/nfs/nfs_var.h
+++ b/sys/fs/nfs/nfs_var.h
@@ -401,10 +401,10 @@ int nfsrpc_readdirplus(vnode_t, struct uio *, nfsuint64 *,
int nfsrpc_commit(vnode_t, u_quad_t, int, struct ucred *,
NFSPROC_T *, u_char *, struct nfsvattr *, int *, void *);
int nfsrpc_advlock(vnode_t, off_t, int, struct flock *, int,
- struct ucred *, NFSPROC_T *);
+ struct ucred *, NFSPROC_T *, void *, int);
int nfsrpc_lockt(struct nfsrv_descript *, vnode_t,
struct nfsclclient *, u_int64_t, u_int64_t, struct flock *,
- struct ucred *, NFSPROC_T *);
+ struct ucred *, NFSPROC_T *, void *, int);
int nfsrpc_lock(struct nfsrv_descript *, struct nfsmount *, vnode_t,
u_int8_t *, int, struct nfscllockowner *, int, int, u_int64_t,
u_int64_t, short, struct ucred *, NFSPROC_T *, int);
@@ -439,16 +439,16 @@ struct nfsclclient *nfscl_findcl(struct nfsmount *);
void nfscl_clientrelease(struct nfsclclient *);
void nfscl_freelock(struct nfscllock *, int);
int nfscl_getbytelock(vnode_t, u_int64_t, u_int64_t, short,
- struct ucred *, NFSPROC_T *, struct nfsclclient *, int, u_int8_t *,
- u_int8_t *, struct nfscllockowner **, int *, int *);
+ struct ucred *, NFSPROC_T *, struct nfsclclient *, int, void *, int,
+ u_int8_t *, u_int8_t *, struct nfscllockowner **, int *, int *);
int nfscl_relbytelock(vnode_t, u_int64_t, u_int64_t,
struct ucred *, NFSPROC_T *, int, struct nfsclclient *,
- struct nfscllockowner **, int *);
+ void *, int, struct nfscllockowner **, int *);
int nfscl_checkwritelocked(vnode_t, struct flock *,
- struct ucred *, NFSPROC_T *);
+ struct ucred *, NFSPROC_T *, void *, int);
void nfscl_lockrelease(struct nfscllockowner *, int, int);
void nfscl_fillclid(u_int64_t, char *, u_int8_t *, u_int16_t);
-void nfscl_filllockowner(NFSPROC_T *, u_int8_t *);
+void nfscl_filllockowner(void *, u_int8_t *, int);
void nfscl_freeopen(struct nfsclopen *, int);
void nfscl_umount(struct nfsmount *, NFSPROC_T *);
void nfscl_renewthread(struct nfsclclient *, NFSPROC_T *);
@@ -466,9 +466,10 @@ void nfscl_lockexcl(struct nfsv4lock *, void *);
void nfscl_lockunlock(struct nfsv4lock *);
void nfscl_lockderef(struct nfsv4lock *);
void nfscl_docb(struct nfsrv_descript *, NFSPROC_T *);
-void nfscl_releasealllocks(struct nfsclclient *, vnode_t, NFSPROC_T *);
+void nfscl_releasealllocks(struct nfsclclient *, vnode_t, NFSPROC_T *, void *,
+ int);
int nfscl_lockt(vnode_t, struct nfsclclient *, u_int64_t,
- u_int64_t, struct flock *, NFSPROC_T *);
+ u_int64_t, struct flock *, NFSPROC_T *, void *, int);
int nfscl_mustflush(vnode_t);
int nfscl_nodeleg(vnode_t, int);
int nfscl_removedeleg(vnode_t, NFSPROC_T *, nfsv4stateid_t *);
diff --git a/sys/fs/nfsclient/nfs_clport.c b/sys/fs/nfsclient/nfs_clport.c
index 0c3a4c9..4d88bd2 100644
--- a/sys/fs/nfsclient/nfs_clport.c
+++ b/sys/fs/nfsclient/nfs_clport.c
@@ -500,7 +500,7 @@ nfscl_fillclid(u_int64_t clval, char *uuid, u_int8_t *cp, u_int16_t idlen)
* Fill in a lock owner name. For now, pid + the process's creation time.
*/
void
-nfscl_filllockowner(struct thread *td, u_int8_t *cp)
+nfscl_filllockowner(void *id, u_int8_t *cp, int flags)
{
union {
u_int32_t lval;
@@ -508,37 +508,35 @@ nfscl_filllockowner(struct thread *td, u_int8_t *cp)
} tl;
struct proc *p;
-if (td == NULL) {
- printf("NULL td\n");
- bzero(cp, 12);
- return;
-}
- p = td->td_proc;
-if (p == NULL) {
- printf("NULL pid\n");
- bzero(cp, 12);
- return;
-}
- tl.lval = p->p_pid;
- *cp++ = tl.cval[0];
- *cp++ = tl.cval[1];
- *cp++ = tl.cval[2];
- *cp++ = tl.cval[3];
-if (p->p_stats == NULL) {
- printf("pstats null\n");
- bzero(cp, 8);
- return;
-}
- tl.lval = p->p_stats->p_start.tv_sec;
- *cp++ = tl.cval[0];
- *cp++ = tl.cval[1];
- *cp++ = tl.cval[2];
- *cp++ = tl.cval[3];
- tl.lval = p->p_stats->p_start.tv_usec;
- *cp++ = tl.cval[0];
- *cp++ = tl.cval[1];
- *cp++ = tl.cval[2];
- *cp = tl.cval[3];
+ if (id == NULL) {
+ printf("NULL id\n");
+ bzero(cp, NFSV4CL_LOCKNAMELEN);
+ return;
+ }
+ if ((flags & F_POSIX) != 0) {
+ p = (struct proc *)id;
+ tl.lval = p->p_pid;
+ *cp++ = tl.cval[0];
+ *cp++ = tl.cval[1];
+ *cp++ = tl.cval[2];
+ *cp++ = tl.cval[3];
+ tl.lval = p->p_stats->p_start.tv_sec;
+ *cp++ = tl.cval[0];
+ *cp++ = tl.cval[1];
+ *cp++ = tl.cval[2];
+ *cp++ = tl.cval[3];
+ tl.lval = p->p_stats->p_start.tv_usec;
+ *cp++ = tl.cval[0];
+ *cp++ = tl.cval[1];
+ *cp++ = tl.cval[2];
+ *cp = tl.cval[3];
+ } else if ((flags & F_FLOCK) != 0) {
+ bcopy(&id, cp, sizeof(id));
+ bzero(&cp[sizeof(id)], NFSV4CL_LOCKNAMELEN - sizeof(id));
+ } else {
+ printf("nfscl_filllockowner: not F_POSIX or F_FLOCK\n");
+ bzero(cp, NFSV4CL_LOCKNAMELEN);
+ }
}
/*
@@ -943,6 +941,7 @@ nfscl_getmyip(struct nfsmount *nmp, int *isinet6p)
sad.sin_family = AF_INET;
sad.sin_len = sizeof (struct sockaddr_in);
sad.sin_addr.s_addr = sin->sin_addr.s_addr;
+ CURVNET_SET(CRED_TO_VNET(nmp->nm_sockreq.nr_cred));
rt = rtalloc1((struct sockaddr *)&sad, 0, 0UL);
if (rt != NULL) {
if (rt->rt_ifp != NULL &&
@@ -956,6 +955,7 @@ nfscl_getmyip(struct nfsmount *nmp, int *isinet6p)
}
RTFREE_LOCKED(rt);
}
+ CURVNET_RESTORE();
#ifdef INET6
} else if (nmp->nm_nam->sa_family == AF_INET6) {
struct sockaddr_in6 sad6, *sin6;
@@ -966,6 +966,7 @@ nfscl_getmyip(struct nfsmount *nmp, int *isinet6p)
sad6.sin6_family = AF_INET6;
sad6.sin6_len = sizeof (struct sockaddr_in6);
sad6.sin6_addr = sin6->sin6_addr;
+ CURVNET_SET(CRED_TO_VNET(nmp->nm_sockreq.nr_cred));
rt = rtalloc1((struct sockaddr *)&sad6, 0, 0UL);
if (rt != NULL) {
if (rt->rt_ifp != NULL &&
@@ -980,6 +981,7 @@ nfscl_getmyip(struct nfsmount *nmp, int *isinet6p)
}
RTFREE_LOCKED(rt);
}
+ CURVNET_RESTORE();
#endif
}
return (retp);
diff --git a/sys/fs/nfsclient/nfs_clrpcops.c b/sys/fs/nfsclient/nfs_clrpcops.c
index 0fc9bfd..5d83d0b 100644
--- a/sys/fs/nfsclient/nfs_clrpcops.c
+++ b/sys/fs/nfsclient/nfs_clrpcops.c
@@ -3459,7 +3459,7 @@ nfsmout:
*/
APPLESTATIC int
nfsrpc_advlock(vnode_t vp, off_t size, int op, struct flock *fl,
- int reclaim, struct ucred *cred, NFSPROC_T *p)
+ int reclaim, struct ucred *cred, NFSPROC_T *p, void *id, int flags)
{
struct nfscllockowner *lp;
struct nfsclclient *clp;
@@ -3511,11 +3511,11 @@ nfsrpc_advlock(vnode_t vp, off_t size, int op, struct flock *fl,
error = nfscl_getcl(vp, cred, p, &clp);
if (error)
return (error);
- error = nfscl_lockt(vp, clp, off, len, fl, p);
+ error = nfscl_lockt(vp, clp, off, len, fl, p, id, flags);
if (!error) {
clidrev = clp->nfsc_clientidrev;
error = nfsrpc_lockt(nd, vp, clp, off, len, fl, cred,
- p);
+ p, id, flags);
} else if (error == -1) {
error = 0;
}
@@ -3530,7 +3530,7 @@ nfsrpc_advlock(vnode_t vp, off_t size, int op, struct flock *fl,
return (error);
do {
error = nfscl_relbytelock(vp, off, len, cred, p, callcnt,
- clp, &lp, &dorpc);
+ clp, id, flags, &lp, &dorpc);
/*
* If it returns a NULL lp, we're done.
*/
@@ -3538,7 +3538,7 @@ nfsrpc_advlock(vnode_t vp, off_t size, int op, struct flock *fl,
if (callcnt == 0)
nfscl_clientrelease(clp);
else
- nfscl_releasealllocks(clp, vp, p);
+ nfscl_releasealllocks(clp, vp, p, id, flags);
return (error);
}
if (nmp->nm_clp != NULL)
@@ -3572,10 +3572,10 @@ nfsrpc_advlock(vnode_t vp, off_t size, int op, struct flock *fl,
}
callcnt++;
} while (error == 0 && nd->nd_repstat == 0);
- nfscl_releasealllocks(clp, vp, p);
+ nfscl_releasealllocks(clp, vp, p, id, flags);
} else if (op == F_SETLK) {
error = nfscl_getbytelock(vp, off, len, fl->l_type, cred, p,
- NULL, 0, NULL, NULL, &lp, &newone, &donelocally);
+ NULL, 0, id, flags, NULL, NULL, &lp, &newone, &donelocally);
if (error || donelocally) {
return (error);
}
@@ -3625,7 +3625,7 @@ nfsrpc_advlock(vnode_t vp, off_t size, int op, struct flock *fl,
APPLESTATIC int
nfsrpc_lockt(struct nfsrv_descript *nd, vnode_t vp,
struct nfsclclient *clp, u_int64_t off, u_int64_t len, struct flock *fl,
- struct ucred *cred, NFSPROC_T *p)
+ struct ucred *cred, NFSPROC_T *p, void *id, int flags)
{
u_int32_t *tl;
int error, type, size;
@@ -3643,7 +3643,7 @@ nfsrpc_lockt(struct nfsrv_descript *nd, vnode_t vp,
tl += 2;
*tl++ = clp->nfsc_clientid.lval[0];
*tl = clp->nfsc_clientid.lval[1];
- nfscl_filllockowner(p, own);
+ nfscl_filllockowner(id, own, flags);
(void) nfsm_strtom(nd, own, NFSV4CL_LOCKNAMELEN);
error = nfscl_request(nd, vp, p, cred, NULL);
if (error)
diff --git a/sys/fs/nfsclient/nfs_clstate.c b/sys/fs/nfsclient/nfs_clstate.c
index 86d71b6..aa81437 100644
--- a/sys/fs/nfsclient/nfs_clstate.c
+++ b/sys/fs/nfsclient/nfs_clstate.c
@@ -226,7 +226,7 @@ nfscl_open(vnode_t vp, u_int8_t *nfhp, int fhlen, u_int32_t amode, int usedeleg,
* If none found, add the new one or return error, depending upon
* "create".
*/
- nfscl_filllockowner(p, own);
+ nfscl_filllockowner(p->td_proc, own, F_POSIX);
NFSLOCKCLSTATE();
dp = NULL;
/* First check the delegation list */
@@ -521,7 +521,7 @@ nfscl_getstateid(vnode_t vp, u_int8_t *nfhp, int fhlen, u_int32_t mode,
* If p != NULL, we want to search the parentage tree
* for a matching OpenOwner and use that.
*/
- nfscl_filllockowner(p, own);
+ nfscl_filllockowner(p->td_proc, own, F_POSIX);
error = nfscl_getopen(&clp->nfsc_owner, nfhp, fhlen, NULL, p,
mode, NULL, &op);
if (error == 0) {
@@ -596,7 +596,7 @@ nfscl_getopen(struct nfsclownerhead *ohp, u_int8_t *nfhp, int fhlen,
op = NULL;
while (op == NULL && (nproc != NULL || rown != NULL)) {
if (nproc != NULL) {
- nfscl_filllockowner(nproc, own);
+ nfscl_filllockowner(nproc->td_proc, own, F_POSIX);
ownp = own;
} else {
ownp = rown;
@@ -881,7 +881,7 @@ nfscl_clientrelease(struct nfsclclient *clp)
APPLESTATIC int
nfscl_getbytelock(vnode_t vp, u_int64_t off, u_int64_t len,
short type, struct ucred *cred, NFSPROC_T *p, struct nfsclclient *rclp,
- int recovery, u_int8_t *rownp, u_int8_t *ropenownp,
+ int recovery, void *id, int flags, u_int8_t *rownp, u_int8_t *ropenownp,
struct nfscllockowner **lpp, int *newonep, int *donelocallyp)
{
struct nfscllockowner *lp;
@@ -942,7 +942,7 @@ nfscl_getbytelock(vnode_t vp, u_int64_t off, u_int64_t len,
if (recovery) {
ownp = rownp;
} else {
- nfscl_filllockowner(p, own);
+ nfscl_filllockowner(id, own, flags);
ownp = own;
}
if (!recovery) {
@@ -1079,7 +1079,8 @@ nfscl_getbytelock(vnode_t vp, u_int64_t off, u_int64_t len,
APPLESTATIC int
nfscl_relbytelock(vnode_t vp, u_int64_t off, u_int64_t len,
__unused struct ucred *cred, NFSPROC_T *p, int callcnt,
- struct nfsclclient *clp, struct nfscllockowner **lpp, int *dorpcp)
+ struct nfsclclient *clp, void *id, int flags,
+ struct nfscllockowner **lpp, int *dorpcp)
{
struct nfscllockowner *lp;
struct nfsclowner *owp;
@@ -1116,7 +1117,7 @@ nfscl_relbytelock(vnode_t vp, u_int64_t off, u_int64_t len,
sizeof (struct nfscllock), M_NFSCLLOCK, M_WAITOK);
*other_lop = *nlop;
}
- nfscl_filllockowner(p, own);
+ nfscl_filllockowner(id, own, flags);
dp = NULL;
NFSLOCKCLSTATE();
if (callcnt == 0)
@@ -1188,7 +1189,8 @@ nfscl_relbytelock(vnode_t vp, u_int64_t off, u_int64_t len,
* Release all lockowners marked in progess for this process and file.
*/
APPLESTATIC void
-nfscl_releasealllocks(struct nfsclclient *clp, vnode_t vp, NFSPROC_T *p)
+nfscl_releasealllocks(struct nfsclclient *clp, vnode_t vp, NFSPROC_T *p,
+ void *id, int flags)
{
struct nfsclowner *owp;
struct nfsclopen *op;
@@ -1197,7 +1199,7 @@ nfscl_releasealllocks(struct nfsclclient *clp, vnode_t vp, NFSPROC_T *p)
u_int8_t own[NFSV4CL_LOCKNAMELEN];
np = VTONFS(vp);
- nfscl_filllockowner(p, own);
+ nfscl_filllockowner(id, own, flags);
NFSLOCKCLSTATE();
LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) {
LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
@@ -1226,7 +1228,7 @@ nfscl_releasealllocks(struct nfsclclient *clp, vnode_t vp, NFSPROC_T *p)
*/
APPLESTATIC int
nfscl_checkwritelocked(vnode_t vp, struct flock *fl,
- struct ucred *cred, NFSPROC_T *p)
+ struct ucred *cred, NFSPROC_T *p, void *id, int flags)
{
struct nfsclowner *owp;
struct nfscllockowner *lp;
@@ -1266,7 +1268,7 @@ nfscl_checkwritelocked(vnode_t vp, struct flock *fl,
error = nfscl_getcl(vp, cred, p, &clp);
if (error)
return (1);
- nfscl_filllockowner(p, own);
+ nfscl_filllockowner(id, own, flags);
NFSLOCKCLSTATE();
/*
@@ -1641,7 +1643,7 @@ nfscl_cleanup(NFSPROC_T *p)
if (!nfscl_inited)
return;
- nfscl_filllockowner(p, own);
+ nfscl_filllockowner(p->td_proc, own, F_POSIX);
NFSLOCKCLSTATE();
/*
@@ -3322,7 +3324,7 @@ nfscl_checkconflict(struct nfscllockownerhead *lhp, struct nfscllock *nlop,
*/
APPLESTATIC int
nfscl_lockt(vnode_t vp, struct nfsclclient *clp, u_int64_t off,
- u_int64_t len, struct flock *fl, NFSPROC_T *p)
+ u_int64_t len, struct flock *fl, NFSPROC_T *p, void *id, int flags)
{
struct nfscllock *lop, nlck;
struct nfscldeleg *dp;
@@ -3340,7 +3342,7 @@ nfscl_lockt(vnode_t vp, struct nfsclclient *clp, u_int64_t off,
return (NFSERR_INVAL);
}
np = VTONFS(vp);
- nfscl_filllockowner(p, own);
+ nfscl_filllockowner(id, own, flags);
NFSLOCKCLSTATE();
dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len);
error = nfscl_localconflict(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len,
@@ -3615,7 +3617,7 @@ nfscl_relock(vnode_t vp, struct nfsclclient *clp, struct nfsmount *nmp,
off = lop->nfslo_first;
len = lop->nfslo_end - lop->nfslo_first;
error = nfscl_getbytelock(vp, off, len, lop->nfslo_type, cred, p,
- clp, 1, lp->nfsl_owner, lp->nfsl_openowner, &nlp, &newone,
+ clp, 1, NULL, 0, lp->nfsl_owner, lp->nfsl_openowner, &nlp, &newone,
&donelocally);
if (error || donelocally)
return (error);
diff --git a/sys/fs/nfsclient/nfs_clvnops.c b/sys/fs/nfsclient/nfs_clvnops.c
index 984724d..3e1c66d 100644
--- a/sys/fs/nfsclient/nfs_clvnops.c
+++ b/sys/fs/nfsclient/nfs_clvnops.c
@@ -2884,8 +2884,11 @@ nfs_advlock(struct vop_advlock_args *ap)
int ret, error = EOPNOTSUPP;
u_quad_t size;
- if (NFS_ISV4(vp) && (ap->a_flags & F_POSIX)) {
- cred = p->p_ucred;
+ if (NFS_ISV4(vp) && (ap->a_flags & (F_POSIX | F_FLOCK)) != 0) {
+ if ((ap->a_flags & F_POSIX) != 0)
+ cred = p->p_ucred;
+ else
+ cred = td->td_ucred;
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
if (vp->v_iflag & VI_DOOMED) {
VOP_UNLOCK(vp, 0);
@@ -2898,7 +2901,8 @@ nfs_advlock(struct vop_advlock_args *ap)
* RFC3530 Sec. 9.3.2.
*/
if (ap->a_op == F_UNLCK &&
- nfscl_checkwritelocked(vp, ap->a_fl, cred, td))
+ nfscl_checkwritelocked(vp, ap->a_fl, cred, td, ap->a_id,
+ ap->a_flags))
(void) ncl_flush(vp, MNT_WAIT, cred, td, 1, 0);
/*
@@ -2907,7 +2911,7 @@ nfs_advlock(struct vop_advlock_args *ap)
*/
do {
ret = nfsrpc_advlock(vp, np->n_size, ap->a_op,
- ap->a_fl, 0, cred, td);
+ ap->a_fl, 0, cred, td, ap->a_id, ap->a_flags);
if (ret == NFSERR_DENIED && (ap->a_flags & F_WAIT) &&
ap->a_op == F_SETLK) {
VOP_UNLOCK(vp, 0);
diff --git a/sys/geom/eli/g_eli.c b/sys/geom/eli/g_eli.c
index 74c70ff..30497a4 100644
--- a/sys/geom/eli/g_eli.c
+++ b/sys/geom/eli/g_eli.c
@@ -672,7 +672,7 @@ static int
g_eli_cpu_is_disabled(int cpu)
{
#ifdef SMP
- return ((hlt_cpus_mask & (1 << cpu)) != 0);
+ return (CPU_ISSET(cpu, &hlt_cpus_mask));
#else
return (0);
#endif
diff --git a/sys/i386/i386/intr_machdep.c b/sys/i386/i386/intr_machdep.c
index 77b8004..56529f7 100644
--- a/sys/i386/i386/intr_machdep.c
+++ b/sys/i386/i386/intr_machdep.c
@@ -409,8 +409,7 @@ DB_SHOW_COMMAND(irqs, db_show_irqs)
* allocate CPUs round-robin.
*/
-/* The BSP is always a valid target. */
-static cpumask_t intr_cpus = (1 << 0);
+static cpuset_t intr_cpus;
static int current_cpu;
/*
@@ -432,7 +431,7 @@ intr_next_cpu(void)
current_cpu++;
if (current_cpu > mp_maxid)
current_cpu = 0;
- } while (!(intr_cpus & (1 << current_cpu)));
+ } while (!CPU_ISSET(current_cpu, &intr_cpus));
mtx_unlock_spin(&icu_lock);
return (apic_id);
}
@@ -463,7 +462,7 @@ intr_add_cpu(u_int cpu)
printf("INTR: Adding local APIC %d as a target\n",
cpu_apic_ids[cpu]);
- intr_cpus |= (1 << cpu);
+ CPU_SET(cpu, &intr_cpus);
}
/*
@@ -483,6 +482,9 @@ intr_shuffle_irqs(void *arg __unused)
return;
#endif
+ /* The BSP is always a valid target. */
+ CPU_SETOF(0, &intr_cpus);
+
/* Don't bother on UP. */
if (mp_ncpus == 1)
return;
diff --git a/sys/i386/i386/machdep.c b/sys/i386/i386/machdep.c
index fbf444a..91050c4 100644
--- a/sys/i386/i386/machdep.c
+++ b/sys/i386/i386/machdep.c
@@ -49,6 +49,7 @@ __FBSDID("$FreeBSD$");
#include "opt_isa.h"
#include "opt_kstack_pages.h"
#include "opt_maxmem.h"
+#include "opt_mp_watchdog.h"
#include "opt_npx.h"
#include "opt_perfmon.h"
#include "opt_xbox.h"
@@ -118,6 +119,7 @@ __FBSDID("$FreeBSD$");
#include <x86/mca.h>
#include <machine/md_var.h>
#include <machine/metadata.h>
+#include <machine/mp_watchdog.h>
#include <machine/pc/bios.h>
#include <machine/pcb.h>
#include <machine/pcb_ext.h>
@@ -1357,9 +1359,8 @@ cpu_idle(int busy)
CTR2(KTR_SPARE2, "cpu_idle(%d) at %d",
busy, curcpu);
-#if defined(SMP) && !defined(XEN)
- if (mp_grab_cpu_hlt())
- return;
+#if defined(MP_WATCHDOG) && !defined(XEN)
+ ap_watchdog(PCPU_GET(cpuid));
#endif
#ifndef XEN
/* If we are busy - try to use fast methods. */
diff --git a/sys/i386/i386/mp_machdep.c b/sys/i386/i386/mp_machdep.c
index a4db401..78c90c0 100644
--- a/sys/i386/i386/mp_machdep.c
+++ b/sys/i386/i386/mp_machdep.c
@@ -29,7 +29,6 @@ __FBSDID("$FreeBSD$");
#include "opt_apic.h"
#include "opt_cpu.h"
#include "opt_kstack_pages.h"
-#include "opt_mp_watchdog.h"
#include "opt_pmap.h"
#include "opt_sched.h"
#include "opt_smp.h"
@@ -51,6 +50,7 @@ __FBSDID("$FreeBSD$");
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/cons.h> /* cngetc() */
+#include <sys/cpuset.h>
#ifdef GPROF
#include <sys/gmon.h>
#endif
@@ -77,7 +77,6 @@ __FBSDID("$FreeBSD$");
#include <machine/cputypes.h>
#include <x86/mca.h>
#include <machine/md_var.h>
-#include <machine/mp_watchdog.h>
#include <machine/pcb.h>
#include <machine/psl.h>
#include <machine/smp.h>
@@ -173,7 +172,7 @@ static u_long *ipi_hardclock_counts[MAXCPU];
* Local data and functions.
*/
-static volatile cpumask_t ipi_nmi_pending;
+static volatile cpuset_t ipi_nmi_pending;
/* used to hold the AP's until we are ready to release them */
static struct mtx ap_boot_mtx;
@@ -208,11 +207,8 @@ static int start_all_aps(void);
static int start_ap(int apic_id);
static void release_aps(void *dummy);
-static int hlt_logical_cpus;
static u_int hyperthreading_cpus; /* logical cpus sharing L1 cache */
-static cpumask_t hyperthreading_cpus_mask;
static int hyperthreading_allowed = 1;
-static struct sysctl_ctx_list logical_cpu_clist;
static void
mem_range_AP_init(void)
@@ -289,8 +285,11 @@ topo_probe_0x4(void)
* logical processors that belong to the same core
* as BSP thus deducing number of threads per core.
*/
- cpuid_count(0x04, 0, p);
- max_cores = ((p[0] >> 26) & 0x3f) + 1;
+ if (cpu_high >= 0x4) {
+ cpuid_count(0x04, 0, p);
+ max_cores = ((p[0] >> 26) & 0x3f) + 1;
+ } else
+ max_cores = 1;
core_id_bits = mask_width(max_logical/max_cores);
if (core_id_bits < 0)
return;
@@ -382,7 +381,7 @@ topo_probe(void)
if (cpu_topo_probed)
return;
- logical_cpus_mask = 0;
+ CPU_ZERO(&logical_cpus_mask);
if (mp_ncpus <= 1)
cpu_cores = cpu_logical = 1;
else if (cpu_vendor_id == CPU_VENDOR_AMD)
@@ -524,7 +523,7 @@ cpu_mp_probe(void)
* Always record BSP in CPU map so that the mbuf init code works
* correctly.
*/
- all_cpus = 1;
+ CPU_SETOF(0, &all_cpus);
if (mp_ncpus == 0) {
/*
* No CPUs were found, so this must be a UP system. Setup
@@ -659,6 +658,7 @@ cpu_mp_announce(void)
void
init_secondary(void)
{
+ cpuset_t tcpuset, tallcpus;
struct pcpu *pc;
vm_offset_t addr;
int gsel_tss;
@@ -783,19 +783,17 @@ init_secondary(void)
CTR1(KTR_SMP, "SMP: AP CPU #%d Launched", PCPU_GET(cpuid));
printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid));
+ tcpuset = PCPU_GET(cpumask);
/* Determine if we are a logical CPU. */
/* XXX Calculation depends on cpu_logical being a power of 2, e.g. 2 */
if (cpu_logical > 1 && PCPU_GET(apic_id) % cpu_logical != 0)
- logical_cpus_mask |= PCPU_GET(cpumask);
-
- /* Determine if we are a hyperthread. */
- if (hyperthreading_cpus > 1 &&
- PCPU_GET(apic_id) % hyperthreading_cpus != 0)
- hyperthreading_cpus_mask |= PCPU_GET(cpumask);
+ CPU_OR(&logical_cpus_mask, &tcpuset);
/* Build our map of 'other' CPUs. */
- PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask));
+ tallcpus = all_cpus;
+ CPU_NAND(&tallcpus, &tcpuset);
+ PCPU_SET(other_cpus, tallcpus);
if (bootverbose)
lapic_dump("AP");
@@ -874,7 +872,7 @@ assign_cpu_ids(void)
if (hyperthreading_cpus > 1 && i % hyperthreading_cpus != 0) {
cpu_info[i].cpu_hyperthread = 1;
-#if defined(SCHED_ULE)
+
/*
* Don't use HT CPU if it has been disabled by a
* tunable.
@@ -883,7 +881,6 @@ assign_cpu_ids(void)
cpu_info[i].cpu_disabled = 1;
continue;
}
-#endif
}
/* Don't use this CPU if it has been disabled by a tunable. */
@@ -893,6 +890,11 @@ assign_cpu_ids(void)
}
}
+ if (hyperthreading_allowed == 0 && hyperthreading_cpus > 1) {
+ hyperthreading_cpus = 0;
+ cpu_logical = 1;
+ }
+
/*
* Assign CPU IDs to local APIC IDs and disable any CPUs
* beyond MAXCPU. CPU 0 is always assigned to the BSP.
@@ -932,6 +934,7 @@ assign_cpu_ids(void)
static int
start_all_aps(void)
{
+ cpuset_t tallcpus;
#ifndef PC98
u_char mpbiosreason;
#endif
@@ -991,11 +994,13 @@ start_all_aps(void)
}
CHECK_PRINT("trace"); /* show checkpoints */
- all_cpus |= (1 << cpu); /* record AP in CPU map */
+ CPU_SET(cpu, &all_cpus); /* record AP in CPU map */
}
/* build our map of 'other' CPUs */
- PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask));
+ tallcpus = all_cpus;
+ CPU_NAND(&tallcpus, PCPU_PTR(cpumask));
+ PCPU_SET(other_cpus, tallcpus);
/* restore the warmstart vector */
*(u_int32_t *) WARMBOOT_OFF = mpbioswarmvec;
@@ -1192,6 +1197,30 @@ SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range_size, CTLFLAG_RW,
#endif /* COUNT_XINVLTLB_HITS */
/*
+ * Send an IPI to specified CPU handling the bitmap logic.
+ */
+static void
+ipi_send_cpu(int cpu, u_int ipi)
+{
+ u_int bitmap, old_pending, new_pending;
+
+ KASSERT(cpu_apic_ids[cpu] != -1, ("IPI to non-existent CPU %d", cpu));
+
+ if (IPI_IS_BITMAPED(ipi)) {
+ bitmap = 1 << ipi;
+ ipi = IPI_BITMAP_VECTOR;
+ do {
+ old_pending = cpu_ipi_pending[cpu];
+ new_pending = old_pending | bitmap;
+ } while (!atomic_cmpset_int(&cpu_ipi_pending[cpu],
+ old_pending, new_pending));
+ if (old_pending)
+ return;
+ }
+ lapic_ipi_vectored(ipi, cpu_apic_ids[cpu]);
+}
+
+/*
* Flush the TLB on all other CPU's
*/
static void
@@ -1215,28 +1244,19 @@ smp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2)
}
static void
-smp_targeted_tlb_shootdown(cpumask_t mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2)
+smp_targeted_tlb_shootdown(cpuset_t mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2)
{
- int ncpu, othercpus;
+ int cpu, ncpu, othercpus;
othercpus = mp_ncpus - 1;
- if (mask == (u_int)-1) {
- ncpu = othercpus;
- if (ncpu < 1)
+ if (CPU_ISFULLSET(&mask)) {
+ if (othercpus < 1)
return;
} else {
- mask &= ~PCPU_GET(cpumask);
- if (mask == 0)
- return;
- ncpu = bitcount32(mask);
- if (ncpu > othercpus) {
- /* XXX this should be a panic offence */
- printf("SMP: tlb shootdown to %d other cpus (only have %d)\n",
- ncpu, othercpus);
- ncpu = othercpus;
- }
- /* XXX should be a panic, implied by mask == 0 above */
- if (ncpu < 1)
+ sched_pin();
+ CPU_NAND(&mask, PCPU_PTR(cpumask));
+ sched_unpin();
+ if (CPU_EMPTY(&mask))
return;
}
if (!(read_eflags() & PSL_I))
@@ -1245,39 +1265,25 @@ smp_targeted_tlb_shootdown(cpumask_t mask, u_int vector, vm_offset_t addr1, vm_o
smp_tlb_addr1 = addr1;
smp_tlb_addr2 = addr2;
atomic_store_rel_int(&smp_tlb_wait, 0);
- if (mask == (u_int)-1)
+ if (CPU_ISFULLSET(&mask)) {
+ ncpu = othercpus;
ipi_all_but_self(vector);
- else
- ipi_selected(mask, vector);
+ } else {
+ ncpu = 0;
+ while ((cpu = cpusetobj_ffs(&mask)) != 0) {
+ cpu--;
+ CPU_CLR(cpu, &mask);
+ CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu,
+ vector);
+ ipi_send_cpu(cpu, vector);
+ ncpu++;
+ }
+ }
while (smp_tlb_wait < ncpu)
ia32_pause();
mtx_unlock_spin(&smp_ipi_mtx);
}
-/*
- * Send an IPI to specified CPU handling the bitmap logic.
- */
-static void
-ipi_send_cpu(int cpu, u_int ipi)
-{
- u_int bitmap, old_pending, new_pending;
-
- KASSERT(cpu_apic_ids[cpu] != -1, ("IPI to non-existent CPU %d", cpu));
-
- if (IPI_IS_BITMAPED(ipi)) {
- bitmap = 1 << ipi;
- ipi = IPI_BITMAP_VECTOR;
- do {
- old_pending = cpu_ipi_pending[cpu];
- new_pending = old_pending | bitmap;
- } while (!atomic_cmpset_int(&cpu_ipi_pending[cpu],
- old_pending, new_pending));
- if (old_pending)
- return;
- }
- lapic_ipi_vectored(ipi, cpu_apic_ids[cpu]);
-}
-
void
smp_cache_flush(void)
{
@@ -1324,7 +1330,7 @@ smp_invlpg_range(vm_offset_t addr1, vm_offset_t addr2)
}
void
-smp_masked_invltlb(cpumask_t mask)
+smp_masked_invltlb(cpuset_t mask)
{
if (smp_started) {
@@ -1336,7 +1342,7 @@ smp_masked_invltlb(cpumask_t mask)
}
void
-smp_masked_invlpg(cpumask_t mask, vm_offset_t addr)
+smp_masked_invlpg(cpuset_t mask, vm_offset_t addr)
{
if (smp_started) {
@@ -1348,7 +1354,7 @@ smp_masked_invlpg(cpumask_t mask, vm_offset_t addr)
}
void
-smp_masked_invlpg_range(cpumask_t mask, vm_offset_t addr1, vm_offset_t addr2)
+smp_masked_invlpg_range(cpuset_t mask, vm_offset_t addr1, vm_offset_t addr2)
{
if (smp_started) {
@@ -1401,7 +1407,7 @@ ipi_bitmap_handler(struct trapframe frame)
* send an IPI to a set of cpus.
*/
void
-ipi_selected(cpumask_t cpus, u_int ipi)
+ipi_selected(cpuset_t cpus, u_int ipi)
{
int cpu;
@@ -1411,12 +1417,12 @@ ipi_selected(cpumask_t cpus, u_int ipi)
* Set the mask of receiving CPUs for this purpose.
*/
if (ipi == IPI_STOP_HARD)
- atomic_set_int(&ipi_nmi_pending, cpus);
+ CPU_OR_ATOMIC(&ipi_nmi_pending, &cpus);
- CTR3(KTR_SMP, "%s: cpus: %x ipi: %x", __func__, cpus, ipi);
- while ((cpu = ffs(cpus)) != 0) {
+ while ((cpu = cpusetobj_ffs(&cpus)) != 0) {
cpu--;
- cpus &= ~(1 << cpu);
+ CPU_CLR(cpu, &cpus);
+ CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi);
ipi_send_cpu(cpu, ipi);
}
}
@@ -1434,7 +1440,7 @@ ipi_cpu(int cpu, u_int ipi)
* Set the mask of receiving CPUs for this purpose.
*/
if (ipi == IPI_STOP_HARD)
- atomic_set_int(&ipi_nmi_pending, 1 << cpu);
+ CPU_SET_ATOMIC(cpu, &ipi_nmi_pending);
CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi);
ipi_send_cpu(cpu, ipi);
@@ -1447,8 +1453,10 @@ void
ipi_all_but_self(u_int ipi)
{
+ sched_pin();
if (IPI_IS_BITMAPED(ipi)) {
ipi_selected(PCPU_GET(other_cpus), ipi);
+ sched_unpin();
return;
}
@@ -1458,7 +1466,9 @@ ipi_all_but_self(u_int ipi)
* Set the mask of receiving CPUs for this purpose.
*/
if (ipi == IPI_STOP_HARD)
- atomic_set_int(&ipi_nmi_pending, PCPU_GET(other_cpus));
+ CPU_OR_ATOMIC(&ipi_nmi_pending, PCPU_PTR(other_cpus));
+ sched_unpin();
+
CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
lapic_ipi_vectored(ipi, APIC_IPI_DEST_OTHERS);
}
@@ -1466,7 +1476,7 @@ ipi_all_but_self(u_int ipi)
int
ipi_nmi_handler()
{
- cpumask_t cpumask;
+ cpuset_t cpumask;
/*
* As long as there is not a simple way to know about a NMI's
@@ -1474,11 +1484,13 @@ ipi_nmi_handler()
* the global pending bitword an IPI_STOP_HARD has been issued
* and should be handled.
*/
+ sched_pin();
cpumask = PCPU_GET(cpumask);
- if ((ipi_nmi_pending & cpumask) == 0)
+ sched_unpin();
+ if (!CPU_OVERLAP(&ipi_nmi_pending, &cpumask))
return (1);
- atomic_clear_int(&ipi_nmi_pending, cpumask);
+ CPU_NAND_ATOMIC(&ipi_nmi_pending, &cpumask);
cpustop_handler();
return (0);
}
@@ -1490,23 +1502,25 @@ ipi_nmi_handler()
void
cpustop_handler(void)
{
- cpumask_t cpumask;
+ cpuset_t cpumask;
u_int cpu;
+ sched_pin();
cpu = PCPU_GET(cpuid);
cpumask = PCPU_GET(cpumask);
+ sched_unpin();
savectx(&stoppcbs[cpu]);
/* Indicate that we are stopped */
- atomic_set_int(&stopped_cpus, cpumask);
+ CPU_OR_ATOMIC(&stopped_cpus, &cpumask);
/* Wait for restart */
- while (!(started_cpus & cpumask))
+ while (!CPU_OVERLAP(&started_cpus, &cpumask))
ia32_pause();
- atomic_clear_int(&started_cpus, cpumask);
- atomic_clear_int(&stopped_cpus, cpumask);
+ CPU_NAND_ATOMIC(&started_cpus, &cpumask);
+ CPU_NAND_ATOMIC(&stopped_cpus, &cpumask);
if (cpu == 0 && cpustop_restartfunc != NULL) {
cpustop_restartfunc();
@@ -1530,158 +1544,6 @@ release_aps(void *dummy __unused)
}
SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL);
-static int
-sysctl_hlt_cpus(SYSCTL_HANDLER_ARGS)
-{
- cpumask_t mask;
- int error;
-
- mask = hlt_cpus_mask;
- error = sysctl_handle_int(oidp, &mask, 0, req);
- if (error || !req->newptr)
- return (error);
-
- if (logical_cpus_mask != 0 &&
- (mask & logical_cpus_mask) == logical_cpus_mask)
- hlt_logical_cpus = 1;
- else
- hlt_logical_cpus = 0;
-
- if (! hyperthreading_allowed)
- mask |= hyperthreading_cpus_mask;
-
- if ((mask & all_cpus) == all_cpus)
- mask &= ~(1<<0);
- hlt_cpus_mask = mask;
- return (error);
-}
-SYSCTL_PROC(_machdep, OID_AUTO, hlt_cpus, CTLTYPE_INT|CTLFLAG_RW,
- 0, 0, sysctl_hlt_cpus, "IU",
- "Bitmap of CPUs to halt. 101 (binary) will halt CPUs 0 and 2.");
-
-static int
-sysctl_hlt_logical_cpus(SYSCTL_HANDLER_ARGS)
-{
- int disable, error;
-
- disable = hlt_logical_cpus;
- error = sysctl_handle_int(oidp, &disable, 0, req);
- if (error || !req->newptr)
- return (error);
-
- if (disable)
- hlt_cpus_mask |= logical_cpus_mask;
- else
- hlt_cpus_mask &= ~logical_cpus_mask;
-
- if (! hyperthreading_allowed)
- hlt_cpus_mask |= hyperthreading_cpus_mask;
-
- if ((hlt_cpus_mask & all_cpus) == all_cpus)
- hlt_cpus_mask &= ~(1<<0);
-
- hlt_logical_cpus = disable;
- return (error);
-}
-
-static int
-sysctl_hyperthreading_allowed(SYSCTL_HANDLER_ARGS)
-{
- int allowed, error;
-
- allowed = hyperthreading_allowed;
- error = sysctl_handle_int(oidp, &allowed, 0, req);
- if (error || !req->newptr)
- return (error);
-
-#ifdef SCHED_ULE
- /*
- * SCHED_ULE doesn't allow enabling/disabling HT cores at
- * run-time.
- */
- if (allowed != hyperthreading_allowed)
- return (ENOTSUP);
- return (error);
-#endif
-
- if (allowed)
- hlt_cpus_mask &= ~hyperthreading_cpus_mask;
- else
- hlt_cpus_mask |= hyperthreading_cpus_mask;
-
- if (logical_cpus_mask != 0 &&
- (hlt_cpus_mask & logical_cpus_mask) == logical_cpus_mask)
- hlt_logical_cpus = 1;
- else
- hlt_logical_cpus = 0;
-
- if ((hlt_cpus_mask & all_cpus) == all_cpus)
- hlt_cpus_mask &= ~(1<<0);
-
- hyperthreading_allowed = allowed;
- return (error);
-}
-
-static void
-cpu_hlt_setup(void *dummy __unused)
-{
-
- if (logical_cpus_mask != 0) {
- TUNABLE_INT_FETCH("machdep.hlt_logical_cpus",
- &hlt_logical_cpus);
- sysctl_ctx_init(&logical_cpu_clist);
- SYSCTL_ADD_PROC(&logical_cpu_clist,
- SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO,
- "hlt_logical_cpus", CTLTYPE_INT|CTLFLAG_RW, 0, 0,
- sysctl_hlt_logical_cpus, "IU", "");
- SYSCTL_ADD_UINT(&logical_cpu_clist,
- SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO,
- "logical_cpus_mask", CTLTYPE_INT|CTLFLAG_RD,
- &logical_cpus_mask, 0, "");
-
- if (hlt_logical_cpus)
- hlt_cpus_mask |= logical_cpus_mask;
-
- /*
- * If necessary for security purposes, force
- * hyperthreading off, regardless of the value
- * of hlt_logical_cpus.
- */
- if (hyperthreading_cpus_mask) {
- SYSCTL_ADD_PROC(&logical_cpu_clist,
- SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO,
- "hyperthreading_allowed", CTLTYPE_INT|CTLFLAG_RW,
- 0, 0, sysctl_hyperthreading_allowed, "IU", "");
- if (! hyperthreading_allowed)
- hlt_cpus_mask |= hyperthreading_cpus_mask;
- }
- }
-}
-SYSINIT(cpu_hlt, SI_SUB_SMP, SI_ORDER_ANY, cpu_hlt_setup, NULL);
-
-int
-mp_grab_cpu_hlt(void)
-{
- cpumask_t mask;
-#ifdef MP_WATCHDOG
- u_int cpuid;
-#endif
- int retval;
-
- mask = PCPU_GET(cpumask);
-#ifdef MP_WATCHDOG
- cpuid = PCPU_GET(cpuid);
- ap_watchdog(cpuid);
-#endif
-
- retval = 0;
- while (mask & hlt_cpus_mask) {
- retval = 1;
- __asm __volatile("sti; hlt" : : : "memory");
- }
- return (retval);
-}
-
#ifdef COUNT_IPIS
/*
* Setup interrupt counters for IPI handlers.
diff --git a/sys/i386/i386/pmap.c b/sys/i386/i386/pmap.c
index d10bbe5..3f9248d 100644
--- a/sys/i386/i386/pmap.c
+++ b/sys/i386/i386/pmap.c
@@ -125,6 +125,8 @@ __FBSDID("$FreeBSD$");
#include <sys/sysctl.h>
#ifdef SMP
#include <sys/smp.h>
+#else
+#include <sys/cpuset.h>
#endif
#include <vm/vm.h>
@@ -386,7 +388,7 @@ pmap_bootstrap(vm_paddr_t firstaddr)
kernel_pmap->pm_pdpt = (pdpt_entry_t *) (KERNBASE + (u_int)IdlePDPT);
#endif
kernel_pmap->pm_root = NULL;
- kernel_pmap->pm_active = -1; /* don't allow deactivation */
+ CPU_FILL(&kernel_pmap->pm_active); /* don't allow deactivation */
TAILQ_INIT(&kernel_pmap->pm_pvchunk);
LIST_INIT(&allpmaps);
@@ -930,19 +932,20 @@ pmap_update_pde_invalidate(vm_offset_t va, pd_entry_t newpde)
void
pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
{
- cpumask_t cpumask, other_cpus;
+ cpuset_t cpumask, other_cpus;
sched_pin();
- if (pmap == kernel_pmap || pmap->pm_active == all_cpus) {
+ if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) {
invlpg(va);
smp_invlpg(va);
} else {
cpumask = PCPU_GET(cpumask);
other_cpus = PCPU_GET(other_cpus);
- if (pmap->pm_active & cpumask)
+ if (CPU_OVERLAP(&pmap->pm_active, &cpumask))
invlpg(va);
- if (pmap->pm_active & other_cpus)
- smp_masked_invlpg(pmap->pm_active & other_cpus, va);
+ CPU_AND(&other_cpus, &pmap->pm_active);
+ if (!CPU_EMPTY(&other_cpus))
+ smp_masked_invlpg(other_cpus, va);
}
sched_unpin();
}
@@ -950,23 +953,23 @@ pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
void
pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
- cpumask_t cpumask, other_cpus;
+ cpuset_t cpumask, other_cpus;
vm_offset_t addr;
sched_pin();
- if (pmap == kernel_pmap || pmap->pm_active == all_cpus) {
+ if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) {
for (addr = sva; addr < eva; addr += PAGE_SIZE)
invlpg(addr);
smp_invlpg_range(sva, eva);
} else {
cpumask = PCPU_GET(cpumask);
other_cpus = PCPU_GET(other_cpus);
- if (pmap->pm_active & cpumask)
+ if (CPU_OVERLAP(&pmap->pm_active, &cpumask))
for (addr = sva; addr < eva; addr += PAGE_SIZE)
invlpg(addr);
- if (pmap->pm_active & other_cpus)
- smp_masked_invlpg_range(pmap->pm_active & other_cpus,
- sva, eva);
+ CPU_AND(&other_cpus, &pmap->pm_active);
+ if (!CPU_EMPTY(&other_cpus))
+ smp_masked_invlpg_range(other_cpus, sva, eva);
}
sched_unpin();
}
@@ -974,19 +977,20 @@ pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
void
pmap_invalidate_all(pmap_t pmap)
{
- cpumask_t cpumask, other_cpus;
+ cpuset_t cpumask, other_cpus;
sched_pin();
- if (pmap == kernel_pmap || pmap->pm_active == all_cpus) {
+ if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) {
invltlb();
smp_invltlb();
} else {
cpumask = PCPU_GET(cpumask);
other_cpus = PCPU_GET(other_cpus);
- if (pmap->pm_active & cpumask)
+ if (CPU_OVERLAP(&pmap->pm_active, &cpumask))
invltlb();
- if (pmap->pm_active & other_cpus)
- smp_masked_invltlb(pmap->pm_active & other_cpus);
+ CPU_AND(&other_cpus, &pmap->pm_active);
+ if (!CPU_EMPTY(&other_cpus))
+ smp_masked_invltlb(other_cpus);
}
sched_unpin();
}
@@ -1002,8 +1006,8 @@ pmap_invalidate_cache(void)
}
struct pde_action {
- cpumask_t store; /* processor that updates the PDE */
- cpumask_t invalidate; /* processors that invalidate their TLB */
+ cpuset_t store; /* processor that updates the PDE */
+ cpuset_t invalidate; /* processors that invalidate their TLB */
vm_offset_t va;
pd_entry_t *pde;
pd_entry_t newpde;
@@ -1016,7 +1020,10 @@ pmap_update_pde_kernel(void *arg)
pd_entry_t *pde;
pmap_t pmap;
- if (act->store == PCPU_GET(cpumask))
+ sched_pin();
+ if (!CPU_CMP(&act->store, PCPU_PTR(cpumask))) {
+ sched_unpin();
+
/*
* Elsewhere, this operation requires allpmaps_lock for
* synchronization. Here, it does not because it is being
@@ -1026,6 +1033,8 @@ pmap_update_pde_kernel(void *arg)
pde = pmap_pde(pmap, act->va);
pde_store(pde, act->newpde);
}
+ } else
+ sched_unpin();
}
static void
@@ -1033,8 +1042,12 @@ pmap_update_pde_user(void *arg)
{
struct pde_action *act = arg;
- if (act->store == PCPU_GET(cpumask))
+ sched_pin();
+ if (!CPU_CMP(&act->store, PCPU_PTR(cpumask))) {
+ sched_unpin();
pde_store(act->pde, act->newpde);
+ } else
+ sched_unpin();
}
static void
@@ -1042,8 +1055,12 @@ pmap_update_pde_teardown(void *arg)
{
struct pde_action *act = arg;
- if ((act->invalidate & PCPU_GET(cpumask)) != 0)
+ sched_pin();
+ if (CPU_OVERLAP(&act->invalidate, PCPU_PTR(cpumask))) {
+ sched_unpin();
pmap_update_pde_invalidate(act->va, act->newpde);
+ } else
+ sched_unpin();
}
/*
@@ -1058,21 +1075,23 @@ static void
pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde)
{
struct pde_action act;
- cpumask_t active, cpumask;
+ cpuset_t active, cpumask, other_cpus;
sched_pin();
cpumask = PCPU_GET(cpumask);
+ other_cpus = PCPU_GET(other_cpus);
if (pmap == kernel_pmap)
active = all_cpus;
else
active = pmap->pm_active;
- if ((active & PCPU_GET(other_cpus)) != 0) {
+ if (CPU_OVERLAP(&active, &other_cpus)) {
act.store = cpumask;
act.invalidate = active;
act.va = va;
act.pde = pde;
act.newpde = newpde;
- smp_rendezvous_cpus(cpumask | active,
+ CPU_OR(&cpumask, &active);
+ smp_rendezvous_cpus(cpumask,
smp_no_rendevous_barrier, pmap == kernel_pmap ?
pmap_update_pde_kernel : pmap_update_pde_user,
pmap_update_pde_teardown, &act);
@@ -1081,7 +1100,7 @@ pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde)
pmap_kenter_pde(va, newpde);
else
pde_store(pde, newpde);
- if ((active & cpumask) != 0)
+ if (CPU_OVERLAP(&active, &cpumask))
pmap_update_pde_invalidate(va, newpde);
}
sched_unpin();
@@ -1095,7 +1114,7 @@ PMAP_INLINE void
pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
{
- if (pmap == kernel_pmap || pmap->pm_active)
+ if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
invlpg(va);
}
@@ -1104,7 +1123,7 @@ pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
vm_offset_t addr;
- if (pmap == kernel_pmap || pmap->pm_active)
+ if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
for (addr = sva; addr < eva; addr += PAGE_SIZE)
invlpg(addr);
}
@@ -1113,7 +1132,7 @@ PMAP_INLINE void
pmap_invalidate_all(pmap_t pmap)
{
- if (pmap == kernel_pmap || pmap->pm_active)
+ if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
invltlb();
}
@@ -1132,7 +1151,7 @@ pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde)
pmap_kenter_pde(va, newpde);
else
pde_store(pde, newpde);
- if (pmap == kernel_pmap || pmap->pm_active)
+ if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
pmap_update_pde_invalidate(va, newpde);
}
#endif /* !SMP */
@@ -1689,7 +1708,7 @@ pmap_pinit0(pmap_t pmap)
pmap->pm_pdpt = (pdpt_entry_t *)(KERNBASE + (vm_offset_t)IdlePDPT);
#endif
pmap->pm_root = NULL;
- pmap->pm_active = 0;
+ CPU_ZERO(&pmap->pm_active);
PCPU_SET(curpmap, pmap);
TAILQ_INIT(&pmap->pm_pvchunk);
bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
@@ -1770,7 +1789,7 @@ pmap_pinit(pmap_t pmap)
#endif
}
- pmap->pm_active = 0;
+ CPU_ZERO(&pmap->pm_active);
TAILQ_INIT(&pmap->pm_pvchunk);
bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
@@ -1886,7 +1905,7 @@ retry:
* Deal with a SMP shootdown of other users of the pmap that we are
* trying to dispose of. This can be a bit hairy.
*/
-static cpumask_t *lazymask;
+static cpuset_t *lazymask;
static u_int lazyptd;
static volatile u_int lazywait;
@@ -1895,36 +1914,42 @@ void pmap_lazyfix_action(void);
void
pmap_lazyfix_action(void)
{
- cpumask_t mymask = PCPU_GET(cpumask);
#ifdef COUNT_IPIS
(*ipi_lazypmap_counts[PCPU_GET(cpuid)])++;
#endif
if (rcr3() == lazyptd)
load_cr3(PCPU_GET(curpcb)->pcb_cr3);
- atomic_clear_int(lazymask, mymask);
+ CPU_CLR_ATOMIC(PCPU_GET(cpuid), lazymask);
atomic_store_rel_int(&lazywait, 1);
}
static void
-pmap_lazyfix_self(cpumask_t mymask)
+pmap_lazyfix_self(cpuset_t mymask)
{
if (rcr3() == lazyptd)
load_cr3(PCPU_GET(curpcb)->pcb_cr3);
- atomic_clear_int(lazymask, mymask);
+ CPU_NAND_ATOMIC(lazymask, &mymask);
}
static void
pmap_lazyfix(pmap_t pmap)
{
- cpumask_t mymask, mask;
+ cpuset_t mymask, mask;
u_int spins;
+ int lsb;
- while ((mask = pmap->pm_active) != 0) {
+ mask = pmap->pm_active;
+ while (!CPU_EMPTY(&mask)) {
spins = 50000000;
- mask = mask & -mask; /* Find least significant set bit */
+
+ /* Find least significant set bit. */
+ lsb = cpusetobj_ffs(&mask);
+ MPASS(lsb != 0);
+ lsb--;
+ CPU_SETOF(lsb, &mask);
mtx_lock_spin(&smp_ipi_mtx);
#ifdef PAE
lazyptd = vtophys(pmap->pm_pdpt);
@@ -1932,7 +1957,7 @@ pmap_lazyfix(pmap_t pmap)
lazyptd = vtophys(pmap->pm_pdir);
#endif
mymask = PCPU_GET(cpumask);
- if (mask == mymask) {
+ if (!CPU_CMP(&mask, &mymask)) {
lazymask = &pmap->pm_active;
pmap_lazyfix_self(mymask);
} else {
@@ -1949,6 +1974,7 @@ pmap_lazyfix(pmap_t pmap)
mtx_unlock_spin(&smp_ipi_mtx);
if (spins == 0)
printf("pmap_lazyfix: spun for 50000000\n");
+ mask = pmap->pm_active;
}
}
@@ -1968,7 +1994,7 @@ pmap_lazyfix(pmap_t pmap)
cr3 = vtophys(pmap->pm_pdir);
if (cr3 == rcr3()) {
load_cr3(PCPU_GET(curpcb)->pcb_cr3);
- pmap->pm_active &= ~(PCPU_GET(cpumask));
+ CPU_CLR(PCPU_GET(cpuid), &pmap->pm_active);
}
}
#endif /* SMP */
@@ -5078,11 +5104,11 @@ pmap_activate(struct thread *td)
pmap = vmspace_pmap(td->td_proc->p_vmspace);
oldpmap = PCPU_GET(curpmap);
#if defined(SMP)
- atomic_clear_int(&oldpmap->pm_active, PCPU_GET(cpumask));
- atomic_set_int(&pmap->pm_active, PCPU_GET(cpumask));
+ CPU_NAND_ATOMIC(&oldpmap->pm_active, PCPU_PTR(cpumask));
+ CPU_OR_ATOMIC(&pmap->pm_active, PCPU_PTR(cpumask));
#else
- oldpmap->pm_active &= ~1;
- pmap->pm_active |= 1;
+ CPU_NAND(&oldpmap->pm_active, PCPU_PTR(cpumask));
+ CPU_OR(&pmap->pm_active, PCPU_PTR(cpumask));
#endif
#ifdef PAE
cr3 = vtophys(pmap->pm_pdpt);
diff --git a/sys/i386/i386/vm_machdep.c b/sys/i386/i386/vm_machdep.c
index 232e1a1..a084e09 100644
--- a/sys/i386/i386/vm_machdep.c
+++ b/sys/i386/i386/vm_machdep.c
@@ -573,11 +573,13 @@ kvtop(void *addr)
static void
cpu_reset_proxy()
{
+ cpuset_t tcrp;
cpu_reset_proxy_active = 1;
while (cpu_reset_proxy_active == 1)
; /* Wait for other cpu to see that we've started */
- stop_cpus((1<<cpu_reset_proxyid));
+ CPU_SETOF(cpu_reset_proxyid, &tcrp);
+ stop_cpus(tcrp);
printf("cpu_reset_proxy: Stopped CPU %d\n", cpu_reset_proxyid);
DELAY(1000000);
cpu_reset_real();
@@ -596,25 +598,29 @@ cpu_reset()
#endif
#ifdef SMP
- cpumask_t map;
+ cpuset_t map;
u_int cnt;
if (smp_active) {
- map = PCPU_GET(other_cpus) & ~stopped_cpus;
- if (map != 0) {
+ sched_pin();
+ map = PCPU_GET(other_cpus);
+ CPU_NAND(&map, &stopped_cpus);
+ if (!CPU_EMPTY(&map)) {
printf("cpu_reset: Stopping other CPUs\n");
stop_cpus(map);
}
if (PCPU_GET(cpuid) != 0) {
cpu_reset_proxyid = PCPU_GET(cpuid);
+ sched_unpin();
cpustop_restartfunc = cpu_reset_proxy;
cpu_reset_proxy_active = 0;
printf("cpu_reset: Restarting BSP\n");
/* Restart CPU #0. */
/* XXX: restart_cpus(1 << 0); */
- atomic_store_rel_int(&started_cpus, (1 << 0));
+ CPU_SETOF(0, &started_cpus);
+ wmb();
cnt = 0;
while (cpu_reset_proxy_active == 0 && cnt < 10000000)
@@ -626,7 +632,8 @@ cpu_reset()
while (1);
/* NOTREACHED */
- }
+ } else
+ sched_unpin();
DELAY(1000000);
}
@@ -795,7 +802,7 @@ sf_buf_alloc(struct vm_page *m, int flags)
struct sf_head *hash_list;
struct sf_buf *sf;
#ifdef SMP
- cpumask_t cpumask, other_cpus;
+ cpuset_t cpumask, other_cpus;
#endif
int error;
@@ -867,22 +874,23 @@ sf_buf_alloc(struct vm_page *m, int flags)
*/
#ifdef SMP
if ((opte & (PG_V | PG_A)) == (PG_V | PG_A))
- sf->cpumask = 0;
+ CPU_ZERO(&sf->cpumask);
shootdown:
sched_pin();
cpumask = PCPU_GET(cpumask);
- if ((sf->cpumask & cpumask) == 0) {
- sf->cpumask |= cpumask;
+ if (!CPU_OVERLAP(&cpumask, &sf->cpumask)) {
+ CPU_OR(&sf->cpumask, &cpumask);
invlpg(sf->kva);
}
if ((flags & SFB_CPUPRIVATE) == 0) {
- other_cpus = PCPU_GET(other_cpus) & ~sf->cpumask;
- if (other_cpus != 0) {
- sf->cpumask |= other_cpus;
+ other_cpus = PCPU_GET(other_cpus);
+ CPU_NAND(&other_cpus, &sf->cpumask);
+ if (!CPU_EMPTY(&other_cpus)) {
+ CPU_OR(&sf->cpumask, &other_cpus);
smp_masked_invlpg(other_cpus, sf->kva);
}
}
- sched_unpin();
+ sched_unpin();
#else
if ((opte & (PG_V | PG_A)) == (PG_V | PG_A))
pmap_invalidate_page(kernel_pmap, sf->kva);
diff --git a/sys/i386/include/_types.h b/sys/i386/include/_types.h
index 7a969fe..3194fd6 100644
--- a/sys/i386/include/_types.h
+++ b/sys/i386/include/_types.h
@@ -69,7 +69,6 @@ typedef unsigned long long __uint64_t;
* Standard type definitions.
*/
typedef unsigned long __clock_t; /* clock()... */
-typedef unsigned int __cpumask_t;
typedef __int32_t __critical_t;
typedef long double __double_t;
typedef long double __float_t;
diff --git a/sys/i386/include/pmap.h b/sys/i386/include/pmap.h
index eeada2e..3012a00 100644
--- a/sys/i386/include/pmap.h
+++ b/sys/i386/include/pmap.h
@@ -155,6 +155,7 @@
#ifndef LOCORE
#include <sys/queue.h>
+#include <sys/_cpuset.h>
#include <sys/_lock.h>
#include <sys/_mutex.h>
@@ -433,7 +434,7 @@ struct pmap {
struct mtx pm_mtx;
pd_entry_t *pm_pdir; /* KVA of page directory */
TAILQ_HEAD(,pv_chunk) pm_pvchunk; /* list of mappings in pmap */
- cpumask_t pm_active; /* active on cpus */
+ cpuset_t pm_active; /* active on cpus */
struct pmap_statistics pm_stats; /* pmap statistics */
LIST_ENTRY(pmap) pm_list; /* List of all pmaps */
#ifdef PAE
diff --git a/sys/i386/include/sf_buf.h b/sys/i386/include/sf_buf.h
index 7bc1095..415dcbb 100644
--- a/sys/i386/include/sf_buf.h
+++ b/sys/i386/include/sf_buf.h
@@ -29,6 +29,7 @@
#ifndef _MACHINE_SF_BUF_H_
#define _MACHINE_SF_BUF_H_
+#include <sys/_cpuset.h>
#include <sys/queue.h>
struct vm_page;
@@ -40,7 +41,7 @@ struct sf_buf {
vm_offset_t kva; /* va of mapping */
int ref_count; /* usage of this mapping */
#ifdef SMP
- cpumask_t cpumask; /* cpus on which mapping is valid */
+ cpuset_t cpumask; /* cpus on which mapping is valid */
#endif
};
diff --git a/sys/i386/include/smp.h b/sys/i386/include/smp.h
index d364cd9..04d67c9 100644
--- a/sys/i386/include/smp.h
+++ b/sys/i386/include/smp.h
@@ -66,17 +66,16 @@ void ipi_bitmap_handler(struct trapframe frame);
#endif
void ipi_cpu(int cpu, u_int ipi);
int ipi_nmi_handler(void);
-void ipi_selected(cpumask_t cpus, u_int ipi);
+void ipi_selected(cpuset_t cpus, u_int ipi);
u_int mp_bootaddress(u_int);
-int mp_grab_cpu_hlt(void);
void smp_cache_flush(void);
void smp_invlpg(vm_offset_t addr);
-void smp_masked_invlpg(cpumask_t mask, vm_offset_t addr);
+void smp_masked_invlpg(cpuset_t mask, vm_offset_t addr);
void smp_invlpg_range(vm_offset_t startva, vm_offset_t endva);
-void smp_masked_invlpg_range(cpumask_t mask, vm_offset_t startva,
+void smp_masked_invlpg_range(cpuset_t mask, vm_offset_t startva,
vm_offset_t endva);
void smp_invltlb(void);
-void smp_masked_invltlb(cpumask_t mask);
+void smp_masked_invltlb(cpuset_t mask);
#ifdef XEN
void ipi_to_irq_init(void);
diff --git a/sys/i386/xen/mp_machdep.c b/sys/i386/xen/mp_machdep.c
index 2919570..2d05596 100644
--- a/sys/i386/xen/mp_machdep.c
+++ b/sys/i386/xen/mp_machdep.c
@@ -52,6 +52,7 @@ __FBSDID("$FreeBSD$");
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/cons.h> /* cngetc() */
+#include <sys/cpuset.h>
#ifdef GPROF
#include <sys/gmon.h>
#endif
@@ -116,7 +117,7 @@ volatile int smp_tlb_wait;
typedef void call_data_func_t(uintptr_t , uintptr_t);
static u_int logical_cpus;
-static volatile cpumask_t ipi_nmi_pending;
+static volatile cpuset_t ipi_nmi_pending;
/* used to hold the AP's until we are ready to release them */
static struct mtx ap_boot_mtx;
@@ -149,7 +150,7 @@ static int start_ap(int apic_id);
static void release_aps(void *dummy);
static u_int hyperthreading_cpus;
-static cpumask_t hyperthreading_cpus_mask;
+static cpuset_t hyperthreading_cpus_mask;
extern void Xhypervisor_callback(void);
extern void failsafe_callback(void);
@@ -239,7 +240,7 @@ cpu_mp_probe(void)
* Always record BSP in CPU map so that the mbuf init code works
* correctly.
*/
- all_cpus = 1;
+ CPU_SETOF(0, &all_cpus);
if (mp_ncpus == 0) {
/*
* No CPUs were found, so this must be a UP system. Setup
@@ -293,7 +294,8 @@ cpu_mp_start(void)
start_all_aps();
/* Setup the initial logical CPUs info. */
- logical_cpus = logical_cpus_mask = 0;
+ logical_cpus = 0;
+ CPU_ZERO(&logical_cpus_mask);
if (cpu_feature & CPUID_HTT)
logical_cpus = (cpu_procinfo & CPUID_HTT_CORES) >> 16;
@@ -521,6 +523,7 @@ xen_smp_intr_init_cpus(void *unused)
void
init_secondary(void)
{
+ cpuset_t tcpuset, tallcpus;
vm_offset_t addr;
int gsel_tss;
@@ -600,18 +603,21 @@ init_secondary(void)
CTR1(KTR_SMP, "SMP: AP CPU #%d Launched", PCPU_GET(cpuid));
printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid));
+ tcpuset = PCPU_GET(cpumask);
/* Determine if we are a logical CPU. */
if (logical_cpus > 1 && PCPU_GET(apic_id) % logical_cpus != 0)
- logical_cpus_mask |= PCPU_GET(cpumask);
+ CPU_OR(&logical_cpus_mask, &tcpuset);
/* Determine if we are a hyperthread. */
if (hyperthreading_cpus > 1 &&
PCPU_GET(apic_id) % hyperthreading_cpus != 0)
- hyperthreading_cpus_mask |= PCPU_GET(cpumask);
+ CPU_OR(&hyperthreading_cpus_mask, &tcpuset);
/* Build our map of 'other' CPUs. */
- PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask));
+ tallcpus = all_cpus;
+ CPU_NAND(&tallcpus, &tcpuset);
+ PCPU_SET(other_cpus, tallcpus);
#if 0
if (bootverbose)
lapic_dump("AP");
@@ -725,6 +731,7 @@ assign_cpu_ids(void)
int
start_all_aps(void)
{
+ cpuset_t tallcpus;
int x,apic_id, cpu;
struct pcpu *pc;
@@ -778,12 +785,14 @@ start_all_aps(void)
panic("bye-bye");
}
- all_cpus |= (1 << cpu); /* record AP in CPU map */
+ CPU_SET(cpu, &all_cpus); /* record AP in CPU map */
}
/* build our map of 'other' CPUs */
- PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask));
+ tallcpus = all_cpus;
+ CPU_NAND(&tallcpus, PCPU_PTR(cpumask));
+ PCPU_SET(other_cpus, tallcpus);
pmap_invalidate_range(kernel_pmap, 0, NKPT * NBPDR - 1);
@@ -1012,29 +1021,20 @@ smp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2)
}
static void
-smp_targeted_tlb_shootdown(cpumask_t mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2)
+smp_targeted_tlb_shootdown(cpuset_t mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2)
{
- int ncpu, othercpus;
+ int cpu, ncpu, othercpus;
struct _call_data data;
othercpus = mp_ncpus - 1;
- if (mask == (u_int)-1) {
- ncpu = othercpus;
- if (ncpu < 1)
+ if (CPU_ISFULLSET(&mask)) {
+ if (othercpus < 1)
return;
} else {
- mask &= ~PCPU_GET(cpumask);
- if (mask == 0)
- return;
- ncpu = bitcount32(mask);
- if (ncpu > othercpus) {
- /* XXX this should be a panic offence */
- printf("SMP: tlb shootdown to %d other cpus (only have %d)\n",
- ncpu, othercpus);
- ncpu = othercpus;
- }
- /* XXX should be a panic, implied by mask == 0 above */
- if (ncpu < 1)
+ critical_enter();
+ CPU_NAND(&mask, PCPU_PTR(cpumask));
+ critical_exit();
+ if (CPU_EMPTY(&mask))
return;
}
if (!(read_eflags() & PSL_I))
@@ -1046,10 +1046,20 @@ smp_targeted_tlb_shootdown(cpumask_t mask, u_int vector, vm_offset_t addr1, vm_o
call_data->arg1 = addr1;
call_data->arg2 = addr2;
atomic_store_rel_int(&smp_tlb_wait, 0);
- if (mask == (u_int)-1)
+ if (CPU_ISFULLSET(&mask)) {
+ ncpu = othercpus;
ipi_all_but_self(vector);
- else
- ipi_selected(mask, vector);
+ } else {
+ ncpu = 0;
+ while ((cpu = cpusetobj_ffs(&mask)) != 0) {
+ cpu--;
+ CPU_CLR(cpu, &mask);
+ CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu,
+ vector);
+ ipi_send_cpu(cpu, vector);
+ ncpu++;
+ }
+ }
while (smp_tlb_wait < ncpu)
ia32_pause();
call_data = NULL;
@@ -1092,7 +1102,7 @@ smp_invlpg_range(vm_offset_t addr1, vm_offset_t addr2)
}
void
-smp_masked_invltlb(cpumask_t mask)
+smp_masked_invltlb(cpuset_t mask)
{
if (smp_started) {
@@ -1101,7 +1111,7 @@ smp_masked_invltlb(cpumask_t mask)
}
void
-smp_masked_invlpg(cpumask_t mask, vm_offset_t addr)
+smp_masked_invlpg(cpuset_t mask, vm_offset_t addr)
{
if (smp_started) {
@@ -1110,7 +1120,7 @@ smp_masked_invlpg(cpumask_t mask, vm_offset_t addr)
}
void
-smp_masked_invlpg_range(cpumask_t mask, vm_offset_t addr1, vm_offset_t addr2)
+smp_masked_invlpg_range(cpuset_t mask, vm_offset_t addr1, vm_offset_t addr2)
{
if (smp_started) {
@@ -1122,7 +1132,7 @@ smp_masked_invlpg_range(cpumask_t mask, vm_offset_t addr1, vm_offset_t addr2)
* send an IPI to a set of cpus.
*/
void
-ipi_selected(cpumask_t cpus, u_int ipi)
+ipi_selected(cpuset_t cpus, u_int ipi)
{
int cpu;
@@ -1132,11 +1142,11 @@ ipi_selected(cpumask_t cpus, u_int ipi)
* Set the mask of receiving CPUs for this purpose.
*/
if (ipi == IPI_STOP_HARD)
- atomic_set_int(&ipi_nmi_pending, cpus);
+ CPU_OR_ATOMIC(&ipi_nmi_pending, &cpus);
- while ((cpu = ffs(cpus)) != 0) {
+ while ((cpu = cpusetobj_ffs(&cpus)) != 0) {
cpu--;
- cpus &= ~(1 << cpu);
+ CPU_CLR(cpu, &cpus);
CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi);
ipi_send_cpu(cpu, ipi);
}
@@ -1155,7 +1165,7 @@ ipi_cpu(int cpu, u_int ipi)
* Set the mask of receiving CPUs for this purpose.
*/
if (ipi == IPI_STOP_HARD)
- atomic_set_int(&ipi_nmi_pending, 1 << cpu);
+ CPU_SET_ATOMIC(cpu, &ipi_nmi_pending);
CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi);
ipi_send_cpu(cpu, ipi);
@@ -1167,23 +1177,27 @@ ipi_cpu(int cpu, u_int ipi)
void
ipi_all_but_self(u_int ipi)
{
+ cpuset_t other_cpus;
/*
* IPI_STOP_HARD maps to a NMI and the trap handler needs a bit
* of help in order to understand what is the source.
* Set the mask of receiving CPUs for this purpose.
*/
+ sched_pin();
+ other_cpus = PCPU_GET(other_cpus);
+ sched_unpin();
if (ipi == IPI_STOP_HARD)
- atomic_set_int(&ipi_nmi_pending, PCPU_GET(other_cpus));
+ CPU_OR_ATOMIC(&ipi_nmi_pending, &other_cpus);
CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
- ipi_selected(PCPU_GET(other_cpus), ipi);
+ ipi_selected(other_cpus, ipi);
}
int
ipi_nmi_handler()
{
- cpumask_t cpumask;
+ cpuset_t cpumask;
/*
* As long as there is not a simple way to know about a NMI's
@@ -1191,11 +1205,13 @@ ipi_nmi_handler()
* the global pending bitword an IPI_STOP_HARD has been issued
* and should be handled.
*/
+ sched_pin();
cpumask = PCPU_GET(cpumask);
- if ((ipi_nmi_pending & cpumask) == 0)
+ sched_unpin();
+ if (!CPU_OVERLAP(&ipi_nmi_pending, &cpumask))
return (1);
- atomic_clear_int(&ipi_nmi_pending, cpumask);
+ CPU_NAND_ATOMIC(&ipi_nmi_pending, &cpumask);
cpustop_handler();
return (0);
}
@@ -1207,20 +1223,25 @@ ipi_nmi_handler()
void
cpustop_handler(void)
{
- int cpu = PCPU_GET(cpuid);
- int cpumask = PCPU_GET(cpumask);
+ cpuset_t cpumask;
+ int cpu;
+
+ sched_pin();
+ cpumask = PCPU_GET(cpumask);
+ cpu = PCPU_GET(cpuid);
+ sched_unpin();
savectx(&stoppcbs[cpu]);
/* Indicate that we are stopped */
- atomic_set_int(&stopped_cpus, cpumask);
+ CPU_OR_ATOMIC(&stopped_cpus, &cpumask);
/* Wait for restart */
- while (!(started_cpus & cpumask))
+ while (!CPU_OVERLAP(&started_cpus, &cpumask))
ia32_pause();
- atomic_clear_int(&started_cpus, cpumask);
- atomic_clear_int(&stopped_cpus, cpumask);
+ CPU_NAND_ATOMIC(&started_cpus, &cpumask);
+ CPU_NAND_ATOMIC(&stopped_cpus, &cpumask);
if (cpu == 0 && cpustop_restartfunc != NULL) {
cpustop_restartfunc();
diff --git a/sys/i386/xen/pmap.c b/sys/i386/xen/pmap.c
index eb3c803..3efa4f1 100644
--- a/sys/i386/xen/pmap.c
+++ b/sys/i386/xen/pmap.c
@@ -422,7 +422,7 @@ pmap_bootstrap(vm_paddr_t firstaddr)
#ifdef PAE
kernel_pmap->pm_pdpt = (pdpt_entry_t *) (KERNBASE + (u_int)IdlePDPT);
#endif
- kernel_pmap->pm_active = -1; /* don't allow deactivation */
+ CPU_FILL(&kernel_pmap->pm_active); /* don't allow deactivation */
TAILQ_INIT(&kernel_pmap->pm_pvchunk);
LIST_INIT(&allpmaps);
mtx_init(&allpmaps_lock, "allpmaps", NULL, MTX_SPIN);
@@ -802,22 +802,23 @@ pmap_cache_bits(int mode, boolean_t is_pde)
void
pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
{
- cpumask_t cpumask, other_cpus;
+ cpuset_t cpumask, other_cpus;
CTR2(KTR_PMAP, "pmap_invalidate_page: pmap=%p va=0x%x",
pmap, va);
sched_pin();
- if (pmap == kernel_pmap || pmap->pm_active == all_cpus) {
+ if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) {
invlpg(va);
smp_invlpg(va);
} else {
cpumask = PCPU_GET(cpumask);
other_cpus = PCPU_GET(other_cpus);
- if (pmap->pm_active & cpumask)
+ if (CPU_OVERLAP(&pmap->pm_active, &cpumask))
invlpg(va);
- if (pmap->pm_active & other_cpus)
- smp_masked_invlpg(pmap->pm_active & other_cpus, va);
+ CPU_AND(&other_cpus, &pmap->pm_active);
+ if (!CPU_EMPTY(&other_cpus))
+ smp_masked_invlpg(other_cpus, va);
}
sched_unpin();
PT_UPDATES_FLUSH();
@@ -826,26 +827,26 @@ pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
void
pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
- cpumask_t cpumask, other_cpus;
+ cpuset_t cpumask, other_cpus;
vm_offset_t addr;
CTR3(KTR_PMAP, "pmap_invalidate_page: pmap=%p eva=0x%x sva=0x%x",
pmap, sva, eva);
sched_pin();
- if (pmap == kernel_pmap || pmap->pm_active == all_cpus) {
+ if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) {
for (addr = sva; addr < eva; addr += PAGE_SIZE)
invlpg(addr);
smp_invlpg_range(sva, eva);
} else {
cpumask = PCPU_GET(cpumask);
other_cpus = PCPU_GET(other_cpus);
- if (pmap->pm_active & cpumask)
+ if (CPU_OVERLAP(&pmap->pm_active, &cpumask))
for (addr = sva; addr < eva; addr += PAGE_SIZE)
invlpg(addr);
- if (pmap->pm_active & other_cpus)
- smp_masked_invlpg_range(pmap->pm_active & other_cpus,
- sva, eva);
+ CPU_AND(&other_cpus, &pmap->pm_active);
+ if (!CPU_EMPTY(&other_cpus))
+ smp_masked_invlpg_range(other_cpus, sva, eva);
}
sched_unpin();
PT_UPDATES_FLUSH();
@@ -854,21 +855,22 @@ pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
void
pmap_invalidate_all(pmap_t pmap)
{
- cpumask_t cpumask, other_cpus;
+ cpuset_t cpumask, other_cpus;
CTR1(KTR_PMAP, "pmap_invalidate_page: pmap=%p", pmap);
sched_pin();
- if (pmap == kernel_pmap || pmap->pm_active == all_cpus) {
+ if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) {
invltlb();
smp_invltlb();
} else {
cpumask = PCPU_GET(cpumask);
other_cpus = PCPU_GET(other_cpus);
- if (pmap->pm_active & cpumask)
+ if (CPU_OVERLAP(&pmap->pm_active, &cpumask))
invltlb();
- if (pmap->pm_active & other_cpus)
- smp_masked_invltlb(pmap->pm_active & other_cpus);
+ CPU_AND(&other_cpus, &pmap->pm_active);
+ if (!CPU_EMPTY(&other_cpus))
+ smp_masked_invltlb(other_cpus);
}
sched_unpin();
}
@@ -893,7 +895,7 @@ pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
CTR2(KTR_PMAP, "pmap_invalidate_page: pmap=%p va=0x%x",
pmap, va);
- if (pmap == kernel_pmap || pmap->pm_active)
+ if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
invlpg(va);
PT_UPDATES_FLUSH();
}
@@ -907,7 +909,7 @@ pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
CTR3(KTR_PMAP, "pmap_invalidate_range: pmap=%p sva=0x%x eva=0x%x",
pmap, sva, eva);
- if (pmap == kernel_pmap || pmap->pm_active)
+ if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
for (addr = sva; addr < eva; addr += PAGE_SIZE)
invlpg(addr);
PT_UPDATES_FLUSH();
@@ -919,7 +921,7 @@ pmap_invalidate_all(pmap_t pmap)
CTR1(KTR_PMAP, "pmap_invalidate_all: pmap=%p", pmap);
- if (pmap == kernel_pmap || pmap->pm_active)
+ if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
invltlb();
}
@@ -1449,7 +1451,7 @@ pmap_pinit0(pmap_t pmap)
#ifdef PAE
pmap->pm_pdpt = (pdpt_entry_t *)(KERNBASE + (vm_offset_t)IdlePDPT);
#endif
- pmap->pm_active = 0;
+ CPU_ZERO(&pmap->pm_active);
PCPU_SET(curpmap, pmap);
TAILQ_INIT(&pmap->pm_pvchunk);
bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
@@ -1556,7 +1558,7 @@ pmap_pinit(pmap_t pmap)
}
xen_flush_queue();
vm_page_unlock_queues();
- pmap->pm_active = 0;
+ CPU_ZERO(&pmap->pm_active);
TAILQ_INIT(&pmap->pm_pvchunk);
bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
@@ -1686,7 +1688,7 @@ retry:
* Deal with a SMP shootdown of other users of the pmap that we are
* trying to dispose of. This can be a bit hairy.
*/
-static cpumask_t *lazymask;
+static cpuset_t *lazymask;
static u_int lazyptd;
static volatile u_int lazywait;
@@ -1695,36 +1697,42 @@ void pmap_lazyfix_action(void);
void
pmap_lazyfix_action(void)
{
- cpumask_t mymask = PCPU_GET(cpumask);
#ifdef COUNT_IPIS
(*ipi_lazypmap_counts[PCPU_GET(cpuid)])++;
#endif
if (rcr3() == lazyptd)
load_cr3(PCPU_GET(curpcb)->pcb_cr3);
- atomic_clear_int(lazymask, mymask);
+ CPU_CLR_ATOMIC(PCPU_GET(cpuid), lazymask);
atomic_store_rel_int(&lazywait, 1);
}
static void
-pmap_lazyfix_self(cpumask_t mymask)
+pmap_lazyfix_self(cpuset_t mymask)
{
if (rcr3() == lazyptd)
load_cr3(PCPU_GET(curpcb)->pcb_cr3);
- atomic_clear_int(lazymask, mymask);
+ CPU_NAND_ATOMIC(lazymask, &mymask);
}
static void
pmap_lazyfix(pmap_t pmap)
{
- cpumask_t mymask, mask;
+ cpuset_t mymask, mask;
u_int spins;
+ int lsb;
- while ((mask = pmap->pm_active) != 0) {
+ mask = pmap->pm_active;
+ while (!CPU_EMPTY(&mask)) {
spins = 50000000;
- mask = mask & -mask; /* Find least significant set bit */
+
+ /* Find least significant set bit. */
+ lsb = cpusetobj_ffs(&mask);
+ MPASS(lsb != 0);
+ lsb--;
+ CPU_SETOF(lsb, &mask);
mtx_lock_spin(&smp_ipi_mtx);
#ifdef PAE
lazyptd = vtophys(pmap->pm_pdpt);
@@ -1732,7 +1740,7 @@ pmap_lazyfix(pmap_t pmap)
lazyptd = vtophys(pmap->pm_pdir);
#endif
mymask = PCPU_GET(cpumask);
- if (mask == mymask) {
+ if (!CPU_CMP(&mask, &mymask)) {
lazymask = &pmap->pm_active;
pmap_lazyfix_self(mymask);
} else {
@@ -1749,6 +1757,7 @@ pmap_lazyfix(pmap_t pmap)
mtx_unlock_spin(&smp_ipi_mtx);
if (spins == 0)
printf("pmap_lazyfix: spun for 50000000\n");
+ mask = pmap->pm_active;
}
}
@@ -1768,7 +1777,7 @@ pmap_lazyfix(pmap_t pmap)
cr3 = vtophys(pmap->pm_pdir);
if (cr3 == rcr3()) {
load_cr3(PCPU_GET(curpcb)->pcb_cr3);
- pmap->pm_active &= ~(PCPU_GET(cpumask));
+ CPU_CLR(PCPU_GET(cpuid), &pmap->pm_active);
}
}
#endif /* SMP */
@@ -4123,11 +4132,11 @@ pmap_activate(struct thread *td)
pmap = vmspace_pmap(td->td_proc->p_vmspace);
oldpmap = PCPU_GET(curpmap);
#if defined(SMP)
- atomic_clear_int(&oldpmap->pm_active, PCPU_GET(cpumask));
- atomic_set_int(&pmap->pm_active, PCPU_GET(cpumask));
+ CPU_NAND_ATOMIC(&oldpmap->pm_active, PCPU_PTR(cpumask));
+ CPU_OR_ATOMIC(&pmap->pm_active, PCPU_PTR(cpumask));
#else
- oldpmap->pm_active &= ~1;
- pmap->pm_active |= 1;
+ CPU_NAND(&oldpmap->pm_active, PCPU_PTR(cpumask));
+ CPU_OR(&pmap->pm_active, PCPU_PTR(cpumask));
#endif
#ifdef PAE
cr3 = vtophys(pmap->pm_pdpt);
diff --git a/sys/ia64/acpica/acpi_machdep.c b/sys/ia64/acpica/acpi_machdep.c
index b7b612f..1466cfe 100644
--- a/sys/ia64/acpica/acpi_machdep.c
+++ b/sys/ia64/acpica/acpi_machdep.c
@@ -56,7 +56,14 @@ acpi_machdep_quirks(int *quirks)
void
acpi_cpu_c1()
{
+#ifdef INVARIANTS
+ register_t ie;
+
+ ie = intr_disable();
+ KASSERT(ie == 0, ("%s called with interrupts enabled\n", __func__));
+#endif
ia64_call_pal_static(PAL_HALT_LIGHT, 0, 0, 0);
+ ia64_enable_intr();
}
void *
diff --git a/sys/ia64/ia64/machdep.c b/sys/ia64/ia64/machdep.c
index 7252865..1463fb5 100644
--- a/sys/ia64/ia64/machdep.c
+++ b/sys/ia64/ia64/machdep.c
@@ -411,12 +411,34 @@ cpu_halt()
void
cpu_idle(int busy)
{
- struct ia64_pal_result res;
+ register_t ie;
- if (cpu_idle_hook != NULL)
+#if 0
+ if (!busy) {
+ critical_enter();
+ cpu_idleclock();
+ }
+#endif
+
+ ie = intr_disable();
+ KASSERT(ie != 0, ("%s called with interrupts disabled\n", __func__));
+
+ if (sched_runnable())
+ ia64_enable_intr();
+ else if (cpu_idle_hook != NULL) {
(*cpu_idle_hook)();
- else
- res = ia64_call_pal_static(PAL_HALT_LIGHT, 0, 0, 0);
+ /* The hook must enable interrupts! */
+ } else {
+ ia64_call_pal_static(PAL_HALT_LIGHT, 0, 0, 0);
+ ia64_enable_intr();
+ }
+
+#if 0
+ if (!busy) {
+ cpu_activeclock();
+ critical_exit();
+ }
+#endif
}
int
@@ -644,9 +666,12 @@ calculate_frequencies(void)
{
struct ia64_sal_result sal;
struct ia64_pal_result pal;
+ register_t ie;
+ ie = intr_disable();
sal = ia64_sal_entry(SAL_FREQ_BASE, 0, 0, 0, 0, 0, 0, 0);
pal = ia64_call_pal_static(PAL_FREQ_RATIOS, 0, 0, 0);
+ intr_restore(ie);
if (sal.sal_status == 0 && pal.pal_status == 0) {
if (bootverbose) {
@@ -761,6 +786,8 @@ ia64_init(void)
ia64_sal_init();
calculate_frequencies();
+ set_cputicker(ia64_get_itc, (u_long)itc_freq * 1000000, 0);
+
/*
* Setup the PCPU data for the bootstrap processor. It is needed
* by printf(). Also, since printf() has critical sections, we
diff --git a/sys/ia64/ia64/mp_machdep.c b/sys/ia64/ia64/mp_machdep.c
index b6b0bef..15afea0 100644
--- a/sys/ia64/ia64/mp_machdep.c
+++ b/sys/ia64/ia64/mp_machdep.c
@@ -139,18 +139,18 @@ ia64_ih_rndzvs(struct thread *td, u_int xiv, struct trapframe *tf)
static u_int
ia64_ih_stop(struct thread *td, u_int xiv, struct trapframe *tf)
{
- cpumask_t mybit;
+ cpuset_t mybit;
PCPU_INC(md.stats.pcs_nstops);
mybit = PCPU_GET(cpumask);
savectx(PCPU_PTR(md.pcb));
- atomic_set_int(&stopped_cpus, mybit);
- while ((started_cpus & mybit) == 0)
+ CPU_OR_ATOMIC(&stopped_cpus, &mybit);
+ while (!CPU_OVERLAP(&started_cpus, &mybit))
cpu_spinwait();
- atomic_clear_int(&started_cpus, mybit);
- atomic_clear_int(&stopped_cpus, mybit);
+ CPU_NAND_ATOMIC(&started_cpus, &mybit);
+ CPU_NAND_ATOMIC(&stopped_cpus, &mybit);
return (0);
}
@@ -286,7 +286,7 @@ cpu_mp_add(u_int acpi_id, u_int id, u_int eid)
cpuid = (IA64_LID_GET_SAPIC_ID(ia64_get_lid()) == sapic_id)
? 0 : smp_cpus++;
- KASSERT((all_cpus & (1UL << cpuid)) == 0,
+ KASSERT(!CPU_ISSET(cpuid, &all_cpus),
("%s: cpu%d already in CPU map", __func__, acpi_id));
if (cpuid != 0) {
@@ -300,7 +300,7 @@ cpu_mp_add(u_int acpi_id, u_int id, u_int eid)
pc->pc_acpi_id = acpi_id;
pc->pc_md.lid = IA64_LID_SET_SAPIC_ID(sapic_id);
- all_cpus |= (1UL << pc->pc_cpuid);
+ CPU_SET(pc->pc_cpuid, &all_cpus);
}
void
@@ -359,7 +359,8 @@ cpu_mp_start()
STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) {
pc->pc_md.current_pmap = kernel_pmap;
- pc->pc_other_cpus = all_cpus & ~pc->pc_cpumask;
+ pc->pc_other_cpus = all_cpus;
+ CPU_NAND(&pc->pc_other_cpus, &pc->pc_cpumask);
/* The BSP is obviously running already. */
if (pc->pc_cpuid == 0) {
pc->pc_md.awake = 1;
@@ -458,12 +459,12 @@ cpu_mp_unleash(void *dummy)
* send an IPI to a set of cpus.
*/
void
-ipi_selected(cpumask_t cpus, int ipi)
+ipi_selected(cpuset_t cpus, int ipi)
{
struct pcpu *pc;
STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) {
- if (cpus & pc->pc_cpumask)
+ if (CPU_OVERLAP(&cpus, &pc->pc_cpumask))
ipi_send(pc, ipi);
}
}
diff --git a/sys/ia64/ia64/pal.S b/sys/ia64/ia64/pal.S
index 2f0d0da..2e3f4cd 100644
--- a/sys/ia64/ia64/pal.S
+++ b/sys/ia64/ia64/pal.S
@@ -38,43 +38,40 @@ ia64_pal_entry: .quad 0
* u_int64_t arg1, u_int64_t arg2, u_int64_t arg3)
*/
ENTRY(ia64_call_pal_static, 4)
-
- .regstk 4,5,0,0
+
+ .regstk 4,4,0,0
palret = loc0
entry = loc1
rpsave = loc2
pfssave = loc3
-psrsave = loc4
- alloc pfssave=ar.pfs,4,5,0,0
+ alloc pfssave=ar.pfs,4,4,0,0
;;
mov rpsave=rp
-
movl entry=@gprel(ia64_pal_entry)
+
1: mov palret=ip // for return address
;;
add entry=entry,gp
- mov psrsave=psr
+ add palret=2f-1b,palret // calculate return address
mov r28=in0 // procedure number
- ;;
- ld8 entry=[entry] // read entry point
mov r29=in1 // copy arguments
mov r30=in2
mov r31=in3
;;
- mov b6=entry
- add palret=2f-1b,palret // calculate return address
- ;;
+ ld8 entry=[entry] // read entry point
mov b0=palret
- rsm psr.i // disable interrupts
+ ;;
+ mov b6=entry
;;
br.cond.sptk b6 // call into firmware
-2: mov psr.l=psrsave
+ ;;
+2:
mov rp=rpsave
mov ar.pfs=pfssave
;;
- srlz.d
br.ret.sptk rp
+ ;;
END(ia64_call_pal_static)
/*
diff --git a/sys/ia64/include/_types.h b/sys/ia64/include/_types.h
index 8fc1be2..0c2f5cc 100644
--- a/sys/ia64/include/_types.h
+++ b/sys/ia64/include/_types.h
@@ -59,7 +59,6 @@ typedef unsigned long __uint64_t;
* Standard type definitions.
*/
typedef __int32_t __clock_t; /* clock()... */
-typedef unsigned int __cpumask_t;
typedef __int64_t __critical_t;
typedef double __double_t;
typedef float __float_t;
diff --git a/sys/ia64/include/smp.h b/sys/ia64/include/smp.h
index 26557a7..d2aff76 100644
--- a/sys/ia64/include/smp.h
+++ b/sys/ia64/include/smp.h
@@ -14,6 +14,8 @@
#ifndef LOCORE
+#include <sys/_cpuset.h>
+
struct pcpu;
struct ia64_ap_state {
@@ -44,7 +46,7 @@ extern int ia64_ipi_wakeup;
void ipi_all_but_self(int ipi);
void ipi_cpu(int cpu, u_int ipi);
-void ipi_selected(cpumask_t cpus, int ipi);
+void ipi_selected(cpuset_t cpus, int ipi);
void ipi_send(struct pcpu *, int ipi);
#endif /* !LOCORE */
diff --git a/sys/kern/kern_cpuset.c b/sys/kern/kern_cpuset.c
index 6489ffb..e1f2801 100644
--- a/sys/kern/kern_cpuset.c
+++ b/sys/kern/kern_cpuset.c
@@ -50,6 +50,7 @@ __FBSDID("$FreeBSD$");
#include <sys/cpuset.h>
#include <sys/sx.h>
#include <sys/queue.h>
+#include <sys/libkern.h>
#include <sys/limits.h>
#include <sys/bus.h>
#include <sys/interrupt.h>
@@ -617,6 +618,86 @@ out:
}
/*
+ * Calculate the ffs() of the cpuset.
+ *
+ * The words of 'set' are scanned from index 0 upward.  For the first
+ * non-zero word the result is ffsl() of that word plus the word index
+ * times _NCPUBITS, i.e. a 1-based bit position within the whole set.
+ * Returns 0 when every word is zero.
+ */
+int
+cpusetobj_ffs(const cpuset_t *set)
+{
+ size_t i;
+ int cbit;
+
+ cbit = 0;
+ for (i = 0; i < _NCPUWORDS; i++) {
+ if (set->__bits[i] != 0) {
+ cbit = ffsl(set->__bits[i]);
+ cbit += i * _NCPUBITS;
+ break;
+ }
+ }
+ return (cbit);
+}
+
+/*
+ * Return a string representing a valid layout for a cpuset_t object.
+ * It expects an incoming buffer at least sized as CPUSETBUFSIZ.
+ *
+ * Words are written in hexadecimal from index _NCPUWORDS - 1 down to
+ * index 0; every word except the last one is followed by ", ".
+ * The return value is 'buf' itself.
+ *
+ * NOTE(review): the snprintf() return value is subtracted from the
+ * remaining size without a truncation check, so this relies on
+ * CPUSETBUFSIZ being large enough for all words -- confirm against the
+ * CPUSETBUFSIZ definition.
+ */
+char *
+cpusetobj_strprint(char *buf, const cpuset_t *set)
+{
+ char *tbuf;
+ size_t i, bytesp, bufsiz;
+
+ tbuf = buf;
+ bytesp = 0;
+ bufsiz = CPUSETBUFSIZ;
+
+ for (i = _NCPUWORDS - 1; i > 0; i--) {
+ bytesp = snprintf(tbuf, bufsiz, "%lx, ", set->__bits[i]);
+ bufsiz -= bytesp;
+ tbuf += bytesp;
+ }
+ snprintf(tbuf, bufsiz, "%lx", set->__bits[0]);
+ return (buf);
+}
+
+/*
+ * Build a valid cpuset_t object from a string representation.
+ * It expects a NUL-terminated string shorter than CPUSETBUFSIZ holding
+ * up to _NCPUWORDS comma-separated hexadecimal words, highest word
+ * first (the layout written by cpusetobj_strprint()).  White space
+ * after a comma is optional.
+ *
+ * Returns 0 on success and -1 when the string is too long, holds too
+ * many words or a word cannot be parsed.  On failure 'set' may already
+ * have been zeroed or partially filled.
+ */
+int
+cpusetobj_strscan(cpuset_t *set, const char *buf)
+{
+ u_int nwords;
+ int i, ret;
+
+ if (strlen(buf) > CPUSETBUFSIZ - 1)
+ return (-1);
+
+ /* Allow to pass a shorter version of the mask when necessary. */
+ nwords = 1;
+ for (i = 0; buf[i] != '\0'; i++)
+ if (buf[i] == ',')
+ nwords++;
+ if (nwords > _NCPUWORDS)
+ return (-1);
+
+ CPU_ZERO(set);
+ for (i = nwords - 1; i > 0; i--) {
+ ret = sscanf(buf, "%lx, ", &set->__bits[i]);
+ if (ret == 0 || ret == -1)
+ return (-1);
+ /*
+ * Step over the comma that ends this word.  Words were counted
+ * by commas above, so the comma (not a blank, which may be
+ * absent or may precede the word) is the separator to skip.
+ */
+ buf = strstr(buf, ",");
+ if (buf == NULL)
+ return (-1);
+ buf++;
+ }
+ ret = sscanf(buf, "%lx", &set->__bits[0]);
+ if (ret == 0 || ret == -1)
+ return (-1);
+ return (0);
+}
+
+/*
* Apply an anonymous mask to a single thread.
*/
int
@@ -754,12 +835,7 @@ cpuset_init(void *arg)
{
cpuset_t mask;
- CPU_ZERO(&mask);
-#ifdef SMP
- mask.__bits[0] = all_cpus;
-#else
- mask.__bits[0] = 1;
-#endif
+ mask = all_cpus;
if (cpuset_modify(cpuset_zero, &mask))
panic("Can't set initial cpuset mask.\n");
cpuset_zero->cs_flags |= CPU_SET_RDONLY;
diff --git a/sys/kern/kern_ktr.c b/sys/kern/kern_ktr.c
index 2e5e06f..eff3d5b 100644
--- a/sys/kern/kern_ktr.c
+++ b/sys/kern/kern_ktr.c
@@ -40,8 +40,10 @@ __FBSDID("$FreeBSD$");
#include "opt_alq.h"
#include <sys/param.h>
+#include <sys/queue.h>
#include <sys/alq.h>
#include <sys/cons.h>
+#include <sys/cpuset.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/libkern.h>
@@ -68,10 +70,6 @@ __FBSDID("$FreeBSD$");
#define KTR_MASK (0)
#endif
-#ifndef KTR_CPUMASK
-#define KTR_CPUMASK (~0)
-#endif
-
#ifndef KTR_TIME
#define KTR_TIME get_cyclecount()
#endif
@@ -84,11 +82,6 @@ FEATURE(ktr, "Kernel support for KTR kernel tracing facility");
SYSCTL_NODE(_debug, OID_AUTO, ktr, CTLFLAG_RD, 0, "KTR options");
-int ktr_cpumask = KTR_CPUMASK;
-TUNABLE_INT("debug.ktr.cpumask", &ktr_cpumask);
-SYSCTL_INT(_debug_ktr, OID_AUTO, cpumask, CTLFLAG_RW,
- &ktr_cpumask, 0, "Bitmask of CPUs on which KTR logging is enabled");
-
int ktr_mask = KTR_MASK;
TUNABLE_INT("debug.ktr.mask", &ktr_mask);
SYSCTL_INT(_debug_ktr, OID_AUTO, mask, CTLFLAG_RW,
@@ -106,6 +99,54 @@ int ktr_version = KTR_VERSION;
SYSCTL_INT(_debug_ktr, OID_AUTO, version, CTLFLAG_RD,
&ktr_version, 0, "Version of the KTR interface");
+/* Set of CPUs on which KTR logging is enabled. */
+cpuset_t ktr_cpumask;
+/* String form of the mask, filled from the debug.ktr.cpumask tunable. */
+static char ktr_cpumask_str[CPUSETBUFSIZ];
+TUNABLE_STR("debug.ktr.cpumask", ktr_cpumask_str, sizeof(ktr_cpumask_str));
+
+/*
+ * Set the initial value of ktr_cpumask.
+ *
+ * The mask starts out full.  If KTR_CPUMASK is defined it is parsed as a
+ * cpuset string, and then a non-empty debug.ktr.cpumask tunable string is
+ * parsed on top of that.  Whenever a parse fails the mask is reset to
+ * the full set.
+ */
+static void
+ktr_cpumask_initializer(void *dummy __unused)
+{
+
+ CPU_FILL(&ktr_cpumask);
+#ifdef KTR_CPUMASK
+ if (cpusetobj_strscan(&ktr_cpumask, KTR_CPUMASK) == -1)
+ CPU_FILL(&ktr_cpumask);
+#endif
+
+ /*
+ * TUNABLE_STR() runs with SI_ORDER_MIDDLE priority, thus it must be
+ * already set, if necessary.
+ */
+ if (ktr_cpumask_str[0] != '\0' &&
+ cpusetobj_strscan(&ktr_cpumask, ktr_cpumask_str) == -1)
+ CPU_FILL(&ktr_cpumask);
+}
+SYSINIT(ktr_cpumask_initializer, SI_SUB_TUNABLES, SI_ORDER_ANY,
+ ktr_cpumask_initializer, NULL);
+
+/*
+ * Handler for the debug.ktr.cpumask sysctl.
+ *
+ * The current ktr_cpumask is formatted into a local string buffer and
+ * handed to sysctl_handle_string().  If that call fails, or no new value
+ * was supplied (req->newptr is NULL), its result is returned as is.
+ * Otherwise the string left in the buffer is parsed into a temporary set;
+ * EINVAL is returned if parsing fails, else the set is copied into
+ * ktr_cpumask.
+ */
+static int
+sysctl_debug_ktr_cpumask(SYSCTL_HANDLER_ARGS)
+{
+ char lktr_cpumask_str[CPUSETBUFSIZ];
+ cpuset_t imask;
+ int error;
+
+ cpusetobj_strprint(lktr_cpumask_str, &ktr_cpumask);
+ error = sysctl_handle_string(oidp, lktr_cpumask_str,
+ sizeof(lktr_cpumask_str), req);
+ if (error != 0 || req->newptr == NULL)
+ return (error);
+ /* Parse into a scratch set so a bad string leaves ktr_cpumask intact. */
+ if (cpusetobj_strscan(&imask, lktr_cpumask_str) == -1)
+ return (EINVAL);
+ CPU_COPY(&imask, &ktr_cpumask);
+
+ return (error);
+}
+SYSCTL_PROC(_debug_ktr, OID_AUTO, cpumask,
+ CTLFLAG_RW | CTLFLAG_MPSAFE | CTLTYPE_STRING, NULL, 0,
+ sysctl_debug_ktr_cpumask, "S",
+ "Bitmask of CPUs on which KTR logging is enabled");
+
volatile int ktr_idx = 0;
struct ktr_entry ktr_buf[KTR_ENTRIES];
@@ -213,7 +254,7 @@ ktr_tracepoint(u_int mask, const char *file, int line, const char *format,
if ((ktr_mask & mask) == 0)
return;
cpu = KTR_CPU;
- if (((1 << cpu) & ktr_cpumask) == 0)
+ if (!CPU_ISSET(cpu, &ktr_cpumask))
return;
#if defined(KTR_VERBOSE) || defined(KTR_ALQ)
td = curthread;
diff --git a/sys/kern/kern_pmc.c b/sys/kern/kern_pmc.c
index 7532378..8d9c7c0 100644
--- a/sys/kern/kern_pmc.c
+++ b/sys/kern/kern_pmc.c
@@ -55,7 +55,7 @@ int (*pmc_hook)(struct thread *td, int function, void *arg) = NULL;
int (*pmc_intr)(int cpu, struct trapframe *tf) = NULL;
/* Bitmask of CPUs requiring servicing at hardclock time */
-volatile cpumask_t pmc_cpumask;
+volatile cpuset_t pmc_cpumask;
/*
* A global count of SS mode PMCs. When non-zero, this means that
@@ -112,7 +112,7 @@ pmc_cpu_is_active(int cpu)
{
#ifdef SMP
return (pmc_cpu_is_present(cpu) &&
- (hlt_cpus_mask & (1 << cpu)) == 0);
+ !CPU_ISSET(cpu, &hlt_cpus_mask));
#else
return (1);
#endif
@@ -139,7 +139,7 @@ int
pmc_cpu_is_primary(int cpu)
{
#ifdef SMP
- return ((logical_cpus_mask & (1 << cpu)) == 0);
+ return (!CPU_ISSET(cpu, &logical_cpus_mask));
#else
return (1);
#endif
diff --git a/sys/kern/kern_rmlock.c b/sys/kern/kern_rmlock.c
index 7f2b4e7..3214e1b 100644
--- a/sys/kern/kern_rmlock.c
+++ b/sys/kern/kern_rmlock.c
@@ -263,7 +263,7 @@ _rm_rlock_hard(struct rmlock *rm, struct rm_priotracker *tracker, int trylock)
pc = pcpu_find(curcpu);
/* Check if we just need to do a proper critical_exit. */
- if (!(pc->pc_cpumask & rm->rm_writecpus)) {
+ if (!CPU_OVERLAP(&pc->pc_cpumask, &rm->rm_writecpus)) {
critical_exit();
return (1);
}
@@ -325,7 +325,7 @@ _rm_rlock_hard(struct rmlock *rm, struct rm_priotracker *tracker, int trylock)
critical_enter();
pc = pcpu_find(curcpu);
- rm->rm_writecpus &= ~pc->pc_cpumask;
+ CPU_NAND(&rm->rm_writecpus, &pc->pc_cpumask);
rm_tracker_add(pc, tracker);
sched_pin();
critical_exit();
@@ -366,7 +366,8 @@ _rm_rlock(struct rmlock *rm, struct rm_priotracker *tracker, int trylock)
* Fast path to combine two common conditions into a single
* conditional jump.
*/
- if (0 == (td->td_owepreempt | (rm->rm_writecpus & pc->pc_cpumask)))
+ if (0 == (td->td_owepreempt |
+ CPU_OVERLAP(&rm->rm_writecpus, &pc->pc_cpumask)))
return (1);
/* We do not have a read token and need to acquire one. */
@@ -429,17 +430,17 @@ _rm_wlock(struct rmlock *rm)
{
struct rm_priotracker *prio;
struct turnstile *ts;
- cpumask_t readcpus;
+ cpuset_t readcpus;
if (rm->lock_object.lo_flags & RM_SLEEPABLE)
sx_xlock(&rm->rm_lock_sx);
else
mtx_lock(&rm->rm_lock_mtx);
- if (rm->rm_writecpus != all_cpus) {
+ if (CPU_CMP(&rm->rm_writecpus, &all_cpus)) {
/* Get all read tokens back */
-
- readcpus = all_cpus & (all_cpus & ~rm->rm_writecpus);
+ readcpus = all_cpus;
+ CPU_NAND(&readcpus, &rm->rm_writecpus);
rm->rm_writecpus = all_cpus;
/*
diff --git a/sys/kern/kern_shutdown.c b/sys/kern/kern_shutdown.c
index 001da3d..da041fa 100644
--- a/sys/kern/kern_shutdown.c
+++ b/sys/kern/kern_shutdown.c
@@ -233,30 +233,32 @@ print_uptime(void)
printf("%lds\n", (long)ts.tv_sec);
}
-static void
-doadump(void)
+int
+doadump(boolean_t textdump)
{
+ boolean_t coredump;
- /*
- * Sometimes people have to call this from the kernel debugger.
- * (if 'panic' can not dump)
- * Give them a clue as to why they can't dump.
- */
- if (dumper.dumper == NULL) {
- printf("Cannot dump. Device not defined or unavailable.\n");
- return;
- }
+ if (dumping)
+ return (EBUSY);
+ if (dumper.dumper == NULL)
+ return (ENXIO);
savectx(&dumppcb);
dumptid = curthread->td_tid;
dumping++;
+
+ coredump = TRUE;
#ifdef DDB
- if (textdump_pending)
+ if (textdump && textdump_pending) {
+ coredump = FALSE;
textdump_dumpsys(&dumper);
- else
+ }
#endif
+ if (coredump)
dumpsys(&dumper);
+
dumping--;
+ return (0);
}
static int
@@ -425,7 +427,7 @@ kern_reboot(int howto)
EVENTHANDLER_INVOKE(shutdown_post_sync, howto);
if ((howto & (RB_HALT|RB_DUMP)) == RB_DUMP && !cold && !dumping)
- doadump();
+ doadump(TRUE);
/* Now that we're going to really halt the system... */
EVENTHANDLER_INVOKE(shutdown_final, howto);
diff --git a/sys/kern/ksched.c b/sys/kern/ksched.c
index 7ee56d5..799b60d 100644
--- a/sys/kern/ksched.c
+++ b/sys/kern/ksched.c
@@ -206,7 +206,7 @@ ksched_setscheduler(struct ksched *ksched,
if (param->sched_priority >= 0 &&
param->sched_priority <= (PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE)) {
rtp.type = RTP_PRIO_NORMAL;
- rtp.prio = p4prio_to_rtpprio(param->sched_priority);
+ rtp.prio = p4prio_to_tsprio(param->sched_priority);
rtp_to_pri(&rtp, td);
} else
e = EINVAL;
diff --git a/sys/kern/sched_4bsd.c b/sys/kern/sched_4bsd.c
index 519cae5..592bb80 100644
--- a/sys/kern/sched_4bsd.c
+++ b/sys/kern/sched_4bsd.c
@@ -156,7 +156,7 @@ static struct runq runq;
static struct runq runq_pcpu[MAXCPU];
long runq_length[MAXCPU];
-static cpumask_t idle_cpus_mask;
+static cpuset_t idle_cpus_mask;
#endif
struct pcpuidlestat {
@@ -951,7 +951,8 @@ sched_switch(struct thread *td, struct thread *newtd, int flags)
if (td->td_flags & TDF_IDLETD) {
TD_SET_CAN_RUN(td);
#ifdef SMP
- idle_cpus_mask &= ~PCPU_GET(cpumask);
+ /* Spinlock held here, assume no migration. */
+ CPU_NAND(&idle_cpus_mask, PCPU_PTR(cpumask));
#endif
} else {
if (TD_IS_RUNNING(td)) {
@@ -1025,7 +1026,7 @@ sched_switch(struct thread *td, struct thread *newtd, int flags)
#ifdef SMP
if (td->td_flags & TDF_IDLETD)
- idle_cpus_mask |= PCPU_GET(cpumask);
+ CPU_OR(&idle_cpus_mask, PCPU_PTR(cpumask));
#endif
sched_lock.mtx_lock = (uintptr_t)td;
td->td_oncpu = PCPU_GET(cpuid);
@@ -1054,7 +1055,8 @@ static int
forward_wakeup(int cpunum)
{
struct pcpu *pc;
- cpumask_t dontuse, id, map, map2, me;
+ cpuset_t dontuse, id, map, map2, me;
+ int iscpuset;
mtx_assert(&sched_lock, MA_OWNED);
@@ -1071,32 +1073,38 @@ forward_wakeup(int cpunum)
/*
* Check the idle mask we received against what we calculated
* before in the old version.
+ *
+ * Also note that sched_lock is held now, thus no migration is
+ * expected.
*/
me = PCPU_GET(cpumask);
/* Don't bother if we should be doing it ourself. */
- if ((me & idle_cpus_mask) && (cpunum == NOCPU || me == (1 << cpunum)))
+ if (CPU_OVERLAP(&me, &idle_cpus_mask) &&
+ (cpunum == NOCPU || CPU_ISSET(cpunum, &me)))
return (0);
- dontuse = me | stopped_cpus | hlt_cpus_mask;
- map2 = 0;
+ dontuse = me;
+ CPU_OR(&dontuse, &stopped_cpus);
+ CPU_OR(&dontuse, &hlt_cpus_mask);
+ CPU_ZERO(&map2);
if (forward_wakeup_use_loop) {
STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) {
id = pc->pc_cpumask;
- if ((id & dontuse) == 0 &&
+ if (!CPU_OVERLAP(&id, &dontuse) &&
pc->pc_curthread == pc->pc_idlethread) {
- map2 |= id;
+ CPU_OR(&map2, &id);
}
}
}
if (forward_wakeup_use_mask) {
- map = 0;
- map = idle_cpus_mask & ~dontuse;
+ map = idle_cpus_mask;
+ CPU_NAND(&map, &dontuse);
/* If they are both on, compare and use loop if different. */
if (forward_wakeup_use_loop) {
- if (map != map2) {
+ if (CPU_CMP(&map, &map2)) {
printf("map != map2, loop method preferred\n");
map = map2;
}
@@ -1108,18 +1116,22 @@ forward_wakeup(int cpunum)
/* If we only allow a specific CPU, then mask off all the others. */
if (cpunum != NOCPU) {
KASSERT((cpunum <= mp_maxcpus),("forward_wakeup: bad cpunum."));
- map &= (1 << cpunum);
+ iscpuset = CPU_ISSET(cpunum, &map);
+ if (iscpuset == 0)
+ CPU_ZERO(&map);
+ else
+ CPU_SETOF(cpunum, &map);
}
- if (map) {
+ if (!CPU_EMPTY(&map)) {
forward_wakeups_delivered++;
STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) {
id = pc->pc_cpumask;
- if ((map & id) == 0)
+ if (!CPU_OVERLAP(&map, &id))
continue;
if (cpu_idle_wakeup(pc->pc_cpuid))
- map &= ~id;
+ CPU_NAND(&map, &id);
}
- if (map)
+ if (!CPU_EMPTY(&map))
ipi_selected(map, IPI_AST);
return (1);
}
@@ -1135,7 +1147,7 @@ kick_other_cpu(int pri, int cpuid)
int cpri;
pcpu = pcpu_find(cpuid);
- if (idle_cpus_mask & pcpu->pc_cpumask) {
+ if (CPU_OVERLAP(&idle_cpus_mask, &pcpu->pc_cpumask)) {
forward_wakeups_delivered++;
if (!cpu_idle_wakeup(cpuid))
ipi_cpu(cpuid, IPI_AST);
@@ -1193,6 +1205,7 @@ void
sched_add(struct thread *td, int flags)
#ifdef SMP
{
+ cpuset_t idle, me, tidlemsk;
struct td_sched *ts;
int forwarded = 0;
int cpu;
@@ -1262,11 +1275,20 @@ sched_add(struct thread *td, int flags)
kick_other_cpu(td->td_priority, cpu);
} else {
if (!single_cpu) {
- cpumask_t me = PCPU_GET(cpumask);
- cpumask_t idle = idle_cpus_mask & me;
- if (!idle && ((flags & SRQ_INTR) == 0) &&
- (idle_cpus_mask & ~(hlt_cpus_mask | me)))
+ /*
+ * Thread spinlock is held here, assume no
+ * migration is possible.
+ */
+ me = PCPU_GET(cpumask);
+ idle = idle_cpus_mask;
+ tidlemsk = idle;
+ CPU_AND(&idle, &me);
+ CPU_OR(&me, &hlt_cpus_mask);
+ CPU_NAND(&tidlemsk, &me);
+
+ if (CPU_EMPTY(&idle) && ((flags & SRQ_INTR) == 0) &&
+ !CPU_EMPTY(&tidlemsk))
forwarded = forward_wakeup(cpu);
}
diff --git a/sys/kern/sched_ule.c b/sys/kern/sched_ule.c
index ac18e77..05267f3 100644
--- a/sys/kern/sched_ule.c
+++ b/sys/kern/sched_ule.c
@@ -564,7 +564,7 @@ struct cpu_search {
#define CPUSET_FOREACH(cpu, mask) \
for ((cpu) = 0; (cpu) <= mp_maxid; (cpu)++) \
- if ((mask) & 1 << (cpu))
+ if (CPU_ISSET(cpu, &mask))
static __inline int cpu_search(struct cpu_group *cg, struct cpu_search *low,
struct cpu_search *high, const int match);
@@ -2650,15 +2650,16 @@ static int
sysctl_kern_sched_topology_spec_internal(struct sbuf *sb, struct cpu_group *cg,
int indent)
{
+ char cpusetbuf[CPUSETBUFSIZ];
int i, first;
sbuf_printf(sb, "%*s<group level=\"%d\" cache-level=\"%d\">\n", indent,
"", 1 + indent / 2, cg->cg_level);
- sbuf_printf(sb, "%*s <cpu count=\"%d\" mask=\"0x%x\">", indent, "",
- cg->cg_count, cg->cg_mask);
+ sbuf_printf(sb, "%*s <cpu count=\"%d\" mask=\"%s\">", indent, "",
+ cg->cg_count, cpusetobj_strprint(cpusetbuf, &cg->cg_mask));
first = TRUE;
for (i = 0; i < MAXCPU; i++) {
- if ((cg->cg_mask & (1 << i)) != 0) {
+ if (CPU_ISSET(i, &cg->cg_mask)) {
if (!first)
sbuf_printf(sb, ", ");
else
diff --git a/sys/kern/subr_kdb.c b/sys/kern/subr_kdb.c
index 5d68ae2..1d67864 100644
--- a/sys/kern/subr_kdb.c
+++ b/sys/kern/subr_kdb.c
@@ -413,7 +413,8 @@ kdb_thr_ctx(struct thread *thr)
#if defined(SMP) && defined(KDB_STOPPEDPCB)
STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) {
- if (pc->pc_curthread == thr && (stopped_cpus & pc->pc_cpumask))
+ if (pc->pc_curthread == thr &&
+ CPU_OVERLAP(&stopped_cpus, &pc->pc_cpumask))
return (KDB_STOPPEDPCB(pc));
}
#endif
diff --git a/sys/kern/subr_pcpu.c b/sys/kern/subr_pcpu.c
index 5cb4f26..a6b3ae0 100644
--- a/sys/kern/subr_pcpu.c
+++ b/sys/kern/subr_pcpu.c
@@ -87,7 +87,7 @@ pcpu_init(struct pcpu *pcpu, int cpuid, size_t size)
KASSERT(cpuid >= 0 && cpuid < MAXCPU,
("pcpu_init: invalid cpuid %d", cpuid));
pcpu->pc_cpuid = cpuid;
- pcpu->pc_cpumask = 1 << cpuid;
+ CPU_SETOF(cpuid, &pcpu->pc_cpumask);
cpuid_to_pcpu[cpuid] = pcpu;
STAILQ_INSERT_TAIL(&cpuhead, pcpu, pc_allcpu);
cpu_pcpu_init(pcpu, cpuid, size);
diff --git a/sys/kern/subr_prf.c b/sys/kern/subr_prf.c
index 3334837..48f2dd9 100644
--- a/sys/kern/subr_prf.c
+++ b/sys/kern/subr_prf.c
@@ -163,6 +163,7 @@ uprintf(const char *fmt, ...)
goto out;
}
pca.flags = TOTTY;
+ pca.p_bufr = NULL;
va_start(ap, fmt);
tty_lock(pca.tty);
retval = kvprintf(fmt, putchar, &pca, 10, ap);
@@ -206,6 +207,7 @@ tprintf(struct proc *p, int pri, const char *fmt, ...)
pca.pri = pri;
pca.tty = tp;
pca.flags = flags;
+ pca.p_bufr = NULL;
va_start(ap, fmt);
if (pca.tty != NULL)
tty_lock(pca.tty);
@@ -234,6 +236,7 @@ ttyprintf(struct tty *tp, const char *fmt, ...)
va_start(ap, fmt);
pca.tty = tp;
pca.flags = TOTTY;
+ pca.p_bufr = NULL;
retval = kvprintf(fmt, putchar, &pca, 10, ap);
va_end(ap);
return (retval);
diff --git a/sys/kern/subr_rman.c b/sys/kern/subr_rman.c
index 3014b19..abd72c0 100644
--- a/sys/kern/subr_rman.c
+++ b/sys/kern/subr_rman.c
@@ -839,6 +839,7 @@ int_rman_release_resource(struct rman *rm, struct resource_i *r)
* without freeing anything.
*/
r->r_flags &= ~RF_ALLOCATED;
+ r->r_dev = NULL;
return 0;
}
diff --git a/sys/kern/subr_smp.c b/sys/kern/subr_smp.c
index 351f096..c38177b 100644
--- a/sys/kern/subr_smp.c
+++ b/sys/kern/subr_smp.c
@@ -53,15 +53,15 @@ __FBSDID("$FreeBSD$");
#include "opt_sched.h"
#ifdef SMP
-volatile cpumask_t stopped_cpus;
-volatile cpumask_t started_cpus;
-cpumask_t hlt_cpus_mask;
-cpumask_t logical_cpus_mask;
+volatile cpuset_t stopped_cpus;
+volatile cpuset_t started_cpus;
+cpuset_t hlt_cpus_mask;
+cpuset_t logical_cpus_mask;
void (*cpustop_restartfunc)(void);
#endif
/* This is used in modules that need to work in both SMP and UP. */
-cpumask_t all_cpus;
+cpuset_t all_cpus;
int mp_ncpus;
/* export this for libkvm consumers. */
@@ -200,8 +200,11 @@ forward_signal(struct thread *td)
*
*/
static int
-generic_stop_cpus(cpumask_t map, u_int type)
+generic_stop_cpus(cpuset_t map, u_int type)
{
+#ifdef KTR
+ char cpusetbuf[CPUSETBUFSIZ];
+#endif
static volatile u_int stopping_cpu = NOCPU;
int i;
@@ -216,7 +219,8 @@ generic_stop_cpus(cpumask_t map, u_int type)
if (!smp_started)
return (0);
- CTR2(KTR_SMP, "stop_cpus(%x) with %u type", map, type);
+ CTR2(KTR_SMP, "stop_cpus(%s) with %u type",
+ cpusetobj_strprint(cpusetbuf, &map), type);
if (stopping_cpu != PCPU_GET(cpuid))
while (atomic_cmpset_int(&stopping_cpu, NOCPU,
@@ -228,7 +232,7 @@ generic_stop_cpus(cpumask_t map, u_int type)
ipi_selected(map, type);
i = 0;
- while ((stopped_cpus & map) != map) {
+ while (!CPU_SUBSET(&stopped_cpus, &map)) {
/* spin */
cpu_spinwait();
i++;
@@ -245,14 +249,14 @@ generic_stop_cpus(cpumask_t map, u_int type)
}
int
-stop_cpus(cpumask_t map)
+stop_cpus(cpuset_t map)
{
return (generic_stop_cpus(map, IPI_STOP));
}
int
-stop_cpus_hard(cpumask_t map)
+stop_cpus_hard(cpuset_t map)
{
return (generic_stop_cpus(map, IPI_STOP_HARD));
@@ -260,7 +264,7 @@ stop_cpus_hard(cpumask_t map)
#if defined(__amd64__)
int
-suspend_cpus(cpumask_t map)
+suspend_cpus(cpuset_t map)
{
return (generic_stop_cpus(map, IPI_SUSPEND));
@@ -281,19 +285,22 @@ suspend_cpus(cpumask_t map)
* 1: ok
*/
int
-restart_cpus(cpumask_t map)
+restart_cpus(cpuset_t map)
{
+#ifdef KTR
+ char cpusetbuf[CPUSETBUFSIZ];
+#endif
if (!smp_started)
return 0;
- CTR1(KTR_SMP, "restart_cpus(%x)", map);
+ CTR1(KTR_SMP, "restart_cpus(%s)", cpusetobj_strprint(cpusetbuf, &map));
/* signal other cpus to restart */
- atomic_store_rel_int(&started_cpus, map);
+ CPU_COPY_STORE_REL(&map, &started_cpus);
/* wait for each to clear its bit */
- while ((stopped_cpus & map) != 0)
+ while (CPU_OVERLAP(&stopped_cpus, &map))
cpu_spinwait();
return 1;
@@ -409,13 +416,13 @@ smp_rendezvous_action(void)
}
void
-smp_rendezvous_cpus(cpumask_t map,
+smp_rendezvous_cpus(cpuset_t map,
void (* setup_func)(void *),
void (* action_func)(void *),
void (* teardown_func)(void *),
void *arg)
{
- int i, ncpus = 0;
+ int curcpumap, i, ncpus = 0;
if (!smp_started) {
if (setup_func != NULL)
@@ -428,11 +435,11 @@ smp_rendezvous_cpus(cpumask_t map,
}
CPU_FOREACH(i) {
- if (((1 << i) & map) != 0)
+ if (CPU_ISSET(i, &map))
ncpus++;
}
if (ncpus == 0)
- panic("ncpus is 0 with map=0x%x", map);
+ panic("ncpus is 0 with non-zero map");
mtx_lock_spin(&smp_ipi_mtx);
@@ -452,10 +459,12 @@ smp_rendezvous_cpus(cpumask_t map,
* Signal other processors, which will enter the IPI with
* interrupts off.
*/
- ipi_selected(map & ~(1 << curcpu), IPI_RENDEZVOUS);
+ curcpumap = CPU_ISSET(curcpu, &map);
+ CPU_CLR(curcpu, &map);
+ ipi_selected(map, IPI_RENDEZVOUS);
/* Check if the current CPU is in the map */
- if ((map & (1 << curcpu)) != 0)
+ if (curcpumap != 0)
smp_rendezvous_action();
/*
@@ -484,6 +493,7 @@ static struct cpu_group group[MAXCPU];
struct cpu_group *
smp_topo(void)
{
+ char cpusetbuf[CPUSETBUFSIZ], cpusetbuf2[CPUSETBUFSIZ];
struct cpu_group *top;
/*
@@ -530,9 +540,10 @@ smp_topo(void)
if (top->cg_count != mp_ncpus)
panic("Built bad topology at %p. CPU count %d != %d",
top, top->cg_count, mp_ncpus);
- if (top->cg_mask != all_cpus)
- panic("Built bad topology at %p. CPU mask 0x%X != 0x%X",
- top, top->cg_mask, all_cpus);
+ if (CPU_CMP(&top->cg_mask, &all_cpus))
+ panic("Built bad topology at %p. CPU mask (%s) != (%s)",
+ top, cpusetobj_strprint(cpusetbuf, &top->cg_mask),
+ cpusetobj_strprint(cpusetbuf2, &all_cpus));
return (top);
}
@@ -557,11 +568,13 @@ static int
smp_topo_addleaf(struct cpu_group *parent, struct cpu_group *child, int share,
int count, int flags, int start)
{
- cpumask_t mask;
+ char cpusetbuf[CPUSETBUFSIZ], cpusetbuf2[CPUSETBUFSIZ];
+ cpuset_t mask;
int i;
- for (mask = 0, i = 0; i < count; i++, start++)
- mask |= (1 << start);
+ CPU_ZERO(&mask);
+ for (i = 0; i < count; i++, start++)
+ CPU_SET(start, &mask);
child->cg_parent = parent;
child->cg_child = NULL;
child->cg_children = 0;
@@ -571,10 +584,12 @@ smp_topo_addleaf(struct cpu_group *parent, struct cpu_group *child, int share,
child->cg_mask = mask;
parent->cg_children++;
for (; parent != NULL; parent = parent->cg_parent) {
- if ((parent->cg_mask & child->cg_mask) != 0)
- panic("Duplicate children in %p. mask 0x%X child 0x%X",
- parent, parent->cg_mask, child->cg_mask);
- parent->cg_mask |= child->cg_mask;
+ if (CPU_OVERLAP(&parent->cg_mask, &child->cg_mask))
+ panic("Duplicate children in %p. mask (%s) child (%s)",
+ parent,
+ cpusetobj_strprint(cpusetbuf, &parent->cg_mask),
+ cpusetobj_strprint(cpusetbuf2, &child->cg_mask));
+ CPU_OR(&parent->cg_mask, &child->cg_mask);
parent->cg_count += child->cg_count;
}
@@ -634,20 +649,20 @@ struct cpu_group *
smp_topo_find(struct cpu_group *top, int cpu)
{
struct cpu_group *cg;
- cpumask_t mask;
+ cpuset_t mask;
int children;
int i;
- mask = (1 << cpu);
+ CPU_SETOF(cpu, &mask);
cg = top;
for (;;) {
- if ((cg->cg_mask & mask) == 0)
+ if (!CPU_OVERLAP(&cg->cg_mask, &mask))
return (NULL);
if (cg->cg_children == 0)
return (cg);
children = cg->cg_children;
for (i = 0, cg = cg->cg_child; i < children; cg++, i++)
- if ((cg->cg_mask & mask) != 0)
+ if (CPU_OVERLAP(&cg->cg_mask, &mask))
break;
}
return (NULL);
@@ -655,7 +670,7 @@ smp_topo_find(struct cpu_group *top, int cpu)
#else /* !SMP */
void
-smp_rendezvous_cpus(cpumask_t map,
+smp_rendezvous_cpus(cpuset_t map,
void (*setup_func)(void *),
void (*action_func)(void *),
void (*teardown_func)(void *),
diff --git a/sys/kern/uipc_syscalls.c b/sys/kern/uipc_syscalls.c
index a4bbdba..19aaee0 100644
--- a/sys/kern/uipc_syscalls.c
+++ b/sys/kern/uipc_syscalls.c
@@ -747,6 +747,10 @@ kern_sendit(td, s, mp, flags, control, segflg)
return (error);
so = (struct socket *)fp->f_data;
+#ifdef KTRACE
+ if (mp->msg_name != NULL && KTRPOINT(td, KTR_STRUCT))
+ ktrsockaddr(mp->msg_name);
+#endif
#ifdef MAC
if (mp->msg_name != NULL) {
error = mac_socket_check_connect(td->td_ucred, so,
diff --git a/sys/mips/cavium/octeon_mp.c b/sys/mips/cavium/octeon_mp.c
index 78eafa6..efddee8 100644
--- a/sys/mips/cavium/octeon_mp.c
+++ b/sys/mips/cavium/octeon_mp.c
@@ -102,10 +102,18 @@ platform_init_ap(int cpuid)
mips_wbflush();
}
-cpumask_t
-platform_cpu_mask(void)
+void
+platform_cpu_mask(cpuset_t *mask)
{
- return (octeon_bootinfo->core_mask);
+
+ CPU_ZERO(mask);
+
+ /*
+ * XXX: hack in order to simplify CPU set building, assuming that
+ * core_mask is 32-bits.
+ */
+ memcpy(mask, &octeon_bootinfo->core_mask,
+ sizeof(octeon_bootinfo->core_mask));
}
struct cpu_group *
diff --git a/sys/mips/include/_types.h b/sys/mips/include/_types.h
index 4d57e20..2f23db6 100644
--- a/sys/mips/include/_types.h
+++ b/sys/mips/include/_types.h
@@ -73,7 +73,6 @@ typedef unsigned long long __uint64_t;
* Standard type definitions.
*/
typedef __int32_t __clock_t; /* clock()... */
-typedef unsigned int __cpumask_t;
typedef double __double_t;
typedef double __float_t;
#ifdef __mips_n64
diff --git a/sys/mips/include/hwfunc.h b/sys/mips/include/hwfunc.h
index 683aedb..a9e3285 100644
--- a/sys/mips/include/hwfunc.h
+++ b/sys/mips/include/hwfunc.h
@@ -28,6 +28,8 @@
#ifndef _MACHINE_HWFUNC_H_
#define _MACHINE_HWFUNC_H_
+#include <sys/_cpuset.h>
+
struct trapframe;
struct timecounter;
/*
@@ -91,7 +93,7 @@ extern int platform_processor_id(void);
/*
* Return the cpumask of available processors.
*/
-extern cpumask_t platform_cpu_mask(void);
+extern void platform_cpu_mask(cpuset_t *mask);
/*
* Return the topology of processors on this platform
diff --git a/sys/mips/include/pmap.h b/sys/mips/include/pmap.h
index e710635..90375eb 100644
--- a/sys/mips/include/pmap.h
+++ b/sys/mips/include/pmap.h
@@ -58,6 +58,7 @@
#ifndef LOCORE
#include <sys/queue.h>
+#include <sys/_cpuset.h>
#include <sys/_lock.h>
#include <sys/_mutex.h>
@@ -83,7 +84,7 @@ struct pmap {
pd_entry_t *pm_segtab; /* KVA of segment table */
TAILQ_HEAD(, pv_entry) pm_pvlist; /* list of mappings in
* pmap */
- cpumask_t pm_active; /* active on cpus */
+ cpuset_t pm_active; /* active on cpus */
struct {
u_int32_t asid:ASID_BITS; /* TLB address space tag */
u_int32_t gen:ASIDGEN_BITS; /* its generation number */
diff --git a/sys/mips/include/smp.h b/sys/mips/include/smp.h
index 58aaf03..0fcca9a 100644
--- a/sys/mips/include/smp.h
+++ b/sys/mips/include/smp.h
@@ -17,6 +17,8 @@
#ifdef _KERNEL
+#include <sys/_cpuset.h>
+
#include <machine/pcb.h>
/*
@@ -33,7 +35,7 @@
void ipi_all_but_self(int ipi);
void ipi_cpu(int cpu, u_int ipi);
-void ipi_selected(cpumask_t cpus, int ipi);
+void ipi_selected(cpuset_t cpus, int ipi);
void smp_init_secondary(u_int32_t cpuid);
void mpentry(void);
diff --git a/sys/mips/mips/mp_machdep.c b/sys/mips/mips/mp_machdep.c
index 7191b37..79a3476 100644
--- a/sys/mips/mips/mp_machdep.c
+++ b/sys/mips/mips/mp_machdep.c
@@ -29,6 +29,7 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
+#include <sys/cpuset.h>
#include <sys/ktr.h>
#include <sys/proc.h>
#include <sys/lock.h>
@@ -80,15 +81,16 @@ ipi_all_but_self(int ipi)
/* Send an IPI to a set of cpus. */
void
-ipi_selected(cpumask_t cpus, int ipi)
+ipi_selected(cpuset_t cpus, int ipi)
{
struct pcpu *pc;
- CTR3(KTR_SMP, "%s: cpus: %x, ipi: %x\n", __func__, cpus, ipi);
-
STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) {
- if ((cpus & pc->pc_cpumask) != 0)
+ if (CPU_OVERLAP(&cpus, &pc->pc_cpumask)) {
+ CTR3(KTR_SMP, "%s: pc: %p, ipi: %x\n", __func__, pc,
+ ipi);
ipi_send(pc, ipi);
+ }
}
}
@@ -108,7 +110,7 @@ static int
mips_ipi_handler(void *arg)
{
int cpu;
- cpumask_t cpumask;
+ cpuset_t cpumask;
u_int ipi, ipi_bitmap;
int bit;
@@ -148,14 +150,14 @@ mips_ipi_handler(void *arg)
tlb_save();
/* Indicate we are stopped */
- atomic_set_int(&stopped_cpus, cpumask);
+ CPU_OR_ATOMIC(&stopped_cpus, &cpumask);
/* Wait for restart */
- while ((started_cpus & cpumask) == 0)
+ while (!CPU_OVERLAP(&started_cpus, &cpumask))
cpu_spinwait();
- atomic_clear_int(&started_cpus, cpumask);
- atomic_clear_int(&stopped_cpus, cpumask);
+ CPU_NAND_ATOMIC(&started_cpus, &cpumask);
+ CPU_NAND_ATOMIC(&stopped_cpus, &cpumask);
CTR0(KTR_SMP, "IPI_STOP (restart)");
break;
case IPI_PREEMPT:
@@ -200,14 +202,22 @@ start_ap(int cpuid)
void
cpu_mp_setmaxid(void)
{
- cpumask_t cpumask;
-
- cpumask = platform_cpu_mask();
- mp_ncpus = bitcount32(cpumask);
+ cpuset_t cpumask;
+ int cpu, last;
+
+ platform_cpu_mask(&cpumask);
+ mp_ncpus = 0;
+ last = 1;
+ while ((cpu = cpusetobj_ffs(&cpumask)) != 0) {
+ last = cpu;
+ cpu--;
+ CPU_CLR(cpu, &cpumask);
+ mp_ncpus++;
+ }
if (mp_ncpus <= 0)
mp_ncpus = 1;
- mp_maxid = min(fls(cpumask), MAXCPU) - 1;
+ mp_maxid = min(last, MAXCPU) - 1;
}
void
@@ -233,16 +243,16 @@ void
cpu_mp_start(void)
{
int error, cpuid;
- cpumask_t cpumask;
+ cpuset_t cpumask, ocpus;
mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN);
- all_cpus = 0;
- cpumask = platform_cpu_mask();
+ CPU_ZERO(&all_cpus);
+ platform_cpu_mask(&cpumask);
- while (cpumask != 0) {
- cpuid = ffs(cpumask) - 1;
- cpumask &= ~(1 << cpuid);
+ while (!CPU_EMPTY(&cpumask)) {
+ cpuid = cpusetobj_ffs(&cpumask) - 1;
+ CPU_CLR(cpuid, &cpumask);
if (cpuid >= MAXCPU) {
printf("cpu_mp_start: ignoring AP #%d.\n", cpuid);
@@ -257,15 +267,19 @@ cpu_mp_start(void)
if (bootverbose)
printf("AP #%d started!\n", cpuid);
}
- all_cpus |= 1 << cpuid;
+ CPU_SET(cpuid, &all_cpus);
}
- PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask));
+ ocpus = all_cpus;
+ CPU_CLR(PCPU_GET(cpuid), &ocpus);
+ PCPU_SET(other_cpus, ocpus);
}
void
smp_init_secondary(u_int32_t cpuid)
{
+ cpuset_t ocpus;
+
/* TLB */
mips_wr_wired(0);
tlb_invalidate_all();
@@ -303,7 +317,9 @@ smp_init_secondary(u_int32_t cpuid)
CTR1(KTR_SMP, "SMP: AP CPU #%d launched", PCPU_GET(cpuid));
/* Build our map of 'other' CPUs. */
- PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask));
+ ocpus = all_cpus;
+ CPU_CLR(PCPU_GET(cpuid), &ocpus);
+ PCPU_SET(other_cpus, ocpus);
if (bootverbose)
printf("SMP: AP CPU #%d launched.\n", PCPU_GET(cpuid));
diff --git a/sys/mips/mips/pmap.c b/sys/mips/mips/pmap.c
index 7f0f4f0..f7ea660 100644
--- a/sys/mips/mips/pmap.c
+++ b/sys/mips/mips/pmap.c
@@ -471,7 +471,7 @@ pmap_create_kernel_pagetable(void)
PMAP_LOCK_INIT(kernel_pmap);
kernel_pmap->pm_segtab = kernel_segmap;
- kernel_pmap->pm_active = ~0;
+ CPU_FILL(&kernel_pmap->pm_active);
TAILQ_INIT(&kernel_pmap->pm_pvlist);
kernel_pmap->pm_asid[0].asid = PMAP_ASID_RESERVED;
kernel_pmap->pm_asid[0].gen = 0;
@@ -630,10 +630,14 @@ pmap_invalidate_all_local(pmap_t pmap)
tlb_invalidate_all();
return;
}
- if (pmap->pm_active & PCPU_GET(cpumask))
+ sched_pin();
+ if (CPU_OVERLAP(&pmap->pm_active, PCPU_PTR(cpumask))) {
+ sched_unpin();
tlb_invalidate_all_user(pmap);
- else
+ } else {
+ sched_unpin();
pmap->pm_asid[PCPU_GET(cpuid)].gen = 0;
+ }
}
#ifdef SMP
@@ -667,12 +671,16 @@ pmap_invalidate_page_local(pmap_t pmap, vm_offset_t va)
tlb_invalidate_address(pmap, va);
return;
}
- if (pmap->pm_asid[PCPU_GET(cpuid)].gen != PCPU_GET(asid_generation))
+ sched_pin();
+ if (pmap->pm_asid[PCPU_GET(cpuid)].gen != PCPU_GET(asid_generation)) {
+ sched_unpin();
return;
- else if (!(pmap->pm_active & PCPU_GET(cpumask))) {
+ } else if (!CPU_OVERLAP(&pmap->pm_active, PCPU_PTR(cpumask))) {
pmap->pm_asid[PCPU_GET(cpuid)].gen = 0;
+ sched_unpin();
return;
}
+ sched_unpin();
tlb_invalidate_address(pmap, va);
}
@@ -716,12 +724,16 @@ pmap_update_page_local(pmap_t pmap, vm_offset_t va, pt_entry_t pte)
tlb_update(pmap, va, pte);
return;
}
- if (pmap->pm_asid[PCPU_GET(cpuid)].gen != PCPU_GET(asid_generation))
+ sched_pin();
+ if (pmap->pm_asid[PCPU_GET(cpuid)].gen != PCPU_GET(asid_generation)) {
+ sched_unpin();
return;
- else if (!(pmap->pm_active & PCPU_GET(cpumask))) {
+ } else if (!CPU_OVERLAP(&pmap->pm_active, PCPU_PTR(cpumask))) {
pmap->pm_asid[PCPU_GET(cpuid)].gen = 0;
+ sched_unpin();
return;
}
+ sched_unpin();
tlb_update(pmap, va, pte);
}
@@ -1041,7 +1053,7 @@ pmap_pinit0(pmap_t pmap)
PMAP_LOCK_INIT(pmap);
pmap->pm_segtab = kernel_segmap;
- pmap->pm_active = 0;
+ CPU_ZERO(&pmap->pm_active);
pmap->pm_ptphint = NULL;
for (i = 0; i < MAXCPU; i++) {
pmap->pm_asid[i].asid = PMAP_ASID_RESERVED;
@@ -1102,7 +1114,7 @@ pmap_pinit(pmap_t pmap)
ptdva = MIPS_PHYS_TO_DIRECT(VM_PAGE_TO_PHYS(ptdpg));
pmap->pm_segtab = (pd_entry_t *)ptdva;
- pmap->pm_active = 0;
+ CPU_ZERO(&pmap->pm_active);
pmap->pm_ptphint = NULL;
for (i = 0; i < MAXCPU; i++) {
pmap->pm_asid[i].asid = PMAP_ASID_RESERVED;
@@ -2948,8 +2960,8 @@ pmap_activate(struct thread *td)
oldpmap = PCPU_GET(curpmap);
if (oldpmap)
- atomic_clear_32(&oldpmap->pm_active, PCPU_GET(cpumask));
- atomic_set_32(&pmap->pm_active, PCPU_GET(cpumask));
+ CPU_NAND_ATOMIC(&oldpmap->pm_active, PCPU_PTR(cpumask));
+ CPU_OR_ATOMIC(&pmap->pm_active, PCPU_PTR(cpumask));
pmap_asid_alloc(pmap);
if (td == curthread) {
PCPU_SET(segbase, pmap->pm_segtab);
@@ -3283,7 +3295,7 @@ pmap_kextract(vm_offset_t va)
pt_entry_t *ptep;
/* Is the kernel pmap initialized? */
- if (kernel_pmap->pm_active) {
+ if (!CPU_EMPTY(&kernel_pmap->pm_active)) {
/* It's inside the virtual address range */
ptep = pmap_pte(kernel_pmap, va);
if (ptep) {
diff --git a/sys/mips/rmi/xlr_machdep.c b/sys/mips/rmi/xlr_machdep.c
index 4a1734a..836c605 100644
--- a/sys/mips/rmi/xlr_machdep.c
+++ b/sys/mips/rmi/xlr_machdep.c
@@ -614,11 +614,15 @@ platform_processor_id(void)
return (xlr_hwtid_to_cpuid[xlr_cpu_id()]);
}
-cpumask_t
-platform_cpu_mask(void)
+void
+platform_cpu_mask(cpuset_t *mask)
{
+ int i, s;
- return (~0U >> (32 - (xlr_ncores * xlr_threads_per_core)));
+ CPU_ZERO(mask);
+ s = xlr_ncores * xlr_threads_per_core;
+ for (i = 0; i < s; i++)
+ CPU_SET(i, mask);
}
struct cpu_group *
diff --git a/sys/mips/sibyte/sb_scd.c b/sys/mips/sibyte/sb_scd.c
index e5ac23c..50b9987 100644
--- a/sys/mips/sibyte/sb_scd.c
+++ b/sys/mips/sibyte/sb_scd.c
@@ -32,6 +32,7 @@ __FBSDID("$FreeBSD$");
#include <sys/systm.h>
#include <sys/module.h>
#include <sys/bus.h>
+#include <sys/cpuset.h>
#include <machine/resource.h>
#include <machine/hwfunc.h>
@@ -242,11 +243,15 @@ sb_clear_mailbox(int cpu, uint64_t val)
sb_store64(regaddr, val);
}
-cpumask_t
-platform_cpu_mask(void)
+void
+platform_cpu_mask(cpuset_t *mask)
{
+ int i, s;
- return (~0U >> (32 - SYSREV_NUM_PROCESSORS(sb_read_sysrev())));
+ CPU_ZERO(mask);
+ s = SYSREV_NUM_PROCESSORS(sb_read_sysrev());
+ for (i = 0; i < s; i++)
+ CPU_SET(i, mask);
}
#endif /* SMP */
diff --git a/sys/net/bridgestp.c b/sys/net/bridgestp.c
index 2993838..e263b0b 100644
--- a/sys/net/bridgestp.c
+++ b/sys/net/bridgestp.c
@@ -1860,6 +1860,8 @@ bstp_tick(void *arg)
if (bs->bs_running == 0)
return;
+ CURVNET_SET(bs->bs_vnet);
+
/* slow timer to catch missed link events */
if (bstp_timer_expired(&bs->bs_link_timer)) {
LIST_FOREACH(bp, &bs->bs_bplist, bp_next)
@@ -1893,6 +1895,8 @@ bstp_tick(void *arg)
bp->bp_txcount--;
}
+ CURVNET_RESTORE();
+
callout_reset(&bs->bs_bstpcallout, hz, bstp_tick, bs);
}
@@ -2126,6 +2130,7 @@ bstp_attach(struct bstp_state *bs, struct bstp_cb_ops *cb)
bs->bs_protover = BSTP_PROTO_RSTP;
bs->bs_state_cb = cb->bcb_state;
bs->bs_rtage_cb = cb->bcb_rtage;
+ bs->bs_vnet = curvnet;
getmicrotime(&bs->bs_last_tc_time);
diff --git a/sys/net/bridgestp.h b/sys/net/bridgestp.h
index 74086fc..fdf16aa 100644
--- a/sys/net/bridgestp.h
+++ b/sys/net/bridgestp.h
@@ -358,6 +358,7 @@ struct bstp_state {
LIST_HEAD(, bstp_port) bs_bplist;
bstp_state_cb_t bs_state_cb;
bstp_rtage_cb_t bs_rtage_cb;
+ struct vnet *bs_vnet;
};
#define BSTP_LOCK_INIT(_bs) mtx_init(&(_bs)->bs_mtx, "bstp", NULL, MTX_DEF)
diff --git a/sys/net/if_stf.c b/sys/net/if_stf.c
index 4f904a5..7d136fd 100644
--- a/sys/net/if_stf.c
+++ b/sys/net/if_stf.c
@@ -3,7 +3,7 @@
/*-
* Copyright (C) 2000 WIDE Project.
- * Copyright (c) 2010 Hiroki Sato <hrs@FreeBSD.org>
+ * Copyright (c) 2010-2011 Hiroki Sato <hrs@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -32,7 +32,7 @@
*/
/*
- * 6to4 interface, based on RFC3056 + 6rd (RFC5569) support.
+ * 6to4 interface, based on RFC 3056 + 6rd (RFC 5969) support.
*
* 6to4 interface is NOT capable of link-layer (I mean, IPv4) multicasting.
* There is no address mapping defined from IPv6 multicast address to IPv4
@@ -74,10 +74,9 @@
* for details. The code tries to filter out some of malicious packets.
* Note that there is no way to be 100% secure.
*
- * 6rd (RFC5569) extension is enabled when an IPv6 GUA other than
- * 2002::/16 is assigned. The stf(4) recognizes a 32-bit just after
- * prefixlen as the IPv4 address of the 6rd customer site. The
- * prefixlen must be shorter than 32.
+ * 6rd (RFC 5969) extension is enabled when an IPv6 GUA other than
+ * 2002::/16 is assigned. The stf(4) calculates a 6rd delegated
+ * prefix from a 6rd prefix and an IPv4 address.
*
*/
@@ -280,10 +279,10 @@ stf_clone_create(struct if_clone *ifc, int unit, caddr_t params)
LIST_INSERT_HEAD(&V_stf_softc_list, sc, stf_list);
mtx_unlock(&stf_mtx);
- sc->sc_ifaddr_event_tag = EVENTHANDLER_REGISTER(ifaddr_event,
- stf_ifaddr_change,
- NULL,
- EVENTHANDLER_PRI_ANY);
+ sc->sc_ifaddr_event_tag =
+ EVENTHANDLER_REGISTER(ifaddr_event, stf_ifaddr_change, NULL,
+ EVENTHANDLER_PRI_ANY);
+
return (0);
}
@@ -1367,35 +1366,20 @@ stf_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
case SIOCSIFADDR:
DEBUG_PRINTF(1, "enter SIOCSIFADDR.\n");
ifa = (struct ifaddr *)data;
- if (ifa == NULL || ifa->ifa_addr->sa_family != AF_INET6) {
+ if (ifa == NULL) {
error = EAFNOSUPPORT;
break;
}
- ifa->ifa_rtrequest = stf_rtrequest;
- ifp->if_flags |= IFF_UP;
- break;
-
-/*
- case STFSSRDADDR:
- ifra6 = (struct in6_aliasreq *)data;
- if (ifra6 == NULL || ifra6->ifra_addr->sa_family != AF_INET6) {
- error = EAFNOSUPPORT;
+ if (ifa->ifa_addr->sa_family == AF_INET6 &&
+ ifa->ifa_dstaddr->sa_family == AF_INET &&
+ ifa->ifa_netmask->sa_family == AF_INET6) {
+ ifa->ifa_rtrequest = stf_rtrequest;
+ ifp->if_flags |= IFF_UP;
+ } else {
+ error = EINVAL;
break;
}
- sa6 = &ifra6->ifra_addr;
- if (ifra6->ifra_dstaddr->sa_family != AF_INET) {
- error = EAFNOSUPPORT;
- break;
- }
- memcpy(&ifra.ifra_addr, sa6, sizeof(ifra.ifra_addr));
- error = in6_control(NULL, SIOCAIFADDR_IN6, (caddr_t)&ifra, ifp, curthread);
- if (error)
- return (error);
-
break;
-
- case STFDSRDADDR:
-*/
case SIOCADDMULTI:
case SIOCDELMULTI:
ifr = (struct ifreq *)data;
diff --git a/sys/netgraph/ng_nat.c b/sys/netgraph/ng_nat.c
index 84da500..59818d9 100644
--- a/sys/netgraph/ng_nat.c
+++ b/sys/netgraph/ng_nat.c
@@ -43,6 +43,7 @@
#include <machine/in_cksum.h>
#include <netinet/libalias/alias.h>
+#include <netinet/libalias/alias_local.h>
#include <netgraph/ng_message.h>
#include <netgraph/ng_parse.h>
@@ -696,22 +697,35 @@ ng_nat_rcvdata(hook_p hook, item_p item )
KASSERT(m->m_pkthdr.len == ntohs(ip->ip_len),
("ng_nat: ip_len != m_pkthdr.len"));
+ /*
+ * We drop the packet when:
+ * 1. libalias returns PKT_ALIAS_ERROR;
+ * 2. For incoming packets:
+ * a) for unresolved fragments;
+ * b) libalias returns PKT_ALIAS_IGNORED and
+ * PKT_ALIAS_DENY_INCOMING flag is set.
+ */
if (hook == priv->in) {
rval = LibAliasIn(priv->lib, c, m->m_len + M_TRAILINGSPACE(m));
- if (rval != PKT_ALIAS_OK &&
- rval != PKT_ALIAS_FOUND_HEADER_FRAGMENT) {
+ if (rval == PKT_ALIAS_ERROR ||
+ rval == PKT_ALIAS_UNRESOLVED_FRAGMENT ||
+ (rval == PKT_ALIAS_IGNORED &&
+ (priv->lib->packetAliasMode &
+ PKT_ALIAS_DENY_INCOMING) != 0)) {
NG_FREE_ITEM(item);
return (EINVAL);
}
} else if (hook == priv->out) {
rval = LibAliasOut(priv->lib, c, m->m_len + M_TRAILINGSPACE(m));
- if (rval != PKT_ALIAS_OK) {
+ if (rval == PKT_ALIAS_ERROR) {
NG_FREE_ITEM(item);
return (EINVAL);
}
} else
panic("ng_nat: unknown hook!\n");
+ if (rval == PKT_ALIAS_RESPOND)
+ m->m_flags |= M_SKIP_FIREWALL;
m->m_pkthdr.len = m->m_len = ntohs(ip->ip_len);
if ((ip->ip_off & htons(IP_OFFMASK)) == 0 &&
diff --git a/sys/netinet/in_pcb.c b/sys/netinet/in_pcb.c
index 4aa998f..4eb309a 100644
--- a/sys/netinet/in_pcb.c
+++ b/sys/netinet/in_pcb.c
@@ -42,6 +42,7 @@ __FBSDID("$FreeBSD$");
#include "opt_ipsec.h"
#include "opt_inet.h"
#include "opt_inet6.h"
+#include "opt_pcbgroup.h"
#include <sys/param.h>
#include <sys/systm.h>
@@ -212,7 +213,7 @@ void
in_pcbinfo_init(struct inpcbinfo *pcbinfo, const char *name,
struct inpcbhead *listhead, int hash_nelements, int porthash_nelements,
char *inpcbzone_name, uma_init inpcbzone_init, uma_fini inpcbzone_fini,
- uint32_t inpcbzone_flags)
+ uint32_t inpcbzone_flags, u_int hashfields)
{
INP_INFO_LOCK_INIT(pcbinfo, name);
@@ -227,6 +228,9 @@ in_pcbinfo_init(struct inpcbinfo *pcbinfo, const char *name,
&pcbinfo->ipi_hashmask);
pcbinfo->ipi_porthashbase = hashinit(porthash_nelements, M_PCB,
&pcbinfo->ipi_porthashmask);
+#ifdef PCBGROUP
+ in_pcbgroup_init(pcbinfo, hashfields, hash_nelements);
+#endif
pcbinfo->ipi_zone = uma_zcreate(inpcbzone_name, sizeof(struct inpcb),
NULL, NULL, inpcbzone_init, inpcbzone_fini, UMA_ALIGN_PTR,
inpcbzone_flags);
@@ -246,6 +250,9 @@ in_pcbinfo_destroy(struct inpcbinfo *pcbinfo)
hashdestroy(pcbinfo->ipi_hashbase, M_PCB, pcbinfo->ipi_hashmask);
hashdestroy(pcbinfo->ipi_porthashbase, M_PCB,
pcbinfo->ipi_porthashmask);
+#ifdef PCBGROUP
+ in_pcbgroup_destroy(pcbinfo);
+#endif
uma_zdestroy(pcbinfo->ipi_zone);
INP_HASH_LOCK_DESTROY(pcbinfo);
INP_INFO_LOCK_DESTROY(pcbinfo);
@@ -1053,7 +1060,8 @@ in_pcbdetach(struct inpcb *inp)
* in_pcbref() bumps the reference count on an inpcb in order to maintain
* stability of an inpcb pointer despite the inpcb lock being released. This
* is used in TCP when the inpcbinfo lock needs to be acquired or upgraded,
- * but where the inpcb lock is already held.
+ * but where the inpcb lock may already be held, or when acquiring a
+ * reference via a pcbgroup.
*
* in_pcbref() should be used only to provide brief memory stability, and
* must always be followed by a call to INP_WLOCK() and in_pcbrele() to
@@ -1223,6 +1231,9 @@ in_pcbdrop(struct inpcb *inp)
}
INP_HASH_WUNLOCK(inp->inp_pcbinfo);
inp->inp_flags &= ~INP_INHASHLIST;
+#ifdef PCBGROUP
+ in_pcbgroup_remove(inp);
+#endif
}
}
@@ -1472,6 +1483,148 @@ in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr,
}
#undef INP_LOOKUP_MAPPED_PCB_COST
+#ifdef PCBGROUP
+/*
+ * Lookup PCB in hash list, using pcbgroup tables.
+ */
+static struct inpcb *
+in_pcblookup_group(struct inpcbinfo *pcbinfo, struct inpcbgroup *pcbgroup,
+ struct in_addr faddr, u_int fport_arg, struct in_addr laddr,
+ u_int lport_arg, int lookupflags, struct ifnet *ifp)
+{
+ struct inpcbhead *head;
+ struct inpcb *inp, *tmpinp;
+ u_short fport = fport_arg, lport = lport_arg;
+
+ /*
+ * First look for an exact match.
+ */
+ tmpinp = NULL;
+ INP_GROUP_LOCK(pcbgroup);
+ head = &pcbgroup->ipg_hashbase[INP_PCBHASH(faddr.s_addr, lport, fport,
+ pcbgroup->ipg_hashmask)];
+ LIST_FOREACH(inp, head, inp_pcbgrouphash) {
+#ifdef INET6
+ /* XXX inp locking */
+ if ((inp->inp_vflag & INP_IPV4) == 0)
+ continue;
+#endif
+ if (inp->inp_faddr.s_addr == faddr.s_addr &&
+ inp->inp_laddr.s_addr == laddr.s_addr &&
+ inp->inp_fport == fport &&
+ inp->inp_lport == lport) {
+ /*
+ * XXX We should be able to directly return
+ * the inp here, without any checks.
+ * Well unless both bound with SO_REUSEPORT?
+ */
+ if (prison_flag(inp->inp_cred, PR_IP4))
+ goto found;
+ if (tmpinp == NULL)
+ tmpinp = inp;
+ }
+ }
+ if (tmpinp != NULL) {
+ inp = tmpinp;
+ goto found;
+ }
+
+ /*
+ * Then look for a wildcard match, if requested.
+ */
+ if ((lookupflags & INPLOOKUP_WILDCARD) != 0) {
+ struct inpcb *local_wild = NULL, *local_exact = NULL;
+#ifdef INET6
+ struct inpcb *local_wild_mapped = NULL;
+#endif
+ struct inpcb *jail_wild = NULL;
+ struct inpcbhead *head;
+ int injail;
+
+ /*
+ * Order of socket selection - we always prefer jails.
+ * 1. jailed, non-wild.
+ * 2. jailed, wild.
+ * 3. non-jailed, non-wild.
+ * 4. non-jailed, wild.
+ */
+ head = &pcbinfo->ipi_wildbase[INP_PCBHASH(INADDR_ANY, lport,
+ 0, pcbinfo->ipi_wildmask)];
+ LIST_FOREACH(inp, head, inp_pcbgroup_wild) {
+#ifdef INET6
+ /* XXX inp locking */
+ if ((inp->inp_vflag & INP_IPV4) == 0)
+ continue;
+#endif
+ if (inp->inp_faddr.s_addr != INADDR_ANY ||
+ inp->inp_lport != lport)
+ continue;
+
+ /* XXX inp locking */
+ if (ifp && ifp->if_type == IFT_FAITH &&
+ (inp->inp_flags & INP_FAITH) == 0)
+ continue;
+
+ injail = prison_flag(inp->inp_cred, PR_IP4);
+ if (injail) {
+ if (prison_check_ip4(inp->inp_cred,
+ &laddr) != 0)
+ continue;
+ } else {
+ if (local_exact != NULL)
+ continue;
+ }
+
+ if (inp->inp_laddr.s_addr == laddr.s_addr) {
+ if (injail)
+ goto found;
+ else
+ local_exact = inp;
+ } else if (inp->inp_laddr.s_addr == INADDR_ANY) {
+#ifdef INET6
+ /* XXX inp locking, NULL check */
+ if (inp->inp_vflag & INP_IPV6PROTO)
+ local_wild_mapped = inp;
+ else
+#endif /* INET6 */
+ if (injail)
+ jail_wild = inp;
+ else
+ local_wild = inp;
+ }
+ } /* LIST_FOREACH */
+ inp = jail_wild;
+ if (inp == NULL)
+ inp = local_exact;
+ if (inp == NULL)
+ inp = local_wild;
+#ifdef INET6
+ if (inp == NULL)
+ inp = local_wild_mapped;
+#endif /* defined(INET6) */
+ if (inp != NULL)
+ goto found;
+ } /* if (lookupflags & INPLOOKUP_WILDCARD) */
+ INP_GROUP_UNLOCK(pcbgroup);
+ return (NULL);
+
+found:
+ in_pcbref(inp);
+ INP_GROUP_UNLOCK(pcbgroup);
+ if (lookupflags & INPLOOKUP_WLOCKPCB) {
+ INP_WLOCK(inp);
+ if (in_pcbrele_wlocked(inp))
+ return (NULL);
+ } else if (lookupflags & INPLOOKUP_RLOCKPCB) {
+ INP_RLOCK(inp);
+ if (in_pcbrele_rlocked(inp))
+ return (NULL);
+ } else
+ panic("%s: locking bug", __func__);
+ return (inp);
+}
+#endif /* PCBGROUP */
+
/*
* Lookup PCB in hash list, using pcbinfo tables. This variation assumes
* that the caller has locked the hash list, and will not perform any further
@@ -1636,17 +1789,30 @@ in_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in_addr faddr,
/*
* Public inpcb lookup routines, accepting a 4-tuple, and optionally, an mbuf
* from which a pre-calculated hash value may be extracted.
+ *
+ * Possibly more of this logic should be in in_pcbgroup.c.
*/
struct inpcb *
in_pcblookup(struct inpcbinfo *pcbinfo, struct in_addr faddr, u_int fport,
struct in_addr laddr, u_int lport, int lookupflags, struct ifnet *ifp)
{
+#if defined(PCBGROUP)
+ struct inpcbgroup *pcbgroup;
+#endif
KASSERT((lookupflags & ~INPLOOKUP_MASK) == 0,
("%s: invalid lookup flags %d", __func__, lookupflags));
KASSERT((lookupflags & (INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)) != 0,
("%s: LOCKPCB not set", __func__));
+#if defined(PCBGROUP)
+ if (in_pcbgroup_enabled(pcbinfo)) {
+ pcbgroup = in_pcbgroup_bytuple(pcbinfo, laddr, lport, faddr,
+ fport);
+ return (in_pcblookup_group(pcbinfo, pcbgroup, faddr, fport,
+ laddr, lport, lookupflags, ifp));
+ }
+#endif
return (in_pcblookup_hash(pcbinfo, faddr, fport, laddr, lport,
lookupflags, ifp));
}
@@ -1656,12 +1822,28 @@ in_pcblookup_mbuf(struct inpcbinfo *pcbinfo, struct in_addr faddr,
u_int fport, struct in_addr laddr, u_int lport, int lookupflags,
struct ifnet *ifp, struct mbuf *m)
{
+#ifdef PCBGROUP
+ struct inpcbgroup *pcbgroup;
+#endif
KASSERT((lookupflags & ~INPLOOKUP_MASK) == 0,
("%s: invalid lookup flags %d", __func__, lookupflags));
KASSERT((lookupflags & (INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)) != 0,
("%s: LOCKPCB not set", __func__));
+#ifdef PCBGROUP
+ if (in_pcbgroup_enabled(pcbinfo)) {
+ pcbgroup = in_pcbgroup_byhash(pcbinfo, M_HASHTYPE_GET(m),
+ m->m_pkthdr.flowid);
+ if (pcbgroup != NULL)
+ return (in_pcblookup_group(pcbinfo, pcbgroup, faddr,
+ fport, laddr, lport, lookupflags, ifp));
+ pcbgroup = in_pcbgroup_bytuple(pcbinfo, laddr, lport, faddr,
+ fport);
+ return (in_pcblookup_group(pcbinfo, pcbgroup, faddr, fport,
+ laddr, lport, lookupflags, ifp));
+ }
+#endif
return (in_pcblookup_hash(pcbinfo, faddr, fport, laddr, lport,
lookupflags, ifp));
}
@@ -1670,8 +1852,8 @@ in_pcblookup_mbuf(struct inpcbinfo *pcbinfo, struct in_addr faddr,
/*
* Insert PCB onto various hash lists.
*/
-int
-in_pcbinshash(struct inpcb *inp)
+static int
+in_pcbinshash_internal(struct inpcb *inp, int do_pcbgroup_update)
{
struct inpcbhead *pcbhash;
struct inpcbporthead *pcbporthash;
@@ -1721,10 +1903,39 @@ in_pcbinshash(struct inpcb *inp)
LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist);
LIST_INSERT_HEAD(pcbhash, inp, inp_hash);
inp->inp_flags |= INP_INHASHLIST;
+#ifdef PCBGROUP
+ if (do_pcbgroup_update)
+ in_pcbgroup_update(inp);
+#endif
return (0);
}
/*
+ * For now, there are two public interfaces to insert an inpcb into the hash
+ * lists -- one that does update pcbgroups, and one that doesn't. The latter
+ * is used only in the TCP syncache, where in_pcbinshash is called before the
+ * full 4-tuple is set for the inpcb, and we don't want to install in the
+ * pcbgroup until later.
+ *
+ * XXXRW: This seems like a misfeature. in_pcbinshash should always update
+ * connection groups, and partially initialised inpcbs should not be exposed
+ * to either reservation hash tables or pcbgroups.
+ */
+int
+in_pcbinshash(struct inpcb *inp)
+{
+
+ return (in_pcbinshash_internal(inp, 1));
+}
+
+int
+in_pcbinshash_nopcbgroup(struct inpcb *inp)
+{
+
+ return (in_pcbinshash_internal(inp, 0));
+}
+
+/*
* Move PCB to the proper hash bucket when { faddr, fport } have been
* changed. NOTE: This does not handle the case of the lport changing (the
* hashed port list would have to be updated as well), so the lport must
@@ -1755,6 +1966,13 @@ in_pcbrehash_mbuf(struct inpcb *inp, struct mbuf *m)
LIST_REMOVE(inp, inp_hash);
LIST_INSERT_HEAD(head, inp, inp_hash);
+
+#ifdef PCBGROUP
+ if (m != NULL)
+ in_pcbgroup_update_mbuf(inp, m);
+ else
+ in_pcbgroup_update(inp);
+#endif
}
void
@@ -1791,6 +2009,9 @@ in_pcbremlists(struct inpcb *inp)
}
LIST_REMOVE(inp, inp_list);
pcbinfo->ipi_count--;
+#ifdef PCBGROUP
+ in_pcbgroup_remove(inp);
+#endif
}
/*
diff --git a/sys/netinet/in_pcb.h b/sys/netinet/in_pcb.h
index 809bc05..dfef963 100644
--- a/sys/netinet/in_pcb.h
+++ b/sys/netinet/in_pcb.h
@@ -44,6 +44,7 @@
#include <sys/_rwlock.h>
#ifdef _KERNEL
+#include <sys/lock.h>
#include <sys/rwlock.h>
#include <net/vnet.h>
#include <vm/uma.h>
@@ -141,6 +142,7 @@ struct icmp6_filter;
*
* Key:
* (c) - Constant after initialization
+ * (g) - Protected by the pcbgroup lock
* (i) - Protected by the inpcb lock
* (p) - Protected by the pcbinfo lock for the inpcb
* (s) - Protected by another subsystem's locks
@@ -160,9 +162,12 @@ struct icmp6_filter;
*/
struct inpcb {
LIST_ENTRY(inpcb) inp_hash; /* (i/p) hash list */
+ LIST_ENTRY(inpcb) inp_pcbgrouphash; /* (g/i) hash list */
LIST_ENTRY(inpcb) inp_list; /* (i/p) list for all PCBs for proto */
void *inp_ppcb; /* (i) pointer to per-protocol pcb */
struct inpcbinfo *inp_pcbinfo; /* (c) PCB list info */
+ struct inpcbgroup *inp_pcbgroup; /* (g/i) PCB group list */
+ LIST_ENTRY(inpcb) inp_pcbgroup_wild; /* (g/i/p) group wildcard entry */
struct socket *inp_socket; /* (i) back pointer to socket */
struct ucred *inp_cred; /* (c) cache of socket cred */
u_int32_t inp_flow; /* (i) IPv6 flow information */
@@ -272,13 +277,14 @@ struct inpcbport {
* the former covering mutable global fields (such as the global pcb list),
* and the latter covering the hashed lookup tables. The lock order is:
*
- * ipi_lock (before) inpcb locks (before) ipi_hash_lock
+ * ipi_lock (before) inpcb locks (before) {ipi_hash_lock, pcbgroup locks}
*
* Locking key:
*
* (c) Constant or nearly constant after initialisation
* (g) Locked by ipi_lock
- * (h) Read using either ipi_hash_lock or inpcb lock; write requires both.
+ * (h) Read using either ipi_hash_lock or inpcb lock; write requires both
+ * (p) Protected by one or more pcbgroup locks
* (x) Synchronisation properties poorly defined
*/
struct inpcbinfo {
@@ -312,7 +318,16 @@ struct inpcbinfo {
struct uma_zone *ipi_zone; /* (c) */
/*
- * Global lock protecting hash lookup tables.
+ * Connection groups associated with this protocol. These fields are
+ * constant, but pcbgroup structures themselves are protected by
+ * per-pcbgroup locks.
+ */
+ struct inpcbgroup *ipi_pcbgroups; /* (c) */
+ u_int ipi_npcbgroups; /* (c) */
+ u_int ipi_hashfields; /* (c) */
+
+ /*
+ * Global lock protecting non-pcbgroup hash lookup tables.
*/
struct rwlock ipi_hash_lock;
@@ -330,6 +345,14 @@ struct inpcbinfo {
u_long ipi_porthashmask; /* (h) */
/*
+ * List of wildcard inpcbs for use with pcbgroups. In the past, this was
+ * per-pcbgroup but is now global. All pcbgroup locks must be held to
+ * modify the list, so holding any one of them is sufficient to read it.
+ */
+ struct inpcbhead *ipi_wildbase; /* (p) */
+ u_long ipi_wildmask; /* (p) */
+
+ /*
* Pointer to network stack instance
*/
struct vnet *ipi_vnet; /* (c) */
@@ -340,6 +363,31 @@ struct inpcbinfo {
void *ipi_pspare[2];
};
+/*
+ * Connection groups hold sets of connections that have similar CPU/thread
+ * affinity. Each connection belongs to exactly one connection group.
+ */
+struct inpcbgroup {
+ /*
+ * Per-connection group hash of inpcbs, hashed by local and foreign
+ * addresses and port numbers.
+ */
+ struct inpcbhead *ipg_hashbase; /* (c) */
+ u_long ipg_hashmask; /* (c) */
+
+ /*
+ * Notional affinity of this pcbgroup.
+ */
+ u_int ipg_cpu; /* (p) */
+
+ /*
+ * Per-connection group lock, not to be confused with ipi_lock.
+ * Protects the hash table hung off the group, but also the global
+ * wildcard list in inpcbinfo.
+ */
+ struct mtx ipg_lock;
+} __aligned(CACHE_LINE_SIZE);
+
#define INP_LOCK_INIT(inp, d, t) \
rw_init_flags(&(inp)->inp_lock, (t), RW_RECURSE | RW_DUPOK)
#define INP_LOCK_DESTROY(inp) rw_destroy(&(inp)->inp_lock)
@@ -423,6 +471,14 @@ void inp_4tuple_get(struct inpcb *inp, uint32_t *laddr, uint16_t *lp,
#define INP_HASH_WLOCK_ASSERT(ipi) rw_assert(&(ipi)->ipi_hash_lock, \
RA_WLOCKED)
+#define INP_GROUP_LOCK_INIT(ipg, d) mtx_init(&(ipg)->ipg_lock, (d), NULL, \
+ MTX_DEF | MTX_DUPOK)
+#define INP_GROUP_LOCK_DESTROY(ipg) mtx_destroy(&(ipg)->ipg_lock)
+
+#define INP_GROUP_LOCK(ipg) mtx_lock(&(ipg)->ipg_lock)
+#define INP_GROUP_LOCK_ASSERT(ipg) mtx_assert(&(ipg)->ipg_lock, MA_OWNED)
+#define INP_GROUP_UNLOCK(ipg) mtx_unlock(&(ipg)->ipg_lock)
+
#define INP_PCBHASH(faddr, lport, fport, mask) \
(((faddr) ^ ((faddr) >> 16) ^ ntohs((lport) ^ (fport))) & (mask))
#define INP_PCBPORTHASH(lport, mask) \
@@ -482,6 +538,7 @@ void inp_4tuple_get(struct inpcb *inp, uint32_t *laddr, uint16_t *lp,
*/
#define INP_LLE_VALID 0x00000001 /* cached lle is valid */
#define INP_RT_VALID 0x00000002 /* cached rtentry is valid */
+#define INP_PCBGROUPWILD 0x00000004 /* in pcbgroup wildcard list */
/*
* Flags passed to in_pcblookup*() functions.
@@ -500,6 +557,13 @@ void inp_4tuple_get(struct inpcb *inp, uint32_t *laddr, uint16_t *lp,
#define INP_CHECK_SOCKAF(so, af) (INP_SOCKAF(so) == af)
+/*
+ * Constants for pcbinfo.ipi_hashfields.
+ */
+#define IPI_HASHFIELDS_NONE 0
+#define IPI_HASHFIELDS_2TUPLE 1
+#define IPI_HASHFIELDS_4TUPLE 2
+
#ifdef _KERNEL
VNET_DECLARE(int, ipport_reservedhigh);
VNET_DECLARE(int, ipport_reservedlow);
@@ -531,7 +595,21 @@ VNET_DECLARE(int, ipport_tcpallocs);
void in_pcbinfo_destroy(struct inpcbinfo *);
void in_pcbinfo_init(struct inpcbinfo *, const char *, struct inpcbhead *,
- int, int, char *, uma_init, uma_fini, uint32_t);
+ int, int, char *, uma_init, uma_fini, uint32_t, u_int);
+
+struct inpcbgroup *
+ in_pcbgroup_byhash(struct inpcbinfo *, u_int, uint32_t);
+struct inpcbgroup *
+ in_pcbgroup_byinpcb(struct inpcb *);
+struct inpcbgroup *
+ in_pcbgroup_bytuple(struct inpcbinfo *, struct in_addr, u_short,
+ struct in_addr, u_short);
+void in_pcbgroup_destroy(struct inpcbinfo *);
+int in_pcbgroup_enabled(struct inpcbinfo *);
+void in_pcbgroup_init(struct inpcbinfo *, u_int, int);
+void in_pcbgroup_remove(struct inpcb *);
+void in_pcbgroup_update(struct inpcb *);
+void in_pcbgroup_update_mbuf(struct inpcb *, struct mbuf *);
void in_pcbpurgeif0(struct inpcbinfo *, struct ifnet *);
int in_pcballoc(struct socket *, struct inpcbinfo *);
@@ -551,6 +629,7 @@ void in_pcbdisconnect(struct inpcb *);
void in_pcbdrop(struct inpcb *);
void in_pcbfree(struct inpcb *);
int in_pcbinshash(struct inpcb *);
+int in_pcbinshash_nopcbgroup(struct inpcb *);
struct inpcb *
in_pcblookup_local(struct inpcbinfo *,
struct in_addr, u_short, int, struct ucred *);
diff --git a/sys/netinet/in_pcbgroup.c b/sys/netinet/in_pcbgroup.c
new file mode 100644
index 0000000..c9f5c70
--- /dev/null
+++ b/sys/netinet/in_pcbgroup.c
@@ -0,0 +1,457 @@
+/*-
+ * Copyright (c) 2010-2011 Juniper Networks, Inc.
+ * All rights reserved.
+ *
+ * This software was developed by Robert N. M. Watson under contract
+ * to Juniper Networks, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+
+__FBSDID("$FreeBSD$");
+
+#include "opt_inet6.h"
+
+#include <sys/param.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/mutex.h>
+#include <sys/smp.h>
+#include <sys/socketvar.h>
+
+#include <netinet/in.h>
+#include <netinet/in_pcb.h>
+#ifdef INET6
+#include <netinet6/in6_pcb.h>
+#endif /* INET6 */
+
+/*
+ * pcbgroups, or "connection groups" are based on Willman, Rixner, and Cox's
+ * 2006 USENIX paper, "An Evaluation of Network Stack Parallelization
+ * Strategies in Modern Operating Systems". This implementation differs
+ * significantly from that described in the paper, in that it attempts to
+ * introduce not just notions of affinity for connections and distribute work
+ * so as to reduce lock contention, but also align those notions with
+ * hardware work distribution strategies such as RSS. In this construction,
+ * connection groups supplement, rather than replace, existing reservation
+ * tables for protocol 4-tuples, offering CPU-affine lookup tables with
+ * minimal cache line migration and lock contention during steady state
+ * operation.
+ *
+ * Internet protocols, such as UDP and TCP, register to use connection groups
+ * by providing an ipi_hashfields value other than IPI_HASHFIELDS_NONE; this
+ * indicates to the connection group code whether a 2-tuple or 4-tuple is
+ * used as an argument to hashes that assign a connection to a particular
+ * group. This must be aligned with any hardware offloaded distribution
+ * model, such as RSS or similar approaches taken in embedded network boards.
+ * Wildcard sockets require special handling, as in Willman 2006, and are
+ * shared between connection groups -- while being protected by group-local
+ * locks. This means that connection establishment and teardown can be
+ * significantly more expensive than without connection groups, but that
+ * steady-state processing can be significantly faster.
+ *
+ * Most of the implementation of connection groups is in this file; however,
+ * connection group lookup is implemented in in_pcb.c alongside reservation
+ * table lookups -- see in_pcblookup_group().
+ *
+ * TODO:
+ *
+ * Implement dynamic rebalancing of buckets with connection groups; when
+ * load is unevenly distributed, search for more optimal balancing on
+ * demand. This might require scaling up the number of connection groups
+ * by <<1.
+ *
+ * Provide an IP 2-tuple or 4-tuple netisr m2cpu handler based on connection
+ * groups for ip_input and ip6_input, allowing non-offloaded work
+ * distribution.
+ *
+ * Expose effective CPU affinity of connections to userspace using socket
+ * options.
+ *
+ * Investigate per-connection affinity overrides based on socket options; an
+ * option could be set, certainly resulting in work being distributed
+ * differently in software, and possibly propagated to supporting hardware
+ * with TCAMs or hardware hash tables. This might require connections to
+ * exist in more than one connection group at a time.
+ *
+ * Hook netisr thread reconfiguration events, and propagate those to RSS so
+ * that rebalancing can occur when the thread pool grows or shrinks.
+ *
+ * Expose per-pcbgroup statistics to userspace monitoring tools such as
+ * netstat, in order to allow better debugging and profiling.
+ */
+
+void
+in_pcbgroup_init(struct inpcbinfo *pcbinfo, u_int hashfields,
+ int hash_nelements)
+{
+ struct inpcbgroup *pcbgroup;
+ u_int numpcbgroups, pgn;
+
+ /*
+ * Only enable connection groups for a protocol if it has been
+ * specifically requested.
+ */
+ if (hashfields == IPI_HASHFIELDS_NONE)
+ return;
+
+ /*
+ * Connection groups are about multi-processor load distribution,
+ * lock contention, and connection CPU affinity. As such, no point
+ * in turning them on for a uniprocessor machine, it only wastes
+ * memory.
+ */
+ if (mp_ncpus == 1)
+ return;
+
+ /*
+ * Use one group per CPU for now. If we decide to do dynamic
+ * rebalancing a la RSS, we'll need to shift left by at least 1.
+ */
+ numpcbgroups = mp_ncpus;
+
+ pcbinfo->ipi_hashfields = hashfields;
+ pcbinfo->ipi_pcbgroups = malloc(numpcbgroups *
+ sizeof(*pcbinfo->ipi_pcbgroups), M_PCB, M_WAITOK | M_ZERO);
+ pcbinfo->ipi_npcbgroups = numpcbgroups;
+ pcbinfo->ipi_wildbase = hashinit(hash_nelements, M_PCB,
+ &pcbinfo->ipi_wildmask);
+ for (pgn = 0; pgn < pcbinfo->ipi_npcbgroups; pgn++) {
+ pcbgroup = &pcbinfo->ipi_pcbgroups[pgn];
+ pcbgroup->ipg_hashbase = hashinit(hash_nelements, M_PCB,
+ &pcbgroup->ipg_hashmask);
+ INP_GROUP_LOCK_INIT(pcbgroup, "pcbgroup");
+
+ /*
+ * Initialise notional affinity of the pcbgroup -- for RSS,
+ * we want the same notion of affinity as NICs to be used.
+ * Just round robin for the time being.
+ */
+ pcbgroup->ipg_cpu = (pgn % mp_ncpus);
+ }
+}
+
+void
+in_pcbgroup_destroy(struct inpcbinfo *pcbinfo)
+{
+ struct inpcbgroup *pcbgroup;
+ u_int pgn;
+
+ if (pcbinfo->ipi_npcbgroups == 0)
+ return;
+
+ for (pgn = 0; pgn < pcbinfo->ipi_npcbgroups; pgn++) {
+ pcbgroup = &pcbinfo->ipi_pcbgroups[pgn];
+ KASSERT(LIST_EMPTY(pcbinfo->ipi_listhead),
+ ("in_pcbinfo_destroy: listhead not empty"));
+ INP_GROUP_LOCK_DESTROY(pcbgroup);
+ hashdestroy(pcbgroup->ipg_hashbase, M_PCB,
+ pcbgroup->ipg_hashmask);
+ }
+ hashdestroy(pcbinfo->ipi_wildbase, M_PCB, pcbinfo->ipi_wildmask);
+ free(pcbinfo->ipi_pcbgroups, M_PCB);
+ pcbinfo->ipi_pcbgroups = NULL;
+ pcbinfo->ipi_npcbgroups = 0;
+ pcbinfo->ipi_hashfields = 0;
+}
+
+/*
+ * Given a hash of whatever the covered tuple might be, return a pcbgroup
+ * index.
+ */
+static __inline u_int
+in_pcbgroup_getbucket(struct inpcbinfo *pcbinfo, uint32_t hash)
+{
+
+ return (hash % pcbinfo->ipi_npcbgroups);
+}
+
+/*
+ * Map a (hashtype, hash) tuple into a connection group, or NULL if the hash
+ * information is insufficient to identify the pcbgroup.
+ */
+struct inpcbgroup *
+in_pcbgroup_byhash(struct inpcbinfo *pcbinfo, u_int hashtype, uint32_t hash)
+{
+
+ return (NULL);
+}
+
+static struct inpcbgroup *
+in_pcbgroup_bymbuf(struct inpcbinfo *pcbinfo, struct mbuf *m)
+{
+
+ return (in_pcbgroup_byhash(pcbinfo, M_HASHTYPE_GET(m),
+ m->m_pkthdr.flowid));
+}
+
+struct inpcbgroup *
+in_pcbgroup_bytuple(struct inpcbinfo *pcbinfo, struct in_addr laddr,
+ u_short lport, struct in_addr faddr, u_short fport)
+{
+ uint32_t hash;
+
+ switch (pcbinfo->ipi_hashfields) {
+ case IPI_HASHFIELDS_4TUPLE:
+ hash = faddr.s_addr ^ fport;
+ break;
+
+ case IPI_HASHFIELDS_2TUPLE:
+ hash = faddr.s_addr ^ laddr.s_addr;
+ break;
+
+ default:
+ hash = 0;
+ }
+ return (&pcbinfo->ipi_pcbgroups[in_pcbgroup_getbucket(pcbinfo,
+ hash)]);
+}
+
+struct inpcbgroup *
+in_pcbgroup_byinpcb(struct inpcb *inp)
+{
+
+ return (in_pcbgroup_bytuple(inp->inp_pcbinfo, inp->inp_laddr,
+ inp->inp_lport, inp->inp_faddr, inp->inp_fport));
+}
+
+static void
+in_pcbwild_add(struct inpcb *inp)
+{
+ struct inpcbinfo *pcbinfo;
+ struct inpcbhead *head;
+ u_int pgn;
+
+ INP_WLOCK_ASSERT(inp);
+ KASSERT(!(inp->inp_flags2 & INP_PCBGROUPWILD),
+ ("%s: is wild",__func__));
+
+ pcbinfo = inp->inp_pcbinfo;
+ for (pgn = 0; pgn < pcbinfo->ipi_npcbgroups; pgn++)
+ INP_GROUP_LOCK(&pcbinfo->ipi_pcbgroups[pgn]);
+ head = &pcbinfo->ipi_wildbase[INP_PCBHASH(INADDR_ANY, inp->inp_lport,
+ 0, pcbinfo->ipi_wildmask)];
+ LIST_INSERT_HEAD(head, inp, inp_pcbgroup_wild);
+ inp->inp_flags2 |= INP_PCBGROUPWILD;
+ for (pgn = 0; pgn < pcbinfo->ipi_npcbgroups; pgn++)
+ INP_GROUP_UNLOCK(&pcbinfo->ipi_pcbgroups[pgn]);
+}
+
+static void
+in_pcbwild_remove(struct inpcb *inp)
+{
+ struct inpcbinfo *pcbinfo;
+ u_int pgn;
+
+ INP_WLOCK_ASSERT(inp);
+ KASSERT((inp->inp_flags2 & INP_PCBGROUPWILD),
+ ("%s: not wild", __func__));
+
+ pcbinfo = inp->inp_pcbinfo;
+ for (pgn = 0; pgn < pcbinfo->ipi_npcbgroups; pgn++)
+ INP_GROUP_LOCK(&pcbinfo->ipi_pcbgroups[pgn]);
+ LIST_REMOVE(inp, inp_pcbgroup_wild);
+ for (pgn = 0; pgn < pcbinfo->ipi_npcbgroups; pgn++)
+ INP_GROUP_UNLOCK(&pcbinfo->ipi_pcbgroups[pgn]);
+ inp->inp_flags2 &= ~INP_PCBGROUPWILD;
+}
+
+static __inline int
+in_pcbwild_needed(struct inpcb *inp)
+{
+
+#ifdef INET6
+ if (inp->inp_vflag & INP_IPV6)
+ return (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr));
+ else
+#endif
+ return (inp->inp_faddr.s_addr == htonl(INADDR_ANY));
+}
+
+static void
+in_pcbwild_update_internal(struct inpcb *inp)
+{
+ int wildcard_needed;
+
+ wildcard_needed = in_pcbwild_needed(inp);
+ if (wildcard_needed && !(inp->inp_flags2 & INP_PCBGROUPWILD))
+ in_pcbwild_add(inp);
+ else if (!wildcard_needed && (inp->inp_flags2 & INP_PCBGROUPWILD))
+ in_pcbwild_remove(inp);
+}
+
+/*
+ * Update the pcbgroup of an inpcb, which might include removing an old
+ * pcbgroup reference and/or adding a new one. Wildcard processing is not
+ * performed here, although ideally we'll never install a pcbgroup for a
+ * wildcard inpcb (asserted below).
+ */
+static void
+in_pcbgroup_update_internal(struct inpcbinfo *pcbinfo,
+ struct inpcbgroup *newpcbgroup, struct inpcb *inp)
+{
+ struct inpcbgroup *oldpcbgroup;
+ struct inpcbhead *pcbhash;
+ uint32_t hashkey_faddr;
+
+ INP_WLOCK_ASSERT(inp);
+
+ oldpcbgroup = inp->inp_pcbgroup;
+ if (oldpcbgroup != NULL && oldpcbgroup != newpcbgroup) {
+ INP_GROUP_LOCK(oldpcbgroup);
+ LIST_REMOVE(inp, inp_pcbgrouphash);
+ inp->inp_pcbgroup = NULL;
+ INP_GROUP_UNLOCK(oldpcbgroup);
+ }
+ if (newpcbgroup != NULL && oldpcbgroup != newpcbgroup) {
+#ifdef INET6
+ if (inp->inp_vflag & INP_IPV6)
+ hashkey_faddr = inp->in6p_faddr.s6_addr32[3]; /* XXX */
+ else
+#endif
+ hashkey_faddr = inp->inp_faddr.s_addr;
+ INP_GROUP_LOCK(newpcbgroup);
+ pcbhash = &newpcbgroup->ipg_hashbase[
+ INP_PCBHASH(hashkey_faddr, inp->inp_lport, inp->inp_fport,
+ newpcbgroup->ipg_hashmask)];
+ LIST_INSERT_HEAD(pcbhash, inp, inp_pcbgrouphash);
+ inp->inp_pcbgroup = newpcbgroup;
+ INP_GROUP_UNLOCK(newpcbgroup);
+ }
+
+ KASSERT(!(newpcbgroup != NULL && in_pcbwild_needed(inp)),
+ ("%s: pcbgroup and wildcard!", __func__));
+}
+
+/*
+ * Two update paths: one in which the 4-tuple on an inpcb has been updated
+ * and therefore connection groups may need to change (or a wildcard entry
+ * may need to be installed), and another in which the 4-tuple has been
+ * set as a result of a packet received, in which case we may be able to use
+ * the hash on the mbuf to avoid doing a software hash calculation for RSS.
+ *
+ * In each case: first, let the wildcard code have a go at placing it as a
+ * wildcard socket. If it was a wildcard, or if the connection has been
+ * dropped, then no pcbgroup is required (so potentially clear it);
+ * otherwise, calculate and update the pcbgroup for the inpcb.
+ */
+void
+in_pcbgroup_update(struct inpcb *inp)
+{
+ struct inpcbinfo *pcbinfo;
+ struct inpcbgroup *newpcbgroup;
+
+ INP_WLOCK_ASSERT(inp);
+
+ pcbinfo = inp->inp_pcbinfo;
+ if (!in_pcbgroup_enabled(pcbinfo))
+ return;
+
+ in_pcbwild_update_internal(inp);
+ if (!(inp->inp_flags2 & INP_PCBGROUPWILD) &&
+ !(inp->inp_flags & INP_DROPPED)) {
+#ifdef INET6
+ if (inp->inp_vflag & INP_IPV6)
+ newpcbgroup = in6_pcbgroup_byinpcb(inp);
+ else
+#endif
+ newpcbgroup = in_pcbgroup_byinpcb(inp);
+ } else
+ newpcbgroup = NULL;
+ in_pcbgroup_update_internal(pcbinfo, newpcbgroup, inp);
+}
+
+void
+in_pcbgroup_update_mbuf(struct inpcb *inp, struct mbuf *m)
+{
+ struct inpcbinfo *pcbinfo;
+ struct inpcbgroup *newpcbgroup;
+
+ INP_WLOCK_ASSERT(inp);
+
+ pcbinfo = inp->inp_pcbinfo;
+ if (!in_pcbgroup_enabled(pcbinfo))
+ return;
+
+ /*
+ * Possibly should assert !INP_PCBGROUPWILD rather than testing for
+ * it; presumably this function should never be called for anything
+ * other than a non-wildcard socket?
+ */
+ in_pcbwild_update_internal(inp);
+ if (!(inp->inp_flags2 & INP_PCBGROUPWILD) &&
+ !(inp->inp_flags & INP_DROPPED)) {
+ newpcbgroup = in_pcbgroup_bymbuf(pcbinfo, m);
+#ifdef INET6
+ if (inp->inp_vflag & INP_IPV6) {
+ if (newpcbgroup == NULL)
+ newpcbgroup = in6_pcbgroup_byinpcb(inp);
+ } else {
+#endif
+ if (newpcbgroup == NULL)
+ newpcbgroup = in_pcbgroup_byinpcb(inp);
+#ifdef INET6
+ }
+#endif
+ } else
+ newpcbgroup = NULL;
+ in_pcbgroup_update_internal(pcbinfo, newpcbgroup, inp);
+}
+
+/*
+ * Remove pcbgroup entry and optional pcbgroup wildcard entry for this inpcb.
+ */
+void
+in_pcbgroup_remove(struct inpcb *inp)
+{
+ struct inpcbgroup *pcbgroup;
+
+ INP_WLOCK_ASSERT(inp);
+
+ if (!in_pcbgroup_enabled(inp->inp_pcbinfo))
+ return;
+
+ if (inp->inp_flags2 & INP_PCBGROUPWILD)
+ in_pcbwild_remove(inp);
+
+ pcbgroup = inp->inp_pcbgroup;
+ if (pcbgroup != NULL) {
+ INP_GROUP_LOCK(pcbgroup);
+ LIST_REMOVE(inp, inp_pcbgrouphash);
+ inp->inp_pcbgroup = NULL;
+ INP_GROUP_UNLOCK(pcbgroup);
+ }
+}
+
+/*
+ * Query whether or not it is appropriate to use pcbgroups to look up inpcbs
+ * for a protocol.
+ */
+int
+in_pcbgroup_enabled(struct inpcbinfo *pcbinfo)
+{
+
+ return (pcbinfo->ipi_npcbgroups > 0);
+}
diff --git a/sys/netinet/ip_divert.c b/sys/netinet/ip_divert.c
index 6f5bce7..527ce56 100644
--- a/sys/netinet/ip_divert.c
+++ b/sys/netinet/ip_divert.c
@@ -153,7 +153,8 @@ div_init(void)
* place for hashbase == NULL.
*/
in_pcbinfo_init(&V_divcbinfo, "div", &V_divcb, 1, 1, "divcb",
- div_inpcb_init, div_inpcb_fini, UMA_ZONE_NOFREE);
+ div_inpcb_init, div_inpcb_fini, UMA_ZONE_NOFREE,
+ IPI_HASHFIELDS_NONE);
}
static void
diff --git a/sys/netinet/ip_input.c b/sys/netinet/ip_input.c
index ac1c723..67fcb74 100644
--- a/sys/netinet/ip_input.c
+++ b/sys/netinet/ip_input.c
@@ -488,7 +488,7 @@ tooshort:
}
#ifdef IPSEC
/*
- * Bypass packet filtering for packets from a tunnel (gif).
+ * Bypass packet filtering for packets previously handled by IPsec.
*/
if (ip_ipsec_filtertunnel(m))
goto passin;
diff --git a/sys/netinet/ip_ipsec.c b/sys/netinet/ip_ipsec.c
index 50a6ce4..a3c87f5 100644
--- a/sys/netinet/ip_ipsec.c
+++ b/sys/netinet/ip_ipsec.c
@@ -95,7 +95,7 @@ ip_ipsec_filtertunnel(struct mbuf *m)
#if defined(IPSEC)
/*
- * Bypass packet filtering for packets from a tunnel.
+ * Bypass packet filtering for packets previously handled by IPsec.
*/
if (!V_ip4_ipsec_filtertunnel &&
m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL) != NULL)
diff --git a/sys/netinet/ipfw/ip_fw2.c b/sys/netinet/ipfw/ip_fw2.c
index b4d3abb..49c48b9 100644
--- a/sys/netinet/ipfw/ip_fw2.c
+++ b/sys/netinet/ipfw/ip_fw2.c
@@ -692,6 +692,10 @@ check_uidgid(ipfw_insn_u32 *insn, int proto, struct ifnet *oif,
lookupflags |= INPLOOKUP_RLOCKPCB;
match = 0;
if (*ugid_lookupp == 0) {
+ /*
+ * XXXRW: If we had the mbuf here, could use
+ * in_pcblookup_mbuf().
+ */
pcb = (oif) ?
in_pcblookup(pi,
dst_ip, htons(dst_port),
diff --git a/sys/netinet/ipfw/ip_fw_nat.c b/sys/netinet/ipfw/ip_fw_nat.c
index f8c3e63..fd6f09a 100644
--- a/sys/netinet/ipfw/ip_fw_nat.c
+++ b/sys/netinet/ipfw/ip_fw_nat.c
@@ -262,17 +262,27 @@ ipfw_nat(struct ip_fw_args *args, struct cfg_nat *t, struct mbuf *m)
else
retval = LibAliasOut(t->lib, c,
mcl->m_len + M_TRAILINGSPACE(mcl));
- if (retval == PKT_ALIAS_RESPOND) {
- m->m_flags |= M_SKIP_FIREWALL;
- retval = PKT_ALIAS_OK;
- }
- if (retval != PKT_ALIAS_OK &&
- retval != PKT_ALIAS_FOUND_HEADER_FRAGMENT) {
+
+ /*
+ * We drop packet when:
+ * 1. libalias returns PKT_ALIAS_ERROR;
+ * 2. For incoming packets:
+ * a) for unresolved fragments;
+ * b) libalias returns PKT_ALIAS_IGNORED and
+ * PKT_ALIAS_DENY_INCOMING flag is set.
+ */
+ if (retval == PKT_ALIAS_ERROR ||
+ (args->oif == NULL && (retval == PKT_ALIAS_UNRESOLVED_FRAGMENT ||
+ (retval == PKT_ALIAS_IGNORED &&
+ (t->lib->packetAliasMode & PKT_ALIAS_DENY_INCOMING) != 0)))) {
/* XXX - should i add some logging? */
m_free(mcl);
args->m = NULL;
return (IP_FW_DENY);
}
+
+ if (retval == PKT_ALIAS_RESPOND)
+ m->m_flags |= M_SKIP_FIREWALL;
mcl->m_pkthdr.len = mcl->m_len = ntohs(ip->ip_len);
/*
diff --git a/sys/netinet/ipfw/ip_fw_sockopt.c b/sys/netinet/ipfw/ip_fw_sockopt.c
index f81d57d..2347456 100644
--- a/sys/netinet/ipfw/ip_fw_sockopt.c
+++ b/sys/netinet/ipfw/ip_fw_sockopt.c
@@ -349,12 +349,13 @@ del_entry(struct ip_fw_chain *chain, uint32_t arg)
}
if (n == 0) {
- /* A flush request (arg == 0) on empty ruleset
- * returns with no error. On the contrary,
+ /* A flush request (arg == 0 or cmd == 1) on empty
+ * ruleset returns with no error. On the contrary,
* if there is no match on a specific request,
* we return EINVAL.
*/
- error = (arg == 0) ? 0 : EINVAL;
+ if (arg != 0 && cmd != 1)
+ error = EINVAL;
break;
}
diff --git a/sys/netinet/libalias/alias_sctp.h b/sys/netinet/libalias/alias_sctp.h
index 80ed965..99d54ce 100644
--- a/sys/netinet/libalias/alias_sctp.h
+++ b/sys/netinet/libalias/alias_sctp.h
@@ -135,13 +135,13 @@ struct sctp_nat_assoc {
struct in_addr a_addr; /**< alias ip address */
int state; /**< current state of NAT association */
int TableRegister; /**< stores which look up tables association is registered in */
- int exp; /**< timer expiration in seconds from uptime */
+ int exp; /**< timer expiration in seconds from uptime */
int exp_loc; /**< current location in timer_Q */
int num_Gaddr; /**< number of global IP addresses in the list */
LIST_HEAD(sctpGlobalAddresshead,sctp_GlobalAddress) Gaddr; /**< List of global addresses */
- LIST_ENTRY (sctp_nat_assoc) list_L; /**< Linked list of pointers for Local table*/
- LIST_ENTRY (sctp_nat_assoc) list_G; /**< Linked list of pointers for Global table */
- LIST_ENTRY (sctp_nat_assoc) timer_Q; /**< Linked list of pointers for timer Q */
+ LIST_ENTRY (sctp_nat_assoc) list_L; /**< Linked list of pointers for Local table*/
+ LIST_ENTRY (sctp_nat_assoc) list_G; /**< Linked list of pointers for Global table */
+ LIST_ENTRY (sctp_nat_assoc) timer_Q; /**< Linked list of pointers for timer Q */
//Using libalias locking
};
diff --git a/sys/netinet/raw_ip.c b/sys/netinet/raw_ip.c
index 635f08f..e754b88 100644
--- a/sys/netinet/raw_ip.c
+++ b/sys/netinet/raw_ip.c
@@ -205,7 +205,8 @@ rip_init(void)
{
in_pcbinfo_init(&V_ripcbinfo, "rip", &V_ripcb, INP_PCBHASH_RAW_SIZE,
- 1, "ripcb", rip_inpcb_init, NULL, UMA_ZONE_NOFREE);
+ 1, "ripcb", rip_inpcb_init, NULL, UMA_ZONE_NOFREE,
+ IPI_HASHFIELDS_NONE);
EVENTHANDLER_REGISTER(maxsockets_change, rip_zone_change, NULL,
EVENTHANDLER_PRI_ANY);
}
diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c
index 06854ec..6ed58911 100644
--- a/sys/netinet/tcp_subr.c
+++ b/sys/netinet/tcp_subr.c
@@ -300,7 +300,8 @@ tcp_init(void)
hashsize = 512; /* safe default */
}
in_pcbinfo_init(&V_tcbinfo, "tcp", &V_tcb, hashsize, hashsize,
- "tcp_inpcb", tcp_inpcb_init, NULL, UMA_ZONE_NOFREE);
+ "tcp_inpcb", tcp_inpcb_init, NULL, UMA_ZONE_NOFREE,
+ IPI_HASHFIELDS_4TUPLE);
/*
* These have to be type stable for the benefit of the timers.
diff --git a/sys/netinet/tcp_syncache.c b/sys/netinet/tcp_syncache.c
index 5125134..66e4732 100644
--- a/sys/netinet/tcp_syncache.c
+++ b/sys/netinet/tcp_syncache.c
@@ -36,6 +36,7 @@ __FBSDID("$FreeBSD$");
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_ipsec.h"
+#include "opt_pcbgroup.h"
#include <sys/param.h>
#include <sys/systm.h>
@@ -676,8 +677,14 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m)
#ifdef INET6
}
#endif
+
+ /*
+ * Install in the reservation hash table for now, but don't yet
+ * install a connection group since the full 4-tuple isn't yet
+ * configured.
+ */
inp->inp_lport = sc->sc_inc.inc_lport;
- if ((error = in_pcbinshash(inp)) != 0) {
+ if ((error = in_pcbinshash_nopcbgroup(inp)) != 0) {
/*
* Undo the assignments above if we failed to
* put the PCB on the hash lists.
diff --git a/sys/netinet/udp_usrreq.c b/sys/netinet/udp_usrreq.c
index fd864c0..28eb8fd 100644
--- a/sys/netinet/udp_usrreq.c
+++ b/sys/netinet/udp_usrreq.c
@@ -186,7 +186,8 @@ udp_init(void)
{
in_pcbinfo_init(&V_udbinfo, "udp", &V_udb, UDBHASHSIZE, UDBHASHSIZE,
- "udp_inpcb", udp_inpcb_init, NULL, UMA_ZONE_NOFREE);
+ "udp_inpcb", udp_inpcb_init, NULL, UMA_ZONE_NOFREE,
+ IPI_HASHFIELDS_2TUPLE);
V_udpcb_zone = uma_zcreate("udpcb", sizeof(struct udpcb),
NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
uma_zone_set_max(V_udpcb_zone, maxsockets);
diff --git a/sys/netinet6/in6_pcb.c b/sys/netinet6/in6_pcb.c
index da73f21..d15c605 100644
--- a/sys/netinet6/in6_pcb.c
+++ b/sys/netinet6/in6_pcb.c
@@ -70,6 +70,7 @@ __FBSDID("$FreeBSD$");
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_ipsec.h"
+#include "opt_pcbgroup.h"
#include <sys/param.h>
#include <sys/systm.h>
@@ -827,6 +828,141 @@ in6_rtchange(struct inpcb *inp, int errno)
return inp;
}
+#ifdef PCBGROUP
+/*
+ * Lookup PCB in hash list, using pcbgroup tables.
+ *
+ * The pcbgroup's own hash chain is searched first for an exact match on
+ * (faddr, fport, laddr, lport); a match whose credential has PR_IP6 set is
+ * taken immediately, otherwise the first exact match found is used.  If no
+ * exact match exists and INPLOOKUP_WILDCARD is set in lookupflags, the
+ * pcbinfo-wide wildcard table is searched using the preference order
+ * documented below.
+ *
+ * On success the inpcb is returned write- or read-locked according to
+ * INPLOOKUP_WLOCKPCB / INPLOOKUP_RLOCKPCB; one of the two must be set or
+ * the function panics.  NULL is returned when nothing matches, or when
+ * in_pcbrele_wlocked()/in_pcbrele_rlocked() reports non-zero after the
+ * lock has been acquired (presumably the inpcb went away while the lock
+ * was being taken; confirm against in_pcb.c).
+ *
+ * The ifp argument is currently unused.
+ */
+static struct inpcb *
+in6_pcblookup_group(struct inpcbinfo *pcbinfo, struct inpcbgroup *pcbgroup,
+    struct in6_addr *faddr, u_int fport_arg, struct in6_addr *laddr,
+    u_int lport_arg, int lookupflags, struct ifnet *ifp)
+{
+	struct inpcbhead *head;
+	struct inpcb *inp, *tmpinp;
+	u_short fport = fport_arg, lport = lport_arg;
+	int faith;
+
+	if (faithprefix_p != NULL)
+		faith = (*faithprefix_p)(laddr);
+	else
+		faith = 0;
+
+	/*
+	 * First look for an exact match.
+	 */
+	tmpinp = NULL;
+	INP_GROUP_LOCK(pcbgroup);
+	head = &pcbgroup->ipg_hashbase[
+	    INP_PCBHASH(faddr->s6_addr32[3] /* XXX */, lport, fport,
+	    pcbgroup->ipg_hashmask)];
+	LIST_FOREACH(inp, head, inp_pcbgrouphash) {
+		/* XXX inp locking */
+		if ((inp->inp_vflag & INP_IPV6) == 0)
+			continue;
+		if (IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, faddr) &&
+		    IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr) &&
+		    inp->inp_fport == fport &&
+		    inp->inp_lport == lport) {
+			/*
+			 * XXX We should be able to directly return
+			 * the inp here, without any checks.
+			 * Well unless both bound with SO_REUSEPORT?
+			 */
+			if (prison_flag(inp->inp_cred, PR_IP6))
+				goto found;
+			if (tmpinp == NULL)
+				tmpinp = inp;
+		}
+	}
+	if (tmpinp != NULL) {
+		inp = tmpinp;
+		goto found;
+	}
+
+	/*
+	 * Then look for a wildcard match, if requested.
+	 */
+	if ((lookupflags & INPLOOKUP_WILDCARD) != 0) {
+		struct inpcb *local_wild = NULL, *local_exact = NULL;
+		struct inpcb *jail_wild = NULL;
+		int injail;
+
+		/*
+		 * Order of socket selection - we always prefer jails.
+		 *      1. jailed, non-wild.
+		 *      2. jailed, wild.
+		 *      3. non-jailed, non-wild.
+		 *      4. non-jailed, wild.
+		 */
+		head = &pcbinfo->ipi_wildbase[INP_PCBHASH(INADDR_ANY, lport,
+		    0, pcbinfo->ipi_wildmask)];
+		LIST_FOREACH(inp, head, inp_pcbgroup_wild) {
+			/* XXX inp locking */
+			if ((inp->inp_vflag & INP_IPV6) == 0)
+				continue;
+
+			if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) ||
+			    inp->inp_lport != lport) {
+				continue;
+			}
+
+			/* XXX inp locking */
+			if (faith && (inp->inp_flags & INP_FAITH) == 0)
+				continue;
+
+			injail = prison_flag(inp->inp_cred, PR_IP6);
+			if (injail) {
+				if (prison_check_ip6(inp->inp_cred,
+				    laddr) != 0)
+					continue;
+			} else {
+				if (local_exact != NULL)
+					continue;
+			}
+
+			if (IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr)) {
+				if (injail)
+					goto found;
+				else
+					local_exact = inp;
+			} else if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) {
+				if (injail)
+					jail_wild = inp;
+				else
+					local_wild = inp;
+			}
+		} /* LIST_FOREACH */
+
+		/*
+		 * A jailed, non-wild match has already jumped to found;
+		 * pick the best remaining candidate in preference order.
+		 */
+		inp = jail_wild;
+		if (inp == NULL)
+			inp = local_exact;
+		if (inp == NULL)
+			inp = local_wild;
+		if (inp != NULL)
+			goto found;
+	} /* if ((lookupflags & INPLOOKUP_WILDCARD) != 0) */
+	INP_GROUP_UNLOCK(pcbgroup);
+	return (NULL);
+
+found:
+	/*
+	 * Take a reference before dropping the group lock, then acquire
+	 * the inpcb lock the caller asked for and release the reference.
+	 */
+	in_pcbref(inp);
+	INP_GROUP_UNLOCK(pcbgroup);
+	if (lookupflags & INPLOOKUP_WLOCKPCB) {
+		INP_WLOCK(inp);
+		if (in_pcbrele_wlocked(inp))
+			return (NULL);
+	} else if (lookupflags & INPLOOKUP_RLOCKPCB) {
+		INP_RLOCK(inp);
+		if (in_pcbrele_rlocked(inp))
+			return (NULL);
+	} else
+		panic("%s: locking bug", __func__);
+	return (inp);
+}
+#endif /* PCBGROUP */
+
/*
* Lookup PCB in hash list.
*/
@@ -983,16 +1119,30 @@ in6_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in6_addr *faddr,
/*
* Public inpcb lookup routines, accepting a 4-tuple, and optionally, an mbuf
* from which a pre-calculated hash value may be extracted.
+ *
+ * Possibly more of this logic should be in in6_pcbgroup.c.
*/
struct inpcb *
in6_pcblookup(struct inpcbinfo *pcbinfo, struct in6_addr *faddr, u_int fport,
struct in6_addr *laddr, u_int lport, int lookupflags, struct ifnet *ifp)
{
+#if defined(PCBGROUP)
+ struct inpcbgroup *pcbgroup;
+#endif
+
KASSERT((lookupflags & ~INPLOOKUP_MASK) == 0,
("%s: invalid lookup flags %d", __func__, lookupflags));
KASSERT((lookupflags & (INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)) != 0,
("%s: LOCKPCB not set", __func__));
+#if defined(PCBGROUP)
+ if (in_pcbgroup_enabled(pcbinfo)) {
+ pcbgroup = in6_pcbgroup_bytuple(pcbinfo, laddr, lport, faddr,
+ fport);
+ return (in6_pcblookup_group(pcbinfo, pcbgroup, faddr, fport,
+ laddr, lport, lookupflags, ifp));
+ }
+#endif
return (in6_pcblookup_hash(pcbinfo, faddr, fport, laddr, lport,
lookupflags, ifp));
}
@@ -1002,11 +1152,28 @@ in6_pcblookup_mbuf(struct inpcbinfo *pcbinfo, struct in6_addr *faddr,
u_int fport, struct in6_addr *laddr, u_int lport, int lookupflags,
struct ifnet *ifp, struct mbuf *m)
{
+#ifdef PCBGROUP
+ struct inpcbgroup *pcbgroup;
+#endif
+
KASSERT((lookupflags & ~INPLOOKUP_MASK) == 0,
("%s: invalid lookup flags %d", __func__, lookupflags));
KASSERT((lookupflags & (INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)) != 0,
("%s: LOCKPCB not set", __func__));
+#ifdef PCBGROUP
+ if (in_pcbgroup_enabled(pcbinfo)) {
+ pcbgroup = in6_pcbgroup_byhash(pcbinfo, M_HASHTYPE_GET(m),
+ m->m_pkthdr.flowid);
+ if (pcbgroup != NULL)
+ return (in6_pcblookup_group(pcbinfo, pcbgroup, faddr,
+ fport, laddr, lport, lookupflags, ifp));
+ pcbgroup = in6_pcbgroup_bytuple(pcbinfo, laddr, lport, faddr,
+ fport);
+ return (in6_pcblookup_group(pcbinfo, pcbgroup, faddr, fport,
+ laddr, lport, lookupflags, ifp));
+ }
+#endif
return (in6_pcblookup_hash(pcbinfo, faddr, fport, laddr, lport,
lookupflags, ifp));
}
diff --git a/sys/netinet6/in6_pcb.h b/sys/netinet6/in6_pcb.h
index cf24704..8398d54 100644
--- a/sys/netinet6/in6_pcb.h
+++ b/sys/netinet6/in6_pcb.h
@@ -69,6 +69,16 @@
#define sin6tosa(sin6) ((struct sockaddr *)(sin6))
#define ifatoia6(ifa) ((struct in6_ifaddr *)(ifa))
+struct inpcbgroup *
+ in6_pcbgroup_byhash(struct inpcbinfo *, u_int, uint32_t);
+struct inpcbgroup *
+ in6_pcbgroup_byinpcb __P((struct inpcb *));
+struct inpcbgroup *
+ in6_pcbgroup_bymbuf(struct inpcbinfo *, struct mbuf *);
+struct inpcbgroup *
+ in6_pcbgroup_bytuple __P((struct inpcbinfo *, const struct in6_addr *,
+ u_short, const struct in6_addr *, u_short));
+
void in6_pcbpurgeif0 __P((struct inpcbinfo *, struct ifnet *));
void in6_losing __P((struct inpcb *));
int in6_pcbbind __P((struct inpcb *, struct sockaddr *, struct ucred *));
diff --git a/sys/netinet6/in6_pcbgroup.c b/sys/netinet6/in6_pcbgroup.c
new file mode 100644
index 0000000..850d7f4
--- /dev/null
+++ b/sys/netinet6/in6_pcbgroup.c
@@ -0,0 +1,103 @@
+/*-
+ * Copyright (c) 2010-2011 Juniper Networks, Inc.
+ * All rights reserved.
+ *
+ * This software was developed by Robert N. M. Watson under contract
+ * to Juniper Networks, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+
+__FBSDID("$FreeBSD$");
+
+#include "opt_inet6.h"
+
+#include <sys/param.h>
+#include <sys/mbuf.h>
+
+#include <netinet/in.h>
+#include <netinet/in_pcb.h>
+#ifdef INET6
+#include <netinet6/in6_pcb.h>
+#endif /* INET6 */
+
+/*
+ * Reduce a hash of the covered tuple to a pcbgroup index by taking it
+ * modulo the number of pcbgroups in the pcbinfo.
+ */
+static __inline u_int
+in6_pcbgroup_getbucket(struct inpcbinfo *pcbinfo, uint32_t hash)
+{
+	u_int bucket;
+
+	bucket = hash % pcbinfo->ipi_npcbgroups;
+	return (bucket);
+}
+
+/*
+ * Map a (hashtype, hash) tuple into a connection group, or NULL if the hash
+ * information is insufficient to identify the pcbgroup.
+ *
+ * As written, this always returns NULL without examining its arguments,
+ * so callers always take their NULL fallback path.
+ */
+struct inpcbgroup *
+in6_pcbgroup_byhash(struct inpcbinfo *pcbinfo, u_int hashtype, uint32_t hash)
+{
+
+ return (NULL);
+}
+
+/*
+ * Select a connection group from the hash type and flow ID carried in the
+ * mbuf packet header; the result is whatever in6_pcbgroup_byhash() returns
+ * for that pair.
+ */
+struct inpcbgroup *
+in6_pcbgroup_bymbuf(struct inpcbinfo *pcbinfo, struct mbuf *m)
+{
+	u_int hashtype;
+	uint32_t hash;
+
+	hashtype = M_HASHTYPE_GET(m);
+	hash = m->m_pkthdr.flowid;
+	return (in6_pcbgroup_byhash(pcbinfo, hashtype, hash));
+}
+
+/*
+ * Select a connection group from a connection tuple.  The fields hashed
+ * depend on the pcbinfo's ipi_hashfields setting:
+ *
+ *   IPI_HASHFIELDS_4TUPLE: last 32-bit word of the foreign address XOR
+ *                          the foreign port.
+ *   IPI_HASHFIELDS_2TUPLE: last 32-bit word of the foreign address XOR
+ *                          last 32-bit word of the local address.
+ *   anything else:         a constant hash of 0.
+ *
+ * The hash is reduced to an index into ipi_pcbgroups by
+ * in6_pcbgroup_getbucket(); this function never returns NULL.
+ */
+struct inpcbgroup *
+in6_pcbgroup_bytuple(struct inpcbinfo *pcbinfo, const struct in6_addr *laddrp,
+    u_short lport, const struct in6_addr *faddrp, u_short fport)
+{
+	uint32_t tuplehash;
+	u_int bucket;
+
+	tuplehash = 0;
+	switch (pcbinfo->ipi_hashfields) {
+	case IPI_HASHFIELDS_4TUPLE:
+		tuplehash = faddrp->s6_addr32[3] ^ fport;
+		break;
+
+	case IPI_HASHFIELDS_2TUPLE:
+		tuplehash = faddrp->s6_addr32[3] ^ laddrp->s6_addr32[3];
+		break;
+
+	default:
+		break;
+	}
+	bucket = in6_pcbgroup_getbucket(pcbinfo, tuplehash);
+	return (&pcbinfo->ipi_pcbgroups[bucket]);
+}
+
+/*
+ * Select a connection group for an inpcb from its own IPv6 local and
+ * foreign addresses and ports, via in6_pcbgroup_bytuple().
+ */
+struct inpcbgroup *
+in6_pcbgroup_byinpcb(struct inpcb *inp)
+{
+	struct inpcbinfo *ipi;
+
+	ipi = inp->inp_pcbinfo;
+	return (in6_pcbgroup_bytuple(ipi, &inp->in6p_laddr, inp->inp_lport,
+	    &inp->in6p_faddr, inp->inp_fport));
+}
diff --git a/sys/netinet6/ip6_ipsec.c b/sys/netinet6/ip6_ipsec.c
index 8731e12..bbbc9c9 100644
--- a/sys/netinet6/ip6_ipsec.c
+++ b/sys/netinet6/ip6_ipsec.c
@@ -97,7 +97,7 @@ SYSCTL_VNET_INT(_net_inet6_ipsec6, OID_AUTO,
/*
* Check if we have to jump over firewall processing for this packet.
- * Called from ip_input().
+ * Called from ip6_input().
* 1 = jump over firewall, 0 = packet goes through firewall.
*/
int
@@ -106,7 +106,7 @@ ip6_ipsec_filtertunnel(struct mbuf *m)
#if defined(IPSEC)
/*
- * Bypass packet filtering for packets from a tunnel.
+ * Bypass packet filtering for packets previously handled by IPsec.
*/
if (!V_ip6_ipsec6_filtertunnel &&
m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL) != NULL)
@@ -118,7 +118,7 @@ ip6_ipsec_filtertunnel(struct mbuf *m)
/*
* Check if this packet has an active SA and needs to be dropped instead
* of forwarded.
- * Called from ip_input().
+ * Called from ip6_input().
* 1 = drop packet, 0 = forward packet.
*/
int
@@ -141,7 +141,7 @@ ip6_ipsec_fwd(struct mbuf *m)
if (sp == NULL) { /* NB: can happen if error */
splx(s);
/*XXX error stat???*/
- DPRINTF(("ip_input: no SP for forwarding\n")); /*XXX*/
+ DPRINTF(("%s: no SP for forwarding\n", __func__)); /*XXX*/
return 1;
}
@@ -163,7 +163,7 @@ ip6_ipsec_fwd(struct mbuf *m)
* Check if protocol type doesn't have a further header and do IPSEC
* decryption or reject right now. Protocols with further headers get
* their IPSEC treatment within the protocol specific processing.
- * Called from ip_input().
+ * Called from ip6_input().
* 1 = drop packet, 0 = continue processing packet.
*/
int
@@ -206,7 +206,7 @@ ip6_ipsec_input(struct mbuf *m, int nxt)
} else {
/* XXX error stat??? */
error = EINVAL;
- DPRINTF(("ip_input: no SP, packet discarded\n"));/*XXX*/
+ DPRINTF(("%s: no SP, packet discarded\n", __func__));/*XXX*/
return 1;
}
splx(s);
diff --git a/sys/ofed/include/linux/list.h b/sys/ofed/include/linux/list.h
index f6f9404..61b42d2 100644
--- a/sys/ofed/include/linux/list.h
+++ b/sys/ofed/include/linux/list.h
@@ -38,6 +38,7 @@
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/queue.h>
+#include <sys/cpuset.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
diff --git a/sys/pc98/pc98/machdep.c b/sys/pc98/pc98/machdep.c
index 853ac69..8bcb618 100644
--- a/sys/pc98/pc98/machdep.c
+++ b/sys/pc98/pc98/machdep.c
@@ -49,6 +49,7 @@ __FBSDID("$FreeBSD$");
#include "opt_isa.h"
#include "opt_kstack_pages.h"
#include "opt_maxmem.h"
+#include "opt_mp_watchdog.h"
#include "opt_npx.h"
#include "opt_perfmon.h"
@@ -115,6 +116,7 @@ __FBSDID("$FreeBSD$");
#include <machine/intr_machdep.h>
#include <x86/mca.h>
#include <machine/md_var.h>
+#include <machine/mp_watchdog.h>
#include <machine/pc/bios.h>
#include <machine/pcb.h>
#include <machine/pcb_ext.h>
@@ -1193,9 +1195,8 @@ cpu_idle(int busy)
CTR2(KTR_SPARE2, "cpu_idle(%d) at %d",
busy, curcpu);
-#ifdef SMP
- if (mp_grab_cpu_hlt())
- return;
+#ifdef MP_WATCHDOG
+ ap_watchdog(PCPU_GET(cpuid));
#endif
/* If we are busy - try to use fast methods. */
if (busy) {
diff --git a/sys/powerpc/aim/mmu_oea.c b/sys/powerpc/aim/mmu_oea.c
index 51c6f8a..be80455 100644
--- a/sys/powerpc/aim/mmu_oea.c
+++ b/sys/powerpc/aim/mmu_oea.c
@@ -118,11 +118,14 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/kernel.h>
+#include <sys/queue.h>
+#include <sys/cpuset.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/msgbuf.h>
#include <sys/mutex.h>
#include <sys/proc.h>
+#include <sys/sched.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/vmmeter.h>
@@ -820,7 +823,7 @@ moea_bootstrap(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend)
PMAP_LOCK_INIT(kernel_pmap);
for (i = 0; i < 16; i++)
kernel_pmap->pm_sr[i] = EMPTY_SEGMENT + i;
- kernel_pmap->pm_active = ~0;
+ CPU_FILL(&kernel_pmap->pm_active);
/*
* Set up the Open Firmware mappings
@@ -942,7 +945,9 @@ moea_activate(mmu_t mmu, struct thread *td)
pm = &td->td_proc->p_vmspace->vm_pmap;
pmr = pm->pmap_phys;
- pm->pm_active |= PCPU_GET(cpumask);
+ sched_pin();
+ CPU_OR(&pm->pm_active, PCPU_PTR(cpumask));
+ sched_unpin();
PCPU_SET(curpmap, pmr);
}
@@ -952,7 +957,9 @@ moea_deactivate(mmu_t mmu, struct thread *td)
pmap_t pm;
pm = &td->td_proc->p_vmspace->vm_pmap;
- pm->pm_active &= ~PCPU_GET(cpumask);
+ sched_pin();
+ CPU_NAND(&pm->pm_active, PCPU_PTR(cpumask));
+ sched_unpin();
PCPU_SET(curpmap, NULL);
}
diff --git a/sys/powerpc/aim/mmu_oea64.c b/sys/powerpc/aim/mmu_oea64.c
index 12a1201..291d89b 100644
--- a/sys/powerpc/aim/mmu_oea64.c
+++ b/sys/powerpc/aim/mmu_oea64.c
@@ -118,11 +118,14 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/kernel.h>
+#include <sys/queue.h>
+#include <sys/cpuset.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/msgbuf.h>
#include <sys/mutex.h>
#include <sys/proc.h>
+#include <sys/sched.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/vmmeter.h>
@@ -827,7 +830,7 @@ moea64_mid_bootstrap(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend)
#endif
kernel_pmap->pmap_phys = kernel_pmap;
- kernel_pmap->pm_active = ~0;
+ CPU_FILL(&kernel_pmap->pm_active);
PMAP_LOCK_INIT(kernel_pmap);
@@ -995,7 +998,9 @@ moea64_activate(mmu_t mmu, struct thread *td)
pmap_t pm;
pm = &td->td_proc->p_vmspace->vm_pmap;
- pm->pm_active |= PCPU_GET(cpumask);
+ sched_pin();
+ CPU_OR(&pm->pm_active, PCPU_PTR(cpumask));
+ sched_unpin();
#ifdef __powerpc64__
PCPU_SET(userslb, pm->pm_slb);
@@ -1010,7 +1015,9 @@ moea64_deactivate(mmu_t mmu, struct thread *td)
pmap_t pm;
pm = &td->td_proc->p_vmspace->vm_pmap;
- pm->pm_active &= ~(PCPU_GET(cpumask));
+ sched_pin();
+ CPU_NAND(&pm->pm_active, PCPU_PTR(cpumask));
+ sched_unpin();
#ifdef __powerpc64__
PCPU_SET(userslb, NULL);
#else
diff --git a/sys/powerpc/booke/platform_bare.c b/sys/powerpc/booke/platform_bare.c
index 90c73e0..d76664e 100644
--- a/sys/powerpc/booke/platform_bare.c
+++ b/sys/powerpc/booke/platform_bare.c
@@ -256,7 +256,7 @@ bare_smp_start_cpu(platform_t plat, struct pcpu *pc)
int timeout;
eebpcr = ccsr_read4(OCP85XX_EEBPCR);
- if ((eebpcr & (pc->pc_cpumask << 24)) != 0) {
+ if ((eebpcr & (1 << (pc->pc_cpuid + 24))) != 0) {
printf("%s: CPU=%d already out of hold-off state!\n",
__func__, pc->pc_cpuid);
return (ENXIO);
@@ -274,7 +274,7 @@ bare_smp_start_cpu(platform_t plat, struct pcpu *pc)
/*
* Release AP from hold-off state
*/
- eebpcr |= (pc->pc_cpumask << 24);
+ eebpcr |= (1 << (pc->pc_cpuid + 24));
ccsr_write4(OCP85XX_EEBPCR, eebpcr);
__asm __volatile("isync; msync");
diff --git a/sys/powerpc/booke/pmap.c b/sys/powerpc/booke/pmap.c
index cabe58f..e1cd071 100644
--- a/sys/powerpc/booke/pmap.c
+++ b/sys/powerpc/booke/pmap.c
@@ -63,6 +63,7 @@ __FBSDID("$FreeBSD$");
#include <sys/msgbuf.h>
#include <sys/lock.h>
#include <sys/mutex.h>
+#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/vmmeter.h>
@@ -1225,7 +1226,7 @@ mmu_booke_bootstrap(mmu_t mmu, vm_offset_t start, vm_offset_t kernelend)
PTE_VALID;
}
/* Mark kernel_pmap active on all CPUs */
- kernel_pmap->pm_active = ~0;
+ CPU_FILL(&kernel_pmap->pm_active);
/*******************************************************/
/* Final setup */
@@ -1480,7 +1481,7 @@ mmu_booke_pinit(mmu_t mmu, pmap_t pmap)
PMAP_LOCK_INIT(pmap);
for (i = 0; i < MAXCPU; i++)
pmap->pm_tid[i] = TID_NONE;
- pmap->pm_active = 0;
+ CPU_ZERO(&pmap->pm_active);
bzero(&pmap->pm_stats, sizeof(pmap->pm_stats));
bzero(&pmap->pm_pdir, sizeof(pte_t *) * PDIR_NENTRIES);
TAILQ_INIT(&pmap->pm_ptbl_list);
@@ -1835,7 +1836,7 @@ mmu_booke_activate(mmu_t mmu, struct thread *td)
mtx_lock_spin(&sched_lock);
- atomic_set_int(&pmap->pm_active, PCPU_GET(cpumask));
+ CPU_OR_ATOMIC(&pmap->pm_active, PCPU_PTR(cpumask));
PCPU_SET(curpmap, pmap);
if (pmap->pm_tid[PCPU_GET(cpuid)] == TID_NONE)
@@ -1864,7 +1865,9 @@ mmu_booke_deactivate(mmu_t mmu, struct thread *td)
CTR5(KTR_PMAP, "%s: td=%p, proc = '%s', id = %d, pmap = 0x%08x",
__func__, td, td->td_proc->p_comm, td->td_proc->p_pid, pmap);
- atomic_clear_int(&pmap->pm_active, PCPU_GET(cpumask));
+ sched_pin();
+ CPU_NAND_ATOMIC(&pmap->pm_active, PCPU_PTR(cpumask));
+ sched_unpin();
PCPU_SET(curpmap, NULL);
}
diff --git a/sys/powerpc/include/_types.h b/sys/powerpc/include/_types.h
index fae2416..b0b582e 100644
--- a/sys/powerpc/include/_types.h
+++ b/sys/powerpc/include/_types.h
@@ -72,7 +72,6 @@ typedef unsigned long long __uint64_t;
* Standard type definitions.
*/
typedef __uint32_t __clock_t; /* clock()... */
-typedef unsigned int __cpumask_t;
typedef double __double_t;
typedef double __float_t;
#ifdef __LP64__
diff --git a/sys/powerpc/include/openpicvar.h b/sys/powerpc/include/openpicvar.h
index 4fb9aa7..605dc0f 100644
--- a/sys/powerpc/include/openpicvar.h
+++ b/sys/powerpc/include/openpicvar.h
@@ -57,7 +57,7 @@ int openpic_common_attach(device_t, uint32_t);
/*
* PIC interface.
*/
-void openpic_bind(device_t dev, u_int irq, cpumask_t cpumask);
+void openpic_bind(device_t dev, u_int irq, cpuset_t cpumask);
void openpic_config(device_t, u_int, enum intr_trigger, enum intr_polarity);
void openpic_dispatch(device_t, struct trapframe *);
void openpic_enable(device_t, u_int, u_int);
diff --git a/sys/powerpc/include/pmap.h b/sys/powerpc/include/pmap.h
index 369ca9d..9166d04 100644
--- a/sys/powerpc/include/pmap.h
+++ b/sys/powerpc/include/pmap.h
@@ -66,6 +66,7 @@
#include <sys/queue.h>
#include <sys/tree.h>
+#include <sys/_cpuset.h>
#include <sys/_lock.h>
#include <sys/_mutex.h>
#include <machine/sr.h>
@@ -98,7 +99,7 @@ struct pmap {
#else
register_t pm_sr[16];
#endif
- cpumask_t pm_active;
+ cpuset_t pm_active;
struct pmap *pmap_phys;
struct pmap_statistics pm_stats;
@@ -175,7 +176,7 @@ void slb_free_user_cache(struct slb **);
struct pmap {
struct mtx pm_mtx; /* pmap mutex */
tlbtid_t pm_tid[MAXCPU]; /* TID to identify this pmap entries in TLB */
- cpumask_t pm_active; /* active on cpus */
+ cpuset_t pm_active; /* active on cpus */
struct pmap_statistics pm_stats; /* pmap statistics */
/* Page table directory, array of pointers to page tables. */
diff --git a/sys/powerpc/include/smp.h b/sys/powerpc/include/smp.h
index cf95278..32fcfb4 100644
--- a/sys/powerpc/include/smp.h
+++ b/sys/powerpc/include/smp.h
@@ -40,9 +40,11 @@
#ifndef LOCORE
+#include <sys/_cpuset.h>
+
void ipi_all_but_self(int ipi);
void ipi_cpu(int cpu, u_int ipi);
-void ipi_selected(cpumask_t cpus, int ipi);
+void ipi_selected(cpuset_t cpus, int ipi);
struct cpuref {
uintptr_t cr_hwref;
diff --git a/sys/powerpc/mpc85xx/openpic_fdt.c b/sys/powerpc/mpc85xx/openpic_fdt.c
index 7cf18ea..1cd9369 100644
--- a/sys/powerpc/mpc85xx/openpic_fdt.c
+++ b/sys/powerpc/mpc85xx/openpic_fdt.c
@@ -37,11 +37,12 @@ __FBSDID("$FreeBSD$");
#include <machine/bus.h>
#include <machine/intr_machdep.h>
-#include <machine/openpicvar.h>
#include <dev/ofw/ofw_bus.h>
#include <dev/ofw/ofw_bus_subr.h>
+#include <machine/openpicvar.h>
+
#include "pic_if.h"
static int openpic_fdt_probe(device_t);
diff --git a/sys/powerpc/powerpc/intr_machdep.c b/sys/powerpc/powerpc/intr_machdep.c
index f2bfa33..1e6342c 100644
--- a/sys/powerpc/powerpc/intr_machdep.c
+++ b/sys/powerpc/powerpc/intr_machdep.c
@@ -67,6 +67,7 @@
#include <sys/kernel.h>
#include <sys/queue.h>
#include <sys/bus.h>
+#include <sys/cpuset.h>
#include <sys/interrupt.h>
#include <sys/ktr.h>
#include <sys/lock.h>
@@ -98,7 +99,7 @@ struct powerpc_intr {
u_int intline;
u_int vector;
u_int cntindex;
- cpumask_t cpu;
+ cpuset_t cpu;
enum intr_trigger trig;
enum intr_polarity pol;
};
@@ -205,7 +206,7 @@ intr_lookup(u_int irq)
#ifdef SMP
i->cpu = all_cpus;
#else
- i->cpu = 1;
+ CPU_SETOF(0, &i->cpu);
#endif
for (vector = 0; vector < INTR_VECTORS && vector <= nvectors;
@@ -296,7 +297,7 @@ powerpc_assign_intr_cpu(void *arg, u_char cpu)
if (cpu == NOCPU)
i->cpu = all_cpus;
else
- i->cpu = 1 << cpu;
+ CPU_SETOF(cpu, &i->cpu);
if (!cold && i->pic != NULL && i->pic == root_pic)
PIC_BIND(i->pic, i->intline, i->cpu);
diff --git a/sys/powerpc/powerpc/mp_machdep.c b/sys/powerpc/powerpc/mp_machdep.c
index 577d4dc..62a97e9 100644
--- a/sys/powerpc/powerpc/mp_machdep.c
+++ b/sys/powerpc/powerpc/mp_machdep.c
@@ -32,6 +32,7 @@ __FBSDID("$FreeBSD$");
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/bus.h>
+#include <sys/cpuset.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/pcpu.h>
@@ -157,7 +158,7 @@ cpu_mp_start(void)
cpu.cr_cpuid);
goto next;
}
- if (all_cpus & (1 << cpu.cr_cpuid)) {
+ if (CPU_ISSET(cpu.cr_cpuid, &all_cpus)) {
printf("SMP: cpu%d: skipped - duplicate ID\n",
cpu.cr_cpuid);
goto next;
@@ -174,9 +175,9 @@ cpu_mp_start(void)
pc->pc_cpuid = bsp.cr_cpuid;
pc->pc_bsp = 1;
}
- pc->pc_cpumask = 1 << pc->pc_cpuid;
+ CPU_SETOF(pc->pc_cpuid, &pc->pc_cpumask);
pc->pc_hwref = cpu.cr_hwref;
- all_cpus |= pc->pc_cpumask;
+ CPU_OR(&all_cpus, &pc->pc_cpumask);
next:
error = platform_smp_next_cpu(&cpu);
}
@@ -214,7 +215,8 @@ cpu_mp_unleash(void *dummy)
smp_cpus = 0;
STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) {
cpus++;
- pc->pc_other_cpus = all_cpus & ~pc->pc_cpumask;
+ pc->pc_other_cpus = all_cpus;
+ CPU_NAND(&pc->pc_other_cpus, &pc->pc_cpumask);
if (!pc->pc_bsp) {
if (bootverbose)
printf("Waking up CPU %d (dev=%x)\n",
@@ -236,7 +238,7 @@ cpu_mp_unleash(void *dummy)
pc->pc_cpuid, pc->pc_pir, pc->pc_awake);
smp_cpus++;
} else
- stopped_cpus |= (1 << pc->pc_cpuid);
+ CPU_SET(pc->pc_cpuid, &stopped_cpus);
}
ap_awake = 1;
@@ -276,7 +278,7 @@ SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, cpu_mp_unleash, NULL);
int
powerpc_ipi_handler(void *arg)
{
- cpumask_t self;
+ cpuset_t self;
uint32_t ipimask;
int msg;
@@ -311,11 +313,11 @@ powerpc_ipi_handler(void *arg)
savectx(&stoppcbs[PCPU_GET(cpuid)]);
self = PCPU_GET(cpumask);
savectx(PCPU_GET(curpcb));
- atomic_set_int(&stopped_cpus, self);
- while ((started_cpus & self) == 0)
+ CPU_OR_ATOMIC(&stopped_cpus, &self);
+ while (!CPU_OVERLAP(&started_cpus, &self))
cpu_spinwait();
- atomic_clear_int(&started_cpus, self);
- atomic_clear_int(&stopped_cpus, self);
+ CPU_NAND_ATOMIC(&started_cpus, &self);
+ CPU_NAND_ATOMIC(&stopped_cpus, &self);
CTR1(KTR_SMP, "%s: IPI_STOP (restart)", __func__);
break;
case IPI_HARDCLOCK:
@@ -343,12 +345,12 @@ ipi_send(struct pcpu *pc, int ipi)
/* Send an IPI to a set of cpus. */
void
-ipi_selected(cpumask_t cpus, int ipi)
+ipi_selected(cpuset_t cpus, int ipi)
{
struct pcpu *pc;
STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) {
- if (cpus & pc->pc_cpumask)
+ if (CPU_OVERLAP(&cpus, &pc->pc_cpumask))
ipi_send(pc, ipi);
}
}
diff --git a/sys/powerpc/powerpc/openpic.c b/sys/powerpc/powerpc/openpic.c
index 042f8b8..347dc3f 100644
--- a/sys/powerpc/powerpc/openpic.c
+++ b/sys/powerpc/powerpc/openpic.c
@@ -231,7 +231,7 @@ openpic_common_attach(device_t dev, uint32_t node)
*/
void
-openpic_bind(device_t dev, u_int irq, cpumask_t cpumask)
+openpic_bind(device_t dev, u_int irq, cpuset_t cpumask)
{
struct openpic_softc *sc;
@@ -240,7 +240,12 @@ openpic_bind(device_t dev, u_int irq, cpumask_t cpumask)
return;
sc = device_get_softc(dev);
- openpic_write(sc, OPENPIC_IDEST(irq), cpumask);
+
+ /*
+ * XXX: openpic_write() is very special and just needs a 32 bits mask.
+ * For the moment, just play dirty and get the first half word.
+ */
+ openpic_write(sc, OPENPIC_IDEST(irq), cpumask.__bits[0] & 0xffffffff);
}
void
diff --git a/sys/powerpc/powerpc/pic_if.m b/sys/powerpc/powerpc/pic_if.m
index 185cc08..e429d31 100644
--- a/sys/powerpc/powerpc/pic_if.m
+++ b/sys/powerpc/powerpc/pic_if.m
@@ -28,6 +28,7 @@
#
#include <sys/bus.h>
+#include <sys/cpuset.h>
#include <machine/frame.h>
INTERFACE pic;
@@ -35,7 +36,7 @@ INTERFACE pic;
METHOD void bind {
device_t dev;
u_int irq;
- cpumask_t cpumask;
+ cpuset_t cpumask;
};
METHOD void config {
diff --git a/sys/sparc64/include/_types.h b/sys/sparc64/include/_types.h
index f810c15..7e993c4 100644
--- a/sys/sparc64/include/_types.h
+++ b/sys/sparc64/include/_types.h
@@ -55,7 +55,6 @@ typedef unsigned long __uint64_t;
* Standard type definitions.
*/
typedef __int32_t __clock_t; /* clock()... */
-typedef unsigned int __cpumask_t;
typedef __int64_t __critical_t;
typedef double __double_t;
typedef float __float_t;
diff --git a/sys/sparc64/include/ktr.h b/sys/sparc64/include/ktr.h
index 5948ba2..f13865f 100644
--- a/sys/sparc64/include/ktr.h
+++ b/sys/sparc64/include/ktr.h
@@ -40,16 +40,6 @@
#else
-#define AND(var, mask, r1, r2) \
- SET(var, r2, r1) ; \
- lduw [r1], r2 ; \
- and r2, mask, r1
-
-#define TEST(var, mask, r1, r2, l1) \
- AND(var, mask, r1, r2) ; \
- brz r1, l1 ## f ; \
- nop
-
/*
* XXX could really use another register...
*/
@@ -79,13 +69,37 @@ l2: add r2, 1, r3 ; \
SET(l1 ## b, r3, r2) ; \
stx r2, [r1 + KTR_DESC]
+/*
+ * NB: this clobbers %y.
+ */
#define CATR(mask, desc, r1, r2, r3, l1, l2, l3) \
set mask, r1 ; \
- TEST(ktr_mask, r1, r2, r2, l3) ; \
- lduw [PCPU(MID)], r1 ; \
+ SET(ktr_mask, r3, r2) ; \
+ lduw [r2], r2 ; \
+ and r2, r1, r1 ; \
+ brz r1, l3 ## f ; \
+ nop ; \
+ lduw [PCPU(CPUID)], r2 ; \
+ mov _NCPUBITS, r3 ; \
+ mov %g0, %y ; \
+ udiv r2, r3, r2 ; \
+ srl r2, 0, r2 ; \
+ sllx r2, PTR_SHIFT, r2 ; \
+ SET(ktr_cpumask, r3, r1) ; \
+ ldx [r1 + r2], r1 ; \
+ lduw [PCPU(CPUID)], r2 ; \
+ mov _NCPUBITS, r3 ; \
+ mov %g0, %y ; \
+ udiv r2, r3, r2 ; \
+ srl r2, 0, r2 ; \
+ smul r2, r3, r3 ; \
+ lduw [PCPU(CPUID)], r2 ; \
+ sub r2, r3, r3 ; \
mov 1, r2 ; \
- sllx r2, r1, r1 ; \
- TEST(ktr_cpumask, r1, r2, r3, l3) ; \
+ sllx r2, r3, r2 ; \
+ andn r1, r2, r1 ; \
+ brz r1, l3 ## f ; \
+ nop ; \
ATR(desc, r1, r2, r3, l1, l2)
#endif /* LOCORE */
diff --git a/sys/sparc64/include/pmap.h b/sys/sparc64/include/pmap.h
index e16ea97..adad257 100644
--- a/sys/sparc64/include/pmap.h
+++ b/sys/sparc64/include/pmap.h
@@ -40,6 +40,7 @@
#define _MACHINE_PMAP_H_
#include <sys/queue.h>
+#include <sys/_cpuset.h>
#include <sys/_lock.h>
#include <sys/_mutex.h>
#include <machine/cache.h>
@@ -61,7 +62,7 @@ struct pmap {
struct mtx pm_mtx;
struct tte *pm_tsb;
vm_object_t pm_tsb_obj;
- cpumask_t pm_active;
+ cpuset_t pm_active;
u_int pm_context[MAXCPU];
struct pmap_statistics pm_stats;
};
diff --git a/sys/sparc64/include/smp.h b/sys/sparc64/include/smp.h
index 3ca8e03..1ba0d9e 100644
--- a/sys/sparc64/include/smp.h
+++ b/sys/sparc64/include/smp.h
@@ -38,6 +38,7 @@
#ifndef LOCORE
+#include <sys/cpuset.h>
#include <sys/proc.h>
#include <sys/sched.h>
@@ -76,17 +77,17 @@ struct cpu_start_args {
};
struct ipi_cache_args {
- cpumask_t ica_mask;
+ cpuset_t ica_mask;
vm_paddr_t ica_pa;
};
struct ipi_rd_args {
- cpumask_t ira_mask;
+ cpuset_t ira_mask;
register_t *ira_val;
};
struct ipi_tlb_args {
- cpumask_t ita_mask;
+ cpuset_t ita_mask;
struct pmap *ita_pmap;
u_long ita_start;
u_long ita_end;
@@ -100,7 +101,7 @@ extern struct pcb stoppcbs[];
void cpu_mp_bootstrap(struct pcpu *pc);
void cpu_mp_shutdown(void);
-typedef void cpu_ipi_selected_t(u_int, u_long, u_long, u_long);
+typedef void cpu_ipi_selected_t(cpuset_t, u_long, u_long, u_long);
extern cpu_ipi_selected_t *cpu_ipi_selected;
typedef void cpu_ipi_single_t(u_int, u_long, u_long, u_long);
extern cpu_ipi_single_t *cpu_ipi_single;
@@ -140,7 +141,7 @@ ipi_all_but_self(u_int ipi)
}
static __inline void
-ipi_selected(u_int cpus, u_int ipi)
+ipi_selected(cpuset_t cpus, u_int ipi)
{
cpu_ipi_selected(cpus, 0, (u_long)tl_ipi_level, ipi);
@@ -197,7 +198,8 @@ ipi_rd(u_int cpu, void *func, u_long *val)
sched_pin();
ira = &ipi_rd_args;
mtx_lock_spin(&ipi_mtx);
- ira->ira_mask = 1 << cpu | PCPU_GET(cpumask);
+ ira->ira_mask = PCPU_GET(cpumask);
+ CPU_SET(cpu, &ira->ira_mask);
ira->ira_val = val;
cpu_ipi_single(cpu, 0, (u_long)func, (u_long)ira);
return (&ira->ira_mask);
@@ -207,18 +209,21 @@ static __inline void *
ipi_tlb_context_demap(struct pmap *pm)
{
struct ipi_tlb_args *ita;
- cpumask_t cpus;
+ cpuset_t cpus;
if (smp_cpus == 1)
return (NULL);
sched_pin();
- if ((cpus = (pm->pm_active & PCPU_GET(other_cpus))) == 0) {
+ cpus = pm->pm_active;
+ CPU_AND(&cpus, PCPU_PTR(other_cpus));
+ if (CPU_EMPTY(&cpus)) {
sched_unpin();
return (NULL);
}
ita = &ipi_tlb_args;
mtx_lock_spin(&ipi_mtx);
- ita->ita_mask = cpus | PCPU_GET(cpumask);
+ ita->ita_mask = cpus;
+ CPU_OR(&ita->ita_mask, PCPU_PTR(cpumask));
ita->ita_pmap = pm;
cpu_ipi_selected(cpus, 0, (u_long)tl_ipi_tlb_context_demap,
(u_long)ita);
@@ -229,18 +234,21 @@ static __inline void *
ipi_tlb_page_demap(struct pmap *pm, vm_offset_t va)
{
struct ipi_tlb_args *ita;
- cpumask_t cpus;
+ cpuset_t cpus;
if (smp_cpus == 1)
return (NULL);
sched_pin();
- if ((cpus = (pm->pm_active & PCPU_GET(other_cpus))) == 0) {
+ cpus = pm->pm_active;
+ CPU_AND(&cpus, PCPU_PTR(other_cpus));
+ if (CPU_EMPTY(&cpus)) {
sched_unpin();
return (NULL);
}
ita = &ipi_tlb_args;
mtx_lock_spin(&ipi_mtx);
- ita->ita_mask = cpus | PCPU_GET(cpumask);
+ ita->ita_mask = cpus;
+ CPU_OR(&ita->ita_mask, PCPU_PTR(cpumask));
ita->ita_pmap = pm;
ita->ita_va = va;
cpu_ipi_selected(cpus, 0, (u_long)tl_ipi_tlb_page_demap, (u_long)ita);
@@ -251,18 +259,21 @@ static __inline void *
ipi_tlb_range_demap(struct pmap *pm, vm_offset_t start, vm_offset_t end)
{
struct ipi_tlb_args *ita;
- cpumask_t cpus;
+ cpuset_t cpus;
if (smp_cpus == 1)
return (NULL);
sched_pin();
- if ((cpus = (pm->pm_active & PCPU_GET(other_cpus))) == 0) {
+ cpus = pm->pm_active;
+ CPU_AND(&cpus, PCPU_PTR(other_cpus));
+ if (CPU_EMPTY(&cpus)) {
sched_unpin();
return (NULL);
}
ita = &ipi_tlb_args;
mtx_lock_spin(&ipi_mtx);
- ita->ita_mask = cpus | PCPU_GET(cpumask);
+ ita->ita_mask = cpus;
+ CPU_OR(&ita->ita_mask, PCPU_PTR(cpumask));
ita->ita_pmap = pm;
ita->ita_start = start;
ita->ita_end = end;
@@ -274,11 +285,11 @@ ipi_tlb_range_demap(struct pmap *pm, vm_offset_t start, vm_offset_t end)
static __inline void
ipi_wait(void *cookie)
{
- volatile cpumask_t *mask;
+ volatile cpuset_t *mask;
if ((mask = cookie) != NULL) {
- atomic_clear_int(mask, PCPU_GET(cpumask));
- while (*mask != 0)
+ CPU_NAND_ATOMIC(mask, PCPU_PTR(cpumask));
+ while (!CPU_EMPTY(mask))
;
mtx_unlock_spin(&ipi_mtx);
sched_unpin();
diff --git a/sys/sparc64/sparc64/exception.S b/sys/sparc64/sparc64/exception.S
index ed0e381..0b8a0fa 100644
--- a/sys/sparc64/sparc64/exception.S
+++ b/sys/sparc64/sparc64/exception.S
@@ -1280,6 +1280,7 @@ ENTRY(tl1_data_excptn_trap)
END(tl1_data_excptn_trap)
.macro tl1_align
+ wrpr %g0, PSTATE_ALT, %pstate
ba,a %xcc, tl1_align_trap
nop
.align 32
@@ -1289,7 +1290,7 @@ ENTRY(tl1_align_trap)
RESUME_SPILLFILL_ALIGN
ba %xcc, tl1_sfsr_trap
mov T_MEM_ADDRESS_NOT_ALIGNED | T_KERNEL, %g2
-END(tl1_data_excptn_trap)
+END(tl1_align_trap)
ENTRY(tl1_sfsr_trap)
wr %g0, ASI_DMMU, %asi
@@ -2615,9 +2616,9 @@ ENTRY(tl0_ret)
andn %l4, TSTATE_CWP_MASK, %g2
/*
- * Restore %y. Could also be below if we had more alternate globals.
+ * Save %y in an alternate global.
*/
- wr %l5, 0, %y
+ mov %l5, %g4
/*
* Setup %wstate for return. We need to restore the user window state
@@ -2662,8 +2663,8 @@ tl0_ret_fill:
* Fixup %tstate so the saved %cwp points to the current window and
* restore it.
*/
- rdpr %cwp, %g4
- wrpr %g2, %g4, %tstate
+ rdpr %cwp, %g1
+ wrpr %g2, %g1, %tstate
/*
* Restore the user window state. The transition bit was set above
@@ -2673,20 +2674,25 @@ tl0_ret_fill:
#if KTR_COMPILE & KTR_TRAP
CATR(KTR_TRAP, "tl0_ret: td=%#lx pil=%#lx pc=%#lx npc=%#lx sp=%#lx"
- , %g2, %g3, %g4, 7, 8, 9)
- ldx [PCPU(CURTHREAD)], %g3
- stx %g3, [%g2 + KTR_PARM1]
- rdpr %pil, %g3
- stx %g3, [%g2 + KTR_PARM2]
- rdpr %tpc, %g3
- stx %g3, [%g2 + KTR_PARM3]
- rdpr %tnpc, %g3
- stx %g3, [%g2 + KTR_PARM4]
- stx %sp, [%g2 + KTR_PARM5]
+ , %g1, %g2, %g3, 7, 8, 9)
+ ldx [PCPU(CURTHREAD)], %g2
+ stx %g2, [%g1 + KTR_PARM1]
+ rdpr %pil, %g2
+ stx %g2, [%g1 + KTR_PARM2]
+ rdpr %tpc, %g2
+ stx %g2, [%g1 + KTR_PARM3]
+ rdpr %tnpc, %g2
+ stx %g2, [%g1 + KTR_PARM4]
+ stx %sp, [%g1 + KTR_PARM5]
9:
#endif
/*
+ * Restore %y. Note that the CATR above clobbered it.
+ */
+ wr %g4, 0, %y
+
+ /*
* Return to usermode.
*/
retry
@@ -2697,9 +2703,14 @@ tl0_ret_fill_end:
, %l0, %l1, %l2, 7, 8, 9)
rdpr %pstate, %l1
stx %l1, [%l0 + KTR_PARM1]
- stx %l5, [%l0 + KTR_PARM2]
+ stx %l6, [%l0 + KTR_PARM2]
stx %sp, [%l0 + KTR_PARM3]
9:
+
+ /*
+ * Restore %y clobbered by the CATR. This was saved in %l5 above.
+ */
+ wr %l5, 0, %y
#endif
/*
@@ -2867,34 +2878,36 @@ ENTRY(tl1_ret)
andn %l0, TSTATE_CWP_MASK, %g1
mov %l1, %g2
mov %l2, %g3
+ mov %l4, %g4
wrpr %l3, 0, %pil
- wr %l4, 0, %y
restore
wrpr %g0, 2, %tl
- rdpr %cwp, %g4
- wrpr %g1, %g4, %tstate
wrpr %g2, 0, %tpc
wrpr %g3, 0, %tnpc
+ rdpr %cwp, %g2
+ wrpr %g1, %g2, %tstate
#if KTR_COMPILE & KTR_TRAP
CATR(KTR_TRAP, "tl1_ret: td=%#lx pil=%#lx ts=%#lx pc=%#lx sp=%#lx"
- , %g2, %g3, %g4, 7, 8, 9)
- ldx [PCPU(CURTHREAD)], %g3
- stx %g3, [%g2 + KTR_PARM1]
- rdpr %pil, %g3
- stx %g3, [%g2 + KTR_PARM2]
- rdpr %tstate, %g3
- stx %g3, [%g2 + KTR_PARM3]
- rdpr %tpc, %g3
- stx %g3, [%g2 + KTR_PARM4]
- stx %sp, [%g2 + KTR_PARM5]
+ , %g1, %g2, %g3, 7, 8, 9)
+ ldx [PCPU(CURTHREAD)], %g2
+ stx %g2, [%g1 + KTR_PARM1]
+ rdpr %pil, %g2
+ stx %g2, [%g1 + KTR_PARM2]
+ rdpr %tstate, %g2
+ stx %g2, [%g1 + KTR_PARM3]
+ rdpr %tpc, %g2
+ stx %g2, [%g1 + KTR_PARM4]
+ stx %sp, [%g1 + KTR_PARM5]
9:
#endif
+ wr %g4, 0, %y
+
retry
END(tl1_ret)
@@ -2995,33 +3008,35 @@ ENTRY(tl1_intr)
andn %l0, TSTATE_CWP_MASK, %g1
mov %l1, %g2
mov %l2, %g3
+ mov %l4, %g4
wrpr %l3, 0, %pil
- wr %l4, 0, %y
restore
wrpr %g0, 2, %tl
- rdpr %cwp, %g4
- wrpr %g1, %g4, %tstate
wrpr %g2, 0, %tpc
wrpr %g3, 0, %tnpc
+ rdpr %cwp, %g2
+ wrpr %g1, %g2, %tstate
#if KTR_COMPILE & KTR_INTR
CATR(KTR_INTR, "tl1_intr: td=%#x pil=%#lx ts=%#lx pc=%#lx sp=%#lx"
- , %g2, %g3, %g4, 7, 8, 9)
- ldx [PCPU(CURTHREAD)], %g3
- stx %g3, [%g2 + KTR_PARM1]
- rdpr %pil, %g3
- stx %g3, [%g2 + KTR_PARM2]
- rdpr %tstate, %g3
- stx %g3, [%g2 + KTR_PARM3]
- rdpr %tpc, %g3
- stx %g3, [%g2 + KTR_PARM4]
- stx %sp, [%g2 + KTR_PARM5]
+ , %g1, %g2, %g3, 7, 8, 9)
+ ldx [PCPU(CURTHREAD)], %g2
+ stx %g2, [%g1 + KTR_PARM1]
+ rdpr %pil, %g2
+ stx %g2, [%g1 + KTR_PARM2]
+ rdpr %tstate, %g2
+ stx %g2, [%g1 + KTR_PARM3]
+ rdpr %tpc, %g2
+ stx %g2, [%g1 + KTR_PARM4]
+ stx %sp, [%g1 + KTR_PARM5]
9:
#endif
+ wr %g4, 0, %y
+
retry
END(tl1_intr)
diff --git a/sys/sparc64/sparc64/genassym.c b/sys/sparc64/sparc64/genassym.c
index e33e581..89ec718 100644
--- a/sys/sparc64/sparc64/genassym.c
+++ b/sys/sparc64/sparc64/genassym.c
@@ -37,6 +37,7 @@ __FBSDID("$FreeBSD$");
#include <sys/proc.h>
#include <sys/smp.h>
#include <sys/vmmeter.h>
+#include <sys/_cpuset.h>
#include <vm/vm.h>
#include <vm/vm_page.h>
@@ -59,6 +60,8 @@ ASSYM(PCPU_PAGES, PCPU_PAGES);
ASSYM(TAR_VPN_SHIFT, TAR_VPN_SHIFT);
+ASSYM(_NCPUBITS, _NCPUBITS);
+
#ifdef SUN4U
ASSYM(TLB_DEMAP_ALL, TLB_DEMAP_ALL);
#endif
@@ -137,7 +140,6 @@ ASSYM(MAXCOMLEN, MAXCOMLEN);
ASSYM(PC_CURTHREAD, offsetof(struct pcpu, pc_curthread));
ASSYM(PC_CURPCB, offsetof(struct pcpu, pc_curpcb));
ASSYM(PC_CPUID, offsetof(struct pcpu, pc_cpuid));
-ASSYM(PC_CPUMASK, offsetof(struct pcpu, pc_cpumask));
ASSYM(PC_IRHEAD, offsetof(struct pcpu, pc_irhead));
ASSYM(PC_IRTAIL, offsetof(struct pcpu, pc_irtail));
ASSYM(PC_IRFREE, offsetof(struct pcpu, pc_irfree));
diff --git a/sys/sparc64/sparc64/intr_machdep.c b/sys/sparc64/sparc64/intr_machdep.c
index f6ef9a7..ed30182 100644
--- a/sys/sparc64/sparc64/intr_machdep.c
+++ b/sys/sparc64/sparc64/intr_machdep.c
@@ -445,8 +445,7 @@ intr_describe(int vec, void *ih, const char *descr)
* allocate CPUs round-robin.
*/
-/* The BSP is always a valid target. */
-static cpumask_t intr_cpus = (1 << 0);
+static cpuset_t intr_cpus;
static int current_cpu;
static void
@@ -468,7 +467,7 @@ intr_assign_next_cpu(struct intr_vector *iv)
current_cpu++;
if (current_cpu > mp_maxid)
current_cpu = 0;
- } while (!(intr_cpus & (1 << current_cpu)));
+ } while (!CPU_ISSET(current_cpu, &intr_cpus));
}
/* Attempt to bind the specified IRQ to the specified CPU. */
@@ -504,7 +503,7 @@ intr_add_cpu(u_int cpu)
if (bootverbose)
printf("INTR: Adding CPU %d as a target\n", cpu);
- intr_cpus |= (1 << cpu);
+ CPU_SET(cpu, &intr_cpus);
}
/*
@@ -518,6 +517,9 @@ intr_shuffle_irqs(void *arg __unused)
struct intr_vector *iv;
int i;
+ /* The BSP is always a valid target. */
+ CPU_SETOF(0, &intr_cpus);
+
/* Don't bother on UP. */
if (mp_ncpus == 1)
return;
diff --git a/sys/sparc64/sparc64/mp_exception.S b/sys/sparc64/sparc64/mp_exception.S
index 5a8a105..f1b323a 100644
--- a/sys/sparc64/sparc64/mp_exception.S
+++ b/sys/sparc64/sparc64/mp_exception.S
@@ -38,9 +38,21 @@ __FBSDID("$FreeBSD$");
.register %g2, #ignore
.register %g3, #ignore
-#define IPI_DONE(r1, r2, r3, r4) \
- lduw [PCPU(CPUMASK)], r4 ; \
- ATOMIC_CLEAR_INT(r1, r2, r3, r4)
+#define IPI_DONE(r1, r2, r3, r4, r5, r6) \
+ rd %y, r6 ; \
+ lduw [PCPU(CPUID)], r2 ; \
+ mov _NCPUBITS, r3 ; \
+ mov %g0, %y ; \
+ udiv r2, r3, r4 ; \
+ srl r4, 0, r5 ; \
+ sllx r5, PTR_SHIFT, r5 ; \
+ add r1, r5, r1 ; \
+ smul r4, r3, r3 ; \
+ sub r2, r3, r3 ; \
+ mov 1, r4 ; \
+ sllx r4, r3, r4 ; \
+ wr r6, %y ; \
+ ATOMIC_CLEAR_LONG(r1, r2, r3, r4)
/*
* Invalidate a physical page in the data cache. For UltraSPARC I and II.
@@ -77,7 +89,7 @@ ENTRY(tl_ipi_spitfire_dcache_page_inval)
2: brgz,pt %g2, 1b
sub %g2, %g4, %g2
- IPI_DONE(%g5, %g1, %g2, %g3)
+ IPI_DONE(%g5, %g1, %g2, %g3, %g4, %g6)
retry
END(tl_ipi_spitfire_dcache_page_inval)
@@ -117,7 +129,7 @@ ENTRY(tl_ipi_spitfire_icache_page_inval)
2: brgz,pt %g2, 1b
sub %g2, %g4, %g2
- IPI_DONE(%g5, %g1, %g2, %g3)
+ IPI_DONE(%g5, %g1, %g2, %g3, %g4, %g6)
retry
END(tl_ipi_spitfire_icache_page_inval)
@@ -148,7 +160,7 @@ ENTRY(tl_ipi_cheetah_dcache_page_inval)
blt,a,pt %xcc, 1b
nop
- IPI_DONE(%g5, %g1, %g2, %g3)
+ IPI_DONE(%g5, %g1, %g2, %g3, %g4, %g6)
retry
END(tl_ipi_cheetah_dcache_page_inval)
@@ -204,7 +216,7 @@ ENTRY(tl_ipi_tlb_page_demap)
stxa %g0, [%g2] ASI_IMMU_DEMAP
flush %g3
- IPI_DONE(%g5, %g1, %g2, %g3)
+ IPI_DONE(%g5, %g1, %g2, %g3, %g4, %g6)
retry
END(tl_ipi_tlb_page_demap)
@@ -247,7 +259,7 @@ ENTRY(tl_ipi_tlb_range_demap)
blt,a,pt %xcc, 1b
nop
- IPI_DONE(%g5, %g1, %g2, %g3)
+ IPI_DONE(%g5, %g1, %g2, %g3, %g4, %g6)
retry
END(tl_ipi_tlb_range_demap)
@@ -271,7 +283,7 @@ ENTRY(tl_ipi_tlb_context_demap)
stxa %g0, [%g1] ASI_IMMU_DEMAP
flush %g3
- IPI_DONE(%g5, %g1, %g2, %g3)
+ IPI_DONE(%g5, %g1, %g2, %g3, %g4, %g6)
retry
END(tl_ipi_tlb_context_demap)
@@ -283,7 +295,7 @@ ENTRY(tl_ipi_stick_rd)
rd %asr24, %g2
stx %g2, [%g1]
- IPI_DONE(%g5, %g1, %g2, %g3)
+ IPI_DONE(%g5, %g1, %g2, %g3, %g4, %g6)
retry
END(tl_ipi_stick_rd)
@@ -295,6 +307,6 @@ ENTRY(tl_ipi_tick_rd)
rd %tick, %g2
stx %g2, [%g1]
- IPI_DONE(%g5, %g1, %g2, %g3)
+ IPI_DONE(%g5, %g1, %g2, %g3, %g4, %g6)
retry
END(tl_ipi_tick_rd)
diff --git a/sys/sparc64/sparc64/mp_locore.S b/sys/sparc64/sparc64/mp_locore.S
index fbcb767..fd4357e 100644
--- a/sys/sparc64/sparc64/mp_locore.S
+++ b/sys/sparc64/sparc64/mp_locore.S
@@ -269,13 +269,17 @@ ENTRY(mp_startup)
add %l1, %l2, %l1
sub %l1, SPOFF + CCFSZ, %sp
+ /* Initialize global registers. */
+ call cpu_setregs
+ mov %l1, %o0
+
#if KTR_COMPILE & KTR_SMP
CATR(KTR_SMP,
"mp_startup: bootstrap cpuid=%d mid=%d pcpu=%#lx data=%#lx sp=%#lx"
, %g1, %g2, %g3, 7, 8, 9)
- lduw [%l1 + PC_CPUID], %g2
+ lduw [PCPU(CPUID)], %g2
stx %g2, [%g1 + KTR_PARM1]
- lduw [%l1 + PC_MID], %g2
+ lduw [PCPU(MID)], %g2
stx %g2, [%g1 + KTR_PARM2]
stx %l1, [%g1 + KTR_PARM3]
stx %sp, [%g1 + KTR_PARM5]
diff --git a/sys/sparc64/sparc64/mp_machdep.c b/sys/sparc64/sparc64/mp_machdep.c
index 4d9151e..f2e76df 100644
--- a/sys/sparc64/sparc64/mp_machdep.c
+++ b/sys/sparc64/sparc64/mp_machdep.c
@@ -121,7 +121,7 @@ cpu_ipi_single_t *cpu_ipi_single;
static vm_offset_t mp_tramp;
static u_int cpuid_to_mid[MAXCPU];
static int isjbus;
-static volatile cpumask_t shutdown_cpus;
+static volatile cpuset_t shutdown_cpus;
static void ap_count(phandle_t node, u_int mid, u_int cpu_impl);
static void ap_start(phandle_t node, u_int mid, u_int cpu_impl);
@@ -228,7 +228,7 @@ void
cpu_mp_setmaxid()
{
- all_cpus = 1 << curcpu;
+ CPU_SETOF(curcpu, &all_cpus);
mp_ncpus = 1;
mp_maxid = 0;
@@ -283,6 +283,7 @@ sun4u_startcpu(phandle_t cpu, void *func, u_long arg)
void
cpu_mp_start(void)
{
+ cpuset_t ocpus;
mtx_init(&ipi_mtx, "ipi", NULL, MTX_SPIN);
@@ -299,7 +300,9 @@ cpu_mp_start(void)
KASSERT(!isjbus || mp_ncpus <= IDR_JALAPENO_MAX_BN_PAIRS,
("%s: can only IPI a maximum of %d JBus-CPUs",
__func__, IDR_JALAPENO_MAX_BN_PAIRS));
- PCPU_SET(other_cpus, all_cpus & ~(1 << curcpu));
+ ocpus = all_cpus;
+ CPU_CLR(curcpu, &ocpus);
+ PCPU_SET(other_cpus, ocpus);
smp_active = 1;
}
@@ -357,7 +360,7 @@ ap_start(phandle_t node, u_int mid, u_int cpu_impl)
cache_init(pc);
- all_cpus |= 1 << cpuid;
+ CPU_SET(cpuid, &all_cpus);
intr_add_cpu(cpuid);
}
@@ -421,6 +424,7 @@ cpu_mp_unleash(void *v)
void
cpu_mp_bootstrap(struct pcpu *pc)
{
+ cpuset_t ocpus;
volatile struct cpu_start_args *csa;
csa = &cpu_start_args;
@@ -453,9 +457,6 @@ cpu_mp_bootstrap(struct pcpu *pc)
*/
tlb_flush_nonlocked();
- /* Initialize global registers. */
- cpu_setregs(pc);
-
/*
* Enable interrupts.
* Note that the PIL we be lowered indirectly via sched_throw(NULL)
@@ -465,7 +466,9 @@ cpu_mp_bootstrap(struct pcpu *pc)
smp_cpus++;
KASSERT(curthread != NULL, ("%s: curthread", __func__));
- PCPU_SET(other_cpus, all_cpus & ~(1 << curcpu));
+ ocpus = all_cpus;
+ CPU_CLR(curcpu, &ocpus);
+ PCPU_SET(other_cpus, ocpus);
printf("SMP: AP CPU #%d Launched!\n", curcpu);
csa->csa_count--;
@@ -484,14 +487,22 @@ cpu_mp_bootstrap(struct pcpu *pc)
void
cpu_mp_shutdown(void)
{
+ cpuset_t cpus;
int i;
critical_enter();
shutdown_cpus = PCPU_GET(other_cpus);
- if (stopped_cpus != PCPU_GET(other_cpus)) /* XXX */
- stop_cpus(stopped_cpus ^ PCPU_GET(other_cpus));
+ cpus = shutdown_cpus;
+
+ /* XXX: Stop all the CPUs which aren't already stopped. */
+ if (CPU_CMP(&stopped_cpus, &cpus)) {
+
+ /* pc_other_cpus is just a flat "on" mask without curcpu. */
+ CPU_NAND(&cpus, &stopped_cpus);
+ stop_cpus(cpus);
+ }
i = 0;
- while (shutdown_cpus != 0) {
+ while (!CPU_EMPTY(&shutdown_cpus)) {
if (i++ > 100000) {
printf("timeout shutting down CPUs.\n");
break;
@@ -509,20 +520,24 @@ cpu_ipi_ast(struct trapframe *tf)
static void
cpu_ipi_stop(struct trapframe *tf)
{
+ cpuset_t tcmask;
CTR2(KTR_SMP, "%s: stopped %d", __func__, curcpu);
+ sched_pin();
savectx(&stoppcbs[curcpu]);
- atomic_set_acq_int(&stopped_cpus, PCPU_GET(cpumask));
- while ((started_cpus & PCPU_GET(cpumask)) == 0) {
- if ((shutdown_cpus & PCPU_GET(cpumask)) != 0) {
- atomic_clear_int(&shutdown_cpus, PCPU_GET(cpumask));
+ tcmask = PCPU_GET(cpumask);
+ CPU_OR_ATOMIC(&stopped_cpus, &tcmask);
+ while (!CPU_OVERLAP(&started_cpus, &tcmask)) {
+ if (CPU_OVERLAP(&shutdown_cpus, &tcmask)) {
+ CPU_NAND_ATOMIC(&shutdown_cpus, &tcmask);
(void)intr_disable();
for (;;)
;
}
}
- atomic_clear_rel_int(&started_cpus, PCPU_GET(cpumask));
- atomic_clear_rel_int(&stopped_cpus, PCPU_GET(cpumask));
+ CPU_NAND_ATOMIC(&started_cpus, &tcmask);
+ CPU_NAND_ATOMIC(&stopped_cpus, &tcmask);
+ sched_unpin();
CTR2(KTR_SMP, "%s: restarted %d", __func__, curcpu);
}
@@ -551,13 +566,13 @@ cpu_ipi_hardclock(struct trapframe *tf)
}
static void
-spitfire_ipi_selected(u_int cpus, u_long d0, u_long d1, u_long d2)
+spitfire_ipi_selected(cpuset_t cpus, u_long d0, u_long d1, u_long d2)
{
u_int cpu;
- while (cpus) {
- cpu = ffs(cpus) - 1;
- cpus &= ~(1 << cpu);
+ while ((cpu = cpusetobj_ffs(&cpus)) != 0) {
+ cpu--;
+ CPU_CLR(cpu, &cpus);
spitfire_ipi_single(cpu, d0, d1, d2);
}
}
@@ -657,20 +672,21 @@ cheetah_ipi_single(u_int cpu, u_long d0, u_long d1, u_long d2)
}
static void
-cheetah_ipi_selected(u_int cpus, u_long d0, u_long d1, u_long d2)
+cheetah_ipi_selected(cpuset_t cpus, u_long d0, u_long d1, u_long d2)
{
+ char pbuf[CPUSETBUFSIZ];
register_t s;
u_long ids;
u_int bnp;
u_int cpu;
int i;
- KASSERT((cpus & (1 << curcpu)) == 0,
- ("%s: CPU can't IPI itself", __func__));
+ KASSERT(!CPU_ISSET(curcpu, &cpus), ("%s: CPU can't IPI itself",
+ __func__));
KASSERT((ldxa(0, ASI_INTR_DISPATCH_STATUS) &
IDR_CHEETAH_ALL_BUSY) == 0,
("%s: outstanding dispatch", __func__));
- if (cpus == 0)
+ if (CPU_EMPTY(&cpus))
return;
ids = 0;
for (i = 0; i < IPI_RETRIES * mp_ncpus; i++) {
@@ -681,7 +697,7 @@ cheetah_ipi_selected(u_int cpus, u_long d0, u_long d1, u_long d2)
membar(Sync);
bnp = 0;
for (cpu = 0; cpu < mp_ncpus; cpu++) {
- if ((cpus & (1 << cpu)) != 0) {
+ if (CPU_ISSET(cpu, &cpus)) {
stxa(AA_INTR_SEND | (cpuid_to_mid[cpu] <<
IDC_ITID_SHIFT) | bnp << IDC_BN_SHIFT,
ASI_SDB_INTR_W, 0);
@@ -698,9 +714,9 @@ cheetah_ipi_selected(u_int cpus, u_long d0, u_long d1, u_long d2)
return;
bnp = 0;
for (cpu = 0; cpu < mp_ncpus; cpu++) {
- if ((cpus & (1 << cpu)) != 0) {
+ if (CPU_ISSET(cpu, &cpus)) {
if ((ids & (IDR_NACK << (2 * bnp))) == 0)
- cpus &= ~(1 << cpu);
+ CPU_CLR(cpu, &cpus);
bnp++;
}
}
@@ -709,7 +725,7 @@ cheetah_ipi_selected(u_int cpus, u_long d0, u_long d1, u_long d2)
* CPUs we actually haven't tried to send an IPI to,
* but which apparently can be safely ignored.
*/
- if (cpus == 0)
+ if (CPU_EMPTY(&cpus))
return;
/*
* Leave interrupts enabled for a bit before retrying
@@ -719,11 +735,11 @@ cheetah_ipi_selected(u_int cpus, u_long d0, u_long d1, u_long d2)
DELAY(2 * mp_ncpus);
}
if (kdb_active != 0 || panicstr != NULL)
- printf("%s: couldn't send IPI (cpus=0x%u ids=0x%lu)\n",
- __func__, cpus, ids);
+ printf("%s: couldn't send IPI (cpus=%s ids=0x%lu)\n",
+ __func__, cpusetobj_strprint(pbuf, &cpus), ids);
else
- panic("%s: couldn't send IPI (cpus=0x%u ids=0x%lu)",
- __func__, cpus, ids);
+ panic("%s: couldn't send IPI (cpus=%s ids=0x%lu)",
+ __func__, cpusetobj_strprint(pbuf, &cpus), ids);
}
static void
@@ -772,19 +788,20 @@ jalapeno_ipi_single(u_int cpu, u_long d0, u_long d1, u_long d2)
}
static void
-jalapeno_ipi_selected(u_int cpus, u_long d0, u_long d1, u_long d2)
+jalapeno_ipi_selected(cpuset_t cpus, u_long d0, u_long d1, u_long d2)
{
+ char pbuf[CPUSETBUFSIZ];
register_t s;
u_long ids;
u_int cpu;
int i;
- KASSERT((cpus & (1 << curcpu)) == 0,
- ("%s: CPU can't IPI itself", __func__));
+ KASSERT(!CPU_ISSET(curcpu, &cpus), ("%s: CPU can't IPI itself",
+ __func__));
KASSERT((ldxa(0, ASI_INTR_DISPATCH_STATUS) &
IDR_CHEETAH_ALL_BUSY) == 0,
("%s: outstanding dispatch", __func__));
- if (cpus == 0)
+ if (CPU_EMPTY(&cpus))
return;
ids = 0;
for (i = 0; i < IPI_RETRIES * mp_ncpus; i++) {
@@ -794,7 +811,7 @@ jalapeno_ipi_selected(u_int cpus, u_long d0, u_long d1, u_long d2)
stxa(AA_SDB_INTR_D2, ASI_SDB_INTR_W, d2);
membar(Sync);
for (cpu = 0; cpu < mp_ncpus; cpu++) {
- if ((cpus & (1 << cpu)) != 0) {
+ if (CPU_ISSET(cpu, &cpus)) {
stxa(AA_INTR_SEND | (cpuid_to_mid[cpu] <<
IDC_ITID_SHIFT), ASI_SDB_INTR_W, 0);
membar(Sync);
@@ -808,10 +825,10 @@ jalapeno_ipi_selected(u_int cpus, u_long d0, u_long d1, u_long d2)
(IDR_CHEETAH_ALL_BUSY | IDR_CHEETAH_ALL_NACK)) == 0)
return;
for (cpu = 0; cpu < mp_ncpus; cpu++)
- if ((cpus & (1 << cpu)) != 0)
+ if (CPU_ISSET(cpu, &cpus))
if ((ids & (IDR_NACK <<
(2 * cpuid_to_mid[cpu]))) == 0)
- cpus &= ~(1 << cpu);
+ CPU_CLR(cpu, &cpus);
/*
* Leave interrupts enabled for a bit before retrying
* in order to avoid deadlocks if the other CPUs are
@@ -820,9 +837,9 @@ jalapeno_ipi_selected(u_int cpus, u_long d0, u_long d1, u_long d2)
DELAY(2 * mp_ncpus);
}
if (kdb_active != 0 || panicstr != NULL)
- printf("%s: couldn't send IPI (cpus=0x%u ids=0x%lu)\n",
- __func__, cpus, ids);
+ printf("%s: couldn't send IPI (cpus=%s ids=0x%lu)\n",
+ __func__, cpusetobj_strprint(pbuf, &cpus), ids);
else
- panic("%s: couldn't send IPI (cpus=0x%u ids=0x%lu)",
- __func__, cpus, ids);
+ panic("%s: couldn't send IPI (cpus=%s ids=0x%lu)",
+ __func__, cpusetobj_strprint(pbuf, &cpus), ids);
}
diff --git a/sys/sparc64/sparc64/pmap.c b/sys/sparc64/sparc64/pmap.c
index c34fc45..b01a558 100644
--- a/sys/sparc64/sparc64/pmap.c
+++ b/sys/sparc64/sparc64/pmap.c
@@ -664,7 +664,7 @@ pmap_bootstrap(u_int cpu_impl)
pm = kernel_pmap;
for (i = 0; i < MAXCPU; i++)
pm->pm_context[i] = TLB_CTX_KERNEL;
- pm->pm_active = ~0;
+ CPU_FILL(&pm->pm_active);
/*
* Flush all non-locked TLB entries possibly left over by the
@@ -1189,7 +1189,7 @@ pmap_pinit0(pmap_t pm)
PMAP_LOCK_INIT(pm);
for (i = 0; i < MAXCPU; i++)
pm->pm_context[i] = TLB_CTX_KERNEL;
- pm->pm_active = 0;
+ CPU_ZERO(&pm->pm_active);
pm->pm_tsb = NULL;
pm->pm_tsb_obj = NULL;
bzero(&pm->pm_stats, sizeof(pm->pm_stats));
@@ -1229,7 +1229,7 @@ pmap_pinit(pmap_t pm)
mtx_lock_spin(&sched_lock);
for (i = 0; i < MAXCPU; i++)
pm->pm_context[i] = -1;
- pm->pm_active = 0;
+ CPU_ZERO(&pm->pm_active);
mtx_unlock_spin(&sched_lock);
VM_OBJECT_LOCK(pm->pm_tsb_obj);
@@ -2230,7 +2230,7 @@ pmap_activate(struct thread *td)
PCPU_SET(tlb_ctx, context + 1);
pm->pm_context[curcpu] = context;
- pm->pm_active |= PCPU_GET(cpumask);
+ CPU_OR(&pm->pm_active, PCPU_PTR(cpumask));
PCPU_SET(pmap, pm);
stxa(AA_DMMU_TSB, ASI_DMMU, pm->pm_tsb);
diff --git a/sys/sparc64/sparc64/swtch.S b/sys/sparc64/sparc64/swtch.S
index ea13779..7515734 100644
--- a/sys/sparc64/sparc64/swtch.S
+++ b/sys/sparc64/sparc64/swtch.S
@@ -164,20 +164,29 @@ ENTRY(cpu_switch)
* If there was no non-kernel pmap, don't try to deactivate it.
*/
brz,pn %l2, 3f
- lduw [PCPU(CPUMASK)], %l4
+ lduw [PCPU(CPUID)], %l3
/*
* Mark the pmap of the last non-kernel vmspace to run as no longer
* active on this CPU.
*/
- lduw [%l2 + PM_ACTIVE], %l3
- andn %l3, %l4, %l3
- stw %l3, [%l2 + PM_ACTIVE]
+ mov _NCPUBITS, %l5
+ mov %g0, %y
+ udiv %l3, %l5, %l6
+ srl %l6, 0, %l4
+ sllx %l4, PTR_SHIFT, %l4
+ add %l4, PM_ACTIVE, %l4
+ smul %l6, %l5, %l5
+ sub %l3, %l5, %l5
+ mov 1, %l6
+ sllx %l6, %l5, %l5
+ ldx [%l2 + %l4], %l6
+ andn %l6, %l5, %l6
+ stx %l6, [%l2 + %l4]
/*
* Take away its context number.
*/
- lduw [PCPU(CPUID)], %l3
sllx %l3, INT_SHIFT, %l3
add %l2, PM_CONTEXT, %l4
mov -1, %l5
@@ -210,18 +219,27 @@ ENTRY(cpu_switch)
/*
* Set the new context number in the pmap.
*/
- lduw [PCPU(CPUID)], %i4
- sllx %i4, INT_SHIFT, %i4
+ lduw [PCPU(CPUID)], %l3
+ sllx %l3, INT_SHIFT, %i4
add %l1, PM_CONTEXT, %i5
stw %i3, [%i4 + %i5]
/*
* Mark the pmap as active on this CPU.
*/
- lduw [%l1 + PM_ACTIVE], %i4
- lduw [PCPU(CPUMASK)], %i5
- or %i4, %i5, %i4
- stw %i4, [%l1 + PM_ACTIVE]
+ mov _NCPUBITS, %l5
+ mov %g0, %y
+ udiv %l3, %l5, %l6
+ srl %l6, 0, %l4
+ sllx %l4, PTR_SHIFT, %l4
+ add %l4, PM_ACTIVE, %l4
+ smul %l6, %l5, %l5
+ sub %l3, %l5, %l5
+ mov 1, %l6
+ sllx %l6, %l5, %l5
+ ldx [%l1 + %l4], %l6
+ or %l6, %l5, %l6
+ stx %l6, [%l1 + %l4]
/*
* Make note of the change in pmap.
diff --git a/sys/sparc64/sparc64/tlb.c b/sys/sparc64/sparc64/tlb.c
index 990c777..9fcece6 100644
--- a/sys/sparc64/sparc64/tlb.c
+++ b/sys/sparc64/sparc64/tlb.c
@@ -80,7 +80,7 @@ tlb_context_demap(struct pmap *pm)
PMAP_STATS_INC(tlb_ncontext_demap);
cookie = ipi_tlb_context_demap(pm);
s = intr_disable();
- if (pm->pm_active & PCPU_GET(cpumask)) {
+ if (CPU_OVERLAP(&pm->pm_active, PCPU_PTR(cpumask))) {
KASSERT(pm->pm_context[curcpu] != -1,
("tlb_context_demap: inactive pmap?"));
stxa(TLB_DEMAP_PRIMARY | TLB_DEMAP_CONTEXT, ASI_DMMU_DEMAP, 0);
@@ -101,7 +101,7 @@ tlb_page_demap(struct pmap *pm, vm_offset_t va)
PMAP_STATS_INC(tlb_npage_demap);
cookie = ipi_tlb_page_demap(pm, va);
s = intr_disable();
- if (pm->pm_active & PCPU_GET(cpumask)) {
+ if (CPU_OVERLAP(&pm->pm_active, PCPU_PTR(cpumask))) {
KASSERT(pm->pm_context[curcpu] != -1,
("tlb_page_demap: inactive pmap?"));
if (pm == kernel_pmap)
@@ -128,7 +128,7 @@ tlb_range_demap(struct pmap *pm, vm_offset_t start, vm_offset_t end)
PMAP_STATS_INC(tlb_nrange_demap);
cookie = ipi_tlb_range_demap(pm, start, end);
s = intr_disable();
- if (pm->pm_active & PCPU_GET(cpumask)) {
+ if (CPU_OVERLAP(&pm->pm_active, PCPU_PTR(cpumask))) {
KASSERT(pm->pm_context[curcpu] != -1,
("tlb_range_demap: inactive pmap?"));
if (pm == kernel_pmap)
diff --git a/sys/sys/_cpuset.h b/sys/sys/_cpuset.h
new file mode 100644
index 0000000..42a0a6a
--- /dev/null
+++ b/sys/sys/_cpuset.h
@@ -0,0 +1,52 @@
+/*-
+ * Copyright (c) 2008, Jeffrey Roberson <jeff@freebsd.org>
+ * All rights reserved.
+ *
+ * Copyright (c) 2008 Nokia Corporation
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice unmodified, this list of conditions, and the following
+ * disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _SYS__CPUSET_H_
+#define _SYS__CPUSET_H_
+
+#ifdef _KERNEL
+#define CPU_SETSIZE MAXCPU
+#endif
+
+#define CPU_MAXSIZE 128
+
+#ifndef CPU_SETSIZE
+#define CPU_SETSIZE CPU_MAXSIZE
+#endif
+
+#define _NCPUBITS (sizeof(long) * NBBY) /* bits per mask */
+#define _NCPUWORDS howmany(CPU_SETSIZE, _NCPUBITS)
+
+typedef struct _cpuset {
+ long __bits[howmany(CPU_SETSIZE, _NCPUBITS)];
+} cpuset_t;
+
+#endif /* !_SYS__CPUSET_H_ */
diff --git a/sys/sys/_rmlock.h b/sys/sys/_rmlock.h
index 75a159c..15d6c49 100644
--- a/sys/sys/_rmlock.h
+++ b/sys/sys/_rmlock.h
@@ -45,7 +45,7 @@ LIST_HEAD(rmpriolist,rm_priotracker);
struct rmlock {
struct lock_object lock_object;
- volatile cpumask_t rm_writecpus;
+ volatile cpuset_t rm_writecpus;
LIST_HEAD(,rm_priotracker) rm_activeReaders;
union {
struct mtx _rm_lock_mtx;
diff --git a/sys/sys/conf.h b/sys/sys/conf.h
index 31fd34d..0c7ed41 100644
--- a/sys/sys/conf.h
+++ b/sys/sys/conf.h
@@ -332,6 +332,7 @@ struct dumperinfo {
int set_dumper(struct dumperinfo *);
int dump_write(struct dumperinfo *, void *, vm_offset_t, off_t, size_t);
void dumpsys(struct dumperinfo *);
+int doadump(boolean_t);
extern int dumping; /* system is dumping */
#endif /* _KERNEL */
diff --git a/sys/sys/cpuset.h b/sys/sys/cpuset.h
index 854fa29..030a874 100644
--- a/sys/sys/cpuset.h
+++ b/sys/sys/cpuset.h
@@ -32,22 +32,9 @@
#ifndef _SYS_CPUSET_H_
#define _SYS_CPUSET_H_
-#ifdef _KERNEL
-#define CPU_SETSIZE MAXCPU
-#endif
+#include <sys/_cpuset.h>
-#define CPU_MAXSIZE 128
-
-#ifndef CPU_SETSIZE
-#define CPU_SETSIZE CPU_MAXSIZE
-#endif
-
-#define _NCPUBITS (sizeof(long) * NBBY) /* bits per mask */
-#define _NCPUWORDS howmany(CPU_SETSIZE, _NCPUBITS)
-
-typedef struct _cpuset {
- long __bits[howmany(CPU_SETSIZE, _NCPUBITS)];
-} cpuset_t;
+#define CPUSETBUFSIZ ((2 + sizeof(long) * 2) * _NCPUWORDS)
#define __cpuset_mask(n) ((long)1 << ((n) % _NCPUBITS))
#define CPU_CLR(n, p) ((p)->__bits[(n)/_NCPUBITS] &= ~__cpuset_mask(n))
@@ -66,6 +53,11 @@ typedef struct _cpuset {
(p)->__bits[__i] = -1; \
} while (0)
+#define CPU_SETOF(n, p) do { \
+ CPU_ZERO(p); \
+ ((p)->__bits[(n)/_NCPUBITS] = __cpuset_mask(n)); \
+} while (0)
+
/* Is p empty. */
#define CPU_EMPTY(p) __extension__ ({ \
__size_t __i; \
@@ -75,6 +67,15 @@ typedef struct _cpuset {
__i == _NCPUWORDS; \
})
+/* Is p a full set (all bits set). */
+#define CPU_ISFULLSET(p) __extension__ ({ \
+ __size_t __i; \
+ for (__i = 0; __i < _NCPUWORDS; __i++) \
+ if ((p)->__bits[__i] != (long)-1) \
+ break; \
+ __i == _NCPUWORDS; \
+})
+
/* Is c a subset of p. */
#define CPU_SUBSET(p, c) __extension__ ({ \
__size_t __i; \
@@ -124,6 +125,33 @@ typedef struct _cpuset {
(d)->__bits[__i] &= ~(s)->__bits[__i]; \
} while (0)
+#define CPU_CLR_ATOMIC(n, p) \
+ atomic_clear_long(&(p)->__bits[(n)/_NCPUBITS], __cpuset_mask(n))
+
+#define CPU_SET_ATOMIC(n, p) \
+ atomic_set_long(&(p)->__bits[(n)/_NCPUBITS], __cpuset_mask(n))
+
+#define CPU_OR_ATOMIC(d, s) do { \
+ __size_t __i; \
+ for (__i = 0; __i < _NCPUWORDS; __i++) \
+ atomic_set_long(&(d)->__bits[__i], \
+ (s)->__bits[__i]); \
+} while (0)
+
+#define CPU_NAND_ATOMIC(d, s) do { \
+ __size_t __i; \
+ for (__i = 0; __i < _NCPUWORDS; __i++) \
+ atomic_clear_long(&(d)->__bits[__i], \
+ (s)->__bits[__i]); \
+} while (0)
+
+#define CPU_COPY_STORE_REL(f, t) do { \
+ __size_t __i; \
+ for (__i = 0; __i < _NCPUWORDS; __i++) \
+ atomic_store_rel_long(&(t)->__bits[__i], \
+ (f)->__bits[__i]); \
+} while (0)
+
/*
* Valid cpulevel_t values.
*/
@@ -184,6 +212,9 @@ void cpuset_rel(struct cpuset *);
int cpuset_setthread(lwpid_t id, cpuset_t *);
int cpuset_create_root(struct prison *, struct cpuset **);
int cpuset_setproc_update_set(struct proc *, struct cpuset *);
+int cpusetobj_ffs(const cpuset_t *);
+char *cpusetobj_strprint(char *, const cpuset_t *);
+int cpusetobj_strscan(cpuset_t *, const char *);
#else
__BEGIN_DECLS
diff --git a/sys/sys/ktr.h b/sys/sys/ktr.h
index 3b78101..7885b22 100644
--- a/sys/sys/ktr.h
+++ b/sys/sys/ktr.h
@@ -97,6 +97,9 @@
#ifndef LOCORE
+#include <sys/param.h>
+#include <sys/_cpuset.h>
+
struct ktr_entry {
u_int64_t ktr_timestamp;
int ktr_cpu;
@@ -107,7 +110,7 @@ struct ktr_entry {
u_long ktr_parms[KTR_PARMS];
};
-extern int ktr_cpumask;
+extern cpuset_t ktr_cpumask;
extern int ktr_mask;
extern int ktr_entries;
extern int ktr_verbose;
diff --git a/sys/sys/pcpu.h b/sys/sys/pcpu.h
index 0bb2cbd..e6044a7 100644
--- a/sys/sys/pcpu.h
+++ b/sys/sys/pcpu.h
@@ -37,6 +37,7 @@
#error "no assembler-serviceable parts inside"
#endif
+#include <sys/_cpuset.h>
#include <sys/queue.h>
#include <sys/vmmeter.h>
#include <sys/resource.h>
@@ -162,8 +163,6 @@ struct pcpu {
uint64_t pc_switchtime; /* cpu_ticks() at last csw */
int pc_switchticks; /* `ticks' at last csw */
u_int pc_cpuid; /* This cpu number */
- cpumask_t pc_cpumask; /* This cpu mask */
- cpumask_t pc_other_cpus; /* Mask of all other cpus */
STAILQ_ENTRY(pcpu) pc_allcpu;
struct lock_list_entry *pc_spinlocks;
#ifdef KTR
@@ -197,6 +196,18 @@ struct pcpu {
* if only to make kernel debugging easier.
*/
PCPU_MD_FIELDS;
+
+ /*
+ * XXX
+ * For the time being, keep the cpuset_t objects as the very last
+ * members of the structure.
+ * They are actually tagged to be removed soon, but as long as this
+ * does not happen, it is necessary to find a way to easily implement
+ * interfaces to userland, and leaving them last makes that
+ * possible.
+ */
+ cpuset_t pc_cpumask; /* This cpu mask */
+ cpuset_t pc_other_cpus; /* Mask of all other cpus */
} __aligned(CACHE_LINE_SIZE);
#ifdef _KERNEL
diff --git a/sys/sys/pmckern.h b/sys/sys/pmckern.h
index 3e8c1ef..796c4ca 100644
--- a/sys/sys/pmckern.h
+++ b/sys/sys/pmckern.h
@@ -76,7 +76,7 @@ extern int (*pmc_intr)(int _cpu, struct trapframe *_frame);
extern struct sx pmc_sx;
/* Per-cpu flags indicating availability of sampling data */
-extern volatile cpumask_t pmc_cpumask;
+extern volatile cpuset_t pmc_cpumask;
/* Count of system-wide sampling PMCs in existence */
extern volatile int pmc_ss_count;
@@ -122,7 +122,7 @@ do { \
#define PMC_SYSTEM_SAMPLING_ACTIVE() (pmc_ss_count > 0)
/* Check if a CPU has recorded samples. */
-#define PMC_CPU_HAS_SAMPLES(C) (__predict_false(pmc_cpumask & (1 << (C))))
+#define PMC_CPU_HAS_SAMPLES(C) (__predict_false(CPU_ISSET(C, &pmc_cpumask)))
/*
* Helper functions.
diff --git a/sys/sys/smp.h b/sys/sys/smp.h
index f8cce5f..66e8008 100644
--- a/sys/sys/smp.h
+++ b/sys/sys/smp.h
@@ -16,6 +16,8 @@
#ifndef LOCORE
+#include <sys/cpuset.h>
+
/*
* Topology of a NUMA or HTT system.
*
@@ -32,7 +34,7 @@
struct cpu_group {
struct cpu_group *cg_parent; /* Our parent group. */
struct cpu_group *cg_child; /* Optional children groups. */
- cpumask_t cg_mask; /* Mask of cpus in this group. */
+ cpuset_t cg_mask; /* Mask of cpus in this group. */
int32_t cg_count; /* Count of cpus in this group. */
int16_t cg_children; /* Number of children groups. */
int8_t cg_level; /* Shared cache level. */
@@ -71,10 +73,10 @@ struct cpu_group *smp_topo_find(struct cpu_group *top, int cpu);
extern void (*cpustop_restartfunc)(void);
extern int smp_active;
extern int smp_cpus;
-extern volatile cpumask_t started_cpus;
-extern volatile cpumask_t stopped_cpus;
-extern cpumask_t hlt_cpus_mask;
-extern cpumask_t logical_cpus_mask;
+extern volatile cpuset_t started_cpus;
+extern volatile cpuset_t stopped_cpus;
+extern cpuset_t hlt_cpus_mask;
+extern cpuset_t logical_cpus_mask;
#endif /* SMP */
extern u_int mp_maxid;
@@ -82,14 +84,14 @@ extern int mp_maxcpus;
extern int mp_ncpus;
extern volatile int smp_started;
-extern cpumask_t all_cpus;
+extern cpuset_t all_cpus;
/*
* Macro allowing us to determine whether a CPU is absent at any given
* time, thus permitting us to configure sparse maps of cpuid-dependent
* (per-CPU) structures.
*/
-#define CPU_ABSENT(x_cpu) ((all_cpus & (1 << (x_cpu))) == 0)
+#define CPU_ABSENT(x_cpu) (!CPU_ISSET(x_cpu, &all_cpus))
/*
* Macros to iterate over non-absent CPUs. CPU_FOREACH() takes an
@@ -158,11 +160,11 @@ void cpu_mp_setmaxid(void);
void cpu_mp_start(void);
void forward_signal(struct thread *);
-int restart_cpus(cpumask_t);
-int stop_cpus(cpumask_t);
-int stop_cpus_hard(cpumask_t);
+int restart_cpus(cpuset_t);
+int stop_cpus(cpuset_t);
+int stop_cpus_hard(cpuset_t);
#if defined(__amd64__)
-int suspend_cpus(cpumask_t);
+int suspend_cpus(cpuset_t);
#endif
void smp_rendezvous_action(void);
extern struct mtx smp_ipi_mtx;
@@ -173,7 +175,7 @@ void smp_rendezvous(void (*)(void *),
void (*)(void *),
void (*)(void *),
void *arg);
-void smp_rendezvous_cpus(cpumask_t,
+void smp_rendezvous_cpus(cpuset_t,
void (*)(void *),
void (*)(void *),
void (*)(void *),
diff --git a/sys/sys/soundcard.h b/sys/sys/soundcard.h
index c4cfc27..a6817df 100644
--- a/sys/sys/soundcard.h
+++ b/sys/sys/soundcard.h
@@ -311,7 +311,8 @@ typedef struct _snd_capabilities {
* IOCTL Commands for /dev/sequencer
*/
-#define SNDCTL_SEQ_RESET _IO ('Q', 0)
+#define SNDCTL_SEQ_HALT _IO ('Q', 0)
+#define SNDCTL_SEQ_RESET SNDCTL_SEQ_HALT /* Historic interface */
#define SNDCTL_SEQ_SYNC _IO ('Q', 1)
#define SNDCTL_SYNTH_INFO _IOWR('Q', 2, struct synth_info)
#define SNDCTL_SEQ_CTRLRATE _IOWR('Q', 3, int) /* Set/get timer res.(hz) */
diff --git a/sys/sys/types.h b/sys/sys/types.h
index 4bc1a8d..cb513af 100644
--- a/sys/sys/types.h
+++ b/sys/sys/types.h
@@ -99,7 +99,6 @@ typedef __clockid_t clockid_t;
#define _CLOCKID_T_DECLARED
#endif
-typedef __cpumask_t cpumask_t;
typedef __critical_t critical_t; /* Critical section value */
typedef __int64_t daddr_t; /* disk address */
diff --git a/sys/ufs/ffs/ffs_alloc.c b/sys/ufs/ffs/ffs_alloc.c
index 5b8f3c8..7f5d1b4 100644
--- a/sys/ufs/ffs/ffs_alloc.c
+++ b/sys/ufs/ffs/ffs_alloc.c
@@ -420,13 +420,13 @@ nospace:
*/
if (reclaimed == 0) {
reclaimed = 1;
- softdep_request_cleanup(fs, vp, cred, FLUSH_BLOCKS_WAIT);
UFS_UNLOCK(ump);
if (bp) {
brelse(bp);
bp = NULL;
}
UFS_LOCK(ump);
+ softdep_request_cleanup(fs, vp, cred, FLUSH_BLOCKS_WAIT);
goto retry;
}
UFS_UNLOCK(ump);
@@ -2356,8 +2356,8 @@ ffs_fserr(fs, inum, cp)
* specified inode by the specified amount. Under normal
* operation the count should always go down. Decrementing
* the count to zero will cause the inode to be freed.
- * adjblkcnt(inode, amt) - adjust the number of blocks used to
- * by the specifed amount.
+ * adjblkcnt(inode, amt) - adjust the number of blocks used by the
+ * inode by the specified amount.
* adjndir, adjbfree, adjifree, adjffree, adjnumclusters(amt) -
* adjust the superblock summary.
* freedirs(inode, count) - directory inodes [inode..inode + count - 1]
diff --git a/sys/x86/x86/local_apic.c b/sys/x86/x86/local_apic.c
index f5ac443..dfef3a7 100644
--- a/sys/x86/x86/local_apic.c
+++ b/sys/x86/x86/local_apic.c
@@ -802,7 +802,7 @@ lapic_handle_timer(struct trapframe *frame)
* and unlike other schedulers it actually schedules threads to
* those CPUs.
*/
- if ((hlt_cpus_mask & (1 << PCPU_GET(cpuid))) != 0)
+ if (CPU_ISSET(PCPU_GET(cpuid), &hlt_cpus_mask))
return;
#endif
diff --git a/tools/regression/bin/sh/expansion/heredoc1.0 b/tools/regression/bin/sh/expansion/heredoc1.0
new file mode 100644
index 0000000..a67b2da
--- /dev/null
+++ b/tools/regression/bin/sh/expansion/heredoc1.0
@@ -0,0 +1,25 @@
+# $FreeBSD$
+
+f() { return $1; }
+
+[ `f 42; { cat; } <<EOF
+$?
+EOF
+` = 42 ] || echo compound command bad
+
+[ `f 42; (cat) <<EOF
+$?
+EOF
+` = 42 ] || echo subshell bad
+
+long=`printf %08192d 0`
+
+[ `f 42; { cat; } <<EOF
+$long.$?
+EOF
+` = $long.42 ] || echo long compound command bad
+
+[ `f 42; (cat) <<EOF
+$long.$?
+EOF
+` = $long.42 ] || echo long subshell bad
diff --git a/tools/regression/bin/sh/expansion/heredoc2.0 b/tools/regression/bin/sh/expansion/heredoc2.0
new file mode 100644
index 0000000..2551432
--- /dev/null
+++ b/tools/regression/bin/sh/expansion/heredoc2.0
@@ -0,0 +1,15 @@
+# $FreeBSD$
+
+f() { return $1; }
+
+[ `f 42; cat <<EOF
+$?
+EOF
+` = 42 ] || echo simple command bad
+
+long=`printf %08192d 0`
+
+[ `f 42; cat <<EOF
+$long.$?
+EOF
+` = $long.42 ] || echo long simple command bad
diff --git a/usr.bin/calendar/calendars/calendar.freebsd b/usr.bin/calendar/calendars/calendar.freebsd
index a04476a..32e13e0 100644
--- a/usr.bin/calendar/calendars/calendar.freebsd
+++ b/usr.bin/calendar/calendars/calendar.freebsd
@@ -294,6 +294,7 @@
10/22 Jean-Sebastien Pedron <dumbbell@FreeBSD.org> born in Redon, Ille-et-Vilaine, France, 1980
10/23 Mario Sergio Fujikawa Ferreira <lioux@FreeBSD.org> born in Brasilia, Distrito Federal, Brazil, 1976
10/25 Eric Melville <eric@FreeBSD.org> born in Los Gatos, California, United States, 1980
+10/25 Julien Laffaye <jlaffaye@FreeBSD.org> born in Toulouse, France, 1988
10/26 Philip M. Gollucci <pgollucci@FreeBSD.org> born in Silver Spring, Maryland, United States, 1979
10/27 Takanori Watanabe <takawata@FreeBSD.org> born in Numazu, Shizuoka, Japan, 1972
11/05 M. Warner Losh <imp@FreeBSD.org> born in Kansas City, Kansas, United States, 1966
diff --git a/usr.bin/calendar/io.c b/usr.bin/calendar/io.c
index ef98d5d..eb37eac 100644
--- a/usr.bin/calendar/io.c
+++ b/usr.bin/calendar/io.c
@@ -346,7 +346,7 @@ closecal(FILE *fp)
write(pdes[1], pw->pw_name, strlen(pw->pw_name));
write(pdes[1], ">\nTo: <", 7);
write(pdes[1], pw->pw_name, strlen(pw->pw_name));
- write(pdes[1], ">\nSubject: ", 12);
+ write(pdes[1], ">\nSubject: ", 11);
write(pdes[1], dayname, strlen(dayname));
write(pdes[1], "'s Calendar\nPrecedence: bulk\n\n", 30);
diff --git a/usr.bin/grep/Makefile b/usr.bin/grep/Makefile
index 8cd490d..f09a7d6 100644
--- a/usr.bin/grep/Makefile
+++ b/usr.bin/grep/Makefile
@@ -28,8 +28,6 @@ MLINKS= grep.1 egrep.1 \
bsdgrep.1: grep.1
cp ${.ALLSRC} ${.TARGET}
-WARNS?= 6
-
LDADD= -lz -lbz2
DPADD= ${LIBZ} ${LIBBZ2}
diff --git a/usr.bin/iconv/Makefile b/usr.bin/iconv/Makefile
index 7e8f6e7..deab092 100644
--- a/usr.bin/iconv/Makefile
+++ b/usr.bin/iconv/Makefile
@@ -7,8 +7,6 @@ PROG= iconv
#SRCS= iconv.c
MAN= iconv.1
-WARNS?= 6
-
LDADD+= -lcrypt
DPADD+= ${LIBCRYPT}
diff --git a/usr.bin/kdump/mksubr b/usr.bin/kdump/mksubr
index afff24d..dbde92b 100644
--- a/usr.bin/kdump/mksubr
+++ b/usr.bin/kdump/mksubr
@@ -345,7 +345,7 @@ auto_if_type "sockfamilyname" "AF_[[:alnum:]]+[[:space:]]+" "sys/socket.h"
auto_if_type "sockipprotoname" "IPPROTO_[[:alnum:]]+[[:space:]]+" "netinet/in.h"
auto_switch_type "sockoptname" "SO_[A-Z]+[[:space:]]+0x[0-9]+" "sys/socket.h"
auto_switch_type "socktypename" "SOCK_[A-Z]+[[:space:]]+[1-9]+[0-9]*" "sys/socket.h"
-auto_switch_type "ptraceopname" "PT_[[:alnum:]]+[[:space:]]+[0-9]+" "sys/ptrace.h"
+auto_switch_type "ptraceopname" "PT_[[:alnum:]_]+[[:space:]]+[0-9]+" "sys/ptrace.h"
cat <<_EOF_
/*
diff --git a/usr.bin/rctl/Makefile b/usr.bin/rctl/Makefile
index 1088cf1..c5c32eb 100644
--- a/usr.bin/rctl/Makefile
+++ b/usr.bin/rctl/Makefile
@@ -6,6 +6,4 @@ MAN= rctl.8
DPADD= ${LIBUTIL}
LDADD= -lutil
-WARNS?= 6
-
.include <bsd.prog.mk>
diff --git a/usr.bin/su/su.1 b/usr.bin/su/su.1
index 8b79d41..d9180e3 100644
--- a/usr.bin/su/su.1
+++ b/usr.bin/su/su.1
@@ -28,7 +28,7 @@
.\" @(#)su.1 8.2 (Berkeley) 4/18/94
.\" $FreeBSD$
.\"
-.Dd July 1, 2008
+.Dd June 6, 2011
.Dt SU 1
.Os
.Sh NAME
@@ -193,16 +193,22 @@ PAM configuration for
.Sh EXAMPLES
.Bl -tag -width 5n -compact
.It Li "su -m man -c catman"
-Runs the command
-.Li catman
-as user
-.Li man .
+Starts a shell as user
+.Li man ,
+and runs the command
+.Li catman .
You will be asked for man's password unless your real UID is 0.
Note that the
.Fl m
option is required since user
.Dq man
does not have a valid shell by default.
+In this example,
+.Fl c
+is passed to the shell of the user
+.Dq man ,
+and is not interpreted as an argument to
+.Nm .
.It Li "su -m man -c 'catman /usr/share/man /usr/local/man'"
Same as above, but the target command consists of more than a
single word and hence is quoted for use with the
diff --git a/usr.sbin/bluetooth/ath3kfw/Makefile b/usr.sbin/bluetooth/ath3kfw/Makefile
index 0ff010f..373655b 100644
--- a/usr.sbin/bluetooth/ath3kfw/Makefile
+++ b/usr.sbin/bluetooth/ath3kfw/Makefile
@@ -2,7 +2,6 @@
PROG= ath3kfw
MAN= ath3kfw.8
-WARNS?= 6
DPADD+= ${LIBUSB}
LDADD+= -lusb
diff --git a/usr.sbin/bsdinstall/scripts/netconfig_ipv6 b/usr.sbin/bsdinstall/scripts/netconfig_ipv6
index 70bd203..8bff816 100755
--- a/usr.sbin/bsdinstall/scripts/netconfig_ipv6
+++ b/usr.sbin/bsdinstall/scripts/netconfig_ipv6
@@ -33,7 +33,6 @@
#
# TODO:
-# - Add -R /sbin/resolvconf to rtsol once support is in tree.
# - Add DHCPv6 support once FreeBSD ships with it.
#
diff --git a/usr.sbin/bsnmpd/modules/snmp_wlan/Makefile b/usr.sbin/bsnmpd/modules/snmp_wlan/Makefile
index 9b8628b..6a53d46 100644
--- a/usr.sbin/bsnmpd/modules/snmp_wlan/Makefile
+++ b/usr.sbin/bsnmpd/modules/snmp_wlan/Makefile
@@ -6,8 +6,6 @@ MOD= wlan
SRCS= wlan_snmp.c wlan_sys.c
CFLAGS+= -DSNMPTREE_TYPES
-WARNS= 6
-
XSYM= begemotWlan
BMIBS= BEGEMOT-WIRELESS-MIB.txt
diff --git a/usr.sbin/lastlogin/lastlogin.8 b/usr.sbin/lastlogin/lastlogin.8
index 0630163..fdbc871 100644
--- a/usr.sbin/lastlogin/lastlogin.8
+++ b/usr.sbin/lastlogin/lastlogin.8
@@ -31,7 +31,7 @@
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
-.Dd January 11, 1996
+.Dd June 6, 2011
.Dt LASTLOGIN 8
.Os
.Sh NAME
@@ -39,6 +39,8 @@
.Nd indicate last login time of users
.Sh SYNOPSIS
.Nm
+.Op Fl f Ar file
+.Op Fl rt
.Op Ar user ...
.Sh DESCRIPTION
The
@@ -54,8 +56,8 @@ If more than one
.Ar user
is given, the session information for each user is printed in
the order given on the command line.
-Otherwise, information
-for all users is printed, sorted by name.
+Otherwise, information for all users is printed.
+By default, the entries are sorted by user name.
.Pp
The
.Nm
@@ -63,6 +65,18 @@ utility differs from
.Xr last 1
in that it only prints information regarding the very last login session.
The last login database is never turned over or deleted in standard usage.
+.Pp
+The following options are available:
+.Bl -tag -width indent
+.It Fl f Ar file
+Open last login database
+.Ar file
+instead of the system-wide database.
+.It Fl r
+Print the entries in reverse sorted order.
+.It Fl t
+Sort the entries by last login time, instead of by user name.
+.El
.Sh FILES
.Bl -tag -width /var/log/utx.lastlogin -compact
.It Pa /var/log/utx.lastlogin
diff --git a/usr.sbin/lastlogin/lastlogin.c b/usr.sbin/lastlogin/lastlogin.c
index 4c08547..2f8dd78 100644
--- a/usr.sbin/lastlogin/lastlogin.c
+++ b/usr.sbin/lastlogin/lastlogin.c
@@ -47,30 +47,59 @@ __RCSID("$NetBSD: lastlogin.c,v 1.4 1998/02/03 04:45:35 perry Exp $");
int main(int, char **);
static void output(struct utmpx *);
static void usage(void);
+static int utcmp_user(const void *, const void *);
+
+static int order = 1;
+static const char *file = NULL;
+static int (*utcmp)(const void *, const void *) = utcmp_user;
static int
-utcmp(const void *u1, const void *u2)
+utcmp_user(const void *u1, const void *u2)
{
- return (strcmp(((const struct utmpx *)u1)->ut_user,
+ return (order * strcmp(((const struct utmpx *)u1)->ut_user,
((const struct utmpx *)u2)->ut_user));
}
+static int
+utcmp_time(const void *u1, const void *u2)
+{
+ time_t t1, t2;
+
+ t1 = ((const struct utmpx *)u1)->ut_tv.tv_sec;
+ t2 = ((const struct utmpx *)u2)->ut_tv.tv_sec;
+ return (t1 < t2 ? order : t1 > t2 ? -order : 0);
+}
+
int
main(int argc, char *argv[])
{
int ch, i, ulistsize;
struct utmpx *u, *ulist;
- while ((ch = getopt(argc, argv, "")) != -1) {
- usage();
+ while ((ch = getopt(argc, argv, "f:rt")) != -1) {
+ switch (ch) {
+ case 'f':
+ file = optarg;
+ break;
+ case 'r':
+ order = -1;
+ break;
+ case 't':
+ utcmp = utcmp_time;
+ break;
+ default:
+ usage();
+ }
}
+ argc -= optind;
+ argv += optind;
- /* Process usernames given on the command line. */
- if (argc > 1) {
- for (i = 1; i < argc; ++i) {
- if (setutxdb(UTXDB_LASTLOGIN, NULL) != 0)
- errx(1, "failed to open lastlog database");
+ if (argc > 0) {
+ /* Process usernames given on the command line. */
+ for (i = 0; i < argc; i++) {
+ if (setutxdb(UTXDB_LASTLOGIN, file) != 0)
+ err(1, "failed to open lastlog database");
if ((u = getutxuser(argv[i])) == NULL) {
warnx("user '%s' not found", argv[i]);
continue;
@@ -78,11 +107,10 @@ main(int argc, char *argv[])
output(u);
endutxent();
}
- }
- /* Read all lastlog entries, looking for active ones */
- else {
- if (setutxdb(UTXDB_LASTLOGIN, NULL) != 0)
- errx(1, "failed to open lastlog database");
+ } else {
+ /* Read all lastlog entries, looking for active ones. */
+ if (setutxdb(UTXDB_LASTLOGIN, file) != 0)
+ err(1, "failed to open lastlog database");
ulist = NULL;
ulistsize = 0;
while ((u = getutxent()) != NULL) {
@@ -119,6 +147,6 @@ output(struct utmpx *u)
static void
usage(void)
{
- fprintf(stderr, "usage: lastlogin [user ...]\n");
+ fprintf(stderr, "usage: lastlogin [-f file] [-rt] [user ...]\n");
exit(1);
}
diff --git a/usr.sbin/pmccontrol/pmccontrol.c b/usr.sbin/pmccontrol/pmccontrol.c
index cce1e0e..80d4bd7 100644
--- a/usr.sbin/pmccontrol/pmccontrol.c
+++ b/usr.sbin/pmccontrol/pmccontrol.c
@@ -28,8 +28,9 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
-#include <sys/types.h>
+#include <sys/param.h>
#include <sys/queue.h>
+#include <sys/cpuset.h>
#include <sys/sysctl.h>
#include <assert.h>
@@ -133,26 +134,32 @@ pmcc_init_debug(void)
static int
pmcc_do_enable_disable(struct pmcc_op_list *op_list)
{
+ long cpusetsize;
int c, error, i, j, ncpu, npmc, t;
- cpumask_t haltedcpus, cpumask;
+ cpuset_t haltedcpus, cpumask;
struct pmcc_op *np;
unsigned char *map;
unsigned char op;
int cpu, pmc;
- size_t dummy;
+ size_t setsize;
if ((ncpu = pmc_ncpu()) < 0)
err(EX_OSERR, "Unable to determine the number of cpus");
/* Determine the set of active CPUs. */
- cpumask = (1 << ncpu) - 1;
- dummy = sizeof(int);
- haltedcpus = (cpumask_t) 0;
+ cpusetsize = sysconf(_SC_CPUSET_SIZE);
+ if (cpusetsize == -1 || (u_long)cpusetsize > sizeof(cpuset_t)) {
+ err(EX_OSERR, "ERROR: Cannot determine which CPUs are "
+ "halted");
+ }
+ CPU_ZERO(&haltedcpus);
+ setsize = (size_t)cpusetsize;
if (ncpu > 1 && sysctlbyname("machdep.hlt_cpus", &haltedcpus,
- &dummy, NULL, 0) < 0)
+ &setsize, NULL, 0) < 0)
err(EX_OSERR, "ERROR: Cannot determine which CPUs are "
"halted");
- cpumask &= ~haltedcpus;
+ CPU_FILL(&cpumask);
+ CPU_NAND(&cpumask, &haltedcpus);
/* Determine the maximum number of PMCs in any CPU. */
npmc = 0;
@@ -200,7 +207,7 @@ pmcc_do_enable_disable(struct pmcc_op_list *op_list)
if (cpu == PMCC_CPU_ALL)
for (i = 0; i < ncpu; i++) {
- if ((1 << i) & cpumask)
+ if (CPU_ISSET(i, &cpumask))
SET_PMCS(i, pmc, op);
}
else
diff --git a/usr.sbin/route6d/route6d.c b/usr.sbin/route6d/route6d.c
index 761deeb..4868829 100644
--- a/usr.sbin/route6d/route6d.c
+++ b/usr.sbin/route6d/route6d.c
@@ -106,7 +106,7 @@ static const char _rcsid[] = "$KAME: route6d.c,v 1.104 2003/10/31 00:30:20 itoju
struct ifc { /* Configuration of an interface */
char ifc_name[IFNAMSIZ]; /* if name */
- struct ifc *ifc_next;
+ TAILQ_ENTRY(ifc) ifc_next;
int ifc_index; /* if index */
int ifc_mtu; /* if mtu */
int ifc_metric; /* if metric */
@@ -120,7 +120,7 @@ struct ifc { /* Configuration of an interface */
struct ifac { /* Adddress associated to an interface */
struct ifc *ifa_conf; /* back pointer */
- struct ifac *ifa_next;
+ TAILQ_ENTRY(ifac) ifa_next;
struct in6_addr ifa_addr; /* address */
struct in6_addr ifa_raddr; /* remote address, valid in p2p */
int ifa_plen; /* prefix length */
@@ -134,8 +134,10 @@ struct iff {
struct iff *iff_next;
};
-struct ifc *ifc;
-struct iff *iff_head;
+TAILQ_HEAD(, ifc) ifc =
+ TAILQ_HEAD_INITIALIZER(ifc);
+TAILQ_HEAD(, iff) iff_head =
+ TAILQ_HEAD_INITIALIZER(iff_head);
int nifc; /* number of valid ifc's */
struct ifc **index2ifc;
unsigned int nindex2ifc;
@@ -166,7 +168,7 @@ struct rip6 *ripbuf; /* packet buffer for sending */
*/
struct riprt {
- struct riprt *rrt_next; /* next destination */
+ TAILQ_ENTRY(riprt) rrt_next; /* next destination */
struct riprt *rrt_same; /* same destination - future use */
struct netinfo6 rrt_info; /* network info */
struct in6_addr rrt_gw; /* gateway */
diff --git a/usr.sbin/rtadvd/advcap.c b/usr.sbin/rtadvd/advcap.c
index b0f5ee0..7280f40 100644
--- a/usr.sbin/rtadvd/advcap.c
+++ b/usr.sbin/rtadvd/advcap.c
@@ -81,7 +81,6 @@
static char *tbuf;
static int hopcount; /* detect infinite loops in termcap, init 0 */
-static const char *remotefile;
extern const char *conffile;
int tgetent(char *, char *);
@@ -204,7 +203,7 @@ tnchktc(void)
write(STDERR_FILENO, "Infinite tc= loop\n", 18);
return (0);
}
- if (getent(tcbuf, tcname, remotefile) != 1) {
+ if (getent(tcbuf, tcname, conffile) != 1) {
return (0);
}
for (q = tcbuf; *q++ != ':'; )
diff --git a/usr.sbin/rtadvd/config.c b/usr.sbin/rtadvd/config.c
index 4c870b9..1b48868 100644
--- a/usr.sbin/rtadvd/config.c
+++ b/usr.sbin/rtadvd/config.c
@@ -109,7 +109,7 @@ dname_labelenc(char *dst, const char *src)
/* Always need a 0-length label at the tail. */
*dst++ = '\0';
- syslog(LOG_DEBUG, "<%s> labellen = %d", __func__, dst - dst_origin);
+ syslog(LOG_DEBUG, "<%s> labellen = %td", __func__, dst - dst_origin);
return (dst - dst_origin);
}
@@ -229,13 +229,7 @@ getconfig(int idx)
__func__, intface);
}
- rai = malloc(sizeof(*rai));
- if (rai == NULL) {
- syslog(LOG_INFO, "<%s> %s: can't allocate enough memory",
- __func__, intface);
- exit(1);
- }
- memset(rai, 0, sizeof(*rai));
+ ELM_MALLOC(rai, exit(1));
TAILQ_INIT(&rai->rai_prefix);
#ifdef ROUTEINFO
TAILQ_INIT(&rai->rai_route);
@@ -394,10 +388,7 @@ getconfig(int idx)
/* allocate memory to store prefix information */
ELM_MALLOC(pfx, exit(1));
-
- /* link into chain */
- TAILQ_INSERT_TAIL(&rai->rai_prefix, pfx, pfx_next);
- rai->rai_pfxs++;
+ pfx->pfx_rainfo = rai;
pfx->pfx_origin = PREFIX_FROM_CONFIG;
if (inet_pton(AF_INET6, addr, &pfx->pfx_prefix) != 1) {
@@ -481,6 +472,9 @@ getconfig(int idx)
pfx->pfx_pltimeexpire =
now.tv_sec + pfx->pfx_preflifetime;
}
+ /* link into chain */
+ TAILQ_INSERT_TAIL(&rai->rai_prefix, pfx, pfx_next);
+ rai->rai_pfxs++;
}
if (rai->rai_advifprefix && rai->rai_pfxs == 0)
get_prefix(rai);
diff --git a/usr.sbin/rtadvd/dump.c b/usr.sbin/rtadvd/dump.c
index f79319b..fac3fb2 100644
--- a/usr.sbin/rtadvd/dump.c
+++ b/usr.sbin/rtadvd/dump.c
@@ -310,7 +310,7 @@ dname_labeldec(char *dst, size_t dlen, const char *src)
(src + len) <= src_last) {
if (dst != dst_origin)
*dst++ = '.';
- syslog(LOG_DEBUG, "<%s> labellen = %d", __func__, len);
+ syslog(LOG_DEBUG, "<%s> labellen = %zd", __func__, len);
memcpy(dst, src, len);
src += len;
dst += len;
diff --git a/usr.sbin/rtadvd/rtadvd.c b/usr.sbin/rtadvd/rtadvd.c
index 614d109..6423e7b 100644
--- a/usr.sbin/rtadvd/rtadvd.c
+++ b/usr.sbin/rtadvd/rtadvd.c
@@ -789,7 +789,7 @@ rtadvd_input(void)
#else
if ((size_t)i < sizeof(struct icmp6_hdr)) {
syslog(LOG_ERR,
- "<%s> packet size(%d) is too short",
+ "<%s> packet size(%zd) is too short",
__func__, i);
return;
}
@@ -827,7 +827,7 @@ rtadvd_input(void)
if ((size_t)i < sizeof(struct nd_router_solicit)) {
syslog(LOG_NOTICE,
"<%s> RS from %s on %s does not have enough "
- "length (len = %d)",
+ "length (len = %zd)",
__func__,
inet_ntop(AF_INET6, &rcvfrom.sin6_addr, ntopbuf,
sizeof(ntopbuf)),
@@ -873,7 +873,7 @@ rtadvd_input(void)
if ((size_t)i < sizeof(struct nd_router_advert)) {
syslog(LOG_NOTICE,
"<%s> RA from %s on %s does not have enough "
- "length (len = %d)",
+ "length (len = %zd)",
__func__,
inet_ntop(AF_INET6, &rcvfrom.sin6_addr, ntopbuf,
sizeof(ntopbuf)),
diff --git a/usr.sbin/rtsold/rtsol.c b/usr.sbin/rtsold/rtsol.c
index afa935e..fe8bfaf 100644
--- a/usr.sbin/rtsold/rtsol.c
+++ b/usr.sbin/rtsold/rtsol.c
@@ -625,8 +625,10 @@ free1:
if (!TAILQ_EMPTY(&sm_rdnss_head))
CALL_SCRIPT(RESADD, &sm_rdnss_head);
+#if 0
else
CALL_SCRIPT(RESDEL, NULL);
+#endif
ra_opt_handler_freeit:
/* Clear script message queue. */
diff --git a/usr.sbin/rtsold/rtsold.c b/usr.sbin/rtsold/rtsold.c
index a8b7ca7..4413d55 100644
--- a/usr.sbin/rtsold/rtsold.c
+++ b/usr.sbin/rtsold/rtsold.c
@@ -124,6 +124,7 @@ main(int argc, char **argv)
int maxfd;
#endif
int rtsock;
+ char *argv0;
#ifndef SMALL
/* rtsold */
@@ -134,6 +135,8 @@ main(int argc, char **argv)
fflag = 1;
once = 1;
#endif
+ argv0 = argv[0];
+
while ((ch = getopt(argc, argv, opts)) != -1) {
switch (ch) {
case 'a':
diff --git a/usr.sbin/tcpdrop/tcpdrop.c b/usr.sbin/tcpdrop/tcpdrop.c
index 6aae85c..dce6c6d 100644
--- a/usr.sbin/tcpdrop/tcpdrop.c
+++ b/usr.sbin/tcpdrop/tcpdrop.c
@@ -20,12 +20,13 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
+#include <sys/param.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>
-#include <netinet/in.h>
+#include <netinet/in.h>
#include <netinet/in_pcb.h>
#define TCPSTATES
#include <netinet/tcp_fsm.h>
OpenPOWER on IntegriCloud