Diffstat (limited to 'sys')
-rw-r--r--  sys/ddb/db_ps.c                              |   9
-rw-r--r--  sys/dev/mlx5/mlx5_en/en.h                    |   4
-rw-r--r--  sys/dev/usb/controller/xhci.c                |  18
-rw-r--r--  sys/kern/kern_clocksource.c                  |   8
-rw-r--r--  sys/kern/kern_event.c                        |   2
-rw-r--r--  sys/kern/kern_fork.c                         |   1
-rw-r--r--  sys/kern/kern_kthread.c                      |   1
-rw-r--r--  sys/kern/kern_thr.c                          |   1
-rw-r--r--  sys/kern/kern_thread.c                       |   4
-rw-r--r--  sys/kern/kern_timeout.c                      | 101
-rw-r--r--  sys/kern/subr_sleepqueue.c                   | 111
-rw-r--r--  sys/kern/sys_generic.c                       |   4
-rw-r--r--  sys/netinet/tcp_lro.c                        |  36
-rw-r--r--  sys/ofed/drivers/infiniband/core/ucma.c      |  16
-rw-r--r--  sys/sys/callout.h                            |   3
-rw-r--r--  sys/sys/param.h                              |   2
-rw-r--r--  sys/sys/proc.h                               |   3
-rw-r--r--  sys/sys/time.h                               |   2
-rw-r--r--  sys/vm/vm_pageout.c                          |  25
19 files changed, 208 insertions, 143 deletions
diff --git a/sys/ddb/db_ps.c b/sys/ddb/db_ps.c
index efbc8dc..e030f47 100644
--- a/sys/ddb/db_ps.c
+++ b/sys/ddb/db_ps.c
@@ -371,8 +371,13 @@ DB_SHOW_COMMAND(thread, db_show_thread)
db_printf(" lock: %s turnstile: %p\n", td->td_lockname,
td->td_blocked);
if (TD_ON_SLEEPQ(td))
- db_printf(" wmesg: %s wchan: %p\n", td->td_wmesg,
- td->td_wchan);
+ db_printf(
+ " wmesg: %s wchan: %p sleeptimo %lx. %jx (curr %lx. %jx)\n",
+ td->td_wmesg, td->td_wchan,
+ (long)sbttobt(td->td_sleeptimo).sec,
+ (uintmax_t)sbttobt(td->td_sleeptimo).frac,
+ (long)sbttobt(sbinuptime()).sec,
+ (uintmax_t)sbttobt(sbinuptime()).frac);
db_printf(" priority: %d\n", td->td_priority);
db_printf(" container lock: %s (%p)\n", lock->lo_name, lock);
}
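
The new db_show_thread output above prints the thread's sleep deadline by splitting the 64-bit sbintime_t into bintime seconds and fraction. Below is a minimal userland sketch of that 32.32 fixed-point split, assuming the standard layout used by sbttobt() in sys/sys/time.h; the demo type and helper names are illustrative, not kernel interfaces.

#include <stdint.h>
#include <stdio.h>

/* Illustrative stand-in for the kernel's sbintime_t: seconds in the
 * upper 32 bits, binary fraction of a second in the lower 32 bits. */
typedef int64_t sbintime_demo_t;

struct bintime_demo {
	long		sec;
	uint64_t	frac;
};

/* Mirrors what sbttobt() does: shift the seconds down and promote the
 * 32-bit fraction into the upper half of the 64-bit bintime fraction. */
static struct bintime_demo
sbt_to_bt_demo(sbintime_demo_t sbt)
{
	struct bintime_demo bt;

	bt.sec = (long)(sbt >> 32);
	bt.frac = (uint64_t)sbt << 32;
	return (bt);
}

int
main(void)
{
	sbintime_demo_t sbt = ((int64_t)5 << 32) | 0x80000000; /* 5.5 s */
	struct bintime_demo bt = sbt_to_bt_demo(sbt);

	/* Same "%lx. %jx" formatting as the db_printf() above. */
	printf("sleeptimo %lx. %jx\n", bt.sec, (uintmax_t)bt.frac);
	return (0);
}
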
diff --git a/sys/dev/mlx5/mlx5_en/en.h b/sys/dev/mlx5/mlx5_en/en.h
index c4415c1..2573f15 100644
--- a/sys/dev/mlx5/mlx5_en/en.h
+++ b/sys/dev/mlx5/mlx5_en/en.h
@@ -76,11 +76,11 @@
#define MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE 0x7
#define MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE 0xa
-#define MLX5E_PARAMS_MAXIMUM_LOG_SQ_SIZE 0xd
+#define MLX5E_PARAMS_MAXIMUM_LOG_SQ_SIZE 0xe
#define MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE 0x7
#define MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE 0xa
-#define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE 0xd
+#define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE 0xe
/* freeBSD HW LRO is limited by 16KB - the size of max mbuf */
#define MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ MJUM16BYTES
diff --git a/sys/dev/usb/controller/xhci.c b/sys/dev/usb/controller/xhci.c
index 0b708b8..2376cee2 100644
--- a/sys/dev/usb/controller/xhci.c
+++ b/sys/dev/usb/controller/xhci.c
@@ -2227,7 +2227,11 @@ xhci_setup_generic_chain(struct usb_xfer *xfer)
* Send a DATA1 message and invert the current
* endpoint direction.
*/
+#ifdef XHCI_STEP_STATUS_STAGE
temp.step_td = (xfer->nframes != 0);
+#else
+ temp.step_td = 0;
+#endif
temp.direction = UE_GET_DIR(xfer->endpointno) ^ UE_DIR_IN;
temp.len = 0;
temp.pc = NULL;
@@ -3873,12 +3877,10 @@ xhci_configure_reset_endpoint(struct usb_xfer *xfer)
xhci_configure_mask(udev, (1U << epno) | 1U, 0);
- err = xhci_cmd_evaluate_ctx(sc, buf_inp.physaddr, index);
-
- if (err != 0)
- DPRINTF("Could not configure endpoint %u\n", epno);
-
- err = xhci_cmd_configure_ep(sc, buf_inp.physaddr, 0, index);
+ if (epno > 1)
+ err = xhci_cmd_configure_ep(sc, buf_inp.physaddr, 0, index);
+ else
+ err = xhci_cmd_evaluate_ctx(sc, buf_inp.physaddr, index);
if (err != 0)
DPRINTF("Could not configure endpoint %u\n", epno);
@@ -4261,6 +4263,10 @@ xhci_device_state_change(struct usb_device *udev)
sc->sc_hw.devs[index].state = XHCI_ST_ADDRESSED;
+ /* set configure mask to slot only */
+ xhci_configure_mask(udev, 1, 0);
+
+ /* deconfigure all endpoints, except EP0 */
err = xhci_cmd_configure_ep(sc, 0, 1, index);
if (err) {
diff --git a/sys/kern/kern_clocksource.c b/sys/kern/kern_clocksource.c
index 49ac7bf..fc4ca83a 100644
--- a/sys/kern/kern_clocksource.c
+++ b/sys/kern/kern_clocksource.c
@@ -211,7 +211,7 @@ handleevents(sbintime_t now, int fake)
} else
state->nextprof = state->nextstat;
if (now >= state->nextcallopt) {
- state->nextcall = state->nextcallopt = INT64_MAX;
+ state->nextcall = state->nextcallopt = SBT_MAX;
callout_process(now);
}
@@ -492,7 +492,7 @@ configtimer(int start)
state = DPCPU_ID_PTR(cpu, timerstate);
state->now = now;
if (!smp_started && cpu != CPU_FIRST())
- state->nextevent = INT64_MAX;
+ state->nextevent = SBT_MAX;
else
state->nextevent = next;
if (periodic)
@@ -580,8 +580,8 @@ cpu_initclocks_bsp(void)
CPU_FOREACH(cpu) {
state = DPCPU_ID_PTR(cpu, timerstate);
mtx_init(&state->et_hw_mtx, "et_hw_mtx", NULL, MTX_SPIN);
- state->nextcall = INT64_MAX;
- state->nextcallopt = INT64_MAX;
+ state->nextcall = SBT_MAX;
+ state->nextcallopt = SBT_MAX;
}
periodic = want_periodic;
/* Grab requested timer or the best of present. */
diff --git a/sys/kern/kern_event.c b/sys/kern/kern_event.c
index e4bf40b..fba163f 100644
--- a/sys/kern/kern_event.c
+++ b/sys/kern/kern_event.c
@@ -1471,7 +1471,7 @@ kqueue_scan(struct kqueue *kq, int maxevents, struct kevent_copyops *k_ops,
rsbt = tstosbt(*tsp);
if (TIMESEL(&asbt, rsbt))
asbt += tc_tick_sbt;
- if (asbt <= INT64_MAX - rsbt)
+ if (asbt <= SBT_MAX - rsbt)
asbt += rsbt;
else
asbt = 0;
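
The INT64_MAX to SBT_MAX substitutions in this and the surrounding files all guard the same pattern: before adding a relative timeout to the current uptime, check the remaining headroom so the signed 64-bit addition cannot overflow. A small self-contained sketch of the idea follows; the function name and the saturating fallback value are illustrative only (the individual callers above substitute their own sentinel on overflow).

#include <stdint.h>
#include <stdio.h>

#define SBT_MAX_DEMO	0x7fffffffffffffffLL	/* mirrors SBT_MAX */

/*
 * Add a relative timeout 'rel' to an absolute time 'now' without risking
 * signed overflow: if there is not enough headroom left below SBT_MAX,
 * saturate instead of wrapping (signed overflow is undefined behaviour).
 */
static int64_t
sbt_add_sat(int64_t now, int64_t rel)
{
	if (now <= SBT_MAX_DEMO - rel)
		return (now + rel);
	return (SBT_MAX_DEMO);	/* effectively "never" */
}

int
main(void)
{
	printf("%jd\n", (intmax_t)sbt_add_sat(1000, 500));
	printf("%jd\n", (intmax_t)sbt_add_sat(SBT_MAX_DEMO - 10, 500));
	return (0);
}
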
diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c
index 8fa6bcd..7877fab 100644
--- a/sys/kern/kern_fork.c
+++ b/sys/kern/kern_fork.c
@@ -470,6 +470,7 @@ do_fork(struct thread *td, int flags, struct proc *p2, struct thread *td2,
bzero(&td2->td_startzero,
__rangeof(struct thread, td_startzero, td_endzero));
td2->td_su = NULL;
+ td2->td_sleeptimo = 0;
bcopy(&td->td_startcopy, &td2->td_startcopy,
__rangeof(struct thread, td_startcopy, td_endcopy));
diff --git a/sys/kern/kern_kthread.c b/sys/kern/kern_kthread.c
index fb46025..4c77f4f 100644
--- a/sys/kern/kern_kthread.c
+++ b/sys/kern/kern_kthread.c
@@ -272,6 +272,7 @@ kthread_add(void (*func)(void *), void *arg, struct proc *p,
bzero(&newtd->td_startzero,
__rangeof(struct thread, td_startzero, td_endzero));
newtd->td_su = NULL;
+ newtd->td_sleeptimo = 0;
bcopy(&oldtd->td_startcopy, &newtd->td_startcopy,
__rangeof(struct thread, td_startcopy, td_endcopy));
diff --git a/sys/kern/kern_thr.c b/sys/kern/kern_thr.c
index b01aecb..74050ff 100644
--- a/sys/kern/kern_thr.c
+++ b/sys/kern/kern_thr.c
@@ -229,6 +229,7 @@ thread_create(struct thread *td, struct rtprio *rtp,
bzero(&newtd->td_startzero,
__rangeof(struct thread, td_startzero, td_endzero));
newtd->td_su = NULL;
+ newtd->td_sleeptimo = 0;
bcopy(&td->td_startcopy, &newtd->td_startcopy,
__rangeof(struct thread, td_startcopy, td_endcopy));
newtd->td_proc = td->td_proc;
diff --git a/sys/kern/kern_thread.c b/sys/kern/kern_thread.c
index 2f8382c..7b04d96 100644
--- a/sys/kern/kern_thread.c
+++ b/sys/kern/kern_thread.c
@@ -319,7 +319,7 @@ thread_reap(void)
/*
* Don't even bother to lock if none at this instant,
- * we really don't care about the next instant..
+ * we really don't care about the next instant.
*/
if (!TAILQ_EMPTY(&zombie_threads)) {
mtx_lock_spin(&zombie_lock);
@@ -383,6 +383,7 @@ thread_free(struct thread *td)
cpu_thread_free(td);
if (td->td_kstack != 0)
vm_thread_dispose(td);
+ callout_drain(&td->td_slpcallout);
uma_zfree(thread_zone, td);
}
@@ -524,6 +525,7 @@ thread_wait(struct proc *p)
td->td_cpuset = NULL;
cpu_thread_clean(td);
crfree(td->td_ucred);
+ callout_drain(&td->td_slpcallout);
thread_reap(); /* check for zombie threads etc. */
}
diff --git a/sys/kern/kern_timeout.c b/sys/kern/kern_timeout.c
index 9aa11ba..7e5aab7 100644
--- a/sys/kern/kern_timeout.c
+++ b/sys/kern/kern_timeout.c
@@ -296,7 +296,7 @@ callout_cpu_init(struct callout_cpu *cc, int cpu)
for (i = 0; i < callwheelsize; i++)
LIST_INIT(&cc->cc_callwheel[i]);
TAILQ_INIT(&cc->cc_expireq);
- cc->cc_firstevent = INT64_MAX;
+ cc->cc_firstevent = SBT_MAX;
for (i = 0; i < 2; i++)
cc_cce_cleanup(cc, i);
snprintf(cc->cc_ktr_event_name, sizeof(cc->cc_ktr_event_name),
@@ -569,8 +569,8 @@ callout_cc_add(struct callout *c, struct callout_cpu *cc,
* Inform the eventtimers(4) subsystem there's a new callout
* that has been inserted, but only if really required.
*/
- if (INT64_MAX - c->c_time < c->c_precision)
- c->c_precision = INT64_MAX - c->c_time;
+ if (SBT_MAX - c->c_time < c->c_precision)
+ c->c_precision = SBT_MAX - c->c_time;
sbt = c->c_time + c->c_precision;
if (sbt < cc->cc_firstevent) {
cc->cc_firstevent = sbt;
@@ -896,6 +896,56 @@ callout_handle_init(struct callout_handle *handle)
handle->callout = NULL;
}
+void
+callout_when(sbintime_t sbt, sbintime_t precision, int flags,
+ sbintime_t *res, sbintime_t *prec_res)
+{
+ sbintime_t to_sbt, to_pr;
+
+ if ((flags & (C_ABSOLUTE | C_PRECALC)) != 0) {
+ *res = sbt;
+ *prec_res = precision;
+ return;
+ }
+ if ((flags & C_HARDCLOCK) != 0 && sbt < tick_sbt)
+ sbt = tick_sbt;
+ if ((flags & C_HARDCLOCK) != 0 ||
+#ifdef NO_EVENTTIMERS
+ sbt >= sbt_timethreshold) {
+ to_sbt = getsbinuptime();
+
+ /* Add safety belt for the case of hz > 1000. */
+ to_sbt += tc_tick_sbt - tick_sbt;
+#else
+ sbt >= sbt_tickthreshold) {
+ /*
+ * Obtain the time of the last hardclock() call on
+ * this CPU directly from the kern_clocksource.c.
+ * This value is per-CPU, but it is equal for all
+ * active ones.
+ */
+#ifdef __LP64__
+ to_sbt = DPCPU_GET(hardclocktime);
+#else
+ spinlock_enter();
+ to_sbt = DPCPU_GET(hardclocktime);
+ spinlock_exit();
+#endif
+#endif
+ if ((flags & C_HARDCLOCK) == 0)
+ to_sbt += tick_sbt;
+ } else
+ to_sbt = sbinuptime();
+ if (SBT_MAX - to_sbt < sbt)
+ to_sbt = SBT_MAX;
+ else
+ to_sbt += sbt;
+ *res = to_sbt;
+ to_pr = ((C_PRELGET(flags) < 0) ? sbt >> tc_precexp :
+ sbt >> C_PRELGET(flags));
+ *prec_res = to_pr > precision ? to_pr : precision;
+}
+
/*
* New interface; clients allocate their own callout structures.
*
@@ -913,10 +963,10 @@ callout_handle_init(struct callout_handle *handle)
* callout_deactivate() - marks the callout as having been serviced
*/
int
-callout_reset_sbt_on(struct callout *c, sbintime_t sbt, sbintime_t precision,
+callout_reset_sbt_on(struct callout *c, sbintime_t sbt, sbintime_t prec,
void (*ftn)(void *), void *arg, int cpu, int flags)
{
- sbintime_t to_sbt, pr;
+ sbintime_t to_sbt, precision;
struct callout_cpu *cc;
int cancelled, direct;
int ignore_cpu=0;
@@ -929,47 +979,8 @@ callout_reset_sbt_on(struct callout *c, sbintime_t sbt, sbintime_t precision,
/* Invalid CPU spec */
panic("Invalid CPU in callout %d", cpu);
}
- if (flags & C_ABSOLUTE) {
- to_sbt = sbt;
- } else {
- if ((flags & C_HARDCLOCK) && (sbt < tick_sbt))
- sbt = tick_sbt;
- if ((flags & C_HARDCLOCK) ||
-#ifdef NO_EVENTTIMERS
- sbt >= sbt_timethreshold) {
- to_sbt = getsbinuptime();
+ callout_when(sbt, prec, flags, &to_sbt, &precision);
- /* Add safety belt for the case of hz > 1000. */
- to_sbt += tc_tick_sbt - tick_sbt;
-#else
- sbt >= sbt_tickthreshold) {
- /*
- * Obtain the time of the last hardclock() call on
- * this CPU directly from the kern_clocksource.c.
- * This value is per-CPU, but it is equal for all
- * active ones.
- */
-#ifdef __LP64__
- to_sbt = DPCPU_GET(hardclocktime);
-#else
- spinlock_enter();
- to_sbt = DPCPU_GET(hardclocktime);
- spinlock_exit();
-#endif
-#endif
- if ((flags & C_HARDCLOCK) == 0)
- to_sbt += tick_sbt;
- } else
- to_sbt = sbinuptime();
- if (INT64_MAX - to_sbt < sbt)
- to_sbt = INT64_MAX;
- else
- to_sbt += sbt;
- pr = ((C_PRELGET(flags) < 0) ? sbt >> tc_precexp :
- sbt >> C_PRELGET(flags));
- if (pr > precision)
- precision = pr;
- }
/*
* This flag used to be added by callout_cc_add, but the
* first time you call this we could end up with the
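
The new callout_when() factors the absolute-deadline and precision computation out of callout_reset_sbt_on(), so a caller (like the sleepqueue code below) can precompute both values, remember the deadline, and pass C_PRECALC to have the callout code reuse them unchanged. A rough userland sketch of the precision side of that computation, i.e. how the requested precision is widened to at least a power-of-two fraction of the relative timeout; the function name and the hard-coded tc_precexp stand-in are illustrative.

#include <stdint.h>
#include <stdio.h>

/*
 * Sketch of the precision selection at the end of callout_when(): the
 * effective precision is the larger of the caller-supplied precision and
 * a power-of-two fraction of the relative timeout (the shift comes from
 * the C_PREL() flag bits, falling back to the global tc_precexp).
 */
static int64_t
effective_precision(int64_t rel_sbt, int64_t req_prec, int prel, int precexp)
{
	int64_t to_pr;

	to_pr = (prel < 0) ? rel_sbt >> precexp : rel_sbt >> prel;
	return (to_pr > req_prec ? to_pr : req_prec);
}

int
main(void)
{
	/* 1 second relative timeout, tiny requested precision, no C_PREL(). */
	int64_t one_sec = (int64_t)1 << 32;

	printf("%jd\n", (intmax_t)effective_precision(one_sec, 0, -1, 5));
	return (0);
}
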
diff --git a/sys/kern/subr_sleepqueue.c b/sys/kern/subr_sleepqueue.c
index c490460..4941b47 100644
--- a/sys/kern/subr_sleepqueue.c
+++ b/sys/kern/subr_sleepqueue.c
@@ -361,6 +361,7 @@ sleepq_set_timeout_sbt(void *wchan, sbintime_t sbt, sbintime_t pr,
{
struct sleepqueue_chain *sc;
struct thread *td;
+ sbintime_t pr1;
td = curthread;
sc = SC_LOOKUP(wchan);
@@ -368,8 +369,14 @@ sleepq_set_timeout_sbt(void *wchan, sbintime_t sbt, sbintime_t pr,
MPASS(TD_ON_SLEEPQ(td));
MPASS(td->td_sleepqueue == NULL);
MPASS(wchan != NULL);
- callout_reset_sbt_on(&td->td_slpcallout, sbt, pr,
- sleepq_timeout, td, PCPU_GET(cpuid), flags | C_DIRECT_EXEC);
+ KASSERT(td->td_sleeptimo == 0, ("td %d %p td_sleeptimo %jx",
+ td->td_tid, td, (uintmax_t)td->td_sleeptimo));
+ thread_lock(td);
+ callout_when(sbt, pr, flags, &td->td_sleeptimo, &pr1);
+ thread_unlock(td);
+ callout_reset_sbt_on(&td->td_slpcallout, td->td_sleeptimo, pr1,
+ sleepq_timeout, td, PCPU_GET(cpuid), flags | C_PRECALC |
+ C_DIRECT_EXEC);
}
/*
@@ -548,37 +555,36 @@ static int
sleepq_check_timeout(void)
{
struct thread *td;
+ int res;
td = curthread;
THREAD_LOCK_ASSERT(td, MA_OWNED);
/*
- * If TDF_TIMEOUT is set, we timed out.
+ * If TDF_TIMEOUT is set, we timed out. But recheck
+ * td_sleeptimo anyway.
*/
- if (td->td_flags & TDF_TIMEOUT) {
- td->td_flags &= ~TDF_TIMEOUT;
- return (EWOULDBLOCK);
+ res = 0;
+ if (td->td_sleeptimo != 0) {
+ if (td->td_sleeptimo <= sbinuptime())
+ res = EWOULDBLOCK;
+ td->td_sleeptimo = 0;
}
-
- /*
- * If TDF_TIMOFAIL is set, the timeout ran after we had
- * already been woken up.
- */
- if (td->td_flags & TDF_TIMOFAIL)
- td->td_flags &= ~TDF_TIMOFAIL;
-
- /*
- * If callout_stop() fails, then the timeout is running on
- * another CPU, so synchronize with it to avoid having it
- * accidentally wake up a subsequent sleep.
- */
- else if (_callout_stop_safe(&td->td_slpcallout, CS_MIGRBLOCK)
- == 0) {
- td->td_flags |= TDF_TIMEOUT;
- TD_SET_SLEEPING(td);
- mi_switch(SW_INVOL | SWT_SLEEPQTIMO, NULL);
- }
- return (0);
+ if (td->td_flags & TDF_TIMEOUT)
+ td->td_flags &= ~TDF_TIMEOUT;
+ else
+ /*
+ * We ignore the situation where timeout subsystem was
+ * unable to stop our callout. The struct thread is
+ * type-stable, the callout will use the correct
+ * memory when running. The checks of the
+ * td_sleeptimo value in this function and in
+ * sleepq_timeout() ensure that the thread does not
+ * get spurious wakeups, even if the callout was reset
+ * or thread reused.
+ */
+ callout_stop(&td->td_slpcallout);
+ return (res);
}
/*
@@ -887,12 +893,17 @@ sleepq_timeout(void *arg)
CTR3(KTR_PROC, "sleepq_timeout: thread %p (pid %ld, %s)",
(void *)td, (long)td->td_proc->p_pid, (void *)td->td_name);
- /*
- * First, see if the thread is asleep and get the wait channel if
- * it is.
- */
thread_lock(td);
- if (TD_IS_SLEEPING(td) && TD_ON_SLEEPQ(td)) {
+
+ if (td->td_sleeptimo > sbinuptime() || td->td_sleeptimo == 0) {
+ /*
+ * The thread does not want a timeout (yet).
+ */
+ } else if (TD_IS_SLEEPING(td) && TD_ON_SLEEPQ(td)) {
+ /*
+ * See if the thread is asleep and get the wait
+ * channel if it is.
+ */
wchan = td->td_wchan;
sc = SC_LOOKUP(wchan);
THREAD_LOCKPTR_ASSERT(td, &sc->sc_lock);
@@ -900,40 +911,16 @@ sleepq_timeout(void *arg)
MPASS(sq != NULL);
td->td_flags |= TDF_TIMEOUT;
wakeup_swapper = sleepq_resume_thread(sq, td, 0);
- thread_unlock(td);
- if (wakeup_swapper)
- kick_proc0();
- return;
- }
-
- /*
- * If the thread is on the SLEEPQ but isn't sleeping yet, it
- * can either be on another CPU in between sleepq_add() and
- * one of the sleepq_*wait*() routines or it can be in
- * sleepq_catch_signals().
- */
- if (TD_ON_SLEEPQ(td)) {
+ } else if (TD_ON_SLEEPQ(td)) {
+ /*
+ * If the thread is on the SLEEPQ but isn't sleeping
+ * yet, it can either be on another CPU in between
+ * sleepq_add() and one of the sleepq_*wait*()
+ * routines or it can be in sleepq_catch_signals().
+ */
td->td_flags |= TDF_TIMEOUT;
- thread_unlock(td);
- return;
}
- /*
- * Now check for the edge cases. First, if TDF_TIMEOUT is set,
- * then the other thread has already yielded to us, so clear
- * the flag and resume it. If TDF_TIMEOUT is not set, then the
- * we know that the other thread is not on a sleep queue, but it
- * hasn't resumed execution yet. In that case, set TDF_TIMOFAIL
- * to let it know that the timeout has already run and doesn't
- * need to be canceled.
- */
- if (td->td_flags & TDF_TIMEOUT) {
- MPASS(TD_IS_SLEEPING(td));
- td->td_flags &= ~TDF_TIMEOUT;
- TD_CLR_SLEEPING(td);
- wakeup_swapper = setrunnable(td);
- } else
- td->td_flags |= TDF_TIMOFAIL;
thread_unlock(td);
if (wakeup_swapper)
kick_proc0();
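
The subr_sleepqueue.c change above replaces the TDF_TIMOFAIL handshake with a simpler invariant: the absolute deadline computed by callout_when() is stored in td_sleeptimo before the callout is armed, the callout only acts if that deadline has actually passed, and sleepq_check_timeout() reports EWOULDBLOCK on the same condition and then clears the field, so a stale or rescheduled callout cannot produce a spurious wakeup or timeout. A toy single-threaded sketch of that idea follows; the names and types are illustrative, and the real code relies on the thread lock for ordering.

#include <stdint.h>
#include <stdio.h>

#define DEMO_EWOULDBLOCK	35

struct demo_thread {
	int64_t	sleeptimo;	/* absolute deadline, 0 = no timeout armed */
};

/* Arm a sleep timeout: remember the absolute deadline before "sleeping". */
static void
demo_set_timeout(struct demo_thread *td, int64_t deadline)
{
	td->sleeptimo = deadline;
}

/*
 * On wakeup, report a timeout only if a deadline was armed and has
 * passed; always clear it, so a late-firing callout that looks at the
 * field afterwards sees 0 and does nothing.
 */
static int
demo_check_timeout(struct demo_thread *td, int64_t now)
{
	int res = 0;

	if (td->sleeptimo != 0) {
		if (td->sleeptimo <= now)
			res = DEMO_EWOULDBLOCK;
		td->sleeptimo = 0;
	}
	return (res);
}

int
main(void)
{
	struct demo_thread td = { 0 };

	demo_set_timeout(&td, 100);
	printf("woken early: %d\n", demo_check_timeout(&td, 50));	/* 0 */
	demo_set_timeout(&td, 100);
	printf("woken late:  %d\n", demo_check_timeout(&td, 150));	/* 35 */
	return (0);
}
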
diff --git a/sys/kern/sys_generic.c b/sys/kern/sys_generic.c
index 033bbf0..96ec1d1 100644
--- a/sys/kern/sys_generic.c
+++ b/sys/kern/sys_generic.c
@@ -1089,7 +1089,7 @@ kern_select(struct thread *td, int nd, fd_set *fd_in, fd_set *fd_ou,
precision >>= tc_precexp;
if (TIMESEL(&asbt, rsbt))
asbt += tc_tick_sbt;
- if (asbt <= INT64_MAX - rsbt)
+ if (asbt <= SBT_MAX - rsbt)
asbt += rsbt;
else
asbt = -1;
@@ -1626,7 +1626,7 @@ selsocket(struct socket *so, int events, struct timeval *tvp, struct thread *td)
precision >>= tc_precexp;
if (TIMESEL(&asbt, rsbt))
asbt += tc_tick_sbt;
- if (asbt <= INT64_MAX - rsbt)
+ if (asbt <= SBT_MAX - rsbt)
asbt += rsbt;
else
asbt = -1;
diff --git a/sys/netinet/tcp_lro.c b/sys/netinet/tcp_lro.c
index a290aae..761d8f3 100644
--- a/sys/netinet/tcp_lro.c
+++ b/sys/netinet/tcp_lro.c
@@ -382,6 +382,7 @@ tcp_lro_rx(struct lro_ctrl *lc, struct mbuf *m, uint32_t csum)
tcp_seq seq;
int error, ip_len, l;
uint16_t eh_type, tcp_data_len;
+ int force_flush = 0;
/* We expect a contiguous header [eh, ip, tcp]. */
@@ -448,8 +449,15 @@ tcp_lro_rx(struct lro_ctrl *lc, struct mbuf *m, uint32_t csum)
* Check TCP header constraints.
*/
/* Ensure no bits set besides ACK or PSH. */
- if ((th->th_flags & ~(TH_ACK | TH_PUSH)) != 0)
- return (TCP_LRO_CANNOT);
+ if ((th->th_flags & ~(TH_ACK | TH_PUSH)) != 0) {
+ if (th->th_flags & TH_SYN)
+ return (TCP_LRO_CANNOT);
+ /*
+ * Make sure that previously seen segments/ACKs are delivered
+ * before this segment, e.g. FIN.
+ */
+ force_flush = 1;
+ }
/* XXX-BZ We lose a ACK|PUSH flag concatenating multiple segments. */
/* XXX-BZ Ideally we'd flush on PUSH? */
@@ -465,8 +473,13 @@ tcp_lro_rx(struct lro_ctrl *lc, struct mbuf *m, uint32_t csum)
ts_ptr = (uint32_t *)(th + 1);
if (l != 0 && (__predict_false(l != TCPOLEN_TSTAMP_APPA) ||
(*ts_ptr != ntohl(TCPOPT_NOP<<24|TCPOPT_NOP<<16|
- TCPOPT_TIMESTAMP<<8|TCPOLEN_TIMESTAMP))))
- return (TCP_LRO_CANNOT);
+ TCPOPT_TIMESTAMP<<8|TCPOLEN_TIMESTAMP)))) {
+ /*
+ * Make sure that previously seen segments/ACKs are delivered
+ * before this segment.
+ */
+ force_flush = 1;
+ }
/* If the driver did not pass in the checksum, set it now. */
if (csum == 0x0000)
@@ -500,6 +513,13 @@ tcp_lro_rx(struct lro_ctrl *lc, struct mbuf *m, uint32_t csum)
#endif
}
+ if (force_flush) {
+ /* Timestamps mismatch; this is a FIN, etc */
+ SLIST_REMOVE(&lc->lro_active, le, lro_entry, next);
+ tcp_lro_flush(lc, le);
+ return (TCP_LRO_CANNOT);
+ }
+
/* Flush now if appending will result in overflow. */
if (le->p_len > (65535 - tcp_data_len)) {
SLIST_REMOVE(&lc->lro_active, le, lro_entry, next);
@@ -568,6 +588,14 @@ tcp_lro_rx(struct lro_ctrl *lc, struct mbuf *m, uint32_t csum)
return (0);
}
+ if (force_flush) {
+ /*
+ * Nothing to flush, but this segment can not be further
+ * aggregated/delayed.
+ */
+ return (TCP_LRO_CANNOT);
+ }
+
/* Try to find an empty slot. */
if (SLIST_EMPTY(&lc->lro_free))
return (TCP_LRO_NO_ENTRIES);
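
The force_flush changes above make tcp_lro_rx() treat segments with unexpected header bits (e.g. FIN) or unsupported TCP options as barriers: anything already aggregated is flushed first and the segment itself is handed up unmerged, while SYN segments are still rejected outright. A small standalone sketch of that three-way decision on the flag bits; the flag values mirror <netinet/tcp.h>, and the enum and function names are illustrative.

#include <stdint.h>
#include <stdio.h>

/* Standard TCP flag bits, as in <netinet/tcp.h>. */
#define TH_FIN	0x01
#define TH_SYN	0x02
#define TH_RST	0x04
#define TH_PUSH	0x08
#define TH_ACK	0x10

enum lro_decision {
	LRO_AGGREGATE,		/* plain ACK/PSH data, can be merged */
	LRO_FORCE_FLUSH,	/* deliver queued data first, then this one */
	LRO_REJECT		/* cannot handle at all (e.g. SYN) */
};

static enum lro_decision
lro_classify_flags(uint8_t th_flags)
{
	if ((th_flags & ~(TH_ACK | TH_PUSH)) == 0)
		return (LRO_AGGREGATE);
	if (th_flags & TH_SYN)
		return (LRO_REJECT);
	/* FIN, RST, URG, ...: flush what we have, pass this segment up. */
	return (LRO_FORCE_FLUSH);
}

int
main(void)
{
	printf("%d %d %d\n",
	    lro_classify_flags(TH_ACK),
	    lro_classify_flags(TH_ACK | TH_FIN),
	    lro_classify_flags(TH_SYN));
	return (0);
}
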
diff --git a/sys/ofed/drivers/infiniband/core/ucma.c b/sys/ofed/drivers/infiniband/core/ucma.c
index 23cbf7b..3c7c751 100644
--- a/sys/ofed/drivers/infiniband/core/ucma.c
+++ b/sys/ofed/drivers/infiniband/core/ucma.c
@@ -39,6 +39,8 @@
#include <linux/in6.h>
#include <linux/miscdevice.h>
+#include <sys/filio.h>
+
#include <rdma/rdma_user_cm.h>
#include <rdma/ib_marshall.h>
#include <rdma/rdma_cm.h>
@@ -1285,11 +1287,25 @@ static int ucma_close(struct inode *inode, struct file *filp)
return 0;
}
+static long
+ucma_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+{
+
+ switch (cmd) {
+ case FIONBIO:
+ case FIOASYNC:
+ return (0);
+ default:
+ return (-ENOTTY);
+ }
+}
+
static const struct file_operations ucma_fops = {
.owner = THIS_MODULE,
.open = ucma_open,
.release = ucma_close,
.write = ucma_write,
+ .unlocked_ioctl = ucma_ioctl,
.poll = ucma_poll,
};
diff --git a/sys/sys/callout.h b/sys/sys/callout.h
index d3f2bca..4e86b16 100644
--- a/sys/sys/callout.h
+++ b/sys/sys/callout.h
@@ -57,6 +57,7 @@
#define C_PRELGET(x) (int)((((x) >> 1) & C_PRELRANGE) - 1)
#define C_HARDCLOCK 0x0100 /* align to hardclock() calls */
#define C_ABSOLUTE 0x0200 /* event time is absolute. */
+#define C_PRECALC 0x0400 /* event time is pre-calculated. */
struct callout_handle {
struct callout *callout;
@@ -129,6 +130,8 @@ int callout_schedule_on(struct callout *, int, int);
int _callout_stop_safe(struct callout *, int);
void callout_process(sbintime_t now);
+void callout_when(sbintime_t sbt, sbintime_t precision, int flags,
+ sbintime_t *sbt_res, sbintime_t *prec_res);
#endif
#endif /* _SYS_CALLOUT_H_ */
diff --git a/sys/sys/param.h b/sys/sys/param.h
index 07f69c6..4bd9584 100644
--- a/sys/sys/param.h
+++ b/sys/sys/param.h
@@ -58,7 +58,7 @@
* in the range 5 to 9.
*/
#undef __FreeBSD_version
-#define __FreeBSD_version 1003506 /* Master, propagated to newvers */
+#define __FreeBSD_version 1003507 /* Master, propagated to newvers */
/*
* __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD,
diff --git a/sys/sys/proc.h b/sys/sys/proc.h
index 59c75c5..6b0c924 100644
--- a/sys/sys/proc.h
+++ b/sys/sys/proc.h
@@ -325,6 +325,7 @@ struct thread {
u_int td_dbg_sc_code; /* (c) Syscall code to debugger. */
u_int td_dbg_sc_narg; /* (c) Syscall arg count to debugger.*/
void *td_emuldata; /* Emulator state data */
+ sbintime_t td_sleeptimo; /* (t) Sleep timeout. */
};
struct mtx *thread_lock_block(struct thread *);
@@ -364,7 +365,7 @@ do { \
#define TDF_ALLPROCSUSP 0x00000200 /* suspended by SINGLE_ALLPROC */
#define TDF_BOUNDARY 0x00000400 /* Thread suspended at user boundary */
#define TDF_ASTPENDING 0x00000800 /* Thread has some asynchronous events. */
-#define TDF_TIMOFAIL 0x00001000 /* Timeout from sleep after we were awake. */
+#define TDF_UNUSED12 0x00001000 /* --available-- */
#define TDF_SBDRY 0x00002000 /* Stop only on usermode boundary. */
#define TDF_UPIBLOCKED 0x00004000 /* Thread blocked on user PI mutex. */
#define TDF_NEEDSUSPCHK 0x00008000 /* Thread may need to suspend. */
diff --git a/sys/sys/time.h b/sys/sys/time.h
index b589527..1f0a530 100644
--- a/sys/sys/time.h
+++ b/sys/sys/time.h
@@ -129,7 +129,7 @@ bintime_shift(struct bintime *_bt, int _exp)
#define SBT_1MS (SBT_1S / 1000)
#define SBT_1US (SBT_1S / 1000000)
#define SBT_1NS (SBT_1S / 1000000000)
-#define SBT_MAX 0x7fffffffffffffff
+#define SBT_MAX 0x7fffffffffffffffLL
static __inline int
sbintime_getsec(sbintime_t _sbt)
diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c
index c9c1271..233bb99 100644
--- a/sys/vm/vm_pageout.c
+++ b/sys/vm/vm_pageout.c
@@ -1399,15 +1399,13 @@ relock_queue:
/*
* Scan the active queue for pages that can be deactivated. Update
* the per-page activity counter and use it to identify deactivation
- * candidates.
+ * candidates. Held pages may be deactivated.
*/
for (m = TAILQ_FIRST(&pq->pq_pl), scanned = 0; m != NULL && (scanned <
min_scan || (page_shortage > 0 && scanned < maxscan)); m = next,
scanned++) {
-
KASSERT(m->queue == PQ_ACTIVE,
("vm_pageout_scan: page %p isn't active", m));
-
next = TAILQ_NEXT(m, plinks.q);
if ((m->flags & PG_MARKER) != 0)
continue;
@@ -1421,8 +1419,8 @@ relock_queue:
}
/*
- * The count for pagedaemon pages is done after checking the
- * page for eligibility...
+ * The count for page daemon pages is updated after checking
+ * the page for eligibility.
*/
PCPU_INC(cnt.v_pdpages);
@@ -1435,12 +1433,17 @@ relock_queue:
act_delta += 1;
}
/*
- * Unlocked object ref count check. Two races are possible.
- * 1) The ref was transitioning to zero and we saw non-zero,
- * the pmap bits will be checked unnecessarily.
- * 2) The ref was transitioning to one and we saw zero.
- * The page lock prevents a new reference to this page so
- * we need not check the reference bits.
+ * Perform an unsynchronized object ref count check. While
+ * the page lock ensures that the page is not reallocated to
+ * another object, in particular, one with unmanaged mappings
+ * that cannot support pmap_ts_referenced(), two races are,
+ * nonetheless, possible:
+ * 1) The count was transitioning to zero, but we saw a non-
+ * zero value. pmap_ts_referenced() will return zero
+ * because the page is not mapped.
+ * 2) The count was transitioning to one, but we saw zero.
+ * This race delays the detection of a new reference. At
+ * worst, we will deactivate and reactivate the page.
*/
if (m->object->ref_count != 0)
act_delta += pmap_ts_referenced(m);