Diffstat (limited to 'sys')
 sys/ddb/db_ps.c                          |   9
 sys/dev/mlx5/mlx5_en/en.h                |   4
 sys/dev/usb/controller/xhci.c            |  18
 sys/kern/kern_clocksource.c              |   8
 sys/kern/kern_event.c                    |   2
 sys/kern/kern_fork.c                     |   1
 sys/kern/kern_kthread.c                  |   1
 sys/kern/kern_thr.c                      |   1
 sys/kern/kern_thread.c                   |   4
 sys/kern/kern_timeout.c                  | 101
 sys/kern/subr_sleepqueue.c               | 111
 sys/kern/sys_generic.c                   |   4
 sys/netinet/tcp_lro.c                    |  36
 sys/ofed/drivers/infiniband/core/ucma.c  |  16
 sys/sys/callout.h                        |   3
 sys/sys/param.h                          |   2
 sys/sys/proc.h                           |   3
 sys/sys/time.h                           |   2
 sys/vm/vm_pageout.c                      |  25
 19 files changed, 208 insertions(+), 143 deletions(-)
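
Note: the centerpiece of the kernel changes in the diff below is the new
callout_when() helper in sys/kern/kern_timeout.c.  The deadline/precision
arithmetic is factored out of callout_reset_sbt_on(), a C_PRECALC flag is
added so a caller can pass an already-computed absolute time, and the
sleepqueue code uses this to record its deadline in td_sleeptimo before
arming the callout.  The following is only a simplified userland sketch of
that arithmetic (saturating addition at SBT_MAX plus the precision clamp);
the *_model names are made up for illustration and are not the kernel API.

#include <stdint.h>
#include <stdio.h>

typedef int64_t sbintime_t;
#define	SBT_MAX	0x7fffffffffffffffLL

static void
callout_when_model(sbintime_t now, sbintime_t sbt, sbintime_t precision,
    int precexp, sbintime_t *res, sbintime_t *prec_res)
{
	sbintime_t to_sbt, to_pr;

	/* Saturate instead of overflowing when now + sbt would wrap. */
	if (SBT_MAX - now < sbt)
		to_sbt = SBT_MAX;
	else
		to_sbt = now + sbt;
	*res = to_sbt;

	/* Allow a 2^-precexp fraction of the delay as extra imprecision. */
	to_pr = sbt >> precexp;
	*prec_res = to_pr > precision ? to_pr : precision;
}

int
main(void)
{
	sbintime_t res, prec;

	callout_when_model(1000, SBT_MAX, 0, 5, &res, &prec);
	printf("deadline %jd, precision %jd\n", (intmax_t)res, (intmax_t)prec);
	return (0);
}
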
diff --git a/sys/ddb/db_ps.c b/sys/ddb/db_ps.c index efbc8dc..e030f47 100644 --- a/sys/ddb/db_ps.c +++ b/sys/ddb/db_ps.c @@ -371,8 +371,13 @@ DB_SHOW_COMMAND(thread, db_show_thread) db_printf(" lock: %s turnstile: %p\n", td->td_lockname, td->td_blocked); if (TD_ON_SLEEPQ(td)) - db_printf(" wmesg: %s wchan: %p\n", td->td_wmesg, - td->td_wchan); + db_printf( + " wmesg: %s wchan: %p sleeptimo %lx. %jx (curr %lx. %jx)\n", + td->td_wmesg, td->td_wchan, + (long)sbttobt(td->td_sleeptimo).sec, + (uintmax_t)sbttobt(td->td_sleeptimo).frac, + (long)sbttobt(sbinuptime()).sec, + (uintmax_t)sbttobt(sbinuptime()).frac); db_printf(" priority: %d\n", td->td_priority); db_printf(" container lock: %s (%p)\n", lock->lo_name, lock); } diff --git a/sys/dev/mlx5/mlx5_en/en.h b/sys/dev/mlx5/mlx5_en/en.h index c4415c1..2573f15 100644 --- a/sys/dev/mlx5/mlx5_en/en.h +++ b/sys/dev/mlx5/mlx5_en/en.h @@ -76,11 +76,11 @@ #define MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE 0x7 #define MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE 0xa -#define MLX5E_PARAMS_MAXIMUM_LOG_SQ_SIZE 0xd +#define MLX5E_PARAMS_MAXIMUM_LOG_SQ_SIZE 0xe #define MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE 0x7 #define MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE 0xa -#define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE 0xd +#define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE 0xe /* freeBSD HW LRO is limited by 16KB - the size of max mbuf */ #define MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ MJUM16BYTES diff --git a/sys/dev/usb/controller/xhci.c b/sys/dev/usb/controller/xhci.c index 0b708b8..2376cee2 100644 --- a/sys/dev/usb/controller/xhci.c +++ b/sys/dev/usb/controller/xhci.c @@ -2227,7 +2227,11 @@ xhci_setup_generic_chain(struct usb_xfer *xfer) * Send a DATA1 message and invert the current * endpoint direction. */ +#ifdef XHCI_STEP_STATUS_STAGE temp.step_td = (xfer->nframes != 0); +#else + temp.step_td = 0; +#endif temp.direction = UE_GET_DIR(xfer->endpointno) ^ UE_DIR_IN; temp.len = 0; temp.pc = NULL; @@ -3873,12 +3877,10 @@ xhci_configure_reset_endpoint(struct usb_xfer *xfer) xhci_configure_mask(udev, (1U << epno) | 1U, 0); - err = xhci_cmd_evaluate_ctx(sc, buf_inp.physaddr, index); - - if (err != 0) - DPRINTF("Could not configure endpoint %u\n", epno); - - err = xhci_cmd_configure_ep(sc, buf_inp.physaddr, 0, index); + if (epno > 1) + err = xhci_cmd_configure_ep(sc, buf_inp.physaddr, 0, index); + else + err = xhci_cmd_evaluate_ctx(sc, buf_inp.physaddr, index); if (err != 0) DPRINTF("Could not configure endpoint %u\n", epno); @@ -4261,6 +4263,10 @@ xhci_device_state_change(struct usb_device *udev) sc->sc_hw.devs[index].state = XHCI_ST_ADDRESSED; + /* set configure mask to slot only */ + xhci_configure_mask(udev, 1, 0); + + /* deconfigure all endpoints, except EP0 */ err = xhci_cmd_configure_ep(sc, 0, 1, index); if (err) { diff --git a/sys/kern/kern_clocksource.c b/sys/kern/kern_clocksource.c index 49ac7bf..fc4ca83a 100644 --- a/sys/kern/kern_clocksource.c +++ b/sys/kern/kern_clocksource.c @@ -211,7 +211,7 @@ handleevents(sbintime_t now, int fake) } else state->nextprof = state->nextstat; if (now >= state->nextcallopt) { - state->nextcall = state->nextcallopt = INT64_MAX; + state->nextcall = state->nextcallopt = SBT_MAX; callout_process(now); } @@ -492,7 +492,7 @@ configtimer(int start) state = DPCPU_ID_PTR(cpu, timerstate); state->now = now; if (!smp_started && cpu != CPU_FIRST()) - state->nextevent = INT64_MAX; + state->nextevent = SBT_MAX; else state->nextevent = next; if (periodic) @@ -580,8 +580,8 @@ cpu_initclocks_bsp(void) CPU_FOREACH(cpu) { state = DPCPU_ID_PTR(cpu, timerstate); mtx_init(&state->et_hw_mtx, "et_hw_mtx", 
NULL, MTX_SPIN); - state->nextcall = INT64_MAX; - state->nextcallopt = INT64_MAX; + state->nextcall = SBT_MAX; + state->nextcallopt = SBT_MAX; } periodic = want_periodic; /* Grab requested timer or the best of present. */ diff --git a/sys/kern/kern_event.c b/sys/kern/kern_event.c index e4bf40b..fba163f 100644 --- a/sys/kern/kern_event.c +++ b/sys/kern/kern_event.c @@ -1471,7 +1471,7 @@ kqueue_scan(struct kqueue *kq, int maxevents, struct kevent_copyops *k_ops, rsbt = tstosbt(*tsp); if (TIMESEL(&asbt, rsbt)) asbt += tc_tick_sbt; - if (asbt <= INT64_MAX - rsbt) + if (asbt <= SBT_MAX - rsbt) asbt += rsbt; else asbt = 0; diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c index 8fa6bcd..7877fab 100644 --- a/sys/kern/kern_fork.c +++ b/sys/kern/kern_fork.c @@ -470,6 +470,7 @@ do_fork(struct thread *td, int flags, struct proc *p2, struct thread *td2, bzero(&td2->td_startzero, __rangeof(struct thread, td_startzero, td_endzero)); td2->td_su = NULL; + td2->td_sleeptimo = 0; bcopy(&td->td_startcopy, &td2->td_startcopy, __rangeof(struct thread, td_startcopy, td_endcopy)); diff --git a/sys/kern/kern_kthread.c b/sys/kern/kern_kthread.c index fb46025..4c77f4f 100644 --- a/sys/kern/kern_kthread.c +++ b/sys/kern/kern_kthread.c @@ -272,6 +272,7 @@ kthread_add(void (*func)(void *), void *arg, struct proc *p, bzero(&newtd->td_startzero, __rangeof(struct thread, td_startzero, td_endzero)); newtd->td_su = NULL; + newtd->td_sleeptimo = 0; bcopy(&oldtd->td_startcopy, &newtd->td_startcopy, __rangeof(struct thread, td_startcopy, td_endcopy)); diff --git a/sys/kern/kern_thr.c b/sys/kern/kern_thr.c index b01aecb..74050ff 100644 --- a/sys/kern/kern_thr.c +++ b/sys/kern/kern_thr.c @@ -229,6 +229,7 @@ thread_create(struct thread *td, struct rtprio *rtp, bzero(&newtd->td_startzero, __rangeof(struct thread, td_startzero, td_endzero)); newtd->td_su = NULL; + newtd->td_sleeptimo = 0; bcopy(&td->td_startcopy, &newtd->td_startcopy, __rangeof(struct thread, td_startcopy, td_endcopy)); newtd->td_proc = td->td_proc; diff --git a/sys/kern/kern_thread.c b/sys/kern/kern_thread.c index 2f8382c..7b04d96 100644 --- a/sys/kern/kern_thread.c +++ b/sys/kern/kern_thread.c @@ -319,7 +319,7 @@ thread_reap(void) /* * Don't even bother to lock if none at this instant, - * we really don't care about the next instant.. + * we really don't care about the next instant. */ if (!TAILQ_EMPTY(&zombie_threads)) { mtx_lock_spin(&zombie_lock); @@ -383,6 +383,7 @@ thread_free(struct thread *td) cpu_thread_free(td); if (td->td_kstack != 0) vm_thread_dispose(td); + callout_drain(&td->td_slpcallout); uma_zfree(thread_zone, td); } @@ -524,6 +525,7 @@ thread_wait(struct proc *p) td->td_cpuset = NULL; cpu_thread_clean(td); crfree(td->td_ucred); + callout_drain(&td->td_slpcallout); thread_reap(); /* check for zombie threads etc. */ } diff --git a/sys/kern/kern_timeout.c b/sys/kern/kern_timeout.c index 9aa11ba..7e5aab7 100644 --- a/sys/kern/kern_timeout.c +++ b/sys/kern/kern_timeout.c @@ -296,7 +296,7 @@ callout_cpu_init(struct callout_cpu *cc, int cpu) for (i = 0; i < callwheelsize; i++) LIST_INIT(&cc->cc_callwheel[i]); TAILQ_INIT(&cc->cc_expireq); - cc->cc_firstevent = INT64_MAX; + cc->cc_firstevent = SBT_MAX; for (i = 0; i < 2; i++) cc_cce_cleanup(cc, i); snprintf(cc->cc_ktr_event_name, sizeof(cc->cc_ktr_event_name), @@ -569,8 +569,8 @@ callout_cc_add(struct callout *c, struct callout_cpu *cc, * Inform the eventtimers(4) subsystem there's a new callout * that has been inserted, but only if really required. 
*/ - if (INT64_MAX - c->c_time < c->c_precision) - c->c_precision = INT64_MAX - c->c_time; + if (SBT_MAX - c->c_time < c->c_precision) + c->c_precision = SBT_MAX - c->c_time; sbt = c->c_time + c->c_precision; if (sbt < cc->cc_firstevent) { cc->cc_firstevent = sbt; @@ -896,6 +896,56 @@ callout_handle_init(struct callout_handle *handle) handle->callout = NULL; } +void +callout_when(sbintime_t sbt, sbintime_t precision, int flags, + sbintime_t *res, sbintime_t *prec_res) +{ + sbintime_t to_sbt, to_pr; + + if ((flags & (C_ABSOLUTE | C_PRECALC)) != 0) { + *res = sbt; + *prec_res = precision; + return; + } + if ((flags & C_HARDCLOCK) != 0 && sbt < tick_sbt) + sbt = tick_sbt; + if ((flags & C_HARDCLOCK) != 0 || +#ifdef NO_EVENTTIMERS + sbt >= sbt_timethreshold) { + to_sbt = getsbinuptime(); + + /* Add safety belt for the case of hz > 1000. */ + to_sbt += tc_tick_sbt - tick_sbt; +#else + sbt >= sbt_tickthreshold) { + /* + * Obtain the time of the last hardclock() call on + * this CPU directly from the kern_clocksource.c. + * This value is per-CPU, but it is equal for all + * active ones. + */ +#ifdef __LP64__ + to_sbt = DPCPU_GET(hardclocktime); +#else + spinlock_enter(); + to_sbt = DPCPU_GET(hardclocktime); + spinlock_exit(); +#endif +#endif + if ((flags & C_HARDCLOCK) == 0) + to_sbt += tick_sbt; + } else + to_sbt = sbinuptime(); + if (SBT_MAX - to_sbt < sbt) + to_sbt = SBT_MAX; + else + to_sbt += sbt; + *res = to_sbt; + to_pr = ((C_PRELGET(flags) < 0) ? sbt >> tc_precexp : + sbt >> C_PRELGET(flags)); + *prec_res = to_pr > precision ? to_pr : precision; +} + /* * New interface; clients allocate their own callout structures. * @@ -913,10 +963,10 @@ callout_handle_init(struct callout_handle *handle) * callout_deactivate() - marks the callout as having been serviced */ int -callout_reset_sbt_on(struct callout *c, sbintime_t sbt, sbintime_t precision, +callout_reset_sbt_on(struct callout *c, sbintime_t sbt, sbintime_t prec, void (*ftn)(void *), void *arg, int cpu, int flags) { - sbintime_t to_sbt, pr; + sbintime_t to_sbt, precision; struct callout_cpu *cc; int cancelled, direct; int ignore_cpu=0; @@ -929,47 +979,8 @@ callout_reset_sbt_on(struct callout *c, sbintime_t sbt, sbintime_t precision, /* Invalid CPU spec */ panic("Invalid CPU in callout %d", cpu); } - if (flags & C_ABSOLUTE) { - to_sbt = sbt; - } else { - if ((flags & C_HARDCLOCK) && (sbt < tick_sbt)) - sbt = tick_sbt; - if ((flags & C_HARDCLOCK) || -#ifdef NO_EVENTTIMERS - sbt >= sbt_timethreshold) { - to_sbt = getsbinuptime(); + callout_when(sbt, prec, flags, &to_sbt, &precision); - /* Add safety belt for the case of hz > 1000. */ - to_sbt += tc_tick_sbt - tick_sbt; -#else - sbt >= sbt_tickthreshold) { - /* - * Obtain the time of the last hardclock() call on - * this CPU directly from the kern_clocksource.c. - * This value is per-CPU, but it is equal for all - * active ones. - */ -#ifdef __LP64__ - to_sbt = DPCPU_GET(hardclocktime); -#else - spinlock_enter(); - to_sbt = DPCPU_GET(hardclocktime); - spinlock_exit(); -#endif -#endif - if ((flags & C_HARDCLOCK) == 0) - to_sbt += tick_sbt; - } else - to_sbt = sbinuptime(); - if (INT64_MAX - to_sbt < sbt) - to_sbt = INT64_MAX; - else - to_sbt += sbt; - pr = ((C_PRELGET(flags) < 0) ? 
sbt >> tc_precexp : - sbt >> C_PRELGET(flags)); - if (pr > precision) - precision = pr; - } /* * This flag used to be added by callout_cc_add, but the * first time you call this we could end up with the diff --git a/sys/kern/subr_sleepqueue.c b/sys/kern/subr_sleepqueue.c index c490460..4941b47 100644 --- a/sys/kern/subr_sleepqueue.c +++ b/sys/kern/subr_sleepqueue.c @@ -361,6 +361,7 @@ sleepq_set_timeout_sbt(void *wchan, sbintime_t sbt, sbintime_t pr, { struct sleepqueue_chain *sc; struct thread *td; + sbintime_t pr1; td = curthread; sc = SC_LOOKUP(wchan); @@ -368,8 +369,14 @@ sleepq_set_timeout_sbt(void *wchan, sbintime_t sbt, sbintime_t pr, MPASS(TD_ON_SLEEPQ(td)); MPASS(td->td_sleepqueue == NULL); MPASS(wchan != NULL); - callout_reset_sbt_on(&td->td_slpcallout, sbt, pr, - sleepq_timeout, td, PCPU_GET(cpuid), flags | C_DIRECT_EXEC); + KASSERT(td->td_sleeptimo == 0, ("td %d %p td_sleeptimo %jx", + td->td_tid, td, (uintmax_t)td->td_sleeptimo)); + thread_lock(td); + callout_when(sbt, pr, flags, &td->td_sleeptimo, &pr1); + thread_unlock(td); + callout_reset_sbt_on(&td->td_slpcallout, td->td_sleeptimo, pr1, + sleepq_timeout, td, PCPU_GET(cpuid), flags | C_PRECALC | + C_DIRECT_EXEC); } /* @@ -548,37 +555,36 @@ static int sleepq_check_timeout(void) { struct thread *td; + int res; td = curthread; THREAD_LOCK_ASSERT(td, MA_OWNED); /* - * If TDF_TIMEOUT is set, we timed out. + * If TDF_TIMEOUT is set, we timed out. But recheck + * td_sleeptimo anyway. */ - if (td->td_flags & TDF_TIMEOUT) { - td->td_flags &= ~TDF_TIMEOUT; - return (EWOULDBLOCK); + res = 0; + if (td->td_sleeptimo != 0) { + if (td->td_sleeptimo <= sbinuptime()) + res = EWOULDBLOCK; + td->td_sleeptimo = 0; } - - /* - * If TDF_TIMOFAIL is set, the timeout ran after we had - * already been woken up. - */ - if (td->td_flags & TDF_TIMOFAIL) - td->td_flags &= ~TDF_TIMOFAIL; - - /* - * If callout_stop() fails, then the timeout is running on - * another CPU, so synchronize with it to avoid having it - * accidentally wake up a subsequent sleep. - */ - else if (_callout_stop_safe(&td->td_slpcallout, CS_MIGRBLOCK) - == 0) { - td->td_flags |= TDF_TIMEOUT; - TD_SET_SLEEPING(td); - mi_switch(SW_INVOL | SWT_SLEEPQTIMO, NULL); - } - return (0); + if (td->td_flags & TDF_TIMEOUT) + td->td_flags &= ~TDF_TIMEOUT; + else + /* + * We ignore the situation where timeout subsystem was + * unable to stop our callout. The struct thread is + * type-stable, the callout will use the correct + * memory when running. The checks of the + * td_sleeptimo value in this function and in + * sleepq_timeout() ensure that the thread does not + * get spurious wakeups, even if the callout was reset + * or thread reused. + */ + callout_stop(&td->td_slpcallout); + return (res); } /* @@ -887,12 +893,17 @@ sleepq_timeout(void *arg) CTR3(KTR_PROC, "sleepq_timeout: thread %p (pid %ld, %s)", (void *)td, (long)td->td_proc->p_pid, (void *)td->td_name); - /* - * First, see if the thread is asleep and get the wait channel if - * it is. - */ thread_lock(td); - if (TD_IS_SLEEPING(td) && TD_ON_SLEEPQ(td)) { + + if (td->td_sleeptimo > sbinuptime() || td->td_sleeptimo == 0) { + /* + * The thread does not want a timeout (yet). + */ + } else if (TD_IS_SLEEPING(td) && TD_ON_SLEEPQ(td)) { + /* + * See if the thread is asleep and get the wait + * channel if it is. 
+ */ wchan = td->td_wchan; sc = SC_LOOKUP(wchan); THREAD_LOCKPTR_ASSERT(td, &sc->sc_lock); @@ -900,40 +911,16 @@ sleepq_timeout(void *arg) MPASS(sq != NULL); td->td_flags |= TDF_TIMEOUT; wakeup_swapper = sleepq_resume_thread(sq, td, 0); - thread_unlock(td); - if (wakeup_swapper) - kick_proc0(); - return; - } - - /* - * If the thread is on the SLEEPQ but isn't sleeping yet, it - * can either be on another CPU in between sleepq_add() and - * one of the sleepq_*wait*() routines or it can be in - * sleepq_catch_signals(). - */ - if (TD_ON_SLEEPQ(td)) { + } else if (TD_ON_SLEEPQ(td)) { + /* + * If the thread is on the SLEEPQ but isn't sleeping + * yet, it can either be on another CPU in between + * sleepq_add() and one of the sleepq_*wait*() + * routines or it can be in sleepq_catch_signals(). + */ td->td_flags |= TDF_TIMEOUT; - thread_unlock(td); - return; } - /* - * Now check for the edge cases. First, if TDF_TIMEOUT is set, - * then the other thread has already yielded to us, so clear - * the flag and resume it. If TDF_TIMEOUT is not set, then the - * we know that the other thread is not on a sleep queue, but it - * hasn't resumed execution yet. In that case, set TDF_TIMOFAIL - * to let it know that the timeout has already run and doesn't - * need to be canceled. - */ - if (td->td_flags & TDF_TIMEOUT) { - MPASS(TD_IS_SLEEPING(td)); - td->td_flags &= ~TDF_TIMEOUT; - TD_CLR_SLEEPING(td); - wakeup_swapper = setrunnable(td); - } else - td->td_flags |= TDF_TIMOFAIL; thread_unlock(td); if (wakeup_swapper) kick_proc0(); diff --git a/sys/kern/sys_generic.c b/sys/kern/sys_generic.c index 033bbf0..96ec1d1 100644 --- a/sys/kern/sys_generic.c +++ b/sys/kern/sys_generic.c @@ -1089,7 +1089,7 @@ kern_select(struct thread *td, int nd, fd_set *fd_in, fd_set *fd_ou, precision >>= tc_precexp; if (TIMESEL(&asbt, rsbt)) asbt += tc_tick_sbt; - if (asbt <= INT64_MAX - rsbt) + if (asbt <= SBT_MAX - rsbt) asbt += rsbt; else asbt = -1; @@ -1626,7 +1626,7 @@ selsocket(struct socket *so, int events, struct timeval *tvp, struct thread *td) precision >>= tc_precexp; if (TIMESEL(&asbt, rsbt)) asbt += tc_tick_sbt; - if (asbt <= INT64_MAX - rsbt) + if (asbt <= SBT_MAX - rsbt) asbt += rsbt; else asbt = -1; diff --git a/sys/netinet/tcp_lro.c b/sys/netinet/tcp_lro.c index a290aae..761d8f3 100644 --- a/sys/netinet/tcp_lro.c +++ b/sys/netinet/tcp_lro.c @@ -382,6 +382,7 @@ tcp_lro_rx(struct lro_ctrl *lc, struct mbuf *m, uint32_t csum) tcp_seq seq; int error, ip_len, l; uint16_t eh_type, tcp_data_len; + int force_flush = 0; /* We expect a contiguous header [eh, ip, tcp]. */ @@ -448,8 +449,15 @@ tcp_lro_rx(struct lro_ctrl *lc, struct mbuf *m, uint32_t csum) * Check TCP header constraints. */ /* Ensure no bits set besides ACK or PSH. */ - if ((th->th_flags & ~(TH_ACK | TH_PUSH)) != 0) - return (TCP_LRO_CANNOT); + if ((th->th_flags & ~(TH_ACK | TH_PUSH)) != 0) { + if (th->th_flags & TH_SYN) + return (TCP_LRO_CANNOT); + /* + * Make sure that previously seen segements/ACKs are delivered + * before this segement, e.g. FIN. + */ + force_flush = 1; + } /* XXX-BZ We lose a ACK|PUSH flag concatenating multiple segments. */ /* XXX-BZ Ideally we'd flush on PUSH? 
*/ @@ -465,8 +473,13 @@ tcp_lro_rx(struct lro_ctrl *lc, struct mbuf *m, uint32_t csum) ts_ptr = (uint32_t *)(th + 1); if (l != 0 && (__predict_false(l != TCPOLEN_TSTAMP_APPA) || (*ts_ptr != ntohl(TCPOPT_NOP<<24|TCPOPT_NOP<<16| - TCPOPT_TIMESTAMP<<8|TCPOLEN_TIMESTAMP)))) - return (TCP_LRO_CANNOT); + TCPOPT_TIMESTAMP<<8|TCPOLEN_TIMESTAMP)))) { + /* + * Make sure that previously seen segements/ACKs are delivered + * before this segement. + */ + force_flush = 1; + } /* If the driver did not pass in the checksum, set it now. */ if (csum == 0x0000) @@ -500,6 +513,13 @@ tcp_lro_rx(struct lro_ctrl *lc, struct mbuf *m, uint32_t csum) #endif } + if (force_flush) { + /* Timestamps mismatch; this is a FIN, etc */ + SLIST_REMOVE(&lc->lro_active, le, lro_entry, next); + tcp_lro_flush(lc, le); + return (TCP_LRO_CANNOT); + } + /* Flush now if appending will result in overflow. */ if (le->p_len > (65535 - tcp_data_len)) { SLIST_REMOVE(&lc->lro_active, le, lro_entry, next); @@ -568,6 +588,14 @@ tcp_lro_rx(struct lro_ctrl *lc, struct mbuf *m, uint32_t csum) return (0); } + if (force_flush) { + /* + * Nothing to flush, but this segment can not be further + * aggregated/delayed. + */ + return (TCP_LRO_CANNOT); + } + /* Try to find an empty slot. */ if (SLIST_EMPTY(&lc->lro_free)) return (TCP_LRO_NO_ENTRIES); diff --git a/sys/ofed/drivers/infiniband/core/ucma.c b/sys/ofed/drivers/infiniband/core/ucma.c index 23cbf7b..3c7c751 100644 --- a/sys/ofed/drivers/infiniband/core/ucma.c +++ b/sys/ofed/drivers/infiniband/core/ucma.c @@ -39,6 +39,8 @@ #include <linux/in6.h> #include <linux/miscdevice.h> +#include <sys/filio.h> + #include <rdma/rdma_user_cm.h> #include <rdma/ib_marshall.h> #include <rdma/rdma_cm.h> @@ -1285,11 +1287,25 @@ static int ucma_close(struct inode *inode, struct file *filp) return 0; } +static long +ucma_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) +{ + + switch (cmd) { + case FIONBIO: + case FIOASYNC: + return (0); + default: + return (-ENOTTY); + } +} + static const struct file_operations ucma_fops = { .owner = THIS_MODULE, .open = ucma_open, .release = ucma_close, .write = ucma_write, + .unlocked_ioctl = ucma_ioctl, .poll = ucma_poll, }; diff --git a/sys/sys/callout.h b/sys/sys/callout.h index d3f2bca..4e86b16 100644 --- a/sys/sys/callout.h +++ b/sys/sys/callout.h @@ -57,6 +57,7 @@ #define C_PRELGET(x) (int)((((x) >> 1) & C_PRELRANGE) - 1) #define C_HARDCLOCK 0x0100 /* align to hardclock() calls */ #define C_ABSOLUTE 0x0200 /* event time is absolute. */ +#define C_PRECALC 0x0400 /* event time is pre-calculated. */ struct callout_handle { struct callout *callout; @@ -129,6 +130,8 @@ int callout_schedule_on(struct callout *, int, int); int _callout_stop_safe(struct callout *, int); void callout_process(sbintime_t now); +void callout_when(sbintime_t sbt, sbintime_t precision, int flags, + sbintime_t *sbt_res, sbintime_t *prec_res); #endif #endif /* _SYS_CALLOUT_H_ */ diff --git a/sys/sys/param.h b/sys/sys/param.h index 07f69c6..4bd9584 100644 --- a/sys/sys/param.h +++ b/sys/sys/param.h @@ -58,7 +58,7 @@ * in the range 5 to 9. */ #undef __FreeBSD_version -#define __FreeBSD_version 1003506 /* Master, propagated to newvers */ +#define __FreeBSD_version 1003507 /* Master, propagated to newvers */ /* * __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD, diff --git a/sys/sys/proc.h b/sys/sys/proc.h index 59c75c5..6b0c924 100644 --- a/sys/sys/proc.h +++ b/sys/sys/proc.h @@ -325,6 +325,7 @@ struct thread { u_int td_dbg_sc_code; /* (c) Syscall code to debugger. 
*/ u_int td_dbg_sc_narg; /* (c) Syscall arg count to debugger.*/ void *td_emuldata; /* Emulator state data */ + sbintime_t td_sleeptimo; /* (t) Sleep timeout. */ }; struct mtx *thread_lock_block(struct thread *); @@ -364,7 +365,7 @@ do { \ #define TDF_ALLPROCSUSP 0x00000200 /* suspended by SINGLE_ALLPROC */ #define TDF_BOUNDARY 0x00000400 /* Thread suspended at user boundary */ #define TDF_ASTPENDING 0x00000800 /* Thread has some asynchronous events. */ -#define TDF_TIMOFAIL 0x00001000 /* Timeout from sleep after we were awake. */ +#define TDF_UNUSED12 0x00001000 /* --available-- */ #define TDF_SBDRY 0x00002000 /* Stop only on usermode boundary. */ #define TDF_UPIBLOCKED 0x00004000 /* Thread blocked on user PI mutex. */ #define TDF_NEEDSUSPCHK 0x00008000 /* Thread may need to suspend. */ diff --git a/sys/sys/time.h b/sys/sys/time.h index b589527..1f0a530 100644 --- a/sys/sys/time.h +++ b/sys/sys/time.h @@ -129,7 +129,7 @@ bintime_shift(struct bintime *_bt, int _exp) #define SBT_1MS (SBT_1S / 1000) #define SBT_1US (SBT_1S / 1000000) #define SBT_1NS (SBT_1S / 1000000000) -#define SBT_MAX 0x7fffffffffffffff +#define SBT_MAX 0x7fffffffffffffffLL static __inline int sbintime_getsec(sbintime_t _sbt) diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c index c9c1271..233bb99 100644 --- a/sys/vm/vm_pageout.c +++ b/sys/vm/vm_pageout.c @@ -1399,15 +1399,13 @@ relock_queue: /* * Scan the active queue for pages that can be deactivated. Update * the per-page activity counter and use it to identify deactivation - * candidates. + * candidates. Held pages may be deactivated. */ for (m = TAILQ_FIRST(&pq->pq_pl), scanned = 0; m != NULL && (scanned < min_scan || (page_shortage > 0 && scanned < maxscan)); m = next, scanned++) { - KASSERT(m->queue == PQ_ACTIVE, ("vm_pageout_scan: page %p isn't active", m)); - next = TAILQ_NEXT(m, plinks.q); if ((m->flags & PG_MARKER) != 0) continue; @@ -1421,8 +1419,8 @@ relock_queue: } /* - * The count for pagedaemon pages is done after checking the - * page for eligibility... + * The count for page daemon pages is updated after checking + * the page for eligibility. */ PCPU_INC(cnt.v_pdpages); @@ -1435,12 +1433,17 @@ relock_queue: act_delta += 1; } /* - * Unlocked object ref count check. Two races are possible. - * 1) The ref was transitioning to zero and we saw non-zero, - * the pmap bits will be checked unnecessarily. - * 2) The ref was transitioning to one and we saw zero. - * The page lock prevents a new reference to this page so - * we need not check the reference bits. + * Perform an unsynchronized object ref count check. While + * the page lock ensures that the page is not reallocated to + * another object, in particular, one with unmanaged mappings + * that cannot support pmap_ts_referenced(), two races are, + * nonetheless, possible: + * 1) The count was transitioning to zero, but we saw a non- + * zero value. pmap_ts_referenced() will return zero + * because the page is not mapped. + * 2) The count was transitioning to one, but we saw zero. + * This race delays the detection of a new reference. At + * worst, we will deactivate and reactivate the page. */ if (m->object->ref_count != 0) act_delta += pmap_ts_referenced(m); |
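
Note: the subr_sleepqueue.c hunks above retire the TDF_TIMOFAIL handshake.
Instead, the absolute deadline recorded in td_sleeptimo is the sole source
of truth: both sleepq_check_timeout() and sleepq_timeout() compare it with
sbinuptime(), so a callout that fires late or that callout_stop() failed to
cancel can no longer cause a spurious wakeup or a lost timeout.  Below is a
toy, standalone model of that decision only; the struct and function names
are invented for illustration and this is not kernel code.

#include <stdint.h>
#include <stdio.h>

#define	EWOULDBLOCK_MODEL	35	/* stand-in for errno EWOULDBLOCK */

typedef int64_t sbintime_t;

struct thread_model {
	sbintime_t	td_sleeptimo;	/* 0 means no timeout is armed */
};

/*
 * Decide, from the recorded absolute deadline alone, whether this wakeup
 * is a timeout.  Whether the callout was successfully stopped does not
 * enter into the decision.
 */
static int
check_timeout_model(struct thread_model *td, sbintime_t now)
{
	int res;

	res = 0;
	if (td->td_sleeptimo != 0) {
		if (td->td_sleeptimo <= now)
			res = EWOULDBLOCK_MODEL;
		td->td_sleeptimo = 0;	/* consume the deadline */
	}
	return (res);
}

int
main(void)
{
	struct thread_model td = { .td_sleeptimo = 100 };

	printf("woken before deadline: %d\n", check_timeout_model(&td, 50));
	td.td_sleeptimo = 100;
	printf("woken after deadline:  %d\n", check_timeout_model(&td, 150));
	return (0);
}
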