summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--sys/kern/kern_clock.c8
-rw-r--r--sys/kern/kern_intr.c2
-rw-r--r--sys/kern/kern_poll.c159
3 files changed, 102 insertions, 67 deletions
diff --git a/sys/kern/kern_clock.c b/sys/kern/kern_clock.c
index 7591f11..1b3f9c5 100644
--- a/sys/kern/kern_clock.c
+++ b/sys/kern/kern_clock.c
@@ -70,9 +70,7 @@
#endif
#ifdef DEVICE_POLLING
-#include <net/netisr.h> /* for NETISR_POLL */
-
-extern void ether_poll1(void);
+extern void init_device_poll(void);
extern void hardclock_device_poll(void);
#endif /* DEVICE_POLLING */
@@ -147,7 +145,7 @@ initclocks(dummy)
cpu_initclocks();
#ifdef DEVICE_POLLING
- register_netisr(NETISR_POLL, ether_poll1);
+ init_device_poll();
#endif
/*
* Compute profhz/stathz, and fix profhz if needed.
@@ -222,7 +220,7 @@ hardclock(frame)
tc_windup();
#ifdef DEVICE_POLLING
- hardclock_device_poll();
+ hardclock_device_poll(); /* this is very short and quick */
#endif /* DEVICE_POLLING */
/*
diff --git a/sys/kern/kern_intr.c b/sys/kern/kern_intr.c
index 3a8bc8a..6bdf00f 100644
--- a/sys/kern/kern_intr.c
+++ b/sys/kern/kern_intr.c
@@ -644,7 +644,7 @@ swi_net(void *dummy)
}
#ifdef DEVICE_POLLING
if (pollmore)
- ether_pollmore();
+ netisr_pollmore();
}
#endif
}
diff --git a/sys/kern/kern_poll.c b/sys/kern/kern_poll.c
index 7a45f74..8737672 100644
--- a/sys/kern/kern_poll.c
+++ b/sys/kern/kern_poll.c
@@ -1,5 +1,7 @@
/*-
- * Copyright (c) 2001 Luigi Rizzo
+ * Copyright (c) 2001-2002 Luigi Rizzo
+ *
+ * Supported by: the Xorp Project (www.xorp.org)
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -42,10 +44,12 @@
#error DEVICE_POLLING is not compatible with SMP
#endif
-void ether_poll1(void);
+static void netisr_poll(void); /* the two netisr handlers */
+void netisr_pollmore(void);
+
+void init_device_poll(void); /* init routine */
+void hardclock_device_poll(void); /* hook from hardclock */
void ether_poll(int); /* polling while in trap */
-void ether_pollmore(void);
-void hardclock_device_poll(void);
/*
* Polling support for [network] device drivers.
@@ -101,56 +105,73 @@ SYSCTL_NODE(_kern, OID_AUTO, polling, CTLFLAG_RW, 0,
"Device polling parameters");
static u_int32_t poll_burst = 5;
-SYSCTL_ULONG(_kern_polling, OID_AUTO, burst, CTLFLAG_RW,
+SYSCTL_UINT(_kern_polling, OID_AUTO, burst, CTLFLAG_RW,
&poll_burst, 0, "Current polling burst size");
static u_int32_t poll_each_burst = 5;
-SYSCTL_ULONG(_kern_polling, OID_AUTO, each_burst, CTLFLAG_RW,
+SYSCTL_UINT(_kern_polling, OID_AUTO, each_burst, CTLFLAG_RW,
&poll_each_burst, 0, "Max size of each burst");
static u_int32_t poll_burst_max = 150; /* good for 100Mbit net and HZ=1000 */
-SYSCTL_ULONG(_kern_polling, OID_AUTO, burst_max, CTLFLAG_RW,
+SYSCTL_UINT(_kern_polling, OID_AUTO, burst_max, CTLFLAG_RW,
&poll_burst_max, 0, "Max Polling burst size");
+static u_int32_t poll_in_idle_loop=1; /* do we poll in idle loop ? */
+SYSCTL_UINT(_kern_polling, OID_AUTO, idle_poll, CTLFLAG_RW,
+ &poll_in_idle_loop, 0, "Enable device polling in idle loop");
+
u_int32_t poll_in_trap; /* used in trap.c */
-SYSCTL_ULONG(_kern_polling, OID_AUTO, poll_in_trap, CTLFLAG_RW,
+SYSCTL_UINT(_kern_polling, OID_AUTO, poll_in_trap, CTLFLAG_RW,
&poll_in_trap, 0, "Poll burst size during a trap");
static u_int32_t user_frac = 50;
-SYSCTL_ULONG(_kern_polling, OID_AUTO, user_frac, CTLFLAG_RW,
+SYSCTL_UINT(_kern_polling, OID_AUTO, user_frac, CTLFLAG_RW,
&user_frac, 0, "Desired user fraction of cpu time");
static u_int32_t reg_frac = 20 ;
-SYSCTL_ULONG(_kern_polling, OID_AUTO, reg_frac, CTLFLAG_RW,
+SYSCTL_UINT(_kern_polling, OID_AUTO, reg_frac, CTLFLAG_RW,
&reg_frac, 0, "Every this many cycles poll register");
static u_int32_t short_ticks;
-SYSCTL_ULONG(_kern_polling, OID_AUTO, short_ticks, CTLFLAG_RW,
+SYSCTL_UINT(_kern_polling, OID_AUTO, short_ticks, CTLFLAG_RW,
&short_ticks, 0, "Hardclock ticks shorter than they should be");
static u_int32_t lost_polls;
-SYSCTL_ULONG(_kern_polling, OID_AUTO, lost_polls, CTLFLAG_RW,
+SYSCTL_UINT(_kern_polling, OID_AUTO, lost_polls, CTLFLAG_RW,
&lost_polls, 0, "How many times we would have lost a poll tick");
-static u_int32_t poll_handlers; /* next free entry in pr[]. */
-SYSCTL_ULONG(_kern_polling, OID_AUTO, handlers, CTLFLAG_RD,
- &poll_handlers, 0, "Number of registered poll handlers");
+static u_int32_t pending_polls;
+SYSCTL_UINT(_kern_polling, OID_AUTO, pending_polls, CTLFLAG_RW,
+ &pending_polls, 0, "Do we need to poll again");
-static u_int32_t poll_in_idle=1; /* boolean */
-SYSCTL_ULONG(_kern_polling, OID_AUTO, poll_in_idle, CTLFLAG_RW,
- &poll_in_idle, 0, "Poll during idle loop");
+static int residual_burst = 0;
+SYSCTL_INT(_kern_polling, OID_AUTO, residual_burst, CTLFLAG_RW,
+ &residual_burst, 0, "# of residual cycles in burst");
-static u_int32_t idlepoll_sleeping; /* idlepoll is sleeping */
-SYSCTL_ULONG(_kern_polling, OID_AUTO, idlepoll_sleeping, CTLFLAG_RD,
- &idlepoll_sleeping, 0, "idlepoll is sleeping");
+static u_int32_t poll_handlers; /* next free entry in pr[]. */
+SYSCTL_UINT(_kern_polling, OID_AUTO, handlers, CTLFLAG_RD,
+ &poll_handlers, 0, "Number of registered poll handlers");
static int polling = 0; /* global polling enable */
-SYSCTL_ULONG(_kern_polling, OID_AUTO, enable, CTLFLAG_RW,
+SYSCTL_UINT(_kern_polling, OID_AUTO, enable, CTLFLAG_RW,
&polling, 0, "Polling enabled");
+static volatile u_int32_t phase;
+SYSCTL_UINT(_kern_polling, OID_AUTO, phase, CTLFLAG_RW,
+ &phase, 0, "Polling phase");
+
+static u_int32_t suspect;
+SYSCTL_UINT(_kern_polling, OID_AUTO, suspect, CTLFLAG_RW,
+ &suspect, 0, "suspect event");
+
+static volatile u_int32_t stalled;
+SYSCTL_UINT(_kern_polling, OID_AUTO, stalled, CTLFLAG_RW,
+ &stalled, 0, "potential stalls");
+
+static u_int32_t idlepoll_sleeping; /* idlepoll is sleeping */
+SYSCTL_UINT(_kern_polling, OID_AUTO, idlepoll_sleeping, CTLFLAG_RD,
+ &idlepoll_sleeping, 0, "idlepoll is sleeping");
-static u_int32_t poll1_active;
-static u_int32_t need_poll_again;
#define POLL_LIST_LEN 128
struct pollrec {
@@ -161,6 +182,15 @@ struct pollrec {
static struct pollrec pr[POLL_LIST_LEN];
/*
+ * register relevant netisr. Called from kern_clock.c:
+ */
+void
+init_device_poll(void)
+{
+ register_netisr(NETISR_POLL, netisr_poll);
+}
+
+/*
* Hook from hardclock. Tries to schedule a netisr, but keeps track
* of lost ticks due to the previous handler taking too long.
* The first part of the code is just for debugging purposes, and tries
@@ -173,6 +203,9 @@ hardclock_device_poll(void)
static struct timeval prev_t, t;
int delta;
+ if (poll_handlers == 0)
+ return;
+
microuptime(&t);
delta = (t.tv_usec - prev_t.tv_usec) +
(t.tv_sec - prev_t.tv_sec)*1000000;
@@ -181,15 +214,24 @@ hardclock_device_poll(void)
else
prev_t = t;
- if (poll_handlers > 0) {
- if (poll1_active) {
- lost_polls++;
- need_poll_again++;
- } else {
- poll1_active = 1;
- schednetisr(NETISR_POLL);
- }
+ if (pending_polls > 100) {
+ /* too much, assume it has stalled */
+ stalled++;
+ printf("poll stalled [%d] in phase %d\n",
+ stalled, phase);
+ pending_polls = 0;
+ phase = 0;
+ }
+
+ if (phase <= 2) {
+ if (phase != 0)
+ suspect++;
+ phase = 1;
+ schednetisr(NETISR_POLL);
+ phase = 2;
}
+ if (pending_polls++ > 0)
+ lost_polls++;
}
/*
@@ -199,7 +241,6 @@ void
ether_poll(int count)
{
int i;
- int s = splimp();
mtx_lock(&Giant);
@@ -210,18 +251,17 @@ ether_poll(int count)
(pr[i].ifp->if_flags & (IFF_UP|IFF_RUNNING)) )
pr[i].handler(pr[i].ifp, 0, count); /* quick check */
mtx_unlock(&Giant);
- splx(s);
}
/*
- * ether_pollmore is called after other netisr's, possibly scheduling
+ * netisr_pollmore is called after other netisr's, possibly scheduling
* another NETISR_POLL call, or adapting the burst size for the next cycle.
*
* It is very bad to fetch large bursts of packets from a single card at once,
* because the burst could take a long time to be completely processed, or
* could saturate the intermediate queue (ipintrq or similar) leading to
* losses or unfairness. To reduce the problem, and also to account better for
- * time spent in network-related processnig, we split the burst in smaller
+ * time spent in network-related processing, we split the burst in smaller
* chunks of fixed size, giving control to the other netisr's between chunks.
* This helps in improving the fairness, reducing livelock (because we
* emulate more closely the "process to completion" that we have with
@@ -229,21 +269,19 @@ ether_poll(int count)
* handling and forwarding.
*/
-static int residual_burst = 0;
-
static struct timeval poll_start_t;
void
-ether_pollmore()
+netisr_pollmore()
{
struct timeval t;
int kern_load;
- int s = splhigh();
+ /* XXX run at splhigh() or equivalent */
+ phase = 5;
if (residual_burst > 0) {
schednetisr(NETISR_POLL);
/* will run immediately on return, followed by netisrs */
- splx(s);
return ;
}
/* here we can account time spent in netisr's in this tick */
@@ -259,36 +297,37 @@ ether_pollmore()
poll_burst++;
}
- if (need_poll_again) {
+ pending_polls--;
+ if (pending_polls == 0) /* we are done */
+ phase = 0;
+ else {
/*
* Last cycle was long and caused us to miss one or more
- * hardclock ticks. Restart processnig again, but slightly
+ * hardclock ticks. Restart processing again, but slightly
* reduce the burst size to prevent that this happens again.
*/
- need_poll_again--;
poll_burst -= (poll_burst / 8);
if (poll_burst < 1)
poll_burst = 1;
schednetisr(NETISR_POLL);
- } else
- poll1_active = 0;
- splx(s);
+ phase = 6;
+ }
}
/*
- * ether_poll1 is called by schednetisr when appropriate, typically once
+ * netisr_poll is scheduled by schednetisr when appropriate, typically once
* per tick. It is called at splnet() so first thing to do is to upgrade to
* splimp(), and call all registered handlers.
*/
-void
-ether_poll1(void)
+static void
+netisr_poll(void)
{
static int reg_frac_count;
int i, cycles;
enum poll_cmd arg = POLL_ONLY;
- int s=splimp();
mtx_lock(&Giant);
+ phase = 3;
if (residual_burst == 0) { /* first call in this tick */
microuptime(&poll_start_t);
/*
@@ -344,8 +383,8 @@ ether_poll1(void)
poll_handlers = 0;
}
/* on -stable, schednetisr(NETISR_POLLMORE); */
+ phase = 4;
mtx_unlock(&Giant);
- splx(s);
}
/*
@@ -353,8 +392,8 @@ ether_poll1(void)
* (and polling should be enabled), 0 otherwise.
* A device is not supposed to register itself multiple times.
*
- * This is called from within the *_intr() function, so we should
- * probably not need further locking. XXX
+ * This is called from within the *_intr() functions, so we do not need
+ * further locking.
*/
int
ether_poll_register(poll_handler_t *h, struct ifnet *ifp)
@@ -400,12 +439,10 @@ ether_poll_register(poll_handler_t *h, struct ifnet *ifp)
}
/*
- * Remove the interface from the list of polling ones.
- * Normally run by *_stop().
- * We allow it being called with IFF_POLLING clear, the
- * call is sufficiently rare so it is preferable to save the
- * space for the extra test in each device in exchange of one
- * additional function call.
+ * Remove interface from the polling list. Normally called by *_stop().
+ * It is not an error to call it with IFF_POLLING clear, the call is
+ * sufficiently rare to be preferable to save the space for the extra
+ * test in each driver in exchange of one additional function call.
*/
int
ether_poll_deregister(struct ifnet *ifp)
@@ -450,7 +487,7 @@ poll_idle(void)
mtx_unlock_spin(&sched_lock);
for (;;) {
- if (poll_in_idle && poll_handlers > 0) {
+ if (poll_in_idle_loop && poll_handlers > 0) {
idlepoll_sleeping = 0;
mtx_lock(&Giant);
ether_poll(poll_each_burst);
OpenPOWER on IntegriCloud