summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorluigi <luigi@FreeBSD.org>2001-01-17 01:09:40 +0000
committerluigi <luigi@FreeBSD.org>2001-01-17 01:09:40 +0000
commit02bffeb68b29f64d1673f2d3aff9577227e2736c (patch)
treea270fd63d25d2d69d711c271cbd3bfcf4fd5648f
parent69aff11b5db450d01fcdd725b06997b5064c6461 (diff)
downloadFreeBSD-src-02bffeb68b29f64d1673f2d3aff9577227e2736c.zip
FreeBSD-src-02bffeb68b29f64d1673f2d3aff9577227e2736c.tar.gz
Document the data structures and operation of dummynet so that the next time
I or someone else browses through this code, we do not have a hard time understanding what is going on.
-rw-r--r--sys/netinet/ip_dummynet.h159
1 files changed, 117 insertions, 42 deletions
diff --git a/sys/netinet/ip_dummynet.h b/sys/netinet/ip_dummynet.h
index 504995a..5578525 100644
--- a/sys/netinet/ip_dummynet.h
+++ b/sys/netinet/ip_dummynet.h
@@ -31,42 +31,66 @@
#define _IP_DUMMYNET_H
/*
- * Definition of dummynet data structures.
- * We first start with the heap which is used by the scheduler.
- *
- * Each list contains a set of parameters identifying the pipe, and
- * a set of packets queued on the pipe itself.
- *
- * I could have used queue macros, but the management i have
- * is pretty simple and this makes the code more portable.
+ * Definition of dummynet data structures. In the structures, I decided
+ * not to use the macros in <sys/queue.h> in the hope of making the code
+ * easier to port to other architectures. The types of lists and queues we
+ * use here are pretty simple anyway.
*/
/*
- * The key for the heap is used for two different values
- 1. timer ticks- max 10K/second, so 32 bits are enough
- 2. virtual times. These increase in steps of len/x, where len is the
- packet length, and x is either the weight of the flow, or the
- sum of all weights.
- If we limit to max 1000 flows and a max weight of 100, then
- x needs 17 bits. The packet size is 16 bits, so we can easily
- overflow if we do not allow errors.
-
+ * We start with a heap, which is used in the scheduler to decide when
+ * to transmit packets etc.
+ *
+ * The key for the heap is used for two different values:
+ *
+ * 1. timer ticks- max 10K/second, so 32 bits are enough;
+ *
+ * 2. virtual times. These increase in steps of len/x, where len is the
+ * packet length, and x is either the weight of the flow, or the
+ * sum of all weights.
+ * If we limit to max 1000 flows and a max weight of 100, then
+ * x needs 17 bits. The packet size is 16 bits, so we can easily
+ * overflow if we do not allow errors.
+ * So we use a key "dn_key" which is 64 bits. Some macros are used to
+ * compare key values and handle wraparounds.
+ * MAX64 returns the largest of two key values.
+ * MY_M is used as a shift count when doing fixed point arithmetic
+ * (a better name would be useful...).
*/
typedef u_int64_t dn_key ; /* 64-bit heap sorting key: holds either timer ticks or a virtual time */
/*
 * Wraparound-aware key comparisons: the difference of the two keys is
 * reinterpreted as a signed 64-bit value and its sign gives the ordering.
 */
#define DN_KEY_LT(k1,k2)  (0 > (int64_t)((k1)-(k2)))
#define DN_KEY_LEQ(k1,k2) (0 >= (int64_t)((k1)-(k2)))
#define DN_KEY_GT(k1,k2)  (0 < (int64_t)((k1)-(k2)))
#define DN_KEY_GEQ(k1,k2) (0 <= (int64_t)((k1)-(k2)))
-/* XXX check names of next two macros */
/*
 * MAX64(x,y) yields the larger of two key values; the comparison is made
 * on the signed 64-bit difference, like the DN_KEY_* macros.
 * The whole expansion is parenthesized so that the macro can be used
 * safely inside a larger expression (e.g. 2 * MAX64(a,b)).
 * Note: an argument may be evaluated more than once.
 */
#define MAX64(x,y) ((((int64_t)((y)-(x))) > 0) ? (y) : (x))
#define MY_M 16 /* number of left shifts to obtain a larger precision */
+
/*
* XXX With this scaling, max 1000 flows, max weight 100, 1Gbit/s, the
* virtual time wraps every 15 days.
*/
+/*
+ * The OFFSET_OF macro is used to return the offset of a field within
+ * a structure. It is used by the heap management routines.
+ */
#define OFFSET_OF(struct_type, member) ((int) &((struct_type *) 0)->member)
+/*
+ * A heap entry is made of a key and a pointer to the actual
+ * object stored in the heap.
+ * The heap is an array of dn_heap_entry entries, dynamically allocated.
+ * Current size is "size", with "elements" actually in use.
+ * The heap normally supports only ordered insert and extract from the top.
+ * If we want to extract an object from the middle of the heap, we
+ * have to know where the object itself is located in the heap (or we
+ * need to scan the whole array). For this purpose, an object has a
+ * field (int) which contains the index of the object itself in the
+ * heap. When the object is moved, the field must also be updated.
+ * The offset of the index in the object is stored in the 'offset'
+ * field in the heap descriptor. The assumption is that this offset
+ * is non-zero if we want to support extract from the middle.
+ */
struct dn_heap_entry {
dn_key key ; /* sorting key. Topmost element is smallest one */
void *object ; /* object pointer */
@@ -87,13 +111,15 @@ struct dn_heap {
#define MT_DUMMYNET MT_CONTROL
-
/*
* struct dn_pkt identifies a packet in the dummynet queue. The
* first part is really an m_hdr for implementation purposes, and some
* fields are saved there. When passing the packet back to the ip_input/
- * ip_output(), the struct is prepended to the mbuf chain with type
+ * ip_output()/bdg_forward, the struct is prepended to the mbuf chain with type
* MT_DUMMYNET, and contains the pointer to the matching rule.
+ *
+ * Note: there is no real need to make this structure contain an m_hdr,
+ * in the future this should be changed to a normal data structure.
*/
struct dn_pkt {
struct m_hdr hdr ;
@@ -113,33 +139,69 @@ struct dn_pkt {
};
/*
- * Overall structure (with WFQ):
+ * Overall structure of dummynet (with WF2Q+):
+
+In dummynet, packets are selected with the firewall rules, and passed
+to two different objects: PIPE or QUEUE.
+
+A QUEUE is just a queue with configurable size and queue management
+policy. It is also associated with a mask (to discriminate among
+different flows), a weight (used to give different shares of the
+bandwidth to different flows) and a "pipe", which essentially
+supplies the transmit clock for all queues associated with that
+pipe.
+
+A PIPE emulates a fixed-bandwidth link, whose bandwidth is
+configurable. The "clock" for a pipe can come from either an
+internal timer, or from the transmit interrupt of an interface.
+A pipe is also associated with one queue (or more, if masks are
+used), where all packets for that pipe are stored.
+
+The bandwidth available on the pipe is shared by the queues
+associated with that pipe (only one in case the packet is sent
+to a PIPE) according to the WF2Q+ scheduling algorithm and the
+configured weights.
+
+In general, incoming packets are stored in the appropriate queue,
+which is then placed into one of a few heaps managed by a scheduler
+to decide when the packet should be extracted.
+The scheduler (a function called dummynet()) is run at every timer
+tick, and grabs queues from the head of the heaps when they are
+ready for processing.
+
+There are three data structures defining a pipe and associated queues:
-We have 3 data structures definining a pipe and associated queues:
+ dn_pipe, which contains the main configuration parameters related
- to delay and bandwidth
- + dn_flow_set which contains WFQ configuration, flow
- masks, plr and RED configuration
- + dn_flow_queue which is the per-flow queue.
- Multiple dn_flow_set can be linked to the same pipe, and multiple
- dn_flow_queue can be linked to the same dn_flow_set.
-
- During configuration we set the dn_flow_set and dn_pipe parameters.
- At runtime: packets are sent to the dn_flow_set (either WFQ ones, or
- the one embedded in the dn_pipe for fixed-rate flows) which in turn
- dispatches them to the appropriate dn_flow_queue (created dynamically
- according to the masks).
- The transmit clock for fixed rate flows (ready_event) selects the
- dn_flow_queue to be used to transmit the next packet. For WF2Q,
- wfq_ready_event() extract a pipe which in turn selects the right
- flow using a number of heaps defined into the pipe.
+ to delay and bandwidth;
+ + dn_flow_set, which contains WF2Q+ configuration, flow
+ masks, plr and RED configuration;
+ + dn_flow_queue, which is the per-flow queue (containing the packets)
+
+Multiple dn_flow_set can be linked to the same pipe, and multiple
+dn_flow_queue can be linked to the same dn_flow_set.
+All data structures are linked in a linear list which is used for
+housekeeping purposes.
+
+During configuration, we create and initialize the dn_flow_set
+and dn_pipe structures (a dn_pipe also contains a dn_flow_set).
+
+At runtime: packets are sent to the appropriate dn_flow_set (either
+WFQ ones, or the one embedded in the dn_pipe for fixed-rate flows),
+which in turn dispatches them to the appropriate dn_flow_queue
+(created dynamically according to the masks).
+
+The transmit clock for fixed rate flows (ready_event()) selects the
+dn_flow_queue to be used to transmit the next packet. For WF2Q,
+wfq_ready_event() extracts a pipe which in turn selects the right
+flow using a number of heaps defined in the pipe itself.
*
*/
/*
- * We use per flow queues. Hashing is used to select the right slot,
- * then we scan the list to match the flow-id.
+ * Per-flow queue. This contains the flow identifier, the queue
+ * of packets, counters, and parameters used to support both RED and
+ * WF2Q+.
*/
struct dn_flow_queue {
struct dn_flow_queue *next ;
@@ -171,6 +233,15 @@ struct dn_flow_queue {
*/
} ;
+/*
+ * flow_set descriptor. Contains the "template" parameters for the
+ * queue configuration, and pointers to the hash table of dn_flow_queue's.
+ *
+ * The hash table is an array of lists -- we identify the slot by
+ * hashing the flow-id, then scan the list looking for a match.
+ * The size of the hash table (buckets) is configurable on a per-queue
+ * basis.
+ */
struct dn_flow_set {
struct dn_flow_set *next; /* next flow set in all_flow_sets list */
@@ -221,15 +292,19 @@ struct dn_flow_set {
} ;
/*
- * Pipe descriptor. Contains global parameters, delay-line queue.
+ * Pipe descriptor. Contains global parameters, delay-line queue,
+ * and the flow_set used for fixed-rate queues.
*
- * For WF2Q support it also has 3 heaps holding dn_flow_queue:
+ * For WF2Q support it also has 4 heaps holding dn_flow_queue:
* not_eligible_heap, for queues whose start time is higher
* than the virtual time. Sorted by start time.
* scheduler_heap, for queues eligible for scheduling. Sorted by
* finish time.
* backlogged_heap, all flows in the two heaps above, sorted by
* start time. This is used to compute the virtual time.
+ * idle_heap, all flows that are idle and can be removed. We
+ * do that on each tick so that we do not slow down
+ * forwarding operations too much.
*
*/
struct dn_pipe { /* a pipe */
OpenPOWER on IntegriCloud