diff options
author | luigi <luigi@FreeBSD.org> | 2001-01-17 01:09:40 +0000 |
---|---|---|
committer | luigi <luigi@FreeBSD.org> | 2001-01-17 01:09:40 +0000 |
commit | 02bffeb68b29f64d1673f2d3aff9577227e2736c (patch) | |
tree | a270fd63d25d2d69d711c271cbd3bfcf4fd5648f /sys/netinet | |
parent | 69aff11b5db450d01fcdd725b06997b5064c6461 (diff) | |
download | FreeBSD-src-02bffeb68b29f64d1673f2d3aff9577227e2736c.zip FreeBSD-src-02bffeb68b29f64d1673f2d3aff9577227e2736c.tar.gz |
Document the data structures and operation of dummynet, so that the next
time I or someone else browses through this code we do not have a hard
time understanding what is going on.
Diffstat (limited to 'sys/netinet')
-rw-r--r-- | sys/netinet/ip_dummynet.h | 159 |
1 files changed, 117 insertions, 42 deletions
diff --git a/sys/netinet/ip_dummynet.h b/sys/netinet/ip_dummynet.h index 504995a..5578525 100644 --- a/sys/netinet/ip_dummynet.h +++ b/sys/netinet/ip_dummynet.h @@ -31,42 +31,66 @@ #define _IP_DUMMYNET_H /* - * Definition of dummynet data structures. - * We first start with the heap which is used by the scheduler. - * - * Each list contains a set of parameters identifying the pipe, and - * a set of packets queued on the pipe itself. - * - * I could have used queue macros, but the management i have - * is pretty simple and this makes the code more portable. + * Definition of dummynet data structures. In the structures, I decided + * not to use the macros in <sys/queue.h> in the hope of making the code + * easier to port to other architectures. The type of lists and queue we + * use here is pretty simple anyways. */ /* - * The key for the heap is used for two different values - 1. timer ticks- max 10K/second, so 32 bits are enough - 2. virtual times. These increase in steps of len/x, where len is the - packet length, and x is either the weight of the flow, or the - sum of all weights. - If we limit to max 1000 flows and a max weight of 100, then - x needs 17 bits. The packet size is 16 bits, so we can easily - overflow if we do not allow errors. - + * We start with a heap, which is used in the scheduler to decide when + * to transmit packets etc. + * + * The key for the heap is used for two different values: + * + * 1. timer ticks- max 10K/second, so 32 bits are enough; + * + * 2. virtual times. These increase in steps of len/x, where len is the + * packet length, and x is either the weight of the flow, or the + * sum of all weights. + * If we limit to max 1000 flows and a max weight of 100, then + * x needs 17 bits. The packet size is 16 bits, so we can easily + * overflow if we do not allow errors. + * So we use a key "dn_key" which is 64 bits. Some macros are used to + * compare key values and handle wraparounds. + * MAX64 returns the largest of two key values. 
+ * MY_M is used as a shift count when doing fixed point arithmetic + * (a better name would be useful...). */ typedef u_int64_t dn_key ; /* sorting key */ #define DN_KEY_LT(a,b) ((int64_t)((a)-(b)) < 0) #define DN_KEY_LEQ(a,b) ((int64_t)((a)-(b)) <= 0) #define DN_KEY_GT(a,b) ((int64_t)((a)-(b)) > 0) #define DN_KEY_GEQ(a,b) ((int64_t)((a)-(b)) >= 0) -/* XXX check names of next two macros */ #define MAX64(x,y) (( (int64_t) ( (y)-(x) )) > 0 ) ? (y) : (x) #define MY_M 16 /* number of left shift to obtain a larger precision */ + /* * XXX With this scaling, max 1000 flows, max weight 100, 1Gbit/s, the * virtual time wraps every 15 days. */ +/* + * The OFFSET_OF macro is used to return the offset of a field within + * a structure. It is used by the heap management routines. + */ #define OFFSET_OF(type, field) ((int)&( ((type *)0)->field) ) +/* + * A heap entry is made of a key and a pointer to the actual + * object stored in the heap. + * The heap is an array of dn_heap_entry entries, dynamically allocated. + * Current size is "size", with "elements" actually in use. + * The heap normally supports only ordered insert and extract from the top. + * If we want to extract an object from the middle of the heap, we + * have to know where the object itself is located in the heap (or we + * need to scan the whole array). To this purpose, an object has a + * field (int) which contains the index of the object itself into the + * heap. When the object is moved, the field must also be updated. + * The offset of the index in the object is stored in the 'offset' + * field in the heap descriptor. The assumption is that this offset + * is non-zero if we want to support extract from the middle. + */ struct dn_heap_entry { dn_key key ; /* sorting key. Topmost element is smallest one */ void *object ; /* object pointer */ @@ -87,13 +111,15 @@ struct dn_heap { #define MT_DUMMYNET MT_CONTROL - /* * struct dn_pkt identifies a packet in the dummynet queue. 
The * first part is really an m_hdr for implementation purposes, and some * fields are saved there. When passing the packet back to the ip_input/ - * ip_output(), the struct is prepended to the mbuf chain with type + * ip_output()/bdg_forward, the struct is prepended to the mbuf chain with type * MT_DUMMYNET, and contains the pointer to the matching rule. + * + * Note: there is no real need to make this structure contain an m_hdr, + * in the future this should be changed to a normal data structure. */ struct dn_pkt { struct m_hdr hdr ; @@ -113,33 +139,69 @@ struct dn_pkt { }; /* - * Overall structure (with WFQ): + * Overall structure of dummynet (with WF2Q+): + +In dummynet, packets are selected with the firewall rules, and passed +to two different objects: PIPE or QUEUE. + +A QUEUE is just a queue with configurable size and queue management +policy. It is also associated with a mask (to discriminate among +different flows), a weight (used to give different shares of the +bandwidth to different flows) and a "pipe", which essentially +supplies the transmit clock for all queues associated with that +pipe. + +A PIPE emulates a fixed-bandwidth link, whose bandwidth is +configurable. The "clock" for a pipe can come from either an +internal timer, or from the transmit interrupt of an interface. +A pipe is also associated with one (or more, if masks are used) +queue, where all packets for that pipe are stored. + +The bandwidth available on the pipe is shared by the queues +associated with that pipe (only one in case the packet is sent +to a PIPE) according to the WF2Q+ scheduling algorithm and the +configured weights. + +In general, incoming packets are stored in the appropriate queue, +which is then placed into one of a few heaps managed by a scheduler +to decide when the packet should be extracted. +The scheduler (a function called dummynet()) is run at every timer +tick, and grabs queues from the head of the heaps when they are +ready for processing. 
+ +There are three data structures defining a pipe and associated queues: -We have 3 data structures definining a pipe and associated queues: + dn_pipe, which contains the main configuration parameters related - to delay and bandwidth - + dn_flow_set which contains WFQ configuration, flow - masks, plr and RED configuration - + dn_flow_queue which is the per-flow queue. - Multiple dn_flow_set can be linked to the same pipe, and multiple - dn_flow_queue can be linked to the same dn_flow_set. - - During configuration we set the dn_flow_set and dn_pipe parameters. - At runtime: packets are sent to the dn_flow_set (either WFQ ones, or - the one embedded in the dn_pipe for fixed-rate flows) which in turn - dispatches them to the appropriate dn_flow_queue (created dynamically - according to the masks). - The transmit clock for fixed rate flows (ready_event) selects the - dn_flow_queue to be used to transmit the next packet. For WF2Q, - wfq_ready_event() extract a pipe which in turn selects the right - flow using a number of heaps defined into the pipe. + to delay and bandwidth; + + dn_flow_set, which contains WF2Q+ configuration, flow + masks, plr and RED configuration; + + dn_flow_queue, which is the per-flow queue (containing the packets) + +Multiple dn_flow_set can be linked to the same pipe, and multiple +dn_flow_queue can be linked to the same dn_flow_set. +All data structures are linked in a linear list which is used for +housekeeping purposes. + +During configuration, we create and initialize the dn_flow_set +and dn_pipe structures (a dn_pipe also contains a dn_flow_set). + +At runtime: packets are sent to the appropriate dn_flow_set (either +WFQ ones, or the one embedded in the dn_pipe for fixed-rate flows), +which in turn dispatches them to the appropriate dn_flow_queue +(created dynamically according to the masks). + +The transmit clock for fixed rate flows (ready_event()) selects the +dn_flow_queue to be used to transmit the next packet. 
For WF2Q, +wfq_ready_event() extract a pipe which in turn selects the right +flow using a number of heaps defined into the pipe itself. * */ /* - * We use per flow queues. Hashing is used to select the right slot, - * then we scan the list to match the flow-id. + * per flow queue. This contains the flow identifier, the queue + * of packets, counters, and parameters used to support both RED and + * WF2Q+. */ struct dn_flow_queue { struct dn_flow_queue *next ; @@ -171,6 +233,15 @@ struct dn_flow_queue { */ } ; +/* + * flow_set descriptor. Contains the "template" parameters for the + * queue configuration, and pointers to the hash table of dn_flow_queue's. + * + * The hash table is an array of lists -- we identify the slot by + * hashing the flow-id, then scan the list looking for a match. + * The size of the hash table (buckets) is configurable on a per-queue + * basis. + */ struct dn_flow_set { struct dn_flow_set *next; /* next flow set in all_flow_sets list */ @@ -221,15 +292,19 @@ struct dn_flow_set { } ; /* - * Pipe descriptor. Contains global parameters, delay-line queue. + * Pipe descriptor. Contains global parameters, delay-line queue, + * and the flow_set used for fixed-rate queues. * - * For WF2Q support it also has 3 heaps holding dn_flow_queue: + * For WF2Q support it also has 4 heaps holding dn_flow_queue: * not_eligible_heap, for queues whose start time is higher * than the virtual time. Sorted by start time. * scheduler_heap, for queues eligible for scheduling. Sorted by * finish time. * backlogged_heap, all flows in the two heaps above, sorted by * start time. This is used to compute the virtual time. + * idle_heap, all flows that are idle and can be removed. We + * do that on each tick so we do not slow down too much + * operations during forwarding. * */ struct dn_pipe { /* a pipe */ |