diff options
author | Luiz Otavio O Souza <luiz@netgate.com> | 2016-04-17 23:10:07 -0500 |
---|---|---|
committer | Luiz Otavio O Souza <luiz@netgate.com> | 2016-04-17 23:10:07 -0500 |
commit | d83ceefd74a8cd372bb5f899a80bd540ceca8c8f (patch) | |
tree | ae4793ba52720ca9310b957817e256c9be09c514 | |
parent | 0d7eb369f4ec7afd37b400407072943d34864188 (diff) | |
parent | dab85cf430c20f64caedfab11029a1c75239181c (diff) | |
download | FreeBSD-src-d83ceefd74a8cd372bb5f899a80bd540ceca8c8f.zip FreeBSD-src-d83ceefd74a8cd372bb5f899a80bd540ceca8c8f.tar.gz |
Merge remote-tracking branch 'origin/stable/10' into devel
33 files changed, 2593 insertions, 50 deletions
diff --git a/sbin/devd/devd.conf.5 b/sbin/devd/devd.conf.5 index a174de8..3df30aa 100644 --- a/sbin/devd/devd.conf.5 +++ b/sbin/devd/devd.conf.5 @@ -41,7 +41,7 @@ .\" ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS .\" SOFTWARE. .\" -.Dd July 11, 2015 +.Dd April 14, 2016 .Dt DEVD.CONF 5 .Os .Sh NAME @@ -492,6 +492,23 @@ Information about the state of the system. Notification that the system has woken from the suspended state. .El .El +.Pp +.It Li CAM +Events related to the +.Xr cam 4 +system. +.Bl -tag -width ".Sy Subsystem" -compact +.It Sy Subsystem +.It Li periph +Events related to peripheral devices. +.Bl -tag -width ".li timeout" -compact +.It Sy Type +.It Li error +Generic errors. +.It Li timeout +Command timeouts. +.El +.El .El .Pp A link state change to UP on the interface @@ -621,4 +638,5 @@ has many additional examples. .Xr coretemp 4 , .Xr devfs 5 , .Xr re_format 7 , -.Xr devd 8 +.Xr devd 8 , +.Xr cam 4 diff --git a/sbin/pfctl/parse.y b/sbin/pfctl/parse.y index 3edb634..b5577e2 100644 --- a/sbin/pfctl/parse.y +++ b/sbin/pfctl/parse.y @@ -48,8 +48,10 @@ __FBSDID("$FreeBSD$"); #include <arpa/inet.h> #include <altq/altq.h> #include <altq/altq_cbq.h> +#include <altq/altq_codel.h> #include <altq/altq_priq.h> #include <altq/altq_hfsc.h> +#include <altq/altq_fairq.h> #include <stdio.h> #include <unistd.h> @@ -315,8 +317,9 @@ struct pool_opts { } pool_opts; - +struct codel_opts codel_opts; struct node_hfsc_opts hfsc_opts; +struct node_fairq_opts fairq_opts; struct node_state_opt *keep_state_defaults = NULL; int disallow_table(struct node_host *, const char *); @@ -440,6 +443,8 @@ typedef struct { struct table_opts table_opts; struct pool_opts pool_opts; struct node_hfsc_opts hfsc_opts; + struct node_fairq_opts fairq_opts; + struct codel_opts codel_opts; } v; int lineno; } YYSTYPE; @@ -464,8 +469,8 @@ int parseport(char *, struct range *r, int); %token REQUIREORDER SYNPROXY FINGERPRINTS NOSYNC DEBUG SKIP HOSTID %token ANTISPOOF FOR INCLUDE %token BITMASK RANDOM SOURCEHASH ROUNDROBIN STATICPORT PROBABILITY -%token ALTQ CBQ PRIQ HFSC BANDWIDTH TBRSIZE LINKSHARE REALTIME UPPERLIMIT -%token QUEUE PRIORITY QLIMIT RTABLE +%token ALTQ CBQ CODEL PRIQ HFSC FAIRQ BANDWIDTH TBRSIZE LINKSHARE REALTIME +%token UPPERLIMIT QUEUE PRIORITY QLIMIT HOGS BUCKETS RTABLE TARGET INTERVAL %token DNPIPE DNQUEUE %token LOAD RULESET_OPTIMIZATION %token STICKYADDRESS MAXSRCSTATES MAXSRCNODES SOURCETRACK GLOBAL RULE @@ -515,6 +520,8 @@ int parseport(char *, struct range *r, int); %type <v.number> cbqflags_list cbqflags_item %type <v.number> priqflags_list priqflags_item %type <v.hfsc_opts> hfscopts_list hfscopts_item hfsc_opts +%type <v.fairq_opts> fairqopts_list fairqopts_item fairq_opts +%type <v.codel_opts> codelopts_list codelopts_item codel_opts %type <v.queue_bwspec> bandwidth %type <v.filter_opts> filter_opts filter_opt filter_opts_l %type <v.antispoof_opts> antispoof_opts antispoof_opt antispoof_opts_l @@ -1496,7 +1503,7 @@ altqif : ALTQ interface queue_opts QUEUE qassign { a.scheduler = $3.scheduler.qtype; a.qlimit = $3.qlimit; a.tbrsize = $3.tbrsize; - if ($5 == NULL) { + if ($5 == NULL && $3.scheduler.qtype != ALTQT_CODEL) { yyerror("no child queues specified"); YYERROR; } @@ -1697,6 +1704,24 @@ scheduler : CBQ { $$.qtype = ALTQT_HFSC; $$.data.hfsc_opts = $3; } + | FAIRQ { + $$.qtype = ALTQT_FAIRQ; + bzero(&$$.data.fairq_opts, + sizeof(struct node_fairq_opts)); + } + | FAIRQ '(' fairq_opts ')' { + $$.qtype = ALTQT_FAIRQ; + $$.data.fairq_opts = $3; + } + | CODEL { + $$.qtype = ALTQT_CODEL; + bzero(&$$.data.codel_opts, + sizeof(struct codel_opts)); + } + | CODEL '(' codel_opts ')' { + $$.qtype = ALTQT_CODEL; + $$.data.codel_opts = $3; + } ; cbqflags_list : cbqflags_item { $$ |= $1; } @@ -1714,6 +1739,8 @@ cbqflags_item : STRING { $$ = CBQCLF_RED|CBQCLF_ECN; else if (!strcmp($1, "rio")) $$ = CBQCLF_RIO; + else if (!strcmp($1, "codel")) + $$ = CBQCLF_CODEL; else { yyerror("unknown cbq flag \"%s\"", $1); free($1); @@ -1736,6 +1763,8 @@ priqflags_item : STRING { $$ = PRCF_RED|PRCF_ECN; else if (!strcmp($1, "rio")) $$ = PRCF_RIO; + else if (!strcmp($1, "codel")) + $$ = PRCF_CODEL; else { yyerror("unknown priq flag \"%s\"", $1); free($1); @@ -1836,6 +1865,8 @@ hfscopts_item : LINKSHARE bandwidth { hfsc_opts.flags |= HFCF_RED|HFCF_ECN; else if (!strcmp($1, "rio")) hfsc_opts.flags |= HFCF_RIO; + else if (!strcmp($1, "codel")) + hfsc_opts.flags |= HFCF_CODEL; else { yyerror("unknown hfsc flag \"%s\"", $1); free($1); @@ -1845,6 +1876,102 @@ hfscopts_item : LINKSHARE bandwidth { } ; +fairq_opts : { + bzero(&fairq_opts, + sizeof(struct node_fairq_opts)); + } + fairqopts_list { + $$ = fairq_opts; + } + ; + +fairqopts_list : fairqopts_item + | fairqopts_list comma fairqopts_item + ; + +fairqopts_item : LINKSHARE bandwidth { + if (fairq_opts.linkshare.used) { + yyerror("linkshare already specified"); + YYERROR; + } + fairq_opts.linkshare.m2 = $2; + fairq_opts.linkshare.used = 1; + } + | LINKSHARE '(' bandwidth number bandwidth ')' { + if (fairq_opts.linkshare.used) { + yyerror("linkshare already specified"); + YYERROR; + } + fairq_opts.linkshare.m1 = $3; + fairq_opts.linkshare.d = $4; + fairq_opts.linkshare.m2 = $5; + fairq_opts.linkshare.used = 1; + } + | HOGS bandwidth { + fairq_opts.hogs_bw = $2; + } + | BUCKETS number { + fairq_opts.nbuckets = $2; + } + | STRING { + if (!strcmp($1, "default")) + fairq_opts.flags |= FARF_DEFAULTCLASS; + else if (!strcmp($1, "red")) + fairq_opts.flags |= FARF_RED; + else if (!strcmp($1, "ecn")) + fairq_opts.flags |= FARF_RED|FARF_ECN; + else if (!strcmp($1, "rio")) + fairq_opts.flags |= FARF_RIO; + else if (!strcmp($1, "codel")) + fairq_opts.flags |= FARF_CODEL; + else { + yyerror("unknown fairq flag \"%s\"", $1); + free($1); + YYERROR; + } + free($1); + } + ; + +codel_opts : { + bzero(&codel_opts, + sizeof(struct codel_opts)); + } + codelopts_list { + $$ = codel_opts; + } + ; + +codelopts_list : codelopts_item + | codelopts_list comma codelopts_item + ; + +codelopts_item : INTERVAL number { + if (codel_opts.interval) { + yyerror("interval already specified"); + YYERROR; + } + codel_opts.interval = $2; + } + | TARGET number { + if (codel_opts.target) { + yyerror("target already specified"); + YYERROR; + } + codel_opts.target = $2; + } + | STRING { + if (!strcmp($1, "ecn")) + codel_opts.ecn = 1; + else { + yyerror("unknown codel option \"%s\"", $1); + free($1); + YYERROR; + } + free($1); + } + ; + qassign : /* empty */ { $$ = NULL; } | qassign_item { $$ = $1; } | '{' optnl qassign_list '}' { $$ = $3; } @@ -5014,7 +5141,8 @@ expand_altq(struct pf_altq *a, struct node_if *interfaces, if ((pf->loadopt & PFCTL_FLAG_ALTQ) == 0) { FREE_LIST(struct node_if, interfaces); - FREE_LIST(struct node_queue, nqueues); + if (nqueues) + FREE_LIST(struct node_queue, nqueues); return (0); } @@ -5105,7 +5233,8 @@ expand_altq(struct pf_altq *a, struct node_if *interfaces, } ); FREE_LIST(struct node_if, interfaces); - FREE_LIST(struct node_queue, nqueues); + if (nqueues) + FREE_LIST(struct node_queue, nqueues); return (errs); } @@ -5516,8 +5645,10 @@ lookup(char *s) { "bitmask", BITMASK}, { "block", BLOCK}, { "block-policy", BLOCKPOLICY}, + { "buckets", BUCKETS}, { "cbq", CBQ}, { "code", CODE}, + { "codelq", CODEL}, { "crop", FRAGCROP}, { "debug", DEBUG}, { "divert-reply", DIVERTREPLY}, @@ -5528,6 +5659,7 @@ lookup(char *s) { "drop-ovl", FRAGDROP}, { "dscp", DSCP}, { "dup-to", DUPTO}, + { "fairq", FAIRQ}, { "fastroute", FASTROUTE}, { "file", FILENAME}, { "fingerprints", FINGERPRINTS}, @@ -5540,6 +5672,7 @@ lookup(char *s) { "global", GLOBAL}, { "group", GROUP}, { "hfsc", HFSC}, + { "hogs", HOGS}, { "hostid", HOSTID}, { "icmp-type", ICMPTYPE}, { "icmp6-type", ICMP6TYPE}, @@ -5550,6 +5683,7 @@ lookup(char *s) { "include", INCLUDE}, { "inet", INET}, { "inet6", INET6}, + { "interval", INTERVAL}, { "keep", KEEP}, { "label", LABEL}, { "limit", LIMIT}, @@ -5622,6 +5756,7 @@ lookup(char *s) { "table", TABLE}, { "tag", TAG}, { "tagged", TAGGED}, + { "target", TARGET}, { "tbrsize", TBRSIZE}, { "timeout", TIMEOUT}, { "to", TO}, diff --git a/sbin/pfctl/pfctl_altq.c b/sbin/pfctl/pfctl_altq.c index a99e641..9a21754 100644 --- a/sbin/pfctl/pfctl_altq.c +++ b/sbin/pfctl/pfctl_altq.c @@ -41,8 +41,10 @@ __FBSDID("$FreeBSD$"); #include <altq/altq.h> #include <altq/altq_cbq.h> +#include <altq/altq_codel.h> #include <altq/altq_priq.h> #include <altq/altq_hfsc.h> +#include <altq/altq_fairq.h> #include "pfctl_parser.h" #include "pfctl.h" @@ -60,6 +62,9 @@ static int cbq_compute_idletime(struct pfctl *, struct pf_altq *); static int check_commit_cbq(int, int, struct pf_altq *); static int print_cbq_opts(const struct pf_altq *); +static int print_codel_opts(const struct pf_altq *, + const struct node_queue_opt *); + static int eval_pfqueue_priq(struct pfctl *, struct pf_altq *); static int check_commit_priq(int, int, struct pf_altq *); static int print_priq_opts(const struct pf_altq *); @@ -69,6 +74,11 @@ static int check_commit_hfsc(int, int, struct pf_altq *); static int print_hfsc_opts(const struct pf_altq *, const struct node_queue_opt *); +static int eval_pfqueue_fairq(struct pfctl *, struct pf_altq *); +static int print_fairq_opts(const struct pf_altq *, + const struct node_queue_opt *); +static int check_commit_fairq(int, int, struct pf_altq *); + static void gsc_add_sc(struct gen_sc *, struct service_curve *); static int is_gsc_under_sc(struct gen_sc *, struct service_curve *); @@ -89,6 +99,8 @@ int eval_queue_opts(struct pf_altq *, struct node_queue_opt *, u_int32_t eval_bwspec(struct node_queue_bw *, u_int32_t); void print_hfsc_sc(const char *, u_int, u_int, u_int, const struct node_hfsc_sc *); +void print_fairq_sc(const char *, u_int, u_int, u_int, + const struct node_fairq_sc *); void pfaltq_store(struct pf_altq *a) @@ -174,6 +186,14 @@ print_altq(const struct pf_altq *a, unsigned int level, if (!print_hfsc_opts(a, qopts)) printf("hfsc "); break; + case ALTQT_FAIRQ: + if (!print_fairq_opts(a, qopts)) + printf("fairq "); + break; + case ALTQT_CODEL: + if (!print_codel_opts(a, qopts)) + printf("codel "); + break; } if (bw != NULL && bw->bw_percent > 0) { @@ -204,7 +224,8 @@ print_queue(const struct pf_altq *a, unsigned int level, printf("%s ", a->qname); if (print_interface) printf("on %s ", a->ifname); - if (a->scheduler == ALTQT_CBQ || a->scheduler == ALTQT_HFSC) { + if (a->scheduler == ALTQT_CBQ || a->scheduler == ALTQT_HFSC || + a->scheduler == ALTQT_FAIRQ) { if (bw != NULL && bw->bw_percent > 0) { if (bw->bw_percent < 100) printf("bandwidth %u%% ", bw->bw_percent); @@ -225,6 +246,9 @@ print_queue(const struct pf_altq *a, unsigned int level, case ALTQT_HFSC: print_hfsc_opts(a, qopts); break; + case ALTQT_FAIRQ: + print_fairq_opts(a, qopts); + break; } } @@ -291,6 +315,9 @@ check_commit_altq(int dev, int opts) case ALTQT_HFSC: error = check_commit_hfsc(dev, opts, altq); break; + case ALTQT_FAIRQ: + error = check_commit_fairq(dev, opts, altq); + break; default: break; } @@ -339,7 +366,8 @@ eval_pfqueue(struct pfctl *pf, struct pf_altq *pa, struct node_queue_bw *bw, if (pa->qlimit == 0) pa->qlimit = DEFAULT_QLIMIT; - if (pa->scheduler == ALTQT_CBQ || pa->scheduler == ALTQT_HFSC) { + if (pa->scheduler == ALTQT_CBQ || pa->scheduler == ALTQT_HFSC || + pa->scheduler == ALTQT_FAIRQ) { pa->bandwidth = eval_bwspec(bw, parent == NULL ? 0 : parent->bandwidth); @@ -385,6 +413,9 @@ eval_pfqueue(struct pfctl *pf, struct pf_altq *pa, struct node_queue_bw *bw, case ALTQT_HFSC: error = eval_pfqueue_hfsc(pf, pa); break; + case ALTQT_FAIRQ: + error = eval_pfqueue_fairq(pf, pa); + break; default: break; } @@ -565,6 +596,8 @@ print_cbq_opts(const struct pf_altq *a) printf(" ecn"); if (opts->flags & CBQCLF_RIO) printf(" rio"); + if (opts->flags & CBQCLF_CODEL) + printf(" codel"); if (opts->flags & CBQCLF_CLEARDSCP) printf(" cleardscp"); if (opts->flags & CBQCLF_FLOWVALVE) @@ -652,6 +685,8 @@ print_priq_opts(const struct pf_altq *a) printf(" ecn"); if (opts->flags & PRCF_RIO) printf(" rio"); + if (opts->flags & PRCF_CODEL) + printf(" codel"); if (opts->flags & PRCF_CLEARDSCP) printf(" cleardscp"); if (opts->flags & PRCF_DEFAULTCLASS) @@ -797,6 +832,85 @@ err_ret: return (-1); } +/* + * FAIRQ support functions + */ +static int +eval_pfqueue_fairq(struct pfctl *pf __unused, struct pf_altq *pa) +{ + struct pf_altq *altq, *parent; + struct fairq_opts *opts; + struct service_curve sc; + + opts = &pa->pq_u.fairq_opts; + + if (pa->parent[0] == 0) { + /* root queue */ + opts->lssc_m1 = pa->ifbandwidth; + opts->lssc_m2 = pa->ifbandwidth; + opts->lssc_d = 0; + return (0); + } + + LIST_INIT(&lssc); + + /* if link_share is not specified, use bandwidth */ + if (opts->lssc_m2 == 0) + opts->lssc_m2 = pa->bandwidth; + + /* + * admission control: + * for the real-time service curve, the sum of the service curves + * should not exceed 80% of the interface bandwidth. 20% is reserved + * not to over-commit the actual interface bandwidth. + * for the link-sharing service curve, the sum of the child service + * curve should not exceed the parent service curve. + * for the upper-limit service curve, the assigned bandwidth should + * be smaller than the interface bandwidth, and the upper-limit should + * be larger than the real-time service curve when both are defined. + */ + parent = qname_to_pfaltq(pa->parent, pa->ifname); + if (parent == NULL) + errx(1, "parent %s not found for %s", pa->parent, pa->qname); + + TAILQ_FOREACH(altq, &altqs, entries) { + if (strncmp(altq->ifname, pa->ifname, IFNAMSIZ) != 0) + continue; + if (altq->qname[0] == 0) /* this is for interface */ + continue; + + if (strncmp(altq->parent, pa->parent, PF_QNAME_SIZE) != 0) + continue; + + /* if the class has a link-sharing service curve, add it. */ + if (opts->lssc_m2 != 0 && altq->pq_u.fairq_opts.lssc_m2 != 0) { + sc.m1 = altq->pq_u.fairq_opts.lssc_m1; + sc.d = altq->pq_u.fairq_opts.lssc_d; + sc.m2 = altq->pq_u.fairq_opts.lssc_m2; + gsc_add_sc(&lssc, &sc); + } + } + + /* check the link-sharing service curve. */ + if (opts->lssc_m2 != 0) { + sc.m1 = parent->pq_u.fairq_opts.lssc_m1; + sc.d = parent->pq_u.fairq_opts.lssc_d; + sc.m2 = parent->pq_u.fairq_opts.lssc_m2; + if (!is_gsc_under_sc(&lssc, &sc)) { + warnx("link-sharing sc exceeds parent's sc"); + goto err_ret; + } + } + + gsc_destroy(&lssc); + + return (0); + +err_ret: + gsc_destroy(&lssc); + return (-1); +} + static int check_commit_hfsc(int dev, int opts, struct pf_altq *pa) { @@ -837,6 +951,43 @@ check_commit_hfsc(int dev, int opts, struct pf_altq *pa) } static int +check_commit_fairq(int dev __unused, int opts __unused, struct pf_altq *pa) +{ + struct pf_altq *altq, *def = NULL; + int default_class; + int error = 0; + + /* check if fairq has one default queue for this interface */ + default_class = 0; + TAILQ_FOREACH(altq, &altqs, entries) { + if (strncmp(altq->ifname, pa->ifname, IFNAMSIZ) != 0) + continue; + if (altq->qname[0] == 0) /* this is for interface */ + continue; + if (altq->pq_u.fairq_opts.flags & FARF_DEFAULTCLASS) { + default_class++; + def = altq; + } + } + if (default_class != 1) { + warnx("should have one default queue on %s", pa->ifname); + return (1); + } + /* make sure the default queue is a leaf */ + TAILQ_FOREACH(altq, &altqs, entries) { + if (strncmp(altq->ifname, pa->ifname, IFNAMSIZ) != 0) + continue; + if (altq->qname[0] == 0) /* this is for interface */ + continue; + if (strncmp(altq->parent, def->qname, PF_QNAME_SIZE) == 0) { + warnx("default queue is not a leaf"); + error++; + } + } + return (error); +} + +static int print_hfsc_opts(const struct pf_altq *a, const struct node_queue_opt *qopts) { const struct hfsc_opts *opts; @@ -861,6 +1012,8 @@ print_hfsc_opts(const struct pf_altq *a, const struct node_queue_opt *qopts) printf(" ecn"); if (opts->flags & HFCF_RIO) printf(" rio"); + if (opts->flags & HFCF_CODEL) + printf(" codel"); if (opts->flags & HFCF_CLEARDSCP) printf(" cleardscp"); if (opts->flags & HFCF_DEFAULTCLASS) @@ -882,6 +1035,67 @@ print_hfsc_opts(const struct pf_altq *a, const struct node_queue_opt *qopts) return (0); } +static int +print_codel_opts(const struct pf_altq *a, const struct node_queue_opt *qopts) +{ + const struct codel_opts *opts; + + opts = &a->pq_u.codel_opts; + if (opts->target || opts->interval || opts->ecn) { + printf("codel("); + if (opts->target) + printf(" target %d", opts->target); + if (opts->interval) + printf(" interval %d", opts->interval); + if (opts->ecn) + printf("ecn"); + printf(" ) "); + + return (1); + } + + return (0); +} + +static int +print_fairq_opts(const struct pf_altq *a, const struct node_queue_opt *qopts) +{ + const struct fairq_opts *opts; + const struct node_fairq_sc *loc_lssc; + + opts = &a->pq_u.fairq_opts; + if (qopts == NULL) + loc_lssc = NULL; + else + loc_lssc = &qopts->data.fairq_opts.linkshare; + + if (opts->flags || + (opts->lssc_m2 != 0 && (opts->lssc_m2 != a->bandwidth || + opts->lssc_d != 0))) { + printf("fairq("); + if (opts->flags & FARF_RED) + printf(" red"); + if (opts->flags & FARF_ECN) + printf(" ecn"); + if (opts->flags & FARF_RIO) + printf(" rio"); + if (opts->flags & FARF_CODEL) + printf(" codel"); + if (opts->flags & FARF_CLEARDSCP) + printf(" cleardscp"); + if (opts->flags & FARF_DEFAULTCLASS) + printf(" default"); + if (opts->lssc_m2 != 0 && (opts->lssc_m2 != a->bandwidth || + opts->lssc_d != 0)) + print_fairq_sc("linkshare", opts->lssc_m1, opts->lssc_d, + opts->lssc_m2, loc_lssc); + printf(" ) "); + + return (1); + } else + return (0); +} + /* * admission control using generalized service curve */ @@ -1201,6 +1415,28 @@ eval_queue_opts(struct pf_altq *pa, struct node_queue_opt *opts, opts->data.hfsc_opts.upperlimit.d; } break; + case ALTQT_FAIRQ: + pa->pq_u.fairq_opts.flags = opts->data.fairq_opts.flags; + pa->pq_u.fairq_opts.nbuckets = opts->data.fairq_opts.nbuckets; + pa->pq_u.fairq_opts.hogs_m1 = + eval_bwspec(&opts->data.fairq_opts.hogs_bw, ref_bw); + + if (opts->data.fairq_opts.linkshare.used) { + pa->pq_u.fairq_opts.lssc_m1 = + eval_bwspec(&opts->data.fairq_opts.linkshare.m1, + ref_bw); + pa->pq_u.fairq_opts.lssc_m2 = + eval_bwspec(&opts->data.fairq_opts.linkshare.m2, + ref_bw); + pa->pq_u.fairq_opts.lssc_d = + opts->data.fairq_opts.linkshare.d; + } + break; + case ALTQT_CODEL: + pa->pq_u.codel_opts.target = opts->data.codel_opts.target; + pa->pq_u.codel_opts.interval = opts->data.codel_opts.interval; + pa->pq_u.codel_opts.ecn = opts->data.codel_opts.ecn; + break; default: warnx("eval_queue_opts: unknown scheduler type %u", opts->qtype); @@ -1246,3 +1482,27 @@ print_hfsc_sc(const char *scname, u_int m1, u_int d, u_int m2, if (d != 0) printf(")"); } + +void +print_fairq_sc(const char *scname, u_int m1, u_int d, u_int m2, + const struct node_fairq_sc *sc) +{ + printf(" %s", scname); + + if (d != 0) { + printf("("); + if (sc != NULL && sc->m1.bw_percent > 0) + printf("%u%%", sc->m1.bw_percent); + else + printf("%s", rate2str((double)m1)); + printf(" %u", d); + } + + if (sc != NULL && sc->m2.bw_percent > 0) + printf(" %u%%", sc->m2.bw_percent); + else + printf(" %s", rate2str((double)m2)); + + if (d != 0) + printf(")"); +} diff --git a/sbin/pfctl/pfctl_parser.h b/sbin/pfctl/pfctl_parser.h index 5c909e9..2b7fea7 100644 --- a/sbin/pfctl/pfctl_parser.h +++ b/sbin/pfctl/pfctl_parser.h @@ -150,12 +150,28 @@ struct node_hfsc_opts { int flags; }; +struct node_fairq_sc { + struct node_queue_bw m1; /* slope of 1st segment; bps */ + u_int d; /* x-projection of m1; msec */ + struct node_queue_bw m2; /* slope of 2nd segment; bps */ + u_int8_t used; +}; + +struct node_fairq_opts { + struct node_fairq_sc linkshare; + struct node_queue_bw hogs_bw; + u_int nbuckets; + int flags; +}; + struct node_queue_opt { int qtype; union { struct cbq_opts cbq_opts; + struct codel_opts codel_opts; struct priq_opts priq_opts; struct node_hfsc_opts hfsc_opts; + struct node_fairq_opts fairq_opts; } data; }; diff --git a/sbin/pfctl/pfctl_qstats.c b/sbin/pfctl/pfctl_qstats.c index 95371e4..8ffd87d 100644 --- a/sbin/pfctl/pfctl_qstats.c +++ b/sbin/pfctl/pfctl_qstats.c @@ -36,8 +36,10 @@ __FBSDID("$FreeBSD$"); #include <altq/altq.h> #include <altq/altq_cbq.h> +#include <altq/altq_codel.h> #include <altq/altq_priq.h> #include <altq/altq_hfsc.h> +#include <altq/altq_fairq.h> #include "pfctl.h" #include "pfctl_parser.h" @@ -46,6 +48,8 @@ union class_stats { class_stats_t cbq_stats; struct priq_classstats priq_stats; struct hfsc_classstats hfsc_stats; + struct fairq_classstats fairq_stats; + struct codel_ifstats codel_stats; }; #define AVGN_MAX 8 @@ -75,8 +79,10 @@ struct pf_altq_node *pfctl_find_altq_node(struct pf_altq_node *, void pfctl_print_altq_node(int, const struct pf_altq_node *, unsigned, int); void print_cbqstats(struct queue_stats); +void print_codelstats(struct queue_stats); void print_priqstats(struct queue_stats); void print_hfscstats(struct queue_stats); +void print_fairqstats(struct queue_stats); void pfctl_free_altq_node(struct pf_altq_node *); void pfctl_print_altq_nodestat(int, const struct pf_altq_node *); @@ -162,7 +168,7 @@ pfctl_update_qstats(int dev, struct pf_altq_node **root) return (-1); } #ifdef __FreeBSD__ - if (pa.altq.qid > 0 && + if ((pa.altq.qid > 0 || pa.altq.scheduler == ALTQT_CODEL) && !(pa.altq.local_flags & PFALTQ_FLAG_IF_REMOVED)) { #else if (pa.altq.qid > 0) { @@ -300,7 +306,7 @@ pfctl_print_altq_node(int dev, const struct pf_altq_node *node, void pfctl_print_altq_nodestat(int dev, const struct pf_altq_node *a) { - if (a->altq.qid == 0) + if (a->altq.qid == 0 && a->altq.scheduler != ALTQT_CODEL) return; #ifdef __FreeBSD__ @@ -317,6 +323,12 @@ pfctl_print_altq_nodestat(int dev, const struct pf_altq_node *a) case ALTQT_HFSC: print_hfscstats(a->qstats); break; + case ALTQT_FAIRQ: + print_fairqstats(a->qstats); + break; + case ALTQT_CODEL: + print_codelstats(a->qstats); + break; } } @@ -342,6 +354,28 @@ print_cbqstats(struct queue_stats cur) } void +print_codelstats(struct queue_stats cur) +{ + printf(" [ pkts: %10llu bytes: %10llu " + "dropped pkts: %6llu bytes: %6llu ]\n", + (unsigned long long)cur.data.codel_stats.cl_xmitcnt.packets, + (unsigned long long)cur.data.codel_stats.cl_xmitcnt.bytes, + (unsigned long long)cur.data.codel_stats.cl_dropcnt.packets + + cur.data.codel_stats.stats.drop_cnt.packets, + (unsigned long long)cur.data.codel_stats.cl_dropcnt.bytes + + cur.data.codel_stats.stats.drop_cnt.bytes); + printf(" [ qlength: %3d/%3d ]\n", + cur.data.codel_stats.qlength, cur.data.codel_stats.qlimit); + + if (cur.avgn < 2) + return; + + printf(" [ measured: %7.1f packets/s, %s/s ]\n", + cur.avg_packets / STAT_INTERVAL, + rate2str((8 * cur.avg_bytes) / STAT_INTERVAL)); +} + +void print_priqstats(struct queue_stats cur) { printf(" [ pkts: %10llu bytes: %10llu " @@ -382,6 +416,26 @@ print_hfscstats(struct queue_stats cur) } void +print_fairqstats(struct queue_stats cur) +{ + printf(" [ pkts: %10llu bytes: %10llu " + "dropped pkts: %6llu bytes: %6llu ]\n", + (unsigned long long)cur.data.fairq_stats.xmit_cnt.packets, + (unsigned long long)cur.data.fairq_stats.xmit_cnt.bytes, + (unsigned long long)cur.data.fairq_stats.drop_cnt.packets, + (unsigned long long)cur.data.fairq_stats.drop_cnt.bytes); + printf(" [ qlength: %3d/%3d ]\n", + cur.data.fairq_stats.qlength, cur.data.fairq_stats.qlimit); + + if (cur.avgn < 2) + return; + + printf(" [ measured: %7.1f packets/s, %s/s ]\n", + cur.avg_packets / STAT_INTERVAL, + rate2str((8 * cur.avg_bytes) / STAT_INTERVAL)); +} + +void pfctl_free_altq_node(struct pf_altq_node *node) { while (node != NULL) { @@ -402,7 +456,7 @@ update_avg(struct pf_altq_node *a) u_int64_t b, p; int n; - if (a->altq.qid == 0) + if (a->altq.qid == 0 && a->altq.scheduler != ALTQT_CODEL) return; qs = &a->qstats; @@ -421,6 +475,14 @@ update_avg(struct pf_altq_node *a) b = qs->data.hfsc_stats.xmit_cnt.bytes; p = qs->data.hfsc_stats.xmit_cnt.packets; break; + case ALTQT_FAIRQ: + b = qs->data.fairq_stats.xmit_cnt.bytes; + p = qs->data.fairq_stats.xmit_cnt.packets; + break; + case ALTQT_CODEL: + b = qs->data.codel_stats.cl_xmitcnt.bytes; + p = qs->data.codel_stats.cl_xmitcnt.packets; + break; default: b = 0; p = 0; diff --git a/share/man/man4/altq.4 b/share/man/man4/altq.4 index 1effdb2..771dc53 100644 --- a/share/man/man4/altq.4 +++ b/share/man/man4/altq.4 @@ -25,7 +25,7 @@ .\" .\" $FreeBSD$ .\" -.Dd December 9, 2011 +.Dd July 24, 2015 .Dt ALTQ 4 .Os .Sh NAME @@ -35,11 +35,13 @@ .Cd options ALTQ .Pp .Cd options ALTQ_CBQ +.Cd options ALTQ_CODEL .Cd options ALTQ_RED .Cd options ALTQ_RIO .Cd options ALTQ_HFSC .Cd options ALTQ_CDNR .Cd options ALTQ_PRIQ +.Cd options ALTQ_FAIRQ .Sh DESCRIPTION The .Nm @@ -73,6 +75,10 @@ Enable Build the .Dq "Class Based Queuing" discipline. +.It Dv ALTQ_CODEL +Build the +.Dq "Controlled Delay" +discipline. .It Dv ALTQ_RED Build the .Dq "Random Early Detection" @@ -93,6 +99,10 @@ any of the available disciplines or consumers. Build the .Dq "Priority Queuing" discipline. +.It Dv ALTQ_FAIRQ +Build the +.Dq "Fair Queuing" +discipline. .It Dv ALTQ_NOPCC Required if the TSC is unusable. .It Dv ALTQ_DEBUG diff --git a/sys/cam/ata/ata_all.c b/sys/cam/ata/ata_all.c index d5220e8..f0d6202 100644 --- a/sys/cam/ata/ata_all.c +++ b/sys/cam/ata/ata_all.c @@ -211,15 +211,31 @@ ata_op_string(struct ata_cmd *cmd) char * ata_cmd_string(struct ata_cmd *cmd, char *cmd_string, size_t len) { + struct sbuf sb; + int error; - snprintf(cmd_string, len, "%02x %02x %02x %02x " + if (len == 0) + return (""); + + sbuf_new(&sb, cmd_string, len, SBUF_FIXEDLEN); + ata_cmd_sbuf(cmd, &sb); + + error = sbuf_finish(&sb); + if (error != 0 && error != ENOMEM) + return (""); + + return(sbuf_data(&sb)); +} + +void +ata_cmd_sbuf(struct ata_cmd *cmd, struct sbuf *sb) +{ + sbuf_printf(sb, "%02x %02x %02x %02x " "%02x %02x %02x %02x %02x %02x %02x %02x", cmd->command, cmd->features, cmd->lba_low, cmd->lba_mid, cmd->lba_high, cmd->device, cmd->lba_low_exp, cmd->lba_mid_exp, cmd->lba_high_exp, cmd->features_exp, cmd->sector_count, cmd->sector_count_exp); - - return(cmd_string); } char * @@ -233,7 +249,7 @@ ata_res_string(struct ata_res *res, char *res_string, size_t len) res->lba_low_exp, res->lba_mid_exp, res->lba_high_exp, res->sector_count, res->sector_count_exp); - return(res_string); + return (res_string); } /* @@ -242,11 +258,10 @@ ata_res_string(struct ata_res *res, char *res_string, size_t len) int ata_command_sbuf(struct ccb_ataio *ataio, struct sbuf *sb) { - char cmd_str[(12 * 3) + 1]; - sbuf_printf(sb, "%s. ACB: %s", - ata_op_string(&ataio->cmd), - ata_cmd_string(&ataio->cmd, cmd_str, sizeof(cmd_str))); + sbuf_printf(sb, "%s. ACB: ", + ata_op_string(&ataio->cmd)); + ata_cmd_sbuf(&ataio->cmd, sb); return(0); } diff --git a/sys/cam/ata/ata_all.h b/sys/cam/ata/ata_all.h index 91e941c..849640f 100644 --- a/sys/cam/ata/ata_all.h +++ b/sys/cam/ata/ata_all.h @@ -103,6 +103,7 @@ int ata_version(int ver); char * ata_op_string(struct ata_cmd *cmd); char * ata_cmd_string(struct ata_cmd *cmd, char *cmd_string, size_t len); +void ata_cmd_sbuf(struct ata_cmd *cmd, struct sbuf *sb); char * ata_res_string(struct ata_res *res, char *res_string, size_t len); int ata_command_sbuf(struct ccb_ataio *ataio, struct sbuf *sb); int ata_status_sbuf(struct ccb_ataio *ataio, struct sbuf *sb); diff --git a/sys/cam/cam_periph.c b/sys/cam/cam_periph.c index 625be21..d8940a0 100644 --- a/sys/cam/cam_periph.c +++ b/sys/cam/cam_periph.c @@ -86,6 +86,7 @@ static int camperiphscsisenseerror(union ccb *ccb, u_int32_t *timeout, u_int32_t *action, const char **action_string); +static void cam_periph_devctl_notify(union ccb *ccb); static int nperiph_drivers; static int initialized = 0; @@ -1615,7 +1616,7 @@ cam_periph_error(union ccb *ccb, cam_flags camflags, struct cam_periph *periph; const char *action_string; cam_status status; - int frozen, error, openings; + int frozen, error, openings, devctl_err; u_int32_t action, relsim_flags, timeout; action = SSQ_PRINT_SENSE; @@ -1624,9 +1625,26 @@ cam_periph_error(union ccb *ccb, cam_flags camflags, status = ccb->ccb_h.status; frozen = (status & CAM_DEV_QFRZN) != 0; status &= CAM_STATUS_MASK; - openings = relsim_flags = timeout = 0; + devctl_err = openings = relsim_flags = timeout = 0; orig_ccb = ccb; + /* Filter the errors that should be reported via devctl */ + switch (ccb->ccb_h.status & CAM_STATUS_MASK) { + case CAM_CMD_TIMEOUT: + case CAM_REQ_ABORTED: + case CAM_REQ_CMP_ERR: + case CAM_REQ_TERMIO: + case CAM_UNREC_HBA_ERROR: + case CAM_DATA_RUN_ERR: + case CAM_SCSI_STATUS_ERROR: + case CAM_ATA_STATUS_ERROR: + case CAM_SMP_STATUS_ERROR: + devctl_err++; + break; + default: + break; + } + switch (status) { case CAM_REQ_CMP: error = 0; @@ -1754,6 +1772,9 @@ cam_periph_error(union ccb *ccb, cam_flags camflags, xpt_print(ccb->ccb_h.path, "Retrying command\n"); } + if (devctl_err) + cam_periph_devctl_notify(orig_ccb); + if ((action & SSQ_LOST) != 0) { lun_id_t lun_id; @@ -1824,3 +1845,83 @@ cam_periph_error(union ccb *ccb, cam_flags camflags, return (error); } + +#define CAM_PERIPH_DEVD_MSG_SIZE 256 + +static void +cam_periph_devctl_notify(union ccb *ccb) +{ + struct cam_periph *periph; + struct ccb_getdev *cgd; + struct sbuf sb; + int serr, sk, asc, ascq; + char *sbmsg, *type; + + sbmsg = malloc(CAM_PERIPH_DEVD_MSG_SIZE, M_CAMPERIPH, M_NOWAIT); + if (sbmsg == NULL) + return; + + sbuf_new(&sb, sbmsg, CAM_PERIPH_DEVD_MSG_SIZE, SBUF_FIXEDLEN); + + periph = xpt_path_periph(ccb->ccb_h.path); + sbuf_printf(&sb, "device=%s%d ", periph->periph_name, + periph->unit_number); + + sbuf_printf(&sb, "serial=\""); + if ((cgd = (struct ccb_getdev *)xpt_alloc_ccb_nowait()) != NULL) { + xpt_setup_ccb(&cgd->ccb_h, ccb->ccb_h.path, + CAM_PRIORITY_NORMAL); + cgd->ccb_h.func_code = XPT_GDEV_TYPE; + xpt_action((union ccb *)cgd); + + if (cgd->ccb_h.status == CAM_REQ_CMP) + sbuf_bcat(&sb, cgd->serial_num, cgd->serial_num_len); + } + sbuf_printf(&sb, "\" "); + sbuf_printf(&sb, "cam_status=\"0x%x\" ", ccb->ccb_h.status); + + switch (ccb->ccb_h.status & CAM_STATUS_MASK) { + case CAM_CMD_TIMEOUT: + sbuf_printf(&sb, "timeout=%d ", ccb->ccb_h.timeout); + type = "timeout"; + break; + case CAM_SCSI_STATUS_ERROR: + sbuf_printf(&sb, "scsi_status=%d ", ccb->csio.scsi_status); + if (scsi_extract_sense_ccb(ccb, &serr, &sk, &asc, &ascq)) + sbuf_printf(&sb, "scsi_sense=\"%02x %02x %02x %02x\" ", + serr, sk, asc, ascq); + type = "error"; + break; + case CAM_ATA_STATUS_ERROR: + { + char res_str[(11 * 3) + 1]; + + sbuf_printf(&sb, "RES=\"%s\" ", ata_res_string(&ccb->ataio.res, + res_str, sizeof(res_str))); + type = "error"; + break; + } + default: + type = "error"; + break; + } + + if (ccb->ccb_h.func_code == XPT_SCSI_IO) { + sbuf_printf(&sb, "CDB=\""); + if ((ccb->ccb_h.flags & CAM_CDB_POINTER) != 0) + scsi_cdb_sbuf(ccb->csio.cdb_io.cdb_ptr, &sb); + else + scsi_cdb_sbuf(ccb->csio.cdb_io.cdb_bytes, &sb); + sbuf_printf(&sb, "\" "); + } else if (ccb->ccb_h.func_code == XPT_ATA_IO) { + sbuf_printf(&sb, "ACB=\""); + ata_cmd_sbuf(&ccb->ataio.cmd, &sb); + sbuf_printf(&sb, "\" "); + } + + if (sbuf_finish(&sb) == 0) + devctl_notify("CAM", "periph", type, sbuf_data(&sb)); + sbuf_delete(&sb); + free(sbmsg, M_CAMPERIPH); +} + diff --git a/sys/cam/scsi/scsi_all.c b/sys/cam/scsi/scsi_all.c index 023103b..3a073ee 100644 --- a/sys/cam/scsi/scsi_all.c +++ b/sys/cam/scsi/scsi_all.c @@ -3464,14 +3464,32 @@ scsi_error_action(struct ccb_scsiio *csio, struct scsi_inquiry_data *inq_data, char * scsi_cdb_string(u_int8_t *cdb_ptr, char *cdb_string, size_t len) { + struct sbuf sb; + int error; + + if (len == 0) + return (""); + + sbuf_new(&sb, cdb_string, len, SBUF_FIXEDLEN); + + scsi_cdb_sbuf(cdb_ptr, &sb); + + /* ENOMEM just means that the fixed buffer is full, OK to ignore */ + error = sbuf_finish(&sb); + if (error != 0 && error != ENOMEM) + return (""); + + return(sbuf_data(&sb)); +} + +void +scsi_cdb_sbuf(u_int8_t *cdb_ptr, struct sbuf *sb) +{ u_int8_t cdb_len; int i; if (cdb_ptr == NULL) - return(""); - - /* Silence warnings */ - cdb_len = 0; + return; /* * This is taken from the SCSI-3 draft spec. @@ -3508,12 +3526,11 @@ scsi_cdb_string(u_int8_t *cdb_ptr, char *cdb_string, size_t len) cdb_len = 12; break; } - *cdb_string = '\0'; + for (i = 0; i < cdb_len; i++) - snprintf(cdb_string + strlen(cdb_string), - len - strlen(cdb_string), "%02hhx ", cdb_ptr[i]); + sbuf_printf(sb, "%02hhx ", cdb_ptr[i]); - return(cdb_string); + return; } const char * @@ -3562,7 +3579,6 @@ scsi_command_string(struct cam_device *device, struct ccb_scsiio *csio, #endif /* _KERNEL/!_KERNEL */ { struct scsi_inquiry_data *inq_data; - char cdb_str[(SCSI_MAX_CDBLEN * 3) + 1]; #ifdef _KERNEL struct ccb_getdev *cgd; #endif /* _KERNEL */ @@ -3595,15 +3611,13 @@ scsi_command_string(struct cam_device *device, struct ccb_scsiio *csio, #endif /* _KERNEL/!_KERNEL */ if ((csio->ccb_h.flags & CAM_CDB_POINTER) != 0) { - sbuf_printf(sb, "%s. CDB: %s", - scsi_op_desc(csio->cdb_io.cdb_ptr[0], inq_data), - scsi_cdb_string(csio->cdb_io.cdb_ptr, cdb_str, - sizeof(cdb_str))); + sbuf_printf(sb, "%s. CDB: ", + scsi_op_desc(csio->cdb_io.cdb_ptr[0], inq_data)); + scsi_cdb_sbuf(csio->cdb_io.cdb_ptr, sb); } else { - sbuf_printf(sb, "%s. CDB: %s", - scsi_op_desc(csio->cdb_io.cdb_bytes[0], inq_data), - scsi_cdb_string(csio->cdb_io.cdb_bytes, cdb_str, - sizeof(cdb_str))); + sbuf_printf(sb, "%s. CDB: ", + scsi_op_desc(csio->cdb_io.cdb_bytes[0], inq_data)); + scsi_cdb_sbuf(csio->cdb_io.cdb_bytes, sb); } #ifdef _KERNEL diff --git a/sys/cam/scsi/scsi_all.h b/sys/cam/scsi/scsi_all.h index f2b4b21..615a42a 100644 --- a/sys/cam/scsi/scsi_all.h +++ b/sys/cam/scsi/scsi_all.h @@ -3644,6 +3644,7 @@ const char * scsi_op_desc(u_int16_t opcode, struct scsi_inquiry_data *inq_data); char * scsi_cdb_string(u_int8_t *cdb_ptr, char *cdb_string, size_t len); +void scsi_cdb_sbuf(u_int8_t *cdb_ptr, struct sbuf *sb); void scsi_print_inquiry(struct scsi_inquiry_data *inq_data); void scsi_print_inquiry_short(struct scsi_inquiry_data *inq_data); diff --git a/sys/conf/NOTES b/sys/conf/NOTES index a977898..7b5c725 100644 --- a/sys/conf/NOTES +++ b/sys/conf/NOTES @@ -699,7 +699,9 @@ options ALTQ options ALTQ_CBQ # Class Based Queueing options ALTQ_RED # Random Early Detection options ALTQ_RIO # RED In/Out +options ALTQ_CODEL # CoDel Active Queueing options ALTQ_HFSC # Hierarchical Packet Scheduler +options ALTQ_FAIRQ # Fair Packet Scheduler options ALTQ_CDNR # Traffic conditioner options ALTQ_PRIQ # Priority Queueing options ALTQ_NOPCC # Required if the TSC is unusable diff --git a/sys/conf/files b/sys/conf/files index 23e0353..945bfe9 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -252,6 +252,8 @@ compat/freebsd32/freebsd32_syscalls.c optional compat_freebsd32 compat/freebsd32/freebsd32_sysent.c optional compat_freebsd32 contrib/altq/altq/altq_cbq.c optional altq contrib/altq/altq/altq_cdnr.c optional altq +contrib/altq/altq/altq_codel.c optional altq +contrib/altq/altq/altq_fairq.c optional altq contrib/altq/altq/altq_hfsc.c optional altq contrib/altq/altq/altq_priq.c optional altq contrib/altq/altq/altq_red.c optional altq diff --git a/sys/conf/options b/sys/conf/options index 0699c2c..4f1ab3a 100644 --- a/sys/conf/options +++ b/sys/conf/options @@ -384,7 +384,9 @@ ACCEPT_FILTER_HTTP ALTQ opt_global.h ALTQ_CBQ opt_altq.h ALTQ_CDNR opt_altq.h +ALTQ_CODEL opt_altq.h ALTQ_DEBUG opt_altq.h +ALTQ_FAIRQ opt_altq.h ALTQ_HFSC opt_altq.h ALTQ_NOPCC opt_altq.h ALTQ_PRIQ opt_altq.h diff --git a/sys/contrib/altq/altq/altq.h b/sys/contrib/altq/altq/altq.h index c740ed3..4ef6cd2 100644 --- a/sys/contrib/altq/altq/altq.h +++ b/sys/contrib/altq/altq/altq.h @@ -63,7 +63,9 @@ #define ALTQT_BLUE 10 /* blue */ #define ALTQT_PRIQ 11 /* priority queue */ #define ALTQT_JOBS 12 /* JoBS */ -#define ALTQT_MAX 13 /* should be max discipline type + 1 */ +#define ALTQT_FAIRQ 13 /* fairq */ +#define ALTQT_CODEL 14 /* CoDel */ +#define ALTQT_MAX 15 /* should be max discipline type + 1 */ #ifdef ALTQ3_COMPAT struct altqreq { diff --git a/sys/contrib/altq/altq/altq_cbq.c b/sys/contrib/altq/altq/altq_cbq.c index 3991d1d..13f9721 100644 --- a/sys/contrib/altq/altq/altq_cbq.c +++ b/sys/contrib/altq/altq/altq_cbq.c @@ -241,6 +241,10 @@ get_class_stats(class_stats_t *statsp, struct rm_class *cl) if (q_is_rio(cl->q_)) rio_getstats((rio_t *)cl->red_, &statsp->red[0]); #endif +#ifdef ALTQ_CODEL + if (q_is_codel(cl->q_)) + codel_getstats(cl->codel_, &statsp->codel); +#endif } int diff --git a/sys/contrib/altq/altq/altq_cbq.h b/sys/contrib/altq/altq/altq_cbq.h index 76096af..f0d9172 100644 --- a/sys/contrib/altq/altq/altq_cbq.h +++ b/sys/contrib/altq/altq/altq_cbq.h @@ -34,6 +34,7 @@ #define _ALTQ_ALTQ_CBQ_H_ #include <altq/altq.h> +#include <altq/altq_codel.h> #include <altq/altq_rmclass.h> #include <altq/altq_red.h> #include <altq/altq_rio.h> @@ -51,6 +52,7 @@ extern "C" { #define CBQCLF_FLOWVALVE 0x0008 /* use flowvalve (aka penalty-box) */ #define CBQCLF_CLEARDSCP 0x0010 /* clear diffserv codepoint */ #define CBQCLF_BORROW 0x0020 /* borrow from parent */ +#define CBQCLF_CODEL 0x0040 /* use CoDel */ /* class flags only for root class */ #define CBQCLF_WRR 0x0100 /* weighted-round robin */ @@ -90,9 +92,10 @@ typedef struct _cbq_class_stats_ { int qcnt; /* # packets in queue */ int avgidle; - /* red and rio related info */ + /* codel, red and rio related info */ int qtype; struct redstats red[3]; + struct codel_stats codel; } class_stats_t; #ifdef ALTQ3_COMPAT diff --git a/sys/contrib/altq/altq/altq_classq.h b/sys/contrib/altq/altq/altq_classq.h index dc5c646..daec994 100644 --- a/sys/contrib/altq/altq/altq_classq.h +++ b/sys/contrib/altq/altq/altq_classq.h @@ -49,6 +49,7 @@ extern "C" { #define Q_RED 0x01 #define Q_RIO 0x02 #define Q_DROPTAIL 0x03 +#define Q_CODEL 0x04 #ifdef _KERNEL @@ -59,6 +60,7 @@ struct _class_queue_ { struct mbuf *tail_; /* Tail of packet queue */ int qlen_; /* Queue length (in number of packets) */ int qlim_; /* Queue limit (in number of packets*) */ + int qsize_; /* Queue size (in number of bytes*) */ int qtype_; /* Queue type */ }; @@ -67,10 +69,12 @@ typedef struct _class_queue_ class_queue_t; #define qtype(q) (q)->qtype_ /* Get queue type */ #define qlimit(q) (q)->qlim_ /* Max packets to be queued */ #define qlen(q) (q)->qlen_ /* Current queue length. */ +#define qsize(q) (q)->qsize_ /* Current queue size. */ #define qtail(q) (q)->tail_ /* Tail of the queue */ #define qhead(q) ((q)->tail_ ? (q)->tail_->m_nextpkt : NULL) #define qempty(q) ((q)->qlen_ == 0) /* Is the queue empty?? */ +#define q_is_codel(q) ((q)->qtype_ == Q_CODEL) /* Is the queue a codel queue */ #define q_is_red(q) ((q)->qtype_ == Q_RED) /* Is the queue a red queue */ #define q_is_rio(q) ((q)->qtype_ == Q_RIO) /* Is the queue a rio queue */ #define q_is_red_or_rio(q) ((q)->qtype_ == Q_RED || (q)->qtype_ == Q_RIO) @@ -100,6 +104,7 @@ _addq(class_queue_t *q, struct mbuf *m) m0->m_nextpkt = m; qtail(q) = m; qlen(q)++; + qsize(q) += m_pktlen(m); } static __inline struct mbuf * @@ -114,6 +119,7 @@ _getq(class_queue_t *q) else qtail(q) = NULL; qlen(q)--; + qsize(q) -= m_pktlen(m0); m0->m_nextpkt = NULL; return (m0); } diff --git a/sys/contrib/altq/altq/altq_codel.c b/sys/contrib/altq/altq/altq_codel.c new file mode 100644 index 0000000..45c5ca6 --- /dev/null +++ b/sys/contrib/altq/altq/altq_codel.c @@ -0,0 +1,477 @@ +/* + * CoDel - The Controlled-Delay Active Queue Management algorithm + * + * Copyright (C) 2013 Ermal Luçi <eri@FreeBSD.org> + * Copyright (C) 2011-2012 Kathleen Nichols <nichols@pollere.com> + * Copyright (C) 2011-2012 Van Jacobson <van@pollere.net> + * Copyright (C) 2012 Michael D. Taht <dave.taht@bufferbloat.net> + * Copyright (C) 2012 Eric Dumazet <edumazet@google.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions, and the following disclaimer, + * without modification. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The names of the authors may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * Alternatively, provided that this notice is retained in full, this + * software may be distributed under the terms of the GNU General + * Public License ("GPL") version 2, in which case the provisions of the + * GPL apply INSTEAD OF those given above. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * $FreeBSD$ + */ +#include "opt_altq.h" +#include "opt_inet.h" +#include "opt_inet6.h" + +#ifdef ALTQ_CODEL /* CoDel is enabled by ALTQ_CODEL option in opt_altq.h */ + +#include <sys/param.h> +#include <sys/malloc.h> +#include <sys/mbuf.h> +#include <sys/socket.h> +#include <sys/systm.h> + +#include <net/if.h> +#include <net/if_var.h> +#include <netinet/in.h> + +#include <netpfil/pf/pf.h> +#include <netpfil/pf/pf_altq.h> +#include <altq/if_altq.h> +#include <altq/altq.h> +#include <altq/altq_codel.h> + +static int codel_should_drop(struct codel *, class_queue_t *, + struct mbuf *, u_int64_t); +static void codel_Newton_step(struct codel_vars *); +static u_int64_t codel_control_law(u_int64_t t, u_int64_t, u_int32_t); + +#define codel_time_after(a, b) ((int64_t)(a) - (int64_t)(b) > 0) +#define codel_time_after_eq(a, b) ((int64_t)(a) - (int64_t)(b) >= 0) +#define codel_time_before(a, b) ((int64_t)(a) - (int64_t)(b) < 0) +#define codel_time_before_eq(a, b) ((int64_t)(a) - (int64_t)(b) <= 0) + +static int codel_request(struct ifaltq *, int, void *); + +static int codel_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *); +static struct mbuf *codel_dequeue(struct ifaltq *, int); + +int +codel_pfattach(struct pf_altq *a) +{ + struct ifnet *ifp; + + if ((ifp = ifunit(a->ifname)) == NULL || a->altq_disc == NULL) + return (EINVAL); + + return (altq_attach(&ifp->if_snd, ALTQT_CODEL, a->altq_disc, + codel_enqueue, codel_dequeue, codel_request, NULL, NULL)); +} + +int +codel_add_altq(struct pf_altq *a) +{ + struct codel_if *cif; + struct ifnet *ifp; + struct codel_opts *opts; + + if ((ifp = ifunit(a->ifname)) == NULL) + return (EINVAL); + if (!ALTQ_IS_READY(&ifp->if_snd)) + return (ENODEV); + + opts = &a->pq_u.codel_opts; + + cif = malloc(sizeof(struct codel_if), M_DEVBUF, M_NOWAIT | M_ZERO); + if (cif == NULL) + return (ENOMEM); + cif->cif_bandwidth = a->ifbandwidth; + cif->cif_ifq = &ifp->if_snd; + + cif->cl_q = malloc(sizeof(class_queue_t), M_DEVBUF, M_NOWAIT | M_ZERO); + if (cif->cl_q == NULL) { + free(cif, M_DEVBUF); + return (ENOMEM); + } + + if (a->qlimit == 0) + a->qlimit = 50; /* use default. */ + qlimit(cif->cl_q) = a->qlimit; + qtype(cif->cl_q) = Q_CODEL; + qlen(cif->cl_q) = 0; + qsize(cif->cl_q) = 0; + + if (opts->target == 0) + opts->target = 5; + if (opts->interval == 0) + opts->interval = 100; + cif->codel.params.target = machclk_freq * opts->target / 1000; + cif->codel.params.interval = machclk_freq * opts->interval / 1000; + cif->codel.params.ecn = opts->ecn; + cif->codel.stats.maxpacket = 256; + + cif->cl_stats.qlength = qlen(cif->cl_q); + cif->cl_stats.qlimit = qlimit(cif->cl_q); + + /* keep the state in pf_altq */ + a->altq_disc = cif; + + return (0); +} + +int +codel_remove_altq(struct pf_altq *a) +{ + struct codel_if *cif; + + if ((cif = a->altq_disc) == NULL) + return (EINVAL); + a->altq_disc = NULL; + + if (cif->cl_q) + free(cif->cl_q, M_DEVBUF); + free(cif, M_DEVBUF); + + return (0); +} + +int +codel_getqstats(struct pf_altq *a, void *ubuf, int *nbytes) +{ + struct codel_if *cif; + struct codel_ifstats stats; + int error = 0; + + if ((cif = altq_lookup(a->ifname, ALTQT_CODEL)) == NULL) + return (EBADF); + + if (*nbytes < sizeof(stats)) + return (EINVAL); + + stats = cif->cl_stats; + stats.stats = cif->codel.stats; + + if ((error = copyout((caddr_t)&stats, ubuf, sizeof(stats))) != 0) + return (error); + *nbytes = sizeof(stats); + + return (0); +} + +static int +codel_request(struct ifaltq *ifq, int req, void *arg) +{ + struct codel_if *cif = (struct codel_if *)ifq->altq_disc; + struct mbuf *m; + + IFQ_LOCK_ASSERT(ifq); + + switch (req) { + case ALTRQ_PURGE: + if (!ALTQ_IS_ENABLED(cif->cif_ifq)) + break; + + if (qempty(cif->cl_q)) + break; + + while ((m = _getq(cif->cl_q)) != NULL) { + PKTCNTR_ADD(&cif->cl_stats.cl_dropcnt, m_pktlen(m)); + m_freem(m); + IFQ_DEC_LEN(cif->cif_ifq); + } + cif->cif_ifq->ifq_len = 0; + break; + } + + return (0); +} + +static int +codel_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr) +{ + + struct codel_if *cif = (struct codel_if *) ifq->altq_disc; + + IFQ_LOCK_ASSERT(ifq); + + /* grab class set by classifier */ + if ((m->m_flags & M_PKTHDR) == 0) { + /* should not happen */ + printf("altq: packet for %s does not have pkthdr\n", + ifq->altq_ifp->if_xname); + m_freem(m); + PKTCNTR_ADD(&cif->cl_stats.cl_dropcnt, m_pktlen(m)); + return (ENOBUFS); + } + + if (codel_addq(&cif->codel, cif->cl_q, m)) { + PKTCNTR_ADD(&cif->cl_stats.cl_dropcnt, m_pktlen(m)); + return (ENOBUFS); + } + IFQ_INC_LEN(ifq); + + return (0); +} + +static struct mbuf * +codel_dequeue(struct ifaltq *ifq, int op) +{ + struct codel_if *cif = (struct codel_if *)ifq->altq_disc; + struct mbuf *m; + + IFQ_LOCK_ASSERT(ifq); + + if (IFQ_IS_EMPTY(ifq)) + return (NULL); + + if (op == ALTDQ_POLL) + return (qhead(cif->cl_q)); + + + m = codel_getq(&cif->codel, cif->cl_q); + if (m != NULL) { + IFQ_DEC_LEN(ifq); + PKTCNTR_ADD(&cif->cl_stats.cl_xmitcnt, m_pktlen(m)); + return (m); + } + + return (NULL); +} + +struct codel * +codel_alloc(int target, int interval, int ecn) +{ + struct codel *c; + + c = malloc(sizeof(*c), M_DEVBUF, M_NOWAIT | M_ZERO); + if (c != NULL) { + c->params.target = machclk_freq * target / 1000; + c->params.interval = machclk_freq * interval / 1000; + c->params.ecn = ecn; + c->stats.maxpacket = 256; + } + + return (c); +} + +void +codel_destroy(struct codel *c) +{ + + free(c, M_DEVBUF); +} + +#define MTAG_CODEL 1438031249 +int +codel_addq(struct codel *c, class_queue_t *q, struct mbuf *m) +{ + struct m_tag *mtag; + uint64_t *enqueue_time; + + if (qlen(q) < qlimit(q)) { + mtag = m_tag_locate(m, MTAG_CODEL, 0, NULL); + if (mtag == NULL) + mtag = m_tag_alloc(MTAG_CODEL, 0, sizeof(uint64_t), + M_NOWAIT); + if (mtag == NULL) { + m_freem(m); + return (-1); + } + enqueue_time = (uint64_t *)(mtag + 1); + *enqueue_time = read_machclk(); + m_tag_prepend(m, mtag); + _addq(q, m); + return (0); + } + c->drop_overlimit++; + m_freem(m); + + return (-1); +} + +static int +codel_should_drop(struct codel *c, class_queue_t *q, struct mbuf *m, + u_int64_t now) +{ + struct m_tag *mtag; + uint64_t *enqueue_time; + + if (m == NULL) { + c->vars.first_above_time = 0; + return (0); + } + + mtag = m_tag_locate(m, MTAG_CODEL, 0, NULL); + if (mtag == NULL) { + /* Only one warning per second. */ + if (ppsratecheck(&c->last_log, &c->last_pps, 1)) + printf("%s: could not found the packet mtag!\n", + __func__); + c->vars.first_above_time = 0; + return (0); + } + enqueue_time = (uint64_t *)(mtag + 1); + c->vars.ldelay = now - *enqueue_time; + c->stats.maxpacket = MAX(c->stats.maxpacket, m_pktlen(m)); + + if (codel_time_before(c->vars.ldelay, c->params.target) || + qsize(q) <= c->stats.maxpacket) { + /* went below - stay below for at least interval */ + c->vars.first_above_time = 0; + return (0); + } + if (c->vars.first_above_time == 0) { + /* just went above from below. If we stay above + * for at least interval we'll say it's ok to drop + */ + c->vars.first_above_time = now + c->params.interval; + return (0); + } + if (codel_time_after(now, c->vars.first_above_time)) + return (1); + + return (0); +} + +/* + * Run a Newton method step: + * new_invsqrt = (invsqrt / 2) * (3 - count * invsqrt^2) + * + * Here, invsqrt is a fixed point number (< 1.0), 32bit mantissa, aka Q0.32 + */ +static void +codel_Newton_step(struct codel_vars *vars) +{ + uint32_t invsqrt, invsqrt2; + uint64_t val; + +/* sizeof_in_bits(rec_inv_sqrt) */ +#define REC_INV_SQRT_BITS (8 * sizeof(u_int16_t)) +/* needed shift to get a Q0.32 number from rec_inv_sqrt */ +#define REC_INV_SQRT_SHIFT (32 - REC_INV_SQRT_BITS) + + invsqrt = ((u_int32_t)vars->rec_inv_sqrt) << REC_INV_SQRT_SHIFT; + invsqrt2 = ((u_int64_t)invsqrt * invsqrt) >> 32; + val = (3LL << 32) - ((u_int64_t)vars->count * invsqrt2); + val >>= 2; /* avoid overflow in following multiply */ + val = (val * invsqrt) >> (32 - 2 + 1); + + vars->rec_inv_sqrt = val >> REC_INV_SQRT_SHIFT; +} + +static u_int64_t +codel_control_law(u_int64_t t, u_int64_t interval, u_int32_t rec_inv_sqrt) +{ + + return (t + (u_int32_t)(((u_int64_t)interval * + (rec_inv_sqrt << REC_INV_SQRT_SHIFT)) >> 32)); +} + +struct mbuf * +codel_getq(struct codel *c, class_queue_t *q) +{ + struct mbuf *m; + u_int64_t now; + int drop; + + if ((m = _getq(q)) == NULL) { + c->vars.dropping = 0; + return (m); + } + + now = read_machclk(); + drop = codel_should_drop(c, q, m, now); + if (c->vars.dropping) { + if (!drop) { + /* sojourn time below target - leave dropping state */ + c->vars.dropping = 0; + } else if (codel_time_after_eq(now, c->vars.drop_next)) { + /* It's time for the next drop. Drop the current + * packet and dequeue the next. The dequeue might + * take us out of dropping state. + * If not, schedule the next drop. + * A large backlog might result in drop rates so high + * that the next drop should happen now, + * hence the while loop. + */ + while (c->vars.dropping && + codel_time_after_eq(now, c->vars.drop_next)) { + c->vars.count++; /* don't care of possible wrap + * since there is no more + * divide */ + codel_Newton_step(&c->vars); + /* TODO ECN */ + PKTCNTR_ADD(&c->stats.drop_cnt, m_pktlen(m)); + m_freem(m); + m = _getq(q); + if (!codel_should_drop(c, q, m, now)) + /* leave dropping state */ + c->vars.dropping = 0; + else + /* and schedule the next drop */ + c->vars.drop_next = + codel_control_law(c->vars.drop_next, + c->params.interval, + c->vars.rec_inv_sqrt); + } + } + } else if (drop) { + /* TODO ECN */ + PKTCNTR_ADD(&c->stats.drop_cnt, m_pktlen(m)); + m_freem(m); + + m = _getq(q); + drop = codel_should_drop(c, q, m, now); + + c->vars.dropping = 1; + /* if min went above target close to when we last went below it + * assume that the drop rate that controlled the queue on the + * last cycle is a good starting point to control it now. + */ + if (codel_time_before(now - c->vars.drop_next, + 16 * c->params.interval)) { + c->vars.count = (c->vars.count - c->vars.lastcount) | 1; + /* we dont care if rec_inv_sqrt approximation + * is not very precise : + * Next Newton steps will correct it quadratically. + */ + codel_Newton_step(&c->vars); + } else { + c->vars.count = 1; + c->vars.rec_inv_sqrt = ~0U >> REC_INV_SQRT_SHIFT; + } + c->vars.lastcount = c->vars.count; + c->vars.drop_next = codel_control_law(now, c->params.interval, + c->vars.rec_inv_sqrt); + } + + return (m); +} + +void +codel_getstats(struct codel *c, struct codel_stats *s) +{ + *s = c->stats; +} + +#endif /* ALTQ_CODEL */ diff --git a/sys/contrib/altq/altq/altq_codel.h b/sys/contrib/altq/altq/altq_codel.h new file mode 100644 index 0000000..991af42 --- /dev/null +++ b/sys/contrib/altq/altq/altq_codel.h @@ -0,0 +1,129 @@ +/* + * CoDel - The Controlled-Delay Active Queue Management algorithm + * + * Copyright (C) 2013 Ermal Luçi <eri@FreeBSD.org> + * Copyright (C) 2011-2012 Kathleen Nichols <nichols@pollere.com> + * Copyright (C) 2011-2012 Van Jacobson <van@pollere.net> + * Copyright (C) 2012 Michael D. Taht <dave.taht@bufferbloat.net> + * Copyright (C) 2012 Eric Dumazet <edumazet@google.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions, and the following disclaimer, + * without modification. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The names of the authors may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * Alternatively, provided that this notice is retained in full, this + * software may be distributed under the terms of the GNU General + * Public License ("GPL") version 2, in which case the provisions of the + * GPL apply INSTEAD OF those given above. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _ALTQ_ALTQ_CODEL_H_ +#define _ALTQ_ALTQ_CODEL_H_ + +struct codel_stats { + u_int32_t maxpacket; + struct pktcntr drop_cnt; + u_int marked_packets; +}; + +struct codel_ifstats { + u_int qlength; + u_int qlimit; + struct codel_stats stats; + struct pktcntr cl_xmitcnt; /* transmitted packet counter */ + struct pktcntr cl_dropcnt; /* dropped packet counter */ +}; + +#ifdef _KERNEL +#include <altq/altq_classq.h> + +/** + * struct codel_params - contains codel parameters + * <at> target: target queue size (in time units) + * <at> interval: width of moving time window + * <at> ecn: is Explicit Congestion Notification enabled + */ +struct codel_params { + u_int64_t target; + u_int64_t interval; + int ecn; +}; + +/** + * struct codel_vars - contains codel variables + * <at> count: how many drops we've done since the last time we + * entered dropping state + * <at> lastcount: count at entry to dropping state + * <at> dropping: set to true if in dropping state + * <at> rec_inv_sqrt: reciprocal value of sqrt(count) >> 1 + * <at> first_above_time: when we went (or will go) continuously above + * target for interval + * <at> drop_next: time to drop next packet, or when we dropped last + * <at> ldelay: sojourn time of last dequeued packet + */ +struct codel_vars { + u_int32_t count; + u_int32_t lastcount; + int dropping; + u_int16_t rec_inv_sqrt; + u_int64_t first_above_time; + u_int64_t drop_next; + u_int64_t ldelay; +}; + +struct codel { + int last_pps; + struct codel_params params; + struct codel_vars vars; + struct codel_stats stats; + struct timeval last_log; + u_int32_t drop_overlimit; +}; + +/* + * codel interface state + */ +struct codel_if { + struct codel_if *cif_next; /* interface state list */ + struct ifaltq *cif_ifq; /* backpointer to ifaltq */ + u_int cif_bandwidth; /* link bandwidth in bps */ + + class_queue_t *cl_q; /* class queue structure */ + struct codel codel; + + /* statistics */ + struct codel_ifstats cl_stats; +}; + +struct codel *codel_alloc(int, int, int); +void codel_destroy(struct codel *); +int codel_addq(struct codel *, class_queue_t *, struct mbuf *); +struct mbuf *codel_getq(struct codel *, class_queue_t *); +void codel_getstats(struct codel *, struct codel_stats *); + +#endif /* _KERNEL */ + +#endif /* _ALTQ_ALTQ_CODEL_H_ */ diff --git a/sys/contrib/altq/altq/altq_fairq.c b/sys/contrib/altq/altq/altq_fairq.c new file mode 100644 index 0000000..2189996 --- /dev/null +++ b/sys/contrib/altq/altq/altq_fairq.c @@ -0,0 +1,909 @@ +/* + * Copyright (c) 2008 The DragonFly Project. All rights reserved. + * + * This code is derived from software contributed to The DragonFly Project + * by Matthew Dillon <dillon@backplane.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * 3. Neither the name of The DragonFly Project nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific, prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $DragonFly: src/sys/net/altq/altq_fairq.c,v 1.1 2008/04/06 18:58:15 dillon Exp $ + * $FreeBSD$ + */ +/* + * Matt: I gutted altq_priq.c and used it as a skeleton on which to build + * fairq. The fairq algorithm is completely different then priq, of course, + * but because I used priq's skeleton I believe I should include priq's + * copyright. + * + * Copyright (C) 2000-2003 + * Sony Computer Science Laboratories Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * FAIRQ - take traffic classified by keep state (hashed into + * mbuf->m_pkthdr.altq_state_hash) and bucketize it. Fairly extract + * the first packet from each bucket in a round-robin fashion. + * + * TODO - better overall qlimit support (right now it is per-bucket). + * - NOTE: red etc is per bucket, not overall. + * - better service curve support. + * + * EXAMPLE: + * + * altq on em0 fairq bandwidth 650Kb queue { std, bulk } + * queue std priority 3 bandwidth 400Kb \ + * fairq (buckets 64, default, hogs 1Kb) qlimit 50 + * queue bulk priority 2 bandwidth 100Kb \ + * fairq (buckets 64, hogs 1Kb) qlimit 50 + * + * pass out on em0 from any to any keep state queue std + * pass out on em0 inet proto tcp ..... port ... keep state queue bulk + */ +#include "opt_altq.h" +#include "opt_inet.h" +#include "opt_inet6.h" + +#ifdef ALTQ_FAIRQ /* fairq is enabled in the kernel conf */ + +#include <sys/param.h> +#include <sys/malloc.h> +#include <sys/mbuf.h> +#include <sys/socket.h> +#include <sys/sockio.h> +#include <sys/systm.h> +#include <sys/proc.h> +#include <sys/errno.h> +#include <sys/kernel.h> +#include <sys/queue.h> + +#include <net/if.h> +#include <net/if_var.h> +#include <netinet/in.h> + +#include <netpfil/pf/pf.h> +#include <netpfil/pf/pf_altq.h> +#include <netpfil/pf/pf_mtag.h> +#include <altq/altq.h> +#include <altq/altq_fairq.h> + +/* + * function prototypes + */ +static int fairq_clear_interface(struct fairq_if *); +static int fairq_request(struct ifaltq *, int, void *); +static void fairq_purge(struct fairq_if *); +static struct fairq_class *fairq_class_create(struct fairq_if *, int, int, u_int, struct fairq_opts *, int); +static int fairq_class_destroy(struct fairq_class *); +static int fairq_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *); +static struct mbuf *fairq_dequeue(struct ifaltq *, int); + +static int fairq_addq(struct fairq_class *, struct mbuf *, u_int32_t); +static struct mbuf *fairq_getq(struct fairq_class *, uint64_t); +static struct mbuf *fairq_pollq(struct fairq_class *, uint64_t, int *); +static fairq_bucket_t *fairq_selectq(struct fairq_class *, int); +static void fairq_purgeq(struct fairq_class *); + +static void get_class_stats(struct fairq_classstats *, struct fairq_class *); +static struct fairq_class *clh_to_clp(struct fairq_if *, uint32_t); + +int +fairq_pfattach(struct pf_altq *a) +{ + struct ifnet *ifp; + int error; + + if ((ifp = ifunit(a->ifname)) == NULL || a->altq_disc == NULL) + return (EINVAL); + + error = altq_attach(&ifp->if_snd, ALTQT_FAIRQ, a->altq_disc, + fairq_enqueue, fairq_dequeue, fairq_request, NULL, NULL); + + return (error); +} + +int +fairq_add_altq(struct pf_altq *a) +{ + struct fairq_if *pif; + struct ifnet *ifp; + + if ((ifp = ifunit(a->ifname)) == NULL) + return (EINVAL); + if (!ALTQ_IS_READY(&ifp->if_snd)) + return (ENODEV); + + + pif = malloc(sizeof(struct fairq_if), + M_DEVBUF, M_WAITOK | M_ZERO); + pif->pif_bandwidth = a->ifbandwidth; + pif->pif_maxpri = -1; + pif->pif_ifq = &ifp->if_snd; + + /* keep the state in pf_altq */ + a->altq_disc = pif; + + return (0); +} + +int +fairq_remove_altq(struct pf_altq *a) +{ + struct fairq_if *pif; + + if ((pif = a->altq_disc) == NULL) + return (EINVAL); + a->altq_disc = NULL; + + fairq_clear_interface(pif); + + free(pif, M_DEVBUF); + return (0); +} + +int +fairq_add_queue(struct pf_altq *a) +{ + struct fairq_if *pif; + struct fairq_class *cl; + + if ((pif = a->altq_disc) == NULL) + return (EINVAL); + + /* check parameters */ + if (a->priority >= FAIRQ_MAXPRI) + return (EINVAL); + if (a->qid == 0) + return (EINVAL); + if (pif->pif_classes[a->priority] != NULL) + return (EBUSY); + if (clh_to_clp(pif, a->qid) != NULL) + return (EBUSY); + + cl = fairq_class_create(pif, a->priority, a->qlimit, a->bandwidth, + &a->pq_u.fairq_opts, a->qid); + if (cl == NULL) + return (ENOMEM); + + return (0); +} + +int +fairq_remove_queue(struct pf_altq *a) +{ + struct fairq_if *pif; + struct fairq_class *cl; + + if ((pif = a->altq_disc) == NULL) + return (EINVAL); + + if ((cl = clh_to_clp(pif, a->qid)) == NULL) + return (EINVAL); + + return (fairq_class_destroy(cl)); +} + +int +fairq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes) +{ + struct fairq_if *pif; + struct fairq_class *cl; + struct fairq_classstats stats; + int error = 0; + + if ((pif = altq_lookup(a->ifname, ALTQT_FAIRQ)) == NULL) + return (EBADF); + + if ((cl = clh_to_clp(pif, a->qid)) == NULL) + return (EINVAL); + + if (*nbytes < sizeof(stats)) + return (EINVAL); + + get_class_stats(&stats, cl); + + if ((error = copyout((caddr_t)&stats, ubuf, sizeof(stats))) != 0) + return (error); + *nbytes = sizeof(stats); + return (0); +} + +/* + * bring the interface back to the initial state by discarding + * all the filters and classes. + */ +static int +fairq_clear_interface(struct fairq_if *pif) +{ + struct fairq_class *cl; + int pri; + + /* clear out the classes */ + for (pri = 0; pri <= pif->pif_maxpri; pri++) { + if ((cl = pif->pif_classes[pri]) != NULL) + fairq_class_destroy(cl); + } + + return (0); +} + +static int +fairq_request(struct ifaltq *ifq, int req, void *arg) +{ + struct fairq_if *pif = (struct fairq_if *)ifq->altq_disc; + + IFQ_LOCK_ASSERT(ifq); + + switch (req) { + case ALTRQ_PURGE: + fairq_purge(pif); + break; + } + return (0); +} + +/* discard all the queued packets on the interface */ +static void +fairq_purge(struct fairq_if *pif) +{ + struct fairq_class *cl; + int pri; + + for (pri = 0; pri <= pif->pif_maxpri; pri++) { + if ((cl = pif->pif_classes[pri]) != NULL && cl->cl_head) + fairq_purgeq(cl); + } + if (ALTQ_IS_ENABLED(pif->pif_ifq)) + pif->pif_ifq->ifq_len = 0; +} + +static struct fairq_class * +fairq_class_create(struct fairq_if *pif, int pri, int qlimit, + u_int bandwidth, struct fairq_opts *opts, int qid) +{ + struct fairq_class *cl; + int flags = opts->flags; + u_int nbuckets = opts->nbuckets; + int i; + +#ifndef ALTQ_RED + if (flags & FARF_RED) { +#ifdef ALTQ_DEBUG + printf("fairq_class_create: RED not configured for FAIRQ!\n"); +#endif + return (NULL); + } +#endif +#ifndef ALTQ_CODEL + if (flags & FARF_CODEL) { +#ifdef ALTQ_DEBUG + printf("fairq_class_create: CODEL not configured for FAIRQ!\n"); +#endif + return (NULL); + } +#endif + if (nbuckets == 0) + nbuckets = 256; + if (nbuckets > FAIRQ_MAX_BUCKETS) + nbuckets = FAIRQ_MAX_BUCKETS; + /* enforce power-of-2 size */ + while ((nbuckets ^ (nbuckets - 1)) != ((nbuckets << 1) - 1)) + ++nbuckets; + + if ((cl = pif->pif_classes[pri]) != NULL) { + /* modify the class instead of creating a new one */ + IFQ_LOCK(cl->cl_pif->pif_ifq); + if (cl->cl_head) + fairq_purgeq(cl); + IFQ_UNLOCK(cl->cl_pif->pif_ifq); +#ifdef ALTQ_RIO + if (cl->cl_qtype == Q_RIO) + rio_destroy((rio_t *)cl->cl_red); +#endif +#ifdef ALTQ_RED + if (cl->cl_qtype == Q_RED) + red_destroy(cl->cl_red); +#endif +#ifdef ALTQ_CODEL + if (cl->cl_qtype == Q_CODEL) + codel_destroy(cl->cl_codel); +#endif + } else { + cl = malloc(sizeof(struct fairq_class), + M_DEVBUF, M_WAITOK | M_ZERO); + cl->cl_nbuckets = nbuckets; + cl->cl_nbucket_mask = nbuckets - 1; + + cl->cl_buckets = malloc( + sizeof(struct fairq_bucket) * cl->cl_nbuckets, + M_DEVBUF, M_WAITOK | M_ZERO); + cl->cl_head = NULL; + } + + pif->pif_classes[pri] = cl; + if (flags & FARF_DEFAULTCLASS) + pif->pif_default = cl; + if (qlimit == 0) + qlimit = 50; /* use default */ + cl->cl_qlimit = qlimit; + for (i = 0; i < cl->cl_nbuckets; ++i) { + qlimit(&cl->cl_buckets[i].queue) = qlimit; + } + cl->cl_bandwidth = bandwidth / 8; + cl->cl_qtype = Q_DROPTAIL; + cl->cl_flags = flags & FARF_USERFLAGS; + cl->cl_pri = pri; + if (pri > pif->pif_maxpri) + pif->pif_maxpri = pri; + cl->cl_pif = pif; + cl->cl_handle = qid; + cl->cl_hogs_m1 = opts->hogs_m1 / 8; + cl->cl_lssc_m1 = opts->lssc_m1 / 8; /* NOT YET USED */ + +#ifdef ALTQ_RED + if (flags & (FARF_RED|FARF_RIO)) { + int red_flags, red_pkttime; + + red_flags = 0; + if (flags & FARF_ECN) + red_flags |= REDF_ECN; +#ifdef ALTQ_RIO + if (flags & FARF_CLEARDSCP) + red_flags |= RIOF_CLEARDSCP; +#endif + if (pif->pif_bandwidth < 8) + red_pkttime = 1000 * 1000 * 1000; /* 1 sec */ + else + red_pkttime = (int64_t)pif->pif_ifq->altq_ifp->if_mtu + * 1000 * 1000 * 1000 / (pif->pif_bandwidth / 8); +#ifdef ALTQ_RIO + if (flags & FARF_RIO) { + cl->cl_red = (red_t *)rio_alloc(0, NULL, + red_flags, red_pkttime); + if (cl->cl_red != NULL) + cl->cl_qtype = Q_RIO; + } else +#endif + if (flags & FARF_RED) { + cl->cl_red = red_alloc(0, 0, + cl->cl_qlimit * 10/100, + cl->cl_qlimit * 30/100, + red_flags, red_pkttime); + if (cl->cl_red != NULL) + cl->cl_qtype = Q_RED; + } + } +#endif /* ALTQ_RED */ +#ifdef ALTQ_CODEL + if (flags & FARF_CODEL) { + cl->cl_codel = codel_alloc(5, 100, 0); + if (cl->cl_codel != NULL) + cl->cl_qtype = Q_CODEL; + } +#endif + + return (cl); +} + +static int +fairq_class_destroy(struct fairq_class *cl) +{ + struct fairq_if *pif; + int pri; + + IFQ_LOCK(cl->cl_pif->pif_ifq); + + if (cl->cl_head) + fairq_purgeq(cl); + + pif = cl->cl_pif; + pif->pif_classes[cl->cl_pri] = NULL; + if (pif->pif_poll_cache == cl) + pif->pif_poll_cache = NULL; + if (pif->pif_maxpri == cl->cl_pri) { + for (pri = cl->cl_pri; pri >= 0; pri--) + if (pif->pif_classes[pri] != NULL) { + pif->pif_maxpri = pri; + break; + } + if (pri < 0) + pif->pif_maxpri = -1; + } + IFQ_UNLOCK(cl->cl_pif->pif_ifq); + + if (cl->cl_red != NULL) { +#ifdef ALTQ_RIO + if (cl->cl_qtype == Q_RIO) + rio_destroy((rio_t *)cl->cl_red); +#endif +#ifdef ALTQ_RED + if (cl->cl_qtype == Q_RED) + red_destroy(cl->cl_red); +#endif +#ifdef ALTQ_CODEL + if (cl->cl_qtype == Q_CODEL) + codel_destroy(cl->cl_codel); +#endif + } + free(cl->cl_buckets, M_DEVBUF); + free(cl, M_DEVBUF); + + return (0); +} + +/* + * fairq_enqueue is an enqueue function to be registered to + * (*altq_enqueue) in struct ifaltq. + */ +static int +fairq_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr) +{ + struct fairq_if *pif = (struct fairq_if *)ifq->altq_disc; + struct fairq_class *cl = NULL; /* Make compiler happy */ + struct pf_mtag *t; + u_int32_t qid_hash = 0; + int len; + + IFQ_LOCK_ASSERT(ifq); + + /* grab class set by classifier */ + if ((m->m_flags & M_PKTHDR) == 0) { + /* should not happen */ + printf("altq: packet for %s does not have pkthdr\n", + ifq->altq_ifp->if_xname); + m_freem(m); + return (ENOBUFS); + } + + if ((t = pf_find_mtag(m)) != NULL) { + cl = clh_to_clp(pif, t->qid); + qid_hash = t->qid_hash; + } + if (cl == NULL) { + cl = pif->pif_default; + if (cl == NULL) { + m_freem(m); + return (ENOBUFS); + } + } + cl->cl_flags |= FARF_HAS_PACKETS; + cl->cl_pktattr = NULL; + len = m_pktlen(m); + if (fairq_addq(cl, m, qid_hash) != 0) { + /* drop occurred. mbuf was freed in fairq_addq. */ + PKTCNTR_ADD(&cl->cl_dropcnt, len); + return (ENOBUFS); + } + IFQ_INC_LEN(ifq); + + return (0); +} + +/* + * fairq_dequeue is a dequeue function to be registered to + * (*altq_dequeue) in struct ifaltq. + * + * note: ALTDQ_POLL returns the next packet without removing the packet + * from the queue. ALTDQ_REMOVE is a normal dequeue operation. + * ALTDQ_REMOVE must return the same packet if called immediately + * after ALTDQ_POLL. + */ +static struct mbuf * +fairq_dequeue(struct ifaltq *ifq, int op) +{ + struct fairq_if *pif = (struct fairq_if *)ifq->altq_disc; + struct fairq_class *cl; + struct fairq_class *best_cl; + struct mbuf *best_m; + struct mbuf *m = NULL; + uint64_t cur_time = read_machclk(); + int pri; + int hit_limit; + + IFQ_LOCK_ASSERT(ifq); + + if (IFQ_IS_EMPTY(ifq)) { + return (NULL); + } + + if (pif->pif_poll_cache && op == ALTDQ_REMOVE) { + best_cl = pif->pif_poll_cache; + m = fairq_getq(best_cl, cur_time); + pif->pif_poll_cache = NULL; + if (m) { + IFQ_DEC_LEN(ifq); + PKTCNTR_ADD(&best_cl->cl_xmitcnt, m_pktlen(m)); + return (m); + } + } else { + best_cl = NULL; + best_m = NULL; + + for (pri = pif->pif_maxpri; pri >= 0; pri--) { + if ((cl = pif->pif_classes[pri]) == NULL) + continue; + if ((cl->cl_flags & FARF_HAS_PACKETS) == 0) + continue; + m = fairq_pollq(cl, cur_time, &hit_limit); + if (m == NULL) { + cl->cl_flags &= ~FARF_HAS_PACKETS; + continue; + } + + /* + * Only override the best choice if we are under + * the BW limit. + */ + if (hit_limit == 0 || best_cl == NULL) { + best_cl = cl; + best_m = m; + } + + /* + * Remember the highest priority mbuf in case we + * do not find any lower priority mbufs. + */ + if (hit_limit) + continue; + break; + } + if (op == ALTDQ_POLL) { + pif->pif_poll_cache = best_cl; + m = best_m; + } else if (best_cl) { + m = fairq_getq(best_cl, cur_time); + if (m != NULL) { + IFQ_DEC_LEN(ifq); + PKTCNTR_ADD(&best_cl->cl_xmitcnt, m_pktlen(m)); + } + } + return (m); + } + return (NULL); +} + +static int +fairq_addq(struct fairq_class *cl, struct mbuf *m, u_int32_t bucketid) +{ + fairq_bucket_t *b; + u_int hindex; + uint64_t bw; + + /* + * If the packet doesn't have any keep state put it on the end of + * our queue. XXX this can result in out of order delivery. + */ + if (bucketid == 0) { + if (cl->cl_head) + b = cl->cl_head->prev; + else + b = &cl->cl_buckets[0]; + } else { + hindex = bucketid & cl->cl_nbucket_mask; + b = &cl->cl_buckets[hindex]; + } + + /* + * Add the bucket to the end of the circular list of active buckets. + * + * As a special case we add the bucket to the beginning of the list + * instead of the end if it was not previously on the list and if + * its traffic is less then the hog level. + */ + if (b->in_use == 0) { + b->in_use = 1; + if (cl->cl_head == NULL) { + cl->cl_head = b; + b->next = b; + b->prev = b; + } else { + b->next = cl->cl_head; + b->prev = cl->cl_head->prev; + b->prev->next = b; + b->next->prev = b; + + if (b->bw_delta && cl->cl_hogs_m1) { + bw = b->bw_bytes * machclk_freq / b->bw_delta; + if (bw < cl->cl_hogs_m1) + cl->cl_head = b; + } + } + } + +#ifdef ALTQ_RIO + if (cl->cl_qtype == Q_RIO) + return rio_addq((rio_t *)cl->cl_red, &b->queue, m, cl->cl_pktattr); +#endif +#ifdef ALTQ_RED + if (cl->cl_qtype == Q_RED) + return red_addq(cl->cl_red, &b->queue, m, cl->cl_pktattr); +#endif +#ifdef ALTQ_CODEL + if (cl->cl_qtype == Q_CODEL) + return codel_addq(cl->cl_codel, &b->queue, m); +#endif + if (qlen(&b->queue) >= qlimit(&b->queue)) { + m_freem(m); + return (-1); + } + + if (cl->cl_flags & FARF_CLEARDSCP) + write_dsfield(m, cl->cl_pktattr, 0); + + _addq(&b->queue, m); + + return (0); +} + +static struct mbuf * +fairq_getq(struct fairq_class *cl, uint64_t cur_time) +{ + fairq_bucket_t *b; + struct mbuf *m; + + b = fairq_selectq(cl, 0); + if (b == NULL) + m = NULL; +#ifdef ALTQ_RIO + else if (cl->cl_qtype == Q_RIO) + m = rio_getq((rio_t *)cl->cl_red, &b->queue); +#endif +#ifdef ALTQ_RED + else if (cl->cl_qtype == Q_RED) + m = red_getq(cl->cl_red, &b->queue); +#endif +#ifdef ALTQ_CODEL + else if (cl->cl_qtype == Q_CODEL) + m = codel_getq(cl->cl_codel, &b->queue); +#endif + else + m = _getq(&b->queue); + + /* + * Calculate the BW change + */ + if (m != NULL) { + uint64_t delta; + + /* + * Per-class bandwidth calculation + */ + delta = (cur_time - cl->cl_last_time); + if (delta > machclk_freq * 8) + delta = machclk_freq * 8; + cl->cl_bw_delta += delta; + cl->cl_bw_bytes += m->m_pkthdr.len; + cl->cl_last_time = cur_time; + cl->cl_bw_delta -= cl->cl_bw_delta >> 3; + cl->cl_bw_bytes -= cl->cl_bw_bytes >> 3; + + /* + * Per-bucket bandwidth calculation + */ + delta = (cur_time - b->last_time); + if (delta > machclk_freq * 8) + delta = machclk_freq * 8; + b->bw_delta += delta; + b->bw_bytes += m->m_pkthdr.len; + b->last_time = cur_time; + b->bw_delta -= b->bw_delta >> 3; + b->bw_bytes -= b->bw_bytes >> 3; + } + return(m); +} + +/* + * Figure out what the next packet would be if there were no limits. If + * this class hits its bandwidth limit *hit_limit is set to no-zero, otherwise + * it is set to 0. A non-NULL mbuf is returned either way. + */ +static struct mbuf * +fairq_pollq(struct fairq_class *cl, uint64_t cur_time, int *hit_limit) +{ + fairq_bucket_t *b; + struct mbuf *m; + uint64_t delta; + uint64_t bw; + + *hit_limit = 0; + b = fairq_selectq(cl, 1); + if (b == NULL) + return(NULL); + m = qhead(&b->queue); + + /* + * Did this packet exceed the class bandwidth? Calculate the + * bandwidth component of the packet. + * + * - Calculate bytes per second + */ + delta = cur_time - cl->cl_last_time; + if (delta > machclk_freq * 8) + delta = machclk_freq * 8; + cl->cl_bw_delta += delta; + cl->cl_last_time = cur_time; + if (cl->cl_bw_delta) { + bw = cl->cl_bw_bytes * machclk_freq / cl->cl_bw_delta; + + if (bw > cl->cl_bandwidth) + *hit_limit = 1; +#ifdef ALTQ_DEBUG + printf("BW %6ju relative to %6u %d queue %p\n", + (uintmax_t)bw, cl->cl_bandwidth, *hit_limit, b); +#endif + } + return(m); +} + +/* + * Locate the next queue we want to pull a packet out of. This code + * is also responsible for removing empty buckets from the circular list. + */ +static +fairq_bucket_t * +fairq_selectq(struct fairq_class *cl, int ispoll) +{ + fairq_bucket_t *b; + uint64_t bw; + + if (ispoll == 0 && cl->cl_polled) { + b = cl->cl_polled; + cl->cl_polled = NULL; + return(b); + } + + while ((b = cl->cl_head) != NULL) { + /* + * Remove empty queues from consideration + */ + if (qempty(&b->queue)) { + b->in_use = 0; + cl->cl_head = b->next; + if (cl->cl_head == b) { + cl->cl_head = NULL; + } else { + b->next->prev = b->prev; + b->prev->next = b->next; + } + continue; + } + + /* + * Advance the round robin. Queues with bandwidths less + * then the hog bandwidth are allowed to burst. + */ + if (cl->cl_hogs_m1 == 0) { + cl->cl_head = b->next; + } else if (b->bw_delta) { + bw = b->bw_bytes * machclk_freq / b->bw_delta; + if (bw >= cl->cl_hogs_m1) { + cl->cl_head = b->next; + } + /* + * XXX TODO - + */ + } + + /* + * Return bucket b. + */ + break; + } + if (ispoll) + cl->cl_polled = b; + return(b); +} + +static void +fairq_purgeq(struct fairq_class *cl) +{ + fairq_bucket_t *b; + struct mbuf *m; + + while ((b = fairq_selectq(cl, 0)) != NULL) { + while ((m = _getq(&b->queue)) != NULL) { + PKTCNTR_ADD(&cl->cl_dropcnt, m_pktlen(m)); + m_freem(m); + } + ASSERT(qlen(&b->queue) == 0); + } +} + +static void +get_class_stats(struct fairq_classstats *sp, struct fairq_class *cl) +{ + fairq_bucket_t *b; + + sp->class_handle = cl->cl_handle; + sp->qlimit = cl->cl_qlimit; + sp->xmit_cnt = cl->cl_xmitcnt; + sp->drop_cnt = cl->cl_dropcnt; + sp->qtype = cl->cl_qtype; + sp->qlength = 0; + + if (cl->cl_head) { + b = cl->cl_head; + do { + sp->qlength += qlen(&b->queue); + b = b->next; + } while (b != cl->cl_head); + } + +#ifdef ALTQ_RED + if (cl->cl_qtype == Q_RED) + red_getstats(cl->cl_red, &sp->red[0]); +#endif +#ifdef ALTQ_RIO + if (cl->cl_qtype == Q_RIO) + rio_getstats((rio_t *)cl->cl_red, &sp->red[0]); +#endif +#ifdef ALTQ_CODEL + if (cl->cl_qtype == Q_CODEL) + codel_getstats(cl->cl_codel, &sp->codel); +#endif +} + +/* convert a class handle to the corresponding class pointer */ +static struct fairq_class * +clh_to_clp(struct fairq_if *pif, uint32_t chandle) +{ + struct fairq_class *cl; + int idx; + + if (chandle == 0) + return (NULL); + + for (idx = pif->pif_maxpri; idx >= 0; idx--) + if ((cl = pif->pif_classes[idx]) != NULL && + cl->cl_handle == chandle) + return (cl); + + return (NULL); +} + +#endif /* ALTQ_FAIRQ */ diff --git a/sys/contrib/altq/altq/altq_fairq.h b/sys/contrib/altq/altq/altq_fairq.h new file mode 100644 index 0000000..8fede5b --- /dev/null +++ b/sys/contrib/altq/altq/altq_fairq.h @@ -0,0 +1,145 @@ +/* + * Copyright (c) 2008 The DragonFly Project. All rights reserved. + * + * This code is derived from software contributed to The DragonFly Project + * by Matthew Dillon <dillon@backplane.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * 3. Neither the name of The DragonFly Project nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific, prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $DragonFly: src/sys/net/altq/altq_fairq.h,v 1.1 2008/04/06 18:58:15 dillon Exp $ + * $FreeBSD$ + */ + +#ifndef _ALTQ_ALTQ_FAIRQ_H_ +#define _ALTQ_ALTQ_FAIRQ_H_ + +#include <altq/altq.h> +#include <altq/altq_classq.h> +#include <altq/altq_codel.h> +#include <altq/altq_red.h> +#include <altq/altq_rio.h> +#include <altq/altq_rmclass.h> + +#define FAIRQ_MAX_BUCKETS 2048 /* maximum number of sorting buckets */ +#define FAIRQ_MAXPRI RM_MAXPRIO +#define FAIRQ_BITMAP_WIDTH (sizeof(fairq_bitmap_t)*8) +#define FAIRQ_BITMAP_MASK (FAIRQ_BITMAP_WIDTH - 1) + +/* fairq class flags */ +#define FARF_RED 0x0001 /* use RED */ +#define FARF_ECN 0x0002 /* use RED/ECN */ +#define FARF_RIO 0x0004 /* use RIO */ +#define FARF_CODEL 0x0008 /* use CoDel */ +#define FARF_CLEARDSCP 0x0010 /* clear diffserv codepoint */ +#define FARF_DEFAULTCLASS 0x1000 /* default class */ + +#define FARF_HAS_PACKETS 0x2000 /* might have queued packets */ + +#define FARF_USERFLAGS (FARF_RED|FARF_ECN|FARF_RIO|FARF_CLEARDSCP| \ + FARF_DEFAULTCLASS) + +/* special class handles */ +#define FAIRQ_NULLCLASS_HANDLE 0 + +typedef u_int fairq_bitmap_t; + +struct fairq_classstats { + uint32_t class_handle; + + u_int qlength; + u_int qlimit; + struct pktcntr xmit_cnt; /* transmitted packet counter */ + struct pktcntr drop_cnt; /* dropped packet counter */ + + /* codel, red and rio related info */ + int qtype; + struct redstats red[3]; /* rio has 3 red stats */ + struct codel_stats codel; +}; + +#ifdef _KERNEL + +typedef struct fairq_bucket { + struct fairq_bucket *next; /* circular list */ + struct fairq_bucket *prev; /* circular list */ + class_queue_t queue; /* the actual queue */ + uint64_t bw_bytes; /* statistics used to calculate bw */ + uint64_t bw_delta; /* statistics used to calculate bw */ + uint64_t last_time; + int in_use; +} fairq_bucket_t; + +struct fairq_class { + uint32_t cl_handle; /* class handle */ + u_int cl_nbuckets; /* (power of 2) */ + u_int cl_nbucket_mask; /* bucket mask */ + fairq_bucket_t *cl_buckets; + fairq_bucket_t *cl_head; /* head of circular bucket list */ + fairq_bucket_t *cl_polled; + union { + struct red *cl_red; /* RED state */ + struct codel *cl_codel; /* CoDel state */ + } cl_aqm; +#define cl_red cl_aqm.cl_red +#define cl_codel cl_aqm.cl_codel + u_int cl_hogs_m1; + u_int cl_lssc_m1; + u_int cl_bandwidth; + uint64_t cl_bw_bytes; + uint64_t cl_bw_delta; + uint64_t cl_last_time; + int cl_qtype; /* rollup */ + int cl_qlimit; + int cl_pri; /* priority */ + int cl_flags; /* class flags */ + struct fairq_if *cl_pif; /* back pointer to pif */ + struct altq_pktattr *cl_pktattr; /* saved header used by ECN */ + + /* round robin index */ + + /* statistics */ + struct pktcntr cl_xmitcnt; /* transmitted packet counter */ + struct pktcntr cl_dropcnt; /* dropped packet counter */ +}; + +/* + * fairq interface state + */ +struct fairq_if { + struct fairq_if *pif_next; /* interface state list */ + struct ifaltq *pif_ifq; /* backpointer to ifaltq */ + u_int pif_bandwidth; /* link bandwidth in bps */ + int pif_maxpri; /* max priority in use */ + struct fairq_class *pif_poll_cache;/* cached poll */ + struct fairq_class *pif_default; /* default class */ + struct fairq_class *pif_classes[FAIRQ_MAXPRI]; /* classes */ +}; + +#endif /* _KERNEL */ + +#endif /* _ALTQ_ALTQ_FAIRQ_H_ */ diff --git a/sys/contrib/altq/altq/altq_hfsc.c b/sys/contrib/altq/altq/altq_hfsc.c index 0363016..d4f7ab9 100644 --- a/sys/contrib/altq/altq/altq_hfsc.c +++ b/sys/contrib/altq/altq/altq_hfsc.c @@ -391,6 +391,14 @@ hfsc_class_create(struct hfsc_if *hif, struct service_curve *rsc, return (NULL); } #endif +#ifndef ALTQ_CODEL + if (flags & HFCF_CODEL) { +#ifdef ALTQ_DEBUG + printf("hfsc_class_create: CODEL not configured for HFSC!\n"); +#endif + return (NULL); + } +#endif cl = malloc(sizeof(struct hfsc_class), M_DEVBUF, M_NOWAIT | M_ZERO); if (cl == NULL) @@ -407,6 +415,7 @@ hfsc_class_create(struct hfsc_if *hif, struct service_curve *rsc, qlimit(cl->cl_q) = qlimit; qtype(cl->cl_q) = Q_DROPTAIL; qlen(cl->cl_q) = 0; + qsize(cl->cl_q) = 0; cl->cl_flags = flags; #ifdef ALTQ_RED if (flags & (HFCF_RED|HFCF_RIO)) { @@ -451,6 +460,13 @@ hfsc_class_create(struct hfsc_if *hif, struct service_curve *rsc, #endif } #endif /* ALTQ_RED */ +#ifdef ALTQ_CODEL + if (flags & HFCF_CODEL) { + cl->cl_codel = codel_alloc(5, 100, 0); + if (cl->cl_codel != NULL) + qtype(cl->cl_q) = Q_CODEL; + } +#endif if (rsc != NULL && (rsc->m1 != 0 || rsc->m2 != 0)) { cl->cl_rsc = malloc(sizeof(struct internal_sc), @@ -543,6 +559,10 @@ hfsc_class_create(struct hfsc_if *hif, struct service_curve *rsc, if (q_is_red(cl->cl_q)) red_destroy(cl->cl_red); #endif +#ifdef ALTQ_CODEL + if (q_is_codel(cl->cl_q)) + codel_destroy(cl->cl_codel); +#endif } if (cl->cl_fsc != NULL) free(cl->cl_fsc, M_DEVBUF); @@ -617,6 +637,10 @@ hfsc_class_destroy(struct hfsc_class *cl) if (q_is_red(cl->cl_q)) red_destroy(cl->cl_red); #endif +#ifdef ALTQ_CODEL + if (q_is_codel(cl->cl_q)) + codel_destroy(cl->cl_codel); +#endif } IFQ_LOCK(cl->cl_hif->hif_ifq); @@ -844,6 +868,10 @@ hfsc_addq(struct hfsc_class *cl, struct mbuf *m) if (q_is_red(cl->cl_q)) return red_addq(cl->cl_red, cl->cl_q, m, cl->cl_pktattr); #endif +#ifdef ALTQ_CODEL + if (q_is_codel(cl->cl_q)) + return codel_addq(cl->cl_codel, cl->cl_q, m); +#endif if (qlen(cl->cl_q) >= qlimit(cl->cl_q)) { m_freem(m); return (-1); @@ -868,6 +896,10 @@ hfsc_getq(struct hfsc_class *cl) if (q_is_red(cl->cl_q)) return red_getq(cl->cl_red, cl->cl_q); #endif +#ifdef ALTQ_CODEL + if (q_is_codel(cl->cl_q)) + return codel_getq(cl->cl_codel, cl->cl_q); +#endif return _getq(cl->cl_q); } @@ -1652,6 +1684,10 @@ get_class_stats(struct hfsc_classstats *sp, struct hfsc_class *cl) if (q_is_rio(cl->cl_q)) rio_getstats((rio_t *)cl->cl_red, &sp->red[0]); #endif +#ifdef ALTQ_CODEL + if (q_is_codel(cl->cl_q)) + codel_getstats(cl->cl_codel, &sp->codel); +#endif } /* convert a class handle to the corresponding class pointer */ diff --git a/sys/contrib/altq/altq/altq_hfsc.h b/sys/contrib/altq/altq/altq_hfsc.h index e5595cb..b0228c5 100644 --- a/sys/contrib/altq/altq/altq_hfsc.h +++ b/sys/contrib/altq/altq/altq_hfsc.h @@ -34,6 +34,7 @@ #include <altq/altq.h> #include <altq/altq_classq.h> +#include <altq/altq_codel.h> #include <altq/altq_red.h> #include <altq/altq_rio.h> @@ -55,6 +56,7 @@ struct service_curve { #define HFCF_RED 0x0001 /* use RED */ #define HFCF_ECN 0x0002 /* use RED/ECN */ #define HFCF_RIO 0x0004 /* use RIO */ +#define HFCF_CODEL 0x0008 /* use CoDel */ #define HFCF_CLEARDSCP 0x0010 /* clear diffserv codepoint */ #define HFCF_DEFAULTCLASS 0x1000 /* default class */ @@ -101,9 +103,10 @@ struct hfsc_classstats { u_int parentperiod; /* parent's vt period seqno */ int nactive; /* number of active children */ - /* red and rio related info */ + /* codel, red and rio related info */ int qtype; struct redstats red[3]; + struct codel_stats codel; }; #ifdef ALTQ3_COMPAT @@ -229,7 +232,12 @@ struct hfsc_class { struct hfsc_class *cl_children; /* child classes */ class_queue_t *cl_q; /* class queue structure */ - struct red *cl_red; /* RED state */ + union { + struct red *cl_red; /* RED state */ + struct codel *cl_codel; /* CoDel state */ + } cl_aqm; +#define cl_red cl_aqm.cl_red +#define cl_codel cl_aqm.cl_codel struct altq_pktattr *cl_pktattr; /* saved header used by ECN */ u_int64_t cl_total; /* total work in bytes */ diff --git a/sys/contrib/altq/altq/altq_priq.c b/sys/contrib/altq/altq/altq_priq.c index 3ce65dc..c77ba40 100644 --- a/sys/contrib/altq/altq/altq_priq.c +++ b/sys/contrib/altq/altq/altq_priq.c @@ -297,6 +297,14 @@ priq_class_create(struct priq_if *pif, int pri, int qlimit, int flags, int qid) return (NULL); } #endif +#ifndef ALTQ_CODEL + if (flags & PRCF_CODEL) { +#ifdef ALTQ_DEBUG + printf("priq_class_create: CODEL not configured for PRIQ!\n"); +#endif + return (NULL); + } +#endif if ((cl = pif->pif_classes[pri]) != NULL) { /* modify the class instead of creating a new one */ @@ -318,6 +326,10 @@ priq_class_create(struct priq_if *pif, int pri, int qlimit, int flags, int qid) if (q_is_red(cl->cl_q)) red_destroy(cl->cl_red); #endif +#ifdef ALTQ_CODEL + if (q_is_codel(cl->cl_q)) + codel_destroy(cl->cl_codel); +#endif } else { cl = malloc(sizeof(struct priq_class), M_DEVBUF, M_NOWAIT | M_ZERO); @@ -338,6 +350,7 @@ priq_class_create(struct priq_if *pif, int pri, int qlimit, int flags, int qid) qlimit(cl->cl_q) = qlimit; qtype(cl->cl_q) = Q_DROPTAIL; qlen(cl->cl_q) = 0; + qsize(cl->cl_q) = 0; cl->cl_flags = flags; cl->cl_pri = pri; if (pri > pif->pif_maxpri) @@ -381,6 +394,13 @@ priq_class_create(struct priq_if *pif, int pri, int qlimit, int flags, int qid) } } #endif /* ALTQ_RED */ +#ifdef ALTQ_CODEL + if (flags & PRCF_CODEL) { + cl->cl_codel = codel_alloc(5, 100, 0); + if (cl->cl_codel != NULL) + qtype(cl->cl_q) = Q_CODEL; + } +#endif return (cl); @@ -394,6 +414,10 @@ priq_class_create(struct priq_if *pif, int pri, int qlimit, int flags, int qid) if (q_is_red(cl->cl_q)) red_destroy(cl->cl_red); #endif +#ifdef ALTQ_CODEL + if (q_is_codel(cl->cl_q)) + codel_destroy(cl->cl_codel); +#endif } if (cl->cl_q != NULL) free(cl->cl_q, M_DEVBUF); @@ -445,6 +469,10 @@ priq_class_destroy(struct priq_class *cl) if (q_is_red(cl->cl_q)) red_destroy(cl->cl_red); #endif +#ifdef ALTQ_CODEL + if (q_is_codel(cl->cl_q)) + codel_destroy(cl->cl_codel); +#endif } free(cl->cl_q, M_DEVBUF); free(cl, M_DEVBUF); @@ -560,6 +588,10 @@ priq_addq(struct priq_class *cl, struct mbuf *m) if (q_is_red(cl->cl_q)) return red_addq(cl->cl_red, cl->cl_q, m, cl->cl_pktattr); #endif +#ifdef ALTQ_CODEL + if (q_is_codel(cl->cl_q)) + return codel_addq(cl->cl_codel, cl->cl_q, m); +#endif if (qlen(cl->cl_q) >= qlimit(cl->cl_q)) { m_freem(m); return (-1); @@ -584,6 +616,10 @@ priq_getq(struct priq_class *cl) if (q_is_red(cl->cl_q)) return red_getq(cl->cl_red, cl->cl_q); #endif +#ifdef ALTQ_CODEL + if (q_is_codel(cl->cl_q)) + return codel_getq(cl->cl_codel, cl->cl_q); +#endif return _getq(cl->cl_q); } @@ -628,7 +664,10 @@ get_class_stats(struct priq_classstats *sp, struct priq_class *cl) if (q_is_rio(cl->cl_q)) rio_getstats((rio_t *)cl->cl_red, &sp->red[0]); #endif - +#ifdef ALTQ_CODEL + if (q_is_codel(cl->cl_q)) + codel_getstats(cl->cl_codel, &sp->codel); +#endif } /* convert a class handle to the corresponding class pointer */ diff --git a/sys/contrib/altq/altq/altq_priq.h b/sys/contrib/altq/altq/altq_priq.h index 481d31b..110847d 100644 --- a/sys/contrib/altq/altq/altq_priq.h +++ b/sys/contrib/altq/altq/altq_priq.h @@ -30,6 +30,7 @@ #include <altq/altq.h> #include <altq/altq_classq.h> +#include <altq/altq_codel.h> #include <altq/altq_red.h> #include <altq/altq_rio.h> @@ -59,6 +60,7 @@ struct priq_add_class { #define PRCF_RED 0x0001 /* use RED */ #define PRCF_ECN 0x0002 /* use RED/ECN */ #define PRCF_RIO 0x0004 /* use RIO */ +#define PRCF_CODEL 0x0008 /* use CoDel */ #define PRCF_CLEARDSCP 0x0010 /* clear diffserv codepoint */ #define PRCF_DEFAULTCLASS 0x1000 /* default class */ @@ -102,9 +104,10 @@ struct priq_classstats { struct pktcntr xmitcnt; /* transmitted packet counter */ struct pktcntr dropcnt; /* dropped packet counter */ - /* red and rio related info */ + /* codel, red and rio related info */ int qtype; struct redstats red[3]; /* rio has 3 red stats */ + struct codel_stats codel; }; #ifdef ALTQ3_COMPAT @@ -134,7 +137,12 @@ struct priq_class_stats { struct priq_class { u_int32_t cl_handle; /* class handle */ class_queue_t *cl_q; /* class queue structure */ - struct red *cl_red; /* RED state */ + union { + struct red *cl_red; /* RED state */ + struct codel *cl_codel; /* CoDel state */ + } cl_aqm; +#define cl_red cl_aqm.cl_red +#define cl_codel cl_aqm.cl_codel int cl_pri; /* priority */ int cl_flags; /* class flags */ struct priq_if *cl_pif; /* back pointer to pif */ diff --git a/sys/contrib/altq/altq/altq_rmclass.c b/sys/contrib/altq/altq/altq_rmclass.c index c433024..656d0ee 100644 --- a/sys/contrib/altq/altq/altq_rmclass.c +++ b/sys/contrib/altq/altq/altq_rmclass.c @@ -68,6 +68,7 @@ #include <altq/if_altq.h> #include <altq/altq.h> +#include <altq/altq_codel.h> #include <altq/altq_rmclass.h> #include <altq/altq_rmclass_debug.h> #include <altq/altq_red.h> @@ -218,6 +219,14 @@ rmc_newclass(int pri, struct rm_ifdat *ifd, u_int nsecPerByte, return (NULL); } #endif +#ifndef ALTQ_CODEL + if (flags & RMCF_CODEL) { +#ifdef ALTQ_DEBUG + printf("rmc_newclass: CODEL not configured for CBQ!\n"); +#endif + return (NULL); + } +#endif cl = malloc(sizeof(struct rm_class), M_DEVBUF, M_NOWAIT | M_ZERO); if (cl == NULL) @@ -302,6 +311,13 @@ rmc_newclass(int pri, struct rm_ifdat *ifd, u_int nsecPerByte, #endif } #endif /* ALTQ_RED */ +#ifdef ALTQ_CODEL + if (flags & RMCF_CODEL) { + cl->codel_ = codel_alloc(5, 100, 0); + if (cl->codel_ != NULL) + qtype(cl->q_) = Q_CODEL; + } +#endif /* * put the class into the class tree @@ -652,6 +668,10 @@ rmc_delete_class(struct rm_ifdat *ifd, struct rm_class *cl) if (q_is_red(cl->q_)) red_destroy(cl->red_); #endif +#ifdef ALTQ_CODEL + if (q_is_codel(cl->q_)) + codel_destroy(cl->codel_); +#endif } free(cl->q_, M_DEVBUF); free(cl, M_DEVBUF); @@ -1618,6 +1638,10 @@ _rmc_addq(rm_class_t *cl, mbuf_t *m) if (q_is_red(cl->q_)) return red_addq(cl->red_, cl->q_, m, cl->pktattr_); #endif /* ALTQ_RED */ +#ifdef ALTQ_CODEL + if (q_is_codel(cl->q_)) + return codel_addq(cl->codel_, cl->q_, m); +#endif if (cl->flags_ & RMCF_CLEARDSCP) write_dsfield(m, cl->pktattr_, 0); @@ -1647,6 +1671,10 @@ _rmc_getq(rm_class_t *cl) if (q_is_red(cl->q_)) return red_getq(cl->red_, cl->q_); #endif +#ifdef ALTQ_CODEL + if (q_is_codel(cl->q_)) + return codel_getq(cl->codel_, cl->q_); +#endif return _getq(cl->q_); } @@ -1717,7 +1745,8 @@ void cbqtrace_dump(int counter) #endif /* CBQ_TRACE */ #endif /* ALTQ_CBQ */ -#if defined(ALTQ_CBQ) || defined(ALTQ_RED) || defined(ALTQ_RIO) || defined(ALTQ_HFSC) || defined(ALTQ_PRIQ) +#if defined(ALTQ_CBQ) || defined(ALTQ_RED) || defined(ALTQ_RIO) || \ + defined(ALTQ_HFSC) || defined(ALTQ_PRIQ) || defined(ALTQ_CODEL) #if !defined(__GNUC__) || defined(ALTQ_DEBUG) void diff --git a/sys/contrib/altq/altq/altq_rmclass.h b/sys/contrib/altq/altq/altq_rmclass.h index cf0ddf4..782bc13 100644 --- a/sys/contrib/altq/altq/altq_rmclass.h +++ b/sys/contrib/altq/altq/altq_rmclass.h @@ -164,7 +164,12 @@ struct rm_class { void (*overlimit)(struct rm_class *, struct rm_class *); void (*drop)(struct rm_class *); /* Class drop action. */ - struct red *red_; /* RED state pointer */ + union { + struct red *red_; /* RED state pointer */ + struct codel *codel_; /* codel state pointer */ + } cl_aqm_; +#define red_ cl_aqm_.red_ +#define codel_ cl_aqm_.codel_ struct altq_pktattr *pktattr_; /* saved hdr used by RED/ECN */ int flags_; @@ -233,6 +238,7 @@ struct rm_ifdat { #define RMCF_RIO 0x0004 #define RMCF_FLOWVALVE 0x0008 /* use flowvalve (aka penalty-box) */ #define RMCF_CLEARDSCP 0x0010 /* clear diffserv codepoint */ +#define RMCF_CODEL 0x0020 /* flags for rmc_init */ #define RMCF_WRR 0x0100 diff --git a/sys/contrib/altq/altq/altq_subr.c b/sys/contrib/altq/altq/altq_subr.c index 16b796a..bddf73f 100644 --- a/sys/contrib/altq/altq/altq_subr.c +++ b/sys/contrib/altq/altq/altq_subr.c @@ -537,6 +537,16 @@ altq_pfattach(struct pf_altq *a) error = hfsc_pfattach(a); break; #endif +#ifdef ALTQ_FAIRQ + case ALTQT_FAIRQ: + error = fairq_pfattach(a); + break; +#endif +#ifdef ALTQ_CODEL + case ALTQT_CODEL: + error = codel_pfattach(a); + break; +#endif default: error = ENXIO; } @@ -612,6 +622,16 @@ altq_add(struct pf_altq *a) error = hfsc_add_altq(a); break; #endif +#ifdef ALTQ_FAIRQ + case ALTQT_FAIRQ: + error = fairq_add_altq(a); + break; +#endif +#ifdef ALTQ_CODEL + case ALTQT_CODEL: + error = codel_add_altq(a); + break; +#endif default: error = ENXIO; } @@ -648,6 +668,16 @@ altq_remove(struct pf_altq *a) error = hfsc_remove_altq(a); break; #endif +#ifdef ALTQ_FAIRQ + case ALTQT_FAIRQ: + error = fairq_remove_altq(a); + break; +#endif +#ifdef ALTQ_CODEL + case ALTQT_CODEL: + error = codel_remove_altq(a); + break; +#endif default: error = ENXIO; } @@ -681,6 +711,11 @@ altq_add_queue(struct pf_altq *a) error = hfsc_add_queue(a); break; #endif +#ifdef ALTQ_FAIRQ + case ALTQT_FAIRQ: + error = fairq_add_queue(a); + break; +#endif default: error = ENXIO; } @@ -714,6 +749,11 @@ altq_remove_queue(struct pf_altq *a) error = hfsc_remove_queue(a); break; #endif +#ifdef ALTQ_FAIRQ + case ALTQT_FAIRQ: + error = fairq_remove_queue(a); + break; +#endif default: error = ENXIO; } @@ -747,6 +787,16 @@ altq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes) error = hfsc_getqstats(a, ubuf, nbytes); break; #endif +#ifdef ALTQ_FAIRQ + case ALTQT_FAIRQ: + error = fairq_getqstats(a, ubuf, nbytes); + break; +#endif +#ifdef ALTQ_CODEL + case ALTQT_CODEL: + error = codel_getqstats(a, ubuf, nbytes); + break; +#endif default: error = ENXIO; } diff --git a/sys/contrib/altq/altq/altq_var.h b/sys/contrib/altq/altq/altq_var.h index 956ee16..355c94c 100644 --- a/sys/contrib/altq/altq/altq_var.h +++ b/sys/contrib/altq/altq/altq_var.h @@ -243,6 +243,11 @@ int cbq_add_queue(struct pf_altq *); int cbq_remove_queue(struct pf_altq *); int cbq_getqstats(struct pf_altq *, void *, int *); +int codel_pfattach(struct pf_altq *); +int codel_add_altq(struct pf_altq *); +int codel_remove_altq(struct pf_altq *); +int codel_getqstats(struct pf_altq *, void *, int *); + int priq_pfattach(struct pf_altq *); int priq_add_altq(struct pf_altq *); int priq_remove_altq(struct pf_altq *); @@ -257,5 +262,12 @@ int hfsc_add_queue(struct pf_altq *); int hfsc_remove_queue(struct pf_altq *); int hfsc_getqstats(struct pf_altq *, void *, int *); +int fairq_pfattach(struct pf_altq *); +int fairq_add_altq(struct pf_altq *); +int fairq_remove_altq(struct pf_altq *); +int fairq_add_queue(struct pf_altq *); +int fairq_remove_queue(struct pf_altq *); +int fairq_getqstats(struct pf_altq *, void *, int *); + #endif /* _KERNEL */ #endif /* _ALTQ_ALTQ_VAR_H_ */ diff --git a/sys/netpfil/pf/pf.c b/sys/netpfil/pf/pf.c index 4563ee4..ebda220 100644 --- a/sys/netpfil/pf/pf.c +++ b/sys/netpfil/pf/pf.c @@ -614,6 +614,20 @@ pf_hashsrc(struct pf_addr *addr, sa_family_t af) return (h & pf_srchashmask); } +#ifdef ALTQ +static int +pf_state_hash(struct pf_state *s) +{ + u_int32_t hv = (intptr_t)s / sizeof(*s); + + hv ^= crc32(&s->src, sizeof(s->src)); + hv ^= crc32(&s->dst, sizeof(s->dst)); + if (hv == 0) + hv = 1; + return (hv); +} +#endif + #ifdef INET6 void pf_addrcpy(struct pf_addr *dst, struct pf_addr *src, sa_family_t af) @@ -6439,6 +6453,8 @@ done: pd.act.qid = r->qid; } if (action == PF_PASS && pd.act.qid) { + if (s != NULL) + pd.pf_mtag->qid_hash = pf_state_hash(s); if (pqid || (pd.tos & IPTOS_LOWDELAY)) pd.pf_mtag->qid = pd.act.pqid; else @@ -7006,6 +7022,8 @@ done: pd.act.qid = r->qid; } if (action == PF_PASS && pd.act.qid) { + if (s != NULL) + pd.pf_mtag->qid_hash = pf_state_hash(s); if (pd.tos & IPTOS_LOWDELAY) pd.pf_mtag->qid = pd.act.pqid; else diff --git a/sys/netpfil/pf/pf_altq.h b/sys/netpfil/pf/pf_altq.h index eda0965..3efd4ff 100644 --- a/sys/netpfil/pf/pf_altq.h +++ b/sys/netpfil/pf/pf_altq.h @@ -45,6 +45,12 @@ struct cbq_opts { int flags; }; +struct codel_opts { + u_int target; + u_int interval; + int ecn; +}; + struct priq_opts { int flags; }; @@ -65,6 +71,20 @@ struct hfsc_opts { int flags; }; +/* + * XXX this needs some work + */ +struct fairq_opts { + u_int nbuckets; + u_int hogs_m1; + int flags; + + /* link sharing service curve */ + u_int lssc_m1; + u_int lssc_d; + u_int lssc_m2; +}; + struct pf_altq { char ifname[IFNAMSIZ]; @@ -89,8 +109,10 @@ struct pf_altq { uint16_t flags; /* misc flags */ union { struct cbq_opts cbq_opts; + struct codel_opts codel_opts; struct priq_opts priq_opts; struct hfsc_opts hfsc_opts; + struct fairq_opts fairq_opts; } pq_u; uint32_t qid; /* return value */ diff --git a/sys/netpfil/pf/pf_mtag.h b/sys/netpfil/pf/pf_mtag.h index 3aacb2e..fd8554a 100644 --- a/sys/netpfil/pf/pf_mtag.h +++ b/sys/netpfil/pf/pf_mtag.h @@ -44,6 +44,7 @@ struct pf_mtag { void *hdr; /* saved hdr pos in mbuf, for ECN */ u_int32_t qid; /* queue id */ + u_int32_t qid_hash; /* queue hashid used by WFQ like algos */ u_int16_t tag; /* tag id */ u_int8_t flags; u_int8_t routed; |