author	glebius <glebius@FreeBSD.org>	2015-04-16 20:22:40 +0000
committer	glebius <glebius@FreeBSD.org>	2015-04-16 20:22:40 +0000
commit	a29f5e7ca80965b7551a01382485f2e7cba838ec (patch)
tree	c8f5ed55ba8f99a6440ebcb37c38c74604cb8cc7 /sys/net/altq
parent	6a373c232e9be2bf3c8f6ce3cb113bf7e749e9ff (diff)
Move ALTQ from contrib to net/altq.

The ALTQ code has been discontinued by its original authors for many years.
In FreeBSD the code was already slightly edited during the pf(4) SMP project,
and it is about to be edited more in projects/ifnet. Moving it out of contrib
also allows several hacks in the make glue to be removed.

Reviewed by:	net@
Diffstat (limited to 'sys/net/altq')
-rw-r--r--	sys/net/altq/altq.h	204
-rw-r--r--	sys/net/altq/altq_cbq.c	1173
-rw-r--r--	sys/net/altq/altq_cbq.h	222
-rw-r--r--	sys/net/altq/altq_cdnr.c	1390
-rw-r--r--	sys/net/altq/altq_cdnr.h	336
-rw-r--r--	sys/net/altq/altq_classq.h	206
-rw-r--r--	sys/net/altq/altq_hfsc.c	2222
-rw-r--r--	sys/net/altq/altq_hfsc.h	311
-rw-r--r--	sys/net/altq/altq_priq.c	1046
-rw-r--r--	sys/net/altq/altq_priq.h	172
-rw-r--r--	sys/net/altq/altq_red.c	1500
-rw-r--r--	sys/net/altq/altq_red.h	199
-rw-r--r--	sys/net/altq/altq_rio.c	852
-rw-r--r--	sys/net/altq/altq_rio.h	145
-rw-r--r--	sys/net/altq/altq_rmclass.c	1836
-rw-r--r--	sys/net/altq/altq_rmclass.h	267
-rw-r--r--	sys/net/altq/altq_rmclass_debug.h	112
-rw-r--r--	sys/net/altq/altq_subr.c	1981
-rw-r--r--	sys/net/altq/altq_var.h	261
-rw-r--r--	sys/net/altq/altqconf.h	29
-rw-r--r--	sys/net/altq/if_altq.h	190
21 files changed, 14654 insertions, 0 deletions
diff --git a/sys/net/altq/altq.h b/sys/net/altq/altq.h
new file mode 100644
index 0000000..d018243
--- /dev/null
+++ b/sys/net/altq/altq.h
@@ -0,0 +1,204 @@
+/* $FreeBSD$ */
+/* $KAME: altq.h,v 1.10 2003/07/10 12:07:47 kjc Exp $ */
+
+/*
+ * Copyright (C) 1998-2003
+ * Sony Computer Science Laboratories Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#ifndef _ALTQ_ALTQ_H_
+#define _ALTQ_ALTQ_H_
+
+#if 0
+/*
+ * allow altq-3 (altqd(8) and /dev/altq) to coexist with the new pf-based altq.
+ * altq3 is mainly for research experiments. pf-based altq is for daily use.
+ */
+#define ALTQ3_COMPAT /* for compatibility with altq-3 */
+#define ALTQ3_CLFIER_COMPAT /* for compatibility with altq-3 classifier */
+#endif
+
+#ifdef ALTQ3_COMPAT
+#include <sys/param.h>
+#include <sys/ioccom.h>
+#include <sys/queue.h>
+#include <netinet/in.h>
+
+#ifndef IFNAMSIZ
+#define IFNAMSIZ 16
+#endif
+#endif /* ALTQ3_COMPAT */
+
+/* altq discipline type */
+#define ALTQT_NONE 0 /* reserved */
+#define ALTQT_CBQ 1 /* cbq */
+#define ALTQT_WFQ 2 /* wfq */
+#define ALTQT_AFMAP 3 /* afmap */
+#define ALTQT_FIFOQ 4 /* fifoq */
+#define ALTQT_RED 5 /* red */
+#define ALTQT_RIO 6 /* rio */
+#define ALTQT_LOCALQ 7 /* local use */
+#define ALTQT_HFSC 8 /* hfsc */
+#define ALTQT_CDNR 9 /* traffic conditioner */
+#define ALTQT_BLUE 10 /* blue */
+#define ALTQT_PRIQ 11 /* priority queue */
+#define ALTQT_JOBS 12 /* JoBS */
+#define ALTQT_MAX 13 /* should be max discipline type + 1 */
+
+#ifdef ALTQ3_COMPAT
+struct altqreq {
+ char ifname[IFNAMSIZ]; /* if name, e.g. "en0" */
+ u_long arg; /* request-specific argument */
+};
+#endif
+
+/* simple token bucket meter profile */
+struct tb_profile {
+ u_int rate; /* rate in bit-per-sec */
+ u_int depth; /* depth in bytes */
+};
+
+#ifdef ALTQ3_COMPAT
+struct tbrreq {
+ char ifname[IFNAMSIZ]; /* if name, e.g. "en0" */
+ struct tb_profile tb_prof; /* token bucket profile */
+};
+
+#ifdef ALTQ3_CLFIER_COMPAT
+/*
+ * common network flow info structure
+ */
+struct flowinfo {
+ u_char fi_len; /* total length */
+ u_char fi_family; /* address family */
+ u_int8_t fi_data[46]; /* actually longer; address family
+ specific flow info. */
+};
+
+/*
+ * flow info structure for internet protocol family.
+ * (currently this is the only protocol family supported)
+ */
+struct flowinfo_in {
+ u_char fi_len; /* sizeof(struct flowinfo_in) */
+ u_char fi_family; /* AF_INET */
+ u_int8_t fi_proto; /* IPPROTO_XXX */
+ u_int8_t fi_tos; /* type-of-service */
+ struct in_addr fi_dst; /* dest address */
+ struct in_addr fi_src; /* src address */
+ u_int16_t fi_dport; /* dest port */
+ u_int16_t fi_sport; /* src port */
+ u_int32_t fi_gpi; /* generalized port id for ipsec */
+ u_int8_t _pad[28]; /* make the size equal to
+ flowinfo_in6 */
+};
+
+#ifdef SIN6_LEN
+struct flowinfo_in6 {
+ u_char fi6_len; /* sizeof(struct flowinfo_in6) */
+ u_char fi6_family; /* AF_INET6 */
+ u_int8_t fi6_proto; /* IPPROTO_XXX */
+ u_int8_t fi6_tclass; /* traffic class */
+ u_int32_t fi6_flowlabel; /* ipv6 flowlabel */
+ u_int16_t fi6_dport; /* dest port */
+ u_int16_t fi6_sport; /* src port */
+ u_int32_t fi6_gpi; /* generalized port id */
+ struct in6_addr fi6_dst; /* dest address */
+ struct in6_addr fi6_src; /* src address */
+};
+#endif /* SIN6_LEN */
+
+/*
+ * flow filters for AF_INET and AF_INET6
+ */
+struct flow_filter {
+ int ff_ruleno;
+ struct flowinfo_in ff_flow;
+ struct {
+ struct in_addr mask_dst;
+ struct in_addr mask_src;
+ u_int8_t mask_tos;
+ u_int8_t _pad[3];
+ } ff_mask;
+ u_int8_t _pad2[24]; /* make the size equal to flow_filter6 */
+};
+
+#ifdef SIN6_LEN
+struct flow_filter6 {
+ int ff_ruleno;
+ struct flowinfo_in6 ff_flow6;
+ struct {
+ struct in6_addr mask6_dst;
+ struct in6_addr mask6_src;
+ u_int8_t mask6_tclass;
+ u_int8_t _pad[3];
+ } ff_mask6;
+};
+#endif /* SIN6_LEN */
+#endif /* ALTQ3_CLFIER_COMPAT */
+#endif /* ALTQ3_COMPAT */
+
+/*
+ * generic packet counter
+ */
+struct pktcntr {
+ u_int64_t packets;
+ u_int64_t bytes;
+};
+
+#define PKTCNTR_ADD(cntr, len) \
+ do { (cntr)->packets++; (cntr)->bytes += len; } while (/*CONSTCOND*/ 0)
+
+#ifdef ALTQ3_COMPAT
+/*
+ * altq related ioctls
+ */
+#define ALTQGTYPE _IOWR('q', 0, struct altqreq) /* get queue type */
+#if 0
+/*
+ * these ioctls are currently discipline-specific but could be shared
+ * in the future.
+ */
+#define ALTQATTACH _IOW('q', 1, struct altqreq) /* attach discipline */
+#define ALTQDETACH _IOW('q', 2, struct altqreq) /* detach discipline */
+#define ALTQENABLE _IOW('q', 3, struct altqreq) /* enable discipline */
+#define ALTQDISABLE _IOW('q', 4, struct altqreq) /* disable discipline*/
+#define ALTQCLEAR _IOW('q', 5, struct altqreq) /* (re)initialize */
+#define ALTQCONFIG _IOWR('q', 6, struct altqreq) /* set config params */
+#define ALTQADDCLASS _IOWR('q', 7, struct altqreq) /* add a class */
+#define ALTQMODCLASS _IOWR('q', 8, struct altqreq) /* modify a class */
+#define ALTQDELCLASS _IOWR('q', 9, struct altqreq) /* delete a class */
+#define ALTQADDFILTER _IOWR('q', 10, struct altqreq) /* add a filter */
+#define ALTQDELFILTER _IOWR('q', 11, struct altqreq) /* delete a filter */
+#define ALTQGETSTATS _IOWR('q', 12, struct altqreq) /* get statistics */
+#define ALTQGETCNTR _IOWR('q', 13, struct altqreq) /* get a pkt counter */
+#endif /* 0 */
+#define ALTQTBRSET _IOW('q', 14, struct tbrreq) /* set tb regulator */
+#define ALTQTBRGET _IOWR('q', 15, struct tbrreq) /* get tb regulator */
+#endif /* ALTQ3_COMPAT */
+
+#ifdef _KERNEL
+#include <net/altq/altq_var.h>
+#endif
+
+#endif /* _ALTQ_ALTQ_H_ */
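A minimal sketch of how the declarations above are typically consumed from
kernel code; it is illustrative only (not part of this commit) and the profile
values are hypothetical:

	#include <net/altq/altq.h>

	/* token bucket profile: 10 Mbit/s sustained rate, 12 KB burst depth */
	struct tb_profile prof = {
		.rate  = 10000000,	/* bits per second */
		.depth = 12000,		/* bytes */
	};

	/* per-class packet/byte accounting with the generic counter */
	struct pktcntr cnt = { 0, 0 };
	PKTCNTR_ADD(&cnt, 1500);	/* account one 1500-byte packet */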
diff --git a/sys/net/altq/altq_cbq.c b/sys/net/altq/altq_cbq.c
new file mode 100644
index 0000000..1a7f4ca
--- /dev/null
+++ b/sys/net/altq/altq_cbq.c
@@ -0,0 +1,1173 @@
+/* $FreeBSD$ */
+/* $KAME: altq_cbq.c,v 1.19 2003/09/17 14:23:25 kjc Exp $ */
+
+/*
+ * Copyright (c) Sun Microsystems, Inc. 1993-1998 All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the SMCC Technology
+ * Development Group at Sun Microsystems, Inc.
+ *
+ * 4. The name of the Sun Microsystems, Inc nor may not be used to endorse or
+ * promote products derived from this software without specific prior
+ * written permission.
+ *
+ * SUN MICROSYSTEMS DOES NOT CLAIM MERCHANTABILITY OF THIS SOFTWARE OR THE
+ * SUITABILITY OF THIS SOFTWARE FOR ANY PARTICULAR PURPOSE. The software is
+ * provided "as is" without express or implied warranty of any kind.
+ *
+ * These notices must be retained in any copies of any part of this software.
+ */
+
+#if defined(__FreeBSD__) || defined(__NetBSD__)
+#include "opt_altq.h"
+#include "opt_inet.h"
+#ifdef __FreeBSD__
+#include "opt_inet6.h"
+#endif
+#endif /* __FreeBSD__ || __NetBSD__ */
+#ifdef ALTQ_CBQ /* cbq is enabled by ALTQ_CBQ option in opt_altq.h */
+
+#include <sys/param.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/errno.h>
+#include <sys/time.h>
+#ifdef ALTQ3_COMPAT
+#include <sys/uio.h>
+#include <sys/kernel.h>
+#endif
+
+#include <net/if.h>
+#include <net/if_var.h>
+#include <netinet/in.h>
+
+#include <netpfil/pf/pf.h>
+#include <netpfil/pf/pf_altq.h>
+#include <netpfil/pf/pf_mtag.h>
+#include <net/altq/altq.h>
+#include <net/altq/altq_cbq.h>
+#ifdef ALTQ3_COMPAT
+#include <net/altq/altq_conf.h>
+#endif
+
+#ifdef ALTQ3_COMPAT
+/*
+ * Local Data structures.
+ */
+static cbq_state_t *cbq_list = NULL;
+#endif
+
+/*
+ * Forward Declarations.
+ */
+static int cbq_class_destroy(cbq_state_t *, struct rm_class *);
+static struct rm_class *clh_to_clp(cbq_state_t *, u_int32_t);
+static int cbq_clear_interface(cbq_state_t *);
+static int cbq_request(struct ifaltq *, int, void *);
+static int cbq_enqueue(struct ifaltq *, struct mbuf *,
+ struct altq_pktattr *);
+static struct mbuf *cbq_dequeue(struct ifaltq *, int);
+static void cbqrestart(struct ifaltq *);
+static void get_class_stats(class_stats_t *, struct rm_class *);
+static void cbq_purge(cbq_state_t *);
+#ifdef ALTQ3_COMPAT
+static int cbq_add_class(struct cbq_add_class *);
+static int cbq_delete_class(struct cbq_delete_class *);
+static int cbq_modify_class(struct cbq_modify_class *);
+static int cbq_class_create(cbq_state_t *, struct cbq_add_class *,
+ struct rm_class *, struct rm_class *);
+static int cbq_clear_hierarchy(struct cbq_interface *);
+static int cbq_set_enable(struct cbq_interface *, int);
+static int cbq_ifattach(struct cbq_interface *);
+static int cbq_ifdetach(struct cbq_interface *);
+static int cbq_getstats(struct cbq_getstats *);
+
+static int cbq_add_filter(struct cbq_add_filter *);
+static int cbq_delete_filter(struct cbq_delete_filter *);
+#endif /* ALTQ3_COMPAT */
+
+/*
+ * int
+ * cbq_class_destroy(cbq_state_t *, struct rm_class *) - This
+ * function destroys a given traffic class. Before destroying
+ * the class, all traffic for that class is released.
+ */
+static int
+cbq_class_destroy(cbq_state_t *cbqp, struct rm_class *cl)
+{
+ int i;
+
+ /* delete the class */
+ rmc_delete_class(&cbqp->ifnp, cl);
+
+ /*
+ * free the class handle
+ */
+ for (i = 0; i < CBQ_MAX_CLASSES; i++)
+ if (cbqp->cbq_class_tbl[i] == cl)
+ cbqp->cbq_class_tbl[i] = NULL;
+
+ if (cl == cbqp->ifnp.root_)
+ cbqp->ifnp.root_ = NULL;
+ if (cl == cbqp->ifnp.default_)
+ cbqp->ifnp.default_ = NULL;
+#ifdef ALTQ3_COMPAT
+ if (cl == cbqp->ifnp.ctl_)
+ cbqp->ifnp.ctl_ = NULL;
+#endif
+ return (0);
+}
+
+/* convert class handle to class pointer */
+static struct rm_class *
+clh_to_clp(cbq_state_t *cbqp, u_int32_t chandle)
+{
+ int i;
+ struct rm_class *cl;
+
+ if (chandle == 0)
+ return (NULL);
+ /*
+ * first, try optimistically the slot matching the lower bits of
+ * the handle. if it fails, do the linear table search.
+ */
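+ /*
+ * For example (illustrative): with CBQ_MAX_CLASSES == 256, a handle of
+ * 0x103 probes slot 0x103 % 256 == 3 first; the linear scan below runs
+ * only if that slot is empty or holds a class with a different handle.
+ */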
+ i = chandle % CBQ_MAX_CLASSES;
+ if ((cl = cbqp->cbq_class_tbl[i]) != NULL &&
+ cl->stats_.handle == chandle)
+ return (cl);
+ for (i = 0; i < CBQ_MAX_CLASSES; i++)
+ if ((cl = cbqp->cbq_class_tbl[i]) != NULL &&
+ cl->stats_.handle == chandle)
+ return (cl);
+ return (NULL);
+}
+
+static int
+cbq_clear_interface(cbq_state_t *cbqp)
+{
+ int again, i;
+ struct rm_class *cl;
+
+#ifdef ALTQ3_CLFIER_COMPAT
+ /* free the filters for this interface */
+ acc_discard_filters(&cbqp->cbq_classifier, NULL, 1);
+#endif
+
+ /* clear out the classes now */
+ do {
+ again = 0;
+ for (i = 0; i < CBQ_MAX_CLASSES; i++) {
+ if ((cl = cbqp->cbq_class_tbl[i]) != NULL) {
+ if (is_a_parent_class(cl))
+ again++;
+ else {
+ cbq_class_destroy(cbqp, cl);
+ cbqp->cbq_class_tbl[i] = NULL;
+ if (cl == cbqp->ifnp.root_)
+ cbqp->ifnp.root_ = NULL;
+ if (cl == cbqp->ifnp.default_)
+ cbqp->ifnp.default_ = NULL;
+#ifdef ALTQ3_COMPAT
+ if (cl == cbqp->ifnp.ctl_)
+ cbqp->ifnp.ctl_ = NULL;
+#endif
+ }
+ }
+ }
+ } while (again);
+
+ return (0);
+}
+
+static int
+cbq_request(struct ifaltq *ifq, int req, void *arg)
+{
+ cbq_state_t *cbqp = (cbq_state_t *)ifq->altq_disc;
+
+ IFQ_LOCK_ASSERT(ifq);
+
+ switch (req) {
+ case ALTRQ_PURGE:
+ cbq_purge(cbqp);
+ break;
+ }
+ return (0);
+}
+
+/* copy the stats info in rm_class to class_stats_t */
+static void
+get_class_stats(class_stats_t *statsp, struct rm_class *cl)
+{
+ statsp->xmit_cnt = cl->stats_.xmit_cnt;
+ statsp->drop_cnt = cl->stats_.drop_cnt;
+ statsp->over = cl->stats_.over;
+ statsp->borrows = cl->stats_.borrows;
+ statsp->overactions = cl->stats_.overactions;
+ statsp->delays = cl->stats_.delays;
+
+ statsp->depth = cl->depth_;
+ statsp->priority = cl->pri_;
+ statsp->maxidle = cl->maxidle_;
+ statsp->minidle = cl->minidle_;
+ statsp->offtime = cl->offtime_;
+ statsp->qmax = qlimit(cl->q_);
+ statsp->ns_per_byte = cl->ns_per_byte_;
+ statsp->wrr_allot = cl->w_allotment_;
+ statsp->qcnt = qlen(cl->q_);
+ statsp->avgidle = cl->avgidle_;
+
+ statsp->qtype = qtype(cl->q_);
+#ifdef ALTQ_RED
+ if (q_is_red(cl->q_))
+ red_getstats(cl->red_, &statsp->red[0]);
+#endif
+#ifdef ALTQ_RIO
+ if (q_is_rio(cl->q_))
+ rio_getstats((rio_t *)cl->red_, &statsp->red[0]);
+#endif
+}
+
+int
+cbq_pfattach(struct pf_altq *a)
+{
+ struct ifnet *ifp;
+ int s, error;
+
+ if ((ifp = ifunit(a->ifname)) == NULL || a->altq_disc == NULL)
+ return (EINVAL);
+#ifdef __NetBSD__
+ s = splnet();
+#else
+ s = splimp();
+#endif
+ error = altq_attach(&ifp->if_snd, ALTQT_CBQ, a->altq_disc,
+ cbq_enqueue, cbq_dequeue, cbq_request, NULL, NULL);
+ splx(s);
+ return (error);
+}
+
+int
+cbq_add_altq(struct pf_altq *a)
+{
+ cbq_state_t *cbqp;
+ struct ifnet *ifp;
+
+ if ((ifp = ifunit(a->ifname)) == NULL)
+ return (EINVAL);
+ if (!ALTQ_IS_READY(&ifp->if_snd))
+ return (ENODEV);
+
+ /* allocate and initialize cbq_state_t */
+ cbqp = malloc(sizeof(cbq_state_t), M_DEVBUF, M_NOWAIT | M_ZERO);
+ if (cbqp == NULL)
+ return (ENOMEM);
+ CALLOUT_INIT(&cbqp->cbq_callout);
+ cbqp->cbq_qlen = 0;
+ cbqp->ifnp.ifq_ = &ifp->if_snd; /* keep the ifq */
+
+ /* keep the state in pf_altq */
+ a->altq_disc = cbqp;
+
+ return (0);
+}
+
+int
+cbq_remove_altq(struct pf_altq *a)
+{
+ cbq_state_t *cbqp;
+
+ if ((cbqp = a->altq_disc) == NULL)
+ return (EINVAL);
+ a->altq_disc = NULL;
+
+ cbq_clear_interface(cbqp);
+
+ if (cbqp->ifnp.default_)
+ cbq_class_destroy(cbqp, cbqp->ifnp.default_);
+ if (cbqp->ifnp.root_)
+ cbq_class_destroy(cbqp, cbqp->ifnp.root_);
+
+ /* deallocate cbq_state_t */
+ free(cbqp, M_DEVBUF);
+
+ return (0);
+}
+
+int
+cbq_add_queue(struct pf_altq *a)
+{
+ struct rm_class *borrow, *parent;
+ cbq_state_t *cbqp;
+ struct rm_class *cl;
+ struct cbq_opts *opts;
+ int i;
+
+ if ((cbqp = a->altq_disc) == NULL)
+ return (EINVAL);
+ if (a->qid == 0)
+ return (EINVAL);
+
+ /*
+ * find a free slot in the class table. if the slot matching
+ * the lower bits of qid is free, use this slot. otherwise,
+ * use the first free slot.
+ */
+ i = a->qid % CBQ_MAX_CLASSES;
+ if (cbqp->cbq_class_tbl[i] != NULL) {
+ for (i = 0; i < CBQ_MAX_CLASSES; i++)
+ if (cbqp->cbq_class_tbl[i] == NULL)
+ break;
+ if (i == CBQ_MAX_CLASSES)
+ return (EINVAL);
+ }
+
+ opts = &a->pq_u.cbq_opts;
+ /* check parameters */
+ if (a->priority >= CBQ_MAXPRI)
+ return (EINVAL);
+
+ /* Get pointers to parent and borrow classes. */
+ parent = clh_to_clp(cbqp, a->parent_qid);
+ if (opts->flags & CBQCLF_BORROW)
+ borrow = parent;
+ else
+ borrow = NULL;
+
+ /*
+ * A class must borrow from its parent or it cannot
+ * borrow at all. Hence, borrow can be null.
+ */
+ if (parent == NULL && (opts->flags & CBQCLF_ROOTCLASS) == 0) {
+ printf("cbq_add_queue: no parent class!\n");
+ return (EINVAL);
+ }
+
+ if ((borrow != parent) && (borrow != NULL)) {
+ printf("cbq_add_class: borrow class != parent\n");
+ return (EINVAL);
+ }
+
+ /*
+ * check parameters
+ */
+ switch (opts->flags & CBQCLF_CLASSMASK) {
+ case CBQCLF_ROOTCLASS:
+ if (parent != NULL)
+ return (EINVAL);
+ if (cbqp->ifnp.root_)
+ return (EINVAL);
+ break;
+ case CBQCLF_DEFCLASS:
+ if (cbqp->ifnp.default_)
+ return (EINVAL);
+ break;
+ case 0:
+ if (a->qid == 0)
+ return (EINVAL);
+ break;
+ default:
+ /* more than one class flag bit set */
+ return (EINVAL);
+ }
+
+ /*
+ * create a class. if this is a root class, initialize the
+ * interface.
+ */
+ if ((opts->flags & CBQCLF_CLASSMASK) == CBQCLF_ROOTCLASS) {
+ rmc_init(cbqp->ifnp.ifq_, &cbqp->ifnp, opts->ns_per_byte,
+ cbqrestart, a->qlimit, RM_MAXQUEUED,
+ opts->maxidle, opts->minidle, opts->offtime,
+ opts->flags);
+ cl = cbqp->ifnp.root_;
+ } else {
+ cl = rmc_newclass(a->priority,
+ &cbqp->ifnp, opts->ns_per_byte,
+ rmc_delay_action, a->qlimit, parent, borrow,
+ opts->maxidle, opts->minidle, opts->offtime,
+ opts->pktsize, opts->flags);
+ }
+ if (cl == NULL)
+ return (ENOMEM);
+
+ /* return handle to user space. */
+ cl->stats_.handle = a->qid;
+ cl->stats_.depth = cl->depth_;
+
+ /* save the allocated class */
+ cbqp->cbq_class_tbl[i] = cl;
+
+ if ((opts->flags & CBQCLF_CLASSMASK) == CBQCLF_DEFCLASS)
+ cbqp->ifnp.default_ = cl;
+
+ return (0);
+}
+
+int
+cbq_remove_queue(struct pf_altq *a)
+{
+ struct rm_class *cl;
+ cbq_state_t *cbqp;
+ int i;
+
+ if ((cbqp = a->altq_disc) == NULL)
+ return (EINVAL);
+
+ if ((cl = clh_to_clp(cbqp, a->qid)) == NULL)
+ return (EINVAL);
+
+ /* if we are a parent class, then return an error. */
+ if (is_a_parent_class(cl))
+ return (EINVAL);
+
+ /* delete the class */
+ rmc_delete_class(&cbqp->ifnp, cl);
+
+ /*
+ * free the class handle
+ */
+ for (i = 0; i < CBQ_MAX_CLASSES; i++)
+ if (cbqp->cbq_class_tbl[i] == cl) {
+ cbqp->cbq_class_tbl[i] = NULL;
+ if (cl == cbqp->ifnp.root_)
+ cbqp->ifnp.root_ = NULL;
+ if (cl == cbqp->ifnp.default_)
+ cbqp->ifnp.default_ = NULL;
+ break;
+ }
+
+ return (0);
+}
+
+int
+cbq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes)
+{
+ cbq_state_t *cbqp;
+ struct rm_class *cl;
+ class_stats_t stats;
+ int error = 0;
+
+ if ((cbqp = altq_lookup(a->ifname, ALTQT_CBQ)) == NULL)
+ return (EBADF);
+
+ if ((cl = clh_to_clp(cbqp, a->qid)) == NULL)
+ return (EINVAL);
+
+ if (*nbytes < sizeof(stats))
+ return (EINVAL);
+
+ get_class_stats(&stats, cl);
+
+ if ((error = copyout((caddr_t)&stats, ubuf, sizeof(stats))) != 0)
+ return (error);
+ *nbytes = sizeof(stats);
+ return (0);
+}
+
+/*
+ * int
+ * cbq_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pattr)
+ * - Queue data packets.
+ *
+ * cbq_enqueue is set to ifp->if_altqenqueue and called by an upper
+ * layer (e.g. ether_output). cbq_enqueue queues the given packet
+ * to the cbq, then invokes the driver's start routine.
+ *
+ * Assumptions: called in splimp
+ * Returns: 0 if the queueing is successful.
+ * ENOBUFS if a packet dropping occurred as a result of
+ * the queueing.
+ */
+
+static int
+cbq_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr)
+{
+ cbq_state_t *cbqp = (cbq_state_t *)ifq->altq_disc;
+ struct rm_class *cl;
+ struct pf_mtag *t;
+ int len;
+
+ IFQ_LOCK_ASSERT(ifq);
+
+ /* grab class set by classifier */
+ if ((m->m_flags & M_PKTHDR) == 0) {
+ /* should not happen */
+ printf("altq: packet for %s does not have pkthdr\n",
+ ifq->altq_ifp->if_xname);
+ m_freem(m);
+ return (ENOBUFS);
+ }
+ cl = NULL;
+ if ((t = pf_find_mtag(m)) != NULL)
+ cl = clh_to_clp(cbqp, t->qid);
+#ifdef ALTQ3_COMPAT
+ else if ((ifq->altq_flags & ALTQF_CLASSIFY) && pktattr != NULL)
+ cl = pktattr->pattr_class;
+#endif
+ if (cl == NULL) {
+ cl = cbqp->ifnp.default_;
+ if (cl == NULL) {
+ m_freem(m);
+ return (ENOBUFS);
+ }
+ }
+#ifdef ALTQ3_COMPAT
+ if (pktattr != NULL)
+ cl->pktattr_ = pktattr; /* save proto hdr used by ECN */
+ else
+#endif
+ cl->pktattr_ = NULL;
+ len = m_pktlen(m);
+ if (rmc_queue_packet(cl, m) != 0) {
+ /* drop occurred. some mbuf was freed in rmc_queue_packet. */
+ PKTCNTR_ADD(&cl->stats_.drop_cnt, len);
+ return (ENOBUFS);
+ }
+
+ /* successfully queued. */
+ ++cbqp->cbq_qlen;
+ IFQ_INC_LEN(ifq);
+ return (0);
+}
+
+static struct mbuf *
+cbq_dequeue(struct ifaltq *ifq, int op)
+{
+ cbq_state_t *cbqp = (cbq_state_t *)ifq->altq_disc;
+ struct mbuf *m;
+
+ IFQ_LOCK_ASSERT(ifq);
+
+ m = rmc_dequeue_next(&cbqp->ifnp, op);
+
+ if (m && op == ALTDQ_REMOVE) {
+ --cbqp->cbq_qlen; /* decrement # of packets in cbq */
+ IFQ_DEC_LEN(ifq);
+
+ /* Update the class. */
+ rmc_update_class_util(&cbqp->ifnp);
+ }
+ return (m);
+}
+
+/*
+ * void
+ * cbqrestart(struct ifaltq *) - Restart sending of data.
+ * called from rmc_restart in splimp via timeout after waking up
+ * a suspended class.
+ * Returns: NONE
+ */
+
+static void
+cbqrestart(struct ifaltq *ifq)
+{
+ cbq_state_t *cbqp;
+ struct ifnet *ifp;
+
+ IFQ_LOCK_ASSERT(ifq);
+
+ if (!ALTQ_IS_ENABLED(ifq))
+ /* cbq must have been detached */
+ return;
+
+ if ((cbqp = (cbq_state_t *)ifq->altq_disc) == NULL)
+ /* should not happen */
+ return;
+
+ ifp = ifq->altq_ifp;
+ if (ifp->if_start &&
+ cbqp->cbq_qlen > 0 && (ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) {
+ IFQ_UNLOCK(ifq);
+ (*ifp->if_start)(ifp);
+ IFQ_LOCK(ifq);
+ }
+}
+
+static void cbq_purge(cbq_state_t *cbqp)
+{
+ struct rm_class *cl;
+ int i;
+
+ for (i = 0; i < CBQ_MAX_CLASSES; i++)
+ if ((cl = cbqp->cbq_class_tbl[i]) != NULL)
+ rmc_dropall(cl);
+ if (ALTQ_IS_ENABLED(cbqp->ifnp.ifq_))
+ cbqp->ifnp.ifq_->ifq_len = 0;
+}
+#ifdef ALTQ3_COMPAT
+
+static int
+cbq_add_class(acp)
+ struct cbq_add_class *acp;
+{
+ char *ifacename;
+ struct rm_class *borrow, *parent;
+ cbq_state_t *cbqp;
+
+ ifacename = acp->cbq_iface.cbq_ifacename;
+ if ((cbqp = altq_lookup(ifacename, ALTQT_CBQ)) == NULL)
+ return (EBADF);
+
+ /* check parameters */
+ if (acp->cbq_class.priority >= CBQ_MAXPRI ||
+ acp->cbq_class.maxq > CBQ_MAXQSIZE)
+ return (EINVAL);
+
+ /* Get pointers to parent and borrow classes. */
+ parent = clh_to_clp(cbqp, acp->cbq_class.parent_class_handle);
+ borrow = clh_to_clp(cbqp, acp->cbq_class.borrow_class_handle);
+
+ /*
+ * A class must borrow from its parent or it cannot
+ * borrow at all. Hence, borrow can be null.
+ */
+ if (parent == NULL && (acp->cbq_class.flags & CBQCLF_ROOTCLASS) == 0) {
+ printf("cbq_add_class: no parent class!\n");
+ return (EINVAL);
+ }
+
+ if ((borrow != parent) && (borrow != NULL)) {
+ printf("cbq_add_class: borrow class != parent\n");
+ return (EINVAL);
+ }
+
+ return cbq_class_create(cbqp, acp, parent, borrow);
+}
+
+static int
+cbq_delete_class(dcp)
+ struct cbq_delete_class *dcp;
+{
+ char *ifacename;
+ struct rm_class *cl;
+ cbq_state_t *cbqp;
+
+ ifacename = dcp->cbq_iface.cbq_ifacename;
+ if ((cbqp = altq_lookup(ifacename, ALTQT_CBQ)) == NULL)
+ return (EBADF);
+
+ if ((cl = clh_to_clp(cbqp, dcp->cbq_class_handle)) == NULL)
+ return (EINVAL);
+
+ /* if we are a parent class, then return an error. */
+ if (is_a_parent_class(cl))
+ return (EINVAL);
+
+ /* if a filter has a reference to this class delete the filter */
+ acc_discard_filters(&cbqp->cbq_classifier, cl, 0);
+
+ return cbq_class_destroy(cbqp, cl);
+}
+
+static int
+cbq_modify_class(acp)
+ struct cbq_modify_class *acp;
+{
+ char *ifacename;
+ struct rm_class *cl;
+ cbq_state_t *cbqp;
+
+ ifacename = acp->cbq_iface.cbq_ifacename;
+ if ((cbqp = altq_lookup(ifacename, ALTQT_CBQ)) == NULL)
+ return (EBADF);
+
+ /* Get pointer to this class */
+ if ((cl = clh_to_clp(cbqp, acp->cbq_class_handle)) == NULL)
+ return (EINVAL);
+
+ if (rmc_modclass(cl, acp->cbq_class.nano_sec_per_byte,
+ acp->cbq_class.maxq, acp->cbq_class.maxidle,
+ acp->cbq_class.minidle, acp->cbq_class.offtime,
+ acp->cbq_class.pktsize) < 0)
+ return (EINVAL);
+ return (0);
+}
+
+/*
+ * static int
+ * cbq_class_create(cbq_state_t *cbqp, struct cbq_add_class *acp,
+ * struct rm_class *parent, struct rm_class *borrow)
+ *
+ * This function creates a new traffic class in the CBQ class hierarchy
+ * from the given parameters. The class created is either the root, the
+ * default, or a new dynamic class. If CBQ is not initialized, the root
+ * class will be created.
+ */
+static int
+cbq_class_create(cbqp, acp, parent, borrow)
+ cbq_state_t *cbqp;
+ struct cbq_add_class *acp;
+ struct rm_class *parent, *borrow;
+{
+ struct rm_class *cl;
+ cbq_class_spec_t *spec = &acp->cbq_class;
+ u_int32_t chandle;
+ int i;
+
+ /*
+ * allocate class handle
+ */
+ for (i = 1; i < CBQ_MAX_CLASSES; i++)
+ if (cbqp->cbq_class_tbl[i] == NULL)
+ break;
+ if (i == CBQ_MAX_CLASSES)
+ return (EINVAL);
+ chandle = i; /* use the slot number as class handle */
+
+ /*
+ * create a class. if this is a root class, initialize the
+ * interface.
+ */
+ if ((spec->flags & CBQCLF_CLASSMASK) == CBQCLF_ROOTCLASS) {
+ rmc_init(cbqp->ifnp.ifq_, &cbqp->ifnp, spec->nano_sec_per_byte,
+ cbqrestart, spec->maxq, RM_MAXQUEUED,
+ spec->maxidle, spec->minidle, spec->offtime,
+ spec->flags);
+ cl = cbqp->ifnp.root_;
+ } else {
+ cl = rmc_newclass(spec->priority,
+ &cbqp->ifnp, spec->nano_sec_per_byte,
+ rmc_delay_action, spec->maxq, parent, borrow,
+ spec->maxidle, spec->minidle, spec->offtime,
+ spec->pktsize, spec->flags);
+ }
+ if (cl == NULL)
+ return (ENOMEM);
+
+ /* return handle to user space. */
+ acp->cbq_class_handle = chandle;
+
+ cl->stats_.handle = chandle;
+ cl->stats_.depth = cl->depth_;
+
+ /* save the allocated class */
+ cbqp->cbq_class_tbl[i] = cl;
+
+ if ((spec->flags & CBQCLF_CLASSMASK) == CBQCLF_DEFCLASS)
+ cbqp->ifnp.default_ = cl;
+ if ((spec->flags & CBQCLF_CLASSMASK) == CBQCLF_CTLCLASS)
+ cbqp->ifnp.ctl_ = cl;
+
+ return (0);
+}
+
+static int
+cbq_add_filter(afp)
+ struct cbq_add_filter *afp;
+{
+ char *ifacename;
+ cbq_state_t *cbqp;
+ struct rm_class *cl;
+
+ ifacename = afp->cbq_iface.cbq_ifacename;
+ if ((cbqp = altq_lookup(ifacename, ALTQT_CBQ)) == NULL)
+ return (EBADF);
+
+ /* Get the pointer to class. */
+ if ((cl = clh_to_clp(cbqp, afp->cbq_class_handle)) == NULL)
+ return (EINVAL);
+
+ return acc_add_filter(&cbqp->cbq_classifier, &afp->cbq_filter,
+ cl, &afp->cbq_filter_handle);
+}
+
+static int
+cbq_delete_filter(dfp)
+ struct cbq_delete_filter *dfp;
+{
+ char *ifacename;
+ cbq_state_t *cbqp;
+
+ ifacename = dfp->cbq_iface.cbq_ifacename;
+ if ((cbqp = altq_lookup(ifacename, ALTQT_CBQ)) == NULL)
+ return (EBADF);
+
+ return acc_delete_filter(&cbqp->cbq_classifier,
+ dfp->cbq_filter_handle);
+}
+
+/*
+ * cbq_clear_hierarchy deletes all classes and their filters on the
+ * given interface.
+ */
+static int
+cbq_clear_hierarchy(ifacep)
+ struct cbq_interface *ifacep;
+{
+ char *ifacename;
+ cbq_state_t *cbqp;
+
+ ifacename = ifacep->cbq_ifacename;
+ if ((cbqp = altq_lookup(ifacename, ALTQT_CBQ)) == NULL)
+ return (EBADF);
+
+ return cbq_clear_interface(cbqp);
+}
+
+/*
+ * static int
+ * cbq_set_enable(struct cbq_interface *ep, int enable) - this function processes the
+ * ioctl request to enable class based queueing. It searches the list
+ * of interfaces for the specified interface and then enables CBQ on
+ * that interface.
+ *
+ * Returns: 0, for no error.
+ * EBADF, for specified interface not found.
+ */
+
+static int
+cbq_set_enable(ep, enable)
+ struct cbq_interface *ep;
+ int enable;
+{
+ int error = 0;
+ cbq_state_t *cbqp;
+ char *ifacename;
+
+ ifacename = ep->cbq_ifacename;
+ if ((cbqp = altq_lookup(ifacename, ALTQT_CBQ)) == NULL)
+ return (EBADF);
+
+ switch (enable) {
+ case ENABLE:
+ if (cbqp->ifnp.root_ == NULL || cbqp->ifnp.default_ == NULL ||
+ cbqp->ifnp.ctl_ == NULL) {
+ if (cbqp->ifnp.root_ == NULL)
+ printf("No Root Class for %s\n", ifacename);
+ if (cbqp->ifnp.default_ == NULL)
+ printf("No Default Class for %s\n", ifacename);
+ if (cbqp->ifnp.ctl_ == NULL)
+ printf("No Control Class for %s\n", ifacename);
+ error = EINVAL;
+ } else if ((error = altq_enable(cbqp->ifnp.ifq_)) == 0) {
+ cbqp->cbq_qlen = 0;
+ }
+ break;
+
+ case DISABLE:
+ error = altq_disable(cbqp->ifnp.ifq_);
+ break;
+ }
+ return (error);
+}
+
+static int
+cbq_getstats(gsp)
+ struct cbq_getstats *gsp;
+{
+ char *ifacename;
+ int i, n, nclasses;
+ cbq_state_t *cbqp;
+ struct rm_class *cl;
+ class_stats_t stats, *usp;
+ int error = 0;
+
+ ifacename = gsp->iface.cbq_ifacename;
+ nclasses = gsp->nclasses;
+ usp = gsp->stats;
+
+ if ((cbqp = altq_lookup(ifacename, ALTQT_CBQ)) == NULL)
+ return (EBADF);
+ if (nclasses <= 0)
+ return (EINVAL);
+
+ for (n = 0, i = 0; n < nclasses && i < CBQ_MAX_CLASSES; n++, i++) {
+ while ((cl = cbqp->cbq_class_tbl[i]) == NULL)
+ if (++i >= CBQ_MAX_CLASSES)
+ goto out;
+
+ get_class_stats(&stats, cl);
+ stats.handle = cl->stats_.handle;
+
+ if ((error = copyout((caddr_t)&stats, (caddr_t)usp++,
+ sizeof(stats))) != 0)
+ return (error);
+ }
+
+ out:
+ gsp->nclasses = n;
+ return (error);
+}
+
+static int
+cbq_ifattach(ifacep)
+ struct cbq_interface *ifacep;
+{
+ int error = 0;
+ char *ifacename;
+ cbq_state_t *new_cbqp;
+ struct ifnet *ifp;
+
+ ifacename = ifacep->cbq_ifacename;
+ if ((ifp = ifunit(ifacename)) == NULL)
+ return (ENXIO);
+ if (!ALTQ_IS_READY(&ifp->if_snd))
+ return (ENXIO);
+
+ /* allocate and initialize cbq_state_t */
+ new_cbqp = malloc(sizeof(cbq_state_t), M_DEVBUF, M_WAITOK);
+ if (new_cbqp == NULL)
+ return (ENOMEM);
+ bzero(new_cbqp, sizeof(cbq_state_t));
+ CALLOUT_INIT(&new_cbqp->cbq_callout);
+
+ new_cbqp->cbq_qlen = 0;
+ new_cbqp->ifnp.ifq_ = &ifp->if_snd; /* keep the ifq */
+
+ /*
+ * set CBQ to this ifnet structure.
+ */
+ error = altq_attach(&ifp->if_snd, ALTQT_CBQ, new_cbqp,
+ cbq_enqueue, cbq_dequeue, cbq_request,
+ &new_cbqp->cbq_classifier, acc_classify);
+ if (error) {
+ free(new_cbqp, M_DEVBUF);
+ return (error);
+ }
+
+ /* prepend to the list of cbq_state_t's. */
+ new_cbqp->cbq_next = cbq_list;
+ cbq_list = new_cbqp;
+
+ return (0);
+}
+
+static int
+cbq_ifdetach(ifacep)
+ struct cbq_interface *ifacep;
+{
+ char *ifacename;
+ cbq_state_t *cbqp;
+
+ ifacename = ifacep->cbq_ifacename;
+ if ((cbqp = altq_lookup(ifacename, ALTQT_CBQ)) == NULL)
+ return (EBADF);
+
+ (void)cbq_set_enable(ifacep, DISABLE);
+
+ cbq_clear_interface(cbqp);
+
+ /* remove CBQ from the ifnet structure. */
+ (void)altq_detach(cbqp->ifnp.ifq_);
+
+ /* remove from the list of cbq_state_t's. */
+ if (cbq_list == cbqp)
+ cbq_list = cbqp->cbq_next;
+ else {
+ cbq_state_t *cp;
+
+ for (cp = cbq_list; cp != NULL; cp = cp->cbq_next)
+ if (cp->cbq_next == cbqp) {
+ cp->cbq_next = cbqp->cbq_next;
+ break;
+ }
+ ASSERT(cp != NULL);
+ }
+
+ /* deallocate cbq_state_t */
+ free(cbqp, M_DEVBUF);
+
+ return (0);
+}
+
+/*
+ * cbq device interface
+ */
+
+altqdev_decl(cbq);
+
+int
+cbqopen(dev, flag, fmt, p)
+ dev_t dev;
+ int flag, fmt;
+#if (__FreeBSD_version > 500000)
+ struct thread *p;
+#else
+ struct proc *p;
+#endif
+{
+ return (0);
+}
+
+int
+cbqclose(dev, flag, fmt, p)
+ dev_t dev;
+ int flag, fmt;
+#if (__FreeBSD_version > 500000)
+ struct thread *p;
+#else
+ struct proc *p;
+#endif
+{
+ struct ifnet *ifp;
+ struct cbq_interface iface;
+ int err, error = 0;
+
+ while (cbq_list) {
+ ifp = cbq_list->ifnp.ifq_->altq_ifp;
+ sprintf(iface.cbq_ifacename, "%s", ifp->if_xname);
+ err = cbq_ifdetach(&iface);
+ if (err != 0 && error == 0)
+ error = err;
+ }
+
+ return (error);
+}
+
+int
+cbqioctl(dev, cmd, addr, flag, p)
+ dev_t dev;
+ ioctlcmd_t cmd;
+ caddr_t addr;
+ int flag;
+#if (__FreeBSD_version > 500000)
+ struct thread *p;
+#else
+ struct proc *p;
+#endif
+{
+ int error = 0;
+
+ /* check cmd for superuser only */
+ switch (cmd) {
+ case CBQ_GETSTATS:
+ /* currently only command that an ordinary user can call */
+ break;
+ default:
+#if (__FreeBSD_version > 700000)
+ error = priv_check(p, PRIV_ALTQ_MANAGE);
+#elif (__FreeBSD_version > 400000)
+ error = suser(p);
+#else
+ error = suser(p->p_ucred, &p->p_acflag);
+#endif
+ if (error)
+ return (error);
+ break;
+ }
+
+ switch (cmd) {
+
+ case CBQ_ENABLE:
+ error = cbq_set_enable((struct cbq_interface *)addr, ENABLE);
+ break;
+
+ case CBQ_DISABLE:
+ error = cbq_set_enable((struct cbq_interface *)addr, DISABLE);
+ break;
+
+ case CBQ_ADD_FILTER:
+ error = cbq_add_filter((struct cbq_add_filter *)addr);
+ break;
+
+ case CBQ_DEL_FILTER:
+ error = cbq_delete_filter((struct cbq_delete_filter *)addr);
+ break;
+
+ case CBQ_ADD_CLASS:
+ error = cbq_add_class((struct cbq_add_class *)addr);
+ break;
+
+ case CBQ_DEL_CLASS:
+ error = cbq_delete_class((struct cbq_delete_class *)addr);
+ break;
+
+ case CBQ_MODIFY_CLASS:
+ error = cbq_modify_class((struct cbq_modify_class *)addr);
+ break;
+
+ case CBQ_CLEAR_HIERARCHY:
+ error = cbq_clear_hierarchy((struct cbq_interface *)addr);
+ break;
+
+ case CBQ_IF_ATTACH:
+ error = cbq_ifattach((struct cbq_interface *)addr);
+ break;
+
+ case CBQ_IF_DETACH:
+ error = cbq_ifdetach((struct cbq_interface *)addr);
+ break;
+
+ case CBQ_GETSTATS:
+ error = cbq_getstats((struct cbq_getstats *)addr);
+ break;
+
+ default:
+ error = EINVAL;
+ break;
+ }
+
+ return error;
+}
+
+#if 0
+/* for debug */
+static void cbq_class_dump(int);
+
+static void cbq_class_dump(i)
+ int i;
+{
+ struct rm_class *cl;
+ rm_class_stats_t *s;
+ struct _class_queue_ *q;
+
+ if (cbq_list == NULL) {
+ printf("cbq_class_dump: no cbq_state found\n");
+ return;
+ }
+ cl = cbq_list->cbq_class_tbl[i];
+
+ printf("class %d cl=%p\n", i, cl);
+ if (cl != NULL) {
+ s = &cl->stats_;
+ q = cl->q_;
+
+ printf("pri=%d, depth=%d, maxrate=%d, allotment=%d\n",
+ cl->pri_, cl->depth_, cl->maxrate_, cl->allotment_);
+ printf("w_allotment=%d, bytes_alloc=%d, avgidle=%d, maxidle=%d\n",
+ cl->w_allotment_, cl->bytes_alloc_, cl->avgidle_,
+ cl->maxidle_);
+ printf("minidle=%d, offtime=%d, sleeping=%d, leaf=%d\n",
+ cl->minidle_, cl->offtime_, cl->sleeping_, cl->leaf_);
+ printf("handle=%d, depth=%d, packets=%d, bytes=%d\n",
+ s->handle, s->depth,
+ (int)s->xmit_cnt.packets, (int)s->xmit_cnt.bytes);
+ printf("over=%d\n, borrows=%d, drops=%d, overactions=%d, delays=%d\n",
+ s->over, s->borrows, (int)s->drop_cnt.packets,
+ s->overactions, s->delays);
+ printf("tail=%p, head=%p, qlen=%d, qlim=%d, qthresh=%d,qtype=%d\n",
+ q->tail_, q->head_, q->qlen_, q->qlim_,
+ q->qthresh_, q->qtype_);
+ }
+}
+#endif /* 0 */
+
+#ifdef KLD_MODULE
+
+static struct altqsw cbq_sw =
+ {"cbq", cbqopen, cbqclose, cbqioctl};
+
+ALTQ_MODULE(altq_cbq, ALTQT_CBQ, &cbq_sw);
+MODULE_DEPEND(altq_cbq, altq_red, 1, 1, 1);
+MODULE_DEPEND(altq_cbq, altq_rio, 1, 1, 1);
+
+#endif /* KLD_MODULE */
+#endif /* ALTQ3_COMPAT */
+
+#endif /* ALTQ_CBQ */
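As a rough illustration of how the pf(4)-facing entry points above fit
together (a sketch only, not part of this commit; the flow is simplified and
the pf_altq contents are assumed to have been filled in from a pfctl(8)
request):

	/* hypothetical helper: configure and attach CBQ for one pf_altq entry */
	static int
	example_cbq_setup(struct pf_altq *a)
	{
		int error;

		if ((error = cbq_add_altq(a)) != 0)	/* allocate cbq_state_t */
			return (error);
		if ((error = cbq_add_queue(a)) != 0) {	/* create the class for a->qid */
			cbq_remove_altq(a);
			return (error);
		}
		/* hook cbq_enqueue/cbq_dequeue into the interface's if_snd queue */
		return (cbq_pfattach(a));
	}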
diff --git a/sys/net/altq/altq_cbq.h b/sys/net/altq/altq_cbq.h
new file mode 100644
index 0000000..792c9fd
--- /dev/null
+++ b/sys/net/altq/altq_cbq.h
@@ -0,0 +1,222 @@
+/*-
+ * Copyright (c) Sun Microsystems, Inc. 1993-1998 All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the SMCC Technology
+ * Development Group at Sun Microsystems, Inc.
+ *
+ * 4. The name of the Sun Microsystems, Inc nor may not be used to endorse or
+ * promote products derived from this software without specific prior
+ * written permission.
+ *
+ * SUN MICROSYSTEMS DOES NOT CLAIM MERCHANTABILITY OF THIS SOFTWARE OR THE
+ * SUITABILITY OF THIS SOFTWARE FOR ANY PARTICULAR PURPOSE. The software is
+ * provided "as is" without express or implied warranty of any kind.
+ *
+ * These notices must be retained in any copies of any part of this software.
+ *
+ * $KAME: altq_cbq.h,v 1.12 2003/10/03 05:05:15 kjc Exp $
+ * $FreeBSD$
+ */
+
+#ifndef _ALTQ_ALTQ_CBQ_H_
+#define _ALTQ_ALTQ_CBQ_H_
+
+#include <net/altq/altq.h>
+#include <net/altq/altq_rmclass.h>
+#include <net/altq/altq_red.h>
+#include <net/altq/altq_rio.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define NULL_CLASS_HANDLE 0
+
+/* class flags should be same as class flags in rm_class.h */
+#define CBQCLF_RED 0x0001 /* use RED */
+#define CBQCLF_ECN 0x0002 /* use RED/ECN */
+#define CBQCLF_RIO 0x0004 /* use RIO */
+#define CBQCLF_FLOWVALVE 0x0008 /* use flowvalve (aka penalty-box) */
+#define CBQCLF_CLEARDSCP 0x0010 /* clear diffserv codepoint */
+#define CBQCLF_BORROW 0x0020 /* borrow from parent */
+
+/* class flags only for root class */
+#define CBQCLF_WRR 0x0100 /* weighted-round robin */
+#define CBQCLF_EFFICIENT 0x0200 /* work-conserving */
+
+/* class flags for special classes */
+#define CBQCLF_ROOTCLASS 0x1000 /* root class */
+#define CBQCLF_DEFCLASS 0x2000 /* default class */
+#ifdef ALTQ3_COMPAT
+#define CBQCLF_CTLCLASS 0x4000 /* control class */
+#endif
+#define CBQCLF_CLASSMASK 0xf000 /* class mask */
+
+#define CBQ_MAXQSIZE 200
+#define CBQ_MAXPRI RM_MAXPRIO
+
+typedef struct _cbq_class_stats_ {
+ u_int32_t handle;
+ u_int depth;
+
+ struct pktcntr xmit_cnt; /* packets sent in this class */
+ struct pktcntr drop_cnt; /* dropped packets */
+ u_int over; /* # times went over limit */
+ u_int borrows; /* # times tried to borrow */
+ u_int overactions; /* # times invoked overlimit action */
+ u_int delays; /* # times invoked delay actions */
+
+ /* other static class parameters useful for debugging */
+ int priority;
+ int maxidle;
+ int minidle;
+ int offtime;
+ int qmax;
+ int ns_per_byte;
+ int wrr_allot;
+
+ int qcnt; /* # packets in queue */
+ int avgidle;
+
+ /* red and rio related info */
+ int qtype;
+ struct redstats red[3];
+} class_stats_t;
+
+#ifdef ALTQ3_COMPAT
+/*
+ * Define structures associated with IOCTLS for cbq.
+ */
+
+/*
+ * Define the CBQ interface structure. This must be included in all
+ * IOCTL's such that the CBQ driver may find the appropriate CBQ module
+ * associated with the network interface to be affected.
+ */
+struct cbq_interface {
+ char cbq_ifacename[IFNAMSIZ];
+};
+
+typedef struct cbq_class_spec {
+ u_int priority;
+ u_int nano_sec_per_byte;
+ u_int maxq;
+ u_int maxidle;
+ int minidle;
+ u_int offtime;
+ u_int32_t parent_class_handle;
+ u_int32_t borrow_class_handle;
+
+ u_int pktsize;
+ int flags;
+} cbq_class_spec_t;
+
+struct cbq_add_class {
+ struct cbq_interface cbq_iface;
+
+ cbq_class_spec_t cbq_class;
+ u_int32_t cbq_class_handle;
+};
+
+struct cbq_delete_class {
+ struct cbq_interface cbq_iface;
+ u_int32_t cbq_class_handle;
+};
+
+struct cbq_modify_class {
+ struct cbq_interface cbq_iface;
+
+ cbq_class_spec_t cbq_class;
+ u_int32_t cbq_class_handle;
+};
+
+struct cbq_add_filter {
+ struct cbq_interface cbq_iface;
+ u_int32_t cbq_class_handle;
+ struct flow_filter cbq_filter;
+
+ u_long cbq_filter_handle;
+};
+
+struct cbq_delete_filter {
+ struct cbq_interface cbq_iface;
+ u_long cbq_filter_handle;
+};
+
+/* number of classes are returned in nclasses field */
+struct cbq_getstats {
+ struct cbq_interface iface;
+ int nclasses;
+ class_stats_t *stats;
+};
+
+/*
+ * Define IOCTLs for CBQ.
+ */
+#define CBQ_IF_ATTACH _IOW('Q', 1, struct cbq_interface)
+#define CBQ_IF_DETACH _IOW('Q', 2, struct cbq_interface)
+#define CBQ_ENABLE _IOW('Q', 3, struct cbq_interface)
+#define CBQ_DISABLE _IOW('Q', 4, struct cbq_interface)
+#define CBQ_CLEAR_HIERARCHY _IOW('Q', 5, struct cbq_interface)
+#define CBQ_ADD_CLASS _IOWR('Q', 7, struct cbq_add_class)
+#define CBQ_DEL_CLASS _IOW('Q', 8, struct cbq_delete_class)
+#define CBQ_MODIFY_CLASS _IOWR('Q', 9, struct cbq_modify_class)
+#define CBQ_ADD_FILTER _IOWR('Q', 10, struct cbq_add_filter)
+#define CBQ_DEL_FILTER _IOW('Q', 11, struct cbq_delete_filter)
+#define CBQ_GETSTATS _IOWR('Q', 12, struct cbq_getstats)
+#endif /* ALTQ3_COMPAT */
+
+#ifdef _KERNEL
+/*
+ * Define macros only good for kernel drivers and modules.
+ */
+#define CBQ_WATCHDOG (hz / 20)
+#define CBQ_TIMEOUT 10
+#define CBQ_LS_TIMEOUT (20 * hz / 1000)
+
+#define CBQ_MAX_CLASSES 256
+
+#ifdef ALTQ3_COMPAT
+#define CBQ_MAX_FILTERS 256
+
+#define DISABLE 0x00
+#define ENABLE 0x01
+#endif /* ALTQ3_COMPAT */
+
+/*
+ * Define State structures.
+ */
+typedef struct cbqstate {
+#ifdef ALTQ3_COMPAT
+ struct cbqstate *cbq_next;
+#endif
+ int cbq_qlen; /* # of packets in cbq */
+ struct rm_class *cbq_class_tbl[CBQ_MAX_CLASSES];
+
+ struct rm_ifdat ifnp;
+ struct callout cbq_callout; /* for timeouts */
+#ifdef ALTQ3_CLFIER_COMPAT
+ struct acc_classifier cbq_classifier;
+#endif
+} cbq_state_t;
+
+#endif /* _KERNEL */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* !_ALTQ_ALTQ_CBQ_H_ */
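A small illustration (not part of this commit; the flag combinations are
hypothetical) of how the CBQCLF_CLASSMASK bits declared above select the
special classes recognized by cbq_add_queue():

	int root_flags = CBQCLF_ROOTCLASS | CBQCLF_WRR;
	int leaf_flags = CBQCLF_DEFCLASS | CBQCLF_BORROW | CBQCLF_RED;

	/* (root_flags & CBQCLF_CLASSMASK) == CBQCLF_ROOTCLASS: takes the rmc_init() path */
	/* (leaf_flags & CBQCLF_CLASSMASK) == CBQCLF_DEFCLASS: becomes ifnp.default_ */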
diff --git a/sys/net/altq/altq_cdnr.c b/sys/net/altq/altq_cdnr.c
new file mode 100644
index 0000000..122643a
--- /dev/null
+++ b/sys/net/altq/altq_cdnr.c
@@ -0,0 +1,1390 @@
+/* $FreeBSD$ */
+/* $KAME: altq_cdnr.c,v 1.15 2005/04/13 03:44:24 suz Exp $ */
+
+/*
+ * Copyright (C) 1999-2002
+ * Sony Computer Science Laboratories Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(__FreeBSD__) || defined(__NetBSD__)
+#include "opt_altq.h"
+#include "opt_inet.h"
+#ifdef __FreeBSD__
+#include "opt_inet6.h"
+#endif
+#endif /* __FreeBSD__ || __NetBSD__ */
+
+#include <sys/param.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/sockio.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/errno.h>
+#include <sys/kernel.h>
+#include <sys/queue.h>
+
+#include <net/if.h>
+#include <net/if_types.h>
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#ifdef INET6
+#include <netinet/ip6.h>
+#endif
+
+#include <net/altq/if_altq.h>
+#include <net/altq/altq.h>
+#ifdef ALTQ3_COMPAT
+#include <net/altq/altq_conf.h>
+#endif
+#include <net/altq/altq_cdnr.h>
+
+#ifdef ALTQ3_COMPAT
+/*
+ * diffserv traffic conditioning module
+ */
+
+int altq_cdnr_enabled = 0;
+
+/* traffic conditioner is enabled by ALTQ_CDNR option in opt_altq.h */
+#ifdef ALTQ_CDNR
+
+/* tcb_list keeps all top-level cdnr's allocated. */
+static LIST_HEAD(, top_cdnr) tcb_list;
+
+static int altq_cdnr_input(struct mbuf *, int);
+static struct top_cdnr *tcb_lookup(char *ifname);
+static struct cdnr_block *cdnr_handle2cb(u_long);
+static u_long cdnr_cb2handle(struct cdnr_block *);
+static void *cdnr_cballoc(struct top_cdnr *, int,
+ struct tc_action *(*)(struct cdnr_block *, struct cdnr_pktinfo *));
+static void cdnr_cbdestroy(void *);
+static int tca_verify_action(struct tc_action *);
+static void tca_import_action(struct tc_action *, struct tc_action *);
+static void tca_invalidate_action(struct tc_action *);
+
+static int generic_element_destroy(struct cdnr_block *);
+static struct top_cdnr *top_create(struct ifaltq *);
+static int top_destroy(struct top_cdnr *);
+static struct cdnr_block *element_create(struct top_cdnr *, struct tc_action *);
+static int element_destroy(struct cdnr_block *);
+static void tb_import_profile(struct tbe *, struct tb_profile *);
+static struct tbmeter *tbm_create(struct top_cdnr *, struct tb_profile *,
+ struct tc_action *, struct tc_action *);
+static int tbm_destroy(struct tbmeter *);
+static struct tc_action *tbm_input(struct cdnr_block *, struct cdnr_pktinfo *);
+static struct trtcm *trtcm_create(struct top_cdnr *,
+ struct tb_profile *, struct tb_profile *,
+ struct tc_action *, struct tc_action *, struct tc_action *,
+ int);
+static int trtcm_destroy(struct trtcm *);
+static struct tc_action *trtcm_input(struct cdnr_block *, struct cdnr_pktinfo *);
+static struct tswtcm *tswtcm_create(struct top_cdnr *,
+ u_int32_t, u_int32_t, u_int32_t,
+ struct tc_action *, struct tc_action *, struct tc_action *);
+static int tswtcm_destroy(struct tswtcm *);
+static struct tc_action *tswtcm_input(struct cdnr_block *, struct cdnr_pktinfo *);
+
+static int cdnrcmd_if_attach(char *);
+static int cdnrcmd_if_detach(char *);
+static int cdnrcmd_add_element(struct cdnr_add_element *);
+static int cdnrcmd_delete_element(struct cdnr_delete_element *);
+static int cdnrcmd_add_filter(struct cdnr_add_filter *);
+static int cdnrcmd_delete_filter(struct cdnr_delete_filter *);
+static int cdnrcmd_add_tbm(struct cdnr_add_tbmeter *);
+static int cdnrcmd_modify_tbm(struct cdnr_modify_tbmeter *);
+static int cdnrcmd_tbm_stats(struct cdnr_tbmeter_stats *);
+static int cdnrcmd_add_trtcm(struct cdnr_add_trtcm *);
+static int cdnrcmd_modify_trtcm(struct cdnr_modify_trtcm *);
+static int cdnrcmd_tcm_stats(struct cdnr_tcm_stats *);
+static int cdnrcmd_add_tswtcm(struct cdnr_add_tswtcm *);
+static int cdnrcmd_modify_tswtcm(struct cdnr_modify_tswtcm *);
+static int cdnrcmd_get_stats(struct cdnr_get_stats *);
+
+altqdev_decl(cdnr);
+
+/*
+ * top level input function called from ip_input.
+ * should be called before converting header fields to host-byte-order.
+ */
+int
+altq_cdnr_input(m, af)
+ struct mbuf *m;
+ int af; /* address family */
+{
+ struct ifnet *ifp;
+ struct ip *ip;
+ struct top_cdnr *top;
+ struct tc_action *tca;
+ struct cdnr_block *cb;
+ struct cdnr_pktinfo pktinfo;
+
+ ifp = m->m_pkthdr.rcvif;
+ if (!ALTQ_IS_CNDTNING(&ifp->if_snd))
+ /* traffic conditioner is not enabled on this interface */
+ return (1);
+
+ top = ifp->if_snd.altq_cdnr;
+
+ ip = mtod(m, struct ip *);
+#ifdef INET6
+ if (af == AF_INET6) {
+ u_int32_t flowlabel;
+
+ flowlabel = ((struct ip6_hdr *)ip)->ip6_flow;
+ pktinfo.pkt_dscp = (ntohl(flowlabel) >> 20) & DSCP_MASK;
+ } else
+#endif
+ pktinfo.pkt_dscp = ip->ip_tos & DSCP_MASK;
+ pktinfo.pkt_len = m_pktlen(m);
+
+ tca = NULL;
+
+ cb = acc_classify(&top->tc_classifier, m, af);
+ if (cb != NULL)
+ tca = &cb->cb_action;
+
+ if (tca == NULL)
+ tca = &top->tc_block.cb_action;
+
+ while (1) {
+ PKTCNTR_ADD(&top->tc_cnts[tca->tca_code], pktinfo.pkt_len);
+
+ switch (tca->tca_code) {
+ case TCACODE_PASS:
+ return (1);
+ case TCACODE_DROP:
+ m_freem(m);
+ return (0);
+ case TCACODE_RETURN:
+ return (0);
+ case TCACODE_MARK:
+#ifdef INET6
+ if (af == AF_INET6) {
+ struct ip6_hdr *ip6 = (struct ip6_hdr *)ip;
+ u_int32_t flowlabel;
+
+ flowlabel = ntohl(ip6->ip6_flow);
+ flowlabel = (tca->tca_dscp << 20) |
+ (flowlabel & ~(DSCP_MASK << 20));
+ ip6->ip6_flow = htonl(flowlabel);
+ } else
+#endif
+ ip->ip_tos = tca->tca_dscp |
+ (ip->ip_tos & DSCP_CUMASK);
+ return (1);
+ case TCACODE_NEXT:
+ cb = tca->tca_next;
+ tca = (*cb->cb_input)(cb, &pktinfo);
+ break;
+ case TCACODE_NONE:
+ default:
+ return (1);
+ }
+ }
+}
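+/*
+ * Illustrative element chain (a sketch, not part of this commit): a simple
+ * policer can be built from a token bucket meter whose in-profile action
+ * passes and whose out-of-profile action drops, e.g.
+ *
+ *	struct tc_action in = { .tca_code = TCACODE_PASS };
+ *	struct tc_action out = { .tca_code = TCACODE_DROP };
+ *	tbm = tbm_create(top, &profile, &in, &out);
+ *
+ * altq_cdnr_input() above then follows TCACODE_NEXT actions through
+ * tbm_input() until a terminal PASS, DROP, MARK or RETURN code is reached.
+ */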
+
+static struct top_cdnr *
+tcb_lookup(ifname)
+ char *ifname;
+{
+ struct top_cdnr *top;
+ struct ifnet *ifp;
+
+ if ((ifp = ifunit(ifname)) != NULL)
+ LIST_FOREACH(top, &tcb_list, tc_next)
+ if (top->tc_ifq->altq_ifp == ifp)
+ return (top);
+ return (NULL);
+}
+
+static struct cdnr_block *
+cdnr_handle2cb(handle)
+ u_long handle;
+{
+ struct cdnr_block *cb;
+
+ cb = (struct cdnr_block *)handle;
+ if (handle != ALIGN(cb))
+ return (NULL);
+
+ if (cb == NULL || cb->cb_handle != handle)
+ return (NULL);
+ return (cb);
+}
+
+static u_long
+cdnr_cb2handle(cb)
+ struct cdnr_block *cb;
+{
+ return (cb->cb_handle);
+}
+
+static void *
+cdnr_cballoc(top, type, input_func)
+ struct top_cdnr *top;
+ int type;
+ struct tc_action *(*input_func)(struct cdnr_block *,
+ struct cdnr_pktinfo *);
+{
+ struct cdnr_block *cb;
+ int size;
+
+ switch (type) {
+ case TCETYPE_TOP:
+ size = sizeof(struct top_cdnr);
+ break;
+ case TCETYPE_ELEMENT:
+ size = sizeof(struct cdnr_block);
+ break;
+ case TCETYPE_TBMETER:
+ size = sizeof(struct tbmeter);
+ break;
+ case TCETYPE_TRTCM:
+ size = sizeof(struct trtcm);
+ break;
+ case TCETYPE_TSWTCM:
+ size = sizeof(struct tswtcm);
+ break;
+ default:
+ return (NULL);
+ }
+
+ cb = malloc(size, M_DEVBUF, M_WAITOK);
+ if (cb == NULL)
+ return (NULL);
+ bzero(cb, size);
+
+ cb->cb_len = size;
+ cb->cb_type = type;
+ cb->cb_ref = 0;
+ cb->cb_handle = (u_long)cb;
+ if (top == NULL)
+ cb->cb_top = (struct top_cdnr *)cb;
+ else
+ cb->cb_top = top;
+
+ if (input_func != NULL) {
+ /*
+ * if this cdnr has an input function, set up its
+ * tc_action to dispatch back to this block.
+ */
+ cb->cb_action.tca_code = TCACODE_NEXT;
+ cb->cb_action.tca_next = cb;
+ cb->cb_input = input_func;
+ } else
+ cb->cb_action.tca_code = TCACODE_NONE;
+
+ /* if this isn't top, register the element to the top level cdnr */
+ if (top != NULL)
+ LIST_INSERT_HEAD(&top->tc_elements, cb, cb_next);
+
+ return ((void *)cb);
+}
+
+static void
+cdnr_cbdestroy(cblock)
+ void *cblock;
+{
+ struct cdnr_block *cb = cblock;
+
+ /* delete filters belonging to this cdnr */
+ acc_discard_filters(&cb->cb_top->tc_classifier, cb, 0);
+
+ /* remove from the top level cdnr */
+ if (cb->cb_top != cblock)
+ LIST_REMOVE(cb, cb_next);
+
+ free(cb, M_DEVBUF);
+}
+
+/*
+ * conditioner common destroy routine
+ */
+static int
+generic_element_destroy(cb)
+ struct cdnr_block *cb;
+{
+ int error = 0;
+
+ switch (cb->cb_type) {
+ case TCETYPE_TOP:
+ error = top_destroy((struct top_cdnr *)cb);
+ break;
+ case TCETYPE_ELEMENT:
+ error = element_destroy(cb);
+ break;
+ case TCETYPE_TBMETER:
+ error = tbm_destroy((struct tbmeter *)cb);
+ break;
+ case TCETYPE_TRTCM:
+ error = trtcm_destroy((struct trtcm *)cb);
+ break;
+ case TCETYPE_TSWTCM:
+ error = tswtcm_destroy((struct tswtcm *)cb);
+ break;
+ default:
+ error = EINVAL;
+ }
+ return (error);
+}
+
+static int
+tca_verify_action(utca)
+ struct tc_action *utca;
+{
+ switch (utca->tca_code) {
+ case TCACODE_PASS:
+ case TCACODE_DROP:
+ case TCACODE_MARK:
+ /* these are ok */
+ break;
+
+ case TCACODE_HANDLE:
+ /* verify handle value */
+ if (cdnr_handle2cb(utca->tca_handle) == NULL)
+ return (-1);
+ break;
+
+ case TCACODE_NONE:
+ case TCACODE_RETURN:
+ case TCACODE_NEXT:
+ default:
+ /* should not be passed from a user */
+ return (-1);
+ }
+ return (0);
+}
+
+static void
+tca_import_action(ktca, utca)
+ struct tc_action *ktca, *utca;
+{
+ struct cdnr_block *cb;
+
+ *ktca = *utca;
+ if (ktca->tca_code == TCACODE_HANDLE) {
+ cb = cdnr_handle2cb(ktca->tca_handle);
+ if (cb == NULL) {
+ ktca->tca_code = TCACODE_NONE;
+ return;
+ }
+ ktca->tca_code = TCACODE_NEXT;
+ ktca->tca_next = cb;
+ cb->cb_ref++;
+ } else if (ktca->tca_code == TCACODE_MARK) {
+ ktca->tca_dscp &= DSCP_MASK;
+ }
+ return;
+}
+
+static void
+tca_invalidate_action(tca)
+ struct tc_action *tca;
+{
+ struct cdnr_block *cb;
+
+ if (tca->tca_code == TCACODE_NEXT) {
+ cb = tca->tca_next;
+ if (cb == NULL)
+ return;
+ cb->cb_ref--;
+ }
+ tca->tca_code = TCACODE_NONE;
+}
+
+/*
+ * top level traffic conditioner
+ */
+static struct top_cdnr *
+top_create(ifq)
+ struct ifaltq *ifq;
+{
+ struct top_cdnr *top;
+
+ if ((top = cdnr_cballoc(NULL, TCETYPE_TOP, NULL)) == NULL)
+ return (NULL);
+
+ top->tc_ifq = ifq;
+ /* set default action for the top level conditioner */
+ top->tc_block.cb_action.tca_code = TCACODE_PASS;
+
+ LIST_INSERT_HEAD(&tcb_list, top, tc_next);
+
+ ifq->altq_cdnr = top;
+
+ return (top);
+}
+
+static int
+top_destroy(top)
+ struct top_cdnr *top;
+{
+ struct cdnr_block *cb;
+
+ if (ALTQ_IS_CNDTNING(top->tc_ifq))
+ ALTQ_CLEAR_CNDTNING(top->tc_ifq);
+ top->tc_ifq->altq_cdnr = NULL;
+
+ /*
+ * destroy all the conditioner elements belonging to this interface
+ */
+ while ((cb = LIST_FIRST(&top->tc_elements)) != NULL) {
+ while (cb != NULL && cb->cb_ref > 0)
+ cb = LIST_NEXT(cb, cb_next);
+ if (cb != NULL)
+ generic_element_destroy(cb);
+ }
+
+ LIST_REMOVE(top, tc_next);
+
+ cdnr_cbdestroy(top);
+
+ /* if there is no active conditioner, remove the input hook */
+ if (altq_input != NULL) {
+ LIST_FOREACH(top, &tcb_list, tc_next)
+ if (ALTQ_IS_CNDTNING(top->tc_ifq))
+ break;
+ if (top == NULL)
+ altq_input = NULL;
+ }
+
+ return (0);
+}
+
+/*
+ * simple tc elements without an input function (e.g., droppers and markers).
+ */
+static struct cdnr_block *
+element_create(top, action)
+ struct top_cdnr *top;
+ struct tc_action *action;
+{
+ struct cdnr_block *cb;
+
+ if (tca_verify_action(action) < 0)
+ return (NULL);
+
+ if ((cb = cdnr_cballoc(top, TCETYPE_ELEMENT, NULL)) == NULL)
+ return (NULL);
+
+ tca_import_action(&cb->cb_action, action);
+
+ return (cb);
+}
+
+static int
+element_destroy(cb)
+ struct cdnr_block *cb;
+{
+ if (cb->cb_ref > 0)
+ return (EBUSY);
+
+ tca_invalidate_action(&cb->cb_action);
+
+ cdnr_cbdestroy(cb);
+ return (0);
+}
+
+/*
+ * internal representation of token bucket parameters
+ * rate: byte_per_unittime << 32
+ * (((bits_per_sec) / 8) << 32) / machclk_freq
+ * depth: byte << 32
+ *
+ */
+#define TB_SHIFT 32
+#define TB_SCALE(x) ((u_int64_t)(x) << TB_SHIFT)
+#define TB_UNSCALE(x) ((x) >> TB_SHIFT)
+
+static void
+tb_import_profile(tb, profile)
+ struct tbe *tb;
+ struct tb_profile *profile;
+{
+ tb->rate = TB_SCALE(profile->rate / 8) / machclk_freq;
+ tb->depth = TB_SCALE(profile->depth);
+ if (tb->rate > 0)
+ tb->filluptime = tb->depth / tb->rate;
+ else
+ tb->filluptime = 0xffffffffffffffffLL;
+ tb->token = tb->depth;
+ tb->last = read_machclk();
+}
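+/*
+ * Worked example of the scaled representation above (illustrative values,
+ * assuming machclk_freq is 10^9 ticks per second):
+ *	profile->rate = 10000000 (10 Mbit/s) is 1250000 bytes/s, so
+ *	tb->rate = TB_SCALE(1250000) / 10^9, i.e. 0.00125 bytes per tick
+ *	kept in 32.32 fixed point.
+ *	profile->depth = 12000 bytes gives tb->depth = TB_SCALE(12000), and
+ *	filluptime = depth / rate = 12000 * 10^9 / 1250000 = 9.6e6 ticks,
+ *	i.e. about 9.6 ms to refill an empty bucket at that clock rate.
+ */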
+
+/*
+ * simple token bucket meter
+ */
+static struct tbmeter *
+tbm_create(top, profile, in_action, out_action)
+ struct top_cdnr *top;
+ struct tb_profile *profile;
+ struct tc_action *in_action, *out_action;
+{
+ struct tbmeter *tbm = NULL;
+
+ if (tca_verify_action(in_action) < 0
+ || tca_verify_action(out_action) < 0)
+ return (NULL);
+
+ if ((tbm = cdnr_cballoc(top, TCETYPE_TBMETER,
+ tbm_input)) == NULL)
+ return (NULL);
+
+ tb_import_profile(&tbm->tb, profile);
+
+ tca_import_action(&tbm->in_action, in_action);
+ tca_import_action(&tbm->out_action, out_action);
+
+ return (tbm);
+}
+
+static int
+tbm_destroy(tbm)
+ struct tbmeter *tbm;
+{
+ if (tbm->cdnrblk.cb_ref > 0)
+ return (EBUSY);
+
+ tca_invalidate_action(&tbm->in_action);
+ tca_invalidate_action(&tbm->out_action);
+
+ cdnr_cbdestroy(tbm);
+ return (0);
+}
+
+static struct tc_action *
+tbm_input(cb, pktinfo)
+ struct cdnr_block *cb;
+ struct cdnr_pktinfo *pktinfo;
+{
+ struct tbmeter *tbm = (struct tbmeter *)cb;
+ u_int64_t len;
+ u_int64_t interval, now;
+
+ len = TB_SCALE(pktinfo->pkt_len);
+
+ if (tbm->tb.token < len) {
+ now = read_machclk();
+ interval = now - tbm->tb.last;
+ if (interval >= tbm->tb.filluptime)
+ tbm->tb.token = tbm->tb.depth;
+ else {
+ tbm->tb.token += interval * tbm->tb.rate;
+ if (tbm->tb.token > tbm->tb.depth)
+ tbm->tb.token = tbm->tb.depth;
+ }
+ tbm->tb.last = now;
+ }
+
+ if (tbm->tb.token < len) {
+ PKTCNTR_ADD(&tbm->out_cnt, pktinfo->pkt_len);
+ return (&tbm->out_action);
+ }
+
+ tbm->tb.token -= len;
+ PKTCNTR_ADD(&tbm->in_cnt, pktinfo->pkt_len);
+ return (&tbm->in_action);
+}
+
+/*
+ * two rate three color marker
+ * as described in draft-heinanen-diffserv-trtcm-01.txt
+ */
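+/*
+ * in outline, trtcm_input() below marks a packet red when it does not fit
+ * in the peak token bucket (or arrives pre-colored red in color-aware
+ * mode), yellow when it fits the peak bucket but not the committed bucket
+ * (or arrives pre-colored yellow), and green otherwise; yellow packets
+ * consume peak tokens only, green packets consume tokens from both buckets.
+ */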
+static struct trtcm *
+trtcm_create(top, cmtd_profile, peak_profile,
+ green_action, yellow_action, red_action, coloraware)
+ struct top_cdnr *top;
+ struct tb_profile *cmtd_profile, *peak_profile;
+ struct tc_action *green_action, *yellow_action, *red_action;
+ int coloraware;
+{
+ struct trtcm *tcm = NULL;
+
+ if (tca_verify_action(green_action) < 0
+ || tca_verify_action(yellow_action) < 0
+ || tca_verify_action(red_action) < 0)
+ return (NULL);
+
+ if ((tcm = cdnr_cballoc(top, TCETYPE_TRTCM,
+ trtcm_input)) == NULL)
+ return (NULL);
+
+ tb_import_profile(&tcm->cmtd_tb, cmtd_profile);
+ tb_import_profile(&tcm->peak_tb, peak_profile);
+
+ tca_import_action(&tcm->green_action, green_action);
+ tca_import_action(&tcm->yellow_action, yellow_action);
+ tca_import_action(&tcm->red_action, red_action);
+
+ /* set dscps to use */
+ if (tcm->green_action.tca_code == TCACODE_MARK)
+ tcm->green_dscp = tcm->green_action.tca_dscp & DSCP_MASK;
+ else
+ tcm->green_dscp = DSCP_AF11;
+ if (tcm->yellow_action.tca_code == TCACODE_MARK)
+ tcm->yellow_dscp = tcm->yellow_action.tca_dscp & DSCP_MASK;
+ else
+ tcm->yellow_dscp = DSCP_AF12;
+ if (tcm->red_action.tca_code == TCACODE_MARK)
+ tcm->red_dscp = tcm->red_action.tca_dscp & DSCP_MASK;
+ else
+ tcm->red_dscp = DSCP_AF13;
+
+ tcm->coloraware = coloraware;
+
+ return (tcm);
+}
+
+static int
+trtcm_destroy(tcm)
+ struct trtcm *tcm;
+{
+ if (tcm->cdnrblk.cb_ref > 0)
+ return (EBUSY);
+
+ tca_invalidate_action(&tcm->green_action);
+ tca_invalidate_action(&tcm->yellow_action);
+ tca_invalidate_action(&tcm->red_action);
+
+ cdnr_cbdestroy(tcm);
+ return (0);
+}
+
+static struct tc_action *
+trtcm_input(cb, pktinfo)
+ struct cdnr_block *cb;
+ struct cdnr_pktinfo *pktinfo;
+{
+ struct trtcm *tcm = (struct trtcm *)cb;
+ u_int64_t len;
+ u_int64_t interval, now;
+ u_int8_t color;
+
+ len = TB_SCALE(pktinfo->pkt_len);
+ if (tcm->coloraware) {
+ color = pktinfo->pkt_dscp;
+ if (color != tcm->yellow_dscp && color != tcm->red_dscp)
+ color = tcm->green_dscp;
+ } else {
+ /* if color-blind, precolor it as green */
+ color = tcm->green_dscp;
+ }
+
+ now = read_machclk();
+ if (tcm->cmtd_tb.token < len) {
+ interval = now - tcm->cmtd_tb.last;
+ if (interval >= tcm->cmtd_tb.filluptime)
+ tcm->cmtd_tb.token = tcm->cmtd_tb.depth;
+ else {
+ tcm->cmtd_tb.token += interval * tcm->cmtd_tb.rate;
+ if (tcm->cmtd_tb.token > tcm->cmtd_tb.depth)
+ tcm->cmtd_tb.token = tcm->cmtd_tb.depth;
+ }
+ tcm->cmtd_tb.last = now;
+ }
+ if (tcm->peak_tb.token < len) {
+ interval = now - tcm->peak_tb.last;
+ if (interval >= tcm->peak_tb.filluptime)
+ tcm->peak_tb.token = tcm->peak_tb.depth;
+ else {
+ tcm->peak_tb.token += interval * tcm->peak_tb.rate;
+ if (tcm->peak_tb.token > tcm->peak_tb.depth)
+ tcm->peak_tb.token = tcm->peak_tb.depth;
+ }
+ tcm->peak_tb.last = now;
+ }
+
+ if (color == tcm->red_dscp || tcm->peak_tb.token < len) {
+ pktinfo->pkt_dscp = tcm->red_dscp;
+ PKTCNTR_ADD(&tcm->red_cnt, pktinfo->pkt_len);
+ return (&tcm->red_action);
+ }
+
+ if (color == tcm->yellow_dscp || tcm->cmtd_tb.token < len) {
+ pktinfo->pkt_dscp = tcm->yellow_dscp;
+ tcm->peak_tb.token -= len;
+ PKTCNTR_ADD(&tcm->yellow_cnt, pktinfo->pkt_len);
+ return (&tcm->yellow_action);
+ }
+
+ pktinfo->pkt_dscp = tcm->green_dscp;
+ tcm->cmtd_tb.token -= len;
+ tcm->peak_tb.token -= len;
+ PKTCNTR_ADD(&tcm->green_cnt, pktinfo->pkt_len);
+ return (&tcm->green_action);
+}
+
+/*
+ * time sliding window three color marker
+ * as described in draft-fang-diffserv-tc-tswtcm-00.txt
+ */
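+/*
+ * in outline, tswtcm_input() below estimates the average arrival rate over
+ * a sliding time window and marks probabilistically: when the average rate
+ * exceeds the peak rate, a packet is marked red with probability
+ * (avg - peak)/avg and yellow with probability (peak - cmtd)/avg; when the
+ * average only exceeds the committed rate, it is marked yellow with
+ * probability (avg - cmtd)/avg.  everything else is marked green.
+ */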
+static struct tswtcm *
+tswtcm_create(top, cmtd_rate, peak_rate, avg_interval,
+ green_action, yellow_action, red_action)
+ struct top_cdnr *top;
+ u_int32_t cmtd_rate, peak_rate, avg_interval;
+ struct tc_action *green_action, *yellow_action, *red_action;
+{
+ struct tswtcm *tsw;
+
+ if (tca_verify_action(green_action) < 0
+ || tca_verify_action(yellow_action) < 0
+ || tca_verify_action(red_action) < 0)
+ return (NULL);
+
+ if ((tsw = cdnr_cballoc(top, TCETYPE_TSWTCM,
+ tswtcm_input)) == NULL)
+ return (NULL);
+
+ tca_import_action(&tsw->green_action, green_action);
+ tca_import_action(&tsw->yellow_action, yellow_action);
+ tca_import_action(&tsw->red_action, red_action);
+
+ /* set dscps to use */
+ if (tsw->green_action.tca_code == TCACODE_MARK)
+ tsw->green_dscp = tsw->green_action.tca_dscp & DSCP_MASK;
+ else
+ tsw->green_dscp = DSCP_AF11;
+ if (tsw->yellow_action.tca_code == TCACODE_MARK)
+ tsw->yellow_dscp = tsw->yellow_action.tca_dscp & DSCP_MASK;
+ else
+ tsw->yellow_dscp = DSCP_AF12;
+ if (tsw->red_action.tca_code == TCACODE_MARK)
+ tsw->red_dscp = tsw->red_action.tca_dscp & DSCP_MASK;
+ else
+ tsw->red_dscp = DSCP_AF13;
+
+ /* convert rates from bits/sec to bytes/sec */
+ tsw->cmtd_rate = cmtd_rate / 8;
+ tsw->peak_rate = peak_rate / 8;
+ tsw->avg_rate = 0;
+
+ /* timewin is converted from msec to machine clock unit */
+ tsw->timewin = (u_int64_t)machclk_freq * avg_interval / 1000;
+
+ return (tsw);
+}
+
+static int
+tswtcm_destroy(tsw)
+ struct tswtcm *tsw;
+{
+ if (tsw->cdnrblk.cb_ref > 0)
+ return (EBUSY);
+
+ tca_invalidate_action(&tsw->green_action);
+ tca_invalidate_action(&tsw->yellow_action);
+ tca_invalidate_action(&tsw->red_action);
+
+ cdnr_cbdestroy(tsw);
+ return (0);
+}
+
+static struct tc_action *
+tswtcm_input(cb, pktinfo)
+ struct cdnr_block *cb;
+ struct cdnr_pktinfo *pktinfo;
+{
+ struct tswtcm *tsw = (struct tswtcm *)cb;
+ int len;
+ u_int32_t avg_rate;
+ u_int64_t interval, now, tmp;
+
+ /*
+ * rate estimator
+ */
+ len = pktinfo->pkt_len;
+ now = read_machclk();
+
+ interval = now - tsw->t_front;
+ /*
+ * calculate average rate:
+ * avg = (avg * timewin + pkt_len)/(timewin + interval)
+ * pkt_len needs to be multiplied by machclk_freq in order to
+ * get (bytes/sec).
+ * note: when avg_rate (bytes/sec) and timewin (machclk unit) are
+ * less than 32 bits, the following 64-bit operation has enough
+ * precision.
+ */
+ tmp = ((u_int64_t)tsw->avg_rate * tsw->timewin
+ + (u_int64_t)len * machclk_freq) / (tsw->timewin + interval);
+ tsw->avg_rate = avg_rate = (u_int32_t)tmp;
+ tsw->t_front = now;
+
+ /*
+ * marker
+ */
+ if (avg_rate > tsw->cmtd_rate) {
+ u_int32_t randval = arc4random() % avg_rate;
+
+ if (avg_rate > tsw->peak_rate) {
+ if (randval < avg_rate - tsw->peak_rate) {
+ /* mark red */
+ pktinfo->pkt_dscp = tsw->red_dscp;
+ PKTCNTR_ADD(&tsw->red_cnt, len);
+ return (&tsw->red_action);
+ } else if (randval < avg_rate - tsw->cmtd_rate)
+ goto mark_yellow;
+ } else {
+ /* peak_rate >= avg_rate > cmtd_rate */
+ if (randval < avg_rate - tsw->cmtd_rate) {
+ mark_yellow:
+ pktinfo->pkt_dscp = tsw->yellow_dscp;
+ PKTCNTR_ADD(&tsw->yellow_cnt, len);
+ return (&tsw->yellow_action);
+ }
+ }
+ }
+
+ /* mark green */
+ pktinfo->pkt_dscp = tsw->green_dscp;
+ PKTCNTR_ADD(&tsw->green_cnt, len);
+ return (&tsw->green_action);
+}
+
+/*
+ * ioctl requests
+ */
+static int
+cdnrcmd_if_attach(ifname)
+ char *ifname;
+{
+ struct ifnet *ifp;
+ struct top_cdnr *top;
+
+ if ((ifp = ifunit(ifname)) == NULL)
+ return (EBADF);
+
+ if (ifp->if_snd.altq_cdnr != NULL)
+ return (EBUSY);
+
+ if ((top = top_create(&ifp->if_snd)) == NULL)
+ return (ENOMEM);
+ return (0);
+}
+
+static int
+cdnrcmd_if_detach(ifname)
+ char *ifname;
+{
+ struct top_cdnr *top;
+
+ if ((top = tcb_lookup(ifname)) == NULL)
+ return (EBADF);
+
+ return top_destroy(top);
+}
+
+static int
+cdnrcmd_add_element(ap)
+ struct cdnr_add_element *ap;
+{
+ struct top_cdnr *top;
+ struct cdnr_block *cb;
+
+ if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL)
+ return (EBADF);
+
+ cb = element_create(top, &ap->action);
+ if (cb == NULL)
+ return (EINVAL);
+ /* return a class handle to the user */
+ ap->cdnr_handle = cdnr_cb2handle(cb);
+ return (0);
+}
+
+static int
+cdnrcmd_delete_element(ap)
+ struct cdnr_delete_element *ap;
+{
+ struct top_cdnr *top;
+ struct cdnr_block *cb;
+
+ if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL)
+ return (EBADF);
+
+ if ((cb = cdnr_handle2cb(ap->cdnr_handle)) == NULL)
+ return (EINVAL);
+
+ if (cb->cb_type != TCETYPE_ELEMENT)
+ return generic_element_destroy(cb);
+
+ return element_destroy(cb);
+}
+
+static int
+cdnrcmd_add_filter(ap)
+ struct cdnr_add_filter *ap;
+{
+ struct top_cdnr *top;
+ struct cdnr_block *cb;
+
+ if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL)
+ return (EBADF);
+
+ if ((cb = cdnr_handle2cb(ap->cdnr_handle)) == NULL)
+ return (EINVAL);
+
+ return acc_add_filter(&top->tc_classifier, &ap->filter,
+ cb, &ap->filter_handle);
+}
+
+static int
+cdnrcmd_delete_filter(ap)
+ struct cdnr_delete_filter *ap;
+{
+ struct top_cdnr *top;
+
+ if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL)
+ return (EBADF);
+
+ return acc_delete_filter(&top->tc_classifier, ap->filter_handle);
+}
+
+static int
+cdnrcmd_add_tbm(ap)
+ struct cdnr_add_tbmeter *ap;
+{
+ struct top_cdnr *top;
+ struct tbmeter *tbm;
+
+ if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL)
+ return (EBADF);
+
+ tbm = tbm_create(top, &ap->profile, &ap->in_action, &ap->out_action);
+ if (tbm == NULL)
+ return (EINVAL);
+ /* return a class handle to the user */
+ ap->cdnr_handle = cdnr_cb2handle(&tbm->cdnrblk);
+ return (0);
+}
+
+static int
+cdnrcmd_modify_tbm(ap)
+ struct cdnr_modify_tbmeter *ap;
+{
+ struct tbmeter *tbm;
+
+ if ((tbm = (struct tbmeter *)cdnr_handle2cb(ap->cdnr_handle)) == NULL)
+ return (EINVAL);
+
+ tb_import_profile(&tbm->tb, &ap->profile);
+
+ return (0);
+}
+
+static int
+cdnrcmd_tbm_stats(ap)
+ struct cdnr_tbmeter_stats *ap;
+{
+ struct tbmeter *tbm;
+
+ if ((tbm = (struct tbmeter *)cdnr_handle2cb(ap->cdnr_handle)) == NULL)
+ return (EINVAL);
+
+ ap->in_cnt = tbm->in_cnt;
+ ap->out_cnt = tbm->out_cnt;
+
+ return (0);
+}
+
+static int
+cdnrcmd_add_trtcm(ap)
+ struct cdnr_add_trtcm *ap;
+{
+ struct top_cdnr *top;
+ struct trtcm *tcm;
+
+ if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL)
+ return (EBADF);
+
+ tcm = trtcm_create(top, &ap->cmtd_profile, &ap->peak_profile,
+ &ap->green_action, &ap->yellow_action,
+ &ap->red_action, ap->coloraware);
+ if (tcm == NULL)
+ return (EINVAL);
+
+ /* return a class handle to the user */
+ ap->cdnr_handle = cdnr_cb2handle(&tcm->cdnrblk);
+ return (0);
+}
+
+static int
+cdnrcmd_modify_trtcm(ap)
+ struct cdnr_modify_trtcm *ap;
+{
+ struct trtcm *tcm;
+
+ if ((tcm = (struct trtcm *)cdnr_handle2cb(ap->cdnr_handle)) == NULL)
+ return (EINVAL);
+
+ tb_import_profile(&tcm->cmtd_tb, &ap->cmtd_profile);
+ tb_import_profile(&tcm->peak_tb, &ap->peak_profile);
+
+ return (0);
+}
+
+static int
+cdnrcmd_tcm_stats(ap)
+ struct cdnr_tcm_stats *ap;
+{
+ struct cdnr_block *cb;
+
+ if ((cb = cdnr_handle2cb(ap->cdnr_handle)) == NULL)
+ return (EINVAL);
+
+ if (cb->cb_type == TCETYPE_TRTCM) {
+ struct trtcm *tcm = (struct trtcm *)cb;
+
+ ap->green_cnt = tcm->green_cnt;
+ ap->yellow_cnt = tcm->yellow_cnt;
+ ap->red_cnt = tcm->red_cnt;
+ } else if (cb->cb_type == TCETYPE_TSWTCM) {
+ struct tswtcm *tsw = (struct tswtcm *)cb;
+
+ ap->green_cnt = tsw->green_cnt;
+ ap->yellow_cnt = tsw->yellow_cnt;
+ ap->red_cnt = tsw->red_cnt;
+ } else
+ return (EINVAL);
+
+ return (0);
+}
+
+static int
+cdnrcmd_add_tswtcm(ap)
+ struct cdnr_add_tswtcm *ap;
+{
+ struct top_cdnr *top;
+ struct tswtcm *tsw;
+
+ if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL)
+ return (EBADF);
+
+ if (ap->cmtd_rate > ap->peak_rate)
+ return (EINVAL);
+
+ tsw = tswtcm_create(top, ap->cmtd_rate, ap->peak_rate,
+ ap->avg_interval, &ap->green_action,
+ &ap->yellow_action, &ap->red_action);
+ if (tsw == NULL)
+ return (EINVAL);
+
+ /* return a class handle to the user */
+ ap->cdnr_handle = cdnr_cb2handle(&tsw->cdnrblk);
+ return (0);
+}
+
+static int
+cdnrcmd_modify_tswtcm(ap)
+ struct cdnr_modify_tswtcm *ap;
+{
+ struct tswtcm *tsw;
+
+ if ((tsw = (struct tswtcm *)cdnr_handle2cb(ap->cdnr_handle)) == NULL)
+ return (EINVAL);
+
+ if (ap->cmtd_rate > ap->peak_rate)
+ return (EINVAL);
+
+ /* convert rates from bits/sec to bytes/sec */
+ tsw->cmtd_rate = ap->cmtd_rate / 8;
+ tsw->peak_rate = ap->peak_rate / 8;
+ tsw->avg_rate = 0;
+
+ /* timewin is converted from msec to machine clock unit */
+ tsw->timewin = (u_int64_t)machclk_freq * ap->avg_interval / 1000;
+
+ return (0);
+}
+
+static int
+cdnrcmd_get_stats(ap)
+ struct cdnr_get_stats *ap;
+{
+ struct top_cdnr *top;
+ struct cdnr_block *cb;
+ struct tbmeter *tbm;
+ struct trtcm *tcm;
+ struct tswtcm *tsw;
+ struct tce_stats tce, *usp;
+ int error, n, nskip, nelements;
+
+ if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL)
+ return (EBADF);
+
+ /* copy action stats */
+ bcopy(top->tc_cnts, ap->cnts, sizeof(ap->cnts));
+
+ /* stats for each element */
+ nelements = ap->nelements;
+ usp = ap->tce_stats;
+ if (nelements <= 0 || usp == NULL)
+ return (0);
+
+ nskip = ap->nskip;
+ n = 0;
+ LIST_FOREACH(cb, &top->tc_elements, cb_next) {
+ if (nskip > 0) {
+ nskip--;
+ continue;
+ }
+
+ bzero(&tce, sizeof(tce));
+ tce.tce_handle = cb->cb_handle;
+ tce.tce_type = cb->cb_type;
+ switch (cb->cb_type) {
+ case TCETYPE_TBMETER:
+ tbm = (struct tbmeter *)cb;
+ tce.tce_cnts[0] = tbm->in_cnt;
+ tce.tce_cnts[1] = tbm->out_cnt;
+ break;
+ case TCETYPE_TRTCM:
+ tcm = (struct trtcm *)cb;
+ tce.tce_cnts[0] = tcm->green_cnt;
+ tce.tce_cnts[1] = tcm->yellow_cnt;
+ tce.tce_cnts[2] = tcm->red_cnt;
+ break;
+ case TCETYPE_TSWTCM:
+ tsw = (struct tswtcm *)cb;
+ tce.tce_cnts[0] = tsw->green_cnt;
+ tce.tce_cnts[1] = tsw->yellow_cnt;
+ tce.tce_cnts[2] = tsw->red_cnt;
+ break;
+ default:
+ continue;
+ }
+
+ if ((error = copyout((caddr_t)&tce, (caddr_t)usp++,
+ sizeof(tce))) != 0)
+ return (error);
+
+ if (++n == nelements)
+ break;
+ }
+ ap->nelements = n;
+
+ return (0);
+}
+
+/*
+ * conditioner device interface
+ */
+int
+cdnropen(dev, flag, fmt, p)
+ dev_t dev;
+ int flag, fmt;
+#if (__FreeBSD_version > 500000)
+ struct thread *p;
+#else
+ struct proc *p;
+#endif
+{
+ if (machclk_freq == 0)
+ init_machclk();
+
+ if (machclk_freq == 0) {
+ printf("cdnr: no cpu clock available!\n");
+ return (ENXIO);
+ }
+
+ /* everything will be done when the queueing scheme is attached. */
+ return 0;
+}
+
+int
+cdnrclose(dev, flag, fmt, p)
+ dev_t dev;
+ int flag, fmt;
+#if (__FreeBSD_version > 500000)
+ struct thread *p;
+#else
+ struct proc *p;
+#endif
+{
+ struct top_cdnr *top;
+ int err, error = 0;
+
+ while ((top = LIST_FIRST(&tcb_list)) != NULL) {
+ /* destroy all */
+ err = top_destroy(top);
+ if (err != 0 && error == 0)
+ error = err;
+ }
+ altq_input = NULL;
+
+ return (error);
+}
+
+int
+cdnrioctl(dev, cmd, addr, flag, p)
+ dev_t dev;
+ ioctlcmd_t cmd;
+ caddr_t addr;
+ int flag;
+#if (__FreeBSD_version > 500000)
+ struct thread *p;
+#else
+ struct proc *p;
+#endif
+{
+ struct top_cdnr *top;
+ struct cdnr_interface *ifacep;
+ int s, error = 0;
+
+ /* check super-user privilege */
+ switch (cmd) {
+ case CDNR_GETSTATS:
+ break;
+ default:
+#if (__FreeBSD_version > 700000)
+ if ((error = priv_check(p, PRIV_ALTQ_MANAGE)) != 0)
+#elif (__FreeBSD_version > 400000)
+ if ((error = suser(p)) != 0)
+#else
+ if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
+#endif
+ return (error);
+ break;
+ }
+
+#ifdef __NetBSD__
+ s = splnet();
+#else
+ s = splimp();
+#endif
+ switch (cmd) {
+
+ case CDNR_IF_ATTACH:
+ ifacep = (struct cdnr_interface *)addr;
+ error = cdnrcmd_if_attach(ifacep->cdnr_ifname);
+ break;
+
+ case CDNR_IF_DETACH:
+ ifacep = (struct cdnr_interface *)addr;
+ error = cdnrcmd_if_detach(ifacep->cdnr_ifname);
+ break;
+
+ case CDNR_ENABLE:
+ case CDNR_DISABLE:
+ ifacep = (struct cdnr_interface *)addr;
+ if ((top = tcb_lookup(ifacep->cdnr_ifname)) == NULL) {
+ error = EBADF;
+ break;
+ }
+
+ switch (cmd) {
+
+ case CDNR_ENABLE:
+ ALTQ_SET_CNDTNING(top->tc_ifq);
+ if (altq_input == NULL)
+ altq_input = altq_cdnr_input;
+ break;
+
+ case CDNR_DISABLE:
+ ALTQ_CLEAR_CNDTNING(top->tc_ifq);
+ LIST_FOREACH(top, &tcb_list, tc_next)
+ if (ALTQ_IS_CNDTNING(top->tc_ifq))
+ break;
+ if (top == NULL)
+ altq_input = NULL;
+ break;
+ }
+ break;
+
+ case CDNR_ADD_ELEM:
+ error = cdnrcmd_add_element((struct cdnr_add_element *)addr);
+ break;
+
+ case CDNR_DEL_ELEM:
+ error = cdnrcmd_delete_element((struct cdnr_delete_element *)addr);
+ break;
+
+ case CDNR_ADD_TBM:
+ error = cdnrcmd_add_tbm((struct cdnr_add_tbmeter *)addr);
+ break;
+
+ case CDNR_MOD_TBM:
+ error = cdnrcmd_modify_tbm((struct cdnr_modify_tbmeter *)addr);
+ break;
+
+ case CDNR_TBM_STATS:
+ error = cdnrcmd_tbm_stats((struct cdnr_tbmeter_stats *)addr);
+ break;
+
+ case CDNR_ADD_TCM:
+ error = cdnrcmd_add_trtcm((struct cdnr_add_trtcm *)addr);
+ break;
+
+ case CDNR_MOD_TCM:
+ error = cdnrcmd_modify_trtcm((struct cdnr_modify_trtcm *)addr);
+ break;
+
+ case CDNR_TCM_STATS:
+ error = cdnrcmd_tcm_stats((struct cdnr_tcm_stats *)addr);
+ break;
+
+ case CDNR_ADD_FILTER:
+ error = cdnrcmd_add_filter((struct cdnr_add_filter *)addr);
+ break;
+
+ case CDNR_DEL_FILTER:
+ error = cdnrcmd_delete_filter((struct cdnr_delete_filter *)addr);
+ break;
+
+ case CDNR_GETSTATS:
+ error = cdnrcmd_get_stats((struct cdnr_get_stats *)addr);
+ break;
+
+ case CDNR_ADD_TSW:
+ error = cdnrcmd_add_tswtcm((struct cdnr_add_tswtcm *)addr);
+ break;
+
+ case CDNR_MOD_TSW:
+ error = cdnrcmd_modify_tswtcm((struct cdnr_modify_tswtcm *)addr);
+ break;
+
+ default:
+ error = EINVAL;
+ break;
+ }
+ splx(s);
+
+ return error;
+}
+
+#ifdef KLD_MODULE
+
+static struct altqsw cdnr_sw =
+ {"cdnr", cdnropen, cdnrclose, cdnrioctl};
+
+ALTQ_MODULE(altq_cdnr, ALTQT_CDNR, &cdnr_sw);
+
+#endif /* KLD_MODULE */
+
+#endif /* ALTQ3_COMPAT */
+#endif /* ALTQ_CDNR */
diff --git a/sys/net/altq/altq_cdnr.h b/sys/net/altq/altq_cdnr.h
new file mode 100644
index 0000000..06fa9c9
--- /dev/null
+++ b/sys/net/altq/altq_cdnr.h
@@ -0,0 +1,336 @@
+/*-
+ * Copyright (C) 1999-2002
+ * Sony Computer Science Laboratories Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $KAME: altq_cdnr.h,v 1.9 2003/07/10 12:07:48 kjc Exp $
+ * $FreeBSD$
+ */
+
+#ifndef _ALTQ_ALTQ_CDNR_H_
+#define _ALTQ_ALTQ_CDNR_H_
+
+#include <net/altq/altq.h>
+
+/*
+ * traffic conditioner element types
+ */
+#define TCETYPE_NONE 0
+#define TCETYPE_TOP 1 /* top level conditioner */
+#define TCETYPE_ELEMENT 2 /* a simple tc element */
+#define TCETYPE_TBMETER 3 /* token bucket meter */
+#define TCETYPE_TRTCM 4 /* (two-rate) three color marker */
+#define	TCETYPE_TSWTCM	5	/* time sliding window 3-color marker */
+
+/*
+ * traffic conditioner action
+ */
+struct cdnr_block;
+
+struct tc_action {
+ int tca_code; /* e.g., TCACODE_PASS */
+ /* tca_code dependent variable */
+ union {
+ u_long un_value; /* template */
+ u_int8_t un_dscp; /* diffserv code point */
+ u_long un_handle; /* tc action handle */
+ struct cdnr_block *un_next; /* next tc element block */
+ } tca_un;
+};
+#define tca_value tca_un.un_value
+#define tca_dscp tca_un.un_dscp
+#define tca_handle tca_un.un_handle
+#define tca_next tca_un.un_next
+
+#define TCACODE_NONE 0 /* action is not set */
+#define TCACODE_PASS 1 /* pass this packet */
+#define TCACODE_DROP 2 /* discard this packet */
+#define TCACODE_RETURN 3 /* do not process this packet */
+#define TCACODE_MARK 4 /* mark dscp */
+#define TCACODE_HANDLE 5 /* take action specified by handle */
+#define TCACODE_NEXT 6 /* take action in the next tc element */
+#define TCACODE_MAX 6
+
+#define CDNR_NULL_HANDLE 0
+
+struct cdnr_interface {
+ char cdnr_ifname[IFNAMSIZ]; /* interface name (e.g., fxp0) */
+};
+
+/* simple element operations */
+struct cdnr_add_element {
+ struct cdnr_interface iface;
+ struct tc_action action;
+
+ u_long cdnr_handle; /* return value */
+};
+
+struct cdnr_delete_element {
+ struct cdnr_interface iface;
+ u_long cdnr_handle;
+};
+
+/* token-bucket meter operations */
+struct cdnr_add_tbmeter {
+ struct cdnr_interface iface;
+ struct tb_profile profile;
+ struct tc_action in_action;
+ struct tc_action out_action;
+
+ u_long cdnr_handle; /* return value */
+};
+
+struct cdnr_modify_tbmeter {
+ struct cdnr_interface iface;
+ u_long cdnr_handle;
+ struct tb_profile profile;
+};
+
+struct cdnr_tbmeter_stats {
+ struct cdnr_interface iface;
+ u_long cdnr_handle;
+ struct pktcntr in_cnt;
+ struct pktcntr out_cnt;
+};
+
+/* two-rate three-color marker operations */
+struct cdnr_add_trtcm {
+ struct cdnr_interface iface;
+ struct tb_profile cmtd_profile; /* profile for committed tb */
+ struct tb_profile peak_profile; /* profile for peak tb */
+ struct tc_action green_action; /* action for green packets */
+ struct tc_action yellow_action; /* action for yellow packets */
+ struct tc_action red_action; /* action for red packets */
+ int coloraware; /* color-aware/color-blind */
+
+ u_long cdnr_handle; /* return value */
+};
+
+struct cdnr_modify_trtcm {
+ struct cdnr_interface iface;
+ u_long cdnr_handle;
+ struct tb_profile cmtd_profile; /* profile for committed tb */
+ struct tb_profile peak_profile; /* profile for peak tb */
+ int coloraware; /* color-aware/color-blind */
+};
+
+struct cdnr_tcm_stats {
+ struct cdnr_interface iface;
+ u_long cdnr_handle;
+ struct pktcntr green_cnt;
+ struct pktcntr yellow_cnt;
+ struct pktcntr red_cnt;
+};
+
+/* time sliding window three-color marker operations */
+struct cdnr_add_tswtcm {
+ struct cdnr_interface iface;
+ u_int32_t cmtd_rate; /* committed rate (bits/sec) */
+ u_int32_t peak_rate; /* peak rate (bits/sec) */
+ u_int32_t avg_interval; /* averaging interval (msec) */
+ struct tc_action green_action; /* action for green packets */
+ struct tc_action yellow_action; /* action for yellow packets */
+ struct tc_action red_action; /* action for red packets */
+
+ u_long cdnr_handle; /* return value */
+};
+
+struct cdnr_modify_tswtcm {
+ struct cdnr_interface iface;
+ u_long cdnr_handle;
+ u_int32_t cmtd_rate; /* committed rate (bits/sec) */
+ u_int32_t peak_rate; /* peak rate (bits/sec) */
+ u_int32_t avg_interval; /* averaging interval (msec) */
+};
+
+struct cdnr_add_filter {
+ struct cdnr_interface iface;
+ u_long cdnr_handle;
+#ifdef ALTQ3_CLFIER_COMPAT
+ struct flow_filter filter;
+#endif
+ u_long filter_handle; /* return value */
+};
+
+struct cdnr_delete_filter {
+ struct cdnr_interface iface;
+ u_long filter_handle;
+};
+
+struct tce_stats {
+ u_long tce_handle; /* tc element handle */
+ int tce_type; /* e.g., TCETYPE_ELEMENT */
+ struct pktcntr tce_cnts[3]; /* tcm returns 3 counters */
+};
+
+struct cdnr_get_stats {
+ struct cdnr_interface iface;
+ struct pktcntr cnts[TCACODE_MAX+1];
+
+ /* element stats */
+ int nskip; /* skip # of elements */
+ int nelements; /* # of element stats (WR) */
+ struct tce_stats *tce_stats; /* pointer to stats array */
+};
+
+#define CDNR_IF_ATTACH _IOW('Q', 1, struct cdnr_interface)
+#define CDNR_IF_DETACH _IOW('Q', 2, struct cdnr_interface)
+#define CDNR_ENABLE _IOW('Q', 3, struct cdnr_interface)
+#define CDNR_DISABLE _IOW('Q', 4, struct cdnr_interface)
+#define CDNR_ADD_FILTER _IOWR('Q', 10, struct cdnr_add_filter)
+#define CDNR_DEL_FILTER _IOW('Q', 11, struct cdnr_delete_filter)
+#define CDNR_GETSTATS _IOWR('Q', 12, struct cdnr_get_stats)
+#define CDNR_ADD_ELEM _IOWR('Q', 30, struct cdnr_add_element)
+#define CDNR_DEL_ELEM _IOW('Q', 31, struct cdnr_delete_element)
+#define CDNR_ADD_TBM _IOWR('Q', 32, struct cdnr_add_tbmeter)
+#define CDNR_MOD_TBM _IOW('Q', 33, struct cdnr_modify_tbmeter)
+#define CDNR_TBM_STATS _IOWR('Q', 34, struct cdnr_tbmeter_stats)
+#define CDNR_ADD_TCM _IOWR('Q', 35, struct cdnr_add_trtcm)
+#define CDNR_MOD_TCM _IOWR('Q', 36, struct cdnr_modify_trtcm)
+#define CDNR_TCM_STATS _IOWR('Q', 37, struct cdnr_tcm_stats)
+#define CDNR_ADD_TSW _IOWR('Q', 38, struct cdnr_add_tswtcm)
+#define CDNR_MOD_TSW _IOWR('Q', 39, struct cdnr_modify_tswtcm)
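+
+/*
+ * illustrative userland usage (a sketch, assuming the altq3 conditioner
+ * device node /dev/altq/cdnr is present):
+ *
+ *	struct cdnr_interface iface;
+ *	int fd = open("/dev/altq/cdnr", O_RDWR);
+ *	strlcpy(iface.cdnr_ifname, "fxp0", sizeof(iface.cdnr_ifname));
+ *	ioctl(fd, CDNR_IF_ATTACH, &iface);	attach a conditioner
+ *	ioctl(fd, CDNR_ENABLE, &iface);		start conditioning input
+ */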
+
+#ifndef DSCP_EF
+/* diffserv code points */
+#define DSCP_MASK 0xfc
+#define DSCP_CUMASK 0x03
+#define DSCP_EF 0xb8
+#define DSCP_AF11 0x28
+#define DSCP_AF12 0x30
+#define DSCP_AF13 0x38
+#define DSCP_AF21 0x48
+#define DSCP_AF22 0x50
+#define DSCP_AF23 0x58
+#define DSCP_AF31 0x68
+#define DSCP_AF32 0x70
+#define DSCP_AF33 0x78
+#define DSCP_AF41 0x88
+#define DSCP_AF42 0x90
+#define DSCP_AF43 0x98
+#define AF_CLASSMASK 0xe0
+#define AF_DROPPRECMASK 0x18
+#endif
+
+#ifdef _KERNEL
+
+/*
+ * packet information passed to the input function of tc elements
+ */
+struct cdnr_pktinfo {
+ int pkt_len; /* packet length */
+ u_int8_t pkt_dscp; /* diffserv code point */
+};
+
+/*
+ * traffic conditioner control block common to all types of tc elements
+ */
+struct cdnr_block {
+ LIST_ENTRY(cdnr_block) cb_next;
+ int cb_len; /* size of this tc element */
+ int cb_type; /* cdnr block type */
+ int cb_ref; /* reference count of this element */
+ u_long cb_handle; /* handle of this tc element */
+ struct top_cdnr *cb_top; /* back pointer to top */
+ struct tc_action cb_action; /* top level action for this tcb */
+ struct tc_action *(*cb_input)(struct cdnr_block *,
+ struct cdnr_pktinfo *);
+};
+
+/*
+ * top level traffic conditioner structure for an interface
+ */
+struct top_cdnr {
+ struct cdnr_block tc_block;
+
+ LIST_ENTRY(top_cdnr) tc_next;
+ struct ifaltq *tc_ifq;
+
+ LIST_HEAD(, cdnr_block) tc_elements;
+#ifdef ALTQ3_CLFIER_COMPAT
+ struct acc_classifier tc_classifier;
+#endif
+ struct pktcntr tc_cnts[TCACODE_MAX+1];
+};
+
+/* token bucket element */
+struct tbe {
+ u_int64_t rate;
+ u_int64_t depth;
+
+ u_int64_t token;
+ u_int64_t filluptime;
+ u_int64_t last;
+};
+
+/* token bucket meter structure */
+struct tbmeter {
+ struct cdnr_block cdnrblk; /* conditioner block */
+ struct tbe tb; /* token bucket */
+ struct tc_action in_action; /* actions for IN/OUT */
+ struct tc_action out_action; /* actions for IN/OUT */
+ struct pktcntr in_cnt; /* statistics for IN/OUT */
+ struct pktcntr out_cnt; /* statistics for IN/OUT */
+};
+
+/* two-rate three-color marker structure */
+struct trtcm {
+ struct cdnr_block cdnrblk; /* conditioner block */
+ struct tbe cmtd_tb; /* committed tb profile */
+ struct tbe peak_tb; /* peak tb profile */
+ struct tc_action green_action;
+ struct tc_action yellow_action;
+ struct tc_action red_action;
+ int coloraware;
+ u_int8_t green_dscp;
+ u_int8_t yellow_dscp;
+ u_int8_t red_dscp;
+ struct pktcntr green_cnt;
+ struct pktcntr yellow_cnt;
+ struct pktcntr red_cnt;
+};
+
+/* time sliding window three-color marker structure */
+struct tswtcm {
+ struct cdnr_block cdnrblk; /* conditioner block */
+
+ u_int32_t avg_rate; /* average rate (bytes/sec) */
+ u_int64_t t_front; /* timestamp of last update */
+
+ u_int64_t timewin; /* average interval */
+ u_int32_t cmtd_rate; /* committed target rate */
+ u_int32_t peak_rate; /* peak target rate */
+ struct tc_action green_action;
+ struct tc_action yellow_action;
+ struct tc_action red_action;
+ u_int8_t green_dscp;
+ u_int8_t yellow_dscp;
+ u_int8_t red_dscp;
+ struct pktcntr green_cnt;
+ struct pktcntr yellow_cnt;
+ struct pktcntr red_cnt;
+};
+
+#endif /* _KERNEL */
+
+#endif /* _ALTQ_ALTQ_CDNR_H_ */
diff --git a/sys/net/altq/altq_classq.h b/sys/net/altq/altq_classq.h
new file mode 100644
index 0000000..dc5c646
--- /dev/null
+++ b/sys/net/altq/altq_classq.h
@@ -0,0 +1,206 @@
+/* $KAME: altq_classq.h,v 1.6 2003/01/07 07:33:38 kjc Exp $ */
+
+/*
+ * Copyright (c) 1991-1997 Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the Network Research
+ * Group at Lawrence Berkeley Laboratory.
+ * 4. Neither the name of the University nor of the Laboratory may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+/*
+ * class queue definitions extracted from rm_class.h.
+ */
+#ifndef _ALTQ_ALTQ_CLASSQ_H_
+#define _ALTQ_ALTQ_CLASSQ_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Packet Queue types: DROPHEAD, RED, RIO, or DROPTAIL.
+ */
+#define Q_DROPHEAD 0x00
+#define Q_RED 0x01
+#define Q_RIO 0x02
+#define Q_DROPTAIL 0x03
+
+#ifdef _KERNEL
+
+/*
+ * Packet Queue structures and macros to manipulate them.
+ */
+struct _class_queue_ {
+ struct mbuf *tail_; /* Tail of packet queue */
+ int qlen_; /* Queue length (in number of packets) */
+	int	qlim_;	/* Queue limit (in number of packets) */
+ int qtype_; /* Queue type */
+};
+
+typedef struct _class_queue_ class_queue_t;
+
+#define qtype(q) (q)->qtype_ /* Get queue type */
+#define qlimit(q) (q)->qlim_ /* Max packets to be queued */
+#define qlen(q) (q)->qlen_ /* Current queue length. */
+#define qtail(q) (q)->tail_ /* Tail of the queue */
+#define qhead(q) ((q)->tail_ ? (q)->tail_->m_nextpkt : NULL)
+
+#define qempty(q) ((q)->qlen_ == 0) /* Is the queue empty?? */
+#define q_is_red(q) ((q)->qtype_ == Q_RED) /* Is the queue a red queue */
+#define q_is_rio(q) ((q)->qtype_ == Q_RIO) /* Is the queue a rio queue */
+#define q_is_red_or_rio(q) ((q)->qtype_ == Q_RED || (q)->qtype_ == Q_RIO)
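+
+/*
+ * the queue is kept as a circular singly-linked list of mbufs chained
+ * through m_nextpkt; only the tail pointer is stored, so the head is always
+ * reachable as tail_->m_nextpkt (the qhead() macro above).  the routines
+ * below rely on this invariant.
+ */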
+
+#if !defined(__GNUC__) || defined(ALTQ_DEBUG)
+
+extern void _addq(class_queue_t *, struct mbuf *);
+extern struct mbuf *_getq(class_queue_t *);
+extern struct mbuf *_getq_tail(class_queue_t *);
+extern struct mbuf *_getq_random(class_queue_t *);
+extern void _removeq(class_queue_t *, struct mbuf *);
+extern void _flushq(class_queue_t *);
+
+#else /* __GNUC__ && !ALTQ_DEBUG */
+/*
+ * inlined versions
+ */
+static __inline void
+_addq(class_queue_t *q, struct mbuf *m)
+{
+ struct mbuf *m0;
+
+ if ((m0 = qtail(q)) != NULL)
+ m->m_nextpkt = m0->m_nextpkt;
+ else
+ m0 = m;
+ m0->m_nextpkt = m;
+ qtail(q) = m;
+ qlen(q)++;
+}
+
+static __inline struct mbuf *
+_getq(class_queue_t *q)
+{
+ struct mbuf *m, *m0;
+
+ if ((m = qtail(q)) == NULL)
+ return (NULL);
+ if ((m0 = m->m_nextpkt) != m)
+ m->m_nextpkt = m0->m_nextpkt;
+ else
+ qtail(q) = NULL;
+ qlen(q)--;
+ m0->m_nextpkt = NULL;
+ return (m0);
+}
+
+/* drop a packet at the tail of the queue */
+static __inline struct mbuf *
+_getq_tail(class_queue_t *q)
+{
+ struct mbuf *m, *m0, *prev;
+
+ if ((m = m0 = qtail(q)) == NULL)
+ return NULL;
+ do {
+ prev = m0;
+ m0 = m0->m_nextpkt;
+ } while (m0 != m);
+ prev->m_nextpkt = m->m_nextpkt;
+ if (prev == m)
+ qtail(q) = NULL;
+ else
+ qtail(q) = prev;
+ qlen(q)--;
+ m->m_nextpkt = NULL;
+ return (m);
+}
+
+/* randomly select a packet in the queue */
+static __inline struct mbuf *
+_getq_random(class_queue_t *q)
+{
+ struct mbuf *m;
+ int i, n;
+
+ if ((m = qtail(q)) == NULL)
+ return NULL;
+ if (m->m_nextpkt == m)
+ qtail(q) = NULL;
+ else {
+ struct mbuf *prev = NULL;
+
+ n = random() % qlen(q) + 1;
+ for (i = 0; i < n; i++) {
+ prev = m;
+ m = m->m_nextpkt;
+ }
+ prev->m_nextpkt = m->m_nextpkt;
+ if (m == qtail(q))
+ qtail(q) = prev;
+ }
+ qlen(q)--;
+ m->m_nextpkt = NULL;
+ return (m);
+}
+
+static __inline void
+_removeq(class_queue_t *q, struct mbuf *m)
+{
+ struct mbuf *m0, *prev;
+
+ m0 = qtail(q);
+ do {
+ prev = m0;
+ m0 = m0->m_nextpkt;
+ } while (m0 != m);
+ prev->m_nextpkt = m->m_nextpkt;
+ if (prev == m)
+ qtail(q) = NULL;
+ else if (qtail(q) == m)
+ qtail(q) = prev;
+ qlen(q)--;
+}
+
+static __inline void
+_flushq(class_queue_t *q)
+{
+ struct mbuf *m;
+
+ while ((m = _getq(q)) != NULL)
+ m_freem(m);
+}
+
+#endif /* __GNUC__ && !ALTQ_DEBUG */
+
+#endif /* _KERNEL */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _ALTQ_ALTQ_CLASSQ_H_ */
diff --git a/sys/net/altq/altq_hfsc.c b/sys/net/altq/altq_hfsc.c
new file mode 100644
index 0000000..05fca40
--- /dev/null
+++ b/sys/net/altq/altq_hfsc.c
@@ -0,0 +1,2222 @@
+/* $FreeBSD$ */
+/* $KAME: altq_hfsc.c,v 1.24 2003/12/05 05:40:46 kjc Exp $ */
+
+/*
+ * Copyright (c) 1997-1999 Carnegie Mellon University. All Rights Reserved.
+ *
+ * Permission to use, copy, modify, and distribute this software and
+ * its documentation is hereby granted (including for commercial or
+ * for-profit use), provided that both the copyright notice and this
+ * permission notice appear in all copies of the software, derivative
+ * works, or modified versions, and any portions thereof.
+ *
+ * THIS SOFTWARE IS EXPERIMENTAL AND IS KNOWN TO HAVE BUGS, SOME OF
+ * WHICH MAY HAVE SERIOUS CONSEQUENCES. CARNEGIE MELLON PROVIDES THIS
+ * SOFTWARE IN ITS ``AS IS'' CONDITION, AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+ * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * Carnegie Mellon encourages (but does not require) users of this
+ * software to return any improvements or extensions that they make,
+ * and to grant Carnegie Mellon the rights to redistribute these
+ * changes without encumbrance.
+ */
+/*
+ * H-FSC is described in Proceedings of SIGCOMM'97,
+ * "A Hierarchical Fair Service Curve Algorithm for Link-Sharing,
+ * Real-Time and Priority Service"
+ * by Ion Stoica, Hui Zhang, and T. S. Eugene Ng.
+ *
+ * Oleg Cherevko <olwi@aq.ml.com.ua> added the upperlimit for link-sharing.
+ * when a class has an upperlimit, the fit-time is computed from the
+ * upperlimit service curve. the link-sharing scheduler does not schedule
+ * a class whose fit-time exceeds the current time.
+ */
+
+#if defined(__FreeBSD__) || defined(__NetBSD__)
+#include "opt_altq.h"
+#include "opt_inet.h"
+#ifdef __FreeBSD__
+#include "opt_inet6.h"
+#endif
+#endif /* __FreeBSD__ || __NetBSD__ */
+
+#ifdef ALTQ_HFSC /* hfsc is enabled by ALTQ_HFSC option in opt_altq.h */
+
+#include <sys/param.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/systm.h>
+#include <sys/errno.h>
+#include <sys/queue.h>
+#if 1 /* ALTQ3_COMPAT */
+#include <sys/sockio.h>
+#include <sys/proc.h>
+#include <sys/kernel.h>
+#endif /* ALTQ3_COMPAT */
+
+#include <net/if.h>
+#include <net/if_var.h>
+#include <netinet/in.h>
+
+#include <netpfil/pf/pf.h>
+#include <netpfil/pf/pf_altq.h>
+#include <netpfil/pf/pf_mtag.h>
+#include <net/altq/altq.h>
+#include <net/altq/altq_hfsc.h>
+#ifdef ALTQ3_COMPAT
+#include <net/altq/altq_conf.h>
+#endif
+
+/*
+ * function prototypes
+ */
+static int hfsc_clear_interface(struct hfsc_if *);
+static int hfsc_request(struct ifaltq *, int, void *);
+static void hfsc_purge(struct hfsc_if *);
+static struct hfsc_class *hfsc_class_create(struct hfsc_if *,
+ struct service_curve *, struct service_curve *, struct service_curve *,
+ struct hfsc_class *, int, int, int);
+static int hfsc_class_destroy(struct hfsc_class *);
+static struct hfsc_class *hfsc_nextclass(struct hfsc_class *);
+static int hfsc_enqueue(struct ifaltq *, struct mbuf *,
+ struct altq_pktattr *);
+static struct mbuf *hfsc_dequeue(struct ifaltq *, int);
+
+static int hfsc_addq(struct hfsc_class *, struct mbuf *);
+static struct mbuf *hfsc_getq(struct hfsc_class *);
+static struct mbuf *hfsc_pollq(struct hfsc_class *);
+static void hfsc_purgeq(struct hfsc_class *);
+
+static void update_cfmin(struct hfsc_class *);
+static void set_active(struct hfsc_class *, int);
+static void set_passive(struct hfsc_class *);
+
+static void init_ed(struct hfsc_class *, int);
+static void update_ed(struct hfsc_class *, int);
+static void update_d(struct hfsc_class *, int);
+static void init_vf(struct hfsc_class *, int);
+static void update_vf(struct hfsc_class *, int, u_int64_t);
+static void ellist_insert(struct hfsc_class *);
+static void ellist_remove(struct hfsc_class *);
+static void ellist_update(struct hfsc_class *);
+struct hfsc_class *hfsc_get_mindl(struct hfsc_if *, u_int64_t);
+static void actlist_insert(struct hfsc_class *);
+static void actlist_remove(struct hfsc_class *);
+static void actlist_update(struct hfsc_class *);
+
+static struct hfsc_class *actlist_firstfit(struct hfsc_class *,
+ u_int64_t);
+
+static __inline u_int64_t seg_x2y(u_int64_t, u_int64_t);
+static __inline u_int64_t seg_y2x(u_int64_t, u_int64_t);
+static __inline u_int64_t m2sm(u_int);
+static __inline u_int64_t m2ism(u_int);
+static __inline u_int64_t d2dx(u_int);
+static u_int sm2m(u_int64_t);
+static u_int dx2d(u_int64_t);
+
+static void sc2isc(struct service_curve *, struct internal_sc *);
+static void rtsc_init(struct runtime_sc *, struct internal_sc *,
+ u_int64_t, u_int64_t);
+static u_int64_t rtsc_y2x(struct runtime_sc *, u_int64_t);
+static u_int64_t rtsc_x2y(struct runtime_sc *, u_int64_t);
+static void rtsc_min(struct runtime_sc *, struct internal_sc *,
+ u_int64_t, u_int64_t);
+
+static void get_class_stats(struct hfsc_classstats *,
+ struct hfsc_class *);
+static struct hfsc_class *clh_to_clp(struct hfsc_if *, u_int32_t);
+
+
+#ifdef ALTQ3_COMPAT
+static struct hfsc_if *hfsc_attach(struct ifaltq *, u_int);
+static int hfsc_detach(struct hfsc_if *);
+static int hfsc_class_modify(struct hfsc_class *, struct service_curve *,
+ struct service_curve *, struct service_curve *);
+
+static int hfsccmd_if_attach(struct hfsc_attach *);
+static int hfsccmd_if_detach(struct hfsc_interface *);
+static int hfsccmd_add_class(struct hfsc_add_class *);
+static int hfsccmd_delete_class(struct hfsc_delete_class *);
+static int hfsccmd_modify_class(struct hfsc_modify_class *);
+static int hfsccmd_add_filter(struct hfsc_add_filter *);
+static int hfsccmd_delete_filter(struct hfsc_delete_filter *);
+static int hfsccmd_class_stats(struct hfsc_class_stats *);
+
+altqdev_decl(hfsc);
+#endif /* ALTQ3_COMPAT */
+
+/*
+ * macros
+ */
+#define is_a_parent_class(cl) ((cl)->cl_children != NULL)
+
+#define HT_INFINITY 0xffffffffffffffffLL /* infinite time value */
+
+#ifdef ALTQ3_COMPAT
+/* hif_list keeps all hfsc_if's allocated. */
+static struct hfsc_if *hif_list = NULL;
+#endif /* ALTQ3_COMPAT */
+
+int
+hfsc_pfattach(struct pf_altq *a)
+{
+ struct ifnet *ifp;
+ int s, error;
+
+ if ((ifp = ifunit(a->ifname)) == NULL || a->altq_disc == NULL)
+ return (EINVAL);
+#ifdef __NetBSD__
+ s = splnet();
+#else
+ s = splimp();
+#endif
+ error = altq_attach(&ifp->if_snd, ALTQT_HFSC, a->altq_disc,
+ hfsc_enqueue, hfsc_dequeue, hfsc_request, NULL, NULL);
+ splx(s);
+ return (error);
+}
+
+int
+hfsc_add_altq(struct pf_altq *a)
+{
+ struct hfsc_if *hif;
+ struct ifnet *ifp;
+
+ if ((ifp = ifunit(a->ifname)) == NULL)
+ return (EINVAL);
+ if (!ALTQ_IS_READY(&ifp->if_snd))
+ return (ENODEV);
+
+ hif = malloc(sizeof(struct hfsc_if), M_DEVBUF, M_NOWAIT | M_ZERO);
+ if (hif == NULL)
+ return (ENOMEM);
+
+ TAILQ_INIT(&hif->hif_eligible);
+ hif->hif_ifq = &ifp->if_snd;
+
+ /* keep the state in pf_altq */
+ a->altq_disc = hif;
+
+ return (0);
+}
+
+int
+hfsc_remove_altq(struct pf_altq *a)
+{
+ struct hfsc_if *hif;
+
+ if ((hif = a->altq_disc) == NULL)
+ return (EINVAL);
+ a->altq_disc = NULL;
+
+ (void)hfsc_clear_interface(hif);
+ (void)hfsc_class_destroy(hif->hif_rootclass);
+
+ free(hif, M_DEVBUF);
+
+ return (0);
+}
+
+int
+hfsc_add_queue(struct pf_altq *a)
+{
+ struct hfsc_if *hif;
+ struct hfsc_class *cl, *parent;
+ struct hfsc_opts *opts;
+ struct service_curve rtsc, lssc, ulsc;
+
+ if ((hif = a->altq_disc) == NULL)
+ return (EINVAL);
+
+ opts = &a->pq_u.hfsc_opts;
+
+ if (a->parent_qid == HFSC_NULLCLASS_HANDLE &&
+ hif->hif_rootclass == NULL)
+ parent = NULL;
+ else if ((parent = clh_to_clp(hif, a->parent_qid)) == NULL)
+ return (EINVAL);
+
+ if (a->qid == 0)
+ return (EINVAL);
+
+ if (clh_to_clp(hif, a->qid) != NULL)
+ return (EBUSY);
+
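+	/*
+	 * each service curve is two linear pieces: slope m1 (bits/sec) for
+	 * the first d milliseconds of a backlogged period, then slope m2
+	 * (bits/sec) afterwards.  rtsc, lssc and ulsc are the real-time,
+	 * link-sharing and upper-limit curves, respectively.
+	 */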
+ rtsc.m1 = opts->rtsc_m1;
+ rtsc.d = opts->rtsc_d;
+ rtsc.m2 = opts->rtsc_m2;
+ lssc.m1 = opts->lssc_m1;
+ lssc.d = opts->lssc_d;
+ lssc.m2 = opts->lssc_m2;
+ ulsc.m1 = opts->ulsc_m1;
+ ulsc.d = opts->ulsc_d;
+ ulsc.m2 = opts->ulsc_m2;
+
+ cl = hfsc_class_create(hif, &rtsc, &lssc, &ulsc,
+ parent, a->qlimit, opts->flags, a->qid);
+ if (cl == NULL)
+ return (ENOMEM);
+
+ return (0);
+}
+
+int
+hfsc_remove_queue(struct pf_altq *a)
+{
+ struct hfsc_if *hif;
+ struct hfsc_class *cl;
+
+ if ((hif = a->altq_disc) == NULL)
+ return (EINVAL);
+
+ if ((cl = clh_to_clp(hif, a->qid)) == NULL)
+ return (EINVAL);
+
+ return (hfsc_class_destroy(cl));
+}
+
+int
+hfsc_getqstats(struct pf_altq *a, void *ubuf, int *nbytes)
+{
+ struct hfsc_if *hif;
+ struct hfsc_class *cl;
+ struct hfsc_classstats stats;
+ int error = 0;
+
+ if ((hif = altq_lookup(a->ifname, ALTQT_HFSC)) == NULL)
+ return (EBADF);
+
+ if ((cl = clh_to_clp(hif, a->qid)) == NULL)
+ return (EINVAL);
+
+ if (*nbytes < sizeof(stats))
+ return (EINVAL);
+
+ get_class_stats(&stats, cl);
+
+ if ((error = copyout((caddr_t)&stats, ubuf, sizeof(stats))) != 0)
+ return (error);
+ *nbytes = sizeof(stats);
+ return (0);
+}
+
+/*
+ * bring the interface back to the initial state by discarding
+ * all the filters and classes except the root class.
+ */
+static int
+hfsc_clear_interface(struct hfsc_if *hif)
+{
+ struct hfsc_class *cl;
+
+#ifdef ALTQ3_COMPAT
+ /* free the filters for this interface */
+ acc_discard_filters(&hif->hif_classifier, NULL, 1);
+#endif
+
+ /* clear out the classes */
+ while (hif->hif_rootclass != NULL &&
+ (cl = hif->hif_rootclass->cl_children) != NULL) {
+ /*
+ * remove the first leaf class found in the hierarchy
+ * then start over
+ */
+ for (; cl != NULL; cl = hfsc_nextclass(cl)) {
+ if (!is_a_parent_class(cl)) {
+ (void)hfsc_class_destroy(cl);
+ break;
+ }
+ }
+ }
+
+ return (0);
+}
+
+static int
+hfsc_request(struct ifaltq *ifq, int req, void *arg)
+{
+ struct hfsc_if *hif = (struct hfsc_if *)ifq->altq_disc;
+
+ IFQ_LOCK_ASSERT(ifq);
+
+ switch (req) {
+ case ALTRQ_PURGE:
+ hfsc_purge(hif);
+ break;
+ }
+ return (0);
+}
+
+/* discard all the queued packets on the interface */
+static void
+hfsc_purge(struct hfsc_if *hif)
+{
+ struct hfsc_class *cl;
+
+ for (cl = hif->hif_rootclass; cl != NULL; cl = hfsc_nextclass(cl))
+ if (!qempty(cl->cl_q))
+ hfsc_purgeq(cl);
+ if (ALTQ_IS_ENABLED(hif->hif_ifq))
+ hif->hif_ifq->ifq_len = 0;
+}
+
+struct hfsc_class *
+hfsc_class_create(struct hfsc_if *hif, struct service_curve *rsc,
+ struct service_curve *fsc, struct service_curve *usc,
+ struct hfsc_class *parent, int qlimit, int flags, int qid)
+{
+ struct hfsc_class *cl, *p;
+ int i, s;
+
+ if (hif->hif_classes >= HFSC_MAX_CLASSES)
+ return (NULL);
+
+#ifndef ALTQ_RED
+ if (flags & HFCF_RED) {
+#ifdef ALTQ_DEBUG
+ printf("hfsc_class_create: RED not configured for HFSC!\n");
+#endif
+ return (NULL);
+ }
+#endif
+
+ cl = malloc(sizeof(struct hfsc_class), M_DEVBUF, M_NOWAIT | M_ZERO);
+ if (cl == NULL)
+ return (NULL);
+
+ cl->cl_q = malloc(sizeof(class_queue_t), M_DEVBUF, M_NOWAIT | M_ZERO);
+ if (cl->cl_q == NULL)
+ goto err_ret;
+
+ TAILQ_INIT(&cl->cl_actc);
+
+ if (qlimit == 0)
+ qlimit = 50; /* use default */
+ qlimit(cl->cl_q) = qlimit;
+ qtype(cl->cl_q) = Q_DROPTAIL;
+ qlen(cl->cl_q) = 0;
+ cl->cl_flags = flags;
+#ifdef ALTQ_RED
+ if (flags & (HFCF_RED|HFCF_RIO)) {
+ int red_flags, red_pkttime;
+ u_int m2;
+
+ m2 = 0;
+ if (rsc != NULL && rsc->m2 > m2)
+ m2 = rsc->m2;
+ if (fsc != NULL && fsc->m2 > m2)
+ m2 = fsc->m2;
+ if (usc != NULL && usc->m2 > m2)
+ m2 = usc->m2;
+
+ red_flags = 0;
+ if (flags & HFCF_ECN)
+ red_flags |= REDF_ECN;
+#ifdef ALTQ_RIO
+ if (flags & HFCF_CLEARDSCP)
+ red_flags |= RIOF_CLEARDSCP;
+#endif
+ if (m2 < 8)
+ red_pkttime = 1000 * 1000 * 1000; /* 1 sec */
+ else
+ red_pkttime = (int64_t)hif->hif_ifq->altq_ifp->if_mtu
+ * 1000 * 1000 * 1000 / (m2 / 8);
+ if (flags & HFCF_RED) {
+ cl->cl_red = red_alloc(0, 0,
+ qlimit(cl->cl_q) * 10/100,
+ qlimit(cl->cl_q) * 30/100,
+ red_flags, red_pkttime);
+ if (cl->cl_red != NULL)
+ qtype(cl->cl_q) = Q_RED;
+ }
+#ifdef ALTQ_RIO
+ else {
+ cl->cl_red = (red_t *)rio_alloc(0, NULL,
+ red_flags, red_pkttime);
+ if (cl->cl_red != NULL)
+ qtype(cl->cl_q) = Q_RIO;
+ }
+#endif
+ }
+#endif /* ALTQ_RED */
+
+ if (rsc != NULL && (rsc->m1 != 0 || rsc->m2 != 0)) {
+ cl->cl_rsc = malloc(sizeof(struct internal_sc),
+ M_DEVBUF, M_NOWAIT);
+ if (cl->cl_rsc == NULL)
+ goto err_ret;
+ sc2isc(rsc, cl->cl_rsc);
+ rtsc_init(&cl->cl_deadline, cl->cl_rsc, 0, 0);
+ rtsc_init(&cl->cl_eligible, cl->cl_rsc, 0, 0);
+ }
+ if (fsc != NULL && (fsc->m1 != 0 || fsc->m2 != 0)) {
+ cl->cl_fsc = malloc(sizeof(struct internal_sc),
+ M_DEVBUF, M_NOWAIT);
+ if (cl->cl_fsc == NULL)
+ goto err_ret;
+ sc2isc(fsc, cl->cl_fsc);
+ rtsc_init(&cl->cl_virtual, cl->cl_fsc, 0, 0);
+ }
+ if (usc != NULL && (usc->m1 != 0 || usc->m2 != 0)) {
+ cl->cl_usc = malloc(sizeof(struct internal_sc),
+ M_DEVBUF, M_NOWAIT);
+ if (cl->cl_usc == NULL)
+ goto err_ret;
+ sc2isc(usc, cl->cl_usc);
+ rtsc_init(&cl->cl_ulimit, cl->cl_usc, 0, 0);
+ }
+
+ cl->cl_id = hif->hif_classid++;
+ cl->cl_handle = qid;
+ cl->cl_hif = hif;
+ cl->cl_parent = parent;
+
+#ifdef __NetBSD__
+ s = splnet();
+#else
+ s = splimp();
+#endif
+ IFQ_LOCK(hif->hif_ifq);
+ hif->hif_classes++;
+
+ /*
+ * find a free slot in the class table. if the slot matching
+ * the lower bits of qid is free, use this slot. otherwise,
+ * use the first free slot.
+ */
+ i = qid % HFSC_MAX_CLASSES;
+ if (hif->hif_class_tbl[i] == NULL)
+ hif->hif_class_tbl[i] = cl;
+ else {
+ for (i = 0; i < HFSC_MAX_CLASSES; i++)
+ if (hif->hif_class_tbl[i] == NULL) {
+ hif->hif_class_tbl[i] = cl;
+ break;
+ }
+ if (i == HFSC_MAX_CLASSES) {
+ IFQ_UNLOCK(hif->hif_ifq);
+ splx(s);
+ goto err_ret;
+ }
+ }
+
+ if (flags & HFCF_DEFAULTCLASS)
+ hif->hif_defaultclass = cl;
+
+ if (parent == NULL) {
+ /* this is root class */
+ hif->hif_rootclass = cl;
+ } else {
+ /* add this class to the children list of the parent */
+ if ((p = parent->cl_children) == NULL)
+ parent->cl_children = cl;
+ else {
+ while (p->cl_siblings != NULL)
+ p = p->cl_siblings;
+ p->cl_siblings = cl;
+ }
+ }
+ IFQ_UNLOCK(hif->hif_ifq);
+ splx(s);
+
+ return (cl);
+
+ err_ret:
+ if (cl->cl_red != NULL) {
+#ifdef ALTQ_RIO
+ if (q_is_rio(cl->cl_q))
+ rio_destroy((rio_t *)cl->cl_red);
+#endif
+#ifdef ALTQ_RED
+ if (q_is_red(cl->cl_q))
+ red_destroy(cl->cl_red);
+#endif
+ }
+ if (cl->cl_fsc != NULL)
+ free(cl->cl_fsc, M_DEVBUF);
+ if (cl->cl_rsc != NULL)
+ free(cl->cl_rsc, M_DEVBUF);
+ if (cl->cl_usc != NULL)
+ free(cl->cl_usc, M_DEVBUF);
+ if (cl->cl_q != NULL)
+ free(cl->cl_q, M_DEVBUF);
+ free(cl, M_DEVBUF);
+ return (NULL);
+}
+
+static int
+hfsc_class_destroy(struct hfsc_class *cl)
+{
+ int i, s;
+
+ if (cl == NULL)
+ return (0);
+
+ if (is_a_parent_class(cl))
+ return (EBUSY);
+
+#ifdef __NetBSD__
+ s = splnet();
+#else
+ s = splimp();
+#endif
+ IFQ_LOCK(cl->cl_hif->hif_ifq);
+
+#ifdef ALTQ3_COMPAT
+ /* delete filters referencing to this class */
+ acc_discard_filters(&cl->cl_hif->hif_classifier, cl, 0);
+#endif /* ALTQ3_COMPAT */
+
+ if (!qempty(cl->cl_q))
+ hfsc_purgeq(cl);
+
+ if (cl->cl_parent == NULL) {
+ /* this is root class */
+ } else {
+ struct hfsc_class *p = cl->cl_parent->cl_children;
+
+ if (p == cl)
+ cl->cl_parent->cl_children = cl->cl_siblings;
+ else do {
+ if (p->cl_siblings == cl) {
+ p->cl_siblings = cl->cl_siblings;
+ break;
+ }
+ } while ((p = p->cl_siblings) != NULL);
+ ASSERT(p != NULL);
+ }
+
+ for (i = 0; i < HFSC_MAX_CLASSES; i++)
+ if (cl->cl_hif->hif_class_tbl[i] == cl) {
+ cl->cl_hif->hif_class_tbl[i] = NULL;
+ break;
+ }
+
+ cl->cl_hif->hif_classes--;
+ IFQ_UNLOCK(cl->cl_hif->hif_ifq);
+ splx(s);
+
+ if (cl->cl_red != NULL) {
+#ifdef ALTQ_RIO
+ if (q_is_rio(cl->cl_q))
+ rio_destroy((rio_t *)cl->cl_red);
+#endif
+#ifdef ALTQ_RED
+ if (q_is_red(cl->cl_q))
+ red_destroy(cl->cl_red);
+#endif
+ }
+
+ IFQ_LOCK(cl->cl_hif->hif_ifq);
+ if (cl == cl->cl_hif->hif_rootclass)
+ cl->cl_hif->hif_rootclass = NULL;
+ if (cl == cl->cl_hif->hif_defaultclass)
+ cl->cl_hif->hif_defaultclass = NULL;
+ IFQ_UNLOCK(cl->cl_hif->hif_ifq);
+
+ if (cl->cl_usc != NULL)
+ free(cl->cl_usc, M_DEVBUF);
+ if (cl->cl_fsc != NULL)
+ free(cl->cl_fsc, M_DEVBUF);
+ if (cl->cl_rsc != NULL)
+ free(cl->cl_rsc, M_DEVBUF);
+ free(cl->cl_q, M_DEVBUF);
+ free(cl, M_DEVBUF);
+
+ return (0);
+}
+
+/*
+ * hfsc_nextclass returns the next class in the tree.
+ * usage:
+ * for (cl = hif->hif_rootclass; cl != NULL; cl = hfsc_nextclass(cl))
+ * do_something;
+ */
+static struct hfsc_class *
+hfsc_nextclass(struct hfsc_class *cl)
+{
+ if (cl->cl_children != NULL)
+ cl = cl->cl_children;
+ else if (cl->cl_siblings != NULL)
+ cl = cl->cl_siblings;
+ else {
+ while ((cl = cl->cl_parent) != NULL)
+ if (cl->cl_siblings) {
+ cl = cl->cl_siblings;
+ break;
+ }
+ }
+
+ return (cl);
+}
+
+/*
+ * hfsc_enqueue is an enqueue function to be registered to
+ * (*altq_enqueue) in struct ifaltq.
+ */
+static int
+hfsc_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr)
+{
+ struct hfsc_if *hif = (struct hfsc_if *)ifq->altq_disc;
+ struct hfsc_class *cl;
+ struct pf_mtag *t;
+ int len;
+
+ IFQ_LOCK_ASSERT(ifq);
+
+ /* grab class set by classifier */
+ if ((m->m_flags & M_PKTHDR) == 0) {
+ /* should not happen */
+ printf("altq: packet for %s does not have pkthdr\n",
+ ifq->altq_ifp->if_xname);
+ m_freem(m);
+ return (ENOBUFS);
+ }
+ cl = NULL;
+ if ((t = pf_find_mtag(m)) != NULL)
+ cl = clh_to_clp(hif, t->qid);
+#ifdef ALTQ3_COMPAT
+ else if ((ifq->altq_flags & ALTQF_CLASSIFY) && pktattr != NULL)
+ cl = pktattr->pattr_class;
+#endif
+ if (cl == NULL || is_a_parent_class(cl)) {
+ cl = hif->hif_defaultclass;
+ if (cl == NULL) {
+ m_freem(m);
+ return (ENOBUFS);
+ }
+ }
+#ifdef ALTQ3_COMPAT
+ if (pktattr != NULL)
+ cl->cl_pktattr = pktattr; /* save proto hdr used by ECN */
+ else
+#endif
+ cl->cl_pktattr = NULL;
+ len = m_pktlen(m);
+ if (hfsc_addq(cl, m) != 0) {
+ /* drop occurred. mbuf was freed in hfsc_addq. */
+ PKTCNTR_ADD(&cl->cl_stats.drop_cnt, len);
+ return (ENOBUFS);
+ }
+ IFQ_INC_LEN(ifq);
+ cl->cl_hif->hif_packets++;
+
+ /* successfully queued. */
+ if (qlen(cl->cl_q) == 1)
+ set_active(cl, m_pktlen(m));
+
+ return (0);
+}
+
+/*
+ * hfsc_dequeue is a dequeue function to be registered to
+ * (*altq_dequeue) in struct ifaltq.
+ *
+ * note: ALTDQ_POLL returns the next packet without removing the packet
+ * from the queue. ALTDQ_REMOVE is a normal dequeue operation.
+ * ALTDQ_REMOVE must return the same packet if called immediately
+ * after ALTDQ_POLL.
+ */
+static struct mbuf *
+hfsc_dequeue(struct ifaltq *ifq, int op)
+{
+ struct hfsc_if *hif = (struct hfsc_if *)ifq->altq_disc;
+ struct hfsc_class *cl;
+ struct mbuf *m;
+ int len, next_len;
+ int realtime = 0;
+ u_int64_t cur_time;
+
+ IFQ_LOCK_ASSERT(ifq);
+
+ if (hif->hif_packets == 0)
+ /* no packet in the tree */
+ return (NULL);
+
+ cur_time = read_machclk();
+
+ if (op == ALTDQ_REMOVE && hif->hif_pollcache != NULL) {
+
+ cl = hif->hif_pollcache;
+ hif->hif_pollcache = NULL;
+ /* check if the class was scheduled by real-time criteria */
+ if (cl->cl_rsc != NULL)
+ realtime = (cl->cl_e <= cur_time);
+ } else {
+ /*
+ * if there are eligible classes, use real-time criteria.
+ * find the class with the minimum deadline among
+ * the eligible classes.
+ */
+ if ((cl = hfsc_get_mindl(hif, cur_time))
+ != NULL) {
+ realtime = 1;
+ } else {
+#ifdef ALTQ_DEBUG
+ int fits = 0;
+#endif
+ /*
+ * use link-sharing criteria
+ * get the class with the minimum vt in the hierarchy
+ */
+ cl = hif->hif_rootclass;
+ while (is_a_parent_class(cl)) {
+
+ cl = actlist_firstfit(cl, cur_time);
+ if (cl == NULL) {
+#ifdef ALTQ_DEBUG
+ if (fits > 0)
+ printf("%d fit but none found\n",fits);
+#endif
+ return (NULL);
+ }
+ /*
+ * update parent's cl_cvtmin.
+ * don't update if the new vt is smaller.
+ */
+ if (cl->cl_parent->cl_cvtmin < cl->cl_vt)
+ cl->cl_parent->cl_cvtmin = cl->cl_vt;
+#ifdef ALTQ_DEBUG
+ fits++;
+#endif
+ }
+ }
+
+ if (op == ALTDQ_POLL) {
+ hif->hif_pollcache = cl;
+ m = hfsc_pollq(cl);
+ return (m);
+ }
+ }
+
+ m = hfsc_getq(cl);
+ if (m == NULL)
+ panic("hfsc_dequeue:");
+ len = m_pktlen(m);
+ cl->cl_hif->hif_packets--;
+ IFQ_DEC_LEN(ifq);
+ PKTCNTR_ADD(&cl->cl_stats.xmit_cnt, len);
+
+ update_vf(cl, len, cur_time);
+ if (realtime)
+ cl->cl_cumul += len;
+
+ if (!qempty(cl->cl_q)) {
+ if (cl->cl_rsc != NULL) {
+ /* update ed */
+ next_len = m_pktlen(qhead(cl->cl_q));
+
+ if (realtime)
+ update_ed(cl, next_len);
+ else
+ update_d(cl, next_len);
+ }
+ } else {
+ /* the class becomes passive */
+ set_passive(cl);
+ }
+
+ return (m);
+}
+
+static int
+hfsc_addq(struct hfsc_class *cl, struct mbuf *m)
+{
+
+#ifdef ALTQ_RIO
+ if (q_is_rio(cl->cl_q))
+ return rio_addq((rio_t *)cl->cl_red, cl->cl_q,
+ m, cl->cl_pktattr);
+#endif
+#ifdef ALTQ_RED
+ if (q_is_red(cl->cl_q))
+ return red_addq(cl->cl_red, cl->cl_q, m, cl->cl_pktattr);
+#endif
+ if (qlen(cl->cl_q) >= qlimit(cl->cl_q)) {
+ m_freem(m);
+ return (-1);
+ }
+
+ if (cl->cl_flags & HFCF_CLEARDSCP)
+ write_dsfield(m, cl->cl_pktattr, 0);
+
+ _addq(cl->cl_q, m);
+
+ return (0);
+}
+
+static struct mbuf *
+hfsc_getq(struct hfsc_class *cl)
+{
+#ifdef ALTQ_RIO
+ if (q_is_rio(cl->cl_q))
+ return rio_getq((rio_t *)cl->cl_red, cl->cl_q);
+#endif
+#ifdef ALTQ_RED
+ if (q_is_red(cl->cl_q))
+ return red_getq(cl->cl_red, cl->cl_q);
+#endif
+ return _getq(cl->cl_q);
+}
+
+static struct mbuf *
+hfsc_pollq(struct hfsc_class *cl)
+{
+ return qhead(cl->cl_q);
+}
+
+static void
+hfsc_purgeq(struct hfsc_class *cl)
+{
+ struct mbuf *m;
+
+ if (qempty(cl->cl_q))
+ return;
+
+ while ((m = _getq(cl->cl_q)) != NULL) {
+ PKTCNTR_ADD(&cl->cl_stats.drop_cnt, m_pktlen(m));
+ m_freem(m);
+ cl->cl_hif->hif_packets--;
+ IFQ_DEC_LEN(cl->cl_hif->hif_ifq);
+ }
+ ASSERT(qlen(cl->cl_q) == 0);
+
+ update_vf(cl, 0, 0); /* remove cl from the actlist */
+ set_passive(cl);
+}
+
+static void
+set_active(struct hfsc_class *cl, int len)
+{
+ if (cl->cl_rsc != NULL)
+ init_ed(cl, len);
+ if (cl->cl_fsc != NULL)
+ init_vf(cl, len);
+
+ cl->cl_stats.period++;
+}
+
+static void
+set_passive(struct hfsc_class *cl)
+{
+ if (cl->cl_rsc != NULL)
+ ellist_remove(cl);
+
+ /*
+ * actlist is now handled in update_vf() so that update_vf(cl, 0, 0)
+ * needs to be called explicitly to remove a class from actlist
+ */
+}
+
+static void
+init_ed(struct hfsc_class *cl, int next_len)
+{
+ u_int64_t cur_time;
+
+ cur_time = read_machclk();
+
+ /* update the deadline curve */
+ rtsc_min(&cl->cl_deadline, cl->cl_rsc, cur_time, cl->cl_cumul);
+
+ /*
+ * update the eligible curve.
+ * for concave, it is equal to the deadline curve.
+ * for convex, it is a linear curve with slope m2.
+ */
+ cl->cl_eligible = cl->cl_deadline;
+ if (cl->cl_rsc->sm1 <= cl->cl_rsc->sm2) {
+ cl->cl_eligible.dx = 0;
+ cl->cl_eligible.dy = 0;
+ }
+
+ /* compute e and d */
+ cl->cl_e = rtsc_y2x(&cl->cl_eligible, cl->cl_cumul);
+ cl->cl_d = rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len);
+
+ ellist_insert(cl);
+}
+
+static void
+update_ed(struct hfsc_class *cl, int next_len)
+{
+ cl->cl_e = rtsc_y2x(&cl->cl_eligible, cl->cl_cumul);
+ cl->cl_d = rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len);
+
+ ellist_update(cl);
+}
+
+static void
+update_d(struct hfsc_class *cl, int next_len)
+{
+ cl->cl_d = rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len);
+}
+
+static void
+init_vf(struct hfsc_class *cl, int len)
+{
+ struct hfsc_class *max_cl, *p;
+ u_int64_t vt, f, cur_time;
+ int go_active;
+
+ cur_time = 0;
+ go_active = 1;
+ for ( ; cl->cl_parent != NULL; cl = cl->cl_parent) {
+
+ if (go_active && cl->cl_nactive++ == 0)
+ go_active = 1;
+ else
+ go_active = 0;
+
+ if (go_active) {
+ max_cl = TAILQ_LAST(&cl->cl_parent->cl_actc, acthead);
+ if (max_cl != NULL) {
+ /*
+ * set vt to the average of the min and max
+ * classes. if the parent's period didn't
+ * change, don't decrease vt of the class.
+ */
+ vt = max_cl->cl_vt;
+ if (cl->cl_parent->cl_cvtmin != 0)
+ vt = (cl->cl_parent->cl_cvtmin + vt)/2;
+
+ if (cl->cl_parent->cl_vtperiod !=
+ cl->cl_parentperiod || vt > cl->cl_vt)
+ cl->cl_vt = vt;
+ } else {
+ /*
+ * first child for a new parent backlog period.
+ * add parent's cvtmax to vtoff of children
+ * to make a new vt (vtoff + vt) larger than
+ * the vt in the last period for all children.
+ */
+ vt = cl->cl_parent->cl_cvtmax;
+ for (p = cl->cl_parent->cl_children; p != NULL;
+ p = p->cl_siblings)
+ p->cl_vtoff += vt;
+ cl->cl_vt = 0;
+ cl->cl_parent->cl_cvtmax = 0;
+ cl->cl_parent->cl_cvtmin = 0;
+ }
+ cl->cl_initvt = cl->cl_vt;
+
+ /* update the virtual curve */
+ vt = cl->cl_vt + cl->cl_vtoff;
+ rtsc_min(&cl->cl_virtual, cl->cl_fsc, vt, cl->cl_total);
+ if (cl->cl_virtual.x == vt) {
+ cl->cl_virtual.x -= cl->cl_vtoff;
+ cl->cl_vtoff = 0;
+ }
+ cl->cl_vtadj = 0;
+
+ cl->cl_vtperiod++; /* increment vt period */
+ cl->cl_parentperiod = cl->cl_parent->cl_vtperiod;
+ if (cl->cl_parent->cl_nactive == 0)
+ cl->cl_parentperiod++;
+ cl->cl_f = 0;
+
+ actlist_insert(cl);
+
+ if (cl->cl_usc != NULL) {
+ /* class has upper limit curve */
+ if (cur_time == 0)
+ cur_time = read_machclk();
+
+ /* update the ulimit curve */
+ rtsc_min(&cl->cl_ulimit, cl->cl_usc, cur_time,
+ cl->cl_total);
+ /* compute myf */
+ cl->cl_myf = rtsc_y2x(&cl->cl_ulimit,
+ cl->cl_total);
+ cl->cl_myfadj = 0;
+ }
+ }
+
+ if (cl->cl_myf > cl->cl_cfmin)
+ f = cl->cl_myf;
+ else
+ f = cl->cl_cfmin;
+ if (f != cl->cl_f) {
+ cl->cl_f = f;
+ update_cfmin(cl->cl_parent);
+ }
+ }
+}
+
+static void
+update_vf(struct hfsc_class *cl, int len, u_int64_t cur_time)
+{
+ u_int64_t f, myf_bound, delta;
+ int go_passive;
+
+ go_passive = qempty(cl->cl_q);
+
+ for (; cl->cl_parent != NULL; cl = cl->cl_parent) {
+
+ cl->cl_total += len;
+
+ if (cl->cl_fsc == NULL || cl->cl_nactive == 0)
+ continue;
+
+ if (go_passive && --cl->cl_nactive == 0)
+ go_passive = 1;
+ else
+ go_passive = 0;
+
+ if (go_passive) {
+ /* no more active child, going passive */
+
+ /* update cvtmax of the parent class */
+ if (cl->cl_vt > cl->cl_parent->cl_cvtmax)
+ cl->cl_parent->cl_cvtmax = cl->cl_vt;
+
+ /* remove this class from the vt list */
+ actlist_remove(cl);
+
+ update_cfmin(cl->cl_parent);
+
+ continue;
+ }
+
+ /*
+ * update vt and f
+ */
+ cl->cl_vt = rtsc_y2x(&cl->cl_virtual, cl->cl_total)
+ - cl->cl_vtoff + cl->cl_vtadj;
+
+ /*
+ * if vt of the class is smaller than cvtmin,
+ * the class was skipped in the past due to non-fit.
+ * if so, we need to adjust vtadj.
+ */
+ if (cl->cl_vt < cl->cl_parent->cl_cvtmin) {
+ cl->cl_vtadj += cl->cl_parent->cl_cvtmin - cl->cl_vt;
+ cl->cl_vt = cl->cl_parent->cl_cvtmin;
+ }
+
+ /* update the vt list */
+ actlist_update(cl);
+
+ if (cl->cl_usc != NULL) {
+ cl->cl_myf = cl->cl_myfadj
+ + rtsc_y2x(&cl->cl_ulimit, cl->cl_total);
+
+ /*
+ * if myf lags behind by more than one clock tick
+ * from the current time, adjust myfadj to prevent
+ * a rate-limited class from going greedy.
+ * in a steady state under rate-limiting, myf
+ * fluctuates within one clock tick.
+ */
+ myf_bound = cur_time - machclk_per_tick;
+ if (cl->cl_myf < myf_bound) {
+ delta = cur_time - cl->cl_myf;
+ cl->cl_myfadj += delta;
+ cl->cl_myf += delta;
+ }
+ }
+
+ /* cl_f is max(cl_myf, cl_cfmin) */
+ if (cl->cl_myf > cl->cl_cfmin)
+ f = cl->cl_myf;
+ else
+ f = cl->cl_cfmin;
+ if (f != cl->cl_f) {
+ cl->cl_f = f;
+ update_cfmin(cl->cl_parent);
+ }
+ }
+}
+
+static void
+update_cfmin(struct hfsc_class *cl)
+{
+ struct hfsc_class *p;
+ u_int64_t cfmin;
+
+ if (TAILQ_EMPTY(&cl->cl_actc)) {
+ cl->cl_cfmin = 0;
+ return;
+ }
+ cfmin = HT_INFINITY;
+ TAILQ_FOREACH(p, &cl->cl_actc, cl_actlist) {
+ if (p->cl_f == 0) {
+ cl->cl_cfmin = 0;
+ return;
+ }
+ if (p->cl_f < cfmin)
+ cfmin = p->cl_f;
+ }
+ cl->cl_cfmin = cfmin;
+}
+
+/*
+ * TAILQ based ellist and actlist implementation
+ * (ion wanted to make a calendar queue based implementation)
+ */
+/*
+ * eligible list holds backlogged classes being sorted by their eligible times.
+ * there is one eligible list per interface.
+ */
+
+static void
+ellist_insert(struct hfsc_class *cl)
+{
+ struct hfsc_if *hif = cl->cl_hif;
+ struct hfsc_class *p;
+
+ /* check the last entry first */
+ if ((p = TAILQ_LAST(&hif->hif_eligible, elighead)) == NULL ||
+ p->cl_e <= cl->cl_e) {
+ TAILQ_INSERT_TAIL(&hif->hif_eligible, cl, cl_ellist);
+ return;
+ }
+
+ TAILQ_FOREACH(p, &hif->hif_eligible, cl_ellist) {
+ if (cl->cl_e < p->cl_e) {
+ TAILQ_INSERT_BEFORE(p, cl, cl_ellist);
+ return;
+ }
+ }
+ ASSERT(0); /* should not reach here */
+}
+
+static void
+ellist_remove(struct hfsc_class *cl)
+{
+ struct hfsc_if *hif = cl->cl_hif;
+
+ TAILQ_REMOVE(&hif->hif_eligible, cl, cl_ellist);
+}
+
+static void
+ellist_update(struct hfsc_class *cl)
+{
+ struct hfsc_if *hif = cl->cl_hif;
+ struct hfsc_class *p, *last;
+
+ /*
+ * the eligible time of a class increases monotonically.
+ * if the next entry has a larger eligible time, nothing to do.
+ */
+ p = TAILQ_NEXT(cl, cl_ellist);
+ if (p == NULL || cl->cl_e <= p->cl_e)
+ return;
+
+ /* check the last entry */
+ last = TAILQ_LAST(&hif->hif_eligible, elighead);
+ ASSERT(last != NULL);
+ if (last->cl_e <= cl->cl_e) {
+ TAILQ_REMOVE(&hif->hif_eligible, cl, cl_ellist);
+ TAILQ_INSERT_TAIL(&hif->hif_eligible, cl, cl_ellist);
+ return;
+ }
+
+ /*
+ * the new position must be between the next entry
+ * and the last entry
+ */
+ while ((p = TAILQ_NEXT(p, cl_ellist)) != NULL) {
+ if (cl->cl_e < p->cl_e) {
+ TAILQ_REMOVE(&hif->hif_eligible, cl, cl_ellist);
+ TAILQ_INSERT_BEFORE(p, cl, cl_ellist);
+ return;
+ }
+ }
+ ASSERT(0); /* should not reach here */
+}
+
+/* find the class with the minimum deadline among the eligible classes */
+struct hfsc_class *
+hfsc_get_mindl(struct hfsc_if *hif, u_int64_t cur_time)
+{
+ struct hfsc_class *p, *cl = NULL;
+
+ TAILQ_FOREACH(p, &hif->hif_eligible, cl_ellist) {
+ if (p->cl_e > cur_time)
+ break;
+ if (cl == NULL || p->cl_d < cl->cl_d)
+ cl = p;
+ }
+ return (cl);
+}
+
+/*
+ * active children list holds backlogged child classes being sorted
+ * by their virtual time.
+ * each intermediate class has one active children list.
+ */
+
+static void
+actlist_insert(struct hfsc_class *cl)
+{
+ struct hfsc_class *p;
+
+ /* check the last entry first */
+ if ((p = TAILQ_LAST(&cl->cl_parent->cl_actc, acthead)) == NULL
+ || p->cl_vt <= cl->cl_vt) {
+ TAILQ_INSERT_TAIL(&cl->cl_parent->cl_actc, cl, cl_actlist);
+ return;
+ }
+
+ TAILQ_FOREACH(p, &cl->cl_parent->cl_actc, cl_actlist) {
+ if (cl->cl_vt < p->cl_vt) {
+ TAILQ_INSERT_BEFORE(p, cl, cl_actlist);
+ return;
+ }
+ }
+ ASSERT(0); /* should not reach here */
+}
+
+static void
+actlist_remove(struct hfsc_class *cl)
+{
+ TAILQ_REMOVE(&cl->cl_parent->cl_actc, cl, cl_actlist);
+}
+
+static void
+actlist_update(struct hfsc_class *cl)
+{
+ struct hfsc_class *p, *last;
+
+ /*
+ * the virtual time of a class increases monotonically during its
+ * backlogged period.
+ * if the next entry has a larger virtual time, nothing to do.
+ */
+ p = TAILQ_NEXT(cl, cl_actlist);
+ if (p == NULL || cl->cl_vt < p->cl_vt)
+ return;
+
+ /* check the last entry */
+ last = TAILQ_LAST(&cl->cl_parent->cl_actc, acthead);
+ ASSERT(last != NULL);
+ if (last->cl_vt <= cl->cl_vt) {
+ TAILQ_REMOVE(&cl->cl_parent->cl_actc, cl, cl_actlist);
+ TAILQ_INSERT_TAIL(&cl->cl_parent->cl_actc, cl, cl_actlist);
+ return;
+ }
+
+ /*
+ * the new position must be between the next entry
+ * and the last entry
+ */
+ while ((p = TAILQ_NEXT(p, cl_actlist)) != NULL) {
+ if (cl->cl_vt < p->cl_vt) {
+ TAILQ_REMOVE(&cl->cl_parent->cl_actc, cl, cl_actlist);
+ TAILQ_INSERT_BEFORE(p, cl, cl_actlist);
+ return;
+ }
+ }
+ ASSERT(0); /* should not reach here */
+}
+
+static struct hfsc_class *
+actlist_firstfit(struct hfsc_class *cl, u_int64_t cur_time)
+{
+ struct hfsc_class *p;
+
+ TAILQ_FOREACH(p, &cl->cl_actc, cl_actlist) {
+ if (p->cl_f <= cur_time)
+ return (p);
+ }
+ return (NULL);
+}
+
+/*
+ * service curve support functions
+ *
+ * external service curve parameters
+ * m: bits/sec
+ * d: msec
+ * internal service curve parameters
+ * sm: (bytes/tsc_interval) << SM_SHIFT
+ * ism: (tsc_count/byte) << ISM_SHIFT
+ * dx: tsc_count
+ *
+ * SM_SHIFT and ISM_SHIFT are scaled in order to keep effective digits.
+ * we should be able to handle 100K-1Gbps link speed with 200MHz-1GHz CPU
+ * speed. SM_SHIFT and ISM_SHIFT are selected to have at least 3 effective
+ * digits in decimal using the following table.
+ *
+ * bits/sec 100Kbps 1Mbps 10Mbps 100Mbps 1Gbps
+ * ----------+-------------------------------------------------------
+ * bytes/nsec 12.5e-6 125e-6 1250e-6 12500e-6 125000e-6
+ * sm(500MHz) 25.0e-6 250e-6 2500e-6 25000e-6 250000e-6
+ * sm(200MHz) 62.5e-6 625e-6 6250e-6 62500e-6 625000e-6
+ *
+ * nsec/byte 80000 8000 800 80 8
+ * ism(500MHz) 40000 4000 400 40 4
+ * ism(200MHz) 16000 1600 160 16 1.6
+ */
+#define SM_SHIFT 24
+#define ISM_SHIFT 10
+
+#define SM_MASK ((1LL << SM_SHIFT) - 1)
+#define ISM_MASK ((1LL << ISM_SHIFT) - 1)
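+
+/*
+ * worked example (assumed numbers, a sketch rather than anything measured):
+ * with machclk_freq = 1GHz and a 100Mbps slope,
+ *	m2sm(100000000) = (100000000 << SM_SHIFT) / 8 / 10^9 = 209715,
+ * i.e. 0.0125 bytes/tick scaled by 2^24.  feeding 1msec worth of ticks into
+ *	seg_x2y(1000000, 209715)
+ * returns just under 12500 bytes, which is 100Mbps sustained for 1msec (the
+ * shortfall of a byte comes from truncating sm).  splitting the multiply in
+ * seg_x2y()/seg_y2x() into a high and a low part is what the overflow note
+ * in those functions refers to: the low SM_SHIFT (ISM_SHIFT) bits of the
+ * argument are multiplied separately so the full product never has to fit
+ * in 64 bits.
+ */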
+
+static __inline u_int64_t
+seg_x2y(u_int64_t x, u_int64_t sm)
+{
+ u_int64_t y;
+
+ /*
+ * compute
+ * y = x * sm >> SM_SHIFT
+ * but divide it for the upper and lower bits to avoid overflow
+ */
+ y = (x >> SM_SHIFT) * sm + (((x & SM_MASK) * sm) >> SM_SHIFT);
+ return (y);
+}
+
+static __inline u_int64_t
+seg_y2x(u_int64_t y, u_int64_t ism)
+{
+ u_int64_t x;
+
+ if (y == 0)
+ x = 0;
+ else if (ism == HT_INFINITY)
+ x = HT_INFINITY;
+ else {
+ x = (y >> ISM_SHIFT) * ism
+ + (((y & ISM_MASK) * ism) >> ISM_SHIFT);
+ }
+ return (x);
+}
+
+static __inline u_int64_t
+m2sm(u_int m)
+{
+ u_int64_t sm;
+
+ sm = ((u_int64_t)m << SM_SHIFT) / 8 / machclk_freq;
+ return (sm);
+}
+
+static __inline u_int64_t
+m2ism(u_int m)
+{
+ u_int64_t ism;
+
+ if (m == 0)
+ ism = HT_INFINITY;
+ else
+ ism = ((u_int64_t)machclk_freq << ISM_SHIFT) * 8 / m;
+ return (ism);
+}
+
+static __inline u_int64_t
+d2dx(u_int d)
+{
+ u_int64_t dx;
+
+ dx = ((u_int64_t)d * machclk_freq) / 1000;
+ return (dx);
+}
+
+static u_int
+sm2m(u_int64_t sm)
+{
+ u_int64_t m;
+
+ m = (sm * 8 * machclk_freq) >> SM_SHIFT;
+ return ((u_int)m);
+}
+
+static u_int
+dx2d(u_int64_t dx)
+{
+ u_int64_t d;
+
+ d = dx * 1000 / machclk_freq;
+ return ((u_int)d);
+}
+
+static void
+sc2isc(struct service_curve *sc, struct internal_sc *isc)
+{
+ isc->sm1 = m2sm(sc->m1);
+ isc->ism1 = m2ism(sc->m1);
+ isc->dx = d2dx(sc->d);
+ isc->dy = seg_x2y(isc->dx, isc->sm1);
+ isc->sm2 = m2sm(sc->m2);
+ isc->ism2 = m2ism(sc->m2);
+}
+
+/*
+ * initialize the runtime service curve with the given internal
+ * service curve starting at (x, y).
+ */
+static void
+rtsc_init(struct runtime_sc *rtsc, struct internal_sc * isc, u_int64_t x,
+ u_int64_t y)
+{
+ rtsc->x = x;
+ rtsc->y = y;
+ rtsc->sm1 = isc->sm1;
+ rtsc->ism1 = isc->ism1;
+ rtsc->dx = isc->dx;
+ rtsc->dy = isc->dy;
+ rtsc->sm2 = isc->sm2;
+ rtsc->ism2 = isc->ism2;
+}
+
+/*
+ * calculate the y-projection of the runtime service curve by the
+ * given x-projection value
+ */
+static u_int64_t
+rtsc_y2x(struct runtime_sc *rtsc, u_int64_t y)
+{
+ u_int64_t x;
+
+ if (y < rtsc->y)
+ x = rtsc->x;
+ else if (y <= rtsc->y + rtsc->dy) {
+ /* x belongs to the 1st segment */
+ if (rtsc->dy == 0)
+ x = rtsc->x + rtsc->dx;
+ else
+ x = rtsc->x + seg_y2x(y - rtsc->y, rtsc->ism1);
+ } else {
+ /* x belongs to the 2nd segment */
+ x = rtsc->x + rtsc->dx
+ + seg_y2x(y - rtsc->y - rtsc->dy, rtsc->ism2);
+ }
+ return (x);
+}
+
+static u_int64_t
+rtsc_x2y(struct runtime_sc *rtsc, u_int64_t x)
+{
+ u_int64_t y;
+
+ if (x <= rtsc->x)
+ y = rtsc->y;
+ else if (x <= rtsc->x + rtsc->dx)
+ /* y belongs to the 1st segment */
+ y = rtsc->y + seg_x2y(x - rtsc->x, rtsc->sm1);
+ else
+ /* y belongs to the 2nd segment */
+ y = rtsc->y + rtsc->dy
+ + seg_x2y(x - rtsc->x - rtsc->dx, rtsc->sm2);
+ return (y);
+}
+
+/*
+ * update the runtime service curve by taking the minimum of the current
+ * runtime service curve and the service curve starting at (x, y).
+ */
+static void
+rtsc_min(struct runtime_sc *rtsc, struct internal_sc *isc, u_int64_t x,
+ u_int64_t y)
+{
+ u_int64_t y1, y2, dx, dy;
+
+ if (isc->sm1 <= isc->sm2) {
+ /* service curve is convex */
+ y1 = rtsc_x2y(rtsc, x);
+ if (y1 < y)
+ /* the current rtsc is smaller */
+ return;
+ rtsc->x = x;
+ rtsc->y = y;
+ return;
+ }
+
+ /*
+ * service curve is concave
+ * compute the two y values of the current rtsc
+ * y1: at x
+ * y2: at (x + dx)
+ */
+ y1 = rtsc_x2y(rtsc, x);
+ if (y1 <= y) {
+ /* rtsc is below isc, no change to rtsc */
+ return;
+ }
+
+ y2 = rtsc_x2y(rtsc, x + isc->dx);
+ if (y2 >= y + isc->dy) {
+ /* rtsc is above isc, replace rtsc by isc */
+ rtsc->x = x;
+ rtsc->y = y;
+ rtsc->dx = isc->dx;
+ rtsc->dy = isc->dy;
+ return;
+ }
+
+ /*
+ * the two curves intersect
+ * compute the offsets (dx, dy) using the reverse
+ * function of seg_x2y()
+ * seg_x2y(dx, sm1) == seg_x2y(dx, sm2) + (y1 - y)
+ */
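+	/*
+	 * expanding seg_x2y() on both sides and multiplying through by
+	 * 2^SM_SHIFT gives dx * sm1 == dx * sm2 + ((y1 - y) << SM_SHIFT),
+	 * which is solved for dx below.
+	 */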
+ dx = ((y1 - y) << SM_SHIFT) / (isc->sm1 - isc->sm2);
+ /*
+ * check if (x, y1) belongs to the 1st segment of rtsc.
+ * if so, add the offset.
+ */
+ if (rtsc->x + rtsc->dx > x)
+ dx += rtsc->x + rtsc->dx - x;
+ dy = seg_x2y(dx, isc->sm1);
+
+ rtsc->x = x;
+ rtsc->y = y;
+ rtsc->dx = dx;
+ rtsc->dy = dy;
+ return;
+}
+
+static void
+get_class_stats(struct hfsc_classstats *sp, struct hfsc_class *cl)
+{
+ sp->class_id = cl->cl_id;
+ sp->class_handle = cl->cl_handle;
+
+ if (cl->cl_rsc != NULL) {
+ sp->rsc.m1 = sm2m(cl->cl_rsc->sm1);
+ sp->rsc.d = dx2d(cl->cl_rsc->dx);
+ sp->rsc.m2 = sm2m(cl->cl_rsc->sm2);
+ } else {
+ sp->rsc.m1 = 0;
+ sp->rsc.d = 0;
+ sp->rsc.m2 = 0;
+ }
+ if (cl->cl_fsc != NULL) {
+ sp->fsc.m1 = sm2m(cl->cl_fsc->sm1);
+ sp->fsc.d = dx2d(cl->cl_fsc->dx);
+ sp->fsc.m2 = sm2m(cl->cl_fsc->sm2);
+ } else {
+ sp->fsc.m1 = 0;
+ sp->fsc.d = 0;
+ sp->fsc.m2 = 0;
+ }
+ if (cl->cl_usc != NULL) {
+ sp->usc.m1 = sm2m(cl->cl_usc->sm1);
+ sp->usc.d = dx2d(cl->cl_usc->dx);
+ sp->usc.m2 = sm2m(cl->cl_usc->sm2);
+ } else {
+ sp->usc.m1 = 0;
+ sp->usc.d = 0;
+ sp->usc.m2 = 0;
+ }
+
+ sp->total = cl->cl_total;
+ sp->cumul = cl->cl_cumul;
+
+ sp->d = cl->cl_d;
+ sp->e = cl->cl_e;
+ sp->vt = cl->cl_vt;
+ sp->f = cl->cl_f;
+
+ sp->initvt = cl->cl_initvt;
+ sp->vtperiod = cl->cl_vtperiod;
+ sp->parentperiod = cl->cl_parentperiod;
+ sp->nactive = cl->cl_nactive;
+ sp->vtoff = cl->cl_vtoff;
+ sp->cvtmax = cl->cl_cvtmax;
+ sp->myf = cl->cl_myf;
+ sp->cfmin = cl->cl_cfmin;
+ sp->cvtmin = cl->cl_cvtmin;
+ sp->myfadj = cl->cl_myfadj;
+ sp->vtadj = cl->cl_vtadj;
+
+ sp->cur_time = read_machclk();
+ sp->machclk_freq = machclk_freq;
+
+ sp->qlength = qlen(cl->cl_q);
+ sp->qlimit = qlimit(cl->cl_q);
+ sp->xmit_cnt = cl->cl_stats.xmit_cnt;
+ sp->drop_cnt = cl->cl_stats.drop_cnt;
+ sp->period = cl->cl_stats.period;
+
+ sp->qtype = qtype(cl->cl_q);
+#ifdef ALTQ_RED
+ if (q_is_red(cl->cl_q))
+ red_getstats(cl->cl_red, &sp->red[0]);
+#endif
+#ifdef ALTQ_RIO
+ if (q_is_rio(cl->cl_q))
+ rio_getstats((rio_t *)cl->cl_red, &sp->red[0]);
+#endif
+}
+
+/* convert a class handle to the corresponding class pointer */
+static struct hfsc_class *
+clh_to_clp(struct hfsc_if *hif, u_int32_t chandle)
+{
+ int i;
+ struct hfsc_class *cl;
+
+ if (chandle == 0)
+ return (NULL);
+ /*
+ * first, try optimistically the slot matching the lower bits of
+ * the handle. if it fails, do the linear table search.
+ */
+ i = chandle % HFSC_MAX_CLASSES;
+ if ((cl = hif->hif_class_tbl[i]) != NULL && cl->cl_handle == chandle)
+ return (cl);
+ for (i = 0; i < HFSC_MAX_CLASSES; i++)
+ if ((cl = hif->hif_class_tbl[i]) != NULL &&
+ cl->cl_handle == chandle)
+ return (cl);
+ return (NULL);
+}
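+
+/*
+ * example (made-up handle): with HFSC_MAX_CLASSES == 64 a handle of 70 maps
+ * to slot 70 % 64 == 6; if a different class happens to sit in that slot,
+ * the linear scan over the whole table still finds the right one.
+ */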
+
+#ifdef ALTQ3_COMPAT
+static struct hfsc_if *
+hfsc_attach(ifq, bandwidth)
+ struct ifaltq *ifq;
+ u_int bandwidth;
+{
+ struct hfsc_if *hif;
+
+ hif = malloc(sizeof(struct hfsc_if), M_DEVBUF, M_WAITOK);
+ if (hif == NULL)
+ return (NULL);
+ bzero(hif, sizeof(struct hfsc_if));
+
+	TAILQ_INIT(&hif->hif_eligible);
+
+ hif->hif_ifq = ifq;
+
+ /* add this state to the hfsc list */
+ hif->hif_next = hif_list;
+ hif_list = hif;
+
+ return (hif);
+}
+
+static int
+hfsc_detach(hif)
+ struct hfsc_if *hif;
+{
+ (void)hfsc_clear_interface(hif);
+ (void)hfsc_class_destroy(hif->hif_rootclass);
+
+ /* remove this interface from the hif list */
+ if (hif_list == hif)
+ hif_list = hif->hif_next;
+ else {
+ struct hfsc_if *h;
+
+ for (h = hif_list; h != NULL; h = h->hif_next)
+ if (h->hif_next == hif) {
+ h->hif_next = hif->hif_next;
+ break;
+ }
+ ASSERT(h != NULL);
+ }
+
+ free(hif, M_DEVBUF);
+
+ return (0);
+}
+
+static int
+hfsc_class_modify(cl, rsc, fsc, usc)
+ struct hfsc_class *cl;
+ struct service_curve *rsc, *fsc, *usc;
+{
+ struct internal_sc *rsc_tmp, *fsc_tmp, *usc_tmp;
+ u_int64_t cur_time;
+ int s;
+
+ rsc_tmp = fsc_tmp = usc_tmp = NULL;
+ if (rsc != NULL && (rsc->m1 != 0 || rsc->m2 != 0) &&
+ cl->cl_rsc == NULL) {
+ rsc_tmp = malloc(sizeof(struct internal_sc),
+ M_DEVBUF, M_WAITOK);
+ if (rsc_tmp == NULL)
+ return (ENOMEM);
+ }
+ if (fsc != NULL && (fsc->m1 != 0 || fsc->m2 != 0) &&
+ cl->cl_fsc == NULL) {
+ fsc_tmp = malloc(sizeof(struct internal_sc),
+ M_DEVBUF, M_WAITOK);
+ if (fsc_tmp == NULL) {
+			free(rsc_tmp, M_DEVBUF);
+ return (ENOMEM);
+ }
+ }
+ if (usc != NULL && (usc->m1 != 0 || usc->m2 != 0) &&
+ cl->cl_usc == NULL) {
+ usc_tmp = malloc(sizeof(struct internal_sc),
+ M_DEVBUF, M_WAITOK);
+ if (usc_tmp == NULL) {
+			free(rsc_tmp, M_DEVBUF);
+			free(fsc_tmp, M_DEVBUF);
+ return (ENOMEM);
+ }
+ }
+
+ cur_time = read_machclk();
+#ifdef __NetBSD__
+ s = splnet();
+#else
+ s = splimp();
+#endif
+ IFQ_LOCK(cl->cl_hif->hif_ifq);
+
+ if (rsc != NULL) {
+ if (rsc->m1 == 0 && rsc->m2 == 0) {
+ if (cl->cl_rsc != NULL) {
+ if (!qempty(cl->cl_q))
+ hfsc_purgeq(cl);
+ free(cl->cl_rsc, M_DEVBUF);
+ cl->cl_rsc = NULL;
+ }
+ } else {
+ if (cl->cl_rsc == NULL)
+ cl->cl_rsc = rsc_tmp;
+ sc2isc(rsc, cl->cl_rsc);
+ rtsc_init(&cl->cl_deadline, cl->cl_rsc, cur_time,
+ cl->cl_cumul);
+ cl->cl_eligible = cl->cl_deadline;
+ if (cl->cl_rsc->sm1 <= cl->cl_rsc->sm2) {
+ cl->cl_eligible.dx = 0;
+ cl->cl_eligible.dy = 0;
+ }
+ }
+ }
+
+ if (fsc != NULL) {
+ if (fsc->m1 == 0 && fsc->m2 == 0) {
+ if (cl->cl_fsc != NULL) {
+ if (!qempty(cl->cl_q))
+ hfsc_purgeq(cl);
+ free(cl->cl_fsc, M_DEVBUF);
+ cl->cl_fsc = NULL;
+ }
+ } else {
+ if (cl->cl_fsc == NULL)
+ cl->cl_fsc = fsc_tmp;
+ sc2isc(fsc, cl->cl_fsc);
+ rtsc_init(&cl->cl_virtual, cl->cl_fsc, cl->cl_vt,
+ cl->cl_total);
+ }
+ }
+
+ if (usc != NULL) {
+ if (usc->m1 == 0 && usc->m2 == 0) {
+ if (cl->cl_usc != NULL) {
+ free(cl->cl_usc, M_DEVBUF);
+ cl->cl_usc = NULL;
+ cl->cl_myf = 0;
+ }
+ } else {
+ if (cl->cl_usc == NULL)
+ cl->cl_usc = usc_tmp;
+ sc2isc(usc, cl->cl_usc);
+ rtsc_init(&cl->cl_ulimit, cl->cl_usc, cur_time,
+ cl->cl_total);
+ }
+ }
+
+ if (!qempty(cl->cl_q)) {
+ if (cl->cl_rsc != NULL)
+ update_ed(cl, m_pktlen(qhead(cl->cl_q)));
+ if (cl->cl_fsc != NULL)
+ update_vf(cl, 0, cur_time);
+ /* is this enough? */
+ }
+
+ IFQ_UNLOCK(cl->cl_hif->hif_ifq);
+ splx(s);
+
+ return (0);
+}
+
+/*
+ * hfsc device interface
+ */
+int
+hfscopen(dev, flag, fmt, p)
+ dev_t dev;
+ int flag, fmt;
+#if (__FreeBSD_version > 500000)
+ struct thread *p;
+#else
+ struct proc *p;
+#endif
+{
+ if (machclk_freq == 0)
+ init_machclk();
+
+ if (machclk_freq == 0) {
+ printf("hfsc: no cpu clock available!\n");
+ return (ENXIO);
+ }
+
+ /* everything will be done when the queueing scheme is attached. */
+ return 0;
+}
+
+int
+hfscclose(dev, flag, fmt, p)
+ dev_t dev;
+ int flag, fmt;
+#if (__FreeBSD_version > 500000)
+ struct thread *p;
+#else
+ struct proc *p;
+#endif
+{
+ struct hfsc_if *hif;
+ int err, error = 0;
+
+ while ((hif = hif_list) != NULL) {
+ /* destroy all */
+ if (ALTQ_IS_ENABLED(hif->hif_ifq))
+ altq_disable(hif->hif_ifq);
+
+ err = altq_detach(hif->hif_ifq);
+ if (err == 0)
+ err = hfsc_detach(hif);
+ if (err != 0 && error == 0)
+ error = err;
+ }
+
+ return error;
+}
+
+int
+hfscioctl(dev, cmd, addr, flag, p)
+ dev_t dev;
+ ioctlcmd_t cmd;
+ caddr_t addr;
+ int flag;
+#if (__FreeBSD_version > 500000)
+ struct thread *p;
+#else
+ struct proc *p;
+#endif
+{
+ struct hfsc_if *hif;
+ struct hfsc_interface *ifacep;
+ int error = 0;
+
+ /* check super-user privilege */
+ switch (cmd) {
+ case HFSC_GETSTATS:
+ break;
+ default:
+#if (__FreeBSD_version > 700000)
+ if ((error = priv_check(p, PRIV_ALTQ_MANAGE)) != 0)
+ return (error);
+#elif (__FreeBSD_version > 400000)
+ if ((error = suser(p)) != 0)
+ return (error);
+#else
+ if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
+ return (error);
+#endif
+ break;
+ }
+
+ switch (cmd) {
+
+ case HFSC_IF_ATTACH:
+ error = hfsccmd_if_attach((struct hfsc_attach *)addr);
+ break;
+
+ case HFSC_IF_DETACH:
+ error = hfsccmd_if_detach((struct hfsc_interface *)addr);
+ break;
+
+ case HFSC_ENABLE:
+ case HFSC_DISABLE:
+ case HFSC_CLEAR_HIERARCHY:
+ ifacep = (struct hfsc_interface *)addr;
+ if ((hif = altq_lookup(ifacep->hfsc_ifname,
+ ALTQT_HFSC)) == NULL) {
+ error = EBADF;
+ break;
+ }
+
+ switch (cmd) {
+
+ case HFSC_ENABLE:
+ if (hif->hif_defaultclass == NULL) {
+#ifdef ALTQ_DEBUG
+ printf("hfsc: no default class\n");
+#endif
+ error = EINVAL;
+ break;
+ }
+ error = altq_enable(hif->hif_ifq);
+ break;
+
+ case HFSC_DISABLE:
+ error = altq_disable(hif->hif_ifq);
+ break;
+
+ case HFSC_CLEAR_HIERARCHY:
+ hfsc_clear_interface(hif);
+ break;
+ }
+ break;
+
+ case HFSC_ADD_CLASS:
+ error = hfsccmd_add_class((struct hfsc_add_class *)addr);
+ break;
+
+ case HFSC_DEL_CLASS:
+ error = hfsccmd_delete_class((struct hfsc_delete_class *)addr);
+ break;
+
+ case HFSC_MOD_CLASS:
+ error = hfsccmd_modify_class((struct hfsc_modify_class *)addr);
+ break;
+
+ case HFSC_ADD_FILTER:
+ error = hfsccmd_add_filter((struct hfsc_add_filter *)addr);
+ break;
+
+ case HFSC_DEL_FILTER:
+ error = hfsccmd_delete_filter((struct hfsc_delete_filter *)addr);
+ break;
+
+ case HFSC_GETSTATS:
+ error = hfsccmd_class_stats((struct hfsc_class_stats *)addr);
+ break;
+
+ default:
+ error = EINVAL;
+ break;
+ }
+ return error;
+}
+
+static int
+hfsccmd_if_attach(ap)
+ struct hfsc_attach *ap;
+{
+ struct hfsc_if *hif;
+ struct ifnet *ifp;
+ int error;
+
+ if ((ifp = ifunit(ap->iface.hfsc_ifname)) == NULL)
+ return (ENXIO);
+
+ if ((hif = hfsc_attach(&ifp->if_snd, ap->bandwidth)) == NULL)
+ return (ENOMEM);
+
+ /*
+ * set HFSC to this ifnet structure.
+ */
+ if ((error = altq_attach(&ifp->if_snd, ALTQT_HFSC, hif,
+ hfsc_enqueue, hfsc_dequeue, hfsc_request,
+ &hif->hif_classifier, acc_classify)) != 0)
+ (void)hfsc_detach(hif);
+
+ return (error);
+}
+
+static int
+hfsccmd_if_detach(ap)
+ struct hfsc_interface *ap;
+{
+ struct hfsc_if *hif;
+ int error;
+
+ if ((hif = altq_lookup(ap->hfsc_ifname, ALTQT_HFSC)) == NULL)
+ return (EBADF);
+
+ if (ALTQ_IS_ENABLED(hif->hif_ifq))
+ altq_disable(hif->hif_ifq);
+
+ if ((error = altq_detach(hif->hif_ifq)))
+ return (error);
+
+ return hfsc_detach(hif);
+}
+
+static int
+hfsccmd_add_class(ap)
+ struct hfsc_add_class *ap;
+{
+ struct hfsc_if *hif;
+ struct hfsc_class *cl, *parent;
+ int i;
+
+ if ((hif = altq_lookup(ap->iface.hfsc_ifname, ALTQT_HFSC)) == NULL)
+ return (EBADF);
+
+ if (ap->parent_handle == HFSC_NULLCLASS_HANDLE &&
+ hif->hif_rootclass == NULL)
+ parent = NULL;
+ else if ((parent = clh_to_clp(hif, ap->parent_handle)) == NULL)
+ return (EINVAL);
+
+ /* assign a class handle (use a free slot number for now) */
+ for (i = 1; i < HFSC_MAX_CLASSES; i++)
+ if (hif->hif_class_tbl[i] == NULL)
+ break;
+ if (i == HFSC_MAX_CLASSES)
+ return (EBUSY);
+
+ if ((cl = hfsc_class_create(hif, &ap->service_curve, NULL, NULL,
+ parent, ap->qlimit, ap->flags, i)) == NULL)
+ return (ENOMEM);
+
+ /* return a class handle to the user */
+ ap->class_handle = i;
+
+ return (0);
+}
+
+static int
+hfsccmd_delete_class(ap)
+ struct hfsc_delete_class *ap;
+{
+ struct hfsc_if *hif;
+ struct hfsc_class *cl;
+
+ if ((hif = altq_lookup(ap->iface.hfsc_ifname, ALTQT_HFSC)) == NULL)
+ return (EBADF);
+
+ if ((cl = clh_to_clp(hif, ap->class_handle)) == NULL)
+ return (EINVAL);
+
+ return hfsc_class_destroy(cl);
+}
+
+static int
+hfsccmd_modify_class(ap)
+ struct hfsc_modify_class *ap;
+{
+ struct hfsc_if *hif;
+ struct hfsc_class *cl;
+ struct service_curve *rsc = NULL;
+ struct service_curve *fsc = NULL;
+ struct service_curve *usc = NULL;
+
+ if ((hif = altq_lookup(ap->iface.hfsc_ifname, ALTQT_HFSC)) == NULL)
+ return (EBADF);
+
+ if ((cl = clh_to_clp(hif, ap->class_handle)) == NULL)
+ return (EINVAL);
+
+ if (ap->sctype & HFSC_REALTIMESC)
+ rsc = &ap->service_curve;
+ if (ap->sctype & HFSC_LINKSHARINGSC)
+ fsc = &ap->service_curve;
+ if (ap->sctype & HFSC_UPPERLIMITSC)
+ usc = &ap->service_curve;
+
+ return hfsc_class_modify(cl, rsc, fsc, usc);
+}
+
+static int
+hfsccmd_add_filter(ap)
+ struct hfsc_add_filter *ap;
+{
+ struct hfsc_if *hif;
+ struct hfsc_class *cl;
+
+ if ((hif = altq_lookup(ap->iface.hfsc_ifname, ALTQT_HFSC)) == NULL)
+ return (EBADF);
+
+ if ((cl = clh_to_clp(hif, ap->class_handle)) == NULL)
+ return (EINVAL);
+
+ if (is_a_parent_class(cl)) {
+#ifdef ALTQ_DEBUG
+ printf("hfsccmd_add_filter: not a leaf class!\n");
+#endif
+ return (EINVAL);
+ }
+
+ return acc_add_filter(&hif->hif_classifier, &ap->filter,
+ cl, &ap->filter_handle);
+}
+
+static int
+hfsccmd_delete_filter(ap)
+ struct hfsc_delete_filter *ap;
+{
+ struct hfsc_if *hif;
+
+ if ((hif = altq_lookup(ap->iface.hfsc_ifname, ALTQT_HFSC)) == NULL)
+ return (EBADF);
+
+ return acc_delete_filter(&hif->hif_classifier,
+ ap->filter_handle);
+}
+
+static int
+hfsccmd_class_stats(ap)
+ struct hfsc_class_stats *ap;
+{
+ struct hfsc_if *hif;
+ struct hfsc_class *cl;
+ struct hfsc_classstats stats, *usp;
+ int n, nclasses, error;
+
+ if ((hif = altq_lookup(ap->iface.hfsc_ifname, ALTQT_HFSC)) == NULL)
+ return (EBADF);
+
+ ap->cur_time = read_machclk();
+ ap->machclk_freq = machclk_freq;
+ ap->hif_classes = hif->hif_classes;
+ ap->hif_packets = hif->hif_packets;
+
+ /* skip the first N classes in the tree */
+ nclasses = ap->nskip;
+ for (cl = hif->hif_rootclass, n = 0; cl != NULL && n < nclasses;
+ cl = hfsc_nextclass(cl), n++)
+ ;
+ if (n != nclasses)
+ return (EINVAL);
+
+ /* then, read the next N classes in the tree */
+ nclasses = ap->nclasses;
+ usp = ap->stats;
+ for (n = 0; cl != NULL && n < nclasses; cl = hfsc_nextclass(cl), n++) {
+
+ get_class_stats(&stats, cl);
+
+ if ((error = copyout((caddr_t)&stats, (caddr_t)usp++,
+ sizeof(stats))) != 0)
+ return (error);
+ }
+
+ ap->nclasses = n;
+
+ return (0);
+}
+
+#ifdef KLD_MODULE
+
+static struct altqsw hfsc_sw =
+ {"hfsc", hfscopen, hfscclose, hfscioctl};
+
+ALTQ_MODULE(altq_hfsc, ALTQT_HFSC, &hfsc_sw);
+MODULE_DEPEND(altq_hfsc, altq_red, 1, 1, 1);
+MODULE_DEPEND(altq_hfsc, altq_rio, 1, 1, 1);
+
+#endif /* KLD_MODULE */
+#endif /* ALTQ3_COMPAT */
+
+#endif /* ALTQ_HFSC */
diff --git a/sys/net/altq/altq_hfsc.h b/sys/net/altq/altq_hfsc.h
new file mode 100644
index 0000000..8101428
--- /dev/null
+++ b/sys/net/altq/altq_hfsc.h
@@ -0,0 +1,311 @@
+/*-
+ * Copyright (c) 1997-1999 Carnegie Mellon University. All Rights Reserved.
+ *
+ * Permission to use, copy, modify, and distribute this software and
+ * its documentation is hereby granted (including for commercial or
+ * for-profit use), provided that both the copyright notice and this
+ * permission notice appear in all copies of the software, derivative
+ * works, or modified versions, and any portions thereof.
+ *
+ * THIS SOFTWARE IS EXPERIMENTAL AND IS KNOWN TO HAVE BUGS, SOME OF
+ * WHICH MAY HAVE SERIOUS CONSEQUENCES. CARNEGIE MELLON PROVIDES THIS
+ * SOFTWARE IN ITS ``AS IS'' CONDITION, AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+ * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * Carnegie Mellon encourages (but does not require) users of this
+ * software to return any improvements or extensions that they make,
+ * and to grant Carnegie Mellon the rights to redistribute these
+ * changes without encumbrance.
+ *
+ * $KAME: altq_hfsc.h,v 1.12 2003/12/05 05:40:46 kjc Exp $
+ * $FreeBSD$
+ */
+#ifndef _ALTQ_ALTQ_HFSC_H_
+#define _ALTQ_ALTQ_HFSC_H_
+
+#include <net/altq/altq.h>
+#include <net/altq/altq_classq.h>
+#include <net/altq/altq_red.h>
+#include <net/altq/altq_rio.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct service_curve {
+ u_int m1; /* slope of the first segment in bits/sec */
+ u_int d; /* the x-projection of the first segment in msec */
+ u_int m2; /* slope of the second segment in bits/sec */
+};
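+
+/*
+ * for orientation, a hypothetical pf.conf(5) fragment (not part of this
+ * header) such as
+ *
+ *	queue ssh bandwidth 10Mb hfsc( realtime(20Mb 50 10Mb) )
+ *
+ * is parsed by pfctl(8) into m1 = 20Mb/s, d = 50msec and m2 = 10Mb/s for
+ * the real-time curve; those three numbers are what end up in this
+ * structure before being converted to an internal_sc below.
+ */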
+
+/* special class handles */
+#define HFSC_NULLCLASS_HANDLE 0
+#define HFSC_MAX_CLASSES 64
+
+/* hfsc class flags */
+#define HFCF_RED 0x0001 /* use RED */
+#define HFCF_ECN 0x0002 /* use RED/ECN */
+#define HFCF_RIO 0x0004 /* use RIO */
+#define HFCF_CLEARDSCP 0x0010 /* clear diffserv codepoint */
+#define HFCF_DEFAULTCLASS 0x1000 /* default class */
+
+/* service curve types */
+#define HFSC_REALTIMESC 1
+#define HFSC_LINKSHARINGSC 2
+#define HFSC_UPPERLIMITSC 4
+#define HFSC_DEFAULTSC (HFSC_REALTIMESC|HFSC_LINKSHARINGSC)
+
+struct hfsc_classstats {
+ u_int class_id;
+ u_int32_t class_handle;
+ struct service_curve rsc;
+ struct service_curve fsc;
+ struct service_curve usc; /* upper limit service curve */
+
+ u_int64_t total; /* total work in bytes */
+ u_int64_t cumul; /* cumulative work in bytes
+ done by real-time criteria */
+ u_int64_t d; /* deadline */
+ u_int64_t e; /* eligible time */
+ u_int64_t vt; /* virtual time */
+ u_int64_t f; /* fit time for upper-limit */
+
+ /* info helpful for debugging */
+ u_int64_t initvt; /* init virtual time */
+ u_int64_t vtoff; /* cl_vt_ipoff */
+ u_int64_t cvtmax; /* cl_maxvt */
+ u_int64_t myf; /* cl_myf */
+ u_int64_t cfmin; /* cl_mincf */
+ u_int64_t cvtmin; /* cl_mincvt */
+ u_int64_t myfadj; /* cl_myfadj */
+ u_int64_t vtadj; /* cl_vtadj */
+ u_int64_t cur_time;
+ u_int32_t machclk_freq;
+
+ u_int qlength;
+ u_int qlimit;
+ struct pktcntr xmit_cnt;
+ struct pktcntr drop_cnt;
+ u_int period;
+
+ u_int vtperiod; /* vt period sequence no */
+ u_int parentperiod; /* parent's vt period seqno */
+ int nactive; /* number of active children */
+
+ /* red and rio related info */
+ int qtype;
+ struct redstats red[3];
+};
+
+#ifdef ALTQ3_COMPAT
+struct hfsc_interface {
+ char hfsc_ifname[IFNAMSIZ]; /* interface name (e.g., fxp0) */
+};
+
+struct hfsc_attach {
+ struct hfsc_interface iface;
+ u_int bandwidth; /* link bandwidth in bits/sec */
+};
+
+struct hfsc_add_class {
+ struct hfsc_interface iface;
+ u_int32_t parent_handle;
+ struct service_curve service_curve;
+ int qlimit;
+ int flags;
+
+ u_int32_t class_handle; /* return value */
+};
+
+struct hfsc_delete_class {
+ struct hfsc_interface iface;
+ u_int32_t class_handle;
+};
+
+struct hfsc_modify_class {
+ struct hfsc_interface iface;
+ u_int32_t class_handle;
+ struct service_curve service_curve;
+ int sctype;
+};
+
+struct hfsc_add_filter {
+ struct hfsc_interface iface;
+ u_int32_t class_handle;
+ struct flow_filter filter;
+
+ u_long filter_handle; /* return value */
+};
+
+struct hfsc_delete_filter {
+ struct hfsc_interface iface;
+ u_long filter_handle;
+};
+
+struct hfsc_class_stats {
+ struct hfsc_interface iface;
+ int nskip; /* skip # of classes */
+ int nclasses; /* # of class stats (WR) */
+ u_int64_t cur_time; /* current time */
+ u_int32_t machclk_freq; /* machine clock frequency */
+ u_int hif_classes; /* # of classes in the tree */
+ u_int hif_packets; /* # of packets in the tree */
+ struct hfsc_classstats *stats; /* pointer to stats array */
+};
+
+#define HFSC_IF_ATTACH _IOW('Q', 1, struct hfsc_attach)
+#define HFSC_IF_DETACH _IOW('Q', 2, struct hfsc_interface)
+#define HFSC_ENABLE _IOW('Q', 3, struct hfsc_interface)
+#define HFSC_DISABLE _IOW('Q', 4, struct hfsc_interface)
+#define HFSC_CLEAR_HIERARCHY _IOW('Q', 5, struct hfsc_interface)
+#define HFSC_ADD_CLASS _IOWR('Q', 7, struct hfsc_add_class)
+#define HFSC_DEL_CLASS _IOW('Q', 8, struct hfsc_delete_class)
+#define HFSC_MOD_CLASS _IOW('Q', 9, struct hfsc_modify_class)
+#define HFSC_ADD_FILTER _IOWR('Q', 10, struct hfsc_add_filter)
+#define HFSC_DEL_FILTER _IOW('Q', 11, struct hfsc_delete_filter)
+#define HFSC_GETSTATS _IOWR('Q', 12, struct hfsc_class_stats)
+#endif /* ALTQ3_COMPAT */
+
+#ifdef _KERNEL
+/*
+ * kernel internal service curve representation
+ * coordinates are given by 64 bit unsigned integers.
+ * x-axis: unit is clock count. for the intel x86 architecture,
+ * the raw Pentium TSC (Timestamp Counter) value is used.
+ * virtual time is also calculated in this time scale.
+ * y-axis: unit is byte.
+ *
+ * the service curve parameters are converted to the internal
+ * representation.
+ * the slope values are scaled to avoid overflow.
+ * the inverse slope values as well as the y-projection of the 1st
+ * segment are kept in order to avoid 64-bit divide operations
+ * that are expensive on 32-bit architectures.
+ *
+ * note: the Intel Pentium TSC does not wrap around for several thousand years.
+ * x-axis doesn't wrap around for 1089 years with 1GHz clock.
+ * y-axis doesn't wrap around for 4358 years with 1Gbps bandwidth.
+ */
+
+/* kernel internal representation of a service curve */
+struct internal_sc {
+ u_int64_t sm1; /* scaled slope of the 1st segment */
+ u_int64_t ism1; /* scaled inverse-slope of the 1st segment */
+ u_int64_t dx; /* the x-projection of the 1st segment */
+ u_int64_t dy; /* the y-projection of the 1st segment */
+ u_int64_t sm2; /* scaled slope of the 2nd segment */
+ u_int64_t ism2; /* scaled inverse-slope of the 2nd segment */
+};
+
+/* runtime service curve */
+struct runtime_sc {
+ u_int64_t x; /* current starting position on x-axis */
+	u_int64_t	y;	/* current starting position on y-axis */
+ u_int64_t sm1; /* scaled slope of the 1st segment */
+ u_int64_t ism1; /* scaled inverse-slope of the 1st segment */
+ u_int64_t dx; /* the x-projection of the 1st segment */
+ u_int64_t dy; /* the y-projection of the 1st segment */
+ u_int64_t sm2; /* scaled slope of the 2nd segment */
+ u_int64_t ism2; /* scaled inverse-slope of the 2nd segment */
+};
+
+struct hfsc_class {
+ u_int cl_id; /* class id (just for debug) */
+ u_int32_t cl_handle; /* class handle */
+ struct hfsc_if *cl_hif; /* back pointer to struct hfsc_if */
+ int cl_flags; /* misc flags */
+
+ struct hfsc_class *cl_parent; /* parent class */
+ struct hfsc_class *cl_siblings; /* sibling classes */
+ struct hfsc_class *cl_children; /* child classes */
+
+ class_queue_t *cl_q; /* class queue structure */
+ struct red *cl_red; /* RED state */
+ struct altq_pktattr *cl_pktattr; /* saved header used by ECN */
+
+ u_int64_t cl_total; /* total work in bytes */
+ u_int64_t cl_cumul; /* cumulative work in bytes
+ done by real-time criteria */
+ u_int64_t cl_d; /* deadline */
+ u_int64_t cl_e; /* eligible time */
+ u_int64_t cl_vt; /* virtual time */
+ u_int64_t cl_f; /* time when this class will fit for
+ link-sharing, max(myf, cfmin) */
+ u_int64_t cl_myf; /* my fit-time (as calculated from this
+ class's own upperlimit curve) */
+ u_int64_t cl_myfadj; /* my fit-time adjustment
+ (to cancel history dependence) */
+ u_int64_t cl_cfmin; /* earliest children's fit-time (used
+ with cl_myf to obtain cl_f) */
+ u_int64_t cl_cvtmin; /* minimal virtual time among the
+ children fit for link-sharing
+ (monotonic within a period) */
+ u_int64_t cl_vtadj; /* intra-period cumulative vt
+ adjustment */
+ u_int64_t cl_vtoff; /* inter-period cumulative vt offset */
+ u_int64_t cl_cvtmax; /* max child's vt in the last period */
+
+ u_int64_t cl_initvt; /* init virtual time (for debugging) */
+
+ struct internal_sc *cl_rsc; /* internal real-time service curve */
+ struct internal_sc *cl_fsc; /* internal fair service curve */
+ struct internal_sc *cl_usc; /* internal upperlimit service curve */
+ struct runtime_sc cl_deadline; /* deadline curve */
+ struct runtime_sc cl_eligible; /* eligible curve */
+ struct runtime_sc cl_virtual; /* virtual curve */
+ struct runtime_sc cl_ulimit; /* upperlimit curve */
+
+ u_int cl_vtperiod; /* vt period sequence no */
+ u_int cl_parentperiod; /* parent's vt period seqno */
+ int cl_nactive; /* number of active children */
+
+ TAILQ_HEAD(acthead, hfsc_class) cl_actc; /* active children list */
+ TAILQ_ENTRY(hfsc_class) cl_actlist; /* active children list entry */
+ TAILQ_ENTRY(hfsc_class) cl_ellist; /* eligible list entry */
+
+ struct {
+ struct pktcntr xmit_cnt;
+ struct pktcntr drop_cnt;
+ u_int period;
+ } cl_stats;
+};
+
+/*
+ * hfsc interface state
+ */
+struct hfsc_if {
+ struct hfsc_if *hif_next; /* interface state list */
+ struct ifaltq *hif_ifq; /* backpointer to ifaltq */
+ struct hfsc_class *hif_rootclass; /* root class */
+ struct hfsc_class *hif_defaultclass; /* default class */
+ struct hfsc_class *hif_class_tbl[HFSC_MAX_CLASSES];
+ struct hfsc_class *hif_pollcache; /* cache for poll operation */
+
+ u_int hif_classes; /* # of classes in the tree */
+ u_int hif_packets; /* # of packets in the tree */
+ u_int hif_classid; /* class id sequence number */
+
+ TAILQ_HEAD(elighead, hfsc_class) hif_eligible; /* eligible list */
+
+#ifdef ALTQ3_CLFIER_COMPAT
+ struct acc_classifier hif_classifier;
+#endif
+};
+
+#endif /* _KERNEL */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _ALTQ_ALTQ_HFSC_H_ */
diff --git a/sys/net/altq/altq_priq.c b/sys/net/altq/altq_priq.c
new file mode 100644
index 0000000..be62373
--- /dev/null
+++ b/sys/net/altq/altq_priq.c
@@ -0,0 +1,1046 @@
+/* $FreeBSD$ */
+/* $KAME: altq_priq.c,v 1.11 2003/09/17 14:23:25 kjc Exp $ */
+/*
+ * Copyright (C) 2000-2003
+ * Sony Computer Science Laboratories Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+/*
+ * priority queue
+ */
+
+#if defined(__FreeBSD__) || defined(__NetBSD__)
+#include "opt_altq.h"
+#include "opt_inet.h"
+#ifdef __FreeBSD__
+#include "opt_inet6.h"
+#endif
+#endif /* __FreeBSD__ || __NetBSD__ */
+
+#ifdef ALTQ_PRIQ /* priq is enabled by ALTQ_PRIQ option in opt_altq.h */
+
+#include <sys/param.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/sockio.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/errno.h>
+#include <sys/kernel.h>
+#include <sys/queue.h>
+
+#include <net/if.h>
+#include <net/if_var.h>
+#include <netinet/in.h>
+
+#include <netpfil/pf/pf.h>
+#include <netpfil/pf/pf_altq.h>
+#include <netpfil/pf/pf_mtag.h>
+#include <net/altq/altq.h>
+#ifdef ALTQ3_COMPAT
+#include <net/altq/altq_conf.h>
+#endif
+#include <net/altq/altq_priq.h>
+
+/*
+ * function prototypes
+ */
+#ifdef ALTQ3_COMPAT
+static struct priq_if *priq_attach(struct ifaltq *, u_int);
+static int priq_detach(struct priq_if *);
+#endif
+static int priq_clear_interface(struct priq_if *);
+static int priq_request(struct ifaltq *, int, void *);
+static void priq_purge(struct priq_if *);
+static struct priq_class *priq_class_create(struct priq_if *, int, int, int,
+ int);
+static int priq_class_destroy(struct priq_class *);
+static int priq_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *);
+static struct mbuf *priq_dequeue(struct ifaltq *, int);
+
+static int priq_addq(struct priq_class *, struct mbuf *);
+static struct mbuf *priq_getq(struct priq_class *);
+static struct mbuf *priq_pollq(struct priq_class *);
+static void priq_purgeq(struct priq_class *);
+
+#ifdef ALTQ3_COMPAT
+static int priqcmd_if_attach(struct priq_interface *);
+static int priqcmd_if_detach(struct priq_interface *);
+static int priqcmd_add_class(struct priq_add_class *);
+static int priqcmd_delete_class(struct priq_delete_class *);
+static int priqcmd_modify_class(struct priq_modify_class *);
+static int priqcmd_add_filter(struct priq_add_filter *);
+static int priqcmd_delete_filter(struct priq_delete_filter *);
+static int priqcmd_class_stats(struct priq_class_stats *);
+#endif /* ALTQ3_COMPAT */
+
+static void get_class_stats(struct priq_classstats *, struct priq_class *);
+static struct priq_class *clh_to_clp(struct priq_if *, u_int32_t);
+
+#ifdef ALTQ3_COMPAT
+altqdev_decl(priq);
+
+/* pif_list keeps all priq_if's allocated. */
+static struct priq_if *pif_list = NULL;
+#endif /* ALTQ3_COMPAT */
+
+int
+priq_pfattach(struct pf_altq *a)
+{
+ struct ifnet *ifp;
+ int s, error;
+
+ if ((ifp = ifunit(a->ifname)) == NULL || a->altq_disc == NULL)
+ return (EINVAL);
+#ifdef __NetBSD__
+ s = splnet();
+#else
+ s = splimp();
+#endif
+ error = altq_attach(&ifp->if_snd, ALTQT_PRIQ, a->altq_disc,
+ priq_enqueue, priq_dequeue, priq_request, NULL, NULL);
+ splx(s);
+ return (error);
+}
+
+int
+priq_add_altq(struct pf_altq *a)
+{
+ struct priq_if *pif;
+ struct ifnet *ifp;
+
+ if ((ifp = ifunit(a->ifname)) == NULL)
+ return (EINVAL);
+ if (!ALTQ_IS_READY(&ifp->if_snd))
+ return (ENODEV);
+
+ pif = malloc(sizeof(struct priq_if), M_DEVBUF, M_NOWAIT | M_ZERO);
+ if (pif == NULL)
+ return (ENOMEM);
+ pif->pif_bandwidth = a->ifbandwidth;
+ pif->pif_maxpri = -1;
+ pif->pif_ifq = &ifp->if_snd;
+
+ /* keep the state in pf_altq */
+ a->altq_disc = pif;
+
+ return (0);
+}
+
+int
+priq_remove_altq(struct pf_altq *a)
+{
+ struct priq_if *pif;
+
+ if ((pif = a->altq_disc) == NULL)
+ return (EINVAL);
+ a->altq_disc = NULL;
+
+ (void)priq_clear_interface(pif);
+
+ free(pif, M_DEVBUF);
+ return (0);
+}
+
+int
+priq_add_queue(struct pf_altq *a)
+{
+ struct priq_if *pif;
+ struct priq_class *cl;
+
+ if ((pif = a->altq_disc) == NULL)
+ return (EINVAL);
+
+ /* check parameters */
+ if (a->priority >= PRIQ_MAXPRI)
+ return (EINVAL);
+ if (a->qid == 0)
+ return (EINVAL);
+ if (pif->pif_classes[a->priority] != NULL)
+ return (EBUSY);
+ if (clh_to_clp(pif, a->qid) != NULL)
+ return (EBUSY);
+
+ cl = priq_class_create(pif, a->priority, a->qlimit,
+ a->pq_u.priq_opts.flags, a->qid);
+ if (cl == NULL)
+ return (ENOMEM);
+
+ return (0);
+}
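+
+/*
+ * for orientation, a hypothetical pf.conf(5) fragment (not part of this
+ * file) along the lines of
+ *
+ *	altq on em0 priq bandwidth 100Mb queue { std, ssh, dns }
+ *	queue std priq(default)
+ *	queue ssh priority 4
+ *	queue dns priority 7
+ *
+ * makes pfctl(8) call priq_add_altq() once for em0 and priq_add_queue()
+ * once per queue with a->priority and a->qid filled in; the checks above
+ * then enforce priority < PRIQ_MAXPRI and a unique, non-zero qid.  at
+ * dequeue time the highest backlogged priority always drains first.
+ */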
+
+int
+priq_remove_queue(struct pf_altq *a)
+{
+ struct priq_if *pif;
+ struct priq_class *cl;
+
+ if ((pif = a->altq_disc) == NULL)
+ return (EINVAL);
+
+ if ((cl = clh_to_clp(pif, a->qid)) == NULL)
+ return (EINVAL);
+
+ return (priq_class_destroy(cl));
+}
+
+int
+priq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes)
+{
+ struct priq_if *pif;
+ struct priq_class *cl;
+ struct priq_classstats stats;
+ int error = 0;
+
+ if ((pif = altq_lookup(a->ifname, ALTQT_PRIQ)) == NULL)
+ return (EBADF);
+
+ if ((cl = clh_to_clp(pif, a->qid)) == NULL)
+ return (EINVAL);
+
+ if (*nbytes < sizeof(stats))
+ return (EINVAL);
+
+ get_class_stats(&stats, cl);
+
+ if ((error = copyout((caddr_t)&stats, ubuf, sizeof(stats))) != 0)
+ return (error);
+ *nbytes = sizeof(stats);
+ return (0);
+}
+
+/*
+ * bring the interface back to the initial state by discarding
+ * all the filters and classes.
+ */
+static int
+priq_clear_interface(struct priq_if *pif)
+{
+ struct priq_class *cl;
+ int pri;
+
+#ifdef ALTQ3_CLFIER_COMPAT
+ /* free the filters for this interface */
+ acc_discard_filters(&pif->pif_classifier, NULL, 1);
+#endif
+
+ /* clear out the classes */
+ for (pri = 0; pri <= pif->pif_maxpri; pri++)
+ if ((cl = pif->pif_classes[pri]) != NULL)
+ priq_class_destroy(cl);
+
+ return (0);
+}
+
+static int
+priq_request(struct ifaltq *ifq, int req, void *arg)
+{
+ struct priq_if *pif = (struct priq_if *)ifq->altq_disc;
+
+ IFQ_LOCK_ASSERT(ifq);
+
+ switch (req) {
+ case ALTRQ_PURGE:
+ priq_purge(pif);
+ break;
+ }
+ return (0);
+}
+
+/* discard all the queued packets on the interface */
+static void
+priq_purge(struct priq_if *pif)
+{
+ struct priq_class *cl;
+ int pri;
+
+ for (pri = 0; pri <= pif->pif_maxpri; pri++) {
+ if ((cl = pif->pif_classes[pri]) != NULL && !qempty(cl->cl_q))
+ priq_purgeq(cl);
+ }
+ if (ALTQ_IS_ENABLED(pif->pif_ifq))
+ pif->pif_ifq->ifq_len = 0;
+}
+
+static struct priq_class *
+priq_class_create(struct priq_if *pif, int pri, int qlimit, int flags, int qid)
+{
+ struct priq_class *cl;
+ int s;
+
+#ifndef ALTQ_RED
+ if (flags & PRCF_RED) {
+#ifdef ALTQ_DEBUG
+ printf("priq_class_create: RED not configured for PRIQ!\n");
+#endif
+ return (NULL);
+ }
+#endif
+
+ if ((cl = pif->pif_classes[pri]) != NULL) {
+ /* modify the class instead of creating a new one */
+#ifdef __NetBSD__
+ s = splnet();
+#else
+ s = splimp();
+#endif
+ IFQ_LOCK(cl->cl_pif->pif_ifq);
+ if (!qempty(cl->cl_q))
+ priq_purgeq(cl);
+ IFQ_UNLOCK(cl->cl_pif->pif_ifq);
+ splx(s);
+#ifdef ALTQ_RIO
+ if (q_is_rio(cl->cl_q))
+ rio_destroy((rio_t *)cl->cl_red);
+#endif
+#ifdef ALTQ_RED
+ if (q_is_red(cl->cl_q))
+ red_destroy(cl->cl_red);
+#endif
+ } else {
+ cl = malloc(sizeof(struct priq_class), M_DEVBUF,
+ M_NOWAIT | M_ZERO);
+ if (cl == NULL)
+ return (NULL);
+
+ cl->cl_q = malloc(sizeof(class_queue_t), M_DEVBUF,
+ M_NOWAIT | M_ZERO);
+ if (cl->cl_q == NULL)
+ goto err_ret;
+ }
+
+ pif->pif_classes[pri] = cl;
+ if (flags & PRCF_DEFAULTCLASS)
+ pif->pif_default = cl;
+ if (qlimit == 0)
+ qlimit = 50; /* use default */
+ qlimit(cl->cl_q) = qlimit;
+ qtype(cl->cl_q) = Q_DROPTAIL;
+ qlen(cl->cl_q) = 0;
+ cl->cl_flags = flags;
+ cl->cl_pri = pri;
+ if (pri > pif->pif_maxpri)
+ pif->pif_maxpri = pri;
+ cl->cl_pif = pif;
+ cl->cl_handle = qid;
+
+#ifdef ALTQ_RED
+ if (flags & (PRCF_RED|PRCF_RIO)) {
+ int red_flags, red_pkttime;
+
+ red_flags = 0;
+ if (flags & PRCF_ECN)
+ red_flags |= REDF_ECN;
+#ifdef ALTQ_RIO
+ if (flags & PRCF_CLEARDSCP)
+ red_flags |= RIOF_CLEARDSCP;
+#endif
+ if (pif->pif_bandwidth < 8)
+ red_pkttime = 1000 * 1000 * 1000; /* 1 sec */
+ else
+ red_pkttime = (int64_t)pif->pif_ifq->altq_ifp->if_mtu
+ * 1000 * 1000 * 1000 / (pif->pif_bandwidth / 8);
+#ifdef ALTQ_RIO
+ if (flags & PRCF_RIO) {
+ cl->cl_red = (red_t *)rio_alloc(0, NULL,
+ red_flags, red_pkttime);
+ if (cl->cl_red == NULL)
+ goto err_ret;
+ qtype(cl->cl_q) = Q_RIO;
+ } else
+#endif
+ if (flags & PRCF_RED) {
+ cl->cl_red = red_alloc(0, 0,
+ qlimit(cl->cl_q) * 10/100,
+ qlimit(cl->cl_q) * 30/100,
+ red_flags, red_pkttime);
+ if (cl->cl_red == NULL)
+ goto err_ret;
+ qtype(cl->cl_q) = Q_RED;
+ }
+ }
+#endif /* ALTQ_RED */
+
+ return (cl);
+
+ err_ret:
+ if (cl->cl_red != NULL) {
+#ifdef ALTQ_RIO
+ if (q_is_rio(cl->cl_q))
+ rio_destroy((rio_t *)cl->cl_red);
+#endif
+#ifdef ALTQ_RED
+ if (q_is_red(cl->cl_q))
+ red_destroy(cl->cl_red);
+#endif
+ }
+ if (cl->cl_q != NULL)
+ free(cl->cl_q, M_DEVBUF);
+ free(cl, M_DEVBUF);
+ return (NULL);
+}
+
+static int
+priq_class_destroy(struct priq_class *cl)
+{
+ struct priq_if *pif;
+ int s, pri;
+
+#ifdef __NetBSD__
+ s = splnet();
+#else
+ s = splimp();
+#endif
+ IFQ_LOCK(cl->cl_pif->pif_ifq);
+
+#ifdef ALTQ3_CLFIER_COMPAT
+ /* delete filters referencing to this class */
+ acc_discard_filters(&cl->cl_pif->pif_classifier, cl, 0);
+#endif
+
+ if (!qempty(cl->cl_q))
+ priq_purgeq(cl);
+
+ pif = cl->cl_pif;
+ pif->pif_classes[cl->cl_pri] = NULL;
+ if (pif->pif_maxpri == cl->cl_pri) {
+ for (pri = cl->cl_pri; pri >= 0; pri--)
+ if (pif->pif_classes[pri] != NULL) {
+ pif->pif_maxpri = pri;
+ break;
+ }
+ if (pri < 0)
+ pif->pif_maxpri = -1;
+ }
+ IFQ_UNLOCK(cl->cl_pif->pif_ifq);
+ splx(s);
+
+ if (cl->cl_red != NULL) {
+#ifdef ALTQ_RIO
+ if (q_is_rio(cl->cl_q))
+ rio_destroy((rio_t *)cl->cl_red);
+#endif
+#ifdef ALTQ_RED
+ if (q_is_red(cl->cl_q))
+ red_destroy(cl->cl_red);
+#endif
+ }
+ free(cl->cl_q, M_DEVBUF);
+ free(cl, M_DEVBUF);
+ return (0);
+}
+
+/*
+ * priq_enqueue is an enqueue function to be registered to
+ * (*altq_enqueue) in struct ifaltq.
+ */
+static int
+priq_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr)
+{
+ struct priq_if *pif = (struct priq_if *)ifq->altq_disc;
+ struct priq_class *cl;
+ struct pf_mtag *t;
+ int len;
+
+ IFQ_LOCK_ASSERT(ifq);
+
+ /* grab class set by classifier */
+ if ((m->m_flags & M_PKTHDR) == 0) {
+ /* should not happen */
+ printf("altq: packet for %s does not have pkthdr\n",
+ ifq->altq_ifp->if_xname);
+ m_freem(m);
+ return (ENOBUFS);
+ }
+ cl = NULL;
+ if ((t = pf_find_mtag(m)) != NULL)
+ cl = clh_to_clp(pif, t->qid);
+#ifdef ALTQ3_COMPAT
+ else if ((ifq->altq_flags & ALTQF_CLASSIFY) && pktattr != NULL)
+ cl = pktattr->pattr_class;
+#endif
+ if (cl == NULL) {
+ cl = pif->pif_default;
+ if (cl == NULL) {
+ m_freem(m);
+ return (ENOBUFS);
+ }
+ }
+#ifdef ALTQ3_COMPAT
+ if (pktattr != NULL)
+ cl->cl_pktattr = pktattr; /* save proto hdr used by ECN */
+ else
+#endif
+ cl->cl_pktattr = NULL;
+ len = m_pktlen(m);
+ if (priq_addq(cl, m) != 0) {
+ /* drop occurred. mbuf was freed in priq_addq. */
+ PKTCNTR_ADD(&cl->cl_dropcnt, len);
+ return (ENOBUFS);
+ }
+ IFQ_INC_LEN(ifq);
+
+ /* successfully queued. */
+ return (0);
+}
+
+/*
+ * priq_dequeue is a dequeue function to be registered to
+ * (*altq_dequeue) in struct ifaltq.
+ *
+ * note: ALTDQ_POLL returns the next packet without removing the packet
+ * from the queue. ALTDQ_REMOVE is a normal dequeue operation.
+ * ALTDQ_REMOVE must return the same packet if called immediately
+ * after ALTDQ_POLL.
+ */
+static struct mbuf *
+priq_dequeue(struct ifaltq *ifq, int op)
+{
+ struct priq_if *pif = (struct priq_if *)ifq->altq_disc;
+ struct priq_class *cl;
+ struct mbuf *m;
+ int pri;
+
+ IFQ_LOCK_ASSERT(ifq);
+
+ if (IFQ_IS_EMPTY(ifq))
+ /* no packet in the queue */
+ return (NULL);
+
+ for (pri = pif->pif_maxpri; pri >= 0; pri--) {
+ if ((cl = pif->pif_classes[pri]) != NULL &&
+ !qempty(cl->cl_q)) {
+ if (op == ALTDQ_POLL)
+ return (priq_pollq(cl));
+
+ m = priq_getq(cl);
+ if (m != NULL) {
+ IFQ_DEC_LEN(ifq);
+ if (qempty(cl->cl_q))
+ cl->cl_period++;
+ PKTCNTR_ADD(&cl->cl_xmitcnt, m_pktlen(m));
+ }
+ return (m);
+ }
+ }
+ return (NULL);
+}
+
+static int
+priq_addq(struct priq_class *cl, struct mbuf *m)
+{
+
+#ifdef ALTQ_RIO
+ if (q_is_rio(cl->cl_q))
+ return rio_addq((rio_t *)cl->cl_red, cl->cl_q, m,
+ cl->cl_pktattr);
+#endif
+#ifdef ALTQ_RED
+ if (q_is_red(cl->cl_q))
+ return red_addq(cl->cl_red, cl->cl_q, m, cl->cl_pktattr);
+#endif
+ if (qlen(cl->cl_q) >= qlimit(cl->cl_q)) {
+ m_freem(m);
+ return (-1);
+ }
+
+ if (cl->cl_flags & PRCF_CLEARDSCP)
+ write_dsfield(m, cl->cl_pktattr, 0);
+
+ _addq(cl->cl_q, m);
+
+ return (0);
+}
+
+static struct mbuf *
+priq_getq(struct priq_class *cl)
+{
+#ifdef ALTQ_RIO
+ if (q_is_rio(cl->cl_q))
+ return rio_getq((rio_t *)cl->cl_red, cl->cl_q);
+#endif
+#ifdef ALTQ_RED
+ if (q_is_red(cl->cl_q))
+ return red_getq(cl->cl_red, cl->cl_q);
+#endif
+ return _getq(cl->cl_q);
+}
+
+static struct mbuf *
+priq_pollq(struct priq_class *cl)
+{
+ return qhead(cl->cl_q);
+}
+
+static void
+priq_purgeq(struct priq_class *cl)
+{
+ struct mbuf *m;
+
+ if (qempty(cl->cl_q))
+ return;
+
+ while ((m = _getq(cl->cl_q)) != NULL) {
+ PKTCNTR_ADD(&cl->cl_dropcnt, m_pktlen(m));
+ m_freem(m);
+ }
+ ASSERT(qlen(cl->cl_q) == 0);
+}
+
+static void
+get_class_stats(struct priq_classstats *sp, struct priq_class *cl)
+{
+ sp->class_handle = cl->cl_handle;
+ sp->qlength = qlen(cl->cl_q);
+ sp->qlimit = qlimit(cl->cl_q);
+ sp->period = cl->cl_period;
+ sp->xmitcnt = cl->cl_xmitcnt;
+ sp->dropcnt = cl->cl_dropcnt;
+
+ sp->qtype = qtype(cl->cl_q);
+#ifdef ALTQ_RED
+ if (q_is_red(cl->cl_q))
+ red_getstats(cl->cl_red, &sp->red[0]);
+#endif
+#ifdef ALTQ_RIO
+ if (q_is_rio(cl->cl_q))
+ rio_getstats((rio_t *)cl->cl_red, &sp->red[0]);
+#endif
+
+}
+
+/* convert a class handle to the corresponding class pointer */
+static struct priq_class *
+clh_to_clp(struct priq_if *pif, u_int32_t chandle)
+{
+ struct priq_class *cl;
+ int idx;
+
+ if (chandle == 0)
+ return (NULL);
+
+ for (idx = pif->pif_maxpri; idx >= 0; idx--)
+ if ((cl = pif->pif_classes[idx]) != NULL &&
+ cl->cl_handle == chandle)
+ return (cl);
+
+ return (NULL);
+}
+
+
+#ifdef ALTQ3_COMPAT
+
+static struct priq_if *
+priq_attach(ifq, bandwidth)
+ struct ifaltq *ifq;
+ u_int bandwidth;
+{
+ struct priq_if *pif;
+
+ pif = malloc(sizeof(struct priq_if),
+ M_DEVBUF, M_WAITOK);
+ if (pif == NULL)
+ return (NULL);
+ bzero(pif, sizeof(struct priq_if));
+ pif->pif_bandwidth = bandwidth;
+ pif->pif_maxpri = -1;
+ pif->pif_ifq = ifq;
+
+ /* add this state to the priq list */
+ pif->pif_next = pif_list;
+ pif_list = pif;
+
+ return (pif);
+}
+
+static int
+priq_detach(pif)
+ struct priq_if *pif;
+{
+ (void)priq_clear_interface(pif);
+
+ /* remove this interface from the pif list */
+ if (pif_list == pif)
+ pif_list = pif->pif_next;
+ else {
+ struct priq_if *p;
+
+ for (p = pif_list; p != NULL; p = p->pif_next)
+ if (p->pif_next == pif) {
+ p->pif_next = pif->pif_next;
+ break;
+ }
+ ASSERT(p != NULL);
+ }
+
+ free(pif, M_DEVBUF);
+ return (0);
+}
+
+/*
+ * priq device interface
+ */
+int
+priqopen(dev, flag, fmt, p)
+ dev_t dev;
+ int flag, fmt;
+#if (__FreeBSD_version > 500000)
+ struct thread *p;
+#else
+ struct proc *p;
+#endif
+{
+ /* everything will be done when the queueing scheme is attached. */
+ return 0;
+}
+
+int
+priqclose(dev, flag, fmt, p)
+ dev_t dev;
+ int flag, fmt;
+#if (__FreeBSD_version > 500000)
+ struct thread *p;
+#else
+ struct proc *p;
+#endif
+{
+ struct priq_if *pif;
+ int err, error = 0;
+
+ while ((pif = pif_list) != NULL) {
+ /* destroy all */
+ if (ALTQ_IS_ENABLED(pif->pif_ifq))
+ altq_disable(pif->pif_ifq);
+
+ err = altq_detach(pif->pif_ifq);
+ if (err == 0)
+ err = priq_detach(pif);
+ if (err != 0 && error == 0)
+ error = err;
+ }
+
+ return error;
+}
+
+int
+priqioctl(dev, cmd, addr, flag, p)
+ dev_t dev;
+ ioctlcmd_t cmd;
+ caddr_t addr;
+ int flag;
+#if (__FreeBSD_version > 500000)
+ struct thread *p;
+#else
+ struct proc *p;
+#endif
+{
+ struct priq_if *pif;
+ struct priq_interface *ifacep;
+ int error = 0;
+
+ /* check super-user privilege */
+ switch (cmd) {
+ case PRIQ_GETSTATS:
+ break;
+ default:
+#if (__FreeBSD_version > 700000)
+ if ((error = priv_check(p, PRIV_ALTQ_MANAGE)) != 0)
+ return (error);
+#elif (__FreeBSD_version > 400000)
+ if ((error = suser(p)) != 0)
+ return (error);
+#else
+ if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
+ return (error);
+#endif
+ break;
+ }
+
+ switch (cmd) {
+
+ case PRIQ_IF_ATTACH:
+ error = priqcmd_if_attach((struct priq_interface *)addr);
+ break;
+
+ case PRIQ_IF_DETACH:
+ error = priqcmd_if_detach((struct priq_interface *)addr);
+ break;
+
+ case PRIQ_ENABLE:
+ case PRIQ_DISABLE:
+ case PRIQ_CLEAR:
+ ifacep = (struct priq_interface *)addr;
+ if ((pif = altq_lookup(ifacep->ifname,
+ ALTQT_PRIQ)) == NULL) {
+ error = EBADF;
+ break;
+ }
+
+ switch (cmd) {
+ case PRIQ_ENABLE:
+ if (pif->pif_default == NULL) {
+#ifdef ALTQ_DEBUG
+ printf("priq: no default class\n");
+#endif
+ error = EINVAL;
+ break;
+ }
+ error = altq_enable(pif->pif_ifq);
+ break;
+
+ case PRIQ_DISABLE:
+ error = altq_disable(pif->pif_ifq);
+ break;
+
+ case PRIQ_CLEAR:
+ priq_clear_interface(pif);
+ break;
+ }
+ break;
+
+ case PRIQ_ADD_CLASS:
+ error = priqcmd_add_class((struct priq_add_class *)addr);
+ break;
+
+ case PRIQ_DEL_CLASS:
+ error = priqcmd_delete_class((struct priq_delete_class *)addr);
+ break;
+
+ case PRIQ_MOD_CLASS:
+ error = priqcmd_modify_class((struct priq_modify_class *)addr);
+ break;
+
+ case PRIQ_ADD_FILTER:
+ error = priqcmd_add_filter((struct priq_add_filter *)addr);
+ break;
+
+ case PRIQ_DEL_FILTER:
+ error = priqcmd_delete_filter((struct priq_delete_filter *)addr);
+ break;
+
+ case PRIQ_GETSTATS:
+ error = priqcmd_class_stats((struct priq_class_stats *)addr);
+ break;
+
+ default:
+ error = EINVAL;
+ break;
+ }
+ return error;
+}
+
+static int
+priqcmd_if_attach(ap)
+ struct priq_interface *ap;
+{
+ struct priq_if *pif;
+ struct ifnet *ifp;
+ int error;
+
+ if ((ifp = ifunit(ap->ifname)) == NULL)
+ return (ENXIO);
+
+ if ((pif = priq_attach(&ifp->if_snd, ap->arg)) == NULL)
+ return (ENOMEM);
+
+ /*
+ * set PRIQ to this ifnet structure.
+ */
+ if ((error = altq_attach(&ifp->if_snd, ALTQT_PRIQ, pif,
+ priq_enqueue, priq_dequeue, priq_request,
+ &pif->pif_classifier, acc_classify)) != 0)
+ (void)priq_detach(pif);
+
+ return (error);
+}
+
+static int
+priqcmd_if_detach(ap)
+ struct priq_interface *ap;
+{
+ struct priq_if *pif;
+ int error;
+
+ if ((pif = altq_lookup(ap->ifname, ALTQT_PRIQ)) == NULL)
+ return (EBADF);
+
+ if (ALTQ_IS_ENABLED(pif->pif_ifq))
+ altq_disable(pif->pif_ifq);
+
+ if ((error = altq_detach(pif->pif_ifq)))
+ return (error);
+
+ return priq_detach(pif);
+}
+
+static int
+priqcmd_add_class(ap)
+ struct priq_add_class *ap;
+{
+ struct priq_if *pif;
+ struct priq_class *cl;
+ int qid;
+
+ if ((pif = altq_lookup(ap->iface.ifname, ALTQT_PRIQ)) == NULL)
+ return (EBADF);
+
+ if (ap->pri < 0 || ap->pri >= PRIQ_MAXPRI)
+ return (EINVAL);
+ if (pif->pif_classes[ap->pri] != NULL)
+ return (EBUSY);
+
+ qid = ap->pri + 1;
+ if ((cl = priq_class_create(pif, ap->pri,
+ ap->qlimit, ap->flags, qid)) == NULL)
+ return (ENOMEM);
+
+ /* return a class handle to the user */
+ ap->class_handle = cl->cl_handle;
+
+ return (0);
+}
+
+static int
+priqcmd_delete_class(ap)
+ struct priq_delete_class *ap;
+{
+ struct priq_if *pif;
+ struct priq_class *cl;
+
+ if ((pif = altq_lookup(ap->iface.ifname, ALTQT_PRIQ)) == NULL)
+ return (EBADF);
+
+ if ((cl = clh_to_clp(pif, ap->class_handle)) == NULL)
+ return (EINVAL);
+
+ return priq_class_destroy(cl);
+}
+
+static int
+priqcmd_modify_class(ap)
+ struct priq_modify_class *ap;
+{
+ struct priq_if *pif;
+ struct priq_class *cl;
+
+ if ((pif = altq_lookup(ap->iface.ifname, ALTQT_PRIQ)) == NULL)
+ return (EBADF);
+
+ if (ap->pri < 0 || ap->pri >= PRIQ_MAXPRI)
+ return (EINVAL);
+
+ if ((cl = clh_to_clp(pif, ap->class_handle)) == NULL)
+ return (EINVAL);
+
+ /*
+ * if priority is changed, move the class to the new priority
+ */
+ if (pif->pif_classes[ap->pri] != cl) {
+ if (pif->pif_classes[ap->pri] != NULL)
+ return (EEXIST);
+ pif->pif_classes[cl->cl_pri] = NULL;
+ pif->pif_classes[ap->pri] = cl;
+ cl->cl_pri = ap->pri;
+ }
+
+ /* call priq_class_create to change class parameters */
+ if ((cl = priq_class_create(pif, ap->pri,
+ ap->qlimit, ap->flags, ap->class_handle)) == NULL)
+ return (ENOMEM);
+ return 0;
+}
+
+static int
+priqcmd_add_filter(ap)
+ struct priq_add_filter *ap;
+{
+ struct priq_if *pif;
+ struct priq_class *cl;
+
+ if ((pif = altq_lookup(ap->iface.ifname, ALTQT_PRIQ)) == NULL)
+ return (EBADF);
+
+ if ((cl = clh_to_clp(pif, ap->class_handle)) == NULL)
+ return (EINVAL);
+
+ return acc_add_filter(&pif->pif_classifier, &ap->filter,
+ cl, &ap->filter_handle);
+}
+
+static int
+priqcmd_delete_filter(ap)
+ struct priq_delete_filter *ap;
+{
+ struct priq_if *pif;
+
+ if ((pif = altq_lookup(ap->iface.ifname, ALTQT_PRIQ)) == NULL)
+ return (EBADF);
+
+ return acc_delete_filter(&pif->pif_classifier,
+ ap->filter_handle);
+}
+
+static int
+priqcmd_class_stats(ap)
+ struct priq_class_stats *ap;
+{
+ struct priq_if *pif;
+ struct priq_class *cl;
+ struct priq_classstats stats, *usp;
+ int pri, error;
+
+ if ((pif = altq_lookup(ap->iface.ifname, ALTQT_PRIQ)) == NULL)
+ return (EBADF);
+
+ ap->maxpri = pif->pif_maxpri;
+
+ /* then, read the next N classes in the tree */
+ usp = ap->stats;
+ for (pri = 0; pri <= pif->pif_maxpri; pri++) {
+ cl = pif->pif_classes[pri];
+ if (cl != NULL)
+ get_class_stats(&stats, cl);
+ else
+ bzero(&stats, sizeof(stats));
+ if ((error = copyout((caddr_t)&stats, (caddr_t)usp++,
+ sizeof(stats))) != 0)
+ return (error);
+ }
+ return (0);
+}
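priqcmd_class_stats() above copies one priq_classstats record per priority, 0 through maxpri, into a user-supplied array. A hedged user-space sketch of the matching PRIQ_GETSTATS call follows; the device path /dev/altq/priq, the interface name and the userland includes are assumptions, not taken from this commit.

/*
 * Illustrative user-space sketch only: querying per-priority statistics
 * via PRIQ_GETSTATS.  The device path and interface name are assumed.
 */
#include <sys/types.h>
#include <sys/ioctl.h>
#include <net/if.h>
#include <net/altq/altq.h>
#include <net/altq/altq_priq.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>

int
main(void)
{
	struct priq_class_stats req;
	struct priq_classstats stats[PRIQ_MAXPRI];
	int fd, pri;

	if ((fd = open("/dev/altq/priq", O_RDONLY)) < 0) {	/* assumed path */
		perror("open");
		return (1);
	}
	memset(&req, 0, sizeof(req));
	strlcpy(req.iface.ifname, "fxp0", sizeof(req.iface.ifname));
	req.stats = stats;			/* kernel fills 0..maxpri */
	if (ioctl(fd, PRIQ_GETSTATS, &req) < 0) {
		perror("ioctl");
		return (1);
	}
	for (pri = 0; pri <= req.maxpri; pri++)
		printf("pri %d: qlength %u / qlimit %u\n",
		    pri, stats[pri].qlength, stats[pri].qlimit);
	return (0);
}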
+
+#ifdef KLD_MODULE
+
+static struct altqsw priq_sw =
+ {"priq", priqopen, priqclose, priqioctl};
+
+ALTQ_MODULE(altq_priq, ALTQT_PRIQ, &priq_sw);
+MODULE_DEPEND(altq_priq, altq_red, 1, 1, 1);
+MODULE_DEPEND(altq_priq, altq_rio, 1, 1, 1);
+
+#endif /* KLD_MODULE */
+
+#endif /* ALTQ3_COMPAT */
+#endif /* ALTQ_PRIQ */
diff --git a/sys/net/altq/altq_priq.h b/sys/net/altq/altq_priq.h
new file mode 100644
index 0000000..d3cea33
--- /dev/null
+++ b/sys/net/altq/altq_priq.h
@@ -0,0 +1,172 @@
+/*-
+ * Copyright (C) 2000-2003
+ * Sony Computer Science Laboratories Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $KAME: altq_priq.h,v 1.7 2003/10/03 05:05:15 kjc Exp $
+ * $FreeBSD$
+ */
+
+#ifndef _ALTQ_ALTQ_PRIQ_H_
+#define _ALTQ_ALTQ_PRIQ_H_
+
+#include <net/altq/altq.h>
+#include <net/altq/altq_classq.h>
+#include <net/altq/altq_red.h>
+#include <net/altq/altq_rio.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define PRIQ_MAXPRI 16 /* upper limit of the number of priorities */
+
+#ifdef ALTQ3_COMPAT
+struct priq_interface {
+ char ifname[IFNAMSIZ]; /* interface name (e.g., fxp0) */
+ u_long arg; /* request-specific argument */
+};
+
+struct priq_add_class {
+ struct priq_interface iface;
+ int pri; /* priority (0 is the lowest) */
+ int qlimit; /* queue size limit */
+ int flags; /* misc flags (see below) */
+
+ u_int32_t class_handle; /* return value */
+};
+#endif /* ALTQ3_COMPAT */
+
+/* priq class flags */
+#define PRCF_RED 0x0001 /* use RED */
+#define PRCF_ECN 0x0002 /* use RED/ECN */
+#define PRCF_RIO 0x0004 /* use RIO */
+#define PRCF_CLEARDSCP 0x0010 /* clear diffserv codepoint */
+#define PRCF_DEFAULTCLASS 0x1000 /* default class */
+
+/* special class handles */
+#define PRIQ_NULLCLASS_HANDLE 0
+
+#ifdef ALTQ3_COMPAT
+struct priq_delete_class {
+ struct priq_interface iface;
+ u_int32_t class_handle;
+};
+
+struct priq_modify_class {
+ struct priq_interface iface;
+ u_int32_t class_handle;
+ int pri;
+ int qlimit;
+ int flags;
+};
+
+struct priq_add_filter {
+ struct priq_interface iface;
+ u_int32_t class_handle;
+ struct flow_filter filter;
+
+ u_long filter_handle; /* return value */
+};
+
+struct priq_delete_filter {
+ struct priq_interface iface;
+ u_long filter_handle;
+};
+#endif /* ALTQ3_COMPAT */
+
+struct priq_classstats {
+ u_int32_t class_handle;
+
+ u_int qlength;
+ u_int qlimit;
+ u_int period;
+ struct pktcntr xmitcnt; /* transmitted packet counter */
+ struct pktcntr dropcnt; /* dropped packet counter */
+
+ /* red and rio related info */
+ int qtype;
+ struct redstats red[3]; /* rio has 3 red stats */
+};
+
+#ifdef ALTQ3_COMPAT
+struct priq_class_stats {
+ struct priq_interface iface;
+ int maxpri; /* in/out */
+
+ struct priq_classstats *stats; /* pointer to stats array */
+};
+
+#define PRIQ_IF_ATTACH _IOW('Q', 1, struct priq_interface)
+#define PRIQ_IF_DETACH _IOW('Q', 2, struct priq_interface)
+#define PRIQ_ENABLE _IOW('Q', 3, struct priq_interface)
+#define PRIQ_DISABLE _IOW('Q', 4, struct priq_interface)
+#define PRIQ_CLEAR _IOW('Q', 5, struct priq_interface)
+#define PRIQ_ADD_CLASS _IOWR('Q', 7, struct priq_add_class)
+#define PRIQ_DEL_CLASS _IOW('Q', 8, struct priq_delete_class)
+#define PRIQ_MOD_CLASS _IOW('Q', 9, struct priq_modify_class)
+#define PRIQ_ADD_FILTER _IOWR('Q', 10, struct priq_add_filter)
+#define PRIQ_DEL_FILTER _IOW('Q', 11, struct priq_delete_filter)
+#define PRIQ_GETSTATS _IOWR('Q', 12, struct priq_class_stats)
+
+#endif /* ALTQ3_COMPAT */
+
+#ifdef _KERNEL
+
+struct priq_class {
+ u_int32_t cl_handle; /* class handle */
+ class_queue_t *cl_q; /* class queue structure */
+ struct red *cl_red; /* RED state */
+ int cl_pri; /* priority */
+ int cl_flags; /* class flags */
+ struct priq_if *cl_pif; /* back pointer to pif */
+ struct altq_pktattr *cl_pktattr; /* saved header used by ECN */
+
+ /* statistics */
+ u_int cl_period; /* backlog period */
+ struct pktcntr cl_xmitcnt; /* transmitted packet counter */
+ struct pktcntr cl_dropcnt; /* dropped packet counter */
+};
+
+/*
+ * priq interface state
+ */
+struct priq_if {
+ struct priq_if *pif_next; /* interface state list */
+ struct ifaltq *pif_ifq; /* backpointer to ifaltq */
+ u_int pif_bandwidth; /* link bandwidth in bps */
+ int pif_maxpri; /* max priority in use */
+ struct priq_class *pif_default; /* default class */
+ struct priq_class *pif_classes[PRIQ_MAXPRI]; /* classes */
+#ifdef ALTQ3_CLFIER_COMPAT
+ struct acc_classifier pif_classifier; /* classifier */
+#endif
+};
+
+#endif /* _KERNEL */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _ALTQ_ALTQ_PRIQ_H_ */
diff --git a/sys/net/altq/altq_red.c b/sys/net/altq/altq_red.c
new file mode 100644
index 0000000..7cd690f
--- /dev/null
+++ b/sys/net/altq/altq_red.c
@@ -0,0 +1,1500 @@
+/* $FreeBSD$ */
+/* $KAME: altq_red.c,v 1.18 2003/09/05 22:40:36 itojun Exp $ */
+
+/*
+ * Copyright (C) 1997-2003
+ * Sony Computer Science Laboratories Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+/*
+ * Copyright (c) 1990-1994 Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the Computer Systems
+ * Engineering Group at Lawrence Berkeley Laboratory.
+ * 4. Neither the name of the University nor of the Laboratory may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(__FreeBSD__) || defined(__NetBSD__)
+#include "opt_altq.h"
+#include "opt_inet.h"
+#ifdef __FreeBSD__
+#include "opt_inet6.h"
+#endif
+#endif /* __FreeBSD__ || __NetBSD__ */
+#ifdef ALTQ_RED /* red is enabled by ALTQ_RED option in opt_altq.h */
+
+#include <sys/param.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/systm.h>
+#include <sys/errno.h>
+#if 1 /* ALTQ3_COMPAT */
+#include <sys/sockio.h>
+#include <sys/proc.h>
+#include <sys/kernel.h>
+#ifdef ALTQ_FLOWVALVE
+#include <sys/queue.h>
+#include <sys/time.h>
+#endif
+#endif /* ALTQ3_COMPAT */
+
+#include <net/if.h>
+#include <net/if_var.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#ifdef INET6
+#include <netinet/ip6.h>
+#endif
+
+#include <netpfil/pf/pf.h>
+#include <netpfil/pf/pf_altq.h>
+#include <netpfil/pf/pf_mtag.h>
+#include <net/altq/altq.h>
+#include <net/altq/altq_red.h>
+#ifdef ALTQ3_COMPAT
+#include <net/altq/altq_conf.h>
+#ifdef ALTQ_FLOWVALVE
+#include <net/altq/altq_flowvalve.h>
+#endif
+#endif
+
+/*
+ * ALTQ/RED (Random Early Detection) implementation using 32-bit
+ * fixed-point calculation.
+ *
+ * written by kjc using the ns code as a reference.
+ * you can learn more about red and ns from Sally's home page at
+ * http://www-nrg.ee.lbl.gov/floyd/
+ *
+ * most of the red parameter values are fixed in this implementation
+ * to prevent fixed-point overflow/underflow.
+ * if you change the parameters, watch out for overflow/underflow!
+ *
+ * the parameters used are recommended values by Sally.
+ * the corresponding ns config looks like:
+ * q_weight=0.00195
+ * minthresh=5 maxthresh=15 queue-size=60
+ * linterm=30
+ * dropmech=drop-tail
+ * bytes=false (can't be handled by 32-bit fixed-point)
+ * doubleq=false dqthresh=false
+ * wait=true
+ */
+/*
+ * alternative red parameters for a slow link.
+ *
+ * assume the queue length grows from zero to L and then stays at L; it takes
+ * N packets for q_avg to reach 63% of L.
+ * when q_weight is 0.002, N is about 500 packets.
+ * for a slow link like dial-up, 500 packets takes more than 1 minute!
+ * when q_weight is 0.008, N is about 127 packets.
+ * when q_weight is 0.016, N is about 63 packets.
+ * bursts of 50 packets are allowed for 0.002, bursts of 25 packets
+ * are allowed for 0.016.
+ * see Sally's paper for more details.
+ */
+/* normal red parameters */
+#define W_WEIGHT 512 /* inverse of weight of EWMA (511/512) */
+ /* q_weight = 0.00195 */
+
+/* red parameters for a slow link */
+#define W_WEIGHT_1 128 /* inverse of weight of EWMA (127/128) */
+ /* q_weight = 0.0078125 */
+
+/* red parameters for a very slow link (e.g., dialup) */
+#define W_WEIGHT_2 64 /* inverse of weight of EWMA (63/64) */
+ /* q_weight = 0.015625 */
+
+/* fixed-point uses 12-bit decimal places */
+#define FP_SHIFT 12 /* fixed-point shift */
+
+/* red parameters for drop probability */
+#define INV_P_MAX 10 /* inverse of max drop probability */
+#define TH_MIN 5 /* min threshold */
+#define TH_MAX 15 /* max threshold */
+
+#define RED_LIMIT	60	/* default max queue length */
+#define RED_STATS /* collect statistics */
+
+/*
+ * our default policy for forced-drop is drop-tail.
+ * (in altq-1.1.2 or earlier, the default was random-drop.
+ * but it makes more sense to punish the cause of the surge.)
+ * to switch to the random-drop policy, define "RED_RANDOM_DROP".
+ */
+
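For intuition about the weights above, here is a minimal user-space sketch (illustrative only, not part of the ALTQ sources): it runs the same EWMA, avg = (1 - Wq) * avg + Wq * qlen, in floating point and reports how many back-to-back packets are needed before the average reaches 63% of a sustained queue length, matching the figures quoted in the comment.

/*
 * Minimal illustration (not ALTQ code): the RED estimator
 *     avg = (1 - Wq) * avg + Wq * qlen
 * run against a constant queue length L, counting how many packets it
 * takes for avg to reach 63% of L for each of the inverse weights above.
 */
#include <stdio.h>

int
main(void)
{
	double inv_weight[] = { 512.0, 128.0, 64.0 };	/* W_WEIGHT{,_1,_2} */
	int i;

	for (i = 0; i < 3; i++) {
		double wq = 1.0 / inv_weight[i];
		double L = 60.0;	/* sustained queue length */
		double avg = 0.0;
		int n = 0;

		while (avg < 0.63 * L) {
			avg = (1.0 - wq) * avg + wq * L;
			n++;
		}
		printf("1/Wq = %3.0f: about %d packets to reach 63%% of L\n",
		    inv_weight[i], n);
	}
	return (0);
}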
+#ifdef ALTQ3_COMPAT
+#ifdef ALTQ_FLOWVALVE
+/*
+ * flow-valve is an extension to protect red from unresponsive flows
+ * and to promote end-to-end congestion control.
+ * flow-valve observes the average drop rates of the flows that have
+ * experienced packet drops in the recent past.
+ * when the average drop rate exceeds the threshold, the flow is
+ * blocked by the flow-valve. the trapped flow should back off
+ * exponentially to escape from the flow-valve.
+ */
+#ifdef RED_RANDOM_DROP
+#error "random-drop can't be used with flow-valve!"
+#endif
+#endif /* ALTQ_FLOWVALVE */
+
+/* red_list keeps all red_queue_t's allocated. */
+static red_queue_t *red_list = NULL;
+
+#endif /* ALTQ3_COMPAT */
+
+/* default red parameter values */
+static int default_th_min = TH_MIN;
+static int default_th_max = TH_MAX;
+static int default_inv_pmax = INV_P_MAX;
+
+#ifdef ALTQ3_COMPAT
+/* internal function prototypes */
+static int red_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *);
+static struct mbuf *red_dequeue(struct ifaltq *, int);
+static int red_request(struct ifaltq *, int, void *);
+static void red_purgeq(red_queue_t *);
+static int red_detach(red_queue_t *);
+#ifdef ALTQ_FLOWVALVE
+static __inline struct fve *flowlist_lookup(struct flowvalve *,
+ struct altq_pktattr *, struct timeval *);
+static __inline struct fve *flowlist_reclaim(struct flowvalve *,
+ struct altq_pktattr *);
+static __inline void flowlist_move_to_head(struct flowvalve *, struct fve *);
+static __inline int fv_p2f(struct flowvalve *, int);
+#if 0 /* XXX: make the compiler happy (fv_alloc unused) */
+static struct flowvalve *fv_alloc(struct red *);
+#endif
+static void fv_destroy(struct flowvalve *);
+static int fv_checkflow(struct flowvalve *, struct altq_pktattr *,
+ struct fve **);
+static void fv_dropbyred(struct flowvalve *fv, struct altq_pktattr *,
+ struct fve *);
+#endif
+#endif /* ALTQ3_COMPAT */
+
+/*
+ * red support routines
+ */
+red_t *
+red_alloc(int weight, int inv_pmax, int th_min, int th_max, int flags,
+ int pkttime)
+{
+ red_t *rp;
+ int w, i;
+ int npkts_per_sec;
+
+ rp = malloc(sizeof(red_t), M_DEVBUF, M_NOWAIT | M_ZERO);
+ if (rp == NULL)
+ return (NULL);
+
+ if (weight == 0)
+ rp->red_weight = W_WEIGHT;
+ else
+ rp->red_weight = weight;
+
+ /* allocate weight table */
+ rp->red_wtab = wtab_alloc(rp->red_weight);
+ if (rp->red_wtab == NULL) {
+ free(rp, M_DEVBUF);
+ return (NULL);
+ }
+
+ rp->red_avg = 0;
+ rp->red_idle = 1;
+
+ if (inv_pmax == 0)
+ rp->red_inv_pmax = default_inv_pmax;
+ else
+ rp->red_inv_pmax = inv_pmax;
+ if (th_min == 0)
+ rp->red_thmin = default_th_min;
+ else
+ rp->red_thmin = th_min;
+ if (th_max == 0)
+ rp->red_thmax = default_th_max;
+ else
+ rp->red_thmax = th_max;
+
+ rp->red_flags = flags;
+
+ if (pkttime == 0)
+ /* default packet time: 1000 bytes / 10Mbps * 8 * 1000000 */
+ rp->red_pkttime = 800;
+ else
+ rp->red_pkttime = pkttime;
+
+ if (weight == 0) {
+ /* when the link is very slow, adjust red parameters */
+ npkts_per_sec = 1000000 / rp->red_pkttime;
+ if (npkts_per_sec < 50) {
+ /* up to about 400Kbps */
+ rp->red_weight = W_WEIGHT_2;
+ } else if (npkts_per_sec < 300) {
+ /* up to about 2.4Mbps */
+ rp->red_weight = W_WEIGHT_1;
+ }
+ }
+
+ /* calculate wshift. weight must be power of 2 */
+ w = rp->red_weight;
+ for (i = 0; w > 1; i++)
+ w = w >> 1;
+ rp->red_wshift = i;
+ w = 1 << rp->red_wshift;
+ if (w != rp->red_weight) {
+ printf("invalid weight value %d for red! use %d\n",
+ rp->red_weight, w);
+ rp->red_weight = w;
+ }
+
+ /*
+ * thmin_s and thmax_s are scaled versions of th_min and th_max
+ * to be compared with avg.
+ */
+ rp->red_thmin_s = rp->red_thmin << (rp->red_wshift + FP_SHIFT);
+ rp->red_thmax_s = rp->red_thmax << (rp->red_wshift + FP_SHIFT);
+
+ /*
+ * precompute probability denominator
+ * probd = (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point
+ */
+ rp->red_probd = (2 * (rp->red_thmax - rp->red_thmin)
+ * rp->red_inv_pmax) << FP_SHIFT;
+
+ microtime(&rp->red_last);
+ return (rp);
+}
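As a reading aid for red_alloc() above, the following stand-alone sketch (illustrative only) repeats the same derivations for the default parameters, so red_wshift, the scaled thresholds and red_probd can be checked by hand.

/* Illustrative only: red_alloc()'s derived values for the defaults. */
#include <stdio.h>

#define FP_SHIFT	12		/* same fixed-point shift as altq_red.c */

int
main(void)
{
	int weight = 512, inv_pmax = 10, th_min = 5, th_max = 15;
	int w, wshift;

	/* weight must be a power of 2; wshift = log2(weight) */
	for (wshift = 0, w = weight; w > 1; wshift++)
		w >>= 1;

	printf("wshift  = %d\n", wshift);		/* 9 for 512 */
	printf("thmin_s = %d\n", th_min << (wshift + FP_SHIFT));
	printf("thmax_s = %d\n", th_max << (wshift + FP_SHIFT));
	printf("probd   = %d\n",
	    (2 * (th_max - th_min) * inv_pmax) << FP_SHIFT);
	return (0);
}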
+
+void
+red_destroy(red_t *rp)
+{
+#ifdef ALTQ3_COMPAT
+#ifdef ALTQ_FLOWVALVE
+ if (rp->red_flowvalve != NULL)
+ fv_destroy(rp->red_flowvalve);
+#endif
+#endif /* ALTQ3_COMPAT */
+ wtab_destroy(rp->red_wtab);
+ free(rp, M_DEVBUF);
+}
+
+void
+red_getstats(red_t *rp, struct redstats *sp)
+{
+ sp->q_avg = rp->red_avg >> rp->red_wshift;
+ sp->xmit_cnt = rp->red_stats.xmit_cnt;
+ sp->drop_cnt = rp->red_stats.drop_cnt;
+ sp->drop_forced = rp->red_stats.drop_forced;
+ sp->drop_unforced = rp->red_stats.drop_unforced;
+ sp->marked_packets = rp->red_stats.marked_packets;
+}
+
+int
+red_addq(red_t *rp, class_queue_t *q, struct mbuf *m,
+ struct altq_pktattr *pktattr)
+{
+ int avg, droptype;
+ int n;
+#ifdef ALTQ3_COMPAT
+#ifdef ALTQ_FLOWVALVE
+ struct fve *fve = NULL;
+
+ if (rp->red_flowvalve != NULL && rp->red_flowvalve->fv_flows > 0)
+ if (fv_checkflow(rp->red_flowvalve, pktattr, &fve)) {
+ m_freem(m);
+ return (-1);
+ }
+#endif
+#endif /* ALTQ3_COMPAT */
+
+ avg = rp->red_avg;
+
+ /*
+ * if we were idle, we pretend that n packets arrived during
+ * the idle period.
+ */
+ if (rp->red_idle) {
+ struct timeval now;
+ int t;
+
+ rp->red_idle = 0;
+ microtime(&now);
+ t = (now.tv_sec - rp->red_last.tv_sec);
+ if (t > 60) {
+ /*
+ * being idle for more than 1 minute, set avg to zero.
+			 * this prevents t from overflowing.
+ */
+ avg = 0;
+ } else {
+ t = t * 1000000 + (now.tv_usec - rp->red_last.tv_usec);
+ n = t / rp->red_pkttime - 1;
+
+ /* the following line does (avg = (1 - Wq)^n * avg) */
+ if (n > 0)
+ avg = (avg >> FP_SHIFT) *
+ pow_w(rp->red_wtab, n);
+ }
+ }
+
+ /* run estimator. (note: avg is scaled by WEIGHT in fixed-point) */
+ avg += (qlen(q) << FP_SHIFT) - (avg >> rp->red_wshift);
+ rp->red_avg = avg; /* save the new value */
+
+ /*
+ * red_count keeps a tally of arriving traffic that has not
+ * been dropped.
+ */
+ rp->red_count++;
+
+ /* see if we drop early */
+ droptype = DTYPE_NODROP;
+ if (avg >= rp->red_thmin_s && qlen(q) > 1) {
+ if (avg >= rp->red_thmax_s) {
+ /* avg >= th_max: forced drop */
+ droptype = DTYPE_FORCED;
+ } else if (rp->red_old == 0) {
+ /* first exceeds th_min */
+ rp->red_count = 1;
+ rp->red_old = 1;
+ } else if (drop_early((avg - rp->red_thmin_s) >> rp->red_wshift,
+ rp->red_probd, rp->red_count)) {
+ /* mark or drop by red */
+ if ((rp->red_flags & REDF_ECN) &&
+ mark_ecn(m, pktattr, rp->red_flags)) {
+ /* successfully marked. do not drop. */
+ rp->red_count = 0;
+#ifdef RED_STATS
+ rp->red_stats.marked_packets++;
+#endif
+ } else {
+ /* unforced drop by red */
+ droptype = DTYPE_EARLY;
+ }
+ }
+ } else {
+ /* avg < th_min */
+ rp->red_old = 0;
+ }
+
+ /*
+ * if the queue length hits the hard limit, it's a forced drop.
+ */
+ if (droptype == DTYPE_NODROP && qlen(q) >= qlimit(q))
+ droptype = DTYPE_FORCED;
+
+#ifdef RED_RANDOM_DROP
+ /* if successful or forced drop, enqueue this packet. */
+ if (droptype != DTYPE_EARLY)
+ _addq(q, m);
+#else
+ /* if successful, enqueue this packet. */
+ if (droptype == DTYPE_NODROP)
+ _addq(q, m);
+#endif
+ if (droptype != DTYPE_NODROP) {
+ if (droptype == DTYPE_EARLY) {
+ /* drop the incoming packet */
+#ifdef RED_STATS
+ rp->red_stats.drop_unforced++;
+#endif
+ } else {
+ /* forced drop, select a victim packet in the queue. */
+#ifdef RED_RANDOM_DROP
+ m = _getq_random(q);
+#endif
+#ifdef RED_STATS
+ rp->red_stats.drop_forced++;
+#endif
+ }
+#ifdef RED_STATS
+ PKTCNTR_ADD(&rp->red_stats.drop_cnt, m_pktlen(m));
+#endif
+ rp->red_count = 0;
+#ifdef ALTQ3_COMPAT
+#ifdef ALTQ_FLOWVALVE
+ if (rp->red_flowvalve != NULL)
+ fv_dropbyred(rp->red_flowvalve, pktattr, fve);
+#endif
+#endif /* ALTQ3_COMPAT */
+ m_freem(m);
+ return (-1);
+ }
+ /* successfully queued */
+#ifdef RED_STATS
+ PKTCNTR_ADD(&rp->red_stats.xmit_cnt, m_pktlen(m));
+#endif
+ return (0);
+}
+
+/*
+ * early-drop probability is calculated as follows:
+ * prob = p_max * (avg - th_min) / (th_max - th_min)
+ * prob_a = prob / (2 - count*prob)
+ * = (avg-th_min) / (2*(th_max-th_min)*inv_p_max - count*(avg-th_min))
+ * here prob_a increases as successive undrop count increases.
+ * (prob_a starts from prob/2, becomes prob when (count == (1 / prob)),
+ * becomes 1 when (count >= (2 / prob))).
+ */
+int
+drop_early(int fp_len, int fp_probd, int count)
+{
+ int d; /* denominator of drop-probability */
+
+ d = fp_probd - count * fp_len;
+ if (d <= 0)
+ /* count exceeds the hard limit: drop or mark */
+ return (1);
+
+ /*
+ * now the range of d is [1..600] in fixed-point. (when
+ * th_max-th_min=10 and p_max=1/30)
+ * drop probability = (avg - TH_MIN) / d
+ */
+
+ if ((arc4random() % d) < fp_len) {
+ /* drop or mark */
+ return (1);
+ }
+ /* no drop/mark */
+ return (0);
+}
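To make the comment above drop_early() concrete, here is a small floating-point sketch (illustrative only; the values chosen for avg and the thresholds are made up) of prob_a = prob / (2 - count * prob), the quantity the fixed-point test d = fp_probd - count * fp_len implements.

/*
 * Illustrative only: the effective early-drop probability grows with the
 * run of undropped packets and saturates at 1 once count >= 2/prob,
 * which corresponds to the d <= 0 branch in drop_early().
 */
#include <stdio.h>

int
main(void)
{
	double p_max = 1.0 / 10.0;	/* INV_P_MAX = 10 */
	double th_min = 5.0, th_max = 15.0;
	double avg = 10.0;		/* assumed average queue length */
	double prob = p_max * (avg - th_min) / (th_max - th_min);
	int count;

	for (count = 0; count <= 50; count += 10) {
		double denom = 2.0 - count * prob;
		double prob_a = (denom <= 0.0) ? 1.0 : prob / denom;

		if (prob_a > 1.0)
			prob_a = 1.0;
		printf("count %2d: prob_a = %.3f\n", count, prob_a);
	}
	return (0);
}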
+
+/*
+ * try to mark CE bit to the packet.
+ * returns 1 if successfully marked, 0 otherwise.
+ */
+int
+mark_ecn(struct mbuf *m, struct altq_pktattr *pktattr, int flags)
+{
+ struct mbuf *m0;
+ struct pf_mtag *at;
+ void *hdr;
+
+ at = pf_find_mtag(m);
+ if (at != NULL) {
+ hdr = at->hdr;
+#ifdef ALTQ3_COMPAT
+ } else if (pktattr != NULL) {
+ af = pktattr->pattr_af;
+ hdr = pktattr->pattr_hdr;
+#endif /* ALTQ3_COMPAT */
+ } else
+ return (0);
+
+ /* verify that pattr_hdr is within the mbuf data */
+ for (m0 = m; m0 != NULL; m0 = m0->m_next)
+ if (((caddr_t)hdr >= m0->m_data) &&
+ ((caddr_t)hdr < m0->m_data + m0->m_len))
+ break;
+ if (m0 == NULL) {
+ /* ick, tag info is stale */
+ return (0);
+ }
+
+ switch (((struct ip *)hdr)->ip_v) {
+ case IPVERSION:
+ if (flags & REDF_ECN4) {
+ struct ip *ip = hdr;
+ u_int8_t otos;
+ int sum;
+
+ if (ip->ip_v != 4)
+ return (0); /* version mismatch! */
+
+ if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_NOTECT)
+ return (0); /* not-ECT */
+ if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_CE)
+ return (1); /* already marked */
+
+ /*
+ * ecn-capable but not marked,
+ * mark CE and update checksum
+ */
+ otos = ip->ip_tos;
+ ip->ip_tos |= IPTOS_ECN_CE;
+ /*
+ * update checksum (from RFC1624)
+ * HC' = ~(~HC + ~m + m')
+ */
+ sum = ~ntohs(ip->ip_sum) & 0xffff;
+ sum += (~otos & 0xffff) + ip->ip_tos;
+ sum = (sum >> 16) + (sum & 0xffff);
+ sum += (sum >> 16); /* add carry */
+ ip->ip_sum = htons(~sum & 0xffff);
+ return (1);
+ }
+ break;
+#ifdef INET6
+ case (IPV6_VERSION >> 4):
+ if (flags & REDF_ECN6) {
+ struct ip6_hdr *ip6 = hdr;
+ u_int32_t flowlabel;
+
+ flowlabel = ntohl(ip6->ip6_flow);
+ if ((flowlabel >> 28) != 6)
+ return (0); /* version mismatch! */
+ if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
+ (IPTOS_ECN_NOTECT << 20))
+ return (0); /* not-ECT */
+ if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
+ (IPTOS_ECN_CE << 20))
+ return (1); /* already marked */
+ /*
+ * ecn-capable but not marked, mark CE
+ */
+ flowlabel |= (IPTOS_ECN_CE << 20);
+ ip6->ip6_flow = htonl(flowlabel);
+ return (1);
+ }
+ break;
+#endif /* INET6 */
+ }
+
+ /* not marked */
+ return (0);
+}
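The IPv4 branch of mark_ecn() above updates the header checksum incrementally per RFC 1624 (HC' = ~(~HC + ~m + m')). The stand-alone sketch below (illustrative only; the sample header words are made up) applies the same update to a single changed 16-bit word and cross-checks it against a full recomputation.

/*
 * Illustrative only: RFC 1624 incremental checksum update when one
 * 16-bit word of a header changes, checked against a full recompute.
 */
#include <stdio.h>
#include <stdint.h>

static uint16_t
cksum(const uint16_t *w, int n)
{
	uint32_t sum = 0;

	while (n-- > 0)
		sum += *w++;
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return ((uint16_t)~sum);
}

int
main(void)
{
	/* made-up header words; word 0 is the version/IHL + TOS word */
	uint16_t words[5] = { 0x4500, 0x0054, 0x1c46, 0x4000, 0x4006 };
	uint16_t hc = cksum(words, 5);		/* old checksum */
	uint16_t m = words[0];			/* old word */
	uint16_t m_new = m | 0x0003;		/* set the ECN CE bits in TOS */
	uint32_t sum;

	/* HC' = ~(~HC + ~m + m'), with end-around carry folding */
	sum = (uint16_t)~hc + (uint16_t)~m + m_new;
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);

	words[0] = m_new;
	printf("incremental 0x%04x, recomputed 0x%04x\n",
	    (unsigned)(uint16_t)~sum, (unsigned)cksum(words, 5));
	return (0);
}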
+
+struct mbuf *
+red_getq(rp, q)
+ red_t *rp;
+ class_queue_t *q;
+{
+ struct mbuf *m;
+
+ if ((m = _getq(q)) == NULL) {
+ if (rp->red_idle == 0) {
+ rp->red_idle = 1;
+ microtime(&rp->red_last);
+ }
+ return NULL;
+ }
+
+ rp->red_idle = 0;
+ return (m);
+}
+
+/*
+ * helper routine to calibrate avg during idle.
+ * pow_w(wtab, n) returns (1 - Wq)^n in fixed-point
+ * here Wq = 1/weight and the code assumes Wq is close to zero.
+ *
+ * w_tab[n] holds ((1 - Wq)^(2^n)) in fixed-point.
+ */
+static struct wtab *wtab_list = NULL; /* pointer to wtab list */
+
+struct wtab *
+wtab_alloc(int weight)
+{
+ struct wtab *w;
+ int i;
+
+ for (w = wtab_list; w != NULL; w = w->w_next)
+ if (w->w_weight == weight) {
+ w->w_refcount++;
+ return (w);
+ }
+
+ w = malloc(sizeof(struct wtab), M_DEVBUF, M_NOWAIT | M_ZERO);
+ if (w == NULL)
+ return (NULL);
+ w->w_weight = weight;
+ w->w_refcount = 1;
+ w->w_next = wtab_list;
+ wtab_list = w;
+
+ /* initialize the weight table */
+ w->w_tab[0] = ((weight - 1) << FP_SHIFT) / weight;
+ for (i = 1; i < 32; i++) {
+ w->w_tab[i] = (w->w_tab[i-1] * w->w_tab[i-1]) >> FP_SHIFT;
+ if (w->w_tab[i] == 0 && w->w_param_max == 0)
+ w->w_param_max = 1 << i;
+ }
+
+ return (w);
+}
+
+int
+wtab_destroy(struct wtab *w)
+{
+ struct wtab *prev;
+
+ if (--w->w_refcount > 0)
+ return (0);
+
+ if (wtab_list == w)
+ wtab_list = w->w_next;
+ else for (prev = wtab_list; prev->w_next != NULL; prev = prev->w_next)
+ if (prev->w_next == w) {
+ prev->w_next = w->w_next;
+ break;
+ }
+
+ free(w, M_DEVBUF);
+ return (0);
+}
+
+int32_t
+pow_w(struct wtab *w, int n)
+{
+ int i, bit;
+ int32_t val;
+
+ if (n >= w->w_param_max)
+ return (0);
+
+ val = 1 << FP_SHIFT;
+ if (n <= 0)
+ return (val);
+
+ bit = 1;
+ i = 0;
+ while (n) {
+ if (n & bit) {
+ val = (val * w->w_tab[i]) >> FP_SHIFT;
+ n &= ~bit;
+ }
+ i++;
+ bit <<= 1;
+ }
+ return (val);
+}
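wtab_alloc() stores (1 - Wq)^(2^i) so that pow_w() can evaluate (1 - Wq)^n with one multiply per set bit of n. The floating-point sketch below (illustrative only; n is an assumed idle-period packet count) shows the same square-and-multiply idea and compares it with a direct pow() call.

/*
 * Illustrative only: square-and-multiply evaluation of (1 - Wq)^n as in
 * pow_w(), done in floating point and checked against libm's pow().
 */
#include <math.h>
#include <stdio.h>

int
main(void)
{
	double wq = 1.0 / 512.0;	/* Wq = 1/W_WEIGHT */
	double tab[32], val = 1.0;
	int i, bits, n = 1000;		/* assumed idle-period packet count */

	tab[0] = 1.0 - wq;		/* (1 - Wq)^(2^0) */
	for (i = 1; i < 32; i++)
		tab[i] = tab[i - 1] * tab[i - 1];

	for (bits = n, i = 0; bits != 0; bits >>= 1, i++)
		if (bits & 1)
			val *= tab[i];	/* multiply in (1 - Wq)^(2^i) */

	printf("square-and-multiply %.6f, pow() %.6f\n",
	    val, pow(1.0 - wq, (double)n));
	return (0);
}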
+
+#ifdef ALTQ3_COMPAT
+/*
+ * red device interface
+ */
+altqdev_decl(red);
+
+int
+redopen(dev, flag, fmt, p)
+ dev_t dev;
+ int flag, fmt;
+#if (__FreeBSD_version > 500000)
+ struct thread *p;
+#else
+ struct proc *p;
+#endif
+{
+ /* everything will be done when the queueing scheme is attached. */
+ return 0;
+}
+
+int
+redclose(dev, flag, fmt, p)
+ dev_t dev;
+ int flag, fmt;
+#if (__FreeBSD_version > 500000)
+ struct thread *p;
+#else
+ struct proc *p;
+#endif
+{
+ red_queue_t *rqp;
+ int err, error = 0;
+
+ while ((rqp = red_list) != NULL) {
+ /* destroy all */
+ err = red_detach(rqp);
+ if (err != 0 && error == 0)
+ error = err;
+ }
+
+ return error;
+}
+
+int
+redioctl(dev, cmd, addr, flag, p)
+ dev_t dev;
+ ioctlcmd_t cmd;
+ caddr_t addr;
+ int flag;
+#if (__FreeBSD_version > 500000)
+ struct thread *p;
+#else
+ struct proc *p;
+#endif
+{
+ red_queue_t *rqp;
+ struct red_interface *ifacep;
+ struct ifnet *ifp;
+ int error = 0;
+
+ /* check super-user privilege */
+ switch (cmd) {
+ case RED_GETSTATS:
+ break;
+ default:
+#if (__FreeBSD_version > 700000)
+ if ((error = priv_check(p, PRIV_ALTQ_MANAGE)) != 0)
+#elif (__FreeBSD_version > 400000)
+ if ((error = suser(p)) != 0)
+#else
+ if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
+#endif
+ return (error);
+ break;
+ }
+
+ switch (cmd) {
+
+ case RED_ENABLE:
+ ifacep = (struct red_interface *)addr;
+ if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
+ error = EBADF;
+ break;
+ }
+ error = altq_enable(rqp->rq_ifq);
+ break;
+
+ case RED_DISABLE:
+ ifacep = (struct red_interface *)addr;
+ if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
+ error = EBADF;
+ break;
+ }
+ error = altq_disable(rqp->rq_ifq);
+ break;
+
+ case RED_IF_ATTACH:
+ ifp = ifunit(((struct red_interface *)addr)->red_ifname);
+ if (ifp == NULL) {
+ error = ENXIO;
+ break;
+ }
+
+ /* allocate and initialize red_queue_t */
+ rqp = malloc(sizeof(red_queue_t), M_DEVBUF, M_WAITOK);
+ if (rqp == NULL) {
+ error = ENOMEM;
+ break;
+ }
+ bzero(rqp, sizeof(red_queue_t));
+
+ rqp->rq_q = malloc(sizeof(class_queue_t),
+ M_DEVBUF, M_WAITOK);
+ if (rqp->rq_q == NULL) {
+ free(rqp, M_DEVBUF);
+ error = ENOMEM;
+ break;
+ }
+ bzero(rqp->rq_q, sizeof(class_queue_t));
+
+ rqp->rq_red = red_alloc(0, 0, 0, 0, 0, 0);
+ if (rqp->rq_red == NULL) {
+ free(rqp->rq_q, M_DEVBUF);
+ free(rqp, M_DEVBUF);
+ error = ENOMEM;
+ break;
+ }
+
+ rqp->rq_ifq = &ifp->if_snd;
+ qtail(rqp->rq_q) = NULL;
+ qlen(rqp->rq_q) = 0;
+ qlimit(rqp->rq_q) = RED_LIMIT;
+ qtype(rqp->rq_q) = Q_RED;
+
+ /*
+ * set RED to this ifnet structure.
+ */
+ error = altq_attach(rqp->rq_ifq, ALTQT_RED, rqp,
+ red_enqueue, red_dequeue, red_request,
+ NULL, NULL);
+ if (error) {
+ red_destroy(rqp->rq_red);
+ free(rqp->rq_q, M_DEVBUF);
+ free(rqp, M_DEVBUF);
+ break;
+ }
+
+ /* add this state to the red list */
+ rqp->rq_next = red_list;
+ red_list = rqp;
+ break;
+
+ case RED_IF_DETACH:
+ ifacep = (struct red_interface *)addr;
+ if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
+ error = EBADF;
+ break;
+ }
+ error = red_detach(rqp);
+ break;
+
+ case RED_GETSTATS:
+ do {
+ struct red_stats *q_stats;
+ red_t *rp;
+
+ q_stats = (struct red_stats *)addr;
+ if ((rqp = altq_lookup(q_stats->iface.red_ifname,
+ ALTQT_RED)) == NULL) {
+ error = EBADF;
+ break;
+ }
+
+ q_stats->q_len = qlen(rqp->rq_q);
+ q_stats->q_limit = qlimit(rqp->rq_q);
+
+ rp = rqp->rq_red;
+ q_stats->q_avg = rp->red_avg >> rp->red_wshift;
+ q_stats->xmit_cnt = rp->red_stats.xmit_cnt;
+ q_stats->drop_cnt = rp->red_stats.drop_cnt;
+ q_stats->drop_forced = rp->red_stats.drop_forced;
+ q_stats->drop_unforced = rp->red_stats.drop_unforced;
+ q_stats->marked_packets = rp->red_stats.marked_packets;
+
+ q_stats->weight = rp->red_weight;
+ q_stats->inv_pmax = rp->red_inv_pmax;
+ q_stats->th_min = rp->red_thmin;
+ q_stats->th_max = rp->red_thmax;
+
+#ifdef ALTQ_FLOWVALVE
+ if (rp->red_flowvalve != NULL) {
+ struct flowvalve *fv = rp->red_flowvalve;
+ q_stats->fv_flows = fv->fv_flows;
+ q_stats->fv_pass = fv->fv_stats.pass;
+ q_stats->fv_predrop = fv->fv_stats.predrop;
+ q_stats->fv_alloc = fv->fv_stats.alloc;
+ q_stats->fv_escape = fv->fv_stats.escape;
+ } else {
+#endif /* ALTQ_FLOWVALVE */
+ q_stats->fv_flows = 0;
+ q_stats->fv_pass = 0;
+ q_stats->fv_predrop = 0;
+ q_stats->fv_alloc = 0;
+ q_stats->fv_escape = 0;
+#ifdef ALTQ_FLOWVALVE
+ }
+#endif /* ALTQ_FLOWVALVE */
+ } while (/*CONSTCOND*/ 0);
+ break;
+
+ case RED_CONFIG:
+ do {
+ struct red_conf *fc;
+ red_t *new;
+ int s, limit;
+
+ fc = (struct red_conf *)addr;
+ if ((rqp = altq_lookup(fc->iface.red_ifname,
+ ALTQT_RED)) == NULL) {
+ error = EBADF;
+ break;
+ }
+ new = red_alloc(fc->red_weight,
+ fc->red_inv_pmax,
+ fc->red_thmin,
+ fc->red_thmax,
+ fc->red_flags,
+ fc->red_pkttime);
+ if (new == NULL) {
+ error = ENOMEM;
+ break;
+ }
+
+#ifdef __NetBSD__
+ s = splnet();
+#else
+ s = splimp();
+#endif
+ red_purgeq(rqp);
+ limit = fc->red_limit;
+ if (limit < fc->red_thmax)
+ limit = fc->red_thmax;
+ qlimit(rqp->rq_q) = limit;
+ fc->red_limit = limit; /* write back the new value */
+
+ red_destroy(rqp->rq_red);
+ rqp->rq_red = new;
+
+ splx(s);
+
+ /* write back new values */
+ fc->red_limit = limit;
+ fc->red_inv_pmax = rqp->rq_red->red_inv_pmax;
+ fc->red_thmin = rqp->rq_red->red_thmin;
+ fc->red_thmax = rqp->rq_red->red_thmax;
+
+ } while (/*CONSTCOND*/ 0);
+ break;
+
+ case RED_SETDEFAULTS:
+ do {
+ struct redparams *rp;
+
+ rp = (struct redparams *)addr;
+
+ default_th_min = rp->th_min;
+ default_th_max = rp->th_max;
+ default_inv_pmax = rp->inv_pmax;
+ } while (/*CONSTCOND*/ 0);
+ break;
+
+ default:
+ error = EINVAL;
+ break;
+ }
+ return error;
+}
+
+static int
+red_detach(rqp)
+ red_queue_t *rqp;
+{
+ red_queue_t *tmp;
+ int error = 0;
+
+ if (ALTQ_IS_ENABLED(rqp->rq_ifq))
+ altq_disable(rqp->rq_ifq);
+
+ if ((error = altq_detach(rqp->rq_ifq)))
+ return (error);
+
+ if (red_list == rqp)
+ red_list = rqp->rq_next;
+ else {
+ for (tmp = red_list; tmp != NULL; tmp = tmp->rq_next)
+ if (tmp->rq_next == rqp) {
+ tmp->rq_next = rqp->rq_next;
+ break;
+ }
+ if (tmp == NULL)
+ printf("red_detach: no state found in red_list!\n");
+ }
+
+ red_destroy(rqp->rq_red);
+ free(rqp->rq_q, M_DEVBUF);
+ free(rqp, M_DEVBUF);
+ return (error);
+}
+
+/*
+ * enqueue routine:
+ *
+ * returns: 0 when successfully queued.
+ * ENOBUFS when drop occurs.
+ */
+static int
+red_enqueue(ifq, m, pktattr)
+ struct ifaltq *ifq;
+ struct mbuf *m;
+ struct altq_pktattr *pktattr;
+{
+ red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
+
+ IFQ_LOCK_ASSERT(ifq);
+
+ if (red_addq(rqp->rq_red, rqp->rq_q, m, pktattr) < 0)
+ return ENOBUFS;
+ ifq->ifq_len++;
+ return 0;
+}
+
+/*
+ * dequeue routine:
+ * must be called in splimp.
+ *
+ * returns: mbuf dequeued.
+ * NULL when no packet is available in the queue.
+ */
+
+static struct mbuf *
+red_dequeue(ifq, op)
+ struct ifaltq *ifq;
+ int op;
+{
+ red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
+ struct mbuf *m;
+
+ IFQ_LOCK_ASSERT(ifq);
+
+ if (op == ALTDQ_POLL)
+ return qhead(rqp->rq_q);
+
+ /* op == ALTDQ_REMOVE */
+ m = red_getq(rqp->rq_red, rqp->rq_q);
+ if (m != NULL)
+ ifq->ifq_len--;
+ return (m);
+}
+
+static int
+red_request(ifq, req, arg)
+ struct ifaltq *ifq;
+ int req;
+ void *arg;
+{
+ red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
+
+ IFQ_LOCK_ASSERT(ifq);
+
+ switch (req) {
+ case ALTRQ_PURGE:
+ red_purgeq(rqp);
+ break;
+ }
+ return (0);
+}
+
+static void
+red_purgeq(rqp)
+ red_queue_t *rqp;
+{
+ _flushq(rqp->rq_q);
+ if (ALTQ_IS_ENABLED(rqp->rq_ifq))
+ rqp->rq_ifq->ifq_len = 0;
+}
+
+#ifdef ALTQ_FLOWVALVE
+
+#define FV_PSHIFT 7 /* weight of average drop rate -- 1/128 */
+#define FV_PSCALE(x) ((x) << FV_PSHIFT)
+#define FV_PUNSCALE(x) ((x) >> FV_PSHIFT)
+#define FV_FSHIFT 5 /* weight of average fraction -- 1/32 */
+#define FV_FSCALE(x) ((x) << FV_FSHIFT)
+#define FV_FUNSCALE(x) ((x) >> FV_FSHIFT)
+
+#define FV_TIMER (3 * hz) /* timer value for garbage collector */
+#define FV_FLOWLISTSIZE 64 /* how many flows in flowlist */
+
+#define FV_N 10 /* update fve_f every FV_N packets */
+
+#define FV_BACKOFFTHRESH 1 /* backoff threshold interval in second */
+#define FV_TTHRESH 3 /* time threshold to delete fve */
+#define FV_ALPHA 5 /* extra packet count */
+
+#define FV_STATS
+
+#if (__FreeBSD_version > 300000)
+#define FV_TIMESTAMP(tp) getmicrotime(tp)
+#else
+#define FV_TIMESTAMP(tp) { (*(tp)) = time; }
+#endif
+
+/*
+ * Brtt table: 127 entry table to convert drop rate (p) to
+ * the corresponding bandwidth fraction (f)
+ * the following equation is implemented to use scaled values,
+ * fve_p and fve_f, in the fixed point format.
+ *
+ * Brtt(p) = 1 /(sqrt(4*p/3) + min(1,3*sqrt(p*6/8)) * p * (1+32 * p*p))
+ * f = Brtt(p) / (max_th + alpha)
+ */
+#define BRTT_SIZE 128
+#define BRTT_SHIFT 12
+#define BRTT_MASK 0x0007f000
+#define BRTT_PMAX (1 << (FV_PSHIFT + FP_SHIFT))
+
+const int brtt_tab[BRTT_SIZE] = {
+ 0, 1262010, 877019, 703694, 598706, 525854, 471107, 427728,
+ 392026, 361788, 335598, 312506, 291850, 273158, 256081, 240361,
+ 225800, 212247, 199585, 187788, 178388, 169544, 161207, 153333,
+ 145888, 138841, 132165, 125836, 119834, 114141, 108739, 103612,
+ 98747, 94129, 89746, 85585, 81637, 77889, 74333, 70957,
+ 67752, 64711, 61824, 59084, 56482, 54013, 51667, 49440,
+ 47325, 45315, 43406, 41591, 39866, 38227, 36667, 35184,
+ 33773, 32430, 31151, 29933, 28774, 27668, 26615, 25611,
+ 24653, 23740, 22868, 22035, 21240, 20481, 19755, 19062,
+ 18399, 17764, 17157, 16576, 16020, 15487, 14976, 14487,
+ 14017, 13567, 13136, 12721, 12323, 11941, 11574, 11222,
+ 10883, 10557, 10243, 9942, 9652, 9372, 9103, 8844,
+ 8594, 8354, 8122, 7898, 7682, 7474, 7273, 7079,
+ 6892, 6711, 6536, 6367, 6204, 6046, 5893, 5746,
+ 5603, 5464, 5330, 5201, 5075, 4954, 4836, 4722,
+ 4611, 4504, 4400, 4299, 4201, 4106, 4014, 3924
+};
+
+static __inline struct fve *
+flowlist_lookup(fv, pktattr, now)
+ struct flowvalve *fv;
+ struct altq_pktattr *pktattr;
+ struct timeval *now;
+{
+ struct fve *fve;
+ int flows;
+ struct ip *ip;
+#ifdef INET6
+ struct ip6_hdr *ip6;
+#endif
+ struct timeval tthresh;
+
+ if (pktattr == NULL)
+ return (NULL);
+
+ tthresh.tv_sec = now->tv_sec - FV_TTHRESH;
+ flows = 0;
+ /*
+ * search the flow list
+ */
+ switch (pktattr->pattr_af) {
+ case AF_INET:
+ ip = (struct ip *)pktattr->pattr_hdr;
+ TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){
+ if (fve->fve_lastdrop.tv_sec == 0)
+ break;
+ if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) {
+ fve->fve_lastdrop.tv_sec = 0;
+ break;
+ }
+ if (fve->fve_flow.flow_af == AF_INET &&
+ fve->fve_flow.flow_ip.ip_src.s_addr ==
+ ip->ip_src.s_addr &&
+ fve->fve_flow.flow_ip.ip_dst.s_addr ==
+ ip->ip_dst.s_addr)
+ return (fve);
+ flows++;
+ }
+ break;
+#ifdef INET6
+ case AF_INET6:
+ ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
+ TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){
+ if (fve->fve_lastdrop.tv_sec == 0)
+ break;
+ if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) {
+ fve->fve_lastdrop.tv_sec = 0;
+ break;
+ }
+ if (fve->fve_flow.flow_af == AF_INET6 &&
+ IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_src,
+ &ip6->ip6_src) &&
+ IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_dst,
+ &ip6->ip6_dst))
+ return (fve);
+ flows++;
+ }
+ break;
+#endif /* INET6 */
+
+ default:
+ /* unknown protocol. no drop. */
+ return (NULL);
+ }
+ fv->fv_flows = flows; /* save the number of active fve's */
+ return (NULL);
+}
+
+static __inline struct fve *
+flowlist_reclaim(fv, pktattr)
+ struct flowvalve *fv;
+ struct altq_pktattr *pktattr;
+{
+ struct fve *fve;
+ struct ip *ip;
+#ifdef INET6
+ struct ip6_hdr *ip6;
+#endif
+
+ /*
+ * get an entry from the tail of the LRU list.
+ */
+ fve = TAILQ_LAST(&fv->fv_flowlist, fv_flowhead);
+
+ switch (pktattr->pattr_af) {
+ case AF_INET:
+ ip = (struct ip *)pktattr->pattr_hdr;
+ fve->fve_flow.flow_af = AF_INET;
+ fve->fve_flow.flow_ip.ip_src = ip->ip_src;
+ fve->fve_flow.flow_ip.ip_dst = ip->ip_dst;
+ break;
+#ifdef INET6
+ case AF_INET6:
+ ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
+ fve->fve_flow.flow_af = AF_INET6;
+ fve->fve_flow.flow_ip6.ip6_src = ip6->ip6_src;
+ fve->fve_flow.flow_ip6.ip6_dst = ip6->ip6_dst;
+ break;
+#endif
+ }
+
+ fve->fve_state = Green;
+ fve->fve_p = 0.0;
+ fve->fve_f = 0.0;
+ fve->fve_ifseq = fv->fv_ifseq - 1;
+ fve->fve_count = 0;
+
+ fv->fv_flows++;
+#ifdef FV_STATS
+ fv->fv_stats.alloc++;
+#endif
+ return (fve);
+}
+
+static __inline void
+flowlist_move_to_head(fv, fve)
+ struct flowvalve *fv;
+ struct fve *fve;
+{
+ if (TAILQ_FIRST(&fv->fv_flowlist) != fve) {
+ TAILQ_REMOVE(&fv->fv_flowlist, fve, fve_lru);
+ TAILQ_INSERT_HEAD(&fv->fv_flowlist, fve, fve_lru);
+ }
+}
+
+#if 0 /* XXX: make the compiler happy (fv_alloc unused) */
+/*
+ * allocate flowvalve structure
+ */
+static struct flowvalve *
+fv_alloc(rp)
+ struct red *rp;
+{
+ struct flowvalve *fv;
+ struct fve *fve;
+ int i, num;
+
+ num = FV_FLOWLISTSIZE;
+ fv = malloc(sizeof(struct flowvalve),
+ M_DEVBUF, M_WAITOK);
+ if (fv == NULL)
+ return (NULL);
+ bzero(fv, sizeof(struct flowvalve));
+
+ fv->fv_fves = malloc(sizeof(struct fve) * num,
+ M_DEVBUF, M_WAITOK);
+ if (fv->fv_fves == NULL) {
+ free(fv, M_DEVBUF);
+ return (NULL);
+ }
+ bzero(fv->fv_fves, sizeof(struct fve) * num);
+
+ fv->fv_flows = 0;
+ TAILQ_INIT(&fv->fv_flowlist);
+ for (i = 0; i < num; i++) {
+ fve = &fv->fv_fves[i];
+ fve->fve_lastdrop.tv_sec = 0;
+ TAILQ_INSERT_TAIL(&fv->fv_flowlist, fve, fve_lru);
+ }
+
+ /* initialize drop rate threshold in scaled fixed-point */
+ fv->fv_pthresh = (FV_PSCALE(1) << FP_SHIFT) / rp->red_inv_pmax;
+
+ /* initialize drop rate to fraction table */
+ fv->fv_p2ftab = malloc(sizeof(int) * BRTT_SIZE,
+ M_DEVBUF, M_WAITOK);
+ if (fv->fv_p2ftab == NULL) {
+ free(fv->fv_fves, M_DEVBUF);
+ free(fv, M_DEVBUF);
+ return (NULL);
+ }
+ /*
+ * create the p2f table.
+ * (shift is used to keep the precision)
+ */
+ for (i = 1; i < BRTT_SIZE; i++) {
+ int f;
+
+ f = brtt_tab[i] << 8;
+ fv->fv_p2ftab[i] = (f / (rp->red_thmax + FV_ALPHA)) >> 8;
+ }
+
+ return (fv);
+}
+#endif
+
+static void fv_destroy(fv)
+ struct flowvalve *fv;
+{
+ free(fv->fv_p2ftab, M_DEVBUF);
+ free(fv->fv_fves, M_DEVBUF);
+ free(fv, M_DEVBUF);
+}
+
+static __inline int
+fv_p2f(fv, p)
+ struct flowvalve *fv;
+ int p;
+{
+ int val, f;
+
+ if (p >= BRTT_PMAX)
+ f = fv->fv_p2ftab[BRTT_SIZE-1];
+ else if ((val = (p & BRTT_MASK)))
+ f = fv->fv_p2ftab[(val >> BRTT_SHIFT)];
+ else
+ f = fv->fv_p2ftab[1];
+ return (f);
+}
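fv_p2f() above selects an entry of brtt_tab[] using bits 12..18 of the scaled drop rate (BRTT_MASK and BRTT_SHIFT), since fve_p is kept scaled by 2^(FV_PSHIFT + FP_SHIFT). A small sketch of the same index computation (illustrative only; the probability values are made up):

/*
 * Illustrative only: how a drop probability maps to a brtt_tab[] index,
 * mirroring the masking done in fv_p2f().
 */
#include <stdio.h>

#define FP_SHIFT	12
#define FV_PSHIFT	7
#define BRTT_SHIFT	12
#define BRTT_MASK	0x0007f000
#define BRTT_SIZE	128
#define BRTT_PMAX	(1 << (FV_PSHIFT + FP_SHIFT))

int
main(void)
{
	double probs[] = { 0.001, 0.01, 0.05, 0.2 };	/* assumed drop rates */
	int i;

	for (i = 0; i < 4; i++) {
		int p = (int)(probs[i] * BRTT_PMAX);	/* scaled like fve_p */
		int idx;

		if (p >= BRTT_PMAX)
			idx = BRTT_SIZE - 1;
		else if ((p & BRTT_MASK) != 0)
			idx = (p & BRTT_MASK) >> BRTT_SHIFT;
		else
			idx = 1;
		printf("p = %.3f -> brtt_tab[%d]\n", probs[i], idx);
	}
	return (0);
}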
+
+/*
+ * check if an arriving packet should be pre-dropped.
+ * called from red_addq() when a packet arrives.
+ * returns 1 when the packet should be pre-dropped.
+ * should be called in splimp.
+ */
+static int
+fv_checkflow(fv, pktattr, fcache)
+ struct flowvalve *fv;
+ struct altq_pktattr *pktattr;
+ struct fve **fcache;
+{
+ struct fve *fve;
+ struct timeval now;
+
+ fv->fv_ifseq++;
+ FV_TIMESTAMP(&now);
+
+ if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL)
+ /* no matching entry in the flowlist */
+ return (0);
+
+ *fcache = fve;
+
+ /* update fraction f for every FV_N packets */
+ if (++fve->fve_count == FV_N) {
+ /*
+ * f = Wf * N / (fv_ifseq - fve_ifseq) + (1 - Wf) * f
+ */
+ fve->fve_f =
+ (FV_N << FP_SHIFT) / (fv->fv_ifseq - fve->fve_ifseq)
+ + fve->fve_f - FV_FUNSCALE(fve->fve_f);
+ fve->fve_ifseq = fv->fv_ifseq;
+ fve->fve_count = 0;
+ }
+
+ /*
+ * overpumping test
+ */
+ if (fve->fve_state == Green && fve->fve_p > fv->fv_pthresh) {
+ int fthresh;
+
+ /* calculate a threshold */
+ fthresh = fv_p2f(fv, fve->fve_p);
+ if (fve->fve_f > fthresh)
+ fve->fve_state = Red;
+ }
+
+ if (fve->fve_state == Red) {
+ /*
+ * backoff test
+ */
+ if (now.tv_sec - fve->fve_lastdrop.tv_sec > FV_BACKOFFTHRESH) {
+ /* no drop for at least FV_BACKOFFTHRESH sec */
+ fve->fve_p = 0;
+ fve->fve_state = Green;
+#ifdef FV_STATS
+ fv->fv_stats.escape++;
+#endif
+ } else {
+ /* block this flow */
+ flowlist_move_to_head(fv, fve);
+ fve->fve_lastdrop = now;
+#ifdef FV_STATS
+ fv->fv_stats.predrop++;
+#endif
+ return (1);
+ }
+ }
+
+ /*
+ * p = (1 - Wp) * p
+ */
+ fve->fve_p -= FV_PUNSCALE(fve->fve_p);
+ if (fve->fve_p < 0)
+ fve->fve_p = 0;
+#ifdef FV_STATS
+ fv->fv_stats.pass++;
+#endif
+ return (0);
+}
+
+/*
+ * called from red_addq when a packet is dropped by red.
+ * should be called in splimp.
+ */
+static void fv_dropbyred(fv, pktattr, fcache)
+ struct flowvalve *fv;
+ struct altq_pktattr *pktattr;
+ struct fve *fcache;
+{
+ struct fve *fve;
+ struct timeval now;
+
+ if (pktattr == NULL)
+ return;
+ FV_TIMESTAMP(&now);
+
+ if (fcache != NULL)
+ /* the fve of this packet is already cached */
+ fve = fcache;
+ else if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL)
+ fve = flowlist_reclaim(fv, pktattr);
+
+ flowlist_move_to_head(fv, fve);
+
+ /*
+ * update p: the following line cancels the update
+	 * in fv_checkflow() and calculates
+ * p = Wp + (1 - Wp) * p
+ */
+ fve->fve_p = (1 << FP_SHIFT) + fve->fve_p;
+
+ fve->fve_lastdrop = now;
+}
+
+#endif /* ALTQ_FLOWVALVE */
+
+#ifdef KLD_MODULE
+
+static struct altqsw red_sw =
+ {"red", redopen, redclose, redioctl};
+
+ALTQ_MODULE(altq_red, ALTQT_RED, &red_sw);
+MODULE_VERSION(altq_red, 1);
+
+#endif /* KLD_MODULE */
+#endif /* ALTQ3_COMPAT */
+
+#endif /* ALTQ_RED */
diff --git a/sys/net/altq/altq_red.h b/sys/net/altq/altq_red.h
new file mode 100644
index 0000000..8ae8d29
--- /dev/null
+++ b/sys/net/altq/altq_red.h
@@ -0,0 +1,199 @@
+/*-
+ * Copyright (C) 1997-2003
+ * Sony Computer Science Laboratories Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $KAME: altq_red.h,v 1.8 2003/07/10 12:07:49 kjc Exp $
+ * $FreeBSD$
+ */
+
+#ifndef _ALTQ_ALTQ_RED_H_
+#define _ALTQ_ALTQ_RED_H_
+
+#include <net/altq/altq_classq.h>
+
+#ifdef ALTQ3_COMPAT
+struct red_interface {
+ char red_ifname[IFNAMSIZ];
+};
+
+struct red_stats {
+ struct red_interface iface;
+ int q_len;
+ int q_avg;
+
+ struct pktcntr xmit_cnt;
+ struct pktcntr drop_cnt;
+ u_int drop_forced;
+ u_int drop_unforced;
+ u_int marked_packets;
+
+ /* static red parameters */
+ int q_limit;
+ int weight;
+ int inv_pmax;
+ int th_min;
+ int th_max;
+
+ /* flowvalve related stuff */
+ u_int fv_flows;
+ u_int fv_pass;
+ u_int fv_predrop;
+ u_int fv_alloc;
+ u_int fv_escape;
+};
+
+struct red_conf {
+ struct red_interface iface;
+ int red_weight; /* weight for EWMA */
+ int red_inv_pmax; /* inverse of max drop probability */
+ int red_thmin; /* red min threshold */
+ int red_thmax; /* red max threshold */
+ int red_limit; /* max queue length */
+ int red_pkttime; /* average packet time in usec */
+ int red_flags; /* see below */
+};
+#endif /* ALTQ3_COMPAT */
+
+/* red flags */
+#define REDF_ECN4 0x01 /* use packet marking for IPv4 packets */
+#define REDF_ECN6 0x02 /* use packet marking for IPv6 packets */
+#define REDF_ECN (REDF_ECN4 | REDF_ECN6)
+#define REDF_FLOWVALVE 0x04 /* use flowvalve (aka penalty-box) */
+
+/*
+ * simpler versions of red parameters and statistics used by other
+ * disciplines (e.g., CBQ)
+ */
+struct redparams {
+ int th_min; /* red min threshold */
+ int th_max; /* red max threshold */
+ int inv_pmax; /* inverse of max drop probability */
+};
+
+struct redstats {
+ int q_avg;
+ struct pktcntr xmit_cnt;
+ struct pktcntr drop_cnt;
+ u_int drop_forced;
+ u_int drop_unforced;
+ u_int marked_packets;
+};
+
+#ifdef ALTQ3_COMPAT
+/*
+ * IOCTLs for RED
+ */
+#define RED_IF_ATTACH _IOW('Q', 1, struct red_interface)
+#define RED_IF_DETACH _IOW('Q', 2, struct red_interface)
+#define RED_ENABLE _IOW('Q', 3, struct red_interface)
+#define RED_DISABLE _IOW('Q', 4, struct red_interface)
+#define RED_CONFIG _IOWR('Q', 6, struct red_conf)
+#define RED_GETSTATS _IOWR('Q', 12, struct red_stats)
+#define RED_SETDEFAULTS _IOW('Q', 30, struct redparams)
+#endif /* ALTQ3_COMPAT */
+
+#ifdef _KERNEL
+
+#ifdef ALTQ3_COMPAT
+struct flowvalve;
+#endif
+
+/* weight table structure for idle time calibration */
+struct wtab {
+ struct wtab *w_next;
+ int w_weight;
+ int w_param_max;
+ int w_refcount;
+ int32_t w_tab[32];
+};
+
+typedef struct red {
+ int red_pkttime; /* average packet time in micro sec
+ used for idle calibration */
+ int red_flags; /* red flags */
+
+ /* red parameters */
+ int red_weight; /* weight for EWMA */
+ int red_inv_pmax; /* inverse of max drop probability */
+ int red_thmin; /* red min threshold */
+ int red_thmax; /* red max threshold */
+
+ /* variables for internal use */
+ int red_wshift; /* log(red_weight) */
+ int red_thmin_s; /* th_min scaled by avgshift */
+ int red_thmax_s; /* th_max scaled by avgshift */
+ int red_probd; /* drop probability denominator */
+
+ int red_avg; /* queue len avg scaled by avgshift */
+ int red_count; /* packet count since last dropped/
+ marked packet */
+ int red_idle; /* queue was empty */
+ int red_old; /* avg is above th_min */
+ struct wtab *red_wtab; /* weight table */
+ struct timeval red_last; /* time when the queue becomes idle */
+
+#ifdef ALTQ3_COMPAT
+ struct flowvalve *red_flowvalve; /* flowvalve state */
+#endif
+
+ struct {
+ struct pktcntr xmit_cnt;
+ struct pktcntr drop_cnt;
+ u_int drop_forced;
+ u_int drop_unforced;
+ u_int marked_packets;
+ } red_stats;
+} red_t;
+
+#ifdef ALTQ3_COMPAT
+typedef struct red_queue {
+ struct red_queue *rq_next; /* next red_state in the list */
+ struct ifaltq *rq_ifq; /* backpointer to ifaltq */
+
+ class_queue_t *rq_q;
+
+ red_t *rq_red;
+} red_queue_t;
+#endif /* ALTQ3_COMPAT */
+
+/* red drop types */
+#define DTYPE_NODROP 0 /* no drop */
+#define DTYPE_FORCED 1 /* a "forced" drop */
+#define DTYPE_EARLY 2 /* an "unforced" (early) drop */
+
+extern red_t *red_alloc(int, int, int, int, int, int);
+extern void red_destroy(red_t *);
+extern void red_getstats(red_t *, struct redstats *);
+extern int red_addq(red_t *, class_queue_t *, struct mbuf *,
+ struct altq_pktattr *);
+extern struct mbuf *red_getq(red_t *, class_queue_t *);
+extern int drop_early(int, int, int);
+extern int mark_ecn(struct mbuf *, struct altq_pktattr *, int);
+extern struct wtab *wtab_alloc(int);
+extern int wtab_destroy(struct wtab *);
+extern int32_t pow_w(struct wtab *, int);
+
+#endif /* _KERNEL */
+
+#endif /* _ALTQ_ALTQ_RED_H_ */
diff --git a/sys/net/altq/altq_rio.c b/sys/net/altq/altq_rio.c
new file mode 100644
index 0000000..ccf0e8d
--- /dev/null
+++ b/sys/net/altq/altq_rio.c
@@ -0,0 +1,852 @@
+/* $FreeBSD$ */
+/* $KAME: altq_rio.c,v 1.17 2003/07/10 12:07:49 kjc Exp $ */
+
+/*
+ * Copyright (C) 1998-2003
+ * Sony Computer Science Laboratories Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+/*
+ * Copyright (c) 1990-1994 Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the Computer Systems
+ * Engineering Group at Lawrence Berkeley Laboratory.
+ * 4. Neither the name of the University nor of the Laboratory may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(__FreeBSD__) || defined(__NetBSD__)
+#include "opt_altq.h"
+#include "opt_inet.h"
+#ifdef __FreeBSD__
+#include "opt_inet6.h"
+#endif
+#endif /* __FreeBSD__ || __NetBSD__ */
+#ifdef ALTQ_RIO /* rio is enabled by ALTQ_RIO option in opt_altq.h */
+
+#include <sys/param.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/systm.h>
+#include <sys/errno.h>
+#if 1 /* ALTQ3_COMPAT */
+#include <sys/proc.h>
+#include <sys/sockio.h>
+#include <sys/kernel.h>
+#endif
+
+#include <net/if.h>
+#include <net/if_var.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#ifdef INET6
+#include <netinet/ip6.h>
+#endif
+
+#include <netpfil/pf/pf.h>
+#include <netpfil/pf/pf_altq.h>
+#include <net/altq/altq.h>
+#include <net/altq/altq_cdnr.h>
+#include <net/altq/altq_red.h>
+#include <net/altq/altq_rio.h>
+#ifdef ALTQ3_COMPAT
+#include <net/altq/altq_conf.h>
+#endif
+
+/*
+ * RIO: RED with IN/OUT bit
+ * described in
+ * "Explicit Allocation of Best Effort Packet Delivery Service"
+ * David D. Clark and Wenjia Fang, MIT Lab for Computer Science
+ * http://diffserv.lcs.mit.edu/Papers/exp-alloc-ddc-wf.{ps,pdf}
+ *
+ * this implementation is extended to support more than 2 drop precedence
+ * values as described in RFC2597 (Assured Forwarding PHB Group).
+ *
+ */
+/*
+ * AF DS (differentiated service) codepoints.
+ * (classes can be mapped to CBQ or H-FSC classes.)
+ *
+ * 0 1 2 3 4 5 6 7
+ * +---+---+---+---+---+---+---+---+
+ * | CLASS |DropPre| 0 | CU |
+ * +---+---+---+---+---+---+---+---+
+ *
+ * class 1: 001
+ * class 2: 010
+ * class 3: 011
+ * class 4: 100
+ *
+ * low drop prec: 01
+ * medium drop prec: 10
+ * high drop prec:   11
+ */
+
+/* normal red parameters */
+#define W_WEIGHT 512 /* inverse of weight of EWMA (511/512) */
+ /* q_weight = 0.00195 */
+
+/* red parameters for a slow link */
+#define W_WEIGHT_1 128 /* inverse of weight of EWMA (127/128) */
+ /* q_weight = 0.0078125 */
+
+/* red parameters for a very slow link (e.g., dialup) */
+#define W_WEIGHT_2 64 /* inverse of weight of EWMA (63/64) */
+ /* q_weight = 0.015625 */
+
+/* fixed-point uses 12-bit decimal places */
+#define FP_SHIFT 12 /* fixed-point shift */
+
+/* red parameters for drop probability */
+#define INV_P_MAX 10 /* inverse of max drop probability */
+#define TH_MIN 5 /* min threshold */
+#define TH_MAX 15 /* max threshold */
+
+#define RIO_LIMIT 60 /* default max queue length */
+#define RIO_STATS /* collect statistics */
+
+#define TV_DELTA(a, b, delta) { \
+ register int xxs; \
+ \
+ delta = (a)->tv_usec - (b)->tv_usec; \
+ if ((xxs = (a)->tv_sec - (b)->tv_sec) != 0) { \
+ if (xxs < 0) { \
+ delta = 60000000; \
+ } else if (xxs > 4) { \
+ if (xxs > 60) \
+ delta = 60000000; \
+ else \
+ delta += xxs * 1000000; \
+ } else while (xxs > 0) { \
+ delta += 1000000; \
+ xxs--; \
+ } \
+ } \
+}
+
+#ifdef ALTQ3_COMPAT
+/* rio_list keeps all rio_queue_t's allocated. */
+static rio_queue_t *rio_list = NULL;
+#endif
+/* default rio parameter values */
+static struct redparams default_rio_params[RIO_NDROPPREC] = {
+ /* th_min, th_max, inv_pmax */
+ { TH_MAX * 2 + TH_MIN, TH_MAX * 3, INV_P_MAX }, /* low drop precedence */
+ { TH_MAX + TH_MIN, TH_MAX * 2, INV_P_MAX }, /* medium drop precedence */
+ { TH_MIN, TH_MAX, INV_P_MAX } /* high drop precedence */
+};
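(For reference, with TH_MIN = 5 and TH_MAX = 15 as defined above, these staggered defaults work out to th_min/th_max of 35/45 for low, 20/30 for medium and 5/15 for high drop precedence, so traffic carrying a higher drop precedence starts being dropped at a shorter average queue length; inv_pmax = 10 caps the early-drop probability of each band at 1/10.)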
+
+/* internal function prototypes */
+static int dscp2index(u_int8_t);
+#ifdef ALTQ3_COMPAT
+static int rio_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *);
+static struct mbuf *rio_dequeue(struct ifaltq *, int);
+static int rio_request(struct ifaltq *, int, void *);
+static int rio_detach(rio_queue_t *);
+
+/*
+ * rio device interface
+ */
+altqdev_decl(rio);
+
+#endif /* ALTQ3_COMPAT */
+
+rio_t *
+rio_alloc(int weight, struct redparams *params, int flags, int pkttime)
+{
+ rio_t *rp;
+ int w, i;
+ int npkts_per_sec;
+
+ rp = malloc(sizeof(rio_t), M_DEVBUF, M_NOWAIT | M_ZERO);
+ if (rp == NULL)
+ return (NULL);
+
+ rp->rio_flags = flags;
+ if (pkttime == 0)
+ /* default packet time: 1000 bytes / 10Mbps * 8 * 1000000 */
+ rp->rio_pkttime = 800;
+ else
+ rp->rio_pkttime = pkttime;
+
+ if (weight != 0)
+ rp->rio_weight = weight;
+ else {
+ /* use default */
+ rp->rio_weight = W_WEIGHT;
+
+ /* when the link is very slow, adjust red parameters */
+ npkts_per_sec = 1000000 / rp->rio_pkttime;
+ if (npkts_per_sec < 50) {
+ /* up to about 400Kbps */
+ rp->rio_weight = W_WEIGHT_2;
+ } else if (npkts_per_sec < 300) {
+ /* up to about 2.4Mbps */
+ rp->rio_weight = W_WEIGHT_1;
+ }
+ }
+
+ /* calculate wshift. weight must be power of 2 */
+ w = rp->rio_weight;
+ for (i = 0; w > 1; i++)
+ w = w >> 1;
+ rp->rio_wshift = i;
+ w = 1 << rp->rio_wshift;
+ if (w != rp->rio_weight) {
+ printf("invalid weight value %d for red! use %d\n",
+ rp->rio_weight, w);
+ rp->rio_weight = w;
+ }
+
+ /* allocate weight table */
+ rp->rio_wtab = wtab_alloc(rp->rio_weight);
+
+ for (i = 0; i < RIO_NDROPPREC; i++) {
+ struct dropprec_state *prec = &rp->rio_precstate[i];
+
+ prec->avg = 0;
+ prec->idle = 1;
+
+ if (params == NULL || params[i].inv_pmax == 0)
+ prec->inv_pmax = default_rio_params[i].inv_pmax;
+ else
+ prec->inv_pmax = params[i].inv_pmax;
+ if (params == NULL || params[i].th_min == 0)
+ prec->th_min = default_rio_params[i].th_min;
+ else
+ prec->th_min = params[i].th_min;
+ if (params == NULL || params[i].th_max == 0)
+ prec->th_max = default_rio_params[i].th_max;
+ else
+ prec->th_max = params[i].th_max;
+
+ /*
+ * th_min_s and th_max_s are scaled versions of th_min
+ * and th_max to be compared with avg.
+ */
+ prec->th_min_s = prec->th_min << (rp->rio_wshift + FP_SHIFT);
+ prec->th_max_s = prec->th_max << (rp->rio_wshift + FP_SHIFT);
+
+ /*
+ * precompute probability denominator
+ * probd = (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point
+ */
+ prec->probd = (2 * (prec->th_max - prec->th_min)
+ * prec->inv_pmax) << FP_SHIFT;
+
+ microtime(&prec->last);
+ }
+
+ return (rp);
+}
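As a worked example of the fixed-point setup in rio_alloc() above, assuming the default W_WEIGHT of 512 (so rio_wshift = 9) and FP_SHIFT = 12: the high drop precedence defaults th_min = 5 and th_max = 15 scale to

    th_min_s = 5  << (9 + 12) = 10485760
    th_max_s = 15 << (9 + 12) = 31457280
    probd    = (2 * (15 - 5) * 10) << 12 = 819200

and these are the values that the averaged queue length, itself kept scaled by 2^(wshift + FP_SHIFT), is compared against in rio_addq().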
+
+void
+rio_destroy(rio_t *rp)
+{
+ wtab_destroy(rp->rio_wtab);
+ free(rp, M_DEVBUF);
+}
+
+void
+rio_getstats(rio_t *rp, struct redstats *sp)
+{
+ int i;
+
+ for (i = 0; i < RIO_NDROPPREC; i++) {
+ bcopy(&rp->q_stats[i], sp, sizeof(struct redstats));
+ sp->q_avg = rp->rio_precstate[i].avg >> rp->rio_wshift;
+ sp++;
+ }
+}
+
+#if (RIO_NDROPPREC == 3)
+/*
+ * internally, a drop precedence value is converted to an index
+ * starting from 0.
+ */
+static int
+dscp2index(u_int8_t dscp)
+{
+ int dpindex = dscp & AF_DROPPRECMASK;
+
+ if (dpindex == 0)
+ return (0);
+ return ((dpindex >> 3) - 1);
+}
+#endif
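To make the mapping concrete, here is a minimal user-space sketch (not part of this change) of the same calculation for the three AF class-2 codepoints of RFC 2597; DROPPREC_MASK and dp_index() are illustrative stand-ins for AF_DROPPRECMASK and dscp2index(), and the DS byte values follow the bit layout sketched near the top of this file.

    #include <stdio.h>
    #include <stdint.h>

    /* Stand-in for the two DropPre bits of the DS byte (mask 0x18). */
    #define DROPPREC_MASK 0x18

    static int
    dp_index(uint8_t dsfield)
    {
            int dp = dsfield & DROPPREC_MASK;

            /* drop precedence 01/10/11 -> index 0/1/2, as in dscp2index() */
            return (dp == 0) ? 0 : (dp >> 3) - 1;
    }

    int
    main(void)
    {
            /* AF2x DS bytes: class 010, DropPre 01/10/11, CU bits zero. */
            uint8_t af21 = 0x48, af22 = 0x50, af23 = 0x58;

            printf("AF21 -> %d, AF22 -> %d, AF23 -> %d\n",
                dp_index(af21), dp_index(af22), dp_index(af23));
            return (0);
    }

Compiled stand-alone this prints AF21 -> 0, AF22 -> 1, AF23 -> 2, matching the low/medium/high indices used by rio_addq().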
+
+#if 1
+/*
+ * kludge: when a packet is dequeued, we need to know its drop precedence
+ * in order to keep the queue length of each drop precedence.
+ * use m_pkthdr.rcvif to pass this info.
+ */
+#define RIOM_SET_PRECINDEX(m, idx) \
+ do { (m)->m_pkthdr.rcvif = (void *)((long)(idx)); } while (0)
+#define RIOM_GET_PRECINDEX(m) \
+ ({ long idx; idx = (long)((m)->m_pkthdr.rcvif); \
+ (m)->m_pkthdr.rcvif = NULL; idx; })
+#endif
+
+int
+rio_addq(rio_t *rp, class_queue_t *q, struct mbuf *m,
+ struct altq_pktattr *pktattr)
+{
+ int avg, droptype;
+ u_int8_t dsfield, odsfield;
+ int dpindex, i, n, t;
+ struct timeval now;
+ struct dropprec_state *prec;
+
+ dsfield = odsfield = read_dsfield(m, pktattr);
+ dpindex = dscp2index(dsfield);
+
+ /*
+ * update avg of the precedence states whose drop precedence
+ * is larger than or equal to the drop precedence of the packet
+ */
+ now.tv_sec = 0;
+ for (i = dpindex; i < RIO_NDROPPREC; i++) {
+ prec = &rp->rio_precstate[i];
+ avg = prec->avg;
+ if (prec->idle) {
+ prec->idle = 0;
+ if (now.tv_sec == 0)
+ microtime(&now);
+ t = (now.tv_sec - prec->last.tv_sec);
+ if (t > 60)
+ avg = 0;
+ else {
+ t = t * 1000000 +
+ (now.tv_usec - prec->last.tv_usec);
+ n = t / rp->rio_pkttime;
+ /* calculate (avg = (1 - Wq)^n * avg) */
+ if (n > 0)
+ avg = (avg >> FP_SHIFT) *
+ pow_w(rp->rio_wtab, n);
+ }
+ }
+
+ /* run estimator. (avg is scaled by WEIGHT in fixed-point) */
+ avg += (prec->qlen << FP_SHIFT) - (avg >> rp->rio_wshift);
+ prec->avg = avg; /* save the new value */
+ /*
+ * count keeps a tally of arriving traffic that has not
+ * been dropped.
+ */
+ prec->count++;
+ }
+
+ prec = &rp->rio_precstate[dpindex];
+ avg = prec->avg;
+
+ /* see if we drop early */
+ droptype = DTYPE_NODROP;
+ if (avg >= prec->th_min_s && prec->qlen > 1) {
+ if (avg >= prec->th_max_s) {
+ /* avg >= th_max: forced drop */
+ droptype = DTYPE_FORCED;
+ } else if (prec->old == 0) {
+ /* first exceeds th_min */
+ prec->count = 1;
+ prec->old = 1;
+ } else if (drop_early((avg - prec->th_min_s) >> rp->rio_wshift,
+ prec->probd, prec->count)) {
+ /* unforced drop by red */
+ droptype = DTYPE_EARLY;
+ }
+ } else {
+ /* avg < th_min */
+ prec->old = 0;
+ }
+
+ /*
+ * if the queue length hits the hard limit, it's a forced drop.
+ */
+ if (droptype == DTYPE_NODROP && qlen(q) >= qlimit(q))
+ droptype = DTYPE_FORCED;
+
+ if (droptype != DTYPE_NODROP) {
+ /* always drop incoming packet (as opposed to randomdrop) */
+ for (i = dpindex; i < RIO_NDROPPREC; i++)
+ rp->rio_precstate[i].count = 0;
+#ifdef RIO_STATS
+ if (droptype == DTYPE_EARLY)
+ rp->q_stats[dpindex].drop_unforced++;
+ else
+ rp->q_stats[dpindex].drop_forced++;
+ PKTCNTR_ADD(&rp->q_stats[dpindex].drop_cnt, m_pktlen(m));
+#endif
+ m_freem(m);
+ return (-1);
+ }
+
+ for (i = dpindex; i < RIO_NDROPPREC; i++)
+ rp->rio_precstate[i].qlen++;
+
+ /* save drop precedence index in mbuf hdr */
+ RIOM_SET_PRECINDEX(m, dpindex);
+
+ if (rp->rio_flags & RIOF_CLEARDSCP)
+ dsfield &= ~DSCP_MASK;
+
+ if (dsfield != odsfield)
+ write_dsfield(m, pktattr, dsfield);
+
+ _addq(q, m);
+
+#ifdef RIO_STATS
+ PKTCNTR_ADD(&rp->q_stats[dpindex].xmit_cnt, m_pktlen(m));
+#endif
+ return (0);
+}
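A rough worked example of the idle-time decay performed above, assuming the default weight of 512 and the default rio_pkttime of 800 usec: a queue that has sat empty for 0.8 seconds corresponds to n = 1000 packet times, so the stored average is multiplied by (1 - 1/512)^1000, roughly e^(-1000/512) or about 0.14, before the arriving packet is folded into the estimate; pow_w() derives that factor from the weight table allocated in rio_alloc() rather than computing the power per packet.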
+
+struct mbuf *
+rio_getq(rio_t *rp, class_queue_t *q)
+{
+ struct mbuf *m;
+ int dpindex, i;
+
+ if ((m = _getq(q)) == NULL)
+ return NULL;
+
+ dpindex = RIOM_GET_PRECINDEX(m);
+ for (i = dpindex; i < RIO_NDROPPREC; i++) {
+ if (--rp->rio_precstate[i].qlen == 0) {
+ if (rp->rio_precstate[i].idle == 0) {
+ rp->rio_precstate[i].idle = 1;
+ microtime(&rp->rio_precstate[i].last);
+ }
+ }
+ }
+ return (m);
+}
+
+#ifdef ALTQ3_COMPAT
+int
+rioopen(dev, flag, fmt, p)
+ dev_t dev;
+ int flag, fmt;
+#if (__FreeBSD_version > 500000)
+ struct thread *p;
+#else
+ struct proc *p;
+#endif
+{
+ /* everything will be done when the queueing scheme is attached. */
+ return 0;
+}
+
+int
+rioclose(dev, flag, fmt, p)
+ dev_t dev;
+ int flag, fmt;
+#if (__FreeBSD_version > 500000)
+ struct thread *p;
+#else
+ struct proc *p;
+#endif
+{
+ rio_queue_t *rqp;
+ int err, error = 0;
+
+ while ((rqp = rio_list) != NULL) {
+ /* destroy all */
+ err = rio_detach(rqp);
+ if (err != 0 && error == 0)
+ error = err;
+ }
+
+ return error;
+}
+
+int
+rioioctl(dev, cmd, addr, flag, p)
+ dev_t dev;
+ ioctlcmd_t cmd;
+ caddr_t addr;
+ int flag;
+#if (__FreeBSD_version > 500000)
+ struct thread *p;
+#else
+ struct proc *p;
+#endif
+{
+ rio_queue_t *rqp;
+ struct rio_interface *ifacep;
+ struct ifnet *ifp;
+ int error = 0;
+
+ /* check super-user privilege */
+ switch (cmd) {
+ case RIO_GETSTATS:
+ break;
+ default:
+#if (__FreeBSD_version > 700000)
+ if ((error = priv_check(p, PRIV_ALTQ_MANAGE)) != 0)
+ return (error);
+#elif (__FreeBSD_version > 400000)
+ if ((error = suser(p)) != 0)
+ return (error);
+#else
+ if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
+ return (error);
+#endif
+ break;
+ }
+
+ switch (cmd) {
+
+ case RIO_ENABLE:
+ ifacep = (struct rio_interface *)addr;
+ if ((rqp = altq_lookup(ifacep->rio_ifname, ALTQT_RIO)) == NULL) {
+ error = EBADF;
+ break;
+ }
+ error = altq_enable(rqp->rq_ifq);
+ break;
+
+ case RIO_DISABLE:
+ ifacep = (struct rio_interface *)addr;
+ if ((rqp = altq_lookup(ifacep->rio_ifname, ALTQT_RIO)) == NULL) {
+ error = EBADF;
+ break;
+ }
+ error = altq_disable(rqp->rq_ifq);
+ break;
+
+ case RIO_IF_ATTACH:
+ ifp = ifunit(((struct rio_interface *)addr)->rio_ifname);
+ if (ifp == NULL) {
+ error = ENXIO;
+ break;
+ }
+
+ /* allocate and initialize rio_queue_t */
+ rqp = malloc(sizeof(rio_queue_t), M_DEVBUF, M_WAITOK);
+ if (rqp == NULL) {
+ error = ENOMEM;
+ break;
+ }
+ bzero(rqp, sizeof(rio_queue_t));
+
+ rqp->rq_q = malloc(sizeof(class_queue_t),
+ M_DEVBUF, M_WAITOK);
+ if (rqp->rq_q == NULL) {
+ free(rqp, M_DEVBUF);
+ error = ENOMEM;
+ break;
+ }
+ bzero(rqp->rq_q, sizeof(class_queue_t));
+
+ rqp->rq_rio = rio_alloc(0, NULL, 0, 0);
+ if (rqp->rq_rio == NULL) {
+ free(rqp->rq_q, M_DEVBUF);
+ free(rqp, M_DEVBUF);
+ error = ENOMEM;
+ break;
+ }
+
+ rqp->rq_ifq = &ifp->if_snd;
+ qtail(rqp->rq_q) = NULL;
+ qlen(rqp->rq_q) = 0;
+ qlimit(rqp->rq_q) = RIO_LIMIT;
+ qtype(rqp->rq_q) = Q_RIO;
+
+ /*
+ * set RIO to this ifnet structure.
+ */
+ error = altq_attach(rqp->rq_ifq, ALTQT_RIO, rqp,
+ rio_enqueue, rio_dequeue, rio_request,
+ NULL, NULL);
+ if (error) {
+ rio_destroy(rqp->rq_rio);
+ free(rqp->rq_q, M_DEVBUF);
+ free(rqp, M_DEVBUF);
+ break;
+ }
+
+ /* add this state to the rio list */
+ rqp->rq_next = rio_list;
+ rio_list = rqp;
+ break;
+
+ case RIO_IF_DETACH:
+ ifacep = (struct rio_interface *)addr;
+ if ((rqp = altq_lookup(ifacep->rio_ifname, ALTQT_RIO)) == NULL) {
+ error = EBADF;
+ break;
+ }
+ error = rio_detach(rqp);
+ break;
+
+ case RIO_GETSTATS:
+ do {
+ struct rio_stats *q_stats;
+ rio_t *rp;
+ int i;
+
+ q_stats = (struct rio_stats *)addr;
+ if ((rqp = altq_lookup(q_stats->iface.rio_ifname,
+ ALTQT_RIO)) == NULL) {
+ error = EBADF;
+ break;
+ }
+
+ rp = rqp->rq_rio;
+
+ q_stats->q_limit = qlimit(rqp->rq_q);
+ q_stats->weight = rp->rio_weight;
+ q_stats->flags = rp->rio_flags;
+
+ for (i = 0; i < RIO_NDROPPREC; i++) {
+ q_stats->q_len[i] = rp->rio_precstate[i].qlen;
+ bcopy(&rp->q_stats[i], &q_stats->q_stats[i],
+ sizeof(struct redstats));
+ q_stats->q_stats[i].q_avg =
+ rp->rio_precstate[i].avg >> rp->rio_wshift;
+
+ q_stats->q_params[i].inv_pmax
+ = rp->rio_precstate[i].inv_pmax;
+ q_stats->q_params[i].th_min
+ = rp->rio_precstate[i].th_min;
+ q_stats->q_params[i].th_max
+ = rp->rio_precstate[i].th_max;
+ }
+ } while (/*CONSTCOND*/ 0);
+ break;
+
+ case RIO_CONFIG:
+ do {
+ struct rio_conf *fc;
+ rio_t *new;
+ int s, limit, i;
+
+ fc = (struct rio_conf *)addr;
+ if ((rqp = altq_lookup(fc->iface.rio_ifname,
+ ALTQT_RIO)) == NULL) {
+ error = EBADF;
+ break;
+ }
+
+ new = rio_alloc(fc->rio_weight, &fc->q_params[0],
+ fc->rio_flags, fc->rio_pkttime);
+ if (new == NULL) {
+ error = ENOMEM;
+ break;
+ }
+
+#ifdef __NetBSD__
+ s = splnet();
+#else
+ s = splimp();
+#endif
+ _flushq(rqp->rq_q);
+ limit = fc->rio_limit;
+ if (limit < fc->q_params[RIO_NDROPPREC-1].th_max)
+ limit = fc->q_params[RIO_NDROPPREC-1].th_max;
+ qlimit(rqp->rq_q) = limit;
+
+ rio_destroy(rqp->rq_rio);
+ rqp->rq_rio = new;
+
+ splx(s);
+
+ /* write back new values */
+ fc->rio_limit = limit;
+ for (i = 0; i < RIO_NDROPPREC; i++) {
+ fc->q_params[i].inv_pmax =
+ rqp->rq_rio->rio_precstate[i].inv_pmax;
+ fc->q_params[i].th_min =
+ rqp->rq_rio->rio_precstate[i].th_min;
+ fc->q_params[i].th_max =
+ rqp->rq_rio->rio_precstate[i].th_max;
+ }
+ } while (/*CONSTCOND*/ 0);
+ break;
+
+ case RIO_SETDEFAULTS:
+ do {
+ struct redparams *rp;
+ int i;
+
+ rp = (struct redparams *)addr;
+ for (i = 0; i < RIO_NDROPPREC; i++)
+ default_rio_params[i] = rp[i];
+ } while (/*CONSTCOND*/ 0);
+ break;
+
+ default:
+ error = EINVAL;
+ break;
+ }
+
+ return error;
+}
+
+static int
+rio_detach(rqp)
+ rio_queue_t *rqp;
+{
+ rio_queue_t *tmp;
+ int error = 0;
+
+ if (ALTQ_IS_ENABLED(rqp->rq_ifq))
+ altq_disable(rqp->rq_ifq);
+
+ if ((error = altq_detach(rqp->rq_ifq)))
+ return (error);
+
+ if (rio_list == rqp)
+ rio_list = rqp->rq_next;
+ else {
+ for (tmp = rio_list; tmp != NULL; tmp = tmp->rq_next)
+ if (tmp->rq_next == rqp) {
+ tmp->rq_next = rqp->rq_next;
+ break;
+ }
+ if (tmp == NULL)
+ printf("rio_detach: no state found in rio_list!\n");
+ }
+
+ rio_destroy(rqp->rq_rio);
+ free(rqp->rq_q, M_DEVBUF);
+ free(rqp, M_DEVBUF);
+ return (error);
+}
+
+/*
+ * rio support routines
+ */
+static int
+rio_request(ifq, req, arg)
+ struct ifaltq *ifq;
+ int req;
+ void *arg;
+{
+ rio_queue_t *rqp = (rio_queue_t *)ifq->altq_disc;
+
+ IFQ_LOCK_ASSERT(ifq);
+
+ switch (req) {
+ case ALTRQ_PURGE:
+ _flushq(rqp->rq_q);
+ if (ALTQ_IS_ENABLED(ifq))
+ ifq->ifq_len = 0;
+ break;
+ }
+ return (0);
+}
+
+/*
+ * enqueue routine:
+ *
+ * returns: 0 when successfully queued.
+ * ENOBUFS when drop occurs.
+ */
+static int
+rio_enqueue(ifq, m, pktattr)
+ struct ifaltq *ifq;
+ struct mbuf *m;
+ struct altq_pktattr *pktattr;
+{
+ rio_queue_t *rqp = (rio_queue_t *)ifq->altq_disc;
+ int error = 0;
+
+ IFQ_LOCK_ASSERT(ifq);
+
+ if (rio_addq(rqp->rq_rio, rqp->rq_q, m, pktattr) == 0)
+ ifq->ifq_len++;
+ else
+ error = ENOBUFS;
+ return error;
+}
+
+/*
+ * dequeue routine:
+ * must be called in splimp.
+ *
+ * returns: mbuf dequeued.
+ * NULL when no packet is available in the queue.
+ */
+
+static struct mbuf *
+rio_dequeue(ifq, op)
+ struct ifaltq *ifq;
+ int op;
+{
+ rio_queue_t *rqp = (rio_queue_t *)ifq->altq_disc;
+ struct mbuf *m = NULL;
+
+ IFQ_LOCK_ASSERT(ifq);
+
+ if (op == ALTDQ_POLL)
+ return qhead(rqp->rq_q);
+
+ m = rio_getq(rqp->rq_rio, rqp->rq_q);
+ if (m != NULL)
+ ifq->ifq_len--;
+ return m;
+}
+
+#ifdef KLD_MODULE
+
+static struct altqsw rio_sw =
+ {"rio", rioopen, rioclose, rioioctl};
+
+ALTQ_MODULE(altq_rio, ALTQT_RIO, &rio_sw);
+MODULE_VERSION(altq_rio, 1);
+MODULE_DEPEND(altq_rio, altq_red, 1, 1, 1);
+
+#endif /* KLD_MODULE */
+#endif /* ALTQ3_COMPAT */
+
+#endif /* ALTQ_RIO */
diff --git a/sys/net/altq/altq_rio.h b/sys/net/altq/altq_rio.h
new file mode 100644
index 0000000..ce9dc0e
--- /dev/null
+++ b/sys/net/altq/altq_rio.h
@@ -0,0 +1,145 @@
+/*-
+ * Copyright (C) 1998-2003
+ * Sony Computer Science Laboratories Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $KAME: altq_rio.h,v 1.9 2003/07/10 12:07:49 kjc Exp $
+ * $FreeBSD$
+ */
+
+#ifndef _ALTQ_ALTQ_RIO_H_
+#define _ALTQ_ALTQ_RIO_H_
+
+#include <net/altq/altq_classq.h>
+
+/*
+ * RIO: RED with IN/OUT bit
+ * (extended to support more than 2 drop precedence values)
+ */
+#define RIO_NDROPPREC 3 /* number of drop precedence values */
+
+#ifdef ALTQ3_COMPAT
+struct rio_interface {
+ char rio_ifname[IFNAMSIZ];
+};
+
+struct rio_stats {
+ struct rio_interface iface;
+ int q_len[RIO_NDROPPREC];
+ struct redstats q_stats[RIO_NDROPPREC];
+
+ /* static red parameters */
+ int q_limit;
+ int weight;
+ int flags;
+ struct redparams q_params[RIO_NDROPPREC];
+};
+
+struct rio_conf {
+ struct rio_interface iface;
+ struct redparams q_params[RIO_NDROPPREC];
+ int rio_weight; /* weight for EWMA */
+ int rio_limit; /* max queue length */
+ int rio_pkttime; /* average packet time in usec */
+ int rio_flags; /* see below */
+};
+#endif /* ALTQ3_COMPAT */
+
+/* rio flags */
+#define RIOF_ECN4 0x01 /* use packet marking for IPv4 packets */
+#define RIOF_ECN6 0x02 /* use packet marking for IPv6 packets */
+#define RIOF_ECN (RIOF_ECN4 | RIOF_ECN6)
+#define RIOF_CLEARDSCP 0x200 /* clear diffserv codepoint */
+
+#ifdef ALTQ3_COMPAT
+/*
+ * IOCTLs for RIO
+ */
+#define RIO_IF_ATTACH _IOW('Q', 1, struct rio_interface)
+#define RIO_IF_DETACH _IOW('Q', 2, struct rio_interface)
+#define RIO_ENABLE _IOW('Q', 3, struct rio_interface)
+#define RIO_DISABLE _IOW('Q', 4, struct rio_interface)
+#define RIO_CONFIG _IOWR('Q', 6, struct rio_conf)
+#define RIO_GETSTATS _IOWR('Q', 12, struct rio_stats)
+#define RIO_SETDEFAULTS _IOW('Q', 30, struct redparams[RIO_NDROPPREC])
+#endif /* ALTQ3_COMPAT */
+
+#ifdef _KERNEL
+
+typedef struct rio {
+ /* per drop precedence structure */
+ struct dropprec_state {
+ /* red parameters */
+ int inv_pmax; /* inverse of max drop probability */
+ int th_min; /* red min threshold */
+ int th_max; /* red max threshold */
+
+ /* variables for internal use */
+ int th_min_s; /* th_min scaled by avgshift */
+ int th_max_s; /* th_max scaled by avgshift */
+ int probd; /* drop probability denominator */
+
+ int qlen; /* queue length */
+ int avg; /* (scaled) queue length average */
+ int count; /* packet count since the last dropped/
+ marked packet */
+ int idle; /* queue was empty */
+ int old; /* avg is above th_min */
+ struct timeval last; /* timestamp when queue becomes idle */
+ } rio_precstate[RIO_NDROPPREC];
+
+ int rio_wshift; /* log(red_weight) */
+ int rio_weight; /* weight for EWMA */
+ struct wtab *rio_wtab; /* weight table */
+
+ int rio_pkttime; /* average packet time in micro sec
+ used for idle calibration */
+ int rio_flags; /* rio flags */
+
+ u_int8_t rio_codepoint; /* codepoint value to tag packets */
+ u_int8_t rio_codepointmask; /* codepoint mask bits */
+
+ struct redstats q_stats[RIO_NDROPPREC]; /* statistics */
+} rio_t;
+
+#ifdef ALTQ3_COMPAT
+typedef struct rio_queue {
+ struct rio_queue *rq_next; /* next red_state in the list */
+ struct ifaltq *rq_ifq; /* backpointer to ifaltq */
+
+ class_queue_t *rq_q;
+
+ rio_t *rq_rio;
+} rio_queue_t;
+#endif /* ALTQ3_COMPAT */
+
+extern rio_t *rio_alloc(int, struct redparams *, int, int);
+extern void rio_destroy(rio_t *);
+extern void rio_getstats(rio_t *, struct redstats *);
+extern int rio_addq(rio_t *, class_queue_t *, struct mbuf *,
+ struct altq_pktattr *);
+extern struct mbuf *rio_getq(rio_t *, class_queue_t *);
+
+#endif /* _KERNEL */
+
+#endif /* _ALTQ_ALTQ_RIO_H_ */
diff --git a/sys/net/altq/altq_rmclass.c b/sys/net/altq/altq_rmclass.c
new file mode 100644
index 0000000..b5e23f7
--- /dev/null
+++ b/sys/net/altq/altq_rmclass.c
@@ -0,0 +1,1836 @@
+/* $FreeBSD$ */
+/* $KAME: altq_rmclass.c,v 1.19 2005/04/13 03:44:25 suz Exp $ */
+
+/*
+ * Copyright (c) 1991-1997 Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the Network Research
+ * Group at Lawrence Berkeley Laboratory.
+ * 4. Neither the name of the University nor of the Laboratory may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * LBL code modified by speer@eng.sun.com, May 1977.
+ * For questions and/or comments, please send mail to cbq@ee.lbl.gov
+ *
+ * @(#)rm_class.c 1.48 97/12/05 SMI
+ */
+#if defined(__FreeBSD__) || defined(__NetBSD__)
+#include "opt_altq.h"
+#include "opt_inet.h"
+#ifdef __FreeBSD__
+#include "opt_inet6.h"
+#endif
+#endif /* __FreeBSD__ || __NetBSD__ */
+#ifdef ALTQ_CBQ /* cbq is enabled by ALTQ_CBQ option in opt_altq.h */
+
+#include <sys/param.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/systm.h>
+#include <sys/errno.h>
+#include <sys/time.h>
+#ifdef ALTQ3_COMPAT
+#include <sys/kernel.h>
+#endif
+
+#include <net/if.h>
+#include <net/if_var.h>
+#ifdef ALTQ3_COMPAT
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#endif
+
+#include <net/altq/if_altq.h>
+#include <net/altq/altq.h>
+#include <net/altq/altq_rmclass.h>
+#include <net/altq/altq_rmclass_debug.h>
+#include <net/altq/altq_red.h>
+#include <net/altq/altq_rio.h>
+
+/*
+ * Local Macros
+ */
+
+#define reset_cutoff(ifd) { ifd->cutoff_ = RM_MAXDEPTH; }
+
+/*
+ * Local routines.
+ */
+
+static int rmc_satisfied(struct rm_class *, struct timeval *);
+static void rmc_wrr_set_weights(struct rm_ifdat *);
+static void rmc_depth_compute(struct rm_class *);
+static void rmc_depth_recompute(rm_class_t *);
+
+static mbuf_t *_rmc_wrr_dequeue_next(struct rm_ifdat *, int);
+static mbuf_t *_rmc_prr_dequeue_next(struct rm_ifdat *, int);
+
+static int _rmc_addq(rm_class_t *, mbuf_t *);
+static void _rmc_dropq(rm_class_t *);
+static mbuf_t *_rmc_getq(rm_class_t *);
+static mbuf_t *_rmc_pollq(rm_class_t *);
+
+static int rmc_under_limit(struct rm_class *, struct timeval *);
+static void rmc_tl_satisfied(struct rm_ifdat *, struct timeval *);
+static void rmc_drop_action(struct rm_class *);
+static void rmc_restart(struct rm_class *);
+static void rmc_root_overlimit(struct rm_class *, struct rm_class *);
+
+#define BORROW_OFFTIME
+/*
+ * BORROW_OFFTIME (experimental):
+ * borrow the offtime of the class borrowing from.
+ * the reason is that when its own offtime is set, the class is unable
+ * to borrow much, especially when cutoff is taking effect.
+ * but when the borrowed class is overloaded (advidle is close to minidle),
+ * use the borrowing class's offtime to avoid overload.
+ */
+#define ADJUST_CUTOFF
+/*
+ * ADJUST_CUTOFF (experimental):
+ * if no underlimit class is found due to cutoff, increase cutoff and
+ * retry the scheduling loop.
+ * also, don't invoke delay_actions while cutoff is taking effect,
+ * since a sleeping class won't have a chance to be scheduled in the
+ * next loop.
+ *
+ * now the heuristics for setting the top-level variable (cutoff_) become:
+ * 1. if a packet arrives for a not-overlimit class, set cutoff
+ * to the depth of the class.
+ * 2. if cutoff is i, and a packet arrives for an overlimit class
+ * with an underlimit ancestor at a lower level than i (say j),
+ * then set cutoff to j.
+ * 3. at scheduling a packet, if there is no underlimit class
+ * due to the current cutoff level, increase cutoff by 1 and
+ * then try to schedule again.
+ */
+
+/*
+ * rm_class_t *
+ * rmc_newclass(...) - Create a new resource management class at priority
+ * 'pri' on the interface given by 'ifd'.
+ *
+ * nsecPerByte is the data rate of the interface in nanoseconds/byte.
+ * E.g., 800 for a 10Mb/s ethernet. If the class gets less
+ * than 100% of the bandwidth, this number should be the
+ * 'effective' rate for the class. Let f be the
+ * bandwidth fraction allocated to this class, and let
+ * nsPerByte be the data rate of the output link in
+ * nanoseconds/byte. Then nsecPerByte is set to
+ * nsPerByte / f. E.g., 1600 (= 800 / .5)
+ * for a class that gets 50% of an ethernet's bandwidth.
+ *
+ * action the routine to call when the class is over limit.
+ *
+ * maxq max allowable queue size for class (in packets).
+ *
+ * parent parent class pointer.
+ *
+ * borrow class to borrow from (should be either 'parent' or null).
+ *
+ * maxidle max value allowed for class 'idle' time estimate (this
+ * parameter determines how large an initial burst of packets
+ * can be before overlimit action is invoked).
+ *
+ * offtime how long 'delay' action will delay when class goes over
+ * limit (this parameter determines the steady-state burst
+ * size when a class is running over its limit).
+ *
+ * Maxidle and offtime have to be computed from the following: If the
+ * average packet size is s, the bandwidth fraction allocated to this
+ * class is f, we want to allow b packet bursts, and the gain of the
+ * averaging filter is g (= 1 - 2^(-RM_FILTER_GAIN)), then:
+ *
+ * ptime = s * nsPerByte * (1 - f) / f
+ * maxidle = ptime * (1 - g^b) / g^b
+ * minidle = -ptime * (1 / (f - 1))
+ * offtime = ptime * (1 + 1/(1 - g) * (1 - g^(b - 1)) / g^(b - 1))
+ *
+ * Operationally, it's convenient to specify maxidle & offtime in units
+ * independent of the link bandwidth so the maxidle & offtime passed to
+ * this routine are the above values multiplied by 8*f/(1000*nsPerByte).
+ * (The constant factor is a scale factor needed to make the parameters
+ * integers. This scaling also means that the 'unscaled' values of
+ * maxidle*nsecPerByte/8 and offtime*nsecPerByte/8 will be in microseconds,
+ * not nanoseconds.) Also note that the 'idle' filter computation keeps
+ * an estimate scaled upward by 2^RM_FILTER_GAIN so the passed value of
+ * maxidle also must be scaled upward by this value. Thus, the passed
+ * values for maxidle and offtime can be computed as follows:
+ *
+ * maxidle = maxidle * 2^RM_FILTER_GAIN * 8 / (1000 * nsecPerByte)
+ * offtime = offtime * 8 / (1000 * nsecPerByte)
+ *
+ * When USE_HRTIME is employed, then maxidle and offtime become:
+ * maxidle = maxidle * (8.0 / nsecPerByte);
+ * offtime = offtime * (8.0 / nsecPerByte);
+ */
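Continuing the example above: for the class that gets 50% of a 10Mb/s ethernet (nsecPerByte = 1600), the allotment computed in rmc_newclass() below is RM_NS_PER_SEC / 1600, i.e. 625,000 bytes per second assuming RM_NS_PER_SEC is 10^9, which is exactly the 5 Mb/s share the class may use before it has to borrow.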
+struct rm_class *
+rmc_newclass(int pri, struct rm_ifdat *ifd, u_int nsecPerByte,
+ void (*action)(rm_class_t *, rm_class_t *), int maxq,
+ struct rm_class *parent, struct rm_class *borrow, u_int maxidle,
+ int minidle, u_int offtime, int pktsize, int flags)
+{
+ struct rm_class *cl;
+ struct rm_class *peer;
+ int s;
+
+ if (pri >= RM_MAXPRIO)
+ return (NULL);
+#ifndef ALTQ_RED
+ if (flags & RMCF_RED) {
+#ifdef ALTQ_DEBUG
+ printf("rmc_newclass: RED not configured for CBQ!\n");
+#endif
+ return (NULL);
+ }
+#endif
+#ifndef ALTQ_RIO
+ if (flags & RMCF_RIO) {
+#ifdef ALTQ_DEBUG
+ printf("rmc_newclass: RIO not configured for CBQ!\n");
+#endif
+ return (NULL);
+ }
+#endif
+
+ cl = malloc(sizeof(struct rm_class), M_DEVBUF, M_NOWAIT | M_ZERO);
+ if (cl == NULL)
+ return (NULL);
+ CALLOUT_INIT(&cl->callout_);
+ cl->q_ = malloc(sizeof(class_queue_t), M_DEVBUF, M_NOWAIT | M_ZERO);
+ if (cl->q_ == NULL) {
+ free(cl, M_DEVBUF);
+ return (NULL);
+ }
+
+ /*
+ * Class initialization.
+ */
+ cl->children_ = NULL;
+ cl->parent_ = parent;
+ cl->borrow_ = borrow;
+ cl->leaf_ = 1;
+ cl->ifdat_ = ifd;
+ cl->pri_ = pri;
+ cl->allotment_ = RM_NS_PER_SEC / nsecPerByte; /* Bytes per sec */
+ cl->depth_ = 0;
+ cl->qthresh_ = 0;
+ cl->ns_per_byte_ = nsecPerByte;
+
+ qlimit(cl->q_) = maxq;
+ qtype(cl->q_) = Q_DROPHEAD;
+ qlen(cl->q_) = 0;
+ cl->flags_ = flags;
+
+#if 1 /* minidle is also scaled in ALTQ */
+ cl->minidle_ = (minidle * (int)nsecPerByte) / 8;
+ if (cl->minidle_ > 0)
+ cl->minidle_ = 0;
+#else
+ cl->minidle_ = minidle;
+#endif
+ cl->maxidle_ = (maxidle * nsecPerByte) / 8;
+ if (cl->maxidle_ == 0)
+ cl->maxidle_ = 1;
+#if 1 /* offtime is also scaled in ALTQ */
+ cl->avgidle_ = cl->maxidle_;
+ cl->offtime_ = ((offtime * nsecPerByte) / 8) >> RM_FILTER_GAIN;
+ if (cl->offtime_ == 0)
+ cl->offtime_ = 1;
+#else
+ cl->avgidle_ = 0;
+ cl->offtime_ = (offtime * nsecPerByte) / 8;
+#endif
+ cl->overlimit = action;
+
+#ifdef ALTQ_RED
+ if (flags & (RMCF_RED|RMCF_RIO)) {
+ int red_flags, red_pkttime;
+
+ red_flags = 0;
+ if (flags & RMCF_ECN)
+ red_flags |= REDF_ECN;
+ if (flags & RMCF_FLOWVALVE)
+ red_flags |= REDF_FLOWVALVE;
+#ifdef ALTQ_RIO
+ if (flags & RMCF_CLEARDSCP)
+ red_flags |= RIOF_CLEARDSCP;
+#endif
+ red_pkttime = nsecPerByte * pktsize / 1000;
+
+ if (flags & RMCF_RED) {
+ cl->red_ = red_alloc(0, 0,
+ qlimit(cl->q_) * 10/100,
+ qlimit(cl->q_) * 30/100,
+ red_flags, red_pkttime);
+ if (cl->red_ != NULL)
+ qtype(cl->q_) = Q_RED;
+ }
+#ifdef ALTQ_RIO
+ else {
+ cl->red_ = (red_t *)rio_alloc(0, NULL,
+ red_flags, red_pkttime);
+ if (cl->red_ != NULL)
+ qtype(cl->q_) = Q_RIO;
+ }
+#endif
+ }
+#endif /* ALTQ_RED */
+
+ /*
+ * put the class into the class tree
+ */
+#ifdef __NetBSD__
+ s = splnet();
+#else
+ s = splimp();
+#endif
+ IFQ_LOCK(ifd->ifq_);
+ if ((peer = ifd->active_[pri]) != NULL) {
+ /* find the last class at this pri */
+ cl->peer_ = peer;
+ while (peer->peer_ != ifd->active_[pri])
+ peer = peer->peer_;
+ peer->peer_ = cl;
+ } else {
+ ifd->active_[pri] = cl;
+ cl->peer_ = cl;
+ }
+
+ if (cl->parent_) {
+ cl->next_ = parent->children_;
+ parent->children_ = cl;
+ parent->leaf_ = 0;
+ }
+
+ /*
+ * Compute the depth of this class and its ancestors in the class
+ * hierarchy.
+ */
+ rmc_depth_compute(cl);
+
+ /*
+ * If CBQ's WRR is enabled, then initialize the class WRR state.
+ */
+ if (ifd->wrr_) {
+ ifd->num_[pri]++;
+ ifd->alloc_[pri] += cl->allotment_;
+ rmc_wrr_set_weights(ifd);
+ }
+ IFQ_UNLOCK(ifd->ifq_);
+ splx(s);
+ return (cl);
+}
+
+int
+rmc_modclass(struct rm_class *cl, u_int nsecPerByte, int maxq, u_int maxidle,
+ int minidle, u_int offtime, int pktsize)
+{
+ struct rm_ifdat *ifd;
+ u_int old_allotment;
+ int s;
+
+ ifd = cl->ifdat_;
+ old_allotment = cl->allotment_;
+
+#ifdef __NetBSD__
+ s = splnet();
+#else
+ s = splimp();
+#endif
+ IFQ_LOCK(ifd->ifq_);
+ cl->allotment_ = RM_NS_PER_SEC / nsecPerByte; /* Bytes per sec */
+ cl->qthresh_ = 0;
+ cl->ns_per_byte_ = nsecPerByte;
+
+ qlimit(cl->q_) = maxq;
+
+#if 1 /* minidle is also scaled in ALTQ */
+ cl->minidle_ = (minidle * nsecPerByte) / 8;
+ if (cl->minidle_ > 0)
+ cl->minidle_ = 0;
+#else
+ cl->minidle_ = minidle;
+#endif
+ cl->maxidle_ = (maxidle * nsecPerByte) / 8;
+ if (cl->maxidle_ == 0)
+ cl->maxidle_ = 1;
+#if 1 /* offtime is also scaled in ALTQ */
+ cl->avgidle_ = cl->maxidle_;
+ cl->offtime_ = ((offtime * nsecPerByte) / 8) >> RM_FILTER_GAIN;
+ if (cl->offtime_ == 0)
+ cl->offtime_ = 1;
+#else
+ cl->avgidle_ = 0;
+ cl->offtime_ = (offtime * nsecPerByte) / 8;
+#endif
+
+ /*
+ * If CBQ's WRR is enabled, then initialize the class WRR state.
+ */
+ if (ifd->wrr_) {
+ ifd->alloc_[cl->pri_] += cl->allotment_ - old_allotment;
+ rmc_wrr_set_weights(ifd);
+ }
+ IFQ_UNLOCK(ifd->ifq_);
+ splx(s);
+ return (0);
+}
+
+/*
+ * static void
+ * rmc_wrr_set_weights(struct rm_ifdat *ifdat) - This function computes
+ * the appropriate round robin weights for the CBQ weighted round robin
+ * algorithm.
+ *
+ * Returns: NONE
+ */
+
+static void
+rmc_wrr_set_weights(struct rm_ifdat *ifd)
+{
+ int i;
+ struct rm_class *cl, *clh;
+
+ for (i = 0; i < RM_MAXPRIO; i++) {
+ /*
+ * This is inverted from that of the simulator to
+ * maintain precision.
+ */
+ if (ifd->num_[i] == 0)
+ ifd->M_[i] = 0;
+ else
+ ifd->M_[i] = ifd->alloc_[i] /
+ (ifd->num_[i] * ifd->maxpkt_);
+ /*
+ * Compute the weighted allotment for each class.
+ * This takes the expensive div instruction out
+ * of the main loop for the wrr scheduling path.
+ * These only get recomputed when a class comes or
+ * goes.
+ */
+ if (ifd->active_[i] != NULL) {
+ clh = cl = ifd->active_[i];
+ do {
+ /* safe-guard for slow link or alloc_ == 0 */
+ if (ifd->M_[i] == 0)
+ cl->w_allotment_ = 0;
+ else
+ cl->w_allotment_ = cl->allotment_ /
+ ifd->M_[i];
+ cl = cl->peer_;
+ } while ((cl != NULL) && (cl != clh));
+ }
+ }
+}
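A rough worked example of the weights computed above (the numbers are illustrative, not from this change): two classes at one priority with allotments of 625000 and 312500 bytes/s on an interface with maxpkt_ = 1500 give M_[i] = (625000 + 312500) / (2 * 1500) = 312 after integer division, and w_allotment_ values of 2003 and 1001 bytes respectively, so each round of the WRR loop lets a class send roughly its proportional share, on the order of one maximum-sized packet.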
+
+int
+rmc_get_weight(struct rm_ifdat *ifd, int pri)
+{
+ if ((pri >= 0) && (pri < RM_MAXPRIO))
+ return (ifd->M_[pri]);
+ else
+ return (0);
+}
+
+/*
+ * static void
+ * rmc_depth_compute(struct rm_class *cl) - This function computes the
+ * appropriate depth of class 'cl' and its ancestors.
+ *
+ * Returns: NONE
+ */
+
+static void
+rmc_depth_compute(struct rm_class *cl)
+{
+ rm_class_t *t = cl, *p;
+
+ /*
+ * Recompute the depth for the branch of the tree.
+ */
+ while (t != NULL) {
+ p = t->parent_;
+ if (p && (t->depth_ >= p->depth_)) {
+ p->depth_ = t->depth_ + 1;
+ t = p;
+ } else
+ t = NULL;
+ }
+}
+
+/*
+ * static void
+ * rmc_depth_recompute(struct rm_class *cl) - This function re-computes
+ * the depth of the tree after a class has been deleted.
+ *
+ * Returns: NONE
+ */
+
+static void
+rmc_depth_recompute(rm_class_t *cl)
+{
+#if 1 /* ALTQ */
+ rm_class_t *p, *t;
+
+ p = cl;
+ while (p != NULL) {
+ if ((t = p->children_) == NULL) {
+ p->depth_ = 0;
+ } else {
+ int cdepth = 0;
+
+ while (t != NULL) {
+ if (t->depth_ > cdepth)
+ cdepth = t->depth_;
+ t = t->next_;
+ }
+
+ if (p->depth_ == cdepth + 1)
+ /* no change to this parent */
+ return;
+
+ p->depth_ = cdepth + 1;
+ }
+
+ p = p->parent_;
+ }
+#else
+ rm_class_t *t;
+
+ if (cl->depth_ >= 1) {
+ if (cl->children_ == NULL) {
+ cl->depth_ = 0;
+ } else if ((t = cl->children_) != NULL) {
+ while (t != NULL) {
+ if (t->children_ != NULL)
+ rmc_depth_recompute(t);
+ t = t->next_;
+ }
+ } else
+ rmc_depth_compute(cl);
+ }
+#endif
+}
+
+/*
+ * void
+ * rmc_delete_class(struct rm_ifdat *ifdat, struct rm_class *cl) - This
+ * function deletes a class from the link-sharing structure and frees
+ * all resources associated with the class.
+ *
+ * Returns: NONE
+ */
+
+void
+rmc_delete_class(struct rm_ifdat *ifd, struct rm_class *cl)
+{
+ struct rm_class *p, *head, *previous;
+ int s;
+
+ ASSERT(cl->children_ == NULL);
+
+ if (cl->sleeping_)
+ CALLOUT_STOP(&cl->callout_);
+
+#ifdef __NetBSD__
+ s = splnet();
+#else
+ s = splimp();
+#endif
+ IFQ_LOCK(ifd->ifq_);
+ /*
+ * Free packets in the packet queue.
+ * XXX - this may not be a desired behavior. Packets should be
+ * re-queued.
+ */
+ rmc_dropall(cl);
+
+ /*
+ * If the class has a parent, then remove the class from the
+ * parent's children chain.
+ */
+ if (cl->parent_ != NULL) {
+ head = cl->parent_->children_;
+ p = previous = head;
+ if (head->next_ == NULL) {
+ ASSERT(head == cl);
+ cl->parent_->children_ = NULL;
+ cl->parent_->leaf_ = 1;
+ } else while (p != NULL) {
+ if (p == cl) {
+ if (cl == head)
+ cl->parent_->children_ = cl->next_;
+ else
+ previous->next_ = cl->next_;
+ cl->next_ = NULL;
+ p = NULL;
+ } else {
+ previous = p;
+ p = p->next_;
+ }
+ }
+ }
+
+ /*
+ * Delete class from class priority peer list.
+ */
+ if ((p = ifd->active_[cl->pri_]) != NULL) {
+ /*
+ * If there is more than one member of this priority
+ * level, then look for class(cl) in the priority level.
+ */
+ if (p != p->peer_) {
+ while (p->peer_ != cl)
+ p = p->peer_;
+ p->peer_ = cl->peer_;
+
+ if (ifd->active_[cl->pri_] == cl)
+ ifd->active_[cl->pri_] = cl->peer_;
+ } else {
+ ASSERT(p == cl);
+ ifd->active_[cl->pri_] = NULL;
+ }
+ }
+
+ /*
+ * Recompute the WRR weights.
+ */
+ if (ifd->wrr_) {
+ ifd->alloc_[cl->pri_] -= cl->allotment_;
+ ifd->num_[cl->pri_]--;
+ rmc_wrr_set_weights(ifd);
+ }
+
+ /*
+ * Re-compute the depth of the tree.
+ */
+#if 1 /* ALTQ */
+ rmc_depth_recompute(cl->parent_);
+#else
+ rmc_depth_recompute(ifd->root_);
+#endif
+
+ IFQ_UNLOCK(ifd->ifq_);
+ splx(s);
+
+ /*
+ * Free the class structure.
+ */
+ if (cl->red_ != NULL) {
+#ifdef ALTQ_RIO
+ if (q_is_rio(cl->q_))
+ rio_destroy((rio_t *)cl->red_);
+#endif
+#ifdef ALTQ_RED
+ if (q_is_red(cl->q_))
+ red_destroy(cl->red_);
+#endif
+ }
+ free(cl->q_, M_DEVBUF);
+ free(cl, M_DEVBUF);
+}
+
+
+/*
+ * void
+ * rmc_init(...) - Initialize the resource management data structures
+ * associated with the output portion of interface 'ifp'. 'ifd' is
+ * where the structures will be built (for backwards compatibility, the
+ * structures aren't kept in the ifnet struct). 'nsecPerByte'
+ * gives the link speed (inverse of bandwidth) in nanoseconds/byte.
+ * 'restart' is the driver-specific routine that the generic 'delay
+ * until under limit' action will call to restart output. `maxq'
+ * is the queue size of the 'link' & 'default' classes. 'maxqueued'
+ * is the maximum number of packets that the resource management
+ * code will allow to be queued 'downstream' (this is typically 1).
+ *
+ * Returns: NONE
+ */
+
+void
+rmc_init(struct ifaltq *ifq, struct rm_ifdat *ifd, u_int nsecPerByte,
+ void (*restart)(struct ifaltq *), int maxq, int maxqueued, u_int maxidle,
+ int minidle, u_int offtime, int flags)
+{
+ int i, mtu;
+
+ /*
+ * Initialize the CBQ tracing/debug facility.
+ */
+ CBQTRACEINIT();
+
+ bzero((char *)ifd, sizeof (*ifd));
+ mtu = ifq->altq_ifp->if_mtu;
+ ifd->ifq_ = ifq;
+ ifd->restart = restart;
+ ifd->maxqueued_ = maxqueued;
+ ifd->ns_per_byte_ = nsecPerByte;
+ ifd->maxpkt_ = mtu;
+ ifd->wrr_ = (flags & RMCF_WRR) ? 1 : 0;
+ ifd->efficient_ = (flags & RMCF_EFFICIENT) ? 1 : 0;
+#if 1
+ ifd->maxiftime_ = mtu * nsecPerByte / 1000 * 16;
+ if (mtu * nsecPerByte > 10 * 1000000)
+ ifd->maxiftime_ /= 4;
+#endif
+
+ reset_cutoff(ifd);
+ CBQTRACE(rmc_init, 'INIT', ifd->cutoff_);
+
+ /*
+ * Initialize the CBQ's WRR state.
+ */
+ for (i = 0; i < RM_MAXPRIO; i++) {
+ ifd->alloc_[i] = 0;
+ ifd->M_[i] = 0;
+ ifd->num_[i] = 0;
+ ifd->na_[i] = 0;
+ ifd->active_[i] = NULL;
+ }
+
+ /*
+ * Initialize current packet state.
+ */
+ ifd->qi_ = 0;
+ ifd->qo_ = 0;
+ for (i = 0; i < RM_MAXQUEUED; i++) {
+ ifd->class_[i] = NULL;
+ ifd->curlen_[i] = 0;
+ ifd->borrowed_[i] = NULL;
+ }
+
+ /*
+ * Create the root class of the link-sharing structure.
+ */
+ if ((ifd->root_ = rmc_newclass(0, ifd,
+ nsecPerByte,
+ rmc_root_overlimit, maxq, 0, 0,
+ maxidle, minidle, offtime,
+ 0, 0)) == NULL) {
+ printf("rmc_init: root class not allocated\n");
+ return ;
+ }
+ ifd->root_->depth_ = 0;
+}
+
+/*
+ * void
+ * rmc_queue_packet(struct rm_class *cl, mbuf_t *m) - Add packet given by
+ * mbuf 'm' to queue for resource class 'cl'. This routine is called
+ * by a driver's if_output routine. This routine must be called with
+ * output packet completion interrupts locked out (to avoid racing with
+ * rmc_dequeue_next).
+ *
+ * Returns: 0 on successful queueing
+ * -1 when packet drop occurs
+ */
+int
+rmc_queue_packet(struct rm_class *cl, mbuf_t *m)
+{
+ struct timeval now;
+ struct rm_ifdat *ifd = cl->ifdat_;
+ int cpri = cl->pri_;
+ int is_empty = qempty(cl->q_);
+
+ RM_GETTIME(now);
+ if (ifd->cutoff_ > 0) {
+ if (TV_LT(&cl->undertime_, &now)) {
+ if (ifd->cutoff_ > cl->depth_)
+ ifd->cutoff_ = cl->depth_;
+ CBQTRACE(rmc_queue_packet, 'ffoc', cl->depth_);
+ }
+#if 1 /* ALTQ */
+ else {
+ /*
+ * the class is overlimit. if the class has
+ * underlimit ancestors, set cutoff to the lowest
+ * depth among them.
+ */
+ struct rm_class *borrow = cl->borrow_;
+
+ while (borrow != NULL &&
+ borrow->depth_ < ifd->cutoff_) {
+ if (TV_LT(&borrow->undertime_, &now)) {
+ ifd->cutoff_ = borrow->depth_;
+ CBQTRACE(rmc_queue_packet, 'ffob', ifd->cutoff_);
+ break;
+ }
+ borrow = borrow->borrow_;
+ }
+ }
+#else /* !ALTQ */
+ else if ((ifd->cutoff_ > 1) && cl->borrow_) {
+ if (TV_LT(&cl->borrow_->undertime_, &now)) {
+ ifd->cutoff_ = cl->borrow_->depth_;
+ CBQTRACE(rmc_queue_packet, 'ffob',
+ cl->borrow_->depth_);
+ }
+ }
+#endif /* !ALTQ */
+ }
+
+ if (_rmc_addq(cl, m) < 0)
+ /* failed */
+ return (-1);
+
+ if (is_empty) {
+ CBQTRACE(rmc_queue_packet, 'ytpe', cl->stats_.handle);
+ ifd->na_[cpri]++;
+ }
+
+ if (qlen(cl->q_) > qlimit(cl->q_)) {
+ /* note: qlimit can be set to 0 or 1 */
+ rmc_drop_action(cl);
+ return (-1);
+ }
+ return (0);
+}
+
+/*
+ * void
+ * rmc_tl_satisfied(struct rm_ifdat *ifd, struct timeval *now) - Check all
+ * classes to see if they are satisfied.
+ */
+
+static void
+rmc_tl_satisfied(struct rm_ifdat *ifd, struct timeval *now)
+{
+ int i;
+ rm_class_t *p, *bp;
+
+ for (i = RM_MAXPRIO - 1; i >= 0; i--) {
+ if ((bp = ifd->active_[i]) != NULL) {
+ p = bp;
+ do {
+ if (!rmc_satisfied(p, now)) {
+ ifd->cutoff_ = p->depth_;
+ return;
+ }
+ p = p->peer_;
+ } while (p != bp);
+ }
+ }
+
+ reset_cutoff(ifd);
+}
+
+/*
+ * rmc_satisfied - Return 1 if the class is satisfied, 0 otherwise.
+ */
+
+static int
+rmc_satisfied(struct rm_class *cl, struct timeval *now)
+{
+ rm_class_t *p;
+
+ if (cl == NULL)
+ return (1);
+ if (TV_LT(now, &cl->undertime_))
+ return (1);
+ if (cl->depth_ == 0) {
+ if (!cl->sleeping_ && (qlen(cl->q_) > cl->qthresh_))
+ return (0);
+ else
+ return (1);
+ }
+ if (cl->children_ != NULL) {
+ p = cl->children_;
+ while (p != NULL) {
+ if (!rmc_satisfied(p, now))
+ return (0);
+ p = p->next_;
+ }
+ }
+
+ return (1);
+}
+
+/*
+ * Return 1 if class 'cl' is under limit or can borrow from a parent,
+ * 0 if overlimit. As a side-effect, this routine will invoke the
+ * class overlimit action if the class is overlimit.
+ */
+
+static int
+rmc_under_limit(struct rm_class *cl, struct timeval *now)
+{
+ rm_class_t *p = cl;
+ rm_class_t *top;
+ struct rm_ifdat *ifd = cl->ifdat_;
+
+ ifd->borrowed_[ifd->qi_] = NULL;
+ /*
+ * If cl is the root class, then always return that it is
+ * underlimit. Otherwise, check to see if the class is underlimit.
+ */
+ if (cl->parent_ == NULL)
+ return (1);
+
+ if (cl->sleeping_) {
+ if (TV_LT(now, &cl->undertime_))
+ return (0);
+
+ CALLOUT_STOP(&cl->callout_);
+ cl->sleeping_ = 0;
+ cl->undertime_.tv_sec = 0;
+ return (1);
+ }
+
+ top = NULL;
+ while (cl->undertime_.tv_sec && TV_LT(now, &cl->undertime_)) {
+ if (((cl = cl->borrow_) == NULL) ||
+ (cl->depth_ > ifd->cutoff_)) {
+#ifdef ADJUST_CUTOFF
+ if (cl != NULL)
+ /* cutoff is taking effect, just
+ return false without calling
+ the delay action. */
+ return (0);
+#endif
+#ifdef BORROW_OFFTIME
+ /*
+ * check if the class can borrow offtime too.
+ * borrow offtime from the top of the borrow
+ * chain if the top class is not overloaded.
+ */
+ if (cl != NULL) {
+ /* cutoff is taking effect, use this class as top. */
+ top = cl;
+ CBQTRACE(rmc_under_limit, 'ffou', ifd->cutoff_);
+ }
+ if (top != NULL && top->avgidle_ == top->minidle_)
+ top = NULL;
+ p->overtime_ = *now;
+ (p->overlimit)(p, top);
+#else
+ p->overtime_ = *now;
+ (p->overlimit)(p, NULL);
+#endif
+ return (0);
+ }
+ top = cl;
+ }
+
+ if (cl != p)
+ ifd->borrowed_[ifd->qi_] = cl;
+ return (1);
+}
+
+/*
+ * _rmc_wrr_dequeue_next() - This is the scheduler for WRR as opposed to
+ * Packet-by-packet round robin.
+ *
+ * The heart of the weighted round-robin scheduler, which decides which
+ * class next gets to send a packet. Highest priority first, then
+ * weighted round-robin within priorities.
+ *
+ * Each able-to-send class gets to send until its byte allocation is
+ * exhausted. Thus, the active pointer is only changed after a class has
+ * exhausted its allocation.
+ *
+ * If the scheduler finds no class that is underlimit or able to borrow,
+ * then the first class found that had a nonzero queue and is allowed to
+ * borrow gets to send.
+ */
+
+static mbuf_t *
+_rmc_wrr_dequeue_next(struct rm_ifdat *ifd, int op)
+{
+ struct rm_class *cl = NULL, *first = NULL;
+ u_int deficit;
+ int cpri;
+ mbuf_t *m;
+ struct timeval now;
+
+ RM_GETTIME(now);
+
+ /*
+ * if the driver polls the top of the queue and then removes
+ * the polled packet, we must return the same packet.
+ */
+ if (op == ALTDQ_REMOVE && ifd->pollcache_) {
+ cl = ifd->pollcache_;
+ cpri = cl->pri_;
+ if (ifd->efficient_) {
+ /* check if this class is overlimit */
+ if (cl->undertime_.tv_sec != 0 &&
+ rmc_under_limit(cl, &now) == 0)
+ first = cl;
+ }
+ ifd->pollcache_ = NULL;
+ goto _wrr_out;
+ }
+ else {
+ /* mode == ALTDQ_POLL || pollcache == NULL */
+ ifd->pollcache_ = NULL;
+ ifd->borrowed_[ifd->qi_] = NULL;
+ }
+#ifdef ADJUST_CUTOFF
+ _again:
+#endif
+ for (cpri = RM_MAXPRIO - 1; cpri >= 0; cpri--) {
+ if (ifd->na_[cpri] == 0)
+ continue;
+ deficit = 0;
+ /*
+ * Loop through twice for a priority level, if some class
+ * was unable to send a packet the first round because
+ * of the weighted round-robin mechanism.
+ * During the second loop at this level, deficit==2.
+ * (This second loop is not needed if for every class,
+ * "M[cl->pri_])" times "cl->allotment" is greater than
+ * the byte size for the largest packet in the class.)
+ */
+ _wrr_loop:
+ cl = ifd->active_[cpri];
+ ASSERT(cl != NULL);
+ do {
+ if ((deficit < 2) && (cl->bytes_alloc_ <= 0))
+ cl->bytes_alloc_ += cl->w_allotment_;
+ if (!qempty(cl->q_)) {
+ if ((cl->undertime_.tv_sec == 0) ||
+ rmc_under_limit(cl, &now)) {
+ if (cl->bytes_alloc_ > 0 || deficit > 1)
+ goto _wrr_out;
+
+ /* underlimit but no alloc */
+ deficit = 1;
+#if 1
+ ifd->borrowed_[ifd->qi_] = NULL;
+#endif
+ }
+ else if (first == NULL && cl->borrow_ != NULL)
+ first = cl; /* borrowing candidate */
+ }
+
+ cl->bytes_alloc_ = 0;
+ cl = cl->peer_;
+ } while (cl != ifd->active_[cpri]);
+
+ if (deficit == 1) {
+ /* first loop found an underlimit class with deficit */
+ /* Loop on same priority level, with new deficit. */
+ deficit = 2;
+ goto _wrr_loop;
+ }
+ }
+
+#ifdef ADJUST_CUTOFF
+ /*
+ * no underlimit class found. if cutoff is taking effect,
+ * increase cutoff and try again.
+ */
+ if (first != NULL && ifd->cutoff_ < ifd->root_->depth_) {
+ ifd->cutoff_++;
+ CBQTRACE(_rmc_wrr_dequeue_next, 'ojda', ifd->cutoff_);
+ goto _again;
+ }
+#endif /* ADJUST_CUTOFF */
+ /*
+ * If LINK_EFFICIENCY is turned on, then the first overlimit
+ * class we encounter will send a packet if all the classes
+ * of the link-sharing structure are overlimit.
+ */
+ reset_cutoff(ifd);
+ CBQTRACE(_rmc_wrr_dequeue_next, 'otsr', ifd->cutoff_);
+
+ if (!ifd->efficient_ || first == NULL)
+ return (NULL);
+
+ cl = first;
+ cpri = cl->pri_;
+#if 0 /* too time-consuming for nothing */
+ if (cl->sleeping_)
+ CALLOUT_STOP(&cl->callout_);
+ cl->sleeping_ = 0;
+ cl->undertime_.tv_sec = 0;
+#endif
+ ifd->borrowed_[ifd->qi_] = cl->borrow_;
+ ifd->cutoff_ = cl->borrow_->depth_;
+
+ /*
+ * Dequeue the packet and do the bookkeeping...
+ */
+ _wrr_out:
+ if (op == ALTDQ_REMOVE) {
+ m = _rmc_getq(cl);
+ if (m == NULL)
+ panic("_rmc_wrr_dequeue_next");
+ if (qempty(cl->q_))
+ ifd->na_[cpri]--;
+
+ /*
+ * Update class statistics and link data.
+ */
+ if (cl->bytes_alloc_ > 0)
+ cl->bytes_alloc_ -= m_pktlen(m);
+
+ if ((cl->bytes_alloc_ <= 0) || first == cl)
+ ifd->active_[cl->pri_] = cl->peer_;
+ else
+ ifd->active_[cl->pri_] = cl;
+
+ ifd->class_[ifd->qi_] = cl;
+ ifd->curlen_[ifd->qi_] = m_pktlen(m);
+ ifd->now_[ifd->qi_] = now;
+ ifd->qi_ = (ifd->qi_ + 1) % ifd->maxqueued_;
+ ifd->queued_++;
+ } else {
+ /* mode == ALTDQ_POLL */
+ m = _rmc_pollq(cl);
+ ifd->pollcache_ = cl;
+ }
+ return (m);
+}
+
+/*
+ * Dequeue & return next packet from the highest priority class that
+ * has a packet to send & has enough allocation to send it. This
+ * routine is called by a driver whenever it needs a new packet to
+ * output.
+ */
+static mbuf_t *
+_rmc_prr_dequeue_next(struct rm_ifdat *ifd, int op)
+{
+ mbuf_t *m;
+ int cpri;
+ struct rm_class *cl, *first = NULL;
+ struct timeval now;
+
+ RM_GETTIME(now);
+
+ /*
+ * if the driver polls the top of the queue and then removes
+ * the polled packet, we must return the same packet.
+ */
+ if (op == ALTDQ_REMOVE && ifd->pollcache_) {
+ cl = ifd->pollcache_;
+ cpri = cl->pri_;
+ ifd->pollcache_ = NULL;
+ goto _prr_out;
+ } else {
+ /* mode == ALTDQ_POLL || pollcache == NULL */
+ ifd->pollcache_ = NULL;
+ ifd->borrowed_[ifd->qi_] = NULL;
+ }
+#ifdef ADJUST_CUTOFF
+ _again:
+#endif
+ for (cpri = RM_MAXPRIO - 1; cpri >= 0; cpri--) {
+ if (ifd->na_[cpri] == 0)
+ continue;
+ cl = ifd->active_[cpri];
+ ASSERT(cl != NULL);
+ do {
+ if (!qempty(cl->q_)) {
+ if ((cl->undertime_.tv_sec == 0) ||
+ rmc_under_limit(cl, &now))
+ goto _prr_out;
+ if (first == NULL && cl->borrow_ != NULL)
+ first = cl;
+ }
+ cl = cl->peer_;
+ } while (cl != ifd->active_[cpri]);
+ }
+
+#ifdef ADJUST_CUTOFF
+ /*
+ * no underlimit class found. if cutoff is taking effect, increase
+ * cutoff and try again.
+ */
+ if (first != NULL && ifd->cutoff_ < ifd->root_->depth_) {
+ ifd->cutoff_++;
+ goto _again;
+ }
+#endif /* ADJUST_CUTOFF */
+ /*
+ * If LINK_EFFICIENCY is turned on, then the first overlimit
+ * class we encounter will send a packet if all the classes
+ * of the link-sharing structure are overlimit.
+ */
+ reset_cutoff(ifd);
+ if (!ifd->efficient_ || first == NULL)
+ return (NULL);
+
+ cl = first;
+ cpri = cl->pri_;
+#if 0 /* too time-consuming for nothing */
+ if (cl->sleeping_)
+ CALLOUT_STOP(&cl->callout_);
+ cl->sleeping_ = 0;
+ cl->undertime_.tv_sec = 0;
+#endif
+ ifd->borrowed_[ifd->qi_] = cl->borrow_;
+ ifd->cutoff_ = cl->borrow_->depth_;
+
+ /*
+ * Dequeue the packet and do the bookkeeping...
+ */
+ _prr_out:
+ if (op == ALTDQ_REMOVE) {
+ m = _rmc_getq(cl);
+ if (m == NULL)
+ panic("_rmc_prr_dequeue_next");
+ if (qempty(cl->q_))
+ ifd->na_[cpri]--;
+
+ ifd->active_[cpri] = cl->peer_;
+
+ ifd->class_[ifd->qi_] = cl;
+ ifd->curlen_[ifd->qi_] = m_pktlen(m);
+ ifd->now_[ifd->qi_] = now;
+ ifd->qi_ = (ifd->qi_ + 1) % ifd->maxqueued_;
+ ifd->queued_++;
+ } else {
+ /* mode == ALTDQ_POLL */
+ m = _rmc_pollq(cl);
+ ifd->pollcache_ = cl;
+ }
+ return (m);
+}
+
+/*
+ * mbuf_t *
+ * rmc_dequeue_next(struct rm_ifdat *ifd, struct timeval *now) - this function
+ * is invoked by the packet driver to get the next packet to be
+ * dequeued and output on the link. If WRR is enabled, then the
+ * WRR dequeue next routine will determine the next packet to be sent.
+ * Otherwise, packet-by-packet round robin is invoked.
+ *
+ * Returns: NULL, if a packet is not available or if all
+ * classes are overlimit.
+ *
+ * Otherwise, Pointer to the next packet.
+ */
+
+mbuf_t *
+rmc_dequeue_next(struct rm_ifdat *ifd, int mode)
+{
+ if (ifd->queued_ >= ifd->maxqueued_)
+ return (NULL);
+ else if (ifd->wrr_)
+ return (_rmc_wrr_dequeue_next(ifd, mode));
+ else
+ return (_rmc_prr_dequeue_next(ifd, mode));
+}
+
+/*
+ * Update the utilization estimate for the packet that just completed.
+ * The packet's class & the parent(s) of that class all get their
+ * estimators updated. This routine is called by the driver's output-
+ * packet-completion interrupt service routine.
+ */
+
+/*
+ * a macro to approximate "divide by 1000" that gives 0.000999,
+ * if a value has enough effective digits.
+ * (on pentium, mul takes 9 cycles but div takes 46!)
+ */
+#define NSEC_TO_USEC(t) (((t) >> 10) + ((t) >> 16) + ((t) >> 17))
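Checking the approximation: 2^-10 + 2^-16 + 2^-17 = 0.0009765625 + 0.0000152588 + 0.0000076294 = 0.0009994507, so the macro undershoots a true divide-by-1000 by only about 0.05%, which is the 0.000999 figure cited in the comment above.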
+void
+rmc_update_class_util(struct rm_ifdat *ifd)
+{
+ int idle, avgidle, pktlen;
+ int pkt_time, tidle;
+ rm_class_t *cl, *borrowed;
+ rm_class_t *borrows;
+ struct timeval *nowp;
+
+ /*
+ * Get the most recent completed class.
+ */
+ if ((cl = ifd->class_[ifd->qo_]) == NULL)
+ return;
+
+ pktlen = ifd->curlen_[ifd->qo_];
+ borrowed = ifd->borrowed_[ifd->qo_];
+ borrows = borrowed;
+
+ PKTCNTR_ADD(&cl->stats_.xmit_cnt, pktlen);
+
+ /*
+ * Run estimator on class and its ancestors.
+ */
+ /*
+ * rmc_update_class_util is designed to be called from a
+ * transmit-complete interrupt, but most drivers don't implement
+ * such an upcall, so just use the estimated completion time.
+ * as a result, ifd->qi_ and ifd->qo_ are always synced.
+ */
+ nowp = &ifd->now_[ifd->qo_];
+ /* get pkt_time (for link) in usec */
+#if 1 /* use approximation */
+ pkt_time = ifd->curlen_[ifd->qo_] * ifd->ns_per_byte_;
+ pkt_time = NSEC_TO_USEC(pkt_time);
+#else
+ pkt_time = ifd->curlen_[ifd->qo_] * ifd->ns_per_byte_ / 1000;
+#endif
+#if 1 /* ALTQ4PPP */
+ if (TV_LT(nowp, &ifd->ifnow_)) {
+ int iftime;
+
+ /*
+ * make sure the estimated completion time does not go
+ * too far. it can happen when the link layer supports
+ * data compression or the interface speed is set to
+ * a much lower value.
+ */
+ TV_DELTA(&ifd->ifnow_, nowp, iftime);
+ if (iftime+pkt_time < ifd->maxiftime_) {
+ TV_ADD_DELTA(&ifd->ifnow_, pkt_time, &ifd->ifnow_);
+ } else {
+ TV_ADD_DELTA(nowp, ifd->maxiftime_, &ifd->ifnow_);
+ }
+ } else {
+ TV_ADD_DELTA(nowp, pkt_time, &ifd->ifnow_);
+ }
+#else
+ if (TV_LT(nowp, &ifd->ifnow_)) {
+ TV_ADD_DELTA(&ifd->ifnow_, pkt_time, &ifd->ifnow_);
+ } else {
+ TV_ADD_DELTA(nowp, pkt_time, &ifd->ifnow_);
+ }
+#endif
+
+ while (cl != NULL) {
+ TV_DELTA(&ifd->ifnow_, &cl->last_, idle);
+ if (idle >= 2000000)
+ /*
+ * this class is idle enough, reset avgidle.
+ * (TV_DELTA returns 2000000 us when delta is large.)
+ */
+ cl->avgidle_ = cl->maxidle_;
+
+ /* get pkt_time (for class) in usec */
+#if 1 /* use approximation */
+ pkt_time = pktlen * cl->ns_per_byte_;
+ pkt_time = NSEC_TO_USEC(pkt_time);
+#else
+ pkt_time = pktlen * cl->ns_per_byte_ / 1000;
+#endif
+ idle -= pkt_time;
+
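+ /*
+ * exponential filter: the new avgidle keeps
+ * (RM_POWER-1)/RM_POWER of the old value and adds the full idle
+ * sample, so at steady state avgidle settles near RM_POWER * idle.
+ */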
+ avgidle = cl->avgidle_;
+ avgidle += idle - (avgidle >> RM_FILTER_GAIN);
+ cl->avgidle_ = avgidle;
+
+ /* Are we overlimit ? */
+ if (avgidle <= 0) {
+ CBQTRACE(rmc_update_class_util, 'milo', cl->stats_.handle);
+#if 1 /* ALTQ */
+ /*
+ * need some lower bound for avgidle, otherwise
+ * a borrowing class gets unbounded penalty.
+ */
+ if (avgidle < cl->minidle_)
+ avgidle = cl->avgidle_ = cl->minidle_;
+#endif
+ /* set next idle to make avgidle 0 */
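+ /*
+ * i.e., tidle = pkt_time + ((RM_POWER-1)/RM_POWER) * -avgidle:
+ * the transmission gap that, fed back through the filter above,
+ * brings avgidle back up to zero.
+ */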
+ tidle = pkt_time +
+ (((1 - RM_POWER) * avgidle) >> RM_FILTER_GAIN);
+ TV_ADD_DELTA(nowp, tidle, &cl->undertime_);
+ ++cl->stats_.over;
+ } else {
+ cl->avgidle_ =
+ (avgidle > cl->maxidle_) ? cl->maxidle_ : avgidle;
+ cl->undertime_.tv_sec = 0;
+ if (cl->sleeping_) {
+ CALLOUT_STOP(&cl->callout_);
+ cl->sleeping_ = 0;
+ }
+ }
+
+ if (borrows != NULL) {
+ if (borrows != cl)
+ ++cl->stats_.borrows;
+ else
+ borrows = NULL;
+ }
+ cl->last_ = ifd->ifnow_;
+ cl->last_pkttime_ = pkt_time;
+
+#if 1
+ if (cl->parent_ == NULL) {
+ /* take stats of root class */
+ PKTCNTR_ADD(&cl->stats_.xmit_cnt, pktlen);
+ }
+#endif
+
+ cl = cl->parent_;
+ }
+
+ /*
+ * Check to see if cutoff needs to be set to a new level.
+ */
+ cl = ifd->class_[ifd->qo_];
+ if (borrowed && (ifd->cutoff_ >= borrowed->depth_)) {
+#if 1 /* ALTQ */
+ if ((qlen(cl->q_) <= 0) || TV_LT(nowp, &borrowed->undertime_)) {
+ rmc_tl_satisfied(ifd, nowp);
+ CBQTRACE(rmc_update_class_util, 'broe', ifd->cutoff_);
+ } else {
+ ifd->cutoff_ = borrowed->depth_;
+ CBQTRACE(rmc_update_class_util, 'ffob', borrowed->depth_);
+ }
+#else /* !ALTQ */
+ if ((qlen(cl->q_) <= 1) || TV_LT(&now, &borrowed->undertime_)) {
+ reset_cutoff(ifd);
+#ifdef notdef
+ rmc_tl_satisfied(ifd, &now);
+#endif
+ CBQTRACE(rmc_update_class_util, 'broe', ifd->cutoff_);
+ } else {
+ ifd->cutoff_ = borrowed->depth_;
+ CBQTRACE(rmc_update_class_util, 'ffob', borrowed->depth_);
+ }
+#endif /* !ALTQ */
+ }
+
+ /*
+ * Release class slot
+ */
+ ifd->borrowed_[ifd->qo_] = NULL;
+ ifd->class_[ifd->qo_] = NULL;
+ ifd->qo_ = (ifd->qo_ + 1) % ifd->maxqueued_;
+ ifd->queued_--;
+}
+
+/*
+ * void
+ * rmc_drop_action(struct rm_class *cl) - Generic (not protocol-specific)
+ * over-limit action routines. These get invoked by rmc_under_limit()
+ * if a class with packets to send is over its bandwidth limit & can't
+ * borrow from a parent class.
+ *
+ * Returns: NONE
+ */
+
+static void
+rmc_drop_action(struct rm_class *cl)
+{
+ struct rm_ifdat *ifd = cl->ifdat_;
+
+ ASSERT(qlen(cl->q_) > 0);
+ _rmc_dropq(cl);
+ if (qempty(cl->q_))
+ ifd->na_[cl->pri_]--;
+}
+
+void
+rmc_dropall(struct rm_class *cl)
+{
+ struct rm_ifdat *ifd = cl->ifdat_;
+
+ if (!qempty(cl->q_)) {
+ _flushq(cl->q_);
+
+ ifd->na_[cl->pri_]--;
+ }
+}
+
+#if (__FreeBSD_version > 300000)
+/* hzto() is removed from FreeBSD-3.0 */
+static int hzto(struct timeval *);
+
+static int
+hzto(tv)
+ struct timeval *tv;
+{
+ struct timeval t2;
+
+ getmicrotime(&t2);
+ t2.tv_sec = tv->tv_sec - t2.tv_sec;
+ t2.tv_usec = tv->tv_usec - t2.tv_usec;
+ return (tvtohz(&t2));
+}
+#endif /* __FreeBSD_version > 300000 */
+
+/*
+ * void
+ * rmc_delay_action(struct rm_class *cl) - This function is the generic CBQ
+ * delay action routine. It is invoked via rmc_under_limit when the
+ * packet is discovered to be overlimit.
+ *
+ * If the delay action is the result of a borrow class being overlimit, then
+ * delay for the offtime of the borrowing class that is overlimit.
+ *
+ * Returns: NONE
+ */
+
+void
+rmc_delay_action(struct rm_class *cl, struct rm_class *borrow)
+{
+ int delay, t, extradelay;
+
+ cl->stats_.overactions++;
+ TV_DELTA(&cl->undertime_, &cl->overtime_, delay);
+#ifndef BORROW_OFFTIME
+ delay += cl->offtime_;
+#endif
+
+ if (!cl->sleeping_) {
+ CBQTRACE(rmc_delay_action, 'yled', cl->stats_.handle);
+#ifdef BORROW_OFFTIME
+ if (borrow != NULL)
+ extradelay = borrow->offtime_;
+ else
+#endif
+ extradelay = cl->offtime_;
+
+#ifdef ALTQ
+ /*
+ * XXX recalculate suspend time:
+ * current undertime is (tidle + pkt_time) calculated
+ * from the last transmission.
+ * tidle: time required to bring avgidle back to 0
+ * pkt_time: target waiting time for this class
+ * we need to replace pkt_time by offtime
+ */
+ extradelay -= cl->last_pkttime_;
+#endif
+ if (extradelay > 0) {
+ TV_ADD_DELTA(&cl->undertime_, extradelay, &cl->undertime_);
+ delay += extradelay;
+ }
+
+ cl->sleeping_ = 1;
+ cl->stats_.delays++;
+
+ /*
+ * Since packets are phased randomly with respect to the
+ * clock, 1 tick (the next clock tick) can be an arbitrarily
+ * short time so we have to wait for at least two ticks.
+ * NOTE: If there's no other traffic, we need the timer as
+ * a 'backstop' to restart this class.
+ */
+ if (delay > tick * 2) {
+#ifdef __FreeBSD__
+ /* FreeBSD rounds up the tick */
+ t = hzto(&cl->undertime_);
+#else
+ /* other BSDs round down the tick */
+ t = hzto(&cl->undertime_) + 1;
+#endif
+ } else
+ t = 2;
+ CALLOUT_RESET(&cl->callout_, t,
+ (timeout_t *)rmc_restart, (caddr_t)cl);
+ }
+}
+
+/*
+ * void
+ * rmc_restart() - is just a helper routine for rmc_delay_action -- it is
+ * called by the system timer code & is responsible for checking if the
+ * class is still sleeping (it might have been restarted as a side
+ * effect of the queue scan on a packet arrival) and, if so, restarting
+ * output for the class. Inspecting the class state & restarting output
+ * require locking the class structure. In general the driver is
+ * responsible for locking but this is the only routine that is not
+ * called directly or indirectly from the interface driver, so it has
+ * to know about system locking conventions. Under BSD, locking is done
+ * by raising IPL to splimp so that's what's implemented here. On a
+ * different system this would probably need to be changed.
+ *
+ * Returns: NONE
+ */
+
+static void
+rmc_restart(struct rm_class *cl)
+{
+ struct rm_ifdat *ifd = cl->ifdat_;
+ int s;
+
+#ifdef __NetBSD__
+ s = splnet();
+#else
+ s = splimp();
+#endif
+ IFQ_LOCK(ifd->ifq_);
+ if (cl->sleeping_) {
+ cl->sleeping_ = 0;
+ cl->undertime_.tv_sec = 0;
+
+ if (ifd->queued_ < ifd->maxqueued_ && ifd->restart != NULL) {
+ CBQTRACE(rmc_restart, 'trts', cl->stats_.handle);
+ (ifd->restart)(ifd->ifq_);
+ }
+ }
+ IFQ_UNLOCK(ifd->ifq_);
+ splx(s);
+}
+
+/*
+ * void
+ * rmc_root_overlimit(struct rm_class *cl) - This is the generic overlimit
+ * handling routine for the root class of the link sharing structure.
+ *
+ * Returns: NONE
+ */
+
+static void
+rmc_root_overlimit(struct rm_class *cl, struct rm_class *borrow)
+{
+ panic("rmc_root_overlimit");
+}
+
+/*
+ * Packet queue handling routines. Their purpose is to localize, in one
+ * place, the code's dependence on whether the queues are RED queues or
+ * droptail queues.
+ */
+
+static int
+_rmc_addq(rm_class_t *cl, mbuf_t *m)
+{
+#ifdef ALTQ_RIO
+ if (q_is_rio(cl->q_))
+ return rio_addq((rio_t *)cl->red_, cl->q_, m, cl->pktattr_);
+#endif
+#ifdef ALTQ_RED
+ if (q_is_red(cl->q_))
+ return red_addq(cl->red_, cl->q_, m, cl->pktattr_);
+#endif /* ALTQ_RED */
+
+ if (cl->flags_ & RMCF_CLEARDSCP)
+ write_dsfield(m, cl->pktattr_, 0);
+
+ _addq(cl->q_, m);
+ return (0);
+}
+
+/* note: _rmc_dropq is not called for red */
+static void
+_rmc_dropq(rm_class_t *cl)
+{
+ mbuf_t *m;
+
+ if ((m = _getq(cl->q_)) != NULL)
+ m_freem(m);
+}
+
+static mbuf_t *
+_rmc_getq(rm_class_t *cl)
+{
+#ifdef ALTQ_RIO
+ if (q_is_rio(cl->q_))
+ return rio_getq((rio_t *)cl->red_, cl->q_);
+#endif
+#ifdef ALTQ_RED
+ if (q_is_red(cl->q_))
+ return red_getq(cl->red_, cl->q_);
+#endif
+ return _getq(cl->q_);
+}
+
+static mbuf_t *
+_rmc_pollq(rm_class_t *cl)
+{
+ return qhead(cl->q_);
+}
+
+#ifdef CBQ_TRACE
+
+struct cbqtrace cbqtrace_buffer[NCBQTRACE+1];
+struct cbqtrace *cbqtrace_ptr = NULL;
+int cbqtrace_count;
+
+/*
+ * DDB hook to trace cbq events:
+ * the last 1024 events are held in a circular buffer.
+ * use "call cbqtrace_dump(N)" to display 20 events from Nth event.
+ */
+void cbqtrace_dump(int);
+static char *rmc_funcname(void *);
+
+static struct rmc_funcs {
+ void *func;
+ char *name;
+} rmc_funcs[] =
+{
+ rmc_init, "rmc_init",
+ rmc_queue_packet, "rmc_queue_packet",
+ rmc_under_limit, "rmc_under_limit",
+ rmc_update_class_util, "rmc_update_class_util",
+ rmc_delay_action, "rmc_delay_action",
+ rmc_restart, "rmc_restart",
+ _rmc_wrr_dequeue_next, "_rmc_wrr_dequeue_next",
+ NULL, NULL
+};
+
+static char *
+rmc_funcname(void *func)
+{
+ struct rmc_funcs *fp;
+
+ for (fp = rmc_funcs; fp->func != NULL; fp++)
+ if (fp->func == func)
+ return (fp->name);
+ return ("unknown");
+}
+
+void
+cbqtrace_dump(int counter)
+{
+ int i, *p;
+ char *cp;
+
+ counter = counter % NCBQTRACE;
+ p = (int *)&cbqtrace_buffer[counter];
+
+ for (i=0; i<20; i++) {
+ printf("[0x%x] ", *p++);
+ printf("%s: ", rmc_funcname((void *)*p++));
+ cp = (char *)p++;
+ printf("%c%c%c%c: ", cp[0], cp[1], cp[2], cp[3]);
+ printf("%d\n",*p++);
+
+ if (p >= (int *)&cbqtrace_buffer[NCBQTRACE])
+ p = (int *)cbqtrace_buffer;
+ }
+}
+#endif /* CBQ_TRACE */
+#endif /* ALTQ_CBQ */
+
+#if defined(ALTQ_CBQ) || defined(ALTQ_RED) || defined(ALTQ_RIO) || defined(ALTQ_HFSC) || defined(ALTQ_PRIQ)
+#if !defined(__GNUC__) || defined(ALTQ_DEBUG)
+
+void
+_addq(class_queue_t *q, mbuf_t *m)
+{
+ mbuf_t *m0;
+
+ if ((m0 = qtail(q)) != NULL)
+ m->m_nextpkt = m0->m_nextpkt;
+ else
+ m0 = m;
+ m0->m_nextpkt = m;
+ qtail(q) = m;
+ qlen(q)++;
+}
+
+mbuf_t *
+_getq(class_queue_t *q)
+{
+ mbuf_t *m, *m0;
+
+ if ((m = qtail(q)) == NULL)
+ return (NULL);
+ if ((m0 = m->m_nextpkt) != m)
+ m->m_nextpkt = m0->m_nextpkt;
+ else {
+ ASSERT(qlen(q) == 1);
+ qtail(q) = NULL;
+ }
+ qlen(q)--;
+ m0->m_nextpkt = NULL;
+ return (m0);
+}
+
+/* drop a packet at the tail of the queue */
+mbuf_t *
+_getq_tail(class_queue_t *q)
+{
+ mbuf_t *m, *m0, *prev;
+
+ if ((m = m0 = qtail(q)) == NULL)
+ return NULL;
+ do {
+ prev = m0;
+ m0 = m0->m_nextpkt;
+ } while (m0 != m);
+ prev->m_nextpkt = m->m_nextpkt;
+ if (prev == m) {
+ ASSERT(qlen(q) == 1);
+ qtail(q) = NULL;
+ } else
+ qtail(q) = prev;
+ qlen(q)--;
+ m->m_nextpkt = NULL;
+ return (m);
+}
+
+/* randomly select a packet in the queue */
+mbuf_t *
+_getq_random(class_queue_t *q)
+{
+ struct mbuf *m;
+ int i, n;
+
+ if ((m = qtail(q)) == NULL)
+ return NULL;
+ if (m->m_nextpkt == m) {
+ ASSERT(qlen(q) == 1);
+ qtail(q) = NULL;
+ } else {
+ struct mbuf *prev = NULL;
+
+ n = arc4random() % qlen(q) + 1;
+ for (i = 0; i < n; i++) {
+ prev = m;
+ m = m->m_nextpkt;
+ }
+ prev->m_nextpkt = m->m_nextpkt;
+ if (m == qtail(q))
+ qtail(q) = prev;
+ }
+ qlen(q)--;
+ m->m_nextpkt = NULL;
+ return (m);
+}
+
+void
+_removeq(class_queue_t *q, mbuf_t *m)
+{
+ mbuf_t *m0, *prev;
+
+ m0 = qtail(q);
+ do {
+ prev = m0;
+ m0 = m0->m_nextpkt;
+ } while (m0 != m);
+ prev->m_nextpkt = m->m_nextpkt;
+ if (prev == m)
+ qtail(q) = NULL;
+ else if (qtail(q) == m)
+ qtail(q) = prev;
+ qlen(q)--;
+}
+
+void
+_flushq(class_queue_t *q)
+{
+ mbuf_t *m;
+
+ while ((m = _getq(q)) != NULL)
+ m_freem(m);
+ ASSERT(qlen(q) == 0);
+}
+
+#endif /* !__GNUC__ || ALTQ_DEBUG */
+#endif /* ALTQ_CBQ || ALTQ_RED || ALTQ_RIO || ALTQ_HFSC || ALTQ_PRIQ */
diff --git a/sys/net/altq/altq_rmclass.h b/sys/net/altq/altq_rmclass.h
new file mode 100644
index 0000000..e2cae89
--- /dev/null
+++ b/sys/net/altq/altq_rmclass.h
@@ -0,0 +1,267 @@
+/*-
+ * Copyright (c) 1991-1997 Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the Network Research
+ * Group at Lawrence Berkeley Laboratory.
+ * 4. Neither the name of the University nor of the Laboratory may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $KAME: altq_rmclass.h,v 1.10 2003/08/20 23:30:23 itojun Exp $
+ * $FreeBSD$
+ */
+
+#ifndef _ALTQ_ALTQ_RMCLASS_H_
+#define _ALTQ_ALTQ_RMCLASS_H_
+
+#include <net/altq/altq_classq.h>
+
+/* #pragma ident "@(#)rm_class.h 1.20 97/10/23 SMI" */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define RM_MAXPRIO 8 /* Max priority */
+
+#ifdef _KERNEL
+
+typedef struct mbuf mbuf_t;
+typedef struct rm_ifdat rm_ifdat_t;
+typedef struct rm_class rm_class_t;
+
+struct red;
+
+/*
+ * Macros for dealing with time values. We assume all times are
+ * 'timevals'. `microtime' is used to get the best available clock
+ * resolution. If `microtime' *doesn't* return a value that's about
+ * ten times smaller than the average packet time on the fastest
+ * link that will use these routines, a slightly different clock
+ * scheme than this one should be used.
+ * (Bias due to truncation error in this scheme will overestimate utilization
+ * and discriminate against high bandwidth classes. To remove this bias an
+ * integrator needs to be added. The simplest integrator uses a history of
+ * 10 * avg.packet.time / min.tick.time packet completion entries. This is
+ * straightforward to add but we don't want to pay the extra memory
+ * traffic to maintain it if it's not necessary (occasionally a vendor
+ * accidentally builds a workstation with a decent clock - e.g., Sun & HP).)
+ */
+
+#define RM_GETTIME(now) microtime(&now)
+
+#define TV_LT(a, b) (((a)->tv_sec < (b)->tv_sec) || \
+ (((a)->tv_usec < (b)->tv_usec) && ((a)->tv_sec <= (b)->tv_sec)))
+
+#define TV_DELTA(a, b, delta) { \
+ register int xxs; \
+ \
+ delta = (a)->tv_usec - (b)->tv_usec; \
+ if ((xxs = (a)->tv_sec - (b)->tv_sec)) { \
+ switch (xxs) { \
+ default: \
+ /* if (xxs < 0) \
+ printf("rm_class: bogus time values\n"); */ \
+ delta = 0; \
+ /* fall through */ \
+ case 2: \
+ delta += 1000000; \
+ /* fall through */ \
+ case 1: \
+ delta += 1000000; \
+ break; \
+ } \
+ } \
+}
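+/*
+ * note that when the tv_sec values differ by three or more (or the
+ * difference is negative), TV_DELTA clamps the result to 2000000 usec;
+ * rmc_update_class_util relies on this bound to detect idle classes.
+ */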
+
+#define TV_ADD_DELTA(a, delta, res) { \
+ register int xxus = (a)->tv_usec + (delta); \
+ \
+ (res)->tv_sec = (a)->tv_sec; \
+ while (xxus >= 1000000) { \
+ ++((res)->tv_sec); \
+ xxus -= 1000000; \
+ } \
+ (res)->tv_usec = xxus; \
+}
+
+#define RM_TIMEOUT 2 /* 1 Clock tick. */
+
+#if 1
+#define RM_MAXQUEUED 1 /* this isn't used in ALTQ/CBQ */
+#else
+#define RM_MAXQUEUED 16 /* Max number of packets downstream of CBQ */
+#endif
+#define RM_MAXQUEUE 64 /* Max queue length */
+#define RM_FILTER_GAIN 5 /* log2 of gain, e.g., 5 => 31/32 */
+#define RM_POWER (1 << RM_FILTER_GAIN)
+#define RM_MAXDEPTH 32
+#define RM_NS_PER_SEC (1000000000)
+
+typedef struct _rm_class_stats_ {
+ u_int handle;
+ u_int depth;
+
+ struct pktcntr xmit_cnt; /* packets sent in this class */
+ struct pktcntr drop_cnt; /* dropped packets */
+ u_int over; /* # times went over limit */
+ u_int borrows; /* # times tried to borrow */
+ u_int overactions; /* # times invoked overlimit action */
+ u_int delays; /* # times invoked delay actions */
+} rm_class_stats_t;
+
+/*
+ * CBQ Class state structure
+ */
+struct rm_class {
+ class_queue_t *q_; /* Queue of packets */
+ rm_ifdat_t *ifdat_;
+ int pri_; /* Class priority. */
+ int depth_; /* Class depth */
+ u_int ns_per_byte_; /* NanoSeconds per byte. */
+ u_int maxrate_; /* Bytes per second for this class. */
+ u_int allotment_; /* Fraction of link bandwidth. */
+ u_int w_allotment_; /* Weighted allotment for WRR */
+ int bytes_alloc_; /* Allocation for round of WRR */
+
+ int avgidle_;
+ int maxidle_;
+ int minidle_;
+ int offtime_;
+ int sleeping_; /* != 0 if delaying */
+ int qthresh_; /* Queue threshold for formal link sharing */
+ int leaf_; /* Note whether leaf class or not.*/
+
+ rm_class_t *children_; /* Children of this class */
+ rm_class_t *next_; /* Next pointer, used if child */
+
+ rm_class_t *peer_; /* Peer class */
+ rm_class_t *borrow_; /* Borrow class */
+ rm_class_t *parent_; /* Parent class */
+
+ void (*overlimit)(struct rm_class *, struct rm_class *);
+ void (*drop)(struct rm_class *); /* Class drop action. */
+
+ struct red *red_; /* RED state pointer */
+ struct altq_pktattr *pktattr_; /* saved hdr used by RED/ECN */
+ int flags_;
+
+ int last_pkttime_; /* saved pkt_time */
+ struct timeval undertime_; /* time can next send */
+ struct timeval last_; /* time last packet sent */
+ struct timeval overtime_;
+ struct callout callout_; /* for timeout() calls */
+
+ rm_class_stats_t stats_; /* Class Statistics */
+};
+
+/*
+ * CBQ Interface state
+ */
+struct rm_ifdat {
+ int queued_; /* # pkts queued downstream */
+ int efficient_; /* Link Efficiency bit */
+ int wrr_; /* Enable Weighted Round-Robin */
+ u_long ns_per_byte_; /* Link byte speed. */
+ int maxqueued_; /* Max packets to queue */
+ int maxpkt_; /* Max packet size. */
+ int qi_; /* In/out pointers for downstream */
+ int qo_; /* packets */
+
+ /*
+ * Active class state and WRR state.
+ */
+ rm_class_t *active_[RM_MAXPRIO]; /* Active cl's in each pri */
+ int na_[RM_MAXPRIO]; /* # of active cl's in a pri */
+ int num_[RM_MAXPRIO]; /* # of cl's per pri */
+ int alloc_[RM_MAXPRIO]; /* Byte Allocation */
+ u_long M_[RM_MAXPRIO]; /* WRR weights. */
+
+ /*
+ * Network Interface/Solaris Queue state pointer.
+ */
+ struct ifaltq *ifq_;
+ rm_class_t *default_; /* Default Pkt class, BE */
+ rm_class_t *root_; /* Root Link class. */
+ rm_class_t *ctl_; /* Control Traffic class. */
+ void (*restart)(struct ifaltq *); /* Restart routine. */
+
+ /*
+ * Current packet downstream packet state and dynamic state.
+ */
+ rm_class_t *borrowed_[RM_MAXQUEUED]; /* Class borrowed last */
+ rm_class_t *class_[RM_MAXQUEUED]; /* class sending */
+ int curlen_[RM_MAXQUEUED]; /* Current pktlen */
+ struct timeval now_[RM_MAXQUEUED]; /* Current packet time. */
+ int is_overlimit_[RM_MAXQUEUED]; /* Current packet is overlimit. */
+
+ int cutoff_; /* Cut-off depth for borrowing */
+
+ struct timeval ifnow_; /* expected xmit completion time */
+#if 1 /* ALTQ4PPP */
+ int maxiftime_; /* max delay inside interface */
+#endif
+ rm_class_t *pollcache_; /* cached rm_class by poll operation */
+};
+
+/* flags for rmc_init and rmc_newclass */
+/* class flags */
+#define RMCF_RED 0x0001
+#define RMCF_ECN 0x0002
+#define RMCF_RIO 0x0004
+#define RMCF_FLOWVALVE 0x0008 /* use flowvalve (aka penalty-box) */
+#define RMCF_CLEARDSCP 0x0010 /* clear diffserv codepoint */
+
+/* flags for rmc_init */
+#define RMCF_WRR 0x0100
+#define RMCF_EFFICIENT 0x0200
+
+#define is_a_parent_class(cl) ((cl)->children_ != NULL)
+
+extern rm_class_t *rmc_newclass(int, struct rm_ifdat *, u_int,
+ void (*)(struct rm_class *, struct rm_class *),
+ int, struct rm_class *, struct rm_class *,
+ u_int, int, u_int, int, int);
+extern void rmc_delete_class(struct rm_ifdat *, struct rm_class *);
+extern int rmc_modclass(struct rm_class *, u_int, int,
+ u_int, int, u_int, int);
+extern void rmc_init(struct ifaltq *, struct rm_ifdat *, u_int,
+ void (*)(struct ifaltq *),
+ int, int, u_int, int, u_int, int);
+extern int rmc_queue_packet(struct rm_class *, mbuf_t *);
+extern mbuf_t *rmc_dequeue_next(struct rm_ifdat *, int);
+extern void rmc_update_class_util(struct rm_ifdat *);
+extern void rmc_delay_action(struct rm_class *, struct rm_class *);
+extern void rmc_dropall(struct rm_class *);
+extern int rmc_get_weight(struct rm_ifdat *, int);
+
+#endif /* _KERNEL */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _ALTQ_ALTQ_RMCLASS_H_ */
diff --git a/sys/net/altq/altq_rmclass_debug.h b/sys/net/altq/altq_rmclass_debug.h
new file mode 100644
index 0000000..8f471b2
--- /dev/null
+++ b/sys/net/altq/altq_rmclass_debug.h
@@ -0,0 +1,112 @@
+/* $KAME: altq_rmclass_debug.h,v 1.3 2002/11/29 04:36:24 kjc Exp $ */
+
+/*
+ * Copyright (c) Sun Microsystems, Inc. 1998 All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the SMCC Technology
+ * Development Group at Sun Microsystems, Inc.
+ *
+ * 4. The name of the Sun Microsystems, Inc nor may not be used to endorse or
+ * promote products derived from this software without specific prior
+ * written permission.
+ *
+ * SUN MICROSYSTEMS DOES NOT CLAIM MERCHANTABILITY OF THIS SOFTWARE OR THE
+ * SUITABILITY OF THIS SOFTWARE FOR ANY PARTICULAR PURPOSE. The software is
+ * provided "as is" without express or implied warranty of any kind.
+ *
+ * These notices must be retained in any copies of any part of this software.
+ */
+
+#ifndef _ALTQ_ALTQ_RMCLASS_DEBUG_H_
+#define _ALTQ_ALTQ_RMCLASS_DEBUG_H_
+
+/* #pragma ident "@(#)rm_class_debug.h 1.7 98/05/04 SMI" */
+
+/*
+ * Cbq debugging macros
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef CBQ_TRACE
+#ifndef NCBQTRACE
+#define NCBQTRACE (16 * 1024)
+#endif
+
+/*
+ * To view the trace output, using adb, type:
+ * adb -k /dev/ksyms /dev/mem <cr>, then type
+ * cbqtrace_count/D to get the count, then type
+ * cbqtrace_buffer,0tcount/Dp4C" "Xn
+ * This will dump the trace buffer from 0 to count.
+ */
+/*
+ * in ALTQ, "call cbqtrace_dump(N)" from DDB to display 20 events
+ * from Nth event in the circular buffer.
+ */
+
+struct cbqtrace {
+ int count;
+ int function; /* address of function */
+ int trace_action; /* descriptive 4 characters */
+ int object; /* object operated on */
+};
+
+extern struct cbqtrace cbqtrace_buffer[];
+extern struct cbqtrace *cbqtrace_ptr;
+extern int cbqtrace_count;
+
+#define CBQTRACEINIT() { \
+ if (cbqtrace_ptr == NULL) \
+ cbqtrace_ptr = cbqtrace_buffer; \
+ else { \
+ cbqtrace_ptr = cbqtrace_buffer; \
+ bzero((void *)cbqtrace_ptr, sizeof(cbqtrace_buffer)); \
+ cbqtrace_count = 0; \
+ } \
+}
+
+#define LOCK_TRACE() splimp()
+#define UNLOCK_TRACE(x) splx(x)
+
+#define CBQTRACE(func, act, obj) { \
+ int __s = LOCK_TRACE(); \
+ int *_p = &cbqtrace_ptr->count; \
+ *_p++ = ++cbqtrace_count; \
+ *_p++ = (int)(func); \
+ *_p++ = (int)(act); \
+ *_p++ = (int)(obj); \
+ if ((struct cbqtrace *)(void *)_p >= &cbqtrace_buffer[NCBQTRACE])\
+ cbqtrace_ptr = cbqtrace_buffer; \
+ else \
+ cbqtrace_ptr = (struct cbqtrace *)(void *)_p; \
+ UNLOCK_TRACE(__s); \
+ }
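+/*
+ * e.g., CBQTRACE(rmc_restart, 'trts', cl->stats_.handle) records the
+ * calling function's address, a four-character tag and the class handle
+ * as one entry in the circular trace buffer.
+ */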
+#else
+
+/* If no tracing, define no-ops */
+#define CBQTRACEINIT()
+#define CBQTRACE(a, b, c)
+
+#endif /* !CBQ_TRACE */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _ALTQ_ALTQ_RMCLASS_DEBUG_H_ */
diff --git a/sys/net/altq/altq_subr.c b/sys/net/altq/altq_subr.c
new file mode 100644
index 0000000..eefa12f
--- /dev/null
+++ b/sys/net/altq/altq_subr.c
@@ -0,0 +1,1981 @@
+/* $FreeBSD$ */
+/* $KAME: altq_subr.c,v 1.21 2003/11/06 06:32:53 kjc Exp $ */
+
+/*
+ * Copyright (C) 1997-2003
+ * Sony Computer Science Laboratories Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(__FreeBSD__) || defined(__NetBSD__)
+#include "opt_altq.h"
+#include "opt_inet.h"
+#ifdef __FreeBSD__
+#include "opt_inet6.h"
+#endif
+#endif /* __FreeBSD__ || __NetBSD__ */
+
+#include <sys/param.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/kernel.h>
+#include <sys/errno.h>
+#include <sys/syslog.h>
+#include <sys/sysctl.h>
+#include <sys/queue.h>
+
+#include <net/if.h>
+#include <net/if_var.h>
+#include <net/if_dl.h>
+#include <net/if_types.h>
+#ifdef __FreeBSD__
+#include <net/vnet.h>
+#endif
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#ifdef INET6
+#include <netinet/ip6.h>
+#endif
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+
+#include <netpfil/pf/pf.h>
+#include <netpfil/pf/pf_altq.h>
+#include <net/altq/altq.h>
+#ifdef ALTQ3_COMPAT
+#include <net/altq/altq_conf.h>
+#endif
+
+/* machine dependent clock related includes */
+#ifdef __FreeBSD__
+#include <sys/bus.h>
+#include <sys/cpu.h>
+#include <sys/eventhandler.h>
+#include <machine/clock.h>
+#endif
+#if defined(__amd64__) || defined(__i386__)
+#include <machine/cpufunc.h> /* for pentium tsc */
+#include <machine/specialreg.h> /* for CPUID_TSC */
+#ifdef __FreeBSD__
+#include <machine/md_var.h> /* for cpu_feature */
+#elif defined(__NetBSD__) || defined(__OpenBSD__)
+#include <machine/cpu.h> /* for cpu_feature */
+#endif
+#endif /* __amd64 || __i386__ */
+
+/*
+ * internal function prototypes
+ */
+static void tbr_timeout(void *);
+int (*altq_input)(struct mbuf *, int) = NULL;
+static struct mbuf *tbr_dequeue(struct ifaltq *, int);
+static int tbr_timer = 0; /* token bucket regulator timer */
+#if !defined(__FreeBSD__) || (__FreeBSD_version < 600000)
+static struct callout tbr_callout = CALLOUT_INITIALIZER;
+#else
+static struct callout tbr_callout;
+#endif
+
+#ifdef ALTQ3_CLFIER_COMPAT
+static int extract_ports4(struct mbuf *, struct ip *, struct flowinfo_in *);
+#ifdef INET6
+static int extract_ports6(struct mbuf *, struct ip6_hdr *,
+ struct flowinfo_in6 *);
+#endif
+static int apply_filter4(u_int32_t, struct flow_filter *,
+ struct flowinfo_in *);
+static int apply_ppfilter4(u_int32_t, struct flow_filter *,
+ struct flowinfo_in *);
+#ifdef INET6
+static int apply_filter6(u_int32_t, struct flow_filter6 *,
+ struct flowinfo_in6 *);
+#endif
+static int apply_tosfilter4(u_int32_t, struct flow_filter *,
+ struct flowinfo_in *);
+static u_long get_filt_handle(struct acc_classifier *, int);
+static struct acc_filter *filth_to_filtp(struct acc_classifier *, u_long);
+static u_int32_t filt2fibmask(struct flow_filter *);
+
+static void ip4f_cache(struct ip *, struct flowinfo_in *);
+static int ip4f_lookup(struct ip *, struct flowinfo_in *);
+static int ip4f_init(void);
+static struct ip4_frag *ip4f_alloc(void);
+static void ip4f_free(struct ip4_frag *);
+#endif /* ALTQ3_CLFIER_COMPAT */
+
+/*
+ * alternate queueing support routines
+ */
+
+/* look up the queue state by the interface name and the queueing type. */
+void *
+altq_lookup(name, type)
+ char *name;
+ int type;
+{
+ struct ifnet *ifp;
+
+ if ((ifp = ifunit(name)) != NULL) {
+ /* read if_snd unlocked */
+ if (type != ALTQT_NONE && ifp->if_snd.altq_type == type)
+ return (ifp->if_snd.altq_disc);
+ }
+
+ return NULL;
+}
+
+int
+altq_attach(ifq, type, discipline, enqueue, dequeue, request, clfier, classify)
+ struct ifaltq *ifq;
+ int type;
+ void *discipline;
+ int (*enqueue)(struct ifaltq *, struct mbuf *, struct altq_pktattr *);
+ struct mbuf *(*dequeue)(struct ifaltq *, int);
+ int (*request)(struct ifaltq *, int, void *);
+ void *clfier;
+ void *(*classify)(void *, struct mbuf *, int);
+{
+ IFQ_LOCK(ifq);
+ if (!ALTQ_IS_READY(ifq)) {
+ IFQ_UNLOCK(ifq);
+ return ENXIO;
+ }
+
+#ifdef ALTQ3_COMPAT
+ /*
+ * pfaltq can override the existing discipline, but altq3 cannot.
+ * check these if clfier is not NULL (which implies altq3).
+ */
+ if (clfier != NULL) {
+ if (ALTQ_IS_ENABLED(ifq)) {
+ IFQ_UNLOCK(ifq);
+ return EBUSY;
+ }
+ if (ALTQ_IS_ATTACHED(ifq)) {
+ IFQ_UNLOCK(ifq);
+ return EEXIST;
+ }
+ }
+#endif
+ ifq->altq_type = type;
+ ifq->altq_disc = discipline;
+ ifq->altq_enqueue = enqueue;
+ ifq->altq_dequeue = dequeue;
+ ifq->altq_request = request;
+ ifq->altq_clfier = clfier;
+ ifq->altq_classify = classify;
+ ifq->altq_flags &= (ALTQF_CANTCHANGE|ALTQF_ENABLED);
+#ifdef ALTQ3_COMPAT
+#ifdef ALTQ_KLD
+ altq_module_incref(type);
+#endif
+#endif
+ IFQ_UNLOCK(ifq);
+ return 0;
+}
+
+int
+altq_detach(ifq)
+ struct ifaltq *ifq;
+{
+ IFQ_LOCK(ifq);
+
+ if (!ALTQ_IS_READY(ifq)) {
+ IFQ_UNLOCK(ifq);
+ return ENXIO;
+ }
+ if (ALTQ_IS_ENABLED(ifq)) {
+ IFQ_UNLOCK(ifq);
+ return EBUSY;
+ }
+ if (!ALTQ_IS_ATTACHED(ifq)) {
+ IFQ_UNLOCK(ifq);
+ return (0);
+ }
+#ifdef ALTQ3_COMPAT
+#ifdef ALTQ_KLD
+ altq_module_declref(ifq->altq_type);
+#endif
+#endif
+
+ ifq->altq_type = ALTQT_NONE;
+ ifq->altq_disc = NULL;
+ ifq->altq_enqueue = NULL;
+ ifq->altq_dequeue = NULL;
+ ifq->altq_request = NULL;
+ ifq->altq_clfier = NULL;
+ ifq->altq_classify = NULL;
+ ifq->altq_flags &= ALTQF_CANTCHANGE;
+
+ IFQ_UNLOCK(ifq);
+ return 0;
+}
+
+int
+altq_enable(ifq)
+ struct ifaltq *ifq;
+{
+ int s;
+
+ IFQ_LOCK(ifq);
+
+ if (!ALTQ_IS_READY(ifq)) {
+ IFQ_UNLOCK(ifq);
+ return ENXIO;
+ }
+ if (ALTQ_IS_ENABLED(ifq)) {
+ IFQ_UNLOCK(ifq);
+ return 0;
+ }
+
+#ifdef __NetBSD__
+ s = splnet();
+#else
+ s = splimp();
+#endif
+ IFQ_PURGE_NOLOCK(ifq);
+ ASSERT(ifq->ifq_len == 0);
+ ifq->ifq_drv_maxlen = 0; /* disable bulk dequeue */
+ ifq->altq_flags |= ALTQF_ENABLED;
+ if (ifq->altq_clfier != NULL)
+ ifq->altq_flags |= ALTQF_CLASSIFY;
+ splx(s);
+
+ IFQ_UNLOCK(ifq);
+ return 0;
+}
+
+int
+altq_disable(ifq)
+ struct ifaltq *ifq;
+{
+ int s;
+
+ IFQ_LOCK(ifq);
+ if (!ALTQ_IS_ENABLED(ifq)) {
+ IFQ_UNLOCK(ifq);
+ return 0;
+ }
+
+#ifdef __NetBSD__
+ s = splnet();
+#else
+ s = splimp();
+#endif
+ IFQ_PURGE_NOLOCK(ifq);
+ ASSERT(ifq->ifq_len == 0);
+ ifq->altq_flags &= ~(ALTQF_ENABLED|ALTQF_CLASSIFY);
+ splx(s);
+
+ IFQ_UNLOCK(ifq);
+ return 0;
+}
+
+#ifdef ALTQ_DEBUG
+void
+altq_assert(file, line, failedexpr)
+ const char *file, *failedexpr;
+ int line;
+{
+ (void)printf("altq assertion \"%s\" failed: file \"%s\", line %d\n",
+ failedexpr, file, line);
+ panic("altq assertion");
+ /* NOTREACHED */
+}
+#endif
+
+/*
+ * internal representation of token bucket parameters
+ * rate: byte_per_unittime << 32
+ * (((bits_per_sec) / 8) << 32) / machclk_freq
+ * depth: byte << 32
+ *
+ */
+#define TBR_SHIFT 32
+#define TBR_SCALE(x) ((int64_t)(x) << TBR_SHIFT)
+#define TBR_UNSCALE(x) ((x) >> TBR_SHIFT)
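+/*
+ * for example, a 10Mbps profile on a 1GHz machine clock gives
+ * tbr_rate = ((10000000 / 8) << 32) / 1000000000 ~= 5368709,
+ * i.e. 0.00125 bytes per clock tick in 32.32 fixed point.
+ */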
+
+static struct mbuf *
+tbr_dequeue(ifq, op)
+ struct ifaltq *ifq;
+ int op;
+{
+ struct tb_regulator *tbr;
+ struct mbuf *m;
+ int64_t interval;
+ u_int64_t now;
+
+ IFQ_LOCK_ASSERT(ifq);
+ tbr = ifq->altq_tbr;
+ if (op == ALTDQ_REMOVE && tbr->tbr_lastop == ALTDQ_POLL) {
+ /* if this is a remove after poll, bypass tbr check */
+ } else {
+ /* update token only when it is negative */
+ if (tbr->tbr_token <= 0) {
+ now = read_machclk();
+ interval = now - tbr->tbr_last;
+ if (interval >= tbr->tbr_filluptime)
+ tbr->tbr_token = tbr->tbr_depth;
+ else {
+ tbr->tbr_token += interval * tbr->tbr_rate;
+ if (tbr->tbr_token > tbr->tbr_depth)
+ tbr->tbr_token = tbr->tbr_depth;
+ }
+ tbr->tbr_last = now;
+ }
+ /* if token is still negative, don't allow dequeue */
+ if (tbr->tbr_token <= 0)
+ return (NULL);
+ }
+
+ if (ALTQ_IS_ENABLED(ifq))
+ m = (*ifq->altq_dequeue)(ifq, op);
+ else {
+ if (op == ALTDQ_POLL)
+ _IF_POLL(ifq, m);
+ else
+ _IF_DEQUEUE(ifq, m);
+ }
+
+ if (m != NULL && op == ALTDQ_REMOVE)
+ tbr->tbr_token -= TBR_SCALE(m_pktlen(m));
+ tbr->tbr_lastop = op;
+ return (m);
+}
+
+/*
+ * set a token bucket regulator.
+ * if the specified rate is zero, the token bucket regulator is deleted.
+ */
+int
+tbr_set(ifq, profile)
+ struct ifaltq *ifq;
+ struct tb_profile *profile;
+{
+ struct tb_regulator *tbr, *otbr;
+
+ if (tbr_dequeue_ptr == NULL)
+ tbr_dequeue_ptr = tbr_dequeue;
+
+ if (machclk_freq == 0)
+ init_machclk();
+ if (machclk_freq == 0) {
+ printf("tbr_set: no cpu clock available!\n");
+ return (ENXIO);
+ }
+
+ IFQ_LOCK(ifq);
+ if (profile->rate == 0) {
+ /* delete this tbr */
+ if ((tbr = ifq->altq_tbr) == NULL) {
+ IFQ_UNLOCK(ifq);
+ return (ENOENT);
+ }
+ ifq->altq_tbr = NULL;
+ free(tbr, M_DEVBUF);
+ IFQ_UNLOCK(ifq);
+ return (0);
+ }
+
+ tbr = malloc(sizeof(struct tb_regulator), M_DEVBUF, M_NOWAIT | M_ZERO);
+ if (tbr == NULL) {
+ IFQ_UNLOCK(ifq);
+ return (ENOMEM);
+ }
+
+ tbr->tbr_rate = TBR_SCALE(profile->rate / 8) / machclk_freq;
+ tbr->tbr_depth = TBR_SCALE(profile->depth);
+ if (tbr->tbr_rate > 0)
+ tbr->tbr_filluptime = tbr->tbr_depth / tbr->tbr_rate;
+ else
+ tbr->tbr_filluptime = 0xffffffffffffffffLL;
+ tbr->tbr_token = tbr->tbr_depth;
+ tbr->tbr_last = read_machclk();
+ tbr->tbr_lastop = ALTDQ_REMOVE;
+
+ otbr = ifq->altq_tbr;
+ ifq->altq_tbr = tbr; /* set the new tbr */
+
+ if (otbr != NULL)
+ free(otbr, M_DEVBUF);
+ else {
+ if (tbr_timer == 0) {
+ CALLOUT_RESET(&tbr_callout, 1, tbr_timeout, (void *)0);
+ tbr_timer = 1;
+ }
+ }
+ IFQ_UNLOCK(ifq);
+ return (0);
+}
+
+/*
+ * tbr_timeout goes through the interface list, and kicks the drivers
+ * if necessary.
+ *
+ * MPSAFE
+ */
+static void
+tbr_timeout(arg)
+ void *arg;
+{
+#ifdef __FreeBSD__
+ VNET_ITERATOR_DECL(vnet_iter);
+#endif
+ struct ifnet *ifp;
+ int active, s;
+
+ active = 0;
+#ifdef __NetBSD__
+ s = splnet();
+#else
+ s = splimp();
+#endif
+#ifdef __FreeBSD__
+ IFNET_RLOCK_NOSLEEP();
+ VNET_LIST_RLOCK_NOSLEEP();
+ VNET_FOREACH(vnet_iter) {
+ CURVNET_SET(vnet_iter);
+#endif
+ for (ifp = TAILQ_FIRST(&V_ifnet); ifp;
+ ifp = TAILQ_NEXT(ifp, if_list)) {
+ /* read from if_snd unlocked */
+ if (!TBR_IS_ENABLED(&ifp->if_snd))
+ continue;
+ active++;
+ if (!IFQ_IS_EMPTY(&ifp->if_snd) &&
+ ifp->if_start != NULL)
+ (*ifp->if_start)(ifp);
+ }
+#ifdef __FreeBSD__
+ CURVNET_RESTORE();
+ }
+ VNET_LIST_RUNLOCK_NOSLEEP();
+ IFNET_RUNLOCK_NOSLEEP();
+#endif
+ splx(s);
+ if (active > 0)
+ CALLOUT_RESET(&tbr_callout, 1, tbr_timeout, (void *)0);
+ else
+ tbr_timer = 0; /* don't need tbr_timer anymore */
+}
+
+/*
+ * get token bucket regulator profile
+ */
+int
+tbr_get(ifq, profile)
+ struct ifaltq *ifq;
+ struct tb_profile *profile;
+{
+ struct tb_regulator *tbr;
+
+ IFQ_LOCK(ifq);
+ if ((tbr = ifq->altq_tbr) == NULL) {
+ profile->rate = 0;
+ profile->depth = 0;
+ } else {
+ profile->rate =
+ (u_int)TBR_UNSCALE(tbr->tbr_rate * 8 * machclk_freq);
+ profile->depth = (u_int)TBR_UNSCALE(tbr->tbr_depth);
+ }
+ IFQ_UNLOCK(ifq);
+ return (0);
+}
+
+/*
+ * attach a discipline to the interface. if one already exists, it is
+ * overridden.
+ * Locking is done in the discipline specific attach functions. Basically
+ * they call back to altq_attach which takes care of the attach and locking.
+ */
+int
+altq_pfattach(struct pf_altq *a)
+{
+ int error = 0;
+
+ switch (a->scheduler) {
+ case ALTQT_NONE:
+ break;
+#ifdef ALTQ_CBQ
+ case ALTQT_CBQ:
+ error = cbq_pfattach(a);
+ break;
+#endif
+#ifdef ALTQ_PRIQ
+ case ALTQT_PRIQ:
+ error = priq_pfattach(a);
+ break;
+#endif
+#ifdef ALTQ_HFSC
+ case ALTQT_HFSC:
+ error = hfsc_pfattach(a);
+ break;
+#endif
+ default:
+ error = ENXIO;
+ }
+
+ return (error);
+}
+
+/*
+ * detach a discipline from the interface.
+ * it is possible that the discipline was already overridden by another
+ * discipline.
+ */
+int
+altq_pfdetach(struct pf_altq *a)
+{
+ struct ifnet *ifp;
+ int s, error = 0;
+
+ if ((ifp = ifunit(a->ifname)) == NULL)
+ return (EINVAL);
+
+ /* if this discipline is no longer referenced, just return */
+ /* read unlocked from if_snd */
+ if (a->altq_disc == NULL || a->altq_disc != ifp->if_snd.altq_disc)
+ return (0);
+
+#ifdef __NetBSD__
+ s = splnet();
+#else
+ s = splimp();
+#endif
+ /* read unlocked from if_snd, _disable and _detach take care */
+ if (ALTQ_IS_ENABLED(&ifp->if_snd))
+ error = altq_disable(&ifp->if_snd);
+ if (error == 0)
+ error = altq_detach(&ifp->if_snd);
+ splx(s);
+
+ return (error);
+}
+
+/*
+ * add a discipline or a queue
+ * Locking is done in the discipline specific functions with regard to
+ * malloc with WAITOK; it is also not yet clear which lock to use.
+ */
+int
+altq_add(struct pf_altq *a)
+{
+ int error = 0;
+
+ if (a->qname[0] != 0)
+ return (altq_add_queue(a));
+
+ if (machclk_freq == 0)
+ init_machclk();
+ if (machclk_freq == 0)
+ panic("altq_add: no cpu clock");
+
+ switch (a->scheduler) {
+#ifdef ALTQ_CBQ
+ case ALTQT_CBQ:
+ error = cbq_add_altq(a);
+ break;
+#endif
+#ifdef ALTQ_PRIQ
+ case ALTQT_PRIQ:
+ error = priq_add_altq(a);
+ break;
+#endif
+#ifdef ALTQ_HFSC
+ case ALTQT_HFSC:
+ error = hfsc_add_altq(a);
+ break;
+#endif
+ default:
+ error = ENXIO;
+ }
+
+ return (error);
+}
+
+/*
+ * remove a discipline or a queue
+ * It is as yet unclear which lock to use to protect this operation; the
+ * discipline specific functions will determine and grab it.
+ */
+int
+altq_remove(struct pf_altq *a)
+{
+ int error = 0;
+
+ if (a->qname[0] != 0)
+ return (altq_remove_queue(a));
+
+ switch (a->scheduler) {
+#ifdef ALTQ_CBQ
+ case ALTQT_CBQ:
+ error = cbq_remove_altq(a);
+ break;
+#endif
+#ifdef ALTQ_PRIQ
+ case ALTQT_PRIQ:
+ error = priq_remove_altq(a);
+ break;
+#endif
+#ifdef ALTQ_HFSC
+ case ALTQT_HFSC:
+ error = hfsc_remove_altq(a);
+ break;
+#endif
+ default:
+ error = ENXIO;
+ }
+
+ return (error);
+}
+
+/*
+ * add a queue to the discipline
+ * It is as yet unclear which lock to use to protect this operation; the
+ * discipline specific functions will determine and grab it.
+ */
+int
+altq_add_queue(struct pf_altq *a)
+{
+ int error = 0;
+
+ switch (a->scheduler) {
+#ifdef ALTQ_CBQ
+ case ALTQT_CBQ:
+ error = cbq_add_queue(a);
+ break;
+#endif
+#ifdef ALTQ_PRIQ
+ case ALTQT_PRIQ:
+ error = priq_add_queue(a);
+ break;
+#endif
+#ifdef ALTQ_HFSC
+ case ALTQT_HFSC:
+ error = hfsc_add_queue(a);
+ break;
+#endif
+ default:
+ error = ENXIO;
+ }
+
+ return (error);
+}
+
+/*
+ * remove a queue from the discipline
+ * It is as yet unclear which lock to use to protect this operation; the
+ * discipline specific functions will determine and grab it.
+ */
+int
+altq_remove_queue(struct pf_altq *a)
+{
+ int error = 0;
+
+ switch (a->scheduler) {
+#ifdef ALTQ_CBQ
+ case ALTQT_CBQ:
+ error = cbq_remove_queue(a);
+ break;
+#endif
+#ifdef ALTQ_PRIQ
+ case ALTQT_PRIQ:
+ error = priq_remove_queue(a);
+ break;
+#endif
+#ifdef ALTQ_HFSC
+ case ALTQT_HFSC:
+ error = hfsc_remove_queue(a);
+ break;
+#endif
+ default:
+ error = ENXIO;
+ }
+
+ return (error);
+}
+
+/*
+ * get queue statistics
+ * Locking is done in the discipline specific functions with regard to
+ * copyout operations; it is also not yet clear which lock to use.
+ */
+int
+altq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes)
+{
+ int error = 0;
+
+ switch (a->scheduler) {
+#ifdef ALTQ_CBQ
+ case ALTQT_CBQ:
+ error = cbq_getqstats(a, ubuf, nbytes);
+ break;
+#endif
+#ifdef ALTQ_PRIQ
+ case ALTQT_PRIQ:
+ error = priq_getqstats(a, ubuf, nbytes);
+ break;
+#endif
+#ifdef ALTQ_HFSC
+ case ALTQT_HFSC:
+ error = hfsc_getqstats(a, ubuf, nbytes);
+ break;
+#endif
+ default:
+ error = ENXIO;
+ }
+
+ return (error);
+}
+
+/*
+ * read and write diffserv field in IPv4 or IPv6 header
+ */
+u_int8_t
+read_dsfield(m, pktattr)
+ struct mbuf *m;
+ struct altq_pktattr *pktattr;
+{
+ struct mbuf *m0;
+ u_int8_t ds_field = 0;
+
+ if (pktattr == NULL ||
+ (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6))
+ return ((u_int8_t)0);
+
+ /* verify that pattr_hdr is within the mbuf data */
+ for (m0 = m; m0 != NULL; m0 = m0->m_next)
+ if ((pktattr->pattr_hdr >= m0->m_data) &&
+ (pktattr->pattr_hdr < m0->m_data + m0->m_len))
+ break;
+ if (m0 == NULL) {
+ /* ick, pattr_hdr is stale */
+ pktattr->pattr_af = AF_UNSPEC;
+#ifdef ALTQ_DEBUG
+ printf("read_dsfield: can't locate header!\n");
+#endif
+ return ((u_int8_t)0);
+ }
+
+ if (pktattr->pattr_af == AF_INET) {
+ struct ip *ip = (struct ip *)pktattr->pattr_hdr;
+
+ if (ip->ip_v != 4)
+ return ((u_int8_t)0); /* version mismatch! */
+ ds_field = ip->ip_tos;
+ }
+#ifdef INET6
+ else if (pktattr->pattr_af == AF_INET6) {
+ struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
+ u_int32_t flowlabel;
+
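+ /*
+ * in the ip6_flow word the top 4 bits are the version, the next
+ * 8 bits the traffic class and the low 20 bits the flow label.
+ */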
+ flowlabel = ntohl(ip6->ip6_flow);
+ if ((flowlabel >> 28) != 6)
+ return ((u_int8_t)0); /* version mismatch! */
+ ds_field = (flowlabel >> 20) & 0xff;
+ }
+#endif
+ return (ds_field);
+}
+
+void
+write_dsfield(struct mbuf *m, struct altq_pktattr *pktattr, u_int8_t dsfield)
+{
+ struct mbuf *m0;
+
+ if (pktattr == NULL ||
+ (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6))
+ return;
+
+ /* verify that pattr_hdr is within the mbuf data */
+ for (m0 = m; m0 != NULL; m0 = m0->m_next)
+ if ((pktattr->pattr_hdr >= m0->m_data) &&
+ (pktattr->pattr_hdr < m0->m_data + m0->m_len))
+ break;
+ if (m0 == NULL) {
+ /* ick, pattr_hdr is stale */
+ pktattr->pattr_af = AF_UNSPEC;
+#ifdef ALTQ_DEBUG
+ printf("write_dsfield: can't locate header!\n");
+#endif
+ return;
+ }
+
+ if (pktattr->pattr_af == AF_INET) {
+ struct ip *ip = (struct ip *)pktattr->pattr_hdr;
+ u_int8_t old;
+ int32_t sum;
+
+ if (ip->ip_v != 4)
+ return; /* version mismatch! */
+ old = ip->ip_tos;
+ dsfield |= old & 3; /* leave CU bits */
+ if (old == dsfield)
+ return;
+ ip->ip_tos = dsfield;
+ /*
+ * update checksum (from RFC1624)
+ * HC' = ~(~HC + ~m + m')
+ */
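+ /*
+ * here m is the old TOS taken as a 16-bit word with a zero
+ * high byte, so ~m is 0xff00 + (~old & 0xff), and m' is the
+ * new dsfield value; the shifts below fold the carries back
+ * in before the final complement.
+ */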
+ sum = ~ntohs(ip->ip_sum) & 0xffff;
+ sum += 0xff00 + (~old & 0xff) + dsfield;
+ sum = (sum >> 16) + (sum & 0xffff);
+ sum += (sum >> 16); /* add carry */
+
+ ip->ip_sum = htons(~sum & 0xffff);
+ }
+#ifdef INET6
+ else if (pktattr->pattr_af == AF_INET6) {
+ struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
+ u_int32_t flowlabel;
+
+ flowlabel = ntohl(ip6->ip6_flow);
+ if ((flowlabel >> 28) != 6)
+ return; /* version mismatch! */
+ flowlabel = (flowlabel & 0xf03fffff) | (dsfield << 20);
+ ip6->ip6_flow = htonl(flowlabel);
+ }
+#endif
+ return;
+}
+
+
+/*
+ * high resolution clock support taking advantage of a machine dependent
+ * high resolution time counter (e.g., timestamp counter of intel pentium).
+ * we assume
+ * - 64-bit-long monotonically-increasing counter
+ * - frequency range is 100M-4GHz (CPU speed)
+ */
+/* if pcc is not available or disabled, emulate 256MHz using microtime() */
+#define MACHCLK_SHIFT 8
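+/*
+ * i.e., the emulated machclk_freq is 1000000 << 8 = 256000000 Hz and
+ * read_machclk() then returns microseconds since boot shifted left by 8.
+ */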
+
+int machclk_usepcc;
+u_int32_t machclk_freq;
+u_int32_t machclk_per_tick;
+
+#if defined(__i386__) && defined(__NetBSD__)
+extern u_int64_t cpu_tsc_freq;
+#endif
+
+#if (__FreeBSD_version >= 700035)
+/* Update TSC freq with the value indicated by the caller. */
+static void
+tsc_freq_changed(void *arg, const struct cf_level *level, int status)
+{
+ /* If there was an error during the transition, don't do anything. */
+ if (status != 0)
+ return;
+
+#if (__FreeBSD_version >= 701102) && (defined(__amd64__) || defined(__i386__))
+ /* If TSC is P-state invariant, don't do anything. */
+ if (tsc_is_invariant)
+ return;
+#endif
+
+ /* Total setting for this level gives the new frequency in MHz. */
+ init_machclk();
+}
+EVENTHANDLER_DEFINE(cpufreq_post_change, tsc_freq_changed, NULL,
+ EVENTHANDLER_PRI_LAST);
+#endif /* __FreeBSD_version >= 700035 */
+
+static void
+init_machclk_setup(void)
+{
+#if (__FreeBSD_version >= 600000)
+ callout_init(&tbr_callout, 0);
+#endif
+
+ machclk_usepcc = 1;
+
+#if (!defined(__amd64__) && !defined(__i386__)) || defined(ALTQ_NOPCC)
+ machclk_usepcc = 0;
+#endif
+#if defined(__FreeBSD__) && defined(SMP)
+ machclk_usepcc = 0;
+#endif
+#if defined(__NetBSD__) && defined(MULTIPROCESSOR)
+ machclk_usepcc = 0;
+#endif
+#if defined(__amd64__) || defined(__i386__)
+ /* check if TSC is available */
+#ifdef __FreeBSD__
+ if ((cpu_feature & CPUID_TSC) == 0 ||
+ atomic_load_acq_64(&tsc_freq) == 0)
+#else
+ if ((cpu_feature & CPUID_TSC) == 0)
+#endif
+ machclk_usepcc = 0;
+#endif
+}
+
+void
+init_machclk(void)
+{
+ static int called;
+
+ /* Call one-time initialization function. */
+ if (!called) {
+ init_machclk_setup();
+ called = 1;
+ }
+
+ if (machclk_usepcc == 0) {
+ /* emulate 256MHz using microtime() */
+ machclk_freq = 1000000 << MACHCLK_SHIFT;
+ machclk_per_tick = machclk_freq / hz;
+#ifdef ALTQ_DEBUG
+ printf("altq: emulate %uHz cpu clock\n", machclk_freq);
+#endif
+ return;
+ }
+
+ /*
+ * if the clock frequency (of Pentium TSC or Alpha PCC) is
+ * accessible, just use it.
+ */
+#if defined(__amd64__) || defined(__i386__)
+#ifdef __FreeBSD__
+ machclk_freq = atomic_load_acq_64(&tsc_freq);
+#elif defined(__NetBSD__)
+ machclk_freq = (u_int32_t)cpu_tsc_freq;
+#elif defined(__OpenBSD__) && (defined(I586_CPU) || defined(I686_CPU))
+ machclk_freq = pentium_mhz * 1000000;
+#endif
+#endif
+
+ /*
+ * if we don't know the clock frequency, measure it.
+ */
+ if (machclk_freq == 0) {
+ static int wait;
+ struct timeval tv_start, tv_end;
+ u_int64_t start, end, diff;
+ int timo;
+
+ microtime(&tv_start);
+ start = read_machclk();
+ timo = hz; /* 1 sec */
+ (void)tsleep(&wait, PWAIT | PCATCH, "init_machclk", timo);
+ microtime(&tv_end);
+ end = read_machclk();
+ diff = (u_int64_t)(tv_end.tv_sec - tv_start.tv_sec) * 1000000
+ + tv_end.tv_usec - tv_start.tv_usec;
+ if (diff != 0)
+ machclk_freq = (u_int)((end - start) * 1000000 / diff);
+ }
+
+ machclk_per_tick = machclk_freq / hz;
+
+#ifdef ALTQ_DEBUG
+ printf("altq: CPU clock: %uHz\n", machclk_freq);
+#endif
+}
+
+#if defined(__OpenBSD__) && defined(__i386__)
+static __inline u_int64_t
+rdtsc(void)
+{
+ u_int64_t rv;
+ __asm __volatile(".byte 0x0f, 0x31" : "=A" (rv));
+ return (rv);
+}
+#endif /* __OpenBSD__ && __i386__ */
+
+u_int64_t
+read_machclk(void)
+{
+ u_int64_t val;
+
+ if (machclk_usepcc) {
+#if defined(__amd64__) || defined(__i386__)
+ val = rdtsc();
+#else
+ panic("read_machclk");
+#endif
+ } else {
+ struct timeval tv;
+
+ microtime(&tv);
+ val = (((u_int64_t)(tv.tv_sec - boottime.tv_sec) * 1000000
+ + tv.tv_usec) << MACHCLK_SHIFT);
+ }
+ return (val);
+}
+
+#ifdef ALTQ3_CLFIER_COMPAT
+
+#ifndef IPPROTO_ESP
+#define IPPROTO_ESP 50 /* encapsulating security payload */
+#endif
+#ifndef IPPROTO_AH
+#define IPPROTO_AH 51 /* authentication header */
+#endif
+
+/*
+ * extract flow information from a given packet.
+ * filt_bmask shows the flowinfo fields required.
+ * we assume the ip header is in one mbuf, and addresses and ports are
+ * in network byte order.
+ */
+int
+altq_extractflow(m, af, flow, filt_bmask)
+ struct mbuf *m;
+ int af;
+ struct flowinfo *flow;
+ u_int32_t filt_bmask;
+{
+
+ switch (af) {
+ case PF_INET: {
+ struct flowinfo_in *fin;
+ struct ip *ip;
+
+ ip = mtod(m, struct ip *);
+
+ if (ip->ip_v != 4)
+ break;
+
+ fin = (struct flowinfo_in *)flow;
+ fin->fi_len = sizeof(struct flowinfo_in);
+ fin->fi_family = AF_INET;
+
+ fin->fi_proto = ip->ip_p;
+ fin->fi_tos = ip->ip_tos;
+
+ fin->fi_src.s_addr = ip->ip_src.s_addr;
+ fin->fi_dst.s_addr = ip->ip_dst.s_addr;
+
+ if (filt_bmask & FIMB4_PORTS)
+ /* if port info is required, extract port numbers */
+ extract_ports4(m, ip, fin);
+ else {
+ fin->fi_sport = 0;
+ fin->fi_dport = 0;
+ fin->fi_gpi = 0;
+ }
+ return (1);
+ }
+
+#ifdef INET6
+ case PF_INET6: {
+ struct flowinfo_in6 *fin6;
+ struct ip6_hdr *ip6;
+
+ ip6 = mtod(m, struct ip6_hdr *);
+ /* should we check the ip version? */
+
+ fin6 = (struct flowinfo_in6 *)flow;
+ fin6->fi6_len = sizeof(struct flowinfo_in6);
+ fin6->fi6_family = AF_INET6;
+
+ fin6->fi6_proto = ip6->ip6_nxt;
+ fin6->fi6_tclass = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
+
+ fin6->fi6_flowlabel = ip6->ip6_flow & htonl(0x000fffff);
+ fin6->fi6_src = ip6->ip6_src;
+ fin6->fi6_dst = ip6->ip6_dst;
+
+ if ((filt_bmask & FIMB6_PORTS) ||
+ ((filt_bmask & FIMB6_PROTO)
+ && ip6->ip6_nxt > IPPROTO_IPV6))
+ /*
+ * if port info is required, or proto is required
+ * but there are option headers, extract port
+ * and protocol numbers.
+ */
+ extract_ports6(m, ip6, fin6);
+ else {
+ fin6->fi6_sport = 0;
+ fin6->fi6_dport = 0;
+ fin6->fi6_gpi = 0;
+ }
+ return (1);
+ }
+#endif /* INET6 */
+
+ default:
+ break;
+ }
+
+ /* failed */
+ flow->fi_len = sizeof(struct flowinfo);
+ flow->fi_family = AF_UNSPEC;
+ return (0);
+}
+
+/*
+ * helper routine to extract port numbers
+ */
+/* structure for ipsec and ipv6 option header template */
+struct _opt6 {
+ u_int8_t opt6_nxt; /* next header */
+ u_int8_t opt6_hlen; /* header extension length */
+ u_int16_t _pad;
+ u_int32_t ah_spi; /* security parameter index
+ for authentication header */
+};
+
+/*
+ * extract port numbers from an ipv4 packet.
+ */
+static int
+extract_ports4(m, ip, fin)
+ struct mbuf *m;
+ struct ip *ip;
+ struct flowinfo_in *fin;
+{
+ struct mbuf *m0;
+ u_short ip_off;
+ u_int8_t proto;
+ int off;
+
+ fin->fi_sport = 0;
+ fin->fi_dport = 0;
+ fin->fi_gpi = 0;
+
+ ip_off = ntohs(ip->ip_off);
+ /* if it is a fragment, try cached fragment info */
+ if (ip_off & IP_OFFMASK) {
+ ip4f_lookup(ip, fin);
+ return (1);
+ }
+
+ /* locate the mbuf containing the protocol header */
+ for (m0 = m; m0 != NULL; m0 = m0->m_next)
+ if (((caddr_t)ip >= m0->m_data) &&
+ ((caddr_t)ip < m0->m_data + m0->m_len))
+ break;
+ if (m0 == NULL) {
+#ifdef ALTQ_DEBUG
+ printf("extract_ports4: can't locate header! ip=%p\n", ip);
+#endif
+ return (0);
+ }
+ off = ((caddr_t)ip - m0->m_data) + (ip->ip_hl << 2);
+ proto = ip->ip_p;
+
+#ifdef ALTQ_IPSEC
+ again:
+#endif
+ while (off >= m0->m_len) {
+ off -= m0->m_len;
+ m0 = m0->m_next;
+ if (m0 == NULL)
+ return (0); /* bogus ip_hl! */
+ }
+ if (m0->m_len < off + 4)
+ return (0);
+
+ switch (proto) {
+ case IPPROTO_TCP:
+ case IPPROTO_UDP: {
+ struct udphdr *udp;
+
+ udp = (struct udphdr *)(mtod(m0, caddr_t) + off);
+ fin->fi_sport = udp->uh_sport;
+ fin->fi_dport = udp->uh_dport;
+ fin->fi_proto = proto;
+ }
+ break;
+
+#ifdef ALTQ_IPSEC
+ case IPPROTO_ESP:
+ if (fin->fi_gpi == 0){
+ u_int32_t *gpi;
+
+ gpi = (u_int32_t *)(mtod(m0, caddr_t) + off);
+ fin->fi_gpi = *gpi;
+ }
+ fin->fi_proto = proto;
+ break;
+
+ case IPPROTO_AH: {
+ /* get next header and header length */
+ struct _opt6 *opt6;
+
+ opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off);
+ proto = opt6->opt6_nxt;
+ off += 8 + (opt6->opt6_hlen * 4);
+ if (fin->fi_gpi == 0 && m0->m_len >= off + 8)
+ fin->fi_gpi = opt6->ah_spi;
+ }
+ /* goto the next header */
+ goto again;
+#endif /* ALTQ_IPSEC */
+
+ default:
+ fin->fi_proto = proto;
+ return (0);
+ }
+
+ /* if this is a first fragment, cache it. */
+ if (ip_off & IP_MF)
+ ip4f_cache(ip, fin);
+
+ return (1);
+}
+
+#ifdef INET6
+static int
+extract_ports6(m, ip6, fin6)
+ struct mbuf *m;
+ struct ip6_hdr *ip6;
+ struct flowinfo_in6 *fin6;
+{
+ struct mbuf *m0;
+ int off;
+ u_int8_t proto;
+
+ fin6->fi6_gpi = 0;
+ fin6->fi6_sport = 0;
+ fin6->fi6_dport = 0;
+
+ /* locate the mbuf containing the protocol header */
+ for (m0 = m; m0 != NULL; m0 = m0->m_next)
+ if (((caddr_t)ip6 >= m0->m_data) &&
+ ((caddr_t)ip6 < m0->m_data + m0->m_len))
+ break;
+ if (m0 == NULL) {
+#ifdef ALTQ_DEBUG
+ printf("extract_ports6: can't locate header! ip6=%p\n", ip6);
+#endif
+ return (0);
+ }
+ off = ((caddr_t)ip6 - m0->m_data) + sizeof(struct ip6_hdr);
+
+ proto = ip6->ip6_nxt;
+ do {
+ while (off >= m0->m_len) {
+ off -= m0->m_len;
+ m0 = m0->m_next;
+ if (m0 == NULL)
+ return (0);
+ }
+ if (m0->m_len < off + 4)
+ return (0);
+
+ switch (proto) {
+ case IPPROTO_TCP:
+ case IPPROTO_UDP: {
+ struct udphdr *udp;
+
+ udp = (struct udphdr *)(mtod(m0, caddr_t) + off);
+ fin6->fi6_sport = udp->uh_sport;
+ fin6->fi6_dport = udp->uh_dport;
+ fin6->fi6_proto = proto;
+ }
+ return (1);
+
+ case IPPROTO_ESP:
+ if (fin6->fi6_gpi == 0) {
+ u_int32_t *gpi;
+
+ gpi = (u_int32_t *)(mtod(m0, caddr_t) + off);
+ fin6->fi6_gpi = *gpi;
+ }
+ fin6->fi6_proto = proto;
+ return (1);
+
+ case IPPROTO_AH: {
+ /* get next header and header length */
+ struct _opt6 *opt6;
+
+ opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off);
+ if (fin6->fi6_gpi == 0 && m0->m_len >= off + 8)
+ fin6->fi6_gpi = opt6->ah_spi;
+ proto = opt6->opt6_nxt;
+ off += 8 + (opt6->opt6_hlen * 4);
+ /* goto the next header */
+ break;
+ }
+
+ case IPPROTO_HOPOPTS:
+ case IPPROTO_ROUTING:
+ case IPPROTO_DSTOPTS: {
+ /* get next header and header length */
+ struct _opt6 *opt6;
+
+ opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off);
+ proto = opt6->opt6_nxt;
+ off += (opt6->opt6_hlen + 1) * 8;
+ /* goto the next header */
+ break;
+ }
+
+ case IPPROTO_FRAGMENT:
+ /* ipv6 fragments are not supported yet */
+ default:
+ fin6->fi6_proto = proto;
+ return (0);
+ }
+ } while (1);
+ /*NOTREACHED*/
+}
+#endif /* INET6 */
+
+/*
+ * altq common classifier
+ */
+int
+acc_add_filter(classifier, filter, class, phandle)
+ struct acc_classifier *classifier;
+ struct flow_filter *filter;
+ void *class;
+ u_long *phandle;
+{
+ struct acc_filter *afp, *prev, *tmp;
+ int i, s;
+
+#ifdef INET6
+ if (filter->ff_flow.fi_family != AF_INET &&
+ filter->ff_flow.fi_family != AF_INET6)
+ return (EINVAL);
+#else
+ if (filter->ff_flow.fi_family != AF_INET)
+ return (EINVAL);
+#endif
+
+ afp = malloc(sizeof(struct acc_filter),
+ M_DEVBUF, M_WAITOK);
+ if (afp == NULL)
+ return (ENOMEM);
+ bzero(afp, sizeof(struct acc_filter));
+
+ afp->f_filter = *filter;
+ afp->f_class = class;
+
+ i = ACC_WILDCARD_INDEX;
+ if (filter->ff_flow.fi_family == AF_INET) {
+ struct flow_filter *filter4 = &afp->f_filter;
+
+ /*
+ * if address is 0, it's a wildcard. if address mask
+ * isn't set, use full mask.
+ */
+ if (filter4->ff_flow.fi_dst.s_addr == 0)
+ filter4->ff_mask.mask_dst.s_addr = 0;
+ else if (filter4->ff_mask.mask_dst.s_addr == 0)
+ filter4->ff_mask.mask_dst.s_addr = 0xffffffff;
+ if (filter4->ff_flow.fi_src.s_addr == 0)
+ filter4->ff_mask.mask_src.s_addr = 0;
+ else if (filter4->ff_mask.mask_src.s_addr == 0)
+ filter4->ff_mask.mask_src.s_addr = 0xffffffff;
+
+ /* clear extra bits in addresses */
+ filter4->ff_flow.fi_dst.s_addr &=
+ filter4->ff_mask.mask_dst.s_addr;
+ filter4->ff_flow.fi_src.s_addr &=
+ filter4->ff_mask.mask_src.s_addr;
+
+ /*
+ * if dst address is a wildcard, use hash-entry
+ * ACC_WILDCARD_INDEX.
+ */
+ if (filter4->ff_mask.mask_dst.s_addr != 0xffffffff)
+ i = ACC_WILDCARD_INDEX;
+ else
+ i = ACC_GET_HASH_INDEX(filter4->ff_flow.fi_dst.s_addr);
+ }
+#ifdef INET6
+ else if (filter->ff_flow.fi_family == AF_INET6) {
+ struct flow_filter6 *filter6 =
+ (struct flow_filter6 *)&afp->f_filter;
+#ifndef IN6MASK0 /* taken from kame ipv6 */
+#define IN6MASK0 {{{ 0, 0, 0, 0 }}}
+#define IN6MASK128 {{{ 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }}}
+ const struct in6_addr in6mask0 = IN6MASK0;
+ const struct in6_addr in6mask128 = IN6MASK128;
+#endif
+
+ if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_flow6.fi6_dst))
+ filter6->ff_mask6.mask6_dst = in6mask0;
+ else if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_mask6.mask6_dst))
+ filter6->ff_mask6.mask6_dst = in6mask128;
+ if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_flow6.fi6_src))
+ filter6->ff_mask6.mask6_src = in6mask0;
+ else if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_mask6.mask6_src))
+ filter6->ff_mask6.mask6_src = in6mask128;
+
+ /* clear extra bits in addresses */
+ for (i = 0; i < 16; i++)
+ filter6->ff_flow6.fi6_dst.s6_addr[i] &=
+ filter6->ff_mask6.mask6_dst.s6_addr[i];
+ for (i = 0; i < 16; i++)
+ filter6->ff_flow6.fi6_src.s6_addr[i] &=
+ filter6->ff_mask6.mask6_src.s6_addr[i];
+
+ if (filter6->ff_flow6.fi6_flowlabel == 0)
+ i = ACC_WILDCARD_INDEX;
+ else
+ i = ACC_GET_HASH_INDEX(filter6->ff_flow6.fi6_flowlabel);
+ }
+#endif /* INET6 */
+
+ afp->f_handle = get_filt_handle(classifier, i);
+
+ /* update filter bitmask */
+ afp->f_fbmask = filt2fibmask(filter);
+ classifier->acc_fbmask |= afp->f_fbmask;
+
+ /*
+ * add this filter to the filter list.
+ * filters are ordered from the highest rule number.
+ */
+#ifdef __NetBSD__
+ s = splnet();
+#else
+ s = splimp();
+#endif
+ prev = NULL;
+ LIST_FOREACH(tmp, &classifier->acc_filters[i], f_chain) {
+ if (tmp->f_filter.ff_ruleno > afp->f_filter.ff_ruleno)
+ prev = tmp;
+ else
+ break;
+ }
+ if (prev == NULL)
+ LIST_INSERT_HEAD(&classifier->acc_filters[i], afp, f_chain);
+ else
+ LIST_INSERT_AFTER(prev, afp, f_chain);
+ splx(s);
+
+ *phandle = afp->f_handle;
+ return (0);
+}
+
+int
+acc_delete_filter(classifier, handle)
+ struct acc_classifier *classifier;
+ u_long handle;
+{
+ struct acc_filter *afp;
+ int s;
+
+ if ((afp = filth_to_filtp(classifier, handle)) == NULL)
+ return (EINVAL);
+
+#ifdef __NetBSD__
+ s = splnet();
+#else
+ s = splimp();
+#endif
+ LIST_REMOVE(afp, f_chain);
+ splx(s);
+
+ free(afp, M_DEVBUF);
+
+ /* todo: update filt_bmask */
+
+ return (0);
+}
+
+/*
+ * delete filters that reference the specified class.
+ * if the 'all' flag is non-zero, delete all the filters.
+ */
+int
+acc_discard_filters(classifier, class, all)
+ struct acc_classifier *classifier;
+ void *class;
+ int all;
+{
+ struct acc_filter *afp;
+ int i, s;
+
+#ifdef __NetBSD__
+ s = splnet();
+#else
+ s = splimp();
+#endif
+ for (i = 0; i < ACC_FILTER_TABLESIZE; i++) {
+ do {
+ LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
+ if (all || afp->f_class == class) {
+ LIST_REMOVE(afp, f_chain);
+ free(afp, M_DEVBUF);
+ /* start again from the head */
+ break;
+ }
+ } while (afp != NULL);
+ }
+ splx(s);
+
+ if (all)
+ classifier->acc_fbmask = 0;
+
+ return (0);
+}
+
+void *
+acc_classify(clfier, m, af)
+ void *clfier;
+ struct mbuf *m;
+ int af;
+{
+ struct acc_classifier *classifier;
+ struct flowinfo flow;
+ struct acc_filter *afp;
+ int i;
+
+ classifier = (struct acc_classifier *)clfier;
+ altq_extractflow(m, af, &flow, classifier->acc_fbmask);
+
+ if (flow.fi_family == AF_INET) {
+ struct flowinfo_in *fp = (struct flowinfo_in *)&flow;
+
+ if ((classifier->acc_fbmask & FIMB4_ALL) == FIMB4_TOS) {
+ /* only tos is used */
+ LIST_FOREACH(afp,
+ &classifier->acc_filters[ACC_WILDCARD_INDEX],
+ f_chain)
+ if (apply_tosfilter4(afp->f_fbmask,
+ &afp->f_filter, fp))
+ /* filter matched */
+ return (afp->f_class);
+ } else if ((classifier->acc_fbmask &
+ (~(FIMB4_PROTO|FIMB4_SPORT|FIMB4_DPORT) & FIMB4_ALL))
+ == 0) {
+ /* only proto and ports are used */
+ LIST_FOREACH(afp,
+ &classifier->acc_filters[ACC_WILDCARD_INDEX],
+ f_chain)
+ if (apply_ppfilter4(afp->f_fbmask,
+ &afp->f_filter, fp))
+ /* filter matched */
+ return (afp->f_class);
+ } else {
+ /* get the filter hash entry from its dest address */
+ i = ACC_GET_HASH_INDEX(fp->fi_dst.s_addr);
+ do {
+ /*
+ * go through this loop twice. first for dst
+ * hash, second for wildcards.
+ */
+ LIST_FOREACH(afp, &classifier->acc_filters[i],
+ f_chain)
+ if (apply_filter4(afp->f_fbmask,
+ &afp->f_filter, fp))
+ /* filter matched */
+ return (afp->f_class);
+
+ /*
+ * check again for filters with a dst addr
+ * wildcard.
+ * (daddr == 0 || dmask != 0xffffffff).
+ */
+ if (i != ACC_WILDCARD_INDEX)
+ i = ACC_WILDCARD_INDEX;
+ else
+ break;
+ } while (1);
+ }
+ }
+#ifdef INET6
+ else if (flow.fi_family == AF_INET6) {
+ struct flowinfo_in6 *fp6 = (struct flowinfo_in6 *)&flow;
+
+ /* get the filter hash entry from its flow ID */
+ if (fp6->fi6_flowlabel != 0)
+ i = ACC_GET_HASH_INDEX(fp6->fi6_flowlabel);
+ else
+			/* flowlabel can be zero */
+ i = ACC_WILDCARD_INDEX;
+
+ /* go through this loop twice. first for flow hash, second
+ for wildcards. */
+ do {
+ LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
+ if (apply_filter6(afp->f_fbmask,
+ (struct flow_filter6 *)&afp->f_filter,
+ fp6))
+ /* filter matched */
+ return (afp->f_class);
+
+ /*
+ * check again for filters with a wildcard.
+ */
+ if (i != ACC_WILDCARD_INDEX)
+ i = ACC_WILDCARD_INDEX;
+ else
+ break;
+ } while (1);
+ }
+#endif /* INET6 */
+
+ /* no filter matched */
+ return (NULL);
+}
+
+static int
+apply_filter4(fbmask, filt, pkt)
+ u_int32_t fbmask;
+ struct flow_filter *filt;
+ struct flowinfo_in *pkt;
+{
+ if (filt->ff_flow.fi_family != AF_INET)
+ return (0);
+ if ((fbmask & FIMB4_SPORT) && filt->ff_flow.fi_sport != pkt->fi_sport)
+ return (0);
+ if ((fbmask & FIMB4_DPORT) && filt->ff_flow.fi_dport != pkt->fi_dport)
+ return (0);
+ if ((fbmask & FIMB4_DADDR) &&
+ filt->ff_flow.fi_dst.s_addr !=
+ (pkt->fi_dst.s_addr & filt->ff_mask.mask_dst.s_addr))
+ return (0);
+ if ((fbmask & FIMB4_SADDR) &&
+ filt->ff_flow.fi_src.s_addr !=
+ (pkt->fi_src.s_addr & filt->ff_mask.mask_src.s_addr))
+ return (0);
+ if ((fbmask & FIMB4_PROTO) && filt->ff_flow.fi_proto != pkt->fi_proto)
+ return (0);
+ if ((fbmask & FIMB4_TOS) && filt->ff_flow.fi_tos !=
+ (pkt->fi_tos & filt->ff_mask.mask_tos))
+ return (0);
+ if ((fbmask & FIMB4_GPI) && filt->ff_flow.fi_gpi != (pkt->fi_gpi))
+ return (0);
+ /* match */
+ return (1);
+}
+
+/*
+ * filter matching function optimized for a common case that checks
+ * only protocol and port numbers
+ */
+static int
+apply_ppfilter4(fbmask, filt, pkt)
+ u_int32_t fbmask;
+ struct flow_filter *filt;
+ struct flowinfo_in *pkt;
+{
+ if (filt->ff_flow.fi_family != AF_INET)
+ return (0);
+ if ((fbmask & FIMB4_SPORT) && filt->ff_flow.fi_sport != pkt->fi_sport)
+ return (0);
+ if ((fbmask & FIMB4_DPORT) && filt->ff_flow.fi_dport != pkt->fi_dport)
+ return (0);
+ if ((fbmask & FIMB4_PROTO) && filt->ff_flow.fi_proto != pkt->fi_proto)
+ return (0);
+ /* match */
+ return (1);
+}
+
+/*
+ * filter matching function only for tos field.
+ */
+static int
+apply_tosfilter4(fbmask, filt, pkt)
+ u_int32_t fbmask;
+ struct flow_filter *filt;
+ struct flowinfo_in *pkt;
+{
+ if (filt->ff_flow.fi_family != AF_INET)
+ return (0);
+ if ((fbmask & FIMB4_TOS) && filt->ff_flow.fi_tos !=
+ (pkt->fi_tos & filt->ff_mask.mask_tos))
+ return (0);
+ /* match */
+ return (1);
+}
+
+#ifdef INET6
+static int
+apply_filter6(fbmask, filt, pkt)
+ u_int32_t fbmask;
+ struct flow_filter6 *filt;
+ struct flowinfo_in6 *pkt;
+{
+ int i;
+
+ if (filt->ff_flow6.fi6_family != AF_INET6)
+ return (0);
+ if ((fbmask & FIMB6_FLABEL) &&
+ filt->ff_flow6.fi6_flowlabel != pkt->fi6_flowlabel)
+ return (0);
+ if ((fbmask & FIMB6_PROTO) &&
+ filt->ff_flow6.fi6_proto != pkt->fi6_proto)
+ return (0);
+ if ((fbmask & FIMB6_SPORT) &&
+ filt->ff_flow6.fi6_sport != pkt->fi6_sport)
+ return (0);
+ if ((fbmask & FIMB6_DPORT) &&
+ filt->ff_flow6.fi6_dport != pkt->fi6_dport)
+ return (0);
+ if (fbmask & FIMB6_SADDR) {
+ for (i = 0; i < 4; i++)
+ if (filt->ff_flow6.fi6_src.s6_addr32[i] !=
+ (pkt->fi6_src.s6_addr32[i] &
+ filt->ff_mask6.mask6_src.s6_addr32[i]))
+ return (0);
+ }
+ if (fbmask & FIMB6_DADDR) {
+ for (i = 0; i < 4; i++)
+ if (filt->ff_flow6.fi6_dst.s6_addr32[i] !=
+ (pkt->fi6_dst.s6_addr32[i] &
+ filt->ff_mask6.mask6_dst.s6_addr32[i]))
+ return (0);
+ }
+ if ((fbmask & FIMB6_TCLASS) &&
+ filt->ff_flow6.fi6_tclass !=
+ (pkt->fi6_tclass & filt->ff_mask6.mask6_tclass))
+ return (0);
+ if ((fbmask & FIMB6_GPI) &&
+ filt->ff_flow6.fi6_gpi != pkt->fi6_gpi)
+ return (0);
+ /* match */
+ return (1);
+}
+#endif /* INET6 */
+
+/*
+ * filter handle:
+ * bit 20-28: index to the filter hash table
+ * bit 0-19: unique id in the hash bucket.
+ */
+static u_long
+get_filt_handle(classifier, i)
+ struct acc_classifier *classifier;
+ int i;
+{
+ static u_long handle_number = 1;
+ u_long handle;
+ struct acc_filter *afp;
+
+ while (1) {
+ handle = handle_number++ & 0x000fffff;
+
+ if (LIST_EMPTY(&classifier->acc_filters[i]))
+ break;
+
+ LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
+ if ((afp->f_handle & 0x000fffff) == handle)
+ break;
+ if (afp == NULL)
+ break;
+ /* this handle is already used, try again */
+ }
+
+ return ((i << 20) | handle);
+}
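As an aside, the handle layout described above can be exercised with a small stand-alone sketch (illustrative only, not part of this commit; the EX_* names are hypothetical and simply mirror the 20-bit shift and mask used by get_filt_handle() and ACC_GET_HINDEX()):

#include <stdio.h>

#define EX_GET_HINDEX(h)	((h) >> 20)		/* mirrors ACC_GET_HINDEX */
#define EX_GET_HID(h)		((h) & 0x000fffffUL)	/* low 20 bits: per-bucket id */

int
main(void)
{
	unsigned long handle = (5UL << 20) | 42UL;	/* bucket 5, id 42 */

	printf("index=%lu id=%lu\n",
	    EX_GET_HINDEX(handle), EX_GET_HID(handle));
	return (0);
}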
+
+/* convert filter handle to filter pointer */
+static struct acc_filter *
+filth_to_filtp(classifier, handle)
+ struct acc_classifier *classifier;
+ u_long handle;
+{
+ struct acc_filter *afp;
+ int i;
+
+ i = ACC_GET_HINDEX(handle);
+
+ LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
+ if (afp->f_handle == handle)
+ return (afp);
+
+ return (NULL);
+}
+
+/* create flowinfo bitmask */
+static u_int32_t
+filt2fibmask(filt)
+ struct flow_filter *filt;
+{
+ u_int32_t mask = 0;
+#ifdef INET6
+ struct flow_filter6 *filt6;
+#endif
+
+ switch (filt->ff_flow.fi_family) {
+ case AF_INET:
+ if (filt->ff_flow.fi_proto != 0)
+ mask |= FIMB4_PROTO;
+ if (filt->ff_flow.fi_tos != 0)
+ mask |= FIMB4_TOS;
+ if (filt->ff_flow.fi_dst.s_addr != 0)
+ mask |= FIMB4_DADDR;
+ if (filt->ff_flow.fi_src.s_addr != 0)
+ mask |= FIMB4_SADDR;
+ if (filt->ff_flow.fi_sport != 0)
+ mask |= FIMB4_SPORT;
+ if (filt->ff_flow.fi_dport != 0)
+ mask |= FIMB4_DPORT;
+ if (filt->ff_flow.fi_gpi != 0)
+ mask |= FIMB4_GPI;
+ break;
+#ifdef INET6
+ case AF_INET6:
+ filt6 = (struct flow_filter6 *)filt;
+
+ if (filt6->ff_flow6.fi6_proto != 0)
+ mask |= FIMB6_PROTO;
+ if (filt6->ff_flow6.fi6_tclass != 0)
+ mask |= FIMB6_TCLASS;
+ if (!IN6_IS_ADDR_UNSPECIFIED(&filt6->ff_flow6.fi6_dst))
+ mask |= FIMB6_DADDR;
+ if (!IN6_IS_ADDR_UNSPECIFIED(&filt6->ff_flow6.fi6_src))
+ mask |= FIMB6_SADDR;
+ if (filt6->ff_flow6.fi6_sport != 0)
+ mask |= FIMB6_SPORT;
+ if (filt6->ff_flow6.fi6_dport != 0)
+ mask |= FIMB6_DPORT;
+ if (filt6->ff_flow6.fi6_gpi != 0)
+ mask |= FIMB6_GPI;
+ if (filt6->ff_flow6.fi6_flowlabel != 0)
+ mask |= FIMB6_FLABEL;
+ break;
+#endif /* INET6 */
+ }
+ return (mask);
+}
+
+
+/*
+ * helper functions to handle IPv4 fragments.
+ * currently only in-sequence fragments are handled.
+ * - fragment info is cached in an LRU list.
+ * - when a first fragment is found, cache its flow info.
+ * - when a non-first fragment is found, lookup the cache.
+ */
+
+struct ip4_frag {
+ TAILQ_ENTRY(ip4_frag) ip4f_chain;
+ char ip4f_valid;
+ u_short ip4f_id;
+ struct flowinfo_in ip4f_info;
+};
+
+static TAILQ_HEAD(ip4f_list, ip4_frag) ip4f_list; /* IPv4 fragment cache */
+
+#define IP4F_TABSIZE 16 /* IPv4 fragment cache size */
+
+
+static void
+ip4f_cache(ip, fin)
+ struct ip *ip;
+ struct flowinfo_in *fin;
+{
+ struct ip4_frag *fp;
+
+ if (TAILQ_EMPTY(&ip4f_list)) {
+ /* first time call, allocate fragment cache entries. */
+ if (ip4f_init() < 0)
+ /* allocation failed! */
+ return;
+ }
+
+ fp = ip4f_alloc();
+ fp->ip4f_id = ip->ip_id;
+ fp->ip4f_info.fi_proto = ip->ip_p;
+ fp->ip4f_info.fi_src.s_addr = ip->ip_src.s_addr;
+ fp->ip4f_info.fi_dst.s_addr = ip->ip_dst.s_addr;
+
+ /* save port numbers */
+ fp->ip4f_info.fi_sport = fin->fi_sport;
+ fp->ip4f_info.fi_dport = fin->fi_dport;
+ fp->ip4f_info.fi_gpi = fin->fi_gpi;
+}
+
+static int
+ip4f_lookup(ip, fin)
+ struct ip *ip;
+ struct flowinfo_in *fin;
+{
+ struct ip4_frag *fp;
+
+ for (fp = TAILQ_FIRST(&ip4f_list); fp != NULL && fp->ip4f_valid;
+ fp = TAILQ_NEXT(fp, ip4f_chain))
+ if (ip->ip_id == fp->ip4f_id &&
+ ip->ip_src.s_addr == fp->ip4f_info.fi_src.s_addr &&
+ ip->ip_dst.s_addr == fp->ip4f_info.fi_dst.s_addr &&
+ ip->ip_p == fp->ip4f_info.fi_proto) {
+
+ /* found the matching entry */
+ fin->fi_sport = fp->ip4f_info.fi_sport;
+ fin->fi_dport = fp->ip4f_info.fi_dport;
+ fin->fi_gpi = fp->ip4f_info.fi_gpi;
+
+ if ((ntohs(ip->ip_off) & IP_MF) == 0)
+ /* this is the last fragment,
+ release the entry. */
+ ip4f_free(fp);
+
+ return (1);
+ }
+
+ /* no matching entry found */
+ return (0);
+}
+
+static int
+ip4f_init(void)
+{
+ struct ip4_frag *fp;
+ int i;
+
+ TAILQ_INIT(&ip4f_list);
+ for (i=0; i<IP4F_TABSIZE; i++) {
+ fp = malloc(sizeof(struct ip4_frag),
+ M_DEVBUF, M_NOWAIT);
+ if (fp == NULL) {
+ printf("ip4f_init: can't alloc %dth entry!\n", i);
+ if (i == 0)
+ return (-1);
+ return (0);
+ }
+ fp->ip4f_valid = 0;
+ TAILQ_INSERT_TAIL(&ip4f_list, fp, ip4f_chain);
+ }
+ return (0);
+}
+
+static struct ip4_frag *
+ip4f_alloc(void)
+{
+ struct ip4_frag *fp;
+
+ /* reclaim an entry at the tail, put it at the head */
+ fp = TAILQ_LAST(&ip4f_list, ip4f_list);
+ TAILQ_REMOVE(&ip4f_list, fp, ip4f_chain);
+ fp->ip4f_valid = 1;
+ TAILQ_INSERT_HEAD(&ip4f_list, fp, ip4f_chain);
+ return (fp);
+}
+
+static void
+ip4f_free(fp)
+ struct ip4_frag *fp;
+{
+ TAILQ_REMOVE(&ip4f_list, fp, ip4f_chain);
+ fp->ip4f_valid = 0;
+ TAILQ_INSERT_TAIL(&ip4f_list, fp, ip4f_chain);
+}
+
+#endif /* ALTQ3_CLFIER_COMPAT */
diff --git a/sys/net/altq/altq_var.h b/sys/net/altq/altq_var.h
new file mode 100644
index 0000000..956ee16
--- /dev/null
+++ b/sys/net/altq/altq_var.h
@@ -0,0 +1,261 @@
+/* $FreeBSD$ */
+/* $KAME: altq_var.h,v 1.16 2003/10/03 05:05:15 kjc Exp $ */
+
+/*
+ * Copyright (C) 1998-2003
+ * Sony Computer Science Laboratories Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#ifndef _ALTQ_ALTQ_VAR_H_
+#define _ALTQ_ALTQ_VAR_H_
+
+#ifdef _KERNEL
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/queue.h>
+
+#ifdef ALTQ3_CLFIER_COMPAT
+/*
+ * filter structure for altq common classifier
+ */
+struct acc_filter {
+ LIST_ENTRY(acc_filter) f_chain;
+ void *f_class; /* pointer to the class */
+ u_long f_handle; /* filter id */
+ u_int32_t f_fbmask; /* filter bitmask */
+ struct flow_filter f_filter; /* filter value */
+};
+
+/*
+ * XXX ACC_FILTER_TABLESIZE can't be larger than 2048 unless we fix
+ * the handle assignment.
+ */
+#define ACC_FILTER_TABLESIZE (256+1)
+#define ACC_FILTER_MASK (ACC_FILTER_TABLESIZE - 2)
+#define ACC_WILDCARD_INDEX (ACC_FILTER_TABLESIZE - 1)
+#ifdef __GNUC__
+#define ACC_GET_HASH_INDEX(addr) \
+ ({int x = (addr) + ((addr) >> 16); (x + (x >> 8)) & ACC_FILTER_MASK;})
+#else
+#define ACC_GET_HASH_INDEX(addr) \
+ (((addr) + ((addr) >> 8) + ((addr) >> 16) + ((addr) >> 24)) \
+ & ACC_FILTER_MASK)
+#endif
+#define ACC_GET_HINDEX(handle) ((handle) >> 20)
+
+#if (__FreeBSD_version > 500000)
+#define ACC_LOCK_INIT(ac) mtx_init(&(ac)->acc_mtx, "classifier", MTX_DEF)
+#define ACC_LOCK_DESTROY(ac) mtx_destroy(&(ac)->acc_mtx)
+#define ACC_LOCK(ac) mtx_lock(&(ac)->acc_mtx)
+#define ACC_UNLOCK(ac) mtx_unlock(&(ac)->acc_mtx)
+#else
+#define ACC_LOCK_INIT(ac)
+#define ACC_LOCK_DESTROY(ac)
+#define ACC_LOCK(ac)
+#define ACC_UNLOCK(ac)
+#endif
+
+struct acc_classifier {
+ u_int32_t acc_fbmask;
+ LIST_HEAD(filt, acc_filter) acc_filters[ACC_FILTER_TABLESIZE];
+
+#if (__FreeBSD_version > 500000)
+ struct mtx acc_mtx;
+#endif
+};
+
+/*
+ * flowinfo mask bits used by classifier
+ */
+/* for ipv4 */
+#define FIMB4_PROTO 0x0001
+#define FIMB4_TOS 0x0002
+#define FIMB4_DADDR 0x0004
+#define FIMB4_SADDR 0x0008
+#define FIMB4_DPORT 0x0010
+#define FIMB4_SPORT 0x0020
+#define FIMB4_GPI 0x0040
+#define FIMB4_ALL 0x007f
+/* for ipv6 */
+#define FIMB6_PROTO 0x0100
+#define FIMB6_TCLASS 0x0200
+#define FIMB6_DADDR 0x0400
+#define FIMB6_SADDR 0x0800
+#define FIMB6_DPORT 0x1000
+#define FIMB6_SPORT 0x2000
+#define FIMB6_GPI 0x4000
+#define FIMB6_FLABEL 0x8000
+#define FIMB6_ALL 0xff00
+
+#define FIMB_ALL (FIMB4_ALL|FIMB6_ALL)
+
+#define FIMB4_PORTS (FIMB4_DPORT|FIMB4_SPORT|FIMB4_GPI)
+#define FIMB6_PORTS (FIMB6_DPORT|FIMB6_SPORT|FIMB6_GPI)
+#endif /* ALTQ3_CLFIER_COMPAT */
+
+/*
+ * machine dependent clock
+ * a 64bit high resolution time counter.
+ */
+extern int machclk_usepcc;
+extern u_int32_t machclk_freq;
+extern u_int32_t machclk_per_tick;
+extern void init_machclk(void);
+extern u_int64_t read_machclk(void);
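A hedged aside: code built on this clock typically converts read_machclk() deltas to wall-clock units by dividing by machclk_freq; a minimal sketch (the ex_ prefix marks a hypothetical helper, assuming machclk_freq is the counter frequency in ticks per second):

/*
 * Illustrative only: convert a machclk delta to microseconds.
 * Assumes init_machclk() has already run.
 */
static __inline u_int64_t
ex_machclk_to_usec(u_int64_t delta)
{
	return (delta * 1000000 / machclk_freq);
}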
+
+/*
+ * debug support
+ */
+#ifdef ALTQ_DEBUG
+#ifdef __STDC__
+#define ASSERT(e) ((e) ? (void)0 : altq_assert(__FILE__, __LINE__, #e))
+#else /* PCC */
+#define ASSERT(e) ((e) ? (void)0 : altq_assert(__FILE__, __LINE__, "e"))
+#endif
+#else
+#define ASSERT(e) ((void)0)
+#endif
+
+/*
+ * misc stuff for compatibility
+ */
+/* ioctl cmd type */
+typedef u_long ioctlcmd_t;
+
+/*
+ * queue macros:
+ * the interface of the TAILQ_LAST macro changed after the introduction
+ * of softupdates; redefine it here so that it also works with pre-2.2.7.
+ */
+#undef TAILQ_LAST
+#define TAILQ_LAST(head, headname) \
+ (*(((struct headname *)((head)->tqh_last))->tqh_last))
+
+#ifndef TAILQ_EMPTY
+#define TAILQ_EMPTY(head) ((head)->tqh_first == NULL)
+#endif
+#ifndef TAILQ_FOREACH
+#define TAILQ_FOREACH(var, head, field) \
+ for (var = TAILQ_FIRST(head); var; var = TAILQ_NEXT(var, field))
+#endif
+
+/* macro for timeout/untimeout */
+#if (__FreeBSD_version > 300000) || defined(__NetBSD__)
+/* use callout */
+#include <sys/callout.h>
+
+#if (__FreeBSD_version > 500000)
+#define CALLOUT_INIT(c) callout_init((c), 0)
+#else
+#define CALLOUT_INIT(c) callout_init((c))
+#endif
+#define CALLOUT_RESET(c,t,f,a) callout_reset((c),(t),(f),(a))
+#define CALLOUT_STOP(c) callout_stop((c))
+#if !defined(CALLOUT_INITIALIZER) && (__FreeBSD_version < 600000)
+#define CALLOUT_INITIALIZER { { { NULL } }, 0, NULL, NULL, 0 }
+#endif
+#elif defined(__OpenBSD__)
+#include <sys/timeout.h>
+/* callout structure as a wrapper of struct timeout */
+struct callout {
+ struct timeout c_to;
+};
+#define CALLOUT_INIT(c) do { bzero((c), sizeof(*(c))); } while (/*CONSTCOND*/ 0)
+#define CALLOUT_RESET(c,t,f,a) do { if (!timeout_initialized(&(c)->c_to)) \
+ timeout_set(&(c)->c_to, (f), (a)); \
+ timeout_add(&(c)->c_to, (t)); } while (/*CONSTCOND*/ 0)
+#define CALLOUT_STOP(c) timeout_del(&(c)->c_to)
+#define CALLOUT_INITIALIZER { { { NULL }, NULL, NULL, 0, 0 } }
+#else
+/* use old-style timeout/untimeout */
+/* dummy callout structure */
+struct callout {
+ void *c_arg; /* function argument */
+	void	(*c_func)(void *);	/* function to call */
+};
+#define CALLOUT_INIT(c) do { bzero((c), sizeof(*(c))); } while (/*CONSTCOND*/ 0)
+#define CALLOUT_RESET(c,t,f,a) do { (c)->c_arg = (a); \
+ (c)->c_func = (f); \
+ timeout((f),(a),(t)); } while (/*CONSTCOND*/ 0)
+#define CALLOUT_STOP(c) untimeout((c)->c_func,(c)->c_arg)
+#define CALLOUT_INITIALIZER { NULL, NULL }
+#endif
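For illustration, a discipline could arm a periodic timer through the CALLOUT_* wrappers above roughly as follows (a sketch only, not part of this commit; ex_softc, ex_timeout and EX_PERIOD are hypothetical names):

struct ex_softc {
	struct callout	ex_callout;		/* timer state */
};

#define EX_PERIOD	hz			/* fire roughly once per second */

static void
ex_timeout(void *arg)
{
	struct ex_softc *sc = arg;

	/* ... periodic work would go here ... */
	CALLOUT_RESET(&sc->ex_callout, EX_PERIOD, ex_timeout, sc);
}

static void
ex_timer_start(struct ex_softc *sc)
{
	CALLOUT_INIT(&sc->ex_callout);
	CALLOUT_RESET(&sc->ex_callout, EX_PERIOD, ex_timeout, sc);
}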
+#if !defined(__FreeBSD__)
+typedef void (timeout_t)(void *);
+#endif
+
+#define m_pktlen(m) ((m)->m_pkthdr.len)
+
+struct ifnet; struct mbuf;
+struct pf_altq;
+#ifdef ALTQ3_CLFIER_COMPAT
+struct flowinfo;
+#endif
+
+void *altq_lookup(char *, int);
+#ifdef ALTQ3_CLFIER_COMPAT
+int altq_extractflow(struct mbuf *, int, struct flowinfo *, u_int32_t);
+int acc_add_filter(struct acc_classifier *, struct flow_filter *,
+ void *, u_long *);
+int acc_delete_filter(struct acc_classifier *, u_long);
+int acc_discard_filters(struct acc_classifier *, void *, int);
+void *acc_classify(void *, struct mbuf *, int);
+#endif
+u_int8_t read_dsfield(struct mbuf *, struct altq_pktattr *);
+void write_dsfield(struct mbuf *, struct altq_pktattr *, u_int8_t);
+void altq_assert(const char *, int, const char *);
+int tbr_set(struct ifaltq *, struct tb_profile *);
+int tbr_get(struct ifaltq *, struct tb_profile *);
+
+int altq_pfattach(struct pf_altq *);
+int altq_pfdetach(struct pf_altq *);
+int altq_add(struct pf_altq *);
+int altq_remove(struct pf_altq *);
+int altq_add_queue(struct pf_altq *);
+int altq_remove_queue(struct pf_altq *);
+int altq_getqstats(struct pf_altq *, void *, int *);
+
+int cbq_pfattach(struct pf_altq *);
+int cbq_add_altq(struct pf_altq *);
+int cbq_remove_altq(struct pf_altq *);
+int cbq_add_queue(struct pf_altq *);
+int cbq_remove_queue(struct pf_altq *);
+int cbq_getqstats(struct pf_altq *, void *, int *);
+
+int priq_pfattach(struct pf_altq *);
+int priq_add_altq(struct pf_altq *);
+int priq_remove_altq(struct pf_altq *);
+int priq_add_queue(struct pf_altq *);
+int priq_remove_queue(struct pf_altq *);
+int priq_getqstats(struct pf_altq *, void *, int *);
+
+int hfsc_pfattach(struct pf_altq *);
+int hfsc_add_altq(struct pf_altq *);
+int hfsc_remove_altq(struct pf_altq *);
+int hfsc_add_queue(struct pf_altq *);
+int hfsc_remove_queue(struct pf_altq *);
+int hfsc_getqstats(struct pf_altq *, void *, int *);
+
+#endif /* _KERNEL */
+#endif /* _ALTQ_ALTQ_VAR_H_ */
diff --git a/sys/net/altq/altqconf.h b/sys/net/altq/altqconf.h
new file mode 100644
index 0000000..4d3921c
--- /dev/null
+++ b/sys/net/altq/altqconf.h
@@ -0,0 +1,29 @@
+/* $OpenBSD: altqconf.h,v 1.1 2001/06/27 05:28:36 kjc Exp $ */
+/* $NetBSD: altqconf.h,v 1.2 2001/05/30 11:57:16 mrg Exp $ */
+
+#if defined(_KERNEL_OPT) || defined(__OpenBSD__)
+
+#if defined(_KERNEL_OPT)
+#include "opt_altq_enabled.h"
+#endif
+
+#include <sys/conf.h>
+
+#ifdef ALTQ
+#define NALTQ 1
+#else
+#define NALTQ 0
+#endif
+
+cdev_decl(altq);
+
+#ifdef __OpenBSD__
+#define cdev_altq_init(c,n) { \
+ dev_init(c,n,open), dev_init(c,n,close), (dev_type_read((*))) enodev, \
+ (dev_type_write((*))) enodev, dev_init(c,n,ioctl), \
+ (dev_type_stop((*))) enodev, 0, (dev_type_select((*))) enodev, \
+ (dev_type_mmap((*))) enodev }
+#else
+#define cdev_altq_init(x,y) cdev__oci_init(x,y)
+#endif
+#endif /* defined(_KERNEL_OPT) || defined(__OpenBSD__) */
diff --git a/sys/net/altq/if_altq.h b/sys/net/altq/if_altq.h
new file mode 100644
index 0000000..3c43fc7
--- /dev/null
+++ b/sys/net/altq/if_altq.h
@@ -0,0 +1,190 @@
+/* $FreeBSD$ */
+/* $KAME: if_altq.h,v 1.12 2005/04/13 03:44:25 suz Exp $ */
+
+/*
+ * Copyright (C) 1997-2003
+ * Sony Computer Science Laboratories Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#ifndef _ALTQ_IF_ALTQ_H_
+#define _ALTQ_IF_ALTQ_H_
+
+#ifdef __FreeBSD__
+#include <sys/lock.h> /* XXX */
+#include <sys/mutex.h> /* XXX */
+#include <sys/event.h> /* XXX */
+#endif
+
+#ifdef _KERNEL_OPT
+#include <net/altq/altqconf.h>
+#endif
+
+struct altq_pktattr; struct tb_regulator; struct top_cdnr;
+
+/*
+ * Structure defining a queue for a network interface.
+ */
+struct ifaltq {
+ /* fields compatible with struct ifqueue */
+ struct mbuf *ifq_head;
+ struct mbuf *ifq_tail;
+ int ifq_len;
+ int ifq_maxlen;
+#ifdef __FreeBSD__
+ struct mtx ifq_mtx;
+#endif
+
+ /* driver owned queue (used for bulk dequeue and prepend) UNLOCKED */
+ struct mbuf *ifq_drv_head;
+ struct mbuf *ifq_drv_tail;
+ int ifq_drv_len;
+ int ifq_drv_maxlen;
+
+ /* alternate queueing related fields */
+ int altq_type; /* discipline type */
+ int altq_flags; /* flags (e.g. ready, in-use) */
+ void *altq_disc; /* for discipline-specific use */
+ struct ifnet *altq_ifp; /* back pointer to interface */
+
+ int (*altq_enqueue)(struct ifaltq *, struct mbuf *,
+ struct altq_pktattr *);
+ struct mbuf *(*altq_dequeue)(struct ifaltq *, int);
+ int (*altq_request)(struct ifaltq *, int, void *);
+
+ /* classifier fields */
+ void *altq_clfier; /* classifier-specific use */
+ void *(*altq_classify)(void *, struct mbuf *, int);
+
+ /* token bucket regulator */
+ struct tb_regulator *altq_tbr;
+
+ /* input traffic conditioner (doesn't belong to the output queue...) */
+ struct top_cdnr *altq_cdnr;
+};
+
+
+#ifdef _KERNEL
+
+/*
+ * packet attributes used by queueing disciplines.
+ * pattr_class is a discipline-dependent scheduling class that is
+ * set by a classifier.
+ * pattr_hdr and pattr_af may be used by a discipline to access
+ * the header within a mbuf. (e.g. ECN needs to update the CE bit)
+ * note that pattr_hdr could be stale after m_pullup, though link
+ * layer output routines usually don't use m_pullup. link-level
+ * compression also invalidates these fields. thus, pattr_hdr needs
+ * to be verified when a discipline touches the header.
+ */
+struct altq_pktattr {
+ void *pattr_class; /* sched class set by classifier */
+ int pattr_af; /* address family */
+ caddr_t pattr_hdr; /* saved header position in mbuf */
+};
+
+/*
+ * mbuf tag to carry a queue id (and hints for ECN).
+ */
+struct altq_tag {
+ u_int32_t qid; /* queue id */
+ /* hints for ecn */
+ int af; /* address family */
+ void *hdr; /* saved header position in mbuf */
+};
+
+/*
+ * a token-bucket regulator limits the rate that a network driver can
+ * dequeue packets from the output queue.
+ * modern cards are able to buffer a large number of packets and dequeue
+ * too many packets at a time.  this bursty dequeue behavior makes it
+ * impossible for queueing disciplines to schedule packets.
+ * a token-bucket is used to control the burst size in a device
+ * independent manner.
+ */
+struct tb_regulator {
+ int64_t tbr_rate; /* (scaled) token bucket rate */
+ int64_t tbr_depth; /* (scaled) token bucket depth */
+
+ int64_t tbr_token; /* (scaled) current token */
+ int64_t tbr_filluptime; /* (scaled) time to fill up bucket */
+ u_int64_t tbr_last; /* last time token was updated */
+
+ int tbr_lastop; /* last dequeue operation type
+ needed for poll-and-dequeue */
+};
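As a rough sketch of how these fields interact (an approximation of the tbr_dequeue() logic in altq_subr.c, not a verbatim copy; EX_SCALE is a hypothetical stand-in for the fixed-point scaling actually used there):

#define EX_SCALE(x)	((int64_t)(x) << 32)	/* hypothetical scaling factor */

static int
ex_tbr_may_dequeue(struct tb_regulator *tbr, u_int64_t now, int pktlen)
{
	/* accumulate tokens for the machclk ticks elapsed since last update */
	tbr->tbr_token += (now - tbr->tbr_last) * tbr->tbr_rate;
	if (tbr->tbr_token > tbr->tbr_depth)
		tbr->tbr_token = tbr->tbr_depth;	/* the bucket is full */
	tbr->tbr_last = now;

	if (tbr->tbr_token <= 0)
		return (0);		/* not enough tokens; hold the packet */

	/* charge the bucket for the packet about to be dequeued */
	tbr->tbr_token -= EX_SCALE(pktlen);
	return (1);
}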
+
+/* if_altqflags */
+#define ALTQF_READY 0x01 /* driver supports alternate queueing */
+#define ALTQF_ENABLED 0x02 /* altq is in use */
+#define ALTQF_CLASSIFY 0x04 /* classify packets */
+#define ALTQF_CNDTNING 0x08 /* altq traffic conditioning is enabled */
+#define ALTQF_DRIVER1 0x40 /* driver specific */
+
+/* if_altqflags set internally only: */
+#define ALTQF_CANTCHANGE (ALTQF_READY)
+
+/* altq_dequeue 2nd arg */
+#define ALTDQ_REMOVE 1 /* dequeue mbuf from the queue */
+#define ALTDQ_POLL 2 /* don't dequeue mbuf from the queue */
+
+/* altq request types (currently only purge is defined) */
+#define ALTRQ_PURGE 1 /* purge all packets */
+
+#define ALTQ_IS_READY(ifq) ((ifq)->altq_flags & ALTQF_READY)
+#define ALTQ_IS_ENABLED(ifq) ((ifq)->altq_flags & ALTQF_ENABLED)
+#define ALTQ_NEEDS_CLASSIFY(ifq) ((ifq)->altq_flags & ALTQF_CLASSIFY)
+#define ALTQ_IS_CNDTNING(ifq) ((ifq)->altq_flags & ALTQF_CNDTNING)
+
+#define ALTQ_SET_CNDTNING(ifq) ((ifq)->altq_flags |= ALTQF_CNDTNING)
+#define ALTQ_CLEAR_CNDTNING(ifq) ((ifq)->altq_flags &= ~ALTQF_CNDTNING)
+#define ALTQ_IS_ATTACHED(ifq) ((ifq)->altq_disc != NULL)
+
+#define ALTQ_ENQUEUE(ifq, m, pa, err) \
+ (err) = (*(ifq)->altq_enqueue)((ifq),(m),(pa))
+#define ALTQ_DEQUEUE(ifq, m) \
+ (m) = (*(ifq)->altq_dequeue)((ifq), ALTDQ_REMOVE)
+#define ALTQ_POLL(ifq, m) \
+ (m) = (*(ifq)->altq_dequeue)((ifq), ALTDQ_POLL)
+#define ALTQ_PURGE(ifq) \
+ (void)(*(ifq)->altq_request)((ifq), ALTRQ_PURGE, (void *)0)
+#define ALTQ_IS_EMPTY(ifq) ((ifq)->ifq_len == 0)
+#define TBR_IS_ENABLED(ifq) ((ifq)->altq_tbr != NULL)
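For context, a driver transmit routine typically drains an ALTQ-managed queue with the poll-and-dequeue pattern these macros support; a minimal sketch (not part of this commit; ex_softc, ex_tx_ring_full and ex_encap are hypothetical driver helpers):

static void
ex_start(struct ex_softc *sc, struct ifaltq *ifq)
{
	struct mbuf *m;

	while (!ALTQ_IS_EMPTY(ifq)) {
		ALTQ_POLL(ifq, m);		/* peek without removing */
		if (m == NULL)
			break;
		if (ex_tx_ring_full(sc))	/* no room in hardware? */
			break;			/* leave the packet queued */
		ALTQ_DEQUEUE(ifq, m);		/* now actually remove it */
		ex_encap(sc, m);		/* hand it to the hardware */
	}
}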
+
+extern int altq_attach(struct ifaltq *, int, void *,
+ int (*)(struct ifaltq *, struct mbuf *,
+ struct altq_pktattr *),
+ struct mbuf *(*)(struct ifaltq *, int),
+ int (*)(struct ifaltq *, int, void *),
+ void *,
+ void *(*)(void *, struct mbuf *, int));
+extern int altq_detach(struct ifaltq *);
+extern int altq_enable(struct ifaltq *);
+extern int altq_disable(struct ifaltq *);
+extern struct mbuf *(*tbr_dequeue_ptr)(struct ifaltq *, int);
+extern int (*altq_input)(struct mbuf *, int);
+#if 0 /* ALTQ3_CLFIER_COMPAT */
+void altq_etherclassify(struct ifaltq *, struct mbuf *, struct altq_pktattr *);
+#endif
+#endif /* _KERNEL */
+
+#endif /* _ALTQ_IF_ALTQ_H_ */