summaryrefslogtreecommitdiffstats
path: root/sys/netinet
diff options
context:
space:
mode:
authorglebius <glebius@FreeBSD.org>2012-09-14 11:51:49 +0000
committerglebius <glebius@FreeBSD.org>2012-09-14 11:51:49 +0000
commit0ccf4838d7a8b4da2c3beaac7ea1fd977aa0ed11 (patch)
treeec60da6e90cde2e87aa91ac9450c84ce3446233a /sys/netinet
parentf99fc207edf21e7c05c1147864077ce3fe1f3e2c (diff)
downloadFreeBSD-src-0ccf4838d7a8b4da2c3beaac7ea1fd977aa0ed11.zip
FreeBSD-src-0ccf4838d7a8b4da2c3beaac7ea1fd977aa0ed11.tar.gz
o Create directory sys/netpfil, where all packet filters should
reside, and move there ipfw(4) and pf(4). o Move most modified parts of pf out of contrib. Actual movements: sys/contrib/pf/net/*.c -> sys/netpfil/pf/ sys/contrib/pf/net/*.h -> sys/net/ contrib/pf/pfctl/*.c -> sbin/pfctl contrib/pf/pfctl/*.h -> sbin/pfctl contrib/pf/pfctl/pfctl.8 -> sbin/pfctl contrib/pf/pfctl/*.4 -> share/man/man4 contrib/pf/pfctl/*.5 -> share/man/man5 sys/netinet/ipfw -> sys/netpfil/ipfw The arguable movement is pf/net/*.h -> sys/net. There are future plans to refactor pf includes, so I decided not to break things twice. Not modified bits of pf left in contrib: authpf, ftp-proxy, tftp-proxy, pflogd. The ipfw(4) movement is planned to be merged to stable/9, to make head and stable match. Discussed with: bz, luigi
Diffstat (limited to 'sys/netinet')
-rw-r--r--sys/netinet/ipfw/dn_heap.c552
-rw-r--r--sys/netinet/ipfw/dn_heap.h191
-rw-r--r--sys/netinet/ipfw/dn_sched.h191
-rw-r--r--sys/netinet/ipfw/dn_sched_fifo.c120
-rw-r--r--sys/netinet/ipfw/dn_sched_prio.c229
-rw-r--r--sys/netinet/ipfw/dn_sched_qfq.c864
-rw-r--r--sys/netinet/ipfw/dn_sched_rr.c307
-rw-r--r--sys/netinet/ipfw/dn_sched_wf2q.c373
-rw-r--r--sys/netinet/ipfw/dummynet.txt860
-rw-r--r--sys/netinet/ipfw/ip_dn_glue.c845
-rw-r--r--sys/netinet/ipfw/ip_dn_io.c858
-rw-r--r--sys/netinet/ipfw/ip_dn_private.h403
-rw-r--r--sys/netinet/ipfw/ip_dummynet.c2314
-rw-r--r--sys/netinet/ipfw/ip_fw2.c2790
-rw-r--r--sys/netinet/ipfw/ip_fw_dynamic.c1244
-rw-r--r--sys/netinet/ipfw/ip_fw_log.c552
-rw-r--r--sys/netinet/ipfw/ip_fw_nat.c661
-rw-r--r--sys/netinet/ipfw/ip_fw_pfil.c588
-rw-r--r--sys/netinet/ipfw/ip_fw_private.h309
-rw-r--r--sys/netinet/ipfw/ip_fw_sockopt.c1448
-rw-r--r--sys/netinet/ipfw/ip_fw_table.c761
-rw-r--r--sys/netinet/ipfw/test/Makefile51
-rw-r--r--sys/netinet/ipfw/test/dn_test.h175
-rw-r--r--sys/netinet/ipfw/test/main.c636
-rw-r--r--sys/netinet/ipfw/test/mylist.h49
-rw-r--r--sys/netinet/ipfw/test/test_dn_heap.c162
-rw-r--r--sys/netinet/ipfw/test/test_dn_sched.c89
27 files changed, 0 insertions, 17622 deletions
diff --git a/sys/netinet/ipfw/dn_heap.c b/sys/netinet/ipfw/dn_heap.c
deleted file mode 100644
index 3bdfd9d..0000000
--- a/sys/netinet/ipfw/dn_heap.c
+++ /dev/null
@@ -1,552 +0,0 @@
-/*-
- * Copyright (c) 1998-2002,2010 Luigi Rizzo, Universita` di Pisa
- * All rights reserved
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-/*
- * Binary heap and hash tables, used in dummynet
- *
- * $FreeBSD$
- */
-
-#include <sys/cdefs.h>
-#include <sys/param.h>
-#ifdef _KERNEL
-__FBSDID("$FreeBSD$");
-#include <sys/systm.h>
-#include <sys/malloc.h>
-#include <sys/kernel.h>
-#include <netinet/ipfw/dn_heap.h>
-#ifndef log
-#define log(x, arg...)
-#endif
-
-#else /* !_KERNEL */
-
-#include <stdio.h>
-#include <dn_test.h>
-#include <strings.h>
-#include <stdlib.h>
-
-#include "dn_heap.h"
-#define log(x, arg...) fprintf(stderr, ## arg)
-#define panic(x...) fprintf(stderr, ## x), exit(1)
-#define MALLOC_DEFINE(a, b, c)
-static void *my_malloc(int s) { return malloc(s); }
-static void my_free(void *p) { free(p); }
-#define malloc(s, t, w) my_malloc(s)
-#define free(p, t) my_free(p)
-#endif /* !_KERNEL */
-
-static MALLOC_DEFINE(M_DN_HEAP, "dummynet", "dummynet heap");
-
-/*
- * Heap management functions.
- *
- * In the heap, first node is element 0. Children of i are 2i+1 and 2i+2.
- * Some macros help finding parent/children so we can optimize them.
- *
- * heap_resize() is called to expand the heap when needed.
- * The requested size is rounded up to the next value of the form 2^k - 1.
- * Returns 1 on error, 0 on success
- */
-#define HEAP_FATHER(x) ( ( (x) - 1 ) / 2 )
-#define HEAP_LEFT(x) ( (x)+(x) + 1 )
-#define HEAP_SWAP(a, b, buffer) { buffer = a ; a = b ; b = buffer ; }
-#define HEAP_INCREMENT 15
-
-/*
- * Make sure the entry array of heap 'h' can hold at least 'new_size'
- * entries. If it is already large enough nothing is done (the array
- * never shrinks); otherwise a larger array is allocated with M_NOWAIT,
- * the old entries are copied into it and the old array is freed.
- * Returns 0 on success, 1 if the allocation failed (heap left untouched).
- */
-static int
-heap_resize(struct dn_heap *h, unsigned int new_size)
-{
-	struct dn_heap_entry *p;
-
-	if (h->size >= new_size )	/* have enough room */
-		return 0;
-#if 1
-	/*
-	 * Smear the top bit downwards: the result is 2^k - 1, the
-	 * smallest all-ones value that is >= the requested size.
-	 */
-	new_size |= new_size >> 1;
-	new_size |= new_size >> 2;
-	new_size |= new_size >> 4;
-	new_size |= new_size >> 8;
-	new_size |= new_size >> 16;
-#else
-	new_size = (new_size + HEAP_INCREMENT ) & ~HEAP_INCREMENT;
-#endif
-	p = malloc(new_size * sizeof(*p), M_DN_HEAP, M_NOWAIT);
-	if (p == NULL) {
-		/* new_size is unsigned, so it is printed with %u */
-		printf("--- %s, resize %u failed\n", __func__, new_size );
-		return 1; /* error */
-	}
-	if (h->size > 0) {
-		/* carry the existing entries over before dropping the old array */
-		bcopy(h->p, p, h->size * sizeof(*p) );
-		free(h->p, M_DN_HEAP);
-	}
-	h->p = p;
-	h->size = new_size;
-	return 0;
-}
-
-/*
- * Prepare heap 'h' for use: make room for at least 'size' entries, mark
- * the heap empty and record 'ofs', the offset used to store each object's
- * index inside the object itself.
- * Returns 0 on success, 1 if the entry array could not be allocated.
- */
-int
-heap_init(struct dn_heap *h, int size, int ofs)
-{
-	int failed = heap_resize(h, size);
-
-	if (failed != 0)
-		return 1;
-	h->ofs = ofs;
-	h->elements = 0;
-	return 0;
-}
-
-/*
- * Insert element in heap. Normally, p != NULL, we insert p in
- * a new position and bubble up. If p == NULL, then the element is
- * already in place, and key is the position where to start the
- * bubble-up.
- * Returns 1 on failure (cannot allocate new heap entry)
- *
- * If ofs > 0 the position (index, int) of the element in the heap is
- * also stored in the element itself at the given offset in bytes.
- */
-/*
- * SET_OFFSET stores index 'i' into the object held by entry i of heap
- * 'h', at byte offset h->ofs; it does nothing when h->ofs <= 0.
- * Both arguments are evaluated more than once, so they must be free of
- * side effects. They are parenthesized so that expressions such as
- * 'i + 1' or '&heap' expand correctly.
- */
-#define SET_OFFSET(h, i) do {						\
-	if ((h)->ofs > 0)						\
-		*((int32_t *)((char *)((h)->p[(i)].object) +		\
-		    (h)->ofs)) = (i);					\
-	} while (0)
-/*
- * RESET_OFFSET is used for sanity checks. It overwrites the index
- * stored in the object of entry 'i' with an invalid value (-16).
- */
-#define RESET_OFFSET(h, i) do {						\
-	if ((h)->ofs > 0)						\
-		*((int32_t *)((char *)((h)->p[(i)].object) +		\
-		    (h)->ofs)) = -16;					\
-	} while (0)
-
-/*
- * Add object 'p' with key 'key1' to heap 'h', or, when p == NULL, re-run
- * the bubble-up starting from the position given in 'key1'.
- * Returns 0 on success, 1 if the heap was full and could not be grown.
- */
-int
-heap_insert(struct dn_heap *h, uint64_t key1, void *p)
-{
-	struct dn_heap_entry swap_buf;
-	int pos, up;
-
-	if (p != NULL) {
-		/* append the new entry after the last one, growing if full */
-		pos = h->elements;
-		if (pos == h->size && heap_resize(h, h->elements + 16))
-			return 1;	/* failure... */
-		h->p[pos].key = key1;
-		h->p[pos].object = p;
-		h->elements++;
-	} else {
-		/* data already there, key1 is the starting point */
-		pos = key1;
-	}
-	/* climb until the father's key is smaller than ours */
-	for (; pos > 0; pos = up) {
-		up = HEAP_FATHER(pos);
-		if (DN_KEY_LT(h->p[up].key, h->p[pos].key))
-			break;		/* found right position */
-		HEAP_SWAP(h->p[pos], h->p[up], swap_buf);
-		SET_OFFSET(h, pos);	/* old father now sits at 'pos' */
-	}
-	SET_OFFSET(h, pos);
-	return 0;
-}
-
-/*
- * Remove one entry from heap 'h': the top (smallest key) entry when
- * obj == NULL, otherwise the entry of 'obj', whose current index is read
- * from the object itself at byte offset h->ofs.
- *
- * Nothing is returned. An empty heap is only reported with printf().
- * Extracting a specific object panics if h->ofs <= 0 or if the index
- * stored in the object is outside 0..elements-1.
- */
-void
-heap_extract(struct dn_heap *h, void *obj)
-{
- int child, father, max = h->elements - 1; /* max: index of the last entry */
-
- if (max < 0) {
- printf("--- %s: empty heap 0x%p\n", __FUNCTION__, h);
- return;
- }
- if (obj == NULL)
- father = 0; /* default: move up smallest child */
- else { /* extract specific element, index is at offset */
- if (h->ofs <= 0)
- panic("%s: extract from middle not set on %p\n",
- __FUNCTION__, h);
- father = *((int *)((char *)obj + h->ofs)); /* index saved by SET_OFFSET */
- if (father < 0 || father >= h->elements) {
- panic("%s: father %d out of bound 0..%d\n",
- __FUNCTION__, father, h->elements);
- }
- }
- /*
- * below, father is the index of the empty element, which
- * we replace at each step with the smallest child until we
- * reach the bottom level.
- */
- // XXX why removing RESET_OFFSET increases runtime by 10% ?
- RESET_OFFSET(h, father); /* mark the departing object as no longer in the heap */
- while ( (child = HEAP_LEFT(father)) <= max ) {
- if (child != max &&
- DN_KEY_LT(h->p[child+1].key, h->p[child].key) )
- child++; /* take right child, otherwise left */
- h->p[father] = h->p[child];
- SET_OFFSET(h, father); /* the moved child now lives at 'father' */
- father = child;
- }
- h->elements--;
- if (father != max) { /* the hole did not end up in the last slot */
- /*
- * Fill hole with last entry and bubble up,
- * reusing the insert code
- */
- h->p[father] = h->p[max];
- heap_insert(h, father, NULL); /* p == NULL: 'father' is a position, not a key */
- }
-}
-
-#if 0
-/*
- * Change the key of 'object' to 'new_key' and move its entry up or down
- * until the heap order holds again, updating the indexes stored in the
- * objects that get displaced. The position of 'object' is read from the
- * object itself, so the heap must have been set up with h->ofs > 0.
- * XXX this one is never used!
- */
-static void
-heap_move(struct dn_heap *h, uint64_t new_key, void *object)
-{
-	int temp, i, max = h->elements-1;
-	struct dn_heap_entry *p, buf;
-
-	if (h->ofs <= 0)
-		panic("cannot move items on this heap");
-	p = h->p;	/* shortcut */
-
-	i = *((int *)((char *)object + h->ofs));
-	if (DN_KEY_LT(new_key, p[i].key) ) {	/* must move up */
-		p[i].key = new_key;
-		for (; i>0 &&
-		    DN_KEY_LT(new_key, p[(temp = HEAP_FATHER(i))].key);
-		    i = temp ) {	/* bubble up */
-			HEAP_SWAP(p[i], p[temp], buf);
-			SET_OFFSET(h, i);
-		}
-	} else {	/* must move down */
-		p[i].key = new_key;
-		while ( (temp = HEAP_LEFT(i)) <= max ) {
-			/* found left child */
-			if (temp != max &&
-			    DN_KEY_LT(p[temp+1].key, p[temp].key))
-				temp++;	/* select child with min key */
-			/* was "DN_KEY_LT(>p[temp].key, ...)", which does not parse */
-			if (DN_KEY_LT(p[temp].key, new_key)) {
-				/* go down */
-				HEAP_SWAP(p[i], p[temp], buf);
-				SET_OFFSET(h, i);
-			} else
-				break;
-			i = temp;
-		}
-	}
-	SET_OFFSET(h, i);
-}
-#endif /* heap_move, unused */
-
-/*
- * heapify() restores the heap property over the whole array by running
- * the bubble-up step of heap_insert() on every position, first to last.
- * It is needed when we delete a bunch of entries.
- */
-static void
-heapify(struct dn_heap *h)
-{
-	int pos = 0;
-
-	while (pos < h->elements) {
-		heap_insert(h, pos, NULL);
-		pos++;
-	}
-}
-
-/*
- * Call 'fn' on the object of every entry of heap 'h', passing 'arg'.
- * If the result has HEAP_SCAN_DEL set the entry is dropped, its slot being
- * refilled with the last entry, which is then visited in turn; if it has
- * HEAP_SCAN_END set the scan stops. The heap is re-ordered at the end if
- * anything was removed. Returns the number of entries removed.
- */
-int
-heap_scan(struct dn_heap *h, int (*fn)(void *, uintptr_t),
-	uintptr_t arg)
-{
-	int pos = 0, removed = 0, verdict;
-
-	while (pos < h->elements) {
-		verdict = fn(h->p[pos].object, arg);
-		if ((verdict & HEAP_SCAN_DEL) == 0) {
-			pos++;
-		} else {
-			/* overwrite the victim with the current last entry */
-			h->elements--;
-			h->p[pos] = h->p[h->elements];
-			removed++;
-		}
-		if ((verdict & HEAP_SCAN_END) != 0)
-			break;
-	}
-	if (removed != 0)
-		heapify(h);
-	return removed;
-}
-
-/*
- * Release the entry array of heap 'h', if one was allocated, and zero
- * the whole descriptor.
- */
-void
-heap_free(struct dn_heap *h)
-{
-	const int has_array = (h->size > 0);
-
-	if (has_array)
-		free(h->p, M_DN_HEAP);
-	bzero(h, sizeof(*h));
-}
-
-/*
- * hash table support.
- */
-
-struct dn_ht {
- int buckets; /* number of buckets - 1; ANDed with the hash to pick a bucket */
- int entries; /* how many entries */
- int ofs; /* byte offset, inside each object, of its link (next) pointer */
- uint32_t (*hash)(uintptr_t, int, void *arg); /* called as hash(key, flags, arg) */
- int (*match)(void *_el, uintptr_t key, int, void *); /* nonzero if _el matches key */
- void *(*newh)(uintptr_t, int, void *); /* optional: builds the object on insert */
- void **ht; /* bucket heads */
-};
-/*
- * Initialize, allocating bucket pointers inline.
- * Recycle previous record if possible.
- * If the 'newh' function is not supplied, we assume that the
- * key passed to ht_find is the same object to be stored in.
- */
-struct dn_ht *
-dn_ht_init(struct dn_ht *ht, int buckets, int ofs,
-	uint32_t (*h)(uintptr_t, int, void *),
-	int (*match)(void *, uintptr_t, int, void *),
-	void *(*newh)(uintptr_t, int, void *))
-{
-	int l;
-
-	/*
-	 * Notes about rounding the bucket count to a power of two.
-	 * From the requested count we compute the nearest lower and
-	 * higher powers of two, minus 1 (b_min and b_max respectively),
-	 * because the value is ANDed with the index returned by the
-	 * hash function.
-	 * To choose between the two, the requested count is compared
-	 * with b_min: if it is greater than 4/3 of b_min we round to
-	 * b_max, otherwise to b_min. This rounds towards the nearest
-	 * power of two, favouring the larger one when the gap between
-	 * the two powers is relatively big.
-	 * Using a power of two avoids a modulo when selecting the
-	 * bucket: ht->buckets stores the bucket count - 1, so that a
-	 * simple AND with the hash value gives the bucket index.
-	 */
-	int b_min;	/* min buckets */
-	int b_max;	/* max buckets */
-	int b_ori;	/* original buckets */
-
-	if (h == NULL || match == NULL) {
-		/* terminate the line so it does not merge with later output */
-		printf("--- missing hash or match function\n");
-		return NULL;
-	}
-	if (buckets < 1 || buckets > 65536)
-		return NULL;
-
-	b_ori = buckets;
-	/* calculate next power of 2, - 1*/
-	buckets |= buckets >> 1;
-	buckets |= buckets >> 2;
-	buckets |= buckets >> 4;
-	buckets |= buckets >> 8;
-	buckets |= buckets >> 16;
-
-	b_max = buckets;	/* Next power */
-	b_min = buckets >> 1;	/* Previous power */
-
-	/* Calculate the 'nearest' bucket size */
-	if (b_min * 4000 / 3000 < b_ori)
-		buckets = b_max;
-	else
-		buckets = b_min;
-
-	if (ht) {	/* see if we can reuse */
-		if (buckets <= ht->buckets) {
-			ht->buckets = buckets;
-		} else {
-			/* free pointers if not allocated inline */
-			if (ht->ht != (void *)(ht + 1))
-				free(ht->ht, M_DN_HEAP);
-			free(ht, M_DN_HEAP);
-			ht = NULL;
-		}
-	}
-	if (ht == NULL) {
-		/*
-		 * Allocate buckets + 1 heads right after the descriptor:
-		 * 'buckets' is the AND mask, so valid indexes are
-		 * 0..buckets. Each head is a 'void *'.
-		 */
-		l = sizeof(*ht) + (buckets + 1) * sizeof(void *);
-		ht = malloc(l, M_DN_HEAP, M_NOWAIT | M_ZERO);
-	}
-	if (ht) {
-		ht->ht = (void **)(ht + 1);
-		ht->buckets = buckets;
-		ht->ofs = ofs;
-		ht->hash = h;
-		ht->match = match;
-		ht->newh = newh;
-	}
-	return ht;
-}
-
-/*
- * Dummy callback used by dn_ht_free() to unlink all objects: it ignores
- * both arguments and always asks the scan to delete the current object.
- */
-static int
-do_del(void *obj, void *arg)
-{
- return DNHT_SCAN_DEL;
-}
-
-/*
- * With DNHT_REMOVE set in 'flags', unlink every object from the table
- * (the table itself is kept). Otherwise free the bucket array, when it
- * was not allocated inline, and then the table. A NULL 'ht' is ignored.
- */
-void
-dn_ht_free(struct dn_ht *ht, int flags)
-{
-	if (ht == NULL)
-		return;
-	if ((flags & DNHT_REMOVE) != 0) {
-		(void)dn_ht_scan(ht, do_del, NULL);
-		return;
-	}
-	if (ht->ht != NULL && ht->ht != (void *)(ht + 1))
-		free(ht->ht, M_DN_HEAP);
-	free(ht, M_DN_HEAP);
-}
-
-/* Number of objects currently linked in the table; 0 for a NULL table. */
-int
-dn_ht_entries(struct dn_ht *ht)
-{
-	if (ht == NULL)
-		return 0;
-	return ht->entries;
-}
-
-/*
- * Lookup and optionally create or delete an element.
- *
- * The bucket is chosen from hash(key, flags, arg); its chain is walked
- * until an object matches, either by pointer equality with 'key'
- * (DNHT_MATCH_PTR) or through the match() callback.
- *  - found, DNHT_REMOVE set: the object is unlinked and returned;
- *  - found otherwise: the object is returned;
- *  - not found, DNHT_INSERT set: the object returned by newh(), or 'key'
- *    itself when no newh was supplied, is linked at the head of the
- *    bucket and returned (NULL if newh() returned NULL);
- *  - not found otherwise: NULL.
- * Returns NULL as well when 'ht' is NULL.
- * NOTE(review): DNHT_UNIQUE is not examined anywhere in this function.
- */
-void *
-dn_ht_find(struct dn_ht *ht, uintptr_t key, int flags, void *arg)
-{
- int i;
- void **pp, *p; /* pp: address of the link that points to p */
-
- if (ht == NULL) /* easy on an empty hash */
- return NULL;
- i = (ht->buckets == 1) ? 0 :
- (ht->hash(key, flags, arg) & ht->buckets); /* buckets is the AND mask */
-
- for (pp = &ht->ht[i]; (p = *pp); pp = (void **)((char *)p + ht->ofs)) {
- if (flags & DNHT_MATCH_PTR) {
- if (key == (uintptr_t)p)
- break;
- } else if (ht->match(p, key, flags, arg)) /* found match */
- break;
- }
- if (p) {
- if (flags & DNHT_REMOVE) {
- /* link in the next element */
- *pp = *(void **)((char *)p + ht->ofs);
- *(void **)((char *)p + ht->ofs) = NULL; /* clear the removed object's own link */
- ht->entries--;
- }
- } else if (flags & DNHT_INSERT) {
- // printf("%s before calling new, bucket %d ofs %d\n",
- // __FUNCTION__, i, ht->ofs);
- p = ht->newh ? ht->newh(key, flags, arg) : (void *)key;
- // printf("%s newh returns %p\n", __FUNCTION__, p);
- if (p) {
- ht->entries++;
- *(void **)((char *)p + ht->ofs) = ht->ht[i]; /* push at the head of the bucket */
- ht->ht[i] = p;
- }
- }
- return p;
-}
-
-/*
- * Run 'fn' on every object of the table, bucket by bucket. The link to
- * the following object is read before the callback runs, because the
- * callback may destroy the current one. A result with DNHT_SCAN_DEL set
- * unlinks the object; DNHT_SCAN_END stops the scan right after it.
- * Returns the number of objects unlinked (0 if 'ht' or 'fn' is NULL).
- */
-int
-dn_ht_scan(struct dn_ht *ht, int (*fn)(void *, void *), void *arg)
-{
-	void **link, *obj, *following;
-	int b, verdict, removed = 0;
-
-	if (ht == NULL || fn == NULL)
-		return 0;
-	for (b = 0; b <= ht->buckets; b++) {
-		for (link = &ht->ht[b]; (obj = *link) != NULL; ) {
-			following = *(void **)((char *)obj + ht->ofs);
-			verdict = fn(obj, arg);
-			if ((verdict & DNHT_SCAN_DEL) != 0) {
-				removed++;
-				ht->entries--;
-				*link = following;
-			} else {
-				link = (void **)((char *)obj + ht->ofs);
-			}
-			if ((verdict & DNHT_SCAN_END) != 0)
-				return removed;
-		}
-	}
-	return removed;
-}
-
-/*
- * Similar to dn_ht_scan(), except that the scan is performed only
- * in the bucket '*bucket'. If that number is not a valid bucket index
- * (negative, or larger than ht->buckets) it is reset to 0 before the
- * scan, so the caller gets back the bucket that was actually scanned.
- * The scan stops early when the callback returns DNHT_SCAN_END.
- * Returns the number of objects unlinked (0 if 'ht' or 'fn' is NULL).
- */
-int
-dn_ht_scan_bucket(struct dn_ht *ht, int *bucket, int (*fn)(void *, void *),
-	void *arg)
-{
-	int i, ret, found = 0;
-	void **curp, *cur, *next;
-
-	if (ht == NULL || fn == NULL)
-		return 0;
-	/* a negative index would address memory before the bucket array */
-	if (*bucket < 0 || *bucket > ht->buckets)
-		*bucket = 0;
-	i = *bucket;
-
-	curp = &ht->ht[i];
-	while ( (cur = *curp) != NULL) {
-		/* fetch the successor first: fn may destroy 'cur' */
-		next = *(void **)((char *)cur + ht->ofs);
-		ret = fn(cur, arg);
-		if (ret & DNHT_SCAN_DEL) {
-			found++;
-			ht->entries--;
-			*curp = next;
-		} else {
-			curp = (void **)((char *)cur + ht->ofs);
-		}
-		if (ret & DNHT_SCAN_END)
-			return found;
-	}
-	return found;
-}
diff --git a/sys/netinet/ipfw/dn_heap.h b/sys/netinet/ipfw/dn_heap.h
deleted file mode 100644
index c95473a..0000000
--- a/sys/netinet/ipfw/dn_heap.h
+++ /dev/null
@@ -1,191 +0,0 @@
-/*-
- * Copyright (c) 1998-2010 Luigi Rizzo, Universita` di Pisa
- * All rights reserved
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-/*
- * Binary heap and hash tables, header file
- *
- * $FreeBSD$
- */
-
-#ifndef _IP_DN_HEAP_H
-#define _IP_DN_HEAP_H
-
-#define DN_KEY_LT(a,b) ((int64_t)((a)-(b)) < 0)
-#define DN_KEY_LEQ(a,b) ((int64_t)((a)-(b)) <= 0)
-
-/*
- * This module implements a binary heap supporting random extraction.
- *
- * A heap entry contains an uint64_t key and a pointer to object.
- * DN_KEY_LT(a,b) returns true if key 'a' is smaller than 'b'
- *
- * The heap is a struct dn_heap plus a dynamically allocated
- * array of dn_heap_entry entries. 'size' represents the size of
- * the array, 'elements' count entries in use. The topmost
- * element has the smallest key.
- * The heap supports ordered insert, and extract from the top.
- * To extract an object from the middle of the heap, the object
- * must reserve an 'int32_t' to store the position of the object
- * in the heap itself, and the location of this field must be
- * passed as an argument to heap_init() -- use -1 if the feature
- * is not used.
- */
-struct dn_heap_entry {
- uint64_t key; /* sorting key, smallest comes first */
- void *object; /* object pointer */
-};
-
-struct dn_heap {
- int size; /* the size of the array, in entries */
- int elements; /* elements in use */
- int ofs; /* byte offset, inside each object, of its heap index; unused if <= 0 */
- struct dn_heap_entry *p; /* array of "size" entries */
-};
-
-enum {
- HEAP_SCAN_DEL = 1, /* heap_scan() callback result bit: remove this entry */
- HEAP_SCAN_END = 2, /* heap_scan() callback result bit: stop the scan */
-};
-
-/*
- * heap_init() reinitializes the heap setting the size and the offset
- * of the index for random extraction (use -1 if not used).
- * The 'elements' counter is set to 0.
- *
- * SET_HEAP_OFS() indicates where, in the object, is stored the index
- * for random extractions from the heap.
- *
- * heap_free() frees the memory associated to a heap.
- *
- * heap_insert() adds a key-pointer pair to the heap
- *
- * HEAP_TOP() returns a pointer to the top element of the heap,
- * but makes no checks on its existence (XXX should we change ?)
- *
- * heap_extract() removes the entry at the top, or the given object if
- * non-NULL. It returns nothing: read the entry with HEAP_TOP() before.
- *
- * heap_scan() invokes a callback on each entry of the heap.
- * The callback can return a combination of HEAP_SCAN_DEL and
- * HEAP_SCAN_END. HEAP_SCAN_DEL means the current element must
- * be removed, and HEAP_SCAN_END means to terminate the scan.
- * heap_scan() returns the number of elements removed.
- * Because the order is not guaranteed, we should use heap_scan()
- * only as a last resort mechanism.
- */
-#define HEAP_TOP(h) ((h)->p)
-#define SET_HEAP_OFS(h, n) do { (h)->ofs = n; } while (0)
-int heap_init(struct dn_heap *h, int size, int ofs);
-int heap_insert(struct dn_heap *h, uint64_t key1, void *p);
-void heap_extract(struct dn_heap *h, void *obj);
-void heap_free(struct dn_heap *h);
-int heap_scan(struct dn_heap *, int (*)(void *, uintptr_t), uintptr_t);
-
-/*------------------------------------------------------
- * This module implements a generic hash table with support for
- * running callbacks on the entire table. To avoid allocating
- * memory during hash table operations, objects must reserve
- * space for a link field. XXX if the heap is moderately full,
- * an SLIST suffices, and we can tolerate the cost of a hash
- * computation on each removal.
- *
- * dn_ht_init() initializes the table, setting the number of
- * buckets, the offset of the link field, the main callbacks.
- * Callbacks are:
- *
- * hash(key, flags, arg) called to return a bucket index.
- * match(obj, key, flags, arg) called to determine if key
- * matches the current 'obj' in the heap
- * newh(key, flags, arg) optional, used to allocate a new
- * object during insertions.
- *
- * dn_ht_free() frees the heap or unlink elements.
- * DNHT_REMOVE unlink elements, 0 frees the heap.
- * You need two calls to do both.
- *
- * dn_ht_find() is the main lookup function, which can also be
- * used to insert or delete elements in the hash table.
- * The final 'arg' is passed to all callbacks.
- *
- * dn_ht_scan() is used to invoke a callback on all entries of
- * the table, or possibly on just one bucket. The callback
- * is invoked with a pointer to the object, and returns 0 or
- * a combination of DNHT_SCAN_DEL and DNHT_SCAN_END to request
- * the removal of the object from the table and the end of the
- * scan, respectively.
- *
- * dn_ht_scan_bucket() is similar to dn_ht_scan(), except that it scans
- * only the specific bucket of the table. The bucket is a in-out
- * parameter and return a valid bucket number if the original
- * is invalid.
- *
- * A combination of flags can be used to modify the operation
- * of the dn_ht_find(), and of the callbacks:
- *
- * DNHT_KEY_IS_OBJ means the key is the object pointer.
- * It is usually of interest for the hash and match functions.
- *
- * DNHT_MATCH_PTR during a lookup, match pointers instead
- * of calling match(). Normally used when removing specific
- * entries. Does not imply KEY_IS_OBJ as the latter _is_ used
- * by the match function.
- *
- * DNHT_INSERT insert the element if not found.
- * Calls new() to allocates a new object unless
- * DNHT_KEY_IS_OBJ is set.
- *
- * DNHT_UNIQUE only insert if object not found.
- * XXX should it imply DNHT_INSERT ?
- *
- * DNHT_REMOVE remove objects if we find them.
- */
-struct dn_ht; /* should be opaque */
-
-struct dn_ht *dn_ht_init(struct dn_ht *, int buckets, int ofs,
- uint32_t (*hash)(uintptr_t, int, void *),
- int (*match)(void *, uintptr_t, int, void *),
- void *(*newh)(uintptr_t, int, void *));
-void dn_ht_free(struct dn_ht *, int flags);
-
-void *dn_ht_find(struct dn_ht *, uintptr_t, int, void *);
-int dn_ht_scan(struct dn_ht *, int (*)(void *, void *), void *);
-int dn_ht_scan_bucket(struct dn_ht *, int * , int (*)(void *, void *), void *);
-int dn_ht_entries(struct dn_ht *);
-
-enum { /* flags values.
- * first two are returned by the scan callback to indicate
- * to delete the matching element or to end the scan
- */
- DNHT_SCAN_DEL = 0x0001,
- DNHT_SCAN_END = 0x0002,
- DNHT_KEY_IS_OBJ = 0x0004, /* key is the obj pointer */
- DNHT_MATCH_PTR = 0x0008, /* match by pointer, not match() */
- DNHT_INSERT = 0x0010, /* insert if not found */
- DNHT_UNIQUE = 0x0020, /* report error if already there */
- DNHT_REMOVE = 0x0040, /* remove on find or dn_ht_free */
-};
-
-#endif /* _IP_DN_HEAP_H */
diff --git a/sys/netinet/ipfw/dn_sched.h b/sys/netinet/ipfw/dn_sched.h
deleted file mode 100644
index ab823fe..0000000
--- a/sys/netinet/ipfw/dn_sched.h
+++ /dev/null
@@ -1,191 +0,0 @@
-/*
- * Copyright (c) 2010 Riccardo Panicucci, Luigi Rizzo, Universita` di Pisa
- * All rights reserved
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-/*
- * The API to write a packet scheduling algorithm for dummynet.
- *
- * $FreeBSD$
- */
-
-#ifndef _DN_SCHED_H
-#define _DN_SCHED_H
-
-#define DN_MULTIQUEUE 0x01
-/*
- * Descriptor for a scheduling algorithm.
- * Contains all function pointers for a given scheduler
- * This is typically created when a module is loaded, and stored
- * in a global list of schedulers.
- */
-struct dn_alg {
- uint32_t type; /* the scheduler type */
- const char *name; /* scheduler name */
- uint32_t flags; /* DN_MULTIQUEUE if supports multiple queues */
-
- /*
- * The following define the size of 3 optional data structures
- * that may need to be allocated at runtime, and are appended
- * to each of the base data structures: scheduler, sched.inst,
- * and queue. We don't have a per-flowset structure.
- */
- /* + parameters attached to the template, e.g.
- * default queue sizes, weights, quantum size, and so on;
- */
- size_t schk_datalen;
-
- /* + per-instance parameters, such as timestamps,
- * containers for queues, etc;
- */
- size_t si_datalen;
-
- size_t q_datalen; /* per-queue parameters (e.g. S,F) */
-
- /*
- * Methods implemented by the scheduler:
- * enqueue enqueue packet 'm' on scheduler 's', queue 'q'.
- * q is NULL for !MULTIQUEUE.
- * Return 0 on success, 1 on drop (packet consumed anyways).
- * Note that q should be interpreted only as a hint
- * on the flow that the mbuf belongs to: while a
- * scheduler will normally enqueue m into q, it is ok
- * to leave q alone and put the mbuf elsewhere.
- * This function is called in two cases:
- * - when a new packet arrives to the scheduler;
- * - when a scheduler is reconfigured. In this case the
- * call is issued by the new_queue callback, with a
- * non empty queue (q) and m pointing to the first
- * mbuf in the queue. For this reason, the function
- * should internally check for (m != q->mq.head)
- * before calling dn_enqueue().
- *
- * dequeue Called when scheduler instance 's' can
- * dequeue a packet. Return NULL if none are available.
- * XXX what about non work-conserving ?
- *
- * config called on 'sched X config ...', normally writes
- * in the area of size sch_arg
- *
- * destroy called on 'sched delete', frees everything
- * in sch_arg (other parts are handled by more specific
- * functions)
- *
- * new_sched called when a new instance is created, e.g.
- * to create the local queue for !MULTIQUEUE, set V or
- * copy parameters for WFQ, and so on.
- *
- * free_sched called when deleting an instance, cleans
- * extra data in the per-instance area.
- *
- * new_fsk called when a flowset is linked to a scheduler,
- * e.g. to validate parameters such as weights etc.
- * free_fsk when a flowset is unlinked from a scheduler.
- * (probably unnecessary)
- *
- * new_queue called to set the per-queue parameters,
- * e.g. S and F, adjust sum of weights in the parent, etc.
- *
- * The new_queue callback is normally called when
- * creating a new queue. In some cases (such as a
- * scheduler change or reconfiguration) it can be called
- * with a non empty queue. In that case the new_queue
- * callback may need to call the enqueue function; if
- * so, it should call enqueue() passing as m the first
- * element in the queue.
- *
- * free_queue actions related to a queue removal, e.g. undo
- * all the above. If the queue has data in it, also remove
- * from the scheduler. This can e.g. happen during a reconfigure.
- */
- int (*enqueue)(struct dn_sch_inst *, struct dn_queue *,
- struct mbuf *);
- struct mbuf * (*dequeue)(struct dn_sch_inst *);
-
- int (*config)(struct dn_schk *);
- int (*destroy)(struct dn_schk*);
- int (*new_sched)(struct dn_sch_inst *);
- int (*free_sched)(struct dn_sch_inst *);
- int (*new_fsk)(struct dn_fsk *f);
- int (*free_fsk)(struct dn_fsk *f);
- int (*new_queue)(struct dn_queue *q);
- int (*free_queue)(struct dn_queue *q);
-
- /* run-time fields */
- int ref_count; /* XXX number of instances in the system */
- SLIST_ENTRY(dn_alg) next; /* Next scheduler in the list */
-};
-
-/* MSVC does not support initializers so we need this ugly macro */
-#ifdef _WIN32
-#define _SI(fld)
-#else
-#define _SI(fld) fld
-#endif
-
-/*
- * Additionally, dummynet exports some functions and macros
- * to be used by schedulers:
- */
-
-void dn_free_pkts(struct mbuf *mnext);
-int dn_enqueue(struct dn_queue *q, struct mbuf* m, int drop);
-/* bound a variable between min and max */
-int ipdn_bound_var(int *v, int dflt, int lo, int hi, const char *msg);
-
-/*
- * Detach and return the first packet of queue 'q', or NULL if the queue
- * is empty. The length and byte counters of the queue, and of q->_si when
- * it is set, are decreased; q_time is refreshed when the queue drains.
- * Must be the very last thing done on a dequeue as the queue itself may
- * go away.
- */
-static __inline struct mbuf*
-dn_dequeue(struct dn_queue *q)
-{
-	struct mbuf *pkt;
-
-	pkt = q->mq.head;
-	if (pkt != NULL) {
-		q->mq.head = pkt->m_nextpkt;
-
-		/* Update stats for the queue */
-		q->ni.len_bytes -= pkt->m_pkthdr.len;
-		q->ni.length--;
-		if (q->_si != NULL) {
-			q->_si->ni.len_bytes -= pkt->m_pkthdr.len;
-			q->_si->ni.length--;
-		}
-		if (q->ni.length == 0)	/* queue is now idle */
-			q->q_time = dn_cfg.curr_time;
-	}
-	return pkt;
-}
-
-int dn_sched_modevent(module_t mod, int cmd, void *arg);
-
-#define DECLARE_DNSCHED_MODULE(name, dnsched) \
- static moduledata_t name##_mod = { \
- #name, dn_sched_modevent, dnsched \
- }; \
- DECLARE_MODULE(name, name##_mod, \
- SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY); \
- MODULE_DEPEND(name, dummynet, 3, 3, 3);
-#endif /* _DN_SCHED_H */
diff --git a/sys/netinet/ipfw/dn_sched_fifo.c b/sys/netinet/ipfw/dn_sched_fifo.c
deleted file mode 100644
index 0bb3800..0000000
--- a/sys/netinet/ipfw/dn_sched_fifo.c
+++ /dev/null
@@ -1,120 +0,0 @@
-/*
- * Copyright (c) 2010 Riccardo Panicucci, Universita` di Pisa
- * All rights reserved
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-/*
- * $FreeBSD$
- */
-
-#ifdef _KERNEL
-#include <sys/malloc.h>
-#include <sys/socket.h>
-#include <sys/socketvar.h>
-#include <sys/kernel.h>
-#include <sys/mbuf.h>
-#include <sys/module.h>
-#include <net/if.h> /* IFNAMSIZ */
-#include <netinet/in.h>
-#include <netinet/ip_var.h> /* ipfw_rule_ref */
-#include <netinet/ip_fw.h> /* flow_id */
-#include <netinet/ip_dummynet.h>
-#include <netinet/ipfw/dn_heap.h>
-#include <netinet/ipfw/ip_dn_private.h>
-#include <netinet/ipfw/dn_sched.h>
-#else
-#include <dn_test.h>
-#endif
-
-/*
- * This file implements a FIFO scheduler for a single queue.
- * The queue is allocated as part of the scheduler instance,
- * and there is a single flowset in the template which stores
- * the queue size and policy.
- * Enqueue and dequeue use the default library functions.
- */
-/*
- * Append packet m to the instance's only queue. The q argument is not
- * used: the FIFO queue lives in the memory right after the instance.
- * Returns whatever dn_enqueue() returns.
- */
-static int
-fifo_enqueue(struct dn_sch_inst *si, struct dn_queue *q, struct mbuf *m)
-{
-	struct dn_queue *fifo_q = (struct dn_queue *)(si + 1);
-
-	/*
-	 * XXX a call with q != NULL and m == NULL is a re-enqueue from
-	 * an existing scheduler, which we should handle.
-	 */
-	return dn_enqueue(fifo_q, m, 0);
-}
-
-/*
- * Return the oldest packet of the instance's queue (stored right
- * after the instance itself), or NULL when the queue is empty.
- */
-static struct mbuf *
-fifo_dequeue(struct dn_sch_inst *si)
-{
-	struct dn_queue *fifo_q = (struct dn_queue *)(si + 1);
-
-	return dn_dequeue(fifo_q);
-}
-
-/*
- * Set up the queue embedded after a freshly created scheduler
- * instance: stamp its object id, point it back at the instance and
- * attach it to the flowset of the scheduler. Always returns 0.
- */
-static int
-fifo_new_sched(struct dn_sch_inst *si)
-{
-	/* the queue is part of the instance allocation */
-	struct dn_queue *fifo_q = (struct dn_queue *)(si + 1);
-
-	set_oid(&fifo_q->ni.oid, DN_QUEUE, sizeof(*fifo_q));
-	fifo_q->fs = si->sched->fs;
-	fifo_q->_si = si;
-
-	return 0;
-}
-
-/*
- * Tear down the embedded queue of an instance: release every packet
- * still linked from it, then wipe the queue structure.
- * Always returns 0.
- */
-static int
-fifo_free_sched(struct dn_sch_inst *si)
-{
-	struct dn_queue *fifo_q = (struct dn_queue *)(si + 1);
-
-	dn_free_pkts(fifo_q->mq.head);
-	bzero(fifo_q, sizeof(*fifo_q));
-
-	return 0;
-}
-
-/*
- * FIFO scheduler descriptor
- * contains the type of the scheduler, the name, the size of extra
- * data structures, and function pointers.
- * Only the per-instance area is non-empty: it holds the single
- * struct dn_queue that the fifo_* callbacks reach through (si + 1).
- * Callbacks the scheduler does not need are left NULL.
- */
-static struct dn_alg fifo_desc = {
- _SI( .type = ) DN_SCHED_FIFO,
- _SI( .name = ) "FIFO",
- _SI( .flags = ) 0,
-
- _SI( .schk_datalen = ) 0,
- _SI( .si_datalen = ) sizeof(struct dn_queue),
- _SI( .q_datalen = ) 0,
-
- _SI( .enqueue = ) fifo_enqueue,
- _SI( .dequeue = ) fifo_dequeue,
- _SI( .config = ) NULL,
- _SI( .destroy = ) NULL,
- _SI( .new_sched = ) fifo_new_sched,
- _SI( .free_sched = ) fifo_free_sched,
- _SI( .new_fsk = ) NULL,
- _SI( .free_fsk = ) NULL,
- _SI( .new_queue = ) NULL,
- _SI( .free_queue = ) NULL,
-};
-
-DECLARE_DNSCHED_MODULE(dn_fifo, &fifo_desc);
diff --git a/sys/netinet/ipfw/dn_sched_prio.c b/sys/netinet/ipfw/dn_sched_prio.c
deleted file mode 100644
index 28f6006..0000000
--- a/sys/netinet/ipfw/dn_sched_prio.c
+++ /dev/null
@@ -1,229 +0,0 @@
-/*
- * Copyright (c) 2010 Riccardo Panicucci, Universita` di Pisa
- * All rights reserved
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-/*
- * $FreeBSD$
- */
-#ifdef _KERNEL
-#include <sys/malloc.h>
-#include <sys/socket.h>
-#include <sys/socketvar.h>
-#include <sys/kernel.h>
-#include <sys/mbuf.h>
-#include <sys/module.h>
-#include <net/if.h> /* IFNAMSIZ */
-#include <netinet/in.h>
-#include <netinet/ip_var.h> /* ipfw_rule_ref */
-#include <netinet/ip_fw.h> /* flow_id */
-#include <netinet/ip_dummynet.h>
-#include <netinet/ipfw/dn_heap.h>
-#include <netinet/ipfw/ip_dn_private.h>
-#include <netinet/ipfw/dn_sched.h>
-#else
-#include <dn_test.h>
-#endif
-
-#define DN_SCHED_PRIO 5 //XXX
-
-#if !defined(_KERNEL) || !defined(__linux__)
-/*
- * Single-bit helpers for the backlog bitmap; pData points to the
- * bitmap word and ix is the bit index.
- * The mask is built from 1UL because the bitmap is an unsigned long:
- * an int-typed 1 overflows at index 31 and cannot reach the upper
- * bits at all where long is 64 bits wide.
- */
-#define test_bit(ix, pData)	((*(pData)) & (1UL << (ix)))
-#define __set_bit(ix, pData)	((*(pData)) |= (1UL << (ix)))
-#define __clear_bit(ix, pData)	((*(pData)) &= ~(1UL << (ix)))
-#endif
-
-#ifdef __MIPSEL__
-/* kept token-identical to the definition above so both may be active */
-#define __clear_bit(ix, pData)	((*(pData)) &= ~(1UL << (ix)))
-#endif
-
-/* Size of the array of queues pointers. */
-#define BITMAP_T unsigned long
-#define MAXPRIO (sizeof(BITMAP_T) * 8)
-
-/*
- * The scheduler instance contains an array of pointers to queues,
- * one for each priority, and a bitmap listing backlogged queues.
- * prio_enqueue() sets bit i together with q_array[i]; prio_dequeue()
- * and prio_free_queue() clear the bit and reset the pointer to NULL.
- */
-struct prio_si {
- BITMAP_T bitmap; /* bit i set: priority i has a backlogged queue */
- struct dn_queue *q_array[MAXPRIO]; /* queue in use for each priority */
-};
-
-/*
- * Queue packet m at the priority configured in the flowset of q
- * (parameter 0). When that priority already has a backlogged queue
- * the packet goes to that queue rather than to q; otherwise q becomes
- * the queue for the priority and is marked in the bitmap.
- * Returns 1 if dn_enqueue() reported a drop, 0 otherwise.
- */
-static int
-prio_enqueue(struct dn_sch_inst *_si, struct dn_queue *q, struct mbuf *m)
-{
-	struct prio_si *psi = (struct prio_si *)(_si + 1);
-	int level = q->fs->fs.par[0];
-
-	if (test_bit(level, &psi->bitmap)) {
-		/* priority already backlogged: reuse its queue */
-		q = psi->q_array[level];
-	} else {
-		/* first queue at this priority: register it */
-		__set_bit(level, &psi->bitmap);
-		psi->q_array[level] = q;
-	}
-
-	return dn_enqueue(q, m, 0) ? 1 : 0;
-}
-
-/*
- * Index of the lowest set bit of a non-zero bitmap.
- * ffs() only looks at an int, so the word is examined in two 32-bit
- * halves; this keeps the bits above 31 visible where BITMAP_T is
- * 64 bits wide.
- */
-static inline int
-prio_first_set(BITMAP_T map)
-{
-	unsigned int low = (unsigned int)(map & 0xffffffffUL);
-
-	if (low != 0)
-		return ffs((int)low) - 1;
-	/*
-	 * Two 16-bit shifts: a single shift by 32 would be undefined
-	 * where BITMAP_T is only 32 bits wide.
-	 */
-	return 32 + ffs((int)((map >> 16) >> 16)) - 1;
-}
-
-/*
- * Packets are dequeued only from the highest priority queue.
- * The lowest set bit of the bitmap is the index of the array entry
- * which contains the pointer to the highest priority queue.
- * After the dequeue, if this queue becomes empty, its index is
- * removed from the bitmap.
- * The scheduler is idle if the bitmap is empty; NULL is returned then.
- *
- * NOTE: highest priority is 0, lowest is MAXPRIO - 1
- */
-static struct mbuf *
-prio_dequeue(struct dn_sch_inst *_si)
-{
-	struct prio_si *si = (struct prio_si *)(_si + 1);
-	struct mbuf *m;
-	struct dn_queue *q;
-	int prio;
-
-	if (si->bitmap == 0) /* scheduler idle */
-		return NULL;
-
-	prio = prio_first_set(si->bitmap);
-
-	/* Take the highest priority queue in the scheduler */
-	q = si->q_array[prio];
-	// assert(q)
-
-	m = dn_dequeue(q);
-	if (q->mq.head == NULL) {
-		/*
-		 * Queue is now empty, remove it from the scheduler.
-		 * The mask is built in BITMAP_T so priorities above 30
-		 * are cleared correctly.
-		 */
-		si->q_array[prio] = NULL;
-		si->bitmap &= ~((BITMAP_T)1 << prio);
-	}
-	return m;
-}
-
-/*
- * Initialize the private area of a new scheduler instance: no
- * priority is backlogged and no queue is registered.
- * Always returns 0.
- */
-static int
-prio_new_sched(struct dn_sch_inst *_si)
-{
-	struct prio_si *psi = (struct prio_si *)(_si + 1);
-
-	psi->bitmap = 0;
-	bzero(psi->q_array, sizeof(psi->q_array));
-
-	return 0;
-}
-
-/*
- * Flowset hook: force the priority (flowset parameter 0) into the
- * range 0 .. MAXPRIO-1, using 0 as the default. Always returns 0.
- */
-static int
-prio_new_fsk(struct dn_fsk *fs)
-{
-	/* the result of the bounding is not needed here */
-	(void)ipdn_bound_var(&fs->fs.par[0], 0, 0, MAXPRIO - 1,
-	    "PRIO priority");
-
-	return 0;
-}
-
-/*
- * Queue hook: tag the queue with the PRIO subtype and, if it already
- * carries packets, make it visible to the scheduler. A non-empty
- * queue either becomes the queue of its priority or, when another
- * queue already serves that priority, has its packets and counters
- * moved to the end of that queue. This partly duplicates
- * prio_enqueue(). Always returns 0.
- */
-static int
-prio_new_queue(struct dn_queue *q)
-{
-	struct prio_si *psi = (struct prio_si *)(q->_si + 1);
-	int level = q->fs->fs.par[0];
-	struct dn_queue *owner;
-
-	q->ni.oid.subtype = DN_SCHED_PRIO;
-
-	/* an empty queue needs no scheduling */
-	if (q->mq.head == NULL)
-		return 0;
-
-	if (test_bit(level, &psi->bitmap) == 0) {
-		/* No queue with this priority, insert */
-		__set_bit(level, &psi->bitmap);
-		psi->q_array[level] = q;
-		return 0;
-	}
-
-	owner = psi->q_array[level];
-	if (owner == q)
-		return 0;
-
-	/* splice our packet list after the tail of the existing queue */
-	owner->mq.tail->m_nextpkt = q->mq.head;
-	owner->mq.tail = q->mq.tail;
-
-	/* move the counters along with the packets */
-	owner->ni.length += q->ni.length;
-	owner->ni.len_bytes += q->ni.len_bytes;
-	q->ni.length = 0;
-	q->ni.len_bytes = 0;
-
-	/* our queue is now empty */
-	q->mq.head = NULL;
-	q->mq.tail = NULL;
-
-	return 0;
-}
-
-/*
- * Queue hook: if q is the queue currently registered for its
- * priority, unregister it and clear the matching bitmap bit.
- * Always returns 0.
- */
-static int
-prio_free_queue(struct dn_queue *q)
-{
-	struct prio_si *psi = (struct prio_si *)(q->_si + 1);
-	int level = q->fs->fs.par[0];
-
-	if (psi->q_array[level] != q)
-		return 0;
-
-	psi->q_array[level] = NULL;
-	__clear_bit(level, &psi->bitmap);
-
-	return 0;
-}
-
-
-/*
- * PRIO scheduler descriptor: type, name, flags, sizes of the private
- * areas and the callbacks implemented above. Unused callbacks are NULL.
- */
-static struct dn_alg prio_desc = {
- _SI( .type = ) DN_SCHED_PRIO,
- _SI( .name = ) "PRIO",
- _SI( .flags = ) DN_MULTIQUEUE,
-
- /* extra space is needed only in the si (struct prio_si) */
- _SI( .schk_datalen = ) 0,
- _SI( .si_datalen = ) sizeof(struct prio_si),
- _SI( .q_datalen = ) 0,
-
- _SI( .enqueue = ) prio_enqueue,
- _SI( .dequeue = ) prio_dequeue,
-
- _SI( .config = ) NULL,
- _SI( .destroy = ) NULL,
- _SI( .new_sched = ) prio_new_sched,
- _SI( .free_sched = ) NULL,
-
- _SI( .new_fsk = ) prio_new_fsk,
- _SI( .free_fsk = ) NULL,
-
- _SI( .new_queue = ) prio_new_queue,
- _SI( .free_queue = ) prio_free_queue,
-};
-
-
-DECLARE_DNSCHED_MODULE(dn_prio, &prio_desc);
diff --git a/sys/netinet/ipfw/dn_sched_qfq.c b/sys/netinet/ipfw/dn_sched_qfq.c
deleted file mode 100644
index be7fba3..0000000
--- a/sys/netinet/ipfw/dn_sched_qfq.c
+++ /dev/null
@@ -1,864 +0,0 @@
-/*
- * Copyright (c) 2010 Fabio Checconi, Luigi Rizzo, Paolo Valente
- * All rights reserved
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-/*
- * $FreeBSD$
- */
-
-#ifdef _KERNEL
-#include <sys/malloc.h>
-#include <sys/socket.h>
-#include <sys/socketvar.h>
-#include <sys/kernel.h>
-#include <sys/mbuf.h>
-#include <sys/module.h>
-#include <net/if.h> /* IFNAMSIZ */
-#include <netinet/in.h>
-#include <netinet/ip_var.h> /* ipfw_rule_ref */
-#include <netinet/ip_fw.h> /* flow_id */
-#include <netinet/ip_dummynet.h>
-#include <netinet/ipfw/dn_heap.h>
-#include <netinet/ipfw/ip_dn_private.h>
-#include <netinet/ipfw/dn_sched.h>
-#else
-#include <dn_test.h>
-#endif
-
-/*
- * NO(x) wraps debug-only code and fields: it expands to x when
- * QFQ_DEBUG is defined and to nothing otherwise.
- */
-#ifdef QFQ_DEBUG
-struct qfq_sched;
-static void dump_sched(struct qfq_sched *q, const char *msg);
-#define NO(x) x
-#else
-#define NO(x)
-#endif
-#define DN_SCHED_QFQ 4 // XXX Where?
-typedef unsigned long bitmap;
-
-/*
- * bitmaps ops are critical. Some linux versions have __fls
- * and the bitmap ops. Some machines have ffs
- */
-#if defined(_WIN32) || (defined(__MIPSEL__) && defined(LINUX_24))
-/*
- * Fallback fls(): 1-based position of the most significant set bit
- * of n, or 0 when n is 0.
- */
-int fls(unsigned int n)
-{
-	int bits = 0;
-
-	while (n != 0) {
-		n >>= 1;
-		bits++;
-	}
-	return bits;
-}
-#endif
-
-#if !defined(_KERNEL) || defined( __FreeBSD__ ) || defined(_WIN32) || (defined(__MIPSEL__) && defined(LINUX_24))
-/*
- * Zero-based index of the most significant set bit of word,
- * derived from the 1-based result of fls().
- */
-static inline unsigned long __fls(unsigned long word)
-{
-	int top = fls(word);
-
-	return top - 1;
-}
-#endif
-
-#if !defined(_KERNEL) || !defined(__linux__)
-/*
- * Single-bit helpers for the bitmap type. The mask is always built
- * from 1UL: the bitmap is an unsigned long, and an int-typed 1
- * overflows at index 31 (and would then be sign-extended into the
- * upper half of a 64-bit bitmap).
- */
-#ifdef QFQ_DEBUG
-/* Debug variants: same operations, plus a complaint on a bad index. */
-int test_bit(int ix, bitmap *p)
-{
-	if (ix < 0 || ix > 31)
-		D("bad index %d", ix);
-	/* compare explicitly so the result survives the narrowing to int */
-	return (*p & (1UL << ix)) != 0;
-}
-void __set_bit(int ix, bitmap *p)
-{
-	if (ix < 0 || ix > 31)
-		D("bad index %d", ix);
-	*p |= (1UL << ix);
-}
-void __clear_bit(int ix, bitmap *p)
-{
-	if (ix < 0 || ix > 31)
-		D("bad index %d", ix);
-	*p &= ~(1UL << ix);
-}
-#else /* !QFQ_DEBUG */
-/* XXX do we have fast version, or leave it to the compiler ? */
-#define test_bit(ix, pData)	((*(pData)) & (1UL << (ix)))
-#define __set_bit(ix, pData)	((*(pData)) |= (1UL << (ix)))
-#define __clear_bit(ix, pData)	((*(pData)) &= ~(1UL << (ix)))
-#endif /* !QFQ_DEBUG */
-#endif /* !__linux__ */
-
-#ifdef __MIPSEL__
-/* kept token-identical to the macro above so both may be active */
-#define __clear_bit(ix, pData)	((*(pData)) &= ~(1UL << (ix)))
-#endif
-
-/*-------------------------------------------*/
-/*
-
-Virtual time computations.
-
-S, F and V are all computed in fixed point arithmetic with
-FRAC_BITS fractional bits.
-
- QFQ_MAX_INDEX is the maximum index allowed for a group. We need
- one bit per index.
- QFQ_MAX_WSHIFT is the maximum power of two supported as a weight.
- The layout of the bits is as below:
-
- [ MTU_SHIFT ][ FRAC_BITS ]
- [ MAX_INDEX ][ MIN_SLOT_SHIFT ]
- ^.__grp->index = 0
- *.__grp->slot_shift
-
- where MIN_SLOT_SHIFT is derived by difference from the others.
-
-The max group index corresponds to Lmax/w_min, where
-Lmax=1<<MTU_SHIFT, w_min = 1 .
-From this, and knowing how many groups (MAX_INDEX) we want,
-we can derive the shift corresponding to each group.
-
-Because we often need to compute
- F = S + len/w_i and V = V + len/wsum
-instead of storing w_i we store the value
- inv_w = (1<<FRAC_BITS)/w_i
-so we can do F = S + len * inv_w, and likewise V = V + len * IWSUM.
-We use W_TOT in the formulas so we can easily move between
-static and adaptive weight sum.
-
-The per-scheduler-instance data contain all the data structures
-for the scheduler: bitmaps and bucket lists.
-
- */
-/*
- * Maximum number of consecutive slots occupied by backlogged classes
- * inside a group. This is approx lmax/lmin + 5.
- * XXX check because it poses constraints on MAX_INDEX
- */
-#define QFQ_MAX_SLOTS 32
-/*
- * Shifts used for class<->group mapping. Class weights are
- * in the range [1, QFQ_MAX_WEIGHT]; we need to map each class i to the
- * group with the smallest index that can support the L_i / r_i
- * configured for the class.
- *
- * grp->index is the index of the group; and grp->slot_shift
- * is the shift for the corresponding (scaled) sigma_i.
- *
- * When computing the group index, we do (len<<FP_SHIFT)/weight,
- * then compute an FLS (which is like a log2()), and if the result
- * is below the MAX_INDEX region we use 0 (which is the same as
- * using a larger len).
- */
-#define QFQ_MAX_INDEX 19
-#define QFQ_MAX_WSHIFT 16 /* log2(max_weight) */
-
-#define QFQ_MAX_WEIGHT (1<<QFQ_MAX_WSHIFT)
-#define QFQ_MAX_WSUM (2*QFQ_MAX_WEIGHT)
-//#define IWSUM (q->i_wsum)
-#define IWSUM ((1<<FRAC_BITS)/QFQ_MAX_WSUM)
-
-#define FRAC_BITS 30 /* fixed point arithmetic */
-#define ONE_FP (1UL << FRAC_BITS)
-
-#define QFQ_MTU_SHIFT 11 /* log2(max_len) */
-#define QFQ_MIN_SLOT_SHIFT (FRAC_BITS + QFQ_MTU_SHIFT - QFQ_MAX_INDEX)
-
-/*
- * Possible group states, also indexes for the bitmaps array in
- * struct qfq_sched. We rely on ER, IR, EB, IB being numbered 0..3
- */
-enum qfq_state { ER, IR, EB, IB, QFQ_MAX_STATE };
-
-struct qfq_group;
-/*
- * additional queue info. Some of this info should come from
- * the flowset, we copy them here for faster processing.
- * This is an overlay of the struct dn_queue: the scheduler callbacks
- * cast a struct dn_queue pointer straight to a struct qfq_class
- * pointer, so _q has to remain the first member.
- */
-struct qfq_class {
- struct dn_queue _q; /* generic queue; must be first */
- uint64_t S, F; /* flow timestamps (exact) */
- struct qfq_class *next; /* Link for the slot list. */
-
- /* group we belong to. In principle we would need the index,
- * which is log_2(lmax/weight), but we never reference it
- * directly, only the group.
- */
- struct qfq_group *grp;
-
- /* these are copied from the flowset. */
- uint32_t inv_w; /* ONE_FP/weight */
- uint32_t lmax; /* Max packet size for this flow. */
-};
-
-/* Group descriptor, see the paper for details.
- * Basically this contains the bucket lists.
- * slots[] is used as a circular array: bit k of full_slots describes
- * the bucket at slots[(front + k) % QFQ_MAX_SLOTS].
- */
-struct qfq_group {
- uint64_t S, F; /* group timestamps (approx). */
- unsigned int slot_shift; /* Slot shift. */
- unsigned int index; /* Group index. */
- unsigned int front; /* Index of the front slot. */
- bitmap full_slots; /* non-empty slots */
-
- /* Array of lists of active classes. */
- struct qfq_class *slots[QFQ_MAX_SLOTS];
-};
-
-/*
- * scheduler instance descriptor.
- * The fields wrapped in NO() exist only when QFQ_DEBUG is defined.
- * The packet counter is called 'queued' because that is the name the
- * debug statements in the enqueue, dequeue and dump code refer to.
- */
-struct qfq_sched {
-	uint64_t V;		/* Precise virtual time. */
-	uint32_t wsum;		/* weight sum */
-	NO(uint32_t i_wsum;	/* ONE_FP/w_sum */
-	uint32_t queued;	/* debugging */
-	uint32_t loops;		/* debugging */)
-	bitmap bitmaps[QFQ_MAX_STATE];	/* Group bitmaps. */
-	struct qfq_group groups[QFQ_MAX_INDEX + 1];	/* The groups. */
-};
-
-/*---- support functions ----------------------------*/
-
-/*
- * Wraparound-tolerant "a is after b" test for 64-bit timestamps:
- * the unsigned difference is reinterpreted as signed and checked
- * for being positive.
- */
-static inline int qfq_gt(uint64_t a, uint64_t b)
-{
-	int64_t delta = (int64_t)(a - b);
-
-	return delta > 0;
-}
-
-/*
- * Round a precise timestamp down to its slotted value by clearing
- * the 'shift' least significant bits.
- */
-static inline uint64_t qfq_round_down(uint64_t ts, unsigned int shift)
-{
-	uint64_t low_bits = (1ULL << shift) - 1;
-
-	return ts & ~low_bits;
-}
-
-/*
- * Return a pointer to the group whose index is the lowest bit set
- * in the bitmap. The caller passes a non-zero bitmap.
- */
-static inline struct qfq_group *qfq_ffs(struct qfq_sched *q,
-	unsigned long bitmap)
-{
-	/* ffs() is one-based, group indexes are zero-based */
-	return &q->groups[ffs(bitmap) - 1];
-}
-
-/*
- * Compute the group index of a flow from its inverse weight and its
- * maximum packet length: index = log_2(maxlen/weight), with the
- * fixed-point scaling applied. Flows whose scaled slot size falls
- * below the smallest slot get index 0. An exact power of two is
- * assigned to the lower of the two candidate groups.
- * This is used only once at flow creation.
- */
-static int qfq_calc_index(uint32_t inv_w, unsigned int maxlen)
-{
-	uint64_t slot_size = (uint64_t)maxlen * inv_w;
-	unsigned long size_map = (unsigned long)(slot_size >> QFQ_MIN_SLOT_SHIFT);
-	int index = 0;
-
-	if (size_map != 0) {
-		index = __fls(size_map) + 1;	// basically a log_2()
-		/* exact power of two: it still fits one group below */
-		if (slot_size == (1ULL << (index + QFQ_MIN_SLOT_SHIFT - 1)))
-			index--;
-		if (index < 0)
-			index = 0;
-	}
-
-	ND("W = %d, L = %d, I = %d\n", ONE_FP/inv_w, maxlen, index);
-	return index;
-}
-/*---- end support functions ----*/
-
-/*-------- API calls --------------------------------*/
-/*
- * Validate and copy parameters from flowset.
- * The weight is flowset parameter 0 and the maximum packet length is
- * parameter 1. A weight of 0 or above QFQ_MAX_WEIGHT is replaced by 1.
- * Returns EINVAL, without touching the weight sum, when adding the
- * flow would push the sum above QFQ_MAX_WSUM; otherwise the flow is
- * bound to its group, its weight is added to the sum and 0 is returned.
- */
-static int
-qfq_new_queue(struct dn_queue *_q)
-{
-	struct qfq_sched *q = (struct qfq_sched *)(_q->_si + 1);
-	struct qfq_class *cl = (struct qfq_class *)_q;
-	int i;
-	uint32_t w;	/* approximated weight */
-
-	/* import parameters from the flowset. They should be correct
-	 * already.
-	 */
-	w = _q->fs->fs.par[0];
-	cl->lmax = _q->fs->fs.par[1];
-	if (!w || w > QFQ_MAX_WEIGHT) {
-		w = 1;
-		D("rounding weight to 1");
-	}
-	cl->inv_w = ONE_FP/w;
-	/* recompute the weight the way qfq_free_queue() will see it */
-	w = ONE_FP/cl->inv_w;
-	if (q->wsum + w > QFQ_MAX_WSUM) {
-		/*
-		 * Nothing was added to wsum. Reset inv_w so that
-		 * qfq_free_queue(), which keys on a non-zero inv_w,
-		 * does not subtract a weight that was never accounted.
-		 */
-		cl->inv_w = 0;
-		return EINVAL;
-	}
-
-	i = qfq_calc_index(cl->inv_w, cl->lmax);
-	cl->grp = &q->groups[i];
-	q->wsum += w;
-	// XXX cl->S = q->V; ?
-	// XXX compute q->i_wsum
-	return 0;
-}
-
-/*
- * Remove an empty queue: give its weight back to the weight sum of
- * the scheduler instance. A zero inv_w marks a queue that has been
- * released already, so a second call does nothing. Always returns 0.
- */
-static int
-qfq_free_queue(struct dn_queue *_q)
-{
-	struct qfq_sched *sched = (struct qfq_sched *)(_q->_si + 1);
-	struct qfq_class *cl = (struct qfq_class *)_q;
-
-	if (cl->inv_w == 0)
-		return 0;
-
-	sched->wsum -= ONE_FP/cl->inv_w;
-	cl->inv_w = 0;	/* reset weight to avoid run twice */
-
-	return 0;
-}
-
-/*
- * Mimic what would be ffs_from(): return the bitmap with every bit
- * below position 'from' cleared.
- */
-static inline unsigned long
-mask_from(unsigned long bitmap, int from)
-{
-	unsigned long below = (1UL << from) - 1;
-
-	return bitmap & ~below;
-}
-
-/*
- * Compute the state of a group; relies on ER=0, IR=1, EB=2, IB=3.
- * The low bit says whether the group is ineligible (grp->S > q->V).
- * EB is added when the first ER group with an index not below ours
- * has a finish time earlier than ours, i.e. it is blocking us.
- */
-static inline unsigned int
-qfq_calc_state(struct qfq_sched *q, struct qfq_group *grp)
-{
-	unsigned int state;
-	unsigned long candidates;
-
-	/* if S > V we are not eligible */
-	state = qfq_gt(grp->S, q->V);
-
-	candidates = mask_from(q->bitmaps[ER], grp->index);
-	if (candidates != 0 && qfq_gt(grp->F, qfq_ffs(q, candidates)->F))
-		state |= EB;
-
-	return state;
-}
-
-/*
- * Move the groups selected by mask from the src set to the dst set:
- * the selected bits present in src are added to dst, then all bits
- * of mask are cleared in src.
- * The caller should make sure that src != dst.
- */
-static inline void
-qfq_move_groups(struct qfq_sched *q, unsigned long mask, int src, int dst)
-{
-	unsigned long moving = q->bitmaps[src] & mask;
-
-	q->bitmaps[dst] |= moving;
-	q->bitmaps[src] &= ~mask;
-}
-
-/*
- * Called after the group at 'index' changed or left ER; old_finish
- * is its previous finish time. Unless the first ER group above
- * 'index' has a finish time not later than old_finish, every group
- * with an index below 'index' is moved from EB to ER and from IB
- * to IR.
- */
-static inline void
-qfq_unblock_groups(struct qfq_sched *q, int index, uint64_t old_finish)
-{
-	unsigned long higher = mask_from(q->bitmaps[ER], index + 1);
-	unsigned long lower;
-
-	if (higher != 0 && !qfq_gt(qfq_ffs(q, higher)->F, old_finish))
-		return;
-
-	lower = (1UL << index) - 1;
-	qfq_move_groups(q, lower, EB, ER);
-	qfq_move_groups(q, lower, IB, IR);
-}
-
-/*
- * After the virtual time moved from old_V to q->V, promote groups
- * from the ineligible sets (IR, IB) to the eligible ones (ER, EB).
- * Both times are reduced to slot numbers; the highest bit in which
- * the two slot numbers differ selects the groups, from index 0 up
- * to that bit, that are moved.
- *
- * perhaps
- *
- old_V ^= q->V;
- old_V >>= QFQ_MIN_SLOT_SHIFT;
- if (old_V) {
- ...
- }
- *
- */
-static inline void
-qfq_make_eligible(struct qfq_sched *q, uint64_t old_V)
-{
-	unsigned long cur_slot = q->V >> QFQ_MIN_SLOT_SHIFT;
-	unsigned long prev_slot = old_V >> QFQ_MIN_SLOT_SHIFT;
-	unsigned long changed = cur_slot ^ prev_slot;
-	unsigned long mask;
-
-	if (changed == 0)
-		return;
-
-	mask = (2UL << (__fls(changed))) - 1;
-	qfq_move_groups(q, mask, IR, ER);
-	qfq_move_groups(q, mask, IB, EB);
-}
-
-/*
- * Link class cl at the head of the bucket that corresponds to
- * roundedS and mark that bucket as non-empty.
- * roundedS is always cl->S rounded on grp->slot_shift bits.
- * XXX we should make sure that the slot offset stays below 32.
- * This is guaranteed by the input values.
- */
-static inline void
-qfq_slot_insert(struct qfq_group *grp, struct qfq_class *cl, uint64_t roundedS)
-{
-	/* distance, in slots, from the start time of the group */
-	uint64_t offset = (roundedS - grp->S) >> grp->slot_shift;
-	/* position of that slot in the circular array */
-	unsigned int bucket = (grp->front + offset) % QFQ_MAX_SLOTS;
-
-	cl->next = grp->slots[bucket];
-	grp->slots[bucket] = cl;
-	__set_bit(offset, &grp->full_slots);
-}
-
-/*
- * Unlink the first class of the front bucket. When the bucket
- * becomes empty, bit 0 of full_slots is cleared as well.
- * The front bucket must not be empty on entry.
- */
-static inline void
-qfq_front_slot_remove(struct qfq_group *grp)
-{
-	struct qfq_class *rest = grp->slots[grp->front]->next;
-
-	grp->slots[grp->front] = rest;
-	if (rest == NULL)
-		__clear_bit(0, &grp->full_slots);
-}
-
-/*
- * Return the first class of the first non-empty bucket of a group,
- * or NULL when every bucket is empty. As a side effect the bucket
- * list is realigned: front advances to that bucket and full_slots is
- * shifted so that the bucket sits at bit 0.
- */
-static inline struct qfq_class *
-qfq_slot_scan(struct qfq_group *grp)
-{
-	int skip;
-
-	ND("grp %d full %x", grp->index, grp->full_slots);
-	if (grp->full_slots == 0)
-		return NULL;
-
-	skip = ffs(grp->full_slots) - 1;	// zero-based
-	if (skip > 0) {
-		grp->full_slots >>= skip;
-		grp->front = (grp->front + skip) % QFQ_MAX_SLOTS;
-	}
-
-	return grp->slots[grp->front];
-}
-
-/*
- * Adjust the bucket list when the start time of a group decreases
- * to roundedS. Rather than moving the objects, front is moved down
- * (modulo QFQ_MAX_SLOTS) by the number of slots between the old and
- * the new start time, and the mask of occupied slots is shifted up
- * by the same amount because ffs() is used to find the first
- * non-empty slot.
- * This covers decreases in the group's start time, but what about
- * increases of the start time ?
- * Here too we should make sure that the shift is less than 32.
- */
-static inline void
-qfq_slot_rotate(struct qfq_sched *q, struct qfq_group *grp, uint64_t roundedS)
-{
-	unsigned int back = (grp->S - roundedS) >> grp->slot_shift;
-
-	grp->front = (grp->front - back) % QFQ_MAX_SLOTS;
-	grp->full_slots <<= back;
-}
-
-
-/*
- * Re-evaluate eligibility after the virtual time changed from old_V.
- * Nothing happens when no group is ineligible. If no group is in ER,
- * V is first advanced to the start time of the lowest-indexed
- * ineligible group (when that is later than V); then the groups that
- * became eligible are moved to their new sets.
- */
-static inline void
-qfq_update_eligible(struct qfq_sched *q, uint64_t old_V)
-{
-	bitmap waiting = q->bitmaps[IR] | q->bitmaps[IB];
-
-	if (waiting == 0)
-		return;
-
-	if (q->bitmaps[ER] == 0) {
-		struct qfq_group *first = qfq_ffs(q, waiting);
-
-		if (qfq_gt(first->S, q->V))
-			q->V = first->S;
-	}
-	qfq_make_eligible(q, old_V);
-}
-
-/*
- * Update a class after one of its packets has been served; the class
- * is the first one of the front bucket of grp. Its start time becomes
- * its old finish time. An empty class is simply removed from the
- * bucket. Otherwise a new finish time is computed from the next
- * packet and, if the rounded start time no longer matches the start
- * time of the group, the class is moved to the bucket it now belongs
- * to.
- * Returns 1 if the group needs to be updated too, 0 otherwise.
- */
-static inline int
-qfq_update_class(struct qfq_sched *q, struct qfq_group *grp,
-	struct qfq_class *cl)
-{
-	unsigned int len;
-	uint64_t roundedS;
-
-	cl->S = cl->F;
-
-	if (cl->_q.mq.head == NULL) {
-		/* nothing left to send */
-		qfq_front_slot_remove(grp);
-		return 1;
-	}
-
-	len = cl->_q.mq.head->m_pkthdr.len;
-	cl->F = cl->S + (uint64_t)len * cl->inv_w;
-
-	roundedS = qfq_round_down(cl->S, grp->slot_shift);
-	if (roundedS == grp->S)
-		return 0;	/* still in the front bucket */
-
-	qfq_front_slot_remove(grp);
-	qfq_slot_insert(grp, cl, roundedS);
-	return 1;
-}
-
-/*
- * Dequeue callback: return the next packet to transmit, or NULL when
- * no group is in the ER set.
- * The packet comes from the first class of the front bucket of the
- * lowest-indexed ER group. The virtual time V advances by the packet
- * length scaled by IWSUM; then the class and, if needed, its group
- * are re-timestamped and moved between the state bitmaps.
- * The statements wrapped in NO() exist only in QFQ_DEBUG builds.
- */
-static struct mbuf *
-qfq_dequeue(struct dn_sch_inst *si)
-{
- struct qfq_sched *q = (struct qfq_sched *)(si + 1);
- struct qfq_group *grp;
- struct qfq_class *cl;
- struct mbuf *m;
- uint64_t old_V;
-
- NO(q->loops++;)
- if (!q->bitmaps[ER]) {
- NO(if (q->queued)
- dump_sched(q, "start dequeue");)
- return NULL;
- }
-
- grp = qfq_ffs(q, q->bitmaps[ER]);
-
- cl = grp->slots[grp->front];
- /* extract from the first bucket in the bucket list */
- m = dn_dequeue(&cl->_q);
-
- if (!m) {
- D("BUG/* non-workconserving leaf */");
- return NULL;
- }
- NO(q->queued--;)
- /* keep the old V: qfq_update_eligible() compares against it */
- old_V = q->V;
- q->V += (uint64_t)m->m_pkthdr.len * IWSUM;
- ND("m is %p F 0x%llx V now 0x%llx", m, cl->F, q->V);
-
- if (qfq_update_class(q, grp, cl)) {
- /* save F before the group is re-timestamped below */
- uint64_t old_F = grp->F;
- cl = qfq_slot_scan(grp);
- if (!cl) { /* group gone, remove from ER */
- __clear_bit(grp->index, &q->bitmaps[ER]);
- // grp->S = grp->F + 1; // XXX debugging only
- } else {
- uint64_t roundedS = qfq_round_down(cl->S, grp->slot_shift);
- unsigned int s;
-
- /* start time unchanged: the group keeps its state */
- if (grp->S == roundedS)
- goto skip_unblock;
- grp->S = roundedS;
- grp->F = roundedS + (2ULL << grp->slot_shift);
- /* remove from ER and put in the new set */
- __clear_bit(grp->index, &q->bitmaps[ER]);
- s = qfq_calc_state(q, grp);
- __set_bit(grp->index, &q->bitmaps[s]);
- }
- /* we need to unblock even if the group has gone away */
- qfq_unblock_groups(q, grp->index, old_F);
- }
-
-skip_unblock:
- qfq_update_eligible(q, old_V);
- NO(if (!q->bitmaps[ER] && q->queued)
- dump_sched(q, "end dequeue");)
-
- return m;
-}
-
-/*
- * Assign a reasonable start time for a new flow k in group i.
- * Admissible values for \hat(F) are multiples of \sigma_i
- * no greater than V+\sigma_i . Larger values mean that
- * we had a wraparound so we consider the timestamp to be stale.
- *
- * If F is not stale and F >= V then we set S = F.
- * Otherwise we should assign S = V, but this may violate
- * the ordering in ER. So, if we have groups in ER, set S to
- * the F_j of the first group j which would be blocking us.
- * We are guaranteed not to move S backward because
- * otherwise our group i would still be blocked.
- */
-static inline void
-qfq_update_start(struct qfq_sched *q, struct qfq_class *cl)
-{
-	unsigned long mask;
-	uint64_t limit, roundedF;
-	int slot_shift = cl->grp->slot_shift;
-
-	roundedF = qfq_round_down(cl->F, slot_shift);
-	/*
-	 * The constant must be 64 bits wide: slot shifts go beyond 31
-	 * (qfq_new_sched() assigns up to QFQ_MTU_SHIFT + FRAC_BITS),
-	 * which would be an undefined shift on a 32-bit unsigned long.
-	 */
-	limit = qfq_round_down(q->V, slot_shift) + (1ULL << slot_shift);
-
-	if (!qfq_gt(cl->F, q->V) || qfq_gt(roundedF, limit)) {
-		/* timestamp was stale */
-		mask = mask_from(q->bitmaps[ER], cl->grp->index);
-		if (mask) {
-			struct qfq_group *next = qfq_ffs(q, mask);
-			if (qfq_gt(roundedF, next->F)) {
-				cl->S = next->F;
-				return;
-			}
-		}
-		cl->S = q->V;
-	} else { /* timestamp is not stale */
-		cl->S = cl->F;
-	}
-}
-
-/*
- * Enqueue callback: add packet m to queue _q of this instance.
- * Returns 1 when dn_enqueue() dropped the packet, 0 otherwise.
- * If the packet did not end up at the head of the queue, the flow was
- * already backlogged and nothing else changes. Otherwise the flow gets
- * new start/finish times, is inserted in the bucket list of its group,
- * and the group's timestamps and state bitmap are updated if needed.
- * A call with m already at the head of _q skips the queueing step.
- * The statements wrapped in NO() exist only in QFQ_DEBUG builds.
- */
-static int
-qfq_enqueue(struct dn_sch_inst *si, struct dn_queue *_q, struct mbuf *m)
-{
- struct qfq_sched *q = (struct qfq_sched *)(si + 1);
- struct qfq_group *grp;
- struct qfq_class *cl = (struct qfq_class *)_q;
- uint64_t roundedS;
- int s;
-
- NO(q->loops++;)
- DX(4, "len %d flow %p inv_w 0x%x grp %d", m->m_pkthdr.len,
- _q, cl->inv_w, cl->grp->index);
- /* XXX verify that the packet obeys the parameters */
- if (m != _q->mq.head) {
- if (dn_enqueue(_q, m, 0)) /* packet was dropped */
- return 1;
- NO(q->queued++;)
- /* not at the head: the flow was already backlogged */
- if (m != _q->mq.head)
- return 0;
- }
- /* If reach this point, queue q was idle */
- grp = cl->grp;
- qfq_update_start(q, cl); /* adjust start time */
- /* compute new finish time and rounded start. */
- cl->F = cl->S + (uint64_t)(m->m_pkthdr.len) * cl->inv_w;
- roundedS = qfq_round_down(cl->S, grp->slot_shift);
-
- /*
- * insert cl in the correct bucket.
- * If cl->S >= grp->S we don't need to adjust the
- * bucket list and simply go to the insertion phase.
- * Otherwise grp->S is decreasing, we must make room
- * in the bucket list, and also recompute the group state.
- * Finally, if there were no flows in this group and nobody
- * was in ER make sure to adjust V.
- */
- if (grp->full_slots) {
- if (!qfq_gt(grp->S, cl->S))
- goto skip_update;
- /* create a slot for this cl->S */
- qfq_slot_rotate(q, grp, roundedS);
- /* group was surely ineligible, remove */
- __clear_bit(grp->index, &q->bitmaps[IR]);
- __clear_bit(grp->index, &q->bitmaps[IB]);
- } else if (!q->bitmaps[ER] && qfq_gt(roundedS, q->V))
- q->V = roundedS;
-
- /* new group timestamps, then file the group under its new state */
- grp->S = roundedS;
- grp->F = roundedS + (2ULL << grp->slot_shift); // i.e. 2\sigma_i
- s = qfq_calc_state(q, grp);
- __set_bit(grp->index, &q->bitmaps[s]);
- ND("new state %d 0x%x", s, q->bitmaps[s]);
- ND("S %llx F %llx V %llx", cl->S, cl->F, q->V);
-skip_update:
- qfq_slot_insert(grp, cl, roundedS);
-
- return 0;
-}
-
-
-#if 0
-static inline void
-qfq_slot_remove(struct qfq_sched *q, struct qfq_group *grp,
- struct qfq_class *cl, struct qfq_class **pprev)
-{
- unsigned int i, offset;
- uint64_t roundedS;
-
- roundedS = qfq_round_down(cl->S, grp->slot_shift);
- offset = (roundedS - grp->S) >> grp->slot_shift;
- i = (grp->front + offset) % QFQ_MAX_SLOTS;
-
-#ifdef notyet
- if (!pprev) {
- pprev = &grp->slots[i];
- while (*pprev && *pprev != cl)
- pprev = &(*pprev)->next;
- }
-#endif
-
- *pprev = cl->next;
- if (!grp->slots[i])
- __clear_bit(offset, &grp->full_slots);
-}
-
-/*
- * called to forcibly destroy a queue.
- * If the queue is not in the front bucket, or if it has
- * other queues in the front bucket, we can simply remove
- * the queue with no other side effects.
- * Otherwise we must propagate the event up.
- * XXX description to be completed.
- * NOTE: this code is compiled out (#if 0) and is stale: it reads
- * cl->index, but struct qfq_class only carries a pointer to its group.
- */
-static void
-qfq_deactivate_class(struct qfq_sched *q, struct qfq_class *cl,
- struct qfq_class **pprev)
-{
- struct qfq_group *grp = &q->groups[cl->index];
- unsigned long mask;
- uint64_t roundedS;
- int s;
-
- cl->F = cl->S; // not needed if the class goes away.
- qfq_slot_remove(q, grp, cl, pprev);
-
- if (!grp->full_slots) {
- /* nothing left in the group, remove from all sets.
- * Do ER last because if we were blocking other groups
- * we must unblock them.
- */
- __clear_bit(grp->index, &q->bitmaps[IR]);
- __clear_bit(grp->index, &q->bitmaps[EB]);
- __clear_bit(grp->index, &q->bitmaps[IB]);
-
- if (test_bit(grp->index, &q->bitmaps[ER]) &&
- !(q->bitmaps[ER] & ~((1UL << grp->index) - 1))) {
- mask = q->bitmaps[ER] & ((1UL << grp->index) - 1);
- if (mask)
- mask = ~((1UL << __fls(mask)) - 1);
- else
- mask = ~0UL;
- qfq_move_groups(q, mask, EB, ER);
- qfq_move_groups(q, mask, IB, IR);
- }
- __clear_bit(grp->index, &q->bitmaps[ER]);
- } else if (!grp->slots[grp->front]) {
- cl = qfq_slot_scan(grp);
- roundedS = qfq_round_down(cl->S, grp->slot_shift);
- if (grp->S != roundedS) {
- __clear_bit(grp->index, &q->bitmaps[ER]);
- __clear_bit(grp->index, &q->bitmaps[IR]);
- __clear_bit(grp->index, &q->bitmaps[EB]);
- __clear_bit(grp->index, &q->bitmaps[IB]);
- grp->S = roundedS;
- grp->F = roundedS + (2ULL << grp->slot_shift);
- s = qfq_calc_state(q, grp);
- __set_bit(grp->index, &q->bitmaps[s]);
- }
- }
- qfq_update_eligible(q, q->V);
-}
-#endif
-
-/*
- * Flowset hook: bring the two QFQ parameters into their valid range.
- * Parameter 0 is the weight (1..QFQ_MAX_WEIGHT, default 1) and
- * parameter 1 is the maximum packet length (1..2000, default 1500).
- * Always returns 0.
- */
-static int
-qfq_new_fsk(struct dn_fsk *f)
-{
-	/* the results of the bounding are not needed here */
-	(void)ipdn_bound_var(&f->fs.par[0], 1, 1, QFQ_MAX_WEIGHT,
-	    "qfq weight");
-	(void)ipdn_bound_var(&f->fs.par[1], 1500, 1, 2000,
-	    "qfq maxlen");
-	ND("weight %d len %d\n", f->fs.par[0], f->fs.par[1]);
-
-	return 0;
-}
-
-/*
- * Initialize a new scheduler instance: every group gets its own
- * index, and a slot shift that starts at QFQ_MIN_SLOT_SHIFT for
- * group 0 and grows by one with each index. Always returns 0.
- */
-static int
-qfq_new_sched(struct dn_sch_inst *si)
-{
-	struct qfq_sched *sched = (struct qfq_sched *)(si + 1);
-	int idx;
-
-	for (idx = 0; idx <= QFQ_MAX_INDEX; idx++) {
-		struct qfq_group *grp = &sched->groups[idx];
-
-		grp->slot_shift = QFQ_MIN_SLOT_SHIFT + idx;
-		grp->index = idx;
-	}
-
-	return 0;
-}
-
-/*
- * QFQ scheduler descriptor
- * The per-instance area holds a struct qfq_sched. Each queue is
- * extended by the part of struct qfq_class that follows the embedded
- * struct dn_queue. Unused callbacks are NULL.
- */
-static struct dn_alg qfq_desc = {
- _SI( .type = ) DN_SCHED_QFQ,
- _SI( .name = ) "QFQ",
- _SI( .flags = ) DN_MULTIQUEUE,
-
- _SI( .schk_datalen = ) 0,
- _SI( .si_datalen = ) sizeof(struct qfq_sched),
- _SI( .q_datalen = ) sizeof(struct qfq_class) - sizeof(struct dn_queue),
-
- _SI( .enqueue = ) qfq_enqueue,
- _SI( .dequeue = ) qfq_dequeue,
-
- _SI( .config = ) NULL,
- _SI( .destroy = ) NULL,
- _SI( .new_sched = ) qfq_new_sched,
- _SI( .free_sched = ) NULL,
- _SI( .new_fsk = ) qfq_new_fsk,
- _SI( .free_fsk = ) NULL,
- _SI( .new_queue = ) qfq_new_queue,
- _SI( .free_queue = ) qfq_free_queue,
-};
-
-DECLARE_DNSCHED_MODULE(dn_qfq, &qfq_desc);
-
-#ifdef QFQ_DEBUG
-/*
- * Debug helper: print the non-empty buckets, the slot bitmap and the
- * timestamps of every group whose bit is set in mask.
- * The format specifiers match the argument types: full_slots is an
- * unsigned long, and the 64-bit timestamps are cast to unsigned long
- * long for the %llx conversions.
- */
-static void
-dump_groups(struct qfq_sched *q, uint32_t mask)
-{
-	int i, j;
-
-	for (i = 0; i < QFQ_MAX_INDEX + 1; i++) {
-		struct qfq_group *g = &q->groups[i];
-
-		/* group not selected by the caller */
-		if (0 == (mask & (1U << i)))
-			continue;
-		for (j = 0; j < QFQ_MAX_SLOTS; j++) {
-			if (g->slots[j])
-				D(" bucket %d %p", j, (void *)g->slots[j]);
-		}
-		D("full_slots 0x%lx", g->full_slots);
-		D(" %2d S 0x%20llx F 0x%llx %c", i,
-		    (unsigned long long)g->S, (unsigned long long)g->F,
-		    mask & (1U << i) ? '1' : '0');
-	}
-}
-
-/*
- * Debug helper: print the four group bitmaps of a scheduler instance
- * followed by a dump of all of its groups; msg names the call site.
- * The bitmaps are unsigned long, hence the %lx conversions.
- */
-static void
-dump_sched(struct qfq_sched *q, const char *msg)
-{
-	D("--- in %s: ---", msg);
-	ND("loops %d queued %d V 0x%llx", q->loops, q->queued, q->V);
-	D(" ER 0x%08lx", q->bitmaps[ER]);
-	D(" EB 0x%08lx", q->bitmaps[EB]);
-	D(" IR 0x%08lx", q->bitmaps[IR]);
-	D(" IB 0x%08lx", q->bitmaps[IB]);
-	dump_groups(q, 0xffffffff);
-}
-#endif /* QFQ_DEBUG */
diff --git a/sys/netinet/ipfw/dn_sched_rr.c b/sys/netinet/ipfw/dn_sched_rr.c
deleted file mode 100644
index 1bbd800..0000000
--- a/sys/netinet/ipfw/dn_sched_rr.c
+++ /dev/null
@@ -1,307 +0,0 @@
-/*
- * Copyright (c) 2010 Riccardo Panicucci, Universita` di Pisa
- * All rights reserved
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-/*
- * $FreeBSD$
- */
-
-#ifdef _KERNEL
-#include <sys/malloc.h>
-#include <sys/socket.h>
-#include <sys/socketvar.h>
-#include <sys/kernel.h>
-#include <sys/mbuf.h>
-#include <sys/module.h>
-#include <net/if.h> /* IFNAMSIZ */
-#include <netinet/in.h>
-#include <netinet/ip_var.h> /* ipfw_rule_ref */
-#include <netinet/ip_fw.h> /* flow_id */
-#include <netinet/ip_dummynet.h>
-#include <netinet/ipfw/dn_heap.h>
-#include <netinet/ipfw/ip_dn_private.h>
-#include <netinet/ipfw/dn_sched.h>
-#else
-#include <dn_test.h>
-#endif
-
-#define DN_SCHED_RR 3 // XXX Where? Scheduler type id, used for rr_desc.type and for the queue's oid.subtype (the XXX presumably asks which header should own it).
-
-struct rr_queue {
- struct dn_queue q; /* Standard queue; first member, so a dn_queue pointer can be cast to rr_queue */
- int status; /* 1: queue is in the list, 0: it is not */
- int credit; /* Number of bytes the queue may still transmit before it must wait for more credit */
- int quantum; /* credit added per refill: fs.par[0] * fs.par[1], set in rr_new_queue() */
- struct rr_queue *qnext; /* next queue on the circular round-robin list */
-};
-
-/* struct rr_schk contains global config parameters
- * and is right after dn_schk
- * (rr_config() and rr_new_fsk() reach it as (schk + 1)).
- * NOTE(review): rr_desc declares schk_datalen as 0 although rr_config()
- * stores these three fields there; unless the framework reserves the
- * space some other way this writes past the dn_schk -- confirm, and
- * consider sizeof(struct rr_schk) for schk_datalen.
- */
-struct rr_schk {
- int min_q; /* Min quantum */
- int max_q; /* Max quantum */
- int q_bytes; /* Bytes per quantum */
-};
-
-/* per-instance round robin list, right after dn_sch_inst.
- * The list is circular: rr_append() keeps tail->qnext == head, and
- * both pointers are NULL when no queue is linked.
- */
-struct rr_si {
- struct rr_queue *head, *tail; /* head: queue rr_dequeue() looks at first; tail: last queue of the ring */
-};
-
-/* Append a queue to the rr list.
- * Marks q as linked (status = 1), refills its credit with one quantum,
- * links it after the current tail and re-closes the ring so that
- * tail->qnext == head. Does not check whether q is already linked;
- * rr_enqueue() tests q->status before calling.
- */
-static inline void
-rr_append(struct rr_queue *q, struct rr_si *si)
-{
- q->status = 1; /* mark as in-rr_list */
- q->credit = q->quantum; /* initialize credit */
-
- /* append to the tail */
- if (si->head == NULL)
- si->head = q;
- else
- si->tail->qnext = q;
- si->tail = q; /* advance the tail pointer */
- q->qnext = si->head; /* make it circular */
-}
-
-/* Remove the head queue from circular list.
- * Clears the status flag of the removed queue. If it was the only
- * element the list becomes empty (head == tail == NULL); otherwise
- * head moves to the next queue and the ring is closed again.
- * The removed queue's own qnext is left untouched.
- */
-static inline void
-rr_remove_head(struct rr_si *si)
-{
- if (si->head == NULL)
- return; /* empty queue */
- si->head->status = 0;
-
- if (si->head == si->tail) {
- si->head = si->tail = NULL;
- return;
- }
-
- si->head = si->head->qnext;
- si->tail->qnext = si->head; /* tail must point at the new head */
-}
-
-/* Remove a queue from circular list.
- * XXX see if it can be merged with remove_queue()
- * Does nothing unless q->status is 1. The head is handed to
- * rr_remove_head(); any other queue is unlinked from its predecessor,
- * and tail is moved back when q was the tail.
- * NOTE(review): the list is circular, so 'prev' never becomes NULL
- * while walking it; the loop ends only through the break. If q has
- * status 1 but is not linked on this si, the walk never terminates.
- */
-static inline void
-remove_queue_q(struct rr_queue *q, struct rr_si *si)
-{
- struct rr_queue *prev;
-
- if (q->status != 1)
- return;
- if (q == si->head) {
- rr_remove_head(si);
- return;
- }
-
- for (prev = si->head; prev; prev = prev->qnext) {
- if (prev->qnext != q)
- continue;
- prev->qnext = q->qnext; /* bypass q */
- if (q == si->tail)
- si->tail = prev;
- q->status = 0;
- break;
- }
-}
-
-/*
- * Rotate the ring by one position: head and tail both advance to
- * their successors. With tail->qnext == head (as rr_append() leaves
- * it) the old head becomes the new tail.
- */
-static inline void
-next_pointer(struct rr_si *si)
-{
- if (si->head == NULL)
- return; /* empty queue */
-
- si->head = si->head->qnext;
- si->tail = si->tail->qnext;
-}
-/*
- * Enqueue callback. Stores m in q (unless m is already q's first
- * packet) and, when m ends up at the head of q, links q on the
- * round-robin list of this instance if it is not linked yet.
- * Returns 1 when dn_enqueue() reports a drop, 0 otherwise.
- */
-static int
-rr_enqueue(struct dn_sch_inst *_si, struct dn_queue *q, struct mbuf *m)
-{
- struct rr_si *si;
- struct rr_queue *rrq;
-
- if (m != q->mq.head) {
- if (dn_enqueue(q, m, 0)) /* packet was dropped */
- return 1;
- if (m != q->mq.head) /* other packets are ahead of m: nothing more to do */
- return 0;
- }
-
- /* If reach this point, queue q was idle */
- si = (struct rr_si *)(_si + 1);
- rrq = (struct rr_queue *)q;
-
- if (rrq->status == 1) /* Queue is already in the queue list */
- return 0;
-
- /* Insert the queue in the queue list */
- rr_append(rrq, si);
-
- return 0;
-}
-/*
- * Dequeue callback. Looks at the queue at the head of the ring:
- * - no packets: unlink it and look at the next one;
- * - first packet longer than the credit: add one quantum to the
- * credit and rotate to the next queue;
- * - otherwise: charge the packet length to the credit and return the
- * packet from dn_dequeue(); head does not move, so the same queue is
- * tried first on the next call.
- * Returns NULL once the ring is empty.
- * NOTE(review): an iteration makes progress only by sending, by
- * unlinking, or by growing a credit, so this relies on every linked
- * queue having quantum > 0 -- confirm that the bounds applied in
- * rr_new_fsk() guarantee it.
- */
-static struct mbuf *
-rr_dequeue(struct dn_sch_inst *_si)
-{
- /* Access scheduler instance private data */
- struct rr_si *si = (struct rr_si *)(_si + 1);
- struct rr_queue *rrq;
- uint64_t len;
-
- while ( (rrq = si->head) ) {
- struct mbuf *m = rrq->q.mq.head;
- if ( m == NULL) {
- /* empty queue, remove from list */
- rr_remove_head(si);
- continue;
- }
- len = m->m_pkthdr.len;
-
- if (len > rrq->credit) { /* credit (int) is converted to uint64_t for this comparison */
- /* Packet too big */
- rrq->credit += rrq->quantum;
- /* Try next queue */
- next_pointer(si);
- } else {
- rrq->credit -= len;
- return dn_dequeue(&rrq->q);
- }
- }
-
- /* no packet to dequeue*/
- return NULL;
-}
-/*
- * Config callback: stores fixed quantum limits in the struct rr_schk
- * located at (_schk + 1). Always returns 0.
- * NOTE(review): rr_desc sets schk_datalen to 0; confirm that room for
- * struct rr_schk really exists after the dn_schk.
- */
-static int
-rr_config(struct dn_schk *_schk)
-{
- struct rr_schk *schk = (struct rr_schk *)(_schk + 1);
- ND("called");
-
- /* use reasonable quantums (64..2k bytes, default 1500) */
- schk->min_q = 64;
- schk->max_q = 2048;
- schk->q_bytes = 1500; /* quantum */
-
- return 0;
-}
-/*
- * New-instance callback: the instance starts with an empty
- * round-robin list. Always returns 0.
- */
-static int
-rr_new_sched(struct dn_sch_inst *_si)
-{
- struct rr_si *si = (struct rr_si *)(_si + 1);
-
- ND("called");
- si->head = si->tail = NULL;
-
- return 0;
-}
-/*
- * Instance teardown callback: only logs through ND(); _si is not
- * touched. Always returns 0.
- */
-static int
-rr_free_sched(struct dn_sch_inst *_si)
-{
- ND("called");
- /* Nothing to do? */
- return 0;
-}
-/*
- * New-flowset callback: bounds the two flowset parameters used by RR.
- * The ipdn_bound_var() arguments after the pointer look like
- * (default, min, max, name) -- confirm against its definition.
- * The quantum limits are read from the struct rr_schk at
- * (fs->sched + 1), i.e. the values stored by rr_config().
- * Always returns 0.
- */
-static int
-rr_new_fsk(struct dn_fsk *fs)
-{
- struct rr_schk *schk = (struct rr_schk *)(fs->sched + 1);
- /* par[0] is the weight, par[1] is the quantum step */
- ipdn_bound_var(&fs->fs.par[0], 1,
- 1, 65536, "RR weight");
- ipdn_bound_var(&fs->fs.par[1], schk->q_bytes,
- schk->min_q, schk->max_q, "RR quantum");
- return 0;
-}
-/*
- * New-queue callback: tags the queue with the RR subtype, computes
- * quantum = par[0] * par[1] (weight times quantum step, see
- * rr_new_fsk()), gives it a full credit and, if it already holds
- * packets, links it on the instance's round-robin list.
- * Always returns 0.
- */
-static int
-rr_new_queue(struct dn_queue *_q)
-{
- struct rr_queue *q = (struct rr_queue *)_q;
-
- _q->ni.oid.subtype = DN_SCHED_RR;
-
- q->quantum = _q->fs->fs.par[0] * _q->fs->fs.par[1];
- ND("called, q->quantum %d", q->quantum);
- q->credit = q->quantum;
- q->status = 0; /* not linked yet */
-
- if (_q->mq.head != NULL) {
- /* Queue NOT empty, insert in the queue list */
- rr_append(q, (struct rr_si *)(_q->_si + 1));
- }
- return 0;
-}
-/*
- * Free-queue callback: unlinks the queue from the round-robin list of
- * its instance if it is linked. Always returns 0.
- */
-static int
-rr_free_queue(struct dn_queue *_q)
-{
- struct rr_queue *q = (struct rr_queue *)_q;
-
- ND("called");
- if (q->status == 1) {
- struct rr_si *si = (struct rr_si *)(_q->_si + 1);
- remove_queue_q(q, si); /* tests q->status again itself */
- }
- return 0;
-}
-
-/*
- * RR scheduler descriptor
- * contains the type of the scheduler, the name, the size of the
- * structures and function pointers.
- * si_datalen covers the struct rr_si reached through (_si + 1);
- * q_datalen is the size of struct rr_queue in excess of the
- * struct dn_queue it starts with.
- * NOTE(review): schk_datalen is 0, yet rr_config() writes a
- * struct rr_schk at (_schk + 1) and rr_new_fsk() reads it back;
- * sizeof(struct rr_schk) looks like the intended value -- confirm.
- */
-static struct dn_alg rr_desc = {
- _SI( .type = ) DN_SCHED_RR,
- _SI( .name = ) "RR",
- _SI( .flags = ) DN_MULTIQUEUE,
-
- _SI( .schk_datalen = ) 0,
- _SI( .si_datalen = ) sizeof(struct rr_si),
- _SI( .q_datalen = ) sizeof(struct rr_queue) - sizeof(struct dn_queue),
-
- _SI( .enqueue = ) rr_enqueue,
- _SI( .dequeue = ) rr_dequeue,
-
- _SI( .config = ) rr_config,
- _SI( .destroy = ) NULL,
- _SI( .new_sched = ) rr_new_sched,
- _SI( .free_sched = ) rr_free_sched,
- _SI( .new_fsk = ) rr_new_fsk,
- _SI( .free_fsk = ) NULL,
- _SI( .new_queue = ) rr_new_queue,
- _SI( .free_queue = ) rr_free_queue,
-};
-
-
-DECLARE_DNSCHED_MODULE(dn_rr, &rr_desc);
diff --git a/sys/netinet/ipfw/dn_sched_wf2q.c b/sys/netinet/ipfw/dn_sched_wf2q.c
deleted file mode 100644
index 7f16719..0000000
--- a/sys/netinet/ipfw/dn_sched_wf2q.c
+++ /dev/null
@@ -1,373 +0,0 @@
-/*
- * Copyright (c) 2010 Riccardo Panicucci, Universita` di Pisa
- * Copyright (c) 2000-2002 Luigi Rizzo, Universita` di Pisa
- * All rights reserved
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-/*
- * $FreeBSD$
- */
-
-#ifdef _KERNEL
-#include <sys/malloc.h>
-#include <sys/socket.h>
-#include <sys/socketvar.h>
-#include <sys/kernel.h>
-#include <sys/mbuf.h>
-#include <sys/module.h>
-#include <net/if.h> /* IFNAMSIZ */
-#include <netinet/in.h>
-#include <netinet/ip_var.h> /* ipfw_rule_ref */
-#include <netinet/ip_fw.h> /* flow_id */
-#include <netinet/ip_dummynet.h>
-#include <netinet/ipfw/dn_heap.h>
-#include <netinet/ipfw/ip_dn_private.h>
-#include <netinet/ipfw/dn_sched.h>
-#else
-#include <dn_test.h>
-#endif
-/*
- * MAX64(x, y): the larger of two 64-bit timestamps, decided by the
- * sign of (y - x) cast to int64_t rather than by a direct comparison.
- * NOTE(review): the expansion has no outer parentheses and names each
- * argument twice. It is safe as the whole right-hand side of an
- * assignment, but e.g. "MAX64(a, b) + 1" would bind the "+ 1" to the
- * (x) arm only, and arguments with side effects could be evaluated
- * more than once.
- */
-#ifndef MAX64
-#define MAX64(x,y) (( (int64_t) ( (y)-(x) )) > 0 ) ? (y) : (x)
-#endif
-
-/*
- * timestamps are computed on 64 bit using fixed point arithmetic.
- * LMAX_BITS, WMAX_BITS are the max number of bits for the packet len
- * and sum of weights, respectively. FRAC_BITS is the number of
- * fractional bits. We want FRAC_BITS >> WMAX_BITS to avoid too large
- * errors when computing the inverse, FRAC_BITS < 32 so we can do 1/w
- * using an unsigned 32-bit division, and to avoid wraparounds we need
- * LMAX_BITS + WMAX_BITS + FRAC_BITS << 64
- * As an example
- * FRAC_BITS = 26, LMAX_BITS=14, WMAX_BITS = 19
- * (here ">>" and "<<" mean "much greater/less than", not shifts).
- * Note that the value defined below is 28, not the 26 of the example.
- * Both defines sit under the same #ifndef, so a FRAC_BITS supplied
- * from elsewhere must come with its own ONE_FP.
- */
-#ifndef FRAC_BITS
-#define FRAC_BITS 28 /* shift for fixed point arithmetic */
-#define ONE_FP (1UL << FRAC_BITS)
-#endif
-
-/*
- * Private information for the scheduler instance:
- * sch_heap (key is Finish time) returns the next queue to serve
- * ne_heap (key is Start time) stores not-eligible queues
- * idle_heap (key=start/finish time) stores idle flows. It must
- * support extract-from-middle.
- * A flow is only in 1 of the three heaps.
- * The callbacks reach this structure as (_si + 1).
- * XXX todo: use a more efficient data structure, e.g. a tree sorted
- * by F with min_subtree(S) in each node
- */
-struct wf2qp_si {
- struct dn_heap sch_heap; /* top extract - key Finish time */
- struct dn_heap ne_heap; /* top extract - key Start time */
- struct dn_heap idle_heap; /* random extract - key Start=Finish time */
- uint64_t V; /* virtual time */
- uint32_t inv_wsum; /* inverse of sum of weights: ONE_FP / wsum, refreshed only while wsum > 0 */
- uint32_t wsum; /* sum of weights */
-};
-
-struct wf2qp_queue {
- struct dn_queue _q; /* generic queue; first member, so a dn_queue pointer can be cast to wf2qp_queue */
- uint64_t S, F; /* start time, finish time; S == F + 1 marks them as invalid */
- uint32_t inv_w; /* ONE_FP / weight */
- int32_t heap_pos; /* position (index) of struct in heap; its offset is passed to heap_init() */
-};
-
-/*
- * This file implements a WF2Q+ scheduler as it has been in dummynet
- * since 2000.
- * The scheduler supports per-flow queues and has O(log N) complexity.
- *
- * WF2Q+ needs to drain entries from the idle heap so that we
- * can keep the sum of weights up to date. We can do it whenever
- * we get a chance, or periodically, or following some other
- * strategy. The function idle_check() drains at most N elements
- * from the idle heap.
- *
- * si: instance data; n: maximum number of entries to drain;
- * force: when non-zero entries are drained regardless of their key,
- * otherwise only while the top key is before V.
- * Each drained queue gets its timestamps marked invalid and its weight
- * removed from wsum. When wsum reaches 0, inv_wsum keeps its previous
- * value.
- */
-static void
-idle_check(struct wf2qp_si *si, int n, int force)
-{
- struct dn_heap *h = &si->idle_heap;
- while (n-- > 0 && h->elements > 0 &&
- (force || DN_KEY_LT(HEAP_TOP(h)->key, si->V))) {
- struct dn_queue *q = HEAP_TOP(h)->object;
- struct wf2qp_queue *alg_fq = (struct wf2qp_queue *)q;
-
- heap_extract(h, NULL);
- /* XXX to let the flowset delete the queue we should
- * mark it as 'unused' by the scheduler.
- */
- alg_fq->S = alg_fq->F + 1; /* Mark timestamp as invalid. */
- si->wsum -= q->fs->fs.par[0]; /* adjust sum of weights */
- if (si->wsum > 0)
- si->inv_wsum = ONE_FP/si->wsum;
- }
-}
-/*
- * Enqueue callback. Stores m in q unless it is already q's first
- * packet (wf2qp_new_queue() calls in with the queue's own head).
- * When m ends up first in q, the queue gets new start/finish times
- * and is inserted in ne_heap (S > V) or sch_heap.
- * Returns 1 if dn_enqueue() dropped the packet, 0 otherwise.
- */
-static int
-wf2qp_enqueue(struct dn_sch_inst *_si, struct dn_queue *q, struct mbuf *m)
-{
- struct dn_fsk *fs = q->fs;
- struct wf2qp_si *si = (struct wf2qp_si *)(_si + 1);
- struct wf2qp_queue *alg_fq;
- uint64_t len = m->m_pkthdr.len;
-
- if (m != q->mq.head) {
- if (dn_enqueue(q, m, 0)) /* packet was dropped */
- return 1;
- if (m != q->mq.head) /* queue was already busy */
- return 0;
- }
-
- /* If reach this point, queue q was idle */
- alg_fq = (struct wf2qp_queue *)q;
-
- if (DN_KEY_LT(alg_fq->F, alg_fq->S)) {
- /* F<S means timestamps are invalid ->brand new queue. */
- alg_fq->S = si->V; /* init start time */
- si->wsum += fs->fs.par[0]; /* add weight of new queue. */
- si->inv_wsum = ONE_FP/si->wsum; /* non-zero divisor presumably guaranteed by the weight bound in wf2qp_new_fsk() -- confirm */
- } else { /* if it was idle then it was in the idle heap */
- heap_extract(&si->idle_heap, q);
- alg_fq->S = MAX64(alg_fq->F, si->V); /* compute new S */
- }
- alg_fq->F = alg_fq->S + len * alg_fq->inv_w;
-
- /* if nothing is backlogged, make sure this flow is eligible */
- if (si->ne_heap.elements == 0 && si->sch_heap.elements == 0)
- si->V = MAX64(alg_fq->S, si->V);
-
- /*
- * Look at eligibility. A flow is not eligible if S>V (when
- * this happens, it means that there is some other flow already
- * scheduled for the same pipe, so the sch_heap cannot be
- * empty). If the flow is not eligible we just store it in the
- * ne_heap. Otherwise, we store in the sch_heap.
- * Note that for all flows in sch_heap (SCH), S_i <= V,
- * and for all flows in ne_heap (NEH), S_i > V.
- * So when we need to compute max(V, min(S_i)) forall i in
- * SCH+NEH, we only need to look into NEH.
- */
- if (DN_KEY_LT(si->V, alg_fq->S)) {
- /* S>V means flow Not eligible. */
- if (si->sch_heap.elements == 0)
- D("++ ouch! not eligible but empty scheduler!");
- heap_insert(&si->ne_heap, alg_fq->S, q);
- } else {
- heap_insert(&si->sch_heap, alg_fq->F, q);
- }
- return 0;
-}
-
-/* XXX invariant: sch > 0 || V >= min(S in neh)
- *
- * Dequeue callback. With nothing backlogged it force-drains the idle
- * heap, resets V and wsum and returns NULL. Otherwise it drains at
- * most one expired idle entry, then serves the queue on top of
- * sch_heap: takes its first packet, advances V by len * inv_wsum and
- * re-files the queue in idle_heap (queue now empty), sch_heap
- * (S <= V) or ne_heap. The eligibility pass is run a second time
- * before returning; the "if (m)" test ends the loop on that pass.
- * The packet returned by dn_dequeue() is dereferenced without a NULL
- * check, so the queue on top of sch_heap is assumed to be non-empty.
- */
-static struct mbuf *
-wf2qp_dequeue(struct dn_sch_inst *_si)
-{
- /* Access scheduler instance private data */
- struct wf2qp_si *si = (struct wf2qp_si *)(_si + 1);
- struct mbuf *m;
- struct dn_queue *q;
- struct dn_heap *sch = &si->sch_heap;
- struct dn_heap *neh = &si->ne_heap;
- struct wf2qp_queue *alg_fq;
-
- if (sch->elements == 0 && neh->elements == 0) {
- /* we have nothing to do. We could kill the idle heap
- * altogether and reset V
- */
- idle_check(si, 0x7fffffff, 1);
- si->V = 0;
- si->wsum = 0; /* should be set already */
- return NULL; /* quick return if nothing to do */
- }
- idle_check(si, 1, 0); /* drain something from the idle heap */
-
- /* make sure at least one element is eligible, bumping V
- * and moving entries that have become eligible.
- * We need to repeat the first part twice, before and
- * after extracting the candidate, or enqueue() will
- * find the data structure in a wrong state.
- */
- m = NULL;
- for(;;) {
- /*
- * Compute V = max(V, min(S_i)). Remember that all elements
- * in sch have by definition S_i <= V so if sch is not empty,
- * V is surely the max and we must not update it. Conversely,
- * if sch is empty we only need to look at neh.
- * We don't need to move the queues, as it will be done at the
- * next enqueue
- */
- if (sch->elements == 0 && neh->elements > 0) {
- si->V = MAX64(si->V, HEAP_TOP(neh)->key);
- }
- while (neh->elements > 0 &&
- DN_KEY_LEQ(HEAP_TOP(neh)->key, si->V)) {
- q = HEAP_TOP(neh)->object;
- alg_fq = (struct wf2qp_queue *)q;
- heap_extract(neh, NULL);
- heap_insert(sch, alg_fq->F, q);
- }
- if (m) /* pkt found in previous iteration */
- break;
- /* ok we have at least one eligible pkt */
- q = HEAP_TOP(sch)->object;
- alg_fq = (struct wf2qp_queue *)q;
- m = dn_dequeue(q);
- heap_extract(sch, NULL); /* Remove queue from heap. */
- si->V += (uint64_t)(m->m_pkthdr.len) * si->inv_wsum;
- alg_fq->S = alg_fq->F; /* Update start time. */
- if (q->mq.head == 0) { /* not backlogged any more. */
- heap_insert(&si->idle_heap, alg_fq->F, q);
- } else { /* Still backlogged. */
- /* Update F, store in neh or sch */
- uint64_t len = q->mq.head->m_pkthdr.len;
- alg_fq->F += len * alg_fq->inv_w;
- if (DN_KEY_LEQ(alg_fq->S, si->V)) {
- heap_insert(sch, alg_fq->F, q);
- } else {
- heap_insert(neh, alg_fq->S, q);
- }
- }
- }
- return m;
-}
-/*
- * New-instance callback: initialises the three heaps with an initial
- * size argument of 16. Returns 0 on success; if any heap_init() call
- * fails, calls heap_free() on all three heaps and returns ENOMEM.
- * NOTE(review): on failure heap_free() also runs on heaps whose
- * heap_init() failed or was never reached; this relies on heap_free()
- * tolerating that -- confirm in dn_heap.c.
- */
-static int
-wf2qp_new_sched(struct dn_sch_inst *_si)
-{
- struct wf2qp_si *si = (struct wf2qp_si *)(_si + 1);
- int ofs = offsetof(struct wf2qp_queue, heap_pos); /* tells the heap where each queue keeps its heap position */
-
- /* all heaps support extract from middle */
- if (heap_init(&si->idle_heap, 16, ofs) ||
- heap_init(&si->sch_heap, 16, ofs) ||
- heap_init(&si->ne_heap, 16, ofs)) {
- heap_free(&si->ne_heap);
- heap_free(&si->sch_heap);
- heap_free(&si->idle_heap);
- return ENOMEM;
- }
- return 0;
-}
-/*
- * Instance teardown callback: releases the three heaps set up by
- * wf2qp_new_sched(). Always returns 0.
- */
-static int
-wf2qp_free_sched(struct dn_sch_inst *_si)
-{
- struct wf2qp_si *si = (struct wf2qp_si *)(_si + 1);
-
- heap_free(&si->sch_heap);
- heap_free(&si->ne_heap);
- heap_free(&si->idle_heap);
-
- return 0;
-}
-/*
- * New-flowset callback: bounds par[0], the flowset weight. The
- * ipdn_bound_var() arguments after the pointer look like
- * (default 1, min 1, max 100, name) -- confirm against its definition.
- * Always returns 0.
- */
-static int
-wf2qp_new_fsk(struct dn_fsk *fs)
-{
- ipdn_bound_var(&fs->fs.par[0], 1,
- 1, 100, "WF2Q+ weight");
- return 0;
-}
-/*
- * New-queue callback: tags the queue with the WF2Q+ subtype, marks
- * its timestamps invalid (S = F + 1), caches inv_w = ONE_FP / weight
- * and, if the queue already holds packets, schedules it through
- * wf2qp_enqueue() using the packet at its head. Always returns 0;
- * the return value of wf2qp_enqueue() is ignored.
- */
-static int
-wf2qp_new_queue(struct dn_queue *_q)
-{
- struct wf2qp_queue *q = (struct wf2qp_queue *)_q;
-
- _q->ni.oid.subtype = DN_SCHED_WF2QP;
- q->F = 0; /* not strictly necessary */
- q->S = q->F + 1; /* mark timestamp as invalid. */
- q->inv_w = ONE_FP / _q->fs->fs.par[0]; /* par[0] is the weight bounded in wf2qp_new_fsk() */
- if (_q->mq.head != NULL) {
- wf2qp_enqueue(_q->_si, _q, _q->mq.head);
- }
- return 0;
-}
-
-/*
- * Called when the infrastructure removes a queue (e.g. flowset
- * is reconfigured). Nothing to do if we did not 'own' the queue,
- * otherwise remove it from the right heap and adjust the sum
- * of weights.
- * The heap is inferred from the queue's state: no packets means
- * idle_heap, S > V means ne_heap, anything else sch_heap.
- * Always returns 0. When wsum reaches 0, inv_wsum keeps its previous
- * value.
- * NOTE(review): the ownership test uses a plain ">=" on the
- * timestamps, whereas the other timestamp tests in this file go
- * through DN_KEY_LT/DN_KEY_LEQ -- confirm this is intended.
- */
-static int
-wf2qp_free_queue(struct dn_queue *q)
-{
- struct wf2qp_queue *alg_fq = (struct wf2qp_queue *)q;
- struct wf2qp_si *si = (struct wf2qp_si *)(q->_si + 1);
-
- if (alg_fq->S >= alg_fq->F + 1)
- return 0; /* nothing to do, not in any heap */
- si->wsum -= q->fs->fs.par[0];
- if (si->wsum > 0)
- si->inv_wsum = ONE_FP/si->wsum;
-
- /* extract from the heap. XXX TODO we may need to adjust V
- * to make sure the invariants hold.
- */
- if (q->mq.head == NULL) {
- heap_extract(&si->idle_heap, q);
- } else if (DN_KEY_LT(si->V, alg_fq->S)) {
- heap_extract(&si->ne_heap, q);
- } else {
- heap_extract(&si->sch_heap, q);
- }
- return 0;
-}
-
-/*
- * WF2Q+ scheduler descriptor
- * contains the type of the scheduler, the name, the size of the
- * structures and function pointers.
- * si_datalen covers the struct wf2qp_si reached through (_si + 1);
- * q_datalen is the size of struct wf2qp_queue in excess of the
- * struct dn_queue it starts with. The config, destroy and free_fsk
- * hooks are left NULL.
- */
-static struct dn_alg wf2qp_desc = {
- _SI( .type = ) DN_SCHED_WF2QP,
- _SI( .name = ) "WF2Q+",
- _SI( .flags = ) DN_MULTIQUEUE,
-
- /* we need extra space in the si and the queue */
- _SI( .schk_datalen = ) 0,
- _SI( .si_datalen = ) sizeof(struct wf2qp_si),
- _SI( .q_datalen = ) sizeof(struct wf2qp_queue) -
- sizeof(struct dn_queue),
-
- _SI( .enqueue = ) wf2qp_enqueue,
- _SI( .dequeue = ) wf2qp_dequeue,
-
- _SI( .config = ) NULL,
- _SI( .destroy = ) NULL,
- _SI( .new_sched = ) wf2qp_new_sched,
- _SI( .free_sched = ) wf2qp_free_sched,
-
- _SI( .new_fsk = ) wf2qp_new_fsk,
- _SI( .free_fsk = ) NULL,
-
- _SI( .new_queue = ) wf2qp_new_queue,
- _SI( .free_queue = ) wf2qp_free_queue,
-};
-
-
-DECLARE_DNSCHED_MODULE(dn_wf2qp, &wf2qp_desc);
diff --git a/sys/netinet/ipfw/dummynet.txt b/sys/netinet/ipfw/dummynet.txt
deleted file mode 100644
index e8c9725..0000000
--- a/sys/netinet/ipfw/dummynet.txt
+++ /dev/null
@@ -1,860 +0,0 @@
-#
-# $FreeBSD$
-#
-
-Notes on the internal structure of dummynet (2010 version)
-by Riccardo Panicucci and Luigi Rizzo
-Work supported by the EC project ONELAB2
-
-
-*********
-* INDEX *
-*********
-Implementation of new dummynet
- Internal structure
- Files
-Packet arrival
- The reconfiguration routine
-dummynet_task()
-Configuration
- Add a pipe
- Add a scheduler
- Add a flowset
-Listing object
-Delete of object
- Delete a pipe
- Delete a flowset
- Delete a scheduler
-Compatibility with FreeBSD7.2 and FreeBSD 8 ipfw binary
- ip_dummynet_glue.c
- ip_fw_glue.c
-How to configure dummynet
-How to implement a new scheduler
-
-
-
-OPEN ISSUES
-------------------------------
-20100131 deleting RR causes infinite loop
- presumably in the rr_free_queue() call -- seems to hang
- forever when deleting a live flow
-------------------------------
-
-Dummynet is a traffic shaper and network emulator. Packets are
-selected by an external filter such as ipfw, and passed to the emulator
-with a tag such as "pipe 10" or "queue 5" which tells what to
-do with the packet. As an example
-
- ipfw add queue 5 icmp from 10.0.0.2 to all
-
-All packets with the same tag belong to a "flowset", or a set
-of flows which can be further partitioned according to a mask.
-Flowsets are then passed to a scheduler for processing. The
-association of flowsets and schedulers is configurable e.g.
-
- ipfw queue 5 config sched 10 weight 3 flow_mask xxxx
- ipfw queue 8 config sched 10 weight 1 ...
- ipfw queue 3 config sched 20 weight 1 ...
-
-"sched 10" represents one or more scheduler instances,
-selected through a mask on the 5-tuple itself.
-
- ipfw sched 20 config type FIFO sched_mask yyy ...
-
-There are in fact two masks applied to each packet:
-+ the "sched_mask" sends packets arriving to a scheduler_id to
- one of many instances.
-+ the "flow_mask" together with the flowset_id is used to
- collect packets into independent flows on each scheduler.
-
-As an example, we can have
- ipfw queue 5 config sched 10 flow_mask src-ip 0x000000ff
- ipfw sched 10 config type WF2Q+ sched_mask src-ip 0xffffff00
-
-means that sched 10 will have one instance per /24 source subnet,
-and within that, each individual source will be a flow.
-
-Internal structure
------------------
-Dummynet-related data is split into several data structures,
-part of them constituting the userland-kernel API, and others
-specific to the kernel.
-NOTE: for up-to-date details please look at the relevant source
- headers (ip_dummynet.h, ip_dn_private.h, dn_sched.h)
-
-USERLAND-KERNEL API (ip_dummynet.h)
-
- struct dn_link:
- contains data about the physical link such as
- bandwidth, delay, burst size;
-
- struct dn_fs:
- describes a flowset, i.e. a template for queues.
- Main parameters are the scheduler we attach to, a flow_mask,
- buckets, queue size, plr, weight, and other scheduler-specific
- parameters.
-
- struct dn_flow
- contains information on a flow, including masks and
- statistics
-
- struct dn_sch:
- defines a scheduler (and a link attached to it).
- Parameters include scheduler type, sched_mask, number of
- buckets, and possibly other scheduler-specific parameters,
-
- struct dn_profile:
- fields to simulate a delay profile
-
-
-KERNEL REPRESENTATION (ip_dn_private.h)
-
- struct mq
- a queue of mbufs with head and tail.
-
- struct dn_queue
- individual queue of packets, created by a flowset using
- flow_mask and attached to a scheduler instance selected
- through sched_mask.
- A dn_queue has a pointer to the dn_fsk (which in turn counts
- how many queues point to it), a pointer to the
- dn_sch_inst it attaches to, and is in a hash table in the
- flowset. scheduler instances also should store queues in
- their own containers used for scheduling (lists, trees, etc.)
- CREATE: done on packet arrivals when a flow matches a flowset.
- DELETE: done only when deleting the parent dn_sch_inst
- or draining memory.
-
- struct dn_fsk
- includes a dn_fs; a pointer to the dn_schk; a link field
- for the list of dn_fsk attached to the same scheduler,
- or for the unlinked list;
- a refcount for the number of queues pointing to it;
- The dn_fsk is in a hash table, fshash.
- CREATE: done on configuration commands.
- DELETE: on configuration commands.
-
- struct dn_sch_inst
- a scheduler instance, created from a dn_schk applying sched_mask.
- Contains a delay line, a reference to the parent, and scheduler-
- specific info. Both dn_sch_inst and its delay line can be in the
- evheap if they have events to be processed.
- CREATE: created from a dn_schk applying sched_mask
- DELETE: configuration command delete a scheduler which in turn
- sweeps the hash table of instances deleting them
-
- struct dn_schk
- includes dn_sch, dn_link, a pointer to dn_profile,
- a hash table of dn_sch_inst, a list of dn_fsk
- attached to it.
- CREATE: configuration command. If there are flowsets that
- refer to this number, they are attached and moved
- to the hash table
- DELETE: manual, see dn_sch_inst
-
-
- fshash schedhash
- +---------------+ sched +--------------+
- | sched-------------------->| NEW_SCHK|
- -<----*sch_chain |<-----------------*fsk_list |
- |NEW_FSK |<----. | [dn_link] |
- +---------------+ | +--------------+
- |qht (hash) | | | siht(hash) |
- | [dn_queue] | | | [dn_si] |
- | [dn_queue] | | | [dn_si] |
- | ... | | | ... |
- | +--------+ | | | +---------+ |
- | |dn_queue| | | | |dn_si | |
- | | fs *----------' | | | |
- | | si *---------------------->| | |
- | +---------+ | | +---------+ |
- +---------------+ +--------------+
-
-The following global data structures contain all
-schedulers and flowsets.
-
-- schedhash[x]: contains all scheduler templates in the system.
- Looked up only on manual configurations, where flowsets
- are attached to matching schedulers.
- We have one entry per 'sched X config' command
- (plus one for each 'pipe X config').
-
-- fshash[x]: contains all flowsets.
- We do a lookup on this for each packet.
- We have one entry for each 'queue X config'
- (plus one for each 'pipe X config').
-
-Additionally, there is a list that contains all unlinked flowsets:
-- fsu: contains flowsets that are not linked to any scheduler.
- Flowsets are put in this list when they refer to a
- non-existing scheduler.
- We don't need an efficient data structure as we never search
- here on packet arrivals.
-
-Scheduler instances and the delay lines associated with each scheduler
-instance need to be woken up at certain times. Because we have many
-such objects, we keep them in a priority heap (system_heap).
-
-Almost all objects in this implementation are preceded by a structure
-(struct dn_id) which makes it easier to identify them.
-
-
-Files
------
-The dummynet code is split in several files.
-All kernel code is in sys/netinet/ipfw except ip_dummynet.h
-All userland code is in sbin/ipfw.
-Files are
-- sys/netinet/ip_dummynet.h defines the kernel-userland API
-- ip_dn_private.h contains the kernel-specific APIs
- and data structures
-- dn_sched.h defines the scheduler API
-- ip_dummynet.c contains module glue and sockopt handlers, with all
- functions to configure and list objects.
-- ip_dn_io.c contains the functions directly related to packet processing,
- and run in the critical path. It also contains some functions
- exported to the schedulers.
-- dn_heap.[ch] implement a binary heap and a generic hash table
-- dn_sched_* implement the various scheduler modules
-
-- dummynet.c is the file used to implement the user side of dummynet.
- It contains the functions that parse the command line, and the
- functions that print dummynet objects.
-Moreover, there are two new files (ip_dummynet_glue.c and ip_fw_glue.c) that
-are used to allow compatibility with the "ipfw" binary from FreeBSD 7.2 and
-FreeBSD 8.
-
-LOCKING
-=======
-At the moment the entire processing occurs under a single lock
-which is expected to be acquired in exclusive mode
-DN_BH_WLOCK() / DN_BH_WUNLOCK().
-
-In perspective we aim at the following:
-- the 'busy' flag, 'pending' list and all structures modified by packet
- arrivals and departures are protected by the BH_WLOCK.
- This is normally acquired in exclusive mode by the packet processing
- functions for short sections of code (exception -- the timer).
- If 'busy' is not set, we can do regular packet processing.
- If 'busy' is set, no pieces can be accessed.
- We must enqueue the packet on 'pending' and return immediately.
-
-- the 'busy' flag is set/cleared by long sections of code as follows:
- UH_WLOCK(); KASSERT(busy == 0);
- BH_WLOCK(); busy=1; BH_WUNLOCK();
- ... do processing ...
- BH_WLOCK(); busy=0; drain_queue(pending); BH_WUNLOCK();
- UH_WUNLOCK();
- this normally happens when the upper half has something heavy
- to do. The prologue and epilogue are not in the critical path.
-
-- the main containers (fshash, schedhash, ...) are protected by
- UH_WLOCK.
-
-Packet processing
-=================
-A packet enters dummynet through dummynet_io(). We first lookup
-the flowset number in fshash using dn_ht_find(), then find the scheduler
-instance using ipdn_si_find(), then possibly identify the correct
-queue with ipdn_q_find().
-If successful, we call the scheduler's enqueue function(), and
-if needed start I/O on the link calling serve_sched().
-If the packet can be returned immediately, this is done by
-leaving *m0 set. Otherwise, the packet is absorbed by dummynet
-and we simply return, possibly with some appropriate error code.
-
-Reconfiguration
----------------
-Reconfiguration is the complex part of the system because we need to
-keep track of the various objects and containers.
-At the moment we do not use reference counts for objects so all
-processing must be done under a lock.
-
-The main entry point for configuration is the ip_dn_ctl() handler
-for the IP_DUMMYNET3 sockopt (others are provided only for backward
-compatibility). Modifications to the configuration call do_config().
-The argument is a sequence of blocks each starting with a struct dn_id
-which specifies its content.
-The first dn_id must contain as obj.id the DN_API_VERSION
-The obj.type is DN_CMD_CONFIG (followed by actual objects),
-DN_CMD_DELETE (with the correct subtype and list of objects), or
-DN_CMD_FLUSH.
-
-DN_CMD_CONFIG is followed by objects to add/reconfigure. In general,
-if an object already exists it is reconfigured, otherwise it is
-created in a way that keeps the structure consistent.
-We have the following objects in the system, normally numbered with
-an identifier N between 1 and 65535. For certain objects we have
-"shadow" copies numbered N+NMAX and N+2*NMAX which are used to
-implement certain backward compatibility features.
-
-In general we have the following linking
-
- TRADITIONAL DUMMYNET QUEUES "queue N config ... pipe M ..."
- corresponds to a dn_fs object numbered N
-
- TRADITIONAL DUMMYNET PIPES "pipe N config ..."
- dn_fs N+2*NMAX --> dn_sch N+NMAX type FIFO --> dn_link N+NMAX
-
- GENERIC SCHEDULER "sched N config ... "
- [dn_fs N+NMAX] --> dn_sch N --> dn_link N
- The flowset N+NMAX is created only if the scheduler is not
- of type MULTIQUEUE.
-
- DELAY PROFILE "pipe N config profile ..."
- it is always attached to an existing dn_link N
-
-Because traditional dummynet pipes actually configure both a
-'standalone' instance and one that can be used by queues,
-we do the following:
-
- "pipe N config ..." configures:
- dn_sched N type WF2Q+
- dn_sched N+NMAX type FIFO
- dn_fs N+2NMAX attached to dn_sched N+NMAX
- dn_pipe N
- dn_pipe N+NMAX
-
- "queue N config" configures
- dn_fs N
-
- "sched N config" configures
- dn_sched N type as desired
- dn_fs N+NMAX attached to dn_sched N
-
-
-dummynet_task()
-===============
-The dummynet_task() function is the main dummynet processing function and is
-called every tick. This function first calculates the new current time, then
-it checks whether it is time to wake up objects from the system_heap by
-comparing the current time with the key at the top of the heap. Two types of
-objects (really the heap contains pointers to objects) are in the
-system_heap:
-
-- scheduler instance: if a scheduler instance is woken up, the dequeue()
- function is called for as long as the instance has credit. If dequeue()
- returns packets, the scheduler instance is inserted in the heap with a new
- key depending on the data that will be sent out. If the scheduler instance
- is left with some credit, it means that it has no other packet to send and
- so the instance is not inserted in the heap again.
-
- If the scheduler instance extracted from the heap has the DELETE flag set,
- dequeue() is not called and the instance is destroyed immediately.
-
-- delay line: when extracting a delay line, the function transmit_event() is
- called to send out packets from the delay line.
-
- If the scheduler instance associated with this delay line doesn't exist,
- the delay line will be deleted now.
-
-Configuration
-=============
-To create a pipe, queue or scheduler, the user should type commands like:
-"ipfw pipe x config"
-"ipfw queue y config pipe x"
-"ipfw pipe x config sched <type>"
-
-The userland side of dummynet will prepare a buffer containing the data to
-pass to the kernel side.
-The buffer contains all struct needed to configure an object. In more detail,
-to configure a pipe all three structs (dn_link, dn_sch, dn_fs) are needed,
-plus the delay profile struct if the pipe has a delay profile.
-
-If configuring a scheduler only the struct dn_sch is written in the buffer,
-while if configuring a flowset only the dn_fs struct is written.
-
-The first struct in the buffer contains the type of command request, that is
-if it is configuring a pipe, a queue, or a scheduler. Then there are the
-structs needed to configure the object, and finally there is the struct that
-marks the end of the buffer.
-
-To support the insertion of pipe and queue using the old syntax, when adding
-a pipe it's necessary to create a FIFO flowset and a FIFO scheduler, which
-have a number x + DN_PIPEOFFSET.
-
-Add a pipe
-----------
-A pipe is only a template for a link.
-If the pipe already exists, parameters are updated. If a delay profile exists
-it is deleted and a new one is created.
-If the pipe doesn't exist a new one is created. After the creation, the
-unlinked flowset list is scanned to see if there are flowsets that should
-be linked with this pipe. If so, these flowsets will be of wf2q+ type (for
-compatibility) and a new wf2q+ scheduler is created now.
-
-Add a scheduler
----------------
-If the scheduler already exists, and the type and the mask are the same, the
-scheduler is simply reconfigured calling the config_scheduler() scheduler
-function with the RECONFIGURE flag active.
-If the type or the mask differ, it is necessary to delete the old scheduler
-and create a new one.
-If the scheduler doesn't exist, a new one is created. If the scheduler has
-a mask, the hash table is created to store pointers to scheduler instances.
-When a new scheduler is created, it is necessary to scan the unlinked
-flowset list to search for any flowsets that should be linked with this
-scheduler number. If some are found, the flowsets take the type of this
-scheduler and they are configured properly.
-
-Add a flowset
--------------
-Flowset pointers are stored in the system in two lists. The unlinked flowset
-list contains all flowsets that aren't linked with a scheduler; the flowset
-list contains flowsets linked to a scheduler, and so they have a type.
-When adding a new flowset, first it is checked whether the flowset exists (that
-is, it is in the flowset list). If it doesn't exist, a new flowset is created
-and added to the unlinked flowset list if the scheduler to which the flowset
-would be linked doesn't exist, or added to the flowset list and configured
-properly if the scheduler exists. If the flowset (before being created) was in
-the unlinked flowset list, it is removed and deleted, and then recreated.
-If the flowset exists, to allow reconfiguration of this flowset, the
-scheduler number and type must match the ones in memory. If this isn't
-so, the flowset is deleted and a new one will be created. Actually, the flowset
-isn't deleted immediately: it is removed from the flowset list and it will be
-deleted later because there could be some queues that are using it.
-
-Listing of object
-=================
-The user can request a list of the objects present in dummynet through the
-command
-"ipfw [-v] pipe|queue [x] list|show"
-The kernel side of dummynet sends a buffer to the user side that contains all
-pipes, all schedulers, all flowsets, plus all scheduler instances and all
-queues.
-The dummynet userland will format the output and show only the relevant
-information.
-The buffer sent starts with all pipes in the system. The entire struct dn_link
-is passed, except the delay_profile struct that is useless in user space.
-After the pipes, all flowsets are written in the buffer. The struct containing
-scheduler-specific flowset data is linked with the flowset by writing the
-'obj' id of the extension into the 'alg_fs' pointer.
-Then schedulers are written. If a scheduler has one or more scheduler
-instances, these are linked to the parent scheduler by writing the id of the
-parent in the 'ptr_sched' pointer. If a scheduler instance has queues, they are
-written in the buffer and linked through the 'obj' and 'sched_inst' pointers.
-Finally, flowsets in the unlinked flowset list are written in the buffer, and
-then a struct gen is saved in the buffer to mark the last struct in the buffer.
-
-
-Delete of object
-================
-An object is usually removed by the user through a command like
-"ipfw pipe|queue x delete". XXX sched?
-ipfw passes to the kernel a struct gen that contains the type and the number
-of the object to remove.
-
-Delete of pipe x
-----------------
-A pipe can be deleted by the user through the command 'ipfw pipe x delete'.
-To delete a pipe, the pipe is removed from the pipe list, and then deleted.
-Also the scheduler associated with this pipe should be deleted.
-For compatibility with old dummynet syntax, the associated FIFO scheduler and
-FIFO flowset must be deleted.
-
-Delete of flowset x
--------------------
-To remove a flowset, we must be sure that it is no longer referenced by any
-object.
-If the flowset to remove is in the unlinked flowset list, there is no
-issue: the flowset can be safely removed by calling free() (the flowset
-extension is not yet created if the flowset is in this list).
-If the flowset is in the flowset list, first we remove it from the list so
-that new packets are discarded when they arrive. Next, the flowset is marked
-for deletion.
-Now we must check if some queue is using this flowset.
-To do this, a counter (active_f) is provided. This counter indicates how many
-queues exist using this flowset.
-The active_f counter is automatically incremented when a queue is created
-and decremented when a queue is deleted.
-If the counter is 0, the flowset can be safely deleted, and the delete_alg_fs()
-scheduler function is called before deallocating memory.
-If the counter is not 0, the flowset remains in memory until the counter
-becomes zero. When a queue is deleted (by the dn_delete_queue() function) it is
-checked whether the linked flowset is being deleted and if so the counter is
-decremented. If the counter reaches 0, the flowset is deleted.
-The deletion of a queue can be done only by the scheduler, or when the scheduler
-is destroyed.
-
-Delete of scheduler x
----------------------
-To delete a scheduler we must be sure that no scheduler instance of this type
-is in the system_heap. To do so, a counter (inst_counter) is provided.
-This counter is managed by the system: it is incremented every time an
-instance is inserted in the system_heap, and decremented every time one is
-extracted from it.
-To delete the scheduler, first we remove it from the scheduler list, so new
-packets are discarded when they arrive, and mark the scheduler as deleting.
-
-If the counter is 0, we can remove the scheduler safely by calling the
-really_deletescheduler() function. This function will scan all scheduler
-instances and call the delete_scheduler_instance() function that will delete
-the instance. When all instances are deleted, the scheduler template is
-deleted by calling delete_scheduler_template(). If the delay line associated
-with the scheduler is empty, it is deleted now, else it will be deleted when
-it becomes empty.
-If the counter was not 0, we wait for it. Every time the dummynet_task()
-function extracts a scheduler from the system_heap, the counter is decremented.
-If the scheduler has the delete flag enabled the dequeue() is not called and
-delete_scheduler_instance() is called to delete the instance.
-Obviously this scheduler instance is no longer inserted in the system_heap.
-If the counter reaches 0, the delete_scheduler_template() function is called
-and all memory is released.
-NOTE: Flowsets that belong to this scheduler are not deleted, so if a new
- scheduler with the same number is inserted it will use these flowsets.
- To do so, the best approach would be to insert these flowsets in the
- unlinked flowset list, but doing this now would be very expensive.
- So flowsets will remain in memory and linked with a scheduler that no
- longer exists until a packet belonging to this flowset arrives. When
- this packet arrives, the reconfigure() function is called because the
- generation number does not match the one contained in the flowset, and so
- the flowset will be moved into the unlinked flowset list, or will be
- linked with the new scheduler if a new one was created.
-
-
-COMPATIBILITY WITH FREEBSD 7.2 AND FREEBSD 8 'IPFW' BINARY
-==========================================================
-Dummynet is not compatible with the old ipfw binary because internal structs
-have changed. Moreover, the old ipfw binary is not compatible with new kernels
-because the struct that represents a firewall rule has changed. So, if a user
-installs a new kernel on a FreeBSD 7.2, the ipfw (and possibly many other
-commands) will not work.
-New dummynet uses a new socket option: IP_DUMMYNET3, used for both set and get.
-The old option can be used to allow compatibility with the 'ipfw' binary of
-older versions (tested with 7.2 and 8.0) of FreeBSD.
-Two files are provided for this purpose:
-- ip_dummynet_glue.c translates old dummynet requests to the new ones,
-- ip_fw_glue.c converts the rule format between 7.2 and 8 versions.
-Let us see these two files in detail.
-
-IP_DUMMYNET_GLUE.C
-------------------
-The internal structs of new dummynet are very different from the original.
-Because there are some differences between dummynet in FreeBSD 7.2 and
-dummynet in FreeBSD 8 (the FreeBSD 8 version includes support for pipe delay
-profile and burst option), I have to include both header files. I copied
-the revision 191715 (for version 7.2) and the revision 196045 (for version 8)
-and I appended a number to each struct to mark them.
-
-The main function of this file is ip_dummynet_compat() that is called by
-ip_dn_ctl() when it receives a request for the old socket option.
-
-A global variable ('is7') stores the version of 'ipfw' that FreeBSD is using.
-This variable is set every time a configuration request is made, because
-with this request we receive a buffer whose size depends on the ipfw version.
-Because in general the first action is a configuration, this variable is
-usually set accordingly. If the first action is a request to list pipes
-or queues, the system cannot know the version of ipfw, and we suppose that
-version 7.2 is used. If the version is wrong, the output can be senseless, but
-the application should not crash.
-
-There are four requests for old dummynet:
-- IP_DUMMYNET_FLUSH: the flush option has no parameters, so the
- dummynet_flush() function is simply called;
-- IP_DUMMYNET_DEL: the delete option needs to be translated.
- It is only necessary to extract the number and the type of the object
- (pipe or queue) to delete from the buffer received and build a new struct
- gen containing the right parameters, then call the delete_object() function;
-- IP_DUMMYNET_CONFIGURE: the configure command receives a buffer depending on
- the ipfw version. After the proper extraction of all data, which depends
- on the ipfw version used, new structures are filled and then the dummynet
- config_link() function is properly called. Note that the 7.2 version does
- not support some parameters such as burst or delay profile.
-- IP_DUMMYNET_GET: The get command should send to ipfw the correct buffer
- depending on its version. There are two functions that build the
- correct buffer, ip_dummynet_get7() and ip_dummynet_get8(). These
- functions reproduce the buffer exactly as 'ipfw' expects. The only difference
- is that the weight parameter for a queue is no longer sent by dummynet and so
- it is set to 0.
- Moreover, because the internal structure has changed, the bucket size
- of a queue may not be correct, because now all flowsets share the hash
- table.
- If the version of ipfw is wrong, the output could be senseless or truncated,
- but the application should not crash.
-
-IP_FW_GLUE.C
-------------
-The ipfw binary is also used to add rules to the FreeBSD firewall. Because the
-struct ip_fw has changed from FreeBSD 7.2 to FreeBSD 8, it is necessary
-to write some glue code to allow using ipfw from FreeBSD 7.2 with the kernel
-provided with FreeBSD 8.
-This file contains two functions to convert a rule from FreeBSD 7.2 format to
-FreeBSD 8 format, and vice versa.
-The conversion should be done when a rule passes from userspace to kernel space
-and vice versa.
-I had to modify the ip_fw2.c file to manage these two cases, and added a
-variable (is7) to store the ipfw version used, using an approach like the
-previous file:
-- when a new rule is added (option IP_FW_ADD) the is7 variable is set if the
- size of the rule received corresponds to the FreeBSD 7.2 ipfw version. If so,
- the rule is converted to version 8 by calling the function
- convert_rule_to_8().
- Moreover, after the insertion of the rule, the rule is converted back to
- version 7 because the ipfw binary will print it.
-- when the user requests a list of rules (option IP_FW_GET) the is7 variable
- should be set correctly because we suppose that a configure command was done,
- else we suppose that the FreeBSD version is 8. The function ipfw_getrules()
- in the ip_fw2.c file returns all rules, converted to version 7 if necessary
- (if is7 is set), to the ipfw binary.
-The conversion of a rule is quite simple. The only difference between the
-two structures (struct ip_fw) is that in the new one there is a new field
-(uint32_t id). So, I copy the entire rule into a buffer and then copy the rule
-into the right position in the new (or old) struct. The size of the commands is
-not changed, and the copy is done in a loop.
-
-How to configure dummynet
-=========================
-It is possible to configure dummynet through two main commands:
-'ipfw pipe' and 'ipfw queue'.
-To allow compatibility with old versions, it is possible to configure dummynet
-using the old command syntax. Doing so, obviously, it is only possible to
-configure a FIFO scheduler or a wf2q+ scheduler.
-A new command, 'ipfw pipe x config sched <type>' is supported to add a new
-scheduler to the system.
-
-- ipfw pipe x config ...
- create a new pipe with the link parameters
- create a new scheduler fifo (x + offset)
- create a new flowset fifo (x + offset)
- the mask, if any, is stored in the FIFO scheduler
-
-- ipfw queue y config pipe x ...
- create a new flowset y linked to sched x.
- The type of flowset depends on the specified scheduler.
- If the scheduler does not exist, this flowset is inserted in a special
- list and will not be active.
- If pipe x exists and sched does not exist, a new wf2q+ scheduler is
- created and the flowset will be linked to this new scheduler (this is
- done for compatibility with old syntax).
-
-- ipfw pipe x config sched <type> ...
- create a new scheduler x of type <type>.
- Search into the flowset unlinked list if there are some flowset that
- should be linked with this new scheduler.
-
-- ipfw pipe x delete
- delete the pipe x
- delete the scheduler fifo (x + offset)
- delete the scheduler x
- delete the flowset fifo (x + offset)
-
-- ipfw queue x delete
- delete the flowset x
-
-- ipfw sched x delete ///XXX
- delete the scheduler x
-
-Some examples of how to configure dummynet follow:
-- Ex1:
- ipfw pipe 10 config bw 1M delay 15 // create a pipe with band and delay
- A FIFO flowset and scheduler is
- also created
- ipfw queue 5 config pipe 10 weight 56 // create a flowset. This flowset
- will be of wf2q+ because a pipe 10
- exists. Moreover, the wf2q+
- scheduler is created now.
-- Ex2:
- ipfw queue 5 config pipe 10 weight 56 // Create a flowset. Scheduler 10
- does not exist, so this flowset
- is inserted in the unlinked
- flowset list.
- ipfw pipe 10 config bw... // Create a pipe, a FIFO flowset and scheduler.
- Because a flowset with 'pipe 10' exists,
- a wf2q+ scheduler is created now and that
- flowset is linked with this scheduler.
-
-- Ex3:
- ipfw pipe 10 config bw... // Create a pipe, a FIFO flowset and scheduler.
- ipfw pipe 10 config sched rr // Create a scheduler of type RR, linked to
- pipe 10
- ipfw queue 5 config pipe 10 weight 56 // Create a flowset 5. This flowset
- will belong to scheduler 10 and
- it is of type RR
-
-- Ex4:
- ipfw pipe 10 config sched rr // Create a scheduler of type RR, linked to
- pipe 10 (not exist yet)
- ipfw pipe 10 config bw... // Create a pipe, a FIFO flowset and scheduler.
- ipfw queue 5 config pipe 10 weight 56 // Create a flowset 5.This flowset
- will belong to scheduler 10 and
- it is of type RR
- ipfw pipe 10 config sched wf2q+ // Modify the type of scheduler 10. It
- becomes a wf2q+ scheduler.
- When a new packet of flowset 5 arrives,
- the flowset 5 switches to wf2q+ type.
-
-How to implement a new scheduler
-================================
-In dummynet, a scheduler algorithm is represented by two main structs, some
-functions and other minor structs.
-- A struct dn_sch_xyz (where xyz is the 'type' of scheduler algorithm
- implemented) contains data relative to the scheduler, such as global
- parameters that are common to all instances of the scheduler
-- A struct dn_sch_inst_xyz contains data relative to a single scheduler
- instance, such as local status variables depending for example on the flows
- that are linked with the scheduler, and so on.
-To add a scheduler to dummynet, the user should type a command like:
-'ipfw pipe x config sched <type> [mask ... ...]'
-This command creates a new struct dn_sch_xyz of type <type>, and
-stores the optional parameters in that struct.
-
-The parameter mask determines how many scheduler instances of this
-scheduler may exist. For example, it is possible to divide traffic
-depending on the source port (or destination, or ip address...),
-so that every scheduler instance acts as an independent scheduler.
-If the mask is not set, all traffic goes to the same instance.
-
-When a packet arrives at a scheduler, the system searches for the correct
-scheduler instance, and if it does not exist it is created now (the
-struct dn_sch_inst_xyz is allocated by the system, and the scheduler
-fills the fields correctly). It is a task of the scheduler to create
-the struct that contains all queues for a scheduler instance.
-Dummynet provides some functions to create a hash table to store
-queues, but the scheduler algorithm can choose its own struct.
-
-To link a flow to a scheduler, the user should type a command like:
-'ipfw queue z config pipe x [mask... ...]'
-
-This command creates a new 'dn_fs' struct that will be inserted
-in the system. If the scheduler x exists, this flowset will be
-linked to that scheduler and the flowset type becomes the same as
-the scheduler type. At this point, the function create_alg_fs_xyz()
-is called to allow storing any parameters for the flowset that
-depend on the scheduler (for example the 'weight' parameter for a wf2q+
-scheduler, or some priority...). A parameter mask can be used for
-a flowset. If the mask parameter is set, the scheduler instance can
-separate packets according to their flow id (src and dst ip, ports...)
-and assign them to separate queues. This is done by the scheduler,
-so it can ignore the mask if it wants.
-
-See now the two main structs:
-struct dn_sch_xyz {
- struct gen g; /* important the name g */
- /* global params */
-};
-struct dn_sch_inst_xyz {
- struct gen g; /* important the name g */
- /* params of the instance */
-};
-It is important to embed the struct gen as the first member. The struct gen
-contains some values that the scheduler instance must fill (the 'type' of
-scheduler, the 'len' of the struct...)
-The function create_scheduler_xyz() should be implemented to initialize global
-parameters in the first struct, and if memory allocation is done it is
-mandatory to implement the delete_scheduler_template() function to free that
-memory.
-The function create_scheduler_instance_xyz() must be implemented even if the
-scheduler instance does not use extra parameters. In this function the struct
-gen fields must be filled with the correct information. The
-delete_scheduler_instance_xyz() function must be implemented if the instance
-has allocated some memory in the previous function.
-
-To store data belonging to a flowset the following struct is used:
-struct alg_fs_xyz {
- struct gen g;
- /* fill correctly the gen struct
- g.subtype = DN_XYZ;
- g.len = sizeof(struct alg_fs_xyz)
- ...
- */
- /* params for the flow */
-};
-The create_alg_fs_xyz() function is mandatory, because it must fill the struct
-gen, but the delete_alg_fs_xyz() is mandatory only if the previous function
-has allocated some memory.
-
-A struct dn_queue contains packets belonging to a queue and some statistical
-data. The scheduler could have to store data in this struct, so it must define
-a dn_queue_xyz struct:
-struct dn_queue_xyz {
- struct dn_queue q;
- /* parameter for a queue */
-};
-
-All structures are allocated by the system. To do so, the scheduler must
-set the size of its structs in the scheduler descriptor:
-scheduler_size: sizeof(dn_sch_xyz)
-scheduler_i_size: sizeof(dn_sch_inst_xyz)
-flowset_size: sizeof(alg_fs_xyz)
-queue_size: sizeof(dn_queue_xyz);
-The scheduler_size could be 0, but the other structs must contain at least a
-struct gen.
-
-
-After the definition of structs, it is necessary to implement the
-scheduler functions.
-
-- int (*config_scheduler)(char *command, void *sch, int reconfigure);
- Configure a scheduler, or reconfigure if 'reconfigure' == 1.
- This function performs additional allocation and initialization of global
- parameter for this scheduler.
- If memory is allocated here, the delete_scheduler_template() function
- should be implemented to remove this memory.
-- int (*delete_scheduler_template)(void* sch);
- Delete a scheduler template. This function is mandatory if the scheduler
- uses extra data with respect to the struct dn_sch.
-- int (*create_scheduler_instance)(void *s);
- Create a new scheduler instance. The system allocates the necessary memory
- and the scheduler can access it using the 's' pointer.
- The scheduler instance stores all queues, and to do this can use the
- hash table provided by the system.
-- int (*delete_scheduler_instance)(void *s);
- Delete a scheduler instance. It is important to free memory allocated
- by the create_scheduler_instance() function. The memory allocated by the
- system is freed by the system itself. The struct containing all queues also
- has to be deleted.
-- int (*enqueue)(void *s, struct gen *f, struct mbuf *m,
- struct ipfw_flow_id *id);
- Called when a packet arrives. The packet 'm' belongs to the scheduler
- instance 's', has a flowset 'f' and the flowid 'id' has already been
- masked. The enqueue() must call the dn_queue_packet(q, m) function to really
- enqueue the packet in the queue q. The queue 'q' is chosen by the scheduler
- and if it does not exist should be created by calling the dn_create_queue()
- function. If the scheduler wants to drop the packet, it must call the
- dn_drop_packet() function and then return 1.
-- struct mbuf * (*dequeue)(void *s);
- Called when the timer expires (or when a packet arrives and the scheduler
- instance is idle).
- This function is called when at least one packet can be sent out. The
- scheduler chooses the packet and returns it; if no packets are in the
- scheduler instance, the function must return NULL.
- Before returning a packet, it is important to call the function
- dn_return_packet() to update some statistics of the queue and update the
- queue counters.
-- int (*drain_queue)(void *s, int flag);
- The system requests the scheduler to delete all queues that it is not using
- in order to free memory. The flag parameter indicates whether a queue must be
- deleted even if it is active.
-
-- int (*create_alg_fs)(char *command, struct gen *g, int reconfigure);
- It is called when a flowset is linked with a scheduler. This is done
- when the scheduler is defined, so we can know the type of flowset.
- The function initializes the flowset parameters by parsing the command
- line. The parameters will be stored in the g struct, which has the right
- size allocated by the system. If the reconfigure flag is set, it means
- that the flowset is being reconfigured.
-- int (*delete_alg_fs)(struct gen *f);
- It is called when a flowset is being deleted. It must free the memory
- allocated by the create_alg_fs() function.
-
-- int (*create_queue_alg)(struct dn_queue *q, struct gen *f);
- Called when a queue is created. The function should link the queue
- to the struct used by the scheduler instance to store all queues.
-- int (*delete_queue_alg)(struct dn_queue *q);
- Called when a queue is being deleted. The function should remove extra data
- and update the struct containing all queues in the scheduler instance.
-
-The struct scheduler represents the scheduler descriptor that is passed to
-dummynet when a scheduler module is loaded.
-This struct contains the type of scheduler, the length of all structs and
-all function pointers.
-If a function is not implemented it should be initialized to NULL. Some
-functions are mandatory, others are mandatory if some memory should be freed.
-Mandatory functions:
-- create_scheduler_instance()
-- enqueue()
-- dequeue()
-- create_alg_fs()
-- drain_queue()
-Optional functions:
-- config_scheduler()
-- create_queue_alg()
-Mandatory functions if the corresponding create...() has allocated memory:
-- delete_scheduler_template()
-- delete_scheduler_instance()
-- delete_alg_fs()
-- delete_queue_alg()
-
diff --git a/sys/netinet/ipfw/ip_dn_glue.c b/sys/netinet/ipfw/ip_dn_glue.c
deleted file mode 100644
index 9fc6b23..0000000
--- a/sys/netinet/ipfw/ip_dn_glue.c
+++ /dev/null
@@ -1,845 +0,0 @@
-/*-
- * Copyright (c) 2010 Riccardo Panicucci, Universita` di Pisa
- * All rights reserved
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-/*
- * $FreeBSD$
- *
- * Binary compatibility support for /sbin/ipfw RELENG_7 and RELENG_8
- */
-
-#include "opt_inet6.h"
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/malloc.h>
-#include <sys/mbuf.h>
-#include <sys/kernel.h>
-#include <sys/lock.h>
-#include <sys/module.h>
-#include <sys/priv.h>
-#include <sys/proc.h>
-#include <sys/rwlock.h>
-#include <sys/socket.h>
-#include <sys/socketvar.h>
-#include <sys/time.h>
-#include <sys/taskqueue.h>
-#include <net/if.h> /* IFNAMSIZ, struct ifaddr, ifq head, lock.h mutex.h */
-#include <netinet/in.h>
-#include <netinet/ip_var.h> /* ip_output(), IP_FORWARDING */
-#include <netinet/ip_fw.h>
-#include <netinet/ipfw/ip_fw_private.h>
-#include <netinet/ipfw/dn_heap.h>
-#include <netinet/ip_dummynet.h>
-#include <netinet/ipfw/ip_dn_private.h>
-#include <netinet/ipfw/dn_sched.h>
-
-/* FREEBSD7.2 ip_dummynet.h r191715*/
-
-/*
- * One heap slot in the old (FreeBSD 7.2) layout: a 64-bit sorting key and
- * an untyped pointer to the object stored in that slot.
- */
-struct dn_heap_entry7 {
- int64_t key; /* sorting key. Topmost element is smallest one */
- void *object; /* object pointer */
-};
-
-/*
- * Heap descriptor in the old layout. In the code visible here it is only
- * embedded in struct dn_pipe7 and struct dn_pipe8 and its members are never
- * read or written; presumably it is kept so that the pipe structures have
- * the size and field offsets the old ipfw binaries expect.
- */
-struct dn_heap7 {
- int size;
- int elements;
- int offset; /* XXX if > 0 this is the offset of direct ptr to obj */
- struct dn_heap_entry7 *p; /* really an array of "size" entries */
-};
-
-/*
- * Common to 7.2 and 8.
- *
- * Old-style flowset record. It is embedded at the end of struct dn_pipe7
- * and struct dn_pipe8 and is also exported on its own by dn_c_copy_fs().
- * The compat code reads fs_nr, parent_nr, flow_mask, rq_size, qsize, plr,
- * weight, flags_fs and the RED thresholds from requests, and fills the
- * same kind of fields when answering a GET; the other members are not
- * touched in this file (presumably they only preserve the old layout).
- */
-struct dn_flow_set {
- SLIST_ENTRY(dn_flow_set) next; /* linked list in a hash slot */
-
- u_short fs_nr ; /* flow_set number */
- u_short flags_fs; /* DNOLD_* bits, see convertflags2new()/2old() */
-#define DNOLD_HAVE_FLOW_MASK 0x0001
-#define DNOLD_IS_RED 0x0002
-#define DNOLD_IS_GENTLE_RED 0x0004
-#define DNOLD_QSIZE_IS_BYTES 0x0008 /* queue size is measured in bytes */
-#define DNOLD_NOERROR 0x0010 /* do not report ENOBUFS on drops */
-#define DNOLD_HAS_PROFILE 0x0020 /* the pipe has a delay profile. */
-#define DNOLD_IS_PIPE 0x4000
-#define DNOLD_IS_QUEUE 0x8000
-
- struct dn_pipe7 *pipe ; /* pointer to parent pipe */
- u_short parent_nr ; /* parent pipe#, 0 if local to a pipe */
-
- int weight ; /* WFQ queue weight */
- int qsize ; /* queue size in slots or bytes */
- int plr ; /* pkt loss rate (2^31-1 means 100%) */
-
- struct ipfw_flow_id flow_mask ;
-
- /* hash table of queues onto this flow_set */
- int rq_size ; /* number of slots */
- int rq_elements ; /* active elements */
- struct dn_flow_queue7 **rq; /* array of rq_size entries */
-
- u_int32_t last_expired ; /* do not expire too frequently */
- int backlogged ; /* #active queues for this flowset */
-
- /* RED parameters */
-#define SCALE_RED 16
-#define SCALE(x) ( (x) << SCALE_RED )
-#define SCALE_VAL(x) ( (x) >> SCALE_RED )
-#define SCALE_MUL(x,y) ( ( (x) * (y) ) >> SCALE_RED )
- int w_q ; /* queue weight (scaled) */
- int max_th ; /* maximum threshold for queue (scaled) */
- int min_th ; /* minimum threshold for queue (scaled) */
- int max_p ; /* maximum value for p_b (scaled) */
- u_int c_1 ; /* max_p/(max_th-min_th) (scaled) */
- u_int c_2 ; /* max_p*min_th/(max_th-min_th) (scaled) */
- u_int c_3 ; /* for GRED, (1-max_p)/max_th (scaled) */
- u_int c_4 ; /* for GRED, 1 - 2*max_p (scaled) */
- u_int * w_q_lookup ; /* lookup table for computing (1-w_q)^t */
- u_int lookup_depth ; /* depth of lookup table */
- int lookup_step ; /* granularity inside the lookup table */
- int lookup_weight ; /* equal to (1-w_q)^t / (1-w_q)^(t+1) */
- int avg_pkt_size ; /* medium packet size */
- int max_pkt_size ; /* max packet size */
-};
-SLIST_HEAD(dn_flow_set_head, dn_flow_set);
-
-/*
- * Markers written into the 'next' link of exported records so the reader
- * can tell a pipe record from a queue (flowset) record; see
- * dn_c_copy_pipe() and dn_c_copy_fs().
- */
-#define DN_IS_PIPE 0x4000
-#define DN_IS_QUEUE 0x8000
-/*
- * Per-flow queue record in the FreeBSD 7.2 layout. dn_c_copy_q() fills
- * id, len, len_bytes and the three statistics counters; the other members
- * are not written in this file.
- */
-struct dn_flow_queue7 {
- struct dn_flow_queue7 *next ;
- struct ipfw_flow_id id ;
-
- struct mbuf *head, *tail ; /* queue of packets */
- u_int len ;
- u_int len_bytes ;
-
- u_long numbytes;
-
- u_int64_t tot_pkts ; /* statistics counters */
- u_int64_t tot_bytes ;
- u_int32_t drops ;
-
- int hash_slot ; /* debugging/diagnostic */
-
- /* RED parameters */
- int avg ; /* average queue length est. (scaled) */
- int count ; /* arrivals since last RED drop */
- int random ; /* random value (scaled) */
- u_int32_t q_time; /* start of queue idle time */
-
- /* WF2Q+ support */
- struct dn_flow_set *fs ; /* parent flow set */
- int heap_pos ; /* position (index) of struct in heap */
- int64_t sched_time ; /* current time when queue enters ready_heap */
-
- int64_t S,F ; /* start time, finish time */
-};
-
-/*
- * Pipe record in the FreeBSD 7.2 layout. The members up to 'delay' are
- * declared identically in struct dn_pipe8, which is why the compat code
- * reads pipe_nr, bandwidth and delay through a dn_pipe7 pointer for both
- * versions; the embedded flowset 'fs' is declared after members that
- * differ between the two versions, so it is reached through the pointer
- * type matching 'is7'.
- */
-struct dn_pipe7 { /* a pipe */
- SLIST_ENTRY(dn_pipe7) next; /* linked list in a hash slot */
-
- int pipe_nr ; /* number */
- int bandwidth; /* really, bytes/tick. */
- int delay ; /* really, ticks */
-
- struct mbuf *head, *tail ; /* packets in delay line */
-
- /* WF2Q+ */
- struct dn_heap7 scheduler_heap ; /* top extract - key Finish time*/
- struct dn_heap7 not_eligible_heap; /* top extract- key Start time */
- struct dn_heap7 idle_heap ; /* random extract - key Start=Finish time */
-
- int64_t V ; /* virtual time */
- int sum; /* sum of weights of all active sessions */
-
- int numbytes;
-
- int64_t sched_time ; /* time pipe was scheduled in ready_heap */
-
- /*
- * When the tx clock come from an interface (if_name[0] != '\0'), its name
- * is stored below, whereas the ifp is filled when the rule is configured.
- */
- char if_name[IFNAMSIZ];
- struct ifnet *ifp ;
- int ready ; /* set if ifp != NULL and we got a signal from it */
-
- struct dn_flow_set fs ; /* used with fixed-rate flows */
-};
-SLIST_HEAD(dn_pipe_head7, dn_pipe7);
-
-
-/* FREEBSD8 ip_dummynet.h r196045 */
-/*
- * Per-flow queue record in the FreeBSD 8 layout. Compared with
- * struct dn_flow_queue7 it has a 64-bit numbytes, an added extra_bits
- * member and a 64-bit idle_time in place of q_time, so the statistics
- * counters are declared after members of different size; dn_c_copy_q()
- * therefore writes them through the pointer type matching 'is7'.
- */
-struct dn_flow_queue8 {
- struct dn_flow_queue8 *next ;
- struct ipfw_flow_id id ;
-
- struct mbuf *head, *tail ; /* queue of packets */
- u_int len ;
- u_int len_bytes ;
-
- uint64_t numbytes ; /* credit for transmission (dynamic queues) */
- int64_t extra_bits; /* extra bits simulating unavailable channel */
-
- u_int64_t tot_pkts ; /* statistics counters */
- u_int64_t tot_bytes ;
- u_int32_t drops ;
-
- int hash_slot ; /* debugging/diagnostic */
-
- /* RED parameters */
- int avg ; /* average queue length est. (scaled) */
- int count ; /* arrivals since last RED drop */
- int random ; /* random value (scaled) */
- int64_t idle_time; /* start of queue idle time */
-
- /* WF2Q+ support */
- struct dn_flow_set *fs ; /* parent flow set */
- int heap_pos ; /* position (index) of struct in heap */
- int64_t sched_time ; /* current time when queue enters ready_heap */
-
- int64_t S,F ; /* start time, finish time */
-};
-
-/*
- * Pipe record in the FreeBSD 8 layout. It extends the 7.2 pipe with a
- * 64-bit numbytes, burst, idle_time and the delay-profile members at the
- * end. 'samples' is a pointer member; dn_compat_config_profile() points it
- * at the sample array of the enclosing struct dn_pipe_max8 before use.
- */
-struct dn_pipe8 { /* a pipe */
- SLIST_ENTRY(dn_pipe8) next; /* linked list in a hash slot */
-
- int pipe_nr ; /* number */
- int bandwidth; /* really, bytes/tick. */
- int delay ; /* really, ticks */
-
- struct mbuf *head, *tail ; /* packets in delay line */
-
- /* WF2Q+ */
- struct dn_heap7 scheduler_heap ; /* top extract - key Finish time*/
- struct dn_heap7 not_eligible_heap; /* top extract- key Start time */
- struct dn_heap7 idle_heap ; /* random extract - key Start=Finish time */
-
- int64_t V ; /* virtual time */
- int sum; /* sum of weights of all active sessions */
-
- /* Same as in dn_flow_queue, numbytes can become large */
- int64_t numbytes; /* bits I can transmit (more or less). */
- uint64_t burst; /* burst size, scaled: bits * hz */
-
- int64_t sched_time ; /* time pipe was scheduled in ready_heap */
- int64_t idle_time; /* start of pipe idle time */
-
- char if_name[IFNAMSIZ];
- struct ifnet *ifp ;
- int ready ; /* set if ifp != NULL and we got a signal from it */
-
- struct dn_flow_set fs ; /* used with fixed-rate flows */
-
- /* fields to simulate a delay profile */
-#define ED_MAX_NAME_LEN 32
- char name[ED_MAX_NAME_LEN];
- int loss_level;
- int samples_no;
- int *samples;
-};
-
-/*
- * A FreeBSD 8 pipe immediately followed by room for the largest delay
- * profile (ED_MAX_SAMPLES_NO samples). Its size is one of the request
- * lengths ip_dummynet_compat() recognizes as coming from a FreeBSD 8 ipfw.
- */
-#define ED_MAX_SAMPLES_NO 1024
-struct dn_pipe_max8 {
- struct dn_pipe8 pipe;
- int samples[ED_MAX_SAMPLES_NO];
-};
-SLIST_HEAD(dn_pipe_head8, dn_pipe8);
-
-/*
- * Changes from 7.2 to 8:
- * dn_pipe:
- * numbytes from int to int64_t
- * add burst (uint64_t)
- * add idle_time (int64_t)
- * add profile
- * add struct dn_pipe_max
- * add flag DN_HAS_PROFILE
- *
- * dn_flow_queue
- * numbytes from u_long to uint64_t
- * add extra_bits (int64_t)
- * q_time from u_int32_t to int64_t and renamed idle_time
- *
- * dn_flow_set unchanged
- *
- */
-
-/* NOTE:XXX copied from dummynet.c */
-/*
- * Return, as a void pointer, the address 'len' bytes past 'p'. Both
- * arguments are parenthesized so that expression arguments (e.g. "q + 1"
- * or "a + b") are evaluated as a unit before the cast and the addition.
- */
-#define O_NEXT(p, len) ((void *)((char *)(p) + (len)))
-/*
- * Initialize an object header with the caller-supplied length, type and
- * id; the subtype is always reset to 0.
- */
-static void
-oid_fill(struct dn_id *oid, int len, int type, uintptr_t id)
-{
-	oid->id = id;
-	oid->subtype = 0;
-	oid->type = type;
-	oid->len = len;
-}
-/*
- * Take the next object from the buffer: write a header of the given
- * length and type (id 0) at *o, move *o forward by 'len' bytes and return
- * the address of the object that was just initialized.
- */
-static void *
-o_next(struct dn_id **o, int len, int type)
-{
-	struct dn_id *cur;
-
-	cur = *o;
-	oid_fill(cur, len, type, 0);
-	*o = O_NEXT(cur, len);
-	return (cur);
-}
-
-
-static size_t pipesize7 = sizeof(struct dn_pipe7);
-static size_t pipesize8 = sizeof(struct dn_pipe8);
-static size_t pipesizemax8 = sizeof(struct dn_pipe_max8);
-
-/* Indicate 'ipfw' version
- * 1: from FreeBSD 7.2
- * 0: from FreeBSD 8
- * -1: unknown (currently unused)
- *
- * It is updated when an IP_DUMMYNET_DEL or IP_DUMMYNET_CONFIGURE request
- * arrives.
- * NOTE: if an IP_DUMMYNET_GET arrives and the 'ipfw' version is unknown,
- * it is assumed to be the FreeBSD 8 version.
- */
-static int is7 = 0;
-
-/*
- * Translate old-style DNOLD_* flowset flags into the current DN_* flags.
- * Only the bits listed in the table are carried over; every other bit of
- * 'src' is dropped.
- */
-static int
-convertflags2new(int src)
-{
-	static const struct {
-		int oldbit;
-		int newbit;
-	} map[] = {
-		{ DNOLD_HAVE_FLOW_MASK,	DN_HAVE_MASK },
-		{ DNOLD_QSIZE_IS_BYTES,	DN_QSIZE_BYTES },
-		{ DNOLD_NOERROR,	DN_NOERROR },
-		{ DNOLD_IS_RED,		DN_IS_RED },
-		{ DNOLD_IS_GENTLE_RED,	DN_IS_GENTLE_RED },
-		{ DNOLD_HAS_PROFILE,	DN_HAS_PROFILE },
-	};
-	int dst = 0;
-	size_t i;
-
-	for (i = 0; i < sizeof(map) / sizeof(map[0]); i++) {
-		if (src & map[i].oldbit)
-			dst |= map[i].newbit;
-	}
-	return (dst);
-}
-
-/*
- * Translate current DN_* flowset flags into the old-style DNOLD_* flags
- * reported to old ipfw binaries. Only the bits listed in the table are
- * carried over; every other bit of 'src' is dropped.
- */
-static int
-convertflags2old(int src)
-{
-	static const struct {
-		int newbit;
-		int oldbit;
-	} map[] = {
-		{ DN_HAVE_MASK,		DNOLD_HAVE_FLOW_MASK },
-		{ DN_IS_RED,		DNOLD_IS_RED },
-		{ DN_IS_GENTLE_RED,	DNOLD_IS_GENTLE_RED },
-		{ DN_NOERROR,		DNOLD_NOERROR },
-		{ DN_HAS_PROFILE,	DNOLD_HAS_PROFILE },
-		{ DN_QSIZE_BYTES,	DNOLD_QSIZE_IS_BYTES },
-	};
-	int dst = 0;
-	size_t i;
-
-	for (i = 0; i < sizeof(map) / sizeof(map[0]); i++) {
-		if (src & map[i].newbit)
-			dst |= map[i].oldbit;
-	}
-	return (dst);
-}
-
-/*
- * Handle an old-style delete request. 'v' holds a struct dn_pipe7 or a
- * struct dn_pipe8 depending on 'is7'. Exactly one of the pipe number and
- * the flowset number must be non-zero, otherwise EINVAL is returned; the
- * request is then turned into a DN_CMD_DELETE command (subtype DN_LINK
- * for a pipe, DN_FS for a queue) and passed to do_config(), whose result
- * is returned.
- */
-static int
-dn_compat_del(void *v)
-{
-	struct dn_pipe7 *old7 = (struct dn_pipe7 *) v;
-	struct dn_pipe8 *old8 = (struct dn_pipe8 *) v;
-	struct {
-		struct dn_id oid;
-		uintptr_t a[1];	/* add more if we want a list */
-	} cmd;
-	int pipe_nr, fs_nr;
-
-	/* XXX DN_API_VERSION ??? */
-	oid_fill((void *)&cmd, sizeof(cmd), DN_CMD_DELETE, DN_API_VERSION);
-
-	/* pipe_nr is declared at the same place in both layouts */
-	pipe_nr = old7->pipe_nr;
-	/* the embedded flowset is not, so pick the matching layout */
-	fs_nr = is7 ? old7->fs.fs_nr : old8->fs.fs_nr;
-
-	/* reject "neither set" as well as "both set" */
-	if ((pipe_nr == 0) == (fs_nr == 0))
-		return (EINVAL);
-
-	if (pipe_nr != 0) {	/* pipe x delete */
-		cmd.oid.subtype = DN_LINK;
-		cmd.a[0] = pipe_nr;
-	} else {		/* queue x delete */
-		cmd.oid.subtype = DN_FS;
-		cmd.a[0] = fs_nr;
-	}
-
-	return (do_config(&cmd, cmd.oid.len));
-}
-
-/*
- * Fill the new-style flowset 'fs' from the old-style flowset embedded in
- * the request 'v' (a struct dn_pipe7 or struct dn_pipe8, chosen by
- * 'is7'). The RED parameters are copied only when the converted flags
- * select RED or gentle RED. Always returns 0.
- */
-static int
-dn_compat_config_queue(struct dn_fs *fs, void* v)
-{
-	struct dn_flow_set *old;
-
-	/* the flowset sits behind version-specific members */
-	if (is7)
-		old = &((struct dn_pipe7 *)v)->fs;
-	else
-		old = &((struct dn_pipe8 *)v)->fs;
-
-	fs->fs_nr = old->fs_nr;
-	fs->sched_nr = old->parent_nr;
-	fs->flow_mask = old->flow_mask;
-	fs->buckets = old->rq_size;
-	fs->qsize = old->qsize;
-	fs->plr = old->plr;
-	fs->par[0] = old->weight;
-	fs->flags = convertflags2new(old->flags_fs);
-
-	if ((fs->flags & (DN_IS_GENTLE_RED | DN_IS_RED)) != 0) {
-		fs->w_q = old->w_q;
-		fs->max_th = old->max_th;
-		fs->min_th = old->min_th;
-		fs->max_p = old->max_p;
-	}
-
-	return (0);
-}
-
-/*
- * Convert an old-style "pipe N config" request into the three new
- * objects: scheduler 'sch' (number N), link 'p' (number N) and the
- * flowset 'fs' (number N + 2*DN_MAX_ID, attached to scheduler
- * N + DN_MAX_ID). Bucket count and flow mask are moved from the flowset to
- * the scheduler. Always returns 0.
- */
-static int
-dn_compat_config_pipe(struct dn_sch *sch, struct dn_link *p,
-	struct dn_fs *fs, void* v)
-{
-	struct dn_pipe7 *old7 = (struct dn_pipe7 *)v;
-	int nr = old7->pipe_nr;
-
-	sch->sched_nr = nr;
-	sch->oid.subtype = 0;
-	p->link_nr = nr;
-
-	/* Common to 7 and 8 */
-	p->bandwidth = old7->bandwidth;
-	p->delay = old7->delay;
-	/* FreeBSD 8 has burst */
-	if (!is7)
-		p->burst = ((struct dn_pipe8 *)v)->burst;
-
-	/*
-	 * Fill the fifo flowset. The conversion stores the old numbers in
-	 * fs_nr and sched_nr, so the pipe-derived values are set afterwards.
-	 */
-	dn_compat_config_queue(fs, v);
-	fs->fs_nr = nr + 2*DN_MAX_ID;
-	fs->sched_nr = nr + DN_MAX_ID;
-
-	/* Move scheduler related parameter from fs to sch */
-	sch->buckets = fs->buckets; /*XXX*/
-	fs->buckets = 0;
-	if (fs->flags & DN_HAVE_MASK) {
-		sch->flags |= DN_HAVE_MASK;
-		fs->flags &= ~DN_HAVE_MASK;
-		sch->sched_mask = fs->flow_mask;
-		bzero(&fs->flow_mask, sizeof(struct ipfw_flow_id));
-	}
-
-	return (0);
-}
-
-/*
- * Build the new-style delay profile 'pf' for link 'p' from a FreeBSD 8
- * request. 'v' is treated as a struct dn_pipe_max8: the pipe's 'samples'
- * pointer is redirected to the sample array that follows the pipe, and
- * the whole array is copied into pf->samples.
- *
- * The profile name is copied with strncpy(), which does not terminate the
- * destination when the source fills it completely, so the last byte is
- * forced to NUL afterwards.
- *
- * NOTE(review): nothing here checks that 'v' is as large as
- * struct dn_pipe_max8 or that samples_no is within range; confirm the
- * callers guarantee it.
- *
- * Always returns 0.
- */
-static int
-dn_compat_config_profile(struct dn_profile *pf, struct dn_link *p,
-	void *v)
-{
-	struct dn_pipe8 *p8 = (struct dn_pipe8 *)v;
-
-	p8->samples = &(((struct dn_pipe_max8 *)p8)->samples[0]);
-
-	pf->link_nr = p->link_nr;
-	pf->loss_level = p8->loss_level;
-//	pf->bandwidth = p->bandwidth; //XXX bandwidth redundant?
-	pf->samples_no = p8->samples_no;
-	strncpy(pf->name, p8->name, sizeof(pf->name));
-	pf->name[sizeof(pf->name) - 1] = '\0';
-	bcopy(p8->samples, pf->samples, sizeof(pf->samples));
-
-	return 0;
-}
-
-/*
- * Translate an old-style configure request into a new-style DN_CMD_CONFIG
- * command and hand it to do_config().
- *
- * If p->pipe_nr != 0 the command is 'pipe x config', so the three main
- * structs (scheduler, link, flowset) are created, plus a profile when a
- * FreeBSD 8 request carries samples; otherwise only a flowset is created.
- *
- * 'base' always points at the start of the allocation while 'buf' is a
- * cursor that o_next() moves forward, so the memory must be released
- * through 'base' (the cursor no longer points at the allocation once the
- * first object has been carved out).
- *
- * Returns the error from the conversion helpers or from do_config().
- */
-static int
-dn_compat_configure(void *v)
-{
-	struct dn_id *buf = NULL, *base;
-	struct dn_sch *sch = NULL;
-	struct dn_link *p = NULL;
-	struct dn_fs *fs = NULL;
-	struct dn_profile *pf = NULL;
-	int lmax;
-	int error;
-
-	struct dn_pipe7 *p7 = (struct dn_pipe7 *)v;
-	struct dn_pipe8 *p8 = (struct dn_pipe8 *)v;
-
-	int i; /* number of object to configure */
-
-	lmax = sizeof(struct dn_id);	/* command header */
-	lmax += sizeof(struct dn_sch) + sizeof(struct dn_link) +
-		sizeof(struct dn_fs) + sizeof(struct dn_profile);
-
-	base = buf = malloc(lmax, M_DUMMYNET, M_WAIT|M_ZERO);
-	o_next(&buf, sizeof(struct dn_id), DN_CMD_CONFIG);
-	base->id = DN_API_VERSION;
-
-	/* pipe_nr is the same in p7 and p8 */
-	i = p7->pipe_nr;
-	if (i != 0) { /* pipe config */
-		sch = o_next(&buf, sizeof(*sch), DN_SCH);
-		p = o_next(&buf, sizeof(*p), DN_LINK);
-		fs = o_next(&buf, sizeof(*fs), DN_FS);
-
-		error = dn_compat_config_pipe(sch, p, fs, v);
-		if (error)
-			goto done;
-		if (!is7 && p8->samples_no > 0) {
-			/* Add profiles*/
-			pf = o_next(&buf, sizeof(*pf), DN_PROFILE);
-			error = dn_compat_config_profile(pf, p, v);
-			if (error)
-				goto done;
-		}
-	} else { /* queue config */
-		fs = o_next(&buf, sizeof(*fs), DN_FS);
-		error = dn_compat_config_queue(fs, v);
-		if (error)
-			goto done;
-	}
-	/* the command length is the distance the cursor has travelled */
-	error = do_config(base, (char *)buf - (char *)base);
-
-done:
-	free(base, M_DUMMYNET);
-	return error;
-}
-
-/*
- * Estimate the buffer space needed to export the configuration in the
- * old format.
- * XXX uses the FreeBSD 8 struct sizes.
- * The estimate is made of:
- * - half of the schedulers (schk_count / 2), as pipes
- * - all flowsets (fsk_count)
- * - all pipe queues (si_count)
- * - all flowset queues (queue_count)
- */
-int
-dn_compat_calc_size(void)
-{
-	size_t total;
-
-	total = dn_cfg.schk_count * sizeof(struct dn_pipe8) / 2;
-	total += dn_cfg.fsk_count * sizeof(struct dn_flow_set);
-	total += dn_cfg.si_count * sizeof(struct dn_flow_queue8);
-	total += dn_cfg.queue_count * sizeof(struct dn_flow_queue8);
-
-	return (total);
-}
-
-/*
- * Export one flow (a struct dn_flow passed as '_ni') as an old-style
- * queue record at the current output position *a->start, then advance
- * the position by the size of the record for the selected version.
- * Only id, len, len_bytes and the statistics counters are written.
- * Always returns 0.
- */
-int
-dn_c_copy_q (void *_ni, void *arg)
-{
-	struct copy_args *a = arg;
-	struct dn_flow *ni = (struct dn_flow *)_ni;
-	struct dn_flow_queue7 *fq7 = (struct dn_flow_queue7 *)*a->start;
-
-	/* XXX hash slot not set */
-	/* No difference between 7.2/8 */
-	fq7->len = ni->length;
-	fq7->len_bytes = ni->len_bytes;
-	fq7->id = ni->fid;
-
-	if (!is7) {
-		struct dn_flow_queue8 *fq8 =
-		    (struct dn_flow_queue8 *)*a->start;
-
-		fq8->tot_pkts = ni->tot_pkts;
-		fq8->tot_bytes = ni->tot_bytes;
-		fq8->drops = ni->drops;
-		*a->start += sizeof(struct dn_flow_queue8);
-		return (0);
-	}
-
-	fq7->tot_pkts = ni->tot_pkts;
-	fq7->tot_bytes = ni->tot_bytes;
-	fq7->drops = ni->drops;
-	*a->start += sizeof(struct dn_flow_queue7);
-	return (0);
-}
-
-/*
- * Export scheduler 's' as an old-style pipe record at *a->start and
- * advance the output position by the size of the record for the selected
- * version. 'nq' is stored as the number of active queue elements.
- *
- * The record is tagged by storing DN_IS_PIPE in its 'next' link. The
- * delay is converted with "* 1000 / hz" and, for FreeBSD 8 records, the
- * burst is divided by 8 * hz. The profile name copy is bounded by the
- * size of the destination field.
- *
- * Always returns 0.
- */
-int
-dn_c_copy_pipe(struct dn_schk *s, struct copy_args *a, int nq)
-{
-	struct dn_link *l = &s->link;
-	struct dn_fsk *f = s->fs;
-
-	struct dn_pipe7 *pipe7 = (struct dn_pipe7 *)*a->start;
-	struct dn_pipe8 *pipe8 = (struct dn_pipe8 *)*a->start;
-	struct dn_flow_set *fs;
-	int size = 0;
-
-	if (is7) {
-		fs = &pipe7->fs;
-		size = sizeof(struct dn_pipe7);
-	} else {
-		fs = &pipe8->fs;
-		size = sizeof(struct dn_pipe8);
-	}
-
-	/* These 4 field are the same in pipe7 and pipe8 */
-	pipe7->next.sle_next = (struct dn_pipe7 *)DN_IS_PIPE;
-	pipe7->bandwidth = l->bandwidth;
-	pipe7->delay = l->delay * 1000 / hz;
-	pipe7->pipe_nr = l->link_nr - DN_MAX_ID;
-
-	if (!is7) {
-		if (s->profile) {
-			struct dn_profile *pf = s->profile;
-			/* bound the copy by the field being written */
-			strncpy(pipe8->name, pf->name, sizeof(pipe8->name));
-			pipe8->loss_level = pf->loss_level;
-			pipe8->samples_no = pf->samples_no;
-		}
-		pipe8->burst = div64(l->burst , 8 * hz);
-	}
-
-	fs->flow_mask = s->sch.sched_mask;
-	fs->rq_size = s->sch.buckets ? s->sch.buckets : 1;
-
-	fs->parent_nr = l->link_nr - DN_MAX_ID;
-	fs->qsize = f->fs.qsize;
-	fs->plr = f->fs.plr;
-	fs->w_q = f->fs.w_q;
-	fs->max_th = f->max_th;
-	fs->min_th = f->min_th;
-	fs->max_p = f->fs.max_p;
-	fs->rq_elements = nq;
-
-	fs->flags_fs = convertflags2old(f->fs.flags);
-
-	*a->start += size;
-	return 0;
-}
-
-
-/*
- * Export the scheduler '_o' as an old-style pipe followed by its queues.
- * The space check uses the FreeBSD 8 record sizes: one pipe plus one
- * queue per scheduler instance (the hash table entries when the scheduler
- * has a mask, otherwise a single instance if 'siht' is set).
- * Returns 1 when the output buffer is too small, 0 otherwise.
- */
-int
-dn_compat_copy_pipe(struct copy_args *a, void *_o)
-{
-	struct dn_schk *s = (struct dn_schk *)_o;
-	int avail = a->end - *a->start;
-	int nq;		/* number of queues */
-	int wanted;
-
-	if (s->sch.flags & DN_HAVE_MASK)
-		nq = dn_ht_entries(s->siht);
-	else if (s->siht)
-		nq = 1;
-	else
-		nq = 0;
-
-	wanted = (int)sizeof(struct dn_pipe8) +
-	    (int)sizeof(struct dn_flow_queue8) * nq;
-	if (avail < wanted) {
-		D("have %d < need %d", avail, wanted);
-		return (1);
-	}
-
-	/* copy pipe */
-	dn_c_copy_pipe(s, a, nq);
-
-	/* copy queues */
-	if (s->sch.flags & DN_HAVE_MASK)
-		dn_ht_scan(s->siht, dn_c_copy_q, a);
-	else if (s->siht)
-		dn_c_copy_q(s->siht, a);
-	return (0);
-}
-
-/*
- * Export flowset 'f' as an old-style flowset record at *a->start and
- * advance the output position by sizeof(struct dn_flow_set). The record
- * is tagged by storing DN_IS_QUEUE in its 'next' link; 'nq' is stored as
- * the number of active queue elements. Always returns 0.
- */
-int
-dn_c_copy_fs(struct dn_fsk *f, struct copy_args *a, int nq)
-{
-	struct dn_flow_set *out = (struct dn_flow_set *)*a->start;
-
-	out->next.sle_next = (struct dn_flow_set *)DN_IS_QUEUE;
-
-	/* identity and hierarchy */
-	out->fs_nr = f->fs.fs_nr;
-	out->parent_nr = f->fs.sched_nr;
-	out->flow_mask = f->fs.flow_mask;
-	out->flags_fs = convertflags2old(f->fs.flags);
-
-	/* queue sizing */
-	out->rq_size = (f->fs.buckets ? f->fs.buckets : 1);
-	out->rq_elements = nq;
-	out->qsize = f->fs.qsize;
-	out->plr = f->fs.plr;
-	out->weight = f->fs.par[0];
-
-	/* RED parameters */
-	out->w_q = f->fs.w_q;
-	out->max_th = f->max_th;
-	out->min_th = f->min_th;
-	out->max_p = f->fs.max_p;
-
-	*a->start += sizeof(struct dn_flow_set);
-	return (0);
-}
-
-/*
- * Export the flowset '_o' as an old-style flowset followed by its queues.
- * The space check counts one flowset record plus one FreeBSD 8 queue
- * record per queue (the hash table entries when the flowset has a mask,
- * otherwise a single queue if 'qht' is set).
- * Returns 1 when the output buffer is too small, 0 otherwise.
- */
-int
-dn_compat_copy_queue(struct copy_args *a, void *_o)
-{
-	struct dn_fsk *fs = (struct dn_fsk *)_o;
-	int avail = a->end - *a->start;
-	int nq;		/* number of queues */
-	int wanted;
-
-	if (fs->fs.flags & DN_HAVE_MASK)
-		nq = dn_ht_entries(fs->qht);
-	else if (fs->qht)
-		nq = 1;
-	else
-		nq = 0;
-
-	wanted = (int)sizeof(struct dn_flow_set) +
-	    (int)sizeof(struct dn_flow_queue8) * nq;
-	if (avail < wanted) {
-		D("have < need");
-		return (1);
-	}
-
-	/* copy flowset */
-	dn_c_copy_fs(fs, a, nq);
-
-	/* copy queues */
-	if (fs->fs.flags & DN_HAVE_MASK)
-		dn_ht_scan(fs->qht, dn_c_copy_q, a);
-	else if (fs->qht)
-		dn_c_copy_q(fs->qht, a);
-
-	return (0);
-}
-
-/*
- * Scan callback used to export objects in the old format.
- * For a->type == DN_COMPAT_PIPE, '_o' is a scheduler: it is exported only
- * if its subtype is 1 and its number is above DN_MAX_ID. For
- * a->type == DN_COMPAT_QUEUE, '_o' is a flowset: it is exported only if
- * its number is below DN_MAX_ID.
- * Returns DNHT_SCAN_END when the export helper reports a failure (non-zero),
- * and 0 in every other case.
- */
-int
-copy_data_helper_compat(void *_o, void *_arg)
-{
-	struct copy_args *a = _arg;
-
-	if (a->type == DN_COMPAT_PIPE) {
-		struct dn_schk *s = _o;
-
-		/*
-		 * copy pipe parameters, and if instance exists, copy
-		 * other parameters and eventually queues.
-		 */
-		if (s->sch.oid.subtype == 1 && s->sch.sched_nr > DN_MAX_ID &&
-		    dn_compat_copy_pipe(a, _o))
-			return (DNHT_SCAN_END);
-		return (0);
-	}
-
-	if (a->type == DN_COMPAT_QUEUE) {
-		struct dn_fsk *fs = _o;
-
-		if (fs->fs.fs_nr < DN_MAX_ID && dn_compat_copy_queue(a, _o))
-			return (DNHT_SCAN_END);
-	}
-	return (0);
-}
-
-/*
- * Main function to manage old requests.
- *
- * The length of the option data is used to guess which ipfw version is
- * talking to us: sizeof(struct dn_pipe7) selects the FreeBSD 7.2 layout,
- * sizeof(struct dn_pipe8) or sizeof(struct dn_pipe_max8) selects the
- * FreeBSD 8 layout; any other length leaves the previous guess in place.
- *
- * For IP_DUMMYNET_DEL and IP_DUMMYNET_CONFIGURE the data is copied in and
- * then read as a pipe structure of the guessed version, so requests
- * shorter than that structure are rejected with EINVAL. The temporary
- * copy is released on every path, including a failed copyin.
- *
- * Returns 0 or an errno value. Note that a failing dummynet_get() in the
- * IP_DUMMYNET_GET case still returns 0 (see the XXX below).
- */
-int
-ip_dummynet_compat(struct sockopt *sopt)
-{
-	int error=0;
-	void *v = NULL;
-	struct dn_id oid;
-
-	/* Length of data, used to find the ipfw version... */
-	int len = sopt->sopt_valsize;
-
-	/* len can be 0 if command was dummynet_flush */
-	if (len == pipesize7) {
-		D("setting compatibility with FreeBSD 7.2");
-		is7 = 1;
-	}
-	else if (len == pipesize8 || len == pipesizemax8) {
-		D("setting compatibility with FreeBSD 8");
-		is7 = 0;
-	}
-
-	switch (sopt->sopt_name) {
-	default:
-		printf("dummynet: -- unknown option %d", sopt->sopt_name);
-		error = EINVAL;
-		break;
-
-	case IP_DUMMYNET_FLUSH:
-		oid_fill(&oid, sizeof(oid), DN_CMD_FLUSH, DN_API_VERSION);
-		do_config(&oid, oid.len);
-		break;
-
-	case IP_DUMMYNET_DEL:
-	case IP_DUMMYNET_CONFIGURE:
-		/* the handlers read a whole pipe struct out of the buffer */
-		if (len < 0 || (size_t)len < (is7 ? pipesize7 : pipesize8)) {
-			error = EINVAL;
-			break;
-		}
-		v = malloc(len, M_TEMP, M_WAITOK);
-		error = sooptcopyin(sopt, v, len, len);
-		if (error == 0) {
-			if (sopt->sopt_name == IP_DUMMYNET_DEL)
-				error = dn_compat_del(v);
-			else
-				error = dn_compat_configure(v);
-		}
-		free(v, M_TEMP);
-		break;
-
-	case IP_DUMMYNET_GET: {
-		void *buf;
-		int ret;
-		int original_size = sopt->sopt_valsize;
-		int size;
-
-		ret = dummynet_get(sopt, &buf);
-		if (ret)
-			return 0;//XXX ?
-		/* dummynet_get() leaves the produced size in sopt_valsize */
-		size = sopt->sopt_valsize;
-		sopt->sopt_valsize = original_size;
-		D("size=%d, buf=%p", size, buf);
-		ret = sooptcopyout(sopt, buf, size);
-		if (ret)
-			printf(" %s ERROR sooptcopyout\n", __FUNCTION__);
-		if (buf)
-			free(buf, M_DUMMYNET);
-	    }
-	}
-
-	return error;
-}
-
-
diff --git a/sys/netinet/ipfw/ip_dn_io.c b/sys/netinet/ipfw/ip_dn_io.c
deleted file mode 100644
index becd85e..0000000
--- a/sys/netinet/ipfw/ip_dn_io.c
+++ /dev/null
@@ -1,858 +0,0 @@
-/*-
- * Copyright (c) 2010 Luigi Rizzo, Riccardo Panicucci, Universita` di Pisa
- * All rights reserved
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-/*
- * Dummynet portions related to packet handling.
- */
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
-#include "opt_inet6.h"
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/malloc.h>
-#include <sys/mbuf.h>
-#include <sys/kernel.h>
-#include <sys/lock.h>
-#include <sys/module.h>
-#include <sys/priv.h>
-#include <sys/proc.h>
-#include <sys/rwlock.h>
-#include <sys/socket.h>
-#include <sys/time.h>
-#include <sys/sysctl.h>
-
-#include <net/if.h> /* IFNAMSIZ, struct ifaddr, ifq head, lock.h mutex.h */
-#include <net/netisr.h>
-#include <net/vnet.h>
-
-#include <netinet/in.h>
-#include <netinet/ip.h> /* ip_len, ip_off */
-#include <netinet/ip_var.h> /* ip_output(), IP_FORWARDING */
-#include <netinet/ip_fw.h>
-#include <netinet/ipfw/ip_fw_private.h>
-#include <netinet/ipfw/dn_heap.h>
-#include <netinet/ip_dummynet.h>
-#include <netinet/ipfw/ip_dn_private.h>
-#include <netinet/ipfw/dn_sched.h>
-
-#include <netinet/if_ether.h> /* various ether_* routines */
-
-#include <netinet/ip6.h> /* for ip6_input, ip6_output prototypes */
-#include <netinet6/ip6_var.h>
-
-/*
- * We keep a private variable for the simulation time, but we could
- * probably use an existing one ("softticks" in sys/kern/kern_timeout.c)
- * instead of dn_cfg.curr_time
- */
-
-struct dn_parms dn_cfg;
-//VNET_DEFINE(struct dn_parms, _base_dn_cfg);
-
-static long tick_last; /* Last tick duration (usec). */
-static long tick_delta; /* Last vs standard tick diff (usec). */
-static long tick_delta_sum; /* Accumulated tick difference (usec).*/
-static long tick_adjustment; /* Tick adjustments done. */
-static long tick_lost; /* Lost(coalesced) ticks number. */
-/* Adjusted vs non-adjusted curr_time difference (ticks). */
-static long tick_diff;
-
-/* Packet counters, exported read-only through the dummynet sysctl node. */
-static unsigned long io_pkt; /* packets passed to dummynet */
-static unsigned long io_pkt_fast; /* packets that bypassed the scheduler */
-static unsigned long io_pkt_drop; /* packets dropped by dummynet */
-
-/*
- * We use a heap to store entities for which we have pending timer events.
- * The heap is checked at every tick and all entities with expired events
- * are extracted.
- */
-
-MALLOC_DEFINE(M_DUMMYNET, "dummynet", "dummynet heap");
-
-extern void (*bridge_dn_p)(struct mbuf *, struct ifnet *);
-
-#ifdef SYSCTL_NODE
-
-/*
- * Because of the way the SYSBEGIN/SYSEND macros work on other
- * platforms, there should not be functions between them.
- * So keep the handlers outside the block.
- */
-/*
- * sysctl handler for the default hash table size. Reads report the
- * current dn_cfg.hash_size; writes are accepted only for values in the
- * range 16..65536, anything else yields EINVAL.
- */
-static int
-sysctl_hash_size(SYSCTL_HANDLER_ARGS)
-{
-	int newsize = dn_cfg.hash_size;
-	int error;
-
-	error = sysctl_handle_int(oidp, &newsize, 0, req);
-	if (error != 0 || req->newptr == NULL)
-		return (error);
-	if (!(newsize >= 16 && newsize <= 65536))
-		return (EINVAL);
-
-	dn_cfg.hash_size = newsize;
-	return (0);
-}
-
-/*
- * sysctl handler shared by the two pipe queue limits. A non-zero arg2
- * selects dn_cfg.slot_limit (minimum accepted value 1), arg2 == 0 selects
- * dn_cfg.byte_limit (minimum accepted value 1500). Values below the
- * minimum yield EINVAL.
- */
-static int
-sysctl_limits(SYSCTL_HANDLER_ARGS)
-{
-	const int slots = (arg2 != 0);
-	long value;
-	int error;
-
-	value = slots ? dn_cfg.slot_limit : dn_cfg.byte_limit;
-	error = sysctl_handle_long(oidp, &value, 0, req);
-	if (error != 0 || req->newptr == NULL)
-		return (error);
-
-	if (value < (slots ? 1 : 1500))
-		return (EINVAL);
-
-	if (slots)
-		dn_cfg.slot_limit = value;
-	else
-		dn_cfg.byte_limit = value;
-	return (0);
-}
-
-/*
- * Declarations of the "dummynet" sysctl node under _net_inet_ip and of
- * its entries. Entries backed by dn_cfg members go through the DC()
- * wrapper; hash_size and the two pipe limits use the handlers defined
- * above so that new values can be range-checked; the tick_* and io_pkt*
- * entries export the file-local counters. Only declarations appear
- * between SYSBEGIN and SYSEND.
- */
-SYSBEGIN(f4)
-
-SYSCTL_DECL(_net_inet);
-SYSCTL_DECL(_net_inet_ip);
-static SYSCTL_NODE(_net_inet_ip, OID_AUTO, dummynet, CTLFLAG_RW, 0, "Dummynet");
-
-/* wrapper to pass dn_cfg fields to SYSCTL_* */
-//#define DC(x) (&(VNET_NAME(_base_dn_cfg).x))
-#define DC(x) (&(dn_cfg.x))
-/* parameters */
-
-
-SYSCTL_PROC(_net_inet_ip_dummynet, OID_AUTO, hash_size,
- CTLTYPE_INT | CTLFLAG_RW, 0, 0, sysctl_hash_size,
- "I", "Default hash table size");
-
-
-/* arg2 tells sysctl_limits() which limit is meant: 1 = slots, 0 = bytes */
-SYSCTL_PROC(_net_inet_ip_dummynet, OID_AUTO, pipe_slot_limit,
- CTLTYPE_LONG | CTLFLAG_RW, 0, 1, sysctl_limits,
- "L", "Upper limit in slots for pipe queue.");
-SYSCTL_PROC(_net_inet_ip_dummynet, OID_AUTO, pipe_byte_limit,
- CTLTYPE_LONG | CTLFLAG_RW, 0, 0, sysctl_limits,
- "L", "Upper limit in bytes for pipe queue.");
-SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, io_fast,
- CTLFLAG_RW, DC(io_fast), 0, "Enable fast dummynet io.");
-SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, debug,
- CTLFLAG_RW, DC(debug), 0, "Dummynet debug level");
-
-/* RED parameters */
-SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, red_lookup_depth,
- CTLFLAG_RD, DC(red_lookup_depth), 0, "Depth of RED lookup table");
-SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, red_avg_pkt_size,
- CTLFLAG_RD, DC(red_avg_pkt_size), 0, "RED Medium packet size");
-SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, red_max_pkt_size,
- CTLFLAG_RD, DC(red_max_pkt_size), 0, "RED Max packet size");
-
-/* time adjustment */
-SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, tick_delta,
- CTLFLAG_RD, &tick_delta, 0, "Last vs standard tick difference (usec).");
-SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, tick_delta_sum,
- CTLFLAG_RD, &tick_delta_sum, 0, "Accumulated tick difference (usec).");
-SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, tick_adjustment,
- CTLFLAG_RD, &tick_adjustment, 0, "Tick adjustments done.");
-SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, tick_diff,
- CTLFLAG_RD, &tick_diff, 0,
- "Adjusted vs non-adjusted curr_time difference (ticks).");
-SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, tick_lost,
- CTLFLAG_RD, &tick_lost, 0,
- "Number of ticks coalesced by dummynet taskqueue.");
-
-/* Drain parameters */
-SYSCTL_UINT(_net_inet_ip_dummynet, OID_AUTO, expire,
- CTLFLAG_RW, DC(expire), 0, "Expire empty queues/pipes");
-SYSCTL_UINT(_net_inet_ip_dummynet, OID_AUTO, expire_cycle,
- CTLFLAG_RD, DC(expire_cycle), 0, "Expire cycle for queues/pipes");
-
-/* statistics */
-SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, schk_count,
- CTLFLAG_RD, DC(schk_count), 0, "Number of schedulers");
-SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, si_count,
- CTLFLAG_RD, DC(si_count), 0, "Number of scheduler instances");
-SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, fsk_count,
- CTLFLAG_RD, DC(fsk_count), 0, "Number of flowsets");
-SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, queue_count,
- CTLFLAG_RD, DC(queue_count), 0, "Number of queues");
-SYSCTL_ULONG(_net_inet_ip_dummynet, OID_AUTO, io_pkt,
- CTLFLAG_RD, &io_pkt, 0,
- "Number of packets passed to dummynet.");
-SYSCTL_ULONG(_net_inet_ip_dummynet, OID_AUTO, io_pkt_fast,
- CTLFLAG_RD, &io_pkt_fast, 0,
- "Number of packets bypassed dummynet scheduler.");
-SYSCTL_ULONG(_net_inet_ip_dummynet, OID_AUTO, io_pkt_drop,
- CTLFLAG_RD, &io_pkt_drop, 0,
- "Number of packets dropped by dummynet.");
-#undef DC
-SYSEND
-
-#endif
-
-static void dummynet_send(struct mbuf *);
-
-/*
- * Packets processed by dummynet have an mbuf tag associated with
- * them that carries their dummynet state.
- * Outside dummynet, only the 'rule' field is relevant, and it must
- * be at the beginning of the structure.
- * The structure is stored directly after the struct m_tag header of the
- * tag; dn_tag_get() returns a pointer to it.
- */
-struct dn_pkt_tag {
- struct ipfw_rule_ref rule; /* matching rule */
-
- /* second part, dummynet specific */
- int dn_dir; /* action when packet comes out.*/
- /* see ip_fw_private.h */
- uint64_t output_time; /* when the pkt is due for delivery*/
- struct ifnet *ifp; /* interface, for ip_output */
- struct _ip6dn_args ip6opt; /* XXX ipv6 options */
-};
-
-/*
- * Fetch the dummynet state attached to packet 'm'. The dummynet tag is
- * expected to be the first tag on the mbuf (this is asserted), and the
- * state is stored right behind the tag header.
- */
-static struct dn_pkt_tag *
-dn_tag_get(struct mbuf *m)
-{
-	struct m_tag *mtag;
-
-	mtag = m_tag_first(m);
-	KASSERT(mtag != NULL &&
-	    mtag->m_tag_cookie == MTAG_ABI_COMPAT &&
-	    mtag->m_tag_id == PACKET_TAG_DUMMYNET,
-	    ("packet on dummynet queue w/o dummynet tag!"));
-	return ((struct dn_pkt_tag *)&mtag[1]);
-}
-
-/*
- * Append packet 'm' at the tail of queue 'q'. An empty queue is
- * recognized by a NULL head; the appended packet always ends the chain.
- */
-static inline void
-mq_append(struct mq *q, struct mbuf *m)
-{
-	if (q->head != NULL)
-		q->tail->m_nextpkt = m;
-	else
-		q->head = m;
-	m->m_nextpkt = NULL;
-	q->tail = m;
-}
-
-/*
- * Release every packet of the m_nextpkt-linked list starting at 'mnext'.
- * Kept as a function so that any extra per-packet cleanup has a single
- * place to live.
- */
-void dn_free_pkts(struct mbuf *mnext)
-{
-	struct mbuf *cur, *rest;
-
-	for (cur = mnext; cur != NULL; cur = rest) {
-		/* read the link before the packet goes away */
-		rest = cur->m_nextpkt;
-		FREE_PKT(cur);
-	}
-}
-
-static int
-red_drops (struct dn_queue *q, int len)
-{
- /*
- * RED algorithm
- *
- * RED calculates the average queue size (avg) using a low-pass filter
- * with an exponential weighted (w_q) moving average:
- * avg <- (1-w_q) * avg + w_q * q_size
- * where q_size is the queue length (measured in bytes or * packets).
- *
- * If q_size == 0, we compute the idle time for the link, and set
- * avg = (1 - w_q)^(idle/s)
- * where s is the time needed for transmitting a medium-sized packet.
- *
- * Now, if avg < min_th the packet is enqueued.
- * If avg > max_th the packet is dropped. Otherwise, the packet is
- * dropped with probability P function of avg.
- */
-
- struct dn_fsk *fs = q->fs;
- int64_t p_b = 0;
-
- /* Queue in bytes or packets? */
- uint32_t q_size = (fs->fs.flags & DN_QSIZE_BYTES) ?
- q->ni.len_bytes : q->ni.length;
-
- /* Average queue size estimation. */
- if (q_size != 0) {
- /* Queue is not empty, avg <- avg + (q_size - avg) * w_q */
- int diff = SCALE(q_size) - q->avg;
- int64_t v = SCALE_MUL((int64_t)diff, (int64_t)fs->w_q);
-
- q->avg += (int)v;
- } else {
- /*
- * Queue is empty, find for how long the queue has been
- * empty and use a lookup table for computing
- * (1 - * w_q)^(idle_time/s) where s is the time to send a
- * (small) packet.
- * XXX check wraps...
- */
- if (q->avg) {
- u_int t = div64((dn_cfg.curr_time - q->q_time), fs->lookup_step);
-
- q->avg = (t < fs->lookup_depth) ?
- SCALE_MUL(q->avg, fs->w_q_lookup[t]) : 0;
- }
- }
-
- /* Should i drop? */
- if (q->avg < fs->min_th) {
- q->count = -1;
- return (0); /* accept packet */
- }
- if (q->avg >= fs->max_th) { /* average queue >= max threshold */
- if (fs->fs.flags & DN_IS_GENTLE_RED) {
- /*
- * According to Gentle-RED, if avg is greater than
- * max_th the packet is dropped with a probability
- * p_b = c_3 * avg - c_4
- * where c_3 = (1 - max_p) / max_th
- * c_4 = 1 - 2 * max_p
- */
- p_b = SCALE_MUL((int64_t)fs->c_3, (int64_t)q->avg) -
- fs->c_4;
- } else {
- q->count = -1;
- return (1);
- }
- } else if (q->avg > fs->min_th) {
- /*
- * We compute p_b using the linear dropping function
- * p_b = c_1 * avg - c_2
- * where c_1 = max_p / (max_th - min_th)
- * c_2 = max_p * min_th / (max_th - min_th)
- */
- p_b = SCALE_MUL((int64_t)fs->c_1, (int64_t)q->avg) - fs->c_2;
- }
-
- if (fs->fs.flags & DN_QSIZE_BYTES)
- p_b = div64((p_b * len) , fs->max_pkt_size);
- if (++q->count == 0)
- q->random = random() & 0xffff;
- else {
- /*
- * q->count counts packets arrived since last drop, so a greater
- * value of q->count means a greater packet drop probability.
- */
- if (SCALE_MUL(p_b, SCALE((int64_t)q->count)) > q->random) {
- q->count = 0;
- /* After a drop we calculate a new random value. */
- q->random = random() & 0xffff;
- return (1); /* drop */
- }
- }
- /* End of RED algorithm. */
-
- return (0); /* accept */
-
-}
-
-/*
- * Enqueue a packet in q, subject to space and queue management policy
- * (whose parameters are in q->fs).
- * Update stats for the queue and the scheduler.
- * Return 0 on success, 1 on drop. The packet is consumed anyways.
- */
-int
-dn_enqueue(struct dn_queue *q, struct mbuf* m, int drop)
-{
- struct dn_fs *f;
- struct dn_flow *ni; /* stats for scheduler instance */
- uint64_t len;
-
- if (q->fs == NULL || q->_si == NULL) {
- printf("%s fs %p si %p, dropping\n",
- __FUNCTION__, q->fs, q->_si);
- FREE_PKT(m);
- return 1;
- }
- f = &(q->fs->fs);
- ni = &q->_si->ni;
- len = m->m_pkthdr.len;
- /* Update statistics, then check reasons to drop pkt. */
- q->ni.tot_bytes += len;
- q->ni.tot_pkts++;
- ni->tot_bytes += len;
- ni->tot_pkts++;
- if (drop)
- goto drop;
- if (f->plr && random() < f->plr)
- goto drop;
- if (f->flags & DN_IS_RED && red_drops(q, m->m_pkthdr.len))
- goto drop;
- if (f->flags & DN_QSIZE_BYTES) {
- if (q->ni.len_bytes > f->qsize)
- goto drop;
- } else if (q->ni.length >= f->qsize) {
- goto drop;
- }
- mq_append(&q->mq, m);
- q->ni.length++;
- q->ni.len_bytes += len;
- ni->length++;
- ni->len_bytes += len;
- return 0;
-
-drop:
- io_pkt_drop++;
- q->ni.drops++;
- ni->drops++;
- FREE_PKT(m);
- return 1;
-}
-
-/*
- * Fetch packets from the delay line which are due now. If there are
- * leftover packets, reinsert the delay line in the heap.
- * Runs under scheduler lock.
- */
-static void
-transmit_event(struct mq *q, struct delay_line *dline, uint64_t now)
-{
- struct mbuf *m;
- struct dn_pkt_tag *pkt = NULL;
-
- dline->oid.subtype = 0; /* not in heap */
- while ((m = dline->mq.head) != NULL) {
- pkt = dn_tag_get(m);
- if (!DN_KEY_LEQ(pkt->output_time, now))
- break;
- dline->mq.head = m->m_nextpkt;
- mq_append(q, m);
- }
- if (m != NULL) {
- dline->oid.subtype = 1; /* in heap */
- heap_insert(&dn_cfg.evheap, pkt->output_time, dline);
- }
-}
-
-/*
- * Convert the additional MAC overheads/delays into an equivalent
- * number of bits for the given data rate. The samples are
- * in milliseconds so we need to divide by 1000.
- */
-static uint64_t
-extra_bits(struct mbuf *m, struct dn_schk *s)
-{
- int index;
- uint64_t bits;
- struct dn_profile *pf = s->profile;
-
- if (!pf || pf->samples_no == 0)
- return 0;
- index = random() % pf->samples_no;
- bits = div64((uint64_t)pf->samples[index] * s->link.bandwidth, 1000);
- if (index >= pf->loss_level) {
- struct dn_pkt_tag *dt = dn_tag_get(m);
- if (dt)
- dt->dn_dir = DIR_DROP;
- }
- return bits;
-}
-
-/*
- * Send traffic from a scheduler instance due by 'now'.
- * Return a pointer to the head of the queue.
- */
-static struct mbuf *
-serve_sched(struct mq *q, struct dn_sch_inst *si, uint64_t now)
-{
- struct mq def_q;
- struct dn_schk *s = si->sched;
- struct mbuf *m = NULL;
- int delay_line_idle = (si->dline.mq.head == NULL);
- int done, bw;
-
- if (q == NULL) {
- q = &def_q;
- q->head = NULL;
- }
-
- bw = s->link.bandwidth;
- si->kflags &= ~DN_ACTIVE;
-
- if (bw > 0)
- si->credit += (now - si->sched_time) * bw;
- else
- si->credit = 0;
- si->sched_time = now;
- done = 0;
- while (si->credit >= 0 && (m = s->fp->dequeue(si)) != NULL) {
- uint64_t len_scaled;
-
- done++;
- len_scaled = (bw == 0) ? 0 : hz *
- (m->m_pkthdr.len * 8 + extra_bits(m, s));
- si->credit -= len_scaled;
- /* Move packet in the delay line */
- dn_tag_get(m)->output_time = dn_cfg.curr_time + s->link.delay ;
- mq_append(&si->dline.mq, m);
- }
-
- /*
- * If credit >= 0 the instance is idle, mark time.
- * Otherwise put back in the heap, and adjust the output
- * time of the last inserted packet, m, which was too early.
- */
- if (si->credit >= 0) {
- si->idle_time = now;
- } else {
- uint64_t t;
- KASSERT (bw > 0, ("bw=0 and credit<0 ?"));
- t = div64(bw - 1 - si->credit, bw);
- if (m)
- dn_tag_get(m)->output_time += t;
- si->kflags |= DN_ACTIVE;
- heap_insert(&dn_cfg.evheap, now + t, si);
- }
- if (delay_line_idle && done)
- transmit_event(q, &si->dline, now);
- return q->head;
-}
-
-/*
- * The timer handler for dummynet. Time is computed in ticks, but
- * but the code is tolerant to the actual rate at which this is called.
- * Once complete, the function reschedules itself for the next tick.
- */
-void
-dummynet_task(void *context, int pending)
-{
- struct timeval t;
- struct mq q = { NULL, NULL }; /* queue to accumulate results */
-
- CURVNET_SET((struct vnet *)context);
-
- DN_BH_WLOCK();
-
- /* Update number of lost(coalesced) ticks. */
- tick_lost += pending - 1;
-
- getmicrouptime(&t);
- /* Last tick duration (usec). */
- tick_last = (t.tv_sec - dn_cfg.prev_t.tv_sec) * 1000000 +
- (t.tv_usec - dn_cfg.prev_t.tv_usec);
- /* Last tick vs standard tick difference (usec). */
- tick_delta = (tick_last * hz - 1000000) / hz;
- /* Accumulated tick difference (usec). */
- tick_delta_sum += tick_delta;
-
- dn_cfg.prev_t = t;
-
- /*
- * Adjust curr_time if the accumulated tick difference is
- * greater than the 'standard' tick. Since curr_time should
- * be monotonically increasing, we do positive adjustments
- * as required, and throttle curr_time in case of negative
- * adjustment.
- */
- dn_cfg.curr_time++;
- if (tick_delta_sum - tick >= 0) {
- int diff = tick_delta_sum / tick;
-
- dn_cfg.curr_time += diff;
- tick_diff += diff;
- tick_delta_sum %= tick;
- tick_adjustment++;
- } else if (tick_delta_sum + tick <= 0) {
- dn_cfg.curr_time--;
- tick_diff--;
- tick_delta_sum += tick;
- tick_adjustment++;
- }
-
- /* serve pending events, accumulate in q */
- for (;;) {
- struct dn_id *p; /* generic parameter to handler */
-
- if (dn_cfg.evheap.elements == 0 ||
- DN_KEY_LT(dn_cfg.curr_time, HEAP_TOP(&dn_cfg.evheap)->key))
- break;
- p = HEAP_TOP(&dn_cfg.evheap)->object;
- heap_extract(&dn_cfg.evheap, NULL);
-
- if (p->type == DN_SCH_I) {
- serve_sched(&q, (struct dn_sch_inst *)p, dn_cfg.curr_time);
- } else { /* extracted a delay line */
- transmit_event(&q, (struct delay_line *)p, dn_cfg.curr_time);
- }
- }
- if (dn_cfg.expire && ++dn_cfg.expire_cycle >= dn_cfg.expire) {
- dn_cfg.expire_cycle = 0;
- dn_drain_scheduler();
- dn_drain_queue();
- }
-
- DN_BH_WUNLOCK();
- dn_reschedule();
- if (q.head != NULL)
- dummynet_send(q.head);
- CURVNET_RESTORE();
-}
-
-/*
- * forward a chain of packets to the proper destination.
- * This runs outside the dummynet lock.
- */
-static void
-dummynet_send(struct mbuf *m)
-{
- struct mbuf *n;
-
- for (; m != NULL; m = n) {
- struct ifnet *ifp = NULL; /* gcc 3.4.6 complains */
- struct m_tag *tag;
- int dst;
-
- n = m->m_nextpkt;
- m->m_nextpkt = NULL;
- tag = m_tag_first(m);
- if (tag == NULL) { /* should not happen */
- dst = DIR_DROP;
- } else {
- struct dn_pkt_tag *pkt = dn_tag_get(m);
- /* extract the dummynet info, rename the tag
- * to carry reinject info.
- */
- dst = pkt->dn_dir;
- ifp = pkt->ifp;
- tag->m_tag_cookie = MTAG_IPFW_RULE;
- tag->m_tag_id = 0;
- }
-
- switch (dst) {
- case DIR_OUT:
- SET_HOST_IPLEN(mtod(m, struct ip *));
- ip_output(m, NULL, NULL, IP_FORWARDING, NULL, NULL);
- break ;
-
- case DIR_IN :
- /* put header in network format for ip_input() */
- //SET_NET_IPLEN(mtod(m, struct ip *));
- netisr_dispatch(NETISR_IP, m);
- break;
-
-#ifdef INET6
- case DIR_IN | PROTO_IPV6:
- netisr_dispatch(NETISR_IPV6, m);
- break;
-
- case DIR_OUT | PROTO_IPV6:
- ip6_output(m, NULL, NULL, IPV6_FORWARDING, NULL, NULL, NULL);
- break;
-#endif
-
- case DIR_FWD | PROTO_IFB: /* DN_TO_IFB_FWD: */
- if (bridge_dn_p != NULL)
- ((*bridge_dn_p)(m, ifp));
- else
- printf("dummynet: if_bridge not loaded\n");
-
- break;
-
- case DIR_IN | PROTO_LAYER2: /* DN_TO_ETH_DEMUX: */
- /*
- * The Ethernet code assumes the Ethernet header is
- * contiguous in the first mbuf header.
- * Insure this is true.
- */
- if (m->m_len < ETHER_HDR_LEN &&
- (m = m_pullup(m, ETHER_HDR_LEN)) == NULL) {
- printf("dummynet/ether: pullup failed, "
- "dropping packet\n");
- break;
- }
- ether_demux(m->m_pkthdr.rcvif, m);
- break;
-
- case DIR_OUT | PROTO_LAYER2: /* N_TO_ETH_OUT: */
- ether_output_frame(ifp, m);
- break;
-
- case DIR_DROP:
- /* drop the packet after some time */
- FREE_PKT(m);
- break;
-
- default:
- printf("dummynet: bad switch %d!\n", dst);
- FREE_PKT(m);
- break;
- }
- }
-}
-
-static inline int
-tag_mbuf(struct mbuf *m, int dir, struct ip_fw_args *fwa)
-{
- struct dn_pkt_tag *dt;
- struct m_tag *mtag;
-
- mtag = m_tag_get(PACKET_TAG_DUMMYNET,
- sizeof(*dt), M_NOWAIT | M_ZERO);
- if (mtag == NULL)
- return 1; /* Cannot allocate packet header. */
- m_tag_prepend(m, mtag); /* Attach to mbuf chain. */
- dt = (struct dn_pkt_tag *)(mtag + 1);
- dt->rule = fwa->rule;
- dt->rule.info &= IPFW_ONEPASS; /* only keep this info */
- dt->dn_dir = dir;
- dt->ifp = fwa->oif;
- /* dt->output tame is updated as we move through */
- dt->output_time = dn_cfg.curr_time;
- return 0;
-}
-
-
-/*
- * dummynet hook for packets.
- * We use the argument to locate the flowset fs and the sched_set sch
- * associated to it. The we apply flow_mask and sched_mask to
- * determine the queue and scheduler instances.
- *
- * dir where shall we send the packet after dummynet.
- * *m0 the mbuf with the packet
- * ifp the 'ifp' parameter from the caller.
- * NULL in ip_input, destination interface in ip_output,
- */
-int
-dummynet_io(struct mbuf **m0, int dir, struct ip_fw_args *fwa)
-{
- struct mbuf *m = *m0;
- struct dn_fsk *fs = NULL;
- struct dn_sch_inst *si;
- struct dn_queue *q = NULL; /* default */
-
- int fs_id = (fwa->rule.info & IPFW_INFO_MASK) +
- ((fwa->rule.info & IPFW_IS_PIPE) ? 2*DN_MAX_ID : 0);
- DN_BH_WLOCK();
- io_pkt++;
- /* we could actually tag outside the lock, but who cares... */
- if (tag_mbuf(m, dir, fwa))
- goto dropit;
- if (dn_cfg.busy) {
- /* if the upper half is busy doing something expensive,
- * lets queue the packet and move forward
- */
- mq_append(&dn_cfg.pending, m);
- m = *m0 = NULL; /* consumed */
- goto done; /* already active, nothing to do */
- }
- /* XXX locate_flowset could be optimised with a direct ref. */
- fs = dn_ht_find(dn_cfg.fshash, fs_id, 0, NULL);
- if (fs == NULL)
- goto dropit; /* This queue/pipe does not exist! */
- if (fs->sched == NULL) /* should not happen */
- goto dropit;
- /* find scheduler instance, possibly applying sched_mask */
- si = ipdn_si_find(fs->sched, &(fwa->f_id));
- if (si == NULL)
- goto dropit;
- /*
- * If the scheduler supports multiple queues, find the right one
- * (otherwise it will be ignored by enqueue).
- */
- if (fs->sched->fp->flags & DN_MULTIQUEUE) {
- q = ipdn_q_find(fs, si, &(fwa->f_id));
- if (q == NULL)
- goto dropit;
- }
- if (fs->sched->fp->enqueue(si, q, m)) {
- /* packet was dropped by enqueue() */
- m = *m0 = NULL;
- goto dropit;
- }
-
- if (si->kflags & DN_ACTIVE) {
- m = *m0 = NULL; /* consumed */
- goto done; /* already active, nothing to do */
- }
-
- /* compute the initial allowance */
- if (si->idle_time < dn_cfg.curr_time) {
- /* Do this only on the first packet on an idle pipe */
- struct dn_link *p = &fs->sched->link;
-
- si->sched_time = dn_cfg.curr_time;
- si->credit = dn_cfg.io_fast ? p->bandwidth : 0;
- if (p->burst) {
- uint64_t burst = (dn_cfg.curr_time - si->idle_time) * p->bandwidth;
- if (burst > p->burst)
- burst = p->burst;
- si->credit += burst;
- }
- }
- /* pass through scheduler and delay line */
- m = serve_sched(NULL, si, dn_cfg.curr_time);
-
- /* optimization -- pass it back to ipfw for immediate send */
- /* XXX Don't call dummynet_send() if scheduler return the packet
- * just enqueued. This avoid a lock order reversal.
- *
- */
- if (/*dn_cfg.io_fast &&*/ m == *m0 && (dir & PROTO_LAYER2) == 0 ) {
- /* fast io, rename the tag * to carry reinject info. */
- struct m_tag *tag = m_tag_first(m);
-
- tag->m_tag_cookie = MTAG_IPFW_RULE;
- tag->m_tag_id = 0;
- io_pkt_fast++;
- if (m->m_nextpkt != NULL) {
- printf("dummynet: fast io: pkt chain detected!\n");
- m->m_nextpkt = NULL;
- }
- m = NULL;
- } else {
- *m0 = NULL;
- }
-done:
- DN_BH_WUNLOCK();
- if (m)
- dummynet_send(m);
- return 0;
-
-dropit:
- io_pkt_drop++;
- DN_BH_WUNLOCK();
- if (m)
- FREE_PKT(m);
- *m0 = NULL;
- return (fs && (fs->fs.flags & DN_NOERROR)) ? 0 : ENOBUFS;
-}
diff --git a/sys/netinet/ipfw/ip_dn_private.h b/sys/netinet/ipfw/ip_dn_private.h
deleted file mode 100644
index 159ddc9..0000000
--- a/sys/netinet/ipfw/ip_dn_private.h
+++ /dev/null
@@ -1,403 +0,0 @@
-/*-
- * Copyright (c) 2010 Luigi Rizzo, Riccardo Panicucci, Universita` di Pisa
- * All rights reserved
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-/*
- * internal dummynet APIs.
- *
- * $FreeBSD$
- */
-
-#ifndef _IP_DN_PRIVATE_H
-#define _IP_DN_PRIVATE_H
-
-/* debugging support
- * use ND() to remove debugging, D() to print a line,
- * DX(level, ...) to print above a certain level
- * If you redefine D() you are expected to redefine all.
- */
-#ifndef D
-#define ND(fmt, ...) do {} while (0)
-#define D1(fmt, ...) do {} while (0)
-#define D(fmt, ...) printf("%-10s " fmt "\n", \
- __FUNCTION__, ## __VA_ARGS__)
-#define DX(lev, fmt, ...) do { \
- if (dn_cfg.debug > lev) D(fmt, ## __VA_ARGS__); } while (0)
-#endif
-
-MALLOC_DECLARE(M_DUMMYNET);
-
-#ifndef __linux__
-#define div64(a, b) ((int64_t)(a) / (int64_t)(b))
-#endif
-
-#define DN_LOCK_INIT() do { \
- mtx_init(&dn_cfg.uh_mtx, "dn_uh", NULL, MTX_DEF); \
- mtx_init(&dn_cfg.bh_mtx, "dn_bh", NULL, MTX_DEF); \
- } while (0)
-#define DN_LOCK_DESTROY() do { \
- mtx_destroy(&dn_cfg.uh_mtx); \
- mtx_destroy(&dn_cfg.bh_mtx); \
- } while (0)
-#if 0 /* not used yet */
-#define DN_UH_RLOCK() mtx_lock(&dn_cfg.uh_mtx)
-#define DN_UH_RUNLOCK() mtx_unlock(&dn_cfg.uh_mtx)
-#define DN_UH_WLOCK() mtx_lock(&dn_cfg.uh_mtx)
-#define DN_UH_WUNLOCK() mtx_unlock(&dn_cfg.uh_mtx)
-#define DN_UH_LOCK_ASSERT() mtx_assert(&dn_cfg.uh_mtx, MA_OWNED)
-#endif
-
-#define DN_BH_RLOCK() mtx_lock(&dn_cfg.uh_mtx)
-#define DN_BH_RUNLOCK() mtx_unlock(&dn_cfg.uh_mtx)
-#define DN_BH_WLOCK() mtx_lock(&dn_cfg.uh_mtx)
-#define DN_BH_WUNLOCK() mtx_unlock(&dn_cfg.uh_mtx)
-#define DN_BH_LOCK_ASSERT() mtx_assert(&dn_cfg.uh_mtx, MA_OWNED)
-
-SLIST_HEAD(dn_schk_head, dn_schk);
-SLIST_HEAD(dn_sch_inst_head, dn_sch_inst);
-SLIST_HEAD(dn_fsk_head, dn_fsk);
-SLIST_HEAD(dn_queue_head, dn_queue);
-SLIST_HEAD(dn_alg_head, dn_alg);
-
-struct mq { /* a basic queue of packets*/
- struct mbuf *head, *tail;
-};
-
-static inline void
-set_oid(struct dn_id *o, int type, int len)
-{
- o->type = type;
- o->len = len;
- o->subtype = 0;
-};
-
-/*
- * configuration and global data for a dummynet instance
- *
- * When a configuration is modified from userland, 'id' is incremented
- * so we can use the value to check for stale pointers.
- */
-struct dn_parms {
- uint32_t id; /* configuration version */
-
- /* defaults (sysctl-accessible) */
- int red_lookup_depth;
- int red_avg_pkt_size;
- int red_max_pkt_size;
- int hash_size;
- int max_hash_size;
- long byte_limit; /* max queue sizes */
- long slot_limit;
-
- int io_fast;
- int debug;
-
- /* timekeeping */
- struct timeval prev_t; /* last time dummynet_tick ran */
- struct dn_heap evheap; /* scheduled events */
-
- /* counters of objects -- used for reporting space */
- int schk_count;
- int si_count;
- int fsk_count;
- int queue_count;
-
- /* ticks and other stuff */
- uint64_t curr_time;
- /* flowsets and schedulers are in hash tables, with 'hash_size'
- * buckets. fshash is looked up at every packet arrival
- * so better be generous if we expect many entries.
- */
- struct dn_ht *fshash;
- struct dn_ht *schedhash;
- /* list of flowsets without a scheduler -- use sch_chain */
- struct dn_fsk_head fsu; /* list of unlinked flowsets */
- struct dn_alg_head schedlist; /* list of algorithms */
-
- /* Store the fs/sch to scan when draining. The value is the
- * bucket number of the hash table. Expire can be disabled
- * with net.inet.ip.dummynet.expire=0, or it happens every
- * expire ticks.
- **/
- int drain_fs;
- int drain_sch;
- uint32_t expire;
- uint32_t expire_cycle; /* tick count */
-
- int init_done;
-
- /* if the upper half is busy doing something long,
- * can set the busy flag and we will enqueue packets in
- * a queue for later processing.
- */
- int busy;
- struct mq pending;
-
-#ifdef _KERNEL
- /*
- * This file is normally used in the kernel, unless we do
- * some userland tests, in which case we do not need a mtx.
- * uh_mtx arbitrates between system calls and also
- * protects fshash, schedhash and fsunlinked.
- * These structures are readonly for the lower half.
- * bh_mtx protects all other structures which may be
- * modified upon packet arrivals
- */
-#if defined( __linux__ ) || defined( _WIN32 )
- spinlock_t uh_mtx;
- spinlock_t bh_mtx;
-#else
- struct mtx uh_mtx;
- struct mtx bh_mtx;
-#endif
-
-#endif /* _KERNEL */
-};
-
-/*
- * Delay line, contains all packets on output from a link.
- * Every scheduler instance has one.
- */
-struct delay_line {
- struct dn_id oid;
- struct dn_sch_inst *si;
- struct mq mq;
-};
-
-/*
- * The kernel side of a flowset. It is linked in a hash table
- * of flowsets, and in a list of children of their parent scheduler.
- * qht is either the queue or (if HAVE_MASK) a hash table queues.
- * Note that the mask to use is the (flow_mask|sched_mask), which
- * changes as we attach/detach schedulers. So we store it here.
- *
- * XXX If we want to add scheduler-specific parameters, we need to
- * put them in external storage because the scheduler may not be
- * available when the fsk is created.
- */
-struct dn_fsk { /* kernel side of a flowset */
- struct dn_fs fs;
- SLIST_ENTRY(dn_fsk) fsk_next; /* hash chain for fshash */
-
- struct ipfw_flow_id fsk_mask;
-
- /* qht is a hash table of queues, or just a single queue
- * a bit in fs.flags tells us which one
- */
- struct dn_ht *qht;
- struct dn_schk *sched; /* Sched we are linked to */
- SLIST_ENTRY(dn_fsk) sch_chain; /* list of fsk attached to sched */
-
- /* bucket index used by drain routine to drain queues for this
- * flowset
- */
- int drain_bucket;
- /* Parameter realted to RED / GRED */
- /* original values are in dn_fs*/
- int w_q ; /* queue weight (scaled) */
- int max_th ; /* maximum threshold for queue (scaled) */
- int min_th ; /* minimum threshold for queue (scaled) */
- int max_p ; /* maximum value for p_b (scaled) */
-
- u_int c_1 ; /* max_p/(max_th-min_th) (scaled) */
- u_int c_2 ; /* max_p*min_th/(max_th-min_th) (scaled) */
- u_int c_3 ; /* for GRED, (1-max_p)/max_th (scaled) */
- u_int c_4 ; /* for GRED, 1 - 2*max_p (scaled) */
- u_int * w_q_lookup ; /* lookup table for computing (1-w_q)^t */
- u_int lookup_depth ; /* depth of lookup table */
- int lookup_step ; /* granularity inside the lookup table */
- int lookup_weight ; /* equal to (1-w_q)^t / (1-w_q)^(t+1) */
- int avg_pkt_size ; /* medium packet size */
- int max_pkt_size ; /* max packet size */
-};
-
-/*
- * A queue is created as a child of a flowset unless it belongs to
- * a !MULTIQUEUE scheduler. It is normally in a hash table in the
- * flowset. fs always points to the parent flowset.
- * si normally points to the sch_inst, unless the flowset has been
- * detached from the scheduler -- in this case si == NULL and we
- * should not enqueue.
- */
-struct dn_queue {
- struct dn_flow ni; /* oid, flow_id, stats */
- struct mq mq; /* packets queue */
- struct dn_sch_inst *_si; /* owner scheduler instance */
- SLIST_ENTRY(dn_queue) q_next; /* hash chain list for qht */
- struct dn_fsk *fs; /* parent flowset. */
-
- /* RED parameters */
- int avg; /* average queue length est. (scaled) */
- int count; /* arrivals since last RED drop */
- int random; /* random value (scaled) */
- uint64_t q_time; /* start of queue idle time */
-
-};
-
-/*
- * The kernel side of a scheduler. Contains the userland config,
- * a link, pointer to extra config arguments from command line,
- * kernel flags, and a pointer to the scheduler methods.
- * It is stored in a hash table, and holds a list of all
- * flowsets and scheduler instances.
- * XXX sch must be at the beginning, see schk_hash().
- */
-struct dn_schk {
- struct dn_sch sch;
- struct dn_alg *fp; /* Pointer to scheduler functions */
- struct dn_link link; /* The link, embedded */
- struct dn_profile *profile; /* delay profile, if any */
- struct dn_id *cfg; /* extra config arguments */
-
- SLIST_ENTRY(dn_schk) schk_next; /* hash chain for schedhash */
-
- struct dn_fsk_head fsk_list; /* all fsk linked to me */
- struct dn_fsk *fs; /* Flowset for !MULTIQUEUE */
-
- /* bucket index used by the drain routine to drain the scheduler
- * instance for this flowset.
- */
- int drain_bucket;
-
- /* Hash table of all instances (through sch.sched_mask)
- * or single instance if no mask. Always valid.
- */
- struct dn_ht *siht;
-};
-
-
-/*
- * Scheduler instance.
- * Contains variables and all queues relative to a this instance.
- * This struct is created a runtime.
- */
-struct dn_sch_inst {
- struct dn_flow ni; /* oid, flowid and stats */
- SLIST_ENTRY(dn_sch_inst) si_next; /* hash chain for siht */
- struct delay_line dline;
- struct dn_schk *sched; /* the template */
- int kflags; /* DN_ACTIVE */
-
- int64_t credit; /* bits I can transmit (more or less). */
- uint64_t sched_time; /* time link was scheduled in ready_heap */
- uint64_t idle_time; /* start of scheduler instance idle time */
-
- /* q_count is the number of queues that this instance is using.
- * The counter is incremented or decremented when
- * a reference from the queue is created or deleted.
- * It is used to make sure that a scheduler instance can be safely
- * deleted by the drain routine. See notes below.
- */
- int q_count;
-
-};
-
-/*
- * NOTE about object drain.
- * The system will automatically (XXX check when) drain queues and
- * scheduler instances when they are idle.
- * A queue is idle when it has no packets; an instance is idle when
- * it is not in the evheap heap, and the corresponding delay line is empty.
- * A queue can be safely deleted when it is idle because of the scheduler
- * function xxx_free_queue() will remove any references to it.
- * An instance can be only deleted when no queues reference it. To be sure
- * of that, a counter (q_count) stores the number of queues that are pointing
- * to the instance.
- *
- * XXX
- * Order of scan:
- * - take all flowset in a bucket for the flowset hash table
- * - take all queues in a bucket for the flowset
- * - increment the queue bucket
- * - scan next flowset bucket
- * Nothing is done if a bucket contains no entries.
- *
- * The same schema is used for sceduler instances
- */
-
-
-/* kernel-side flags. Linux has DN_DELETE in fcntl.h
- */
-enum {
- /* 1 and 2 are reserved for the SCAN flags */
- DN_DESTROY = 0x0004, /* destroy */
- DN_DELETE_FS = 0x0008, /* destroy flowset */
- DN_DETACH = 0x0010,
- DN_ACTIVE = 0x0020, /* object is in evheap */
- DN_F_DLINE = 0x0040, /* object is a delay line */
- DN_DEL_SAFE = 0x0080, /* delete a queue only if no longer needed
- * by scheduler */
- DN_QHT_IS_Q = 0x0100, /* in flowset, qht is a single queue */
-};
-
-extern struct dn_parms dn_cfg;
-//VNET_DECLARE(struct dn_parms, _base_dn_cfg);
-//#define dn_cfg VNET(_base_dn_cfg)
-
-int dummynet_io(struct mbuf **, int , struct ip_fw_args *);
-void dummynet_task(void *context, int pending);
-void dn_reschedule(void);
-
-struct dn_queue *ipdn_q_find(struct dn_fsk *, struct dn_sch_inst *,
- struct ipfw_flow_id *);
-struct dn_sch_inst *ipdn_si_find(struct dn_schk *, struct ipfw_flow_id *);
-
-/*
- * copy_range is a template for requests for ranges of pipes/queues/scheds.
- * The number of ranges is variable and can be derived by o.len.
- * As a default, we use a small number of entries so that the struct
- * fits easily on the stack and is sufficient for most common requests.
- */
-#define DEFAULT_RANGES 5
-struct copy_range {
- struct dn_id o;
- uint32_t r[ 2 * DEFAULT_RANGES ];
-};
-
-struct copy_args {
- char **start;
- char *end;
- int flags;
- int type;
- struct copy_range *extra; /* extra filtering */
-};
-
-struct sockopt;
-int ip_dummynet_compat(struct sockopt *sopt);
-int dummynet_get(struct sockopt *sopt, void **compat);
-int dn_c_copy_q (void *_ni, void *arg);
-int dn_c_copy_pipe(struct dn_schk *s, struct copy_args *a, int nq);
-int dn_c_copy_fs(struct dn_fsk *f, struct copy_args *a, int nq);
-int dn_compat_copy_queue(struct copy_args *a, void *_o);
-int dn_compat_copy_pipe(struct copy_args *a, void *_o);
-int copy_data_helper_compat(void *_o, void *_arg);
-int dn_compat_calc_size(void);
-int do_config(void *p, int l);
-
-/* function to drain idle object */
-void dn_drain_scheduler(void);
-void dn_drain_queue(void);
-
-#endif /* _IP_DN_PRIVATE_H */
diff --git a/sys/netinet/ipfw/ip_dummynet.c b/sys/netinet/ipfw/ip_dummynet.c
deleted file mode 100644
index e1c7a08..0000000
--- a/sys/netinet/ipfw/ip_dummynet.c
+++ /dev/null
@@ -1,2314 +0,0 @@
-/*-
- * Copyright (c) 1998-2002,2010 Luigi Rizzo, Universita` di Pisa
- * Portions Copyright (c) 2000 Akamba Corp.
- * All rights reserved
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
-/*
- * Configuration and internal object management for dummynet.
- */
-
-#include "opt_inet6.h"
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/malloc.h>
-#include <sys/mbuf.h>
-#include <sys/kernel.h>
-#include <sys/lock.h>
-#include <sys/module.h>
-#include <sys/priv.h>
-#include <sys/proc.h>
-#include <sys/rwlock.h>
-#include <sys/socket.h>
-#include <sys/socketvar.h>
-#include <sys/time.h>
-#include <sys/taskqueue.h>
-#include <net/if.h> /* IFNAMSIZ, struct ifaddr, ifq head, lock.h mutex.h */
-#include <netinet/in.h>
-#include <netinet/ip_var.h> /* ip_output(), IP_FORWARDING */
-#include <netinet/ip_fw.h>
-#include <netinet/ipfw/ip_fw_private.h>
-#include <netinet/ipfw/dn_heap.h>
-#include <netinet/ip_dummynet.h>
-#include <netinet/ipfw/ip_dn_private.h>
-#include <netinet/ipfw/dn_sched.h>
-
-/* which objects to copy */
-#define DN_C_LINK 0x01
-#define DN_C_SCH 0x02
-#define DN_C_FLOW 0x04
-#define DN_C_FS 0x08
-#define DN_C_QUEUE 0x10
-
-/* we use this argument in case of a schk_new */
-struct schk_new_arg {
- struct dn_alg *fp;
- struct dn_sch *sch;
-};
-
-/*---- callout hooks. ----*/
-static struct callout dn_timeout;
-static struct task dn_task;
-static struct taskqueue *dn_tq = NULL;
-
-static void
-dummynet(void *arg)
-{
-
- (void)arg; /* UNUSED */
- taskqueue_enqueue(dn_tq, &dn_task);
-}
-
-void
-dn_reschedule(void)
-{
- callout_reset(&dn_timeout, 1, dummynet, NULL);
-}
-/*----- end of callout hooks -----*/
-
-/* Return a scheduler descriptor given the type or name. */
-static struct dn_alg *
-find_sched_type(int type, char *name)
-{
- struct dn_alg *d;
-
- SLIST_FOREACH(d, &dn_cfg.schedlist, next) {
- if (d->type == type || (name && !strcasecmp(d->name, name)))
- return d;
- }
- return NULL; /* not found */
-}
-
-int
-ipdn_bound_var(int *v, int dflt, int lo, int hi, const char *msg)
-{
- int oldv = *v;
- const char *op = NULL;
- if (dflt < lo)
- dflt = lo;
- if (dflt > hi)
- dflt = hi;
- if (oldv < lo) {
- *v = dflt;
- op = "Bump";
- } else if (oldv > hi) {
- *v = hi;
- op = "Clamp";
- } else
- return *v;
- if (op && msg)
- printf("%s %s to %d (was %d)\n", op, msg, *v, oldv);
- return *v;
-}
-
-/*---- flow_id mask, hash and compare functions ---*/
-/*
- * The flow_id includes the 5-tuple, the queue/pipe number
- * which we store in the extra area in host order,
- * and for ipv6 also the flow_id6.
- * XXX see if we want the tos byte (can store in 'flags')
- */
-static struct ipfw_flow_id *
-flow_id_mask(struct ipfw_flow_id *mask, struct ipfw_flow_id *id)
-{
- int is_v6 = IS_IP6_FLOW_ID(id);
-
- id->dst_port &= mask->dst_port;
- id->src_port &= mask->src_port;
- id->proto &= mask->proto;
- id->extra &= mask->extra;
- if (is_v6) {
- APPLY_MASK(&id->dst_ip6, &mask->dst_ip6);
- APPLY_MASK(&id->src_ip6, &mask->src_ip6);
- id->flow_id6 &= mask->flow_id6;
- } else {
- id->dst_ip &= mask->dst_ip;
- id->src_ip &= mask->src_ip;
- }
- return id;
-}
-
-/* computes an OR of two masks, result in dst and also returned */
-static struct ipfw_flow_id *
-flow_id_or(struct ipfw_flow_id *src, struct ipfw_flow_id *dst)
-{
- int is_v6 = IS_IP6_FLOW_ID(dst);
-
- dst->dst_port |= src->dst_port;
- dst->src_port |= src->src_port;
- dst->proto |= src->proto;
- dst->extra |= src->extra;
- if (is_v6) {
-#define OR_MASK(_d, _s) \
- (_d)->__u6_addr.__u6_addr32[0] |= (_s)->__u6_addr.__u6_addr32[0]; \
- (_d)->__u6_addr.__u6_addr32[1] |= (_s)->__u6_addr.__u6_addr32[1]; \
- (_d)->__u6_addr.__u6_addr32[2] |= (_s)->__u6_addr.__u6_addr32[2]; \
- (_d)->__u6_addr.__u6_addr32[3] |= (_s)->__u6_addr.__u6_addr32[3];
- OR_MASK(&dst->dst_ip6, &src->dst_ip6);
- OR_MASK(&dst->src_ip6, &src->src_ip6);
-#undef OR_MASK
- dst->flow_id6 |= src->flow_id6;
- } else {
- dst->dst_ip |= src->dst_ip;
- dst->src_ip |= src->src_ip;
- }
- return dst;
-}
-
-static int
-nonzero_mask(struct ipfw_flow_id *m)
-{
- if (m->dst_port || m->src_port || m->proto || m->extra)
- return 1;
- if (IS_IP6_FLOW_ID(m)) {
- return
- m->dst_ip6.__u6_addr.__u6_addr32[0] ||
- m->dst_ip6.__u6_addr.__u6_addr32[1] ||
- m->dst_ip6.__u6_addr.__u6_addr32[2] ||
- m->dst_ip6.__u6_addr.__u6_addr32[3] ||
- m->src_ip6.__u6_addr.__u6_addr32[0] ||
- m->src_ip6.__u6_addr.__u6_addr32[1] ||
- m->src_ip6.__u6_addr.__u6_addr32[2] ||
- m->src_ip6.__u6_addr.__u6_addr32[3] ||
- m->flow_id6;
- } else {
- return m->dst_ip || m->src_ip;
- }
-}
-
-/* XXX we may want a better hash function */
-static uint32_t
-flow_id_hash(struct ipfw_flow_id *id)
-{
- uint32_t i;
-
- if (IS_IP6_FLOW_ID(id)) {
- uint32_t *d = (uint32_t *)&id->dst_ip6;
- uint32_t *s = (uint32_t *)&id->src_ip6;
- i = (d[0] ) ^ (d[1]) ^
- (d[2] ) ^ (d[3]) ^
- (d[0] >> 15) ^ (d[1] >> 15) ^
- (d[2] >> 15) ^ (d[3] >> 15) ^
- (s[0] << 1) ^ (s[1] << 1) ^
- (s[2] << 1) ^ (s[3] << 1) ^
- (s[0] << 16) ^ (s[1] << 16) ^
- (s[2] << 16) ^ (s[3] << 16) ^
- (id->dst_port << 1) ^ (id->src_port) ^
- (id->extra) ^
- (id->proto ) ^ (id->flow_id6);
- } else {
- i = (id->dst_ip) ^ (id->dst_ip >> 15) ^
- (id->src_ip << 1) ^ (id->src_ip >> 16) ^
- (id->extra) ^
- (id->dst_port << 1) ^ (id->src_port) ^ (id->proto);
- }
- return i;
-}
-
-/* Like bcmp, returns 0 if ids match, 1 otherwise. */
-static int
-flow_id_cmp(struct ipfw_flow_id *id1, struct ipfw_flow_id *id2)
-{
- int is_v6 = IS_IP6_FLOW_ID(id1);
-
- if (!is_v6) {
- if (IS_IP6_FLOW_ID(id2))
- return 1; /* different address families */
-
- return (id1->dst_ip == id2->dst_ip &&
- id1->src_ip == id2->src_ip &&
- id1->dst_port == id2->dst_port &&
- id1->src_port == id2->src_port &&
- id1->proto == id2->proto &&
- id1->extra == id2->extra) ? 0 : 1;
- }
- /* the ipv6 case */
- return (
- !bcmp(&id1->dst_ip6,&id2->dst_ip6, sizeof(id1->dst_ip6)) &&
- !bcmp(&id1->src_ip6,&id2->src_ip6, sizeof(id1->src_ip6)) &&
- id1->dst_port == id2->dst_port &&
- id1->src_port == id2->src_port &&
- id1->proto == id2->proto &&
- id1->extra == id2->extra &&
- id1->flow_id6 == id2->flow_id6) ? 0 : 1;
-}
-/*--------- end of flow-id mask, hash and compare ---------*/
-
-/*--- support functions for the qht hashtable ----
- * Entries are hashed by flow-id
- */
-static uint32_t
-q_hash(uintptr_t key, int flags, void *arg)
-{
- /* compute the hash slot from the flow id */
- struct ipfw_flow_id *id = (flags & DNHT_KEY_IS_OBJ) ?
- &((struct dn_queue *)key)->ni.fid :
- (struct ipfw_flow_id *)key;
-
- return flow_id_hash(id);
-}
-
-static int
-q_match(void *obj, uintptr_t key, int flags, void *arg)
-{
- struct dn_queue *o = (struct dn_queue *)obj;
- struct ipfw_flow_id *id2;
-
- if (flags & DNHT_KEY_IS_OBJ) {
- /* compare pointers */
- id2 = &((struct dn_queue *)key)->ni.fid;
- } else {
- id2 = (struct ipfw_flow_id *)key;
- }
- return (0 == flow_id_cmp(&o->ni.fid, id2));
-}
-
-/*
- * create a new queue instance for the given 'key'.
- */
-static void *
-q_new(uintptr_t key, int flags, void *arg)
-{
- struct dn_queue *q, *template = arg;
- struct dn_fsk *fs = template->fs;
- int size = sizeof(*q) + fs->sched->fp->q_datalen;
-
- q = malloc(size, M_DUMMYNET, M_NOWAIT | M_ZERO);
- if (q == NULL) {
- D("no memory for new queue");
- return NULL;
- }
-
- set_oid(&q->ni.oid, DN_QUEUE, size);
- if (fs->fs.flags & DN_QHT_HASH)
- q->ni.fid = *(struct ipfw_flow_id *)key;
- q->fs = fs;
- q->_si = template->_si;
- q->_si->q_count++;
-
- if (fs->sched->fp->new_queue)
- fs->sched->fp->new_queue(q);
- dn_cfg.queue_count++;
- return q;
-}
-
-/*
- * Notify schedulers that a queue is going away.
- * If (flags & DN_DESTROY), also free the packets.
- * The version for callbacks is called q_delete_cb().
- */
-static void
-dn_delete_queue(struct dn_queue *q, int flags)
-{
- struct dn_fsk *fs = q->fs;
-
- // D("fs %p si %p\n", fs, q->_si);
- /* notify the parent scheduler that the queue is going away */
- if (fs && fs->sched->fp->free_queue)
- fs->sched->fp->free_queue(q);
- q->_si->q_count--;
- q->_si = NULL;
- if (flags & DN_DESTROY) {
- if (q->mq.head)
- dn_free_pkts(q->mq.head);
- bzero(q, sizeof(*q)); // safety
- free(q, M_DUMMYNET);
- dn_cfg.queue_count--;
- }
-}
-
-static int
-q_delete_cb(void *q, void *arg)
-{
- int flags = (int)(uintptr_t)arg;
- dn_delete_queue(q, flags);
- return (flags & DN_DESTROY) ? DNHT_SCAN_DEL : 0;
-}
-
-/*
- * calls dn_delete_queue/q_delete_cb on all queues,
- * which notifies the parent scheduler and possibly drains packets.
- * flags & DN_DESTROY: drains queues and destroy qht;
- */
-static void
-qht_delete(struct dn_fsk *fs, int flags)
-{
- ND("fs %d start flags %d qht %p",
- fs->fs.fs_nr, flags, fs->qht);
- if (!fs->qht)
- return;
- if (fs->fs.flags & DN_QHT_HASH) {
- dn_ht_scan(fs->qht, q_delete_cb, (void *)(uintptr_t)flags);
- if (flags & DN_DESTROY) {
- dn_ht_free(fs->qht, 0);
- fs->qht = NULL;
- }
- } else {
- dn_delete_queue((struct dn_queue *)(fs->qht), flags);
- if (flags & DN_DESTROY)
- fs->qht = NULL;
- }
-}
-
-/*
- * Find and possibly create the queue for a MULTIQUEUE scheduler.
- * We never call it for !MULTIQUEUE (the queue is in the sch_inst).
- */
-struct dn_queue *
-ipdn_q_find(struct dn_fsk *fs, struct dn_sch_inst *si,
- struct ipfw_flow_id *id)
-{
- struct dn_queue template;
-
- template._si = si;
- template.fs = fs;
-
- if (fs->fs.flags & DN_QHT_HASH) {
- struct ipfw_flow_id masked_id;
- if (fs->qht == NULL) {
- fs->qht = dn_ht_init(NULL, fs->fs.buckets,
- offsetof(struct dn_queue, q_next),
- q_hash, q_match, q_new);
- if (fs->qht == NULL)
- return NULL;
- }
- masked_id = *id;
- flow_id_mask(&fs->fsk_mask, &masked_id);
- return dn_ht_find(fs->qht, (uintptr_t)&masked_id,
- DNHT_INSERT, &template);
- } else {
- if (fs->qht == NULL)
- fs->qht = q_new(0, 0, &template);
- return (struct dn_queue *)fs->qht;
- }
-}
-/*--- end of queue hash table ---*/
-
-/*--- support functions for the sch_inst hashtable ----
- *
- * These are hashed by flow-id
- */
-static uint32_t
-si_hash(uintptr_t key, int flags, void *arg)
-{
- /* compute the hash slot from the flow id */
- struct ipfw_flow_id *id = (flags & DNHT_KEY_IS_OBJ) ?
- &((struct dn_sch_inst *)key)->ni.fid :
- (struct ipfw_flow_id *)key;
-
- return flow_id_hash(id);
-}
-
-static int
-si_match(void *obj, uintptr_t key, int flags, void *arg)
-{
- struct dn_sch_inst *o = obj;
- struct ipfw_flow_id *id2;
-
- id2 = (flags & DNHT_KEY_IS_OBJ) ?
- &((struct dn_sch_inst *)key)->ni.fid :
- (struct ipfw_flow_id *)key;
- return flow_id_cmp(&o->ni.fid, id2) == 0;
-}
-
-/*
- * create a new instance for the given 'key'
- * Allocate memory for instance, delay line and scheduler private data.
- */
-static void *
-si_new(uintptr_t key, int flags, void *arg)
-{
- struct dn_schk *s = arg;
- struct dn_sch_inst *si;
- int l = sizeof(*si) + s->fp->si_datalen;
-
- si = malloc(l, M_DUMMYNET, M_NOWAIT | M_ZERO);
- if (si == NULL)
- goto error;
-
- /* Set length only for the part passed up to userland. */
- set_oid(&si->ni.oid, DN_SCH_I, sizeof(struct dn_flow));
- set_oid(&(si->dline.oid), DN_DELAY_LINE,
- sizeof(struct delay_line));
- /* mark si and dline as outside the event queue */
- si->ni.oid.id = si->dline.oid.id = -1;
-
- si->sched = s;
- si->dline.si = si;
-
- if (s->fp->new_sched && s->fp->new_sched(si)) {
- D("new_sched error");
- goto error;
- }
- if (s->sch.flags & DN_HAVE_MASK)
- si->ni.fid = *(struct ipfw_flow_id *)key;
-
- dn_cfg.si_count++;
- return si;
-
-error:
- if (si) {
- bzero(si, sizeof(*si)); // safety
- free(si, M_DUMMYNET);
- }
- return NULL;
-}
-
-/*
- * Callback from siht to delete all scheduler instances. Remove
- * si and delay line from the system heap, destroy all queues.
- * We assume that all flowset have been notified and do not
- * point to us anymore.
- */
-static int
-si_destroy(void *_si, void *arg)
-{
- struct dn_sch_inst *si = _si;
- struct dn_schk *s = si->sched;
- struct delay_line *dl = &si->dline;
-
- if (dl->oid.subtype) /* remove delay line from event heap */
- heap_extract(&dn_cfg.evheap, dl);
- dn_free_pkts(dl->mq.head); /* drain delay line */
- if (si->kflags & DN_ACTIVE) /* remove si from event heap */
- heap_extract(&dn_cfg.evheap, si);
- if (s->fp->free_sched)
- s->fp->free_sched(si);
- bzero(si, sizeof(*si)); /* safety */
- free(si, M_DUMMYNET);
- dn_cfg.si_count--;
- return DNHT_SCAN_DEL;
-}
-
-/*
- * Find the scheduler instance for this packet. If we need to apply
- * a mask, do on a local copy of the flow_id to preserve the original.
- * Assume siht is always initialized if we have a mask.
- */
-struct dn_sch_inst *
-ipdn_si_find(struct dn_schk *s, struct ipfw_flow_id *id)
-{
-
- if (s->sch.flags & DN_HAVE_MASK) {
- struct ipfw_flow_id id_t = *id;
- flow_id_mask(&s->sch.sched_mask, &id_t);
- return dn_ht_find(s->siht, (uintptr_t)&id_t,
- DNHT_INSERT, s);
- }
- if (!s->siht)
- s->siht = si_new(0, 0, s);
- return (struct dn_sch_inst *)s->siht;
-}
-
-/* callback to flush credit for the scheduler instance */
-static int
-si_reset_credit(void *_si, void *arg)
-{
- struct dn_sch_inst *si = _si;
- struct dn_link *p = &si->sched->link;
-
- si->credit = p->burst + (dn_cfg.io_fast ? p->bandwidth : 0);
- return 0;
-}
-
-static void
-schk_reset_credit(struct dn_schk *s)
-{
- if (s->sch.flags & DN_HAVE_MASK)
- dn_ht_scan(s->siht, si_reset_credit, NULL);
- else if (s->siht)
- si_reset_credit(s->siht, NULL);
-}
-/*---- end of sch_inst hashtable ---------------------*/
-
-/*-------------------------------------------------------
- * flowset hash (fshash) support. Entries are hashed by fs_nr.
- * New allocations are put in the fsunlinked list, from which
- * they are removed when they point to a specific scheduler.
- */
-static uint32_t
-fsk_hash(uintptr_t key, int flags, void *arg)
-{
- uint32_t i = !(flags & DNHT_KEY_IS_OBJ) ? key :
- ((struct dn_fsk *)key)->fs.fs_nr;
-
- return ( (i>>8)^(i>>4)^i );
-}
-
-static int
-fsk_match(void *obj, uintptr_t key, int flags, void *arg)
-{
- struct dn_fsk *fs = obj;
- int i = !(flags & DNHT_KEY_IS_OBJ) ? key :
- ((struct dn_fsk *)key)->fs.fs_nr;
-
- return (fs->fs.fs_nr == i);
-}
-
-static void *
-fsk_new(uintptr_t key, int flags, void *arg)
-{
- struct dn_fsk *fs;
-
- fs = malloc(sizeof(*fs), M_DUMMYNET, M_NOWAIT | M_ZERO);
- if (fs) {
- set_oid(&fs->fs.oid, DN_FS, sizeof(fs->fs));
- dn_cfg.fsk_count++;
- fs->drain_bucket = 0;
- SLIST_INSERT_HEAD(&dn_cfg.fsu, fs, sch_chain);
- }
- return fs;
-}
-
-/*
- * detach flowset from its current scheduler. Flags as follows:
- * DN_DETACH removes from the fsk_list
- * DN_DESTROY deletes individual queues
- * DN_DELETE_FS destroys the flowset (otherwise goes in unlinked).
- */
-static void
-fsk_detach(struct dn_fsk *fs, int flags)
-{
- if (flags & DN_DELETE_FS)
- flags |= DN_DESTROY;
- ND("fs %d from sched %d flags %s %s %s",
- fs->fs.fs_nr, fs->fs.sched_nr,
- (flags & DN_DELETE_FS) ? "DEL_FS":"",
- (flags & DN_DESTROY) ? "DEL":"",
- (flags & DN_DETACH) ? "DET":"");
- if (flags & DN_DETACH) { /* detach from the list */
- struct dn_fsk_head *h;
- h = fs->sched ? &fs->sched->fsk_list : &dn_cfg.fsu;
- SLIST_REMOVE(h, fs, dn_fsk, sch_chain);
- }
- /* Free the RED parameters, they will be recomputed on
- * subsequent attach if needed.
- */
- if (fs->w_q_lookup)
- free(fs->w_q_lookup, M_DUMMYNET);
- fs->w_q_lookup = NULL;
- qht_delete(fs, flags);
- if (fs->sched && fs->sched->fp->free_fsk)
- fs->sched->fp->free_fsk(fs);
- fs->sched = NULL;
- if (flags & DN_DELETE_FS) {
- bzero(fs, sizeof(fs)); /* safety */
- free(fs, M_DUMMYNET);
- dn_cfg.fsk_count--;
- } else {
- SLIST_INSERT_HEAD(&dn_cfg.fsu, fs, sch_chain);
- }
-}
-
-/*
- * Detach or destroy all flowsets in a list.
- * flags specifies what to do:
- * DN_DESTROY: flush all queues
- * DN_DELETE_FS: DN_DESTROY + destroy flowset
- * DN_DELETE_FS implies DN_DESTROY
- */
-static void
-fsk_detach_list(struct dn_fsk_head *h, int flags)
-{
- struct dn_fsk *fs;
- int n = 0; /* only for stats */
-
- ND("head %p flags %x", h, flags);
- while ((fs = SLIST_FIRST(h))) {
- SLIST_REMOVE_HEAD(h, sch_chain);
- n++;
- fsk_detach(fs, flags);
- }
- ND("done %d flowsets", n);
-}
-
-/*
- * called on 'queue X delete' -- removes the flowset from fshash,
- * deletes all queues for the flowset, and removes the flowset.
- */
-static int
-delete_fs(int i, int locked)
-{
- struct dn_fsk *fs;
- int err = 0;
-
- if (!locked)
- DN_BH_WLOCK();
- fs = dn_ht_find(dn_cfg.fshash, i, DNHT_REMOVE, NULL);
- ND("fs %d found %p", i, fs);
- if (fs) {
- fsk_detach(fs, DN_DETACH | DN_DELETE_FS);
- err = 0;
- } else
- err = EINVAL;
- if (!locked)
- DN_BH_WUNLOCK();
- return err;
-}
-
-/*----- end of flowset hashtable support -------------*/
-
-/*------------------------------------------------------------
- * Scheduler hash. When searching by index we pass sched_nr,
- * otherwise we pass struct dn_sch * which is the first field in
- * struct dn_schk so we can cast between the two. We use this trick
- * because in the create phase (but it should be fixed).
- */
-static uint32_t
-schk_hash(uintptr_t key, int flags, void *_arg)
-{
- uint32_t i = !(flags & DNHT_KEY_IS_OBJ) ? key :
- ((struct dn_schk *)key)->sch.sched_nr;
- return ( (i>>8)^(i>>4)^i );
-}
-
-static int
-schk_match(void *obj, uintptr_t key, int flags, void *_arg)
-{
- struct dn_schk *s = (struct dn_schk *)obj;
- int i = !(flags & DNHT_KEY_IS_OBJ) ? key :
- ((struct dn_schk *)key)->sch.sched_nr;
- return (s->sch.sched_nr == i);
-}
-
-/*
- * Create the entry and intialize with the sched hash if needed.
- * Leave s->fp unset so we can tell whether a dn_ht_find() returns
- * a new object or a previously existing one.
- */
-static void *
-schk_new(uintptr_t key, int flags, void *arg)
-{
- struct schk_new_arg *a = arg;
- struct dn_schk *s;
- int l = sizeof(*s) +a->fp->schk_datalen;
-
- s = malloc(l, M_DUMMYNET, M_NOWAIT | M_ZERO);
- if (s == NULL)
- return NULL;
- set_oid(&s->link.oid, DN_LINK, sizeof(s->link));
- s->sch = *a->sch; // copy initial values
- s->link.link_nr = s->sch.sched_nr;
- SLIST_INIT(&s->fsk_list);
- /* initialize the hash table or create the single instance */
- s->fp = a->fp; /* si_new needs this */
- s->drain_bucket = 0;
- if (s->sch.flags & DN_HAVE_MASK) {
- s->siht = dn_ht_init(NULL, s->sch.buckets,
- offsetof(struct dn_sch_inst, si_next),
- si_hash, si_match, si_new);
- if (s->siht == NULL) {
- free(s, M_DUMMYNET);
- return NULL;
- }
- }
- s->fp = NULL; /* mark as a new scheduler */
- dn_cfg.schk_count++;
- return s;
-}
-
-/*
- * Callback for sched delete. Notify all attached flowsets to
- * detach from the scheduler, destroy the internal flowset, and
- * all instances. The scheduler goes away too.
- * arg is 0 (only detach flowsets and destroy instances)
- * DN_DESTROY (detach & delete queues, delete schk)
- * or DN_DELETE_FS (delete queues and flowsets, delete schk)
- */
-static int
-schk_delete_cb(void *obj, void *arg)
-{
- struct dn_schk *s = obj;
-#if 0
- int a = (int)arg;
- ND("sched %d arg %s%s",
- s->sch.sched_nr,
- a&DN_DESTROY ? "DEL ":"",
- a&DN_DELETE_FS ? "DEL_FS":"");
-#endif
- fsk_detach_list(&s->fsk_list, arg ? DN_DESTROY : 0);
- /* no more flowset pointing to us now */
- if (s->sch.flags & DN_HAVE_MASK) {
- dn_ht_scan(s->siht, si_destroy, NULL);
- dn_ht_free(s->siht, 0);
- } else if (s->siht)
- si_destroy(s->siht, NULL);
- if (s->profile) {
- free(s->profile, M_DUMMYNET);
- s->profile = NULL;
- }
- s->siht = NULL;
- if (s->fp->destroy)
- s->fp->destroy(s);
- bzero(s, sizeof(*s)); // safety
- free(obj, M_DUMMYNET);
- dn_cfg.schk_count--;
- return DNHT_SCAN_DEL;
-}
-
-/*
- * called on a 'sched X delete' command. Deletes a single scheduler.
- * This is done by removing from the schedhash, unlinking all
- * flowsets and deleting their traffic.
- */
-static int
-delete_schk(int i)
-{
- struct dn_schk *s;
-
- s = dn_ht_find(dn_cfg.schedhash, i, DNHT_REMOVE, NULL);
- ND("%d %p", i, s);
- if (!s)
- return EINVAL;
- delete_fs(i + DN_MAX_ID, 1); /* first delete internal fs */
- /* then detach flowsets, delete traffic */
- schk_delete_cb(s, (void*)(uintptr_t)DN_DESTROY);
- return 0;
-}
-/*--- end of schk hashtable support ---*/
-
-static int
-copy_obj(char **start, char *end, void *_o, const char *msg, int i)
-{
- struct dn_id *o = _o;
- int have = end - *start;
-
- if (have < o->len || o->len == 0 || o->type == 0) {
- D("(WARN) type %d %s %d have %d need %d",
- o->type, msg, i, have, o->len);
- return 1;
- }
- ND("type %d %s %d len %d", o->type, msg, i, o->len);
- bcopy(_o, *start, o->len);
- if (o->type == DN_LINK) {
- /* Adjust burst parameter for link */
- struct dn_link *l = (struct dn_link *)*start;
- l->burst = div64(l->burst, 8 * hz);
- l->delay = l->delay * 1000 / hz;
- } else if (o->type == DN_SCH) {
- /* Set id->id to the number of instances */
- struct dn_schk *s = _o;
- struct dn_id *id = (struct dn_id *)(*start);
- id->id = (s->sch.flags & DN_HAVE_MASK) ?
- dn_ht_entries(s->siht) : (s->siht ? 1 : 0);
- }
- *start += o->len;
- return 0;
-}
-
-/* Specific function to copy a queue.
- * Copies only the user-visible part of a queue (which is in
- * a struct dn_flow), and sets len accordingly.
- */
-static int
-copy_obj_q(char **start, char *end, void *_o, const char *msg, int i)
-{
- struct dn_id *o = _o;
- int have = end - *start;
- int len = sizeof(struct dn_flow); /* see above comment */
-
- if (have < len || o->len == 0 || o->type != DN_QUEUE) {
- D("ERROR type %d %s %d have %d need %d",
- o->type, msg, i, have, len);
- return 1;
- }
- ND("type %d %s %d len %d", o->type, msg, i, len);
- bcopy(_o, *start, len);
- ((struct dn_id*)(*start))->len = len;
- *start += len;
- return 0;
-}
-
-static int
-copy_q_cb(void *obj, void *arg)
-{
- struct dn_queue *q = obj;
- struct copy_args *a = arg;
- struct dn_flow *ni = (struct dn_flow *)(*a->start);
- if (copy_obj_q(a->start, a->end, &q->ni, "queue", -1))
- return DNHT_SCAN_END;
- ni->oid.type = DN_FLOW; /* override the DN_QUEUE */
- ni->oid.id = si_hash((uintptr_t)&ni->fid, 0, NULL);
- return 0;
-}
-
-static int
-copy_q(struct copy_args *a, struct dn_fsk *fs, int flags)
-{
- if (!fs->qht)
- return 0;
- if (fs->fs.flags & DN_QHT_HASH)
- dn_ht_scan(fs->qht, copy_q_cb, a);
- else
- copy_q_cb(fs->qht, a);
- return 0;
-}
-
-/*
- * This routine only copies the initial part of a profile ? XXX
- */
-static int
-copy_profile(struct copy_args *a, struct dn_profile *p)
-{
- int have = a->end - *a->start;
- /* XXX here we check for max length */
- int profile_len = sizeof(struct dn_profile) -
- ED_MAX_SAMPLES_NO*sizeof(int);
-
- if (p == NULL)
- return 0;
- if (have < profile_len) {
- D("error have %d need %d", have, profile_len);
- return 1;
- }
- bcopy(p, *a->start, profile_len);
- ((struct dn_id *)(*a->start))->len = profile_len;
- *a->start += profile_len;
- return 0;
-}
-
-static int
-copy_flowset(struct copy_args *a, struct dn_fsk *fs, int flags)
-{
- struct dn_fs *ufs = (struct dn_fs *)(*a->start);
- if (!fs)
- return 0;
- ND("flowset %d", fs->fs.fs_nr);
- if (copy_obj(a->start, a->end, &fs->fs, "flowset", fs->fs.fs_nr))
- return DNHT_SCAN_END;
- ufs->oid.id = (fs->fs.flags & DN_QHT_HASH) ?
- dn_ht_entries(fs->qht) : (fs->qht ? 1 : 0);
- if (flags) { /* copy queues */
- copy_q(a, fs, 0);
- }
- return 0;
-}
-
-static int
-copy_si_cb(void *obj, void *arg)
-{
- struct dn_sch_inst *si = obj;
- struct copy_args *a = arg;
- struct dn_flow *ni = (struct dn_flow *)(*a->start);
- if (copy_obj(a->start, a->end, &si->ni, "inst",
- si->sched->sch.sched_nr))
- return DNHT_SCAN_END;
- ni->oid.type = DN_FLOW; /* override the DN_SCH_I */
- ni->oid.id = si_hash((uintptr_t)si, DNHT_KEY_IS_OBJ, NULL);
- return 0;
-}
-
-static int
-copy_si(struct copy_args *a, struct dn_schk *s, int flags)
-{
- if (s->sch.flags & DN_HAVE_MASK)
- dn_ht_scan(s->siht, copy_si_cb, a);
- else if (s->siht)
- copy_si_cb(s->siht, a);
- return 0;
-}
-
-/*
- * compute a list of children of a scheduler and copy up
- */
-static int
-copy_fsk_list(struct copy_args *a, struct dn_schk *s, int flags)
-{
- struct dn_fsk *fs;
- struct dn_id *o;
- uint32_t *p;
-
- int n = 0, space = sizeof(*o);
- SLIST_FOREACH(fs, &s->fsk_list, sch_chain) {
- if (fs->fs.fs_nr < DN_MAX_ID)
- n++;
- }
- space += n * sizeof(uint32_t);
- DX(3, "sched %d has %d flowsets", s->sch.sched_nr, n);
- if (a->end - *(a->start) < space)
- return DNHT_SCAN_END;
- o = (struct dn_id *)(*(a->start));
- o->len = space;
- *a->start += o->len;
- o->type = DN_TEXT;
- p = (uint32_t *)(o+1);
- SLIST_FOREACH(fs, &s->fsk_list, sch_chain)
- if (fs->fs.fs_nr < DN_MAX_ID)
- *p++ = fs->fs.fs_nr;
- return 0;
-}
-
-static int
-copy_data_helper(void *_o, void *_arg)
-{
- struct copy_args *a = _arg;
- uint32_t *r = a->extra->r; /* start of first range */
- uint32_t *lim; /* first invalid pointer */
- int n;
-
- lim = (uint32_t *)((char *)(a->extra) + a->extra->o.len);
-
- if (a->type == DN_LINK || a->type == DN_SCH) {
- /* pipe|sched show, we receive a dn_schk */
- struct dn_schk *s = _o;
-
- n = s->sch.sched_nr;
- if (a->type == DN_SCH && n >= DN_MAX_ID)
- return 0; /* not a scheduler */
- if (a->type == DN_LINK && n <= DN_MAX_ID)
- return 0; /* not a pipe */
-
- /* see if the object is within one of our ranges */
- for (;r < lim; r += 2) {
- if (n < r[0] || n > r[1])
- continue;
- /* Found a valid entry, copy and we are done */
- if (a->flags & DN_C_LINK) {
- if (copy_obj(a->start, a->end,
- &s->link, "link", n))
- return DNHT_SCAN_END;
- if (copy_profile(a, s->profile))
- return DNHT_SCAN_END;
- if (copy_flowset(a, s->fs, 0))
- return DNHT_SCAN_END;
- }
- if (a->flags & DN_C_SCH) {
- if (copy_obj(a->start, a->end,
- &s->sch, "sched", n))
- return DNHT_SCAN_END;
- /* list all attached flowsets */
- if (copy_fsk_list(a, s, 0))
- return DNHT_SCAN_END;
- }
- if (a->flags & DN_C_FLOW)
- copy_si(a, s, 0);
- break;
- }
- } else if (a->type == DN_FS) {
- /* queue show, skip internal flowsets */
- struct dn_fsk *fs = _o;
-
- n = fs->fs.fs_nr;
- if (n >= DN_MAX_ID)
- return 0;
- /* see if the object is within one of our ranges */
- for (;r < lim; r += 2) {
- if (n < r[0] || n > r[1])
- continue;
- if (copy_flowset(a, fs, 0))
- return DNHT_SCAN_END;
- copy_q(a, fs, 0);
- break; /* we are done */
- }
- }
- return 0;
-}
-
-static inline struct dn_schk *
-locate_scheduler(int i)
-{
- return dn_ht_find(dn_cfg.schedhash, i, 0, NULL);
-}
-
-/*
- * red parameters are in fixed point arithmetic.
- */
-static int
-config_red(struct dn_fsk *fs)
-{
- int64_t s, idle, weight, w0;
- int t, i;
-
- fs->w_q = fs->fs.w_q;
- fs->max_p = fs->fs.max_p;
- ND("called");
- /* Doing stuff that was in userland */
- i = fs->sched->link.bandwidth;
- s = (i <= 0) ? 0 :
- hz * dn_cfg.red_avg_pkt_size * 8 * SCALE(1) / i;
-
- idle = div64((s * 3) , fs->w_q); /* s, fs->w_q scaled; idle not scaled */
- fs->lookup_step = div64(idle , dn_cfg.red_lookup_depth);
- /* fs->lookup_step not scaled, */
- if (!fs->lookup_step)
- fs->lookup_step = 1;
- w0 = weight = SCALE(1) - fs->w_q; //fs->w_q scaled
-
- for (t = fs->lookup_step; t > 1; --t)
- weight = SCALE_MUL(weight, w0);
- fs->lookup_weight = (int)(weight); // scaled
-
- /* Now doing stuff that was in kerneland */
- fs->min_th = SCALE(fs->fs.min_th);
- fs->max_th = SCALE(fs->fs.max_th);
-
- fs->c_1 = fs->max_p / (fs->fs.max_th - fs->fs.min_th);
- fs->c_2 = SCALE_MUL(fs->c_1, SCALE(fs->fs.min_th));
-
- if (fs->fs.flags & DN_IS_GENTLE_RED) {
- fs->c_3 = (SCALE(1) - fs->max_p) / fs->fs.max_th;
- fs->c_4 = SCALE(1) - 2 * fs->max_p;
- }
-
- /* If the lookup table already exist, free and create it again. */
- if (fs->w_q_lookup) {
- free(fs->w_q_lookup, M_DUMMYNET);
- fs->w_q_lookup = NULL;
- }
- if (dn_cfg.red_lookup_depth == 0) {
- printf("\ndummynet: net.inet.ip.dummynet.red_lookup_depth"
- "must be > 0\n");
- fs->fs.flags &= ~DN_IS_RED;
- fs->fs.flags &= ~DN_IS_GENTLE_RED;
- return (EINVAL);
- }
- fs->lookup_depth = dn_cfg.red_lookup_depth;
- fs->w_q_lookup = (u_int *)malloc(fs->lookup_depth * sizeof(int),
- M_DUMMYNET, M_NOWAIT);
- if (fs->w_q_lookup == NULL) {
- printf("dummynet: sorry, cannot allocate red lookup table\n");
- fs->fs.flags &= ~DN_IS_RED;
- fs->fs.flags &= ~DN_IS_GENTLE_RED;
- return(ENOSPC);
- }
-
- /* Fill the lookup table with (1 - w_q)^x */
- fs->w_q_lookup[0] = SCALE(1) - fs->w_q;
-
- for (i = 1; i < fs->lookup_depth; i++)
- fs->w_q_lookup[i] =
- SCALE_MUL(fs->w_q_lookup[i - 1], fs->lookup_weight);
-
- if (dn_cfg.red_avg_pkt_size < 1)
- dn_cfg.red_avg_pkt_size = 512;
- fs->avg_pkt_size = dn_cfg.red_avg_pkt_size;
- if (dn_cfg.red_max_pkt_size < 1)
- dn_cfg.red_max_pkt_size = 1500;
- fs->max_pkt_size = dn_cfg.red_max_pkt_size;
- ND("exit");
- return 0;
-}
-
-/* Scan all flowset attached to this scheduler and update red */
-static void
-update_red(struct dn_schk *s)
-{
- struct dn_fsk *fs;
- SLIST_FOREACH(fs, &s->fsk_list, sch_chain) {
- if (fs && (fs->fs.flags & DN_IS_RED))
- config_red(fs);
- }
-}
-
-/* attach flowset to scheduler s, possibly requeue */
-static void
-fsk_attach(struct dn_fsk *fs, struct dn_schk *s)
-{
- ND("remove fs %d from fsunlinked, link to sched %d",
- fs->fs.fs_nr, s->sch.sched_nr);
- SLIST_REMOVE(&dn_cfg.fsu, fs, dn_fsk, sch_chain);
- fs->sched = s;
- SLIST_INSERT_HEAD(&s->fsk_list, fs, sch_chain);
- if (s->fp->new_fsk)
- s->fp->new_fsk(fs);
- /* XXX compute fsk_mask */
- fs->fsk_mask = fs->fs.flow_mask;
- if (fs->sched->sch.flags & DN_HAVE_MASK)
- flow_id_or(&fs->sched->sch.sched_mask, &fs->fsk_mask);
- if (fs->qht) {
- /*
- * we must drain qht according to the old
- * type, and reinsert according to the new one.
- * The requeue is complex -- in general we need to
- * reclassify every single packet.
- * For the time being, let's hope qht is never set
- * when we reach this point.
- */
- D("XXX TODO requeue from fs %d to sch %d",
- fs->fs.fs_nr, s->sch.sched_nr);
- fs->qht = NULL;
- }
- /* set the new type for qht */
- if (nonzero_mask(&fs->fsk_mask))
- fs->fs.flags |= DN_QHT_HASH;
- else
- fs->fs.flags &= ~DN_QHT_HASH;
-
- /* XXX config_red() can fail... */
- if (fs->fs.flags & DN_IS_RED)
- config_red(fs);
-}
-
-/* update all flowsets which may refer to this scheduler */
-static void
-update_fs(struct dn_schk *s)
-{
- struct dn_fsk *fs, *tmp;
-
- SLIST_FOREACH_SAFE(fs, &dn_cfg.fsu, sch_chain, tmp) {
- if (s->sch.sched_nr != fs->fs.sched_nr) {
- D("fs %d for sch %d not %d still unlinked",
- fs->fs.fs_nr, fs->fs.sched_nr,
- s->sch.sched_nr);
- continue;
- }
- fsk_attach(fs, s);
- }
-}
-
-/*
- * Configuration -- to preserve backward compatibility we use
- * the following scheme (N is 65536)
- * NUMBER SCHED LINK FLOWSET
- * 1 .. N-1 (1)WFQ (2)WFQ (3)queue
- * N+1 .. 2N-1 (4)FIFO (5)FIFO (6)FIFO for sched 1..N-1
- * 2N+1 .. 3N-1 -- -- (7)FIFO for sched N+1..2N-1
- *
- * "pipe i config" configures #1, #2 and #3
- * "sched i config" configures #1 and possibly #6
- * "queue i config" configures #3
- * #1 is configured with 'pipe i config' or 'sched i config'
- * #2 is configured with 'pipe i config', and created if not
- * existing with 'sched i config'
- * #3 is configured with 'queue i config'
- * #4 is automatically configured after #1, can only be FIFO
- * #5 is automatically configured after #2
- * #6 is automatically created when #1 is !MULTIQUEUE,
- * and can be updated.
- * #7 is automatically configured after #2
- */
-
-/*
- * configure a link (and its FIFO instance)
- */
-static int
-config_link(struct dn_link *p, struct dn_id *arg)
-{
- int i;
-
- if (p->oid.len != sizeof(*p)) {
- D("invalid pipe len %d", p->oid.len);
- return EINVAL;
- }
- i = p->link_nr;
- if (i <= 0 || i >= DN_MAX_ID)
- return EINVAL;
- /*
- * The config program passes parameters as follows:
- * bw = bits/second (0 means no limits),
- * delay = ms, must be translated into ticks.
- * qsize = slots/bytes
- * burst ???
- */
- p->delay = (p->delay * hz) / 1000;
- /* Scale burst size: bytes -> bits * hz */
- p->burst *= 8 * hz;
-
- DN_BH_WLOCK();
- /* do it twice, base link and FIFO link */
- for (; i < 2*DN_MAX_ID; i += DN_MAX_ID) {
- struct dn_schk *s = locate_scheduler(i);
- if (s == NULL) {
- DN_BH_WUNLOCK();
- D("sched %d not found", i);
- return EINVAL;
- }
- /* remove profile if exists */
- if (s->profile) {
- free(s->profile, M_DUMMYNET);
- s->profile = NULL;
- }
- /* copy all parameters */
- s->link.oid = p->oid;
- s->link.link_nr = i;
- s->link.delay = p->delay;
- if (s->link.bandwidth != p->bandwidth) {
- /* XXX bandwidth changes, need to update red params */
- s->link.bandwidth = p->bandwidth;
- update_red(s);
- }
- s->link.burst = p->burst;
- schk_reset_credit(s);
- }
- dn_cfg.id++;
- DN_BH_WUNLOCK();
- return 0;
-}
-
-/*
- * configure a flowset. Can be called from inside with locked=1,
- */
-static struct dn_fsk *
-config_fs(struct dn_fs *nfs, struct dn_id *arg, int locked)
-{
- int i;
- struct dn_fsk *fs;
-
- if (nfs->oid.len != sizeof(*nfs)) {
- D("invalid flowset len %d", nfs->oid.len);
- return NULL;
- }
- i = nfs->fs_nr;
- if (i <= 0 || i >= 3*DN_MAX_ID)
- return NULL;
- ND("flowset %d", i);
- /* XXX other sanity checks */
- if (nfs->flags & DN_QSIZE_BYTES) {
- ipdn_bound_var(&nfs->qsize, 16384,
- 1500, dn_cfg.byte_limit, NULL); // "queue byte size");
- } else {
- ipdn_bound_var(&nfs->qsize, 50,
- 1, dn_cfg.slot_limit, NULL); // "queue slot size");
- }
- if (nfs->flags & DN_HAVE_MASK) {
- /* make sure we have some buckets */
- ipdn_bound_var((int *)&nfs->buckets, dn_cfg.hash_size,
- 1, dn_cfg.max_hash_size, "flowset buckets");
- } else {
- nfs->buckets = 1; /* we only need 1 */
- }
- if (!locked)
- DN_BH_WLOCK();
- do { /* exit with break when done */
- struct dn_schk *s;
- int flags = nfs->sched_nr ? DNHT_INSERT : 0;
- int j;
- int oldc = dn_cfg.fsk_count;
- fs = dn_ht_find(dn_cfg.fshash, i, flags, NULL);
- if (fs == NULL) {
- D("missing sched for flowset %d", i);
- break;
- }
- /* grab some defaults from the existing one */
- if (nfs->sched_nr == 0) /* reuse */
- nfs->sched_nr = fs->fs.sched_nr;
- for (j = 0; j < sizeof(nfs->par)/sizeof(nfs->par[0]); j++) {
- if (nfs->par[j] == -1) /* reuse */
- nfs->par[j] = fs->fs.par[j];
- }
- if (bcmp(&fs->fs, nfs, sizeof(*nfs)) == 0) {
- ND("flowset %d unchanged", i);
- break; /* no change, nothing to do */
- }
- if (oldc != dn_cfg.fsk_count) /* new item */
- dn_cfg.id++;
- s = locate_scheduler(nfs->sched_nr);
- /* detach from old scheduler if needed, preserving
- * queues if we need to reattach. Then update the
- * configuration, and possibly attach to the new sched.
- */
- DX(2, "fs %d changed sched %d@%p to %d@%p",
- fs->fs.fs_nr,
- fs->fs.sched_nr, fs->sched, nfs->sched_nr, s);
- if (fs->sched) {
- int flags = s ? DN_DETACH : (DN_DETACH | DN_DESTROY);
- flags |= DN_DESTROY; /* XXX temporary */
- fsk_detach(fs, flags);
- }
- fs->fs = *nfs; /* copy configuration */
- if (s != NULL)
- fsk_attach(fs, s);
- } while (0);
- if (!locked)
- DN_BH_WUNLOCK();
- return fs;
-}
-
-/*
- * config/reconfig a scheduler and its FIFO variant.
- * For !MULTIQUEUE schedulers, also set up the flowset.
- *
- * On reconfigurations (detected because s->fp is set),
- * detach existing flowsets preserving traffic, preserve link,
- * and delete the old scheduler creating a new one.
- */
-static int
-config_sched(struct dn_sch *_nsch, struct dn_id *arg)
-{
- struct dn_schk *s;
- struct schk_new_arg a; /* argument for schk_new */
- int i;
- struct dn_link p; /* copy of oldlink */
- struct dn_profile *pf = NULL; /* copy of old link profile */
- /* Used to preserv mask parameter */
- struct ipfw_flow_id new_mask;
- int new_buckets = 0;
- int new_flags = 0;
- int pipe_cmd;
- int err = ENOMEM;
-
- a.sch = _nsch;
- if (a.sch->oid.len != sizeof(*a.sch)) {
- D("bad sched len %d", a.sch->oid.len);
- return EINVAL;
- }
- i = a.sch->sched_nr;
- if (i <= 0 || i >= DN_MAX_ID)
- return EINVAL;
- /* make sure we have some buckets */
- if (a.sch->flags & DN_HAVE_MASK)
- ipdn_bound_var((int *)&a.sch->buckets, dn_cfg.hash_size,
- 1, dn_cfg.max_hash_size, "sched buckets");
- /* XXX other sanity checks */
- bzero(&p, sizeof(p));
-
- pipe_cmd = a.sch->flags & DN_PIPE_CMD;
- a.sch->flags &= ~DN_PIPE_CMD; //XXX do it even if is not set?
- if (pipe_cmd) {
- /* Copy mask parameter */
- new_mask = a.sch->sched_mask;
- new_buckets = a.sch->buckets;
- new_flags = a.sch->flags;
- }
- DN_BH_WLOCK();
-again: /* run twice, for wfq and fifo */
- /*
- * lookup the type. If not supplied, use the previous one
- * or default to WF2Q+. Otherwise, return an error.
- */
- dn_cfg.id++;
- a.fp = find_sched_type(a.sch->oid.subtype, a.sch->name);
- if (a.fp != NULL) {
- /* found. Lookup or create entry */
- s = dn_ht_find(dn_cfg.schedhash, i, DNHT_INSERT, &a);
- } else if (a.sch->oid.subtype == 0 && !a.sch->name[0]) {
- /* No type. search existing s* or retry with WF2Q+ */
- s = dn_ht_find(dn_cfg.schedhash, i, 0, &a);
- if (s != NULL) {
- a.fp = s->fp;
- /* Scheduler exists, skip to FIFO scheduler
- * if command was pipe config...
- */
- if (pipe_cmd)
- goto next;
- } else {
- /* New scheduler, create a wf2q+ with no mask
- * if command was pipe config...
- */
- if (pipe_cmd) {
- /* clear mask parameter */
- bzero(&a.sch->sched_mask, sizeof(new_mask));
- a.sch->buckets = 0;
- a.sch->flags &= ~DN_HAVE_MASK;
- }
- a.sch->oid.subtype = DN_SCHED_WF2QP;
- goto again;
- }
- } else {
- D("invalid scheduler type %d %s",
- a.sch->oid.subtype, a.sch->name);
- err = EINVAL;
- goto error;
- }
- /* normalize name and subtype */
- a.sch->oid.subtype = a.fp->type;
- bzero(a.sch->name, sizeof(a.sch->name));
- strlcpy(a.sch->name, a.fp->name, sizeof(a.sch->name));
- if (s == NULL) {
- D("cannot allocate scheduler %d", i);
- goto error;
- }
- /* restore existing link if any */
- if (p.link_nr) {
- s->link = p;
- if (!pf || pf->link_nr != p.link_nr) { /* no saved value */
- s->profile = NULL; /* XXX maybe not needed */
- } else {
- s->profile = malloc(sizeof(struct dn_profile),
- M_DUMMYNET, M_NOWAIT | M_ZERO);
- if (s->profile == NULL) {
- D("cannot allocate profile");
- goto error; //XXX
- }
- bcopy(pf, s->profile, sizeof(*pf));
- }
- }
- p.link_nr = 0;
- if (s->fp == NULL) {
- DX(2, "sched %d new type %s", i, a.fp->name);
- } else if (s->fp != a.fp ||
- bcmp(a.sch, &s->sch, sizeof(*a.sch)) ) {
- /* already existing. */
- DX(2, "sched %d type changed from %s to %s",
- i, s->fp->name, a.fp->name);
- DX(4, " type/sub %d/%d -> %d/%d",
- s->sch.oid.type, s->sch.oid.subtype,
- a.sch->oid.type, a.sch->oid.subtype);
- if (s->link.link_nr == 0)
- D("XXX WARNING link 0 for sched %d", i);
- p = s->link; /* preserve link */
- if (s->profile) {/* preserve profile */
- if (!pf)
- pf = malloc(sizeof(*pf),
- M_DUMMYNET, M_NOWAIT | M_ZERO);
- if (pf) /* XXX should issue a warning otherwise */
- bcopy(s->profile, pf, sizeof(*pf));
- }
- /* remove from the hash */
- dn_ht_find(dn_cfg.schedhash, i, DNHT_REMOVE, NULL);
- /* Detach flowsets, preserve queues. */
- // schk_delete_cb(s, NULL);
- // XXX temporarily, kill queues
- schk_delete_cb(s, (void *)DN_DESTROY);
- goto again;
- } else {
- DX(4, "sched %d unchanged type %s", i, a.fp->name);
- }
- /* complete initialization */
- s->sch = *a.sch;
- s->fp = a.fp;
- s->cfg = arg;
- // XXX schk_reset_credit(s);
- /* create the internal flowset if needed,
- * trying to reuse existing ones if available
- */
- if (!(s->fp->flags & DN_MULTIQUEUE) && !s->fs) {
- s->fs = dn_ht_find(dn_cfg.fshash, i, 0, NULL);
- if (!s->fs) {
- struct dn_fs fs;
- bzero(&fs, sizeof(fs));
- set_oid(&fs.oid, DN_FS, sizeof(fs));
- fs.fs_nr = i + DN_MAX_ID;
- fs.sched_nr = i;
- s->fs = config_fs(&fs, NULL, 1 /* locked */);
- }
- if (!s->fs) {
- schk_delete_cb(s, (void *)DN_DESTROY);
- D("error creating internal fs for %d", i);
- goto error;
- }
- }
- /* call init function after the flowset is created */
- if (s->fp->config)
- s->fp->config(s);
- update_fs(s);
-next:
- if (i < DN_MAX_ID) { /* now configure the FIFO instance */
- i += DN_MAX_ID;
- if (pipe_cmd) {
- /* Restore mask parameter for FIFO */
- a.sch->sched_mask = new_mask;
- a.sch->buckets = new_buckets;
- a.sch->flags = new_flags;
- } else {
- /* sched config shouldn't modify the FIFO scheduler */
- if (dn_ht_find(dn_cfg.schedhash, i, 0, &a) != NULL) {
- /* FIFO already exist, don't touch it */
- err = 0; /* and this is not an error */
- goto error;
- }
- }
- a.sch->sched_nr = i;
- a.sch->oid.subtype = DN_SCHED_FIFO;
- bzero(a.sch->name, sizeof(a.sch->name));
- goto again;
- }
- err = 0;
-error:
- DN_BH_WUNLOCK();
- if (pf)
- free(pf, M_DUMMYNET);
- return err;
-}
-
-/*
- * attach a profile to a link
- */
-static int
-config_profile(struct dn_profile *pf, struct dn_id *arg)
-{
- struct dn_schk *s;
- int i, olen, err = 0;
-
- if (pf->oid.len < sizeof(*pf)) {
- D("short profile len %d", pf->oid.len);
- return EINVAL;
- }
- i = pf->link_nr;
- if (i <= 0 || i >= DN_MAX_ID)
- return EINVAL;
- /* XXX other sanity checks */
- DN_BH_WLOCK();
- for (; i < 2*DN_MAX_ID; i += DN_MAX_ID) {
- s = locate_scheduler(i);
-
- if (s == NULL) {
- err = EINVAL;
- break;
- }
- dn_cfg.id++;
- /*
- * If we had a profile and the new one does not fit,
- * or it is deleted, then we need to free memory.
- */
- if (s->profile && (pf->samples_no == 0 ||
- s->profile->oid.len < pf->oid.len)) {
- free(s->profile, M_DUMMYNET);
- s->profile = NULL;
- }
- if (pf->samples_no == 0)
- continue;
- /*
- * new profile, possibly allocate memory
- * and copy data.
- */
- if (s->profile == NULL)
- s->profile = malloc(pf->oid.len,
- M_DUMMYNET, M_NOWAIT | M_ZERO);
- if (s->profile == NULL) {
- D("no memory for profile %d", i);
- err = ENOMEM;
- break;
- }
- /* preserve larger length XXX double check */
- olen = s->profile->oid.len;
- if (olen < pf->oid.len)
- olen = pf->oid.len;
- bcopy(pf, s->profile, pf->oid.len);
- s->profile->oid.len = olen;
- }
- DN_BH_WUNLOCK();
- return err;
-}
-
-/*
- * Delete all objects:
- */
-static void
-dummynet_flush(void)
-{
-
- /* delete all schedulers and related links/queues/flowsets */
- dn_ht_scan(dn_cfg.schedhash, schk_delete_cb,
- (void *)(uintptr_t)DN_DELETE_FS);
- /* delete all remaining (unlinked) flowsets */
- DX(4, "still %d unlinked fs", dn_cfg.fsk_count);
- dn_ht_free(dn_cfg.fshash, DNHT_REMOVE);
- fsk_detach_list(&dn_cfg.fsu, DN_DELETE_FS);
- /* Reinitialize system heap... */
- heap_init(&dn_cfg.evheap, 16, offsetof(struct dn_id, id));
-}
-
-/*
- * Main handler for configuration. We are guaranteed to be called
- * with an oid which is at least a dn_id.
- * - the first object is the command (config, delete, flush, ...)
- * - config_link must be issued after the corresponding config_sched
- * - parameters (DN_TXT) for an object must preceed the object
- * processed on a config_sched.
- */
-int
-do_config(void *p, int l)
-{
- struct dn_id *next, *o;
- int err = 0, err2 = 0;
- struct dn_id *arg = NULL;
- uintptr_t *a;
-
- o = p;
- if (o->id != DN_API_VERSION) {
- D("invalid api version got %d need %d",
- o->id, DN_API_VERSION);
- return EINVAL;
- }
- for (; l >= sizeof(*o); o = next) {
- struct dn_id *prev = arg;
- if (o->len < sizeof(*o) || l < o->len) {
- D("bad len o->len %d len %d", o->len, l);
- err = EINVAL;
- break;
- }
- l -= o->len;
- next = (struct dn_id *)((char *)o + o->len);
- err = 0;
- switch (o->type) {
- default:
- D("cmd %d not implemented", o->type);
- break;
-
-#ifdef EMULATE_SYSCTL
- /* sysctl emulation.
- * if we recognize the command, jump to the correct
- * handler and return
- */
- case DN_SYSCTL_SET:
- err = kesysctl_emu_set(p, l);
- return err;
-#endif
-
- case DN_CMD_CONFIG: /* simply a header */
- break;
-
- case DN_CMD_DELETE:
- /* the argument is in the first uintptr_t after o */
- a = (uintptr_t *)(o+1);
- if (o->len < sizeof(*o) + sizeof(*a)) {
- err = EINVAL;
- break;
- }
- switch (o->subtype) {
- case DN_LINK:
- /* delete base and derived schedulers */
- DN_BH_WLOCK();
- err = delete_schk(*a);
- err2 = delete_schk(*a + DN_MAX_ID);
- DN_BH_WUNLOCK();
- if (!err)
- err = err2;
- break;
-
- default:
- D("invalid delete type %d",
- o->subtype);
- err = EINVAL;
- break;
-
- case DN_FS:
- err = (*a <1 || *a >= DN_MAX_ID) ?
- EINVAL : delete_fs(*a, 0) ;
- break;
- }
- break;
-
- case DN_CMD_FLUSH:
- DN_BH_WLOCK();
- dummynet_flush();
- DN_BH_WUNLOCK();
- break;
- case DN_TEXT: /* store argument the next block */
- prev = NULL;
- arg = o;
- break;
- case DN_LINK:
- err = config_link((struct dn_link *)o, arg);
- break;
- case DN_PROFILE:
- err = config_profile((struct dn_profile *)o, arg);
- break;
- case DN_SCH:
- err = config_sched((struct dn_sch *)o, arg);
- break;
- case DN_FS:
- err = (NULL==config_fs((struct dn_fs *)o, arg, 0));
- break;
- }
- if (prev)
- arg = NULL;
- if (err != 0)
- break;
- }
- return err;
-}
-
-static int
-compute_space(struct dn_id *cmd, struct copy_args *a)
-{
- int x = 0, need = 0;
- int profile_size = sizeof(struct dn_profile) -
- ED_MAX_SAMPLES_NO*sizeof(int);
-
- /* NOTE about compute space:
- * NP = dn_cfg.schk_count
- * NSI = dn_cfg.si_count
- * NF = dn_cfg.fsk_count
- * NQ = dn_cfg.queue_count
- * - ipfw pipe show
- * (NP/2)*(dn_link + dn_sch + dn_id + dn_fs) only half scheduler
- * link, scheduler template, flowset
- * integrated in scheduler and header
- * for flowset list
- * (NSI)*(dn_flow) all scheduler instance (includes
- * the queue instance)
- * - ipfw sched show
- * (NP/2)*(dn_link + dn_sch + dn_id + dn_fs) only half scheduler
- * link, scheduler template, flowset
- * integrated in scheduler and header
- * for flowset list
- * (NSI * dn_flow) all scheduler instances
- * (NF * sizeof(uint_32)) space for flowset list linked to scheduler
- * (NQ * dn_queue) all queue [XXXfor now not listed]
- * - ipfw queue show
- * (NF * dn_fs) all flowset
- * (NQ * dn_queue) all queues
- */
- switch (cmd->subtype) {
- default:
- return -1;
- /* XXX where do LINK and SCH differ ? */
- /* 'ipfw sched show' could list all queues associated to
- * a scheduler. This feature for now is disabled
- */
- case DN_LINK: /* pipe show */
- x = DN_C_LINK | DN_C_SCH | DN_C_FLOW;
- need += dn_cfg.schk_count *
- (sizeof(struct dn_fs) + profile_size) / 2;
- need += dn_cfg.fsk_count * sizeof(uint32_t);
- break;
- case DN_SCH: /* sched show */
- need += dn_cfg.schk_count *
- (sizeof(struct dn_fs) + profile_size) / 2;
- need += dn_cfg.fsk_count * sizeof(uint32_t);
- x = DN_C_SCH | DN_C_LINK | DN_C_FLOW;
- break;
- case DN_FS: /* queue show */
- x = DN_C_FS | DN_C_QUEUE;
- break;
- case DN_GET_COMPAT: /* compatibility mode */
- need = dn_compat_calc_size();
- break;
- }
- a->flags = x;
- if (x & DN_C_SCH) {
- need += dn_cfg.schk_count * sizeof(struct dn_sch) / 2;
- /* NOT also, each fs might be attached to a sched */
- need += dn_cfg.schk_count * sizeof(struct dn_id) / 2;
- }
- if (x & DN_C_FS)
- need += dn_cfg.fsk_count * sizeof(struct dn_fs);
- if (x & DN_C_LINK) {
- need += dn_cfg.schk_count * sizeof(struct dn_link) / 2;
- }
- /*
- * When exporting a queue to userland, only pass up the
- * struct dn_flow, which is the only visible part.
- */
-
- if (x & DN_C_QUEUE)
- need += dn_cfg.queue_count * sizeof(struct dn_flow);
- if (x & DN_C_FLOW)
- need += dn_cfg.si_count * (sizeof(struct dn_flow));
- return need;
-}
-
-/*
- * If compat != NULL dummynet_get is called in compatibility mode.
- * *compat will be the pointer to the buffer to pass to ipfw
- */
-int
-dummynet_get(struct sockopt *sopt, void **compat)
-{
- int have, i, need, error;
- char *start = NULL, *buf;
- size_t sopt_valsize;
- struct dn_id *cmd;
- struct copy_args a;
- struct copy_range r;
- int l = sizeof(struct dn_id);
-
- bzero(&a, sizeof(a));
- bzero(&r, sizeof(r));
-
- /* save and restore original sopt_valsize around copyin */
- sopt_valsize = sopt->sopt_valsize;
-
- cmd = &r.o;
-
- if (!compat) {
- /* copy at least an oid, and possibly a full object */
- error = sooptcopyin(sopt, cmd, sizeof(r), sizeof(*cmd));
- sopt->sopt_valsize = sopt_valsize;
- if (error)
- goto done;
- l = cmd->len;
-#ifdef EMULATE_SYSCTL
- /* sysctl emulation. */
- if (cmd->type == DN_SYSCTL_GET)
- return kesysctl_emu_get(sopt);
-#endif
- if (l > sizeof(r)) {
- /* request larger than default, allocate buffer */
- cmd = malloc(l, M_DUMMYNET, M_WAITOK);
- error = sooptcopyin(sopt, cmd, l, l);
- sopt->sopt_valsize = sopt_valsize;
- if (error)
- goto done;
- }
- } else { /* compatibility */
- error = 0;
- cmd->type = DN_CMD_GET;
- cmd->len = sizeof(struct dn_id);
- cmd->subtype = DN_GET_COMPAT;
- // cmd->id = sopt_valsize;
- D("compatibility mode");
- }
- a.extra = (struct copy_range *)cmd;
- if (cmd->len == sizeof(*cmd)) { /* no range, create a default */
- uint32_t *rp = (uint32_t *)(cmd + 1);
- cmd->len += 2* sizeof(uint32_t);
- rp[0] = 1;
- rp[1] = DN_MAX_ID - 1;
- if (cmd->subtype == DN_LINK) {
- rp[0] += DN_MAX_ID;
- rp[1] += DN_MAX_ID;
- }
- }
- /* Count space (under lock) and allocate (outside lock).
- * Exit with lock held if we manage to get enough buffer.
- * Try a few times then give up.
- */
- for (have = 0, i = 0; i < 10; i++) {
- DN_BH_WLOCK();
- need = compute_space(cmd, &a);
-
- /* if there is a range, ignore value from compute_space() */
- if (l > sizeof(*cmd))
- need = sopt_valsize - sizeof(*cmd);
-
- if (need < 0) {
- DN_BH_WUNLOCK();
- error = EINVAL;
- goto done;
- }
- need += sizeof(*cmd);
- cmd->id = need;
- if (have >= need)
- break;
-
- DN_BH_WUNLOCK();
- if (start)
- free(start, M_DUMMYNET);
- start = NULL;
- if (need > sopt_valsize)
- break;
-
- have = need;
- start = malloc(have, M_DUMMYNET, M_WAITOK | M_ZERO);
- }
-
- if (start == NULL) {
- if (compat) {
- *compat = NULL;
- error = 1; // XXX
- } else {
- error = sooptcopyout(sopt, cmd, sizeof(*cmd));
- }
- goto done;
- }
- ND("have %d:%d sched %d, %d:%d links %d, %d:%d flowsets %d, "
- "%d:%d si %d, %d:%d queues %d",
- dn_cfg.schk_count, sizeof(struct dn_sch), DN_SCH,
- dn_cfg.schk_count, sizeof(struct dn_link), DN_LINK,
- dn_cfg.fsk_count, sizeof(struct dn_fs), DN_FS,
- dn_cfg.si_count, sizeof(struct dn_flow), DN_SCH_I,
- dn_cfg.queue_count, sizeof(struct dn_queue), DN_QUEUE);
- sopt->sopt_valsize = sopt_valsize;
- a.type = cmd->subtype;
-
- if (compat == NULL) {
- bcopy(cmd, start, sizeof(*cmd));
- ((struct dn_id*)(start))->len = sizeof(struct dn_id);
- buf = start + sizeof(*cmd);
- } else
- buf = start;
- a.start = &buf;
- a.end = start + have;
- /* start copying other objects */
- if (compat) {
- a.type = DN_COMPAT_PIPE;
- dn_ht_scan(dn_cfg.schedhash, copy_data_helper_compat, &a);
- a.type = DN_COMPAT_QUEUE;
- dn_ht_scan(dn_cfg.fshash, copy_data_helper_compat, &a);
- } else if (a.type == DN_FS) {
- dn_ht_scan(dn_cfg.fshash, copy_data_helper, &a);
- } else {
- dn_ht_scan(dn_cfg.schedhash, copy_data_helper, &a);
- }
- DN_BH_WUNLOCK();
-
- if (compat) {
- *compat = start;
- sopt->sopt_valsize = buf - start;
- /* free() is done by ip_dummynet_compat() */
- start = NULL; //XXX hack
- } else {
- error = sooptcopyout(sopt, start, buf - start);
- }
-done:
- if (cmd && cmd != &r.o)
- free(cmd, M_DUMMYNET);
- if (start)
- free(start, M_DUMMYNET);
- return error;
-}
-
-/* Callback called on scheduler instance to delete it if idle */
-static int
-drain_scheduler_cb(void *_si, void *arg)
-{
- struct dn_sch_inst *si = _si;
-
- if ((si->kflags & DN_ACTIVE) || si->dline.mq.head != NULL)
- return 0;
-
- if (si->sched->fp->flags & DN_MULTIQUEUE) {
- if (si->q_count == 0)
- return si_destroy(si, NULL);
- else
- return 0;
- } else { /* !DN_MULTIQUEUE */
- if ((si+1)->ni.length == 0)
- return si_destroy(si, NULL);
- else
- return 0;
- }
- return 0; /* unreachable */
-}
-
-/* Callback called on scheduler to check if it has instances */
-static int
-drain_scheduler_sch_cb(void *_s, void *arg)
-{
- struct dn_schk *s = _s;
-
- if (s->sch.flags & DN_HAVE_MASK) {
- dn_ht_scan_bucket(s->siht, &s->drain_bucket,
- drain_scheduler_cb, NULL);
- s->drain_bucket++;
- } else {
- if (s->siht) {
- if (drain_scheduler_cb(s->siht, NULL) == DNHT_SCAN_DEL)
- s->siht = NULL;
- }
- }
- return 0;
-}
-
-/* Called every tick, try to delete a 'bucket' of scheduler */
-void
-dn_drain_scheduler(void)
-{
- dn_ht_scan_bucket(dn_cfg.schedhash, &dn_cfg.drain_sch,
- drain_scheduler_sch_cb, NULL);
- dn_cfg.drain_sch++;
-}
-
-/* Callback called on queue to delete if it is idle */
-static int
-drain_queue_cb(void *_q, void *arg)
-{
- struct dn_queue *q = _q;
-
- if (q->ni.length == 0) {
- dn_delete_queue(q, DN_DESTROY);
- return DNHT_SCAN_DEL; /* queue is deleted */
- }
-
- return 0; /* queue isn't deleted */
-}
-
-/* Callback called on flowset used to check if it has queues */
-static int
-drain_queue_fs_cb(void *_fs, void *arg)
-{
- struct dn_fsk *fs = _fs;
-
- if (fs->fs.flags & DN_QHT_HASH) {
- /* Flowset has a hash table for queues */
- dn_ht_scan_bucket(fs->qht, &fs->drain_bucket,
- drain_queue_cb, NULL);
- fs->drain_bucket++;
- } else {
- /* No hash table for this flowset, null the pointer
- * if the queue is deleted
- */
- if (fs->qht) {
- if (drain_queue_cb(fs->qht, NULL) == DNHT_SCAN_DEL)
- fs->qht = NULL;
- }
- }
- return 0;
-}
-
-/* Called every tick, try to delete a 'bucket' of queue */
-void
-dn_drain_queue(void)
-{
- /* scan a bucket of flowset */
- dn_ht_scan_bucket(dn_cfg.fshash, &dn_cfg.drain_fs,
- drain_queue_fs_cb, NULL);
- dn_cfg.drain_fs++;
-}
-
-/*
- * Handler for the various dummynet socket options
- */
-static int
-ip_dn_ctl(struct sockopt *sopt)
-{
- void *p = NULL;
- int error, l;
-
- error = priv_check(sopt->sopt_td, PRIV_NETINET_DUMMYNET);
- if (error)
- return (error);
-
- /* Disallow sets in really-really secure mode. */
- if (sopt->sopt_dir == SOPT_SET) {
- error = securelevel_ge(sopt->sopt_td->td_ucred, 3);
- if (error)
- return (error);
- }
-
- switch (sopt->sopt_name) {
- default :
- D("dummynet: unknown option %d", sopt->sopt_name);
- error = EINVAL;
- break;
-
- case IP_DUMMYNET_FLUSH:
- case IP_DUMMYNET_CONFIGURE:
- case IP_DUMMYNET_DEL: /* remove a pipe or queue */
- case IP_DUMMYNET_GET:
- D("dummynet: compat option %d", sopt->sopt_name);
- error = ip_dummynet_compat(sopt);
- break;
-
- case IP_DUMMYNET3 :
- if (sopt->sopt_dir == SOPT_GET) {
- error = dummynet_get(sopt, NULL);
- break;
- }
- l = sopt->sopt_valsize;
- if (l < sizeof(struct dn_id) || l > 12000) {
- D("argument len %d invalid", l);
- break;
- }
- p = malloc(l, M_TEMP, M_WAITOK); // XXX can it fail ?
- error = sooptcopyin(sopt, p, l, l);
- if (error)
- break ;
- error = do_config(p, l);
- break;
- }
-
- if (p != NULL)
- free(p, M_TEMP);
-
- return error ;
-}
-
-
-static void
-ip_dn_init(void)
-{
- if (dn_cfg.init_done)
- return;
- printf("DUMMYNET %p with IPv6 initialized (100409)\n", curvnet);
- dn_cfg.init_done = 1;
- /* Set defaults here. MSVC does not accept initializers,
- * and this is also useful for vimages
- */
- /* queue limits */
- dn_cfg.slot_limit = 100; /* Foot shooting limit for queues. */
- dn_cfg.byte_limit = 1024 * 1024;
- dn_cfg.expire = 1;
-
- /* RED parameters */
- dn_cfg.red_lookup_depth = 256; /* default lookup table depth */
- dn_cfg.red_avg_pkt_size = 512; /* default medium packet size */
- dn_cfg.red_max_pkt_size = 1500; /* default max packet size */
-
- /* hash tables */
- dn_cfg.max_hash_size = 65536; /* max in the hash tables */
- dn_cfg.hash_size = 64; /* default hash size */
-
- /* create hash tables for schedulers and flowsets.
- * In both we search by key and by pointer.
- */
- dn_cfg.schedhash = dn_ht_init(NULL, dn_cfg.hash_size,
- offsetof(struct dn_schk, schk_next),
- schk_hash, schk_match, schk_new);
- dn_cfg.fshash = dn_ht_init(NULL, dn_cfg.hash_size,
- offsetof(struct dn_fsk, fsk_next),
- fsk_hash, fsk_match, fsk_new);
-
- /* bucket index to drain object */
- dn_cfg.drain_fs = 0;
- dn_cfg.drain_sch = 0;
-
- heap_init(&dn_cfg.evheap, 16, offsetof(struct dn_id, id));
- SLIST_INIT(&dn_cfg.fsu);
- SLIST_INIT(&dn_cfg.schedlist);
-
- DN_LOCK_INIT();
-
- TASK_INIT(&dn_task, 0, dummynet_task, curvnet);
- dn_tq = taskqueue_create("dummynet", M_WAITOK,
- taskqueue_thread_enqueue, &dn_tq);
- taskqueue_start_threads(&dn_tq, 1, PI_NET, "dummynet");
-
- callout_init(&dn_timeout, CALLOUT_MPSAFE);
- callout_reset(&dn_timeout, 1, dummynet, NULL);
-
- /* Initialize curr_time adjustment mechanics. */
- getmicrouptime(&dn_cfg.prev_t);
-}
-
-#ifdef KLD_MODULE
-static void
-ip_dn_destroy(int last)
-{
- callout_drain(&dn_timeout);
-
- DN_BH_WLOCK();
- if (last) {
- ND("removing last instance\n");
- ip_dn_ctl_ptr = NULL;
- ip_dn_io_ptr = NULL;
- }
-
- dummynet_flush();
- DN_BH_WUNLOCK();
- taskqueue_drain(dn_tq, &dn_task);
- taskqueue_free(dn_tq);
-
- dn_ht_free(dn_cfg.schedhash, 0);
- dn_ht_free(dn_cfg.fshash, 0);
- heap_free(&dn_cfg.evheap);
-
- DN_LOCK_DESTROY();
-}
-#endif /* KLD_MODULE */
-
-static int
-dummynet_modevent(module_t mod, int type, void *data)
-{
-
- if (type == MOD_LOAD) {
- if (ip_dn_io_ptr) {
- printf("DUMMYNET already loaded\n");
- return EEXIST ;
- }
- ip_dn_init();
- ip_dn_ctl_ptr = ip_dn_ctl;
- ip_dn_io_ptr = dummynet_io;
- return 0;
- } else if (type == MOD_UNLOAD) {
-#if !defined(KLD_MODULE)
- printf("dummynet statically compiled, cannot unload\n");
- return EINVAL ;
-#else
- ip_dn_destroy(1 /* last */);
- return 0;
-#endif
- } else
- return EOPNOTSUPP;
-}
-
-/* modevent helpers for the modules */
-static int
-load_dn_sched(struct dn_alg *d)
-{
- struct dn_alg *s;
-
- if (d == NULL)
- return 1; /* error */
- ip_dn_init(); /* just in case, we need the lock */
-
- /* Check that mandatory funcs exists */
- if (d->enqueue == NULL || d->dequeue == NULL) {
- D("missing enqueue or dequeue for %s", d->name);
- return 1;
- }
-
- /* Search if scheduler already exists */
- DN_BH_WLOCK();
- SLIST_FOREACH(s, &dn_cfg.schedlist, next) {
- if (strcmp(s->name, d->name) == 0) {
- D("%s already loaded", d->name);
- break; /* scheduler already exists */
- }
- }
- if (s == NULL)
- SLIST_INSERT_HEAD(&dn_cfg.schedlist, d, next);
- DN_BH_WUNLOCK();
- D("dn_sched %s %sloaded", d->name, s ? "not ":"");
- return s ? 1 : 0;
-}
-
-static int
-unload_dn_sched(struct dn_alg *s)
-{
- struct dn_alg *tmp, *r;
- int err = EINVAL;
-
- ND("called for %s", s->name);
-
- DN_BH_WLOCK();
- SLIST_FOREACH_SAFE(r, &dn_cfg.schedlist, next, tmp) {
- if (strcmp(s->name, r->name) != 0)
- continue;
- ND("ref_count = %d", r->ref_count);
- err = (r->ref_count != 0) ? EBUSY : 0;
- if (err == 0)
- SLIST_REMOVE(&dn_cfg.schedlist, r, dn_alg, next);
- break;
- }
- DN_BH_WUNLOCK();
- D("dn_sched %s %sunloaded", s->name, err ? "not ":"");
- return err;
-}
-
-int
-dn_sched_modevent(module_t mod, int cmd, void *arg)
-{
- struct dn_alg *sch = arg;
-
- if (cmd == MOD_LOAD)
- return load_dn_sched(sch);
- else if (cmd == MOD_UNLOAD)
- return unload_dn_sched(sch);
- else
- return EINVAL;
-}
-
-static moduledata_t dummynet_mod = {
- "dummynet", dummynet_modevent, NULL
-};
-
-#define DN_SI_SUB SI_SUB_PROTO_IFATTACHDOMAIN
-#define DN_MODEV_ORD (SI_ORDER_ANY - 128) /* after ipfw */
-DECLARE_MODULE(dummynet, dummynet_mod, DN_SI_SUB, DN_MODEV_ORD);
-MODULE_DEPEND(dummynet, ipfw, 2, 2, 2);
-MODULE_VERSION(dummynet, 3);
-
-/*
- * Starting up. Done in order after dummynet_modevent() has been called.
- * VNET_SYSINIT is also called for each existing vnet and each new vnet.
- */
-//VNET_SYSINIT(vnet_dn_init, DN_SI_SUB, DN_MODEV_ORD+2, ip_dn_init, NULL);
-
-/*
- * Shutdown handlers up shop. These are done in REVERSE ORDER, but still
- * after dummynet_modevent() has been called. Not called on reboot.
- * VNET_SYSUNINIT is also called for each exiting vnet as it exits.
- * or when the module is unloaded.
- */
-//VNET_SYSUNINIT(vnet_dn_uninit, DN_SI_SUB, DN_MODEV_ORD+2, ip_dn_destroy, NULL);
-
-/* end of file */
diff --git a/sys/netinet/ipfw/ip_fw2.c b/sys/netinet/ipfw/ip_fw2.c
deleted file mode 100644
index 0dfab1f..0000000
--- a/sys/netinet/ipfw/ip_fw2.c
+++ /dev/null
@@ -1,2790 +0,0 @@
-/*-
- * Copyright (c) 2002-2009 Luigi Rizzo, Universita` di Pisa
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
-/*
- * The FreeBSD IP packet firewall, main file
- */
-
-#include "opt_ipfw.h"
-#include "opt_ipdivert.h"
-#include "opt_inet.h"
-#ifndef INET
-#error "IPFIREWALL requires INET"
-#endif /* INET */
-#include "opt_inet6.h"
-#include "opt_ipsec.h"
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/condvar.h>
-#include <sys/eventhandler.h>
-#include <sys/malloc.h>
-#include <sys/mbuf.h>
-#include <sys/kernel.h>
-#include <sys/lock.h>
-#include <sys/jail.h>
-#include <sys/module.h>
-#include <sys/priv.h>
-#include <sys/proc.h>
-#include <sys/rwlock.h>
-#include <sys/socket.h>
-#include <sys/socketvar.h>
-#include <sys/sysctl.h>
-#include <sys/syslog.h>
-#include <sys/ucred.h>
-#include <net/ethernet.h> /* for ETHERTYPE_IP */
-#include <net/if.h>
-#include <net/route.h>
-#include <net/pf_mtag.h>
-#include <net/vnet.h>
-
-#include <netinet/in.h>
-#include <netinet/in_var.h>
-#include <netinet/in_pcb.h>
-#include <netinet/ip.h>
-#include <netinet/ip_var.h>
-#include <netinet/ip_icmp.h>
-#include <netinet/ip_fw.h>
-#include <netinet/ipfw/ip_fw_private.h>
-#include <netinet/ip_carp.h>
-#include <netinet/pim.h>
-#include <netinet/tcp_var.h>
-#include <netinet/udp.h>
-#include <netinet/udp_var.h>
-#include <netinet/sctp.h>
-
-#include <netinet/ip6.h>
-#include <netinet/icmp6.h>
-#ifdef INET6
-#include <netinet6/in6_pcb.h>
-#include <netinet6/scope6_var.h>
-#include <netinet6/ip6_var.h>
-#endif
-
-#include <machine/in_cksum.h> /* XXX for in_cksum */
-
-#ifdef MAC
-#include <security/mac/mac_framework.h>
-#endif
-
-/*
- * static variables followed by global ones.
- * All ipfw global variables are here.
- */
-
-/* ipfw_vnet_ready controls when we are open for business */
-static VNET_DEFINE(int, ipfw_vnet_ready) = 0;
-#define V_ipfw_vnet_ready VNET(ipfw_vnet_ready)
-
-static VNET_DEFINE(int, fw_deny_unknown_exthdrs);
-#define V_fw_deny_unknown_exthdrs VNET(fw_deny_unknown_exthdrs)
-
-static VNET_DEFINE(int, fw_permit_single_frag6) = 1;
-#define V_fw_permit_single_frag6 VNET(fw_permit_single_frag6)
-
-/*
- * Compile-time default for the "default_to_accept" knob: 1 when the kernel
- * is built with IPFIREWALL_DEFAULT_TO_ACCEPT, otherwise zero-initialized.
- * The same variable is also registered as a loader tunable further down.
- */
-#ifdef IPFIREWALL_DEFAULT_TO_ACCEPT
-static int default_to_accept = 1;
-#else
-static int default_to_accept;
-#endif
-
-VNET_DEFINE(int, autoinc_step);
-VNET_DEFINE(int, fw_one_pass) = 1;
-
-VNET_DEFINE(unsigned int, fw_tables_max);
-/* Use 128 tables by default */
-static unsigned int default_fw_tables = IPFW_TABLES_DEFAULT;
-
-/*
- * Each rule belongs to one of 32 different sets (0..31).
- * The variable set_disable contains one bit per set.
- * If the bit is set, all rules in the corresponding set
- * are disabled. Set RESVD_SET(31) is reserved for the default rule
- * and rules that are not deleted by the flush command,
- * and CANNOT be disabled.
- * Rules in set RESVD_SET can only be deleted individually.
- */
-VNET_DEFINE(u_int32_t, set_disable);
-#define V_set_disable VNET(set_disable)
-
-VNET_DEFINE(int, fw_verbose);
-/* counter for ipfw_log(NULL...) */
-VNET_DEFINE(u_int64_t, norule_counter);
-VNET_DEFINE(int, verbose_limit);
-
-/* layer3_chain contains the list of rules for layer 3 */
-VNET_DEFINE(struct ip_fw_chain, layer3_chain);
-
-/*
- * Function-pointer hooks for NAT support.  None of them is assigned in
- * this part of the file; presumably the NAT code installs them when it is
- * loaded -- TODO confirm against the NAT module.
- */
-ipfw_nat_t *ipfw_nat_ptr = NULL;
-struct cfg_nat *(*lookup_nat_ptr)(struct nat_list *, int);
-ipfw_nat_cfg_t *ipfw_nat_cfg_ptr;
-ipfw_nat_cfg_t *ipfw_nat_del_ptr;
-ipfw_nat_cfg_t *ipfw_nat_get_cfg_ptr;
-ipfw_nat_cfg_t *ipfw_nat_get_log_ptr;
-
-#ifdef SYSCTL_NODE
-uint32_t dummy_def = IPFW_DEFAULT_RULE;
-static int sysctl_ipfw_table_num(SYSCTL_HANDLER_ARGS);
-
-SYSBEGIN(f3)
-
-SYSCTL_NODE(_net_inet_ip, OID_AUTO, fw, CTLFLAG_RW, 0, "Firewall");
-SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, one_pass,
- CTLFLAG_RW | CTLFLAG_SECURE3, &VNET_NAME(fw_one_pass), 0,
- "Only do a single pass through ipfw when using dummynet(4)");
-SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, autoinc_step,
- CTLFLAG_RW, &VNET_NAME(autoinc_step), 0,
- "Rule number auto-increment step");
-SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, verbose,
- CTLFLAG_RW | CTLFLAG_SECURE3, &VNET_NAME(fw_verbose), 0,
- "Log matches to ipfw rules");
-SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, verbose_limit,
- CTLFLAG_RW, &VNET_NAME(verbose_limit), 0,
- "Set upper limit of matches of ipfw rules logged");
-SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, default_rule, CTLFLAG_RD,
- &dummy_def, 0,
- "The default/max possible rule number.");
-SYSCTL_VNET_PROC(_net_inet_ip_fw, OID_AUTO, tables_max,
- CTLTYPE_UINT|CTLFLAG_RW, 0, 0, sysctl_ipfw_table_num, "IU",
- "Maximum number of tables");
-SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, default_to_accept, CTLFLAG_RDTUN,
- &default_to_accept, 0,
- "Make the default rule accept all packets.");
-TUNABLE_INT("net.inet.ip.fw.default_to_accept", &default_to_accept);
-TUNABLE_INT("net.inet.ip.fw.tables_max", (int *)&default_fw_tables);
-SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, static_count,
- CTLFLAG_RD, &VNET_NAME(layer3_chain.n_rules), 0,
- "Number of static rules");
-
-#ifdef INET6
-SYSCTL_DECL(_net_inet6_ip6);
-SYSCTL_NODE(_net_inet6_ip6, OID_AUTO, fw, CTLFLAG_RW, 0, "Firewall");
-SYSCTL_VNET_INT(_net_inet6_ip6_fw, OID_AUTO, deny_unknown_exthdrs,
- CTLFLAG_RW | CTLFLAG_SECURE, &VNET_NAME(fw_deny_unknown_exthdrs), 0,
- "Deny packets with unknown IPv6 Extension Headers");
-SYSCTL_VNET_INT(_net_inet6_ip6_fw, OID_AUTO, permit_single_frag6,
- CTLFLAG_RW | CTLFLAG_SECURE, &VNET_NAME(fw_permit_single_frag6), 0,
- "Permit single packet IPv6 fragments");
-#endif /* INET6 */
-
-SYSEND
-
-#endif /* SYSCTL_NODE */
-
-
-/*
- * Some macros used in the various matching options.
- * L3HDR maps an ipv4 pointer into a layer3 header pointer of type T
- * Other macros just cast void * into the appropriate type
- */
-#define L3HDR(T, ip) ((T *)((u_int32_t *)(ip) + (ip)->ip_hl))
-#define TCP(p) ((struct tcphdr *)(p))
-#define SCTP(p) ((struct sctphdr *)(p))
-#define UDP(p) ((struct udphdr *)(p))
-#define ICMP(p) ((struct icmphdr *)(p))
-#define ICMP6(p) ((struct icmp6_hdr *)(p))
-
-/*
- * Test whether the type of an ICMP message is selected by the bitmap in
- * cmd->d[0], where bit N set means "match ICMP type N".
- *
- * Returns 1 on a match and 0 otherwise.
- *
- * Only one bitmap word is consulted, so a type that does not fit in it can
- * never match.  The explicit "type >= 32" test matters: shifting an int by
- * 32 or more is undefined, and on CPUs that mask the shift count a high
- * type would silently alias a low bit.  The shift is done on an unsigned
- * value so that bit 31 is well defined too.
- * NOTE(review): assumes cmd->d[] holds 32-bit words -- confirm against the
- * ipfw_insn_u32 definition.
- */
-static __inline int
-icmptype_match(struct icmphdr *icmp, ipfw_insn_u32 *cmd)
-{
-	int type = icmp->icmp_type;
-
-	if (type > ICMP_MAXTYPE || type >= 32)
-		return (0);
-	return ((cmd->d[0] & (1U << type)) != 0);
-}
-
-/* Bitmap of the ICMP types treated as queries by is_icmp_query(). */
-#define TT ( (1 << ICMP_ECHO) | (1 << ICMP_ROUTERSOLICIT) | \
-	(1 << ICMP_TSTAMP) | (1 << ICMP_IREQ) | (1 << ICMP_MASKREQ) )
-
-/*
- * Return 1 if the ICMP message is one of the query types collected in TT,
- * 0 otherwise.
- *
- * Types that do not fit in the one-word bitmap are rejected before the
- * shift: shifting by 32 or more is undefined and could otherwise make a
- * high type alias one of the query bits.  The shift operand is unsigned so
- * that bit 31 is well defined.
- */
-static int
-is_icmp_query(struct icmphdr *icmp)
-{
-	int type = icmp->icmp_type;
-
-	if (type > ICMP_MAXTYPE || type >= 32)
-		return (0);
-	return ((TT & (1U << type)) != 0);
-}
-#undef TT
-
-/*
- * The following checks use two arrays of 8 or 16 bits to store the
- * bits that we want set or clear, respectively. They are in the
- * low and high half of cmd->arg1 or cmd->d[0].
- *
- * We scan options and store the bits we find set. We succeed if
- *
- * (want_set & ~bits) == 0 && (want_clear & ~bits) == want_clear
- *
- * The code is sometimes optimized not to store additional variables.
- */
-
-/*
- * Compare the set of flags found in a packet (bits) with the two 8-bit
- * masks packed in cmd->arg1: the low byte lists flags that must be set,
- * the high byte lists flags that must be clear.
- * Returns 1 when both requirements hold, 0 otherwise.
- */
-static int
-flags_match(ipfw_insn *cmd, u_int8_t bits)
-{
-	/* flags that are NOT present in the packet */
-	const u_int8_t absent = ~bits;
-	const u_char want_set = cmd->arg1 & 0xff;
-	const u_char want_clear = (cmd->arg1 >> 8) & 0xff;
-
-	/* a flag we want set is absent */
-	if ((want_set & absent) != 0)
-		return 0;
-	/* a flag we want clear is present */
-	if ((want_clear & absent) != want_clear)
-		return 0;
-	return 1;
-}
-
-/*
- * Walk the IPv4 options that follow the fixed header, note which of
- * LSRR, SSRR, RR and TS are present, and hand the resulting flag set to
- * flags_match().
- *
- * Returns 0 straight away when an option carries a length that is not
- * positive or that exceeds the remaining option area; otherwise returns
- * the result of flags_match().
- */
-static int
-ipopts_match(struct ip *ip, ipfw_insn *cmd)
-{
-	u_char *optp = (u_char *)(ip + 1);
-	int remaining = (ip->ip_hl << 2) - sizeof (struct ip);
-	int found = 0;
-	int optlen;
-
-	while (remaining > 0) {
-		int opt = optp[IPOPT_OPTVAL];
-
-		if (opt == IPOPT_EOL)
-			break;
-		if (opt != IPOPT_NOP) {
-			optlen = optp[IPOPT_OLEN];
-			if (optlen <= 0 || optlen > remaining)
-				return 0; /* invalid or truncated */
-		} else
-			optlen = 1;
-
-		if (opt == IPOPT_LSRR)
-			found |= IP_FW_IPOPT_LSRR;
-		else if (opt == IPOPT_SSRR)
-			found |= IP_FW_IPOPT_SSRR;
-		else if (opt == IPOPT_RR)
-			found |= IP_FW_IPOPT_RR;
-		else if (opt == IPOPT_TS)
-			found |= IP_FW_IPOPT_TS;
-
-		remaining -= optlen;
-		optp += optlen;
-	}
-	return (flags_match(cmd, found));
-}
-
-/*
- * Walk the TCP options that follow the fixed header, note which of MSS,
- * window scale, SACK (permitted or block) and timestamp are present, and
- * hand the resulting flag set to flags_match().
- *
- * Parsing stops at end-of-list or at the first malformed option; the
- * flags collected up to that point are still matched.  An option is
- * malformed when its length byte would lie outside the option area, when
- * the length is below 2 (kind and length octets are part of the length),
- * or when the option would extend past the area announced by th_off.
- */
-static int
-tcpopts_match(struct tcphdr *tcp, ipfw_insn *cmd)
-{
-	int optlen, bits = 0;
-	u_char *cp = (u_char *)(tcp + 1);
-	int x = (tcp->th_off << 2) - sizeof(struct tcphdr);
-
-	for (; x > 0; x -= optlen, cp += optlen) {
-		int opt = cp[0];
-		if (opt == TCPOPT_EOL)
-			break;
-		if (opt == TCPOPT_NOP)
-			optlen = 1;
-		else {
-			/* the length octet must itself be inside the options */
-			if (x < 2)
-				break;
-			optlen = cp[1];
-			/* too short to be valid, or truncated */
-			if (optlen < 2 || optlen > x)
-				break;
-		}
-
-		switch (opt) {
-
-		default:
-			break;
-
-		case TCPOPT_MAXSEG:
-			bits |= IP_FW_TCPOPT_MSS;
-			break;
-
-		case TCPOPT_WINDOW:
-			bits |= IP_FW_TCPOPT_WINDOW;
-			break;
-
-		case TCPOPT_SACK_PERMITTED:
-		case TCPOPT_SACK:
-			bits |= IP_FW_TCPOPT_SACK;
-			break;
-
-		case TCPOPT_TIMESTAMP:
-			bits |= IP_FW_TCPOPT_TS;
-			break;
-
-		}
-	}
-	return (flags_match(cmd, bits));
-}
-
-/*
- * Match an interface against an "interface" instruction.
- *
- * ifp       interface to test; NULL never matches.
- * cmd       the instruction: either a name (optionally a glob pattern or a
- *           table reference) or, when the name is empty, an IPv4 address.
- * chain     rule chain handed to the table lookup.
- * tablearg  passed through to the table lookup.
- *
- * Returns non-zero on a match and 0 otherwise.
- */
-static int
-iface_match(struct ifnet *ifp, ipfw_insn_if *cmd, struct ip_fw_chain *chain, uint32_t *tablearg)
-{
- if (ifp == NULL) /* no iface with this packet, match fails */
- return 0;
- /* Check by name or by IP address */
- if (cmd->name[0] != '\0') { /* match by name */
- /* a first byte of 1 marks a table lookup; cmd->p.glob is passed as the table argument */
- if (cmd->name[0] == '\1') /* use tablearg to match */
- return ipfw_lookup_table_extended(chain, cmd->p.glob,
- ifp->if_xname, tablearg, IPFW_TABLE_INTERFACE);
- /* Check name */
- if (cmd->p.glob) {
- /* pattern match on the interface name */
- if (fnmatch(cmd->name, ifp->if_xname, 0) == 0)
- return(1);
- } else {
- /* plain comparison, bounded by IFNAMSIZ */
- if (strncmp(ifp->if_xname, cmd->name, IFNAMSIZ) == 0)
- return(1);
- }
- } else {
-#ifdef __FreeBSD__ /* and OSX too ? */
- struct ifaddr *ia;
-
- /* scan the interface's IPv4 addresses under the address read lock */
- if_addr_rlock(ifp);
- TAILQ_FOREACH(ia, &ifp->if_addrhead, ifa_link) {
- if (ia->ifa_addr->sa_family != AF_INET)
- continue;
- if (cmd->p.ip.s_addr == ((struct sockaddr_in *)
- (ia->ifa_addr))->sin_addr.s_addr) {
- /* drop the lock before the early return */
- if_addr_runlock(ifp);
- return(1); /* match */
- }
- }
- if_addr_runlock(ifp);
-#endif /* __FreeBSD__ */
- }
- return(0); /* no match, fail ... */
-}
-
-/*
- * The verify_path function checks if a route to the src exists and
- * if it is reachable via ifp (when provided).
- *
- * The 'verrevpath' option checks that the interface that an IP packet
- * arrives on is the same interface that traffic destined for the
- * packet's source address would be routed out of.
- * The 'versrcreach' option just checks that the source address is
- * reachable via any route (except default) in the routing table.
- * These two are a measure to block forged packets. This is also
- * commonly known as "anti-spoofing" or Unicast Reverse Path
- * Forwarding (Unicast RPF) in Cisco-ese. The name of the knobs
- * is purposely reminiscent of the Cisco IOS command,
- *
- * ip verify unicast reverse-path
- * ip verify unicast source reachable-via any
- *
- * which implements the same functionality. But note that the syntax
- * is misleading, and the check may be performed on all IP packets
- * whether unicast, multicast, or broadcast.
- */
-/*
- * Look up a route to src in routing table fib and decide whether the
- * source address is acceptable.
- *
- * With ifp given, the route must leave through that interface.  Without
- * ifp, any route is fine unless it is the default route or is flagged
- * reject/blackhole.  Returns 1 when acceptable, 0 otherwise (including
- * when no route exists, and always on non-FreeBSD builds).
- */
-static int
-verify_path(struct in_addr src, struct ifnet *ifp, u_int fib)
-{
-#ifndef __FreeBSD__
-	return 0;
-#else
-	struct route ro;
-	struct sockaddr_in *sin;
-	int valid;
-
-	bzero(&ro, sizeof(ro));
-
-	sin = (struct sockaddr_in *)&(ro.ro_dst);
-	sin->sin_family = AF_INET;
-	sin->sin_len = sizeof(*sin);
-	sin->sin_addr = src;
-	in_rtalloc_ign(&ro, 0, fib);
-
-	if (ro.ro_rt == NULL)
-		return 0;
-
-	if (ifp != NULL) {
-		/*
-		 * Compare against rt->rt_ifa->ifa_ifp rather than rt->rt_ifp
-		 * so that packets injected back by if_simloop() pass: with
-		 * useloopback == 1 a route via lo0 for our own address may
-		 * exist, and that routing asymmetry must be tolerated.
-		 */
-		valid = (ro.ro_rt->rt_ifa->ifa_ifp == ifp);
-	} else if (satosin(rt_key(ro.ro_rt))->sin_addr.s_addr == INADDR_ANY) {
-		/* only the default route matched */
-		valid = 0;
-	} else if (ro.ro_rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
-		/* blackhole/reject route */
-		valid = 0;
-	} else {
-		/* found valid route */
-		valid = 1;
-	}
-
-	/* single release point for the route reference */
-	RTFREE(ro.ro_rt);
-	return valid;
-#endif /* __FreeBSD__ */
-}
-
-#ifdef INET6
-/*
- * ipv6 specific rules here...
- */
-/*
- * Test whether an ICMPv6 type is selected by the multi-word bitmap in
- * cmd->d[]: word type/32, bit type%32.
- *
- * Returns 1 on a match and 0 otherwise.  Negative types are rejected so
- * they cannot index before the bitmap, and the shift is done on an
- * unsigned value because shifting a signed 1 into bit 31 is undefined.
- * NOTE(review): assumes cmd->d[] has at least ICMP6_MAXTYPE/32 + 1 words;
- * that allocation is not visible here -- confirm.
- */
-static __inline int
-icmp6type_match (int type, ipfw_insn_u32 *cmd)
-{
-	if (type < 0 || type > ICMP6_MAXTYPE)
-		return (0);
-	return ((cmd->d[type / 32] & (1U << (type % 32))) != 0);
-}
-
-/*
- * Return 1 if curr_flow equals one of the flow ids stored in cmd->d[],
- * 0 otherwise.
- * NOTE(review): the scan is inclusive of index cmd->o.arg1; whether arg1
- * holds the entry count or the last valid index cannot be told from this
- * function -- confirm against the code that builds the instruction.
- */
-static int
-flow6id_match( int curr_flow, ipfw_insn_u32 *cmd )
-{
-	int idx = 0;
-
-	while (idx <= cmd->o.arg1) {
-		if (curr_flow == cmd->d[idx])
-			return 1;
-		++idx;
-	}
-	return 0;
-}
-
-/*
- * Support for the IP6_*_ME opcodes: return 1 if ip6_addr equals an IPv6
- * address configured on any interface in V_ifnet (compared after the
- * scope has been cleared from the interface address), 0 otherwise.
- */
-static int
-search_ip6_addr_net (struct in6_addr * ip6_addr)
-{
-	struct ifnet *ifp;
-	struct ifaddr *ifa;
-	struct in6_ifaddr *ia6;
-	struct in6_addr candidate;
-	int found;
-
-	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
-		found = 0;
-		if_addr_rlock(ifp);
-		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
-			if (ifa->ifa_addr->sa_family != AF_INET6)
-				continue;
-			ia6 = (struct in6_ifaddr *)ifa;
-			/* work on a copy so the stored address keeps its scope */
-			candidate = ia6->ia_addr.sin6_addr;
-			in6_clearscope(&candidate);
-			if (IN6_ARE_ADDR_EQUAL(ip6_addr, &candidate)) {
-				found = 1;
-				break;
-			}
-		}
-		/* the address lock is always released before returning */
-		if_addr_runlock(ifp);
-		if (found)
-			return 1;
-	}
-	return 0;
-}
-
-/*
- * IPv6 counterpart of verify_path(): look up a route to src in routing
- * table fib and decide whether the source address is acceptable.
- *
- * With ifp given, the route must leave through that interface.  Without
- * ifp, any route is fine unless it is the default route or is flagged
- * reject/blackhole.  Returns 1 when acceptable, 0 otherwise (including
- * when no route exists).
- */
-static int
-verify_path6(struct in6_addr *src, struct ifnet *ifp, u_int fib)
-{
-	struct route_in6 ro;
-	struct sockaddr_in6 *sin6;
-	int valid;
-
-	bzero(&ro, sizeof(ro));
-
-	sin6 = (struct sockaddr_in6 * )&(ro.ro_dst);
-	sin6->sin6_family = AF_INET6;
-	sin6->sin6_len = sizeof(*sin6);
-	sin6->sin6_addr = *src;
-
-	in6_rtalloc_ign(&ro, 0, fib);
-	if (ro.ro_rt == NULL)
-		return 0;
-
-	if (ifp != NULL) {
-		/*
-		 * Compare against rt->rt_ifa->ifa_ifp rather than rt->rt_ifp
-		 * to support packets sent to one of our own addresses, where
-		 * the former is the first argument of if_simloop() (=ifp)
-		 * and the latter is lo0.
-		 */
-		valid = (ro.ro_rt->rt_ifa->ifa_ifp == ifp);
-	} else if (IN6_IS_ADDR_UNSPECIFIED(
-	    &satosin6(rt_key(ro.ro_rt))->sin6_addr)) {
-		/* only the default route matched */
-		valid = 0;
-	} else if (ro.ro_rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
-		/* blackhole/reject route */
-		valid = 0;
-	} else {
-		/* found valid route */
-		valid = 1;
-	}
-
-	/* single release point for the route reference */
-	RTFREE(ro.ro_rt);
-	return valid;
-}
-
-/*
- * Return 1 if icmp6_type is one of the ICMPv6 query types listed below
- * (and does not exceed ICMP6_MAXTYPE), 0 otherwise.
- * Written as comparisons rather than a switch: nothing visible here
- * guarantees that the constants all have distinct values.
- */
-static int
-is_icmp6_query(int icmp6_type)
-{
-	if (icmp6_type > ICMP6_MAXTYPE)
-		return (0);
-
-	if (icmp6_type == ICMP6_ECHO_REQUEST)
-		return (1);
-	if (icmp6_type == ICMP6_MEMBERSHIP_QUERY)
-		return (1);
-	if (icmp6_type == ICMP6_WRUREQUEST)
-		return (1);
-	if (icmp6_type == ICMP6_FQDN_QUERY)
-		return (1);
-	if (icmp6_type == ICMP6_NI_QUERY)
-		return (1);
-
-	return (0);
-}
-
-/*
- * Reject an IPv6 packet, consuming args->m in every case (args->m is NULL
- * on return).
- *
- * code  ICMP6_UNREACH_RST asks for a TCP reset; any other value is used as
- *       the code of an ICMPv6 destination-unreachable error.
- * hlen  offset from ip6 to the upper-layer header.
- * ip6   start of the IPv6 header.
- */
-static void
-send_reject6(struct ip_fw_args *args, int code, u_int hlen, struct ip6_hdr *ip6)
-{
- struct mbuf *m;
-
- m = args->m;
- if (code == ICMP6_UNREACH_RST && args->f_id.proto == IPPROTO_TCP) {
- struct tcphdr *tcp;
- tcp = (struct tcphdr *)((char *)ip6 + hlen);
-
- /* never answer a RST with another RST */
- if ((tcp->th_flags & TH_RST) == 0) {
- struct mbuf *m0;
- /* build the reply from the flow id and the offending segment's seq/ack */
- m0 = ipfw_send_pkt(args->m, &(args->f_id),
- ntohl(tcp->th_seq), ntohl(tcp->th_ack),
- tcp->th_flags | TH_RST);
- if (m0 != NULL)
- ip6_output(m0, NULL, NULL, 0, NULL, NULL,
- NULL);
- }
- /* the original packet is dropped whether or not a reset was sent */
- FREE_PKT(m);
- } else if (code != ICMP6_UNREACH_RST) { /* Send an ICMPv6 unreach. */
-#if 0
- /*
- * Unlike above, the mbufs need to line up with the ip6 hdr,
- * as the contents are read. We need to m_adj() the
- * needed amount.
- * The mbuf will however be thrown away so we can adjust it.
- * Remember we did an m_pullup on it already so we
- * can make some assumptions about contiguousness.
- */
- if (args->L3offset)
- m_adj(m, args->L3offset);
-#endif
- /* m is handed to icmp6_error(); it is not freed here */
- icmp6_error(m, ICMP6_DST_UNREACH, code, 0);
- } else
- /* reset requested for a non-TCP packet: just drop it */
- FREE_PKT(m);
-
- args->m = NULL;
-}
-
-#endif /* INET6 */
-
-
-/*
- * sends a reject message, consuming the mbuf passed as an argument.
- *
- * code   ICMP_REJECT_RST asks for a TCP reset; any other value is used as
- *        the code of an ICMP unreachable error.
- * iplen  not referenced in the body as written here.
- * ip     the packet's IPv4 header.
- *
- * args->m is NULL on return.  The "#if 0" region below is disabled code
- * and is left exactly as found.
- */
-static void
-send_reject(struct ip_fw_args *args, int code, int iplen, struct ip *ip)
-{
-
-#if 0
- /* XXX When ip is not guaranteed to be at mtod() we will
- * need to account for this */
- * The mbuf will however be thrown away so we can adjust it.
- * Remember we did an m_pullup on it already so we
- * can make some assumptions about contiguousness.
- */
- if (args->L3offset)
- m_adj(m, args->L3offset);
-#endif
- if (code != ICMP_REJECT_RST) { /* Send an ICMP unreach */
- /* We need the IP header in host order for icmp_error(). */
- SET_HOST_IPLEN(ip);
- /* args->m is handed to icmp_error(); it is not freed here */
- icmp_error(args->m, ICMP_UNREACH, code, 0L, 0);
- } else if (args->f_id.proto == IPPROTO_TCP) {
- /* locate the TCP header right after the IP header and its options */
- struct tcphdr *const tcp =
- L3HDR(struct tcphdr, mtod(args->m, struct ip *));
- /* never answer a RST with another RST */
- if ( (tcp->th_flags & TH_RST) == 0) {
- struct mbuf *m;
- m = ipfw_send_pkt(args->m, &(args->f_id),
- ntohl(tcp->th_seq), ntohl(tcp->th_ack),
- tcp->th_flags | TH_RST);
- if (m != NULL)
- ip_output(m, NULL, NULL, 0, NULL, NULL);
- }
- /* the original packet is dropped whether or not a reset was sent */
- FREE_PKT(args->m);
- } else
- /* reset requested for a non-TCP packet: just drop it */
- FREE_PKT(args->m);
- args->m = NULL;
-}
-
-/*
- * Support for uid/gid/jail lookup. These tests are expensive
- * (because we may need to look into the list of active sockets)
- * so we cache the results. ugid_lookupp is 0 if we have not
- * yet done a lookup, 1 if we succeeded, and -1 if we tried
- * and failed. The function always returns the match value.
- * We could actually spare the variable and use *uc, setting
- * it to '(void *)check_uidgid if we have no info, NULL if
- * we tried and failed, or any other value if successful.
- */
-/*
- * Match a packet against a uid, gid or jail instruction.
- *
- * insn          the instruction; its opcode (O_UID, O_GID, O_JAIL) selects
- *               the test and insn->d[0] holds the value to compare.
- * args          packet state: flow id, optional pcb (args->inp), outgoing
- *               interface and mbuf.
- * ugid_lookupp  lookup cache state: 0 = not tried, 1 = credentials are in
- *               *uc, -1 = tried and failed.
- * uc            where a held reference to the credentials is stored; this
- *               function never releases it.
- *               NOTE(review): presumably the caller drops the reference --
- *               confirm.
- *
- * Returns the match value; 0 when no credentials could be found or when
- * the protocol is neither TCP nor UDP.
- */
-static int
-check_uidgid(ipfw_insn_u32 *insn, struct ip_fw_args *args, int *ugid_lookupp,
- struct ucred **uc)
-{
-#ifndef __FreeBSD__
- /* XXX */
- /* NOTE(review): this branch names variables that are not declared in this function */
- return cred_check(insn, proto, oif,
- dst_ip, dst_port, src_ip, src_port,
- (struct bsd_ucred *)uc, ugid_lookupp, ((struct mbuf *)inp)->m_skb);
-#else /* FreeBSD */
- struct in_addr src_ip, dst_ip;
- struct inpcbinfo *pi;
- struct ipfw_flow_id *id;
- struct inpcb *pcb, *inp;
- struct ifnet *oif;
- int lookupflags;
- int match;
-
- id = &args->f_id;
- inp = args->inp;
- oif = args->oif;
-
- /*
- * Check to see if the UDP or TCP stack supplied us with
- * the PCB. If so, rather than holding a lock and looking
- * up the PCB, we can use the one that was supplied.
- */
- if (inp && *ugid_lookupp == 0) {
- INP_LOCK_ASSERT(inp);
- if (inp->inp_socket != NULL) {
- *uc = crhold(inp->inp_cred);
- *ugid_lookupp = 1;
- } else
- *ugid_lookupp = -1;
- }
- /*
- * If we have already been here and the packet has no
- * PCB entry associated with it, then we can safely
- * assume that this is a no match.
- */
- if (*ugid_lookupp == -1)
- return (0);
- /* pick the pcb table and lookup flags for the protocol */
- if (id->proto == IPPROTO_TCP) {
- lookupflags = 0;
- pi = &V_tcbinfo;
- } else if (id->proto == IPPROTO_UDP) {
- lookupflags = INPLOOKUP_WILDCARD;
- pi = &V_udbinfo;
- } else
- return 0;
- lookupflags |= INPLOOKUP_RLOCKPCB;
- match = 0;
- if (*ugid_lookupp == 0) {
- /*
- * No cached result yet: look the pcb up.  With oif == NULL the
- * addresses are passed as (src, dst); otherwise they are swapped.
- */
- if (id->addr_type == 6) {
-#ifdef INET6
- if (oif == NULL)
- pcb = in6_pcblookup_mbuf(pi,
- &id->src_ip6, htons(id->src_port),
- &id->dst_ip6, htons(id->dst_port),
- lookupflags, oif, args->m);
- else
- pcb = in6_pcblookup_mbuf(pi,
- &id->dst_ip6, htons(id->dst_port),
- &id->src_ip6, htons(id->src_port),
- lookupflags, oif, args->m);
-#else
- *ugid_lookupp = -1;
- return (0);
-#endif
- } else {
- src_ip.s_addr = htonl(id->src_ip);
- dst_ip.s_addr = htonl(id->dst_ip);
- if (oif == NULL)
- pcb = in_pcblookup_mbuf(pi,
- src_ip, htons(id->src_port),
- dst_ip, htons(id->dst_port),
- lookupflags, oif, args->m);
- else
- pcb = in_pcblookup_mbuf(pi,
- dst_ip, htons(id->dst_port),
- src_ip, htons(id->src_port),
- lookupflags, oif, args->m);
- }
- if (pcb != NULL) {
- /* take a credential reference, then drop the pcb read lock */
- INP_RLOCK_ASSERT(pcb);
- *uc = crhold(pcb->inp_cred);
- *ugid_lookupp = 1;
- INP_RUNLOCK(pcb);
- }
- if (*ugid_lookupp == 0) {
- /*
- * We tried and failed, set the variable to -1
- * so we will not try again on this packet.
- */
- *ugid_lookupp = -1;
- return (0);
- }
- }
- /* credentials are available in *uc: run the requested comparison */
- if (insn->o.opcode == O_UID)
- match = ((*uc)->cr_uid == (uid_t)insn->d[0]);
- else if (insn->o.opcode == O_GID)
- match = groupmember((gid_t)insn->d[0], *uc);
- else if (insn->o.opcode == O_JAIL)
- match = ((*uc)->cr_prison->pr_id == (int)insn->d[0]);
- return (match);
-#endif /* __FreeBSD__ */
-}
-
-/*
- * Fill args->rule with a description of where processing should resume,
- * based on the rule found at chain->map[slot].  The slot is exact; the
- * rule id is only a guess that relies on ids being handed out
- * sequentially.
- */
-static inline void
-set_match(struct ip_fw_args *args, int slot,
-    struct ip_fw_chain *chain)
-{
-	/* stored off by one because 0 is used as a marker */
-	args->rule.slot = slot + 1;
-	args->rule.rulenum = chain->map[slot]->rulenum;
-	/* guessed id of the following rule */
-	args->rule.rule_id = chain->map[slot]->id + 1;
-	args->rule.chain_id = chain->id;
-}
-
-/*
- * The main check routine for the firewall.
- *
- * All arguments are in args so we can modify them and return them
- * back to the caller.
- *
- * Parameters:
- *
- * args->m (in/out) The packet; we set to NULL when/if we nuke it.
- * Starts with the IP header.
- * args->eh (in) Mac header if present, NULL for layer3 packet.
- * args->L3offset Number of bytes bypassed if we came from L2.
- * e.g. often sizeof(eh) ** NOTYET **
- * args->oif Outgoing interface, NULL if packet is incoming.
- * The incoming interface is in the mbuf. (in)
- * args->divert_rule (in/out)
- * Skip up to the first rule past this rule number;
- * upon return, non-zero port number for divert or tee.
- *
- * args->rule Pointer to the last matching rule (in/out)
- * args->next_hop Socket we are forwarding to (out).
- * args->next_hop6 IPv6 next hop we are forwarding to (out).
- * args->f_id Addresses grabbed from the packet (out)
- * args->rule.info a cookie depending on rule action
- *
- * Return value:
- *
- * IP_FW_PASS the packet must be accepted
- * IP_FW_DENY the packet must be dropped
- * IP_FW_DIVERT divert packet, port in m_tag
- * IP_FW_TEE tee packet, port in m_tag
- * IP_FW_DUMMYNET to dummynet, pipe in args->cookie
- * IP_FW_NETGRAPH into netgraph, cookie args->cookie
- * args->rule contains the matching rule,
- * args->rule.info has additional information.
- *
- */
-int
-ipfw_chk(struct ip_fw_args *args)
-{
-
- /*
- * Local variables holding state while processing a packet:
- *
- * IMPORTANT NOTE: to speed up the processing of rules, there
- * are some assumption on the values of the variables, which
- * are documented here. Should you change them, please check
- * the implementation of the various instructions to make sure
- * that they still work.
- *
- * args->eh The MAC header. It is non-null for a layer2
- * packet, it is NULL for a layer-3 packet.
- * **notyet**
- * args->L3offset Offset in the packet to the L3 (IP or equiv.) header.
- *
- * m | args->m Pointer to the mbuf, as received from the caller.
- * It may change if ipfw_chk() does an m_pullup, or if it
- * consumes the packet because it calls send_reject().
- * XXX This has to change, so that ipfw_chk() never modifies
- * or consumes the buffer.
- * ip is the beginning of the ip(4 or 6) header.
- * Calculated by adding the L3offset to the start of data.
- * (Until we start using L3offset, the packet is
- * supposed to start with the ip header).
- */
- struct mbuf *m = args->m;
- struct ip *ip = mtod(m, struct ip *);
-
- /*
- * For rules which contain uid/gid or jail constraints, cache
- * a copy of the users credentials after the pcb lookup has been
- * executed. This will speed up the processing of rules with
- * these types of constraints, as well as decrease contention
- * on pcb related locks.
- */
-#ifndef __FreeBSD__
- struct bsd_ucred ucred_cache;
-#else
- struct ucred *ucred_cache = NULL;
-#endif
- int ucred_lookup = 0;
-
- /*
- * oif | args->oif If NULL, ipfw_chk has been called on the
- * inbound path (ether_input, ip_input).
- * If non-NULL, ipfw_chk has been called on the outbound path
- * (ether_output, ip_output).
- */
- struct ifnet *oif = args->oif;
-
- int f_pos = 0; /* index of current rule in the array */
- int retval = 0;
-
- /*
- * hlen The length of the IP header.
- */
- u_int hlen = 0; /* hlen >0 means we have an IP pkt */
-
- /*
- * offset The offset of a fragment. offset != 0 means that
- * we have a fragment at this offset of an IPv4 packet.
- * offset == 0 means that (if this is an IPv4 packet)
- * this is the first or only fragment.
- * For IPv6 offset|ip6f_mf == 0 means there is no Fragment Header
- * or there is a single packet fragment (a fragment header added
- * without need). We will treat a single packet fragment as if
- * there was no fragment header (or log/block depending on the
- * V_fw_permit_single_frag6 sysctl setting).
- */
- u_short offset = 0;
- u_short ip6f_mf = 0;
-
- /*
- * Local copies of addresses. They are only valid if we have
- * an IP packet.
- *
- * proto The protocol. Set to 0 for non-ip packets,
- * or to the protocol read from the packet otherwise.
- * proto != 0 means that we have an IPv4 packet.
- *
- * src_port, dst_port port numbers, in HOST format. Only
- * valid for TCP and UDP packets.
- *
- * src_ip, dst_ip ip addresses, in NETWORK format.
- * Only valid for IPv4 packets.
- */
- uint8_t proto;
- uint16_t src_port = 0, dst_port = 0; /* NOTE: host format */
- struct in_addr src_ip, dst_ip; /* NOTE: network format */
- uint16_t iplen=0;
- int pktlen;
- uint16_t etype = 0; /* Host order stored ether type */
-
- /*
- * dyn_dir = MATCH_UNKNOWN when rules unchecked,
- * MATCH_NONE when checked and not matched (q = NULL),
- * MATCH_FORWARD or MATCH_REVERSE otherwise (q != NULL)
- */
- int dyn_dir = MATCH_UNKNOWN;
- ipfw_dyn_rule *q = NULL;
- struct ip_fw_chain *chain = &V_layer3_chain;
-
- /*
- * We store in ulp a pointer to the upper layer protocol header.
- * In the ipv4 case this is easy to determine from the header,
- * but for ipv6 we might have some additional headers in the middle.
- * ulp is NULL if not found.
- */
- void *ulp = NULL; /* upper layer protocol pointer. */
-
- /* XXX ipv6 variables */
- int is_ipv6 = 0;
- uint8_t icmp6_type = 0;
- uint16_t ext_hd = 0; /* bits vector for extension header filtering */
- /* end of ipv6 variables */
-
- int is_ipv4 = 0;
-
- int done = 0; /* flag to exit the outer loop */
-
- if (m->m_flags & M_SKIP_FIREWALL || (! V_ipfw_vnet_ready))
- return (IP_FW_PASS); /* accept */
-
- dst_ip.s_addr = 0; /* make sure it is initialized */
- src_ip.s_addr = 0; /* make sure it is initialized */
- pktlen = m->m_pkthdr.len;
- args->f_id.fib = M_GETFIB(m); /* note mbuf not altered) */
- proto = args->f_id.proto = 0; /* mark f_id invalid */
- /* XXX 0 is a valid proto: IP/IPv6 Hop-by-Hop Option */
-
-/*
- * PULLUP_TO(len, p, T) makes sure that len + sizeof(T) is contiguous,
- * then it sets p to point at the offset "len" in the mbuf. WARNING: the
- * pointer might become stale after other pullups (but we never use it
- * this way).
- */
-#define PULLUP_TO(_len, p, T) PULLUP_LEN(_len, p, sizeof(T))
-#define PULLUP_LEN(_len, p, T) \
-do { \
- int x = (_len) + T; \
- if ((m)->m_len < x) { \
- args->m = m = m_pullup(m, x); \
- if (m == NULL) \
- goto pullup_failed; \
- } \
- p = (mtod(m, char *) + (_len)); \
-} while (0)
-
- /*
- * if we have an ether header,
- */
- if (args->eh)
- etype = ntohs(args->eh->ether_type);
-
- /* Identify IP packets and fill up variables. */
- if (pktlen >= sizeof(struct ip6_hdr) &&
- (args->eh == NULL || etype == ETHERTYPE_IPV6) && ip->ip_v == 6) {
- struct ip6_hdr *ip6 = (struct ip6_hdr *)ip;
- is_ipv6 = 1;
- args->f_id.addr_type = 6;
- hlen = sizeof(struct ip6_hdr);
- proto = ip6->ip6_nxt;
-
- /* Search extension headers to find upper layer protocols */
- while (ulp == NULL && offset == 0) {
- switch (proto) {
- case IPPROTO_ICMPV6:
- PULLUP_TO(hlen, ulp, struct icmp6_hdr);
- icmp6_type = ICMP6(ulp)->icmp6_type;
- break;
-
- case IPPROTO_TCP:
- PULLUP_TO(hlen, ulp, struct tcphdr);
- dst_port = TCP(ulp)->th_dport;
- src_port = TCP(ulp)->th_sport;
- /* save flags for dynamic rules */
- args->f_id._flags = TCP(ulp)->th_flags;
- break;
-
- case IPPROTO_SCTP:
- PULLUP_TO(hlen, ulp, struct sctphdr);
- src_port = SCTP(ulp)->src_port;
- dst_port = SCTP(ulp)->dest_port;
- break;
-
- case IPPROTO_UDP:
- PULLUP_TO(hlen, ulp, struct udphdr);
- dst_port = UDP(ulp)->uh_dport;
- src_port = UDP(ulp)->uh_sport;
- break;
-
- case IPPROTO_HOPOPTS: /* RFC 2460 */
- PULLUP_TO(hlen, ulp, struct ip6_hbh);
- ext_hd |= EXT_HOPOPTS;
- hlen += (((struct ip6_hbh *)ulp)->ip6h_len + 1) << 3;
- proto = ((struct ip6_hbh *)ulp)->ip6h_nxt;
- ulp = NULL;
- break;
-
- case IPPROTO_ROUTING: /* RFC 2460 */
- PULLUP_TO(hlen, ulp, struct ip6_rthdr);
- switch (((struct ip6_rthdr *)ulp)->ip6r_type) {
- case 0:
- ext_hd |= EXT_RTHDR0;
- break;
- case 2:
- ext_hd |= EXT_RTHDR2;
- break;
- default:
- if (V_fw_verbose)
- printf("IPFW2: IPV6 - Unknown "
- "Routing Header type(%d)\n",
- ((struct ip6_rthdr *)
- ulp)->ip6r_type);
- if (V_fw_deny_unknown_exthdrs)
- return (IP_FW_DENY);
- break;
- }
- ext_hd |= EXT_ROUTING;
- hlen += (((struct ip6_rthdr *)ulp)->ip6r_len + 1) << 3;
- proto = ((struct ip6_rthdr *)ulp)->ip6r_nxt;
- ulp = NULL;
- break;
-
- case IPPROTO_FRAGMENT: /* RFC 2460 */
- PULLUP_TO(hlen, ulp, struct ip6_frag);
- ext_hd |= EXT_FRAGMENT;
- hlen += sizeof (struct ip6_frag);
- proto = ((struct ip6_frag *)ulp)->ip6f_nxt;
- offset = ((struct ip6_frag *)ulp)->ip6f_offlg &
- IP6F_OFF_MASK;
- ip6f_mf = ((struct ip6_frag *)ulp)->ip6f_offlg &
- IP6F_MORE_FRAG;
- if (V_fw_permit_single_frag6 == 0 &&
- offset == 0 && ip6f_mf == 0) {
- if (V_fw_verbose)
- printf("IPFW2: IPV6 - Invalid "
- "Fragment Header\n");
- if (V_fw_deny_unknown_exthdrs)
- return (IP_FW_DENY);
- break;
- }
- args->f_id.extra =
- ntohl(((struct ip6_frag *)ulp)->ip6f_ident);
- ulp = NULL;
- break;
-
- case IPPROTO_DSTOPTS: /* RFC 2460 */
- PULLUP_TO(hlen, ulp, struct ip6_hbh);
- ext_hd |= EXT_DSTOPTS;
- hlen += (((struct ip6_hbh *)ulp)->ip6h_len + 1) << 3;
- proto = ((struct ip6_hbh *)ulp)->ip6h_nxt;
- ulp = NULL;
- break;
-
- case IPPROTO_AH: /* RFC 2402 */
- PULLUP_TO(hlen, ulp, struct ip6_ext);
- ext_hd |= EXT_AH;
- hlen += (((struct ip6_ext *)ulp)->ip6e_len + 2) << 2;
- proto = ((struct ip6_ext *)ulp)->ip6e_nxt;
- ulp = NULL;
- break;
-
- case IPPROTO_ESP: /* RFC 2406 */
- PULLUP_TO(hlen, ulp, uint32_t); /* SPI, Seq# */
- /* Anything past Seq# is variable length and
- * data past this ext. header is encrypted. */
- ext_hd |= EXT_ESP;
- break;
-
- case IPPROTO_NONE: /* RFC 2460 */
- /*
- * Packet ends here, and IPv6 header has
- * already been pulled up. If ip6e_len!=0
- * then octets must be ignored.
- */
- ulp = ip; /* non-NULL to get out of loop. */
- break;
-
- case IPPROTO_OSPFIGP:
- /* XXX OSPF header check? */
- PULLUP_TO(hlen, ulp, struct ip6_ext);
- break;
-
- case IPPROTO_PIM:
- /* XXX PIM header check? */
- PULLUP_TO(hlen, ulp, struct pim);
- break;
-
- case IPPROTO_CARP:
- PULLUP_TO(hlen, ulp, struct carp_header);
- if (((struct carp_header *)ulp)->carp_version !=
- CARP_VERSION)
- return (IP_FW_DENY);
- if (((struct carp_header *)ulp)->carp_type !=
- CARP_ADVERTISEMENT)
- return (IP_FW_DENY);
- break;
-
- case IPPROTO_IPV6: /* RFC 2893 */
- PULLUP_TO(hlen, ulp, struct ip6_hdr);
- break;
-
- case IPPROTO_IPV4: /* RFC 2893 */
- PULLUP_TO(hlen, ulp, struct ip);
- break;
-
- default:
- if (V_fw_verbose)
- printf("IPFW2: IPV6 - Unknown "
- "Extension Header(%d), ext_hd=%x\n",
- proto, ext_hd);
- if (V_fw_deny_unknown_exthdrs)
- return (IP_FW_DENY);
- PULLUP_TO(hlen, ulp, struct ip6_ext);
- break;
- } /*switch */
- }
- ip = mtod(m, struct ip *);
- ip6 = (struct ip6_hdr *)ip;
- args->f_id.src_ip6 = ip6->ip6_src;
- args->f_id.dst_ip6 = ip6->ip6_dst;
- args->f_id.src_ip = 0;
- args->f_id.dst_ip = 0;
- args->f_id.flow_id6 = ntohl(ip6->ip6_flow);
- } else if (pktlen >= sizeof(struct ip) &&
- (args->eh == NULL || etype == ETHERTYPE_IP) && ip->ip_v == 4) {
- is_ipv4 = 1;
- hlen = ip->ip_hl << 2;
- args->f_id.addr_type = 4;
-
- /*
- * Collect parameters into local variables for faster matching.
- */
- proto = ip->ip_p;
- src_ip = ip->ip_src;
- dst_ip = ip->ip_dst;
- offset = ntohs(ip->ip_off) & IP_OFFMASK;
- iplen = ntohs(ip->ip_len);
- pktlen = iplen < pktlen ? iplen : pktlen;
-
- if (offset == 0) {
- switch (proto) {
- case IPPROTO_TCP:
- PULLUP_TO(hlen, ulp, struct tcphdr);
- dst_port = TCP(ulp)->th_dport;
- src_port = TCP(ulp)->th_sport;
- /* save flags for dynamic rules */
- args->f_id._flags = TCP(ulp)->th_flags;
- break;
-
- case IPPROTO_SCTP:
- PULLUP_TO(hlen, ulp, struct sctphdr);
- src_port = SCTP(ulp)->src_port;
- dst_port = SCTP(ulp)->dest_port;
- break;
-
- case IPPROTO_UDP:
- PULLUP_TO(hlen, ulp, struct udphdr);
- dst_port = UDP(ulp)->uh_dport;
- src_port = UDP(ulp)->uh_sport;
- break;
-
- case IPPROTO_ICMP:
- PULLUP_TO(hlen, ulp, struct icmphdr);
- //args->f_id.flags = ICMP(ulp)->icmp_type;
- break;
-
- default:
- break;
- }
- }
-
- ip = mtod(m, struct ip *);
- args->f_id.src_ip = ntohl(src_ip.s_addr);
- args->f_id.dst_ip = ntohl(dst_ip.s_addr);
- }
-#undef PULLUP_TO
- if (proto) { /* we may have port numbers, store them */
- args->f_id.proto = proto;
- args->f_id.src_port = src_port = ntohs(src_port);
- args->f_id.dst_port = dst_port = ntohs(dst_port);
- }
-
- IPFW_RLOCK(chain);
- if (! V_ipfw_vnet_ready) { /* shutting down, leave NOW. */
- IPFW_RUNLOCK(chain);
- return (IP_FW_PASS); /* accept */
- }
- if (args->rule.slot) {
- /*
- * Packet has already been tagged as a result of a previous
- * match on rule args->rule aka args->rule_id (PIPE, QUEUE,
- * REASS, NETGRAPH, DIVERT/TEE...)
- * Validate the slot and continue from the next one
- * if still present, otherwise do a lookup.
- */
- f_pos = (args->rule.chain_id == chain->id) ?
- args->rule.slot :
- ipfw_find_rule(chain, args->rule.rulenum,
- args->rule.rule_id);
- } else {
- f_pos = 0;
- }
-
- /*
- * Now scan the rules, and parse microinstructions for each rule.
- * We have two nested loops and an inner switch. Sometimes we
- * need to break out of one or both loops, or re-enter one of
- * the loops with updated variables. Loop variables are:
- *
- * f_pos (outer loop) points to the current rule.
- * On output it points to the matching rule.
- * done (outer loop) is used as a flag to break the loop.
- * l (inner loop) residual length of current rule.
- * cmd points to the current microinstruction.
- *
- * We break the inner loop by setting l=0 and possibly
- * cmdlen=0 if we don't want to advance cmd.
- * We break the outer loop by setting done=1
- * We can restart the inner loop by setting l>0 and f_pos, f, cmd
- * as needed.
- */
- for (; f_pos < chain->n_rules; f_pos++) {
- ipfw_insn *cmd;
- uint32_t tablearg = 0;
- int l, cmdlen, skip_or; /* skip rest of OR block */
- struct ip_fw *f;
-
- f = chain->map[f_pos];
- if (V_set_disable & (1 << f->set) )
- continue;
-
- skip_or = 0;
- for (l = f->cmd_len, cmd = f->cmd ; l > 0 ;
- l -= cmdlen, cmd += cmdlen) {
- int match;
-
- /*
- * check_body is a jump target used when we find a
- * CHECK_STATE, and need to jump to the body of
- * the target rule.
- */
-
-/* check_body: */
- cmdlen = F_LEN(cmd);
- /*
- * An OR block (insn_1 || .. || insn_n) has the
- * F_OR bit set in all but the last instruction.
- * The first match will set "skip_or", and cause
- * the following instructions to be skipped until
- * past the one with the F_OR bit clear.
- */
- if (skip_or) { /* skip this instruction */
- if ((cmd->len & F_OR) == 0)
- skip_or = 0; /* next one is good */
- continue;
- }
- match = 0; /* set to 1 if we succeed */
-
- switch (cmd->opcode) {
- /*
- * The first set of opcodes compares the packet's
- * fields with some pattern, setting 'match' if a
- * match is found. At the end of the loop there is
- * logic to deal with F_NOT and F_OR flags associated
- * with the opcode.
- */
- case O_NOP:
- match = 1;
- break;
-
- case O_FORWARD_MAC:
- printf("ipfw: opcode %d unimplemented\n",
- cmd->opcode);
- break;
-
- case O_GID:
- case O_UID:
- case O_JAIL:
- /*
- * We only check offset == 0 && proto != 0,
- * as this ensures that we have a
- * packet with the ports info.
- */
- if (offset != 0)
- break;
- if (proto == IPPROTO_TCP ||
- proto == IPPROTO_UDP)
- match = check_uidgid(
- (ipfw_insn_u32 *)cmd,
- args, &ucred_lookup,
-#ifdef __FreeBSD__
- &ucred_cache);
-#else
- (void *)&ucred_cache);
-#endif
- break;
-
- case O_RECV:
- match = iface_match(m->m_pkthdr.rcvif,
- (ipfw_insn_if *)cmd, chain, &tablearg);
- break;
-
- case O_XMIT:
- match = iface_match(oif, (ipfw_insn_if *)cmd,
- chain, &tablearg);
- break;
-
- case O_VIA:
- match = iface_match(oif ? oif :
- m->m_pkthdr.rcvif, (ipfw_insn_if *)cmd,
- chain, &tablearg);
- break;
-
- case O_MACADDR2:
- if (args->eh != NULL) { /* have MAC header */
- u_int32_t *want = (u_int32_t *)
- ((ipfw_insn_mac *)cmd)->addr;
- u_int32_t *mask = (u_int32_t *)
- ((ipfw_insn_mac *)cmd)->mask;
- u_int32_t *hdr = (u_int32_t *)args->eh;
-
- match =
- ( want[0] == (hdr[0] & mask[0]) &&
- want[1] == (hdr[1] & mask[1]) &&
- want[2] == (hdr[2] & mask[2]) );
- }
- break;
-
- case O_MAC_TYPE:
- if (args->eh != NULL) {
- u_int16_t *p =
- ((ipfw_insn_u16 *)cmd)->ports;
- int i;
-
- for (i = cmdlen - 1; !match && i>0;
- i--, p += 2)
- match = (etype >= p[0] &&
- etype <= p[1]);
- }
- break;
-
- case O_FRAG:
- match = (offset != 0);
- break;
-
- case O_IN: /* "out" is "not in" */
- match = (oif == NULL);
- break;
-
- case O_LAYER2:
- match = (args->eh != NULL);
- break;
-
- case O_DIVERTED:
- {
- /* For diverted packets, args->rule.info
- * contains the divert port (in host format)
- * reason and direction.
- */
- uint32_t i = args->rule.info;
- match = (i&IPFW_IS_MASK) == IPFW_IS_DIVERT &&
- cmd->arg1 & ((i & IPFW_INFO_IN) ? 1 : 2);
- }
- break;
-
- case O_PROTO:
- /*
- * We do not allow an arg of 0 so the
- * check of "proto" only suffices.
- */
- match = (proto == cmd->arg1);
- break;
-
- case O_IP_SRC:
- match = is_ipv4 &&
- (((ipfw_insn_ip *)cmd)->addr.s_addr ==
- src_ip.s_addr);
- break;
-
- case O_IP_SRC_LOOKUP:
- case O_IP_DST_LOOKUP:
- if (is_ipv4) {
- uint32_t key =
- (cmd->opcode == O_IP_DST_LOOKUP) ?
- dst_ip.s_addr : src_ip.s_addr;
- uint32_t v = 0;
-
- if (cmdlen > F_INSN_SIZE(ipfw_insn_u32)) {
- /* generic lookup. The key must be
- * in 32bit big-endian format.
- */
- v = ((ipfw_insn_u32 *)cmd)->d[1];
- if (v == 0)
- key = dst_ip.s_addr;
- else if (v == 1)
- key = src_ip.s_addr;
- else if (v == 6) /* dscp */
- key = (ip->ip_tos >> 2) & 0x3f;
- else if (offset != 0)
- break;
- else if (proto != IPPROTO_TCP &&
- proto != IPPROTO_UDP)
- break;
- else if (v == 2)
- key = htonl(dst_port);
- else if (v == 3)
- key = htonl(src_port);
- else if (v == 4 || v == 5) {
- check_uidgid(
- (ipfw_insn_u32 *)cmd,
- args, &ucred_lookup,
-#ifdef __FreeBSD__
- &ucred_cache);
- if (v == 4 /* O_UID */)
- key = ucred_cache->cr_uid;
- else if (v == 5 /* O_JAIL */)
- key = ucred_cache->cr_prison->pr_id;
-#else /* !__FreeBSD__ */
- (void *)&ucred_cache);
- if (v ==4 /* O_UID */)
- key = ucred_cache.uid;
- else if (v == 5 /* O_JAIL */)
- key = ucred_cache.xid;
-#endif /* !__FreeBSD__ */
- key = htonl(key);
- } else
- break;
- }
- match = ipfw_lookup_table(chain,
- cmd->arg1, key, &v);
- if (!match)
- break;
- if (cmdlen == F_INSN_SIZE(ipfw_insn_u32))
- match =
- ((ipfw_insn_u32 *)cmd)->d[0] == v;
- else
- tablearg = v;
- } else if (is_ipv6) {
- uint32_t v = 0;
- void *pkey = (cmd->opcode == O_IP_DST_LOOKUP) ?
- &args->f_id.dst_ip6: &args->f_id.src_ip6;
- match = ipfw_lookup_table_extended(chain,
- cmd->arg1, pkey, &v,
- IPFW_TABLE_CIDR);
- if (cmdlen == F_INSN_SIZE(ipfw_insn_u32))
- match = ((ipfw_insn_u32 *)cmd)->d[0] == v;
- if (match)
- tablearg = v;
- }
- break;
-
- case O_IP_SRC_MASK:
- case O_IP_DST_MASK:
- if (is_ipv4) {
- uint32_t a =
- (cmd->opcode == O_IP_DST_MASK) ?
- dst_ip.s_addr : src_ip.s_addr;
- uint32_t *p = ((ipfw_insn_u32 *)cmd)->d;
- int i = cmdlen-1;
-
- for (; !match && i>0; i-= 2, p+= 2)
- match = (p[0] == (a & p[1]));
- }
- break;
-
- case O_IP_SRC_ME:
- if (is_ipv4) {
- struct ifnet *tif;
-
- INADDR_TO_IFP(src_ip, tif);
- match = (tif != NULL);
- break;
- }
-#ifdef INET6
- /* FALLTHROUGH */
- case O_IP6_SRC_ME:
- match= is_ipv6 && search_ip6_addr_net(&args->f_id.src_ip6);
-#endif
- break;
-
- case O_IP_DST_SET:
- case O_IP_SRC_SET:
- if (is_ipv4) {
- u_int32_t *d = (u_int32_t *)(cmd+1);
- u_int32_t addr =
- cmd->opcode == O_IP_DST_SET ?
- args->f_id.dst_ip :
- args->f_id.src_ip;
-
- if (addr < d[0])
- break;
- addr -= d[0]; /* subtract base */
- match = (addr < cmd->arg1) &&
- ( d[ 1 + (addr>>5)] &
- (1<<(addr & 0x1f)) );
- }
- break;
-
- case O_IP_DST:
- match = is_ipv4 &&
- (((ipfw_insn_ip *)cmd)->addr.s_addr ==
- dst_ip.s_addr);
- break;
-
- case O_IP_DST_ME:
- if (is_ipv4) {
- struct ifnet *tif;
-
- INADDR_TO_IFP(dst_ip, tif);
- match = (tif != NULL);
- break;
- }
-#ifdef INET6
- /* FALLTHROUGH */
- case O_IP6_DST_ME:
- match= is_ipv6 && search_ip6_addr_net(&args->f_id.dst_ip6);
-#endif
- break;
-
-
- case O_IP_SRCPORT:
- case O_IP_DSTPORT:
- /*
- * offset == 0 && proto != 0 is enough
- * to guarantee that we have a
- * packet with port info.
- */
- if ((proto==IPPROTO_UDP || proto==IPPROTO_TCP)
- && offset == 0) {
- u_int16_t x =
- (cmd->opcode == O_IP_SRCPORT) ?
- src_port : dst_port ;
- u_int16_t *p =
- ((ipfw_insn_u16 *)cmd)->ports;
- int i;
-
- for (i = cmdlen - 1; !match && i>0;
- i--, p += 2)
- match = (x>=p[0] && x<=p[1]);
- }
- break;
-
- case O_ICMPTYPE:
- match = (offset == 0 && proto==IPPROTO_ICMP &&
- icmptype_match(ICMP(ulp), (ipfw_insn_u32 *)cmd) );
- break;
-
-#ifdef INET6
- case O_ICMP6TYPE:
- match = is_ipv6 && offset == 0 &&
- proto==IPPROTO_ICMPV6 &&
- icmp6type_match(
- ICMP6(ulp)->icmp6_type,
- (ipfw_insn_u32 *)cmd);
- break;
-#endif /* INET6 */
-
- case O_IPOPT:
- match = (is_ipv4 &&
- ipopts_match(ip, cmd) );
- break;
-
- case O_IPVER:
- match = (is_ipv4 &&
- cmd->arg1 == ip->ip_v);
- break;
-
- case O_IPID:
- case O_IPLEN:
- case O_IPTTL:
- if (is_ipv4) { /* only for IP packets */
- uint16_t x;
- uint16_t *p;
- int i;
-
- if (cmd->opcode == O_IPLEN)
- x = iplen;
- else if (cmd->opcode == O_IPTTL)
- x = ip->ip_ttl;
- else /* must be IPID */
- x = ntohs(ip->ip_id);
- if (cmdlen == 1) {
- match = (cmd->arg1 == x);
- break;
- }
- /* otherwise we have ranges */
- p = ((ipfw_insn_u16 *)cmd)->ports;
- i = cmdlen - 1;
- for (; !match && i>0; i--, p += 2)
- match = (x >= p[0] && x <= p[1]);
- }
- break;
-
- case O_IPPRECEDENCE:
- match = (is_ipv4 &&
- (cmd->arg1 == (ip->ip_tos & 0xe0)) );
- break;
-
- case O_IPTOS:
- match = (is_ipv4 &&
- flags_match(cmd, ip->ip_tos));
- break;
-
- case O_TCPDATALEN:
- if (proto == IPPROTO_TCP && offset == 0) {
- struct tcphdr *tcp;
- uint16_t x;
- uint16_t *p;
- int i;
-
- tcp = TCP(ulp);
- x = iplen -
- ((ip->ip_hl + tcp->th_off) << 2);
- if (cmdlen == 1) {
- match = (cmd->arg1 == x);
- break;
- }
- /* otherwise we have ranges */
- p = ((ipfw_insn_u16 *)cmd)->ports;
- i = cmdlen - 1;
- for (; !match && i>0; i--, p += 2)
- match = (x >= p[0] && x <= p[1]);
- }
- break;
-
- case O_TCPFLAGS:
- match = (proto == IPPROTO_TCP && offset == 0 &&
- flags_match(cmd, TCP(ulp)->th_flags));
- break;
-
- case O_TCPOPTS:
- PULLUP_LEN(hlen, ulp, (TCP(ulp)->th_off << 2));
- match = (proto == IPPROTO_TCP && offset == 0 &&
- tcpopts_match(TCP(ulp), cmd));
- break;
-
- case O_TCPSEQ:
- match = (proto == IPPROTO_TCP && offset == 0 &&
- ((ipfw_insn_u32 *)cmd)->d[0] ==
- TCP(ulp)->th_seq);
- break;
-
- case O_TCPACK:
- match = (proto == IPPROTO_TCP && offset == 0 &&
- ((ipfw_insn_u32 *)cmd)->d[0] ==
- TCP(ulp)->th_ack);
- break;
-
- case O_TCPWIN:
- if (proto == IPPROTO_TCP && offset == 0) {
- uint16_t x;
- uint16_t *p;
- int i;
-
- x = ntohs(TCP(ulp)->th_win);
- if (cmdlen == 1) {
- match = (cmd->arg1 == x);
- break;
- }
- /* Otherwise we have ranges. */
- p = ((ipfw_insn_u16 *)cmd)->ports;
- i = cmdlen - 1;
- for (; !match && i > 0; i--, p += 2)
- match = (x >= p[0] && x <= p[1]);
- }
- break;
-
- case O_ESTAB:
- /* reject packets which have SYN only */
- /* XXX should i also check for TH_ACK ? */
- match = (proto == IPPROTO_TCP && offset == 0 &&
- (TCP(ulp)->th_flags &
- (TH_RST | TH_ACK | TH_SYN)) != TH_SYN);
- break;
-
- case O_ALTQ: {
- struct pf_mtag *at;
- struct m_tag *mtag;
- ipfw_insn_altq *altq = (ipfw_insn_altq *)cmd;
-
- /*
- * ALTQ uses mbuf tags from another
- * packet filtering system - pf(4).
- * We allocate a tag in its format
- * and fill it in, pretending to be pf(4).
- */
- match = 1;
- at = pf_find_mtag(m);
- if (at != NULL && at->qid != 0)
- break;
- mtag = m_tag_get(PACKET_TAG_PF,
- sizeof(struct pf_mtag), M_NOWAIT | M_ZERO);
- if (mtag == NULL) {
- /*
- * Let the packet fall back to the
- * default ALTQ.
- */
- break;
- }
- m_tag_prepend(m, mtag);
- at = (struct pf_mtag *)(mtag + 1);
- at->qid = altq->qid;
- at->hdr = ip;
- break;
- }
-
- case O_LOG:
- ipfw_log(f, hlen, args, m,
- oif, offset | ip6f_mf, tablearg, ip);
- match = 1;
- break;
-
- case O_PROB:
- match = (random()<((ipfw_insn_u32 *)cmd)->d[0]);
- break;
-
- case O_VERREVPATH:
- /* Outgoing packets automatically pass/match */
- match = ((oif != NULL) ||
- (m->m_pkthdr.rcvif == NULL) ||
- (
-#ifdef INET6
- is_ipv6 ?
- verify_path6(&(args->f_id.src_ip6),
- m->m_pkthdr.rcvif, args->f_id.fib) :
-#endif
- verify_path(src_ip, m->m_pkthdr.rcvif,
- args->f_id.fib)));
- break;
-
- case O_VERSRCREACH:
- /* Outgoing packets automatically pass/match */
- match = (hlen > 0 && ((oif != NULL) ||
-#ifdef INET6
- is_ipv6 ?
- verify_path6(&(args->f_id.src_ip6),
- NULL, args->f_id.fib) :
-#endif
- verify_path(src_ip, NULL, args->f_id.fib)));
- break;
-
- case O_ANTISPOOF:
- /* Outgoing packets automatically pass/match */
- if (oif == NULL && hlen > 0 &&
- ( (is_ipv4 && in_localaddr(src_ip))
-#ifdef INET6
- || (is_ipv6 &&
- in6_localaddr(&(args->f_id.src_ip6)))
-#endif
- ))
- match =
-#ifdef INET6
- is_ipv6 ? verify_path6(
- &(args->f_id.src_ip6),
- m->m_pkthdr.rcvif,
- args->f_id.fib) :
-#endif
- verify_path(src_ip,
- m->m_pkthdr.rcvif,
- args->f_id.fib);
- else
- match = 1;
- break;
-
- case O_IPSEC:
-#ifdef IPSEC
- match = (m_tag_find(m,
- PACKET_TAG_IPSEC_IN_DONE, NULL) != NULL);
-#endif
- /* otherwise no match */
- break;
-
-#ifdef INET6
- case O_IP6_SRC:
- match = is_ipv6 &&
- IN6_ARE_ADDR_EQUAL(&args->f_id.src_ip6,
- &((ipfw_insn_ip6 *)cmd)->addr6);
- break;
-
- case O_IP6_DST:
- match = is_ipv6 &&
- IN6_ARE_ADDR_EQUAL(&args->f_id.dst_ip6,
- &((ipfw_insn_ip6 *)cmd)->addr6);
- break;
- case O_IP6_SRC_MASK:
- case O_IP6_DST_MASK:
- if (is_ipv6) {
- int i = cmdlen - 1;
- struct in6_addr p;
- struct in6_addr *d =
- &((ipfw_insn_ip6 *)cmd)->addr6;
-
- for (; !match && i > 0; d += 2,
- i -= F_INSN_SIZE(struct in6_addr)
- * 2) {
- p = (cmd->opcode ==
- O_IP6_SRC_MASK) ?
- args->f_id.src_ip6:
- args->f_id.dst_ip6;
- APPLY_MASK(&p, &d[1]);
- match =
- IN6_ARE_ADDR_EQUAL(&d[0],
- &p);
- }
- }
- break;
-
- case O_FLOW6ID:
- match = is_ipv6 &&
- flow6id_match(args->f_id.flow_id6,
- (ipfw_insn_u32 *) cmd);
- break;
-
- case O_EXT_HDR:
- match = is_ipv6 &&
- (ext_hd & ((ipfw_insn *) cmd)->arg1);
- break;
-
- case O_IP6:
- match = is_ipv6;
- break;
-#endif
-
- case O_IP4:
- match = is_ipv4;
- break;
-
- case O_TAG: {
- struct m_tag *mtag;
- uint32_t tag = (cmd->arg1 == IP_FW_TABLEARG) ?
- tablearg : cmd->arg1;
-
- /* Packet is already tagged with this tag? */
- mtag = m_tag_locate(m, MTAG_IPFW, tag, NULL);
-
- /* We have `untag' action when F_NOT flag is
- * present. And we must remove this mtag from
- * mbuf and reset `match' to zero (`match' will
- * be inversed later).
- * Otherwise we should allocate new mtag and
- * push it into mbuf.
- */
- if (cmd->len & F_NOT) { /* `untag' action */
- if (mtag != NULL)
- m_tag_delete(m, mtag);
- match = 0;
- } else {
- if (mtag == NULL) {
- mtag = m_tag_alloc( MTAG_IPFW,
- tag, 0, M_NOWAIT);
- if (mtag != NULL)
- m_tag_prepend(m, mtag);
- }
- match = 1;
- }
- break;
- }
-
- case O_FIB: /* try match the specified fib */
- if (args->f_id.fib == cmd->arg1)
- match = 1;
- break;
-
- case O_SOCKARG: {
- struct inpcb *inp = args->inp;
- struct inpcbinfo *pi;
-
- if (is_ipv6) /* XXX can we remove this ? */
- break;
-
- if (proto == IPPROTO_TCP)
- pi = &V_tcbinfo;
- else if (proto == IPPROTO_UDP)
- pi = &V_udbinfo;
- else
- break;
-
- /*
- * XXXRW: so_user_cookie should almost
- * certainly be inp_user_cookie?
- */
-
- /* For an incoming packet, look up the
- inpcb using the src/dest ip/port tuple */
- if (inp == NULL) {
- inp = in_pcblookup(pi,
- src_ip, htons(src_port),
- dst_ip, htons(dst_port),
- INPLOOKUP_RLOCKPCB, NULL);
- if (inp != NULL) {
- tablearg =
- inp->inp_socket->so_user_cookie;
- if (tablearg)
- match = 1;
- INP_RUNLOCK(inp);
- }
- } else {
- if (inp->inp_socket) {
- tablearg =
- inp->inp_socket->so_user_cookie;
- if (tablearg)
- match = 1;
- }
- }
- break;
- }
-
- case O_TAGGED: {
- struct m_tag *mtag;
- uint32_t tag = (cmd->arg1 == IP_FW_TABLEARG) ?
- tablearg : cmd->arg1;
-
- if (cmdlen == 1) {
- match = m_tag_locate(m, MTAG_IPFW,
- tag, NULL) != NULL;
- break;
- }
-
- /* we have ranges */
- for (mtag = m_tag_first(m);
- mtag != NULL && !match;
- mtag = m_tag_next(m, mtag)) {
- uint16_t *p;
- int i;
-
- if (mtag->m_tag_cookie != MTAG_IPFW)
- continue;
-
- p = ((ipfw_insn_u16 *)cmd)->ports;
- i = cmdlen - 1;
- for(; !match && i > 0; i--, p += 2)
- match =
- mtag->m_tag_id >= p[0] &&
- mtag->m_tag_id <= p[1];
- }
- break;
- }
-
- /*
- * The second set of opcodes represents 'actions',
- * i.e. the terminal part of a rule once the packet
- * matches all previous patterns.
- * Typically there is only one action for each rule,
- * and the opcode is stored at the end of the rule
- * (but there are exceptions -- see below).
- *
- * In general, here we set retval and terminate the
- * outer loop (would be a 'break 3' in some language,
- * but we need to set l=0, done=1)
- *
- * Exceptions:
- * O_COUNT and O_SKIPTO actions:
- * instead of terminating, we jump to the next rule
- * (setting l=0), or to the SKIPTO target (setting
- * f/f_len, cmd and l as needed), respectively.
- *
- * O_TAG, O_LOG and O_ALTQ action parameters:
- * perform some action and set match = 1;
- *
- * O_LIMIT and O_KEEP_STATE: these opcodes are
- * not real 'actions', and are stored right
- * before the 'action' part of the rule.
- * These opcodes try to install an entry in the
- * state tables; if successful, we continue with
- * the next opcode (match=1; break;), otherwise
- * the packet must be dropped (set retval,
- * break loops with l=0, done=1)
- *
- * O_PROBE_STATE and O_CHECK_STATE: these opcodes
- * cause a lookup of the state table, and a jump
- * to the 'action' part of the parent rule
- * if an entry is found, or
- * (CHECK_STATE only) a jump to the next rule if
- * the entry is not found.
- * The result of the lookup is cached so that
- * further instances of these opcodes become NOPs.
- * The jump to the next rule is done by setting
- * l=0, cmdlen=0.
- */
- case O_LIMIT:
- case O_KEEP_STATE:
- if (ipfw_install_state(f,
- (ipfw_insn_limit *)cmd, args, tablearg)) {
- /* error or limit violation */
- retval = IP_FW_DENY;
- l = 0; /* exit inner loop */
- done = 1; /* exit outer loop */
- }
- match = 1;
- break;
-
- case O_PROBE_STATE:
- case O_CHECK_STATE:
- /*
- * dynamic rules are checked at the first
- * keep-state or check-state occurrence,
- * with the result being stored in dyn_dir.
- * The compiler introduces a PROBE_STATE
- * instruction for us when we have a
- * KEEP_STATE (because PROBE_STATE needs
- * to be run first).
- */
- if (dyn_dir == MATCH_UNKNOWN &&
- (q = ipfw_lookup_dyn_rule(&args->f_id,
- &dyn_dir, proto == IPPROTO_TCP ?
- TCP(ulp) : NULL))
- != NULL) {
- /*
- * Found dynamic entry, update stats
- * and jump to the 'action' part of
- * the parent rule by setting
- * f, cmd, l and clearing cmdlen.
- */
- q->pcnt++;
- q->bcnt += pktlen;
- /* XXX we would like to have f_pos
- * readily accessible in the dynamic
- * rule, instead of having to
- * lookup q->rule.
- */
- f = q->rule;
- f_pos = ipfw_find_rule(chain,
- f->rulenum, f->id);
- cmd = ACTION_PTR(f);
- l = f->cmd_len - f->act_ofs;
- ipfw_dyn_unlock();
- cmdlen = 0;
- match = 1;
- break;
- }
- /*
- * Dynamic entry not found. If CHECK_STATE,
- * skip to next rule, if PROBE_STATE just
- * ignore and continue with next opcode.
- */
- if (cmd->opcode == O_CHECK_STATE)
- l = 0; /* exit inner loop */
- match = 1;
- break;
-
- case O_ACCEPT:
- retval = 0; /* accept */
- l = 0; /* exit inner loop */
- done = 1; /* exit outer loop */
- break;
-
- case O_PIPE:
- case O_QUEUE:
- set_match(args, f_pos, chain);
- args->rule.info = (cmd->arg1 == IP_FW_TABLEARG) ?
- tablearg : cmd->arg1;
- if (cmd->opcode == O_PIPE)
- args->rule.info |= IPFW_IS_PIPE;
- if (V_fw_one_pass)
- args->rule.info |= IPFW_ONEPASS;
- retval = IP_FW_DUMMYNET;
- l = 0; /* exit inner loop */
- done = 1; /* exit outer loop */
- break;
-
- case O_DIVERT:
- case O_TEE:
- if (args->eh) /* not on layer 2 */
- break;
- /* otherwise this is terminal */
- l = 0; /* exit inner loop */
- done = 1; /* exit outer loop */
- retval = (cmd->opcode == O_DIVERT) ?
- IP_FW_DIVERT : IP_FW_TEE;
- set_match(args, f_pos, chain);
- args->rule.info = (cmd->arg1 == IP_FW_TABLEARG) ?
- tablearg : cmd->arg1;
- break;
-
- case O_COUNT:
- f->pcnt++; /* update stats */
- f->bcnt += pktlen;
- f->timestamp = time_uptime;
- l = 0; /* exit inner loop */
- break;
-
- case O_SKIPTO:
- f->pcnt++; /* update stats */
- f->bcnt += pktlen;
- f->timestamp = time_uptime;
- /* If possible use cached f_pos (in f->next_rule),
- * whose version is written in f->x_next
- * (horrible hacks to avoid changing the ABI).
- */
- if (cmd->arg1 != IP_FW_TABLEARG &&
- (uintptr_t)f->x_next == chain->id) {
- f_pos = (uintptr_t)f->next_rule;
- } else {
- int i = (cmd->arg1 == IP_FW_TABLEARG) ?
- tablearg : cmd->arg1;
- /* make sure we do not jump backward */
- if (i <= f->rulenum)
- i = f->rulenum + 1;
- f_pos = ipfw_find_rule(chain, i, 0);
- /* update the cache */
- if (cmd->arg1 != IP_FW_TABLEARG) {
- f->next_rule =
- (void *)(uintptr_t)f_pos;
- f->x_next =
- (void *)(uintptr_t)chain->id;
- }
- }
- /*
- * Skip disabled rules, and re-enter
- * the inner loop with the correct
- * f_pos, f, l and cmd.
- * Also clear cmdlen and skip_or
- */
- for (; f_pos < chain->n_rules - 1 &&
- (V_set_disable &
- (1 << chain->map[f_pos]->set));
- f_pos++)
- ;
- /* Re-enter the inner loop at the skipto rule. */
- f = chain->map[f_pos];
- l = f->cmd_len;
- cmd = f->cmd;
- match = 1;
- cmdlen = 0;
- skip_or = 0;
- continue;
- break; /* not reached */
-
- case O_CALLRETURN: {
- /*
- * Implementation of `subroutine' call/return,
- * in the stack carried in an mbuf tag. This
- * is different from `skipto' in that any call
- * address is possible (`skipto' must prevent
- * backward jumps to avoid endless loops).
- * We have `return' action when F_NOT flag is
- * present. The `m_tag_id' field is used as
- * stack pointer.
- */
- struct m_tag *mtag;
- uint16_t jmpto, *stack;
-
-#define IS_CALL ((cmd->len & F_NOT) == 0)
-#define IS_RETURN ((cmd->len & F_NOT) != 0)
- /*
- * Hand-rolled version of m_tag_locate() with
- * wildcard `type'.
- * If not already tagged, allocate new tag.
- */
- mtag = m_tag_first(m);
- while (mtag != NULL) {
- if (mtag->m_tag_cookie ==
- MTAG_IPFW_CALL)
- break;
- mtag = m_tag_next(m, mtag);
- }
- if (mtag == NULL && IS_CALL) {
- mtag = m_tag_alloc(MTAG_IPFW_CALL, 0,
- IPFW_CALLSTACK_SIZE *
- sizeof(uint16_t), M_NOWAIT);
- if (mtag != NULL)
- m_tag_prepend(m, mtag);
- }
-
- /*
- * On error both `call' and `return' just
- * continue with next rule.
- */
- if (IS_RETURN && (mtag == NULL ||
- mtag->m_tag_id == 0)) {
- l = 0; /* exit inner loop */
- break;
- }
- if (IS_CALL && (mtag == NULL ||
- mtag->m_tag_id >= IPFW_CALLSTACK_SIZE)) {
- printf("ipfw: call stack error, "
- "go to next rule\n");
- l = 0; /* exit inner loop */
- break;
- }
-
- f->pcnt++; /* update stats */
- f->bcnt += pktlen;
- f->timestamp = time_uptime;
- stack = (uint16_t *)(mtag + 1);
-
- /*
- * The `call' action may use cached f_pos
- * (in f->next_rule), whose version is written
- * in f->x_next.
- * The `return' action, however, doesn't have
- * fixed jump address in cmd->arg1 and can't use
- * cache.
- */
- if (IS_CALL) {
- stack[mtag->m_tag_id] = f->rulenum;
- mtag->m_tag_id++;
- if (cmd->arg1 != IP_FW_TABLEARG &&
- (uintptr_t)f->x_next == chain->id) {
- f_pos = (uintptr_t)f->next_rule;
- } else {
- jmpto = (cmd->arg1 ==
- IP_FW_TABLEARG) ? tablearg:
- cmd->arg1;
- f_pos = ipfw_find_rule(chain,
- jmpto, 0);
- /* update the cache */
- if (cmd->arg1 !=
- IP_FW_TABLEARG) {
- f->next_rule =
- (void *)(uintptr_t)
- f_pos;
- f->x_next =
- (void *)(uintptr_t)
- chain->id;
- }
- }
- } else { /* `return' action */
- mtag->m_tag_id--;
- jmpto = stack[mtag->m_tag_id] + 1;
- f_pos = ipfw_find_rule(chain, jmpto, 0);
- }
-
- /*
- * Skip disabled rules, and re-enter
- * the inner loop with the correct
- * f_pos, f, l and cmd.
- * Also clear cmdlen and skip_or
- */
- for (; f_pos < chain->n_rules - 1 &&
- (V_set_disable &
- (1 << chain->map[f_pos]->set)); f_pos++)
- ;
- /* Re-enter the inner loop at the dest rule. */
- f = chain->map[f_pos];
- l = f->cmd_len;
- cmd = f->cmd;
- cmdlen = 0;
- skip_or = 0;
- continue;
- break; /* NOTREACHED */
- }
-#undef IS_CALL
-#undef IS_RETURN
-
- case O_REJECT:
- /*
- * Drop the packet and send a reject notice
- * if the packet is not ICMP (or is an ICMP
- * query), and it is not multicast/broadcast.
- */
- if (hlen > 0 && is_ipv4 && offset == 0 &&
- (proto != IPPROTO_ICMP ||
- is_icmp_query(ICMP(ulp))) &&
- !(m->m_flags & (M_BCAST|M_MCAST)) &&
- !IN_MULTICAST(ntohl(dst_ip.s_addr))) {
- send_reject(args, cmd->arg1, iplen, ip);
- m = args->m;
- }
- /* FALLTHROUGH */
-#ifdef INET6
- case O_UNREACH6:
- if (hlen > 0 && is_ipv6 &&
- ((offset & IP6F_OFF_MASK) == 0) &&
- (proto != IPPROTO_ICMPV6 ||
- (is_icmp6_query(icmp6_type) == 1)) &&
- !(m->m_flags & (M_BCAST|M_MCAST)) &&
- !IN6_IS_ADDR_MULTICAST(&args->f_id.dst_ip6)) {
- send_reject6(
- args, cmd->arg1, hlen,
- (struct ip6_hdr *)ip);
- m = args->m;
- }
- /* FALLTHROUGH */
-#endif
- case O_DENY:
- retval = IP_FW_DENY;
- l = 0; /* exit inner loop */
- done = 1; /* exit outer loop */
- break;
-
- case O_FORWARD_IP:
- if (args->eh) /* not valid on layer2 pkts */
- break;
- if (q == NULL || q->rule != f ||
- dyn_dir == MATCH_FORWARD) {
- struct sockaddr_in *sa;
- sa = &(((ipfw_insn_sa *)cmd)->sa);
- if (sa->sin_addr.s_addr == INADDR_ANY) {
- bcopy(sa, &args->hopstore,
- sizeof(*sa));
- args->hopstore.sin_addr.s_addr =
- htonl(tablearg);
- args->next_hop = &args->hopstore;
- } else {
- args->next_hop = sa;
- }
- }
- retval = IP_FW_PASS;
- l = 0; /* exit inner loop */
- done = 1; /* exit outer loop */
- break;
-
-#ifdef INET6
- case O_FORWARD_IP6:
- if (args->eh) /* not valid on layer2 pkts */
- break;
- if (q == NULL || q->rule != f ||
- dyn_dir == MATCH_FORWARD) {
- struct sockaddr_in6 *sin6;
-
- sin6 = &(((ipfw_insn_sa6 *)cmd)->sa);
- args->next_hop6 = sin6;
- }
- retval = IP_FW_PASS;
- l = 0; /* exit inner loop */
- done = 1; /* exit outer loop */
- break;
-#endif
-
- case O_NETGRAPH:
- case O_NGTEE:
- set_match(args, f_pos, chain);
- args->rule.info = (cmd->arg1 == IP_FW_TABLEARG) ?
- tablearg : cmd->arg1;
- if (V_fw_one_pass)
- args->rule.info |= IPFW_ONEPASS;
- retval = (cmd->opcode == O_NETGRAPH) ?
- IP_FW_NETGRAPH : IP_FW_NGTEE;
- l = 0; /* exit inner loop */
- done = 1; /* exit outer loop */
- break;
-
- case O_SETFIB: {
- uint32_t fib;
-
- f->pcnt++; /* update stats */
- f->bcnt += pktlen;
- f->timestamp = time_uptime;
- fib = (cmd->arg1 == IP_FW_TABLEARG) ? tablearg:
- cmd->arg1;
- if (fib >= rt_numfibs)
- fib = 0;
- M_SETFIB(m, fib);
- args->f_id.fib = fib;
- l = 0; /* exit inner loop */
- break;
- }
-
- case O_NAT:
- if (!IPFW_NAT_LOADED) {
- retval = IP_FW_DENY;
- } else {
- struct cfg_nat *t;
- int nat_id;
-
- set_match(args, f_pos, chain);
- /* Check if this is 'global' nat rule */
- if (cmd->arg1 == 0) {
- retval = ipfw_nat_ptr(args, NULL, m);
- l = 0;
- done = 1;
- break;
- }
- t = ((ipfw_insn_nat *)cmd)->nat;
- if (t == NULL) {
- nat_id = (cmd->arg1 == IP_FW_TABLEARG) ?
- tablearg : cmd->arg1;
- t = (*lookup_nat_ptr)(&chain->nat, nat_id);
-
- if (t == NULL) {
- retval = IP_FW_DENY;
- l = 0; /* exit inner loop */
- done = 1; /* exit outer loop */
- break;
- }
- if (cmd->arg1 != IP_FW_TABLEARG)
- ((ipfw_insn_nat *)cmd)->nat = t;
- }
- retval = ipfw_nat_ptr(args, t, m);
- }
- l = 0; /* exit inner loop */
- done = 1; /* exit outer loop */
- break;
-
- case O_REASS: {
- int ip_off;
-
- f->pcnt++;
- f->bcnt += pktlen;
- l = 0; /* in any case exit inner loop */
- ip_off = ntohs(ip->ip_off);
-
- /* if not fragmented, go to next rule */
- if ((ip_off & (IP_MF | IP_OFFMASK)) == 0)
- break;
- /*
- * ip_reass() expects len & off in host
- * byte order.
- */
- SET_HOST_IPLEN(ip);
-
- args->m = m = ip_reass(m);
-
- /*
- * do IP header checksum fixup.
- */
- if (m == NULL) { /* fragment got swallowed */
- retval = IP_FW_DENY;
- } else { /* good, packet complete */
- int hlen;
-
- ip = mtod(m, struct ip *);
- hlen = ip->ip_hl << 2;
- SET_NET_IPLEN(ip);
- ip->ip_sum = 0;
- if (hlen == sizeof(struct ip))
- ip->ip_sum = in_cksum_hdr(ip);
- else
- ip->ip_sum = in_cksum(m, hlen);
- retval = IP_FW_REASS;
- set_match(args, f_pos, chain);
- }
- done = 1; /* exit outer loop */
- break;
- }
-
- default:
- panic("-- unknown opcode %d\n", cmd->opcode);
- } /* end of switch() on opcodes */
- /*
- * if we get here with l=0, then match is irrelevant.
- */
-
- if (cmd->len & F_NOT)
- match = !match;
-
- if (match) {
- if (cmd->len & F_OR)
- skip_or = 1;
- } else {
- if (!(cmd->len & F_OR)) /* not an OR block, */
- break; /* try next rule */
- }
-
- } /* end of inner loop, scan opcodes */
-#undef PULLUP_LEN
-
- if (done)
- break;
-
-/* next_rule:; */ /* try next rule */
-
- } /* end of outer for, scan rules */
-
- if (done) {
- struct ip_fw *rule = chain->map[f_pos];
- /* Update statistics */
- rule->pcnt++;
- rule->bcnt += pktlen;
- rule->timestamp = time_uptime;
- } else {
- retval = IP_FW_DENY;
- printf("ipfw: ouch!, skip past end of rules, denying packet\n");
- }
- IPFW_RUNLOCK(chain);
-#ifdef __FreeBSD__
- if (ucred_cache != NULL)
- crfree(ucred_cache);
-#endif
- return (retval);
-
-pullup_failed:
- if (V_fw_verbose)
- printf("ipfw: pullup failed\n");
- return (IP_FW_DENY);
-}
-
-/*
- * Set maximum number of tables that can be used in given VNET ipfw instance.
- *
- * Sysctl handler.  A local copy seeded with the current V_fw_tables_max is
- * passed to sysctl_handle_int().  If that call reports an error, or the
- * request carries no new value (req->newptr == NULL), the result of
- * sysctl_handle_int() is returned as is.  Otherwise the value left in the
- * local copy is handed to ipfw_resize_tables() for V_layer3_chain and the
- * return value of that call becomes the handler's return value.
- *
- * NOTE(review): no range check is performed here; presumably
- * ipfw_resize_tables() validates the requested count -- confirm.
- */
-#ifdef SYSCTL_NODE
-static int
-sysctl_ipfw_table_num(SYSCTL_HANDLER_ARGS)
-{
- int error;
- unsigned int ntables;
-
- ntables = V_fw_tables_max; /* start from the current limit */
-
- error = sysctl_handle_int(oidp, &ntables, 0, req);
- /* Read-only request or an error: nothing to resize, return as is. */
- if ((error != 0) || (req->newptr == NULL))
- return (error);
-
- return (ipfw_resize_tables(&V_layer3_chain, ntables));
-}
-#endif /* SYSCTL_NODE */
-/*
- * Module and VNET glue
- */
-
-/*
- * Stuff that must be initialised only on boot or module load
- *
- * In order: calls ipfw_dyn_attach(), prints a one-line banner describing
- * the compile-time configuration, clamps default_fw_tables to
- * IPFW_TABLES_MAX and calls ipfw_log_bpf(1).
- *
- * Returns the local `error', which is initialised to 0 and never
- * modified in this function.
- */
-static int
-ipfw_init(void)
-{
- int error = 0;
-
- ipfw_dyn_attach();
- /*
- * Only print out this stuff the first time around,
- * when called from the sysinit code.
- *
- * The format string is assembled from adjacent string literals
- * selected by INET6 and IPFIREWALL_FORWARD; the three %s
- * conversions are filled, in order, by the IPDIVERT string, the
- * IPFIREWALL_NAT string and the default_to_accept string below.
- * The banner ends with "logging " and no newline: the line is
- * completed by one of the printf() calls further down.
- */
- printf("ipfw2 "
-#ifdef INET6
- "(+ipv6) "
-#endif
- "initialized, divert %s, nat %s, "
- "rule-based forwarding "
-#ifdef IPFIREWALL_FORWARD
- "enabled, "
-#else
- "disabled, "
-#endif
- "default to %s, logging ",
-#ifdef IPDIVERT
- "enabled", /* fills "divert %s" */
-#else
- "loadable", /* fills "divert %s" */
-#endif
-#ifdef IPFIREWALL_NAT
- "enabled", /* fills "nat %s" */
-#else
- "loadable", /* fills "nat %s" */
-#endif
- default_to_accept ? "accept" : "deny");
-
- /*
- * Note: V_xxx variables can be accessed here but the vnet specific
- * initializer may not have been called yet for the VIMAGE case.
- * Tuneables will have been processed. We will print out values for
- * the default vnet.
- * XXX This should all be rationalized AFTER 8.0
- */
- /* Finish the banner line with the logging state. */
- if (V_fw_verbose == 0)
- printf("disabled\n");
- else if (V_verbose_limit == 0)
- printf("unlimited\n");
- else
- printf("limited to %d packets/entry by default\n",
- V_verbose_limit);
-
- /* Clamp the user-supplied table count to the supported maximum. */
- if (default_fw_tables > IPFW_TABLES_MAX)
- default_fw_tables = IPFW_TABLES_MAX;
-
- ipfw_log_bpf(1); /* init */
- return (error);
-}
-
-/*
- * Called for the removal of the last instance only on module unload.
- * Mirrors ipfw_init() in reverse order: ipfw_log_bpf(0) first, then
- * ipfw_dyn_detach(), and finally a console message.
- */
-static void
-ipfw_destroy(void)
-{
-
- ipfw_log_bpf(0); /* uninit */
- ipfw_dyn_detach();
- printf("IP firewall unloaded\n");
-}
-
-/*
- * Stuff that must be initialized for every instance
- * (including the first of course).
- *
- * Applies the compile-time defaults, builds the rule map holding a single
- * default rule (accept or deny according to default_to_accept), sets up
- * the lookup tables, the chain lock and the dynamic-rule state, then
- * publishes the sockopt handler and attaches the pfil hooks.
- *
- * Returns 0 on success, ENOSPC if ipfw_init_tables() fails, otherwise
- * whatever ipfw_attach_hooks(1) returns.
- */
-static int
-vnet_ipfw_init(const void *unused)
-{
-	int error;
-	struct ip_fw *rule;
-	struct ip_fw_chain *chain;
-
-	chain = &V_layer3_chain;
-
-	/* First set up some values that are compile time options */
-	V_autoinc_step = 100;	/* bounded to 1..1000 in add_rule() */
-	V_fw_deny_unknown_exthdrs = 1;
-#ifdef IPFIREWALL_VERBOSE
-	V_fw_verbose = 1;
-#endif
-#ifdef IPFIREWALL_VERBOSE_LIMIT
-	V_verbose_limit = IPFIREWALL_VERBOSE_LIMIT;
-#endif
-#ifdef IPFIREWALL_NAT
-	LIST_INIT(&chain->nat);
-#endif
-
-	/*
-	 * Insert the default rule and create the initial map.
-	 * Both allocations use M_WAITOK, which sleeps instead of
-	 * failing, so no NULL checks are needed on the results.
-	 */
-	chain->n_rules = 1;
-	chain->static_len = sizeof(struct ip_fw);
-	chain->map = malloc(sizeof(struct ip_fw *), M_IPFW, M_WAITOK | M_ZERO);
-	rule = malloc(chain->static_len, M_IPFW, M_WAITOK | M_ZERO);
-
-	/* Set initial number of tables */
-	V_fw_tables_max = default_fw_tables;
-	error = ipfw_init_tables(chain);
-	if (error) {
-		printf("ipfw2: setting up tables failed\n");
-		free(chain->map, M_IPFW);
-		free(rule, M_IPFW);
-		/*
-		 * Leave the chain empty rather than advertising one rule
-		 * behind a pointer to freed memory.
-		 */
-		chain->map = NULL;
-		chain->n_rules = 0;
-		return (ENOSPC);
-	}
-
-	/* fill and insert the default rule */
-	rule->act_ofs = 0;
-	rule->rulenum = IPFW_DEFAULT_RULE;
-	rule->cmd_len = 1;
-	rule->set = RESVD_SET;
-	rule->cmd[0].len = 1;
-	rule->cmd[0].opcode = default_to_accept ? O_ACCEPT : O_DENY;
-	chain->rules = chain->default_rule = chain->map[0] = rule;
-	chain->id = rule->id = 1;
-
-	IPFW_LOCK_INIT(chain);
-	ipfw_dyn_init();
-
-	/* The instance is fully set up: let callers in. */
-	V_ipfw_vnet_ready = 1;		/* Open for business */
-
-	/*
-	 * Hook the sockopt handler and pfil hooks for ipv4 and ipv6.
-	 * Even if the latter two fail we still keep the module alive
-	 * because the sockopt and layer2 paths are still useful.
-	 * ipfw[6]_hook return 0 on success, ENOENT on failure,
-	 * so we can ignore the exact return value and just set a flag.
-	 *
-	 * Note that V_fw[6]_enable are manipulated by a SYSCTL_PROC so
-	 * changes in the underlying (per-vnet) variables trigger
-	 * immediate hook()/unhook() calls.
-	 * In layer2 we have the same behaviour, except that V_ether_ipfw
-	 * is checked on each packet because there are no pfil hooks.
-	 */
-	V_ip_fw_ctl_ptr = ipfw_ctl;
-	error = ipfw_attach_hooks(1);
-	return (error);
-}
-
-/*
- * Called for the removal of each instance.
- *
- * Marks the instance as not ready, detaches the hooks and the sockopt
- * handler, drains the dynamic-rule state, destroys the tables, then
- * collects every static rule on a private list (linked through x_next)
- * so they can be freed by ipfw_reap_rules() after the locks are dropped.
- * Always returns 0.
- */
-static int
-vnet_ipfw_uninit(const void *unused)
-{
- struct ip_fw *reap, *rule;
- struct ip_fw_chain *chain = &V_layer3_chain;
- int i;
-
- V_ipfw_vnet_ready = 0; /* tell new callers to go away */
- /*
- * disconnect from ipv4, ipv6, layer2 and sockopt.
- * Then grab, release and grab again the WLOCK so we make
- * sure the update is propagated and nobody will be in.
- */
- (void)ipfw_attach_hooks(0 /* detach */);
- V_ip_fw_ctl_ptr = NULL;
- IPFW_UH_WLOCK(chain);
- IPFW_UH_WUNLOCK(chain);
- IPFW_UH_WLOCK(chain);
- /* The UH write lock taken here is held until after the map is freed. */
-
- IPFW_WLOCK(chain);
- ipfw_dyn_uninit(0); /* run the callout_drain */
- IPFW_WUNLOCK(chain);
-
- ipfw_destroy_tables(chain);
- reap = NULL;
- IPFW_WLOCK(chain);
- /*
- * Push every rule onto the 'reap' list.
- * NOTE(review): chain->map[i] is dereferenced here, before the
- * NULL test on chain->map below; that test only protects free().
- * Confirm map can never be NULL while n_rules is non-zero.
- */
- for (i = 0; i < chain->n_rules; i++) {
- rule = chain->map[i];
- rule->x_next = reap;
- reap = rule;
- }
- if (chain->map)
- free(chain->map, M_IPFW);
- IPFW_WUNLOCK(chain);
- IPFW_UH_WUNLOCK(chain);
- /* Free the collected rules only after both locks are released. */
- if (reap != NULL)
- ipfw_reap_rules(reap);
- IPFW_LOCK_DESTROY(chain);
- ipfw_dyn_uninit(1); /* free the remaining parts */
- return 0;
-}
-
-/*
- * Module event handler.
- *
- * Most of these events could be handled either here or by the
- * (VNET_)SYS(UN)INIT handlers.  The SYSINIT handlers were chosen because
- * they express the flow of control during module and vnet operations
- * better, so this function is only a skeleton: the four known events are
- * accepted without doing anything and every other event is rejected.
- * There is no SYSINIT equivalent of the module SHUTDOWN handler, but
- * nothing needs doing in that case anyhow.
- */
-static int
-ipfw_modevent(module_t mod, int type, void *unused)
-{
-
-	switch (type) {
-	case MOD_LOAD:		/* once at module load, or boot if compiled in */
-	case MOD_QUIESCE:	/* before unload; may veto unloading */
-	case MOD_UNLOAD:	/* during unload */
-	case MOD_SHUTDOWN:	/* during system shutdown */
-		return 0;
-	default:
-		return EOPNOTSUPP;
-	}
-}
-
-/* Module description handed to DECLARE_MODULE() below. */
-static moduledata_t ipfwmod = {
- "ipfw", /* module name */
- ipfw_modevent, /* event handler (a skeleton, see above) */
- 0 /* no extra data */
-};
-
-/*
- * Define startup order.
- * All three stages share the same subsystem and differ only in their
- * order value: module event first, then ipfw_init(), then the per-vnet
- * initialiser.
- */
-#define IPFW_SI_SUB_FIREWALL SI_SUB_PROTO_IFATTACHDOMAIN
-#define IPFW_MODEVENT_ORDER (SI_ORDER_ANY - 255) /* On boot slot in here. */
-#define IPFW_MODULE_ORDER (IPFW_MODEVENT_ORDER + 1) /* A little later. */
-#define IPFW_VNET_ORDER (IPFW_MODEVENT_ORDER + 2) /* Later still. */
-
-DECLARE_MODULE(ipfw, ipfwmod, IPFW_SI_SUB_FIREWALL, IPFW_MODEVENT_ORDER);
-MODULE_VERSION(ipfw, 2);
-/* should declare some dependencies here */
-
-/*
- * Starting up. Done in order after ipfwmod() has been called.
- * VNET_SYSINIT is also called for each existing vnet and each new vnet.
- */
-SYSINIT(ipfw_init, IPFW_SI_SUB_FIREWALL, IPFW_MODULE_ORDER,
- ipfw_init, NULL);
-VNET_SYSINIT(vnet_ipfw_init, IPFW_SI_SUB_FIREWALL, IPFW_VNET_ORDER,
- vnet_ipfw_init, NULL);
-
-/*
- * Closing up shop. These are done in REVERSE ORDER, but still
- * after ipfwmod() has been called. Not called on reboot.
- * VNET_SYSUNINIT is also called for each exiting vnet as it exits,
- * or when the module is unloaded.
- */
-SYSUNINIT(ipfw_destroy, IPFW_SI_SUB_FIREWALL, IPFW_MODULE_ORDER,
- ipfw_destroy, NULL);
-VNET_SYSUNINIT(vnet_ipfw_uninit, IPFW_SI_SUB_FIREWALL, IPFW_VNET_ORDER,
- vnet_ipfw_uninit, NULL);
-/* end of file */
diff --git a/sys/netinet/ipfw/ip_fw_dynamic.c b/sys/netinet/ipfw/ip_fw_dynamic.c
deleted file mode 100644
index edf7639..0000000
--- a/sys/netinet/ipfw/ip_fw_dynamic.c
+++ /dev/null
@@ -1,1244 +0,0 @@
-/*-
- * Copyright (c) 2002 Luigi Rizzo, Universita` di Pisa
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
-/*
- * Debug wrappers. DEB(x) expands to nothing, so everything wrapped in
- * DEB() in this file is compiled out; DDB(x) expands to x unchanged.
- */
-#define DEB(x)
-#define DDB(x) x
-
-/*
- * Dynamic rule support for ipfw
- */
-
-#include "opt_ipfw.h"
-#include "opt_inet.h"
-#ifndef INET
-#error IPFIREWALL requires INET.
-#endif /* INET */
-#include "opt_inet6.h"
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/malloc.h>
-#include <sys/mbuf.h>
-#include <sys/kernel.h>
-#include <sys/lock.h>
-#include <sys/socket.h>
-#include <sys/sysctl.h>
-#include <sys/syslog.h>
-#include <net/ethernet.h> /* for ETHERTYPE_IP */
-#include <net/if.h>
-#include <net/vnet.h>
-
-#include <netinet/in.h>
-#include <netinet/ip.h>
-#include <netinet/ip_var.h> /* ip_defttl */
-#include <netinet/ip_fw.h>
-#include <netinet/ipfw/ip_fw_private.h>
-#include <netinet/tcp_var.h>
-#include <netinet/udp.h>
-
-#include <netinet/ip6.h> /* IN6_ARE_ADDR_EQUAL */
-#ifdef INET6
-#include <netinet6/in6_var.h>
-#include <netinet6/ip6_var.h>
-#endif
-
-#include <machine/in_cksum.h> /* XXX for in_cksum */
-
-#ifdef MAC
-#include <security/mac/mac_framework.h>
-#endif
-
-/*
- * Description of dynamic rules.
- *
- * Dynamic rules are stored in lists accessed through a hash table
- * (ipfw_dyn_v) whose size is curr_dyn_buckets. This value can
- * be modified through the sysctl variable dyn_buckets which is
- * updated when the table becomes empty.
- *
- * XXX currently there is only one list, ipfw_dyn.
- *
- * When a packet is received, its address fields are first masked
- * with the mask defined for the rule, then hashed, then matched
- * against the entries in the corresponding list.
- * Dynamic rules can be used for different purposes:
- * + stateful rules;
- * + enforcing limits on the number of sessions;
- * + in-kernel NAT (not implemented yet)
- *
- * The lifetime of dynamic rules is regulated by dyn_*_lifetime,
- * measured in seconds and depending on the flags.
- *
- * The total number of dynamic rules is stored in dyn_count.
- * The max number of dynamic rules is dyn_max. When we reach
- * the maximum number of rules we do not create any more. This is
- * done to avoid consuming too much memory, but also too much
- * time when searching on each packet (ideally, we should try instead
- * to put a limit on the length of the list on each bucket...).
- *
- * Each dynamic rule holds a pointer to the parent ipfw rule so
- * we know what action to perform. Dynamic rules are removed when
- * the parent rule is deleted. XXX we should make them survive.
- *
- * There are some limitations with dynamic rules -- we do not
- * obey the 'randomized match', and we do not do multiple
- * passes through the firewall. XXX check the latter!!!
- */
-
-/*
- * Static variables followed by global ones
- */
-/* Per-VNET hash table state: bucket array, requested and current size. */
-static VNET_DEFINE(ipfw_dyn_rule **, ipfw_dyn_v);
-static VNET_DEFINE(u_int32_t, dyn_buckets);
-static VNET_DEFINE(u_int32_t, curr_dyn_buckets);
-static VNET_DEFINE(struct callout, ipfw_timeout);
-#define V_ipfw_dyn_v VNET(ipfw_dyn_v)
-#define V_dyn_buckets VNET(dyn_buckets)
-#define V_curr_dyn_buckets VNET(curr_dyn_buckets)
-#define V_ipfw_timeout VNET(ipfw_timeout)
-
-/*
- * The allocation zone and the mutex below are plain statics, i.e. a
- * single instance shared by all vnets, unlike the VNET_DEFINE'd state.
- */
-static uma_zone_t ipfw_dyn_rule_zone;
-#ifndef __FreeBSD__
-DEFINE_SPINLOCK(ipfw_dyn_mtx);
-#else
-static struct mtx ipfw_dyn_mtx; /* mutex guarding dynamic rules */
-#endif
-
-/*
- * Lock helpers for the dynamic rule table.
- * NOTE(review): these use the mtx_*() API unconditionally, including in
- * the !__FreeBSD__ case where ipfw_dyn_mtx is declared with
- * DEFINE_SPINLOCK; presumably a compatibility layer maps them - confirm.
- */
-#define IPFW_DYN_LOCK_INIT() \
- mtx_init(&ipfw_dyn_mtx, "IPFW dynamic rules", NULL, MTX_DEF)
-#define IPFW_DYN_LOCK_DESTROY() mtx_destroy(&ipfw_dyn_mtx)
-#define IPFW_DYN_LOCK() mtx_lock(&ipfw_dyn_mtx)
-#define IPFW_DYN_UNLOCK() mtx_unlock(&ipfw_dyn_mtx)
-#define IPFW_DYN_LOCK_ASSERT() mtx_assert(&ipfw_dyn_mtx, MA_OWNED)
-
-/*
- * Release the dynamic-rule mutex.
- * Non-static wrapper around IPFW_DYN_UNLOCK(); ipfw_lookup_dyn_rule()
- * returns with the mutex held when it finds a rule, and this is the
- * function visible in this file for dropping it afterwards.
- */
-void
-ipfw_dyn_unlock(void)
-{
- IPFW_DYN_UNLOCK();
-}
-
-/*
- * Timeouts for various events in handling dynamic rules.
- * Each value is added to time_uptime to compute a rule's expiry.
- */
-static VNET_DEFINE(u_int32_t, dyn_ack_lifetime);
-static VNET_DEFINE(u_int32_t, dyn_syn_lifetime);
-static VNET_DEFINE(u_int32_t, dyn_fin_lifetime);
-static VNET_DEFINE(u_int32_t, dyn_rst_lifetime);
-static VNET_DEFINE(u_int32_t, dyn_udp_lifetime);
-static VNET_DEFINE(u_int32_t, dyn_short_lifetime);
-
-#define V_dyn_ack_lifetime VNET(dyn_ack_lifetime)
-#define V_dyn_syn_lifetime VNET(dyn_syn_lifetime)
-#define V_dyn_fin_lifetime VNET(dyn_fin_lifetime)
-#define V_dyn_rst_lifetime VNET(dyn_rst_lifetime)
-#define V_dyn_udp_lifetime VNET(dyn_udp_lifetime)
-#define V_dyn_short_lifetime VNET(dyn_short_lifetime)
-
-/*
- * Keepalives are sent if dyn_keepalive is set. They are sent every
- * dyn_keepalive_period seconds, in the last dyn_keepalive_interval
- * seconds of lifetime of a rule.
- * dyn_rst_lifetime and dyn_fin_lifetime should be strictly lower
- * than dyn_keepalive_period; lookup_dyn_rule_locked() clamps them to
- * dyn_keepalive_period - 1 when they are not.
- */
-
-static VNET_DEFINE(u_int32_t, dyn_keepalive_interval);
-static VNET_DEFINE(u_int32_t, dyn_keepalive_period);
-static VNET_DEFINE(u_int32_t, dyn_keepalive);
-
-#define V_dyn_keepalive_interval VNET(dyn_keepalive_interval)
-#define V_dyn_keepalive_period VNET(dyn_keepalive_period)
-#define V_dyn_keepalive VNET(dyn_keepalive)
-
-static VNET_DEFINE(u_int32_t, dyn_count); /* # of dynamic rules */
-static VNET_DEFINE(u_int32_t, dyn_max); /* max # of dynamic rules */
-
-#define V_dyn_count VNET(dyn_count)
-#define V_dyn_max VNET(dyn_max)
-
-#ifdef SYSCTL_NODE
-
-/*
- * Sysctl variables for dynamic rules, all under _net_inet_ip_fw.
- * curr_dyn_buckets and dyn_count are read-only (CTLFLAG_RD); the others
- * are read-write. dyn_keepalive_interval and dyn_keepalive_period are
- * not exported here.
- */
-SYSBEGIN(f2)
-
-SYSCTL_DECL(_net_inet_ip_fw);
-/* Requested size; applied by realloc_dynamic_table(), not immediately. */
-SYSCTL_VNET_UINT(_net_inet_ip_fw, OID_AUTO, dyn_buckets,
- CTLFLAG_RW, &VNET_NAME(dyn_buckets), 0,
- "Number of dyn. buckets");
-SYSCTL_VNET_UINT(_net_inet_ip_fw, OID_AUTO, curr_dyn_buckets,
- CTLFLAG_RD, &VNET_NAME(curr_dyn_buckets), 0,
- "Current Number of dyn. buckets");
-SYSCTL_VNET_UINT(_net_inet_ip_fw, OID_AUTO, dyn_count,
- CTLFLAG_RD, &VNET_NAME(dyn_count), 0,
- "Number of dyn. rules");
-SYSCTL_VNET_UINT(_net_inet_ip_fw, OID_AUTO, dyn_max,
- CTLFLAG_RW, &VNET_NAME(dyn_max), 0,
- "Max number of dyn. rules");
-SYSCTL_VNET_UINT(_net_inet_ip_fw, OID_AUTO, dyn_ack_lifetime,
- CTLFLAG_RW, &VNET_NAME(dyn_ack_lifetime), 0,
- "Lifetime of dyn. rules for acks");
-SYSCTL_VNET_UINT(_net_inet_ip_fw, OID_AUTO, dyn_syn_lifetime,
- CTLFLAG_RW, &VNET_NAME(dyn_syn_lifetime), 0,
- "Lifetime of dyn. rules for syn");
-SYSCTL_VNET_UINT(_net_inet_ip_fw, OID_AUTO, dyn_fin_lifetime,
- CTLFLAG_RW, &VNET_NAME(dyn_fin_lifetime), 0,
- "Lifetime of dyn. rules for fin");
-SYSCTL_VNET_UINT(_net_inet_ip_fw, OID_AUTO, dyn_rst_lifetime,
- CTLFLAG_RW, &VNET_NAME(dyn_rst_lifetime), 0,
- "Lifetime of dyn. rules for rst");
-SYSCTL_VNET_UINT(_net_inet_ip_fw, OID_AUTO, dyn_udp_lifetime,
- CTLFLAG_RW, &VNET_NAME(dyn_udp_lifetime), 0,
- "Lifetime of dyn. rules for UDP");
-SYSCTL_VNET_UINT(_net_inet_ip_fw, OID_AUTO, dyn_short_lifetime,
- CTLFLAG_RW, &VNET_NAME(dyn_short_lifetime), 0,
- "Lifetime of dyn. rules for other situations");
-SYSCTL_VNET_UINT(_net_inet_ip_fw, OID_AUTO, dyn_keepalive,
- CTLFLAG_RW, &VNET_NAME(dyn_keepalive), 0,
- "Enable keepalives for dyn. rules");
-
-SYSEND
-
-#endif /* SYSCTL_NODE */
-
-
-/*
- * Hash an IPv6 flow id.
- *
- * XORs together the two low 32-bit words of the destination and source
- * addresses and both ports.  Every term is paired with its src/dst
- * counterpart, so exchanging source and destination gives the same
- * value.  The result is not reduced to a bucket index here.
- */
-static __inline int
-hash_packet6(struct ipfw_flow_id *id)
-{
-	u_int32_t h;
-
-	h = id->dst_ip6.__u6_addr.__u6_addr32[2];
-	h ^= id->dst_ip6.__u6_addr.__u6_addr32[3];
-	h ^= id->src_ip6.__u6_addr.__u6_addr32[2];
-	h ^= id->src_ip6.__u6_addr.__u6_addr32[3];
-	h ^= id->dst_port;
-	h ^= id->src_port;
-	return h;
-}
-
-/*
- * Map a flow id to a bucket index in the dynamic rule table.
- *
- * IMPORTANT: the hash function for dynamic rules must be commutative
- * in source and destination (ip,port), because rules are bidirectional
- * and we want to find both in the same bucket.
- *
- * The raw hash is masked with V_curr_dyn_buckets - 1, which relies on
- * the bucket count being a power of 2.
- */
-static __inline int
-hash_packet(struct ipfw_flow_id *id)
-{
-	u_int32_t h;
-
-#ifdef INET6
-	if (IS_IP6_FLOW_ID(id)) {
-		h = hash_packet6(id);
-	} else
-#endif /* INET6 */
-	{
-		h = (id->dst_ip) ^ (id->src_ip) ^
-		    (id->dst_port) ^ (id->src_port);
-	}
-	h &= (V_curr_dyn_buckets - 1);
-	return h;
-}
-
-/*
- * Debug helper: print the endpoints of a flow that is about to be
- * unlinked, together with the rule count that will remain afterwards
- * (V_dyn_count - 1).  Only referenced from inside DEB().
- */
-static __inline void
-unlink_dyn_rule_print(struct ipfw_flow_id *id)
-{
-#ifdef INET6
-	char src[INET6_ADDRSTRLEN], dst[INET6_ADDRSTRLEN];
-#else
-	char src[INET_ADDRSTRLEN], dst[INET_ADDRSTRLEN];
-#endif
-	struct in_addr a4;
-
-#ifdef INET6
-	if (IS_IP6_FLOW_ID(id)) {
-		ip6_sprintf(src, &id->src_ip6);
-		ip6_sprintf(dst, &id->dst_ip6);
-	} else
-#endif
-	{
-		/* IPv4 addresses in the flow id need htonl() before printing */
-		a4.s_addr = htonl(id->src_ip);
-		inet_ntop(AF_INET, &a4, src, sizeof(src));
-		a4.s_addr = htonl(id->dst_ip);
-		inet_ntop(AF_INET, &a4, dst, sizeof(dst));
-	}
-	printf("ipfw: unlink entry %s %d -> %s %d, %d left\n",
-	    src, id->src_port, dst, id->dst_port, V_dyn_count - 1);
-}
-
-/**
- * unlink a dynamic rule from a chain. prev is a pointer to
- * the previous one, q is a pointer to the rule to delete,
- * head is a pointer to the head of the queue.
- * Modifies q and potentially also head.
- */
-#define UNLINK_DYN_RULE(prev, head, q) { \
- ipfw_dyn_rule *old_q = q; \
- \
- /* remove a refcount to the parent */ \
- if (q->dyn_type == O_LIMIT) \
- q->parent->count--; \
- DEB(unlink_dyn_rule_print(&q->id);) \
- if (prev != NULL) \
- prev->next = q = q->next; \
- else \
- head = q = q->next; \
- V_dyn_count--; \
- uma_zfree(ipfw_dyn_rule_zone, old_q); }
-
-#define TIME_LEQ(a,b) ((int)((a)-(b)) <= 0)
-
-/**
- * Remove dynamic rules pointing to "rule", or all of them if rule == NULL.
- *
- * If keep_me == NULL, rules are deleted even if not expired,
- * otherwise only expired rules are removed.
- *
- * The value of the second parameter is also used to identify
- * a rule we absolutely do not want to remove (e.g. because we are
- * holding a reference to it -- this is the case with O_LIMIT_PARENT
- * rules). The pointer is only used for comparison, so any non-null
- * value will do.
- *
- * Must be called with the dynamic-rule mutex held.
- */
-static void
-remove_dyn_rule(struct ip_fw *rule, ipfw_dyn_rule *keep_me)
-{
- /*
- * NOTE(review): function-static, hence one value shared by every
- * caller (and not per-VNET like the table itself) - confirm intended.
- */
- static u_int32_t last_remove = 0;
-
-/* NB: this macro is not #undef'd and stays defined for the rest of the file */
-#define FORCE (keep_me == NULL)
-
- ipfw_dyn_rule *prev, *q;
- int i, pass = 0, max_pass = 0;
-
- IPFW_DYN_LOCK_ASSERT();
-
- if (V_ipfw_dyn_v == NULL || V_dyn_count == 0)
- return;
- /* do not expire more than once per second, it is useless */
- if (!FORCE && last_remove == time_uptime)
- return;
- last_remove = time_uptime;
-
- /*
- * because O_LIMIT refer to parent rules, during the first pass only
- * remove child and mark any pending LIMIT_PARENT, and remove
- * them in a second pass.
- */
-next_pass:
- for (i = 0 ; i < V_curr_dyn_buckets ; i++) {
- for (prev=NULL, q = V_ipfw_dyn_v[i] ; q ; ) {
- /*
- * Logic can become complex here, so we split tests.
- */
- if (q == keep_me)
- goto next;
- if (rule != NULL && rule != q->rule)
- goto next; /* not the one we are looking for */
- if (q->dyn_type == O_LIMIT_PARENT) {
- /*
- * handle parent in the second pass,
- * record we need one.
- */
- max_pass = 1;
- if (pass == 0)
- goto next;
- if (FORCE && q->count != 0 ) {
- /* XXX should not happen! */
- printf("ipfw: OUCH! cannot remove rule,"
- " count %d\n", q->count);
- }
- } else {
- if (!FORCE &&
- !TIME_LEQ( q->expire, time_uptime ))
- goto next;
- }
- /* A parent is only unlinked once it has no children left. */
- if (q->dyn_type != O_LIMIT_PARENT || !q->count) {
- /* The macro advances q itself; prev stays where it is. */
- UNLINK_DYN_RULE(prev, V_ipfw_dyn_v[i], q);
- continue;
- }
-next:
- prev=q;
- q=q->next;
- }
- }
- /* Run the second pass only if a parent was seen during the first. */
- if (pass++ < max_pass)
- goto next_pass;
-}
-
-/*
- * Unconditionally remove every dynamic rule created from 'rule'.
- * Takes and releases the dynamic-rule mutex around remove_dyn_rule();
- * per that function's contract, passing rule == NULL removes all
- * dynamic rules.
- */
-void
-ipfw_remove_dyn_children(struct ip_fw *rule)
-{
- IPFW_DYN_LOCK();
- remove_dyn_rule(rule, NULL /* force removal */);
- IPFW_DYN_UNLOCK();
-}
-
-/*
- * Lookup a dynamic rule, locked version.
- *
- * Searches the bucket selected by hash_packet(pkt) for an entry whose
- * protocol, addresses and ports match pkt in either direction.  Expired
- * entries met on the way are unlinked.  A matching entry is moved to the
- * front of its bucket and its expiry is refreshed: from the TCP state
- * machine below for TCP, or with the UDP / short lifetime otherwise.
- *
- * pkt             flow id of the packet being matched.
- * match_direction if not NULL, receives MATCH_FORWARD, MATCH_REVERSE or
- *                 MATCH_NONE.
- * tcp             TCP header used for ack tracking; may be NULL, in which
- *                 case the established-state expiry is left untouched.
- *
- * Returns the matching rule or NULL.  The dynamic-rule mutex must be held.
- */
-static ipfw_dyn_rule *
-lookup_dyn_rule_locked(struct ipfw_flow_id *pkt, int *match_direction,
-    struct tcphdr *tcp)
-{
-	/*
-	 * Stateful ipfw extensions.
-	 * Lookup into dynamic session queue.
-	 */
-#define MATCH_REVERSE	0
-#define MATCH_FORWARD	1
-#define MATCH_NONE	2
-#define MATCH_UNKNOWN	3
-	int i, dir = MATCH_NONE;
-	ipfw_dyn_rule *prev, *q = NULL;
-
-	IPFW_DYN_LOCK_ASSERT();
-
-	if (V_ipfw_dyn_v == NULL)
-		goto done;	/* not found */
-	i = hash_packet(pkt);
-	for (prev = NULL, q = V_ipfw_dyn_v[i]; q != NULL;) {
-		/* Parents that still have children are never expired here. */
-		if (q->dyn_type == O_LIMIT_PARENT && q->count)
-			goto next;
-		if (TIME_LEQ(q->expire, time_uptime)) {	/* expire entry */
-			UNLINK_DYN_RULE(prev, V_ipfw_dyn_v[i], q);
-			continue;
-		}
-		if (pkt->proto != q->id.proto || q->dyn_type == O_LIMIT_PARENT)
-			goto next;
-
-		if (IS_IP6_FLOW_ID(pkt)) {
-			if (IN6_ARE_ADDR_EQUAL(&pkt->src_ip6, &q->id.src_ip6) &&
-			    IN6_ARE_ADDR_EQUAL(&pkt->dst_ip6, &q->id.dst_ip6) &&
-			    pkt->src_port == q->id.src_port &&
-			    pkt->dst_port == q->id.dst_port) {
-				dir = MATCH_FORWARD;
-				break;
-			}
-			if (IN6_ARE_ADDR_EQUAL(&pkt->src_ip6, &q->id.dst_ip6) &&
-			    IN6_ARE_ADDR_EQUAL(&pkt->dst_ip6, &q->id.src_ip6) &&
-			    pkt->src_port == q->id.dst_port &&
-			    pkt->dst_port == q->id.src_port) {
-				dir = MATCH_REVERSE;
-				break;
-			}
-		} else {
-			if (pkt->src_ip == q->id.src_ip &&
-			    pkt->dst_ip == q->id.dst_ip &&
-			    pkt->src_port == q->id.src_port &&
-			    pkt->dst_port == q->id.dst_port) {
-				dir = MATCH_FORWARD;
-				break;
-			}
-			if (pkt->src_ip == q->id.dst_ip &&
-			    pkt->dst_ip == q->id.src_ip &&
-			    pkt->src_port == q->id.dst_port &&
-			    pkt->dst_port == q->id.src_port) {
-				dir = MATCH_REVERSE;
-				break;
-			}
-		}
-next:
-		prev = q;
-		q = q->next;
-	}
-	if (q == NULL)
-		goto done;	/* q = NULL, not found */
-
-	if (prev != NULL) {	/* found and not in front */
-		prev->next = q->next;
-		q->next = V_ipfw_dyn_v[i];
-		V_ipfw_dyn_v[i] = q;
-	}
-	if (pkt->proto == IPPROTO_TCP) { /* update state according to flags */
-		uint32_t ack;
-		u_char flags = pkt->_flags & (TH_FIN | TH_SYN | TH_RST);
-
-		/*
-		 * q->state keeps the flags seen in the forward direction in
-		 * its low byte and those seen in reverse in the next byte.
-		 */
-#define BOTH_SYN	(TH_SYN | (TH_SYN << 8))
-#define BOTH_FIN	(TH_FIN | (TH_FIN << 8))
-#define TCP_FLAGS	(TH_FLAGS | (TH_FLAGS << 8))
-#define ACK_FWD		0x10000	/* fwd ack seen */
-#define ACK_REV		0x20000	/* rev ack seen */
-
-		q->state |= (dir == MATCH_FORWARD) ? flags : (flags << 8);
-		switch (q->state & TCP_FLAGS) {
-		case TH_SYN:			/* opening */
-			q->expire = time_uptime + V_dyn_syn_lifetime;
-			break;
-
-		case BOTH_SYN:			/* move to established */
-		case BOTH_SYN | TH_FIN:		/* one side tries to close */
-		case BOTH_SYN | (TH_FIN << 8):
-			/*
-			 * Subtract in unsigned arithmetic and only then
-			 * reinterpret as signed: subtracting two ints can
-			 * overflow, which is undefined behaviour, once the
-			 * ack numbers wrap.
-			 */
-#define _SEQ_GE(a,b)	((int)((a) - (b)) >= 0)
-			if (tcp == NULL)
-				break;
-
-			ack = ntohl(tcp->th_ack);
-			if (dir == MATCH_FORWARD) {
-				if (q->ack_fwd == 0 ||
-				    _SEQ_GE(ack, q->ack_fwd)) {
-					q->ack_fwd = ack;
-					q->state |= ACK_FWD;
-				}
-			} else {
-				if (q->ack_rev == 0 ||
-				    _SEQ_GE(ack, q->ack_rev)) {
-					q->ack_rev = ack;
-					q->state |= ACK_REV;
-				}
-			}
-			/* Extend the lifetime only after acks both ways. */
-			if ((q->state & (ACK_FWD | ACK_REV)) ==
-			    (ACK_FWD | ACK_REV)) {
-				q->expire = time_uptime + V_dyn_ack_lifetime;
-				q->state &= ~(ACK_FWD | ACK_REV);
-			}
-			break;
-
-		case BOTH_SYN | BOTH_FIN:	/* both sides closed */
-			if (V_dyn_fin_lifetime >= V_dyn_keepalive_period)
-				V_dyn_fin_lifetime = V_dyn_keepalive_period - 1;
-			q->expire = time_uptime + V_dyn_fin_lifetime;
-			break;
-
-		default:
-#if 0
-			/*
-			 * reset or some invalid combination, but can also
-			 * occur if we use keep-state the wrong way.
-			 */
-			if ( (q->state & ((TH_RST << 8)|TH_RST)) == 0)
-				printf("invalid state: 0x%x\n", q->state);
-#endif
-			if (V_dyn_rst_lifetime >= V_dyn_keepalive_period)
-				V_dyn_rst_lifetime = V_dyn_keepalive_period - 1;
-			q->expire = time_uptime + V_dyn_rst_lifetime;
-			break;
-		}
-	} else if (pkt->proto == IPPROTO_UDP) {
-		q->expire = time_uptime + V_dyn_udp_lifetime;
-	} else {
-		/* other protocols */
-		q->expire = time_uptime + V_dyn_short_lifetime;
-	}
-done:
-	if (match_direction != NULL)
-		*match_direction = dir;
-	return (q);
-}
-
-/*
- * Lookup a dynamic rule, taking the dynamic-rule mutex first.
- *
- * When a rule is found it is returned with the mutex still held and the
- * caller is responsible for releasing it.  When nothing matches the
- * mutex is released here and NULL is returned.
- */
-ipfw_dyn_rule *
-ipfw_lookup_dyn_rule(struct ipfw_flow_id *pkt, int *match_direction,
-    struct tcphdr *tcp)
-{
-	ipfw_dyn_rule *found;
-
-	IPFW_DYN_LOCK();
-	found = lookup_dyn_rule_locked(pkt, match_direction, tcp);
-	if (found != NULL)
-		return found;	/* NB: table stays locked for the caller */
-
-	IPFW_DYN_UNLOCK();
-	return NULL;
-}
-
-/*
- * (Re)allocate the dynamic rule hash table with V_dyn_buckets entries.
- *
- * The requested size must be a non-zero power of 2 no larger than 64k.
- * Values above 64k are replaced by 1024.  Zero or a non-power-of-2 is
- * rejected: V_dyn_buckets is reset to the current size and the table
- * is left untouched.  If the allocation fails the size is halved and
- * retried; V_ipfw_dyn_v may still be NULL on return and callers must
- * check it.
- *
- * The old table is freed without looking at its contents; the caller
- * visible in this file (add_dyn_rule) only gets here when there is no
- * table or V_dyn_count is 0.  Called with the dynamic-rule mutex held.
- */
-static void
-realloc_dynamic_table(void)
-{
-	IPFW_DYN_LOCK_ASSERT();
-
-	/*
-	 * Try reallocation, make sure we have a power of 2 and do
-	 * not allow more than 64k entries. In case of overflow,
-	 * default to 1024.
-	 */
-
-	if (V_dyn_buckets > 65536)
-		V_dyn_buckets = 1024;
-	/*
-	 * Zero passes the (x & (x - 1)) test but would produce an empty
-	 * table and an all-ones mask in hash_packet(), i.e. an
-	 * out-of-bounds bucket index, so it is refused explicitly.
-	 */
-	if (V_dyn_buckets == 0 ||
-	    (V_dyn_buckets & (V_dyn_buckets - 1)) != 0) {
-		V_dyn_buckets = V_curr_dyn_buckets; /* reset */
-		return;
-	}
-	V_curr_dyn_buckets = V_dyn_buckets;
-	if (V_ipfw_dyn_v != NULL)
-		free(V_ipfw_dyn_v, M_IPFW);
-	for (;;) {
-		V_ipfw_dyn_v = malloc(V_curr_dyn_buckets *
-		    sizeof(ipfw_dyn_rule *), M_IPFW, M_NOWAIT | M_ZERO);
-		if (V_ipfw_dyn_v != NULL || V_curr_dyn_buckets <= 2)
-			break;
-		V_curr_dyn_buckets /= 2;
-	}
-}
-
-/**
- * Install state of type 'type' for a dynamic session.
- * The hash table contains two type of rules:
- * - regular rules (O_KEEP_STATE)
- * - rules for sessions with limited number of sess per user
- * (O_LIMIT). When they are created, the parent is
- * increased by 1, and decreased on delete. In this case,
- * the third parameter is the parent rule and not the chain.
- * - "parent" rules for the above (O_LIMIT_PARENT).
- *
- * Returns the new entry, or NULL if the hash table or the entry itself
- * could not be allocated. The new entry is inserted at the head of its
- * bucket with an initial expiry of V_dyn_syn_lifetime.
- * Must be called with the dynamic-rule mutex held.
- */
-static ipfw_dyn_rule *
-add_dyn_rule(struct ipfw_flow_id *id, u_int8_t dyn_type, struct ip_fw *rule)
-{
- ipfw_dyn_rule *r;
- int i;
-
- IPFW_DYN_LOCK_ASSERT();
-
- /*
- * (Re)build the table if there is none yet, or if it is empty and
- * a different size has been requested.
- */
- if (V_ipfw_dyn_v == NULL ||
- (V_dyn_count == 0 && V_dyn_buckets != V_curr_dyn_buckets)) {
- realloc_dynamic_table();
- if (V_ipfw_dyn_v == NULL)
- return NULL; /* failed ! */
- }
- /* Hash only after a possible resize: the mask depends on the size. */
- i = hash_packet(id);
-
- r = uma_zalloc(ipfw_dyn_rule_zone, M_NOWAIT | M_ZERO);
- if (r == NULL) {
- printf ("ipfw: sorry cannot allocate state\n");
- return NULL;
- }
-
- /* increase refcount on parent, and set pointer */
- if (dyn_type == O_LIMIT) {
- /* For O_LIMIT, 'rule' is really the O_LIMIT_PARENT dynamic rule. */
- ipfw_dyn_rule *parent = (ipfw_dyn_rule *)rule;
- if ( parent->dyn_type != O_LIMIT_PARENT)
- panic("invalid parent");
- parent->count++;
- r->parent = parent;
- rule = parent->rule;
- }
-
- r->id = *id;
- r->expire = time_uptime + V_dyn_syn_lifetime;
- r->rule = rule;
- r->dyn_type = dyn_type;
- r->pcnt = r->bcnt = 0;
- r->count = 0;
-
- r->bucket = i;
- r->next = V_ipfw_dyn_v[i];
- V_ipfw_dyn_v[i] = r;
- V_dyn_count++;
- /* Debug trace only; DEB() expands to nothing in this file. */
- DEB({
- struct in_addr da;
-#ifdef INET6
- char src[INET6_ADDRSTRLEN];
- char dst[INET6_ADDRSTRLEN];
-#else
- char src[INET_ADDRSTRLEN];
- char dst[INET_ADDRSTRLEN];
-#endif
-
-#ifdef INET6
- if (IS_IP6_FLOW_ID(&(r->id))) {
- ip6_sprintf(src, &r->id.src_ip6);
- ip6_sprintf(dst, &r->id.dst_ip6);
- } else
-#endif
- {
- da.s_addr = htonl(r->id.src_ip);
- inet_ntop(AF_INET, &da, src, sizeof(src));
- da.s_addr = htonl(r->id.dst_ip);
- inet_ntop(AF_INET, &da, dst, sizeof(dst));
- }
- printf("ipfw: add dyn entry ty %d %s %d -> %s %d, total %d\n",
- dyn_type, src, r->id.src_port, dst, r->id.dst_port,
- V_dyn_count);
- })
- return r;
-}
-
-/**
- * Find the O_LIMIT_PARENT dynamic rule matching both 'pkt' and 'rule'.
- *
- * A parent matches when it was created by the same static rule and its
- * protocol, ports and addresses (IPv6 or IPv4, according to pkt) are
- * equal to those of pkt.  A parent that is found gets its expiry
- * refreshed with V_dyn_short_lifetime.  If none is found, or there is no
- * hash table yet, a new one is installed through add_dyn_rule() and its
- * result, which may be NULL, is returned.
- * Must be called with the dynamic-rule mutex held.
- */
-static ipfw_dyn_rule *
-lookup_dyn_parent(struct ipfw_flow_id *pkt, struct ip_fw *rule)
-{
-	ipfw_dyn_rule *q;
-	int i, is_v6;
-
-	IPFW_DYN_LOCK_ASSERT();
-
-	if (V_ipfw_dyn_v == NULL)
-		return add_dyn_rule(pkt, O_LIMIT_PARENT, rule);
-
-	is_v6 = IS_IP6_FLOW_ID(pkt);
-	i = hash_packet(pkt);
-	for (q = V_ipfw_dyn_v[i]; q != NULL; q = q->next) {
-		if (q->dyn_type != O_LIMIT_PARENT || rule != q->rule)
-			continue;
-		if (pkt->proto != q->id.proto ||
-		    pkt->src_port != q->id.src_port ||
-		    pkt->dst_port != q->id.dst_port)
-			continue;
-		if (is_v6) {
-			if (!IN6_ARE_ADDR_EQUAL(&(pkt->src_ip6),
-			    &(q->id.src_ip6)) ||
-			    !IN6_ARE_ADDR_EQUAL(&(pkt->dst_ip6),
-			    &(q->id.dst_ip6)))
-				continue;
-		} else {
-			if (pkt->src_ip != q->id.src_ip ||
-			    pkt->dst_ip != q->id.dst_ip)
-				continue;
-		}
-
-		/* Found it: keep the parent alive a little longer. */
-		q->expire = time_uptime + V_dyn_short_lifetime;
-		DEB(printf("ipfw: lookup_dyn_parent found 0x%p\n",q);)
-		return q;
-	}
-	return add_dyn_rule(pkt, O_LIMIT_PARENT, rule);
-}
-
-/**
- * Install dynamic state for rule type cmd->o.opcode
- *
- * rule      static rule that requested the state.
- * cmd       the O_KEEP_STATE or O_LIMIT instruction; for O_LIMIT it
- *           carries limit_mask and conn_limit.
- * args      packet arguments; args->f_id is the flow to record.
- * tablearg  replaces conn_limit when cmd->conn_limit == IP_FW_TABLEARG.
- *
- * Returns 0 when the state is installed or was already present.
- * Returns 1 (failure) if state is not installed because of errors or because
- * session limitations are enforced.
- *
- * Takes the dynamic-rule mutex for the duration of the call.
- */
-int
-ipfw_install_state(struct ip_fw *rule, ipfw_insn_limit *cmd,
-    struct ip_fw_args *args, uint32_t tablearg)
-{
-	static int last_log;
-	ipfw_dyn_rule *q;
-	struct in_addr da;
-#ifdef INET6
-	char src[INET6_ADDRSTRLEN + 2], dst[INET6_ADDRSTRLEN + 2];
-#else
-	char src[INET_ADDRSTRLEN], dst[INET_ADDRSTRLEN];
-#endif
-
-	src[0] = '\0';
-	dst[0] = '\0';
-
-	IPFW_DYN_LOCK();
-
-	DEB(
-#ifdef INET6
-	if (IS_IP6_FLOW_ID(&(args->f_id))) {
-		ip6_sprintf(src, &args->f_id.src_ip6);
-		ip6_sprintf(dst, &args->f_id.dst_ip6);
-	} else
-#endif
-	{
-		da.s_addr = htonl(args->f_id.src_ip);
-		inet_ntop(AF_INET, &da, src, sizeof(src));
-		da.s_addr = htonl(args->f_id.dst_ip);
-		inet_ntop(AF_INET, &da, dst, sizeof(dst));
-	}
-	printf("ipfw: %s: type %d %s %u -> %s %u\n",
-	    __func__, cmd->o.opcode, src, args->f_id.src_port,
-	    dst, args->f_id.dst_port);
-	src[0] = '\0';
-	dst[0] = '\0';
-	)
-
-	q = lookup_dyn_rule_locked(&args->f_id, NULL, NULL);
-
-	if (q != NULL) {	/* should never occur */
-		DEB(
-		if (last_log != time_uptime) {
-			last_log = time_uptime;
-			printf("ipfw: %s: entry already present, done\n",
-			    __func__);
-		})
-		IPFW_DYN_UNLOCK();
-		return (0);
-	}
-
-	if (V_dyn_count >= V_dyn_max)
-		/* Run out of slots, try to remove any expired rule. */
-		remove_dyn_rule(NULL, (ipfw_dyn_rule *)1);
-
-	if (V_dyn_count >= V_dyn_max) {
-		if (last_log != time_uptime) {
-			last_log = time_uptime;
-			printf("ipfw: %s: Too many dynamic rules\n", __func__);
-		}
-		IPFW_DYN_UNLOCK();
-		return (1);	/* cannot install, notify caller */
-	}
-
-	switch (cmd->o.opcode) {
-	case O_KEEP_STATE:	/* bidir rule */
-		add_dyn_rule(&args->f_id, O_KEEP_STATE, rule);
-		break;
-
-	case O_LIMIT: {		/* limit number of sessions */
-		struct ipfw_flow_id id;
-		ipfw_dyn_rule *parent;
-		uint32_t conn_limit;
-		uint16_t limit_mask = cmd->limit_mask;
-
-		conn_limit = (cmd->conn_limit == IP_FW_TABLEARG) ?
-		    tablearg : cmd->conn_limit;
-
-		DEB(
-		if (cmd->conn_limit == IP_FW_TABLEARG)
-			printf("ipfw: %s: O_LIMIT rule, conn_limit: %u "
-			    "(tablearg)\n", __func__, conn_limit);
-		else
-			printf("ipfw: %s: O_LIMIT rule, conn_limit: %u\n",
-			    __func__, conn_limit);
-		)
-
-		/*
-		 * Build the parent key: start from an all-zero flow id and
-		 * copy in only the fields selected by limit_mask.  The whole
-		 * structure must be cleared, not just the IPv4 fields:
-		 * the IPv6 addresses left out by the mask are hashed and
-		 * compared as well, and the id is copied into the new rule.
-		 */
-		bzero(&id, sizeof(id));
-		id.proto = args->f_id.proto;
-		id.addr_type = args->f_id.addr_type;
-		id.fib = M_GETFIB(args->m);
-
-		if (IS_IP6_FLOW_ID (&(args->f_id))) {
-			if (limit_mask & DYN_SRC_ADDR)
-				id.src_ip6 = args->f_id.src_ip6;
-			if (limit_mask & DYN_DST_ADDR)
-				id.dst_ip6 = args->f_id.dst_ip6;
-		} else {
-			if (limit_mask & DYN_SRC_ADDR)
-				id.src_ip = args->f_id.src_ip;
-			if (limit_mask & DYN_DST_ADDR)
-				id.dst_ip = args->f_id.dst_ip;
-		}
-		if (limit_mask & DYN_SRC_PORT)
-			id.src_port = args->f_id.src_port;
-		if (limit_mask & DYN_DST_PORT)
-			id.dst_port = args->f_id.dst_port;
-		if ((parent = lookup_dyn_parent(&id, rule)) == NULL) {
-			printf("ipfw: %s: add parent failed\n", __func__);
-			IPFW_DYN_UNLOCK();
-			return (1);
-		}
-
-		if (parent->count >= conn_limit) {
-			/* See if we can remove some expired rule. */
-			remove_dyn_rule(rule, parent);
-			if (parent->count >= conn_limit) {
-				if (V_fw_verbose && last_log != time_uptime) {
-					last_log = time_uptime;
-#ifdef INET6
-					/*
-					 * XXX IPv6 flows are not
-					 * supported yet.
-					 */
-					if (IS_IP6_FLOW_ID(&(args->f_id))) {
-						char ip6buf[INET6_ADDRSTRLEN];
-						snprintf(src, sizeof(src),
-						    "[%s]", ip6_sprintf(ip6buf,
-						    &args->f_id.src_ip6));
-						snprintf(dst, sizeof(dst),
-						    "[%s]", ip6_sprintf(ip6buf,
-						    &args->f_id.dst_ip6));
-					} else
-#endif
-					{
-						da.s_addr =
-						    htonl(args->f_id.src_ip);
-						inet_ntop(AF_INET, &da, src,
-						    sizeof(src));
-						da.s_addr =
-						    htonl(args->f_id.dst_ip);
-						inet_ntop(AF_INET, &da, dst,
-						    sizeof(dst));
-					}
-					log(LOG_SECURITY | LOG_DEBUG,
-					    "ipfw: %d %s %s:%u -> %s:%u, %s\n",
-					    parent->rule->rulenum,
-					    "drop session",
-					    src, (args->f_id.src_port),
-					    dst, (args->f_id.dst_port),
-					    "too many entries");
-				}
-				IPFW_DYN_UNLOCK();
-				return (1);
-			}
-		}
-		/* For O_LIMIT, add_dyn_rule() expects the parent as 'rule'. */
-		add_dyn_rule(&args->f_id, O_LIMIT, (struct ip_fw *)parent);
-		break;
-	}
-	default:
-		printf("ipfw: %s: unknown dynamic rule type %u\n",
-		    __func__, cmd->o.opcode);
-		IPFW_DYN_UNLOCK();
-		return (1);
-	}
-
-	/* XXX just set lifetime */
-	lookup_dyn_rule_locked(&args->f_id, NULL, NULL);
-
-	IPFW_DYN_UNLOCK();
-	return (0);
-}
-
-/*
- * Generate a TCP packet, containing either a RST or a keepalive.
- * When flags & TH_RST, we are sending a RST packet, because of a
- * "reset" action matched the packet.
- * Otherwise we are sending a keepalive, and flags & TH_
- * The 'replyto' mbuf is the mbuf being replied to, if any, and is required
- * so that MAC can label the reply appropriately.
- */
-struct mbuf *
-ipfw_send_pkt(struct mbuf *replyto, struct ipfw_flow_id *id, u_int32_t seq,
- u_int32_t ack, int flags)
-{
- struct mbuf *m = NULL; /* stupid compiler */
- int len, dir;
- struct ip *h = NULL; /* stupid compiler */
-#ifdef INET6
- struct ip6_hdr *h6 = NULL;
-#endif
- struct tcphdr *th = NULL;
-
- MGETHDR(m, M_DONTWAIT, MT_DATA);
- if (m == NULL)
- return (NULL);
-
- M_SETFIB(m, id->fib);
-#ifdef MAC
- if (replyto != NULL)
- mac_netinet_firewall_reply(replyto, m);
- else
- mac_netinet_firewall_send(m);
-#else
- (void)replyto; /* don't warn about unused arg */
-#endif
-
- switch (id->addr_type) {
- case 4:
- len = sizeof(struct ip) + sizeof(struct tcphdr);
- break;
-#ifdef INET6
- case 6:
- len = sizeof(struct ip6_hdr) + sizeof(struct tcphdr);
- break;
-#endif
- default:
- /* XXX: log me?!? */
- FREE_PKT(m);
- return (NULL);
- }
- dir = ((flags & (TH_SYN | TH_RST)) == TH_SYN);
-
- m->m_data += max_linkhdr;
- m->m_flags |= M_SKIP_FIREWALL;
- m->m_pkthdr.len = m->m_len = len;
- m->m_pkthdr.rcvif = NULL;
- bzero(m->m_data, len);
-
- switch (id->addr_type) {
- case 4:
- h = mtod(m, struct ip *);
-
- /* prepare for checksum */
- h->ip_p = IPPROTO_TCP;
- h->ip_len = htons(sizeof(struct tcphdr));
- if (dir) {
- h->ip_src.s_addr = htonl(id->src_ip);
- h->ip_dst.s_addr = htonl(id->dst_ip);
- } else {
- h->ip_src.s_addr = htonl(id->dst_ip);
- h->ip_dst.s_addr = htonl(id->src_ip);
- }
-
- th = (struct tcphdr *)(h + 1);
- break;
-#ifdef INET6
- case 6:
- h6 = mtod(m, struct ip6_hdr *);
-
- /* prepare for checksum */
- h6->ip6_nxt = IPPROTO_TCP;
- h6->ip6_plen = htons(sizeof(struct tcphdr));
- if (dir) {
- h6->ip6_src = id->src_ip6;
- h6->ip6_dst = id->dst_ip6;
- } else {
- h6->ip6_src = id->dst_ip6;
- h6->ip6_dst = id->src_ip6;
- }
-
- th = (struct tcphdr *)(h6 + 1);
- break;
-#endif
- }
-
- if (dir) {
- th->th_sport = htons(id->src_port);
- th->th_dport = htons(id->dst_port);
- } else {
- th->th_sport = htons(id->dst_port);
- th->th_dport = htons(id->src_port);
- }
- th->th_off = sizeof(struct tcphdr) >> 2;
-
- if (flags & TH_RST) {
- if (flags & TH_ACK) {
- th->th_seq = htonl(ack);
- th->th_flags = TH_RST;
- } else {
- if (flags & TH_SYN)
- seq++;
- th->th_ack = htonl(seq);
- th->th_flags = TH_RST | TH_ACK;
- }
- } else {
- /*
- * Keepalive - use caller provided sequence numbers
- */
- th->th_seq = htonl(seq);
- th->th_ack = htonl(ack);
- th->th_flags = TH_ACK;
- }
-
- switch (id->addr_type) {
- case 4:
- th->th_sum = in_cksum(m, len);
-
- /* finish the ip header */
- h->ip_v = 4;
- h->ip_hl = sizeof(*h) >> 2;
- h->ip_tos = IPTOS_LOWDELAY;
- h->ip_off = 0;
- /* ip_len must be in host format for ip_output */
- h->ip_len = len;
- h->ip_ttl = V_ip_defttl;
- h->ip_sum = 0;
- break;
-#ifdef INET6
- case 6:
- th->th_sum = in6_cksum(m, IPPROTO_TCP, sizeof(*h6),
- sizeof(struct tcphdr));
-
- /* finish the ip6 header */
- h6->ip6_vfc |= IPV6_VERSION;
- h6->ip6_hlim = IPV6_DEFHLIM;
- break;
-#endif
- }
-
- return (m);
-}
-
-/*
- * This procedure is only used to handle keepalives. It is invoked
- * every dyn_keepalive_period
- */
-static void
-ipfw_tick(void * vnetx)
-{
- struct mbuf *m0, *m, *mnext, **mtailp;
-#ifdef INET6
- struct mbuf *m6, **m6_tailp;
-#endif
- int i;
- ipfw_dyn_rule *q;
-#ifdef VIMAGE
- struct vnet *vp = vnetx;
-#endif
-
- CURVNET_SET(vp);
- if (V_dyn_keepalive == 0 || V_ipfw_dyn_v == NULL || V_dyn_count == 0)
- goto done;
-
- /*
- * We make a chain of packets to go out here -- not deferring
- * until after we drop the IPFW dynamic rule lock would result
- * in a lock order reversal with the normal packet input -> ipfw
- * call stack.
- */
- m0 = NULL;
- mtailp = &m0;
-#ifdef INET6
- m6 = NULL;
- m6_tailp = &m6;
-#endif
- IPFW_DYN_LOCK();
- for (i = 0 ; i < V_curr_dyn_buckets ; i++) {
- for (q = V_ipfw_dyn_v[i] ; q ; q = q->next ) {
- if (q->dyn_type == O_LIMIT_PARENT)
- continue;
- if (q->id.proto != IPPROTO_TCP)
- continue;
- if ( (q->state & BOTH_SYN) != BOTH_SYN)
- continue;
- if (TIME_LEQ(time_uptime + V_dyn_keepalive_interval,
- q->expire))
- continue; /* too early */
- if (TIME_LEQ(q->expire, time_uptime))
- continue; /* too late, rule expired */
-
- m = (q->state & ACK_REV) ? NULL :
- ipfw_send_pkt(NULL, &(q->id), q->ack_rev - 1,
- q->ack_fwd, TH_SYN);
- mnext = (q->state & ACK_FWD) ? NULL :
- ipfw_send_pkt(NULL, &(q->id), q->ack_fwd - 1,
- q->ack_rev, 0);
-
- switch (q->id.addr_type) {
- case 4:
- if (m != NULL) {
- *mtailp = m;
- mtailp = &(*mtailp)->m_nextpkt;
- }
- if (mnext != NULL) {
- *mtailp = mnext;
- mtailp = &(*mtailp)->m_nextpkt;
- }
- break;
-#ifdef INET6
- case 6:
- if (m != NULL) {
- *m6_tailp = m;
- m6_tailp = &(*m6_tailp)->m_nextpkt;
- }
- if (mnext != NULL) {
- *m6_tailp = mnext;
- m6_tailp = &(*m6_tailp)->m_nextpkt;
- }
- break;
-#endif
- }
- }
- }
- IPFW_DYN_UNLOCK();
- for (m = m0; m != NULL; m = mnext) {
- mnext = m->m_nextpkt;
- m->m_nextpkt = NULL;
- ip_output(m, NULL, NULL, 0, NULL, NULL);
- }
-#ifdef INET6
- for (m = m6; m != NULL; m = mnext) {
- mnext = m->m_nextpkt;
- m->m_nextpkt = NULL;
- ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL);
- }
-#endif
-done:
- callout_reset_on(&V_ipfw_timeout, V_dyn_keepalive_period * hz,
- ipfw_tick, vnetx, 0);
- CURVNET_RESTORE();
-}
-
-void
-ipfw_dyn_attach(void)
-{
- ipfw_dyn_rule_zone = uma_zcreate("IPFW dynamic rule",
- sizeof(ipfw_dyn_rule), NULL, NULL, NULL, NULL,
- UMA_ALIGN_PTR, 0);
-
- IPFW_DYN_LOCK_INIT();
-}
-
-void
-ipfw_dyn_detach(void)
-{
- uma_zdestroy(ipfw_dyn_rule_zone);
- IPFW_DYN_LOCK_DESTROY();
-}
-
-void
-ipfw_dyn_init(void)
-{
- V_ipfw_dyn_v = NULL;
- V_dyn_buckets = 256; /* must be power of 2 */
- V_curr_dyn_buckets = 256; /* must be power of 2 */
-
- V_dyn_ack_lifetime = 300;
- V_dyn_syn_lifetime = 20;
- V_dyn_fin_lifetime = 1;
- V_dyn_rst_lifetime = 1;
- V_dyn_udp_lifetime = 10;
- V_dyn_short_lifetime = 5;
-
- V_dyn_keepalive_interval = 20;
- V_dyn_keepalive_period = 5;
- V_dyn_keepalive = 1; /* do send keepalives */
-
- V_dyn_max = 4096; /* max # of dynamic rules */
- callout_init(&V_ipfw_timeout, CALLOUT_MPSAFE);
- callout_reset_on(&V_ipfw_timeout, hz, ipfw_tick, curvnet, 0);
-}
-
-void
-ipfw_dyn_uninit(int pass)
-{
- if (pass == 0)
- callout_drain(&V_ipfw_timeout);
- else {
- if (V_ipfw_dyn_v != NULL)
- free(V_ipfw_dyn_v, M_IPFW);
- }
-}
-
-int
-ipfw_dyn_len(void)
-{
- return (V_ipfw_dyn_v == NULL) ? 0 :
- (V_dyn_count * sizeof(ipfw_dyn_rule));
-}
-
-void
-ipfw_get_dynamic(char **pbp, const char *ep)
-{
- ipfw_dyn_rule *p, *last = NULL;
- char *bp;
- int i;
-
- if (V_ipfw_dyn_v == NULL)
- return;
- bp = *pbp;
-
- IPFW_DYN_LOCK();
- for (i = 0 ; i < V_curr_dyn_buckets; i++)
- for (p = V_ipfw_dyn_v[i] ; p != NULL; p = p->next) {
- if (bp + sizeof *p <= ep) {
- ipfw_dyn_rule *dst =
- (ipfw_dyn_rule *)bp;
- bcopy(p, dst, sizeof *p);
- bcopy(&(p->rule->rulenum), &(dst->rule),
- sizeof(p->rule->rulenum));
- /*
- * store set number into high word of
- * dst->rule pointer.
- */
- bcopy(&(p->rule->set),
- (char *)&dst->rule +
- sizeof(p->rule->rulenum),
- sizeof(p->rule->set));
- /*
- * store a non-null value in "next".
- * The userland code will interpret a
- * NULL here as a marker
- * for the last dynamic rule.
- */
- bcopy(&dst, &dst->next, sizeof(dst));
- last = dst;
- dst->expire =
- TIME_LEQ(dst->expire, time_uptime) ?
- 0 : dst->expire - time_uptime ;
- bp += sizeof(ipfw_dyn_rule);
- }
- }
- IPFW_DYN_UNLOCK();
- if (last != NULL) /* mark last dynamic rule */
- bzero(&last->next, sizeof(last));
- *pbp = bp;
-}
-/* end of file */
diff --git a/sys/netinet/ipfw/ip_fw_log.c b/sys/netinet/ipfw/ip_fw_log.c
deleted file mode 100644
index c0f8fcd..0000000
--- a/sys/netinet/ipfw/ip_fw_log.c
+++ /dev/null
@@ -1,552 +0,0 @@
-/*-
- * Copyright (c) 2002-2009 Luigi Rizzo, Universita` di Pisa
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
-/*
- * Logging support for ipfw
- */
-
-#include "opt_ipfw.h"
-#include "opt_inet.h"
-#ifndef INET
-#error IPFIREWALL requires INET.
-#endif /* INET */
-#include "opt_inet6.h"
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/mbuf.h>
-#include <sys/kernel.h>
-#include <sys/socket.h>
-#include <sys/sysctl.h>
-#include <sys/syslog.h>
-#include <sys/lock.h>
-#include <sys/rwlock.h>
-#include <net/ethernet.h> /* for ETHERTYPE_IP */
-#include <net/if.h>
-#include <net/if_clone.h>
-#include <net/vnet.h>
-#include <net/if_types.h> /* for IFT_PFLOG */
-#include <net/bpf.h> /* for BPF */
-
-#include <netinet/in.h>
-#include <netinet/ip.h>
-#include <netinet/ip_icmp.h>
-#include <netinet/ip_var.h>
-#include <netinet/ip_fw.h>
-#include <netinet/ipfw/ip_fw_private.h>
-#include <netinet/tcp_var.h>
-#include <netinet/udp.h>
-
-#include <netinet/ip6.h>
-#include <netinet/icmp6.h>
-#ifdef INET6
-#include <netinet6/in6_var.h> /* ip6_sprintf() */
-#endif
-
-#ifdef MAC
-#include <security/mac/mac_framework.h>
-#endif
-
-/*
- * L3HDR maps an ipv4 pointer into a layer3 header pointer of type T
- * Other macros just cast void * into the appropriate type
- */
-#define L3HDR(T, ip) ((T *)((u_int32_t *)(ip) + (ip)->ip_hl))
-#define TCP(p) ((struct tcphdr *)(p))
-#define SCTP(p) ((struct sctphdr *)(p))
-#define UDP(p) ((struct udphdr *)(p))
-#define ICMP(p) ((struct icmphdr *)(p))
-#define ICMP6(p) ((struct icmp6_hdr *)(p))
-
-#define SNPARGS(buf, len) buf + len, sizeof(buf) > len ? sizeof(buf) - len : 0
-#define SNP(buf) buf, sizeof(buf)
-
-#ifdef WITHOUT_BPF
-void
-ipfw_log_bpf(int onoff)
-{
-}
-#else /* !WITHOUT_BPF */
-static struct ifnet *log_if; /* hook to attach to bpf */
-static struct rwlock log_if_lock;
-#define LOGIF_LOCK_INIT(x) rw_init(&log_if_lock, "ipfw log_if lock")
-#define LOGIF_LOCK_DESTROY(x) rw_destroy(&log_if_lock)
-#define LOGIF_RLOCK(x) rw_rlock(&log_if_lock)
-#define LOGIF_RUNLOCK(x) rw_runlock(&log_if_lock)
-#define LOGIF_WLOCK(x) rw_wlock(&log_if_lock)
-#define LOGIF_WUNLOCK(x) rw_wunlock(&log_if_lock)
-
-#define IPFWNAME "ipfw"
-
-/* we use this dummy function for all ifnet callbacks */
-static int
-log_dummy(struct ifnet *ifp, u_long cmd, caddr_t addr)
-{
- return EINVAL;
-}
-
-static int
-ipfw_log_output(struct ifnet *ifp, struct mbuf *m,
- struct sockaddr *dst, struct route *ro)
-{
- if (m != NULL)
- FREE_PKT(m);
- return EINVAL;
-}
-
-static void
-ipfw_log_start(struct ifnet* ifp)
-{
- panic("ipfw_log_start() must not be called");
-}
-
-static const u_char ipfwbroadcastaddr[6] =
- { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
-
-static int
-ipfw_log_clone_match(struct if_clone *ifc, const char *name)
-{
-
- return (strncmp(name, IPFWNAME, sizeof(IPFWNAME) - 1) == 0);
-}
-
-static int
-ipfw_log_clone_create(struct if_clone *ifc, char *name, size_t len,
- caddr_t params)
-{
- int error;
- int unit;
- struct ifnet *ifp;
-
- error = ifc_name2unit(name, &unit);
- if (error)
- return (error);
-
- error = ifc_alloc_unit(ifc, &unit);
- if (error)
- return (error);
-
- ifp = if_alloc(IFT_PFLOG);
- if (ifp == NULL) {
- ifc_free_unit(ifc, unit);
- return (ENOSPC);
- }
- ifp->if_dname = IPFWNAME;
- ifp->if_dunit = unit;
- snprintf(ifp->if_xname, IFNAMSIZ, "%s%d", IPFWNAME, unit);
- strlcpy(name, ifp->if_xname, len);
- ifp->if_mtu = 65536;
- ifp->if_flags = IFF_UP | IFF_SIMPLEX | IFF_MULTICAST;
- ifp->if_init = (void *)log_dummy;
- ifp->if_ioctl = log_dummy;
- ifp->if_start = ipfw_log_start;
- ifp->if_output = ipfw_log_output;
- ifp->if_addrlen = 6;
- ifp->if_hdrlen = 14;
- ifp->if_broadcastaddr = ipfwbroadcastaddr;
- ifp->if_baudrate = IF_Mbps(10);
-
- LOGIF_WLOCK();
- if (log_if == NULL)
- log_if = ifp;
- else {
- LOGIF_WUNLOCK();
- if_free(ifp);
- ifc_free_unit(ifc, unit);
- return (EEXIST);
- }
- LOGIF_WUNLOCK();
- if_attach(ifp);
- bpfattach(ifp, DLT_EN10MB, 14);
-
- return (0);
-}
-
-static int
-ipfw_log_clone_destroy(struct if_clone *ifc, struct ifnet *ifp)
-{
- int unit;
-
- if (ifp == NULL)
- return (0);
-
- LOGIF_WLOCK();
- if (log_if != NULL && ifp == log_if)
- log_if = NULL;
- else {
- LOGIF_WUNLOCK();
- return (EINVAL);
- }
- LOGIF_WUNLOCK();
-
- unit = ifp->if_dunit;
- bpfdetach(ifp);
- if_detach(ifp);
- if_free(ifp);
- ifc_free_unit(ifc, unit);
-
- return (0);
-}
-
-static struct if_clone ipfw_log_cloner = IFC_CLONE_INITIALIZER(
- IPFWNAME, NULL, IF_MAXUNIT,
- NULL, ipfw_log_clone_match, ipfw_log_clone_create, ipfw_log_clone_destroy);
-
-void
-ipfw_log_bpf(int onoff)
-{
-
- if (onoff) {
- LOGIF_LOCK_INIT();
- if_clone_attach(&ipfw_log_cloner);
- } else {
- if_clone_detach(&ipfw_log_cloner);
- LOGIF_LOCK_DESTROY();
- }
-}
-#endif /* !WITHOUT_BPF */
-
-/*
- * We enter here when we have a rule with O_LOG.
- * XXX this function alone takes about 2Kbytes of code!
- */
-void
-ipfw_log(struct ip_fw *f, u_int hlen, struct ip_fw_args *args,
- struct mbuf *m, struct ifnet *oif, u_short offset, uint32_t tablearg,
- struct ip *ip)
-{
- char *action;
- int limit_reached = 0;
- char action2[92], proto[128], fragment[32];
-
- if (V_fw_verbose == 0) {
-#ifndef WITHOUT_BPF
- LOGIF_RLOCK();
- if (log_if == NULL || log_if->if_bpf == NULL) {
- LOGIF_RUNLOCK();
- return;
- }
-
- if (args->eh) /* layer2, use orig hdr */
- BPF_MTAP2(log_if, args->eh, ETHER_HDR_LEN, m);
- else
- /* Add fake header. Later we will store
- * more info in the header.
- */
- BPF_MTAP2(log_if, "DDDDDDSSSSSS\x08\x00", ETHER_HDR_LEN, m);
- LOGIF_RUNLOCK();
-#endif /* !WITHOUT_BPF */
- return;
- }
- /* the old 'log' function */
- fragment[0] = '\0';
- proto[0] = '\0';
-
- if (f == NULL) { /* bogus pkt */
- if (V_verbose_limit != 0 && V_norule_counter >= V_verbose_limit)
- return;
- V_norule_counter++;
- if (V_norule_counter == V_verbose_limit)
- limit_reached = V_verbose_limit;
- action = "Refuse";
- } else { /* O_LOG is the first action, find the real one */
- ipfw_insn *cmd = ACTION_PTR(f);
- ipfw_insn_log *l = (ipfw_insn_log *)cmd;
-
- if (l->max_log != 0 && l->log_left == 0)
- return;
- l->log_left--;
- if (l->log_left == 0)
- limit_reached = l->max_log;
- cmd += F_LEN(cmd); /* point to first action */
- if (cmd->opcode == O_ALTQ) {
- ipfw_insn_altq *altq = (ipfw_insn_altq *)cmd;
-
- snprintf(SNPARGS(action2, 0), "Altq %d",
- altq->qid);
- cmd += F_LEN(cmd);
- }
- if (cmd->opcode == O_PROB)
- cmd += F_LEN(cmd);
-
- if (cmd->opcode == O_TAG)
- cmd += F_LEN(cmd);
-
- action = action2;
- switch (cmd->opcode) {
- case O_DENY:
- action = "Deny";
- break;
-
- case O_REJECT:
- if (cmd->arg1==ICMP_REJECT_RST)
- action = "Reset";
- else if (cmd->arg1==ICMP_UNREACH_HOST)
- action = "Reject";
- else
- snprintf(SNPARGS(action2, 0), "Unreach %d",
- cmd->arg1);
- break;
-
- case O_UNREACH6:
- if (cmd->arg1==ICMP6_UNREACH_RST)
- action = "Reset";
- else
- snprintf(SNPARGS(action2, 0), "Unreach %d",
- cmd->arg1);
- break;
-
- case O_ACCEPT:
- action = "Accept";
- break;
- case O_COUNT:
- action = "Count";
- break;
- case O_DIVERT:
- snprintf(SNPARGS(action2, 0), "Divert %d",
- cmd->arg1);
- break;
- case O_TEE:
- snprintf(SNPARGS(action2, 0), "Tee %d",
- cmd->arg1);
- break;
- case O_SETFIB:
- snprintf(SNPARGS(action2, 0), "SetFib %d",
- cmd->arg1);
- break;
- case O_SKIPTO:
- snprintf(SNPARGS(action2, 0), "SkipTo %d",
- cmd->arg1);
- break;
- case O_PIPE:
- snprintf(SNPARGS(action2, 0), "Pipe %d",
- cmd->arg1);
- break;
- case O_QUEUE:
- snprintf(SNPARGS(action2, 0), "Queue %d",
- cmd->arg1);
- break;
- case O_FORWARD_IP: {
- ipfw_insn_sa *sa = (ipfw_insn_sa *)cmd;
- int len;
- struct in_addr dummyaddr;
- if (sa->sa.sin_addr.s_addr == INADDR_ANY)
- dummyaddr.s_addr = htonl(tablearg);
- else
- dummyaddr.s_addr = sa->sa.sin_addr.s_addr;
-
- len = snprintf(SNPARGS(action2, 0), "Forward to %s",
- inet_ntoa(dummyaddr));
-
- if (sa->sa.sin_port)
- snprintf(SNPARGS(action2, len), ":%d",
- sa->sa.sin_port);
- }
- break;
-#ifdef INET6
- case O_FORWARD_IP6: {
- char buf[INET6_ADDRSTRLEN];
- ipfw_insn_sa6 *sa = (ipfw_insn_sa6 *)cmd;
- int len;
-
- len = snprintf(SNPARGS(action2, 0), "Forward to [%s]",
- ip6_sprintf(buf, &sa->sa.sin6_addr));
-
- if (sa->sa.sin6_port)
- snprintf(SNPARGS(action2, len), ":%u",
- sa->sa.sin6_port);
- }
- break;
-#endif
- case O_NETGRAPH:
- snprintf(SNPARGS(action2, 0), "Netgraph %d",
- cmd->arg1);
- break;
- case O_NGTEE:
- snprintf(SNPARGS(action2, 0), "Ngtee %d",
- cmd->arg1);
- break;
- case O_NAT:
- action = "Nat";
- break;
- case O_REASS:
- action = "Reass";
- break;
- case O_CALLRETURN:
- if (cmd->len & F_NOT)
- action = "Return";
- else
- snprintf(SNPARGS(action2, 0), "Call %d",
- cmd->arg1);
- break;
- default:
- action = "UNKNOWN";
- break;
- }
- }
-
- if (hlen == 0) { /* non-ip */
- snprintf(SNPARGS(proto, 0), "MAC");
-
- } else {
- int len;
-#ifdef INET6
- char src[INET6_ADDRSTRLEN + 2], dst[INET6_ADDRSTRLEN + 2];
-#else
- char src[INET_ADDRSTRLEN], dst[INET_ADDRSTRLEN];
-#endif
- struct icmphdr *icmp;
- struct tcphdr *tcp;
- struct udphdr *udp;
-#ifdef INET6
- struct ip6_hdr *ip6 = NULL;
- struct icmp6_hdr *icmp6;
- u_short ip6f_mf;
-#endif
- src[0] = '\0';
- dst[0] = '\0';
-#ifdef INET6
- ip6f_mf = offset & IP6F_MORE_FRAG;
- offset &= IP6F_OFF_MASK;
-
- if (IS_IP6_FLOW_ID(&(args->f_id))) {
- char ip6buf[INET6_ADDRSTRLEN];
- snprintf(src, sizeof(src), "[%s]",
- ip6_sprintf(ip6buf, &args->f_id.src_ip6));
- snprintf(dst, sizeof(dst), "[%s]",
- ip6_sprintf(ip6buf, &args->f_id.dst_ip6));
-
- ip6 = (struct ip6_hdr *)ip;
- tcp = (struct tcphdr *)(((char *)ip) + hlen);
- udp = (struct udphdr *)(((char *)ip) + hlen);
- } else
-#endif
- {
- tcp = L3HDR(struct tcphdr, ip);
- udp = L3HDR(struct udphdr, ip);
-
- inet_ntop(AF_INET, &ip->ip_src, src, sizeof(src));
- inet_ntop(AF_INET, &ip->ip_dst, dst, sizeof(dst));
- }
-
- switch (args->f_id.proto) {
- case IPPROTO_TCP:
- len = snprintf(SNPARGS(proto, 0), "TCP %s", src);
- if (offset == 0)
- snprintf(SNPARGS(proto, len), ":%d %s:%d",
- ntohs(tcp->th_sport),
- dst,
- ntohs(tcp->th_dport));
- else
- snprintf(SNPARGS(proto, len), " %s", dst);
- break;
-
- case IPPROTO_UDP:
- len = snprintf(SNPARGS(proto, 0), "UDP %s", src);
- if (offset == 0)
- snprintf(SNPARGS(proto, len), ":%d %s:%d",
- ntohs(udp->uh_sport),
- dst,
- ntohs(udp->uh_dport));
- else
- snprintf(SNPARGS(proto, len), " %s", dst);
- break;
-
- case IPPROTO_ICMP:
- icmp = L3HDR(struct icmphdr, ip);
- if (offset == 0)
- len = snprintf(SNPARGS(proto, 0),
- "ICMP:%u.%u ",
- icmp->icmp_type, icmp->icmp_code);
- else
- len = snprintf(SNPARGS(proto, 0), "ICMP ");
- len += snprintf(SNPARGS(proto, len), "%s", src);
- snprintf(SNPARGS(proto, len), " %s", dst);
- break;
-#ifdef INET6
- case IPPROTO_ICMPV6:
- icmp6 = (struct icmp6_hdr *)(((char *)ip) + hlen);
- if (offset == 0)
- len = snprintf(SNPARGS(proto, 0),
- "ICMPv6:%u.%u ",
- icmp6->icmp6_type, icmp6->icmp6_code);
- else
- len = snprintf(SNPARGS(proto, 0), "ICMPv6 ");
- len += snprintf(SNPARGS(proto, len), "%s", src);
- snprintf(SNPARGS(proto, len), " %s", dst);
- break;
-#endif
- default:
- len = snprintf(SNPARGS(proto, 0), "P:%d %s",
- args->f_id.proto, src);
- snprintf(SNPARGS(proto, len), " %s", dst);
- break;
- }
-
-#ifdef INET6
- if (IS_IP6_FLOW_ID(&(args->f_id))) {
- if (offset & (IP6F_OFF_MASK | IP6F_MORE_FRAG))
- snprintf(SNPARGS(fragment, 0),
- " (frag %08x:%d@%d%s)",
- args->f_id.extra,
- ntohs(ip6->ip6_plen) - hlen,
- ntohs(offset) << 3, ip6f_mf ? "+" : "");
- } else
-#endif
- {
- int ipoff, iplen;
- ipoff = ntohs(ip->ip_off);
- iplen = ntohs(ip->ip_len);
- if (ipoff & (IP_MF | IP_OFFMASK))
- snprintf(SNPARGS(fragment, 0),
- " (frag %d:%d@%d%s)",
- ntohs(ip->ip_id), iplen - (ip->ip_hl << 2),
- offset << 3,
- (ipoff & IP_MF) ? "+" : "");
- }
- }
-#ifdef __FreeBSD__
- if (oif || m->m_pkthdr.rcvif)
- log(LOG_SECURITY | LOG_INFO,
- "ipfw: %d %s %s %s via %s%s\n",
- f ? f->rulenum : -1,
- action, proto, oif ? "out" : "in",
- oif ? oif->if_xname : m->m_pkthdr.rcvif->if_xname,
- fragment);
- else
-#endif
- log(LOG_SECURITY | LOG_INFO,
- "ipfw: %d %s %s [no if info]%s\n",
- f ? f->rulenum : -1,
- action, proto, fragment);
- if (limit_reached)
- log(LOG_SECURITY | LOG_NOTICE,
- "ipfw: limit %d reached on entry %d\n",
- limit_reached, f ? f->rulenum : -1);
-}
-/* end of file */
diff --git a/sys/netinet/ipfw/ip_fw_nat.c b/sys/netinet/ipfw/ip_fw_nat.c
deleted file mode 100644
index dbeb254..0000000
--- a/sys/netinet/ipfw/ip_fw_nat.c
+++ /dev/null
@@ -1,661 +0,0 @@
-/*-
- * Copyright (c) 2008 Paolo Pisati
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/eventhandler.h>
-#include <sys/malloc.h>
-#include <sys/kernel.h>
-#include <sys/lock.h>
-#include <sys/module.h>
-#include <sys/rwlock.h>
-
-#define IPFW_INTERNAL /* Access to protected data structures in ip_fw.h. */
-
-#include <netinet/libalias/alias.h>
-#include <netinet/libalias/alias_local.h>
-
-#include <net/if.h>
-#include <netinet/in.h>
-#include <netinet/ip.h>
-#include <netinet/ip_var.h>
-#include <netinet/ip_fw.h>
-#include <netinet/ipfw/ip_fw_private.h>
-#include <netinet/tcp.h>
-#include <netinet/udp.h>
-
-#include <machine/in_cksum.h> /* XXX for in_cksum */
-
-static VNET_DEFINE(eventhandler_tag, ifaddr_event_tag);
-#define V_ifaddr_event_tag VNET(ifaddr_event_tag)
-
-static void
-ifaddr_change(void *arg __unused, struct ifnet *ifp)
-{
- struct cfg_nat *ptr;
- struct ifaddr *ifa;
- struct ip_fw_chain *chain;
-
- chain = &V_layer3_chain;
- IPFW_WLOCK(chain);
- /* Check every nat entry... */
- LIST_FOREACH(ptr, &chain->nat, _next) {
- /* ...using nic 'ifp->if_xname' as dynamic alias address. */
- if (strncmp(ptr->if_name, ifp->if_xname, IF_NAMESIZE) != 0)
- continue;
- if_addr_rlock(ifp);
- TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
- if (ifa->ifa_addr == NULL)
- continue;
- if (ifa->ifa_addr->sa_family != AF_INET)
- continue;
- ptr->ip = ((struct sockaddr_in *)
- (ifa->ifa_addr))->sin_addr;
- LibAliasSetAddress(ptr->lib, ptr->ip);
- }
- if_addr_runlock(ifp);
- }
- IPFW_WUNLOCK(chain);
-}
-
-/*
- * delete the pointers for nat entry ix, or all of them if ix < 0
- */
-static void
-flush_nat_ptrs(struct ip_fw_chain *chain, const int ix)
-{
- int i;
- ipfw_insn_nat *cmd;
-
- IPFW_WLOCK_ASSERT(chain);
- for (i = 0; i < chain->n_rules; i++) {
- cmd = (ipfw_insn_nat *)ACTION_PTR(chain->map[i]);
- /* XXX skip log and the like ? */
- if (cmd->o.opcode == O_NAT && cmd->nat != NULL &&
- (ix < 0 || cmd->nat->id == ix))
- cmd->nat = NULL;
- }
-}
-
-static void
-del_redir_spool_cfg(struct cfg_nat *n, struct redir_chain *head)
-{
- struct cfg_redir *r, *tmp_r;
- struct cfg_spool *s, *tmp_s;
- int i, num;
-
- LIST_FOREACH_SAFE(r, head, _next, tmp_r) {
- num = 1; /* Number of alias_link to delete. */
- switch (r->mode) {
- case REDIR_PORT:
- num = r->pport_cnt;
- /* FALLTHROUGH */
- case REDIR_ADDR:
- case REDIR_PROTO:
- /* Delete all libalias redirect entry. */
- for (i = 0; i < num; i++)
- LibAliasRedirectDelete(n->lib, r->alink[i]);
- /* Del spool cfg if any. */
- LIST_FOREACH_SAFE(s, &r->spool_chain, _next, tmp_s) {
- LIST_REMOVE(s, _next);
- free(s, M_IPFW);
- }
- free(r->alink, M_IPFW);
- LIST_REMOVE(r, _next);
- free(r, M_IPFW);
- break;
- default:
- printf("unknown redirect mode: %u\n", r->mode);
- /* XXX - panic?!?!? */
- break;
- }
- }
-}
-
-static void
-add_redir_spool_cfg(char *buf, struct cfg_nat *ptr)
-{
- struct cfg_redir *r, *ser_r;
- struct cfg_spool *s, *ser_s;
- int cnt, off, i;
-
- for (cnt = 0, off = 0; cnt < ptr->redir_cnt; cnt++) {
- ser_r = (struct cfg_redir *)&buf[off];
- r = malloc(SOF_REDIR, M_IPFW, M_WAITOK | M_ZERO);
- memcpy(r, ser_r, SOF_REDIR);
- LIST_INIT(&r->spool_chain);
- off += SOF_REDIR;
- r->alink = malloc(sizeof(struct alias_link *) * r->pport_cnt,
- M_IPFW, M_WAITOK | M_ZERO);
- switch (r->mode) {
- case REDIR_ADDR:
- r->alink[0] = LibAliasRedirectAddr(ptr->lib, r->laddr,
- r->paddr);
- break;
- case REDIR_PORT:
- for (i = 0 ; i < r->pport_cnt; i++) {
- /* If remotePort is all ports, set it to 0. */
- u_short remotePortCopy = r->rport + i;
- if (r->rport_cnt == 1 && r->rport == 0)
- remotePortCopy = 0;
- r->alink[i] = LibAliasRedirectPort(ptr->lib,
- r->laddr, htons(r->lport + i), r->raddr,
- htons(remotePortCopy), r->paddr,
- htons(r->pport + i), r->proto);
- if (r->alink[i] == NULL) {
- r->alink[0] = NULL;
- break;
- }
- }
- break;
- case REDIR_PROTO:
- r->alink[0] = LibAliasRedirectProto(ptr->lib ,r->laddr,
- r->raddr, r->paddr, r->proto);
- break;
- default:
- printf("unknown redirect mode: %u\n", r->mode);
- break;
- }
- /* XXX perhaps return an error instead of panic ? */
- if (r->alink[0] == NULL)
- panic("LibAliasRedirect* returned NULL");
- /* LSNAT handling. */
- for (i = 0; i < r->spool_cnt; i++) {
- ser_s = (struct cfg_spool *)&buf[off];
- s = malloc(SOF_REDIR, M_IPFW, M_WAITOK | M_ZERO);
- memcpy(s, ser_s, SOF_SPOOL);
- LibAliasAddServer(ptr->lib, r->alink[0],
- s->addr, htons(s->port));
- off += SOF_SPOOL;
- /* Hook spool entry. */
- LIST_INSERT_HEAD(&r->spool_chain, s, _next);
- }
- /* And finally hook this redir entry. */
- LIST_INSERT_HEAD(&ptr->redir_chain, r, _next);
- }
-}
-
-static int
-ipfw_nat(struct ip_fw_args *args, struct cfg_nat *t, struct mbuf *m)
-{
- struct mbuf *mcl;
- struct ip *ip;
- /* XXX - libalias duct tape */
- int ldt, retval, found;
- struct ip_fw_chain *chain;
- char *c;
-
- ldt = 0;
- retval = 0;
- mcl = m_megapullup(m, m->m_pkthdr.len);
- if (mcl == NULL) {
- args->m = NULL;
- return (IP_FW_DENY);
- }
- ip = mtod(mcl, struct ip *);
-
- /*
- * XXX - Libalias checksum offload 'duct tape':
- *
- * locally generated packets have only pseudo-header checksum
- * calculated and libalias will break it[1], so mark them for
- * later fix. Moreover there are cases when libalias modifies
- * tcp packet data[2], mark them for later fix too.
- *
- * [1] libalias was never meant to run in kernel, so it does
- * not have any knowledge about checksum offloading, and
- * expects a packet with a full internet checksum.
- * Unfortunately, packets generated locally will have just the
- * pseudo header calculated, and when libalias tries to adjust
- * the checksum it will actually compute a wrong value.
- *
- * [2] when libalias modifies tcp's data content, full TCP
- * checksum has to be recomputed: the problem is that
- * libalias does not have any idea about checksum offloading.
- * To work around this, we do not do checksumming in LibAlias,
- * but only mark the packets in th_x2 field. If we receive a
- * marked packet, we calculate correct checksum for it
- * aware of offloading. Why such a terrible hack instead of
- * recalculating checksum for each packet?
- * Because the previous checksum was not checked!
- * Recalculating checksums for EVERY packet will hide ALL
- * transmission errors. Yes, marked packets still suffer from
- * this problem. But, sigh, natd(8) has this problem, too.
- *
- * TODO: -make libalias mbuf aware (so
- * it can handle delayed checksum and tso)
- */
-
- if (mcl->m_pkthdr.rcvif == NULL &&
- mcl->m_pkthdr.csum_flags & CSUM_DELAY_DATA)
- ldt = 1;
-
- c = mtod(mcl, char *);
-
- /* Check if this is 'global' instance */
- if (t == NULL) {
- if (args->oif == NULL) {
- /* Wrong direction, skip processing */
- args->m = mcl;
- return (IP_FW_NAT);
- }
-
- found = 0;
- chain = &V_layer3_chain;
- IPFW_RLOCK(chain);
- /* Check every nat entry... */
- LIST_FOREACH(t, &chain->nat, _next) {
- if ((t->mode & PKT_ALIAS_SKIP_GLOBAL) != 0)
- continue;
- retval = LibAliasOutTry(t->lib, c,
- mcl->m_len + M_TRAILINGSPACE(mcl), 0);
- if (retval == PKT_ALIAS_OK) {
- /* Nat instance recognises state */
- found = 1;
- break;
- }
- }
- IPFW_RUNLOCK(chain);
- if (found != 1) {
- /* No instance found, return ignore */
- args->m = mcl;
- return (IP_FW_NAT);
- }
- } else {
- if (args->oif == NULL)
- retval = LibAliasIn(t->lib, c,
- mcl->m_len + M_TRAILINGSPACE(mcl));
- else
- retval = LibAliasOut(t->lib, c,
- mcl->m_len + M_TRAILINGSPACE(mcl));
- }
-
- /*
- * We drop packet when:
- * 1. libalias returns PKT_ALIAS_ERROR;
- * 2. For incoming packets:
- * a) for unresolved fragments;
- * b) libalias returns PKT_ALIAS_IGNORED and
- * PKT_ALIAS_DENY_INCOMING flag is set.
- */
- if (retval == PKT_ALIAS_ERROR ||
- (args->oif == NULL && (retval == PKT_ALIAS_UNRESOLVED_FRAGMENT ||
- (retval == PKT_ALIAS_IGNORED &&
- (t->mode & PKT_ALIAS_DENY_INCOMING) != 0)))) {
- /* XXX - should i add some logging? */
- m_free(mcl);
- args->m = NULL;
- return (IP_FW_DENY);
- }
-
- if (retval == PKT_ALIAS_RESPOND)
- mcl->m_flags |= M_SKIP_FIREWALL;
- mcl->m_pkthdr.len = mcl->m_len = ntohs(ip->ip_len);
-
- /*
- * XXX - libalias checksum offload
- * 'duct tape' (see above)
- */
-
- if ((ip->ip_off & htons(IP_OFFMASK)) == 0 &&
- ip->ip_p == IPPROTO_TCP) {
- struct tcphdr *th;
-
- th = (struct tcphdr *)(ip + 1);
- if (th->th_x2)
- ldt = 1;
- }
-
- if (ldt) {
- struct tcphdr *th;
- struct udphdr *uh;
- u_short cksum;
-
- ip->ip_len = ntohs(ip->ip_len);
- cksum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
- htons(ip->ip_p + ip->ip_len - (ip->ip_hl << 2)));
-
- switch (ip->ip_p) {
- case IPPROTO_TCP:
- th = (struct tcphdr *)(ip + 1);
- /*
- * Maybe it was set in
- * libalias...
- */
- th->th_x2 = 0;
- th->th_sum = cksum;
- mcl->m_pkthdr.csum_data =
- offsetof(struct tcphdr, th_sum);
- break;
- case IPPROTO_UDP:
- uh = (struct udphdr *)(ip + 1);
- uh->uh_sum = cksum;
- mcl->m_pkthdr.csum_data =
- offsetof(struct udphdr, uh_sum);
- break;
- }
- /* No hw checksum offloading: do it ourselves */
- if ((mcl->m_pkthdr.csum_flags & CSUM_DELAY_DATA) == 0) {
- in_delayed_cksum(mcl);
- mcl->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
- }
- ip->ip_len = htons(ip->ip_len);
- }
- args->m = mcl;
- return (IP_FW_NAT);
-}
-
-static struct cfg_nat *
-lookup_nat(struct nat_list *l, int nat_id)
-{
- struct cfg_nat *res;
-
- LIST_FOREACH(res, l, _next) {
- if (res->id == nat_id)
- break;
- }
- return res;
-}
-
-static int
-ipfw_nat_cfg(struct sockopt *sopt)
-{
- struct cfg_nat *cfg, *ptr;
- char *buf;
- struct ip_fw_chain *chain = &V_layer3_chain;
- size_t len;
- int gencnt, error = 0;
-
- len = sopt->sopt_valsize;
- buf = malloc(len, M_TEMP, M_WAITOK | M_ZERO);
- if ((error = sooptcopyin(sopt, buf, len, sizeof(struct cfg_nat))) != 0)
- goto out;
-
- cfg = (struct cfg_nat *)buf;
- if (cfg->id < 0) {
- error = EINVAL;
- goto out;
- }
-
- /*
- * Find/create nat rule.
- */
- IPFW_WLOCK(chain);
- gencnt = chain->gencnt;
- ptr = lookup_nat(&chain->nat, cfg->id);
- if (ptr == NULL) {
- IPFW_WUNLOCK(chain);
- /* New rule: allocate and init new instance. */
- ptr = malloc(sizeof(struct cfg_nat), M_IPFW, M_WAITOK | M_ZERO);
- ptr->lib = LibAliasInit(NULL);
- LIST_INIT(&ptr->redir_chain);
- } else {
- /* Entry already present: temporarily unhook it. */
- LIST_REMOVE(ptr, _next);
- flush_nat_ptrs(chain, cfg->id);
- IPFW_WUNLOCK(chain);
- }
-
- /*
- * Basic nat configuration.
- */
- ptr->id = cfg->id;
- /*
- * XXX - what if this rule doesn't nat any ip and just
- * redirect?
- * do we set aliasaddress to 0.0.0.0?
- */
- ptr->ip = cfg->ip;
- ptr->redir_cnt = cfg->redir_cnt;
- ptr->mode = cfg->mode;
- LibAliasSetMode(ptr->lib, cfg->mode, cfg->mode);
- LibAliasSetAddress(ptr->lib, ptr->ip);
- memcpy(ptr->if_name, cfg->if_name, IF_NAMESIZE);
-
- /*
- * Redir and LSNAT configuration.
- */
- /* Delete old cfgs. */
- del_redir_spool_cfg(ptr, &ptr->redir_chain);
- /* Add new entries. */
- add_redir_spool_cfg(&buf[(sizeof(struct cfg_nat))], ptr);
-
- IPFW_WLOCK(chain);
- /* Extra check to avoid race with another ipfw_nat_cfg() */
- if (gencnt != chain->gencnt &&
- ((cfg = lookup_nat(&chain->nat, ptr->id)) != NULL))
- LIST_REMOVE(cfg, _next);
- LIST_INSERT_HEAD(&chain->nat, ptr, _next);
- chain->gencnt++;
- IPFW_WUNLOCK(chain);
-
-out:
- free(buf, M_TEMP);
- return (error);
-}
-
-static int
-ipfw_nat_del(struct sockopt *sopt)
-{
- struct cfg_nat *ptr;
- struct ip_fw_chain *chain = &V_layer3_chain;
- int i;
-
- sooptcopyin(sopt, &i, sizeof i, sizeof i);
- /* XXX validate i */
- IPFW_WLOCK(chain);
- ptr = lookup_nat(&chain->nat, i);
- if (ptr == NULL) {
- IPFW_WUNLOCK(chain);
- return (EINVAL);
- }
- LIST_REMOVE(ptr, _next);
- flush_nat_ptrs(chain, i);
- IPFW_WUNLOCK(chain);
- del_redir_spool_cfg(ptr, &ptr->redir_chain);
- LibAliasUninit(ptr->lib);
- free(ptr, M_IPFW);
- return (0);
-}
-
-static int
-ipfw_nat_get_cfg(struct sockopt *sopt)
-{
- struct ip_fw_chain *chain = &V_layer3_chain;
- struct cfg_nat *n;
- struct cfg_redir *r;
- struct cfg_spool *s;
- char *data;
- int gencnt, nat_cnt, len, error;
-
- nat_cnt = 0;
- len = sizeof(nat_cnt);
-
- IPFW_RLOCK(chain);
-retry:
- gencnt = chain->gencnt;
- /* Estimate memory amount */
- LIST_FOREACH(n, &chain->nat, _next) {
- nat_cnt++;
- len += sizeof(struct cfg_nat);
- LIST_FOREACH(r, &n->redir_chain, _next) {
- len += sizeof(struct cfg_redir);
- LIST_FOREACH(s, &r->spool_chain, _next)
- len += sizeof(struct cfg_spool);
- }
- }
- IPFW_RUNLOCK(chain);
-
- data = malloc(len, M_TEMP, M_WAITOK | M_ZERO);
- bcopy(&nat_cnt, data, sizeof(nat_cnt));
-
- nat_cnt = 0;
- len = sizeof(nat_cnt);
-
- IPFW_RLOCK(chain);
- if (gencnt != chain->gencnt) {
- free(data, M_TEMP);
- goto retry;
- }
- /* Serialize all the data. */
- LIST_FOREACH(n, &chain->nat, _next) {
- bcopy(n, &data[len], sizeof(struct cfg_nat));
- len += sizeof(struct cfg_nat);
- LIST_FOREACH(r, &n->redir_chain, _next) {
- bcopy(r, &data[len], sizeof(struct cfg_redir));
- len += sizeof(struct cfg_redir);
- LIST_FOREACH(s, &r->spool_chain, _next) {
- bcopy(s, &data[len], sizeof(struct cfg_spool));
- len += sizeof(struct cfg_spool);
- }
- }
- }
- IPFW_RUNLOCK(chain);
-
- error = sooptcopyout(sopt, data, len);
- free(data, M_TEMP);
-
- return (error);
-}
-
-static int
-ipfw_nat_get_log(struct sockopt *sopt)
-{
- uint8_t *data;
- struct cfg_nat *ptr;
- int i, size;
- struct ip_fw_chain *chain;
-
- chain = &V_layer3_chain;
-
- IPFW_RLOCK(chain);
- /* one pass to count, one to copy the data */
- i = 0;
- LIST_FOREACH(ptr, &chain->nat, _next) {
- if (ptr->lib->logDesc == NULL)
- continue;
- i++;
- }
- size = i * (LIBALIAS_BUF_SIZE + sizeof(int));
- data = malloc(size, M_IPFW, M_NOWAIT | M_ZERO);
- if (data == NULL) {
- IPFW_RUNLOCK(chain);
- return (ENOSPC);
- }
- i = 0;
- LIST_FOREACH(ptr, &chain->nat, _next) {
- if (ptr->lib->logDesc == NULL)
- continue;
- bcopy(&ptr->id, &data[i], sizeof(int));
- i += sizeof(int);
- bcopy(ptr->lib->logDesc, &data[i], LIBALIAS_BUF_SIZE);
- i += LIBALIAS_BUF_SIZE;
- }
- IPFW_RUNLOCK(chain);
- sooptcopyout(sopt, data, size);
- free(data, M_IPFW);
- return(0);
-}
-
-static void
-ipfw_nat_init(void)
-{
-
- IPFW_WLOCK(&V_layer3_chain);
- /* init ipfw hooks */
- ipfw_nat_ptr = ipfw_nat;
- lookup_nat_ptr = lookup_nat;
- ipfw_nat_cfg_ptr = ipfw_nat_cfg;
- ipfw_nat_del_ptr = ipfw_nat_del;
- ipfw_nat_get_cfg_ptr = ipfw_nat_get_cfg;
- ipfw_nat_get_log_ptr = ipfw_nat_get_log;
- IPFW_WUNLOCK(&V_layer3_chain);
- V_ifaddr_event_tag = EVENTHANDLER_REGISTER(
- ifaddr_event, ifaddr_change,
- NULL, EVENTHANDLER_PRI_ANY);
-}
-
-static void
-ipfw_nat_destroy(void)
-{
- struct cfg_nat *ptr, *ptr_temp;
- struct ip_fw_chain *chain;
-
- chain = &V_layer3_chain;
- IPFW_WLOCK(chain);
- LIST_FOREACH_SAFE(ptr, &chain->nat, _next, ptr_temp) {
- LIST_REMOVE(ptr, _next);
- del_redir_spool_cfg(ptr, &ptr->redir_chain);
- LibAliasUninit(ptr->lib);
- free(ptr, M_IPFW);
- }
- EVENTHANDLER_DEREGISTER(ifaddr_event, V_ifaddr_event_tag);
- flush_nat_ptrs(chain, -1 /* flush all */);
- /* deregister ipfw_nat */
- ipfw_nat_ptr = NULL;
- lookup_nat_ptr = NULL;
- ipfw_nat_cfg_ptr = NULL;
- ipfw_nat_del_ptr = NULL;
- ipfw_nat_get_cfg_ptr = NULL;
- ipfw_nat_get_log_ptr = NULL;
- IPFW_WUNLOCK(chain);
-}
-
-static int
-ipfw_nat_modevent(module_t mod, int type, void *unused)
-{
- int err = 0;
-
- switch (type) {
- case MOD_LOAD:
- ipfw_nat_init();
- break;
-
- case MOD_UNLOAD:
- ipfw_nat_destroy();
- break;
-
- default:
- return EOPNOTSUPP;
- break;
- }
- return err;
-}
-
-static moduledata_t ipfw_nat_mod = {
- "ipfw_nat",
- ipfw_nat_modevent,
- 0
-};
-
-DECLARE_MODULE(ipfw_nat, ipfw_nat_mod, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY);
-MODULE_DEPEND(ipfw_nat, libalias, 1, 1, 1);
-MODULE_DEPEND(ipfw_nat, ipfw, 2, 2, 2);
-MODULE_VERSION(ipfw_nat, 1);
-/* end of file */
diff --git a/sys/netinet/ipfw/ip_fw_pfil.c b/sys/netinet/ipfw/ip_fw_pfil.c
deleted file mode 100644
index 8d429e7..0000000
--- a/sys/netinet/ipfw/ip_fw_pfil.c
+++ /dev/null
@@ -1,588 +0,0 @@
-/*-
- * Copyright (c) 2004 Andre Oppermann, Internet Business Solutions AG
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
-#include "opt_ipfw.h"
-#include "opt_inet.h"
-#include "opt_inet6.h"
-#ifndef INET
-#error IPFIREWALL requires INET.
-#endif /* INET */
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/malloc.h>
-#include <sys/mbuf.h>
-#include <sys/module.h>
-#include <sys/kernel.h>
-#include <sys/lock.h>
-#include <sys/rwlock.h>
-#include <sys/socket.h>
-#include <sys/sysctl.h>
-
-#include <net/if.h>
-#include <net/route.h>
-#include <net/ethernet.h>
-#include <net/pfil.h>
-#include <net/vnet.h>
-
-#include <netinet/in.h>
-#include <netinet/in_systm.h>
-#include <netinet/ip.h>
-#include <netinet/ip_var.h>
-#include <netinet/ip_fw.h>
-#ifdef INET6
-#include <netinet/ip6.h>
-#include <netinet6/ip6_var.h>
-#endif
-#include <netinet/ipfw/ip_fw_private.h>
-#include <netgraph/ng_ipfw.h>
-
-#include <machine/in_cksum.h>
-
-static VNET_DEFINE(int, fw_enable) = 1;
-#define V_fw_enable VNET(fw_enable)
-
-#ifdef INET6
-static VNET_DEFINE(int, fw6_enable) = 1;
-#define V_fw6_enable VNET(fw6_enable)
-#endif
-
-static VNET_DEFINE(int, fwlink_enable) = 0;
-#define V_fwlink_enable VNET(fwlink_enable)
-
-int ipfw_chg_hook(SYSCTL_HANDLER_ARGS);
-
-/* Forward declarations. */
-static int ipfw_divert(struct mbuf **, int, struct ipfw_rule_ref *, int);
-static int ipfw_check_packet(void *, struct mbuf **, struct ifnet *, int,
- struct inpcb *);
-static int ipfw_check_frame(void *, struct mbuf **, struct ifnet *, int,
- struct inpcb *);
-
-#ifdef SYSCTL_NODE
-
-SYSBEGIN(f1)
-
-SYSCTL_DECL(_net_inet_ip_fw);
-SYSCTL_VNET_PROC(_net_inet_ip_fw, OID_AUTO, enable,
- CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_SECURE3, &VNET_NAME(fw_enable), 0,
- ipfw_chg_hook, "I", "Enable ipfw");
-#ifdef INET6
-SYSCTL_DECL(_net_inet6_ip6_fw);
-SYSCTL_VNET_PROC(_net_inet6_ip6_fw, OID_AUTO, enable,
- CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_SECURE3, &VNET_NAME(fw6_enable), 0,
- ipfw_chg_hook, "I", "Enable ipfw+6");
-#endif /* INET6 */
-
-SYSCTL_DECL(_net_link_ether);
-SYSCTL_VNET_PROC(_net_link_ether, OID_AUTO, ipfw,
- CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_SECURE3, &VNET_NAME(fwlink_enable), 0,
- ipfw_chg_hook, "I", "Pass ether pkts through firewall");
-
-SYSEND
-
-#endif /* SYSCTL_NODE */
-
-/*
- * The pfilter hook to pass packets to ipfw_chk and then to
- * dummynet, divert, netgraph or other modules.
- * The packet may be consumed.
- */
-static int
-ipfw_check_packet(void *arg, struct mbuf **m0, struct ifnet *ifp, int dir,
- struct inpcb *inp)
-{
- struct ip_fw_args args;
- struct m_tag *tag;
- int ipfw;
- int ret;
-
- /* all the processing now uses ip_len in net format */
- if (mtod(*m0, struct ip *)->ip_v == 4)
- SET_NET_IPLEN(mtod(*m0, struct ip *));
-
- /* convert dir to IPFW values */
- dir = (dir == PFIL_IN) ? DIR_IN : DIR_OUT;
- bzero(&args, sizeof(args));
-
-again:
- /*
- * extract and remove the tag if present. If we are left
- * with onepass, optimize the outgoing path.
- */
- tag = m_tag_locate(*m0, MTAG_IPFW_RULE, 0, NULL);
- if (tag != NULL) {
- args.rule = *((struct ipfw_rule_ref *)(tag+1));
- m_tag_delete(*m0, tag);
- if (args.rule.info & IPFW_ONEPASS) {
- if (mtod(*m0, struct ip *)->ip_v == 4)
- SET_HOST_IPLEN(mtod(*m0, struct ip *));
- return (0);
- }
- }
-
- args.m = *m0;
- args.oif = dir == DIR_OUT ? ifp : NULL;
- args.inp = inp;
-
- ipfw = ipfw_chk(&args);
- *m0 = args.m;
-
- KASSERT(*m0 != NULL || ipfw == IP_FW_DENY, ("%s: m0 is NULL",
- __func__));
-
- /* breaking out of the switch means drop */
- ret = 0; /* default return value for pass */
- switch (ipfw) {
- case IP_FW_PASS:
- /* next_hop may be set by ipfw_chk */
- if (args.next_hop == NULL && args.next_hop6 == NULL)
- break; /* pass */
-#if !defined(IPFIREWALL_FORWARD) || (!defined(INET6) && !defined(INET))
- ret = EACCES;
-#else
- {
- struct m_tag *fwd_tag;
- size_t len;
-
- KASSERT(args.next_hop == NULL || args.next_hop6 == NULL,
- ("%s: both next_hop=%p and next_hop6=%p not NULL", __func__,
- args.next_hop, args.next_hop6));
-#ifdef INET6
- if (args.next_hop6 != NULL)
- len = sizeof(struct sockaddr_in6);
-#endif
-#ifdef INET
- if (args.next_hop != NULL)
- len = sizeof(struct sockaddr_in);
-#endif
-
- /* Incoming packets should not be tagged so we do not
- * m_tag_find. Outgoing packets may be tagged, so we
- * reuse the tag if present.
- */
- fwd_tag = (dir == DIR_IN) ? NULL :
- m_tag_find(*m0, PACKET_TAG_IPFORWARD, NULL);
- if (fwd_tag != NULL) {
- m_tag_unlink(*m0, fwd_tag);
- } else {
- fwd_tag = m_tag_get(PACKET_TAG_IPFORWARD, len,
- M_NOWAIT);
- if (fwd_tag == NULL) {
- ret = EACCES;
- break; /* i.e. drop */
- }
- }
-#ifdef INET6
- if (args.next_hop6 != NULL) {
- bcopy(args.next_hop6, (fwd_tag+1), len);
- if (in6_localip(&args.next_hop6->sin6_addr))
- (*m0)->m_flags |= M_FASTFWD_OURS;
- }
-#endif
-#ifdef INET
- if (args.next_hop != NULL) {
- bcopy(args.next_hop, (fwd_tag+1), len);
- if (in_localip(args.next_hop->sin_addr))
- (*m0)->m_flags |= M_FASTFWD_OURS;
- }
-#endif
- m_tag_prepend(*m0, fwd_tag);
- }
-#endif /* IPFIREWALL_FORWARD */
- break;
-
- case IP_FW_DENY:
- ret = EACCES;
- break; /* i.e. drop */
-
- case IP_FW_DUMMYNET:
- ret = EACCES;
- if (ip_dn_io_ptr == NULL)
- break; /* i.e. drop */
- if (mtod(*m0, struct ip *)->ip_v == 4)
- ret = ip_dn_io_ptr(m0, dir, &args);
- else if (mtod(*m0, struct ip *)->ip_v == 6)
- ret = ip_dn_io_ptr(m0, dir | PROTO_IPV6, &args);
- else
- break; /* drop it */
- /*
- * XXX should read the return value.
- * dummynet normally eats the packet and sets *m0=NULL
- * unless the packet can be sent immediately. In this
- * case args is updated and we should re-run the
- * check without clearing args.
- */
- if (*m0 != NULL)
- goto again;
- break;
-
- case IP_FW_TEE:
- case IP_FW_DIVERT:
- if (ip_divert_ptr == NULL) {
- ret = EACCES;
- break; /* i.e. drop */
- }
- ret = ipfw_divert(m0, dir, &args.rule,
- (ipfw == IP_FW_TEE) ? 1 : 0);
- /* continue processing for the original packet (tee). */
- if (*m0)
- goto again;
- break;
-
- case IP_FW_NGTEE:
- case IP_FW_NETGRAPH:
- if (ng_ipfw_input_p == NULL) {
- ret = EACCES;
- break; /* i.e. drop */
- }
- ret = ng_ipfw_input_p(m0, dir, &args,
- (ipfw == IP_FW_NGTEE) ? 1 : 0);
- if (ipfw == IP_FW_NGTEE) /* ignore errors for NGTEE */
- goto again; /* continue with packet */
- break;
-
- case IP_FW_NAT:
- /* honor one-pass in case of successful nat */
- if (V_fw_one_pass)
- break; /* ret is already 0 */
- goto again;
-
- case IP_FW_REASS:
- goto again; /* continue with packet */
-
- default:
- KASSERT(0, ("%s: unknown retval", __func__));
- }
-
- if (ret != 0) {
- if (*m0)
- FREE_PKT(*m0);
- *m0 = NULL;
- }
- if (*m0 && mtod(*m0, struct ip *)->ip_v == 4)
- SET_HOST_IPLEN(mtod(*m0, struct ip *));
- return ret;
-}
-
-/*
- * ipfw processing for ethernet packets (in and out).
- * Inteface is NULL from ether_demux, and ifp from
- * ether_output_frame.
- */
-static int
-ipfw_check_frame(void *arg, struct mbuf **m0, struct ifnet *dst, int dir,
- struct inpcb *inp)
-{
- struct ether_header *eh;
- struct ether_header save_eh;
- struct mbuf *m;
- int i, ret;
- struct ip_fw_args args;
- struct m_tag *mtag;
-
- /* fetch start point from rule, if any */
- mtag = m_tag_locate(*m0, MTAG_IPFW_RULE, 0, NULL);
- if (mtag == NULL) {
- args.rule.slot = 0;
- } else {
- /* dummynet packet, already partially processed */
- struct ipfw_rule_ref *r;
-
- /* XXX can we free it after use ? */
- mtag->m_tag_id = PACKET_TAG_NONE;
- r = (struct ipfw_rule_ref *)(mtag + 1);
- if (r->info & IPFW_ONEPASS)
- return (0);
- args.rule = *r;
- }
-
- /* I need some amt of data to be contiguous */
- m = *m0;
- i = min(m->m_pkthdr.len, max_protohdr);
- if (m->m_len < i) {
- m = m_pullup(m, i);
- if (m == NULL) {
- *m0 = m;
- return (0);
- }
- }
- eh = mtod(m, struct ether_header *);
- save_eh = *eh; /* save copy for restore below */
- m_adj(m, ETHER_HDR_LEN); /* strip ethernet header */
-
- args.m = m; /* the packet we are looking at */
- args.oif = dst; /* destination, if any */
- args.next_hop = NULL; /* we do not support forward yet */
- args.next_hop6 = NULL; /* we do not support forward yet */
- args.eh = &save_eh; /* MAC header for bridged/MAC packets */
- args.inp = NULL; /* used by ipfw uid/gid/jail rules */
- i = ipfw_chk(&args);
- m = args.m;
- if (m != NULL) {
- /*
- * Restore Ethernet header, as needed, in case the
- * mbuf chain was replaced by ipfw.
- */
- M_PREPEND(m, ETHER_HDR_LEN, M_DONTWAIT);
- if (m == NULL) {
- *m0 = NULL;
- return (0);
- }
- if (eh != mtod(m, struct ether_header *))
- bcopy(&save_eh, mtod(m, struct ether_header *),
- ETHER_HDR_LEN);
- }
- *m0 = m;
-
- ret = 0;
- /* Check result of ipfw_chk() */
- switch (i) {
- case IP_FW_PASS:
- break;
-
- case IP_FW_DENY:
- ret = EACCES;
- break; /* i.e. drop */
-
- case IP_FW_DUMMYNET:
- ret = EACCES;
- int dir;
-
- if (ip_dn_io_ptr == NULL)
- break; /* i.e. drop */
-
- *m0 = NULL;
- dir = PROTO_LAYER2 | (dst ? DIR_OUT : DIR_IN);
- ip_dn_io_ptr(&m, dir, &args);
- return 0;
-
- default:
- KASSERT(0, ("%s: unknown retval", __func__));
- }
-
- if (ret != 0) {
- if (*m0)
- FREE_PKT(*m0);
- *m0 = NULL;
- }
-
- return ret;
-}
-
-/* do the divert, return 1 on error 0 on success */
-static int
-ipfw_divert(struct mbuf **m0, int incoming, struct ipfw_rule_ref *rule,
- int tee)
-{
- /*
- * ipfw_chk() has already tagged the packet with the divert tag.
- * If tee is set, copy packet and return original.
- * If not tee, consume packet and send it to divert socket.
- */
- struct mbuf *clone;
- struct ip *ip = mtod(*m0, struct ip *);
- struct m_tag *tag;
-
- /* Cloning needed for tee? */
- if (tee == 0) {
- clone = *m0; /* use the original mbuf */
- *m0 = NULL;
- } else {
- clone = m_dup(*m0, M_DONTWAIT);
- /* If we cannot duplicate the mbuf, we sacrifice the divert
- * chain and continue with the tee-ed packet.
- */
- if (clone == NULL)
- return 1;
- }
-
- /*
- * Divert listeners can normally handle non-fragmented packets,
- * but we can only reass in the non-tee case.
- * This means that listeners on a tee rule may get fragments,
- * and have to live with that.
- * Note that we now have the 'reass' ipfw option so if we care
- * we can do it before a 'tee'.
- */
- if (!tee) switch (ip->ip_v) {
- case IPVERSION:
- if (ntohs(ip->ip_off) & (IP_MF | IP_OFFMASK)) {
- int hlen;
- struct mbuf *reass;
-
- SET_HOST_IPLEN(ip); /* ip_reass wants host order */
- reass = ip_reass(clone); /* Reassemble packet. */
- if (reass == NULL)
- return 0; /* not an error */
- /* if reass = NULL then it was consumed by ip_reass */
- /*
- * IP header checksum fixup after reassembly and leave header
- * in network byte order.
- */
- ip = mtod(reass, struct ip *);
- hlen = ip->ip_hl << 2;
- SET_NET_IPLEN(ip);
- ip->ip_sum = 0;
- if (hlen == sizeof(struct ip))
- ip->ip_sum = in_cksum_hdr(ip);
- else
- ip->ip_sum = in_cksum(reass, hlen);
- clone = reass;
- }
- break;
-#ifdef INET6
- case IPV6_VERSION >> 4:
- {
- struct ip6_hdr *const ip6 = mtod(clone, struct ip6_hdr *);
-
- if (ip6->ip6_nxt == IPPROTO_FRAGMENT) {
- int nxt, off;
-
- off = sizeof(struct ip6_hdr);
- nxt = frag6_input(&clone, &off, 0);
- if (nxt == IPPROTO_DONE)
- return (0);
- }
- break;
- }
-#endif
- }
-
- /* attach a tag to the packet with the reinject info */
- tag = m_tag_alloc(MTAG_IPFW_RULE, 0,
- sizeof(struct ipfw_rule_ref), M_NOWAIT);
- if (tag == NULL) {
- FREE_PKT(clone);
- return 1;
- }
- *((struct ipfw_rule_ref *)(tag+1)) = *rule;
- m_tag_prepend(clone, tag);
-
- /* Do the dirty job... */
- ip_divert_ptr(clone, incoming);
- return 0;
-}
-
-/*
- * attach or detach hooks for a given protocol family
- */
-static int
-ipfw_hook(int onoff, int pf)
-{
- struct pfil_head *pfh;
- void *hook_func;
-
- pfh = pfil_head_get(PFIL_TYPE_AF, pf);
- if (pfh == NULL)
- return ENOENT;
-
- hook_func = (pf == AF_LINK) ? ipfw_check_frame : ipfw_check_packet;
-
- (void) (onoff ? pfil_add_hook : pfil_remove_hook)
- (hook_func, NULL, PFIL_IN | PFIL_OUT | PFIL_WAITOK, pfh);
-
- return 0;
-}
-
-int
-ipfw_attach_hooks(int arg)
-{
- int error = 0;
-
- if (arg == 0) /* detach */
- ipfw_hook(0, AF_INET);
- else if (V_fw_enable && ipfw_hook(1, AF_INET) != 0) {
- error = ENOENT; /* see ip_fw_pfil.c::ipfw_hook() */
- printf("ipfw_hook() error\n");
- }
-#ifdef INET6
- if (arg == 0) /* detach */
- ipfw_hook(0, AF_INET6);
- else if (V_fw6_enable && ipfw_hook(1, AF_INET6) != 0) {
- error = ENOENT;
- printf("ipfw6_hook() error\n");
- }
-#endif
- if (arg == 0) /* detach */
- ipfw_hook(0, AF_LINK);
- else if (V_fwlink_enable && ipfw_hook(1, AF_LINK) != 0) {
- error = ENOENT;
- printf("ipfw_link_hook() error\n");
- }
- return error;
-}
-
-int
-ipfw_chg_hook(SYSCTL_HANDLER_ARGS)
-{
- int *enable;
- int newval;
- int error;
- int af;
-
- if (arg1 == &VNET_NAME(fw_enable)) {
- enable = &V_fw_enable;
- af = AF_INET;
- }
-#ifdef INET6
- else if (arg1 == &VNET_NAME(fw6_enable)) {
- enable = &V_fw6_enable;
- af = AF_INET6;
- }
-#endif
- else if (arg1 == &VNET_NAME(fwlink_enable)) {
- enable = &V_fwlink_enable;
- af = AF_LINK;
- }
- else
- return (EINVAL);
-
- newval = *enable;
-
- /* Handle sysctl change */
- error = sysctl_handle_int(oidp, &newval, 0, req);
-
- if (error)
- return (error);
-
- /* Formalize new value */
- newval = (newval) ? 1 : 0;
-
- if (*enable == newval)
- return (0);
-
- error = ipfw_hook(newval, af);
- if (error)
- return (error);
- *enable = newval;
-
- return (0);
-}
-/* end of file */
diff --git a/sys/netinet/ipfw/ip_fw_private.h b/sys/netinet/ipfw/ip_fw_private.h
deleted file mode 100644
index fb13a72..0000000
--- a/sys/netinet/ipfw/ip_fw_private.h
+++ /dev/null
@@ -1,309 +0,0 @@
-/*-
- * Copyright (c) 2002-2009 Luigi Rizzo, Universita` di Pisa
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD$
- */
-
-#ifndef _IPFW2_PRIVATE_H
-#define _IPFW2_PRIVATE_H
-
-/*
- * Internal constants and data structures used by ipfw components
- * and not meant to be exported outside the kernel.
- */
-
-#ifdef _KERNEL
-
-/*
- * For platforms that do not have SYSCTL support, we wrap the
- * SYSCTL_* into a function (one per file) to collect the values
- * into an array at module initialization. The wrapping macros,
- * SYSBEGIN() and SYSEND, are empty in the default case.
- */
-#ifndef SYSBEGIN
-#define SYSBEGIN(x)
-#endif
-#ifndef SYSEND
-#define SYSEND
-#endif
-
-/* Return values from ipfw_chk() */
-enum {
- IP_FW_PASS = 0,
- IP_FW_DENY,
- IP_FW_DIVERT,
- IP_FW_TEE,
- IP_FW_DUMMYNET,
- IP_FW_NETGRAPH,
- IP_FW_NGTEE,
- IP_FW_NAT,
- IP_FW_REASS,
-};
-
-/*
- * Structure for collecting parameters to dummynet for ip6_output forwarding
- */
-struct _ip6dn_args {
- struct ip6_pktopts *opt_or;
- struct route_in6 ro_or;
- int flags_or;
- struct ip6_moptions *im6o_or;
- struct ifnet *origifp_or;
- struct ifnet *ifp_or;
- struct sockaddr_in6 dst_or;
- u_long mtu_or;
- struct route_in6 ro_pmtu_or;
-};
-
-
-/*
- * Arguments for calling ipfw_chk() and dummynet_io(). We put them
- * all into a structure because this way it is easier and more
- * efficient to pass variables around and extend the interface.
- */
-struct ip_fw_args {
- struct mbuf *m; /* the mbuf chain */
- struct ifnet *oif; /* output interface */
- struct sockaddr_in *next_hop; /* forward address */
- struct sockaddr_in6 *next_hop6; /* ipv6 forward address */
-
- /*
- * On return, it points to the matching rule.
- * On entry, rule.slot > 0 means the info is valid and
- * contains the starting rule for an ipfw search.
- * If chain_id == chain->id && slot >0 then jump to that slot.
- * Otherwise, we locate the first rule >= rulenum:rule_id
- */
- struct ipfw_rule_ref rule; /* match/restart info */
-
- struct ether_header *eh; /* for bridged packets */
-
- struct ipfw_flow_id f_id; /* grabbed from IP header */
- //uint32_t cookie; /* a cookie depending on rule action */
- struct inpcb *inp;
-
- struct _ip6dn_args dummypar; /* dummynet->ip6_output */
- struct sockaddr_in hopstore; /* store here if cannot use a pointer */
-};
-
-MALLOC_DECLARE(M_IPFW);
-
-/*
- * Hooks sometime need to know the direction of the packet
- * (divert, dummynet, netgraph, ...)
- * We use a generic definition here, with bit0-1 indicating the
- * direction, bit 2 indicating layer2 or 3, bit 3-4 indicating the
- * specific protocol
- * indicating the protocol (if necessary)
- */
-enum {
- DIR_MASK = 0x3,
- DIR_OUT = 0,
- DIR_IN = 1,
- DIR_FWD = 2,
- DIR_DROP = 3,
- PROTO_LAYER2 = 0x4, /* set for layer 2 */
- /* PROTO_DEFAULT = 0, */
- PROTO_IPV4 = 0x08,
- PROTO_IPV6 = 0x10,
- PROTO_IFB = 0x0c, /* layer2 + ifbridge */
- /* PROTO_OLDBDG = 0x14, unused, old bridge */
-};
-
-/* wrapper for freeing a packet, in case we need to do more work */
-#ifndef FREE_PKT
-#if defined(__linux__) || defined(_WIN32)
-#define FREE_PKT(m) netisr_dispatch(-1, m)
-#else
-#define FREE_PKT(m) m_freem(m)
-#endif
-#endif /* !FREE_PKT */
-
-/*
- * Function definitions.
- */
-
-/* attach (arg = 1) or detach (arg = 0) hooks */
-int ipfw_attach_hooks(int);
-#ifdef NOTYET
-void ipfw_nat_destroy(void);
-#endif
-
-/* In ip_fw_log.c */
-struct ip;
-void ipfw_log_bpf(int);
-void ipfw_log(struct ip_fw *f, u_int hlen, struct ip_fw_args *args,
- struct mbuf *m, struct ifnet *oif, u_short offset, uint32_t tablearg,
- struct ip *ip);
-VNET_DECLARE(u_int64_t, norule_counter);
-#define V_norule_counter VNET(norule_counter)
-VNET_DECLARE(int, verbose_limit);
-#define V_verbose_limit VNET(verbose_limit)
-
-/* In ip_fw_dynamic.c */
-
-enum { /* result for matching dynamic rules */
- MATCH_REVERSE = 0,
- MATCH_FORWARD,
- MATCH_NONE,
- MATCH_UNKNOWN,
-};
-
-/*
- * The lock for dynamic rules is only used once outside the file,
- * and only to release the result of lookup_dyn_rule().
- * Eventually we may implement it with a callback on the function.
- */
-void ipfw_dyn_unlock(void);
-
-struct tcphdr;
-struct mbuf *ipfw_send_pkt(struct mbuf *, struct ipfw_flow_id *,
- u_int32_t, u_int32_t, int);
-int ipfw_install_state(struct ip_fw *rule, ipfw_insn_limit *cmd,
- struct ip_fw_args *args, uint32_t tablearg);
-ipfw_dyn_rule *ipfw_lookup_dyn_rule(struct ipfw_flow_id *pkt,
- int *match_direction, struct tcphdr *tcp);
-void ipfw_remove_dyn_children(struct ip_fw *rule);
-void ipfw_get_dynamic(char **bp, const char *ep);
-
-void ipfw_dyn_attach(void); /* uma_zcreate .... */
-void ipfw_dyn_detach(void); /* uma_zdestroy ... */
-void ipfw_dyn_init(void); /* per-vnet initialization */
-void ipfw_dyn_uninit(int); /* per-vnet deinitialization */
-int ipfw_dyn_len(void);
-
-/* common variables */
-VNET_DECLARE(int, fw_one_pass);
-#define V_fw_one_pass VNET(fw_one_pass)
-
-VNET_DECLARE(int, fw_verbose);
-#define V_fw_verbose VNET(fw_verbose)
-
-VNET_DECLARE(struct ip_fw_chain, layer3_chain);
-#define V_layer3_chain VNET(layer3_chain)
-
-VNET_DECLARE(u_int32_t, set_disable);
-#define V_set_disable VNET(set_disable)
-
-VNET_DECLARE(int, autoinc_step);
-#define V_autoinc_step VNET(autoinc_step)
-
-VNET_DECLARE(unsigned int, fw_tables_max);
-#define V_fw_tables_max VNET(fw_tables_max)
-
-struct ip_fw_chain {
- struct ip_fw *rules; /* list of rules */
- struct ip_fw *reap; /* list of rules to reap */
- struct ip_fw *default_rule;
- int n_rules; /* number of static rules */
- int static_len; /* total len of static rules */
- struct ip_fw **map; /* array of rule ptrs to ease lookup */
- LIST_HEAD(nat_list, cfg_nat) nat; /* list of nat entries */
- struct radix_node_head **tables; /* IPv4 tables */
- struct radix_node_head **xtables; /* extended tables */
- uint8_t *tabletype; /* Array of table types */
-#if defined( __linux__ ) || defined( _WIN32 )
- spinlock_t rwmtx;
- spinlock_t uh_lock;
-#else
- struct rwlock rwmtx;
- struct rwlock uh_lock; /* lock for upper half */
-#endif
- uint32_t id; /* ruleset id */
- uint32_t gencnt; /* generation count */
-};
-
-struct sockopt; /* used by tcp_var.h */
-
-/*
- * The lock is heavily used by ip_fw2.c (the main file) and ip_fw_nat.c
- * so the variable and the macros must be here.
- */
-
-#define IPFW_LOCK_INIT(_chain) do { \
- rw_init(&(_chain)->rwmtx, "IPFW static rules"); \
- rw_init(&(_chain)->uh_lock, "IPFW UH lock"); \
- } while (0)
-
-#define IPFW_LOCK_DESTROY(_chain) do { \
- rw_destroy(&(_chain)->rwmtx); \
- rw_destroy(&(_chain)->uh_lock); \
- } while (0)
-
-#define IPFW_WLOCK_ASSERT(_chain) rw_assert(&(_chain)->rwmtx, RA_WLOCKED)
-
-#define IPFW_RLOCK(p) rw_rlock(&(p)->rwmtx)
-#define IPFW_RUNLOCK(p) rw_runlock(&(p)->rwmtx)
-#define IPFW_WLOCK(p) rw_wlock(&(p)->rwmtx)
-#define IPFW_WUNLOCK(p) rw_wunlock(&(p)->rwmtx)
-
-#define IPFW_UH_RLOCK(p) rw_rlock(&(p)->uh_lock)
-#define IPFW_UH_RUNLOCK(p) rw_runlock(&(p)->uh_lock)
-#define IPFW_UH_WLOCK(p) rw_wlock(&(p)->uh_lock)
-#define IPFW_UH_WUNLOCK(p) rw_wunlock(&(p)->uh_lock)
-
-/* In ip_fw_sockopt.c */
-int ipfw_find_rule(struct ip_fw_chain *chain, uint32_t key, uint32_t id);
-int ipfw_add_rule(struct ip_fw_chain *chain, struct ip_fw *input_rule);
-int ipfw_ctl(struct sockopt *sopt);
-int ipfw_chk(struct ip_fw_args *args);
-void ipfw_reap_rules(struct ip_fw *head);
-
-/* In ip_fw_table.c */
-struct radix_node;
-int ipfw_lookup_table(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr,
- uint32_t *val);
-int ipfw_lookup_table_extended(struct ip_fw_chain *ch, uint16_t tbl, void *paddr,
- uint32_t *val, int type);
-int ipfw_init_tables(struct ip_fw_chain *ch);
-void ipfw_destroy_tables(struct ip_fw_chain *ch);
-int ipfw_flush_table(struct ip_fw_chain *ch, uint16_t tbl);
-int ipfw_add_table_entry(struct ip_fw_chain *ch, uint16_t tbl, void *paddr,
- uint8_t plen, uint8_t mlen, uint8_t type, uint32_t value);
-int ipfw_del_table_entry(struct ip_fw_chain *ch, uint16_t tbl, void *paddr,
- uint8_t plen, uint8_t mlen, uint8_t type);
-int ipfw_count_table(struct ip_fw_chain *ch, uint32_t tbl, uint32_t *cnt);
-int ipfw_dump_table_entry(struct radix_node *rn, void *arg);
-int ipfw_dump_table(struct ip_fw_chain *ch, ipfw_table *tbl);
-int ipfw_count_xtable(struct ip_fw_chain *ch, uint32_t tbl, uint32_t *cnt);
-int ipfw_dump_xtable(struct ip_fw_chain *ch, ipfw_xtable *tbl);
-int ipfw_resize_tables(struct ip_fw_chain *ch, unsigned int ntables);
-
-/* In ip_fw_nat.c -- XXX to be moved to ip_var.h */
-
-extern struct cfg_nat *(*lookup_nat_ptr)(struct nat_list *, int);
-
-typedef int ipfw_nat_t(struct ip_fw_args *, struct cfg_nat *, struct mbuf *);
-typedef int ipfw_nat_cfg_t(struct sockopt *);
-
-extern ipfw_nat_t *ipfw_nat_ptr;
-#define IPFW_NAT_LOADED (ipfw_nat_ptr != NULL)
-
-extern ipfw_nat_cfg_t *ipfw_nat_cfg_ptr;
-extern ipfw_nat_cfg_t *ipfw_nat_del_ptr;
-extern ipfw_nat_cfg_t *ipfw_nat_get_cfg_ptr;
-extern ipfw_nat_cfg_t *ipfw_nat_get_log_ptr;
-
-#endif /* _KERNEL */
-#endif /* _IPFW2_PRIVATE_H */
diff --git a/sys/netinet/ipfw/ip_fw_sockopt.c b/sys/netinet/ipfw/ip_fw_sockopt.c
deleted file mode 100644
index 2a5f4e7..0000000
--- a/sys/netinet/ipfw/ip_fw_sockopt.c
+++ /dev/null
@@ -1,1448 +0,0 @@
-/*-
- * Copyright (c) 2002-2009 Luigi Rizzo, Universita` di Pisa
- *
- * Supported by: Valeria Paoli
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
-/*
- * Sockopt support for ipfw. The routines here implement
- * the upper half of the ipfw code.
- */
-
-#include "opt_ipfw.h"
-#include "opt_inet.h"
-#ifndef INET
-#error IPFIREWALL requires INET.
-#endif /* INET */
-#include "opt_inet6.h"
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/malloc.h>
-#include <sys/mbuf.h> /* struct m_tag used by nested headers */
-#include <sys/kernel.h>
-#include <sys/lock.h>
-#include <sys/priv.h>
-#include <sys/proc.h>
-#include <sys/rwlock.h>
-#include <sys/socket.h>
-#include <sys/socketvar.h>
-#include <sys/sysctl.h>
-#include <sys/syslog.h>
-#include <net/if.h>
-#include <net/route.h>
-#include <net/vnet.h>
-
-#include <netinet/in.h>
-#include <netinet/ip_var.h> /* hooks */
-#include <netinet/ip_fw.h>
-#include <netinet/ipfw/ip_fw_private.h>
-
-#ifdef MAC
-#include <security/mac/mac_framework.h>
-#endif
-
-MALLOC_DEFINE(M_IPFW, "IpFw/IpAcct", "IpFw/IpAcct chain's");
-
-/*
- * static variables followed by global ones (none in this file)
- */
-
-/*
- * Binary search over the rule map, which is assumed to be sorted by
- * rule number and then by rule id.
- * Returns the index of the first entry that is not smaller than the
- * pair (key, id); if every entry is smaller, the index of the last
- * entry is returned (-1 when the map holds no rules).
- */
-int
-ipfw_find_rule(struct ip_fw_chain *chain, uint32_t key, uint32_t id)
-{
-	int low = 0;
-	int high = chain->n_rules - 1;
-
-	while (low < high) {
-		int mid = (low + high) / 2;
-		struct ip_fw *cur = chain->map[mid];
-
-		if (cur->rulenum < key ||
-		    (cur->rulenum == key && cur->id < id))
-			low = mid + 1;	/* answer lies strictly after mid */
-		else
-			high = mid;	/* mid itself may be the answer */
-	}
-	return (high);
-}
-
-/*
- * Allocate a map able to hold the current number of rules plus 'extra'
- * entries ('extra' may be negative when rules are being deleted).
- * 'locked' tells whether the caller already holds IPFW_UH_WLOCK: if so
- * the allocation must not sleep; otherwise the lock is taken here and is
- * still held when a map is returned.
- * Returns NULL, with the lock state as found on entry, if the
- * allocation fails.
- */
-static struct ip_fw **
-get_map(struct ip_fw_chain *chain, int extra, int locked)
-{
-	struct ip_fw **new_map;
-	int want;
-
-	for (;;) {
-		want = chain->n_rules + extra;
-		new_map = malloc(want * sizeof(struct ip_fw *), M_IPFW,
-		    locked ? M_NOWAIT : M_WAITOK);
-		if (new_map == NULL) {
-			printf("%s: cannot allocate map\n", __FUNCTION__);
-			return (NULL);
-		}
-		if (!locked)
-			IPFW_UH_WLOCK(chain);
-		/* the size is still adequate, keep the lock and the map */
-		if (want >= chain->n_rules + extra)
-			break;
-		/* the rule count grew while we were unlocked: start over */
-		if (!locked)
-			IPFW_UH_WUNLOCK(chain);
-		free(new_map, M_IPFW);
-	}
-	return (new_map);
-}
-
-/*
- * Install new_map (holding new_len entries) as the active rule map and
- * hand the previous map back to the caller.  The chain id is bumped in
- * the same IPFW_WLOCK section.  It is supposed to be called with
- * IPFW_UH_WLOCK held.
- */
-static struct ip_fw **
-swap_map(struct ip_fw_chain *chain, struct ip_fw **new_map, int new_len)
-{
-	struct ip_fw **prev;
-
-	IPFW_WLOCK(chain);
-	prev = chain->map;
-	chain->map = new_map;
-	chain->n_rules = new_len;
-	chain->id++;
-	IPFW_WUNLOCK(chain);
-	return (prev);
-}
-
-/*
- * Add a new rule to the list. Copy the rule into a malloc'ed area, then
- * possibly create a rule number and add the rule to the list.
- * Update the rule_number in the input struct so the caller knows it as well.
- * XXX DO NOT USE FOR THE DEFAULT RULE.
- * Must be called without IPFW_UH held
- *
- * Returns 0 on success, EINVAL if chain->rules is NULL or the requested
- * rule number is above IPFW_DEFAULT_RULE-1, and ENOSPC if the new map
- * cannot be allocated.
- */
-int
-ipfw_add_rule(struct ip_fw_chain *chain, struct ip_fw *input_rule)
-{
- struct ip_fw *rule;
- int i, l, insert_before;
- struct ip_fw **map; /* the new array of pointers */
-
- if (chain->rules == NULL || input_rule->rulenum > IPFW_DEFAULT_RULE-1)
- return (EINVAL);
-
- l = RULESIZE(input_rule);
- rule = malloc(l, M_IPFW, M_WAITOK | M_ZERO);
- /* get_map returns with IPFW_UH_WLOCK if successful */
- map = get_map(chain, 1, 0 /* not locked */);
- if (map == NULL) {
- free(rule, M_IPFW);
- return ENOSPC;
- }
-
- bcopy(input_rule, rule, l);
- /* clear fields not settable from userland */
- rule->x_next = NULL;
- rule->next_rule = NULL;
- rule->pcnt = 0;
- rule->bcnt = 0;
- rule->timestamp = 0;
-
- if (V_autoinc_step < 1)
- V_autoinc_step = 1;
- else if (V_autoinc_step > 1000)
- V_autoinc_step = 1000;
- /*
-  * find the insertion point, we will insert before.
-  * A rule with an explicit number goes after the existing rules with
-  * that number; a rule with number 0 goes right before the default rule.
-  */
- insert_before = rule->rulenum ? rule->rulenum + 1 : IPFW_DEFAULT_RULE;
- i = ipfw_find_rule(chain, insert_before, 0);
- /* duplicate first part */
- if (i > 0)
- bcopy(chain->map, map, i * sizeof(struct ip_fw *));
- map[i] = rule;
- /* duplicate remaining part, we always have the default rule */
- bcopy(chain->map + i, map + i + 1,
- sizeof(struct ip_fw *) *(chain->n_rules - i));
- if (rule->rulenum == 0) {
- /*
-  * no number was given: derive one from the preceding rule,
-  * then write back the number so the caller sees it
-  */
- rule->rulenum = i > 0 ? map[i-1]->rulenum : 0;
- if (rule->rulenum < IPFW_DEFAULT_RULE - V_autoinc_step)
- rule->rulenum += V_autoinc_step;
- input_rule->rulenum = rule->rulenum;
- }
-
- rule->id = chain->id + 1;
- map = swap_map(chain, map, chain->n_rules + 1);
- chain->static_len += l;
- IPFW_UH_WUNLOCK(chain);
- if (map)
- free(map, M_IPFW);
- return (0);
-}
-
-/*
- * Free every rule on a list linked through x_next, such as the reap
- * list built while deleting rules.  An empty list (NULL) is accepted.
- */
-void
-ipfw_reap_rules(struct ip_fw *head)
-{
-	struct ip_fw *next;
-
-	for (; head != NULL; head = next) {
-		/* fetch the link before the rule is released */
-		next = head->x_next;
-		free(head, M_IPFW);
-	}
-}
-
-/*
- * Helper for del_entry(): decide whether a rule survives a delete
- * request.  Returns 1 if the rule must be kept, 0 if it is to be deleted.
- *
- * cmd is one of {0,1,5}:
- *	0	match on rule number only (set is ignored);
- *	1	match on set number only (rule number is ignored);
- *	5	match on both rule number and set number.
- * n == 0 is a wildcard for rule numbers; sets have no wildcard.
- *
- * A rule is kept when any of the following holds:
- *  - it is the default rule (rulenum == IPFW_DEFAULT_RULE), which is
- *    always protected;
- *  - the request is a plain flush (cmd == 0 and n == 0) and the rule
- *    lives in RESVD_SET;
- *  - the set is relevant (cmd != 0) and differs from the rule's set;
- *  - the number is relevant (cmd != 1 and n != 0) and differs from the
- *    rule's number.
- */
-static int
-keep_rule(struct ip_fw *rule, uint8_t cmd, uint8_t set, uint32_t n)
-{
-
-	if (rule->rulenum == IPFW_DEFAULT_RULE)
-		return (1);
-	if (cmd == 0 && n == 0 && rule->set == RESVD_SET)
-		return (1);
-	if (cmd != 0 && rule->set != set)
-		return (1);
-	if (cmd != 1 && n != 0 && n != rule->rulenum)
-		return (1);
-	return (0);
-}
-
-/**
- * Remove all rules with given number, or do set manipulation.
- * Assumes chain != NULL && *chain != NULL.
- *
- * The argument is an uint32_t. The low 16 bit are the rule or set number;
- * the next 8 bits are the new set; the top 8 bits indicate the command:
- *
- * 0 delete rules numbered "rulenum"
- * 1 delete rules in set "rulenum"
- * 2 move rules "rulenum" to set "new_set"
- * 3 move rules from set "rulenum" to set "new_set"
- * 4 swap sets "rulenum" and "new_set"
- * 5 delete rules "rulenum" and set "new_set"
- *
- * Takes and releases IPFW_UH_WLOCK itself. Deleted rules are collected
- * on chain->reap and freed after the lock has been dropped.
- * Returns 0 on success, EINVAL for an out-of-range command, set or rule
- * number, for a specific delete request that matches no rule, and when
- * the new map cannot be allocated.
- */
-static int
-del_entry(struct ip_fw_chain *chain, uint32_t arg)
-{
- struct ip_fw *rule;
- uint32_t num; /* rule number or old_set */
- uint8_t cmd, new_set;
- int start, end, i, ofs, n;
- struct ip_fw **map = NULL;
- int error = 0;
-
- num = arg & 0xffff;
- cmd = (arg >> 24) & 0xff;
- new_set = (arg >> 16) & 0xff;
-
- if (cmd > 5 || new_set > RESVD_SET)
- return EINVAL;
- if (cmd == 0 || cmd == 2 || cmd == 5) {
- if (num >= IPFW_DEFAULT_RULE)
- return EINVAL;
- } else {
- if (num > RESVD_SET) /* old_set */
- return EINVAL;
- }
-
- IPFW_UH_WLOCK(chain); /* arbitrate writers */
- chain->reap = NULL; /* prepare for deletions */
-
- switch (cmd) {
- case 0: /* delete rules "num" (num == 0 matches all) */
- case 1: /* delete all rules in set N */
- case 5: /* delete rules with number N and set "new_set". */
-
- /*
- * Locate first rule to delete (start), the rule after
- * the last one to delete (end), and count how many
- * rules to delete (n). Always use keep_rule() to
- * determine which rules to keep.
- */
- n = 0;
- if (cmd == 1) {
- /* look for a specific set including RESVD_SET.
- * Must scan the entire range, ignore num.
- */
- new_set = num;
- for (start = -1, end = i = 0; i < chain->n_rules; i++) {
- if (keep_rule(chain->map[i], cmd, new_set, 0))
- continue;
- if (start < 0)
- start = i;
- end = i;
- n++;
- }
- end++; /* first non-matching */
- } else {
- /* Optimized search on rule numbers */
- start = ipfw_find_rule(chain, num, 0);
- for (end = start; end < chain->n_rules; end++) {
- rule = chain->map[end];
- if (num > 0 && rule->rulenum != num)
- break;
- if (!keep_rule(rule, cmd, new_set, num))
- n++;
- }
- }
-
- if (n == 0) {
- /* A flush request (arg == 0 or cmd == 1) on empty
- * ruleset returns with no error. On the contrary,
- * if there is no match on a specific request,
- * we return EINVAL.
- */
- if (arg != 0 && cmd != 1)
- error = EINVAL;
- break;
- }
-
- /* We have something to delete. Allocate the new map */
- map = get_map(chain, -n, 1 /* locked */);
- if (map == NULL) {
- error = EINVAL;
- break;
- }
-
- /* 1. bcopy the initial part of the map */
- if (start > 0)
- bcopy(chain->map, map, start * sizeof(struct ip_fw *));
- /* 2. copy active rules between start and end */
- for (i = ofs = start; i < end; i++) {
- rule = chain->map[i];
- if (keep_rule(rule, cmd, new_set, num))
- map[ofs++] = rule;
- }
- /* 3. copy the final part of the map */
- bcopy(chain->map + end, map + ofs,
- (chain->n_rules - end) * sizeof(struct ip_fw *));
- /* 4. swap the maps (swap_map takes IPFW_WLOCK); map is now the old one */
- map = swap_map(chain, map, chain->n_rules - n);
- /* 5. now remove the rules deleted from the old map */
- for (i = start; i < end; i++) {
- int l;
- rule = map[i];
- if (keep_rule(rule, cmd, new_set, num))
- continue;
- l = RULESIZE(rule);
- chain->static_len -= l;
- ipfw_remove_dyn_children(rule);
- rule->x_next = chain->reap;
- chain->reap = rule;
- }
- break;
-
- /*
- * In the next 3 cases the loop stops at (n_rules - 1)
- * because the default rule is never eligible.
- */
-
- case 2: /* move rules with given RULE number to new set */
- for (i = 0; i < chain->n_rules - 1; i++) {
- rule = chain->map[i];
- if (rule->rulenum == num)
- rule->set = new_set;
- }
- break;
-
- case 3: /* move rules with given SET number to new set */
- for (i = 0; i < chain->n_rules - 1; i++) {
- rule = chain->map[i];
- if (rule->set == num)
- rule->set = new_set;
- }
- break;
-
- case 4: /* swap two sets */
- for (i = 0; i < chain->n_rules - 1; i++) {
- rule = chain->map[i];
- if (rule->set == num)
- rule->set = new_set;
- else if (rule->set == new_set)
- rule->set = num;
- }
- break;
- }
-
- rule = chain->reap;
- chain->reap = NULL;
- IPFW_UH_WUNLOCK(chain);
- ipfw_reap_rules(rule);
- if (map)
- free(map, M_IPFW);
- return error;
-}
-
-/*
- * Reset the counters of a single rule.  Unless log_only is set, the
- * packet and byte counters and the timestamp are zeroed; in either case,
- * if the rule's action slot holds an O_LOG instruction its remaining
- * log budget is restored to max_log.
- * Normally run under IPFW_UH_RLOCK; the operations are idempotent, so
- * the only requirement is that the rule does not disappear meanwhile.
- */
-static void
-clear_counters(struct ip_fw *rule, int log_only)
-{
-	ipfw_insn_log *logcmd;
-
-	if (!log_only) {
-		rule->pcnt = 0;
-		rule->bcnt = 0;
-		rule->timestamp = 0;
-	}
-	logcmd = (ipfw_insn_log *)ACTION_PTR(rule);
-	if (logcmd->o.opcode == O_LOG)
-		logcmd->log_left = logcmd->max_log;
-}
-
-/**
- * Reset the counters of one or all firewall rules.
- * 'arg' packs three fields: bits 0-15 are the rule number (0 means
- * every rule), bits 16-23 the set number, bits 24-31 the command:
- *	0	work on rules from all sets;
- *	1	work only on rules from the given set.
- * With log_only != 0 only the logging counters are reset.
- * Returns EINVAL for a bad command or set, or when no rule carries the
- * requested number; 0 otherwise.
- */
-static int
-zero_entry(struct ip_fw_chain *chain, u_int32_t arg, int log_only)
-{
-	uint16_t rulenum;
-	uint8_t set, cmd;
-	struct ip_fw *rule;
-	char *msg;
-	int i, found;
-
-	rulenum = arg & 0xffff;
-	set = (arg >> 16) & 0xff;
-	cmd = (arg >> 24) & 0xff;
-
-	if (cmd > 1 || (cmd == 1 && set > RESVD_SET))
-		return (EINVAL);
-
-	IPFW_UH_RLOCK(chain);
-	if (rulenum != 0) {
-		found = 0;
-		for (i = 0; i < chain->n_rules; i++) {
-			rule = chain->map[i];
-			/* the map is scanned in order, nothing left to find */
-			if (rule->rulenum > rulenum)
-				break;
-			if (rule->rulenum != rulenum)
-				continue;
-			/* a number match counts even if the set differs */
-			found = 1;
-			if (cmd == 0 || rule->set == set)
-				clear_counters(rule, log_only);
-		}
-		if (found == 0) {
-			IPFW_UH_RUNLOCK(chain);
-			return (EINVAL);
-		}
-		msg = log_only ? "logging count reset" : "cleared";
-	} else {
-		V_norule_counter = 0;
-		for (i = 0; i < chain->n_rules; i++) {
-			rule = chain->map[i];
-			/* with cmd == 1 only rules of our set are touched */
-			if (cmd != 1 || rule->set == set)
-				clear_counters(rule, log_only);
-		}
-		msg = log_only ? "All logging counts reset" :
-		    "Accounting cleared";
-	}
-	IPFW_UH_RUNLOCK(chain);
-
-	if (V_fw_verbose) {
-		if (rulenum != 0)
-			log(LOG_SECURITY | LOG_NOTICE,
-			    "ipfw: Entry %d %s.\n", rulenum, msg);
-		else
-			log(LOG_SECURITY | LOG_NOTICE, "ipfw: %s.\n", msg);
-	}
-	return (0);
-}
-
-/*
- * Check validity of the structure before insert.
- * Rules are simple, so this mostly need to check rule sizes.
- *
- * 'size' is the length in bytes supplied by the caller; it must match
- * RULESIZE(rule). Every instruction is then checked for a length that
- * is legal for its opcode, and exactly one action opcode must be
- * present, as the last instruction. As a side effect, O_LOG
- * instructions get log_left reset to max_log.
- * Returns 0 if the rule is acceptable, EPROTONOSUPPORT for IPv6 opcodes
- * when INET6 is not compiled in, and EINVAL for every other problem.
- */
-static int
-check_ipfw_struct(struct ip_fw *rule, int size)
-{
- int l, cmdlen = 0;
- int have_action=0;
- ipfw_insn *cmd;
-
- if (size < sizeof(*rule)) {
- printf("ipfw: rule too short\n");
- return (EINVAL);
- }
- /* first, check for valid size */
- l = RULESIZE(rule);
- if (l != size) {
- printf("ipfw: size mismatch (have %d want %d)\n", size, l);
- return (EINVAL);
- }
- if (rule->act_ofs >= rule->cmd_len) {
- printf("ipfw: bogus action offset (%u > %u)\n",
- rule->act_ofs, rule->cmd_len - 1);
- return (EINVAL);
- }
- /*
- * Now go for the individual checks. Very simple ones, basically only
- * instruction sizes.
- * l counts the 32-bit words still to be examined, cmdlen is the
- * length of the current instruction in the same unit.
- */
- for (l = rule->cmd_len, cmd = rule->cmd ;
- l > 0 ; l -= cmdlen, cmd += cmdlen) {
- cmdlen = F_LEN(cmd);
- if (cmdlen > l) {
- printf("ipfw: opcode %d size truncated\n",
- cmd->opcode);
- return EINVAL;
- }
- switch (cmd->opcode) {
- case O_PROBE_STATE:
- case O_KEEP_STATE:
- case O_PROTO:
- case O_IP_SRC_ME:
- case O_IP_DST_ME:
- case O_LAYER2:
- case O_IN:
- case O_FRAG:
- case O_DIVERTED:
- case O_IPOPT:
- case O_IPTOS:
- case O_IPPRECEDENCE:
- case O_IPVER:
- case O_SOCKARG:
- case O_TCPFLAGS:
- case O_TCPOPTS:
- case O_ESTAB:
- case O_VERREVPATH:
- case O_VERSRCREACH:
- case O_ANTISPOOF:
- case O_IPSEC:
-#ifdef INET6
- case O_IP6_SRC_ME:
- case O_IP6_DST_ME:
- case O_EXT_HDR:
- case O_IP6:
-#endif
- case O_IP4:
- case O_TAG:
- if (cmdlen != F_INSN_SIZE(ipfw_insn))
- goto bad_size;
- break;
-
- case O_FIB:
- if (cmdlen != F_INSN_SIZE(ipfw_insn))
- goto bad_size;
- if (cmd->arg1 >= rt_numfibs) {
- printf("ipfw: invalid fib number %d\n",
- cmd->arg1);
- return EINVAL;
- }
- break;
-
- case O_SETFIB:
- if (cmdlen != F_INSN_SIZE(ipfw_insn))
- goto bad_size;
- if ((cmd->arg1 != IP_FW_TABLEARG) &&
- (cmd->arg1 >= rt_numfibs)) {
- printf("ipfw: invalid fib number %d\n",
- cmd->arg1);
- return EINVAL;
- }
- goto check_action;
-
- case O_UID:
- case O_GID:
- case O_JAIL:
- case O_IP_SRC:
- case O_IP_DST:
- case O_TCPSEQ:
- case O_TCPACK:
- case O_PROB:
- case O_ICMPTYPE:
- if (cmdlen != F_INSN_SIZE(ipfw_insn_u32))
- goto bad_size;
- break;
-
- case O_LIMIT:
- if (cmdlen != F_INSN_SIZE(ipfw_insn_limit))
- goto bad_size;
- break;
-
- case O_LOG:
- if (cmdlen != F_INSN_SIZE(ipfw_insn_log))
- goto bad_size;
-
- ((ipfw_insn_log *)cmd)->log_left =
- ((ipfw_insn_log *)cmd)->max_log;
-
- break;
-
- case O_IP_SRC_MASK:
- case O_IP_DST_MASK:
- /* only odd command lengths */
- if ( !(cmdlen & 1) || cmdlen > 31)
- goto bad_size;
- break;
-
- case O_IP_SRC_SET:
- case O_IP_DST_SET:
- if (cmd->arg1 == 0 || cmd->arg1 > 256) {
- printf("ipfw: invalid set size %d\n",
- cmd->arg1);
- return EINVAL;
- }
- if (cmdlen != F_INSN_SIZE(ipfw_insn_u32) +
- (cmd->arg1+31)/32 )
- goto bad_size;
- break;
-
- case O_IP_SRC_LOOKUP:
- case O_IP_DST_LOOKUP:
- if (cmd->arg1 >= IPFW_TABLES_MAX) {
- printf("ipfw: invalid table number %d\n",
- cmd->arg1);
- return (EINVAL);
- }
- if (cmdlen != F_INSN_SIZE(ipfw_insn) &&
- cmdlen != F_INSN_SIZE(ipfw_insn_u32) + 1 &&
- cmdlen != F_INSN_SIZE(ipfw_insn_u32))
- goto bad_size;
- break;
- case O_MACADDR2:
- if (cmdlen != F_INSN_SIZE(ipfw_insn_mac))
- goto bad_size;
- break;
-
- case O_NOP:
- case O_IPID:
- case O_IPTTL:
- case O_IPLEN:
- case O_TCPDATALEN:
- case O_TCPWIN:
- case O_TAGGED:
- if (cmdlen < 1 || cmdlen > 31)
- goto bad_size;
- break;
-
- case O_MAC_TYPE:
- case O_IP_SRCPORT:
- case O_IP_DSTPORT: /* XXX artificial limit, 30 port pairs */
- if (cmdlen < 2 || cmdlen > 31)
- goto bad_size;
- break;
-
- case O_RECV:
- case O_XMIT:
- case O_VIA:
- if (cmdlen != F_INSN_SIZE(ipfw_insn_if))
- goto bad_size;
- break;
-
- case O_ALTQ:
- if (cmdlen != F_INSN_SIZE(ipfw_insn_altq))
- goto bad_size;
- break;
-
- case O_PIPE:
- case O_QUEUE:
- if (cmdlen != F_INSN_SIZE(ipfw_insn))
- goto bad_size;
- goto check_action;
-
- case O_FORWARD_IP:
-#ifdef IPFIREWALL_FORWARD
- if (cmdlen != F_INSN_SIZE(ipfw_insn_sa))
- goto bad_size;
- goto check_action;
-#else
- return EINVAL;
-#endif
-
-#ifdef INET6
- case O_FORWARD_IP6:
-#ifdef IPFIREWALL_FORWARD
- if (cmdlen != F_INSN_SIZE(ipfw_insn_sa6))
- goto bad_size;
- goto check_action;
-#else
- return (EINVAL);
-#endif
-#endif /* INET6 */
-
- case O_DIVERT:
- case O_TEE:
- if (ip_divert_ptr == NULL)
- return EINVAL;
- else
- goto check_size;
- case O_NETGRAPH:
- case O_NGTEE:
- if (ng_ipfw_input_p == NULL)
- return EINVAL;
- else
- goto check_size;
- case O_NAT:
- if (!IPFW_NAT_LOADED)
- return EINVAL;
- if (cmdlen != F_INSN_SIZE(ipfw_insn_nat))
- goto bad_size;
- goto check_action;
- case O_FORWARD_MAC: /* XXX not implemented yet */
- case O_CHECK_STATE:
- case O_COUNT:
- case O_ACCEPT:
- case O_DENY:
- case O_REJECT:
-#ifdef INET6
- case O_UNREACH6:
-#endif
- case O_SKIPTO:
- case O_REASS:
- case O_CALLRETURN:
-check_size:
- if (cmdlen != F_INSN_SIZE(ipfw_insn))
- goto bad_size;
-check_action:
- if (have_action) {
- printf("ipfw: opcode %d, multiple actions"
- " not allowed\n",
- cmd->opcode);
- return EINVAL;
- }
- have_action = 1;
- if (l != cmdlen) {
- printf("ipfw: opcode %d, action must be"
- " last opcode\n",
- cmd->opcode);
- return EINVAL;
- }
- break;
-#ifdef INET6
- case O_IP6_SRC:
- case O_IP6_DST:
- if (cmdlen != F_INSN_SIZE(struct in6_addr) +
- F_INSN_SIZE(ipfw_insn))
- goto bad_size;
- break;
-
- case O_FLOW6ID:
- if (cmdlen != F_INSN_SIZE(ipfw_insn_u32) +
- ((ipfw_insn_u32 *)cmd)->o.arg1)
- goto bad_size;
- break;
-
- case O_IP6_SRC_MASK:
- case O_IP6_DST_MASK:
- if ( !(cmdlen & 1) || cmdlen > 127)
- goto bad_size;
- break;
- case O_ICMP6TYPE:
- if( cmdlen != F_INSN_SIZE( ipfw_insn_icmp6 ) )
- goto bad_size;
- break;
-#endif
-
- default:
- switch (cmd->opcode) {
-#ifndef INET6
- case O_IP6_SRC_ME:
- case O_IP6_DST_ME:
- case O_EXT_HDR:
- case O_IP6:
- case O_UNREACH6:
- case O_IP6_SRC:
- case O_IP6_DST:
- case O_FLOW6ID:
- case O_IP6_SRC_MASK:
- case O_IP6_DST_MASK:
- case O_ICMP6TYPE:
- printf("ipfw: no IPv6 support in kernel\n");
- return EPROTONOSUPPORT;
-#endif
- default:
- printf("ipfw: opcode %d, unknown opcode\n",
- cmd->opcode);
- return EINVAL;
- }
- }
- }
- if (have_action == 0) {
- printf("ipfw: missing action\n");
- return EINVAL;
- }
- return 0;
-
-bad_size:
- printf("ipfw: opcode %d size %d wrong\n",
- cmd->opcode, cmdlen);
- return EINVAL;
-}
-
-
-/*
- * Translation of requests for compatibility with FreeBSD 7.2/8.
- * A static variable (is7) tells us if we have an old client from
- * userland, and if necessary we translate requests and responses
- * between the two formats.
- *
- * struct ip_fw7 below is the rule header in the 7.2 format; it is the
- * v.8 header without the 'id' field.
- */
-static int is7 = 0;
-
-struct ip_fw7 {
- struct ip_fw7 *next; /* linked list of rules */
- struct ip_fw7 *next_rule; /* ptr to next [skipto] rule */
- /* 'next_rule' is used to pass up 'set_disable' status */
-
- uint16_t act_ofs; /* offset of action in 32-bit units */
- uint16_t cmd_len; /* # of 32-bit words in cmd */
- uint16_t rulenum; /* rule number */
- uint8_t set; /* rule set (0..31) */
- // #define RESVD_SET 31 /* set for default and persistent rules */
- uint8_t _pad; /* padding */
- // uint32_t id; /* rule id, only in v.8 */
- /* These fields are present in all rules. */
- uint64_t pcnt; /* Packet counter */
- uint64_t bcnt; /* Byte counter */
- uint32_t timestamp; /* tv_sec of last match */
-
- ipfw_insn cmd[1]; /* storage for commands; real length is cmd_len words */
-};
-
- int convert_rule_to_7(struct ip_fw *rule);
-int convert_rule_to_8(struct ip_fw *rule);
-
-#ifndef RULESIZE7
-#define RULESIZE7(rule) (sizeof(struct ip_fw7) + \
- ((struct ip_fw7 *)(rule))->cmd_len * 4 - 4)
-#endif
-
-
-/*
- * Copy the static and dynamic rules to the supplied buffer
- * and return the amount of space actually used.
- * Must be run under IPFW_UH_RLOCK
- *
- * When is7 is set every static rule is converted to the FreeBSD 7.2
- * layout on the way out, and a rule that does not fit in the remaining
- * space is silently skipped; otherwise rules are copied unchanged and
- * the copy stops at the first rule that does not fit.
- * In both formats the current V_set_disable mask is stored over the
- * next_rule field of every copied rule, and a nonzero timestamp has
- * boottime.tv_sec added to it.
- * Returns 0 if a rule cannot be converted to the 7.2 format.
- */
-static size_t
-ipfw_getrules(struct ip_fw_chain *chain, void *buf, size_t space)
-{
- char *bp = buf;
- char *ep = bp + space;
- struct ip_fw *rule, *dst;
- int l, i;
- time_t boot_seconds;
-
- boot_seconds = boottime.tv_sec;
- for (i = 0; i < chain->n_rules; i++) {
- rule = chain->map[i];
-
- if (is7) {
- /* Convert rule to FreeBSD 7.2 format */
- l = RULESIZE7(rule);
- if (bp + l + sizeof(uint32_t) <= ep) {
- int error;
- bcopy(rule, bp, l + sizeof(uint32_t));
- error = convert_rule_to_7((struct ip_fw *) bp);
- if (error)
- return 0; /*XXX correct? */
- /*
- * XXX HACK. Store the disable mask in the "next"
- * pointer in a wild attempt to keep the ABI the same.
- * Why do we do this on EVERY rule?
- */
- bcopy(&V_set_disable,
- &(((struct ip_fw7 *)bp)->next_rule),
- sizeof(V_set_disable));
- if (((struct ip_fw7 *)bp)->timestamp)
- ((struct ip_fw7 *)bp)->timestamp += boot_seconds;
- bp += l;
- }
- continue; /* go to next rule */
- }
-
- /* normal mode, don't touch rules */
- l = RULESIZE(rule);
- if (bp + l > ep) { /* should not happen */
- printf("overflow dumping static rules\n");
- break;
- }
- dst = (struct ip_fw *)bp;
- bcopy(rule, dst, l);
- /*
- * XXX HACK. Store the disable mask in the "next"
- * pointer in a wild attempt to keep the ABI the same.
- * Why do we do this on EVERY rule?
- */
- bcopy(&V_set_disable, &dst->next_rule, sizeof(V_set_disable));
- if (dst->timestamp)
- dst->timestamp += boot_seconds;
- bp += l;
- }
- ipfw_get_dynamic(&bp, ep); /* protected by the dynamic lock */
- return (bp - (char *)buf);
-}
-
-
-#define IP_FW3_OPLENGTH(x) ((x)->sopt_valsize - sizeof(ip_fw3_opheader))
-/**
- * {set|get}sockopt parser.
- *
- * Entry point for every ipfw socket option.  The caller must pass
- * priv_check(PRIV_NETINET_IPFW); in addition IP_FW_ADD and every SOPT_SET
- * request other than IP_FW_RESETLOG are refused once securelevel_ge()
- * reports securelevel 3 or higher.
- *
- * For IP_FW3 the real opcode is taken from the ip_fw3_opheader found at
- * the start of the option data, which is copied into a 128-byte local
- * buffer; the IP_FW_TABLE_X* opcodes read their arguments from that
- * buffer and are therefore only valid when they arrive through IP_FW3.
- *
- * Returns 0 on success or an errno value.  Every temporary buffer
- * allocated here is released before returning, on error paths too.
- */
-int
-ipfw_ctl(struct sockopt *sopt)
-{
-#define RULE_MAXSIZE (256*sizeof(u_int32_t))
-	int error;
-	size_t size, len, valsize;
-	struct ip_fw *buf, *rule;
-	struct ip_fw_chain *chain;
-	u_int32_t rulenum[2];
-	uint32_t opt;
-	char xbuf[128];
-	ip_fw3_opheader *op3 = NULL;
-
-	error = priv_check(sopt->sopt_td, PRIV_NETINET_IPFW);
-	if (error)
-		return (error);
-
-	/*
-	 * Disallow modifications in really-really secure mode, but still allow
-	 * the logging counters to be reset.
-	 */
-	if (sopt->sopt_name == IP_FW_ADD ||
-	    (sopt->sopt_dir == SOPT_SET && sopt->sopt_name != IP_FW_RESETLOG)) {
-		error = securelevel_ge(sopt->sopt_td->td_ucred, 3);
-		if (error)
-			return (error);
-	}
-
-	chain = &V_layer3_chain;
-	error = 0;
-
-	/* Save original valsize before it is altered via sooptcopyin() */
-	valsize = sopt->sopt_valsize;
-	if ((opt = sopt->sopt_name) == IP_FW3) {
-		/*
-		 * Copy not less than sizeof(ip_fw3_opheader).
-		 * We hope any IP_FW3 command will fit into 128-byte buffer.
-		 */
-		if ((error = sooptcopyin(sopt, xbuf, sizeof(xbuf),
-		    sizeof(ip_fw3_opheader))) != 0)
-			return (error);
-		op3 = (ip_fw3_opheader *)xbuf;
-		opt = op3->opcode;
-	}
-
-	switch (opt) {
-	case IP_FW_GET:
-		/*
-		 * pass up a copy of the current rules. Static rules
-		 * come first (the last of which has number IPFW_DEFAULT_RULE),
-		 * followed by a possibly empty list of dynamic rule.
-		 * The last dynamic rule has NULL in the "next" field.
-		 *
-		 * Note that the calculated size is used to bound the
-		 * amount of data returned to the user.  The rule set may
-		 * change between calculating the size and returning the
-		 * data in which case we'll just return what fits.
-		 */
-		for (;;) {
-			int copied = 0, want;
-
-			size = chain->static_len;
-			size += ipfw_dyn_len();
-			if (size >= sopt->sopt_valsize)
-				break;
-			buf = malloc(size, M_TEMP, M_WAITOK);
-			IPFW_UH_RLOCK(chain);
-			/* check again how much space we need */
-			want = chain->static_len + ipfw_dyn_len();
-			if (size >= want)
-				copied = ipfw_getrules(chain, buf, size);
-			IPFW_UH_RUNLOCK(chain);
-			if (size >= want)
-				error = sooptcopyout(sopt, buf, copied);
-			free(buf, M_TEMP);
-			/* the ruleset grew while we allocated: try again */
-			if (size >= want)
-				break;
-		}
-		break;
-
-	case IP_FW_FLUSH:
-		/* locking is done within del_entry() */
-		error = del_entry(chain, 0); /* special case, rule=0, cmd=0 means all */
-		break;
-
-	case IP_FW_ADD:
-		rule = malloc(RULE_MAXSIZE, M_TEMP, M_WAITOK);
-		error = sooptcopyin(sopt, rule, RULE_MAXSIZE,
-		    sizeof(struct ip_fw7) );
-		if (error != 0) {
-			/*
-			 * The buffer holds no valid rule, so do not even
-			 * try to guess the client version from it.
-			 */
-			free(rule, M_TEMP);
-			break;
-		}
-
-		/*
-		 * If the size of commands equals RULESIZE7 then we assume
-		 * a FreeBSD7.2 binary is talking to us (set is7=1).
-		 * is7 is persistent so the next 'ipfw list' command
-		 * will use this format.
-		 * NOTE: If wrong version is guessed (this can happen if
-		 *       the first ipfw command is 'ipfw [pipe] list')
-		 *       the ipfw binary may crash or loop infinitely...
-		 */
-		if (sopt->sopt_valsize == RULESIZE7(rule)) {
-			is7 = 1;
-			error = convert_rule_to_8(rule);
-			if (error == 0)
-				error = check_ipfw_struct(rule, RULESIZE(rule));
-		} else {
-			is7 = 0;
-			error = check_ipfw_struct(rule, sopt->sopt_valsize);
-		}
-		if (error == 0) {
-			/* locking is done within ipfw_add_rule() */
-			error = ipfw_add_rule(chain, rule);
-			size = RULESIZE(rule);
-			if (!error && sopt->sopt_dir == SOPT_GET) {
-				if (is7) {
-					error = convert_rule_to_7(rule);
-					size = RULESIZE7(rule);
-				}
-				if (error == 0)
-					error = sooptcopyout(sopt, rule, size);
-			}
-		}
-		/* single exit: the temporary copy is released on every path */
-		free(rule, M_TEMP);
-		break;
-
-	case IP_FW_DEL:
-		/*
-		 * IP_FW_DEL is used for deleting single rules or sets,
-		 * and (ab)used to atomically manipulate sets. Argument size
-		 * is used to distinguish between the two:
-		 *    sizeof(u_int32_t)
-		 *	delete single rule or set of rules,
-		 *	or reassign rules (or sets) to a different set.
-		 *    2*sizeof(u_int32_t)
-		 *	atomic disable/enable sets.
-		 *	first u_int32_t contains sets to be disabled,
-		 *	second u_int32_t contains sets to be enabled.
-		 */
-		error = sooptcopyin(sopt, rulenum,
-		    2*sizeof(u_int32_t), sizeof(u_int32_t));
-		if (error)
-			break;
-		size = sopt->sopt_valsize;
-		if (size == sizeof(u_int32_t) && rulenum[0] != 0) {
-			/* delete or reassign, locking done in del_entry() */
-			error = del_entry(chain, rulenum[0]);
-		} else if (size == 2*sizeof(u_int32_t)) { /* set enable/disable */
-			IPFW_UH_WLOCK(chain);
-			V_set_disable =
-			    (V_set_disable | rulenum[0]) & ~rulenum[1] &
-			    ~(1<<RESVD_SET); /* set RESVD_SET always enabled */
-			IPFW_UH_WUNLOCK(chain);
-		} else
-			error = EINVAL;
-		break;
-
-	case IP_FW_ZERO:
-	case IP_FW_RESETLOG: /* argument is an u_int_32, the rule number */
-		rulenum[0] = 0;
-		if (sopt->sopt_val != 0) {
-			error = sooptcopyin(sopt, rulenum,
-			    sizeof(u_int32_t), sizeof(u_int32_t));
-			if (error)
-				break;
-		}
-		error = zero_entry(chain, rulenum[0],
-		    sopt->sopt_name == IP_FW_RESETLOG);
-		break;
-
-	/*--- TABLE manipulations are protected by the IPFW_LOCK ---*/
-	case IP_FW_TABLE_ADD:
-		{
-			ipfw_table_entry ent;
-
-			error = sooptcopyin(sopt, &ent,
-			    sizeof(ent), sizeof(ent));
-			if (error)
-				break;
-			error = ipfw_add_table_entry(chain, ent.tbl,
-			    &ent.addr, sizeof(ent.addr), ent.masklen,
-			    IPFW_TABLE_CIDR, ent.value);
-		}
-		break;
-
-	case IP_FW_TABLE_DEL:
-		{
-			ipfw_table_entry ent;
-
-			error = sooptcopyin(sopt, &ent,
-			    sizeof(ent), sizeof(ent));
-			if (error)
-				break;
-			error = ipfw_del_table_entry(chain, ent.tbl,
-			    &ent.addr, sizeof(ent.addr), ent.masklen, IPFW_TABLE_CIDR);
-		}
-		break;
-
-	case IP_FW_TABLE_XADD: /* IP_FW3 */
-	case IP_FW_TABLE_XDEL: /* IP_FW3 */
-		{
-			ipfw_table_xentry *xent;
-
-			/* The arguments live in xbuf, filled only for IP_FW3 */
-			if (op3 == NULL) {
-				error = EINVAL;
-				break;
-			}
-			xent = (ipfw_table_xentry *)(op3 + 1);
-
-			/* Check minimum header size */
-			if (IP_FW3_OPLENGTH(sopt) < offsetof(ipfw_table_xentry, k)) {
-				error = EINVAL;
-				break;
-			}
-
-			/*
-			 * Check if len field is valid: it must cover the
-			 * fixed part of the entry (otherwise the key length
-			 * computed below would wrap around) and must not
-			 * exceed the full entry.
-			 */
-			if (xent->len < offsetof(ipfw_table_xentry, k) ||
-			    xent->len > sizeof(ipfw_table_xentry)) {
-				error = EINVAL;
-				break;
-			}
-
-			len = xent->len - offsetof(ipfw_table_xentry, k);
-
-			error = (opt == IP_FW_TABLE_XADD) ?
-				ipfw_add_table_entry(chain, xent->tbl, &xent->k,
-					len, xent->masklen, xent->type, xent->value) :
-				ipfw_del_table_entry(chain, xent->tbl, &xent->k,
-					len, xent->masklen, xent->type);
-		}
-		break;
-
-	case IP_FW_TABLE_FLUSH:
-		{
-			u_int16_t tbl;
-
-			error = sooptcopyin(sopt, &tbl,
-			    sizeof(tbl), sizeof(tbl));
-			if (error)
-				break;
-			error = ipfw_flush_table(chain, tbl);
-		}
-		break;
-
-	case IP_FW_TABLE_GETSIZE:
-		{
-			u_int32_t tbl, cnt;
-
-			if ((error = sooptcopyin(sopt, &tbl, sizeof(tbl),
-			    sizeof(tbl))))
-				break;
-			IPFW_RLOCK(chain);
-			error = ipfw_count_table(chain, tbl, &cnt);
-			IPFW_RUNLOCK(chain);
-			if (error)
-				break;
-			error = sooptcopyout(sopt, &cnt, sizeof(cnt));
-		}
-		break;
-
-	case IP_FW_TABLE_LIST:
-		{
-			ipfw_table *tbl;
-
-			if (sopt->sopt_valsize < sizeof(*tbl)) {
-				error = EINVAL;
-				break;
-			}
-			size = sopt->sopt_valsize;
-			tbl = malloc(size, M_TEMP, M_WAITOK);
-			error = sooptcopyin(sopt, tbl, size, sizeof(*tbl));
-			if (error) {
-				free(tbl, M_TEMP);
-				break;
-			}
-			tbl->size = (size - sizeof(*tbl)) /
-			    sizeof(ipfw_table_entry);
-			IPFW_RLOCK(chain);
-			error = ipfw_dump_table(chain, tbl);
-			IPFW_RUNLOCK(chain);
-			if (error) {
-				free(tbl, M_TEMP);
-				break;
-			}
-			error = sooptcopyout(sopt, tbl, size);
-			free(tbl, M_TEMP);
-		}
-		break;
-
-	case IP_FW_TABLE_XGETSIZE: /* IP_FW3 */
-		{
-			uint32_t *tbl;
-
-			/* The arguments live in xbuf, filled only for IP_FW3 */
-			if (op3 == NULL) {
-				error = EINVAL;
-				break;
-			}
-
-			if (IP_FW3_OPLENGTH(sopt) < sizeof(uint32_t)) {
-				error = EINVAL;
-				break;
-			}
-
-			tbl = (uint32_t *)(op3 + 1);
-
-			/* the table number is replaced in place by the count */
-			IPFW_RLOCK(chain);
-			error = ipfw_count_xtable(chain, *tbl, tbl);
-			IPFW_RUNLOCK(chain);
-			if (error)
-				break;
-			error = sooptcopyout(sopt, op3, sopt->sopt_valsize);
-		}
-		break;
-
-	case IP_FW_TABLE_XLIST: /* IP_FW3 */
-		{
-			ipfw_xtable *tbl;
-
-			/* The arguments live in xbuf, filled only for IP_FW3 */
-			if (op3 == NULL) {
-				error = EINVAL;
-				break;
-			}
-
-			if ((size = valsize) < sizeof(ipfw_xtable)) {
-				error = EINVAL;
-				break;
-			}
-
-			tbl = malloc(size, M_TEMP, M_ZERO | M_WAITOK);
-			memcpy(tbl, op3, sizeof(ipfw_xtable));
-
-			/* Get maximum number of entries we can store */
-			tbl->size = (size - sizeof(ipfw_xtable)) /
-			    sizeof(ipfw_table_xentry);
-			IPFW_RLOCK(chain);
-			error = ipfw_dump_xtable(chain, tbl);
-			IPFW_RUNLOCK(chain);
-			if (error) {
-				free(tbl, M_TEMP);
-				break;
-			}
-
-			/*
-			 * Revert size field back to bytes.
-			 * NOTE(review): the header term is sizeof(ipfw_table)
-			 * while the division above uses sizeof(ipfw_xtable);
-			 * kept as is, confirm against the userland consumer.
-			 */
-			tbl->size = tbl->size * sizeof(ipfw_table_xentry) +
-			    sizeof(ipfw_table);
-			/*
-			 * Since we call sooptcopyin() with small buffer, sopt_valsize is
-			 * decreased to reflect supplied buffer size. Set it back to original value
-			 */
-			sopt->sopt_valsize = valsize;
-			error = sooptcopyout(sopt, tbl, size);
-			free(tbl, M_TEMP);
-		}
-		break;
-
-	/*--- NAT operations are protected by the IPFW_LOCK ---*/
-	case IP_FW_NAT_CFG:
-		if (IPFW_NAT_LOADED)
-			error = ipfw_nat_cfg_ptr(sopt);
-		else {
-			printf("IP_FW_NAT_CFG: %s\n",
-			    "ipfw_nat not present, please load it");
-			error = EINVAL;
-		}
-		break;
-
-	case IP_FW_NAT_DEL:
-		if (IPFW_NAT_LOADED)
-			error = ipfw_nat_del_ptr(sopt);
-		else {
-			printf("IP_FW_NAT_DEL: %s\n",
-			    "ipfw_nat not present, please load it");
-			error = EINVAL;
-		}
-		break;
-
-	case IP_FW_NAT_GET_CONFIG:
-		if (IPFW_NAT_LOADED)
-			error = ipfw_nat_get_cfg_ptr(sopt);
-		else {
-			printf("IP_FW_NAT_GET_CFG: %s\n",
-			    "ipfw_nat not present, please load it");
-			error = EINVAL;
-		}
-		break;
-
-	case IP_FW_NAT_GET_LOG:
-		if (IPFW_NAT_LOADED)
-			error = ipfw_nat_get_log_ptr(sopt);
-		else {
-			printf("IP_FW_NAT_GET_LOG: %s\n",
-			    "ipfw_nat not present, please load it");
-			error = EINVAL;
-		}
-		break;
-
-	default:
-		printf("ipfw: ipfw_ctl invalid option %d\n", sopt->sopt_name);
-		error = EINVAL;
-	}
-
-	return (error);
-#undef RULE_MAXSIZE
-}
-
-
-#define RULE_MAXSIZE (256*sizeof(u_int32_t))
-
-/* Functions to convert rules 7.2 <==> 8.0 */
-
-/*
- * Rewrite, in place, a rule in the current (v.8) layout into the
- * FreeBSD 7.2 layout (struct ip_fw7).  The header fields are moved to
- * their 7.2 positions and the instructions are copied one by one;
- * opcodes greater than O_NAT are decremented by one because O_REASS
- * does not exist in the 7.2 numbering.
- *
- * Only RULESIZE(rule) bytes of the caller's buffer are read.
- * Returns 0 on success, EINVAL if the rule is larger than RULE_MAXSIZE
- * or contains an instruction whose length is zero or runs past the end
- * of the rule, and ENOMEM if the scratch copy cannot be allocated.
- * On EINVAL from the instruction loop the rule is left partly converted.
- */
-int
-convert_rule_to_7(struct ip_fw *rule)
-{
-	/* Used to modify original rule */
-	struct ip_fw7 *rule7 = (struct ip_fw7 *)rule;
-	/* copy of original rule, version 8 */
-	struct ip_fw *tmp;
-	/* Used to copy commands */
-	ipfw_insn *ccmd, *dst;
-	int ll, ccmdlen, error = 0;
-	size_t rsize;
-
-	/* the scratch buffer is RULE_MAXSIZE bytes, refuse anything larger */
-	rsize = RULESIZE(rule);
-	if (rsize > RULE_MAXSIZE)
-		return (EINVAL);
-
-	tmp = malloc(RULE_MAXSIZE, M_TEMP, M_NOWAIT | M_ZERO);
-	if (tmp == NULL)
-		return (ENOMEM);
-	/* copy just the rule: the caller's buffer may end right after it */
-	bcopy(rule, tmp, rsize);
-
-	/* Copy fields */
-	rule7->_pad = tmp->_pad;
-	rule7->set = tmp->set;
-	rule7->rulenum = tmp->rulenum;
-	rule7->cmd_len = tmp->cmd_len;
-	rule7->act_ofs = tmp->act_ofs;
-	rule7->next_rule = (struct ip_fw7 *)tmp->next_rule;
-	rule7->next = (struct ip_fw7 *)tmp->x_next;
-	rule7->pcnt = tmp->pcnt;
-	rule7->bcnt = tmp->bcnt;
-	rule7->timestamp = tmp->timestamp;
-
-	/* Copy commands */
-	for (ll = tmp->cmd_len, ccmd = tmp->cmd, dst = rule7->cmd;
-	    ll > 0; ll -= ccmdlen, ccmd += ccmdlen, dst += ccmdlen) {
-		ccmdlen = F_LEN(ccmd);
-
-		/*
-		 * Validate before copying: a zero length would never
-		 * advance, a too large one would run past the rule.
-		 */
-		if (ccmdlen == 0 || ccmdlen > ll) {
-			printf("ipfw: opcode %d size truncated\n",
-			    ccmd->opcode);
-			error = EINVAL;
-			break;
-		}
-
-		bcopy(ccmd, dst, ccmdlen * sizeof(uint32_t));
-
-		if (dst->opcode > O_NAT)
-			/* O_REASS doesn't exist in 7.2 version, so
-			 * decrement opcode if it is after O_REASS
-			 */
-			dst->opcode--;
-	}
-	free(tmp, M_TEMP);
-
-	return (error);
-}
-
-/*
- * Rewrite, in place, a rule in the FreeBSD 7.2 layout (struct ip_fw7)
- * into the current (v.8) layout.  Instructions are copied one by one,
- * with opcodes greater than O_NAT incremented by one to make room for
- * O_REASS, then the header fields are moved to their v.8 positions and
- * the rule id is set to 0.
- *
- * 'rule' must point to a buffer of at least RULE_MAXSIZE bytes, as the
- * caller in ipfw_ctl() provides; the converted rule is larger than the
- * 7.2 one and has to fit in that buffer.
- * Returns 0 on success, EINVAL if the converted rule would not fit or
- * an instruction has a length that is zero or runs past the end of the
- * rule, and ENOMEM if the scratch copy cannot be allocated.
- * On EINVAL from the instruction loop the rule is left partly converted.
- */
-int
-convert_rule_to_8(struct ip_fw *rule)
-{
-	/* Used to modify original rule */
-	struct ip_fw7 *rule7 = (struct ip_fw7 *) rule;
-	/* Copy of original rule */
-	struct ip_fw7 *tmp;
-	/* Used to copy commands */
-	ipfw_insn *ccmd, *dst;
-	int ll, ccmdlen, error = 0;
-
-	/* the grown rule must still fit in the caller's buffer */
-	if (offsetof(struct ip_fw, cmd) +
-	    rule7->cmd_len * sizeof(uint32_t) > RULE_MAXSIZE)
-		return (EINVAL);
-
-	tmp = malloc(RULE_MAXSIZE, M_TEMP, M_NOWAIT | M_ZERO);
-	if (tmp == NULL)
-		return (ENOMEM);
-
-	bcopy(rule7, tmp, RULE_MAXSIZE);
-
-	for (ll = tmp->cmd_len, ccmd = tmp->cmd, dst = rule->cmd;
-	    ll > 0; ll -= ccmdlen, ccmd += ccmdlen, dst += ccmdlen) {
-		ccmdlen = F_LEN(ccmd);
-
-		/*
-		 * Validate before copying: a zero length would never
-		 * advance, a too large one would run past the rule.
-		 */
-		if (ccmdlen == 0 || ccmdlen > ll) {
-			printf("ipfw: opcode %d size truncated\n",
-			    ccmd->opcode);
-			error = EINVAL;
-			break;
-		}
-
-		bcopy(ccmd, dst, ccmdlen * sizeof(uint32_t));
-
-		if (dst->opcode > O_NAT)
-			/* O_REASS doesn't exist in 7.2 version, so
-			 * increment opcode if it is after O_REASS
-			 */
-			dst->opcode++;
-	}
-
-	if (error == 0) {
-		rule->_pad = tmp->_pad;
-		rule->set = tmp->set;
-		rule->rulenum = tmp->rulenum;
-		rule->cmd_len = tmp->cmd_len;
-		rule->act_ofs = tmp->act_ofs;
-		rule->next_rule = (struct ip_fw *)tmp->next_rule;
-		rule->x_next = (struct ip_fw *)tmp->next;
-		rule->id = 0; /* XXX see if is ok = 0 */
-		rule->pcnt = tmp->pcnt;
-		rule->bcnt = tmp->bcnt;
-		rule->timestamp = tmp->timestamp;
-	}
-
-	free(tmp, M_TEMP);
-	return (error);
-}
-
-/* end of file */
diff --git a/sys/netinet/ipfw/ip_fw_table.c b/sys/netinet/ipfw/ip_fw_table.c
deleted file mode 100644
index 68a6220..0000000
--- a/sys/netinet/ipfw/ip_fw_table.c
+++ /dev/null
@@ -1,761 +0,0 @@
-/*-
- * Copyright (c) 2004 Ruslan Ermilov and Vsevolod Lobko.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
-/*
- * Lookup table support for ipfw
- *
- * Lookup tables are implemented (at the moment) using the radix
- * tree used for routing tables. Tables store key-value entries, where
- * keys are network prefixes (addr/masklen), and values are integers.
- * As a degenerate case we can interpret keys as 32-bit integers
- * (with a /32 mask).
- *
- * The table is protected by the IPFW lock even for manipulation coming
- * from userland, because operations are typically fast.
- */
-
-#include "opt_ipfw.h"
-#include "opt_inet.h"
-#ifndef INET
-#error IPFIREWALL requires INET.
-#endif /* INET */
-#include "opt_inet6.h"
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/malloc.h>
-#include <sys/kernel.h>
-#include <sys/lock.h>
-#include <sys/rwlock.h>
-#include <sys/socket.h>
-#include <net/if.h> /* ip_fw.h requires IFNAMSIZ */
-#include <net/radix.h>
-#include <net/route.h>
-#include <net/vnet.h>
-
-#include <netinet/in.h>
-#include <netinet/ip_var.h> /* struct ipfw_rule_ref */
-#include <netinet/ip_fw.h>
-#include <sys/queue.h> /* LIST_HEAD */
-#include <netinet/ipfw/ip_fw_private.h>
-
-#ifdef MAC
-#include <security/mac/mac_framework.h>
-#endif
-
-static MALLOC_DEFINE(M_IPFW_TBL, "ipfw_tbl", "IpFw tables");
-
-/*
- * Entry of an IPv4 (base) table. The object itself is handed to the
- * radix code as node storage and radix_node pointers are cast back to
- * this type, so 'rn' has to remain the first member.
- */
-struct table_entry {
- struct radix_node rn[2];
- struct sockaddr_in addr, mask; /* lookup key and its netmask */
- u_int32_t value; /* value reported for a matching lookup */
-};
-
-/*
- * Radix key used for interface-name tables: a leading length byte,
- * padding, then the name. KEY_LEN_IFACE and OFF_LEN_IFACE are both
- * derived from the offset of 'ifname' inside this structure.
- */
-struct xaddr_iface {
- uint8_t if_len; /* length of this struct */
- uint8_t pad[7]; /* Align name */
- char ifname[IF_NAMESIZE]; /* Interface name */
-};
-
-/*
- * Entry of an extended table: the key is either an IPv6 prefix (only
- * when INET6 is configured) or an interface name. As with table_entry,
- * radix_node pointers are cast back to this type, so 'rn' stays first.
- */
-struct table_xentry {
- struct radix_node rn[2];
- union { /* lookup key */
-#ifdef INET6
- struct sockaddr_in6 addr6;
-#endif
- struct xaddr_iface iface;
- } a;
- union { /* mask matching the key variant in 'a' */
-#ifdef INET6
- struct sockaddr_in6 mask6;
-#endif
- struct xaddr_iface ifmask;
- } m;
- u_int32_t value; /* value reported for a matching lookup */
-};
-
-/*
- * The radix code expects addr and mask to be array of bytes,
- * with the first byte being the length of the array. rn_inithead
- * is called with the offset in bits of the lookup key within the
- * array. If we use a sockaddr_in as the underlying type,
- * sin_len is conveniently located at offset 0, sin_addr is at
- * offset 4 and normally aligned.
- * But for portability, let's avoid assumption and make the code explicit
- */
-/* Length byte of a key; the expansion is an lvalue and fully parenthesized */
-#define KEY_LEN(v)	(*((uint8_t *)&(v)))
-#define KEY_OFS		(8*offsetof(struct sockaddr_in, sin_addr))
-/*
- * Do not require radix to compare more than actual IPv4/IPv6 address
- */
-#define KEY_LEN_INET	(offsetof(struct sockaddr_in, sin_addr) + sizeof(in_addr_t))
-#define KEY_LEN_INET6	(offsetof(struct sockaddr_in6, sin6_addr) + sizeof(struct in6_addr))
-#define KEY_LEN_IFACE	(offsetof(struct xaddr_iface, ifname))
-
-/* Offsets, in bits, of the first key byte inside each key structure */
-#define OFF_LEN_INET	(8 * offsetof(struct sockaddr_in, sin_addr))
-#define OFF_LEN_INET6	(8 * offsetof(struct sockaddr_in6, sin6_addr))
-#define OFF_LEN_IFACE	(8 * offsetof(struct xaddr_iface, ifname))
-
-
-/*
- * Write the netmask for a prefix of 'mask' bits into *addr6, in network
- * byte order.
- *
- * Whole 32-bit words covered by the prefix are set to all ones, and the
- * first word after them receives the partial mask (or zero). Words
- * beyond that one are left untouched, so callers pass a zeroed address.
- * Stores never go past the end of *addr6: a 128-bit prefix fills exactly
- * four words and larger values are treated like 128.
- */
-static inline void
-ipv6_writemask(struct in6_addr *addr6, uint8_t mask)
-{
-	uint32_t *cp = (uint32_t *)addr6;
-	uint32_t * const end = (uint32_t *)(addr6 + 1);
-
-	for (; mask >= 32 && cp < end; mask -= 32)
-		*cp++ = 0xFFFFFFFF;
-	/*
-	 * cp == end means the prefix already covers the whole address and
-	 * there is no further word to write. Otherwise mask is below 32
-	 * here; the unsigned constant keeps 1 << 31 well defined.
-	 */
-	if (cp < end)
-		*cp = htonl(mask ? ~((1U << (32 - mask)) - 1) : 0);
-}
-
-/*
- * Add an entry with the given value to table 'tbl' of chain 'ch'.
- *
- * 'type' selects the key format:
- *  - IPFW_TABLE_CIDR: paddr points to an IPv4 address when plen is
- *    sizeof(in_addr_t), or to an IPv6 address when plen is
- *    sizeof(struct in6_addr) and INET6 is configured; mlen is the
- *    prefix length in bits.
- *  - IPFW_TABLE_INTERFACE: paddr points to an interface name held in a
- *    buffer of IF_NAMESIZE bytes. Note that the last byte of that
- *    buffer is overwritten with '\0'.
- *
- * Returns 0 on success, EINVAL for a bad table number, key size, prefix
- * length, unterminated name or a table already used with another type,
- * ENOMEM if the radix head cannot be created and EEXIST if the radix
- * code refuses the insertion.
- */
-int
-ipfw_add_table_entry(struct ip_fw_chain *ch, uint16_t tbl, void *paddr,
- uint8_t plen, uint8_t mlen, uint8_t type, uint32_t value)
-{
- struct radix_node_head *rnh, **rnh_ptr;
- struct table_entry *ent;
- struct table_xentry *xent;
- struct radix_node *rn;
- in_addr_t addr;
- int offset;
- void *ent_ptr;
- struct sockaddr *addr_ptr, *mask_ptr;
- char c;
-
- if (tbl >= V_fw_tables_max)
- return (EINVAL);
-
- /*
- * Build the entry before taking the chain lock: every branch below
- * allocates it and fills in the key, mask, radix offset and the
- * pointer to the tree array (tables or xtables) it belongs to.
- */
- switch (type) {
- case IPFW_TABLE_CIDR:
- if (plen == sizeof(in_addr_t)) {
-#ifdef INET
- /* IPv4 case */
- if (mlen > 32)
- return (EINVAL);
- ent = malloc(sizeof(*ent), M_IPFW_TBL, M_WAITOK | M_ZERO);
- ent->value = value;
- /* Set 'total' structure length */
- KEY_LEN(ent->addr) = KEY_LEN_INET;
- KEY_LEN(ent->mask) = KEY_LEN_INET;
- /* Set offset of IPv4 address in bits */
- offset = OFF_LEN_INET;
- ent->mask.sin_addr.s_addr = htonl(mlen ? ~((1 << (32 - mlen)) - 1) : 0);
- addr = *((in_addr_t *)paddr);
- ent->addr.sin_addr.s_addr = addr & ent->mask.sin_addr.s_addr;
- /* Set pointers */
- rnh_ptr = &ch->tables[tbl];
- ent_ptr = ent;
- addr_ptr = (struct sockaddr *)&ent->addr;
- mask_ptr = (struct sockaddr *)&ent->mask;
-#endif
-#ifdef INET6
- } else if (plen == sizeof(struct in6_addr)) {
- /* IPv6 case */
- if (mlen > 128)
- return (EINVAL);
- xent = malloc(sizeof(*xent), M_IPFW_TBL, M_WAITOK | M_ZERO);
- xent->value = value;
- /* Set 'total' structure length */
- KEY_LEN(xent->a.addr6) = KEY_LEN_INET6;
- KEY_LEN(xent->m.mask6) = KEY_LEN_INET6;
- /* Set offset of IPv6 address in bits */
- offset = OFF_LEN_INET6;
- ipv6_writemask(&xent->m.mask6.sin6_addr, mlen);
- memcpy(&xent->a.addr6.sin6_addr, paddr, sizeof(struct in6_addr));
- APPLY_MASK(&xent->a.addr6.sin6_addr, &xent->m.mask6.sin6_addr);
- /* Set pointers */
- rnh_ptr = &ch->xtables[tbl];
- ent_ptr = xent;
- addr_ptr = (struct sockaddr *)&xent->a.addr6;
- mask_ptr = (struct sockaddr *)&xent->m.mask6;
-#endif
- } else {
- /* Unknown CIDR type */
- return (EINVAL);
- }
- break;
-
- case IPFW_TABLE_INTERFACE:
- /* Check if string is terminated */
- c = ((char *)paddr)[IF_NAMESIZE - 1];
- ((char *)paddr)[IF_NAMESIZE - 1] = '\0';
- if (((mlen = strlen((char *)paddr)) == IF_NAMESIZE - 1) && (c != '\0'))
- return (EINVAL);
-
- /* Include last \0 into comparison */
- mlen++;
-
- xent = malloc(sizeof(*xent), M_IPFW_TBL, M_WAITOK | M_ZERO);
- xent->value = value;
- /* Set 'total' structure length */
- KEY_LEN(xent->a.iface) = KEY_LEN_IFACE + mlen;
- KEY_LEN(xent->m.ifmask) = KEY_LEN_IFACE + mlen;
- /* Set offset of interface name in bits */
- offset = OFF_LEN_IFACE;
- memcpy(xent->a.iface.ifname, paddr, mlen);
- /* Assume direct match */
- /* TODO: Add interface pattern matching */
-#if 0
- memset(xent->m.ifmask.ifname, 0xFF, IF_NAMESIZE);
- mask_ptr = (struct sockaddr *)&xent->m.ifmask;
-#endif
- /* Set pointers */
- rnh_ptr = &ch->xtables[tbl];
- ent_ptr = xent;
- addr_ptr = (struct sockaddr *)&xent->a.iface;
- mask_ptr = NULL;
- break;
-
- default:
- return (EINVAL);
- }
-
- IPFW_WLOCK(ch);
-
- /* Check if tabletype is valid */
- if ((ch->tabletype[tbl] != 0) && (ch->tabletype[tbl] != type)) {
- IPFW_WUNLOCK(ch);
- free(ent_ptr, M_IPFW_TBL);
- return (EINVAL);
- }
-
- /* Check if radix tree exists */
- if ((rnh = *rnh_ptr) == NULL) {
- /*
- * The lock is released while the new head is created, so the
- * table state has to be checked again once it is re-acquired.
- */
- IPFW_WUNLOCK(ch);
- /* Create radix for a new table */
- if (!rn_inithead((void **)&rnh, offset)) {
- free(ent_ptr, M_IPFW_TBL);
- return (ENOMEM);
- }
-
- IPFW_WLOCK(ch);
- if (*rnh_ptr != NULL) {
- /* Tree is already attached by other thread */
- rn_detachhead((void **)&rnh);
- rnh = *rnh_ptr;
- /* Check table type another time */
- if (ch->tabletype[tbl] != type) {
- IPFW_WUNLOCK(ch);
- free(ent_ptr, M_IPFW_TBL);
- return (EINVAL);
- }
- } else {
- *rnh_ptr = rnh;
- /*
- * Set table type. It can be set already
- * (if we have IPv6-only table) but setting
- * it another time does not hurt
- */
- ch->tabletype[tbl] = type;
- }
- }
-
- rn = rnh->rnh_addaddr(addr_ptr, mask_ptr, rnh, ent_ptr);
- IPFW_WUNLOCK(ch);
-
- /* A NULL result is reported as EEXIST; the entry is not kept */
- if (rn == NULL) {
- free(ent_ptr, M_IPFW_TBL);
- return (EEXIST);
- }
- return (0);
-}
-
-/*
- * Remove the entry matching the given key from table 'tbl' of chain 'ch'.
- *
- * The key is described exactly as for ipfw_add_table_entry(): an IPv4 or
- * IPv6 prefix (IPFW_TABLE_CIDR, selected by plen) or an interface name
- * (IPFW_TABLE_INTERFACE). For interface names the last byte of the
- * caller's IF_NAMESIZE buffer is overwritten with '\0'.
- *
- * Returns 0 on success, EINVAL for a bad table number, key size, prefix
- * length, unterminated name or table type mismatch, and ESRCH if the
- * table or the entry does not exist.
- */
-int
-ipfw_del_table_entry(struct ip_fw_chain *ch, uint16_t tbl, void *paddr,
-    uint8_t plen, uint8_t mlen, uint8_t type)
-{
-	struct radix_node_head *rnh, **rnh_ptr;
-	struct table_entry *ent;
-	in_addr_t addr;
-	/*
-	 * All key storage is declared at function scope: sa_ptr/mask_ptr
-	 * point into it and are used after the switch below has ended.
-	 */
-	struct sockaddr_in sa, mask;
-#ifdef INET6
-	struct sockaddr_in6 sa6, mask6;
-#endif
-	struct xaddr_iface ifname;
-	struct sockaddr *sa_ptr, *mask_ptr;
-	char c;
-
-	if (tbl >= V_fw_tables_max)
-		return (EINVAL);
-
-	switch (type) {
-	case IPFW_TABLE_CIDR:
-		if (plen == sizeof(in_addr_t)) {
-			/* IPv4 case */
-			/* Same bound as on insertion; keeps the shift valid */
-			if (mlen > 32)
-				return (EINVAL);
-			memset(&sa, 0, sizeof(sa));
-			memset(&mask, 0, sizeof(mask));
-			/* Set 'total' structure length */
-			KEY_LEN(sa) = KEY_LEN_INET;
-			KEY_LEN(mask) = KEY_LEN_INET;
-			mask.sin_addr.s_addr = htonl(mlen ? ~((1U << (32 - mlen)) - 1) : 0);
-			addr = *((in_addr_t *)paddr);
-			sa.sin_addr.s_addr = addr & mask.sin_addr.s_addr;
-			rnh_ptr = &ch->tables[tbl];
-			sa_ptr = (struct sockaddr *)&sa;
-			mask_ptr = (struct sockaddr *)&mask;
-#ifdef INET6
-		} else if (plen == sizeof(struct in6_addr)) {
-			/* IPv6 case */
-			if (mlen > 128)
-				return (EINVAL);
-			memset(&sa6, 0, sizeof(struct sockaddr_in6));
-			memset(&mask6, 0, sizeof(struct sockaddr_in6));
-			/* Set 'total' structure length */
-			KEY_LEN(sa6) = KEY_LEN_INET6;
-			KEY_LEN(mask6) = KEY_LEN_INET6;
-			ipv6_writemask(&mask6.sin6_addr, mlen);
-			memcpy(&sa6.sin6_addr, paddr, sizeof(struct in6_addr));
-			APPLY_MASK(&sa6.sin6_addr, &mask6.sin6_addr);
-			rnh_ptr = &ch->xtables[tbl];
-			sa_ptr = (struct sockaddr *)&sa6;
-			mask_ptr = (struct sockaddr *)&mask6;
-#endif
-		} else {
-			/* Unknown CIDR type */
-			return (EINVAL);
-		}
-		break;
-
-	case IPFW_TABLE_INTERFACE:
-		/* Check if string is terminated */
-		c = ((char *)paddr)[IF_NAMESIZE - 1];
-		((char *)paddr)[IF_NAMESIZE - 1] = '\0';
-		if (((mlen = strlen((char *)paddr)) == IF_NAMESIZE - 1) && (c != '\0'))
-			return (EINVAL);
-
-		memset(&ifname, 0, sizeof(ifname));
-
-		/* Include last \0 into comparison */
-		mlen++;
-
-		/* Set 'total' structure length */
-		KEY_LEN(ifname) = KEY_LEN_IFACE + mlen;
-		/* Assume direct match */
-		/* FIXME: Add interface pattern matching */
-		mask_ptr = NULL;
-		memcpy(ifname.ifname, paddr, mlen);
-		/* Set pointers */
-		rnh_ptr = &ch->xtables[tbl];
-		sa_ptr = (struct sockaddr *)&ifname;
-
-		break;
-
-	default:
-		return (EINVAL);
-	}
-
-	IPFW_WLOCK(ch);
-	if ((rnh = *rnh_ptr) == NULL) {
-		IPFW_WUNLOCK(ch);
-		return (ESRCH);
-	}
-
-	if (ch->tabletype[tbl] != type) {
-		IPFW_WUNLOCK(ch);
-		return (EINVAL);
-	}
-
-	ent = (struct table_entry *)rnh->rnh_deladdr(sa_ptr, mask_ptr, rnh);
-	IPFW_WUNLOCK(ch);
-
-	if (ent == NULL)
-		return (ESRCH);
-
-	/* The unlinked entry is released outside of the chain lock */
-	free(ent, M_IPFW_TBL);
-	return (0);
-}
-
-/*
- * Tree-walk callback: unlink the visited node from the radix head
- * passed in 'arg' and release the entry it belongs to. Always returns 0
- * so that the walk continues.
- */
-static int
-flush_table_entry(struct radix_node *rn, void *arg)
-{
-	struct radix_node_head * const head = arg;
-	struct table_entry *victim;
-
-	victim = (struct table_entry *)
-	    head->rnh_deladdr(rn->rn_key, rn->rn_mask, head);
-	if (victim == NULL)
-		return (0);
-
-	free(victim, M_IPFW_TBL);
-	return (0);
-}
-
-/*
- * Empty table 'tbl' of chain 'ch'.
- *
- * Both the IPv4 and the extended radix trees are detached from the chain
- * and the table type is cleared while holding the write lock, so that
- * the table can later be reused with a different type; the detached
- * trees are then emptied and destroyed without the lock.
- *
- * Returns EINVAL for an out-of-range table number, 0 otherwise.
- */
-int
-ipfw_flush_table(struct ip_fw_chain *ch, uint16_t tbl)
-{
-	struct radix_node_head *base, *ext;
-
-	if (tbl >= V_fw_tables_max)
-		return (EINVAL);
-
-	IPFW_WLOCK(ch);
-	/* Take ownership of both trees and leave the slots empty */
-	base = ch->tables[tbl];
-	ch->tables[tbl] = NULL;
-	ext = ch->xtables[tbl];
-	ch->xtables[tbl] = NULL;
-	/* Forget the table type */
-	ch->tabletype[tbl] = 0;
-	IPFW_WUNLOCK(ch);
-
-	if (base != NULL) {
-		base->rnh_walktree(base, flush_table_entry, base);
-		rn_detachhead((void **)&base);
-	}
-
-	if (ext != NULL) {
-		ext->rnh_walktree(ext, flush_table_entry, ext);
-		rn_detachhead((void **)&ext);
-	}
-
-	return (0);
-}
-
-/*
- * Flush every table of chain 'ch', then release the three per-chain
- * arrays (IPv4 trees, extended trees, table types).
- */
-void
-ipfw_destroy_tables(struct ip_fw_chain *ch)
-{
-	uint16_t idx = 0;
-
-	/* Empty the tables one by one */
-	while (idx < V_fw_tables_max) {
-		ipfw_flush_table(ch, idx);
-		idx++;
-	}
-
-	/* Now drop the arrays themselves */
-	free(ch->tables, M_IPFW);
-	free(ch->xtables, M_IPFW);
-	free(ch->tabletype, M_IPFW);
-}
-
-/*
- * Allocate the zero-filled per-chain arrays holding, for each of the
- * V_fw_tables_max tables, the IPv4 tree, the extended tree and the
- * table type. Always returns 0.
- */
-int
-ipfw_init_tables(struct ip_fw_chain *ch)
-{
-	const int how = M_WAITOK | M_ZERO;
-
-	ch->tables = malloc(V_fw_tables_max * sizeof(void *), M_IPFW, how);
-	ch->xtables = malloc(V_fw_tables_max * sizeof(void *), M_IPFW, how);
-	ch->tabletype = malloc(V_fw_tables_max * sizeof(uint8_t), M_IPFW, how);
-
-	return (0);
-}
-
-/*
- * Change the number of tables of chain 'ch' to 'ntables' (values above
- * IPFW_TABLES_MAX are clamped to it).
- *
- * The new arrays are allocated before the write lock is taken; under the
- * lock the surviving slots are copied over, the chain pointers are
- * swapped and V_fw_tables_max is updated. Trees of tables that no longer
- * fit, and the old arrays, are released after the lock is dropped.
- * Always returns 0.
- */
-int
-ipfw_resize_tables(struct ip_fw_chain *ch, unsigned int ntables)
-{
- struct radix_node_head **tables, **xtables, *rnh;
- struct radix_node_head **tables_old, **xtables_old;
- uint8_t *tabletype, *tabletype_old;
- unsigned int ntables_old, tbl;
-
- /* Check new value for validity */
- if (ntables > IPFW_TABLES_MAX)
- ntables = IPFW_TABLES_MAX;
-
- /* Allocate new pointers */
- tables = malloc(ntables * sizeof(void *), M_IPFW, M_WAITOK | M_ZERO);
- xtables = malloc(ntables * sizeof(void *), M_IPFW, M_WAITOK | M_ZERO);
- tabletype = malloc(ntables * sizeof(uint8_t), M_IPFW, M_WAITOK | M_ZERO);
-
- IPFW_WLOCK(ch);
-
- /* Number of slots to carry over: the smaller of old and new size */
- tbl = (ntables >= V_fw_tables_max) ? V_fw_tables_max : ntables;
-
- /* Copy old table pointers */
- memcpy(tables, ch->tables, sizeof(void *) * tbl);
- memcpy(xtables, ch->xtables, sizeof(void *) * tbl);
- memcpy(tabletype, ch->tabletype, sizeof(uint8_t) * tbl);
-
- /* Change pointers and number of tables */
- tables_old = ch->tables;
- xtables_old = ch->xtables;
- tabletype_old = ch->tabletype;
- ch->tables = tables;
- ch->xtables = xtables;
- ch->tabletype = tabletype;
-
- ntables_old = V_fw_tables_max;
- V_fw_tables_max = ntables;
-
- IPFW_WUNLOCK(ch);
-
- /* Check if we need to destroy radix trees */
- if (ntables < ntables_old) {
- /* These slots were not copied, so the old arrays hold the only reference */
- for (tbl = ntables; tbl < ntables_old; tbl++) {
- if ((rnh = tables_old[tbl]) != NULL) {
- rnh->rnh_walktree(rnh, flush_table_entry, rnh);
- rn_detachhead((void **)&rnh);
- }
-
- if ((rnh = xtables_old[tbl]) != NULL) {
- rnh->rnh_walktree(rnh, flush_table_entry, rnh);
- rn_detachhead((void **)&rnh);
- }
- }
- }
-
- /* Free old pointers */
- free(tables_old, M_IPFW);
- free(xtables_old, M_IPFW);
- free(tabletype_old, M_IPFW);
-
- return (0);
-}
-
-/*
- * Look up IPv4 address 'addr' in the base tree of table 'tbl'.
- *
- * Returns 1 and stores the entry's value in *val when a match is found;
- * returns 0 (leaving *val alone) when the table number is out of range,
- * the table has no IPv4 tree or nothing matches.
- */
-int
-ipfw_lookup_table(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr,
-    uint32_t *val)
-{
-	struct radix_node_head *head;
-	struct table_entry *match;
-	struct sockaddr_in key;
-
-	if (tbl >= V_fw_tables_max)
-		return (0);
-
-	head = ch->tables[tbl];
-	if (head == NULL)
-		return (0);
-
-	/* Only the length byte and the address take part in the lookup */
-	KEY_LEN(key) = KEY_LEN_INET;
-	key.sin_addr.s_addr = addr;
-
-	match = (struct table_entry *)(head->rnh_lookup(&key, NULL, head));
-	if (match == NULL)
-		return (0);
-
-	*val = match->value;
-	return (1);
-}
-
-/*
- * Look up a key in the extended tree of table 'tbl'.
- *
- * For IPFW_TABLE_CIDR 'paddr' points to an IPv6 address, for
- * IPFW_TABLE_INTERFACE to an interface name. Returns 1 and stores the
- * entry's value in *val on a match; returns 0 when the table number is
- * out of range, the table has no extended tree, the type is unknown or
- * nothing matches.
- */
-int
-ipfw_lookup_table_extended(struct ip_fw_chain *ch, uint16_t tbl, void *paddr,
-    uint32_t *val, int type)
-{
-	struct radix_node_head *head;
-	struct table_xentry *match;
-	struct sockaddr_in6 key6;
-	struct xaddr_iface keyif;
-
-	if (tbl >= V_fw_tables_max)
-		return (0);
-
-	head = ch->xtables[tbl];
-	if (head == NULL)
-		return (0);
-
-	if (type == IPFW_TABLE_CIDR) {
-		KEY_LEN(key6) = KEY_LEN_INET6;
-		memcpy(&key6.sin6_addr, paddr, sizeof(struct in6_addr));
-		match = (struct table_xentry *)(head->rnh_lookup(&key6, NULL, head));
-	} else if (type == IPFW_TABLE_INTERFACE) {
-		/* Key length covers the name and its terminating '\0' */
-		KEY_LEN(keyif) = KEY_LEN_IFACE +
-		    strlcpy(keyif.ifname, (char *)paddr, IF_NAMESIZE) + 1;
-		/* Assume direct match */
-		/* FIXME: Add interface pattern matching */
-		match = (struct table_xentry *)(head->rnh_lookup(&keyif, NULL, head));
-	} else {
-		return (0);
-	}
-
-	if (match == NULL)
-		return (0);
-
-	*val = match->value;
-	return (1);
-}
-
-/*
- * Tree-walk callback: bump the entry counter that 'arg' points to.
- * The node itself is not examined. Always returns 0.
- */
-static int
-count_table_entry(struct radix_node *rn, void *arg)
-{
-	u_int32_t * const counter = arg;
-
-	*counter += 1;
-	return (0);
-}
-
-/*
- * Store in *cnt the number of entries in the IPv4 tree of table 'tbl'
- * (0 when the table has no such tree).
- * Returns EINVAL for an out-of-range table number, 0 otherwise.
- */
-int
-ipfw_count_table(struct ip_fw_chain *ch, uint32_t tbl, uint32_t *cnt)
-{
-	struct radix_node_head *head;
-
-	if (tbl >= V_fw_tables_max)
-		return (EINVAL);
-
-	*cnt = 0;
-	head = ch->tables[tbl];
-	if (head != NULL)
-		head->rnh_walktree(head, count_table_entry, cnt);
-
-	return (0);
-}
-
-/*
- * Tree-walk callback: append the visited IPv4 entry to the ipfw_table
- * passed in 'arg'. Returns 1 once the output array is full (tbl->cnt has
- * reached tbl->size), 0 after storing an entry.
- */
-static int
-dump_table_entry(struct radix_node *rn, void *arg)
-{
-	struct table_entry * const src = (struct table_entry *)rn;
-	ipfw_table * const out = arg;
-	ipfw_table_entry *slot;
-
-	if (out->cnt == out->size)
-		return (1);
-
-	slot = &out->ent[out->cnt];
-	slot->tbl = out->tbl;
-	/* Prefix length: 33 minus the position of the lowest set mask bit */
-	if (!in_nullhost(src->mask.sin_addr))
-		slot->masklen = 33 - ffs(ntohl(src->mask.sin_addr.s_addr));
-	else
-		slot->masklen = 0;
-	slot->addr = src->addr.sin_addr.s_addr;
-	slot->value = src->value;
-	out->cnt++;
-
-	return (0);
-}
-
-/*
- * Fill 'tbl' with the entries of the IPv4 tree of table tbl->tbl.
- * tbl->cnt is reset and then counts the entries stored.
- * Returns EINVAL for an out-of-range table number, 0 otherwise.
- */
-int
-ipfw_dump_table(struct ip_fw_chain *ch, ipfw_table *tbl)
-{
-	struct radix_node_head *head;
-
-	if (tbl->tbl >= V_fw_tables_max)
-		return (EINVAL);
-
-	tbl->cnt = 0;
-	head = ch->tables[tbl->tbl];
-	if (head != NULL)
-		head->rnh_walktree(head, dump_table_entry, tbl);
-
-	return (0);
-}
-
-/*
- * Tree-walk callback: add the size of one exported ipfw_table_xentry to
- * the byte counter that 'arg' points to. Always returns 0.
- */
-static int
-count_table_xentry(struct radix_node *rn, void *arg)
-{
-	uint32_t * const bytes = arg;
-
-	*bytes += sizeof(ipfw_table_xentry);
-	return (0);
-}
-
-/*
- * Store in *cnt the number of bytes needed to export table 'tbl' in the
- * extended format: one ipfw_table_xentry per entry of both trees plus an
- * ipfw_xtable header, or 0 if the table holds no entries.
- * Returns EINVAL for an out-of-range table number, 0 otherwise.
- */
-int
-ipfw_count_xtable(struct ip_fw_chain *ch, uint32_t tbl, uint32_t *cnt)
-{
-	struct radix_node_head *head;
-
-	if (tbl >= V_fw_tables_max)
-		return (EINVAL);
-
-	*cnt = 0;
-
-	head = ch->tables[tbl];
-	if (head != NULL)
-		head->rnh_walktree(head, count_table_xentry, cnt);
-
-	head = ch->xtables[tbl];
-	if (head != NULL)
-		head->rnh_walktree(head, count_table_xentry, cnt);
-
-	/* The header is only accounted for when there is something to dump */
-	if (*cnt > 0)
-		(*cnt) += sizeof(ipfw_xtable);
-
-	return (0);
-}
-
-
-/*
- * Tree-walk callback: append the visited IPv4 entry, in extended format,
- * to the ipfw_xtable passed in 'arg'. Returns 1 once the output array is
- * full, 0 after storing an entry.
- */
-static int
-dump_table_xentry_base(struct radix_node *rn, void *arg)
-{
-	struct table_entry * const src = (struct table_entry *)rn;
-	ipfw_xtable * const out = arg;
-	ipfw_table_xentry *slot;
-
-	/* Out of memory, returning */
-	if (out->cnt == out->size)
-		return (1);
-
-	slot = &out->xent[out->cnt];
-	slot->len = sizeof(ipfw_table_xentry);
-	slot->tbl = out->tbl;
-	if (!in_nullhost(src->mask.sin_addr))
-		slot->masklen = 33 - ffs(ntohl(src->mask.sin_addr.s_addr));
-	else
-		slot->masklen = 0;
-	/* Save IPv4 address as deprecated IPv6 compatible */
-	slot->k.addr6.s6_addr32[3] = src->addr.sin_addr.s_addr;
-	slot->value = src->value;
-	out->cnt++;
-
-	return (0);
-}
-
-/*
- * Tree-walk callback: append the visited extended entry (IPv6 prefix or
- * interface name, according to tbl->type) to the ipfw_xtable passed in
- * 'arg'.
- *
- * Returns 1 once the output array is full. Returns 0 otherwise; entries
- * of a type that cannot be exported are skipped without being counted.
- */
-static int
-dump_table_xentry_extended(struct radix_node *rn, void *arg)
-{
-	struct table_xentry * const n = (struct table_xentry *)rn;
-	ipfw_xtable * const tbl = arg;
-	ipfw_table_xentry *xent;
-#ifdef INET6
-	u_int i;
-	uint32_t *v;
-#endif
-	/* Out of memory, returning */
-	if (tbl->cnt == tbl->size)
-		return (1);
-	xent = &tbl->xent[tbl->cnt];
-	xent->len = sizeof(ipfw_table_xentry);
-	xent->tbl = tbl->tbl;
-
-	switch (tbl->type) {
-#ifdef INET6
-	case IPFW_TABLE_CIDR:
-		/*
-		 * Count IPv6 mask. Start from zero explicitly instead of
-		 * relying on the output buffer having been cleared.
-		 */
-		xent->masklen = 0;
-		v = (uint32_t *)&n->m.mask6.sin6_addr;
-		for (i = 0; i < sizeof(struct in6_addr) / 4; i++, v++)
-			xent->masklen += bitcount32(*v);
-		memcpy(&xent->k, &n->a.addr6.sin6_addr, sizeof(struct in6_addr));
-		break;
-#endif
-	case IPFW_TABLE_INTERFACE:
-		/* Assume exact mask */
-		xent->masklen = 8 * IF_NAMESIZE;
-		memcpy(&xent->k, &n->a.iface.ifname, IF_NAMESIZE);
-		break;
-
-	default:
-		/* unknown, skip entry */
-		return (0);
-	}
-
-	xent->value = n->value;
-	tbl->cnt++;
-	return (0);
-}
-
-/*
- * Fill 'tbl' with the entries of table tbl->tbl in extended format:
- * first those of the IPv4 tree, then those of the extended tree.
- * tbl->cnt is reset and tbl->type is set to the table's recorded type.
- * Returns EINVAL for an out-of-range table number, 0 otherwise.
- */
-int
-ipfw_dump_xtable(struct ip_fw_chain *ch, ipfw_xtable *tbl)
-{
-	struct radix_node_head *head;
-
-	if (tbl->tbl >= V_fw_tables_max)
-		return (EINVAL);
-
-	tbl->cnt = 0;
-	tbl->type = ch->tabletype[tbl->tbl];
-
-	head = ch->tables[tbl->tbl];
-	if (head != NULL)
-		head->rnh_walktree(head, dump_table_xentry_base, tbl);
-
-	head = ch->xtables[tbl->tbl];
-	if (head != NULL)
-		head->rnh_walktree(head, dump_table_xentry_extended, tbl);
-
-	return (0);
-}
-
-/* end of file */
diff --git a/sys/netinet/ipfw/test/Makefile b/sys/netinet/ipfw/test/Makefile
deleted file mode 100644
index c556a4b..0000000
--- a/sys/netinet/ipfw/test/Makefile
+++ /dev/null
@@ -1,51 +0,0 @@
-#
-# $FreeBSD$
-#
-# Makefile for building the userland scheduler tests.
-# It is written in a form that is also compatible with gmake.
-
-SCHED_SRCS = test_dn_sched.c
-SCHED_SRCS += dn_sched_fifo.c
-SCHED_SRCS += dn_sched_prio.c
-SCHED_SRCS += dn_sched_qfq.c
-SCHED_SRCS += dn_sched_rr.c
-SCHED_SRCS += dn_sched_wf2q.c
-SCHED_SRCS += dn_heap.c
-SCHED_SRCS += main.c
-
-SCHED_OBJS=$(SCHED_SRCS:.c=.o)
-
-HEAP_SRCS = dn_heap.c test_dn_heap.c
-HEAP_OBJS=$(HEAP_SRCS:.c=.o)
-
-# sources are looked up here and in the parent directory
-VPATH= .:..
-
-CFLAGS = -I.. -I. -Wall -Werror -O3 -DIPFW
-TARGETS= test_sched # no test_heap by default
-
-# these targets do not produce files of the same name
-.PHONY: all clean tgz
-
-all: $(TARGETS)
-
-test_heap : $(HEAP_OBJS)
-	$(CC) -o $@ $(HEAP_OBJS)
-
-test_sched : $(SCHED_OBJS)
-	$(CC) -o $@ $(SCHED_OBJS)
-
-$(SCHED_OBJS): dn_test.h
-main.o: mylist.h
-
-clean:
-	- rm *.o $(TARGETS) *.core
-
-ALLSRCS = $(SCHED_SRCS) dn_test.h mylist.h \
-	dn_sched.h dn_heap.h ip_dn_private.h Makefile
-TMPBASE = /tmp/testXYZ
-TMPDIR = $(TMPBASE)/test
-
-# collect all sources in a scratch directory and pack them
-tgz:
-	-rm -rf $(TMPDIR)
-	mkdir -p $(TMPDIR)
-	-cp -p $(ALLSRCS) $(TMPDIR)
-	-(cd ..; cp -p $(ALLSRCS) $(TMPDIR))
-	ls -la $(TMPDIR)
-	(cd $(TMPBASE); tar cvzf /tmp/test.tgz test)
diff --git a/sys/netinet/ipfw/test/dn_test.h b/sys/netinet/ipfw/test/dn_test.h
deleted file mode 100644
index 4e079bc..0000000
--- a/sys/netinet/ipfw/test/dn_test.h
+++ /dev/null
@@ -1,175 +0,0 @@
-/*
- * $FreeBSD$
- *
- * userspace compatibility code for dummynet schedulers
- */
-
-#ifndef _DN_TEST_H
-#define _DN_TEST_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include <inttypes.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <strings.h> /* bzero, ffs, ... */
-#include <string.h> /* strcmp */
-#include <errno.h>
-#include <sys/queue.h>
-#include <sys/time.h>
-
-extern int debug; /* verbosity threshold consulted by DX() */
-/* ND() and D1() are disabled debug statements: they expand to nothing */
-#define ND(fmt, args...) do {} while (0)
-#define D1(fmt, args...) do {} while (0)
-/* D() always prints to stderr, prefixed with the calling function's name */
-#define D(fmt, args...) fprintf(stderr, "%-8s " fmt "\n", \
- __FUNCTION__, ## args)
-/* DX() prints through D() only when 'debug' is greater than 'lev' */
-#define DX(lev, fmt, args...) do { \
- if (debug > lev) D(fmt, ## args); } while (0)
-
-
-#ifndef offsetof
-/*
- * Fallback for systems where the included headers do not provide
- * offsetof. The address is converted through intptr_t so that the
- * narrowing to int is explicit and does not trigger a pointer-size
- * warning where pointers are wider than int.
- */
-#define offsetof(t,m) ((int)(intptr_t)(&((t *)0L)->m))
-#endif
-
-#include <mylist.h>
-
-/* prevent include of other system headers */
-#define _NETINET_IP_VAR_H_ /* ip_fw_args */
-#define _IPFW2_H
-#define _SYS_MBUF_H_
-
-/*
- * Minimal stand-ins for dummynet constants used by the scheduler sources.
- * NOTE(review): presumably these mirror the object and scheduler type
- * identifiers of the kernel headers -- confirm before relying on values.
- */
-enum {
- DN_QUEUE,
-};
-
-enum {
- DN_SCHED_FIFO,
- DN_SCHED_WF2QP,
-};
-
-/* Userspace replacement for the dummynet object header (see dn_flow.oid) */
-struct dn_id {
- int type, subtype, len, id;
-};
-
-/*
- * Userspace flowset descriptor: the scheduler-visible parameters plus
- * the bookkeeping the test generator needs to map flowsets to flows.
- */
-struct dn_fs {
- int par[4]; /* flowset parameters */
-
- /* simulation entries.
- * 'index' is not strictly necessary
- * y is used for the inverse mapping ,
- */
- int index;
- int y; /* inverse mapping */
- int base_y; /* inverse mapping */
- int next_y; /* inverse mapping */
- int n_flows;
- int first_flow;
- int next_flow; /* first_flow + n_flows */
- /*
- * when generating, let 'cur' go from 0 to n_flows-1,
- * then point to flow first_flow + cur
- */
- int cur;
-};
-
-struct dn_sch { /* empty placeholder: only the type name is needed here */
-};
-
-/* Per-flow state and counters as seen by the userspace test harness */
-struct dn_flow {
- struct dn_id oid;
- int length;
- int len_bytes;
- int drops;
- uint64_t tot_bytes; /* 64-bit byte counter, printed by dump() in main.c */
- uint32_t flow_id;
- struct list_head h; /* used by the generator */
-};
-
-/* Empty placeholders: only the type names are needed in userspace */
-struct dn_link {
-};
-
-struct ip_fw_args {
-};
-
-/* Userspace packet descriptor replacing the kernel mbuf in the tests */
-struct mbuf {
- struct {
- int len;
- } m_pkthdr;
- struct mbuf *m_nextpkt; /* link used by the packet queues and the freelist */
- int flow_id; /* for testing, index of a flow */
- //int flowset_id; /* for testing, index of a flowset */
- void *cfg; /* config args */
-};
-
-#define MALLOC_DECLARE(x)
-/*
- * Userspace KASSERT: when condition 'x' is false, print the parenthesized
- * printf argument list 'y' and terminate the program. The braces matter:
- * the exit must only happen for a failed assertion.
- */
-#define KASSERT(x, y) do { if (!(x)) { printf y ; exit(0); } } while (0)
-struct ipfw_flow_id { /* empty placeholder: only the type name is needed */
-};
-
-/* Replacements for the kernel module declaration machinery */
-typedef void * module_t;
-
-struct _md_t {
- const char *name;
- int (*f)(module_t, int, void *);
- void *p;
-};
-
-typedef struct _md_t moduledata_t;
-
-/* Expands to a global pointer _g_<name> that refers to descriptor 'b' */
-#define DECLARE_MODULE(name, b, c, d) \
- moduledata_t *_g_##name = & b
-/* Module dependencies are ignored in userspace */
-#define MODULE_DEPEND(a, b, c, d, e)
-
-#ifdef IPFW
-#include <dn_heap.h>
-#include <ip_dn_private.h>
-#include <dn_sched.h>
-#else
-/*
- * Reduced versions of the scheduler data structures, used when IPFW is
- * not defined and dn_heap.h, ip_dn_private.h and dn_sched.h are therefore
- * not included.
- */
-struct dn_queue {
- struct dn_fsk *fs; /* parent flowset. */
- struct dn_sch_inst *_si; /* parent sched instance. */
-};
-struct dn_schk {
-};
-struct dn_fsk {
- struct dn_fs fs;
- struct dn_schk *sched;
-};
-struct dn_sch_inst {
- struct dn_schk *sched;
-};
-struct dn_alg {
- int type;
- const char *name;
- void *enqueue, *dequeue;
- int q_datalen, si_datalen, schk_datalen;
- int (*config)(struct dn_schk *);
- int (*new_sched)(struct dn_sch_inst *);
- int (*new_fsk)(struct dn_fsk *);
- int (*new_queue)(struct dn_queue *q);
-};
-
-#endif
-
-#ifndef __FreeBSD__
-/* Declared here for non-FreeBSD builds; the definition is not in this header */
-int fls(int);
-#endif
-
-/*
- * Append packet 'm' at the tail of queue 'q'; the packet becomes the new
- * tail and its m_nextpkt link is cleared.
- */
-static inline void
-mq_append(struct mq *q, struct mbuf *m)
-{
-	if (q->head != NULL)
-		q->tail->m_nextpkt = m;	/* link after the current tail */
-	else
-		q->head = m;		/* first packet of an empty queue */
-
-	q->tail = m;
-	m->m_nextpkt = NULL;
-}
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _DN_TEST_H */
diff --git a/sys/netinet/ipfw/test/main.c b/sys/netinet/ipfw/test/main.c
deleted file mode 100644
index be9fdf5..0000000
--- a/sys/netinet/ipfw/test/main.c
+++ /dev/null
@@ -1,636 +0,0 @@
-/*
- * $FreeBSD$
- *
- * Testing program for schedulers
- *
- * The framework includes a simple controller which, at each
- * iteration, decides whether we can enqueue and/or dequeue.
- * Then the mainloop runs the required number of tests,
- * keeping track of statistics.
- */
-
-#include "dn_test.h"
-
-/* Wrapper around a bare list head */
-struct q_list {
- struct list_head h;
-};
-
-/*
- * Complete state of one test run: command line, counters, traffic
- * generator parameters, the scheduler under test and its data areas.
- */
-struct cfg_s {
- int ac; /* command line, as read by init() */
- char * const *av;
-
- const char *name;
- int loops; /* number of iterations run by mainloop() */
- struct timeval time;
-
- /* running counters */
- uint32_t _enqueue; /* enqueue attempts */
- uint32_t drop; /* packets rejected by enqueue */
- uint32_t pending; /* packets enqueued and not yet dequeued */
- uint32_t dequeue; /* dequeue attempts */
-
- /* generator parameters */
- int th_min, th_max;
- int maxburst;
- int lmin, lmax; /* packet len */
- int flows; /* number of flows */
- int flowsets; /* number of flowsets */
- int wsum; /* sum of weights of all flows */
- int max_y; /* max random number in the generation */
- int cur_y, cur_fs; /* used in generation, between 0 and max_y - 1 */
- const char *fs_config; /* flowset config */
- int can_dequeue; /* non-zero when mainloop() may dequeue in this iteration */
- int burst; /* count of packets sent in a burst */
- struct mbuf *tosend; /* packet to send -- also flag to enqueue */
-
- struct mbuf *freelist; /* released packets, linked through m_nextpkt */
-
- struct mbuf *head, *tail; /* a simple tailq */
-
- /* scheduler hooks */
- int (*enq)(struct dn_sch_inst *, struct dn_queue *,
- struct mbuf *);
- struct mbuf * (*deq)(struct dn_sch_inst *);
- /* size of the three fields including sched-specific areas */
- int schk_len;
- int q_len; /* size of a queue including sched-fields */
- int si_len; /* size of a sch_inst including sched-fields */
- char *q; /* array of flow queues */
- /* use a char* because size is variable */
- struct dn_fsk *fs; /* array of flowsets */
- struct dn_sch_inst *si;
- struct dn_schk *sched;
-
- /* generator state */
- int state; /* 0 = going up, 1: going down */
-
- /*
- * We keep lists for each backlog level, and always serve
- * the one with shortest backlog. llmask contains a bitmap
- * of lists, and ll are the heads of the lists. The last
- * entry (BACKLOG) contains all entries considered 'full'
- * XXX to optimize things, entry i could contain queues with
- * 2^{i-1}+1 .. 2^i entries.
- */
-#define BACKLOG 30
- uint32_t llmask;
- struct list_head ll[BACKLOG + 10];
-};
-
-/* FI2Q and Q2FI converts from flow_id to dn_queue and back.
- * We cannot easily use pointer arithmetic because it is variable size.
- * Both work on byte offsets into the c->q array, whose elements are
- * c->q_len bytes each.
- */
-#define FI2Q(c, i) ((struct dn_queue *)((c)->q + (c)->q_len * (i)))
-#define Q2FI(c, q) (((char *)(q) - (c)->q)/(c)->q_len)
-
-int debug = 0; /* verbosity level tested by the DX() macro */
-
-struct dn_parms dn_cfg;
-
-/* defined later in this file; called at the start of every mainloop() iteration */
-static void controller(struct cfg_s *c);
-
-/*
- * Release a packet: the mbuf goes back to the freelist and, when the
- * backlog of its queue is below BACKLOG, the queue is moved to the list
- * for that backlog level. Also counts the packet as dropped.
- * Always returns 0.
- */
-int
-drop(struct cfg_s *c, struct mbuf *m)
-{
-	struct dn_queue *queue;
-	int level;
-
-	c->drop++;
-	queue = FI2Q(c, m->flow_id);
-	level = queue->ni.length; // XXX or ffs...
-
-	ND("q %p id %d current length %d", queue, m->flow_id, level);
-	if (level < BACKLOG) {
-		struct list_head *entry = &queue->ni.h;
-
-		/* clear the bit of the level above, set the one of this level */
-		c->llmask = (c->llmask & ~(1<<(level+1))) | (1<<(level));
-		list_del(entry);
-		list_add_tail(entry, &c->ll[level]);
-	}
-
-	/* push the mbuf on the freelist */
-	m->m_nextpkt = c->freelist;
-	c->freelist = m;
-	return 0;
-}
-
-/*
- * Enqueue packet '_m'. With a scheduler configured the request is passed
- * to its enqueue hook, otherwise the packet is appended to the built-in
- * tail queue.
- * Returns non-zero when the packet is dropped, 0 on success.
- */
-static int
-enqueue(struct cfg_s *c, void *_m)
-{
-	struct mbuf *m = _m;
-	if (c->enq)
-		return c->enq(c->si, FI2Q(c, m->flow_id), m);
-	if (c->head == NULL)
-		c->head = m;
-	else
-		c->tail->m_nextpkt = m;
-	c->tail = m;
-	/* the new tail must terminate the list, whatever the link held before */
-	m->m_nextpkt = NULL;
-	return 0; /* default - success */
-}
-
-/*
- * Fetch the next packet, either through the scheduler's dequeue hook or
- * from the head of the built-in tail queue.
- * Returns the packet, or NULL when none is available.
- */
-static void *
-dequeue(struct cfg_s *c)
-{
-	struct mbuf *first;
-
-	if (c->deq)
-		return c->deq(c->si);
-
-	first = c->head;
-	if (first == NULL)
-		return first;
-
-	/* unlink the head of the queue */
-	c->head = first->m_nextpkt;
-	first->m_nextpkt = NULL;
-	return first;
-}
-
-/*
- * Run c->loops iterations of the test. In each one the controller decides
- * whether a packet is to be sent and whether one may be received; the
- * loop then performs the enqueue and/or dequeue and updates the counters.
- * Always returns 0.
- */
-static int
-mainloop(struct cfg_s *c)
-{
-	int iter;
-	struct mbuf *pkt;
-
-	for (iter = 0; iter < c->loops; iter++) {
-		/* implement hysteresis */
-		controller(c);
-		DX(3, "loop %d enq %d send %p rx %d",
-			iter, c->_enqueue, c->tosend, c->can_dequeue);
-
-		pkt = c->tosend;
-		if (pkt != NULL) {
-			c->_enqueue++;
-			if (enqueue(c, pkt) == 0) {
-				ND("enqueue ok");
-				c->pending++;
-			} else {
-				drop(c, pkt);
-				ND("loop %d enqueue fail", iter );
-			}
-		}
-
-		if (c->can_dequeue) {
-			c->dequeue++;
-			pkt = dequeue(c);
-			if (pkt != NULL) {
-				c->pending--;
-				/* drop() recycles the packet but also counts it */
-				drop(c, pkt);
-				c->drop--;	/* compensate */
-			}
-		}
-	}
-	DX(1, "mainloop ends %d", iter);
-	return 0;
-}
-
-/*
- * Print, at debug level above 1, the total byte count of every flow queue
- * followed by the number of loops executed. Always returns 0.
- */
-int
-dump(struct cfg_s *c)
-{
-	int i;
-	struct dn_queue *q;
-
-	for (i=0; i < c->flows; i++) {
-		q = FI2Q(c, i);
-		/*
-		 * tot_bytes is a 64-bit counter (uint64_t in dn_test.h);
-		 * convert it so that the argument matches %lld everywhere.
-		 */
-		DX(1, "queue %4d tot %10lld", i, (long long)q->ni.tot_bytes);
-	}
-	DX(1, "done %d loops\n", c->loops);
-	return 0;
-}
-
-/*
- * Interpret a number in human form.
- *
- * 's' is parsed with strtol() (base auto-detected) and may be followed by
- * one suffix character:
- *	n	negate the value ("multiply by n")
- *	K, M	multiply by 1000, 1000000
- *	k, m	multiply by 1024, 1024*1024
- *	w	accepted, value unchanged
- * Any other character is reported and left for the caller.
- *
- * 'key' names the value in debug messages and may be NULL.
- * If 'next' is not NULL, *next is set to the first unconsumed character,
- * or to NULL when the whole string has been consumed.
- *
- * Returns the value; 0 is returned for a NULL or empty string and for
- * negative numbers.
- */
-static long
-getnum(const char *s, char **next, const char *key)
-{
-	char *end = NULL;
-	long l;
-
-	if (next)	/* default */
-		*next = NULL;
-	if (s && *s) {
-		DX(3, "token is <%s> %s", s, key ? key : "-");
-		l = strtol(s, &end, 0);
-	} else {
-		DX(3, "empty string");
-		l = -1;
-	}
-	if (l < 0) {
-		DX(2, "invalid %s for %s", s ? s : "NULL", (key ? key : "") );
-		return 0;	// invalid
-	}
-	if (!end || !*end)
-		return l;
-	if (*end == 'n')
-		l = -l;	/* multiply by n */
-	else if (*end == 'K')
-		l = l*1000;
-	else if (*end == 'M')
-		l = l*1000000;
-	else if (*end == 'k')
-		l = l*1024;
-	else if (*end == 'm')
-		l = l*1024*1024;
-	else if (*end == 'w')
-		;
-	else {/* not recognized */
-		/* key may be NULL: never hand it to %s unguarded */
-		D("suffix %s for %s, next %p", end, key ? key : "-", next);
-		end--;	/* undo the increment below: keep the character */
-	}
-	end++;
-	DX(3, "suffix now %s for %s, next %p", end, key ? key : "-", next);
-	if (next && *end) {
-		DX(3, "setting next to %s for %s", end, key ? key : "-");
-		*next = end;
-	}
-	return l;
-}
-
-/*
- * flowsets are a comma-separated list of
- *     weight:maxlen:flows
- * indicating how many flows are hooked to that fs.
- * Both weight and range can be min-max-steps.
- * In a first pass we just count the number of flowsets and flows,
- * in a second pass we complete the setup.
- *
- * On pass 0 'fs' is remembered in c->fs_config; every pass then parses
- * a private copy of c->fs_config, which is released before returning.
- * c->flows, c->flowsets, c->wsum and c->max_y are updated unless a
- * malformed description or a flowset count mismatch stops the parsing.
- */
-static void
-parse_flowsets(struct cfg_s *c, const char *fs, int pass)
-{
-	char *s, *cur, *next;
-	int n_flows = 0, n_fs = 0, wsum = 0;
-	int i, j;
-	struct dn_fs *prev = NULL;
-
-	DX(3, "--- pass %d flows %d flowsets %d", pass, c->flows, c->flowsets);
-	if (pass == 0)
-		c->fs_config = fs;
-	s = c->fs_config ? strdup(c->fs_config) : NULL;
-	if (s == NULL) {
-		if (pass == 0)
-			D("no fsconfig");
-		return;
-	}
-	for (next = s; (cur = strsep(&next, ","));) {
-		char *p = NULL;
-		int w, w_h, w_steps, wi;
-		int len, len_h, l_steps, li;
-		int flows;
-
-		w = getnum(strsep(&cur, ":"), &p, "weight");
-		if (w <= 0)
-			w = 1;
-		w_h = p ? getnum(p+1, &p, "weight_max") : w;
-		w_steps = p ? getnum(p+1, &p, "w_steps") : (w_h == w ?1:2);
-		len = getnum(strsep(&cur, ":"), &p, "len");
-		if (len <= 0)
-			len = 1000;
-		len_h = p ? getnum(p+1, &p, "len_max") : len;
-		l_steps = p ? getnum(p+1, &p, "l_steps") : (len_h == len ? 1 : 2);
-		flows = getnum(strsep(&cur, ":"), NULL, "flows");
-		if (flows == 0)
-			flows = 1;
-		DX(4, "weight %d..%d (%d) len %d..%d (%d) flows %d",
-			w, w_h, w_steps, len, len_h, l_steps, flows);
-		if (w == 0 || w_h < w || len == 0 || len_h < len ||
-				flows == 0) {
-			/* c->fs_config is the string being parsed, never NULL here */
-			DX(4,"wrong parameters %s", c->fs_config);
-			free(s);
-			return;
-		}
-		n_flows += flows * w_steps * l_steps;
-		for (i = 0; i < w_steps; i++) {
-			wi = w + ((w_h - w)* i)/(w_steps == 1 ? 1 : (w_steps-1));
-			for (j = 0; j < l_steps; j++, n_fs++) {
-				struct dn_fs *dfs;
-				int x;
-
-				li = len + ((len_h - len)* j)/(l_steps == 1 ? 1 : (l_steps-1));
-				x = (wi*2048)/li;
-				DX(3, "----- fs %4d weight %4d lmax %4d X %4d flows %d",
-					n_fs, wi, li, x, flows);
-				if (pass == 0)
-					continue;
-				if (c->fs == NULL || c->flowsets <= n_fs) {
-					D("error in number of flowsets");
-					free(s);
-					return;
-				}
-				/* the slot is only addressed once it is known to exist */
-				dfs = &c->fs[n_fs].fs;
-				wsum += wi * flows;
-				dfs->par[0] = wi;
-				dfs->par[1] = li;
-				dfs->index = n_fs;
-				dfs->n_flows = flows;
-				dfs->cur = dfs->first_flow = prev==NULL ? 0 : prev->next_flow;
-				dfs->next_flow = dfs->first_flow + dfs->n_flows;
-				dfs->y = x * flows;
-				dfs->base_y = (prev == NULL) ? 0 : prev->next_y;
-				dfs->next_y = dfs->base_y + dfs->y;
-				prev = dfs;
-			}
-		}
-	}
-	/* nothing below refers to the private copy any more */
-	free(s);
-
-	c->max_y = prev ? prev->base_y + prev->y : 0;
-	c->flows = n_flows;
-	c->flowsets = n_fs;
-	c->wsum = wsum;
-	if (pass == 0)
-		return;
-
-	/* now link all flows to their parent flowsets */
-	DX(1,"%d flows on %d flowsets max_y %d", c->flows, c->flowsets, c->max_y);
-	for (i=0; i < c->flowsets; i++) {
-		struct dn_fs *dfs = &c->fs[i].fs;
-		DX(1, "fs %3d w %5d l %4d flow %5d .. %5d y %6d .. %6d",
-			i, dfs->par[0], dfs->par[1],
-			dfs->first_flow, dfs->next_flow,
-			dfs->base_y, dfs->next_y);
-		for (j = dfs->first_flow; j < dfs->next_flow; j++) {
-			struct dn_queue *q = FI2Q(c, j);
-			q->fs = &c->fs[i];
-		}
-	}
-}
-
-static int
-init(struct cfg_s *c)
-{
- int i;
- int ac = c->ac;
- char * const *av = c->av;
-
- c->si_len = sizeof(struct dn_sch_inst);
- c->q_len = sizeof(struct dn_queue);
- moduledata_t *mod = NULL;
- struct dn_alg *p = NULL;
-
- c->th_min = 0;
- c->th_max = -20;/* 20 packets per flow */
- c->lmin = c->lmax = 1280; /* packet len */
- c->flows = 1;
- c->flowsets = 1;
- c->name = "null";
- ac--; av++;
- while (ac > 1) {
- if (!strcmp(*av, "-n")) {
- c->loops = getnum(av[1], NULL, av[0]);
- } else if (!strcmp(*av, "-d")) {
- debug = atoi(av[1]);
- } else if (!strcmp(*av, "-alg")) {
- extern moduledata_t *_g_dn_fifo;
- extern moduledata_t *_g_dn_wf2qp;
- extern moduledata_t *_g_dn_rr;
- extern moduledata_t *_g_dn_qfq;
-#ifdef WITH_KPS
- extern moduledata_t *_g_dn_kps;
-#endif
- if (!strcmp(av[1], "rr"))
- mod = _g_dn_rr;
- else if (!strcmp(av[1], "wf2qp"))
- mod = _g_dn_wf2qp;
- else if (!strcmp(av[1], "fifo"))
- mod = _g_dn_fifo;
- else if (!strcmp(av[1], "qfq"))
- mod = _g_dn_qfq;
-#ifdef WITH_KPS
- else if (!strcmp(av[1], "kps"))
- mod = _g_dn_kps;
-#endif
- else
- mod = NULL;
- c->name = mod ? mod->name : "NULL";
- DX(3, "using scheduler %s", c->name);
- } else if (!strcmp(*av, "-len")) {
- c->lmin = getnum(av[1], NULL, av[0]);
- c->lmax = c->lmin;
- DX(3, "setting max to %d", c->th_max);
- } else if (!strcmp(*av, "-burst")) {
- c->maxburst = getnum(av[1], NULL, av[0]);
- DX(3, "setting max to %d", c->th_max);
- } else if (!strcmp(*av, "-qmax")) {
- c->th_max = getnum(av[1], NULL, av[0]);
- DX(3, "setting max to %d", c->th_max);
- } else if (!strcmp(*av, "-qmin")) {
- c->th_min = getnum(av[1], NULL, av[0]);
- DX(3, "setting min to %d", c->th_min);
- } else if (!strcmp(*av, "-flows")) {
- c->flows = getnum(av[1], NULL, av[0]);
- DX(3, "setting flows to %d", c->flows);
- } else if (!strcmp(*av, "-flowsets")) {
- parse_flowsets(c, av[1], 0);
- DX(3, "setting flowsets to %d", c->flowsets);
- } else {
- D("option %s not recognised, ignore", *av);
- }
- ac -= 2; av += 2;
- }
- if (c->maxburst <= 0)
- c->maxburst = 1;
- if (c->loops <= 0)
- c->loops = 1;
- if (c->flows <= 0)
- c->flows = 1;
- if (c->flowsets <= 0)
- c->flowsets = 1;
- if (c->lmin <= 0)
- c->lmin = 1;
- if (c->lmax <= 0)
- c->lmax = 1;
- /* multiply by N */
- if (c->th_min < 0)
- c->th_min = c->flows * -c->th_min;
- if (c->th_max < 0)
- c->th_max = c->flows * -c->th_max;
- if (c->th_max <= c->th_min)
- c->th_max = c->th_min + 1;
- if (mod) {
- p = mod->p;
- DX(3, "using module %s f %p p %p", mod->name, mod->f, mod->p);
- DX(3, "modname %s ty %d", p->name, p->type);
- c->enq = p->enqueue;
- c->deq = p->dequeue;
- c->si_len += p->si_datalen;
- c->q_len += p->q_datalen;
- c->schk_len += p->schk_datalen;
- }
- /* allocate queues, flowsets and one scheduler */
- c->q = calloc(c->flows, c->q_len);
- c->fs = calloc(c->flowsets, sizeof(struct dn_fsk));
- c->si = calloc(1, c->si_len);
- c->sched = calloc(c->flows, c->schk_len);
- if (c->q == NULL || c->fs == NULL) {
- D("error allocating memory for flows");
- exit(1);
- }
- c->si->sched = c->sched;
- if (p) {
- if (p->config)
- p->config(c->sched);
- if (p->new_sched)
- p->new_sched(c->si);
- }
- /* parse_flowsets links queues to their flowsets */
- parse_flowsets(c, av[1], 1);
- /* complete the work calling new_fsk */
- for (i = 0; i < c->flowsets; i++) {
- if (c->fs[i].fs.par[1] == 0)
- c->fs[i].fs.par[1] = 1000; /* default pkt len */
- c->fs[i].sched = c->sched;
- if (p && p->new_fsk)
- p->new_fsk(&c->fs[i]);
- }
-
- /* initialize the lists for the generator, and put
- * all flows in the list for backlog = 0
- */
- for (i=0; i <= BACKLOG+5; i++)
- INIT_LIST_HEAD(&c->ll[i]);
-
- for (i = 0; i < c->flows; i++) {
- struct dn_queue *q = FI2Q(c, i);
- if (q->fs == NULL)
- q->fs = &c->fs[0]; /* XXX */
- q->_si = c->si;
- if (p && p->new_queue)
- p->new_queue(q);
- INIT_LIST_HEAD(&q->ni.h);
- list_add_tail(&q->ni.h, &c->ll[0]);
- }
- c->llmask = 1;
- return 0;
-}
-
-
-int
-main(int ac, char *av[])
-{
- struct cfg_s c;
- struct timeval end;
- double ll;
- int i;
- char msg[40];
-
- bzero(&c, sizeof(c));
- c.ac = ac;
- c.av = av;
- init(&c);
- gettimeofday(&c.time, NULL);
- mainloop(&c);
- gettimeofday(&end, NULL);
- end.tv_sec -= c.time.tv_sec;
- end.tv_usec -= c.time.tv_usec;
- if (end.tv_usec < 0) {
- end.tv_usec += 1000000;
- end.tv_sec--;
- }
- c.time = end;
- ll = end.tv_sec*1000000 + end.tv_usec;
- ll *= 1000; /* convert to nanoseconds */
- ll /= c._enqueue;
- sprintf(msg, "1::%d", c.flows);
- D("%-8s n %d %d time %d.%06d %8.3f qlen %d %d flows %s drops %d",
- c.name, c._enqueue, c.loops,
- (int)c.time.tv_sec, (int)c.time.tv_usec, ll,
- c.th_min, c.th_max,
- c.fs_config ? c.fs_config : msg, c.drop);
- dump(&c);
- DX(1, "done ac %d av %p", ac, av);
- for (i=0; i < ac; i++)
- DX(1, "arg %d %s", i, av[i]);
- return 0;
-}
-
-/*
- * The controller decides whether in this iteration we should send
- * (the packet is in c->tosend) and/or receive (flag c->can_dequeue)
- */
-static void
-controller(struct cfg_s *c)
-{
- struct mbuf *m;
- struct dn_fs *fs;
- int flow_id;
-
- /* histeresis between max and min */
- if (c->state == 0 && c->pending >= c->th_max)
- c->state = 1;
- else if (c->state == 1 && c->pending <= c->th_min)
- c->state = 0;
- ND(1, "state %d pending %2d", c->state, c->pending);
- c->can_dequeue = c->state;
- c->tosend = NULL;
- if (c->state)
- return;
-
- if (1) {
- int i;
- struct dn_queue *q;
- struct list_head *h;
-
- i = ffs(c->llmask) - 1;
- if (i < 0) {
- DX(2, "no candidate");
- c->can_dequeue = 1;
- return;
- }
- h = &c->ll[i];
- ND(1, "backlog %d p %p prev %p next %p", i, h, h->prev, h->next);
- q = list_first_entry(h, struct dn_queue, ni.h);
- list_del(&q->ni.h);
- flow_id = Q2FI(c, q);
- DX(2, "extracted flow %p %d backlog %d", q, flow_id, i);
- if (list_empty(h)) {
- ND(2, "backlog %d empty", i);
- c->llmask &= ~(1<<i);
- }
- ND(1, "before %d p %p prev %p next %p", i+1, h+1, h[1].prev, h[1].next);
- list_add_tail(&q->ni.h, h+1);
- ND(1, " after %d p %p prev %p next %p", i+1, h+1, h[1].prev, h[1].next);
- if (i < BACKLOG) {
- ND(2, "backlog %d full", i+1);
- c->llmask |= 1<<(1+i);
- }
- fs = &q->fs->fs;
- c->cur_fs = q->fs - c->fs;
- fs->cur = flow_id;
- } else {
- /* XXX this does not work ? */
- /* now decide whom to send the packet, and the length */
- /* lookup in the flow table */
- if (c->cur_y >= c->max_y) { /* handle wraparound */
- c->cur_y = 0;
- c->cur_fs = 0;
- }
- fs = &c->fs[c->cur_fs].fs;
- flow_id = fs->cur++;
- if (fs->cur >= fs->next_flow)
- fs->cur = fs->first_flow;
- c->cur_y++;
- if (c->cur_y >= fs->next_y)
- c->cur_fs++;
- }
-
- /* construct a packet */
- if (c->freelist) {
- m = c->tosend = c->freelist;
- c->freelist = c->freelist->m_nextpkt;
- } else {
- m = c->tosend = calloc(1, sizeof(struct mbuf));
- }
- if (m == NULL)
- return;
-
- m->cfg = c;
- m->m_nextpkt = NULL;
- m->m_pkthdr.len = fs->par[1]; // XXX maxlen
- m->flow_id = flow_id;
-
- ND(2,"y %6d flow %5d fs %3d weight %4d len %4d",
- c->cur_y, m->flow_id, c->cur_fs,
- fs->par[0], m->m_pkthdr.len);
-
-}
-
-/*
-Packet allocation:
-to achieve a distribution that matches weights, for each X=w/lmax class
-we should generate a number of packets proportional to Y = X times the number
-of flows in the class.
-So we construct an array with the cumulative distribution of Y's,
-and use it to identify the flow via inverse mapping (if the Y's are
-not too many we can use an array for the lookup). In practice,
-each flow will have X entries [virtually] pointing to it.
-
-*/
diff --git a/sys/netinet/ipfw/test/mylist.h b/sys/netinet/ipfw/test/mylist.h
deleted file mode 100644
index 6247f32..0000000
--- a/sys/netinet/ipfw/test/mylist.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * $FreeBSD$
- *
- * linux-like bidirectional lists
- */
-
-#ifndef _MYLIST_H
-#define _MYLIST_H
-struct list_head {
- struct list_head *prev, *next;
-};
-
-#define INIT_LIST_HEAD(l) do { (l)->prev = (l)->next = (l); } while (0)
-#define list_empty(l) ( (l)->next == l )
-static inline void
-__list_add(struct list_head *o, struct list_head *prev,
- struct list_head *next)
-{
- next->prev = o;
- o->next = next;
- o->prev = prev;
- prev->next = o;
-}
-
-static inline void
-list_add_tail(struct list_head *o, struct list_head *head)
-{
- __list_add(o, head->prev, head);
-}
-
-#define list_first_entry(pL, ty, member) \
- (ty *)((char *)((pL)->next) - offsetof(ty, member))
-
-static inline void
-__list_del(struct list_head *prev, struct list_head *next)
-{
- next->prev = prev;
- prev->next = next;
-}
-
-static inline void
-list_del(struct list_head *entry)
-{
- ND("called on %p", entry);
- __list_del(entry->prev, entry->next);
- entry->next = entry->prev = NULL;
-}
-
-#endif /* _MYLIST_H */
diff --git a/sys/netinet/ipfw/test/test_dn_heap.c b/sys/netinet/ipfw/test/test_dn_heap.c
deleted file mode 100644
index d460cf2..0000000
--- a/sys/netinet/ipfw/test/test_dn_heap.c
+++ /dev/null
@@ -1,162 +0,0 @@
-/*-
- * Copyright (c) 1998-2002,2010 Luigi Rizzo, Universita` di Pisa
- * All rights reserved
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-/*
- * Userland code for testing binary heaps and hash tables
- *
- * $FreeBSD$
- */
-
-#include <sys/cdefs.h>
-#include <sys/param.h>
-
-#include <stdio.h>
-#include <strings.h>
-#include <stdlib.h>
-
-#include "dn_heap.h"
-#define log(x, arg...) fprintf(stderr, ## arg)
-#define panic(x...) fprintf(stderr, ## x), exit(1)
-
-#include <string.h>
-
-struct x {
- struct x *ht_link;
- char buf[0];
-};
-
-uint32_t hf(uintptr_t key, int flags, void *arg)
-{
- return (flags & DNHT_KEY_IS_OBJ) ?
- ((struct x *)key)->buf[0] : *(char *)key;
-}
-
-int matchf(void *obj, uintptr_t key, int flags, void *arg)
-{
- char *s = (flags & DNHT_KEY_IS_OBJ) ?
- ((struct x *)key)->buf : (char *)key;
- return (strcmp(((struct x *)obj)->buf, s) == 0);
-}
-
-void *newfn(uintptr_t key, int flags, void *arg)
-{
- char *s = (char *)key;
- struct x *p = malloc(sizeof(*p) + 1 + strlen(s));
- if (p)
- strcpy(p->buf, s);
- return p;
-}
-
-char *strings[] = {
- "undici", "unico", "doppio", "devoto",
- "uno", "due", "tre", "quattro", "cinque", "sei",
- "uno", "due", "tre", "quattro", "cinque", "sei",
- NULL,
-};
-
-int doprint(void *_x, void *arg)
-{
- struct x *x = _x;
- printf("found element <%s>\n", x->buf);
- return (int)arg;
-}
-
-static void
-test_hash()
-{
- char **p;
- struct dn_ht *h;
- uintptr_t x = 0;
- uintptr_t x1 = 0;
-
- /* first, find and allocate */
- h = dn_ht_init(NULL, 10, 0, hf, matchf, newfn);
-
- for (p = strings; *p; p++) {
- dn_ht_find(h, (uintptr_t)*p, DNHT_INSERT, NULL);
- }
- dn_ht_scan(h, doprint, 0);
- printf("/* second -- find without allocate */\n");
- h = dn_ht_init(NULL, 10, 0, hf, matchf, NULL);
- for (p = strings; *p; p++) {
- void **y = newfn((uintptr_t)*p, 0, NULL);
- if (x == 0)
- x = (uintptr_t)y;
- else {
- if (x1 == 0)
- x1 = (uintptr_t)*p;
- }
- dn_ht_find(h, (uintptr_t)y, DNHT_INSERT | DNHT_KEY_IS_OBJ, NULL);
- }
- dn_ht_scan(h, doprint, 0);
- printf("remove %p gives %p\n", (void *)x,
- dn_ht_find(h, x, DNHT_KEY_IS_OBJ | DNHT_REMOVE, NULL));
- printf("remove %p gives %p\n", (void *)x,
- dn_ht_find(h, x, DNHT_KEY_IS_OBJ | DNHT_REMOVE, NULL));
- printf("remove %p gives %p\n", (void *)x,
- dn_ht_find(h, x1, DNHT_REMOVE, NULL));
- printf("remove %p gives %p\n", (void *)x,
- dn_ht_find(h, x1, DNHT_REMOVE, NULL));
- dn_ht_scan(h, doprint, 0);
-}
-
-int
-main(int argc, char *argv[])
-{
- struct dn_heap h;
- int i, n, n2, n3;
-
- test_hash();
- return 0;
-
- /* n = elements, n2 = cycles */
- n = (argc > 1) ? atoi(argv[1]) : 0;
- if (n <= 0 || n > 1000000)
- n = 100;
- n2 = (argc > 2) ? atoi(argv[2]) : 0;
- if (n2 <= 0)
- n = 1000000;
- n3 = (argc > 3) ? atoi(argv[3]) : 0;
- bzero(&h, sizeof(h));
- heap_init(&h, n, -1);
- while (n2-- > 0) {
- uint64_t prevk = 0;
- for (i=0; i < n; i++)
- heap_insert(&h, n3 ? n-i: random(), (void *)(100+i));
-
- for (i=0; h.elements > 0; i++) {
- uint64_t k = h.p[0].key;
- if (k < prevk)
- panic("wrong sequence\n");
- prevk = k;
- if (0)
- printf("%d key %llu, val %p\n",
- i, h.p[0].key, h.p[0].object);
- heap_extract(&h, NULL);
- }
- }
- return 0;
-}
diff --git a/sys/netinet/ipfw/test/test_dn_sched.c b/sys/netinet/ipfw/test/test_dn_sched.c
deleted file mode 100644
index ee46c95..0000000
--- a/sys/netinet/ipfw/test/test_dn_sched.c
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
- * $FreeBSD$
- *
- * library functions for userland testing of dummynet schedulers
- */
-
-#include "dn_test.h"
-
-void
-m_freem(struct mbuf *m)
-{
- printf("free %p\n", m);
-}
-
-int
-dn_sched_modevent(module_t mod, int cmd, void *arg)
-{
- return 0;
-}
-
-void
-dn_free_pkts(struct mbuf *m)
-{
- struct mbuf *x;
- while ( (x = m) ) {
- m = m->m_nextpkt;
- m_freem(x);
- }
-}
-
-int
-dn_delete_queue(void *_q, void *do_free)
-{
- struct dn_queue *q = _q;
- if (q->mq.head)
- dn_free_pkts(q->mq.head);
- free(q);
- return 0;
-}
-
-/*
- * This is a simplified function for testing purposes, which does
- * not implement statistics or random loss.
- * Enqueue a packet in q, subject to space and queue management policy
- * (whose parameters are in q->fs).
- * Update stats for the queue and the scheduler.
- * Return 0 on success, 1 on drop. The packet is consumed anyways.
- */
-int
-dn_enqueue(struct dn_queue *q, struct mbuf* m, int drop)
-{
- if (drop)
- goto drop;
- if (q->ni.length >= 200)
- goto drop;
- mq_append(&q->mq, m);
- q->ni.length++;
- q->ni.tot_bytes += m->m_pkthdr.len;
- return 0;
-
-drop:
- q->ni.drops++;
- return 1;
-}
-
-int
-ipdn_bound_var(int *v, int dflt, int lo, int hi, const char *msg)
-{
- if (*v < lo) {
- *v = dflt;
- } else if (*v > hi) {
- *v = hi;
- }
- return *v;
-}
-
-#ifndef __FreeBSD__
-int
-fls(int mask)
-{
- int bit;
-
- if (mask == 0)
- return (0);
- for (bit = 1; mask != 1; bit++)
- mask = (unsigned int)mask >> 1;
- return (bit);
-}
-#endif
OpenPOWER on IntegriCloud