diff options
256 files changed, 15288 insertions, 4936 deletions
diff --git a/Makefile.inc1 b/Makefile.inc1 index 9f96c7a..5e6d687 100644 --- a/Makefile.inc1 +++ b/Makefile.inc1 @@ -1375,6 +1375,7 @@ _texinfo= gnu/usr.bin/texinfo/libtxi \ ${_bt}-gnu/usr.bin/texinfo/install-info: ${_bt}-gnu/usr.bin/texinfo/libtxi ${_bt}-gnu/usr.bin/texinfo/makeinfo: ${_bt}-gnu/usr.bin/texinfo/libtxi .endif + bootstrap-tools: .PHONY # Please document (add comment) why something is in 'bootstrap-tools'. @@ -1565,7 +1566,6 @@ native-xtools: .PHONY bin/realpath \ bin/rm \ bin/rmdir \ - bin/sh \ bin/sleep \ ${_clang_tblgen} \ usr.bin/ar \ @@ -1583,7 +1583,6 @@ native-xtools: .PHONY usr.bin/bzip2 \ usr.bin/cmp \ usr.bin/dirname \ - usr.bin/env \ usr.bin/fetch \ usr.bin/find \ usr.bin/grep \ @@ -1595,7 +1594,6 @@ native-xtools: .PHONY usr.bin/mt \ usr.bin/patch \ usr.bin/sed \ - usr.bin/sort \ usr.bin/tar \ usr.bin/touch \ usr.bin/tr \ diff --git a/ObsoleteFiles.inc b/ObsoleteFiles.inc index 52e9733..ef2f2f5 100644 --- a/ObsoleteFiles.inc +++ b/ObsoleteFiles.inc @@ -89,6 +89,8 @@ OLD_FILES+=usr/include/netinet6/in6_gif.h OLD_FILES+=usr/tests/sbin/mdconfig/legacy_test OLD_FILES+=usr/tests/sbin/mdconfig/mdconfig.test OLD_FILES+=usr/tests/sbin/mdconfig/run.pl +# 20141126: remove xform_ipip decapsulation fallback +OLD_FILES+=usr/include/netipsec/ipip_var.h # 20141107: overhaul if_gre(4) OLD_FILES+=usr/include/netinet/ip_gre.h # 20141028: debug files accidentally installed as directory name diff --git a/contrib/tcpdump/interface.h b/contrib/tcpdump/interface.h index 175c33e..0c9672b 100644 --- a/contrib/tcpdump/interface.h +++ b/contrib/tcpdump/interface.h @@ -187,6 +187,7 @@ extern void egp_print(const u_char *, u_int); extern u_int enc_if_print(const struct pcap_pkthdr *, const u_char *); extern u_int pflog_if_print(const struct pcap_pkthdr *, const u_char *); extern void pfsync_ip_print(const u_char *, u_int); +extern u_int pfsync_if_print(const struct pcap_pkthdr *, const u_char *); extern u_int arcnet_if_print(const struct pcap_pkthdr *, const u_char *); extern u_int arcnet_linux_if_print(const struct pcap_pkthdr *, const u_char *); extern u_int token_print(const u_char *, u_int, u_int); diff --git a/contrib/tcpdump/print-pfsync.c b/contrib/tcpdump/print-pfsync.c index 7a6da89..fd4dbe9 100644 --- a/contrib/tcpdump/print-pfsync.c +++ b/contrib/tcpdump/print-pfsync.c @@ -53,9 +53,8 @@ static void print_src_dst(const struct pfsync_state_peer *, const struct pfsync_state_peer *, uint8_t); static void print_state(struct pfsync_state *); -#ifdef notyet -void -pfsync_if_print(u_char *user, const struct pcap_pkthdr *h, +u_int +pfsync_if_print(const struct pcap_pkthdr *h, register const u_char *p) { u_int caplen = h->caplen; @@ -75,8 +74,8 @@ out: default_print((const u_char *)p, caplen); } putchar('\n'); + return caplen; } -#endif /* notyet */ void pfsync_ip_print(const u_char *bp, u_int len) diff --git a/contrib/tcpdump/tcpdump.c b/contrib/tcpdump/tcpdump.c index fc10c98..3b3e8f8 100644 --- a/contrib/tcpdump/tcpdump.c +++ b/contrib/tcpdump/tcpdump.c @@ -224,6 +224,9 @@ static struct printer printers[] = { #if defined(DLT_PFLOG) && defined(HAVE_NET_PFVAR_H) { pflog_if_print, DLT_PFLOG }, #endif +#if defined(DLT_PFSYNC) && defined(HAVE_NET_PFVAR_H) + { pfsync_if_print, DLT_PFSYNC}, +#endif #ifdef DLT_FR { fr_if_print, DLT_FR }, #endif diff --git a/contrib/wpa/src/l2_packet/l2_packet_freebsd.c b/contrib/wpa/src/l2_packet/l2_packet_freebsd.c index 2e9a04c..1dc0b8f 100644 --- a/contrib/wpa/src/l2_packet/l2_packet_freebsd.c +++ b/contrib/wpa/src/l2_packet/l2_packet_freebsd.c @@ -208,6 +208,7 @@ static int eth_get(const char *device, u8 ea[ETH_ALEN]) (ifm->ifm_addrs & RTA_IFP) == 0) continue; if (sdl->sdl_family != AF_LINK || sdl->sdl_nlen == 0 || + (strlen(device) != sdl->sdl_nlen) || os_memcmp(sdl->sdl_data, device, sdl->sdl_nlen) != 0) continue; os_memcpy(ea, LLADDR(sdl), sdl->sdl_alen); diff --git a/etc/gettytab b/etc/gettytab index 8b9463e..649241c 100644 --- a/etc/gettytab +++ b/etc/gettytab @@ -166,6 +166,9 @@ X|Xwindow|X window system:\ P|Pc|Pc console:\ :ht:np:sp#9600: +al.Pc:\ + :ht:np:sp#9600:al=root: + # # Weirdo special case for fast crt's with hardcopy devices # diff --git a/etc/mtree/BSD.tests.dist b/etc/mtree/BSD.tests.dist index 3e470d5..38fc83f 100644 --- a/etc/mtree/BSD.tests.dist +++ b/etc/mtree/BSD.tests.dist @@ -224,6 +224,8 @@ .. netinet .. + opencrypto + .. pjdfstest chflags .. diff --git a/etc/pam.d/passwd b/etc/pam.d/passwd index e655083..fb768cf 100644 --- a/etc/pam.d/passwd +++ b/etc/pam.d/passwd @@ -8,4 +8,4 @@ # password #password requisite pam_passwdqc.so enforce=users -password required pam_unix.so no_warn try_first_pass nullok +password required pam_unix.so no_warn try_first_pass diff --git a/etc/pam.d/system b/etc/pam.d/system index b8b7101..faf006b 100644 --- a/etc/pam.d/system +++ b/etc/pam.d/system @@ -9,7 +9,7 @@ auth sufficient pam_opie.so no_warn no_fake_prompts auth requisite pam_opieaccess.so no_warn allow_local #auth sufficient pam_krb5.so no_warn try_first_pass #auth sufficient pam_ssh.so no_warn try_first_pass -auth required pam_unix.so no_warn try_first_pass nullok +auth required pam_unix.so no_warn try_first_pass # account #account required pam_krb5.so diff --git a/etc/pkg/FreeBSD.conf b/etc/pkg/FreeBSD.conf index e4eec8f..68e1d32 100644 --- a/etc/pkg/FreeBSD.conf +++ b/etc/pkg/FreeBSD.conf @@ -8,7 +8,7 @@ # FreeBSD: { - url: "pkg+http://pkg.FreeBSD.org/${ABI}/latest", + url: "pkg+http://pkg.FreeBSD.org/${ABI}/quarterly", mirror_type: "srv", signature_type: "fingerprints", fingerprints: "/usr/share/keys/pkg", diff --git a/include/strings.h b/include/strings.h index 4fb9311..2210df0 100644 --- a/include/strings.h +++ b/include/strings.h @@ -43,6 +43,9 @@ int bcmp(const void *, const void *, size_t) __pure; /* LEGACY */ void bcopy(const void *, void *, size_t); /* LEGACY */ void bzero(void *, size_t); /* LEGACY */ #endif +#if __BSD_VISIBLE +void explicit_bzero(void *, size_t); +#endif #if __XSI_VISIBLE int ffs(int) __pure2; #endif diff --git a/lib/libc/db/hash/hash.c b/lib/libc/db/hash/hash.c index af80929..333b1a1 100644 --- a/lib/libc/db/hash/hash.c +++ b/lib/libc/db/hash/hash.c @@ -422,8 +422,10 @@ hdestroy(HTAB *hashp) if (hashp->tmp_buf) free(hashp->tmp_buf); - if (hashp->fp != -1) + if (hashp->fp != -1) { + (void)_fsync(hashp->fp); (void)_close(hashp->fp); + } free(hashp); @@ -458,6 +460,8 @@ hash_sync(const DB *dbp, u_int32_t flags) return (0); if (__buf_free(hashp, 0, 1) || flush_meta(hashp)) return (ERROR); + if (hashp->fp != -1 && _fsync(hashp->fp) != 0) + return (ERROR); hashp->new_file = 0; return (0); } diff --git a/lib/libc/string/Makefile.inc b/lib/libc/string/Makefile.inc index cc25c25..4efe764 100644 --- a/lib/libc/string/Makefile.inc +++ b/lib/libc/string/Makefile.inc @@ -6,7 +6,8 @@ CFLAGS+= -I${.CURDIR}/locale # machine-independent string sources -MISRCS+=bcmp.c bcopy.c bzero.c ffs.c ffsl.c ffsll.c fls.c flsl.c flsll.c \ +MISRCS+=bcmp.c bcopy.c bzero.c explicit_bzero.c \ + ffs.c ffsl.c ffsll.c fls.c flsl.c flsll.c \ memccpy.c memchr.c memrchr.c memcmp.c \ memcpy.c memmem.c memmove.c memset.c \ stpcpy.c stpncpy.c strcasecmp.c \ diff --git a/lib/libc/string/Symbol.map b/lib/libc/string/Symbol.map index 8e80165..c876e76 100644 --- a/lib/libc/string/Symbol.map +++ b/lib/libc/string/Symbol.map @@ -92,6 +92,7 @@ FBSD_1.1 { }; FBSD_1.3 { + explicit_bzero; strcasecmp_l; strcasestr_l; strchrnul; diff --git a/lib/libc/string/bzero.3 b/lib/libc/string/bzero.3 index 029644a..ea572be 100644 --- a/lib/libc/string/bzero.3 +++ b/lib/libc/string/bzero.3 @@ -35,7 +35,8 @@ .Dt BZERO 3 .Os .Sh NAME -.Nm bzero +.Nm bzero , +.Nm explicit_bzero .Nd write zeroes to a byte string .Sh LIBRARY .Lb libc @@ -43,6 +44,8 @@ .In strings.h .Ft void .Fn bzero "void *b" "size_t len" +.Ft void +.Fn explicit_bzero "void *b" "size_t len" .Sh DESCRIPTION The .Fn bzero @@ -56,6 +59,12 @@ If is zero, .Fn bzero does nothing. +.Pp +The +.Fn explicit_bzero +variant behaves the same, but will not be removed by a compiler's dead store +optimization pass, making it useful for clearing sensitive memory such as a +password. .Sh SEE ALSO .Xr memset 3 , .Xr swab 3 @@ -72,3 +81,10 @@ before it was moved to for .St -p1003.1-2001 compliance. +.Pp +The +.Fn explicit_bzero +function first appeared in +.Ox 5.5 +and +.Fx 11.0 . diff --git a/lib/libc/string/explicit_bzero.c b/lib/libc/string/explicit_bzero.c new file mode 100644 index 0000000..a7811b0 --- /dev/null +++ b/lib/libc/string/explicit_bzero.c @@ -0,0 +1,22 @@ +/* $OpenBSD: explicit_bzero.c,v 1.3 2014/06/21 02:34:26 matthew Exp $ */ +/* + * Public domain. + * Written by Matthew Dempsky. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <string.h> + +__attribute__((weak)) void +__explicit_bzero_hook(void *buf, size_t len) +{ +} + +void +explicit_bzero(void *buf, size_t len) +{ + memset(buf, 0, len); + __explicit_bzero_hook(buf, len); +} diff --git a/lib/libcrypt/Makefile b/lib/libcrypt/Makefile index 798b4b6..aca24d8 100644 --- a/lib/libcrypt/Makefile +++ b/lib/libcrypt/Makefile @@ -9,7 +9,7 @@ SHLIBDIR?= /lib SHLIB_MAJOR= 5 LIB= crypt -.PATH: ${.CURDIR}/../libmd +.PATH: ${.CURDIR}/../libmd ${.CURDIR}/../../sys/crypto/sha2 SRCS= crypt.c misc.c \ crypt-md5.c md5c.c \ crypt-nthash.c md4c.c \ diff --git a/lib/libipsec/pfkey_dump.c b/lib/libipsec/pfkey_dump.c index 068ad0e..872aa43 100644 --- a/lib/libipsec/pfkey_dump.c +++ b/lib/libipsec/pfkey_dump.c @@ -187,6 +187,9 @@ static struct val2str str_alg_enc[] = { #ifdef SADB_X_EALG_AESCTR { SADB_X_EALG_AESCTR, "aes-ctr", }, #endif +#ifdef SADB_X_EALG_AESGCM16 + { SADB_X_EALG_AESGCM16, "aes-gcm-16", }, +#endif #ifdef SADB_X_EALG_CAMELLIACBC { SADB_X_EALG_CAMELLIACBC, "camellia-cbc", }, #endif diff --git a/lib/libmd/Makefile b/lib/libmd/Makefile index d0fe705..c1f6eb9 100644 --- a/lib/libmd/Makefile +++ b/lib/libmd/Makefile @@ -42,8 +42,15 @@ CLEANFILES+= md[245]hl.c md[245].ref md[245].3 mddriver \ sha0.ref sha0hl.c sha1.ref sha1hl.c shadriver \ sha256.ref sha256hl.c sha512.ref sha512hl.c -CFLAGS+= -I${.CURDIR} -.PATH: ${.CURDIR}/${MACHINE_ARCH} +# Define WEAK_REFS to provide weak aliases for libmd symbols +# +# Note that the same sources are also used internally by libcrypt, +# in which case: +# * macros are used to rename symbols to libcrypt internal names +# * no weak aliases are generated +CFLAGS+= -I${.CURDIR} -I${.CURDIR}/../../sys/crypto/sha2 +CFLAGS+= -DWEAK_REFS +.PATH: ${.CURDIR}/${MACHINE_ARCH} ${.CURDIR}/../../sys/crypto/sha2 .if exists(${MACHINE_ARCH}/sha.S) SRCS+= sha.S diff --git a/lib/libmd/md4.h b/lib/libmd/md4.h index 4773513..086a27b 100644 --- a/lib/libmd/md4.h +++ b/lib/libmd/md4.h @@ -35,6 +35,34 @@ typedef struct MD4Context { #include <sys/cdefs.h> __BEGIN_DECLS + +/* Ensure libmd symbols do not clash with libcrypto */ + +#ifndef MD4Init +#define MD4Init _libmd_MD4Init +#endif +#ifndef MD4Update +#define MD4Update _libmd_MD4Update +#endif +#ifndef MD4Pad +#define MD4Pad _libmd_MD4Pad +#endif +#ifndef MD4Final +#define MD4Final _libmd_MD4Final +#endif +#ifndef MD4End +#define MD4End _libmd_MD4End +#endif +#ifndef MD4File +#define MD4File _libmd_MD4File +#endif +#ifndef MD4FileChunk +#define MD4FileChunk _libmd_MD4FileChunk +#endif +#ifndef MD4Data +#define MD4Data _libmd_MD4Data +#endif + void MD4Init(MD4_CTX *); void MD4Update(MD4_CTX *, const void *, unsigned int); void MD4Pad(MD4_CTX *); diff --git a/lib/libmd/md4c.c b/lib/libmd/md4c.c index 1211a98..de1a7b4a 100644 --- a/lib/libmd/md4c.c +++ b/lib/libmd/md4c.c @@ -290,3 +290,18 @@ unsigned int len; output[i] = ((UINT4)input[j]) | (((UINT4)input[j+1]) << 8) | (((UINT4)input[j+2]) << 16) | (((UINT4)input[j+3]) << 24); } + +#ifdef WEAK_REFS +/* When building libmd, provide weak references. Note: this is not + activated in the context of compiling these sources for internal + use in libcrypt. + */ +#undef MD4Init +__weak_reference(_libmd_MD4Init, MD4Init); +#undef MD4Update +__weak_reference(_libmd_MD4Update, MD4Update); +#undef MD4Pad +__weak_reference(_libmd_MD4Pad, MD4Pad); +#undef MD4Final +__weak_reference(_libmd_MD4Final, MD4Final); +#endif diff --git a/lib/libmd/md5.h b/lib/libmd/md5.h index 803a88f..5ab3b14 100644 --- a/lib/libmd/md5.h +++ b/lib/libmd/md5.h @@ -1,4 +1,41 @@ +/* $FreeBSD$ */ + #ifndef _MD5_H_ #define _MD5_H_ + +#ifndef _KERNEL + +/* Ensure libmd symbols do not clash with libcrypto */ + +#ifndef MD5Init +#define MD5Init _libmd_MD5Init +#endif +#ifndef MD5Update +#define MD5Update _libmd_MD5Update +#endif +#ifndef MD5Pad +#define MD5Pad _libmd_MD5Pad +#endif +#ifndef MD5Final +#define MD5Final _libmd_MD5Final +#endif +#ifndef MD5Transform +#define MD5Transform _libmd_MD5Transform +#endif +#ifndef MD5End +#define MD5End _libmd_MD5End +#endif +#ifndef MD5File +#define MD5File _libmd_MD5File +#endif +#ifndef MD5FileChunk +#define MD5FileChunk _libmd_MD5FileChunk +#endif +#ifndef MD5Data +#define MD5Data _libmd_MD5Data +#endif + +#endif + #include <sys/md5.h> #endif /* _MD5_H_ */ diff --git a/lib/libmd/md5c.c b/lib/libmd/md5c.c index d097390..6b50464 100644 --- a/lib/libmd/md5c.c +++ b/lib/libmd/md5c.c @@ -39,7 +39,7 @@ __FBSDID("$FreeBSD$"); #include <machine/endian.h> #include <sys/endian.h> -#include <sys/md5.h> +#include "md5.h" static void MD5Transform(u_int32_t [4], const unsigned char [64]); @@ -335,3 +335,20 @@ MD5Transform (state, block) /* Zeroize sensitive information. */ memset ((void *)x, 0, sizeof (x)); } + +#ifdef WEAK_REFS +/* When building libmd, provide weak references. Note: this is not + activated in the context of compiling these sources for internal + use in libcrypt. + */ +#undef MD5Init +__weak_reference(_libmd_MD5Init, MD5Init); +#undef MD5Update +__weak_reference(_libmd_MD5Update, MD5Update); +#undef MD5Pad +__weak_reference(_libmd_MD5Pad, MD5Pad); +#undef MD5Final +__weak_reference(_libmd_MD5Final, MD5Final); +#undef MD5Transform +__weak_reference(_libmd_MD5Transform, MD5Transform); +#endif diff --git a/lib/libmd/mdXhl.c b/lib/libmd/mdXhl.c index 2954548..62be235 100644 --- a/lib/libmd/mdXhl.c +++ b/lib/libmd/mdXhl.c @@ -96,3 +96,18 @@ MDXData (const void *data, unsigned int len, char *buf) MDXUpdate(&ctx,data,len); return (MDXEnd(&ctx, buf)); } + +#ifdef WEAK_REFS +/* When building libmd, provide weak references. Note: this is not + activated in the context of compiling these sources for internal + use in libcrypt. + */ +#undef MDXEnd +__weak_reference(_libmd_MDXEnd, MDXEnd); +#undef MDXFile +__weak_reference(_libmd_MDXFile, MDXFile); +#undef MDXFileChunk +__weak_reference(_libmd_MDXFileChunk, MDXFileChunk); +#undef MDXData +__weak_reference(_libmd_MDXData, MDXData); +#endif diff --git a/lib/libmd/ripemd.h b/lib/libmd/ripemd.h index 2ff35cc..778808f 100644 --- a/lib/libmd/ripemd.h +++ b/lib/libmd/ripemd.h @@ -81,6 +81,41 @@ typedef struct RIPEMD160state_st { } RIPEMD160_CTX; __BEGIN_DECLS + +/* Ensure libmd symbols do not clash with libcrypto */ + +#ifndef RIPEMD160_Init +#define RIPEMD160_Init _libmd_RIPEMD160_Init +#endif +#ifndef RIPEMD160_Update +#define RIPEMD160_Update _libmd_RIPEMD160_Update +#endif +#ifndef RIPEMD160_Final +#define RIPEMD160_Final _libmd_RIPEMD160_Final +#endif +#ifndef RIPEMD160_End +#define RIPEMD160_End _libmd_RIPEMD160_End +#endif +#ifndef RIPEMD160_File +#define RIPEMD160_File _libmd_RIPEMD160_File +#endif +#ifndef RIPEMD160_FileChunk +#define RIPEMD160_FileChunk _libmd_RIPEMD160_FileChunk +#endif +#ifndef RIPEMD160_Data +#define RIPEMD160_Data _libmd_RIPEMD160_Data +#endif + +#ifndef RIPEMD160_Transform +#define RIPEMD160_Transform _libmd_RIPEMD160_Transform +#endif +#ifndef RMD160_version +#define RMD160_version _libmd_RMD160_version +#endif +#ifndef ripemd160_block +#define ripemd160_block _libmd_ripemd160_block +#endif + void RIPEMD160_Init(RIPEMD160_CTX *c); void RIPEMD160_Update(RIPEMD160_CTX *c, const void *data, size_t len); diff --git a/lib/libmd/rmd160c.c b/lib/libmd/rmd160c.c index e01f1e0..5a3b783 100644 --- a/lib/libmd/rmd160c.c +++ b/lib/libmd/rmd160c.c @@ -545,3 +545,22 @@ unsigned long *l; } } #endif + +#ifdef WEAK_REFS +/* When building libmd, provide weak references. Note: this is not + activated in the context of compiling these sources for internal + use in libcrypt. + */ +#undef RIPEMD160_Init +__weak_reference(_libmd_RIPEMD160_Init, RIPEMD160_Init); +#undef RIPEMD160_Update +__weak_reference(_libmd_RIPEMD160_Update, RIPEMD160_Update); +#undef RIPEMD160_Final +__weak_reference(_libmd_RIPEMD160_Final, RIPEMD160_Final); +#undef RIPEMD160_Transform +__weak_reference(_libmd_RIPEMD160_Transform, RIPEMD160_Transform); +#undef RMD160_version +__weak_reference(_libmd_RMD160_version, RMD160_version); +#undef ripemd160_block +__weak_reference(_libmd_ripemd160_block, ripemd160_block); +#endif diff --git a/lib/libmd/sha.h b/lib/libmd/sha.h index 8a0b7c2..c608c5b 100644 --- a/lib/libmd/sha.h +++ b/lib/libmd/sha.h @@ -79,6 +79,73 @@ typedef struct SHAstate_st { #define SHA1_CTX SHA_CTX __BEGIN_DECLS + +/* Ensure libmd symbols do not clash with libcrypto */ + +#ifndef SHA_Init +#define SHA_Init _libmd_SHA_Init +#endif +#ifndef SHA_Update +#define SHA_Update _libmd_SHA_Update +#endif +#ifndef SHA_Final +#define SHA_Final _libmd_SHA_Final +#endif +#ifndef SHA_End +#define SHA_End _libmd_SHA_End +#endif +#ifndef SHA_File +#define SHA_File _libmd_SHA_File +#endif +#ifndef SHA_FileChunk +#define SHA_FileChunk _libmd_SHA_FileChunk +#endif +#ifndef SHA_Data +#define SHA_Data _libmd_SHA_Data +#endif + +#ifndef SHA_Transform +#define SHA_Transform _libmd_SHA_Transform +#endif +#ifndef SHA_version +#define SHA_version _libmd_SHA_version +#endif +#ifndef sha_block +#define sha_block _libmd_sha_block +#endif + +#ifndef SHA1_Init +#define SHA1_Init _libmd_SHA1_Init +#endif +#ifndef SHA1_Update +#define SHA1_Update _libmd_SHA1_Update +#endif +#ifndef SHA1_Final +#define SHA1_Final _libmd_SHA1_Final +#endif +#ifndef SHA1_End +#define SHA1_End _libmd_SHA1_End +#endif +#ifndef SHA1_File +#define SHA1_File _libmd_SHA1_File +#endif +#ifndef SHA1_FileChunk +#define SHA1_FileChunk _libmd_SHA1_FileChunk +#endif +#ifndef SHA1_Data +#define SHA1_Data _libmd_SHA1_Data +#endif + +#ifndef SHA1_Transform +#define SHA1_Transform _libmd_SHA1_Transform +#endif +#ifndef SHA1_version +#define SHA1_version _libmd_SHA1_version +#endif +#ifndef sha1_block +#define sha1_block _libmd_sha1_block +#endif + void SHA_Init(SHA_CTX *c); void SHA_Update(SHA_CTX *c, const void *data, size_t len); void SHA_Final(unsigned char *md, SHA_CTX *c); @@ -86,6 +153,7 @@ char *SHA_End(SHA_CTX *, char *); char *SHA_File(const char *, char *); char *SHA_FileChunk(const char *, char *, off_t, off_t); char *SHA_Data(const void *, unsigned int, char *); + void SHA1_Init(SHA_CTX *c); void SHA1_Update(SHA_CTX *c, const void *data, size_t len); void SHA1_Final(unsigned char *md, SHA_CTX *c); diff --git a/lib/libmd/sha1c.c b/lib/libmd/sha1c.c index ba3278a..93a4ff3 100644 --- a/lib/libmd/sha1c.c +++ b/lib/libmd/sha1c.c @@ -488,3 +488,33 @@ SHA_CTX *c; /* memset((char *)&c,0,sizeof(c));*/ } +#ifdef WEAK_REFS +/* When building libmd, provide weak references. Note: this is not + activated in the context of compiling these sources for internal + use in libcrypt. + */ +#undef SHA_Init +__weak_reference(_libmd_SHA_Init, SHA_Init); +#undef SHA_Update +__weak_reference(_libmd_SHA_Update, SHA_Update); +#undef SHA_Final +__weak_reference(_libmd_SHA_Final, SHA_Final); +#undef SHA_Transform +__weak_reference(_libmd_SHA_Transform, SHA_Transform); +#undef SHA_version +__weak_reference(_libmd_SHA_version, SHA_version); +#undef sha_block +__weak_reference(_libmd_sha_block, sha_block); +#undef SHA1_Init +__weak_reference(_libmd_SHA1_Init, SHA1_Init); +#undef SHA1_Update +__weak_reference(_libmd_SHA1_Update, SHA1_Update); +#undef SHA1_Final +__weak_reference(_libmd_SHA1_Final, SHA1_Final); +#undef SHA1_Transform +__weak_reference(_libmd_SHA1_Transform, SHA1_Transform); +#undef SHA1_version +__weak_reference(_libmd_SHA1_version, SHA1_version); +#undef sha1_block +__weak_reference(_libmd_sha1_block, sha1_block); +#endif diff --git a/lib/libmd/sha512.h b/lib/libmd/sha512.h index 8998f4c..b034ca5 100644 --- a/lib/libmd/sha512.h +++ b/lib/libmd/sha512.h @@ -38,6 +38,38 @@ typedef struct SHA512Context { } SHA512_CTX; __BEGIN_DECLS + +/* Ensure libmd symbols do not clash with libcrypto */ + +#ifndef SHA512_Init +#define SHA512_Init _libmd_SHA512_Init +#endif +#ifndef SHA512_Update +#define SHA512_Update _libmd_SHA512_Update +#endif +#ifndef SHA512_Final +#define SHA512_Final _libmd_SHA512_Final +#endif +#ifndef SHA512_End +#define SHA512_End _libmd_SHA512_End +#endif +#ifndef SHA512_File +#define SHA512_File _libmd_SHA512_File +#endif +#ifndef SHA512_FileChunk +#define SHA512_FileChunk _libmd_SHA512_FileChunk +#endif +#ifndef SHA512_Data +#define SHA512_Data _libmd_SHA512_Data +#endif + +#ifndef SHA512_Transform +#define SHA512_Transform _libmd_SHA512_Transform +#endif +#ifndef SHA512_version +#define SHA512_version _libmd_SHA512_version +#endif + void SHA512_Init(SHA512_CTX *); void SHA512_Update(SHA512_CTX *, const void *, size_t); void SHA512_Final(unsigned char [64], SHA512_CTX *); diff --git a/lib/libmd/sha512c.c b/lib/libmd/sha512c.c index c2a93be..e5e52a8 100644 --- a/lib/libmd/sha512c.c +++ b/lib/libmd/sha512c.c @@ -318,3 +318,18 @@ SHA512_Final(unsigned char digest[64], SHA512_CTX * ctx) /* Clear the context state */ memset((void *)ctx, 0, sizeof(*ctx)); } + +#ifdef WEAK_REFS +/* When building libmd, provide weak references. Note: this is not + activated in the context of compiling these sources for internal + use in libcrypt. + */ +#undef SHA512_Init +__weak_reference(_libmd_SHA512_Init, SHA512_Init); +#undef SHA512_Update +__weak_reference(_libmd_SHA512_Update, SHA512_Update); +#undef SHA512_Final +__weak_reference(_libmd_SHA512_Final, SHA512_Final); +#undef SHA512_Transform +__weak_reference(_libmd_SHA512_Transform, SHA512_Transform); +#endif diff --git a/lib/libmd/shadriver.c b/lib/libmd/shadriver.c index adaf684..29a3cf0 100644 --- a/lib/libmd/shadriver.c +++ b/lib/libmd/shadriver.c @@ -31,10 +31,13 @@ __FBSDID("$FreeBSD$"); #endif #if SHA == 1 +#undef SHA_Data #define SHA_Data SHA1_Data #elif SHA == 256 +#undef SHA_Data #define SHA_Data SHA256_Data #elif SHA == 512 +#undef SHA_Data #define SHA_Data SHA512_Data #endif diff --git a/release/conf/pfSense_make.conf b/release/conf/pfSense_make.conf new file mode 100644 index 0000000..02cbb6b --- /dev/null +++ b/release/conf/pfSense_make.conf @@ -0,0 +1,7 @@ +HOSTAPD_CFLAGS+=-DEAP_PAX -DEAP_SAKE -DCONFIG_RSN_PREAUTH -DCONFIG_IEEE80211N +HOSTAPD_CFLAGS+=-DEAP_SERVER -DEAP_GTC -DEAP_AKA -DEAP_SIM -DEAP_GPSK +WPA_SUPPLICANT_CFLAGS+=-DCONFIG_IEEE80211N + +# Default serial console speed +BOOT_COMCONSOLE_SPEED=115200 +BOOT_BOOT0_COMCONSOLE_SPEED=0 diff --git a/release/conf/pfSense_src.conf b/release/conf/pfSense_src.conf new file mode 100644 index 0000000..924c78b --- /dev/null +++ b/release/conf/pfSense_src.conf @@ -0,0 +1,53 @@ +WITHOUT_AMD=YES +WITHOUT_ASSERT_DEBUG=YES +WITHOUT_ATM=YES +WITHOUT_AUDIT=YES +WITHOUT_AUTHPF=YES +WITHOUT_BLUETOOTH=YES +WITHOUT_BSDINSTALL=yes +WITHOUT_CALENDAR=YES +WITHOUT_CLANG=YES +WITHOUT_CTM=YES +WITHOUT_DICT=YES +WITHOUT_EXAMPLES=YES +WITHOUT_FDT=YES +WITHOUT_FREEBSD_UPDATE=YES +WITHOUT_GAMES=YES +WITHOUT_GCC=YES +WITHOUT_GCOV=YES +WITHOUT_GDB=YES +WITHOUT_GPIB=YES +WITHOUT_GROFF=YES +WITHOUT_HTML=YES +WITHOUT_INETD=YES +WITHOUT_INFO=YES +WITHOUT_IPFILTER=YES +WITHOUT_IPX=YES +WITHOUT_JAIL=YES +WITHOUT_LIB32=YES +WITHOUT_LOCALES=YES +WITHOUT_LOCATE=YES +WITHOUT_LPR=YES +WITHOUT_MAIL=YES +WITHOUT_MAILWRAPPER=YES +WITHOUT_MAN=YES +WITHOUT_MAN_UTILS=YES +WITHOUT_NIS=YES +WITHOUT_NLS=YES +WITHOUT_NLS_CATALOGS=YES +WITHOUT_NS_CACHING=YES +WITHOUT_NTP=YES +WITHOUT_PC_SYSINSTALL=yes +WITHOUT_PORTSNAP=yes +WITHOUT_PROFILE=YES +WITHOUT_QUOTAS=YES +WITHOUT_RCMDS=YES +WITHOUT_RCS=YES +WITHOUT_SENDMAIL=YES +WITHOUT_SETUID_LOGIN=YES +WITHOUT_SHAREDOCS=YES +WITHOUT_SVN=YES +WITHOUT_SVNLITE=YES +WITHOUT_SYSCONS=YES +WITHOUT_SYSINSTALL=yes +WITHOUT_UNBOUND=YES diff --git a/release/pkg_repos/pfSense-devel.conf.template b/release/pkg_repos/pfSense-devel.conf.template new file mode 100644 index 0000000..fa5eb1b --- /dev/null +++ b/release/pkg_repos/pfSense-devel.conf.template @@ -0,0 +1,17 @@ +FreeBSD: { enabled: no } + +%%PRODUCT_NAME%%-core: { + url: "%%PKG_REPO_SERVER%%/%%PRODUCT_NAME%%_%%GIT_REPO_BRANCH_OR_TAG%%_%%ARCH%%-core", + mirror_type: "srv", + signature_type: "fingerprints", + fingerprints: "/usr/local/share/%%PRODUCT_NAME%%/keys/pkg" + enabled: yes +} + +%%PRODUCT_NAME%%: { + url: "%%PKG_REPO_SERVER%%/%%PRODUCT_NAME%%_%%GIT_REPO_BRANCH_OR_TAG%%_%%ARCH%%-%%PRODUCT_NAME%%_%%GIT_REPO_BRANCH_OR_TAG%%", + mirror_type: "srv", + signature_type: "fingerprints", + fingerprints: "/usr/local/share/%%PRODUCT_NAME%%/keys/pkg" + enabled: yes +} diff --git a/release/pkg_repos/pfSense.conf.template b/release/pkg_repos/pfSense.conf.template new file mode 100644 index 0000000..2b30aec --- /dev/null +++ b/release/pkg_repos/pfSense.conf.template @@ -0,0 +1,15 @@ +FreeBSD: { enabled: no } + +%%PRODUCT_NAME%%-core: { + url: "%%PKG_REPO_SERVER%%/%%PRODUCT_NAME%%_%%GIT_REPO_BRANCH_OR_TAG%%_%%ARCH%%-core", + mirror_type: "srv", + signature_type: "none", + enabled: yes +} + +%%PRODUCT_NAME%%: { + url: "%%PKG_REPO_SERVER%%/%%PRODUCT_NAME%%_%%GIT_REPO_BRANCH_OR_TAG%%_%%ARCH%%-%%PRODUCT_NAME%%_%%GIT_REPO_BRANCH_OR_TAG%%", + mirror_type: "srv", + signature_type: "none", + enabled: yes +} diff --git a/sbin/ifconfig/Makefile b/sbin/ifconfig/Makefile index 84509d7..0718081 100644 --- a/sbin/ifconfig/Makefile +++ b/sbin/ifconfig/Makefile @@ -34,6 +34,7 @@ SRCS+= ifvlan.c # SIOC[GS]ETVLAN support SRCS+= ifvxlan.c # VXLAN support SRCS+= ifgre.c # GRE keys etc SRCS+= ifgif.c # GIF reversed header workaround +SRCS+= ifstf.c # STF configuration options SRCS+= sfp.c # SFP/SFP+ information DPADD+= ${LIBM} diff --git a/sbin/ifconfig/ifconfig.c b/sbin/ifconfig/ifconfig.c index 19bd1ac..1b89e67 100644 --- a/sbin/ifconfig/ifconfig.c +++ b/sbin/ifconfig/ifconfig.c @@ -634,6 +634,23 @@ top: return(0); } +static void +setaddrfirst(const char *addr, int dummy __unused, int s, + const struct afswtch *afp) +{ + + if (afp == NULL) + err(2, "No address family"); + if (afp->af_getaddr == NULL) + err(2, "No appropriate functions from address family"); + afp->af_getaddr(addr, ADDR); + + strncpy(afp->af_addreq, name, sizeof ifr.ifr_name); + printf("Interface name: %s, socket %d, addr %s\n", name, s, addr); + if (ioctl(s, SIOCORDERIFADDR, afp->af_addreq) < 0) + err(1, "SIOCORDERIFADDR"); +} + /*ARGSUSED*/ static void setifaddr(const char *addr, int param, int s, const struct afswtch *afp) @@ -1226,6 +1243,7 @@ static struct cmd basic_cmds[] = { DEF_CMD("noicmp", IFF_LINK1, setifflags), DEF_CMD_ARG("mtu", setifmtu), DEF_CMD_ARG("name", setifname), + DEF_CMD_ARG("setfirst", setaddrfirst), }; static __constructor void diff --git a/sbin/ifconfig/ifpfsync.c b/sbin/ifconfig/ifpfsync.c index 9dbe1d6..4094610 100644 --- a/sbin/ifconfig/ifpfsync.c +++ b/sbin/ifconfig/ifpfsync.c @@ -203,7 +203,8 @@ pfsync_status(int s) if (preq.pfsyncr_syncdev[0] != '\0' || preq.pfsyncr_syncpeer.s_addr != INADDR_PFSYNC_GROUP) { printf("maxupd: %d ", preq.pfsyncr_maxupdates); - printf("defer: %s\n", preq.pfsyncr_defer ? "on" : "off"); + printf("defer: %s\n", (preq.pfsyncr_defer & PFSYNCF_DEFER) ? "on" : "off"); + printf("\tsyncok: %d\n", (preq.pfsyncr_defer & PFSYNCF_OK) ? 1 : 0); } } diff --git a/sbin/ifconfig/ifstf.c b/sbin/ifconfig/ifstf.c new file mode 100644 index 0000000..7d58909 --- /dev/null +++ b/sbin/ifconfig/ifstf.c @@ -0,0 +1,157 @@ +/*- + * Copyright 2013 Ermal Luci + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <sys/param.h> +#include <sys/ioctl.h> +#include <sys/socket.h> +#include <sys/sockio.h> + +#include <stdlib.h> +#include <unistd.h> + +#include <net/ethernet.h> +#include <net/if.h> +#include <net/route.h> + +#include <netinet/in.h> +#include <sys/mbuf.h> +#include <net/if_stf.h> +#include <arpa/inet.h> + +#include <ctype.h> +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <unistd.h> +#include <err.h> +#include <errno.h> + +#include "ifconfig.h" + +static int +do_cmd(int sock, u_long op, void *arg, size_t argsize, int set) +{ + struct ifdrv ifd; + + memset(&ifd, 0, sizeof(ifd)); + + strlcpy(ifd.ifd_name, ifr.ifr_name, sizeof(ifd.ifd_name)); + ifd.ifd_cmd = op; + ifd.ifd_len = argsize; + ifd.ifd_data = arg; + + return (ioctl(sock, set ? SIOCSDRVSPEC : SIOCGDRVSPEC, &ifd)); +} + +static void +stf_status(int s) +{ + struct stfv4args param; + + if (do_cmd(s, STF_GV4NET, ¶m, sizeof(param), 0) < 0) + return; + + printf("\tv4net %s/%d -> ", inet_ntoa(param.inaddr), param.prefix ? param.prefix : 32); + printf("tv4br %s\n", inet_ntoa(param.dstv4_addr)); + + return; +} + +static void +setstf_br(const char *val, int d, int s, const struct afswtch *afp) +{ + struct stfv4args req; + struct sockaddr_in sin; + + memset(&req, 0, sizeof(req)); + + sin.sin_len = sizeof(sin); + sin.sin_family = AF_INET; + + if (!inet_aton(val, &sin.sin_addr)) + errx(1, "%s: bad value", val); + + req.dstv4_addr = sin.sin_addr; + if (do_cmd(s, STF_SDSTV4, &req, sizeof(req), 1) < 0) + err(1, "STF_SV4DST %s", val); +} + +static void +setstf_set(const char *val, int d, int s, const struct afswtch *afp) +{ + struct stfv4args req; + struct sockaddr_in sin; + const char *errstr; + char *p = NULL; + + memset(&req, 0, sizeof(req)); + + sin.sin_len = sizeof(sin); + sin.sin_family = AF_INET; + + p = strrchr(val, '/'); + if (p == NULL) + errx(2, "Wrong argument given"); + + *p = '\0'; + if (!isdigit(*(p + 1))) + errstr = "invalid"; + else + req.prefix = (int)strtonum(p + 1, 0, 32, &errstr); + if (errstr != NULL) { + *p = '/'; + errx(1, "%s: bad value (width %s)", val, errstr); + } + + if (!inet_aton(val, &sin.sin_addr)) + errx(1, "%s: bad value", val); + + req.inaddr = sin.sin_addr; + if (do_cmd(s, STF_SV4NET, &req, sizeof(req), 1) < 0) + err(1, "STF_SV4NET %s", val); +} + +static struct cmd stf_cmds[] = { + DEF_CMD_ARG("stfv4net", setstf_set), + DEF_CMD_ARG("stfv4br", setstf_br), +}; +static struct afswtch af_stf = { + .af_name = "af_stf", + .af_af = AF_UNSPEC, + .af_other_status = stf_status, +}; + +static __constructor void +stf_ctor(void) +{ +#define N(a) (sizeof(a) / sizeof(a[0])) + int i; + + for (i = 0; i < N(stf_cmds); i++) + cmd_register(&stf_cmds[i]); + af_register(&af_stf); +#undef N +} diff --git a/sbin/ifconfig/ifvlan.c b/sbin/ifconfig/ifvlan.c index 944ae17..714e4e0 100644 --- a/sbin/ifconfig/ifvlan.c +++ b/sbin/ifconfig/ifvlan.c @@ -1,6 +1,10 @@ /* - * Copyright (c) 1999 - * Bill Paul <wpaul@ctr.columbia.edu>. All rights reserved. + * Copyright (c) 1999 Bill Paul <wpaul@ctr.columbia.edu> + * Copyright (c) 2012 ADARA Networks, Inc. + * All rights reserved. + * + * Portions of this software were developed by Robert N. M. Watson under + * contract to ADARA Networks, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -79,10 +83,14 @@ vlan_status(int s) { struct vlanreq vreq; - if (getvlan(s, &ifr, &vreq) != -1) - printf("\tvlan: %d parent interface: %s\n", - vreq.vlr_tag, vreq.vlr_parent[0] == '\0' ? - "<none>" : vreq.vlr_parent); + if (getvlan(s, &ifr, &vreq) == -1) + return; + printf("\tvlan: %d", vreq.vlr_tag); + if (ioctl(s, SIOCGVLANPCP, (caddr_t)&ifr) != -1) + printf(" vlanpcp: %u", ifr.ifr_vlan_pcp); + printf(" parent interface: %s", vreq.vlr_parent[0] == '\0' ? + "<none>" : vreq.vlr_parent); + printf("\n"); } static void @@ -150,6 +158,22 @@ DECL_CMD_FUNC(setvlandev, val, d) } static +DECL_CMD_FUNC(setvlanpcp, val, d) +{ + u_long ul; + char *endp; + + ul = strtoul(val, &endp, 0); + if (*endp != '\0') + errx(1, "invalid value for vlanpcp"); + if (ul > 7) + errx(1, "value for vlanpcp out of range"); + ifr.ifr_vlan_pcp = ul; + if (ioctl(s, SIOCSVLANPCP, (caddr_t)&ifr) == -1) + err(1, "SIOCSVLANPCP"); +} + +static DECL_CMD_FUNC(unsetvlandev, val, d) { struct vlanreq vreq; @@ -170,6 +194,7 @@ DECL_CMD_FUNC(unsetvlandev, val, d) static struct cmd vlan_cmds[] = { DEF_CLONE_CMD_ARG("vlan", setvlantag), DEF_CLONE_CMD_ARG("vlandev", setvlandev), + DEF_CMD_ARG("vlanpcp", setvlanpcp), /* NB: non-clone cmds */ DEF_CMD_ARG("vlan", setvlantag), DEF_CMD_ARG("vlandev", setvlandev), diff --git a/sbin/ipfw/Makefile b/sbin/ipfw/Makefile index 6aea26b..e137e08 100644 --- a/sbin/ipfw/Makefile +++ b/sbin/ipfw/Makefile @@ -3,7 +3,7 @@ .include <bsd.own.mk> PROG= ipfw -SRCS= ipfw2.c dummynet.c ipv6.c main.c nat.c +SRCS= ipfw2.c dummynet.c ipv6.c main.c nat.c context.c WARNS?= 2 .if ${MK_PF} != "no" diff --git a/sbin/ipfw/context.c b/sbin/ipfw/context.c new file mode 100644 index 0000000..bce576e --- /dev/null +++ b/sbin/ipfw/context.c @@ -0,0 +1,148 @@ +/* + * Copyright (c) 2013 Ermal Lu‡i + * + * Redistribution and use in source forms, with and without modification, + * are permitted provided that this entire comment appears intact. + * + * Redistribution in binary form may occur without any restrictions. + * Obviously, it would be nice if you gave credit where credit is due + * but requiring it would be too onerous. + * + * This software is provided ``AS IS'' without any warranties of any kind. + * + * $FreeBSD$ + */ + +#include <sys/types.h> +#include <sys/socket.h> + +#include <net/if.h> +#include <net/if_var.h> + +#include <netinet/in.h> +#include <netinet/ip_fw.h> + +#include "ipfw2.h" + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <sysexits.h> +#include <string.h> +#include <errno.h> +#include <err.h> + +extern int ipfw_socket; + +int +ipfw_context_handler(int ac, char **av) +{ + ip_fw3_opheader *op3; + int error = 0; + uint32_t action = 0; + socklen_t len, nlen; + char *ifname; + + av++; + ac--; + NEED1("bad arguments, for usage summary ``ipfw''"); + + if (!strncmp(*av, "list", strlen(*av))) { + action = IP_FW_CTX_GET; + av++; + ac--; + if (ac > 0) + errx(EX_DATAERR, "list: does not take any extra arguments."); + + } else { + co.ctx = atoi(*av); + + av++; + ac--; + NEED1("bad arguments, for usage summary ``ipfw''"); + + if (!strncmp(*av, "create", strlen(*av))) + action = IP_FW_CTX_ADD; + else if (!strncmp(*av, "destroy", strlen(*av))) + action = IP_FW_CTX_DEL; + else { + if (!strncmp(*av, "madd", strlen(*av))) + action = IP_FW_CTX_ADDMEMBER; + else if (!strncmp(*av, "mdel", strlen(*av))) + action = IP_FW_CTX_DELMEMBER; + else + errx(EX_DATAERR, "Wrong parameters passed"); + + av++; + ac--; + NEED1("bad arguments, for usage summary ``ipfw''"); + + ifname = *av; + } + + ac--; + if (ac > 0) + errx(EX_DATAERR, "context handling: Too many arguments passed"); + + } + + if (co.test_only) + return (0); + + if (ipfw_socket < 0) + ipfw_socket = socket(AF_INET, SOCK_RAW, IPPROTO_RAW); + if (ipfw_socket < 0) + err(EX_UNAVAILABLE, "socket"); + + switch (action) { + case IP_FW_CTX_ADD: + case IP_FW_CTX_DEL: + case IP_FW_CTX_SET: + len = sizeof(ip_fw3_opheader); + op3 = alloca(len); + /* Zero reserved fields */ + memset(op3, 0, sizeof(ip_fw3_opheader)); + op3->opcode = action; + op3->ctxid = co.ctx; + error = setsockopt(ipfw_socket, IPPROTO_IP, IP_FW3, op3, len); + break; + case IP_FW_CTX_ADDMEMBER: + case IP_FW_CTX_DELMEMBER: + len = sizeof(ip_fw3_opheader) + strlen(ifname) + 1; + op3 = alloca(len); + /* Zero reserved fields */ + memset(op3, 0, sizeof(ip_fw3_opheader) + strlen(ifname) + 1); + memcpy((op3 + 1), ifname, strlen(ifname)); + op3->opcode = action; + op3->ctxid = co.ctx; + error = setsockopt(ipfw_socket, IPPROTO_IP, IP_FW3, op3, len); + break; + case IP_FW_CTX_GET: + len = sizeof(ip_fw3_opheader) + 1000; + nlen = len; + do { + if (nlen > len) { + len = nlen; + } + op3 = alloca(len); + /* Zero reserved fields */ + memset(op3, 0, len); + op3->opcode = action; + op3->ctxid = co.ctx; + nlen = len; + error = getsockopt(ipfw_socket, IPPROTO_IP, IP_FW3, op3, &nlen); + } while (nlen > len && !error); + + if (!error) { + if (nlen == 0) + printf("There are no contexts defined\n"); + else + printf("Currently defined contexts and their members:\n%s\n", (char *)op3); + } else + err(EX_UNAVAILABLE, "Error returned: %s\n", strerror(error)); + + break; + } + + return (error); +} diff --git a/sbin/ipfw/dummynet.c b/sbin/ipfw/dummynet.c index 28dc2c7..c5b5253 100644 --- a/sbin/ipfw/dummynet.c +++ b/sbin/ipfw/dummynet.c @@ -538,10 +538,10 @@ read_bandwidth(char *arg, int *bandwidth, char *if_name, int namelen) if_name[namelen] = '\0'; *bandwidth = 0; } else { /* read bandwidth value */ - int bw; + double bw; char *end = NULL; - bw = strtoul(arg, &end, 0); + bw = strtod(arg, &end); if (*end == 'K' || *end == 'k') { end++; bw *= 1000; @@ -557,7 +557,7 @@ read_bandwidth(char *arg, int *bandwidth, char *if_name, int namelen) if (bw < 0) errx(EX_DATAERR, "bandwidth too large"); - *bandwidth = bw; + *bandwidth = (int)bw; if (if_name) if_name[0] = '\0'; } diff --git a/sbin/ipfw/ipfw2.c b/sbin/ipfw/ipfw2.c index ff8395d..11ccb9f 100644 --- a/sbin/ipfw/ipfw2.c +++ b/sbin/ipfw/ipfw2.c @@ -54,7 +54,7 @@ #include <netinet/tcp.h> #include <arpa/inet.h> -struct cmdline_opts co; /* global options */ +struct cmdline_opts co = { 0 }; /* global options */ int resvd_set_number = RESVD_SET; @@ -422,6 +422,7 @@ safe_realloc(void *ptr, size_t size) int do_cmd(int optname, void *optval, uintptr_t optlen) { + ip_fw3_opheader op3; int i; if (co.test_only) @@ -432,6 +433,15 @@ do_cmd(int optname, void *optval, uintptr_t optlen) if (ipfw_socket < 0) err(EX_UNAVAILABLE, "socket"); + if (optname != IP_FW3 && optname != IP_DUMMYNET3 && optname != -IP_DUMMYNET3) { + memset(&op3, 0, sizeof op3); + op3.ctxid = co.ctx; + op3.opcode = IP_FW_CTX_SET; + i = setsockopt(ipfw_socket, IPPROTO_IP, IP_FW3, &op3, sizeof(op3)); + if (i) + errx(EX_OSERR, "setsockopt: choosing context"); + } + if (optname == IP_FW_GET || optname == IP_DUMMYNET_GET || optname == IP_FW_ADD || optname == IP_FW3 || optname == IP_FW_NAT_GET_CONFIG || @@ -477,6 +487,7 @@ do_setcmd3(int optname, void *optval, socklen_t optlen) memset(op3, 0, sizeof(ip_fw3_opheader)); memcpy(op3 + 1, optval, optlen); op3->opcode = optname; + op3->ctxid = co.ctx; return setsockopt(ipfw_socket, IPPROTO_IP, IP_FW3, op3, len); } @@ -4122,8 +4133,9 @@ ipfw_flush(int force) } +static void table_list_entry(ipfw_table_xentry *); static void table_list(uint16_t num, int need_header); -static void table_fill_xentry(char *arg, ipfw_table_xentry *xent); +static void table_fill_xentry(int ac, char *av[], ipfw_table_xentry *xent); /* * Retrieve maximum number of tables supported by ipfw(4) module. @@ -4194,29 +4206,9 @@ ipfw_table_handler(int ac, char *av[]) if (_substrcmp(*av, "add") == 0 || _substrcmp(*av, "delete") == 0) { do_add = **av == 'a'; - ac--; av++; - if (!ac) - errx(EX_USAGE, "address required"); - table_fill_xentry(*av, &xent); + table_fill_xentry(ac, av, &xent); - ac--; av++; - if (do_add && ac) { - unsigned int tval; - /* isdigit is a bit of a hack here.. */ - if (strchr(*av, (int)'.') == NULL && isdigit(**av)) { - xent.value = strtoul(*av, NULL, 0); - } else { - if (lookup_host(*av, (struct in_addr *)&tval) == 0) { - /* The value must be stored in host order * - * so that the values < 65k can be distinguished */ - xent.value = ntohl(tval); - } else { - errx(EX_NOHOST, "hostname ``%s'' unknown", *av); - } - } - } else - xent.value = 0; if (do_setcmd3(do_add ? IP_FW_TABLE_XADD : IP_FW_TABLE_XDEL, &xent, xent.len) < 0) { /* If running silent, don't bomb out on these errors. */ @@ -4243,19 +4235,41 @@ ipfw_table_handler(int ac, char *av[]) do { table_list(xent.tbl, is_all); } while (++xent.tbl < a); + } else if (_substrcmp(*av, "entrystats") == 0) { + table_fill_xentry(ac, av, &xent); + + if (do_setcmd3(IP_FW_TABLE_XLISTENTRY, &xent, xent.len) < 0) { + /* If running silent, don't bomb out on these errors. */ + if (!(co.do_quiet)) + err(EX_OSERR, "setsockopt(IP_FW_TABLE_XLISTENTRY)"); + } else + table_list_entry(&xent); + } else if (_substrcmp(*av, "entryzerostats") == 0) { + table_fill_xentry(ac, av, &xent); + + if (do_setcmd3(IP_FW_TABLE_XZEROENTRY, &xent, xent.len) < 0) { + /* If running silent, don't bomb out on these errors. */ + if (!(co.do_quiet)) + err(EX_OSERR, "setsockopt(IP_FW_TABLE_XZEROENTRY)"); + } } else errx(EX_USAGE, "invalid table command %s", *av); } static void -table_fill_xentry(char *arg, ipfw_table_xentry *xent) +table_fill_xentry(int ac, char *av[], ipfw_table_xentry *xent) { - int addrlen, mask, masklen, type; + int addrlen, mask, masklen, type, do_add; struct in6_addr *paddr; uint32_t *pkey; - char *p; + char *p, *arg; uint32_t key; + do_add = **av == 'a'; + ac--; av++; + if (!ac) + errx(EX_USAGE, "address required"); + mask = 0; type = 0; addrlen = 0; @@ -4270,7 +4284,20 @@ table_fill_xentry(char *arg, ipfw_table_xentry *xent) * 4) port, uid/gid or other u32 key (base 10 format) * 5) hostname */ - paddr = &xent->k.addr6; + if (ac > 1 && av && _substrcmp(*av, "mac") == 0) { + uint8_t _mask[8]; + + type = IPFW_TABLE_MIX; + get_mac_addr_mask(av[1], (uint8_t *)xent->mac_addr, _mask); + ac-=2; av+=2; + if (ac <= 0) + errx(EX_DATAERR, "wrong argument passed."); + + paddr = (struct in6_addr *)&xent->k.addr6; + } else + paddr = &xent->k.addr6; + + arg = *av; if (ishexnumber(*arg) != 0 || *arg == ':') { /* Remove / if exists */ if ((p = strchr(arg, '/')) != NULL) { @@ -4283,9 +4310,11 @@ table_fill_xentry(char *arg, ipfw_table_xentry *xent) errx(EX_DATAERR, "bad IPv4 mask width: %s", p + 1); - type = IPFW_TABLE_CIDR; + if (type == 0) + type = IPFW_TABLE_CIDR; masklen = p ? mask : 32; addrlen = sizeof(struct in_addr); + xent->flags = IPFW_TCF_INET; } else if (inet_pton(AF_INET6, arg, paddr) == 1) { if (IN6_IS_ADDR_V4COMPAT(paddr)) errx(EX_DATAERR, @@ -4294,10 +4323,14 @@ table_fill_xentry(char *arg, ipfw_table_xentry *xent) errx(EX_DATAERR, "bad IPv6 mask width: %s", p + 1); - type = IPFW_TABLE_CIDR; + if (type == 0) + type = IPFW_TABLE_CIDR; masklen = p ? mask : 128; addrlen = sizeof(struct in6_addr); } else { + if (type != 0 && type != IPFW_TABLE_MIX) + errx(EX_DATAERR, "Wrong value passed as address"); + /* Port or any other key */ /* Skip non-base 10 entries like 'fa1' */ key = strtol(arg, &p, 10); @@ -4338,11 +4371,88 @@ table_fill_xentry(char *arg, ipfw_table_xentry *xent) masklen = 32; type = IPFW_TABLE_CIDR; addrlen = sizeof(struct in_addr); + xent->flags = IPFW_TCF_INET; + } + + ac--; av++; + if (ac > 1 && av) { + if (_substrcmp(*av, "mac") == 0) { + uint8_t _mask[8]; + + if (type == 0) + type = IPFW_TABLE_CIDR; + get_mac_addr_mask(av[1], (uint8_t *)&xent->mac_addr, _mask); + ac-=2; av+=2; + } } + if (do_add && ac > 0) { + unsigned int tval; + /* isdigit is a bit of a hack here.. */ + if (strchr(*av, (int)'.') == NULL && isdigit(**av)) { + xent->value = strtoul(*av, NULL, 0); + } else { + if (lookup_host(*av, (struct in_addr *)&tval) == 0) { + /* The value must be stored in host order * + * so that the values < 65k can be distinguished */ + xent->value = ntohl(tval); + } else { + errx(EX_NOHOST, "hostname ``%s'' unknown", *av); + } + } + } else + xent->value = 0; + xent->type = type; xent->masklen = masklen; - xent->len = offsetof(ipfw_table_xentry, k) + addrlen; + if (type == IPFW_TABLE_MIX) + xent->len = offsetof(ipfw_table_xentry, k) + addrlen + ETHER_ADDR_LEN; + else + xent->len = offsetof(ipfw_table_xentry, k) + addrlen; +} + +static void +table_list_entry(ipfw_table_xentry *xent) +{ + struct in6_addr *addr6; + uint32_t tval; + char tbuf[128]; + + switch (xent->type) { + case IPFW_TABLE_CIDR: + /* IPv4 or IPv6 prefixes */ + tval = xent->value; + addr6 = &xent->k.addr6; + + if ((xent->flags & IPFW_TCF_INET) != 0) { + /* IPv4 address */ + inet_ntop(AF_INET, (in_addr_t *)&addr6->s6_addr32[0], tbuf, sizeof(tbuf)); + } else { + /* IPv6 address */ + inet_ntop(AF_INET6, addr6, tbuf, sizeof(tbuf)); + } + + if (co.do_value_as_ip) { + tval = htonl(tval); + printf("%s/%u %s %d %d %u\n", tbuf, xent->masklen, + inet_ntoa(*(struct in_addr *)&tval), pr_u64(&xent->packets, 0), pr_u64(&xent->bytes, 0), xent->timestamp); + } else + printf("%s/%u %u %d %d %u\n", tbuf, xent->masklen, tval, + pr_u64(&xent->packets, 0), pr_u64(&xent->bytes, 0), xent->timestamp); + break; + case IPFW_TABLE_INTERFACE: + /* Interface names */ + tval = xent->value; + if (co.do_value_as_ip) { + tval = htonl(tval); + printf("%s %u %s %d %d %u\n", xent->k.iface, xent->masklen, + inet_ntoa(*(struct in_addr *)&tval), pr_u64(&xent->packets, 0), pr_u64(&xent->bytes, 0), xent->timestamp); + } else + printf("%s %u %u %d %d %u\n", xent->k.iface, xent->masklen, tval, + pr_u64(&xent->packets, 0), pr_u64(&xent->bytes, 0), xent->timestamp); + + break; + } } static void @@ -4364,6 +4474,7 @@ table_list(uint16_t num, int need_header) a = (uint32_t *)(op3 + 1); *a = num; op3->opcode = IP_FW_TABLE_XGETSIZE; + op3->ctxid = co.ctx; if (do_cmd(IP_FW3, op3, (uintptr_t)&l) < 0) err(EX_OSERR, "getsockopt(IP_FW_TABLE_XGETSIZE)"); @@ -4374,6 +4485,7 @@ table_list(uint16_t num, int need_header) l = *a; tbl = safe_calloc(1, l); tbl->opheader.opcode = IP_FW_TABLE_XLIST; + tbl->opheader.ctxid = co.ctx; tbl->tbl = num; if (do_cmd(IP_FW3, tbl, (uintptr_t)&l) < 0) err(EX_OSERR, "getsockopt(IP_FW_TABLE_XLIST)"); @@ -4388,21 +4500,24 @@ table_list(uint16_t num, int need_header) tval = xent->value; addr6 = &xent->k.addr6; - if ((xent->flags & IPFW_TCF_INET) != 0) { /* IPv4 address */ - inet_ntop(AF_INET, &addr6->s6_addr32[3], tbuf, sizeof(tbuf)); + inet_ntop(AF_INET, &addr6->s6_addr32[0], tbuf, sizeof(tbuf)); } else { /* IPv6 address */ inet_ntop(AF_INET6, addr6, tbuf, sizeof(tbuf)); } + printf("%s/%u", tbuf, xent->masklen); + if (xent->mac_addr) + printf(" mac %s", ether_ntoa((struct ether_addr *)&xent->mac_addr)); + if (co.do_value_as_ip) { tval = htonl(tval); - printf("%s/%u %s\n", tbuf, xent->masklen, + printf(" %s\n", inet_ntoa(*(struct in_addr *)&tval)); } else - printf("%s/%u %u\n", tbuf, xent->masklen, tval); + printf(" %u\n", tval); break; case IPFW_TABLE_INTERFACE: /* Interface names */ @@ -4413,6 +4528,8 @@ table_list(uint16_t num, int need_header) inet_ntoa(*(struct in_addr *)&tval)); } else printf("%s %u\n", xent->k.iface, tval); + + break; } if (sz < xent->len) diff --git a/sbin/ipfw/ipfw2.h b/sbin/ipfw/ipfw2.h index 6e895b8..3ef9b19 100644 --- a/sbin/ipfw/ipfw2.h +++ b/sbin/ipfw/ipfw2.h @@ -54,6 +54,7 @@ struct cmdline_opts { int use_set; /* work with specified set number */ /* 0 means all sets, otherwise apply to set use_set - 1 */ + u_int ctx; }; extern struct cmdline_opts co; @@ -282,6 +283,9 @@ void dummynet_list(int ac, char *av[], int show_counters); void dummynet_flush(void); int ipfw_delete_pipe(int pipe_or_queue, int n); +/* Contextes */ +int ipfw_context_handler(int, char **); + /* ipv6.c */ void print_unreach6_code(uint16_t code); void print_ip6(struct _ipfw_insn_ip6 *cmd, char const *s); diff --git a/sbin/ipfw/main.c b/sbin/ipfw/main.c index 82a299b..ab75a1b 100644 --- a/sbin/ipfw/main.c +++ b/sbin/ipfw/main.c @@ -262,7 +262,7 @@ ipfw_main(int oldac, char **oldav) save_av = av; optind = optreset = 1; /* restart getopt() */ - while ((ch = getopt(ac, av, "abcdefhinNp:qs:STtv")) != -1) + while ((ch = getopt(ac, av, "abcdefhinNp:qs:STtvx:")) != -1) switch (ch) { case 'a': do_acct = 1; @@ -335,6 +335,12 @@ ipfw_main(int oldac, char **oldav) co.verbose = 1; break; + case 'x': + co.ctx = atoi(optarg); + if (co.ctx == 0) + errx(EX_USAGE, "Context 0 is invalid"); + break; + default: free(save_av); return 1; @@ -362,7 +368,9 @@ ipfw_main(int oldac, char **oldav) co.do_nat = 0; co.do_pipe = 0; co.use_set = 0; - if (!strncmp(*av, "nat", strlen(*av))) + if (!strncmp(*av, "zone", strlen(*av))) + return (ipfw_context_handler(ac, av)); + else if (!strncmp(*av, "nat", strlen(*av))) co.do_nat = 1; else if (!strncmp(*av, "pipe", strlen(*av))) co.do_pipe = 1; @@ -389,6 +397,9 @@ ipfw_main(int oldac, char **oldav) } NEED1("missing command"); + if (!co.ctx && !co.do_pipe) + err(11, "Context is mandatory"); + /* * For pipes, queues and nats we normally say 'nat|pipe NN config' * but the code is easier to parse as 'nat|pipe config NN' @@ -458,7 +469,7 @@ ipfw_readfile(int ac, char *av[]) FILE *f = NULL; pid_t preproc = 0; - while ((c = getopt(ac, av, "cfNnp:qS")) != -1) { + while ((c = getopt(ac, av, "cfNnp:qSx:")) != -1) { switch(c) { case 'c': co.do_compact = 1; @@ -509,6 +520,12 @@ ipfw_readfile(int ac, char *av[]) co.show_sets = 1; break; + case 'x': + co.ctx = atoi(optarg); + if (co.ctx == 0) + errx(EX_USAGE, "Context 0 is invalid"); + break; + default: errx(EX_USAGE, "bad arguments, for usage" " summary ``ipfw''"); diff --git a/sbin/pfctl/parse.y b/sbin/pfctl/parse.y index 99c26c0..b5577e2 100644 --- a/sbin/pfctl/parse.y +++ b/sbin/pfctl/parse.y @@ -37,6 +37,8 @@ __FBSDID("$FreeBSD$"); #include <sys/sysctl.h> #endif #include <net/if.h> +#include <net/ethernet.h> +#include <net/if_vlan_var.h> #include <netinet/in.h> #include <netinet/in_systm.h> #include <netinet/ip.h> @@ -46,8 +48,10 @@ __FBSDID("$FreeBSD$"); #include <arpa/inet.h> #include <altq/altq.h> #include <altq/altq_cbq.h> +#include <altq/altq_codel.h> #include <altq/altq_priq.h> #include <altq/altq_hfsc.h> +#include <altq/altq_fairq.h> #include <stdio.h> #include <unistd.h> @@ -158,6 +162,7 @@ struct node_icmp { enum { PF_STATE_OPT_MAX, PF_STATE_OPT_NOSYNC, PF_STATE_OPT_SRCTRACK, PF_STATE_OPT_MAX_SRC_STATES, PF_STATE_OPT_MAX_SRC_CONN, PF_STATE_OPT_MAX_SRC_CONN_RATE, PF_STATE_OPT_MAX_SRC_NODES, + PF_STATE_OPT_MAX_PACKETS, PF_STATE_OPT_OVERLOAD, PF_STATE_OPT_STATELOCK, PF_STATE_OPT_TIMEOUT, PF_STATE_OPT_SLOPPY, }; @@ -169,6 +174,7 @@ struct node_state_opt { u_int32_t max_states; u_int32_t max_src_states; u_int32_t max_src_conn; + u_int32_t max_packets; struct { u_int32_t limit; u_int32_t seconds; @@ -215,6 +221,7 @@ struct filter_opts { #define FOM_TOS 0x04 #define FOM_KEEP 0x08 #define FOM_SRCTRACK 0x10 +#define FOM_DSCP 0x20 struct node_uid *uid; struct node_gid *gid; struct { @@ -225,7 +232,9 @@ struct filter_opts { } flags; struct node_icmp *icmpspec; u_int32_t tos; + u_int32_t dscp; u_int32_t prob; + u_int32_t tracker; struct { int action; struct node_state_opt *options; @@ -233,10 +242,19 @@ struct filter_opts { int fragment; int allowopts; char *label; + char *schedule; struct node_qassign queues; char *tag; char *match_tag; u_int8_t match_tag_not; + struct { + uint8_t pcp[2]; + uint8_t op; + uint8_t setpcp; + } ieee8021q_pcp; + u_int32_t dnpipe; + u_int32_t pdnpipe; + u_int32_t free_flags; u_int rtableid; struct { struct node_host *addr; @@ -246,6 +264,7 @@ struct filter_opts { struct antispoof_opts { char *label; + u_int32_t tracker; u_int rtableid; } antispoof_opts; @@ -298,8 +317,9 @@ struct pool_opts { } pool_opts; - +struct codel_opts codel_opts; struct node_hfsc_opts hfsc_opts; +struct node_fairq_opts fairq_opts; struct node_state_opt *keep_state_defaults = NULL; int disallow_table(struct node_host *, const char *); @@ -337,6 +357,7 @@ int expand_skip_interface(struct node_if *); int check_rulestate(int); int getservice(char *); int rule_label(struct pf_rule *, char *); +int rule_schedule(struct pf_rule *, char *); int rt_tableid_max(void); void mv_rules(struct pf_ruleset *, struct pf_ruleset *); @@ -422,6 +443,8 @@ typedef struct { struct table_opts table_opts; struct pool_opts pool_opts; struct node_hfsc_opts hfsc_opts; + struct node_fairq_opts fairq_opts; + struct codel_opts codel_opts; } v; int lineno; } YYSTYPE; @@ -436,29 +459,31 @@ int parseport(char *, struct range *r, int); %} -%token PASS BLOCK SCRUB RETURN IN OS OUT LOG QUICK ON FROM TO FLAGS +%token PASS BLOCK MATCH SCRUB RETURN IN OS OUT LOG QUICK ON FROM TO FLAGS %token RETURNRST RETURNICMP RETURNICMP6 PROTO INET INET6 ALL ANY ICMPTYPE %token ICMP6TYPE CODE KEEP MODULATE STATE PORT RDR NAT BINAT ARROW NODF -%token MINTTL ERROR ALLOWOPTS FASTROUTE FILENAME ROUTETO DUPTO REPLYTO NO LABEL -%token NOROUTE URPFFAILED FRAGMENT USER GROUP MAXMSS MAXIMUM TTL TOS DROP TABLE +%token MINTTL ERROR ALLOWOPTS FASTROUTE FILENAME ROUTETO DUPTO REPLYTO NO LABEL SCHEDULE +%token NOROUTE URPFFAILED FRAGMENT USER GROUP MAXMSS MAXIMUM TTL TOS DSCP DROP TABLE TRACKER %token REASSEMBLE FRAGDROP FRAGCROP ANCHOR NATANCHOR RDRANCHOR BINATANCHOR %token SET OPTIMIZATION TIMEOUT LIMIT LOGINTERFACE BLOCKPOLICY RANDOMID %token REQUIREORDER SYNPROXY FINGERPRINTS NOSYNC DEBUG SKIP HOSTID %token ANTISPOOF FOR INCLUDE %token BITMASK RANDOM SOURCEHASH ROUNDROBIN STATICPORT PROBABILITY -%token ALTQ CBQ PRIQ HFSC BANDWIDTH TBRSIZE LINKSHARE REALTIME UPPERLIMIT -%token QUEUE PRIORITY QLIMIT RTABLE +%token ALTQ CBQ CODEL PRIQ HFSC FAIRQ BANDWIDTH TBRSIZE LINKSHARE REALTIME +%token UPPERLIMIT QUEUE PRIORITY QLIMIT HOGS BUCKETS RTABLE TARGET INTERVAL +%token DNPIPE DNQUEUE %token LOAD RULESET_OPTIMIZATION %token STICKYADDRESS MAXSRCSTATES MAXSRCNODES SOURCETRACK GLOBAL RULE %token MAXSRCCONN MAXSRCCONNRATE OVERLOAD FLUSH SLOPPY -%token TAGGED TAG IFBOUND FLOATING STATEPOLICY STATEDEFAULTS ROUTE SETTOS +%token TAGGED TAG IFBOUND FLOATING STATEPOLICY STATEDEFAULTS ROUTE SETTOS MAXPCKT +%token IEEE8021QPCP IEEE8021QSETPCP %token DIVERTTO DIVERTREPLY %token <v.string> STRING %token <v.number> NUMBER %token <v.i> PORTBINARY %type <v.interface> interface if_list if_item_not if_item %type <v.number> number icmptype icmp6type uid gid -%type <v.number> tos not yesno +%type <v.number> tos dscp not yesno %type <v.probability> probability %type <v.i> no dir af fragcache optimizer %type <v.i> sourcetrack flush unaryop statelock @@ -473,8 +498,8 @@ int parseport(char *, struct range *r, int); %type <v.icmp> icmp6_list icmp6_item %type <v.number> reticmpspec reticmp6spec %type <v.fromto> fromto -%type <v.peer> ipportspec from to -%type <v.host> ipspec toipspec xhost host dynaddr host_list +%type <v.peer> ipportspec from to toipportspec +%type <v.host> ipspec xhost host dynaddr host_list %type <v.host> redir_host_list redirspec %type <v.host> route_host route_host_list routespec %type <v.os> os xos os_list @@ -483,7 +508,7 @@ int parseport(char *, struct range *r, int); %type <v.gid> gids gid_list gid_item %type <v.route> route %type <v.redirection> redirection redirpool -%type <v.string> label stringall tag anchorname +%type <v.string> label schedule stringall tag anchorname %type <v.string> string varstring numberstring %type <v.keep_state> keep %type <v.state_opt> state_opt_spec state_opt_list state_opt_item @@ -495,6 +520,8 @@ int parseport(char *, struct range *r, int); %type <v.number> cbqflags_list cbqflags_item %type <v.number> priqflags_list priqflags_item %type <v.hfsc_opts> hfscopts_list hfscopts_item hfsc_opts +%type <v.fairq_opts> fairqopts_list fairqopts_item fairq_opts +%type <v.codel_opts> codelopts_list codelopts_item codel_opts %type <v.queue_bwspec> bandwidth %type <v.filter_opts> filter_opts filter_opt filter_opts_l %type <v.antispoof_opts> antispoof_opts antispoof_opt antispoof_opts_l @@ -874,6 +901,11 @@ anchorrule : ANCHOR anchorname dir quick interface af proto fromto YYERROR; } + r.ieee8021q_pcp.pcp[0] = $9.ieee8021q_pcp.pcp[0]; + r.ieee8021q_pcp.pcp[1] = $9.ieee8021q_pcp.pcp[1]; + r.ieee8021q_pcp.op = $9.ieee8021q_pcp.op; + r.ieee8021q_pcp.setpcp = $9.ieee8021q_pcp.setpcp; + if ($9.match_tag) if (strlcpy(r.match_tagname, $9.match_tag, PF_TAG_NAME_SIZE) >= PF_TAG_NAME_SIZE) { @@ -1214,6 +1246,7 @@ antispoof : ANTISPOOF logquick antispoof_ifspc af antispoof_opts { if (rule_label(&r, $5.label)) YYERROR; r.rtableid = $5.rtableid; + r.cuid = $5.tracker; j = calloc(1, sizeof(struct node_if)); if (j == NULL) err(1, "antispoof: calloc"); @@ -1263,6 +1296,7 @@ antispoof : ANTISPOOF logquick antispoof_ifspc af antispoof_opts { r.logif = $2.logif; r.quick = $2.quick; r.af = $4; + r.cuid = $5.tracker; if (rule_label(&r, $5.label)) YYERROR; r.rtableid = $5.rtableid; @@ -1324,6 +1358,9 @@ antispoof_opt : label { } antispoof_opts.label = $1; } + | TRACKER number { + antispoof_opts.tracker = $2; + } | RTABLE NUMBER { if ($2 < 0 || $2 > rt_tableid_max()) { yyerror("invalid rtable id"); @@ -1466,7 +1503,7 @@ altqif : ALTQ interface queue_opts QUEUE qassign { a.scheduler = $3.scheduler.qtype; a.qlimit = $3.qlimit; a.tbrsize = $3.tbrsize; - if ($5 == NULL) { + if ($5 == NULL && $3.scheduler.qtype != ALTQT_CODEL) { yyerror("no child queues specified"); YYERROR; } @@ -1598,14 +1635,22 @@ bandwidth : STRING { bps = strtod($1, &cp); if (cp != NULL) { - if (!strcmp(cp, "b")) + if (!strcmp(cp, "b") || !strcmp(cp, "bit")) ; /* nothing */ - else if (!strcmp(cp, "Kb")) + else if (!strcmp(cp, "Kb") || !strcmp(cp, "Kbit")) bps *= 1000; - else if (!strcmp(cp, "Mb")) + else if (!strcmp(cp, "Mb") || !strcmp(cp, "Mbit")) bps *= 1000 * 1000; - else if (!strcmp(cp, "Gb")) + else if (!strcmp(cp, "Gb") || !strcmp(cp, "Gbit")) bps *= 1000 * 1000 * 1000; + else if (!strcmp(cp, "B") || !strcmp(cp, "Byte")) + ; /* nothing */ + else if (!strcmp(cp, "KB") || !strcmp(cp, "Kbyte")) + bps *= 1024; + else if (!strcmp(cp, "MB") || !strcmp(cp, "Mbyte")) + bps *= 1024 * 1024; + else if (!strcmp(cp, "GB") || !strcmp(cp, "Gbyte")) + bps *= 1024 * 1024 * 1024; else if (!strcmp(cp, "%")) { if (bps < 0 || bps > 100) { yyerror("bandwidth spec " @@ -1659,6 +1704,24 @@ scheduler : CBQ { $$.qtype = ALTQT_HFSC; $$.data.hfsc_opts = $3; } + | FAIRQ { + $$.qtype = ALTQT_FAIRQ; + bzero(&$$.data.fairq_opts, + sizeof(struct node_fairq_opts)); + } + | FAIRQ '(' fairq_opts ')' { + $$.qtype = ALTQT_FAIRQ; + $$.data.fairq_opts = $3; + } + | CODEL { + $$.qtype = ALTQT_CODEL; + bzero(&$$.data.codel_opts, + sizeof(struct codel_opts)); + } + | CODEL '(' codel_opts ')' { + $$.qtype = ALTQT_CODEL; + $$.data.codel_opts = $3; + } ; cbqflags_list : cbqflags_item { $$ |= $1; } @@ -1676,6 +1739,8 @@ cbqflags_item : STRING { $$ = CBQCLF_RED|CBQCLF_ECN; else if (!strcmp($1, "rio")) $$ = CBQCLF_RIO; + else if (!strcmp($1, "codel")) + $$ = CBQCLF_CODEL; else { yyerror("unknown cbq flag \"%s\"", $1); free($1); @@ -1698,6 +1763,8 @@ priqflags_item : STRING { $$ = PRCF_RED|PRCF_ECN; else if (!strcmp($1, "rio")) $$ = PRCF_RIO; + else if (!strcmp($1, "codel")) + $$ = PRCF_CODEL; else { yyerror("unknown priq flag \"%s\"", $1); free($1); @@ -1798,6 +1865,8 @@ hfscopts_item : LINKSHARE bandwidth { hfsc_opts.flags |= HFCF_RED|HFCF_ECN; else if (!strcmp($1, "rio")) hfsc_opts.flags |= HFCF_RIO; + else if (!strcmp($1, "codel")) + hfsc_opts.flags |= HFCF_CODEL; else { yyerror("unknown hfsc flag \"%s\"", $1); free($1); @@ -1807,6 +1876,102 @@ hfscopts_item : LINKSHARE bandwidth { } ; +fairq_opts : { + bzero(&fairq_opts, + sizeof(struct node_fairq_opts)); + } + fairqopts_list { + $$ = fairq_opts; + } + ; + +fairqopts_list : fairqopts_item + | fairqopts_list comma fairqopts_item + ; + +fairqopts_item : LINKSHARE bandwidth { + if (fairq_opts.linkshare.used) { + yyerror("linkshare already specified"); + YYERROR; + } + fairq_opts.linkshare.m2 = $2; + fairq_opts.linkshare.used = 1; + } + | LINKSHARE '(' bandwidth number bandwidth ')' { + if (fairq_opts.linkshare.used) { + yyerror("linkshare already specified"); + YYERROR; + } + fairq_opts.linkshare.m1 = $3; + fairq_opts.linkshare.d = $4; + fairq_opts.linkshare.m2 = $5; + fairq_opts.linkshare.used = 1; + } + | HOGS bandwidth { + fairq_opts.hogs_bw = $2; + } + | BUCKETS number { + fairq_opts.nbuckets = $2; + } + | STRING { + if (!strcmp($1, "default")) + fairq_opts.flags |= FARF_DEFAULTCLASS; + else if (!strcmp($1, "red")) + fairq_opts.flags |= FARF_RED; + else if (!strcmp($1, "ecn")) + fairq_opts.flags |= FARF_RED|FARF_ECN; + else if (!strcmp($1, "rio")) + fairq_opts.flags |= FARF_RIO; + else if (!strcmp($1, "codel")) + fairq_opts.flags |= FARF_CODEL; + else { + yyerror("unknown fairq flag \"%s\"", $1); + free($1); + YYERROR; + } + free($1); + } + ; + +codel_opts : { + bzero(&codel_opts, + sizeof(struct codel_opts)); + } + codelopts_list { + $$ = codel_opts; + } + ; + +codelopts_list : codelopts_item + | codelopts_list comma codelopts_item + ; + +codelopts_item : INTERVAL number { + if (codel_opts.interval) { + yyerror("interval already specified"); + YYERROR; + } + codel_opts.interval = $2; + } + | TARGET number { + if (codel_opts.target) { + yyerror("target already specified"); + YYERROR; + } + codel_opts.target = $2; + } + | STRING { + if (!strcmp($1, "ecn")) + codel_opts.ecn = 1; + else { + yyerror("unknown codel option \"%s\"", $1); + free($1); + YYERROR; + } + free($1); + } + ; + qassign : /* empty */ { $$ = NULL; } | qassign_item { $$ = $1; } | '{' optnl qassign_list '}' { $$ = $3; } @@ -1878,6 +2043,11 @@ pfrule : action dir logquick interface route af proto fromto r.prob = $9.prob; r.rtableid = $9.rtableid; + r.ieee8021q_pcp.pcp[0] = $9.ieee8021q_pcp.pcp[0]; + r.ieee8021q_pcp.pcp[1] = $9.ieee8021q_pcp.pcp[1]; + r.ieee8021q_pcp.op = $9.ieee8021q_pcp.op; + r.ieee8021q_pcp.setpcp = $9.ieee8021q_pcp.setpcp; + r.af = $6; if ($9.tag) if (strlcpy(r.tagname, $9.tag, @@ -1897,6 +2067,11 @@ pfrule : action dir logquick interface route af proto fromto if (rule_label(&r, $9.label)) YYERROR; free($9.label); + if (rule_schedule(&r, $9.schedule)) + YYERROR; + free($9.schedule); + if ($9.tracker) + r.cuid = $9.tracker; r.flags = $9.flags.b1; r.flagset = $9.flags.b2; if (($9.flags.b1 & $9.flags.b2) != $9.flags.b1) { @@ -1928,7 +2103,14 @@ pfrule : action dir logquick interface route af proto fromto #endif } - r.tos = $9.tos; + if ($9.tos) { + r.tos = $9.tos; + r.rule_flag |= PFRULE_TOS; + } + if ($9.dscp) { + r.tos = $9.dscp; + r.rule_flag |= PFRULE_DSCP; + } r.keep_state = $9.keep.action; o = $9.keep.options; @@ -1960,6 +2142,14 @@ pfrule : action dir logquick interface route af proto fromto } r.rule_flag |= PFRULE_NOSYNC; break; + case PF_STATE_OPT_MAX_PACKETS: + if (o->data.max_packets == 0) { + yyerror("max_packets must be" + "greater than 0"); + YYERROR; + } + r.spare2 = o->data.max_packets; + break; case PF_STATE_OPT_SRCTRACK: if (srctrack) { yyerror("state option " @@ -2246,6 +2436,15 @@ pfrule : action dir logquick interface route af proto fromto } #endif + if ($9.dnpipe) { + r.dnpipe = $9.dnpipe; + if ($9.free_flags & PFRULE_DN_IS_PIPE) + r.free_flags |= PFRULE_DN_IS_PIPE; + else + r.free_flags |= PFRULE_DN_IS_QUEUE; + r.pdnpipe = $9.pdnpipe; + } + expand_rule(&r, $4, $5.host, $7, $8.src_os, $8.src.host, $8.src.port, $8.dst.host, $8.dst.port, $9.uid, $9.gid, $9.icmpspec, ""); @@ -2306,6 +2505,14 @@ filter_opt : USER uids { filter_opts.marker |= FOM_TOS; filter_opts.tos = $2; } + | dscp { + if (filter_opts.marker & FOM_DSCP) { + yyerror("dscp cannot be redefined"); + YYERROR; + } + filter_opts.marker |= FOM_DSCP; + filter_opts.dscp = $1; + } | keep { if (filter_opts.marker & FOM_KEEP) { yyerror("modulate or keep cannot be redefined"); @@ -2315,6 +2522,9 @@ filter_opt : USER uids { filter_opts.keep.action = $1.action; filter_opts.keep.options = $1.options; } + | TRACKER number { + filter_opts.tracker = $2; + } | FRAGMENT { filter_opts.fragment = 1; } @@ -2328,6 +2538,13 @@ filter_opt : USER uids { } filter_opts.label = $1; } + | schedule { + if (filter_opts.schedule) { + yyerror("schedule label cannot be redefined"); + YYERROR; + } + filter_opts.schedule = $1; + } | qname { if (filter_opts.queues.qname) { yyerror("queue cannot be redefined"); @@ -2335,6 +2552,32 @@ filter_opt : USER uids { } filter_opts.queues = $1; } + | DNPIPE number { + filter_opts.dnpipe = $2; + filter_opts.free_flags |= PFRULE_DN_IS_PIPE; + } + | DNPIPE '(' number ')' { + filter_opts.dnpipe = $3; + filter_opts.free_flags |= PFRULE_DN_IS_PIPE; + } + | DNPIPE '(' number comma number ')' { + filter_opts.pdnpipe = $5; + filter_opts.dnpipe = $3; + filter_opts.free_flags |= PFRULE_DN_IS_PIPE; + } + | DNQUEUE number { + filter_opts.dnpipe = $2; + filter_opts.free_flags |= PFRULE_DN_IS_QUEUE; + } + | DNQUEUE '(' number comma number ')' { + filter_opts.pdnpipe = $5; + filter_opts.dnpipe = $3; + filter_opts.free_flags |= PFRULE_DN_IS_QUEUE; + } + | DNQUEUE '(' number ')' { + filter_opts.dnpipe = $3; + filter_opts.free_flags |= PFRULE_DN_IS_QUEUE; + } | TAG string { filter_opts.tag = $2; } @@ -2354,6 +2597,98 @@ filter_opt : USER uids { if (filter_opts.prob == 0) filter_opts.prob = 1; } + | IEEE8021QPCP STRING { + u_int pcp; + + /* + * XXXRW: More complete set of operations, similar to + * ports. + */ + if (!strcmp($2, "be")) + pcp = IEEE8021Q_PCP_BE; + else if (!strcmp($2, "bk")) + pcp = IEEE8021Q_PCP_BK; + else if (!strcmp($2, "ee")) + pcp = IEEE8021Q_PCP_EE; + else if (!strcmp($2, "ca")) + pcp = IEEE8021Q_PCP_CA; + else if (!strcmp($2, "vi")) + pcp = IEEE8021Q_PCP_VI; + else if (!strcmp($2, "vo")) + pcp = IEEE8021Q_PCP_VO; + else if (!strcmp($2, "ic")) + pcp = IEEE8021Q_PCP_IC; + else if (!strcmp($2, "nc")) + pcp = IEEE8021Q_PCP_NC; + else + pcp = 8; /* flag bad argument */ + if (pcp > 7) { + yyerror("invalid ieee8021q_pcp value %s", $2); + free($2); + YYERROR; + } + free($2); + filter_opts.ieee8021q_pcp.pcp[0] = pcp; + filter_opts.ieee8021q_pcp.pcp[1] = 0; + filter_opts.ieee8021q_pcp.op = PF_OP_EQ; + } + | IEEE8021QPCP number { + u_int pcp; + + pcp = $2; + if (pcp > 7) { + yyerror("invalid ieee8021q_pcp value %u", pcp); + YYERROR; + } + filter_opts.ieee8021q_pcp.pcp[0] = pcp; + filter_opts.ieee8021q_pcp.pcp[1] = 0; + filter_opts.ieee8021q_pcp.op = PF_OP_EQ; + } + | IEEE8021QSETPCP STRING { + u_int pcp; + + /* + * XXXRW: More complete set of operations, similar to + * ports. + */ + if (!strcmp($2, "be")) + pcp = IEEE8021Q_PCP_BE; + else if (!strcmp($2, "bk")) + pcp = IEEE8021Q_PCP_BK; + else if (!strcmp($2, "ee")) + pcp = IEEE8021Q_PCP_EE; + else if (!strcmp($2, "ca")) + pcp = IEEE8021Q_PCP_CA; + else if (!strcmp($2, "vi")) + pcp = IEEE8021Q_PCP_VI; + else if (!strcmp($2, "vo")) + pcp = IEEE8021Q_PCP_VO; + else if (!strcmp($2, "ic")) + pcp = IEEE8021Q_PCP_IC; + else if (!strcmp($2, "nc")) + pcp = IEEE8021Q_PCP_NC; + else + pcp = 8; /* flag bad argument */ + if (pcp > 7) { + yyerror("invalid ieee8021q_setpcp value %s", + $2); + free($2); + YYERROR; + } + free($2); + filter_opts.ieee8021q_pcp.setpcp = pcp | SETPCP_VALID; + } + | IEEE8021QSETPCP number { + u_int pcp; + + pcp = $2; + if (pcp > 7) { + yyerror("invalid ieee8021q_setpcp value %u", + pcp); + YYERROR; + } + filter_opts.ieee8021q_pcp.setpcp = pcp | SETPCP_VALID; + } | RTABLE NUMBER { if ($2 < 0 || $2 > rt_tableid_max()) { yyerror("invalid rtable id"); @@ -2421,6 +2756,7 @@ probability : STRING { action : PASS { $$.b1 = PF_PASS; $$.b2 = $$.w = 0; } + | MATCH { $$.b1 = PF_MATCH; $$.b2 = $$.w = 0; } | BLOCK blockspec { $$ = $2; $$.b1 = PF_DROP; } ; @@ -2749,8 +3085,8 @@ ipspec : ANY { $$ = NULL; } | '{' optnl host_list '}' { $$ = $3; } ; -toipspec : TO ipspec { $$ = $2; } - | /* empty */ { $$ = NULL; } +toipportspec : TO ipportspec { $$ = $2; } + | /* empty */ { $$.host = NULL; $$.port = NULL; } ; host_list : ipspec optnl { $$ = $1; } @@ -3398,6 +3734,48 @@ tos : STRING { } ; +dscp : DSCP STRING { + if (!strcmp($2, "EF")) + $$ = DSCP_EF; + else if (!strcmp($2, "VA")) + $$ = DSCP_VA; + else if (!strcmp($2, "af11")) + $$ = DSCP_AF11; + else if (!strcmp($2, "af12")) + $$ = DSCP_AF12; + else if (!strcmp($2, "af13")) + $$ = DSCP_AF13; + else if (!strcmp($2, "af21")) + $$ = DSCP_AF21; + else if (!strcmp($2, "af22")) + $$ = DSCP_AF22; + else if (!strcmp($2, "af23")) + $$ = DSCP_AF23; + else if (!strcmp($2, "af31")) + $$ = DSCP_AF31; + else if (!strcmp($2, "af32")) + $$ = DSCP_AF32; + else if (!strcmp($2, "af33")) + $$ = DSCP_AF33; + else if (!strcmp($2, "af41")) + $$ = DSCP_AF41; + else if (!strcmp($2, "af42")) + $$ = DSCP_AF42; + else if (!strcmp($2, "af43")) + $$ = DSCP_AF43; + else if ($2[0] == '0' && $2[1] == 'x') + $$ = strtoul($2, NULL, 16) * 4; + else + $$ = strtoul($2, NULL, 10) * 4; + if (!$$ || $$ > 255) { + yyerror("illegal dscp value %s", $2); + free($2); + YYERROR; + } + free($2); + } + ; + sourcetrack : SOURCETRACK { $$ = PF_SRCTRACK; } | SOURCETRACK GLOBAL { $$ = PF_SRCTRACK_GLOBAL; } | SOURCETRACK RULE { $$ = PF_SRCTRACK_RULE; } @@ -3482,6 +3860,15 @@ state_opt_item : MAXIMUM NUMBER { $$->next = NULL; $$->tail = $$; } + | MAXPCKT NUMBER { + $$ = calloc(1, sizeof(struct node_state_opt)); + if ($$ == NULL) + err(1, "state_opt_item: calloc"); + $$->type = PF_STATE_OPT_MAX_PACKETS; + $$->data.max_packets = $2; + $$->next = NULL; + $$->tail = $$; + } | MAXSRCCONN NUMBER { if ($2 < 0 || $2 > UINT_MAX) { yyerror("only positive values permitted"); @@ -3604,6 +3991,11 @@ label : LABEL STRING { } ; +schedule : SCHEDULE STRING { + $$ = $2; + } + ; + qname : QUEUE STRING { $$.qname = $2; $$.pqname = NULL; @@ -3979,7 +4371,7 @@ natrule : nataction interface af proto fromto tag tagged rtable } ; -binatrule : no BINAT natpasslog interface af proto FROM host toipspec tag +binatrule : no BINAT natpasslog interface af proto FROM ipportspec toipportspec tag tagged rtable redirection { struct pf_rule binat; @@ -3987,7 +4379,7 @@ binatrule : no BINAT natpasslog interface af proto FROM host toipspec tag if (check_rulestate(PFCTL_STATE_NAT)) YYERROR; - if (disallow_urpf_failed($9, "\"urpf-failed\" is not " + if (disallow_urpf_failed($9.host, "\"urpf-failed\" is not " "permitted as a binat destination")) YYERROR; @@ -4005,10 +4397,10 @@ binatrule : no BINAT natpasslog interface af proto FROM host toipspec tag binat.log = $3.b2; binat.logif = $3.w2; binat.af = $5; - if (!binat.af && $8 != NULL && $8->af) - binat.af = $8->af; - if (!binat.af && $9 != NULL && $9->af) - binat.af = $9->af; + if (!binat.af && $8.host != NULL && $8.host->af) + binat.af = $8.host->af; + if (!binat.af && $9.host != NULL && $9.host->af) + binat.af = $9.host->af; if (!binat.af && $13 != NULL && $13->host) binat.af = $13->host->af; @@ -4047,10 +4439,10 @@ binatrule : no BINAT natpasslog interface af proto FROM host toipspec tag free($6); } - if ($8 != NULL && disallow_table($8, "invalid use of " + if ($8.host != NULL && disallow_table($8.host, "invalid use of " "table <%s> as the source address of a binat rule")) YYERROR; - if ($8 != NULL && disallow_alias($8, "invalid use of " + if ($8.host != NULL && disallow_alias($8.host, "invalid use of " "interface (%s) as the source address of a binat " "rule")) YYERROR; @@ -4063,38 +4455,46 @@ binatrule : no BINAT natpasslog interface af proto FROM host toipspec tag "redirect address of a binat rule")) YYERROR; - if ($8 != NULL) { - if ($8->next) { + if ($8.host != NULL) { + if ($8.host->next) { yyerror("multiple binat ip addresses"); YYERROR; } - if ($8->addr.type == PF_ADDR_DYNIFTL) - $8->af = binat.af; - if ($8->af != binat.af) { + if ($8.host->addr.type == PF_ADDR_DYNIFTL) + $8.host->af = binat.af; + if ($8.host->af != binat.af) { yyerror("binat ip versions must match"); YYERROR; } - if (check_netmask($8, binat.af)) + if (check_netmask($8.host, binat.af)) YYERROR; - memcpy(&binat.src.addr, &$8->addr, + memcpy(&binat.src.addr, &$8.host->addr, sizeof(binat.src.addr)); - free($8); + binat.src.neg = $8.host->not; + free($8.host); } - if ($9 != NULL) { - if ($9->next) { + if ($9.host != NULL) { + if ($9.host->next) { yyerror("multiple binat ip addresses"); YYERROR; } - if ($9->af != binat.af && $9->af) { + if ($9.host->af != binat.af && $9.host->af) { yyerror("binat ip versions must match"); YYERROR; } - if (check_netmask($9, binat.af)) + if (check_netmask($9.host, binat.af)) YYERROR; - memcpy(&binat.dst.addr, &$9->addr, + memcpy(&binat.dst.addr, &$9.host->addr, sizeof(binat.dst.addr)); - binat.dst.neg = $9->not; - free($9); + binat.dst.neg = $9.host->not; + free($9.host); + } + + if ($9.port != NULL) { + binat.dst.port[0] = $9.port->port[0]; + binat.dst.port[1] = $9.port->port[1]; + binat.dst.port_op = $9.port->op; + free($9.port); } if (binat.action == PF_NOBINAT) { @@ -4440,6 +4840,15 @@ filter_consistent(struct pf_rule *r, int anchor_call) "synproxy state or modulate state"); problems++; } + if ((r->rule_flag & PFRULE_TOS) && (r->rule_flag & PFRULE_DSCP)) { + yyerror("tos and dscp cannot be used together"); + problems++; + } + if (r->dnpipe && r->pdnpipe && !r->direction) { + yyerror("dummynet cannot be specified without direction"); + problems++; + } + return (-problems); } @@ -4732,7 +5141,8 @@ expand_altq(struct pf_altq *a, struct node_if *interfaces, if ((pf->loadopt & PFCTL_FLAG_ALTQ) == 0) { FREE_LIST(struct node_if, interfaces); - FREE_LIST(struct node_queue, nqueues); + if (nqueues) + FREE_LIST(struct node_queue, nqueues); return (0); } @@ -4823,7 +5233,8 @@ expand_altq(struct pf_altq *a, struct node_if *interfaces, } ); FREE_LIST(struct node_if, interfaces); - FREE_LIST(struct node_queue, nqueues); + if (nqueues) + FREE_LIST(struct node_queue, nqueues); return (errs); } @@ -4983,6 +5394,7 @@ expand_rule(struct pf_rule *r, int added = 0, error = 0; char ifname[IF_NAMESIZE]; char label[PF_RULE_LABEL_SIZE]; + char schedule[PF_RULE_LABEL_SIZE]; char tagname[PF_TAG_NAME_SIZE]; char match_tagname[PF_TAG_NAME_SIZE]; struct pf_pooladdr *pa; @@ -4991,6 +5403,8 @@ expand_rule(struct pf_rule *r, if (strlcpy(label, r->label, sizeof(label)) >= sizeof(label)) errx(1, "expand_rule: strlcpy"); + if (strlcpy(schedule, r->schedule, sizeof(schedule)) > sizeof(schedule)) + errx(1, "expand_rule: strlcpy"); if (strlcpy(tagname, r->tagname, sizeof(tagname)) >= sizeof(tagname)) errx(1, "expand_rule: strlcpy"); if (strlcpy(match_tagname, r->match_tagname, sizeof(match_tagname)) >= @@ -5042,6 +5456,9 @@ expand_rule(struct pf_rule *r, if (strlcpy(r->label, label, sizeof(r->label)) >= sizeof(r->label)) errx(1, "expand_rule: strlcpy"); + if (strlcpy(r->schedule, schedule, sizeof(r->schedule)) >= + sizeof(r->schedule)) + errx(1, "expand_rule: strlcpy"); if (strlcpy(r->tagname, tagname, sizeof(r->tagname)) >= sizeof(r->tagname)) errx(1, "expand_rule: strlcpy"); @@ -5050,6 +5467,8 @@ expand_rule(struct pf_rule *r, errx(1, "expand_rule: strlcpy"); expand_label(r->label, PF_RULE_LABEL_SIZE, r->ifname, r->af, src_host, src_port, dst_host, dst_port, proto->proto); + expand_label(r->schedule, PF_RULE_LABEL_SIZE, r->ifname, r->af, + src_host, src_port, dst_host, dst_port, proto->proto); expand_label(r->tagname, PF_TAG_NAME_SIZE, r->ifname, r->af, src_host, src_port, dst_host, dst_port, proto->proto); expand_label(r->match_tagname, PF_TAG_NAME_SIZE, r->ifname, @@ -5226,15 +5645,21 @@ lookup(char *s) { "bitmask", BITMASK}, { "block", BLOCK}, { "block-policy", BLOCKPOLICY}, + { "buckets", BUCKETS}, { "cbq", CBQ}, { "code", CODE}, + { "codelq", CODEL}, { "crop", FRAGCROP}, { "debug", DEBUG}, { "divert-reply", DIVERTREPLY}, { "divert-to", DIVERTTO}, + { "dnpipe", DNPIPE}, + { "dnqueue", DNQUEUE}, { "drop", DROP}, { "drop-ovl", FRAGDROP}, + { "dscp", DSCP}, { "dup-to", DUPTO}, + { "fairq", FAIRQ}, { "fastroute", FASTROUTE}, { "file", FILENAME}, { "fingerprints", FINGERPRINTS}, @@ -5247,14 +5672,18 @@ lookup(char *s) { "global", GLOBAL}, { "group", GROUP}, { "hfsc", HFSC}, + { "hogs", HOGS}, { "hostid", HOSTID}, { "icmp-type", ICMPTYPE}, { "icmp6-type", ICMP6TYPE}, + { "ieee8021q-pcp", IEEE8021QPCP}, + { "ieee8021q-setpcp", IEEE8021QSETPCP}, { "if-bound", IFBOUND}, { "in", IN}, { "include", INCLUDE}, { "inet", INET}, { "inet6", INET6}, + { "interval", INTERVAL}, { "keep", KEEP}, { "label", LABEL}, { "limit", LIMIT}, @@ -5262,8 +5691,10 @@ lookup(char *s) { "load", LOAD}, { "log", LOG}, { "loginterface", LOGINTERFACE}, + { "match", MATCH}, { "max", MAXIMUM}, { "max-mss", MAXMSS}, + { "max-packets", MAXPCKT}, { "max-src-conn", MAXSRCCONN}, { "max-src-conn-rate", MAXSRCCONNRATE}, { "max-src-nodes", MAXSRCNODES}, @@ -5308,6 +5739,7 @@ lookup(char *s) { "rtable", RTABLE}, { "rule", RULE}, { "ruleset-optimization", RULESET_OPTIMIZATION}, + { "schedule", SCHEDULE}, { "scrub", SCRUB}, { "set", SET}, { "set-tos", SETTOS}, @@ -5324,10 +5756,12 @@ lookup(char *s) { "table", TABLE}, { "tag", TAG}, { "tagged", TAGGED}, + { "target", TARGET}, { "tbrsize", TBRSIZE}, { "timeout", TIMEOUT}, { "to", TO}, { "tos", TOS}, + { "tracker", TRACKER}, { "ttl", TTL}, { "upperlimit", UPPERLIMIT}, { "urpf-failed", URPFFAILED}, @@ -5939,6 +6373,20 @@ rule_label(struct pf_rule *r, char *s) return (0); } +int +rule_schedule(struct pf_rule *r, char *s) +{ + if (s) { + if (strlcpy(r->schedule, s, sizeof(r->label)) >= + sizeof(r->label)) { + yyerror("rule schedule label too long (max %d chars)", + sizeof(r->label)-1); + return (-1); + } + } + return (0); +} + u_int16_t parseicmpspec(char *w, sa_family_t af) { diff --git a/sbin/pfctl/pfctl.c b/sbin/pfctl/pfctl.c index f8802b3..e770275 100644 --- a/sbin/pfctl/pfctl.c +++ b/sbin/pfctl/pfctl.c @@ -78,6 +78,7 @@ void pfctl_addrprefix(char *, struct pf_addr *); int pfctl_kill_src_nodes(int, const char *, int); int pfctl_net_kill_states(int, const char *, int); int pfctl_label_kill_states(int, const char *, int); +int pfctl_kill_schedule(int, const char *, int); int pfctl_id_kill_states(int, const char *, int); void pfctl_init_options(struct pfctl *); int pfctl_load_options(struct pfctl *); @@ -117,6 +118,7 @@ const char *optiopt = NULL; char *pf_device = "/dev/pf"; char *ifaceopt; char *tableopt; +char *schedule = NULL; const char *tblcmdopt; int src_node_killers; char *src_node_kill[2]; @@ -654,6 +656,25 @@ pfctl_net_kill_states(int dev, const char *iface, int opts) } int +pfctl_kill_schedule(int dev, const char *sched, int opts) +{ + struct pfioc_schedule_kill psk; + + memset(&psk, 0, sizeof(psk)); + if (sched != NULL && strlcpy(psk.schedule, sched, + sizeof(psk.schedule)) >= sizeof(psk.schedule)) + errx(1, "invalid schedule label: %s", sched); + + if (ioctl(dev, DIOCKILLSCHEDULE, &psk)) + err(1, "DIOCKILLSCHEDULE"); + + if ((opts & PF_OPT_QUIET) == 0) + fprintf(stderr, "killed %d states from %s schedule label\n", + psk.numberkilled, sched); + return (0); +} + +int pfctl_label_kill_states(int dev, const char *iface, int opts) { struct pfioc_state_kill psk; @@ -804,10 +825,17 @@ pfctl_print_rule_counters(struct pf_rule *rule, int opts) (unsigned long long)(rule->bytes[0] + rule->bytes[1]), (uintmax_t)rule->u_states_cur); if (!(opts & PF_OPT_DEBUG)) +#ifdef PF_USER_INFO printf(" [ Inserted: uid %u pid %u " "State Creations: %-6ju]\n", (unsigned)rule->cuid, (unsigned)rule->cpid, (uintmax_t)rule->u_states_tot); +#else + printf(" [ Inserted: pid %u " + "State Creations: %-6ju]\n", + (unsigned)rule->cpid, + (uintmax_t)rule->u_states_tot); +#endif } } @@ -2004,7 +2032,7 @@ main(int argc, char *argv[]) usage(); while ((ch = getopt(argc, argv, - "a:AdD:eqf:F:ghi:k:K:mnNOo:Pp:rRs:t:T:vx:z")) != -1) { + "a:AdD:eqf:F:ghi:k:K:mnNOo:Pp:rRs:t:T:vx:y:z")) != -1) { switch (ch) { case 'a': anchoropt = optarg; @@ -2118,6 +2146,12 @@ main(int argc, char *argv[]) opts |= PF_OPT_VERBOSE2; opts |= PF_OPT_VERBOSE; break; + case 'y': + if (schedule != NULL && strlen(schedule) > 64) + errx(1, "Schedule label cannot be more than 64 characters\n"); + schedule = optarg; + mode = O_RDWR; + break; case 'x': debugopt = pfctl_lookup_option(optarg, debugopt_list); if (debugopt == NULL) { @@ -2326,6 +2360,9 @@ main(int argc, char *argv[]) if (src_node_killers) pfctl_kill_src_nodes(dev, ifaceopt, opts); + if (schedule) + pfctl_kill_schedule(dev, schedule, opts); + if (tblcmdopt != NULL) { error = pfctl_command_tables(argc, argv, tableopt, tblcmdopt, rulesopt, anchorname, opts); diff --git a/sbin/pfctl/pfctl_altq.c b/sbin/pfctl/pfctl_altq.c index ae566f8..9a21754 100644 --- a/sbin/pfctl/pfctl_altq.c +++ b/sbin/pfctl/pfctl_altq.c @@ -33,6 +33,7 @@ __FBSDID("$FreeBSD$"); #include <errno.h> #include <limits.h> #include <math.h> +#include <stdint.h> #include <stdio.h> #include <stdlib.h> #include <string.h> @@ -40,8 +41,10 @@ __FBSDID("$FreeBSD$"); #include <altq/altq.h> #include <altq/altq_cbq.h> +#include <altq/altq_codel.h> #include <altq/altq_priq.h> #include <altq/altq_hfsc.h> +#include <altq/altq_fairq.h> #include "pfctl_parser.h" #include "pfctl.h" @@ -59,6 +62,9 @@ static int cbq_compute_idletime(struct pfctl *, struct pf_altq *); static int check_commit_cbq(int, int, struct pf_altq *); static int print_cbq_opts(const struct pf_altq *); +static int print_codel_opts(const struct pf_altq *, + const struct node_queue_opt *); + static int eval_pfqueue_priq(struct pfctl *, struct pf_altq *); static int check_commit_priq(int, int, struct pf_altq *); static int print_priq_opts(const struct pf_altq *); @@ -68,6 +74,11 @@ static int check_commit_hfsc(int, int, struct pf_altq *); static int print_hfsc_opts(const struct pf_altq *, const struct node_queue_opt *); +static int eval_pfqueue_fairq(struct pfctl *, struct pf_altq *); +static int print_fairq_opts(const struct pf_altq *, + const struct node_queue_opt *); +static int check_commit_fairq(int, int, struct pf_altq *); + static void gsc_add_sc(struct gen_sc *, struct service_curve *); static int is_gsc_under_sc(struct gen_sc *, struct service_curve *); @@ -88,6 +99,8 @@ int eval_queue_opts(struct pf_altq *, struct node_queue_opt *, u_int32_t eval_bwspec(struct node_queue_bw *, u_int32_t); void print_hfsc_sc(const char *, u_int, u_int, u_int, const struct node_hfsc_sc *); +void print_fairq_sc(const char *, u_int, u_int, u_int, + const struct node_fairq_sc *); void pfaltq_store(struct pf_altq *a) @@ -173,6 +186,14 @@ print_altq(const struct pf_altq *a, unsigned int level, if (!print_hfsc_opts(a, qopts)) printf("hfsc "); break; + case ALTQT_FAIRQ: + if (!print_fairq_opts(a, qopts)) + printf("fairq "); + break; + case ALTQT_CODEL: + if (!print_codel_opts(a, qopts)) + printf("codel "); + break; } if (bw != NULL && bw->bw_percent > 0) { @@ -203,7 +224,8 @@ print_queue(const struct pf_altq *a, unsigned int level, printf("%s ", a->qname); if (print_interface) printf("on %s ", a->ifname); - if (a->scheduler == ALTQT_CBQ || a->scheduler == ALTQT_HFSC) { + if (a->scheduler == ALTQT_CBQ || a->scheduler == ALTQT_HFSC || + a->scheduler == ALTQT_FAIRQ) { if (bw != NULL && bw->bw_percent > 0) { if (bw->bw_percent < 100) printf("bandwidth %u%% ", bw->bw_percent); @@ -224,6 +246,9 @@ print_queue(const struct pf_altq *a, unsigned int level, case ALTQT_HFSC: print_hfsc_opts(a, qopts); break; + case ALTQT_FAIRQ: + print_fairq_opts(a, qopts); + break; } } @@ -240,15 +265,11 @@ eval_pfaltq(struct pfctl *pf, struct pf_altq *pa, struct node_queue_bw *bw, pa->ifbandwidth = bw->bw_absolute; else #ifdef __FreeBSD__ - if ((rate = getifspeed(pf->dev, pa->ifname)) == 0) { -#else - if ((rate = getifspeed(pa->ifname)) == 0) { + rate = getifspeed(pf->dev, pa->ifname); + if (rate == 0) + rate = IF_Mbps(1000); #endif - fprintf(stderr, "interface %s does not know its bandwidth, " - "please specify an absolute bandwidth\n", - pa->ifname); - errors++; - } else if ((pa->ifbandwidth = eval_bwspec(bw, rate)) == 0) + if ((pa->ifbandwidth = eval_bwspec(bw, rate)) == 0) pa->ifbandwidth = rate; errors += eval_queue_opts(pa, opts, pa->ifbandwidth); @@ -294,6 +315,9 @@ check_commit_altq(int dev, int opts) case ALTQT_HFSC: error = check_commit_hfsc(dev, opts, altq); break; + case ALTQT_FAIRQ: + error = check_commit_fairq(dev, opts, altq); + break; default: break; } @@ -342,7 +366,8 @@ eval_pfqueue(struct pfctl *pf, struct pf_altq *pa, struct node_queue_bw *bw, if (pa->qlimit == 0) pa->qlimit = DEFAULT_QLIMIT; - if (pa->scheduler == ALTQT_CBQ || pa->scheduler == ALTQT_HFSC) { + if (pa->scheduler == ALTQT_CBQ || pa->scheduler == ALTQT_HFSC || + pa->scheduler == ALTQT_FAIRQ) { pa->bandwidth = eval_bwspec(bw, parent == NULL ? 0 : parent->bandwidth); @@ -388,6 +413,9 @@ eval_pfqueue(struct pfctl *pf, struct pf_altq *pa, struct node_queue_bw *bw, case ALTQT_HFSC: error = eval_pfqueue_hfsc(pf, pa); break; + case ALTQT_FAIRQ: + error = eval_pfqueue_fairq(pf, pa); + break; default: break; } @@ -568,6 +596,8 @@ print_cbq_opts(const struct pf_altq *a) printf(" ecn"); if (opts->flags & CBQCLF_RIO) printf(" rio"); + if (opts->flags & CBQCLF_CODEL) + printf(" codel"); if (opts->flags & CBQCLF_CLEARDSCP) printf(" cleardscp"); if (opts->flags & CBQCLF_FLOWVALVE) @@ -655,6 +685,8 @@ print_priq_opts(const struct pf_altq *a) printf(" ecn"); if (opts->flags & PRCF_RIO) printf(" rio"); + if (opts->flags & PRCF_CODEL) + printf(" codel"); if (opts->flags & PRCF_CLEARDSCP) printf(" cleardscp"); if (opts->flags & PRCF_DEFAULTCLASS) @@ -700,13 +732,6 @@ eval_pfqueue_hfsc(struct pfctl *pf, struct pf_altq *pa) return (-1); } - if ((opts->rtsc_m1 < opts->rtsc_m2 && opts->rtsc_m1 != 0) || - (opts->lssc_m1 < opts->lssc_m2 && opts->lssc_m1 != 0) || - (opts->ulsc_m1 < opts->ulsc_m2 && opts->ulsc_m1 != 0)) { - warnx("m1 must be zero for convex curve: %s", pa->qname); - return (-1); - } - /* * admission control: * for the real-time service curve, the sum of the service curves @@ -807,6 +832,85 @@ err_ret: return (-1); } +/* + * FAIRQ support functions + */ +static int +eval_pfqueue_fairq(struct pfctl *pf __unused, struct pf_altq *pa) +{ + struct pf_altq *altq, *parent; + struct fairq_opts *opts; + struct service_curve sc; + + opts = &pa->pq_u.fairq_opts; + + if (pa->parent[0] == 0) { + /* root queue */ + opts->lssc_m1 = pa->ifbandwidth; + opts->lssc_m2 = pa->ifbandwidth; + opts->lssc_d = 0; + return (0); + } + + LIST_INIT(&lssc); + + /* if link_share is not specified, use bandwidth */ + if (opts->lssc_m2 == 0) + opts->lssc_m2 = pa->bandwidth; + + /* + * admission control: + * for the real-time service curve, the sum of the service curves + * should not exceed 80% of the interface bandwidth. 20% is reserved + * not to over-commit the actual interface bandwidth. + * for the link-sharing service curve, the sum of the child service + * curve should not exceed the parent service curve. + * for the upper-limit service curve, the assigned bandwidth should + * be smaller than the interface bandwidth, and the upper-limit should + * be larger than the real-time service curve when both are defined. + */ + parent = qname_to_pfaltq(pa->parent, pa->ifname); + if (parent == NULL) + errx(1, "parent %s not found for %s", pa->parent, pa->qname); + + TAILQ_FOREACH(altq, &altqs, entries) { + if (strncmp(altq->ifname, pa->ifname, IFNAMSIZ) != 0) + continue; + if (altq->qname[0] == 0) /* this is for interface */ + continue; + + if (strncmp(altq->parent, pa->parent, PF_QNAME_SIZE) != 0) + continue; + + /* if the class has a link-sharing service curve, add it. */ + if (opts->lssc_m2 != 0 && altq->pq_u.fairq_opts.lssc_m2 != 0) { + sc.m1 = altq->pq_u.fairq_opts.lssc_m1; + sc.d = altq->pq_u.fairq_opts.lssc_d; + sc.m2 = altq->pq_u.fairq_opts.lssc_m2; + gsc_add_sc(&lssc, &sc); + } + } + + /* check the link-sharing service curve. */ + if (opts->lssc_m2 != 0) { + sc.m1 = parent->pq_u.fairq_opts.lssc_m1; + sc.d = parent->pq_u.fairq_opts.lssc_d; + sc.m2 = parent->pq_u.fairq_opts.lssc_m2; + if (!is_gsc_under_sc(&lssc, &sc)) { + warnx("link-sharing sc exceeds parent's sc"); + goto err_ret; + } + } + + gsc_destroy(&lssc); + + return (0); + +err_ret: + gsc_destroy(&lssc); + return (-1); +} + static int check_commit_hfsc(int dev, int opts, struct pf_altq *pa) { @@ -847,6 +951,43 @@ check_commit_hfsc(int dev, int opts, struct pf_altq *pa) } static int +check_commit_fairq(int dev __unused, int opts __unused, struct pf_altq *pa) +{ + struct pf_altq *altq, *def = NULL; + int default_class; + int error = 0; + + /* check if fairq has one default queue for this interface */ + default_class = 0; + TAILQ_FOREACH(altq, &altqs, entries) { + if (strncmp(altq->ifname, pa->ifname, IFNAMSIZ) != 0) + continue; + if (altq->qname[0] == 0) /* this is for interface */ + continue; + if (altq->pq_u.fairq_opts.flags & FARF_DEFAULTCLASS) { + default_class++; + def = altq; + } + } + if (default_class != 1) { + warnx("should have one default queue on %s", pa->ifname); + return (1); + } + /* make sure the default queue is a leaf */ + TAILQ_FOREACH(altq, &altqs, entries) { + if (strncmp(altq->ifname, pa->ifname, IFNAMSIZ) != 0) + continue; + if (altq->qname[0] == 0) /* this is for interface */ + continue; + if (strncmp(altq->parent, def->qname, PF_QNAME_SIZE) == 0) { + warnx("default queue is not a leaf"); + error++; + } + } + return (error); +} + +static int print_hfsc_opts(const struct pf_altq *a, const struct node_queue_opt *qopts) { const struct hfsc_opts *opts; @@ -871,6 +1012,8 @@ print_hfsc_opts(const struct pf_altq *a, const struct node_queue_opt *qopts) printf(" ecn"); if (opts->flags & HFCF_RIO) printf(" rio"); + if (opts->flags & HFCF_CODEL) + printf(" codel"); if (opts->flags & HFCF_CLEARDSCP) printf(" cleardscp"); if (opts->flags & HFCF_DEFAULTCLASS) @@ -892,6 +1035,67 @@ print_hfsc_opts(const struct pf_altq *a, const struct node_queue_opt *qopts) return (0); } +static int +print_codel_opts(const struct pf_altq *a, const struct node_queue_opt *qopts) +{ + const struct codel_opts *opts; + + opts = &a->pq_u.codel_opts; + if (opts->target || opts->interval || opts->ecn) { + printf("codel("); + if (opts->target) + printf(" target %d", opts->target); + if (opts->interval) + printf(" interval %d", opts->interval); + if (opts->ecn) + printf("ecn"); + printf(" ) "); + + return (1); + } + + return (0); +} + +static int +print_fairq_opts(const struct pf_altq *a, const struct node_queue_opt *qopts) +{ + const struct fairq_opts *opts; + const struct node_fairq_sc *loc_lssc; + + opts = &a->pq_u.fairq_opts; + if (qopts == NULL) + loc_lssc = NULL; + else + loc_lssc = &qopts->data.fairq_opts.linkshare; + + if (opts->flags || + (opts->lssc_m2 != 0 && (opts->lssc_m2 != a->bandwidth || + opts->lssc_d != 0))) { + printf("fairq("); + if (opts->flags & FARF_RED) + printf(" red"); + if (opts->flags & FARF_ECN) + printf(" ecn"); + if (opts->flags & FARF_RIO) + printf(" rio"); + if (opts->flags & FARF_CODEL) + printf(" codel"); + if (opts->flags & FARF_CLEARDSCP) + printf(" cleardscp"); + if (opts->flags & FARF_DEFAULTCLASS) + printf(" default"); + if (opts->lssc_m2 != 0 && (opts->lssc_m2 != a->bandwidth || + opts->lssc_d != 0)) + print_fairq_sc("linkshare", opts->lssc_m1, opts->lssc_d, + opts->lssc_m2, loc_lssc); + printf(" ) "); + + return (1); + } else + return (0); +} + /* * admission control using generalized service curve */ @@ -1211,6 +1415,28 @@ eval_queue_opts(struct pf_altq *pa, struct node_queue_opt *opts, opts->data.hfsc_opts.upperlimit.d; } break; + case ALTQT_FAIRQ: + pa->pq_u.fairq_opts.flags = opts->data.fairq_opts.flags; + pa->pq_u.fairq_opts.nbuckets = opts->data.fairq_opts.nbuckets; + pa->pq_u.fairq_opts.hogs_m1 = + eval_bwspec(&opts->data.fairq_opts.hogs_bw, ref_bw); + + if (opts->data.fairq_opts.linkshare.used) { + pa->pq_u.fairq_opts.lssc_m1 = + eval_bwspec(&opts->data.fairq_opts.linkshare.m1, + ref_bw); + pa->pq_u.fairq_opts.lssc_m2 = + eval_bwspec(&opts->data.fairq_opts.linkshare.m2, + ref_bw); + pa->pq_u.fairq_opts.lssc_d = + opts->data.fairq_opts.linkshare.d; + } + break; + case ALTQT_CODEL: + pa->pq_u.codel_opts.target = opts->data.codel_opts.target; + pa->pq_u.codel_opts.interval = opts->data.codel_opts.interval; + pa->pq_u.codel_opts.ecn = opts->data.codel_opts.ecn; + break; default: warnx("eval_queue_opts: unknown scheduler type %u", opts->qtype); @@ -1256,3 +1482,27 @@ print_hfsc_sc(const char *scname, u_int m1, u_int d, u_int m2, if (d != 0) printf(")"); } + +void +print_fairq_sc(const char *scname, u_int m1, u_int d, u_int m2, + const struct node_fairq_sc *sc) +{ + printf(" %s", scname); + + if (d != 0) { + printf("("); + if (sc != NULL && sc->m1.bw_percent > 0) + printf("%u%%", sc->m1.bw_percent); + else + printf("%s", rate2str((double)m1)); + printf(" %u", d); + } + + if (sc != NULL && sc->m2.bw_percent > 0) + printf(" %u%%", sc->m2.bw_percent); + else + printf(" %s", rate2str((double)m2)); + + if (d != 0) + printf(")"); +} diff --git a/sbin/pfctl/pfctl_parser.c b/sbin/pfctl/pfctl_parser.c index 1f4375a..5b03a93 100644 --- a/sbin/pfctl/pfctl_parser.c +++ b/sbin/pfctl/pfctl_parser.c @@ -40,6 +40,8 @@ __FBSDID("$FreeBSD$"); #include <sys/param.h> #include <sys/proc.h> #include <net/if.h> +#include <net/ethernet.h> +#include <net/if_vlan_var.h> #include <netinet/in.h> #include <netinet/in_systm.h> #include <netinet/ip.h> @@ -65,6 +67,8 @@ __FBSDID("$FreeBSD$"); void print_op (u_int8_t, const char *, const char *); void print_port (u_int8_t, u_int16_t, u_int16_t, const char *, int); void print_ugid (u_int8_t, unsigned, unsigned, const char *, unsigned); +void print_ieee8021q_pcp (u_int8_t, uint8_t, uint8_t); +void print_ieee8021q_setpcp (u_int8_t); void print_flags (u_int8_t); void print_fromto(struct pf_rule_addr *, pf_osfp_t, struct pf_rule_addr *, u_int8_t, u_int8_t, int, int); @@ -353,6 +357,47 @@ print_ugid(u_int8_t op, unsigned u1, unsigned u2, const char *t, unsigned umax) print_op(op, a1, a2); } +static const char * +ieee8021q_pcp_name(u_int8_t pcp) +{ + const char *s; + + if (pcp == IEEE8021Q_PCP_BE) + s = "be"; + else if (pcp == IEEE8021Q_PCP_BK) + s = "bk"; + else if (pcp == IEEE8021Q_PCP_EE) + s = "ee"; + else if (pcp == IEEE8021Q_PCP_CA) + s = "ca"; + else if (pcp == IEEE8021Q_PCP_VI) + s = "vi"; + else if (pcp == IEEE8021Q_PCP_VO) + s = "vo"; + else if (pcp == IEEE8021Q_PCP_IC) + s = "ic"; + else if (pcp == IEEE8021Q_PCP_NC) + s = "nc"; + else + s = "??"; + return (s); +} + + void +print_ieee8021q_pcp(u_int8_t op, u_int8_t pcp0, u_int8_t pcp1) +{ + + printf(" ieee8021q-pcp"); + print_op(op, ieee8021q_pcp_name(pcp0), ieee8021q_pcp_name(pcp1)); +} + +void +print_ieee8021q_setpcp(u_int8_t pcp) +{ + + printf(" ieee8021q-setpcp %s", ieee8021q_pcp_name(pcp)); +} + void print_flags(u_int8_t f) { @@ -691,8 +736,14 @@ print_rule(struct pf_rule *r, const char *anchor_call, int verbose, int numeric) int i, opts; if (verbose) +#ifdef PF_USER_INFO printf("@%d ", r->nr); - if (r->action > PF_NORDR) +#else + printf("@%d(%u) ", r->nr, r->cuid); +#endif + if (r->action == PF_MATCH) + printf("match"); + else if (r->action > PF_NORDR) printf("action(%d)", r->action); else if (anchor_call[0]) { if (anchor_call[0] == '_') { @@ -847,8 +898,10 @@ print_rule(struct pf_rule *r, const char *anchor_call, int verbose, int numeric) printf(" code %u", r->code-1); } } - if (r->tos) + if (r->tos && (r->rule_flag & PFRULE_TOS)) printf(" tos 0x%2.2x", r->tos); + if (r->tos && (r->rule_flag & PFRULE_DSCP)) + printf(" dscp 0x%2.2x", r->tos & DSCP_MASK); if (!r->keep_state && r->action == PF_PASS && !anchor_call[0]) printf(" no state"); else if (r->keep_state == PF_STATE_NORMAL) @@ -873,7 +926,7 @@ print_rule(struct pf_rule *r, const char *anchor_call, int verbose, int numeric) printf(" probability %s%%", buf); } opts = 0; - if (r->max_states || r->max_src_nodes || r->max_src_states) + if (r->max_states || r->max_src_nodes || r->max_src_states || r->spare2) opts = 1; if (r->rule_flag & PFRULE_NOSYNC) opts = 1; @@ -920,6 +973,12 @@ print_rule(struct pf_rule *r, const char *anchor_call, int verbose, int numeric) printf("max-src-conn %u", r->max_src_conn); opts = 0; } + if (r->spare2) { + if (!opts) + printf(", "); + printf("max-packets %u", r->spare2); + opts = 0; + } if (r->max_src_conn_rate.limit) { if (!opts) printf(", "); @@ -999,6 +1058,14 @@ print_rule(struct pf_rule *r, const char *anchor_call, int verbose, int numeric) } if (r->label[0]) printf(" label \"%s\"", r->label); + if (r->dnpipe && r->pdnpipe) + printf(" %s(%d, %d)", + r->free_flags & PFRULE_DN_IS_PIPE ? "dnpipe" : "dnqueue", + r->dnpipe, r->pdnpipe); + else if (r->dnpipe) + printf(" %s %d", + r->free_flags & PFRULE_DN_IS_PIPE ? "dnpipe" : "dnqueue", + r->dnpipe); if (r->qname[0] && r->pqname[0]) printf(" queue(%s, %s)", r->qname, r->pqname); else if (r->qname[0]) @@ -1012,6 +1079,13 @@ print_rule(struct pf_rule *r, const char *anchor_call, int verbose, int numeric) } if (r->rtableid != -1) printf(" rtable %u", r->rtableid); + if (r->ieee8021q_pcp.op != 0) + print_ieee8021q_pcp(r->ieee8021q_pcp.op, + r->ieee8021q_pcp.pcp[0], r->ieee8021q_pcp.pcp[1]); + if (r->ieee8021q_pcp.setpcp & SETPCP_VALID) + print_ieee8021q_setpcp(r->ieee8021q_pcp.setpcp & + SETPCP_PCP_MASK); + if (r->divert.port) { #ifdef __FreeBSD__ printf(" divert-to %u", ntohs(r->divert.port)); diff --git a/sbin/pfctl/pfctl_parser.h b/sbin/pfctl/pfctl_parser.h index 5c909e9..2b7fea7 100644 --- a/sbin/pfctl/pfctl_parser.h +++ b/sbin/pfctl/pfctl_parser.h @@ -150,12 +150,28 @@ struct node_hfsc_opts { int flags; }; +struct node_fairq_sc { + struct node_queue_bw m1; /* slope of 1st segment; bps */ + u_int d; /* x-projection of m1; msec */ + struct node_queue_bw m2; /* slope of 2nd segment; bps */ + u_int8_t used; +}; + +struct node_fairq_opts { + struct node_fairq_sc linkshare; + struct node_queue_bw hogs_bw; + u_int nbuckets; + int flags; +}; + struct node_queue_opt { int qtype; union { struct cbq_opts cbq_opts; + struct codel_opts codel_opts; struct priq_opts priq_opts; struct node_hfsc_opts hfsc_opts; + struct node_fairq_opts fairq_opts; } data; }; diff --git a/sbin/pfctl/pfctl_qstats.c b/sbin/pfctl/pfctl_qstats.c index 95371e4..8ffd87d 100644 --- a/sbin/pfctl/pfctl_qstats.c +++ b/sbin/pfctl/pfctl_qstats.c @@ -36,8 +36,10 @@ __FBSDID("$FreeBSD$"); #include <altq/altq.h> #include <altq/altq_cbq.h> +#include <altq/altq_codel.h> #include <altq/altq_priq.h> #include <altq/altq_hfsc.h> +#include <altq/altq_fairq.h> #include "pfctl.h" #include "pfctl_parser.h" @@ -46,6 +48,8 @@ union class_stats { class_stats_t cbq_stats; struct priq_classstats priq_stats; struct hfsc_classstats hfsc_stats; + struct fairq_classstats fairq_stats; + struct codel_ifstats codel_stats; }; #define AVGN_MAX 8 @@ -75,8 +79,10 @@ struct pf_altq_node *pfctl_find_altq_node(struct pf_altq_node *, void pfctl_print_altq_node(int, const struct pf_altq_node *, unsigned, int); void print_cbqstats(struct queue_stats); +void print_codelstats(struct queue_stats); void print_priqstats(struct queue_stats); void print_hfscstats(struct queue_stats); +void print_fairqstats(struct queue_stats); void pfctl_free_altq_node(struct pf_altq_node *); void pfctl_print_altq_nodestat(int, const struct pf_altq_node *); @@ -162,7 +168,7 @@ pfctl_update_qstats(int dev, struct pf_altq_node **root) return (-1); } #ifdef __FreeBSD__ - if (pa.altq.qid > 0 && + if ((pa.altq.qid > 0 || pa.altq.scheduler == ALTQT_CODEL) && !(pa.altq.local_flags & PFALTQ_FLAG_IF_REMOVED)) { #else if (pa.altq.qid > 0) { @@ -300,7 +306,7 @@ pfctl_print_altq_node(int dev, const struct pf_altq_node *node, void pfctl_print_altq_nodestat(int dev, const struct pf_altq_node *a) { - if (a->altq.qid == 0) + if (a->altq.qid == 0 && a->altq.scheduler != ALTQT_CODEL) return; #ifdef __FreeBSD__ @@ -317,6 +323,12 @@ pfctl_print_altq_nodestat(int dev, const struct pf_altq_node *a) case ALTQT_HFSC: print_hfscstats(a->qstats); break; + case ALTQT_FAIRQ: + print_fairqstats(a->qstats); + break; + case ALTQT_CODEL: + print_codelstats(a->qstats); + break; } } @@ -342,6 +354,28 @@ print_cbqstats(struct queue_stats cur) } void +print_codelstats(struct queue_stats cur) +{ + printf(" [ pkts: %10llu bytes: %10llu " + "dropped pkts: %6llu bytes: %6llu ]\n", + (unsigned long long)cur.data.codel_stats.cl_xmitcnt.packets, + (unsigned long long)cur.data.codel_stats.cl_xmitcnt.bytes, + (unsigned long long)cur.data.codel_stats.cl_dropcnt.packets + + cur.data.codel_stats.stats.drop_cnt.packets, + (unsigned long long)cur.data.codel_stats.cl_dropcnt.bytes + + cur.data.codel_stats.stats.drop_cnt.bytes); + printf(" [ qlength: %3d/%3d ]\n", + cur.data.codel_stats.qlength, cur.data.codel_stats.qlimit); + + if (cur.avgn < 2) + return; + + printf(" [ measured: %7.1f packets/s, %s/s ]\n", + cur.avg_packets / STAT_INTERVAL, + rate2str((8 * cur.avg_bytes) / STAT_INTERVAL)); +} + +void print_priqstats(struct queue_stats cur) { printf(" [ pkts: %10llu bytes: %10llu " @@ -382,6 +416,26 @@ print_hfscstats(struct queue_stats cur) } void +print_fairqstats(struct queue_stats cur) +{ + printf(" [ pkts: %10llu bytes: %10llu " + "dropped pkts: %6llu bytes: %6llu ]\n", + (unsigned long long)cur.data.fairq_stats.xmit_cnt.packets, + (unsigned long long)cur.data.fairq_stats.xmit_cnt.bytes, + (unsigned long long)cur.data.fairq_stats.drop_cnt.packets, + (unsigned long long)cur.data.fairq_stats.drop_cnt.bytes); + printf(" [ qlength: %3d/%3d ]\n", + cur.data.fairq_stats.qlength, cur.data.fairq_stats.qlimit); + + if (cur.avgn < 2) + return; + + printf(" [ measured: %7.1f packets/s, %s/s ]\n", + cur.avg_packets / STAT_INTERVAL, + rate2str((8 * cur.avg_bytes) / STAT_INTERVAL)); +} + +void pfctl_free_altq_node(struct pf_altq_node *node) { while (node != NULL) { @@ -402,7 +456,7 @@ update_avg(struct pf_altq_node *a) u_int64_t b, p; int n; - if (a->altq.qid == 0) + if (a->altq.qid == 0 && a->altq.scheduler != ALTQT_CODEL) return; qs = &a->qstats; @@ -421,6 +475,14 @@ update_avg(struct pf_altq_node *a) b = qs->data.hfsc_stats.xmit_cnt.bytes; p = qs->data.hfsc_stats.xmit_cnt.packets; break; + case ALTQT_FAIRQ: + b = qs->data.fairq_stats.xmit_cnt.bytes; + p = qs->data.fairq_stats.xmit_cnt.packets; + break; + case ALTQT_CODEL: + b = qs->data.codel_stats.cl_xmitcnt.bytes; + p = qs->data.codel_stats.cl_xmitcnt.packets; + break; default: b = 0; p = 0; diff --git a/sbin/route/route.c b/sbin/route/route.c index f64f525..6f73ded 100644 --- a/sbin/route/route.c +++ b/sbin/route/route.c @@ -1557,10 +1557,15 @@ rtmsg(int cmd, int flags, int fib) print_rtmsg(&rtm, l); if (debugonly) return (0); +testagain: if ((rlen = write(s, (char *)&m_rtmsg, l)) < 0) { if (errno == EPERM) err(1, "writing to routing socket"); warn("writing to routing socket"); + if (rtm.rtm_type == RTM_CHANGE) { + rtm.rtm_type = RTM_ADD; + goto testagain; + } return (-1); } if (cmd == RTM_GET) { diff --git a/sbin/setkey/parse.y b/sbin/setkey/parse.y index c551c35..60e779d 100644 --- a/sbin/setkey/parse.y +++ b/sbin/setkey/parse.y @@ -100,6 +100,7 @@ extern void yyerror(const char *); %token F_EXT EXTENSION NOCYCLICSEQ %token ALG_AUTH ALG_AUTH_NOKEY %token ALG_ENC ALG_ENC_NOKEY ALG_ENC_DESDERIV ALG_ENC_DES32IV ALG_ENC_OLD +%token ALG_ENC_SALT %token ALG_COMP %token F_LIFETIME_HARD F_LIFETIME_SOFT %token DECSTRING QUOTEDSTRING HEXSTRING STRING ANY @@ -111,6 +112,7 @@ extern void yyerror(const char *); %type <num> prefix protocol_spec upper_spec %type <num> ALG_ENC ALG_ENC_DESDERIV ALG_ENC_DES32IV ALG_ENC_OLD ALG_ENC_NOKEY +%type <num> ALG_ENC_SALT %type <num> ALG_AUTH ALG_AUTH_NOKEY %type <num> ALG_COMP %type <num> PR_ESP PR_AH PR_IPCOMP PR_TCP @@ -402,6 +404,27 @@ enc_alg return -1; } } + | ALG_ENC_SALT key_string + { + if ($1 < 0) { + yyerror("unsupported algorithm"); + return -1; + } + p_alg_enc = $1; + + p_key_enc_len = $2.len; + + p_key_enc = $2.buf; + /* + * Salted keys include a 4 byte value that is + * not part of the key. + */ + if (ipsec_check_keylen(SADB_EXT_SUPPORTED_ENCRYPT, + p_alg_enc, PFKEY_UNUNIT64(p_key_enc_len - 4)) < 0) { + yyerror(ipsec_strerror()); + return -1; + } + } ; auth_alg diff --git a/sbin/setkey/setkey.8 b/sbin/setkey/setkey.8 index 4306ec2..b6d4157 100644 --- a/sbin/setkey/setkey.8 +++ b/sbin/setkey/setkey.8 @@ -627,11 +627,12 @@ des-deriv 64 ipsec-ciph-des-derived-01 3des-deriv 192 no document rijndael-cbc 128/192/256 rfc3602 aes-ctr 160/224/288 draft-ietf-ipsec-ciph-aes-ctr-03 +aes-gcm-16 160/224/288 rfc4106 camellia-cbc 128/192/256 rfc4312 .Ed .Pp Note that the first 128/192/256 bits of a key for -.Li aes-ctr +.Li aes-ctr or aes-gcm-16 will be used as AES key, and remaining 32 bits will be used as nonce. .Pp The following are the list of compression algorithms that can be used diff --git a/sbin/setkey/token.l b/sbin/setkey/token.l index c89982f..7f30859 100644 --- a/sbin/setkey/token.l +++ b/sbin/setkey/token.l @@ -159,15 +159,16 @@ tcp { yylval.num = 0; return(PR_TCP); } {hyphen}E { BEGIN S_ENCALG; return(F_ENC); } <S_ENCALG>des-cbc { yylval.num = SADB_EALG_DESCBC; BEGIN INITIAL; return(ALG_ENC); } <S_ENCALG>3des-cbc { yylval.num = SADB_EALG_3DESCBC; BEGIN INITIAL; return(ALG_ENC); } -<S_ENCALG>null { yylval.num = SADB_EALG_NULL; BEGIN INITIAL; return(ALG_ENC_NOKEY); } +<S_ENCALG>null { yylval.num = SADB_EALG_NULL; BEGIN INITIAL; return(ALG_ENC); } <S_ENCALG>simple { yylval.num = SADB_EALG_NULL; BEGIN INITIAL; return(ALG_ENC_OLD); } <S_ENCALG>blowfish-cbc { yylval.num = SADB_X_EALG_BLOWFISHCBC; BEGIN INITIAL; return(ALG_ENC); } <S_ENCALG>cast128-cbc { yylval.num = SADB_X_EALG_CAST128CBC; BEGIN INITIAL; return(ALG_ENC); } <S_ENCALG>des-deriv { yylval.num = SADB_EALG_DESCBC; BEGIN INITIAL; return(ALG_ENC_DESDERIV); } <S_ENCALG>des-32iv { yylval.num = SADB_EALG_DESCBC; BEGIN INITIAL; return(ALG_ENC_DES32IV); } <S_ENCALG>rijndael-cbc { yylval.num = SADB_X_EALG_RIJNDAELCBC; BEGIN INITIAL; return(ALG_ENC); } -<S_ENCALG>aes-ctr { yylval.num = SADB_X_EALG_AESCTR; BEGIN INITIAL; return(ALG_ENC); } +<S_ENCALG>aes-ctr { yylval.num = SADB_X_EALG_AESCTR; BEGIN INITIAL; return(ALG_ENC_SALT); } <S_ENCALG>camellia-cbc { yylval.num = SADB_X_EALG_CAMELLIACBC; BEGIN INITIAL; return(ALG_ENC); } +<S_ENCALG>aes-gcm-16 { yylval.num = SADB_X_EALG_AESGCM16; BEGIN INITIAL; return(ALG_ENC_SALT); } /* compression algorithms */ {hyphen}C { return(F_COMP); } diff --git a/share/man/man4/altq.4 b/share/man/man4/altq.4 index 1effdb2..771dc53 100644 --- a/share/man/man4/altq.4 +++ b/share/man/man4/altq.4 @@ -25,7 +25,7 @@ .\" .\" $FreeBSD$ .\" -.Dd December 9, 2011 +.Dd July 24, 2015 .Dt ALTQ 4 .Os .Sh NAME @@ -35,11 +35,13 @@ .Cd options ALTQ .Pp .Cd options ALTQ_CBQ +.Cd options ALTQ_CODEL .Cd options ALTQ_RED .Cd options ALTQ_RIO .Cd options ALTQ_HFSC .Cd options ALTQ_CDNR .Cd options ALTQ_PRIQ +.Cd options ALTQ_FAIRQ .Sh DESCRIPTION The .Nm @@ -73,6 +75,10 @@ Enable Build the .Dq "Class Based Queuing" discipline. +.It Dv ALTQ_CODEL +Build the +.Dq "Controlled Delay" +discipline. .It Dv ALTQ_RED Build the .Dq "Random Early Detection" @@ -93,6 +99,10 @@ any of the available disciplines or consumers. Build the .Dq "Priority Queuing" discipline. +.It Dv ALTQ_FAIRQ +Build the +.Dq "Fair Queuing" +discipline. .It Dv ALTQ_NOPCC Required if the TSC is unusable. .It Dv ALTQ_DEBUG diff --git a/share/man/man4/crypto.4 b/share/man/man4/crypto.4 index bb62825..c3d328c 100644 --- a/share/man/man4/crypto.4 +++ b/share/man/man4/crypto.4 @@ -1,8 +1,16 @@ -.\" $OpenBSD: crypto.4,v 1.4 2002/09/12 07:15:03 deraadt Exp $ +.\" $NetBSD: crypto.4,v 1.24 2014/01/27 21:23:59 pgoyette Exp $ .\" -.\" Copyright (c) 2001 Theo de Raadt +.\" Copyright (c) 2008 The NetBSD Foundation, Inc. +.\" Copyright (c) 2014 The FreeBSD Foundation .\" All rights reserved. .\" +.\" Portions of this documentation were written by John-Mark Gurney +.\" under sponsorship of the FreeBSD Foundation and +.\" Rubicon Communications, LLC (Netgate). +.\" +.\" This code is derived from software contributed to The NetBSD Foundation +.\" by Coyote Point Systems, Inc. +.\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: @@ -11,99 +19,378 @@ .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. -.\" 3. The name of the author may not be used to endorse or promote products -.\" derived from this software without specific prior written permission. -.\" -.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR -.\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -.\" WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -.\" DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, -.\" INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -.\" (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -.\" SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, -.\" STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN -.\" ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS +.\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +.\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +.\" PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS +.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE .\" POSSIBILITY OF SUCH DAMAGE. .\" +.\" +.\" +.\" Copyright (c) 2004 +.\" Jonathan Stone <jonathan@dsg.stanford.edu>. All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY Jonathan Stone AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL Jonathan Stone OR THE VOICES IN HIS HEAD +.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +.\" THE POSSIBILITY OF SUCH DAMAGE. +.\" .\" $FreeBSD$ .\" -.Dd September 7, 2010 +.Dd December 12, 2014 .Dt CRYPTO 4 .Os .Sh NAME .Nm crypto , .Nm cryptodev -.Nd hardware crypto access driver +.Nd user-mode access to hardware-accelerated cryptography .Sh SYNOPSIS .Cd device crypto .Cd device cryptodev +.Pp +.In sys/ioctl.h +.In sys/time.h +.In crypto/cryptodev.h .Sh DESCRIPTION The .Nm -driver provides a device-independent framework to support -cryptographic operations in the kernel. +driver gives user-mode applications access to hardware-accelerated +cryptographic transforms, as implemented by the +.Xr opencrypto 9 +in-kernel interface. +.Pp The -.Nm cryptodev -driver provides userland applications access to this support -through the .Pa /dev/crypto -device. -This node primarily operates in an +special device provides an +.Xr ioctl 2 +based interface. +User-mode applications should open the special device, +then issue .Xr ioctl 2 -based model, permitting a variety of applications to query device capabilities, -submit transactions, and get results. +calls on the descriptor. +User-mode access to +.Pa /dev/crypto +is controlled by three +.Xr sysctl 8 +variables, +.Ic kern.userasymcrypto +and +.Ic kern.cryptodevallowsoft . +See +.Xr sysctl 7 +for additional details. .Pp -If -.Ar count -given in the specification, and is greater than 0, a maximum of one +The .Nm -device is created. +device provides two distinct modes of operation: one mode for +symmetric-keyed cryptographic requests, and a second mode for +both asymmetric-key (public-key/private-key) requests, and for +modular arithmetic (for Diffie-Hellman key exchange and other +cryptographic protocols). +The two modes are described separately below. +.Sh THEORY OF OPERATION +Regardless of whether symmetric-key or asymmetric-key operations are +to be performed, use of the device requires a basic series of steps: +.Pp +.Bl -enum +.It +Open a file descriptor for the device. +See +.Xr open 2 . +.It +If any symmetric operation will be performed, +create one session, with +.Dv CIOCGSESSION . +Most applications will require at least one symmetric session. +Since cipher and MAC keys are tied to sessions, many +applications will require more. +Asymmetric operations do not use sessions. +.It +Submit requests, synchronously with +.Dv CIOCCRYPT +(symmetric) +or +.Dv CIOCKEY +(asymmetric). +.It +Destroy one session with +.Dv CIOCFSESSION . +.It +Close the device with +.Xr close 2 . +.El +.Sh SYMMETRIC-KEY OPERATION +The symmetric-key operation mode provides a context-based API +to traditional symmetric-key encryption (or privacy) algorithms, +or to keyed and unkeyed one-way hash (HMAC and MAC) algorithms. +The symmetric-key mode also permits fused operation, +where the hardware performs both a privacy algorithm and an integrity-check +algorithm in a single pass over the data: either a fused +encrypt/HMAC-generate operation, or a fused HMAC-verify/decrypt operation. +.Pp +To use symmetric mode, you must first create a session specifying +the algorithm(s) and key(s) to use; then issue encrypt or decrypt +requests against the session. +.Ss Algorithms +For a list of supported algorithms, see +.Xr crypto 7 +and +.Xr crypto 9 . +.Ss IOCTL Request Descriptions +.\" +.Bl -tag -width CIOCGSESSION +.\" +.It Dv CRIOGET Fa int *fd +Clone the fd argument to +.Xr ioctl 2 , +yielding a new file descriptor for the creation of sessions. +.\" +.It Dv CIOCFINDDEV Fa struct crypt_find_op *fop +.Bd -literal +struct crypt_find_op { + int crid; /* driver id + flags */ + char name[32]; /* device/driver name */ +}; + +.Ed +If +.Fa crid +is -1, then find the driver named +.Fa name +and return the id in +.Fa crid . +If +.Fa crid +is not -1, return the name of the driver with +.Fa crid +in +.Fa name . +In either case, if the driver is not found, +.Dv ENOENT +is returned. +.It Dv CIOCGSESSION Fa struct session_op *sessp +.Bd -literal +struct session_op { + u_int32_t cipher; /* e.g. CRYPTO_DES_CBC */ + u_int32_t mac; /* e.g. CRYPTO_MD5_HMAC */ + + u_int32_t keylen; /* cipher key */ + void * key; + int mackeylen; /* mac key */ + void * mackey; + + u_int32_t ses; /* returns: ses # */ +}; + +.Ed +Create a new cryptographic session on a file descriptor for the device; +that is, a persistent object specific to the chosen +privacy algorithm, integrity algorithm, and keys specified in +.Fa sessp . +The special value 0 for either privacy or integrity +is reserved to indicate that the indicated operation (privacy or integrity) +is not desired for this session. +.Pp +Multiple sessions may be bound to a single file descriptor. +The session ID returned in +.Fa sessp-\*[Gt]ses +is supplied as a required field in the symmetric-operation structure +.Fa crypt_op +for future encryption or hashing requests. +.\" .Pp +.\" This implementation will never return a session ID of 0 for a successful +.\" creation of a session, which is a +.\" .Nx +.\" extension. +.Pp +For non-zero symmetric-key privacy algorithms, the privacy algorithm +must be specified in +.Fa sessp-\*[Gt]cipher , +the key length in +.Fa sessp-\*[Gt]keylen , +and the key value in the octets addressed by +.Fa sessp-\*[Gt]key . .Pp -The following +For keyed one-way hash algorithms, the one-way hash must be specified +in +.Fa sessp-\*[Gt]mac , +the key length in +.Fa sessp-\*[Gt]mackey , +and the key value in the octets addressed by +.Fa sessp-\*[Gt]mackeylen . +.\" +.Pp +Support for a specific combination of fused privacy and +integrity-check algorithms depends on whether the underlying +hardware supports that combination. +Not all combinations are supported +by all hardware, even if the hardware supports each operation as a +stand-alone non-fused operation. +.It Dv CIOCCRYPT Fa struct crypt_op *cr_op +.Bd -literal +struct crypt_op { + u_int32_t ses; + u_int16_t op; /* e.g. COP_ENCRYPT */ + u_int16_t flags; + u_int len; + caddr_t src, dst; + caddr_t mac; /* must be large enough for result */ + caddr_t iv; +}; + +.Ed +Request a symmetric-key (or hash) operation. +The file descriptor argument to .Xr ioctl 2 -calls apply only to the -.Nm -devices: -.Bl -tag -width ".Dv CIOCGSESSION" -.It Dv CIOCGSESSION -Setup a new crypto session for a new type of operation. -.It Dv CIOCFSESSION -Free a previously established session. -.It Dv CIOCCRYPT -Perform a crypto operation against a previously setup session. +must have been bound to a valid session. +To encrypt, set +.Fa cr_op-\*[Gt]op +to +.Dv COP_ENCRYPT . +To decrypt, set +.Fa cr_op-\*[Gt]op +to +.Dv COP_DECRYPT . +The field +.Fa cr_op-\*[Gt]len +supplies the length of the input buffer; the fields +.Fa cr_op-\*[Gt]src , +.Fa cr_op-\*[Gt]dst , +.Fa cr_op-\*[Gt]mac , +.Fa cr_op-\*[Gt]iv +supply the addresses of the input buffer, output buffer, +one-way hash, and initialization vector, respectively. +.It Dv CIOCCRYPTAEAD Fa struct crypt_aead *cr_aead +.Bd -literal +struct crypt_aead { + u_int32_t ses; + u_int16_t op; /* e.g. COP_ENCRYPT */ + u_int16_t flags; + u_int len; + u_int aadlen; + u_int ivlen; + caddr_t src, dst; + caddr_t aad; + caddr_t tag; /* must be large enough for result */ + caddr_t iv; +}; + +.Ed +The +.Dv CIOCCRYPTAEAD +is similar to the +.Dv CIOCCRYPT +but provides additional data in +.Fa cr_aead-\*[Gt]aad +to include in the authentication mode. +.It Dv CIOCFSESSION Fa u_int32_t ses_id +Destroys the /dev/crypto session associated with the file-descriptor +argument. +.It Dv CIOCNFSESSION Fa struct crypt_sfop *sfop ; +.Bd -literal +struct crypt_sfop { + size_t count; + u_int32_t *sesid; +}; + +.Ed +Destroys the +.Fa sfop-\*[Gt]count +sessions specified by the +.Fa sfop +array of session identifiers. .El -.Sh FEATURES -Depending on hardware being present, the following symmetric and -asymmetric cryptographic features are potentially available from -.Pa /dev/crypto : +.\" +.Sh ASYMMETRIC-KEY OPERATION +.Ss Asymmetric-key algorithms +Contingent upon hardware support, the following asymmetric +(public-key/private-key; or key-exchange subroutine) operations may +also be available: .Pp -.Bl -tag -width ".Dv CRYPTO_RIPEMD160_HMAC" -offset indent -compact -.It Dv CRYPTO_DES_CBC -.It Dv CRYPTO_3DES_CBC -.It Dv CRYPTO_BLF_CBC -.It Dv CRYPTO_CAMELLIA_CBC -.It Dv CRYPTO_CAST_CBC -.It Dv CRYPTO_SKIPJACK_CBC -.It Dv CRYPTO_MD5_HMAC -.It Dv CRYPTO_SHA1_HMAC -.It Dv CRYPTO_RIPEMD160_HMAC -.It Dv CRYPTO_MD5_KPDK -.It Dv CRYPTO_SHA1_KPDK -.It Dv CRYPTO_AES_CBC -.It Dv CRYPTO_ARC4 -.It Dv CRYPTO_MD5 -.It Dv CRYPTO_SHA1 -.It Dv CRK_MOD_EXP -.It Dv CRK_MOD_EXP_CRT -.It Dv CRK_DSA_SIGN -.It Dv CRK_DSA_VERIFY -.It Dv CRK_DH_COMPUTE_KEY +.Bl -column "CRK_DH_COMPUTE_KEY" "Input parameter" "Output parameter" -offset indent -compact +.It Em "Algorithm" Ta "Input parameter" Ta "Output parameter" +.It Em " " Ta "Count" Ta "Count" +.It Dv CRK_MOD_EXP Ta 3 Ta 1 +.It Dv CRK_MOD_EXP_CRT Ta 6 Ta 1 +.It Dv CRK_DSA_SIGN Ta 5 Ta 2 +.It Dv CRK_DSA_VERIFY Ta 7 Ta 0 +.It Dv CRK_DH_COMPUTE_KEY Ta 3 Ta 1 .El -.Sh FILES -.Bl -tag -width ".Pa /dev/crypto" -compact -.It Pa /dev/crypto -crypto access device +.Pp +See below for discussion of the input and output parameter counts. +.Ss Asymmetric-key commands +.Bl -tag -width CIOCKEY +.It Dv CIOCASYMFEAT Fa int *feature_mask +Returns a bitmask of supported asymmetric-key operations. +Each of the above-listed asymmetric operations is present +if and only if the bit position numbered by the code for that operation +is set. +For example, +.Dv CRK_MOD_EXP +is available if and only if the bit +.Pq 1 \*[Lt]\*[Lt] Dv CRK_MOD_EXP +is set. +.It Dv CIOCKEY Fa struct crypt_kop *kop +.Bd -literal +struct crypt_kop { + u_int crk_op; /* e.g. CRK_MOD_EXP */ + u_int crk_status; /* return status */ + u_short crk_iparams; /* # of input params */ + u_short crk_oparams; /* # of output params */ + u_int crk_pad1; + struct crparam crk_param[CRK_MAXPARAM]; +}; + +/* Bignum parameter, in packed bytes. */ +struct crparam { + void * crp_p; + u_int crp_nbits; +}; + +.Ed +Performs an asymmetric-key operation from the list above. +The specific operation is supplied in +.Fa kop-\*[Gt]crk_op ; +final status for the operation is returned in +.Fa kop-\*[Gt]crk_status . +The number of input arguments and the number of output arguments +is specified in +.Fa kop-\*[Gt]crk_iparams +and +.Fa kop-\*[Gt]crk_iparams , +respectively. +The field +.Fa crk_param[] +must be filled in with exactly +.Fa kop-\*[Gt]crk_iparams + kop-\*[Gt]crk_oparams +arguments, each encoded as a +.Fa struct crparam +(address, bitlength) pair. +.Pp +The semantics of these arguments are currently undocumented. .El .Sh SEE ALSO .Xr aesni 4 , @@ -113,6 +400,7 @@ crypto access device .Xr padlock 4 , .Xr safe 4 , .Xr ubsec 4 , +.Xr crypto 7 , .Xr geli 8 , .Xr crypto 9 .Sh HISTORY @@ -124,3 +412,24 @@ The .Nm driver was imported to .Fx 5.0 . +.Sh BUGS +Error checking and reporting is weak. +.Pp +The values specified for symmetric-key key sizes to +.Dv CIOCGSESSION +must exactly match the values expected by +.Xr opencrypto 9 . +The output buffer and MAC buffers supplied to +.Dv CIOCCRYPT +must follow whether privacy or integrity algorithms were specified for +session: if you request a +.No non- Ns Dv NULL +algorithm, you must supply a suitably-sized buffer. +.Pp +The scheme for passing arguments for asymmetric requests is baroque. +.Pp +The naming inconsistency between +.Dv CRIOGET +and the various +.Dv CIOC Ns \&* +names is an unfortunate historical artifact. diff --git a/share/man/man4/inet.4 b/share/man/man4/inet.4 index 09cb0bc..6fe4e61 100644 --- a/share/man/man4/inet.4 +++ b/share/man/man4/inet.4 @@ -32,7 +32,7 @@ .\" From: @(#)inet.4 8.1 (Berkeley) 6/5/93 .\" $FreeBSD$ .\" -.Dd January 26, 2012 +.Dd Feb 4, 2016 .Dt INET 4 .Os .Sh NAME @@ -169,33 +169,11 @@ MIB. In addition to the variables supported by the transport protocols (for which the respective manual pages may be consulted), the following general variables are defined: -.Bl -tag -width IPCTL_FASTFORWARDING +.Bl -tag -width IPCTL_ACCEPTSOURCEROUTE .It Dv IPCTL_FORWARDING .Pq ip.forwarding Boolean: enable/disable forwarding of IP packets. Defaults to off. -.It Dv IPCTL_FASTFORWARDING -.Pq ip.fastforwarding -Boolean: enable/disable the use of -.Tn fast IP forwarding -code. -Defaults to off. -When -.Tn fast IP forwarding -is enabled, IP packets are forwarded directly to the appropriate network -interface with direct processing to completion, which greatly improves -the throughput. -All packets for local IP addresses, non-unicast, or with IP options are -handled by the normal IP input processing path. -All features of the normal (slow) IP forwarding path are supported -including firewall (through -.Xr pfil 9 -hooks) checking, except -.Xr ipsec 4 -tunnel brokering. -The -.Tn IP fastforwarding -path does not generate ICMP redirect or source quench messages. .It Dv IPCTL_SENDREDIRECTS .Pq ip.redirect Boolean: enable/disable sending of ICMP redirects in response to diff --git a/share/man/man7/Makefile b/share/man/man7/Makefile index 37afd65..93c14c6 100644 --- a/share/man/man7/Makefile +++ b/share/man/man7/Makefile @@ -9,6 +9,7 @@ MAN= adding_user.7 \ bsd.snmpmod.mk.7 \ build.7 \ clocks.7 \ + crypto.7 \ c99.7 \ development.7 \ environ.7 \ diff --git a/share/man/man7/crypto.7 b/share/man/man7/crypto.7 new file mode 100644 index 0000000..a268996 --- /dev/null +++ b/share/man/man7/crypto.7 @@ -0,0 +1,141 @@ +.\" Copyright (c) 2014 The FreeBSD Foundation +.\" All rights reserved. +.\" +.\" This documentation was written by John-Mark Gurney under +.\" the sponsorship of the FreeBSD Foundation and +.\" Rubicon Communications, LLC (Netgate). +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" $FreeBSD$ +.\" +.Dd December 12, 2014 +.Dt CRYPTO 7 +.Os +.Sh NAME +.Nm crypto +.Nd OpenCrypto algorithms +.Sh SYNOPSIS +In the kernel configuration file: +.Cd "device crypto" +.Pp +Or load the crypto.ko module. +.Sh DESCRIPTION +The following cryptographic algorithms that are part of the OpenCrypto +framework have the following requirements. +.Pp +Cipher algorithms: +.Bl -tag -width ".Dv CRYPTO_AES_CBC" +.It Dv CRYPTO_AES_CBC +.Bl -tag -width "Block size :" -compact -offset indent +.It IV size : +16 +.It Block size : +16 +.It Key size : +16, 24 or 32 +.El +.Pp +This algorithm implements Cipher-block chaining. +.It Dv CRYPTO_AES_NIST_GCM_16 +.Bl -tag -width "Block size :" -compact -offset indent +.It IV size : +12 +.It Block size : +1 +.It Key size : +16, 24 or 32 +.It Digest size : +16 +.El +.Pp +This algorithm implements Galois/Counter Mode. +This is the cipher part of an AEAD +.Pq Authenticated Encryption with Associated Data +mode. +This requires use of the use of a proper authentication mode, one of +.Dv CRYPTO_AES_128_NIST_GMAC , +.Dv CRYPTO_AES_192_NIST_GMAC +or +.Dv CRYPTO_AES_256_NIST_GMAC , +that corresponds with the number of bits in the key that you are using. +.Pp +The associated data (if any) must be provided by the authentication mode op. +The authentication tag will be read/written from/to the offset crd_inject +specified in the descriptor for the authentication mode. +.Pp +Note: You must provide an IV on every call. +.It Dv CRYPTO_AES_ICM +.Bl -tag -width "Block size :" -compact -offset indent +.It IV size : +16 +.It Block size : +1 (aesni), 16 (software) +.It Key size : +16, 24 or 32 +.El +.Pp +This algorithm implements Integer Counter Mode. +This is similar to what most people call counter mode, but instead of the +counter being split into a nonce and a counter part, then entire nonce is +used as the initial counter. +This does mean that if a counter is required that rolls over at 32 bits, +the transaction need to be split into two parts where the counter rolls over. +The counter incremented as a 128-bit big endian number. +.Pp +Note: You must provide an IV on every call. +.It Dv CRYPTO_AES_XTS +.Bl -tag -width "Block size :" -compact -offset indent +.It IV size : +16 +.It Block size : +16 +.It Key size : +32 or 64 +.El +.Pp +This algorithm implements XEX Tweakable Block Cipher with Ciphertext Stealing +as defined in NIST SP 800-38E. +.Pp +NOTE: The ciphertext stealing part is not implemented which is why this cipher +is listed as having a block size of 16 instead of 1. +.El +.Pp +Authentication algorithms: +.Bl -tag -width ".Dv CRYPTO_AES_256_NIST_GMAC" +.It CRYPTO_AES_128_NIST_GMAC +See +.Dv CRYPTO_AES_NIST_GCM_16 +in the cipher mode section. +.It CRYPTO_AES_192_NIST_GMAC +See +.Dv CRYPTO_AES_NIST_GCM_16 +in the cipher mode section. +.It CRYPTO_AES_256_NIST_GMAC +See +.Dv CRYPTO_AES_NIST_GCM_16 +in the cipher mode section. +.El +.Sh SEE ALSO +.Xr crypto 4 , +.Xr crypto 9 +.Sh BUGS +Not all the implemented algorithms are listed. diff --git a/share/man/man9/crypto.9 b/share/man/man9/crypto.9 index c2682af..2aaf130 100644 --- a/share/man/man9/crypto.9 +++ b/share/man/man9/crypto.9 @@ -175,17 +175,26 @@ Contains an algorithm identifier. Currently supported algorithms are: .Pp .Bl -tag -width ".Dv CRYPTO_RIPEMD160_HMAC" -compact +.It Dv CRYPTO_AES_128_NIST_GMAC +.It Dv CRYPTO_AES_192_NIST_GMAC +.It Dv CRYPTO_AES_256_NIST_GMAC .It Dv CRYPTO_AES_CBC +.It Dv CRYPTO_AES_ICM +.It Dv CRYPTO_AES_NIST_GCM_16 +.It Dv CRYPTO_AES_NIST_GMAC +.It Dv CRYPTO_AES_XTS .It Dv CRYPTO_ARC4 .It Dv CRYPTO_BLF_CBC .It Dv CRYPTO_CAMELLIA_CBC .It Dv CRYPTO_CAST_CBC +.It Dv CRYPTO_DEFLATE_COMP .It Dv CRYPTO_DES_CBC .It Dv CRYPTO_3DES_CBC -.It Dv CRYPTO_SKIPJACK_CBC .It Dv CRYPTO_MD5 .It Dv CRYPTO_MD5_HMAC .It Dv CRYPTO_MD5_KPDK +.It Dv CRYPTO_NULL_HMAC +.It Dv CRYPTO_NULL_CBC .It Dv CRYPTO_RIPEMD160_HMAC .It Dv CRYPTO_SHA1 .It Dv CRYPTO_SHA1_HMAC @@ -193,8 +202,7 @@ Currently supported algorithms are: .It Dv CRYPTO_SHA2_256_HMAC .It Dv CRYPTO_SHA2_384_HMAC .It Dv CRYPTO_SHA2_512_HMAC -.It Dv CRYPTO_NULL_HMAC -.It Dv CRYPTO_NULL_CBC +.It Dv CRYPTO_SKIPJACK_CBC .El .It Va cri_klen Specifies the length of the key in bits, for variable-size key @@ -207,7 +215,8 @@ Contains the key to be used with the algorithm. .It Va cri_iv Contains an explicit initialization vector (IV), if it does not prefix the data. -This field is ignored during initialization. +This field is ignored during initialization +.Pq Nm crypto_newsession . If no IV is explicitly passed (see below on details), a random IV is used by the device driver processing the request. .It Va cri_next @@ -296,8 +305,6 @@ The buffer pointed to by is an .Vt uio structure. -.It Dv CRYPTO_F_REL -Must return data in the same place. .It Dv CRYPTO_F_BATCH Batch operation if possible. .It Dv CRYPTO_F_CBIMM @@ -363,7 +370,7 @@ The following flags are defined: For encryption algorithms, this bit is set when encryption is required (when not set, decryption is performed). .It Dv CRD_F_IV_PRESENT -For encryption algorithms, this bit is set when the IV already +For encryption, this bit is set when the IV already precedes the data, so the .Va crd_inject value will be ignored and no IV will be written in the buffer. @@ -372,7 +379,7 @@ at the location pointed to by .Va crd_inject . The IV length is assumed to be equal to the blocksize of the encryption algorithm. -Some applications that do special +Applications that do special .Dq "IV cooking" , such as the half-IV mode in .Xr ipsec 4 , @@ -403,6 +410,8 @@ field for the given operation. Otherwise, the key is taken at newsession time from the .Va cri_key field. +As calculating the key schedule may take a while, it is recommended that often +used keys are given their own session. .It Dv CRD_F_COMP For compression algorithms, this bit is set when compression is required (when not set, decompression is performed). @@ -641,6 +650,7 @@ most of the framework code .El .Sh SEE ALSO .Xr ipsec 4 , +.Xr crypto 7 , .Xr malloc 9 , .Xr sleep 9 .Sh HISTORY diff --git a/sys/amd64/conf/pfSense b/sys/amd64/conf/pfSense new file mode 100644 index 0000000..ce69124 --- /dev/null +++ b/sys/amd64/conf/pfSense @@ -0,0 +1,194 @@ +include GENERIC + +nooptions KDB_TRACE +options DDB # Support DDB. + +ident pfSense + +nooptions MAC # TrustedBSD MAC Framework +nooptions COMPAT_FREEBSD4 # Compatible with FreeBSD4 +nooptions COMPAT_FREEBSD5 # Compatible with FreeBSD5 +nooptions COMPAT_FREEBSD6 # Compatible with FreeBSD6 +nooptions COMPAT_FREEBSD7 # Compatible with FreeBSD7 + +options GEOM_MIRROR +options GEOM_UZIP +options GEOM_PART_MBR +options GEOM_PART_BSD +options GEOM_ELI +options GEOM_BDE + +options TMPFS +options UNIONFS +options NULLFS +options PPS_SYNC + +# Bus support. Do not remove isa, even if you have no isa slots +device isa + +# Wireless +nooptions IEEE80211_DEBUG # enable debug msgs +device wlan_rssadapt +device wlan_xauth +device wlan_acl +device iwifw +device ipwfw # Firmware for Intel PRO/Wireless 2100 IEEE 802.11 driver +device wpifw # Firmware for Intel 3945ABG Wireless LAN IEEE 802.11 driver +device iwnfw # Firmware for Intel Wireless WiFi Link 4965AGN IEEE 802.11n driver +device uath # Atheros USB IEEE 802.11a/b/g wireless network device +device ralfw # Firmware for Ralink Technology RT2500 wireless NICs. +device ural # Ralink Technology RT2500USB IEEE 802.11 driver +device urtw # Realtek RTL8187B/L USB IEEE 802.11b/g wireless network device +device rum # Ralink Technology USB IEEE 802.11a/b/g wireless network device +device mwlfw # Firmware for Marvell 88W8363 IEEE 802.11n wireless network driver +device zyd # ZyDAS ZD1211/ZD1211B USB IEEE 802.11b/g wireless network device +device upgt # Conexant/Intersil PrismGT SoftMAC USB IEEE 802.11b/g wireless +device udav # Davicom DM9601 USB Ethernet driver +device axe +device axge +device aue +device cue +device kue +device mos +device rsu +device rsufw +device run # Ralink RT2700U/RT2800U/RT3000U USB 802.11agn +device runfw +device rue +device urtwn +device urtwnfw + +# Only for 8.1+ +device siba_bwn # Broadcom BCM43xx IEEE 802.11b/g wireless network driver +device bwn # Broadcom BCM43xx IEEE 802.11b/g wireless network driver +device bwi # Broadcom BCM43xx IEEE 802.11b/g wireless network driver + +# Pseudo devices. +device pty # Pseudo-ttys (telnet etc) + +# USB support +nooptions USB_DEBUG # enable debug msgs + +# 3G devices +device ufoma +device ucom +device uslcom +device uplcom +device umct +device uvisor +device uark +device uftdi +device uvscom +device umodem +device u3g +device cdce + +device uhid # "Human Interface Devices" + +# FireWire support +device firewire # FireWire bus code +device sbp # SCSI over FireWire (Requires scbus and da) + +# pfsense addons + +device tap +device gre +device if_bridge +device lagg +device vte +device netmap + +options IPFIREWALL_DEFAULT_TO_ACCEPT +options IPFIREWALL_VERBOSE + +options IPSTEALTH + +options NETGRAPH #netgraph(4) system +options NETGRAPH_VLAN +options NETGRAPH_L2TP +options NETGRAPH_BPF +options NETGRAPH_ETHER +options NETGRAPH_IFACE +options NETGRAPH_EIFACE +options NETGRAPH_PPP +options NETGRAPH_PPPOE +options NETGRAPH_PPTPGRE +options NETGRAPH_RFC1490 +options NETGRAPH_SOCKET +options NETGRAPH_TTY +options NETGRAPH_MPPC_ENCRYPTION +options NETGRAPH_UI +options NETGRAPH_VJC +options NETGRAPH_KSOCKET +options NETGRAPH_LMI +options NETGRAPH_ONE2MANY +options NETGRAPH_BRIDGE +options NETGRAPH_CISCO +options NETGRAPH_ECHO +options NETGRAPH_ASYNC +options NETGRAPH_FRAME_RELAY +options NETGRAPH_HOLE +options NETGRAPH_TEE +options NETGRAPH_TCPMSS +options NETGRAPH_PIPE +options NETGRAPH_CAR +options NETGRAPH_DEFLATE +options NETGRAPH_PRED1 + +options IPSEC +options IPSEC_NAT_T +options TCP_SIGNATURE + +# IPSEC filtering interface +device enc + +options ALTQ +options ALTQ_CBQ +options ALTQ_RED +options ALTQ_RIO +options ALTQ_HFSC +options ALTQ_PRIQ +options ALTQ_FAIRQ +options ALTQ_NOPCC +options ALTQ_CODEL + +# Squid related settings +options MSGMNB=8192 # max # of bytes in a queue +options MSGMNI=40 # number of message queue identifiers +options MSGSEG=512 # number of message segments per queue +options MSGSSZ=32 # size of a message segment +options MSGTQL=2048 # max messages in system + +device pf +device pflog +device carp +device pfsync + +device crypto # core crypto support +device cryptodev # /dev/crypto for access to h/w +device rndtest # FIPS 140-2 entropy tester +device hifn # Hifn 7951, 7781, etc. +options HIFN_DEBUG # enable debugging support: hw.hifn.debug +options HIFN_RNDTEST # enable rndtest support +device ubsec # Broadcom 5501, 5601, 58xx +device safe # safe -- SafeNet crypto accelerator +device padlock + +device speaker + +options DEVICE_POLLING + +options MROUTING + +# Additional cards +device mxge # mxge - Myricom Myri10GE 10 Gigabit Ethernet adapter driver +device cxgb # cxgb -- Chelsio T3 10 Gigabit Ethernet adapter driver +device cxgbe # cxgbe -- Chelsio T5 10 Gigabit Ethernet adapter driver +device nve # nVidia nForce MCP on-board Ethernet Networking +device oce + +# Default serial speed +options CONSPEED=115200 + +# Enable gpioapu +device gpioapu diff --git a/sys/boot/forth/Makefile.inc b/sys/boot/forth/Makefile.inc index 97ab433..7c82ac4 100644 --- a/sys/boot/forth/Makefile.inc +++ b/sys/boot/forth/Makefile.inc @@ -23,3 +23,7 @@ FILES+= shortcuts.4th FILES+= support.4th FILES+= version.4th FILESDIR_loader.conf= /boot/defaults + +# pfSense +FILES+= logo-pfSensebw.4th +FILES+= brand-pfSense.4th diff --git a/sys/boot/forth/brand-pfSense.4th b/sys/boot/forth/brand-pfSense.4th new file mode 100644 index 0000000..d56ecc2 --- /dev/null +++ b/sys/boot/forth/brand-pfSense.4th @@ -0,0 +1,46 @@ +\ Copyright (c) 2004-2015 Electric Sheep Fencing LLC +\ All rights reserved. +\ +\ Redistribution and use in source and binary forms, with or without +\ modification, are permitted provided that the following conditions +\ are met: +\ 1. Redistributions of source code must retain the above copyright +\ notice, this list of conditions and the following disclaimer. +\ 2. Redistributions in binary form must reproduce the above copyright +\ notice, this list of conditions and the following disclaimer in the +\ documentation and/or other materials provided with the distribution. +\ +\ THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +\ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +\ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +\ ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +\ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +\ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +\ OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +\ HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +\ LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +\ OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +\ SUCH DAMAGE. +\ +\ $FreeBSD$ + +2 brandX ! 1 brandY ! \ Initialize brand placement defaults + +: brand+ ( x y c-addr/u -- x y' ) + 2swap 2dup at-xy 2swap \ position the cursor + type \ print to the screen + 1+ \ increase y for next time we're called +; + +: brand ( x y -- ) \ "pfSense" [wide] logo in B/W (7 rows x 42 columns) + + s" __ ____ " brand+ + s" _ __ / _/ ___| ___ _ __ ___ ___ " brand+ + s" | '_ \| |_\___ \ / _ \ '_ \/ __|/ _ \ " brand+ + s" | |_) | _|___) | __/ | | \__ \ __/ " brand+ + s" | .__/|_| |____/ \___|_| |_|___/\___| " brand+ + s" |_| " brand+ + s" " brand+ + + 2drop +; diff --git a/sys/boot/forth/loader.conf b/sys/boot/forth/loader.conf index a76f6ca..141b862 100644 --- a/sys/boot/forth/loader.conf +++ b/sys/boot/forth/loader.conf @@ -553,3 +553,10 @@ mac_seeotheruids_load="NO" # UID visbility MAC policy #module_before="cmd" # executes "cmd" before loading the module #module_after="cmd" # executes "cmd" after loading the module #module_error="cmd" # executes "cmd" if load fails + +# pfSense specific default values +loader_color="NO" +loader_logo="pfSensebw" +loader_brand="pfSense" +hw.usb.no_pf="1" +net.isr.maxthreads="-1" diff --git a/sys/boot/forth/logo-pfSensebw.4th b/sys/boot/forth/logo-pfSensebw.4th new file mode 100644 index 0000000..6da2413 --- /dev/null +++ b/sys/boot/forth/logo-pfSensebw.4th @@ -0,0 +1,54 @@ +\ Copyright (c) 2004-2015 Electric Sheep Fencing LLC +\ All rights reserved. +\ +\ Redistribution and use in source and binary forms, with or without +\ modification, are permitted provided that the following conditions +\ are met: +\ 1. Redistributions of source code must retain the above copyright +\ notice, this list of conditions and the following disclaimer. +\ 2. Redistributions in binary form must reproduce the above copyright +\ notice, this list of conditions and the following disclaimer in the +\ documentation and/or other materials provided with the distribution. +\ +\ THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +\ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +\ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +\ ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +\ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +\ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +\ OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +\ HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +\ LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +\ OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +\ SUCH DAMAGE. +\ +\ $FreeBSD$ + +46 logoX ! 7 logoY ! \ Initialize logo placement defaults + +: logo+ ( x y c-addr/u -- x y' ) + 2swap 2dup at-xy 2swap \ position the cursor + type \ print to the screen + 1+ \ increase y for next time we're called +; + +: logo ( x y -- ) \ B/W pfSense logo (15 rows x 32 columns) + + s" " logo+ + s" " logo+ + s" " logo+ + s" ______ " logo+ + s" / \ " logo+ + s" _____/ f \ " logo+ + s" / \ / " logo+ + s" / p \______/ Sense" logo+ + s" \ / \ " logo+ + s" \_____/ \ " logo+ + s" \ / " logo+ + s" \______/ " logo+ + s" " logo+ + s" " logo+ + s" " logo+ + + 2drop +; diff --git a/sys/boot/forth/menu-commands.4th b/sys/boot/forth/menu-commands.4th index 9adf30a..5c6350a 100644 --- a/sys/boot/forth/menu-commands.4th +++ b/sys/boot/forth/menu-commands.4th @@ -253,7 +253,7 @@ also menu-namespace also menu-command-helpers cr ." To get back to the menu, type `menu' and press ENTER" cr - ." or type `boot' and press ENTER to start FreeBSD." cr + ." or type `boot' and press ENTER to start pfSense." cr cr FALSE \ exit the menu diff --git a/sys/boot/forth/menu.4th b/sys/boot/forth/menu.4th index 9127565..1f95b66 100644 --- a/sys/boot/forth/menu.4th +++ b/sys/boot/forth/menu.4th @@ -470,7 +470,7 @@ also menu-infrastructure definitions \ Print the frame caption at (x,y) s" loader_menu_title" getenv dup -1 = if - drop s" Welcome to FreeBSD" + drop s" Welcome to pfSense" then TRUE ( use default alignment ) s" loader_menu_title_align" getenv dup -1 <> if diff --git a/sys/boot/i386/boot0/boot0.S b/sys/boot/i386/boot0/boot0.S index 798ee97..ab17088 100644 --- a/sys/boot/i386/boot0/boot0.S +++ b/sys/boot/i386/boot0/boot0.S @@ -647,8 +647,8 @@ os_dos: #endif os_win: .ascii "Wi"; .byte 'n'|0x80 os_linux: .ascii "Linu"; .byte 'x'|0x80 -os_freebsd: .ascii "Free" -os_bsd: .ascii "BS"; .byte 'D'|0x80 +os_freebsd: .ascii "pfSe" +os_bsd: .ascii "ns"; .byte 'e'|0x80 #ifndef SAVE_MORE_MEMORY os_ext: .ascii "EX"; .byte 'T'|0x80 #endif diff --git a/sys/conf/NOTES b/sys/conf/NOTES index a977898..78fb6ee 100644 --- a/sys/conf/NOTES +++ b/sys/conf/NOTES @@ -700,6 +700,7 @@ options ALTQ_CBQ # Class Based Queueing options ALTQ_RED # Random Early Detection options ALTQ_RIO # RED In/Out options ALTQ_HFSC # Hierarchical Packet Scheduler +options ALTQ_FAIRQ # Fair Packet Scheduler options ALTQ_CDNR # Traffic conditioner options ALTQ_PRIQ # Priority Queueing options ALTQ_NOPCC # Required if the TSC is unusable diff --git a/sys/conf/files b/sys/conf/files index b3f7f9e..2f0627b 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -252,7 +252,9 @@ compat/freebsd32/freebsd32_syscalls.c optional compat_freebsd32 compat/freebsd32/freebsd32_sysent.c optional compat_freebsd32 contrib/altq/altq/altq_cbq.c optional altq contrib/altq/altq/altq_cdnr.c optional altq +contrib/altq/altq/altq_codel.c optional altq contrib/altq/altq/altq_hfsc.c optional altq +contrib/altq/altq/altq_fairq.c optional altq contrib/altq/altq/altq_priq.c optional altq contrib/altq/altq/altq_red.c optional altq contrib/altq/altq/altq_rio.c optional altq @@ -535,6 +537,8 @@ crypto/sha1.c optional carp | crypto | ipsec | \ netgraph_mppc_encryption | sctp crypto/sha2/sha2.c optional crypto | geom_bde | ipsec | random | \ sctp | zfs +crypto/sha2/sha256c.c optional crypto | geom_bde | ipsec | random | \ + sctp | zfs crypto/siphash/siphash.c optional inet | inet6 crypto/siphash/siphash_test.c optional inet | inet6 ddb/db_access.c optional ddb @@ -1410,6 +1414,7 @@ dev/fxp/inphy.c optional fxp dev/gem/if_gem.c optional gem dev/gem/if_gem_pci.c optional gem pci dev/gem/if_gem_sbus.c optional gem sbus +/dev/gpioapu/gpioapu.c optional gpioapu pci dev/gpio/gpiobus.c optional gpio \ dependency "gpiobus_if.h" dev/gpio/gpioc.c optional gpio \ @@ -3226,6 +3231,7 @@ libkern/asprintf.c standard libkern/bcd.c standard libkern/bsearch.c standard libkern/crc32.c standard +libkern/explicit_bzero.c standard libkern/fnmatch.c standard libkern/iconv.c optional libiconv libkern/iconv_converter_if.m optional libiconv @@ -3269,6 +3275,7 @@ libkern/strtoq.c standard libkern/strtoul.c standard libkern/strtouq.c standard libkern/strvalid.c standard +libkern/timingsafe_bcmp.c standard net/bpf.c standard net/bpf_buffer.c optional bpf net/bpf_jitter.c optional bpf_jitter @@ -3568,7 +3575,6 @@ netipsec/keysock.c optional ipsec inet | ipsec inet6 netipsec/xform_ah.c optional ipsec inet | ipsec inet6 netipsec/xform_esp.c optional ipsec inet | ipsec inet6 netipsec/xform_ipcomp.c optional ipsec inet | ipsec inet6 -netipsec/xform_ipip.c optional ipsec inet | ipsec inet6 netipsec/xform_tcp.c optional ipsec inet tcp_signature | \ ipsec inet6 tcp_signature netipx/ipx.c optional ipx @@ -3960,6 +3966,8 @@ opencrypto/cryptodev.c optional cryptodev opencrypto/cryptodev_if.m optional crypto opencrypto/cryptosoft.c optional crypto opencrypto/cryptodeflate.c optional crypto +opencrypto/gmac.c optional crypto +opencrypto/gfmult.c optional crypto opencrypto/rmd160.c optional crypto | ipsec opencrypto/skipjack.c optional crypto opencrypto/xform.c optional crypto diff --git a/sys/conf/files.amd64 b/sys/conf/files.amd64 index f91191b..f9080fd 100644 --- a/sys/conf/files.amd64 +++ b/sys/conf/files.amd64 @@ -151,9 +151,14 @@ amd64/pci/pci_cfgreg.c optional pci cddl/contrib/opensolaris/common/atomic/amd64/opensolaris_atomic.S optional zfs compile-with "${ZFS_S}" crypto/aesni/aeskeys_amd64.S optional aesni crypto/aesni/aesni.c optional aesni +aesni_ghash.o optional aesni \ + dependency "$S/crypto/aesni/aesni_ghash.c" \ + compile-with "${CC} -c ${CFLAGS:C/^-O2$/-O3/:N-nostdinc} ${WERROR} ${PROF} -mmmx -msse -msse4 -maes -mpclmul ${.IMPSRC}" \ + no-implicit-rule \ + clean "aesni_ghash.o" aesni_wrap.o optional aesni \ dependency "$S/crypto/aesni/aesni_wrap.c" \ - compile-with "${CC} -c ${CFLAGS:C/^-O2$/-O3/:N-nostdinc} ${WERROR} ${PROF} -mmmx -msse -maes ${.IMPSRC}" \ + compile-with "${CC} -c ${CFLAGS:C/^-O2$/-O3/:N-nostdinc} ${WERROR} ${PROF} -mmmx -msse -msse4 -maes ${.IMPSRC}" \ no-implicit-rule \ clean "aesni_wrap.o" crypto/blowfish/bf_enc.c optional crypto | ipsec diff --git a/sys/conf/files.i386 b/sys/conf/files.i386 index e495da7..f9816ae 100644 --- a/sys/conf/files.i386 +++ b/sys/conf/files.i386 @@ -142,9 +142,14 @@ bf_enc.o optional crypto | ipsec \ no-implicit-rule crypto/aesni/aeskeys_i386.S optional aesni crypto/aesni/aesni.c optional aesni +aesni_ghash.o optional aesni \ + dependency "$S/crypto/aesni/aesni_ghash.c" \ + compile-with "${CC} -c ${CFLAGS:C/^-O2$/-O3/:N-nostdinc} ${WERROR} ${PROF} -mmmx -msse -msse4 -maes -mpclmul ${.IMPSRC}" \ + no-implicit-rule \ + clean "aesni_ghash.o" aesni_wrap.o optional aesni \ dependency "$S/crypto/aesni/aesni_wrap.c" \ - compile-with "${CC} -c ${CFLAGS:C/^-O2$/-O3/:N-nostdinc} ${WERROR} ${PROF} -mmmx -msse -maes ${.IMPSRC}" \ + compile-with "${CC} -c ${CFLAGS:C/^-O2$/-O3/:N-nostdinc} ${WERROR} ${PROF} -mmmx -msse -msse4 -maes ${.IMPSRC}" \ no-implicit-rule \ clean "aesni_wrap.o" crypto/des/arch/i386/des_enc.S optional crypto | ipsec | netsmb diff --git a/sys/conf/options b/sys/conf/options index 0699c2c..c2588bd 100644 --- a/sys/conf/options +++ b/sys/conf/options @@ -384,8 +384,10 @@ ACCEPT_FILTER_HTTP ALTQ opt_global.h ALTQ_CBQ opt_altq.h ALTQ_CDNR opt_altq.h +ALTQ_CODEL opt_altq.h ALTQ_DEBUG opt_altq.h ALTQ_HFSC opt_altq.h +ALTQ_FAIRQ opt_altq.h ALTQ_NOPCC opt_altq.h ALTQ_PRIQ opt_altq.h ALTQ_RED opt_altq.h diff --git a/sys/contrib/altq/altq/altq.h b/sys/contrib/altq/altq/altq.h index c740ed3..4ef6cd2 100644 --- a/sys/contrib/altq/altq/altq.h +++ b/sys/contrib/altq/altq/altq.h @@ -63,7 +63,9 @@ #define ALTQT_BLUE 10 /* blue */ #define ALTQT_PRIQ 11 /* priority queue */ #define ALTQT_JOBS 12 /* JoBS */ -#define ALTQT_MAX 13 /* should be max discipline type + 1 */ +#define ALTQT_FAIRQ 13 /* fairq */ +#define ALTQT_CODEL 14 /* CoDel */ +#define ALTQT_MAX 15 /* should be max discipline type + 1 */ #ifdef ALTQ3_COMPAT struct altqreq { diff --git a/sys/contrib/altq/altq/altq_cbq.c b/sys/contrib/altq/altq/altq_cbq.c index 3991d1d..13f9721 100644 --- a/sys/contrib/altq/altq/altq_cbq.c +++ b/sys/contrib/altq/altq/altq_cbq.c @@ -241,6 +241,10 @@ get_class_stats(class_stats_t *statsp, struct rm_class *cl) if (q_is_rio(cl->q_)) rio_getstats((rio_t *)cl->red_, &statsp->red[0]); #endif +#ifdef ALTQ_CODEL + if (q_is_codel(cl->q_)) + codel_getstats(cl->codel_, &statsp->codel); +#endif } int diff --git a/sys/contrib/altq/altq/altq_cbq.h b/sys/contrib/altq/altq/altq_cbq.h index 30a15c7..38ee9e5 100644 --- a/sys/contrib/altq/altq/altq_cbq.h +++ b/sys/contrib/altq/altq/altq_cbq.h @@ -35,6 +35,7 @@ #include <altq/altq.h> #include <altq/altq_rmclass.h> +#include <altq/altq_codel.h> #include <altq/altq_red.h> #include <altq/altq_rio.h> @@ -51,6 +52,7 @@ extern "C" { #define CBQCLF_FLOWVALVE 0x0008 /* use flowvalve (aka penalty-box) */ #define CBQCLF_CLEARDSCP 0x0010 /* clear diffserv codepoint */ #define CBQCLF_BORROW 0x0020 /* borrow from parent */ +#define CBQCLF_CODEL 0x0040 /* use CoDel */ /* class flags only for root class */ #define CBQCLF_WRR 0x0100 /* weighted-round robin */ @@ -90,9 +92,10 @@ typedef struct _cbq_class_stats_ { int qcnt; /* # packets in queue */ int avgidle; - /* red and rio related info */ + /* codel, red and rio related info */ int qtype; struct redstats red[3]; + struct codel_stats codel; } class_stats_t; #ifdef ALTQ3_COMPAT @@ -186,7 +189,7 @@ struct cbq_getstats { #define CBQ_TIMEOUT 10 #define CBQ_LS_TIMEOUT (20 * hz / 1000) -#define CBQ_MAX_CLASSES 256 +#define CBQ_MAX_CLASSES 2048 #ifdef ALTQ3_COMPAT #define CBQ_MAX_FILTERS 256 diff --git a/sys/contrib/altq/altq/altq_classq.h b/sys/contrib/altq/altq/altq_classq.h index dc5c646..daec994 100644 --- a/sys/contrib/altq/altq/altq_classq.h +++ b/sys/contrib/altq/altq/altq_classq.h @@ -49,6 +49,7 @@ extern "C" { #define Q_RED 0x01 #define Q_RIO 0x02 #define Q_DROPTAIL 0x03 +#define Q_CODEL 0x04 #ifdef _KERNEL @@ -59,6 +60,7 @@ struct _class_queue_ { struct mbuf *tail_; /* Tail of packet queue */ int qlen_; /* Queue length (in number of packets) */ int qlim_; /* Queue limit (in number of packets*) */ + int qsize_; /* Queue size (in number of bytes*) */ int qtype_; /* Queue type */ }; @@ -67,10 +69,12 @@ typedef struct _class_queue_ class_queue_t; #define qtype(q) (q)->qtype_ /* Get queue type */ #define qlimit(q) (q)->qlim_ /* Max packets to be queued */ #define qlen(q) (q)->qlen_ /* Current queue length. */ +#define qsize(q) (q)->qsize_ /* Current queue size. */ #define qtail(q) (q)->tail_ /* Tail of the queue */ #define qhead(q) ((q)->tail_ ? (q)->tail_->m_nextpkt : NULL) #define qempty(q) ((q)->qlen_ == 0) /* Is the queue empty?? */ +#define q_is_codel(q) ((q)->qtype_ == Q_CODEL) /* Is the queue a codel queue */ #define q_is_red(q) ((q)->qtype_ == Q_RED) /* Is the queue a red queue */ #define q_is_rio(q) ((q)->qtype_ == Q_RIO) /* Is the queue a rio queue */ #define q_is_red_or_rio(q) ((q)->qtype_ == Q_RED || (q)->qtype_ == Q_RIO) @@ -100,6 +104,7 @@ _addq(class_queue_t *q, struct mbuf *m) m0->m_nextpkt = m; qtail(q) = m; qlen(q)++; + qsize(q) += m_pktlen(m); } static __inline struct mbuf * @@ -114,6 +119,7 @@ _getq(class_queue_t *q) else qtail(q) = NULL; qlen(q)--; + qsize(q) -= m_pktlen(m0); m0->m_nextpkt = NULL; return (m0); } diff --git a/sys/contrib/altq/altq/altq_codel.c b/sys/contrib/altq/altq/altq_codel.c new file mode 100644 index 0000000..45c5ca6 --- /dev/null +++ b/sys/contrib/altq/altq/altq_codel.c @@ -0,0 +1,477 @@ +/* + * CoDel - The Controlled-Delay Active Queue Management algorithm + * + * Copyright (C) 2013 Ermal Luçi <eri@FreeBSD.org> + * Copyright (C) 2011-2012 Kathleen Nichols <nichols@pollere.com> + * Copyright (C) 2011-2012 Van Jacobson <van@pollere.net> + * Copyright (C) 2012 Michael D. Taht <dave.taht@bufferbloat.net> + * Copyright (C) 2012 Eric Dumazet <edumazet@google.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions, and the following disclaimer, + * without modification. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The names of the authors may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * Alternatively, provided that this notice is retained in full, this + * software may be distributed under the terms of the GNU General + * Public License ("GPL") version 2, in which case the provisions of the + * GPL apply INSTEAD OF those given above. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * $FreeBSD$ + */ +#include "opt_altq.h" +#include "opt_inet.h" +#include "opt_inet6.h" + +#ifdef ALTQ_CODEL /* CoDel is enabled by ALTQ_CODEL option in opt_altq.h */ + +#include <sys/param.h> +#include <sys/malloc.h> +#include <sys/mbuf.h> +#include <sys/socket.h> +#include <sys/systm.h> + +#include <net/if.h> +#include <net/if_var.h> +#include <netinet/in.h> + +#include <netpfil/pf/pf.h> +#include <netpfil/pf/pf_altq.h> +#include <altq/if_altq.h> +#include <altq/altq.h> +#include <altq/altq_codel.h> + +static int codel_should_drop(struct codel *, class_queue_t *, + struct mbuf *, u_int64_t); +static void codel_Newton_step(struct codel_vars *); +static u_int64_t codel_control_law(u_int64_t t, u_int64_t, u_int32_t); + +#define codel_time_after(a, b) ((int64_t)(a) - (int64_t)(b) > 0) +#define codel_time_after_eq(a, b) ((int64_t)(a) - (int64_t)(b) >= 0) +#define codel_time_before(a, b) ((int64_t)(a) - (int64_t)(b) < 0) +#define codel_time_before_eq(a, b) ((int64_t)(a) - (int64_t)(b) <= 0) + +static int codel_request(struct ifaltq *, int, void *); + +static int codel_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *); +static struct mbuf *codel_dequeue(struct ifaltq *, int); + +int +codel_pfattach(struct pf_altq *a) +{ + struct ifnet *ifp; + + if ((ifp = ifunit(a->ifname)) == NULL || a->altq_disc == NULL) + return (EINVAL); + + return (altq_attach(&ifp->if_snd, ALTQT_CODEL, a->altq_disc, + codel_enqueue, codel_dequeue, codel_request, NULL, NULL)); +} + +int +codel_add_altq(struct pf_altq *a) +{ + struct codel_if *cif; + struct ifnet *ifp; + struct codel_opts *opts; + + if ((ifp = ifunit(a->ifname)) == NULL) + return (EINVAL); + if (!ALTQ_IS_READY(&ifp->if_snd)) + return (ENODEV); + + opts = &a->pq_u.codel_opts; + + cif = malloc(sizeof(struct codel_if), M_DEVBUF, M_NOWAIT | M_ZERO); + if (cif == NULL) + return (ENOMEM); + cif->cif_bandwidth = a->ifbandwidth; + cif->cif_ifq = &ifp->if_snd; + + cif->cl_q = malloc(sizeof(class_queue_t), M_DEVBUF, M_NOWAIT | M_ZERO); + if (cif->cl_q == NULL) { + free(cif, M_DEVBUF); + return (ENOMEM); + } + + if (a->qlimit == 0) + a->qlimit = 50; /* use default. */ + qlimit(cif->cl_q) = a->qlimit; + qtype(cif->cl_q) = Q_CODEL; + qlen(cif->cl_q) = 0; + qsize(cif->cl_q) = 0; + + if (opts->target == 0) + opts->target = 5; + if (opts->interval == 0) + opts->interval = 100; + cif->codel.params.target = machclk_freq * opts->target / 1000; + cif->codel.params.interval = machclk_freq * opts->interval / 1000; + cif->codel.params.ecn = opts->ecn; + cif->codel.stats.maxpacket = 256; + + cif->cl_stats.qlength = qlen(cif->cl_q); + cif->cl_stats.qlimit = qlimit(cif->cl_q); + + /* keep the state in pf_altq */ + a->altq_disc = cif; + + return (0); +} + +int +codel_remove_altq(struct pf_altq *a) +{ + struct codel_if *cif; + + if ((cif = a->altq_disc) == NULL) + return (EINVAL); + a->altq_disc = NULL; + + if (cif->cl_q) + free(cif->cl_q, M_DEVBUF); + free(cif, M_DEVBUF); + + return (0); +} + +int +codel_getqstats(struct pf_altq *a, void *ubuf, int *nbytes) +{ + struct codel_if *cif; + struct codel_ifstats stats; + int error = 0; + + if ((cif = altq_lookup(a->ifname, ALTQT_CODEL)) == NULL) + return (EBADF); + + if (*nbytes < sizeof(stats)) + return (EINVAL); + + stats = cif->cl_stats; + stats.stats = cif->codel.stats; + + if ((error = copyout((caddr_t)&stats, ubuf, sizeof(stats))) != 0) + return (error); + *nbytes = sizeof(stats); + + return (0); +} + +static int +codel_request(struct ifaltq *ifq, int req, void *arg) +{ + struct codel_if *cif = (struct codel_if *)ifq->altq_disc; + struct mbuf *m; + + IFQ_LOCK_ASSERT(ifq); + + switch (req) { + case ALTRQ_PURGE: + if (!ALTQ_IS_ENABLED(cif->cif_ifq)) + break; + + if (qempty(cif->cl_q)) + break; + + while ((m = _getq(cif->cl_q)) != NULL) { + PKTCNTR_ADD(&cif->cl_stats.cl_dropcnt, m_pktlen(m)); + m_freem(m); + IFQ_DEC_LEN(cif->cif_ifq); + } + cif->cif_ifq->ifq_len = 0; + break; + } + + return (0); +} + +static int +codel_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr) +{ + + struct codel_if *cif = (struct codel_if *) ifq->altq_disc; + + IFQ_LOCK_ASSERT(ifq); + + /* grab class set by classifier */ + if ((m->m_flags & M_PKTHDR) == 0) { + /* should not happen */ + printf("altq: packet for %s does not have pkthdr\n", + ifq->altq_ifp->if_xname); + m_freem(m); + PKTCNTR_ADD(&cif->cl_stats.cl_dropcnt, m_pktlen(m)); + return (ENOBUFS); + } + + if (codel_addq(&cif->codel, cif->cl_q, m)) { + PKTCNTR_ADD(&cif->cl_stats.cl_dropcnt, m_pktlen(m)); + return (ENOBUFS); + } + IFQ_INC_LEN(ifq); + + return (0); +} + +static struct mbuf * +codel_dequeue(struct ifaltq *ifq, int op) +{ + struct codel_if *cif = (struct codel_if *)ifq->altq_disc; + struct mbuf *m; + + IFQ_LOCK_ASSERT(ifq); + + if (IFQ_IS_EMPTY(ifq)) + return (NULL); + + if (op == ALTDQ_POLL) + return (qhead(cif->cl_q)); + + + m = codel_getq(&cif->codel, cif->cl_q); + if (m != NULL) { + IFQ_DEC_LEN(ifq); + PKTCNTR_ADD(&cif->cl_stats.cl_xmitcnt, m_pktlen(m)); + return (m); + } + + return (NULL); +} + +struct codel * +codel_alloc(int target, int interval, int ecn) +{ + struct codel *c; + + c = malloc(sizeof(*c), M_DEVBUF, M_NOWAIT | M_ZERO); + if (c != NULL) { + c->params.target = machclk_freq * target / 1000; + c->params.interval = machclk_freq * interval / 1000; + c->params.ecn = ecn; + c->stats.maxpacket = 256; + } + + return (c); +} + +void +codel_destroy(struct codel *c) +{ + + free(c, M_DEVBUF); +} + +#define MTAG_CODEL 1438031249 +int +codel_addq(struct codel *c, class_queue_t *q, struct mbuf *m) +{ + struct m_tag *mtag; + uint64_t *enqueue_time; + + if (qlen(q) < qlimit(q)) { + mtag = m_tag_locate(m, MTAG_CODEL, 0, NULL); + if (mtag == NULL) + mtag = m_tag_alloc(MTAG_CODEL, 0, sizeof(uint64_t), + M_NOWAIT); + if (mtag == NULL) { + m_freem(m); + return (-1); + } + enqueue_time = (uint64_t *)(mtag + 1); + *enqueue_time = read_machclk(); + m_tag_prepend(m, mtag); + _addq(q, m); + return (0); + } + c->drop_overlimit++; + m_freem(m); + + return (-1); +} + +static int +codel_should_drop(struct codel *c, class_queue_t *q, struct mbuf *m, + u_int64_t now) +{ + struct m_tag *mtag; + uint64_t *enqueue_time; + + if (m == NULL) { + c->vars.first_above_time = 0; + return (0); + } + + mtag = m_tag_locate(m, MTAG_CODEL, 0, NULL); + if (mtag == NULL) { + /* Only one warning per second. */ + if (ppsratecheck(&c->last_log, &c->last_pps, 1)) + printf("%s: could not found the packet mtag!\n", + __func__); + c->vars.first_above_time = 0; + return (0); + } + enqueue_time = (uint64_t *)(mtag + 1); + c->vars.ldelay = now - *enqueue_time; + c->stats.maxpacket = MAX(c->stats.maxpacket, m_pktlen(m)); + + if (codel_time_before(c->vars.ldelay, c->params.target) || + qsize(q) <= c->stats.maxpacket) { + /* went below - stay below for at least interval */ + c->vars.first_above_time = 0; + return (0); + } + if (c->vars.first_above_time == 0) { + /* just went above from below. If we stay above + * for at least interval we'll say it's ok to drop + */ + c->vars.first_above_time = now + c->params.interval; + return (0); + } + if (codel_time_after(now, c->vars.first_above_time)) + return (1); + + return (0); +} + +/* + * Run a Newton method step: + * new_invsqrt = (invsqrt / 2) * (3 - count * invsqrt^2) + * + * Here, invsqrt is a fixed point number (< 1.0), 32bit mantissa, aka Q0.32 + */ +static void +codel_Newton_step(struct codel_vars *vars) +{ + uint32_t invsqrt, invsqrt2; + uint64_t val; + +/* sizeof_in_bits(rec_inv_sqrt) */ +#define REC_INV_SQRT_BITS (8 * sizeof(u_int16_t)) +/* needed shift to get a Q0.32 number from rec_inv_sqrt */ +#define REC_INV_SQRT_SHIFT (32 - REC_INV_SQRT_BITS) + + invsqrt = ((u_int32_t)vars->rec_inv_sqrt) << REC_INV_SQRT_SHIFT; + invsqrt2 = ((u_int64_t)invsqrt * invsqrt) >> 32; + val = (3LL << 32) - ((u_int64_t)vars->count * invsqrt2); + val >>= 2; /* avoid overflow in following multiply */ + val = (val * invsqrt) >> (32 - 2 + 1); + + vars->rec_inv_sqrt = val >> REC_INV_SQRT_SHIFT; +} + +static u_int64_t +codel_control_law(u_int64_t t, u_int64_t interval, u_int32_t rec_inv_sqrt) +{ + + return (t + (u_int32_t)(((u_int64_t)interval * + (rec_inv_sqrt << REC_INV_SQRT_SHIFT)) >> 32)); +} + +struct mbuf * +codel_getq(struct codel *c, class_queue_t *q) +{ + struct mbuf *m; + u_int64_t now; + int drop; + + if ((m = _getq(q)) == NULL) { + c->vars.dropping = 0; + return (m); + } + + now = read_machclk(); + drop = codel_should_drop(c, q, m, now); + if (c->vars.dropping) { + if (!drop) { + /* sojourn time below target - leave dropping state */ + c->vars.dropping = 0; + } else if (codel_time_after_eq(now, c->vars.drop_next)) { + /* It's time for the next drop. Drop the current + * packet and dequeue the next. The dequeue might + * take us out of dropping state. + * If not, schedule the next drop. + * A large backlog might result in drop rates so high + * that the next drop should happen now, + * hence the while loop. + */ + while (c->vars.dropping && + codel_time_after_eq(now, c->vars.drop_next)) { + c->vars.count++; /* don't care of possible wrap + * since there is no more + * divide */ + codel_Newton_step(&c->vars); + /* TODO ECN */ + PKTCNTR_ADD(&c->stats.drop_cnt, m_pktlen(m)); + m_freem(m); + m = _getq(q); + if (!codel_should_drop(c, q, m, now)) + /* leave dropping state */ + c->vars.dropping = 0; + else + /* and schedule the next drop */ + c->vars.drop_next = + codel_control_law(c->vars.drop_next, + c->params.interval, + c->vars.rec_inv_sqrt); + } + } + } else if (drop) { + /* TODO ECN */ + PKTCNTR_ADD(&c->stats.drop_cnt, m_pktlen(m)); + m_freem(m); + + m = _getq(q); + drop = codel_should_drop(c, q, m, now); + + c->vars.dropping = 1; + /* if min went above target close to when we last went below it + * assume that the drop rate that controlled the queue on the + * last cycle is a good starting point to control it now. + */ + if (codel_time_before(now - c->vars.drop_next, + 16 * c->params.interval)) { + c->vars.count = (c->vars.count - c->vars.lastcount) | 1; + /* we dont care if rec_inv_sqrt approximation + * is not very precise : + * Next Newton steps will correct it quadratically. + */ + codel_Newton_step(&c->vars); + } else { + c->vars.count = 1; + c->vars.rec_inv_sqrt = ~0U >> REC_INV_SQRT_SHIFT; + } + c->vars.lastcount = c->vars.count; + c->vars.drop_next = codel_control_law(now, c->params.interval, + c->vars.rec_inv_sqrt); + } + + return (m); +} + +void +codel_getstats(struct codel *c, struct codel_stats *s) +{ + *s = c->stats; +} + +#endif /* ALTQ_CODEL */ diff --git a/sys/contrib/altq/altq/altq_codel.h b/sys/contrib/altq/altq/altq_codel.h new file mode 100644 index 0000000..991af42 --- /dev/null +++ b/sys/contrib/altq/altq/altq_codel.h @@ -0,0 +1,129 @@ +/* + * CoDel - The Controlled-Delay Active Queue Management algorithm + * + * Copyright (C) 2013 Ermal Luçi <eri@FreeBSD.org> + * Copyright (C) 2011-2012 Kathleen Nichols <nichols@pollere.com> + * Copyright (C) 2011-2012 Van Jacobson <van@pollere.net> + * Copyright (C) 2012 Michael D. Taht <dave.taht@bufferbloat.net> + * Copyright (C) 2012 Eric Dumazet <edumazet@google.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions, and the following disclaimer, + * without modification. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The names of the authors may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * Alternatively, provided that this notice is retained in full, this + * software may be distributed under the terms of the GNU General + * Public License ("GPL") version 2, in which case the provisions of the + * GPL apply INSTEAD OF those given above. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _ALTQ_ALTQ_CODEL_H_ +#define _ALTQ_ALTQ_CODEL_H_ + +struct codel_stats { + u_int32_t maxpacket; + struct pktcntr drop_cnt; + u_int marked_packets; +}; + +struct codel_ifstats { + u_int qlength; + u_int qlimit; + struct codel_stats stats; + struct pktcntr cl_xmitcnt; /* transmitted packet counter */ + struct pktcntr cl_dropcnt; /* dropped packet counter */ +}; + +#ifdef _KERNEL +#include <altq/altq_classq.h> + +/** + * struct codel_params - contains codel parameters + * <at> target: target queue size (in time units) + * <at> interval: width of moving time window + * <at> ecn: is Explicit Congestion Notification enabled + */ +struct codel_params { + u_int64_t target; + u_int64_t interval; + int ecn; +}; + +/** + * struct codel_vars - contains codel variables + * <at> count: how many drops we've done since the last time we + * entered dropping state + * <at> lastcount: count at entry to dropping state + * <at> dropping: set to true if in dropping state + * <at> rec_inv_sqrt: reciprocal value of sqrt(count) >> 1 + * <at> first_above_time: when we went (or will go) continuously above + * target for interval + * <at> drop_next: time to drop next packet, or when we dropped last + * <at> ldelay: sojourn time of last dequeued packet + */ +struct codel_vars { + u_int32_t count; + u_int32_t lastcount; + int dropping; + u_int16_t rec_inv_sqrt; + u_int64_t first_above_time; + u_int64_t drop_next; + u_int64_t ldelay; +}; + +struct codel { + int last_pps; + struct codel_params params; + struct codel_vars vars; + struct codel_stats stats; + struct timeval last_log; + u_int32_t drop_overlimit; +}; + +/* + * codel interface state + */ +struct codel_if { + struct codel_if *cif_next; /* interface state list */ + struct ifaltq *cif_ifq; /* backpointer to ifaltq */ + u_int cif_bandwidth; /* link bandwidth in bps */ + + class_queue_t *cl_q; /* class queue structure */ + struct codel codel; + + /* statistics */ + struct codel_ifstats cl_stats; +}; + +struct codel *codel_alloc(int, int, int); +void codel_destroy(struct codel *); +int codel_addq(struct codel *, class_queue_t *, struct mbuf *); +struct mbuf *codel_getq(struct codel *, class_queue_t *); +void codel_getstats(struct codel *, struct codel_stats *); + +#endif /* _KERNEL */ + +#endif /* _ALTQ_ALTQ_CODEL_H_ */ diff --git a/sys/contrib/altq/altq/altq_fairq.c b/sys/contrib/altq/altq/altq_fairq.c new file mode 100644 index 0000000..2189996 --- /dev/null +++ b/sys/contrib/altq/altq/altq_fairq.c @@ -0,0 +1,909 @@ +/* + * Copyright (c) 2008 The DragonFly Project. All rights reserved. + * + * This code is derived from software contributed to The DragonFly Project + * by Matthew Dillon <dillon@backplane.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * 3. Neither the name of The DragonFly Project nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific, prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $DragonFly: src/sys/net/altq/altq_fairq.c,v 1.1 2008/04/06 18:58:15 dillon Exp $ + * $FreeBSD$ + */ +/* + * Matt: I gutted altq_priq.c and used it as a skeleton on which to build + * fairq. The fairq algorithm is completely different then priq, of course, + * but because I used priq's skeleton I believe I should include priq's + * copyright. + * + * Copyright (C) 2000-2003 + * Sony Computer Science Laboratories Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * FAIRQ - take traffic classified by keep state (hashed into + * mbuf->m_pkthdr.altq_state_hash) and bucketize it. Fairly extract + * the first packet from each bucket in a round-robin fashion. + * + * TODO - better overall qlimit support (right now it is per-bucket). + * - NOTE: red etc is per bucket, not overall. + * - better service curve support. + * + * EXAMPLE: + * + * altq on em0 fairq bandwidth 650Kb queue { std, bulk } + * queue std priority 3 bandwidth 400Kb \ + * fairq (buckets 64, default, hogs 1Kb) qlimit 50 + * queue bulk priority 2 bandwidth 100Kb \ + * fairq (buckets 64, hogs 1Kb) qlimit 50 + * + * pass out on em0 from any to any keep state queue std + * pass out on em0 inet proto tcp ..... port ... keep state queue bulk + */ +#include "opt_altq.h" +#include "opt_inet.h" +#include "opt_inet6.h" + +#ifdef ALTQ_FAIRQ /* fairq is enabled in the kernel conf */ + +#include <sys/param.h> +#include <sys/malloc.h> +#include <sys/mbuf.h> +#include <sys/socket.h> +#include <sys/sockio.h> +#include <sys/systm.h> +#include <sys/proc.h> +#include <sys/errno.h> +#include <sys/kernel.h> +#include <sys/queue.h> + +#include <net/if.h> +#include <net/if_var.h> +#include <netinet/in.h> + +#include <netpfil/pf/pf.h> +#include <netpfil/pf/pf_altq.h> +#include <netpfil/pf/pf_mtag.h> +#include <altq/altq.h> +#include <altq/altq_fairq.h> + +/* + * function prototypes + */ +static int fairq_clear_interface(struct fairq_if *); +static int fairq_request(struct ifaltq *, int, void *); +static void fairq_purge(struct fairq_if *); +static struct fairq_class *fairq_class_create(struct fairq_if *, int, int, u_int, struct fairq_opts *, int); +static int fairq_class_destroy(struct fairq_class *); +static int fairq_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *); +static struct mbuf *fairq_dequeue(struct ifaltq *, int); + +static int fairq_addq(struct fairq_class *, struct mbuf *, u_int32_t); +static struct mbuf *fairq_getq(struct fairq_class *, uint64_t); +static struct mbuf *fairq_pollq(struct fairq_class *, uint64_t, int *); +static fairq_bucket_t *fairq_selectq(struct fairq_class *, int); +static void fairq_purgeq(struct fairq_class *); + +static void get_class_stats(struct fairq_classstats *, struct fairq_class *); +static struct fairq_class *clh_to_clp(struct fairq_if *, uint32_t); + +int +fairq_pfattach(struct pf_altq *a) +{ + struct ifnet *ifp; + int error; + + if ((ifp = ifunit(a->ifname)) == NULL || a->altq_disc == NULL) + return (EINVAL); + + error = altq_attach(&ifp->if_snd, ALTQT_FAIRQ, a->altq_disc, + fairq_enqueue, fairq_dequeue, fairq_request, NULL, NULL); + + return (error); +} + +int +fairq_add_altq(struct pf_altq *a) +{ + struct fairq_if *pif; + struct ifnet *ifp; + + if ((ifp = ifunit(a->ifname)) == NULL) + return (EINVAL); + if (!ALTQ_IS_READY(&ifp->if_snd)) + return (ENODEV); + + + pif = malloc(sizeof(struct fairq_if), + M_DEVBUF, M_WAITOK | M_ZERO); + pif->pif_bandwidth = a->ifbandwidth; + pif->pif_maxpri = -1; + pif->pif_ifq = &ifp->if_snd; + + /* keep the state in pf_altq */ + a->altq_disc = pif; + + return (0); +} + +int +fairq_remove_altq(struct pf_altq *a) +{ + struct fairq_if *pif; + + if ((pif = a->altq_disc) == NULL) + return (EINVAL); + a->altq_disc = NULL; + + fairq_clear_interface(pif); + + free(pif, M_DEVBUF); + return (0); +} + +int +fairq_add_queue(struct pf_altq *a) +{ + struct fairq_if *pif; + struct fairq_class *cl; + + if ((pif = a->altq_disc) == NULL) + return (EINVAL); + + /* check parameters */ + if (a->priority >= FAIRQ_MAXPRI) + return (EINVAL); + if (a->qid == 0) + return (EINVAL); + if (pif->pif_classes[a->priority] != NULL) + return (EBUSY); + if (clh_to_clp(pif, a->qid) != NULL) + return (EBUSY); + + cl = fairq_class_create(pif, a->priority, a->qlimit, a->bandwidth, + &a->pq_u.fairq_opts, a->qid); + if (cl == NULL) + return (ENOMEM); + + return (0); +} + +int +fairq_remove_queue(struct pf_altq *a) +{ + struct fairq_if *pif; + struct fairq_class *cl; + + if ((pif = a->altq_disc) == NULL) + return (EINVAL); + + if ((cl = clh_to_clp(pif, a->qid)) == NULL) + return (EINVAL); + + return (fairq_class_destroy(cl)); +} + +int +fairq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes) +{ + struct fairq_if *pif; + struct fairq_class *cl; + struct fairq_classstats stats; + int error = 0; + + if ((pif = altq_lookup(a->ifname, ALTQT_FAIRQ)) == NULL) + return (EBADF); + + if ((cl = clh_to_clp(pif, a->qid)) == NULL) + return (EINVAL); + + if (*nbytes < sizeof(stats)) + return (EINVAL); + + get_class_stats(&stats, cl); + + if ((error = copyout((caddr_t)&stats, ubuf, sizeof(stats))) != 0) + return (error); + *nbytes = sizeof(stats); + return (0); +} + +/* + * bring the interface back to the initial state by discarding + * all the filters and classes. + */ +static int +fairq_clear_interface(struct fairq_if *pif) +{ + struct fairq_class *cl; + int pri; + + /* clear out the classes */ + for (pri = 0; pri <= pif->pif_maxpri; pri++) { + if ((cl = pif->pif_classes[pri]) != NULL) + fairq_class_destroy(cl); + } + + return (0); +} + +static int +fairq_request(struct ifaltq *ifq, int req, void *arg) +{ + struct fairq_if *pif = (struct fairq_if *)ifq->altq_disc; + + IFQ_LOCK_ASSERT(ifq); + + switch (req) { + case ALTRQ_PURGE: + fairq_purge(pif); + break; + } + return (0); +} + +/* discard all the queued packets on the interface */ +static void +fairq_purge(struct fairq_if *pif) +{ + struct fairq_class *cl; + int pri; + + for (pri = 0; pri <= pif->pif_maxpri; pri++) { + if ((cl = pif->pif_classes[pri]) != NULL && cl->cl_head) + fairq_purgeq(cl); + } + if (ALTQ_IS_ENABLED(pif->pif_ifq)) + pif->pif_ifq->ifq_len = 0; +} + +static struct fairq_class * +fairq_class_create(struct fairq_if *pif, int pri, int qlimit, + u_int bandwidth, struct fairq_opts *opts, int qid) +{ + struct fairq_class *cl; + int flags = opts->flags; + u_int nbuckets = opts->nbuckets; + int i; + +#ifndef ALTQ_RED + if (flags & FARF_RED) { +#ifdef ALTQ_DEBUG + printf("fairq_class_create: RED not configured for FAIRQ!\n"); +#endif + return (NULL); + } +#endif +#ifndef ALTQ_CODEL + if (flags & FARF_CODEL) { +#ifdef ALTQ_DEBUG + printf("fairq_class_create: CODEL not configured for FAIRQ!\n"); +#endif + return (NULL); + } +#endif + if (nbuckets == 0) + nbuckets = 256; + if (nbuckets > FAIRQ_MAX_BUCKETS) + nbuckets = FAIRQ_MAX_BUCKETS; + /* enforce power-of-2 size */ + while ((nbuckets ^ (nbuckets - 1)) != ((nbuckets << 1) - 1)) + ++nbuckets; + + if ((cl = pif->pif_classes[pri]) != NULL) { + /* modify the class instead of creating a new one */ + IFQ_LOCK(cl->cl_pif->pif_ifq); + if (cl->cl_head) + fairq_purgeq(cl); + IFQ_UNLOCK(cl->cl_pif->pif_ifq); +#ifdef ALTQ_RIO + if (cl->cl_qtype == Q_RIO) + rio_destroy((rio_t *)cl->cl_red); +#endif +#ifdef ALTQ_RED + if (cl->cl_qtype == Q_RED) + red_destroy(cl->cl_red); +#endif +#ifdef ALTQ_CODEL + if (cl->cl_qtype == Q_CODEL) + codel_destroy(cl->cl_codel); +#endif + } else { + cl = malloc(sizeof(struct fairq_class), + M_DEVBUF, M_WAITOK | M_ZERO); + cl->cl_nbuckets = nbuckets; + cl->cl_nbucket_mask = nbuckets - 1; + + cl->cl_buckets = malloc( + sizeof(struct fairq_bucket) * cl->cl_nbuckets, + M_DEVBUF, M_WAITOK | M_ZERO); + cl->cl_head = NULL; + } + + pif->pif_classes[pri] = cl; + if (flags & FARF_DEFAULTCLASS) + pif->pif_default = cl; + if (qlimit == 0) + qlimit = 50; /* use default */ + cl->cl_qlimit = qlimit; + for (i = 0; i < cl->cl_nbuckets; ++i) { + qlimit(&cl->cl_buckets[i].queue) = qlimit; + } + cl->cl_bandwidth = bandwidth / 8; + cl->cl_qtype = Q_DROPTAIL; + cl->cl_flags = flags & FARF_USERFLAGS; + cl->cl_pri = pri; + if (pri > pif->pif_maxpri) + pif->pif_maxpri = pri; + cl->cl_pif = pif; + cl->cl_handle = qid; + cl->cl_hogs_m1 = opts->hogs_m1 / 8; + cl->cl_lssc_m1 = opts->lssc_m1 / 8; /* NOT YET USED */ + +#ifdef ALTQ_RED + if (flags & (FARF_RED|FARF_RIO)) { + int red_flags, red_pkttime; + + red_flags = 0; + if (flags & FARF_ECN) + red_flags |= REDF_ECN; +#ifdef ALTQ_RIO + if (flags & FARF_CLEARDSCP) + red_flags |= RIOF_CLEARDSCP; +#endif + if (pif->pif_bandwidth < 8) + red_pkttime = 1000 * 1000 * 1000; /* 1 sec */ + else + red_pkttime = (int64_t)pif->pif_ifq->altq_ifp->if_mtu + * 1000 * 1000 * 1000 / (pif->pif_bandwidth / 8); +#ifdef ALTQ_RIO + if (flags & FARF_RIO) { + cl->cl_red = (red_t *)rio_alloc(0, NULL, + red_flags, red_pkttime); + if (cl->cl_red != NULL) + cl->cl_qtype = Q_RIO; + } else +#endif + if (flags & FARF_RED) { + cl->cl_red = red_alloc(0, 0, + cl->cl_qlimit * 10/100, + cl->cl_qlimit * 30/100, + red_flags, red_pkttime); + if (cl->cl_red != NULL) + cl->cl_qtype = Q_RED; + } + } +#endif /* ALTQ_RED */ +#ifdef ALTQ_CODEL + if (flags & FARF_CODEL) { + cl->cl_codel = codel_alloc(5, 100, 0); + if (cl->cl_codel != NULL) + cl->cl_qtype = Q_CODEL; + } +#endif + + return (cl); +} + +static int +fairq_class_destroy(struct fairq_class *cl) +{ + struct fairq_if *pif; + int pri; + + IFQ_LOCK(cl->cl_pif->pif_ifq); + + if (cl->cl_head) + fairq_purgeq(cl); + + pif = cl->cl_pif; + pif->pif_classes[cl->cl_pri] = NULL; + if (pif->pif_poll_cache == cl) + pif->pif_poll_cache = NULL; + if (pif->pif_maxpri == cl->cl_pri) { + for (pri = cl->cl_pri; pri >= 0; pri--) + if (pif->pif_classes[pri] != NULL) { + pif->pif_maxpri = pri; + break; + } + if (pri < 0) + pif->pif_maxpri = -1; + } + IFQ_UNLOCK(cl->cl_pif->pif_ifq); + + if (cl->cl_red != NULL) { +#ifdef ALTQ_RIO + if (cl->cl_qtype == Q_RIO) + rio_destroy((rio_t *)cl->cl_red); +#endif +#ifdef ALTQ_RED + if (cl->cl_qtype == Q_RED) + red_destroy(cl->cl_red); +#endif +#ifdef ALTQ_CODEL + if (cl->cl_qtype == Q_CODEL) + codel_destroy(cl->cl_codel); +#endif + } + free(cl->cl_buckets, M_DEVBUF); + free(cl, M_DEVBUF); + + return (0); +} + +/* + * fairq_enqueue is an enqueue function to be registered to + * (*altq_enqueue) in struct ifaltq. + */ +static int +fairq_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr) +{ + struct fairq_if *pif = (struct fairq_if *)ifq->altq_disc; + struct fairq_class *cl = NULL; /* Make compiler happy */ + struct pf_mtag *t; + u_int32_t qid_hash = 0; + int len; + + IFQ_LOCK_ASSERT(ifq); + + /* grab class set by classifier */ + if ((m->m_flags & M_PKTHDR) == 0) { + /* should not happen */ + printf("altq: packet for %s does not have pkthdr\n", + ifq->altq_ifp->if_xname); + m_freem(m); + return (ENOBUFS); + } + + if ((t = pf_find_mtag(m)) != NULL) { + cl = clh_to_clp(pif, t->qid); + qid_hash = t->qid_hash; + } + if (cl == NULL) { + cl = pif->pif_default; + if (cl == NULL) { + m_freem(m); + return (ENOBUFS); + } + } + cl->cl_flags |= FARF_HAS_PACKETS; + cl->cl_pktattr = NULL; + len = m_pktlen(m); + if (fairq_addq(cl, m, qid_hash) != 0) { + /* drop occurred. mbuf was freed in fairq_addq. */ + PKTCNTR_ADD(&cl->cl_dropcnt, len); + return (ENOBUFS); + } + IFQ_INC_LEN(ifq); + + return (0); +} + +/* + * fairq_dequeue is a dequeue function to be registered to + * (*altq_dequeue) in struct ifaltq. + * + * note: ALTDQ_POLL returns the next packet without removing the packet + * from the queue. ALTDQ_REMOVE is a normal dequeue operation. + * ALTDQ_REMOVE must return the same packet if called immediately + * after ALTDQ_POLL. + */ +static struct mbuf * +fairq_dequeue(struct ifaltq *ifq, int op) +{ + struct fairq_if *pif = (struct fairq_if *)ifq->altq_disc; + struct fairq_class *cl; + struct fairq_class *best_cl; + struct mbuf *best_m; + struct mbuf *m = NULL; + uint64_t cur_time = read_machclk(); + int pri; + int hit_limit; + + IFQ_LOCK_ASSERT(ifq); + + if (IFQ_IS_EMPTY(ifq)) { + return (NULL); + } + + if (pif->pif_poll_cache && op == ALTDQ_REMOVE) { + best_cl = pif->pif_poll_cache; + m = fairq_getq(best_cl, cur_time); + pif->pif_poll_cache = NULL; + if (m) { + IFQ_DEC_LEN(ifq); + PKTCNTR_ADD(&best_cl->cl_xmitcnt, m_pktlen(m)); + return (m); + } + } else { + best_cl = NULL; + best_m = NULL; + + for (pri = pif->pif_maxpri; pri >= 0; pri--) { + if ((cl = pif->pif_classes[pri]) == NULL) + continue; + if ((cl->cl_flags & FARF_HAS_PACKETS) == 0) + continue; + m = fairq_pollq(cl, cur_time, &hit_limit); + if (m == NULL) { + cl->cl_flags &= ~FARF_HAS_PACKETS; + continue; + } + + /* + * Only override the best choice if we are under + * the BW limit. + */ + if (hit_limit == 0 || best_cl == NULL) { + best_cl = cl; + best_m = m; + } + + /* + * Remember the highest priority mbuf in case we + * do not find any lower priority mbufs. + */ + if (hit_limit) + continue; + break; + } + if (op == ALTDQ_POLL) { + pif->pif_poll_cache = best_cl; + m = best_m; + } else if (best_cl) { + m = fairq_getq(best_cl, cur_time); + if (m != NULL) { + IFQ_DEC_LEN(ifq); + PKTCNTR_ADD(&best_cl->cl_xmitcnt, m_pktlen(m)); + } + } + return (m); + } + return (NULL); +} + +static int +fairq_addq(struct fairq_class *cl, struct mbuf *m, u_int32_t bucketid) +{ + fairq_bucket_t *b; + u_int hindex; + uint64_t bw; + + /* + * If the packet doesn't have any keep state put it on the end of + * our queue. XXX this can result in out of order delivery. + */ + if (bucketid == 0) { + if (cl->cl_head) + b = cl->cl_head->prev; + else + b = &cl->cl_buckets[0]; + } else { + hindex = bucketid & cl->cl_nbucket_mask; + b = &cl->cl_buckets[hindex]; + } + + /* + * Add the bucket to the end of the circular list of active buckets. + * + * As a special case we add the bucket to the beginning of the list + * instead of the end if it was not previously on the list and if + * its traffic is less then the hog level. + */ + if (b->in_use == 0) { + b->in_use = 1; + if (cl->cl_head == NULL) { + cl->cl_head = b; + b->next = b; + b->prev = b; + } else { + b->next = cl->cl_head; + b->prev = cl->cl_head->prev; + b->prev->next = b; + b->next->prev = b; + + if (b->bw_delta && cl->cl_hogs_m1) { + bw = b->bw_bytes * machclk_freq / b->bw_delta; + if (bw < cl->cl_hogs_m1) + cl->cl_head = b; + } + } + } + +#ifdef ALTQ_RIO + if (cl->cl_qtype == Q_RIO) + return rio_addq((rio_t *)cl->cl_red, &b->queue, m, cl->cl_pktattr); +#endif +#ifdef ALTQ_RED + if (cl->cl_qtype == Q_RED) + return red_addq(cl->cl_red, &b->queue, m, cl->cl_pktattr); +#endif +#ifdef ALTQ_CODEL + if (cl->cl_qtype == Q_CODEL) + return codel_addq(cl->cl_codel, &b->queue, m); +#endif + if (qlen(&b->queue) >= qlimit(&b->queue)) { + m_freem(m); + return (-1); + } + + if (cl->cl_flags & FARF_CLEARDSCP) + write_dsfield(m, cl->cl_pktattr, 0); + + _addq(&b->queue, m); + + return (0); +} + +static struct mbuf * +fairq_getq(struct fairq_class *cl, uint64_t cur_time) +{ + fairq_bucket_t *b; + struct mbuf *m; + + b = fairq_selectq(cl, 0); + if (b == NULL) + m = NULL; +#ifdef ALTQ_RIO + else if (cl->cl_qtype == Q_RIO) + m = rio_getq((rio_t *)cl->cl_red, &b->queue); +#endif +#ifdef ALTQ_RED + else if (cl->cl_qtype == Q_RED) + m = red_getq(cl->cl_red, &b->queue); +#endif +#ifdef ALTQ_CODEL + else if (cl->cl_qtype == Q_CODEL) + m = codel_getq(cl->cl_codel, &b->queue); +#endif + else + m = _getq(&b->queue); + + /* + * Calculate the BW change + */ + if (m != NULL) { + uint64_t delta; + + /* + * Per-class bandwidth calculation + */ + delta = (cur_time - cl->cl_last_time); + if (delta > machclk_freq * 8) + delta = machclk_freq * 8; + cl->cl_bw_delta += delta; + cl->cl_bw_bytes += m->m_pkthdr.len; + cl->cl_last_time = cur_time; + cl->cl_bw_delta -= cl->cl_bw_delta >> 3; + cl->cl_bw_bytes -= cl->cl_bw_bytes >> 3; + + /* + * Per-bucket bandwidth calculation + */ + delta = (cur_time - b->last_time); + if (delta > machclk_freq * 8) + delta = machclk_freq * 8; + b->bw_delta += delta; + b->bw_bytes += m->m_pkthdr.len; + b->last_time = cur_time; + b->bw_delta -= b->bw_delta >> 3; + b->bw_bytes -= b->bw_bytes >> 3; + } + return(m); +} + +/* + * Figure out what the next packet would be if there were no limits. If + * this class hits its bandwidth limit *hit_limit is set to no-zero, otherwise + * it is set to 0. A non-NULL mbuf is returned either way. + */ +static struct mbuf * +fairq_pollq(struct fairq_class *cl, uint64_t cur_time, int *hit_limit) +{ + fairq_bucket_t *b; + struct mbuf *m; + uint64_t delta; + uint64_t bw; + + *hit_limit = 0; + b = fairq_selectq(cl, 1); + if (b == NULL) + return(NULL); + m = qhead(&b->queue); + + /* + * Did this packet exceed the class bandwidth? Calculate the + * bandwidth component of the packet. + * + * - Calculate bytes per second + */ + delta = cur_time - cl->cl_last_time; + if (delta > machclk_freq * 8) + delta = machclk_freq * 8; + cl->cl_bw_delta += delta; + cl->cl_last_time = cur_time; + if (cl->cl_bw_delta) { + bw = cl->cl_bw_bytes * machclk_freq / cl->cl_bw_delta; + + if (bw > cl->cl_bandwidth) + *hit_limit = 1; +#ifdef ALTQ_DEBUG + printf("BW %6ju relative to %6u %d queue %p\n", + (uintmax_t)bw, cl->cl_bandwidth, *hit_limit, b); +#endif + } + return(m); +} + +/* + * Locate the next queue we want to pull a packet out of. This code + * is also responsible for removing empty buckets from the circular list. + */ +static +fairq_bucket_t * +fairq_selectq(struct fairq_class *cl, int ispoll) +{ + fairq_bucket_t *b; + uint64_t bw; + + if (ispoll == 0 && cl->cl_polled) { + b = cl->cl_polled; + cl->cl_polled = NULL; + return(b); + } + + while ((b = cl->cl_head) != NULL) { + /* + * Remove empty queues from consideration + */ + if (qempty(&b->queue)) { + b->in_use = 0; + cl->cl_head = b->next; + if (cl->cl_head == b) { + cl->cl_head = NULL; + } else { + b->next->prev = b->prev; + b->prev->next = b->next; + } + continue; + } + + /* + * Advance the round robin. Queues with bandwidths less + * then the hog bandwidth are allowed to burst. + */ + if (cl->cl_hogs_m1 == 0) { + cl->cl_head = b->next; + } else if (b->bw_delta) { + bw = b->bw_bytes * machclk_freq / b->bw_delta; + if (bw >= cl->cl_hogs_m1) { + cl->cl_head = b->next; + } + /* + * XXX TODO - + */ + } + + /* + * Return bucket b. + */ + break; + } + if (ispoll) + cl->cl_polled = b; + return(b); +} + +static void +fairq_purgeq(struct fairq_class *cl) +{ + fairq_bucket_t *b; + struct mbuf *m; + + while ((b = fairq_selectq(cl, 0)) != NULL) { + while ((m = _getq(&b->queue)) != NULL) { + PKTCNTR_ADD(&cl->cl_dropcnt, m_pktlen(m)); + m_freem(m); + } + ASSERT(qlen(&b->queue) == 0); + } +} + +static void +get_class_stats(struct fairq_classstats *sp, struct fairq_class *cl) +{ + fairq_bucket_t *b; + + sp->class_handle = cl->cl_handle; + sp->qlimit = cl->cl_qlimit; + sp->xmit_cnt = cl->cl_xmitcnt; + sp->drop_cnt = cl->cl_dropcnt; + sp->qtype = cl->cl_qtype; + sp->qlength = 0; + + if (cl->cl_head) { + b = cl->cl_head; + do { + sp->qlength += qlen(&b->queue); + b = b->next; + } while (b != cl->cl_head); + } + +#ifdef ALTQ_RED + if (cl->cl_qtype == Q_RED) + red_getstats(cl->cl_red, &sp->red[0]); +#endif +#ifdef ALTQ_RIO + if (cl->cl_qtype == Q_RIO) + rio_getstats((rio_t *)cl->cl_red, &sp->red[0]); +#endif +#ifdef ALTQ_CODEL + if (cl->cl_qtype == Q_CODEL) + codel_getstats(cl->cl_codel, &sp->codel); +#endif +} + +/* convert a class handle to the corresponding class pointer */ +static struct fairq_class * +clh_to_clp(struct fairq_if *pif, uint32_t chandle) +{ + struct fairq_class *cl; + int idx; + + if (chandle == 0) + return (NULL); + + for (idx = pif->pif_maxpri; idx >= 0; idx--) + if ((cl = pif->pif_classes[idx]) != NULL && + cl->cl_handle == chandle) + return (cl); + + return (NULL); +} + +#endif /* ALTQ_FAIRQ */ diff --git a/sys/contrib/altq/altq/altq_fairq.h b/sys/contrib/altq/altq/altq_fairq.h new file mode 100644 index 0000000..8fede5b --- /dev/null +++ b/sys/contrib/altq/altq/altq_fairq.h @@ -0,0 +1,145 @@ +/* + * Copyright (c) 2008 The DragonFly Project. All rights reserved. + * + * This code is derived from software contributed to The DragonFly Project + * by Matthew Dillon <dillon@backplane.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * 3. Neither the name of The DragonFly Project nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific, prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $DragonFly: src/sys/net/altq/altq_fairq.h,v 1.1 2008/04/06 18:58:15 dillon Exp $ + * $FreeBSD$ + */ + +#ifndef _ALTQ_ALTQ_FAIRQ_H_ +#define _ALTQ_ALTQ_FAIRQ_H_ + +#include <altq/altq.h> +#include <altq/altq_classq.h> +#include <altq/altq_codel.h> +#include <altq/altq_red.h> +#include <altq/altq_rio.h> +#include <altq/altq_rmclass.h> + +#define FAIRQ_MAX_BUCKETS 2048 /* maximum number of sorting buckets */ +#define FAIRQ_MAXPRI RM_MAXPRIO +#define FAIRQ_BITMAP_WIDTH (sizeof(fairq_bitmap_t)*8) +#define FAIRQ_BITMAP_MASK (FAIRQ_BITMAP_WIDTH - 1) + +/* fairq class flags */ +#define FARF_RED 0x0001 /* use RED */ +#define FARF_ECN 0x0002 /* use RED/ECN */ +#define FARF_RIO 0x0004 /* use RIO */ +#define FARF_CODEL 0x0008 /* use CoDel */ +#define FARF_CLEARDSCP 0x0010 /* clear diffserv codepoint */ +#define FARF_DEFAULTCLASS 0x1000 /* default class */ + +#define FARF_HAS_PACKETS 0x2000 /* might have queued packets */ + +#define FARF_USERFLAGS (FARF_RED|FARF_ECN|FARF_RIO|FARF_CLEARDSCP| \ + FARF_DEFAULTCLASS) + +/* special class handles */ +#define FAIRQ_NULLCLASS_HANDLE 0 + +typedef u_int fairq_bitmap_t; + +struct fairq_classstats { + uint32_t class_handle; + + u_int qlength; + u_int qlimit; + struct pktcntr xmit_cnt; /* transmitted packet counter */ + struct pktcntr drop_cnt; /* dropped packet counter */ + + /* codel, red and rio related info */ + int qtype; + struct redstats red[3]; /* rio has 3 red stats */ + struct codel_stats codel; +}; + +#ifdef _KERNEL + +typedef struct fairq_bucket { + struct fairq_bucket *next; /* circular list */ + struct fairq_bucket *prev; /* circular list */ + class_queue_t queue; /* the actual queue */ + uint64_t bw_bytes; /* statistics used to calculate bw */ + uint64_t bw_delta; /* statistics used to calculate bw */ + uint64_t last_time; + int in_use; +} fairq_bucket_t; + +struct fairq_class { + uint32_t cl_handle; /* class handle */ + u_int cl_nbuckets; /* (power of 2) */ + u_int cl_nbucket_mask; /* bucket mask */ + fairq_bucket_t *cl_buckets; + fairq_bucket_t *cl_head; /* head of circular bucket list */ + fairq_bucket_t *cl_polled; + union { + struct red *cl_red; /* RED state */ + struct codel *cl_codel; /* CoDel state */ + } cl_aqm; +#define cl_red cl_aqm.cl_red +#define cl_codel cl_aqm.cl_codel + u_int cl_hogs_m1; + u_int cl_lssc_m1; + u_int cl_bandwidth; + uint64_t cl_bw_bytes; + uint64_t cl_bw_delta; + uint64_t cl_last_time; + int cl_qtype; /* rollup */ + int cl_qlimit; + int cl_pri; /* priority */ + int cl_flags; /* class flags */ + struct fairq_if *cl_pif; /* back pointer to pif */ + struct altq_pktattr *cl_pktattr; /* saved header used by ECN */ + + /* round robin index */ + + /* statistics */ + struct pktcntr cl_xmitcnt; /* transmitted packet counter */ + struct pktcntr cl_dropcnt; /* dropped packet counter */ +}; + +/* + * fairq interface state + */ +struct fairq_if { + struct fairq_if *pif_next; /* interface state list */ + struct ifaltq *pif_ifq; /* backpointer to ifaltq */ + u_int pif_bandwidth; /* link bandwidth in bps */ + int pif_maxpri; /* max priority in use */ + struct fairq_class *pif_poll_cache;/* cached poll */ + struct fairq_class *pif_default; /* default class */ + struct fairq_class *pif_classes[FAIRQ_MAXPRI]; /* classes */ +}; + +#endif /* _KERNEL */ + +#endif /* _ALTQ_ALTQ_FAIRQ_H_ */ diff --git a/sys/contrib/altq/altq/altq_hfsc.c b/sys/contrib/altq/altq/altq_hfsc.c index 0363016..d4f7ab9 100644 --- a/sys/contrib/altq/altq/altq_hfsc.c +++ b/sys/contrib/altq/altq/altq_hfsc.c @@ -391,6 +391,14 @@ hfsc_class_create(struct hfsc_if *hif, struct service_curve *rsc, return (NULL); } #endif +#ifndef ALTQ_CODEL + if (flags & HFCF_CODEL) { +#ifdef ALTQ_DEBUG + printf("hfsc_class_create: CODEL not configured for HFSC!\n"); +#endif + return (NULL); + } +#endif cl = malloc(sizeof(struct hfsc_class), M_DEVBUF, M_NOWAIT | M_ZERO); if (cl == NULL) @@ -407,6 +415,7 @@ hfsc_class_create(struct hfsc_if *hif, struct service_curve *rsc, qlimit(cl->cl_q) = qlimit; qtype(cl->cl_q) = Q_DROPTAIL; qlen(cl->cl_q) = 0; + qsize(cl->cl_q) = 0; cl->cl_flags = flags; #ifdef ALTQ_RED if (flags & (HFCF_RED|HFCF_RIO)) { @@ -451,6 +460,13 @@ hfsc_class_create(struct hfsc_if *hif, struct service_curve *rsc, #endif } #endif /* ALTQ_RED */ +#ifdef ALTQ_CODEL + if (flags & HFCF_CODEL) { + cl->cl_codel = codel_alloc(5, 100, 0); + if (cl->cl_codel != NULL) + qtype(cl->cl_q) = Q_CODEL; + } +#endif if (rsc != NULL && (rsc->m1 != 0 || rsc->m2 != 0)) { cl->cl_rsc = malloc(sizeof(struct internal_sc), @@ -543,6 +559,10 @@ hfsc_class_create(struct hfsc_if *hif, struct service_curve *rsc, if (q_is_red(cl->cl_q)) red_destroy(cl->cl_red); #endif +#ifdef ALTQ_CODEL + if (q_is_codel(cl->cl_q)) + codel_destroy(cl->cl_codel); +#endif } if (cl->cl_fsc != NULL) free(cl->cl_fsc, M_DEVBUF); @@ -617,6 +637,10 @@ hfsc_class_destroy(struct hfsc_class *cl) if (q_is_red(cl->cl_q)) red_destroy(cl->cl_red); #endif +#ifdef ALTQ_CODEL + if (q_is_codel(cl->cl_q)) + codel_destroy(cl->cl_codel); +#endif } IFQ_LOCK(cl->cl_hif->hif_ifq); @@ -844,6 +868,10 @@ hfsc_addq(struct hfsc_class *cl, struct mbuf *m) if (q_is_red(cl->cl_q)) return red_addq(cl->cl_red, cl->cl_q, m, cl->cl_pktattr); #endif +#ifdef ALTQ_CODEL + if (q_is_codel(cl->cl_q)) + return codel_addq(cl->cl_codel, cl->cl_q, m); +#endif if (qlen(cl->cl_q) >= qlimit(cl->cl_q)) { m_freem(m); return (-1); @@ -868,6 +896,10 @@ hfsc_getq(struct hfsc_class *cl) if (q_is_red(cl->cl_q)) return red_getq(cl->cl_red, cl->cl_q); #endif +#ifdef ALTQ_CODEL + if (q_is_codel(cl->cl_q)) + return codel_getq(cl->cl_codel, cl->cl_q); +#endif return _getq(cl->cl_q); } @@ -1652,6 +1684,10 @@ get_class_stats(struct hfsc_classstats *sp, struct hfsc_class *cl) if (q_is_rio(cl->cl_q)) rio_getstats((rio_t *)cl->cl_red, &sp->red[0]); #endif +#ifdef ALTQ_CODEL + if (q_is_codel(cl->cl_q)) + codel_getstats(cl->cl_codel, &sp->codel); +#endif } /* convert a class handle to the corresponding class pointer */ diff --git a/sys/contrib/altq/altq/altq_hfsc.h b/sys/contrib/altq/altq/altq_hfsc.h index d04b378..b0228c5 100644 --- a/sys/contrib/altq/altq/altq_hfsc.h +++ b/sys/contrib/altq/altq/altq_hfsc.h @@ -34,6 +34,7 @@ #include <altq/altq.h> #include <altq/altq_classq.h> +#include <altq/altq_codel.h> #include <altq/altq_red.h> #include <altq/altq_rio.h> @@ -49,12 +50,13 @@ struct service_curve { /* special class handles */ #define HFSC_NULLCLASS_HANDLE 0 -#define HFSC_MAX_CLASSES 64 +#define HFSC_MAX_CLASSES 2048 /* hfsc class flags */ #define HFCF_RED 0x0001 /* use RED */ #define HFCF_ECN 0x0002 /* use RED/ECN */ #define HFCF_RIO 0x0004 /* use RIO */ +#define HFCF_CODEL 0x0008 /* use CoDel */ #define HFCF_CLEARDSCP 0x0010 /* clear diffserv codepoint */ #define HFCF_DEFAULTCLASS 0x1000 /* default class */ @@ -101,9 +103,10 @@ struct hfsc_classstats { u_int parentperiod; /* parent's vt period seqno */ int nactive; /* number of active children */ - /* red and rio related info */ + /* codel, red and rio related info */ int qtype; struct redstats red[3]; + struct codel_stats codel; }; #ifdef ALTQ3_COMPAT @@ -229,7 +232,12 @@ struct hfsc_class { struct hfsc_class *cl_children; /* child classes */ class_queue_t *cl_q; /* class queue structure */ - struct red *cl_red; /* RED state */ + union { + struct red *cl_red; /* RED state */ + struct codel *cl_codel; /* CoDel state */ + } cl_aqm; +#define cl_red cl_aqm.cl_red +#define cl_codel cl_aqm.cl_codel struct altq_pktattr *cl_pktattr; /* saved header used by ECN */ u_int64_t cl_total; /* total work in bytes */ diff --git a/sys/contrib/altq/altq/altq_priq.c b/sys/contrib/altq/altq/altq_priq.c index 3ce65dc..c77ba40 100644 --- a/sys/contrib/altq/altq/altq_priq.c +++ b/sys/contrib/altq/altq/altq_priq.c @@ -297,6 +297,14 @@ priq_class_create(struct priq_if *pif, int pri, int qlimit, int flags, int qid) return (NULL); } #endif +#ifndef ALTQ_CODEL + if (flags & PRCF_CODEL) { +#ifdef ALTQ_DEBUG + printf("priq_class_create: CODEL not configured for PRIQ!\n"); +#endif + return (NULL); + } +#endif if ((cl = pif->pif_classes[pri]) != NULL) { /* modify the class instead of creating a new one */ @@ -318,6 +326,10 @@ priq_class_create(struct priq_if *pif, int pri, int qlimit, int flags, int qid) if (q_is_red(cl->cl_q)) red_destroy(cl->cl_red); #endif +#ifdef ALTQ_CODEL + if (q_is_codel(cl->cl_q)) + codel_destroy(cl->cl_codel); +#endif } else { cl = malloc(sizeof(struct priq_class), M_DEVBUF, M_NOWAIT | M_ZERO); @@ -338,6 +350,7 @@ priq_class_create(struct priq_if *pif, int pri, int qlimit, int flags, int qid) qlimit(cl->cl_q) = qlimit; qtype(cl->cl_q) = Q_DROPTAIL; qlen(cl->cl_q) = 0; + qsize(cl->cl_q) = 0; cl->cl_flags = flags; cl->cl_pri = pri; if (pri > pif->pif_maxpri) @@ -381,6 +394,13 @@ priq_class_create(struct priq_if *pif, int pri, int qlimit, int flags, int qid) } } #endif /* ALTQ_RED */ +#ifdef ALTQ_CODEL + if (flags & PRCF_CODEL) { + cl->cl_codel = codel_alloc(5, 100, 0); + if (cl->cl_codel != NULL) + qtype(cl->cl_q) = Q_CODEL; + } +#endif return (cl); @@ -394,6 +414,10 @@ priq_class_create(struct priq_if *pif, int pri, int qlimit, int flags, int qid) if (q_is_red(cl->cl_q)) red_destroy(cl->cl_red); #endif +#ifdef ALTQ_CODEL + if (q_is_codel(cl->cl_q)) + codel_destroy(cl->cl_codel); +#endif } if (cl->cl_q != NULL) free(cl->cl_q, M_DEVBUF); @@ -445,6 +469,10 @@ priq_class_destroy(struct priq_class *cl) if (q_is_red(cl->cl_q)) red_destroy(cl->cl_red); #endif +#ifdef ALTQ_CODEL + if (q_is_codel(cl->cl_q)) + codel_destroy(cl->cl_codel); +#endif } free(cl->cl_q, M_DEVBUF); free(cl, M_DEVBUF); @@ -560,6 +588,10 @@ priq_addq(struct priq_class *cl, struct mbuf *m) if (q_is_red(cl->cl_q)) return red_addq(cl->cl_red, cl->cl_q, m, cl->cl_pktattr); #endif +#ifdef ALTQ_CODEL + if (q_is_codel(cl->cl_q)) + return codel_addq(cl->cl_codel, cl->cl_q, m); +#endif if (qlen(cl->cl_q) >= qlimit(cl->cl_q)) { m_freem(m); return (-1); @@ -584,6 +616,10 @@ priq_getq(struct priq_class *cl) if (q_is_red(cl->cl_q)) return red_getq(cl->cl_red, cl->cl_q); #endif +#ifdef ALTQ_CODEL + if (q_is_codel(cl->cl_q)) + return codel_getq(cl->cl_codel, cl->cl_q); +#endif return _getq(cl->cl_q); } @@ -628,7 +664,10 @@ get_class_stats(struct priq_classstats *sp, struct priq_class *cl) if (q_is_rio(cl->cl_q)) rio_getstats((rio_t *)cl->cl_red, &sp->red[0]); #endif - +#ifdef ALTQ_CODEL + if (q_is_codel(cl->cl_q)) + codel_getstats(cl->cl_codel, &sp->codel); +#endif } /* convert a class handle to the corresponding class pointer */ diff --git a/sys/contrib/altq/altq/altq_priq.h b/sys/contrib/altq/altq/altq_priq.h index 481d31b..110847d 100644 --- a/sys/contrib/altq/altq/altq_priq.h +++ b/sys/contrib/altq/altq/altq_priq.h @@ -30,6 +30,7 @@ #include <altq/altq.h> #include <altq/altq_classq.h> +#include <altq/altq_codel.h> #include <altq/altq_red.h> #include <altq/altq_rio.h> @@ -59,6 +60,7 @@ struct priq_add_class { #define PRCF_RED 0x0001 /* use RED */ #define PRCF_ECN 0x0002 /* use RED/ECN */ #define PRCF_RIO 0x0004 /* use RIO */ +#define PRCF_CODEL 0x0008 /* use CoDel */ #define PRCF_CLEARDSCP 0x0010 /* clear diffserv codepoint */ #define PRCF_DEFAULTCLASS 0x1000 /* default class */ @@ -102,9 +104,10 @@ struct priq_classstats { struct pktcntr xmitcnt; /* transmitted packet counter */ struct pktcntr dropcnt; /* dropped packet counter */ - /* red and rio related info */ + /* codel, red and rio related info */ int qtype; struct redstats red[3]; /* rio has 3 red stats */ + struct codel_stats codel; }; #ifdef ALTQ3_COMPAT @@ -134,7 +137,12 @@ struct priq_class_stats { struct priq_class { u_int32_t cl_handle; /* class handle */ class_queue_t *cl_q; /* class queue structure */ - struct red *cl_red; /* RED state */ + union { + struct red *cl_red; /* RED state */ + struct codel *cl_codel; /* CoDel state */ + } cl_aqm; +#define cl_red cl_aqm.cl_red +#define cl_codel cl_aqm.cl_codel int cl_pri; /* priority */ int cl_flags; /* class flags */ struct priq_if *cl_pif; /* back pointer to pif */ diff --git a/sys/contrib/altq/altq/altq_rmclass.c b/sys/contrib/altq/altq/altq_rmclass.c index c433024..656d0ee 100644 --- a/sys/contrib/altq/altq/altq_rmclass.c +++ b/sys/contrib/altq/altq/altq_rmclass.c @@ -68,6 +68,7 @@ #include <altq/if_altq.h> #include <altq/altq.h> +#include <altq/altq_codel.h> #include <altq/altq_rmclass.h> #include <altq/altq_rmclass_debug.h> #include <altq/altq_red.h> @@ -218,6 +219,14 @@ rmc_newclass(int pri, struct rm_ifdat *ifd, u_int nsecPerByte, return (NULL); } #endif +#ifndef ALTQ_CODEL + if (flags & RMCF_CODEL) { +#ifdef ALTQ_DEBUG + printf("rmc_newclass: CODEL not configured for CBQ!\n"); +#endif + return (NULL); + } +#endif cl = malloc(sizeof(struct rm_class), M_DEVBUF, M_NOWAIT | M_ZERO); if (cl == NULL) @@ -302,6 +311,13 @@ rmc_newclass(int pri, struct rm_ifdat *ifd, u_int nsecPerByte, #endif } #endif /* ALTQ_RED */ +#ifdef ALTQ_CODEL + if (flags & RMCF_CODEL) { + cl->codel_ = codel_alloc(5, 100, 0); + if (cl->codel_ != NULL) + qtype(cl->q_) = Q_CODEL; + } +#endif /* * put the class into the class tree @@ -652,6 +668,10 @@ rmc_delete_class(struct rm_ifdat *ifd, struct rm_class *cl) if (q_is_red(cl->q_)) red_destroy(cl->red_); #endif +#ifdef ALTQ_CODEL + if (q_is_codel(cl->q_)) + codel_destroy(cl->codel_); +#endif } free(cl->q_, M_DEVBUF); free(cl, M_DEVBUF); @@ -1618,6 +1638,10 @@ _rmc_addq(rm_class_t *cl, mbuf_t *m) if (q_is_red(cl->q_)) return red_addq(cl->red_, cl->q_, m, cl->pktattr_); #endif /* ALTQ_RED */ +#ifdef ALTQ_CODEL + if (q_is_codel(cl->q_)) + return codel_addq(cl->codel_, cl->q_, m); +#endif if (cl->flags_ & RMCF_CLEARDSCP) write_dsfield(m, cl->pktattr_, 0); @@ -1647,6 +1671,10 @@ _rmc_getq(rm_class_t *cl) if (q_is_red(cl->q_)) return red_getq(cl->red_, cl->q_); #endif +#ifdef ALTQ_CODEL + if (q_is_codel(cl->q_)) + return codel_getq(cl->codel_, cl->q_); +#endif return _getq(cl->q_); } @@ -1717,7 +1745,8 @@ void cbqtrace_dump(int counter) #endif /* CBQ_TRACE */ #endif /* ALTQ_CBQ */ -#if defined(ALTQ_CBQ) || defined(ALTQ_RED) || defined(ALTQ_RIO) || defined(ALTQ_HFSC) || defined(ALTQ_PRIQ) +#if defined(ALTQ_CBQ) || defined(ALTQ_RED) || defined(ALTQ_RIO) || \ + defined(ALTQ_HFSC) || defined(ALTQ_PRIQ) || defined(ALTQ_CODEL) #if !defined(__GNUC__) || defined(ALTQ_DEBUG) void diff --git a/sys/contrib/altq/altq/altq_rmclass.h b/sys/contrib/altq/altq/altq_rmclass.h index cf0ddf4..782bc13 100644 --- a/sys/contrib/altq/altq/altq_rmclass.h +++ b/sys/contrib/altq/altq/altq_rmclass.h @@ -164,7 +164,12 @@ struct rm_class { void (*overlimit)(struct rm_class *, struct rm_class *); void (*drop)(struct rm_class *); /* Class drop action. */ - struct red *red_; /* RED state pointer */ + union { + struct red *red_; /* RED state pointer */ + struct codel *codel_; /* codel state pointer */ + } cl_aqm_; +#define red_ cl_aqm_.red_ +#define codel_ cl_aqm_.codel_ struct altq_pktattr *pktattr_; /* saved hdr used by RED/ECN */ int flags_; @@ -233,6 +238,7 @@ struct rm_ifdat { #define RMCF_RIO 0x0004 #define RMCF_FLOWVALVE 0x0008 /* use flowvalve (aka penalty-box) */ #define RMCF_CLEARDSCP 0x0010 /* clear diffserv codepoint */ +#define RMCF_CODEL 0x0020 /* flags for rmc_init */ #define RMCF_WRR 0x0100 diff --git a/sys/contrib/altq/altq/altq_subr.c b/sys/contrib/altq/altq/altq_subr.c index 16b796a..bddf73f 100644 --- a/sys/contrib/altq/altq/altq_subr.c +++ b/sys/contrib/altq/altq/altq_subr.c @@ -537,6 +537,16 @@ altq_pfattach(struct pf_altq *a) error = hfsc_pfattach(a); break; #endif +#ifdef ALTQ_FAIRQ + case ALTQT_FAIRQ: + error = fairq_pfattach(a); + break; +#endif +#ifdef ALTQ_CODEL + case ALTQT_CODEL: + error = codel_pfattach(a); + break; +#endif default: error = ENXIO; } @@ -612,6 +622,16 @@ altq_add(struct pf_altq *a) error = hfsc_add_altq(a); break; #endif +#ifdef ALTQ_FAIRQ + case ALTQT_FAIRQ: + error = fairq_add_altq(a); + break; +#endif +#ifdef ALTQ_CODEL + case ALTQT_CODEL: + error = codel_add_altq(a); + break; +#endif default: error = ENXIO; } @@ -648,6 +668,16 @@ altq_remove(struct pf_altq *a) error = hfsc_remove_altq(a); break; #endif +#ifdef ALTQ_FAIRQ + case ALTQT_FAIRQ: + error = fairq_remove_altq(a); + break; +#endif +#ifdef ALTQ_CODEL + case ALTQT_CODEL: + error = codel_remove_altq(a); + break; +#endif default: error = ENXIO; } @@ -681,6 +711,11 @@ altq_add_queue(struct pf_altq *a) error = hfsc_add_queue(a); break; #endif +#ifdef ALTQ_FAIRQ + case ALTQT_FAIRQ: + error = fairq_add_queue(a); + break; +#endif default: error = ENXIO; } @@ -714,6 +749,11 @@ altq_remove_queue(struct pf_altq *a) error = hfsc_remove_queue(a); break; #endif +#ifdef ALTQ_FAIRQ + case ALTQT_FAIRQ: + error = fairq_remove_queue(a); + break; +#endif default: error = ENXIO; } @@ -747,6 +787,16 @@ altq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes) error = hfsc_getqstats(a, ubuf, nbytes); break; #endif +#ifdef ALTQ_FAIRQ + case ALTQT_FAIRQ: + error = fairq_getqstats(a, ubuf, nbytes); + break; +#endif +#ifdef ALTQ_CODEL + case ALTQT_CODEL: + error = codel_getqstats(a, ubuf, nbytes); + break; +#endif default: error = ENXIO; } diff --git a/sys/contrib/altq/altq/altq_var.h b/sys/contrib/altq/altq/altq_var.h index 956ee16..355c94c 100644 --- a/sys/contrib/altq/altq/altq_var.h +++ b/sys/contrib/altq/altq/altq_var.h @@ -243,6 +243,11 @@ int cbq_add_queue(struct pf_altq *); int cbq_remove_queue(struct pf_altq *); int cbq_getqstats(struct pf_altq *, void *, int *); +int codel_pfattach(struct pf_altq *); +int codel_add_altq(struct pf_altq *); +int codel_remove_altq(struct pf_altq *); +int codel_getqstats(struct pf_altq *, void *, int *); + int priq_pfattach(struct pf_altq *); int priq_add_altq(struct pf_altq *); int priq_remove_altq(struct pf_altq *); @@ -257,5 +262,12 @@ int hfsc_add_queue(struct pf_altq *); int hfsc_remove_queue(struct pf_altq *); int hfsc_getqstats(struct pf_altq *, void *, int *); +int fairq_pfattach(struct pf_altq *); +int fairq_add_altq(struct pf_altq *); +int fairq_remove_altq(struct pf_altq *); +int fairq_add_queue(struct pf_altq *); +int fairq_remove_queue(struct pf_altq *); +int fairq_getqstats(struct pf_altq *, void *, int *); + #endif /* _KERNEL */ #endif /* _ALTQ_ALTQ_VAR_H_ */ diff --git a/sys/crypto/aesni/aesencdec.h b/sys/crypto/aesni/aesencdec.h index 5e4f128..80951a4 100644 --- a/sys/crypto/aesni/aesencdec.h +++ b/sys/crypto/aesni/aesencdec.h @@ -1,5 +1,6 @@ /*- * Copyright 2013 John-Mark Gurney <jmg@FreeBSD.org> + * Copyright 2015 Netflix, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -27,6 +28,11 @@ * */ +#ifndef _AESENCDEC_H_ +#define _AESENCDEC_H_ + +#include <crypto/aesni/aesni_os.h> + #include <wmmintrin.h> static inline void @@ -103,6 +109,7 @@ aesni_dec8(int rounds, const __m128i *keysched, __m128i a, out[7] = _mm_aesdeclast_si128(h, keysched[i + 1]); } +/* rounds is passed in as rounds - 1 */ static inline __m128i aesni_enc(int rounds, const __m128i *keysched, const __m128i from) { @@ -110,11 +117,13 @@ aesni_enc(int rounds, const __m128i *keysched, const __m128i from) int i; tmp = from ^ keysched[0]; - - for (i = 0; i < rounds; i++) + for (i = 1; i < rounds; i += 2) { + tmp = _mm_aesenc_si128(tmp, keysched[i]); tmp = _mm_aesenc_si128(tmp, keysched[i + 1]); + } - return _mm_aesenclast_si128(tmp, keysched[i + 1]); + tmp = _mm_aesenc_si128(tmp, keysched[rounds]); + return _mm_aesenclast_si128(tmp, keysched[rounds + 1]); } static inline __m128i @@ -125,8 +134,13 @@ aesni_dec(int rounds, const __m128i *keysched, const __m128i from) tmp = from ^ keysched[0]; - for (i = 0; i < rounds; i++) + for (i = 1; i < rounds; i += 2) { + tmp = _mm_aesdec_si128(tmp, keysched[i]); tmp = _mm_aesdec_si128(tmp, keysched[i + 1]); + } - return _mm_aesdeclast_si128(tmp, keysched[i + 1]); + tmp = _mm_aesdec_si128(tmp, keysched[rounds]); + return _mm_aesdeclast_si128(tmp, keysched[rounds + 1]); } + +#endif /* _AESENCDEC_H_ */ diff --git a/sys/crypto/aesni/aesni.c b/sys/crypto/aesni/aesni.c index 7d7a740..602247a 100644 --- a/sys/crypto/aesni/aesni.c +++ b/sys/crypto/aesni/aesni.c @@ -1,8 +1,13 @@ /*- * Copyright (c) 2005-2008 Pawel Jakub Dawidek <pjd@FreeBSD.org> * Copyright (c) 2010 Konstantin Belousov <kib@FreeBSD.org> + * Copyright (c) 2014 The FreeBSD Foundation * All rights reserved. * + * Portions of this software were developed by John-Mark Gurney + * under sponsorship of the FreeBSD Foundation and + * Rubicon Communications, LLC (Netgate). + * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: @@ -39,16 +44,37 @@ __FBSDID("$FreeBSD$"); #include <sys/rwlock.h> #include <sys/bus.h> #include <sys/uio.h> +#include <sys/mbuf.h> +#include <sys/mutex.h> +#include <sys/smp.h> #include <crypto/aesni/aesni.h> #include <cryptodev_if.h> +#include <opencrypto/gmac.h> + +static struct mtx_padalign *ctx_mtx; +static struct fpu_kern_ctx **ctx_fpu; struct aesni_softc { + int dieing; int32_t cid; uint32_t sid; TAILQ_HEAD(aesni_sessions_head, aesni_session) sessions; struct rwlock lock; }; +#define AQUIRE_CTX(i, ctx) \ + do { \ + (i) = PCPU_GET(cpuid); \ + mtx_lock(&ctx_mtx[(i)]); \ + (ctx) = ctx_fpu[(i)]; \ + } while (0) +#define RELEASE_CTX(i, ctx) \ + do { \ + mtx_unlock(&ctx_mtx[(i)]); \ + (i) = -1; \ + (ctx) = NULL; \ + } while (0) + static int aesni_newsession(device_t, uint32_t *sidp, struct cryptoini *cri); static int aesni_freesession(device_t, uint64_t tid); static void aesni_freesession_locked(struct aesni_softc *sc, @@ -56,7 +82,7 @@ static void aesni_freesession_locked(struct aesni_softc *sc, static int aesni_cipher_setup(struct aesni_session *ses, struct cryptoini *encini); static int aesni_cipher_process(struct aesni_session *ses, - struct cryptodesc *enccrd, struct cryptop *crp); + struct cryptodesc *enccrd, struct cryptodesc *authcrd, struct cryptop *crp); MALLOC_DEFINE(M_AESNI, "aesni_data", "AESNI Data"); @@ -79,23 +105,45 @@ aesni_probe(device_t dev) return (EINVAL); } - if ((cpu_feature & CPUID_SSE2) == 0) { - device_printf(dev, "No SSE2 support but AESNI!?!\n"); + if ((cpu_feature2 & CPUID2_SSE41) == 0) { + device_printf(dev, "No SSE4.1 support.\n"); return (EINVAL); } - device_set_desc_copy(dev, "AES-CBC,AES-XTS"); + device_set_desc_copy(dev, "AES-CBC,AES-XTS,AES-GCM,AES-ICM"); return (0); } +static void +aensi_cleanctx(void) +{ + int i; + + /* XXX - no way to return driverid */ + CPU_FOREACH(i) { + if (ctx_fpu[i] != NULL) { + mtx_destroy(&ctx_mtx[i]); + fpu_kern_free_ctx(ctx_fpu[i]); + } + ctx_fpu[i] = NULL; + } + free(ctx_mtx, M_AESNI); + ctx_mtx = NULL; + free(ctx_fpu, M_AESNI); + ctx_fpu = NULL; +} + static int aesni_attach(device_t dev) { struct aesni_softc *sc; + int i; sc = device_get_softc(dev); + sc->dieing = 0; TAILQ_INIT(&sc->sessions); sc->sid = 1; + sc->cid = crypto_get_driverid(dev, CRYPTOCAP_F_HARDWARE | CRYPTOCAP_F_SYNC); if (sc->cid < 0) { @@ -103,8 +151,23 @@ aesni_attach(device_t dev) return (ENOMEM); } + ctx_mtx = malloc(sizeof *ctx_mtx * (mp_maxid + 1), M_AESNI, + M_WAITOK|M_ZERO); + ctx_fpu = malloc(sizeof *ctx_fpu * (mp_maxid + 1), M_AESNI, + M_WAITOK|M_ZERO); + + CPU_FOREACH(i) { + ctx_fpu[i] = fpu_kern_alloc_ctx(0); + mtx_init(&ctx_mtx[i], "anifpumtx", NULL, MTX_DEF); + } + rw_init(&sc->lock, "aesni_lock"); crypto_register(sc->cid, CRYPTO_AES_CBC, 0, 0); + crypto_register(sc->cid, CRYPTO_AES_ICM, 0, 0); + crypto_register(sc->cid, CRYPTO_AES_NIST_GCM_16, 0, 0); + crypto_register(sc->cid, CRYPTO_AES_128_NIST_GMAC, 0, 0); + crypto_register(sc->cid, CRYPTO_AES_192_NIST_GMAC, 0, 0); + crypto_register(sc->cid, CRYPTO_AES_256_NIST_GMAC, 0, 0); crypto_register(sc->cid, CRYPTO_AES_XTS, 0, 0); return (0); } @@ -116,6 +179,7 @@ aesni_detach(device_t dev) struct aesni_session *ses; sc = device_get_softc(dev); + rw_wlock(&sc->lock); TAILQ_FOREACH(ses, &sc->sessions, next) { if (ses->used) { @@ -125,14 +189,18 @@ aesni_detach(device_t dev) return (EBUSY); } } + sc->dieing = 1; while ((ses = TAILQ_FIRST(&sc->sessions)) != NULL) { TAILQ_REMOVE(&sc->sessions, ses, next); - fpu_kern_free_ctx(ses->fpu_ctx); free(ses, M_AESNI); } rw_wunlock(&sc->lock); - rw_destroy(&sc->lock); crypto_unregister_all(sc->cid); + + rw_destroy(&sc->lock); + + aensi_cleanctx(); + return (0); } @@ -144,28 +212,52 @@ aesni_newsession(device_t dev, uint32_t *sidp, struct cryptoini *cri) struct cryptoini *encini; int error; - if (sidp == NULL || cri == NULL) + if (sidp == NULL || cri == NULL) { + CRYPTDEB("no sidp or cri"); return (EINVAL); + } sc = device_get_softc(dev); + if (sc->dieing) + return (EINVAL); + ses = NULL; encini = NULL; for (; cri != NULL; cri = cri->cri_next) { switch (cri->cri_alg) { case CRYPTO_AES_CBC: + case CRYPTO_AES_ICM: case CRYPTO_AES_XTS: - if (encini != NULL) + case CRYPTO_AES_NIST_GCM_16: + if (encini != NULL) { + CRYPTDEB("encini already set"); return (EINVAL); + } encini = cri; break; + case CRYPTO_AES_128_NIST_GMAC: + case CRYPTO_AES_192_NIST_GMAC: + case CRYPTO_AES_256_NIST_GMAC: + /* + * nothing to do here, maybe in the future cache some + * values for GHASH + */ + break; default: + CRYPTDEB("unhandled algorithm"); return (EINVAL); } } - if (encini == NULL) + if (encini == NULL) { + CRYPTDEB("no cipher"); return (EINVAL); + } rw_wlock(&sc->lock); + if (sc->dieing) { + rw_wunlock(&sc->lock); + return (EINVAL); + } /* * Free sessions goes first, so if first session is used, we need to * allocate one. @@ -177,13 +269,6 @@ aesni_newsession(device_t dev, uint32_t *sidp, struct cryptoini *cri) rw_wunlock(&sc->lock); return (ENOMEM); } - ses->fpu_ctx = fpu_kern_alloc_ctx(FPU_KERN_NORMAL | - FPU_KERN_NOWAIT); - if (ses->fpu_ctx == NULL) { - free(ses, M_AESNI); - rw_wunlock(&sc->lock); - return (ENOMEM); - } ses->id = sc->sid++; } else { TAILQ_REMOVE(&sc->sessions, ses, next); @@ -195,6 +280,7 @@ aesni_newsession(device_t dev, uint32_t *sidp, struct cryptoini *cri) error = aesni_cipher_setup(ses, encini); if (error != 0) { + CRYPTDEB("setup failed"); rw_wlock(&sc->lock); aesni_freesession_locked(sc, ses); rw_wunlock(&sc->lock); @@ -208,15 +294,14 @@ aesni_newsession(device_t dev, uint32_t *sidp, struct cryptoini *cri) static void aesni_freesession_locked(struct aesni_softc *sc, struct aesni_session *ses) { - struct fpu_kern_ctx *ctx; uint32_t sid; + rw_assert(&sc->lock, RA_WLOCKED); + sid = ses->id; TAILQ_REMOVE(&sc->sessions, ses, next); - ctx = ses->fpu_ctx; - bzero(ses, sizeof(*ses)); + *ses = (struct aesni_session){}; ses->id = sid; - ses->fpu_ctx = ctx; TAILQ_INSERT_HEAD(&sc->sessions, ses, next); } @@ -248,11 +333,13 @@ aesni_process(device_t dev, struct cryptop *crp, int hint __unused) { struct aesni_softc *sc = device_get_softc(dev); struct aesni_session *ses = NULL; - struct cryptodesc *crd, *enccrd; - int error; + struct cryptodesc *crd, *enccrd, *authcrd; + int error, needauth; error = 0; enccrd = NULL; + authcrd = NULL; + needauth = 0; /* Sanity check. */ if (crp == NULL) @@ -266,6 +353,7 @@ aesni_process(device_t dev, struct cryptop *crp, int hint __unused) for (crd = crp->crp_desc; crd != NULL; crd = crd->crd_next) { switch (crd->crd_alg) { case CRYPTO_AES_CBC: + case CRYPTO_AES_ICM: case CRYPTO_AES_XTS: if (enccrd != NULL) { error = EINVAL; @@ -273,11 +361,41 @@ aesni_process(device_t dev, struct cryptop *crp, int hint __unused) } enccrd = crd; break; + + case CRYPTO_AES_NIST_GCM_16: + if (enccrd != NULL) { + error = EINVAL; + goto out; + } + enccrd = crd; + needauth = 1; + break; + + case CRYPTO_AES_128_NIST_GMAC: + case CRYPTO_AES_192_NIST_GMAC: + case CRYPTO_AES_256_NIST_GMAC: + if (authcrd != NULL) { + error = EINVAL; + goto out; + } + authcrd = crd; + needauth = 1; + break; + default: - return (EINVAL); + error = EINVAL; + goto out; } } - if (enccrd == NULL || (enccrd->crd_len % AES_BLOCK_LEN) != 0) { + + if (enccrd == NULL || (needauth && authcrd == NULL)) { + error = EINVAL; + goto out; + } + + /* CBC & XTS can only handle full blocks for now */ + if ((enccrd->crd_alg == CRYPTO_AES_CBC || enccrd->crd_alg == + CRYPTO_AES_XTS) && (enccrd->crd_len % AES_BLOCK_LEN) != 0) { error = EINVAL; goto out; } @@ -293,7 +411,7 @@ aesni_process(device_t dev, struct cryptop *crp, int hint __unused) goto out; } - error = aesni_cipher_process(ses, enccrd, crp); + error = aesni_cipher_process(ses, enccrd, authcrd, crp); if (error != 0) goto out; @@ -307,21 +425,26 @@ uint8_t * aesni_cipher_alloc(struct cryptodesc *enccrd, struct cryptop *crp, int *allocated) { + struct mbuf *m; struct uio *uio; struct iovec *iov; uint8_t *addr; - if (crp->crp_flags & CRYPTO_F_IMBUF) - goto alloc; - else if (crp->crp_flags & CRYPTO_F_IOV) { + if (crp->crp_flags & CRYPTO_F_IMBUF) { + m = (struct mbuf *)crp->crp_buf; + if (m->m_next != NULL) + goto alloc; + addr = mtod(m, uint8_t *); + } else if (crp->crp_flags & CRYPTO_F_IOV) { uio = (struct uio *)crp->crp_buf; if (uio->uio_iovcnt != 1) goto alloc; iov = uio->uio_iov; - addr = (u_char *)iov->iov_base + enccrd->crd_skip; + addr = (uint8_t *)iov->iov_base; } else - addr = (u_char *)crp->crp_buf; + addr = (uint8_t *)crp->crp_buf; *allocated = 0; + addr += enccrd->crd_skip; return (addr); alloc: @@ -362,37 +485,74 @@ MODULE_DEPEND(aesni, crypto, 1, 1, 1); static int aesni_cipher_setup(struct aesni_session *ses, struct cryptoini *encini) { - struct thread *td; + struct fpu_kern_ctx *ctx; int error; + int kt, ctxidx; + + kt = is_fpu_kern_thread(0); + if (!kt) { + AQUIRE_CTX(ctxidx, ctx); + error = fpu_kern_enter(curthread, ctx, + FPU_KERN_NORMAL | FPU_KERN_KTHR); + if (error != 0) + goto out; + } - td = curthread; - error = fpu_kern_enter(td, ses->fpu_ctx, FPU_KERN_NORMAL | - FPU_KERN_KTHR); - if (error != 0) - return (error); error = aesni_cipher_setup_common(ses, encini->cri_key, encini->cri_klen); - fpu_kern_leave(td, ses->fpu_ctx); + + if (!kt) { + fpu_kern_leave(curthread, ctx); +out: + RELEASE_CTX(ctxidx, ctx); + } return (error); } +/* + * authcrd contains the associated date. + */ static int aesni_cipher_process(struct aesni_session *ses, struct cryptodesc *enccrd, - struct cryptop *crp) + struct cryptodesc *authcrd, struct cryptop *crp) { - struct thread *td; - uint8_t *buf; - int error, allocated; + struct fpu_kern_ctx *ctx; + uint8_t iv[AES_BLOCK_LEN]; + uint8_t tag[GMAC_DIGEST_LEN]; + uint8_t *buf, *authbuf; + int error, allocated, authallocated; + int ivlen, encflag; + int kt, ctxidx; + + encflag = (enccrd->crd_flags & CRD_F_ENCRYPT) == CRD_F_ENCRYPT; + + if ((enccrd->crd_alg == CRYPTO_AES_ICM || + enccrd->crd_alg == CRYPTO_AES_NIST_GCM_16) && + (enccrd->crd_flags & CRD_F_IV_EXPLICIT) == 0) + return (EINVAL); buf = aesni_cipher_alloc(enccrd, crp, &allocated); if (buf == NULL) return (ENOMEM); - td = curthread; - error = fpu_kern_enter(td, ses->fpu_ctx, FPU_KERN_NORMAL | - FPU_KERN_KTHR); - if (error != 0) - goto out1; + authbuf = NULL; + authallocated = 0; + if (authcrd != NULL) { + authbuf = aesni_cipher_alloc(authcrd, crp, &authallocated); + if (authbuf == NULL) { + error = ENOMEM; + goto out1; + } + } + + kt = is_fpu_kern_thread(0); + if (!kt) { + AQUIRE_CTX(ctxidx, ctx); + error = fpu_kern_enter(curthread, ctx, + FPU_KERN_NORMAL|FPU_KERN_KTHR); + if (error != 0) + goto out2; + } if ((enccrd->crd_flags & CRD_F_KEY_EXPLICIT) != 0) { error = aesni_cipher_setup_common(ses, enccrd->crd_key, @@ -401,48 +561,105 @@ aesni_cipher_process(struct aesni_session *ses, struct cryptodesc *enccrd, goto out; } - if ((enccrd->crd_flags & CRD_F_ENCRYPT) != 0) { + /* XXX - validate that enccrd and authcrd have/use same key? */ + switch (enccrd->crd_alg) { + case CRYPTO_AES_CBC: + case CRYPTO_AES_ICM: + ivlen = AES_BLOCK_LEN; + break; + case CRYPTO_AES_XTS: + ivlen = 8; + break; + case CRYPTO_AES_NIST_GCM_16: + ivlen = 12; /* should support arbitarily larger */ + break; + } + + /* Setup iv */ + if (encflag) { if ((enccrd->crd_flags & CRD_F_IV_EXPLICIT) != 0) - bcopy(enccrd->crd_iv, ses->iv, AES_BLOCK_LEN); + bcopy(enccrd->crd_iv, iv, ivlen); + else + arc4rand(iv, ivlen, 0); + if ((enccrd->crd_flags & CRD_F_IV_PRESENT) == 0) crypto_copyback(crp->crp_flags, crp->crp_buf, - enccrd->crd_inject, AES_BLOCK_LEN, ses->iv); - if (ses->algo == CRYPTO_AES_CBC) { - aesni_encrypt_cbc(ses->rounds, ses->enc_schedule, - enccrd->crd_len, buf, buf, ses->iv); - } else /* if (ses->algo == CRYPTO_AES_XTS) */ { - aesni_encrypt_xts(ses->rounds, ses->enc_schedule, - ses->xts_schedule, enccrd->crd_len, buf, buf, - ses->iv); - } + enccrd->crd_inject, ivlen, iv); } else { if ((enccrd->crd_flags & CRD_F_IV_EXPLICIT) != 0) - bcopy(enccrd->crd_iv, ses->iv, AES_BLOCK_LEN); + bcopy(enccrd->crd_iv, iv, ivlen); else crypto_copydata(crp->crp_flags, crp->crp_buf, - enccrd->crd_inject, AES_BLOCK_LEN, ses->iv); - if (ses->algo == CRYPTO_AES_CBC) { + enccrd->crd_inject, ivlen, iv); + } + + if (authcrd != NULL && !encflag) + crypto_copydata(crp->crp_flags, crp->crp_buf, + authcrd->crd_inject, GMAC_DIGEST_LEN, tag); + else + bzero(tag, sizeof tag); + + /* Do work */ + switch (ses->algo) { + case CRYPTO_AES_CBC: + if (encflag) + aesni_encrypt_cbc(ses->rounds, ses->enc_schedule, + enccrd->crd_len, buf, buf, iv); + else aesni_decrypt_cbc(ses->rounds, ses->dec_schedule, - enccrd->crd_len, buf, ses->iv); - } else /* if (ses->algo == CRYPTO_AES_XTS) */ { + enccrd->crd_len, buf, iv); + break; + case CRYPTO_AES_ICM: + /* encryption & decryption are the same */ + aesni_encrypt_icm(ses->rounds, ses->enc_schedule, + enccrd->crd_len, buf, buf, iv); + break; + case CRYPTO_AES_XTS: + if (encflag) + aesni_encrypt_xts(ses->rounds, ses->enc_schedule, + ses->xts_schedule, enccrd->crd_len, buf, buf, + iv); + else aesni_decrypt_xts(ses->rounds, ses->dec_schedule, ses->xts_schedule, enccrd->crd_len, buf, buf, - ses->iv); + iv); + break; + case CRYPTO_AES_NIST_GCM_16: + if (encflag) + AES_GCM_encrypt(buf, buf, authbuf, iv, tag, + enccrd->crd_len, authcrd->crd_len, ivlen, + ses->enc_schedule, ses->rounds); + else { + if (!AES_GCM_decrypt(buf, buf, authbuf, iv, tag, + enccrd->crd_len, authcrd->crd_len, ivlen, + ses->enc_schedule, ses->rounds)) + error = EBADMSG; } + break; } + if (allocated) crypto_copyback(crp->crp_flags, crp->crp_buf, enccrd->crd_skip, enccrd->crd_len, buf); - if ((enccrd->crd_flags & CRD_F_ENCRYPT) != 0) - crypto_copydata(crp->crp_flags, crp->crp_buf, - enccrd->crd_skip + enccrd->crd_len - AES_BLOCK_LEN, - AES_BLOCK_LEN, ses->iv); + + if (!error && authcrd != NULL) { + crypto_copyback(crp->crp_flags, crp->crp_buf, + authcrd->crd_inject, GMAC_DIGEST_LEN, tag); + } + out: - fpu_kern_leave(td, ses->fpu_ctx); + if (!kt) { + fpu_kern_leave(curthread, ctx); +out2: + RELEASE_CTX(ctxidx, ctx); + } + out1: if (allocated) { bzero(buf, enccrd->crd_len); free(buf, M_AESNI); } + if (authallocated) + free(authbuf, M_AESNI); return (error); } diff --git a/sys/crypto/aesni/aesni.h b/sys/crypto/aesni/aesni.h index ff1d1a2..3d5adec 100644 --- a/sys/crypto/aesni/aesni.h +++ b/sys/crypto/aesni/aesni.h @@ -56,7 +56,6 @@ struct aesni_session { uint8_t enc_schedule[AES_SCHED_LEN] __aligned(16); uint8_t dec_schedule[AES_SCHED_LEN] __aligned(16); uint8_t xts_schedule[AES_SCHED_LEN] __aligned(16); - uint8_t iv[AES_BLOCK_LEN]; int algo; int rounds; /* uint8_t *ses_ictx; */ @@ -65,7 +64,6 @@ struct aesni_session { int used; uint32_t id; TAILQ_ENTRY(aesni_session) next; - struct fpu_kern_ctx *fpu_ctx; }; /* @@ -88,6 +86,9 @@ void aesni_encrypt_ecb(int rounds, const void *key_schedule /*__aligned(16)*/, size_t len, const uint8_t *from, uint8_t *to); void aesni_decrypt_ecb(int rounds, const void *key_schedule /*__aligned(16)*/, size_t len, const uint8_t *from, uint8_t *to); +void aesni_encrypt_icm(int rounds, const void *key_schedule /*__aligned(16)*/, + size_t len, const uint8_t *from, uint8_t *to, + const uint8_t iv[AES_BLOCK_LEN]); void aesni_encrypt_xts(int rounds, const void *data_schedule /*__aligned(16)*/, const void *tweak_schedule /*__aligned(16)*/, size_t len, @@ -96,6 +97,16 @@ void aesni_decrypt_xts(int rounds, const void *data_schedule /*__aligned(16)*/, const void *tweak_schedule /*__aligned(16)*/, size_t len, const uint8_t *from, uint8_t *to, const uint8_t iv[AES_BLOCK_LEN]); +/* GCM & GHASH functions */ +void AES_GCM_encrypt(const unsigned char *in, unsigned char *out, + const unsigned char *addt, const unsigned char *ivec, + unsigned char *tag, uint32_t nbytes, uint32_t abytes, int ibytes, + const unsigned char *key, int nr); +int AES_GCM_decrypt(const unsigned char *in, unsigned char *out, + const unsigned char *addt, const unsigned char *ivec, + const unsigned char *tag, uint32_t nbytes, uint32_t abytes, int ibytes, + const unsigned char *key, int nr); + int aesni_cipher_setup_common(struct aesni_session *ses, const uint8_t *key, int keylen); uint8_t *aesni_cipher_alloc(struct cryptodesc *enccrd, struct cryptop *crp, diff --git a/sys/crypto/aesni/aesni_ghash.c b/sys/crypto/aesni/aesni_ghash.c new file mode 100644 index 0000000..54c8815 --- /dev/null +++ b/sys/crypto/aesni/aesni_ghash.c @@ -0,0 +1,804 @@ +/*- + * Copyright (c) 2014 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by John-Mark Gurney under + * the sponsorship of the FreeBSD Foundation and + * Rubicon Communications, LLC (Netgate). + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * + * $FreeBSD$ + * + */ + +/* + * Figure 5, 8 and 12 are copied from the Intel white paper: + * Intel® Carry-Less Multiplication Instruction and its Usage for + * Computing the GCM Mode + * + * and as such are: + * Copyright © 2010 Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifdef _KERNEL +#include <crypto/aesni/aesni.h> +#include <crypto/aesni/aesni_os.h> +#else +#include <stdint.h> +#endif + +#include <wmmintrin.h> +#include <emmintrin.h> +#include <smmintrin.h> + +static inline int +m128icmp(__m128i a, __m128i b) +{ + __m128i cmp; + + cmp = _mm_cmpeq_epi32(a, b); + + return _mm_movemask_epi8(cmp) == 0xffff; +} + +#ifdef __i386__ +static inline __m128i +_mm_insert_epi64(__m128i a, int64_t b, const int ndx) +{ + + if (!ndx) { + a = _mm_insert_epi32(a, b, 0); + a = _mm_insert_epi32(a, b >> 32, 1); + } else { + a = _mm_insert_epi32(a, b, 2); + a = _mm_insert_epi32(a, b >> 32, 3); + } + + return a; +} +#endif + +/* some code from carry-less-multiplication-instruction-in-gcm-mode-paper.pdf */ + +/* Figure 5. Code Sample - Performing Ghash Using Algorithms 1 and 5 (C) */ +static void +gfmul(__m128i a, __m128i b, __m128i *res) +{ + __m128i tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8, tmp9; + + tmp3 = _mm_clmulepi64_si128(a, b, 0x00); + tmp4 = _mm_clmulepi64_si128(a, b, 0x10); + tmp5 = _mm_clmulepi64_si128(a, b, 0x01); + tmp6 = _mm_clmulepi64_si128(a, b, 0x11); + + tmp4 = _mm_xor_si128(tmp4, tmp5); + tmp5 = _mm_slli_si128(tmp4, 8); + tmp4 = _mm_srli_si128(tmp4, 8); + tmp3 = _mm_xor_si128(tmp3, tmp5); + tmp6 = _mm_xor_si128(tmp6, tmp4); + + tmp7 = _mm_srli_epi32(tmp3, 31); + tmp8 = _mm_srli_epi32(tmp6, 31); + tmp3 = _mm_slli_epi32(tmp3, 1); + tmp6 = _mm_slli_epi32(tmp6, 1); + + tmp9 = _mm_srli_si128(tmp7, 12); + tmp8 = _mm_slli_si128(tmp8, 4); + tmp7 = _mm_slli_si128(tmp7, 4); + tmp3 = _mm_or_si128(tmp3, tmp7); + tmp6 = _mm_or_si128(tmp6, tmp8); + tmp6 = _mm_or_si128(tmp6, tmp9); + + tmp7 = _mm_slli_epi32(tmp3, 31); + tmp8 = _mm_slli_epi32(tmp3, 30); + tmp9 = _mm_slli_epi32(tmp3, 25); + + tmp7 = _mm_xor_si128(tmp7, tmp8); + tmp7 = _mm_xor_si128(tmp7, tmp9); + tmp8 = _mm_srli_si128(tmp7, 4); + tmp7 = _mm_slli_si128(tmp7, 12); + tmp3 = _mm_xor_si128(tmp3, tmp7); + + tmp2 = _mm_srli_epi32(tmp3, 1); + tmp4 = _mm_srli_epi32(tmp3, 2); + tmp5 = _mm_srli_epi32(tmp3, 7); + tmp2 = _mm_xor_si128(tmp2, tmp4); + tmp2 = _mm_xor_si128(tmp2, tmp5); + tmp2 = _mm_xor_si128(tmp2, tmp8); + tmp3 = _mm_xor_si128(tmp3, tmp2); + tmp6 = _mm_xor_si128(tmp6, tmp3); + + *res = tmp6; +} + +/* + * Figure 8. Code Sample - Performing Ghash Using an Aggregated Reduction + * Method */ +static void +reduce4(__m128i H1, __m128i H2, __m128i H3, __m128i H4, + __m128i X1, __m128i X2, __m128i X3, __m128i X4, __m128i *res) +{ + /*algorithm by Krzysztof Jankowski, Pierre Laurent - Intel*/ + __m128i H1_X1_lo, H1_X1_hi, H2_X2_lo, H2_X2_hi, H3_X3_lo, + H3_X3_hi, H4_X4_lo, H4_X4_hi, lo, hi; + __m128i tmp0, tmp1, tmp2, tmp3; + __m128i tmp4, tmp5, tmp6, tmp7; + __m128i tmp8, tmp9; + + H1_X1_lo = _mm_clmulepi64_si128(H1, X1, 0x00); + H2_X2_lo = _mm_clmulepi64_si128(H2, X2, 0x00); + H3_X3_lo = _mm_clmulepi64_si128(H3, X3, 0x00); + H4_X4_lo = _mm_clmulepi64_si128(H4, X4, 0x00); + + lo = _mm_xor_si128(H1_X1_lo, H2_X2_lo); + lo = _mm_xor_si128(lo, H3_X3_lo); + lo = _mm_xor_si128(lo, H4_X4_lo); + + H1_X1_hi = _mm_clmulepi64_si128(H1, X1, 0x11); + H2_X2_hi = _mm_clmulepi64_si128(H2, X2, 0x11); + H3_X3_hi = _mm_clmulepi64_si128(H3, X3, 0x11); + H4_X4_hi = _mm_clmulepi64_si128(H4, X4, 0x11); + + hi = _mm_xor_si128(H1_X1_hi, H2_X2_hi); + hi = _mm_xor_si128(hi, H3_X3_hi); + hi = _mm_xor_si128(hi, H4_X4_hi); + + tmp0 = _mm_shuffle_epi32(H1, 78); + tmp4 = _mm_shuffle_epi32(X1, 78); + tmp0 = _mm_xor_si128(tmp0, H1); + tmp4 = _mm_xor_si128(tmp4, X1); + tmp1 = _mm_shuffle_epi32(H2, 78); + tmp5 = _mm_shuffle_epi32(X2, 78); + tmp1 = _mm_xor_si128(tmp1, H2); + tmp5 = _mm_xor_si128(tmp5, X2); + tmp2 = _mm_shuffle_epi32(H3, 78); + tmp6 = _mm_shuffle_epi32(X3, 78); + tmp2 = _mm_xor_si128(tmp2, H3); + tmp6 = _mm_xor_si128(tmp6, X3); + tmp3 = _mm_shuffle_epi32(H4, 78); + tmp7 = _mm_shuffle_epi32(X4, 78); + tmp3 = _mm_xor_si128(tmp3, H4); + tmp7 = _mm_xor_si128(tmp7, X4); + + tmp0 = _mm_clmulepi64_si128(tmp0, tmp4, 0x00); + tmp1 = _mm_clmulepi64_si128(tmp1, tmp5, 0x00); + tmp2 = _mm_clmulepi64_si128(tmp2, tmp6, 0x00); + tmp3 = _mm_clmulepi64_si128(tmp3, tmp7, 0x00); + + tmp0 = _mm_xor_si128(tmp0, lo); + tmp0 = _mm_xor_si128(tmp0, hi); + tmp0 = _mm_xor_si128(tmp1, tmp0); + tmp0 = _mm_xor_si128(tmp2, tmp0); + tmp0 = _mm_xor_si128(tmp3, tmp0); + + tmp4 = _mm_slli_si128(tmp0, 8); + tmp0 = _mm_srli_si128(tmp0, 8); + + lo = _mm_xor_si128(tmp4, lo); + hi = _mm_xor_si128(tmp0, hi); + + tmp3 = lo; + tmp6 = hi; + + tmp7 = _mm_srli_epi32(tmp3, 31); + tmp8 = _mm_srli_epi32(tmp6, 31); + tmp3 = _mm_slli_epi32(tmp3, 1); + tmp6 = _mm_slli_epi32(tmp6, 1); + + tmp9 = _mm_srli_si128(tmp7, 12); + tmp8 = _mm_slli_si128(tmp8, 4); + tmp7 = _mm_slli_si128(tmp7, 4); + tmp3 = _mm_or_si128(tmp3, tmp7); + tmp6 = _mm_or_si128(tmp6, tmp8); + tmp6 = _mm_or_si128(tmp6, tmp9); + + tmp7 = _mm_slli_epi32(tmp3, 31); + tmp8 = _mm_slli_epi32(tmp3, 30); + tmp9 = _mm_slli_epi32(tmp3, 25); + + tmp7 = _mm_xor_si128(tmp7, tmp8); + tmp7 = _mm_xor_si128(tmp7, tmp9); + tmp8 = _mm_srli_si128(tmp7, 4); + tmp7 = _mm_slli_si128(tmp7, 12); + tmp3 = _mm_xor_si128(tmp3, tmp7); + + tmp2 = _mm_srli_epi32(tmp3, 1); + tmp4 = _mm_srli_epi32(tmp3, 2); + tmp5 = _mm_srli_epi32(tmp3, 7); + tmp2 = _mm_xor_si128(tmp2, tmp4); + tmp2 = _mm_xor_si128(tmp2, tmp5); + tmp2 = _mm_xor_si128(tmp2, tmp8); + tmp3 = _mm_xor_si128(tmp3, tmp2); + tmp6 = _mm_xor_si128(tmp6, tmp3); + + *res = tmp6; +} + +/* + * Figure 12. AES-GCM: Processing Four Blocks in Parallel with Aggregated + * Every Four Blocks + */ +/* + * per NIST SP-800-38D, 5.2.1.1, len(p) <= 2^39-256 (in bits), or + * 2^32-256*8*16 bytes. + */ +void +AES_GCM_encrypt(const unsigned char *in, unsigned char *out, + const unsigned char *addt, const unsigned char *ivec, + unsigned char *tag, uint32_t nbytes, uint32_t abytes, int ibytes, + const unsigned char *key, int nr) +{ + int i, j ,k; + __m128i tmp1, tmp2, tmp3, tmp4; + __m128i tmp5, tmp6, tmp7, tmp8; + __m128i H, H2, H3, H4, Y, T; + __m128i *KEY = (__m128i*)key; + __m128i ctr1, ctr2, ctr3, ctr4; + __m128i ctr5, ctr6, ctr7, ctr8; + __m128i last_block = _mm_setzero_si128(); + __m128i ONE = _mm_set_epi32(0, 1, 0, 0); + __m128i EIGHT = _mm_set_epi32(0, 8, 0, 0); + __m128i BSWAP_EPI64 = _mm_set_epi8(8,9,10,11,12,13,14,15,0,1,2,3,4,5,6, + 7); + __m128i BSWAP_MASK = _mm_set_epi8(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14, + 15); + __m128i X = _mm_setzero_si128(); + + if (ibytes == 96/8) { + Y = _mm_loadu_si128((__m128i*)ivec); + Y = _mm_insert_epi32(Y, 0x1000000, 3); + /*(Compute E[ZERO, KS] and E[Y0, KS] together*/ + tmp1 = _mm_xor_si128(X, KEY[0]); + tmp2 = _mm_xor_si128(Y, KEY[0]); + for (j=1; j < nr-1; j+=2) { + tmp1 = _mm_aesenc_si128(tmp1, KEY[j]); + tmp2 = _mm_aesenc_si128(tmp2, KEY[j]); + + tmp1 = _mm_aesenc_si128(tmp1, KEY[j+1]); + tmp2 = _mm_aesenc_si128(tmp2, KEY[j+1]); + } + tmp1 = _mm_aesenc_si128(tmp1, KEY[nr-1]); + tmp2 = _mm_aesenc_si128(tmp2, KEY[nr-1]); + + H = _mm_aesenclast_si128(tmp1, KEY[nr]); + T = _mm_aesenclast_si128(tmp2, KEY[nr]); + + H = _mm_shuffle_epi8(H, BSWAP_MASK); + } else { + tmp1 = _mm_xor_si128(X, KEY[0]); + for (j=1; j <nr; j++) + tmp1 = _mm_aesenc_si128(tmp1, KEY[j]); + H = _mm_aesenclast_si128(tmp1, KEY[nr]); + + H = _mm_shuffle_epi8(H, BSWAP_MASK); + Y = _mm_setzero_si128(); + + for (i=0; i < ibytes/16; i++) { + tmp1 = _mm_loadu_si128(&((__m128i*)ivec)[i]); + tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK); + Y = _mm_xor_si128(Y, tmp1); + gfmul(Y, H, &Y); + } + if (ibytes%16) { + for (j=0; j < ibytes%16; j++) + ((unsigned char*)&last_block)[j] = ivec[i*16+j]; + tmp1 = last_block; + tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK); + Y = _mm_xor_si128(Y, tmp1); + gfmul(Y, H, &Y); + } + tmp1 = _mm_insert_epi64(tmp1, (uint64_t)ibytes*8, 0); + tmp1 = _mm_insert_epi64(tmp1, 0, 1); + + Y = _mm_xor_si128(Y, tmp1); + gfmul(Y, H, &Y); + Y = _mm_shuffle_epi8(Y, BSWAP_MASK); /*Compute E(K, Y0)*/ + tmp1 = _mm_xor_si128(Y, KEY[0]); + for (j=1; j < nr; j++) + tmp1 = _mm_aesenc_si128(tmp1, KEY[j]); + T = _mm_aesenclast_si128(tmp1, KEY[nr]); + } + + gfmul(H,H,&H2); + gfmul(H,H2,&H3); + gfmul(H,H3,&H4); + + for (i=0; i<abytes/16/4; i++) { + tmp1 = _mm_loadu_si128(&((__m128i*)addt)[i*4]); + tmp2 = _mm_loadu_si128(&((__m128i*)addt)[i*4+1]); + tmp3 = _mm_loadu_si128(&((__m128i*)addt)[i*4+2]); + tmp4 = _mm_loadu_si128(&((__m128i*)addt)[i*4+3]); + + tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK); + tmp2 = _mm_shuffle_epi8(tmp2, BSWAP_MASK); + tmp3 = _mm_shuffle_epi8(tmp3, BSWAP_MASK); + tmp4 = _mm_shuffle_epi8(tmp4, BSWAP_MASK); + tmp1 = _mm_xor_si128(X, tmp1); + + reduce4(H, H2, H3, H4, tmp4, tmp3, tmp2, tmp1, &X); + } + for (i=i*4; i<abytes/16; i++) { + tmp1 = _mm_loadu_si128(&((__m128i*)addt)[i]); + tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK); + X = _mm_xor_si128(X,tmp1); + gfmul(X, H, &X); + } + if (abytes%16) { + last_block = _mm_setzero_si128(); + for (j=0; j<abytes%16; j++) + ((unsigned char*)&last_block)[j] = addt[i*16+j]; + tmp1 = last_block; + tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK); + X =_mm_xor_si128(X,tmp1); + gfmul(X,H,&X); + } + + ctr1 = _mm_shuffle_epi8(Y, BSWAP_EPI64); + ctr1 = _mm_add_epi64(ctr1, ONE); + ctr2 = _mm_add_epi64(ctr1, ONE); + ctr3 = _mm_add_epi64(ctr2, ONE); + ctr4 = _mm_add_epi64(ctr3, ONE); + ctr5 = _mm_add_epi64(ctr4, ONE); + ctr6 = _mm_add_epi64(ctr5, ONE); + ctr7 = _mm_add_epi64(ctr6, ONE); + ctr8 = _mm_add_epi64(ctr7, ONE); + + for (i=0; i<nbytes/16/8; i++) { + tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64); + tmp2 = _mm_shuffle_epi8(ctr2, BSWAP_EPI64); + tmp3 = _mm_shuffle_epi8(ctr3, BSWAP_EPI64); + tmp4 = _mm_shuffle_epi8(ctr4, BSWAP_EPI64); + tmp5 = _mm_shuffle_epi8(ctr5, BSWAP_EPI64); + tmp6 = _mm_shuffle_epi8(ctr6, BSWAP_EPI64); + tmp7 = _mm_shuffle_epi8(ctr7, BSWAP_EPI64); + tmp8 = _mm_shuffle_epi8(ctr8, BSWAP_EPI64); + + ctr1 = _mm_add_epi64(ctr1, EIGHT); + ctr2 = _mm_add_epi64(ctr2, EIGHT); + ctr3 = _mm_add_epi64(ctr3, EIGHT); + ctr4 = _mm_add_epi64(ctr4, EIGHT); + ctr5 = _mm_add_epi64(ctr5, EIGHT); + ctr6 = _mm_add_epi64(ctr6, EIGHT); + ctr7 = _mm_add_epi64(ctr7, EIGHT); + ctr8 = _mm_add_epi64(ctr8, EIGHT); + + tmp1 =_mm_xor_si128(tmp1, KEY[0]); + tmp2 =_mm_xor_si128(tmp2, KEY[0]); + tmp3 =_mm_xor_si128(tmp3, KEY[0]); + tmp4 =_mm_xor_si128(tmp4, KEY[0]); + tmp5 =_mm_xor_si128(tmp5, KEY[0]); + tmp6 =_mm_xor_si128(tmp6, KEY[0]); + tmp7 =_mm_xor_si128(tmp7, KEY[0]); + tmp8 =_mm_xor_si128(tmp8, KEY[0]); + + for (j=1; j<nr; j++) { + tmp1 = _mm_aesenc_si128(tmp1, KEY[j]); + tmp2 = _mm_aesenc_si128(tmp2, KEY[j]); + tmp3 = _mm_aesenc_si128(tmp3, KEY[j]); + tmp4 = _mm_aesenc_si128(tmp4, KEY[j]); + tmp5 = _mm_aesenc_si128(tmp5, KEY[j]); + tmp6 = _mm_aesenc_si128(tmp6, KEY[j]); + tmp7 = _mm_aesenc_si128(tmp7, KEY[j]); + tmp8 = _mm_aesenc_si128(tmp8, KEY[j]); + } + tmp1 =_mm_aesenclast_si128(tmp1, KEY[nr]); + tmp2 =_mm_aesenclast_si128(tmp2, KEY[nr]); + tmp3 =_mm_aesenclast_si128(tmp3, KEY[nr]); + tmp4 =_mm_aesenclast_si128(tmp4, KEY[nr]); + tmp5 =_mm_aesenclast_si128(tmp5, KEY[nr]); + tmp6 =_mm_aesenclast_si128(tmp6, KEY[nr]); + tmp7 =_mm_aesenclast_si128(tmp7, KEY[nr]); + tmp8 =_mm_aesenclast_si128(tmp8, KEY[nr]); + + tmp1 = _mm_xor_si128(tmp1, + _mm_loadu_si128(&((__m128i*)in)[i*8+0])); + tmp2 = _mm_xor_si128(tmp2, + _mm_loadu_si128(&((__m128i*)in)[i*8+1])); + tmp3 = _mm_xor_si128(tmp3, + _mm_loadu_si128(&((__m128i*)in)[i*8+2])); + tmp4 = _mm_xor_si128(tmp4, + _mm_loadu_si128(&((__m128i*)in)[i*8+3])); + tmp5 = _mm_xor_si128(tmp5, + _mm_loadu_si128(&((__m128i*)in)[i*8+4])); + tmp6 = _mm_xor_si128(tmp6, + _mm_loadu_si128(&((__m128i*)in)[i*8+5])); + tmp7 = _mm_xor_si128(tmp7, + _mm_loadu_si128(&((__m128i*)in)[i*8+6])); + tmp8 = _mm_xor_si128(tmp8, + _mm_loadu_si128(&((__m128i*)in)[i*8+7])); + + _mm_storeu_si128(&((__m128i*)out)[i*8+0], tmp1); + _mm_storeu_si128(&((__m128i*)out)[i*8+1], tmp2); + _mm_storeu_si128(&((__m128i*)out)[i*8+2], tmp3); + _mm_storeu_si128(&((__m128i*)out)[i*8+3], tmp4); + _mm_storeu_si128(&((__m128i*)out)[i*8+4], tmp5); + _mm_storeu_si128(&((__m128i*)out)[i*8+5], tmp6); + _mm_storeu_si128(&((__m128i*)out)[i*8+6], tmp7); + _mm_storeu_si128(&((__m128i*)out)[i*8+7], tmp8); + + tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK); + tmp2 = _mm_shuffle_epi8(tmp2, BSWAP_MASK); + tmp3 = _mm_shuffle_epi8(tmp3, BSWAP_MASK); + tmp4 = _mm_shuffle_epi8(tmp4, BSWAP_MASK); + tmp5 = _mm_shuffle_epi8(tmp5, BSWAP_MASK); + tmp6 = _mm_shuffle_epi8(tmp6, BSWAP_MASK); + tmp7 = _mm_shuffle_epi8(tmp7, BSWAP_MASK); + tmp8 = _mm_shuffle_epi8(tmp8, BSWAP_MASK); + + tmp1 = _mm_xor_si128(X, tmp1); + + reduce4(H, H2, H3, H4, tmp4, tmp3, tmp2, tmp1, &X); + + tmp5 = _mm_xor_si128(X, tmp5); + reduce4(H, H2, H3, H4, tmp8, tmp7, tmp6, tmp5, &X); + } + for (k=i*8; k<nbytes/16; k++) { + tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64); + ctr1 = _mm_add_epi64(ctr1, ONE); + tmp1 = _mm_xor_si128(tmp1, KEY[0]); + for (j=1; j<nr-1; j+=2) { + tmp1 = _mm_aesenc_si128(tmp1, KEY[j]); + tmp1 = _mm_aesenc_si128(tmp1, KEY[j+1]); + } + tmp1 = _mm_aesenc_si128(tmp1, KEY[nr-1]); + tmp1 = _mm_aesenclast_si128(tmp1, KEY[nr]); + tmp1 = _mm_xor_si128(tmp1, _mm_loadu_si128(&((__m128i*)in)[k])); + _mm_storeu_si128(&((__m128i*)out)[k], tmp1); + tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK); + X = _mm_xor_si128(X, tmp1); + gfmul(X,H,&X); + } + //If remains one incomplete block + if (nbytes%16) { + tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64); + tmp1 = _mm_xor_si128(tmp1, KEY[0]); + for (j=1; j<nr-1; j+=2) { + tmp1 = _mm_aesenc_si128(tmp1, KEY[j]); + tmp1 = _mm_aesenc_si128(tmp1, KEY[j+1]); + } + tmp1 = _mm_aesenc_si128(tmp1, KEY[nr-1]); + tmp1 = _mm_aesenclast_si128(tmp1, KEY[nr]); + tmp1 = _mm_xor_si128(tmp1, _mm_loadu_si128(&((__m128i*)in)[k])); + last_block = tmp1; + for (j=0; j<nbytes%16; j++) + out[k*16+j] = ((unsigned char*)&last_block)[j]; + for ((void)j; j<16; j++) + ((unsigned char*)&last_block)[j] = 0; + tmp1 = last_block; + tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK); + X = _mm_xor_si128(X, tmp1); + gfmul(X, H, &X); + } + tmp1 = _mm_insert_epi64(tmp1, (uint64_t)nbytes*8, 0); + tmp1 = _mm_insert_epi64(tmp1, (uint64_t)abytes*8, 1); + + X = _mm_xor_si128(X, tmp1); + gfmul(X,H,&X); + X = _mm_shuffle_epi8(X, BSWAP_MASK); + T = _mm_xor_si128(X, T); + _mm_storeu_si128((__m128i*)tag, T); +} + +/* My modification of _encrypt to be _decrypt */ +int +AES_GCM_decrypt(const unsigned char *in, unsigned char *out, + const unsigned char *addt, const unsigned char *ivec, + const unsigned char *tag, uint32_t nbytes, uint32_t abytes, int ibytes, + const unsigned char *key, int nr) +{ + int i, j ,k; + __m128i tmp1, tmp2, tmp3, tmp4; + __m128i tmp5, tmp6, tmp7, tmp8; + __m128i H, H2, H3, H4, Y, T; + __m128i *KEY = (__m128i*)key; + __m128i ctr1, ctr2, ctr3, ctr4; + __m128i ctr5, ctr6, ctr7, ctr8; + __m128i last_block = _mm_setzero_si128(); + __m128i ONE = _mm_set_epi32(0, 1, 0, 0); + __m128i EIGHT = _mm_set_epi32(0, 8, 0, 0); + __m128i BSWAP_EPI64 = _mm_set_epi8(8,9,10,11,12,13,14,15,0,1,2,3,4,5,6, + 7); + __m128i BSWAP_MASK = _mm_set_epi8(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14, + 15); + __m128i X = _mm_setzero_si128(); + + if (ibytes == 96/8) { + Y = _mm_loadu_si128((__m128i*)ivec); + Y = _mm_insert_epi32(Y, 0x1000000, 3); + /*(Compute E[ZERO, KS] and E[Y0, KS] together*/ + tmp1 = _mm_xor_si128(X, KEY[0]); + tmp2 = _mm_xor_si128(Y, KEY[0]); + for (j=1; j < nr-1; j+=2) { + tmp1 = _mm_aesenc_si128(tmp1, KEY[j]); + tmp2 = _mm_aesenc_si128(tmp2, KEY[j]); + + tmp1 = _mm_aesenc_si128(tmp1, KEY[j+1]); + tmp2 = _mm_aesenc_si128(tmp2, KEY[j+1]); + } + tmp1 = _mm_aesenc_si128(tmp1, KEY[nr-1]); + tmp2 = _mm_aesenc_si128(tmp2, KEY[nr-1]); + + H = _mm_aesenclast_si128(tmp1, KEY[nr]); + T = _mm_aesenclast_si128(tmp2, KEY[nr]); + + H = _mm_shuffle_epi8(H, BSWAP_MASK); + } else { + tmp1 = _mm_xor_si128(X, KEY[0]); + for (j=1; j <nr; j++) + tmp1 = _mm_aesenc_si128(tmp1, KEY[j]); + H = _mm_aesenclast_si128(tmp1, KEY[nr]); + + H = _mm_shuffle_epi8(H, BSWAP_MASK); + Y = _mm_setzero_si128(); + + for (i=0; i < ibytes/16; i++) { + tmp1 = _mm_loadu_si128(&((__m128i*)ivec)[i]); + tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK); + Y = _mm_xor_si128(Y, tmp1); + gfmul(Y, H, &Y); + } + if (ibytes%16) { + for (j=0; j < ibytes%16; j++) + ((unsigned char*)&last_block)[j] = ivec[i*16+j]; + tmp1 = last_block; + tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK); + Y = _mm_xor_si128(Y, tmp1); + gfmul(Y, H, &Y); + } + tmp1 = _mm_insert_epi64(tmp1, (uint64_t)ibytes*8, 0); + tmp1 = _mm_insert_epi64(tmp1, 0, 1); + + Y = _mm_xor_si128(Y, tmp1); + gfmul(Y, H, &Y); + Y = _mm_shuffle_epi8(Y, BSWAP_MASK); /*Compute E(K, Y0)*/ + tmp1 = _mm_xor_si128(Y, KEY[0]); + for (j=1; j < nr; j++) + tmp1 = _mm_aesenc_si128(tmp1, KEY[j]); + T = _mm_aesenclast_si128(tmp1, KEY[nr]); + } + + gfmul(H,H,&H2); + gfmul(H,H2,&H3); + gfmul(H,H3,&H4); + + for (i=0; i<abytes/16/4; i++) { + tmp1 = _mm_loadu_si128(&((__m128i*)addt)[i*4]); + tmp2 = _mm_loadu_si128(&((__m128i*)addt)[i*4+1]); + tmp3 = _mm_loadu_si128(&((__m128i*)addt)[i*4+2]); + tmp4 = _mm_loadu_si128(&((__m128i*)addt)[i*4+3]); + + tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK); + tmp2 = _mm_shuffle_epi8(tmp2, BSWAP_MASK); + tmp3 = _mm_shuffle_epi8(tmp3, BSWAP_MASK); + tmp4 = _mm_shuffle_epi8(tmp4, BSWAP_MASK); + + tmp1 = _mm_xor_si128(X, tmp1); + + reduce4(H, H2, H3, H4, tmp4, tmp3, tmp2, tmp1, &X); + } + for (i=i*4; i<abytes/16; i++) { + tmp1 = _mm_loadu_si128(&((__m128i*)addt)[i]); + tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK); + X = _mm_xor_si128(X,tmp1); + gfmul(X, H, &X); + } + if (abytes%16) { + last_block = _mm_setzero_si128(); + for (j=0; j<abytes%16; j++) + ((unsigned char*)&last_block)[j] = addt[i*16+j]; + tmp1 = last_block; + tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK); + X =_mm_xor_si128(X,tmp1); + gfmul(X,H,&X); + } + + /* This is where we validate the cipher text before decrypt */ + for (i = 0; i<nbytes/16/4; i++) { + tmp1 = _mm_loadu_si128(&((__m128i*)in)[i*4]); + tmp2 = _mm_loadu_si128(&((__m128i*)in)[i*4+1]); + tmp3 = _mm_loadu_si128(&((__m128i*)in)[i*4+2]); + tmp4 = _mm_loadu_si128(&((__m128i*)in)[i*4+3]); + + tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK); + tmp2 = _mm_shuffle_epi8(tmp2, BSWAP_MASK); + tmp3 = _mm_shuffle_epi8(tmp3, BSWAP_MASK); + tmp4 = _mm_shuffle_epi8(tmp4, BSWAP_MASK); + + tmp1 = _mm_xor_si128(X, tmp1); + + reduce4(H, H2, H3, H4, tmp4, tmp3, tmp2, tmp1, &X); + } + for (i = i*4; i<nbytes/16; i++) { + tmp1 = _mm_loadu_si128(&((__m128i*)in)[i]); + tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK); + X = _mm_xor_si128(X, tmp1); + gfmul(X,H,&X); + } + if (nbytes%16) { + last_block = _mm_setzero_si128(); + for (j=0; j<nbytes%16; j++) + ((unsigned char*)&last_block)[j] = in[i*16+j]; + tmp1 = last_block; + tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK); + X = _mm_xor_si128(X, tmp1); + gfmul(X, H, &X); + } + + tmp1 = _mm_insert_epi64(tmp1, (uint64_t)nbytes*8, 0); + tmp1 = _mm_insert_epi64(tmp1, (uint64_t)abytes*8, 1); + + X = _mm_xor_si128(X, tmp1); + gfmul(X,H,&X); + X = _mm_shuffle_epi8(X, BSWAP_MASK); + T = _mm_xor_si128(X, T); + + if (!m128icmp(T, _mm_loadu_si128((const __m128i*)tag))) + return 0; //in case the authentication failed + + ctr1 = _mm_shuffle_epi8(Y, BSWAP_EPI64); + ctr1 = _mm_add_epi64(ctr1, ONE); + ctr2 = _mm_add_epi64(ctr1, ONE); + ctr3 = _mm_add_epi64(ctr2, ONE); + ctr4 = _mm_add_epi64(ctr3, ONE); + ctr5 = _mm_add_epi64(ctr4, ONE); + ctr6 = _mm_add_epi64(ctr5, ONE); + ctr7 = _mm_add_epi64(ctr6, ONE); + ctr8 = _mm_add_epi64(ctr7, ONE); + + for (i=0; i<nbytes/16/8; i++) { + tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64); + tmp2 = _mm_shuffle_epi8(ctr2, BSWAP_EPI64); + tmp3 = _mm_shuffle_epi8(ctr3, BSWAP_EPI64); + tmp4 = _mm_shuffle_epi8(ctr4, BSWAP_EPI64); + tmp5 = _mm_shuffle_epi8(ctr5, BSWAP_EPI64); + tmp6 = _mm_shuffle_epi8(ctr6, BSWAP_EPI64); + tmp7 = _mm_shuffle_epi8(ctr7, BSWAP_EPI64); + tmp8 = _mm_shuffle_epi8(ctr8, BSWAP_EPI64); + + ctr1 = _mm_add_epi64(ctr1, EIGHT); + ctr2 = _mm_add_epi64(ctr2, EIGHT); + ctr3 = _mm_add_epi64(ctr3, EIGHT); + ctr4 = _mm_add_epi64(ctr4, EIGHT); + ctr5 = _mm_add_epi64(ctr5, EIGHT); + ctr6 = _mm_add_epi64(ctr6, EIGHT); + ctr7 = _mm_add_epi64(ctr7, EIGHT); + ctr8 = _mm_add_epi64(ctr8, EIGHT); + + tmp1 =_mm_xor_si128(tmp1, KEY[0]); + tmp2 =_mm_xor_si128(tmp2, KEY[0]); + tmp3 =_mm_xor_si128(tmp3, KEY[0]); + tmp4 =_mm_xor_si128(tmp4, KEY[0]); + tmp5 =_mm_xor_si128(tmp5, KEY[0]); + tmp6 =_mm_xor_si128(tmp6, KEY[0]); + tmp7 =_mm_xor_si128(tmp7, KEY[0]); + tmp8 =_mm_xor_si128(tmp8, KEY[0]); + + for (j=1; j<nr; j++) { + tmp1 = _mm_aesenc_si128(tmp1, KEY[j]); + tmp2 = _mm_aesenc_si128(tmp2, KEY[j]); + tmp3 = _mm_aesenc_si128(tmp3, KEY[j]); + tmp4 = _mm_aesenc_si128(tmp4, KEY[j]); + tmp5 = _mm_aesenc_si128(tmp5, KEY[j]); + tmp6 = _mm_aesenc_si128(tmp6, KEY[j]); + tmp7 = _mm_aesenc_si128(tmp7, KEY[j]); + tmp8 = _mm_aesenc_si128(tmp8, KEY[j]); + } + tmp1 =_mm_aesenclast_si128(tmp1, KEY[nr]); + tmp2 =_mm_aesenclast_si128(tmp2, KEY[nr]); + tmp3 =_mm_aesenclast_si128(tmp3, KEY[nr]); + tmp4 =_mm_aesenclast_si128(tmp4, KEY[nr]); + tmp5 =_mm_aesenclast_si128(tmp5, KEY[nr]); + tmp6 =_mm_aesenclast_si128(tmp6, KEY[nr]); + tmp7 =_mm_aesenclast_si128(tmp7, KEY[nr]); + tmp8 =_mm_aesenclast_si128(tmp8, KEY[nr]); + + tmp1 = _mm_xor_si128(tmp1, + _mm_loadu_si128(&((__m128i*)in)[i*8+0])); + tmp2 = _mm_xor_si128(tmp2, + _mm_loadu_si128(&((__m128i*)in)[i*8+1])); + tmp3 = _mm_xor_si128(tmp3, + _mm_loadu_si128(&((__m128i*)in)[i*8+2])); + tmp4 = _mm_xor_si128(tmp4, + _mm_loadu_si128(&((__m128i*)in)[i*8+3])); + tmp5 = _mm_xor_si128(tmp5, + _mm_loadu_si128(&((__m128i*)in)[i*8+4])); + tmp6 = _mm_xor_si128(tmp6, + _mm_loadu_si128(&((__m128i*)in)[i*8+5])); + tmp7 = _mm_xor_si128(tmp7, + _mm_loadu_si128(&((__m128i*)in)[i*8+6])); + tmp8 = _mm_xor_si128(tmp8, + _mm_loadu_si128(&((__m128i*)in)[i*8+7])); + + _mm_storeu_si128(&((__m128i*)out)[i*8+0], tmp1); + _mm_storeu_si128(&((__m128i*)out)[i*8+1], tmp2); + _mm_storeu_si128(&((__m128i*)out)[i*8+2], tmp3); + _mm_storeu_si128(&((__m128i*)out)[i*8+3], tmp4); + _mm_storeu_si128(&((__m128i*)out)[i*8+4], tmp5); + _mm_storeu_si128(&((__m128i*)out)[i*8+5], tmp6); + _mm_storeu_si128(&((__m128i*)out)[i*8+6], tmp7); + _mm_storeu_si128(&((__m128i*)out)[i*8+7], tmp8); + + tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK); + tmp2 = _mm_shuffle_epi8(tmp2, BSWAP_MASK); + tmp3 = _mm_shuffle_epi8(tmp3, BSWAP_MASK); + tmp4 = _mm_shuffle_epi8(tmp4, BSWAP_MASK); + tmp5 = _mm_shuffle_epi8(tmp5, BSWAP_MASK); + tmp6 = _mm_shuffle_epi8(tmp6, BSWAP_MASK); + tmp7 = _mm_shuffle_epi8(tmp7, BSWAP_MASK); + tmp8 = _mm_shuffle_epi8(tmp8, BSWAP_MASK); + } + for (k=i*8; k<nbytes/16; k++) { + tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64); + ctr1 = _mm_add_epi64(ctr1, ONE); + tmp1 = _mm_xor_si128(tmp1, KEY[0]); + for (j=1; j<nr-1; j+=2) { + tmp1 = _mm_aesenc_si128(tmp1, KEY[j]); + tmp1 = _mm_aesenc_si128(tmp1, KEY[j+1]); + } + tmp1 = _mm_aesenc_si128(tmp1, KEY[nr-1]); + tmp1 = _mm_aesenclast_si128(tmp1, KEY[nr]); + tmp1 = _mm_xor_si128(tmp1, _mm_loadu_si128(&((__m128i*)in)[k])); + _mm_storeu_si128(&((__m128i*)out)[k], tmp1); + } + //If remains one incomplete block + if (nbytes%16) { + tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64); + tmp1 = _mm_xor_si128(tmp1, KEY[0]); + for (j=1; j<nr-1; j+=2) { + tmp1 = _mm_aesenc_si128(tmp1, KEY[j]); + tmp1 = _mm_aesenc_si128(tmp1, KEY[j+1]); + } + tmp1 = _mm_aesenc_si128(tmp1, KEY[nr-1]); + tmp1 = _mm_aesenclast_si128(tmp1, KEY[nr]); + tmp1 = _mm_xor_si128(tmp1, _mm_loadu_si128(&((__m128i*)in)[k])); + last_block = tmp1; + for (j=0; j<nbytes%16; j++) + out[k*16+j] = ((unsigned char*)&last_block)[j]; + } + return 1; //when sucessfull returns 1 +} diff --git a/sys/crypto/aesni/aesni_os.h b/sys/crypto/aesni/aesni_os.h new file mode 100644 index 0000000..273c3f9 --- /dev/null +++ b/sys/crypto/aesni/aesni_os.h @@ -0,0 +1,33 @@ +/*- + * Copyright 2015 Craig Rodrigues <rodrigc@FreeBSD.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + * + */ + +#if defined(__GNUC__) && defined(_KERNEL) +/* Suppress inclusion of gcc's mm_malloc.h header */ +#define _MM_MALLOC_H_INCLUDED 1 +#endif diff --git a/sys/crypto/aesni/aesni_wrap.c b/sys/crypto/aesni/aesni_wrap.c index 39819a6..e5e2a69 100644 --- a/sys/crypto/aesni/aesni_wrap.c +++ b/sys/crypto/aesni/aesni_wrap.c @@ -3,8 +3,13 @@ * Copyright (c) 2010 Konstantin Belousov <kib@FreeBSD.org> * Copyright (c) 2010-2011 Pawel Jakub Dawidek <pawel@dawidek.net> * Copyright 2012-2013 John-Mark Gurney <jmg@FreeBSD.org> + * Copyright (c) 2014 The FreeBSD Foundation * All rights reserved. * + * Portions of this software were developed by John-Mark Gurney + * under sponsorship of the FreeBSD Foundation and + * Rubicon Communications, LLC (Netgate). + * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: @@ -29,15 +34,18 @@ #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); - + #include <sys/param.h> #include <sys/libkern.h> #include <sys/malloc.h> #include <sys/proc.h> #include <sys/systm.h> #include <crypto/aesni/aesni.h> - + +#include <opencrypto/gmac.h> + #include "aesencdec.h" +#include <smmintrin.h> MALLOC_DECLARE(M_AESNI); @@ -176,6 +184,104 @@ aesni_decrypt_ecb(int rounds, const void *key_schedule, size_t len, } } +/* + * mixed endian increment, low 64bits stored in hi word to be compatible + * with _icm's BSWAP. + */ +static inline __m128i +nextc(__m128i x) +{ + const __m128i ONE = _mm_setr_epi32(0, 0, 1, 0); + const __m128i ZERO = _mm_setzero_si128(); + + x = _mm_add_epi64(x, ONE); + __m128i t = _mm_cmpeq_epi64(x, ZERO); + t = _mm_unpackhi_epi64(t, ZERO); + x = _mm_sub_epi64(x, t); + + return x; +} + +void +aesni_encrypt_icm(int rounds, const void *key_schedule, size_t len, + const uint8_t *from, uint8_t *to, const uint8_t iv[AES_BLOCK_LEN]) +{ + __m128i tot; + __m128i tmp1, tmp2, tmp3, tmp4; + __m128i tmp5, tmp6, tmp7, tmp8; + __m128i ctr1, ctr2, ctr3, ctr4; + __m128i ctr5, ctr6, ctr7, ctr8; + __m128i BSWAP_EPI64; + __m128i tout[8]; + struct blocks8 *top; + const struct blocks8 *blks; + size_t i, cnt; + + BSWAP_EPI64 = _mm_set_epi8(8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7); + + ctr1 = _mm_loadu_si128((__m128i*)iv); + ctr1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64); + + cnt = len / AES_BLOCK_LEN / 8; + for (i = 0; i < cnt; i++) { + tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64); + ctr2 = nextc(ctr1); + tmp2 = _mm_shuffle_epi8(ctr2, BSWAP_EPI64); + ctr3 = nextc(ctr2); + tmp3 = _mm_shuffle_epi8(ctr3, BSWAP_EPI64); + ctr4 = nextc(ctr3); + tmp4 = _mm_shuffle_epi8(ctr4, BSWAP_EPI64); + ctr5 = nextc(ctr4); + tmp5 = _mm_shuffle_epi8(ctr5, BSWAP_EPI64); + ctr6 = nextc(ctr5); + tmp6 = _mm_shuffle_epi8(ctr6, BSWAP_EPI64); + ctr7 = nextc(ctr6); + tmp7 = _mm_shuffle_epi8(ctr7, BSWAP_EPI64); + ctr8 = nextc(ctr7); + tmp8 = _mm_shuffle_epi8(ctr8, BSWAP_EPI64); + ctr1 = nextc(ctr8); + + blks = (const struct blocks8 *)from; + top = (struct blocks8 *)to; + aesni_enc8(rounds - 1, key_schedule, tmp1, tmp2, tmp3, tmp4, + tmp5, tmp6, tmp7, tmp8, tout); + + top->blk[0] = blks->blk[0] ^ tout[0]; + top->blk[1] = blks->blk[1] ^ tout[1]; + top->blk[2] = blks->blk[2] ^ tout[2]; + top->blk[3] = blks->blk[3] ^ tout[3]; + top->blk[4] = blks->blk[4] ^ tout[4]; + top->blk[5] = blks->blk[5] ^ tout[5]; + top->blk[6] = blks->blk[6] ^ tout[6]; + top->blk[7] = blks->blk[7] ^ tout[7]; + + from += AES_BLOCK_LEN * 8; + to += AES_BLOCK_LEN * 8; + } + i *= 8; + cnt = len / AES_BLOCK_LEN; + for (; i < cnt; i++) { + tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64); + ctr1 = nextc(ctr1); + + tot = aesni_enc(rounds - 1, key_schedule, tmp1); + + tot = tot ^ _mm_loadu_si128((const __m128i *)from); + _mm_storeu_si128((__m128i *)to, tot); + + from += AES_BLOCK_LEN; + to += AES_BLOCK_LEN; + } + + /* handle remaining partial round */ + if (len % AES_BLOCK_LEN != 0) { + tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64); + tot = aesni_enc(rounds - 1, key_schedule, tmp1); + tot = tot ^ _mm_loadu_si128((const __m128i *)from); + memcpy(to, &tot, len % AES_BLOCK_LEN); + } +} + #define AES_XTS_BLOCKSIZE 16 #define AES_XTS_IVSIZE 8 #define AES_XTS_ALPHA 0x87 /* GF(2^128) generator polynomial */ @@ -333,8 +439,15 @@ int aesni_cipher_setup_common(struct aesni_session *ses, const uint8_t *key, int keylen) { + int decsched; + + decsched = 1; switch (ses->algo) { + case CRYPTO_AES_ICM: + case CRYPTO_AES_NIST_GCM_16: + decsched = 0; + /* FALLTHROUGH */ case CRYPTO_AES_CBC: switch (keylen) { case 128: @@ -347,6 +460,7 @@ aesni_cipher_setup_common(struct aesni_session *ses, const uint8_t *key, ses->rounds = AES256_ROUNDS; break; default: + CRYPTDEB("invalid CBC/ICM/GCM key length"); return (EINVAL); } break; @@ -359,6 +473,7 @@ aesni_cipher_setup_common(struct aesni_session *ses, const uint8_t *key, ses->rounds = AES256_ROUNDS; break; default: + CRYPTDEB("invalid XTS key length"); return (EINVAL); } break; @@ -367,13 +482,13 @@ aesni_cipher_setup_common(struct aesni_session *ses, const uint8_t *key, } aesni_set_enckey(key, ses->enc_schedule, ses->rounds); - aesni_set_deckey(ses->enc_schedule, ses->dec_schedule, ses->rounds); - if (ses->algo == CRYPTO_AES_CBC) - arc4rand(ses->iv, sizeof(ses->iv), 0); - else /* if (ses->algo == CRYPTO_AES_XTS) */ { + if (decsched) + aesni_set_deckey(ses->enc_schedule, ses->dec_schedule, + ses->rounds); + + if (ses->algo == CRYPTO_AES_XTS) aesni_set_enckey(key + keylen / 16, ses->xts_schedule, ses->rounds); - } return (0); } diff --git a/sys/crypto/rijndael/rijndael-api-fst.c b/sys/crypto/rijndael/rijndael-api-fst.c index 187177b..bf7b4d1 100644 --- a/sys/crypto/rijndael/rijndael-api-fst.c +++ b/sys/crypto/rijndael/rijndael-api-fst.c @@ -34,7 +34,8 @@ __FBSDID("$FreeBSD$"); typedef u_int8_t BYTE; -int rijndael_makeKey(keyInstance *key, BYTE direction, int keyLen, char *keyMaterial) { +int rijndael_makeKey(keyInstance *key, BYTE direction, int keyLen, + const char *keyMaterial) { u_int8_t cipherKey[RIJNDAEL_MAXKB]; if (key == NULL) { @@ -83,7 +84,7 @@ int rijndael_cipherInit(cipherInstance *cipher, BYTE mode, char *IV) { } int rijndael_blockEncrypt(cipherInstance *cipher, keyInstance *key, - BYTE *input, int inputLen, BYTE *outBuffer) { + const BYTE *input, int inputLen, BYTE *outBuffer) { int i, k, numBlocks; u_int8_t block[16], iv[4][4]; @@ -198,7 +199,7 @@ int rijndael_blockEncrypt(cipherInstance *cipher, keyInstance *key, * @return length in octets (not bits) of the encrypted output buffer. */ int rijndael_padEncrypt(cipherInstance *cipher, keyInstance *key, - BYTE *input, int inputOctets, BYTE *outBuffer) { + const BYTE *input, int inputOctets, BYTE *outBuffer) { int i, numBlocks, padLen; u_int8_t block[16], *iv, *cp; @@ -232,10 +233,10 @@ int rijndael_padEncrypt(cipherInstance *cipher, keyInstance *key, case MODE_CBC: iv = cipher->IV; for (i = numBlocks; i > 0; i--) { - ((u_int32_t*)block)[0] = ((u_int32_t*)input)[0] ^ ((u_int32_t*)iv)[0]; - ((u_int32_t*)block)[1] = ((u_int32_t*)input)[1] ^ ((u_int32_t*)iv)[1]; - ((u_int32_t*)block)[2] = ((u_int32_t*)input)[2] ^ ((u_int32_t*)iv)[2]; - ((u_int32_t*)block)[3] = ((u_int32_t*)input)[3] ^ ((u_int32_t*)iv)[3]; + ((u_int32_t*)block)[0] = ((const u_int32_t*)input)[0] ^ ((u_int32_t*)iv)[0]; + ((u_int32_t*)block)[1] = ((const u_int32_t*)input)[1] ^ ((u_int32_t*)iv)[1]; + ((u_int32_t*)block)[2] = ((const u_int32_t*)input)[2] ^ ((u_int32_t*)iv)[2]; + ((u_int32_t*)block)[3] = ((const u_int32_t*)input)[3] ^ ((u_int32_t*)iv)[3]; rijndaelEncrypt(key->rk, key->Nr, block, outBuffer); iv = outBuffer; input += 16; @@ -261,7 +262,7 @@ int rijndael_padEncrypt(cipherInstance *cipher, keyInstance *key, } int rijndael_blockDecrypt(cipherInstance *cipher, keyInstance *key, - BYTE *input, int inputLen, BYTE *outBuffer) { + const BYTE *input, int inputLen, BYTE *outBuffer) { int i, k, numBlocks; u_int8_t block[16], iv[4][4]; @@ -360,7 +361,7 @@ int rijndael_blockDecrypt(cipherInstance *cipher, keyInstance *key, } int rijndael_padDecrypt(cipherInstance *cipher, keyInstance *key, - BYTE *input, int inputOctets, BYTE *outBuffer) { + const BYTE *input, int inputOctets, BYTE *outBuffer) { int i, numBlocks, padLen; u_int8_t block[16]; u_int32_t iv[4]; diff --git a/sys/crypto/rijndael/rijndael-api-fst.h b/sys/crypto/rijndael/rijndael-api-fst.h index 122bf52..e5f596a 100644 --- a/sys/crypto/rijndael/rijndael-api-fst.h +++ b/sys/crypto/rijndael/rijndael-api-fst.h @@ -56,18 +56,18 @@ typedef struct { /* changed order of the components */ /* Function prototypes */ -int rijndael_makeKey(keyInstance *, u_int8_t, int, char *); +int rijndael_makeKey(keyInstance *, u_int8_t, int, const char *); int rijndael_cipherInit(cipherInstance *, u_int8_t, char *); -int rijndael_blockEncrypt(cipherInstance *, keyInstance *, u_int8_t *, int, - u_int8_t *); -int rijndael_padEncrypt(cipherInstance *, keyInstance *, u_int8_t *, int, - u_int8_t *); +int rijndael_blockEncrypt(cipherInstance *, keyInstance *, const u_int8_t *, + int, u_int8_t *); +int rijndael_padEncrypt(cipherInstance *, keyInstance *, const u_int8_t *, + int, u_int8_t *); -int rijndael_blockDecrypt(cipherInstance *, keyInstance *, u_int8_t *, int, - u_int8_t *); -int rijndael_padDecrypt(cipherInstance *, keyInstance *, u_int8_t *, int, - u_int8_t *); +int rijndael_blockDecrypt(cipherInstance *, keyInstance *, const u_int8_t *, + int, u_int8_t *); +int rijndael_padDecrypt(cipherInstance *, keyInstance *, const u_int8_t *, + int, u_int8_t *); #endif /* __RIJNDAEL_API_FST_H */ diff --git a/sys/crypto/sha2/sha2.c b/sys/crypto/sha2/sha2.c index 66ec6e9..5bb52f3 100644 --- a/sys/crypto/sha2/sha2.c +++ b/sys/crypto/sha2/sha2.c @@ -121,20 +121,10 @@ __FBSDID("$FreeBSD$"); * Thank you, Jun-ichiro itojun Hagino, for suggesting using u_intXX_t * types and pointing out recent ANSI C support for uintXX_t in inttypes.h. */ -#if 0 /*def SHA2_USE_INTTYPES_H*/ - typedef uint8_t sha2_byte; /* Exactly 1 byte */ typedef uint32_t sha2_word32; /* Exactly 4 bytes */ typedef uint64_t sha2_word64; /* Exactly 8 bytes */ -#else /* SHA2_USE_INTTYPES_H */ - -typedef u_int8_t sha2_byte; /* Exactly 1 byte */ -typedef u_int32_t sha2_word32; /* Exactly 4 bytes */ -typedef u_int64_t sha2_word64; /* Exactly 8 bytes */ - -#endif /* SHA2_USE_INTTYPES_H */ - /*** SHA-256/384/512 Various Length Definitions ***********************/ /* NOTE: Most of these are in sha2.h */ @@ -183,8 +173,6 @@ typedef u_int64_t sha2_word64; /* Exactly 8 bytes */ */ /* Shift-right (used in SHA-256, SHA-384, and SHA-512): */ #define R(b,x) ((x) >> (b)) -/* 32-bit Rotate-right (used in SHA-256): */ -#define S32(b,x) (((x) >> (b)) | ((x) << (32 - (b)))) /* 64-bit Rotate-right (used in SHA-384 and SHA-512): */ #define S64(b,x) (((x) >> (b)) | ((x) << (64 - (b)))) @@ -192,12 +180,6 @@ typedef u_int64_t sha2_word64; /* Exactly 8 bytes */ #define Ch(x,y,z) (((x) & (y)) ^ ((~(x)) & (z))) #define Maj(x,y,z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z))) -/* Four of six logical functions used in SHA-256: */ -#define Sigma0_256(x) (S32(2, (x)) ^ S32(13, (x)) ^ S32(22, (x))) -#define Sigma1_256(x) (S32(6, (x)) ^ S32(11, (x)) ^ S32(25, (x))) -#define sigma0_256(x) (S32(7, (x)) ^ S32(18, (x)) ^ R(3 , (x))) -#define sigma1_256(x) (S32(17, (x)) ^ S32(19, (x)) ^ R(10, (x))) - /* Four of six logical functions used in SHA-384 and SHA-512: */ #define Sigma0_512(x) (S64(28, (x)) ^ S64(34, (x)) ^ S64(39, (x))) #define Sigma1_512(x) (S64(14, (x)) ^ S64(18, (x)) ^ S64(41, (x))) @@ -210,43 +192,10 @@ typedef u_int64_t sha2_word64; /* Exactly 8 bytes */ * only. */ static void SHA512_Last(SHA512_CTX*); -static void SHA256_Transform(SHA256_CTX*, const sha2_word32*); static void SHA512_Transform(SHA512_CTX*, const sha2_word64*); /*** SHA-XYZ INITIAL HASH VALUES AND CONSTANTS ************************/ -/* Hash constant words K for SHA-256: */ -static const sha2_word32 K256[64] = { - 0x428a2f98UL, 0x71374491UL, 0xb5c0fbcfUL, 0xe9b5dba5UL, - 0x3956c25bUL, 0x59f111f1UL, 0x923f82a4UL, 0xab1c5ed5UL, - 0xd807aa98UL, 0x12835b01UL, 0x243185beUL, 0x550c7dc3UL, - 0x72be5d74UL, 0x80deb1feUL, 0x9bdc06a7UL, 0xc19bf174UL, - 0xe49b69c1UL, 0xefbe4786UL, 0x0fc19dc6UL, 0x240ca1ccUL, - 0x2de92c6fUL, 0x4a7484aaUL, 0x5cb0a9dcUL, 0x76f988daUL, - 0x983e5152UL, 0xa831c66dUL, 0xb00327c8UL, 0xbf597fc7UL, - 0xc6e00bf3UL, 0xd5a79147UL, 0x06ca6351UL, 0x14292967UL, - 0x27b70a85UL, 0x2e1b2138UL, 0x4d2c6dfcUL, 0x53380d13UL, - 0x650a7354UL, 0x766a0abbUL, 0x81c2c92eUL, 0x92722c85UL, - 0xa2bfe8a1UL, 0xa81a664bUL, 0xc24b8b70UL, 0xc76c51a3UL, - 0xd192e819UL, 0xd6990624UL, 0xf40e3585UL, 0x106aa070UL, - 0x19a4c116UL, 0x1e376c08UL, 0x2748774cUL, 0x34b0bcb5UL, - 0x391c0cb3UL, 0x4ed8aa4aUL, 0x5b9cca4fUL, 0x682e6ff3UL, - 0x748f82eeUL, 0x78a5636fUL, 0x84c87814UL, 0x8cc70208UL, - 0x90befffaUL, 0xa4506cebUL, 0xbef9a3f7UL, 0xc67178f2UL -}; - -/* Initial hash value H for SHA-256: */ -static const sha2_word32 sha256_initial_hash_value[8] = { - 0x6a09e667UL, - 0xbb67ae85UL, - 0x3c6ef372UL, - 0xa54ff53aUL, - 0x510e527fUL, - 0x9b05688cUL, - 0x1f83d9abUL, - 0x5be0cd19UL -}; - /* Hash constant words K for SHA-384 and SHA-512: */ static const sha2_word64 K512[80] = { 0x428a2f98d728ae22ULL, 0x7137449123ef65cdULL, @@ -323,301 +272,6 @@ static const char *sha2_hex_digits = "0123456789abcdef"; /*** SHA-256: *********************************************************/ -void SHA256_Init(SHA256_CTX* context) { - if (context == (SHA256_CTX*)0) { - return; - } - bcopy(sha256_initial_hash_value, context->state, SHA256_DIGEST_LENGTH); - bzero(context->buffer, SHA256_BLOCK_LENGTH); - context->bitcount = 0; -} - -#ifdef SHA2_UNROLL_TRANSFORM - -/* Unrolled SHA-256 round macros: */ - -#if BYTE_ORDER == LITTLE_ENDIAN - -#define ROUND256_0_TO_15(a,b,c,d,e,f,g,h) \ - REVERSE32(*data++, W256[j]); \ - T1 = (h) + Sigma1_256(e) + Ch((e), (f), (g)) + \ - K256[j] + W256[j]; \ - (d) += T1; \ - (h) = T1 + Sigma0_256(a) + Maj((a), (b), (c)); \ - j++ - - -#else /* BYTE_ORDER == LITTLE_ENDIAN */ - -#define ROUND256_0_TO_15(a,b,c,d,e,f,g,h) \ - T1 = (h) + Sigma1_256(e) + Ch((e), (f), (g)) + \ - K256[j] + (W256[j] = *data++); \ - (d) += T1; \ - (h) = T1 + Sigma0_256(a) + Maj((a), (b), (c)); \ - j++ - -#endif /* BYTE_ORDER == LITTLE_ENDIAN */ - -#define ROUND256(a,b,c,d,e,f,g,h) \ - s0 = W256[(j+1)&0x0f]; \ - s0 = sigma0_256(s0); \ - s1 = W256[(j+14)&0x0f]; \ - s1 = sigma1_256(s1); \ - T1 = (h) + Sigma1_256(e) + Ch((e), (f), (g)) + K256[j] + \ - (W256[j&0x0f] += s1 + W256[(j+9)&0x0f] + s0); \ - (d) += T1; \ - (h) = T1 + Sigma0_256(a) + Maj((a), (b), (c)); \ - j++ - -static void SHA256_Transform(SHA256_CTX* context, const sha2_word32* data) { - sha2_word32 a, b, c, d, e, f, g, h, s0, s1; - sha2_word32 T1, *W256; - int j; - - W256 = (sha2_word32*)context->buffer; - - /* Initialize registers with the prev. intermediate value */ - a = context->state[0]; - b = context->state[1]; - c = context->state[2]; - d = context->state[3]; - e = context->state[4]; - f = context->state[5]; - g = context->state[6]; - h = context->state[7]; - - j = 0; - do { - /* Rounds 0 to 15 (unrolled): */ - ROUND256_0_TO_15(a,b,c,d,e,f,g,h); - ROUND256_0_TO_15(h,a,b,c,d,e,f,g); - ROUND256_0_TO_15(g,h,a,b,c,d,e,f); - ROUND256_0_TO_15(f,g,h,a,b,c,d,e); - ROUND256_0_TO_15(e,f,g,h,a,b,c,d); - ROUND256_0_TO_15(d,e,f,g,h,a,b,c); - ROUND256_0_TO_15(c,d,e,f,g,h,a,b); - ROUND256_0_TO_15(b,c,d,e,f,g,h,a); - } while (j < 16); - - /* Now for the remaining rounds to 64: */ - do { - ROUND256(a,b,c,d,e,f,g,h); - ROUND256(h,a,b,c,d,e,f,g); - ROUND256(g,h,a,b,c,d,e,f); - ROUND256(f,g,h,a,b,c,d,e); - ROUND256(e,f,g,h,a,b,c,d); - ROUND256(d,e,f,g,h,a,b,c); - ROUND256(c,d,e,f,g,h,a,b); - ROUND256(b,c,d,e,f,g,h,a); - } while (j < 64); - - /* Compute the current intermediate hash value */ - context->state[0] += a; - context->state[1] += b; - context->state[2] += c; - context->state[3] += d; - context->state[4] += e; - context->state[5] += f; - context->state[6] += g; - context->state[7] += h; - - /* Clean up */ - a = b = c = d = e = f = g = h = T1 = 0; -} - -#else /* SHA2_UNROLL_TRANSFORM */ - -static void SHA256_Transform(SHA256_CTX* context, const sha2_word32* data) { - sha2_word32 a, b, c, d, e, f, g, h, s0, s1; - sha2_word32 T1, T2, *W256; - int j; - - W256 = (sha2_word32*)context->buffer; - - /* Initialize registers with the prev. intermediate value */ - a = context->state[0]; - b = context->state[1]; - c = context->state[2]; - d = context->state[3]; - e = context->state[4]; - f = context->state[5]; - g = context->state[6]; - h = context->state[7]; - - j = 0; - do { -#if BYTE_ORDER == LITTLE_ENDIAN - /* Copy data while converting to host byte order */ - REVERSE32(*data++,W256[j]); - /* Apply the SHA-256 compression function to update a..h */ - T1 = h + Sigma1_256(e) + Ch(e, f, g) + K256[j] + W256[j]; -#else /* BYTE_ORDER == LITTLE_ENDIAN */ - /* Apply the SHA-256 compression function to update a..h with copy */ - T1 = h + Sigma1_256(e) + Ch(e, f, g) + K256[j] + (W256[j] = *data++); -#endif /* BYTE_ORDER == LITTLE_ENDIAN */ - T2 = Sigma0_256(a) + Maj(a, b, c); - h = g; - g = f; - f = e; - e = d + T1; - d = c; - c = b; - b = a; - a = T1 + T2; - - j++; - } while (j < 16); - - do { - /* Part of the message block expansion: */ - s0 = W256[(j+1)&0x0f]; - s0 = sigma0_256(s0); - s1 = W256[(j+14)&0x0f]; - s1 = sigma1_256(s1); - - /* Apply the SHA-256 compression function to update a..h */ - T1 = h + Sigma1_256(e) + Ch(e, f, g) + K256[j] + - (W256[j&0x0f] += s1 + W256[(j+9)&0x0f] + s0); - T2 = Sigma0_256(a) + Maj(a, b, c); - h = g; - g = f; - f = e; - e = d + T1; - d = c; - c = b; - b = a; - a = T1 + T2; - - j++; - } while (j < 64); - - /* Compute the current intermediate hash value */ - context->state[0] += a; - context->state[1] += b; - context->state[2] += c; - context->state[3] += d; - context->state[4] += e; - context->state[5] += f; - context->state[6] += g; - context->state[7] += h; - - /* Clean up */ - a = b = c = d = e = f = g = h = T1 = T2 = 0; -} - -#endif /* SHA2_UNROLL_TRANSFORM */ - -void SHA256_Update(SHA256_CTX* context, const sha2_byte *data, size_t len) { - unsigned int freespace, usedspace; - - if (len == 0) { - /* Calling with no data is valid - we do nothing */ - return; - } - - /* Sanity check: */ - assert(context != (SHA256_CTX*)0 && data != (sha2_byte*)0); - - usedspace = (context->bitcount >> 3) % SHA256_BLOCK_LENGTH; - if (usedspace > 0) { - /* Calculate how much free space is available in the buffer */ - freespace = SHA256_BLOCK_LENGTH - usedspace; - - if (len >= freespace) { - /* Fill the buffer completely and process it */ - bcopy(data, &context->buffer[usedspace], freespace); - context->bitcount += freespace << 3; - len -= freespace; - data += freespace; - SHA256_Transform(context, (sha2_word32*)context->buffer); - } else { - /* The buffer is not yet full */ - bcopy(data, &context->buffer[usedspace], len); - context->bitcount += len << 3; - /* Clean up: */ - usedspace = freespace = 0; - return; - } - } - while (len >= SHA256_BLOCK_LENGTH) { - /* Process as many complete blocks as we can */ - SHA256_Transform(context, (const sha2_word32*)data); - context->bitcount += SHA256_BLOCK_LENGTH << 3; - len -= SHA256_BLOCK_LENGTH; - data += SHA256_BLOCK_LENGTH; - } - if (len > 0) { - /* There's left-overs, so save 'em */ - bcopy(data, context->buffer, len); - context->bitcount += len << 3; - } - /* Clean up: */ - usedspace = freespace = 0; -} - -void SHA256_Final(sha2_byte digest[], SHA256_CTX* context) { - sha2_word32 *d = (sha2_word32*)digest; - unsigned int usedspace; - - /* Sanity check: */ - assert(context != (SHA256_CTX*)0); - - /* If no digest buffer is passed, we don't bother doing this: */ - if (digest != (sha2_byte*)0) { - usedspace = (context->bitcount >> 3) % SHA256_BLOCK_LENGTH; -#if BYTE_ORDER == LITTLE_ENDIAN - /* Convert FROM host byte order */ - REVERSE64(context->bitcount,context->bitcount); -#endif - if (usedspace > 0) { - /* Begin padding with a 1 bit: */ - context->buffer[usedspace++] = 0x80; - - if (usedspace <= SHA256_SHORT_BLOCK_LENGTH) { - /* Set-up for the last transform: */ - bzero(&context->buffer[usedspace], SHA256_SHORT_BLOCK_LENGTH - usedspace); - } else { - if (usedspace < SHA256_BLOCK_LENGTH) { - bzero(&context->buffer[usedspace], SHA256_BLOCK_LENGTH - usedspace); - } - /* Do second-to-last transform: */ - SHA256_Transform(context, (sha2_word32*)context->buffer); - - /* And set-up for the last transform: */ - bzero(context->buffer, SHA256_SHORT_BLOCK_LENGTH); - } - } else { - /* Set-up for the last transform: */ - bzero(context->buffer, SHA256_SHORT_BLOCK_LENGTH); - - /* Begin padding with a 1 bit: */ - *context->buffer = 0x80; - } - /* Set the bit count: */ - *(sha2_word64*)&context->buffer[SHA256_SHORT_BLOCK_LENGTH] = context->bitcount; - - /* Final transform: */ - SHA256_Transform(context, (sha2_word32*)context->buffer); - -#if BYTE_ORDER == LITTLE_ENDIAN - { - /* Convert TO host byte order */ - int j; - for (j = 0; j < 8; j++) { - REVERSE32(context->state[j],context->state[j]); - *d++ = context->state[j]; - } - } -#else - bcopy(context->state, d, SHA256_DIGEST_LENGTH); -#endif - } - - /* Clean up state data: */ - bzero(context, sizeof(*context)); - usedspace = 0; -} - char *SHA256_End(SHA256_CTX* context, char buffer[]) { sha2_byte digest[SHA256_DIGEST_LENGTH], *d = digest; int i; @@ -641,7 +295,7 @@ char *SHA256_End(SHA256_CTX* context, char buffer[]) { return buffer; } -char* SHA256_Data(const sha2_byte* data, size_t len, char digest[SHA256_DIGEST_STRING_LENGTH]) { +char* SHA256_Data(const void *data, unsigned int len, char *digest) { SHA256_CTX context; SHA256_Init(&context); diff --git a/sys/crypto/sha2/sha2.h b/sys/crypto/sha2/sha2.h index 639d58b..59e91f6 100644 --- a/sys/crypto/sha2/sha2.h +++ b/sys/crypto/sha2/sha2.h @@ -56,70 +56,17 @@ extern "C" { #define SHA512_DIGEST_STRING_LENGTH (SHA512_DIGEST_LENGTH * 2 + 1) -/*** SHA-256/384/512 Context Structures *******************************/ -/* NOTE: If your architecture does not define either u_intXX_t types or - * uintXX_t (from inttypes.h), you may need to define things by hand - * for your system: - */ -#if 0 -typedef unsigned char u_int8_t; /* 1-byte (8-bits) */ -typedef unsigned int u_int32_t; /* 4-bytes (32-bits) */ -typedef unsigned long long u_int64_t; /* 8-bytes (64-bits) */ -#endif -/* - * Most BSD systems already define u_intXX_t types, as does Linux. - * Some systems, however, like Compaq's Tru64 Unix instead can use - * uintXX_t types defined by very recent ANSI C standards and included - * in the file: - * - * #include <inttypes.h> - * - * If you choose to use <inttypes.h> then please define: - * - * #define SHA2_USE_INTTYPES_H - * - * Or on the command line during compile: - * - * cc -DSHA2_USE_INTTYPES_H ... - */ -#if 0 /*def SHA2_USE_INTTYPES_H*/ - -typedef struct _SHA256_CTX { - uint32_t state[8]; - uint64_t bitcount; - uint8_t buffer[SHA256_BLOCK_LENGTH]; -} SHA256_CTX; +/*** SHA-384/512 Context Structures *******************************/ typedef struct _SHA512_CTX { uint64_t state[8]; uint64_t bitcount[2]; uint8_t buffer[SHA512_BLOCK_LENGTH]; } SHA512_CTX; -#else /* SHA2_USE_INTTYPES_H */ - -typedef struct _SHA256_CTX { - u_int32_t state[8]; - u_int64_t bitcount; - u_int8_t buffer[SHA256_BLOCK_LENGTH]; -} SHA256_CTX; -typedef struct _SHA512_CTX { - u_int64_t state[8]; - u_int64_t bitcount[2]; - u_int8_t buffer[SHA512_BLOCK_LENGTH]; -} SHA512_CTX; - -#endif /* SHA2_USE_INTTYPES_H */ - typedef SHA512_CTX SHA384_CTX; -/*** SHA-256/384/512 Function Prototypes ******************************/ - -void SHA256_Init(SHA256_CTX *); -void SHA256_Update(SHA256_CTX*, const u_int8_t*, size_t); -void SHA256_Final(u_int8_t[SHA256_DIGEST_LENGTH], SHA256_CTX*); -char* SHA256_End(SHA256_CTX*, char[SHA256_DIGEST_STRING_LENGTH]); -char* SHA256_Data(const u_int8_t*, size_t, char[SHA256_DIGEST_STRING_LENGTH]); +/*** SHA-384/512 Function Prototypes ******************************/ void SHA384_Init(SHA384_CTX*); void SHA384_Update(SHA384_CTX*, const u_int8_t*, size_t); @@ -137,4 +84,6 @@ char* SHA512_Data(const u_int8_t*, size_t, char[SHA512_DIGEST_STRING_LENGTH]); } #endif /* __cplusplus */ +#include "sha256.h" + #endif /* __SHA2_H__ */ diff --git a/lib/libmd/sha256.h b/sys/crypto/sha2/sha256.h index ce51787..c828379 100644 --- a/lib/libmd/sha256.h +++ b/sys/crypto/sha2/sha256.h @@ -29,22 +29,58 @@ #ifndef _SHA256_H_ #define _SHA256_H_ +#ifndef _KERNEL #include <sys/types.h> +#endif typedef struct SHA256Context { uint32_t state[8]; - uint32_t count[2]; - unsigned char buf[64]; + uint64_t count; + uint8_t buf[64]; } SHA256_CTX; __BEGIN_DECLS + +/* Ensure libmd symbols do not clash with libcrypto */ + +#ifndef SHA256_Init +#define SHA256_Init _libmd_SHA256_Init +#endif +#ifndef SHA256_Update +#define SHA256_Update _libmd_SHA256_Update +#endif +#ifndef SHA256_Final +#define SHA256_Final _libmd_SHA256_Final +#endif +#ifndef SHA256_End +#define SHA256_End _libmd_SHA256_End +#endif +#ifndef SHA256_File +#define SHA256_File _libmd_SHA256_File +#endif +#ifndef SHA256_FileChunk +#define SHA256_FileChunk _libmd_SHA256_FileChunk +#endif +#ifndef SHA256_Data +#define SHA256_Data _libmd_SHA256_Data +#endif + +#ifndef SHA256_Transform +#define SHA256_Transform _libmd_SHA256_Transform +#endif +#ifndef SHA256_version +#define SHA256_version _libmd_SHA256_version +#endif + void SHA256_Init(SHA256_CTX *); void SHA256_Update(SHA256_CTX *, const void *, size_t); void SHA256_Final(unsigned char [32], SHA256_CTX *); char *SHA256_End(SHA256_CTX *, char *); +char *SHA256_Data(const void *, unsigned int, char *); +#ifndef _KERNEL char *SHA256_File(const char *, char *); char *SHA256_FileChunk(const char *, char *, off_t, off_t); -char *SHA256_Data(const void *, unsigned int, char *); +#endif __END_DECLS #endif /* !_SHA256_H_ */ diff --git a/lib/libmd/sha256c.c b/sys/crypto/sha2/sha256c.c index c95fd31..da9b02c 100644 --- a/lib/libmd/sha256c.c +++ b/sys/crypto/sha2/sha256c.c @@ -30,7 +30,11 @@ __FBSDID("$FreeBSD$"); #include <sys/endian.h> #include <sys/types.h> +#ifdef _KERNEL +#include <sys/systm.h> +#else #include <string.h> +#endif #include "sha256.h" @@ -208,10 +212,10 @@ SHA256_Pad(SHA256_CTX * ctx) * Convert length to a vector of bytes -- we do this now rather * than later because the length will change after we pad. */ - be32enc_vect(len, ctx->count, 8); + be64enc(len, ctx->count); /* Add 1--64 bytes so that the resulting length is 56 mod 64 */ - r = (ctx->count[1] >> 3) & 0x3f; + r = (ctx->count >> 3) & 0x3f; plen = (r < 56) ? (56 - r) : (120 - r); SHA256_Update(ctx, PAD, (size_t)plen); @@ -225,7 +229,7 @@ SHA256_Init(SHA256_CTX * ctx) { /* Zero bits processed so far */ - ctx->count[0] = ctx->count[1] = 0; + ctx->count = 0; /* Magic initialization constants */ ctx->state[0] = 0x6A09E667; @@ -242,21 +246,18 @@ SHA256_Init(SHA256_CTX * ctx) void SHA256_Update(SHA256_CTX * ctx, const void *in, size_t len) { - uint32_t bitlen[2]; + uint64_t bitlen; uint32_t r; const unsigned char *src = in; /* Number of bytes left in the buffer from previous updates */ - r = (ctx->count[1] >> 3) & 0x3f; + r = (ctx->count >> 3) & 0x3f; /* Convert the length into a number of bits */ - bitlen[1] = ((uint32_t)len) << 3; - bitlen[0] = (uint32_t)(len >> 29); + bitlen = len << 3; /* Update number of bits */ - if ((ctx->count[1] += bitlen[1]) < bitlen[1]) - ctx->count[0]++; - ctx->count[0] += bitlen[0]; + ctx->count += bitlen; /* Handle the case where we don't need to perform any transforms */ if (len < 64 - r) { @@ -298,3 +299,18 @@ SHA256_Final(unsigned char digest[32], SHA256_CTX * ctx) /* Clear the context state */ memset((void *)ctx, 0, sizeof(*ctx)); } + +#ifdef WEAK_REFS +/* When building libmd, provide weak references. Note: this is not + activated in the context of compiling these sources for internal + use in libcrypt. + */ +#undef SHA256_Init +__weak_reference(_libmd_SHA256_Init, SHA256_Init); +#undef SHA256_Update +__weak_reference(_libmd_SHA256_Update, SHA256_Update); +#undef SHA256_Final +__weak_reference(_libmd_SHA256_Final, SHA256_Final); +#undef SHA256_Transform +__weak_reference(_libmd_SHA256_Transform, SHA256_Transform); +#endif diff --git a/sys/crypto/via/padlock_hash.c b/sys/crypto/via/padlock_hash.c index 9dffc40..c952b63 100644 --- a/sys/crypto/via/padlock_hash.c +++ b/sys/crypto/via/padlock_hash.c @@ -75,7 +75,7 @@ struct padlock_sha_ctx { CTASSERT(sizeof(struct padlock_sha_ctx) <= sizeof(union authctx)); static void padlock_sha_init(struct padlock_sha_ctx *ctx); -static int padlock_sha_update(struct padlock_sha_ctx *ctx, uint8_t *buf, +static int padlock_sha_update(struct padlock_sha_ctx *ctx, const uint8_t *buf, uint16_t bufsize); static void padlock_sha1_final(uint8_t *hash, struct padlock_sha_ctx *ctx); static void padlock_sha256_final(uint8_t *hash, struct padlock_sha_ctx *ctx); @@ -83,16 +83,16 @@ static void padlock_sha256_final(uint8_t *hash, struct padlock_sha_ctx *ctx); static struct auth_hash padlock_hmac_sha1 = { CRYPTO_SHA1_HMAC, "HMAC-SHA1", 20, SHA1_HASH_LEN, SHA1_HMAC_BLOCK_LEN, sizeof(struct padlock_sha_ctx), - (void (*)(void *))padlock_sha_init, - (int (*)(void *, uint8_t *, uint16_t))padlock_sha_update, + (void (*)(void *))padlock_sha_init, NULL, NULL, + (int (*)(void *, const uint8_t *, uint16_t))padlock_sha_update, (void (*)(uint8_t *, void *))padlock_sha1_final }; static struct auth_hash padlock_hmac_sha256 = { CRYPTO_SHA2_256_HMAC, "HMAC-SHA2-256", 32, SHA2_256_HASH_LEN, SHA2_256_HMAC_BLOCK_LEN, sizeof(struct padlock_sha_ctx), - (void (*)(void *))padlock_sha_init, - (int (*)(void *, uint8_t *, uint16_t))padlock_sha_update, + (void (*)(void *))padlock_sha_init, NULL, NULL, + (int (*)(void *, const uint8_t *, uint16_t))padlock_sha_update, (void (*)(uint8_t *, void *))padlock_sha256_final }; @@ -167,7 +167,7 @@ padlock_sha_init(struct padlock_sha_ctx *ctx) } static int -padlock_sha_update(struct padlock_sha_ctx *ctx, uint8_t *buf, uint16_t bufsize) +padlock_sha_update(struct padlock_sha_ctx *ctx, const uint8_t *buf, uint16_t bufsize) { if (ctx->psc_size - ctx->psc_offset < bufsize) { diff --git a/sys/dev/e1000/if_em.c b/sys/dev/e1000/if_em.c index 6883a7a..b77d0e9 100644 --- a/sys/dev/e1000/if_em.c +++ b/sys/dev/e1000/if_em.c @@ -213,15 +213,15 @@ static int em_detach(device_t); static int em_shutdown(device_t); static int em_suspend(device_t); static int em_resume(device_t); -#ifdef EM_MULTIQUEUE + static int em_mq_start(struct ifnet *, struct mbuf *); static int em_mq_start_locked(struct ifnet *, struct tx_ring *); static void em_qflush(struct ifnet *); -#else + static void em_start(struct ifnet *); static void em_start_locked(struct ifnet *, struct tx_ring *); -#endif + static int em_ioctl(struct ifnet *, u_long, caddr_t); static void em_init(void *); static void em_init_locked(struct adapter *); @@ -944,13 +944,13 @@ em_resume(device_t dev) (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) { for (int i = 0; i < adapter->num_queues; i++, txr++) { EM_TX_LOCK(txr); -#ifdef EM_MULTIQUEUE + if (!drbr_empty(ifp, txr->br)) em_mq_start_locked(ifp, txr); -#else + if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) em_start_locked(ifp, txr); -#endif + EM_TX_UNLOCK(txr); } } @@ -960,7 +960,7 @@ em_resume(device_t dev) } -#ifndef EM_MULTIQUEUE + static void em_start_locked(struct ifnet *ifp, struct tx_ring *txr) { @@ -1023,7 +1023,7 @@ em_start(struct ifnet *ifp) } return; } -#else /* EM_MULTIQUEUE */ + /********************************************************************* * Multiqueue Transmit routines * @@ -1130,7 +1130,7 @@ em_qflush(struct ifnet *ifp) } if_qflush(ifp); } -#endif /* EM_MULTIQUEUE */ + /********************************************************************* * Ioctl entry point @@ -1532,13 +1532,13 @@ em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count) EM_TX_LOCK(txr); em_txeof(txr); -#ifdef EM_MULTIQUEUE + if (!drbr_empty(ifp, txr->br)) em_mq_start_locked(ifp, txr); -#else + if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) em_start_locked(ifp, txr); -#endif + EM_TX_UNLOCK(txr); return (rx_done); @@ -1606,13 +1606,13 @@ em_handle_que(void *context, int pending) EM_TX_LOCK(txr); em_txeof(txr); -#ifdef EM_MULTIQUEUE + if (!drbr_empty(ifp, txr->br)) em_mq_start_locked(ifp, txr); -#else + if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) em_start_locked(ifp, txr); -#endif + EM_TX_UNLOCK(txr); if (more) { taskqueue_enqueue(adapter->tq, &adapter->que_task); @@ -1640,13 +1640,13 @@ em_msix_tx(void *arg) ++txr->tx_irq; EM_TX_LOCK(txr); em_txeof(txr); -#ifdef EM_MULTIQUEUE + if (!drbr_empty(ifp, txr->br)) em_mq_start_locked(ifp, txr); -#else + if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) em_start_locked(ifp, txr); -#endif + /* Reenable this interrupt */ E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims); @@ -1741,13 +1741,13 @@ em_handle_tx(void *context, int pending) EM_TX_LOCK(txr); em_txeof(txr); -#ifdef EM_MULTIQUEUE + if (!drbr_empty(ifp, txr->br)) em_mq_start_locked(ifp, txr); -#else + if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) em_start_locked(ifp, txr); -#endif + E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims); EM_TX_UNLOCK(txr); } @@ -1771,13 +1771,13 @@ em_handle_link(void *context, int pending) if (adapter->link_active) { for (int i = 0; i < adapter->num_queues; i++, txr++) { EM_TX_LOCK(txr); -#ifdef EM_MULTIQUEUE + if (!drbr_empty(ifp, txr->br)) em_mq_start_locked(ifp, txr); -#else + if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) em_start_locked(ifp, txr); -#endif + EM_TX_UNLOCK(txr); } } @@ -3244,16 +3244,15 @@ em_setup_interface(device_t dev, struct adapter *adapter) ifp->if_hw_tsomaxsegcount = EM_MAX_SCATTER - 5; ifp->if_hw_tsomaxsegsize = EM_TSO_SEG_SIZE; -#ifdef EM_MULTIQUEUE /* Multiqueue stack interface */ ifp->if_transmit = em_mq_start; ifp->if_qflush = em_qflush; -#else + ifp->if_start = em_start; IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1); ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1; IFQ_SET_READY(&ifp->if_snd); -#endif + ether_ifattach(ifp, adapter->hw.mac.addr); diff --git a/sys/dev/e1000/if_igb.c b/sys/dev/e1000/if_igb.c index cdd067f..4945950 100644 --- a/sys/dev/e1000/if_igb.c +++ b/sys/dev/e1000/if_igb.c @@ -121,15 +121,12 @@ static int igb_detach(device_t); static int igb_shutdown(device_t); static int igb_suspend(device_t); static int igb_resume(device_t); -#ifndef IGB_LEGACY_TX static int igb_mq_start(struct ifnet *, struct mbuf *); static int igb_mq_start_locked(struct ifnet *, struct tx_ring *); static void igb_qflush(struct ifnet *); static void igb_deferred_mq_start(void *, int); -#else static void igb_start(struct ifnet *); static void igb_start_locked(struct tx_ring *, struct ifnet *ifp); -#endif static int igb_ioctl(struct ifnet *, u_long, caddr_t); static void igb_init(void *); static void igb_init_locked(struct adapter *); @@ -293,7 +290,6 @@ TUNABLE_INT("hw.igb.max_interrupt_rate", &igb_max_interrupt_rate); SYSCTL_INT(_hw_igb, OID_AUTO, max_interrupt_rate, CTLFLAG_RDTUN, &igb_max_interrupt_rate, 0, "Maximum interrupts per second"); -#ifndef IGB_LEGACY_TX /* ** Tuneable number of buffers in the buf-ring (drbr_xxx) */ @@ -301,7 +297,6 @@ static int igb_buf_ring_size = IGB_BR_SIZE; TUNABLE_INT("hw.igb.buf_ring_size", &igb_buf_ring_size); SYSCTL_INT(_hw_igb, OID_AUTO, buf_ring_size, CTLFLAG_RDTUN, &igb_buf_ring_size, 0, "Size of the bufring"); -#endif /* ** Header split causes the packet header to @@ -807,15 +802,15 @@ igb_resume(device_t dev) (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) { for (int i = 0; i < adapter->num_queues; i++, txr++) { IGB_TX_LOCK(txr); -#ifndef IGB_LEGACY_TX + /* Process the stack queue only if not depleted */ if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) && !drbr_empty(ifp, txr->br)) igb_mq_start_locked(ifp, txr); -#else + if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) igb_start_locked(txr, ifp); -#endif + IGB_TX_UNLOCK(txr); } } @@ -825,7 +820,6 @@ igb_resume(device_t dev) } -#ifdef IGB_LEGACY_TX /********************************************************************* * Transmit entry point @@ -903,7 +897,6 @@ igb_start(struct ifnet *ifp) return; } -#else /* ~IGB_LEGACY_TX */ /* ** Multiqueue Transmit Entry: @@ -1022,7 +1015,6 @@ igb_qflush(struct ifnet *ifp) } if_qflush(ifp); } -#endif /* ~IGB_LEGACY_TX */ /********************************************************************* * Ioctl entry point @@ -1360,15 +1352,15 @@ igb_handle_que(void *context, int pending) IGB_TX_LOCK(txr); igb_txeof(txr); -#ifndef IGB_LEGACY_TX + /* Process the stack queue only if not depleted */ if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) && !drbr_empty(ifp, txr->br)) igb_mq_start_locked(ifp, txr); -#else + if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) igb_start_locked(txr, ifp); -#endif + IGB_TX_UNLOCK(txr); /* Do we need another? */ if (more) { @@ -1411,15 +1403,15 @@ igb_handle_link_locked(struct adapter *adapter) if ((ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) { for (int i = 0; i < adapter->num_queues; i++, txr++) { IGB_TX_LOCK(txr); -#ifndef IGB_LEGACY_TX + /* Process the stack queue only if not depleted */ if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) && !drbr_empty(ifp, txr->br)) igb_mq_start_locked(ifp, txr); -#else + if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) igb_start_locked(txr, ifp); -#endif + IGB_TX_UNLOCK(txr); } } @@ -1513,13 +1505,10 @@ igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count) do { more = igb_txeof(txr); } while (loop-- && more); -#ifndef IGB_LEGACY_TX if (!drbr_empty(ifp, txr->br)) igb_mq_start_locked(ifp, txr); -#else if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) igb_start_locked(txr, ifp); -#endif IGB_TX_UNLOCK(txr); } @@ -1552,15 +1541,12 @@ igb_msix_que(void *arg) IGB_TX_LOCK(txr); igb_txeof(txr); -#ifndef IGB_LEGACY_TX /* Process the stack queue only if not depleted */ if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) && !drbr_empty(ifp, txr->br)) igb_mq_start_locked(ifp, txr); -#else if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) igb_start_locked(txr, ifp); -#endif IGB_TX_UNLOCK(txr); more_rx = igb_rxeof(que, adapter->rx_process_limit, NULL); @@ -2323,9 +2309,7 @@ igb_allocate_legacy(struct adapter *adapter) { device_t dev = adapter->dev; struct igb_queue *que = adapter->queues; -#ifndef IGB_LEGACY_TX struct tx_ring *txr = adapter->tx_rings; -#endif int error, rid = 0; /* Turn off all interrupts */ @@ -2344,9 +2328,7 @@ igb_allocate_legacy(struct adapter *adapter) return (ENXIO); } -#ifndef IGB_LEGACY_TX TASK_INIT(&txr->txq_task, 0, igb_deferred_mq_start, txr); -#endif /* * Try allocating a fast interrupt and the associated deferred @@ -2428,10 +2410,8 @@ igb_allocate_msix(struct adapter *adapter) i,igb_last_bind_cpu); igb_last_bind_cpu = CPU_NEXT(igb_last_bind_cpu); } -#ifndef IGB_LEGACY_TX TASK_INIT(&que->txr->txq_task, 0, igb_deferred_mq_start, que->txr); -#endif /* Make tasklet for deferred handling */ TASK_INIT(&que->que_task, 0, igb_handle_que, que); que->tq = taskqueue_create("igb_que", M_NOWAIT, @@ -2655,9 +2635,9 @@ igb_free_pci_resources(struct adapter *adapter) for (int i = 0; i < adapter->num_queues; i++, que++) { if (que->tq != NULL) { -#ifndef IGB_LEGACY_TX + taskqueue_drain(que->tq, &que->txr->txq_task); -#endif + taskqueue_drain(que->tq, &que->que_task); taskqueue_free(que->tq); } @@ -3049,15 +3029,15 @@ igb_setup_interface(device_t dev, struct adapter *adapter) ifp->if_hw_tsomaxsegcount = IGB_MAX_SCATTER; ifp->if_hw_tsomaxsegsize = IGB_TSO_SEG_SIZE; -#ifndef IGB_LEGACY_TX + ifp->if_transmit = igb_mq_start; ifp->if_qflush = igb_qflush; -#else + ifp->if_start = igb_start; IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1); ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1; IFQ_SET_READY(&ifp->if_snd); -#endif + ether_ifattach(ifp, adapter->hw.mac.addr); @@ -3296,11 +3276,11 @@ igb_allocate_queues(struct adapter *adapter) error = ENOMEM; goto err_tx_desc; } -#ifndef IGB_LEGACY_TX + /* Allocate a buf ring */ txr->br = buf_ring_alloc(igb_buf_ring_size, M_DEVBUF, M_WAITOK, &txr->tx_mtx); -#endif + } /* @@ -3357,9 +3337,9 @@ err_tx_desc: igb_dma_free(adapter, &txr->txdma); free(adapter->rx_rings, M_DEVBUF); rx_fail: -#ifndef IGB_LEGACY_TX + buf_ring_free(txr->br, M_DEVBUF); -#endif + free(adapter->tx_rings, M_DEVBUF); tx_fail: free(adapter->queues, M_DEVBUF); @@ -3615,10 +3595,10 @@ igb_free_transmit_buffers(struct tx_ring *txr) tx_buffer->map = NULL; } } -#ifndef IGB_LEGACY_TX + if (txr->br != NULL) buf_ring_free(txr->br, M_DEVBUF); -#endif + if (txr->tx_buffers != NULL) { free(txr->tx_buffers, M_DEVBUF); txr->tx_buffers = NULL; @@ -4924,10 +4904,10 @@ igb_rxeof(struct igb_queue *que, int count, int *done) */ M_HASHTYPE_SET(rxr->fmp, M_HASHTYPE_OPAQUE); } else { -#ifndef IGB_LEGACY_TX + rxr->fmp->m_pkthdr.flowid = que->msix; M_HASHTYPE_SET(rxr->fmp, M_HASHTYPE_OPAQUE); -#endif + } sendmp = rxr->fmp; /* Make sure to set M_PKTHDR. */ diff --git a/sys/dev/e1000/if_igb.h b/sys/dev/e1000/if_igb.h index 98df1ec..431437d 100644 --- a/sys/dev/e1000/if_igb.h +++ b/sys/dev/e1000/if_igb.h @@ -37,9 +37,9 @@ #include <sys/param.h> #include <sys/systm.h> -#ifndef IGB_LEGACY_TX + #include <sys/buf_ring.h> -#endif + #include <sys/bus.h> #include <sys/endian.h> #include <sys/kernel.h> @@ -365,10 +365,10 @@ struct tx_ring { u32 txd_cmd; bus_dma_tag_t txtag; char mtx_name[16]; -#ifndef IGB_LEGACY_TX + struct buf_ring *br; struct task txq_task; -#endif + u32 bytes; /* used for AIM */ u32 packets; /* Soft Stats */ diff --git a/sys/dev/gpioapu/gpioapu.c b/sys/dev/gpioapu/gpioapu.c new file mode 100644 index 0000000..11e5e13 --- /dev/null +++ b/sys/dev/gpioapu/gpioapu.c @@ -0,0 +1,336 @@ +/*- + * Copyright (c) 2013 Ermal Luci <eri@FreeBSD.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> + +#include <sys/types.h> +#include <sys/param.h> +#include <sys/conf.h> +#include <sys/kernel.h> +#include <sys/module.h> +#include <sys/systm.h> +#include <sys/sysctl.h> +#include <sys/priv.h> +#include <sys/bus.h> +#include <machine/bus.h> +#include <sys/rman.h> +#include <machine/resource.h> +#include <sys/proc.h> +#include <sys/uio.h> + +#include <dev/pci/pcivar.h> +#include <isa/isavar.h> + +/* SB7xx RRG 2.3.3.1.1. */ +#define AMDSB_PMIO_INDEX 0xcd6 +#define AMDSB_PMIO_DATA (PMIO_INDEX + 1) +#define AMDSB_PMIO_WIDTH 2 + +#define AMDSB_SMBUS_DEVID 0x43851002 +#define AMDSB8_SMBUS_REVID 0x40 + +#define IOMUX_OFFSET 0xD00 +#define GPIO_SPACE_OFFSET 0x100 +#define GPIO_SPACE_SIZE 0x100 +/* SB8xx RRG 2.3.3. */ +#define AMDSB8_PM_WDT_EN 0x24 + +#define GPIO_187 187 // MODESW +#define GPIO_189 189 // LED1# +#define GPIO_190 190 // LED2# +#define GPIO_191 191 // LED3# +#define GPIO_OUTPUT 0x08 +#define GPIO_INPUT 0x28 + +struct gpioapu_softc { + device_t dev; + struct cdev *cdev; + struct resource *res_ctrl; + struct resource *res_count; + int rid_ctrl; + int rid_count; +}; + +static int gpioapu_open(struct cdev *dev, int flags, int fmt, + struct thread *td); +static int gpioapu_close(struct cdev *dev, int flags, int fmt, + struct thread *td); +static int gpioapu_write(struct cdev *dev, struct uio *uio, int ioflag); +static int gpioapu_read(struct cdev *dev, struct uio *uio, int ioflag); + +static struct cdevsw gpioapu_cdevsw = { + .d_version = D_VERSION, + .d_open = gpioapu_open, + .d_read = gpioapu_read, + .d_write = gpioapu_write, + .d_close = gpioapu_close, + .d_name = "gpioapu", +}; + +static void gpioapu_identify(driver_t *driver, device_t parent); +static int gpioapu_probe(device_t dev); +static int gpioapu_attach(device_t dev); +static int gpioapu_detach(device_t dev); + +static device_method_t gpioapu_methods[] = { + DEVMETHOD(device_identify, gpioapu_identify), + DEVMETHOD(device_probe, gpioapu_probe), + DEVMETHOD(device_attach, gpioapu_attach), + DEVMETHOD(device_detach, gpioapu_detach), + {0, 0} +}; + +static devclass_t gpioapu_devclass; + +static driver_t gpioapu_driver = { + "gpioapu", + gpioapu_methods, + sizeof(struct gpioapu_softc) +}; + +DRIVER_MODULE(gpioapu, isa, gpioapu_driver, gpioapu_devclass, NULL, NULL); + + +static uint8_t +pmio_read(struct resource *res, uint8_t reg) +{ + bus_write_1(res, 0, reg); /* Index */ + return (bus_read_1(res, 1)); /* Data */ +} + +#if 0 +static void +pmio_write(struct resource *res, uint8_t reg, uint8_t val) +{ + bus_write_1(res, 0, reg); /* Index */ + bus_write_1(res, 1, val); /* Data */ +} +#endif + +/* ARGSUSED */ +static int +gpioapu_open(struct cdev *dev __unused, int flags __unused, int fmt __unused, + struct thread *td) +{ + int error; + + error = priv_check(td, PRIV_IO); + if (error != 0) + return (error); + error = securelevel_gt(td->td_ucred, 0); + if (error != 0) + return (error); + + return (error); +} + +static int +gpioapu_read(struct cdev *dev, struct uio *uio, int ioflag) { + struct gpioapu_softc *sc = dev->si_drv1; + uint8_t tmp; + char ch; + int error; + + tmp = bus_read_1(sc->res_ctrl, GPIO_187); +#ifdef DEBUG + device_printf(sc->dev, "returned %x\n", (u_int)tmp); +#endif + if (tmp & 0x80) + ch = '1'; + else + ch = '0'; + + error = uiomove(&ch, sizeof(ch), uio); + + return (error); +} +static int +gpioapu_write(struct cdev *dev, struct uio *uio, int ioflag) { + struct gpioapu_softc *sc = dev->si_drv1; + char ch[3]; + uint8_t old; + int error, i, start; + + error = uiomove(ch, sizeof(ch), uio); + if (error) + return (error); + + start = GPIO_189; + for (i = 0; i < 3; i++) { + old = bus_read_1(sc->res_ctrl, start + i); +#ifdef DEBUG + device_printf(sc->dev, "returned %x - %c\n", (u_int)old, ch[i]); +#endif + if (ch[i] == '1') + old &= 0x80; + else + old = 0xc8; + bus_write_1(sc->res_ctrl, start + i, old); + } + + return (error); +} + +static int +gpioapu_close(struct cdev *dev __unused, int flags __unused, int fmt __unused, + struct thread *td) +{ + struct gpioapu_softc *sc = dev->si_drv1; + int i, start; + + start = GPIO_187; + for (i = 0; i < 2; i++) { + bus_write_1(sc->res_ctrl, start, 0xc8); + } + + return (0); +} + +static void +gpioapu_identify(driver_t *driver, device_t parent) +{ + device_t child; + device_t smb_dev; + + if (resource_disabled("gpioapu", 0)) + return; + + + if (device_find_child(parent, "gpioapu", -1) != NULL) + return; + + /* + * Try to identify SB600/SB7xx by PCI Device ID of SMBus device + * that should be present at bus 0, device 20, function 0. + */ + smb_dev = pci_find_bsf(0, 20, 0); + if (smb_dev == NULL) + return; + + if (pci_get_devid(smb_dev) != AMDSB_SMBUS_DEVID) + return; + + child = BUS_ADD_CHILD(parent, ISA_ORDER_SPECULATIVE, "gpioapu", -1); + if (child == NULL) + device_printf(parent, "add gpioapu child failed\n"); +} + +static int +gpioapu_probe(device_t dev) +{ + struct resource *res; + uint32_t addr; + int rid; + int rc, i; + char *value; + + value = getenv("smbios.system.product"); + device_printf(dev, "Environment returned %s\n", value); + if (value == NULL || strncmp(value, "APU", strlen("APU"))) + return (ENXIO); + + /* Do not claim some ISA PnP device by accident. */ + if (isa_get_logicalid(dev) != 0) + return (ENXIO); + + rc = bus_set_resource(dev, SYS_RES_IOPORT, 0, AMDSB_PMIO_INDEX, + AMDSB_PMIO_WIDTH); + if (rc != 0) { + device_printf(dev, "bus_set_resource for IO failed\n"); + return (ENXIO); + } + rid = 0; + res = bus_alloc_resource(dev, SYS_RES_IOPORT, &rid, 0ul, ~0ul, + AMDSB_PMIO_WIDTH, RF_ACTIVE | RF_SHAREABLE); + if (res == NULL) { + device_printf(dev, "bus_alloc_resource for IO failed\n"); + return (ENXIO); + } + + /* Find base address of memory mapped WDT registers. */ + for (addr = 0, i = 0; i < 4; i++) { + addr <<= 8; + addr |= pmio_read(res, AMDSB8_PM_WDT_EN + 3 - i); + } + addr &= 0xFFFFF000; + device_printf(dev, "Address on reg 0x24 is 0x%x/%u\n", addr, addr); + + bus_release_resource(dev, SYS_RES_IOPORT, rid, res); + bus_delete_resource(dev, SYS_RES_IOPORT, rid); + + rc = bus_set_resource(dev, SYS_RES_MEMORY, 0, addr + GPIO_SPACE_OFFSET, + GPIO_SPACE_SIZE); + if (rc != 0) { + device_printf(dev, "bus_set_resource for memory failed\n"); + return (ENXIO); + } + + return (0); +} + +static int +gpioapu_attach(device_t dev) +{ + struct gpioapu_softc *sc; + + sc = device_get_softc(dev); + sc->dev = dev; + sc->rid_ctrl = 0; + + sc->res_ctrl = bus_alloc_resource(dev, SYS_RES_MEMORY, &sc->rid_ctrl, 0ul, ~0ul, + GPIO_SPACE_SIZE, RF_ACTIVE | RF_SHAREABLE); + if (sc->res_ctrl == NULL) { + device_printf(dev, "bus_alloc_resource for memory failed\n"); + return (ENXIO); + } + + sc->dev = dev; + sc->cdev = make_dev(&gpioapu_cdevsw, 0, + UID_ROOT, GID_WHEEL, 0600, "gpioapu"); + + sc->cdev->si_drv1 = sc; + + return (0); + +} + +static int +gpioapu_detach(device_t dev) +{ + struct gpioapu_softc *sc; + + sc = device_get_softc(dev); + + if (sc->res_ctrl != NULL) { + bus_release_resource(dev, SYS_RES_MEMORY, sc->rid_ctrl, + sc->res_ctrl); + bus_delete_resource(dev, SYS_RES_MEMORY, sc->rid_ctrl); + } + + destroy_dev(sc->cdev); + + return (0); +} diff --git a/sys/dev/ichwd/ichwd.c b/sys/dev/ichwd/ichwd.c index 9f8704f..73fcd7e 100644 --- a/sys/dev/ichwd/ichwd.c +++ b/sys/dev/ichwd/ichwd.c @@ -53,6 +53,7 @@ * (document no. 252516-001) sections 9.10 and 9.11. * * ICH6/7/8 support by Takeharu KATO <takeharu1219@ybb.ne.jp> + * SoC PMC support by Denir Li <denir.li@cas-well.com> */ #include <sys/cdefs.h> @@ -74,161 +75,216 @@ __FBSDID("$FreeBSD$"); #include <dev/ichwd/ichwd.h> static struct ichwd_device ichwd_devices[] = { - { DEVICEID_82801AA, "Intel 82801AA watchdog timer", 1 }, - { DEVICEID_82801AB, "Intel 82801AB watchdog timer", 1 }, - { DEVICEID_82801BA, "Intel 82801BA watchdog timer", 2 }, - { DEVICEID_82801BAM, "Intel 82801BAM watchdog timer", 2 }, - { DEVICEID_82801CA, "Intel 82801CA watchdog timer", 3 }, - { DEVICEID_82801CAM, "Intel 82801CAM watchdog timer", 3 }, - { DEVICEID_82801DB, "Intel 82801DB watchdog timer", 4 }, - { DEVICEID_82801DBM, "Intel 82801DBM watchdog timer", 4 }, - { DEVICEID_82801E, "Intel 82801E watchdog timer", 5 }, - { DEVICEID_82801EB, "Intel 82801EB watchdog timer", 5 }, - { DEVICEID_82801EBR, "Intel 82801EB/ER watchdog timer", 5 }, - { DEVICEID_6300ESB, "Intel 6300ESB watchdog timer", 5 }, - { DEVICEID_82801FBR, "Intel 82801FB/FR watchdog timer", 6 }, - { DEVICEID_ICH6M, "Intel ICH6M watchdog timer", 6 }, - { DEVICEID_ICH6W, "Intel ICH6W watchdog timer", 6 }, - { DEVICEID_ICH7, "Intel ICH7 watchdog timer", 7 }, - { DEVICEID_ICH7DH, "Intel ICH7DH watchdog timer", 7 }, - { DEVICEID_ICH7M, "Intel ICH7M watchdog timer", 7 }, - { DEVICEID_ICH7MDH, "Intel ICH7MDH watchdog timer", 7 }, - { DEVICEID_NM10, "Intel NM10 watchdog timer", 7 }, - { DEVICEID_ICH8, "Intel ICH8 watchdog timer", 8 }, - { DEVICEID_ICH8DH, "Intel ICH8DH watchdog timer", 8 }, - { DEVICEID_ICH8DO, "Intel ICH8DO watchdog timer", 8 }, - { DEVICEID_ICH8M, "Intel ICH8M watchdog timer", 8 }, - { DEVICEID_ICH8ME, "Intel ICH8M-E watchdog timer", 8 }, - { DEVICEID_63XXESB, "Intel 63XXESB watchdog timer", 8 }, - { DEVICEID_ICH9, "Intel ICH9 watchdog timer", 9 }, - { DEVICEID_ICH9DH, "Intel ICH9DH watchdog timer", 9 }, - { DEVICEID_ICH9DO, "Intel ICH9DO watchdog timer", 9 }, - { DEVICEID_ICH9M, "Intel ICH9M watchdog timer", 9 }, - { DEVICEID_ICH9ME, "Intel ICH9M-E watchdog timer", 9 }, - { DEVICEID_ICH9R, "Intel ICH9R watchdog timer", 9 }, - { DEVICEID_ICH10, "Intel ICH10 watchdog timer", 10 }, - { DEVICEID_ICH10D, "Intel ICH10D watchdog timer", 10 }, - { DEVICEID_ICH10DO, "Intel ICH10DO watchdog timer", 10 }, - { DEVICEID_ICH10R, "Intel ICH10R watchdog timer", 10 }, - { DEVICEID_PCH, "Intel PCH watchdog timer", 10 }, - { DEVICEID_PCHM, "Intel PCH watchdog timer", 10 }, - { DEVICEID_P55, "Intel P55 watchdog timer", 10 }, - { DEVICEID_PM55, "Intel PM55 watchdog timer", 10 }, - { DEVICEID_H55, "Intel H55 watchdog timer", 10 }, - { DEVICEID_QM57, "Intel QM57 watchdog timer", 10 }, - { DEVICEID_H57, "Intel H57 watchdog timer", 10 }, - { DEVICEID_HM55, "Intel HM55 watchdog timer", 10 }, - { DEVICEID_Q57, "Intel Q57 watchdog timer", 10 }, - { DEVICEID_HM57, "Intel HM57 watchdog timer", 10 }, - { DEVICEID_PCHMSFF, "Intel PCHMSFF watchdog timer", 10 }, - { DEVICEID_QS57, "Intel QS57 watchdog timer", 10 }, - { DEVICEID_3400, "Intel 3400 watchdog timer", 10 }, - { DEVICEID_3420, "Intel 3420 watchdog timer", 10 }, - { DEVICEID_3450, "Intel 3450 watchdog timer", 10 }, - { DEVICEID_CPT0, "Intel Cougar Point watchdog timer", 10 }, - { DEVICEID_CPT1, "Intel Cougar Point watchdog timer", 10 }, - { DEVICEID_CPT2, "Intel Cougar Point watchdog timer", 10 }, - { DEVICEID_CPT3, "Intel Cougar Point watchdog timer", 10 }, - { DEVICEID_CPT4, "Intel Cougar Point watchdog timer", 10 }, - { DEVICEID_CPT5, "Intel Cougar Point watchdog timer", 10 }, - { DEVICEID_CPT6, "Intel Cougar Point watchdog timer", 10 }, - { DEVICEID_CPT7, "Intel Cougar Point watchdog timer", 10 }, - { DEVICEID_CPT8, "Intel Cougar Point watchdog timer", 10 }, - { DEVICEID_CPT9, "Intel Cougar Point watchdog timer", 10 }, - { DEVICEID_CPT10, "Intel Cougar Point watchdog timer", 10 }, - { DEVICEID_CPT11, "Intel Cougar Point watchdog timer", 10 }, - { DEVICEID_CPT12, "Intel Cougar Point watchdog timer", 10 }, - { DEVICEID_CPT13, "Intel Cougar Point watchdog timer", 10 }, - { DEVICEID_CPT14, "Intel Cougar Point watchdog timer", 10 }, - { DEVICEID_CPT15, "Intel Cougar Point watchdog timer", 10 }, - { DEVICEID_CPT16, "Intel Cougar Point watchdog timer", 10 }, - { DEVICEID_CPT17, "Intel Cougar Point watchdog timer", 10 }, - { DEVICEID_CPT18, "Intel Cougar Point watchdog timer", 10 }, - { DEVICEID_CPT19, "Intel Cougar Point watchdog timer", 10 }, - { DEVICEID_CPT20, "Intel Cougar Point watchdog timer", 10 }, - { DEVICEID_CPT21, "Intel Cougar Point watchdog timer", 10 }, - { DEVICEID_CPT22, "Intel Cougar Point watchdog timer", 10 }, - { DEVICEID_CPT23, "Intel Cougar Point watchdog timer", 10 }, - { DEVICEID_CPT23, "Intel Cougar Point watchdog timer", 10 }, - { DEVICEID_CPT25, "Intel Cougar Point watchdog timer", 10 }, - { DEVICEID_CPT26, "Intel Cougar Point watchdog timer", 10 }, - { DEVICEID_CPT27, "Intel Cougar Point watchdog timer", 10 }, - { DEVICEID_CPT28, "Intel Cougar Point watchdog timer", 10 }, - { DEVICEID_CPT29, "Intel Cougar Point watchdog timer", 10 }, - { DEVICEID_CPT30, "Intel Cougar Point watchdog timer", 10 }, - { DEVICEID_CPT31, "Intel Cougar Point watchdog timer", 10 }, - { DEVICEID_PATSBURG_LPC1, "Intel Patsburg watchdog timer", 10 }, - { DEVICEID_PATSBURG_LPC2, "Intel Patsburg watchdog timer", 10 }, - { DEVICEID_PPT0, "Intel Panther Point watchdog timer", 10 }, - { DEVICEID_PPT1, "Intel Panther Point watchdog timer", 10 }, - { DEVICEID_PPT2, "Intel Panther Point watchdog timer", 10 }, - { DEVICEID_PPT3, "Intel Panther Point watchdog timer", 10 }, - { DEVICEID_PPT4, "Intel Panther Point watchdog timer", 10 }, - { DEVICEID_PPT5, "Intel Panther Point watchdog timer", 10 }, - { DEVICEID_PPT6, "Intel Panther Point watchdog timer", 10 }, - { DEVICEID_PPT7, "Intel Panther Point watchdog timer", 10 }, - { DEVICEID_PPT8, "Intel Panther Point watchdog timer", 10 }, - { DEVICEID_PPT9, "Intel Panther Point watchdog timer", 10 }, - { DEVICEID_PPT10, "Intel Panther Point watchdog timer", 10 }, - { DEVICEID_PPT11, "Intel Panther Point watchdog timer", 10 }, - { DEVICEID_PPT12, "Intel Panther Point watchdog timer", 10 }, - { DEVICEID_PPT13, "Intel Panther Point watchdog timer", 10 }, - { DEVICEID_PPT14, "Intel Panther Point watchdog timer", 10 }, - { DEVICEID_PPT15, "Intel Panther Point watchdog timer", 10 }, - { DEVICEID_PPT16, "Intel Panther Point watchdog timer", 10 }, - { DEVICEID_PPT17, "Intel Panther Point watchdog timer", 10 }, - { DEVICEID_PPT18, "Intel Panther Point watchdog timer", 10 }, - { DEVICEID_PPT19, "Intel Panther Point watchdog timer", 10 }, - { DEVICEID_PPT20, "Intel Panther Point watchdog timer", 10 }, - { DEVICEID_PPT21, "Intel Panther Point watchdog timer", 10 }, - { DEVICEID_PPT22, "Intel Panther Point watchdog timer", 10 }, - { DEVICEID_PPT23, "Intel Panther Point watchdog timer", 10 }, - { DEVICEID_PPT24, "Intel Panther Point watchdog timer", 10 }, - { DEVICEID_PPT25, "Intel Panther Point watchdog timer", 10 }, - { DEVICEID_PPT26, "Intel Panther Point watchdog timer", 10 }, - { DEVICEID_PPT27, "Intel Panther Point watchdog timer", 10 }, - { DEVICEID_PPT28, "Intel Panther Point watchdog timer", 10 }, - { DEVICEID_PPT29, "Intel Panther Point watchdog timer", 10 }, - { DEVICEID_PPT30, "Intel Panther Point watchdog timer", 10 }, - { DEVICEID_PPT31, "Intel Panther Point watchdog timer", 10 }, - { DEVICEID_LPT0, "Intel Lynx Point watchdog timer", 10 }, - { DEVICEID_LPT1, "Intel Lynx Point watchdog timer", 10 }, - { DEVICEID_LPT2, "Intel Lynx Point watchdog timer", 10 }, - { DEVICEID_LPT3, "Intel Lynx Point watchdog timer", 10 }, - { DEVICEID_LPT4, "Intel Lynx Point watchdog timer", 10 }, - { DEVICEID_LPT5, "Intel Lynx Point watchdog timer", 10 }, - { DEVICEID_LPT6, "Intel Lynx Point watchdog timer", 10 }, - { DEVICEID_LPT7, "Intel Lynx Point watchdog timer", 10 }, - { DEVICEID_LPT8, "Intel Lynx Point watchdog timer", 10 }, - { DEVICEID_LPT9, "Intel Lynx Point watchdog timer", 10 }, - { DEVICEID_LPT10, "Intel Lynx Point watchdog timer", 10 }, - { DEVICEID_LPT11, "Intel Lynx Point watchdog timer", 10 }, - { DEVICEID_LPT12, "Intel Lynx Point watchdog timer", 10 }, - { DEVICEID_LPT13, "Intel Lynx Point watchdog timer", 10 }, - { DEVICEID_LPT14, "Intel Lynx Point watchdog timer", 10 }, - { DEVICEID_LPT15, "Intel Lynx Point watchdog timer", 10 }, - { DEVICEID_LPT16, "Intel Lynx Point watchdog timer", 10 }, - { DEVICEID_LPT17, "Intel Lynx Point watchdog timer", 10 }, - { DEVICEID_LPT18, "Intel Lynx Point watchdog timer", 10 }, - { DEVICEID_LPT19, "Intel Lynx Point watchdog timer", 10 }, - { DEVICEID_LPT20, "Intel Lynx Point watchdog timer", 10 }, - { DEVICEID_LPT21, "Intel Lynx Point watchdog timer", 10 }, - { DEVICEID_LPT22, "Intel Lynx Point watchdog timer", 10 }, - { DEVICEID_LPT23, "Intel Lynx Point watchdog timer", 10 }, - { DEVICEID_LPT24, "Intel Lynx Point watchdog timer", 10 }, - { DEVICEID_LPT25, "Intel Lynx Point watchdog timer", 10 }, - { DEVICEID_LPT26, "Intel Lynx Point watchdog timer", 10 }, - { DEVICEID_LPT27, "Intel Lynx Point watchdog timer", 10 }, - { DEVICEID_LPT28, "Intel Lynx Point watchdog timer", 10 }, - { DEVICEID_LPT29, "Intel Lynx Point watchdog timer", 10 }, - { DEVICEID_LPT30, "Intel Lynx Point watchdog timer", 10 }, - { DEVICEID_LPT31, "Intel Lynx Point watchdog timer", 10 }, - { DEVICEID_WCPT2, "Intel Wildcat Point watchdog timer", 10 }, - { DEVICEID_WCPT4, "Intel Wildcat Point watchdog timer", 10 }, - { DEVICEID_WCPT6, "Intel Wildcat Point watchdog timer", 10 }, - { DEVICEID_DH89XXCC_LPC, "Intel DH89xxCC watchdog timer", 10 }, - { DEVICEID_COLETOCRK_LPC, "Intel Coleto Creek watchdog timer", 10 }, - { 0, NULL, 0 }, + { DEVICEID_82801AA, "Intel 82801AA watchdog timer", 1, 1 }, + { DEVICEID_82801AB, "Intel 82801AB watchdog timer", 1, 1 }, + { DEVICEID_82801BA, "Intel 82801BA watchdog timer", 2, 1 }, + { DEVICEID_82801BAM, "Intel 82801BAM watchdog timer", 2, 1 }, + { DEVICEID_82801CA, "Intel 82801CA watchdog timer", 3, 1 }, + { DEVICEID_82801CAM, "Intel 82801CAM watchdog timer", 3, 1 }, + { DEVICEID_82801DB, "Intel 82801DB watchdog timer", 4, 1 }, + { DEVICEID_82801DBM, "Intel 82801DBM watchdog timer", 4, 1 }, + { DEVICEID_82801E, "Intel 82801E watchdog timer", 5, 1 }, + { DEVICEID_82801EB, "Intel 82801EB watchdog timer", 5, 1 }, + { DEVICEID_82801EBR, "Intel 82801EB/ER watchdog timer", 5, 1 }, + { DEVICEID_6300ESB, "Intel 6300ESB watchdog timer", 5, 1 }, + { DEVICEID_82801FBR, "Intel 82801FB/FR watchdog timer", 6, 2 }, + { DEVICEID_ICH6M, "Intel ICH6M watchdog timer", 6, 2 }, + { DEVICEID_ICH6W, "Intel ICH6W watchdog timer", 6, 2 }, + { DEVICEID_ICH7, "Intel ICH7 watchdog timer", 7, 2 }, + { DEVICEID_ICH7DH, "Intel ICH7DH watchdog timer", 7, 2 }, + { DEVICEID_ICH7M, "Intel ICH7M watchdog timer", 7, 2 }, + { DEVICEID_ICH7MDH, "Intel ICH7MDH watchdog timer", 7, 2 }, + { DEVICEID_NM10, "Intel NM10 watchdog timer", 7, 2 }, + { DEVICEID_ICH8, "Intel ICH8 watchdog timer", 8, 2 }, + { DEVICEID_ICH8DH, "Intel ICH8DH watchdog timer", 8, 2 }, + { DEVICEID_ICH8DO, "Intel ICH8DO watchdog timer", 8, 2 }, + { DEVICEID_ICH8M, "Intel ICH8M watchdog timer", 8, 2 }, + { DEVICEID_ICH8ME, "Intel ICH8M-E watchdog timer", 8, 2 }, + { DEVICEID_63XXESB, "Intel 63XXESB watchdog timer", 8, 2 }, + { DEVICEID_ICH9, "Intel ICH9 watchdog timer", 9, 2 }, + { DEVICEID_ICH9DH, "Intel ICH9DH watchdog timer", 9, 2 }, + { DEVICEID_ICH9DO, "Intel ICH9DO watchdog timer", 9, 2 }, + { DEVICEID_ICH9M, "Intel ICH9M watchdog timer", 9, 2 }, + { DEVICEID_ICH9ME, "Intel ICH9M-E watchdog timer", 9, 2 }, + { DEVICEID_ICH9R, "Intel ICH9R watchdog timer", 9, 2 }, + { DEVICEID_ICH10, "Intel ICH10 watchdog timer", 10, 2 }, + { DEVICEID_ICH10D, "Intel ICH10D watchdog timer", 10, 2 }, + { DEVICEID_ICH10DO, "Intel ICH10DO watchdog timer", 10, 2 }, + { DEVICEID_ICH10R, "Intel ICH10R watchdog timer", 10, 2 }, + { DEVICEID_PCH, "Intel PCH watchdog timer", 10, 2 }, + { DEVICEID_PCHM, "Intel PCH watchdog timer", 10, 2 }, + { DEVICEID_P55, "Intel P55 watchdog timer", 10, 2 }, + { DEVICEID_PM55, "Intel PM55 watchdog timer", 10, 2 }, + { DEVICEID_H55, "Intel H55 watchdog timer", 10, 2 }, + { DEVICEID_QM57, "Intel QM57 watchdog timer", 10, 2 }, + { DEVICEID_H57, "Intel H57 watchdog timer", 10, 2 }, + { DEVICEID_HM55, "Intel HM55 watchdog timer", 10, 2 }, + { DEVICEID_Q57, "Intel Q57 watchdog timer", 10, 2 }, + { DEVICEID_HM57, "Intel HM57 watchdog timer", 10, 2 }, + { DEVICEID_PCHMSFF, "Intel PCHMSFF watchdog timer", 10, 2 }, + { DEVICEID_QS57, "Intel QS57 watchdog timer", 10, 2 }, + { DEVICEID_3400, "Intel 3400 watchdog timer", 10, 2 }, + { DEVICEID_3420, "Intel 3420 watchdog timer", 10, 2 }, + { DEVICEID_3450, "Intel 3450 watchdog timer", 10, 2 }, + { DEVICEID_CPT0, "Intel Cougar Point watchdog timer", 10, 2 }, + { DEVICEID_CPT1, "Intel Cougar Point watchdog timer", 10, 2 }, + { DEVICEID_CPT2, "Intel Cougar Point watchdog timer", 10, 2 }, + { DEVICEID_CPT3, "Intel Cougar Point watchdog timer", 10, 2 }, + { DEVICEID_CPT4, "Intel Cougar Point watchdog timer", 10, 2 }, + { DEVICEID_CPT5, "Intel Cougar Point watchdog timer", 10, 2 }, + { DEVICEID_CPT6, "Intel Cougar Point watchdog timer", 10, 2 }, + { DEVICEID_CPT7, "Intel Cougar Point watchdog timer", 10, 2 }, + { DEVICEID_CPT8, "Intel Cougar Point watchdog timer", 10, 2 }, + { DEVICEID_CPT9, "Intel Cougar Point watchdog timer", 10, 2 }, + { DEVICEID_CPT10, "Intel Cougar Point watchdog timer", 10, 2 }, + { DEVICEID_CPT11, "Intel Cougar Point watchdog timer", 10, 2 }, + { DEVICEID_CPT12, "Intel Cougar Point watchdog timer", 10, 2 }, + { DEVICEID_CPT13, "Intel Cougar Point watchdog timer", 10, 2 }, + { DEVICEID_CPT14, "Intel Cougar Point watchdog timer", 10, 2 }, + { DEVICEID_CPT15, "Intel Cougar Point watchdog timer", 10, 2 }, + { DEVICEID_CPT16, "Intel Cougar Point watchdog timer", 10, 2 }, + { DEVICEID_CPT17, "Intel Cougar Point watchdog timer", 10, 2 }, + { DEVICEID_CPT18, "Intel Cougar Point watchdog timer", 10, 2 }, + { DEVICEID_CPT19, "Intel Cougar Point watchdog timer", 10, 2 }, + { DEVICEID_CPT20, "Intel Cougar Point watchdog timer", 10, 2 }, + { DEVICEID_CPT21, "Intel Cougar Point watchdog timer", 10, 2 }, + { DEVICEID_CPT22, "Intel Cougar Point watchdog timer", 10, 2 }, + { DEVICEID_CPT23, "Intel Cougar Point watchdog timer", 10, 2 }, + { DEVICEID_CPT24, "Intel Cougar Point watchdog timer", 10, 2 }, + { DEVICEID_CPT25, "Intel Cougar Point watchdog timer", 10, 2 }, + { DEVICEID_CPT26, "Intel Cougar Point watchdog timer", 10, 2 }, + { DEVICEID_CPT27, "Intel Cougar Point watchdog timer", 10, 2 }, + { DEVICEID_CPT28, "Intel Cougar Point watchdog timer", 10, 2 }, + { DEVICEID_CPT29, "Intel Cougar Point watchdog timer", 10, 2 }, + { DEVICEID_CPT30, "Intel Cougar Point watchdog timer", 10, 2 }, + { DEVICEID_CPT31, "Intel Cougar Point watchdog timer", 10, 2 }, + { DEVICEID_PATSBURG_LPC1, "Intel Patsburg watchdog timer", 10, 2 }, + { DEVICEID_PATSBURG_LPC2, "Intel Patsburg watchdog timer", 10, 2 }, + { DEVICEID_PPT0, "Intel Panther Point watchdog timer", 10, 2 }, + { DEVICEID_PPT1, "Intel Panther Point watchdog timer", 10, 2 }, + { DEVICEID_PPT2, "Intel Panther Point watchdog timer", 10, 2 }, + { DEVICEID_PPT3, "Intel Panther Point watchdog timer", 10, 2 }, + { DEVICEID_PPT4, "Intel Panther Point watchdog timer", 10, 2 }, + { DEVICEID_PPT5, "Intel Panther Point watchdog timer", 10, 2 }, + { DEVICEID_PPT6, "Intel Panther Point watchdog timer", 10, 2 }, + { DEVICEID_PPT7, "Intel Panther Point watchdog timer", 10, 2 }, + { DEVICEID_PPT8, "Intel Panther Point watchdog timer", 10, 2 }, + { DEVICEID_PPT9, "Intel Panther Point watchdog timer", 10, 2 }, + { DEVICEID_PPT10, "Intel Panther Point watchdog timer", 10, 2 }, + { DEVICEID_PPT11, "Intel Panther Point watchdog timer", 10, 2 }, + { DEVICEID_PPT12, "Intel Panther Point watchdog timer", 10, 2 }, + { DEVICEID_PPT13, "Intel Panther Point watchdog timer", 10, 2 }, + { DEVICEID_PPT14, "Intel Panther Point watchdog timer", 10, 2 }, + { DEVICEID_PPT15, "Intel Panther Point watchdog timer", 10, 2 }, + { DEVICEID_PPT16, "Intel Panther Point watchdog timer", 10, 2 }, + { DEVICEID_PPT17, "Intel Panther Point watchdog timer", 10, 2 }, + { DEVICEID_PPT18, "Intel Panther Point watchdog timer", 10, 2 }, + { DEVICEID_PPT19, "Intel Panther Point watchdog timer", 10, 2 }, + { DEVICEID_PPT20, "Intel Panther Point watchdog timer", 10, 2 }, + { DEVICEID_PPT21, "Intel Panther Point watchdog timer", 10, 2 }, + { DEVICEID_PPT22, "Intel Panther Point watchdog timer", 10, 2 }, + { DEVICEID_PPT23, "Intel Panther Point watchdog timer", 10, 2 }, + { DEVICEID_PPT24, "Intel Panther Point watchdog timer", 10, 2 }, + { DEVICEID_PPT25, "Intel Panther Point watchdog timer", 10, 2 }, + { DEVICEID_PPT26, "Intel Panther Point watchdog timer", 10, 2 }, + { DEVICEID_PPT27, "Intel Panther Point watchdog timer", 10, 2 }, + { DEVICEID_PPT28, "Intel Panther Point watchdog timer", 10, 2 }, + { DEVICEID_PPT29, "Intel Panther Point watchdog timer", 10, 2 }, + { DEVICEID_PPT30, "Intel Panther Point watchdog timer", 10, 2 }, + { DEVICEID_PPT31, "Intel Panther Point watchdog timer", 10, 2 }, + { DEVICEID_LPT0, "Intel Lynx Point watchdog timer", 10, 2 }, + { DEVICEID_LPT1, "Intel Lynx Point watchdog timer", 10, 2 }, + { DEVICEID_LPT2, "Intel Lynx Point watchdog timer", 10, 2 }, + { DEVICEID_LPT3, "Intel Lynx Point watchdog timer", 10, 2 }, + { DEVICEID_LPT4, "Intel Lynx Point watchdog timer", 10, 2 }, + { DEVICEID_LPT5, "Intel Lynx Point watchdog timer", 10, 2 }, + { DEVICEID_LPT6, "Intel Lynx Point watchdog timer", 10, 2 }, + { DEVICEID_LPT7, "Intel Lynx Point watchdog timer", 10, 2 }, + { DEVICEID_LPT8, "Intel Lynx Point watchdog timer", 10, 2 }, + { DEVICEID_LPT9, "Intel Lynx Point watchdog timer", 10, 2 }, + { DEVICEID_LPT10, "Intel Lynx Point watchdog timer", 10, 2 }, + { DEVICEID_LPT11, "Intel Lynx Point watchdog timer", 10, 2 }, + { DEVICEID_LPT12, "Intel Lynx Point watchdog timer", 10, 2 }, + { DEVICEID_LPT13, "Intel Lynx Point watchdog timer", 10, 2 }, + { DEVICEID_LPT14, "Intel Lynx Point watchdog timer", 10, 2 }, + { DEVICEID_LPT15, "Intel Lynx Point watchdog timer", 10, 2 }, + { DEVICEID_LPT16, "Intel Lynx Point watchdog timer", 10, 2 }, + { DEVICEID_LPT17, "Intel Lynx Point watchdog timer", 10, 2 }, + { DEVICEID_LPT18, "Intel Lynx Point watchdog timer", 10, 2 }, + { DEVICEID_LPT19, "Intel Lynx Point watchdog timer", 10, 2 }, + { DEVICEID_LPT20, "Intel Lynx Point watchdog timer", 10, 2 }, + { DEVICEID_LPT21, "Intel Lynx Point watchdog timer", 10, 2 }, + { DEVICEID_LPT22, "Intel Lynx Point watchdog timer", 10, 2 }, + { DEVICEID_LPT23, "Intel Lynx Point watchdog timer", 10, 2 }, + { DEVICEID_LPT24, "Intel Lynx Point watchdog timer", 10, 2 }, + { DEVICEID_LPT25, "Intel Lynx Point watchdog timer", 10, 2 }, + { DEVICEID_LPT26, "Intel Lynx Point watchdog timer", 10, 2 }, + { DEVICEID_LPT27, "Intel Lynx Point watchdog timer", 10, 2 }, + { DEVICEID_LPT28, "Intel Lynx Point watchdog timer", 10, 2 }, + { DEVICEID_LPT29, "Intel Lynx Point watchdog timer", 10, 2 }, + { DEVICEID_LPT30, "Intel Lynx Point watchdog timer", 10, 2 }, + { DEVICEID_LPT31, "Intel Lynx Point watchdog timer", 10, 2 }, + { DEVICEID_WCPT1, "Intel Wildcat Point watchdog timer", 10, 2 }, + { DEVICEID_WCPT2, "Intel Wildcat Point watchdog timer", 10, 2 }, + { DEVICEID_WCPT3, "Intel Wildcat Point watchdog timer", 10, 2 }, + { DEVICEID_WCPT4, "Intel Wildcat Point watchdog timer", 10, 2 }, + { DEVICEID_WCPT6, "Intel Wildcat Point watchdog timer", 10, 2 }, + { DEVICEID_WBG0, "Intel Wellsburg watchdog timer", 10, 2 }, + { DEVICEID_WBG1, "Intel Wellsburg watchdog timer", 10, 2 }, + { DEVICEID_WBG2, "Intel Wellsburg watchdog timer", 10, 2 }, + { DEVICEID_WBG3, "Intel Wellsburg watchdog timer", 10, 2 }, + { DEVICEID_WBG4, "Intel Wellsburg watchdog timer", 10, 2 }, + { DEVICEID_WBG5, "Intel Wellsburg watchdog timer", 10, 2 }, + { DEVICEID_WBG6, "Intel Wellsburg watchdog timer", 10, 2 }, + { DEVICEID_WBG7, "Intel Wellsburg watchdog timer", 10, 2 }, + { DEVICEID_WBG8, "Intel Wellsburg watchdog timer", 10, 2 }, + { DEVICEID_WBG9, "Intel Wellsburg watchdog timer", 10, 2 }, + { DEVICEID_WBG10, "Intel Wellsburg watchdog timer", 10, 2 }, + { DEVICEID_WBG11, "Intel Wellsburg watchdog timer", 10, 2 }, + { DEVICEID_WBG12, "Intel Wellsburg watchdog timer", 10, 2 }, + { DEVICEID_WBG13, "Intel Wellsburg watchdog timer", 10, 2 }, + { DEVICEID_WBG14, "Intel Wellsburg watchdog timer", 10, 2 }, + { DEVICEID_WBG15, "Intel Wellsburg watchdog timer", 10, 2 }, + { DEVICEID_WBG16, "Intel Wellsburg watchdog timer", 10, 2 }, + { DEVICEID_WBG17, "Intel Wellsburg watchdog timer", 10, 2 }, + { DEVICEID_WBG18, "Intel Wellsburg watchdog timer", 10, 2 }, + { DEVICEID_WBG19, "Intel Wellsburg watchdog timer", 10, 2 }, + { DEVICEID_WBG20, "Intel Wellsburg watchdog timer", 10, 2 }, + { DEVICEID_WBG21, "Intel Wellsburg watchdog timer", 10, 2 }, + { DEVICEID_WBG22, "Intel Wellsburg watchdog timer", 10, 2 }, + { DEVICEID_WBG23, "Intel Wellsburg watchdog timer", 10, 2 }, + { DEVICEID_WBG24, "Intel Wellsburg watchdog timer", 10, 2 }, + { DEVICEID_WBG25, "Intel Wellsburg watchdog timer", 10, 2 }, + { DEVICEID_WBG26, "Intel Wellsburg watchdog timer", 10, 2 }, + { DEVICEID_WBG27, "Intel Wellsburg watchdog timer", 10, 2 }, + { DEVICEID_WBG28, "Intel Wellsburg watchdog timer", 10, 2 }, + { DEVICEID_WBG29, "Intel Wellsburg watchdog timer", 10, 2 }, + { DEVICEID_WBG30, "Intel Wellsburg watchdog timer", 10, 2 }, + { DEVICEID_WBG31, "Intel Wellsburg watchdog timer", 10, 2 }, + { DEVICEID_LPT_LP0, "Intel Lynx Point-LP watchdog timer", 10, 2 }, + { DEVICEID_LPT_LP1, "Intel Lynx Point-LP watchdog timer", 10, 2 }, + { DEVICEID_LPT_LP2, "Intel Lynx Point-LP watchdog timer", 10, 2 }, + { DEVICEID_LPT_LP3, "Intel Lynx Point-LP watchdog timer", 10, 2 }, + { DEVICEID_LPT_LP4, "Intel Lynx Point-LP watchdog timer", 10, 2 }, + { DEVICEID_LPT_LP5, "Intel Lynx Point-LP watchdog timer", 10, 2 }, + { DEVICEID_LPT_LP6, "Intel Lynx Point-LP watchdog timer", 10, 2 }, + { DEVICEID_LPT_LP7, "Intel Lynx Point-LP watchdog timer", 10, 2 }, + { DEVICEID_WCPT_LP1, "Intel Wildcat Point-LP watchdog timer", 10, 2 }, + { DEVICEID_WCPT_LP2, "Intel Wildcat Point-LP watchdog timer", 10, 2 }, + { DEVICEID_WCPT_LP3, "Intel Wildcat Point-LP watchdog timer", 10, 2 }, + { DEVICEID_WCPT_LP5, "Intel Wildcat Point-LP watchdog timer", 10, 2 }, + { DEVICEID_WCPT_LP6, "Intel Wildcat Point-LP watchdog timer", 10, 2 }, + { DEVICEID_WCPT_LP7, "Intel Wildcat Point-LP watchdog timer", 10, 2 }, + { DEVICEID_WCPT_LP9, "Intel Wildcat Point-LP watchdog timer", 10, 2 }, + { DEVICEID_DH89XXCC_LPC, "Intel DH89xxCC watchdog timer", 10, 2 }, + { DEVICEID_COLETOCRK_LPC, "Intel Coleto Creek watchdog timer", 10, 2 }, + { DEVICEID_AVN0, "Intel Avoton/Rangeley SoC watchdog timer",10, 3 }, + { DEVICEID_AVN1, "Intel Avoton/Rangeley SoC watchdog timer",10, 3 }, + { DEVICEID_AVN2, "Intel Avoton/Rangeley SoC watchdog timer",10, 3 }, + { DEVICEID_AVN3, "Intel Avoton/Rangeley SoC watchdog timer",10, 3 }, + { DEVICEID_BAYTRAIL, "Intel Bay Trail SoC watchdog timer", 10, 3 }, + { DEVICEID_BRASWELL, "Intel Braswell SoC watchdog timer", 10, 3 }, + { 0, NULL, 0, 0 }, }; static devclass_t ichwd_devclass; @@ -241,7 +297,7 @@ static devclass_t ichwd_devclass; bus_read_4((sc)->tco_res, (off)) #define ichwd_read_smi_4(sc, off) \ bus_read_4((sc)->smi_res, (off)) -#define ichwd_read_gcs_4(sc, off) \ +#define ichwd_read_gcs_pmc_4(sc, off) \ bus_read_4((sc)->gcs_res, (off)) #define ichwd_write_tco_1(sc, off, val) \ @@ -252,7 +308,7 @@ static devclass_t ichwd_devclass; bus_write_4((sc)->tco_res, (off), (val)) #define ichwd_write_smi_4(sc, off, val) \ bus_write_4((sc)->smi_res, (off), (val)) -#define ichwd_write_gcs_4(sc, off, val) \ +#define ichwd_write_gcs_pmc_4(sc, off, val) \ bus_write_4((sc)->gcs_res, (off), (val)) #define ichwd_verbose_printf(dev, ...) \ @@ -352,7 +408,7 @@ ichwd_tmr_disable(struct ichwd_softc *sc) static __inline void ichwd_tmr_reload(struct ichwd_softc *sc) { - if (sc->ich_version <= 5) + if (sc->tco_version == 1) ichwd_write_tco_1(sc, TCO_RLD, 1); else ichwd_write_tco_2(sc, TCO_RLD, 1); @@ -369,7 +425,7 @@ ichwd_tmr_set(struct ichwd_softc *sc, unsigned int timeout) if (timeout < TCO_RLD_TMR_MIN) timeout = TCO_RLD_TMR_MIN; - if (sc->ich_version <= 5) { + if (sc->tco_version == 1) { uint8_t tmr_val8 = ichwd_read_tco_1(sc, TCO_TMR1); tmr_val8 &= (~TCO_RLD1_TMR_MAX & 0xff); @@ -399,20 +455,40 @@ ichwd_clear_noreboot(struct ichwd_softc *sc) int rc = 0; /* try to clear the NO_REBOOT bit */ - if (sc->ich_version <= 5) { - status = pci_read_config(sc->ich, ICH_GEN_STA, 1); - status &= ~ICH_GEN_STA_NO_REBOOT; - pci_write_config(sc->ich, ICH_GEN_STA, status, 1); - status = pci_read_config(sc->ich, ICH_GEN_STA, 1); - if (status & ICH_GEN_STA_NO_REBOOT) - rc = EIO; - } else { - status = ichwd_read_gcs_4(sc, 0); - status &= ~ICH_GCS_NO_REBOOT; - ichwd_write_gcs_4(sc, 0, status); - status = ichwd_read_gcs_4(sc, 0); - if (status & ICH_GCS_NO_REBOOT) - rc = EIO; + switch (sc->tco_version) + { + case 1: + status = pci_read_config(sc->ich, ICH_GEN_STA, 1); + status &= ~ICH_GEN_STA_NO_REBOOT; + pci_write_config(sc->ich, ICH_GEN_STA, status, 1); + status = pci_read_config(sc->ich, ICH_GEN_STA, 1); + if (status & ICH_GEN_STA_NO_REBOOT) + rc = EIO; + break; + + case 2: + status = ichwd_read_gcs_pmc_4(sc, 0); + status &= ~ICH_GCS_NO_REBOOT; + ichwd_write_gcs_pmc_4(sc, 0, status); + status = ichwd_read_gcs_pmc_4(sc, 0); + if (status & ICH_GCS_NO_REBOOT) + rc = EIO; + break; + + case 3: + status = ichwd_read_gcs_pmc_4(sc, 0); + status &= ~ICH_PMC_NO_REBOOT; + ichwd_write_gcs_pmc_4(sc, 0, status); + status = ichwd_read_gcs_pmc_4(sc, 0); + if (status & ICH_PMC_NO_REBOOT) + rc = EIO; + break; + + default: + ichwd_verbose_printf(sc->device, + "Unknown TCO Version: %d, can't set NO_REBOOT.\n", + sc->tco_version); + break; } if (rc) @@ -463,7 +539,7 @@ ichwd_find_ich_lpc_bridge(struct ichwd_device **id_p) return (NULL); ichwd_verbose_printf(ich, "found ICH%d or equivalent chipset: %s\n", - id->version, id->desc); + id->ich_version, id->desc); if (id_p) *id_p = id; @@ -481,7 +557,7 @@ ichwd_identify(driver_t *driver, device_t parent) struct ichwd_device *id_p; device_t ich = NULL; device_t dev; - uint32_t rcba; + uint32_t base_address; int rc; ich = ichwd_find_ich_lpc_bridge(&id_p); @@ -497,14 +573,40 @@ ichwd_identify(driver_t *driver, device_t parent) device_set_desc_copy(dev, id_p->desc); - if (id_p->version >= 6) { - /* get RCBA (root complex base address) */ - rcba = pci_read_config(ich, ICH_RCBA, 4); - rc = bus_set_resource(ich, SYS_RES_MEMORY, 0, - (rcba & 0xffffc000) + ICH_GCS_OFFSET, ICH_GCS_SIZE); - if (rc) + switch (id_p->tco_version) + { + case 1: + break; + + case 2: + /* get RCBA (root complex base address) */ + base_address = pci_read_config(ich, ICH_RCBA, 4); + rc = bus_set_resource(ich, SYS_RES_MEMORY, 0, + (base_address & 0xffffc000) + ICH_GCS_OFFSET, + ICH_GCS_SIZE); + if (rc) + ichwd_verbose_printf(dev, + "Can not set TCO v%d memory resource for RCBA\n", + id_p->tco_version); + break; + + case 3: + /* get PBASE (Power Management Controller base address) */ + base_address = pci_read_config(ich, ICH_PBASE, 4); + rc = bus_set_resource(ich, SYS_RES_MEMORY, 0, + (base_address & 0xfffffe00) + ICH_PMC_OFFSET, + ICH_PMC_SIZE); + if (rc) + ichwd_verbose_printf(dev, + "Can not set TCO v%d memory resource for PBASE\n", + id_p->tco_version); + break; + + default: ichwd_verbose_printf(dev, - "Can not set memory resource for RCBA\n"); + "Can not set unknown TCO v%d memory resource for unknown base address\n", + id_p->tco_version); + break; } } @@ -535,7 +637,8 @@ ichwd_attach(device_t dev) goto fail; } sc->ich = ich; - sc->ich_version = id_p->version; + sc->ich_version = id_p->ich_version; + sc->tco_version = id_p->tco_version; /* get ACPI base address */ pmbase = pci_read_config(ich, ICH_PMBASE, 2) & ICH_PMBASE_MASK; @@ -564,7 +667,7 @@ ichwd_attach(device_t dev) } sc->gcs_rid = 0; - if (sc->ich_version >= 6) { + if (sc->tco_version >= 2) { sc->gcs_res = bus_alloc_resource_any(ich, SYS_RES_MEMORY, &sc->gcs_rid, RF_ACTIVE|RF_SHAREABLE); if (sc->gcs_res == NULL) { @@ -577,7 +680,7 @@ ichwd_attach(device_t dev) goto fail; ichwd_verbose_printf(dev, "%s (ICH%d or equivalent)\n", - device_get_desc(dev), sc->ich_version); + id_p->desc, sc->ich_version); /* * Determine if we are coming up after a watchdog-induced reset. Some diff --git a/sys/dev/ichwd/ichwd.h b/sys/dev/ichwd/ichwd.h index 0699279..dc24a9d 100644 --- a/sys/dev/ichwd/ichwd.h +++ b/sys/dev/ichwd/ichwd.h @@ -34,13 +34,15 @@ struct ichwd_device { uint16_t device; char *desc; - unsigned int version; + unsigned int ich_version; + unsigned int tco_version; }; struct ichwd_softc { device_t device; device_t ich; int ich_version; + int tco_version; int active; unsigned int timeout; @@ -59,6 +61,7 @@ struct ichwd_softc { }; #define VENDORID_INTEL 0x8086 +#define DEVICEID_BAYTRAIL 0x0f1c #define DEVICEID_CPT0 0x1c40 #define DEVICEID_CPT1 0x1c41 #define DEVICEID_CPT2 0x1c42 @@ -125,6 +128,11 @@ struct ichwd_softc { #define DEVICEID_PPT29 0x1e5d #define DEVICEID_PPT30 0x1e5e #define DEVICEID_PPT31 0x1e5f +#define DEVICEID_AVN0 0x1f38 +#define DEVICEID_AVN1 0x1f39 +#define DEVICEID_AVN2 0x1f3a +#define DEVICEID_AVN3 0x1f3b +#define DEVICEID_BRASWELL 0x229c #define DEVICEID_DH89XXCC_LPC 0x2310 #define DEVICEID_COLETOCRK_LPC 0x2390 #define DEVICEID_82801AA 0x2410 @@ -210,9 +218,58 @@ struct ichwd_softc { #define DEVICEID_LPT29 0x8c5d #define DEVICEID_LPT30 0x8c5e #define DEVICEID_LPT31 0x8c5f +#define DEVICEID_WCPT1 0x8cc1 #define DEVICEID_WCPT2 0x8cc2 +#define DEVICEID_WCPT3 0x8cc3 #define DEVICEID_WCPT4 0x8cc4 #define DEVICEID_WCPT6 0x8cc6 +#define DEVICEID_WBG0 0x8d40 +#define DEVICEID_WBG1 0x8d41 +#define DEVICEID_WBG2 0x8d42 +#define DEVICEID_WBG3 0x8d43 +#define DEVICEID_WBG4 0x8d44 +#define DEVICEID_WBG5 0x8d45 +#define DEVICEID_WBG6 0x8d46 +#define DEVICEID_WBG7 0x8d47 +#define DEVICEID_WBG8 0x8d48 +#define DEVICEID_WBG9 0x8d49 +#define DEVICEID_WBG10 0x8d4a +#define DEVICEID_WBG11 0x8d4b +#define DEVICEID_WBG12 0x8d4c +#define DEVICEID_WBG13 0x8d4d +#define DEVICEID_WBG14 0x8d4e +#define DEVICEID_WBG15 0x8d4f +#define DEVICEID_WBG16 0x8d50 +#define DEVICEID_WBG17 0x8d51 +#define DEVICEID_WBG18 0x8d52 +#define DEVICEID_WBG19 0x8d53 +#define DEVICEID_WBG20 0x8d54 +#define DEVICEID_WBG21 0x8d55 +#define DEVICEID_WBG22 0x8d56 +#define DEVICEID_WBG23 0x8d57 +#define DEVICEID_WBG24 0x8d58 +#define DEVICEID_WBG25 0x8d59 +#define DEVICEID_WBG26 0x8d5a +#define DEVICEID_WBG27 0x8d5b +#define DEVICEID_WBG28 0x8d5c +#define DEVICEID_WBG29 0x8d5d +#define DEVICEID_WBG30 0x8d5e +#define DEVICEID_WBG31 0x8d5f +#define DEVICEID_LPT_LP0 0x9c40 +#define DEVICEID_LPT_LP1 0x9c41 +#define DEVICEID_LPT_LP2 0x9c42 +#define DEVICEID_LPT_LP3 0x9c43 +#define DEVICEID_LPT_LP4 0x9c44 +#define DEVICEID_LPT_LP5 0x9c45 +#define DEVICEID_LPT_LP6 0x9c46 +#define DEVICEID_LPT_LP7 0x9c47 +#define DEVICEID_WCPT_LP1 0x9cc1 +#define DEVICEID_WCPT_LP2 0x9cc2 +#define DEVICEID_WCPT_LP3 0x9cc3 +#define DEVICEID_WCPT_LP5 0x9cc5 +#define DEVICEID_WCPT_LP6 0x9cc6 +#define DEVICEID_WCPT_LP7 0x9cc7 +#define DEVICEID_WCPT_LP9 0x9cc9 /* ICH LPC Interface Bridge Registers (ICH5 and older) */ #define ICH_GEN_STA 0xd4 @@ -226,6 +283,12 @@ struct ichwd_softc { #define ICH_GCS_SIZE 0x4 #define ICH_GCS_NO_REBOOT 0x20 +/* SoC Power Management Configuration Registers */ +#define ICH_PBASE 0x44 +#define ICH_PMC_OFFSET 0x08 +#define ICH_PMC_SIZE 0x4 +#define ICH_PMC_NO_REBOOT 0x10 + /* register names and locations (relative to PMBASE) */ #define SMI_BASE 0x30 /* base address for SMI registers */ #define SMI_LEN 0x08 diff --git a/sys/dev/ixgbe/if_ix.c b/sys/dev/ixgbe/if_ix.c index b06c485..d72833d 100644 --- a/sys/dev/ixgbe/if_ix.c +++ b/sys/dev/ixgbe/if_ix.c @@ -36,6 +36,9 @@ #ifndef IXGBE_STANDALONE_BUILD #include "opt_inet.h" #include "opt_inet6.h" +#ifdef RSS +#include "opt_rss.h" +#endif #endif #include "ixgbe.h" @@ -48,7 +51,7 @@ /********************************************************************* * Driver version *********************************************************************/ -char ixgbe_driver_version[] = "3.1.13-k"; +char ixgbe_driver_version[] = "3.1.14"; /********************************************************************* @@ -165,14 +168,12 @@ static void ixgbe_unregister_vlan(void *, struct ifnet *, u16); static void ixgbe_add_device_sysctls(struct adapter *); static void ixgbe_add_hw_stats(struct adapter *); -static int ixgbe_set_flowcntl(struct adapter *, int); -static int ixgbe_set_advertise(struct adapter *, int); /* Sysctl handlers */ static void ixgbe_set_sysctl_value(struct adapter *, const char *, const char *, int *, int); -static int ixgbe_sysctl_flowcntl(SYSCTL_HANDLER_ARGS); -static int ixgbe_sysctl_advertise(SYSCTL_HANDLER_ARGS); +static int ixgbe_set_flowcntl(SYSCTL_HANDLER_ARGS); +static int ixgbe_set_advertise(SYSCTL_HANDLER_ARGS); static int ixgbe_sysctl_thermal_test(SYSCTL_HANDLER_ARGS); static int ixgbe_sysctl_dmac(SYSCTL_HANDLER_ARGS); static int ixgbe_sysctl_phy_temp(SYSCTL_HANDLER_ARGS); @@ -183,11 +184,6 @@ static int ixgbe_sysctl_print_rss_config(SYSCTL_HANDLER_ARGS); #endif static int ixgbe_sysctl_wol_enable(SYSCTL_HANDLER_ARGS); static int ixgbe_sysctl_wufc(SYSCTL_HANDLER_ARGS); -static int ixgbe_sysctl_eee_enable(SYSCTL_HANDLER_ARGS); -static int ixgbe_sysctl_eee_negotiated(SYSCTL_HANDLER_ARGS); -static int ixgbe_sysctl_eee_rx_lpi_status(SYSCTL_HANDLER_ARGS); -static int ixgbe_sysctl_eee_tx_lpi_status(SYSCTL_HANDLER_ARGS); -static int ixgbe_sysctl_eee_tx_lpi_delay(SYSCTL_HANDLER_ARGS); /* Support for pluggable optic modules */ static bool ixgbe_sfp_probe(struct adapter *); @@ -211,6 +207,7 @@ static void ixgbe_handle_phy(void *, int); static void ixgbe_reinit_fdir(void *, int); #endif + #ifdef PCI_IOV static void ixgbe_ping_all_vfs(struct adapter *); static void ixgbe_handle_mbx(void *, int); @@ -270,18 +267,16 @@ static SYSCTL_NODE(_hw, OID_AUTO, ix, CTLFLAG_RD, 0, ** traffic for that interrupt vector */ static int ixgbe_enable_aim = TRUE; -TUNABLE_INT("hw.ix.enable_aim", &ixgbe_enable_aim); SYSCTL_INT(_hw_ix, OID_AUTO, enable_aim, CTLFLAG_RWTUN, &ixgbe_enable_aim, 0, "Enable adaptive interrupt moderation"); static int ixgbe_max_interrupt_rate = (4000000 / IXGBE_LOW_LATENCY); -TUNABLE_INT("hw.ix.max_interrupt_rate", &ixgbe_max_interrupt_rate); SYSCTL_INT(_hw_ix, OID_AUTO, max_interrupt_rate, CTLFLAG_RDTUN, &ixgbe_max_interrupt_rate, 0, "Maximum interrupts per second"); /* How many packets rxeof tries to clean at a time */ static int ixgbe_rx_process_limit = 256; -TUNABLE_INT("hw.ix.rx_process_limit", &ixgbe_rx_process_limit); +TUNABLE_INT("hw.ixgbe.rx_process_limit", &ixgbe_rx_process_limit); SYSCTL_INT(_hw_ix, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN, &ixgbe_rx_process_limit, 0, "Maximum number of received packets to process at a time," @@ -289,22 +284,12 @@ SYSCTL_INT(_hw_ix, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN, /* How many packets txeof tries to clean at a time */ static int ixgbe_tx_process_limit = 256; -TUNABLE_INT("hw.ix.tx_process_limit", &ixgbe_tx_process_limit); +TUNABLE_INT("hw.ixgbe.tx_process_limit", &ixgbe_tx_process_limit); SYSCTL_INT(_hw_ix, OID_AUTO, tx_process_limit, CTLFLAG_RDTUN, &ixgbe_tx_process_limit, 0, "Maximum number of sent packets to process at a time," "-1 means unlimited"); -/* Flow control setting, default to full */ -static int ixgbe_flow_control = ixgbe_fc_full; -SYSCTL_INT(_hw_ix, OID_AUTO, flow_control, CTLFLAG_RDTUN, - &ixgbe_flow_control, 0, "Default flow control used for all adapters"); - -/* Advertise Speed, default to 0 (auto) */ -static int ixgbe_advertise_speed = 0; -SYSCTL_INT(_hw_ix, OID_AUTO, advertise_speed, CTLFLAG_RDTUN, - &ixgbe_advertise_speed, 0, "Default advertised speed for all adapters"); - /* ** Smart speed setting, default to on ** this only works as a compile option @@ -319,7 +304,6 @@ static int ixgbe_smart_speed = ixgbe_smart_speed_on; * but this allows it to be forced off for testing. */ static int ixgbe_enable_msix = 1; -TUNABLE_INT("hw.ix.enable_msix", &ixgbe_enable_msix); SYSCTL_INT(_hw_ix, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &ixgbe_enable_msix, 0, "Enable MSI-X interrupts"); @@ -330,10 +314,8 @@ SYSCTL_INT(_hw_ix, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &ixgbe_enable_msix, 0, * can be overriden manually here. */ static int ixgbe_num_queues = 0; -TUNABLE_INT("hw.ix.num_queues", &ixgbe_num_queues); SYSCTL_INT(_hw_ix, OID_AUTO, num_queues, CTLFLAG_RDTUN, &ixgbe_num_queues, 0, - "Number of queues to configure up to a mximum of 8," - "0 indicates autoconfigure"); + "Number of queues to configure, 0 indicates autoconfigure"); /* ** Number of TX descriptors per ring, @@ -341,13 +323,11 @@ SYSCTL_INT(_hw_ix, OID_AUTO, num_queues, CTLFLAG_RDTUN, &ixgbe_num_queues, 0, ** the better performing choice. */ static int ixgbe_txd = PERFORM_TXD; -TUNABLE_INT("hw.ix.txd", &ixgbe_txd); SYSCTL_INT(_hw_ix, OID_AUTO, txd, CTLFLAG_RDTUN, &ixgbe_txd, 0, "Number of transmit descriptors per queue"); /* Number of RX descriptors per ring */ static int ixgbe_rxd = PERFORM_RXD; -TUNABLE_INT("hw.ix.rxd", &ixgbe_rxd); SYSCTL_INT(_hw_ix, OID_AUTO, rxd, CTLFLAG_RDTUN, &ixgbe_rxd, 0, "Number of receive descriptors per queue"); @@ -469,6 +449,7 @@ ixgbe_attach(device_t dev) adapter->init_locked = ixgbe_init_locked; adapter->stop_locked = ixgbe_stop; #endif + /* Core Lock Init*/ IXGBE_CORE_LOCK_INIT(adapter, device_get_nameunit(dev)); @@ -587,11 +568,6 @@ ixgbe_attach(device_t dev) break; } - /* hw.ix defaults init */ - ixgbe_set_advertise(adapter, ixgbe_advertise_speed); - ixgbe_set_flowcntl(adapter, ixgbe_flow_control); - adapter->enable_aim = ixgbe_enable_aim; - if ((adapter->msix > 1) && (ixgbe_enable_msix)) error = ixgbe_allocate_msix(adapter); else @@ -599,12 +575,6 @@ ixgbe_attach(device_t dev) if (error) goto err_late; - /* Enable the optics for 82599 SFP+ fiber */ - ixgbe_enable_tx_laser(hw); - - /* Enable power to the phy. */ - ixgbe_set_phy_power(hw, TRUE); - /* Setup OS specific network interface */ if (ixgbe_setup_interface(dev, adapter) != 0) goto err_late; @@ -624,7 +594,6 @@ ixgbe_attach(device_t dev) /* Set an initial default flow control & dmac value */ adapter->fc = ixgbe_fc_full; adapter->dmac = 0; - adapter->eee_enabled = 0; #ifdef PCI_IOV if ((hw->mac.type != ixgbe_mac_82598EB) && (adapter->msix > 1)) { @@ -978,7 +947,7 @@ ixgbe_ioctl(struct ifnet * ifp, u_long command, caddr_t data) VLAN_CAPABILITIES(ifp); break; } -#if __FreeBSD_version >= 1002500 +#if __FreeBSD_version >= 1100036 case SIOCGI2C: { struct ixgbe_hw *hw = &adapter->hw; @@ -1270,16 +1239,6 @@ ixgbe_init_locked(struct adapter *adapter) /* Set moderation on the Link interrupt */ IXGBE_WRITE_REG(hw, IXGBE_EITR(adapter->vector), IXGBE_LINK_ITR); - /* Configure Energy Efficient Ethernet for supported devices */ - if (hw->mac.ops.setup_eee) { - err = hw->mac.ops.setup_eee(hw, adapter->eee_enabled); - if (err) - device_printf(dev, "Error setting up EEE: %d\n", err); - } - - /* Enable power to the phy. */ - ixgbe_set_phy_power(hw, TRUE); - /* Config/Enable Link */ ixgbe_config_link(adapter); @@ -1590,7 +1549,7 @@ ixgbe_msix_que(void *arg) /* Do AIM now? */ - if (adapter->enable_aim == FALSE) + if (ixgbe_enable_aim == FALSE) goto no_calc; /* ** Do Adaptive Interrupt Moderation: @@ -1985,16 +1944,10 @@ ixgbe_media_change(struct ifnet * ifp) hw->mac.autotry_restart = TRUE; hw->mac.ops.setup_link(hw, speed, TRUE); - if (IFM_SUBTYPE(ifm->ifm_media) == IFM_AUTO) { - adapter->advertise = 0; - } else { - if ((speed & IXGBE_LINK_SPEED_10GB_FULL) != 0) - adapter->advertise |= 1 << 2; - if ((speed & IXGBE_LINK_SPEED_1GB_FULL) != 0) - adapter->advertise |= 1 << 1; - if ((speed & IXGBE_LINK_SPEED_100_FULL) != 0) - adapter->advertise |= 1 << 0; - } + adapter->advertise = + ((speed & IXGBE_LINK_SPEED_10GB_FULL) << 2) | + ((speed & IXGBE_LINK_SPEED_1GB_FULL) << 1) | + ((speed & IXGBE_LINK_SPEED_100_FULL) << 0); return (0); @@ -2830,7 +2783,7 @@ ixgbe_setup_interface(device_t dev, struct adapter *adapter) return (-1); } if_initname(ifp, device_get_name(dev), device_get_unit(dev)); - if_initbaudrate(ifp, IF_Gbps(10)); + ifp->if_baudrate = IF_Gbps(10); ifp->if_init = ixgbe_init; ifp->if_softc = adapter; ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; @@ -2990,7 +2943,12 @@ ixgbe_config_link(struct adapter *adapter) sfp = ixgbe_is_sfp(hw); if (sfp) { - taskqueue_enqueue(adapter->tq, &adapter->mod_task); + if (hw->phy.multispeed_fiber) { + hw->mac.ops.setup_sfp(hw); + ixgbe_enable_tx_laser(hw); + taskqueue_enqueue(adapter->tq, &adapter->msf_task); + } else + taskqueue_enqueue(adapter->tq, &adapter->mod_task); } else { if (hw->mac.ops.check_link) err = ixgbe_check_link(hw, &adapter->link_speed, @@ -3796,66 +3754,23 @@ ixgbe_handle_mod(void *context, int pending) { struct adapter *adapter = context; struct ixgbe_hw *hw = &adapter->hw; - enum ixgbe_phy_type orig_type = hw->phy.type; device_t dev = adapter->dev; u32 err; - IXGBE_CORE_LOCK(adapter); - - /* Check to see if the PHY type changed */ - if (hw->phy.ops.identify) { - hw->phy.type = ixgbe_phy_unknown; - hw->phy.ops.identify(hw); - } - - if (hw->phy.type != orig_type) { - device_printf(dev, "Detected phy_type %d\n", hw->phy.type); - - if (hw->phy.type == ixgbe_phy_none) { - hw->phy.sfp_type = ixgbe_sfp_type_unknown; - goto out; - } - - /* Try to do the initialization that was skipped before */ - if (hw->phy.ops.init) - hw->phy.ops.init(hw); - if (hw->phy.ops.reset) - hw->phy.ops.reset(hw); - } - err = hw->phy.ops.identify_sfp(hw); if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) { device_printf(dev, "Unsupported SFP+ module type was detected.\n"); - goto out; + return; } err = hw->mac.ops.setup_sfp(hw); if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) { device_printf(dev, "Setup failure - unsupported SFP+ module type.\n"); - goto out; - } - if (hw->phy.multispeed_fiber) - taskqueue_enqueue(adapter->tq, &adapter->msf_task); -out: - /* Update media type */ - switch (hw->mac.ops.get_media_type(hw)) { - case ixgbe_media_type_fiber: - adapter->optics = IFM_10G_SR; - break; - case ixgbe_media_type_copper: - adapter->optics = IFM_10G_TWINAX; - break; - case ixgbe_media_type_cx4: - adapter->optics = IFM_10G_CX4; - break; - default: - adapter->optics = 0; - break; + return; } - - IXGBE_CORE_UNLOCK(adapter); + taskqueue_enqueue(adapter->tq, &adapter->msf_task); return; } @@ -3871,7 +3786,6 @@ ixgbe_handle_msf(void *context, int pending) u32 autoneg; bool negotiate; - IXGBE_CORE_LOCK(adapter); /* get_supported_phy_layer will call hw->phy.ops.identify_sfp() */ adapter->phy_layer = ixgbe_get_supported_physical_layer(hw); @@ -3884,7 +3798,6 @@ ixgbe_handle_msf(void *context, int pending) /* Adjust media types shown in ifconfig */ ifmedia_removeall(&adapter->media); ixgbe_add_media_types(adapter); - IXGBE_CORE_UNLOCK(adapter); return; } @@ -4000,9 +3913,6 @@ ixgbe_setup_low_power_mode(struct adapter *adapter) mtx_assert(&adapter->core_mtx, MA_OWNED); - if (!hw->wol_enabled) - ixgbe_set_phy_power(hw, FALSE); - /* Limit power management flow to X550EM baseT */ if (hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T && hw->phy.ops.enter_lplu) { @@ -4316,7 +4226,7 @@ ixgbe_add_device_sysctls(struct adapter *adapter) /* Sysctls for all devices */ SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "fc", CTLTYPE_INT | CTLFLAG_RW, adapter, 0, - ixgbe_sysctl_flowcntl, "I", IXGBE_SYSCTL_DESC_SET_FC); + ixgbe_set_flowcntl, "I", IXGBE_SYSCTL_DESC_SET_FC); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "enable_aim", CTLFLAG_RW, @@ -4324,7 +4234,7 @@ ixgbe_add_device_sysctls(struct adapter *adapter) SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "advertise_speed", CTLTYPE_INT | CTLFLAG_RW, adapter, 0, - ixgbe_sysctl_advertise, "I", IXGBE_SYSCTL_DESC_ADV_SPEED); + ixgbe_set_advertise, "I", IXGBE_SYSCTL_DESC_ADV_SPEED); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "thermal_test", CTLTYPE_INT | CTLFLAG_RW, adapter, 0, @@ -4346,42 +4256,6 @@ ixgbe_add_device_sysctls(struct adapter *adapter) CTLTYPE_INT | CTLFLAG_RW, adapter, 0, ixgbe_sysctl_dmac, "I", "DMA Coalesce"); - /* for X552 backplane devices */ - if (hw->device_id == IXGBE_DEV_ID_X550EM_X_KR) { - struct sysctl_oid *eee_node; - struct sysctl_oid_list *eee_list; - - eee_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "eee", - CTLFLAG_RD, NULL, - "Energy Efficient Ethernet sysctls"); - eee_list = SYSCTL_CHILDREN(eee_node); - - SYSCTL_ADD_PROC(ctx, eee_list, OID_AUTO, "enable", - CTLTYPE_INT | CTLFLAG_RW, adapter, 0, - ixgbe_sysctl_eee_enable, "I", - "Enable or Disable EEE"); - - SYSCTL_ADD_PROC(ctx, eee_list, OID_AUTO, "negotiated", - CTLTYPE_INT | CTLFLAG_RD, adapter, 0, - ixgbe_sysctl_eee_negotiated, "I", - "EEE negotiated on link"); - - SYSCTL_ADD_PROC(ctx, eee_list, OID_AUTO, "tx_lpi_status", - CTLTYPE_INT | CTLFLAG_RD, adapter, 0, - ixgbe_sysctl_eee_tx_lpi_status, "I", - "Whether or not TX link is in LPI state"); - - SYSCTL_ADD_PROC(ctx, eee_list, OID_AUTO, "rx_lpi_status", - CTLTYPE_INT | CTLFLAG_RD, adapter, 0, - ixgbe_sysctl_eee_rx_lpi_status, "I", - "Whether or not RX link is in LPI state"); - - SYSCTL_ADD_PROC(ctx, eee_list, OID_AUTO, "tx_lpi_delay", - CTLTYPE_INT | CTLFLAG_RD, adapter, 0, - ixgbe_sysctl_eee_tx_lpi_delay, "I", - "TX LPI entry delay in microseconds"); - } - /* for WoL-capable devices */ if (hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T) { SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "wol_enable", @@ -4689,51 +4563,41 @@ ixgbe_set_sysctl_value(struct adapter *adapter, const char *name, ** 3 - full */ static int -ixgbe_sysctl_flowcntl(SYSCTL_HANDLER_ARGS) +ixgbe_set_flowcntl(SYSCTL_HANDLER_ARGS) { - int error, fc; - struct adapter *adapter; - - adapter = (struct adapter *) arg1; - fc = adapter->fc; + int error, last; + struct adapter *adapter = (struct adapter *) arg1; - error = sysctl_handle_int(oidp, &fc, 0, req); + last = adapter->fc; + error = sysctl_handle_int(oidp, &adapter->fc, 0, req); if ((error) || (req->newptr == NULL)) return (error); /* Don't bother if it's not changed */ - if (adapter->fc == fc) + if (adapter->fc == last) return (0); - return ixgbe_set_flowcntl(adapter, fc); -} - - -static int -ixgbe_set_flowcntl(struct adapter *adapter, int fc) -{ - - switch (fc) { - case ixgbe_fc_rx_pause: - case ixgbe_fc_tx_pause: - case ixgbe_fc_full: - adapter->hw.fc.requested_mode = adapter->fc; - if (adapter->num_queues > 1) - ixgbe_disable_rx_drop(adapter); - break; - case ixgbe_fc_none: - adapter->hw.fc.requested_mode = ixgbe_fc_none; - if (adapter->num_queues > 1) - ixgbe_enable_rx_drop(adapter); - break; - default: - return (EINVAL); + switch (adapter->fc) { + case ixgbe_fc_rx_pause: + case ixgbe_fc_tx_pause: + case ixgbe_fc_full: + adapter->hw.fc.requested_mode = adapter->fc; + if (adapter->num_queues > 1) + ixgbe_disable_rx_drop(adapter); + break; + case ixgbe_fc_none: + adapter->hw.fc.requested_mode = ixgbe_fc_none; + if (adapter->num_queues > 1) + ixgbe_enable_rx_drop(adapter); + break; + default: + adapter->fc = last; + return (EINVAL); } - adapter->fc = fc; /* Don't autoneg if forcing a value */ adapter->hw.fc.disable_fc_autoneg = TRUE; ixgbe_fc_enable(&adapter->hw); - return (0); + return error; } /* @@ -4744,39 +4608,31 @@ ixgbe_set_flowcntl(struct adapter *adapter, int fc) ** 0x4 - advertise 10G */ static int -ixgbe_sysctl_advertise(SYSCTL_HANDLER_ARGS) +ixgbe_set_advertise(SYSCTL_HANDLER_ARGS) { - int error, advertise; - struct adapter *adapter; + int error = 0, requested; + struct adapter *adapter; + device_t dev; + struct ixgbe_hw *hw; + ixgbe_link_speed speed = 0; adapter = (struct adapter *) arg1; - advertise = adapter->advertise; + dev = adapter->dev; + hw = &adapter->hw; - error = sysctl_handle_int(oidp, &advertise, 0, req); + requested = adapter->advertise; + error = sysctl_handle_int(oidp, &requested, 0, req); if ((error) || (req->newptr == NULL)) return (error); - /* Checks to validate new value */ - if (adapter->advertise == advertise) /* no change */ - return (0); - - return ixgbe_set_advertise(adapter, advertise); -} - -static int -ixgbe_set_advertise(struct adapter *adapter, int advertise) -{ - device_t dev; - struct ixgbe_hw *hw; - ixgbe_link_speed speed; - - hw = &adapter->hw; - dev = adapter->dev; - /* No speed changes for backplane media */ if (hw->phy.media_type == ixgbe_media_type_backplane) return (ENODEV); + /* Checks to validate new value */ + if (adapter->advertise == requested) /* no change */ + return (0); + if (!((hw->phy.media_type == ixgbe_media_type_copper) || (hw->phy.multispeed_fiber))) { device_printf(dev, @@ -4785,13 +4641,13 @@ ixgbe_set_advertise(struct adapter *adapter, int advertise) return (EINVAL); } - if (advertise < 0x1 || advertise > 0x7) { + if (requested < 0x1 || requested > 0x7) { device_printf(dev, "Invalid advertised speed; valid modes are 0x1 through 0x7\n"); return (EINVAL); } - if ((advertise & 0x1) + if ((requested & 0x1) && (hw->mac.type != ixgbe_mac_X540) && (hw->mac.type != ixgbe_mac_X550)) { device_printf(dev, "Set Advertise: 100Mb on X540/X550 only\n"); @@ -4799,19 +4655,18 @@ ixgbe_set_advertise(struct adapter *adapter, int advertise) } /* Set new value and report new advertised mode */ - speed = 0; - if (advertise & 0x1) + if (requested & 0x1) speed |= IXGBE_LINK_SPEED_100_FULL; - if (advertise & 0x2) + if (requested & 0x2) speed |= IXGBE_LINK_SPEED_1GB_FULL; - if (advertise & 0x4) + if (requested & 0x4) speed |= IXGBE_LINK_SPEED_10GB_FULL; - adapter->advertise = advertise; hw->mac.autotry_restart = TRUE; hw->mac.ops.setup_link(hw, speed, TRUE); + adapter->advertise = requested; - return (0); + return (error); } /* @@ -5022,104 +4877,6 @@ ixgbe_sysctl_wol_enable(SYSCTL_HANDLER_ARGS) } /* - * Sysctl to enable/disable the Energy Efficient Ethernet capability, - * if supported by the adapter. - * Values: - * 0 - disabled - * 1 - enabled - */ -static int -ixgbe_sysctl_eee_enable(SYSCTL_HANDLER_ARGS) -{ - struct adapter *adapter = (struct adapter *) arg1; - struct ixgbe_hw *hw = &adapter->hw; - struct ifnet *ifp = adapter->ifp; - int new_eee_enabled, error = 0; - - new_eee_enabled = adapter->eee_enabled; - error = sysctl_handle_int(oidp, &new_eee_enabled, 0, req); - if ((error) || (req->newptr == NULL)) - return (error); - new_eee_enabled = !!(new_eee_enabled); - if (new_eee_enabled == adapter->eee_enabled) - return (0); - - if (new_eee_enabled > 0 && !hw->mac.ops.setup_eee) - return (ENODEV); - else - adapter->eee_enabled = new_eee_enabled; - - /* Re-initialize hardware if it's already running */ - if (ifp->if_drv_flags & IFF_DRV_RUNNING) - ixgbe_init(adapter); - - return (0); -} - -/* - * Read-only sysctl indicating whether EEE support was negotiated - * on the link. - */ -static int -ixgbe_sysctl_eee_negotiated(SYSCTL_HANDLER_ARGS) -{ - struct adapter *adapter = (struct adapter *) arg1; - struct ixgbe_hw *hw = &adapter->hw; - bool status; - - status = !!(IXGBE_READ_REG(hw, IXGBE_EEE_STAT) & IXGBE_EEE_STAT_NEG); - - return (sysctl_handle_int(oidp, 0, status, req)); -} - -/* - * Read-only sysctl indicating whether RX Link is in LPI state. - */ -static int -ixgbe_sysctl_eee_rx_lpi_status(SYSCTL_HANDLER_ARGS) -{ - struct adapter *adapter = (struct adapter *) arg1; - struct ixgbe_hw *hw = &adapter->hw; - bool status; - - status = !!(IXGBE_READ_REG(hw, IXGBE_EEE_STAT) & - IXGBE_EEE_RX_LPI_STATUS); - - return (sysctl_handle_int(oidp, 0, status, req)); -} - -/* - * Read-only sysctl indicating whether TX Link is in LPI state. - */ -static int -ixgbe_sysctl_eee_tx_lpi_status(SYSCTL_HANDLER_ARGS) -{ - struct adapter *adapter = (struct adapter *) arg1; - struct ixgbe_hw *hw = &adapter->hw; - bool status; - - status = !!(IXGBE_READ_REG(hw, IXGBE_EEE_STAT) & - IXGBE_EEE_TX_LPI_STATUS); - - return (sysctl_handle_int(oidp, 0, status, req)); -} - -/* - * Read-only sysctl indicating TX Link LPI delay - */ -static int -ixgbe_sysctl_eee_tx_lpi_delay(SYSCTL_HANDLER_ARGS) -{ - struct adapter *adapter = (struct adapter *) arg1; - struct ixgbe_hw *hw = &adapter->hw; - u32 reg; - - reg = IXGBE_READ_REG(hw, IXGBE_EEE_SU); - - return (sysctl_handle_int(oidp, 0, reg >> 26, req)); -} - -/* * Sysctl to enable/disable the types of packets that the * adapter will wake up on upon receipt. * WUFC - Wake Up Filter Control diff --git a/sys/dev/ixgbe/ix_txrx.c b/sys/dev/ixgbe/ix_txrx.c index 9e90e0d..8c311c1 100644 --- a/sys/dev/ixgbe/ix_txrx.c +++ b/sys/dev/ixgbe/ix_txrx.c @@ -36,6 +36,9 @@ #ifndef IXGBE_STANDALONE_BUILD #include "opt_inet.h" #include "opt_inet6.h" +#ifdef RSS +#include "opt_rss.h" +#endif #endif #include "ixgbe.h" @@ -748,6 +751,7 @@ ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp, if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0) offload = FALSE; + /* Indicate the whole packet as payload when not doing TSO */ *olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT; @@ -796,7 +800,6 @@ ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp, l3d = mtod(mp, caddr_t) + ehdrlen; switch (etype) { -#ifdef INET case ETHERTYPE_IP: ip = (struct ip *)(l3d); ip_hlen = ip->ip_hl << 2; @@ -808,15 +811,12 @@ ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp, *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8; } break; -#endif -#ifdef INET6 case ETHERTYPE_IPV6: ip6 = (struct ip6_hdr *)(l3d); ip_hlen = sizeof(struct ip6_hdr); ipproto = ip6->ip6_nxt; type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6; break; -#endif default: offload = FALSE; break; @@ -1923,12 +1923,49 @@ ixgbe_rxeof(struct ix_queue *que) if (adapter->num_queues > 1) { sendmp->m_pkthdr.flowid = le32toh(cur->wb.lower.hi_dword.rss); - /* - * Full RSS support is not avilable in - * FreeBSD 10 so setting the hash type to - * OPAQUE. - */ - M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE); + switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) { + case IXGBE_RXDADV_RSSTYPE_IPV4: + M_HASHTYPE_SET(sendmp, + M_HASHTYPE_RSS_IPV4); + break; + case IXGBE_RXDADV_RSSTYPE_IPV4_TCP: + M_HASHTYPE_SET(sendmp, + M_HASHTYPE_RSS_TCP_IPV4); + break; + case IXGBE_RXDADV_RSSTYPE_IPV6: + M_HASHTYPE_SET(sendmp, + M_HASHTYPE_RSS_IPV6); + break; + case IXGBE_RXDADV_RSSTYPE_IPV6_TCP: + M_HASHTYPE_SET(sendmp, + M_HASHTYPE_RSS_TCP_IPV6); + break; + case IXGBE_RXDADV_RSSTYPE_IPV6_EX: + M_HASHTYPE_SET(sendmp, + M_HASHTYPE_RSS_IPV6_EX); + break; + case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX: + M_HASHTYPE_SET(sendmp, + M_HASHTYPE_RSS_TCP_IPV6_EX); + break; +#if __FreeBSD_version > 1100000 + case IXGBE_RXDADV_RSSTYPE_IPV4_UDP: + M_HASHTYPE_SET(sendmp, + M_HASHTYPE_RSS_UDP_IPV4); + break; + case IXGBE_RXDADV_RSSTYPE_IPV6_UDP: + M_HASHTYPE_SET(sendmp, + M_HASHTYPE_RSS_UDP_IPV6); + break; + case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX: + M_HASHTYPE_SET(sendmp, + M_HASHTYPE_RSS_UDP_IPV6_EX); + break; +#endif + default: + M_HASHTYPE_SET(sendmp, + M_HASHTYPE_OPAQUE); + } } else { sendmp->m_pkthdr.flowid = que->msix; M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE); diff --git a/sys/dev/ixgbe/ixgbe.h b/sys/dev/ixgbe/ixgbe.h index 558b532..696eb21 100644 --- a/sys/dev/ixgbe/ixgbe.h +++ b/sys/dev/ixgbe/ixgbe.h @@ -497,7 +497,6 @@ struct adapter { u32 optics; u32 fc; /* local flow ctrl setting */ int advertise; /* link speeds */ - bool enable_aim; /* adaptive interrupt moderation */ bool link_active; u16 max_frame_size; u16 num_segs; @@ -505,7 +504,6 @@ struct adapter { bool link_up; u32 vector; u16 dmac; - bool eee_enabled; u32 phy_layer; /* Power management-related */ @@ -563,8 +561,8 @@ struct adapter { struct ixgbe_vf *vfs; #endif #ifdef DEV_NETMAP - void (*init_locked)(struct adapter *); - void (*stop_locked)(void *); + void (*init_locked)(struct adapter *); + void (*stop_locked)(void *); #endif /* Misc stats maintained by the driver */ diff --git a/sys/dev/ixgbe/ixgbe_common.c b/sys/dev/ixgbe/ixgbe_common.c index feb74f6..13a5825 100644 --- a/sys/dev/ixgbe/ixgbe_common.c +++ b/sys/dev/ixgbe/ixgbe_common.c @@ -2250,7 +2250,7 @@ s32 ixgbe_update_eeprom_checksum_generic(struct ixgbe_hw *hw) * ixgbe_validate_mac_addr - Validate MAC address * @mac_addr: pointer to MAC address. * - * Tests a MAC address to ensure it is a valid Individual Address + * Tests a MAC address to ensure it is a valid Individual Address. **/ s32 ixgbe_validate_mac_addr(u8 *mac_addr) { @@ -2260,16 +2260,13 @@ s32 ixgbe_validate_mac_addr(u8 *mac_addr) /* Make sure it is not a multicast address */ if (IXGBE_IS_MULTICAST(mac_addr)) { - DEBUGOUT("MAC address is multicast\n"); status = IXGBE_ERR_INVALID_MAC_ADDR; /* Not a broadcast address */ } else if (IXGBE_IS_BROADCAST(mac_addr)) { - DEBUGOUT("MAC address is broadcast\n"); status = IXGBE_ERR_INVALID_MAC_ADDR; /* Reject the zero address */ } else if (mac_addr[0] == 0 && mac_addr[1] == 0 && mac_addr[2] == 0 && mac_addr[3] == 0 && mac_addr[4] == 0 && mac_addr[5] == 0) { - DEBUGOUT("MAC address is all zeros\n"); status = IXGBE_ERR_INVALID_MAC_ADDR; } return status; diff --git a/sys/dev/ixgbe/ixgbe_dcb.c b/sys/dev/ixgbe/ixgbe_dcb.c index 437d336..2fd71e5 100644 --- a/sys/dev/ixgbe/ixgbe_dcb.c +++ b/sys/dev/ixgbe/ixgbe_dcb.c @@ -375,8 +375,6 @@ s32 ixgbe_dcb_check_config_cee(struct ixgbe_dcb_config *dcb_config) } err_config: - DEBUGOUT2("DCB error code %d while checking %s settings.\n", - ret_val, (i == IXGBE_DCB_TX_CONFIG) ? "Tx" : "Rx"); return ret_val; } diff --git a/sys/dev/ixgbe/ixgbe_osdep.h b/sys/dev/ixgbe/ixgbe_osdep.h index 79d1166..388e26c 100644 --- a/sys/dev/ixgbe/ixgbe_osdep.h +++ b/sys/dev/ixgbe/ixgbe_osdep.h @@ -32,6 +32,7 @@ ******************************************************************************/ /*$FreeBSD$*/ + #ifndef _IXGBE_OS_H_ #define _IXGBE_OS_H_ @@ -57,15 +58,6 @@ #define ASSERT(x) if(!(x)) panic("IXGBE: x") #define EWARN(H, W, S) printf(W) -enum { - IXGBE_ERROR_SOFTWARE, - IXGBE_ERROR_POLLING, - IXGBE_ERROR_INVALID_STATE, - IXGBE_ERROR_UNSUPPORTED, - IXGBE_ERROR_ARGUMENT, - IXGBE_ERROR_CAUTION, -}; - /* The happy-fun DELAY macro is defined in /usr/src/sys/i386/include/clock.h */ #define usec_delay(x) DELAY(x) #define msec_delay(x) DELAY(1000*(x)) @@ -82,23 +74,9 @@ enum { #define DEBUGOUT5(S,A,B,C,D,E) printf(S "\n",A,B,C,D,E) #define DEBUGOUT6(S,A,B,C,D,E,F) printf(S "\n",A,B,C,D,E,F) #define DEBUGOUT7(S,A,B,C,D,E,F,G) printf(S "\n",A,B,C,D,E,F,G) - #define ERROR_REPORT1 ERROR_REPORT - #define ERROR_REPORT2 ERROR_REPORT - #define ERROR_REPORT3 ERROR_REPORT - #define ERROR_REPORT(level, format, arg...) do { \ - switch (level) { \ - case IXGBE_ERROR_SOFTWARE: \ - case IXGBE_ERROR_CAUTION: \ - case IXGBE_ERROR_POLLING: \ - case IXGBE_ERROR_INVALID_STATE: \ - case IXGBE_ERROR_UNSUPPORTED: \ - case IXGBE_ERROR_ARGUMENT: \ - device_printf(ixgbe_dev_from_hw(hw), format, ## arg); \ - break; \ - default: \ - break; \ - } \ - } while (0) + #define ERROR_REPORT1(S,A) printf(S "\n",A) + #define ERROR_REPORT2(S,A,B) printf(S "\n",A,B) + #define ERROR_REPORT3(S,A,B,C) printf(S "\n",A,B,C) #else #define DEBUGOUT(S) #define DEBUGOUT1(S,A) diff --git a/sys/dev/ixgbe/ixgbe_phy.c b/sys/dev/ixgbe/ixgbe_phy.c index f5d22b2..8b0d165 100644 --- a/sys/dev/ixgbe/ixgbe_phy.c +++ b/sys/dev/ixgbe/ixgbe_phy.c @@ -495,7 +495,7 @@ s32 ixgbe_get_phy_id(struct ixgbe_hw *hw) /** * ixgbe_get_phy_type_from_id - Get the phy type - * @hw: pointer to hardware structure + * @phy_id: PHY ID information * **/ enum ixgbe_phy_type ixgbe_get_phy_type_from_id(u32 phy_id) @@ -527,8 +527,6 @@ enum ixgbe_phy_type ixgbe_get_phy_type_from_id(u32 phy_id) phy_type = ixgbe_phy_unknown; break; } - - DEBUGOUT1("phy type found is %d\n", phy_type); return phy_type; } diff --git a/sys/dev/ixgbe/ixgbe_type.h b/sys/dev/ixgbe/ixgbe_type.h index da03f79..86e60b3 100644 --- a/sys/dev/ixgbe/ixgbe_type.h +++ b/sys/dev/ixgbe/ixgbe_type.h @@ -1481,7 +1481,7 @@ struct ixgbe_dmac_config { #define IXGBE_MDIO_GLOBAL_ALARM_1 0xCC00 /* Global alarm 1 */ #define IXGBE_MDIO_GLOBAL_ALM_1_DEV_FAULT 0x0010 /* device fault */ #define IXGBE_MDIO_GLOBAL_ALM_1_HI_TMP_FAIL 0x4000 /* high temp failure */ -#define IXGBE_MDIO_GLOBAL_FAULT_MSG 0xC850 /* Global Fault Message */ +#define IXGBE_MDIO_GLOBAL_FAULT_MSG 0xC850 /* Global Fault Message */ #define IXGBE_MDIO_GLOBAL_FAULT_MSG_HI_TMP 0x8007 /* high temp failure */ #define IXGBE_MDIO_GLOBAL_INT_MASK 0xD400 /* Global int mask */ #define IXGBE_MDIO_GLOBAL_AN_VEN_ALM_INT_EN 0x1000 /* autoneg vendor alarm int enable */ @@ -1489,6 +1489,7 @@ struct ixgbe_dmac_config { #define IXGBE_MDIO_GLOBAL_VEN_ALM_INT_EN 0x1 /* vendor alarm int enable */ #define IXGBE_MDIO_GLOBAL_STD_ALM2_INT 0x200 /* vendor alarm2 int mask */ #define IXGBE_MDIO_GLOBAL_INT_HI_TEMP_EN 0x4000 /* int high temp enable */ +#define IXGBE_MDIO_GLOBAL_INT_DEV_FAULT_EN 0x0010 /* int dev fault enable */ #define IXGBE_MDIO_PMA_PMD_CONTROL_ADDR 0x0000 /* PMA/PMD Control Reg */ #define IXGBE_MDIO_PMA_PMD_SDA_SCL_ADDR 0xC30A /* PHY_XS SDA/SCL Addr Reg */ #define IXGBE_MDIO_PMA_PMD_SDA_SCL_DATA 0xC30B /* PHY_XS SDA/SCL Data Reg */ @@ -2917,6 +2918,15 @@ enum ixgbe_fdir_pballoc_type { #define FW_DISABLE_RXEN_CMD 0xDE #define FW_DISABLE_RXEN_LEN 0x1 #define FW_PHY_MGMT_REQ_CMD 0x20 +#define FW_PHY_TOKEN_REQ_CMD 0xA +#define FW_PHY_TOKEN_REQ_LEN 2 +#define FW_PHY_TOKEN_REQ 0 +#define FW_PHY_TOKEN_REL 1 +#define FW_PHY_TOKEN_OK 1 +#define FW_PHY_TOKEN_RETRY 0x80 +#define FW_PHY_TOKEN_DELAY 5 /* milliseconds */ +#define FW_PHY_TOKEN_WAIT 5 /* seconds */ +#define FW_PHY_TOKEN_RETRIES ((FW_PHY_TOKEN_WAIT * 1000) / FW_PHY_TOKEN_DELAY) #define FW_INT_PHY_REQ_CMD 0xB #define FW_INT_PHY_REQ_LEN 10 #define FW_INT_PHY_REQ_READ 0 @@ -2990,6 +3000,13 @@ struct ixgbe_hic_disable_rxen { u16 pad3; }; +struct ixgbe_hic_phy_token_req { + struct ixgbe_hic_hdr hdr; + u8 port_number; + u8 command_type; + u16 pad; +}; + struct ixgbe_hic_internal_phy_req { struct ixgbe_hic_hdr hdr; u8 port_number; @@ -3130,6 +3147,7 @@ struct ixgbe_adv_tx_context_desc { #define IXGBE_ADVTXD_TUCMD_L4T_UDP 0x00000000 /* L4 Packet TYPE of UDP */ #define IXGBE_ADVTXD_TUCMD_L4T_TCP 0x00000800 /* L4 Packet TYPE of TCP */ #define IXGBE_ADVTXD_TUCMD_L4T_SCTP 0x00001000 /* L4 Packet TYPE of SCTP */ +#define IXGBE_ADVTXD_TUCMD_L4T_RSV 0x00001800 /* RSV L4 Packet TYPE */ #define IXGBE_ADVTXD_TUCMD_MKRREQ 0x00002000 /* req Markers and CRC */ #define IXGBE_ADVTXD_POPTS_IPSEC 0x00000400 /* IPSec offload request */ #define IXGBE_ADVTXD_TUCMD_IPSEC_TYPE_ESP 0x00002000 /* IPSec Type ESP */ @@ -3943,13 +3961,15 @@ struct ixgbe_hw { #define IXGBE_ERR_FEATURE_NOT_SUPPORTED -36 #define IXGBE_ERR_EEPROM_PROTECTED_REGION -37 #define IXGBE_ERR_FDIR_CMD_INCOMPLETE -38 +#define IXGBE_ERR_FW_RESP_INVALID -39 +#define IXGBE_ERR_TOKEN_RETRY -40 #define IXGBE_NOT_IMPLEMENTED 0x7FFFFFFF #define IXGBE_FUSES0_GROUP(_i) (0x11158 + ((_i) * 4)) #define IXGBE_FUSES0_300MHZ (1 << 5) -#define IXGBE_FUSES0_REV1 (1 << 6) +#define IXGBE_FUSES0_REV_MASK (3 << 6) #define IXGBE_KRM_PORT_CAR_GEN_CTRL(P) ((P) ? 0x8010 : 0x4010) #define IXGBE_KRM_LINK_CTRL_1(P) ((P) ? 0x820C : 0x420C) diff --git a/sys/dev/ixgbe/ixgbe_vf.c b/sys/dev/ixgbe/ixgbe_vf.c index 2ce4d32..ee3d325 100644 --- a/sys/dev/ixgbe/ixgbe_vf.c +++ b/sys/dev/ixgbe/ixgbe_vf.c @@ -229,7 +229,9 @@ s32 ixgbe_reset_hw_vf(struct ixgbe_hw *hw) msgbuf[0] != (IXGBE_VF_RESET | IXGBE_VT_MSGTYPE_NACK)) return IXGBE_ERR_INVALID_MAC_ADDR; - memcpy(hw->mac.perm_addr, addr, IXGBE_ETH_LENGTH_OF_ADDRESS); + if (msgbuf[0] == (IXGBE_VF_RESET | IXGBE_VT_MSGTYPE_ACK)) + memcpy(hw->mac.perm_addr, addr, IXGBE_ETH_LENGTH_OF_ADDRESS); + hw->mac.mc_filter_type = msgbuf[IXGBE_VF_MC_TYPE_WORD]; return ret_val; diff --git a/sys/dev/ixgbe/ixgbe_x550.c b/sys/dev/ixgbe/ixgbe_x550.c index 1199d38..ce2b0dc 100644 --- a/sys/dev/ixgbe/ixgbe_x550.c +++ b/sys/dev/ixgbe/ixgbe_x550.c @@ -81,9 +81,13 @@ s32 ixgbe_init_ops_X550(struct ixgbe_hw *hw) mac->ops.mdd_event = ixgbe_mdd_event_X550; mac->ops.restore_mdd_vf = ixgbe_restore_mdd_vf_X550; mac->ops.disable_rx = ixgbe_disable_rx_x550; - if (hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T) { + switch (hw->device_id) { + case IXGBE_DEV_ID_X550EM_X_10G_T: hw->mac.ops.led_on = ixgbe_led_on_t_X550em; hw->mac.ops.led_off = ixgbe_led_off_t_X550em; + break; + default: + break; } return ret_val; } @@ -336,7 +340,6 @@ static s32 ixgbe_identify_phy_x550em(struct ixgbe_hw *hw) hw->phy.phy_semaphore_mask = IXGBE_GSSR_SHARED_I2C_SM; ixgbe_setup_mux_ctl(hw); ixgbe_check_cs4227(hw); - return ixgbe_identify_module_generic(hw); break; case IXGBE_DEV_ID_X550EM_X_KX4: @@ -414,6 +417,8 @@ s32 ixgbe_init_ops_X550EM(struct ixgbe_hw *hw) if (hw->mac.type == ixgbe_mac_X550EM_x) { mac->ops.read_iosf_sb_reg = ixgbe_read_iosf_sb_reg_x550; mac->ops.write_iosf_sb_reg = ixgbe_write_iosf_sb_reg_x550; + mac->ops.acquire_swfw_sync = ixgbe_acquire_swfw_sync_X550em; + mac->ops.release_swfw_sync = ixgbe_release_swfw_sync_X550em; } mac->ops.get_media_type = ixgbe_get_media_type_X550em; @@ -428,8 +433,6 @@ s32 ixgbe_init_ops_X550EM(struct ixgbe_hw *hw) else mac->ops.setup_fc = ixgbe_setup_fc_X550em; - mac->ops.acquire_swfw_sync = ixgbe_acquire_swfw_sync_X550em; - mac->ops.release_swfw_sync = ixgbe_release_swfw_sync_X550em; if (hw->device_id != IXGBE_DEV_ID_X550EM_X_KR) mac->ops.setup_eee = NULL; @@ -632,7 +635,6 @@ s32 ixgbe_setup_eee_X550(struct ixgbe_hw *hw, bool enable_eee) u16 autoneg_eee_reg; u32 link_reg; s32 status; - u32 fuse; DEBUGFUNC("ixgbe_setup_eee_X550"); @@ -653,9 +655,10 @@ s32 ixgbe_setup_eee_X550(struct ixgbe_hw *hw, bool enable_eee) hw->phy.ops.write_reg(hw, IXGBE_MDIO_AUTO_NEG_EEE_ADVT, IXGBE_MDIO_AUTO_NEG_DEV_TYPE, autoneg_eee_reg); } else if (hw->device_id == IXGBE_DEV_ID_X550EM_X_KR) { - /* Not supported on first revision. */ - fuse = IXGBE_READ_REG(hw, IXGBE_FUSES0_GROUP(0)); - if (!(fuse & IXGBE_FUSES0_REV1)) + /* Not supported on first revision of X550EM_x. */ + if ((hw->mac.type == ixgbe_mac_X550EM_x) && + !(IXGBE_FUSES0_REV_MASK & + IXGBE_READ_REG(hw, IXGBE_FUSES0_GROUP(0)))) return IXGBE_SUCCESS; status = ixgbe_read_iosf_sb_reg_x550(hw, @@ -965,7 +968,7 @@ void ixgbe_restore_mdd_vf_X550(struct ixgbe_hw *hw, u32 vf) num_qs = 4; /* 32 VFs / pools */ bitmask = 0x0000000F; break; - default: /* 64 VFs / pools */ + default: /* 64 VFs / pools */ num_qs = 2; bitmask = 0x00000003; break; @@ -1349,7 +1352,7 @@ static s32 ixgbe_enable_lasi_ext_t_x550em(struct ixgbe_hw *hw) if (status != IXGBE_SUCCESS) return status; - /* Enables high temperature failure alarm */ + /* Enable high temperature failure and global fault alarms */ status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_GLOBAL_INT_MASK, IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, ®); @@ -1357,7 +1360,8 @@ static s32 ixgbe_enable_lasi_ext_t_x550em(struct ixgbe_hw *hw) if (status != IXGBE_SUCCESS) return status; - reg |= IXGBE_MDIO_GLOBAL_INT_HI_TEMP_EN; + reg |= (IXGBE_MDIO_GLOBAL_INT_HI_TEMP_EN | + IXGBE_MDIO_GLOBAL_INT_DEV_FAULT_EN); status = hw->phy.ops.write_reg(hw, IXGBE_MDIO_GLOBAL_INT_MASK, IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, @@ -1452,7 +1456,6 @@ static s32 ixgbe_setup_kr_speed_x550em(struct ixgbe_hw *hw, s32 ixgbe_init_phy_ops_X550em(struct ixgbe_hw *hw) { struct ixgbe_phy_info *phy = &hw->phy; - ixgbe_link_speed speed; s32 ret_val; DEBUGFUNC("ixgbe_init_phy_ops_X550em"); @@ -1467,10 +1470,6 @@ s32 ixgbe_init_phy_ops_X550em(struct ixgbe_hw *hw) * to determine internal PHY mode. */ phy->nw_mng_if_sel = IXGBE_READ_REG(hw, IXGBE_NW_MNG_IF_SEL); - if (phy->nw_mng_if_sel & IXGBE_NW_MNG_IF_SEL_INT_PHY_MODE) { - speed = IXGBE_LINK_SPEED_10GB_FULL | - IXGBE_LINK_SPEED_1GB_FULL; - } phy->ops.identify_sfp = ixgbe_identify_sfp_module_X550em; } @@ -1505,18 +1504,13 @@ s32 ixgbe_init_phy_ops_X550em(struct ixgbe_hw *hw) /* If internal link mode is XFI, then setup iXFI internal link, * else setup KR now. */ - if (!(phy->nw_mng_if_sel & IXGBE_NW_MNG_IF_SEL_INT_PHY_MODE)) { - phy->ops.setup_internal_link = + phy->ops.setup_internal_link = ixgbe_setup_internal_phy_t_x550em; - } else { - speed = IXGBE_LINK_SPEED_10GB_FULL | - IXGBE_LINK_SPEED_1GB_FULL; - ret_val = ixgbe_setup_kr_speed_x550em(hw, speed); - } - /* setup SW LPLU only for first revision */ - if (!(IXGBE_FUSES0_REV1 & IXGBE_READ_REG(hw, - IXGBE_FUSES0_GROUP(0)))) + /* setup SW LPLU only for first revision of X550EM_x */ + if ((hw->mac.type == ixgbe_mac_X550EM_x) && + !(IXGBE_FUSES0_REV_MASK & + IXGBE_READ_REG(hw, IXGBE_FUSES0_GROUP(0)))) phy->ops.enter_lplu = ixgbe_enter_lplu_t_x550em; phy->ops.handle_lasi = ixgbe_handle_lasi_ext_t_x550em; @@ -1555,11 +1549,15 @@ s32 ixgbe_reset_hw_X550em(struct ixgbe_hw *hw) /* flush pending Tx transactions */ ixgbe_clear_tx_pending(hw); - if (hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T) { + switch (hw->device_id) { + case IXGBE_DEV_ID_X550EM_X_10G_T: /* Config MDIO clock speed before the first MDIO PHY access */ hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0); hlreg0 &= ~IXGBE_HLREG0_MDCSPD; IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0); + break; + default: + break; } /* PHY ops must be identified and initialized prior to reset */ @@ -1639,6 +1637,13 @@ mac_reset_top: hw->mac.num_rar_entries = 128; hw->mac.ops.init_rx_addrs(hw); + if (hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T) { + /* Reconfig MDIO clock speed after PHY reset */ + hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0); + hlreg0 &= ~IXGBE_HLREG0_MDCSPD; + IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0); + } + if (hw->device_id == IXGBE_DEV_ID_X550EM_X_SFP) ixgbe_setup_mux_ctl(hw); @@ -1778,47 +1783,16 @@ s32 ixgbe_setup_mac_link_sfp_x550em(struct ixgbe_hw *hw, } /** - * ixgbe_setup_ixfi_x550em - Configure the KR PHY for iXFI mode. + * ixgbe_setup_ixfi_x550em_x - MAC specific iXFI configuration * @hw: pointer to hardware structure - * @speed: the link speed to force * - * Configures the integrated KR PHY to use iXFI mode. Used to connect an - * internal and external PHY at a specific speed, without autonegotiation. + * iXfI configuration needed for ixgbe_mac_X550EM_x devices. **/ -static s32 ixgbe_setup_ixfi_x550em(struct ixgbe_hw *hw, ixgbe_link_speed *speed) +static s32 ixgbe_setup_ixfi_x550em_x(struct ixgbe_hw *hw) { s32 status; u32 reg_val; - /* Disable AN and force speed to 10G Serial. */ - status = ixgbe_read_iosf_sb_reg_x550(hw, - IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id), - IXGBE_SB_IOSF_TARGET_KR_PHY, ®_val); - if (status != IXGBE_SUCCESS) - return status; - - reg_val &= ~IXGBE_KRM_LINK_CTRL_1_TETH_AN_ENABLE; - reg_val &= ~IXGBE_KRM_LINK_CTRL_1_TETH_FORCE_SPEED_MASK; - - /* Select forced link speed for internal PHY. */ - switch (*speed) { - case IXGBE_LINK_SPEED_10GB_FULL: - reg_val |= IXGBE_KRM_LINK_CTRL_1_TETH_FORCE_SPEED_10G; - break; - case IXGBE_LINK_SPEED_1GB_FULL: - reg_val |= IXGBE_KRM_LINK_CTRL_1_TETH_FORCE_SPEED_1G; - break; - default: - /* Other link speeds are not supported by internal KR PHY. */ - return IXGBE_ERR_LINK_SETUP; - } - - status = ixgbe_write_iosf_sb_reg_x550(hw, - IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id), - IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val); - if (status != IXGBE_SUCCESS) - return status; - /* Disable training protocol FSM. */ status = ixgbe_read_iosf_sb_reg_x550(hw, IXGBE_KRM_RX_TRN_LINKUP_CTRL(hw->bus.lan_id), @@ -1873,9 +1847,58 @@ static s32 ixgbe_setup_ixfi_x550em(struct ixgbe_hw *hw, ixgbe_link_speed *speed) status = ixgbe_write_iosf_sb_reg_x550(hw, IXGBE_KRM_TX_COEFF_CTRL_1(hw->bus.lan_id), IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val); + return status; +} + +/** + * ixgbe_setup_ixfi_x550em - Configure the KR PHY for iXFI mode. + * @hw: pointer to hardware structure + * @speed: the link speed to force + * + * Configures the integrated KR PHY to use iXFI mode. Used to connect an + * internal and external PHY at a specific speed, without autonegotiation. + **/ +static s32 ixgbe_setup_ixfi_x550em(struct ixgbe_hw *hw, ixgbe_link_speed *speed) +{ + s32 status; + u32 reg_val; + + /* Disable AN and force speed to 10G Serial. */ + status = ixgbe_read_iosf_sb_reg_x550(hw, + IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, ®_val); if (status != IXGBE_SUCCESS) return status; + reg_val &= ~IXGBE_KRM_LINK_CTRL_1_TETH_AN_ENABLE; + reg_val &= ~IXGBE_KRM_LINK_CTRL_1_TETH_FORCE_SPEED_MASK; + + /* Select forced link speed for internal PHY. */ + switch (*speed) { + case IXGBE_LINK_SPEED_10GB_FULL: + reg_val |= IXGBE_KRM_LINK_CTRL_1_TETH_FORCE_SPEED_10G; + break; + case IXGBE_LINK_SPEED_1GB_FULL: + reg_val |= IXGBE_KRM_LINK_CTRL_1_TETH_FORCE_SPEED_1G; + break; + default: + /* Other link speeds are not supported by internal KR PHY. */ + return IXGBE_ERR_LINK_SETUP; + } + + status = ixgbe_write_iosf_sb_reg_x550(hw, + IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val); + if (status != IXGBE_SUCCESS) + return status; + + /* Additional configuration needed for x550em_x */ + if (hw->mac.type == ixgbe_mac_X550EM_x) { + status = ixgbe_setup_ixfi_x550em_x(hw); + if (status != IXGBE_SUCCESS) + return status; + } + /* Toggle port SW reset by AN reset. */ status = ixgbe_read_iosf_sb_reg_x550(hw, IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id), @@ -1944,43 +1967,50 @@ s32 ixgbe_setup_internal_phy_t_x550em(struct ixgbe_hw *hw) if (hw->mac.ops.get_media_type(hw) != ixgbe_media_type_copper) return IXGBE_ERR_CONFIG; - /* If link is not up, then there is no setup necessary so return */ - status = ixgbe_ext_phy_t_x550em_get_link(hw, &link_up); - if (status != IXGBE_SUCCESS) - return status; + if (!(hw->phy.nw_mng_if_sel & IXGBE_NW_MNG_IF_SEL_INT_PHY_MODE)) { + /* If link is down, there is no setup necessary so return */ + status = ixgbe_ext_phy_t_x550em_get_link(hw, &link_up); + if (status != IXGBE_SUCCESS) + return status; - if (!link_up) - return IXGBE_SUCCESS; + if (!link_up) + return IXGBE_SUCCESS; - status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_VENDOR_STAT, - IXGBE_MDIO_AUTO_NEG_DEV_TYPE, - &speed); - if (status != IXGBE_SUCCESS) - return status; + status = hw->phy.ops.read_reg(hw, + IXGBE_MDIO_AUTO_NEG_VENDOR_STAT, + IXGBE_MDIO_AUTO_NEG_DEV_TYPE, + &speed); + if (status != IXGBE_SUCCESS) + return status; - /* If link is not still up, then no setup is necessary so return */ - status = ixgbe_ext_phy_t_x550em_get_link(hw, &link_up); - if (status != IXGBE_SUCCESS) - return status; - if (!link_up) - return IXGBE_SUCCESS; + /* If link is still down - no setup is required so return */ + status = ixgbe_ext_phy_t_x550em_get_link(hw, &link_up); + if (status != IXGBE_SUCCESS) + return status; + if (!link_up) + return IXGBE_SUCCESS; - /* clear everything but the speed and duplex bits */ - speed &= IXGBE_MDIO_AUTO_NEG_VENDOR_STATUS_MASK; + /* clear everything but the speed and duplex bits */ + speed &= IXGBE_MDIO_AUTO_NEG_VENDOR_STATUS_MASK; - switch (speed) { - case IXGBE_MDIO_AUTO_NEG_VENDOR_STATUS_10GB_FULL: - force_speed = IXGBE_LINK_SPEED_10GB_FULL; - break; - case IXGBE_MDIO_AUTO_NEG_VENDOR_STATUS_1GB_FULL: - force_speed = IXGBE_LINK_SPEED_1GB_FULL; - break; - default: - /* Internal PHY does not support anything else */ - return IXGBE_ERR_INVALID_LINK_SETTINGS; - } + switch (speed) { + case IXGBE_MDIO_AUTO_NEG_VENDOR_STATUS_10GB_FULL: + force_speed = IXGBE_LINK_SPEED_10GB_FULL; + break; + case IXGBE_MDIO_AUTO_NEG_VENDOR_STATUS_1GB_FULL: + force_speed = IXGBE_LINK_SPEED_1GB_FULL; + break; + default: + /* Internal PHY does not support anything else */ + return IXGBE_ERR_INVALID_LINK_SETTINGS; + } - return ixgbe_setup_ixfi_x550em(hw, &force_speed); + return ixgbe_setup_ixfi_x550em(hw, &force_speed); + } else { + speed = IXGBE_LINK_SPEED_10GB_FULL | + IXGBE_LINK_SPEED_1GB_FULL; + return ixgbe_setup_kr_speed_x550em(hw, speed); + } } /** @@ -2695,7 +2725,9 @@ s32 ixgbe_enter_lplu_t_x550em(struct ixgbe_hw *hw) bool link_up; /* SW LPLU not required on later HW revisions. */ - if (IXGBE_FUSES0_REV1 & IXGBE_READ_REG(hw, IXGBE_FUSES0_GROUP(0))) + if ((hw->mac.type == ixgbe_mac_X550EM_x) && + (IXGBE_FUSES0_REV_MASK & + IXGBE_READ_REG(hw, IXGBE_FUSES0_GROUP(0)))) return IXGBE_SUCCESS; /* If blocked by MNG FW, then don't restart AN */ @@ -2963,6 +2995,7 @@ void ixgbe_release_swfw_sync_X550em(struct ixgbe_hw *hw, u32 mask) ixgbe_release_swfw_sync_X540(hw, mask); } + /** * ixgbe_handle_lasi_ext_t_x550em - Handle external Base T PHY interrupt * @hw: pointer to hardware structure @@ -3142,4 +3175,3 @@ s32 ixgbe_led_off_t_X550em(struct ixgbe_hw *hw, u32 led_idx) return IXGBE_SUCCESS; } - diff --git a/sys/dev/oce/oce_if.c b/sys/dev/oce/oce_if.c index f37ef3a..3711503 100644 --- a/sys/dev/oce/oce_if.c +++ b/sys/dev/oce/oce_if.c @@ -1060,11 +1060,10 @@ oce_tx_restart(POCE_SOFTC sc, struct oce_wq *wq) if ((sc->ifp->if_drv_flags & IFF_DRV_RUNNING) != IFF_DRV_RUNNING) return; -#if __FreeBSD_version >= 800000 if (!drbr_empty(sc->ifp, wq->br)) -#else + taskqueue_enqueue_fast(taskqueue_swi, &wq->txtask); + if (!IFQ_DRV_IS_EMPTY(&sc->ifp->if_snd)) -#endif taskqueue_enqueue_fast(taskqueue_swi, &wq->txtask); } @@ -1147,7 +1146,7 @@ oce_tx_task(void *arg, int npending) struct ifnet *ifp = sc->ifp; int rc = 0; -#if __FreeBSD_version >= 800000 + LOCK(&wq->tx_lock); rc = oce_multiq_transmit(ifp, NULL, wq); if (rc) { @@ -1155,9 +1154,9 @@ oce_tx_task(void *arg, int npending) "TX[%d] restart failed\n", wq->queue_index); } UNLOCK(&wq->tx_lock); -#else + oce_start(ifp); -#endif + } diff --git a/sys/dev/random/hash.c b/sys/dev/random/hash.c index cf0feaa..e37f090 100644 --- a/sys/dev/random/hash.c +++ b/sys/dev/random/hash.c @@ -45,7 +45,7 @@ randomdev_hash_init(struct randomdev_hash *context) /* Iterate the hash */ void -randomdev_hash_iterate(struct randomdev_hash *context, void *data, size_t size) +randomdev_hash_iterate(struct randomdev_hash *context, const void *data, size_t size) { SHA256_Update(&context->sha, data, size); } @@ -64,7 +64,7 @@ randomdev_hash_finish(struct randomdev_hash *context, void *buf) * data. Use CBC mode for better avalanche. */ void -randomdev_encrypt_init(struct randomdev_key *context, void *data) +randomdev_encrypt_init(struct randomdev_key *context, const void *data) { rijndael_cipherInit(&context->cipher, MODE_CBC, NULL); rijndael_makeKey(&context->key, DIR_ENCRYPT, KEYSIZE*8, data); @@ -75,7 +75,7 @@ randomdev_encrypt_init(struct randomdev_key *context, void *data) * a multiple of BLOCKSIZE. */ void -randomdev_encrypt(struct randomdev_key *context, void *d_in, void *d_out, unsigned length) +randomdev_encrypt(struct randomdev_key *context, const void *d_in, void *d_out, unsigned length) { rijndael_blockEncrypt(&context->cipher, &context->key, d_in, length*8, d_out); } diff --git a/sys/dev/random/hash.h b/sys/dev/random/hash.h index 4e6a4a0..8655d88 100644 --- a/sys/dev/random/hash.h +++ b/sys/dev/random/hash.h @@ -42,9 +42,9 @@ struct randomdev_key { /* Big! Make static! */ }; void randomdev_hash_init(struct randomdev_hash *); -void randomdev_hash_iterate(struct randomdev_hash *, void *, size_t); +void randomdev_hash_iterate(struct randomdev_hash *, const void *, size_t); void randomdev_hash_finish(struct randomdev_hash *, void *); -void randomdev_encrypt_init(struct randomdev_key *, void *); -void randomdev_encrypt(struct randomdev_key *context, void *, void *, unsigned); +void randomdev_encrypt_init(struct randomdev_key *, const void *); +void randomdev_encrypt(struct randomdev_key *context, const void *, void *, unsigned); #endif diff --git a/sys/dev/virtio/network/if_vtnet.c b/sys/dev/virtio/network/if_vtnet.c index 5453998..a5e6383 100644 --- a/sys/dev/virtio/network/if_vtnet.c +++ b/sys/dev/virtio/network/if_vtnet.c @@ -139,23 +139,21 @@ static struct mbuf * static int vtnet_txq_enqueue_buf(struct vtnet_txq *, struct mbuf **, struct vtnet_tx_header *); static int vtnet_txq_encap(struct vtnet_txq *, struct mbuf **); -#ifdef VTNET_LEGACY_TX + static void vtnet_start_locked(struct vtnet_txq *, struct ifnet *); static void vtnet_start(struct ifnet *); -#else + static int vtnet_txq_mq_start_locked(struct vtnet_txq *, struct mbuf *); static int vtnet_txq_mq_start(struct ifnet *, struct mbuf *); static void vtnet_txq_tq_deferred(void *, int); -#endif + static void vtnet_txq_start(struct vtnet_txq *); static void vtnet_txq_tq_intr(void *, int); static int vtnet_txq_eof(struct vtnet_txq *); static void vtnet_tx_vq_intr(void *); static void vtnet_tx_start_all(struct vtnet_softc *); -#ifndef VTNET_LEGACY_TX static void vtnet_qflush(struct ifnet *); -#endif static int vtnet_watchdog(struct vtnet_txq *); static void vtnet_rxq_accum_stats(struct vtnet_rxq *, @@ -922,16 +920,16 @@ vtnet_setup_interface(struct vtnet_softc *sc) ifp->if_init = vtnet_init; ifp->if_ioctl = vtnet_ioctl; -#ifndef VTNET_LEGACY_TX + ifp->if_transmit = vtnet_txq_mq_start; ifp->if_qflush = vtnet_qflush; -#else + struct virtqueue *vq = sc->vtnet_txqs[0].vtntx_vq; ifp->if_start = vtnet_start; IFQ_SET_MAXLEN(&ifp->if_snd, virtqueue_size(vq) - 1); ifp->if_snd.ifq_drv_maxlen = virtqueue_size(vq) - 1; IFQ_SET_READY(&ifp->if_snd); -#endif + ifmedia_init(&sc->vtnet_media, IFM_IMASK, vtnet_ifmedia_upd, vtnet_ifmedia_sts); @@ -2209,7 +2207,7 @@ fail: return (error); } -#ifdef VTNET_LEGACY_TX + static void vtnet_start_locked(struct vtnet_txq *txq, struct ifnet *ifp) @@ -2275,7 +2273,7 @@ vtnet_start(struct ifnet *ifp) VTNET_TXQ_UNLOCK(txq); } -#else /* !VTNET_LEGACY_TX */ + static int vtnet_txq_mq_start_locked(struct vtnet_txq *txq, struct mbuf *m) @@ -2387,7 +2385,7 @@ vtnet_txq_tq_deferred(void *xtxq, int pending) VTNET_TXQ_UNLOCK(txq); } -#endif /* VTNET_LEGACY_TX */ + static void vtnet_txq_start(struct vtnet_txq *txq) @@ -2398,13 +2396,13 @@ vtnet_txq_start(struct vtnet_txq *txq) sc = txq->vtntx_sc; ifp = sc->vtnet_ifp; -#ifdef VTNET_LEGACY_TX + if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) vtnet_start_locked(txq, ifp); -#else + if (!drbr_empty(ifp, txq->vtntx_br)) vtnet_txq_mq_start_locked(txq, NULL); -#endif + } static void diff --git a/sys/dev/vmware/vmxnet3/if_vmx.c b/sys/dev/vmware/vmxnet3/if_vmx.c index e92cc00..0629f2b 100644 --- a/sys/dev/vmware/vmxnet3/if_vmx.c +++ b/sys/dev/vmware/vmxnet3/if_vmx.c @@ -166,15 +166,15 @@ static int vmxnet3_txq_load_mbuf(struct vmxnet3_txqueue *, struct mbuf **, bus_dmamap_t, bus_dma_segment_t [], int *); static void vmxnet3_txq_unload_mbuf(struct vmxnet3_txqueue *, bus_dmamap_t); static int vmxnet3_txq_encap(struct vmxnet3_txqueue *, struct mbuf **); -#ifdef VMXNET3_LEGACY_TX + static void vmxnet3_start_locked(struct ifnet *); static void vmxnet3_start(struct ifnet *); -#else + static int vmxnet3_txq_mq_start_locked(struct vmxnet3_txqueue *, struct mbuf *); static int vmxnet3_txq_mq_start(struct ifnet *, struct mbuf *); static void vmxnet3_txq_tq_deferred(void *, int); -#endif + static void vmxnet3_txq_start(struct vmxnet3_txqueue *); static void vmxnet3_tx_start_all(struct vmxnet3_softc *); @@ -1731,15 +1731,15 @@ vmxnet3_setup_interface(struct vmxnet3_softc *sc) ifp->if_hw_tsomaxsegcount = VMXNET3_TX_MAXSEGS; ifp->if_hw_tsomaxsegsize = VMXNET3_TX_MAXSEGSIZE; -#ifdef VMXNET3_LEGACY_TX + ifp->if_start = vmxnet3_start; ifp->if_snd.ifq_drv_maxlen = sc->vmx_ntxdescs - 1; IFQ_SET_MAXLEN(&ifp->if_snd, sc->vmx_ntxdescs - 1); IFQ_SET_READY(&ifp->if_snd); -#else + ifp->if_transmit = vmxnet3_txq_mq_start; ifp->if_qflush = vmxnet3_qflush; -#endif + vmxnet3_get_lladdr(sc); ether_ifattach(ifp, sc->vmx_lladdr); @@ -2872,7 +2872,7 @@ vmxnet3_txq_encap(struct vmxnet3_txqueue *txq, struct mbuf **m0) return (0); } -#ifdef VMXNET3_LEGACY_TX + static void vmxnet3_start_locked(struct ifnet *ifp) @@ -2936,7 +2936,7 @@ vmxnet3_start(struct ifnet *ifp) VMXNET3_TXQ_UNLOCK(txq); } -#else /* !VMXNET3_LEGACY_TX */ + static int vmxnet3_txq_mq_start_locked(struct vmxnet3_txqueue *txq, struct mbuf *m) @@ -3043,7 +3043,7 @@ vmxnet3_txq_tq_deferred(void *xtxq, int pending) VMXNET3_TXQ_UNLOCK(txq); } -#endif /* VMXNET3_LEGACY_TX */ + static void vmxnet3_txq_start(struct vmxnet3_txqueue *txq) @@ -3054,13 +3054,13 @@ vmxnet3_txq_start(struct vmxnet3_txqueue *txq) sc = txq->vxtxq_sc; ifp = sc->vmx_ifp; -#ifdef VMXNET3_LEGACY_TX + if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) vmxnet3_start_locked(ifp); -#else + if (!drbr_empty(ifp, txq->vxtxq_br)) vmxnet3_txq_mq_start_locked(txq, NULL); -#endif + } static void diff --git a/sys/geom/bde/g_bde.h b/sys/geom/bde/g_bde.h index 9332c6b..2f29fe3 100644 --- a/sys/geom/bde/g_bde.h +++ b/sys/geom/bde/g_bde.h @@ -182,7 +182,7 @@ AES_init(cipherInstance *ci) } static __inline void -AES_makekey(keyInstance *ki, int dir, u_int len, void *key) +AES_makekey(keyInstance *ki, int dir, u_int len, const void *key) { int error; @@ -191,7 +191,7 @@ AES_makekey(keyInstance *ki, int dir, u_int len, void *key) } static __inline void -AES_encrypt(cipherInstance *ci, keyInstance *ki, void *in, void *out, u_int len) +AES_encrypt(cipherInstance *ci, keyInstance *ki, const void *in, void *out, u_int len) { int error; @@ -200,7 +200,7 @@ AES_encrypt(cipherInstance *ci, keyInstance *ki, void *in, void *out, u_int len) } static __inline void -AES_decrypt(cipherInstance *ci, keyInstance *ki, void *in, void *out, u_int len) +AES_decrypt(cipherInstance *ci, keyInstance *ki, const void *in, void *out, u_int len) { int error; diff --git a/sys/geom/eli/g_eli_crypto.c b/sys/geom/eli/g_eli_crypto.c index 9b42097..43eabf4 100644 --- a/sys/geom/eli/g_eli_crypto.c +++ b/sys/geom/eli/g_eli_crypto.c @@ -32,7 +32,6 @@ __FBSDID("$FreeBSD$"); #include <sys/systm.h> #include <sys/kernel.h> #include <sys/malloc.h> -#include <sys/uio.h> #else #include <stdint.h> #include <string.h> @@ -63,8 +62,6 @@ g_eli_crypto_cipher(u_int algo, int enc, u_char *data, size_t datasize, struct cryptoini cri; struct cryptop *crp; struct cryptodesc *crd; - struct uio *uio; - struct iovec *iov; uint64_t sid; u_char *p; int error; @@ -79,24 +76,13 @@ g_eli_crypto_cipher(u_int algo, int enc, u_char *data, size_t datasize, error = crypto_newsession(&sid, &cri, CRYPTOCAP_F_SOFTWARE); if (error != 0) return (error); - p = malloc(sizeof(*crp) + sizeof(*crd) + sizeof(*uio) + sizeof(*iov), - M_ELI, M_NOWAIT | M_ZERO); + p = malloc(sizeof(*crp) + sizeof(*crd), M_ELI, M_NOWAIT | M_ZERO); if (p == NULL) { crypto_freesession(sid); return (ENOMEM); } crp = (struct cryptop *)p; p += sizeof(*crp); crd = (struct cryptodesc *)p; p += sizeof(*crd); - uio = (struct uio *)p; p += sizeof(*uio); - iov = (struct iovec *)p; p += sizeof(*iov); - - iov->iov_len = datasize; - iov->iov_base = data; - - uio->uio_iov = iov; - uio->uio_iovcnt = 1; - uio->uio_segflg = UIO_SYSSPACE; - uio->uio_resid = datasize; crd->crd_skip = 0; crd->crd_len = datasize; @@ -114,8 +100,8 @@ g_eli_crypto_cipher(u_int algo, int enc, u_char *data, size_t datasize, crp->crp_olen = datasize; crp->crp_opaque = NULL; crp->crp_callback = g_eli_crypto_done; - crp->crp_buf = (void *)uio; - crp->crp_flags = CRYPTO_F_IOV | CRYPTO_F_CBIFSYNC | CRYPTO_F_REL; + crp->crp_buf = (void *)data; + crp->crp_flags = CRYPTO_F_CBIFSYNC; crp->crp_desc = crd; error = crypto_dispatch(crp); diff --git a/sys/geom/eli/g_eli_integrity.c b/sys/geom/eli/g_eli_integrity.c index aeb5c2a..f7bf1fd 100644 --- a/sys/geom/eli/g_eli_integrity.c +++ b/sys/geom/eli/g_eli_integrity.c @@ -41,7 +41,6 @@ __FBSDID("$FreeBSD$"); #include <sys/proc.h> #include <sys/sched.h> #include <sys/smp.h> -#include <sys/uio.h> #include <sys/vnode.h> #include <vm/uma.h> @@ -363,8 +362,6 @@ g_eli_auth_read(struct g_eli_softc *sc, struct bio *bp) size += sizeof(struct cryptop) * nsec; size += sizeof(struct cryptodesc) * nsec * 2; size += G_ELI_AUTH_SECKEYLEN * nsec; - size += sizeof(struct uio) * nsec; - size += sizeof(struct iovec) * nsec; cbp->bio_offset = (bp->bio_offset / bp->bio_to->sectorsize) * sc->sc_bytes_per_sector; bp->bio_driver2 = malloc(size, M_ELI, M_WAITOK); cbp->bio_data = bp->bio_driver2; @@ -409,8 +406,6 @@ g_eli_auth_run(struct g_eli_worker *wr, struct bio *bp) struct g_eli_softc *sc; struct cryptop *crp; struct cryptodesc *crde, *crda; - struct uio *uio; - struct iovec *iov; u_int i, lsec, nsec, data_secsize, decr_secsize, encr_secsize; off_t dstoff; int err, error; @@ -449,8 +444,6 @@ g_eli_auth_run(struct g_eli_worker *wr, struct bio *bp) size += sizeof(*crde) * nsec; size += sizeof(*crda) * nsec; size += G_ELI_AUTH_SECKEYLEN * nsec; - size += sizeof(*uio) * nsec; - size += sizeof(*iov) * nsec; data = malloc(size, M_ELI, M_WAITOK); bp->bio_driver2 = data; p = data + encr_secsize * nsec; @@ -464,8 +457,6 @@ g_eli_auth_run(struct g_eli_worker *wr, struct bio *bp) crde = (struct cryptodesc *)p; p += sizeof(*crde); crda = (struct cryptodesc *)p; p += sizeof(*crda); authkey = (u_char *)p; p += G_ELI_AUTH_SECKEYLEN; - uio = (struct uio *)p; p += sizeof(*uio); - iov = (struct iovec *)p; p += sizeof(*iov); data_secsize = sc->sc_data_per_sector; if ((i % lsec) == 0) @@ -482,21 +473,13 @@ g_eli_auth_run(struct g_eli_worker *wr, struct bio *bp) plaindata += data_secsize; } - iov->iov_len = sc->sc_alen + data_secsize; - iov->iov_base = data; - data += encr_secsize; - - uio->uio_iov = iov; - uio->uio_iovcnt = 1; - uio->uio_segflg = UIO_SYSSPACE; - uio->uio_resid = iov->iov_len; - crp->crp_sid = wr->w_sid; - crp->crp_ilen = uio->uio_resid; + crp->crp_ilen = sc->sc_alen + data_secsize; crp->crp_olen = data_secsize; crp->crp_opaque = (void *)bp; - crp->crp_buf = (void *)uio; - crp->crp_flags = CRYPTO_F_IOV | CRYPTO_F_CBIFSYNC | CRYPTO_F_REL; + crp->crp_buf = (void *)data; + data += encr_secsize; + crp->crp_flags = CRYPTO_F_CBIFSYNC; if (g_eli_batch) crp->crp_flags |= CRYPTO_F_BATCH; if (bp->bio_cmd == BIO_WRITE) { diff --git a/sys/geom/eli/g_eli_privacy.c b/sys/geom/eli/g_eli_privacy.c index cad3881..a60efe8 100644 --- a/sys/geom/eli/g_eli_privacy.c +++ b/sys/geom/eli/g_eli_privacy.c @@ -41,7 +41,6 @@ __FBSDID("$FreeBSD$"); #include <sys/proc.h> #include <sys/sched.h> #include <sys/smp.h> -#include <sys/uio.h> #include <sys/vnode.h> #include <vm/uma.h> @@ -230,8 +229,6 @@ g_eli_crypto_run(struct g_eli_worker *wr, struct bio *bp) struct g_eli_softc *sc; struct cryptop *crp; struct cryptodesc *crd; - struct uio *uio; - struct iovec *iov; u_int i, nsec, secsize; int err, error; off_t dstoff; @@ -254,8 +251,6 @@ g_eli_crypto_run(struct g_eli_worker *wr, struct bio *bp) */ size = sizeof(*crp) * nsec; size += sizeof(*crd) * nsec; - size += sizeof(*uio) * nsec; - size += sizeof(*iov) * nsec; /* * If we write the data we cannot destroy current bio_data content, * so we need to allocate more memory for encrypted data. @@ -280,28 +275,18 @@ g_eli_crypto_run(struct g_eli_worker *wr, struct bio *bp) for (i = 0, dstoff = bp->bio_offset; i < nsec; i++, dstoff += secsize) { crp = (struct cryptop *)p; p += sizeof(*crp); crd = (struct cryptodesc *)p; p += sizeof(*crd); - uio = (struct uio *)p; p += sizeof(*uio); - iov = (struct iovec *)p; p += sizeof(*iov); - - iov->iov_len = secsize; - iov->iov_base = data; - data += secsize; - - uio->uio_iov = iov; - uio->uio_iovcnt = 1; - uio->uio_segflg = UIO_SYSSPACE; - uio->uio_resid = secsize; crp->crp_sid = wr->w_sid; crp->crp_ilen = secsize; crp->crp_olen = secsize; crp->crp_opaque = (void *)bp; - crp->crp_buf = (void *)uio; + crp->crp_buf = (void *)data; + data += secsize; if (bp->bio_cmd == BIO_WRITE) crp->crp_callback = g_eli_crypto_write_done; else /* if (bp->bio_cmd == BIO_READ) */ crp->crp_callback = g_eli_crypto_read_done; - crp->crp_flags = CRYPTO_F_IOV | CRYPTO_F_CBIFSYNC | CRYPTO_F_REL; + crp->crp_flags = CRYPTO_F_CBIFSYNC; if (g_eli_batch) crp->crp_flags |= CRYPTO_F_BATCH; crp->crp_desc = crd; diff --git a/sys/i386/conf/pfSense b/sys/i386/conf/pfSense new file mode 120000 index 0000000..70fe47c --- /dev/null +++ b/sys/i386/conf/pfSense @@ -0,0 +1 @@ +../../amd64/conf/pfSense
\ No newline at end of file diff --git a/sys/i386/conf/pfSense_wrap b/sys/i386/conf/pfSense_wrap new file mode 100644 index 0000000..ae7dea8 --- /dev/null +++ b/sys/i386/conf/pfSense_wrap @@ -0,0 +1,208 @@ +include GENERIC + +nooptions KDB_TRACE +options DDB # Support DDB. + +ident pfSense_wrap + +nooptions MAC # TrustedBSD MAC Framework +nooptions COMPAT_FREEBSD4 # Compatible with FreeBSD4 +nooptions COMPAT_FREEBSD5 # Compatible with FreeBSD5 +nooptions COMPAT_FREEBSD6 # Compatible with FreeBSD6 +nooptions COMPAT_FREEBSD7 # Compatible with FreeBSD7 + +options GEOM_MIRROR +options GEOM_UZIP +options GEOM_PART_MBR +options GEOM_PART_BSD +options GEOM_ELI +options GEOM_BDE + +options CPU_GEODE +options CPU_SOEKRIS +options CPU_ELAN + +options TMPFS +options UNIONFS +options NULLFS +options PPS_SYNC + +# Bus support. Do not remove isa, even if you have no isa slots +device isa + +# VGA +nodevice vga # VGA video card driver +nooptions VESA # Add support for VESA BIOS Extensions (VBE) + +nodevice splash # Splash screen and screen saver support + +# syscons is the default console driver, resembling an SCO console +nodevice sc +nooptions SC_PIXEL_MODE # add support for the raster text mode + +# vt is the new video console driver +nodevice vt +nodevice vt_vga + +# Wireless +nooptions IEEE80211_DEBUG # enable debug msgs +device wlan_rssadapt +device wlan_xauth +device wlan_acl +device iwifw +device ipwfw # Firmware for Intel PRO/Wireless 2100 IEEE 802.11 driver +device wpifw # Firmware for Intel 3945ABG Wireless LAN IEEE 802.11 driver +device iwnfw # Firmware for Intel Wireless WiFi Link 4965AGN IEEE 802.11n driver +device uath # Atheros USB IEEE 802.11a/b/g wireless network device +device ralfw # Firmware for Ralink Technology RT2500 wireless NICs. +device ural # Ralink Technology RT2500USB IEEE 802.11 driver +device urtw # Realtek RTL8187B/L USB IEEE 802.11b/g wireless network device +device rum # Ralink Technology USB IEEE 802.11a/b/g wireless network device +device mwlfw # Firmware for Marvell 88W8363 IEEE 802.11n wireless network driver +device zyd # ZyDAS ZD1211/ZD1211B USB IEEE 802.11b/g wireless network device +device upgt # Conexant/Intersil PrismGT SoftMAC USB IEEE 802.11b/g wireless +device udav # Davicom DM9601 USB Ethernet driver +device axe +device axge +device aue +device cue +device kue +device mos +device rsu +device rsufw +device run # Ralink RT2700U/RT2800U/RT3000U USB 802.11agn +device runfw +device rue +device urtwn +device urtwnfw + +# Only for 8.1+ +device siba_bwn # Broadcom BCM43xx IEEE 802.11b/g wireless network driver +device bwn # Broadcom BCM43xx IEEE 802.11b/g wireless network driver +device bwi # Broadcom BCM43xx IEEE 802.11b/g wireless network driver + +# Pseudo devices. +device pty # Pseudo-ttys (telnet etc) + +# USB support +nooptions USB_DEBUG # enable debug msgs + +# 3G devices +device ufoma +device ucom +device uslcom +device uplcom +device umct +device uvisor +device uark +device uftdi +device uvscom +device umodem +device u3g +device cdce + +device uhid # "Human Interface Devices" + +# FireWire support +device firewire # FireWire bus code +device sbp # SCSI over FireWire (Requires scbus and da) + +# pfsense addons + +device tap +device gre +device if_bridge +device lagg +device vte + +options IPFIREWALL_DEFAULT_TO_ACCEPT +options IPFIREWALL_VERBOSE + +options IPSTEALTH + +options NETGRAPH #netgraph(4) system +options NETGRAPH_VLAN +options NETGRAPH_L2TP +options NETGRAPH_BPF +options NETGRAPH_ETHER +options NETGRAPH_IFACE +options NETGRAPH_EIFACE +options NETGRAPH_PPP +options NETGRAPH_PPPOE +options NETGRAPH_PPTPGRE +options NETGRAPH_RFC1490 +options NETGRAPH_SOCKET +options NETGRAPH_TTY +options NETGRAPH_MPPC_ENCRYPTION +options NETGRAPH_UI +options NETGRAPH_VJC +options NETGRAPH_KSOCKET +options NETGRAPH_LMI +options NETGRAPH_ONE2MANY +options NETGRAPH_BRIDGE +options NETGRAPH_CISCO +options NETGRAPH_ECHO +options NETGRAPH_ASYNC +options NETGRAPH_FRAME_RELAY +options NETGRAPH_HOLE +options NETGRAPH_TEE +options NETGRAPH_TCPMSS +options NETGRAPH_PIPE +options NETGRAPH_CAR +options NETGRAPH_DEFLATE +options NETGRAPH_PRED1 + +options IPSEC +options IPSEC_NAT_T +options TCP_SIGNATURE + +# IPSEC filtering interface +device enc + +options ALTQ +options ALTQ_CBQ +options ALTQ_RED +options ALTQ_RIO +options ALTQ_HFSC +options ALTQ_PRIQ +options ALTQ_FAIRQ +options ALTQ_NOPCC +options ALTQ_CODEL + +# Squid related settings +options MSGMNB=8192 # max # of bytes in a queue +options MSGMNI=40 # number of message queue identifiers +options MSGSEG=512 # number of message segments per queue +options MSGSSZ=32 # size of a message segment +options MSGTQL=2048 # max messages in system + +device pf +device pflog +device carp +device pfsync + +device crypto # core crypto support +device cryptodev # /dev/crypto for access to h/w +device rndtest # FIPS 140-2 entropy tester +device hifn # Hifn 7951, 7781, etc. +options HIFN_DEBUG # enable debugging support: hw.hifn.debug +options HIFN_RNDTEST # enable rndtest support +device ubsec # Broadcom 5501, 5601, 58xx +device safe # safe -- SafeNet crypto accelerator +device padlock + +device speaker + +options DEVICE_POLLING + +options MROUTING + +# Additional cards +device mxge # mxge - Myricom Myri10GE 10 Gigabit Ethernet adapter driver +device cxgb # cxgb -- Chelsio T3 10 Gigabit Ethernet adapter driver +device cxgbe # cxgbe -- Chelsio T5 10 Gigabit Ethernet adapter driver +device nve # nVidia nForce MCP on-board Ethernet Networking +device oce + +# Default serial speed +options CONSPEED=115200 diff --git a/sys/i386/conf/pfSense_wrap_vga b/sys/i386/conf/pfSense_wrap_vga new file mode 100644 index 0000000..eb54286 --- /dev/null +++ b/sys/i386/conf/pfSense_wrap_vga @@ -0,0 +1,194 @@ +include GENERIC + +nooptions KDB_TRACE +options DDB # Support DDB. + +ident pfSense_wrap_vga + +nooptions MAC # TrustedBSD MAC Framework +nooptions COMPAT_FREEBSD4 # Compatible with FreeBSD4 +nooptions COMPAT_FREEBSD5 # Compatible with FreeBSD5 +nooptions COMPAT_FREEBSD6 # Compatible with FreeBSD6 +nooptions COMPAT_FREEBSD7 # Compatible with FreeBSD7 + +options GEOM_MIRROR +options GEOM_UZIP +options GEOM_PART_MBR +options GEOM_PART_BSD +options GEOM_ELI +options GEOM_BDE + +options CPU_GEODE +options CPU_SOEKRIS +options CPU_ELAN + +options TMPFS +options UNIONFS +options NULLFS +options PPS_SYNC + +# Bus support. Do not remove isa, even if you have no isa slots +device isa + +# Wireless +nooptions IEEE80211_DEBUG # enable debug msgs +device wlan_rssadapt +device wlan_xauth +device wlan_acl +device iwifw +device ipwfw # Firmware for Intel PRO/Wireless 2100 IEEE 802.11 driver +device wpifw # Firmware for Intel 3945ABG Wireless LAN IEEE 802.11 driver +device iwnfw # Firmware for Intel Wireless WiFi Link 4965AGN IEEE 802.11n driver +device uath # Atheros USB IEEE 802.11a/b/g wireless network device +device ralfw # Firmware for Ralink Technology RT2500 wireless NICs. +device ural # Ralink Technology RT2500USB IEEE 802.11 driver +device urtw # Realtek RTL8187B/L USB IEEE 802.11b/g wireless network device +device rum # Ralink Technology USB IEEE 802.11a/b/g wireless network device +device mwlfw # Firmware for Marvell 88W8363 IEEE 802.11n wireless network driver +device zyd # ZyDAS ZD1211/ZD1211B USB IEEE 802.11b/g wireless network device +device upgt # Conexant/Intersil PrismGT SoftMAC USB IEEE 802.11b/g wireless +device udav # Davicom DM9601 USB Ethernet driver +device axe +device axge +device aue +device cue +device kue +device mos +device rsu +device rsufw +device run # Ralink RT2700U/RT2800U/RT3000U USB 802.11agn +device runfw +device rue +device urtwn +device urtwnfw + +# Only for 8.1+ +device siba_bwn # Broadcom BCM43xx IEEE 802.11b/g wireless network driver +device bwn # Broadcom BCM43xx IEEE 802.11b/g wireless network driver +device bwi # Broadcom BCM43xx IEEE 802.11b/g wireless network driver + +# Pseudo devices. +device pty # Pseudo-ttys (telnet etc) + +# USB support +nooptions USB_DEBUG # enable debug msgs + +# 3G devices +device ufoma +device ucom +device uslcom +device uplcom +device umct +device uvisor +device uark +device uftdi +device uvscom +device umodem +device u3g +device cdce + +device uhid # "Human Interface Devices" + +# FireWire support +device firewire # FireWire bus code +device sbp # SCSI over FireWire (Requires scbus and da) + +# pfsense addons + +device tap +device gre +device if_bridge +device lagg +device vte + +options IPFIREWALL_DEFAULT_TO_ACCEPT +options IPFIREWALL_VERBOSE + +options IPSTEALTH + +options NETGRAPH #netgraph(4) system +options NETGRAPH_VLAN +options NETGRAPH_L2TP +options NETGRAPH_BPF +options NETGRAPH_ETHER +options NETGRAPH_IFACE +options NETGRAPH_EIFACE +options NETGRAPH_PPP +options NETGRAPH_PPPOE +options NETGRAPH_PPTPGRE +options NETGRAPH_RFC1490 +options NETGRAPH_SOCKET +options NETGRAPH_TTY +options NETGRAPH_MPPC_ENCRYPTION +options NETGRAPH_UI +options NETGRAPH_VJC +options NETGRAPH_KSOCKET +options NETGRAPH_LMI +options NETGRAPH_ONE2MANY +options NETGRAPH_BRIDGE +options NETGRAPH_CISCO +options NETGRAPH_ECHO +options NETGRAPH_ASYNC +options NETGRAPH_FRAME_RELAY +options NETGRAPH_HOLE +options NETGRAPH_TEE +options NETGRAPH_TCPMSS +options NETGRAPH_PIPE +options NETGRAPH_CAR +options NETGRAPH_DEFLATE +options NETGRAPH_PRED1 + +options IPSEC +options IPSEC_NAT_T +options TCP_SIGNATURE + +# IPSEC filtering interface +device enc + +options ALTQ +options ALTQ_CBQ +options ALTQ_RED +options ALTQ_RIO +options ALTQ_HFSC +options ALTQ_PRIQ +options ALTQ_FAIRQ +options ALTQ_NOPCC +options ALTQ_CODEL + +# Squid related settings +options MSGMNB=8192 # max # of bytes in a queue +options MSGMNI=40 # number of message queue identifiers +options MSGSEG=512 # number of message segments per queue +options MSGSSZ=32 # size of a message segment +options MSGTQL=2048 # max messages in system + +device pf +device pflog +device carp +device pfsync + +device crypto # core crypto support +device cryptodev # /dev/crypto for access to h/w +device rndtest # FIPS 140-2 entropy tester +device hifn # Hifn 7951, 7781, etc. +options HIFN_DEBUG # enable debugging support: hw.hifn.debug +options HIFN_RNDTEST # enable rndtest support +device ubsec # Broadcom 5501, 5601, 58xx +device safe # safe -- SafeNet crypto accelerator +device padlock + +device speaker + +options DEVICE_POLLING + +options MROUTING + +# Additional cards +device mxge # mxge - Myricom Myri10GE 10 Gigabit Ethernet adapter driver +device cxgb # cxgb -- Chelsio T3 10 Gigabit Ethernet adapter driver +device cxgbe # cxgbe -- Chelsio T5 10 Gigabit Ethernet adapter driver +device nve # nVidia nForce MCP on-board Ethernet Networking +device oce + +# Default serial speed +options CONSPEED=115200 diff --git a/sys/libkern/explicit_bzero.c b/sys/libkern/explicit_bzero.c new file mode 100644 index 0000000..2468c55 --- /dev/null +++ b/sys/libkern/explicit_bzero.c @@ -0,0 +1,24 @@ +/* $OpenBSD: explicit_bzero.c,v 1.3 2014/06/21 02:34:26 matthew Exp $ */ +/* + * Public domain. + * Written by Matthew Dempsky. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/libkern.h> + +__attribute__((weak)) void __explicit_bzero_hook(void *, size_t); + +__attribute__((weak)) void +__explicit_bzero_hook(void *buf, size_t len) +{ +} + +void +explicit_bzero(void *buf, size_t len) +{ + memset(buf, 0, len); + __explicit_bzero_hook(buf, len); +} diff --git a/sys/libkern/timingsafe_bcmp.c b/sys/libkern/timingsafe_bcmp.c new file mode 100644 index 0000000..ebada62 --- /dev/null +++ b/sys/libkern/timingsafe_bcmp.c @@ -0,0 +1,32 @@ +/* $OpenBSD: timingsafe_bcmp.c,v 1.2 2014/06/10 04:16:57 deraadt Exp $ */ +/* + * Copyright (c) 2010 Damien Miller. All rights reserved. + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * $FreeBSD$ + * + */ + +#include <sys/libkern.h> + +int +timingsafe_bcmp(const void *b1, const void *b2, size_t n) +{ + const unsigned char *p1 = b1, *p2 = b2; + int ret = 0; + + for (; n > 0; n--) + ret |= *p1++ ^ *p2++; + return (ret != 0); +} diff --git a/sys/mips/rmi/dev/sec/rmisec.c b/sys/mips/rmi/dev/sec/rmisec.c index 6501f75..31098bc 100644 --- a/sys/mips/rmi/dev/sec/rmisec.c +++ b/sys/mips/rmi/dev/sec/rmisec.c @@ -423,13 +423,8 @@ xlr_sec_process(device_t dev, struct cryptop *crp, int hint) cmd->op.source_buf = (uint64_t) (unsigned long)crp->crp_buf; cmd->op.source_buf_size = crp->crp_ilen; - if (crp->crp_flags & CRYPTO_F_REL) { - cmd->op.dest_buf = (uint64_t) (unsigned long)crp->crp_buf; - cmd->op.dest_buf_size = crp->crp_ilen; - } else { - cmd->op.dest_buf = (uint64_t) (unsigned long)crp->crp_buf; - cmd->op.dest_buf_size = crp->crp_ilen; - } + cmd->op.dest_buf = (uint64_t) (unsigned long)crp->crp_buf; + cmd->op.dest_buf_size = crp->crp_ilen; cmd->op.num_packets = 1; cmd->op.num_fragments = 1; diff --git a/sys/modules/aesni/Makefile b/sys/modules/aesni/Makefile index 26dbedc..6fdfc7e 100644 --- a/sys/modules/aesni/Makefile +++ b/sys/modules/aesni/Makefile @@ -7,13 +7,18 @@ SRCS= aesni.c SRCS+= aeskeys_${MACHINE_CPUARCH}.S SRCS+= device_if.h bus_if.h opt_bus.h cryptodev_if.h -OBJS+= aesni_wrap.o +OBJS+= aesni_ghash.o aesni_wrap.o # Remove -nostdinc so we can get the intrinsics. +aesni_ghash.o: aesni_ghash.c + # XXX - gcc won't understand -mpclmul + ${CC} -c ${CFLAGS:C/^-O2$/-O3/:N-nostdinc} ${WERROR} ${PROF} \ + -mmmx -msse -msse4 -maes -mpclmul ${.IMPSRC} + ${CTFCONVERT_CMD} + aesni_wrap.o: aesni_wrap.c ${CC} -c ${CFLAGS:C/^-O2$/-O3/:N-nostdinc} ${WERROR} ${PROF} \ - -mmmx -msse -maes ${.IMPSRC} + -mmmx -msse -msse4 -maes ${.IMPSRC} ${CTFCONVERT_CMD} .include <bsd.kmod.mk> - diff --git a/sys/modules/crypto/Makefile b/sys/modules/crypto/Makefile index 6a37c10..9153445 100644 --- a/sys/modules/crypto/Makefile +++ b/sys/modules/crypto/Makefile @@ -3,19 +3,23 @@ .PATH: ${.CURDIR}/../../opencrypto .PATH: ${.CURDIR}/../../crypto .PATH: ${.CURDIR}/../../crypto/blowfish +.PATH: ${.CURDIR}/../../crypto/camellia .PATH: ${.CURDIR}/../../crypto/des .PATH: ${.CURDIR}/../../crypto/rijndael .PATH: ${.CURDIR}/../../crypto/sha2 -.PATH: ${.CURDIR}/../../crypto/camellia +.PATH: ${.CURDIR}/../../crypto/siphash KMOD = crypto SRCS = crypto.c cryptodev_if.c SRCS += criov.c cryptosoft.c xform.c SRCS += cast.c cryptodeflate.c rmd160.c rijndael-alg-fst.c rijndael-api.c SRCS += skipjack.c bf_enc.c bf_ecb.c bf_skey.c +SRCS += camellia.c camellia-api.c SRCS += des_ecb.c des_enc.c des_setkey.c -SRCS += sha1.c sha2.c +SRCS += sha1.c sha2.c sha256c.c +SRCS += siphash.c +SRCS += gmac.c gfmult.c SRCS += opt_param.h cryptodev_if.h bus_if.h device_if.h -SRCS += opt_ddb.h opt_kdtrace.h -SRCS += camellia.c camellia-api.c +SRCS += opt_ddb.h + .include <bsd.kmod.mk> diff --git a/sys/modules/geom/geom_bde/Makefile b/sys/modules/geom/geom_bde/Makefile index 089126e..5c5a590 100644 --- a/sys/modules/geom/geom_bde/Makefile +++ b/sys/modules/geom/geom_bde/Makefile @@ -6,6 +6,6 @@ KMOD= geom_bde SRCS= g_bde.c g_bde_crypt.c g_bde_lock.c g_bde_work.c -SRCS+= rijndael-alg-fst.c rijndael-api-fst.c sha2.c +SRCS+= rijndael-alg-fst.c rijndael-api-fst.c sha2.c sha256c.c .include <bsd.kmod.mk> diff --git a/sys/modules/gpioapu/Makefile b/sys/modules/gpioapu/Makefile new file mode 100644 index 0000000..80fdd2c --- /dev/null +++ b/sys/modules/gpioapu/Makefile @@ -0,0 +1,7 @@ + +.PATH: ${.CURDIR}/../../dev/gpioapu +KMOD = gpioapu +SRCS = gpioapu.c +SRCS += device_if.h bus_if.h pci_if.h isa_if.h + +.include <bsd.kmod.mk> diff --git a/sys/modules/random/Makefile b/sys/modules/random/Makefile index 332515d..6bf47f2 100644 --- a/sys/modules/random/Makefile +++ b/sys/modules/random/Makefile @@ -12,7 +12,7 @@ SRCS+= ivy.c .endif SRCS+= randomdev_soft.c yarrow.c hash.c SRCS+= random_harvestq.c live_entropy_sources.c rwfile.c -SRCS+= rijndael-alg-fst.c rijndael-api-fst.c sha2.c +SRCS+= rijndael-alg-fst.c rijndael-api-fst.c sha2.c sha256c.c SRCS+= bus_if.h device_if.h vnode_if.h opt_cpu.h opt_random.h CFLAGS+= -I${.CURDIR}/../.. diff --git a/sys/modules/zfs/Makefile b/sys/modules/zfs/Makefile index d2da46b..53e8d34 100644 --- a/sys/modules/zfs/Makefile +++ b/sys/modules/zfs/Makefile @@ -72,7 +72,7 @@ SRCS+= zmod_subr.c SRCS+= zutil.c .PATH: ${.CURDIR}/../../crypto/sha2 -SRCS+= sha2.c +SRCS+= sha2.c sha256c.c .PATH: ${SUNW}/common/zfs .include "${SUNW}/uts/common/Makefile.files" diff --git a/sys/net/if.h b/sys/net/if.h index 04bd1f1..ec6287d 100644 --- a/sys/net/if.h +++ b/sys/net/if.h @@ -245,7 +245,7 @@ struct if_data { #define IFCAP_CANTCHANGE (IFCAP_NETMAP) -#define IFQ_MAXLEN 50 +#define IFQ_MAXLEN 128 #define IFNET_SLOWHZ 1 /* granularity is 1 second */ /* @@ -392,6 +392,7 @@ struct ifreq { caddr_t ifru_data; int ifru_cap[2]; u_int ifru_fib; + u_char ifru_vlan_pcp; } ifr_ifru; #define ifr_addr ifr_ifru.ifru_addr /* address */ #define ifr_dstaddr ifr_ifru.ifru_dstaddr /* other end of p-to-p link */ @@ -409,6 +410,7 @@ struct ifreq { #define ifr_curcap ifr_ifru.ifru_cap[1] /* current capabilities */ #define ifr_index ifr_ifru.ifru_index /* interface index */ #define ifr_fib ifr_ifru.ifru_fib /* interface fib */ +#define ifr_vlan_pcp ifr_ifru.ifru_vlan_pcp /* VLAN priority */ }; #define _SIZEOF_ADDR_IFREQ(ifr) \ diff --git a/sys/net/if_arcsubr.c b/sys/net/if_arcsubr.c index 2f94785..1091ae4 100644 --- a/sys/net/if_arcsubr.c +++ b/sys/net/if_arcsubr.c @@ -557,15 +557,11 @@ arc_input(struct ifnet *ifp, struct mbuf *m) #ifdef INET case ARCTYPE_IP: m_adj(m, ARC_HDRNEWLEN); - if ((m = ip_fastforward(m)) == NULL) - return; isr = NETISR_IP; break; case ARCTYPE_IP_OLD: m_adj(m, ARC_HDRLEN); - if ((m = ip_fastforward(m)) == NULL) - return; isr = NETISR_IP; break; diff --git a/sys/net/if_bridge.c b/sys/net/if_bridge.c index f7c6365..14d9967 100644 --- a/sys/net/if_bridge.c +++ b/sys/net/if_bridge.c @@ -243,6 +243,7 @@ static void bridge_ifdetach(void *arg __unused, struct ifnet *); static void bridge_init(void *); static void bridge_dummynet(struct mbuf *, struct ifnet *); static void bridge_stop(struct ifnet *, int); +static void bridge_start(struct ifnet *); static int bridge_transmit(struct ifnet *, struct mbuf *); static void bridge_qflush(struct ifnet *); static struct mbuf *bridge_input(struct ifnet *, struct mbuf *); @@ -607,10 +608,13 @@ bridge_clone_create(struct if_clone *ifc, int unit, caddr_t params) if_initname(ifp, bridge_name, unit); ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = bridge_ioctl; + ifp->if_start = bridge_start; ifp->if_transmit = bridge_transmit; ifp->if_qflush = bridge_qflush; ifp->if_init = bridge_init; ifp->if_type = IFT_BRIDGE; + IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen); + IFQ_SET_READY(&ifp->if_snd); /* * Generate an ethernet address with a locally administered address. @@ -815,6 +819,8 @@ bridge_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) } BRIDGE_LOCK(sc); LIST_FOREACH(bif, &sc->sc_iflist, bif_next) { + if (bif->bif_ifp->if_type == IFT_GIF) + continue; if (bif->bif_ifp->if_mtu != ifr->ifr_mtu) { log(LOG_NOTICE, "%s: invalid MTU: %lu(%s)" " != %d\n", sc->sc_ifp->if_xname, @@ -1106,6 +1112,7 @@ bridge_ioctl_add(struct bridge_softc *sc, void *arg) } #endif /* Allow the first Ethernet member to define the MTU */ + if (ifs->if_type != IFT_GIF) { if (LIST_EMPTY(&sc->sc_iflist)) sc->sc_ifp->if_mtu = ifs->if_mtu; else if (sc->sc_ifp->if_mtu != ifs->if_mtu) { @@ -1113,6 +1120,7 @@ bridge_ioctl_add(struct bridge_softc *sc, void *arg) ifs->if_mtu, ifs->if_xname, sc->sc_ifp->if_mtu); return (EINVAL); } + } bif = malloc(sizeof(*bif), M_DEVBUF, M_NOWAIT|M_ZERO); if (bif == NULL) @@ -2071,6 +2079,47 @@ bridge_qflush(struct ifnet *ifp __unused) } /* + * bridge_start: + * + * Start output on a bridge. + * + */ +static void +bridge_start(struct ifnet *ifp) +{ + struct bridge_softc *sc; + struct mbuf *m; + struct ether_header *eh; + struct ifnet *dst_if; + + sc = ifp->if_softc; + + ifp->if_drv_flags |= IFF_DRV_OACTIVE; + for (;;) { + IFQ_DEQUEUE(&ifp->if_snd, m); + if (m == 0) + break; + ETHER_BPF_MTAP(ifp, m); + + eh = mtod(m, struct ether_header *); + dst_if = NULL; + + BRIDGE_LOCK(sc); + if ((m->m_flags & (M_BCAST|M_MCAST)) == 0) { + dst_if = bridge_rtlookup(sc, eh->ether_dhost, 1); + } + + if (dst_if == NULL) + bridge_broadcast(sc, ifp, m, 0); + else { + BRIDGE_UNLOCK(sc); + bridge_enqueue(sc, dst_if, m); + } + } + ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; +} + +/* * bridge_forward: * * The forwarding function of the bridge. diff --git a/sys/net/if_ef.c b/sys/net/if_ef.c index fc6402c..478dfb4 100644 --- a/sys/net/if_ef.c +++ b/sys/net/if_ef.c @@ -240,8 +240,6 @@ ef_inputEII(struct mbuf *m, struct ether_header *eh, u_short ether_type) #endif #ifdef INET case ETHERTYPE_IP: - if ((m = ip_fastforward(m)) == NULL) - return (0); isr = NETISR_IP; break; diff --git a/sys/net/if_ethersubr.c b/sys/net/if_ethersubr.c index ea22d33..773918f 100644 --- a/sys/net/if_ethersubr.c +++ b/sys/net/if_ethersubr.c @@ -108,6 +108,8 @@ CTASSERT(sizeof (struct ether_addr) == ETHER_ADDR_LEN); VNET_DEFINE(struct pfil_head, link_pfil_hook); /* Packet filter hooks */ +SYSCTL_DECL(_net_link); + /* netgraph node hooks for ng_ether(4) */ void (*ng_ether_input_p)(struct ifnet *ifp, struct mbuf **mp); void (*ng_ether_input_orphan_p)(struct ifnet *ifp, struct mbuf *m); @@ -610,7 +612,7 @@ ether_input_internal(struct ifnet *ifp, struct mbuf *m) return; } eh = mtod(m, struct ether_header *); - } + } else { #if defined(INET) || defined(INET6) /* @@ -639,6 +641,7 @@ ether_input_internal(struct ifnet *ifp, struct mbuf *m) bcmp(IF_LLADDR(ifp), eh->ether_dhost, ETHER_ADDR_LEN) != 0) m->m_flags |= M_PROMISC; } + } if (harvest.ethernet) random_harvest(&(m->m_data), 12, 2, RANDOM_NET_ETHER); @@ -685,6 +688,9 @@ vnet_ether_init(__unused void *arg) if ((i = pfil_head_register(&V_link_pfil_hook)) != 0) printf("%s: WARNING: unable to register pfil link hook, " "error %d\n", __func__, i); + else + pfil_head_export_sysctl(&V_link_pfil_hook, + SYSCTL_STATIC_CHILDREN(_net_link)); } VNET_SYSINIT(vnet_ether_init, SI_SUB_PROTO_IF, SI_ORDER_ANY, vnet_ether_init, NULL); @@ -737,7 +743,11 @@ ether_demux(struct ifnet *ifp, struct mbuf *m) if (i != 0 || m == NULL) return; - } + + /* M_PROTO2 is for M_IP[6]_NEXTHOP */ + i = m->m_flags & (M_FASTFWD_OURS|M_PROTO2); + } else + i = 0; eh = mtod(m, struct ether_header *); ether_type = ntohs(eh->ether_type); @@ -776,6 +786,8 @@ ether_demux(struct ifnet *ifp, struct mbuf *m) */ m->m_flags &= ~M_VLANTAG; m_clrprotoflags(m); + if (i) + m->m_flags |= M_FASTFWD_OURS|M_PROTO2; m_adj(m, ETHER_HDR_LEN); /* @@ -784,8 +796,6 @@ ether_demux(struct ifnet *ifp, struct mbuf *m) switch (ether_type) { #ifdef INET case ETHERTYPE_IP: - if ((m = ip_fastforward(m)) == NULL) - return; isr = NETISR_IP; break; @@ -972,7 +982,6 @@ ether_reassign(struct ifnet *ifp, struct vnet *new_vnet, char *unused __unused) } #endif -SYSCTL_DECL(_net_link); SYSCTL_NODE(_net_link, IFT_ETHER, ether, CTLFLAG_RW, 0, "Ethernet"); #if 0 diff --git a/sys/net/if_fddisubr.c b/sys/net/if_fddisubr.c index 4f54dcb..271f72c 100644 --- a/sys/net/if_fddisubr.c +++ b/sys/net/if_fddisubr.c @@ -501,8 +501,6 @@ fddi_input(ifp, m) switch (type) { #ifdef INET case ETHERTYPE_IP: - if ((m = ip_fastforward(m)) == NULL) - return; isr = NETISR_IP; break; diff --git a/sys/net/if_fwsubr.c b/sys/net/if_fwsubr.c index 31fc2a9..acac423 100644 --- a/sys/net/if_fwsubr.c +++ b/sys/net/if_fwsubr.c @@ -595,8 +595,6 @@ firewire_input(struct ifnet *ifp, struct mbuf *m, uint16_t src) switch (type) { #ifdef INET case ETHERTYPE_IP: - if ((m = ip_fastforward(m)) == NULL) - return; isr = NETISR_IP; break; diff --git a/sys/net/if_iso88025subr.c b/sys/net/if_iso88025subr.c index 5975b28..f96df4e 100644 --- a/sys/net/if_iso88025subr.c +++ b/sys/net/if_iso88025subr.c @@ -579,8 +579,6 @@ iso88025_input(ifp, m) #ifdef INET case ETHERTYPE_IP: th->iso88025_shost[0] &= ~(TR_RII); - if ((m = ip_fastforward(m)) == NULL) - return; isr = NETISR_IP; break; diff --git a/sys/net/if_lagg.c b/sys/net/if_lagg.c index 87f358e..93ec82e 100644 --- a/sys/net/if_lagg.c +++ b/sys/net/if_lagg.c @@ -125,6 +125,7 @@ static int lagg_setflag(struct lagg_port *, int, int, int (*func)(struct ifnet *, int)); static int lagg_setflags(struct lagg_port *, int status); static int lagg_transmit(struct ifnet *, struct mbuf *); +static void lagg_start(struct ifnet *); static void lagg_qflush(struct ifnet *); static int lagg_media_change(struct ifnet *); static void lagg_media_status(struct ifnet *, struct ifmediareq *); @@ -358,11 +359,14 @@ lagg_clone_create(struct if_clone *ifc, int unit, caddr_t params) if_initname(ifp, laggname, unit); ifp->if_softc = sc; ifp->if_transmit = lagg_transmit; + ifp->if_start = lagg_start; ifp->if_qflush = lagg_qflush; ifp->if_init = lagg_init; ifp->if_ioctl = lagg_ioctl; ifp->if_flags = IFF_SIMPLEX | IFF_BROADCAST | IFF_MULTICAST; ifp->if_capenable = ifp->if_capabilities = IFCAP_HWSTATS; + IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen); + IFQ_SET_READY(&ifp->if_snd); /* * Attach as an ordinary ethernet device, children will be attached @@ -1494,6 +1498,40 @@ lagg_transmit(struct ifnet *ifp, struct mbuf *m) return (error); } +static void +lagg_start(struct ifnet *ifp) +{ + struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc; + struct rm_priotracker tracker; + struct mbuf *m; + int error = 0, len; + + LAGG_RLOCK(sc, &tracker); + /* We need a Tx algorithm and at least one port */ + if (sc->sc_proto == LAGG_PROTO_NONE || sc->sc_count == 0) { + IF_DRAIN(&ifp->if_snd); + LAGG_RUNLOCK(sc, &tracker); + return; + } + + for (;; error = 0) { + IFQ_DEQUEUE(&ifp->if_snd, m); + if (m == NULL) + break; + + ETHER_BPF_MTAP(ifp, m); + + len = m->m_pkthdr.len; + error = (*sc->sc_start)(sc, m); + if (error == 0) { + counter_u64_add(sc->sc_opackets, 1); + counter_u64_add(sc->sc_obytes, len); + } else + ifp->if_oerrors++; + } + LAGG_RUNLOCK(sc, &tracker); +} + /* * The ifp->if_qflush entry point for lagg(4) is no-op. */ diff --git a/sys/net/if_pflog.h b/sys/net/if_pflog.h index 0faeb7d..326b551 100644 --- a/sys/net/if_pflog.h +++ b/sys/net/if_pflog.h @@ -40,10 +40,14 @@ struct pfloghdr { char ruleset[PFLOG_RULESET_NAME_SIZE]; u_int32_t rulenr; u_int32_t subrulenr; +#ifdef PF_USER_INFO uid_t uid; pid_t pid; uid_t rule_uid; pid_t rule_pid; +#else + u_int32_t ridentifier; +#endif u_int8_t dir; u_int8_t pad[3]; }; diff --git a/sys/net/if_pfsync.h b/sys/net/if_pfsync.h index 7a72bbb..ef8ba1f 100644 --- a/sys/net/if_pfsync.h +++ b/sys/net/if_pfsync.h @@ -241,6 +241,9 @@ struct pfsyncreq { char pfsyncr_syncdev[IFNAMSIZ]; struct in_addr pfsyncr_syncpeer; int pfsyncr_maxupdates; +#define PFSYNCF_OK 0x00000001 +#define PFSYNCF_DEFER 0x00000002 +#define PFSYNCF_PUSH 0x00000004 int pfsyncr_defer; }; diff --git a/sys/net/if_stf.c b/sys/net/if_stf.c index 15d7d64..9d923ad 100644 --- a/sys/net/if_stf.c +++ b/sys/net/if_stf.c @@ -3,6 +3,8 @@ /*- * Copyright (C) 2000 WIDE Project. + * Copyright (c) 2010 Hiroki Sato <hrs@FreeBSD.org> + * Copyright (c) 2013 Ermal Luçi <eri@FreeBSD.org> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -31,7 +33,7 @@ */ /* - * 6to4 interface, based on RFC3056. + * 6to4 interface, based on RFC3056 + 6rd (RFC5569) support. * * 6to4 interface is NOT capable of link-layer (I mean, IPv4) multicasting. * There is no address mapping defined from IPv6 multicast address to IPv4 @@ -60,7 +62,7 @@ * ICMPv6: * - Redirects cannot be used due to the lack of link-local address. * - * stf interface does not have, and will not need, a link-local address. + * stf interface does not have, and will not need, a link-local address. * It seems to have no real benefit and does not help the above symptoms much. * Even if we assign link-locals to interface, we cannot really * use link-local unicast/multicast on top of 6to4 cloud (since there's no @@ -72,6 +74,12 @@ * http://playground.iijlab.net/i-d/draft-itojun-ipv6-transition-abuse-00.txt * for details. The code tries to filter out some of malicious packets. * Note that there is no way to be 100% secure. + * + * 6rd (RFC5569 & RFC5969) extension is enabled when an IPv6 GUA other than + * 2002::/16 is assigned. The stf(4) recognizes a 32-bit just after + * prefixlen as the IPv4 address of the 6rd customer site. The + * prefixlen must be shorter than 32. + * */ #include "opt_inet.h" @@ -92,13 +100,14 @@ #include <machine/cpu.h> #include <sys/malloc.h> +#include <sys/priv.h> #include <net/if.h> +#include <net/if_var.h> #include <net/if_clone.h> #include <net/route.h> #include <net/netisr.h> #include <net/if_types.h> -#include <net/if_stf.h> #include <net/vnet.h> #include <netinet/in.h> @@ -106,6 +115,7 @@ #include <netinet/ip.h> #include <netinet/ip_var.h> #include <netinet/in_var.h> +#include <net/if_stf.h> #include <netinet/ip6.h> #include <netinet6/ip6_var.h> @@ -120,20 +130,48 @@ #include <security/mac/mac_framework.h> +#define STF_DEBUG 1 +#if STF_DEBUG > 3 +#define ip_sprintf(buf, a) \ + sprintf(buf, "%u.%u.%u.%u", \ + (ntohl((a)->s_addr)>>24)&0xFF, \ + (ntohl((a)->s_addr)>>16)&0xFF, \ + (ntohl((a)->s_addr)>>8)&0xFF, \ + (ntohl((a)->s_addr))&0xFF); +#endif + +#if STF_DEBUG +#define DEBUG_PRINTF(a, ...) \ + do { \ + if (V_stf_debug >= a) \ + printf(__VA_ARGS__); \ + } while (0) +#else +#define DEBUG_PRINTF(a, ...) +#endif + SYSCTL_DECL(_net_link); static SYSCTL_NODE(_net_link, IFT_STF, stf, CTLFLAG_RW, 0, "6to4 Interface"); -static int stf_route_cache = 1; -SYSCTL_INT(_net_link_stf, OID_AUTO, route_cache, CTLFLAG_RW, - &stf_route_cache, 0, "Caching of IPv4 routes for 6to4 Output"); +static VNET_DEFINE(int, stf_route_cache) = 0; +#define V_stf_route_cache VNET(stf_route_cache) +SYSCTL_VNET_INT(_net_link_stf, OID_AUTO, route_cache, CTLFLAG_RW, + &VNET_NAME(stf_route_cache), 0, + "Enable caching of IPv4 routes for 6to4 output."); + +#if STF_DEBUG +static VNET_DEFINE(int, stf_debug) = 0; +#define V_stf_debug VNET(stf_debug) +SYSCTL_VNET_INT(_net_link_stf, OID_AUTO, stf_debug, CTLFLAG_RW, + &VNET_NAME(stf_debug), 0, + "Enable displaying verbose debug message of stf interfaces"); +#endif -static int stf_permit_rfc1918 = 0; +static int stf_permit_rfc1918 = 1; TUNABLE_INT("net.link.stf.permit_rfc1918", &stf_permit_rfc1918); SYSCTL_INT(_net_link_stf, OID_AUTO, permit_rfc1918, CTLFLAG_RW | CTLFLAG_TUN, &stf_permit_rfc1918, 0, "Permit the use of private IPv4 addresses"); -#define STFUNIT 0 - #define IN6_IS_ADDR_6TO4(x) (ntohs((x)->s6_addr16[0]) == 0x2002) /* @@ -144,24 +182,37 @@ SYSCTL_INT(_net_link_stf, OID_AUTO, permit_rfc1918, CTLFLAG_RW | CTLFLAG_TUN, struct stf_softc { struct ifnet *sc_ifp; + in_addr_t dstv4_addr; + in_addr_t srcv4_addr; + in_addr_t inaddr; + u_int v4prefixlen; union { struct route __sc_ro4; struct route_in6 __sc_ro6; /* just for safety */ } __sc_ro46; #define sc_ro __sc_ro46.__sc_ro4 - struct mtx sc_ro_mtx; + struct mtx sc_mtx; u_int sc_fibnum; const struct encaptab *encap_cookie; + u_int sc_flags; + LIST_ENTRY(stf_softc) stf_list; }; #define STF2IFP(sc) ((sc)->sc_ifp) static const char stfname[] = "stf"; -/* - * Note that mutable fields in the softc are not currently locked. - * We do lock sc_ro in stf_output though. - */ +static struct mtx stf_mtx; static MALLOC_DEFINE(M_STF, stfname, "6to4 Tunnel Interface"); +static VNET_DEFINE(LIST_HEAD(, stf_softc), stf_softc_list); +#define V_stf_softc_list VNET(stf_softc_list) + +#define STF_LOCK_INIT(sc) mtx_init(&(sc)->sc_mtx, "stf softc", \ + NULL, MTX_DEF); +#define STF_LOCK_DESTROY(sc) mtx_destroy(&(sc)->sc_mtx) +#define STF_LOCK(sc) mtx_lock(&(sc)->sc_mtx) +#define STF_UNLOCK(sc) mtx_unlock(&(sc)->sc_mtx) +#define STF_LOCK_ASSERT(sc) mtx_assert(&(sc)->sc_mtx, MA_OWNED) + static const int ip_stf_ttl = 40; extern struct domain inetdomain; @@ -176,8 +227,6 @@ struct protosw in_stf_protosw = { .pr_usrreqs = &rip_usrreqs }; -static char *stfnames[] = {"stf0", "stf", "6to4", NULL}; - static int stfmodevent(module_t, int, void *); static int stf_encapcheck(const struct mbuf *, int, int, void *); static struct in6_ifaddr *stf_getsrcifa6(struct ifnet *); @@ -191,66 +240,42 @@ static int stf_checkaddr6(struct stf_softc *, struct in6_addr *, static void stf_rtrequest(int, struct rtentry *, struct rt_addrinfo *); static int stf_ioctl(struct ifnet *, u_long, caddr_t); -static int stf_clone_match(struct if_clone *, const char *); -static int stf_clone_create(struct if_clone *, char *, size_t, caddr_t); -static int stf_clone_destroy(struct if_clone *, struct ifnet *); -static struct if_clone *stf_cloner; +#define STF_GETIN4_USE_CACHE 1 +static struct sockaddr_in *stf_getin4addr(struct stf_softc *, struct sockaddr_in *, + struct ifaddr *, int); +static struct sockaddr_in *stf_getin4addr_in6(struct stf_softc *, struct sockaddr_in *, + struct ifaddr *, const struct in6_addr *); +static struct sockaddr_in *stf_getin4addr_sin6(struct stf_softc *, struct sockaddr_in *, + struct ifaddr *, struct sockaddr_in6 *); +static int stf_clone_create(struct if_clone *, int, caddr_t); +static void stf_clone_destroy(struct ifnet *); -static int -stf_clone_match(struct if_clone *ifc, const char *name) -{ - int i; - - for(i = 0; stfnames[i] != NULL; i++) { - if (strcmp(stfnames[i], name) == 0) - return (1); - } - - return (0); -} +static struct if_clone *stf_cloner; static int -stf_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params) +stf_clone_create(struct if_clone *ifc, int unit, caddr_t params) { - int err, unit; struct stf_softc *sc; struct ifnet *ifp; - /* - * We can only have one unit, but since unit allocation is - * already locked, we use it to keep from allocating extra - * interfaces. - */ - unit = STFUNIT; - err = ifc_alloc_unit(ifc, &unit); - if (err != 0) - return (err); - sc = malloc(sizeof(struct stf_softc), M_STF, M_WAITOK | M_ZERO); + sc->sc_fibnum = curthread->td_proc->p_fibnum; ifp = STF2IFP(sc) = if_alloc(IFT_STF); - if (ifp == NULL) { + if (sc->sc_ifp == NULL) { free(sc, M_STF); - ifc_free_unit(ifc, unit); - return (ENOSPC); + return (ENOMEM); } + STF_LOCK_INIT(sc); ifp->if_softc = sc; - sc->sc_fibnum = curthread->td_proc->p_fibnum; - /* - * Set the name manually rather then using if_initname because - * we don't conform to the default naming convention for interfaces. - */ - strlcpy(ifp->if_xname, name, IFNAMSIZ); - ifp->if_dname = stfname; - ifp->if_dunit = IF_DUNIT_NONE; + if_initname(ifp, stfname, unit); - mtx_init(&(sc)->sc_ro_mtx, "stf ro", NULL, MTX_DEF); sc->encap_cookie = encap_attach_func(AF_INET, IPPROTO_IPV6, stf_encapcheck, &in_stf_protosw, sc); if (sc->encap_cookie == NULL) { if_printf(ifp, "attach failed\n"); + if_free(ifp); free(sc, M_STF); - ifc_free_unit(ifc, unit); return (ENOMEM); } @@ -260,42 +285,56 @@ stf_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params) ifp->if_snd.ifq_maxlen = ifqmaxlen; if_attach(ifp); bpfattach(ifp, DLT_NULL, sizeof(u_int32_t)); + + mtx_lock(&stf_mtx); + LIST_INSERT_HEAD(&V_stf_softc_list, sc, stf_list); + mtx_unlock(&stf_mtx); + return (0); } -static int -stf_clone_destroy(struct if_clone *ifc, struct ifnet *ifp) +static void +stf_clone_destroy(struct ifnet *ifp) { struct stf_softc *sc = ifp->if_softc; int err; + mtx_lock(&stf_mtx); + LIST_REMOVE(sc, stf_list); + mtx_unlock(&stf_mtx); + err = encap_detach(sc->encap_cookie); KASSERT(err == 0, ("Unexpected error detaching encap_cookie")); - mtx_destroy(&(sc)->sc_ro_mtx); bpfdetach(ifp); if_detach(ifp); if_free(ifp); + STF_LOCK_DESTROY(sc); free(sc, M_STF); - ifc_free_unit(ifc, STFUNIT); +} - return (0); +static void +vnet_stf_init(const void *unused __unused) +{ + + LIST_INIT(&V_stf_softc_list); } +VNET_SYSINIT(vnet_stf_init, SI_SUB_PSEUDO, SI_ORDER_MIDDLE, vnet_stf_init, + NULL); static int -stfmodevent(mod, type, data) - module_t mod; - int type; - void *data; +stfmodevent(module_t mod, int type, void *data) { switch (type) { case MOD_LOAD: - stf_cloner = if_clone_advanced(stfname, 0, stf_clone_match, - stf_clone_create, stf_clone_destroy); + mtx_init(&stf_mtx, "stf_mtx", NULL, MTX_DEF); + stf_cloner = if_clone_simple(stfname, + stf_clone_create, stf_clone_destroy, 0); break; case MOD_UNLOAD: if_clone_detach(stf_cloner); + mtx_destroy(&stf_mtx); break; default: return (EOPNOTSUPP); @@ -311,28 +350,31 @@ static moduledata_t stf_mod = { }; DECLARE_MODULE(if_stf, stf_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); +MODULE_VERSION(if_stf, 1); static int -stf_encapcheck(m, off, proto, arg) - const struct mbuf *m; - int off; - int proto; - void *arg; +stf_encapcheck(const struct mbuf *m, int off, int proto, void *arg) { struct ip ip; struct in6_ifaddr *ia6; + struct sockaddr_in ia6_in4addr; + struct sockaddr_in ia6_in4mask; + struct sockaddr_in *sin; struct stf_softc *sc; - struct in_addr a, b, mask; + struct ifnet *ifp; + int ret = 0; + DEBUG_PRINTF(1, "%s: enter\n", __func__); sc = (struct stf_softc *)arg; if (sc == NULL) return 0; + ifp = STF2IFP(sc); - if ((STF2IFP(sc)->if_flags & IFF_UP) == 0) + if ((ifp->if_flags & IFF_UP) == 0) return 0; /* IFF_LINK0 means "no decapsulation" */ - if ((STF2IFP(sc)->if_flags & IFF_LINK0) != 0) + if ((ifp->if_flags & IFF_LINK0) != 0) return 0; if (proto != IPPROTO_IPV6) @@ -344,72 +386,169 @@ stf_encapcheck(m, off, proto, arg) if (ip.ip_v != 4) return 0; - ia6 = stf_getsrcifa6(STF2IFP(sc)); + /* Lookup an ia6 whose IPv4 addr encoded in the IPv6 addr is valid. */ + ia6 = stf_getsrcifa6(ifp); if (ia6 == NULL) return 0; + if (sc->srcv4_addr != INADDR_ANY) { + sin = &ia6_in4addr; + sin->sin_addr.s_addr = sc->srcv4_addr; + sin->sin_family = AF_INET; + } else { + sin = stf_getin4addr(sc, &ia6_in4addr, &ia6->ia_ifa, STF_GETIN4_USE_CACHE); + if (sin == NULL) + return (0); + } + +#if STF_DEBUG > 3 + { + char buf[INET6_ADDRSTRLEN + 1]; + memset(&buf, 0, sizeof(buf)); + + ip6_sprintf(buf, &satosin6(ia6->ia_ifa.ifa_addr)->sin6_addr); + DEBUG_PRINTF(1, "%s: ia6->ia_ifa.ifa_addr = %s\n", __func__, buf); + ip6_sprintf(buf, &ia6->ia_addr.sin6_addr); + DEBUG_PRINTF(1, "%s: ia6->ia_addr = %s\n", __func__, buf); + ip6_sprintf(buf, &satosin6(ia6->ia_ifa.ifa_netmask)->sin6_addr); + DEBUG_PRINTF(1, "%s: ia6->ia_ifa.ifa_netmask = %s\n", __func__, buf); + ip6_sprintf(buf, &ia6->ia_prefixmask.sin6_addr); + DEBUG_PRINTF(1, "%s: ia6->ia_prefixmask = %s\n", __func__, buf); + + ip_sprintf(buf, &ia6_in4addr.sin_addr); + DEBUG_PRINTF(1, "%s: ia6_in4addr.sin_addr = %s\n", __func__, buf); + ip_sprintf(buf, &ip.ip_src); + DEBUG_PRINTF(1, "%s: ip.ip_src = %s\n", __func__, buf); + ip_sprintf(buf, &ip.ip_dst); + DEBUG_PRINTF(1, "%s: ip.ip_dst = %s\n", __func__, buf); + } +#endif /* * check if IPv4 dst matches the IPv4 address derived from the * local 6to4 address. * success on: dst = 10.1.1.1, ia6->ia_addr = 2002:0a01:0101:... */ - if (bcmp(GET_V4(&ia6->ia_addr.sin6_addr), &ip.ip_dst, - sizeof(ip.ip_dst)) != 0) { - ifa_free(&ia6->ia_ifa); - return 0; + DEBUG_PRINTF(1, "%s: check1: ia6_in4addr.sin_addr == ip.ip_dst?\n", __func__); + if (ia6_in4addr.sin_addr.s_addr != ip.ip_dst.s_addr) { + DEBUG_PRINTF(1, "%s: check1: false. Ignore this packet.\n", __func__); + goto freeit; } - /* - * check if IPv4 src matches the IPv4 address derived from the - * local 6to4 address masked by prefixmask. - * success on: src = 10.1.1.1, ia6->ia_addr = 2002:0a00:.../24 - * fail on: src = 10.1.1.1, ia6->ia_addr = 2002:0b00:.../24 - */ - bzero(&a, sizeof(a)); - bcopy(GET_V4(&ia6->ia_addr.sin6_addr), &a, sizeof(a)); - bcopy(GET_V4(&ia6->ia_prefixmask.sin6_addr), &mask, sizeof(mask)); - ifa_free(&ia6->ia_ifa); - a.s_addr &= mask.s_addr; - b = ip.ip_src; - b.s_addr &= mask.s_addr; - if (a.s_addr != b.s_addr) - return 0; + DEBUG_PRINTF(1, "%s: check2: ia6->ia_addr is 2002::/16?\n", __func__); + + if (IN6_IS_ADDR_6TO4(&ia6->ia_addr.sin6_addr)) { + /* 6to4 (RFC 3056) */ + /* + * check if IPv4 src matches the IPv4 address derived + * from the local 6to4 address masked by prefixmask. + * success on: src = 10.1.1.1, ia6->ia_addr = 2002:0a00:.../24 + * fail on: src = 10.1.1.1, ia6->ia_addr = 2002:0b00:.../24 + */ + DEBUG_PRINTF(1, "%s: check2: true.\n", __func__); + + memcpy(&ia6_in4mask.sin_addr, + GET_V4(&ia6->ia_prefixmask.sin6_addr), + sizeof(ia6_in4mask)); +#if STF_DEBUG > 3 + { + char buf[INET6_ADDRSTRLEN + 1]; + memset(&buf, 0, sizeof(buf)); + + ip_sprintf(buf, &ia6_in4addr.sin_addr); + DEBUG_PRINTF(1, "%s: ia6->ia_addr = %s\n", + __func__, buf); + ip_sprintf(buf, &ip.ip_src); + DEBUG_PRINTF(1, "%s: ip.ip_src = %s\n", + __func__, buf); + ip_sprintf(buf, &ia6_in4mask.sin_addr); + DEBUG_PRINTF(1, "%s: ia6->ia_prefixmask = %s\n", + __func__, buf); + + DEBUG_PRINTF(1, "%s: check3: ia6_in4addr.sin_addr & mask == ip.ip_src & mask\n", + __func__); + } +#endif + + if ((ia6_in4addr.sin_addr.s_addr & ia6_in4mask.sin_addr.s_addr) != + (ip.ip_src.s_addr & ia6_in4mask.sin_addr.s_addr)) { + DEBUG_PRINTF(1, "%s: check3: false. Ignore this packet.\n", + __func__); + goto freeit; + } + } else { + /* 6rd (RFC 5569) */ + DEBUG_PRINTF(1, "%s: check2: false. 6rd.\n", __func__); + /* + * No restriction on the src address in the case of + * 6rd because the stf(4) interface always has a + * prefix which covers whole of IPv4 src address + * range. So, stf_output() will catch all of + * 6rd-capsuled IPv4 traffic with suspicious inner dst + * IPv4 address (i.e. the IPv6 destination address is + * one the admin does not like to route to outside), + * and then it discard them silently. + */ + } + DEBUG_PRINTF(1, "%s: all clear!\n", __func__); /* stf interface makes single side match only */ - return 32; + ret = 32; +freeit: + ifa_free(&ia6->ia_ifa); + + return (ret); } static struct in6_ifaddr * -stf_getsrcifa6(ifp) - struct ifnet *ifp; +stf_getsrcifa6(struct ifnet *ifp) { - struct ifaddr *ia; + struct ifaddr *ifa; struct in_ifaddr *ia4; - struct sockaddr_in6 *sin6; - struct in_addr in; + struct sockaddr_in *sin; + struct sockaddr_in in4; + struct stf_softc *sc; + + sc = ifp->if_softc; if_addr_rlock(ifp); - TAILQ_FOREACH(ia, &ifp->if_addrhead, ifa_link) { - if (ia->ifa_addr->sa_family != AF_INET6) + TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { + if (ifa->ifa_addr->sa_family != AF_INET6) continue; - sin6 = (struct sockaddr_in6 *)ia->ifa_addr; - if (!IN6_IS_ADDR_6TO4(&sin6->sin6_addr)) + + if (sc->srcv4_addr != INADDR_ANY) { + in4.sin_addr.s_addr = sc->srcv4_addr; + sin = &in4; + } else if ((sin = stf_getin4addr(ifp->if_softc, &in4, ifa, + STF_GETIN4_USE_CACHE)) == NULL) continue; - bcopy(GET_V4(&sin6->sin6_addr), &in, sizeof(in)); - LIST_FOREACH(ia4, INADDR_HASH(in.s_addr), ia_hash) - if (ia4->ia_addr.sin_addr.s_addr == in.s_addr) + LIST_FOREACH(ia4, INADDR_HASH(sin->sin_addr.s_addr), ia_hash) + if (ia4->ia_addr.sin_addr.s_addr == sin->sin_addr.s_addr) break; if (ia4 == NULL) continue; - ifa_ref(ia); +#if STF_DEBUG > 3 + { + char buf[INET6_ADDRSTRLEN + 1]; + memset(&buf, 0, sizeof(buf)); + + ip6_sprintf(buf, &((struct sockaddr_in6 *)ifa->ifa_addr)->sin6_addr); + DEBUG_PRINTF(1, "%s: ifa->ifa_addr->sin6_addr = %s\n", + __func__, buf); + ip_sprintf(buf, &ia4->ia_addr.sin_addr); + DEBUG_PRINTF(1, "%s: ia4->ia_addr.sin_addr = %s\n", + __func__, buf); + } +#endif + + ifa_ref(ifa); if_addr_runlock(ifp); - return (struct in6_ifaddr *)ia; + return (ifatoia6(ifa)); } if_addr_runlock(ifp); - return NULL; + return (NULL); } static int @@ -419,8 +558,8 @@ stf_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, struct stf_softc *sc; const struct sockaddr_in6 *dst6; struct route *cached_route; - struct in_addr in4; - const void *ptr; + struct sockaddr_in *sin; + struct sockaddr_in in4; struct sockaddr_in *dst4; u_int8_t tos; struct ip *ip; @@ -472,20 +611,33 @@ stf_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, /* * Pickup the right outer dst addr from the list of candidates. * ip6_dst has priority as it may be able to give us shorter IPv4 hops. + * ip6_dst: destination addr in the packet header. + * dst6: destination addr specified in function argument. */ - ptr = NULL; - if (IN6_IS_ADDR_6TO4(&ip6->ip6_dst)) - ptr = GET_V4(&ip6->ip6_dst); - else if (IN6_IS_ADDR_6TO4(&dst6->sin6_addr)) - ptr = GET_V4(&dst6->sin6_addr); - else { - ifa_free(&ia6->ia_ifa); - m_freem(m); - ifp->if_oerrors++; - return ENETUNREACH; + DEBUG_PRINTF(1, "%s: dst addr selection\n", __func__); + sin = stf_getin4addr_in6(sc, &in4, &ia6->ia_ifa, &ip6->ip6_dst); + if (sin == NULL) { + if (sc->dstv4_addr != INADDR_ANY) + in4.sin_addr.s_addr = sc->dstv4_addr; + else { + sin = stf_getin4addr_in6(sc, &in4, &ia6->ia_ifa, &dst6->sin6_addr); + if (sin == NULL) { + ifa_free(&ia6->ia_ifa); + m_freem(m); + ifp->if_oerrors++; + return ENETUNREACH; + } + } } - bcopy(ptr, &in4, sizeof(in4)); +#if STF_DEBUG > 3 + { + char buf[INET6_ADDRSTRLEN + 1]; + memset(&buf, 0, sizeof(buf)); + ip_sprintf(buf, &in4.sin_addr); + DEBUG_PRINTF(1, "%s: ip_dst = %s\n", __func__, buf); + } +#endif if (bpf_peers_present(ifp->if_bpf)) { /* * We need to prepend the address family as @@ -507,11 +659,30 @@ stf_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, ip = mtod(m, struct ip *); bzero(ip, sizeof(*ip)); + bcopy(&in4.sin_addr, &ip->ip_dst, sizeof(ip->ip_dst)); - bcopy(GET_V4(&((struct sockaddr_in6 *)&ia6->ia_addr)->sin6_addr), - &ip->ip_src, sizeof(ip->ip_src)); + if (sc->srcv4_addr != INADDR_ANY) + in4.sin_addr.s_addr = sc->srcv4_addr; + else { + sin = stf_getin4addr_sin6(sc, &in4, &ia6->ia_ifa, &ia6->ia_addr); + if (sin == NULL) { + ifa_free(&ia6->ia_ifa); + m_freem(m); + ifp->if_oerrors++; + return ENETUNREACH; + } + } + bcopy(&in4.sin_addr, &ip->ip_src, sizeof(ip->ip_src)); +#if STF_DEBUG > 3 + { + char buf[INET6_ADDRSTRLEN + 1]; + memset(&buf, 0, sizeof(buf)); + + ip_sprintf(buf, &ip->ip_src); + DEBUG_PRINTF(1, "%s: ip_src = %s\n", __func__, buf); + } +#endif ifa_free(&ia6->ia_ifa); - bcopy(&in4, &ip->ip_dst, sizeof(ip->ip_dst)); ip->ip_p = IPPROTO_IPV6; ip->ip_ttl = ip_stf_ttl; ip->ip_len = htons(m->m_pkthdr.len); @@ -520,7 +691,7 @@ stf_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, else ip_ecn_ingress(ECN_NOCARE, &ip->ip_tos, &tos); - if (!stf_route_cache) { + if (!V_stf_route_cache) { cached_route = NULL; goto sendit; } @@ -528,7 +699,7 @@ stf_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, /* * Do we have a cached route? */ - mtx_lock(&(sc)->sc_ro_mtx); + STF_LOCK(sc); dst4 = (struct sockaddr_in *)&sc->sc_ro.ro_dst; if (dst4->sin_family != AF_INET || bcmp(&dst4->sin_addr, &ip->ip_dst, sizeof(ip->ip_dst)) != 0) { @@ -546,8 +717,15 @@ stf_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, rtalloc_fib(&sc->sc_ro, sc->sc_fibnum); if (sc->sc_ro.ro_rt == NULL) { m_freem(m); - mtx_unlock(&(sc)->sc_ro_mtx); ifp->if_oerrors++; + STF_UNLOCK(sc); + return ENETUNREACH; + } + if (sc->sc_ro.ro_rt->rt_ifp == ifp) { + /* infinite loop detection */ + m_free(m); + ifp->if_oerrors++; + STF_UNLOCK(sc); return ENETUNREACH; } } @@ -556,35 +734,33 @@ stf_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, sendit: M_SETFIB(m, sc->sc_fibnum); ifp->if_opackets++; + DEBUG_PRINTF(1, "%s: ip_output dispatch.\n", __func__); error = ip_output(m, NULL, cached_route, 0, NULL, NULL); if (cached_route != NULL) - mtx_unlock(&(sc)->sc_ro_mtx); - return error; + STF_UNLOCK(sc); + + return (error); } static int -isrfc1918addr(in) - struct in_addr *in; +isrfc1918addr(struct in_addr *in) { /* * returns 1 if private address range: * 10.0.0.0/8 172.16.0.0/12 192.168.0.0/16 */ if (stf_permit_rfc1918 == 0 && ( - (ntohl(in->s_addr) & 0xff000000) >> 24 == 10 || - (ntohl(in->s_addr) & 0xfff00000) >> 16 == 172 * 256 + 16 || - (ntohl(in->s_addr) & 0xffff0000) >> 16 == 192 * 256 + 168)) + (ntohl(in->s_addr) & 0xff000000) == 10 << 24 || + (ntohl(in->s_addr) & 0xfff00000) == (172 * 256 + 16) << 16 || + (ntohl(in->s_addr) & 0xffff0000) == (192 * 256 + 168) << 16 )) return 1; return 0; } static int -stf_checkaddr4(sc, in, inifp) - struct stf_softc *sc; - struct in_addr *in; - struct ifnet *inifp; /* incoming interface */ +stf_checkaddr4(struct stf_softc *sc, struct in_addr *in, struct ifnet *inifp) { struct in_ifaddr *ia4; @@ -600,13 +776,6 @@ stf_checkaddr4(sc, in, inifp) } /* - * reject packets with private address range. - * (requirement from RFC3056 section 2 1st paragraph) - */ - if (isrfc1918addr(in)) - return -1; - - /* * reject packets with broadcast */ IN_IFADDR_RLOCK(); @@ -629,7 +798,7 @@ stf_checkaddr4(sc, in, inifp) bzero(&sin, sizeof(sin)); sin.sin_family = AF_INET; - sin.sin_len = sizeof(struct sockaddr_in); + sin.sin_len = sizeof(sin); sin.sin_addr = *in; rt = rtalloc1_fib((struct sockaddr *)&sin, 0, 0UL, sc->sc_fibnum); @@ -650,10 +819,7 @@ stf_checkaddr4(sc, in, inifp) } static int -stf_checkaddr6(sc, in6, inifp) - struct stf_softc *sc; - struct in6_addr *in6; - struct ifnet *inifp; /* incoming interface */ +stf_checkaddr6(struct stf_softc *sc, struct in6_addr *in6, struct ifnet *inifp) { /* * check 6to4 addresses @@ -677,9 +843,7 @@ stf_checkaddr6(sc, in6, inifp) } void -in_stf_input(m, off) - struct mbuf *m; - int off; +in_stf_input(struct mbuf *m, int off) { int proto; struct stf_softc *sc; @@ -687,6 +851,7 @@ in_stf_input(m, off) struct ip6_hdr *ip6; u_int8_t otos, itos; struct ifnet *ifp; + struct route_in6 rin6; proto = mtod(m, struct ip *)->ip_p; @@ -710,6 +875,17 @@ in_stf_input(m, off) mac_ifnet_create_mbuf(ifp, m); #endif +#if STF_DEBUG > 3 + { + char buf[INET6_ADDRSTRLEN + 1]; + memset(&buf, 0, sizeof(buf)); + + ip_sprintf(buf, &ip->ip_dst); + DEBUG_PRINTF(1, "%s: ip->ip_dst = %s\n", __func__, buf); + ip_sprintf(buf, &ip->ip_src); + DEBUG_PRINTF(1, "%s: ip->ip_src = %s\n", __func__, buf); + } +#endif /* * perform sanity check against outer src/dst. * for source, perform ingress filter as well. @@ -730,6 +906,17 @@ in_stf_input(m, off) } ip6 = mtod(m, struct ip6_hdr *); +#if STF_DEBUG > 3 + { + char buf[INET6_ADDRSTRLEN + 1]; + memset(&buf, 0, sizeof(buf)); + + ip6_sprintf(buf, &ip6->ip6_dst); + DEBUG_PRINTF(1, "%s: ip6->ip6_dst = %s\n", __func__, buf); + ip6_sprintf(buf, &ip6->ip6_src); + DEBUG_PRINTF(1, "%s: ip6->ip6_src = %s\n", __func__, buf); + } +#endif /* * perform sanity check against inner src/dst. * for source, perform ingress filter as well. @@ -740,6 +927,41 @@ in_stf_input(m, off) return; } + /* + * reject packets with private address range. + * (requirement from RFC3056 section 2 1st paragraph) + */ + if ((IN6_IS_ADDR_6TO4(&ip6->ip6_src) && isrfc1918addr(&ip->ip_src)) || + (IN6_IS_ADDR_6TO4(&ip6->ip6_dst) && isrfc1918addr(&ip->ip_dst))) { + m_freem(m); + return; + } + + /* + * Ignore if the destination is the same stf interface because + * all of valid IPv6 outgoing traffic should go interfaces + * except for it. + */ + memset(&rin6, 0, sizeof(rin6)); + rin6.ro_dst.sin6_len = sizeof(rin6.ro_dst); + rin6.ro_dst.sin6_family = AF_INET6; + memcpy(&rin6.ro_dst.sin6_addr, &ip6->ip6_dst, + sizeof(rin6.ro_dst.sin6_addr)); + rtalloc((struct route *)&rin6); + if (rin6.ro_rt == NULL) { + DEBUG_PRINTF(1, "%s: no IPv6 dst. Ignored.\n", __func__); + m_free(m); + return; + } + if ((rin6.ro_rt->rt_ifp == ifp) && + (!IN6_ARE_ADDR_EQUAL(&ip6->ip6_src, &rin6.ro_dst.sin6_addr))) { + DEBUG_PRINTF(1, "%s: IPv6 dst is the same stf. Ignored.\n", __func__); + RTFREE(rin6.ro_rt); + m_free(m); + return; + } + RTFREE(rin6.ro_rt); + itos = (ntohl(ip6->ip6_flow) >> 20) & 0xff; if ((ifp->if_flags & IFF_LINK1) != 0) ip_ecn_egress(ECN_ALLOWED, &otos, &itos); @@ -749,7 +971,7 @@ in_stf_input(m, off) ip6->ip6_flow |= htonl((u_int32_t)itos << 20); m->m_pkthdr.rcvif = ifp; - + if (bpf_peers_present(ifp->if_bpf)) { /* * We need to prepend the address family as @@ -762,6 +984,7 @@ in_stf_input(m, off) bpf_mtap2(ifp->if_bpf, &af, sizeof(af), m); } + DEBUG_PRINTF(1, "%s: netisr_dispatch(NETISR_IPV6)\n", __func__); /* * Put the packet to the network layer input queue according to the * specified address family. @@ -776,48 +999,363 @@ in_stf_input(m, off) /* ARGSUSED */ static void -stf_rtrequest(cmd, rt, info) - int cmd; - struct rtentry *rt; - struct rt_addrinfo *info; +stf_rtrequest(int cmd, struct rtentry *rt, struct rt_addrinfo *info) { + RT_LOCK_ASSERT(rt); rt->rt_mtu = rt->rt_ifp->if_mtu; } +static struct sockaddr_in * +stf_getin4addr_in6(struct stf_softc *sc, struct sockaddr_in *sin, + struct ifaddr *ifa, + const struct in6_addr *in6) +{ + struct sockaddr_in6 sin6; + + DEBUG_PRINTF(1, "%s: enter.\n", __func__); + if (ifa == NULL || in6 == NULL) + return NULL; + + memset(&sin6, 0, sizeof(sin6)); + memcpy(&sin6.sin6_addr, in6, sizeof(sin6.sin6_addr)); + sin6.sin6_len = sizeof(sin6); + sin6.sin6_family = AF_INET6; + + return(stf_getin4addr_sin6(sc, sin, ifa, &sin6)); +} + +static struct sockaddr_in * +stf_getin4addr_sin6(struct stf_softc *sc, struct sockaddr_in *sin, + struct ifaddr *ifa, + struct sockaddr_in6 *sin6) +{ + struct in6_ifaddr ia6; + int i; + + DEBUG_PRINTF(1, "%s: enter.\n", __func__); + if (ifa == NULL || sin6 == NULL) + return NULL; + + memset(&ia6, 0, sizeof(ia6)); + memcpy(&ia6, ifatoia6(ifa), sizeof(ia6)); + + /* + * Use prefixmask information from ifa, and + * address information from sin6. + */ + ia6.ia_addr.sin6_family = AF_INET6; + ia6.ia_ifa.ifa_addr = (struct sockaddr *)&ia6.ia_addr; + ia6.ia_ifa.ifa_dstaddr = NULL; + ia6.ia_ifa.ifa_netmask = (struct sockaddr *)&ia6.ia_prefixmask; + +#if STF_DEBUG > 3 + { + char buf[INET6_ADDRSTRLEN + 1]; + memset(&buf, 0, sizeof(buf)); + + ip6_sprintf(buf, &sin6->sin6_addr); + DEBUG_PRINTF(1, "%s: sin6->sin6_addr = %s\n", __func__, buf); + ip6_sprintf(buf, &ia6.ia_addr.sin6_addr); + DEBUG_PRINTF(1, "%s: ia6.ia_addr.sin6_addr = %s\n", __func__, buf); + ip6_sprintf(buf, &ia6.ia_prefixmask.sin6_addr); + DEBUG_PRINTF(1, "%s: ia6.ia_prefixmask.sin6_addr = %s\n", __func__, buf); + } +#endif + + /* + * When (src addr & src mask) != (dst (sin6) addr & src mask), + * the dst is not in the 6rd domain. The IPv4 address must + * not be used. + */ + for (i = 0; i < sizeof(ia6.ia_addr.sin6_addr); i++) { + if ((((u_char *)&ia6.ia_addr.sin6_addr)[i] & + ((u_char *)&ia6.ia_prefixmask.sin6_addr)[i]) + != + (((u_char *)&sin6->sin6_addr)[i] & + ((u_char *)&ia6.ia_prefixmask.sin6_addr)[i])) + return NULL; + } + + /* After the mask check, overwrite ia6.ia_addr with sin6. */ + memcpy(&ia6.ia_addr, sin6, sizeof(ia6.ia_addr)); + return(stf_getin4addr(sc, sin, (struct ifaddr *)&ia6, 0)); +} + +static struct sockaddr_in * +stf_getin4addr(struct stf_softc *sc, struct sockaddr_in *sin, + struct ifaddr *ifa, + int flags) +{ + struct in_addr *in; + struct sockaddr_in6 *sin6; + struct in6_ifaddr *ia6; + + DEBUG_PRINTF(1, "%s: enter.\n", __func__); + if (ifa == NULL || + ifa->ifa_addr == NULL || + ifa->ifa_addr->sa_family != AF_INET6) + return NULL; + + sin6 = satosin6(ifa->ifa_addr); + ia6 = ifatoia6(ifa); + + if ((flags & STF_GETIN4_USE_CACHE) && + (ifa->ifa_dstaddr != NULL) && + (ifa->ifa_dstaddr->sa_family == AF_INET)) { + /* + * XXX: ifa_dstaddr is used as a cache of the + * extracted IPv4 address. + */ + memcpy(sin, satosin(ifa->ifa_dstaddr), sizeof(*sin)); + +#if STF_DEBUG > 3 + { + char tmpbuf[INET6_ADDRSTRLEN + 1]; + memset(&tmpbuf, 0, INET6_ADDRSTRLEN); + + ip_sprintf(tmpbuf, &sin->sin_addr); + DEBUG_PRINTF(1, "%s: cached address was used = %s\n", __func__, tmpbuf); + } +#endif + return (sin); + } + + memset(sin, 0, sizeof(*sin)); + in = &sin->sin_addr; + +#if STF_DEBUG > 3 + { + char tmpbuf[INET6_ADDRSTRLEN + 1]; + memset(&tmpbuf, 0, INET6_ADDRSTRLEN); + + ip6_sprintf(tmpbuf, &sin6->sin6_addr); + DEBUG_PRINTF(1, "%s: sin6->sin6_addr = %s\n", __func__, tmpbuf); + } +#endif + + if (IN6_IS_ADDR_6TO4(&sin6->sin6_addr)) { + /* 6to4 (RFC 3056) */ + bcopy(GET_V4(&sin6->sin6_addr), in, sizeof(*in)); + if (isrfc1918addr(in)) + return NULL; + } else { + /* 6rd (RFC 5569) */ + struct in6_addr buf; + u_char *p = (u_char *)&buf; + u_char *q = (u_char *)&in->s_addr; + u_int residue = 0, v4residue = 0; + u_char mask, v4mask = 0; + int i, j; + u_int plen, loop; + + /* + * 6rd-relays IPv6 prefix is located at a 32-bit just + * after the prefix edge. + */ + plen = in6_mask2len(&satosin6(ifa->ifa_netmask)->sin6_addr, NULL); + if (64 < plen) { + DEBUG_PRINTF(1, "prefixlen is %d\n", plen); + return NULL; + } + + loop = 4; /* Normal 6rd operation */ + memcpy(&buf, &sin6->sin6_addr, sizeof(buf)); + if (sc->v4prefixlen != 0 && sc->v4prefixlen != 32) { + v4residue = sc->v4prefixlen % 8; + } + p += plen / 8; + //plen -= 32; + + residue = plen % 8; + mask = ((u_char)(-1) >> (8 - residue)); + if (v4residue) { + loop++; + v4mask = ((u_char)(-1) << v4residue); + } + /* + * The p points head of the IPv4 address part in + * bytes. The residue is a bit-shift factor when + * prefixlen is not a multiple of 8. + */ + DEBUG_PRINTF(2, "residue = %d 0x%x\n", residue, mask); + for (j = 0, i = (loop - (sc->v4prefixlen / 8)); i < loop; j++, i++) { + if (residue) { + q[i] = ((p[j] & mask) << (8 - residue)); + q[i] |= ((p[j + 1] >> residue) & mask); + DEBUG_PRINTF(2, "FINAL i = %d q[%d] - p[%d/%d] %x\n", + i, q[i], p[j], p[j + 1] >> residue, q[i]); + } else { + q[i] = p[j]; + DEBUG_PRINTF(2, "FINAL q[%d] - p[%d] %x\n", + q[i], p[j], q[i]); + } + } + if (v4residue) { + q[loop - (sc->v4prefixlen / 8)] &= v4mask; + + if (sc->v4prefixlen > 0 && sc->v4prefixlen < 32) + in->s_addr |= sc->inaddr; + } + + //if (in->s_addr != sc->srcv4_addr) + // printf("Wrong decoded address %x/%x!!!!\n", in->s_addr, sc->srcv4_addr); + } + +#if STF_DEBUG > 3 + { + char tmpbuf[INET6_ADDRSTRLEN + 1]; + memset(&tmpbuf, 0, INET_ADDRSTRLEN); + + ip_sprintf(tmpbuf, in); + DEBUG_PRINTF(1, "%s: in->in_addr = %s\n", __func__, tmpbuf); + DEBUG_PRINTF(1, "%s: leave\n", __func__); + } +#endif + + if (flags & STF_GETIN4_USE_CACHE) { + DEBUG_PRINTF(1, "%s: try to access ifa->ifa_dstaddr.\n", __func__); + ifa->ifa_dstaddr = (struct sockaddr *)&ia6->ia_dstaddr; + DEBUG_PRINTF(1, "%s: try to memset 0 to ia_dstaddr.\n", __func__); + memset(&ia6->ia_dstaddr, 0, sizeof(ia6->ia_dstaddr)); + DEBUG_PRINTF(1, "%s: try to memcpy ifa->ifa_dstaddr.\n", __func__); + memcpy((struct sockaddr_in *)ifa->ifa_dstaddr, + sin, sizeof(struct sockaddr_in)); + DEBUG_PRINTF(1, "%s: try to set sa_family.\n", __func__); + ifa->ifa_dstaddr->sa_family = AF_INET; + DEBUG_PRINTF(1, "%s: in->in_addr is stored in ifa_dstaddr.\n", + __func__); + } + + return (sin); +} + + static int -stf_ioctl(ifp, cmd, data) - struct ifnet *ifp; - u_long cmd; - caddr_t data; +stf_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { + struct stf_softc *sc, *sc_cur; struct ifaddr *ifa; struct ifreq *ifr; - struct sockaddr_in6 *sin6; - struct in_addr addr; + struct sockaddr_in in4; + struct stfv4args args; + struct in6_ifaddr *ia6; + struct ifdrv *ifd; int error, mtu; error = 0; + sc_cur = ifp->if_softc; + switch (cmd) { + case SIOCSDRVSPEC: + ifd = (struct ifdrv *) data; + error = priv_check(curthread, PRIV_NET_ADDIFADDR); + if (error) + break; + if (ifd->ifd_cmd == STF_SV4NET) { + if (ifd->ifd_len != sizeof(args)) { + error = EINVAL; + break; + } + mtx_lock(&stf_mtx); + LIST_FOREACH(sc, &V_stf_softc_list, stf_list) { + if (sc == sc_cur) + continue; + if (sc->inaddr == 0 || sc->v4prefixlen == 0) + continue; + + if ((ntohl(sc->inaddr) & ((uint32_t)(-1) << sc_cur->v4prefixlen)) == ntohl(sc_cur->inaddr)) { + error = EEXIST; + mtx_unlock(&stf_mtx); + return (error); + } + if ((ntohl(sc_cur->inaddr) & ((uint32_t)(-1) << sc->v4prefixlen)) == ntohl(sc->inaddr)) { + error = EEXIST; + mtx_unlock(&stf_mtx); + return (error); + } + } + mtx_unlock(&stf_mtx); + bzero(&args, sizeof args); + error = copyin(ifd->ifd_data, &args, ifd->ifd_len); + if (error) + break; + + sc_cur->srcv4_addr = args.inaddr.s_addr; + sc_cur->inaddr = ntohl(args.inaddr.s_addr); + sc_cur->inaddr &= ((uint32_t)(-1) << args.prefix); + sc_cur->inaddr = htonl(sc_cur->inaddr); + sc_cur->v4prefixlen = args.prefix; + if (sc_cur->v4prefixlen == 0) + sc_cur->v4prefixlen = 32; + } else if (ifd->ifd_cmd == STF_SDSTV4) { + if (ifd->ifd_len != sizeof(args)) { + error = EINVAL; + break; + } + bzero(&args, sizeof args); + error = copyin(ifd->ifd_data, &args, ifd->ifd_len); + if (error) + break; + sc_cur->dstv4_addr = args.dstv4_addr.s_addr; + } else + error = EINVAL; + break; + case SIOCGDRVSPEC: + ifd = (struct ifdrv *) data; + if (ifd->ifd_len != sizeof(args)) { + error = EINVAL; + break; + } + if (ifd->ifd_cmd != STF_GV4NET) { + error = EINVAL; + break; + } + bzero(&args, sizeof args); + args.inaddr.s_addr = sc_cur->srcv4_addr; + args.dstv4_addr.s_addr = sc_cur->dstv4_addr; + args.prefix = sc_cur->v4prefixlen; + error = copyout(&args, ifd->ifd_data, ifd->ifd_len); + + break; case SIOCSIFADDR: ifa = (struct ifaddr *)data; if (ifa == NULL || ifa->ifa_addr->sa_family != AF_INET6) { error = EAFNOSUPPORT; break; } - sin6 = (struct sockaddr_in6 *)ifa->ifa_addr; - if (!IN6_IS_ADDR_6TO4(&sin6->sin6_addr)) { + if (stf_getin4addr(sc_cur, &in4, ifa, 0) == NULL) { error = EINVAL; break; } - bcopy(GET_V4(&sin6->sin6_addr), &addr, sizeof(addr)); - if (isrfc1918addr(&addr)) { - error = EINVAL; - break; + /* + * Sanity check: if more than two interfaces have IFF_UP, do + * if_down() for all of them except for the specified one. + */ + mtx_lock(&stf_mtx); + LIST_FOREACH(sc, &V_stf_softc_list, stf_list) { + if (sc == sc_cur) + continue; + if ((ia6 = stf_getsrcifa6(sc->sc_ifp)) == NULL) + continue; + if (IN6_ARE_ADDR_EQUAL(&ia6->ia_addr.sin6_addr, &ifatoia6(ifa)->ia_addr.sin6_addr)) { + error = EEXIST; + ifa_free(&ia6->ia_ifa); + break; + } + ifa_free(&ia6->ia_ifa); } + mtx_unlock(&stf_mtx); + /* + * XXX: ifa_dstaddr is used as a cache of the + * extracted IPv4 address. + */ + if (ifa->ifa_dstaddr != NULL) + ifa->ifa_dstaddr->sa_family = AF_UNSPEC; ifa->ifa_rtrequest = stf_rtrequest; ifp->if_flags |= IFF_UP; + ifp->if_drv_flags |= IFF_DRV_RUNNING; break; case SIOCADDMULTI: @@ -847,4 +1385,5 @@ stf_ioctl(ifp, cmd, data) } return error; + } diff --git a/sys/net/if_stf.h b/sys/net/if_stf.h index cbaf670..e6ff29e 100644 --- a/sys/net/if_stf.h +++ b/sys/net/if_stf.h @@ -33,6 +33,15 @@ #ifndef _NET_IF_STF_H_ #define _NET_IF_STF_H_ +struct stfv4args { + struct in_addr inaddr; + struct in_addr dstv4_addr; + int prefix; +}; + +#define STF_SV4NET 1 +#define STF_GV4NET 2 +#define STF_SDSTV4 3 void in_stf_input(struct mbuf *, int); #endif /* _NET_IF_STF_H_ */ diff --git a/sys/net/if_tun.c b/sys/net/if_tun.c index 262d6d2..9864278 100644 --- a/sys/net/if_tun.c +++ b/sys/net/if_tun.c @@ -52,6 +52,7 @@ #include <net/vnet.h> #ifdef INET #include <netinet/in.h> +#include <netinet/ip.h> #endif #include <net/bpf.h> #include <net/if_tun.h> @@ -110,6 +111,7 @@ static const char tunname[] = "tun"; static MALLOC_DEFINE(M_TUN, tunname, "Tunnel Interface"); static int tundebug = 0; static int tundclone = 1; +static int tundispatch = 1; static struct clonedevs *tunclones; static TAILQ_HEAD(,tun_softc) tunhead = TAILQ_HEAD_INITIALIZER(tunhead); SYSCTL_INT(_debug, OID_AUTO, if_tun_debug, CTLFLAG_RW, &tundebug, 0, ""); @@ -119,6 +121,8 @@ static SYSCTL_NODE(_net_link, OID_AUTO, tun, CTLFLAG_RW, 0, "IP tunnel software network interface."); SYSCTL_INT(_net_link_tun, OID_AUTO, devfs_cloning, CTLFLAG_RW, &tundclone, 0, "Enable legacy devfs interface creation."); +SYSCTL_INT(_net_link_tun, OID_AUTO, tun_dispatching, CTLFLAG_RW, &tundispatch, 0, + "Queue rather than direct dispatch on write."); TUNABLE_INT("net.link.tun.devfs_cloning", &tundclone); @@ -328,31 +332,28 @@ static void tunstart(struct ifnet *ifp) { struct tun_softc *tp = ifp->if_softc; - struct mbuf *m; TUNDEBUG(ifp,"%s starting\n", ifp->if_xname); - if (ALTQ_IS_ENABLED(&ifp->if_snd)) { - IFQ_LOCK(&ifp->if_snd); - IFQ_POLL_NOLOCK(&ifp->if_snd, m); - if (m == NULL) { - IFQ_UNLOCK(&ifp->if_snd); - return; - } - IFQ_UNLOCK(&ifp->if_snd); - } + if (IFQ_IS_EMPTY(&ifp->if_snd)) + return; + + ifp->if_drv_flags |= IFF_DRV_OACTIVE; - mtx_lock(&tp->tun_mtx); if (tp->tun_flags & TUN_RWAIT) { tp->tun_flags &= ~TUN_RWAIT; wakeup(tp); } - selwakeuppri(&tp->tun_rsel, PZERO + 1); - KNOTE_LOCKED(&tp->tun_rsel.si_note, 0); - if (tp->tun_flags & TUN_ASYNC && tp->tun_sigio) { + if (!TAILQ_EMPTY(&tp->tun_rsel.si_tdlist)) + selwakeuppri(&tp->tun_rsel, PZERO + 1); + if (!KNLIST_EMPTY(&tp->tun_rsel.si_note)) { + mtx_lock(&tp->tun_mtx); + KNOTE_LOCKED(&tp->tun_rsel.si_note, 0); mtx_unlock(&tp->tun_mtx); + } + if (tp->tun_flags & TUN_ASYNC && tp->tun_sigio) pgsigio(&tp->tun_sigio, SIGIO, 0); - } else - mtx_unlock(&tp->tun_mtx); + + ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; } /* XXX: should return an error code so it can fail. */ @@ -590,9 +591,7 @@ tunoutput(struct ifnet *ifp, struct mbuf *m0, const struct sockaddr *dst, #endif /* Could be unlocked read? */ - mtx_lock(&tp->tun_mtx); cached_tun_flags = tp->tun_flags; - mtx_unlock(&tp->tun_mtx); if ((cached_tun_flags & TUN_READY) != TUN_READY) { TUNDEBUG (ifp, "not ready 0%o\n", tp->tun_flags); m_freem (m0); @@ -799,9 +798,7 @@ tunread(struct cdev *dev, struct uio *uio, int flag) int error=0, len; TUNDEBUG (ifp, "read\n"); - mtx_lock(&tp->tun_mtx); if ((tp->tun_flags & TUN_READY) != TUN_READY) { - mtx_unlock(&tp->tun_mtx); TUNDEBUG (ifp, "not ready 0%o\n", tp->tun_flags); return (EHOSTDOWN); } @@ -812,19 +809,19 @@ tunread(struct cdev *dev, struct uio *uio, int flag) IFQ_DEQUEUE(&ifp->if_snd, m); if (m == NULL) { if (flag & O_NONBLOCK) { - mtx_unlock(&tp->tun_mtx); return (EWOULDBLOCK); } + mtx_lock(&tp->tun_mtx); tp->tun_flags |= TUN_RWAIT; error = mtx_sleep(tp, &tp->tun_mtx, PCATCH | (PZERO + 1), "tunread", 0); + tp->tun_flags &= ~TUN_RWAIT; + mtx_unlock(&tp->tun_mtx); if (error != 0) { - mtx_unlock(&tp->tun_mtx); return (error); } } } while (m == NULL); - mtx_unlock(&tp->tun_mtx); while (m && uio->uio_resid > 0 && error == 0) { len = min(uio->uio_resid, m->m_len); @@ -849,6 +846,7 @@ tunwrite(struct cdev *dev, struct uio *uio, int flag) struct tun_softc *tp = dev->si_drv1; struct ifnet *ifp = TUN2IFP(tp); struct mbuf *m; + struct ip *ip; uint32_t family; int isr; @@ -876,18 +874,26 @@ tunwrite(struct cdev *dev, struct uio *uio, int flag) mac_ifnet_create_mbuf(ifp, m); #endif - /* Could be unlocked read? */ - mtx_lock(&tp->tun_mtx); + /* XXX: unlocked read? */ if (tp->tun_flags & TUN_IFHEAD) { - mtx_unlock(&tp->tun_mtx); if (m->m_len < sizeof(family) && (m = m_pullup(m, sizeof(family))) == NULL) return (ENOBUFS); family = ntohl(*mtod(m, u_int32_t *)); m_adj(m, sizeof(family)); } else { - mtx_unlock(&tp->tun_mtx); - family = AF_INET; + if (m->m_len < sizeof(struct ip) && + (m = m_pullup(m, sizeof(struct ip))) == NULL) + return (ENOBUFS); + ip = mtod(m, struct ip *); + if (ip->ip_v == IPVERSION) + family = AF_INET; + else if (ip->ip_v == AF_INET6) + family = AF_INET6; + else { + m_freem(m); + return (EINVAL); + } } BPF_MTAP2(ifp, &family, sizeof(family), m); @@ -923,7 +929,10 @@ tunwrite(struct cdev *dev, struct uio *uio, int flag) ifp->if_ipackets++; CURVNET_SET(ifp->if_vnet); M_SETFIB(m, ifp->if_fib); - netisr_dispatch(isr, m); + if (tundispatch) + netisr_queue(isr, m); + else + netisr_dispatch(isr, m); CURVNET_RESTORE(); return (0); } @@ -939,21 +948,17 @@ tunpoll(struct cdev *dev, int events, struct thread *td) struct tun_softc *tp = dev->si_drv1; struct ifnet *ifp = TUN2IFP(tp); int revents = 0; - struct mbuf *m; TUNDEBUG(ifp, "tunpoll\n"); if (events & (POLLIN | POLLRDNORM)) { - IFQ_LOCK(&ifp->if_snd); - IFQ_POLL_NOLOCK(&ifp->if_snd, m); - if (m != NULL) { - TUNDEBUG(ifp, "tunpoll q=%d\n", ifp->if_snd.ifq_len); - revents |= events & (POLLIN | POLLRDNORM); - } else { + if (IFQ_IS_EMPTY(&ifp->if_snd)) { TUNDEBUG(ifp, "tunpoll waiting\n"); selrecord(td, &tp->tun_rsel); + } else { + TUNDEBUG(ifp, "tunpoll q=%d\n", ifp->if_snd.ifq_len); + revents |= events & (POLLIN | POLLRDNORM); } - IFQ_UNLOCK(&ifp->if_snd); } if (events & (POLLOUT | POLLWRNORM)) revents |= events & (POLLOUT | POLLWRNORM); diff --git a/sys/net/if_vlan.c b/sys/net/if_vlan.c index 6a43e37..2de6829 100644 --- a/sys/net/if_vlan.c +++ b/sys/net/if_vlan.c @@ -1,5 +1,9 @@ /*- * Copyright 1998 Massachusetts Institute of Technology + * Copyright 2012 ADARA Networks, Inc. + * + * Portions of this software were developed by Robert N. M. Watson under + * contract to ADARA Networks, Inc. * * Permission to use, copy, modify, and distribute this software and * its documentation for any purpose and without fee is hereby @@ -51,6 +55,7 @@ __FBSDID("$FreeBSD$"); #include <sys/mbuf.h> #include <sys/module.h> #include <sys/rwlock.h> +#include <sys/priv.h> #include <sys/queue.h> #include <sys/socket.h> #include <sys/sockio.h> @@ -112,6 +117,8 @@ struct ifvlan { int ifvm_mintu; /* min transmission unit */ uint16_t ifvm_proto; /* encapsulation ethertype */ uint16_t ifvm_tag; /* tag to apply on packets leaving if */ + uint16_t ifvm_vid; /* VLAN ID */ + uint8_t ifvm_pcp; /* Priority Code Point (PCP). */ } ifv_mib; SLIST_HEAD(, vlan_mc_entry) vlan_mc_listhead; #ifndef VLAN_ARRAY @@ -119,7 +126,9 @@ struct ifvlan { #endif }; #define ifv_proto ifv_mib.ifvm_proto -#define ifv_vid ifv_mib.ifvm_tag +#define ifv_tag ifv_mib.ifvm_tag +#define ifv_vid ifv_mib.ifvm_vid +#define ifv_pcp ifv_mib.ifvm_pcp #define ifv_encaplen ifv_mib.ifvm_encaplen #define ifv_mtufudge ifv_mib.ifvm_mtufudge #define ifv_mintu ifv_mib.ifvm_mintu @@ -144,6 +153,15 @@ static int soft_pad = 0; SYSCTL_INT(_net_link_vlan, OID_AUTO, soft_pad, CTLFLAG_RW, &soft_pad, 0, "pad short frames before tagging"); +/* + * For now, make preserving PCP via an mbuf tag optional, as it increases + * per-packet memory allocations and frees. In the future, it would be + * preferable to reuse ether_vtag for this, or similar. + */ +static int vlan_mtag_pcp = 0; +SYSCTL_INT(_net_link_vlan, OID_AUTO, mtag_pcp, CTLFLAG_RW, &vlan_mtag_pcp, 0, + "Retain VLAN PCP information as packets are passed up the stack"); + static const char vlanname[] = "vlan"; static MALLOC_DEFINE(M_VLAN, vlanname, "802.1Q Virtual LAN Interface"); @@ -188,15 +206,14 @@ static __inline struct ifvlan * vlan_gethash(struct ifvlantrunk *trunk, #endif static void trunk_destroy(struct ifvlantrunk *trunk); +static void vlan_start(struct ifnet *ifp); static void vlan_init(void *foo); static void vlan_input(struct ifnet *ifp, struct mbuf *m); static int vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr); -static void vlan_qflush(struct ifnet *ifp); static int vlan_setflag(struct ifnet *ifp, int flag, int status, int (*func)(struct ifnet *, int)); static int vlan_setflags(struct ifnet *ifp, int status); static int vlan_setmulti(struct ifnet *ifp); -static int vlan_transmit(struct ifnet *ifp, struct mbuf *m); static void vlan_unconfig(struct ifnet *ifp); static void vlan_unconfig_locked(struct ifnet *ifp, int departing); static int vlan_config(struct ifvlan *ifv, struct ifnet *p, uint16_t tag); @@ -693,6 +710,16 @@ vlan_devat(struct ifnet *ifp, uint16_t vid) } /* + * Recalculate the cached VLAN tag exposed via the MIB. + */ +static void +vlan_tag_recalculate(struct ifvlan *ifv) +{ + + ifv->ifv_tag = EVL_MAKETAG(ifv->ifv_vid, ifv->ifv_pcp, 0); +} + +/* * VLAN support can be loaded as a module. The only place in the * system that's intimately aware of this is ether_input. We hook * into this code through vlan_input_p which is defined there and @@ -958,9 +985,12 @@ vlan_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params) /* NB: mtu is not set here */ ifp->if_init = vlan_init; - ifp->if_transmit = vlan_transmit; - ifp->if_qflush = vlan_qflush; + ifp->if_start = vlan_start; ifp->if_ioctl = vlan_ioctl; + + IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen); + ifp->if_snd.ifq_drv_maxlen = 0; + IFQ_SET_READY(&ifp->if_snd); ifp->if_flags = VLAN_IFFLAGS; ether_ifattach(ifp, eaddr); /* Now undo some of the damage... */ @@ -1001,6 +1031,8 @@ vlan_clone_destroy(struct if_clone *ifc, struct ifnet *ifp) struct ifvlan *ifv = ifp->if_softc; int unit = ifp->if_dunit; + IFQ_PURGE(&ifp->if_snd); + ether_ifdetach(ifp); /* first, remove it from system-wide lists */ vlan_unconfig(ifp); /* now it can be unconfigured and freed */ if_free(ifp); @@ -1021,97 +1053,114 @@ vlan_init(void *foo __unused) /* * The if_transmit method for vlan(4) interface. */ -static int -vlan_transmit(struct ifnet *ifp, struct mbuf *m) +static void +vlan_start(struct ifnet *ifp) { struct ifvlan *ifv; struct ifnet *p; + struct m_tag *mtag; + struct mbuf *m; + uint16_t tag; int error, len, mcast; + if (ALTQ_IS_ENABLED(&ifp->if_snd)) { + IFQ_LOCK(&ifp->if_snd); + IFQ_POLL_NOLOCK(&ifp->if_snd, m); + if (m == NULL) { + IFQ_UNLOCK(&ifp->if_snd); + return; + } + IFQ_UNLOCK(&ifp->if_snd); + } + ifv = ifp->if_softc; p = PARENT(ifv); - len = m->m_pkthdr.len; - mcast = (m->m_flags & (M_MCAST | M_BCAST)) ? 1 : 0; - - BPF_MTAP(ifp, m); - /* - * Do not run parent's if_transmit() if the parent is not up, - * or parent's driver will cause a system crash. - */ - if (!UP_AND_RUNNING(p)) { - m_freem(m); - ifp->if_oerrors++; - return (ENETDOWN); - } + for (;;) { + IFQ_DEQUEUE(&ifp->if_snd, m); + if (m == NULL) + break; - /* - * Pad the frame to the minimum size allowed if told to. - * This option is in accord with IEEE Std 802.1Q, 2003 Ed., - * paragraph C.4.4.3.b. It can help to work around buggy - * bridges that violate paragraph C.4.4.3.a from the same - * document, i.e., fail to pad short frames after untagging. - * E.g., a tagged frame 66 bytes long (incl. FCS) is OK, but - * untagging it will produce a 62-byte frame, which is a runt - * and requires padding. There are VLAN-enabled network - * devices that just discard such runts instead or mishandle - * them somehow. - */ - if (soft_pad && p->if_type == IFT_ETHER) { - static char pad[8]; /* just zeros */ - int n; + len = m->m_pkthdr.len; + mcast = (m->m_flags & (M_MCAST | M_BCAST)) ? 1 : 0; - for (n = ETHERMIN + ETHER_HDR_LEN - m->m_pkthdr.len; - n > 0; n -= sizeof(pad)) - if (!m_append(m, min(n, sizeof(pad)), pad)) - break; + BPF_MTAP(ifp, m); - if (n > 0) { - if_printf(ifp, "cannot pad short frame\n"); + /* + * Do not run parent's if_transmit() if the parent is not up, + * or parent's driver will cause a system crash. + */ + if (!UP_AND_RUNNING(p)) { ifp->if_oerrors++; - m_freem(m); - return (0); + return; } - } - /* - * If underlying interface can do VLAN tag insertion itself, - * just pass the packet along. However, we need some way to - * tell the interface where the packet came from so that it - * knows how to find the VLAN tag to use, so we attach a - * packet tag that holds it. - */ - if (p->if_capenable & IFCAP_VLAN_HWTAGGING) { - m->m_pkthdr.ether_vtag = ifv->ifv_vid; - m->m_flags |= M_VLANTAG; - } else { - m = ether_vlanencap(m, ifv->ifv_vid); - if (m == NULL) { - if_printf(ifp, "unable to prepend VLAN header\n"); - ifp->if_oerrors++; - return (0); + /* + * Pad the frame to the minimum size allowed if told to. + * This option is in accord with IEEE Std 802.1Q, 2003 Ed., + * paragraph C.4.4.3.b. It can help to work around buggy + * bridges that violate paragraph C.4.4.3.a from the same + * document, i.e., fail to pad short frames after untagging. + * E.g., a tagged frame 66 bytes long (incl. FCS) is OK, but + * untagging it will produce a 62-byte frame, which is a runt + * and requires padding. There are VLAN-enabled network + * devices that just discard such runts instead or mishandle + * them somehow. + */ + if (soft_pad && p->if_type == IFT_ETHER) { + static char pad[8]; /* just zeros */ + int n; + + for (n = ETHERMIN + ETHER_HDR_LEN - m->m_pkthdr.len; + n > 0; n -= sizeof(pad)) + if (!m_append(m, min(n, sizeof(pad)), pad)) + break; + + if (n > 0) { + if_printf(ifp, "cannot pad short frame\n"); + ifp->if_oerrors++; + m_freem(m); + return; + } } - } - /* - * Send it, precisely as ether_output() would have. - */ - error = (p->if_transmit)(p, m); - if (!error) { - ifp->if_opackets++; - ifp->if_omcasts += mcast; - ifp->if_obytes += len; - } else - ifp->if_oerrors++; - return (error); -} + /* + * If underlying interface can do VLAN tag insertion itself, + * just pass the packet along. However, we need some way to + * tell the interface where the packet came from so that it + * knows how to find the VLAN tag to use, so we attach a + * packet tag that holds it. + */ + if (vlan_mtag_pcp && (mtag = m_tag_locate(m, MTAG_8021Q, + MTAG_8021Q_PCP_OUT, NULL)) != NULL) + tag = EVL_MAKETAG(ifv->ifv_vid, *(uint8_t *)(mtag + 1), 0); + else + tag = ifv->ifv_tag; + if (p->if_capenable & IFCAP_VLAN_HWTAGGING) { + m->m_pkthdr.ether_vtag = tag; + m->m_flags |= M_VLANTAG; + } else { + m = ether_vlanencap(m, tag); + if (m == NULL) { + if_printf(ifp, "unable to prepend VLAN header\n"); + ifp->if_oerrors++; + return; + } + } -/* - * The ifp->if_qflush entry point for vlan(4) is a no-op. - */ -static void -vlan_qflush(struct ifnet *ifp __unused) -{ + /* + * Send it, precisely as ether_output() would have. + */ + error = (p->if_transmit)(p, m); + if (!error) { + ifp->if_opackets++; + //ifp->if_omcasts += mcast; + //ifp->if_obytes += len; + } else + ifp->if_oerrors++; + } + + return; } static void @@ -1119,7 +1168,8 @@ vlan_input(struct ifnet *ifp, struct mbuf *m) { struct ifvlantrunk *trunk = ifp->if_vlantrunk; struct ifvlan *ifv; - uint16_t vid; + struct m_tag *mtag; + uint16_t vid, tag; KASSERT(trunk != NULL, ("%s: no trunk", __func__)); @@ -1128,7 +1178,7 @@ vlan_input(struct ifnet *ifp, struct mbuf *m) * Packet is tagged, but m contains a normal * Ethernet frame; the tag is stored out-of-band. */ - vid = EVL_VLANOFTAG(m->m_pkthdr.ether_vtag); + tag = m->m_pkthdr.ether_vtag; m->m_flags &= ~M_VLANTAG; } else { struct ether_vlan_header *evl; @@ -1144,7 +1194,7 @@ vlan_input(struct ifnet *ifp, struct mbuf *m) return; } evl = mtod(m, struct ether_vlan_header *); - vid = EVL_VLANOFTAG(ntohs(evl->evl_tag)); + tag = ntohs(evl->evl_tag); /* * Remove the 802.1q header by copying the Ethernet @@ -1168,6 +1218,8 @@ vlan_input(struct ifnet *ifp, struct mbuf *m) } } + vid = EVL_VLANOFTAG(tag); + TRUNK_RLOCK(trunk); ifv = vlan_gethash(trunk, vid); if (ifv == NULL || !UP_AND_RUNNING(ifv->ifv_ifp)) { @@ -1178,6 +1230,28 @@ vlan_input(struct ifnet *ifp, struct mbuf *m) } TRUNK_RUNLOCK(trunk); + if (vlan_mtag_pcp) { + /* + * While uncommon, it is possible that we will find a 802.1q + * packet encapsulated inside another packet that also had an + * 802.1q header. For example, ethernet tunneled over IPSEC + * arriving over ethernet. In that case, we replace the + * existing 802.1q PCP m_tag value. + */ + mtag = m_tag_locate(m, MTAG_8021Q, MTAG_8021Q_PCP_IN, NULL); + if (mtag == NULL) { + mtag = m_tag_alloc(MTAG_8021Q, MTAG_8021Q_PCP_IN, + sizeof(uint8_t), M_NOWAIT); + if (mtag == NULL) { + m_freem(m); + ifp->if_ierrors++; + return; + } + m_tag_prepend(m, mtag); + } + *(uint8_t *)(mtag + 1) = EVL_PRIOFTAG(tag); + } + m->m_pkthdr.rcvif = ifv->ifv_ifp; ifv->ifv_ifp->if_ipackets++; @@ -1195,7 +1269,7 @@ vlan_config(struct ifvlan *ifv, struct ifnet *p, uint16_t vid) /* VID numbers 0x0 and 0xFFF are reserved */ if (vid == 0 || vid == 0xFFF) return (EINVAL); - if (p->if_type != IFT_ETHER && + if (p->if_type != IFT_ETHER && p->if_type != IFT_BRIDGE && (p->if_capenable & IFCAP_VLAN_HWTAGGING) == 0) return (EPROTONOSUPPORT); if ((p->if_flags & VLAN_IFFLAGS) != VLAN_IFFLAGS) @@ -1226,6 +1300,8 @@ exists: } ifv->ifv_vid = vid; /* must set this before vlan_inshash() */ + ifv->ifv_pcp = 0; /* Default: best effort delivery. */ + vlan_tag_recalculate(ifv); error = vlan_inshash(trunk, ifv); if (error) goto done; @@ -1721,6 +1797,34 @@ vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) } break; + case SIOCGVLANPCP: +#ifdef VIMAGE + if (ifp->if_vnet != ifp->if_home_vnet) { + error = EPERM; + break; + } +#endif + ifr->ifr_vlan_pcp = ifv->ifv_pcp; + break; + + case SIOCSVLANPCP: +#ifdef VIMAGE + if (ifp->if_vnet != ifp->if_home_vnet) { + error = EPERM; + break; + } +#endif + error = priv_check(curthread, PRIV_NET_SETVLANPCP); + if (error) + break; + if (ifr->ifr_vlan_pcp > 7) { + error = EINVAL; + break; + } + ifv->ifv_pcp = ifr->ifr_vlan_pcp; + vlan_tag_recalculate(ifv); + break; + default: error = EINVAL; break; diff --git a/sys/net/if_vlan_var.h b/sys/net/if_vlan_var.h index 4eb3b09..b1950e1 100644 --- a/sys/net/if_vlan_var.h +++ b/sys/net/if_vlan_var.h @@ -89,6 +89,23 @@ struct vlanreq { #define SIOCSETVLAN SIOCSIFGENERIC #define SIOCGETVLAN SIOCGIFGENERIC +#define SIOCGVLANPCP _IOWR('i', 152, struct ifreq) /* Get VLAN PCP */ +#define SIOCSVLANPCP _IOW('i', 153, struct ifreq) /* Set VLAN PCP */ + +/* + * Names for 802.1q priorities ("802.1p"). Notice that in this scheme, + * (0 < 1), allowing default 0-tagged traffic to take priority over background + * tagged traffic. + */ +#define IEEE8021Q_PCP_BK 1 /* Background (lowest) */ +#define IEEE8021Q_PCP_BE 0 /* Best effort (default) */ +#define IEEE8021Q_PCP_EE 2 /* Excellent effort */ +#define IEEE8021Q_PCP_CA 3 /* Critical applications */ +#define IEEE8021Q_PCP_VI 4 /* Video, < 100ms latency */ +#define IEEE8021Q_PCP_VO 5 /* Video, < 10ms latency */ +#define IEEE8021Q_PCP_IC 6 /* Internetwork control */ +#define IEEE8021Q_PCP_NC 7 /* Network control (highest) */ + #ifdef _KERNEL /* * Drivers that are capable of adding and removing the VLAN header @@ -126,6 +143,16 @@ struct vlanreq { * if_capabilities. */ +/* + * The 802.1q code may also tag mbufs with the PCP (priority) field for use in + * other layers of the stack, in which case an m_tag will be used. This is + * semantically quite different from use of the ether_vtag field, which is + * defined only between the device driver and VLAN layer. + */ +#define MTAG_8021Q 1326104895 +#define MTAG_8021Q_PCP_IN 0 /* Input priority. */ +#define MTAG_8021Q_PCP_OUT 1 /* Output priority. */ + #define VLAN_CAPABILITIES(_ifp) do { \ if ((_ifp)->if_vlantrunk != NULL) \ (*vlan_trunk_cap_p)(_ifp); \ diff --git a/sys/net/netisr.c b/sys/net/netisr.c index 0cc6bb8..ef34600 100644 --- a/sys/net/netisr.c +++ b/sys/net/netisr.c @@ -126,6 +126,13 @@ static struct rmlock netisr_rmlock; static SYSCTL_NODE(_net, OID_AUTO, isr, CTLFLAG_RW, 0, "netisr"); +#ifdef DEVICE_POLLING +static int netisr_polling = 0; /* Enable Polling. */ +TUNABLE_INT("net.isr.polling_enable", &netisr_polling); +SYSCTL_INT(_net_isr, OID_AUTO, polling_enable, CTLFLAG_RW, + &netisr_polling, 0, "Enable polling"); +#endif + /*- * Three global direct dispatch policies are supported: * @@ -168,7 +175,7 @@ SYSCTL_INT(_net_isr, OID_AUTO, maxthreads, CTLFLAG_RDTUN, &netisr_maxthreads, 0, "Use at most this many CPUs for netisr processing"); -static int netisr_bindthreads = 0; /* Bind threads to CPUs. */ +static int netisr_bindthreads = 1; /* Bind threads to CPUs. */ TUNABLE_INT("net.isr.bindthreads", &netisr_bindthreads); SYSCTL_INT(_net_isr, OID_AUTO, bindthreads, CTLFLAG_RDTUN, &netisr_bindthreads, 0, "Bind netisr threads to CPUs."); @@ -796,9 +803,11 @@ swi_net(void *arg) nwsp = arg; #ifdef DEVICE_POLLING - KASSERT(nws_count == 1, - ("%s: device_polling but nws_count != 1", __func__)); - netisr_poll(); + if (netisr_polling) { + KASSERT(nws_count == 1, + ("%s: device_polling but nws_count != 1", __func__)); + netisr_poll(); + } #endif #ifdef NETISR_LOCKING NETISR_RLOCK(&tracker); @@ -823,7 +832,8 @@ out: NETISR_RUNLOCK(&tracker); #endif #ifdef DEVICE_POLLING - netisr_pollmore(); + if (netisr_polling) + netisr_pollmore(); #endif } @@ -1078,6 +1088,9 @@ netisr_sched_poll(void) { struct netisr_workstream *nwsp; + if (!netisr_polling) + return; + nwsp = DPCPU_ID_PTR(nws_array[0], nws); NWS_SIGNAL(nwsp); } @@ -1151,7 +1164,7 @@ netisr_init(void *arg) * multiple netisr threads, so for the time being compiling in device * polling disables parallel netisr workers. */ - if (netisr_maxthreads != 1 || netisr_bindthreads != 0) { + if (netisr_polling && (netisr_maxthreads != 1 || netisr_bindthreads != 0)) { printf("netisr_init: forcing maxthreads to 1 and " "bindthreads to 0 for device polling\n"); netisr_maxthreads = 1; diff --git a/sys/net/pfil.c b/sys/net/pfil.c index 44373ee..5d5d346 100644 --- a/sys/net/pfil.c +++ b/sys/net/pfil.c @@ -34,6 +34,7 @@ #include <sys/errno.h> #include <sys/lock.h> #include <sys/malloc.h> +#include <sys/sbuf.h> #include <sys/rmlock.h> #include <sys/socket.h> #include <sys/socketvar.h> @@ -78,7 +79,7 @@ pfil_run_hooks(struct pfil_head *ph, struct mbuf **mp, struct ifnet *ifp, KASSERT(ph->ph_nhooks >= 0, ("Pfil hook count dropped < 0")); for (pfh = pfil_chain_get(dir, ph); pfh != NULL; pfh = TAILQ_NEXT(pfh, pfil_chain)) { - if (pfh->pfil_func != NULL) { + if (!(pfh->pfil_flags & PFIL_DISABLED) && pfh->pfil_func != NULL) { rv = (*pfh->pfil_func)(pfh->pfil_arg, &m, ifp, dir, inp); if (rv != 0 || m == NULL) @@ -211,6 +212,140 @@ pfil_head_unregister(struct pfil_head *ph) return (0); } +static int +pfil_sysctl_handler(SYSCTL_HANDLER_ARGS) +{ + struct rm_priotracker rmpt; + struct pfil_head *ph; + struct packet_filter_hook *pfh, *pfhtmp; + struct sbuf *sb; + pfil_chain_t npfl, *pfl; + char *new_order, *elm, *parse; + int i = 0, err = 0, hintlen, reqlen; + + hintlen = 0; + + ph = (struct pfil_head *)arg1; + if (ph == NULL || !PFIL_HOOKED(ph)) { + err = SYSCTL_OUT(req, "", 2); + return (err); + } + + if (arg2 == PFIL_IN) + pfl = &ph->ph_in; + else + pfl = &ph->ph_out; + + if (TAILQ_EMPTY(pfl)) { + err = SYSCTL_OUT(req, "", 2); + return (err); + } + + /* + * NOTE: This is needed to avoid witness(4) warnings. + */ + PFIL_RLOCK(ph, &rmpt); + TAILQ_FOREACH(pfh, pfl, pfil_chain) { + if (pfh->pfil_name != NULL) + hintlen = strlen(pfh->pfil_name); + else + hintlen += 2; + } + PFIL_RUNLOCK(ph, &rmpt); + + sb = sbuf_new(NULL, NULL, hintlen + 1, SBUF_AUTOEXTEND); + if (sb == NULL) + return (EINVAL); + + PFIL_RLOCK(ph, &rmpt); + TAILQ_FOREACH(pfh, pfl, pfil_chain) { + if (i > 0) + sbuf_printf(sb, ", "); + if (pfh->pfil_name != NULL) + sbuf_printf(sb, "%s%s", pfh->pfil_name, + pfh->pfil_flags & PFIL_DISABLED ? "*" : ""); + else + sbuf_printf(sb, "%s%s", "NA", + pfh->pfil_flags & PFIL_DISABLED ? "*" : ""); + i++; + } + PFIL_RUNLOCK(ph, &rmpt); + + sbuf_finish(sb); + + /* hint for sensible write buffer sizes */ + hintlen = sbuf_len(sb) + i * 2; + err = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1); + sbuf_delete(sb); + + if (err || !req->newptr) + return (err); + + if ((reqlen = req->newlen - req->newidx) > hintlen) + return (E2BIG); + new_order = malloc(reqlen + 1, M_TEMP, M_WAITOK|M_ZERO); + + err = SYSCTL_IN(req, new_order, reqlen); + if (err) + goto error; + new_order[reqlen] = '\0'; /* Just in case */ + parse = new_order; + + TAILQ_INIT(&npfl); + PFIL_WLOCK(ph); + while ((elm = strsep(&parse, " \t,")) != NULL) { + if (*elm == '\0') + continue; + TAILQ_FOREACH_SAFE(pfh, pfl, pfil_chain, pfhtmp) { + if (pfh->pfil_name != NULL) { + if (!strcmp(pfh->pfil_name, elm)) { + TAILQ_REMOVE(pfl, pfh, pfil_chain); + TAILQ_INSERT_TAIL(&npfl, pfh, pfil_chain); + pfh->pfil_flags &= ~PFIL_DISABLED; + break; + } + } else { + if (!strcmp(elm, "NA")) { + TAILQ_REMOVE(pfl, pfh, pfil_chain); + TAILQ_INSERT_TAIL(&npfl, pfh, pfil_chain); + pfh->pfil_flags &= ~PFIL_DISABLED; + break; + } + } + } + } + + TAILQ_FOREACH_SAFE(pfh, pfl, pfil_chain, pfhtmp) { + pfh->pfil_flags |= PFIL_DISABLED; + TAILQ_REMOVE(pfl, pfh, pfil_chain); + TAILQ_INSERT_TAIL(&npfl, pfh, pfil_chain); + } + + TAILQ_CONCAT(pfl, &npfl, pfil_chain); + +error: + PFIL_WUNLOCK(ph); + free(new_order, M_TEMP); + return (err); +} + +void +pfil_head_export_sysctl(struct pfil_head *ph, struct sysctl_oid_list *parent) +{ + struct sysctl_oid *root; + + root = SYSCTL_ADD_NODE(&ph->ph_clist, parent, OID_AUTO, "pfil", + CTLFLAG_RW, 0, "pfil(9) management"); + SYSCTL_ADD_PROC((void *)&ph->ph_clist, SYSCTL_CHILDREN(root), OID_AUTO, + "inbound", CTLTYPE_STRING|CTLFLAG_RW|CTLFLAG_SECURE3, + (void *)ph, PFIL_IN, pfil_sysctl_handler, "A", + "Inbound filter hooks"); + SYSCTL_ADD_PROC((void *)&ph->ph_clist, SYSCTL_CHILDREN(root), OID_AUTO, + "outbound", CTLTYPE_STRING|CTLFLAG_RW|CTLFLAG_SECURE3, + (void *)ph, PFIL_OUT, pfil_sysctl_handler, "A", + "Outbound filter hooks"); +} + /* * pfil_head_get() returns the pfil_head for a given key/dlt. */ @@ -238,6 +373,12 @@ pfil_head_get(int type, u_long val) int pfil_add_hook(pfil_func_t func, void *arg, int flags, struct pfil_head *ph) { + return (pfil_add_named_hook(func, arg, NULL, flags, ph)); +} + +int +pfil_add_named_hook(pfil_func_t func, void *arg, char *name, int flags, struct pfil_head *ph) +{ struct packet_filter_hook *pfh1 = NULL; struct packet_filter_hook *pfh2 = NULL; int err; @@ -262,6 +403,8 @@ pfil_add_hook(pfil_func_t func, void *arg, int flags, struct pfil_head *ph) if (flags & PFIL_IN) { pfh1->pfil_func = func; pfh1->pfil_arg = arg; + pfh1->pfil_name = name; + pfh1->pfil_flags &= ~PFIL_DISABLED; err = pfil_chain_add(&ph->ph_in, pfh1, flags & ~PFIL_OUT); if (err) goto locked_error; @@ -270,6 +413,8 @@ pfil_add_hook(pfil_func_t func, void *arg, int flags, struct pfil_head *ph) if (flags & PFIL_OUT) { pfh2->pfil_func = func; pfh2->pfil_arg = arg; + pfh2->pfil_name = name; + pfh2->pfil_flags &= ~PFIL_DISABLED; err = pfil_chain_add(&ph->ph_out, pfh2, flags & ~PFIL_IN); if (err) { if (flags & PFIL_IN) diff --git a/sys/net/pfil.h b/sys/net/pfil.h index c9a1b65..ff260ce 100644 --- a/sys/net/pfil.h +++ b/sys/net/pfil.h @@ -38,6 +38,7 @@ #include <sys/_mutex.h> #include <sys/lock.h> #include <sys/rmlock.h> +#include <sys/sysctl.h> struct mbuf; struct ifnet; @@ -55,11 +56,14 @@ struct packet_filter_hook { TAILQ_ENTRY(packet_filter_hook) pfil_chain; pfil_func_t pfil_func; void *pfil_arg; + int pfil_flags; + char *pfil_name; }; #define PFIL_IN 0x00000001 #define PFIL_OUT 0x00000002 #define PFIL_WAITOK 0x00000004 +#define PFIL_DISABLED 0x00000008 #define PFIL_ALL (PFIL_IN|PFIL_OUT) typedef TAILQ_HEAD(pfil_chain, packet_filter_hook) pfil_chain_t; @@ -85,6 +89,7 @@ struct pfil_head { struct rmlock ph_lock; /* Private lock storage */ int flags; #endif + struct sysctl_ctx_list ph_clist; union { u_long phu_val; void *phu_ptr; @@ -96,7 +101,9 @@ struct pfil_head { /* Public functions for pfil hook management by packet filters. */ struct pfil_head *pfil_head_get(int, u_long); +void pfil_head_export_sysctl(struct pfil_head *, struct sysctl_oid_list *); int pfil_add_hook(pfil_func_t, void *, int, struct pfil_head *); +int pfil_add_named_hook(pfil_func_t, void *, char *, int, struct pfil_head *); int pfil_remove_hook(pfil_func_t, void *, int, struct pfil_head *); #define PFIL_HOOKED(p) ((p)->ph_nhooks > 0) diff --git a/sys/net/pfkeyv2.h b/sys/net/pfkeyv2.h index c45f8b0..bab26fe 100644 --- a/sys/net/pfkeyv2.h +++ b/sys/net/pfkeyv2.h @@ -218,7 +218,6 @@ struct sadb_x_sa2 { }; /* XXX Policy Extension */ -/* sizeof(struct sadb_x_policy) == 16 */ struct sadb_x_policy { u_int16_t sadb_x_policy_len; u_int16_t sadb_x_policy_exttype; @@ -228,6 +227,8 @@ struct sadb_x_policy { u_int32_t sadb_x_policy_id; u_int32_t sadb_x_policy_reserved2; }; +_Static_assert(sizeof(struct sadb_x_policy) == 16, "struct size mismatch"); + /* * When policy_type == IPSEC, it is followed by some of * the ipsec policy request. @@ -256,31 +257,31 @@ struct sadb_x_ipsecrequest { }; /* NAT-Traversal type, see RFC 3948 (and drafts). */ -/* sizeof(struct sadb_x_nat_t_type) == 8 */ struct sadb_x_nat_t_type { u_int16_t sadb_x_nat_t_type_len; u_int16_t sadb_x_nat_t_type_exttype; u_int8_t sadb_x_nat_t_type_type; u_int8_t sadb_x_nat_t_type_reserved[3]; }; +_Static_assert(sizeof(struct sadb_x_nat_t_type) == 8, "struct size mismatch"); /* NAT-Traversal source or destination port. */ -/* sizeof(struct sadb_x_nat_t_port) == 8 */ struct sadb_x_nat_t_port { u_int16_t sadb_x_nat_t_port_len; u_int16_t sadb_x_nat_t_port_exttype; u_int16_t sadb_x_nat_t_port_port; u_int16_t sadb_x_nat_t_port_reserved; }; +_Static_assert(sizeof(struct sadb_x_nat_t_port) == 8, "struct size mismatch"); /* ESP fragmentation size. */ -/* sizeof(struct sadb_x_nat_t_frag) == 8 */ struct sadb_x_nat_t_frag { u_int16_t sadb_x_nat_t_frag_len; u_int16_t sadb_x_nat_t_frag_exttype; u_int16_t sadb_x_nat_t_frag_fraglen; u_int16_t sadb_x_nat_t_frag_reserved; }; +_Static_assert(sizeof(struct sadb_x_nat_t_frag) == 8, "struct size mismatch"); #define SADB_EXT_RESERVED 0 @@ -332,39 +333,47 @@ struct sadb_x_nat_t_frag { #define SADB_SAFLAGS_PFS 1 -/* RFC2367 numbers - meets RFC2407 */ +/* + * Though some of these numbers (both _AALG and _EALG) appear to be + * IKEv2 numbers and others original IKE numbers, they have no meaning. + * These are constants that the various IKE daemons use to tell the kernel + * what cipher to use. + * + * Do not use these constants directly to decide which Transformation ID + * to send. You are responsible for mapping them yourself. + */ #define SADB_AALG_NONE 0 #define SADB_AALG_MD5HMAC 2 #define SADB_AALG_SHA1HMAC 3 #define SADB_AALG_MAX 252 -/* private allocations - based on RFC2407/IANA assignment */ #define SADB_X_AALG_SHA2_256 5 #define SADB_X_AALG_SHA2_384 6 #define SADB_X_AALG_SHA2_512 7 #define SADB_X_AALG_RIPEMD160HMAC 8 -#define SADB_X_AALG_AES_XCBC_MAC 9 /* draft-ietf-ipsec-ciph-aes-xcbc-mac-04 */ -/* private allocations should use 249-255 (RFC2407) */ +#define SADB_X_AALG_AES_XCBC_MAC 9 /* RFC3566 */ +#define SADB_X_AALG_AES128GMAC 11 /* RFC4543 + Errata1821 */ +#define SADB_X_AALG_AES192GMAC 12 +#define SADB_X_AALG_AES256GMAC 13 #define SADB_X_AALG_MD5 249 /* Keyed MD5 */ #define SADB_X_AALG_SHA 250 /* Keyed SHA */ #define SADB_X_AALG_NULL 251 /* null authentication */ #define SADB_X_AALG_TCP_MD5 252 /* Keyed TCP-MD5 (RFC2385) */ -/* RFC2367 numbers - meets RFC2407 */ #define SADB_EALG_NONE 0 #define SADB_EALG_DESCBC 2 #define SADB_EALG_3DESCBC 3 -#define SADB_EALG_NULL 11 -#define SADB_EALG_MAX 250 -/* private allocations - based on RFC2407/IANA assignment */ #define SADB_X_EALG_CAST128CBC 6 #define SADB_X_EALG_BLOWFISHCBC 7 +#define SADB_EALG_NULL 11 #define SADB_X_EALG_RIJNDAELCBC 12 #define SADB_X_EALG_AES 12 -/* private allocations - based on RFC4312/IANA assignment */ -#define SADB_X_EALG_CAMELLIACBC 22 -/* private allocations should use 249-255 (RFC2407) */ -#define SADB_X_EALG_SKIPJACK 249 /*250*/ /* for IPSEC */ -#define SADB_X_EALG_AESCTR 250 /*249*/ /* draft-ietf-ipsec-ciph-aes-ctr-03 */ +#define SADB_X_EALG_AESCTR 13 +#define SADB_X_EALG_AESGCM8 18 /* RFC4106 */ +#define SADB_X_EALG_AESGCM12 19 +#define SADB_X_EALG_AESGCM16 20 +#define SADB_X_EALG_CAMELLIACBC 22 +#define SADB_X_EALG_AESGMAC 23 /* RFC4543 + Errata1821 */ +#define SADB_EALG_MAX 23 /* !!! keep updated !!! */ /* private allocations - based on RFC2407/IANA assignment */ #define SADB_X_CALG_NONE 0 diff --git a/sys/net/pfvar.h b/sys/net/pfvar.h index ae5ecb9..e46bb69 100644 --- a/sys/net/pfvar.h +++ b/sys/net/pfvar.h @@ -198,10 +198,11 @@ extern struct rwlock pf_rules_lock; (a)->addr32[0] == (b)->addr32[0])) \ #define PF_ANEQ(a, b, c) \ - ((a)->addr32[0] != (b)->addr32[0] || \ + ((c == AF_INET && (a)->addr32[0] != (b)->addr32[0]) || \ + (c == AF_INET6 && ((a)->addr32[0] != (b)->addr32[0] || \ (a)->addr32[1] != (b)->addr32[1] || \ (a)->addr32[2] != (b)->addr32[2] || \ - (a)->addr32[3] != (b)->addr32[3]) \ + (a)->addr32[3] != (b)->addr32[3]))) \ #define PF_AZERO(a, c) \ ((c == AF_INET && !(a)->addr32[0]) || \ @@ -326,6 +327,14 @@ struct pf_rule_gid { u_int8_t op; }; +struct pf_rule_ieee8021q_pcp { + u_int8_t pcp[2]; + u_int8_t op; +#define SETPCP_VALID 0x80 /* Set if PCP value in field is valid. */ +#define SETPCP_PCP_MASK 0x07 /* Mask to retrieve pcp if SETPCP_VALID. */ + u_int8_t setpcp; +}; + struct pf_rule_addr { struct pf_addr_wrap addr; u_int16_t port[2]; @@ -465,6 +474,13 @@ struct pf_osfp_ioctl { int fp_getnum; /* DIOCOSFPGET number */ }; +struct pf_rule_actions { + u_int16_t qid; + u_int16_t pqid; + u_int32_t pdnpipe; + u_int32_t dnpipe; + u_int8_t flags; +}; union pf_rule_ptr { struct pf_rule *ptr; @@ -488,6 +504,7 @@ struct pf_rule { union pf_rule_ptr skip[PF_SKIP_COUNT]; #define PF_RULE_LABEL_SIZE 64 char label[PF_RULE_LABEL_SIZE]; + char schedule[PF_RULE_LABEL_SIZE]; char ifname[IFNAMSIZ]; char qname[PF_QNAME_SIZE]; char pqname[PF_QNAME_SIZE]; @@ -520,12 +537,21 @@ struct pf_rule { u_int32_t limit; u_int32_t seconds; } max_src_conn_rate; - u_int32_t qid; - u_int32_t pqid; + u_int16_t qid; + u_int16_t pqid; + u_int32_t dnpipe; + u_int32_t pdnpipe; +#define PFRULE_DN_IS_PIPE 0x00000010 +#define PFRULE_DN_IS_QUEUE 0x00000020 + u_int32_t free_flags; u_int32_t rt_listid; u_int32_t nr; u_int32_t prob; +#ifdef PF_USER_INFO uid_t cuid; +#else + u_int32_t cuid; +#endif pid_t cpid; counter_u64_t states_cur; @@ -566,6 +592,29 @@ struct pf_rule { u_int8_t allow_opts; u_int8_t rt; u_int8_t return_ttl; + +#ifndef DSCP_EF +/* Copied from altq_cdnr.h */ +/* diffserve code points */ +#define DSCP_MASK 0xfc +#define DSCP_CUMASK 0x03 +#define DSCP_VA 0xb0 +#define DSCP_EF 0xb8 +#define DSCP_AF11 0x28 +#define DSCP_AF12 0x30 +#define DSCP_AF13 0x38 +#define DSCP_AF21 0x48 +#define DSCP_AF22 0x50 +#define DSCP_AF23 0x58 +#define DSCP_AF31 0x68 +#define DSCP_AF32 0x70 +#define DSCP_AF33 0x78 +#define DSCP_AF41 0x88 +#define DSCP_AF42 0x90 +#define DSCP_AF43 0x98 +#define AF_CLASSMASK 0xe0 +#define AF_DROPPRECMASK 0x18 +#endif u_int8_t tos; u_int8_t set_tos; u_int8_t anchor_relative; @@ -580,6 +629,8 @@ struct pf_rule { u_int16_t port; } divert; + struct pf_rule_ieee8021q_pcp ieee8021q_pcp; + uint64_t u_states_cur; uint64_t u_states_tot; uint64_t u_src_nodes; @@ -604,6 +655,13 @@ struct pf_rule { #define PFRULE_REASSEMBLE_TCP 0x1000 #define PFRULE_SET_TOS 0x2000 +/* rule flags for TOS or DSCP differentiation */ +#define PFRULE_TOS 0x2000 +#define PFRULE_DSCP 0x4000 + +/* rule flags for handling ALTQ hashing required by certain disciplines */ +#define PFRULE_ALTQ_HASH 0x8000 + /* rule flags again */ #define PFRULE_IFBOUND 0x00010000 /* if-bound */ #define PFRULE_STATESLOPPY 0x00020000 /* sloppy state tracking */ @@ -708,7 +766,13 @@ struct pf_state { u_int64_t id; u_int32_t creatorid; u_int8_t direction; - u_int8_t pad[3]; + u_int8_t pad[2]; + u_int8_t local_flags; +#define PFSTATE_DIVERT_ALTQ 0x10 +#define PFSTATE_DIVERT_DNCOOKIE 0x20 +#define PFSTATE_DIVERT_ACTION 0x40 +#define PFSTATE_DIVERT_TAG 0x80 +#define PFSTATE_DIVERT_MASK 0xFF00 u_int refs; TAILQ_ENTRY(pf_state) sync_list; @@ -730,7 +794,12 @@ struct pf_state { u_int32_t creation; u_int32_t expire; u_int32_t pfsync_time; + u_int16_t qid; + u_int16_t pqid; + u_int32_t pdnpipe; + u_int32_t dnpipe; u_int16_t tag; + u_int16_t divert_cookie; u_int8_t log; u_int8_t state_flags; #define PFSTATE_ALLOWOPTS 0x01 @@ -743,7 +812,7 @@ struct pf_state { /* XXX */ u_int8_t sync_updates; - u_int8_t _tail[3]; + u_int8_t _tail; }; /* @@ -1079,11 +1148,13 @@ struct pfi_kif { #define PFI_IFLAG_SKIP 0x0100 /* skip filtering on interface */ struct pf_pdesc { +#ifdef PF_USER_INFO struct { int done; uid_t uid; gid_t gid; } lookup; +#endif u_int64_t tot_len; /* Make Mickey money */ union { struct tcphdr *tcp; @@ -1101,6 +1172,7 @@ struct pf_pdesc { u_int16_t *sport; u_int16_t *dport; struct pf_mtag *pf_mtag; + struct pf_rule_actions act; u_int32_t p_len; /* total length of payload */ @@ -1252,6 +1324,11 @@ struct pfioc_state_kill { u_int psk_killed; }; +struct pfioc_schedule_kill { + int numberkilled; + char schedule[PF_RULE_LABEL_SIZE]; +}; + struct pfioc_states { int ps_len; union { @@ -1330,6 +1407,9 @@ struct pfioc_trans { #ifdef _KERNEL #define PFR_FLAG_USERIOCTL 0x10000000 #endif +#define DIOCGETNAMEDALTQ _IOWR('D', 94, struct pfioc_ruleset) +#define DIOCGETNAMEDTAG _IOR('D', 95, u_int32_t) +#define DIOCKILLSCHEDULE _IOWR('D', 96, struct pfioc_schedule_kill) struct pfioc_table { struct pfr_table pfrio_table; @@ -1594,6 +1674,8 @@ int pf_match_addr(u_int8_t, struct pf_addr *, struct pf_addr *, int pf_match_addr_range(struct pf_addr *, struct pf_addr *, struct pf_addr *, sa_family_t); int pf_match_port(u_int8_t, u_int16_t, u_int16_t, u_int16_t); +int pf_match_ieee8021q_pcp(u_int8_t, u_int8_t, u_int8_t, struct mbuf *); +int pf_ieee8021q_setpcp(struct mbuf *m, struct pf_rule *r); void pf_normalize_init(void); void pf_normalize_cleanup(void); diff --git a/sys/netgraph/ng_base.c b/sys/netgraph/ng_base.c index cc2d2d6..693b3ac 100644 --- a/sys/netgraph/ng_base.c +++ b/sys/netgraph/ng_base.c @@ -64,6 +64,10 @@ #include <sys/unistd.h> #include <machine/cpu.h> +#include <sys/socket.h> +#include <net/if.h> +#include <net/if_var.h> + #include <net/netisr.h> #include <net/vnet.h> @@ -240,6 +244,8 @@ int ng_path_parse(char *addr, char **node, char **path, char **hook); void ng_rmnode(node_p node, hook_p dummy1, void *dummy2, int dummy3); void ng_unname(node_p node); +extern void (*ng_ether_attach_p)(struct ifnet *ifp); + /* Our own netgraph malloc type */ MALLOC_DEFINE(M_NETGRAPH, "netgraph", "netgraph structures and ctrl messages"); MALLOC_DEFINE(M_NETGRAPH_MSG, "netgraph_msg", "netgraph name storage"); @@ -574,6 +580,13 @@ static const struct ng_cmdlist ng_generic_cmds[] = { &ng_parse_ng_mesg_type, &ng_parse_ng_mesg_type }, + { + NGM_GENERIC_COOKIE, + NGM_ETHER_ATTACH, + "attach", + &ng_parse_string_type, + NULL + }, { 0 } }; @@ -2908,6 +2921,17 @@ ng_generic_msg(node_p here, item_p item, hook_p lasthook) break; } + case NGM_ETHER_ATTACH: + { + struct ifnet *ifp; + ifp = ifunit((char *)msg->data); + if (ifp && ng_ether_attach_p != NULL) { + ng_ether_attach_p(ifp); + } + + break; + } + case NGM_TEXT_CONFIG: case NGM_TEXT_STATUS: /* diff --git a/sys/netgraph/ng_eiface.c b/sys/netgraph/ng_eiface.c index 0f471bb..24a8daa 100644 --- a/sys/netgraph/ng_eiface.c +++ b/sys/netgraph/ng_eiface.c @@ -43,6 +43,7 @@ #include <net/if.h> #include <net/if_media.h> #include <net/if_types.h> +#include <net/if_dl.h> #include <net/netisr.h> #include <net/route.h> #include <net/vnet.h> @@ -66,6 +67,13 @@ static const struct ng_cmdlist ng_eiface_cmdlist[] = { }, { NGM_EIFACE_COOKIE, + NGM_EIFACE_SET_IFNAME, + "setifname", + &ng_parse_string_type, + NULL + }, + { + NGM_EIFACE_COOKIE, NGM_EIFACE_SET, "set", &ng_parse_enaddr_type, @@ -470,6 +478,11 @@ ng_eiface_rcvmsg(node_p node, item_p item, hook_p lasthook) struct ng_mesg *resp = NULL; int error = 0; struct ng_mesg *msg; + char *new_name; + size_t namelen, onamelen; + struct sockaddr_dl *sdl = NULL; + struct ifaddr *ifa = NULL; + node_p ethernode; NGI_GET_MSG(item, msg); switch (msg->header.typecookie) { @@ -496,6 +509,46 @@ ng_eiface_rcvmsg(node_p node, item_p item, hook_p lasthook) } strlcpy(resp->data, ifp->if_xname, IFNAMSIZ); break; + case NGM_EIFACE_SET_IFNAME: + new_name = (char *)msg->data; + + /* Deny request if interface is UP */ + if ((ifp->if_flags & IFF_UP) != 0) { + error = EBUSY; + break; + } + + EVENTHANDLER_INVOKE(ifnet_departure_event, ifp); + + ethernode = ng_name2noderef(node, ifp->if_xname); + if (ethernode != NULL) + ng_name_node(ethernode, new_name); + + strlcpy(ifp->if_xname, new_name, sizeof(ifp->if_xname)); + ifa = ifp->if_addr; + IFA_LOCK(ifa); + sdl = (struct sockaddr_dl *)ifa->ifa_addr; + namelen = strlen(new_name) + 1; + onamelen = sdl->sdl_nlen; + /* + * Move the address if needed. This is safe because we + * allocate space for a name of length IFNAMSIZ when we + * create this in if_attach(). + */ + if (namelen != onamelen) { + bcopy(sdl->sdl_data + onamelen, + sdl->sdl_data + namelen, sdl->sdl_alen); + } + bcopy(new_name, sdl->sdl_data, namelen); + sdl->sdl_nlen = namelen; + sdl = (struct sockaddr_dl *)ifa->ifa_netmask; + bzero(sdl->sdl_data, onamelen); + while (namelen != 0) + sdl->sdl_data[--namelen] = 0xff; + IFA_UNLOCK(ifa); + + EVENTHANDLER_INVOKE(ifnet_arrival_event, ifp); + break; case NGM_EIFACE_GET_IFADDRS: { diff --git a/sys/netgraph/ng_eiface.h b/sys/netgraph/ng_eiface.h index 6fc1c5b..9f1509b 100644 --- a/sys/netgraph/ng_eiface.h +++ b/sys/netgraph/ng_eiface.h @@ -54,6 +54,7 @@ enum { NGM_EIFACE_GET_IFNAME = 1, /* get the interface name */ NGM_EIFACE_GET_IFADDRS, /* returns list of addresses */ NGM_EIFACE_SET, /* set ethernet address */ + NGM_EIFACE_SET_IFNAME, }; #endif /* _NETGRAPH_NG_EIFACE_H_ */ diff --git a/sys/netgraph/ng_iface.c b/sys/netgraph/ng_iface.c index 6c18d2a..94c8c05c 100644 --- a/sys/netgraph/ng_iface.c +++ b/sys/netgraph/ng_iface.c @@ -70,9 +70,11 @@ #include <sys/socket.h> #include <sys/syslog.h> #include <sys/libkern.h> +#include <sys/ctype.h> #include <net/if.h> #include <net/if_types.h> +#include <net/if_dl.h> #include <net/bpf.h> #include <net/netisr.h> #include <net/route.h> @@ -166,6 +168,13 @@ static const struct ng_cmdlist ng_iface_cmds[] = { }, { NGM_IFACE_COOKIE, + NGM_IFACE_SET_IFNAME, + "setifname", + &ng_parse_string_type, + NULL + }, + { + NGM_IFACE_COOKIE, NGM_IFACE_POINT2POINT, "point2point", NULL, @@ -608,6 +617,10 @@ ng_iface_rcvmsg(node_p node, item_p item, hook_p lasthook) struct ng_mesg *resp = NULL; int error = 0; struct ng_mesg *msg; + char *new_name; + size_t namelen, onamelen; + struct sockaddr_dl *sdl = NULL; + struct ifaddr *ifa = NULL; NGI_GET_MSG(item, msg); switch (msg->header.typecookie) { @@ -622,6 +635,49 @@ ng_iface_rcvmsg(node_p node, item_p item, hook_p lasthook) strlcpy(resp->data, ifp->if_xname, IFNAMSIZ); break; + case NGM_IFACE_SET_IFNAME: + + new_name = (char *)msg->data; + /* Announce the departure of the interface. */ + //new_name[strlen(new_name)] = '\0'; + + /* Deny request if interface is UP */ + if ((ifp->if_flags & IFF_UP) != 0) { + error = EBUSY; + break; + } + + //rt_ifannouncemsg(ifp, IFAN_DEPARTURE); + EVENTHANDLER_INVOKE(ifnet_departure_event, ifp); + + strlcpy(ifp->if_xname, new_name, sizeof(ifp->if_xname)); + ifa = ifp->if_addr; + IFA_LOCK(ifa); + sdl = (struct sockaddr_dl *)ifa->ifa_addr; + namelen = strlen(new_name) + 1; + onamelen = sdl->sdl_nlen; + /* + * Move the address if needed. This is safe because we + * allocate space for a name of length IFNAMSIZ when we + * create this in if_attach(). + */ + if (namelen != onamelen) { + bcopy(sdl->sdl_data + onamelen, + sdl->sdl_data + namelen, sdl->sdl_alen); + } + bcopy(new_name, sdl->sdl_data, namelen); + sdl->sdl_nlen = namelen; + sdl = (struct sockaddr_dl *)ifa->ifa_netmask; + bzero(sdl->sdl_data, onamelen); + while (namelen != 0) + sdl->sdl_data[--namelen] = 0xff; + IFA_UNLOCK(ifa); + + EVENTHANDLER_INVOKE(ifnet_arrival_event, ifp); + /* Announce the return of the interface. */ + //rt_ifannouncemsg(ifp, IFAN_ARRIVAL); + break; + case NGM_IFACE_POINT2POINT: case NGM_IFACE_BROADCAST: { @@ -699,9 +755,11 @@ ng_iface_rcvmsg(node_p node, item_p item, hook_p lasthook) switch (msg->header.cmd) { case NGM_LINK_IS_UP: ifp->if_drv_flags |= IFF_DRV_RUNNING; + //if_link_state_change(ifp, LINK_STATE_UP); break; case NGM_LINK_IS_DOWN: ifp->if_drv_flags &= ~IFF_DRV_RUNNING; + //if_link_state_change(ifp, LINK_STATE_DOWN); break; default: break; diff --git a/sys/netgraph/ng_iface.h b/sys/netgraph/ng_iface.h index 58fb442..3c843c4 100644 --- a/sys/netgraph/ng_iface.h +++ b/sys/netgraph/ng_iface.h @@ -70,6 +70,7 @@ enum { NGM_IFACE_POINT2POINT, NGM_IFACE_BROADCAST, NGM_IFACE_GET_IFINDEX, + NGM_IFACE_SET_IFNAME, }; #define MTAG_NGIF NGM_IFACE_COOKIE diff --git a/sys/netgraph/ng_message.h b/sys/netgraph/ng_message.h index da531f0..d17ce46 100644 --- a/sys/netgraph/ng_message.h +++ b/sys/netgraph/ng_message.h @@ -138,6 +138,7 @@ enum { NGM_ASCII2BINARY= (13|NGM_READONLY|NGM_HASREPLY), /* (optional) Get/set text config. */ NGM_TEXT_CONFIG = 14, + NGM_ETHER_ATTACH = 15, }; /* diff --git a/sys/netinet/if_ether.c b/sys/netinet/if_ether.c index 78d2f43..3a9d262 100644 --- a/sys/netinet/if_ether.c +++ b/sys/netinet/if_ether.c @@ -80,6 +80,8 @@ SYSCTL_DECL(_net_link_ether); static SYSCTL_NODE(_net_link_ether, PF_INET, inet, CTLFLAG_RW, 0, ""); static SYSCTL_NODE(_net_link_ether, PF_ARP, arp, CTLFLAG_RW, 0, ""); +static VNET_DEFINE(int, arp_carp_mac) = 0; /* default to disabled */ + /* timer values */ static VNET_DEFINE(int, arpt_keep) = (20*60); /* once resolved, good for 20 * minutes */ @@ -98,12 +100,16 @@ VNET_PCPUSTAT_SYSUNINIT(arpstat); static VNET_DEFINE(int, arp_maxhold) = 1; +#define V_arp_carp_mac VNET(arp_carp_mac) #define V_arpt_keep VNET(arpt_keep) #define V_arpt_down VNET(arpt_down) #define V_arp_maxtries VNET(arp_maxtries) #define V_arp_proxyall VNET(arp_proxyall) #define V_arp_maxhold VNET(arp_maxhold) +SYSCTL_VNET_INT(_net_link_ether_inet, OID_AUTO, carp_mac, CTLFLAG_RW, + &VNET_NAME(arp_carp_mac), 0, + "Send CARP mac with replies to CARP ips"); SYSCTL_VNET_INT(_net_link_ether_inet, OID_AUTO, max_age, CTLFLAG_RW, &VNET_NAME(arpt_keep), 0, "ARP entry lifetime in seconds"); @@ -909,6 +915,29 @@ reply: /* default behaviour; never reply by broadcast. */ m->m_flags &= ~(M_BCAST|M_MCAST); } +#ifdef DEV_CARP + if (V_arp_carp_mac && carp_match) { + struct ether_header *eh = (struct ether_header *) sa.sa_data; + short type = htons(ETHERTYPE_ARP); + + ah->ar_hrd = htons(ARPHRD_ETHER); + + (void)memcpy(&eh->ether_type, &type, + sizeof(eh->ether_type)); + (void)memcpy(eh->ether_dhost, ar_tha(ah), + sizeof (eh->ether_dhost)); + (void)memcpy(eh->ether_shost, enaddr, + sizeof(eh->ether_shost)); + + sa.sa_family = pseudo_AF_HDRCMPLT; + sa.sa_len = sizeof(sa); + } else { +#endif + sa.sa_family = AF_ARP; + sa.sa_len = 2; +#ifdef DEV_CARP + } +#endif (void)memcpy(ar_tpa(ah), ar_spa(ah), ah->ar_pln); (void)memcpy(ar_spa(ah), &itaddr, ah->ar_pln); ah->ar_op = htons(ARPOP_REPLY); @@ -916,8 +945,6 @@ reply: m->m_len = sizeof(*ah) + (2 * ah->ar_pln) + (2 * ah->ar_hln); m->m_pkthdr.len = m->m_len; m->m_pkthdr.rcvif = NULL; - sa.sa_family = AF_ARP; - sa.sa_len = 2; m_clrprotoflags(m); /* Avoid confusing lower layers. */ (*ifp->if_output)(ifp, m, &sa, NULL); ARPSTAT_INC(txreplies); diff --git a/sys/netinet/in.c b/sys/netinet/in.c index b61b4b6..fde54be 100644 --- a/sys/netinet/in.c +++ b/sys/netinet/in.c @@ -315,6 +315,7 @@ in_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp, * Security checks before we get involved in any work. */ switch (cmd) { + case SIOCORDERIFADDR: case SIOCAIFADDR: if (td != NULL) { error = priv_check(td, PRIV_NET_ADDIFADDR); @@ -374,6 +375,7 @@ in_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp, error = 0; switch (cmd) { + case SIOCORDERIFADDR: case SIOCAIFADDR: case SIOCDIFADDR: if (ifra->ifra_addr.sin_family == AF_INET) { @@ -399,10 +401,17 @@ in_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp, goto out; } } - if (cmd == SIOCDIFADDR && ia == NULL) { + if ((cmd == SIOCDIFADDR || cmd == SIOCORDERIFADDR) && ia == NULL) { error = EADDRNOTAVAIL; goto out; } + if (cmd == SIOCORDERIFADDR && ia != NULL) { + IF_ADDR_WLOCK(ifp); + TAILQ_REMOVE(&ifp->if_addrhead, &ia->ia_ifa, ifa_link); + TAILQ_INSERT_AFTER(&ifp->if_addrhead, TAILQ_FIRST(&ifp->if_addrhead), &ia->ia_ifa, ifa_link); + IF_ADDR_WUNLOCK(ifp); + goto out; + } if (ia == NULL) { ia = (struct in_ifaddr *) malloc(sizeof *ia, M_IFADDR, M_NOWAIT | diff --git a/sys/netinet/in_var.h b/sys/netinet/in_var.h index 8657dbb..511ba26 100644 --- a/sys/netinet/in_var.h +++ b/sys/netinet/in_var.h @@ -452,7 +452,7 @@ int in_scrubprefix(struct in_ifaddr *, u_int); void ip_input(struct mbuf *); int in_ifadown(struct ifaddr *ifa, int); void in_ifscrub(struct ifnet *, struct in_ifaddr *, u_int); -struct mbuf *ip_fastforward(struct mbuf *); +struct mbuf *ip_tryforward(struct mbuf *); void *in_domifattach(struct ifnet *); void in_domifdetach(struct ifnet *, void *); diff --git a/sys/netinet/ip_carp.c b/sys/netinet/ip_carp.c index c5fea16..f85a322 100644 --- a/sys/netinet/ip_carp.c +++ b/sys/netinet/ip_carp.c @@ -115,7 +115,6 @@ struct carp_softc { int sc_sendad_success; #define CARP_SENDAD_MIN_SUCCESS 3 - int sc_init_counter; uint64_t sc_counter; /* authentication */ @@ -143,7 +142,7 @@ struct carp_if { struct ip6_moptions cif_im6o; #endif struct ifnet *cif_ifp; - struct mtx cif_mtx; + struct rwlock cif_mtx; }; #define CARP_INET 0 @@ -247,18 +246,19 @@ SYSCTL_VNET_PCPUSTAT(_net_inet_carp, OID_AUTO, stats, struct carpstats, #define CARP_LOCK_ASSERT(sc) mtx_assert(&(sc)->sc_mtx, MA_OWNED) #define CARP_LOCK(sc) mtx_lock(&(sc)->sc_mtx) #define CARP_UNLOCK(sc) mtx_unlock(&(sc)->sc_mtx) -#define CIF_LOCK_INIT(cif) mtx_init(&(cif)->cif_mtx, "carp_if", \ - NULL, MTX_DEF) -#define CIF_LOCK_DESTROY(cif) mtx_destroy(&(cif)->cif_mtx) -#define CIF_LOCK_ASSERT(cif) mtx_assert(&(cif)->cif_mtx, MA_OWNED) -#define CIF_LOCK(cif) mtx_lock(&(cif)->cif_mtx) -#define CIF_UNLOCK(cif) mtx_unlock(&(cif)->cif_mtx) +#define CIF_LOCK_INIT(cif) rw_init(&(cif)->cif_mtx, "carp_if") +#define CIF_LOCK_DESTROY(cif) rw_destroy(&(cif)->cif_mtx) +#define CIF_LOCK_ASSERT(cif) rw_assert(&(cif)->cif_mtx, MA_OWNED) +#define CIF_RLOCK(cif) rw_rlock(&(cif)->cif_mtx) +#define CIF_RUNLOCK(cif) rw_runlock(&(cif)->cif_mtx) +#define CIF_WLOCK(cif) rw_wlock(&(cif)->cif_mtx) +#define CIF_WUNLOCK(cif) rw_wunlock(&(cif)->cif_mtx) #define CIF_FREE(cif) do { \ CIF_LOCK_ASSERT(cif); \ if (TAILQ_EMPTY(&(cif)->cif_vrs)) \ carp_free_if(cif); \ else \ - CIF_UNLOCK(cif); \ + CIF_WUNLOCK(cif); \ } while (0) #define CARP_LOG(...) do { \ @@ -579,7 +579,6 @@ carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af) struct ifnet *ifp = m->m_pkthdr.rcvif; struct ifaddr *ifa; struct carp_softc *sc; - uint64_t tmp_counter; struct timeval sc_tv, ch_tv; /* verify that the VHID is valid on the receiving interface */ @@ -619,14 +618,20 @@ carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af) goto out; } - tmp_counter = ntohl(ch->carp_counter[0]); - tmp_counter = tmp_counter<<32; - tmp_counter += ntohl(ch->carp_counter[1]); - - /* XXX Replay protection goes here */ - - sc->sc_init_counter = 0; - sc->sc_counter = tmp_counter; + if (!bcmp(&sc->sc_counter, ch->carp_counter, + sizeof(ch->carp_counter))) { + /* Do not log duplicates from non simplex interfaces */ + if (sc->sc_carpdev->if_flags & IFF_SIMPLEX) { + CARPSTATS_INC(carps_badauth); + ifp->if_ierrors++; + CARP_UNLOCK(sc); + CARP_LOG("%s, replay or network loop detected.\n", + ifp->if_xname); + } else + CARP_UNLOCK(sc); + m_freem(m); + return; + } sc_tv.tv_sec = sc->sc_advbase; sc_tv.tv_usec = DEMOTE_ADVSKEW(sc) * 1000000 / 256; @@ -700,13 +705,12 @@ carp_prepare_ad(struct mbuf *m, struct carp_softc *sc, struct carp_header *ch) { struct m_tag *mtag; - if (sc->sc_init_counter) { + if (!sc->sc_counter) { /* this could also be seconds since unix epoch */ sc->sc_counter = arc4random(); sc->sc_counter = sc->sc_counter << 32; sc->sc_counter += arc4random(); - } else - sc->sc_counter++; + } ch->carp_counter[0] = htonl((sc->sc_counter>>32)&0xffffffff); ch->carp_counter[1] = htonl(sc->sc_counter&0xffffffff); @@ -772,7 +776,8 @@ carp_send_ad_error(struct carp_softc *sc, int error) char msg[sizeof(fmt) + IFNAMSIZ]; sprintf(msg, fmt, error, sc->sc_carpdev->if_xname); - carp_demote_adj(V_carp_senderr_adj, msg); + if (V_carp_senderr_adj > 0) + carp_demote_adj(V_carp_senderr_adj, msg); } sc->sc_sendad_success = 0; } else { @@ -782,7 +787,8 @@ carp_send_ad_error(struct carp_softc *sc, int error) char msg[sizeof(fmt) + IFNAMSIZ]; sprintf(msg, fmt, sc->sc_carpdev->if_xname); - carp_demote_adj(-V_carp_senderr_adj, msg); + if (V_carp_senderr_adj > 0) + carp_demote_adj(-V_carp_senderr_adj, msg); sc->sc_sendad_errors = 0; } else sc->sc_sendad_errors = 0; @@ -1117,18 +1123,17 @@ carp_forus(struct ifnet *ifp, u_char *dhost) if (ena[0] || ena[1] || ena[2] != 0x5e || ena[3] || ena[4] != 1) return (0); - CIF_LOCK(ifp->if_carp); + CIF_RLOCK(ifp->if_carp); IFNET_FOREACH_CARP(ifp, sc) { - CARP_LOCK(sc); - if (sc->sc_state == MASTER && !bcmp(dhost, LLADDR(&sc->sc_addr), - ETHER_ADDR_LEN)) { - CARP_UNLOCK(sc); - CIF_UNLOCK(ifp->if_carp); + //CARP_LOCK(sc); + if (sc->sc_state == MASTER && ena[5] == sc->sc_vhid) { + //CARP_UNLOCK(sc); + CIF_RUNLOCK(ifp->if_carp); return (1); } - CARP_UNLOCK(sc); + //CARP_UNLOCK(sc); } - CIF_UNLOCK(ifp->if_carp); + CIF_RUNLOCK(ifp->if_carp); return (0); } @@ -1485,9 +1490,9 @@ carp_alloc(struct ifnet *ifp) sc = malloc(sizeof(*sc), M_CARP, M_WAITOK|M_ZERO); + sc->sc_counter = 0; sc->sc_advbase = CARP_DFLTINTV; sc->sc_vhid = -1; /* required setting */ - sc->sc_init_counter = 1; sc->sc_state = INIT; sc->sc_ifasiz = sizeof(struct ifaddr *); @@ -1503,9 +1508,9 @@ carp_alloc(struct ifnet *ifp) #endif callout_init_mtx(&sc->sc_ad_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED); - CIF_LOCK(cif); + CIF_WLOCK(cif); TAILQ_INSERT_TAIL(&cif->cif_vrs, sc, sc_list); - CIF_UNLOCK(cif); + CIF_WUNLOCK(cif); mtx_lock(&carp_mtx); LIST_INSERT_HEAD(&carp_list, sc, sc_next); @@ -1669,11 +1674,11 @@ carp_ioctl(struct ifreq *ifr, u_long cmd, struct thread *td) } if (ifp->if_carp) { - CIF_LOCK(ifp->if_carp); + CIF_RLOCK(ifp->if_carp); IFNET_FOREACH_CARP(ifp, sc) if (sc->sc_vhid == carpr.carpr_vhid) break; - CIF_UNLOCK(ifp->if_carp); + CIF_RUNLOCK(ifp->if_carp); } if (sc == NULL) { sc = carp_alloc(ifp); @@ -1747,11 +1752,11 @@ carp_ioctl(struct ifreq *ifr, u_long cmd, struct thread *td) priveleged = (priv_check(td, PRIV_NETINET_CARP) == 0); if (carpr.carpr_vhid != 0) { - CIF_LOCK(ifp->if_carp); + CIF_RLOCK(ifp->if_carp); IFNET_FOREACH_CARP(ifp, sc) if (sc->sc_vhid == carpr.carpr_vhid) break; - CIF_UNLOCK(ifp->if_carp); + CIF_RUNLOCK(ifp->if_carp); if (sc == NULL) { error = ENOENT; break; @@ -1762,12 +1767,12 @@ carp_ioctl(struct ifreq *ifr, u_long cmd, struct thread *td) int i, count; count = 0; - CIF_LOCK(ifp->if_carp); + CIF_RLOCK(ifp->if_carp); IFNET_FOREACH_CARP(ifp, sc) count++; if (count > carpr.carpr_count) { - CIF_UNLOCK(ifp->if_carp); + CIF_RUNLOCK(ifp->if_carp); error = EMSGSIZE; break; } @@ -1779,12 +1784,12 @@ carp_ioctl(struct ifreq *ifr, u_long cmd, struct thread *td) error = copyout(&carpr, ifr->ifr_data + (i * sizeof(carpr)), sizeof(carpr)); if (error) { - CIF_UNLOCK(ifp->if_carp); + CIF_RUNLOCK(ifp->if_carp); break; } i++; } - CIF_UNLOCK(ifp->if_carp); + CIF_RUNLOCK(ifp->if_carp); } break; } @@ -1833,12 +1838,12 @@ carp_attach(struct ifaddr *ifa, int vhid) return (EPROTOTYPE); } - CIF_LOCK(cif); + CIF_WLOCK(cif); IFNET_FOREACH_CARP(ifp, sc) if (sc->sc_vhid == vhid) break; if (sc == NULL) { - CIF_UNLOCK(cif); + CIF_WUNLOCK(cif); return (ENOENT); } @@ -1846,7 +1851,7 @@ carp_attach(struct ifaddr *ifa, int vhid) if (ifa->ifa_carp->sc_vhid != vhid) carp_detach_locked(ifa); else { - CIF_UNLOCK(cif); + CIF_WUNLOCK(cif); return (0); } } @@ -1891,7 +1896,7 @@ carp_attach(struct ifaddr *ifa, int vhid) carp_sc_state(sc); CARP_UNLOCK(sc); - CIF_UNLOCK(cif); + CIF_WUNLOCK(cif); return (0); } @@ -1902,7 +1907,7 @@ carp_detach(struct ifaddr *ifa) struct ifnet *ifp = ifa->ifa_ifp; struct carp_if *cif = ifp->if_carp; - CIF_LOCK(cif); + CIF_WLOCK(cif); carp_detach_locked(ifa); CIF_FREE(cif); } @@ -1984,13 +1989,13 @@ carp_linkstate(struct ifnet *ifp) { struct carp_softc *sc; - CIF_LOCK(ifp->if_carp); + CIF_RLOCK(ifp->if_carp); IFNET_FOREACH_CARP(ifp, sc) { CARP_LOCK(sc); carp_sc_state(sc); CARP_UNLOCK(sc); } - CIF_UNLOCK(ifp->if_carp); + CIF_RUNLOCK(ifp->if_carp); } static void @@ -2025,9 +2030,11 @@ carp_sc_state(struct carp_softc *sc) static void carp_demote_adj(int adj, char *reason) { + if (adj == 0) + return; atomic_add_int(&V_carp_demotion, adj); CARP_LOG("demoted by %d to %d (%s)\n", adj, V_carp_demotion, reason); - taskqueue_enqueue(taskqueue_swi, &carp_sendall_task); + taskqueue_enqueue(taskqueue_thread, &carp_sendall_task); } static int diff --git a/sys/netinet/ip_divert.c b/sys/netinet/ip_divert.c index e698035..b74d60d 100644 --- a/sys/netinet/ip_divert.c +++ b/sys/netinet/ip_divert.c @@ -267,8 +267,7 @@ divert_packet(struct mbuf *m, int incoming) * this iface name will come along for the ride. * (see div_output for the other half of this.) */ - strlcpy(divsrc.sin_zero, m->m_pkthdr.rcvif->if_xname, - sizeof(divsrc.sin_zero)); + *((u_short *)divsrc.sin_zero) = m->m_pkthdr.rcvif->if_index; } /* Put packet on socket queue, if any */ @@ -342,7 +341,7 @@ div_output(struct socket *so, struct mbuf *m, struct sockaddr_in *sin, /* Loopback avoidance and state recovery */ if (sin) { - int i; + u_short idx; /* set the starting point. We provide a non-zero slot, * but a non_matching chain_id to skip that info and use @@ -350,7 +349,7 @@ div_output(struct socket *so, struct mbuf *m, struct sockaddr_in *sin, */ dt->slot = 1; /* dummy, chain_id is invalid */ dt->chain_id = 0; - dt->rulenum = sin->sin_port+1; /* host format ? */ + dt->rulenum = sin->sin_port; /* host format ? */ dt->rule_id = 0; /* * Find receive interface with the given name, stuffed @@ -358,10 +357,9 @@ div_output(struct socket *so, struct mbuf *m, struct sockaddr_in *sin, * The name is user supplied data so don't trust its size * or that it is zero terminated. */ - for (i = 0; i < sizeof(sin->sin_zero) && sin->sin_zero[i]; i++) - ; - if ( i > 0 && i < sizeof(sin->sin_zero)) - m->m_pkthdr.rcvif = ifunit(sin->sin_zero); + idx = *((u_short *)sin->sin_zero); + if ( idx > 0 ) + m->m_pkthdr.rcvif = ifnet_byindex(idx); } /* Reinject packet into the system as incoming or outgoing */ @@ -832,5 +830,4 @@ static moduledata_t ipdivertmod = { }; DECLARE_MODULE(ipdivert, ipdivertmod, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY); -MODULE_DEPEND(ipdivert, ipfw, 2, 2, 2); MODULE_VERSION(ipdivert, 1); diff --git a/sys/netinet/ip_fastfwd.c b/sys/netinet/ip_fastfwd.c index 0772cf0..9c3ea66 100644 --- a/sys/netinet/ip_fastfwd.c +++ b/sys/netinet/ip_fastfwd.c @@ -109,12 +109,6 @@ __FBSDID("$FreeBSD$"); #include <machine/in_cksum.h> -static VNET_DEFINE(int, ipfastforward_active); -#define V_ipfastforward_active VNET(ipfastforward_active) - -SYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, fastforwarding, CTLFLAG_RW, - &VNET_NAME(ipfastforward_active), 0, "Enable fast IP forwarding"); - static struct sockaddr_in * ip_findroute(struct route *ro, struct in_addr dest, struct mbuf *m) { @@ -159,7 +153,7 @@ ip_findroute(struct route *ro, struct in_addr dest, struct mbuf *m) * to ip_input for full processing. */ struct mbuf * -ip_fastforward(struct mbuf *m) +ip_tryforward(struct mbuf *m) { struct ip *ip; struct mbuf *m0 = NULL; @@ -167,119 +161,20 @@ ip_fastforward(struct mbuf *m) struct sockaddr_in *dst = NULL; struct ifnet *ifp; struct in_addr odest, dest; - uint16_t sum, ip_len, ip_off; + uint16_t ip_len, ip_off; int error = 0; - int hlen, mtu; + int mtu; struct m_tag *fwd_tag = NULL; /* * Are we active and forwarding packets? */ - if (!V_ipfastforward_active || !V_ipforwarding) - return m; M_ASSERTVALID(m); M_ASSERTPKTHDR(m); bzero(&ro, sizeof(ro)); - /* - * Step 1: check for packet drop conditions (and sanity checks) - */ - - /* - * Is entire packet big enough? - */ - if (m->m_pkthdr.len < sizeof(struct ip)) { - IPSTAT_INC(ips_tooshort); - goto drop; - } - - /* - * Is first mbuf large enough for ip header and is header present? - */ - if (m->m_len < sizeof (struct ip) && - (m = m_pullup(m, sizeof (struct ip))) == NULL) { - IPSTAT_INC(ips_toosmall); - return NULL; /* mbuf already free'd */ - } - - ip = mtod(m, struct ip *); - - /* - * Is it IPv4? - */ - if (ip->ip_v != IPVERSION) { - IPSTAT_INC(ips_badvers); - goto drop; - } - - /* - * Is IP header length correct and is it in first mbuf? - */ - hlen = ip->ip_hl << 2; - if (hlen < sizeof(struct ip)) { /* minimum header length */ - IPSTAT_INC(ips_badhlen); - goto drop; - } - if (hlen > m->m_len) { - if ((m = m_pullup(m, hlen)) == NULL) { - IPSTAT_INC(ips_badhlen); - return NULL; /* mbuf already free'd */ - } - ip = mtod(m, struct ip *); - } - - /* - * Checksum correct? - */ - if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) - sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID); - else { - if (hlen == sizeof(struct ip)) - sum = in_cksum_hdr(ip); - else - sum = in_cksum(m, hlen); - } - if (sum) { - IPSTAT_INC(ips_badsum); - goto drop; - } - - /* - * Remember that we have checked the IP header and found it valid. - */ - m->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID); - - ip_len = ntohs(ip->ip_len); - - /* - * Is IP length longer than packet we have got? - */ - if (m->m_pkthdr.len < ip_len) { - IPSTAT_INC(ips_tooshort); - goto drop; - } - - /* - * Is packet longer than IP header tells us? If yes, truncate packet. - */ - if (m->m_pkthdr.len > ip_len) { - if (m->m_len == m->m_pkthdr.len) { - m->m_len = ip_len; - m->m_pkthdr.len = ip_len; - } else - m_adj(m, ip_len - m->m_pkthdr.len); - } - - /* - * Is packet from or to 127/8? - */ - if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET || - (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) { - IPSTAT_INC(ips_badaddr); - goto drop; - } #ifdef ALTQ /* @@ -290,12 +185,10 @@ ip_fastforward(struct mbuf *m) #endif /* - * Step 2: fallback conditions to normal ip_input path processing - */ - - /* * Only IP packets without options */ + ip = mtod(m, struct ip *); + if (ip->ip_hl != (sizeof(struct ip) >> 2)) { if (V_ip_doopts == 1) return m; @@ -496,18 +389,6 @@ passout: goto consumed; } -#ifndef ALTQ - /* - * Check if there is enough space in the interface queue - */ - if ((ifp->if_snd.ifq_len + ip_len / ifp->if_mtu + 1) >= - ifp->if_snd.ifq_maxlen) { - IPSTAT_INC(ips_odropped); - /* would send source quench here but that is depreciated */ - goto drop; - } -#endif - /* * Check if media link state of interface is not down */ diff --git a/sys/netinet/ip_fw.h b/sys/netinet/ip_fw.h index dae8cc0..188057d 100644 --- a/sys/netinet/ip_fw.h +++ b/sys/netinet/ip_fw.h @@ -65,7 +65,8 @@ /* IP_FW3 header/opcodes */ typedef struct _ip_fw3_opheader { uint16_t opcode; /* Operation opcode */ - uint16_t reserved[3]; /* Align to 64-bit boundary */ + uint16_t ctxid; + uint16_t reserved[2]; /* Align to 64-bit boundary */ } ip_fw3_opheader; @@ -74,6 +75,14 @@ typedef struct _ip_fw3_opheader { #define IP_FW_TABLE_XDEL 87 /* delete entry */ #define IP_FW_TABLE_XGETSIZE 88 /* get table size */ #define IP_FW_TABLE_XLIST 89 /* list table contents */ +#define IP_FW_TABLE_XLISTENTRY 90 /* list one table entry contents */ +#define IP_FW_TABLE_XZEROENTRY 91 /* zero one table entry stats */ +#define IP_FW_CTX_GET 92 +#define IP_FW_CTX_ADD 93 +#define IP_FW_CTX_DEL 94 +#define IP_FW_CTX_SET 95 +#define IP_FW_CTX_ADDMEMBER 96 +#define IP_FW_CTX_DELMEMBER 97 /* * The kernel representation of ipfw rules is made of a list of @@ -600,11 +609,16 @@ struct _ipfw_dyn_rule { #define IPFW_TABLE_CIDR 1 /* Table for holding IPv4/IPv6 prefixes */ #define IPFW_TABLE_INTERFACE 2 /* Table for holding interface names */ -#define IPFW_TABLE_MAXTYPE 2 /* Maximum valid number */ +#define IPFW_TABLE_MIX 3 /* Table for holding IPv4/mac entries */ +#define IPFW_TABLE_MAC 4 /* Table for holding mac entries */ +#define IPFW_TABLE_MAXTYPE 5 /* Maximum valid number */ typedef struct _ipfw_table_entry { in_addr_t addr; /* network address */ u_int32_t value; /* value */ + uint64_t mac_addr; + uint64_t bytes; + uint64_t packets; u_int16_t tbl; /* table number */ u_int8_t masklen; /* mask length */ } ipfw_table_entry; @@ -616,6 +630,10 @@ typedef struct _ipfw_table_xentry { uint16_t tbl; /* table number */ uint16_t flags; /* record flags */ uint32_t value; /* value */ + uint32_t timestamp; + uint64_t mac_addr; + uint64_t bytes; + uint64_t packets; union { /* Longest field needs to be aligned by 4-byte boundary */ struct in6_addr addr6; /* IPv6 address */ diff --git a/sys/netinet/ip_input.c b/sys/netinet/ip_input.c index 2dc080f..1eefadc 100644 --- a/sys/netinet/ip_input.c +++ b/sys/netinet/ip_input.c @@ -77,6 +77,8 @@ __FBSDID("$FreeBSD$"); #include <netinet/ip_carp.h> #ifdef IPSEC #include <netinet/ip_ipsec.h> +#include <netipsec/ipsec.h> +#include <netipsec/key.h> #endif /* IPSEC */ #include <sys/socketvar.h> @@ -297,6 +299,9 @@ ip_init(void) if ((i = pfil_head_register(&V_inet_pfil_hook)) != 0) printf("%s: WARNING: unable to register pfil hook, " "error %d\n", __func__, i); + else + pfil_head_export_sysctl(&V_inet_pfil_hook, + SYSCTL_STATIC_CHILDREN(_net_inet_ip)); /* Skip initialization of globals for non-default instances. */ if (!IS_DEFAULT_VNET(curvnet)) @@ -464,12 +469,22 @@ tooshort: } else m_adj(m, ip_len - m->m_pkthdr.len); } + /* Try to forward the packet, but if we fail continue */ #ifdef IPSEC + /* For now we do not handle IPSEC in tryforward. */ + if (!key_havesp(IPSEC_DIR_INBOUND) && !key_havesp(IPSEC_DIR_OUTBOUND) && + (V_ipforwarding == 1)) + if (ip_tryforward(m) == NULL) + return; /* * Bypass packet filtering for packets previously handled by IPsec. */ if (ip_ipsec_filtertunnel(m)) goto passin; +#else + if (V_ipforwarding == 1) + if (ip_tryforward(m) == NULL) + return; #endif /* IPSEC */ /* @@ -499,8 +514,7 @@ tooshort: goto ours; } if (m->m_flags & M_IP_NEXTHOP) { - dchg = (m_tag_find(m, PACKET_TAG_IPFORWARD, NULL) != NULL); - if (dchg != 0) { + if (m_tag_find(m, PACKET_TAG_IPFORWARD, NULL) != NULL) { /* * Directly ship the packet on. This allows * forwarding packets originally destined to us @@ -674,10 +688,6 @@ passin: IPSTAT_INC(ips_cantforward); m_freem(m); } else { -#ifdef IPSEC - if (ip_ipsec_fwd(m)) - goto bad; -#endif /* IPSEC */ ip_forward(m, dchg); } return; @@ -722,7 +732,7 @@ ours: * note that we do not visit this with protocols with pcb layer * code - like udp/tcp/raw ip. */ - if (ip_ipsec_input(m)) + if (ip_ipsec_input(m, ip->ip_p) != 0) goto bad; #endif /* IPSEC */ @@ -1355,6 +1365,13 @@ ip_forward(struct mbuf *m, int srcrt) m_freem(m); return; } +#ifdef IPSEC + if (ip_ipsec_fwd(m) != 0) { + IPSTAT_INC(ips_cantforward); + m_freem(m); + return; + } +#endif /* IPSEC */ #ifdef IPSTEALTH if (!V_ipstealth) { #endif diff --git a/sys/netinet/ip_ipsec.c b/sys/netinet/ip_ipsec.c index 133fa7c..2e43e43 100644 --- a/sys/netinet/ip_ipsec.c +++ b/sys/netinet/ip_ipsec.c @@ -61,15 +61,12 @@ __FBSDID("$FreeBSD$"); #include <machine/in_cksum.h> -#ifdef IPSEC #include <netipsec/ipsec.h> #include <netipsec/xform.h> #include <netipsec/key.h> -#endif /*IPSEC*/ extern struct protosw inetsw[]; -#ifdef IPSEC #ifdef IPSEC_FILTERTUNNEL static VNET_DEFINE(int, ip4_ipsec_filtertunnel) = 1; #else @@ -81,7 +78,6 @@ SYSCTL_DECL(_net_inet_ipsec); SYSCTL_VNET_INT(_net_inet_ipsec, OID_AUTO, filtertunnel, CTLFLAG_RW, &VNET_NAME(ip4_ipsec_filtertunnel), 0, "If set filter packets from an IPsec tunnel."); -#endif /* IPSEC */ /* * Check if we have to jump over firewall processing for this packet. @@ -91,7 +87,6 @@ SYSCTL_VNET_INT(_net_inet_ipsec, OID_AUTO, filtertunnel, int ip_ipsec_filtertunnel(struct mbuf *m) { -#ifdef IPSEC /* * Bypass packet filtering for packets previously handled by IPsec. @@ -99,50 +94,20 @@ ip_ipsec_filtertunnel(struct mbuf *m) if (!V_ip4_ipsec_filtertunnel && m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL) != NULL) return 1; -#endif return 0; } /* * Check if this packet has an active SA and needs to be dropped instead * of forwarded. - * Called from ip_input(). + * Called from ip_forward(). * 1 = drop packet, 0 = forward packet. */ int ip_ipsec_fwd(struct mbuf *m) { -#ifdef IPSEC - struct m_tag *mtag; - struct tdb_ident *tdbi; - struct secpolicy *sp; - int error; - - mtag = m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL); - if (mtag != NULL) { - tdbi = (struct tdb_ident *)(mtag + 1); - sp = ipsec_getpolicy(tdbi, IPSEC_DIR_INBOUND); - } else { - sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_INBOUND, - IP_FORWARDING, &error); - } - if (sp == NULL) { /* NB: can happen if error */ - /*XXX error stat???*/ - DPRINTF(("ip_input: no SP for forwarding\n")); /*XXX*/ - return 1; - } - /* - * Check security policy against packet attributes. - */ - error = ipsec_in_reject(sp, m); - KEY_FREESP(&sp); - if (error) { - IPSTAT_INC(ips_cantforward); - return 1; - } -#endif /* IPSEC */ - return 0; + return (ipsec4_in_reject(m, NULL)); } /* @@ -153,51 +118,16 @@ ip_ipsec_fwd(struct mbuf *m) * 1 = drop packet, 0 = continue processing packet. */ int -ip_ipsec_input(struct mbuf *m) +ip_ipsec_input(struct mbuf *m, int nxt) { -#ifdef IPSEC - struct ip *ip = mtod(m, struct ip *); - struct m_tag *mtag; - struct tdb_ident *tdbi; - struct secpolicy *sp; - int error; /* * enforce IPsec policy checking if we are seeing last header. * note that we do not visit this with protocols with pcb layer * code - like udp/tcp/raw ip. */ - if ((inetsw[ip_protox[ip->ip_p]].pr_flags & PR_LASTHDR) != 0) { - /* - * Check if the packet has already had IPsec processing - * done. If so, then just pass it along. This tag gets - * set during AH, ESP, etc. input handling, before the - * packet is returned to the ip input queue for delivery. - */ - mtag = m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL); - if (mtag != NULL) { - tdbi = (struct tdb_ident *)(mtag + 1); - sp = ipsec_getpolicy(tdbi, IPSEC_DIR_INBOUND); - } else { - sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_INBOUND, - IP_FORWARDING, &error); - } - if (sp != NULL) { - /* - * Check security policy against packet attributes. - */ - error = ipsec_in_reject(sp, m); - KEY_FREESP(&sp); - } else { - /* XXX error stat??? */ - error = EINVAL; - DPRINTF(("ip_input: no SP, packet discarded\n"));/*XXX*/ - return 1; - } - if (error) - return 1; - } -#endif /* IPSEC */ - return 0; + if ((inetsw[ip_protox[nxt]].pr_flags & PR_LASTHDR) != 0) + return (ipsec4_in_reject(m, NULL)); + return (0); } /* @@ -224,12 +154,9 @@ ip_ipsec_mtu(struct mbuf *m, int mtu) * -1 = packet was reinjected and stop processing packet */ int -ip_ipsec_output(struct mbuf **m, struct inpcb *inp, int *flags, int *error) +ip_ipsec_output(struct mbuf **m, struct inpcb *inp, int *error) { -#ifdef IPSEC - struct secpolicy *sp = NULL; - struct tdb_ident *tdbi; - struct m_tag *mtag; + struct secpolicy *sp; if (!key_havesp(IPSEC_DIR_OUTBOUND)) return 0; @@ -243,17 +170,11 @@ ip_ipsec_output(struct mbuf **m, struct inpcb *inp, int *flags, int *error) * AH, ESP, etc. processing), there will be a tag to bypass * the lookup and related policy checking. */ - mtag = m_tag_find(*m, PACKET_TAG_IPSEC_PENDING_TDB, NULL); - if (mtag != NULL) { - tdbi = (struct tdb_ident *)(mtag + 1); - sp = ipsec_getpolicy(tdbi, IPSEC_DIR_OUTBOUND); - if (sp == NULL) - *error = -EINVAL; /* force silent drop */ - m_tag_delete(*m, mtag); - } else { - sp = ipsec4_checkpolicy(*m, IPSEC_DIR_OUTBOUND, *flags, - error, inp); + if (m_tag_find(*m, PACKET_TAG_IPSEC_OUT_DONE, NULL) != NULL) { + *error = 0; + return (0); } + sp = ipsec4_checkpolicy(*m, IPSEC_DIR_OUTBOUND, error, inp); /* * There are four return cases: * sp != NULL apply IPsec policy @@ -262,40 +183,6 @@ ip_ipsec_output(struct mbuf **m, struct inpcb *inp, int *flags, int *error) * sp == NULL, error != 0 discard packet, report error */ if (sp != NULL) { - /* Loop detection, check if ipsec processing already done */ - KASSERT(sp->req != NULL, ("ip_output: no ipsec request")); - for (mtag = m_tag_first(*m); mtag != NULL; - mtag = m_tag_next(*m, mtag)) { - if (mtag->m_tag_cookie != MTAG_ABI_COMPAT) - continue; - if (mtag->m_tag_id != PACKET_TAG_IPSEC_OUT_DONE && - mtag->m_tag_id != PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED) - continue; - /* - * Check if policy has an SA associated with it. - * This can happen when an SP has yet to acquire - * an SA; e.g. on first reference. If it occurs, - * then we let ipsec4_process_packet do its thing. - */ - if (sp->req->sav == NULL) - break; - tdbi = (struct tdb_ident *)(mtag + 1); - if (tdbi->spi == sp->req->sav->spi && - tdbi->proto == sp->req->sav->sah->saidx.proto && - bcmp(&tdbi->dst, &sp->req->sav->sah->saidx.dst, - sizeof (union sockaddr_union)) == 0) { - /* - * No IPsec processing is needed, free - * reference to SP. - * - * NB: null pointer to avoid free at - * done: below. - */ - KEY_FREESP(&sp), sp = NULL; - goto done; - } - } - /* * Do delayed checksums now because we send before * this is done in the normal processing path. @@ -314,7 +201,10 @@ ip_ipsec_output(struct mbuf **m, struct inpcb *inp, int *flags, int *error) #endif /* NB: callee frees mbuf */ - *error = ipsec4_process_packet(*m, sp->req, *flags, 0); + *error = ipsec4_process_packet(*m, sp->req); + /* Release SP if an error occured */ + if (*error != 0) + KEY_FREESP(&sp); if (*error == EJUSTRETURN) { /* * We had a SP with a level of 'use' and no SA. We @@ -346,22 +236,17 @@ ip_ipsec_output(struct mbuf **m, struct inpcb *inp, int *flags, int *error) if (*error == -EINVAL) *error = 0; goto bad; - } else { - /* No IPsec processing for this packet. */ } + /* No IPsec processing for this packet. */ } done: if (sp != NULL) KEY_FREESP(&sp); return 0; reinjected: - if (sp != NULL) - KEY_FREESP(&sp); - return -1; + return (-1); bad: if (sp != NULL) KEY_FREESP(&sp); return 1; -#endif /* IPSEC */ - return 0; } diff --git a/sys/netinet/ip_ipsec.h b/sys/netinet/ip_ipsec.h index 2870c11..f499b74 100644 --- a/sys/netinet/ip_ipsec.h +++ b/sys/netinet/ip_ipsec.h @@ -34,7 +34,7 @@ int ip_ipsec_filtertunnel(struct mbuf *); int ip_ipsec_fwd(struct mbuf *); -int ip_ipsec_input(struct mbuf *); +int ip_ipsec_input(struct mbuf *, int); int ip_ipsec_mtu(struct mbuf *, int); -int ip_ipsec_output(struct mbuf **, struct inpcb *, int *, int *); +int ip_ipsec_output(struct mbuf **, struct inpcb *, int *); #endif diff --git a/sys/netinet/ip_output.c b/sys/netinet/ip_output.c index 954785d..0aee48d 100644 --- a/sys/netinet/ip_output.c +++ b/sys/netinet/ip_output.c @@ -123,7 +123,7 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags, struct ifnet *ifp = NULL; /* keep compiler happy */ struct mbuf *m0; int hlen = sizeof (struct ip); - int mtu; + int mtu = 0; int n; /* scratchpad */ int error = 0; struct sockaddr_in *dst; @@ -136,9 +136,8 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags, struct in_addr odst; struct m_tag *fwd_tag = NULL; int have_ia_ref; -#ifdef IPSEC - int no_route_but_check_spd = 0; -#endif + int no_route_but_check = 0; + M_ASSERTPKTHDR(m); if (inp != NULL) { @@ -292,10 +291,11 @@ again: * There is no route for this packet, but it is * possible that a matching SPD entry exists. */ - no_route_but_check_spd = 1; mtu = 0; /* Silence GCC warning. */ - goto sendit; #endif + no_route_but_check = 1; + goto sendit; + IPSTAT_INC(ips_noroute); error = EHOSTUNREACH; goto bad; @@ -482,7 +482,7 @@ again: sendit: #ifdef IPSEC - switch(ip_ipsec_output(&m, inp, &flags, &error)) { + switch(ip_ipsec_output(&m, inp, &error)) { case 1: goto bad; case -1: @@ -491,28 +491,34 @@ sendit: default: break; /* Continue with packet processing. */ } - /* - * Check if there was a route for this packet; return error if not. - */ - if (no_route_but_check_spd) { - IPSTAT_INC(ips_noroute); - error = EHOSTUNREACH; - goto bad; - } /* Update variables that are affected by ipsec4_output(). */ ip = mtod(m, struct ip *); hlen = ip->ip_hl << 2; #endif /* IPSEC */ /* Jump over all PFIL processing if hooks are not active. */ - if (!PFIL_HOOKED(&V_inet_pfil_hook)) + if (!PFIL_HOOKED(&V_inet_pfil_hook)) { + if (no_route_but_check) { + IPSTAT_INC(ips_noroute); + error = EHOSTUNREACH; + goto bad; + } goto passout; + } + + if (ifp == NULL) + ifp = V_loif; /* Run through list of hooks for output packets. */ odst.s_addr = ip->ip_dst.s_addr; error = pfil_run_hooks(&V_inet_pfil_hook, &m, ifp, PFIL_OUT, inp); if (error != 0 || m == NULL) goto done; + if (no_route_but_check) { + IPSTAT_INC(ips_noroute); + error = EHOSTUNREACH; + goto bad; + } ip = mtod(m, struct ip *); diff --git a/sys/netinet/sctp_input.c b/sys/netinet/sctp_input.c index 05eb026..4cd9a9b 100644 --- a/sys/netinet/sctp_input.c +++ b/sys/netinet/sctp_input.c @@ -5788,7 +5788,6 @@ sctp_common_input_processing(struct mbuf **mm, int iphlen, int offset, int lengt #ifdef INET case AF_INET: if (ipsec4_in_reject(m, &inp->ip_inp.inp)) { - IPSECSTAT_INC(ips_in_polvio); SCTP_STAT_INCR(sctps_hdrops); goto out; } @@ -5797,7 +5796,6 @@ sctp_common_input_processing(struct mbuf **mm, int iphlen, int offset, int lengt #ifdef INET6 case AF_INET6: if (ipsec6_in_reject(m, &inp->ip_inp.inp)) { - IPSEC6STAT_INC(ips_in_polvio); SCTP_STAT_INCR(sctps_hdrops); goto out; } diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c index 32133ae..114802e 100644 --- a/sys/netinet/tcp_input.c +++ b/sys/netinet/tcp_input.c @@ -889,12 +889,10 @@ findpcb: #ifdef IPSEC #ifdef INET6 if (isipv6 && ipsec6_in_reject(m, inp)) { - IPSEC6STAT_INC(ips_in_polvio); goto dropunlock; } else #endif /* INET6 */ if (ipsec4_in_reject(m, inp) != 0) { - IPSECSTAT_INC(ips_in_polvio); goto dropunlock; } #endif /* IPSEC */ diff --git a/sys/netinet/udp_usrreq.c b/sys/netinet/udp_usrreq.c index 79a2166..46bebf6 100644 --- a/sys/netinet/udp_usrreq.c +++ b/sys/netinet/udp_usrreq.c @@ -314,7 +314,6 @@ udp_append(struct inpcb *inp, struct ip *ip, struct mbuf *n, int off, /* Check AH/ESP integrity. */ if (ipsec4_in_reject(n, inp)) { m_freem(n); - IPSECSTAT_INC(ips_in_polvio); return; } #ifdef IPSEC_NAT_T @@ -1561,7 +1560,8 @@ udp4_espdecap(struct inpcb *inp, struct mbuf *m, int off) if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) m->m_pkthdr.csum_flags &= ~(CSUM_DATA_VALID|CSUM_PSEUDO_HDR); - (void) ipsec4_common_input(m, iphlen, ip->ip_p); + (void) ipsec_common_input(m, iphlen, offsetof(struct ip, ip_p), + AF_INET, ip->ip_p); return (NULL); /* NB: consumed, bypass processing. */ } #endif /* defined(IPSEC) && defined(IPSEC_NAT_T) */ diff --git a/sys/netinet6/in6.c b/sys/netinet6/in6.c index e2073f5..e3c3229 100644 --- a/sys/netinet6/in6.c +++ b/sys/netinet6/in6.c @@ -2375,6 +2375,7 @@ in6if_do_dad(struct ifnet *ifp) case IFT_DUMMY: #endif case IFT_FAITH: + case IFT_STF: /* * These interfaces do not have the IFF_LOOPBACK flag, * but loop packets back. We do not have to do DAD on such diff --git a/sys/netinet6/ip6_forward.c b/sys/netinet6/ip6_forward.c index caa58e6..053864a 100644 --- a/sys/netinet6/ip6_forward.c +++ b/sys/netinet6/ip6_forward.c @@ -70,6 +70,7 @@ __FBSDID("$FreeBSD$"); #include <netinet/in_pcb.h> #ifdef IPSEC +#include <netinet6/ip6_ipsec.h> #include <netipsec/ipsec.h> #include <netipsec/ipsec6.h> #include <netipsec/key.h> @@ -111,21 +112,6 @@ ip6_forward(struct mbuf *m, int srcrt) struct m_tag *fwd_tag; char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN]; -#ifdef IPSEC - /* - * Check AH/ESP integrity. - */ - /* - * Don't increment ip6s_cantforward because this is the check - * before forwarding packet actually. - */ - if (ipsec6_in_reject(m, NULL)) { - IPSEC6STAT_INC(ips_in_polvio); - m_freem(m); - return; - } -#endif /* IPSEC */ - /* * Do not forward packets to multicast destination (should be handled * by ip6_mforward(). @@ -150,6 +136,17 @@ ip6_forward(struct mbuf *m, int srcrt) m_freem(m); return; } +#ifdef IPSEC + /* + * Check if this packet has an active SA and needs to be dropped + * instead of forwarded. + */ + if (ip6_ipsec_fwd(m) != 0) { + IP6STAT_INC(ip6s_cantforward); + m_freem(m); + return; + } +#endif /* IPSEC */ #ifdef IPSTEALTH if (!V_ip6stealth) { @@ -179,8 +176,7 @@ ip6_forward(struct mbuf *m, int srcrt) #ifdef IPSEC /* get a security policy for this packet */ - sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, - IP_FORWARDING, &error); + sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, &error); if (sp == NULL) { IPSEC6STAT_INC(ips_out_inval); IP6STAT_INC(ip6s_cantforward); @@ -254,8 +250,7 @@ ip6_forward(struct mbuf *m, int srcrt) /* * when the kernel forwards a packet, it is not proper to apply - * IPsec transport mode to the packet is not proper. this check - * avoid from this. + * IPsec transport mode to the packet. This check avoid from this. * at present, if there is even a transport mode SA request in the * security policy, the kernel does not apply IPsec to the packet. * this check is not enough because the following case is valid. @@ -289,9 +284,9 @@ ip6_forward(struct mbuf *m, int srcrt) * ipsec6_proces_packet will send the packet using ip6_output */ error = ipsec6_process_packet(m, sp->req); - - KEY_FREESP(&sp); - + /* Release SP if an error occured */ + if (error != 0) + KEY_FREESP(&sp); if (error == EJUSTRETURN) { /* * We had a SP with a level of 'use' and no SA. We @@ -557,8 +552,6 @@ pass: if (mcopy) { u_long mtu; #ifdef IPSEC - struct secpolicy *sp; - int ipsecerror; size_t ipsechdrsiz; #endif /* IPSEC */ @@ -571,15 +564,10 @@ pass: * case, as we have the outgoing interface for * encapsulated packet as "rt->rt_ifp". */ - sp = ipsec_getpolicybyaddr(mcopy, IPSEC_DIR_OUTBOUND, - IP_FORWARDING, &ipsecerror); - if (sp) { - ipsechdrsiz = ipsec_hdrsiz(mcopy, - IPSEC_DIR_OUTBOUND, NULL); - if (ipsechdrsiz < mtu) - mtu -= ipsechdrsiz; - } - + ipsechdrsiz = ipsec_hdrsiz(mcopy, IPSEC_DIR_OUTBOUND, + NULL); + if (ipsechdrsiz < mtu) + mtu -= ipsechdrsiz; /* * if mtu becomes less than minimum MTU, * tell minimum MTU (and I'll need to fragment it). diff --git a/sys/netinet6/ip6_input.c b/sys/netinet6/ip6_input.c index b8a75c6..b6602d3 100644 --- a/sys/netinet6/ip6_input.c +++ b/sys/netinet6/ip6_input.c @@ -136,6 +136,7 @@ static struct netisr_handler ip6_nh = { VNET_DECLARE(struct callout, in6_tmpaddrtimer_ch); #define V_in6_tmpaddrtimer_ch VNET(in6_tmpaddrtimer_ch) +SYSCTL_DECL(_net_inet6_ip6); VNET_DEFINE(struct pfil_head, inet6_pfil_hook); VNET_PCPUSTAT_DEFINE(struct ip6stat, ip6stat); @@ -182,6 +183,9 @@ ip6_init(void) if ((i = pfil_head_register(&V_inet6_pfil_hook)) != 0) printf("%s: WARNING: unable to register pfil hook, " "error %d\n", __func__, i); + else + pfil_head_export_sysctl(&V_inet6_pfil_hook, + SYSCTL_STATIC_CHILDREN(_net_inet6_ip6)); scope6_init(); addrsel_policy_init(); diff --git a/sys/netinet6/ip6_ipsec.c b/sys/netinet6/ip6_ipsec.c index ea47566..ac49275 100644 --- a/sys/netinet6/ip6_ipsec.c +++ b/sys/netinet6/ip6_ipsec.c @@ -117,42 +117,18 @@ ip6_ipsec_filtertunnel(struct mbuf *m) /* * Check if this packet has an active SA and needs to be dropped instead * of forwarded. - * Called from ip6_input(). + * Called from ip6_forward(). * 1 = drop packet, 0 = forward packet. */ int ip6_ipsec_fwd(struct mbuf *m) { -#ifdef IPSEC - struct m_tag *mtag; - struct tdb_ident *tdbi; - struct secpolicy *sp; - int error; - mtag = m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL); - if (mtag != NULL) { - tdbi = (struct tdb_ident *)(mtag + 1); - sp = ipsec_getpolicy(tdbi, IPSEC_DIR_INBOUND); - } else { - sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_INBOUND, - IP_FORWARDING, &error); - } - if (sp == NULL) { /* NB: can happen if error */ - /*XXX error stat???*/ - DPRINTF(("%s: no SP for forwarding\n", __func__)); /*XXX*/ - return 1; - } - /* - * Check security policy against packet attributes. - */ - error = ipsec_in_reject(sp, m); - KEY_FREESP(&sp); - if (error) { - IP6STAT_INC(ip6s_cantforward); - return 1; - } -#endif /* IPSEC */ - return 0; +#ifdef IPSEC + return (ipsec6_in_reject(m, NULL)); +#else + return (0); +#endif /* !IPSEC */ } /* @@ -165,50 +141,17 @@ ip6_ipsec_fwd(struct mbuf *m) int ip6_ipsec_input(struct mbuf *m, int nxt) { + #ifdef IPSEC - struct m_tag *mtag; - struct tdb_ident *tdbi; - struct secpolicy *sp; - int error; /* * enforce IPsec policy checking if we are seeing last header. * note that we do not visit this with protocols with pcb layer * code - like udp/tcp/raw ip. */ - if ((inet6sw[ip6_protox[nxt]].pr_flags & PR_LASTHDR) != 0 && - ipsec6_in_reject(m, NULL)) { - - /* - * Check if the packet has already had IPsec processing - * done. If so, then just pass it along. This tag gets - * set during AH, ESP, etc. input handling, before the - * packet is returned to the ip input queue for delivery. - */ - mtag = m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL); - if (mtag != NULL) { - tdbi = (struct tdb_ident *)(mtag + 1); - sp = ipsec_getpolicy(tdbi, IPSEC_DIR_INBOUND); - } else { - sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_INBOUND, - IP_FORWARDING, &error); - } - if (sp != NULL) { - /* - * Check security policy against packet attributes. - */ - error = ipsec_in_reject(sp, m); - KEY_FREESP(&sp); - } else { - /* XXX error stat??? */ - error = EINVAL; - DPRINTF(("%s: no SP, packet discarded\n", __func__));/*XXX*/ - return 1; - } - if (error) - return 1; - } + if ((inet6sw[ip6_protox[nxt]].pr_flags & PR_LASTHDR) != 0) + return (ipsec6_in_reject(m, NULL)); #endif /* IPSEC */ - return 0; + return (0); } /* @@ -218,27 +161,26 @@ ip6_ipsec_input(struct mbuf *m, int nxt) */ int -ip6_ipsec_output(struct mbuf **m, struct inpcb *inp, int *flags, int *error, - struct ifnet **ifp) +ip6_ipsec_output(struct mbuf **m, struct inpcb *inp, int *error) { #ifdef IPSEC - struct secpolicy *sp = NULL; - struct tdb_ident *tdbi; - struct m_tag *mtag; - /* XXX int s; */ - mtag = m_tag_find(*m, PACKET_TAG_IPSEC_PENDING_TDB, NULL); - if (mtag != NULL) { - tdbi = (struct tdb_ident *)(mtag + 1); - sp = ipsec_getpolicy(tdbi, IPSEC_DIR_OUTBOUND); - if (sp == NULL) - *error = -EINVAL; /* force silent drop */ - m_tag_delete(*m, mtag); - } else { - sp = ipsec4_checkpolicy(*m, IPSEC_DIR_OUTBOUND, *flags, - error, inp); - } + struct secpolicy *sp; /* + * Check the security policy (SP) for the packet and, if + * required, do IPsec-related processing. There are two + * cases here; the first time a packet is sent through + * it will be untagged and handled by ipsec4_checkpolicy. + * If the packet is resubmitted to ip6_output (e.g. after + * AH, ESP, etc. processing), there will be a tag to bypass + * the lookup and related policy checking. + */ + if (m_tag_find(*m, PACKET_TAG_IPSEC_OUT_DONE, NULL) != NULL) { + *error = 0; + return (0); + } + sp = ipsec4_checkpolicy(*m, IPSEC_DIR_OUTBOUND, error, inp); + /* * There are four return cases: * sp != NULL apply IPsec policy * sp == NULL, error == 0 no IPsec handling needed @@ -246,36 +188,6 @@ ip6_ipsec_output(struct mbuf **m, struct inpcb *inp, int *flags, int *error, * sp == NULL, error != 0 discard packet, report error */ if (sp != NULL) { - /* Loop detection, check if ipsec processing already done */ - KASSERT(sp->req != NULL, ("ip_output: no ipsec request")); - for (mtag = m_tag_first(*m); mtag != NULL; - mtag = m_tag_next(*m, mtag)) { - if (mtag->m_tag_cookie != MTAG_ABI_COMPAT) - continue; - if (mtag->m_tag_id != PACKET_TAG_IPSEC_OUT_DONE && - mtag->m_tag_id != PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED) - continue; - /* - * Check if policy has no SA associated with it. - * This can happen when an SP has yet to acquire - * an SA; e.g. on first reference. If it occurs, - * then we let ipsec4_process_packet do its thing. - */ - if (sp->req->sav == NULL) - break; - tdbi = (struct tdb_ident *)(mtag + 1); - if (tdbi->spi == sp->req->sav->spi && - tdbi->proto == sp->req->sav->sah->saidx.proto && - bcmp(&tdbi->dst, &sp->req->sav->sah->saidx.dst, - sizeof (union sockaddr_union)) == 0) { - /* - * No IPsec processing is needed, free - * reference to SP. - */ - goto done; - } - } - /* * Do delayed checksums now because we send before * this is done in the normal processing path. @@ -300,7 +212,9 @@ ip6_ipsec_output(struct mbuf **m, struct inpcb *inp, int *flags, int *error, /* NB: callee frees mbuf */ *error = ipsec6_process_packet(*m, sp->req); - + /* Release SP if an error occured */ + if (*error != 0) + KEY_FREESP(&sp); if (*error == EJUSTRETURN) { /* * We had a SP with a level of 'use' and no SA. We @@ -332,18 +246,15 @@ ip6_ipsec_output(struct mbuf **m, struct inpcb *inp, int *flags, int *error, if (*error == -EINVAL) *error = 0; goto bad; - } else { - /* No IPsec processing for this packet. */ } + /* No IPsec processing for this packet. */ } done: if (sp != NULL) KEY_FREESP(&sp); return 0; reinjected: - if (sp != NULL) - KEY_FREESP(&sp); - return -1; + return (-1); bad: if (sp != NULL) KEY_FREESP(&sp); diff --git a/sys/netinet6/ip6_ipsec.h b/sys/netinet6/ip6_ipsec.h index a65b19a..e335d85 100644 --- a/sys/netinet6/ip6_ipsec.h +++ b/sys/netinet6/ip6_ipsec.h @@ -35,8 +35,7 @@ int ip6_ipsec_filtertunnel(struct mbuf *); int ip6_ipsec_fwd(struct mbuf *); int ip6_ipsec_input(struct mbuf *, int); -int ip6_ipsec_output(struct mbuf **, struct inpcb *, int *, int *, - struct ifnet **); +int ip6_ipsec_output(struct mbuf **, struct inpcb *, int *); #if 0 int ip6_ipsec_mtu(struct mbuf *); #endif diff --git a/sys/netinet6/ip6_output.c b/sys/netinet6/ip6_output.c index 4fbac61..09eab20 100644 --- a/sys/netinet6/ip6_output.c +++ b/sys/netinet6/ip6_output.c @@ -348,8 +348,9 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, /* * IPSec checking which handles several cases. * FAST IPSEC: We re-injected the packet. + * XXX: need scope argument. */ - switch(ip6_ipsec_output(&m, inp, &flags, &error, &ifp)) + switch(ip6_ipsec_output(&m, inp, &error)) { case 1: /* Bad packet */ goto freehdrs; diff --git a/sys/netinet6/raw_ip6.c b/sys/netinet6/raw_ip6.c index a37dfc5..54841d1 100644 --- a/sys/netinet6/raw_ip6.c +++ b/sys/netinet6/raw_ip6.c @@ -269,7 +269,6 @@ rip6_input(struct mbuf **mp, int *offp, int proto) */ if (n && ipsec6_in_reject(n, last)) { m_freem(n); - IPSEC6STAT_INC(ips_in_polvio); /* Do not inject data into pcb. */ } else #endif /* IPSEC */ @@ -301,7 +300,6 @@ rip6_input(struct mbuf **mp, int *offp, int proto) */ if ((last != NULL) && ipsec6_in_reject(m, last)) { m_freem(m); - IPSEC6STAT_INC(ips_in_polvio); IP6STAT_DEC(ip6s_delivered); /* Do not inject data into pcb. */ INP_RUNLOCK(last); diff --git a/sys/netinet6/udp6_usrreq.c b/sys/netinet6/udp6_usrreq.c index 17de377..92e6780 100644 --- a/sys/netinet6/udp6_usrreq.c +++ b/sys/netinet6/udp6_usrreq.c @@ -156,7 +156,6 @@ udp6_append(struct inpcb *inp, struct mbuf *n, int off, /* Check AH/ESP integrity. */ if (ipsec6_in_reject(n, inp)) { m_freem(n); - IPSEC6STAT_INC(ips_in_polvio); return; } #endif /* IPSEC */ diff --git a/sys/netipsec/esp.h b/sys/netipsec/esp.h index eb37397..8eb0963 100644 --- a/sys/netipsec/esp.h +++ b/sys/netipsec/esp.h @@ -42,8 +42,7 @@ struct esp { /*variable size, 32bit bound*/ /* Initialization Vector */ /*variable size*/ /* Payload data */ /*variable size*/ /* padding */ - /*8bit*/ /* pad size */ - /*8bit*/ /* next header */ + /*8bit*/ /* pad length */ /*8bit*/ /* next header */ /*variable size, 32bit bound*/ /* Authentication data (new IPsec) */ }; @@ -53,8 +52,7 @@ struct newesp { u_int32_t esp_seq; /* Sequence number */ /*variable size*/ /* (IV and) Payload data */ /*variable size*/ /* padding */ - /*8bit*/ /* pad size */ - /*8bit*/ /* next header */ + /*8bit*/ /* pad length */ /*8bit*/ /* next header */ /*variable size, 32bit bound*/ /* Authentication data */ }; diff --git a/sys/netipsec/ipip_var.h b/sys/netipsec/ipip_var.h deleted file mode 100644 index 02420c1..0000000 --- a/sys/netipsec/ipip_var.h +++ /dev/null @@ -1,71 +0,0 @@ -/* $FreeBSD$ */ -/* $OpenBSD: ip_ipip.h,v 1.5 2002/06/09 16:26:10 itojun Exp $ */ -/*- - * The authors of this code are John Ioannidis (ji@tla.org), - * Angelos D. Keromytis (kermit@csd.uch.gr) and - * Niels Provos (provos@physnet.uni-hamburg.de). - * - * The original version of this code was written by John Ioannidis - * for BSD/OS in Athens, Greece, in November 1995. - * - * Ported to OpenBSD and NetBSD, with additional transforms, in December 1996, - * by Angelos D. Keromytis. - * - * Additional transforms and features in 1997 and 1998 by Angelos D. Keromytis - * and Niels Provos. - * - * Additional features in 1999 by Angelos D. Keromytis. - * - * Copyright (C) 1995, 1996, 1997, 1998, 1999 by John Ioannidis, - * Angelos D. Keromytis and Niels Provos. - * Copyright (c) 2001, Angelos D. Keromytis. - * - * Permission to use, copy, and modify this software with or without fee - * is hereby granted, provided that this entire notice is included in - * all copies of any software which is or includes a copy or - * modification of this software. - * You may use this code under the GNU public license if you so wish. Please - * contribute changes back to the authors under this freer than GPL license - * so that we may further the use of strong encryption without limitations to - * all. - * - * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR - * IMPLIED WARRANTY. IN PARTICULAR, NONE OF THE AUTHORS MAKES ANY - * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE - * MERCHANTABILITY OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR - * PURPOSE. - */ - -#ifndef _NETINET_IPIP_H_ -#define _NETINET_IPIP_H_ - -/* - * IP-inside-IP processing. - * Not quite all the functionality of RFC-1853, but the main idea is there. - */ - -struct ipipstat { - uint64_t ipips_ipackets; /* total input packets */ - uint64_t ipips_opackets; /* total output packets */ - uint64_t ipips_hdrops; /* packet shorter than header shows */ - uint64_t ipips_qfull; - uint64_t ipips_ibytes; - uint64_t ipips_obytes; - uint64_t ipips_pdrops; /* packet dropped due to policy */ - uint64_t ipips_spoof; /* IP spoofing attempts */ - uint64_t ipips_family; /* Protocol family mismatch */ - uint64_t ipips_unspec; /* Missing tunnel endpoint address */ -}; - -#ifdef _KERNEL -#include <sys/counter.h> - -VNET_DECLARE(int, ipip_allow); -VNET_PCPUSTAT_DECLARE(struct ipipstat, ipipstat); - -#define IPIPSTAT_ADD(name, val) \ - VNET_PCPUSTAT_ADD(struct ipipstat, ipipstat, name, (val)) -#define IPIPSTAT_INC(name) IPIPSTAT_ADD(name, 1) -#define V_ipip_allow VNET(ipip_allow) -#endif /* _KERNEL */ -#endif /* _NETINET_IPIP_H_ */ diff --git a/sys/netipsec/ipsec.c b/sys/netipsec/ipsec.c index 93e37e8..bf02f93 100644 --- a/sys/netipsec/ipsec.c +++ b/sys/netipsec/ipsec.c @@ -117,11 +117,12 @@ VNET_DEFINE(int, ip4_esp_trans_deflev) = IPSEC_LEVEL_USE; VNET_DEFINE(int, ip4_esp_net_deflev) = IPSEC_LEVEL_USE; VNET_DEFINE(int, ip4_ah_trans_deflev) = IPSEC_LEVEL_USE; VNET_DEFINE(int, ip4_ah_net_deflev) = IPSEC_LEVEL_USE; -VNET_DEFINE(struct secpolicy, ip4_def_policy); /* ECN ignore(-1)/forbidden(0)/allowed(1) */ VNET_DEFINE(int, ip4_ipsec_ecn) = 0; VNET_DEFINE(int, ip4_esp_randpad) = -1; +static VNET_DEFINE(struct secpolicy, def_policy); +#define V_def_policy VNET(def_policy) /* * Crypto support requirements: * @@ -140,7 +141,7 @@ SYSCTL_DECL(_net_inet_ipsec); /* net.inet.ipsec */ SYSCTL_VNET_INT(_net_inet_ipsec, IPSECCTL_DEF_POLICY, def_policy, - CTLFLAG_RW, &VNET_NAME(ip4_def_policy).policy, 0, + CTLFLAG_RW, &VNET_NAME(def_policy).policy, 0, "IPsec default policy."); SYSCTL_VNET_INT(_net_inet_ipsec, IPSECCTL_DEF_ESP_TRANSLEV, esp_trans_deflev, CTLFLAG_RW, &VNET_NAME(ip4_esp_trans_deflev), 0, @@ -212,7 +213,7 @@ SYSCTL_DECL(_net_inet6_ipsec6); /* net.inet6.ipsec6 */ SYSCTL_VNET_INT(_net_inet6_ipsec6, IPSECCTL_DEF_POLICY, def_policy, CTLFLAG_RW, - &VNET_NAME(ip4_def_policy).policy, 0, + &VNET_NAME(def_policy).policy, 0, "IPsec default policy."); SYSCTL_VNET_INT(_net_inet6_ipsec6, IPSECCTL_DEF_ESP_TRANSLEV, esp_trans_deflev, CTLFLAG_RW, &VNET_NAME(ip6_esp_trans_deflev), 0, @@ -236,6 +237,7 @@ SYSCTL_VNET_PCPUSTAT(_net_inet6_ipsec6, IPSECCTL_STATS, ipsecstats, struct ipsecstat, ipsec6stat, "IPsec IPv6 statistics."); #endif /* INET6 */ +static int ipsec_in_reject(struct secpolicy *, struct mbuf *); static int ipsec_setspidx_inpcb(struct mbuf *, struct inpcb *); static int ipsec_setspidx(struct mbuf *, struct secpolicyindex *, int); static void ipsec4_get_ulp(struct mbuf *m, struct secpolicyindex *, int); @@ -261,7 +263,7 @@ key_allocsp_default(const char* where, int tag) KEYDEBUG(KEYDEBUG_IPSEC_STAMP, printf("DP key_allocsp_default from %s:%u\n", where, tag)); - sp = &V_ip4_def_policy; + sp = &V_def_policy; if (sp->policy != IPSEC_POLICY_DISCARD && sp->policy != IPSEC_POLICY_NONE) { ipseclog((LOG_INFO, "fixed system default policy: %d->%d\n", @@ -331,6 +333,12 @@ ipsec_getpolicybysock(struct mbuf *m, u_int dir, struct inpcb *inp, int *error) IPSEC_ASSERT(dir == IPSEC_DIR_INBOUND || dir == IPSEC_DIR_OUTBOUND, ("invalid direction %u", dir)); + if (!key_havesp(dir)) { + /* No SP found, use system default. */ + sp = KEY_ALLOCSP_DEFAULT(); + return (sp); + } + /* Set spidx in pcb. */ *error = ipsec_setspidx_inpcb(m, inp); if (*error) @@ -416,7 +424,7 @@ ipsec_getpolicybysock(struct mbuf *m, u_int dir, struct inpcb *inp, int *error) * others : error occured. */ struct secpolicy * -ipsec_getpolicybyaddr(struct mbuf *m, u_int dir, int flag, int *error) +ipsec_getpolicybyaddr(struct mbuf *m, u_int dir, int *error) { struct secpolicyindex spidx; struct secpolicy *sp; @@ -427,17 +435,16 @@ ipsec_getpolicybyaddr(struct mbuf *m, u_int dir, int flag, int *error) ("invalid direction %u", dir)); sp = NULL; + *error = 0; if (key_havesp(dir)) { /* Make an index to look for a policy. */ - *error = ipsec_setspidx(m, &spidx, - (flag & IP_FORWARDING) ? 0 : 1); + *error = ipsec_setspidx(m, &spidx, 0); if (*error != 0) { - DPRINTF(("%s: setpidx failed, dir %u flag %u\n", - __func__, dir, flag)); + DPRINTF(("%s: setpidx failed, dir %u\n", + __func__, dir)); return (NULL); } spidx.dir = dir; - sp = KEY_ALLOCSP(&spidx, dir); } if (sp == NULL) /* No SP found, use system default. */ @@ -447,14 +454,13 @@ ipsec_getpolicybyaddr(struct mbuf *m, u_int dir, int flag, int *error) } struct secpolicy * -ipsec4_checkpolicy(struct mbuf *m, u_int dir, u_int flag, int *error, - struct inpcb *inp) +ipsec4_checkpolicy(struct mbuf *m, u_int dir, int *error, struct inpcb *inp) { struct secpolicy *sp; *error = 0; if (inp == NULL) - sp = ipsec_getpolicybyaddr(m, dir, flag, error); + sp = ipsec_getpolicybyaddr(m, dir, error); else sp = ipsec_getpolicybysock(m, dir, inp, error); if (sp == NULL) { @@ -829,17 +835,13 @@ ipsec_init_policy(struct socket *so, struct inpcbpolicy **pcb_sp) ipsec_delpcbpolicy(new); return (ENOBUFS); } - new->sp_in->state = IPSEC_SPSTATE_ALIVE; new->sp_in->policy = IPSEC_POLICY_ENTRUST; - if ((new->sp_out = KEY_NEWSP()) == NULL) { KEY_FREESP(&new->sp_in); ipsec_delpcbpolicy(new); return (ENOBUFS); } - new->sp_out->state = IPSEC_SPSTATE_ALIVE; new->sp_out->policy = IPSEC_POLICY_ENTRUST; - *pcb_sp = new; return (0); @@ -928,7 +930,6 @@ ipsec_deepcopy_policy(struct secpolicy *src) } dst->req = newchain; - dst->state = src->state; dst->policy = src->policy; /* Do not touch the refcnt fields. */ @@ -980,8 +981,6 @@ ipsec_set_policy_internal(struct secpolicy **pcb_sp, int optname, if ((newsp = key_msg2sp(xpl, len, &error)) == NULL) return (error); - newsp->state = IPSEC_SPSTATE_ALIVE; - /* Clear old SP and set new SP. */ KEY_FREESP(pcb_sp); *pcb_sp = newsp; @@ -1198,7 +1197,7 @@ ipsec_get_reqlevel(struct ipsecrequest *isr) * 0: valid * 1: invalid */ -int +static int ipsec_in_reject(struct secpolicy *sp, struct mbuf *m) { struct ipsecrequest *isr; @@ -1266,6 +1265,9 @@ ipsec_in_reject(struct secpolicy *sp, struct mbuf *m) return (0); /* Valid. */ } +/* + * Non zero return value means security policy DISCARD or policy violation. + */ static int ipsec46_in_reject(struct mbuf *m, struct inpcb *inp) { @@ -1278,13 +1280,9 @@ ipsec46_in_reject(struct mbuf *m, struct inpcb *inp) IPSEC_ASSERT(m != NULL, ("null mbuf")); - /* - * Get SP for this packet. - * When we are called from ip_forward(), we call - * ipsec_getpolicybyaddr() with IP_FORWARDING flag. - */ + /* Get SP for this packet. */ if (inp == NULL) - sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_INBOUND, IP_FORWARDING, &error); + sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_INBOUND, &error); else sp = ipsec_getpolicybysock(m, IPSEC_DIR_INBOUND, inp, &error); @@ -1292,8 +1290,7 @@ ipsec46_in_reject(struct mbuf *m, struct inpcb *inp) result = ipsec_in_reject(sp, m); KEY_FREESP(&sp); } else { - result = 0; /* XXX Should be panic? - * -> No, there may be error. */ + result = 1; /* treat errors as policy violation */ } return (result); } @@ -1413,12 +1410,9 @@ ipsec_hdrsiz(struct mbuf *m, u_int dir, struct inpcb *inp) IPSEC_ASSERT(m != NULL, ("null mbuf")); - /* Get SP for this packet. - * When we are called from ip_forward(), we call - * ipsec_getpolicybyaddr() with IP_FORWARDING flag. - */ + /* Get SP for this packet. */ if (inp == NULL) - sp = ipsec_getpolicybyaddr(m, dir, IP_FORWARDING, &error); + sp = ipsec_getpolicybyaddr(m, dir, &error); else sp = ipsec_getpolicybysock(m, dir, inp, &error); @@ -1506,6 +1500,7 @@ ipsec_chkreplay(u_int32_t seq, struct secasvar *sav) int ipsec_updatereplay(u_int32_t seq, struct secasvar *sav) { + char buf[128]; struct secreplay *replay; u_int32_t diff; int fr; @@ -1585,7 +1580,8 @@ ok: return (1); ipseclog((LOG_WARNING, "%s: replay counter made %d cycle. %s\n", - __func__, replay->overflow, ipsec_logsastr(sav))); + __func__, replay->overflow, + ipsec_logsastr(sav, buf, sizeof(buf)))); } replay->count++; @@ -1616,67 +1612,37 @@ vshiftl(unsigned char *bitmap, int nbit, int wsize) } } -#ifdef INET -/* Return a printable string for the IPv4 address. */ -static char * -inet_ntoa4(struct in_addr ina) -{ - static char buf[4][4 * sizeof "123" + 4]; - unsigned char *ucp = (unsigned char *) &ina; - static int i = 3; - - /* XXX-BZ Returns static buffer. */ - i = (i + 1) % 4; - sprintf(buf[i], "%d.%d.%d.%d", ucp[0] & 0xff, ucp[1] & 0xff, - ucp[2] & 0xff, ucp[3] & 0xff); - return (buf[i]); -} -#endif - /* Return a printable string for the address. */ -char * -ipsec_address(union sockaddr_union* sa) +char* +ipsec_address(union sockaddr_union* sa, char *buf, socklen_t size) { -#ifdef INET6 - char ip6buf[INET6_ADDRSTRLEN]; -#endif switch (sa->sa.sa_family) { #ifdef INET case AF_INET: - return (inet_ntoa4(sa->sin.sin_addr)); + return (inet_ntop(AF_INET, &sa->sin.sin_addr, buf, size)); #endif /* INET */ #ifdef INET6 case AF_INET6: - return (ip6_sprintf(ip6buf, &sa->sin6.sin6_addr)); + return (inet_ntop(AF_INET6, &sa->sin6.sin6_addr, buf, size)); #endif /* INET6 */ default: return ("(unknown address family)"); } } -const char * -ipsec_logsastr(struct secasvar *sav) +char * +ipsec_logsastr(struct secasvar *sav, char *buf, size_t size) { - static char buf[256]; - char *p; - struct secasindex *saidx = &sav->sah->saidx; - - IPSEC_ASSERT(saidx->src.sa.sa_family == saidx->dst.sa.sa_family, - ("address family mismatch")); - - p = buf; - snprintf(buf, sizeof(buf), "SA(SPI=%u ", (u_int32_t)ntohl(sav->spi)); - while (p && *p) - p++; - /* NB: only use ipsec_address on one address at a time. */ - snprintf(p, sizeof (buf) - (p - buf), "src=%s ", - ipsec_address(&saidx->src)); - while (p && *p) - p++; - snprintf(p, sizeof (buf) - (p - buf), "dst=%s)", - ipsec_address(&saidx->dst)); + char sbuf[INET6_ADDRSTRLEN], dbuf[INET6_ADDRSTRLEN]; + + IPSEC_ASSERT(sav->sah->saidx.src.sa.sa_family == + sav->sah->saidx.dst.sa.sa_family, ("address family mismatch")); + snprintf(buf, size, "SA(SPI=%08lx src=%s dst=%s)", + (u_long)ntohl(sav->spi), + ipsec_address(&sav->sah->saidx.src, sbuf, sizeof(sbuf)), + ipsec_address(&sav->sah->saidx.dst, dbuf, sizeof(dbuf))); return (buf); } @@ -1705,14 +1671,15 @@ ipsec_dumpmbuf(struct mbuf *m) } static void -ipsec_init(const void *unused __unused) +def_policy_init(const void *unused __unused) { - SECPOLICY_LOCK_INIT(&V_ip4_def_policy); - V_ip4_def_policy.refcnt = 1; /* NB: disallow free. */ + bzero(&V_def_policy, sizeof(struct secpolicy)); + V_def_policy.policy = IPSEC_POLICY_NONE; + V_def_policy.refcnt = 1; } -VNET_SYSINIT(ipsec_init, SI_SUB_PROTO_DOMAININIT, SI_ORDER_ANY, ipsec_init, - NULL); +VNET_SYSINIT(def_policy_init, SI_SUB_PROTO_DOMAININIT, SI_ORDER_ANY, + def_policy_init, NULL); /* XXX This stuff doesn't belong here... */ diff --git a/sys/netipsec/ipsec.h b/sys/netipsec/ipsec.h index 6da3fc7..c05be36 100644 --- a/sys/netipsec/ipsec.h +++ b/sys/netipsec/ipsec.h @@ -81,21 +81,18 @@ struct secpolicyindex { /* Security Policy Data Base */ struct secpolicy { - LIST_ENTRY(secpolicy) chain; - struct mtx lock; + TAILQ_ENTRY(secpolicy) chain; - u_int refcnt; /* reference count */ struct secpolicyindex spidx; /* selector */ - u_int32_t id; /* It's unique number on the system. */ - u_int state; /* 0: dead, others: alive */ -#define IPSEC_SPSTATE_DEAD 0 -#define IPSEC_SPSTATE_ALIVE 1 - u_int policy; /* policy_type per pfkeyv2.h */ - u_int16_t scangen; /* scan generation # */ struct ipsecrequest *req; /* pointer to the ipsec request tree, */ /* if policy == IPSEC else this value == NULL.*/ - + u_int refcnt; /* reference count */ + u_int policy; /* policy_type per pfkeyv2.h */ + u_int state; +#define IPSEC_SPSTATE_DEAD 0 +#define IPSEC_SPSTATE_ALIVE 1 + u_int32_t id; /* It's unique number on the system. */ /* * lifetime handler. * the policy can be used without limitiation if both lifetime and @@ -109,13 +106,6 @@ struct secpolicy { long validtime; /* duration this policy is valid without use */ }; -#define SECPOLICY_LOCK_INIT(_sp) \ - mtx_init(&(_sp)->lock, "ipsec policy", NULL, MTX_DEF) -#define SECPOLICY_LOCK(_sp) mtx_lock(&(_sp)->lock) -#define SECPOLICY_UNLOCK(_sp) mtx_unlock(&(_sp)->lock) -#define SECPOLICY_LOCK_DESTROY(_sp) mtx_destroy(&(_sp)->lock) -#define SECPOLICY_LOCK_ASSERT(_sp) mtx_assert(&(_sp)->lock, MA_OWNED) - /* Request for IPsec */ struct ipsecrequest { struct ipsecrequest *next; @@ -271,17 +261,6 @@ struct ipsecstat { #ifdef _KERNEL #include <sys/counter.h> -struct ipsec_output_state { - struct mbuf *m; - struct route *ro; - struct sockaddr *dst; -}; - -struct ipsec_history { - int ih_proto; - u_int32_t ih_spi; -}; - VNET_DECLARE(int, ipsec_debug); #define V_ipsec_debug VNET(ipsec_debug) @@ -294,7 +273,6 @@ VNET_DECLARE(int, ipsec_integrity); #endif VNET_PCPUSTAT_DECLARE(struct ipsecstat, ipsec4stat); -VNET_DECLARE(struct secpolicy, ip4_def_policy); VNET_DECLARE(int, ip4_esp_trans_deflev); VNET_DECLARE(int, ip4_esp_net_deflev); VNET_DECLARE(int, ip4_ah_trans_deflev); @@ -307,7 +285,6 @@ VNET_DECLARE(int, crypto_support); #define IPSECSTAT_INC(name) \ VNET_PCPUSTAT_ADD(struct ipsecstat, ipsec4stat, name, 1) -#define V_ip4_def_policy VNET(ip4_def_policy) #define V_ip4_esp_trans_deflev VNET(ip4_esp_trans_deflev) #define V_ip4_esp_net_deflev VNET(ip4_esp_net_deflev) #define V_ip4_ah_trans_deflev VNET(ip4_ah_trans_deflev) @@ -328,16 +305,14 @@ extern void ipsec_delisr(struct ipsecrequest *); struct tdb_ident; extern struct secpolicy *ipsec_getpolicy(struct tdb_ident*, u_int); struct inpcb; -extern struct secpolicy *ipsec4_checkpolicy(struct mbuf *, u_int, u_int, +extern struct secpolicy *ipsec4_checkpolicy(struct mbuf *, u_int, int *, struct inpcb *); -extern struct secpolicy * ipsec_getpolicybyaddr(struct mbuf *, u_int, - int, int *); +extern struct secpolicy * ipsec_getpolicybyaddr(struct mbuf *, u_int, int *); struct inpcb; extern int ipsec_init_policy(struct socket *so, struct inpcbpolicy **); extern int ipsec_copy_policy(struct inpcbpolicy *, struct inpcbpolicy *); extern u_int ipsec_get_reqlevel(struct ipsecrequest *); -extern int ipsec_in_reject(struct secpolicy *, struct mbuf *); extern int ipsec_set_policy(struct inpcb *inp, int optname, caddr_t request, size_t len, struct ucred *cred); @@ -355,8 +330,8 @@ extern size_t ipsec_hdrsiz(struct mbuf *, u_int, struct inpcb *); extern size_t ipsec_hdrsiz_tcp(struct tcpcb *); union sockaddr_union; -extern char * ipsec_address(union sockaddr_union* sa); -extern const char *ipsec_logsastr(struct secasvar *); +extern char *ipsec_address(union sockaddr_union *, char *, socklen_t); +extern char *ipsec_logsastr(struct secasvar *, char *, size_t); extern void ipsec_dumpmbuf(struct mbuf *); @@ -366,11 +341,10 @@ extern void ah4_ctlinput(int cmd, struct sockaddr *sa, void *); extern void esp4_input(struct mbuf *m, int off); extern void esp4_ctlinput(int cmd, struct sockaddr *sa, void *); extern void ipcomp4_input(struct mbuf *m, int off); -extern int ipsec4_common_input(struct mbuf *m, ...); +extern int ipsec_common_input(struct mbuf *m, int, int, int, int); extern int ipsec4_common_input_cb(struct mbuf *m, struct secasvar *sav, - int skip, int protoff, struct m_tag *mt); -extern int ipsec4_process_packet(struct mbuf *, struct ipsecrequest *, - int, int); + int skip, int protoff); +extern int ipsec4_process_packet(struct mbuf *, struct ipsecrequest *); extern int ipsec_process_done(struct mbuf *, struct ipsecrequest *); extern struct mbuf *ipsec_copypkt(struct mbuf *); diff --git a/sys/netipsec/ipsec6.h b/sys/netipsec/ipsec6.h index 5179939..38ac114 100644 --- a/sys/netipsec/ipsec6.h +++ b/sys/netipsec/ipsec6.h @@ -64,7 +64,7 @@ extern int ipsec6_in_reject(struct mbuf *, struct inpcb *); struct m_tag; extern int ipsec6_common_input(struct mbuf **mp, int *offp, int proto); extern int ipsec6_common_input_cb(struct mbuf *m, struct secasvar *sav, - int skip, int protoff, struct m_tag *mt); + int skip, int protoff); extern void esp6_ctlinput(int, struct sockaddr *, void *); extern int ipsec6_process_packet(struct mbuf *, struct ipsecrequest *); #endif /*_KERNEL*/ diff --git a/sys/netipsec/ipsec_input.c b/sys/netipsec/ipsec_input.c index 66de530..8fb74ff 100644 --- a/sys/netipsec/ipsec_input.c +++ b/sys/netipsec/ipsec_input.c @@ -117,9 +117,10 @@ static void ipsec4_common_ctlinput(int, struct sockaddr *, void *, int); * and call the appropriate transform. The transform callback * takes care of further processing (like ingress filtering). */ -static int +int ipsec_common_input(struct mbuf *m, int skip, int protoff, int af, int sproto) { + char buf[INET6_ADDRSTRLEN]; union sockaddr_union dst_address; struct secasvar *sav; u_int32_t spi; @@ -194,6 +195,13 @@ ipsec_common_input(struct mbuf *m, int skip, int protoff, int af, int sproto) m_copydata(m, offsetof(struct ip6_hdr, ip6_dst), sizeof(struct in6_addr), (caddr_t) &dst_address.sin6.sin6_addr); + /* We keep addresses in SADB without embedded scope id */ + if (IN6_IS_SCOPE_LINKLOCAL(&dst_address.sin6.sin6_addr)) { + /* XXX: sa6_recoverscope() */ + dst_address.sin6.sin6_scope_id = + ntohs(dst_address.sin6.sin6_addr.s6_addr16[1]); + dst_address.sin6.sin6_addr.s6_addr16[1] = 0; + } break; #endif /* INET6 */ default: @@ -207,8 +215,8 @@ ipsec_common_input(struct mbuf *m, int skip, int protoff, int af, int sproto) sav = KEY_ALLOCSA(&dst_address, sproto, spi); if (sav == NULL) { DPRINTF(("%s: no key association found for SA %s/%08lx/%u\n", - __func__, ipsec_address(&dst_address), - (u_long) ntohl(spi), sproto)); + __func__, ipsec_address(&dst_address, buf, sizeof(buf)), + (u_long) ntohl(spi), sproto)); IPSEC_ISTAT(sproto, notdb); m_freem(m); return ENOENT; @@ -216,8 +224,8 @@ ipsec_common_input(struct mbuf *m, int skip, int protoff, int af, int sproto) if (sav->tdb_xform == NULL) { DPRINTF(("%s: attempted to use uninitialized SA %s/%08lx/%u\n", - __func__, ipsec_address(&dst_address), - (u_long) ntohl(spi), sproto)); + __func__, ipsec_address(&dst_address, buf, sizeof(buf)), + (u_long) ntohl(spi), sproto)); IPSEC_ISTAT(sproto, noxform); KEY_FREESAV(&sav); m_freem(m); @@ -234,28 +242,11 @@ ipsec_common_input(struct mbuf *m, int skip, int protoff, int af, int sproto) } #ifdef INET -/* - * Common input handler for IPv4 AH, ESP, and IPCOMP. - */ -int -ipsec4_common_input(struct mbuf *m, ...) -{ - va_list ap; - int off, nxt; - - va_start(ap, m); - off = va_arg(ap, int); - nxt = va_arg(ap, int); - va_end(ap); - - return ipsec_common_input(m, off, offsetof(struct ip, ip_p), - AF_INET, nxt); -} - void ah4_input(struct mbuf *m, int off) { - ipsec4_common_input(m, off, IPPROTO_AH); + ipsec_common_input(m, off, offsetof(struct ip, ip_p), + AF_INET, IPPROTO_AH); } void ah4_ctlinput(int cmd, struct sockaddr *sa, void *v) @@ -268,7 +259,8 @@ ah4_ctlinput(int cmd, struct sockaddr *sa, void *v) void esp4_input(struct mbuf *m, int off) { - ipsec4_common_input(m, off, IPPROTO_ESP); + ipsec_common_input(m, off, offsetof(struct ip, ip_p), + AF_INET, IPPROTO_ESP); } void esp4_ctlinput(int cmd, struct sockaddr *sa, void *v) @@ -281,7 +273,8 @@ esp4_ctlinput(int cmd, struct sockaddr *sa, void *v) void ipcomp4_input(struct mbuf *m, int off) { - ipsec4_common_input(m, off, IPPROTO_IPCOMP); + ipsec_common_input(m, off, offsetof(struct ip, ip_p), + AF_INET, IPPROTO_IPCOMP); } /* @@ -291,9 +284,10 @@ ipcomp4_input(struct mbuf *m, int off) * the processed packet. */ int -ipsec4_common_input_cb(struct mbuf *m, struct secasvar *sav, - int skip, int protoff, struct m_tag *mt) +ipsec4_common_input_cb(struct mbuf *m, struct secasvar *sav, int skip, + int protoff) { + char buf[INET6_ADDRSTRLEN]; int prot, af, sproto, isr_prot; struct ip *ip; struct m_tag *mtag; @@ -332,8 +326,8 @@ ipsec4_common_input_cb(struct mbuf *m, struct secasvar *sav, */ if (m->m_len < skip && (m = m_pullup(m, skip)) == NULL) { DPRINTF(("%s: processing failed for SA %s/%08lx\n", - __func__, ipsec_address(&sav->sah->saidx.dst), - (u_long) ntohl(sav->spi))); + __func__, ipsec_address(&sav->sah->saidx.dst, + buf, sizeof(buf)), (u_long) ntohl(sav->spi))); IPSEC_ISTAT(sproto, hdrops); error = ENOBUFS; goto bad; @@ -356,6 +350,7 @@ ipsec4_common_input_cb(struct mbuf *m, struct secasvar *sav, ipsec_bpf(m, sav, AF_INET, ENC_IN|ENC_BEFORE); if ((error = ipsec_filter(&m, PFIL_IN, ENC_IN|ENC_BEFORE)) != 0) return (error); + ip = mtod(m, struct ip *); #endif /* DEV_ENC */ /* IP-in-IP encapsulation */ @@ -449,13 +444,9 @@ ipsec4_common_input_cb(struct mbuf *m, struct secasvar *sav, /* * Record what we've done to the packet (under what SA it was - * processed). If we've been passed an mtag, it means the packet - * was already processed by an ethernet/crypto combo card and - * thus has a tag attached with all the right information, but - * with a PACKET_TAG_IPSEC_IN_CRYPTO_DONE as opposed to - * PACKET_TAG_IPSEC_IN_DONE type; in that case, just change the type. + * processed). */ - if (mt == NULL && sproto != IPPROTO_IPCOMP) { + if (sproto != IPPROTO_IPCOMP) { mtag = m_tag_get(PACKET_TAG_IPSEC_IN_DONE, sizeof(struct tdb_ident), M_NOWAIT); if (mtag == NULL) { @@ -474,9 +465,6 @@ ipsec4_common_input_cb(struct mbuf *m, struct secasvar *sav, tdbi->alg_enc = sav->alg_enc; m_tag_prepend(m, mtag); - } else if (mt != NULL) { - mt->m_tag_id = PACKET_TAG_IPSEC_IN_DONE; - /* XXX do we need to mark m_flags??? */ } key_sa_recordxfer(sav, m); /* record data transfer */ @@ -593,15 +581,16 @@ ipsec6_common_input(struct mbuf **mp, int *offp, int proto) * filtering and other sanity checks on the processed packet. */ int -ipsec6_common_input_cb(struct mbuf *m, struct secasvar *sav, int skip, int protoff, - struct m_tag *mt) +ipsec6_common_input_cb(struct mbuf *m, struct secasvar *sav, int skip, + int protoff) { + char buf[INET6_ADDRSTRLEN]; int prot, af, sproto; struct ip6_hdr *ip6; struct m_tag *mtag; struct tdb_ident *tdbi; struct secasindex *saidx; - int nxt; + int nxt, isr_prot; u_int8_t nxt8; int error, nest; #ifdef notyet @@ -632,8 +621,8 @@ ipsec6_common_input_cb(struct mbuf *m, struct secasvar *sav, int skip, int proto (m = m_pullup(m, sizeof(struct ip6_hdr))) == NULL) { DPRINTF(("%s: processing failed for SA %s/%08lx\n", - __func__, ipsec_address(&sav->sah->saidx.dst), - (u_long) ntohl(sav->spi))); + __func__, ipsec_address(&sav->sah->saidx.dst, buf, + sizeof(buf)), (u_long) ntohl(sav->spi))); IPSEC_ISTAT(sproto, hdrops); error = EACCES; @@ -738,13 +727,9 @@ ipsec6_common_input_cb(struct mbuf *m, struct secasvar *sav, int skip, int proto /* * Record what we've done to the packet (under what SA it was - * processed). If we've been passed an mtag, it means the packet - * was already processed by an ethernet/crypto combo card and - * thus has a tag attached with all the right information, but - * with a PACKET_TAG_IPSEC_IN_CRYPTO_DONE as opposed to - * PACKET_TAG_IPSEC_IN_DONE type; in that case, just change the type. + * processed). */ - if (mt == NULL && sproto != IPPROTO_IPCOMP) { + if (sproto != IPPROTO_IPCOMP) { mtag = m_tag_get(PACKET_TAG_IPSEC_IN_DONE, sizeof(struct tdb_ident), M_NOWAIT); if (mtag == NULL) { @@ -763,10 +748,6 @@ ipsec6_common_input_cb(struct mbuf *m, struct secasvar *sav, int skip, int proto tdbi->alg_enc = sav->alg_enc; m_tag_prepend(m, mtag); - } else { - if (mt != NULL) - mt->m_tag_id = PACKET_TAG_IPSEC_IN_DONE; - /* XXX do we need to mark m_flags??? */ } key_sa_recordxfer(sav, m); @@ -785,6 +766,35 @@ ipsec6_common_input_cb(struct mbuf *m, struct secasvar *sav, int skip, int proto if ((error = ipsec_filter(&m, PFIL_IN, ENC_IN|ENC_AFTER)) != 0) return (error); #endif /* DEV_ENC */ + if (skip == 0) { + /* + * We stripped outer IPv6 header. + * Now we should requeue decrypted packet via netisr. + */ + switch (prot) { +#ifdef INET + case IPPROTO_IPIP: + isr_prot = NETISR_IP; + break; +#endif + case IPPROTO_IPV6: + isr_prot = NETISR_IPV6; + break; + default: + DPRINTF(("%s: cannot handle inner ip proto %d\n", + __func__, prot)); + IPSEC_ISTAT(sproto, nopf); + error = EPFNOSUPPORT; + goto bad; + } + error = netisr_queue_src(isr_prot, (uintptr_t)sav->spi, m); + if (error) { + IPSEC_ISTAT(sproto, qfull); + DPRINTF(("%s: queue full; proto %u packet dropped\n", + __func__, sproto)); + } + return (error); + } /* * See the end of ip6_input for this logic. * IPPROTO_IPV[46] case will be processed just like other ones diff --git a/sys/netipsec/ipsec_output.c b/sys/netipsec/ipsec_output.c index 442fb7a..7fc61ac 100644 --- a/sys/netipsec/ipsec_output.c +++ b/sys/netipsec/ipsec_output.c @@ -60,6 +60,7 @@ #include <netinet/ip6.h> #ifdef INET6 #include <netinet6/ip6_var.h> +#include <netinet6/scope6_var.h> #endif #include <netinet/in_pcb.h> #ifdef INET6 @@ -102,6 +103,7 @@ ipsec_process_done(struct mbuf *m, struct ipsecrequest *isr) IPSEC_ASSERT(m != NULL, ("null mbuf")); IPSEC_ASSERT(isr != NULL, ("null ISR")); + IPSEC_ASSERT(isr->sp != NULL, ("NULL isr->sp")); sav = isr->sav; IPSEC_ASSERT(sav != NULL, ("null SA")); IPSEC_ASSERT(sav->sah != NULL, ("null SAH")); @@ -155,12 +157,18 @@ ipsec_process_done(struct mbuf *m, struct ipsecrequest *isr) tdbi->spi = sav->spi; m_tag_prepend(m, mtag); + key_sa_recordxfer(sav, m); /* record data transfer */ + /* * If there's another (bundled) SA to apply, do so. * Note that this puts a burden on the kernel stack size. * If this is a problem we'll need to introduce a queue * to set the packet on so we can unwind the stack before * doing further processing. + * + * If ipsec[46]_process_packet() will successfully queue + * the request, we need to take additional reference to SP, + * because xform callback will release reference. */ if (isr->next) { /* XXX-BZ currently only support same AF bundles. */ @@ -168,7 +176,11 @@ ipsec_process_done(struct mbuf *m, struct ipsecrequest *isr) #ifdef INET case AF_INET: IPSECSTAT_INC(ips_out_bundlesa); - return ipsec4_process_packet(m, isr->next, 0, 0); + key_addref(isr->sp); + error = ipsec4_process_packet(m, isr->next); + if (error != 0) + KEY_FREESP(&isr->sp); + return (error); /* NOTREACHED */ #endif #ifdef notyet @@ -176,7 +188,11 @@ ipsec_process_done(struct mbuf *m, struct ipsecrequest *isr) case AF_INET6: /* XXX */ IPSEC6STAT_INC(ips_out_bundlesa); - return ipsec6_process_packet(m, isr->next); + key_addref(isr->sp); + error = ipsec6_process_packet(m, isr->next); + if (error != 0) + KEY_FREESP(&isr->sp); + return (error); /* NOTREACHED */ #endif /* INET6 */ #endif @@ -187,12 +203,10 @@ ipsec_process_done(struct mbuf *m, struct ipsecrequest *isr) goto bad; } } - key_sa_recordxfer(sav, m); /* record data transfer */ /* * We're done with IPsec processing, transmit the packet using the - * appropriate network protocol (IP or IPv6). SPD lookup will be - * performed again there. + * appropriate network protocol (IP or IPv6). */ switch (saidx->dst.sa.sa_family) { #ifdef INET @@ -418,17 +432,117 @@ bad: #undef IPSEC_OSTAT } +static int +ipsec_encap(struct mbuf **mp, struct secasindex *saidx) +{ +#ifdef INET6 + struct ip6_hdr *ip6; +#endif + struct ip *ip; + int setdf; + uint8_t itos, proto; + + ip = mtod(*mp, struct ip *); + switch (ip->ip_v) { +#ifdef INET + case IPVERSION: + proto = IPPROTO_IPIP; + /* + * Collect IP_DF state from the inner header + * and honor system-wide control of how to handle it. + */ + switch (V_ip4_ipsec_dfbit) { + case 0: /* clear in outer header */ + case 1: /* set in outer header */ + setdf = V_ip4_ipsec_dfbit; + break; + default:/* propagate to outer header */ + setdf = (ip->ip_off & ntohs(IP_DF)) != 0; + } + itos = ip->ip_tos; + break; +#endif +#ifdef INET6 + case (IPV6_VERSION >> 4): + proto = IPPROTO_IPV6; + ip6 = mtod(*mp, struct ip6_hdr *); + itos = (ntohl(ip6->ip6_flow) >> 20) & 0xff; + setdf = V_ip4_ipsec_dfbit ? 1: 0; + /* scoped address handling */ + in6_clearscope(&ip6->ip6_src); + in6_clearscope(&ip6->ip6_dst); + break; +#endif + default: + return (EAFNOSUPPORT); + } + switch (saidx->dst.sa.sa_family) { +#ifdef INET + case AF_INET: + if (saidx->src.sa.sa_family != AF_INET || + saidx->src.sin.sin_addr.s_addr == INADDR_ANY || + saidx->dst.sin.sin_addr.s_addr == INADDR_ANY) + return (EINVAL); + M_PREPEND(*mp, sizeof(struct ip), M_NOWAIT); + if (*mp == NULL) + return (ENOBUFS); + ip = mtod(*mp, struct ip *); + ip->ip_v = IPVERSION; + ip->ip_hl = sizeof(struct ip) >> 2; + ip->ip_p = proto; + ip->ip_len = htons((*mp)->m_pkthdr.len); + ip->ip_ttl = V_ip_defttl; + ip->ip_sum = 0; + ip->ip_off = setdf ? htons(IP_DF): 0; + ip->ip_src = saidx->src.sin.sin_addr; + ip->ip_dst = saidx->dst.sin.sin_addr; + ip_ecn_ingress(V_ip4_ipsec_ecn, &ip->ip_tos, &itos); + ip->ip_id = ip_newid(); + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + if (saidx->src.sa.sa_family != AF_INET6 || + IN6_IS_ADDR_UNSPECIFIED(&saidx->src.sin6.sin6_addr) || + IN6_IS_ADDR_UNSPECIFIED(&saidx->dst.sin6.sin6_addr)) + return (EINVAL); + M_PREPEND(*mp, sizeof(struct ip6_hdr), M_NOWAIT); + if (*mp == NULL) + return (ENOBUFS); + ip6 = mtod(*mp, struct ip6_hdr *); + ip6->ip6_flow = 0; + ip6->ip6_vfc = IPV6_VERSION; + ip6->ip6_hlim = V_ip6_defhlim; + ip6->ip6_nxt = proto; + ip6->ip6_dst = saidx->dst.sin6.sin6_addr; + /* For link-local address embed scope zone id */ + if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_dst)) + ip6->ip6_dst.s6_addr16[1] = + htons(saidx->dst.sin6.sin6_scope_id & 0xffff); + ip6->ip6_src = saidx->src.sin6.sin6_addr; + if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) + ip6->ip6_src.s6_addr16[1] = + htons(saidx->src.sin6.sin6_scope_id & 0xffff); + ip6->ip6_plen = htons((*mp)->m_pkthdr.len - sizeof(*ip6)); + ip_ecn_ingress(V_ip6_ipsec_ecn, &proto, &itos); + ip6->ip6_flow |= htonl((uint32_t)proto << 20); + break; +#endif /* INET6 */ + default: + return (EAFNOSUPPORT); + } + return (0); +} + #ifdef INET /* * IPsec output logic for IPv4. */ int -ipsec4_process_packet( - struct mbuf *m, - struct ipsecrequest *isr, - int flags, - int tunalready) +ipsec4_process_packet(struct mbuf *m, struct ipsecrequest *isr) { + char sbuf[INET6_ADDRSTRLEN], dbuf[INET6_ADDRSTRLEN]; + union sockaddr_union *dst; struct secasindex saidx; struct secasvar *sav; struct ip *ip; @@ -447,7 +561,13 @@ ipsec4_process_packet( } sav = isr->sav; - + if (m->m_len < sizeof(struct ip) && + (m = m_pullup(m, sizeof (struct ip))) == NULL) { + error = ENOBUFS; + goto bad; + } + ip = mtod(m, struct ip *); + dst = &sav->sah->saidx.dst; #ifdef DEV_ENC encif->if_opackets++; encif->if_obytes += m->m_pkthdr.len; @@ -457,99 +577,29 @@ ipsec4_process_packet( /* pass the mbuf to enc0 for packet filtering */ if ((error = ipsec_filter(&m, PFIL_OUT, ENC_OUT|ENC_BEFORE)) != 0) goto bad; + ip = mtod(m, struct ip *); #endif - - if (!tunalready) { - union sockaddr_union *dst = &sav->sah->saidx.dst; - int setdf; - - /* - * Collect IP_DF state from the outer header. - */ - if (dst->sa.sa_family == AF_INET) { - if (m->m_len < sizeof (struct ip) && - (m = m_pullup(m, sizeof (struct ip))) == NULL) { - error = ENOBUFS; - goto bad; - } - ip = mtod(m, struct ip *); - /* Honor system-wide control of how to handle IP_DF */ - switch (V_ip4_ipsec_dfbit) { - case 0: /* clear in outer header */ - case 1: /* set in outer header */ - setdf = V_ip4_ipsec_dfbit; - break; - default: /* propagate to outer header */ - setdf = ntohs(ip->ip_off & IP_DF); - break; - } - } else { - ip = NULL; /* keep compiler happy */ - setdf = 0; - } - /* Do the appropriate encapsulation, if necessary */ - if (isr->saidx.mode == IPSEC_MODE_TUNNEL || /* Tunnel requ'd */ - dst->sa.sa_family != AF_INET || /* PF mismatch */ -#if 0 - (sav->flags & SADB_X_SAFLAGS_TUNNEL) || /* Tunnel requ'd */ - sav->tdb_xform->xf_type == XF_IP4 || /* ditto */ -#endif - (dst->sa.sa_family == AF_INET && /* Proxy */ - dst->sin.sin_addr.s_addr != INADDR_ANY && - dst->sin.sin_addr.s_addr != ip->ip_dst.s_addr)) { - struct mbuf *mp; - - /* Fix IPv4 header checksum and length */ - if (m->m_len < sizeof (struct ip) && - (m = m_pullup(m, sizeof (struct ip))) == NULL) { - error = ENOBUFS; - goto bad; - } - ip = mtod(m, struct ip *); - if (ip->ip_v == IPVERSION) { - ip->ip_len = htons(m->m_pkthdr.len); - ip->ip_sum = 0; - ip->ip_sum = in_cksum(m, ip->ip_hl << 2); - } - - /* Encapsulate the packet */ - error = ipip_output(m, isr, &mp, 0, 0); - if (mp == NULL && !error) { - /* Should never happen. */ - DPRINTF(("%s: ipip_output returns no mbuf and " - "no error!", __func__)); - error = EFAULT; - } - if (error) { - if (mp) { - /* XXX: Should never happen! */ - m_freem(mp); - } - m = NULL; /* ipip_output() already freed it */ - goto bad; - } - m = mp, mp = NULL; - /* - * ipip_output clears IP_DF in the new header. If - * we need to propagate IP_DF from the outer header, - * then we have to do it here. - * - * XXX shouldn't assume what ipip_output does. - */ - if (dst->sa.sa_family == AF_INET && setdf) { - if (m->m_len < sizeof (struct ip) && - (m = m_pullup(m, sizeof (struct ip))) == NULL) { - error = ENOBUFS; - goto bad; - } - ip = mtod(m, struct ip *); - ip->ip_off = ntohs(ip->ip_off); - ip->ip_off |= IP_DF; - ip->ip_off = htons(ip->ip_off); - } + /* Do the appropriate encapsulation, if necessary */ + if (isr->saidx.mode == IPSEC_MODE_TUNNEL || /* Tunnel requ'd */ + dst->sa.sa_family != AF_INET || /* PF mismatch */ + (dst->sa.sa_family == AF_INET && /* Proxy */ + dst->sin.sin_addr.s_addr != INADDR_ANY && + dst->sin.sin_addr.s_addr != ip->ip_dst.s_addr)) { + /* Fix IPv4 header checksum and length */ + ip->ip_len = htons(m->m_pkthdr.len); + ip->ip_sum = 0; + ip->ip_sum = in_cksum(m, ip->ip_hl << 2); + error = ipsec_encap(&m, &sav->sah->saidx); + if (error != 0) { + DPRINTF(("%s: encapsulation for SA %s->%s " + "SPI 0x%08x failed with error %d\n", __func__, + ipsec_address(&sav->sah->saidx.src, sbuf, + sizeof(sbuf)), + ipsec_address(&sav->sah->saidx.dst, dbuf, + sizeof(dbuf)), ntohl(sav->spi), error)); + goto bad; } } - #ifdef DEV_ENC /* pass the mbuf to enc0 for bpf processing */ ipsec_bpf(m, sav, sav->sah->saidx.dst.sa.sa_family, ENC_OUT|ENC_AFTER); @@ -561,40 +611,33 @@ ipsec4_process_packet( /* * Dispatch to the appropriate IPsec transform logic. The * packet will be returned for transmission after crypto - * processing, etc. are completed. For encapsulation we - * bypass this call because of the explicit call done above - * (necessary to deal with IP_DF handling for IPv4). + * processing, etc. are completed. * * NB: m & sav are ``passed to caller'' who's reponsible for * for reclaiming their resources. */ - if (sav->tdb_xform->xf_type != XF_IP4) { - union sockaddr_union *dst = &sav->sah->saidx.dst; - switch(dst->sa.sa_family) { - case AF_INET: - ip = mtod(m, struct ip *); - i = ip->ip_hl << 2; - off = offsetof(struct ip, ip_p); - break; + switch(dst->sa.sa_family) { + case AF_INET: + ip = mtod(m, struct ip *); + i = ip->ip_hl << 2; + off = offsetof(struct ip, ip_p); + break; #ifdef INET6 - case AF_INET6: - i = sizeof(struct ip6_hdr); - off = offsetof(struct ip6_hdr, ip6_nxt); - break; + case AF_INET6: + i = sizeof(struct ip6_hdr); + off = offsetof(struct ip6_hdr, ip6_nxt); + break; #endif /* INET6 */ - default: + default: DPRINTF(("%s: unsupported protocol family %u\n", - __func__, dst->sa.sa_family)); - error = EPFNOSUPPORT; - IPSECSTAT_INC(ips_out_inval); - goto bad; - } - error = (*sav->tdb_xform->xf_output)(m, isr, NULL, i, off); - } else { - error = ipsec_process_done(m, isr); + __func__, dst->sa.sa_family)); + error = EPFNOSUPPORT; + IPSECSTAT_INC(ips_out_inval); + goto bad; } + error = (*sav->tdb_xform->xf_output)(m, isr, NULL, i, off); IPSECREQUEST_UNLOCK(isr); - return error; + return (error); bad: if (isr) IPSECREQUEST_UNLOCK(isr); @@ -622,11 +665,9 @@ in6_sa_equal_addrwithscope(const struct sockaddr_in6 *sa, const struct in6_addr * IPsec output logic for IPv6. */ int -ipsec6_process_packet( - struct mbuf *m, - struct ipsecrequest *isr - ) +ipsec6_process_packet(struct mbuf *m, struct ipsecrequest *isr) { + char sbuf[INET6_ADDRSTRLEN], dbuf[INET6_ADDRSTRLEN]; struct secasindex saidx; struct secasvar *sav; struct ip6_hdr *ip6; @@ -644,7 +685,6 @@ ipsec6_process_packet( goto bad; return EJUSTRETURN; } - sav = isr->sav; dst = &sav->sah->saidx.dst; @@ -659,6 +699,7 @@ ipsec6_process_packet( /* pass the mbuf to enc0 for packet filtering */ if ((error = ipsec_filter(&m, PFIL_OUT, ENC_OUT|ENC_BEFORE)) != 0) goto bad; + ip6 = mtod(m, struct ip6_hdr *); #endif /* DEV_ENC */ /* Do the appropriate encapsulation, if necessary */ @@ -668,42 +709,21 @@ ipsec6_process_packet( (!IN6_IS_ADDR_UNSPECIFIED(&dst->sin6.sin6_addr)) && (!in6_sa_equal_addrwithscope(&dst->sin6, &ip6->ip6_dst)))) { - struct mbuf *mp; - - /* Fix IPv6 header payload length. */ - if (m->m_len < sizeof(struct ip6_hdr)) - if ((m = m_pullup(m,sizeof(struct ip6_hdr))) == NULL) { - error = ENOBUFS; - goto bad; - } - if (m->m_pkthdr.len - sizeof(*ip6) > IPV6_MAXPACKET) { /* No jumbogram support. */ error = ENXIO; /*XXX*/ goto bad; } - - /* Encapsulate the packet */ - error = ipip_output(m, isr, &mp, 0, 0); - if (mp == NULL && !error) { - /* Should never happen. */ - DPRINTF(("ipsec6_process_packet: ipip_output " - "returns no mbuf and no error!")); - error = EFAULT; - goto bad; - } - - if (error) { - if (mp) { - /* XXX: Should never happen! */ - m_freem(mp); - } - m = NULL; /* ipip_output() already freed it */ + error = ipsec_encap(&m, &sav->sah->saidx); + if (error != 0) { + DPRINTF(("%s: encapsulation for SA %s->%s " + "SPI 0x%08x failed with error %d\n", __func__, + ipsec_address(&sav->sah->saidx.src, sbuf, + sizeof(sbuf)), + ipsec_address(&sav->sah->saidx.dst, dbuf, + sizeof(dbuf)), ntohl(sav->spi), error)); goto bad; } - - m = mp; - mp = NULL; } #ifdef DEV_ENC diff --git a/sys/netipsec/key.c b/sys/netipsec/key.c index a3e3d76..7705a63 100644 --- a/sys/netipsec/key.c +++ b/sys/netipsec/key.c @@ -48,6 +48,7 @@ #include <sys/domain.h> #include <sys/protosw.h> #include <sys/malloc.h> +#include <sys/rmlock.h> #include <sys/socket.h> #include <sys/socketvar.h> #include <sys/sysctl.h> @@ -140,16 +141,19 @@ static VNET_DEFINE(u_int32_t, acq_seq) = 0; #define V_acq_seq VNET(acq_seq) /* SPD */ -static VNET_DEFINE(LIST_HEAD(_sptree, secpolicy), sptree[IPSEC_DIR_MAX]); +static VNET_DEFINE(TAILQ_HEAD(_sptree, secpolicy), sptree[IPSEC_DIR_MAX]); +static struct rmlock sptree_lock; #define V_sptree VNET(sptree) -static struct mtx sptree_lock; -#define SPTREE_LOCK_INIT() \ - mtx_init(&sptree_lock, "sptree", \ - "fast ipsec security policy database", MTX_DEF) -#define SPTREE_LOCK_DESTROY() mtx_destroy(&sptree_lock) -#define SPTREE_LOCK() mtx_lock(&sptree_lock) -#define SPTREE_UNLOCK() mtx_unlock(&sptree_lock) -#define SPTREE_LOCK_ASSERT() mtx_assert(&sptree_lock, MA_OWNED) +#define SPTREE_LOCK_INIT() rm_init(&sptree_lock, "sptree") +#define SPTREE_LOCK_DESTROY() rm_destroy(&sptree_lock) +#define SPTREE_RLOCK_TRACKER struct rm_priotracker sptree_tracker +#define SPTREE_RLOCK() rm_rlock(&sptree_lock, &sptree_tracker) +#define SPTREE_RUNLOCK() rm_runlock(&sptree_lock, &sptree_tracker) +#define SPTREE_RLOCK_ASSERT() rm_assert(&sptree_lock, RA_RLOCKED) +#define SPTREE_WLOCK() rm_wlock(&sptree_lock) +#define SPTREE_WUNLOCK() rm_wunlock(&sptree_lock) +#define SPTREE_WLOCK_ASSERT() rm_assert(&sptree_lock, RA_WLOCKED) +#define SPTREE_UNLOCK_ASSERT() rm_assert(&sptree_lock, RA_UNLOCKED) static VNET_DEFINE(LIST_HEAD(_sahtree, secashead), sahtree); /* SAD */ #define V_sahtree VNET(sahtree) @@ -411,9 +415,8 @@ struct sadb_msghdr { static struct secasvar *key_allocsa_policy(const struct secasindex *); static void key_freesp_so(struct secpolicy **); static struct secasvar *key_do_allocsa_policy(struct secashead *, u_int); -static void key_delsp(struct secpolicy *); +static void key_unlink(struct secpolicy *); static struct secpolicy *key_getsp(struct secpolicyindex *); -static void _key_delsp(struct secpolicy *sp); static struct secpolicy *key_getspbyid(u_int32_t); static u_int32_t key_newreqid(void); static struct mbuf *key_gather_mbuf(struct mbuf *, @@ -570,15 +573,8 @@ sa_delref(struct secasvar *sav) return (refcount_release(&sav->refcnt)); } -#define SP_ADDREF(p) do { \ - (p)->refcnt++; \ - IPSEC_ASSERT((p)->refcnt != 0, ("SP refcnt overflow")); \ -} while (0) -#define SP_DELREF(p) do { \ - IPSEC_ASSERT((p)->refcnt > 0, ("SP refcnt underflow")); \ - (p)->refcnt--; \ -} while (0) - +#define SP_ADDREF(p) refcount_acquire(&(p)->refcnt) +#define SP_DELREF(p) refcount_release(&(p)->refcnt) /* * Update the refcnt while holding the SPTREE lock. @@ -586,9 +582,8 @@ sa_delref(struct secasvar *sav) void key_addref(struct secpolicy *sp) { - SPTREE_LOCK(); + SP_ADDREF(sp); - SPTREE_UNLOCK(); } /* @@ -601,7 +596,7 @@ key_havesp(u_int dir) { return (dir == IPSEC_DIR_INBOUND || dir == IPSEC_DIR_OUTBOUND ? - LIST_FIRST(&V_sptree[dir]) != NULL : 1); + TAILQ_FIRST(&V_sptree[dir]) != NULL : 1); } /* %%% IPsec policy management */ @@ -615,6 +610,7 @@ struct secpolicy * key_allocsp(struct secpolicyindex *spidx, u_int dir, const char* where, int tag) { + SPTREE_RLOCK_TRACKER; struct secpolicy *sp; IPSEC_ASSERT(spidx != NULL, ("null spidx")); @@ -629,14 +625,11 @@ key_allocsp(struct secpolicyindex *spidx, u_int dir, const char* where, printf("*** objects\n"); kdebug_secpolicyindex(spidx)); - SPTREE_LOCK(); - LIST_FOREACH(sp, &V_sptree[dir], chain) { + SPTREE_RLOCK(); + TAILQ_FOREACH(sp, &V_sptree[dir], chain) { KEYDEBUG(KEYDEBUG_IPSEC_DATA, printf("*** in SPD\n"); kdebug_secpolicyindex(&sp->spidx)); - - if (sp->state == IPSEC_SPSTATE_DEAD) - continue; if (key_cmpspidx_withmask(&sp->spidx, spidx)) goto found; } @@ -650,7 +643,7 @@ found: sp->lastused = time_second; SP_ADDREF(sp); } - SPTREE_UNLOCK(); + SPTREE_RUNLOCK(); KEYDEBUG(KEYDEBUG_IPSEC_STAMP, printf("DP %s return SP:%p (ID=%u) refcnt %u\n", __func__, @@ -668,6 +661,7 @@ struct secpolicy * key_allocsp2(u_int32_t spi, union sockaddr_union *dst, u_int8_t proto, u_int dir, const char* where, int tag) { + SPTREE_RLOCK_TRACKER; struct secpolicy *sp; IPSEC_ASSERT(dst != NULL, ("null dst")); @@ -683,14 +677,11 @@ key_allocsp2(u_int32_t spi, union sockaddr_union *dst, u_int8_t proto, printf("spi %u proto %u dir %u\n", spi, proto, dir); kdebug_sockaddr(&dst->sa)); - SPTREE_LOCK(); - LIST_FOREACH(sp, &V_sptree[dir], chain) { + SPTREE_RLOCK(); + TAILQ_FOREACH(sp, &V_sptree[dir], chain) { KEYDEBUG(KEYDEBUG_IPSEC_DATA, printf("*** in SPD\n"); kdebug_secpolicyindex(&sp->spidx)); - - if (sp->state == IPSEC_SPSTATE_DEAD) - continue; /* compare simple values, then dst address */ if (sp->spidx.ul_proto != proto) continue; @@ -710,7 +701,7 @@ found: sp->lastused = time_second; SP_ADDREF(sp); } - SPTREE_UNLOCK(); + SPTREE_RUNLOCK(); KEYDEBUG(KEYDEBUG_IPSEC_STAMP, printf("DP %s return SP:%p (ID=%u) refcnt %u\n", __func__, @@ -1169,22 +1160,47 @@ done: void _key_freesp(struct secpolicy **spp, const char* where, int tag) { + struct ipsecrequest *isr, *nextisr; struct secpolicy *sp = *spp; IPSEC_ASSERT(sp != NULL, ("null sp")); - - SPTREE_LOCK(); - SP_DELREF(sp); - KEYDEBUG(KEYDEBUG_IPSEC_STAMP, printf("DP %s SP:%p (ID=%u) from %s:%u; refcnt now %u\n", __func__, sp, sp->id, where, tag, sp->refcnt)); - if (sp->refcnt == 0) { - *spp = NULL; - key_delsp(sp); + if (SP_DELREF(sp) == 0) + return; + *spp = NULL; + for (isr = sp->req; isr != NULL; isr = nextisr) { + if (isr->sav != NULL) { + KEY_FREESAV(&isr->sav); + isr->sav = NULL; + } + nextisr = isr->next; + ipsec_delisr(isr); } - SPTREE_UNLOCK(); + free(sp, M_IPSEC_SP); +} + +static void +key_unlink(struct secpolicy *sp) +{ + + IPSEC_ASSERT(sp != NULL, ("null sp")); + IPSEC_ASSERT(sp->spidx.dir == IPSEC_DIR_INBOUND || + sp->spidx.dir == IPSEC_DIR_OUTBOUND, + ("invalid direction %u", sp->spidx.dir)); + SPTREE_UNLOCK_ASSERT(); + + SPTREE_WLOCK(); + if (sp->state == IPSEC_SPSTATE_DEAD) { + SPTREE_WUNLOCK(); + return; + } + sp->state = IPSEC_SPSTATE_DEAD; + TAILQ_REMOVE(&V_sptree[sp->spidx.dir], sp, chain); + SPTREE_WUNLOCK(); + KEY_FREESP(&sp); } /* @@ -1273,38 +1289,6 @@ key_freesav(struct secasvar **psav, const char* where, int tag) /* %%% SPD management */ /* - * free security policy entry. - */ -static void -key_delsp(struct secpolicy *sp) -{ - struct ipsecrequest *isr, *nextisr; - - IPSEC_ASSERT(sp != NULL, ("null sp")); - SPTREE_LOCK_ASSERT(); - - sp->state = IPSEC_SPSTATE_DEAD; - - IPSEC_ASSERT(sp->refcnt == 0, - ("SP with references deleted (refcnt %u)", sp->refcnt)); - - /* remove from SP index */ - if (__LIST_CHAINED(sp)) - LIST_REMOVE(sp, chain); - - for (isr = sp->req; isr != NULL; isr = nextisr) { - if (isr->sav != NULL) { - KEY_FREESAV(&isr->sav); - isr->sav = NULL; - } - - nextisr = isr->next; - ipsec_delisr(isr); - } - _key_delsp(sp); -} - -/* * search SPD * OUT: NULL : not found * others : found, pointer to a SP. @@ -1312,20 +1296,19 @@ key_delsp(struct secpolicy *sp) static struct secpolicy * key_getsp(struct secpolicyindex *spidx) { + SPTREE_RLOCK_TRACKER; struct secpolicy *sp; IPSEC_ASSERT(spidx != NULL, ("null spidx")); - SPTREE_LOCK(); - LIST_FOREACH(sp, &V_sptree[spidx->dir], chain) { - if (sp->state == IPSEC_SPSTATE_DEAD) - continue; + SPTREE_RLOCK(); + TAILQ_FOREACH(sp, &V_sptree[spidx->dir], chain) { if (key_cmpspidx_exactly(spidx, &sp->spidx)) { SP_ADDREF(sp); break; } } - SPTREE_UNLOCK(); + SPTREE_RUNLOCK(); return sp; } @@ -1338,28 +1321,25 @@ key_getsp(struct secpolicyindex *spidx) static struct secpolicy * key_getspbyid(u_int32_t id) { + SPTREE_RLOCK_TRACKER; struct secpolicy *sp; - SPTREE_LOCK(); - LIST_FOREACH(sp, &V_sptree[IPSEC_DIR_INBOUND], chain) { - if (sp->state == IPSEC_SPSTATE_DEAD) - continue; + SPTREE_RLOCK(); + TAILQ_FOREACH(sp, &V_sptree[IPSEC_DIR_INBOUND], chain) { if (sp->id == id) { SP_ADDREF(sp); goto done; } } - LIST_FOREACH(sp, &V_sptree[IPSEC_DIR_OUTBOUND], chain) { - if (sp->state == IPSEC_SPSTATE_DEAD) - continue; + TAILQ_FOREACH(sp, &V_sptree[IPSEC_DIR_OUTBOUND], chain) { if (sp->id == id) { SP_ADDREF(sp); goto done; } } done: - SPTREE_UNLOCK(); + SPTREE_RUNLOCK(); return sp; } @@ -1371,11 +1351,8 @@ key_newsp(const char* where, int tag) newsp = (struct secpolicy *) malloc(sizeof(struct secpolicy), M_IPSEC_SP, M_NOWAIT|M_ZERO); - if (newsp) { - SECPOLICY_LOCK_INIT(newsp); - newsp->refcnt = 1; - newsp->req = NULL; - } + if (newsp) + refcount_init(&newsp->refcnt, 1); KEYDEBUG(KEYDEBUG_IPSEC_STAMP, printf("DP %s from %s:%u return SP:%p\n", __func__, @@ -1383,13 +1360,6 @@ key_newsp(const char* where, int tag) return newsp; } -static void -_key_delsp(struct secpolicy *sp) -{ - SECPOLICY_LOCK_DESTROY(sp); - free(sp, M_IPSEC_SP); -} - /* * create secpolicy structure from sadb_x_policy structure. * NOTE: `state', `secpolicyindex' in secpolicy structure are not set, @@ -1869,9 +1839,7 @@ key_spdadd(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) newsp = key_getsp(&spidx); if (mhp->msg->sadb_msg_type == SADB_X_SPDUPDATE) { if (newsp) { - SPTREE_LOCK(); - newsp->state = IPSEC_SPSTATE_DEAD; - SPTREE_UNLOCK(); + key_unlink(newsp); KEY_FREESP(&newsp); } } else { @@ -1883,13 +1851,15 @@ key_spdadd(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) } } + /* XXX: there is race between key_getsp and key_msg2sp. */ + /* allocation new SP entry */ if ((newsp = key_msg2sp(xpl0, PFKEY_EXTLEN(xpl0), &error)) == NULL) { return key_senderror(so, m, error); } if ((newsp->id = key_getnewspid()) == 0) { - _key_delsp(newsp); + KEY_FREESP(&newsp); return key_senderror(so, m, ENOBUFS); } @@ -1905,18 +1875,20 @@ key_spdadd(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) /* sanity check on addr pair */ if (((struct sockaddr *)(src0 + 1))->sa_family != ((struct sockaddr *)(dst0+ 1))->sa_family) { - _key_delsp(newsp); + KEY_FREESP(&newsp); return key_senderror(so, m, EINVAL); } if (((struct sockaddr *)(src0 + 1))->sa_len != ((struct sockaddr *)(dst0+ 1))->sa_len) { - _key_delsp(newsp); + KEY_FREESP(&newsp); return key_senderror(so, m, EINVAL); } #if 1 - if (newsp->req && newsp->req->saidx.src.sa.sa_family && newsp->req->saidx.dst.sa.sa_family) { - if (newsp->req->saidx.src.sa.sa_family != newsp->req->saidx.dst.sa.sa_family) { - _key_delsp(newsp); + if (newsp->req && newsp->req->saidx.src.sa.sa_family && + newsp->req->saidx.dst.sa.sa_family) { + if (newsp->req->saidx.src.sa.sa_family != + newsp->req->saidx.dst.sa.sa_family) { + KEY_FREESP(&newsp); return key_senderror(so, m, EINVAL); } } @@ -1927,9 +1899,10 @@ key_spdadd(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) newsp->lifetime = lft ? lft->sadb_lifetime_addtime : 0; newsp->validtime = lft ? lft->sadb_lifetime_usetime : 0; - newsp->refcnt = 1; /* do not reclaim until I say I do */ + SPTREE_WLOCK(); + TAILQ_INSERT_TAIL(&V_sptree[newsp->spidx.dir], newsp, chain); newsp->state = IPSEC_SPSTATE_ALIVE; - LIST_INSERT_TAIL(&V_sptree[newsp->spidx.dir], newsp, secpolicy, chain); + SPTREE_WUNLOCK(); /* delete the entry in spacqtree */ if (mhp->msg->sadb_msg_type == SADB_X_SPDUPDATE) { @@ -2104,9 +2077,7 @@ key_spddelete(struct socket *so, struct mbuf *m, /* save policy id to buffer to be returned. */ xpl0->sadb_x_policy_id = sp->id; - SPTREE_LOCK(); - sp->state = IPSEC_SPSTATE_DEAD; - SPTREE_UNLOCK(); + key_unlink(sp); KEY_FREESP(&sp); { @@ -2171,9 +2142,7 @@ key_spddelete2(struct socket *so, struct mbuf *m, return key_senderror(so, m, EINVAL); } - SPTREE_LOCK(); - sp->state = IPSEC_SPSTATE_DEAD; - SPTREE_UNLOCK(); + key_unlink(sp); KEY_FREESP(&sp); { @@ -2352,8 +2321,9 @@ key_spdacquire(struct secpolicy *sp) static int key_spdflush(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) { + TAILQ_HEAD(, secpolicy) drainq; struct sadb_msg *newmsg; - struct secpolicy *sp; + struct secpolicy *sp, *nextsp; u_int dir; IPSEC_ASSERT(so != NULL, ("null socket")); @@ -2364,11 +2334,23 @@ key_spdflush(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) if (m->m_len != PFKEY_ALIGN8(sizeof(struct sadb_msg))) return key_senderror(so, m, EINVAL); + TAILQ_INIT(&drainq); + SPTREE_WLOCK(); for (dir = 0; dir < IPSEC_DIR_MAX; dir++) { - SPTREE_LOCK(); - LIST_FOREACH(sp, &V_sptree[dir], chain) - sp->state = IPSEC_SPSTATE_DEAD; - SPTREE_UNLOCK(); + TAILQ_CONCAT(&drainq, &V_sptree[dir], chain); + } + /* + * We need to set state to DEAD for each policy to be sure, + * that another thread won't try to unlink it. + */ + TAILQ_FOREACH(sp, &drainq, chain) + sp->state = IPSEC_SPSTATE_DEAD; + SPTREE_WUNLOCK(); + sp = TAILQ_FIRST(&drainq); + while (sp != NULL) { + nextsp = TAILQ_NEXT(sp, chain); + KEY_FREESP(&sp); + sp = nextsp; } if (sizeof(struct sadb_msg) > m->m_len + M_TRAILINGSPACE(m)) { @@ -2401,6 +2383,7 @@ key_spdflush(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) static int key_spddump(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) { + SPTREE_RLOCK_TRACKER; struct secpolicy *sp; int cnt; u_int dir; @@ -2413,20 +2396,20 @@ key_spddump(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) /* search SPD entry and get buffer size. */ cnt = 0; - SPTREE_LOCK(); + SPTREE_RLOCK(); for (dir = 0; dir < IPSEC_DIR_MAX; dir++) { - LIST_FOREACH(sp, &V_sptree[dir], chain) { + TAILQ_FOREACH(sp, &V_sptree[dir], chain) { cnt++; } } if (cnt == 0) { - SPTREE_UNLOCK(); + SPTREE_RUNLOCK(); return key_senderror(so, m, ENOENT); } for (dir = 0; dir < IPSEC_DIR_MAX; dir++) { - LIST_FOREACH(sp, &V_sptree[dir], chain) { + TAILQ_FOREACH(sp, &V_sptree[dir], chain) { --cnt; n = key_setdumpsp(sp, SADB_X_SPDDUMP, cnt, mhp->msg->sadb_msg_pid); @@ -2436,7 +2419,7 @@ key_spddump(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) } } - SPTREE_UNLOCK(); + SPTREE_RUNLOCK(); m_freem(m); return 0; } @@ -2848,7 +2831,6 @@ key_cleansav(struct secasvar *sav) sav->tdb_xform->xf_zeroize(sav); sav->tdb_xform = NULL; } else { - KASSERT(sav->iv == NULL, ("iv but no xform")); if (sav->key_auth != NULL) bzero(sav->key_auth->key_data, _KEYLEN(sav->key_auth)); if (sav->key_enc != NULL) @@ -3026,7 +3008,6 @@ key_setsaval(struct secasvar *sav, struct mbuf *m, sav->key_enc = NULL; sav->sched = NULL; sav->schedlen = 0; - sav->iv = NULL; sav->lft_c = NULL; sav->lft_h = NULL; sav->lft_s = NULL; @@ -3871,48 +3852,19 @@ key_ismyaddr(struct sockaddr *sa) * compare my own address for IPv6. * 1: ours * 0: other - * NOTE: derived ip6_input() in KAME. This is necessary to modify more. */ -#include <netinet6/in6_var.h> - static int key_ismyaddr6(struct sockaddr_in6 *sin6) { - struct in6_ifaddr *ia; -#if 0 - struct in6_multi *in6m; -#endif + struct in6_addr in6; - IN6_IFADDR_RLOCK(); - TAILQ_FOREACH(ia, &V_in6_ifaddrhead, ia_link) { - if (key_sockaddrcmp((struct sockaddr *)sin6, - (struct sockaddr *)&ia->ia_addr, 0) == 0) { - IN6_IFADDR_RUNLOCK(); - return 1; - } + if (!IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr)) + return (in6_localip(&sin6->sin6_addr)); -#if 0 - /* - * XXX Multicast - * XXX why do we care about multlicast here while we don't care - * about IPv4 multicast?? - * XXX scope - */ - in6m = NULL; - IN6_LOOKUP_MULTI(sin6->sin6_addr, ia->ia_ifp, in6m); - if (in6m) { - IN6_IFADDR_RUNLOCK(); - return 1; - } -#endif - } - IN6_IFADDR_RUNLOCK(); - - /* loopback, just for safety */ - if (IN6_IS_ADDR_LOOPBACK(&sin6->sin6_addr)) - return 1; - - return 0; + /* Convert address into kernel-internal form */ + in6 = sin6->sin6_addr; + in6.s6_addr16[1] = htons(sin6->sin6_scope_id & 0xffff); + return (in6_localip(&in6)); } #endif /*INET6*/ @@ -4222,47 +4174,30 @@ key_bbcmp(const void *a1, const void *a2, u_int bits) static void key_flush_spd(time_t now) { - static u_int16_t sptree_scangen = 0; - u_int16_t gen = sptree_scangen++; + SPTREE_RLOCK_TRACKER; struct secpolicy *sp; u_int dir; /* SPD */ for (dir = 0; dir < IPSEC_DIR_MAX; dir++) { restart: - SPTREE_LOCK(); - LIST_FOREACH(sp, &V_sptree[dir], chain) { - if (sp->scangen == gen) /* previously handled */ - continue; - sp->scangen = gen; - if (sp->state == IPSEC_SPSTATE_DEAD && - sp->refcnt == 1) { - /* - * Ensure that we only decrease refcnt once, - * when we're the last consumer. - * Directly call SP_DELREF/key_delsp instead - * of KEY_FREESP to avoid unlocking/relocking - * SPTREE_LOCK before key_delsp: may refcnt - * be increased again during that time ? - * NB: also clean entries created by - * key_spdflush - */ - SP_DELREF(sp); - key_delsp(sp); - SPTREE_UNLOCK(); - goto restart; - } + SPTREE_RLOCK(); + TAILQ_FOREACH(sp, &V_sptree[dir], chain) { if (sp->lifetime == 0 && sp->validtime == 0) continue; - if ((sp->lifetime && now - sp->created > sp->lifetime) - || (sp->validtime && now - sp->lastused > sp->validtime)) { - sp->state = IPSEC_SPSTATE_DEAD; - SPTREE_UNLOCK(); + if ((sp->lifetime && + now - sp->created > sp->lifetime) || + (sp->validtime && + now - sp->lastused > sp->validtime)) { + SP_ADDREF(sp); + SPTREE_RUNLOCK(); key_spdexpire(sp); + key_unlink(sp); + KEY_FREESP(&sp); goto restart; } } - SPTREE_UNLOCK(); + SPTREE_RUNLOCK(); } } @@ -7655,7 +7590,7 @@ key_init(void) int i; for (i = 0; i < IPSEC_DIR_MAX; i++) - LIST_INIT(&V_sptree[i]); + TAILQ_INIT(&V_sptree[i]); LIST_INIT(&V_sahtree); @@ -7665,10 +7600,6 @@ key_init(void) LIST_INIT(&V_acqtree); LIST_INIT(&V_spacqtree); - /* system default */ - V_ip4_def_policy.policy = IPSEC_POLICY_NONE; - V_ip4_def_policy.refcnt++; /*never reclaim this*/ - if (!IS_DEFAULT_VNET(curvnet)) return; @@ -7692,6 +7623,7 @@ key_init(void) void key_destroy(void) { + TAILQ_HEAD(, secpolicy) drainq; struct secpolicy *sp, *nextsp; struct secacq *acq, *nextacq; struct secspacq *spacq, *nextspacq; @@ -7699,18 +7631,18 @@ key_destroy(void) struct secreg *reg; int i; - SPTREE_LOCK(); + TAILQ_INIT(&drainq); + SPTREE_WLOCK(); for (i = 0; i < IPSEC_DIR_MAX; i++) { - for (sp = LIST_FIRST(&V_sptree[i]); - sp != NULL; sp = nextsp) { - nextsp = LIST_NEXT(sp, chain); - if (__LIST_CHAINED(sp)) { - LIST_REMOVE(sp, chain); - free(sp, M_IPSEC_SP); - } - } + TAILQ_CONCAT(&drainq, &V_sptree[i], chain); + } + SPTREE_WUNLOCK(); + sp = TAILQ_FIRST(&drainq); + while (sp != NULL) { + nextsp = TAILQ_NEXT(sp, chain); + KEY_FREESP(&sp); + sp = nextsp; } - SPTREE_UNLOCK(); SAHTREE_LOCK(); for (sah = LIST_FIRST(&V_sahtree); sah != NULL; sah = nextsah) { @@ -7831,14 +7763,6 @@ key_sa_chgstate(struct secasvar *sav, u_int8_t state) } } -void -key_sa_stir_iv(struct secasvar *sav) -{ - - IPSEC_ASSERT(sav->iv != NULL, ("null IV")); - key_randomfill(sav->iv, sav->ivlen); -} - /* * Take one of the kernel's security keys and convert it into a PF_KEY * structure within an mbuf, suitable for sending up to a waiting diff --git a/sys/netipsec/key.h b/sys/netipsec/key.h index f197977..39fd1b2 100644 --- a/sys/netipsec/key.h +++ b/sys/netipsec/key.h @@ -106,7 +106,6 @@ extern void key_init(void); extern void key_destroy(void); #endif extern void key_sa_recordxfer(struct secasvar *, struct mbuf *); -extern void key_sa_stir_iv(struct secasvar *); #ifdef IPSEC_NAT_T u_int16_t key_portfromsaddr(struct sockaddr *); #define KEY_PORTFROMSADDR(saddr) \ diff --git a/sys/netipsec/key_debug.c b/sys/netipsec/key_debug.c index 2031af6..c9365e2 100644 --- a/sys/netipsec/key_debug.c +++ b/sys/netipsec/key_debug.c @@ -463,8 +463,8 @@ kdebug_secpolicy(struct secpolicy *sp) if (sp == NULL) panic("%s: NULL pointer was passed.\n", __func__); - printf("secpolicy{ refcnt=%u state=%u policy=%u\n", - sp->refcnt, sp->state, sp->policy); + printf("secpolicy{ refcnt=%u policy=%u\n", + sp->refcnt, sp->policy); kdebug_secpolicyindex(&sp->spidx); @@ -577,11 +577,6 @@ kdebug_secasv(struct secasvar *sav) kdebug_sadb_key((struct sadb_ext *)sav->key_auth); if (sav->key_enc != NULL) kdebug_sadb_key((struct sadb_ext *)sav->key_enc); - if (sav->iv != NULL) { - printf(" iv="); - ipsec_hexdump(sav->iv, sav->ivlen ? sav->ivlen : 8); - printf("\n"); - } if (sav->replay != NULL) kdebug_secreplay(sav->replay); diff --git a/sys/netipsec/keydb.h b/sys/netipsec/keydb.h index 15dbc9c..3fe28eb 100644 --- a/sys/netipsec/keydb.h +++ b/sys/netipsec/keydb.h @@ -122,10 +122,10 @@ struct secasvar { struct seckey *key_auth; /* Key for Authentication */ struct seckey *key_enc; /* Key for Encryption */ - caddr_t iv; /* Initilization Vector */ u_int ivlen; /* length of IV */ void *sched; /* intermediate encryption key */ size_t schedlen; + uint64_t cntr; /* counter for GCM and CTR */ struct secreplay *replay; /* replay prevention */ time_t created; /* for lifetime */ @@ -163,6 +163,12 @@ struct secasvar { #define SECASVAR_UNLOCK(_sav) mtx_unlock(&(_sav)->lock) #define SECASVAR_LOCK_DESTROY(_sav) mtx_destroy(&(_sav)->lock) #define SECASVAR_LOCK_ASSERT(_sav) mtx_assert(&(_sav)->lock, MA_OWNED) +#define SAV_ISGCM(_sav) \ + ((_sav)->alg_enc == SADB_X_EALG_AESGCM8 || \ + (_sav)->alg_enc == SADB_X_EALG_AESGCM12 || \ + (_sav)->alg_enc == SADB_X_EALG_AESGCM16) +#define SAV_ISCTR(_sav) ((_sav)->alg_enc == SADB_X_EALG_AESCTR) +#define SAV_ISCTRORGCM(_sav) (SAV_ISCTR((_sav)) || SAV_ISGCM((_sav))) /* replay prevention */ struct secreplay { diff --git a/sys/netipsec/xform.h b/sys/netipsec/xform.h index e389cab..fee457b 100644 --- a/sys/netipsec/xform.h +++ b/sys/netipsec/xform.h @@ -83,7 +83,7 @@ struct ipescrequest; struct xformsw { u_short xf_type; /* xform ID */ -#define XF_IP4 1 /* IP inside IP */ +#define XF_IP4 1 /* unused */ #define XF_AH 2 /* AH */ #define XF_ESP 3 /* ESP */ #define XF_TCPSIGNATURE 5 /* TCP MD5 Signature option, RFC 2358 */ @@ -105,15 +105,10 @@ struct xformsw { #ifdef _KERNEL extern void xform_register(struct xformsw*); extern int xform_init(struct secasvar *sav, int xftype); +extern int xform_ah_authsize(struct auth_hash *esph); struct cryptoini; -/* XF_IP4 */ -extern int ip4_input6(struct mbuf **m, int *offp, int proto); -extern void ip4_input(struct mbuf *m, int); -extern int ipip_output(struct mbuf *, struct ipsecrequest *, - struct mbuf **, int, int); - /* XF_AH */ extern int ah_init0(struct secasvar *, struct xformsw *, struct cryptoini *); extern int ah_zeroize(struct secasvar *sav); diff --git a/sys/netipsec/xform_ah.c b/sys/netipsec/xform_ah.c index afa452c..6829d59 100644 --- a/sys/netipsec/xform_ah.c +++ b/sys/netipsec/xform_ah.c @@ -80,11 +80,11 @@ (((sav)->flags & SADB_X_EXT_OLD) ? \ sizeof (struct ah) : sizeof (struct ah) + sizeof (u_int32_t)) /* - * Return authenticator size in bytes. The old protocol is known - * to use a fixed 16-byte authenticator. The new algorithm use 12-byte - * authenticator. + * Return authenticator size in bytes, based on a field in the + * algorithm descriptor. */ -#define AUTHSIZE(sav) ah_authsize(sav) +#define AUTHSIZE(sav) ((sav->flags & SADB_X_EXT_OLD) ? 16 : \ + xform_ah_authsize((sav)->tdb_authalgxform)) VNET_DEFINE(int, ah_enable) = 1; /* control flow of packets with AH */ VNET_DEFINE(int, ah_cleartos) = 1; /* clear ip_tos when doing AH calc */ @@ -110,27 +110,35 @@ static unsigned char ipseczeroes[256]; /* larger than an ip6 extension hdr */ static int ah_input_cb(struct cryptop*); static int ah_output_cb(struct cryptop*); -static int -ah_authsize(struct secasvar *sav) +int +xform_ah_authsize(struct auth_hash *esph) { + int alen; - IPSEC_ASSERT(sav != NULL, ("%s: sav == NULL", __func__)); + if (esph == NULL) + return 0; - if (sav->flags & SADB_X_EXT_OLD) - return 16; + switch (esph->type) { + case CRYPTO_SHA2_256_HMAC: + case CRYPTO_SHA2_384_HMAC: + case CRYPTO_SHA2_512_HMAC: + alen = esph->hashsize / 2; /* RFC4868 2.3 */ + break; + + case CRYPTO_AES_128_NIST_GMAC: + case CRYPTO_AES_192_NIST_GMAC: + case CRYPTO_AES_256_NIST_GMAC: + alen = esph->hashsize; + break; - switch (sav->alg_auth) { - case SADB_X_AALG_SHA2_256: - return 16; - case SADB_X_AALG_SHA2_384: - return 24; - case SADB_X_AALG_SHA2_512: - return 32; default: - return AH_HMAC_HASHLEN; + alen = AH_HMAC_HASHLEN; + break; } - /* NOTREACHED */ + + return alen; } + /* * NB: this is public for use by the PF_KEY support. */ @@ -158,6 +166,12 @@ ah_algorithm_lookup(int alg) return &auth_hash_hmac_sha2_384; case SADB_X_AALG_SHA2_512: return &auth_hash_hmac_sha2_512; + case SADB_X_AALG_AES128GMAC: + return &auth_hash_nist_gmac_aes_128; + case SADB_X_AALG_AES192GMAC: + return &auth_hash_nist_gmac_aes_192; + case SADB_X_AALG_AES256GMAC: + return &auth_hash_nist_gmac_aes_256; } return NULL; } @@ -565,12 +579,11 @@ ah_massage_headers(struct mbuf **m0, int proto, int skip, int alg, int out) static int ah_input(struct mbuf *m, struct secasvar *sav, int skip, int protoff) { + char buf[128]; struct auth_hash *ahx; - struct tdb_ident *tdbi; struct tdb_crypto *tc; - struct m_tag *mtag; struct newah *ah; - int hl, rplen, authsize; + int hl, rplen, authsize, error; struct cryptodesc *crda; struct cryptop *crp; @@ -596,7 +609,7 @@ ah_input(struct mbuf *m, struct secasvar *sav, int skip, int protoff) if (sav->replay && !ipsec_chkreplay(ntohl(ah->ah_seq), sav)) { AHSTAT_INC(ahs_replay); DPRINTF(("%s: packet replay failure: %s\n", __func__, - ipsec_logsastr(sav))); + ipsec_logsastr(sav, buf, sizeof(buf)))); m_freem(m); return ENOBUFS; } @@ -607,10 +620,10 @@ ah_input(struct mbuf *m, struct secasvar *sav, int skip, int protoff) authsize = AUTHSIZE(sav); if (hl != authsize + rplen - sizeof (struct ah)) { DPRINTF(("%s: bad authenticator length %u (expecting %lu)" - " for packet in SA %s/%08lx\n", __func__, - hl, (u_long) (authsize + rplen - sizeof (struct ah)), - ipsec_address(&sav->sah->saidx.dst), - (u_long) ntohl(sav->spi))); + " for packet in SA %s/%08lx\n", __func__, hl, + (u_long) (authsize + rplen - sizeof (struct ah)), + ipsec_address(&sav->sah->saidx.dst, buf, sizeof(buf)), + (u_long) ntohl(sav->spi))); AHSTAT_INC(ahs_badauthl); m_freem(m); return EACCES; @@ -638,27 +651,9 @@ ah_input(struct mbuf *m, struct secasvar *sav, int skip, int protoff) crda->crd_klen = _KEYBITS(sav->key_auth); crda->crd_key = sav->key_auth->key_data; - /* Find out if we've already done crypto. */ - for (mtag = m_tag_find(m, PACKET_TAG_IPSEC_IN_CRYPTO_DONE, NULL); - mtag != NULL; - mtag = m_tag_find(m, PACKET_TAG_IPSEC_IN_CRYPTO_DONE, mtag)) { - tdbi = (struct tdb_ident *) (mtag + 1); - if (tdbi->proto == sav->sah->saidx.proto && - tdbi->spi == sav->spi && - !bcmp(&tdbi->dst, &sav->sah->saidx.dst, - sizeof (union sockaddr_union))) - break; - } - /* Allocate IPsec-specific opaque crypto info. */ - if (mtag == NULL) { - tc = (struct tdb_crypto *) malloc(sizeof (struct tdb_crypto) + - skip + rplen + authsize, M_XDATA, M_NOWAIT|M_ZERO); - } else { - /* Hash verification has already been done successfully. */ - tc = (struct tdb_crypto *) malloc(sizeof (struct tdb_crypto), - M_XDATA, M_NOWAIT|M_ZERO); - } + tc = (struct tdb_crypto *) malloc(sizeof (struct tdb_crypto) + + skip + rplen + authsize, M_XDATA, M_NOWAIT | M_ZERO); if (tc == NULL) { DPRINTF(("%s: failed to allocate tdb_crypto\n", __func__)); AHSTAT_INC(ahs_crypto); @@ -667,29 +662,24 @@ ah_input(struct mbuf *m, struct secasvar *sav, int skip, int protoff) return ENOBUFS; } - /* Only save information if crypto processing is needed. */ - if (mtag == NULL) { - int error; + /* + * Save the authenticator, the skipped portion of the packet, + * and the AH header. + */ + m_copydata(m, 0, skip + rplen + authsize, (caddr_t)(tc+1)); + + /* Zeroize the authenticator on the packet. */ + m_copyback(m, skip + rplen, authsize, ipseczeroes); - /* - * Save the authenticator, the skipped portion of the packet, - * and the AH header. - */ - m_copydata(m, 0, skip + rplen + authsize, (caddr_t)(tc+1)); - - /* Zeroize the authenticator on the packet. */ - m_copyback(m, skip + rplen, authsize, ipseczeroes); - - /* "Massage" the packet headers for crypto processing. */ - error = ah_massage_headers(&m, sav->sah->saidx.dst.sa.sa_family, - skip, ahx->type, 0); - if (error != 0) { - /* NB: mbuf is free'd by ah_massage_headers */ - AHSTAT_INC(ahs_hdrops); - free(tc, M_XDATA); - crypto_freereq(crp); - return error; - } + /* "Massage" the packet headers for crypto processing. */ + error = ah_massage_headers(&m, sav->sah->saidx.dst.sa.sa_family, + skip, ahx->type, 0); + if (error != 0) { + /* NB: mbuf is free'd by ah_massage_headers */ + AHSTAT_INC(ahs_hdrops); + free(tc, M_XDATA); + crypto_freereq(crp); + return (error); } /* Crypto operation descriptor. */ @@ -707,14 +697,9 @@ ah_input(struct mbuf *m, struct secasvar *sav, int skip, int protoff) tc->tc_nxt = ah->ah_nxt; tc->tc_protoff = protoff; tc->tc_skip = skip; - tc->tc_ptr = (caddr_t) mtag; /* Save the mtag we've identified. */ KEY_ADDREFSA(sav); tc->tc_sav = sav; - - if (mtag == NULL) - return crypto_dispatch(crp); - else - return ah_input_cb(crp); + return (crypto_dispatch(crp)); } /* @@ -723,13 +708,13 @@ ah_input(struct mbuf *m, struct secasvar *sav, int skip, int protoff) static int ah_input_cb(struct cryptop *crp) { + char buf[INET6_ADDRSTRLEN]; int rplen, error, skip, protoff; unsigned char calc[AH_ALEN_MAX]; struct mbuf *m; struct cryptodesc *crd; struct auth_hash *ahx; struct tdb_crypto *tc; - struct m_tag *mtag; struct secasvar *sav; struct secasindex *saidx; u_int8_t nxt; @@ -743,7 +728,6 @@ ah_input_cb(struct cryptop *crp) skip = tc->tc_skip; nxt = tc->tc_nxt; protoff = tc->tc_protoff; - mtag = (struct m_tag *) tc->tc_ptr; m = (struct mbuf *) crp->crp_buf; sav = tc->tc_sav; @@ -789,34 +773,22 @@ ah_input_cb(struct cryptop *crp) /* Copy authenticator off the packet. */ m_copydata(m, skip + rplen, authsize, calc); - /* - * If we have an mtag, we don't need to verify the authenticator -- - * it has been verified by an IPsec-aware NIC. - */ - if (mtag == NULL) { - ptr = (caddr_t) (tc + 1); - - /* Verify authenticator. */ - if (bcmp(ptr + skip + rplen, calc, authsize)) { - DPRINTF(("%s: authentication hash mismatch for packet " - "in SA %s/%08lx\n", __func__, - ipsec_address(&saidx->dst), - (u_long) ntohl(sav->spi))); - AHSTAT_INC(ahs_badauth); - error = EACCES; - goto bad; - } - - /* Fix the Next Protocol field. */ - ((u_int8_t *) ptr)[protoff] = nxt; - - /* Copyback the saved (uncooked) network headers. */ - m_copyback(m, 0, skip, ptr); - } else { - /* Fix the Next Protocol field. */ - m_copyback(m, protoff, sizeof(u_int8_t), &nxt); + /* Verify authenticator. */ + ptr = (caddr_t) (tc + 1); + if (timingsafe_bcmp(ptr + skip + rplen, calc, authsize)) { + DPRINTF(("%s: authentication hash mismatch for packet " + "in SA %s/%08lx\n", __func__, + ipsec_address(&saidx->dst, buf, sizeof(buf)), + (u_long) ntohl(sav->spi))); + AHSTAT_INC(ahs_badauth); + error = EACCES; + goto bad; } + /* Fix the Next Protocol field. */ + ((u_int8_t *) ptr)[protoff] = nxt; + /* Copyback the saved (uncooked) network headers. */ + m_copyback(m, 0, skip, ptr); free(tc, M_XDATA), tc = NULL; /* No longer needed */ /* @@ -845,8 +817,8 @@ ah_input_cb(struct cryptop *crp) error = m_striphdr(m, skip, rplen + authsize); if (error) { DPRINTF(("%s: mangled mbuf chain for SA %s/%08lx\n", __func__, - ipsec_address(&saidx->dst), (u_long) ntohl(sav->spi))); - + ipsec_address(&saidx->dst, buf, sizeof(buf)), + (u_long) ntohl(sav->spi))); AHSTAT_INC(ahs_hdrops); goto bad; } @@ -854,12 +826,12 @@ ah_input_cb(struct cryptop *crp) switch (saidx->dst.sa.sa_family) { #ifdef INET6 case AF_INET6: - error = ipsec6_common_input_cb(m, sav, skip, protoff, mtag); + error = ipsec6_common_input_cb(m, sav, skip, protoff); break; #endif #ifdef INET case AF_INET: - error = ipsec4_common_input_cb(m, sav, skip, protoff, mtag); + error = ipsec4_common_input_cb(m, sav, skip, protoff); break; #endif default: @@ -885,13 +857,10 @@ bad: * AH output routine, called by ipsec[46]_process_packet(). */ static int -ah_output( - struct mbuf *m, - struct ipsecrequest *isr, - struct mbuf **mp, - int skip, - int protoff) +ah_output(struct mbuf *m, struct ipsecrequest *isr, struct mbuf **mp, + int skip, int protoff) { + char buf[INET6_ADDRSTRLEN]; struct secasvar *sav; struct auth_hash *ahx; struct cryptodesc *crda; @@ -929,7 +898,7 @@ ah_output( DPRINTF(("%s: unknown/unsupported protocol family %u, " "SA %s/%08lx\n", __func__, sav->sah->saidx.dst.sa.sa_family, - ipsec_address(&sav->sah->saidx.dst), + ipsec_address(&sav->sah->saidx.dst, buf, sizeof(buf)), (u_long) ntohl(sav->spi))); AHSTAT_INC(ahs_nopf); error = EPFNOSUPPORT; @@ -939,7 +908,7 @@ ah_output( if (rplen + authsize + m->m_pkthdr.len > maxpacketsize) { DPRINTF(("%s: packet in SA %s/%08lx got too big " "(len %u, max len %u)\n", __func__, - ipsec_address(&sav->sah->saidx.dst), + ipsec_address(&sav->sah->saidx.dst, buf, sizeof(buf)), (u_long) ntohl(sav->spi), rplen + authsize + m->m_pkthdr.len, maxpacketsize)); AHSTAT_INC(ahs_toobig); @@ -953,7 +922,7 @@ ah_output( m = m_unshare(m, M_NOWAIT); if (m == NULL) { DPRINTF(("%s: cannot clone mbuf chain, SA %s/%08lx\n", __func__, - ipsec_address(&sav->sah->saidx.dst), + ipsec_address(&sav->sah->saidx.dst, buf, sizeof(buf)), (u_long) ntohl(sav->spi))); AHSTAT_INC(ahs_hdrops); error = ENOBUFS; @@ -966,7 +935,7 @@ ah_output( DPRINTF(("%s: failed to inject %u byte AH header for SA " "%s/%08lx\n", __func__, rplen + authsize, - ipsec_address(&sav->sah->saidx.dst), + ipsec_address(&sav->sah->saidx.dst, buf, sizeof(buf)), (u_long) ntohl(sav->spi))); AHSTAT_INC(ahs_hdrops); /*XXX differs from openbsd */ error = ENOBUFS; @@ -993,9 +962,8 @@ ah_output( if (sav->replay->count == ~0 && (sav->flags & SADB_X_EXT_CYCSEQ) == 0) { DPRINTF(("%s: replay counter wrapped for SA %s/%08lx\n", - __func__, - ipsec_address(&sav->sah->saidx.dst), - (u_long) ntohl(sav->spi))); + __func__, ipsec_address(&sav->sah->saidx.dst, buf, + sizeof(buf)), (u_long) ntohl(sav->spi))); AHSTAT_INC(ahs_wrap); error = EINVAL; goto bad; @@ -1135,6 +1103,7 @@ ah_output_cb(struct cryptop *crp) m = (struct mbuf *) crp->crp_buf; isr = tc->tc_isr; + IPSEC_ASSERT(isr->sp != NULL, ("NULL isr->sp")); IPSECREQUEST_LOCK(isr); sav = tc->tc_sav; /* With the isr lock released SA pointer can be updated. */ @@ -1198,16 +1167,18 @@ ah_output_cb(struct cryptop *crp) error = ipsec_process_done(m, isr); KEY_FREESAV(&sav); IPSECREQUEST_UNLOCK(isr); - return error; + KEY_FREESP(&isr->sp); + return (error); bad: if (sav) KEY_FREESAV(&sav); IPSECREQUEST_UNLOCK(isr); + KEY_FREESP(&isr->sp); if (m) m_freem(m); free(tc, M_XDATA); crypto_freereq(crp); - return error; + return (error); } static struct xformsw ah_xformsw = { diff --git a/sys/netipsec/xform_esp.c b/sys/netipsec/xform_esp.c index 90f6d56..f1c1eaf 100644 --- a/sys/netipsec/xform_esp.c +++ b/sys/netipsec/xform_esp.c @@ -46,6 +46,9 @@ #include <sys/kernel.h> #include <sys/random.h> #include <sys/sysctl.h> +#include <sys/lock.h> +#include <sys/mutex.h> +#include <machine/atomic.h> #include <net/if.h> #include <net/vnet.h> @@ -113,12 +116,16 @@ esp_algorithm_lookup(int alg) return &enc_xform_blf; case SADB_X_EALG_CAST128CBC: return &enc_xform_cast5; - case SADB_X_EALG_SKIPJACK: - return &enc_xform_skipjack; case SADB_EALG_NULL: return &enc_xform_null; case SADB_X_EALG_CAMELLIACBC: return &enc_xform_camellia; + case SADB_X_EALG_AESCTR: + return &enc_xform_aes_icm; + case SADB_X_EALG_AESGCM16: + return &enc_xform_aes_nist_gcm; + case SADB_X_EALG_AESGMAC: + return &enc_xform_aes_nist_gmac; } return NULL; } @@ -176,12 +183,14 @@ esp_init(struct secasvar *sav, struct xformsw *xsp) __func__, txform->name)); return EINVAL; } - if ((sav->flags&(SADB_X_EXT_OLD|SADB_X_EXT_IV4B)) == SADB_X_EXT_IV4B) { + if ((sav->flags & (SADB_X_EXT_OLD | SADB_X_EXT_IV4B)) == + SADB_X_EXT_IV4B) { DPRINTF(("%s: 4-byte IV not supported with protocol\n", __func__)); return EINVAL; } - keylen = _KEYLEN(sav->key_enc); + /* subtract off the salt, RFC4106, 8.1 and RFC3686, 5.1 */ + keylen = _KEYLEN(sav->key_enc) - SAV_ISCTRORGCM(sav) * 4; if (txform->minkey > keylen || keylen > txform->maxkey) { DPRINTF(("%s: invalid key length %u, must be in the range " "[%u..%u] for algorithm %s\n", __func__, @@ -196,9 +205,10 @@ esp_init(struct secasvar *sav, struct xformsw *xsp) * the ESP header will be processed incorrectly. The * compromise is to force it to zero here. */ - sav->ivlen = (txform == &enc_xform_null ? 0 : txform->blocksize); - sav->iv = (caddr_t) malloc(sav->ivlen, M_XDATA, M_WAITOK); - key_randomfill(sav->iv, sav->ivlen); /*XXX*/ + if (SAV_ISCTRORGCM(sav)) + sav->ivlen = 8; /* RFC4106 3.1 and RFC3686 3.1 */ + else + sav->ivlen = (txform == &enc_xform_null ? 0 : txform->ivsize); /* * Setup AH-related state. @@ -213,12 +223,42 @@ esp_init(struct secasvar *sav, struct xformsw *xsp) sav->tdb_xform = xsp; sav->tdb_encalgxform = txform; + /* + * Whenever AES-GCM is used for encryption, one + * of the AES authentication algorithms is chosen + * as well, based on the key size. + */ + if (sav->alg_enc == SADB_X_EALG_AESGCM16) { + switch (keylen) { + case AES_128_GMAC_KEY_LEN: + sav->alg_auth = SADB_X_AALG_AES128GMAC; + sav->tdb_authalgxform = &auth_hash_nist_gmac_aes_128; + break; + case AES_192_GMAC_KEY_LEN: + sav->alg_auth = SADB_X_AALG_AES192GMAC; + sav->tdb_authalgxform = &auth_hash_nist_gmac_aes_192; + break; + case AES_256_GMAC_KEY_LEN: + sav->alg_auth = SADB_X_AALG_AES256GMAC; + sav->tdb_authalgxform = &auth_hash_nist_gmac_aes_256; + break; + default: + DPRINTF(("%s: invalid key length %u" + "for algorithm %s\n", __func__, + keylen, txform->name)); + return EINVAL; + } + bzero(&cria, sizeof(cria)); + cria.cri_alg = sav->tdb_authalgxform->type; + cria.cri_key = sav->key_enc->key_data; + cria.cri_klen = _KEYBITS(sav->key_enc) - SAV_ISGCM(sav) * 32; + } + /* Initialize crypto session. */ - bzero(&crie, sizeof (crie)); + bzero(&crie, sizeof(crie)); crie.cri_alg = sav->tdb_encalgxform->type; - crie.cri_klen = _KEYBITS(sav->key_enc); crie.cri_key = sav->key_enc->key_data; - /* XXX Rounds ? */ + crie.cri_klen = _KEYBITS(sav->key_enc) - SAV_ISCTRORGCM(sav) * 32; if (sav->tdb_authalgxform && sav->tdb_encalgxform) { /* init both auth & enc */ @@ -251,10 +291,6 @@ esp_zeroize(struct secasvar *sav) if (sav->key_enc) bzero(sav->key_enc->key_data, _KEYLEN(sav->key_enc)); - if (sav->iv) { - free(sav->iv, M_XDATA); - sav->iv = NULL; - } sav->tdb_encalgxform = NULL; sav->tdb_xform = NULL; return error; @@ -266,14 +302,13 @@ esp_zeroize(struct secasvar *sav) static int esp_input(struct mbuf *m, struct secasvar *sav, int skip, int protoff) { + char buf[128]; struct auth_hash *esph; struct enc_xform *espx; - struct tdb_ident *tdbi; struct tdb_crypto *tc; + uint8_t *ivp; int plen, alen, hlen; - struct m_tag *mtag; struct newesp *esp; - struct cryptodesc *crde; struct cryptop *crp; @@ -288,32 +323,19 @@ esp_input(struct mbuf *m, struct secasvar *sav, int skip, int protoff) m_freem(m); return EINVAL; } - /* XXX don't pullup, just copy header */ IP6_EXTHDR_GET(esp, struct newesp *, m, skip, sizeof (struct newesp)); esph = sav->tdb_authalgxform; espx = sav->tdb_encalgxform; - /* Determine the ESP header length */ + /* Determine the ESP header and auth length */ if (sav->flags & SADB_X_EXT_OLD) hlen = sizeof (struct esp) + sav->ivlen; else hlen = sizeof (struct newesp) + sav->ivlen; - /* Authenticator hash size */ - if (esph != NULL) { - switch (esph->type) { - case CRYPTO_SHA2_256_HMAC: - case CRYPTO_SHA2_384_HMAC: - case CRYPTO_SHA2_512_HMAC: - alen = esph->hashsize/2; - break; - default: - alen = AH_HMAC_HASHLEN; - break; - } - }else - alen = 0; + + alen = xform_ah_authsize(esph); /* * Verify payload length is multiple of encryption algorithm @@ -326,10 +348,9 @@ esp_input(struct mbuf *m, struct secasvar *sav, int skip, int protoff) plen = m->m_pkthdr.len - (skip + hlen + alen); if ((plen & (espx->blocksize - 1)) || (plen <= 0)) { DPRINTF(("%s: payload of %d octets not a multiple of %d octets," - " SA %s/%08lx\n", __func__, - plen, espx->blocksize, - ipsec_address(&sav->sah->saidx.dst), - (u_long) ntohl(sav->spi))); + " SA %s/%08lx\n", __func__, plen, espx->blocksize, + ipsec_address(&sav->sah->saidx.dst, buf, sizeof(buf)), + (u_long)ntohl(sav->spi))); ESPSTAT_INC(esps_badilen); m_freem(m); return EINVAL; @@ -338,9 +359,10 @@ esp_input(struct mbuf *m, struct secasvar *sav, int skip, int protoff) /* * Check sequence number. */ - if (esph && sav->replay && !ipsec_chkreplay(ntohl(esp->esp_seq), sav)) { + if (esph != NULL && sav->replay != NULL && + !ipsec_chkreplay(ntohl(esp->esp_seq), sav)) { DPRINTF(("%s: packet replay check for %s\n", __func__, - ipsec_logsastr(sav))); /*XXX*/ + ipsec_logsastr(sav, buf, sizeof(buf)))); /*XXX*/ ESPSTAT_INC(esps_replay); m_freem(m); return ENOBUFS; /*XXX*/ @@ -349,18 +371,6 @@ esp_input(struct mbuf *m, struct secasvar *sav, int skip, int protoff) /* Update the counters */ ESPSTAT_ADD(esps_ibytes, m->m_pkthdr.len - (skip + hlen + alen)); - /* Find out if we've already done crypto */ - for (mtag = m_tag_find(m, PACKET_TAG_IPSEC_IN_CRYPTO_DONE, NULL); - mtag != NULL; - mtag = m_tag_find(m, PACKET_TAG_IPSEC_IN_CRYPTO_DONE, mtag)) { - tdbi = (struct tdb_ident *) (mtag + 1); - if (tdbi->proto == sav->sah->saidx.proto && - tdbi->spi == sav->spi && - !bcmp(&tdbi->dst, &sav->sah->saidx.dst, - sizeof(union sockaddr_union))) - break; - } - /* Get crypto descriptors */ crp = crypto_getreq(esph && espx ? 2 : 1); if (crp == NULL) { @@ -372,12 +382,8 @@ esp_input(struct mbuf *m, struct secasvar *sav, int skip, int protoff) } /* Get IPsec-specific opaque pointer */ - if (esph == NULL || mtag != NULL) - tc = (struct tdb_crypto *) malloc(sizeof(struct tdb_crypto), - M_XDATA, M_NOWAIT|M_ZERO); - else - tc = (struct tdb_crypto *) malloc(sizeof(struct tdb_crypto) + alen, - M_XDATA, M_NOWAIT|M_ZERO); + tc = (struct tdb_crypto *) malloc(sizeof(struct tdb_crypto) + alen, + M_XDATA, M_NOWAIT | M_ZERO); if (tc == NULL) { crypto_freereq(crp); DPRINTF(("%s: failed to allocate tdb_crypto\n", __func__)); @@ -386,26 +392,24 @@ esp_input(struct mbuf *m, struct secasvar *sav, int skip, int protoff) return ENOBUFS; } - tc->tc_ptr = (caddr_t) mtag; - - if (esph) { + if (esph != NULL) { struct cryptodesc *crda = crp->crp_desc; IPSEC_ASSERT(crda != NULL, ("null ah crypto descriptor")); /* Authentication descriptor */ crda->crd_skip = skip; - crda->crd_len = m->m_pkthdr.len - (skip + alen); + if (SAV_ISGCM(sav)) + crda->crd_len = 8; /* RFC4106 5, SPI + SN */ + else + crda->crd_len = m->m_pkthdr.len - (skip + alen); crda->crd_inject = m->m_pkthdr.len - alen; crda->crd_alg = esph->type; - crda->crd_key = sav->key_auth->key_data; - crda->crd_klen = _KEYBITS(sav->key_auth); /* Copy the authenticator */ - if (mtag == NULL) - m_copydata(m, m->m_pkthdr.len - alen, alen, - (caddr_t) (tc + 1)); + m_copydata(m, m->m_pkthdr.len - alen, alen, + (caddr_t) (tc + 1)); /* Chain authentication request */ crde = crda->crd_next; @@ -431,22 +435,33 @@ esp_input(struct mbuf *m, struct secasvar *sav, int skip, int protoff) tc->tc_sav = sav; /* Decryption descriptor */ - if (espx) { - IPSEC_ASSERT(crde != NULL, ("null esp crypto descriptor")); - crde->crd_skip = skip + hlen; - crde->crd_len = m->m_pkthdr.len - (skip + hlen + alen); - crde->crd_inject = skip + hlen - sav->ivlen; - - crde->crd_alg = espx->type; - crde->crd_key = sav->key_enc->key_data; - crde->crd_klen = _KEYBITS(sav->key_enc); - /* XXX Rounds ? */ + IPSEC_ASSERT(crde != NULL, ("null esp crypto descriptor")); + crde->crd_skip = skip + hlen; + crde->crd_len = m->m_pkthdr.len - (skip + hlen + alen); + crde->crd_inject = skip + hlen - sav->ivlen; + + if (SAV_ISCTRORGCM(sav)) { + ivp = &crde->crd_iv[0]; + + /* GCM IV Format: RFC4106 4 */ + /* CTR IV Format: RFC3686 4 */ + /* Salt is last four bytes of key, RFC4106 8.1 */ + /* Nonce is last four bytes of key, RFC3686 5.1 */ + memcpy(ivp, sav->key_enc->key_data + + _KEYLEN(sav->key_enc) - 4, 4); + + if (SAV_ISCTR(sav)) { + /* Initial block counter is 1, RFC3686 4 */ + be32enc(&ivp[sav->ivlen + 4], 1); + } + + m_copydata(m, skip + hlen - sav->ivlen, sav->ivlen, &ivp[4]); + crde->crd_flags |= CRD_F_IV_EXPLICIT; } - if (mtag == NULL) - return crypto_dispatch(crp); - else - return esp_input_cb(crp); + crde->crd_alg = espx->type; + + return (crypto_dispatch(crp)); } /* @@ -455,6 +470,7 @@ esp_input(struct mbuf *m, struct secasvar *sav, int skip, int protoff) static int esp_input_cb(struct cryptop *crp) { + char buf[128]; u_int8_t lastthree[3], aalg[AH_HMAC_MAXHASHLEN]; int hlen, skip, protoff, error, alen; struct mbuf *m; @@ -462,7 +478,6 @@ esp_input_cb(struct cryptop *crp) struct auth_hash *esph; struct enc_xform *espx; struct tdb_crypto *tc; - struct m_tag *mtag; struct secasvar *sav; struct secasindex *saidx; caddr_t ptr; @@ -474,7 +489,6 @@ esp_input_cb(struct cryptop *crp) IPSEC_ASSERT(tc != NULL, ("null opaque crypto data area!")); skip = tc->tc_skip; protoff = tc->tc_protoff; - mtag = (struct m_tag *) tc->tc_ptr; m = (struct mbuf *) crp->crp_buf; sav = tc->tc_sav; @@ -514,40 +528,21 @@ esp_input_cb(struct cryptop *crp) /* If authentication was performed, check now. */ if (esph != NULL) { - switch (esph->type) { - case CRYPTO_SHA2_256_HMAC: - case CRYPTO_SHA2_384_HMAC: - case CRYPTO_SHA2_512_HMAC: - alen = esph->hashsize/2; - break; - default: - alen = AH_HMAC_HASHLEN; - break; - } - /* - * If we have a tag, it means an IPsec-aware NIC did - * the verification for us. Otherwise we need to - * check the authentication calculation. - */ + alen = xform_ah_authsize(esph); AHSTAT_INC(ahs_hist[sav->alg_auth]); - if (mtag == NULL) { - /* Copy the authenticator from the packet */ - m_copydata(m, m->m_pkthdr.len - alen, - alen, aalg); - - ptr = (caddr_t) (tc + 1); - - /* Verify authenticator */ - if (bcmp(ptr, aalg, alen) != 0) { - DPRINTF(("%s: " - "authentication hash mismatch for packet in SA %s/%08lx\n", - __func__, - ipsec_address(&saidx->dst), - (u_long) ntohl(sav->spi))); - ESPSTAT_INC(esps_badauth); - error = EACCES; - goto bad; - } + /* Copy the authenticator from the packet */ + m_copydata(m, m->m_pkthdr.len - alen, alen, aalg); + ptr = (caddr_t) (tc + 1); + + /* Verify authenticator */ + if (timingsafe_bcmp(ptr, aalg, alen) != 0) { + DPRINTF(("%s: authentication hash mismatch for " + "packet in SA %s/%08lx\n", __func__, + ipsec_address(&saidx->dst, buf, sizeof(buf)), + (u_long) ntohl(sav->spi))); + ESPSTAT_INC(esps_badauth); + error = EACCES; + goto bad; } /* Remove trailing authenticator */ @@ -573,7 +568,7 @@ esp_input_cb(struct cryptop *crp) sizeof (seq), (caddr_t) &seq); if (ipsec_updatereplay(ntohl(seq), sav)) { DPRINTF(("%s: packet replay check for %s\n", __func__, - ipsec_logsastr(sav))); + ipsec_logsastr(sav, buf, sizeof(buf)))); ESPSTAT_INC(esps_replay); error = ENOBUFS; goto bad; @@ -591,7 +586,7 @@ esp_input_cb(struct cryptop *crp) if (error) { ESPSTAT_INC(esps_hdrops); DPRINTF(("%s: bad mbuf chain, SA %s/%08lx\n", __func__, - ipsec_address(&sav->sah->saidx.dst), + ipsec_address(&sav->sah->saidx.dst, buf, sizeof(buf)), (u_long) ntohl(sav->spi))); goto bad; } @@ -603,10 +598,10 @@ esp_input_cb(struct cryptop *crp) if (lastthree[1] + 2 > m->m_pkthdr.len - skip) { ESPSTAT_INC(esps_badilen); DPRINTF(("%s: invalid padding length %d for %u byte packet " - "in SA %s/%08lx\n", __func__, - lastthree[1], m->m_pkthdr.len - skip, - ipsec_address(&sav->sah->saidx.dst), - (u_long) ntohl(sav->spi))); + "in SA %s/%08lx\n", __func__, lastthree[1], + m->m_pkthdr.len - skip, + ipsec_address(&sav->sah->saidx.dst, buf, sizeof(buf)), + (u_long) ntohl(sav->spi))); error = EINVAL; goto bad; } @@ -616,9 +611,9 @@ esp_input_cb(struct cryptop *crp) if (lastthree[1] != lastthree[0] && lastthree[1] != 0) { ESPSTAT_INC(esps_badenc); DPRINTF(("%s: decryption failed for packet in " - "SA %s/%08lx\n", __func__, - ipsec_address(&sav->sah->saidx.dst), - (u_long) ntohl(sav->spi))); + "SA %s/%08lx\n", __func__, ipsec_address( + &sav->sah->saidx.dst, buf, sizeof(buf)), + (u_long) ntohl(sav->spi))); error = EINVAL; goto bad; } @@ -633,12 +628,12 @@ esp_input_cb(struct cryptop *crp) switch (saidx->dst.sa.sa_family) { #ifdef INET6 case AF_INET6: - error = ipsec6_common_input_cb(m, sav, skip, protoff, mtag); + error = ipsec6_common_input_cb(m, sav, skip, protoff); break; #endif #ifdef INET case AF_INET: - error = ipsec4_common_input_cb(m, sav, skip, protoff, mtag); + error = ipsec4_common_input_cb(m, sav, skip, protoff); break; #endif default: @@ -664,16 +659,14 @@ bad: * ESP output routine, called by ipsec[46]_process_packet(). */ static int -esp_output( - struct mbuf *m, - struct ipsecrequest *isr, - struct mbuf **mp, - int skip, - int protoff -) +esp_output(struct mbuf *m, struct ipsecrequest *isr, struct mbuf **mp, + int skip, int protoff) { + char buf[INET6_ADDRSTRLEN]; struct enc_xform *espx; struct auth_hash *esph; + uint8_t *ivp; + uint64_t cntr; int hlen, rlen, padding, blks, alen, i, roff; struct mbuf *mo = (struct mbuf *) NULL; struct tdb_crypto *tc; @@ -699,27 +692,14 @@ esp_output( rlen = m->m_pkthdr.len - skip; /* Raw payload length. */ /* - * NB: The null encoding transform has a blocksize of 4 - * so that headers are properly aligned. + * RFC4303 2.4 Requires 4 byte alignment. */ - blks = espx->blocksize; /* IV blocksize */ + blks = MAX(4, espx->blocksize); /* Cipher blocksize */ /* XXX clamp padding length a la KAME??? */ padding = ((blks - ((rlen + 2) % blks)) % blks) + 2; - if (esph) - switch (esph->type) { - case CRYPTO_SHA2_256_HMAC: - case CRYPTO_SHA2_384_HMAC: - case CRYPTO_SHA2_512_HMAC: - alen = esph->hashsize/2; - break; - default: - alen = AH_HMAC_HASHLEN; - break; - } - else - alen = 0; + alen = xform_ah_authsize(esph); ESPSTAT_INC(esps_output); @@ -739,16 +719,19 @@ esp_output( default: DPRINTF(("%s: unknown/unsupported protocol " "family %d, SA %s/%08lx\n", __func__, - saidx->dst.sa.sa_family, ipsec_address(&saidx->dst), - (u_long) ntohl(sav->spi))); + saidx->dst.sa.sa_family, ipsec_address(&saidx->dst, + buf, sizeof(buf)), (u_long) ntohl(sav->spi))); ESPSTAT_INC(esps_nopf); error = EPFNOSUPPORT; goto bad; } + DPRINTF(("%s: skip %d hlen %d rlen %d padding %d alen %d blksd %d\n", + __func__, skip, hlen, rlen, padding, alen, blks)); if (skip + hlen + rlen + padding + alen > maxpacketsize) { DPRINTF(("%s: packet in SA %s/%08lx got too big " "(len %u, max len %u)\n", __func__, - ipsec_address(&saidx->dst), (u_long) ntohl(sav->spi), + ipsec_address(&saidx->dst, buf, sizeof(buf)), + (u_long) ntohl(sav->spi), skip + hlen + rlen + padding + alen, maxpacketsize)); ESPSTAT_INC(esps_toobig); error = EMSGSIZE; @@ -761,7 +744,8 @@ esp_output( m = m_unshare(m, M_NOWAIT); if (m == NULL) { DPRINTF(("%s: cannot clone mbuf chain, SA %s/%08lx\n", __func__, - ipsec_address(&saidx->dst), (u_long) ntohl(sav->spi))); + ipsec_address(&saidx->dst, buf, sizeof(buf)), + (u_long) ntohl(sav->spi))); ESPSTAT_INC(esps_hdrops); error = ENOBUFS; goto bad; @@ -771,8 +755,8 @@ esp_output( mo = m_makespace(m, skip, hlen, &roff); if (mo == NULL) { DPRINTF(("%s: %u byte ESP hdr inject failed for SA %s/%08lx\n", - __func__, hlen, ipsec_address(&saidx->dst), - (u_long) ntohl(sav->spi))); + __func__, hlen, ipsec_address(&saidx->dst, buf, + sizeof(buf)), (u_long) ntohl(sav->spi))); ESPSTAT_INC(esps_hdrops); /* XXX diffs from openbsd */ error = ENOBUFS; goto bad; @@ -801,7 +785,8 @@ esp_output( pad = (u_char *) m_pad(m, padding + alen); if (pad == NULL) { DPRINTF(("%s: m_pad failed for SA %s/%08lx\n", __func__, - ipsec_address(&saidx->dst), (u_long) ntohl(sav->spi))); + ipsec_address(&saidx->dst, buf, sizeof(buf)), + (u_long) ntohl(sav->spi))); m = NULL; /* NB: free'd by m_pad */ error = ENOBUFS; goto bad; @@ -833,7 +818,7 @@ esp_output( m_copyback(m, protoff, sizeof(u_int8_t), (u_char *) &prot); /* Get crypto descriptors. */ - crp = crypto_getreq(esph && espx ? 2 : 1); + crp = crypto_getreq(esph != NULL ? 2 : 1); if (crp == NULL) { DPRINTF(("%s: failed to acquire crypto descriptors\n", __func__)); @@ -842,27 +827,9 @@ esp_output( goto bad; } - if (espx) { - crde = crp->crp_desc; - crda = crde->crd_next; - - /* Encryption descriptor. */ - crde->crd_skip = skip + hlen; - crde->crd_len = m->m_pkthdr.len - (skip + hlen + alen); - crde->crd_flags = CRD_F_ENCRYPT; - crde->crd_inject = skip + hlen - sav->ivlen; - - /* Encryption operation. */ - crde->crd_alg = espx->type; - crde->crd_key = sav->key_enc->key_data; - crde->crd_klen = _KEYBITS(sav->key_enc); - /* XXX Rounds ? */ - } else - crda = crp->crp_desc; - /* IPsec-specific opaque crypto info. */ tc = (struct tdb_crypto *) malloc(sizeof(struct tdb_crypto), - M_XDATA, M_NOWAIT|M_ZERO); + M_XDATA, M_NOWAIT|M_ZERO); if (tc == NULL) { crypto_freereq(crp); DPRINTF(("%s: failed to allocate tdb_crypto\n", __func__)); @@ -871,6 +838,40 @@ esp_output( goto bad; } + crde = crp->crp_desc; + crda = crde->crd_next; + + /* Encryption descriptor. */ + crde->crd_skip = skip + hlen; + crde->crd_len = m->m_pkthdr.len - (skip + hlen + alen); + crde->crd_flags = CRD_F_ENCRYPT; + crde->crd_inject = skip + hlen - sav->ivlen; + + /* Encryption operation. */ + crde->crd_alg = espx->type; + if (SAV_ISCTRORGCM(sav)) { + ivp = &crde->crd_iv[0]; + + /* GCM IV Format: RFC4106 4 */ + /* CTR IV Format: RFC3686 4 */ + /* Salt is last four bytes of key, RFC4106 8.1 */ + /* Nonce is last four bytes of key, RFC3686 5.1 */ + memcpy(ivp, sav->key_enc->key_data + + _KEYLEN(sav->key_enc) - 4, 4); + SECASVAR_LOCK(sav); + cntr = sav->cntr++; + SECASVAR_UNLOCK(sav); + be64enc(&ivp[4], cntr); + + if (SAV_ISCTR(sav)) { + /* Initial block counter is 1, RFC3686 4 */ + be32enc(&ivp[sav->ivlen + 4], 1); + } + + m_copyback(m, skip + hlen - sav->ivlen, sav->ivlen, &ivp[4]); + crde->crd_flags |= CRD_F_IV_EXPLICIT|CRD_F_IV_PRESENT; + } + /* Callback parameters */ tc->tc_isr = isr; KEY_ADDREFSA(sav); @@ -889,14 +890,13 @@ esp_output( if (esph) { /* Authentication descriptor. */ + crda->crd_alg = esph->type; crda->crd_skip = skip; - crda->crd_len = m->m_pkthdr.len - (skip + alen); + if (SAV_ISGCM(sav)) + crda->crd_len = 8; /* RFC4106 5, SPI + SN */ + else + crda->crd_len = m->m_pkthdr.len - (skip + alen); crda->crd_inject = m->m_pkthdr.len - alen; - - /* Authentication operation. */ - crda->crd_alg = esph->type; - crda->crd_key = sav->key_auth->key_data; - crda->crd_klen = _KEYBITS(sav->key_auth); } return crypto_dispatch(crp); @@ -912,6 +912,7 @@ bad: static int esp_output_cb(struct cryptop *crp) { + char buf[INET6_ADDRSTRLEN]; struct tdb_crypto *tc; struct ipsecrequest *isr; struct secasvar *sav; @@ -923,13 +924,15 @@ esp_output_cb(struct cryptop *crp) m = (struct mbuf *) crp->crp_buf; isr = tc->tc_isr; + IPSEC_ASSERT(isr->sp != NULL, ("NULL isr->sp")); IPSECREQUEST_LOCK(isr); sav = tc->tc_sav; - /* With the isr lock released SA pointer can be updated. */ + + /* With the isr lock released, SA pointer may have changed. */ if (sav != isr->sav) { ESPSTAT_INC(esps_notdb); DPRINTF(("%s: SA gone during crypto (SA %s/%08lx proto %u)\n", - __func__, ipsec_address(&tc->tc_dst), + __func__, ipsec_address(&tc->tc_dst, buf, sizeof(buf)), (u_long) ntohl(tc->tc_spi), tc->tc_proto)); error = ENOBUFS; /*XXX*/ goto bad; @@ -981,16 +984,7 @@ esp_output_cb(struct cryptop *crp) if (esph != NULL) { int alen; - switch (esph->type) { - case CRYPTO_SHA2_256_HMAC: - case CRYPTO_SHA2_384_HMAC: - case CRYPTO_SHA2_512_HMAC: - alen = esph->hashsize/2; - break; - default: - alen = AH_HMAC_HASHLEN; - break; - } + alen = xform_ah_authsize(esph); m_copyback(m, m->m_pkthdr.len - alen, alen, ipseczeroes); } @@ -1001,16 +995,18 @@ esp_output_cb(struct cryptop *crp) error = ipsec_process_done(m, isr); KEY_FREESAV(&sav); IPSECREQUEST_UNLOCK(isr); - return error; + KEY_FREESP(&isr->sp); + return (error); bad: if (sav) KEY_FREESAV(&sav); IPSECREQUEST_UNLOCK(isr); + KEY_FREESP(&isr->sp); if (m) m_freem(m); free(tc, M_XDATA); crypto_freereq(crp); - return error; + return (error); } static struct xformsw esp_xformsw = { diff --git a/sys/netipsec/xform_ipcomp.c b/sys/netipsec/xform_ipcomp.c index 6d95250..a5d1e57 100644 --- a/sys/netipsec/xform_ipcomp.c +++ b/sys/netipsec/xform_ipcomp.c @@ -224,10 +224,10 @@ ipcomp_input(struct mbuf *m, struct secasvar *sav, int skip, int protoff) static int ipcomp_input_cb(struct cryptop *crp) { + char buf[INET6_ADDRSTRLEN]; struct cryptodesc *crd; struct tdb_crypto *tc; int skip, protoff; - struct mtag *mtag; struct mbuf *m; struct secasvar *sav; struct secasindex *saidx; @@ -241,7 +241,6 @@ ipcomp_input_cb(struct cryptop *crp) IPSEC_ASSERT(tc != NULL, ("null opaque crypto data area!")); skip = tc->tc_skip; protoff = tc->tc_protoff; - mtag = (struct mtag *) tc->tc_ptr; m = (struct mbuf *) crp->crp_buf; sav = tc->tc_sav; @@ -300,8 +299,8 @@ ipcomp_input_cb(struct cryptop *crp) if (error) { IPCOMPSTAT_INC(ipcomps_hdrops); DPRINTF(("%s: bad mbuf chain, IPCA %s/%08lx\n", __func__, - ipsec_address(&sav->sah->saidx.dst), - (u_long) ntohl(sav->spi))); + ipsec_address(&sav->sah->saidx.dst, buf, sizeof(buf)), + (u_long) ntohl(sav->spi))); goto bad; } @@ -311,12 +310,12 @@ ipcomp_input_cb(struct cryptop *crp) switch (saidx->dst.sa.sa_family) { #ifdef INET6 case AF_INET6: - error = ipsec6_common_input_cb(m, sav, skip, protoff, NULL); + error = ipsec6_common_input_cb(m, sav, skip, protoff); break; #endif #ifdef INET case AF_INET: - error = ipsec4_common_input_cb(m, sav, skip, protoff, NULL); + error = ipsec4_common_input_cb(m, sav, skip, protoff); break; #endif default: @@ -342,14 +341,10 @@ bad: * IPComp output routine, called by ipsec[46]_process_packet() */ static int -ipcomp_output( - struct mbuf *m, - struct ipsecrequest *isr, - struct mbuf **mp, - int skip, - int protoff -) +ipcomp_output(struct mbuf *m, struct ipsecrequest *isr, struct mbuf **mp, + int skip, int protoff) { + char buf[INET6_ADDRSTRLEN]; struct secasvar *sav; struct comp_algo *ipcompx; int error, ralen, maxpacketsize; @@ -393,7 +388,7 @@ ipcomp_output( DPRINTF(("%s: unknown/unsupported protocol family %d, " "IPCA %s/%08lx\n", __func__, sav->sah->saidx.dst.sa.sa_family, - ipsec_address(&sav->sah->saidx.dst), + ipsec_address(&sav->sah->saidx.dst, buf, sizeof(buf)), (u_long) ntohl(sav->spi))); error = EPFNOSUPPORT; goto bad; @@ -402,7 +397,7 @@ ipcomp_output( IPCOMPSTAT_INC(ipcomps_toobig); DPRINTF(("%s: packet in IPCA %s/%08lx got too big " "(len %u, max len %u)\n", __func__, - ipsec_address(&sav->sah->saidx.dst), + ipsec_address(&sav->sah->saidx.dst, buf, sizeof(buf)), (u_long) ntohl(sav->spi), ralen + skip + IPCOMP_HLENGTH, maxpacketsize)); error = EMSGSIZE; @@ -416,8 +411,8 @@ ipcomp_output( if (m == NULL) { IPCOMPSTAT_INC(ipcomps_hdrops); DPRINTF(("%s: cannot clone mbuf chain, IPCA %s/%08lx\n", - __func__, ipsec_address(&sav->sah->saidx.dst), - (u_long) ntohl(sav->spi))); + __func__, ipsec_address(&sav->sah->saidx.dst, buf, + sizeof(buf)), (u_long) ntohl(sav->spi))); error = ENOBUFS; goto bad; } @@ -484,6 +479,7 @@ bad: static int ipcomp_output_cb(struct cryptop *crp) { + char buf[INET6_ADDRSTRLEN]; struct tdb_crypto *tc; struct ipsecrequest *isr; struct secasvar *sav; @@ -496,6 +492,7 @@ ipcomp_output_cb(struct cryptop *crp) skip = tc->tc_skip; isr = tc->tc_isr; + IPSEC_ASSERT(isr->sp != NULL, ("NULL isr->sp")); IPSECREQUEST_LOCK(isr); sav = tc->tc_sav; /* With the isr lock released SA pointer can be updated. */ @@ -541,8 +538,8 @@ ipcomp_output_cb(struct cryptop *crp) if (mo == NULL) { IPCOMPSTAT_INC(ipcomps_wrap); DPRINTF(("%s: IPCOMP header inject failed for IPCA %s/%08lx\n", - __func__, ipsec_address(&sav->sah->saidx.dst), - (u_long) ntohl(sav->spi))); + __func__, ipsec_address(&sav->sah->saidx.dst, buf, + sizeof(buf)), (u_long) ntohl(sav->spi))); error = ENOBUFS; goto bad; } @@ -588,8 +585,8 @@ ipcomp_output_cb(struct cryptop *crp) DPRINTF(("%s: unknown/unsupported protocol " "family %d, IPCA %s/%08lx\n", __func__, sav->sah->saidx.dst.sa.sa_family, - ipsec_address(&sav->sah->saidx.dst), - (u_long) ntohl(sav->spi))); + ipsec_address(&sav->sah->saidx.dst, buf, + sizeof(buf)), (u_long) ntohl(sav->spi))); error = EPFNOSUPPORT; goto bad; } @@ -610,16 +607,18 @@ ipcomp_output_cb(struct cryptop *crp) error = ipsec_process_done(m, isr); KEY_FREESAV(&sav); IPSECREQUEST_UNLOCK(isr); - return error; + KEY_FREESP(&isr->sp); + return (error); bad: if (sav) KEY_FREESAV(&sav); IPSECREQUEST_UNLOCK(isr); + KEY_FREESP(&isr->sp); if (m) m_freem(m); free(tc, M_XDATA); crypto_freereq(crp); - return error; + return (error); } static struct xformsw ipcomp_xformsw = { diff --git a/sys/netipsec/xform_ipip.c b/sys/netipsec/xform_ipip.c deleted file mode 100644 index 9585eef..0000000 --- a/sys/netipsec/xform_ipip.c +++ /dev/null @@ -1,662 +0,0 @@ -/* $FreeBSD$ */ -/* $OpenBSD: ip_ipip.c,v 1.25 2002/06/10 18:04:55 itojun Exp $ */ -/*- - * The authors of this code are John Ioannidis (ji@tla.org), - * Angelos D. Keromytis (kermit@csd.uch.gr) and - * Niels Provos (provos@physnet.uni-hamburg.de). - * - * The original version of this code was written by John Ioannidis - * for BSD/OS in Athens, Greece, in November 1995. - * - * Ported to OpenBSD and NetBSD, with additional transforms, in December 1996, - * by Angelos D. Keromytis. - * - * Additional transforms and features in 1997 and 1998 by Angelos D. Keromytis - * and Niels Provos. - * - * Additional features in 1999 by Angelos D. Keromytis. - * - * Copyright (C) 1995, 1996, 1997, 1998, 1999 by John Ioannidis, - * Angelos D. Keromytis and Niels Provos. - * Copyright (c) 2001, Angelos D. Keromytis. - * - * Permission to use, copy, and modify this software with or without fee - * is hereby granted, provided that this entire notice is included in - * all copies of any software which is or includes a copy or - * modification of this software. - * You may use this code under the GNU public license if you so wish. Please - * contribute changes back to the authors under this freer than GPL license - * so that we may further the use of strong encryption without limitations to - * all. - * - * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR - * IMPLIED WARRANTY. IN PARTICULAR, NONE OF THE AUTHORS MAKES ANY - * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE - * MERCHANTABILITY OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR - * PURPOSE. - */ - -/* - * IP-inside-IP processing - */ -#include "opt_inet.h" -#include "opt_inet6.h" -#include "opt_enc.h" - -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/mbuf.h> -#include <sys/socket.h> -#include <sys/kernel.h> -#include <sys/protosw.h> -#include <sys/sysctl.h> - -#include <net/if.h> -#include <net/pfil.h> -#include <net/netisr.h> -#include <net/vnet.h> - -#include <netinet/in.h> -#include <netinet/in_systm.h> -#include <netinet/in_var.h> -#include <netinet/ip.h> -#include <netinet/ip_ecn.h> -#include <netinet/ip_var.h> -#include <netinet/ip_encap.h> - -#include <netipsec/ipsec.h> -#include <netipsec/xform.h> - -#include <netipsec/ipip_var.h> - -#ifdef INET6 -#include <netinet/ip6.h> -#include <netipsec/ipsec6.h> -#include <netinet6/ip6_ecn.h> -#include <netinet6/in6_var.h> -#include <netinet6/ip6protosw.h> -#endif - -#include <netipsec/key.h> -#include <netipsec/key_debug.h> - -#include <machine/stdarg.h> - -/* - * We can control the acceptance of IP4 packets by altering the sysctl - * net.inet.ipip.allow value. Zero means drop them, all else is acceptance. - */ -VNET_DEFINE(int, ipip_allow) = 0; -VNET_PCPUSTAT_DEFINE(struct ipipstat, ipipstat); -VNET_PCPUSTAT_SYSINIT(ipipstat); - -#ifdef VIMAGE -VNET_PCPUSTAT_SYSUNINIT(ipipstat); -#endif /* VIMAGE */ - -SYSCTL_DECL(_net_inet_ipip); -SYSCTL_VNET_INT(_net_inet_ipip, OID_AUTO, - ipip_allow, CTLFLAG_RW, &VNET_NAME(ipip_allow), 0, ""); -SYSCTL_VNET_PCPUSTAT(_net_inet_ipip, IPSECCTL_STATS, stats, - struct ipipstat, ipipstat, - "IPIP statistics (struct ipipstat, netipsec/ipip_var.h)"); - -/* XXX IPCOMP */ -#define M_IPSEC (M_AUTHIPHDR|M_AUTHIPDGM|M_DECRYPTED) - -static void _ipip_input(struct mbuf *m, int iphlen, struct ifnet *gifp); - -#ifdef INET6 -/* - * Really only a wrapper for ipip_input(), for use with IPv6. - */ -int -ip4_input6(struct mbuf **m, int *offp, int proto) -{ -#if 0 - /* If we do not accept IP-in-IP explicitly, drop. */ - if (!V_ipip_allow && ((*m)->m_flags & M_IPSEC) == 0) { - DPRINTF(("%s: dropped due to policy\n", __func__)); - IPIPSTAT_INC(ipips_pdrops); - m_freem(*m); - return IPPROTO_DONE; - } -#endif - _ipip_input(*m, *offp, NULL); - return IPPROTO_DONE; -} -#endif /* INET6 */ - -#ifdef INET -/* - * Really only a wrapper for ipip_input(), for use with IPv4. - */ -void -ip4_input(struct mbuf *m, int off) -{ -#if 0 - /* If we do not accept IP-in-IP explicitly, drop. */ - if (!V_ipip_allow && (m->m_flags & M_IPSEC) == 0) { - DPRINTF(("%s: dropped due to policy\n", __func__)); - IPIPSTAT_INC(ipips_pdrops); - m_freem(m); - return; - } -#endif - _ipip_input(m, off, NULL); -} -#endif /* INET */ - -/* - * ipip_input gets called when we receive an IP{46} encapsulated packet, - * either because we got it at a real interface, or because AH or ESP - * were being used in tunnel mode (in which case the rcvif element will - * contain the address of the encX interface associated with the tunnel. - */ - -static void -_ipip_input(struct mbuf *m, int iphlen, struct ifnet *gifp) -{ - struct ip *ipo; -#ifdef INET6 - struct ip6_hdr *ip6 = NULL; - u_int8_t itos; -#endif - int isr; - u_int8_t otos; - u_int8_t v; - int hlen; - - IPIPSTAT_INC(ipips_ipackets); - - m_copydata(m, 0, 1, &v); - - switch (v >> 4) { -#ifdef INET - case 4: - hlen = sizeof(struct ip); - break; -#endif /* INET */ -#ifdef INET6 - case 6: - hlen = sizeof(struct ip6_hdr); - break; -#endif - default: - IPIPSTAT_INC(ipips_family); - m_freem(m); - return /* EAFNOSUPPORT */; - } - - /* Bring the IP header in the first mbuf, if not there already */ - if (m->m_len < hlen) { - if ((m = m_pullup(m, hlen)) == NULL) { - DPRINTF(("%s: m_pullup (1) failed\n", __func__)); - IPIPSTAT_INC(ipips_hdrops); - return; - } - } - ipo = mtod(m, struct ip *); - - /* Keep outer ecn field. */ - switch (v >> 4) { -#ifdef INET - case 4: - otos = ipo->ip_tos; - break; -#endif /* INET */ -#ifdef INET6 - case 6: - otos = (ntohl(mtod(m, struct ip6_hdr *)->ip6_flow) >> 20) & 0xff; - break; -#endif - default: - panic("ipip_input: unknown ip version %u (outer)", v>>4); - } - - /* Remove outer IP header */ - m_adj(m, iphlen); - - /* Sanity check */ - if (m->m_pkthdr.len < sizeof(struct ip)) { - IPIPSTAT_INC(ipips_hdrops); - m_freem(m); - return; - } - - m_copydata(m, 0, 1, &v); - - switch (v >> 4) { -#ifdef INET - case 4: - hlen = sizeof(struct ip); - break; -#endif /* INET */ - -#ifdef INET6 - case 6: - hlen = sizeof(struct ip6_hdr); - break; -#endif - default: - IPIPSTAT_INC(ipips_family); - m_freem(m); - return; /* EAFNOSUPPORT */ - } - - /* - * Bring the inner IP header in the first mbuf, if not there already. - */ - if (m->m_len < hlen) { - if ((m = m_pullup(m, hlen)) == NULL) { - DPRINTF(("%s: m_pullup (2) failed\n", __func__)); - IPIPSTAT_INC(ipips_hdrops); - return; - } - } - - /* - * RFC 1853 specifies that the inner TTL should not be touched on - * decapsulation. There's no reason this comment should be here, but - * this is as good as any a position. - */ - - /* Some sanity checks in the inner IP header */ - switch (v >> 4) { -#ifdef INET - case 4: - ipo = mtod(m, struct ip *); - ip_ecn_egress(V_ip4_ipsec_ecn, &otos, &ipo->ip_tos); - break; -#endif /* INET */ -#ifdef INET6 - case 6: - ip6 = (struct ip6_hdr *) ipo; - itos = (ntohl(ip6->ip6_flow) >> 20) & 0xff; - ip_ecn_egress(V_ip6_ipsec_ecn, &otos, &itos); - ip6->ip6_flow &= ~htonl(0xff << 20); - ip6->ip6_flow |= htonl((u_int32_t) itos << 20); - break; -#endif - default: - panic("ipip_input: unknown ip version %u (inner)", v>>4); - } - - /* Check for local address spoofing. */ - if ((m->m_pkthdr.rcvif == NULL || - !(m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK)) && - V_ipip_allow != 2) { -#ifdef INET - if ((v >> 4) == IPVERSION && - in_localip(ipo->ip_src) != 0) { - IPIPSTAT_INC(ipips_spoof); - m_freem(m); - return; - } -#endif -#ifdef INET6 - if ((v & IPV6_VERSION_MASK) == IPV6_VERSION && - in6_localip(&ip6->ip6_src) != 0) { - IPIPSTAT_INC(ipips_spoof); - m_freem(m); - return; - } -#endif - } - - /* Statistics */ - IPIPSTAT_ADD(ipips_ibytes, m->m_pkthdr.len - iphlen); - - /* - * Interface pointer stays the same; if no IPsec processing has - * been done (or will be done), this will point to a normal - * interface. Otherwise, it'll point to an enc interface, which - * will allow a packet filter to distinguish between secure and - * untrusted packets. - */ - - switch (v >> 4) { -#ifdef INET - case 4: - isr = NETISR_IP; - break; -#endif -#ifdef INET6 - case 6: - isr = NETISR_IPV6; - break; -#endif - default: - panic("%s: bogus ip version %u", __func__, v>>4); - } - - if (netisr_queue(isr, m)) { /* (0) on success. */ - IPIPSTAT_INC(ipips_qfull); - DPRINTF(("%s: packet dropped because of full queue\n", - __func__)); - } -} - -int -ipip_output( - struct mbuf *m, - struct ipsecrequest *isr, - struct mbuf **mp, - int skip, - int protoff -) -{ - struct secasvar *sav; - u_int8_t tp, otos; - struct secasindex *saidx; - int error; -#if defined(INET) || defined(INET6) - u_int8_t itos; -#endif -#ifdef INET - struct ip *ipo; -#endif /* INET */ -#ifdef INET6 - struct ip6_hdr *ip6, *ip6o; -#endif /* INET6 */ - - sav = isr->sav; - IPSEC_ASSERT(sav != NULL, ("null SA")); - IPSEC_ASSERT(sav->sah != NULL, ("null SAH")); - - /* XXX Deal with empty TDB source/destination addresses. */ - - m_copydata(m, 0, 1, &tp); - tp = (tp >> 4) & 0xff; /* Get the IP version number. */ - - saidx = &sav->sah->saidx; - switch (saidx->dst.sa.sa_family) { -#ifdef INET - case AF_INET: - if (saidx->src.sa.sa_family != AF_INET || - saidx->src.sin.sin_addr.s_addr == INADDR_ANY || - saidx->dst.sin.sin_addr.s_addr == INADDR_ANY) { - DPRINTF(("%s: unspecified tunnel endpoint " - "address in SA %s/%08lx\n", __func__, - ipsec_address(&saidx->dst), - (u_long) ntohl(sav->spi))); - IPIPSTAT_INC(ipips_unspec); - error = EINVAL; - goto bad; - } - - M_PREPEND(m, sizeof(struct ip), M_NOWAIT); - if (m == 0) { - DPRINTF(("%s: M_PREPEND failed\n", __func__)); - IPIPSTAT_INC(ipips_hdrops); - error = ENOBUFS; - goto bad; - } - - ipo = mtod(m, struct ip *); - - ipo->ip_v = IPVERSION; - ipo->ip_hl = 5; - ipo->ip_len = htons(m->m_pkthdr.len); - ipo->ip_ttl = V_ip_defttl; - ipo->ip_sum = 0; - ipo->ip_src = saidx->src.sin.sin_addr; - ipo->ip_dst = saidx->dst.sin.sin_addr; - - ipo->ip_id = ip_newid(); - - /* If the inner protocol is IP... */ - switch (tp) { - case IPVERSION: - /* Save ECN notification */ - m_copydata(m, sizeof(struct ip) + - offsetof(struct ip, ip_tos), - sizeof(u_int8_t), (caddr_t) &itos); - - ipo->ip_p = IPPROTO_IPIP; - - /* - * We should be keeping tunnel soft-state and - * send back ICMPs if needed. - */ - m_copydata(m, sizeof(struct ip) + - offsetof(struct ip, ip_off), - sizeof(u_int16_t), (caddr_t) &ipo->ip_off); - ipo->ip_off = ntohs(ipo->ip_off); - ipo->ip_off &= ~(IP_DF | IP_MF | IP_OFFMASK); - ipo->ip_off = htons(ipo->ip_off); - break; -#ifdef INET6 - case (IPV6_VERSION >> 4): - { - u_int32_t itos32; - - /* Save ECN notification. */ - m_copydata(m, sizeof(struct ip) + - offsetof(struct ip6_hdr, ip6_flow), - sizeof(u_int32_t), (caddr_t) &itos32); - itos = ntohl(itos32) >> 20; - ipo->ip_p = IPPROTO_IPV6; - ipo->ip_off = 0; - break; - } -#endif /* INET6 */ - default: - goto nofamily; - } - - otos = 0; - ip_ecn_ingress(ECN_ALLOWED, &otos, &itos); - ipo->ip_tos = otos; - break; -#endif /* INET */ - -#ifdef INET6 - case AF_INET6: - if (IN6_IS_ADDR_UNSPECIFIED(&saidx->dst.sin6.sin6_addr) || - saidx->src.sa.sa_family != AF_INET6 || - IN6_IS_ADDR_UNSPECIFIED(&saidx->src.sin6.sin6_addr)) { - DPRINTF(("%s: unspecified tunnel endpoint " - "address in SA %s/%08lx\n", __func__, - ipsec_address(&saidx->dst), - (u_long) ntohl(sav->spi))); - IPIPSTAT_INC(ipips_unspec); - error = ENOBUFS; - goto bad; - } - - /* scoped address handling */ - ip6 = mtod(m, struct ip6_hdr *); - if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) - ip6->ip6_src.s6_addr16[1] = 0; - if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_dst)) - ip6->ip6_dst.s6_addr16[1] = 0; - - M_PREPEND(m, sizeof(struct ip6_hdr), M_NOWAIT); - if (m == 0) { - DPRINTF(("%s: M_PREPEND failed\n", __func__)); - IPIPSTAT_INC(ipips_hdrops); - error = ENOBUFS; - goto bad; - } - - /* Initialize IPv6 header */ - ip6o = mtod(m, struct ip6_hdr *); - ip6o->ip6_flow = 0; - ip6o->ip6_vfc &= ~IPV6_VERSION_MASK; - ip6o->ip6_vfc |= IPV6_VERSION; - ip6o->ip6_hlim = IPV6_DEFHLIM; - ip6o->ip6_dst = saidx->dst.sin6.sin6_addr; - ip6o->ip6_src = saidx->src.sin6.sin6_addr; - ip6o->ip6_plen = htons(m->m_pkthdr.len - sizeof(*ip6)); - - switch (tp) { -#ifdef INET - case IPVERSION: - /* Save ECN notification */ - m_copydata(m, sizeof(struct ip6_hdr) + - offsetof(struct ip, ip_tos), sizeof(u_int8_t), - (caddr_t) &itos); - - /* This is really IPVERSION. */ - ip6o->ip6_nxt = IPPROTO_IPIP; - break; -#endif /* INET */ - case (IPV6_VERSION >> 4): - { - u_int32_t itos32; - - /* Save ECN notification. */ - m_copydata(m, sizeof(struct ip6_hdr) + - offsetof(struct ip6_hdr, ip6_flow), - sizeof(u_int32_t), (caddr_t) &itos32); - itos = ntohl(itos32) >> 20; - - ip6o->ip6_nxt = IPPROTO_IPV6; - break; - } - default: - goto nofamily; - } - - otos = 0; - ip_ecn_ingress(V_ip6_ipsec_ecn, &otos, &itos); - ip6o->ip6_flow |= htonl((u_int32_t) otos << 20); - break; -#endif /* INET6 */ - - default: -nofamily: - DPRINTF(("%s: unsupported protocol family %u\n", __func__, - saidx->dst.sa.sa_family)); - IPIPSTAT_INC(ipips_family); - error = EAFNOSUPPORT; /* XXX diffs from openbsd */ - goto bad; - } - - IPIPSTAT_INC(ipips_opackets); - *mp = m; - -#ifdef INET - if (saidx->dst.sa.sa_family == AF_INET) { -#if 0 - if (sav->tdb_xform->xf_type == XF_IP4) - tdb->tdb_cur_bytes += - m->m_pkthdr.len - sizeof(struct ip); -#endif - IPIPSTAT_ADD(ipips_obytes, - m->m_pkthdr.len - sizeof(struct ip)); - } -#endif /* INET */ - -#ifdef INET6 - if (saidx->dst.sa.sa_family == AF_INET6) { -#if 0 - if (sav->tdb_xform->xf_type == XF_IP4) - tdb->tdb_cur_bytes += - m->m_pkthdr.len - sizeof(struct ip6_hdr); -#endif - IPIPSTAT_ADD(ipips_obytes, - m->m_pkthdr.len - sizeof(struct ip6_hdr)); - } -#endif /* INET6 */ - - return 0; -bad: - if (m) - m_freem(m); - *mp = NULL; - return (error); -} - -#ifdef IPSEC -#if defined(INET) || defined(INET6) -static int -ipe4_init(struct secasvar *sav, struct xformsw *xsp) -{ - sav->tdb_xform = xsp; - return 0; -} - -static int -ipe4_zeroize(struct secasvar *sav) -{ - sav->tdb_xform = NULL; - return 0; -} - -static int -ipe4_input(struct mbuf *m, struct secasvar *sav, int skip, int protoff) -{ - /* This is a rather serious mistake, so no conditional printing. */ - printf("%s: should never be called\n", __func__); - if (m) - m_freem(m); - return EOPNOTSUPP; -} - -static struct xformsw ipe4_xformsw = { - XF_IP4, 0, "IPv4 Simple Encapsulation", - ipe4_init, ipe4_zeroize, ipe4_input, ipip_output, -}; - -extern struct domain inetdomain; -#endif /* INET || INET6 */ -#ifdef INET -static struct protosw ipe4_protosw = { - .pr_type = SOCK_RAW, - .pr_domain = &inetdomain, - .pr_protocol = IPPROTO_IPV4, - .pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR, - .pr_input = ip4_input, - .pr_ctloutput = rip_ctloutput, - .pr_usrreqs = &rip_usrreqs -}; -#endif /* INET */ -#if defined(INET6) && defined(INET) -static struct ip6protosw ipe6_protosw = { - .pr_type = SOCK_RAW, - .pr_domain = &inetdomain, - .pr_protocol = IPPROTO_IPV6, - .pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR, - .pr_input = ip4_input6, - .pr_ctloutput = rip_ctloutput, - .pr_usrreqs = &rip_usrreqs -}; -#endif /* INET6 && INET */ - -#ifdef INET -/* - * Check the encapsulated packet to see if we want it - */ -static int -ipe4_encapcheck(const struct mbuf *m, int off, int proto, void *arg) -{ - /* - * Only take packets coming from IPSEC tunnels; the rest - * must be handled by the gif tunnel code. Note that we - * also return a minimum priority when we want the packet - * so any explicit gif tunnels take precedence. - */ - return ((m->m_flags & M_IPSEC) != 0 ? 1 : 0); -} -#endif /* INET */ - -static void -ipe4_attach(void) -{ - - xform_register(&ipe4_xformsw); - /* attach to encapsulation framework */ - /* XXX save return cookie for detach on module remove */ -#ifdef INET - (void) encap_attach_func(AF_INET, -1, - ipe4_encapcheck, &ipe4_protosw, NULL); -#endif -#if defined(INET6) && defined(INET) - (void) encap_attach_func(AF_INET6, -1, - ipe4_encapcheck, (struct protosw *)&ipe6_protosw, NULL); -#endif -} -SYSINIT(ipe4_xform_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_MIDDLE, ipe4_attach, NULL); -#endif /* IPSEC */ diff --git a/sys/netpfil/ipfw/ip_dn_io.c b/sys/netpfil/ipfw/ip_dn_io.c index 9a4b486..c842ce0 100644 --- a/sys/netpfil/ipfw/ip_dn_io.c +++ b/sys/netpfil/ipfw/ip_dn_io.c @@ -651,6 +651,7 @@ dummynet_send(struct mbuf *m) * to carry reinject info. */ dst = pkt->dn_dir; + pkt->rule.info |= IPFW_IS_DUMMYNET; ifp = pkt->ifp; tag->m_tag_cookie = MTAG_IPFW_RULE; tag->m_tag_id = 0; diff --git a/sys/netpfil/ipfw/ip_dummynet.c b/sys/netpfil/ipfw/ip_dummynet.c index 4de2156..40b8e97 100644 --- a/sys/netpfil/ipfw/ip_dummynet.c +++ b/sys/netpfil/ipfw/ip_dummynet.c @@ -124,7 +124,7 @@ ipdn_bound_var(int *v, int dflt, int lo, int hi, const char *msg) op = "Clamp"; } else return *v; - if (op && msg) + if (op && msg && bootverbose) printf("%s %s to %d (was %d)\n", op, msg, *v, oldv); return *v; } @@ -2290,7 +2290,6 @@ static moduledata_t dummynet_mod = { #define DN_SI_SUB SI_SUB_PROTO_IFATTACHDOMAIN #define DN_MODEV_ORD (SI_ORDER_ANY - 128) /* after ipfw */ DECLARE_MODULE(dummynet, dummynet_mod, DN_SI_SUB, DN_MODEV_ORD); -MODULE_DEPEND(dummynet, ipfw, 2, 2, 2); MODULE_VERSION(dummynet, 3); /* diff --git a/sys/netpfil/ipfw/ip_fw2.c b/sys/netpfil/ipfw/ip_fw2.c index 764696c..68d83f8 100644 --- a/sys/netpfil/ipfw/ip_fw2.c +++ b/sys/netpfil/ipfw/ip_fw2.c @@ -140,8 +140,7 @@ VNET_DEFINE(int, fw_verbose); VNET_DEFINE(u_int64_t, norule_counter); VNET_DEFINE(int, verbose_limit); -/* layer3_chain contains the list of rules for layer 3 */ -VNET_DEFINE(struct ip_fw_chain, layer3_chain); +VNET_DEFINE(struct ip_fw_contextes, ip_fw_contexts); VNET_DEFINE(int, ipfw_nat_ready) = 0; @@ -182,9 +181,6 @@ SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, default_to_accept, CTLFLAG_RDTUN, "Make the default rule accept all packets."); TUNABLE_INT("net.inet.ip.fw.default_to_accept", &default_to_accept); TUNABLE_INT("net.inet.ip.fw.tables_max", (int *)&default_fw_tables); -SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, static_count, - CTLFLAG_RD, &VNET_NAME(layer3_chain.n_rules), 0, - "Number of static rules"); #ifdef INET6 SYSCTL_DECL(_net_inet6_ip6); @@ -358,8 +354,8 @@ iface_match(struct ifnet *ifp, ipfw_insn_if *cmd, struct ip_fw_chain *chain, uin /* Check by name or by IP address */ if (cmd->name[0] != '\0') { /* match by name */ if (cmd->name[0] == '\1') /* use tablearg to match */ - return ipfw_lookup_table_extended(chain, cmd->p.glob, - ifp->if_xname, tablearg, IPFW_TABLE_INTERFACE); + return (ipfw_lookup_table_extended(chain, cmd->p.glob, + ifp->if_xname, tablearg, IPFW_TABLE_INTERFACE, NULL) != NULL); /* Check name */ if (cmd->p.glob) { if (fnmatch(cmd->name, ifp->if_xname, 0) == 0) @@ -904,6 +900,9 @@ ipfw_chk(struct ip_fw_args *args) */ struct ifnet *oif = args->oif; + if (V_ip_fw_contexts.chain[oif->if_ispare[0]] == NULL) + return (IP_FW_PASS); + int f_pos = 0; /* index of current rule in the array */ int retval = 0; @@ -954,7 +953,14 @@ ipfw_chk(struct ip_fw_args *args) */ int dyn_dir = MATCH_UNKNOWN; ipfw_dyn_rule *q = NULL; - struct ip_fw_chain *chain = &V_layer3_chain; + void *tblent = NULL, *tblent2 = NULL; + + /* XXX: WARNING - The chain is accessed unlocked here. + * There is a potential race here with context handling. + * The chain pointer will get destroyed and a NULL + * pointer dereference can happen! + */ + struct ip_fw_chain *chain = V_ip_fw_contexts.chain[oif->if_ispare[0]]; /* * We store in ulp a pointer to the upper layer protocol header. @@ -1288,6 +1294,8 @@ do { \ continue; skip_or = 0; + tblent = NULL; + tblent2 = NULL; for (l = f->cmd_len, cmd = f->cmd ; l > 0 ; l -= cmdlen, cmd += cmdlen) { int match; @@ -1402,7 +1410,7 @@ do { \ break; case O_IN: /* "out" is "not in" */ - match = (oif == NULL); + match = (args->dir == DIR_IN); break; case O_LAYER2: @@ -1438,11 +1446,18 @@ do { \ case O_IP_SRC_LOOKUP: case O_IP_DST_LOOKUP: if (is_ipv4) { + struct ether_addr *ea = NULL; + uint32_t key = (cmd->opcode == O_IP_DST_LOOKUP) ? dst_ip.s_addr : src_ip.s_addr; uint32_t v = 0; + if (args->eh) { + ea = (struct ether_addr*)((cmd->opcode == O_IP_DST_LOOKUP) ? + args->eh->ether_dhost : + args->eh->ether_shost); + } if (cmdlen > F_INSN_SIZE(ipfw_insn_u32)) { /* generic lookup. The key must be * in 32bit big-endian format. @@ -1484,22 +1499,37 @@ do { \ } else break; } - match = ipfw_lookup_table(chain, - cmd->arg1, key, &v); - if (!match) + tblent2 = ipfw_lookup_table(chain, + cmd->arg1, key, &v, ea); + if (tblent2 == NULL) { + match = 0; break; + } else + match = 1; if (cmdlen == F_INSN_SIZE(ipfw_insn_u32)) match = ((ipfw_insn_u32 *)cmd)->d[0] == v; - else + if (match) tablearg = v; } else if (is_ipv6) { + struct ether_addr *ea = NULL; uint32_t v = 0; + + if (args->eh) { + ea = (struct ether_addr*)((cmd->opcode == O_IP_DST_LOOKUP) ? + args->eh->ether_dhost : + args->eh->ether_shost); + } void *pkey = (cmd->opcode == O_IP_DST_LOOKUP) ? &args->f_id.dst_ip6: &args->f_id.src_ip6; - match = ipfw_lookup_table_extended(chain, + tblent = ipfw_lookup_table_extended(chain, cmd->arg1, pkey, &v, - IPFW_TABLE_CIDR); + IPFW_TABLE_CIDR, ea); + if (tblent == NULL) { + match = 0; + break; + } else + match = 1; if (cmdlen == F_INSN_SIZE(ipfw_insn_u32)) match = ((ipfw_insn_u32 *)cmd)->d[0] == v; if (match) @@ -2314,8 +2344,7 @@ do { \ break; case O_FORWARD_IP: - if (args->eh) /* not valid on layer2 pkts */ - break; + if (!args->eh) {/* not valid on layer2 pkts */ if (q == NULL || q->rule != f || dyn_dir == MATCH_FORWARD) { struct sockaddr_in *sa; @@ -2330,6 +2359,48 @@ do { \ args->next_hop = sa; } } + } else if (args->eh) { + struct m_tag *fwd_tag; + struct sockaddr_in *sa; + u_short sum; + + /* + * Checksum correct? (from ip_fastfwd.c) + */ + if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) + sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID); + else { + if (hlen == sizeof(struct ip)) + sum = in_cksum_hdr(ip); + else + sum = in_cksum(m, hlen); + } + if (sum) { + IPSTAT_INC(ips_badsum); + retval = IP_FW_DENY; + break; + } + + /* + * Remember that we have checked the IP header and found it valid. + */ + m->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID); + + sa = &(((ipfw_insn_sa *)cmd)->sa); + fwd_tag = m_tag_get(PACKET_TAG_IPFORWARD, + sizeof(struct sockaddr_in), M_NOWAIT); + if (fwd_tag == NULL) + retval = IP_FW_DENY; + else { + bcopy(sa, (fwd_tag+1), sizeof(struct sockaddr_in)); + m_tag_prepend(m, fwd_tag); + + if (in_localip(sa->sin_addr)) + m->m_flags |= M_FASTFWD_OURS; + m->m_flags |= M_IP_NEXTHOP; + } + } + retval = IP_FW_PASS; l = 0; /* exit inner loop */ done = 1; /* exit outer loop */ @@ -2337,8 +2408,7 @@ do { \ #ifdef INET6 case O_FORWARD_IP6: - if (args->eh) /* not valid on layer2 pkts */ - break; + if (!args->eh) {/* not valid on layer2 pkts */ if (q == NULL || q->rule != f || dyn_dir == MATCH_FORWARD) { struct sockaddr_in6 *sin6; @@ -2346,6 +2416,24 @@ do { \ sin6 = &(((ipfw_insn_sa6 *)cmd)->sa); args->next_hop6 = sin6; } + } else if (args->eh) { + struct m_tag *fwd_tag; + struct sockaddr_in6 *sin6; + + sin6 = &(((ipfw_insn_sa6 *)cmd)->sa); + fwd_tag = m_tag_get(PACKET_TAG_IPFORWARD, + sizeof(struct sockaddr_in6), M_NOWAIT); + if (fwd_tag == NULL) + retval = IP_FW_DENY; + else { + bcopy(sin6, (fwd_tag+1), sizeof(struct sockaddr_in6)); + m_tag_prepend(m, fwd_tag); + + if (in6_localip(&sin6->sin6_addr)) + m->m_flags |= M_FASTFWD_OURS; + m->m_flags |= M_IP6_NEXTHOP; + } + } retval = IP_FW_PASS; l = 0; /* exit inner loop */ done = 1; /* exit outer loop */ @@ -2417,7 +2505,7 @@ do { \ set_match(args, f_pos, chain); /* Check if this is 'global' nat rule */ if (cmd->arg1 == 0) { - retval = ipfw_nat_ptr(args, NULL, m); + retval = ipfw_nat_ptr(args, NULL, m, chain); break; } t = ((ipfw_insn_nat *)cmd)->nat; @@ -2432,7 +2520,7 @@ do { \ if (cmd->arg1 != IP_FW_TABLEARG) ((ipfw_insn_nat *)cmd)->nat = t; } - retval = ipfw_nat_ptr(args, t, m); + retval = ipfw_nat_ptr(args, t, m, chain); break; case O_REASS: { @@ -2502,6 +2590,10 @@ do { \ struct ip_fw *rule = chain->map[f_pos]; /* Update statistics */ IPFW_INC_RULE_COUNTER(rule, pktlen); + if (tblent != NULL) + ipfw_count_table_xentry_stats(tblent, pktlen); + if (tblent2 != NULL) + ipfw_count_table_entry_stats(tblent2, pktlen); } else { retval = IP_FW_DENY; printf("ipfw: ouch!, skip past end of rules, denying packet\n"); @@ -2536,7 +2628,9 @@ sysctl_ipfw_table_num(SYSCTL_HANDLER_ARGS) if ((error != 0) || (req->newptr == NULL)) return (error); - return (ipfw_resize_tables(&V_layer3_chain, ntables)); + for (int i = 1; i < IP_FW_MAXCTX; i++) + error += ipfw_resize_tables(V_ip_fw_contexts.chain[i], ntables); + return (error); } #endif /* @@ -2614,11 +2708,6 @@ ipfw_destroy(void) static int vnet_ipfw_init(const void *unused) { - int error; - struct ip_fw *rule = NULL; - struct ip_fw_chain *chain; - - chain = &V_layer3_chain; /* First set up some values that are compile time options */ V_autoinc_step = 100; /* bounded to 1..1000 in add_rule() */ @@ -2629,10 +2718,55 @@ vnet_ipfw_init(const void *unused) #ifdef IPFIREWALL_VERBOSE_LIMIT V_verbose_limit = IPFIREWALL_VERBOSE_LIMIT; #endif + + for (int i = 0; i < IP_FW_MAXCTX; i++) + V_ip_fw_contexts.chain[i] = NULL; + + IPFW_CTX_LOCK_INIT(); + + V_ip_fw_contexts.ifnet_arrival = EVENTHANDLER_REGISTER(ifnet_arrival_event, + ipfw_attach_ifnet_event, NULL, EVENTHANDLER_PRI_ANY); + + ipfw_dyn_init(); + + /* First set up some values that are compile time options */ + V_ipfw_vnet_ready = 1; /* Open for business */ + + /* + * Hook the sockopt handler and pfil hooks for ipv4 and ipv6. + * Even if the latter two fail we still keep the module alive + * because the sockopt and layer2 paths are still useful. + * ipfw[6]_hook return 0 on success, ENOENT on failure, + * so we can ignore the exact return value and just set a flag. + * + * Note that V_fw[6]_enable are manipulated by a SYSCTL_PROC so + * changes in the underlying (per-vnet) variables trigger + * immediate hook()/unhook() calls. + * In layer2 we have the same behaviour, except that V_ether_ipfw + * is checked on each packet because there are no pfil hooks. + */ + V_ip_fw_ctl_ptr = ipfw_ctl; + return ipfw_attach_hooks(1); +} + +int +ipfw_context_init(int index) +{ + struct ip_fw_chain *chain; + struct ip_fw *rule = NULL; + + if (index >= IP_FW_MAXCTX) + return (-1); + + TAILQ_INIT(&V_ip_fw_contexts.iflist[index]); + + chain = V_ip_fw_contexts.chain[index]; + + IPFW_LOCK_INIT(chain); + #ifdef IPFIREWALL_NAT LIST_INIT(&chain->nat); #endif - /* insert the default rule and create the initial map */ chain->n_rules = 1; chain->static_len = sizeof(struct ip_fw); @@ -2642,13 +2776,7 @@ vnet_ipfw_init(const void *unused) /* Set initial number of tables */ V_fw_tables_max = default_fw_tables; - error = ipfw_init_tables(chain); - if (error) { - printf("ipfw2: setting up tables failed\n"); - free(chain->map, M_IPFW); - free(rule, M_IPFW); - return (ENOSPC); - } + ipfw_init_tables(chain); /* fill and insert the default rule */ rule->act_ofs = 0; @@ -2660,28 +2788,13 @@ vnet_ipfw_init(const void *unused) chain->default_rule = chain->map[0] = rule; chain->id = rule->id = 1; - IPFW_LOCK_INIT(chain); - ipfw_dyn_init(chain); - - /* First set up some values that are compile time options */ - V_ipfw_vnet_ready = 1; /* Open for business */ + /* + * This can potentially be done on first dynamic rule + * being added to chain. + */ + resize_dynamic_table(chain, V_curr_dyn_buckets); - /* - * Hook the sockopt handler and pfil hooks for ipv4 and ipv6. - * Even if the latter two fail we still keep the module alive - * because the sockopt and layer2 paths are still useful. - * ipfw[6]_hook return 0 on success, ENOENT on failure, - * so we can ignore the exact return value and just set a flag. - * - * Note that V_fw[6]_enable are manipulated by a SYSCTL_PROC so - * changes in the underlying (per-vnet) variables trigger - * immediate hook()/unhook() calls. - * In layer2 we have the same behaviour, except that V_ether_ipfw - * is checked on each packet because there are no pfil hooks. - */ - V_ip_fw_ctl_ptr = ipfw_ctl; - error = ipfw_attach_hooks(1); - return (error); + return (0); } /* @@ -2690,11 +2803,9 @@ vnet_ipfw_init(const void *unused) static int vnet_ipfw_uninit(const void *unused) { - struct ip_fw *reap, *rule; - struct ip_fw_chain *chain = &V_layer3_chain; - int i; V_ipfw_vnet_ready = 0; /* tell new callers to go away */ + /* * disconnect from ipv4, ipv6, layer2 and sockopt. * Then grab, release and grab again the WLOCK so we make @@ -2702,14 +2813,51 @@ vnet_ipfw_uninit(const void *unused) */ (void)ipfw_attach_hooks(0 /* detach */); V_ip_fw_ctl_ptr = NULL; + + ipfw_dyn_uninit(0); /* run the callout_drain */ + + IPFW_CTX_WLOCK(); + EVENTHANDLER_DEREGISTER(ifnet_arrival_event, V_ip_fw_contexts.ifnet_arrival); + for (int i = 0; i < IP_FW_MAXCTX; i++) { + ipfw_context_uninit(i); + } + IPFW_CTX_WUNLOCK(); + IPFW_CTX_LOCK_DESTROY(); + + ipfw_dyn_uninit(1); /* free the remaining parts */ + + return (0); +} + +int +ipfw_context_uninit(int index) +{ + struct ip_fw_chain *chain; + struct ip_fw_ctx_iflist *ifl; + struct ip_fw *reap, *rule; + struct ifnet *ifp; + int i; + + if (index >= IP_FW_MAXCTX) + return (-1); + + chain = V_ip_fw_contexts.chain[index]; + if (chain == NULL) + return (0); + + while (!TAILQ_EMPTY(&V_ip_fw_contexts.iflist[index])) { + ifl = TAILQ_FIRST(&V_ip_fw_contexts.iflist[index]); + TAILQ_REMOVE(&V_ip_fw_contexts.iflist[index], ifl, entry); + ifp = ifunit(ifl->ifname); + if (ifp != NULL) + ifp->if_ispare[0] = 0; + free(ifl, M_IPFW); + } + IPFW_UH_WLOCK(chain); IPFW_UH_WUNLOCK(chain); IPFW_UH_WLOCK(chain); - IPFW_WLOCK(chain); - ipfw_dyn_uninit(0); /* run the callout_drain */ - IPFW_WUNLOCK(chain); - ipfw_destroy_tables(chain); reap = NULL; IPFW_WLOCK(chain); @@ -2725,8 +2873,10 @@ vnet_ipfw_uninit(const void *unused) if (reap != NULL) ipfw_reap_rules(reap); IPFW_LOCK_DESTROY(chain); - ipfw_dyn_uninit(1); /* free the remaining parts */ - return 0; + + free(chain, M_IPFW); + + return (0); } /* diff --git a/sys/netpfil/ipfw/ip_fw_dynamic.c b/sys/netpfil/ipfw/ip_fw_dynamic.c index 81c1b2c..b6cfa62 100644 --- a/sys/netpfil/ipfw/ip_fw_dynamic.c +++ b/sys/netpfil/ipfw/ip_fw_dynamic.c @@ -121,11 +121,9 @@ struct ipfw_dyn_bucket { */ static VNET_DEFINE(struct ipfw_dyn_bucket *, ipfw_dyn_v); static VNET_DEFINE(u_int32_t, dyn_buckets_max); -static VNET_DEFINE(u_int32_t, curr_dyn_buckets); static VNET_DEFINE(struct callout, ipfw_timeout); #define V_ipfw_dyn_v VNET(ipfw_dyn_v) #define V_dyn_buckets_max VNET(dyn_buckets_max) -#define V_curr_dyn_buckets VNET(curr_dyn_buckets) #define V_ipfw_timeout VNET(ipfw_timeout) static VNET_DEFINE(uma_zone_t, ipfw_dyn_rule_zone); @@ -181,6 +179,8 @@ static VNET_DEFINE(u_int32_t, dyn_max); /* max # of dynamic rules */ static int last_log; /* Log ratelimiting */ +VNET_DEFINE(u_int32_t, curr_dyn_buckets); + static void ipfw_dyn_tick(void *vnetx); static void check_dyn_rules(struct ip_fw_chain *, struct ip_fw *, int, int, int); @@ -472,7 +472,7 @@ ipfw_dyn_unlock(ipfw_dyn_rule *q) IPFW_BUCK_UNLOCK(q->bucket); } -static int +int resize_dynamic_table(struct ip_fw_chain *chain, int nbuckets) { int i, k, nbuckets_old; @@ -975,7 +975,6 @@ ipfw_dyn_send_ka(struct mbuf **mtailp, ipfw_dyn_rule *q) static void ipfw_dyn_tick(void * vnetx) { - struct ip_fw_chain *chain; int check_ka = 0; #ifdef VIMAGE struct vnet *vp = vnetx; @@ -983,7 +982,6 @@ ipfw_dyn_tick(void * vnetx) CURVNET_SET(vp); - chain = &V_layer3_chain; /* Run keepalive checks every keepalive_period iff ka is enabled */ if ((V_dyn_keepalive_last + V_dyn_keepalive_period <= time_uptime) && @@ -992,7 +990,12 @@ ipfw_dyn_tick(void * vnetx) check_ka = 1; } - check_dyn_rules(chain, NULL, RESVD_SET, check_ka, 1); + IPFW_CTX_RLOCK(); + for (int i = 1; i < IP_FW_MAXCTX; i++) { + if (V_ip_fw_contexts.chain[i] != NULL) + check_dyn_rules(V_ip_fw_contexts.chain[i], NULL, RESVD_SET, check_ka, 1); + } + IPFW_CTX_RUNLOCK(); callout_reset_on(&V_ipfw_timeout, hz, ipfw_dyn_tick, vnetx, 0); @@ -1308,7 +1311,7 @@ ipfw_expire_dyn_rules(struct ip_fw_chain *chain, struct ip_fw *rule, int set) } void -ipfw_dyn_init(struct ip_fw_chain *chain) +ipfw_dyn_init() { V_ipfw_dyn_v = NULL; @@ -1337,12 +1340,6 @@ ipfw_dyn_init(struct ip_fw_chain *chain) uma_zone_set_max(V_ipfw_dyn_rule_zone, V_dyn_max); callout_init(&V_ipfw_timeout, CALLOUT_MPSAFE); - - /* - * This can potentially be done on first dynamic rule - * being added to chain. - */ - resize_dynamic_table(chain, V_curr_dyn_buckets); } void diff --git a/sys/netpfil/ipfw/ip_fw_nat.c b/sys/netpfil/ipfw/ip_fw_nat.c index 0fb4534..627603d 100644 --- a/sys/netpfil/ipfw/ip_fw_nat.c +++ b/sys/netpfil/ipfw/ip_fw_nat.c @@ -64,26 +64,33 @@ ifaddr_change(void *arg __unused, struct ifnet *ifp) KASSERT(curvnet == ifp->if_vnet, ("curvnet(%p) differs from iface vnet(%p)", curvnet, ifp->if_vnet)); - chain = &V_layer3_chain; - IPFW_WLOCK(chain); - /* Check every nat entry... */ - LIST_FOREACH(ptr, &chain->nat, _next) { - /* ...using nic 'ifp->if_xname' as dynamic alias address. */ - if (strncmp(ptr->if_name, ifp->if_xname, IF_NAMESIZE) != 0) + + IPFW_CTX_RLOCK(); + for (int i = 1; i < IP_FW_MAXCTX; i++) { + chain = V_ip_fw_contexts.chain[i]; + if (chain == NULL) continue; - if_addr_rlock(ifp); - TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { - if (ifa->ifa_addr == NULL) - continue; - if (ifa->ifa_addr->sa_family != AF_INET) + IPFW_WLOCK(chain); + /* Check every nat entry... */ + LIST_FOREACH(ptr, &chain->nat, _next) { + /* ...using nic 'ifp->if_xname' as dynamic alias address. */ + if (strncmp(ptr->if_name, ifp->if_xname, IF_NAMESIZE) != 0) continue; - ptr->ip = ((struct sockaddr_in *) - (ifa->ifa_addr))->sin_addr; - LibAliasSetAddress(ptr->lib, ptr->ip); + if_addr_rlock(ifp); + TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { + if (ifa->ifa_addr == NULL) + continue; + if (ifa->ifa_addr->sa_family != AF_INET) + continue; + ptr->ip = ((struct sockaddr_in *) + (ifa->ifa_addr))->sin_addr; + LibAliasSetAddress(ptr->lib, ptr->ip); + } + if_addr_runlock(ifp); } - if_addr_runlock(ifp); + IPFW_WUNLOCK(chain); } - IPFW_WUNLOCK(chain); + IPFW_CTX_RUNLOCK(); } /* @@ -206,18 +213,18 @@ add_redir_spool_cfg(char *buf, struct cfg_nat *ptr) /* * ipfw_nat - perform mbuf header translation. * - * Note V_layer3_chain has to be locked while calling ipfw_nat() in + * Note *chain has to be locked while calling ipfw_nat() in * 'global' operation mode (t == NULL). * */ static int -ipfw_nat(struct ip_fw_args *args, struct cfg_nat *t, struct mbuf *m) +ipfw_nat(struct ip_fw_args *args, struct cfg_nat *t, struct mbuf *m, + struct ip_fw_chain *chain) { struct mbuf *mcl; struct ip *ip; /* XXX - libalias duct tape */ int ldt, retval, found; - struct ip_fw_chain *chain; char *c; ldt = 0; @@ -276,7 +283,6 @@ ipfw_nat(struct ip_fw_args *args, struct cfg_nat *t, struct mbuf *m) } found = 0; - chain = &V_layer3_chain; IPFW_RLOCK_ASSERT(chain); /* Check every nat entry... */ LIST_FOREACH(t, &chain->nat, _next) { @@ -391,11 +397,10 @@ lookup_nat(struct nat_list *l, int nat_id) } static int -ipfw_nat_cfg(struct sockopt *sopt) +ipfw_nat_cfg(struct sockopt *sopt, struct ip_fw_chain *chain) { struct cfg_nat *cfg, *ptr; char *buf; - struct ip_fw_chain *chain = &V_layer3_chain; size_t len; int gencnt, error = 0; @@ -468,10 +473,9 @@ out: } static int -ipfw_nat_del(struct sockopt *sopt) +ipfw_nat_del(struct sockopt *sopt, struct ip_fw_chain *chain) { struct cfg_nat *ptr; - struct ip_fw_chain *chain = &V_layer3_chain; int i; sooptcopyin(sopt, &i, sizeof i, sizeof i); @@ -492,9 +496,8 @@ ipfw_nat_del(struct sockopt *sopt) } static int -ipfw_nat_get_cfg(struct sockopt *sopt) +ipfw_nat_get_cfg(struct sockopt *sopt, struct ip_fw_chain *chain) { - struct ip_fw_chain *chain = &V_layer3_chain; struct cfg_nat *n; struct cfg_redir *r; struct cfg_spool *s; @@ -552,14 +555,11 @@ retry: } static int -ipfw_nat_get_log(struct sockopt *sopt) +ipfw_nat_get_log(struct sockopt *sopt, struct ip_fw_chain *chain) { uint8_t *data; struct cfg_nat *ptr; int i, size; - struct ip_fw_chain *chain; - - chain = &V_layer3_chain; IPFW_RLOCK(chain); /* one pass to count, one to copy the data */ @@ -604,17 +604,22 @@ vnet_ipfw_nat_uninit(const void *arg __unused) struct cfg_nat *ptr, *ptr_temp; struct ip_fw_chain *chain; - chain = &V_layer3_chain; - IPFW_WLOCK(chain); - LIST_FOREACH_SAFE(ptr, &chain->nat, _next, ptr_temp) { - LIST_REMOVE(ptr, _next); - del_redir_spool_cfg(ptr, &ptr->redir_chain); - LibAliasUninit(ptr->lib); - free(ptr, M_IPFW); + IPFW_CTX_RLOCK(); + for (int i = 1; i < IP_FW_MAXCTX; i++) { + chain = V_ip_fw_contexts.chain[i]; + IPFW_WLOCK(chain); + LIST_FOREACH_SAFE(ptr, &chain->nat, _next, ptr_temp) { + LIST_REMOVE(ptr, _next); + del_redir_spool_cfg(ptr, &ptr->redir_chain); + LibAliasUninit(ptr->lib); + free(ptr, M_IPFW); + } + flush_nat_ptrs(chain, -1 /* flush all */); + V_ipfw_nat_ready = 0; + IPFW_WUNLOCK(chain); } - flush_nat_ptrs(chain, -1 /* flush all */); - V_ipfw_nat_ready = 0; - IPFW_WUNLOCK(chain); + IPFW_CTX_RUNLOCK(); + return (0); } diff --git a/sys/netpfil/ipfw/ip_fw_pfil.c b/sys/netpfil/ipfw/ip_fw_pfil.c index 2bcd1dd..bf225b8 100644 --- a/sys/netpfil/ipfw/ip_fw_pfil.c +++ b/sys/netpfil/ipfw/ip_fw_pfil.c @@ -143,8 +143,9 @@ again: } args.m = *m0; - args.oif = dir == DIR_OUT ? ifp : NULL; + args.oif = ifp; args.inp = inp; + args.dir = dir; ipfw = ipfw_chk(&args); *m0 = args.m; @@ -314,9 +315,8 @@ ipfw_check_frame(void *arg, struct mbuf **m0, struct ifnet *dst, int dir, /* XXX can we free it after use ? */ mtag->m_tag_id = PACKET_TAG_NONE; r = (struct ipfw_rule_ref *)(mtag + 1); - if (r->info & IPFW_ONEPASS) - return (0); - args.rule = *r; + m_tag_delete(*m0, mtag); + return (0); } /* I need some amt of data to be contiguous */ @@ -333,12 +333,15 @@ ipfw_check_frame(void *arg, struct mbuf **m0, struct ifnet *dst, int dir, save_eh = *eh; /* save copy for restore below */ m_adj(m, ETHER_HDR_LEN); /* strip ethernet header */ + dir = dir == PFIL_IN ? DIR_IN : DIR_OUT; + args.m = m; /* the packet we are looking at */ - args.oif = dir == PFIL_OUT ? dst: NULL; /* destination, if any */ + args.oif = dst; /* destination, if any */ args.next_hop = NULL; /* we do not support forward yet */ args.next_hop6 = NULL; /* we do not support forward yet */ args.eh = &save_eh; /* MAC header for bridged/MAC packets */ args.inp = NULL; /* used by ipfw uid/gid/jail rules */ + args.dir = dir; /* pfSense addition */ i = ipfw_chk(&args); m = args.m; if (m != NULL) { @@ -369,13 +372,12 @@ ipfw_check_frame(void *arg, struct mbuf **m0, struct ifnet *dst, int dir, case IP_FW_DUMMYNET: ret = EACCES; - int dir; if (ip_dn_io_ptr == NULL) break; /* i.e. drop */ *m0 = NULL; - dir = PROTO_LAYER2 | (dst ? DIR_OUT : DIR_IN); + dir = PROTO_LAYER2 | dir; ip_dn_io_ptr(&m, dir, &args); return 0; @@ -499,7 +501,11 @@ ipfw_hook(int onoff, int pf) hook_func = (pf == AF_LINK) ? ipfw_check_frame : ipfw_check_packet; - (void) (onoff ? pfil_add_hook : pfil_remove_hook) + if (onoff) + (void) pfil_add_named_hook + (hook_func, NULL, "ipfw", PFIL_IN | PFIL_OUT | PFIL_WAITOK, pfh); + else + (void) pfil_remove_hook (hook_func, NULL, PFIL_IN | PFIL_OUT | PFIL_WAITOK, pfh); return 0; diff --git a/sys/netpfil/ipfw/ip_fw_private.h b/sys/netpfil/ipfw/ip_fw_private.h index e4a2f31..4f4cf93 100644 --- a/sys/netpfil/ipfw/ip_fw_private.h +++ b/sys/netpfil/ipfw/ip_fw_private.h @@ -101,6 +101,7 @@ struct ip_fw_args { struct ipfw_flow_id f_id; /* grabbed from IP header */ //uint32_t cookie; /* a cookie depending on rule action */ + uint32_t dir; /* direction */ struct inpcb *inp; struct _ip6dn_args dummypar; /* dummynet->ip6_output */ @@ -170,6 +171,9 @@ enum { /* result for matching dynamic rules */ MATCH_UNKNOWN, }; +VNET_DECLARE(u_int32_t, curr_dyn_buckets); +#define V_curr_dyn_buckets VNET(curr_dyn_buckets) + /* * The lock for dynamic rules is only used once outside the file, * and only to release the result of lookup_dyn_rule(). @@ -178,6 +182,7 @@ enum { /* result for matching dynamic rules */ struct ip_fw_chain; void ipfw_expire_dyn_rules(struct ip_fw_chain *, struct ip_fw *, int); void ipfw_dyn_unlock(ipfw_dyn_rule *q); +int resize_dynamic_table(struct ip_fw_chain *, int); struct tcphdr; struct mbuf *ipfw_send_pkt(struct mbuf *, struct ipfw_flow_id *, @@ -189,7 +194,7 @@ ipfw_dyn_rule *ipfw_lookup_dyn_rule(struct ipfw_flow_id *pkt, void ipfw_remove_dyn_children(struct ip_fw *rule); void ipfw_get_dynamic(struct ip_fw_chain *chain, char **bp, const char *ep); -void ipfw_dyn_init(struct ip_fw_chain *); /* per-vnet initialization */ +void ipfw_dyn_init(void); /* per-vnet initialization */ void ipfw_dyn_uninit(int); /* per-vnet deinitialization */ int ipfw_dyn_len(void); @@ -200,9 +205,6 @@ VNET_DECLARE(int, fw_one_pass); VNET_DECLARE(int, fw_verbose); #define V_fw_verbose VNET(fw_verbose) -VNET_DECLARE(struct ip_fw_chain, layer3_chain); -#define V_layer3_chain VNET(layer3_chain) - VNET_DECLARE(u_int32_t, set_disable); #define V_set_disable VNET(set_disable) @@ -236,6 +238,33 @@ struct ip_fw_chain { #endif }; +struct ip_fw_ctx_iflist { + TAILQ_ENTRY(ip_fw_ctx_iflist) entry; + char ifname[IFNAMSIZ]; +}; + +#define IP_FW_MAXCTX 4096 +struct ip_fw_contextes { + struct ip_fw_chain *chain[IP_FW_MAXCTX]; /* Arrays of contextes */ + TAILQ_HEAD(, ip_fw_ctx_iflist) iflist[IP_FW_MAXCTX]; + struct rwlock rwctx; + eventhandler_tag ifnet_arrival; +}; + +VNET_DECLARE(struct ip_fw_contextes, ip_fw_contexts); +#define V_ip_fw_contexts VNET(ip_fw_contexts) + +#define IPFW_CTX_LOCK_INIT() rw_init(&V_ip_fw_contexts.rwctx, "IPFW context") +#define IPFW_CTX_LOCK_DESTROY() rw_destroy(&V_ip_fw_contexts.rwctx) +#define IPFW_CTX_WLOCK() rw_wlock(&V_ip_fw_contexts.rwctx) +#define IPFW_CTX_WUNLOCK() rw_wunlock(&V_ip_fw_contexts.rwctx) +#define IPFW_CTX_RLOCK() rw_rlock(&V_ip_fw_contexts.rwctx) +#define IPFW_CTX_RUNLOCK() rw_runlock(&V_ip_fw_contexts.rwctx) + +void ipfw_attach_ifnet_event(void *, struct ifnet *); +int ipfw_context_init(int); +int ipfw_context_uninit(int); + struct sockopt; /* used by tcp_var.h */ /* Macro for working with various counters */ @@ -303,16 +332,21 @@ int ipfw_chk(struct ip_fw_args *args); void ipfw_reap_rules(struct ip_fw *head); /* In ip_fw_table.c */ +struct ether_addr; struct radix_node; -int ipfw_lookup_table(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr, - uint32_t *val); -int ipfw_lookup_table_extended(struct ip_fw_chain *ch, uint16_t tbl, void *paddr, - uint32_t *val, int type); +void *ipfw_lookup_table(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr, + uint32_t *val, struct ether_addr *); +void *ipfw_lookup_table_extended(struct ip_fw_chain *ch, uint16_t tbl, void *paddr, + uint32_t *val, int type, struct ether_addr *); +void ipfw_count_table_entry_stats(void *, int); +void ipfw_count_table_xentry_stats(void *, int); +int ipfw_zero_table_xentry_stats(struct ip_fw_chain *, ipfw_table_xentry *); +int ipfw_lookup_table_xentry(struct ip_fw_chain *, ipfw_table_xentry *); int ipfw_init_tables(struct ip_fw_chain *ch); void ipfw_destroy_tables(struct ip_fw_chain *ch); int ipfw_flush_table(struct ip_fw_chain *ch, uint16_t tbl); int ipfw_add_table_entry(struct ip_fw_chain *ch, uint16_t tbl, void *paddr, - uint8_t plen, uint8_t mlen, uint8_t type, uint32_t value); + uint8_t plen, uint8_t mlen, uint8_t type, u_int64_t mac_addr, uint32_t value); int ipfw_del_table_entry(struct ip_fw_chain *ch, uint16_t tbl, void *paddr, uint8_t plen, uint8_t mlen, uint8_t type); int ipfw_count_table(struct ip_fw_chain *ch, uint32_t tbl, uint32_t *cnt); @@ -326,8 +360,9 @@ int ipfw_resize_tables(struct ip_fw_chain *ch, unsigned int ntables); extern struct cfg_nat *(*lookup_nat_ptr)(struct nat_list *, int); -typedef int ipfw_nat_t(struct ip_fw_args *, struct cfg_nat *, struct mbuf *); -typedef int ipfw_nat_cfg_t(struct sockopt *); +typedef int ipfw_nat_t(struct ip_fw_args *, struct cfg_nat *, struct mbuf *, + struct ip_fw_chain *); +typedef int ipfw_nat_cfg_t(struct sockopt *, struct ip_fw_chain *); VNET_DECLARE(int, ipfw_nat_ready); #define V_ipfw_nat_ready VNET(ipfw_nat_ready) diff --git a/sys/netpfil/ipfw/ip_fw_sockopt.c b/sys/netpfil/ipfw/ip_fw_sockopt.c index 3c342f7..2776e4c 100644 --- a/sys/netpfil/ipfw/ip_fw_sockopt.c +++ b/sys/netpfil/ipfw/ip_fw_sockopt.c @@ -736,8 +736,8 @@ check_ipfw_struct(struct ip_fw *rule, int size) if (!IPFW_NAT_LOADED) return EINVAL; if (cmdlen != F_INSN_SIZE(ipfw_insn_nat)) - goto bad_size; - goto check_action; + goto bad_size; + goto check_action; case O_FORWARD_MAC: /* XXX not implemented yet */ case O_CHECK_STATE: case O_COUNT: @@ -943,12 +943,15 @@ ipfw_ctl(struct sockopt *sopt) #define RULE_MAXSIZE (256*sizeof(u_int32_t)) int error; size_t size, len, valsize; + struct ifnet *ifp; struct ip_fw *buf, *rule; - struct ip_fw_chain *chain; + static struct ip_fw_chain *chain; + struct ip_fw_ctx_iflist *tmpifl, *tmpifl2 = NULL; + ip_fw3_opheader *op3 = NULL; u_int32_t rulenum[2]; uint32_t opt; char xbuf[128]; - ip_fw3_opheader *op3 = NULL; + char *ifname; error = priv_check(sopt->sopt_td, PRIV_NETINET_IPFW); if (error) @@ -965,7 +968,6 @@ ipfw_ctl(struct sockopt *sopt) return (error); } - chain = &V_layer3_chain; error = 0; /* Save original valsize before it is altered via sooptcopyin() */ @@ -980,9 +982,238 @@ ipfw_ctl(struct sockopt *sopt) return (error); op3 = (ip_fw3_opheader *)xbuf; opt = op3->opcode; + + if (op3->ctxid >= IP_FW_MAXCTX) + return (EINVAL); + + if (opt != IP_FW_CTX_GET && opt != IP_FW_CTX_ADD) { + if (op3->ctxid == 0) + return (ENOENT); + + IPFW_CTX_RLOCK(); + chain = V_ip_fw_contexts.chain[op3->ctxid]; + IPFW_CTX_RUNLOCK(); + if (chain == NULL) + return (ENOENT); + } } + /* Verification needed to avoid problems */ switch (opt) { + case IP_FW_CTX_GET: + case IP_FW_CTX_ADD: + case IP_FW_CTX_DEL: + break; + default: + if (chain == NULL) + return (EINVAL); + /* NOTREACHED */ + } + + switch (opt) { + case IP_FW_CTX_ADD: + IPFW_CTX_WLOCK(); + if (V_ip_fw_contexts.chain[op3->ctxid] != NULL) { + IPFW_CTX_WUNLOCK(); + return (EEXIST); + } + + chain = malloc(sizeof(struct ip_fw_chain), M_IPFW, M_WAITOK | M_ZERO); + TAILQ_INIT(&V_ip_fw_contexts.iflist[op3->ctxid]); + V_ip_fw_contexts.chain[op3->ctxid] = chain; + ipfw_context_init(op3->ctxid); /* XXX: error checking */ + IPFW_CTX_WUNLOCK(); + break; + + case IP_FW_CTX_DEL: + IPFW_CTX_WLOCK(); + if (V_ip_fw_contexts.chain[op3->ctxid] == NULL) { + IPFW_CTX_WUNLOCK(); + return (ENOENT); + } + + ipfw_context_uninit(op3->ctxid); + V_ip_fw_contexts.chain[op3->ctxid] = NULL; + IPFW_CTX_WUNLOCK(); + break; + + case IP_FW_CTX_GET: + { + int i, n, len = 0, want; + char *bufout, *tmpbuf; + + sopt->sopt_valsize = valsize; + + IPFW_CTX_RLOCK(); + for (i = 1; i < IP_FW_MAXCTX; i++) { + if (op3->ctxid > 0 && op3->ctxid != i) + continue; + if (op3->ctxid > 0 && op3->ctxid < i) + break; + + if (V_ip_fw_contexts.chain[i] == NULL) + continue; + + /* Calculate number of bytes for the integer */ + n = i; + while (n > 0) { + n /= 10; + len++; + } + TAILQ_FOREACH(tmpifl, &V_ip_fw_contexts.iflist[i], entry) { + len += strlen(tmpifl->ifname) + 1; + } + len += 3; // newline, :, space + } + IPFW_CTX_RUNLOCK(); + + if (len > sopt->sopt_valsize) { + sopt->sopt_valsize = len; + break; + } + + bufout = malloc(len, M_TEMP, M_WAITOK | M_ZERO); + if (bufout == NULL) + break; + + /* Record our size for later checks */ + want = len; + len = 0; + IPFW_CTX_RLOCK(); + /* Recalculate length to detect if smth changed */ + for (i = 1; i < IP_FW_MAXCTX; i++) { + if (op3->ctxid > 0 && op3->ctxid != i) + continue; + if (op3->ctxid > 0 && op3->ctxid < i) + break; + + if (V_ip_fw_contexts.chain[i] == NULL) + continue; + + /* Calculate number of bytes for the integer */ + n = i; + while (n > 0) { + n /= 10; + len++; + } + TAILQ_FOREACH(tmpifl, &V_ip_fw_contexts.iflist[i], entry) { + len += strlen(tmpifl->ifname) + 1; + } + len += 3; // newline, :, space + } + + if (want >= len) { + tmpbuf = bufout; + for (i = 1; i < IP_FW_MAXCTX; i++) { + if (op3->ctxid > 0 && op3->ctxid != i) + continue; + if (op3->ctxid > 0 && op3->ctxid < i) + break; + + if (V_ip_fw_contexts.chain[i] == NULL) + continue; + + sprintf(tmpbuf, "%d: ", i); + tmpbuf += strlen(tmpbuf); + TAILQ_FOREACH(tmpifl, &V_ip_fw_contexts.iflist[i], entry) { + sprintf(tmpbuf, "%s,", tmpifl->ifname); + tmpbuf += strlen(tmpifl->ifname) + 1; + } + sprintf(tmpbuf, "\n"); + tmpbuf++; + } + } + IPFW_CTX_RUNLOCK(); + + if (want >= len) + error = sooptcopyout(sopt, bufout, len); + else + len = 0; + free(bufout, M_TEMP); + } + break; + + case IP_FW_CTX_SET: + /* XXX: Maybe not use this option at all? */ + IPFW_CTX_RLOCK(); + if (V_ip_fw_contexts.chain[op3->ctxid] == NULL) + error = ENOENT; + else + chain = V_ip_fw_contexts.chain[op3->ctxid]; + IPFW_CTX_RUNLOCK(); + break; + + case IP_FW_CTX_ADDMEMBER: + { + int i; + + ifname = (char *)(op3 + 1); + ifp = ifunit(ifname); + if (ifp == NULL) + return (ENOENT); + + tmpifl = malloc(sizeof(*tmpifl), M_IPFW, M_WAITOK | M_ZERO); + + IPFW_CTX_WLOCK(); + if (V_ip_fw_contexts.chain[op3->ctxid] == NULL) { + IPFW_CTX_WUNLOCK(); + free(tmpifl, M_IPFW); + return (ENOENT); + } + + for (i = 1; i < IP_FW_MAXCTX; i++) { + if (V_ip_fw_contexts.chain[i] == NULL) + continue; + + TAILQ_FOREACH(tmpifl2, &V_ip_fw_contexts.iflist[i], entry) { + if (strlen(tmpifl2->ifname) != strlen(ifname)) + continue; + if (!strcmp(tmpifl2->ifname, ifname)) + goto ctxifacefound; + } + } +ctxifacefound: + if (tmpifl2 != NULL) { + IPFW_CTX_WUNLOCK(); + free(tmpifl, M_IPFW); + return (EEXIST); + } + + strlcpy(tmpifl->ifname, ifname, IFNAMSIZ); + TAILQ_INSERT_HEAD(&V_ip_fw_contexts.iflist[op3->ctxid], tmpifl, entry); + ifp->if_ispare[0] = op3->ctxid; + IPFW_CTX_WUNLOCK(); + } + break; + + case IP_FW_CTX_DELMEMBER: + IPFW_CTX_WLOCK(); + if (V_ip_fw_contexts.chain[op3->ctxid] == NULL) { + IPFW_CTX_WUNLOCK(); + return (ENOENT); + } + + ifname = (char *)(op3 + 1); + TAILQ_FOREACH(tmpifl2, &V_ip_fw_contexts.iflist[op3->ctxid], entry) { + if (strlen(tmpifl2->ifname) != strlen(ifname)) + continue; + if (!strcmp(tmpifl2->ifname, ifname)) + break; + } + if (tmpifl2 == NULL) { + IPFW_CTX_WUNLOCK(); + return (ENOENT); + } + + TAILQ_REMOVE(&V_ip_fw_contexts.iflist[op3->ctxid], tmpifl2, entry); + IPFW_CTX_WUNLOCK(); + free(tmpifl2, M_IPFW); + + ifp = ifunit(ifname); + if (ifp != NULL) + ifp->if_ispare[0] = 0; + break; + case IP_FW_GET: /* * pass up a copy of the current rules. Static rules @@ -1124,7 +1355,7 @@ ipfw_ctl(struct sockopt *sopt) break; error = ipfw_add_table_entry(chain, ent.tbl, &ent.addr, sizeof(ent.addr), ent.masklen, - IPFW_TABLE_CIDR, ent.value); + IPFW_TABLE_CIDR, ent.mac_addr, ent.value); } break; @@ -1157,12 +1388,12 @@ ipfw_ctl(struct sockopt *sopt) error = EINVAL; break; } - + len = xent->len - offsetof(ipfw_table_xentry, k); error = (opt == IP_FW_TABLE_XADD) ? ipfw_add_table_entry(chain, xent->tbl, &xent->k, - len, xent->masklen, xent->type, xent->value) : + len, xent->masklen, xent->type, xent->mac_addr, xent->value) : ipfw_del_table_entry(chain, xent->tbl, &xent->k, len, xent->masklen, xent->type); } @@ -1245,6 +1476,54 @@ ipfw_ctl(struct sockopt *sopt) } break; + case IP_FW_TABLE_XZEROENTRY: /* IP_FW3 */ + { + ipfw_table_xentry *xent = (ipfw_table_xentry *)(op3 + 1); + + /* Check minimum header size */ + if (IP_FW3_OPLENGTH(sopt) < offsetof(ipfw_table_xentry, k)) { + error = EINVAL; + break; + } + + /* Check if len field is valid */ + if (xent->len > sizeof(ipfw_table_xentry)) { + error = EINVAL; + break; + } + + error = ipfw_zero_table_xentry_stats(chain, xent); + if (!error) { + xent->timestamp += boottime.tv_sec; + error = sooptcopyout(sopt, xent, sizeof(*xent)); + } + } + break; + + case IP_FW_TABLE_XLISTENTRY: /* IP_FW3 */ + { + ipfw_table_xentry *xent = (ipfw_table_xentry *)(op3 + 1); + + /* Check minimum header size */ + if (IP_FW3_OPLENGTH(sopt) < offsetof(ipfw_table_xentry, k)) { + error = EINVAL; + break; + } + + /* Check if len field is valid */ + if (xent->len > sizeof(ipfw_table_xentry)) { + error = EINVAL; + break; + } + + error = ipfw_lookup_table_xentry(chain, xent); + if (!error) { + xent->timestamp += boottime.tv_sec; + error = sooptcopyout(sopt, xent, sizeof(*xent)); + } + } + break; + case IP_FW_TABLE_XLIST: /* IP_FW3 */ { ipfw_xtable *tbl; @@ -1284,7 +1563,7 @@ ipfw_ctl(struct sockopt *sopt) /*--- NAT operations are protected by the IPFW_LOCK ---*/ case IP_FW_NAT_CFG: if (IPFW_NAT_LOADED) - error = ipfw_nat_cfg_ptr(sopt); + error = ipfw_nat_cfg_ptr(sopt, chain); else { printf("IP_FW_NAT_CFG: %s\n", "ipfw_nat not present, please load it"); @@ -1294,7 +1573,7 @@ ipfw_ctl(struct sockopt *sopt) case IP_FW_NAT_DEL: if (IPFW_NAT_LOADED) - error = ipfw_nat_del_ptr(sopt); + error = ipfw_nat_del_ptr(sopt, chain); else { printf("IP_FW_NAT_DEL: %s\n", "ipfw_nat not present, please load it"); @@ -1304,7 +1583,7 @@ ipfw_ctl(struct sockopt *sopt) case IP_FW_NAT_GET_CONFIG: if (IPFW_NAT_LOADED) - error = ipfw_nat_get_cfg_ptr(sopt); + error = ipfw_nat_get_cfg_ptr(sopt, chain); else { printf("IP_FW_NAT_GET_CFG: %s\n", "ipfw_nat not present, please load it"); @@ -1314,7 +1593,7 @@ ipfw_ctl(struct sockopt *sopt) case IP_FW_NAT_GET_LOG: if (IPFW_NAT_LOADED) - error = ipfw_nat_get_log_ptr(sopt); + error = ipfw_nat_get_log_ptr(sopt, chain); else { printf("IP_FW_NAT_GET_LOG: %s\n", "ipfw_nat not present, please load it"); @@ -1331,6 +1610,33 @@ ipfw_ctl(struct sockopt *sopt) #undef RULE_MAXSIZE } +void +ipfw_attach_ifnet_event(void *arg __unused, struct ifnet *ifp) +{ + struct ip_fw_ctx_iflist *tmpifl; + + CURVNET_SET(ifp->if_vnet); + + IPFW_CTX_RLOCK(); + for (int i = 1; i < IP_FW_MAXCTX; i++) { + if (V_ip_fw_contexts.chain[i] == NULL) + continue; + TAILQ_FOREACH(tmpifl, &V_ip_fw_contexts.iflist[i], entry) { + if (strlen(tmpifl->ifname) != strlen(ifp->if_xname)) + continue; + if (!strcmp(tmpifl->ifname, ifp->if_xname)) { + printf("Restoring context for interface %s to %d\n", ifp->if_xname, i); + ifp->if_ispare[0] = i; + goto ifctxdone; + break; + } + } + } +ifctxdone: + IPFW_CTX_RUNLOCK(); + + CURVNET_RESTORE(); +} #define RULE_MAXSIZE (256*sizeof(u_int32_t)) diff --git a/sys/netpfil/ipfw/ip_fw_table.c b/sys/netpfil/ipfw/ip_fw_table.c index 760a10c..689596f 100644 --- a/sys/netpfil/ipfw/ip_fw_table.c +++ b/sys/netpfil/ipfw/ip_fw_table.c @@ -59,6 +59,7 @@ __FBSDID("$FreeBSD$"); #include <net/route.h> #include <net/vnet.h> +#include <net/ethernet.h> #include <netinet/in.h> #include <netinet/ip_var.h> /* struct ipfw_rule_ref */ #include <netinet/ip_fw.h> @@ -74,7 +75,11 @@ static MALLOC_DEFINE(M_IPFW_TBL, "ipfw_tbl", "IpFw tables"); struct table_entry { struct radix_node rn[2]; struct sockaddr_in addr, mask; + u_int64_t mac_addr; u_int32_t value; + u_int32_t timestamp; + u_int64_t bytes; + u_int64_t packets; }; struct xaddr_iface { @@ -97,7 +102,11 @@ struct table_xentry { #endif struct xaddr_iface ifmask; } m; + u_int64_t mac_addr; u_int32_t value; + u_int32_t timestamp; + u_int64_t bytes; + u_int64_t packets; }; /* @@ -137,7 +146,7 @@ ipv6_writemask(struct in6_addr *addr6, uint8_t mask) int ipfw_add_table_entry(struct ip_fw_chain *ch, uint16_t tbl, void *paddr, - uint8_t plen, uint8_t mlen, uint8_t type, uint32_t value) + uint8_t plen, uint8_t mlen, uint8_t type, u_int64_t mac_addr, uint32_t value) { struct radix_node_head *rnh, **rnh_ptr; struct table_entry *ent; @@ -161,6 +170,7 @@ ipfw_add_table_entry(struct ip_fw_chain *ch, uint16_t tbl, void *paddr, return (EINVAL); ent = malloc(sizeof(*ent), M_IPFW_TBL, M_WAITOK | M_ZERO); ent->value = value; + ent->mac_addr = mac_addr; /* Set 'total' structure length */ KEY_LEN(ent->addr) = KEY_LEN_INET; KEY_LEN(ent->mask) = KEY_LEN_INET; @@ -182,6 +192,7 @@ ipfw_add_table_entry(struct ip_fw_chain *ch, uint16_t tbl, void *paddr, return (EINVAL); xent = malloc(sizeof(*xent), M_IPFW_TBL, M_WAITOK | M_ZERO); xent->value = value; + xent->mac_addr = mac_addr; /* Set 'total' structure length */ KEY_LEN(xent->a.addr6) = KEY_LEN_INET6; KEY_LEN(xent->m.mask6) = KEY_LEN_INET6; @@ -281,6 +292,28 @@ ipfw_add_table_entry(struct ip_fw_chain *ch, uint16_t tbl, void *paddr, IPFW_WUNLOCK(ch); if (rn == NULL) { + if (type == IPFW_TABLE_CIDR) { + /* Just update if any new value needed */ + if (plen == sizeof(in_addr_t)) { + ent = (struct table_entry *)(rnh->rnh_matchaddr(addr_ptr, rnh)); + if (ent != NULL) { + if (ent->mac_addr) { + if (!bcmp(&mac_addr, &ent->mac_addr, ETHER_ADDR_LEN)) + ent->value = value; + } else + ent->value = value; + } + } else { + xent = (struct table_xentry *)(rnh->rnh_matchaddr(addr_ptr, rnh)); + if (xent != NULL) { + if (xent->mac_addr) { + if (!bcmp(&mac_addr, &xent->mac_addr, ETHER_ADDR_LEN)) + xent->value = value; + } else + xent->value = value; + } + } + } free(ent_ptr, M_IPFW_TBL); return (EEXIST); } @@ -530,31 +563,194 @@ ipfw_resize_tables(struct ip_fw_chain *ch, unsigned int ntables) return (0); } -int +void * ipfw_lookup_table(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr, - uint32_t *val) + uint32_t *val, struct ether_addr *ea) { struct radix_node_head *rnh; struct table_entry *ent; struct sockaddr_in sa; if (tbl >= V_fw_tables_max) - return (0); + return (NULL); if ((rnh = ch->tables[tbl]) == NULL) - return (0); + return (NULL); KEY_LEN(sa) = KEY_LEN_INET; sa.sin_addr.s_addr = addr; ent = (struct table_entry *)(rnh->rnh_matchaddr(&sa, rnh)); if (ent != NULL) { - *val = ent->value; - return (1); + if (ea && ent->mac_addr) { + if (bcmp((u_char *)&ent->mac_addr, ea->octet, ETHER_ADDR_LEN) != 0) + ent = NULL; + } + if (ent != NULL) { + *val = ent->value; + return (ent); + } } - return (0); + return (NULL); +} + +void +ipfw_count_table_entry_stats(void *arg, int pktlen) +{ + struct table_entry *xent = arg; + + xent->packets++; + xent->bytes += pktlen; + xent->timestamp = time_uptime; +} + +void +ipfw_count_table_xentry_stats(void *arg, int pktlen) +{ + ipfw_table_xentry *xent= arg; + + xent->packets++; + xent->bytes += pktlen; + xent->timestamp = time_uptime; } int +ipfw_zero_table_xentry_stats(struct ip_fw_chain *ch, ipfw_table_xentry *arg) +{ + struct radix_node_head *rnh; + struct table_xentry *xent; + struct sockaddr_in6 sa6; + struct xaddr_iface iface; + + if (arg->tbl >= V_fw_tables_max) + return (EINVAL); + if (ch->tables[arg->tbl] != NULL) + rnh = ch->tables[arg->tbl]; + else if (ch->xtables[arg->tbl] != NULL) + rnh = ch->xtables[arg->tbl]; + else + return (EINVAL); + + switch (arg->type) { + case IPFW_TABLE_CIDR: + if (ch->tables[arg->tbl] != NULL) { + /* XXX: Maybe better by FreeBSD 11!! */ + struct sockaddr_in sa; + struct table_entry *ent; + + KEY_LEN(sa) = KEY_LEN_INET; + sa.sin_addr.s_addr = *((in_addr_t *)&arg->k.addr6); + ent = (struct table_entry *)(rnh->rnh_matchaddr(&sa, rnh)); + if (ent == NULL) + return (EINVAL); + + arg->bytes = 0; + arg->packets = 0; + arg->value = ent->value; + arg->timestamp = time_uptime; + + return (0); + } else { + KEY_LEN(sa6) = KEY_LEN_INET6; + memcpy(&sa6.sin6_addr, &arg->k.addr6, sizeof(struct in6_addr)); + xent = (struct table_xentry *)(rnh->rnh_matchaddr(&sa6, rnh)); + } + break; + + case IPFW_TABLE_INTERFACE: + KEY_LEN(iface) = KEY_LEN_IFACE + + strlcpy(iface.ifname, arg->k.iface, IF_NAMESIZE) + 1; + /* Assume direct match */ + /* FIXME: Add interface pattern matching */ + xent = (struct table_xentry *)(rnh->rnh_lookup(&iface, NULL, rnh)); + break; + + default: + return (EINVAL); + } + + if (xent != NULL) { + xent->bytes = 0; + xent->packets = 0; + xent->timestamp = time_uptime; + + return (0); + } + return (EINVAL); +} + +int +ipfw_lookup_table_xentry(struct ip_fw_chain *ch, ipfw_table_xentry *arg) +{ + struct radix_node_head *rnh; + struct table_xentry *xent; + + if (arg->tbl >= V_fw_tables_max) + return (EINVAL); + if (ch->tables[arg->tbl] != NULL) + rnh = ch->tables[arg->tbl]; + else if (ch->xtables[arg->tbl] != NULL) + rnh = ch->xtables[arg->tbl]; + else + return (EINVAL); + + switch (arg->type) { + case IPFW_TABLE_CIDR: + { + if (ch->tables[arg->tbl] != NULL) { + /* XXX: Maybe better by FreeBSD 11!! */ + struct sockaddr_in sa; + struct table_entry *ent; + + KEY_LEN(sa) = KEY_LEN_INET; + sa.sin_addr.s_addr = *((in_addr_t *)&arg->k.addr6); + ent = (struct table_entry *)(rnh->rnh_matchaddr(&sa, rnh)); + if (ent == NULL) + return (EINVAL); + + arg->bytes = ent->bytes; + arg->packets = ent->packets; + arg->value = ent->value; + arg->timestamp = ent->timestamp; + arg->mac_addr = ent->mac_addr; + return (0); + } else { + struct sockaddr_in6 sa6; + KEY_LEN(sa6) = KEY_LEN_INET6; + memcpy(&sa6.sin6_addr, &arg->k.addr6, sizeof(struct in6_addr)); + xent = (struct table_xentry *)(rnh->rnh_matchaddr(&sa6, rnh)); + } + } + break; + + case IPFW_TABLE_INTERFACE: + { + struct xaddr_iface iface; + + KEY_LEN(iface) = KEY_LEN_IFACE + + strlcpy(iface.ifname, arg->k.iface, IF_NAMESIZE) + 1; + /* Assume direct match */ + /* FIXME: Add interface pattern matching */ + xent = (struct table_xentry *)(rnh->rnh_lookup(&iface, NULL, rnh)); + } + break; + + default: + return (0); + } + + if (xent != NULL) { + arg->bytes = xent->bytes; + arg->packets = xent->packets; + arg->value = xent->value; + arg->timestamp = xent->timestamp; + arg->mac_addr = xent->mac_addr; + + return (0); + } + return (EINVAL); +} + +void * ipfw_lookup_table_extended(struct ip_fw_chain *ch, uint16_t tbl, void *paddr, - uint32_t *val, int type) + uint32_t *val, int type, struct ether_addr *ea) { struct radix_node_head *rnh; struct table_xentry *xent; @@ -562,15 +758,21 @@ ipfw_lookup_table_extended(struct ip_fw_chain *ch, uint16_t tbl, void *paddr, struct xaddr_iface iface; if (tbl >= V_fw_tables_max) - return (0); + return (NULL); if ((rnh = ch->xtables[tbl]) == NULL) - return (0); + return (NULL); switch (type) { case IPFW_TABLE_CIDR: KEY_LEN(sa6) = KEY_LEN_INET6; memcpy(&sa6.sin6_addr, paddr, sizeof(struct in6_addr)); xent = (struct table_xentry *)(rnh->rnh_matchaddr(&sa6, rnh)); + if (xent != NULL) { + if (ea && xent->mac_addr) { + if (bcmp((u_char *)&xent->mac_addr, ea->octet, ETHER_ADDR_LEN) != 0) + xent = NULL; + } + } break; case IPFW_TABLE_INTERFACE: @@ -582,14 +784,14 @@ ipfw_lookup_table_extended(struct ip_fw_chain *ch, uint16_t tbl, void *paddr, break; default: - return (0); + return (NULL); } if (xent != NULL) { *val = xent->value; - return (1); + return (xent); } - return (0); + return (NULL); } static int @@ -696,9 +898,13 @@ dump_table_xentry_base(struct radix_node *rn, void *arg) else xent->masklen = 33 - ffs(ntohl(n->mask.sin_addr.s_addr)); /* Save IPv4 address as deprecated IPv6 compatible */ - xent->k.addr6.s6_addr32[3] = n->addr.sin_addr.s_addr; + xent->k.addr6.s6_addr32[0] = n->addr.sin_addr.s_addr; xent->flags = IPFW_TCF_INET; xent->value = n->value; + xent->bytes = n->bytes; + xent->packets = n->packets; + xent->timestamp = n->timestamp; + xent->mac_addr = n->mac_addr; tbl->cnt++; return (0); } @@ -742,6 +948,10 @@ dump_table_xentry_extended(struct radix_node *rn, void *arg) } xent->value = n->value; + xent->bytes = n->bytes; + xent->packets = n->packets; + xent->timestamp = n->timestamp; + xent->mac_addr = n->mac_addr; tbl->cnt++; return (0); } diff --git a/sys/netpfil/pf/if_pflog.c b/sys/netpfil/pf/if_pflog.c index 1efd5e2..5c22806 100644 --- a/sys/netpfil/pf/if_pflog.c +++ b/sys/netpfil/pf/if_pflog.c @@ -209,7 +209,7 @@ pflog_packet(struct pfi_kif *kif, struct mbuf *m, sa_family_t af, u_int8_t dir, return (0); bzero(&hdr, sizeof(hdr)); - hdr.length = PFLOG_REAL_HDRLEN; + hdr.length = PFLOG_HDRLEN; hdr.af = af; hdr.action = rm->action; hdr.reason = reason; @@ -218,13 +218,16 @@ pflog_packet(struct pfi_kif *kif, struct mbuf *m, sa_family_t af, u_int8_t dir, if (am == NULL) { hdr.rulenr = htonl(rm->nr); hdr.subrulenr = 1; + hdr.ridentifier = rm->cuid; } else { hdr.rulenr = htonl(am->nr); hdr.subrulenr = htonl(rm->nr); + hdr.ridentifier = rm->cuid; if (ruleset != NULL && ruleset->anchor != NULL) strlcpy(hdr.ruleset, ruleset->anchor->name, sizeof(hdr.ruleset)); } +#ifdef PF_USER_INFO /* * XXXGL: we avoid pf_socket_lookup() when we are holding * state lock, since this leads to unsafe LOR. @@ -239,6 +242,7 @@ pflog_packet(struct pfi_kif *kif, struct mbuf *m, sa_family_t af, u_int8_t dir, hdr.pid = NO_PID; hdr.rule_uid = rm->cuid; hdr.rule_pid = rm->cpid; +#endif hdr.dir = dir; #ifdef INET diff --git a/sys/netpfil/pf/if_pfsync.c b/sys/netpfil/pf/if_pfsync.c index 90e6f8f..7936c07 100644 --- a/sys/netpfil/pf/if_pfsync.c +++ b/sys/netpfil/pf/if_pfsync.c @@ -185,9 +185,6 @@ struct pfsync_softc { struct ip_moptions sc_imo; struct in_addr sc_sync_peer; uint32_t sc_flags; -#define PFSYNCF_OK 0x00000001 -#define PFSYNCF_DEFER 0x00000002 -#define PFSYNCF_PUSH 0x00000004 uint8_t sc_maxupdates; struct ip sc_template; struct callout sc_tmo; @@ -365,7 +362,7 @@ pfsync_clone_destroy(struct ifnet *ifp) callout_drain(&sc->sc_bulkfail_tmo); callout_drain(&sc->sc_bulk_tmo); - if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) + if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p && V_pfsync_carp_adj > 0) (*carp_demote_adj_p)(-V_pfsync_carp_adj, "pfsync destroy"); bpfdetach(ifp); if_detach(ifp); @@ -1150,7 +1147,7 @@ pfsync_in_bus(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) sc->sc_ureq_sent = 0; sc->sc_bulk_tries = 0; callout_stop(&sc->sc_bulkfail_tmo); - if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) + if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p && V_pfsync_carp_adj > 0) (*carp_demote_adj_p)(-V_pfsync_carp_adj, "pfsync bulk done"); sc->sc_flags |= PFSYNCF_OK; @@ -1308,8 +1305,7 @@ pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data) } pfsyncr.pfsyncr_syncpeer = sc->sc_sync_peer; pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates; - pfsyncr.pfsyncr_defer = (PFSYNCF_DEFER == - (sc->sc_flags & PFSYNCF_DEFER)); + pfsyncr.pfsyncr_defer = sc->sc_flags; PFSYNC_UNLOCK(sc); return (copyout(&pfsyncr, ifr->ifr_data, sizeof(pfsyncr))); @@ -1401,7 +1397,7 @@ pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data) ip->ip_dst.s_addr = sc->sc_sync_peer.s_addr; /* Request a full state table update. */ - if ((sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) + if ((sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p && V_pfsync_carp_adj > 0) (*carp_demote_adj_p)(V_pfsync_carp_adj, "pfsync bulk start"); sc->sc_flags &= ~PFSYNCF_OK; @@ -1631,6 +1627,7 @@ pfsync_sendout(int schedswi) sc->sc_ifp->if_obytes += m->m_pkthdr.len; sc->sc_len = PFSYNC_MINPKT; + /* XXX: SHould not drop voluntarily update packets! */ if (!_IF_QFULL(&sc->sc_ifp->if_snd)) _IF_ENQUEUE(&sc->sc_ifp->if_snd, m); else { @@ -1776,7 +1773,7 @@ pfsync_undefer_state(struct pf_state *st, int drop) } } - panic("%s: unable to find deferred state", __func__); + printf("%s: unable to find deferred state", __func__); } static void @@ -2144,7 +2141,7 @@ pfsync_bulk_fail(void *arg) sc->sc_ureq_sent = 0; sc->sc_bulk_tries = 0; PFSYNC_LOCK(sc); - if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) + if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p && V_pfsync_carp_adj > 0) (*carp_demote_adj_p)(-V_pfsync_carp_adj, "pfsync bulk fail"); sc->sc_flags |= PFSYNCF_OK; diff --git a/sys/netpfil/pf/pf.c b/sys/netpfil/pf/pf.c index 48da880..ebda220 100644 --- a/sys/netpfil/pf/pf.c +++ b/sys/netpfil/pf/pf.c @@ -62,6 +62,8 @@ __FBSDID("$FreeBSD$"); #include <net/if.h> #include <net/if_types.h> +#include <net/ethernet.h> +#include <net/if_vlan_var.h> #include <net/route.h> #include <net/radix_mpath.h> #include <net/vnet.h> @@ -86,6 +88,9 @@ __FBSDID("$FreeBSD$"); #include <netinet/udp_var.h> #include <netpfil/ipfw/ip_fw_private.h> /* XXX: only for DIR_IN/DIR_OUT */ +#include <netinet/ip_fw.h> +#include <netinet/ip_dummynet.h> +#include <netinet/ip_divert.h> #ifdef INET6 #include <netinet/ip6.h> @@ -226,6 +231,8 @@ static int pf_state_key_attach(struct pf_state_key *, static void pf_state_key_detach(struct pf_state *, int); static int pf_state_key_ctor(void *, int, void *, int); static u_int32_t pf_tcp_iss(struct pf_pdesc *); +void pf_rule_to_actions(struct pf_rule *, + struct pf_rule_actions *); static int pf_test_rule(struct pf_rule **, struct pf_state **, int, struct pfi_kif *, struct mbuf *, int, struct pf_pdesc *, struct pf_rule **, @@ -258,7 +265,8 @@ static int pf_test_state_icmp(struct pf_state **, int, struct pfi_kif *, struct mbuf *, int, void *, struct pf_pdesc *, u_short *); static int pf_test_state_other(struct pf_state **, int, - struct pfi_kif *, struct mbuf *, struct pf_pdesc *); + struct pfi_kif *, struct mbuf *, int, + struct pf_pdesc *); static u_int8_t pf_get_wscale(struct mbuf *, int, u_int16_t, sa_family_t); static u_int16_t pf_get_mss(struct mbuf *, int, u_int16_t, @@ -281,6 +289,10 @@ static u_int pf_purge_expired_states(u_int, int); static void pf_purge_unlinked_rules(void); static int pf_mtag_uminit(void *, int, int); static void pf_mtag_free(struct m_tag *); +static void pf_packet_redo_nat(struct mbuf *, struct pf_pdesc *, + int, struct pf_state *, int); +static void pf_packet_undo_nat(struct mbuf *, struct pf_pdesc *, + int, struct pf_state *, int); #ifdef INET static void pf_route(struct mbuf **, struct pf_rule *, int, struct ifnet *, struct pf_state *, @@ -300,28 +312,36 @@ VNET_DECLARE(int, pf_end_threads); VNET_DEFINE(struct pf_limit, pf_limits[PF_LIMIT_MAX]); -#define PACKET_LOOPED(pd) ((pd)->pf_mtag && \ - (pd)->pf_mtag->flags & PF_PACKET_LOOPED) +#define PACKET_LOOPED(mtag) ((mtag)->flags & PF_PACKET_LOOPED) + +#define PF_DIVERT_MAXPACKETS_REACHED() \ +do { \ + if (r->spare2 && \ + s->packets[dir == PF_OUT] > r->spare2) \ + /* fake that divert already happened */ \ + pd.pf_mtag->flags |= PF_PACKET_LOOPED; \ +} while(0) #define STATE_LOOKUP(i, k, d, s, pd) \ do { \ (s) = pf_find_state((i), (k), (d)); \ if ((s) == NULL) \ return (PF_DROP); \ - if (PACKET_LOOPED(pd)) \ + if (PACKET_LOOPED(pd->pf_mtag)) { \ + if ((s)->key[PF_SK_WIRE] != (s)->key[PF_SK_STACK]) { \ + pf_packet_redo_nat(m, pd, off, s, direction); \ + } \ return (PF_PASS); \ + } \ if ((d) == PF_OUT && \ (((s)->rule.ptr->rt == PF_ROUTETO && \ - (s)->rule.ptr->direction == PF_OUT) || \ - ((s)->rule.ptr->rt == PF_REPLYTO && \ - (s)->rule.ptr->direction == PF_IN)) && \ + (s)->rule.ptr->direction == PF_OUT)) && \ (s)->rt_kif != NULL && \ (s)->rt_kif != (i)) \ return (PF_PASS); \ } while (0) -#define BOUND_IFACE(r, k) \ - ((r)->rule_flag & PFRULE_IFBOUND) ? (k) : V_pfi_all +#define BOUND_IFACE(r, k) k #define STATE_INC_COUNTERS(s) \ do { \ @@ -407,6 +427,160 @@ pf_addr_cmp(struct pf_addr *a, struct pf_addr *b, sa_family_t af) return (0); } +static void +pf_packet_undo_nat(struct mbuf *m, struct pf_pdesc *pd, int off, + struct pf_state *state, int direction) +{ + struct pf_state_key *nk; + + if (state == NULL || state->nat_rule.ptr == NULL) + return; + + if (state->nat_rule.ptr->action == PF_RDR || + state->nat_rule.ptr->action == PF_BINAT) + nk = (state)->key[PF_SK_WIRE]; + else + nk = (state)->key[PF_SK_STACK]; + + switch (pd->proto) { + case IPPROTO_TCP: { + struct tcphdr *th = pd->hdr.tcp; + + if (direction == PF_OUT) { + pf_change_ap(m, pd->src, &th->th_sport, pd->ip_sum, + &th->th_sum, &nk->addr[pd->sidx], + nk->port[pd->sidx], 0, pd->af); + } else { + pf_change_ap(m, pd->dst, &th->th_dport, pd->ip_sum, + &th->th_sum, &nk->addr[pd->didx], + nk->port[pd->didx], 0, pd->af); + } + m_copyback(m, off, sizeof(*th), (caddr_t)th); + } + break; + case IPPROTO_UDP: { + struct udphdr *uh = pd->hdr.udp; + + if (direction == PF_OUT) { + pf_change_ap(m, pd->src, &uh->uh_sport, pd->ip_sum, + &uh->uh_sum, &nk->addr[pd->sidx], + nk->port[pd->sidx], 1, pd->af); + } else { + pf_change_ap(m, pd->dst, &uh->uh_dport, pd->ip_sum, + &uh->uh_sum, &nk->addr[pd->didx], + nk->port[pd->didx], 1, pd->af); + } + m_copyback(m, off, sizeof(*uh), (caddr_t)uh); + } + break; + /* case IPPROTO_ICMP: */ + /* XXX: If we want to do this for icmp is probably wrong!?! */ + /* break; */ + default: + if (direction == PF_OUT) { + switch (pd->af) { + case AF_INET: + pf_change_a(&pd->src->v4.s_addr, + pd->ip_sum, nk->addr[pd->sidx].v4.s_addr, + 0); + break; + case AF_INET6: + PF_ACPY(pd->src, &nk->addr[pd->sidx], pd->af); + break; + } + } else { + switch (pd->af) { + case AF_INET: + pf_change_a(&pd->dst->v4.s_addr, + pd->ip_sum, nk->addr[pd->didx].v4.s_addr, + 0); + break; + case AF_INET6: + PF_ACPY(pd->dst, &nk->addr[pd->didx], pd->af); + break; + } + } + break; + } +} + +static void +pf_packet_redo_nat(struct mbuf *m, struct pf_pdesc *pd, int off, + struct pf_state *state, int direction) +{ + struct pf_state_key *nk; + + if (state == NULL || state->nat_rule.ptr == NULL) + return; + + if (state->nat_rule.ptr->action == PF_RDR || + state->nat_rule.ptr->action == PF_BINAT) + nk = (state)->key[PF_SK_STACK]; + else + nk = (state)->key[PF_SK_WIRE]; + + switch (pd->proto) { + case IPPROTO_TCP: { + struct tcphdr *th = pd->hdr.tcp; + + if (direction == PF_OUT) { + pf_change_ap(m, pd->src, &th->th_sport, pd->ip_sum, + &th->th_sum, &nk->addr[pd->sidx], + nk->port[pd->sidx], 0, pd->af); + } else { + pf_change_ap(m, pd->dst, &th->th_dport, pd->ip_sum, + &th->th_sum, &nk->addr[pd->didx], + nk->port[pd->didx], 0, pd->af); + } + m_copyback(m, off, sizeof(*th), (caddr_t)th); + } + break; + case IPPROTO_UDP: { + struct udphdr *uh = pd->hdr.udp; + + if (direction == PF_OUT) { + pf_change_ap(m, pd->src, &uh->uh_sport, pd->ip_sum, + &uh->uh_sum, &nk->addr[pd->sidx], + nk->port[pd->sidx], 1, pd->af); + } else { + pf_change_ap(m, pd->dst, &uh->uh_dport, pd->ip_sum, + &uh->uh_sum, &nk->addr[pd->didx], + nk->port[pd->didx], 1, pd->af); + } + m_copyback(m, off, sizeof(*uh), (caddr_t)uh); + } + break; + /* case IPPROTO_ICMP: */ + /* XXX: If we want to do this for icmp is probably wrong!?! */ + /* break; */ + default: + if (direction == PF_OUT) { + switch (pd->af) { + case AF_INET: + pf_change_a(&pd->src->v4.s_addr, + pd->ip_sum, nk->addr[pd->sidx].v4.s_addr, + 0); + break; + case AF_INET6: + PF_ACPY(pd->src, &nk->addr[pd->sidx], pd->af); + break; + } + } else { + switch (pd->af) { + case AF_INET: + pf_change_a(&pd->dst->v4.s_addr, + pd->ip_sum, nk->addr[pd->didx].v4.s_addr, + 0); + break; + case AF_INET6: + PF_ACPY(pd->dst, &nk->addr[pd->didx], pd->af); + break; + } + } + break; + } +} + static __inline uint32_t pf_hashkey(struct pf_state_key *sk) { @@ -440,6 +614,20 @@ pf_hashsrc(struct pf_addr *addr, sa_family_t af) return (h & pf_srchashmask); } +#ifdef ALTQ +static int +pf_state_hash(struct pf_state *s) +{ + u_int32_t hv = (intptr_t)s / sizeof(*s); + + hv ^= crc32(&s->src, sizeof(s->src)); + hv ^= crc32(&s->dst, sizeof(s->dst)); + if (hv == 0) + hv = 1; + return (hv); +} +#endif + #ifdef INET6 void pf_addrcpy(struct pf_addr *dst, struct pf_addr *src, sa_family_t af) @@ -1285,7 +1473,7 @@ pf_find_state(struct pfi_kif *kif, struct pf_state_key_cmp *key, u_int dir) /* List is sorted, if-bound states before floating ones. */ TAILQ_FOREACH(s, &sk->states[idx], key_list[idx]) - if (s->kif == V_pfi_all || s->kif == kif) { + { PF_STATE_LOCK(s); PF_HASHROW_UNLOCK(kh); if (s->timeout >= PFTM_MAX) { @@ -1971,9 +2159,9 @@ pf_addr_wrap_neq(struct pf_addr_wrap *aw1, struct pf_addr_wrap *aw2) switch (aw1->type) { case PF_ADDR_ADDRMASK: case PF_ADDR_RANGE: - if (PF_ANEQ(&aw1->v.a.addr, &aw2->v.a.addr, 0)) + if (PF_ANEQ(&aw1->v.a.addr, &aw2->v.a.addr, AF_INET6)) return (1); - if (PF_ANEQ(&aw1->v.a.mask, &aw2->v.a.mask, 0)) + if (PF_ANEQ(&aw1->v.a.mask, &aw2->v.a.mask, AF_INET6)) return (1); return (0); case PF_ADDR_DYNIFTL: @@ -2440,6 +2628,26 @@ pf_send_tcp(struct mbuf *replyto, const struct pf_rule *r, sa_family_t af, pf_send(pfse); } +int +pf_ieee8021q_setpcp(struct mbuf *m, struct pf_rule *r) +{ + struct m_tag *mtag; + + KASSERT(r->ieee8021q_pcp.setpcp & SETPCP_VALID, + ("%s with invalid setpcp", __func__)); + + mtag = m_tag_locate(m, MTAG_8021Q, MTAG_8021Q_PCP_OUT, NULL); + if (mtag == NULL) { + mtag = m_tag_alloc(MTAG_8021Q, MTAG_8021Q_PCP_OUT, + sizeof(uint8_t), M_NOWAIT); + if (mtag == NULL) + return (ENOMEM); + m_tag_prepend(m, mtag); + } + *(uint8_t *)(mtag + 1) = (r->ieee8021q_pcp.setpcp & SETPCP_PCP_MASK); + return (0); +} + static void pf_send_icmp(struct mbuf *m, u_int8_t type, u_int8_t code, sa_family_t af, struct pf_rule *r) @@ -2613,6 +2821,37 @@ pf_match_port(u_int8_t op, u_int16_t a1, u_int16_t a2, u_int16_t p) return (pf_match(op, a1, a2, p)); } +int +pf_match_ieee8021q_pcp(u_int8_t op, u_int8_t pcp1, u_int8_t pcp2, + struct mbuf *m) +{ + struct m_tag *mtag; + uint8_t mpcp; + + /* + * Packets without 802.1q headers are treated as having a PCP of 0 + * (best effort). + */ + mtag = m_tag_locate(m, MTAG_8021Q, MTAG_8021Q_PCP_IN, NULL); + if (mtag != NULL) + mpcp = *(uint8_t *)(mtag + 1); + else + mpcp = IEEE8021Q_PCP_BE; + + /* + * 802.1q uses a non-traditional ordering, in which 1 < 0, allowing + * default 0-tagged ("best effort") traffic to take precedence over + * 1-tagged ("background") traffic. Renumber both PCP arguments + * before making a comparison so that we can use boring arithmetic + * operators. + */ + pcp1 = ((pcp1 == 0) ? 1 : ((pcp1 == 1) ? 0 : pcp1)); + pcp2 = ((pcp2 == 0) ? 1 : ((pcp2 == 1) ? 0 : pcp2)); + mpcp = ((mpcp == 0) ? 1 : ((mpcp == 1) ? 0 : mpcp)); + return (pf_match(op, pcp1, pcp2, mpcp)); +} + +#ifdef PF_USER_INFO static int pf_match_uid(u_int8_t op, uid_t a1, uid_t a2, uid_t u) { @@ -2628,6 +2867,7 @@ pf_match_gid(u_int8_t op, gid_t a1, gid_t a2, gid_t g) return (0); return (pf_match(op, a1, a2, g)); } +#endif int pf_match_tag(struct mbuf *m, struct pf_rule *r, int *tag, int mtag) @@ -2821,6 +3061,22 @@ pf_addr_inc(struct pf_addr *addr, sa_family_t af) } #endif /* INET6 */ +void +pf_rule_to_actions(struct pf_rule *r, struct pf_rule_actions *a) +{ + if (r->qid) + a->qid = r->qid; + if (r->pqid) + a->pqid = r->pqid; + if (r->pdnpipe) + a->pdnpipe = r->pdnpipe; + if (r->dnpipe) + a->dnpipe = r->dnpipe; + if (r->free_flags & PFRULE_DN_IS_PIPE) + a->flags |= PFRULE_DN_IS_PIPE; +} + +#ifdef PF_USER_INFO int pf_socket_lookup(int direction, struct pf_pdesc *pd, struct mbuf *m) { @@ -2900,6 +3156,7 @@ pf_socket_lookup(int direction, struct pf_pdesc *pd, struct mbuf *m) return (1); } +#endif static u_int8_t pf_get_wscale(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af) @@ -3091,12 +3348,14 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction, PF_RULES_RASSERT(); +#ifdef PF_USER_INFO if (inp != NULL) { INP_LOCK_ASSERT(inp); pd->lookup.uid = inp->inp_cred->cr_uid; pd->lookup.gid = inp->inp_cred->cr_groups[0]; pd->lookup.done = 1; } +#endif switch (pd->proto) { case IPPROTO_TCP: @@ -3307,7 +3566,11 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction, /* icmp only. type always 0 in other cases */ else if (r->code && r->code != icmpcode + 1) r = TAILQ_NEXT(r, entries); - else if (r->tos && !(r->tos == pd->tos)) + else if ((r->rule_flag & PFRULE_TOS) && r->tos && + !(r->tos == pd->tos)) + r = TAILQ_NEXT(r, entries); + else if ((r->rule_flag & PFRULE_DSCP) && r->tos && + !(r->tos == (pd->tos & DSCP_MASK))) r = TAILQ_NEXT(r, entries); else if (r->rule_flag & PFRULE_FRAGMENT) r = TAILQ_NEXT(r, entries); @@ -3315,6 +3578,7 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction, (r->flagset & th->th_flags) != r->flags) r = TAILQ_NEXT(r, entries); /* tcp/udp only. uid.op always 0 in other cases */ +#ifdef PF_USER_INFO else if (r->uid.op && (pd->lookup.done || (pd->lookup.done = pf_socket_lookup(direction, pd, m), 1)) && !pf_match_uid(r->uid.op, r->uid.uid[0], r->uid.uid[1], @@ -3326,6 +3590,11 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction, !pf_match_gid(r->gid.op, r->gid.gid[0], r->gid.gid[1], pd->lookup.gid)) r = TAILQ_NEXT(r, entries); +#endif + else if (r->ieee8021q_pcp.op && + !pf_match_ieee8021q_pcp(r->ieee8021q_pcp.op, + r->ieee8021q_pcp.pcp[0], r->ieee8021q_pcp.pcp[1], m)) + r = TAILQ_NEXT(r, entries); else if (r->prob && r->prob <= arc4random()) r = TAILQ_NEXT(r, entries); @@ -3343,10 +3612,20 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction, if (r->rtableid >= 0) rtableid = r->rtableid; if (r->anchor == NULL) { - match = 1; - *rm = r; - *am = a; - *rsm = ruleset; + if (r->action == PF_MATCH) { + r->packets[direction == PF_OUT]++; + r->bytes[direction == PF_OUT] += pd->tot_len; + pf_rule_to_actions(r, &pd->act); + if (r->log) + PFLOG_PACKET(kif, m, af, + direction, PFRES_MATCH, r, + a, ruleset, pd, 1); + } else { + match = 1; + *rm = r; + *am = a; + *rsm = ruleset; + } if ((*rm)->quick) break; r = TAILQ_NEXT(r, entries); @@ -3365,6 +3644,9 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction, REASON_SET(&reason, PFRES_MATCH); + /* apply actions for last matching pass/block rule */ + pf_rule_to_actions(r, &pd->act); + if (r->log || (nr != NULL && nr->log)) { if (rewrite) m_copyback(m, off, hdrlen, pd->hdr.any); @@ -3538,6 +3820,11 @@ pf_create_state(struct pf_rule *r, struct pf_rule *nr, struct pf_rule *a, s->state_flags |= PFSTATE_SLOPPY; s->log = r->log & PF_LOG_ALL; s->sync_state = PFSYNC_S_NONE; + s->qid = pd->act.qid; + s->pqid = pd->act.pqid; + s->pdnpipe = pd->act.pdnpipe; + s->dnpipe = pd->act.dnpipe; + s->state_flags |= pd->act.flags; if (nr != NULL) s->log |= nr->log & PF_LOG_ALL; switch (pd->proto) { @@ -3776,6 +4063,9 @@ pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif, r = r->skip[PF_SKIP_DST_ADDR].ptr; else if (r->tos && !(r->tos == pd->tos)) r = TAILQ_NEXT(r, entries); + else if ((r->rule_flag & PFRULE_DSCP) && r->tos && + !(r->tos == (pd->tos & DSCP_MASK))) + r = TAILQ_NEXT(r, entries); else if (r->os_fingerprint != PF_OSFP_ANY) r = TAILQ_NEXT(r, entries); else if (pd->proto == IPPROTO_UDP && @@ -3788,6 +4078,10 @@ pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif, pd->proto == IPPROTO_ICMPV6) && (r->type || r->code)) r = TAILQ_NEXT(r, entries); + else if (r->ieee8021q_pcp.op && + !pf_match_ieee8021q_pcp(r->ieee8021q_pcp.op, + r->ieee8021q_pcp.pcp[0], r->ieee8021q_pcp.pcp[1], m)) + r = TAILQ_NEXT(r, entries); else if (r->prob && r->prob <= (arc4random() % (UINT_MAX - 1) + 1)) r = TAILQ_NEXT(r, entries); @@ -3796,10 +4090,20 @@ pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif, r = TAILQ_NEXT(r, entries); else { if (r->anchor == NULL) { - match = 1; - *rm = r; - *am = a; - *rsm = ruleset; + if (r->action == PF_MATCH) { + r->packets[direction == PF_OUT]++; + r->bytes[direction == PF_OUT] += pd->tot_len; + pf_rule_to_actions(r, &pd->act); + if (r->log) + PFLOG_PACKET(kif, m, af, + direction, PFRES_MATCH, r, + a, ruleset, pd, 1); + } else { + match = 1; + *rm = r; + *am = a; + *rsm = ruleset; + } if ((*rm)->quick) break; r = TAILQ_NEXT(r, entries); @@ -3818,6 +4122,9 @@ pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif, REASON_SET(&reason, PFRES_MATCH); + /* apply actions for last matching pass/block rule */ + pf_rule_to_actions(r, &pd->act); + if (r->log) PFLOG_PACKET(kif, m, af, direction, reason, r, a, ruleset, pd, 1); @@ -5061,7 +5368,7 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, static int pf_test_state_other(struct pf_state **state, int direction, struct pfi_kif *kif, - struct mbuf *m, struct pf_pdesc *pd) + struct mbuf *m, int off, struct pf_pdesc *pd) { struct pf_state_peer *src, *dst; struct pf_state_key_cmp key; @@ -5339,6 +5646,12 @@ pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, ip = mtod(m0, struct ip *); + if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) { + if (s) + PF_STATE_UNLOCK(s); + return; + } + bzero(&dst, sizeof(dst)); dst.sin_family = AF_INET; dst.sin_len = sizeof(dst); @@ -5386,7 +5699,71 @@ pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, if (ifp == NULL) goto bad; - if (oifp != ifp) { + else if (r->rt == PF_REPLYTO || (r->rt == PF_ROUTETO && ifp->if_type == IFT_ENC)) { + /* XXX: Copied from ifaof_ifpforaddr() since it mostly will not return NULL! */ + struct sockaddr_in inaddr; + struct sockaddr *addr; + struct ifaddr *ifa; + char *cp, *cp2, *cp3; + char *cplim; + + inaddr.sin_addr = ip->ip_dst; + inaddr.sin_family = AF_INET; + inaddr.sin_len = sizeof(inaddr); + inaddr.sin_port = 0; + addr = (struct sockaddr *)&inaddr; + + IF_ADDR_RLOCK(ifp); + TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { + if (ifa->ifa_addr->sa_family != AF_INET) + continue; + if (ifa->ifa_netmask == 0) { + if ((bcmp(addr, ifa->ifa_addr, addr->sa_len) == 0) || + (ifa->ifa_dstaddr && + (bcmp(addr, ifa->ifa_dstaddr, addr->sa_len) == 0))) { + IF_ADDR_RUNLOCK(ifp); + return; + } + continue; + } + if (ifp->if_flags & IFF_POINTOPOINT) { + if (bcmp(addr, ifa->ifa_dstaddr, addr->sa_len) == 0) { + IF_ADDR_RUNLOCK(ifp); + return; + } + } else { + cp = addr->sa_data; + cp2 = ifa->ifa_addr->sa_data; + cp3 = ifa->ifa_netmask->sa_data; + cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask; + for (; cp3 < cplim; cp3++) + if ((*cp++ ^ *cp2++) & *cp3) + break; + if (cp3 == cplim) { + IF_ADDR_RUNLOCK(ifp); + return; + } + } + } + IF_ADDR_RUNLOCK(ifp); + } + else if (r->rt == PF_ROUTETO && r->direction == dir && in_localip(ip->ip_dst)) + return; + + if (s != NULL && r->rt == PF_REPLYTO) { + /* + * Send it out since it came from state recorded ifp(rt_addr). + * Routing table lookup might have chosen not correct interface! + */ + } else if (oifp != ifp) { + if (in_broadcast(ip->ip_dst, oifp)) /* XXX: LOCKING of address list?! */ + return; + + if (s && r->rt == PF_ROUTETO && pd->nat_rule != NULL && + r->direction == PF_OUT && r->direction == dir && pd->pf_mtag->routed < 2) { + pf_packet_undo_nat(m0, pd, ntohs(ip->ip_off), s, dir); + } + if (pf_test(PF_OUT, ifp, &m0, NULL) != PF_PASS) goto bad; else if (m0 == NULL) @@ -5440,6 +5817,9 @@ pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, error = EMSGSIZE; KMOD_IPSTAT_INC(ips_cantfrag); if (r->rt != PF_DUPTO) { + if (s && pd->nat_rule != NULL) + pf_packet_undo_nat(m0, pd, ntohs(ip->ip_off), s, dir); + icmp_error(m0, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, 0, ifp->if_mtu); goto done; @@ -5519,6 +5899,12 @@ pf_route6(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, ip6 = mtod(m0, struct ip6_hdr *); + if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_src)) { + if (s) + PF_STATE_UNLOCK(s); + return; + } + bzero(&dst, sizeof(dst)); dst.sin6_family = AF_INET6; dst.sin6_len = sizeof(dst); @@ -5558,9 +5944,71 @@ pf_route6(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, if (ifp == NULL) goto bad; + else if (r->rt == PF_REPLYTO) { + /* XXX: Copied from ifaof_ifpforaddr() since it mostly will not return NULL! */ + struct sockaddr_in6 inaddr6; + struct sockaddr *addr; + struct ifaddr *ifa; + char *cp, *cp2, *cp3; + char *cplim; + + inaddr6.sin6_addr = ip6->ip6_dst; + inaddr6.sin6_family = AF_INET6; + inaddr6.sin6_len = sizeof(inaddr6); + inaddr6.sin6_port = 0; + inaddr6.sin6_flowinfo = 0; + addr = (struct sockaddr *)&inaddr6; + + IF_ADDR_RLOCK(ifp); + TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { + if (ifa->ifa_addr->sa_family != AF_INET6) + continue; + if (ifa->ifa_netmask == 0) { + if ((bcmp(addr, ifa->ifa_addr, addr->sa_len) == 0) || + (ifa->ifa_dstaddr && + (bcmp(addr, ifa->ifa_dstaddr, addr->sa_len) == 0))) { + IF_ADDR_RUNLOCK(ifp); + return; + } + continue; + } + if (ifp->if_flags & IFF_POINTOPOINT) { + if (bcmp(addr, ifa->ifa_dstaddr, addr->sa_len) == 0) { + IF_ADDR_RUNLOCK(ifp); + return; + } + } else { + cp = addr->sa_data; + cp2 = ifa->ifa_addr->sa_data; + cp3 = ifa->ifa_netmask->sa_data; + cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask; + for (; cp3 < cplim; cp3++) + if ((*cp++ ^ *cp2++) & *cp3) + break; + if (cp3 == cplim) { + IF_ADDR_RUNLOCK(ifp); + return; + } + } + } + IF_ADDR_RUNLOCK(ifp); + } else if (r->rt == PF_ROUTETO && r->direction == dir && in6_localaddr(&ip6->ip6_dst)) + return; + + if (s != NULL && r->rt == PF_REPLYTO) { + /* + * Send it out since it came from state recorded ifp(rt_addr). + * Routing table lookup might have chosen not correct interface! + */ + } else if (oifp != ifp) { + + if (s && r->rt == PF_ROUTETO && pd->nat_rule != NULL && + r->direction == PF_OUT && r->direction == dir && pd->pf_mtag->routed < 2) { + int ip_off = ((caddr_t)ip6 - m0->m_data) + sizeof(struct ip6_hdr); + pf_packet_undo_nat(m0, pd, ip_off, s, dir); + } - if (oifp != ifp) { - if (pf_test6(PF_FWD, ifp, &m0, NULL) != PF_PASS) + if (pf_test6(PF_OUT, ifp, &m0, NULL) != PF_PASS) goto bad; else if (m0 == NULL) goto done; @@ -5593,9 +6041,12 @@ pf_route6(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, nd6_output(ifp, ifp, m0, &dst, NULL); else { in6_ifstat_inc(ifp, ifs6_in_toobig); - if (r->rt != PF_DUPTO) + if (r->rt != PF_DUPTO) { + if (s && pd->nat_rule != NULL) + pf_packet_undo_nat(m0, pd, ((caddr_t)ip6 - m0->m_data) + sizeof(struct ip6_hdr), s, dir); + icmp6_error(m0, ICMP6_PACKET_TOO_BIG, 0, ifp->if_mtu); - else + } else goto bad; } @@ -5761,7 +6212,11 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, struct inpcb *inp) struct pf_state *s = NULL; struct pf_ruleset *ruleset = NULL; struct pf_pdesc pd; - int off, dirndx, pqid = 0; + int off = 0, dirndx, pqid = 0; + int loopedfrom = 0; + u_int16_t divertcookie = 0; + u_int8_t divflags = 0; + struct ip_fw_args dnflow; M_ASSERTPKTHDR(m); @@ -5783,26 +6238,33 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, struct inpcb *inp) if (m->m_flags & M_SKIP_FIREWALL) return (PF_PASS); - pd.pf_mtag = pf_find_mtag(m); + pd.pf_mtag = pf_get_mtag(m); + if (pd.pf_mtag == NULL) { + REASON_SET(&reason, PFRES_MEMORY); + log = 1; + DPFPRINTF(PF_DEBUG_MISC, + ("pf: dropping packet due to failed memory allocation for tags\n")); + return PF_DROP; + } PF_RULES_RLOCK(); - if (ip_divert_ptr != NULL && + if ((ip_divert_ptr != NULL || ip_dn_io_ptr != NULL) && ((ipfwtag = m_tag_locate(m, MTAG_IPFW_RULE, 0, NULL)) != NULL)) { struct ipfw_rule_ref *rr = (struct ipfw_rule_ref *)(ipfwtag+1); - if (rr->info & IPFW_IS_DIVERT && rr->rulenum == 0) { - if (pd.pf_mtag == NULL && - ((pd.pf_mtag = pf_get_mtag(m)) == NULL)) { - action = PF_DROP; - goto done; - } - pd.pf_mtag->flags |= PF_PACKET_LOOPED; - m_tag_delete(m, ipfwtag); + pd.pf_mtag->flags |= PF_PACKET_LOOPED; + if (rr->info & IPFW_IS_DUMMYNET) + loopedfrom = 1; + if (rr->info & IPFW_IS_DIVERT) { + divertcookie = rr->rulenum; + divflags = (u_int8_t)(divertcookie >> 8); + divertcookie &= ~PFSTATE_DIVERT_MASK; } if (pd.pf_mtag && pd.pf_mtag->flags & PF_FASTFWD_OURS_PRESENT) { m->m_flags |= M_FASTFWD_OURS; pd.pf_mtag->flags &= ~PF_FASTFWD_OURS_PRESENT; } + m_tag_delete(m, ipfwtag); } else if (pf_normalize_ip(m0, dir, kif, &reason, &pd) != PF_PASS) { /* We do IP header normalization and packet reassembly here */ action = PF_DROP; @@ -5845,6 +6307,10 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, struct inpcb *inp) struct tcphdr th; pd.hdr.tcp = &th; + dnflow.f_id._flags = th.th_flags; + dnflow.f_id.dst_port = ntohs(th.th_dport); + dnflow.f_id.src_port = ntohs(th.th_sport); + if (!pf_pull_hdr(m, off, &th, sizeof(th), &action, &reason, AF_INET)) { log = action != PF_PASS; @@ -5859,8 +6325,6 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, struct inpcb *inp) action = pf_test_state_tcp(&s, dir, kif, m, off, h, &pd, &reason); if (action == PF_PASS) { - if (pfsync_update_state_ptr != NULL) - pfsync_update_state_ptr(s); r = s->rule.ptr; a = s->anchor.ptr; log = s->log; @@ -5874,6 +6338,9 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, struct inpcb *inp) struct udphdr uh; pd.hdr.udp = &uh; + dnflow.f_id.dst_port = ntohs(uh.uh_dport); + dnflow.f_id.src_port = ntohs(uh.uh_sport); + if (!pf_pull_hdr(m, off, &uh, sizeof(uh), &action, &reason, AF_INET)) { log = action != PF_PASS; @@ -5888,8 +6355,6 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, struct inpcb *inp) } action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd); if (action == PF_PASS) { - if (pfsync_update_state_ptr != NULL) - pfsync_update_state_ptr(s); r = s->rule.ptr; a = s->anchor.ptr; log = s->log; @@ -5911,8 +6376,6 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, struct inpcb *inp) action = pf_test_state_icmp(&s, dir, kif, m, off, h, &pd, &reason); if (action == PF_PASS) { - if (pfsync_update_state_ptr != NULL) - pfsync_update_state_ptr(s); r = s->rule.ptr; a = s->anchor.ptr; log = s->log; @@ -5932,10 +6395,8 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, struct inpcb *inp) #endif default: - action = pf_test_state_other(&s, dir, kif, m, &pd); + action = pf_test_state_other(&s, dir, kif, m, off, &pd); if (action == PF_PASS) { - if (pfsync_update_state_ptr != NULL) - pfsync_update_state_ptr(s); r = s->rule.ptr; a = s->anchor.ptr; log = s->log; @@ -5956,6 +6417,17 @@ done: ("pf: dropping packet with ip options\n")); } + if (s) { + PF_DIVERT_MAXPACKETS_REACHED(); + + if (divflags) { + s->divert_cookie = divertcookie; + s->local_flags |= divflags; + } else { + divertcookie = s->divert_cookie; + divflags = s->local_flags; + } + } if (s && s->tag > 0 && pf_tag_packet(m, &pd, s->tag)) { action = PF_DROP; REASON_SET(&reason, PFRES_MEMORY); @@ -5963,23 +6435,160 @@ done: if (r->rtableid >= 0) M_SETFIB(m, r->rtableid); + if ((r->ieee8021q_pcp.setpcp & SETPCP_VALID) && + pf_ieee8021q_setpcp(m, r)) { + action = PF_DROP; + REASON_SET(&reason, PFRES_MEMORY); + log = 1; + DPFPRINTF(PF_DEBUG_MISC, + ("pf: failed to allocate 802.1q mtag\n")); + } + #ifdef ALTQ - if (action == PF_PASS && r->qid) { - if (pd.pf_mtag == NULL && - ((pd.pf_mtag = pf_get_mtag(m)) == NULL)) { - action = PF_DROP; - REASON_SET(&reason, PFRES_MEMORY); + if (s && s->qid) { + pd.act.pqid = s->pqid; + pd.act.qid = s->qid; + } else if (r->qid) { + pd.act.pqid = r->pqid; + pd.act.qid = r->qid; + } + if (action == PF_PASS && pd.act.qid) { + if (s != NULL) + pd.pf_mtag->qid_hash = pf_state_hash(s); + if (pqid || (pd.tos & IPTOS_LOWDELAY)) + pd.pf_mtag->qid = pd.act.pqid; + else + pd.pf_mtag->qid = pd.act.qid; + /* Add hints for ecn. */ + pd.pf_mtag->hdr = h; + } +#endif /* ALTQ */ + + if (divflags & PFSTATE_DIVERT_TAG) + pd.pf_mtag->tag = divertcookie; + else if (divflags & PFSTATE_DIVERT_ALTQ) + pd.pf_mtag->qid = divertcookie; + else if (divflags & PFSTATE_DIVERT_ACTION) { + struct pf_rule *dlr; + action = PF_DROP; + if (s) + pf_unlink_state(s, PF_ENTER_LOCKED); + REASON_SET(&reason, PFRES_DIVERT); + log = 1; + DPFPRINTF(PF_DEBUG_MISC, + ("pf: changing action to with overload from divert.\n")); + dlr = r; + PFLOG_PACKET(kif, m, AF_INET, dir, reason, dlr, a, + ruleset, &pd, (s == NULL)); + m_freem(*m0); + *m0 = NULL; + /* NOTE: Fake this to avoid divert giving errors to the application. */ + return (PF_PASS); + } + + if (divflags & PFSTATE_DIVERT_DNCOOKIE) { + pd.act.dnpipe = divertcookie; + pd.act.pdnpipe = divertcookie; + pd.act.flags |= PFRULE_DN_IS_PIPE; + } else if (s && (s->dnpipe || s->pdnpipe)) { + pd.act.dnpipe = s->dnpipe; + pd.act.pdnpipe = s->pdnpipe; + pd.act.flags = s->state_flags; + } else if (r->dnpipe || r->pdnpipe) { + pd.act.dnpipe = r->dnpipe; + pd.act.dnpipe = r->pdnpipe; + pd.act.flags = r->free_flags; + } + + if (pd.act.dnpipe && ip_dn_io_ptr != NULL && loopedfrom != 1) { + if (dir != r->direction && pd.act.pdnpipe) { + dnflow.rule.info = pd.act.pdnpipe; + } else if (dir == r->direction) { + dnflow.rule.info = pd.act.dnpipe; + } else + goto continueprocessing; + + if (pd.act.flags & PFRULE_DN_IS_PIPE) + dnflow.rule.info |= IPFW_IS_PIPE; + dnflow.f_id.addr_type = 4; /* IPv4 type */ + dnflow.f_id.proto = pd.proto; + if (dir == PF_OUT && s != NULL && s->nat_rule.ptr != NULL && + s->nat_rule.ptr->action == PF_NAT) + dnflow.f_id.src_ip = + ntohl(s->key[(s->direction == PF_IN)]-> + addr[(s->direction == PF_OUT)].v4.s_addr); + else + dnflow.f_id.src_ip = ntohl(h->ip_src.s_addr); + dnflow.f_id.dst_ip = ntohl(h->ip_dst.s_addr); + dnflow.f_id.extra = dnflow.rule.info; + + if (m->m_flags & M_FASTFWD_OURS) { + pd.pf_mtag->flags |= PF_FASTFWD_OURS_PRESENT; + m->m_flags &= ~M_FASTFWD_OURS; + } + + if (s != NULL && s->nat_rule.ptr) + pf_packet_undo_nat(m, &pd, off, s, dir); + + ip_dn_io_ptr(m0, + (dir == PF_IN) ? DIR_IN : DIR_OUT, + &dnflow); + /* This is dummynet fast io processing */ + if (*m0 != NULL) { + m_tag_delete(*m0, m_tag_first(*m0)); + pd.pf_mtag->flags &= ~PF_PACKET_LOOPED; + if (s != NULL && s->nat_rule.ptr) + pf_packet_redo_nat(m, &pd, off, s, dir); } else { - if (pqid || (pd.tos & IPTOS_LOWDELAY)) - pd.pf_mtag->qid = r->pqid; - else - pd.pf_mtag->qid = r->qid; - /* Add hints for ecn. */ - pd.pf_mtag->hdr = h; + *m0 = NULL; + if (s) + PF_STATE_UNLOCK(s); + return (action); } + } +continueprocessing: + + if (action == PF_PASS && r->divert.port && ip_divert_ptr != NULL && + !PACKET_LOOPED(&pd)) { + if (!r->spare2 || + (s && s->packets[dir == PF_OUT] <= r->spare2)) { + ipfwtag = m_tag_alloc(MTAG_IPFW_RULE, 0, + sizeof(struct ipfw_rule_ref), M_NOWAIT | M_ZERO); + if (ipfwtag != NULL) { + ((struct ipfw_rule_ref *)(ipfwtag+1))->info = + ntohs(r->divert.port); + ((struct ipfw_rule_ref *)(ipfwtag+1))->rulenum = dir; + + if (s) + PF_STATE_UNLOCK(s); + + m_tag_prepend(m, ipfwtag); + if (m->m_flags & M_FASTFWD_OURS) { + if (pd.pf_mtag == NULL && + ((pd.pf_mtag = pf_get_mtag(m)) == NULL)) { + action = PF_DROP; + REASON_SET(&reason, PFRES_MEMORY); + log = 1; + DPFPRINTF(PF_DEBUG_MISC, + ("pf: failed to allocate tag\n")); + } + pd.pf_mtag->flags |= PF_FASTFWD_OURS_PRESENT; + m->m_flags &= ~M_FASTFWD_OURS; + } + ip_divert_ptr(*m0, dir == PF_IN ? DIR_IN : DIR_OUT); + *m0 = NULL; + return (action); + } else { + /* XXX: ipfw has the same behaviour! */ + action = PF_DROP; + REASON_SET(&reason, PFRES_MEMORY); + log = 1; + DPFPRINTF(PF_DEBUG_MISC, + ("pf: failed to allocate divert tag\n")); + } + } } -#endif /* ALTQ */ /* * connections redirected to loopback should not match sockets @@ -5990,50 +6599,17 @@ done: pd.proto == IPPROTO_UDP) && s != NULL && s->nat_rule.ptr != NULL && (s->nat_rule.ptr->action == PF_RDR || s->nat_rule.ptr->action == PF_BINAT) && - (ntohl(pd.dst->v4.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) + (ntohl(pd.dst->v4.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) { m->m_flags |= M_SKIP_FIREWALL; - if (action == PF_PASS && r->divert.port && ip_divert_ptr != NULL && - !PACKET_LOOPED(&pd)) { + if (PACKET_LOOPED(pd.pf_mtag) && !loopedfrom) + m->m_flags |= M_FASTFWD_OURS; + } - ipfwtag = m_tag_alloc(MTAG_IPFW_RULE, 0, - sizeof(struct ipfw_rule_ref), M_NOWAIT | M_ZERO); - if (ipfwtag != NULL) { - ((struct ipfw_rule_ref *)(ipfwtag+1))->info = - ntohs(r->divert.port); - ((struct ipfw_rule_ref *)(ipfwtag+1))->rulenum = dir; + pd.pf_mtag->flags &= ~PF_PACKET_LOOPED; - if (s) - PF_STATE_UNLOCK(s); - - m_tag_prepend(m, ipfwtag); - if (m->m_flags & M_FASTFWD_OURS) { - if (pd.pf_mtag == NULL && - ((pd.pf_mtag = pf_get_mtag(m)) == NULL)) { - action = PF_DROP; - REASON_SET(&reason, PFRES_MEMORY); - log = 1; - DPFPRINTF(PF_DEBUG_MISC, - ("pf: failed to allocate tag\n")); - } else { - pd.pf_mtag->flags |= - PF_FASTFWD_OURS_PRESENT; - m->m_flags &= ~M_FASTFWD_OURS; - } - } - ip_divert_ptr(*m0, dir == PF_IN ? DIR_IN : DIR_OUT); - *m0 = NULL; - - return (action); - } else { - /* XXX: ipfw has the same behaviour! */ - action = PF_DROP; - REASON_SET(&reason, PFRES_MEMORY); - log = 1; - DPFPRINTF(PF_DEBUG_MISC, - ("pf: failed to allocate divert tag\n")); - } - } + if (action == PF_PASS && s != NULL && pfsync_update_state_ptr != NULL) + pfsync_update_state_ptr(s); if (log) { struct pf_rule *lr; @@ -6134,7 +6710,10 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, struct inpcb *inp) struct pf_state *s = NULL; struct pf_ruleset *ruleset = NULL; struct pf_pdesc pd; - int off, terminal = 0, dirndx, rh_cnt = 0; + int off = 0, terminal = 0, dirndx, rh_cnt = 0; + int loopedfrom = 0; + struct m_tag *dn_tag; + struct ip_fw_args dnflow; int fwdir = dir; M_ASSERTPKTHDR(m); @@ -6145,18 +6724,27 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, struct inpcb *inp) * We do need to be careful about bridges. If the * net.link.bridge.pfil_bridge sysctl is set we can be filtering on a * bridge, so if the input interface is a bridge member and the output - * interface is its bridge we're not actually forwarding but bridging. + * interface is its bridge or a member of the same bridge we're not + * actually forwarding but bridging. */ - if (dir == PF_OUT && m->m_pkthdr.rcvif && ifp != m->m_pkthdr.rcvif - && (m->m_pkthdr.rcvif->if_bridge == NULL - || m->m_pkthdr.rcvif->if_bridge != ifp->if_softc)) + if (dir == PF_OUT && m->m_pkthdr.rcvif && ifp != m->m_pkthdr.rcvif && + (m->m_pkthdr.rcvif->if_bridge == NULL || + (m->m_pkthdr.rcvif->if_bridge != ifp->if_softc && + m->m_pkthdr.rcvif->if_bridge != ifp->if_bridge))) fwdir = PF_FWD; if (!V_pf_status.running) return (PF_PASS); memset(&pd, 0, sizeof(pd)); - pd.pf_mtag = pf_find_mtag(m); + pd.pf_mtag = pf_get_mtag(m); + if (pd.pf_mtag == NULL) { + REASON_SET(&reason, PFRES_MEMORY); + log = 1; + DPFPRINTF(PF_DEBUG_MISC, + ("pf: dropping packet due to failed memory allocation for tags\n")); + return PF_DROP; + } if (pd.pf_mtag && pd.pf_mtag->flags & PF_TAG_GENERATED) return (PF_PASS); @@ -6175,8 +6763,20 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, struct inpcb *inp) PF_RULES_RLOCK(); + if (((ip_dn_io_ptr != NULL) || (ip_divert_ptr != NULL)) && + ((dn_tag = m_tag_locate(m, MTAG_IPFW_RULE, 0, NULL)) != NULL)) { + struct ipfw_rule_ref *rr = (struct ipfw_rule_ref *)(dn_tag+1); + pd.pf_mtag->flags |= PF_PACKET_LOOPED; + if (rr->info & IPFW_IS_DUMMYNET) + loopedfrom = 1; + if (pd.pf_mtag->flags & PF_FASTFWD_OURS_PRESENT) { + m->m_flags |= M_FASTFWD_OURS; + pd.pf_mtag->flags &= ~PF_FASTFWD_OURS_PRESENT; + } + m_tag_delete(m, dn_tag); + } /* We do IP header normalization and packet reassembly here */ - if (pf_normalize_ip6(m0, dir, kif, &reason, &pd) != PF_PASS) { + else if (pf_normalize_ip6(m0, dir, kif, &reason, &pd) != PF_PASS) { action = PF_DROP; goto done; } @@ -6285,6 +6885,10 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, struct inpcb *inp) struct tcphdr th; pd.hdr.tcp = &th; + dnflow.f_id._flags = th.th_flags; + dnflow.f_id.dst_port = th.th_dport; + dnflow.f_id.src_port = th.th_sport; + if (!pf_pull_hdr(m, off, &th, sizeof(th), &action, &reason, AF_INET6)) { log = action != PF_PASS; @@ -6297,8 +6901,6 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, struct inpcb *inp) action = pf_test_state_tcp(&s, dir, kif, m, off, h, &pd, &reason); if (action == PF_PASS) { - if (pfsync_update_state_ptr != NULL) - pfsync_update_state_ptr(s); r = s->rule.ptr; a = s->anchor.ptr; log = s->log; @@ -6312,6 +6914,9 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, struct inpcb *inp) struct udphdr uh; pd.hdr.udp = &uh; + dnflow.f_id.dst_port = uh.uh_dport; + dnflow.f_id.src_port = uh.uh_sport; + if (!pf_pull_hdr(m, off, &uh, sizeof(uh), &action, &reason, AF_INET6)) { log = action != PF_PASS; @@ -6326,8 +6931,6 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, struct inpcb *inp) } action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd); if (action == PF_PASS) { - if (pfsync_update_state_ptr != NULL) - pfsync_update_state_ptr(s); r = s->rule.ptr; a = s->anchor.ptr; log = s->log; @@ -6356,8 +6959,6 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, struct inpcb *inp) action = pf_test_state_icmp(&s, dir, kif, m, off, h, &pd, &reason); if (action == PF_PASS) { - if (pfsync_update_state_ptr != NULL) - pfsync_update_state_ptr(s); r = s->rule.ptr; a = s->anchor.ptr; log = s->log; @@ -6368,10 +6969,8 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, struct inpcb *inp) } default: - action = pf_test_state_other(&s, dir, kif, m, &pd); + action = pf_test_state_other(&s, dir, kif, m, off, &pd); if (action == PF_PASS) { - if (pfsync_update_state_ptr != NULL) - pfsync_update_state_ptr(s); r = s->rule.ptr; a = s->anchor.ptr; log = s->log; @@ -6405,23 +7004,90 @@ done: if (r->rtableid >= 0) M_SETFIB(m, r->rtableid); + if ((r->ieee8021q_pcp.setpcp & SETPCP_VALID) && + pf_ieee8021q_setpcp(m, r)) { + action = PF_DROP; + REASON_SET(&reason, PFRES_MEMORY); + log = 1; + DPFPRINTF(PF_DEBUG_MISC, + ("pf: failed to allocate 802.1q mtag\n")); + } + #ifdef ALTQ - if (action == PF_PASS && r->qid) { - if (pd.pf_mtag == NULL && - ((pd.pf_mtag = pf_get_mtag(m)) == NULL)) { - action = PF_DROP; - REASON_SET(&reason, PFRES_MEMORY); - } else { - if (pd.tos & IPTOS_LOWDELAY) - pd.pf_mtag->qid = r->pqid; - else - pd.pf_mtag->qid = r->qid; - /* Add hints for ecn. */ - pd.pf_mtag->hdr = h; - } + if (s && s->qid) { + pd.act.pqid = s->pqid; + pd.act.qid = s->qid; + } else if (r->qid) { + pd.act.pqid = r->pqid; + pd.act.qid = r->qid; + } + if (action == PF_PASS && pd.act.qid) { + if (s != NULL) + pd.pf_mtag->qid_hash = pf_state_hash(s); + if (pd.tos & IPTOS_LOWDELAY) + pd.pf_mtag->qid = pd.act.pqid; + else + pd.pf_mtag->qid = pd.act.qid; + /* Add hints for ecn. */ + pd.pf_mtag->hdr = h; } #endif /* ALTQ */ + if (s && (s->dnpipe || s->pdnpipe)) { + pd.act.dnpipe = s->dnpipe; + pd.act.pdnpipe = s->pdnpipe; + pd.act.flags = s->state_flags; + } else if (r->dnpipe || r->pdnpipe) { + pd.act.dnpipe = r->dnpipe; + pd.act.dnpipe = r->pdnpipe; + pd.act.flags = r->free_flags; + } + if ((pd.act.dnpipe || pd.act.pdnpipe) && ip_dn_io_ptr != NULL && loopedfrom != 1) { + if (dir != r->direction && pd.act.pdnpipe) { + dnflow.rule.info = pd.act.pdnpipe; + } else if (dir == r->direction && pd.act.dnpipe) { + dnflow.rule.info = pd.act.dnpipe; + } else + goto continueprocessing6; + + if (pd.act.flags & PFRULE_DN_IS_PIPE) + dnflow.rule.info |= IPFW_IS_PIPE; + dnflow.f_id.addr_type = 6; /* IPv4 type */ + dnflow.f_id.proto = pd.proto; + dnflow.f_id.src_ip = 0; + dnflow.f_id.dst_ip = 0; + if (dir == PF_OUT && s != NULL && s->nat_rule.ptr != NULL && + s->nat_rule.ptr->action == PF_NAT) + dnflow.f_id.src_ip6 = s->key[(s->direction == PF_IN)]->addr[0].v6; + else + dnflow.f_id.src_ip6 = h->ip6_src; + dnflow.f_id.dst_ip6 = h->ip6_dst; + + if (s != NULL && s->nat_rule.ptr) + pf_packet_undo_nat(m, &pd, off, s, dir); + + ip_dn_io_ptr(m0, + ((dir == PF_IN) ? DIR_IN : DIR_OUT) | PROTO_IPV6, + &dnflow); + /* This is dummynet fast io processing */ + if (*m0 != NULL) { + m_tag_delete(*m0, m_tag_first(*m0)); + pd.pf_mtag->flags &= ~PF_PACKET_LOOPED; + if (s != NULL && s->nat_rule.ptr) + pf_packet_redo_nat(m, &pd, off, s, dir); + } else { + *m0 = NULL; + if (s) + PF_STATE_UNLOCK(s); + return (action); + } + } else + pd.pf_mtag->flags &= ~PF_PACKET_LOOPED; +continueprocessing6: + + if (action == PF_PASS && s != NULL && pfsync_update_state_ptr != NULL) + pfsync_update_state_ptr(s); + if (dir == PF_IN && action == PF_PASS && (pd.proto == IPPROTO_TCP || pd.proto == IPPROTO_UDP) && s != NULL && s->nat_rule.ptr != NULL && (s->nat_rule.ptr->action == PF_RDR || diff --git a/sys/netpfil/pf/pf.h b/sys/netpfil/pf/pf.h index 3a5d2aa..5351786 100644 --- a/sys/netpfil/pf/pf.h +++ b/sys/netpfil/pf/pf.h @@ -45,7 +45,8 @@ enum { PF_INOUT, PF_IN, PF_OUT, PF_FWD }; enum { PF_PASS, PF_DROP, PF_SCRUB, PF_NOSCRUB, PF_NAT, PF_NONAT, - PF_BINAT, PF_NOBINAT, PF_RDR, PF_NORDR, PF_SYNPROXY_DROP, PF_DEFER }; + PF_BINAT, PF_NOBINAT, PF_RDR, PF_NORDR, PF_SYNPROXY_DROP, PF_DEFER, + PF_MATCH }; enum { PF_RULESET_SCRUB, PF_RULESET_FILTER, PF_RULESET_NAT, PF_RULESET_BINAT, PF_RULESET_RDR, PF_RULESET_MAX }; enum { PF_OP_NONE, PF_OP_IRG, PF_OP_EQ, PF_OP_NE, PF_OP_LT, @@ -125,7 +126,8 @@ enum { PF_ADDR_ADDRMASK, PF_ADDR_NOROUTE, PF_ADDR_DYNIFTL, #define PFRES_MAXSTATES 12 /* State limit */ #define PFRES_SRCLIMIT 13 /* Source node/conn limit */ #define PFRES_SYNPROXY 14 /* SYN proxy */ -#define PFRES_MAX 15 /* total+1 */ +#define PFRES_DIVERT 15 /* Divert override */ +#define PFRES_MAX 16 /* total+1 */ #define PFRES_NAMES { \ "match", \ @@ -143,6 +145,7 @@ enum { PF_ADDR_ADDRMASK, PF_ADDR_NOROUTE, PF_ADDR_DYNIFTL, "state-limit", \ "src-limit", \ "synproxy", \ + "divert", \ NULL \ } diff --git a/sys/netpfil/pf/pf_altq.h b/sys/netpfil/pf/pf_altq.h index eda0965..3efd4ff 100644 --- a/sys/netpfil/pf/pf_altq.h +++ b/sys/netpfil/pf/pf_altq.h @@ -45,6 +45,12 @@ struct cbq_opts { int flags; }; +struct codel_opts { + u_int target; + u_int interval; + int ecn; +}; + struct priq_opts { int flags; }; @@ -65,6 +71,20 @@ struct hfsc_opts { int flags; }; +/* + * XXX this needs some work + */ +struct fairq_opts { + u_int nbuckets; + u_int hogs_m1; + int flags; + + /* link sharing service curve */ + u_int lssc_m1; + u_int lssc_d; + u_int lssc_m2; +}; + struct pf_altq { char ifname[IFNAMSIZ]; @@ -89,8 +109,10 @@ struct pf_altq { uint16_t flags; /* misc flags */ union { struct cbq_opts cbq_opts; + struct codel_opts codel_opts; struct priq_opts priq_opts; struct hfsc_opts hfsc_opts; + struct fairq_opts fairq_opts; } pq_u; uint32_t qid; /* return value */ diff --git a/sys/netpfil/pf/pf_ioctl.c b/sys/netpfil/pf/pf_ioctl.c index 29ebb68..420214a 100644 --- a/sys/netpfil/pf/pf_ioctl.c +++ b/sys/netpfil/pf/pf_ioctl.c @@ -1004,6 +1004,8 @@ pfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, struct thread *td case DIOCCLRRULECTRS: case DIOCGETLIMIT: case DIOCGETALTQS: + case DIOCGETNAMEDALTQ: + case DIOCGETNAMEDTAG: case DIOCGETALTQ: case DIOCGETQSTATS: case DIOCGETRULESETS: @@ -1050,6 +1052,8 @@ pfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, struct thread *td case DIOCGETTIMEOUT: case DIOCGETLIMIT: case DIOCGETALTQS: + case DIOCGETNAMEDALTQ: + case DIOCGETNAMEDTAG: case DIOCGETALTQ: case DIOCGETQSTATS: case DIOCGETRULESETS: @@ -1164,7 +1168,9 @@ pfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, struct thread *td rule->states_cur = counter_u64_alloc(M_WAITOK); rule->states_tot = counter_u64_alloc(M_WAITOK); rule->src_nodes = counter_u64_alloc(M_WAITOK); +#ifdef PF_USER_INFO rule->cuid = td->td_ucred->cr_ruid; +#endif rule->cpid = td->td_proc ? td->td_proc->p_pid : 0; TAILQ_INIT(&rule->rpool.list); @@ -1190,7 +1196,6 @@ pfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, struct thread *td V_ticket_pabuf)); ERROUT(EBUSY); } - tail = TAILQ_LAST(ruleset->rules[rs_num].inactive.ptr, pf_rulequeue); if (tail) @@ -1269,8 +1274,29 @@ pfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, struct thread *td } rule->rpool.cur = TAILQ_FIRST(&rule->rpool.list); +#ifndef PF_USER_INFO + if (rule->cuid) { + tail = TAILQ_FIRST(ruleset->rules[rs_num].active.ptr); + while ((tail != NULL) && (tail->cuid != rule->cuid)) + tail = TAILQ_NEXT(tail, entries); + if (tail != NULL) { + rule->evaluations = tail->evaluations; + rule->packets[0] = tail->packets[0]; + rule->packets[1] = tail->packets[1]; + rule->bytes[0] = tail->bytes[0]; + rule->bytes[1] = tail->bytes[1]; + } else { + rule->evaluations = rule->packets[0] = rule->packets[1] = + rule->bytes[0] = rule->bytes[1] = 0; + } + } else { + rule->evaluations = rule->packets[0] = rule->packets[1] = + rule->bytes[0] = rule->bytes[1] = 0; + } +#else rule->evaluations = rule->packets[0] = rule->packets[1] = rule->bytes[0] = rule->bytes[1] = 0; +#endif TAILQ_INSERT_TAIL(ruleset->rules[rs_num].inactive.ptr, rule, entries); ruleset->rules[rs_num].inactive.rcount++; @@ -1420,7 +1446,9 @@ DIOCADDRULE_error: newrule->states_cur = counter_u64_alloc(M_WAITOK); newrule->states_tot = counter_u64_alloc(M_WAITOK); newrule->src_nodes = counter_u64_alloc(M_WAITOK); +#ifdef PF_USER_INFO newrule->cuid = td->td_ucred->cr_ruid; +#endif newrule->cpid = td->td_proc ? td->td_proc->p_pid : 0; TAILQ_INIT(&newrule->rpool.list); } @@ -1707,6 +1735,30 @@ relock_DIOCKILLSTATES: break; } + case DIOCKILLSCHEDULE: { + struct pf_state *state; + struct pfioc_schedule_kill *psk = (struct pfioc_schedule_kill *)addr; + int killed = 0; + u_int i; + + for (i = 0; i <= pf_hashmask; i++) { + struct pf_idhash *ih = &V_pf_idhash[i]; + +relock_DIOCKILLSCHEDULE: + PF_HASHROW_LOCK(ih); + LIST_FOREACH(state, &ih->states, entry) { + if (!strcmp(psk->schedule, state->rule.ptr->schedule)) { + pf_unlink_state(state, PF_ENTER_LOCKED); + killed++; + goto relock_DIOCKILLSCHEDULE; + } + } + PF_HASHROW_UNLOCK(ih); + } + psk->numberkilled = killed; + break; + } + case DIOCADDSTATE: { struct pfioc_state *ps = (struct pfioc_state *)addr; struct pfsync_state *sp = &ps->state; @@ -2097,6 +2149,16 @@ DIOCGETSTATES_full: break; } + case DIOCGETNAMEDALTQ: { + struct pfioc_ruleset *pa = (struct pfioc_ruleset *)addr; + + if (pa->name[0]) { + pa->nr = pf_qname2qid(pa->name); + pf_qid_unref(pa->nr); + } + break; + } + case DIOCGETALTQS: { struct pfioc_altq *pa = (struct pfioc_altq *)addr; struct pf_altq *altq; @@ -2182,6 +2244,16 @@ DIOCGETSTATES_full: } #endif /* ALTQ */ + case DIOCGETNAMEDTAG: { + /* Little abuse. */ + struct pfioc_ruleset *pa = (struct pfioc_ruleset *)addr; + + if (pa->name[0]) + pa->nr = pf_tagname2tag(pa->name); + + break; + } + case DIOCBEGINADDRS: { struct pfioc_pooladdr *pp = (struct pfioc_pooladdr *)addr; @@ -3628,8 +3700,8 @@ hook_pf(void) pfh_inet = pfil_head_get(PFIL_TYPE_AF, AF_INET); if (pfh_inet == NULL) return (ESRCH); /* XXX */ - pfil_add_hook(pf_check_in, NULL, PFIL_IN | PFIL_WAITOK, pfh_inet); - pfil_add_hook(pf_check_out, NULL, PFIL_OUT | PFIL_WAITOK, pfh_inet); + pfil_add_named_hook(pf_check_in, NULL, "pf", PFIL_IN | PFIL_WAITOK, pfh_inet); + pfil_add_named_hook(pf_check_out, NULL, "pf", PFIL_OUT | PFIL_WAITOK, pfh_inet); #endif #ifdef INET6 pfh_inet6 = pfil_head_get(PFIL_TYPE_AF, AF_INET6); @@ -3642,8 +3714,10 @@ hook_pf(void) #endif return (ESRCH); /* XXX */ } - pfil_add_hook(pf_check6_in, NULL, PFIL_IN | PFIL_WAITOK, pfh_inet6); - pfil_add_hook(pf_check6_out, NULL, PFIL_OUT | PFIL_WAITOK, pfh_inet6); + pfil_add_named_hook(pf_check6_in, NULL, "pf", PFIL_IN | PFIL_WAITOK, + pfh_inet6); + pfil_add_named_hook(pf_check6_out, NULL, "pf", PFIL_OUT | PFIL_WAITOK, + pfh_inet6); #endif V_pf_pfil_hooked = 1; diff --git a/sys/netpfil/pf/pf_mtag.h b/sys/netpfil/pf/pf_mtag.h index 3aacb2e..fd8554a 100644 --- a/sys/netpfil/pf/pf_mtag.h +++ b/sys/netpfil/pf/pf_mtag.h @@ -44,6 +44,7 @@ struct pf_mtag { void *hdr; /* saved hdr pos in mbuf, for ECN */ u_int32_t qid; /* queue id */ + u_int32_t qid_hash; /* queue hashid used by WFQ like algos */ u_int16_t tag; /* tag id */ u_int8_t flags; u_int8_t routed; diff --git a/sys/netpfil/pf/pf_norm.c b/sys/netpfil/pf/pf_norm.c index 4f9a499..b925960 100644 --- a/sys/netpfil/pf/pf_norm.c +++ b/sys/netpfil/pf/pf_norm.c @@ -1151,6 +1151,7 @@ pf_refragment6(struct ifnet *ifp, struct mbuf **m0, struct m_tag *mtag) for (t = m; m; m = t) { t = m->m_nextpkt; m->m_nextpkt = NULL; + m->m_pkthdr.rcvif = ifp; m->m_flags |= M_SKIP_FIREWALL; memset(&pd, 0, sizeof(pd)); pd.pf_mtag = pf_find_mtag(m); diff --git a/sys/netpfil/pf/pf_ruleset.c b/sys/netpfil/pf/pf_ruleset.c index 384e42b..2274359 100644 --- a/sys/netpfil/pf/pf_ruleset.c +++ b/sys/netpfil/pf/pf_ruleset.c @@ -120,6 +120,7 @@ pf_get_ruleset_number(u_int8_t action) return (PF_RULESET_SCRUB); break; case PF_PASS: + case PF_MATCH: case PF_DROP: return (PF_RULESET_FILTER); break; diff --git a/sys/opencrypto/criov.c b/sys/opencrypto/criov.c index 96f0dfc..499bfe3 100644 --- a/sys/opencrypto/criov.c +++ b/sys/opencrypto/criov.c @@ -99,35 +99,31 @@ cuio_copyback(struct uio* uio, int off, int len, caddr_t cp) } /* - * Return a pointer to iov/offset of location in iovec list. + * Return the index and offset of location in iovec list. */ -struct iovec * +int cuio_getptr(struct uio *uio, int loc, int *off) { - struct iovec *iov = uio->uio_iov; - int iol = uio->uio_iovcnt; + int ind, len; - while (loc >= 0) { - /* Normal end of search */ - if (loc < iov->iov_len) { + ind = 0; + while (loc >= 0 && ind < uio->uio_iovcnt) { + len = uio->uio_iov[ind].iov_len; + if (len > loc) { *off = loc; - return (iov); + return (ind); } + loc -= len; + ind++; + } - loc -= iov->iov_len; - if (iol == 0) { - if (loc == 0) { - /* Point at the end of valid data */ - *off = iov->iov_len; - return (iov); - } else - return (NULL); - } else { - iov++, iol--; - } - } + if (ind > 0 && loc == 0) { + ind--; + *off = uio->uio_iov[ind].iov_len; + return (ind); + } - return (NULL); + return (-1); } /* @@ -196,3 +192,50 @@ crypto_apply(int flags, caddr_t buf, int off, int len, error = (*f)(arg, buf + off, len); return (error); } + +int +crypto_mbuftoiov(struct mbuf *mbuf, struct iovec **iovptr, int *cnt, + int *allocated) +{ + struct iovec *iov; + struct mbuf *m, *mtmp; + int i, j; + + *allocated = 0; + iov = *iovptr; + if (iov == NULL) + *cnt = 0; + + m = mbuf; + i = 0; + while (m != NULL) { + if (i == *cnt) { + /* we need to allocate a larger array */ + j = 1; + mtmp = m; + while ((mtmp = mtmp->m_next) != NULL) + j++; + iov = malloc(sizeof *iov * (i + j), M_CRYPTO_DATA, + M_NOWAIT); + if (iov == NULL) + return ENOMEM; + *allocated = 1; + *cnt = i + j; + memcpy(iov, *iovptr, sizeof *iov * i); + } + + iov[i].iov_base = m->m_data; + iov[i].iov_len = m->m_len; + + i++; + m = m->m_next; + } + + if (*allocated) + KASSERT(*cnt == i, ("did not allocate correct amount: %d != %d", + *cnt, i)); + + *iovptr = iov; + *cnt = i; + return 0; +} diff --git a/sys/opencrypto/crypto.c b/sys/opencrypto/crypto.c index 53c11e6..d084b77 100644 --- a/sys/opencrypto/crypto.c +++ b/sys/opencrypto/crypto.c @@ -57,7 +57,6 @@ __FBSDID("$FreeBSD$"); #define CRYPTO_TIMING /* enable timing support */ #include "opt_ddb.h" -#include "opt_kdtrace.h" #include <sys/param.h> #include <sys/systm.h> @@ -162,10 +161,10 @@ int crypto_userasymcrypto = 1; /* userland may do asym crypto reqs */ SYSCTL_INT(_kern, OID_AUTO, userasymcrypto, CTLFLAG_RW, &crypto_userasymcrypto, 0, "Enable/disable user-mode access to asymmetric crypto support"); -int crypto_devallowsoft = 0; /* only use hardware crypto for asym */ +int crypto_devallowsoft = 0; /* only use hardware crypto */ SYSCTL_INT(_kern, OID_AUTO, cryptodevallowsoft, CTLFLAG_RW, &crypto_devallowsoft, 0, - "Enable/disable use of software asym crypto support"); + "Enable/disable use of software crypto by /dev/crypto"); MALLOC_DEFINE(M_CRYPTO_DATA, "crypto", "crypto session records"); @@ -369,9 +368,8 @@ again: best = cap; } } - if (best != NULL) - return best; - if (match == CRYPTOCAP_F_HARDWARE && (flags & CRYPTOCAP_F_SOFTWARE)) { + if (best == NULL && match == CRYPTOCAP_F_HARDWARE && + (flags & CRYPTOCAP_F_SOFTWARE)) { /* sort of an Algol 68-style for loop */ match = CRYPTOCAP_F_SOFTWARE; goto again; @@ -422,9 +420,12 @@ crypto_newsession(u_int64_t *sid, struct cryptoini *cri, int crid) (*sid) <<= 32; (*sid) |= (lid & 0xffffffff); cap->cc_sessions++; - } - } else + } else + CRYPTDEB("dev newsession failed"); + } else { + CRYPTDEB("no driver"); err = EINVAL; + } CRYPTO_DRIVER_UNLOCK(); return err; } @@ -1180,8 +1181,8 @@ crypto_kdone(struct cryptkop *krp) /* XXX: What if driver is loaded in the meantime? */ if (krp->krp_hid < crypto_drivers_num) { cap = &crypto_drivers[krp->krp_hid]; + KASSERT(cap->cc_koperations > 0, ("cc_koperations == 0")); cap->cc_koperations--; - KASSERT(cap->cc_koperations >= 0, ("cc_koperations < 0")); if (cap->cc_flags & CRYPTOCAP_F_CLEANUP) crypto_remove(cap); } diff --git a/sys/opencrypto/cryptodeflate.c b/sys/opencrypto/cryptodeflate.c index 2113611..f342a91 100644 --- a/sys/opencrypto/cryptodeflate.c +++ b/sys/opencrypto/cryptodeflate.c @@ -35,8 +35,6 @@ #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); -#include "opt_kdtrace.h" - #include <sys/types.h> #include <sys/param.h> #include <sys/malloc.h> @@ -107,8 +105,8 @@ deflate_global(data, size, decomp, out) bufh = bufp = malloc(sizeof(*bufp) + (size_t)(size * i), M_CRYPTO_DATA, M_NOWAIT); if (bufp == NULL) { - SDT_PROBE3(opencrypto, deflate, deflate_global, bad, - decomp, 0, __LINE__); + SDT_PROBE5(opencrypto, deflate, deflate_global, bad, + decomp, 0, __LINE__, 0, 0); goto bad2; } bufp->next = NULL; @@ -127,8 +125,8 @@ deflate_global(data, size, decomp, out) deflateInit2(&zbuf, Z_DEFAULT_COMPRESSION, Z_METHOD, window_deflate, Z_MEMLEVEL, Z_DEFAULT_STRATEGY); if (error != Z_OK) { - SDT_PROBE3(opencrypto, deflate, deflate_global, bad, - decomp, error, __LINE__); + SDT_PROBE5(opencrypto, deflate, deflate_global, bad, + decomp, error, __LINE__, 0, 0); goto bad; } @@ -167,8 +165,8 @@ deflate_global(data, size, decomp, out) p = malloc(sizeof(*p) + (size_t)(size * i), M_CRYPTO_DATA, M_NOWAIT); if (p == NULL) { - SDT_PROBE3(opencrypto, deflate, deflate_global, - bad, decomp, 0, __LINE__); + SDT_PROBE5(opencrypto, deflate, deflate_global, + bad, decomp, 0, __LINE__, 0, 0); goto bad; } p->next = NULL; @@ -197,8 +195,8 @@ deflate_global(data, size, decomp, out) *out = malloc(result, M_CRYPTO_DATA, M_NOWAIT); if (*out == NULL) { - SDT_PROBE3(opencrypto, deflate, deflate_global, bad, - decomp, 0, __LINE__); + SDT_PROBE5(opencrypto, deflate, deflate_global, bad, + decomp, 0, __LINE__, 0, 0); goto bad; } if (decomp) diff --git a/sys/opencrypto/cryptodev.c b/sys/opencrypto/cryptodev.c index 44bfa5c..f5e80a0 100644 --- a/sys/opencrypto/cryptodev.c +++ b/sys/opencrypto/cryptodev.c @@ -3,6 +3,12 @@ /*- * Copyright (c) 2001 Theo de Raadt * Copyright (c) 2002-2006 Sam Leffler, Errno Consulting + * Copyright (c) 2014 The FreeBSD Foundation + * All rights reserved. + * + * Portions of this software were developed by John-Mark Gurney + * under sponsorship of the FreeBSD Foundation and + * Rubicon Communications, LLC (Netgate). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -54,10 +60,16 @@ __FBSDID("$FreeBSD$"); #include <sys/module.h> #include <sys/fcntl.h> #include <sys/bus.h> +#include <sys/user.h> +#include <sys/sdt.h> #include <opencrypto/cryptodev.h> #include <opencrypto/xform.h> +SDT_PROVIDER_DECLARE(opencrypto); + +SDT_PROBE_DEFINE1(opencrypto, dev, ioctl, error, "int"/*line number*/); + #ifdef COMPAT_FREEBSD32 #include <sys/mount.h> #include <compat/freebsd32/freebsd32.h> @@ -317,6 +329,8 @@ static int csefree(struct csession *); static int cryptodev_op(struct csession *, struct crypt_op *, struct ucred *, struct thread *td); +static int cryptodev_aead(struct csession *, struct crypt_aead *, + struct ucred *, struct thread *); static int cryptodev_key(struct crypt_kop *); static int cryptodev_find(struct crypt_find_op *); @@ -349,13 +363,24 @@ cryptof_truncate( * by device name/class or through search constraints. */ static int -checkforsoftware(int crid) +checkforsoftware(int *cridp) { - if (crid & CRYPTOCAP_F_SOFTWARE) - return EINVAL; /* XXX */ - if ((crid & CRYPTOCAP_F_HARDWARE) == 0 && - (crypto_getcaps(crid) & CRYPTOCAP_F_HARDWARE) == 0) - return EINVAL; /* XXX */ + int crid; + + crid = *cridp; + + if (!crypto_devallowsoft) { + if (crid & CRYPTOCAP_F_SOFTWARE) { + if (crid & CRYPTOCAP_F_HARDWARE) { + *cridp = CRYPTOCAP_F_HARDWARE; + return 0; + } + return EINVAL; + } + if ((crid & CRYPTOCAP_F_HARDWARE) == 0 && + (crypto_getcaps(crid) & CRYPTOCAP_F_HARDWARE) == 0) + return EINVAL; + } return 0; } @@ -374,6 +399,7 @@ cryptof_ioctl( struct csession *cse; struct session_op *sop; struct crypt_op *cop; + struct crypt_aead *caead; struct enc_xform *txform = NULL; struct auth_hash *thash = NULL; struct crypt_kop *kop; @@ -434,7 +460,15 @@ cryptof_ioctl( case CRYPTO_CAMELLIA_CBC: txform = &enc_xform_camellia; break; + case CRYPTO_AES_ICM: + txform = &enc_xform_aes_icm; + break; + case CRYPTO_AES_NIST_GCM_16: + txform = &enc_xform_aes_nist_gcm; + break; + default: + CRYPTDEB("invalid cipher"); return (EINVAL); } @@ -459,6 +493,16 @@ cryptof_ioctl( case CRYPTO_RIPEMD160_HMAC: thash = &auth_hash_hmac_ripemd_160; break; + case CRYPTO_AES_128_NIST_GMAC: + thash = &auth_hash_nist_gmac_aes_128; + break; + case CRYPTO_AES_192_NIST_GMAC: + thash = &auth_hash_nist_gmac_aes_192; + break; + case CRYPTO_AES_256_NIST_GMAC: + thash = &auth_hash_nist_gmac_aes_256; + break; + #ifdef notdef case CRYPTO_MD5: thash = &auth_hash_md5; @@ -471,6 +515,7 @@ cryptof_ioctl( thash = &auth_hash_null; break; default: + CRYPTDEB("invalid mac"); return (EINVAL); } @@ -482,6 +527,7 @@ cryptof_ioctl( crie.cri_klen = sop->keylen * 8; if (sop->keylen > txform->maxkey || sop->keylen < txform->minkey) { + CRYPTDEB("invalid cipher parameters"); error = EINVAL; goto bail; } @@ -489,8 +535,10 @@ cryptof_ioctl( crie.cri_key = malloc(crie.cri_klen / 8, M_XDATA, M_WAITOK); if ((error = copyin(sop->key, crie.cri_key, - crie.cri_klen / 8))) + crie.cri_klen / 8))) { + CRYPTDEB("invalid key"); goto bail; + } if (thash) crie.cri_next = &cria; } @@ -499,6 +547,7 @@ cryptof_ioctl( cria.cri_alg = thash->type; cria.cri_klen = sop->mackeylen * 8; if (sop->mackeylen != thash->keysize) { + CRYPTDEB("invalid mac key length"); error = EINVAL; goto bail; } @@ -507,8 +556,10 @@ cryptof_ioctl( cria.cri_key = malloc(cria.cri_klen / 8, M_XDATA, M_WAITOK); if ((error = copyin(sop->mackey, cria.cri_key, - cria.cri_klen / 8))) + cria.cri_klen / 8))) { + CRYPTDEB("invalid mac key"); goto bail; + } } } @@ -519,14 +570,18 @@ cryptof_ioctl( #endif ) { crid = SES2(sop)->crid; - error = checkforsoftware(crid); - if (error) + error = checkforsoftware(&crid); + if (error) { + CRYPTDEB("checkforsoftware"); goto bail; + } } else crid = CRYPTOCAP_F_HARDWARE; error = crypto_newsession(&sid, (txform ? &crie : &cria), crid); - if (error) + if (error) { + CRYPTDEB("crypto_newsession"); goto bail; + } cse = csecreate(fcr, sid, crie.cri_key, crie.cri_klen, cria.cri_key, cria.cri_klen, sop->cipher, sop->mac, txform, @@ -535,6 +590,7 @@ cryptof_ioctl( if (cse == NULL) { crypto_freesession(sid); error = EINVAL; + CRYPTDEB("csecreate"); goto bail; } sop->ses = cse->ses; @@ -581,8 +637,10 @@ bail: #endif cop = (struct crypt_op *)data; cse = csefind(fcr, cop->ses); - if (cse == NULL) + if (cse == NULL) { + SDT_PROBE1(opencrypto, dev, ioctl, error, __LINE__); return (EINVAL); + } error = cryptodev_op(cse, cop, active_cred, td); #ifdef COMPAT_FREEBSD32 if (error == 0 && cmd == CIOCCRYPT32) @@ -636,6 +694,13 @@ bail: case CIOCFINDDEV: error = cryptodev_find((struct crypt_find_op *)data); break; + case CIOCCRYPTAEAD: + caead = (struct crypt_aead *)data; + cse = csefind(fcr, caead->ses); + if (cse == NULL) + return (EINVAL); + error = cryptodev_aead(cse, caead, active_cred, td); + break; default: error = EINVAL; break; @@ -658,12 +723,16 @@ cryptodev_op( struct cryptodesc *crde = NULL, *crda = NULL; int error; - if (cop->len > 256*1024-4) + if (cop->len > 256*1024-4) { + SDT_PROBE1(opencrypto, dev, ioctl, error, __LINE__); return (E2BIG); + } if (cse->txform) { - if (cop->len == 0 || (cop->len % cse->txform->blocksize) != 0) + if (cop->len == 0 || (cop->len % cse->txform->blocksize) != 0) { + SDT_PROBE1(opencrypto, dev, ioctl, error, __LINE__); return (EINVAL); + } } cse->uio.uio_iov = &cse->iovec; @@ -683,6 +752,7 @@ cryptodev_op( crp = crypto_getreq((cse->txform != NULL) + (cse->thash != NULL)); if (crp == NULL) { + SDT_PROBE1(opencrypto, dev, ioctl, error, __LINE__); error = ENOMEM; goto bail; } @@ -695,13 +765,17 @@ cryptodev_op( if (cse->txform) crde = crp->crp_desc; else { + SDT_PROBE1(opencrypto, dev, ioctl, error, __LINE__); error = EINVAL; goto bail; } } - if ((error = copyin(cop->src, cse->uio.uio_iov[0].iov_base, cop->len))) + if ((error = copyin(cop->src, cse->uio.uio_iov[0].iov_base, + cop->len))) { + SDT_PROBE1(opencrypto, dev, ioctl, error, __LINE__); goto bail; + } if (crda) { crda->crd_skip = 0; @@ -736,15 +810,20 @@ cryptodev_op( if (cop->iv) { if (crde == NULL) { + SDT_PROBE1(opencrypto, dev, ioctl, error, __LINE__); error = EINVAL; goto bail; } if (cse->cipher == CRYPTO_ARC4) { /* XXX use flag? */ + SDT_PROBE1(opencrypto, dev, ioctl, error, __LINE__); error = EINVAL; goto bail; } - if ((error = copyin(cop->iv, cse->tmp_iv, cse->txform->blocksize))) + if ((error = copyin(cop->iv, cse->tmp_iv, + cse->txform->blocksize))) { + SDT_PROBE1(opencrypto, dev, ioctl, error, __LINE__); goto bail; + } bcopy(cse->tmp_iv, crde->crd_iv, cse->txform->blocksize); crde->crd_flags |= CRD_F_IV_EXPLICIT | CRD_F_IV_PRESENT; crde->crd_skip = 0; @@ -757,6 +836,7 @@ cryptodev_op( } if (cop->mac && crda == NULL) { + SDT_PROBE1(opencrypto, dev, ioctl, error, __LINE__); error = EINVAL; goto bail; } @@ -775,8 +855,10 @@ again: error = msleep(crp, &cse->lock, PWAIT, "crydev", 0); mtx_unlock(&cse->lock); - if (error != 0) + if (error != 0) { + SDT_PROBE1(opencrypto, dev, ioctl, error, __LINE__); goto bail; + } if (crp->crp_etype == EAGAIN) { crp->crp_etype = 0; @@ -785,23 +867,30 @@ again: } if (crp->crp_etype != 0) { + SDT_PROBE1(opencrypto, dev, ioctl, error, __LINE__); error = crp->crp_etype; goto bail; } if (cse->error) { + SDT_PROBE1(opencrypto, dev, ioctl, error, __LINE__); error = cse->error; goto bail; } if (cop->dst && - (error = copyout(cse->uio.uio_iov[0].iov_base, cop->dst, cop->len))) + (error = copyout(cse->uio.uio_iov[0].iov_base, cop->dst, + cop->len))) { + SDT_PROBE1(opencrypto, dev, ioctl, error, __LINE__); goto bail; + } if (cop->mac && (error = copyout((caddr_t)cse->uio.uio_iov[0].iov_base + cop->len, - cop->mac, cse->thash->hashsize))) + cop->mac, cse->thash->hashsize))) { + SDT_PROBE1(opencrypto, dev, ioctl, error, __LINE__); goto bail; + } bail: if (crp) @@ -813,6 +902,151 @@ bail: } static int +cryptodev_aead( + struct csession *cse, + struct crypt_aead *caead, + struct ucred *active_cred, + struct thread *td) +{ + struct uio *uio; + struct cryptop *crp = NULL; + struct cryptodesc *crde = NULL, *crda = NULL; + int error; + + if (caead->len > 256*1024-4 || caead->aadlen > 256*1024-4) + return (E2BIG); + + if (cse->txform == NULL || cse->thash == NULL || caead->tag == NULL || + (caead->len % cse->txform->blocksize) != 0) + return (EINVAL); + + uio = &cse->uio; + uio->uio_iov = &cse->iovec; + uio->uio_iovcnt = 1; + uio->uio_offset = 0; + uio->uio_resid = caead->len + caead->aadlen + cse->thash->hashsize; + uio->uio_segflg = UIO_SYSSPACE; + uio->uio_rw = UIO_WRITE; + uio->uio_td = td; + uio->uio_iov[0].iov_len = uio->uio_resid; + + uio->uio_iov[0].iov_base = malloc(uio->uio_iov[0].iov_len, + M_XDATA, M_WAITOK); + + crp = crypto_getreq(2); + if (crp == NULL) { + error = ENOMEM; + goto bail; + } + + crda = crp->crp_desc; + crde = crda->crd_next; + + if ((error = copyin(caead->src, cse->uio.uio_iov[0].iov_base, + caead->len))) + goto bail; + + if ((error = copyin(caead->aad, (char *)cse->uio.uio_iov[0].iov_base + + caead->len, caead->aadlen))) + goto bail; + + crda->crd_skip = caead->len; + crda->crd_len = caead->aadlen; + crda->crd_inject = caead->len + caead->aadlen; + + crda->crd_alg = cse->mac; + crda->crd_key = cse->mackey; + crda->crd_klen = cse->mackeylen * 8; + + if (caead->op == COP_ENCRYPT) + crde->crd_flags |= CRD_F_ENCRYPT; + else + crde->crd_flags &= ~CRD_F_ENCRYPT; + /* crde->crd_skip set below */ + crde->crd_len = caead->len; + crde->crd_inject = 0; + + crde->crd_alg = cse->cipher; + crde->crd_key = cse->key; + crde->crd_klen = cse->keylen * 8; + + crp->crp_ilen = caead->len + caead->aadlen; + crp->crp_flags = CRYPTO_F_IOV | CRYPTO_F_CBIMM + | (caead->flags & COP_F_BATCH); + crp->crp_buf = (caddr_t)&cse->uio.uio_iov; + crp->crp_callback = (int (*) (struct cryptop *)) cryptodev_cb; + crp->crp_sid = cse->sid; + crp->crp_opaque = (void *)cse; + + if (caead->iv) { + if (caead->ivlen > sizeof cse->tmp_iv) { + error = EINVAL; + goto bail; + } + + if ((error = copyin(caead->iv, cse->tmp_iv, caead->ivlen))) + goto bail; + bcopy(cse->tmp_iv, crde->crd_iv, caead->ivlen); + crde->crd_flags |= CRD_F_IV_EXPLICIT | CRD_F_IV_PRESENT; + crde->crd_skip = 0; + } else { + crde->crd_flags |= CRD_F_IV_PRESENT; + crde->crd_skip = cse->txform->blocksize; + crde->crd_len -= cse->txform->blocksize; + } + + if ((error = copyin(caead->tag, (caddr_t)cse->uio.uio_iov[0].iov_base + + caead->len + caead->aadlen, cse->thash->hashsize))) + goto bail; +again: + /* + * Let the dispatch run unlocked, then, interlock against the + * callback before checking if the operation completed and going + * to sleep. This insures drivers don't inherit our lock which + * results in a lock order reversal between crypto_dispatch forced + * entry and the crypto_done callback into us. + */ + error = crypto_dispatch(crp); + mtx_lock(&cse->lock); + if (error == 0 && (crp->crp_flags & CRYPTO_F_DONE) == 0) + error = msleep(crp, &cse->lock, PWAIT, "crydev", 0); + mtx_unlock(&cse->lock); + + if (error != 0) + goto bail; + + if (crp->crp_etype == EAGAIN) { + crp->crp_etype = 0; + crp->crp_flags &= ~CRYPTO_F_DONE; + goto again; + } + + if (crp->crp_etype != 0) { + error = crp->crp_etype; + goto bail; + } + + if (cse->error) { + error = cse->error; + goto bail; + } + + if (caead->dst && (error = copyout(cse->uio.uio_iov[0].iov_base, + caead->dst, caead->len))) + goto bail; + + if ((error = copyout((caddr_t)cse->uio.uio_iov[0].iov_base + + caead->len + caead->aadlen, caead->tag, cse->thash->hashsize))) + goto bail; + +bail: + crypto_freereq(crp); + free(cse->uio.uio_iov[0].iov_base, M_XDATA); + + return (error); +} + +static int cryptodev_cb(void *op) { struct cryptop *crp = (struct cryptop *) op; @@ -941,14 +1175,16 @@ static int cryptodev_find(struct crypt_find_op *find) { device_t dev; + size_t fnlen = sizeof find->name; if (find->crid != -1) { dev = crypto_find_device_byhid(find->crid); if (dev == NULL) return (ENOENT); - strlcpy(find->name, device_get_nameunit(dev), - sizeof(find->name)); + strncpy(find->name, device_get_nameunit(dev), fnlen); + find->name[fnlen - 1] = '\x0'; } else { + find->name[fnlen - 1] = '\x0'; find->crid = crypto_find_driver(find->name); if (find->crid == -1) return (ENOENT); @@ -1044,12 +1280,7 @@ csecreate(struct fcrypt *fcr, u_int64_t sid, caddr_t key, u_int64_t keylen, { struct csession *cse; -#ifdef INVARIANTS - /* NB: required when mtx_init is built with INVARIANTS */ cse = malloc(sizeof(struct csession), M_XDATA, M_NOWAIT | M_ZERO); -#else - cse = malloc(sizeof(struct csession), M_XDATA, M_NOWAIT); -#endif if (cse == NULL) return NULL; mtx_init(&cse->lock, "cryptodev", "crypto session lock", MTX_DEF); diff --git a/sys/opencrypto/cryptodev.h b/sys/opencrypto/cryptodev.h index e299522..d14fb3a 100644 --- a/sys/opencrypto/cryptodev.h +++ b/sys/opencrypto/cryptodev.h @@ -23,6 +23,12 @@ * PURPOSE. * * Copyright (c) 2001 Theo de Raadt + * Copyright (c) 2014 The FreeBSD Foundation + * All rights reserved. + * + * Portions of this software were developed by John-Mark Gurney + * under sponsorship of the FreeBSD Foundation and + * Rubicon Communications, LLC (Netgate). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -72,6 +78,7 @@ #define SHA2_512_HASH_LEN 64 #define MD5_KPDK_HASH_LEN 16 #define SHA1_KPDK_HASH_LEN 20 +#define AES_GMAC_HASH_LEN 16 /* Maximum hash algorithm result length */ #define HASH_MAX_LEN SHA2_512_HASH_LEN /* Keep this updated */ @@ -80,40 +87,87 @@ #define MD5_HMAC_BLOCK_LEN 64 #define SHA1_HMAC_BLOCK_LEN 64 #define RIPEMD160_HMAC_BLOCK_LEN 64 -#define SHA2_256_HMAC_BLOCK_LEN 64 -#define SHA2_384_HMAC_BLOCK_LEN 128 -#define SHA2_512_HMAC_BLOCK_LEN 128 +#define SHA2_256_HMAC_BLOCK_LEN 64 +#define SHA2_384_HMAC_BLOCK_LEN 128 +#define SHA2_512_HMAC_BLOCK_LEN 128 /* Maximum HMAC block length */ -#define HMAC_MAX_BLOCK_LEN SHA2_512_HMAC_BLOCK_LEN /* Keep this updated */ +#define HMAC_MAX_BLOCK_LEN SHA2_512_HMAC_BLOCK_LEN /* Keep this updated */ #define HMAC_IPAD_VAL 0x36 #define HMAC_OPAD_VAL 0x5C +/* HMAC Key Length */ +#define NULL_HMAC_KEY_LEN 0 +#define MD5_HMAC_KEY_LEN 16 +#define SHA1_HMAC_KEY_LEN 20 +#define RIPEMD160_HMAC_KEY_LEN 20 +#define SHA2_256_HMAC_KEY_LEN 32 +#define SHA2_384_HMAC_KEY_LEN 48 +#define SHA2_512_HMAC_KEY_LEN 64 +#define AES_128_GMAC_KEY_LEN 16 +#define AES_192_GMAC_KEY_LEN 24 +#define AES_256_GMAC_KEY_LEN 32 /* Encryption algorithm block sizes */ -#define NULL_BLOCK_LEN 4 -#define DES_BLOCK_LEN 8 -#define DES3_BLOCK_LEN 8 -#define BLOWFISH_BLOCK_LEN 8 -#define SKIPJACK_BLOCK_LEN 8 -#define CAST128_BLOCK_LEN 8 -#define RIJNDAEL128_BLOCK_LEN 16 -#define AES_BLOCK_LEN RIJNDAEL128_BLOCK_LEN -#define CAMELLIA_BLOCK_LEN 16 -#define EALG_MAX_BLOCK_LEN AES_BLOCK_LEN /* Keep this updated */ +#define NULL_BLOCK_LEN 4 /* IPsec to maintain alignment */ +#define DES_BLOCK_LEN 8 +#define DES3_BLOCK_LEN 8 +#define BLOWFISH_BLOCK_LEN 8 +#define SKIPJACK_BLOCK_LEN 8 +#define CAST128_BLOCK_LEN 8 +#define RIJNDAEL128_BLOCK_LEN 16 +#define AES_BLOCK_LEN 16 +#define AES_ICM_BLOCK_LEN 1 +#define ARC4_BLOCK_LEN 1 +#define CAMELLIA_BLOCK_LEN 16 +#define EALG_MAX_BLOCK_LEN AES_BLOCK_LEN /* Keep this updated */ + +/* IV Lengths */ + +#define ARC4_IV_LEN 1 +#define AES_GCM_IV_LEN 12 +#define AES_XTS_IV_LEN 8 +#define AES_XTS_ALPHA 0x87 /* GF(2^128) generator polynomial */ + +/* Min and Max Encryption Key Sizes */ +#define NULL_MIN_KEY 0 +#define NULL_MAX_KEY 256 /* 2048 bits, max key */ +#define DES_MIN_KEY 8 +#define DES_MAX_KEY DES_MIN_KEY +#define TRIPLE_DES_MIN_KEY 24 +#define TRIPLE_DES_MAX_KEY TRIPLE_DES_MIN_KEY +#define BLOWFISH_MIN_KEY 5 +#define BLOWFISH_MAX_KEY 56 /* 448 bits, max key */ +#define CAST_MIN_KEY 5 +#define CAST_MAX_KEY 16 +#define SKIPJACK_MIN_KEY 10 +#define SKIPJACK_MAX_KEY SKIPJACK_MIN_KEY +#define RIJNDAEL_MIN_KEY 16 +#define RIJNDAEL_MAX_KEY 32 +#define AES_MIN_KEY RIJNDAEL_MIN_KEY +#define AES_MAX_KEY RIJNDAEL_MAX_KEY +#define AES_XTS_MIN_KEY (2 * AES_MIN_KEY) +#define AES_XTS_MAX_KEY (2 * AES_MAX_KEY) +#define ARC4_MIN_KEY 1 +#define ARC4_MAX_KEY 32 +#define CAMELLIA_MIN_KEY 8 +#define CAMELLIA_MAX_KEY 32 + +/* Maximum hash algorithm result length */ +#define AALG_MAX_RESULT_LEN 64 /* Keep this updated */ #define CRYPTO_ALGORITHM_MIN 1 -#define CRYPTO_DES_CBC 1 -#define CRYPTO_3DES_CBC 2 -#define CRYPTO_BLF_CBC 3 -#define CRYPTO_CAST_CBC 4 -#define CRYPTO_SKIPJACK_CBC 5 -#define CRYPTO_MD5_HMAC 6 -#define CRYPTO_SHA1_HMAC 7 -#define CRYPTO_RIPEMD160_HMAC 8 -#define CRYPTO_MD5_KPDK 9 -#define CRYPTO_SHA1_KPDK 10 -#define CRYPTO_RIJNDAEL128_CBC 11 /* 128 bit blocksize */ -#define CRYPTO_AES_CBC 11 /* 128 bit blocksize -- the same as above */ -#define CRYPTO_ARC4 12 +#define CRYPTO_DES_CBC 1 +#define CRYPTO_3DES_CBC 2 +#define CRYPTO_BLF_CBC 3 +#define CRYPTO_CAST_CBC 4 +#define CRYPTO_SKIPJACK_CBC 5 +#define CRYPTO_MD5_HMAC 6 +#define CRYPTO_SHA1_HMAC 7 +#define CRYPTO_RIPEMD160_HMAC 8 +#define CRYPTO_MD5_KPDK 9 +#define CRYPTO_SHA1_KPDK 10 +#define CRYPTO_RIJNDAEL128_CBC 11 /* 128 bit blocksize */ +#define CRYPTO_AES_CBC 11 /* 128 bit blocksize -- the same as above */ +#define CRYPTO_ARC4 12 #define CRYPTO_MD5 13 #define CRYPTO_SHA1 14 #define CRYPTO_NULL_HMAC 15 @@ -122,9 +176,18 @@ #define CRYPTO_SHA2_256_HMAC 18 #define CRYPTO_SHA2_384_HMAC 19 #define CRYPTO_SHA2_512_HMAC 20 -#define CRYPTO_CAMELLIA_CBC 21 +#define CRYPTO_CAMELLIA_CBC 21 #define CRYPTO_AES_XTS 22 -#define CRYPTO_ALGORITHM_MAX 22 /* Keep updated - see below */ +#define CRYPTO_AES_ICM 23 /* commonly known as CTR mode */ +#define CRYPTO_AES_NIST_GMAC 24 /* cipher side */ +#define CRYPTO_AES_NIST_GCM_16 25 /* 16 byte ICV */ +#define CRYPTO_AES_128_NIST_GMAC 26 /* auth side */ +#define CRYPTO_AES_192_NIST_GMAC 27 /* auth side */ +#define CRYPTO_AES_256_NIST_GMAC 28 /* auth side */ +#define CRYPTO_ALGORITHM_MAX 28 /* Keep updated - see below */ + +#define CRYPTO_ALGO_VALID(x) ((x) >= CRYPTO_ALGORITHM_MIN && \ + (x) <= CRYPTO_ALGORITHM_MAX) /* Algorithm flags */ #define CRYPTO_ALG_FLAG_SUPPORTED 0x01 /* Algorithm is supported */ @@ -182,6 +245,20 @@ struct crypt_op { caddr_t iv; }; +/* op and flags the same as crypt_op */ +struct crypt_aead { + u_int32_t ses; + u_int16_t op; /* i.e. COP_ENCRYPT */ + u_int16_t flags; + u_int len; + u_int aadlen; + u_int ivlen; + caddr_t src, dst; /* become iov[] inside kernel */ + caddr_t aad; /* additional authenticated data */ + caddr_t tag; /* must fit for chosen TAG length */ + caddr_t iv; +}; + /* * Parameters for looking up a crypto driver/device by * device name or by id. The latter are returned for @@ -239,6 +316,7 @@ struct crypt_kop { #define CIOCGSESSION2 _IOWR('c', 106, struct session2_op) #define CIOCKEY2 _IOWR('c', 107, struct crypt_kop) #define CIOCFINDDEV _IOWR('c', 108, struct crypt_find_op) +#define CIOCCRYPTAEAD _IOWR('c', 109, struct crypt_aead) struct cryptotstat { struct timespec acc; /* total accumulated time */ @@ -269,6 +347,14 @@ struct cryptostats { }; #ifdef _KERNEL + +#if 0 +#define CRYPTDEB(s) do { printf("%s:%d: %s\n", __FILE__, __LINE__, s); \ + } while (0) +#else +#define CRYPTDEB(s) do { } while (0) +#endif + /* Standard initialization structure beginning */ struct cryptoini { int cri_alg; /* Algorithm to use */ @@ -292,14 +378,15 @@ struct cryptodesc { place, so don't copy. */ #define CRD_F_IV_EXPLICIT 0x04 /* IV explicitly provided */ #define CRD_F_DSA_SHA_NEEDED 0x08 /* Compute SHA-1 of buffer for DSA */ +#define CRD_F_COMP 0x0f /* Set when doing compression */ #define CRD_F_KEY_EXPLICIT 0x10 /* Key explicitly provided */ -#define CRD_F_COMP 0x0f /* Set when doing compression */ struct cryptoini CRD_INI; /* Initialization/context data */ -#define crd_iv CRD_INI.cri_iv -#define crd_key CRD_INI.cri_key -#define crd_alg CRD_INI.cri_alg -#define crd_klen CRD_INI.cri_klen +#define crd_esn CRD_INI.cri_esn +#define crd_iv CRD_INI.cri_iv +#define crd_key CRD_INI.cri_key +#define crd_alg CRD_INI.cri_alg +#define crd_klen CRD_INI.cri_klen struct cryptodesc *crd_next; }; @@ -324,9 +411,8 @@ struct cryptop { */ int crp_flags; -#define CRYPTO_F_IMBUF 0x0001 /* Input/output are mbuf chains */ -#define CRYPTO_F_IOV 0x0002 /* Input/output are uio */ -#define CRYPTO_F_REL 0x0004 /* Must return data in same place */ +#define CRYPTO_F_IMBUF 0x0001 /* Input/output are mbuf chains */ +#define CRYPTO_F_IOV 0x0002 /* Input/output are uio */ #define CRYPTO_F_BATCH 0x0008 /* Batch op if possible */ #define CRYPTO_F_CBIMM 0x0010 /* Do callback immediately */ #define CRYPTO_F_DONE 0x0020 /* Operation completed */ @@ -341,12 +427,12 @@ struct cryptop { struct bintime crp_tstamp; /* performance time stamp */ }; -#define CRYPTO_BUF_CONTIG 0x0 -#define CRYPTO_BUF_IOV 0x1 -#define CRYPTO_BUF_MBUF 0x2 +#define CRYPTO_BUF_CONTIG 0x0 +#define CRYPTO_BUF_IOV 0x1 +#define CRYPTO_BUF_MBUF 0x2 -#define CRYPTO_OP_DECRYPT 0x0 -#define CRYPTO_OP_ENCRYPT 0x1 +#define CRYPTO_OP_DECRYPT 0x0 +#define CRYPTO_OP_ENCRYPT 0x1 /* * Hints passed to process methods. @@ -381,9 +467,9 @@ MALLOC_DECLARE(M_CRYPTO_DATA); extern int crypto_newsession(u_int64_t *sid, struct cryptoini *cri, int hard); extern int crypto_freesession(u_int64_t sid); -#define CRYPTOCAP_F_HARDWARE CRYPTO_FLAG_HARDWARE -#define CRYPTOCAP_F_SOFTWARE CRYPTO_FLAG_SOFTWARE -#define CRYPTOCAP_F_SYNC 0x04000000 /* operates synchronously */ +#define CRYPTOCAP_F_HARDWARE CRYPTO_FLAG_HARDWARE +#define CRYPTOCAP_F_SOFTWARE CRYPTO_FLAG_SOFTWARE +#define CRYPTOCAP_F_SYNC 0x04000000 /* operates synchronously */ extern int32_t crypto_get_driverid(device_t dev, int flags); extern int crypto_find_driver(const char *); extern device_t crypto_find_device_byhid(int hid); @@ -418,10 +504,15 @@ extern int crypto_devallowsoft; /* only use hardware crypto */ struct uio; extern void cuio_copydata(struct uio* uio, int off, int len, caddr_t cp); extern void cuio_copyback(struct uio* uio, int off, int len, caddr_t cp); -extern struct iovec *cuio_getptr(struct uio *uio, int loc, int *off); +extern int cuio_getptr(struct uio *uio, int loc, int *off); extern int cuio_apply(struct uio *uio, int off, int len, int (*f)(void *, void *, u_int), void *arg); +struct mbuf; +struct iovec; +extern int crypto_mbuftoiov(struct mbuf *mbuf, struct iovec **iovptr, + int *cnt, int *allocated); + extern void crypto_copyback(int flags, caddr_t buf, int off, int size, caddr_t in); extern void crypto_copydata(int flags, caddr_t buf, int off, int size, diff --git a/sys/opencrypto/cryptosoft.c b/sys/opencrypto/cryptosoft.c index d73f462..052916b 100644 --- a/sys/opencrypto/cryptosoft.c +++ b/sys/opencrypto/cryptosoft.c @@ -9,6 +9,12 @@ * supported the development of this code. * * Copyright (c) 2000, 2001 Angelos D. Keromytis + * Copyright (c) 2014 The FreeBSD Foundation + * All rights reserved. + * + * Portions of this software were developed by John-Mark Gurney + * under sponsorship of the FreeBSD Foundation and + * Rubicon Communications, LLC (Netgate). * * Permission to use, copy, and modify this software with or without fee * is hereby granted, provided that this entire notice is included in @@ -35,6 +41,10 @@ __FBSDID("$FreeBSD$"); #include <sys/random.h> #include <sys/kernel.h> #include <sys/uio.h> +#include <sys/lock.h> +#include <sys/rwlock.h> +#include <sys/endian.h> +#include <sys/limits.h> #include <crypto/blowfish/blowfish.h> #include <crypto/sha1.h> @@ -54,14 +64,18 @@ __FBSDID("$FreeBSD$"); static int32_t swcr_id; static struct swcr_data **swcr_sessions = NULL; static u_int32_t swcr_sesnum; +/* Protects swcr_sessions pointer, not data. */ +static struct rwlock swcr_sessions_lock; u_int8_t hmac_ipad_buffer[HMAC_MAX_BLOCK_LEN]; u_int8_t hmac_opad_buffer[HMAC_MAX_BLOCK_LEN]; static int swcr_encdec(struct cryptodesc *, struct swcr_data *, caddr_t, int); static int swcr_authcompute(struct cryptodesc *, struct swcr_data *, caddr_t, int); +static int swcr_authenc(struct cryptop *crp); static int swcr_compdec(struct cryptodesc *, struct swcr_data *, caddr_t, int); static int swcr_freesession(device_t dev, u_int64_t tid); +static int swcr_freesession_locked(device_t dev, u_int64_t tid); /* * Apply a symmetric encryption/decryption algorithm. @@ -71,36 +85,48 @@ swcr_encdec(struct cryptodesc *crd, struct swcr_data *sw, caddr_t buf, int flags) { unsigned char iv[EALG_MAX_BLOCK_LEN], blk[EALG_MAX_BLOCK_LEN], *idat; - unsigned char *ivp, piv[EALG_MAX_BLOCK_LEN]; + unsigned char *ivp, *nivp, iv2[EALG_MAX_BLOCK_LEN]; struct enc_xform *exf; - int i, k, j, blks; + int i, j, k, blks, ind, count, ivlen; + struct uio *uio, uiolcl; + struct iovec iovlcl[4]; + struct iovec *iov; + int iovcnt, iovalloc; + int error; + + error = 0; exf = sw->sw_exf; blks = exf->blocksize; + ivlen = exf->ivsize; /* Check for non-padded data */ if (crd->crd_len % blks) return EINVAL; + if (crd->crd_alg == CRYPTO_AES_ICM && + (crd->crd_flags & CRD_F_IV_EXPLICIT) == 0) + return (EINVAL); + /* Initialize the IV */ if (crd->crd_flags & CRD_F_ENCRYPT) { /* IV explicitly provided ? */ if (crd->crd_flags & CRD_F_IV_EXPLICIT) - bcopy(crd->crd_iv, iv, blks); + bcopy(crd->crd_iv, iv, ivlen); else - arc4rand(iv, blks, 0); + arc4rand(iv, ivlen, 0); /* Do we need to write the IV */ if (!(crd->crd_flags & CRD_F_IV_PRESENT)) - crypto_copyback(flags, buf, crd->crd_inject, blks, iv); + crypto_copyback(flags, buf, crd->crd_inject, ivlen, iv); } else { /* Decryption */ - /* IV explicitly provided ? */ + /* IV explicitly provided ? */ if (crd->crd_flags & CRD_F_IV_EXPLICIT) - bcopy(crd->crd_iv, iv, blks); + bcopy(crd->crd_iv, iv, ivlen); else { /* Get IV off buf */ - crypto_copydata(flags, buf, crd->crd_inject, blks, iv); + crypto_copydata(flags, buf, crd->crd_inject, ivlen, iv); } } @@ -109,341 +135,186 @@ swcr_encdec(struct cryptodesc *crd, struct swcr_data *sw, caddr_t buf, if (sw->sw_kschedule) exf->zerokey(&(sw->sw_kschedule)); + error = exf->setkey(&sw->sw_kschedule, crd->crd_key, crd->crd_klen / 8); if (error) return (error); } + iov = iovlcl; + iovcnt = nitems(iovlcl); + iovalloc = 0; + uio = &uiolcl; + if ((flags & CRYPTO_F_IMBUF) != 0) { + error = crypto_mbuftoiov((struct mbuf *)buf, &iov, &iovcnt, + &iovalloc); + if (error) + return (error); + uio->uio_iov = iov; + uio->uio_iovcnt = iovcnt; + } else if ((flags & CRYPTO_F_IOV) != 0) + uio = (struct uio *)buf; + else { + iov[0].iov_base = buf; + iov[0].iov_len = crd->crd_skip + crd->crd_len; + uio->uio_iov = iov; + uio->uio_iovcnt = 1; + } + ivp = iv; - /* - * xforms that provide a reinit method perform all IV - * handling themselves. - */ - if (exf->reinit) + if (exf->reinit) { + /* + * xforms that provide a reinit method perform all IV + * handling themselves. + */ exf->reinit(sw->sw_kschedule, iv); + } - if (flags & CRYPTO_F_IMBUF) { - struct mbuf *m = (struct mbuf *) buf; + count = crd->crd_skip; + ind = cuio_getptr(uio, count, &k); + if (ind == -1) { + error = EINVAL; + goto out; + } - /* Find beginning of data */ - m = m_getptr(m, crd->crd_skip, &k); - if (m == NULL) - return EINVAL; + i = crd->crd_len; - i = crd->crd_len; - - while (i > 0) { - /* - * If there's insufficient data at the end of - * an mbuf, we have to do some copying. - */ - if (m->m_len < k + blks && m->m_len != k) { - m_copydata(m, k, blks, blk); - - /* Actual encryption/decryption */ - if (exf->reinit) { - if (crd->crd_flags & CRD_F_ENCRYPT) { - exf->encrypt(sw->sw_kschedule, - blk); - } else { - exf->decrypt(sw->sw_kschedule, - blk); - } - } else if (crd->crd_flags & CRD_F_ENCRYPT) { - /* XOR with previous block */ - for (j = 0; j < blks; j++) - blk[j] ^= ivp[j]; - - exf->encrypt(sw->sw_kschedule, blk); - - /* - * Keep encrypted block for XOR'ing - * with next block - */ - bcopy(blk, iv, blks); - ivp = iv; - } else { /* decrypt */ - /* - * Keep encrypted block for XOR'ing - * with next block - */ - if (ivp == iv) - bcopy(blk, piv, blks); - else - bcopy(blk, iv, blks); - - exf->decrypt(sw->sw_kschedule, blk); - - /* XOR with previous block */ - for (j = 0; j < blks; j++) - blk[j] ^= ivp[j]; - - if (ivp == iv) - bcopy(piv, iv, blks); - else - ivp = iv; + while (i > 0) { + /* + * If there's insufficient data at the end of + * an iovec, we have to do some copying. + */ + if (uio->uio_iov[ind].iov_len < k + blks && + uio->uio_iov[ind].iov_len != k) { + cuio_copydata(uio, count, blks, blk); + + /* Actual encryption/decryption */ + if (exf->reinit) { + if (crd->crd_flags & CRD_F_ENCRYPT) { + exf->encrypt(sw->sw_kschedule, + blk); + } else { + exf->decrypt(sw->sw_kschedule, + blk); } - - /* Copy back decrypted block */ - m_copyback(m, k, blks, blk); - - /* Advance pointer */ - m = m_getptr(m, k + blks, &k); - if (m == NULL) - return EINVAL; - - i -= blks; - - /* Could be done... */ - if (i == 0) - break; + } else if (crd->crd_flags & CRD_F_ENCRYPT) { + /* XOR with previous block */ + for (j = 0; j < blks; j++) + blk[j] ^= ivp[j]; + + exf->encrypt(sw->sw_kschedule, blk); + + /* + * Keep encrypted block for XOR'ing + * with next block + */ + bcopy(blk, iv, blks); + ivp = iv; + } else { /* decrypt */ + /* + * Keep encrypted block for XOR'ing + * with next block + */ + nivp = (ivp == iv) ? iv2 : iv; + bcopy(blk, nivp, blks); + + exf->decrypt(sw->sw_kschedule, blk); + + /* XOR with previous block */ + for (j = 0; j < blks; j++) + blk[j] ^= ivp[j]; + + ivp = nivp; } - /* Skip possibly empty mbufs */ - if (k == m->m_len) { - for (m = m->m_next; m && m->m_len == 0; - m = m->m_next) - ; - k = 0; - } - - /* Sanity check */ - if (m == NULL) - return EINVAL; + /* Copy back decrypted block */ + cuio_copyback(uio, count, blks, blk); - /* - * Warning: idat may point to garbage here, but - * we only use it in the while() loop, only if - * there are indeed enough data. - */ - idat = mtod(m, unsigned char *) + k; - - while (m->m_len >= k + blks && i > 0) { - if (exf->reinit) { - if (crd->crd_flags & CRD_F_ENCRYPT) { - exf->encrypt(sw->sw_kschedule, - idat); - } else { - exf->decrypt(sw->sw_kschedule, - idat); - } - } else if (crd->crd_flags & CRD_F_ENCRYPT) { - /* XOR with previous block/IV */ - for (j = 0; j < blks; j++) - idat[j] ^= ivp[j]; - - exf->encrypt(sw->sw_kschedule, idat); - ivp = idat; - } else { /* decrypt */ - /* - * Keep encrypted block to be used - * in next block's processing. - */ - if (ivp == iv) - bcopy(idat, piv, blks); - else - bcopy(idat, iv, blks); - - exf->decrypt(sw->sw_kschedule, idat); - - /* XOR with previous block/IV */ - for (j = 0; j < blks; j++) - idat[j] ^= ivp[j]; - - if (ivp == iv) - bcopy(piv, iv, blks); - else - ivp = iv; - } + count += blks; - idat += blks; - k += blks; - i -= blks; + /* Advance pointer */ + ind = cuio_getptr(uio, count, &k); + if (ind == -1) { + error = EINVAL; + goto out; } - } - return 0; /* Done with mbuf encryption/decryption */ - } else if (flags & CRYPTO_F_IOV) { - struct uio *uio = (struct uio *) buf; - struct iovec *iov; + i -= blks; - /* Find beginning of data */ - iov = cuio_getptr(uio, crd->crd_skip, &k); - if (iov == NULL) - return EINVAL; + /* Could be done... */ + if (i == 0) + break; + } - i = crd->crd_len; - - while (i > 0) { - /* - * If there's insufficient data at the end of - * an iovec, we have to do some copying. - */ - if (iov->iov_len < k + blks && iov->iov_len != k) { - cuio_copydata(uio, k, blks, blk); - - /* Actual encryption/decryption */ - if (exf->reinit) { - if (crd->crd_flags & CRD_F_ENCRYPT) { - exf->encrypt(sw->sw_kschedule, - blk); - } else { - exf->decrypt(sw->sw_kschedule, - blk); - } - } else if (crd->crd_flags & CRD_F_ENCRYPT) { - /* XOR with previous block */ - for (j = 0; j < blks; j++) - blk[j] ^= ivp[j]; - - exf->encrypt(sw->sw_kschedule, blk); - - /* - * Keep encrypted block for XOR'ing - * with next block - */ - bcopy(blk, iv, blks); - ivp = iv; - } else { /* decrypt */ - /* - * Keep encrypted block for XOR'ing - * with next block - */ - if (ivp == iv) - bcopy(blk, piv, blks); - else - bcopy(blk, iv, blks); - - exf->decrypt(sw->sw_kschedule, blk); - - /* XOR with previous block */ - for (j = 0; j < blks; j++) - blk[j] ^= ivp[j]; - - if (ivp == iv) - bcopy(piv, iv, blks); - else - ivp = iv; + /* + * Warning: idat may point to garbage here, but + * we only use it in the while() loop, only if + * there are indeed enough data. + */ + idat = (char *)uio->uio_iov[ind].iov_base + k; + + while (uio->uio_iov[ind].iov_len >= k + blks && i > 0) { + if (exf->reinit) { + if (crd->crd_flags & CRD_F_ENCRYPT) { + exf->encrypt(sw->sw_kschedule, + idat); + } else { + exf->decrypt(sw->sw_kschedule, + idat); } - - /* Copy back decrypted block */ - cuio_copyback(uio, k, blks, blk); - - /* Advance pointer */ - iov = cuio_getptr(uio, k + blks, &k); - if (iov == NULL) - return EINVAL; - - i -= blks; - - /* Could be done... */ - if (i == 0) - break; + } else if (crd->crd_flags & CRD_F_ENCRYPT) { + /* XOR with previous block/IV */ + for (j = 0; j < blks; j++) + idat[j] ^= ivp[j]; + + exf->encrypt(sw->sw_kschedule, idat); + ivp = idat; + } else { /* decrypt */ + /* + * Keep encrypted block to be used + * in next block's processing. + */ + nivp = (ivp == iv) ? iv2 : iv; + bcopy(idat, nivp, blks); + + exf->decrypt(sw->sw_kschedule, idat); + + /* XOR with previous block/IV */ + for (j = 0; j < blks; j++) + idat[j] ^= ivp[j]; + + ivp = nivp; } - /* - * Warning: idat may point to garbage here, but - * we only use it in the while() loop, only if - * there are indeed enough data. - */ - idat = (char *)iov->iov_base + k; - - while (iov->iov_len >= k + blks && i > 0) { - if (exf->reinit) { - if (crd->crd_flags & CRD_F_ENCRYPT) { - exf->encrypt(sw->sw_kschedule, - idat); - } else { - exf->decrypt(sw->sw_kschedule, - idat); - } - } else if (crd->crd_flags & CRD_F_ENCRYPT) { - /* XOR with previous block/IV */ - for (j = 0; j < blks; j++) - idat[j] ^= ivp[j]; - - exf->encrypt(sw->sw_kschedule, idat); - ivp = idat; - } else { /* decrypt */ - /* - * Keep encrypted block to be used - * in next block's processing. - */ - if (ivp == iv) - bcopy(idat, piv, blks); - else - bcopy(idat, iv, blks); - - exf->decrypt(sw->sw_kschedule, idat); - - /* XOR with previous block/IV */ - for (j = 0; j < blks; j++) - idat[j] ^= ivp[j]; - - if (ivp == iv) - bcopy(piv, iv, blks); - else - ivp = iv; - } - - idat += blks; - k += blks; - i -= blks; - } - if (k == iov->iov_len) { - iov++; - k = 0; - } + idat += blks; + count += blks; + k += blks; + i -= blks; } - return 0; /* Done with iovec encryption/decryption */ - } else { /* contiguous buffer */ - if (exf->reinit) { - for (i = crd->crd_skip; - i < crd->crd_skip + crd->crd_len; i += blks) { - if (crd->crd_flags & CRD_F_ENCRYPT) - exf->encrypt(sw->sw_kschedule, buf + i); - else - exf->decrypt(sw->sw_kschedule, buf + i); - } - } else if (crd->crd_flags & CRD_F_ENCRYPT) { - for (i = crd->crd_skip; - i < crd->crd_skip + crd->crd_len; i += blks) { - /* XOR with the IV/previous block, as appropriate. */ - if (i == crd->crd_skip) - for (k = 0; k < blks; k++) - buf[i + k] ^= ivp[k]; - else - for (k = 0; k < blks; k++) - buf[i + k] ^= buf[i + k - blks]; - exf->encrypt(sw->sw_kschedule, buf + i); - } - } else { /* Decrypt */ - /* - * Start at the end, so we don't need to keep the encrypted - * block as the IV for the next block. - */ - for (i = crd->crd_skip + crd->crd_len - blks; - i >= crd->crd_skip; i -= blks) { - exf->decrypt(sw->sw_kschedule, buf + i); - - /* XOR with the IV/previous block, as appropriate */ - if (i == crd->crd_skip) - for (k = 0; k < blks; k++) - buf[i + k] ^= ivp[k]; - else - for (k = 0; k < blks; k++) - buf[i + k] ^= buf[i + k - blks]; + /* + * Advance to the next iov if the end of the current iov + * is aligned with the end of a cipher block. + * Note that the code is equivalent to calling: + * ind = cuio_getptr(uio, count, &k); + */ + if (i > 0 && k == uio->uio_iov[ind].iov_len) { + k = 0; + ind++; + if (ind >= uio->uio_iovcnt) { + error = EINVAL; + goto out; } } - - return 0; /* Done with contiguous buffer encryption/decryption */ } - /* Unreachable */ - return EINVAL; +out: + if (iovalloc) + free(iov, M_CRYPTO_DATA); + + return (error); } static void @@ -578,6 +449,181 @@ swcr_authcompute(struct cryptodesc *crd, struct swcr_data *sw, caddr_t buf, return 0; } +CTASSERT(INT_MAX <= (1ll<<39) - 256); /* GCM: plain text < 2^39-256 */ +CTASSERT(INT_MAX <= (uint64_t)-1); /* GCM: associated data <= 2^64-1 */ + +/* + * Apply a combined encryption-authentication transformation + */ +static int +swcr_authenc(struct cryptop *crp) +{ + uint32_t blkbuf[howmany(EALG_MAX_BLOCK_LEN, sizeof(uint32_t))]; + u_char *blk = (u_char *)blkbuf; + u_char aalg[AALG_MAX_RESULT_LEN]; + u_char uaalg[AALG_MAX_RESULT_LEN]; + u_char iv[EALG_MAX_BLOCK_LEN]; + union authctx ctx; + struct cryptodesc *crd, *crda = NULL, *crde = NULL; + struct swcr_data *sw, *swa, *swe = NULL; + struct auth_hash *axf = NULL; + struct enc_xform *exf = NULL; + caddr_t buf = (caddr_t)crp->crp_buf; + uint32_t *blkp; + int aadlen, blksz, i, ivlen, len, iskip, oskip, r; + + ivlen = blksz = iskip = oskip = 0; + + for (crd = crp->crp_desc; crd; crd = crd->crd_next) { + for (sw = swcr_sessions[crp->crp_sid & 0xffffffff]; + sw && sw->sw_alg != crd->crd_alg; + sw = sw->sw_next) + ; + if (sw == NULL) + return (EINVAL); + + switch (sw->sw_alg) { + case CRYPTO_AES_NIST_GCM_16: + case CRYPTO_AES_NIST_GMAC: + swe = sw; + crde = crd; + exf = swe->sw_exf; + ivlen = 12; + break; + case CRYPTO_AES_128_NIST_GMAC: + case CRYPTO_AES_192_NIST_GMAC: + case CRYPTO_AES_256_NIST_GMAC: + swa = sw; + crda = crd; + axf = swa->sw_axf; + if (swa->sw_ictx == 0) + return (EINVAL); + bcopy(swa->sw_ictx, &ctx, axf->ctxsize); + blksz = axf->blocksize; + break; + default: + return (EINVAL); + } + } + if (crde == NULL || crda == NULL) + return (EINVAL); + + if (crde->crd_alg == CRYPTO_AES_NIST_GCM_16 && + (crde->crd_flags & CRD_F_IV_EXPLICIT) == 0) + return (EINVAL); + + if (crde->crd_klen != crda->crd_klen) + return (EINVAL); + + /* Initialize the IV */ + if (crde->crd_flags & CRD_F_ENCRYPT) { + /* IV explicitly provided ? */ + if (crde->crd_flags & CRD_F_IV_EXPLICIT) + bcopy(crde->crd_iv, iv, ivlen); + else + arc4rand(iv, ivlen, 0); + + /* Do we need to write the IV */ + if (!(crde->crd_flags & CRD_F_IV_PRESENT)) + crypto_copyback(crp->crp_flags, buf, crde->crd_inject, + ivlen, iv); + + } else { /* Decryption */ + /* IV explicitly provided ? */ + if (crde->crd_flags & CRD_F_IV_EXPLICIT) + bcopy(crde->crd_iv, iv, ivlen); + else { + /* Get IV off buf */ + crypto_copydata(crp->crp_flags, buf, crde->crd_inject, + ivlen, iv); + } + } + + /* Supply MAC with IV */ + if (axf->Reinit) + axf->Reinit(&ctx, iv, ivlen); + + /* Supply MAC with AAD */ + aadlen = crda->crd_len; + + for (i = iskip; i < crda->crd_len; i += blksz) { + len = MIN(crda->crd_len - i, blksz - oskip); + crypto_copydata(crp->crp_flags, buf, crda->crd_skip + i, len, + blk + oskip); + bzero(blk + len + oskip, blksz - len - oskip); + axf->Update(&ctx, blk, blksz); + oskip = 0; /* reset initial output offset */ + } + + if (exf->reinit) + exf->reinit(swe->sw_kschedule, iv); + + /* Do encryption/decryption with MAC */ + for (i = 0; i < crde->crd_len; i += blksz) { + len = MIN(crde->crd_len - i, blksz); + if (len < blksz) + bzero(blk, blksz); + crypto_copydata(crp->crp_flags, buf, crde->crd_skip + i, len, + blk); + if (crde->crd_flags & CRD_F_ENCRYPT) { + exf->encrypt(swe->sw_kschedule, blk); + axf->Update(&ctx, blk, len); + crypto_copyback(crp->crp_flags, buf, + crde->crd_skip + i, len, blk); + } else { + axf->Update(&ctx, blk, len); + } + } + + /* Do any required special finalization */ + switch (crda->crd_alg) { + case CRYPTO_AES_128_NIST_GMAC: + case CRYPTO_AES_192_NIST_GMAC: + case CRYPTO_AES_256_NIST_GMAC: + /* length block */ + bzero(blk, blksz); + blkp = (uint32_t *)blk + 1; + *blkp = htobe32(aadlen * 8); + blkp = (uint32_t *)blk + 3; + *blkp = htobe32(crde->crd_len * 8); + axf->Update(&ctx, blk, blksz); + break; + } + + /* Finalize MAC */ + axf->Final(aalg, &ctx); + + /* Validate tag */ + if (!(crde->crd_flags & CRD_F_ENCRYPT)) { + crypto_copydata(crp->crp_flags, buf, crda->crd_inject, + axf->hashsize, uaalg); + + r = timingsafe_bcmp(aalg, uaalg, axf->hashsize); + if (r == 0) { + /* tag matches, decrypt data */ + for (i = 0; i < crde->crd_len; i += blksz) { + len = MIN(crde->crd_len - i, blksz); + if (len < blksz) + bzero(blk, blksz); + crypto_copydata(crp->crp_flags, buf, + crde->crd_skip + i, len, blk); + if (!(crde->crd_flags & CRD_F_ENCRYPT)) { + exf->decrypt(swe->sw_kschedule, blk); + } + crypto_copyback(crp->crp_flags, buf, + crde->crd_skip + i, len, blk); + } + } else + return (EBADMSG); + } else { + /* Inject the authentication data */ + crypto_copyback(crp->crp_flags, buf, crda->crd_inject, + axf->hashsize, aalg); + } + + return (0); +} + /* * Apply a compression/decompression algorithm */ @@ -665,11 +711,13 @@ swcr_newsession(device_t dev, u_int32_t *sid, struct cryptoini *cri) struct enc_xform *txf; struct comp_algo *cxf; u_int32_t i; + int len; int error; if (sid == NULL || cri == NULL) return EINVAL; + rw_wlock(&swcr_sessions_lock); if (swcr_sessions) { for (i = 1; i < swcr_sesnum; i++) if (swcr_sessions[i] == NULL) @@ -692,6 +740,7 @@ swcr_newsession(device_t dev, u_int32_t *sid, struct cryptoini *cri) swcr_sesnum = 0; else swcr_sesnum /= 2; + rw_wunlock(&swcr_sessions_lock); return ENOBUFS; } @@ -705,6 +754,7 @@ swcr_newsession(device_t dev, u_int32_t *sid, struct cryptoini *cri) swcr_sessions = swd; } + rw_downgrade(&swcr_sessions_lock); swd = &swcr_sessions[i]; *sid = i; @@ -712,7 +762,8 @@ swcr_newsession(device_t dev, u_int32_t *sid, struct cryptoini *cri) *swd = malloc(sizeof(struct swcr_data), M_CRYPTO_DATA, M_NOWAIT|M_ZERO); if (*swd == NULL) { - swcr_freesession(dev, i); + swcr_freesession_locked(dev, i); + rw_runlock(&swcr_sessions_lock); return ENOBUFS; } @@ -738,6 +789,16 @@ swcr_newsession(device_t dev, u_int32_t *sid, struct cryptoini *cri) case CRYPTO_AES_XTS: txf = &enc_xform_aes_xts; goto enccommon; + case CRYPTO_AES_ICM: + txf = &enc_xform_aes_icm; + goto enccommon; + case CRYPTO_AES_NIST_GCM_16: + txf = &enc_xform_aes_nist_gcm; + goto enccommon; + case CRYPTO_AES_NIST_GMAC: + txf = &enc_xform_aes_nist_gmac; + (*swd)->sw_exf = txf; + break; case CRYPTO_CAMELLIA_CBC: txf = &enc_xform_camellia; goto enccommon; @@ -749,7 +810,8 @@ swcr_newsession(device_t dev, u_int32_t *sid, struct cryptoini *cri) error = txf->setkey(&((*swd)->sw_kschedule), cri->cri_key, cri->cri_klen / 8); if (error) { - swcr_freesession(dev, i); + swcr_freesession_locked(dev, i); + rw_runlock(&swcr_sessions_lock); return error; } } @@ -780,14 +842,16 @@ swcr_newsession(device_t dev, u_int32_t *sid, struct cryptoini *cri) (*swd)->sw_ictx = malloc(axf->ctxsize, M_CRYPTO_DATA, M_NOWAIT); if ((*swd)->sw_ictx == NULL) { - swcr_freesession(dev, i); + swcr_freesession_locked(dev, i); + rw_runlock(&swcr_sessions_lock); return ENOBUFS; } (*swd)->sw_octx = malloc(axf->ctxsize, M_CRYPTO_DATA, M_NOWAIT); if ((*swd)->sw_octx == NULL) { - swcr_freesession(dev, i); + swcr_freesession_locked(dev, i); + rw_runlock(&swcr_sessions_lock); return ENOBUFS; } @@ -810,14 +874,16 @@ swcr_newsession(device_t dev, u_int32_t *sid, struct cryptoini *cri) (*swd)->sw_ictx = malloc(axf->ctxsize, M_CRYPTO_DATA, M_NOWAIT); if ((*swd)->sw_ictx == NULL) { - swcr_freesession(dev, i); + swcr_freesession_locked(dev, i); + rw_runlock(&swcr_sessions_lock); return ENOBUFS; } (*swd)->sw_octx = malloc(cri->cri_klen / 8, M_CRYPTO_DATA, M_NOWAIT); if ((*swd)->sw_octx == NULL) { - swcr_freesession(dev, i); + swcr_freesession_locked(dev, i); + rw_runlock(&swcr_sessions_lock); return ENOBUFS; } @@ -841,7 +907,8 @@ swcr_newsession(device_t dev, u_int32_t *sid, struct cryptoini *cri) (*swd)->sw_ictx = malloc(axf->ctxsize, M_CRYPTO_DATA, M_NOWAIT); if ((*swd)->sw_ictx == NULL) { - swcr_freesession(dev, i); + swcr_freesession_locked(dev, i); + rw_runlock(&swcr_sessions_lock); return ENOBUFS; } @@ -850,12 +917,41 @@ swcr_newsession(device_t dev, u_int32_t *sid, struct cryptoini *cri) (*swd)->sw_axf = axf; break; #endif + + case CRYPTO_AES_128_NIST_GMAC: + axf = &auth_hash_nist_gmac_aes_128; + goto auth4common; + + case CRYPTO_AES_192_NIST_GMAC: + axf = &auth_hash_nist_gmac_aes_192; + goto auth4common; + + case CRYPTO_AES_256_NIST_GMAC: + axf = &auth_hash_nist_gmac_aes_256; + auth4common: + len = cri->cri_klen / 8; + if (len != 16 && len != 24 && len != 32) + return EINVAL; + + (*swd)->sw_ictx = malloc(axf->ctxsize, M_CRYPTO_DATA, + M_NOWAIT); + if ((*swd)->sw_ictx == NULL) { + swcr_freesession_locked(dev, i); + rw_runlock(&swcr_sessions_lock); + return ENOBUFS; + } + axf->Init((*swd)->sw_ictx); + axf->Setkey((*swd)->sw_ictx, cri->cri_key, len); + (*swd)->sw_axf = axf; + break; + case CRYPTO_DEFLATE_COMP: cxf = &comp_algo_deflate; (*swd)->sw_cxf = cxf; break; default: - swcr_freesession(dev, i); + swcr_freesession_locked(dev, i); + rw_runlock(&swcr_sessions_lock); return EINVAL; } @@ -863,14 +959,26 @@ swcr_newsession(device_t dev, u_int32_t *sid, struct cryptoini *cri) cri = cri->cri_next; swd = &((*swd)->sw_next); } + rw_runlock(&swcr_sessions_lock); return 0; } +static int +swcr_freesession(device_t dev, u_int64_t tid) +{ + int error; + + rw_rlock(&swcr_sessions_lock); + error = swcr_freesession_locked(dev, tid); + rw_runlock(&swcr_sessions_lock); + return error; +} + /* * Free a session. */ static int -swcr_freesession(device_t dev, u_int64_t tid) +swcr_freesession_locked(device_t dev, u_int64_t tid) { struct swcr_data *swd; struct enc_xform *txf; @@ -897,6 +1005,9 @@ swcr_freesession(device_t dev, u_int64_t tid) case CRYPTO_SKIPJACK_CBC: case CRYPTO_RIJNDAEL128_CBC: case CRYPTO_AES_XTS: + case CRYPTO_AES_ICM: + case CRYPTO_AES_NIST_GCM_16: + case CRYPTO_AES_NIST_GMAC: case CRYPTO_CAMELLIA_CBC: case CRYPTO_NULL_CBC: txf = swd->sw_exf; @@ -975,11 +1086,15 @@ swcr_process(device_t dev, struct cryptop *crp, int hint) goto done; } - lid = crp->crp_sid & 0xffffffff; - if (lid >= swcr_sesnum || lid == 0 || swcr_sessions[lid] == NULL) { + lid = CRYPTO_SESID2LID(crp->crp_sid); + rw_rlock(&swcr_sessions_lock); + if (swcr_sessions == NULL || lid >= swcr_sesnum || lid == 0 || + swcr_sessions[lid] == NULL) { + rw_runlock(&swcr_sessions_lock); crp->crp_etype = ENOENT; goto done; } + rw_runlock(&swcr_sessions_lock); /* Go through crypto descriptors, processing as we go */ for (crd = crp->crp_desc; crd; crd = crd->crd_next) { @@ -993,10 +1108,17 @@ swcr_process(device_t dev, struct cryptop *crp, int hint) * XXX between the various instances of an algorithm (so we can * XXX locate the correct crypto context). */ + rw_rlock(&swcr_sessions_lock); + if (swcr_sessions == NULL) { + rw_runlock(&swcr_sessions_lock); + crp->crp_etype = ENOENT; + goto done; + } for (sw = swcr_sessions[lid]; sw && sw->sw_alg != crd->crd_alg; sw = sw->sw_next) ; + rw_runlock(&swcr_sessions_lock); /* No such context ? */ if (sw == NULL) { @@ -1011,6 +1133,7 @@ swcr_process(device_t dev, struct cryptop *crp, int hint) case CRYPTO_SKIPJACK_CBC: case CRYPTO_RIJNDAEL128_CBC: case CRYPTO_AES_XTS: + case CRYPTO_AES_ICM: case CRYPTO_CAMELLIA_CBC: if ((crp->crp_etype = swcr_encdec(crd, sw, crp->crp_buf, crp->crp_flags)) != 0) @@ -1035,6 +1158,14 @@ swcr_process(device_t dev, struct cryptop *crp, int hint) goto done; break; + case CRYPTO_AES_NIST_GCM_16: + case CRYPTO_AES_NIST_GMAC: + case CRYPTO_AES_128_NIST_GMAC: + case CRYPTO_AES_192_NIST_GMAC: + case CRYPTO_AES_256_NIST_GMAC: + crp->crp_etype = swcr_authenc(crp); + goto done; + case CRYPTO_DEFLATE_COMP: if ((crp->crp_etype = swcr_compdec(crd, sw, crp->crp_buf, crp->crp_flags)) != 0) @@ -1074,6 +1205,7 @@ swcr_probe(device_t dev) static int swcr_attach(device_t dev) { + rw_init(&swcr_sessions_lock, "swcr_sessions_lock"); memset(hmac_ipad_buffer, HMAC_IPAD_VAL, HMAC_MAX_BLOCK_LEN); memset(hmac_opad_buffer, HMAC_OPAD_VAL, HMAC_MAX_BLOCK_LEN); @@ -1104,6 +1236,12 @@ swcr_attach(device_t dev) REGISTER(CRYPTO_SHA1); REGISTER(CRYPTO_RIJNDAEL128_CBC); REGISTER(CRYPTO_AES_XTS); + REGISTER(CRYPTO_AES_ICM); + REGISTER(CRYPTO_AES_NIST_GCM_16); + REGISTER(CRYPTO_AES_NIST_GMAC); + REGISTER(CRYPTO_AES_128_NIST_GMAC); + REGISTER(CRYPTO_AES_192_NIST_GMAC); + REGISTER(CRYPTO_AES_256_NIST_GMAC); REGISTER(CRYPTO_CAMELLIA_CBC); REGISTER(CRYPTO_DEFLATE_COMP); #undef REGISTER @@ -1115,8 +1253,11 @@ static int swcr_detach(device_t dev) { crypto_unregister_all(swcr_id); - if (swcr_sessions != NULL) - free(swcr_sessions, M_CRYPTO_DATA); + rw_wlock(&swcr_sessions_lock); + free(swcr_sessions, M_CRYPTO_DATA); + swcr_sessions = NULL; + rw_wunlock(&swcr_sessions_lock); + rw_destroy(&swcr_sessions_lock); return 0; } diff --git a/sys/opencrypto/gfmult.c b/sys/opencrypto/gfmult.c new file mode 100644 index 0000000..990a7ff --- /dev/null +++ b/sys/opencrypto/gfmult.c @@ -0,0 +1,275 @@ +/*- + * Copyright (c) 2014 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by John-Mark Gurney under + * the sponsorship of the FreeBSD Foundation and + * Rubicon Communications, LLC (Netgate). + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + * + */ + +#include "gfmult.h" + +#define REV_POLY_REDUCT 0xe1 /* 0x87 bit reversed */ + +/* reverse the bits of a nibble */ +static const uint8_t nib_rev[] = { + 0x0, 0x8, 0x4, 0xc, 0x2, 0xa, 0x6, 0xe, + 0x1, 0x9, 0x5, 0xd, 0x3, 0xb, 0x7, 0xf, +}; + +/* calulate v * 2 */ +static inline struct gf128 +gf128_mulalpha(struct gf128 v) +{ + uint64_t mask; + + mask = !!(v.v[1] & 1); + mask = ~(mask - 1); + v.v[1] = (v.v[1] >> 1) | ((v.v[0] & 1) << 63); + v.v[0] = (v.v[0] >> 1) ^ ((mask & REV_POLY_REDUCT) << 56); + + return v; +} + +/* + * Generate a table for 0-16 * h. Store the results in the table w/ indexes + * bit reversed, and the words striped across the values. + */ +void +gf128_genmultable(struct gf128 h, struct gf128table *t) +{ + struct gf128 tbl[16]; + int i; + + tbl[0] = MAKE_GF128(0, 0); + tbl[1] = h; + + for (i = 2; i < 16; i += 2) { + tbl[i] = gf128_mulalpha(tbl[i / 2]); + tbl[i + 1] = gf128_add(tbl[i], h); + } + + for (i = 0; i < 16; i++) { + t->a[nib_rev[i]] = tbl[i].v[0] >> 32; + t->b[nib_rev[i]] = tbl[i].v[0]; + t->c[nib_rev[i]] = tbl[i].v[1] >> 32; + t->d[nib_rev[i]] = tbl[i].v[1]; + } +} + +/* + * Generate tables containing h, h^2, h^3 and h^4, starting at 0. + */ +void +gf128_genmultable4(struct gf128 h, struct gf128table4 *t) +{ + struct gf128 h2, h3, h4; + + gf128_genmultable(h, &t->tbls[0]); + + h2 = gf128_mul(h, &t->tbls[0]); + + gf128_genmultable(h2, &t->tbls[1]); + + h3 = gf128_mul(h, &t->tbls[1]); + gf128_genmultable(h3, &t->tbls[2]); + + h4 = gf128_mul(h2, &t->tbls[1]); + gf128_genmultable(h4, &t->tbls[3]); +} + +/* + * Read a row from the table. + */ +static inline struct gf128 +readrow(struct gf128table *tbl, unsigned bits) +{ + struct gf128 r; + + bits = bits % 16; + + r.v[0] = ((uint64_t)tbl->a[bits] << 32) | tbl->b[bits]; + r.v[1] = ((uint64_t)tbl->c[bits] << 32) | tbl->d[bits]; + + return r; +} + +/* + * These are the reduction values. Since we are dealing with bit reversed + * version, the values need to be bit reversed, AND the indexes are also + * bit reversed to make lookups quicker. + */ +static uint16_t reduction[] = { + 0x0000, 0x1c20, 0x3840, 0x2460, 0x7080, 0x6ca0, 0x48c0, 0x54e0, + 0xe100, 0xfd20, 0xd940, 0xc560, 0x9180, 0x8da0, 0xa9c0, 0xb5e0, +}; + +/* + * Calculate: + * (x*2^4 + word[3,0]*h) * + * 2^4 + word[7,4]*h) * + * ... + * 2^4 + word[63,60]*h + */ +static struct gf128 +gfmultword(uint64_t word, struct gf128 x, struct gf128table *tbl) +{ + struct gf128 row; + unsigned bits; + unsigned redbits; + int i; + + for (i = 0; i < 64; i += 4) { + bits = word % 16; + + /* fetch row */ + row = readrow(tbl, bits); + + /* x * 2^4 */ + redbits = x.v[1] % 16; + x.v[1] = (x.v[1] >> 4) | (x.v[0] % 16) << 60; + x.v[0] >>= 4; + x.v[0] ^= (uint64_t)reduction[redbits] << (64 - 16); + + word >>= 4; + + x = gf128_add(x, row); + } + + return x; +} + +/* + * Calculate + * (x*2^4 + worda[3,0]*h^4+wordb[3,0]*h^3+...+wordd[3,0]*h) * + * ... + * 2^4 + worda[63,60]*h^4+ ... + wordd[63,60]*h + * + * Passing/returning struct is .5% faster than passing in via pointer on + * amd64. + */ +static struct gf128 +gfmultword4(uint64_t worda, uint64_t wordb, uint64_t wordc, uint64_t wordd, + struct gf128 x, struct gf128table4 *tbl) +{ + struct gf128 rowa, rowb, rowc, rowd; + unsigned bitsa, bitsb, bitsc, bitsd; + unsigned redbits; + int i; + + /* + * XXX - nibble reverse words to save a shift? probably not as + * nibble reverse would take 20 ops (5 * 4) verse 16 + */ + + for (i = 0; i < 64; i += 4) { + bitsa = worda % 16; + bitsb = wordb % 16; + bitsc = wordc % 16; + bitsd = wordd % 16; + + /* fetch row */ + rowa = readrow(&tbl->tbls[3], bitsa); + rowb = readrow(&tbl->tbls[2], bitsb); + rowc = readrow(&tbl->tbls[1], bitsc); + rowd = readrow(&tbl->tbls[0], bitsd); + + /* x * 2^4 */ + redbits = x.v[1] % 16; + x.v[1] = (x.v[1] >> 4) | (x.v[0] % 16) << 60; + x.v[0] >>= 4; + x.v[0] ^= (uint64_t)reduction[redbits] << (64 - 16); + + worda >>= 4; + wordb >>= 4; + wordc >>= 4; + wordd >>= 4; + + x = gf128_add(x, gf128_add(rowa, gf128_add(rowb, + gf128_add(rowc, rowd)))); + } + + return x; +} + +struct gf128 +gf128_mul(struct gf128 v, struct gf128table *tbl) +{ + struct gf128 ret; + + ret = MAKE_GF128(0, 0); + + ret = gfmultword(v.v[1], ret, tbl); + ret = gfmultword(v.v[0], ret, tbl); + + return ret; +} + +/* + * Calculate a*h^4 + b*h^3 + c*h^2 + d*h, or: + * (((a*h+b)*h+c)*h+d)*h + */ +struct gf128 +gf128_mul4(struct gf128 a, struct gf128 b, struct gf128 c, struct gf128 d, + struct gf128table4 *tbl) +{ + struct gf128 tmp; + + tmp = MAKE_GF128(0, 0); + + tmp = gfmultword4(a.v[1], b.v[1], c.v[1], d.v[1], tmp, tbl); + tmp = gfmultword4(a.v[0], b.v[0], c.v[0], d.v[0], tmp, tbl); + + return tmp; +} + +/* + * a = data[0..15] + r + * b = data[16..31] + * c = data[32..47] + * d = data[48..63] + * + * Calculate a*h^4 + b*h^3 + c*h^2 + d*h, or: + * (((a*h+b)*h+c)*h+d)*h + */ +struct gf128 +gf128_mul4b(struct gf128 r, const uint8_t *v, struct gf128table4 *tbl) +{ + struct gf128 a, b, c, d; + struct gf128 tmp; + + tmp = MAKE_GF128(0, 0); + + a = gf128_add(r, gf128_read(&v[0*16])); + b = gf128_read(&v[1*16]); + c = gf128_read(&v[2*16]); + d = gf128_read(&v[3*16]); + + tmp = gfmultword4(a.v[1], b.v[1], c.v[1], d.v[1], tmp, tbl); + tmp = gfmultword4(a.v[0], b.v[0], c.v[0], d.v[0], tmp, tbl); + + return tmp; +} diff --git a/sys/opencrypto/gfmult.h b/sys/opencrypto/gfmult.h new file mode 100644 index 0000000..c385618 --- /dev/null +++ b/sys/opencrypto/gfmult.h @@ -0,0 +1,128 @@ +/*- + * Copyright (c) 2014 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by John-Mark Gurney under + * the sponsorship of the FreeBSD Foundation and + * Rubicon Communications, LLC (Netgate). + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + * + */ + +#ifndef _GFMULT_H_ +#define _GFMULT_H_ + +#ifdef __APPLE__ +#define __aligned(x) __attribute__((__aligned__(x))) +#define be64dec(buf) __builtin_bswap64(*(uint64_t *)buf) +#define be64enc(buf, x) (*(uint64_t *)buf = __builtin_bswap64(x)) +#else +#include <sys/endian.h> +#endif + +#ifdef _KERNEL +#include <sys/types.h> +#else +#include <stdint.h> +#include <strings.h> +#endif + +#define REQ_ALIGN (16 * 4) +/* + * The rows are striped across cache lines. Note that the indexes + * are bit reversed to make accesses quicker. + */ +struct gf128table { + uint32_t a[16] __aligned(REQ_ALIGN); /* bits 0 - 31 */ + uint32_t b[16] __aligned(REQ_ALIGN); /* bits 63 - 32 */ + uint32_t c[16] __aligned(REQ_ALIGN); /* bits 95 - 64 */ + uint32_t d[16] __aligned(REQ_ALIGN); /* bits 127 - 96 */ +} __aligned(REQ_ALIGN); + +/* + * A set of tables that contain h, h^2, h^3, h^4. To be used w/ gf128_mul4. + */ +struct gf128table4 { + struct gf128table tbls[4]; +}; + +/* + * GCM per spec is bit reversed in memory. So byte 0 is really bit reversed + * and contains bits 0-7. We can deal w/ this by using right shifts and + * related math instead of having to bit reverse everything. This means that + * the low bits are in v[0] (bits 0-63) and reverse order, while the high + * bits are in v[1] (bits 64-127) and reverse order. The high bit of v[0] is + * bit 0, and the low bit of v[1] is bit 127. + */ +struct gf128 { + uint64_t v[2]; +}; + +/* Note that we don't bit reverse in MAKE_GF128. */ +#define MAKE_GF128(a, b) ((struct gf128){.v = { (a), (b) } }) +#define GF128_EQ(a, b) ((((a).v[0] ^ (b).v[0]) | \ + ((a).v[1] ^ (b).v[1])) == 0) + +static inline struct gf128 +gf128_read(const uint8_t *buf) +{ + struct gf128 r; + + r.v[0] = be64dec(buf); + buf += sizeof(uint64_t); + + r.v[1] = be64dec(buf); + + return r; +} + +static inline void +gf128_write(struct gf128 v, uint8_t *buf) +{ + uint64_t tmp; + + be64enc(buf, v.v[0]); + buf += sizeof tmp; + + be64enc(buf, v.v[1]); +} + +static inline struct gf128 __pure /* XXX - __pure2 instead */ +gf128_add(struct gf128 a, struct gf128 b) +{ + a.v[0] ^= b.v[0]; + a.v[1] ^= b.v[1]; + + return a; +} + +void gf128_genmultable(struct gf128 h, struct gf128table *t); +void gf128_genmultable4(struct gf128 h, struct gf128table4 *t); +struct gf128 gf128_mul(struct gf128 v, struct gf128table *tbl); +struct gf128 gf128_mul4(struct gf128 a, struct gf128 b, struct gf128 c, + struct gf128 d, struct gf128table4 *tbl); +struct gf128 gf128_mul4b(struct gf128 r, const uint8_t *v, + struct gf128table4 *tbl); + +#endif /* _GFMULT_H_ */ diff --git a/sys/opencrypto/gmac.c b/sys/opencrypto/gmac.c new file mode 100644 index 0000000..402092c --- /dev/null +++ b/sys/opencrypto/gmac.c @@ -0,0 +1,119 @@ +/*- + * Copyright (c) 2014 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by John-Mark Gurney under + * the sponsorship of the FreeBSD Foundation and + * Rubicon Communications, LLC (Netgate). + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + * + */ + +#include <sys/types.h> +#include <sys/systm.h> +#include <opencrypto/gfmult.h> +#include <opencrypto/gmac.h> + +void +AES_GMAC_Init(struct aes_gmac_ctx *agc) +{ + + bzero(agc, sizeof *agc); +} + +void +AES_GMAC_Setkey(struct aes_gmac_ctx *agc, const uint8_t *key, uint16_t klen) +{ + const uint8_t zeros[GMAC_BLOCK_LEN] = {}; + struct gf128 h; + uint8_t hbuf[GMAC_BLOCK_LEN]; + + agc->rounds = rijndaelKeySetupEnc(agc->keysched, key, klen * 8); + + rijndaelEncrypt(agc->keysched, agc->rounds, zeros, hbuf); + + h = gf128_read(hbuf); + gf128_genmultable4(h, &agc->ghashtbl); + + explicit_bzero(&h, sizeof h); + explicit_bzero(hbuf, sizeof hbuf); +} + +void +AES_GMAC_Reinit(struct aes_gmac_ctx *agc, const uint8_t *iv, uint16_t ivlen) +{ + + KASSERT(ivlen <= sizeof agc->counter, ("passed ivlen too large!")); + bcopy(iv, agc->counter, ivlen); +} + +int +AES_GMAC_Update(struct aes_gmac_ctx *agc, const uint8_t *data, uint16_t len) +{ + struct gf128 v; + uint8_t buf[GMAC_BLOCK_LEN] = {}; + int i; + + v = agc->hash; + + while (len > 0) { + if (len >= 4*GMAC_BLOCK_LEN) { + i = 4*GMAC_BLOCK_LEN; + v = gf128_mul4b(v, data, &agc->ghashtbl); + } else if (len >= GMAC_BLOCK_LEN) { + i = GMAC_BLOCK_LEN; + v = gf128_add(v, gf128_read(data)); + v = gf128_mul(v, &agc->ghashtbl.tbls[0]); + } else { + i = len; + bcopy(data, buf, i); + v = gf128_add(v, gf128_read(&buf[0])); + v = gf128_mul(v, &agc->ghashtbl.tbls[0]); + explicit_bzero(buf, sizeof buf); + } + len -= i; + data += i; + } + + agc->hash = v; + explicit_bzero(&v, sizeof v); + + return (0); +} + +void +AES_GMAC_Final(uint8_t digest[GMAC_DIGEST_LEN], struct aes_gmac_ctx *agc) +{ + uint8_t enccntr[GMAC_BLOCK_LEN]; + struct gf128 a; + + /* XXX - zero additional bytes? */ + agc->counter[GMAC_BLOCK_LEN - 1] = 1; + + rijndaelEncrypt(agc->keysched, agc->rounds, agc->counter, enccntr); + a = gf128_add(agc->hash, gf128_read(enccntr)); + gf128_write(a, digest); + + explicit_bzero(enccntr, sizeof enccntr); +} diff --git a/sys/opencrypto/gmac.h b/sys/opencrypto/gmac.h new file mode 100644 index 0000000..909b78c --- /dev/null +++ b/sys/opencrypto/gmac.h @@ -0,0 +1,56 @@ +/*- + * Copyright (c) 2014 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by John-Mark Gurney under + * the sponsorship of the FreeBSD Foundation and + * Rubicon Communications, LLC (Netgate). + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + * + */ + +#ifndef _GMAC_H_ +#define _GMAC_H_ + +#include "gfmult.h" +#include <crypto/rijndael/rijndael.h> + +#define GMAC_BLOCK_LEN 16 +#define GMAC_DIGEST_LEN 16 + +struct aes_gmac_ctx { + struct gf128table4 ghashtbl; + struct gf128 hash; + uint32_t keysched[4*(RIJNDAEL_MAXNR + 1)]; + uint8_t counter[GMAC_BLOCK_LEN]; + int rounds; +}; + +void AES_GMAC_Init(struct aes_gmac_ctx *); +void AES_GMAC_Setkey(struct aes_gmac_ctx *, const uint8_t *, uint16_t); +void AES_GMAC_Reinit(struct aes_gmac_ctx *, const uint8_t *, uint16_t); +int AES_GMAC_Update(struct aes_gmac_ctx *, const uint8_t *, uint16_t); +void AES_GMAC_Final(uint8_t [GMAC_DIGEST_LEN], struct aes_gmac_ctx *); + +#endif /* _GMAC_H_ */ diff --git a/sys/opencrypto/xform.c b/sys/opencrypto/xform.c index bfb061b..cb44bfa 100644 --- a/sys/opencrypto/xform.c +++ b/sys/opencrypto/xform.c @@ -24,6 +24,12 @@ * Copyright (C) 2001, Angelos D. Keromytis. * * Copyright (C) 2008, Damien Miller + * Copyright (c) 2014 The FreeBSD Foundation + * All rights reserved. + * + * Portions of this software were developed by John-Mark Gurney + * under sponsorship of the FreeBSD Foundation and + * Rubicon Communications, LLC (Netgate). * * Permission to use, copy, and modify this software with or without fee * is hereby granted, provided that this entire notice is included in @@ -76,6 +82,7 @@ static int blf_setkey(u_int8_t **, u_int8_t *, int); static int cast5_setkey(u_int8_t **, u_int8_t *, int); static int skipjack_setkey(u_int8_t **, u_int8_t *, int); static int rijndael128_setkey(u_int8_t **, u_int8_t *, int); +static int aes_icm_setkey(u_int8_t **, u_int8_t *, int); static int aes_xts_setkey(u_int8_t **, u_int8_t *, int); static int cml_setkey(u_int8_t **, u_int8_t *, int); @@ -99,6 +106,8 @@ static void rijndael128_decrypt(caddr_t, u_int8_t *); static void aes_xts_decrypt(caddr_t, u_int8_t *); static void cml_decrypt(caddr_t, u_int8_t *); +static void aes_icm_crypt(caddr_t, u_int8_t *); + static void null_zerokey(u_int8_t **); static void des1_zerokey(u_int8_t **); static void des3_zerokey(u_int8_t **); @@ -106,103 +115,149 @@ static void blf_zerokey(u_int8_t **); static void cast5_zerokey(u_int8_t **); static void skipjack_zerokey(u_int8_t **); static void rijndael128_zerokey(u_int8_t **); +static void aes_icm_zerokey(u_int8_t **); static void aes_xts_zerokey(u_int8_t **); static void cml_zerokey(u_int8_t **); +static void aes_icm_reinit(caddr_t, u_int8_t *); static void aes_xts_reinit(caddr_t, u_int8_t *); +static void aes_gcm_reinit(caddr_t, u_int8_t *); static void null_init(void *); -static int null_update(void *, u_int8_t *, u_int16_t); +static void null_reinit(void *ctx, const u_int8_t *buf, u_int16_t len); +static int null_update(void *, const u_int8_t *, u_int16_t); static void null_final(u_int8_t *, void *); -static int MD5Update_int(void *, u_int8_t *, u_int16_t); +static int MD5Update_int(void *, const u_int8_t *, u_int16_t); static void SHA1Init_int(void *); -static int SHA1Update_int(void *, u_int8_t *, u_int16_t); +static int SHA1Update_int(void *, const u_int8_t *, u_int16_t); static void SHA1Final_int(u_int8_t *, void *); -static int RMD160Update_int(void *, u_int8_t *, u_int16_t); -static int SHA256Update_int(void *, u_int8_t *, u_int16_t); -static int SHA384Update_int(void *, u_int8_t *, u_int16_t); -static int SHA512Update_int(void *, u_int8_t *, u_int16_t); +static int RMD160Update_int(void *, const u_int8_t *, u_int16_t); +static int SHA256Update_int(void *, const u_int8_t *, u_int16_t); +static int SHA384Update_int(void *, const u_int8_t *, u_int16_t); +static int SHA512Update_int(void *, const u_int8_t *, u_int16_t); static u_int32_t deflate_compress(u_int8_t *, u_int32_t, u_int8_t **); static u_int32_t deflate_decompress(u_int8_t *, u_int32_t, u_int8_t **); +#define AESICM_BLOCKSIZE AES_BLOCK_LEN + +struct aes_icm_ctx { + u_int32_t ac_ek[4*(RIJNDAEL_MAXNR + 1)]; + /* ac_block is initalized to IV */ + u_int8_t ac_block[AESICM_BLOCKSIZE]; + int ac_nr; +}; + MALLOC_DEFINE(M_XDATA, "xform", "xform data buffers"); /* Encryption instances */ struct enc_xform enc_xform_null = { CRYPTO_NULL_CBC, "NULL", /* NB: blocksize of 4 is to generate a properly aligned ESP header */ - NULL_BLOCK_LEN, 0, 256, /* 2048 bits, max key */ + NULL_BLOCK_LEN, NULL_BLOCK_LEN, NULL_MIN_KEY, NULL_MAX_KEY, null_encrypt, null_decrypt, null_setkey, null_zerokey, - NULL + NULL, }; struct enc_xform enc_xform_des = { CRYPTO_DES_CBC, "DES", - DES_BLOCK_LEN, 8, 8, + DES_BLOCK_LEN, DES_BLOCK_LEN, DES_MIN_KEY, DES_MAX_KEY, des1_encrypt, des1_decrypt, des1_setkey, des1_zerokey, - NULL + NULL, }; struct enc_xform enc_xform_3des = { CRYPTO_3DES_CBC, "3DES", - DES3_BLOCK_LEN, 24, 24, + DES3_BLOCK_LEN, DES3_BLOCK_LEN, TRIPLE_DES_MIN_KEY, + TRIPLE_DES_MAX_KEY, des3_encrypt, des3_decrypt, des3_setkey, des3_zerokey, - NULL + NULL, }; struct enc_xform enc_xform_blf = { CRYPTO_BLF_CBC, "Blowfish", - BLOWFISH_BLOCK_LEN, 5, 56 /* 448 bits, max key */, + BLOWFISH_BLOCK_LEN, BLOWFISH_BLOCK_LEN, BLOWFISH_MIN_KEY, + BLOWFISH_MAX_KEY, blf_encrypt, blf_decrypt, blf_setkey, blf_zerokey, - NULL + NULL, }; struct enc_xform enc_xform_cast5 = { CRYPTO_CAST_CBC, "CAST-128", - CAST128_BLOCK_LEN, 5, 16, + CAST128_BLOCK_LEN, CAST128_BLOCK_LEN, CAST_MIN_KEY, CAST_MAX_KEY, cast5_encrypt, cast5_decrypt, cast5_setkey, cast5_zerokey, - NULL + NULL, }; struct enc_xform enc_xform_skipjack = { CRYPTO_SKIPJACK_CBC, "Skipjack", - SKIPJACK_BLOCK_LEN, 10, 10, + SKIPJACK_BLOCK_LEN, SKIPJACK_BLOCK_LEN, SKIPJACK_MIN_KEY, + SKIPJACK_MAX_KEY, skipjack_encrypt, - skipjack_decrypt, - skipjack_setkey, + skipjack_decrypt, skipjack_setkey, skipjack_zerokey, - NULL + NULL, }; struct enc_xform enc_xform_rijndael128 = { CRYPTO_RIJNDAEL128_CBC, "Rijndael-128/AES", - RIJNDAEL128_BLOCK_LEN, 8, 32, + RIJNDAEL128_BLOCK_LEN, RIJNDAEL128_BLOCK_LEN, RIJNDAEL_MIN_KEY, + RIJNDAEL_MAX_KEY, rijndael128_encrypt, rijndael128_decrypt, rijndael128_setkey, rijndael128_zerokey, - NULL + NULL, +}; + +struct enc_xform enc_xform_aes_icm = { + CRYPTO_AES_ICM, "AES-ICM", + AES_BLOCK_LEN, AES_BLOCK_LEN, AES_MIN_KEY, AES_MAX_KEY, + aes_icm_crypt, + aes_icm_crypt, + aes_icm_setkey, + rijndael128_zerokey, + aes_icm_reinit, +}; + +struct enc_xform enc_xform_aes_nist_gcm = { + CRYPTO_AES_NIST_GCM_16, "AES-GCM", + AES_ICM_BLOCK_LEN, AES_GCM_IV_LEN, AES_MIN_KEY, AES_MAX_KEY, + aes_icm_crypt, + aes_icm_crypt, + aes_icm_setkey, + aes_icm_zerokey, + aes_gcm_reinit, +}; + +struct enc_xform enc_xform_aes_nist_gmac = { + CRYPTO_AES_NIST_GMAC, "AES-GMAC", + AES_ICM_BLOCK_LEN, AES_GCM_IV_LEN, AES_MIN_KEY, AES_MAX_KEY, + NULL, + NULL, + NULL, + NULL, + NULL, }; struct enc_xform enc_xform_aes_xts = { CRYPTO_AES_XTS, "AES-XTS", - RIJNDAEL128_BLOCK_LEN, 32, 64, + AES_BLOCK_LEN, AES_XTS_IV_LEN, AES_XTS_MIN_KEY, AES_XTS_MAX_KEY, aes_xts_encrypt, aes_xts_decrypt, aes_xts_setkey, @@ -212,85 +267,123 @@ struct enc_xform enc_xform_aes_xts = { struct enc_xform enc_xform_arc4 = { CRYPTO_ARC4, "ARC4", - 1, 1, 32, + ARC4_BLOCK_LEN, ARC4_IV_LEN, ARC4_MIN_KEY, ARC4_MAX_KEY, + NULL, NULL, NULL, NULL, NULL, - NULL }; struct enc_xform enc_xform_camellia = { CRYPTO_CAMELLIA_CBC, "Camellia", - CAMELLIA_BLOCK_LEN, 8, 32, + CAMELLIA_BLOCK_LEN, CAMELLIA_BLOCK_LEN, CAMELLIA_MIN_KEY, + CAMELLIA_MAX_KEY, cml_encrypt, cml_decrypt, cml_setkey, cml_zerokey, - NULL + NULL, }; /* Authentication instances */ -struct auth_hash auth_hash_null = { +struct auth_hash auth_hash_null = { /* NB: context isn't used */ CRYPTO_NULL_HMAC, "NULL-HMAC", - 0, NULL_HASH_LEN, NULL_HMAC_BLOCK_LEN, sizeof(int), /* NB: context isn't used */ - null_init, null_update, null_final + NULL_HMAC_KEY_LEN, NULL_HASH_LEN, sizeof(int), NULL_HMAC_BLOCK_LEN, + null_init, null_reinit, null_reinit, null_update, null_final }; struct auth_hash auth_hash_hmac_md5 = { CRYPTO_MD5_HMAC, "HMAC-MD5", - 16, MD5_HASH_LEN, MD5_HMAC_BLOCK_LEN, sizeof(MD5_CTX), - (void (*) (void *)) MD5Init, MD5Update_int, + MD5_HMAC_KEY_LEN, MD5_HASH_LEN, sizeof(MD5_CTX), MD5_HMAC_BLOCK_LEN, + (void (*) (void *)) MD5Init, NULL, NULL, MD5Update_int, (void (*) (u_int8_t *, void *)) MD5Final }; struct auth_hash auth_hash_hmac_sha1 = { CRYPTO_SHA1_HMAC, "HMAC-SHA1", - 20, SHA1_HASH_LEN, SHA1_HMAC_BLOCK_LEN, sizeof(SHA1_CTX), - SHA1Init_int, SHA1Update_int, SHA1Final_int + SHA1_HMAC_KEY_LEN, SHA1_HASH_LEN, sizeof(SHA1_CTX), SHA1_HMAC_BLOCK_LEN, + SHA1Init_int, NULL, NULL, SHA1Update_int, SHA1Final_int }; struct auth_hash auth_hash_hmac_ripemd_160 = { CRYPTO_RIPEMD160_HMAC, "HMAC-RIPEMD-160", - 20, RIPEMD160_HASH_LEN, RIPEMD160_HMAC_BLOCK_LEN, sizeof(RMD160_CTX), - (void (*)(void *)) RMD160Init, RMD160Update_int, + RIPEMD160_HMAC_KEY_LEN, RIPEMD160_HASH_LEN, sizeof(RMD160_CTX), + RIPEMD160_HMAC_BLOCK_LEN, + (void (*)(void *)) RMD160Init, NULL, NULL, RMD160Update_int, (void (*)(u_int8_t *, void *)) RMD160Final }; struct auth_hash auth_hash_key_md5 = { CRYPTO_MD5_KPDK, "Keyed MD5", - 0, MD5_KPDK_HASH_LEN, 0, sizeof(MD5_CTX), - (void (*)(void *)) MD5Init, MD5Update_int, + NULL_HMAC_KEY_LEN, MD5_KPDK_HASH_LEN, sizeof(MD5_CTX), 0, + (void (*)(void *)) MD5Init, NULL, NULL, MD5Update_int, (void (*)(u_int8_t *, void *)) MD5Final }; struct auth_hash auth_hash_key_sha1 = { CRYPTO_SHA1_KPDK, "Keyed SHA1", - 0, SHA1_KPDK_HASH_LEN, 0, sizeof(SHA1_CTX), - SHA1Init_int, SHA1Update_int, SHA1Final_int + NULL_HMAC_KEY_LEN, SHA1_KPDK_HASH_LEN, sizeof(SHA1_CTX), 0, + SHA1Init_int, NULL, NULL, SHA1Update_int, SHA1Final_int }; struct auth_hash auth_hash_hmac_sha2_256 = { CRYPTO_SHA2_256_HMAC, "HMAC-SHA2-256", - 32, SHA2_256_HASH_LEN, SHA2_256_HMAC_BLOCK_LEN, sizeof(SHA256_CTX), - (void (*)(void *)) SHA256_Init, SHA256Update_int, + SHA2_256_HMAC_KEY_LEN, SHA2_256_HASH_LEN, sizeof(SHA256_CTX), + SHA2_256_HMAC_BLOCK_LEN, + (void (*)(void *)) SHA256_Init, NULL, NULL, SHA256Update_int, (void (*)(u_int8_t *, void *)) SHA256_Final }; struct auth_hash auth_hash_hmac_sha2_384 = { CRYPTO_SHA2_384_HMAC, "HMAC-SHA2-384", - 48, SHA2_384_HASH_LEN, SHA2_384_HMAC_BLOCK_LEN, sizeof(SHA384_CTX), - (void (*)(void *)) SHA384_Init, SHA384Update_int, + SHA2_384_HMAC_KEY_LEN, SHA2_384_HASH_LEN, sizeof(SHA384_CTX), + SHA2_384_HMAC_BLOCK_LEN, + (void (*)(void *)) SHA384_Init, NULL, NULL, SHA384Update_int, (void (*)(u_int8_t *, void *)) SHA384_Final }; struct auth_hash auth_hash_hmac_sha2_512 = { CRYPTO_SHA2_512_HMAC, "HMAC-SHA2-512", - 64, SHA2_512_HASH_LEN, SHA2_512_HMAC_BLOCK_LEN, sizeof(SHA512_CTX), - (void (*)(void *)) SHA512_Init, SHA512Update_int, + SHA2_512_HMAC_KEY_LEN, SHA2_512_HASH_LEN, sizeof(SHA512_CTX), + SHA2_512_HMAC_BLOCK_LEN, + (void (*)(void *)) SHA512_Init, NULL, NULL, SHA512Update_int, (void (*)(u_int8_t *, void *)) SHA512_Final }; +struct auth_hash auth_hash_nist_gmac_aes_128 = { + CRYPTO_AES_128_NIST_GMAC, "GMAC-AES-128", + AES_128_GMAC_KEY_LEN, AES_GMAC_HASH_LEN, sizeof(struct aes_gmac_ctx), + GMAC_BLOCK_LEN, + (void (*)(void *)) AES_GMAC_Init, + (void (*)(void *, const u_int8_t *, u_int16_t)) AES_GMAC_Setkey, + (void (*)(void *, const u_int8_t *, u_int16_t)) AES_GMAC_Reinit, + (int (*)(void *, const u_int8_t *, u_int16_t)) AES_GMAC_Update, + (void (*)(u_int8_t *, void *)) AES_GMAC_Final +}; + +struct auth_hash auth_hash_nist_gmac_aes_192 = { + CRYPTO_AES_192_NIST_GMAC, "GMAC-AES-192", + AES_192_GMAC_KEY_LEN, AES_GMAC_HASH_LEN, sizeof(struct aes_gmac_ctx), + GMAC_BLOCK_LEN, + (void (*)(void *)) AES_GMAC_Init, + (void (*)(void *, const u_int8_t *, u_int16_t)) AES_GMAC_Setkey, + (void (*)(void *, const u_int8_t *, u_int16_t)) AES_GMAC_Reinit, + (int (*)(void *, const u_int8_t *, u_int16_t)) AES_GMAC_Update, + (void (*)(u_int8_t *, void *)) AES_GMAC_Final +}; + +struct auth_hash auth_hash_nist_gmac_aes_256 = { + CRYPTO_AES_256_NIST_GMAC, "GMAC-AES-256", + AES_256_GMAC_KEY_LEN, AES_GMAC_HASH_LEN, sizeof(struct aes_gmac_ctx), + GMAC_BLOCK_LEN, + (void (*)(void *)) AES_GMAC_Init, + (void (*)(void *, const u_int8_t *, u_int16_t)) AES_GMAC_Setkey, + (void (*)(void *, const u_int8_t *, u_int16_t)) AES_GMAC_Reinit, + (int (*)(void *, const u_int8_t *, u_int16_t)) AES_GMAC_Update, + (void (*)(u_int8_t *, void *)) AES_GMAC_Final +}; + /* Compression instance */ struct comp_algo comp_algo_deflate = { CRYPTO_DEFLATE_COMP, "Deflate", @@ -579,6 +672,75 @@ rijndael128_zerokey(u_int8_t **sched) *sched = NULL; } +void +aes_icm_reinit(caddr_t key, u_int8_t *iv) +{ + struct aes_icm_ctx *ctx; + + ctx = (struct aes_icm_ctx *)key; + bcopy(iv, ctx->ac_block, AESICM_BLOCKSIZE); +} + +void +aes_gcm_reinit(caddr_t key, u_int8_t *iv) +{ + struct aes_icm_ctx *ctx; + + aes_icm_reinit(key, iv); + + ctx = (struct aes_icm_ctx *)key; + /* GCM starts with 2 as counter 1 is used for final xor of tag. */ + bzero(&ctx->ac_block[AESICM_BLOCKSIZE - 4], 4); + ctx->ac_block[AESICM_BLOCKSIZE - 1] = 2; +} + +void +aes_icm_crypt(caddr_t key, u_int8_t *data) +{ + struct aes_icm_ctx *ctx; + u_int8_t keystream[AESICM_BLOCKSIZE]; + int i; + + ctx = (struct aes_icm_ctx *)key; + rijndaelEncrypt(ctx->ac_ek, ctx->ac_nr, ctx->ac_block, keystream); + for (i = 0; i < AESICM_BLOCKSIZE; i++) + data[i] ^= keystream[i]; + explicit_bzero(keystream, sizeof(keystream)); + + /* increment counter */ + for (i = AESICM_BLOCKSIZE - 1; + i >= 0; i--) + if (++ctx->ac_block[i]) /* continue on overflow */ + break; +} + +int +aes_icm_setkey(u_int8_t **sched, u_int8_t *key, int len) +{ + struct aes_icm_ctx *ctx; + + if (len != 16 && len != 24 && len != 32) + return EINVAL; + + *sched = malloc(sizeof(struct aes_icm_ctx), M_CRYPTO_DATA, + M_NOWAIT | M_ZERO); + if (*sched == NULL) + return ENOMEM; + + ctx = (struct aes_icm_ctx *)*sched; + ctx->ac_nr = rijndaelKeySetupEnc(ctx->ac_ek, (u_char *)key, len * 8); + return 0; +} + +void +aes_icm_zerokey(u_int8_t **sched) +{ + + bzero(*sched, sizeof(struct aes_icm_ctx)); + free(*sched, M_CRYPTO_DATA); + *sched = NULL; +} + #define AES_XTS_BLOCKSIZE 16 #define AES_XTS_IVSIZE 8 #define AES_XTS_ALPHA 0x87 /* GF(2^128) generator polynomial */ @@ -728,8 +890,13 @@ null_init(void *ctx) { } +static void +null_reinit(void *ctx, const u_int8_t *buf, u_int16_t len) +{ +} + static int -null_update(void *ctx, u_int8_t *buf, u_int16_t len) +null_update(void *ctx, const u_int8_t *buf, u_int16_t len) { return 0; } @@ -742,14 +909,14 @@ null_final(u_int8_t *buf, void *ctx) } static int -RMD160Update_int(void *ctx, u_int8_t *buf, u_int16_t len) +RMD160Update_int(void *ctx, const u_int8_t *buf, u_int16_t len) { RMD160Update(ctx, buf, len); return 0; } static int -MD5Update_int(void *ctx, u_int8_t *buf, u_int16_t len) +MD5Update_int(void *ctx, const u_int8_t *buf, u_int16_t len) { MD5Update(ctx, buf, len); return 0; @@ -762,7 +929,7 @@ SHA1Init_int(void *ctx) } static int -SHA1Update_int(void *ctx, u_int8_t *buf, u_int16_t len) +SHA1Update_int(void *ctx, const u_int8_t *buf, u_int16_t len) { SHA1Update(ctx, buf, len); return 0; @@ -775,21 +942,21 @@ SHA1Final_int(u_int8_t *blk, void *ctx) } static int -SHA256Update_int(void *ctx, u_int8_t *buf, u_int16_t len) +SHA256Update_int(void *ctx, const u_int8_t *buf, u_int16_t len) { SHA256_Update(ctx, buf, len); return 0; } static int -SHA384Update_int(void *ctx, u_int8_t *buf, u_int16_t len) +SHA384Update_int(void *ctx, const u_int8_t *buf, u_int16_t len) { SHA384_Update(ctx, buf, len); return 0; } static int -SHA512Update_int(void *ctx, u_int8_t *buf, u_int16_t len) +SHA512Update_int(void *ctx, const u_int8_t *buf, u_int16_t len) { SHA512_Update(ctx, buf, len); return 0; diff --git a/sys/opencrypto/xform.h b/sys/opencrypto/xform.h index 8df7b07..e0b6397 100644 --- a/sys/opencrypto/xform.h +++ b/sys/opencrypto/xform.h @@ -9,6 +9,12 @@ * supported the development of this code. * * Copyright (c) 2000 Angelos D. Keromytis + * Copyright (c) 2014 The FreeBSD Foundation + * All rights reserved. + * + * Portions of this software were developed by John-Mark Gurney + * under sponsorship of the FreeBSD Foundation and + * Rubicon Communications, LLC (Netgate). * * Permission to use, copy, and modify this software without fee * is hereby granted, provided that this entire notice is included in @@ -29,6 +35,7 @@ #include <crypto/sha1.h> #include <crypto/sha2/sha2.h> #include <opencrypto/rmd160.h> +#include <opencrypto/gmac.h> /* Declarations */ struct auth_hash { @@ -36,10 +43,12 @@ struct auth_hash { char *name; u_int16_t keysize; u_int16_t hashsize; - u_int16_t blocksize; u_int16_t ctxsize; + u_int16_t blocksize; void (*Init) (void *); - int (*Update) (void *, u_int8_t *, u_int16_t); + void (*Setkey) (void *, const u_int8_t *, u_int16_t); + void (*Reinit) (void *, const u_int8_t *, u_int16_t); + int (*Update) (void *, const u_int8_t *, u_int16_t); void (*Final) (u_int8_t *, void *); }; @@ -50,6 +59,7 @@ struct enc_xform { int type; char *name; u_int16_t blocksize; + u_int16_t ivsize; u_int16_t minkey, maxkey; void (*encrypt) (caddr_t, u_int8_t *); void (*decrypt) (caddr_t, u_int8_t *); @@ -73,6 +83,7 @@ union authctx { SHA256_CTX sha256ctx; SHA384_CTX sha384ctx; SHA512_CTX sha512ctx; + struct aes_gmac_ctx aes_gmac_ctx; }; extern struct enc_xform enc_xform_null; @@ -82,6 +93,9 @@ extern struct enc_xform enc_xform_blf; extern struct enc_xform enc_xform_cast5; extern struct enc_xform enc_xform_skipjack; extern struct enc_xform enc_xform_rijndael128; +extern struct enc_xform enc_xform_aes_icm; +extern struct enc_xform enc_xform_aes_nist_gcm; +extern struct enc_xform enc_xform_aes_nist_gmac; extern struct enc_xform enc_xform_aes_xts; extern struct enc_xform enc_xform_arc4; extern struct enc_xform enc_xform_camellia; @@ -95,6 +109,9 @@ extern struct auth_hash auth_hash_hmac_ripemd_160; extern struct auth_hash auth_hash_hmac_sha2_256; extern struct auth_hash auth_hash_hmac_sha2_384; extern struct auth_hash auth_hash_hmac_sha2_512; +extern struct auth_hash auth_hash_nist_gmac_aes_128; +extern struct auth_hash auth_hash_nist_gmac_aes_192; +extern struct auth_hash auth_hash_nist_gmac_aes_256; extern struct comp_algo comp_algo_deflate; diff --git a/sys/sys/libkern.h b/sys/sys/libkern.h index 0294537..5faebb1 100644 --- a/sys/sys/libkern.h +++ b/sys/sys/libkern.h @@ -80,6 +80,7 @@ struct malloc_type; uint32_t arc4random(void); void arc4rand(void *ptr, u_int len, int reseed); int bcmp(const void *, const void *, size_t); +int timingsafe_bcmp(const void *, const void *, size_t); void *bsearch(const void *, const void *, size_t, size_t, int (*)(const void *, const void *)); #ifndef HAVE_INLINE_FFS diff --git a/sys/sys/priv.h b/sys/sys/priv.h index 78a8e3e..7347149 100644 --- a/sys/sys/priv.h +++ b/sys/sys/priv.h @@ -341,6 +341,7 @@ #define PRIV_NET_SETIFDESCR 418 /* Set interface description. */ #define PRIV_NET_SETIFFIB 419 /* Set interface fib. */ #define PRIV_NET_VXLAN 420 /* Administer vxlan. */ +#define PRIV_NET_SETVLANPCP 421 /* Set VLAN priority. */ /* * 802.11-related privileges. diff --git a/sys/sys/sockio.h b/sys/sys/sockio.h index e76720f..0ad221b 100644 --- a/sys/sys/sockio.h +++ b/sys/sys/sockio.h @@ -132,5 +132,6 @@ #define SIOCDIFGROUP _IOW('i', 137, struct ifgroupreq) /* delete ifgroup */ #define SIOCGIFGMEMB _IOWR('i', 138, struct ifgroupreq) /* get members */ #define SIOCGIFXMEDIA _IOWR('i', 139, struct ifmediareq) /* get net xmedia */ +#define SIOCORDERIFADDR _IOWR('i', 160, struct ifaliasreq) /* reorder interface */ #endif /* !_SYS_SOCKIO_H_ */ diff --git a/sys/sys/systm.h b/sys/sys/systm.h index cb83ba4..83e4f12 100644 --- a/sys/sys/systm.h +++ b/sys/sys/systm.h @@ -238,6 +238,7 @@ void hexdump(const void *ptr, int length, const char *hdr, int flags); #define ovbcopy(f, t, l) bcopy((f), (t), (l)) void bcopy(const void *from, void *to, size_t len) __nonnull(1) __nonnull(2); void bzero(void *buf, size_t len) __nonnull(1); +void explicit_bzero(void *, size_t) __nonnull(1);; void *memcpy(void *to, const void *from, size_t len) __nonnull(1) __nonnull(2); void *memmove(void *dest, const void *src, size_t n) __nonnull(1) __nonnull(2); diff --git a/tests/sys/Makefile b/tests/sys/Makefile index f69012b..4e653be 100644 --- a/tests/sys/Makefile +++ b/tests/sys/Makefile @@ -13,6 +13,7 @@ TESTS_SUBDIRS+= kqueue TESTS_SUBDIRS+= mac TESTS_SUBDIRS+= mqueue TESTS_SUBDIRS+= netinet +TESTS_SUBDIRS+= opencrypto TESTS_SUBDIRS+= posixshm TESTS_SUBDIRS+= vfs TESTS_SUBDIRS+= vm diff --git a/tests/sys/opencrypto/Makefile b/tests/sys/opencrypto/Makefile new file mode 100644 index 0000000..1696920 --- /dev/null +++ b/tests/sys/opencrypto/Makefile @@ -0,0 +1,14 @@ +# $FreeBSD$ + +TESTSDIR= ${TESTSBASE}/sys/opencrypto +BINDIR= ${TESTSDIR} + +PLAIN_TESTS_SH= runtests + +TEST_METADATA.foo+=required_programs="python" +PYMODULES= cryptodev.py cryptodevh.py cryptotest.py dpkt.py + +FILESDIR= ${TESTSDIR} +FILES= ${PYMODULES} + +.include <bsd.test.mk> diff --git a/tests/sys/opencrypto/cryptodev.py b/tests/sys/opencrypto/cryptodev.py new file mode 100644 index 0000000..d9f764a --- /dev/null +++ b/tests/sys/opencrypto/cryptodev.py @@ -0,0 +1,561 @@ +#!/usr/bin/env python +# +# Copyright (c) 2014 The FreeBSD Foundation +# Copyright 2014 John-Mark Gurney +# All rights reserved. +# +# This software was developed by John-Mark Gurney under +# the sponsorship from the FreeBSD Foundation. +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +# SUCH DAMAGE. +# +# $FreeBSD$ +# + +import array +import dpkt +from fcntl import ioctl +import os +import signal +from struct import pack as _pack + +from cryptodevh import * + +__all__ = [ 'Crypto', 'MismatchError', ] + +class FindOp(dpkt.Packet): + __byte_order__ = '@' + __hdr__ = ( ('crid', 'i', 0), + ('name', '32s', 0), + ) + +class SessionOp(dpkt.Packet): + __byte_order__ = '@' + __hdr__ = ( ('cipher', 'I', 0), + ('mac', 'I', 0), + ('keylen', 'I', 0), + ('key', 'P', 0), + ('mackeylen', 'i', 0), + ('mackey', 'P', 0), + ('ses', 'I', 0), + ) + +class SessionOp2(dpkt.Packet): + __byte_order__ = '@' + __hdr__ = ( ('cipher', 'I', 0), + ('mac', 'I', 0), + ('keylen', 'I', 0), + ('key', 'P', 0), + ('mackeylen', 'i', 0), + ('mackey', 'P', 0), + ('ses', 'I', 0), + ('crid', 'i', 0), + ('pad0', 'i', 0), + ('pad1', 'i', 0), + ('pad2', 'i', 0), + ('pad3', 'i', 0), + ) + +class CryptOp(dpkt.Packet): + __byte_order__ = '@' + __hdr__ = ( ('ses', 'I', 0), + ('op', 'H', 0), + ('flags', 'H', 0), + ('len', 'I', 0), + ('src', 'P', 0), + ('dst', 'P', 0), + ('mac', 'P', 0), + ('iv', 'P', 0), + ) + +class CryptAEAD(dpkt.Packet): + __byte_order__ = '@' + __hdr__ = ( + ('ses', 'I', 0), + ('op', 'H', 0), + ('flags', 'H', 0), + ('len', 'I', 0), + ('aadlen', 'I', 0), + ('ivlen', 'I', 0), + ('src', 'P', 0), + ('dst', 'P', 0), + ('aad', 'P', 0), + ('tag', 'P', 0), + ('iv', 'P', 0), + ) + +# h2py.py can't handle multiarg macros +CRIOGET = 3221513060 +CIOCGSESSION = 3224396645 +CIOCGSESSION2 = 3225445226 +CIOCFSESSION = 2147771238 +CIOCCRYPT = 3224396647 +CIOCKEY = 3230688104 +CIOCASYMFEAT = 1074029417 +CIOCKEY2 = 3230688107 +CIOCFINDDEV = 3223610220 +CIOCCRYPTAEAD = 3225445229 + +def _getdev(): + fd = os.open('/dev/crypto', os.O_RDWR) + buf = array.array('I', [0]) + ioctl(fd, CRIOGET, buf, 1) + os.close(fd) + + return buf[0] + +_cryptodev = _getdev() + +def _findop(crid, name): + fop = FindOp() + fop.crid = crid + fop.name = name + s = array.array('B', fop.pack_hdr()) + ioctl(_cryptodev, CIOCFINDDEV, s, 1) + fop.unpack(s) + + try: + idx = fop.name.index('\x00') + name = fop.name[:idx] + except ValueError: + name = fop.name + + return fop.crid, name + +class Crypto: + @staticmethod + def findcrid(name): + return _findop(-1, name)[0] + + @staticmethod + def getcridname(crid): + return _findop(crid, '')[1] + + def __init__(self, cipher=0, key=None, mac=0, mackey=None, + crid=CRYPTOCAP_F_SOFTWARE | CRYPTOCAP_F_HARDWARE): + self._ses = None + ses = SessionOp2() + ses.cipher = cipher + ses.mac = mac + + if key is not None: + ses.keylen = len(key) + k = array.array('B', key) + ses.key = k.buffer_info()[0] + else: + self.key = None + + if mackey is not None: + ses.mackeylen = len(mackey) + mk = array.array('B', mackey) + ses.mackey = mk.buffer_info()[0] + self._maclen = 16 # parameterize? + else: + self._maclen = None + + if not cipher and not mac: + raise ValueError('one of cipher or mac MUST be specified.') + ses.crid = CRYPTOCAP_F_SOFTWARE | CRYPTOCAP_F_HARDWARE + #ses.crid = CRYPTOCAP_F_HARDWARE + #ses.crid = CRYPTOCAP_F_SOFTWARE + #ses.crid = 0 + #print `ses` + s = array.array('B', ses.pack_hdr()) + #print `s` + ioctl(_cryptodev, CIOCGSESSION2, s, 1) + ses.unpack(s) + + self._ses = ses.ses + + def __del__(self): + if self._ses is None: + return + + try: + ioctl(_cryptodev, CIOCFSESSION, _pack('I', self._ses)) + except TypeError: + pass + self._ses = None + + def _doop(self, op, src, iv): + cop = CryptOp() + cop.ses = self._ses + cop.op = op + cop.flags = 0 + cop.len = len(src) + s = array.array('B', src) + cop.src = cop.dst = s.buffer_info()[0] + if self._maclen is not None: + m = array.array('B', [0] * self._maclen) + cop.mac = m.buffer_info()[0] + ivbuf = array.array('B', iv) + cop.iv = ivbuf.buffer_info()[0] + + #print 'cop:', `cop` + ioctl(_cryptodev, CIOCCRYPT, str(cop)) + + s = s.tostring() + if self._maclen is not None: + return s, m.tostring() + + return s + + def _doaead(self, op, src, aad, iv, tag=None): + caead = CryptAEAD() + caead.ses = self._ses + caead.op = op + caead.flags = CRD_F_IV_EXPLICIT + caead.flags = 0 + caead.len = len(src) + s = array.array('B', src) + caead.src = caead.dst = s.buffer_info()[0] + caead.aadlen = len(aad) + saad = array.array('B', aad) + caead.aad = saad.buffer_info()[0] + + if self._maclen is None: + raise ValueError('must have a tag length') + + if tag is None: + tag = array.array('B', [0] * self._maclen) + else: + assert len(tag) == self._maclen, `len(tag), self._maclen` + tag = array.array('B', tag) + + caead.tag = tag.buffer_info()[0] + + ivbuf = array.array('B', iv) + caead.ivlen = len(iv) + caead.iv = ivbuf.buffer_info()[0] + + ioctl(_cryptodev, CIOCCRYPTAEAD, str(caead)) + + s = s.tostring() + + return s, tag.tostring() + + def perftest(self, op, size, timeo=3): + import random + import time + + inp = array.array('B', (random.randint(0, 255) for x in xrange(size))) + out = array.array('B', inp) + + # prep ioctl + cop = CryptOp() + cop.ses = self._ses + cop.op = op + cop.flags = 0 + cop.len = len(inp) + s = array.array('B', inp) + cop.src = s.buffer_info()[0] + cop.dst = out.buffer_info()[0] + if self._maclen is not None: + m = array.array('B', [0] * self._maclen) + cop.mac = m.buffer_info()[0] + ivbuf = array.array('B', (random.randint(0, 255) for x in xrange(16))) + cop.iv = ivbuf.buffer_info()[0] + + exit = [ False ] + def alarmhandle(a, b, exit=exit): + exit[0] = True + + oldalarm = signal.signal(signal.SIGALRM, alarmhandle) + signal.alarm(timeo) + + start = time.time() + reps = 0 + while not exit[0]: + ioctl(_cryptodev, CIOCCRYPT, str(cop)) + reps += 1 + + end = time.time() + + signal.signal(signal.SIGALRM, oldalarm) + + print 'time:', end - start + print 'perf MB/sec:', (reps * size) / (end - start) / 1024 / 1024 + + def encrypt(self, data, iv, aad=None): + if aad is None: + return self._doop(COP_ENCRYPT, data, iv) + else: + return self._doaead(COP_ENCRYPT, data, aad, + iv) + + def decrypt(self, data, iv, aad=None, tag=None): + if aad is None: + return self._doop(COP_DECRYPT, data, iv) + else: + return self._doaead(COP_DECRYPT, data, aad, + iv, tag=tag) + +class MismatchError(Exception): + pass + +class KATParser: + def __init__(self, fname, fields): + self.fp = open(fname) + self.fields = set(fields) + self._pending = None + + def __iter__(self): + while True: + didread = False + if self._pending is not None: + i = self._pending + self._pending = None + else: + i = self.fp.readline() + didread = True + + if didread and not i: + return + + if (i and i[0] == '#') or not i.strip(): + continue + if i[0] == '[': + yield i[1:].split(']', 1)[0], self.fielditer() + else: + raise ValueError('unknown line: %s' % `i`) + + def eatblanks(self): + while True: + line = self.fp.readline() + if line == '': + break + + line = line.strip() + if line: + break + + return line + + def fielditer(self): + while True: + values = {} + + line = self.eatblanks() + if not line or line[0] == '[': + self._pending = line + return + + while True: + try: + f, v = line.split(' =') + except: + if line == 'FAIL': + f, v = 'FAIL', '' + else: + print 'line:', `line` + raise + v = v.strip() + + if f in values: + raise ValueError('already present: %s' % `f`) + values[f] = v + line = self.fp.readline().strip() + if not line: + break + + # we should have everything + remain = self.fields.copy() - set(values.keys()) + # XXX - special case GCM decrypt + if remain and not ('FAIL' in values and 'PT' in remain): + raise ValueError('not all fields found: %s' % `remain`) + + yield values + +def _spdechex(s): + return ''.join(s.split()).decode('hex') + +if __name__ == '__main__': + if True: + try: + crid = Crypto.findcrid('aesni0') + print 'aesni:', crid + except IOError: + print 'aesni0 not found' + + for i in xrange(10): + try: + name = Crypto.getcridname(i) + print '%2d: %s' % (i, `name`) + except IOError: + pass + elif False: + kp = KATParser('/usr/home/jmg/aesni.testing/format tweak value input - data unit seq no/XTSGenAES128.rsp', [ 'COUNT', 'DataUnitLen', 'Key', 'DataUnitSeqNumber', 'PT', 'CT' ]) + for mode, ni in kp: + print `i`, `ni` + for j in ni: + print `j` + elif False: + key = _spdechex('c939cc13397c1d37de6ae0e1cb7c423c') + iv = _spdechex('00000000000000000000000000000001') + pt = _spdechex('ab3cabed693a32946055524052afe3c9cb49664f09fc8b7da824d924006b7496353b8c1657c5dec564d8f38d7432e1de35aae9d95590e66278d4acce883e51abaf94977fcd3679660109a92bf7b2973ccd547f065ec6cee4cb4a72a5e9f45e615d920d76cb34cba482467b3e21422a7242e7d931330c0fbf465c3a3a46fae943029fd899626dda542750a1eee253df323c6ef1573f1c8c156613e2ea0a6cdbf2ae9701020be2d6a83ecb7f3f9d8e') + #pt = _spdechex('00000000000000000000000000000000') + ct = _spdechex('f42c33853ecc5ce2949865fdb83de3bff1089e9360c94f830baebfaff72836ab5236f77212f1e7396c8c54ac73d81986375a6e9e299cfeca5ba051ed25e8d1affa5beaf6c1d2b45e90802408f2ced21663497e906de5f29341e5e52ddfea5363d628b3eb7806835e17bae051b3a6da3f8e2941fe44384eac17a9d298d2c331ca8320c775b5d53263a5e905059d891b21dede2d8110fd427c7bd5a9a274ddb47b1945ee79522203b6e297d0e399ef') + + c = Crypto(CRYPTO_AES_ICM, key) + enc = c.encrypt(pt, iv) + + print 'enc:', enc.encode('hex') + print ' ct:', ct.encode('hex') + + assert ct == enc + + dec = c.decrypt(ct, iv) + + print 'dec:', dec.encode('hex') + print ' pt:', pt.encode('hex') + + assert pt == dec + elif False: + key = _spdechex('c939cc13397c1d37de6ae0e1cb7c423c') + iv = _spdechex('00000000000000000000000000000001') + pt = _spdechex('ab3cabed693a32946055524052afe3c9cb49664f09fc8b7da824d924006b7496353b8c1657c5dec564d8f38d7432e1de35aae9d95590e66278d4acce883e51abaf94977fcd3679660109a92bf7b2973ccd547f065ec6cee4cb4a72a5e9f45e615d920d76cb34cba482467b3e21422a7242e7d931330c0fbf465c3a3a46fae943029fd899626dda542750a1eee253df323c6ef1573f1c8c156613e2ea0a6cdbf2ae9701020be2d6a83ecb7f3f9d8e0a3f') + #pt = _spdechex('00000000000000000000000000000000') + ct = _spdechex('f42c33853ecc5ce2949865fdb83de3bff1089e9360c94f830baebfaff72836ab5236f77212f1e7396c8c54ac73d81986375a6e9e299cfeca5ba051ed25e8d1affa5beaf6c1d2b45e90802408f2ced21663497e906de5f29341e5e52ddfea5363d628b3eb7806835e17bae051b3a6da3f8e2941fe44384eac17a9d298d2c331ca8320c775b5d53263a5e905059d891b21dede2d8110fd427c7bd5a9a274ddb47b1945ee79522203b6e297d0e399ef3768') + + c = Crypto(CRYPTO_AES_ICM, key) + enc = c.encrypt(pt, iv) + + print 'enc:', enc.encode('hex') + print ' ct:', ct.encode('hex') + + assert ct == enc + + dec = c.decrypt(ct, iv) + + print 'dec:', dec.encode('hex') + print ' pt:', pt.encode('hex') + + assert pt == dec + elif False: + key = _spdechex('c939cc13397c1d37de6ae0e1cb7c423c') + iv = _spdechex('6eba2716ec0bd6fa5cdef5e6d3a795bc') + pt = _spdechex('ab3cabed693a32946055524052afe3c9cb49664f09fc8b7da824d924006b7496353b8c1657c5dec564d8f38d7432e1de35aae9d95590e66278d4acce883e51abaf94977fcd3679660109a92bf7b2973ccd547f065ec6cee4cb4a72a5e9f45e615d920d76cb34cba482467b3e21422a7242e7d931330c0fbf465c3a3a46fae943029fd899626dda542750a1eee253df323c6ef1573f1c8c156613e2ea0a6cdbf2ae9701020be2d6a83ecb7f3f9d8e0a3f') + ct = _spdechex('f1f81f12e72e992dbdc304032705dc75dc3e4180eff8ee4819906af6aee876d5b00b7c36d282a445ce3620327be481e8e53a8e5a8e5ca9abfeb2281be88d12ffa8f46d958d8224738c1f7eea48bda03edbf9adeb900985f4fa25648b406d13a886c25e70cfdecdde0ad0f2991420eb48a61c64fd797237cf2798c2675b9bb744360b0a3f329ac53bbceb4e3e7456e6514f1a9d2f06c236c31d0f080b79c15dce1096357416602520daa098b17d1af427') + c = Crypto(CRYPTO_AES_CBC, key) + + enc = c.encrypt(pt, iv) + + print 'enc:', enc.encode('hex') + print ' ct:', ct.encode('hex') + + assert ct == enc + + dec = c.decrypt(ct, iv) + + print 'dec:', dec.encode('hex') + print ' pt:', pt.encode('hex') + + assert pt == dec + elif False: + key = _spdechex('c939cc13397c1d37de6ae0e1cb7c423c') + iv = _spdechex('b3d8cc017cbb89b39e0f67e2') + pt = _spdechex('c3b3c41f113a31b73d9a5cd4321030') + aad = _spdechex('24825602bd12a984e0092d3e448eda5f') + ct = _spdechex('93fe7d9e9bfd10348a5606e5cafa7354') + ct = _spdechex('93fe7d9e9bfd10348a5606e5cafa73') + tag = _spdechex('0032a1dc85f1c9786925a2e71d8272dd') + tag = _spdechex('8d11a0929cb3fbe1fef01a4a38d5f8ea') + + c = Crypto(CRYPTO_AES_NIST_GCM_16, key, + mac=CRYPTO_AES_128_NIST_GMAC, mackey=key) + + enc, enctag = c.encrypt(pt, iv, aad=aad) + + print 'enc:', enc.encode('hex') + print ' ct:', ct.encode('hex') + + assert enc == ct + + print 'etg:', enctag.encode('hex') + print 'tag:', tag.encode('hex') + assert enctag == tag + + # Make sure we get EBADMSG + #enctag = enctag[:-1] + 'a' + dec, dectag = c.decrypt(ct, iv, aad=aad, tag=enctag) + + print 'dec:', dec.encode('hex') + print ' pt:', pt.encode('hex') + + assert dec == pt + + print 'dtg:', dectag.encode('hex') + print 'tag:', tag.encode('hex') + + assert dectag == tag + elif False: + key = _spdechex('c939cc13397c1d37de6ae0e1cb7c423c') + iv = _spdechex('b3d8cc017cbb89b39e0f67e2') + key = key + iv[:4] + iv = iv[4:] + pt = _spdechex('c3b3c41f113a31b73d9a5cd432103069') + aad = _spdechex('24825602bd12a984e0092d3e448eda5f') + ct = _spdechex('93fe7d9e9bfd10348a5606e5cafa7354') + tag = _spdechex('0032a1dc85f1c9786925a2e71d8272dd') + + c = Crypto(CRYPTO_AES_GCM_16, key, mac=CRYPTO_AES_128_GMAC, mackey=key) + + enc, enctag = c.encrypt(pt, iv, aad=aad) + + print 'enc:', enc.encode('hex') + print ' ct:', ct.encode('hex') + + assert enc == ct + + print 'etg:', enctag.encode('hex') + print 'tag:', tag.encode('hex') + assert enctag == tag + elif False: + for i in xrange(100000): + c = Crypto(CRYPTO_AES_XTS, '1bbfeadf539daedcae33ced497343f3ca1f2474ad932b903997d44707db41382'.decode('hex')) + data = '52a42bca4e9425a25bbc8c8bf6129dec'.decode('hex') + ct = '517e602becd066b65fa4f4f56ddfe240'.decode('hex') + iv = _pack('QQ', 71, 0) + + enc = c.encrypt(data, iv) + assert enc == ct + elif True: + c = Crypto(CRYPTO_AES_XTS, '1bbfeadf539daedcae33ced497343f3ca1f2474ad932b903997d44707db41382'.decode('hex')) + data = '52a42bca4e9425a25bbc8c8bf6129dec'.decode('hex') + ct = '517e602becd066b65fa4f4f56ddfe240'.decode('hex') + iv = _pack('QQ', 71, 0) + + enc = c.encrypt(data, iv) + assert enc == ct + + dec = c.decrypt(enc, iv) + assert dec == data + + #c.perftest(COP_ENCRYPT, 192*1024, reps=30000) + + else: + key = '1bbfeadf539daedcae33ced497343f3ca1f2474ad932b903997d44707db41382'.decode('hex') + print 'XTS %d testing:' % (len(key) * 8) + c = Crypto(CRYPTO_AES_XTS, key) + for i in [ 8192, 192*1024]: + print 'block size: %d' % i + c.perftest(COP_ENCRYPT, i) + c.perftest(COP_DECRYPT, i) diff --git a/tests/sys/opencrypto/cryptodevh.py b/tests/sys/opencrypto/cryptodevh.py new file mode 100644 index 0000000..ee024e9 --- /dev/null +++ b/tests/sys/opencrypto/cryptodevh.py @@ -0,0 +1,250 @@ +# $FreeBSD$ +# Generated by h2py from stdin + +# Included from sys/ioccom.h +IOCPARM_SHIFT = 13 +IOCPARM_MASK = ((1 << IOCPARM_SHIFT) - 1) +def IOCPARM_LEN(x): return (((x) >> 16) & IOCPARM_MASK) + +def IOCBASECMD(x): return ((x) & ~(IOCPARM_MASK << 16)) + +def IOCGROUP(x): return (((x) >> 8) & 0xff) + +IOCPARM_MAX = (1 << IOCPARM_SHIFT) +IOC_VOID = 0x20000000 +IOC_OUT = 0x40000000 +IOC_IN = 0x80000000 +IOC_INOUT = (IOC_IN|IOC_OUT) +IOC_DIRMASK = (IOC_VOID|IOC_OUT|IOC_IN) + +# Included from sys/cdefs.h +def __has_feature(x): return 0 + +def __has_include(x): return 0 + +def __has_builtin(x): return 0 + +__GNUCLIKE_ASM = 3 +__GNUCLIKE_ASM = 2 +__GNUCLIKE___TYPEOF = 1 +__GNUCLIKE___OFFSETOF = 1 +__GNUCLIKE___SECTION = 1 +__GNUCLIKE_CTOR_SECTION_HANDLING = 1 +__GNUCLIKE_BUILTIN_CONSTANT_P = 1 +__GNUCLIKE_BUILTIN_VARARGS = 1 +__GNUCLIKE_BUILTIN_STDARG = 1 +__GNUCLIKE_BUILTIN_VAALIST = 1 +__GNUC_VA_LIST_COMPATIBILITY = 1 +__GNUCLIKE_BUILTIN_NEXT_ARG = 1 +__GNUCLIKE_BUILTIN_MEMCPY = 1 +__CC_SUPPORTS_INLINE = 1 +__CC_SUPPORTS___INLINE = 1 +__CC_SUPPORTS___INLINE__ = 1 +__CC_SUPPORTS___FUNC__ = 1 +__CC_SUPPORTS_WARNING = 1 +__CC_SUPPORTS_VARADIC_XXX = 1 +__CC_SUPPORTS_DYNAMIC_ARRAY_INIT = 1 +def __P(protos): return protos + +def __STRING(x): return #x + +def __XSTRING(x): return __STRING(x) + +def __P(protos): return () + +def __STRING(x): return "x" + +def __aligned(x): return __attribute__((__aligned__(x))) + +def __section(x): return __attribute__((__section__(x))) + +def __aligned(x): return __attribute__((__aligned__(x))) + +def __section(x): return __attribute__((__section__(x))) + +def _Alignas(x): return alignas(x) + +def _Alignas(x): return __aligned(x) + +def _Alignof(x): return alignof(x) + +def _Alignof(x): return __alignof(x) + +def __nonnull(x): return __attribute__((__nonnull__(x))) + +def __predict_true(exp): return __builtin_expect((exp), 1) + +def __predict_false(exp): return __builtin_expect((exp), 0) + +def __predict_true(exp): return (exp) + +def __predict_false(exp): return (exp) + +def __format_arg(fmtarg): return __attribute__((__format_arg__ (fmtarg))) + +def __GLOBL(sym): return __GLOBL1(sym) + +def __FBSDID(s): return __IDSTRING(__CONCAT(__rcsid_,__LINE__),s) + +def __RCSID(s): return __IDSTRING(__CONCAT(__rcsid_,__LINE__),s) + +def __RCSID_SOURCE(s): return __IDSTRING(__CONCAT(__rcsid_source_,__LINE__),s) + +def __SCCSID(s): return __IDSTRING(__CONCAT(__sccsid_,__LINE__),s) + +def __COPYRIGHT(s): return __IDSTRING(__CONCAT(__copyright_,__LINE__),s) + +_POSIX_C_SOURCE = 199009 +_POSIX_C_SOURCE = 199209 +__XSI_VISIBLE = 700 +_POSIX_C_SOURCE = 200809 +__XSI_VISIBLE = 600 +_POSIX_C_SOURCE = 200112 +__XSI_VISIBLE = 500 +_POSIX_C_SOURCE = 199506 +_POSIX_C_SOURCE = 198808 +__POSIX_VISIBLE = 200809 +__ISO_C_VISIBLE = 1999 +__POSIX_VISIBLE = 200112 +__ISO_C_VISIBLE = 1999 +__POSIX_VISIBLE = 199506 +__ISO_C_VISIBLE = 1990 +__POSIX_VISIBLE = 199309 +__ISO_C_VISIBLE = 1990 +__POSIX_VISIBLE = 199209 +__ISO_C_VISIBLE = 1990 +__POSIX_VISIBLE = 199009 +__ISO_C_VISIBLE = 1990 +__POSIX_VISIBLE = 198808 +__ISO_C_VISIBLE = 0 +__POSIX_VISIBLE = 0 +__XSI_VISIBLE = 0 +__BSD_VISIBLE = 0 +__ISO_C_VISIBLE = 1990 +__POSIX_VISIBLE = 0 +__XSI_VISIBLE = 0 +__BSD_VISIBLE = 0 +__ISO_C_VISIBLE = 1999 +__POSIX_VISIBLE = 0 +__XSI_VISIBLE = 0 +__BSD_VISIBLE = 0 +__ISO_C_VISIBLE = 2011 +__POSIX_VISIBLE = 200809 +__XSI_VISIBLE = 700 +__BSD_VISIBLE = 1 +__ISO_C_VISIBLE = 2011 +__NO_TLS = 1 +CRYPTO_DRIVERS_INITIAL = 4 +CRYPTO_SW_SESSIONS = 32 +NULL_HASH_LEN = 16 +MD5_HASH_LEN = 16 +SHA1_HASH_LEN = 20 +RIPEMD160_HASH_LEN = 20 +SHA2_256_HASH_LEN = 32 +SHA2_384_HASH_LEN = 48 +SHA2_512_HASH_LEN = 64 +MD5_KPDK_HASH_LEN = 16 +SHA1_KPDK_HASH_LEN = 20 +HASH_MAX_LEN = SHA2_512_HASH_LEN +NULL_HMAC_BLOCK_LEN = 64 +MD5_HMAC_BLOCK_LEN = 64 +SHA1_HMAC_BLOCK_LEN = 64 +RIPEMD160_HMAC_BLOCK_LEN = 64 +SHA2_256_HMAC_BLOCK_LEN = 64 +SHA2_384_HMAC_BLOCK_LEN = 128 +SHA2_512_HMAC_BLOCK_LEN = 128 +HMAC_MAX_BLOCK_LEN = SHA2_512_HMAC_BLOCK_LEN +HMAC_IPAD_VAL = 0x36 +HMAC_OPAD_VAL = 0x5C +NULL_BLOCK_LEN = 4 +DES_BLOCK_LEN = 8 +DES3_BLOCK_LEN = 8 +BLOWFISH_BLOCK_LEN = 8 +SKIPJACK_BLOCK_LEN = 8 +CAST128_BLOCK_LEN = 8 +RIJNDAEL128_BLOCK_LEN = 16 +AES_BLOCK_LEN = RIJNDAEL128_BLOCK_LEN +CAMELLIA_BLOCK_LEN = 16 +EALG_MAX_BLOCK_LEN = AES_BLOCK_LEN +AALG_MAX_RESULT_LEN = 64 +CRYPTO_ALGORITHM_MIN = 1 +CRYPTO_DES_CBC = 1 +CRYPTO_3DES_CBC = 2 +CRYPTO_BLF_CBC = 3 +CRYPTO_CAST_CBC = 4 +CRYPTO_SKIPJACK_CBC = 5 +CRYPTO_MD5_HMAC = 6 +CRYPTO_SHA1_HMAC = 7 +CRYPTO_RIPEMD160_HMAC = 8 +CRYPTO_MD5_KPDK = 9 +CRYPTO_SHA1_KPDK = 10 +CRYPTO_RIJNDAEL128_CBC = 11 +CRYPTO_AES_CBC = 11 +CRYPTO_ARC4 = 12 +CRYPTO_MD5 = 13 +CRYPTO_SHA1 = 14 +CRYPTO_NULL_HMAC = 15 +CRYPTO_NULL_CBC = 16 +CRYPTO_DEFLATE_COMP = 17 +CRYPTO_SHA2_256_HMAC = 18 +CRYPTO_SHA2_384_HMAC = 19 +CRYPTO_SHA2_512_HMAC = 20 +CRYPTO_CAMELLIA_CBC = 21 +CRYPTO_AES_XTS = 22 +CRYPTO_AES_ICM = 23 +CRYPTO_AES_NIST_GMAC = 24 +CRYPTO_AES_NIST_GCM_16 = 25 +CRYPTO_AES_128_NIST_GMAC = 26 +CRYPTO_AES_192_NIST_GMAC = 27 +CRYPTO_AES_256_NIST_GMAC = 28 +CRYPTO_ALGORITHM_MAX = 28 +CRYPTO_ALG_FLAG_SUPPORTED = 0x01 +CRYPTO_ALG_FLAG_RNG_ENABLE = 0x02 +CRYPTO_ALG_FLAG_DSA_SHA = 0x04 +CRYPTO_FLAG_HARDWARE = 0x01000000 +CRYPTO_FLAG_SOFTWARE = 0x02000000 +COP_ENCRYPT = 1 +COP_DECRYPT = 2 +COP_F_BATCH = 0x0008 +CRK_MAXPARAM = 8 +CRK_ALGORITM_MIN = 0 +CRK_MOD_EXP = 0 +CRK_MOD_EXP_CRT = 1 +CRK_DSA_SIGN = 2 +CRK_DSA_VERIFY = 3 +CRK_DH_COMPUTE_KEY = 4 +CRK_ALGORITHM_MAX = 4 +CRF_MOD_EXP = (1 << CRK_MOD_EXP) +CRF_MOD_EXP_CRT = (1 << CRK_MOD_EXP_CRT) +CRF_DSA_SIGN = (1 << CRK_DSA_SIGN) +CRF_DSA_VERIFY = (1 << CRK_DSA_VERIFY) +CRF_DH_COMPUTE_KEY = (1 << CRK_DH_COMPUTE_KEY) +CRD_F_ENCRYPT = 0x01 +CRD_F_IV_PRESENT = 0x02 +CRD_F_IV_EXPLICIT = 0x04 +CRD_F_DSA_SHA_NEEDED = 0x08 +CRD_F_COMP = 0x0f +CRD_F_KEY_EXPLICIT = 0x10 +CRYPTO_F_IMBUF = 0x0001 +CRYPTO_F_IOV = 0x0002 +CRYPTO_F_BATCH = 0x0008 +CRYPTO_F_CBIMM = 0x0010 +CRYPTO_F_DONE = 0x0020 +CRYPTO_F_CBIFSYNC = 0x0040 +CRYPTO_BUF_CONTIG = 0x0 +CRYPTO_BUF_IOV = 0x1 +CRYPTO_BUF_MBUF = 0x2 +CRYPTO_OP_DECRYPT = 0x0 +CRYPTO_OP_ENCRYPT = 0x1 +CRYPTO_HINT_MORE = 0x1 +def CRYPTO_SESID2HID(_sid): return (((_sid) >> 32) & 0x00ffffff) + +def CRYPTO_SESID2CAPS(_sid): return (((_sid) >> 32) & 0xff000000) + +def CRYPTO_SESID2LID(_sid): return (((u_int32_t) (_sid)) & 0xffffffff) + +CRYPTOCAP_F_HARDWARE = CRYPTO_FLAG_HARDWARE +CRYPTOCAP_F_SOFTWARE = CRYPTO_FLAG_SOFTWARE +CRYPTOCAP_F_SYNC = 0x04000000 +CRYPTO_SYMQ = 0x1 +CRYPTO_ASYMQ = 0x2 diff --git a/tests/sys/opencrypto/cryptotest.py b/tests/sys/opencrypto/cryptotest.py new file mode 100644 index 0000000..e616baf --- /dev/null +++ b/tests/sys/opencrypto/cryptotest.py @@ -0,0 +1,265 @@ +#!/usr/bin/env python +# +# Copyright (c) 2014 The FreeBSD Foundation +# All rights reserved. +# +# This software was developed by John-Mark Gurney under +# the sponsorship from the FreeBSD Foundation. +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +# SUCH DAMAGE. +# +# $FreeBSD$ +# + +import cryptodev +import itertools +import os +import struct +import unittest +from cryptodev import * +from glob import iglob + +katdir = '/usr/local/share/nist-kat' + +def katg(base, glob): + return iglob(os.path.join(katdir, base, glob)) + +aesmodules = [ 'cryptosoft0', 'aesni0', ] +desmodules = [ 'cryptosoft0', ] +shamodules = [ 'cryptosoft0', ] + +def GenTestCase(cname): + try: + crid = cryptodev.Crypto.findcrid(cname) + except IOError: + return None + + class GendCryptoTestCase(unittest.TestCase): + ############### + ##### AES ##### + ############### + @unittest.skipIf(cname not in aesmodules, 'skipping AES on %s' % `cname`) + def test_xts(self): + for i in katg('XTSTestVectors/format tweak value input - data unit seq no', '*.rsp'): + self.runXTS(i, cryptodev.CRYPTO_AES_XTS) + + def test_cbc(self): + for i in katg('KAT_AES', 'CBC[GKV]*.rsp'): + self.runCBC(i) + + def test_gcm(self): + for i in katg('gcmtestvectors', 'gcmEncrypt*'): + self.runGCM(i, 'ENCRYPT') + + for i in katg('gcmtestvectors', 'gcmDecrypt*'): + self.runGCM(i, 'DECRYPT') + + _gmacsizes = { 32: cryptodev.CRYPTO_AES_256_NIST_GMAC, + 24: cryptodev.CRYPTO_AES_192_NIST_GMAC, + 16: cryptodev.CRYPTO_AES_128_NIST_GMAC, + } + def runGCM(self, fname, mode): + curfun = None + if mode == 'ENCRYPT': + swapptct = False + curfun = Crypto.encrypt + elif mode == 'DECRYPT': + swapptct = True + curfun = Crypto.decrypt + else: + raise RuntimeError('unknown mode: %s' % `mode`) + + for bogusmode, lines in cryptodev.KATParser(fname, + [ 'Count', 'Key', 'IV', 'CT', 'AAD', 'Tag', 'PT', ]): + for data in lines: + curcnt = int(data['Count']) + cipherkey = data['Key'].decode('hex') + iv = data['IV'].decode('hex') + aad = data['AAD'].decode('hex') + tag = data['Tag'].decode('hex') + if 'FAIL' not in data: + pt = data['PT'].decode('hex') + ct = data['CT'].decode('hex') + + if len(iv) != 12: + # XXX - isn't supported + continue + + c = Crypto(cryptodev.CRYPTO_AES_NIST_GCM_16, + cipherkey, + mac=self._gmacsizes[len(cipherkey)], + mackey=cipherkey, crid=crid) + + if mode == 'ENCRYPT': + rct, rtag = c.encrypt(pt, iv, aad) + rtag = rtag[:len(tag)] + data['rct'] = rct.encode('hex') + data['rtag'] = rtag.encode('hex') + self.assertEqual(rct, ct, `data`) + self.assertEqual(rtag, tag, `data`) + else: + if len(tag) != 16: + continue + args = (ct, iv, aad, tag) + if 'FAIL' in data: + self.assertRaises(IOError, + c.decrypt, *args) + else: + rpt, rtag = c.decrypt(*args) + data['rpt'] = rpt.encode('hex') + data['rtag'] = rtag.encode('hex') + self.assertEqual(rpt, pt, + `data`) + + def runCBC(self, fname): + curfun = None + for mode, lines in cryptodev.KATParser(fname, + [ 'COUNT', 'KEY', 'IV', 'PLAINTEXT', 'CIPHERTEXT', ]): + if mode == 'ENCRYPT': + swapptct = False + curfun = Crypto.encrypt + elif mode == 'DECRYPT': + swapptct = True + curfun = Crypto.decrypt + else: + raise RuntimeError('unknown mode: %s' % `mode`) + + for data in lines: + curcnt = int(data['COUNT']) + cipherkey = data['KEY'].decode('hex') + iv = data['IV'].decode('hex') + pt = data['PLAINTEXT'].decode('hex') + ct = data['CIPHERTEXT'].decode('hex') + + if swapptct: + pt, ct = ct, pt + # run the fun + c = Crypto(cryptodev.CRYPTO_AES_CBC, cipherkey, crid=crid) + r = curfun(c, pt, iv) + self.assertEqual(r, ct) + + def runXTS(self, fname, meth): + curfun = None + for mode, lines in cryptodev.KATParser(fname, + [ 'COUNT', 'DataUnitLen', 'Key', 'DataUnitSeqNumber', 'PT', + 'CT' ]): + if mode == 'ENCRYPT': + swapptct = False + curfun = Crypto.encrypt + elif mode == 'DECRYPT': + swapptct = True + curfun = Crypto.decrypt + else: + raise RuntimeError('unknown mode: %s' % `mode`) + + for data in lines: + curcnt = int(data['COUNT']) + nbits = int(data['DataUnitLen']) + cipherkey = data['Key'].decode('hex') + iv = struct.pack('QQ', int(data['DataUnitSeqNumber']), 0) + pt = data['PT'].decode('hex') + ct = data['CT'].decode('hex') + + if nbits % 128 != 0: + # XXX - mark as skipped + continue + if swapptct: + pt, ct = ct, pt + # run the fun + c = Crypto(meth, cipherkey, crid=crid) + r = curfun(c, pt, iv) + self.assertEqual(r, ct) + + ############### + ##### DES ##### + ############### + @unittest.skipIf(cname not in desmodules, 'skipping DES on %s' % `cname`) + def test_tdes(self): + for i in katg('KAT_TDES', 'TCBC[a-z]*.rsp'): + self.runTDES(i) + + def runTDES(self, fname): + curfun = None + for mode, lines in cryptodev.KATParser(fname, + [ 'COUNT', 'KEYs', 'IV', 'PLAINTEXT', 'CIPHERTEXT', ]): + if mode == 'ENCRYPT': + swapptct = False + curfun = Crypto.encrypt + elif mode == 'DECRYPT': + swapptct = True + curfun = Crypto.decrypt + else: + raise RuntimeError('unknown mode: %s' % `mode`) + + for data in lines: + curcnt = int(data['COUNT']) + key = data['KEYs'] * 3 + cipherkey = key.decode('hex') + iv = data['IV'].decode('hex') + pt = data['PLAINTEXT'].decode('hex') + ct = data['CIPHERTEXT'].decode('hex') + + if swapptct: + pt, ct = ct, pt + # run the fun + c = Crypto(cryptodev.CRYPTO_3DES_CBC, cipherkey, crid=crid) + r = curfun(c, pt, iv) + self.assertEqual(r, ct) + + ############### + ##### SHA ##### + ############### + @unittest.skipIf(cname not in shamodules, 'skipping SHA on %s' % `cname`) + def test_sha(self): + # SHA not available in software + pass + #for i in iglob('SHA1*'): + # self.runSHA(i) + + def test_sha1hmac(self): + for i in katg('hmactestvectors', 'HMAC.rsp'): + self.runSHA1HMAC(i) + + def runSHA1HMAC(self, fname): + for bogusmode, lines in cryptodev.KATParser(fname, + [ 'Count', 'Klen', 'Tlen', 'Key', 'Msg', 'Mac' ]): + for data in lines: + key = data['Key'].decode('hex') + msg = data['Msg'].decode('hex') + mac = data['Mac'].decode('hex') + + if len(key) != 20: + # XXX - implementation bug + continue + + c = Crypto(mac=cryptodev.CRYPTO_SHA1_HMAC, + mackey=key, crid=crid) + + r = c.encrypt(msg) + self.assertEqual(r, mac, `data`) + + return GendCryptoTestCase + +cryptosoft = GenTestCase('cryptosoft0') +aesni = GenTestCase('aesni0') + +if __name__ == '__main__': + unittest.main() diff --git a/tests/sys/opencrypto/dpkt.py b/tests/sys/opencrypto/dpkt.py new file mode 100644 index 0000000..b74aa35 --- /dev/null +++ b/tests/sys/opencrypto/dpkt.py @@ -0,0 +1,160 @@ +# $FreeBSD$ +# $Id: dpkt.py 114 2005-09-11 15:15:12Z dugsong $ + +"""fast, simple packet creation / parsing, with definitions for the +basic TCP/IP protocols. +""" + +__author__ = 'Dug Song <dugsong@monkey.org>' +__copyright__ = 'Copyright (c) 2004 Dug Song' +__license__ = 'BSD' +__url__ = 'http://monkey.org/~dugsong/dpkt/' +__version__ = '1.2' + +try: + from itertools import izip as _it_izip +except ImportError: + _it_izip = zip + +from struct import calcsize as _st_calcsize, \ + pack as _st_pack, unpack as _st_unpack, error as _st_error +from re import compile as _re_compile + +intchr = _re_compile(r"(?P<int>[0-9]+)(?P<chr>.)") + +class MetaPacket(type): + def __new__(cls, clsname, clsbases, clsdict): + if '__hdr__' in clsdict: + st = clsdict['__hdr__'] + clsdict['__hdr_fields__'] = [ x[0] for x in st ] + clsdict['__hdr_fmt__'] = clsdict.get('__byte_order__', '>') + \ + ''.join([ x[1] for x in st ]) + clsdict['__hdr_len__'] = _st_calcsize(clsdict['__hdr_fmt__']) + clsdict['__hdr_defaults__'] = \ + dict(zip(clsdict['__hdr_fields__'], [ x[2] for x in st ])) + clsdict['__slots__'] = clsdict['__hdr_fields__'] + return type.__new__(cls, clsname, clsbases, clsdict) + +class Packet(object): + """Packet class + + __hdr__ should be defined as a list of (name, structfmt, default) tuples + __byte_order__ can be set to override the default ('>') + """ + __metaclass__ = MetaPacket + data = '' + + def __init__(self, *args, **kwargs): + """Packet constructor with ([buf], [field=val,...]) prototype. + + Arguments: + + buf -- packet buffer to unpack + + Optional keyword arguments correspond to packet field names. + """ + if args: + self.unpack(args[0]) + else: + for k in self.__hdr_fields__: + setattr(self, k, self.__hdr_defaults__[k]) + for k, v in kwargs.iteritems(): + setattr(self, k, v) + + def __len__(self): + return self.__hdr_len__ + len(self.data) + + def __repr__(self): + l = [ '%s=%r' % (k, getattr(self, k)) + for k in self.__hdr_defaults__ + if getattr(self, k) != self.__hdr_defaults__[k] ] + if self.data: + l.append('data=%r' % self.data) + return '%s(%s)' % (self.__class__.__name__, ', '.join(l)) + + def __str__(self): + return self.pack_hdr() + str(self.data) + + def pack_hdr(self): + """Return packed header string.""" + try: + return _st_pack(self.__hdr_fmt__, + *[ getattr(self, k) for k in self.__hdr_fields__ ]) + except _st_error: + vals = [] + for k in self.__hdr_fields__: + v = getattr(self, k) + if isinstance(v, tuple): + vals.extend(v) + else: + vals.append(v) + return _st_pack(self.__hdr_fmt__, *vals) + + def unpack(self, buf): + """Unpack packet header fields from buf, and set self.data.""" + + res = list(_st_unpack(self.__hdr_fmt__, buf[:self.__hdr_len__])) + for e, k in enumerate(self.__slots__): + sfmt = self.__hdr__[e][1] + mat = intchr.match(sfmt) + if mat and mat.group('chr') != 's': + cnt = int(mat.group('int')) + setattr(self, k, list(res[:cnt])) + del res[:cnt] + else: + if sfmt[-1] == 's': + i = res[0].find('\x00') + if i != -1: + res[0] = res[0][:i] + setattr(self, k, res[0]) + del res[0] + assert len(res) == 0 + self.data = buf[self.__hdr_len__:] + +# XXX - ''.join([(len(`chr(x)`)==3) and chr(x) or '.' for x in range(256)]) +__vis_filter = """................................ !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[.]^_`abcdefghijklmnopqrstuvwxyz{|}~.................................................................................................................................""" + +def hexdump(buf, length=16): + """Return a hexdump output string of the given buffer.""" + n = 0 + res = [] + while buf: + line, buf = buf[:length], buf[length:] + hexa = ' '.join(['%02x' % ord(x) for x in line]) + line = line.translate(__vis_filter) + res.append(' %04d: %-*s %s' % (n, length * 3, hexa, line)) + n += length + return '\n'.join(res) + +def in_cksum_add(s, buf): + """in_cksum_add(cksum, buf) -> cksum + + Return accumulated Internet checksum. + """ + nleft = len(buf) + i = 0 + while nleft > 1: + s += ord(buf[i]) * 256 + ord(buf[i+1]) + i += 2 + nleft -= 2 + if nleft: + s += ord(buf[i]) * 256 + return s + +def in_cksum_done(s): + """Fold and return Internet checksum.""" + while (s >> 16): + s = (s >> 16) + (s & 0xffff) + return (~s & 0xffff) + +def in_cksum(buf): + """Return computed Internet checksum.""" + return in_cksum_done(in_cksum_add(0, buf)) + +try: + import psyco + psyco.bind(in_cksum) + psyco.bind(Packet) +except ImportError: + pass + diff --git a/tests/sys/opencrypto/runtests.sh b/tests/sys/opencrypto/runtests.sh new file mode 100644 index 0000000..26c673a --- /dev/null +++ b/tests/sys/opencrypto/runtests.sh @@ -0,0 +1,60 @@ +#!/bin/sh - +# +# Copyright (c) 2014 The FreeBSD Foundation +# All rights reserved. +# +# This software was developed by John-Mark Gurney under +# the sponsorship from the FreeBSD Foundation. +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +# SUCH DAMAGE. +# +# $FreeBSD$ +# + +set -e + +if [ ! -d /usr/local/share/nist-kat ]; then + echo 'Skipping, nist-kat package not installed for test vectors.' + exit 0 +fi + +if kldload aesni 2>/dev/null; then + unloadaesni=1 +fi + +if kldload cryptodev 2>/dev/null; then + unloadcdev=1 +fi + +# Run software crypto test +oldcdas=$(sysctl -e kern.cryptodevallowsoft) +sysctl kern.cryptodevallowsoft=1 + +python $(dirname $0)/cryptotest.py + +sysctl "$oldcdas" + +if [ x"$unloadcdev" = x"1" ]; then + kldunload cryptodev +fi +if [ x"$unloadaesni" = x"1" ]; then + kldunload aesni +fi diff --git a/usr.bin/cap_mkdb/cap_mkdb.c b/usr.bin/cap_mkdb/cap_mkdb.c index bbcedd5..2f8bd96 100644 --- a/usr.bin/cap_mkdb/cap_mkdb.c +++ b/usr.bin/cap_mkdb/cap_mkdb.c @@ -119,7 +119,7 @@ main(int argc, char *argv[]) (void)snprintf(buf, sizeof(buf), "%s.db", capname ? capname : *argv); if ((capname = strdup(buf)) == NULL) errx(1, "strdup failed"); - if ((capdbp = dbopen(capname, O_CREAT | O_TRUNC | O_RDWR | O_SYNC, + if ((capdbp = dbopen(capname, O_CREAT | O_TRUNC | O_RDWR, DEFFILEMODE, DB_HASH, &openinfo)) == NULL) err(1, "%s", buf); diff --git a/usr.bin/netstat/ipsec.c b/usr.bin/netstat/ipsec.c index 20364b6..801008f 100644 --- a/usr.bin/netstat/ipsec.c +++ b/usr.bin/netstat/ipsec.c @@ -155,6 +155,9 @@ static struct val2str ipsec_espnames[] = { #ifdef SADB_X_EALG_AESCTR { SADB_X_EALG_AESCTR, "aes-ctr", }, #endif +#ifdef SADB_X_EALG_AESGCM16 + { SADB_X_EALG_AESGCM16, "aes-gcm-16", }, +#endif { -1, NULL }, }; diff --git a/usr.sbin/ngctl/main.c b/usr.sbin/ngctl/main.c index 4b1cdab..3444fe3 100644 --- a/usr.sbin/ngctl/main.c +++ b/usr.sbin/ngctl/main.c @@ -218,7 +218,6 @@ ReadFile(FILE *fp) continue; if ((rtn = DoParseCommand(line)) != 0) { warnx("line %d: error in file", num); - return (rtn); } } return (CMDRTN_OK); diff --git a/usr.sbin/pwd_mkdb/pwd_mkdb.c b/usr.sbin/pwd_mkdb/pwd_mkdb.c index c75804a..9158e61 100644 --- a/usr.sbin/pwd_mkdb/pwd_mkdb.c +++ b/usr.sbin/pwd_mkdb/pwd_mkdb.c @@ -225,14 +225,14 @@ main(int argc, char *argv[]) clean = FILE_INSECURE; cp(buf2, buf, PERM_INSECURE); dp = dbopen(buf, - O_RDWR|O_EXCL|O_SYNC, PERM_INSECURE, DB_HASH, &openinfo); + O_RDWR|O_EXCL, PERM_INSECURE, DB_HASH, &openinfo); if (dp == NULL) error(buf); clean = FILE_SECURE; cp(sbuf2, sbuf, PERM_SECURE); sdp = dbopen(sbuf, - O_RDWR|O_EXCL|O_SYNC, PERM_SECURE, DB_HASH, &openinfo); + O_RDWR|O_EXCL, PERM_SECURE, DB_HASH, &openinfo); if (sdp == NULL) error(sbuf); @@ -289,13 +289,13 @@ main(int argc, char *argv[]) method = 0; } else { dp = dbopen(buf, - O_RDWR|O_CREAT|O_EXCL|O_SYNC, PERM_INSECURE, DB_HASH, &openinfo); + O_RDWR|O_CREAT|O_EXCL, PERM_INSECURE, DB_HASH, &openinfo); if (dp == NULL) error(buf); clean = FILE_INSECURE; sdp = dbopen(sbuf, - O_RDWR|O_CREAT|O_EXCL|O_SYNC, PERM_SECURE, DB_HASH, &openinfo); + O_RDWR|O_CREAT|O_EXCL, PERM_SECURE, DB_HASH, &openinfo); if (sdp == NULL) error(sbuf); clean = FILE_SECURE; diff --git a/usr.sbin/rtsold/rtsol.c b/usr.sbin/rtsold/rtsol.c index 118206a..ced0a73 100644 --- a/usr.sbin/rtsold/rtsol.c +++ b/usr.sbin/rtsold/rtsol.c @@ -92,7 +92,7 @@ static int ra_opt_rdnss_dispatch(struct ifinfo *, struct rainfo *, struct script_msg_head_t *, struct script_msg_head_t *); static char *make_rsid(const char *, const char *, struct rainfo *); -#define _ARGS_OTHER otherconf_script, ifi->ifname +#define _ARGS_OTHER otherconf_script, ifi->ifname, ntopbuf #define _ARGS_RESADD resolvconf_script, "-a", rsid #define _ARGS_RESDEL resolvconf_script, "-d", rsid @@ -374,8 +374,8 @@ rtsol_input(int s) warnmsg(LOG_DEBUG, __func__, "OtherConfigFlag on %s is turned on", ifi->ifname); ifi->otherconfig = 1; - CALL_SCRIPT(OTHER, NULL); } + CALL_SCRIPT(OTHER, NULL); clock_gettime(CLOCK_MONOTONIC_FAST, &now); newent_rai = 0; rai = find_rainfo(ifi, &from); diff --git a/usr.sbin/services_mkdb/services_mkdb.c b/usr.sbin/services_mkdb/services_mkdb.c index 9ea66de..c928ea9 100644 --- a/usr.sbin/services_mkdb/services_mkdb.c +++ b/usr.sbin/services_mkdb/services_mkdb.c @@ -141,7 +141,7 @@ main(int argc, char *argv[]) err(1, "Cannot install exit handler"); (void)snprintf(tname, sizeof(tname), "%s.tmp", dbname); - db = dbopen(tname, O_RDWR | O_CREAT | O_EXCL | O_SYNC, + db = dbopen(tname, O_RDWR | O_CREAT | O_EXCL, (S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH), DB_HASH, &hinfo); if (!db) err(1, "Error opening temporary database `%s'", tname); diff --git a/usr.sbin/syslogd/clog.h b/usr.sbin/syslogd/clog.h new file mode 100644 index 0000000..ed9e7a2 --- /dev/null +++ b/usr.sbin/syslogd/clog.h @@ -0,0 +1,52 @@ +/*- + * Copyright (c) 2001 + * Jeff Wheelhouse (jdw@wwwi.com) + * + * This code was originally developed by Jeff Wheelhouse (jdw@wwwi.com). + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistribution of source code must retail the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY JEFF WHEELHOUSE ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN + * NO EVENT SHALL JEFF WHEELHOUSE BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, + * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * $Id: clog.h,v 1.2 2001/10/02 04:43:52 jdw Exp $ + * $DragonFly: src/usr.sbin/clog/clog.h,v 1.1 2004/10/30 20:26:46 dillon Exp $ + */ + + +#ifndef _CLOG_H_ +#define _CLOG_H_ + +/* + * This magic constant is used to identify a valid circular log file. + * syslogd will ignore any circular log file that doesn't have this constant. + */ + +const char MAGIC_CONST[4] = "CLOG"; + + +struct clog_footer { + uint32_t cf_magic; + uint32_t cf_wrap; + uint32_t cf_next; + uint32_t cf_max; + uint32_t cf_lock; +}; + + +#endif /* _CLOG_H_ */ diff --git a/usr.sbin/syslogd/syslogd.c b/usr.sbin/syslogd/syslogd.c index 097e158..4afec2d 100644 --- a/usr.sbin/syslogd/syslogd.c +++ b/usr.sbin/syslogd/syslogd.c @@ -89,6 +89,7 @@ __FBSDID("$FreeBSD$"); #include <sys/resource.h> #include <sys/syslimits.h> #include <sys/types.h> +#include <sys/mman.h> #include <netinet/in.h> #include <netdb.h> @@ -110,6 +111,8 @@ __FBSDID("$FreeBSD$"); #include <utmpx.h> #include "pathnames.h" +#include "clog.h" + #include "ttymsg.h" #define SYSLOG_NAMES @@ -118,6 +121,7 @@ __FBSDID("$FreeBSD$"); const char *ConfFile = _PATH_LOGCONF; const char *PidFile = _PATH_LOGPID; const char ctty[] = _PATH_CONSOLE; +const char ring_magic[] = "CLOG"; #define dprintf if (Debug) printf @@ -183,6 +187,11 @@ struct filed { char f_pname[MAXPATHLEN]; pid_t f_pid; } f_pipe; + struct { + char f_rname[MAXPATHLEN]; + struct clog_footer *f_footer; + size_t f_size; + } f_ring; } f_un; char f_prevline[MAXSVLINE]; /* last message logged */ char f_lasttime[16]; /* time of last occurrence */ @@ -261,10 +270,12 @@ int repeatinterval[] = { 30, 120, 600 }; /* # of secs before flush */ #define F_USERS 5 /* list of users */ #define F_WALL 6 /* everyone logged on */ #define F_PIPE 7 /* pipe to program */ +#define F_RING 8 /* ring buffer (circular log) */ -const char *TypeNames[8] = { +const char *TypeNames[9] = { "UNUSED", "FILE", "TTY", "CONSOLE", - "FORW", "USERS", "WALL", "PIPE" + "FORW", "USERS", "WALL", "PIPE", + "RING" }; static struct filed *Files; /* Log files that we write to */ @@ -330,6 +341,8 @@ static int skip_message(const char *, const char *, int); static void printline(const char *, char *, int); static void printsys(char *); static int p_open(const char *, pid_t *); +ssize_t rbwrite __P((struct filed *, char *, size_t)); +ssize_t rbwritev __P((struct filed *, struct iovec *, int)); static void readklog(void); static void reapchild(int); static void usage(void); @@ -927,7 +940,7 @@ logmsg(int pri, const char *msg, const char *from, int flags) struct filed *f; int i, fac, msglen, omask, prilev; const char *timestamp; - char prog[NAME_MAX+1]; + char prog[NAME_MAX+1]; char buf[MAXLINE+1]; dprintf("logmsg: pri %o, flags %x, from %s, msg %s\n", @@ -1301,6 +1314,20 @@ fprintlog(struct filed *f, int flags, const char *msg) needdofsync = 1; } break; + case F_RING: + dprintf(" %s\n", f->f_un.f_ring.f_rname); + v->iov_base = "\n"; + v->iov_len = 1; + if (rbwritev(f, iov, 7)==-1) { + int e = errno; + (void)munmap(f->f_un.f_ring.f_footer,sizeof(struct clog_footer)); + (void)close(f->f_file); + f->f_type = F_UNUSED; + errno = e; + logerror(f->f_un.f_fname); + } + + break; case F_PIPE: dprintf(" %s\n", f->f_un.f_pipe.f_pname); @@ -1557,7 +1584,7 @@ init(int signo) struct filed *f, *next, **nextp; char *p; char cline[LINE_MAX]; - char prog[LINE_MAX]; + char prog[LINE_MAX]; char host[MAXHOSTNAMELEN]; char oldLocalHostName[MAXHOSTNAMELEN]; char hostMsg[2*MAXHOSTNAMELEN+40]; @@ -1604,6 +1631,10 @@ init(int signo) } f->f_un.f_pipe.f_pid = 0; break; + case F_RING: + (void)munmap(f->f_un.f_ring.f_footer,sizeof(struct clog_footer)); + (void)close(f->f_file); + break; } next = f->f_next; if (f->f_program) free(f->f_program); @@ -1745,6 +1776,10 @@ init(int signo) } break; + case F_RING: + printf("%s", f->f_un.f_ring.f_rname); + break; + case F_PIPE: printf("%s", f->f_un.f_pipe.f_pname); break; @@ -1795,6 +1830,7 @@ cfline(const char *line, struct filed *f, const char *prog, const char *host) const char *p, *q; char *bp; char buf[MAXLINE], ebuf[100]; + struct stat sb; dprintf("cfline(\"%s\", f, \"%s\", \"%s\")\n", line, prog, host); @@ -1968,9 +2004,16 @@ cfline(const char *line, struct filed *f, const char *prog, const char *host) p++; endkey = ']'; } - while (*p && (*p != endkey) && (i-- > 0)) { + while (*p && (*p != endkey) && (*p != '[') && (i-- > 0)) { *tp++ = *p++; } + if (*p == '[') { + p++; + while (*p && (*p != ']') && (i-- > 0)) { + *tp++ = *p++; + } + p++; + } if (endkey == ']' && *p == endkey) p++; *tp = '\0'; @@ -2015,6 +2058,38 @@ cfline(const char *line, struct filed *f, const char *prog, const char *host) } break; + case '%': + if ((f->f_file = open(p+1, O_RDWR, 0 )) < 0) { + f->f_type = F_UNUSED; + logerror(p+1); + break; + } + if (fstat(f->f_file,&sb)<0) { + (void)close(f->f_file); + f->f_type = F_UNUSED; + logerror(p+1); + break; + } + f->f_un.f_ring.f_footer = mmap(NULL,sizeof(struct clog_footer),PROT_READ|PROT_WRITE,MAP_SHARED,f->f_file,sb.st_size-sizeof(struct clog_footer)); + if (f->f_un.f_ring.f_footer==NULL) { + (void)close(f->f_file); + f->f_type = F_UNUSED; + logerror(p+1); + break; + } + if (memcmp(&(f->f_un.f_ring.f_footer->cf_magic),MAGIC_CONST,4)!=0) { + (void)munmap(f->f_un.f_ring.f_footer,sizeof(struct clog_footer)); + (void)close(f->f_file); + f->f_type = F_UNUSED; + errno = ENODEV; + logerror(p+1); + break; + } + f->f_un.f_ring.f_size = sb.st_size; + (void)strcpy(f->f_un.f_ring.f_rname, p + 1); + f->f_type = F_RING; + break; + case '|': f->f_un.f_pipe.f_pid = 0; (void)strlcpy(f->f_un.f_pipe.f_pname, p + 1, @@ -2758,6 +2833,49 @@ socksetup(int af, char *bindhostname) return (socks); } +ssize_t rbwritev(struct filed *f, struct iovec *iov, int iovcnt) { + int i; + ssize_t out = 0; + ssize_t err; + + for(i=0;i<iovcnt;i++) { + err = rbwrite(f,iov[i].iov_base,iov[i].iov_len); + if (err==-1) return -1; + out += err; + } + return out; +} + + +ssize_t rbwrite(struct filed *f, char *buf, size_t nbytes) { + size_t maxwrite = f->f_un.f_ring.f_footer->cf_max - f->f_un.f_ring.f_footer->cf_next; + ssize_t err; + ssize_t out = 0; + + f->f_un.f_ring.f_footer->cf_lock = 1; + while (nbytes>0) { + maxwrite = f->f_un.f_ring.f_footer->cf_max - f->f_un.f_ring.f_footer->cf_next; + if (maxwrite>nbytes) maxwrite = nbytes; + err = pwrite(f->f_file,buf,maxwrite,f->f_un.f_ring.f_footer->cf_next); + if (err==-1) { + f->f_un.f_ring.f_footer->cf_lock = 0; + return -1; + } + nbytes -= err; + out += err; + buf += err; + f->f_un.f_ring.f_footer->cf_next += err; + if (f->f_un.f_ring.f_footer->cf_next==f->f_un.f_ring.f_footer->cf_max) { + f->f_un.f_ring.f_footer->cf_next = 0; + f->f_un.f_ring.f_footer->cf_wrap = 1; + } + + } + + f->f_un.f_ring.f_footer->cf_lock = 0; + return out; +} + static void increase_rcvbuf(int fd) { |