summaryrefslogtreecommitdiffstats
path: root/sys/netinet/ip_input.c
diff options
context:
space:
mode:
authorrwatson <rwatson@FreeBSD.org>2009-07-14 22:48:30 +0000
committerrwatson <rwatson@FreeBSD.org>2009-07-14 22:48:30 +0000
commit57ca4583e728cab422fba8f15de10bd0b637b3dd (patch)
tree13848f891fb2f7a396281b31633563d0f764ff65 /sys/netinet/ip_input.c
parentef443476d9706035ac219f0280ef0b817dda7a6d (diff)
downloadFreeBSD-src-57ca4583e728cab422fba8f15de10bd0b637b3dd.zip
FreeBSD-src-57ca4583e728cab422fba8f15de10bd0b637b3dd.tar.gz
Build on Jeff Roberson's linker-set based dynamic per-CPU allocator
(DPCPU), as suggested by Peter Wemm, and implement a new per-virtual network stack memory allocator. Modify vnet to use the allocator instead of monolithic global container structures (vinet, ...). This change solves many binary compatibility problems associated with VIMAGE, and restores ELF symbols for virtualized global variables. Each virtualized global variable exists as a "reference copy", and also once per virtual network stack. Virtualized global variables are tagged at compile-time, placing the in a special linker set, which is loaded into a contiguous region of kernel memory. Virtualized global variables in the base kernel are linked as normal, but those in modules are copied and relocated to a reserved portion of the kernel's vnet region with the help of a the kernel linker. Virtualized global variables exist in per-vnet memory set up when the network stack instance is created, and are initialized statically from the reference copy. Run-time access occurs via an accessor macro, which converts from the current vnet and requested symbol to a per-vnet address. When "options VIMAGE" is not compiled into the kernel, normal global ELF symbols will be used instead and indirection is avoided. This change restores static initialization for network stack global variables, restores support for non-global symbols and types, eliminates the need for many subsystem constructors, eliminates large per-subsystem structures that caused many binary compatibility issues both for monitoring applications (netstat) and kernel modules, removes the per-function INIT_VNET_*() macros throughout the stack, eliminates the need for vnet_symmap ksym(2) munging, and eliminates duplicate definitions of virtualized globals under VIMAGE_GLOBALS. Bump __FreeBSD_version and update UPDATING. Portions submitted by: bz Reviewed by: bz, zec Discussed with: gnn, jamie, jeff, jhb, julian, sam Suggested by: peter Approved by: re (kensmith)
Diffstat (limited to 'sys/netinet/ip_input.c')
-rw-r--r--sys/netinet/ip_input.c184
1 files changed, 74 insertions, 110 deletions
diff --git a/sys/netinet/ip_input.c b/sys/netinet/ip_input.c
index 73ee286..11ab3c7 100644
--- a/sys/netinet/ip_input.c
+++ b/sys/netinet/ip_input.c
@@ -71,10 +71,10 @@ __FBSDID("$FreeBSD$");
#include <netinet/ip.h>
#include <netinet/in_pcb.h>
#include <netinet/ip_var.h>
+#include <netinet/ip_fw.h>
#include <netinet/ip_icmp.h>
#include <netinet/ip_options.h>
#include <machine/in_cksum.h>
-#include <netinet/vinet.h>
#ifdef DEV_CARP
#include <netinet/ip_carp.h>
#endif
@@ -90,58 +90,69 @@ __FBSDID("$FreeBSD$");
CTASSERT(sizeof(struct ip) == 20);
#endif
-#ifndef VIMAGE
-#ifndef VIMAGE_GLOBALS
-struct vnet_inet vnet_inet_0;
-#endif
-#endif
+static VNET_DEFINE(int, ipsendredirects) = 1; /* XXX */
+static VNET_DEFINE(int, ip_checkinterface);
+static VNET_DEFINE(int, ip_keepfaith);
+static VNET_DEFINE(int, ip_sendsourcequench);
-#ifdef VIMAGE_GLOBALS
-static int ipsendredirects;
-static int ip_checkinterface;
-static int ip_keepfaith;
-static int ip_sendsourcequench;
-int ip_defttl;
-int ip_do_randomid;
-int ipforwarding;
-struct in_ifaddrhead in_ifaddrhead; /* first inet address */
-struct in_ifaddrhashhead *in_ifaddrhashtbl; /* inet addr hash table */
-u_long in_ifaddrhmask; /* mask for hash table */
-struct ipstat ipstat;
-static int ip_rsvp_on;
-struct socket *ip_rsvpd;
-int rsvp_on;
-static struct ipqhead ipq[IPREASS_NHASH];
-static int maxnipq; /* Administrative limit on # reass queues. */
-static int maxfragsperpacket;
-int ipstealth;
-static int nipq; /* Total # of reass queues */
-#endif
+#define V_ipsendredirects VNET_GET(ipsendredirects)
+#define V_ip_checkinterface VNET_GET(ip_checkinterface)
+#define V_ip_keepfaith VNET_GET(ip_keepfaith)
+#define V_ip_sendsourcequench VNET_GET(ip_sendsourcequench)
+
+VNET_DEFINE(int, ip_defttl) = IPDEFTTL;
+VNET_DEFINE(int, ip_do_randomid);
+VNET_DEFINE(int, ipforwarding);
+
+VNET_DEFINE(struct in_ifaddrhead, in_ifaddrhead); /* first inet address */
+VNET_DEFINE(struct in_ifaddrhashhead *, in_ifaddrhashtbl); /* inet addr hash table */
+VNET_DEFINE(u_long, in_ifaddrhmask); /* mask for hash table */
+VNET_DEFINE(struct ipstat, ipstat);
+
+static VNET_DEFINE(int, ip_rsvp_on);
+VNET_DEFINE(struct socket *, ip_rsvpd);
+VNET_DEFINE(int, rsvp_on);
+
+#define V_ip_rsvp_on VNET_GET(ip_rsvp_on)
+
+static VNET_DEFINE(TAILQ_HEAD(ipqhead, ipq), ipq[IPREASS_NHASH]);
+static VNET_DEFINE(int, maxnipq); /* Administrative limit on # reass queues. */
+static VNET_DEFINE(int, maxfragsperpacket);
+static VNET_DEFINE(int, nipq); /* Total # of reass queues */
+
+#define V_ipq VNET_GET(ipq)
+#define V_maxnipq VNET_GET(maxnipq)
+#define V_maxfragsperpacket VNET_GET(maxfragsperpacket)
+#define V_nipq VNET_GET(nipq)
+
+VNET_DEFINE(int, ipstealth);
struct rwlock in_ifaddr_lock;
RW_SYSINIT(in_ifaddr_lock, &in_ifaddr_lock, "in_ifaddr_lock");
-SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_ip, IPCTL_FORWARDING,
- forwarding, CTLFLAG_RW, ipforwarding, 0,
+SYSCTL_VNET_INT(_net_inet_ip, IPCTL_FORWARDING, forwarding, CTLFLAG_RW,
+ &VNET_NAME(ipforwarding), 0,
"Enable IP forwarding between interfaces");
-SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_ip, IPCTL_SENDREDIRECTS,
- redirect, CTLFLAG_RW, ipsendredirects, 0,
+SYSCTL_VNET_INT(_net_inet_ip, IPCTL_SENDREDIRECTS, redirect, CTLFLAG_RW,
+ &VNET_NAME(ipsendredirects), 0,
"Enable sending IP redirects");
-SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_ip, IPCTL_DEFTTL,
- ttl, CTLFLAG_RW, ip_defttl, 0, "Maximum TTL on IP packets");
+SYSCTL_VNET_INT(_net_inet_ip, IPCTL_DEFTTL, ttl, CTLFLAG_RW,
+ &VNET_NAME(ip_defttl), 0,
+ "Maximum TTL on IP packets");
-SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_ip, IPCTL_KEEPFAITH,
- keepfaith, CTLFLAG_RW, ip_keepfaith, 0,
+SYSCTL_VNET_INT(_net_inet_ip, IPCTL_KEEPFAITH, keepfaith, CTLFLAG_RW,
+ &VNET_NAME(ip_keepfaith), 0,
"Enable packet capture for FAITH IPv4->IPv6 translater daemon");
-SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_ip, OID_AUTO,
- sendsourcequench, CTLFLAG_RW, ip_sendsourcequench, 0,
+SYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, sendsourcequench, CTLFLAG_RW,
+ &VNET_NAME(ip_sendsourcequench), 0,
"Enable the transmission of source quench packets");
-SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_ip, OID_AUTO, random_id,
- CTLFLAG_RW, ip_do_randomid, 0, "Assign random ip_id values");
+SYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, random_id, CTLFLAG_RW,
+ &VNET_NAME(ip_do_randomid), 0,
+ "Assign random ip_id values");
/*
* XXX - Setting ip_checkinterface mostly implements the receive side of
@@ -156,8 +167,8 @@ SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_ip, OID_AUTO, random_id,
* to the loopback interface instead of the interface where the
* packets for those addresses are received.
*/
-SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_ip, OID_AUTO,
- check_interface, CTLFLAG_RW, ip_checkinterface, 0,
+SYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, check_interface, CTLFLAG_RW,
+ &VNET_NAME(ip_checkinterface), 0,
"Verify packet arrives on correct interface");
struct pfil_head inet_pfil_hook; /* Packet filter hooks */
@@ -173,13 +184,13 @@ extern struct domain inetdomain;
extern struct protosw inetsw[];
u_char ip_protox[IPPROTO_MAX];
+SYSCTL_VNET_STRUCT(_net_inet_ip, IPCTL_STATS, stats, CTLFLAG_RW,
+ &VNET_NAME(ipstat), ipstat,
+ "IP statistics (struct ipstat, netinet/ip_var.h)");
-SYSCTL_V_STRUCT(V_NET, vnet_inet, _net_inet_ip, IPCTL_STATS, stats, CTLFLAG_RW,
- ipstat, ipstat, "IP statistics (struct ipstat, netinet/ip_var.h)");
+static VNET_DEFINE(uma_zone_t, ipq_zone);
+#define V_ipq_zone VNET_GET(ipq_zone)
-#ifdef VIMAGE_GLOBALS
-static uma_zone_t ipq_zone;
-#endif
static struct mtx ipqlock;
#define IPQ_LOCK() mtx_lock(&ipqlock)
@@ -190,12 +201,12 @@ static struct mtx ipqlock;
static void maxnipq_update(void);
static void ipq_zone_change(void *);
-SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_ip, OID_AUTO, fragpackets,
- CTLFLAG_RD, nipq, 0,
+SYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, fragpackets, CTLFLAG_RD,
+ &VNET_NAME(nipq), 0,
"Current number of IPv4 fragment reassembly queue entries");
-SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_ip, OID_AUTO, maxfragsperpacket,
- CTLFLAG_RW, maxfragsperpacket, 0,
+SYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, maxfragsperpacket, CTLFLAG_RW,
+ &VNET_NAME(maxfragsperpacket), 0,
"Maximum number of IPv4 fragments allowed per packet");
struct callout ipport_tick_callout;
@@ -206,32 +217,32 @@ SYSCTL_INT(_net_inet_ip, IPCTL_DEFMTU, mtu, CTLFLAG_RW,
#endif
#ifdef IPSTEALTH
-SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_ip, OID_AUTO, stealth, CTLFLAG_RW,
- ipstealth, 0, "IP stealth mode, no TTL decrementation on forwarding");
+SYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, stealth, CTLFLAG_RW,
+ &VNET_NAME(ipstealth), 0,
+ "IP stealth mode, no TTL decrementation on forwarding");
#endif
+
#ifdef FLOWTABLE
-#ifdef VIMAGE_GLOBALS
-static int ip_output_flowtable_size;
-struct flowtable *ip_ft;
-#endif
-SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_ip, OID_AUTO, output_flowtable_size,
- CTLFLAG_RDTUN, ip_output_flowtable_size, 2048,
+static VNET_DEFINE(int, ip_output_flowtable_size) = 2048;
+VNET_DEFINE(struct flowtable *, ip_ft);
+#define V_ip_output_flowtable_size VNET_GET(ip_output_flowtable_size)
+
+SYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, output_flowtable_size, CTLFLAG_RDTUN,
+ &VNET_NAME(ip_output_flowtable_size), 2048,
"number of entries in the per-cpu output flow caches");
#endif
-#ifdef VIMAGE_GLOBALS
-int fw_one_pass;
-#endif
+VNET_DEFINE(int, fw_one_pass) = 1;
static void ip_freef(struct ipqhead *, struct ipq *);
-#ifndef VIMAGE_GLOBALS
+#ifdef VIMAGE
+/* XXX only has to stay for .vmi_dependson elsewhere. */
static void vnet_inet_register(void);
static const vnet_modinfo_t vnet_inet_modinfo = {
.vmi_id = VNET_MOD_INET,
.vmi_name = "inet",
- .vmi_size = sizeof(struct vnet_inet)
};
static void vnet_inet_register()
@@ -288,43 +299,10 @@ SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQDROPS, intr_queue_drops,
void
ip_init(void)
{
- INIT_VNET_INET(curvnet);
struct protosw *pr;
int i;
- V_ipsendredirects = 1; /* XXX */
- V_ip_checkinterface = 0;
- V_ip_keepfaith = 0;
- V_ip_sendsourcequench = 0;
- V_rsvp_on = 0;
- V_ip_defttl = IPDEFTTL;
- V_ip_do_randomid = 0;
V_ip_id = time_second & 0xffff;
- V_ipforwarding = 0;
- V_ipstealth = 0;
- V_nipq = 0; /* Total # of reass queues */
-
- V_ipport_lowfirstauto = IPPORT_RESERVED - 1; /* 1023 */
- V_ipport_lowlastauto = IPPORT_RESERVEDSTART; /* 600 */
- V_ipport_firstauto = IPPORT_EPHEMERALFIRST; /* 10000 */
- V_ipport_lastauto = IPPORT_EPHEMERALLAST; /* 65535 */
- V_ipport_hifirstauto = IPPORT_HIFIRSTAUTO; /* 49152 */
- V_ipport_hilastauto = IPPORT_HILASTAUTO; /* 65535 */
- V_ipport_reservedhigh = IPPORT_RESERVED - 1; /* 1023 */
- V_ipport_reservedlow = 0;
- V_ipport_randomized = 1; /* user controlled via sysctl */
- V_ipport_randomcps = 10; /* user controlled via sysctl */
- V_ipport_randomtime = 45; /* user controlled via sysctl */
- V_ipport_stoprandom = 0; /* toggled by ipport_tick */
-
- V_fw_one_pass = 1;
-
-#ifdef NOTYET
- /* XXX global static but not instantiated in this file */
- V_ipfastforward_active = 0;
- V_subnetsarelocal = 0;
- V_sameprefixcarponly = 0;
-#endif
TAILQ_INIT(&V_in_ifaddrhead);
V_in_ifaddrhashtbl = hashinit(INADDR_NHASH, M_IFADDR, &V_in_ifaddrhmask);
@@ -339,7 +317,6 @@ ip_init(void)
maxnipq_update();
#ifdef FLOWTABLE
- V_ip_output_flowtable_size = 2048;
TUNABLE_INT_FETCH("net.inet.ip.output_flowtable_size",
&V_ip_output_flowtable_size);
V_ip_ft = flowtable_alloc(V_ip_output_flowtable_size, FL_PCPU);
@@ -403,7 +380,6 @@ ip_fini(void *xtp)
void
ip_input(struct mbuf *m)
{
- INIT_VNET_INET(curvnet);
struct ip *ip = NULL;
struct in_ifaddr *ia = NULL;
struct ifaddr *ifa;
@@ -808,7 +784,6 @@ bad:
static void
maxnipq_update(void)
{
- INIT_VNET_INET(curvnet);
/*
* -1 for unlimited allocation.
@@ -832,7 +807,6 @@ maxnipq_update(void)
static void
ipq_zone_change(void *tag)
{
- INIT_VNET_INET(curvnet);
if (V_maxnipq > 0 && V_maxnipq < (nmbclusters / 32)) {
V_maxnipq = nmbclusters / 32;
@@ -843,7 +817,6 @@ ipq_zone_change(void *tag)
static int
sysctl_maxnipq(SYSCTL_HANDLER_ARGS)
{
- INIT_VNET_INET(curvnet);
int error, i;
i = V_maxnipq;
@@ -879,7 +852,6 @@ SYSCTL_PROC(_net_inet_ip, OID_AUTO, maxfragpackets, CTLTYPE_INT|CTLFLAG_RW,
struct mbuf *
ip_reass(struct mbuf *m)
{
- INIT_VNET_INET(curvnet);
struct ip *ip;
struct mbuf *p, *q, *nq, *t;
struct ipq *fp = NULL;
@@ -1195,7 +1167,6 @@ done:
static void
ip_freef(struct ipqhead *fhp, struct ipq *fp)
{
- INIT_VNET_INET(curvnet);
struct mbuf *q;
IPQ_LOCK_ASSERT();
@@ -1226,7 +1197,6 @@ ip_slowtimo(void)
VNET_LIST_RLOCK();
VNET_FOREACH(vnet_iter) {
CURVNET_SET(vnet_iter);
- INIT_VNET_INET(vnet_iter);
for (i = 0; i < IPREASS_NHASH; i++) {
for(fp = TAILQ_FIRST(&V_ipq[i]); fp;) {
struct ipq *fpp;
@@ -1275,7 +1245,6 @@ ip_drain(void)
VNET_LIST_RLOCK();
VNET_FOREACH(vnet_iter) {
CURVNET_SET(vnet_iter);
- INIT_VNET_INET(vnet_iter);
for (i = 0; i < IPREASS_NHASH; i++) {
while(!TAILQ_EMPTY(&V_ipq[i])) {
IPSTAT_ADD(ips_fragdropped,
@@ -1403,7 +1372,6 @@ u_char inetctlerrmap[PRC_NCMDS] = {
void
ip_forward(struct mbuf *m, int srcrt)
{
- INIT_VNET_INET(curvnet);
struct ip *ip = mtod(m, struct ip *);
struct in_ifaddr *ia;
struct mbuf *mcopy;
@@ -1638,7 +1606,6 @@ void
ip_savecontrol(struct inpcb *inp, struct mbuf **mp, struct ip *ip,
struct mbuf *m)
{
- INIT_VNET_NET(inp->inp_vnet);
if (inp->inp_socket->so_options & (SO_BINTIME | SO_TIMESTAMP)) {
struct bintime bt;
@@ -1736,7 +1703,6 @@ makedummy:
int
ip_rsvp_init(struct socket *so)
{
- INIT_VNET_INET(so->so_vnet);
if (so->so_type != SOCK_RAW ||
so->so_proto->pr_protocol != IPPROTO_RSVP)
@@ -1761,7 +1727,6 @@ ip_rsvp_init(struct socket *so)
int
ip_rsvp_done(void)
{
- INIT_VNET_INET(curvnet);
V_ip_rsvpd = NULL;
/*
@@ -1778,7 +1743,6 @@ ip_rsvp_done(void)
void
rsvp_input(struct mbuf *m, int off) /* XXX must fixup manually */
{
- INIT_VNET_INET(curvnet);
if (rsvp_input_p) { /* call the real one if loaded */
rsvp_input_p(m, off);
OpenPOWER on IntegriCloud