From 7947b20ebae785ba25154aa1a9a00a98a22de75a Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 4 Jun 2007 21:17:10 -0700 Subject: [BNX2]: Fix netdev watchdog on 5708. There's a bug in the driver that only initializes half of the context memory on the 5708. Surprisingly, this works most of the time except for some occasional netdev watchdogs when sending a lot of 64-byte packets. The fix is to add the missing code to initialize the 2nd halves of all context memory. Signed-off-by: Michael Chan Acked-by: Jeff Garzik Signed-off-by: David S. Miller --- drivers/net/bnx2.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c index da7c3b0..9789f05 100644 --- a/drivers/net/bnx2.c +++ b/drivers/net/bnx2.c @@ -1811,6 +1811,7 @@ bnx2_init_context(struct bnx2 *bp) vcid = 96; while (vcid) { u32 vcid_addr, pcid_addr, offset; + int i; vcid--; @@ -1831,16 +1832,20 @@ bnx2_init_context(struct bnx2 *bp) pcid_addr = vcid_addr; } - REG_WR(bp, BNX2_CTX_VIRT_ADDR, 0x00); - REG_WR(bp, BNX2_CTX_PAGE_TBL, pcid_addr); + for (i = 0; i < (CTX_SIZE / PHY_CTX_SIZE); i++) { + vcid_addr += (i << PHY_CTX_SHIFT); + pcid_addr += (i << PHY_CTX_SHIFT); - /* Zero out the context. */ - for (offset = 0; offset < PHY_CTX_SIZE; offset += 4) { - CTX_WR(bp, 0x00, offset, 0); - } + REG_WR(bp, BNX2_CTX_VIRT_ADDR, 0x00); + REG_WR(bp, BNX2_CTX_PAGE_TBL, pcid_addr); - REG_WR(bp, BNX2_CTX_VIRT_ADDR, vcid_addr); - REG_WR(bp, BNX2_CTX_PAGE_TBL, pcid_addr); + /* Zero out the context. */ + for (offset = 0; offset < PHY_CTX_SIZE; offset += 4) + CTX_WR(bp, 0x00, offset, 0); + + REG_WR(bp, BNX2_CTX_VIRT_ADDR, vcid_addr); + REG_WR(bp, BNX2_CTX_PAGE_TBL, pcid_addr); + } } } -- cgit v1.1 From 641bdcd56c8bb2110a31da846b2752b11a644050 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 4 Jun 2007 21:22:24 -0700 Subject: [BNX2]: Add missing wait in bnx2_init_5709_context(). 
For correctness, we need to wait for the MEM_INIT bit to be cleared in the BNX2_CTX_COMMAND register before proceeding. [Added return -EBUSY when the MEM_INIT bit doesn't clear, suggested by Jeff Garzik.] Signed-off-by: Michael Chan Acked-by: Jeff Garzik Signed-off-by: David S. Miller --- drivers/net/bnx2.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c index 9789f05..9eba7a2 100644 --- a/drivers/net/bnx2.c +++ b/drivers/net/bnx2.c @@ -1778,6 +1778,15 @@ bnx2_init_5709_context(struct bnx2 *bp) val = BNX2_CTX_COMMAND_ENABLED | BNX2_CTX_COMMAND_MEM_INIT | (1 << 12); val |= (BCM_PAGE_BITS - 8) << 16; REG_WR(bp, BNX2_CTX_COMMAND, val); + for (i = 0; i < 10; i++) { + val = REG_RD(bp, BNX2_CTX_COMMAND); + if (!(val & BNX2_CTX_COMMAND_MEM_INIT)) + break; + udelay(2); + } + if (val & BNX2_CTX_COMMAND_MEM_INIT) + return -EBUSY; + for (i = 0; i < bp->ctx_pages; i++) { int j; @@ -3696,9 +3705,11 @@ bnx2_init_chip(struct bnx2 *bp) /* Initialize context mapping and zero out the quick contexts. The * context block must have already been enabled. */ - if (CHIP_NUM(bp) == CHIP_NUM_5709) - bnx2_init_5709_context(bp); - else + if (CHIP_NUM(bp) == CHIP_NUM_5709) { + rc = bnx2_init_5709_context(bp); + if (rc) + return rc; + } else bnx2_init_context(bp); if ((rc = bnx2_init_cpus(bp)) != 0) -- cgit v1.1 From 0aa38df7cd5b6c5b89f5146f4a2286434bc4a8f3 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 4 Jun 2007 21:23:06 -0700 Subject: [BNX2]: Enable DMA on 5709. Add missing code to enable DMA on 5709 A1. The bit is a no-op on A0 and therefore can be set on all 5709 chips. Signed-off-by: Michael Chan Acked-by: Jeff Garzik Signed-off-by: David S. 
Miller --- drivers/net/bnx2.c | 5 +++++ drivers/net/bnx2.h | 1 + 2 files changed, 6 insertions(+) diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c index 9eba7a2..3b7ca2a 100644 --- a/drivers/net/bnx2.c +++ b/drivers/net/bnx2.c @@ -3815,6 +3815,11 @@ bnx2_init_chip(struct bnx2 *bp) /* Initialize the receive filter. */ bnx2_set_rx_mode(bp->dev); + if (CHIP_NUM(bp) == CHIP_NUM_5709) { + val = REG_RD(bp, BNX2_MISC_NEW_CORE_CTL); + val |= BNX2_MISC_NEW_CORE_CTL_DMA_ENABLE; + REG_WR(bp, BNX2_MISC_NEW_CORE_CTL, val); + } rc = bnx2_fw_sync(bp, BNX2_DRV_MSG_DATA_WAIT2 | BNX2_DRV_MSG_CODE_RESET, 0); diff --git a/drivers/net/bnx2.h b/drivers/net/bnx2.h index bd6288d..49a5de2 100644 --- a/drivers/net/bnx2.h +++ b/drivers/net/bnx2.h @@ -1373,6 +1373,7 @@ struct l2_fhdr { #define BNX2_MISC_NEW_CORE_CTL 0x000008c8 #define BNX2_MISC_NEW_CORE_CTL_LINK_HOLDOFF_SUCCESS (1L<<0) #define BNX2_MISC_NEW_CORE_CTL_LINK_HOLDOFF_REQ (1L<<1) +#define BNX2_MISC_NEW_CORE_CTL_DMA_ENABLE (1L<<16) #define BNX2_MISC_NEW_CORE_CTL_RESERVED_CMN (0x3fffL<<2) #define BNX2_MISC_NEW_CORE_CTL_RESERVED_TC (0xffffL<<16) -- cgit v1.1 From 02537b0676930b1bd9aff2139e0e645c79986931 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 4 Jun 2007 21:24:07 -0700 Subject: [BNX2]: Fix occasional counter corruption on 5708. The statistics block DMA on 5708 can be messed up occasionally on the average of about once per hour. If the user is reading the counters within one second after the corruption, the counters will be all messed up. One second later, the counters will be ok again until the next corruption occurs. The workaround is to disable the periodic statistics DMA. Instead, we manually trigger the DMA once a second in bnx2_timer(). This manual trigger of the DMA avoids the problem. As a consequence, we can only allow 0 or 1 second settings for ethtool -C statistics block. Thanks to Jean-Daniel Pauget and CaT for reporting this rare problem. 
Signed-off-by: Michael Chan Acked-by: Jeff Garzik Signed-off-by: David S. Miller --- drivers/net/bnx2.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c index 3b7ca2a..5046b0f 100644 --- a/drivers/net/bnx2.c +++ b/drivers/net/bnx2.c @@ -3788,7 +3788,10 @@ bnx2_init_chip(struct bnx2 *bp) REG_WR(bp, BNX2_HC_CMD_TICKS, (bp->cmd_ticks_int << 16) | bp->cmd_ticks); - REG_WR(bp, BNX2_HC_STATS_TICKS, bp->stats_ticks & 0xffff00); + if (CHIP_NUM(bp) == CHIP_NUM_5708) + REG_WR(bp, BNX2_HC_STATS_TICKS, 0); + else + REG_WR(bp, BNX2_HC_STATS_TICKS, bp->stats_ticks & 0xffff00); REG_WR(bp, BNX2_HC_STAT_COLLECT_TICKS, 0xbb8); /* 3ms */ if (CHIP_ID(bp) == CHIP_ID_5706_A1) @@ -4641,6 +4644,11 @@ bnx2_timer(unsigned long data) bp->stats_blk->stat_FwRxDrop = REG_RD_IND(bp, BNX2_FW_RX_DROP_COUNT); + /* workaround occasional corrupted counters */ + if (CHIP_NUM(bp) == CHIP_NUM_5708 && bp->stats_ticks) + REG_WR(bp, BNX2_HC_COMMAND, bp->hc_cmd | + BNX2_HC_COMMAND_STATS_NOW); + if (bp->phy_flags & PHY_SERDES_FLAG) { if (CHIP_NUM(bp) == CHIP_NUM_5706) bnx2_5706_serdes_timer(bp); @@ -5438,6 +5446,10 @@ bnx2_set_coalesce(struct net_device *dev, struct ethtool_coalesce *coal) 0xff; bp->stats_ticks = coal->stats_block_coalesce_usecs; + if (CHIP_NUM(bp) == CHIP_NUM_5708) { + if (bp->stats_ticks != 0 && bp->stats_ticks != USEC_PER_SEC) + bp->stats_ticks = USEC_PER_SEC; + } if (bp->stats_ticks > 0xffff00) bp->stats_ticks = 0xffff00; bp->stats_ticks &= 0xffff00; -- cgit v1.1 From b91b9fd11210a7023f37eaee1e977ad9ce532095 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 4 Jun 2007 21:24:42 -0700 Subject: [BNX2]: Update version and reldate. Update to version 1.5.11. Signed-off-by: Michael Chan Acked-by: Jeff Garzik Signed-off-by: David S. 
Miller --- drivers/net/bnx2.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c index 5046b0f..ce3ed67 100644 --- a/drivers/net/bnx2.c +++ b/drivers/net/bnx2.c @@ -54,8 +54,8 @@ #define DRV_MODULE_NAME "bnx2" #define PFX DRV_MODULE_NAME ": " -#define DRV_MODULE_VERSION "1.5.10" -#define DRV_MODULE_RELDATE "May 1, 2007" +#define DRV_MODULE_VERSION "1.5.11" +#define DRV_MODULE_RELDATE "June 4, 2007" #define RUN_AT(x) (jiffies + (x)) -- cgit v1.1 From f0e48dbfc5c74e967fea4c0fd0c5ad07557ae0c8 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 4 Jun 2007 21:32:46 -0700 Subject: [TCP]: Honour sk_bound_dev_if in tcp_v4_send_ack A time_wait socket inherits sk_bound_dev_if from the original socket, but it is not used when sending ACK packets using ip_send_reply. Fix by passing the oif to ip_send_reply in struct ip_reply_arg and use it for output routing. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/ip.h | 1 + net/ipv4/ip_output.c | 4 +++- net/ipv4/tcp_ipv4.c | 2 ++ 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/include/net/ip.h b/include/net/ip.h index bb207db..abf2820 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -143,6 +143,7 @@ struct ip_reply_arg { __wsum csum; int csumoffset; /* u16 offset of csum in iov[0].iov_base */ /* -1 if not needed */ + int bound_dev_if; }; void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *arg, diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index d6427d9..34ea454 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1352,7 +1352,8 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar } { - struct flowi fl = { .nl_u = { .ip4_u = + struct flowi fl = { .oif = arg->bound_dev_if, + .nl_u = { .ip4_u = { .daddr = daddr, .saddr = rt->rt_spec_dst, .tos = RT_TOS(ip_hdr(skb)->tos) } }, @@ -1376,6 +1377,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff 
*skb, struct ip_reply_arg *ar inet->tos = ip_hdr(skb)->tos; sk->sk_priority = skb->priority; sk->sk_protocol = ip_hdr(skb)->protocol; + sk->sk_bound_dev_if = arg->bound_dev_if; ip_append_data(sk, ip_reply_glue_bits, arg->iov->iov_base, len, 0, &ipc, rt, MSG_DONTWAIT); if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) { diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 47c6105..97e294e 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -705,6 +705,8 @@ static void tcp_v4_send_ack(struct tcp_timewait_sock *twsk, ip_hdr(skb)->saddr, /* XXX */ arg.iov[0].iov_len, IPPROTO_TCP, 0); arg.csumoffset = offsetof(struct tcphdr, check) / 2; + if (twsk) + arg.bound_dev_if = twsk->tw_sk.tw_bound_dev_if; ip_send_reply(tcp_socket->sk, skb, &arg, arg.iov[0].iov_len); -- cgit v1.1 From 8d76527e728d00d1cf9d5dd663caffb2dcf05ae6 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 4 Jun 2007 23:34:08 -0700 Subject: [IPV4]: Only panic if inetdev_init fails for loopback When I made the inetdev_init call work on all devices I incorrectly left in the panic call as well. It is obviously undesirable to panic on an allocation failure for a normal network device. This patch moves the panic call under the loopback if clause. Signed-off-by: Herbert Xu Signed-off-by: David S. 
Miller --- net/ipv4/devinet.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 7f95e6e..88a22d2 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1057,9 +1057,10 @@ static int inetdev_event(struct notifier_block *this, unsigned long event, if (!in_dev) { if (event == NETDEV_REGISTER) { in_dev = inetdev_init(dev); - if (!in_dev) - panic("devinet: Failed to create loopback\n"); if (dev == &loopback_dev) { + if (!in_dev) + panic("devinet: " + "Failed to create loopback\n"); in_dev->cnf.no_xfrm = 1; in_dev->cnf.no_policy = 1; } -- cgit v1.1 From 42f811b8bcdf6695bf74de580b1daf53445e8949 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 4 Jun 2007 23:34:44 -0700 Subject: [IPV4]: Convert IPv4 devconf to an array This patch converts the ipv4_devconf config members (everything except sysctl) to an array. This allows easier manipulation which will be needed later on to provide better management of default config values. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- drivers/infiniband/hw/amso1100/c2.c | 2 +- include/linux/inetdevice.h | 94 +++++++------ net/ipv4/arp.c | 11 +- net/ipv4/devinet.c | 264 ++++++++++-------------------------- net/ipv4/igmp.c | 18 +-- net/ipv4/ipmr.c | 12 +- net/ipv4/proc.c | 2 +- net/ipv4/route.c | 14 +- net/ipv4/sysctl_net_ipv4.c | 6 +- 9 files changed, 163 insertions(+), 260 deletions(-) diff --git a/drivers/infiniband/hw/amso1100/c2.c b/drivers/infiniband/hw/amso1100/c2.c index 58bc272..0aecea6 100644 --- a/drivers/infiniband/hw/amso1100/c2.c +++ b/drivers/infiniband/hw/amso1100/c2.c @@ -672,7 +672,7 @@ static int c2_up(struct net_device *netdev) * rdma interface. 
*/ in_dev = in_dev_get(netdev); - in_dev->cnf.arp_ignore = 1; + IN_DEV_CONF_SET(in_dev, ARP_IGNORE, 1); in_dev_put(in_dev); return 0; diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index c0f7aec..1ef174d 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -10,28 +10,8 @@ struct ipv4_devconf { - int accept_redirects; - int send_redirects; - int secure_redirects; - int shared_media; - int accept_source_route; - int rp_filter; - int proxy_arp; - int bootp_relay; - int log_martians; - int forwarding; - int mc_forwarding; - int tag; - int arp_filter; - int arp_announce; - int arp_ignore; - int arp_accept; - int medium_id; - int no_xfrm; - int no_policy; - int force_igmp_version; - int promote_secondaries; void *sysctl; + int data[__NET_IPV4_CONF_MAX - 1]; }; extern struct ipv4_devconf ipv4_devconf; @@ -60,30 +40,64 @@ struct in_device struct rcu_head rcu_head; }; -#define IN_DEV_FORWARD(in_dev) ((in_dev)->cnf.forwarding) -#define IN_DEV_MFORWARD(in_dev) (ipv4_devconf.mc_forwarding && (in_dev)->cnf.mc_forwarding) -#define IN_DEV_RPFILTER(in_dev) (ipv4_devconf.rp_filter && (in_dev)->cnf.rp_filter) -#define IN_DEV_SOURCE_ROUTE(in_dev) (ipv4_devconf.accept_source_route && (in_dev)->cnf.accept_source_route) -#define IN_DEV_BOOTP_RELAY(in_dev) (ipv4_devconf.bootp_relay && (in_dev)->cnf.bootp_relay) - -#define IN_DEV_LOG_MARTIANS(in_dev) (ipv4_devconf.log_martians || (in_dev)->cnf.log_martians) -#define IN_DEV_PROXY_ARP(in_dev) (ipv4_devconf.proxy_arp || (in_dev)->cnf.proxy_arp) -#define IN_DEV_SHARED_MEDIA(in_dev) (ipv4_devconf.shared_media || (in_dev)->cnf.shared_media) -#define IN_DEV_TX_REDIRECTS(in_dev) (ipv4_devconf.send_redirects || (in_dev)->cnf.send_redirects) -#define IN_DEV_SEC_REDIRECTS(in_dev) (ipv4_devconf.secure_redirects || (in_dev)->cnf.secure_redirects) -#define IN_DEV_IDTAG(in_dev) ((in_dev)->cnf.tag) -#define IN_DEV_MEDIUM_ID(in_dev) ((in_dev)->cnf.medium_id) -#define IN_DEV_PROMOTE_SECONDARIES(in_dev) 
(ipv4_devconf.promote_secondaries || (in_dev)->cnf.promote_secondaries) +#define IPV4_DEVCONF(cnf, attr) ((cnf).data[NET_IPV4_CONF_ ## attr - 1]) +#define IPV4_DEVCONF_ALL(attr) IPV4_DEVCONF(ipv4_devconf, attr) + +static inline int ipv4_devconf_get(struct in_device *in_dev, int index) +{ + index--; + return in_dev->cnf.data[index]; +} + +static inline void ipv4_devconf_set(struct in_device *in_dev, int index, + int val) +{ + index--; + in_dev->cnf.data[index] = val; +} + +#define IN_DEV_CONF_GET(in_dev, attr) \ + ipv4_devconf_get((in_dev), NET_IPV4_CONF_ ## attr) +#define IN_DEV_CONF_SET(in_dev, attr, val) \ + ipv4_devconf_set((in_dev), NET_IPV4_CONF_ ## attr, (val)) + +#define IN_DEV_ANDCONF(in_dev, attr) \ + (IPV4_DEVCONF_ALL(attr) && IN_DEV_CONF_GET((in_dev), attr)) +#define IN_DEV_ORCONF(in_dev, attr) \ + (IPV4_DEVCONF_ALL(attr) || IN_DEV_CONF_GET((in_dev), attr)) +#define IN_DEV_MAXCONF(in_dev, attr) \ + (max(IPV4_DEVCONF_ALL(attr), IN_DEV_CONF_GET((in_dev), attr))) + +#define IN_DEV_FORWARD(in_dev) IN_DEV_CONF_GET((in_dev), FORWARDING) +#define IN_DEV_MFORWARD(in_dev) (IPV4_DEVCONF_ALL(MC_FORWARDING) && \ + IPV4_DEVCONF((in_dev)->cnf, \ + MC_FORWARDING)) +#define IN_DEV_RPFILTER(in_dev) IN_DEV_ANDCONF((in_dev), RP_FILTER) +#define IN_DEV_SOURCE_ROUTE(in_dev) IN_DEV_ANDCONF((in_dev), \ + ACCEPT_SOURCE_ROUTE) +#define IN_DEV_BOOTP_RELAY(in_dev) IN_DEV_ANDCONF((in_dev), BOOTP_RELAY) + +#define IN_DEV_LOG_MARTIANS(in_dev) IN_DEV_ORCONF((in_dev), LOG_MARTIANS) +#define IN_DEV_PROXY_ARP(in_dev) IN_DEV_ORCONF((in_dev), PROXY_ARP) +#define IN_DEV_SHARED_MEDIA(in_dev) IN_DEV_ORCONF((in_dev), SHARED_MEDIA) +#define IN_DEV_TX_REDIRECTS(in_dev) IN_DEV_ORCONF((in_dev), SEND_REDIRECTS) +#define IN_DEV_SEC_REDIRECTS(in_dev) IN_DEV_ORCONF((in_dev), \ + SECURE_REDIRECTS) +#define IN_DEV_IDTAG(in_dev) IN_DEV_CONF_GET(in_dev, TAG) +#define IN_DEV_MEDIUM_ID(in_dev) IN_DEV_CONF_GET(in_dev, MEDIUM_ID) +#define IN_DEV_PROMOTE_SECONDARIES(in_dev) \ + IN_DEV_ORCONF((in_dev), \ + 
PROMOTE_SECONDARIES) #define IN_DEV_RX_REDIRECTS(in_dev) \ ((IN_DEV_FORWARD(in_dev) && \ - (ipv4_devconf.accept_redirects && (in_dev)->cnf.accept_redirects)) \ + IN_DEV_ANDCONF((in_dev), ACCEPT_REDIRECTS)) \ || (!IN_DEV_FORWARD(in_dev) && \ - (ipv4_devconf.accept_redirects || (in_dev)->cnf.accept_redirects))) + IN_DEV_ORCONF((in_dev), ACCEPT_REDIRECTS))) -#define IN_DEV_ARPFILTER(in_dev) (ipv4_devconf.arp_filter || (in_dev)->cnf.arp_filter) -#define IN_DEV_ARP_ANNOUNCE(in_dev) (max(ipv4_devconf.arp_announce, (in_dev)->cnf.arp_announce)) -#define IN_DEV_ARP_IGNORE(in_dev) (max(ipv4_devconf.arp_ignore, (in_dev)->cnf.arp_ignore)) +#define IN_DEV_ARPFILTER(in_dev) IN_DEV_ORCONF((in_dev), ARPFILTER) +#define IN_DEV_ARP_ANNOUNCE(in_dev) IN_DEV_MAXCONF((in_dev), ARP_ANNOUNCE) +#define IN_DEV_ARP_IGNORE(in_dev) IN_DEV_MAXCONF((in_dev), ARP_IGNORE) struct in_ifaddr { diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 7110779..e00767e 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -877,7 +877,7 @@ static int arp_process(struct sk_buff *skb) n = __neigh_lookup(&arp_tbl, &sip, dev, 0); - if (ipv4_devconf.arp_accept) { + if (IPV4_DEVCONF_ALL(ARP_ACCEPT)) { /* Unsolicited ARP is not accepted by default. 
It is possible, that this option should be enabled for some devices (strip is candidate) @@ -987,11 +987,11 @@ static int arp_req_set(struct arpreq *r, struct net_device * dev) return 0; } if (dev == NULL) { - ipv4_devconf.proxy_arp = 1; + IPV4_DEVCONF_ALL(PROXY_ARP) = 1; return 0; } if (__in_dev_get_rtnl(dev)) { - __in_dev_get_rtnl(dev)->cnf.proxy_arp = 1; + IN_DEV_CONF_SET(__in_dev_get_rtnl(dev), PROXY_ARP, 1); return 0; } return -ENXIO; @@ -1093,11 +1093,12 @@ static int arp_req_delete(struct arpreq *r, struct net_device * dev) return pneigh_delete(&arp_tbl, &ip, dev); if (mask == 0) { if (dev == NULL) { - ipv4_devconf.proxy_arp = 0; + IPV4_DEVCONF_ALL(PROXY_ARP) = 0; return 0; } if (__in_dev_get_rtnl(dev)) { - __in_dev_get_rtnl(dev)->cnf.proxy_arp = 0; + IN_DEV_CONF_SET(__in_dev_get_rtnl(dev), + PROXY_ARP, 0); return 0; } return -ENXIO; diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 88a22d2..0094066 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -64,20 +64,26 @@ #include struct ipv4_devconf ipv4_devconf = { - .accept_redirects = 1, - .send_redirects = 1, - .secure_redirects = 1, - .shared_media = 1, + .data = { + [NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1, + [NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1, + [NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1, + [NET_IPV4_CONF_SHARED_MEDIA - 1] = 1, + }, }; static struct ipv4_devconf ipv4_devconf_dflt = { - .accept_redirects = 1, - .send_redirects = 1, - .secure_redirects = 1, - .shared_media = 1, - .accept_source_route = 1, + .data = { + [NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1, + [NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1, + [NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1, + [NET_IPV4_CONF_SHARED_MEDIA - 1] = 1, + [NET_IPV4_CONF_ACCEPT_SOURCE_ROUTE - 1] = 1, + }, }; +#define IPV4_DEVCONF_DFLT(attr) IPV4_DEVCONF(ipv4_devconf_dflt, attr) + static struct nla_policy ifa_ipv4_policy[IFA_MAX+1] __read_mostly = { [IFA_LOCAL] = { .type = NLA_U32 }, [IFA_ADDRESS] = { .type = NLA_U32 }, @@ -1061,8 +1067,8 @@ static int 
inetdev_event(struct notifier_block *this, unsigned long event, if (!in_dev) panic("devinet: " "Failed to create loopback\n"); - in_dev->cnf.no_xfrm = 1; - in_dev->cnf.no_policy = 1; + IN_DEV_CONF_SET(in_dev, NOXFRM, 1); + IN_DEV_CONF_SET(in_dev, NOPOLICY, 1); } } goto out; @@ -1241,10 +1247,10 @@ errout: void inet_forward_change(void) { struct net_device *dev; - int on = ipv4_devconf.forwarding; + int on = IPV4_DEVCONF_ALL(FORWARDING); - ipv4_devconf.accept_redirects = !on; - ipv4_devconf_dflt.forwarding = on; + IPV4_DEVCONF_ALL(ACCEPT_REDIRECTS) = !on; + IPV4_DEVCONF_DFLT(FORWARDING) = on; read_lock(&dev_base_lock); for_each_netdev(dev) { @@ -1252,7 +1258,7 @@ void inet_forward_change(void) rcu_read_lock(); in_dev = __in_dev_get_rcu(dev); if (in_dev) - in_dev->cnf.forwarding = on; + IN_DEV_CONF_SET(in_dev, FORWARDING, on); rcu_read_unlock(); } read_unlock(&dev_base_lock); @@ -1269,9 +1275,9 @@ static int devinet_sysctl_forward(ctl_table *ctl, int write, int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos); if (write && *valp != val) { - if (valp == &ipv4_devconf.forwarding) + if (valp == &IPV4_DEVCONF_ALL(FORWARDING)) inet_forward_change(); - else if (valp != &ipv4_devconf_dflt.forwarding) + else if (valp != &IPV4_DEVCONF_DFLT(FORWARDING)) rt_cache_flush(0); } @@ -1333,6 +1339,31 @@ int ipv4_doint_and_flush_strategy(ctl_table *table, int __user *name, int nlen, } +#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc, sysctl) \ + { \ + .ctl_name = NET_IPV4_CONF_ ## attr, \ + .procname = name, \ + .data = ipv4_devconf.data + \ + NET_IPV4_CONF_ ## attr - 1, \ + .maxlen = sizeof(int), \ + .mode = mval, \ + .proc_handler = proc, \ + .strategy = sysctl, \ + } + +#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \ + DEVINET_SYSCTL_ENTRY(attr, name, 0644, &proc_dointvec, NULL) + +#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \ + DEVINET_SYSCTL_ENTRY(attr, name, 0444, &proc_dointvec, NULL) + +#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc, sysctl) \ + 
DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc, sysctl) + +#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \ + DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush, \ + ipv4_doint_and_flush_strategy) + static struct devinet_sysctl_table { struct ctl_table_header *sysctl_header; ctl_table devinet_vars[__NET_IPV4_CONF_MAX]; @@ -1342,178 +1373,33 @@ static struct devinet_sysctl_table { ctl_table devinet_root_dir[2]; } devinet_sysctl = { .devinet_vars = { - { - .ctl_name = NET_IPV4_CONF_FORWARDING, - .procname = "forwarding", - .data = &ipv4_devconf.forwarding, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &devinet_sysctl_forward, - }, - { - .ctl_name = NET_IPV4_CONF_MC_FORWARDING, - .procname = "mc_forwarding", - .data = &ipv4_devconf.mc_forwarding, - .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = &proc_dointvec, - }, - { - .ctl_name = NET_IPV4_CONF_ACCEPT_REDIRECTS, - .procname = "accept_redirects", - .data = &ipv4_devconf.accept_redirects, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, - { - .ctl_name = NET_IPV4_CONF_SECURE_REDIRECTS, - .procname = "secure_redirects", - .data = &ipv4_devconf.secure_redirects, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, - { - .ctl_name = NET_IPV4_CONF_SHARED_MEDIA, - .procname = "shared_media", - .data = &ipv4_devconf.shared_media, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, - { - .ctl_name = NET_IPV4_CONF_RP_FILTER, - .procname = "rp_filter", - .data = &ipv4_devconf.rp_filter, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, - { - .ctl_name = NET_IPV4_CONF_SEND_REDIRECTS, - .procname = "send_redirects", - .data = &ipv4_devconf.send_redirects, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, - { - .ctl_name = NET_IPV4_CONF_ACCEPT_SOURCE_ROUTE, - .procname = "accept_source_route", - .data = &ipv4_devconf.accept_source_route, - .maxlen = 
sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, - { - .ctl_name = NET_IPV4_CONF_PROXY_ARP, - .procname = "proxy_arp", - .data = &ipv4_devconf.proxy_arp, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, - { - .ctl_name = NET_IPV4_CONF_MEDIUM_ID, - .procname = "medium_id", - .data = &ipv4_devconf.medium_id, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, - { - .ctl_name = NET_IPV4_CONF_BOOTP_RELAY, - .procname = "bootp_relay", - .data = &ipv4_devconf.bootp_relay, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, - { - .ctl_name = NET_IPV4_CONF_LOG_MARTIANS, - .procname = "log_martians", - .data = &ipv4_devconf.log_martians, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, - { - .ctl_name = NET_IPV4_CONF_TAG, - .procname = "tag", - .data = &ipv4_devconf.tag, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, - { - .ctl_name = NET_IPV4_CONF_ARPFILTER, - .procname = "arp_filter", - .data = &ipv4_devconf.arp_filter, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, - { - .ctl_name = NET_IPV4_CONF_ARP_ANNOUNCE, - .procname = "arp_announce", - .data = &ipv4_devconf.arp_announce, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, - { - .ctl_name = NET_IPV4_CONF_ARP_IGNORE, - .procname = "arp_ignore", - .data = &ipv4_devconf.arp_ignore, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, - { - .ctl_name = NET_IPV4_CONF_ARP_ACCEPT, - .procname = "arp_accept", - .data = &ipv4_devconf.arp_accept, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, - { - .ctl_name = NET_IPV4_CONF_NOXFRM, - .procname = "disable_xfrm", - .data = &ipv4_devconf.no_xfrm, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &ipv4_doint_and_flush, - .strategy = &ipv4_doint_and_flush_strategy, - }, - { - .ctl_name 
= NET_IPV4_CONF_NOPOLICY, - .procname = "disable_policy", - .data = &ipv4_devconf.no_policy, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &ipv4_doint_and_flush, - .strategy = &ipv4_doint_and_flush_strategy, - }, - { - .ctl_name = NET_IPV4_CONF_FORCE_IGMP_VERSION, - .procname = "force_igmp_version", - .data = &ipv4_devconf.force_igmp_version, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &ipv4_doint_and_flush, - .strategy = &ipv4_doint_and_flush_strategy, - }, - { - .ctl_name = NET_IPV4_CONF_PROMOTE_SECONDARIES, - .procname = "promote_secondaries", - .data = &ipv4_devconf.promote_secondaries, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &ipv4_doint_and_flush, - .strategy = &ipv4_doint_and_flush_strategy, - }, + DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding", + devinet_sysctl_forward, NULL), + DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"), + + DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"), + DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"), + DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"), + DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"), + DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"), + DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE, + "accept_source_route"), + DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"), + DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"), + DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"), + DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"), + DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"), + DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"), + DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"), + DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"), + DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"), + + DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"), + DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"), + DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION, + "force_igmp_version"), + DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES, 
+ "promote_secondaries"), }, .devinet_dev = { { diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index f4dd474..a646409 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -128,14 +128,16 @@ * contradict to specs provided this delay is small enough. */ -#define IGMP_V1_SEEN(in_dev) (ipv4_devconf.force_igmp_version == 1 || \ - (in_dev)->cnf.force_igmp_version == 1 || \ - ((in_dev)->mr_v1_seen && \ - time_before(jiffies, (in_dev)->mr_v1_seen))) -#define IGMP_V2_SEEN(in_dev) (ipv4_devconf.force_igmp_version == 2 || \ - (in_dev)->cnf.force_igmp_version == 2 || \ - ((in_dev)->mr_v2_seen && \ - time_before(jiffies, (in_dev)->mr_v2_seen))) +#define IGMP_V1_SEEN(in_dev) \ + (IPV4_DEVCONF_ALL(FORCE_IGMP_VERSION) == 1 || \ + IN_DEV_CONF_GET((in_dev), FORCE_IGMP_VERSION) == 1 || \ + ((in_dev)->mr_v1_seen && \ + time_before(jiffies, (in_dev)->mr_v1_seen))) +#define IGMP_V2_SEEN(in_dev) \ + (IPV4_DEVCONF_ALL(FORCE_IGMP_VERSION) == 2 || \ + IN_DEV_CONF_GET((in_dev), FORCE_IGMP_VERSION) == 2 || \ + ((in_dev)->mr_v2_seen && \ + time_before(jiffies, (in_dev)->mr_v2_seen))) static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im); static void igmpv3_del_delrec(struct in_device *in_dev, __be32 multiaddr); diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 0ebae41..d570d3a 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -154,7 +154,7 @@ struct net_device *ipmr_new_tunnel(struct vifctl *v) in_dev = __in_dev_get_rtnl(dev); if (in_dev == NULL && (in_dev = inetdev_init(dev)) == NULL) goto failure; - in_dev->cnf.rp_filter = 0; + IN_DEV_CONF_SET(in_dev, RP_FILTER, 0); if (dev_open(dev)) goto failure; @@ -221,7 +221,7 @@ static struct net_device *ipmr_reg_vif(void) if ((in_dev = inetdev_init(dev)) == NULL) goto failure; - in_dev->cnf.rp_filter = 0; + IN_DEV_CONF_SET(in_dev, RP_FILTER, 0); if (dev_open(dev)) goto failure; @@ -281,7 +281,7 @@ static int vif_delete(int vifi) dev_set_allmulti(dev, -1); if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) { - 
in_dev->cnf.mc_forwarding--; + IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--; ip_rt_multicast_event(in_dev); } @@ -426,7 +426,7 @@ static int vif_add(struct vifctl *vifc, int mrtsock) if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) return -EADDRNOTAVAIL; - in_dev->cnf.mc_forwarding++; + IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++; dev_set_allmulti(dev, +1); ip_rt_multicast_event(in_dev); @@ -841,7 +841,7 @@ static void mrtsock_destruct(struct sock *sk) { rtnl_lock(); if (sk == mroute_socket) { - ipv4_devconf.mc_forwarding--; + IPV4_DEVCONF_ALL(MC_FORWARDING)--; write_lock_bh(&mrt_lock); mroute_socket=NULL; @@ -890,7 +890,7 @@ int ip_mroute_setsockopt(struct sock *sk,int optname,char __user *optval,int opt mroute_socket=sk; write_unlock_bh(&mrt_lock); - ipv4_devconf.mc_forwarding++; + IPV4_DEVCONF_ALL(MC_FORWARDING)++; } rtnl_unlock(); return ret; diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index cdbc6c1..3b690cf 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -260,7 +260,7 @@ static int snmp_seq_show(struct seq_file *seq, void *v) seq_printf(seq, " %s", snmp4_ipstats_list[i].name); seq_printf(seq, "\nIp: %d %d", - ipv4_devconf.forwarding ? 1 : 2, sysctl_ip_default_ttl); + IPV4_DEVCONF_ALL(FORWARDING) ? 
1 : 2, sysctl_ip_default_ttl); for (i = 0; snmp4_ipstats_list[i].name != NULL; i++) seq_printf(seq, " %lu", diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 8603cfb..29ca63e 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1636,7 +1636,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, atomic_set(&rth->u.dst.__refcnt, 1); rth->u.dst.flags= DST_HOST; - if (in_dev->cnf.no_policy) + if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) rth->u.dst.flags |= DST_NOPOLICY; rth->fl.fl4_dst = daddr; rth->rt_dst = daddr; @@ -1778,9 +1778,9 @@ static inline int __mkroute_input(struct sk_buff *skb, if (res->fi->fib_nhs > 1) rth->u.dst.flags |= DST_BALANCED; #endif - if (in_dev->cnf.no_policy) + if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) rth->u.dst.flags |= DST_NOPOLICY; - if (out_dev->cnf.no_xfrm) + if (IN_DEV_CONF_GET(out_dev, NOXFRM)) rth->u.dst.flags |= DST_NOXFRM; rth->fl.fl4_dst = daddr; rth->rt_dst = daddr; @@ -2021,7 +2021,7 @@ local_input: atomic_set(&rth->u.dst.__refcnt, 1); rth->u.dst.flags= DST_HOST; - if (in_dev->cnf.no_policy) + if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) rth->u.dst.flags |= DST_NOPOLICY; rth->fl.fl4_dst = daddr; rth->rt_dst = daddr; @@ -2218,9 +2218,9 @@ static inline int __mkroute_output(struct rtable **result, rth->u.dst.flags |= DST_BALANCED; } #endif - if (in_dev->cnf.no_xfrm) + if (IN_DEV_CONF_GET(in_dev, NOXFRM)) rth->u.dst.flags |= DST_NOXFRM; - if (in_dev->cnf.no_policy) + if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) rth->u.dst.flags |= DST_NOPOLICY; rth->fl.fl4_dst = oldflp->fl4_dst; @@ -2759,7 +2759,7 @@ static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event, __be32 dst = rt->rt_dst; if (MULTICAST(dst) && !LOCAL_MCAST(dst) && - ipv4_devconf.mc_forwarding) { + IPV4_DEVCONF_ALL(MC_FORWARDING)) { int err = ipmr_get_route(skb, r, nowait); if (err <= 0) { if (!nowait) { diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 6817d64..53ef0f4 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ 
b/net/ipv4/sysctl_net_ipv4.c @@ -37,12 +37,12 @@ static int ipv4_sysctl_forward(ctl_table *ctl, int write, struct file * filp, void __user *buffer, size_t *lenp, loff_t *ppos) { - int val = ipv4_devconf.forwarding; + int val = IPV4_DEVCONF_ALL(FORWARDING); int ret; ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos); - if (write && ipv4_devconf.forwarding != val) + if (write && IPV4_DEVCONF_ALL(FORWARDING) != val) inet_forward_change(); return ret; @@ -222,7 +222,7 @@ ctl_table ipv4_table[] = { { .ctl_name = NET_IPV4_FORWARD, .procname = "ip_forward", - .data = &ipv4_devconf.forwarding, + .data = &IPV4_DEVCONF_ALL(FORWARDING), .maxlen = sizeof(int), .mode = 0644, .proc_handler = &ipv4_sysctl_forward, -- cgit v1.1 From 31be308541e990592a2d0a3e77e8e51bd0cea0e0 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 4 Jun 2007 23:35:37 -0700 Subject: [IPV4]: Add default config support after inetdev_init Previously once inetdev_init has been called on a device any changes made to ipv4_devconf_dflt would have no effect on that device's configuration. This creates a problem since we have moved the point where inetdev_init is called from when an address is added to where the device is registered. This patch is the first half of a set that tries to mimic the old behaviour while still calling inetdev_init. It propagates any changes to ipv4_devconf_dflt to those devices that have not had the corresponding attribute set. The next patch will forcibly set all values at the point where inetdev_init was previously called. Signed-off-by: Herbert Xu Signed-off-by: David S. 
Miller --- include/linux/inetdevice.h | 3 + net/ipv4/devinet.c | 133 +++++++++++++++++++++++++++++++++------------ 2 files changed, 101 insertions(+), 35 deletions(-) diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index 1ef174d..40adefd 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -3,6 +3,7 @@ #ifdef __KERNEL__ +#include #include #include #include @@ -12,6 +13,7 @@ struct ipv4_devconf { void *sysctl; int data[__NET_IPV4_CONF_MAX - 1]; + DECLARE_BITMAP(state, __NET_IPV4_CONF_MAX - 1); }; extern struct ipv4_devconf ipv4_devconf; @@ -53,6 +55,7 @@ static inline void ipv4_devconf_set(struct in_device *in_dev, int index, int val) { index--; + set_bit(index, in_dev->cnf.state); in_dev->cnf.data[index] = val; } diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 0094066..e197347 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1244,6 +1244,91 @@ errout: #ifdef CONFIG_SYSCTL +static void devinet_copy_dflt_conf(int i) +{ + struct net_device *dev; + + read_lock(&dev_base_lock); + for_each_netdev(dev) { + struct in_device *in_dev; + rcu_read_lock(); + in_dev = __in_dev_get_rcu(dev); + if (in_dev && !test_bit(i, in_dev->cnf.state)) + in_dev->cnf.data[i] = ipv4_devconf_dflt.data[i]; + rcu_read_unlock(); + } + read_unlock(&dev_base_lock); +} + +static int devinet_conf_proc(ctl_table *ctl, int write, + struct file* filp, void __user *buffer, + size_t *lenp, loff_t *ppos) +{ + int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos); + + if (write) { + struct ipv4_devconf *cnf = ctl->extra1; + int i = (int *)ctl->data - cnf->data; + + set_bit(i, cnf->state); + + if (cnf == &ipv4_devconf_dflt) + devinet_copy_dflt_conf(i); + } + + return ret; +} + +static int devinet_conf_sysctl(ctl_table *table, int __user *name, int nlen, + void __user *oldval, size_t __user *oldlenp, + void __user *newval, size_t newlen) +{ + struct ipv4_devconf *cnf; + int *valp = table->data; + int new; + int i; + + if (!newval || 
!newlen) + return 0; + + if (newlen != sizeof(int)) + return -EINVAL; + + if (get_user(new, (int __user *)newval)) + return -EFAULT; + + if (new == *valp) + return 0; + + if (oldval && oldlenp) { + size_t len; + + if (get_user(len, oldlenp)) + return -EFAULT; + + if (len) { + if (len > table->maxlen) + len = table->maxlen; + if (copy_to_user(oldval, valp, len)) + return -EFAULT; + if (put_user(len, oldlenp)) + return -EFAULT; + } + } + + *valp = new; + + cnf = table->extra1; + i = (int *)table->data - cnf->data; + + set_bit(i, cnf->state); + + if (cnf == &ipv4_devconf_dflt) + devinet_copy_dflt_conf(i); + + return 1; +} + void inet_forward_change(void) { struct net_device *dev; @@ -1302,40 +1387,13 @@ int ipv4_doint_and_flush_strategy(ctl_table *table, int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen) { - int *valp = table->data; - int new; - - if (!newval || !newlen) - return 0; - - if (newlen != sizeof(int)) - return -EINVAL; + int ret = devinet_conf_sysctl(table, name, nlen, oldval, oldlenp, + newval, newlen); - if (get_user(new, (int __user *)newval)) - return -EFAULT; - - if (new == *valp) - return 0; - - if (oldval && oldlenp) { - size_t len; - - if (get_user(len, oldlenp)) - return -EFAULT; - - if (len) { - if (len > table->maxlen) - len = table->maxlen; - if (copy_to_user(oldval, valp, len)) - return -EFAULT; - if (put_user(len, oldlenp)) - return -EFAULT; - } - } + if (ret == 1) + rt_cache_flush(0); - *valp = new; - rt_cache_flush(0); - return 1; + return ret; } @@ -1349,13 +1407,16 @@ int ipv4_doint_and_flush_strategy(ctl_table *table, int __user *name, int nlen, .mode = mval, \ .proc_handler = proc, \ .strategy = sysctl, \ + .extra1 = &ipv4_devconf, \ } #define DEVINET_SYSCTL_RW_ENTRY(attr, name) \ - DEVINET_SYSCTL_ENTRY(attr, name, 0644, &proc_dointvec, NULL) + DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc, \ + devinet_conf_sysctl) #define DEVINET_SYSCTL_RO_ENTRY(attr, name) \ - 
DEVINET_SYSCTL_ENTRY(attr, name, 0444, &proc_dointvec, NULL) + DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc, \ + devinet_conf_sysctl) #define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc, sysctl) \ DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc, sysctl) @@ -1374,7 +1435,8 @@ static struct devinet_sysctl_table { } devinet_sysctl = { .devinet_vars = { DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding", - devinet_sysctl_forward, NULL), + devinet_sysctl_forward, + devinet_conf_sysctl), DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"), DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"), @@ -1448,6 +1510,7 @@ static void devinet_sysctl_register(struct in_device *in_dev, return; for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) { t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf; + t->devinet_vars[i].extra1 = p; } if (dev) { -- cgit v1.1 From 71e27da9618b5f4d525ec821def83991da20429f Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 4 Jun 2007 23:36:06 -0700 Subject: [IPV4]: Restore old behaviour of default config values Previously inet devices were only constructed when addresses are added (or rarely in ipmr). Therefore the default config values they get are the ones at the time of these operations. Now that we're creating inet devices earlier, this changes the behaviour of default config values in an incompatible way (see bug #8519). This patch creates a compromise by setting the default values at the same point as before but only for those that have not been explicitly set by the user since the inet device's creation. Signed-off-by: Herbert Xu Signed-off-by: David S. 
Miller --- include/linux/inetdevice.h | 6 +++++- net/ipv4/devinet.c | 19 ++++++++----------- net/ipv4/ipmr.c | 15 +++++++++++---- 3 files changed, 24 insertions(+), 16 deletions(-) diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index 40adefd..ae04901 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -59,6 +59,11 @@ static inline void ipv4_devconf_set(struct in_device *in_dev, int index, in_dev->cnf.data[index] = val; } +static inline void ipv4_devconf_setall(struct in_device *in_dev) +{ + bitmap_fill(in_dev->cnf.state, __NET_IPV4_CONF_MAX - 1); +} + #define IN_DEV_CONF_GET(in_dev, attr) \ ipv4_devconf_get((in_dev), NET_IPV4_CONF_ ## attr) #define IN_DEV_CONF_SET(in_dev, attr, val) \ @@ -125,7 +130,6 @@ extern struct net_device *ip_dev_find(__be32 addr); extern int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b); extern int devinet_ioctl(unsigned int cmd, void __user *); extern void devinet_init(void); -extern struct in_device *inetdev_init(struct net_device *dev); extern struct in_device *inetdev_by_index(int); extern __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope); extern __be32 inet_confirm_addr(const struct net_device *dev, __be32 dst, __be32 local, int scope); diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index e197347..354e800 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -147,7 +147,7 @@ void in_dev_finish_destroy(struct in_device *idev) } } -struct in_device *inetdev_init(struct net_device *dev) +static struct in_device *inetdev_init(struct net_device *dev) { struct in_device *in_dev; @@ -405,12 +405,10 @@ static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa) ASSERT_RTNL(); if (!in_dev) { - in_dev = inetdev_init(dev); - if (!in_dev) { - inet_free_ifa(ifa); - return -ENOBUFS; - } + inet_free_ifa(ifa); + return -ENOBUFS; } + ipv4_devconf_setall(in_dev); if (ifa->ifa_dev != in_dev) { BUG_TRAP(!ifa->ifa_dev); in_dev_hold(in_dev); @@ 
-520,13 +518,12 @@ static struct in_ifaddr *rtm_to_ifaddr(struct nlmsghdr *nlh) in_dev = __in_dev_get_rtnl(dev); if (in_dev == NULL) { - in_dev = inetdev_init(dev); - if (in_dev == NULL) { - err = -ENOBUFS; - goto errout; - } + err = -ENOBUFS; + goto errout; } + ipv4_devconf_setall(in_dev); + ifa = inet_alloc_ifa(); if (ifa == NULL) { /* diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index d570d3a..d96582a 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -152,9 +152,11 @@ struct net_device *ipmr_new_tunnel(struct vifctl *v) dev->flags |= IFF_MULTICAST; in_dev = __in_dev_get_rtnl(dev); - if (in_dev == NULL && (in_dev = inetdev_init(dev)) == NULL) + if (in_dev == NULL) goto failure; - IN_DEV_CONF_SET(in_dev, RP_FILTER, 0); + + ipv4_devconf_setall(in_dev); + IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0; if (dev_open(dev)) goto failure; @@ -218,10 +220,15 @@ static struct net_device *ipmr_reg_vif(void) } dev->iflink = 0; - if ((in_dev = inetdev_init(dev)) == NULL) + rcu_read_lock(); + if ((in_dev = __in_dev_get_rcu(dev)) == NULL) { + rcu_read_unlock(); goto failure; + } - IN_DEV_CONF_SET(in_dev, RP_FILTER, 0); + ipv4_devconf_setall(in_dev); + IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0; + rcu_read_unlock(); if (dev_open(dev)) goto failure; -- cgit v1.1 From c36befb52350fa76bf3cb03d13ba2e17dbe8a508 Mon Sep 17 00:00:00 2001 From: Ivo van Doorn Date: Tue, 5 Jun 2007 00:04:46 -0700 Subject: [RFKILL]: Make rfkill->name const The rfkill name can be made const safely, this makes the compiler happy when drivers make it point to some const string used elsewhere. Signed-off-by: Ivo van Doorn Signed-off-by: David S. Miller --- include/linux/rfkill.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h index 7c1ffba..a8a6ea8 100644 --- a/include/linux/rfkill.h +++ b/include/linux/rfkill.h @@ -63,7 +63,7 @@ enum rfkill_state { * This structure represents a RF switch located on a network device. 
*/ struct rfkill { - char *name; + const char *name; enum rfkill_type type; enum rfkill_state state; -- cgit v1.1 From c4b1010f406d7c3f819c22a6323c46776d5b148c Mon Sep 17 00:00:00 2001 From: Denis Cheng Date: Tue, 5 Jun 2007 00:06:57 -0700 Subject: [NET]: Merge dst_discard_in and dst_discard_out. Signed-off-by: Denis Cheng Signed-off-by: David S. Miller --- net/core/dst.c | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/net/core/dst.c b/net/core/dst.c index 764bccb..c6a05879 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -111,13 +111,7 @@ out: spin_unlock(&dst_lock); } -static int dst_discard_in(struct sk_buff *skb) -{ - kfree_skb(skb); - return 0; -} - -static int dst_discard_out(struct sk_buff *skb) +static int dst_discard(struct sk_buff *skb) { kfree_skb(skb); return 0; @@ -138,8 +132,7 @@ void * dst_alloc(struct dst_ops * ops) dst->ops = ops; dst->lastuse = jiffies; dst->path = dst; - dst->input = dst_discard_in; - dst->output = dst_discard_out; + dst->input = dst->output = dst_discard; #if RT_CACHE_DEBUG >= 2 atomic_inc(&dst_total); #endif @@ -153,8 +146,7 @@ static void ___dst_free(struct dst_entry * dst) protocol module is unloaded. */ if (dst->dev == NULL || !(dst->dev->flags&IFF_UP)) { - dst->input = dst_discard_in; - dst->output = dst_discard_out; + dst->input = dst->output = dst_discard; } dst->obsolete = 2; } @@ -242,8 +234,7 @@ static inline void dst_ifdown(struct dst_entry *dst, struct net_device *dev, return; if (!unregister) { - dst->input = dst_discard_in; - dst->output = dst_discard_out; + dst->input = dst->output = dst_discard; } else { dst->dev = &loopback_dev; dev_hold(&loopback_dev); -- cgit v1.1 From 274707cff9810b784c548ed169298617a1bc3528 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 5 Jun 2007 00:16:12 -0700 Subject: [TCP]: Use LIMIT_NETDEBUG in tcp_retransmit_timer(). LIMIT_NETDEBUG allows the admin to disable some warning messages (echo 0 >/proc/sys/net/core/warnings). 
The "TCP: Treason uncloaked!" message can use this facility. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_timer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index e613401..e9b151b 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -292,9 +292,9 @@ static void tcp_retransmit_timer(struct sock *sk) * we cannot allow such beasts to hang infinitely. */ #ifdef TCP_DEBUG - if (net_ratelimit()) { + if (1) { struct inet_sock *inet = inet_sk(sk); - printk(KERN_DEBUG "TCP: Treason uncloaked! Peer %u.%u.%u.%u:%u/%u shrinks window %u:%u. Repaired.\n", + LIMIT_NETDEBUG(KERN_DEBUG "TCP: Treason uncloaked! Peer %u.%u.%u.%u:%u/%u shrinks window %u:%u. Repaired.\n", NIPQUAD(inet->daddr), ntohs(inet->dport), inet->num, tp->snd_una, tp->snd_nxt); } -- cgit v1.1 From 14a49e1fd2bb91ba2bf0e1f06711b6dbc21de02d Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 5 Jun 2007 00:19:24 -0700 Subject: [TCP] tcp_probe: Attach printf attribute properly to printl(). GCC doesn't like the way Stephen initially did it: net/ipv4/tcp_probe.c:83: warning: empty declaration Signed-off-by: David S. Miller --- net/ipv4/tcp_probe.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c index 760165a..d9323df 100644 --- a/net/ipv4/tcp_probe.c +++ b/net/ipv4/tcp_probe.c @@ -63,6 +63,9 @@ struct { * FIXME: causes an extra copy */ static void printl(const char *fmt, ...) + __attribute__ ((format (printf, 1, 2))); + +static void printl(const char *fmt, ...) { va_list args; int len; @@ -80,8 +83,7 @@ static void printl(const char *fmt, ...) kfifo_put(tcpw.fifo, tbuf, len); wake_up(&tcpw.wait); -} __attribute__ ((format (printf, 1, 2))); - +} /* * Hook inserted to be called before each receive packet. 
-- cgit v1.1 From ef7c79ed645f52bcbdd88f8d54a9702c4d3fd15d Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 5 Jun 2007 12:38:30 -0700 Subject: [NETLINK]: Mark netlink policies const Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/fib_rules.h | 2 +- include/net/genetlink.h | 2 +- include/net/ip_fib.h | 2 +- include/net/netlink.h | 12 ++++++------ net/core/neighbour.c | 4 ++-- net/core/rtnetlink.c | 2 +- net/decnet/dn_dev.c | 2 +- net/decnet/dn_rules.c | 2 +- net/ipv4/devinet.c | 2 +- net/ipv4/fib_frontend.c | 2 +- net/ipv4/fib_rules.c | 2 +- net/ipv6/addrconf.c | 2 +- net/ipv6/fib6_rules.c | 2 +- net/ipv6/route.c | 2 +- net/netlabel/netlabel_cipso_v4.c | 2 +- net/netlabel/netlabel_mgmt.c | 2 +- net/netlabel/netlabel_unlabeled.c | 2 +- net/netlink/attr.c | 8 ++++---- net/netlink/genetlink.c | 2 +- 19 files changed, 28 insertions(+), 28 deletions(-) diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index ed3a887..83e41dd 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -64,7 +64,7 @@ struct fib_rules_ops void (*flush_cache)(void); int nlgroup; - struct nla_policy *policy; + const struct nla_policy *policy; struct list_head *rules_list; struct module *owner; }; diff --git a/include/net/genetlink.h b/include/net/genetlink.h index adff4c8..b6eaca1 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -60,7 +60,7 @@ struct genl_ops { u8 cmd; unsigned int flags; - struct nla_policy *policy; + const struct nla_policy *policy; int (*doit)(struct sk_buff *skb, struct genl_info *info); int (*dumpit)(struct sk_buff *skb, diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 5a4a0366..69252cb 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -213,7 +213,7 @@ extern void fib_select_default(const struct flowi *flp, struct fib_result *res); #endif /* CONFIG_IP_MULTIPLE_TABLES */ /* Exported by fib_frontend.c */ -extern struct nla_policy rtm_ipv4_policy[]; +extern const 
struct nla_policy rtm_ipv4_policy[]; extern void ip_fib_init(void); extern int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, struct net_device *dev, __be32 *spec_dst, u32 *itag); diff --git a/include/net/netlink.h b/include/net/netlink.h index 0bf325c..7b510a9 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -222,10 +222,10 @@ extern int nlmsg_notify(struct sock *sk, struct sk_buff *skb, gfp_t flags); extern int nla_validate(struct nlattr *head, int len, int maxtype, - struct nla_policy *policy); + const struct nla_policy *policy); extern int nla_parse(struct nlattr *tb[], int maxtype, struct nlattr *head, int len, - struct nla_policy *policy); + const struct nla_policy *policy); extern struct nlattr * nla_find(struct nlattr *head, int len, int attrtype); extern size_t nla_strlcpy(char *dst, const struct nlattr *nla, size_t dstsize); @@ -360,7 +360,7 @@ static inline struct nlmsghdr *nlmsg_next(struct nlmsghdr *nlh, int *remaining) */ static inline int nlmsg_parse(struct nlmsghdr *nlh, int hdrlen, struct nlattr *tb[], int maxtype, - struct nla_policy *policy) + const struct nla_policy *policy) { if (nlh->nlmsg_len < nlmsg_msg_size(hdrlen)) return -EINVAL; @@ -392,7 +392,7 @@ static inline struct nlattr *nlmsg_find_attr(struct nlmsghdr *nlh, * @policy: validation policy */ static inline int nlmsg_validate(struct nlmsghdr *nlh, int hdrlen, int maxtype, - struct nla_policy *policy) + const struct nla_policy *policy) { if (nlh->nlmsg_len < nlmsg_msg_size(hdrlen)) return -EINVAL; @@ -729,7 +729,7 @@ static inline struct nlattr *nla_find_nested(struct nlattr *nla, int attrtype) */ static inline int nla_parse_nested(struct nlattr *tb[], int maxtype, struct nlattr *nla, - struct nla_policy *policy) + const struct nla_policy *policy) { return nla_parse(tb, maxtype, nla_data(nla), nla_len(nla), policy); } @@ -990,7 +990,7 @@ static inline int nla_nest_cancel(struct sk_buff *skb, struct nlattr *start) * Returns 0 on success or a negative error 
code. */ static inline int nla_validate_nested(struct nlattr *start, int maxtype, - struct nla_policy *policy) + const struct nla_policy *policy) { return nla_validate(nla_data(start), nla_len(start), maxtype, policy); } diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 6f3bb73..9df26a0 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1761,7 +1761,7 @@ static inline struct neigh_parms *lookup_neigh_params(struct neigh_table *tbl, return NULL; } -static struct nla_policy nl_neightbl_policy[NDTA_MAX+1] __read_mostly = { +static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = { [NDTA_NAME] = { .type = NLA_STRING }, [NDTA_THRESH1] = { .type = NLA_U32 }, [NDTA_THRESH2] = { .type = NLA_U32 }, @@ -1770,7 +1770,7 @@ static struct nla_policy nl_neightbl_policy[NDTA_MAX+1] __read_mostly = { [NDTA_PARMS] = { .type = NLA_NESTED }, }; -static struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] __read_mostly = { +static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = { [NDTPA_IFINDEX] = { .type = NLA_U32 }, [NDTPA_QUEUE_LEN] = { .type = NLA_U32 }, [NDTPA_PROXY_QLEN] = { .type = NLA_U32 }, diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 27da9cd..a8a5093 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -551,7 +551,7 @@ cont: return skb->len; } -static struct nla_policy ifla_policy[IFLA_MAX+1] __read_mostly = { +static const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_IFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ-1 }, [IFLA_MAP] = { .len = sizeof(struct rtnl_link_ifmap) }, [IFLA_MTU] = { .type = NLA_U32 }, diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index 764a56a..ab41c18 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -638,7 +638,7 @@ static struct dn_dev *dn_dev_by_index(int ifindex) return dn_dev; } -static struct nla_policy dn_ifa_policy[IFA_MAX+1] __read_mostly = { +static const struct nla_policy dn_ifa_policy[IFA_MAX+1] = { [IFA_ADDRESS] = { .type = NLA_U16 }, 
[IFA_LOCAL] = { .type = NLA_U16 }, [IFA_LABEL] = { .type = NLA_STRING, diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c index 17a1932..84ff3dd 100644 --- a/net/decnet/dn_rules.c +++ b/net/decnet/dn_rules.c @@ -108,7 +108,7 @@ errout: return err; } -static struct nla_policy dn_fib_rule_policy[FRA_MAX+1] __read_mostly = { +static const struct nla_policy dn_fib_rule_policy[FRA_MAX+1] = { FRA_GENERIC_POLICY, }; diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 354e800..fa97b96 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -84,7 +84,7 @@ static struct ipv4_devconf ipv4_devconf_dflt = { #define IPV4_DEVCONF_DFLT(attr) IPV4_DEVCONF(ipv4_devconf_dflt, attr) -static struct nla_policy ifa_ipv4_policy[IFA_MAX+1] __read_mostly = { +static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = { [IFA_LOCAL] = { .type = NLA_U32 }, [IFA_ADDRESS] = { .type = NLA_U32 }, [IFA_BROADCAST] = { .type = NLA_U32 }, diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 9ad1f62..311d633 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -441,7 +441,7 @@ int ip_rt_ioctl(unsigned int cmd, void __user *arg) return -EINVAL; } -struct nla_policy rtm_ipv4_policy[RTA_MAX+1] __read_mostly = { +const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = { [RTA_DST] = { .type = NLA_U32 }, [RTA_SRC] = { .type = NLA_U32 }, [RTA_IIF] = { .type = NLA_U32 }, diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 33083ad..2a94784 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -169,7 +169,7 @@ static struct fib_table *fib_empty_table(void) return NULL; } -static struct nla_policy fib4_rule_policy[FRA_MAX+1] __read_mostly = { +static const struct nla_policy fib4_rule_policy[FRA_MAX+1] = { FRA_GENERIC_POLICY, [FRA_FLOW] = { .type = NLA_U32 }, }; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 329de67..5a5f8bd 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2990,7 +2990,7 @@ static struct in6_addr 
*extract_addr(struct nlattr *addr, struct nlattr *local) return pfx; } -static struct nla_policy ifa_ipv6_policy[IFA_MAX+1] __read_mostly = { +static const struct nla_policy ifa_ipv6_policy[IFA_MAX+1] = { [IFA_ADDRESS] = { .len = sizeof(struct in6_addr) }, [IFA_LOCAL] = { .len = sizeof(struct in6_addr) }, [IFA_CACHEINFO] = { .len = sizeof(struct ifa_cacheinfo) }, diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index fc3882c..53b3998 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -157,7 +157,7 @@ static int fib6_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) return 1; } -static struct nla_policy fib6_rule_policy[FRA_MAX+1] __read_mostly = { +static const struct nla_policy fib6_rule_policy[FRA_MAX+1] = { FRA_GENERIC_POLICY, }; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 1324b06..fe8d983 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1999,7 +1999,7 @@ void rt6_mtu_change(struct net_device *dev, unsigned mtu) fib6_clean_all(rt6_mtu_change_route, 0, &arg); } -static struct nla_policy rtm_ipv6_policy[RTA_MAX+1] __read_mostly = { +static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = { [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) }, [RTA_OIF] = { .type = NLA_U32 }, [RTA_IIF] = { .type = NLA_U32 }, diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c index 07e47db..24b660f 100644 --- a/net/netlabel/netlabel_cipso_v4.c +++ b/net/netlabel/netlabel_cipso_v4.c @@ -59,7 +59,7 @@ static struct genl_family netlbl_cipsov4_gnl_family = { }; /* NetLabel Netlink attribute policy */ -static struct nla_policy netlbl_cipsov4_genl_policy[NLBL_CIPSOV4_A_MAX + 1] = { +static const struct nla_policy netlbl_cipsov4_genl_policy[NLBL_CIPSOV4_A_MAX + 1] = { [NLBL_CIPSOV4_A_DOI] = { .type = NLA_U32 }, [NLBL_CIPSOV4_A_MTYPE] = { .type = NLA_U32 }, [NLBL_CIPSOV4_A_TAG] = { .type = NLA_U8 }, diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c index e8c80f3..e00fc21 100644 
--- a/net/netlabel/netlabel_mgmt.c +++ b/net/netlabel/netlabel_mgmt.c @@ -59,7 +59,7 @@ static struct genl_family netlbl_mgmt_gnl_family = { }; /* NetLabel Netlink attribute policy */ -static struct nla_policy netlbl_mgmt_genl_policy[NLBL_MGMT_A_MAX + 1] = { +static const struct nla_policy netlbl_mgmt_genl_policy[NLBL_MGMT_A_MAX + 1] = { [NLBL_MGMT_A_DOMAIN] = { .type = NLA_NUL_STRING }, [NLBL_MGMT_A_PROTOCOL] = { .type = NLA_U32 }, [NLBL_MGMT_A_VERSION] = { .type = NLA_U32 }, diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index b931ede..5c303c68a 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -61,7 +61,7 @@ static struct genl_family netlbl_unlabel_gnl_family = { }; /* NetLabel Netlink attribute policy */ -static struct nla_policy netlbl_unlabel_genl_policy[NLBL_UNLABEL_A_MAX + 1] = { +static const struct nla_policy netlbl_unlabel_genl_policy[NLBL_UNLABEL_A_MAX + 1] = { [NLBL_UNLABEL_A_ACPTFLG] = { .type = NLA_U8 }, }; diff --git a/net/netlink/attr.c b/net/netlink/attr.c index df5f820..c591212 100644 --- a/net/netlink/attr.c +++ b/net/netlink/attr.c @@ -24,9 +24,9 @@ static u16 nla_attr_minlen[NLA_TYPE_MAX+1] __read_mostly = { }; static int validate_nla(struct nlattr *nla, int maxtype, - struct nla_policy *policy) + const struct nla_policy *policy) { - struct nla_policy *pt; + const struct nla_policy *pt; int minlen = 0, attrlen = nla_len(nla); if (nla->nla_type <= 0 || nla->nla_type > maxtype) @@ -99,7 +99,7 @@ static int validate_nla(struct nlattr *nla, int maxtype, * Returns 0 on success or a negative error code. */ int nla_validate(struct nlattr *head, int len, int maxtype, - struct nla_policy *policy) + const struct nla_policy *policy) { struct nlattr *nla; int rem, err; @@ -130,7 +130,7 @@ errout: * Returns 0 on success or a negative error code. 
*/ int nla_parse(struct nlattr *tb[], int maxtype, struct nlattr *head, int len, - struct nla_policy *policy) + const struct nla_policy *policy) { struct nlattr *nla; int rem, err; diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 6e31234..b9ab62f 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -472,7 +472,7 @@ static struct sk_buff *ctrl_build_msg(struct genl_family *family, u32 pid, return skb; } -static struct nla_policy ctrl_policy[CTRL_ATTR_MAX+1] __read_mostly = { +static const struct nla_policy ctrl_policy[CTRL_ATTR_MAX+1] = { [CTRL_ATTR_FAMILY_ID] = { .type = NLA_U16 }, [CTRL_ATTR_FAMILY_NAME] = { .type = NLA_NUL_STRING, .len = GENL_NAMSIZ - 1 }, -- cgit v1.1 From 51055be81c3cb14d0165a7432b787098b817fd35 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 5 Jun 2007 12:40:01 -0700 Subject: [RTNETLINK]: ifindex 0 does not exist ifindex == 0 does not exist and implies we should do a lookup by name if one was given. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index a8a5093..02e8bf0 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -580,7 +580,7 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) err = -EINVAL; ifm = nlmsg_data(nlh); - if (ifm->ifi_index >= 0) + if (ifm->ifi_index > 0) dev = dev_get_by_index(ifm->ifi_index); else if (tb[IFLA_IFNAME]) dev = dev_get_by_name(ifname); @@ -672,7 +672,7 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) * name provided implies that a name change has been * requested. 
*/ - if (ifm->ifi_index >= 0 && ifname[0]) { + if (ifm->ifi_index > 0 && ifname[0]) { err = dev_change_name(dev, ifname); if (err < 0) goto errout_dev; @@ -740,7 +740,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) return err; ifm = nlmsg_data(nlh); - if (ifm->ifi_index >= 0) { + if (ifm->ifi_index > 0) { dev = dev_get_by_index(ifm->ifi_index); if (dev == NULL) return -ENODEV; -- cgit v1.1 From 3c158f7f57601bc27eab82f0dc4fd3fad314d845 Mon Sep 17 00:00:00 2001 From: Patrick McHarrdy Date: Tue, 5 Jun 2007 12:55:27 -0700 Subject: [NETFILTER]: nf_conntrack: fix helper module unload races When a helper module is unloaded all conntracks referring to it have their helper pointer NULLed out, leading to lots of races. In most places this can be fixed by proper use of RCU (they do already check for != NULL, but in a racy way); additionally, nf_conntrack_expect_related needs to bail out when no helper is present. Also remove two paranoid BUG_ONs in nf_conntrack_proto_gre that are racy and not worth fixing. Signed-off-by: Patrick McHarrdy Signed-off-by: David S. 
Miller --- net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 13 ++++++---- net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 9 +++++-- net/netfilter/nf_conntrack_core.c | 26 ++++++++++++++------ net/netfilter/nf_conntrack_expect.c | 4 +++ net/netfilter/nf_conntrack_helper.c | 2 +- net/netfilter/nf_conntrack_netlink.c | 34 +++++++++++++++++--------- net/netfilter/nf_conntrack_proto_gre.c | 2 -- 7 files changed, 61 insertions(+), 29 deletions(-) diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index fd62a41..6dc72a8 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -133,6 +133,7 @@ static unsigned int ipv4_conntrack_help(unsigned int hooknum, struct nf_conn *ct; enum ip_conntrack_info ctinfo; struct nf_conn_help *help; + struct nf_conntrack_helper *helper; /* This is where we call the helper: as the packet goes out. */ ct = nf_ct_get(*pskb, &ctinfo); @@ -140,12 +141,14 @@ static unsigned int ipv4_conntrack_help(unsigned int hooknum, return NF_ACCEPT; help = nfct_help(ct); - if (!help || !help->helper) + if (!help) return NF_ACCEPT; - - return help->helper->help(pskb, - skb_network_offset(*pskb) + ip_hdrlen(*pskb), - ct, ctinfo); + /* rcu_read_lock()ed by nf_hook_slow */ + helper = rcu_dereference(help->helper); + if (!helper) + return NF_ACCEPT; + return helper->help(pskb, skb_network_offset(*pskb) + ip_hdrlen(*pskb), + ct, ctinfo); } static unsigned int ipv4_conntrack_defrag(unsigned int hooknum, diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index dc442fb..1b1797f 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -160,6 +160,7 @@ static unsigned int ipv6_confirm(unsigned int hooknum, { struct nf_conn *ct; struct nf_conn_help *help; + struct nf_conntrack_helper *helper; enum ip_conntrack_info ctinfo; unsigned int ret, 
protoff; unsigned int extoff = (u8 *)(ipv6_hdr(*pskb) + 1) - (*pskb)->data; @@ -172,7 +173,11 @@ static unsigned int ipv6_confirm(unsigned int hooknum, goto out; help = nfct_help(ct); - if (!help || !help->helper) + if (!help) + goto out; + /* rcu_read_lock()ed by nf_hook_slow */ + helper = rcu_dereference(help->helper); + if (!helper) goto out; protoff = nf_ct_ipv6_skip_exthdr(*pskb, extoff, &pnum, @@ -182,7 +187,7 @@ static unsigned int ipv6_confirm(unsigned int hooknum, return NF_ACCEPT; } - ret = help->helper->help(pskb, protoff, ct, ctinfo); + ret = helper->help(pskb, protoff, ct, ctinfo); if (ret != NF_ACCEPT) return ret; out: diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 483e927..7a15e30 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -350,9 +350,15 @@ static void death_by_timeout(unsigned long ul_conntrack) { struct nf_conn *ct = (void *)ul_conntrack; struct nf_conn_help *help = nfct_help(ct); + struct nf_conntrack_helper *helper; - if (help && help->helper && help->helper->destroy) - help->helper->destroy(ct); + if (help) { + rcu_read_lock(); + helper = rcu_dereference(help->helper); + if (helper && helper->destroy) + helper->destroy(ct); + rcu_read_unlock(); + } write_lock_bh(&nf_conntrack_lock); /* Inside lock so preempt is disabled on module removal path. 
@@ -661,6 +667,7 @@ init_conntrack(const struct nf_conntrack_tuple *tuple, unsigned int dataoff) { struct nf_conn *conntrack; + struct nf_conn_help *help; struct nf_conntrack_tuple repl_tuple; struct nf_conntrack_expect *exp; u_int32_t features = 0; @@ -691,6 +698,7 @@ init_conntrack(const struct nf_conntrack_tuple *tuple, write_lock_bh(&nf_conntrack_lock); exp = find_expectation(tuple); + help = nfct_help(conntrack); if (exp) { DEBUGP("conntrack: expectation arrives ct=%p exp=%p\n", conntrack, exp); @@ -698,7 +706,7 @@ init_conntrack(const struct nf_conntrack_tuple *tuple, __set_bit(IPS_EXPECTED_BIT, &conntrack->status); conntrack->master = exp->master; if (exp->helper) - nfct_help(conntrack)->helper = exp->helper; + rcu_assign_pointer(help->helper, exp->helper); #ifdef CONFIG_NF_CONNTRACK_MARK conntrack->mark = exp->master->mark; #endif @@ -708,10 +716,11 @@ init_conntrack(const struct nf_conntrack_tuple *tuple, nf_conntrack_get(&conntrack->master->ct_general); NF_CT_STAT_INC(expect_new); } else { - struct nf_conn_help *help = nfct_help(conntrack); - - if (help) - help->helper = __nf_ct_helper_find(&repl_tuple); + if (help) { + /* not in hash table yet, so not strictly necessary */ + rcu_assign_pointer(help->helper, + __nf_ct_helper_find(&repl_tuple)); + } NF_CT_STAT_INC(new); } @@ -893,7 +902,8 @@ void nf_conntrack_alter_reply(struct nf_conn *ct, helper = __nf_ct_helper_find(newreply); if (helper) memset(&help->help, 0, sizeof(help->help)); - help->helper = helper; + /* not in hash table yet, so not strictly necessary */ + rcu_assign_pointer(help->helper, helper); } write_unlock_bh(&nf_conntrack_lock); } diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 117cbfd..504fb6c 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -337,6 +337,10 @@ int nf_conntrack_expect_related(struct nf_conntrack_expect *expect) NF_CT_ASSERT(master_help); write_lock_bh(&nf_conntrack_lock); + if 
(!master_help->helper) { + ret = -ESHUTDOWN; + goto out; + } list_for_each_entry(i, &nf_conntrack_expect_list, list) { if (expect_matches(i, expect)) { /* Refresh timer: if it's dying, ignore.. */ diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 0743be4..f868b7f 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -93,7 +93,7 @@ static inline int unhelp(struct nf_conntrack_tuple_hash *i, if (help && help->helper == me) { nf_conntrack_event(IPCT_HELPER, ct); - help->helper = NULL; + rcu_assign_pointer(help->helper, NULL); } return 0; } diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index d6d39e2..3f73327 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -171,21 +171,29 @@ ctnetlink_dump_helpinfo(struct sk_buff *skb, const struct nf_conn *ct) { struct nfattr *nest_helper; const struct nf_conn_help *help = nfct_help(ct); + struct nf_conntrack_helper *helper; - if (!help || !help->helper) + if (!help) return 0; + rcu_read_lock(); + helper = rcu_dereference(help->helper); + if (!helper) + goto out; + nest_helper = NFA_NEST(skb, CTA_HELP); - NFA_PUT(skb, CTA_HELP_NAME, strlen(help->helper->name), help->helper->name); + NFA_PUT(skb, CTA_HELP_NAME, strlen(helper->name), helper->name); - if (help->helper->to_nfattr) - help->helper->to_nfattr(skb, ct); + if (helper->to_nfattr) + helper->to_nfattr(skb, ct); NFA_NEST_END(skb, nest_helper); - +out: + rcu_read_unlock(); return 0; nfattr_failure: + rcu_read_unlock(); return -1; } @@ -842,7 +850,7 @@ ctnetlink_change_helper(struct nf_conn *ct, struct nfattr *cda[]) if (help && help->helper) { /* we had a helper before ... 
*/ nf_ct_remove_expectations(ct); - help->helper = NULL; + rcu_assign_pointer(help->helper, NULL); } return 0; @@ -866,7 +874,7 @@ ctnetlink_change_helper(struct nf_conn *ct, struct nfattr *cda[]) /* need to zero data of old helper */ memset(&help->help, 0, sizeof(help->help)); - help->helper = helper; + rcu_assign_pointer(help->helper, helper); return 0; } @@ -950,6 +958,7 @@ ctnetlink_create_conntrack(struct nfattr *cda[], struct nf_conn *ct; int err = -EINVAL; struct nf_conn_help *help; + struct nf_conntrack_helper *helper = NULL; ct = nf_conntrack_alloc(otuple, rtuple); if (ct == NULL || IS_ERR(ct)) @@ -980,14 +989,17 @@ ctnetlink_create_conntrack(struct nfattr *cda[], #endif help = nfct_help(ct); - if (help) - help->helper = nf_ct_helper_find_get(rtuple); + if (help) { + helper = nf_ct_helper_find_get(rtuple); + /* not in hash table yet so not strictly necessary */ + rcu_assign_pointer(help->helper, helper); + } add_timer(&ct->timeout); nf_conntrack_hash_insert(ct); - if (help && help->helper) - nf_ct_helper_put(help->helper); + if (helper) + nf_ct_helper_put(helper); return 0; diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index 5434472..339c397 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -100,7 +100,6 @@ int nf_ct_gre_keymap_add(struct nf_conn *ct, enum ip_conntrack_dir dir, struct nf_conn_help *help = nfct_help(ct); struct nf_ct_gre_keymap **kmp, *km; - BUG_ON(strcmp(help->helper->name, "pptp")); kmp = &help->help.ct_pptp_info.keymap[dir]; if (*kmp) { /* check whether it's a retransmission */ @@ -137,7 +136,6 @@ void nf_ct_gre_keymap_destroy(struct nf_conn *ct) enum ip_conntrack_dir dir; DEBUGP("entering for ct %p\n", ct); - BUG_ON(strcmp(help->helper->name, "pptp")); write_lock_bh(&nf_ct_gre_lock); for (dir = IP_CT_DIR_ORIGINAL; dir < IP_CT_DIR_MAX; dir++) { -- cgit v1.1 From 4c1b52bc7a2f5ee01ea3fc248a8748a1c6843f7c Mon Sep 17 00:00:00 2001 From: Dmitry 
Mishin Date: Tue, 5 Jun 2007 12:56:09 -0700 Subject: [NETFILTER]: ip_tables: fix compat related crash check_compat_entry_size_and_hooks iterates over the matches and calls compat_check_calc_match, which loads the match and calculates the compat offsets, but unlike the non-compat version, doesn't call ->checkentry yet. On error however it calls cleanup_matches, which in turn calls ->destroy, which can result in crashes if the destroy function (validly) expects to only get called after the checkentry function. Add a compat_release_match function that only drops the module reference on error and rename compat_check_calc_match to compat_find_calc_match to reflect the fact that it doesn't call the checkentry function. Reported by Jan Engelhardt Signed-off-by: Dmitry Mishin Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter_ipv4/ip_tables.h | 20 ++++++++ net/ipv4/netfilter/ip_tables.c | 81 +++++++++++++++++++++++++------- 2 files changed, 83 insertions(+), 18 deletions(-) diff --git a/include/linux/netfilter_ipv4/ip_tables.h b/include/linux/netfilter_ipv4/ip_tables.h index 2f46dd7..e992cd6 100644 --- a/include/linux/netfilter_ipv4/ip_tables.h +++ b/include/linux/netfilter_ipv4/ip_tables.h @@ -264,6 +264,26 @@ ipt_get_target(struct ipt_entry *e) __ret; \ }) +/* fn returns 0 to continue iteration */ +#define IPT_ENTRY_ITERATE_CONTINUE(entries, size, n, fn, args...) \ +({ \ + unsigned int __i, __n; \ + int __ret = 0; \ + struct ipt_entry *__entry; \ + \ + for (__i = 0, __n = 0; __i < (size); \ + __i += __entry->next_offset, __n++) { \ + __entry = (void *)(entries) + __i; \ + if (__n < n) \ + continue; \ + \ + __ret = fn(__entry , ## args); \ + if (__ret != 0) \ + break; \ + } \ + __ret; \ +}) + /* * Main firewall chains definitions and global var's definitions. 
*/ diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index e3f83bf..9bacf1a0 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -499,7 +499,8 @@ check_entry(struct ipt_entry *e, const char *name) } static inline int check_match(struct ipt_entry_match *m, const char *name, - const struct ipt_ip *ip, unsigned int hookmask) + const struct ipt_ip *ip, unsigned int hookmask, + unsigned int *i) { struct xt_match *match; int ret; @@ -515,6 +516,8 @@ static inline int check_match(struct ipt_entry_match *m, const char *name, m->u.kernel.match->name); ret = -EINVAL; } + if (!ret) + (*i)++; return ret; } @@ -537,11 +540,10 @@ find_check_match(struct ipt_entry_match *m, } m->u.kernel.match = match; - ret = check_match(m, name, ip, hookmask); + ret = check_match(m, name, ip, hookmask, i); if (ret) goto err; - (*i)++; return 0; err: module_put(m->u.kernel.match->me); @@ -1425,7 +1427,7 @@ out: } static inline int -compat_check_calc_match(struct ipt_entry_match *m, +compat_find_calc_match(struct ipt_entry_match *m, const char *name, const struct ipt_ip *ip, unsigned int hookmask, @@ -1449,6 +1451,31 @@ compat_check_calc_match(struct ipt_entry_match *m, } static inline int +compat_release_match(struct ipt_entry_match *m, unsigned int *i) +{ + if (i && (*i)-- == 0) + return 1; + + module_put(m->u.kernel.match->me); + return 0; +} + +static inline int +compat_release_entry(struct ipt_entry *e, unsigned int *i) +{ + struct ipt_entry_target *t; + + if (i && (*i)-- == 0) + return 1; + + /* Cleanup all matches */ + IPT_MATCH_ITERATE(e, compat_release_match, NULL); + t = ipt_get_target(e); + module_put(t->u.kernel.target->me); + return 0; +} + +static inline int check_compat_entry_size_and_hooks(struct ipt_entry *e, struct xt_table_info *newinfo, unsigned int *size, @@ -1485,10 +1512,10 @@ check_compat_entry_size_and_hooks(struct ipt_entry *e, off = 0; entry_offset = (void *)e - (void *)base; j = 0; - ret = IPT_MATCH_ITERATE(e, 
compat_check_calc_match, name, &e->ip, + ret = IPT_MATCH_ITERATE(e, compat_find_calc_match, name, &e->ip, e->comefrom, &off, &j); if (ret != 0) - goto cleanup_matches; + goto release_matches; t = ipt_get_target(e); target = try_then_request_module(xt_find_target(AF_INET, @@ -1499,7 +1526,7 @@ check_compat_entry_size_and_hooks(struct ipt_entry *e, duprintf("check_compat_entry_size_and_hooks: `%s' not found\n", t->u.user.name); ret = target ? PTR_ERR(target) : -ENOENT; - goto cleanup_matches; + goto release_matches; } t->u.kernel.target = target; @@ -1526,8 +1553,8 @@ check_compat_entry_size_and_hooks(struct ipt_entry *e, out: module_put(t->u.kernel.target->me); -cleanup_matches: - IPT_MATCH_ITERATE(e, cleanup_match, &j); +release_matches: + IPT_MATCH_ITERATE(e, compat_release_match, &j); return ret; } @@ -1574,15 +1601,26 @@ static int compat_copy_entry_from_user(struct ipt_entry *e, void **dstptr, return ret; } -static inline int compat_check_entry(struct ipt_entry *e, const char *name) +static inline int compat_check_entry(struct ipt_entry *e, const char *name, + unsigned int *i) { - int ret; + int j, ret; - ret = IPT_MATCH_ITERATE(e, check_match, name, &e->ip, e->comefrom); + j = 0; + ret = IPT_MATCH_ITERATE(e, check_match, name, &e->ip, e->comefrom, &j); if (ret) - return ret; + goto cleanup_matches; + + ret = check_target(e, name); + if (ret) + goto cleanup_matches; - return check_target(e, name); + (*i)++; + return 0; + + cleanup_matches: + IPT_MATCH_ITERATE(e, cleanup_match, &j); + return ret; } static int @@ -1673,10 +1711,17 @@ translate_compat_table(const char *name, if (!mark_source_chains(newinfo, valid_hooks, entry1)) goto free_newinfo; + i = 0; ret = IPT_ENTRY_ITERATE(entry1, newinfo->size, compat_check_entry, - name); - if (ret) - goto free_newinfo; + name, &i); + if (ret) { + j -= i; + IPT_ENTRY_ITERATE_CONTINUE(entry1, newinfo->size, i, + compat_release_entry, &j); + IPT_ENTRY_ITERATE(entry1, newinfo->size, cleanup_entry, &i); + 
xt_free_table_info(newinfo); + return ret; + } /* And one copy for every other CPU */ for_each_possible_cpu(i) @@ -1691,7 +1736,7 @@ translate_compat_table(const char *name, free_newinfo: xt_free_table_info(newinfo); out: - IPT_ENTRY_ITERATE(entry0, total_size, cleanup_entry, &j); + IPT_ENTRY_ITERATE(entry0, total_size, compat_release_entry, &j); return ret; out_unlock: compat_flush_offsets(); -- cgit v1.1 From c764c9ade6d9b710bad2b9c631ede9864333b98c Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Tue, 5 Jun 2007 12:56:53 -0700 Subject: [NETFILTER]: nf_conntrack_amanda: fix textsearch_prepare() error check The return value from textsearch_prepare() needs to be checked by IS_ERR(). Because it returns error code as a pointer. Cc: "Brian J. Murrell" Signed-off-by: Akinobu Mita Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/netfilter/nf_conntrack_amanda.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/net/netfilter/nf_conntrack_amanda.c b/net/netfilter/nf_conntrack_amanda.c index b8869ea..0568f2e86 100644 --- a/net/netfilter/nf_conntrack_amanda.c +++ b/net/netfilter/nf_conntrack_amanda.c @@ -208,13 +208,14 @@ static int __init nf_conntrack_amanda_init(void) { int ret, i; - ret = -ENOMEM; for (i = 0; i < ARRAY_SIZE(search); i++) { search[i].ts = textsearch_prepare(ts_algo, search[i].string, search[i].len, GFP_KERNEL, TS_AUTOLOAD); - if (search[i].ts == NULL) + if (IS_ERR(search[i].ts)) { + ret = PTR_ERR(search[i].ts); goto err1; + } } ret = nf_conntrack_helper_register(&amanda_helper[0]); if (ret < 0) @@ -227,10 +228,9 @@ static int __init nf_conntrack_amanda_init(void) err2: nf_conntrack_helper_unregister(&amanda_helper[0]); err1: - for (; i >= 0; i--) { - if (search[i].ts) - textsearch_destroy(search[i].ts); - } + while (--i >= 0) + textsearch_destroy(search[i].ts); + return ret; } -- cgit v1.1 From 3c0d2f3780fc94746c4842e965bd2570e2119bb6 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 5 Jun 2007 
13:10:29 -0700 Subject: [AF_UNIX]: Fix stream recvmsg() race. A recv() on an AF_UNIX, SOCK_STREAM socket can race with a send()+close() on the peer, causing recv() to return zero, even though the sent data should be received. This happens if the send() and the close() are performed between skb_dequeue() and checking sk->sk_shutdown in unix_stream_recvmsg(): process A skb_dequeue() returns NULL, there's no data in the socket queue process B new data is inserted onto the queue by unix_stream_sendmsg() process B sk->sk_shutdown is set to SHUTDOWN_MASK by unix_release_sock() process A sk->sk_shutdown is checked, unix_stream_recvmsg() returns zero I'm surprised nobody noticed this, it's not hard to trigger. Maybe it's just (un)luck with the timing. It's possible to work around this bug in userspace, by retrying the recv() once in case of a zero return value. Signed-off-by: Miklos Szeredi Signed-off-by: David S. Miller --- net/unix/af_unix.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 87c794d8..d70fa30 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1744,20 +1744,23 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, int chunk; struct sk_buff *skb; + unix_state_lock(sk); skb = skb_dequeue(&sk->sk_receive_queue); if (skb==NULL) { if (copied >= target) - break; + goto unlock; /* * POSIX 1003.1g mandates this order. */ if ((err = sock_error(sk)) != 0) - break; + goto unlock; if (sk->sk_shutdown & RCV_SHUTDOWN) - break; + goto unlock; + + unix_state_unlock(sk); err = -EAGAIN; if (!timeo) break; @@ -1771,7 +1774,11 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, } mutex_lock(&u->readlock); continue; + unlock: + unix_state_unlock(sk); + break; } + unix_state_unlock(sk); if (check_creds) { /* Never glue messages from different writers */ -- cgit v1.1 From df2bc459a3ad71f8b44c358bf7169acf9caf4acd Mon Sep 17 00:00:00 2001 From: "David S. 
Miller" Date: Tue, 5 Jun 2007 15:18:43 -0700 Subject: [UDP]: Revert 2-pass hashing changes. This reverts changesets: 6aaf47fa48d3c44280810b1b470261d340e4ed87 b7b5f487ab39bc10ed0694af35651a03d9cb97ff de34ed91c4ffa4727964a832c46e624dd1495cf5 fc038410b4b1643766f8033f4940bcdb1dace633 There are still some correctness issues recently discovered which do not have a known fix that doesn't involve doing a full hash table scan on port bind. So revert for now. Signed-off-by: David S. Miller --- include/net/udp.h | 9 +- include/net/udplite.h | 2 +- net/ipv4/udp.c | 241 ++++++++++++++------------------------------------ net/ipv4/udp_impl.h | 6 +- net/ipv4/udplite.c | 7 +- net/ipv6/udp.c | 21 +---- net/ipv6/udp_impl.h | 2 - net/ipv6/udplite.c | 2 +- 8 files changed, 78 insertions(+), 212 deletions(-) diff --git a/include/net/udp.h b/include/net/udp.h index 496f89d..98755eb 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -119,16 +119,9 @@ static inline void udp_lib_close(struct sock *sk, long timeout) } -struct udp_get_port_ops { - int (*saddr_cmp)(const struct sock *sk1, const struct sock *sk2); - int (*saddr_any)(const struct sock *sk); - unsigned int (*hash_port_and_rcv_saddr)(__u16 port, - const struct sock *sk); -}; - /* net/ipv4/udp.c */ extern int udp_get_port(struct sock *sk, unsigned short snum, - const struct udp_get_port_ops *ops); + int (*saddr_cmp)(const struct sock *, const struct sock *)); extern void udp_err(struct sk_buff *, u32); extern int udp_sendmsg(struct kiocb *iocb, struct sock *sk, diff --git a/include/net/udplite.h b/include/net/udplite.h index 50b4b42..635b0ea 100644 --- a/include/net/udplite.h +++ b/include/net/udplite.h @@ -120,5 +120,5 @@ static inline __wsum udplite_csum_outgoing(struct sock *sk, struct sk_buff *skb) extern void udplite4_register(void); extern int udplite_get_port(struct sock *sk, unsigned short snum, - const struct udp_get_port_ops *ops); + int (*scmp)(const struct sock *, const struct sock *)); #endif /* _UDPLITE_H */ 
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 5da703e..facb7e2 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -114,36 +114,14 @@ DEFINE_RWLOCK(udp_hash_lock); static int udp_port_rover; -/* - * Note about this hash function : - * Typical use is probably daddr = 0, only dport is going to vary hash - */ -static inline unsigned int udp_hash_port(__u16 port) -{ - return port; -} - -static inline int __udp_lib_port_inuse(unsigned int hash, int port, - const struct sock *this_sk, - struct hlist_head udptable[], - const struct udp_get_port_ops *ops) +static inline int __udp_lib_lport_inuse(__u16 num, struct hlist_head udptable[]) { struct sock *sk; struct hlist_node *node; - struct inet_sock *inet; - sk_for_each(sk, node, &udptable[hash & (UDP_HTABLE_SIZE - 1)]) { - if (sk->sk_hash != hash) - continue; - inet = inet_sk(sk); - if (inet->num != port) - continue; - if (this_sk) { - if (ops->saddr_cmp(sk, this_sk)) - return 1; - } else if (ops->saddr_any(sk)) + sk_for_each(sk, node, &udptable[num & (UDP_HTABLE_SIZE - 1)]) + if (sk->sk_hash == num) return 1; - } return 0; } @@ -154,16 +132,16 @@ static inline int __udp_lib_port_inuse(unsigned int hash, int port, * @snum: port number to look up * @udptable: hash list table, must be of UDP_HTABLE_SIZE * @port_rover: pointer to record of last unallocated port - * @ops: AF-dependent address operations + * @saddr_comp: AF-dependent comparison of bound local IP addresses */ int __udp_lib_get_port(struct sock *sk, unsigned short snum, struct hlist_head udptable[], int *port_rover, - const struct udp_get_port_ops *ops) + int (*saddr_comp)(const struct sock *sk1, + const struct sock *sk2 ) ) { struct hlist_node *node; struct hlist_head *head; struct sock *sk2; - unsigned int hash; int error = 1; write_lock_bh(&udp_hash_lock); @@ -178,8 +156,7 @@ int __udp_lib_get_port(struct sock *sk, unsigned short snum, for (i = 0; i < UDP_HTABLE_SIZE; i++, result++) { int size; - hash = ops->hash_port_and_rcv_saddr(result, sk); - head = 
&udptable[hash & (UDP_HTABLE_SIZE - 1)]; + head = &udptable[result & (UDP_HTABLE_SIZE - 1)]; if (hlist_empty(head)) { if (result > sysctl_local_port_range[1]) result = sysctl_local_port_range[0] + @@ -204,16 +181,7 @@ int __udp_lib_get_port(struct sock *sk, unsigned short snum, result = sysctl_local_port_range[0] + ((result - sysctl_local_port_range[0]) & (UDP_HTABLE_SIZE - 1)); - hash = udp_hash_port(result); - if (__udp_lib_port_inuse(hash, result, - NULL, udptable, ops)) - continue; - if (ops->saddr_any(sk)) - break; - - hash = ops->hash_port_and_rcv_saddr(result, sk); - if (! __udp_lib_port_inuse(hash, result, - sk, udptable, ops)) + if (! __udp_lib_lport_inuse(result, udptable)) break; } if (i >= (1 << 16) / UDP_HTABLE_SIZE) @@ -221,40 +189,21 @@ int __udp_lib_get_port(struct sock *sk, unsigned short snum, gotit: *port_rover = snum = result; } else { - hash = udp_hash_port(snum); - head = &udptable[hash & (UDP_HTABLE_SIZE - 1)]; + head = &udptable[snum & (UDP_HTABLE_SIZE - 1)]; sk_for_each(sk2, node, head) - if (sk2->sk_hash == hash && - sk2 != sk && - inet_sk(sk2)->num == snum && - (!sk2->sk_reuse || !sk->sk_reuse) && - (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if || - sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && - ops->saddr_cmp(sk, sk2)) + if (sk2->sk_hash == snum && + sk2 != sk && + (!sk2->sk_reuse || !sk->sk_reuse) && + (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if + || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && + (*saddr_comp)(sk, sk2) ) goto fail; - - if (!ops->saddr_any(sk)) { - hash = ops->hash_port_and_rcv_saddr(snum, sk); - head = &udptable[hash & (UDP_HTABLE_SIZE - 1)]; - - sk_for_each(sk2, node, head) - if (sk2->sk_hash == hash && - sk2 != sk && - inet_sk(sk2)->num == snum && - (!sk2->sk_reuse || !sk->sk_reuse) && - (!sk2->sk_bound_dev_if || - !sk->sk_bound_dev_if || - sk2->sk_bound_dev_if == - sk->sk_bound_dev_if) && - ops->saddr_cmp(sk, sk2)) - goto fail; - } } inet_sk(sk)->num = snum; - sk->sk_hash = hash; + sk->sk_hash = snum; if 
(sk_unhashed(sk)) { - head = &udptable[hash & (UDP_HTABLE_SIZE - 1)]; + head = &udptable[snum & (UDP_HTABLE_SIZE - 1)]; sk_add_node(sk, head); sock_prot_inc_use(sk->sk_prot); } @@ -265,12 +214,12 @@ fail: } int udp_get_port(struct sock *sk, unsigned short snum, - const struct udp_get_port_ops *ops) + int (*scmp)(const struct sock *, const struct sock *)) { - return __udp_lib_get_port(sk, snum, udp_hash, &udp_port_rover, ops); + return __udp_lib_get_port(sk, snum, udp_hash, &udp_port_rover, scmp); } -static int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2) +int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2) { struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2); @@ -279,33 +228,9 @@ static int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2) inet1->rcv_saddr == inet2->rcv_saddr )); } -static int ipv4_rcv_saddr_any(const struct sock *sk) -{ - return !inet_sk(sk)->rcv_saddr; -} - -static inline unsigned int ipv4_hash_port_and_addr(__u16 port, __be32 addr) -{ - addr ^= addr >> 16; - addr ^= addr >> 8; - return port ^ addr; -} - -static unsigned int ipv4_hash_port_and_rcv_saddr(__u16 port, - const struct sock *sk) -{ - return ipv4_hash_port_and_addr(port, inet_sk(sk)->rcv_saddr); -} - -const struct udp_get_port_ops udp_ipv4_ops = { - .saddr_cmp = ipv4_rcv_saddr_equal, - .saddr_any = ipv4_rcv_saddr_any, - .hash_port_and_rcv_saddr = ipv4_hash_port_and_rcv_saddr, -}; - static inline int udp_v4_get_port(struct sock *sk, unsigned short snum) { - return udp_get_port(sk, snum, &udp_ipv4_ops); + return udp_get_port(sk, snum, ipv4_rcv_saddr_equal); } /* UDP is nearly always wildcards out the wazoo, it makes no sense to try @@ -317,77 +242,63 @@ static struct sock *__udp4_lib_lookup(__be32 saddr, __be16 sport, { struct sock *sk, *result = NULL; struct hlist_node *node; - unsigned int hash, hashwild; - int score, best = -1, hport = ntohs(dport); - - hash = ipv4_hash_port_and_addr(hport, daddr); - hashwild = 
udp_hash_port(hport); + unsigned short hnum = ntohs(dport); + int badness = -1; read_lock(&udp_hash_lock); - -lookup: - - sk_for_each(sk, node, &udptable[hash & (UDP_HTABLE_SIZE - 1)]) { + sk_for_each(sk, node, &udptable[hnum & (UDP_HTABLE_SIZE - 1)]) { struct inet_sock *inet = inet_sk(sk); - if (sk->sk_hash != hash || ipv6_only_sock(sk) || - inet->num != hport) - continue; - - score = (sk->sk_family == PF_INET ? 1 : 0); - if (inet->rcv_saddr) { - if (inet->rcv_saddr != daddr) - continue; - score+=2; - } - if (inet->daddr) { - if (inet->daddr != saddr) - continue; - score+=2; - } - if (inet->dport) { - if (inet->dport != sport) - continue; - score+=2; - } - if (sk->sk_bound_dev_if) { - if (sk->sk_bound_dev_if != dif) - continue; - score+=2; - } - if (score == 9) { - result = sk; - goto found; - } else if (score > best) { - result = sk; - best = score; + if (sk->sk_hash == hnum && !ipv6_only_sock(sk)) { + int score = (sk->sk_family == PF_INET ? 1 : 0); + if (inet->rcv_saddr) { + if (inet->rcv_saddr != daddr) + continue; + score+=2; + } + if (inet->daddr) { + if (inet->daddr != saddr) + continue; + score+=2; + } + if (inet->dport) { + if (inet->dport != sport) + continue; + score+=2; + } + if (sk->sk_bound_dev_if) { + if (sk->sk_bound_dev_if != dif) + continue; + score+=2; + } + if (score == 9) { + result = sk; + break; + } else if (score > badness) { + result = sk; + badness = score; + } } } - - if (hash != hashwild) { - hash = hashwild; - goto lookup; - } -found: if (result) sock_hold(result); read_unlock(&udp_hash_lock); return result; } -static inline struct sock *udp_v4_mcast_next(struct sock *sk, unsigned int hnum, - int hport, __be32 loc_addr, +static inline struct sock *udp_v4_mcast_next(struct sock *sk, + __be16 loc_port, __be32 loc_addr, __be16 rmt_port, __be32 rmt_addr, int dif) { struct hlist_node *node; struct sock *s = sk; + unsigned short hnum = ntohs(loc_port); sk_for_each_from(s, node) { struct inet_sock *inet = inet_sk(s); if (s->sk_hash != hnum || 
- inet->num != hport || (inet->daddr && inet->daddr != rmt_addr) || (inet->dport != rmt_port && inet->dport) || (inet->rcv_saddr && inet->rcv_saddr != loc_addr) || @@ -1221,45 +1132,29 @@ static int __udp4_lib_mcast_deliver(struct sk_buff *skb, __be32 saddr, __be32 daddr, struct hlist_head udptable[]) { - struct sock *sk, *skw, *sknext; + struct sock *sk; int dif; - int hport = ntohs(uh->dest); - unsigned int hash = ipv4_hash_port_and_addr(hport, daddr); - unsigned int hashwild = udp_hash_port(hport); - - dif = skb->dev->ifindex; read_lock(&udp_hash_lock); - - sk = sk_head(&udptable[hash & (UDP_HTABLE_SIZE - 1)]); - skw = sk_head(&udptable[hashwild & (UDP_HTABLE_SIZE - 1)]); - - sk = udp_v4_mcast_next(sk, hash, hport, daddr, uh->source, saddr, dif); - if (!sk) { - hash = hashwild; - sk = udp_v4_mcast_next(skw, hash, hport, daddr, uh->source, - saddr, dif); - } + sk = sk_head(&udptable[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)]); + dif = skb->dev->ifindex; + sk = udp_v4_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif); if (sk) { + struct sock *sknext = NULL; + do { struct sk_buff *skb1 = skb; - sknext = udp_v4_mcast_next(sk_next(sk), hash, hport, - daddr, uh->source, saddr, dif); - if (!sknext && hash != hashwild) { - hash = hashwild; - sknext = udp_v4_mcast_next(skw, hash, hport, - daddr, uh->source, saddr, dif); - } + + sknext = udp_v4_mcast_next(sk_next(sk), uh->dest, daddr, + uh->source, saddr, dif); if (sknext) skb1 = skb_clone(skb, GFP_ATOMIC); if (skb1) { int ret = udp_queue_rcv_skb(sk, skb1); if (ret > 0) - /* - * we should probably re-process - * instead of dropping packets here. - */ + /* we should probably re-process instead + * of dropping packets here. 
*/ kfree_skb(skb1); } sk = sknext; @@ -1346,7 +1241,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], return __udp4_lib_mcast_deliver(skb, uh, saddr, daddr, udptable); sk = __udp4_lib_lookup(saddr, uh->source, daddr, uh->dest, - skb->dev->ifindex, udptable); + skb->dev->ifindex, udptable ); if (sk != NULL) { int ret = udp_queue_rcv_skb(sk, skb); diff --git a/net/ipv4/udp_impl.h b/net/ipv4/udp_impl.h index 06d9419..820a477 100644 --- a/net/ipv4/udp_impl.h +++ b/net/ipv4/udp_impl.h @@ -5,14 +5,14 @@ #include #include -extern const struct udp_get_port_ops udp_ipv4_ops; - extern int __udp4_lib_rcv(struct sk_buff *, struct hlist_head [], int ); extern void __udp4_lib_err(struct sk_buff *, u32, struct hlist_head []); extern int __udp_lib_get_port(struct sock *sk, unsigned short snum, struct hlist_head udptable[], int *port_rover, - const struct udp_get_port_ops *ops); + int (*)(const struct sock*,const struct sock*)); +extern int ipv4_rcv_saddr_equal(const struct sock *, const struct sock *); + extern int udp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, int optlen); diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c index 3653b32..f34fd68 100644 --- a/net/ipv4/udplite.c +++ b/net/ipv4/udplite.c @@ -19,15 +19,14 @@ struct hlist_head udplite_hash[UDP_HTABLE_SIZE]; static int udplite_port_rover; int udplite_get_port(struct sock *sk, unsigned short p, - const struct udp_get_port_ops *ops) + int (*c)(const struct sock *, const struct sock *)) { - return __udp_lib_get_port(sk, p, udplite_hash, - &udplite_port_rover, ops); + return __udp_lib_get_port(sk, p, udplite_hash, &udplite_port_rover, c); } static int udplite_v4_get_port(struct sock *sk, unsigned short snum) { - return udplite_get_port(sk, snum, &udp_ipv4_ops); + return udplite_get_port(sk, snum, ipv4_rcv_saddr_equal); } static int udplite_rcv(struct sk_buff *skb) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index d1fbddd..4210951 100644 --- a/net/ipv6/udp.c +++ 
b/net/ipv6/udp.c @@ -52,28 +52,9 @@ DEFINE_SNMP_STAT(struct udp_mib, udp_stats_in6) __read_mostly; -static int ipv6_rcv_saddr_any(const struct sock *sk) -{ - struct ipv6_pinfo *np = inet6_sk(sk); - - return ipv6_addr_any(&np->rcv_saddr); -} - -static unsigned int ipv6_hash_port_and_rcv_saddr(__u16 port, - const struct sock *sk) -{ - return port; -} - -const struct udp_get_port_ops udp_ipv6_ops = { - .saddr_cmp = ipv6_rcv_saddr_equal, - .saddr_any = ipv6_rcv_saddr_any, - .hash_port_and_rcv_saddr = ipv6_hash_port_and_rcv_saddr, -}; - static inline int udp_v6_get_port(struct sock *sk, unsigned short snum) { - return udp_get_port(sk, snum, &udp_ipv6_ops); + return udp_get_port(sk, snum, ipv6_rcv_saddr_equal); } static struct sock *__udp6_lib_lookup(struct in6_addr *saddr, __be16 sport, diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h index 36b0c11..6e252f3 100644 --- a/net/ipv6/udp_impl.h +++ b/net/ipv6/udp_impl.h @@ -6,8 +6,6 @@ #include #include -extern const struct udp_get_port_ops udp_ipv6_ops; - extern int __udp6_lib_rcv(struct sk_buff **, struct hlist_head [], int ); extern void __udp6_lib_err(struct sk_buff *, struct inet6_skb_parm *, int , int , int , __be32 , struct hlist_head []); diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c index c40a513..f54016a 100644 --- a/net/ipv6/udplite.c +++ b/net/ipv6/udplite.c @@ -37,7 +37,7 @@ static struct inet6_protocol udplitev6_protocol = { static int udplite_v6_get_port(struct sock *sk, unsigned short snum) { - return udplite_get_port(sk, snum, &udp_ipv6_ops); + return udplite_get_port(sk, snum, ipv6_rcv_saddr_equal); } struct proto udplitev6_prot = { -- cgit v1.1 From 7c355f532dd43036622e1880c114773463bafd23 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 5 Jun 2007 16:03:03 -0700 Subject: [NET]: Avoid duplicate netlink notification when changing link state When changing the link state from userspace not affecting any other flags. 
Two duplicate notifications are being sent, once as an action in the NETDEV_UP/NETDEV_DOWN notification chain and a second time when comparing old and new device flags after the change has been completed. Although harmless, the duplicates should be avoided. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- net/core/dev.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 5a7f20f..2609062 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2577,7 +2577,7 @@ unsigned dev_get_flags(const struct net_device *dev) int dev_change_flags(struct net_device *dev, unsigned flags) { - int ret; + int ret, changes; int old_flags = dev->flags; /* @@ -2632,8 +2632,10 @@ int dev_change_flags(struct net_device *dev, unsigned flags) dev_set_allmulti(dev, inc); } - if (old_flags ^ dev->flags) - rtmsg_ifinfo(RTM_NEWLINK, dev, old_flags ^ dev->flags); + /* Exclude state transition flags, already notified */ + changes = (old_flags ^ dev->flags) & ~(IFF_UP | IFF_RUNNING); + if (changes) + rtmsg_ifinfo(RTM_NEWLINK, dev, changes); return ret; } -- cgit v1.1 From b00b4bf94edb42852d55619af453588b2de2dc5e Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 5 Jun 2007 16:06:59 -0700 Subject: [NET_SCHED]: Fix filter double free cbq and atm destroy their filters twice when destroying inner classes during qdisc destruction. Reported-and-tested-by: Strobl Anton Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/sched/sch_atm.c | 1 + net/sched/sch_cbq.c | 8 +++++--- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index be7d299..d1c383f 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -599,6 +599,7 @@ static void atm_tc_destroy(struct Qdisc *sch) /* races ? 
*/ while ((flow = p->flows)) { tcf_destroy_chain(flow->filter_list); + flow->filter_list = NULL; if (flow->ref > 1) printk(KERN_ERR "atm_destroy: %p->ref = %d\n",flow, flow->ref); diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index a294542..ee2d596 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -1748,10 +1748,12 @@ cbq_destroy(struct Qdisc* sch) * classes from root to leafs which means that filters can still * be bound to classes which have been destroyed already. --TGR '04 */ - for (h = 0; h < 16; h++) - for (cl = q->classes[h]; cl; cl = cl->next) + for (h = 0; h < 16; h++) { + for (cl = q->classes[h]; cl; cl = cl->next) { tcf_destroy_chain(cl->filter_list); - + cl->filter_list = NULL; + } + } for (h = 0; h < 16; h++) { struct cbq_class *next; -- cgit v1.1 From 4aa2e62c45b5ca08be2d0d3c0744d7585b56e860 Mon Sep 17 00:00:00 2001 From: Joy Latten Date: Mon, 4 Jun 2007 19:05:57 -0400 Subject: xfrm: Add security check before flushing SAD/SPD Currently we check for permission before deleting entries from SAD and SPD (see security_xfrm_policy_delete() and security_xfrm_state_delete()). However we are not checking for authorization when flushing the SPD and the SAD completely. It was perhaps missed in the original security hooks patch. This patch adds a security check when flushing entries from the SAD and SPD. It runs the entire database and checks each entry for a denial. If the process attempting the flush is unable to remove all of the entries a denial is logged and the flush function returns an error without removing anything. This is particularly useful when a process may need to create or delete its own xfrm entries used for things like labeled networking but that same process should not be able to delete other entries or flush the entire database.
Signed-off-by: Joy Latten Signed-off-by: Eric Paris Signed-off-by: James Morris --- include/net/xfrm.h | 6 ++--- net/key/af_key.c | 10 ++++++-- net/xfrm/xfrm_policy.c | 63 +++++++++++++++++++++++++++++++++++++++++++++++--- net/xfrm/xfrm_state.c | 46 +++++++++++++++++++++++++++++++++--- net/xfrm/xfrm_user.c | 9 ++++++-- 5 files changed, 121 insertions(+), 13 deletions(-) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 90185e8..311f25a 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -964,7 +964,7 @@ struct xfrmk_spdinfo { extern struct xfrm_state *xfrm_find_acq_byseq(u32 seq); extern int xfrm_state_delete(struct xfrm_state *x); -extern void xfrm_state_flush(u8 proto, struct xfrm_audit *audit_info); +extern int xfrm_state_flush(u8 proto, struct xfrm_audit *audit_info); extern void xfrm_sad_getinfo(struct xfrmk_sadinfo *si); extern void xfrm_spd_getinfo(struct xfrmk_spdinfo *si); extern int xfrm_replay_check(struct xfrm_state *x, __be32 seq); @@ -1020,13 +1020,13 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(u8 type, int dir, struct xfrm_sec_ctx *ctx, int delete, int *err); struct xfrm_policy *xfrm_policy_byid(u8, int dir, u32 id, int delete, int *err); -void xfrm_policy_flush(u8 type, struct xfrm_audit *audit_info); +int xfrm_policy_flush(u8 type, struct xfrm_audit *audit_info); u32 xfrm_get_acqseq(void); void xfrm_alloc_spi(struct xfrm_state *x, __be32 minspi, __be32 maxspi); struct xfrm_state * xfrm_find_acq(u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create, unsigned short family); -extern void xfrm_policy_flush(u8 type, struct xfrm_audit *audit_info); +extern int xfrm_policy_flush(u8 type, struct xfrm_audit *audit_info); extern int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol); extern int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *xdst, struct flowi *fl, int family, int strict); diff --git a/net/key/af_key.c b/net/key/af_key.c index d302dda..0f8304b 100644 --- 
a/net/key/af_key.c +++ b/net/key/af_key.c @@ -1682,6 +1682,7 @@ static int pfkey_flush(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hd unsigned proto; struct km_event c; struct xfrm_audit audit_info; + int err; proto = pfkey_satype2proto(hdr->sadb_msg_satype); if (proto == 0) @@ -1689,7 +1690,9 @@ static int pfkey_flush(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hd audit_info.loginuid = audit_get_loginuid(current->audit_context); audit_info.secid = 0; - xfrm_state_flush(proto, &audit_info); + err = xfrm_state_flush(proto, &audit_info); + if (err) + return err; c.data.proto = proto; c.seq = hdr->sadb_msg_seq; c.pid = hdr->sadb_msg_pid; @@ -2683,10 +2686,13 @@ static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, struct sadb_msg { struct km_event c; struct xfrm_audit audit_info; + int err; audit_info.loginuid = audit_get_loginuid(current->audit_context); audit_info.secid = 0; - xfrm_policy_flush(XFRM_POLICY_TYPE_MAIN, &audit_info); + err = xfrm_policy_flush(XFRM_POLICY_TYPE_MAIN, &audit_info); + if (err) + return err; c.data.type = XFRM_POLICY_TYPE_MAIN; c.event = XFRM_MSG_FLUSHPOLICY; c.pid = hdr->sadb_msg_pid; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 64a3751..157bfbd 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -834,11 +834,67 @@ struct xfrm_policy *xfrm_policy_byid(u8 type, int dir, u32 id, int delete, } EXPORT_SYMBOL(xfrm_policy_byid); -void xfrm_policy_flush(u8 type, struct xfrm_audit *audit_info) +#ifdef CONFIG_SECURITY_NETWORK_XFRM +static inline int +xfrm_policy_flush_secctx_check(u8 type, struct xfrm_audit *audit_info) { - int dir; + int dir, err = 0; + + for (dir = 0; dir < XFRM_POLICY_MAX; dir++) { + struct xfrm_policy *pol; + struct hlist_node *entry; + int i; + + hlist_for_each_entry(pol, entry, + &xfrm_policy_inexact[dir], bydst) { + if (pol->type != type) + continue; + err = security_xfrm_policy_delete(pol); + if (err) { + xfrm_audit_log(audit_info->loginuid, + 
audit_info->secid, + AUDIT_MAC_IPSEC_DELSPD, 0, + pol, NULL); + return err; + } + } + for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) { + hlist_for_each_entry(pol, entry, + xfrm_policy_bydst[dir].table + i, + bydst) { + if (pol->type != type) + continue; + err = security_xfrm_policy_delete(pol); + if (err) { + xfrm_audit_log(audit_info->loginuid, + audit_info->secid, + AUDIT_MAC_IPSEC_DELSPD, + 0, pol, NULL); + return err; + } + } + } + } + return err; +} +#else +static inline int +xfrm_policy_flush_secctx_check(u8 type, struct xfrm_audit *audit_info) +{ + return 0; +} +#endif + +int xfrm_policy_flush(u8 type, struct xfrm_audit *audit_info) +{ + int dir, err = 0; write_lock_bh(&xfrm_policy_lock); + + err = xfrm_policy_flush_secctx_check(type, audit_info); + if (err) + goto out; + for (dir = 0; dir < XFRM_POLICY_MAX; dir++) { struct xfrm_policy *pol; struct hlist_node *entry; @@ -891,7 +947,9 @@ void xfrm_policy_flush(u8 type, struct xfrm_audit *audit_info) xfrm_policy_count[dir] -= killed; } atomic_inc(&flow_cache_genid); +out: write_unlock_bh(&xfrm_policy_lock); + return err; } EXPORT_SYMBOL(xfrm_policy_flush); @@ -2583,4 +2641,3 @@ restore_state: } EXPORT_SYMBOL(xfrm_migrate); #endif - diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 372f06e..85f3f43 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -391,12 +391,48 @@ int xfrm_state_delete(struct xfrm_state *x) } EXPORT_SYMBOL(xfrm_state_delete); -void xfrm_state_flush(u8 proto, struct xfrm_audit *audit_info) +#ifdef CONFIG_SECURITY_NETWORK_XFRM +static inline int +xfrm_state_flush_secctx_check(u8 proto, struct xfrm_audit *audit_info) { - int i; - int err = 0; + int i, err = 0; + + for (i = 0; i <= xfrm_state_hmask; i++) { + struct hlist_node *entry; + struct xfrm_state *x; + + hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) { + if (xfrm_id_proto_match(x->id.proto, proto) && + (err = security_xfrm_state_delete(x)) != 0) { + xfrm_audit_log(audit_info->loginuid, + 
audit_info->secid, + AUDIT_MAC_IPSEC_DELSA, + 0, NULL, x); + + return err; + } + } + } + + return err; +} +#else +static inline int +xfrm_state_flush_secctx_check(u8 proto, struct xfrm_audit *audit_info) +{ + return 0; +} +#endif + +int xfrm_state_flush(u8 proto, struct xfrm_audit *audit_info) +{ + int i, err = 0; spin_lock_bh(&xfrm_state_lock); + err = xfrm_state_flush_secctx_check(proto, audit_info); + if (err) + goto out; + for (i = 0; i <= xfrm_state_hmask; i++) { struct hlist_node *entry; struct xfrm_state *x; @@ -419,8 +455,12 @@ restart: } } } + err = 0; + +out: spin_unlock_bh(&xfrm_state_lock); wake_up(&km_waitq); + return err; } EXPORT_SYMBOL(xfrm_state_flush); diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index b14c7e5..c06883b 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1418,10 +1418,13 @@ static int xfrm_flush_sa(struct sk_buff *skb, struct nlmsghdr *nlh, struct km_event c; struct xfrm_usersa_flush *p = NLMSG_DATA(nlh); struct xfrm_audit audit_info; + int err; audit_info.loginuid = NETLINK_CB(skb).loginuid; audit_info.secid = NETLINK_CB(skb).sid; - xfrm_state_flush(p->proto, &audit_info); + err = xfrm_state_flush(p->proto, &audit_info); + if (err) + return err; c.data.proto = p->proto; c.event = nlh->nlmsg_type; c.seq = nlh->nlmsg_seq; @@ -1582,7 +1585,9 @@ static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh, audit_info.loginuid = NETLINK_CB(skb).loginuid; audit_info.secid = NETLINK_CB(skb).sid; - xfrm_policy_flush(type, &audit_info); + err = xfrm_policy_flush(type, &audit_info); + if (err) + return err; c.data.type = type; c.event = nlh->nlmsg_type; c.seq = nlh->nlmsg_seq; -- cgit v1.1