Diffstat (limited to 'net')
223 files changed, 6432 insertions, 2624 deletions
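Editor's note: the net/8021q hunks below can drop the hard-coded "8021q: " prefix from their log messages because each file now defines pr_fmt() before its includes. A minimal sketch of that convention, outside the patch itself (the example function is hypothetical):

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/printk.h>

/* With the pr_fmt() definition above, pr_info() expands to
 * printk(KERN_INFO KBUILD_MODNAME ": " ...), so in the 8021q module this
 * prints "8021q: VLANs not supported on eth0" without an explicit prefix. */
static void report_vlan_challenged(const char *name)
{
	pr_info("VLANs not supported on %s\n", name);
}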
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index c7a581a..d24c464 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -18,6 +18,8 @@ * 2 of the License, or (at your option) any later version. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/capability.h> #include <linux/module.h> #include <linux/netdevice.h> @@ -149,13 +151,13 @@ int vlan_check_real_dev(struct net_device *real_dev, u16 vlan_id) const struct net_device_ops *ops = real_dev->netdev_ops; if (real_dev->features & NETIF_F_VLAN_CHALLENGED) { - pr_info("8021q: VLANs not supported on %s\n", name); + pr_info("VLANs not supported on %s\n", name); return -EOPNOTSUPP; } if ((real_dev->features & NETIF_F_HW_VLAN_FILTER) && (!ops->ndo_vlan_rx_add_vid || !ops->ndo_vlan_rx_kill_vid)) { - pr_info("8021q: Device %s has buggy VLAN hw accel\n", name); + pr_info("Device %s has buggy VLAN hw accel\n", name); return -EOPNOTSUPP; } @@ -205,7 +207,7 @@ int register_vlan_dev(struct net_device *dev) grp->nr_vlans++; if (ngrp) { - if (ops->ndo_vlan_rx_register) + if (ops->ndo_vlan_rx_register && (real_dev->features & NETIF_F_HW_VLAN_RX)) ops->ndo_vlan_rx_register(real_dev, ngrp); rcu_assign_pointer(real_dev->vlgrp, ngrp); } @@ -344,13 +346,12 @@ static void __vlan_device_event(struct net_device *dev, unsigned long event) case NETDEV_CHANGENAME: vlan_proc_rem_dev(dev); if (vlan_proc_add_dev(dev) < 0) - pr_warning("8021q: failed to change proc name for %s\n", - dev->name); + pr_warn("failed to change proc name for %s\n", + dev->name); break; case NETDEV_REGISTER: if (vlan_proc_add_dev(dev) < 0) - pr_warning("8021q: failed to add proc entry for %s\n", - dev->name); + pr_warn("failed to add proc entry for %s\n", dev->name); break; case NETDEV_UNREGISTER: vlan_proc_rem_dev(dev); @@ -374,7 +375,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, if ((event == NETDEV_UP) && (dev->features & NETIF_F_HW_VLAN_FILTER) && dev->netdev_ops->ndo_vlan_rx_add_vid) { - pr_info("8021q: adding VLAN 0 to HW filter on device %s\n", + pr_info("adding VLAN 0 to HW filter on device %s\n", dev->name); dev->netdev_ops->ndo_vlan_rx_add_vid(dev, 0); } diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index 41495dc2..fcc6846 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -23,6 +23,31 @@ bool vlan_do_receive(struct sk_buff **skbp) return false; skb->dev = vlan_dev; + if (skb->pkt_type == PACKET_OTHERHOST) { + /* Our lower layer thinks this is not local, let's make sure. + * This allows the VLAN to have a different MAC than the + * underlying device, and still route correctly. */ + if (!compare_ether_addr(eth_hdr(skb)->h_dest, + vlan_dev->dev_addr)) + skb->pkt_type = PACKET_HOST; + } + + if (!(vlan_dev_info(vlan_dev)->flags & VLAN_FLAG_REORDER_HDR)) { + unsigned int offset = skb->data - skb_mac_header(skb); + + /* + * vlan_insert_tag expect skb->data pointing to mac header. 
+ * So change skb->data before calling it and change back to + * original position later + */ + skb_push(skb, offset); + skb = *skbp = vlan_insert_tag(skb, skb->vlan_tci); + if (!skb) + return false; + skb_pull(skb, offset + VLAN_HLEN); + skb_reset_mac_len(skb); + } + skb->priority = vlan_get_ingress_priority(vlan_dev, skb->vlan_tci); skb->vlan_tci = 0; @@ -31,22 +56,8 @@ bool vlan_do_receive(struct sk_buff **skbp) u64_stats_update_begin(&rx_stats->syncp); rx_stats->rx_packets++; rx_stats->rx_bytes += skb->len; - - switch (skb->pkt_type) { - case PACKET_BROADCAST: - break; - case PACKET_MULTICAST: + if (skb->pkt_type == PACKET_MULTICAST) rx_stats->rx_multicast++; - break; - case PACKET_OTHERHOST: - /* Our lower layer thinks this is not local, let's make sure. - * This allows the VLAN to have a different MAC than the - * underlying device, and still route correctly. */ - if (!compare_ether_addr(eth_hdr(skb)->h_dest, - vlan_dev->dev_addr)) - skb->pkt_type = PACKET_HOST; - break; - } u64_stats_update_end(&rx_stats->syncp); return true; @@ -89,18 +100,13 @@ gro_result_t vlan_gro_frags(struct napi_struct *napi, struct vlan_group *grp, } EXPORT_SYMBOL(vlan_gro_frags); -static struct sk_buff *vlan_check_reorder_header(struct sk_buff *skb) +static struct sk_buff *vlan_reorder_header(struct sk_buff *skb) { - if (vlan_dev_info(skb->dev)->flags & VLAN_FLAG_REORDER_HDR) { - if (skb_cow(skb, skb_headroom(skb)) < 0) - skb = NULL; - if (skb) { - /* Lifted from Gleb's VLAN code... */ - memmove(skb->data - ETH_HLEN, - skb->data - VLAN_ETH_HLEN, 12); - skb->mac_header += VLAN_HLEN; - } - } + if (skb_cow(skb, skb_headroom(skb)) < 0) + return NULL; + memmove(skb->data - ETH_HLEN, skb->data - VLAN_ETH_HLEN, 2 * ETH_ALEN); + skb->mac_header += VLAN_HLEN; + skb_reset_mac_len(skb); return skb; } @@ -161,7 +167,7 @@ struct sk_buff *vlan_untag(struct sk_buff *skb) skb_pull_rcsum(skb, VLAN_HLEN); vlan_set_encap_proto(skb, vhdr); - skb = vlan_check_reorder_header(skb); + skb = vlan_reorder_header(skb); if (unlikely(!skb)) goto err_free; diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index f247f5b..1c9aa8c 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -20,6 +20,8 @@ * 2 of the License, or (at your option) any later version. 
*/ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/module.h> #include <linux/slab.h> #include <linux/skbuff.h> @@ -55,7 +57,7 @@ static int vlan_dev_rebuild_header(struct sk_buff *skb) return arp_find(veth->h_dest, skb); #endif default: - pr_debug("%s: unable to resolve type %X addresses.\n", + pr_debug("%s: unable to resolve type %X addresses\n", dev->name, ntohs(veth->h_vlan_encapsulated_proto)); memcpy(veth->h_source, dev->dev_addr, ETH_ALEN); @@ -165,7 +167,7 @@ static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb, u64_stats_update_begin(&stats->syncp); stats->tx_packets++; stats->tx_bytes += len; - u64_stats_update_begin(&stats->syncp); + u64_stats_update_end(&stats->syncp); } else { this_cpu_inc(vlan_dev_info(dev)->vlan_pcpu_stats->tx_dropped); } diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c index d940c49..d34b6da 100644 --- a/net/8021q/vlanproc.c +++ b/net/8021q/vlanproc.c @@ -17,6 +17,8 @@ * Jan 20, 1998 Ben Greear Initial Version *****************************************************************************/ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/module.h> #include <linux/errno.h> #include <linux/kernel.h> @@ -155,7 +157,7 @@ int __net_init vlan_proc_init(struct net *net) return 0; err: - pr_err("%s: can't create entry in proc filesystem!\n", __func__); + pr_err("can't create entry in proc filesystem!\n"); vlan_proc_cleanup(net); return -ENOBUFS; } @@ -229,7 +231,7 @@ static void *vlan_seq_next(struct seq_file *seq, void *v, loff_t *pos) ++*pos; - dev = (struct net_device *)v; + dev = v; if (v == SEQ_START_TOKEN) dev = net_device_entry(&net->dev_base_head); diff --git a/net/9p/Kconfig b/net/9p/Kconfig index 7ed75c7..d9ea09b 100644 --- a/net/9p/Kconfig +++ b/net/9p/Kconfig @@ -3,8 +3,8 @@ # menuconfig NET_9P - depends on NET && EXPERIMENTAL - tristate "Plan 9 Resource Sharing Support (9P2000) (Experimental)" + depends on NET + tristate "Plan 9 Resource Sharing Support (9P2000)" help If you say Y here, you will get experimental support for Plan 9 resource sharing via the 9P2000 protocol. @@ -16,8 +16,8 @@ menuconfig NET_9P if NET_9P config NET_9P_VIRTIO - depends on EXPERIMENTAL && VIRTIO - tristate "9P Virtio Transport (Experimental)" + depends on VIRTIO + tristate "9P Virtio Transport" help This builds support for a transports between guest partitions and a host partition. 
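Editor's note: the net/9p/client.c hunks that follow make p9_tag_init() propagate failures and extend the unwind path in p9_client_create() with a destroy_tagpool label. A minimal sketch of the goto-cleanup ladder being extended, with hypothetical helper names standing in for the 9p ones:

#include <linux/slab.h>
#include <linux/err.h>

struct demo_client {
	void *tagpool;
};

/* Stand-ins for p9_tag_init(), p9_idpool_destroy() and transport setup. */
int demo_tag_init(struct demo_client *c);
void demo_tag_destroy(struct demo_client *c);
int demo_trans_init(struct demo_client *c);

static struct demo_client *demo_client_create(void)
{
	struct demo_client *clnt;
	int err;

	clnt = kzalloc(sizeof(*clnt), GFP_KERNEL);
	if (!clnt)
		return ERR_PTR(-ENOMEM);

	err = demo_tag_init(clnt);
	if (err < 0)
		goto free_client;

	err = demo_trans_init(clnt);
	if (err < 0)
		goto destroy_tagpool;	/* unwind in reverse acquisition order */

	return clnt;

destroy_tagpool:
	demo_tag_destroy(clnt);
free_client:
	kfree(clnt);
	return ERR_PTR(err);
}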
diff --git a/net/9p/client.c b/net/9p/client.c index ceab943..9e3b0e6 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -92,9 +92,6 @@ static int get_protocol_version(const substring_t *name) return version; } -static struct p9_req_t * -p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...); - /** * parse_options - parse mount options into client structure * @opts: options string passed from mount @@ -307,12 +304,13 @@ static int p9_tag_init(struct p9_client *c) c->tagpool = p9_idpool_create(); if (IS_ERR(c->tagpool)) { err = PTR_ERR(c->tagpool); - c->tagpool = NULL; goto error; } - - p9_idpool_get(c->tagpool); /* reserve tag 0 */ - + err = p9_idpool_get(c->tagpool); /* reserve tag 0 */ + if (err < 0) { + p9_idpool_destroy(c->tagpool); + goto error; + } c->max_tag = 0; error: return err; @@ -518,12 +516,15 @@ out_err: return err; } +static struct p9_req_t * +p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...); + /** * p9_client_flush - flush (cancel) a request * @c: client state * @oldreq: request to cancel * - * This sents a flush for a particular requests and links + * This sents a flush for a particular request and links * the flush request to the original request. The current * code only supports a single flush request although the protocol * allows for multiple flush requests to be sent for a single request. @@ -789,11 +790,13 @@ struct p9_client *p9_client_create(const char *dev_name, char *options) spin_lock_init(&clnt->lock); INIT_LIST_HEAD(&clnt->fidlist); - p9_tag_init(clnt); + err = p9_tag_init(clnt); + if (err < 0) + goto free_client; err = parse_opts(options, clnt); if (err < 0) - goto free_client; + goto destroy_tagpool; if (!clnt->trans_mod) clnt->trans_mod = v9fs_get_default_trans(); @@ -802,13 +805,12 @@ struct p9_client *p9_client_create(const char *dev_name, char *options) err = -EPROTONOSUPPORT; P9_DPRINTK(P9_DEBUG_ERROR, "No transport defined or default transport\n"); - goto free_client; + goto destroy_tagpool; } clnt->fidpool = p9_idpool_create(); if (IS_ERR(clnt->fidpool)) { err = PTR_ERR(clnt->fidpool); - clnt->fidpool = NULL; goto put_trans; } @@ -834,6 +836,8 @@ destroy_fidpool: p9_idpool_destroy(clnt->fidpool); put_trans: v9fs_put_trans(clnt->trans_mod); +destroy_tagpool: + p9_idpool_destroy(clnt->tagpool); free_client: kfree(clnt); return ERR_PTR(err); @@ -1298,7 +1302,7 @@ p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset, if (count < rsize) rsize = count; - /* Don't bother zerocopy form small IO (< 1024) */ + /* Don't bother zerocopy for small IO (< 1024) */ if (((clnt->trans_mod->pref & P9_TRANS_PREF_PAYLOAD_MASK) == P9_TRANS_PREF_PAYLOAD_SEP) && (rsize > 1024)) { req = p9_client_rpc(clnt, P9_TREAD, "dqE", fid->fid, offset, diff --git a/net/9p/mod.c b/net/9p/mod.c index cf8a412..72c3982 100644 --- a/net/9p/mod.c +++ b/net/9p/mod.c @@ -139,7 +139,7 @@ void v9fs_put_trans(struct p9_trans_module *m) } /** - * v9fs_init - Initialize module + * init_p9 - Initialize module * */ static int __init init_p9(void) @@ -154,7 +154,7 @@ static int __init init_p9(void) } /** - * v9fs_init - shutdown module + * exit_p9 - shutdown module * */ diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index 4a90843..fdfdb57 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -916,8 +916,8 @@ p9_fd_create_tcp(struct p9_client *client, const char *addr, char *args) sin_server.sin_family = AF_INET; sin_server.sin_addr.s_addr = in_aton(addr); sin_server.sin_port = htons(opts.port); - err = sock_create_kern(PF_INET, 
SOCK_STREAM, IPPROTO_TCP, &csocket); - + err = __sock_create(read_pnet(&current->nsproxy->net_ns), PF_INET, + SOCK_STREAM, IPPROTO_TCP, &csocket, 1); if (err) { P9_EPRINTK(KERN_ERR, "p9_trans_tcp: problem creating socket\n"); return err; } @@ -954,7 +954,8 @@ p9_fd_create_unix(struct p9_client *client, const char *addr, char *args) sun_server.sun_family = PF_UNIX; strcpy(sun_server.sun_path, addr); - err = sock_create_kern(PF_UNIX, SOCK_STREAM, 0, &csocket); + err = __sock_create(read_pnet(&current->nsproxy->net_ns), PF_UNIX, + SOCK_STREAM, 0, &csocket, 1); if (err < 0) { P9_EPRINTK(KERN_ERR, "p9_trans_unix: problem creating socket\n"); return err; } diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c index 844a7a5..159c50f 100644 --- a/net/9p/trans_rdma.c +++ b/net/9p/trans_rdma.c @@ -589,7 +589,8 @@ rdma_create_trans(struct p9_client *client, const char *addr, char *args) return -ENOMEM; /* Create the RDMA CM ID */ - rdma->cm_id = rdma_create_id(p9_cm_event_handler, client, RDMA_PS_TCP); + rdma->cm_id = rdma_create_id(p9_cm_event_handler, client, RDMA_PS_TCP, + IB_QPT_RC); if (IS_ERR(rdma->cm_id)) goto error; diff --git a/net/9p/util.c b/net/9p/util.c index da6af81..9c1c934 100644 --- a/net/9p/util.c +++ b/net/9p/util.c @@ -93,7 +93,7 @@ int p9_idpool_get(struct p9_idpool *p) retry: if (idr_pre_get(&p->pool, GFP_NOFS) == 0) - return 0; + return -1; spin_lock_irqsave(&p->lock, flags); diff --git a/net/atm/lec.c b/net/atm/lec.c index 25073b6..ba48daa 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -1171,7 +1171,7 @@ static int __init lane_module_init(void) #endif register_atm_ioctl(&lane_ioctl_ops); - pr_info("lec.c: " __DATE__ " " __TIME__ " initialized\n"); + pr_info("lec.c: initialized\n"); return 0; } diff --git a/net/atm/mpc.c b/net/atm/mpc.c index 644cdf0..aa972409f 100644 --- a/net/atm/mpc.c +++ b/net/atm/mpc.c @@ -1005,7 +1005,7 @@ static int mpoa_event_listener(struct notifier_block *mpoa_notifier, struct mpoa_client *mpc; struct lec_priv *priv; - dev = (struct net_device *)dev_ptr; + dev = dev_ptr; if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; @@ -1482,7 +1482,7 @@ static __init int atm_mpoa_init(void) if (mpc_proc_init() != 0) pr_info("failed to initialize /proc/mpoa\n"); - pr_info("mpc.c: " __DATE__ " " __TIME__ " initialized\n"); + pr_info("mpc.c: initialized\n"); return 0; } diff --git a/net/atm/pppoatm.c b/net/atm/pppoatm.c index e9aced0..db4a11c 100644 --- a/net/atm/pppoatm.c +++ b/net/atm/pppoatm.c @@ -37,6 +37,7 @@ #include <linux/module.h> #include <linux/init.h> +#include <linux/interrupt.h> #include <linux/skbuff.h> #include <linux/slab.h> #include <linux/atm.h> diff --git a/net/batman-adv/Kconfig b/net/batman-adv/Kconfig index 6c051ad..2b68d06 100644 --- a/net/batman-adv/Kconfig +++ b/net/batman-adv/Kconfig @@ -5,6 +5,7 @@ config BATMAN_ADV tristate "B.A.T.M.A.N. 
Advanced Meshing Protocol" depends on NET + select CRC16 default n ---help--- diff --git a/net/batman-adv/aggregation.c b/net/batman-adv/aggregation.c index a8c3203..c583e04 100644 --- a/net/batman-adv/aggregation.c +++ b/net/batman-adv/aggregation.c @@ -20,24 +20,19 @@ */ #include "main.h" +#include "translation-table.h" #include "aggregation.h" #include "send.h" #include "routing.h" #include "hard-interface.h" -/* calculate the size of the tt information for a given packet */ -static int tt_len(struct batman_packet *batman_packet) -{ - return batman_packet->num_tt * ETH_ALEN; -} - /* return true if new_packet can be aggregated with forw_packet */ -static bool can_aggregate_with(struct batman_packet *new_batman_packet, +static bool can_aggregate_with(const struct batman_packet *new_batman_packet, int packet_len, unsigned long send_time, bool directlink, - struct hard_iface *if_incoming, - struct forw_packet *forw_packet) + const struct hard_iface *if_incoming, + const struct forw_packet *forw_packet) { struct batman_packet *batman_packet = (struct batman_packet *)forw_packet->skb->data; @@ -97,8 +92,9 @@ static bool can_aggregate_with(struct batman_packet *new_batman_packet, } /* create a new aggregated packet and add this packet to it */ -static void new_aggregated_packet(unsigned char *packet_buff, int packet_len, - unsigned long send_time, bool direct_link, +static void new_aggregated_packet(const unsigned char *packet_buff, + int packet_len, unsigned long send_time, + bool direct_link, struct hard_iface *if_incoming, int own_packet) { @@ -118,7 +114,7 @@ static void new_aggregated_packet(unsigned char *packet_buff, int packet_len, } } - forw_packet_aggr = kmalloc(sizeof(struct forw_packet), GFP_ATOMIC); + forw_packet_aggr = kmalloc(sizeof(*forw_packet_aggr), GFP_ATOMIC); if (!forw_packet_aggr) { if (!own_packet) atomic_inc(&bat_priv->batman_queue_left); @@ -150,7 +146,7 @@ static void new_aggregated_packet(unsigned char *packet_buff, int packet_len, forw_packet_aggr->own = own_packet; forw_packet_aggr->if_incoming = if_incoming; forw_packet_aggr->num_packets = 0; - forw_packet_aggr->direct_link_flags = 0; + forw_packet_aggr->direct_link_flags = NO_FLAGS; forw_packet_aggr->send_time = send_time; /* save packet direct link flag status */ @@ -176,8 +172,7 @@ out: /* aggregate a new packet into the existing aggregation */ static void aggregate(struct forw_packet *forw_packet_aggr, - unsigned char *packet_buff, - int packet_len, + const unsigned char *packet_buff, int packet_len, bool direct_link) { unsigned char *skb_buff; @@ -195,7 +190,7 @@ static void aggregate(struct forw_packet *forw_packet_aggr, void add_bat_packet_to_list(struct bat_priv *bat_priv, unsigned char *packet_buff, int packet_len, - struct hard_iface *if_incoming, char own_packet, + struct hard_iface *if_incoming, int own_packet, unsigned long send_time) { /** @@ -253,8 +248,9 @@ void add_bat_packet_to_list(struct bat_priv *bat_priv, } /* unpack the aggregated packets and process them one by one */ -void receive_aggr_bat_packet(struct ethhdr *ethhdr, unsigned char *packet_buff, - int packet_len, struct hard_iface *if_incoming) +void receive_aggr_bat_packet(const struct ethhdr *ethhdr, + unsigned char *packet_buff, int packet_len, + struct hard_iface *if_incoming) { struct batman_packet *batman_packet; int buff_pos = 0; @@ -263,18 +259,20 @@ void receive_aggr_bat_packet(struct ethhdr *ethhdr, unsigned char *packet_buff, batman_packet = (struct batman_packet *)packet_buff; do { - /* network to host order for our 32bit 
seqno, and the - orig_interval. */ + /* network to host order for our 32bit seqno and the + orig_interval */ batman_packet->seqno = ntohl(batman_packet->seqno); + batman_packet->tt_crc = ntohs(batman_packet->tt_crc); tt_buff = packet_buff + buff_pos + BAT_PACKET_LEN; - receive_bat_packet(ethhdr, batman_packet, - tt_buff, tt_len(batman_packet), - if_incoming); - buff_pos += BAT_PACKET_LEN + tt_len(batman_packet); + receive_bat_packet(ethhdr, batman_packet, tt_buff, if_incoming); + + buff_pos += BAT_PACKET_LEN + + tt_len(batman_packet->tt_num_changes); + batman_packet = (struct batman_packet *) (packet_buff + buff_pos); } while (aggregated_packet(buff_pos, packet_len, - batman_packet->num_tt)); + batman_packet->tt_num_changes)); } diff --git a/net/batman-adv/aggregation.h b/net/batman-adv/aggregation.h index 7e6d72f..216337b 100644 --- a/net/batman-adv/aggregation.h +++ b/net/batman-adv/aggregation.h @@ -25,9 +25,11 @@ #include "main.h" /* is there another aggregated packet here? */ -static inline int aggregated_packet(int buff_pos, int packet_len, int num_tt) +static inline int aggregated_packet(int buff_pos, int packet_len, + int tt_num_changes) { - int next_buff_pos = buff_pos + BAT_PACKET_LEN + (num_tt * ETH_ALEN); + int next_buff_pos = buff_pos + BAT_PACKET_LEN + (tt_num_changes * + sizeof(struct tt_change)); return (next_buff_pos <= packet_len) && (next_buff_pos <= MAX_AGGREGATION_BYTES); @@ -35,9 +37,10 @@ static inline int aggregated_packet(int buff_pos, int packet_len, int num_tt) void add_bat_packet_to_list(struct bat_priv *bat_priv, unsigned char *packet_buff, int packet_len, - struct hard_iface *if_incoming, char own_packet, + struct hard_iface *if_incoming, int own_packet, unsigned long send_time); -void receive_aggr_bat_packet(struct ethhdr *ethhdr, unsigned char *packet_buff, - int packet_len, struct hard_iface *if_incoming); +void receive_aggr_bat_packet(const struct ethhdr *ethhdr, + unsigned char *packet_buff, int packet_len, + struct hard_iface *if_incoming); #endif /* _NET_BATMAN_ADV_AGGREGATION_H_ */ diff --git a/net/batman-adv/bat_debugfs.c b/net/batman-adv/bat_debugfs.c index abaeec5..d0af9bf 100644 --- a/net/batman-adv/bat_debugfs.c +++ b/net/batman-adv/bat_debugfs.c @@ -50,7 +50,8 @@ static void emit_log_char(struct debug_log *debug_log, char c) debug_log->log_start = debug_log->log_end - log_buff_len; } -static int fdebug_log(struct debug_log *debug_log, char *fmt, ...) +__printf(2, 3) +static int fdebug_log(struct debug_log *debug_log, const char *fmt, ...) { va_list args; static char debug_log_buf[256]; @@ -74,14 +75,14 @@ static int fdebug_log(struct debug_log *debug_log, char *fmt, ...) return 0; } -int debug_log(struct bat_priv *bat_priv, char *fmt, ...) +int debug_log(struct bat_priv *bat_priv, const char *fmt, ...) 
{ va_list args; char tmp_log_buf[256]; va_start(args, fmt); vscnprintf(tmp_log_buf, sizeof(tmp_log_buf), fmt, args); - fdebug_log(bat_priv->debug_log, "[%10u] %s", + fdebug_log(bat_priv->debug_log, "[%10lu] %s", (jiffies / HZ), tmp_log_buf); va_end(args); @@ -114,7 +115,7 @@ static ssize_t log_read(struct file *file, char __user *buf, !(debug_log->log_end - debug_log->log_start)) return -EAGAIN; - if ((!buf) || (count < 0)) + if (!buf) return -EINVAL; if (count == 0) @@ -184,7 +185,7 @@ static int debug_log_setup(struct bat_priv *bat_priv) if (!bat_priv->debug_dir) goto err; - bat_priv->debug_log = kzalloc(sizeof(struct debug_log), GFP_ATOMIC); + bat_priv->debug_log = kzalloc(sizeof(*bat_priv->debug_log), GFP_ATOMIC); if (!bat_priv->debug_log) goto err; diff --git a/net/batman-adv/bat_sysfs.c b/net/batman-adv/bat_sysfs.c index 497a070..cd15deb 100644 --- a/net/batman-adv/bat_sysfs.c +++ b/net/batman-adv/bat_sysfs.c @@ -28,9 +28,31 @@ #include "gateway_client.h" #include "vis.h" -#define to_dev(obj) container_of(obj, struct device, kobj) -#define kobj_to_netdev(obj) to_net_dev(to_dev(obj->parent)) -#define kobj_to_batpriv(obj) netdev_priv(kobj_to_netdev(obj)) +static struct net_device *kobj_to_netdev(struct kobject *obj) +{ + struct device *dev = container_of(obj->parent, struct device, kobj); + return to_net_dev(dev); +} + +static struct bat_priv *kobj_to_batpriv(struct kobject *obj) +{ + struct net_device *net_dev = kobj_to_netdev(obj); + return netdev_priv(net_dev); +} + +#define UEV_TYPE_VAR "BATTYPE=" +#define UEV_ACTION_VAR "BATACTION=" +#define UEV_DATA_VAR "BATDATA=" + +static char *uev_action_str[] = { + "add", + "del", + "change" +}; + +static char *uev_type_str[] = { + "gw" +}; /* Use this, if you have customized show and store functions */ #define BAT_ATTR(_name, _mode, _show, _store) \ @@ -96,7 +118,7 @@ ssize_t show_##_name(struct kobject *kobj, struct attribute *attr, \ static int store_bool_attr(char *buff, size_t count, struct net_device *net_dev, - char *attr_name, atomic_t *attr) + const char *attr_name, atomic_t *attr) { int enabled = -1; @@ -138,16 +160,15 @@ static inline ssize_t __store_bool_attr(char *buff, size_t count, { int ret; - ret = store_bool_attr(buff, count, net_dev, (char *)attr->name, - attr_store); + ret = store_bool_attr(buff, count, net_dev, attr->name, attr_store); if (post_func && ret) post_func(net_dev); return ret; } -static int store_uint_attr(char *buff, size_t count, - struct net_device *net_dev, char *attr_name, +static int store_uint_attr(const char *buff, size_t count, + struct net_device *net_dev, const char *attr_name, unsigned int min, unsigned int max, atomic_t *attr) { unsigned long uint_val; @@ -183,15 +204,15 @@ static int store_uint_attr(char *buff, size_t count, return count; } -static inline ssize_t __store_uint_attr(char *buff, size_t count, +static inline ssize_t __store_uint_attr(const char *buff, size_t count, int min, int max, void (*post_func)(struct net_device *), - struct attribute *attr, + const struct attribute *attr, atomic_t *attr_store, struct net_device *net_dev) { int ret; - ret = store_uint_attr(buff, count, net_dev, (char *)attr->name, + ret = store_uint_attr(buff, count, net_dev, attr->name, min, max, attr_store); if (post_func && ret) post_func(net_dev); @@ -368,7 +389,7 @@ BAT_ATTR_UINT(gw_sel_class, S_IRUGO | S_IWUSR, 1, TQ_MAX_VALUE, static BAT_ATTR(gw_bandwidth, S_IRUGO | S_IWUSR, show_gw_bwidth, store_gw_bwidth); #ifdef CONFIG_BATMAN_ADV_DEBUG -BAT_ATTR_UINT(log_level, S_IRUGO | S_IWUSR, 0, 3, NULL); 
+BAT_ATTR_UINT(log_level, S_IRUGO | S_IWUSR, 0, 7, NULL); #endif static struct bat_attribute *mesh_attrs[] = { @@ -594,3 +615,60 @@ void sysfs_del_hardif(struct kobject **hardif_obj) kobject_put(*hardif_obj); *hardif_obj = NULL; } + +int throw_uevent(struct bat_priv *bat_priv, enum uev_type type, + enum uev_action action, const char *data) +{ + int ret = -1; + struct hard_iface *primary_if = NULL; + struct kobject *bat_kobj; + char *uevent_env[4] = { NULL, NULL, NULL, NULL }; + + primary_if = primary_if_get_selected(bat_priv); + if (!primary_if) + goto out; + + bat_kobj = &primary_if->soft_iface->dev.kobj; + + uevent_env[0] = kmalloc(strlen(UEV_TYPE_VAR) + + strlen(uev_type_str[type]) + 1, + GFP_ATOMIC); + if (!uevent_env[0]) + goto out; + + sprintf(uevent_env[0], "%s%s", UEV_TYPE_VAR, uev_type_str[type]); + + uevent_env[1] = kmalloc(strlen(UEV_ACTION_VAR) + + strlen(uev_action_str[action]) + 1, + GFP_ATOMIC); + if (!uevent_env[1]) + goto out; + + sprintf(uevent_env[1], "%s%s", UEV_ACTION_VAR, uev_action_str[action]); + + /* If the event is DEL, ignore the data field */ + if (action != UEV_DEL) { + uevent_env[2] = kmalloc(strlen(UEV_DATA_VAR) + + strlen(data) + 1, GFP_ATOMIC); + if (!uevent_env[2]) + goto out; + + sprintf(uevent_env[2], "%s%s", UEV_DATA_VAR, data); + } + + ret = kobject_uevent_env(bat_kobj, KOBJ_CHANGE, uevent_env); +out: + kfree(uevent_env[0]); + kfree(uevent_env[1]); + kfree(uevent_env[2]); + + if (primary_if) + hardif_free_ref(primary_if); + + if (ret) + bat_dbg(DBG_BATMAN, bat_priv, "Impossible to send " + "uevent for (%s,%s,%s) event (err: %d)\n", + uev_type_str[type], uev_action_str[action], + (action == UEV_DEL ? "NULL" : data), ret); + return ret; +} diff --git a/net/batman-adv/bat_sysfs.h b/net/batman-adv/bat_sysfs.h index 02f1fa7..a3f75a7 100644 --- a/net/batman-adv/bat_sysfs.h +++ b/net/batman-adv/bat_sysfs.h @@ -38,5 +38,7 @@ int sysfs_add_meshif(struct net_device *dev); void sysfs_del_meshif(struct net_device *dev); int sysfs_add_hardif(struct kobject **hardif_obj, struct net_device *dev); void sysfs_del_hardif(struct kobject **hardif_obj); +int throw_uevent(struct bat_priv *bat_priv, enum uev_type type, + enum uev_action action, const char *data); #endif /* _NET_BATMAN_ADV_SYSFS_H_ */ diff --git a/net/batman-adv/bitarray.c b/net/batman-adv/bitarray.c index ad2ca92..c1f4bfc 100644 --- a/net/batman-adv/bitarray.c +++ b/net/batman-adv/bitarray.c @@ -26,8 +26,8 @@ /* returns true if the corresponding bit in the given seq_bits indicates true * and curr_seqno is within range of last_seqno */ -uint8_t get_bit_status(unsigned long *seq_bits, uint32_t last_seqno, - uint32_t curr_seqno) +int get_bit_status(const unsigned long *seq_bits, uint32_t last_seqno, + uint32_t curr_seqno) { int32_t diff, word_offset, word_num; @@ -127,10 +127,10 @@ static void bit_reset_window(unsigned long *seq_bits) * 1 if the window was moved (either new or very old) * 0 if the window was not moved/shifted. */ -char bit_get_packet(void *priv, unsigned long *seq_bits, - int32_t seq_num_diff, int8_t set_mark) +int bit_get_packet(void *priv, unsigned long *seq_bits, + int32_t seq_num_diff, int set_mark) { - struct bat_priv *bat_priv = (struct bat_priv *)priv; + struct bat_priv *bat_priv = priv; /* sequence number is slightly older. We already got a sequence number * higher than this one, so we just mark it. */ @@ -190,7 +190,7 @@ char bit_get_packet(void *priv, unsigned long *seq_bits, /* count the hamming weight, how many good packets did we receive? just count * the 1's. 
*/ -int bit_packet_count(unsigned long *seq_bits) +int bit_packet_count(const unsigned long *seq_bits) { int i, hamming = 0; diff --git a/net/batman-adv/bitarray.h b/net/batman-adv/bitarray.h index 769c246..9c04422 100644 --- a/net/batman-adv/bitarray.h +++ b/net/batman-adv/bitarray.h @@ -26,8 +26,8 @@ /* returns true if the corresponding bit in the given seq_bits indicates true * and curr_seqno is within range of last_seqno */ -uint8_t get_bit_status(unsigned long *seq_bits, uint32_t last_seqno, - uint32_t curr_seqno); +int get_bit_status(const unsigned long *seq_bits, uint32_t last_seqno, + uint32_t curr_seqno); /* turn corresponding bit on, so we can remember that we got the packet */ void bit_mark(unsigned long *seq_bits, int32_t n); @@ -35,10 +35,10 @@ void bit_mark(unsigned long *seq_bits, int32_t n); /* receive and process one packet, returns 1 if received seq_num is considered * new, 0 if old */ -char bit_get_packet(void *priv, unsigned long *seq_bits, - int32_t seq_num_diff, int8_t set_mark); +int bit_get_packet(void *priv, unsigned long *seq_bits, + int32_t seq_num_diff, int set_mark); /* count the hamming weight, how many good packets did we receive? */ -int bit_packet_count(unsigned long *seq_bits); +int bit_packet_count(const unsigned long *seq_bits); #endif /* _NET_BATMAN_ADV_BITARRAY_H_ */ diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c index 61605a0..8b25b52 100644 --- a/net/batman-adv/gateway_client.c +++ b/net/batman-adv/gateway_client.c @@ -20,15 +20,22 @@ */ #include "main.h" +#include "bat_sysfs.h" #include "gateway_client.h" #include "gateway_common.h" #include "hard-interface.h" #include "originator.h" +#include "routing.h" #include <linux/ip.h> #include <linux/ipv6.h> #include <linux/udp.h> #include <linux/if_vlan.h> +/* This is the offset of the options field in a dhcp packet starting at + * the beginning of the dhcp header */ +#define DHCP_OPTIONS_OFFSET 240 +#define DHCP_REQUEST 3 + static void gw_node_free_ref(struct gw_node *gw_node) { if (atomic_dec_and_test(&gw_node->refcount)) @@ -86,7 +93,7 @@ static void gw_select(struct bat_priv *bat_priv, struct gw_node *new_gw_node) if (new_gw_node && !atomic_inc_not_zero(&new_gw_node->refcount)) new_gw_node = NULL; - curr_gw_node = bat_priv->curr_gw; + curr_gw_node = rcu_dereference_protected(bat_priv->curr_gw, 1); rcu_assign_pointer(bat_priv->curr_gw, new_gw_node); if (curr_gw_node) @@ -97,40 +104,19 @@ static void gw_select(struct bat_priv *bat_priv, struct gw_node *new_gw_node) void gw_deselect(struct bat_priv *bat_priv) { - gw_select(bat_priv, NULL); + atomic_set(&bat_priv->gw_reselect, 1); } -void gw_election(struct bat_priv *bat_priv) +static struct gw_node *gw_get_best_gw_node(struct bat_priv *bat_priv) { - struct hlist_node *node; - struct gw_node *gw_node, *curr_gw = NULL, *curr_gw_tmp = NULL; struct neigh_node *router; - uint8_t max_tq = 0; + struct hlist_node *node; + struct gw_node *gw_node, *curr_gw = NULL; uint32_t max_gw_factor = 0, tmp_gw_factor = 0; + uint8_t max_tq = 0; int down, up; - /** - * The batman daemon checks here if we already passed a full originator - * cycle in order to make sure we don't choose the first gateway we - * hear about. This check is based on the daemon's uptime which we - * don't have. 
- **/ - if (atomic_read(&bat_priv->gw_mode) != GW_MODE_CLIENT) - return; - - curr_gw = gw_get_selected_gw_node(bat_priv); - if (curr_gw) - goto out; - rcu_read_lock(); - if (hlist_empty(&bat_priv->gw_list)) { - bat_dbg(DBG_BATMAN, bat_priv, - "Removing selected gateway - " - "no gateway in range\n"); - gw_deselect(bat_priv); - goto unlock; - } - hlist_for_each_entry_rcu(gw_node, node, &bat_priv->gw_list, list) { if (gw_node->deleted) continue; @@ -139,6 +125,9 @@ void gw_election(struct bat_priv *bat_priv) if (!router) continue; + if (!atomic_inc_not_zero(&gw_node->refcount)) + goto next; + switch (atomic_read(&bat_priv->gw_sel_class)) { case 1: /* fast connection */ gw_bandwidth_to_kbit(gw_node->orig_node->gw_flags, @@ -151,8 +140,12 @@ void gw_election(struct bat_priv *bat_priv) if ((tmp_gw_factor > max_gw_factor) || ((tmp_gw_factor == max_gw_factor) && - (router->tq_avg > max_tq))) - curr_gw_tmp = gw_node; + (router->tq_avg > max_tq))) { + if (curr_gw) + gw_node_free_ref(curr_gw); + curr_gw = gw_node; + atomic_inc(&curr_gw->refcount); + } break; default: /** @@ -163,8 +156,12 @@ void gw_election(struct bat_priv *bat_priv) * soon as a better gateway appears which has * $routing_class more tq points) **/ - if (router->tq_avg > max_tq) - curr_gw_tmp = gw_node; + if (router->tq_avg > max_tq) { + if (curr_gw) + gw_node_free_ref(curr_gw); + curr_gw = gw_node; + atomic_inc(&curr_gw->refcount); + } break; } @@ -174,42 +171,81 @@ void gw_election(struct bat_priv *bat_priv) if (tmp_gw_factor > max_gw_factor) max_gw_factor = tmp_gw_factor; + gw_node_free_ref(gw_node); + +next: neigh_node_free_ref(router); } + rcu_read_unlock(); - if (curr_gw != curr_gw_tmp) { - router = orig_node_get_router(curr_gw_tmp->orig_node); - if (!router) - goto unlock; + return curr_gw; +} - if ((curr_gw) && (!curr_gw_tmp)) - bat_dbg(DBG_BATMAN, bat_priv, - "Removing selected gateway - " - "no gateway in range\n"); - else if ((!curr_gw) && (curr_gw_tmp)) - bat_dbg(DBG_BATMAN, bat_priv, - "Adding route to gateway %pM " - "(gw_flags: %i, tq: %i)\n", - curr_gw_tmp->orig_node->orig, - curr_gw_tmp->orig_node->gw_flags, - router->tq_avg); - else - bat_dbg(DBG_BATMAN, bat_priv, - "Changing route to gateway %pM " - "(gw_flags: %i, tq: %i)\n", - curr_gw_tmp->orig_node->orig, - curr_gw_tmp->orig_node->gw_flags, - router->tq_avg); +void gw_election(struct bat_priv *bat_priv) +{ + struct gw_node *curr_gw = NULL, *next_gw = NULL; + struct neigh_node *router = NULL; + char gw_addr[18] = { '\0' }; - neigh_node_free_ref(router); - gw_select(bat_priv, curr_gw_tmp); + /** + * The batman daemon checks here if we already passed a full originator + * cycle in order to make sure we don't choose the first gateway we + * hear about. This check is based on the daemon's uptime which we + * don't have. 
+ **/ + if (atomic_read(&bat_priv->gw_mode) != GW_MODE_CLIENT) + goto out; + + if (!atomic_dec_not_zero(&bat_priv->gw_reselect)) + goto out; + + curr_gw = gw_get_selected_gw_node(bat_priv); + + next_gw = gw_get_best_gw_node(bat_priv); + + if (curr_gw == next_gw) + goto out; + + if (next_gw) { + sprintf(gw_addr, "%pM", next_gw->orig_node->orig); + + router = orig_node_get_router(next_gw->orig_node); + if (!router) { + gw_deselect(bat_priv); + goto out; + } } -unlock: - rcu_read_unlock(); + if ((curr_gw) && (!next_gw)) { + bat_dbg(DBG_BATMAN, bat_priv, + "Removing selected gateway - no gateway in range\n"); + throw_uevent(bat_priv, UEV_GW, UEV_DEL, NULL); + } else if ((!curr_gw) && (next_gw)) { + bat_dbg(DBG_BATMAN, bat_priv, + "Adding route to gateway %pM (gw_flags: %i, tq: %i)\n", + next_gw->orig_node->orig, + next_gw->orig_node->gw_flags, + router->tq_avg); + throw_uevent(bat_priv, UEV_GW, UEV_ADD, gw_addr); + } else { + bat_dbg(DBG_BATMAN, bat_priv, + "Changing route to gateway %pM " + "(gw_flags: %i, tq: %i)\n", + next_gw->orig_node->orig, + next_gw->orig_node->gw_flags, + router->tq_avg); + throw_uevent(bat_priv, UEV_GW, UEV_CHANGE, gw_addr); + } + + gw_select(bat_priv, next_gw); + out: if (curr_gw) gw_node_free_ref(curr_gw); + if (next_gw) + gw_node_free_ref(next_gw); + if (router) + neigh_node_free_ref(router); } void gw_check_election(struct bat_priv *bat_priv, struct orig_node *orig_node) @@ -273,11 +309,10 @@ static void gw_node_add(struct bat_priv *bat_priv, struct gw_node *gw_node; int down, up; - gw_node = kmalloc(sizeof(struct gw_node), GFP_ATOMIC); + gw_node = kzalloc(sizeof(*gw_node), GFP_ATOMIC); if (!gw_node) return; - memset(gw_node, 0, sizeof(struct gw_node)); INIT_HLIST_NODE(&gw_node->list); gw_node->orig_node = orig_node; atomic_set(&gw_node->refcount, 1); @@ -323,7 +358,7 @@ void gw_node_update(struct bat_priv *bat_priv, gw_node->deleted = 0; - if (new_gwflags == 0) { + if (new_gwflags == NO_FLAGS) { gw_node->deleted = jiffies; bat_dbg(DBG_BATMAN, bat_priv, "Gateway %pM removed from gateway list\n", @@ -336,7 +371,7 @@ void gw_node_update(struct bat_priv *bat_priv, goto unlock; } - if (new_gwflags == 0) + if (new_gwflags == NO_FLAGS) goto unlock; gw_node_add(bat_priv, orig_node, new_gwflags); @@ -353,7 +388,7 @@ unlock: void gw_node_delete(struct bat_priv *bat_priv, struct orig_node *orig_node) { - return gw_node_update(bat_priv, orig_node, 0); + gw_node_update(bat_priv, orig_node, 0); } void gw_node_purge(struct bat_priv *bat_priv) @@ -361,7 +396,7 @@ void gw_node_purge(struct bat_priv *bat_priv) struct gw_node *gw_node, *curr_gw; struct hlist_node *node, *node_tmp; unsigned long timeout = 2 * PURGE_TIMEOUT * HZ; - char do_deselect = 0; + int do_deselect = 0; curr_gw = gw_get_selected_gw_node(bat_priv); @@ -394,8 +429,8 @@ void gw_node_purge(struct bat_priv *bat_priv) /** * fails if orig_node has no router */ -static int _write_buffer_text(struct bat_priv *bat_priv, - struct seq_file *seq, struct gw_node *gw_node) +static int _write_buffer_text(struct bat_priv *bat_priv, struct seq_file *seq, + const struct gw_node *gw_node) { struct gw_node *curr_gw; struct neigh_node *router; @@ -480,14 +515,75 @@ out: return ret; } -int gw_is_target(struct bat_priv *bat_priv, struct sk_buff *skb) +static bool is_type_dhcprequest(struct sk_buff *skb, int header_len) +{ + int ret = false; + unsigned char *p; + int pkt_len; + + if (skb_linearize(skb) < 0) + goto out; + + pkt_len = skb_headlen(skb); + + if (pkt_len < header_len + DHCP_OPTIONS_OFFSET + 1) + goto out; + + p = skb->data 
+ header_len + DHCP_OPTIONS_OFFSET; + pkt_len -= header_len + DHCP_OPTIONS_OFFSET + 1; + + /* Access the dhcp option lists. Each entry is made up by: + * - octect 1: option type + * - octect 2: option data len (only if type != 255 and 0) + * - octect 3: option data */ + while (*p != 255 && !ret) { + /* p now points to the first octect: option type */ + if (*p == 53) { + /* type 53 is the message type option. + * Jump the len octect and go to the data octect */ + if (pkt_len < 2) + goto out; + p += 2; + + /* check if the message type is what we need */ + if (*p == DHCP_REQUEST) + ret = true; + break; + } else if (*p == 0) { + /* option type 0 (padding), just go forward */ + if (pkt_len < 1) + goto out; + pkt_len--; + p++; + } else { + /* This is any other option. So we get the length... */ + if (pkt_len < 1) + goto out; + pkt_len--; + p++; + + /* ...and then we jump over the data */ + if (pkt_len < *p) + goto out; + pkt_len -= *p; + p += (*p); + } + } +out: + return ret; +} + +int gw_is_target(struct bat_priv *bat_priv, struct sk_buff *skb, + struct orig_node *old_gw) { struct ethhdr *ethhdr; struct iphdr *iphdr; struct ipv6hdr *ipv6hdr; struct udphdr *udphdr; struct gw_node *curr_gw; + struct neigh_node *neigh_curr = NULL, *neigh_old = NULL; unsigned int header_len = 0; + int ret = 1; if (atomic_read(&bat_priv->gw_mode) == GW_MODE_OFF) return 0; @@ -509,7 +605,7 @@ int gw_is_target(struct bat_priv *bat_priv, struct sk_buff *skb) /* check for ip header */ switch (ntohs(ethhdr->h_proto)) { case ETH_P_IP: - if (!pskb_may_pull(skb, header_len + sizeof(struct iphdr))) + if (!pskb_may_pull(skb, header_len + sizeof(*iphdr))) return 0; iphdr = (struct iphdr *)(skb->data + header_len); header_len += iphdr->ihl * 4; @@ -520,10 +616,10 @@ int gw_is_target(struct bat_priv *bat_priv, struct sk_buff *skb) break; case ETH_P_IPV6: - if (!pskb_may_pull(skb, header_len + sizeof(struct ipv6hdr))) + if (!pskb_may_pull(skb, header_len + sizeof(*ipv6hdr))) return 0; ipv6hdr = (struct ipv6hdr *)(skb->data + header_len); - header_len += sizeof(struct ipv6hdr); + header_len += sizeof(*ipv6hdr); /* check for udp header */ if (ipv6hdr->nexthdr != IPPROTO_UDP) @@ -534,10 +630,10 @@ int gw_is_target(struct bat_priv *bat_priv, struct sk_buff *skb) return 0; } - if (!pskb_may_pull(skb, header_len + sizeof(struct udphdr))) + if (!pskb_may_pull(skb, header_len + sizeof(*udphdr))) return 0; udphdr = (struct udphdr *)(skb->data + header_len); - header_len += sizeof(struct udphdr); + header_len += sizeof(*udphdr); /* check for bootp port */ if ((ntohs(ethhdr->h_proto) == ETH_P_IP) && @@ -555,7 +651,30 @@ int gw_is_target(struct bat_priv *bat_priv, struct sk_buff *skb) if (!curr_gw) return 0; + /* If old_gw != NULL then this packet is unicast. 
+ * So, at this point we have to check the message type: if it is a + * DHCPREQUEST we have to decide whether to drop it or not */ + if (old_gw && curr_gw->orig_node != old_gw) { + if (is_type_dhcprequest(skb, header_len)) { + /* If the dhcp packet has been sent to a different gw, + * we have to evaluate whether the old gw is still + * reliable enough */ + neigh_curr = find_router(bat_priv, curr_gw->orig_node, + NULL); + neigh_old = find_router(bat_priv, old_gw, NULL); + if (!neigh_curr || !neigh_old) + goto free_neigh; + if (neigh_curr->tq_avg - neigh_old->tq_avg < + GW_THRESHOLD) + ret = -1; + } + } +free_neigh: + if (neigh_old) + neigh_node_free_ref(neigh_old); + if (neigh_curr) + neigh_node_free_ref(neigh_curr); if (curr_gw) gw_node_free_ref(curr_gw); - return 1; + return ret; } diff --git a/net/batman-adv/gateway_client.h b/net/batman-adv/gateway_client.h index 1ce8c60..b9b983c 100644 --- a/net/batman-adv/gateway_client.h +++ b/net/batman-adv/gateway_client.h @@ -31,6 +31,7 @@ void gw_node_update(struct bat_priv *bat_priv, void gw_node_delete(struct bat_priv *bat_priv, struct orig_node *orig_node); void gw_node_purge(struct bat_priv *bat_priv); int gw_client_seq_print_text(struct seq_file *seq, void *offset); -int gw_is_target(struct bat_priv *bat_priv, struct sk_buff *skb); +int gw_is_target(struct bat_priv *bat_priv, struct sk_buff *skb, + struct orig_node *old_gw); #endif /* _NET_BATMAN_ADV_GATEWAY_CLIENT_H_ */ diff --git a/net/batman-adv/gateway_common.c b/net/batman-adv/gateway_common.c index 50d3a59..18661af 100644 --- a/net/batman-adv/gateway_common.c +++ b/net/batman-adv/gateway_common.c @@ -61,9 +61,9 @@ static void kbit_to_gw_bandwidth(int down, int up, long *gw_srv_class) /* returns the up and downspeeds in kbit, calculated from the class */ void gw_bandwidth_to_kbit(uint8_t gw_srv_class, int *down, int *up) { - char sbit = (gw_srv_class & 0x80) >> 7; - char dpart = (gw_srv_class & 0x78) >> 3; - char upart = (gw_srv_class & 0x07); + int sbit = (gw_srv_class & 0x80) >> 7; + int dpart = (gw_srv_class & 0x78) >> 3; + int upart = (gw_srv_class & 0x07); if (!gw_srv_class) { *down = 0; @@ -76,10 +76,11 @@ void gw_bandwidth_to_kbit(uint8_t gw_srv_class, int *down, int *up) } static bool parse_gw_bandwidth(struct net_device *net_dev, char *buff, - long *up, long *down) + int *up, int *down) { int ret, multi = 1; char *slash_ptr, *tmp_ptr; + long ldown, lup; slash_ptr = strchr(buff, '/'); if (slash_ptr) @@ -96,7 +97,7 @@ static bool parse_gw_bandwidth(struct net_device *net_dev, char *buff, *tmp_ptr = '\0'; } - ret = strict_strtoul(buff, 10, down); + ret = strict_strtol(buff, 10, &ldown); if (ret) { bat_err(net_dev, "Download speed of gateway mode invalid: %s\n", @@ -104,7 +105,7 @@ static bool parse_gw_bandwidth(struct net_device *net_dev, char *buff, return false; } - *down *= multi; + *down = ldown * multi; /* we also got some upload info */ if (slash_ptr) { @@ -121,7 +122,7 @@ static bool parse_gw_bandwidth(struct net_device *net_dev, char *buff, *tmp_ptr = '\0'; } - ret = strict_strtoul(slash_ptr + 1, 10, up); + ret = strict_strtol(slash_ptr + 1, 10, &lup); if (ret) { bat_err(net_dev, "Upload speed of gateway mode invalid: " @@ -129,7 +130,7 @@ static bool parse_gw_bandwidth(struct net_device *net_dev, char *buff, return false; } - *up *= multi; + *up = lup * multi; } return true; @@ -138,7 +139,8 @@ static bool parse_gw_bandwidth(struct net_device *net_dev, char *buff, ssize_t gw_bandwidth_set(struct net_device *net_dev, char *buff, size_t count) { struct bat_priv *bat_priv = 
netdev_priv(net_dev); - long gw_bandwidth_tmp = 0, up = 0, down = 0; + long gw_bandwidth_tmp = 0; + int up = 0, down = 0; bool ret; ret = parse_gw_bandwidth(net_dev, buff, &up, &down); @@ -158,12 +160,11 @@ ssize_t gw_bandwidth_set(struct net_device *net_dev, char *buff, size_t count) * speeds, hence we need to calculate it back to show the number * that is going to be propagated **/ - gw_bandwidth_to_kbit((uint8_t)gw_bandwidth_tmp, - (int *)&down, (int *)&up); + gw_bandwidth_to_kbit((uint8_t)gw_bandwidth_tmp, &down, &up); gw_deselect(bat_priv); bat_info(net_dev, "Changing gateway bandwidth from: '%i' to: '%ld' " - "(propagating: %ld%s/%ld%s)\n", + "(propagating: %d%s/%d%s)\n", atomic_read(&bat_priv->gw_bandwidth), gw_bandwidth_tmp, (down > 2048 ? down / 1024 : down), (down > 2048 ? "MBit" : "KBit"), diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index dfbfccc..db7aacf 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -46,7 +46,7 @@ void hardif_free_rcu(struct rcu_head *rcu) kfree(hard_iface); } -struct hard_iface *hardif_get_by_netdev(struct net_device *net_dev) +struct hard_iface *hardif_get_by_netdev(const struct net_device *net_dev) { struct hard_iface *hard_iface; @@ -64,7 +64,7 @@ out: return hard_iface; } -static int is_valid_iface(struct net_device *net_dev) +static int is_valid_iface(const struct net_device *net_dev) { if (net_dev->flags & IFF_LOOPBACK) return 0; @@ -86,7 +86,7 @@ static int is_valid_iface(struct net_device *net_dev) return 1; } -static struct hard_iface *hardif_get_active(struct net_device *soft_iface) +static struct hard_iface *hardif_get_active(const struct net_device *soft_iface) { struct hard_iface *hard_iface; @@ -138,7 +138,7 @@ static void primary_if_select(struct bat_priv *bat_priv, if (new_hard_iface && !atomic_inc_not_zero(&new_hard_iface->refcount)) new_hard_iface = NULL; - curr_hard_iface = bat_priv->primary_if; + curr_hard_iface = rcu_dereference_protected(bat_priv->primary_if, 1); rcu_assign_pointer(bat_priv->primary_if, new_hard_iface); if (curr_hard_iface) @@ -152,15 +152,9 @@ static void primary_if_select(struct bat_priv *bat_priv, batman_packet->ttl = TTL; primary_if_update_addr(bat_priv); - - /*** - * hacky trick to make sure that we send the TT information via - * our new primary interface - */ - atomic_set(&bat_priv->tt_local_changed, 1); } -static bool hardif_is_iface_up(struct hard_iface *hard_iface) +static bool hardif_is_iface_up(const struct hard_iface *hard_iface) { if (hard_iface->net_dev->flags & IFF_UP) return true; @@ -176,9 +170,9 @@ static void update_mac_addresses(struct hard_iface *hard_iface) hard_iface->net_dev->dev_addr, ETH_ALEN); } -static void check_known_mac_addr(struct net_device *net_dev) +static void check_known_mac_addr(const struct net_device *net_dev) { - struct hard_iface *hard_iface; + const struct hard_iface *hard_iface; rcu_read_lock(); list_for_each_entry_rcu(hard_iface, &hardif_list, list) { @@ -204,8 +198,8 @@ static void check_known_mac_addr(struct net_device *net_dev) int hardif_min_mtu(struct net_device *soft_iface) { - struct bat_priv *bat_priv = netdev_priv(soft_iface); - struct hard_iface *hard_iface; + const struct bat_priv *bat_priv = netdev_priv(soft_iface); + const struct hard_iface *hard_iface; /* allow big frames if all devices are capable to do so * (have MTU > 1500 + BAT_HEADER_LEN) */ int min_mtu = ETH_DATA_LEN; @@ -285,7 +279,8 @@ static void hardif_deactivate_interface(struct hard_iface *hard_iface) 
update_min_mtu(hard_iface->soft_iface); } -int hardif_enable_interface(struct hard_iface *hard_iface, char *iface_name) +int hardif_enable_interface(struct hard_iface *hard_iface, + const char *iface_name) { struct bat_priv *bat_priv; struct batman_packet *batman_packet; @@ -336,10 +331,11 @@ int hardif_enable_interface(struct hard_iface *hard_iface, char *iface_name) batman_packet = (struct batman_packet *)(hard_iface->packet_buff); batman_packet->packet_type = BAT_PACKET; batman_packet->version = COMPAT_VERSION; - batman_packet->flags = 0; + batman_packet->flags = NO_FLAGS; batman_packet->ttl = 2; batman_packet->tq = TQ_MAX_VALUE; - batman_packet->num_tt = 0; + batman_packet->tt_num_changes = 0; + batman_packet->ttvn = 0; hard_iface->if_num = bat_priv->num_ifaces; bat_priv->num_ifaces++; @@ -458,7 +454,7 @@ static struct hard_iface *hardif_add_interface(struct net_device *net_dev) dev_hold(net_dev); - hard_iface = kmalloc(sizeof(struct hard_iface), GFP_ATOMIC); + hard_iface = kmalloc(sizeof(*hard_iface), GFP_ATOMIC); if (!hard_iface) { pr_err("Can't add interface (%s): out of memory\n", net_dev->name); @@ -522,7 +518,7 @@ void hardif_remove_interfaces(void) static int hard_if_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *net_dev = (struct net_device *)ptr; + struct net_device *net_dev = ptr; struct hard_iface *hard_iface = hardif_get_by_netdev(net_dev); struct hard_iface *primary_if = NULL; struct bat_priv *bat_priv; @@ -567,7 +563,7 @@ static int hard_if_event(struct notifier_block *this, break; default: break; - }; + } hardif_put: hardif_free_ref(hard_iface); @@ -658,6 +654,14 @@ static int batman_skb_recv(struct sk_buff *skb, struct net_device *dev, case BAT_VIS: ret = recv_vis_packet(skb, hard_iface); break; + /* Translation table query (request or response) */ + case BAT_TT_QUERY: + ret = recv_tt_query(skb, hard_iface); + break; + /* Roaming advertisement */ + case BAT_ROAM_ADV: + ret = recv_roam_adv(skb, hard_iface); + break; default: ret = NET_RX_DROP; } diff --git a/net/batman-adv/hard-interface.h b/net/batman-adv/hard-interface.h index 6426599..442eacb 100644 --- a/net/batman-adv/hard-interface.h +++ b/net/batman-adv/hard-interface.h @@ -22,17 +22,21 @@ #ifndef _NET_BATMAN_ADV_HARD_INTERFACE_H_ #define _NET_BATMAN_ADV_HARD_INTERFACE_H_ -#define IF_NOT_IN_USE 0 -#define IF_TO_BE_REMOVED 1 -#define IF_INACTIVE 2 -#define IF_ACTIVE 3 -#define IF_TO_BE_ACTIVATED 4 -#define IF_I_WANT_YOU 5 +enum hard_if_state { + IF_NOT_IN_USE, + IF_TO_BE_REMOVED, + IF_INACTIVE, + IF_ACTIVE, + IF_TO_BE_ACTIVATED, + IF_I_WANT_YOU +}; extern struct notifier_block hard_if_notifier; -struct hard_iface *hardif_get_by_netdev(struct net_device *net_dev); -int hardif_enable_interface(struct hard_iface *hard_iface, char *iface_name); +struct hard_iface* +hardif_get_by_netdev(const struct net_device *net_dev); +int hardif_enable_interface(struct hard_iface *hard_iface, + const char *iface_name); void hardif_disable_interface(struct hard_iface *hard_iface); void hardif_remove_interfaces(void); int hardif_min_mtu(struct net_device *soft_iface); diff --git a/net/batman-adv/hash.c b/net/batman-adv/hash.c index c5213d8..2a17250 100644 --- a/net/batman-adv/hash.c +++ b/net/batman-adv/hash.c @@ -46,15 +46,16 @@ struct hashtable_t *hash_new(int size) { struct hashtable_t *hash; - hash = kmalloc(sizeof(struct hashtable_t), GFP_ATOMIC); + hash = kmalloc(sizeof(*hash), GFP_ATOMIC); if (!hash) return NULL; - hash->table = kmalloc(sizeof(struct element_t *) * size, GFP_ATOMIC); + 
hash->table = kmalloc(sizeof(*hash->table) * size, GFP_ATOMIC); if (!hash->table) goto free_hash; - hash->list_locks = kmalloc(sizeof(spinlock_t) * size, GFP_ATOMIC); + hash->list_locks = kmalloc(sizeof(*hash->list_locks) * size, + GFP_ATOMIC); if (!hash->list_locks) goto free_table; diff --git a/net/batman-adv/hash.h b/net/batman-adv/hash.h index 434822b..dd5c9fd 100644 --- a/net/batman-adv/hash.h +++ b/net/batman-adv/hash.h @@ -28,12 +28,12 @@ * compare 2 element datas for their keys, * return 0 if same and not 0 if not * same */ -typedef int (*hashdata_compare_cb)(struct hlist_node *, void *); +typedef int (*hashdata_compare_cb)(const struct hlist_node *, const void *); /* the hashfunction, should return an index * based on the key in the data of the first * argument and the size the second */ -typedef int (*hashdata_choose_cb)(void *, int); +typedef int (*hashdata_choose_cb)(const void *, int); typedef void (*hashdata_free_cb)(struct hlist_node *, void *); struct hashtable_t { @@ -80,7 +80,7 @@ static inline void hash_delete(struct hashtable_t *hash, static inline int hash_add(struct hashtable_t *hash, hashdata_compare_cb compare, hashdata_choose_cb choose, - void *data, struct hlist_node *data_node) + const void *data, struct hlist_node *data_node) { int index; struct hlist_head *head; diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c index fa22ba2..ac3520e 100644 --- a/net/batman-adv/icmp_socket.c +++ b/net/batman-adv/icmp_socket.c @@ -46,7 +46,7 @@ static int bat_socket_open(struct inode *inode, struct file *file) nonseekable_open(inode, file); - socket_client = kmalloc(sizeof(struct socket_client), GFP_KERNEL); + socket_client = kmalloc(sizeof(*socket_client), GFP_KERNEL); if (!socket_client) return -ENOMEM; @@ -310,7 +310,7 @@ static void bat_socket_add_packet(struct socket_client *socket_client, { struct socket_packet *socket_packet; - socket_packet = kmalloc(sizeof(struct socket_packet), GFP_ATOMIC); + socket_packet = kmalloc(sizeof(*socket_packet), GFP_ATOMIC); if (!socket_packet) return; diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index 0a7cee0..e367e69 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -84,8 +84,10 @@ int mesh_init(struct net_device *soft_iface) spin_lock_init(&bat_priv->forw_bat_list_lock); spin_lock_init(&bat_priv->forw_bcast_list_lock); - spin_lock_init(&bat_priv->tt_lhash_lock); - spin_lock_init(&bat_priv->tt_ghash_lock); + spin_lock_init(&bat_priv->tt_changes_list_lock); + spin_lock_init(&bat_priv->tt_req_list_lock); + spin_lock_init(&bat_priv->tt_roam_list_lock); + spin_lock_init(&bat_priv->tt_buff_lock); spin_lock_init(&bat_priv->gw_list_lock); spin_lock_init(&bat_priv->vis_hash_lock); spin_lock_init(&bat_priv->vis_list_lock); @@ -96,14 +98,14 @@ int mesh_init(struct net_device *soft_iface) INIT_HLIST_HEAD(&bat_priv->forw_bcast_list); INIT_HLIST_HEAD(&bat_priv->gw_list); INIT_HLIST_HEAD(&bat_priv->softif_neigh_vids); + INIT_LIST_HEAD(&bat_priv->tt_changes_list); + INIT_LIST_HEAD(&bat_priv->tt_req_list); + INIT_LIST_HEAD(&bat_priv->tt_roam_list); if (originator_init(bat_priv) < 1) goto err; - if (tt_local_init(bat_priv) < 1) - goto err; - - if (tt_global_init(bat_priv) < 1) + if (tt_init(bat_priv) < 1) goto err; tt_local_add(soft_iface, soft_iface->dev_addr); @@ -111,6 +113,7 @@ int mesh_init(struct net_device *soft_iface) if (vis_init(bat_priv) < 1) goto err; + atomic_set(&bat_priv->gw_reselect, 0); atomic_set(&bat_priv->mesh_state, MESH_ACTIVE); goto end; @@ -137,8 +140,7 @@ void mesh_free(struct 
net_device *soft_iface) gw_node_purge(bat_priv); originator_free(bat_priv); - tt_local_free(bat_priv); - tt_global_free(bat_priv); + tt_free(bat_priv); softif_neigh_purge(bat_priv); @@ -155,9 +157,9 @@ void dec_module_count(void) module_put(THIS_MODULE); } -int is_my_mac(uint8_t *addr) +int is_my_mac(const uint8_t *addr) { - struct hard_iface *hard_iface; + const struct hard_iface *hard_iface; rcu_read_lock(); list_for_each_entry_rcu(hard_iface, &hardif_list, list) { diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index 148b49e..4f293b5 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -42,15 +42,25 @@ * -> TODO: check influence on TQ_LOCAL_WINDOW_SIZE */ #define PURGE_TIMEOUT 200 #define TT_LOCAL_TIMEOUT 3600 /* in seconds */ - +#define TT_CLIENT_ROAM_TIMEOUT 600 /* sliding packet range of received originator messages in squence numbers * (should be a multiple of our word size) */ #define TQ_LOCAL_WINDOW_SIZE 64 +#define TT_REQUEST_TIMEOUT 3 /* seconds we have to keep pending tt_req */ + #define TQ_GLOBAL_WINDOW_SIZE 5 #define TQ_LOCAL_BIDRECT_SEND_MINIMUM 1 #define TQ_LOCAL_BIDRECT_RECV_MINIMUM 1 #define TQ_TOTAL_BIDRECT_LIMIT 1 +#define TT_OGM_APPEND_MAX 3 /* number of OGMs sent with the last tt diff */ + +#define ROAMING_MAX_TIME 20 /* Time in which a client can roam at most + * ROAMING_MAX_COUNT times */ +#define ROAMING_MAX_COUNT 5 + +#define NO_FLAGS 0 + #define NUM_WORDS (TQ_LOCAL_WINDOW_SIZE / WORD_BIT_SIZE) #define LOG_BUF_LEN 8192 /* has to be a power of 2 */ @@ -72,13 +82,27 @@ #define RESET_PROTECTION_MS 30000 #define EXPECTED_SEQNO_RANGE 65536 -#define MESH_INACTIVE 0 -#define MESH_ACTIVE 1 -#define MESH_DEACTIVATING 2 +enum mesh_state { + MESH_INACTIVE, + MESH_ACTIVE, + MESH_DEACTIVATING +}; #define BCAST_QUEUE_LEN 256 #define BATMAN_QUEUE_LEN 256 +enum uev_action { + UEV_ADD = 0, + UEV_DEL, + UEV_CHANGE +}; + +enum uev_type { + UEV_GW = 0 +}; + +#define GW_THRESHOLD 50 + /* * Debug Messages */ @@ -89,10 +113,12 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt /* all messages related to routing / flooding / broadcasting / etc */ -#define DBG_BATMAN 1 -/* route or tt entry added / changed / deleted */ -#define DBG_ROUTES 2 -#define DBG_ALL 3 +enum dbg_level { + DBG_BATMAN = 1 << 0, + DBG_ROUTES = 1 << 1, /* route added / changed / deleted */ + DBG_TT = 1 << 2, /* translation table operations */ + DBG_ALL = 7 +}; /* @@ -133,10 +159,10 @@ int mesh_init(struct net_device *soft_iface); void mesh_free(struct net_device *soft_iface); void inc_module_count(void); void dec_module_count(void); -int is_my_mac(uint8_t *addr); +int is_my_mac(const uint8_t *addr); #ifdef CONFIG_BATMAN_ADV_DEBUG -int debug_log(struct bat_priv *bat_priv, char *fmt, ...); +int debug_log(struct bat_priv *bat_priv, const char *fmt, ...) __printf(2, 3); #define bat_dbg(type, bat_priv, fmt, arg...) \ do { \ @@ -145,9 +171,10 @@ int debug_log(struct bat_priv *bat_priv, char *fmt, ...); } \ while (0) #else /* !CONFIG_BATMAN_ADV_DEBUG */ -static inline void bat_dbg(char type __always_unused, +__printf(3, 4) +static inline void bat_dbg(int type __always_unused, struct bat_priv *bat_priv __always_unused, - char *fmt __always_unused, ...) + const char *fmt __always_unused, ...) 
{ } #endif @@ -172,11 +199,32 @@ static inline void bat_dbg(char type __always_unused, * * note: can't use compare_ether_addr() as it requires aligned memory */ -static inline int compare_eth(void *data1, void *data2) + +static inline int compare_eth(const void *data1, const void *data2) { return (memcmp(data1, data2, ETH_ALEN) == 0 ? 1 : 0); } + #define atomic_dec_not_zero(v) atomic_add_unless((v), -1, 0) +/* Returns the smallest signed integer in two's complement with the sizeof x */ +#define smallest_signed_int(x) (1u << (7u + 8u * (sizeof(x) - 1u))) + +/* These macros check whether a sequence number x is a predecessor/successor + * of y. They handle overflows/underflows and can correctly check for a + * predecessor/successor unless the variable sequence number has grown by + * more than 2**(bitwidth(x)-1)-1. + * This means that for a uint8_t with the maximum value 255, it would think: + * - when adding nothing - it is neither a predecessor nor a successor + * - before adding more than 127 to the starting value - it is a predecessor, + * - when adding 128 - it is neither a predecessor nor a successor, + * - after adding more than 127 to the starting value - it is a successor */ +#define seq_before(x, y) ({typeof(x) _d1 = (x); \ + typeof(y) _d2 = (y); \ + typeof(x) _dummy = (_d1 - _d2); \ + (void) (&_d1 == &_d2); \ + _dummy > smallest_signed_int(_dummy); }) +#define seq_after(x, y) seq_before(y, x) + #endif /* _NET_BATMAN_ADV_MAIN_H_ */ diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index 40a30bb..338b3c5 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -37,6 +37,14 @@ static void start_purge_timer(struct bat_priv *bat_priv) queue_delayed_work(bat_event_workqueue, &bat_priv->orig_work, 1 * HZ); } +/* returns 1 if they are the same originator */ +static int compare_orig(const struct hlist_node *node, const void *data2) +{ + const void *data1 = container_of(node, struct orig_node, hash_entry); + + return (memcmp(data1, data2, ETH_ALEN) == 0 ?
1 : 0); +} + int originator_init(struct bat_priv *bat_priv) { if (bat_priv->orig_hash) @@ -77,7 +85,7 @@ struct neigh_node *orig_node_get_router(struct orig_node *orig_node) struct neigh_node *create_neighbor(struct orig_node *orig_node, struct orig_node *orig_neigh_node, - uint8_t *neigh, + const uint8_t *neigh, struct hard_iface *if_incoming) { struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface); @@ -86,7 +94,7 @@ struct neigh_node *create_neighbor(struct orig_node *orig_node, bat_dbg(DBG_BATMAN, bat_priv, "Creating new last-hop neighbor of originator\n"); - neigh_node = kzalloc(sizeof(struct neigh_node), GFP_ATOMIC); + neigh_node = kzalloc(sizeof(*neigh_node), GFP_ATOMIC); if (!neigh_node) return NULL; @@ -137,6 +145,7 @@ static void orig_node_free_rcu(struct rcu_head *rcu) tt_global_del_orig(orig_node->bat_priv, orig_node, "originator timed out"); + kfree(orig_node->tt_buff); kfree(orig_node->bcast_own); kfree(orig_node->bcast_own_sum); kfree(orig_node); @@ -183,7 +192,7 @@ void originator_free(struct bat_priv *bat_priv) /* this function finds or creates an originator entry for the given * address if it does not exist */ -struct orig_node *get_orig_node(struct bat_priv *bat_priv, uint8_t *addr) +struct orig_node *get_orig_node(struct bat_priv *bat_priv, const uint8_t *addr) { struct orig_node *orig_node; int size; @@ -196,7 +205,7 @@ struct orig_node *get_orig_node(struct bat_priv *bat_priv, uint8_t *addr) bat_dbg(DBG_BATMAN, bat_priv, "Creating new originator: %pM\n", addr); - orig_node = kzalloc(sizeof(struct orig_node), GFP_ATOMIC); + orig_node = kzalloc(sizeof(*orig_node), GFP_ATOMIC); if (!orig_node) return NULL; @@ -205,14 +214,18 @@ struct orig_node *get_orig_node(struct bat_priv *bat_priv, uint8_t *addr) spin_lock_init(&orig_node->ogm_cnt_lock); spin_lock_init(&orig_node->bcast_seqno_lock); spin_lock_init(&orig_node->neigh_list_lock); + spin_lock_init(&orig_node->tt_buff_lock); /* extra reference for return */ atomic_set(&orig_node->refcount, 2); + orig_node->tt_poss_change = false; orig_node->bat_priv = bat_priv; memcpy(orig_node->orig, addr, ETH_ALEN); orig_node->router = NULL; orig_node->tt_buff = NULL; + orig_node->tt_buff_len = 0; + atomic_set(&orig_node->tt_size, 0); orig_node->bcast_seqno_reset = jiffies - 1 - msecs_to_jiffies(RESET_PROTECTION_MS); orig_node->batman_seqno_reset = jiffies - 1 @@ -322,9 +335,7 @@ static bool purge_orig_node(struct bat_priv *bat_priv, if (purge_orig_neighbors(bat_priv, orig_node, &best_neigh_node)) { update_routes(bat_priv, orig_node, - best_neigh_node, - orig_node->tt_buff, - orig_node->tt_buff_len); + best_neigh_node); } } @@ -559,7 +570,7 @@ static int orig_node_del_if(struct orig_node *orig_node, memcpy(data_ptr, orig_node->bcast_own, del_if_num * chunk_size); /* copy second part */ - memcpy(data_ptr + del_if_num * chunk_size, + memcpy((char *)data_ptr + del_if_num * chunk_size, orig_node->bcast_own + ((del_if_num + 1) * chunk_size), (max_if_num - del_if_num) * chunk_size); @@ -579,7 +590,7 @@ free_bcast_own: memcpy(data_ptr, orig_node->bcast_own_sum, del_if_num * sizeof(uint8_t)); - memcpy(data_ptr + del_if_num * sizeof(uint8_t), + memcpy((char *)data_ptr + del_if_num * sizeof(uint8_t), orig_node->bcast_own_sum + ((del_if_num + 1) * sizeof(uint8_t)), (max_if_num - del_if_num) * sizeof(uint8_t)); diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h index e1d641f..cfc1f60 100644 --- a/net/batman-adv/originator.h +++ b/net/batman-adv/originator.h @@ -28,10 +28,10 @@ int originator_init(struct bat_priv
*bat_priv); void originator_free(struct bat_priv *bat_priv); void purge_orig_ref(struct bat_priv *bat_priv); void orig_node_free_ref(struct orig_node *orig_node); -struct orig_node *get_orig_node(struct bat_priv *bat_priv, uint8_t *addr); +struct orig_node *get_orig_node(struct bat_priv *bat_priv, const uint8_t *addr); struct neigh_node *create_neighbor(struct orig_node *orig_node, struct orig_node *orig_neigh_node, - uint8_t *neigh, + const uint8_t *neigh, struct hard_iface *if_incoming); void neigh_node_free_ref(struct neigh_node *neigh_node); struct neigh_node *orig_node_get_router(struct orig_node *orig_node); @@ -40,19 +40,11 @@ int orig_hash_add_if(struct hard_iface *hard_iface, int max_if_num); int orig_hash_del_if(struct hard_iface *hard_iface, int max_if_num); -/* returns 1 if they are the same originator */ -static inline int compare_orig(struct hlist_node *node, void *data2) -{ - void *data1 = container_of(node, struct orig_node, hash_entry); - - return (memcmp(data1, data2, ETH_ALEN) == 0 ? 1 : 0); -} - /* hashfunction to choose an entry in a hash table of given size */ /* hash algorithm from http://en.wikipedia.org/wiki/Hash_table */ -static inline int choose_orig(void *data, int32_t size) +static inline int choose_orig(const void *data, int32_t size) { - unsigned char *key = data; + const unsigned char *key = data; uint32_t hash = 0; size_t i; @@ -70,7 +62,7 @@ static inline int choose_orig(void *data, int32_t size) } static inline struct orig_node *orig_hash_find(struct bat_priv *bat_priv, - void *data) + const void *data) { struct hashtable_t *hash = bat_priv->orig_hash; struct hlist_head *head; diff --git a/net/batman-adv/packet.h b/net/batman-adv/packet.h index eda9965..c5f081d 100644 --- a/net/batman-adv/packet.h +++ b/net/batman-adv/packet.h @@ -24,46 +24,79 @@ #define ETH_P_BATMAN 0x4305 /* unofficial/not registered Ethertype */ -#define BAT_PACKET 0x01 -#define BAT_ICMP 0x02 -#define BAT_UNICAST 0x03 -#define BAT_BCAST 0x04 -#define BAT_VIS 0x05 -#define BAT_UNICAST_FRAG 0x06 +enum bat_packettype { + BAT_PACKET = 0x01, + BAT_ICMP = 0x02, + BAT_UNICAST = 0x03, + BAT_BCAST = 0x04, + BAT_VIS = 0x05, + BAT_UNICAST_FRAG = 0x06, + BAT_TT_QUERY = 0x07, + BAT_ROAM_ADV = 0x08 +}; /* this file is included by batctl which needs these defines */ -#define COMPAT_VERSION 12 -#define DIRECTLINK 0x40 -#define VIS_SERVER 0x20 -#define PRIMARIES_FIRST_HOP 0x10 +#define COMPAT_VERSION 14 + +enum batman_flags { + PRIMARIES_FIRST_HOP = 1 << 4, + VIS_SERVER = 1 << 5, + DIRECTLINK = 1 << 6 +}; /* ICMP message types */ -#define ECHO_REPLY 0 -#define DESTINATION_UNREACHABLE 3 -#define ECHO_REQUEST 8 -#define TTL_EXCEEDED 11 -#define PARAMETER_PROBLEM 12 +enum icmp_packettype { + ECHO_REPLY = 0, + DESTINATION_UNREACHABLE = 3, + ECHO_REQUEST = 8, + TTL_EXCEEDED = 11, + PARAMETER_PROBLEM = 12 +}; /* vis defines */ -#define VIS_TYPE_SERVER_SYNC 0 -#define VIS_TYPE_CLIENT_UPDATE 1 +enum vis_packettype { + VIS_TYPE_SERVER_SYNC = 0, + VIS_TYPE_CLIENT_UPDATE = 1 +}; /* fragmentation defines */ -#define UNI_FRAG_HEAD 0x01 -#define UNI_FRAG_LARGETAIL 0x02 +enum unicast_frag_flags { + UNI_FRAG_HEAD = 1 << 0, + UNI_FRAG_LARGETAIL = 1 << 1 +}; + +/* TT_QUERY subtypes */ +#define TT_QUERY_TYPE_MASK 0x3 + +enum tt_query_packettype { + TT_REQUEST = 0, + TT_RESPONSE = 1 +}; + +/* TT_QUERY flags */ +enum tt_query_flags { + TT_FULL_TABLE = 1 << 2 +}; + +/* TT_CHANGE flags */ +enum tt_change_flags { + TT_CHANGE_DEL = 0x01, + TT_CLIENT_ROAM = 0x02 +}; struct batman_packet { uint8_t packet_type; uint8_t version; 
/* batman version field */ + uint8_t ttl; uint8_t flags; /* 0x40: DIRECTLINK flag, 0x20 VIS_SERVER flag... */ - uint8_t tq; uint32_t seqno; uint8_t orig[6]; uint8_t prev_sender[6]; - uint8_t ttl; - uint8_t num_tt; uint8_t gw_flags; /* flags related to gateway class */ - uint8_t align; + uint8_t tq; + uint8_t tt_num_changes; + uint8_t ttvn; /* translation table version number */ + uint16_t tt_crc; } __packed; #define BAT_PACKET_LEN sizeof(struct batman_packet) @@ -71,12 +104,13 @@ struct batman_packet { struct icmp_packet { uint8_t packet_type; uint8_t version; /* batman version field */ - uint8_t msg_type; /* see ICMP message types above */ uint8_t ttl; + uint8_t msg_type; /* see ICMP message types above */ uint8_t dst[6]; uint8_t orig[6]; uint16_t seqno; uint8_t uid; + uint8_t reserved; } __packed; #define BAT_RR_LEN 16 @@ -86,8 +120,8 @@ struct icmp_packet { struct icmp_packet_rr { uint8_t packet_type; uint8_t version; /* batman version field */ - uint8_t msg_type; /* see ICMP message types above */ uint8_t ttl; + uint8_t msg_type; /* see ICMP message types above */ uint8_t dst[6]; uint8_t orig[6]; uint16_t seqno; @@ -99,16 +133,19 @@ struct icmp_packet_rr { struct unicast_packet { uint8_t packet_type; uint8_t version; /* batman version field */ - uint8_t dest[6]; uint8_t ttl; + uint8_t ttvn; /* destination translation table version number */ + uint8_t dest[6]; } __packed; struct unicast_frag_packet { uint8_t packet_type; uint8_t version; /* batman version field */ - uint8_t dest[6]; uint8_t ttl; + uint8_t ttvn; /* destination translation table version number */ + uint8_t dest[6]; uint8_t flags; + uint8_t align; uint8_t orig[6]; uint16_t seqno; } __packed; @@ -116,21 +153,61 @@ struct unicast_frag_packet { struct bcast_packet { uint8_t packet_type; uint8_t version; /* batman version field */ - uint8_t orig[6]; uint8_t ttl; + uint8_t reserved; uint32_t seqno; + uint8_t orig[6]; } __packed; struct vis_packet { uint8_t packet_type; uint8_t version; /* batman version field */ + uint8_t ttl; /* TTL */ uint8_t vis_type; /* which type of vis-participant sent this? 
*/ - uint8_t entries; /* number of entries behind this struct */ uint32_t seqno; /* sequence number */ - uint8_t ttl; /* TTL */ + uint8_t entries; /* number of entries behind this struct */ + uint8_t reserved; uint8_t vis_orig[6]; /* originator that announces its neighbors */ uint8_t target_orig[6]; /* who should receive this packet */ uint8_t sender_orig[6]; /* who sent or rebroadcasted this packet */ } __packed; +struct tt_query_packet { + uint8_t packet_type; + uint8_t version; /* batman version field */ + uint8_t ttl; + /* the flag field is a combination of: + * - TT_REQUEST or TT_RESPONSE + * - TT_FULL_TABLE */ + uint8_t flags; + uint8_t dst[ETH_ALEN]; + uint8_t src[ETH_ALEN]; + /* the ttvn field is: + * if TT_REQUEST: ttvn that triggered the + * request + * if TT_RESPONSE: new ttvn for the src + * orig_node */ + uint8_t ttvn; + /* tt_data field is: + * if TT_REQUEST: crc associated with the + * ttvn + * if TT_RESPONSE: table_size */ + uint16_t tt_data; +} __packed; + +struct roam_adv_packet { + uint8_t packet_type; + uint8_t version; + uint8_t ttl; + uint8_t reserved; + uint8_t dst[ETH_ALEN]; + uint8_t src[ETH_ALEN]; + uint8_t client[ETH_ALEN]; +} __packed; + +struct tt_change { + uint8_t flags; + uint8_t addr[ETH_ALEN]; +} __packed; + #endif /* _NET_BATMAN_ADV_PACKET_H_ */ diff --git a/net/batman-adv/ring_buffer.c b/net/batman-adv/ring_buffer.c index 5bb6a61..f1ccfa7 100644 --- a/net/batman-adv/ring_buffer.c +++ b/net/batman-adv/ring_buffer.c @@ -28,9 +28,9 @@ void ring_buffer_set(uint8_t lq_recv[], uint8_t *lq_index, uint8_t value) *lq_index = (*lq_index + 1) % TQ_GLOBAL_WINDOW_SIZE; } -uint8_t ring_buffer_avg(uint8_t lq_recv[]) +uint8_t ring_buffer_avg(const uint8_t lq_recv[]) { - uint8_t *ptr; + const uint8_t *ptr; uint16_t count = 0, i = 0, sum = 0; ptr = lq_recv; diff --git a/net/batman-adv/ring_buffer.h b/net/batman-adv/ring_buffer.h index 0395b27..7cdfe62 100644 --- a/net/batman-adv/ring_buffer.h +++ b/net/batman-adv/ring_buffer.h @@ -23,6 +23,6 @@ #define _NET_BATMAN_ADV_RING_BUFFER_H_ void ring_buffer_set(uint8_t lq_recv[], uint8_t *lq_index, uint8_t value); -uint8_t ring_buffer_avg(uint8_t lq_recv[]); +uint8_t ring_buffer_avg(const uint8_t lq_recv[]); #endif /* _NET_BATMAN_ADV_RING_BUFFER_H_ */ diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index bb1c3ec..0ce090c 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -64,28 +64,57 @@ void slide_own_bcast_window(struct hard_iface *hard_iface) } } -static void update_TT(struct bat_priv *bat_priv, struct orig_node *orig_node, - unsigned char *tt_buff, int tt_buff_len) +static void update_transtable(struct bat_priv *bat_priv, + struct orig_node *orig_node, + const unsigned char *tt_buff, + uint8_t tt_num_changes, uint8_t ttvn, + uint16_t tt_crc) { - if ((tt_buff_len != orig_node->tt_buff_len) || - ((tt_buff_len > 0) && - (orig_node->tt_buff_len > 0) && - (memcmp(orig_node->tt_buff, tt_buff, tt_buff_len) != 0))) { - - if (orig_node->tt_buff_len > 0) - tt_global_del_orig(bat_priv, orig_node, - "originator changed tt"); - - if ((tt_buff_len > 0) && (tt_buff)) - tt_global_add_orig(bat_priv, orig_node, - tt_buff, tt_buff_len); + uint8_t orig_ttvn = (uint8_t)atomic_read(&orig_node->last_ttvn); + bool full_table = true; + + /* the ttvn increased by one -> we can apply the attached changes */ + if (ttvn - orig_ttvn == 1) { + /* the OGM could not contain the changes because they were too + * many to fit in one frame or because they have already been + * sent TT_OGM_APPEND_MAX times. 
In this case send a tt + * request */ + if (!tt_num_changes) { + full_table = false; + goto request_table; + } + + tt_update_changes(bat_priv, orig_node, tt_num_changes, ttvn, + (struct tt_change *)tt_buff); + + /* Even if we received the crc into the OGM, we prefer + * to recompute it to spot any possible inconsistency + * in the global table */ + orig_node->tt_crc = tt_global_crc(bat_priv, orig_node); + /* Roaming phase is over: tables are in sync again. I can + * unset the flag */ + orig_node->tt_poss_change = false; + } else { + /* if we missed more than one change or our tables are not + * in sync anymore -> request fresh tt data */ + if (ttvn != orig_ttvn || orig_node->tt_crc != tt_crc) { +request_table: + bat_dbg(DBG_TT, bat_priv, "TT inconsistency for %pM. " + "Need to retrieve the correct information " + "(ttvn: %u last_ttvn: %u crc: %u last_crc: " + "%u num_changes: %u)\n", orig_node->orig, ttvn, + orig_ttvn, tt_crc, orig_node->tt_crc, + tt_num_changes); + send_tt_request(bat_priv, orig_node, ttvn, tt_crc, + full_table); + return; + } } } static void update_route(struct bat_priv *bat_priv, struct orig_node *orig_node, - struct neigh_node *neigh_node, - unsigned char *tt_buff, int tt_buff_len) + struct neigh_node *neigh_node) { struct neigh_node *curr_router; @@ -93,11 +122,10 @@ static void update_route(struct bat_priv *bat_priv, /* route deleted */ if ((curr_router) && (!neigh_node)) { - bat_dbg(DBG_ROUTES, bat_priv, "Deleting route towards: %pM\n", orig_node->orig); tt_global_del_orig(bat_priv, orig_node, - "originator timed out"); + "Deleted route towards originator"); /* route added */ } else if ((!curr_router) && (neigh_node)) { @@ -105,11 +133,8 @@ static void update_route(struct bat_priv *bat_priv, bat_dbg(DBG_ROUTES, bat_priv, "Adding route towards: %pM (via %pM)\n", orig_node->orig, neigh_node->addr); - tt_global_add_orig(bat_priv, orig_node, - tt_buff, tt_buff_len); - /* route changed */ - } else { + } else if (neigh_node && curr_router) { bat_dbg(DBG_ROUTES, bat_priv, "Changing route towards: %pM " "(now via %pM - was via %pM)\n", @@ -133,10 +158,8 @@ static void update_route(struct bat_priv *bat_priv, neigh_node_free_ref(curr_router); } - void update_routes(struct bat_priv *bat_priv, struct orig_node *orig_node, - struct neigh_node *neigh_node, unsigned char *tt_buff, - int tt_buff_len) + struct neigh_node *neigh_node) { struct neigh_node *router = NULL; @@ -146,11 +169,7 @@ void update_routes(struct bat_priv *bat_priv, struct orig_node *orig_node, router = orig_node_get_router(orig_node); if (router != neigh_node) - update_route(bat_priv, orig_node, neigh_node, - tt_buff, tt_buff_len); - /* may be just TT changed */ - else - update_TT(bat_priv, orig_node, tt_buff, tt_buff_len); + update_route(bat_priv, orig_node, neigh_node); out: if (router) @@ -165,7 +184,7 @@ static int is_bidirectional_neigh(struct orig_node *orig_node, struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface); struct neigh_node *neigh_node = NULL, *tmp_neigh_node; struct hlist_node *node; - unsigned char total_count; + uint8_t total_count; uint8_t orig_eq_count, neigh_rq_count, tq_own; int tq_asym_penalty, ret = 0; @@ -348,9 +367,9 @@ out: } /* copy primary address for bonding */ -static void bonding_save_primary(struct orig_node *orig_node, +static void bonding_save_primary(const struct orig_node *orig_node, struct orig_node *orig_neigh_node, - struct batman_packet *batman_packet) + const struct batman_packet *batman_packet) { if (!(batman_packet->flags & PRIMARIES_FIRST_HOP)) return; @@ 
-358,19 +377,16 @@ static void bonding_save_primary(struct orig_node *orig_node, memcpy(orig_neigh_node->primary_addr, orig_node->orig, ETH_ALEN); } -static void update_orig(struct bat_priv *bat_priv, - struct orig_node *orig_node, - struct ethhdr *ethhdr, - struct batman_packet *batman_packet, +static void update_orig(struct bat_priv *bat_priv, struct orig_node *orig_node, + const struct ethhdr *ethhdr, + const struct batman_packet *batman_packet, struct hard_iface *if_incoming, - unsigned char *tt_buff, int tt_buff_len, - char is_duplicate) + const unsigned char *tt_buff, int is_duplicate) { struct neigh_node *neigh_node = NULL, *tmp_neigh_node = NULL; struct neigh_node *router = NULL; struct orig_node *orig_node_tmp; struct hlist_node *node; - int tmp_tt_buff_len; uint8_t bcast_own_sum_orig, bcast_own_sum_neigh; bat_dbg(DBG_BATMAN, bat_priv, "update_originator(): " @@ -435,9 +451,6 @@ static void update_orig(struct bat_priv *bat_priv, bonding_candidate_add(orig_node, neigh_node); - tmp_tt_buff_len = (tt_buff_len > batman_packet->num_tt * ETH_ALEN ? - batman_packet->num_tt * ETH_ALEN : tt_buff_len); - /* if this neighbor already is our next hop there is nothing * to change */ router = orig_node_get_router(orig_node); @@ -467,15 +480,19 @@ static void update_orig(struct bat_priv *bat_priv, goto update_tt; } - update_routes(bat_priv, orig_node, neigh_node, - tt_buff, tmp_tt_buff_len); - goto update_gw; + update_routes(bat_priv, orig_node, neigh_node); update_tt: - update_routes(bat_priv, orig_node, router, - tt_buff, tmp_tt_buff_len); + /* I have to check for transtable changes only if the OGM has been + * sent through a primary interface */ + if (((batman_packet->orig != ethhdr->h_source) && + (batman_packet->ttl > 2)) || + (batman_packet->flags & PRIMARIES_FIRST_HOP)) + update_transtable(bat_priv, orig_node, tt_buff, + batman_packet->tt_num_changes, + batman_packet->ttvn, + batman_packet->tt_crc); -update_gw: if (orig_node->gw_flags != batman_packet->gw_flags) gw_node_update(bat_priv, orig_node, batman_packet->gw_flags); @@ -531,15 +548,15 @@ static int window_protected(struct bat_priv *bat_priv, * -1 the packet is old and has been received while the seqno window * was protected. Caller should drop it. 
*/ -static char count_real_packets(struct ethhdr *ethhdr, - struct batman_packet *batman_packet, - struct hard_iface *if_incoming) +static int count_real_packets(const struct ethhdr *ethhdr, + const struct batman_packet *batman_packet, + const struct hard_iface *if_incoming) { struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface); struct orig_node *orig_node; struct neigh_node *tmp_neigh_node; struct hlist_node *node; - char is_duplicate = 0; + int is_duplicate = 0; int32_t seq_diff; int need_update = 0; int set_mark, ret = -1; @@ -595,9 +612,9 @@ out: return ret; } -void receive_bat_packet(struct ethhdr *ethhdr, +void receive_bat_packet(const struct ethhdr *ethhdr, struct batman_packet *batman_packet, - unsigned char *tt_buff, int tt_buff_len, + const unsigned char *tt_buff, struct hard_iface *if_incoming) { struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface); @@ -605,10 +622,10 @@ void receive_bat_packet(struct ethhdr *ethhdr, struct orig_node *orig_neigh_node, *orig_node; struct neigh_node *router = NULL, *router_router = NULL; struct neigh_node *orig_neigh_router = NULL; - char has_directlink_flag; - char is_my_addr = 0, is_my_orig = 0, is_my_oldorig = 0; - char is_broadcast = 0, is_bidirectional, is_single_hop_neigh; - char is_duplicate; + int has_directlink_flag; + int is_my_addr = 0, is_my_orig = 0, is_my_oldorig = 0; + int is_broadcast = 0, is_bidirectional, is_single_hop_neigh; + int is_duplicate; uint32_t if_incoming_seqno; /* Silently drop when the batman packet is actually not a @@ -636,12 +653,14 @@ void receive_bat_packet(struct ethhdr *ethhdr, bat_dbg(DBG_BATMAN, bat_priv, "Received BATMAN packet via NB: %pM, IF: %s [%pM] " - "(from OG: %pM, via prev OG: %pM, seqno %d, tq %d, " - "TTL %d, V %d, IDF %d)\n", + "(from OG: %pM, via prev OG: %pM, seqno %d, ttvn %u, " + "crc %u, changes %u, td %d, TTL %d, V %d, IDF %d)\n", ethhdr->h_source, if_incoming->net_dev->name, if_incoming->net_dev->dev_addr, batman_packet->orig, batman_packet->prev_sender, batman_packet->seqno, - batman_packet->tq, batman_packet->ttl, batman_packet->version, + batman_packet->ttvn, batman_packet->tt_crc, + batman_packet->tt_num_changes, batman_packet->tq, + batman_packet->ttl, batman_packet->version, has_directlink_flag); rcu_read_lock(); @@ -664,7 +683,7 @@ void receive_bat_packet(struct ethhdr *ethhdr, hard_iface->net_dev->dev_addr)) is_my_oldorig = 1; - if (compare_eth(ethhdr->h_source, broadcast_addr)) + if (is_broadcast_ether_addr(ethhdr->h_source)) is_broadcast = 1; } rcu_read_unlock(); @@ -701,17 +720,16 @@ void receive_bat_packet(struct ethhdr *ethhdr, /* neighbor has to indicate direct link and it has to * come via the corresponding interface */ - /* if received seqno equals last send seqno save new - * seqno for bidirectional check */ + /* save packet seqno for bidirectional check */ if (has_directlink_flag && compare_eth(if_incoming->net_dev->dev_addr, - batman_packet->orig) && - (batman_packet->seqno - if_incoming_seqno + 2 == 0)) { + batman_packet->orig)) { offset = if_incoming->if_num * NUM_WORDS; spin_lock_bh(&orig_neigh_node->ogm_cnt_lock); word = &(orig_neigh_node->bcast_own[offset]); - bit_mark(word, 0); + bit_mark(word, + if_incoming_seqno - batman_packet->seqno - 2); orig_neigh_node->bcast_own_sum[if_incoming->if_num] = bit_packet_count(word); spin_unlock_bh(&orig_neigh_node->ogm_cnt_lock); @@ -794,14 +812,14 @@ void receive_bat_packet(struct ethhdr *ethhdr, ((orig_node->last_real_seqno == batman_packet->seqno) && (orig_node->last_ttl - 3 <= batman_packet->ttl)))) 
update_orig(bat_priv, orig_node, ethhdr, batman_packet, - if_incoming, tt_buff, tt_buff_len, is_duplicate); + if_incoming, tt_buff, is_duplicate); /* is single hop (direct) neighbor */ if (is_single_hop_neigh) { /* mark direct link on incoming interface */ schedule_forward_packet(orig_node, ethhdr, batman_packet, - 1, tt_buff_len, if_incoming); + 1, if_incoming); bat_dbg(DBG_BATMAN, bat_priv, "Forwarding packet: " "rebroadcast neighbor packet with direct link flag\n"); @@ -824,7 +842,7 @@ void receive_bat_packet(struct ethhdr *ethhdr, bat_dbg(DBG_BATMAN, bat_priv, "Forwarding packet: rebroadcast originator packet\n"); schedule_forward_packet(orig_node, ethhdr, batman_packet, - 0, tt_buff_len, if_incoming); + 0, if_incoming); out_neigh: if ((orig_neigh_node) && (!is_single_hop_neigh)) @@ -1077,7 +1095,7 @@ out: * This method rotates the bonding list and increases the * returned router's refcount. */ static struct neigh_node *find_bond_router(struct orig_node *primary_orig, - struct hard_iface *recv_if) + const struct hard_iface *recv_if) { struct neigh_node *tmp_neigh_node; struct neigh_node *router = NULL, *first_candidate = NULL; @@ -1128,7 +1146,7 @@ out: * * Increases the returned router's refcount */ static struct neigh_node *find_ifalter_router(struct orig_node *primary_orig, - struct hard_iface *recv_if) + const struct hard_iface *recv_if) { struct neigh_node *tmp_neigh_node; struct neigh_node *router = NULL, *first_candidate = NULL; @@ -1171,12 +1189,124 @@ static struct neigh_node *find_ifalter_router(struct orig_node *primary_orig, return router; } +int recv_tt_query(struct sk_buff *skb, struct hard_iface *recv_if) +{ + struct bat_priv *bat_priv = netdev_priv(recv_if->soft_iface); + struct tt_query_packet *tt_query; + struct ethhdr *ethhdr; + + /* drop packet if it has not necessary minimum size */ + if (unlikely(!pskb_may_pull(skb, sizeof(struct tt_query_packet)))) + goto out; + + /* I could need to modify it */ + if (skb_cow(skb, sizeof(struct tt_query_packet)) < 0) + goto out; + + ethhdr = (struct ethhdr *)skb_mac_header(skb); + + /* packet with unicast indication but broadcast recipient */ + if (is_broadcast_ether_addr(ethhdr->h_dest)) + goto out; + + /* packet with broadcast sender address */ + if (is_broadcast_ether_addr(ethhdr->h_source)) + goto out; + + tt_query = (struct tt_query_packet *)skb->data; + + tt_query->tt_data = ntohs(tt_query->tt_data); + + switch (tt_query->flags & TT_QUERY_TYPE_MASK) { + case TT_REQUEST: + /* If we cannot provide an answer the tt_request is + * forwarded */ + if (!send_tt_response(bat_priv, tt_query)) { + bat_dbg(DBG_TT, bat_priv, + "Routing TT_REQUEST to %pM [%c]\n", + tt_query->dst, + (tt_query->flags & TT_FULL_TABLE ? 'F' : '.')); + tt_query->tt_data = htons(tt_query->tt_data); + return route_unicast_packet(skb, recv_if); + } + break; + case TT_RESPONSE: + /* packet needs to be linearised to access the TT changes */ + if (skb_linearize(skb) < 0) + goto out; + + if (is_my_mac(tt_query->dst)) + handle_tt_response(bat_priv, tt_query); + else { + bat_dbg(DBG_TT, bat_priv, + "Routing TT_RESPONSE to %pM [%c]\n", + tt_query->dst, + (tt_query->flags & TT_FULL_TABLE ? 
'F' : '.')); + tt_query->tt_data = htons(tt_query->tt_data); + return route_unicast_packet(skb, recv_if); + } + break; + } + +out: + /* returning NET_RX_DROP will make the caller function kfree the skb */ + return NET_RX_DROP; +} + +int recv_roam_adv(struct sk_buff *skb, struct hard_iface *recv_if) +{ + struct bat_priv *bat_priv = netdev_priv(recv_if->soft_iface); + struct roam_adv_packet *roam_adv_packet; + struct orig_node *orig_node; + struct ethhdr *ethhdr; + + /* drop packet if it has not necessary minimum size */ + if (unlikely(!pskb_may_pull(skb, sizeof(struct roam_adv_packet)))) + goto out; + + ethhdr = (struct ethhdr *)skb_mac_header(skb); + + /* packet with unicast indication but broadcast recipient */ + if (is_broadcast_ether_addr(ethhdr->h_dest)) + goto out; + + /* packet with broadcast sender address */ + if (is_broadcast_ether_addr(ethhdr->h_source)) + goto out; + + roam_adv_packet = (struct roam_adv_packet *)skb->data; + + if (!is_my_mac(roam_adv_packet->dst)) + return route_unicast_packet(skb, recv_if); + + orig_node = orig_hash_find(bat_priv, roam_adv_packet->src); + if (!orig_node) + goto out; + + bat_dbg(DBG_TT, bat_priv, "Received ROAMING_ADV from %pM " + "(client %pM)\n", roam_adv_packet->src, + roam_adv_packet->client); + + tt_global_add(bat_priv, orig_node, roam_adv_packet->client, + atomic_read(&orig_node->last_ttvn) + 1, true); + + /* Roaming phase starts: I have new information but the ttvn has not + * been incremented yet. This flag will make me check all the incoming + * packets for the correct destination. */ + bat_priv->tt_poss_change = true; + + orig_node_free_ref(orig_node); +out: + /* returning NET_RX_DROP will make the caller function kfree the skb */ + return NET_RX_DROP; +} + /* find a suitable router for this originator, and use * bonding if possible. 
increases the found neighbors * refcount.*/ struct neigh_node *find_router(struct bat_priv *bat_priv, struct orig_node *orig_node, - struct hard_iface *recv_if) + const struct hard_iface *recv_if) { struct orig_node *primary_orig_node; struct orig_node *router_orig; @@ -1240,6 +1370,9 @@ struct neigh_node *find_router(struct bat_priv *bat_priv, router = find_ifalter_router(primary_orig_node, recv_if); return_router: + if (router && router->if_incoming->if_status != IF_ACTIVE) + goto err_unlock; + rcu_read_unlock(); return router; err_unlock: @@ -1354,14 +1487,84 @@ out: return ret; } +static int check_unicast_ttvn(struct bat_priv *bat_priv, + struct sk_buff *skb) { + uint8_t curr_ttvn; + struct orig_node *orig_node; + struct ethhdr *ethhdr; + struct hard_iface *primary_if; + struct unicast_packet *unicast_packet; + bool tt_poss_change; + + /* I could need to modify it */ + if (skb_cow(skb, sizeof(struct unicast_packet)) < 0) + return 0; + + unicast_packet = (struct unicast_packet *)skb->data; + + if (is_my_mac(unicast_packet->dest)) { + tt_poss_change = bat_priv->tt_poss_change; + curr_ttvn = (uint8_t)atomic_read(&bat_priv->ttvn); + } else { + orig_node = orig_hash_find(bat_priv, unicast_packet->dest); + + if (!orig_node) + return 0; + + curr_ttvn = (uint8_t)atomic_read(&orig_node->last_ttvn); + tt_poss_change = orig_node->tt_poss_change; + orig_node_free_ref(orig_node); + } + + /* Check whether I have to reroute the packet */ + if (seq_before(unicast_packet->ttvn, curr_ttvn) || tt_poss_change) { + /* Linearize the skb before accessing it */ + if (skb_linearize(skb) < 0) + return 0; + + ethhdr = (struct ethhdr *)(skb->data + + sizeof(struct unicast_packet)); + orig_node = transtable_search(bat_priv, ethhdr->h_dest); + + if (!orig_node) { + if (!is_my_client(bat_priv, ethhdr->h_dest)) + return 0; + primary_if = primary_if_get_selected(bat_priv); + if (!primary_if) + return 0; + memcpy(unicast_packet->dest, + primary_if->net_dev->dev_addr, ETH_ALEN); + hardif_free_ref(primary_if); + } else { + memcpy(unicast_packet->dest, orig_node->orig, + ETH_ALEN); + curr_ttvn = (uint8_t) + atomic_read(&orig_node->last_ttvn); + orig_node_free_ref(orig_node); + } + + bat_dbg(DBG_ROUTES, bat_priv, "TTVN mismatch (old_ttvn %u " + "new_ttvn %u)! 
Rerouting unicast packet (for %pM) to " + "%pM\n", unicast_packet->ttvn, curr_ttvn, + ethhdr->h_dest, unicast_packet->dest); + + unicast_packet->ttvn = curr_ttvn; + } + return 1; +} + int recv_unicast_packet(struct sk_buff *skb, struct hard_iface *recv_if) { + struct bat_priv *bat_priv = netdev_priv(recv_if->soft_iface); struct unicast_packet *unicast_packet; - int hdr_size = sizeof(struct unicast_packet); + int hdr_size = sizeof(*unicast_packet); if (check_unicast_packet(skb, hdr_size) < 0) return NET_RX_DROP; + if (!check_unicast_ttvn(bat_priv, skb)) + return NET_RX_DROP; + unicast_packet = (struct unicast_packet *)skb->data; /* packet for me */ @@ -1377,13 +1580,16 @@ int recv_ucast_frag_packet(struct sk_buff *skb, struct hard_iface *recv_if) { struct bat_priv *bat_priv = netdev_priv(recv_if->soft_iface); struct unicast_frag_packet *unicast_packet; - int hdr_size = sizeof(struct unicast_frag_packet); + int hdr_size = sizeof(*unicast_packet); struct sk_buff *new_skb = NULL; int ret; if (check_unicast_packet(skb, hdr_size) < 0) return NET_RX_DROP; + if (!check_unicast_ttvn(bat_priv, skb)) + return NET_RX_DROP; + unicast_packet = (struct unicast_frag_packet *)skb->data; /* packet for me */ @@ -1413,7 +1619,7 @@ int recv_bcast_packet(struct sk_buff *skb, struct hard_iface *recv_if) struct orig_node *orig_node = NULL; struct bcast_packet *bcast_packet; struct ethhdr *ethhdr; - int hdr_size = sizeof(struct bcast_packet); + int hdr_size = sizeof(*bcast_packet); int ret = NET_RX_DROP; int32_t seq_diff; @@ -1491,7 +1697,7 @@ int recv_vis_packet(struct sk_buff *skb, struct hard_iface *recv_if) struct vis_packet *vis_packet; struct ethhdr *ethhdr; struct bat_priv *bat_priv = netdev_priv(recv_if->soft_iface); - int hdr_size = sizeof(struct vis_packet); + int hdr_size = sizeof(*vis_packet); /* keep skb linear */ if (skb_linearize(skb) < 0) diff --git a/net/batman-adv/routing.h b/net/batman-adv/routing.h index 870f298..fb14e95 100644 --- a/net/batman-adv/routing.h +++ b/net/batman-adv/routing.h @@ -23,13 +23,12 @@ #define _NET_BATMAN_ADV_ROUTING_H_ void slide_own_bcast_window(struct hard_iface *hard_iface); -void receive_bat_packet(struct ethhdr *ethhdr, - struct batman_packet *batman_packet, - unsigned char *tt_buff, int tt_buff_len, - struct hard_iface *if_incoming); +void receive_bat_packet(const struct ethhdr *ethhdr, + struct batman_packet *batman_packet, + const unsigned char *tt_buff, + struct hard_iface *if_incoming); void update_routes(struct bat_priv *bat_priv, struct orig_node *orig_node, - struct neigh_node *neigh_node, unsigned char *tt_buff, - int tt_buff_len); + struct neigh_node *neigh_node); int route_unicast_packet(struct sk_buff *skb, struct hard_iface *recv_if); int recv_icmp_packet(struct sk_buff *skb, struct hard_iface *recv_if); int recv_unicast_packet(struct sk_buff *skb, struct hard_iface *recv_if); @@ -37,9 +36,11 @@ int recv_ucast_frag_packet(struct sk_buff *skb, struct hard_iface *recv_if); int recv_bcast_packet(struct sk_buff *skb, struct hard_iface *recv_if); int recv_vis_packet(struct sk_buff *skb, struct hard_iface *recv_if); int recv_bat_packet(struct sk_buff *skb, struct hard_iface *recv_if); +int recv_tt_query(struct sk_buff *skb, struct hard_iface *recv_if); +int recv_roam_adv(struct sk_buff *skb, struct hard_iface *recv_if); struct neigh_node *find_router(struct bat_priv *bat_priv, struct orig_node *orig_node, - struct hard_iface *recv_if); + const struct hard_iface *recv_if); void bonding_candidate_del(struct orig_node *orig_node, struct neigh_node *neigh_node); 
diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c index 3377927..7a2f082 100644 --- a/net/batman-adv/send.c +++ b/net/batman-adv/send.c @@ -33,14 +33,14 @@ static void send_outstanding_bcast_packet(struct work_struct *work); /* apply hop penalty for a normal link */ -static uint8_t hop_penalty(const uint8_t tq, struct bat_priv *bat_priv) +static uint8_t hop_penalty(uint8_t tq, const struct bat_priv *bat_priv) { int hop_penalty = atomic_read(&bat_priv->hop_penalty); return (tq * (TQ_MAX_VALUE - hop_penalty)) / (TQ_MAX_VALUE); } /* when do we schedule our own packet to be sent */ -static unsigned long own_send_time(struct bat_priv *bat_priv) +static unsigned long own_send_time(const struct bat_priv *bat_priv) { return jiffies + msecs_to_jiffies( atomic_read(&bat_priv->orig_interval) - @@ -55,9 +55,8 @@ static unsigned long forward_send_time(void) /* send out an already prepared packet to the given address via the * specified batman interface */ -int send_skb_packet(struct sk_buff *skb, - struct hard_iface *hard_iface, - uint8_t *dst_addr) +int send_skb_packet(struct sk_buff *skb, struct hard_iface *hard_iface, + const uint8_t *dst_addr) { struct ethhdr *ethhdr; @@ -74,7 +73,7 @@ int send_skb_packet(struct sk_buff *skb, } /* push to the ethernet header. */ - if (my_skb_head_push(skb, sizeof(struct ethhdr)) < 0) + if (my_skb_head_push(skb, sizeof(*ethhdr)) < 0) goto send_skb_err; skb_reset_mac_header(skb); @@ -121,7 +120,7 @@ static void send_packet_to_if(struct forw_packet *forw_packet, /* adjust all flags and log packets */ while (aggregated_packet(buff_pos, forw_packet->packet_len, - batman_packet->num_tt)) { + batman_packet->tt_num_changes)) { /* we might have aggregated direct link packets with an * ordinary base packet */ @@ -136,17 +135,17 @@ static void send_packet_to_if(struct forw_packet *forw_packet, "Forwarding")); bat_dbg(DBG_BATMAN, bat_priv, "%s %spacket (originator %pM, seqno %d, TQ %d, TTL %d," - " IDF %s) on interface %s [%pM]\n", + " IDF %s, hvn %d) on interface %s [%pM]\n", fwd_str, (packet_num > 0 ? "aggregated " : ""), batman_packet->orig, ntohl(batman_packet->seqno), batman_packet->tq, batman_packet->ttl, (batman_packet->flags & DIRECTLINK ? "on" : "off"), - hard_iface->net_dev->name, + batman_packet->ttvn, hard_iface->net_dev->name, hard_iface->net_dev->dev_addr); - buff_pos += sizeof(struct batman_packet) + - (batman_packet->num_tt * ETH_ALEN); + buff_pos += sizeof(*batman_packet) + + tt_len(batman_packet->tt_num_changes); packet_num++; batman_packet = (struct batman_packet *) (forw_packet->skb->data + buff_pos); @@ -166,7 +165,7 @@ static void send_packet(struct forw_packet *forw_packet) struct bat_priv *bat_priv; struct batman_packet *batman_packet = (struct batman_packet *)(forw_packet->skb->data); - unsigned char directlink = (batman_packet->flags & DIRECTLINK ? 1 : 0); + int directlink = (batman_packet->flags & DIRECTLINK ? 
1 : 0); if (!forw_packet->if_incoming) { pr_err("Error - can't forward packet: incoming iface not " @@ -214,26 +213,18 @@ static void send_packet(struct forw_packet *forw_packet) rcu_read_unlock(); } -static void rebuild_batman_packet(struct bat_priv *bat_priv, - struct hard_iface *hard_iface) +static void realloc_packet_buffer(struct hard_iface *hard_iface, + int new_len) { - int new_len; unsigned char *new_buff; struct batman_packet *batman_packet; - new_len = sizeof(struct batman_packet) + - (bat_priv->num_local_tt * ETH_ALEN); new_buff = kmalloc(new_len, GFP_ATOMIC); /* keep old buffer if kmalloc should fail */ if (new_buff) { memcpy(new_buff, hard_iface->packet_buff, - sizeof(struct batman_packet)); - batman_packet = (struct batman_packet *)new_buff; - - batman_packet->num_tt = tt_local_fill_buffer(bat_priv, - new_buff + sizeof(struct batman_packet), - new_len - sizeof(struct batman_packet)); + sizeof(*batman_packet)); kfree(hard_iface->packet_buff); hard_iface->packet_buff = new_buff; @@ -241,6 +232,46 @@ static void rebuild_batman_packet(struct bat_priv *bat_priv, } } +/* when calling this function (hard_iface == primary_if) has to be true */ +static void prepare_packet_buffer(struct bat_priv *bat_priv, + struct hard_iface *hard_iface) +{ + int new_len; + struct batman_packet *batman_packet; + + new_len = BAT_PACKET_LEN + + tt_len((uint8_t)atomic_read(&bat_priv->tt_local_changes)); + + /* if we have too many changes for one packet don't send any + * and wait for the tt table request which will be fragmented */ + if (new_len > hard_iface->soft_iface->mtu) + new_len = BAT_PACKET_LEN; + + realloc_packet_buffer(hard_iface, new_len); + batman_packet = (struct batman_packet *)hard_iface->packet_buff; + + atomic_set(&bat_priv->tt_crc, tt_local_crc(bat_priv)); + + /* reset the sending counter */ + atomic_set(&bat_priv->tt_ogm_append_cnt, TT_OGM_APPEND_MAX); + + batman_packet->tt_num_changes = tt_changes_fill_buffer(bat_priv, + hard_iface->packet_buff + BAT_PACKET_LEN, + hard_iface->packet_len - BAT_PACKET_LEN); + +} + +static void reset_packet_buffer(struct bat_priv *bat_priv, + struct hard_iface *hard_iface) +{ + struct batman_packet *batman_packet; + + realloc_packet_buffer(hard_iface, BAT_PACKET_LEN); + + batman_packet = (struct batman_packet *)hard_iface->packet_buff; + batman_packet->tt_num_changes = 0; +} + void schedule_own_packet(struct hard_iface *hard_iface) { struct bat_priv *bat_priv = netdev_priv(hard_iface->soft_iface); @@ -266,14 +297,23 @@ void schedule_own_packet(struct hard_iface *hard_iface) if (hard_iface->if_status == IF_TO_BE_ACTIVATED) hard_iface->if_status = IF_ACTIVE; - /* if local tt has changed and interface is a primary interface */ - if ((atomic_read(&bat_priv->tt_local_changed)) && - (hard_iface == primary_if)) - rebuild_batman_packet(bat_priv, hard_iface); + if (hard_iface == primary_if) { + /* if at least one change happened */ + if (atomic_read(&bat_priv->tt_local_changes) > 0) { + prepare_packet_buffer(bat_priv, hard_iface); + /* Increment the TTVN only once per OGM interval */ + atomic_inc(&bat_priv->ttvn); + bat_priv->tt_poss_change = false; + } + + /* if the changes have been sent enough times */ + if (!atomic_dec_not_zero(&bat_priv->tt_ogm_append_cnt)) + reset_packet_buffer(bat_priv, hard_iface); + } /** * NOTE: packet_buff might just have been re-allocated in - * rebuild_batman_packet() + * prepare_packet_buffer() or in reset_packet_buffer() */ batman_packet = (struct batman_packet *)hard_iface->packet_buff; @@ -281,6 +321,9 @@ void 
schedule_own_packet(struct hard_iface *hard_iface) batman_packet->seqno = htonl((uint32_t)atomic_read(&hard_iface->seqno)); + batman_packet->ttvn = atomic_read(&bat_priv->ttvn); + batman_packet->tt_crc = htons((uint16_t)atomic_read(&bat_priv->tt_crc)); + if (vis_server == VIS_TYPE_SERVER_SYNC) batman_packet->flags |= VIS_SERVER; else @@ -291,7 +334,7 @@ void schedule_own_packet(struct hard_iface *hard_iface) batman_packet->gw_flags = (uint8_t)atomic_read(&bat_priv->gw_bandwidth); else - batman_packet->gw_flags = 0; + batman_packet->gw_flags = NO_FLAGS; atomic_inc(&hard_iface->seqno); @@ -307,15 +350,16 @@ void schedule_own_packet(struct hard_iface *hard_iface) } void schedule_forward_packet(struct orig_node *orig_node, - struct ethhdr *ethhdr, + const struct ethhdr *ethhdr, struct batman_packet *batman_packet, - uint8_t directlink, int tt_buff_len, + int directlink, struct hard_iface *if_incoming) { struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface); struct neigh_node *router; - unsigned char in_tq, in_ttl, tq_avg = 0; + uint8_t in_tq, in_ttl, tq_avg = 0; unsigned long send_time; + uint8_t tt_num_changes; if (batman_packet->ttl <= 1) { bat_dbg(DBG_BATMAN, bat_priv, "ttl exceeded\n"); @@ -326,6 +370,7 @@ void schedule_forward_packet(struct orig_node *orig_node, in_tq = batman_packet->tq; in_ttl = batman_packet->ttl; + tt_num_changes = batman_packet->tt_num_changes; batman_packet->ttl--; memcpy(batman_packet->prev_sender, ethhdr->h_source, ETH_ALEN); @@ -358,6 +403,7 @@ void schedule_forward_packet(struct orig_node *orig_node, batman_packet->ttl); batman_packet->seqno = htonl(batman_packet->seqno); + batman_packet->tt_crc = htons(batman_packet->tt_crc); /* switch off primaries first hop flag when forwarding */ batman_packet->flags &= ~PRIMARIES_FIRST_HOP; @@ -369,7 +415,7 @@ void schedule_forward_packet(struct orig_node *orig_node, send_time = forward_send_time(); add_bat_packet_to_list(bat_priv, (unsigned char *)batman_packet, - sizeof(struct batman_packet) + tt_buff_len, + sizeof(*batman_packet) + tt_len(tt_num_changes), if_incoming, 0, send_time); } @@ -408,11 +454,13 @@ static void _add_bcast_packet_to_list(struct bat_priv *bat_priv, * * The skb is not consumed, so the caller should make sure that the * skb is freed. */ -int add_bcast_packet_to_list(struct bat_priv *bat_priv, struct sk_buff *skb) +int add_bcast_packet_to_list(struct bat_priv *bat_priv, + const struct sk_buff *skb) { struct hard_iface *primary_if = NULL; struct forw_packet *forw_packet; struct bcast_packet *bcast_packet; + struct sk_buff *newskb; if (!atomic_dec_not_zero(&bat_priv->bcast_queue_left)) { bat_dbg(DBG_BATMAN, bat_priv, "bcast packet queue full\n"); @@ -423,22 +471,22 @@ int add_bcast_packet_to_list(struct bat_priv *bat_priv, struct sk_buff *skb) if (!primary_if) goto out_and_inc; - forw_packet = kmalloc(sizeof(struct forw_packet), GFP_ATOMIC); + forw_packet = kmalloc(sizeof(*forw_packet), GFP_ATOMIC); if (!forw_packet) goto out_and_inc; - skb = skb_copy(skb, GFP_ATOMIC); - if (!skb) + newskb = skb_copy(skb, GFP_ATOMIC); + if (!newskb) goto packet_free; /* as we have a copy now, it is safe to decrease the TTL */ - bcast_packet = (struct bcast_packet *)skb->data; + bcast_packet = (struct bcast_packet *)newskb->data; bcast_packet->ttl--; - skb_reset_mac_header(skb); + skb_reset_mac_header(newskb); - forw_packet->skb = skb; + forw_packet->skb = newskb; forw_packet->if_incoming = primary_if; /* how often did we send the bcast packet ?
*/ @@ -537,7 +585,7 @@ out: } void purge_outstanding_packets(struct bat_priv *bat_priv, - struct hard_iface *hard_iface) + const struct hard_iface *hard_iface) { struct forw_packet *forw_packet; struct hlist_node *tmp_node, *safe_tmp_node; diff --git a/net/batman-adv/send.h b/net/batman-adv/send.h index 247172d..633224a 100644 --- a/net/batman-adv/send.h +++ b/net/batman-adv/send.h @@ -22,18 +22,18 @@ #ifndef _NET_BATMAN_ADV_SEND_H_ #define _NET_BATMAN_ADV_SEND_H_ -int send_skb_packet(struct sk_buff *skb, - struct hard_iface *hard_iface, - uint8_t *dst_addr); +int send_skb_packet(struct sk_buff *skb, struct hard_iface *hard_iface, + const uint8_t *dst_addr); void schedule_own_packet(struct hard_iface *hard_iface); void schedule_forward_packet(struct orig_node *orig_node, - struct ethhdr *ethhdr, + const struct ethhdr *ethhdr, struct batman_packet *batman_packet, - uint8_t directlink, int tt_buff_len, + int directlink, struct hard_iface *if_outgoing); -int add_bcast_packet_to_list(struct bat_priv *bat_priv, struct sk_buff *skb); +int add_bcast_packet_to_list(struct bat_priv *bat_priv, + const struct sk_buff *skb); void send_outstanding_bat_packet(struct work_struct *work); void purge_outstanding_packets(struct bat_priv *bat_priv, - struct hard_iface *hard_iface); + const struct hard_iface *hard_iface); #endif /* _NET_BATMAN_ADV_SEND_H_ */ diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index d5aa609..2dcdbb7 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -30,6 +30,7 @@ #include "gateway_common.h" #include "gateway_client.h" #include "bat_sysfs.h" +#include "originator.h" #include <linux/slab.h> #include <linux/ethtool.h> #include <linux/etherdevice.h> @@ -123,8 +124,7 @@ static struct softif_neigh_vid *softif_neigh_vid_get(struct bat_priv *bat_priv, goto out; } - softif_neigh_vid = kzalloc(sizeof(struct softif_neigh_vid), - GFP_ATOMIC); + softif_neigh_vid = kzalloc(sizeof(*softif_neigh_vid), GFP_ATOMIC); if (!softif_neigh_vid) goto out; @@ -146,7 +146,7 @@ out: } static struct softif_neigh *softif_neigh_get(struct bat_priv *bat_priv, - uint8_t *addr, short vid) + const uint8_t *addr, short vid) { struct softif_neigh_vid *softif_neigh_vid; struct softif_neigh *softif_neigh = NULL; @@ -170,7 +170,7 @@ static struct softif_neigh *softif_neigh_get(struct bat_priv *bat_priv, goto unlock; } - softif_neigh = kzalloc(sizeof(struct softif_neigh), GFP_ATOMIC); + softif_neigh = kzalloc(sizeof(*softif_neigh), GFP_ATOMIC); if (!softif_neigh) goto unlock; @@ -242,7 +242,8 @@ static void softif_neigh_vid_select(struct bat_priv *bat_priv, if (new_neigh && !atomic_inc_not_zero(&new_neigh->refcount)) new_neigh = NULL; - curr_neigh = softif_neigh_vid->softif_neigh; + curr_neigh = rcu_dereference_protected(softif_neigh_vid->softif_neigh, + 1); rcu_assign_pointer(softif_neigh_vid->softif_neigh, new_neigh); if ((curr_neigh) && (!new_neigh)) @@ -380,7 +381,7 @@ void softif_neigh_purge(struct bat_priv *bat_priv) struct softif_neigh *softif_neigh, *curr_softif_neigh; struct softif_neigh_vid *softif_neigh_vid; struct hlist_node *node, *node_tmp, *node_tmp2; - char do_deselect; + int do_deselect; rcu_read_lock(); hlist_for_each_entry_rcu(softif_neigh_vid, node, @@ -534,7 +535,7 @@ static int interface_set_mac_addr(struct net_device *dev, void *p) /* only modify transtable if it has been initialised before */ if (atomic_read(&bat_priv->mesh_state) == MESH_ACTIVE) { tt_local_remove(bat_priv, dev->dev_addr, - "mac address changed"); + "mac address 
changed", false); tt_local_add(dev, addr->sa_data); } @@ -553,7 +554,7 @@ static int interface_change_mtu(struct net_device *dev, int new_mtu) return 0; } -int interface_tx(struct sk_buff *skb, struct net_device *soft_iface) +static int interface_tx(struct sk_buff *skb, struct net_device *soft_iface) { struct ethhdr *ethhdr = (struct ethhdr *)skb->data; struct bat_priv *bat_priv = netdev_priv(soft_iface); @@ -561,6 +562,7 @@ int interface_tx(struct sk_buff *skb, struct net_device *soft_iface) struct bcast_packet *bcast_packet; struct vlan_ethhdr *vhdr; struct softif_neigh *curr_softif_neigh = NULL; + struct orig_node *orig_node = NULL; int data_len = skb->len, ret; short vid = -1; bool do_bcast = false; @@ -592,11 +594,13 @@ int interface_tx(struct sk_buff *skb, struct net_device *soft_iface) if (curr_softif_neigh) goto dropped; - /* TODO: check this for locks */ + /* Register the client MAC in the transtable */ tt_local_add(soft_iface, ethhdr->h_source); - if (is_multicast_ether_addr(ethhdr->h_dest)) { - ret = gw_is_target(bat_priv, skb); + orig_node = transtable_search(bat_priv, ethhdr->h_dest); + if (is_multicast_ether_addr(ethhdr->h_dest) || + (orig_node && orig_node->gw_flags)) { + ret = gw_is_target(bat_priv, skb, orig_node); if (ret < 0) goto dropped; @@ -611,7 +615,7 @@ int interface_tx(struct sk_buff *skb, struct net_device *soft_iface) if (!primary_if) goto dropped; - if (my_skb_head_push(skb, sizeof(struct bcast_packet)) < 0) + if (my_skb_head_push(skb, sizeof(*bcast_packet)) < 0) goto dropped; bcast_packet = (struct bcast_packet *)skb->data; @@ -656,6 +660,8 @@ end: softif_neigh_free_ref(curr_softif_neigh); if (primary_if) hardif_free_ref(primary_if); + if (orig_node) + orig_node_free_ref(orig_node); return NETDEV_TX_OK; } @@ -790,17 +796,16 @@ static void interface_setup(struct net_device *dev) SET_ETHTOOL_OPS(dev, &bat_ethtool_ops); - memset(priv, 0, sizeof(struct bat_priv)); + memset(priv, 0, sizeof(*priv)); } -struct net_device *softif_create(char *name) +struct net_device *softif_create(const char *name) { struct net_device *soft_iface; struct bat_priv *bat_priv; int ret; - soft_iface = alloc_netdev(sizeof(struct bat_priv) , name, - interface_setup); + soft_iface = alloc_netdev(sizeof(*bat_priv), name, interface_setup); if (!soft_iface) { pr_err("Unable to allocate the batman interface: %s\n", name); @@ -831,7 +836,13 @@ struct net_device *softif_create(char *name) atomic_set(&bat_priv->mesh_state, MESH_INACTIVE); atomic_set(&bat_priv->bcast_seqno, 1); - atomic_set(&bat_priv->tt_local_changed, 0); + atomic_set(&bat_priv->ttvn, 0); + atomic_set(&bat_priv->tt_local_changes, 0); + atomic_set(&bat_priv->tt_ogm_append_cnt, 0); + + bat_priv->tt_buff = NULL; + bat_priv->tt_buff_len = 0; + bat_priv->tt_poss_change = false; bat_priv->primary_if = NULL; bat_priv->num_ifaces = 0; @@ -872,7 +883,7 @@ void softif_destroy(struct net_device *soft_iface) unregister_netdevice(soft_iface); } -int softif_is_valid(struct net_device *net_dev) +int softif_is_valid(const struct net_device *net_dev) { #ifdef HAVE_NET_DEVICE_OPS if (net_dev->netdev_ops->ndo_start_xmit == interface_tx) @@ -924,4 +935,3 @@ static u32 bat_get_link(struct net_device *dev) { return 1; } - diff --git a/net/batman-adv/soft-interface.h b/net/batman-adv/soft-interface.h index 4789b6f..001546f 100644 --- a/net/batman-adv/soft-interface.h +++ b/net/batman-adv/soft-interface.h @@ -25,12 +25,11 @@ int my_skb_head_push(struct sk_buff *skb, unsigned int len); int softif_neigh_seq_print_text(struct seq_file *seq, void *offset); 
void softif_neigh_purge(struct bat_priv *bat_priv); -int interface_tx(struct sk_buff *skb, struct net_device *soft_iface); void interface_rx(struct net_device *soft_iface, struct sk_buff *skb, struct hard_iface *recv_if, int hdr_size); -struct net_device *softif_create(char *name); +struct net_device *softif_create(const char *name); void softif_destroy(struct net_device *soft_iface); -int softif_is_valid(struct net_device *net_dev); +int softif_is_valid(const struct net_device *net_dev); #endif /* _NET_BATMAN_ADV_SOFT_INTERFACE_H_ */ diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 7b72966..5f1fcd5 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -23,38 +23,45 @@ #include "translation-table.h" #include "soft-interface.h" #include "hard-interface.h" +#include "send.h" #include "hash.h" #include "originator.h" +#include "routing.h" -static void tt_local_purge(struct work_struct *work); -static void _tt_global_del_orig(struct bat_priv *bat_priv, - struct tt_global_entry *tt_global_entry, - char *message); +#include <linux/crc16.h> + +static void _tt_global_del(struct bat_priv *bat_priv, + struct tt_global_entry *tt_global_entry, + const char *message); +static void tt_purge(struct work_struct *work); /* returns 1 if they are the same mac addr */ -static int compare_ltt(struct hlist_node *node, void *data2) +static int compare_ltt(const struct hlist_node *node, const void *data2) { - void *data1 = container_of(node, struct tt_local_entry, hash_entry); + const void *data1 = container_of(node, struct tt_local_entry, + hash_entry); return (memcmp(data1, data2, ETH_ALEN) == 0 ? 1 : 0); } /* returns 1 if they are the same mac addr */ -static int compare_gtt(struct hlist_node *node, void *data2) +static int compare_gtt(const struct hlist_node *node, const void *data2) { - void *data1 = container_of(node, struct tt_global_entry, hash_entry); + const void *data1 = container_of(node, struct tt_global_entry, + hash_entry); return (memcmp(data1, data2, ETH_ALEN) == 0 ? 
1 : 0); } -static void tt_local_start_timer(struct bat_priv *bat_priv) +static void tt_start_timer(struct bat_priv *bat_priv) { - INIT_DELAYED_WORK(&bat_priv->tt_work, tt_local_purge); - queue_delayed_work(bat_event_workqueue, &bat_priv->tt_work, 10 * HZ); + INIT_DELAYED_WORK(&bat_priv->tt_work, tt_purge); + queue_delayed_work(bat_event_workqueue, &bat_priv->tt_work, + msecs_to_jiffies(5000)); } static struct tt_local_entry *tt_local_hash_find(struct bat_priv *bat_priv, - void *data) + const void *data) { struct hashtable_t *hash = bat_priv->tt_local_hash; struct hlist_head *head; @@ -73,6 +80,9 @@ static struct tt_local_entry *tt_local_hash_find(struct bat_priv *bat_priv, if (!compare_eth(tt_local_entry, data)) continue; + if (!atomic_inc_not_zero(&tt_local_entry->refcount)) + continue; + tt_local_entry_tmp = tt_local_entry; break; } @@ -82,7 +92,7 @@ static struct tt_local_entry *tt_local_hash_find(struct bat_priv *bat_priv, } static struct tt_global_entry *tt_global_hash_find(struct bat_priv *bat_priv, - void *data) + const void *data) { struct hashtable_t *hash = bat_priv->tt_global_hash; struct hlist_head *head; @@ -102,6 +112,9 @@ static struct tt_global_entry *tt_global_hash_find(struct bat_priv *bat_priv, if (!compare_eth(tt_global_entry, data)) continue; + if (!atomic_inc_not_zero(&tt_global_entry->refcount)) + continue; + tt_global_entry_tmp = tt_global_entry; break; } @@ -110,7 +123,57 @@ static struct tt_global_entry *tt_global_hash_find(struct bat_priv *bat_priv, return tt_global_entry_tmp; } -int tt_local_init(struct bat_priv *bat_priv) +static bool is_out_of_time(unsigned long starting_time, unsigned long timeout) +{ + unsigned long deadline; + deadline = starting_time + msecs_to_jiffies(timeout); + + return time_after(jiffies, deadline); +} + +static void tt_local_entry_free_ref(struct tt_local_entry *tt_local_entry) +{ + if (atomic_dec_and_test(&tt_local_entry->refcount)) + kfree_rcu(tt_local_entry, rcu); +} + +static void tt_global_entry_free_ref(struct tt_global_entry *tt_global_entry) +{ + if (atomic_dec_and_test(&tt_global_entry->refcount)) + kfree_rcu(tt_global_entry, rcu); +} + +static void tt_local_event(struct bat_priv *bat_priv, uint8_t op, + const uint8_t *addr, bool roaming) +{ + struct tt_change_node *tt_change_node; + + tt_change_node = kmalloc(sizeof(*tt_change_node), GFP_ATOMIC); + + if (!tt_change_node) + return; + + tt_change_node->change.flags = op; + if (roaming) + tt_change_node->change.flags |= TT_CLIENT_ROAM; + + memcpy(tt_change_node->change.addr, addr, ETH_ALEN); + + spin_lock_bh(&bat_priv->tt_changes_list_lock); + /* track the change in the OGMinterval list */ + list_add_tail(&tt_change_node->list, &bat_priv->tt_changes_list); + atomic_inc(&bat_priv->tt_local_changes); + spin_unlock_bh(&bat_priv->tt_changes_list_lock); + + atomic_set(&bat_priv->tt_ogm_append_cnt, 0); +} + +int tt_len(int changes_num) +{ + return changes_num * sizeof(struct tt_change); +} + +static int tt_local_init(struct bat_priv *bat_priv) { if (bat_priv->tt_local_hash) return 1; @@ -120,54 +183,35 @@ int tt_local_init(struct bat_priv *bat_priv) if (!bat_priv->tt_local_hash) return 0; - atomic_set(&bat_priv->tt_local_changed, 0); - tt_local_start_timer(bat_priv); - return 1; } -void tt_local_add(struct net_device *soft_iface, uint8_t *addr) +void tt_local_add(struct net_device *soft_iface, const uint8_t *addr) { struct bat_priv *bat_priv = netdev_priv(soft_iface); - struct tt_local_entry *tt_local_entry; - struct tt_global_entry *tt_global_entry; - int required_bytes; + struct 
tt_local_entry *tt_local_entry = NULL; + struct tt_global_entry *tt_global_entry = NULL; - spin_lock_bh(&bat_priv->tt_lhash_lock); tt_local_entry = tt_local_hash_find(bat_priv, addr); - spin_unlock_bh(&bat_priv->tt_lhash_lock); if (tt_local_entry) { tt_local_entry->last_seen = jiffies; - return; + goto out; } - /* only announce as many hosts as possible in the batman-packet and - space in batman_packet->num_tt That also should give a limit to - MAC-flooding. */ - required_bytes = (bat_priv->num_local_tt + 1) * ETH_ALEN; - required_bytes += BAT_PACKET_LEN; - - if ((required_bytes > ETH_DATA_LEN) || - (atomic_read(&bat_priv->aggregated_ogms) && - required_bytes > MAX_AGGREGATION_BYTES) || - (bat_priv->num_local_tt + 1 > 255)) { - bat_dbg(DBG_ROUTES, bat_priv, - "Can't add new local tt entry (%pM): " - "number of local tt entries exceeds packet size\n", - addr); - return; - } + tt_local_entry = kmalloc(sizeof(*tt_local_entry), GFP_ATOMIC); + if (!tt_local_entry) + goto out; - bat_dbg(DBG_ROUTES, bat_priv, - "Creating new local tt entry: %pM\n", addr); + tt_local_event(bat_priv, NO_FLAGS, addr, false); - tt_local_entry = kmalloc(sizeof(struct tt_local_entry), GFP_ATOMIC); - if (!tt_local_entry) - return; + bat_dbg(DBG_TT, bat_priv, + "Creating new local tt entry: %pM (ttvn: %d)\n", addr, + (uint8_t)atomic_read(&bat_priv->ttvn)); memcpy(tt_local_entry->addr, addr, ETH_ALEN); tt_local_entry->last_seen = jiffies; + atomic_set(&tt_local_entry->refcount, 2); /* the batman interface mac address should never be purged */ if (compare_eth(addr, soft_iface->dev_addr)) @@ -175,61 +219,75 @@ void tt_local_add(struct net_device *soft_iface, uint8_t *addr) else tt_local_entry->never_purge = 0; - spin_lock_bh(&bat_priv->tt_lhash_lock); - hash_add(bat_priv->tt_local_hash, compare_ltt, choose_orig, tt_local_entry, &tt_local_entry->hash_entry); - bat_priv->num_local_tt++; - atomic_set(&bat_priv->tt_local_changed, 1); - spin_unlock_bh(&bat_priv->tt_lhash_lock); + atomic_inc(&bat_priv->num_local_tt); /* remove address from global hash if present */ - spin_lock_bh(&bat_priv->tt_ghash_lock); - tt_global_entry = tt_global_hash_find(bat_priv, addr); + /* Check whether it is a roaming! 
*/ + if (tt_global_entry) { + /* This node is probably going to update its tt table */ + tt_global_entry->orig_node->tt_poss_change = true; + _tt_global_del(bat_priv, tt_global_entry, + "local tt received"); + send_roam_adv(bat_priv, tt_global_entry->addr, + tt_global_entry->orig_node); + } +out: + if (tt_local_entry) + tt_local_entry_free_ref(tt_local_entry); if (tt_global_entry) - _tt_global_del_orig(bat_priv, tt_global_entry, - "local tt received"); - - spin_unlock_bh(&bat_priv->tt_ghash_lock); + tt_global_entry_free_ref(tt_global_entry); } -int tt_local_fill_buffer(struct bat_priv *bat_priv, - unsigned char *buff, int buff_len) +int tt_changes_fill_buffer(struct bat_priv *bat_priv, + unsigned char *buff, int buff_len) { - struct hashtable_t *hash = bat_priv->tt_local_hash; - struct tt_local_entry *tt_local_entry; - struct hlist_node *node; - struct hlist_head *head; - int i, count = 0; + int count = 0, tot_changes = 0; + struct tt_change_node *entry, *safe; - spin_lock_bh(&bat_priv->tt_lhash_lock); + if (buff_len > 0) + tot_changes = buff_len / tt_len(1); - for (i = 0; i < hash->size; i++) { - head = &hash->table[i]; - - rcu_read_lock(); - hlist_for_each_entry_rcu(tt_local_entry, node, - head, hash_entry) { - if (buff_len < (count + 1) * ETH_ALEN) - break; - - memcpy(buff + (count * ETH_ALEN), tt_local_entry->addr, - ETH_ALEN); + spin_lock_bh(&bat_priv->tt_changes_list_lock); + atomic_set(&bat_priv->tt_local_changes, 0); + list_for_each_entry_safe(entry, safe, &bat_priv->tt_changes_list, + list) { + if (count < tot_changes) { + memcpy(buff + tt_len(count), + &entry->change, sizeof(struct tt_change)); count++; } - rcu_read_unlock(); + list_del(&entry->list); + kfree(entry); } + spin_unlock_bh(&bat_priv->tt_changes_list_lock); + + /* Keep the buffer for possible tt_request */ + spin_lock_bh(&bat_priv->tt_buff_lock); + kfree(bat_priv->tt_buff); + bat_priv->tt_buff_len = 0; + bat_priv->tt_buff = NULL; + /* We check whether this new OGM has no changes due to size + * problems */ + if (buff_len > 0) { + /** + * if kmalloc() fails we will reply with the full table + * instead of providing the diff + */ + bat_priv->tt_buff = kmalloc(buff_len, GFP_ATOMIC); + if (bat_priv->tt_buff) { + memcpy(bat_priv->tt_buff, buff, buff_len); + bat_priv->tt_buff_len = buff_len; + } + } + spin_unlock_bh(&bat_priv->tt_buff_lock); - /* if we did not get all new local tts see you next time ;-) */ - if (count == bat_priv->num_local_tt) - atomic_set(&bat_priv->tt_local_changed, 0); - - spin_unlock_bh(&bat_priv->tt_lhash_lock); - return count; + return tot_changes; } int tt_local_seq_print_text(struct seq_file *seq, void *offset) @@ -261,10 +319,8 @@ int tt_local_seq_print_text(struct seq_file *seq, void *offset) } seq_printf(seq, "Locally retrieved addresses (from %s) " - "announced via TT:\n", - net_dev->name); - - spin_lock_bh(&bat_priv->tt_lhash_lock); + "announced via TT (TTVN: %u):\n", + net_dev->name, (uint8_t)atomic_read(&bat_priv->ttvn)); buf_size = 1; /* Estimate length for: " * xx:xx:xx:xx:xx:xx\n" */ @@ -279,7 +335,6 @@ int tt_local_seq_print_text(struct seq_file *seq, void *offset) buff = kmalloc(buf_size, GFP_ATOMIC); if (!buff) { - spin_unlock_bh(&bat_priv->tt_lhash_lock); ret = -ENOMEM; goto out; } @@ -299,8 +354,6 @@ int tt_local_seq_print_text(struct seq_file *seq, void *offset) rcu_read_unlock(); } - spin_unlock_bh(&bat_priv->tt_lhash_lock); - seq_printf(seq, "%s", buff); kfree(buff); out: @@ -309,92 +362,108 @@ out: return ret; } -static void _tt_local_del(struct hlist_node *node, void *arg) -{ - 
struct bat_priv *bat_priv = (struct bat_priv *)arg; - void *data = container_of(node, struct tt_local_entry, hash_entry); - - kfree(data); - bat_priv->num_local_tt--; - atomic_set(&bat_priv->tt_local_changed, 1); -} - static void tt_local_del(struct bat_priv *bat_priv, - struct tt_local_entry *tt_local_entry, - char *message) + struct tt_local_entry *tt_local_entry, + const char *message) { - bat_dbg(DBG_ROUTES, bat_priv, "Deleting local tt entry (%pM): %s\n", + bat_dbg(DBG_TT, bat_priv, "Deleting local tt entry (%pM): %s\n", tt_local_entry->addr, message); + atomic_dec(&bat_priv->num_local_tt); + hash_remove(bat_priv->tt_local_hash, compare_ltt, choose_orig, tt_local_entry->addr); - _tt_local_del(&tt_local_entry->hash_entry, bat_priv); + + tt_local_entry_free_ref(tt_local_entry); } -void tt_local_remove(struct bat_priv *bat_priv, - uint8_t *addr, char *message) +void tt_local_remove(struct bat_priv *bat_priv, const uint8_t *addr, + const char *message, bool roaming) { - struct tt_local_entry *tt_local_entry; - - spin_lock_bh(&bat_priv->tt_lhash_lock); + struct tt_local_entry *tt_local_entry = NULL; tt_local_entry = tt_local_hash_find(bat_priv, addr); - if (tt_local_entry) - tt_local_del(bat_priv, tt_local_entry, message); + if (!tt_local_entry) + goto out; - spin_unlock_bh(&bat_priv->tt_lhash_lock); + tt_local_event(bat_priv, TT_CHANGE_DEL, tt_local_entry->addr, roaming); + tt_local_del(bat_priv, tt_local_entry, message); +out: + if (tt_local_entry) + tt_local_entry_free_ref(tt_local_entry); } -static void tt_local_purge(struct work_struct *work) +static void tt_local_purge(struct bat_priv *bat_priv) { - struct delayed_work *delayed_work = - container_of(work, struct delayed_work, work); - struct bat_priv *bat_priv = - container_of(delayed_work, struct bat_priv, tt_work); struct hashtable_t *hash = bat_priv->tt_local_hash; struct tt_local_entry *tt_local_entry; struct hlist_node *node, *node_tmp; struct hlist_head *head; - unsigned long timeout; + spinlock_t *list_lock; /* protects write access to the hash lists */ int i; - spin_lock_bh(&bat_priv->tt_lhash_lock); - for (i = 0; i < hash->size; i++) { head = &hash->table[i]; + list_lock = &hash->list_locks[i]; + spin_lock_bh(list_lock); hlist_for_each_entry_safe(tt_local_entry, node, node_tmp, head, hash_entry) { if (tt_local_entry->never_purge) continue; - timeout = tt_local_entry->last_seen; - timeout += TT_LOCAL_TIMEOUT * HZ; - - if (time_before(jiffies, timeout)) + if (!is_out_of_time(tt_local_entry->last_seen, + TT_LOCAL_TIMEOUT * 1000)) continue; - tt_local_del(bat_priv, tt_local_entry, - "address timed out"); + tt_local_event(bat_priv, TT_CHANGE_DEL, + tt_local_entry->addr, false); + atomic_dec(&bat_priv->num_local_tt); + bat_dbg(DBG_TT, bat_priv, "Deleting local " + "tt entry (%pM): timed out\n", + tt_local_entry->addr); + hlist_del_rcu(node); + tt_local_entry_free_ref(tt_local_entry); } + spin_unlock_bh(list_lock); } - spin_unlock_bh(&bat_priv->tt_lhash_lock); - tt_local_start_timer(bat_priv); } -void tt_local_free(struct bat_priv *bat_priv) +static void tt_local_table_free(struct bat_priv *bat_priv) { + struct hashtable_t *hash; + spinlock_t *list_lock; /* protects write access to the hash lists */ + struct tt_local_entry *tt_local_entry; + struct hlist_node *node, *node_tmp; + struct hlist_head *head; + int i; + if (!bat_priv->tt_local_hash) return; - cancel_delayed_work_sync(&bat_priv->tt_work); - hash_delete(bat_priv->tt_local_hash, _tt_local_del, bat_priv); + hash = bat_priv->tt_local_hash; + + for (i = 0; i < hash->size; i++) 
{ + head = &hash->table[i]; + list_lock = &hash->list_locks[i]; + + spin_lock_bh(list_lock); + hlist_for_each_entry_safe(tt_local_entry, node, node_tmp, + head, hash_entry) { + hlist_del_rcu(node); + tt_local_entry_free_ref(tt_local_entry); + } + spin_unlock_bh(list_lock); + } + + hash_destroy(hash); + bat_priv->tt_local_hash = NULL; } -int tt_global_init(struct bat_priv *bat_priv) +static int tt_global_init(struct bat_priv *bat_priv) { if (bat_priv->tt_global_hash) return 1; @@ -407,74 +476,78 @@ int tt_global_init(struct bat_priv *bat_priv) return 1; } -void tt_global_add_orig(struct bat_priv *bat_priv, - struct orig_node *orig_node, - unsigned char *tt_buff, int tt_buff_len) +static void tt_changes_list_free(struct bat_priv *bat_priv) { - struct tt_global_entry *tt_global_entry; - struct tt_local_entry *tt_local_entry; - int tt_buff_count = 0; - unsigned char *tt_ptr; + struct tt_change_node *entry, *safe; - while ((tt_buff_count + 1) * ETH_ALEN <= tt_buff_len) { - spin_lock_bh(&bat_priv->tt_ghash_lock); + spin_lock_bh(&bat_priv->tt_changes_list_lock); - tt_ptr = tt_buff + (tt_buff_count * ETH_ALEN); - tt_global_entry = tt_global_hash_find(bat_priv, tt_ptr); - - if (!tt_global_entry) { - spin_unlock_bh(&bat_priv->tt_ghash_lock); - - tt_global_entry = - kmalloc(sizeof(struct tt_global_entry), - GFP_ATOMIC); + list_for_each_entry_safe(entry, safe, &bat_priv->tt_changes_list, + list) { + list_del(&entry->list); + kfree(entry); + } - if (!tt_global_entry) - break; + atomic_set(&bat_priv->tt_local_changes, 0); + spin_unlock_bh(&bat_priv->tt_changes_list_lock); +} - memcpy(tt_global_entry->addr, tt_ptr, ETH_ALEN); +/* caller must hold orig_node refcount */ +int tt_global_add(struct bat_priv *bat_priv, struct orig_node *orig_node, + const unsigned char *tt_addr, uint8_t ttvn, bool roaming) +{ + struct tt_global_entry *tt_global_entry; + struct orig_node *orig_node_tmp; + int ret = 0; - bat_dbg(DBG_ROUTES, bat_priv, - "Creating new global tt entry: " - "%pM (via %pM)\n", - tt_global_entry->addr, orig_node->orig); + tt_global_entry = tt_global_hash_find(bat_priv, tt_addr); - spin_lock_bh(&bat_priv->tt_ghash_lock); - hash_add(bat_priv->tt_global_hash, compare_gtt, - choose_orig, tt_global_entry, - &tt_global_entry->hash_entry); - - } + if (!tt_global_entry) { + tt_global_entry = + kmalloc(sizeof(*tt_global_entry), + GFP_ATOMIC); + if (!tt_global_entry) + goto out; + memcpy(tt_global_entry->addr, tt_addr, ETH_ALEN); + /* Assign the new orig_node */ + atomic_inc(&orig_node->refcount); tt_global_entry->orig_node = orig_node; - spin_unlock_bh(&bat_priv->tt_ghash_lock); - - /* remove address from local hash if present */ - spin_lock_bh(&bat_priv->tt_lhash_lock); - - tt_ptr = tt_buff + (tt_buff_count * ETH_ALEN); - tt_local_entry = tt_local_hash_find(bat_priv, tt_ptr); - - if (tt_local_entry) - tt_local_del(bat_priv, tt_local_entry, - "global tt received"); - - spin_unlock_bh(&bat_priv->tt_lhash_lock); - - tt_buff_count++; + tt_global_entry->ttvn = ttvn; + tt_global_entry->flags = NO_FLAGS; + tt_global_entry->roam_at = 0; + atomic_set(&tt_global_entry->refcount, 2); + + hash_add(bat_priv->tt_global_hash, compare_gtt, + choose_orig, tt_global_entry, + &tt_global_entry->hash_entry); + atomic_inc(&orig_node->tt_size); + } else { + if (tt_global_entry->orig_node != orig_node) { + atomic_dec(&tt_global_entry->orig_node->tt_size); + orig_node_tmp = tt_global_entry->orig_node; + atomic_inc(&orig_node->refcount); + tt_global_entry->orig_node = orig_node; + orig_node_free_ref(orig_node_tmp); + 
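[Review note] The per-table spinlocks (tt_lhash_lock/tt_ghash_lock) are gone; entry lifetime is now handled by per-entry reference counts: lookups pin an entry via atomic_inc_not_zero(), every user drops it through tt_local_entry_free_ref()/tt_global_entry_free_ref(), and the final put frees via kfree_rcu(). A minimal userspace sketch of that discipline, with C11 atomics and plain free() standing in for the kernel's atomic_t and kfree_rcu() (names are illustrative, not batman-adv API):

	#include <stdatomic.h>
	#include <stdio.h>
	#include <stdlib.h>

	struct entry {
		atomic_int refcount;	/* starts at 2: hash reference + creator reference */
	};

	/* lookup side: only pin the entry if it is not already being torn down */
	static int entry_get(struct entry *e)
	{
		int old = atomic_load(&e->refcount);

		do {
			if (old == 0)
				return 0;	/* dying entry, pretend it is gone */
		} while (!atomic_compare_exchange_weak(&e->refcount, &old, old + 1));

		return 1;
	}

	/* put side: the last reference frees (the kernel defers this via kfree_rcu) */
	static void entry_put(struct entry *e)
	{
		if (atomic_fetch_sub(&e->refcount, 1) == 1)
			free(e);
	}

	int main(void)
	{
		struct entry *e = malloc(sizeof(*e));

		if (!e)
			return 1;
		atomic_init(&e->refcount, 2);
		if (entry_get(e))
			printf("pinned, refcount now %d\n", atomic_load(&e->refcount));
		entry_put(e);	/* lookup reference */
		entry_put(e);	/* hash table reference */
		entry_put(e);	/* creator reference: frees the entry */
		return 0;
	}

This is also why the add paths above initialise refcount to 2: one reference is owned by the hash table itself, one by the code that just created the entry.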
atomic_inc(&orig_node->tt_size); + } + tt_global_entry->ttvn = ttvn; + tt_global_entry->flags = NO_FLAGS; + tt_global_entry->roam_at = 0; } - /* initialize, and overwrite if malloc succeeds */ - orig_node->tt_buff = NULL; - orig_node->tt_buff_len = 0; + bat_dbg(DBG_TT, bat_priv, + "Creating new global tt entry: %pM (via %pM)\n", + tt_global_entry->addr, orig_node->orig); - if (tt_buff_len > 0) { - orig_node->tt_buff = kmalloc(tt_buff_len, GFP_ATOMIC); - if (orig_node->tt_buff) { - memcpy(orig_node->tt_buff, tt_buff, tt_buff_len); - orig_node->tt_buff_len = tt_buff_len; - } - } + /* remove address from local hash if present */ + tt_local_remove(bat_priv, tt_global_entry->addr, + "global tt received", roaming); + ret = 1; +out: + if (tt_global_entry) + tt_global_entry_free_ref(tt_global_entry); + return ret; } int tt_global_seq_print_text(struct seq_file *seq, void *offset) @@ -508,26 +581,27 @@ int tt_global_seq_print_text(struct seq_file *seq, void *offset) seq_printf(seq, "Globally announced TT entries received via the mesh %s\n", net_dev->name); - - spin_lock_bh(&bat_priv->tt_ghash_lock); + seq_printf(seq, " %-13s %s %-15s %s\n", + "Client", "(TTVN)", "Originator", "(Curr TTVN)"); buf_size = 1; - /* Estimate length for: " * xx:xx:xx:xx:xx:xx via xx:xx:xx:xx:xx:xx\n"*/ + /* Estimate length for: " * xx:xx:xx:xx:xx:xx (ttvn) via + * xx:xx:xx:xx:xx:xx (cur_ttvn)\n"*/ for (i = 0; i < hash->size; i++) { head = &hash->table[i]; rcu_read_lock(); __hlist_for_each_rcu(node, head) - buf_size += 43; + buf_size += 59; rcu_read_unlock(); } buff = kmalloc(buf_size, GFP_ATOMIC); if (!buff) { - spin_unlock_bh(&bat_priv->tt_ghash_lock); ret = -ENOMEM; goto out; } + buff[0] = '\0'; pos = 0; @@ -537,16 +611,18 @@ int tt_global_seq_print_text(struct seq_file *seq, void *offset) rcu_read_lock(); hlist_for_each_entry_rcu(tt_global_entry, node, head, hash_entry) { - pos += snprintf(buff + pos, 44, - " * %pM via %pM\n", + pos += snprintf(buff + pos, 61, + " * %pM (%3u) via %pM (%3u)\n", tt_global_entry->addr, - tt_global_entry->orig_node->orig); + tt_global_entry->ttvn, + tt_global_entry->orig_node->orig, + (uint8_t) atomic_read( + &tt_global_entry->orig_node-> + last_ttvn)); } rcu_read_unlock(); } - spin_unlock_bh(&bat_priv->tt_ghash_lock); - seq_printf(seq, "%s", buff); kfree(buff); out: @@ -555,84 +631,994 @@ out: return ret; } -static void _tt_global_del_orig(struct bat_priv *bat_priv, - struct tt_global_entry *tt_global_entry, - char *message) +static void _tt_global_del(struct bat_priv *bat_priv, + struct tt_global_entry *tt_global_entry, + const char *message) { - bat_dbg(DBG_ROUTES, bat_priv, + if (!tt_global_entry) + goto out; + + bat_dbg(DBG_TT, bat_priv, "Deleting global tt entry %pM (via %pM): %s\n", tt_global_entry->addr, tt_global_entry->orig_node->orig, message); + atomic_dec(&tt_global_entry->orig_node->tt_size); + hash_remove(bat_priv->tt_global_hash, compare_gtt, choose_orig, tt_global_entry->addr); - kfree(tt_global_entry); +out: + if (tt_global_entry) + tt_global_entry_free_ref(tt_global_entry); } -void tt_global_del_orig(struct bat_priv *bat_priv, - struct orig_node *orig_node, char *message) +void tt_global_del(struct bat_priv *bat_priv, + struct orig_node *orig_node, const unsigned char *addr, + const char *message, bool roaming) { - struct tt_global_entry *tt_global_entry; - int tt_buff_count = 0; - unsigned char *tt_ptr; + struct tt_global_entry *tt_global_entry = NULL; - if (orig_node->tt_buff_len == 0) - return; + tt_global_entry = tt_global_hash_find(bat_priv, addr); + if 
(!tt_global_entry) + goto out; - spin_lock_bh(&bat_priv->tt_ghash_lock); + if (tt_global_entry->orig_node == orig_node) { + if (roaming) { + tt_global_entry->flags |= TT_CLIENT_ROAM; + tt_global_entry->roam_at = jiffies; + goto out; + } + _tt_global_del(bat_priv, tt_global_entry, message); + } +out: + if (tt_global_entry) + tt_global_entry_free_ref(tt_global_entry); +} - while ((tt_buff_count + 1) * ETH_ALEN <= orig_node->tt_buff_len) { - tt_ptr = orig_node->tt_buff + (tt_buff_count * ETH_ALEN); - tt_global_entry = tt_global_hash_find(bat_priv, tt_ptr); +void tt_global_del_orig(struct bat_priv *bat_priv, + struct orig_node *orig_node, const char *message) +{ + struct tt_global_entry *tt_global_entry; + int i; + struct hashtable_t *hash = bat_priv->tt_global_hash; + struct hlist_node *node, *safe; + struct hlist_head *head; + spinlock_t *list_lock; /* protects write access to the hash lists */ - if ((tt_global_entry) && - (tt_global_entry->orig_node == orig_node)) - _tt_global_del_orig(bat_priv, tt_global_entry, - message); + for (i = 0; i < hash->size; i++) { + head = &hash->table[i]; + list_lock = &hash->list_locks[i]; - tt_buff_count++; + spin_lock_bh(list_lock); + hlist_for_each_entry_safe(tt_global_entry, node, safe, + head, hash_entry) { + if (tt_global_entry->orig_node == orig_node) { + bat_dbg(DBG_TT, bat_priv, + "Deleting global tt entry %pM " + "(via %pM): originator time out\n", + tt_global_entry->addr, + tt_global_entry->orig_node->orig); + hlist_del_rcu(node); + tt_global_entry_free_ref(tt_global_entry); + } + } + spin_unlock_bh(list_lock); } - - spin_unlock_bh(&bat_priv->tt_ghash_lock); - - orig_node->tt_buff_len = 0; - kfree(orig_node->tt_buff); - orig_node->tt_buff = NULL; + atomic_set(&orig_node->tt_size, 0); } -static void tt_global_del(struct hlist_node *node, void *arg) +static void tt_global_roam_purge(struct bat_priv *bat_priv) { - void *data = container_of(node, struct tt_global_entry, hash_entry); + struct hashtable_t *hash = bat_priv->tt_global_hash; + struct tt_global_entry *tt_global_entry; + struct hlist_node *node, *node_tmp; + struct hlist_head *head; + spinlock_t *list_lock; /* protects write access to the hash lists */ + int i; + + for (i = 0; i < hash->size; i++) { + head = &hash->table[i]; + list_lock = &hash->list_locks[i]; + + spin_lock_bh(list_lock); + hlist_for_each_entry_safe(tt_global_entry, node, node_tmp, + head, hash_entry) { + if (!(tt_global_entry->flags & TT_CLIENT_ROAM)) + continue; + if (!is_out_of_time(tt_global_entry->roam_at, + TT_CLIENT_ROAM_TIMEOUT * 1000)) + continue; + + bat_dbg(DBG_TT, bat_priv, "Deleting global " + "tt entry (%pM): Roaming timeout\n", + tt_global_entry->addr); + atomic_dec(&tt_global_entry->orig_node->tt_size); + hlist_del_rcu(node); + tt_global_entry_free_ref(tt_global_entry); + } + spin_unlock_bh(list_lock); + } - kfree(data); } -void tt_global_free(struct bat_priv *bat_priv) +static void tt_global_table_free(struct bat_priv *bat_priv) { + struct hashtable_t *hash; + spinlock_t *list_lock; /* protects write access to the hash lists */ + struct tt_global_entry *tt_global_entry; + struct hlist_node *node, *node_tmp; + struct hlist_head *head; + int i; + if (!bat_priv->tt_global_hash) return; - hash_delete(bat_priv->tt_global_hash, tt_global_del, NULL); + hash = bat_priv->tt_global_hash; + + for (i = 0; i < hash->size; i++) { + head = &hash->table[i]; + list_lock = &hash->list_locks[i]; + + spin_lock_bh(list_lock); + hlist_for_each_entry_safe(tt_global_entry, node, node_tmp, + head, hash_entry) { + hlist_del_rcu(node); 
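[Review note] Note how tt_global_del() above keeps a client that disappeared because of roaming: it only sets TT_CLIENT_ROAM and stamps roam_at, and tt_global_roam_purge() reaps the entry once TT_CLIENT_ROAM_TIMEOUT has passed, so packets still in flight can be delivered while the new location propagates. A simplified userspace model of this mark-then-purge pattern (the flag value and timeout are made up for the sketch, and wall-clock seconds stand in for jiffies):

	#include <stdbool.h>
	#include <stdio.h>
	#include <time.h>

	#define TT_CLIENT_ROAM	0x02	/* illustrative flag value */
	#define ROAM_TIMEOUT	30	/* seconds, stands in for TT_CLIENT_ROAM_TIMEOUT */

	struct global_entry {
		unsigned int flags;
		time_t roam_at;
	};

	/* "delete" on roaming: keep the entry, just mark it */
	static void mark_roaming(struct global_entry *e)
	{
		e->flags |= TT_CLIENT_ROAM;
		e->roam_at = time(NULL);
	}

	/* periodic purge: reap only once the grace period has expired */
	static bool roam_expired(const struct global_entry *e)
	{
		return (e->flags & TT_CLIENT_ROAM) &&
		       time(NULL) - e->roam_at > ROAM_TIMEOUT;
	}

	int main(void)
	{
		struct global_entry e = { 0, 0 };

		mark_roaming(&e);
		printf("reap now? %s\n", roam_expired(&e) ? "yes" : "no");
		return 0;
	}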
+ tt_global_entry_free_ref(tt_global_entry); + } + spin_unlock_bh(list_lock); + } + + hash_destroy(hash); + bat_priv->tt_global_hash = NULL; } -struct orig_node *transtable_search(struct bat_priv *bat_priv, uint8_t *addr) +struct orig_node *transtable_search(struct bat_priv *bat_priv, + const uint8_t *addr) { struct tt_global_entry *tt_global_entry; struct orig_node *orig_node = NULL; - spin_lock_bh(&bat_priv->tt_ghash_lock); tt_global_entry = tt_global_hash_find(bat_priv, addr); if (!tt_global_entry) goto out; if (!atomic_inc_not_zero(&tt_global_entry->orig_node->refcount)) - goto out; + goto free_tt; orig_node = tt_global_entry->orig_node; +free_tt: + tt_global_entry_free_ref(tt_global_entry); out: - spin_unlock_bh(&bat_priv->tt_ghash_lock); return orig_node; } + +/* Calculates the checksum of the local table of a given orig_node */ +uint16_t tt_global_crc(struct bat_priv *bat_priv, struct orig_node *orig_node) +{ + uint16_t total = 0, total_one; + struct hashtable_t *hash = bat_priv->tt_global_hash; + struct tt_global_entry *tt_global_entry; + struct hlist_node *node; + struct hlist_head *head; + int i, j; + + for (i = 0; i < hash->size; i++) { + head = &hash->table[i]; + + rcu_read_lock(); + hlist_for_each_entry_rcu(tt_global_entry, node, + head, hash_entry) { + if (compare_eth(tt_global_entry->orig_node, + orig_node)) { + /* Roaming clients are in the global table for + * consistency only. They don't have to be + * taken into account while computing the + * global crc */ + if (tt_global_entry->flags & TT_CLIENT_ROAM) + continue; + total_one = 0; + for (j = 0; j < ETH_ALEN; j++) + total_one = crc16_byte(total_one, + tt_global_entry->addr[j]); + total ^= total_one; + } + } + rcu_read_unlock(); + } + + return total; +} + +/* Calculates the checksum of the local table */ +uint16_t tt_local_crc(struct bat_priv *bat_priv) +{ + uint16_t total = 0, total_one; + struct hashtable_t *hash = bat_priv->tt_local_hash; + struct tt_local_entry *tt_local_entry; + struct hlist_node *node; + struct hlist_head *head; + int i, j; + + for (i = 0; i < hash->size; i++) { + head = &hash->table[i]; + + rcu_read_lock(); + hlist_for_each_entry_rcu(tt_local_entry, node, + head, hash_entry) { + total_one = 0; + for (j = 0; j < ETH_ALEN; j++) + total_one = crc16_byte(total_one, + tt_local_entry->addr[j]); + total ^= total_one; + } + rcu_read_unlock(); + } + + return total; +} + +static void tt_req_list_free(struct bat_priv *bat_priv) +{ + struct tt_req_node *node, *safe; + + spin_lock_bh(&bat_priv->tt_req_list_lock); + + list_for_each_entry_safe(node, safe, &bat_priv->tt_req_list, list) { + list_del(&node->list); + kfree(node); + } + + spin_unlock_bh(&bat_priv->tt_req_list_lock); +} + +void tt_save_orig_buffer(struct bat_priv *bat_priv, struct orig_node *orig_node, + const unsigned char *tt_buff, uint8_t tt_num_changes) +{ + uint16_t tt_buff_len = tt_len(tt_num_changes); + + /* Replace the old buffer only if I received something in the + * last OGM (the OGM could carry no changes) */ + spin_lock_bh(&orig_node->tt_buff_lock); + if (tt_buff_len > 0) { + kfree(orig_node->tt_buff); + orig_node->tt_buff_len = 0; + orig_node->tt_buff = kmalloc(tt_buff_len, GFP_ATOMIC); + if (orig_node->tt_buff) { + memcpy(orig_node->tt_buff, tt_buff, tt_buff_len); + orig_node->tt_buff_len = tt_buff_len; + } + } + spin_unlock_bh(&orig_node->tt_buff_lock); +} + +static void tt_req_purge(struct bat_priv *bat_priv) +{ + struct tt_req_node *node, *safe; + + spin_lock_bh(&bat_priv->tt_req_list_lock); + list_for_each_entry_safe(node, safe, 
&bat_priv->tt_req_list, list) { + if (is_out_of_time(node->issued_at, + TT_REQUEST_TIMEOUT * 1000)) { + list_del(&node->list); + kfree(node); + } + } + spin_unlock_bh(&bat_priv->tt_req_list_lock); +} + +/* returns the pointer to the new tt_req_node struct if no request + * has already been issued for this orig_node, NULL otherwise */ +static struct tt_req_node *new_tt_req_node(struct bat_priv *bat_priv, + struct orig_node *orig_node) +{ + struct tt_req_node *tt_req_node_tmp, *tt_req_node = NULL; + + spin_lock_bh(&bat_priv->tt_req_list_lock); + list_for_each_entry(tt_req_node_tmp, &bat_priv->tt_req_list, list) { + if (compare_eth(tt_req_node_tmp, orig_node) && + !is_out_of_time(tt_req_node_tmp->issued_at, + TT_REQUEST_TIMEOUT * 1000)) + goto unlock; + } + + tt_req_node = kmalloc(sizeof(*tt_req_node), GFP_ATOMIC); + if (!tt_req_node) + goto unlock; + + memcpy(tt_req_node->addr, orig_node->orig, ETH_ALEN); + tt_req_node->issued_at = jiffies; + + list_add(&tt_req_node->list, &bat_priv->tt_req_list); +unlock: + spin_unlock_bh(&bat_priv->tt_req_list_lock); + return tt_req_node; +} + +static int tt_global_valid_entry(const void *entry_ptr, const void *data_ptr) +{ + const struct tt_global_entry *tt_global_entry = entry_ptr; + const struct orig_node *orig_node = data_ptr; + + if (tt_global_entry->flags & TT_CLIENT_ROAM) + return 0; + + return (tt_global_entry->orig_node == orig_node); +} + +static struct sk_buff *tt_response_fill_table(uint16_t tt_len, uint8_t ttvn, + struct hashtable_t *hash, + struct hard_iface *primary_if, + int (*valid_cb)(const void *, + const void *), + void *cb_data) +{ + struct tt_local_entry *tt_local_entry; + struct tt_query_packet *tt_response; + struct tt_change *tt_change; + struct hlist_node *node; + struct hlist_head *head; + struct sk_buff *skb = NULL; + uint16_t tt_tot, tt_count; + ssize_t tt_query_size = sizeof(struct tt_query_packet); + int i; + + if (tt_query_size + tt_len > primary_if->soft_iface->mtu) { + tt_len = primary_if->soft_iface->mtu - tt_query_size; + tt_len -= tt_len % sizeof(struct tt_change); + } + tt_tot = tt_len / sizeof(struct tt_change); + + skb = dev_alloc_skb(tt_query_size + tt_len + ETH_HLEN); + if (!skb) + goto out; + + skb_reserve(skb, ETH_HLEN); + tt_response = (struct tt_query_packet *)skb_put(skb, + tt_query_size + tt_len); + tt_response->ttvn = ttvn; + tt_response->tt_data = htons(tt_tot); + + tt_change = (struct tt_change *)(skb->data + tt_query_size); + tt_count = 0; + + rcu_read_lock(); + for (i = 0; i < hash->size; i++) { + head = &hash->table[i]; + + hlist_for_each_entry_rcu(tt_local_entry, node, + head, hash_entry) { + if (tt_count == tt_tot) + break; + + if ((valid_cb) && (!valid_cb(tt_local_entry, cb_data))) + continue; + + memcpy(tt_change->addr, tt_local_entry->addr, ETH_ALEN); + tt_change->flags = NO_FLAGS; + + tt_count++; + tt_change++; + } + } + rcu_read_unlock(); + +out: + return skb; +} + +int send_tt_request(struct bat_priv *bat_priv, struct orig_node *dst_orig_node, + uint8_t ttvn, uint16_t tt_crc, bool full_table) +{ + struct sk_buff *skb = NULL; + struct tt_query_packet *tt_request; + struct neigh_node *neigh_node = NULL; + struct hard_iface *primary_if; + struct tt_req_node *tt_req_node = NULL; + int ret = 1; + + primary_if = primary_if_get_selected(bat_priv); + if (!primary_if) + goto out; + + /* The new tt_req will be issued only if I'm not waiting for a + * reply from the same orig_node yet */ + tt_req_node = new_tt_req_node(bat_priv, dst_orig_node); + if (!tt_req_node) + goto out; + + skb = 
dev_alloc_skb(sizeof(struct tt_query_packet) + ETH_HLEN); + if (!skb) + goto out; + + skb_reserve(skb, ETH_HLEN); + + tt_request = (struct tt_query_packet *)skb_put(skb, + sizeof(struct tt_query_packet)); + + tt_request->packet_type = BAT_TT_QUERY; + tt_request->version = COMPAT_VERSION; + memcpy(tt_request->src, primary_if->net_dev->dev_addr, ETH_ALEN); + memcpy(tt_request->dst, dst_orig_node->orig, ETH_ALEN); + tt_request->ttl = TTL; + tt_request->ttvn = ttvn; + tt_request->tt_data = tt_crc; + tt_request->flags = TT_REQUEST; + + if (full_table) + tt_request->flags |= TT_FULL_TABLE; + + neigh_node = orig_node_get_router(dst_orig_node); + if (!neigh_node) + goto out; + + bat_dbg(DBG_TT, bat_priv, "Sending TT_REQUEST to %pM via %pM " + "[%c]\n", dst_orig_node->orig, neigh_node->addr, + (full_table ? 'F' : '.')); + + send_skb_packet(skb, neigh_node->if_incoming, neigh_node->addr); + ret = 0; + +out: + if (neigh_node) + neigh_node_free_ref(neigh_node); + if (primary_if) + hardif_free_ref(primary_if); + if (ret) + kfree_skb(skb); + if (ret && tt_req_node) { + spin_lock_bh(&bat_priv->tt_req_list_lock); + list_del(&tt_req_node->list); + spin_unlock_bh(&bat_priv->tt_req_list_lock); + kfree(tt_req_node); + } + return ret; +} + +static bool send_other_tt_response(struct bat_priv *bat_priv, + struct tt_query_packet *tt_request) +{ + struct orig_node *req_dst_orig_node = NULL, *res_dst_orig_node = NULL; + struct neigh_node *neigh_node = NULL; + struct hard_iface *primary_if = NULL; + uint8_t orig_ttvn, req_ttvn, ttvn; + int ret = false; + unsigned char *tt_buff; + bool full_table; + uint16_t tt_len, tt_tot; + struct sk_buff *skb = NULL; + struct tt_query_packet *tt_response; + + bat_dbg(DBG_TT, bat_priv, + "Received TT_REQUEST from %pM for " + "ttvn: %u (%pM) [%c]\n", tt_request->src, + tt_request->ttvn, tt_request->dst, + (tt_request->flags & TT_FULL_TABLE ? 
'F' : '.')); + + /* Let's get the orig node of the REAL destination */ + req_dst_orig_node = get_orig_node(bat_priv, tt_request->dst); + if (!req_dst_orig_node) + goto out; + + res_dst_orig_node = get_orig_node(bat_priv, tt_request->src); + if (!res_dst_orig_node) + goto out; + + neigh_node = orig_node_get_router(res_dst_orig_node); + if (!neigh_node) + goto out; + + primary_if = primary_if_get_selected(bat_priv); + if (!primary_if) + goto out; + + orig_ttvn = (uint8_t)atomic_read(&req_dst_orig_node->last_ttvn); + req_ttvn = tt_request->ttvn; + + /* I have not the requested data */ + if (orig_ttvn != req_ttvn || + tt_request->tt_data != req_dst_orig_node->tt_crc) + goto out; + + /* If it has explicitly been requested the full table */ + if (tt_request->flags & TT_FULL_TABLE || + !req_dst_orig_node->tt_buff) + full_table = true; + else + full_table = false; + + /* In this version, fragmentation is not implemented, then + * I'll send only one packet with as much TT entries as I can */ + if (!full_table) { + spin_lock_bh(&req_dst_orig_node->tt_buff_lock); + tt_len = req_dst_orig_node->tt_buff_len; + tt_tot = tt_len / sizeof(struct tt_change); + + skb = dev_alloc_skb(sizeof(struct tt_query_packet) + + tt_len + ETH_HLEN); + if (!skb) + goto unlock; + + skb_reserve(skb, ETH_HLEN); + tt_response = (struct tt_query_packet *)skb_put(skb, + sizeof(struct tt_query_packet) + tt_len); + tt_response->ttvn = req_ttvn; + tt_response->tt_data = htons(tt_tot); + + tt_buff = skb->data + sizeof(struct tt_query_packet); + /* Copy the last orig_node's OGM buffer */ + memcpy(tt_buff, req_dst_orig_node->tt_buff, + req_dst_orig_node->tt_buff_len); + + spin_unlock_bh(&req_dst_orig_node->tt_buff_lock); + } else { + tt_len = (uint16_t)atomic_read(&req_dst_orig_node->tt_size) * + sizeof(struct tt_change); + ttvn = (uint8_t)atomic_read(&req_dst_orig_node->last_ttvn); + + skb = tt_response_fill_table(tt_len, ttvn, + bat_priv->tt_global_hash, + primary_if, tt_global_valid_entry, + req_dst_orig_node); + if (!skb) + goto out; + + tt_response = (struct tt_query_packet *)skb->data; + } + + tt_response->packet_type = BAT_TT_QUERY; + tt_response->version = COMPAT_VERSION; + tt_response->ttl = TTL; + memcpy(tt_response->src, req_dst_orig_node->orig, ETH_ALEN); + memcpy(tt_response->dst, tt_request->src, ETH_ALEN); + tt_response->flags = TT_RESPONSE; + + if (full_table) + tt_response->flags |= TT_FULL_TABLE; + + bat_dbg(DBG_TT, bat_priv, + "Sending TT_RESPONSE %pM via %pM for %pM (ttvn: %u)\n", + res_dst_orig_node->orig, neigh_node->addr, + req_dst_orig_node->orig, req_ttvn); + + send_skb_packet(skb, neigh_node->if_incoming, neigh_node->addr); + ret = true; + goto out; + +unlock: + spin_unlock_bh(&req_dst_orig_node->tt_buff_lock); + +out: + if (res_dst_orig_node) + orig_node_free_ref(res_dst_orig_node); + if (req_dst_orig_node) + orig_node_free_ref(req_dst_orig_node); + if (neigh_node) + neigh_node_free_ref(neigh_node); + if (primary_if) + hardif_free_ref(primary_if); + if (!ret) + kfree_skb(skb); + return ret; + +} +static bool send_my_tt_response(struct bat_priv *bat_priv, + struct tt_query_packet *tt_request) +{ + struct orig_node *orig_node = NULL; + struct neigh_node *neigh_node = NULL; + struct hard_iface *primary_if = NULL; + uint8_t my_ttvn, req_ttvn, ttvn; + int ret = false; + unsigned char *tt_buff; + bool full_table; + uint16_t tt_len, tt_tot; + struct sk_buff *skb = NULL; + struct tt_query_packet *tt_response; + + bat_dbg(DBG_TT, bat_priv, + "Received TT_REQUEST from %pM for " + "ttvn: %u (me) [%c]\n", 
tt_request->src, + tt_request->ttvn, + (tt_request->flags & TT_FULL_TABLE ? 'F' : '.')); + + + my_ttvn = (uint8_t)atomic_read(&bat_priv->ttvn); + req_ttvn = tt_request->ttvn; + + orig_node = get_orig_node(bat_priv, tt_request->src); + if (!orig_node) + goto out; + + neigh_node = orig_node_get_router(orig_node); + if (!neigh_node) + goto out; + + primary_if = primary_if_get_selected(bat_priv); + if (!primary_if) + goto out; + + /* If the full table has been explicitly requested or the gap + * is too big send the whole local translation table */ + if (tt_request->flags & TT_FULL_TABLE || my_ttvn != req_ttvn || + !bat_priv->tt_buff) + full_table = true; + else + full_table = false; + + /* In this version, fragmentation is not implemented, then + * I'll send only one packet with as much TT entries as I can */ + if (!full_table) { + spin_lock_bh(&bat_priv->tt_buff_lock); + tt_len = bat_priv->tt_buff_len; + tt_tot = tt_len / sizeof(struct tt_change); + + skb = dev_alloc_skb(sizeof(struct tt_query_packet) + + tt_len + ETH_HLEN); + if (!skb) + goto unlock; + + skb_reserve(skb, ETH_HLEN); + tt_response = (struct tt_query_packet *)skb_put(skb, + sizeof(struct tt_query_packet) + tt_len); + tt_response->ttvn = req_ttvn; + tt_response->tt_data = htons(tt_tot); + + tt_buff = skb->data + sizeof(struct tt_query_packet); + memcpy(tt_buff, bat_priv->tt_buff, + bat_priv->tt_buff_len); + spin_unlock_bh(&bat_priv->tt_buff_lock); + } else { + tt_len = (uint16_t)atomic_read(&bat_priv->num_local_tt) * + sizeof(struct tt_change); + ttvn = (uint8_t)atomic_read(&bat_priv->ttvn); + + skb = tt_response_fill_table(tt_len, ttvn, + bat_priv->tt_local_hash, + primary_if, NULL, NULL); + if (!skb) + goto out; + + tt_response = (struct tt_query_packet *)skb->data; + } + + tt_response->packet_type = BAT_TT_QUERY; + tt_response->version = COMPAT_VERSION; + tt_response->ttl = TTL; + memcpy(tt_response->src, primary_if->net_dev->dev_addr, ETH_ALEN); + memcpy(tt_response->dst, tt_request->src, ETH_ALEN); + tt_response->flags = TT_RESPONSE; + + if (full_table) + tt_response->flags |= TT_FULL_TABLE; + + bat_dbg(DBG_TT, bat_priv, + "Sending TT_RESPONSE to %pM via %pM [%c]\n", + orig_node->orig, neigh_node->addr, + (tt_response->flags & TT_FULL_TABLE ? 'F' : '.')); + + send_skb_packet(skb, neigh_node->if_incoming, neigh_node->addr); + ret = true; + goto out; + +unlock: + spin_unlock_bh(&bat_priv->tt_buff_lock); +out: + if (orig_node) + orig_node_free_ref(orig_node); + if (neigh_node) + neigh_node_free_ref(neigh_node); + if (primary_if) + hardif_free_ref(primary_if); + if (!ret) + kfree_skb(skb); + /* This packet was for me, so it doesn't need to be re-routed */ + return true; +} + +bool send_tt_response(struct bat_priv *bat_priv, + struct tt_query_packet *tt_request) +{ + if (is_my_mac(tt_request->dst)) + return send_my_tt_response(bat_priv, tt_request); + else + return send_other_tt_response(bat_priv, tt_request); +} + +static void _tt_update_changes(struct bat_priv *bat_priv, + struct orig_node *orig_node, + struct tt_change *tt_change, + uint16_t tt_num_changes, uint8_t ttvn) +{ + int i; + + for (i = 0; i < tt_num_changes; i++) { + if ((tt_change + i)->flags & TT_CHANGE_DEL) + tt_global_del(bat_priv, orig_node, + (tt_change + i)->addr, + "tt removed by changes", + (tt_change + i)->flags & TT_CLIENT_ROAM); + else + if (!tt_global_add(bat_priv, orig_node, + (tt_change + i)->addr, ttvn, false)) + /* In case of problem while storing a + * global_entry, we stop the updating + * procedure without committing the + * ttvn change. 
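[Review note] To make the abort-without-commit rule above concrete: _tt_update_changes() applies the received tt_change records in order (TT_CHANGE_DEL removes, anything else adds) and bails out on the first failed add, and only callers that ran to completion advance orig_node->last_ttvn. A compact userspace model of that rule, with the ttvn commit folded into one function and placeholder table helpers:

	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	#define TT_CHANGE_DEL	0x01	/* illustrative flag value */

	struct change {
		uint8_t flags;
		uint8_t addr[6];
	};

	static bool table_add(const uint8_t *addr) { (void)addr; return true; }
	static void table_del(const uint8_t *addr) { (void)addr; }

	/* apply a diff; bump the tracked ttvn only if every add succeeded */
	static bool apply_changes(const struct change *c, int n, uint8_t *ttvn)
	{
		for (int i = 0; i < n; i++) {
			if (c[i].flags & TT_CHANGE_DEL)
				table_del(c[i].addr);
			else if (!table_add(c[i].addr))
				return false;	/* stop, keep the old ttvn */
		}
		(*ttvn)++;	/* commit; ttvn wraps at 255, which is fine */
		return true;
	}

	int main(void)
	{
		struct change diff[] = {
			{ 0,             { 2, 0, 0, 0, 0, 1 } },
			{ TT_CHANGE_DEL, { 2, 0, 0, 0, 0, 2 } },
		};
		uint8_t ttvn = 4;

		apply_changes(diff, 2, &ttvn);
		printf("ttvn is now %u\n", ttvn);
		return 0;
	}

Keeping the old ttvn on failure is what later forces a full-table tt_request instead of serving a half-applied diff on to neighbours.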
This will avoid to send + * corrupted data on tt_request + */ + return; + } +} + +static void tt_fill_gtable(struct bat_priv *bat_priv, + struct tt_query_packet *tt_response) +{ + struct orig_node *orig_node = NULL; + + orig_node = orig_hash_find(bat_priv, tt_response->src); + if (!orig_node) + goto out; + + /* Purge the old table first.. */ + tt_global_del_orig(bat_priv, orig_node, "Received full table"); + + _tt_update_changes(bat_priv, orig_node, + (struct tt_change *)(tt_response + 1), + tt_response->tt_data, tt_response->ttvn); + + spin_lock_bh(&orig_node->tt_buff_lock); + kfree(orig_node->tt_buff); + orig_node->tt_buff_len = 0; + orig_node->tt_buff = NULL; + spin_unlock_bh(&orig_node->tt_buff_lock); + + atomic_set(&orig_node->last_ttvn, tt_response->ttvn); + +out: + if (orig_node) + orig_node_free_ref(orig_node); +} + +void tt_update_changes(struct bat_priv *bat_priv, struct orig_node *orig_node, + uint16_t tt_num_changes, uint8_t ttvn, + struct tt_change *tt_change) +{ + _tt_update_changes(bat_priv, orig_node, tt_change, tt_num_changes, + ttvn); + + tt_save_orig_buffer(bat_priv, orig_node, (unsigned char *)tt_change, + tt_num_changes); + atomic_set(&orig_node->last_ttvn, ttvn); +} + +bool is_my_client(struct bat_priv *bat_priv, const uint8_t *addr) +{ + struct tt_local_entry *tt_local_entry = NULL; + bool ret = false; + + tt_local_entry = tt_local_hash_find(bat_priv, addr); + if (!tt_local_entry) + goto out; + ret = true; +out: + if (tt_local_entry) + tt_local_entry_free_ref(tt_local_entry); + return ret; +} + +void handle_tt_response(struct bat_priv *bat_priv, + struct tt_query_packet *tt_response) +{ + struct tt_req_node *node, *safe; + struct orig_node *orig_node = NULL; + + bat_dbg(DBG_TT, bat_priv, "Received TT_RESPONSE from %pM for " + "ttvn %d t_size: %d [%c]\n", + tt_response->src, tt_response->ttvn, + tt_response->tt_data, + (tt_response->flags & TT_FULL_TABLE ? 'F' : '.')); + + orig_node = orig_hash_find(bat_priv, tt_response->src); + if (!orig_node) + goto out; + + if (tt_response->flags & TT_FULL_TABLE) + tt_fill_gtable(bat_priv, tt_response); + else + tt_update_changes(bat_priv, orig_node, tt_response->tt_data, + tt_response->ttvn, + (struct tt_change *)(tt_response + 1)); + + /* Delete the tt_req_node from pending tt_requests list */ + spin_lock_bh(&bat_priv->tt_req_list_lock); + list_for_each_entry_safe(node, safe, &bat_priv->tt_req_list, list) { + if (!compare_eth(node->addr, tt_response->src)) + continue; + list_del(&node->list); + kfree(node); + } + spin_unlock_bh(&bat_priv->tt_req_list_lock); + + /* Recalculate the CRC for this orig_node and store it */ + orig_node->tt_crc = tt_global_crc(bat_priv, orig_node); + /* Roaming phase is over: tables are in sync again. 
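[Review note] Since fragmentation is not implemented for TT queries, both send_*_tt_response() paths above squeeze as many tt_change records as fit into one frame; tt_response_fill_table() clamps the payload to the soft interface MTU and rounds down to whole entries. The arithmetic, modelled in userspace (the two sizes are stand-ins for sizeof(struct tt_query_packet) and sizeof(struct tt_change)):

	#include <stdio.h>

	#define TT_QUERY_SIZE	19u	/* stand-in for sizeof(struct tt_query_packet) */
	#define TT_CHANGE_SIZE	7u	/* stand-in for sizeof(struct tt_change) */

	/* clamp the table payload so header + entries still fit one frame */
	static unsigned int tt_entries_that_fit(unsigned int tt_len, unsigned int mtu)
	{
		if (TT_QUERY_SIZE + tt_len > mtu) {
			tt_len = mtu - TT_QUERY_SIZE;
			tt_len -= tt_len % TT_CHANGE_SIZE;	/* whole entries only */
		}
		return tt_len / TT_CHANGE_SIZE;
	}

	int main(void)
	{
		/* a full table of 1000 clients against a 1500 byte MTU */
		printf("%u entries fit\n",
		       tt_entries_that_fit(1000u * TT_CHANGE_SIZE, 1500u));
		return 0;
	}

Entries beyond that count are simply not included in the response.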
I can + * unset the flag */ + orig_node->tt_poss_change = false; +out: + if (orig_node) + orig_node_free_ref(orig_node); +} + +int tt_init(struct bat_priv *bat_priv) +{ + if (!tt_local_init(bat_priv)) + return 0; + + if (!tt_global_init(bat_priv)) + return 0; + + tt_start_timer(bat_priv); + + return 1; +} + +static void tt_roam_list_free(struct bat_priv *bat_priv) +{ + struct tt_roam_node *node, *safe; + + spin_lock_bh(&bat_priv->tt_roam_list_lock); + + list_for_each_entry_safe(node, safe, &bat_priv->tt_roam_list, list) { + list_del(&node->list); + kfree(node); + } + + spin_unlock_bh(&bat_priv->tt_roam_list_lock); +} + +static void tt_roam_purge(struct bat_priv *bat_priv) +{ + struct tt_roam_node *node, *safe; + + spin_lock_bh(&bat_priv->tt_roam_list_lock); + list_for_each_entry_safe(node, safe, &bat_priv->tt_roam_list, list) { + if (!is_out_of_time(node->first_time, + ROAMING_MAX_TIME * 1000)) + continue; + + list_del(&node->list); + kfree(node); + } + spin_unlock_bh(&bat_priv->tt_roam_list_lock); +} + +/* This function checks whether the client already reached the + * maximum number of possible roaming phases. In this case the ROAMING_ADV + * will not be sent. + * + * returns true if the ROAMING_ADV can be sent, false otherwise */ +static bool tt_check_roam_count(struct bat_priv *bat_priv, + uint8_t *client) +{ + struct tt_roam_node *tt_roam_node; + bool ret = false; + + spin_lock_bh(&bat_priv->tt_roam_list_lock); + /* The new tt_req will be issued only if I'm not waiting for a + * reply from the same orig_node yet */ + list_for_each_entry(tt_roam_node, &bat_priv->tt_roam_list, list) { + if (!compare_eth(tt_roam_node->addr, client)) + continue; + + if (is_out_of_time(tt_roam_node->first_time, + ROAMING_MAX_TIME * 1000)) + continue; + + if (!atomic_dec_not_zero(&tt_roam_node->counter)) + /* Sorry, you roamed too many times! 
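[Review note] tt_check_roam_count() implements a small per-client rate limiter: the first roaming event in a window creates the node with ROAMING_MAX_COUNT - 1 remaining credits, each further event consumes one, and events beyond the budget are suppressed until the window expires. A userspace model (constants are stand-ins for ROAMING_MAX_COUNT/ROAMING_MAX_TIME; the kernel compares jiffies rather than wall-clock time):

	#include <stdbool.h>
	#include <stdio.h>
	#include <time.h>

	#define ROAM_MAX_COUNT	5	/* stand-in for ROAMING_MAX_COUNT */
	#define ROAM_WINDOW	20	/* seconds, stand-in for ROAMING_MAX_TIME */

	struct roam_node {
		time_t first_time;
		int counter;
	};

	/* allow at most ROAM_MAX_COUNT advertisements per client and window */
	static bool roam_allowed(struct roam_node *n)
	{
		time_t now = time(NULL);

		if (now - n->first_time > ROAM_WINDOW) {
			n->first_time = now;	/* window expired: start over */
			n->counter = ROAM_MAX_COUNT - 1;
			return true;		/* this event uses the first credit */
		}
		if (n->counter > 0) {
			n->counter--;
			return true;
		}
		return false;	/* sorry, you roamed too many times */
	}

	int main(void)
	{
		struct roam_node n = { 0, 0 };

		for (int i = 0; i < ROAM_MAX_COUNT + 2; i++)
			printf("event %d: %s\n", i,
			       roam_allowed(&n) ? "advertised" : "suppressed");
		return 0;
	}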
*/ + goto unlock; + ret = true; + break; + } + + if (!ret) { + tt_roam_node = kmalloc(sizeof(*tt_roam_node), GFP_ATOMIC); + if (!tt_roam_node) + goto unlock; + + tt_roam_node->first_time = jiffies; + atomic_set(&tt_roam_node->counter, ROAMING_MAX_COUNT - 1); + memcpy(tt_roam_node->addr, client, ETH_ALEN); + + list_add(&tt_roam_node->list, &bat_priv->tt_roam_list); + ret = true; + } + +unlock: + spin_unlock_bh(&bat_priv->tt_roam_list_lock); + return ret; +} + +void send_roam_adv(struct bat_priv *bat_priv, uint8_t *client, + struct orig_node *orig_node) +{ + struct neigh_node *neigh_node = NULL; + struct sk_buff *skb = NULL; + struct roam_adv_packet *roam_adv_packet; + int ret = 1; + struct hard_iface *primary_if; + + /* before going on we have to check whether the client has + * already roamed to us too many times */ + if (!tt_check_roam_count(bat_priv, client)) + goto out; + + skb = dev_alloc_skb(sizeof(struct roam_adv_packet) + ETH_HLEN); + if (!skb) + goto out; + + skb_reserve(skb, ETH_HLEN); + + roam_adv_packet = (struct roam_adv_packet *)skb_put(skb, + sizeof(struct roam_adv_packet)); + + roam_adv_packet->packet_type = BAT_ROAM_ADV; + roam_adv_packet->version = COMPAT_VERSION; + roam_adv_packet->ttl = TTL; + primary_if = primary_if_get_selected(bat_priv); + if (!primary_if) + goto out; + memcpy(roam_adv_packet->src, primary_if->net_dev->dev_addr, ETH_ALEN); + hardif_free_ref(primary_if); + memcpy(roam_adv_packet->dst, orig_node->orig, ETH_ALEN); + memcpy(roam_adv_packet->client, client, ETH_ALEN); + + neigh_node = orig_node_get_router(orig_node); + if (!neigh_node) + goto out; + + bat_dbg(DBG_TT, bat_priv, + "Sending ROAMING_ADV to %pM (client %pM) via %pM\n", + orig_node->orig, client, neigh_node->addr); + + send_skb_packet(skb, neigh_node->if_incoming, neigh_node->addr); + ret = 0; + +out: + if (neigh_node) + neigh_node_free_ref(neigh_node); + if (ret) + kfree_skb(skb); + return; +} + +static void tt_purge(struct work_struct *work) +{ + struct delayed_work *delayed_work = + container_of(work, struct delayed_work, work); + struct bat_priv *bat_priv = + container_of(delayed_work, struct bat_priv, tt_work); + + tt_local_purge(bat_priv); + tt_global_roam_purge(bat_priv); + tt_req_purge(bat_priv); + tt_roam_purge(bat_priv); + + tt_start_timer(bat_priv); +} + +void tt_free(struct bat_priv *bat_priv) +{ + cancel_delayed_work_sync(&bat_priv->tt_work); + + tt_local_table_free(bat_priv); + tt_global_table_free(bat_priv); + tt_req_list_free(bat_priv); + tt_changes_list_free(bat_priv); + tt_roam_list_free(bat_priv); + + kfree(bat_priv->tt_buff); +} diff --git a/net/batman-adv/translation-table.h b/net/batman-adv/translation-table.h index 46152c3..1cd2d39 100644 --- a/net/batman-adv/translation-table.h +++ b/net/batman-adv/translation-table.h @@ -22,22 +22,45 @@ #ifndef _NET_BATMAN_ADV_TRANSLATION_TABLE_H_ #define _NET_BATMAN_ADV_TRANSLATION_TABLE_H_ -int tt_local_init(struct bat_priv *bat_priv); -void tt_local_add(struct net_device *soft_iface, uint8_t *addr); +int tt_len(int changes_num); +int tt_changes_fill_buffer(struct bat_priv *bat_priv, + unsigned char *buff, int buff_len); +int tt_init(struct bat_priv *bat_priv); +void tt_local_add(struct net_device *soft_iface, const uint8_t *addr); void tt_local_remove(struct bat_priv *bat_priv, - uint8_t *addr, char *message); -int tt_local_fill_buffer(struct bat_priv *bat_priv, - unsigned char *buff, int buff_len); + const uint8_t *addr, const char *message, bool roaming); int tt_local_seq_print_text(struct seq_file *seq, void *offset); -void 
tt_local_free(struct bat_priv *bat_priv); -int tt_global_init(struct bat_priv *bat_priv); void tt_global_add_orig(struct bat_priv *bat_priv, - struct orig_node *orig_node, - unsigned char *tt_buff, int tt_buff_len); + struct orig_node *orig_node, + const unsigned char *tt_buff, int tt_buff_len); +int tt_global_add(struct bat_priv *bat_priv, + struct orig_node *orig_node, const unsigned char *addr, + uint8_t ttvn, bool roaming); int tt_global_seq_print_text(struct seq_file *seq, void *offset); void tt_global_del_orig(struct bat_priv *bat_priv, - struct orig_node *orig_node, char *message); -void tt_global_free(struct bat_priv *bat_priv); -struct orig_node *transtable_search(struct bat_priv *bat_priv, uint8_t *addr); + struct orig_node *orig_node, const char *message); +void tt_global_del(struct bat_priv *bat_priv, + struct orig_node *orig_node, const unsigned char *addr, + const char *message, bool roaming); +struct orig_node *transtable_search(struct bat_priv *bat_priv, + const uint8_t *addr); +void tt_save_orig_buffer(struct bat_priv *bat_priv, struct orig_node *orig_node, + const unsigned char *tt_buff, uint8_t tt_num_changes); +uint16_t tt_local_crc(struct bat_priv *bat_priv); +uint16_t tt_global_crc(struct bat_priv *bat_priv, struct orig_node *orig_node); +void tt_free(struct bat_priv *bat_priv); +int send_tt_request(struct bat_priv *bat_priv, + struct orig_node *dst_orig_node, uint8_t hvn, + uint16_t tt_crc, bool full_table); +bool send_tt_response(struct bat_priv *bat_priv, + struct tt_query_packet *tt_request); +void tt_update_changes(struct bat_priv *bat_priv, struct orig_node *orig_node, + uint16_t tt_num_changes, uint8_t ttvn, + struct tt_change *tt_change); +bool is_my_client(struct bat_priv *bat_priv, const uint8_t *addr); +void handle_tt_response(struct bat_priv *bat_priv, + struct tt_query_packet *tt_response); +void send_roam_adv(struct bat_priv *bat_priv, uint8_t *client, + struct orig_node *orig_node); #endif /* _NET_BATMAN_ADV_TRANSLATION_TABLE_H_ */ diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index fab70e8..85cf1224 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -75,8 +75,18 @@ struct orig_node { unsigned long batman_seqno_reset; uint8_t gw_flags; uint8_t flags; + atomic_t last_ttvn; /* last seen translation table version number */ + uint16_t tt_crc; unsigned char *tt_buff; int16_t tt_buff_len; + spinlock_t tt_buff_lock; /* protects tt_buff */ + atomic_t tt_size; + /* The tt_poss_change flag is used to detect an ongoing roaming phase. + * If true, then I sent a Roaming_adv to this orig_node and I have to + * inspect every packet directed to it to check whether it is still + * the true destination or not. This flag will be reset to false as + * soon as I receive a new TTVN from this orig_node */ + bool tt_poss_change; uint32_t last_real_seqno; uint8_t last_ttl; unsigned long bcast_bits[NUM_WORDS]; @@ -94,6 +104,7 @@ struct orig_node { spinlock_t ogm_cnt_lock; /* bcast_seqno_lock protects bcast_bits, last_bcast_seqno */ spinlock_t bcast_seqno_lock; + spinlock_t tt_list_lock; /* protects tt_list */ atomic_t bond_candidates; struct list_head bond_list; }; @@ -145,6 +156,15 @@ struct bat_priv { atomic_t bcast_seqno; atomic_t bcast_queue_left; atomic_t batman_queue_left; + atomic_t ttvn; /* translation table version number */ + atomic_t tt_ogm_append_cnt; + atomic_t tt_local_changes; /* changes registered in an OGM interval */ + /* The tt_poss_change flag is used to detect an ongoing roaming phase. 
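[Review note] The new tt_crc/last_ttvn fields are what makes table desynchronisation cheap to detect: tt_local_crc()/tt_global_crc() in translation-table.c above compute one CRC16 per client MAC and XOR-fold the results, so the checksum is independent of hash order (and, on the global side, skips TT_CLIENT_ROAM entries). A self-contained sketch of the fold; crc16_byte() here is a plain bitwise version of what lib/crc16 is assumed to implement (reflected polynomial 0xA001), not the kernel routine itself:

	#include <stdint.h>
	#include <stdio.h>

	/* assumed bitwise equivalent of the kernel's crc16_byte() (CRC-16/ARC) */
	static uint16_t crc16_byte(uint16_t crc, uint8_t data)
	{
		crc ^= data;
		for (int i = 0; i < 8; i++)
			crc = (crc >> 1) ^ ((crc & 1u) ? 0xA001u : 0u);
		return crc;
	}

	/* XOR-fold one CRC per client MAC: the result ignores table order */
	static uint16_t table_crc(const uint8_t macs[][6], int n)
	{
		uint16_t total = 0, total_one;

		for (int i = 0; i < n; i++) {
			total_one = 0;
			for (int j = 0; j < 6; j++)
				total_one = crc16_byte(total_one, macs[i][j]);
			total ^= total_one;
		}
		return total;
	}

	int main(void)
	{
		const uint8_t macs[2][6] = {
			{ 0x02, 0x11, 0x22, 0x33, 0x44, 0x55 },
			{ 0x02, 0xaa, 0xbb, 0xcc, 0xdd, 0xee },
		};

		printf("table crc: 0x%04x\n", table_crc(macs, 2));
		return 0;
	}

Because the fold is an XOR, two nodes can compare checksums of tables built in completely different iteration orders.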
+ * If true, then I received a Roaming_adv and I have to inspect every + * packet directed to me to check whether I am still the true + * destination or not. This flag will be reset to false as soon as I + * increase my TTVN */ + bool tt_poss_change; char num_ifaces; struct debug_log *debug_log; struct kobject *mesh_obj; @@ -153,26 +173,35 @@ struct bat_priv { struct hlist_head forw_bcast_list; struct hlist_head gw_list; struct hlist_head softif_neigh_vids; + struct list_head tt_changes_list; /* tracks changes in a OGM int */ struct list_head vis_send_list; struct hashtable_t *orig_hash; struct hashtable_t *tt_local_hash; struct hashtable_t *tt_global_hash; + struct list_head tt_req_list; /* list of pending tt_requests */ + struct list_head tt_roam_list; struct hashtable_t *vis_hash; spinlock_t forw_bat_list_lock; /* protects forw_bat_list */ spinlock_t forw_bcast_list_lock; /* protects */ - spinlock_t tt_lhash_lock; /* protects tt_local_hash */ - spinlock_t tt_ghash_lock; /* protects tt_global_hash */ + spinlock_t tt_changes_list_lock; /* protects tt_changes */ + spinlock_t tt_req_list_lock; /* protects tt_req_list */ + spinlock_t tt_roam_list_lock; /* protects tt_roam_list */ spinlock_t gw_list_lock; /* protects gw_list and curr_gw */ spinlock_t vis_hash_lock; /* protects vis_hash */ spinlock_t vis_list_lock; /* protects vis_info::recv_list */ spinlock_t softif_neigh_lock; /* protects soft-interface neigh list */ spinlock_t softif_neigh_vid_lock; /* protects soft-interface vid list */ - int16_t num_local_tt; - atomic_t tt_local_changed; + atomic_t num_local_tt; + /* Checksum of the local table, recomputed before sending a new OGM */ + atomic_t tt_crc; + unsigned char *tt_buff; + int16_t tt_buff_len; + spinlock_t tt_buff_lock; /* protects tt_buff */ struct delayed_work tt_work; struct delayed_work orig_work; struct delayed_work vis_work; struct gw_node __rcu *curr_gw; /* rcu protected pointer */ + atomic_t gw_reselect; struct hard_iface __rcu *primary_if; /* rcu protected pointer */ struct vis_info *my_vis_info; }; @@ -196,13 +225,38 @@ struct tt_local_entry { uint8_t addr[ETH_ALEN]; unsigned long last_seen; char never_purge; + atomic_t refcount; + struct rcu_head rcu; struct hlist_node hash_entry; }; struct tt_global_entry { uint8_t addr[ETH_ALEN]; struct orig_node *orig_node; - struct hlist_node hash_entry; + uint8_t ttvn; + uint8_t flags; /* only TT_GLOBAL_ROAM is used */ + unsigned long roam_at; /* time at which TT_GLOBAL_ROAM was set */ + atomic_t refcount; + struct rcu_head rcu; + struct hlist_node hash_entry; /* entry in the global table */ +}; + +struct tt_change_node { + struct list_head list; + struct tt_change change; +}; + +struct tt_req_node { + uint8_t addr[ETH_ALEN]; + unsigned long issued_at; + struct list_head list; +}; + +struct tt_roam_node { + uint8_t addr[ETH_ALEN]; + atomic_t counter; + unsigned long first_time; + struct list_head list; }; /** @@ -246,10 +300,10 @@ struct frag_packet_list_entry { }; struct vis_info { - unsigned long first_seen; - struct list_head recv_list; - /* list of server-neighbors we received a vis-packet - * from. we should not reply to them. */ + unsigned long first_seen; + /* list of server-neighbors we received a vis-packet + * from. we should not reply to them. 
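[Review note] The unicast.c and vis.c hunks below also convert sizeof(struct foo) to sizeof(*ptr) throughout. Both forms are identical today, but the pointer form cannot go stale: if the variable is ever retyped, the allocation size follows automatically. A short illustration with a hypothetical struct (not from the patch):

	#include <stdio.h>
	#include <stdlib.h>

	struct frag_entry {
		int seqno;
		char payload[32];
	};

	int main(void)
	{
		/* if frag_entry is later retyped, this malloc stays correct,
		 * while a hard-coded sizeof(struct frag_entry) could rot */
		struct frag_entry *tfp = malloc(sizeof(*tfp));

		if (!tfp)
			return 1;
		printf("allocated %zu bytes\n", sizeof(*tfp));
		free(tfp);
		return 0;
	}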
*/ + struct list_head recv_list; struct list_head send_list; struct kref refcount; struct hlist_node hash_entry; diff --git a/net/batman-adv/unicast.c b/net/batman-adv/unicast.c index 19c3daf..32b125f 100644 --- a/net/batman-adv/unicast.c +++ b/net/batman-adv/unicast.c @@ -39,8 +39,8 @@ static struct sk_buff *frag_merge_packet(struct list_head *head, (struct unicast_frag_packet *)skb->data; struct sk_buff *tmp_skb; struct unicast_packet *unicast_packet; - int hdr_len = sizeof(struct unicast_packet); - int uni_diff = sizeof(struct unicast_frag_packet) - hdr_len; + int hdr_len = sizeof(*unicast_packet); + int uni_diff = sizeof(*up) - hdr_len; /* set skb to the first part and tmp_skb to the second part */ if (up->flags & UNI_FRAG_HEAD) { @@ -53,7 +53,7 @@ static struct sk_buff *frag_merge_packet(struct list_head *head, if (skb_linearize(skb) < 0 || skb_linearize(tmp_skb) < 0) goto err; - skb_pull(tmp_skb, sizeof(struct unicast_frag_packet)); + skb_pull(tmp_skb, sizeof(*up)); if (pskb_expand_head(skb, 0, tmp_skb->len, GFP_ATOMIC) < 0) goto err; @@ -99,8 +99,7 @@ static int frag_create_buffer(struct list_head *head) struct frag_packet_list_entry *tfp; for (i = 0; i < FRAG_BUFFER_SIZE; i++) { - tfp = kmalloc(sizeof(struct frag_packet_list_entry), - GFP_ATOMIC); + tfp = kmalloc(sizeof(*tfp), GFP_ATOMIC); if (!tfp) { frag_list_free(head); return -ENOMEM; @@ -115,7 +114,7 @@ static int frag_create_buffer(struct list_head *head) } static struct frag_packet_list_entry *frag_search_packet(struct list_head *head, - struct unicast_frag_packet *up) + const struct unicast_frag_packet *up) { struct frag_packet_list_entry *tfp; struct unicast_frag_packet *tmp_up = NULL; @@ -218,14 +217,14 @@ out: } int frag_send_skb(struct sk_buff *skb, struct bat_priv *bat_priv, - struct hard_iface *hard_iface, uint8_t dstaddr[]) + struct hard_iface *hard_iface, const uint8_t dstaddr[]) { struct unicast_packet tmp_uc, *unicast_packet; struct hard_iface *primary_if; struct sk_buff *frag_skb; struct unicast_frag_packet *frag1, *frag2; - int uc_hdr_len = sizeof(struct unicast_packet); - int ucf_hdr_len = sizeof(struct unicast_frag_packet); + int uc_hdr_len = sizeof(*unicast_packet); + int ucf_hdr_len = sizeof(*frag1); int data_len = skb->len - uc_hdr_len; int large_tail = 0, ret = NET_RX_DROP; uint16_t seqno; @@ -250,14 +249,14 @@ int frag_send_skb(struct sk_buff *skb, struct bat_priv *bat_priv, frag1 = (struct unicast_frag_packet *)skb->data; frag2 = (struct unicast_frag_packet *)frag_skb->data; - memcpy(frag1, &tmp_uc, sizeof(struct unicast_packet)); + memcpy(frag1, &tmp_uc, sizeof(tmp_uc)); frag1->ttl--; frag1->version = COMPAT_VERSION; frag1->packet_type = BAT_UNICAST_FRAG; memcpy(frag1->orig, primary_if->net_dev->dev_addr, ETH_ALEN); - memcpy(frag2, frag1, sizeof(struct unicast_frag_packet)); + memcpy(frag2, frag1, sizeof(*frag2)); if (data_len & 1) large_tail = UNI_FRAG_LARGETAIL; @@ -295,7 +294,7 @@ int unicast_send_skb(struct sk_buff *skb, struct bat_priv *bat_priv) /* get routing information */ if (is_multicast_ether_addr(ethhdr->h_dest)) { - orig_node = (struct orig_node *)gw_get_selected_orig(bat_priv); + orig_node = gw_get_selected_orig(bat_priv); if (orig_node) goto find_router; } @@ -314,10 +313,7 @@ find_router: if (!neigh_node) goto out; - if (neigh_node->if_incoming->if_status != IF_ACTIVE) - goto out; - - if (my_skb_head_push(skb, sizeof(struct unicast_packet)) < 0) + if (my_skb_head_push(skb, sizeof(*unicast_packet)) < 0) goto out; unicast_packet = (struct unicast_packet *)skb->data; @@ -329,9 +325,12 @@ 
find_router: unicast_packet->ttl = TTL; /* copy the destination for faster routing */ memcpy(unicast_packet->dest, orig_node->orig, ETH_ALEN); + /* set the destination tt version number */ + unicast_packet->ttvn = + (uint8_t)atomic_read(&orig_node->last_ttvn); if (atomic_read(&bat_priv->fragmentation) && - data_len + sizeof(struct unicast_packet) > + data_len + sizeof(*unicast_packet) > neigh_node->if_incoming->net_dev->mtu) { /* send frag skb decreases ttl */ unicast_packet->ttl++; diff --git a/net/batman-adv/unicast.h b/net/batman-adv/unicast.h index 16ad7a9..62f54b9 100644 --- a/net/batman-adv/unicast.h +++ b/net/batman-adv/unicast.h @@ -32,11 +32,11 @@ int frag_reassemble_skb(struct sk_buff *skb, struct bat_priv *bat_priv, void frag_list_free(struct list_head *head); int unicast_send_skb(struct sk_buff *skb, struct bat_priv *bat_priv); int frag_send_skb(struct sk_buff *skb, struct bat_priv *bat_priv, - struct hard_iface *hard_iface, uint8_t dstaddr[]); + struct hard_iface *hard_iface, const uint8_t dstaddr[]); -static inline int frag_can_reassemble(struct sk_buff *skb, int mtu) +static inline int frag_can_reassemble(const struct sk_buff *skb, int mtu) { - struct unicast_frag_packet *unicast_packet; + const struct unicast_frag_packet *unicast_packet; int uneven_correction = 0; unsigned int merged_size; @@ -49,7 +49,7 @@ static inline int frag_can_reassemble(struct sk_buff *skb, int mtu) uneven_correction = -1; } - merged_size = (skb->len - sizeof(struct unicast_frag_packet)) * 2; + merged_size = (skb->len - sizeof(*unicast_packet)) * 2; merged_size += sizeof(struct unicast_packet) + uneven_correction; return merged_size <= mtu; diff --git a/net/batman-adv/vis.c b/net/batman-adv/vis.c index c39f20c..8a1b985 100644 --- a/net/batman-adv/vis.c +++ b/net/batman-adv/vis.c @@ -30,22 +30,6 @@ #define MAX_VIS_PACKET_SIZE 1000 -/* Returns the smallest signed integer in two's complement with the sizeof x */ -#define smallest_signed_int(x) (1u << (7u + 8u * (sizeof(x) - 1u))) - -/* Checks if a sequence number x is a predecessor/successor of y. - * they handle overflows/underflows and can correctly check for a - * predecessor/successor unless the variable sequence number has grown by - * more then 2**(bitwidth(x)-1)-1. 
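[Review note] frag_can_reassemble() above decides whether two half-fragments will still fit the outgoing MTU once merged: each fragment carries half of the payload behind a unicast_frag_packet header, and UNI_FRAG_LARGETAIL records that an odd-length packet gave the head fragment one extra byte. The same arithmetic in userspace (header sizes are stand-ins for the real struct sizes):

	#include <stdbool.h>
	#include <stdio.h>

	#define UNICAST_HDR	10u	/* stand-in for sizeof(struct unicast_packet) */
	#define FRAG_HDR	20u	/* stand-in for sizeof(struct unicast_frag_packet) */

	static bool can_reassemble(unsigned int frag_len, bool large_tail,
				   bool is_head, unsigned int mtu)
	{
		int uneven = 0;
		unsigned int merged;

		if (large_tail)
			uneven = is_head ? 1 : -1;	/* head got the odd byte */

		merged = (frag_len - FRAG_HDR) * 2;	/* both payload halves */
		merged += UNICAST_HDR + uneven;		/* plus the merged header */

		return merged <= mtu;
	}

	int main(void)
	{
		/* a 760 byte tail fragment of an odd-length packet vs. 1500 MTU */
		printf("fits: %s\n",
		       can_reassemble(760, true, false, 1500) ? "yes" : "no");
		return 0;
	}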
- * This means that for a uint8_t with the maximum value 255, it would think: - * - when adding nothing - it is neither a predecessor nor a successor - * - before adding more than 127 to the starting value - it is a predecessor, - * - when adding 128 - it is neither a predecessor nor a successor, - * - after adding more than 127 to the starting value - it is a successor */ -#define seq_before(x, y) ({typeof(x) _dummy = (x - y); \ - _dummy > smallest_signed_int(_dummy); }) -#define seq_after(x, y) seq_before(y, x) - static void start_vis_timer(struct bat_priv *bat_priv); /* free the info */ @@ -68,10 +52,10 @@ static void free_info(struct kref *ref) } /* Compare two vis packets, used by the hashing algorithm */ -static int vis_info_cmp(struct hlist_node *node, void *data2) +static int vis_info_cmp(const struct hlist_node *node, const void *data2) { - struct vis_info *d1, *d2; - struct vis_packet *p1, *p2; + const struct vis_info *d1, *d2; + const struct vis_packet *p1, *p2; d1 = container_of(node, struct vis_info, hash_entry); d2 = data2; @@ -82,11 +66,11 @@ static int vis_info_cmp(struct hlist_node *node, void *data2) /* hash function to choose an entry in a hash table of given size */ /* hash algorithm from http://en.wikipedia.org/wiki/Hash_table */ -static int vis_info_choose(void *data, int size) +static int vis_info_choose(const void *data, int size) { - struct vis_info *vis_info = data; - struct vis_packet *packet; - unsigned char *key; + const struct vis_info *vis_info = data; + const struct vis_packet *packet; + const unsigned char *key; uint32_t hash = 0; size_t i; @@ -106,7 +90,7 @@ static int vis_info_choose(void *data, int size) } static struct vis_info *vis_hash_find(struct bat_priv *bat_priv, - void *data) + const void *data) { struct hashtable_t *hash = bat_priv->vis_hash; struct hlist_head *head; @@ -143,7 +127,7 @@ static void vis_data_insert_interface(const uint8_t *interface, struct hlist_node *pos; hlist_for_each_entry(entry, pos, if_list, list) { - if (compare_eth(entry->addr, (void *)interface)) + if (compare_eth(entry->addr, interface)) return; } @@ -156,7 +140,8 @@ static void vis_data_insert_interface(const uint8_t *interface, hlist_add_head(&entry->list, if_list); } -static ssize_t vis_data_read_prim_sec(char *buff, struct hlist_head *if_list) +static ssize_t vis_data_read_prim_sec(char *buff, + const struct hlist_head *if_list) { struct if_list_entry *entry; struct hlist_node *pos; @@ -189,8 +174,9 @@ static size_t vis_data_count_prim_sec(struct hlist_head *if_list) } /* read an entry */ -static ssize_t vis_data_read_entry(char *buff, struct vis_info_entry *entry, - uint8_t *src, bool primary) +static ssize_t vis_data_read_entry(char *buff, + const struct vis_info_entry *entry, + const uint8_t *src, bool primary) { /* maximal length: max(4+17+2, 3+17+1+3+2) == 26 */ if (primary && entry->quality == 0) @@ -239,7 +225,7 @@ int vis_seq_print_text(struct seq_file *seq, void *offset) hlist_for_each_entry_rcu(info, node, head, hash_entry) { packet = (struct vis_packet *)info->skb_packet->data; entries = (struct vis_info_entry *) - ((char *)packet + sizeof(struct vis_packet)); + ((char *)packet + sizeof(*packet)); for (j = 0; j < packet->entries; j++) { if (entries[j].quality == 0) @@ -287,7 +273,7 @@ int vis_seq_print_text(struct seq_file *seq, void *offset) hlist_for_each_entry_rcu(info, node, head, hash_entry) { packet = (struct vis_packet *)info->skb_packet->data; entries = (struct vis_info_entry *) - ((char *)packet + sizeof(struct vis_packet)); + ((char *)packet + 
sizeof(*packet)); for (j = 0; j < packet->entries; j++) { if (entries[j].quality == 0) @@ -361,11 +347,11 @@ static void send_list_del(struct vis_info *info) /* tries to add one entry to the receive list. */ static void recv_list_add(struct bat_priv *bat_priv, - struct list_head *recv_list, char *mac) + struct list_head *recv_list, const char *mac) { struct recvlist_node *entry; - entry = kmalloc(sizeof(struct recvlist_node), GFP_ATOMIC); + entry = kmalloc(sizeof(*entry), GFP_ATOMIC); if (!entry) return; @@ -377,9 +363,9 @@ static void recv_list_add(struct bat_priv *bat_priv, /* returns 1 if this mac is in the recv_list */ static int recv_list_is_in(struct bat_priv *bat_priv, - struct list_head *recv_list, char *mac) + const struct list_head *recv_list, const char *mac) { - struct recvlist_node *entry; + const struct recvlist_node *entry; spin_lock_bh(&bat_priv->vis_list_lock); list_for_each_entry(entry, recv_list, list) { @@ -412,11 +398,11 @@ static struct vis_info *add_packet(struct bat_priv *bat_priv, return NULL; /* see if the packet is already in vis_hash */ - search_elem.skb_packet = dev_alloc_skb(sizeof(struct vis_packet)); + search_elem.skb_packet = dev_alloc_skb(sizeof(*search_packet)); if (!search_elem.skb_packet) return NULL; search_packet = (struct vis_packet *)skb_put(search_elem.skb_packet, - sizeof(struct vis_packet)); + sizeof(*search_packet)); memcpy(search_packet->vis_orig, vis_packet->vis_orig, ETH_ALEN); old_info = vis_hash_find(bat_priv, &search_elem); @@ -442,27 +428,26 @@ static struct vis_info *add_packet(struct bat_priv *bat_priv, kref_put(&old_info->refcount, free_info); } - info = kmalloc(sizeof(struct vis_info), GFP_ATOMIC); + info = kmalloc(sizeof(*info), GFP_ATOMIC); if (!info) return NULL; - info->skb_packet = dev_alloc_skb(sizeof(struct vis_packet) + - vis_info_len + sizeof(struct ethhdr)); + info->skb_packet = dev_alloc_skb(sizeof(*packet) + vis_info_len + + sizeof(struct ethhdr)); if (!info->skb_packet) { kfree(info); return NULL; } skb_reserve(info->skb_packet, sizeof(struct ethhdr)); - packet = (struct vis_packet *)skb_put(info->skb_packet, - sizeof(struct vis_packet) + - vis_info_len); + packet = (struct vis_packet *)skb_put(info->skb_packet, sizeof(*packet) + + vis_info_len); kref_init(&info->refcount); INIT_LIST_HEAD(&info->send_list); INIT_LIST_HEAD(&info->recv_list); info->first_seen = jiffies; info->bat_priv = bat_priv; - memcpy(packet, vis_packet, sizeof(struct vis_packet) + vis_info_len); + memcpy(packet, vis_packet, sizeof(*packet) + vis_info_len); /* initialize and add new packet. */ *is_new = 1; @@ -599,9 +584,9 @@ static int find_best_vis_server(struct bat_priv *bat_priv, } /* Return true if the vis packet is full. 
*/ -static bool vis_packet_full(struct vis_info *info) +static bool vis_packet_full(const struct vis_info *info) { - struct vis_packet *packet; + const struct vis_packet *packet; packet = (struct vis_packet *)info->skb_packet->data; if (MAX_VIS_PACKET_SIZE / sizeof(struct vis_info_entry) @@ -619,7 +604,7 @@ static int generate_vis_packet(struct bat_priv *bat_priv) struct hlist_head *head; struct orig_node *orig_node; struct neigh_node *router; - struct vis_info *info = (struct vis_info *)bat_priv->my_vis_info; + struct vis_info *info = bat_priv->my_vis_info; struct vis_packet *packet = (struct vis_packet *)info->skb_packet->data; struct vis_info_entry *entry; struct tt_local_entry *tt_local_entry; @@ -632,7 +617,7 @@ static int generate_vis_packet(struct bat_priv *bat_priv) packet->ttl = TTL; packet->seqno = htonl(ntohl(packet->seqno) + 1); packet->entries = 0; - skb_trim(info->skb_packet, sizeof(struct vis_packet)); + skb_trim(info->skb_packet, sizeof(*packet)); if (packet->vis_type == VIS_TYPE_CLIENT_UPDATE) { best_tq = find_best_vis_server(bat_priv, info); @@ -680,11 +665,12 @@ next: hash = bat_priv->tt_local_hash; - spin_lock_bh(&bat_priv->tt_lhash_lock); for (i = 0; i < hash->size; i++) { head = &hash->table[i]; - hlist_for_each_entry(tt_local_entry, node, head, hash_entry) { + rcu_read_lock(); + hlist_for_each_entry_rcu(tt_local_entry, node, head, + hash_entry) { entry = (struct vis_info_entry *) skb_put(info->skb_packet, sizeof(*entry)); @@ -693,14 +679,12 @@ next: entry->quality = 0; /* 0 means TT */ packet->entries++; - if (vis_packet_full(info)) { - spin_unlock_bh(&bat_priv->tt_lhash_lock); - return 0; - } + if (vis_packet_full(info)) + goto unlock; } + rcu_read_unlock(); } - spin_unlock_bh(&bat_priv->tt_lhash_lock); return 0; unlock: @@ -908,17 +892,15 @@ int vis_init(struct bat_priv *bat_priv) goto err; } - bat_priv->my_vis_info->skb_packet = dev_alloc_skb( - sizeof(struct vis_packet) + - MAX_VIS_PACKET_SIZE + - sizeof(struct ethhdr)); + bat_priv->my_vis_info->skb_packet = dev_alloc_skb(sizeof(*packet) + + MAX_VIS_PACKET_SIZE + + sizeof(struct ethhdr)); if (!bat_priv->my_vis_info->skb_packet) goto free_info; skb_reserve(bat_priv->my_vis_info->skb_packet, sizeof(struct ethhdr)); - packet = (struct vis_packet *)skb_put( - bat_priv->my_vis_info->skb_packet, - sizeof(struct vis_packet)); + packet = (struct vis_packet *)skb_put(bat_priv->my_vis_info->skb_packet, + sizeof(*packet)); /* prefill the vis info */ bat_priv->my_vis_info->first_seen = jiffies - diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 0029e17..b18db56 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1707,7 +1707,7 @@ int hci_recv_fragment(struct hci_dev *hdev, int type, void *data, int count) data += (count - rem); count = rem; - }; + } return rem; } @@ -1742,7 +1742,7 @@ int hci_recv_stream_fragment(struct hci_dev *hdev, void *data, int count) data += (count - rem); count = rem; - }; + } return rem; } diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index a6b2f86..c188c80 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -243,6 +243,7 @@ int br_netpoll_enable(struct net_bridge_port *p) goto out; np->dev = p->dev; + strlcpy(np->dev_name, p->dev->name, IFNAMSIZ); err = __netpoll_setup(np); if (err) { diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 2f14eaf..29b9812 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1424,7 +1424,7 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br, switch 
(ih->type) { case IGMP_HOST_MEMBERSHIP_REPORT: case IGMPV2_HOST_MEMBERSHIP_REPORT: - BR_INPUT_SKB_CB(skb2)->mrouters_only = 1; + BR_INPUT_SKB_CB(skb)->mrouters_only = 1; err = br_ip4_multicast_add_group(br, port, ih->group); break; case IGMPV3_HOST_MEMBERSHIP_REPORT: @@ -1543,7 +1543,7 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br, goto out; } mld = (struct mld_msg *)skb_transport_header(skb2); - BR_INPUT_SKB_CB(skb2)->mrouters_only = 1; + BR_INPUT_SKB_CB(skb)->mrouters_only = 1; err = br_ip6_multicast_add_group(br, port, &mld->mld_mca); break; } diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 3fa1231..56149ec 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -104,10 +104,16 @@ static void fake_update_pmtu(struct dst_entry *dst, u32 mtu) { } +static u32 *fake_cow_metrics(struct dst_entry *dst, unsigned long old) +{ + return NULL; +} + static struct dst_ops fake_dst_ops = { .family = AF_INET, .protocol = cpu_to_be16(ETH_P_IP), .update_pmtu = fake_update_pmtu, + .cow_metrics = fake_cow_metrics, }; /* diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index ffb0dc4..6814083 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -218,19 +218,24 @@ int __init br_netlink_init(void) if (err < 0) goto err1; - err = __rtnl_register(PF_BRIDGE, RTM_GETLINK, NULL, br_dump_ifinfo); + err = __rtnl_register(PF_BRIDGE, RTM_GETLINK, NULL, + br_dump_ifinfo, NULL); if (err) goto err2; - err = __rtnl_register(PF_BRIDGE, RTM_SETLINK, br_rtm_setlink, NULL); + err = __rtnl_register(PF_BRIDGE, RTM_SETLINK, + br_rtm_setlink, NULL, NULL); if (err) goto err3; - err = __rtnl_register(PF_BRIDGE, RTM_NEWNEIGH, br_fdb_add, NULL); + err = __rtnl_register(PF_BRIDGE, RTM_NEWNEIGH, + br_fdb_add, NULL, NULL); if (err) goto err3; - err = __rtnl_register(PF_BRIDGE, RTM_DELNEIGH, br_fdb_delete, NULL); + err = __rtnl_register(PF_BRIDGE, RTM_DELNEIGH, + br_fdb_delete, NULL, NULL); if (err) goto err3; - err = __rtnl_register(PF_BRIDGE, RTM_GETNEIGH, NULL, br_fdb_dump); + err = __rtnl_register(PF_BRIDGE, RTM_GETNEIGH, + NULL, br_fdb_dump, NULL); if (err) goto err3; diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c index 682c0fe..7c2fa0a 100644 --- a/net/caif/caif_dev.c +++ b/net/caif/caif_dev.c @@ -11,7 +11,6 @@ #define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__ -#include <linux/version.h> #include <linux/kernel.h> #include <linux/if_arp.h> #include <linux/net.h> diff --git a/net/caif/cfmuxl.c b/net/caif/cfmuxl.c index 3a66b8c..c23979e 100644 --- a/net/caif/cfmuxl.c +++ b/net/caif/cfmuxl.c @@ -255,7 +255,7 @@ static void cfmuxl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, if (cfsrvl_phyid_match(layer, phyid) && layer->ctrlcmd) { - if ((ctrl == _CAIF_CTRLCMD_PHYIF_FLOW_OFF_IND || + if ((ctrl == _CAIF_CTRLCMD_PHYIF_DOWN_IND || ctrl == CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND) && layer->id != 0) { diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c index 649ebac..8656909 100644 --- a/net/caif/chnl_net.c +++ b/net/caif/chnl_net.c @@ -7,8 +7,8 @@ #define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__ -#include <linux/version.h> #include <linux/fs.h> +#include <linux/hardirq.h> #include <linux/init.h> #include <linux/module.h> #include <linux/netdevice.h> @@ -139,17 +139,14 @@ static void close_work(struct work_struct *work) struct chnl_net *dev = NULL; struct list_head *list_node; struct list_head *_tmp; - /* May be called with or without RTNL lock held */ - int islocked = rtnl_is_locked(); - if (!islocked) - rtnl_lock(); + + 
rtnl_lock(); list_for_each_safe(list_node, _tmp, &chnl_net_list) { dev = list_entry(list_node, struct chnl_net, list_field); if (dev->state == CAIF_SHUTDOWN) dev_close(dev->netdev); } - if (!islocked) - rtnl_unlock(); + rtnl_unlock(); } static DECLARE_WORK(close_worker, close_work); diff --git a/net/can/af_can.c b/net/can/af_can.c index 094fc53..8ce926d 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -58,6 +58,7 @@ #include <linux/skbuff.h> #include <linux/can.h> #include <linux/can/core.h> +#include <linux/ratelimit.h> #include <net/net_namespace.h> #include <net/sock.h> @@ -161,8 +162,8 @@ static int can_create(struct net *net, struct socket *sock, int protocol, * return the error code immediately. Below we will * return -EPROTONOSUPPORT */ - if (err && printk_ratelimit()) - printk(KERN_ERR "can: request_module " + if (err) + printk_ratelimited(KERN_ERR "can: request_module " "(can-proto-%d) failed.\n", protocol); cp = can_get_proto(protocol); diff --git a/net/can/bcm.c b/net/can/bcm.c index 184a657..d6c8ae5 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -43,6 +43,7 @@ #include <linux/module.h> #include <linux/init.h> +#include <linux/interrupt.h> #include <linux/hrtimer.h> #include <linux/list.h> #include <linux/proc_fs.h> diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c index 5a8009c..85f3bc0 100644 --- a/net/ceph/crypto.c +++ b/net/ceph/crypto.c @@ -444,7 +444,7 @@ int ceph_key_instantiate(struct key *key, const void *data, size_t datalen) goto err; /* TODO ceph_crypto_key_decode should really take const input */ - p = (void*)data; + p = (void *)data; ret = ceph_crypto_key_decode(ckey, &p, (char*)data+datalen); if (ret < 0) goto err_ckey; diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index e15a82c..78b55f4 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -76,7 +76,8 @@ const char *ceph_pr_addr(const struct sockaddr_storage *ss) break; default: - sprintf(s, "(unknown sockaddr family %d)", (int)ss->ss_family); + snprintf(s, MAX_ADDR_STR_LEN, "(unknown sockaddr family %d)", + (int)ss->ss_family); } return s; @@ -598,7 +599,7 @@ static void prepare_write_keepalive(struct ceph_connection *con) * Connection negotiation. 
*/ -static void prepare_connect_authorizer(struct ceph_connection *con) +static int prepare_connect_authorizer(struct ceph_connection *con) { void *auth_buf; int auth_len = 0; @@ -612,13 +613,20 @@ static void prepare_connect_authorizer(struct ceph_connection *con) con->auth_retry); mutex_lock(&con->mutex); + if (test_bit(CLOSED, &con->state) || + test_bit(OPENING, &con->state)) + return -EAGAIN; + con->out_connect.authorizer_protocol = cpu_to_le32(auth_protocol); con->out_connect.authorizer_len = cpu_to_le32(auth_len); - con->out_kvec[con->out_kvec_left].iov_base = auth_buf; - con->out_kvec[con->out_kvec_left].iov_len = auth_len; - con->out_kvec_left++; - con->out_kvec_bytes += auth_len; + if (auth_len) { + con->out_kvec[con->out_kvec_left].iov_base = auth_buf; + con->out_kvec[con->out_kvec_left].iov_len = auth_len; + con->out_kvec_left++; + con->out_kvec_bytes += auth_len; + } + return 0; } /* @@ -640,9 +648,9 @@ static void prepare_write_banner(struct ceph_messenger *msgr, set_bit(WRITE_PENDING, &con->state); } -static void prepare_write_connect(struct ceph_messenger *msgr, - struct ceph_connection *con, - int after_banner) +static int prepare_write_connect(struct ceph_messenger *msgr, + struct ceph_connection *con, + int after_banner) { unsigned global_seq = get_global_seq(con->msgr, 0); int proto; @@ -683,7 +691,7 @@ static void prepare_write_connect(struct ceph_messenger *msgr, con->out_more = 0; set_bit(WRITE_PENDING, &con->state); - prepare_connect_authorizer(con); + return prepare_connect_authorizer(con); } @@ -1065,8 +1073,10 @@ static void addr_set_port(struct sockaddr_storage *ss, int p) switch (ss->ss_family) { case AF_INET: ((struct sockaddr_in *)ss)->sin_port = htons(p); + break; case AF_INET6: ((struct sockaddr_in6 *)ss)->sin6_port = htons(p); + break; } } @@ -1216,6 +1226,7 @@ static int process_connect(struct ceph_connection *con) u64 sup_feat = con->msgr->supported_features; u64 req_feat = con->msgr->required_features; u64 server_feat = le64_to_cpu(con->in_reply.features); + int ret; dout("process_connect on %p tag %d\n", con, (int)con->in_tag); @@ -1250,7 +1261,9 @@ static int process_connect(struct ceph_connection *con) return -1; } con->auth_retry = 1; - prepare_write_connect(con->msgr, con, 0); + ret = prepare_write_connect(con->msgr, con, 0); + if (ret < 0) + return ret; prepare_read_connect(con); break; @@ -1277,6 +1290,9 @@ static int process_connect(struct ceph_connection *con) if (con->ops->peer_reset) con->ops->peer_reset(con); mutex_lock(&con->mutex); + if (test_bit(CLOSED, &con->state) || + test_bit(OPENING, &con->state)) + return -EAGAIN; break; case CEPH_MSGR_TAG_RETRY_SESSION: @@ -1341,7 +1357,9 @@ static int process_connect(struct ceph_connection *con) * to WAIT. This shouldn't happen if we are the * client. */ - pr_err("process_connect peer connecting WAIT\n"); + pr_err("process_connect got WAIT as client\n"); + con->error_msg = "protocol error, got WAIT as client"; + return -1; default: pr_err("connect protocol error, will retry\n"); @@ -1810,6 +1828,17 @@ static int try_read(struct ceph_connection *con) more: dout("try_read tag %d in_base_pos %d\n", (int)con->in_tag, con->in_base_pos); + + /* + * process_connect and process_message drop and re-take + * con->mutex. make sure we handle a racing close or reopen. 
+ */ + if (test_bit(CLOSED, &con->state) || + test_bit(OPENING, &con->state)) { + ret = -EAGAIN; + goto out; + } + if (test_bit(CONNECTING, &con->state)) { if (!test_bit(NEGOTIATING, &con->state)) { dout("try_read connecting\n"); @@ -1938,8 +1967,10 @@ static void con_work(struct work_struct *work) { struct ceph_connection *con = container_of(work, struct ceph_connection, work.work); + int ret; mutex_lock(&con->mutex); +restart: if (test_and_clear_bit(BACKOFF, &con->state)) { dout("con_work %p backing off\n", con); if (queue_delayed_work(ceph_msgr_wq, &con->work, @@ -1969,18 +2000,31 @@ static void con_work(struct work_struct *work) con_close_socket(con); } - if (test_and_clear_bit(SOCK_CLOSED, &con->state) || - try_read(con) < 0 || - try_write(con) < 0) { - mutex_unlock(&con->mutex); - ceph_fault(con); /* error/fault path */ - goto done_unlocked; - } + if (test_and_clear_bit(SOCK_CLOSED, &con->state)) + goto fault; + + ret = try_read(con); + if (ret == -EAGAIN) + goto restart; + if (ret < 0) + goto fault; + + ret = try_write(con); + if (ret == -EAGAIN) + goto restart; + if (ret < 0) + goto fault; done: mutex_unlock(&con->mutex); done_unlocked: con->ops->put(con); + return; + +fault: + mutex_unlock(&con->mutex); + ceph_fault(con); /* error/fault path */ + goto done_unlocked; } diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 6b5dda1..9cb627a 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -124,7 +124,7 @@ static void calc_layout(struct ceph_osd_client *osdc, ceph_calc_raw_layout(osdc, layout, vino.snap, off, plen, &bno, req, op); - sprintf(req->r_oid, "%llx.%08llx", vino.ino, bno); + snprintf(req->r_oid, sizeof(req->r_oid), "%llx.%08llx", vino.ino, bno); req->r_oid_len = strlen(req->r_oid); } @@ -1144,6 +1144,13 @@ static void handle_osds_timeout(struct work_struct *work) round_jiffies_relative(delay)); } +static void complete_request(struct ceph_osd_request *req) +{ + if (req->r_safe_callback) + req->r_safe_callback(req, NULL); + complete_all(&req->r_safe_completion); /* fsync waiter */ +} + /* * handle osd op reply. either call the callback if it is specified, * or do the completion to wake up the waiting thread. @@ -1226,11 +1233,8 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg, else complete_all(&req->r_completion); - if (flags & CEPH_OSD_FLAG_ONDISK) { - if (req->r_safe_callback) - req->r_safe_callback(req, msg); - complete_all(&req->r_safe_completion); /* fsync waiter */ - } + if (flags & CEPH_OSD_FLAG_ONDISK) + complete_request(req); done: dout("req=%p req->r_linger=%d\n", req, req->r_linger); @@ -1421,6 +1425,15 @@ void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg) done: downgrade_write(&osdc->map_sem); ceph_monc_got_osdmap(&osdc->client->monc, osdc->osdmap->epoch); + + /* + * subscribe to subsequent osdmap updates if full to ensure + * we find out when we are no longer full and stop returning + * ENOSPC. 
+ */ + if (ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL)) + ceph_monc_request_next_osdmap(&osdc->client->monc); + send_queued(osdc); up_read(&osdc->map_sem); wake_up_all(&osdc->client->auth_wq); @@ -1677,8 +1690,14 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc, */ if (req->r_sent == 0) { rc = __map_request(osdc, req); - if (rc < 0) + if (rc < 0) { + if (nofail) { + dout("osdc_start_request failed map, " + " will retry %lld\n", req->r_tid); + rc = 0; + } goto out_unlock; + } if (req->r_osd == NULL) { dout("send_request %p no up osds in pg\n", req); ceph_monc_request_next_osdmap(&osdc->client->monc); @@ -1717,6 +1736,7 @@ int ceph_osdc_wait_request(struct ceph_osd_client *osdc, __cancel_request(req); __unregister_request(osdc, req); mutex_unlock(&osdc->request_mutex); + complete_request(req); dout("wait_request tid %llu canceled/timed out\n", req->r_tid); return rc; } diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index 71603ac..e97c358 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -765,7 +765,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, } map->epoch++; - map->modified = map->modified; + map->modified = modified; if (newcrush) { if (map->crush) crush_destroy(map->crush); @@ -830,15 +830,20 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, map->osd_addr[osd] = addr; } - /* new_down */ + /* new_state */ ceph_decode_32_safe(p, end, len, bad); while (len--) { u32 osd; + u8 xorstate; ceph_decode_32_safe(p, end, osd, bad); + xorstate = **(u8 **)p; (*p)++; /* clean flag */ - pr_info("osd%d down\n", osd); + if (xorstate == 0) + xorstate = CEPH_OSD_UP; + if (xorstate & CEPH_OSD_UP) + pr_info("osd%d down\n", osd); if (osd < map->max_osd) - map->osd_state[osd] &= ~CEPH_OSD_UP; + map->osd_state[osd] ^= xorstate; } /* new_weight */ diff --git a/net/core/dev.c b/net/core/dev.c index c7e305d..6b6ef14 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2096,6 +2096,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, { const struct net_device_ops *ops = dev->netdev_ops; int rc = NETDEV_TX_OK; + unsigned int skb_len; if (likely(!skb->next)) { u32 features; @@ -2146,8 +2147,9 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, } } + skb_len = skb->len; rc = ops->ndo_start_xmit(skb, dev); - trace_net_dev_xmit(skb, rc); + trace_net_dev_xmit(skb, rc, dev, skb_len); if (rc == NETDEV_TX_OK) txq_trans_update(txq); return rc; @@ -2167,8 +2169,9 @@ gso: if (dev->priv_flags & IFF_XMIT_DST_RELEASE) skb_dst_drop(nskb); + skb_len = nskb->len; rc = ops->ndo_start_xmit(nskb, dev); - trace_net_dev_xmit(nskb, rc); + trace_net_dev_xmit(nskb, rc, dev, skb_len); if (unlikely(rc != NETDEV_TX_OK)) { if (rc & ~NETDEV_TX_MASK) goto out_kfree_gso_skb; @@ -2529,7 +2532,7 @@ __u32 __skb_get_rxhash(struct sk_buff *skb) goto done; ip = (const struct iphdr *) (skb->data + nhoff); - if (ip->frag_off & htons(IP_MF | IP_OFFSET)) + if (ip_is_fragment(ip)) ip_proto = 0; else ip_proto = ip->protocol; @@ -3111,7 +3114,7 @@ static int __netif_receive_skb(struct sk_buff *skb) skb_reset_network_header(skb); skb_reset_transport_header(skb); - skb->mac_len = skb->network_header - skb->mac_header; + skb_reset_mac_len(skb); pt_prev = NULL; @@ -5864,8 +5867,6 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, dev->gso_max_size = GSO_MAX_SIZE; - INIT_LIST_HEAD(&dev->ethtool_ntuple_list.list); - dev->ethtool_ntuple_list.count = 0; INIT_LIST_HEAD(&dev->napi_list); INIT_LIST_HEAD(&dev->unreg_list); 
INIT_LIST_HEAD(&dev->link_watch_list); @@ -5929,9 +5930,6 @@ void free_netdev(struct net_device *dev) /* Flush device addresses */ dev_addr_flush(dev); - /* Clear ethtool n-tuple list */ - ethtool_ntuple_flush(dev); - list_for_each_entry_safe(p, n, &dev->napi_list, dev_list) netif_napi_del(p); @@ -6175,6 +6173,11 @@ static int dev_cpu_callback(struct notifier_block *nfb, oldsd->output_queue = NULL; oldsd->output_queue_tailp = &oldsd->output_queue; } + /* Append NAPI poll list from offline CPU. */ + if (!list_empty(&oldsd->poll_list)) { + list_splice_init(&oldsd->poll_list, &sd->poll_list); + raise_softirq_irqoff(NET_RX_SOFTIRQ); + } raise_softirq_irqoff(NET_TX_SOFTIRQ); local_irq_enable(); @@ -6261,29 +6264,23 @@ err_name: /** * netdev_drivername - network driver for the device * @dev: network device - * @buffer: buffer for resulting name - * @len: size of buffer * * Determine network driver for device. */ -char *netdev_drivername(const struct net_device *dev, char *buffer, int len) +const char *netdev_drivername(const struct net_device *dev) { const struct device_driver *driver; const struct device *parent; - - if (len <= 0 || !buffer) - return buffer; - buffer[0] = 0; + const char *empty = ""; parent = dev->dev.parent; - if (!parent) - return buffer; + return empty; driver = parent->driver; if (driver && driver->name) - strlcpy(buffer, driver->name, len); - return buffer; + return driver->name; + return empty; } static int __netdev_printk(const char *level, const struct net_device *dev, diff --git a/net/core/ethtool.c b/net/core/ethtool.c index fd14116..b7c12a6 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -169,18 +169,6 @@ int ethtool_op_set_flags(struct net_device *dev, u32 data, u32 supported) } EXPORT_SYMBOL(ethtool_op_set_flags); -void ethtool_ntuple_flush(struct net_device *dev) -{ - struct ethtool_rx_ntuple_flow_spec_container *fsc, *f; - - list_for_each_entry_safe(fsc, f, &dev->ethtool_ntuple_list.list, list) { - list_del(&fsc->list); - kfree(fsc); - } - dev->ethtool_ntuple_list.count = 0; -} -EXPORT_SYMBOL(ethtool_ntuple_flush); - /* Handlers for each ethtool command */ #define ETHTOOL_DEV_FEATURE_WORDS 1 @@ -865,34 +853,6 @@ out: return ret; } -static void __rx_ntuple_filter_add(struct ethtool_rx_ntuple_list *list, - struct ethtool_rx_ntuple_flow_spec *spec, - struct ethtool_rx_ntuple_flow_spec_container *fsc) -{ - - /* don't add filters forever */ - if (list->count >= ETHTOOL_MAX_NTUPLE_LIST_ENTRY) { - /* free the container */ - kfree(fsc); - return; - } - - /* Copy the whole filter over */ - fsc->fs.flow_type = spec->flow_type; - memcpy(&fsc->fs.h_u, &spec->h_u, sizeof(spec->h_u)); - memcpy(&fsc->fs.m_u, &spec->m_u, sizeof(spec->m_u)); - - fsc->fs.vlan_tag = spec->vlan_tag; - fsc->fs.vlan_tag_mask = spec->vlan_tag_mask; - fsc->fs.data = spec->data; - fsc->fs.data_mask = spec->data_mask; - fsc->fs.action = spec->action; - - /* add to the list */ - list_add_tail_rcu(&fsc->list, &list->list); - list->count++; -} - /* * ethtool does not (or did not) set masks for flow parameters that are * not specified, so if both value and mask are 0 then this must be @@ -930,8 +890,6 @@ static noinline_for_stack int ethtool_set_rx_ntuple(struct net_device *dev, { struct ethtool_rx_ntuple cmd; const struct ethtool_ops *ops = dev->ethtool_ops; - struct ethtool_rx_ntuple_flow_spec_container *fsc = NULL; - int ret; if (!ops->set_rx_ntuple) return -EOPNOTSUPP; @@ -944,269 +902,7 @@ static noinline_for_stack int ethtool_set_rx_ntuple(struct net_device *dev, 
rx_ntuple_fix_masks(&cmd.fs); - /* - * Cache filter in dev struct for GET operation only if - * the underlying driver doesn't have its own GET operation, and - * only if the filter was added successfully. First make sure we - * can allocate the filter, then continue if successful. - */ - if (!ops->get_rx_ntuple) { - fsc = kmalloc(sizeof(*fsc), GFP_ATOMIC); - if (!fsc) - return -ENOMEM; - } - - ret = ops->set_rx_ntuple(dev, &cmd); - if (ret) { - kfree(fsc); - return ret; - } - - if (!ops->get_rx_ntuple) - __rx_ntuple_filter_add(&dev->ethtool_ntuple_list, &cmd.fs, fsc); - - return ret; -} - -static int ethtool_get_rx_ntuple(struct net_device *dev, void __user *useraddr) -{ - struct ethtool_gstrings gstrings; - const struct ethtool_ops *ops = dev->ethtool_ops; - struct ethtool_rx_ntuple_flow_spec_container *fsc; - u8 *data; - char *p; - int ret, i, num_strings = 0; - - if (!ops->get_sset_count) - return -EOPNOTSUPP; - - if (copy_from_user(&gstrings, useraddr, sizeof(gstrings))) - return -EFAULT; - - ret = ops->get_sset_count(dev, gstrings.string_set); - if (ret < 0) - return ret; - - gstrings.len = ret; - - data = kzalloc(gstrings.len * ETH_GSTRING_LEN, GFP_USER); - if (!data) - return -ENOMEM; - - if (ops->get_rx_ntuple) { - /* driver-specific filter grab */ - ret = ops->get_rx_ntuple(dev, gstrings.string_set, data); - goto copy; - } - - /* default ethtool filter grab */ - i = 0; - p = (char *)data; - list_for_each_entry(fsc, &dev->ethtool_ntuple_list.list, list) { - sprintf(p, "Filter %d:\n", i); - p += ETH_GSTRING_LEN; - num_strings++; - - switch (fsc->fs.flow_type) { - case TCP_V4_FLOW: - sprintf(p, "\tFlow Type: TCP\n"); - p += ETH_GSTRING_LEN; - num_strings++; - break; - case UDP_V4_FLOW: - sprintf(p, "\tFlow Type: UDP\n"); - p += ETH_GSTRING_LEN; - num_strings++; - break; - case SCTP_V4_FLOW: - sprintf(p, "\tFlow Type: SCTP\n"); - p += ETH_GSTRING_LEN; - num_strings++; - break; - case AH_ESP_V4_FLOW: - sprintf(p, "\tFlow Type: AH ESP\n"); - p += ETH_GSTRING_LEN; - num_strings++; - break; - case ESP_V4_FLOW: - sprintf(p, "\tFlow Type: ESP\n"); - p += ETH_GSTRING_LEN; - num_strings++; - break; - case IP_USER_FLOW: - sprintf(p, "\tFlow Type: Raw IP\n"); - p += ETH_GSTRING_LEN; - num_strings++; - break; - case IPV4_FLOW: - sprintf(p, "\tFlow Type: IPv4\n"); - p += ETH_GSTRING_LEN; - num_strings++; - break; - default: - sprintf(p, "\tFlow Type: Unknown\n"); - p += ETH_GSTRING_LEN; - num_strings++; - goto unknown_filter; - } - - /* now the rest of the filters */ - switch (fsc->fs.flow_type) { - case TCP_V4_FLOW: - case UDP_V4_FLOW: - case SCTP_V4_FLOW: - sprintf(p, "\tSrc IP addr: 0x%x\n", - fsc->fs.h_u.tcp_ip4_spec.ip4src); - p += ETH_GSTRING_LEN; - num_strings++; - sprintf(p, "\tSrc IP mask: 0x%x\n", - fsc->fs.m_u.tcp_ip4_spec.ip4src); - p += ETH_GSTRING_LEN; - num_strings++; - sprintf(p, "\tDest IP addr: 0x%x\n", - fsc->fs.h_u.tcp_ip4_spec.ip4dst); - p += ETH_GSTRING_LEN; - num_strings++; - sprintf(p, "\tDest IP mask: 0x%x\n", - fsc->fs.m_u.tcp_ip4_spec.ip4dst); - p += ETH_GSTRING_LEN; - num_strings++; - sprintf(p, "\tSrc Port: %d, mask: 0x%x\n", - fsc->fs.h_u.tcp_ip4_spec.psrc, - fsc->fs.m_u.tcp_ip4_spec.psrc); - p += ETH_GSTRING_LEN; - num_strings++; - sprintf(p, "\tDest Port: %d, mask: 0x%x\n", - fsc->fs.h_u.tcp_ip4_spec.pdst, - fsc->fs.m_u.tcp_ip4_spec.pdst); - p += ETH_GSTRING_LEN; - num_strings++; - sprintf(p, "\tTOS: %d, mask: 0x%x\n", - fsc->fs.h_u.tcp_ip4_spec.tos, - fsc->fs.m_u.tcp_ip4_spec.tos); - p += ETH_GSTRING_LEN; - num_strings++; - break; - case AH_ESP_V4_FLOW: - case 
ESP_V4_FLOW: - sprintf(p, "\tSrc IP addr: 0x%x\n", - fsc->fs.h_u.ah_ip4_spec.ip4src); - p += ETH_GSTRING_LEN; - num_strings++; - sprintf(p, "\tSrc IP mask: 0x%x\n", - fsc->fs.m_u.ah_ip4_spec.ip4src); - p += ETH_GSTRING_LEN; - num_strings++; - sprintf(p, "\tDest IP addr: 0x%x\n", - fsc->fs.h_u.ah_ip4_spec.ip4dst); - p += ETH_GSTRING_LEN; - num_strings++; - sprintf(p, "\tDest IP mask: 0x%x\n", - fsc->fs.m_u.ah_ip4_spec.ip4dst); - p += ETH_GSTRING_LEN; - num_strings++; - sprintf(p, "\tSPI: %d, mask: 0x%x\n", - fsc->fs.h_u.ah_ip4_spec.spi, - fsc->fs.m_u.ah_ip4_spec.spi); - p += ETH_GSTRING_LEN; - num_strings++; - sprintf(p, "\tTOS: %d, mask: 0x%x\n", - fsc->fs.h_u.ah_ip4_spec.tos, - fsc->fs.m_u.ah_ip4_spec.tos); - p += ETH_GSTRING_LEN; - num_strings++; - break; - case IP_USER_FLOW: - sprintf(p, "\tSrc IP addr: 0x%x\n", - fsc->fs.h_u.usr_ip4_spec.ip4src); - p += ETH_GSTRING_LEN; - num_strings++; - sprintf(p, "\tSrc IP mask: 0x%x\n", - fsc->fs.m_u.usr_ip4_spec.ip4src); - p += ETH_GSTRING_LEN; - num_strings++; - sprintf(p, "\tDest IP addr: 0x%x\n", - fsc->fs.h_u.usr_ip4_spec.ip4dst); - p += ETH_GSTRING_LEN; - num_strings++; - sprintf(p, "\tDest IP mask: 0x%x\n", - fsc->fs.m_u.usr_ip4_spec.ip4dst); - p += ETH_GSTRING_LEN; - num_strings++; - break; - case IPV4_FLOW: - sprintf(p, "\tSrc IP addr: 0x%x\n", - fsc->fs.h_u.usr_ip4_spec.ip4src); - p += ETH_GSTRING_LEN; - num_strings++; - sprintf(p, "\tSrc IP mask: 0x%x\n", - fsc->fs.m_u.usr_ip4_spec.ip4src); - p += ETH_GSTRING_LEN; - num_strings++; - sprintf(p, "\tDest IP addr: 0x%x\n", - fsc->fs.h_u.usr_ip4_spec.ip4dst); - p += ETH_GSTRING_LEN; - num_strings++; - sprintf(p, "\tDest IP mask: 0x%x\n", - fsc->fs.m_u.usr_ip4_spec.ip4dst); - p += ETH_GSTRING_LEN; - num_strings++; - sprintf(p, "\tL4 bytes: 0x%x, mask: 0x%x\n", - fsc->fs.h_u.usr_ip4_spec.l4_4_bytes, - fsc->fs.m_u.usr_ip4_spec.l4_4_bytes); - p += ETH_GSTRING_LEN; - num_strings++; - sprintf(p, "\tTOS: %d, mask: 0x%x\n", - fsc->fs.h_u.usr_ip4_spec.tos, - fsc->fs.m_u.usr_ip4_spec.tos); - p += ETH_GSTRING_LEN; - num_strings++; - sprintf(p, "\tIP Version: %d, mask: 0x%x\n", - fsc->fs.h_u.usr_ip4_spec.ip_ver, - fsc->fs.m_u.usr_ip4_spec.ip_ver); - p += ETH_GSTRING_LEN; - num_strings++; - sprintf(p, "\tProtocol: %d, mask: 0x%x\n", - fsc->fs.h_u.usr_ip4_spec.proto, - fsc->fs.m_u.usr_ip4_spec.proto); - p += ETH_GSTRING_LEN; - num_strings++; - break; - } - sprintf(p, "\tVLAN: %d, mask: 0x%x\n", - fsc->fs.vlan_tag, fsc->fs.vlan_tag_mask); - p += ETH_GSTRING_LEN; - num_strings++; - sprintf(p, "\tUser-defined: 0x%Lx\n", fsc->fs.data); - p += ETH_GSTRING_LEN; - num_strings++; - sprintf(p, "\tUser-defined mask: 0x%Lx\n", fsc->fs.data_mask); - p += ETH_GSTRING_LEN; - num_strings++; - if (fsc->fs.action == ETHTOOL_RXNTUPLE_ACTION_DROP) - sprintf(p, "\tAction: Drop\n"); - else - sprintf(p, "\tAction: Direct to queue %d\n", - fsc->fs.action); - p += ETH_GSTRING_LEN; - num_strings++; -unknown_filter: - i++; - } -copy: - /* indicate to userspace how many strings we actually have */ - gstrings.len = num_strings; - ret = -EFAULT; - if (copy_to_user(useraddr, &gstrings, sizeof(gstrings))) - goto out; - useraddr += sizeof(gstrings); - if (copy_to_user(useraddr, data, gstrings.len * ETH_GSTRING_LEN)) - goto out; - ret = 0; - -out: - kfree(data); - return ret; + return ops->set_rx_ntuple(dev, &cmd); } static int ethtool_get_regs(struct net_device *dev, char __user *useraddr) @@ -2101,9 +1797,6 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_SRXNTUPLE: rc = ethtool_set_rx_ntuple(dev, useraddr); break; - 
case ETHTOOL_GRXNTUPLE: - rc = ethtool_get_rx_ntuple(dev, useraddr); - break; case ETHTOOL_GSSET_INFO: rc = ethtool_get_sset_info(dev, useraddr); break; diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 008dc70..e7ab0c0 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -740,9 +740,9 @@ static struct pernet_operations fib_rules_net_ops = { static int __init fib_rules_init(void) { int err; - rtnl_register(PF_UNSPEC, RTM_NEWRULE, fib_nl_newrule, NULL); - rtnl_register(PF_UNSPEC, RTM_DELRULE, fib_nl_delrule, NULL); - rtnl_register(PF_UNSPEC, RTM_GETRULE, NULL, fib_nl_dumprule); + rtnl_register(PF_UNSPEC, RTM_NEWRULE, fib_nl_newrule, NULL, NULL); + rtnl_register(PF_UNSPEC, RTM_DELRULE, fib_nl_delrule, NULL, NULL); + rtnl_register(PF_UNSPEC, RTM_GETRULE, NULL, fib_nl_dumprule, NULL); err = register_pernet_subsys(&fib_rules_net_ops); if (err < 0) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 799f06e..ceb505b 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -2909,12 +2909,13 @@ EXPORT_SYMBOL(neigh_sysctl_unregister); static int __init neigh_init(void) { - rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL); - rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL); - rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info); + rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, NULL); + rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, NULL); + rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info, NULL); - rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info); - rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL); + rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info, + NULL); + rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, NULL); return 0; } diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 11b98bc..33d2a1f 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -1179,9 +1179,14 @@ static void remove_queue_kobjects(struct net_device *net) #endif } -static const void *net_current_ns(void) +static void *net_grab_current_ns(void) { - return current->nsproxy->net_ns; + struct net *ns = current->nsproxy->net_ns; +#ifdef CONFIG_NET_NS + if (ns) + atomic_inc(&ns->passive); +#endif + return ns; } static const void *net_initial_ns(void) @@ -1196,22 +1201,13 @@ static const void *net_netlink_ns(struct sock *sk) struct kobj_ns_type_operations net_ns_type_operations = { .type = KOBJ_NS_TYPE_NET, - .current_ns = net_current_ns, + .grab_current_ns = net_grab_current_ns, .netlink_ns = net_netlink_ns, .initial_ns = net_initial_ns, + .drop_ns = net_drop_ns, }; EXPORT_SYMBOL_GPL(net_ns_type_operations); -static void net_kobj_ns_exit(struct net *net) -{ - kobj_ns_exit(KOBJ_NS_TYPE_NET, net); -} - -static struct pernet_operations kobj_net_ops = { - .exit = net_kobj_ns_exit, -}; - - #ifdef CONFIG_HOTPLUG static int netdev_uevent(struct device *d, struct kobj_uevent_env *env) { @@ -1339,6 +1335,5 @@ EXPORT_SYMBOL(netdev_class_remove_file); int netdev_kobject_init(void) { kobj_ns_type_register(&net_ns_type_operations); - register_pernet_subsys(&kobj_net_ops); return class_register(&net_class); } diff --git a/net/core/net-traces.c b/net/core/net-traces.c index 7f1bb2a..52380b1 100644 --- a/net/core/net-traces.c +++ b/net/core/net-traces.c @@ -28,6 +28,8 @@ #include <trace/events/skb.h> #include <trace/events/net.h> #include <trace/events/napi.h> +#include <trace/events/sock.h> +#include <trace/events/udp.h> EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb); diff 
--git a/net/core/net_namespace.c b/net/core/net_namespace.c index 2e2dce6..ea489db 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -8,6 +8,8 @@ #include <linux/idr.h> #include <linux/rculist.h> #include <linux/nsproxy.h> +#include <linux/proc_fs.h> +#include <linux/file.h> #include <net/net_namespace.h> #include <net/netns/generic.h> @@ -126,6 +128,7 @@ static __net_init int setup_net(struct net *net) LIST_HEAD(net_exit_list); atomic_set(&net->count, 1); + atomic_set(&net->passive, 1); #ifdef NETNS_REFCNT_DEBUG atomic_set(&net->use_count, 0); @@ -208,6 +211,13 @@ static void net_free(struct net *net) kmem_cache_free(net_cachep, net); } +void net_drop_ns(void *p) +{ + struct net *ns = p; + if (ns && atomic_dec_and_test(&ns->passive)) + net_free(ns); +} + struct net *copy_net_ns(unsigned long flags, struct net *old_net) { struct net *net; @@ -228,7 +238,7 @@ struct net *copy_net_ns(unsigned long flags, struct net *old_net) } mutex_unlock(&net_mutex); if (rv < 0) { - net_free(net); + net_drop_ns(net); return ERR_PTR(rv); } return net; @@ -284,7 +294,7 @@ static void cleanup_net(struct work_struct *work) /* Finally it is safe to free my network namespace structure */ list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) { list_del_init(&net->exit_list); - net_free(net); + net_drop_ns(net); } } static DECLARE_WORK(net_cleanup_work, cleanup_net); @@ -302,6 +312,26 @@ void __put_net(struct net *net) } EXPORT_SYMBOL_GPL(__put_net); +struct net *get_net_ns_by_fd(int fd) +{ + struct proc_inode *ei; + struct file *file; + struct net *net; + + file = proc_ns_fget(fd); + if (IS_ERR(file)) + return ERR_CAST(file); + + ei = PROC_I(file->f_dentry->d_inode); + if (ei->ns_ops == &netns_operations) + net = get_net(ei->ns); + else + net = ERR_PTR(-EINVAL); + + fput(file); + return net; +} + #else struct net *copy_net_ns(unsigned long flags, struct net *old_net) { @@ -309,6 +339,11 @@ struct net *copy_net_ns(unsigned long flags, struct net *old_net) return ERR_PTR(-EINVAL); return old_net; } + +struct net *get_net_ns_by_fd(int fd) +{ + return ERR_PTR(-EINVAL); +} #endif struct net *get_net_ns_by_pid(pid_t pid) @@ -561,3 +596,39 @@ void unregister_pernet_device(struct pernet_operations *ops) mutex_unlock(&net_mutex); } EXPORT_SYMBOL_GPL(unregister_pernet_device); + +#ifdef CONFIG_NET_NS +static void *netns_get(struct task_struct *task) +{ + struct net *net = NULL; + struct nsproxy *nsproxy; + + rcu_read_lock(); + nsproxy = task_nsproxy(task); + if (nsproxy) + net = get_net(nsproxy->net_ns); + rcu_read_unlock(); + + return net; +} + +static void netns_put(void *ns) +{ + put_net(ns); +} + +static int netns_install(struct nsproxy *nsproxy, void *ns) +{ + put_net(nsproxy->net_ns); + nsproxy->net_ns = get_net(ns); + return 0; +} + +const struct proc_ns_operations netns_operations = { + .name = "net", + .type = CLONE_NEWNET, + .get = netns_get, + .put = netns_put, + .install = netns_install, +}; +#endif diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 2d7d6d4..18d9cbd 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -792,6 +792,13 @@ int netpoll_setup(struct netpoll *np) return -ENODEV; } + if (ndev->master) { + printk(KERN_ERR "%s: %s is a slave device, aborting.\n", + np->name, np->dev_name); + err = -EBUSY; + goto put; + } + if (!netif_running(ndev)) { unsigned long atmost, atleast; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 2d56cb9..a798fc6 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -56,9 +56,11 @@ struct rtnl_link { 
rtnl_doit_func doit; rtnl_dumpit_func dumpit; + rtnl_calcit_func calcit; }; static DEFINE_MUTEX(rtnl_mutex); +static u16 min_ifinfo_dump_size; void rtnl_lock(void) { @@ -144,12 +146,28 @@ static rtnl_dumpit_func rtnl_get_dumpit(int protocol, int msgindex) return tab ? tab[msgindex].dumpit : NULL; } +static rtnl_calcit_func rtnl_get_calcit(int protocol, int msgindex) +{ + struct rtnl_link *tab; + + if (protocol <= RTNL_FAMILY_MAX) + tab = rtnl_msg_handlers[protocol]; + else + tab = NULL; + + if (tab == NULL || tab[msgindex].calcit == NULL) + tab = rtnl_msg_handlers[PF_UNSPEC]; + + return tab ? tab[msgindex].calcit : NULL; +} + /** * __rtnl_register - Register a rtnetlink message type * @protocol: Protocol family or PF_UNSPEC * @msgtype: rtnetlink message type * @doit: Function pointer called for each request message * @dumpit: Function pointer called for each dump request (NLM_F_DUMP) message + * @calcit: Function pointer to calc size of dump message * * Registers the specified function pointers (at least one of them has * to be non-NULL) to be called whenever a request message for the @@ -162,7 +180,8 @@ static rtnl_dumpit_func rtnl_get_dumpit(int protocol, int msgindex) * Returns 0 on success or a negative error code. */ int __rtnl_register(int protocol, int msgtype, - rtnl_doit_func doit, rtnl_dumpit_func dumpit) + rtnl_doit_func doit, rtnl_dumpit_func dumpit, + rtnl_calcit_func calcit) { struct rtnl_link *tab; int msgindex; @@ -185,6 +204,9 @@ int __rtnl_register(int protocol, int msgtype, if (dumpit) tab[msgindex].dumpit = dumpit; + if (calcit) + tab[msgindex].calcit = calcit; + return 0; } EXPORT_SYMBOL_GPL(__rtnl_register); @@ -199,9 +221,10 @@ EXPORT_SYMBOL_GPL(__rtnl_register); * of memory implies no sense in continuing. */ void rtnl_register(int protocol, int msgtype, - rtnl_doit_func doit, rtnl_dumpit_func dumpit) + rtnl_doit_func doit, rtnl_dumpit_func dumpit, + rtnl_calcit_func calcit) { - if (__rtnl_register(protocol, msgtype, doit, dumpit) < 0) + if (__rtnl_register(protocol, msgtype, doit, dumpit, calcit) < 0) panic("Unable to register rtnetlink message handler, " "protocol = %d, message type = %d\n", protocol, msgtype); @@ -1046,6 +1069,7 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_LINKMODE] = { .type = NLA_U8 }, [IFLA_LINKINFO] = { .type = NLA_NESTED }, [IFLA_NET_NS_PID] = { .type = NLA_U32 }, + [IFLA_NET_NS_FD] = { .type = NLA_U32 }, [IFLA_IFALIAS] = { .type = NLA_STRING, .len = IFALIASZ-1 }, [IFLA_VFINFO_LIST] = {. 
type = NLA_NESTED }, [IFLA_VF_PORTS] = { .type = NLA_NESTED }, @@ -1094,6 +1118,8 @@ struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[]) */ if (tb[IFLA_NET_NS_PID]) net = get_net_ns_by_pid(nla_get_u32(tb[IFLA_NET_NS_PID])); + else if (tb[IFLA_NET_NS_FD]) + net = get_net_ns_by_fd(nla_get_u32(tb[IFLA_NET_NS_FD])); else net = get_net(src_net); return net; @@ -1224,7 +1250,7 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, int send_addr_notify = 0; int err; - if (tb[IFLA_NET_NS_PID]) { + if (tb[IFLA_NET_NS_PID] || tb[IFLA_NET_NS_FD]) { struct net *net = rtnl_link_get_net(dev_net(dev), tb); if (IS_ERR(net)) { err = PTR_ERR(net); @@ -1815,6 +1841,11 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) return err; } +static u16 rtnl_calcit(struct sk_buff *skb) +{ + return min_ifinfo_dump_size; +} + static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb) { int idx; @@ -1844,11 +1875,14 @@ void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change) struct net *net = dev_net(dev); struct sk_buff *skb; int err = -ENOBUFS; + size_t if_info_size; - skb = nlmsg_new(if_nlmsg_size(dev), GFP_KERNEL); + skb = nlmsg_new((if_info_size = if_nlmsg_size(dev)), GFP_KERNEL); if (skb == NULL) goto errout; + min_ifinfo_dump_size = max_t(u16, if_info_size, min_ifinfo_dump_size); + err = rtnl_fill_ifinfo(skb, dev, type, 0, 0, change, 0); if (err < 0) { /* -EMSGSIZE implies BUG in if_nlmsg_size() */ @@ -1899,14 +1933,20 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) { struct sock *rtnl; rtnl_dumpit_func dumpit; + rtnl_calcit_func calcit; + u16 min_dump_alloc = 0; dumpit = rtnl_get_dumpit(family, type); if (dumpit == NULL) return -EOPNOTSUPP; + calcit = rtnl_get_calcit(family, type); + if (calcit) + min_dump_alloc = calcit(skb); __rtnl_unlock(); rtnl = net->rtnl; - err = netlink_dump_start(rtnl, skb, nlh, dumpit, NULL); + err = netlink_dump_start(rtnl, skb, nlh, dumpit, + NULL, min_dump_alloc); rtnl_lock(); return err; } @@ -2016,12 +2056,13 @@ void __init rtnetlink_init(void) netlink_set_nonroot(NETLINK_ROUTE, NL_NONROOT_RECV); register_netdevice_notifier(&rtnetlink_dev_notifier); - rtnl_register(PF_UNSPEC, RTM_GETLINK, rtnl_getlink, rtnl_dump_ifinfo); - rtnl_register(PF_UNSPEC, RTM_SETLINK, rtnl_setlink, NULL); - rtnl_register(PF_UNSPEC, RTM_NEWLINK, rtnl_newlink, NULL); - rtnl_register(PF_UNSPEC, RTM_DELLINK, rtnl_dellink, NULL); + rtnl_register(PF_UNSPEC, RTM_GETLINK, rtnl_getlink, + rtnl_dump_ifinfo, rtnl_calcit); + rtnl_register(PF_UNSPEC, RTM_SETLINK, rtnl_setlink, NULL, NULL); + rtnl_register(PF_UNSPEC, RTM_NEWLINK, rtnl_newlink, NULL, NULL); + rtnl_register(PF_UNSPEC, RTM_DELLINK, rtnl_dellink, NULL, NULL); - rtnl_register(PF_UNSPEC, RTM_GETADDR, NULL, rtnl_dump_all); - rtnl_register(PF_UNSPEC, RTM_GETROUTE, NULL, rtnl_dump_all); + rtnl_register(PF_UNSPEC, RTM_GETADDR, NULL, rtnl_dump_all, NULL); + rtnl_register(PF_UNSPEC, RTM_GETROUTE, NULL, rtnl_dump_all, NULL); } diff --git a/net/core/sock.c b/net/core/sock.c index 6e81978..76c4031 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -128,6 +128,8 @@ #include <linux/filter.h> +#include <trace/events/sock.h> + #ifdef CONFIG_INET #include <net/tcp.h> #endif @@ -292,6 +294,7 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >= (unsigned)sk->sk_rcvbuf) { atomic_inc(&sk->sk_drops); + trace_sock_rcvqueue_full(sk, skb); return 
-ENOMEM; } @@ -1736,6 +1739,8 @@ suppress_allocation: return 1; } + trace_sock_exceed_buf_limit(sk, prot, allocated); + /* Alas. Undo changes. */ sk->sk_forward_alloc -= amt * SK_MEM_QUANTUM; atomic_long_sub(amt, prot->memory_allocated); diff --git a/net/core/timestamping.c b/net/core/timestamping.c index 7e7ca37..98a5264 100644 --- a/net/core/timestamping.c +++ b/net/core/timestamping.c @@ -68,6 +68,7 @@ void skb_clone_tx_timestamp(struct sk_buff *skb) break; } } +EXPORT_SYMBOL_GPL(skb_clone_tx_timestamp); void skb_complete_tx_timestamp(struct sk_buff *skb, struct skb_shared_hwtstamps *hwtstamps) @@ -121,6 +122,7 @@ bool skb_defer_rx_timestamp(struct sk_buff *skb) return false; } +EXPORT_SYMBOL_GPL(skb_defer_rx_timestamp); void __init skb_timestamping_init(void) { diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index 3609eac..fc56e85 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -1166,64 +1166,6 @@ err: return ret; } -/* Handle IEEE 802.1Qaz SET commands. If any requested operation can not - * be completed the entire msg is aborted and error value is returned. - * No attempt is made to reconcile the case where only part of the - * cmd can be completed. - */ -static int dcbnl_ieee_set(struct net_device *netdev, struct nlattr **tb, - u32 pid, u32 seq, u16 flags) -{ - const struct dcbnl_rtnl_ops *ops = netdev->dcbnl_ops; - struct nlattr *ieee[DCB_ATTR_IEEE_MAX + 1]; - int err = -EOPNOTSUPP; - - if (!ops) - goto err; - - err = nla_parse_nested(ieee, DCB_ATTR_IEEE_MAX, - tb[DCB_ATTR_IEEE], dcbnl_ieee_policy); - if (err) - goto err; - - if (ieee[DCB_ATTR_IEEE_ETS] && ops->ieee_setets) { - struct ieee_ets *ets = nla_data(ieee[DCB_ATTR_IEEE_ETS]); - err = ops->ieee_setets(netdev, ets); - if (err) - goto err; - } - - if (ieee[DCB_ATTR_IEEE_PFC] && ops->ieee_setpfc) { - struct ieee_pfc *pfc = nla_data(ieee[DCB_ATTR_IEEE_PFC]); - err = ops->ieee_setpfc(netdev, pfc); - if (err) - goto err; - } - - if (ieee[DCB_ATTR_IEEE_APP_TABLE]) { - struct nlattr *attr; - int rem; - - nla_for_each_nested(attr, ieee[DCB_ATTR_IEEE_APP_TABLE], rem) { - struct dcb_app *app_data; - if (nla_type(attr) != DCB_ATTR_IEEE_APP) - continue; - app_data = nla_data(attr); - if (ops->ieee_setapp) - err = ops->ieee_setapp(netdev, app_data); - else - err = dcb_setapp(netdev, app_data); - if (err) - goto err; - } - } - -err: - dcbnl_reply(err, RTM_SETDCB, DCB_CMD_IEEE_SET, DCB_ATTR_IEEE, - pid, seq, flags); - return err; -} - static int dcbnl_build_peer_app(struct net_device *netdev, struct sk_buff* skb, int app_nested_type, int app_info_type, int app_entry_type) @@ -1279,29 +1221,13 @@ nla_put_failure: } /* Handle IEEE 802.1Qaz GET commands. 
*/ -static int dcbnl_ieee_get(struct net_device *netdev, struct nlattr **tb, - u32 pid, u32 seq, u16 flags) +static int dcbnl_ieee_fill(struct sk_buff *skb, struct net_device *netdev) { - struct sk_buff *skb; - struct nlmsghdr *nlh; - struct dcbmsg *dcb; struct nlattr *ieee, *app; struct dcb_app_type *itr; const struct dcbnl_rtnl_ops *ops = netdev->dcbnl_ops; - int err; - - if (!ops) - return -EOPNOTSUPP; - - skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (!skb) - return -ENOBUFS; - - nlh = NLMSG_NEW(skb, pid, seq, RTM_GETDCB, sizeof(*dcb), flags); - - dcb = NLMSG_DATA(nlh); - dcb->dcb_family = AF_UNSPEC; - dcb->cmd = DCB_CMD_IEEE_GET; + int dcbx; + int err = -EMSGSIZE; NLA_PUT_STRING(skb, DCB_ATTR_IFNAME, netdev->name); @@ -1338,6 +1264,12 @@ static int dcbnl_ieee_get(struct net_device *netdev, struct nlattr **tb, } } } + + if (netdev->dcbnl_ops->getdcbx) + dcbx = netdev->dcbnl_ops->getdcbx(netdev); + else + dcbx = -EOPNOTSUPP; + spin_unlock(&dcb_lock); nla_nest_end(skb, app); @@ -1366,16 +1298,208 @@ static int dcbnl_ieee_get(struct net_device *netdev, struct nlattr **tb, } nla_nest_end(skb, ieee); - nlmsg_end(skb, nlh); + if (dcbx >= 0) { + err = nla_put_u8(skb, DCB_ATTR_DCBX, dcbx); + if (err) + goto nla_put_failure; + } + + return 0; - return rtnl_unicast(skb, &init_net, pid); nla_put_failure: - nlmsg_cancel(skb, nlh); -nlmsg_failure: - kfree_skb(skb); - return -1; + return err; +} + +int dcbnl_notify(struct net_device *dev, int event, int cmd, + u32 seq, u32 pid) +{ + struct net *net = dev_net(dev); + struct sk_buff *skb; + struct nlmsghdr *nlh; + struct dcbmsg *dcb; + const struct dcbnl_rtnl_ops *ops = dev->dcbnl_ops; + int err; + + if (!ops) + return -EOPNOTSUPP; + + skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!skb) + return -ENOBUFS; + + nlh = nlmsg_put(skb, pid, 0, event, sizeof(*dcb), 0); + if (nlh == NULL) { + nlmsg_free(skb); + return -EMSGSIZE; + } + + dcb = NLMSG_DATA(nlh); + dcb->dcb_family = AF_UNSPEC; + dcb->cmd = cmd; + + err = dcbnl_ieee_fill(skb, dev); + if (err < 0) { + /* Report error to broadcast listeners */ + nlmsg_cancel(skb, nlh); + kfree_skb(skb); + rtnl_set_sk_err(net, RTNLGRP_DCB, err); + } else { + /* End nlmsg and notify broadcast listeners */ + nlmsg_end(skb, nlh); + rtnl_notify(skb, net, 0, RTNLGRP_DCB, NULL, GFP_KERNEL); + } + + return err; +} +EXPORT_SYMBOL(dcbnl_notify); + +/* Handle IEEE 802.1Qaz SET commands. If any requested operation can not + * be completed the entire msg is aborted and error value is returned. + * No attempt is made to reconcile the case where only part of the + * cmd can be completed. 
+ */ +static int dcbnl_ieee_set(struct net_device *netdev, struct nlattr **tb, + u32 pid, u32 seq, u16 flags) +{ + const struct dcbnl_rtnl_ops *ops = netdev->dcbnl_ops; + struct nlattr *ieee[DCB_ATTR_IEEE_MAX + 1]; + int err = -EOPNOTSUPP; + + if (!ops) + return err; + + if (!tb[DCB_ATTR_IEEE]) + return -EINVAL; + + err = nla_parse_nested(ieee, DCB_ATTR_IEEE_MAX, + tb[DCB_ATTR_IEEE], dcbnl_ieee_policy); + if (err) + return err; + + if (ieee[DCB_ATTR_IEEE_ETS] && ops->ieee_setets) { + struct ieee_ets *ets = nla_data(ieee[DCB_ATTR_IEEE_ETS]); + err = ops->ieee_setets(netdev, ets); + if (err) + goto err; + } + + if (ieee[DCB_ATTR_IEEE_PFC] && ops->ieee_setpfc) { + struct ieee_pfc *pfc = nla_data(ieee[DCB_ATTR_IEEE_PFC]); + err = ops->ieee_setpfc(netdev, pfc); + if (err) + goto err; + } + + if (ieee[DCB_ATTR_IEEE_APP_TABLE]) { + struct nlattr *attr; + int rem; + + nla_for_each_nested(attr, ieee[DCB_ATTR_IEEE_APP_TABLE], rem) { + struct dcb_app *app_data; + if (nla_type(attr) != DCB_ATTR_IEEE_APP) + continue; + app_data = nla_data(attr); + if (ops->ieee_setapp) + err = ops->ieee_setapp(netdev, app_data); + else + err = dcb_ieee_setapp(netdev, app_data); + if (err) + goto err; + } + } + +err: + dcbnl_reply(err, RTM_SETDCB, DCB_CMD_IEEE_SET, DCB_ATTR_IEEE, + pid, seq, flags); + dcbnl_notify(netdev, RTM_SETDCB, DCB_CMD_IEEE_SET, seq, 0); + return err; } +static int dcbnl_ieee_get(struct net_device *netdev, struct nlattr **tb, + u32 pid, u32 seq, u16 flags) +{ + struct net *net = dev_net(netdev); + struct sk_buff *skb; + struct nlmsghdr *nlh; + struct dcbmsg *dcb; + const struct dcbnl_rtnl_ops *ops = netdev->dcbnl_ops; + int err; + + if (!ops) + return -EOPNOTSUPP; + + skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!skb) + return -ENOBUFS; + + nlh = nlmsg_put(skb, pid, seq, RTM_GETDCB, sizeof(*dcb), flags); + if (nlh == NULL) { + nlmsg_free(skb); + return -EMSGSIZE; + } + + dcb = NLMSG_DATA(nlh); + dcb->dcb_family = AF_UNSPEC; + dcb->cmd = DCB_CMD_IEEE_GET; + + err = dcbnl_ieee_fill(skb, netdev); + + if (err < 0) { + nlmsg_cancel(skb, nlh); + kfree_skb(skb); + } else { + nlmsg_end(skb, nlh); + err = rtnl_unicast(skb, net, pid); + } + + return err; +} + +static int dcbnl_ieee_del(struct net_device *netdev, struct nlattr **tb, + u32 pid, u32 seq, u16 flags) +{ + const struct dcbnl_rtnl_ops *ops = netdev->dcbnl_ops; + struct nlattr *ieee[DCB_ATTR_IEEE_MAX + 1]; + int err = -EOPNOTSUPP; + + if (!ops) + return -EOPNOTSUPP; + + if (!tb[DCB_ATTR_IEEE]) + return -EINVAL; + + err = nla_parse_nested(ieee, DCB_ATTR_IEEE_MAX, + tb[DCB_ATTR_IEEE], dcbnl_ieee_policy); + if (err) + return err; + + if (ieee[DCB_ATTR_IEEE_APP_TABLE]) { + struct nlattr *attr; + int rem; + + nla_for_each_nested(attr, ieee[DCB_ATTR_IEEE_APP_TABLE], rem) { + struct dcb_app *app_data; + + if (nla_type(attr) != DCB_ATTR_IEEE_APP) + continue; + app_data = nla_data(attr); + if (ops->ieee_delapp) + err = ops->ieee_delapp(netdev, app_data); + else + err = dcb_ieee_delapp(netdev, app_data); + if (err) + goto err; + } + } + +err: + dcbnl_reply(err, RTM_SETDCB, DCB_CMD_IEEE_DEL, DCB_ATTR_IEEE, + pid, seq, flags); + dcbnl_notify(netdev, RTM_SETDCB, DCB_CMD_IEEE_DEL, seq, 0); + return err; +} + + /* DCBX configuration */ static int dcbnl_getdcbx(struct net_device *netdev, struct nlattr **tb, u32 pid, u32 seq, u16 flags) @@ -1690,11 +1814,15 @@ static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) goto out; case DCB_CMD_IEEE_SET: ret = dcbnl_ieee_set(netdev, tb, pid, nlh->nlmsg_seq, - nlh->nlmsg_flags); + 
nlh->nlmsg_flags); goto out; case DCB_CMD_IEEE_GET: ret = dcbnl_ieee_get(netdev, tb, pid, nlh->nlmsg_seq, - nlh->nlmsg_flags); + nlh->nlmsg_flags); + goto out; + case DCB_CMD_IEEE_DEL: + ret = dcbnl_ieee_del(netdev, tb, pid, nlh->nlmsg_seq, + nlh->nlmsg_flags); goto out; case DCB_CMD_GDCBX: ret = dcbnl_getdcbx(netdev, tb, pid, nlh->nlmsg_seq, @@ -1754,12 +1882,13 @@ u8 dcb_getapp(struct net_device *dev, struct dcb_app *app) EXPORT_SYMBOL(dcb_getapp); /** - * ixgbe_dcbnl_setapp - add dcb application data to app list + * dcb_setapp - add CEE dcb application data to app list * - * Priority 0 is the default priority this removes applications - * from the app list if the priority is set to zero. + * Priority 0 is an invalid priority in CEE spec. This routine + * removes applications from the app list if the priority is + * set to zero. */ -u8 dcb_setapp(struct net_device *dev, struct dcb_app *new) +int dcb_setapp(struct net_device *dev, struct dcb_app *new) { struct dcb_app_type *itr; struct dcb_app_type event; @@ -1802,6 +1931,114 @@ out: } EXPORT_SYMBOL(dcb_setapp); +/** + * dcb_ieee_getapp_mask - retrieve the IEEE DCB application priority + * + * Helper routine which on success returns a non-zero 802.1Qaz user + * priority bitmap otherwise returns 0 to indicate the dcb_app was + * not found in APP list. + */ +u8 dcb_ieee_getapp_mask(struct net_device *dev, struct dcb_app *app) +{ + struct dcb_app_type *itr; + u8 prio = 0; + + spin_lock(&dcb_lock); + list_for_each_entry(itr, &dcb_app_list, list) { + if (itr->app.selector == app->selector && + itr->app.protocol == app->protocol && + (strncmp(itr->name, dev->name, IFNAMSIZ) == 0)) { + prio |= 1 << itr->app.priority; + } + } + spin_unlock(&dcb_lock); + + return prio; +} +EXPORT_SYMBOL(dcb_ieee_getapp_mask); + +/** + * dcb_ieee_setapp - add IEEE dcb application data to app list + * + * This adds Application data to the list. Multiple application + * entries may exists for the same selector and protocol as long + * as the priorities are different. + */ +int dcb_ieee_setapp(struct net_device *dev, struct dcb_app *new) +{ + struct dcb_app_type *itr, *entry; + struct dcb_app_type event; + int err = 0; + + memcpy(&event.name, dev->name, sizeof(event.name)); + memcpy(&event.app, new, sizeof(event.app)); + + spin_lock(&dcb_lock); + /* Search for existing match and abort if found */ + list_for_each_entry(itr, &dcb_app_list, list) { + if (itr->app.selector == new->selector && + itr->app.protocol == new->protocol && + itr->app.priority == new->priority && + (strncmp(itr->name, dev->name, IFNAMSIZ) == 0)) { + err = -EEXIST; + goto out; + } + } + + /* App entry does not exist add new entry */ + entry = kmalloc(sizeof(struct dcb_app_type), GFP_ATOMIC); + if (!entry) { + err = -ENOMEM; + goto out; + } + + memcpy(&entry->app, new, sizeof(*new)); + strncpy(entry->name, dev->name, IFNAMSIZ); + list_add(&entry->list, &dcb_app_list); +out: + spin_unlock(&dcb_lock); + if (!err) + call_dcbevent_notifiers(DCB_APP_EVENT, &event); + return err; +} +EXPORT_SYMBOL(dcb_ieee_setapp); + +/** + * dcb_ieee_delapp - delete IEEE dcb application data from list + * + * This removes a matching APP data from the APP list + */ +int dcb_ieee_delapp(struct net_device *dev, struct dcb_app *del) +{ + struct dcb_app_type *itr; + struct dcb_app_type event; + int err = -ENOENT; + + memcpy(&event.name, dev->name, sizeof(event.name)); + memcpy(&event.app, del, sizeof(event.app)); + + spin_lock(&dcb_lock); + /* Search for existing match and remove it. 
*/ + list_for_each_entry(itr, &dcb_app_list, list) { + if (itr->app.selector == del->selector && + itr->app.protocol == del->protocol && + itr->app.priority == del->priority && + (strncmp(itr->name, dev->name, IFNAMSIZ) == 0)) { + list_del(&itr->list); + kfree(itr); + err = 0; + goto out; + } + } + +out: + spin_unlock(&dcb_lock); + if (!err) + call_dcbevent_notifiers(DCB_APP_EVENT, &event); + return err; +} +EXPORT_SYMBOL(dcb_ieee_delapp); + static void dcb_flushapp(void) { struct dcb_app_type *app; @@ -1819,8 +2056,8 @@ static int __init dcbnl_init(void) { INIT_LIST_HEAD(&dcb_app_list); - rtnl_register(PF_UNSPEC, RTM_GETDCB, dcb_doit, NULL); - rtnl_register(PF_UNSPEC, RTM_SETDCB, dcb_doit, NULL); + rtnl_register(PF_UNSPEC, RTM_GETDCB, dcb_doit, NULL, NULL); + rtnl_register(PF_UNSPEC, RTM_SETDCB, dcb_doit, NULL, NULL); return 0; } diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index cf26ac7..48530b4 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -1313,7 +1313,7 @@ static void *dn_dev_seq_next(struct seq_file *seq, void *v, loff_t *pos) ++*pos; - dev = (struct net_device *)v; + dev = v; if (v == SEQ_START_TOKEN) dev = net_device_entry(&init_net.dev_base_head); @@ -1414,9 +1414,9 @@ void __init dn_dev_init(void) dn_dev_devices_on(); - rtnl_register(PF_DECnet, RTM_NEWADDR, dn_nl_newaddr, NULL); - rtnl_register(PF_DECnet, RTM_DELADDR, dn_nl_deladdr, NULL); - rtnl_register(PF_DECnet, RTM_GETADDR, NULL, dn_nl_dump_ifaddr); + rtnl_register(PF_DECnet, RTM_NEWADDR, dn_nl_newaddr, NULL, NULL); + rtnl_register(PF_DECnet, RTM_DELADDR, dn_nl_deladdr, NULL, NULL); + rtnl_register(PF_DECnet, RTM_GETADDR, NULL, dn_nl_dump_ifaddr, NULL); proc_net_fops_create(&init_net, "decnet_dev", S_IRUGO, &dn_dev_seq_fops); diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c index 1c74ed3..104324d 100644 --- a/net/decnet/dn_fib.c +++ b/net/decnet/dn_fib.c @@ -763,8 +763,8 @@ void __init dn_fib_init(void) register_dnaddr_notifier(&dn_fib_dnaddr_notifier); - rtnl_register(PF_DECnet, RTM_NEWROUTE, dn_fib_rtm_newroute, NULL); - rtnl_register(PF_DECnet, RTM_DELROUTE, dn_fib_rtm_delroute, NULL); + rtnl_register(PF_DECnet, RTM_NEWROUTE, dn_fib_rtm_newroute, NULL, NULL); + rtnl_register(PF_DECnet, RTM_DELROUTE, dn_fib_rtm_delroute, NULL, NULL); } diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 74544bc..2949ca4 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -1841,10 +1841,11 @@ void __init dn_route_init(void) proc_net_fops_create(&init_net, "decnet_cache", S_IRUGO, &dn_rt_cache_seq_fops); #ifdef CONFIG_DECNET_ROUTER - rtnl_register(PF_DECnet, RTM_GETROUTE, dn_cache_getroute, dn_fib_dump); + rtnl_register(PF_DECnet, RTM_GETROUTE, dn_cache_getroute, + dn_fib_dump, NULL); #else rtnl_register(PF_DECnet, RTM_GETROUTE, dn_cache_getroute, - dn_cache_dump); + dn_cache_dump, NULL); #endif } diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c index cfa7a5e..fa000d2 100644 --- a/net/dns_resolver/dns_key.c +++ b/net/dns_resolver/dns_key.c @@ -212,10 +212,12 @@ static void dns_resolver_describe(const struct key *key, struct seq_file *m) int err = key->type_data.x[0]; seq_puts(m, key->description); - if (err) - seq_printf(m, ": %d", err); - else - seq_printf(m, ": %u", key->datalen); + if (key_is_instantiated(key)) { + if (err) + seq_printf(m, ": %d", err); + else + seq_printf(m, ": %u", key->datalen); + } } /* diff --git a/net/ieee802154/nl-phy.c b/net/ieee802154/nl-phy.c index ed0eab3..02548b2 100644 --- a/net/ieee802154/nl-phy.c +++ b/net/ieee802154/nl-phy.c @@ 
-44,7 +44,7 @@ static int ieee802154_nl_fill_phy(struct sk_buff *msg, u32 pid, pr_debug("%s\n", __func__); if (!buf) - goto out; + return -EMSGSIZE; hdr = genlmsg_put(msg, 0, seq, &nl802154_family, flags, IEEE802154_LIST_PHY); @@ -65,6 +65,7 @@ static int ieee802154_nl_fill_phy(struct sk_buff *msg, u32 pid, pages * sizeof(uint32_t), buf); mutex_unlock(&phy->pib_lock); + kfree(buf); return genlmsg_end(msg, hdr); nla_put_failure: diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index cc14631..0600f0f 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -465,6 +465,9 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if (addr_len < sizeof(struct sockaddr_in)) goto out; + if (addr->sin_family != AF_INET) + goto out; + chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr); /* Not specified by any standard per-se, however it breaks too @@ -673,6 +676,7 @@ int inet_accept(struct socket *sock, struct socket *newsock, int flags) lock_sock(sk2); + sock_rps_record_flow(sk2); WARN_ON(!((1 << sk2->sk_state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT | TCPF_CLOSE))); @@ -1434,11 +1438,11 @@ EXPORT_SYMBOL_GPL(inet_ctl_sock_create); unsigned long snmp_fold_field(void __percpu *mib[], int offt) { unsigned long res = 0; - int i; + int i, j; for_each_possible_cpu(i) { - res += *(((unsigned long *) per_cpu_ptr(mib[0], i)) + offt); - res += *(((unsigned long *) per_cpu_ptr(mib[1], i)) + offt); + for (j = 0; j < SNMP_ARRAY_SZ; j++) + res += *(((unsigned long *) per_cpu_ptr(mib[j], i)) + offt); } return res; } @@ -1452,28 +1456,19 @@ u64 snmp_fold_field64(void __percpu *mib[], int offt, size_t syncp_offset) int cpu; for_each_possible_cpu(cpu) { - void *bhptr, *userptr; + void *bhptr; struct u64_stats_sync *syncp; - u64 v_bh, v_user; + u64 v; unsigned int start; - /* first mib used by softirq context, we must use _bh() accessors */ - bhptr = per_cpu_ptr(SNMP_STAT_BHPTR(mib), cpu); + bhptr = per_cpu_ptr(mib[0], cpu); syncp = (struct u64_stats_sync *)(bhptr + syncp_offset); do { start = u64_stats_fetch_begin_bh(syncp); - v_bh = *(((u64 *) bhptr) + offt); + v = *(((u64 *) bhptr) + offt); } while (u64_stats_fetch_retry_bh(syncp, start)); - /* second mib used in USER context */ - userptr = per_cpu_ptr(SNMP_STAT_USRPTR(mib), cpu); - syncp = (struct u64_stats_sync *)(userptr + syncp_offset); - do { - start = u64_stats_fetch_begin(syncp); - v_user = *(((u64 *) userptr) + offt); - } while (u64_stats_fetch_retry(syncp, start)); - - res += v_bh + v_user; + res += v; } return res; } @@ -1485,25 +1480,28 @@ int snmp_mib_init(void __percpu *ptr[2], size_t mibsize, size_t align) BUG_ON(ptr == NULL); ptr[0] = __alloc_percpu(mibsize, align); if (!ptr[0]) - goto err0; + return -ENOMEM; +#if SNMP_ARRAY_SZ == 2 ptr[1] = __alloc_percpu(mibsize, align); - if (!ptr[1]) - goto err1; + if (!ptr[1]) { + free_percpu(ptr[0]); + ptr[0] = NULL; + return -ENOMEM; + } +#endif return 0; -err1: - free_percpu(ptr[0]); - ptr[0] = NULL; -err0: - return -ENOMEM; } EXPORT_SYMBOL_GPL(snmp_mib_init); -void snmp_mib_free(void __percpu *ptr[2]) +void snmp_mib_free(void __percpu *ptr[SNMP_ARRAY_SZ]) { + int i; + BUG_ON(ptr == NULL); - free_percpu(ptr[0]); - free_percpu(ptr[1]); - ptr[0] = ptr[1] = NULL; + for (i = 0; i < SNMP_ARRAY_SZ; i++) { + free_percpu(ptr[i]); + ptr[i] = NULL; + } } EXPORT_SYMBOL_GPL(snmp_mib_free); diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 0d4a184..37b3c18 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1833,8 +1833,8 @@ void __init devinet_init(void) 
rtnl_af_register(&inet_af_ops); - rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL); - rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL); - rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr); + rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL); + rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL); + rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL); } diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 2252471..92fc5f6 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -1124,9 +1124,9 @@ static struct pernet_operations fib_net_ops = { void __init ip_fib_init(void) { - rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL); - rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL); - rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib); + rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL, NULL); + rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL, NULL); + rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib, NULL); register_pernet_subsys(&fib_net_ops); register_netdevice_notifier(&fib_netdev_notifier); diff --git a/net/ipv4/gre.c b/net/ipv4/gre.c index c6933f2..9dbe108 100644 --- a/net/ipv4/gre.c +++ b/net/ipv4/gre.c @@ -16,7 +16,6 @@ #include <linux/skbuff.h> #include <linux/in.h> #include <linux/netdevice.h> -#include <linux/version.h> #include <linux/spinlock.h> #include <net/protocol.h> #include <net/gre.h> diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 61fac4c..c14d88a 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -33,7 +33,7 @@ EXPORT_SYMBOL(inet_csk_timer_bug_msg); * This struct holds the first and last local port number. 
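All of the rtnl_register()/__rtnl_register() churn in these files is mechanical: the function grew a third callback slot after doit and dumpit (apparently a hook for pre-computing dump sizes, judging by its use elsewhere in this series), and every caller shown here simply passes NULL for it. A sketch of a registration under the new signature, assuming the 3.0-era callback prototypes and hypothetical handler names; the message type is arbitrary:

#include <linux/netlink.h>
#include <net/rtnetlink.h>

/* Hypothetical doit handler: services a single request message. */
static int example_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
	return 0;
}

/* Hypothetical dumpit handler: called repeatedly to fill a dump. */
static int example_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
{
	return skb->len;	/* returning 0 ends the dump */
}

static int __init example_rtnl_init(void)
{
	/* the trailing NULL is the new, optional third callback */
	rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, example_doit,
		      example_dumpit, NULL);
	return 0;
}
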
*/ struct local_ports sysctl_local_ports __read_mostly = { - .lock = SEQLOCK_UNLOCKED, + .lock = __SEQLOCK_UNLOCKED(sysctl_local_ports.lock), .range = { 32768, 61000 }, }; diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 6ffe94c..389a2e6 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -437,7 +437,7 @@ static int valid_cc(const void *bc, int len, int cc) return 0; if (cc == len) return 1; - if (op->yes < 4) + if (op->yes < 4 || op->yes & 3) return 0; len -= op->yes; bc += op->yes; @@ -447,11 +447,11 @@ static int valid_cc(const void *bc, int len, int cc) static int inet_diag_bc_audit(const void *bytecode, int bytecode_len) { - const unsigned char *bc = bytecode; + const void *bc = bytecode; int len = bytecode_len; while (len > 0) { - struct inet_diag_bc_op *op = (struct inet_diag_bc_op *)bc; + const struct inet_diag_bc_op *op = bc; //printk("BC: %d %d %d {%d} / %d\n", op->code, op->yes, op->no, op[1].no, len); switch (op->code) { @@ -462,22 +462,20 @@ static int inet_diag_bc_audit(const void *bytecode, int bytecode_len) case INET_DIAG_BC_S_LE: case INET_DIAG_BC_D_GE: case INET_DIAG_BC_D_LE: - if (op->yes < 4 || op->yes > len + 4) - return -EINVAL; case INET_DIAG_BC_JMP: - if (op->no < 4 || op->no > len + 4) + if (op->no < 4 || op->no > len + 4 || op->no & 3) return -EINVAL; if (op->no < len && !valid_cc(bytecode, bytecode_len, len - op->no)) return -EINVAL; break; case INET_DIAG_BC_NOP: - if (op->yes < 4 || op->yes > len + 4) - return -EINVAL; break; default: return -EINVAL; } + if (op->yes < 4 || op->yes > len + 4 || op->yes & 3) + return -EINVAL; bc += op->yes; len -= op->yes; } @@ -871,7 +869,7 @@ static int inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) } return netlink_dump_start(idiagnl, skb, nlh, - inet_diag_dump, NULL); + inet_diag_dump, NULL, 0); } return inet_diag_get_exact(skb, nlh); diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index ce616d9..dafbf2c 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -54,15 +54,11 @@ * 1. Nodes may appear in the tree only with the pool lock held. * 2. Nodes may disappear from the tree only with the pool lock held * AND reference count being 0. - * 3. Nodes appears and disappears from unused node list only under - * "inet_peer_unused_lock". - * 4. Global variable peer_total is modified under the pool lock. - * 5. struct inet_peer fields modification: + * 3. Global variable peer_total is modified under the pool lock. + * 4. 
struct inet_peer fields modification: * avl_left, avl_right, avl_parent, avl_height: pool lock - * unused: unused node list lock * refcnt: atomically against modifications on other CPU; * usually under some other lock to prevent node disappearing - * dtime: unused node list lock * daddr: unchangeable * ip_id_count: atomic value (no lock needed) */ @@ -104,19 +100,6 @@ int inet_peer_threshold __read_mostly = 65536 + 128; /* start to throw entries m * aggressively at this stage */ int inet_peer_minttl __read_mostly = 120 * HZ; /* TTL under high load: 120 sec */ int inet_peer_maxttl __read_mostly = 10 * 60 * HZ; /* usual time to live: 10 min */ -int inet_peer_gc_mintime __read_mostly = 10 * HZ; -int inet_peer_gc_maxtime __read_mostly = 120 * HZ; - -static struct { - struct list_head list; - spinlock_t lock; -} unused_peers = { - .list = LIST_HEAD_INIT(unused_peers.list), - .lock = __SPIN_LOCK_UNLOCKED(unused_peers.lock), -}; - -static void peer_check_expire(unsigned long dummy); -static DEFINE_TIMER(peer_periodic_timer, peer_check_expire, 0, 0); /* Called from ip_output.c:ip_init */ @@ -142,21 +125,6 @@ void __init inet_initpeers(void) 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); - /* All the timers, started at system startup tend - to synchronize. Perturb it a bit. - */ - peer_periodic_timer.expires = jiffies - + net_random() % inet_peer_gc_maxtime - + inet_peer_gc_maxtime; - add_timer(&peer_periodic_timer); -} - -/* Called with or without local BH being disabled. */ -static void unlink_from_unused(struct inet_peer *p) -{ - spin_lock_bh(&unused_peers.lock); - list_del_init(&p->unused); - spin_unlock_bh(&unused_peers.lock); } static int addr_compare(const struct inetpeer_addr *a, @@ -203,20 +171,6 @@ static int addr_compare(const struct inetpeer_addr *a, u; \ }) -static bool atomic_add_unless_return(atomic_t *ptr, int a, int u, int *newv) -{ - int cur, old = atomic_read(ptr); - - while (old != u) { - *newv = old + a; - cur = atomic_cmpxchg(ptr, old, *newv); - if (cur == old) - return true; - old = cur; - } - return false; -} - /* * Called with rcu_read_lock() * Because we hold no lock against a writer, its quite possible we fall @@ -225,8 +179,7 @@ static bool atomic_add_unless_return(atomic_t *ptr, int a, int u, int *newv) * We exit from this function if number of links exceeds PEER_MAXDEPTH */ static struct inet_peer *lookup_rcu(const struct inetpeer_addr *daddr, - struct inet_peer_base *base, - int *newrefcnt) + struct inet_peer_base *base) { struct inet_peer *u = rcu_dereference(base->root); int count = 0; @@ -235,11 +188,9 @@ static struct inet_peer *lookup_rcu(const struct inetpeer_addr *daddr, int cmp = addr_compare(daddr, &u->daddr); if (cmp == 0) { /* Before taking a reference, check if this entry was - * deleted, unlink_from_pool() sets refcnt=-1 to make - * distinction between an unused entry (refcnt=0) and - * a freed one. + * deleted (refcnt=-1) */ - if (!atomic_add_unless_return(&u->refcnt, 1, -1, newrefcnt)) + if (!atomic_add_unless(&u->refcnt, 1, -1)) u = NULL; return u; } @@ -366,137 +317,96 @@ static void inetpeer_free_rcu(struct rcu_head *head) kmem_cache_free(peer_cachep, container_of(head, struct inet_peer, rcu)); } -/* May be called with local BH enabled. */ static void unlink_from_pool(struct inet_peer *p, struct inet_peer_base *base, struct inet_peer __rcu **stack[PEER_MAXDEPTH]) { - int do_free; - - do_free = 0; - - write_seqlock_bh(&base->lock); - /* Check the reference counter. 
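Back in the inet_diag hunk above, the audit now applies one uniform rule to every opcode instead of repeating it per case: a jump target (op->yes, and op->no for conditionals) must be at least 4, at most len + 4 (landing exactly one past the end means accept), and a multiple of 4 so the next op stays aligned; the added `& 3` tests are what close the unaligned-jump hole. The invariant in isolation, as a plain C sketch:

#include <stdbool.h>

/* Sketch of the jump rule inet_diag_bc_audit() now enforces for both
 * op->yes and op->no: ops are 4-byte units, so a valid jump is at
 * least one op forward, stays within the remaining program (len + 4
 * marks "fall off the end", i.e. accept), and is 4-byte aligned. */
static bool bc_jump_ok(unsigned int jmp, unsigned int remaining)
{
	return jmp >= 4 && jmp <= remaining + 4 && (jmp & 3) == 0;
}
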
It was artificially incremented by 1 - * in cleanup() function to prevent sudden disappearing. If we can - * atomically (because of lockless readers) take this last reference, - * it's safe to remove the node and free it later. - * We use refcnt=-1 to alert lockless readers this entry is deleted. - */ - if (atomic_cmpxchg(&p->refcnt, 1, -1) == 1) { - struct inet_peer __rcu ***stackptr, ***delp; - if (lookup(&p->daddr, stack, base) != p) - BUG(); - delp = stackptr - 1; /* *delp[0] == p */ - if (p->avl_left == peer_avl_empty_rcu) { - *delp[0] = p->avl_right; - --stackptr; - } else { - /* look for a node to insert instead of p */ - struct inet_peer *t; - t = lookup_rightempty(p, base); - BUG_ON(rcu_deref_locked(*stackptr[-1], base) != t); - **--stackptr = t->avl_left; - /* t is removed, t->daddr > x->daddr for any - * x in p->avl_left subtree. - * Put t in the old place of p. */ - RCU_INIT_POINTER(*delp[0], t); - t->avl_left = p->avl_left; - t->avl_right = p->avl_right; - t->avl_height = p->avl_height; - BUG_ON(delp[1] != &p->avl_left); - delp[1] = &t->avl_left; /* was &p->avl_left */ - } - peer_avl_rebalance(stack, stackptr, base); - base->total--; - do_free = 1; + struct inet_peer __rcu ***stackptr, ***delp; + + if (lookup(&p->daddr, stack, base) != p) + BUG(); + delp = stackptr - 1; /* *delp[0] == p */ + if (p->avl_left == peer_avl_empty_rcu) { + *delp[0] = p->avl_right; + --stackptr; + } else { + /* look for a node to insert instead of p */ + struct inet_peer *t; + t = lookup_rightempty(p, base); + BUG_ON(rcu_deref_locked(*stackptr[-1], base) != t); + **--stackptr = t->avl_left; + /* t is removed, t->daddr > x->daddr for any + * x in p->avl_left subtree. + * Put t in the old place of p. */ + RCU_INIT_POINTER(*delp[0], t); + t->avl_left = p->avl_left; + t->avl_right = p->avl_right; + t->avl_height = p->avl_height; + BUG_ON(delp[1] != &p->avl_left); + delp[1] = &t->avl_left; /* was &p->avl_left */ } - write_sequnlock_bh(&base->lock); - - if (do_free) - call_rcu(&p->rcu, inetpeer_free_rcu); - else - /* The node is used again. Decrease the reference counter - * back. The loop "cleanup -> unlink_from_unused - * -> unlink_from_pool -> putpeer -> link_to_unused - * -> cleanup (for the same node)" - * doesn't really exist because the entry will have a - * recent deletion time and will not be cleaned again soon. - */ - inet_putpeer(p); + peer_avl_rebalance(stack, stackptr, base); + base->total--; + call_rcu(&p->rcu, inetpeer_free_rcu); } static struct inet_peer_base *family_to_base(int family) { - return (family == AF_INET ? &v4_peers : &v6_peers); + return family == AF_INET ? &v4_peers : &v6_peers; } -static struct inet_peer_base *peer_to_base(struct inet_peer *p) +/* perform garbage collect on all items stacked during a lookup */ +static int inet_peer_gc(struct inet_peer_base *base, + struct inet_peer __rcu **stack[PEER_MAXDEPTH], + struct inet_peer __rcu ***stackptr) { - return family_to_base(p->daddr.family); -} - -/* May be called with local BH enabled. */ -static int cleanup_once(unsigned long ttl, struct inet_peer __rcu **stack[PEER_MAXDEPTH]) -{ - struct inet_peer *p = NULL; - - /* Remove the first entry from the list of unused nodes. 
*/ - spin_lock_bh(&unused_peers.lock); - if (!list_empty(&unused_peers.list)) { - __u32 delta; + struct inet_peer *p, *gchead = NULL; + __u32 delta, ttl; + int cnt = 0; - p = list_first_entry(&unused_peers.list, struct inet_peer, unused); + if (base->total >= inet_peer_threshold) + ttl = 0; /* be aggressive */ + else + ttl = inet_peer_maxttl + - (inet_peer_maxttl - inet_peer_minttl) / HZ * + base->total / inet_peer_threshold * HZ; + stackptr--; /* last stack slot is peer_avl_empty */ + while (stackptr > stack) { + stackptr--; + p = rcu_deref_locked(**stackptr, base); delta = (__u32)jiffies - p->dtime; - - if (delta < ttl) { - /* Do not prune fresh entries. */ - spin_unlock_bh(&unused_peers.lock); - return -1; + if (atomic_read(&p->refcnt) == 0 && delta >= ttl && + atomic_cmpxchg(&p->refcnt, 0, -1) == 0) { + p->gc_next = gchead; + gchead = p; } - - list_del_init(&p->unused); - - /* Grab an extra reference to prevent node disappearing - * before unlink_from_pool() call. */ - atomic_inc(&p->refcnt); } - spin_unlock_bh(&unused_peers.lock); - - if (p == NULL) - /* It means that the total number of USED entries has - * grown over inet_peer_threshold. It shouldn't really - * happen because of entry limits in route cache. */ - return -1; - - unlink_from_pool(p, peer_to_base(p), stack); - return 0; + while ((p = gchead) != NULL) { + gchead = p->gc_next; + cnt++; + unlink_from_pool(p, base, stack); + } + return cnt; } -/* Called with or without local BH being disabled. */ struct inet_peer *inet_getpeer(struct inetpeer_addr *daddr, int create) { struct inet_peer __rcu **stack[PEER_MAXDEPTH], ***stackptr; struct inet_peer_base *base = family_to_base(daddr->family); struct inet_peer *p; unsigned int sequence; - int invalidated, newrefcnt = 0; + int invalidated, gccnt = 0; - /* Look up for the address quickly, lockless. + /* Attempt a lockless lookup first. * Because of a concurrent writer, we might not find an existing entry. */ rcu_read_lock(); sequence = read_seqbegin(&base->lock); - p = lookup_rcu(daddr, base, &newrefcnt); + p = lookup_rcu(daddr, base); invalidated = read_seqretry(&base->lock, sequence); rcu_read_unlock(); - if (p) { -found: /* The existing node has been found. - * Remove the entry from unused list if it was there. - */ - if (newrefcnt == 1) - unlink_from_unused(p); + if (p) return p; - } /* If no writer did a change during our lookup, we can return early. */ if (!create && !invalidated) @@ -506,11 +416,17 @@ found: /* The existing node has been found. * At least, nodes should be hot in our cache. */ write_seqlock_bh(&base->lock); +relookup: p = lookup(daddr, stack, base); if (p != peer_avl_empty) { - newrefcnt = atomic_inc_return(&p->refcnt); + atomic_inc(&p->refcnt); write_sequnlock_bh(&base->lock); - goto found; + return p; + } + if (!gccnt) { + gccnt = inet_peer_gc(base, stack, stackptr); + if (gccnt && create) + goto relookup; } p = create ? kmem_cache_alloc(peer_cachep, GFP_ATOMIC) : NULL; if (p) { @@ -525,7 +441,6 @@ found: /* The existing node has been found. p->pmtu_expires = 0; p->pmtu_orig = 0; memset(&p->redirect_learned, 0, sizeof(p->redirect_learned)); - INIT_LIST_HEAD(&p->unused); /* Link the node. */ @@ -534,63 +449,14 @@ found: /* The existing node has been found. } write_sequnlock_bh(&base->lock); - if (base->total >= inet_peer_threshold) - /* Remove one less-recently-used entry. 
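What keeps this lockless: a reader may only take a reference via atomic_add_unless(&p->refcnt, 1, -1), and the collector may only claim a node by moving refcnt from 0 (idle) to -1 (dead) with cmpxchg, so exactly one side can win a race on an idle node. A standalone sketch of the two transitions, written with C11 atomics rather than the kernel's atomic_t purely for illustration:

#include <stdatomic.h>
#include <stdbool.h>

/* refcnt protocol: 0 = idle, > 0 = in use, -1 = claimed for deletion */

static bool reader_take_ref(atomic_int *refcnt)	/* atomic_add_unless(p, 1, -1) */
{
	int old = atomic_load(refcnt);

	while (old != -1) {
		/* on failure, 'old' is reloaded with the current value */
		if (atomic_compare_exchange_weak(refcnt, &old, old + 1))
			return true;	/* reference taken */
	}
	return false;			/* node already claimed by gc */
}

static bool gc_claim(atomic_int *refcnt)	/* cmpxchg(&p->refcnt, 0, -1) */
{
	int expected = 0;

	/* succeeds only while no reader holds a reference */
	return atomic_compare_exchange_strong(refcnt, &expected, -1);
}
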
*/ - cleanup_once(0, stack); - return p; } - -static int compute_total(void) -{ - return v4_peers.total + v6_peers.total; -} EXPORT_SYMBOL_GPL(inet_getpeer); -/* Called with local BH disabled. */ -static void peer_check_expire(unsigned long dummy) -{ - unsigned long now = jiffies; - int ttl, total; - struct inet_peer __rcu **stack[PEER_MAXDEPTH]; - - total = compute_total(); - if (total >= inet_peer_threshold) - ttl = inet_peer_minttl; - else - ttl = inet_peer_maxttl - - (inet_peer_maxttl - inet_peer_minttl) / HZ * - total / inet_peer_threshold * HZ; - while (!cleanup_once(ttl, stack)) { - if (jiffies != now) - break; - } - - /* Trigger the timer after inet_peer_gc_mintime .. inet_peer_gc_maxtime - * interval depending on the total number of entries (more entries, - * less interval). */ - total = compute_total(); - if (total >= inet_peer_threshold) - peer_periodic_timer.expires = jiffies + inet_peer_gc_mintime; - else - peer_periodic_timer.expires = jiffies - + inet_peer_gc_maxtime - - (inet_peer_gc_maxtime - inet_peer_gc_mintime) / HZ * - total / inet_peer_threshold * HZ; - add_timer(&peer_periodic_timer); -} - void inet_putpeer(struct inet_peer *p) { - local_bh_disable(); - - if (atomic_dec_and_lock(&p->refcnt, &unused_peers.lock)) { - list_add_tail(&p->unused, &unused_peers.list); - p->dtime = (__u32)jiffies; - spin_unlock(&unused_peers.lock); - } - - local_bh_enable(); + p->dtime = (__u32)jiffies; + atomic_dec(&p->refcnt); } EXPORT_SYMBOL_GPL(inet_putpeer); diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index c8f48ef..073a9b0 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -165,7 +165,7 @@ int ip_call_ra_chain(struct sk_buff *skb) (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dev->ifindex) && net_eq(sock_net(sk), dev_net(dev))) { - if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { + if (ip_is_fragment(ip_hdr(skb))) { if (ip_defrag(skb, IP_DEFRAG_CALL_RA_CHAIN)) return 1; } @@ -256,7 +256,7 @@ int ip_local_deliver(struct sk_buff *skb) * Reassemble IP fragments. 
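The ip_is_fragment() substitution repeated in this and the netfilter/xfrm hunks below is purely cosmetic; the helper's body is not part of this diff, but judging by the call sites it presumably lives in include/net/ip.h as the same test, wrapped:

#include <linux/ip.h>

/* Assumed definition: true for any fragment, i.e. the more-fragments
 * bit is set or the fragment offset is nonzero. */
static inline bool ip_is_fragment(const struct iphdr *iph)
{
	return (iph->frag_off & htons(IP_MF | IP_OFFSET)) != 0;
}
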
*/ - if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { + if (ip_is_fragment(ip_hdr(skb))) { if (ip_defrag(skb, IP_DEFRAG_LOCAL_DELIVER)) return 0; } diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index c3118e1..ec93335 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -14,6 +14,7 @@ #include <linux/slab.h> #include <linux/types.h> #include <asm/uaccess.h> +#include <asm/unaligned.h> #include <linux/skbuff.h> #include <linux/ip.h> #include <linux/icmp.h> @@ -350,7 +351,7 @@ int ip_options_compile(struct net *net, goto error; } if (optptr[2] <= optlen) { - __be32 *timeptr = NULL; + unsigned char *timeptr = NULL; if (optptr[2]+3 > optptr[1]) { pp_ptr = optptr + 2; goto error; @@ -359,7 +360,7 @@ int ip_options_compile(struct net *net, case IPOPT_TS_TSONLY: opt->ts = optptr - iph; if (skb) - timeptr = (__be32*)&optptr[optptr[2]-1]; + timeptr = &optptr[optptr[2]-1]; opt->ts_needtime = 1; optptr[2] += 4; break; @@ -371,7 +372,7 @@ int ip_options_compile(struct net *net, opt->ts = optptr - iph; if (rt) { memcpy(&optptr[optptr[2]-1], &rt->rt_spec_dst, 4); - timeptr = (__be32*)&optptr[optptr[2]+3]; + timeptr = &optptr[optptr[2]+3]; } opt->ts_needaddr = 1; opt->ts_needtime = 1; @@ -389,7 +390,7 @@ int ip_options_compile(struct net *net, if (inet_addr_type(net, addr) == RTN_UNICAST) break; if (skb) - timeptr = (__be32*)&optptr[optptr[2]+3]; + timeptr = &optptr[optptr[2]+3]; } opt->ts_needtime = 1; optptr[2] += 8; @@ -403,10 +404,10 @@ int ip_options_compile(struct net *net, } if (timeptr) { struct timespec tv; - __be32 midtime; + u32 midtime; getnstimeofday(&tv); - midtime = htonl((tv.tv_sec % 86400) * MSEC_PER_SEC + tv.tv_nsec / NSEC_PER_MSEC); - memcpy(timeptr, &midtime, sizeof(__be32)); + midtime = (tv.tv_sec % 86400) * MSEC_PER_SEC + tv.tv_nsec / NSEC_PER_MSEC; + put_unaligned_be32(midtime, timeptr); opt->is_changed = 1; } } else { diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 98af369..167da8b 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -489,7 +489,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) if (first_len - hlen > mtu || ((first_len - hlen) & 7) || - (iph->frag_off & htons(IP_MF|IP_OFFSET)) || + ip_is_fragment(iph) || skb_cloned(skb)) goto slow_path; @@ -799,7 +799,9 @@ static int __ip_append_data(struct sock *sk, int csummode = CHECKSUM_NONE; struct rtable *rt = (struct rtable *)cork->dst; - exthdrlen = transhdrlen ? rt->dst.header_len : 0; + skb = skb_peek_tail(queue); + + exthdrlen = !skb ? 
rt->dst.header_len : 0; length += exthdrlen; transhdrlen += exthdrlen; mtu = cork->fragsize; @@ -825,8 +827,6 @@ static int __ip_append_data(struct sock *sk, !exthdrlen) csummode = CHECKSUM_PARTIAL; - skb = skb_peek_tail(queue); - cork->length += length; if (((length > mtu) || (skb && skb_is_gso(skb))) && (sk->sk_protocol == IPPROTO_UDP) && diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index ab7e554..ae93dd5 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -932,7 +932,7 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str goto drop; /* Fragments are not supported */ - if (h->frag_off & htons(IP_OFFSET | IP_MF)) { + if (ip_is_fragment(h)) { if (net_ratelimit()) printk(KERN_ERR "DHCP/BOOTP: Ignoring fragmented " "reply.\n"); diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 30a7763..aae2bd8 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -2544,7 +2544,8 @@ int __init ip_mr_init(void) goto add_proto_fail; } #endif - rtnl_register(RTNL_FAMILY_IPMR, RTM_GETROUTE, NULL, ipmr_rtm_dumproute); + rtnl_register(RTNL_FAMILY_IPMR, RTM_GETROUTE, + NULL, ipmr_rtm_dumproute, NULL); return 0; #ifdef CONFIG_IP_PIMSM_V2 diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index d2c1311..5c9b9d9 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -203,7 +203,8 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp) else pmsg->outdev_name[0] = '\0'; - if (entry->indev && entry->skb->dev) { + if (entry->indev && entry->skb->dev && + entry->skb->mac_header != entry->skb->network_header) { pmsg->hw_type = entry->skb->dev->type; pmsg->hw_addrlen = dev_parse_header(entry->skb, pmsg->hw_addr); @@ -402,7 +403,8 @@ ipq_dev_drop(int ifindex) static inline void __ipq_rcv_skb(struct sk_buff *skb) { - int status, type, pid, flags, nlmsglen, skblen; + int status, type, pid, flags; + unsigned int nlmsglen, skblen; struct nlmsghdr *nlh; skblen = skb->len; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 7647438..24e556e 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -566,7 +566,7 @@ check_entry(const struct ipt_entry *e, const char *name) const struct xt_entry_target *t; if (!ip_checkentry(&e->ip)) { - duprintf("ip check failed %p %s.\n", e, par->match->name); + duprintf("ip check failed %p %s.\n", e, name); return -EINVAL; } diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index d609ac3..5c9e97c 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -307,7 +307,7 @@ clusterip_tg(struct sk_buff *skb, const struct xt_action_param *par) * error messages (RELATED) and information requests (see below) */ if (ip_hdr(skb)->protocol == IPPROTO_ICMP && (ctinfo == IP_CT_RELATED || - ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY)) + ctinfo == IP_CT_RELATED_REPLY)) return XT_CONTINUE; /* ip_conntrack_icmp guarantees us that we only have ICMP_ECHO, @@ -321,12 +321,12 @@ clusterip_tg(struct sk_buff *skb, const struct xt_action_param *par) ct->mark = hash; break; case IP_CT_RELATED: - case IP_CT_RELATED+IP_CT_IS_REPLY: + case IP_CT_RELATED_REPLY: /* FIXME: we don't handle expectations at the * moment. they can arrive on a different node than * the master connection (e.g. 
FTP passive mode) */ case IP_CT_ESTABLISHED: - case IP_CT_ESTABLISHED+IP_CT_IS_REPLY: + case IP_CT_ESTABLISHED_REPLY: break; default: break; diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c index d2ed9dc..9931152 100644 --- a/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/net/ipv4/netfilter/ipt_MASQUERADE.c @@ -60,7 +60,7 @@ masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par) nat = nfct_nat(ct); NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED || - ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY)); + ctinfo == IP_CT_RELATED_REPLY)); /* Source address is 0.0.0.0 - locally generated packet that is * probably not supposed to be masqueraded. diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c index af6e9c7..2b57e52 100644 --- a/net/ipv4/netfilter/ipt_ecn.c +++ b/net/ipv4/netfilter/ipt_ecn.c @@ -25,7 +25,8 @@ MODULE_LICENSE("GPL"); static inline bool match_ip(const struct sk_buff *skb, const struct ipt_ecn_info *einfo) { - return (ip_hdr(skb)->tos & IPT_ECN_IP_MASK) == einfo->ip_ect; + return ((ip_hdr(skb)->tos & IPT_ECN_IP_MASK) == einfo->ip_ect) ^ + !!(einfo->invert & IPT_ECN_OP_MATCH_IP); } static inline bool match_tcp(const struct sk_buff *skb, @@ -76,8 +77,6 @@ static bool ecn_mt(const struct sk_buff *skb, struct xt_action_param *par) return false; if (info->operation & (IPT_ECN_OP_MATCH_ECE|IPT_ECN_OP_MATCH_CWR)) { - if (ip_hdr(skb)->protocol != IPPROTO_TCP) - return false; if (!match_tcp(skb, info, &par->hotdrop)) return false; } @@ -97,7 +96,7 @@ static int ecn_mt_check(const struct xt_mtchk_param *par) return -EINVAL; if (info->operation & (IPT_ECN_OP_MATCH_ECE|IPT_ECN_OP_MATCH_CWR) && - ip->proto != IPPROTO_TCP) { + (ip->proto != IPPROTO_TCP || ip->invflags & IPT_INV_PROTO)) { pr_info("cannot match TCP bits in rule for non-tcp packets\n"); return -EINVAL; } diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 5a03c02..de9da21 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -101,7 +101,7 @@ static unsigned int ipv4_confirm(unsigned int hooknum, /* This is where we call the helper: as the packet goes out. 
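The match_ip() change in ipt_ecn above is the standard netfilter inversion idiom: compute the raw match, then XOR it with the normalized invert flag so `-m ecn ! --ecn-ip-ect ...` rules work. The shape of the idiom on its own, as a sketch:

#include <stdbool.h>

/* Invertible match: XOR flips the result exactly when the invert bit
 * is set; '!!' collapses the multi-bit flag test to 0 or 1 so the XOR
 * operates on clean booleans. */
static bool match_with_invert(bool raw_match, unsigned int invflags,
			      unsigned int invert_bit)
{
	return raw_match ^ !!(invflags & invert_bit);
}
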
*/ ct = nf_ct_get(skb, &ctinfo); - if (!ct || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY) + if (!ct || ctinfo == IP_CT_RELATED_REPLY) goto out; help = nfct_help(ct); @@ -121,7 +121,9 @@ static unsigned int ipv4_confirm(unsigned int hooknum, return ret; } - if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status)) { + /* adjust seqs for loopback traffic only in outgoing direction */ + if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) && + !nf_is_loopback_packet(skb)) { typeof(nf_nat_seq_adjust_hook) seq_adjust; seq_adjust = rcu_dereference(nf_nat_seq_adjust_hook); diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index 7404bde..ab5b27a 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -160,7 +160,7 @@ icmp_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, /* Update skb to refer to this connection */ skb->nfct = &nf_ct_tuplehash_to_ctrack(h)->ct_general; skb->nfctinfo = *ctinfo; - return -NF_ACCEPT; + return NF_ACCEPT; } /* Small and modified version of icmp_rcv */ diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c index f3a9b42..9bb1b8a 100644 --- a/net/ipv4/netfilter/nf_defrag_ipv4.c +++ b/net/ipv4/netfilter/nf_defrag_ipv4.c @@ -82,7 +82,7 @@ static unsigned int ipv4_conntrack_defrag(unsigned int hooknum, #endif #endif /* Gather fragments. */ - if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { + if (ip_is_fragment(ip_hdr(skb))) { enum ip_defrag_users user = nf_ct_defrag_user(hooknum, skb); if (nf_ct_ipv4_gather_frags(skb, user)) return NF_STOLEN; diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index 9c71b27..3346de5 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c @@ -433,7 +433,7 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct, /* Must be RELATED */ NF_CT_ASSERT(skb->nfctinfo == IP_CT_RELATED || - skb->nfctinfo == IP_CT_RELATED+IP_CT_IS_REPLY); + skb->nfctinfo == IP_CT_RELATED_REPLY); /* Redirects on non-null nats must be dropped, else they'll start talking to each other without our translation, and be diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c index 99cfa28..ebc5f88 100644 --- a/net/ipv4/netfilter/nf_nat_helper.c +++ b/net/ipv4/netfilter/nf_nat_helper.c @@ -160,7 +160,7 @@ static void nf_nat_csum(struct sk_buff *skb, const struct iphdr *iph, void *data if (skb->ip_summed != CHECKSUM_PARTIAL) { if (!(rt->rt_flags & RTCF_LOCAL) && - skb->dev->features & NETIF_F_V4_CSUM) { + (!skb->dev || skb->dev->features & NETIF_F_V4_CSUM)) { skb->ip_summed = CHECKSUM_PARTIAL; skb->csum_start = skb_headroom(skb) + skb_network_offset(skb) + diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c index 21c3042..733c9ab 100644 --- a/net/ipv4/netfilter/nf_nat_rule.c +++ b/net/ipv4/netfilter/nf_nat_rule.c @@ -53,7 +53,7 @@ ipt_snat_target(struct sk_buff *skb, const struct xt_action_param *par) /* Connection must be valid and new. 
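The IP_CT_RELATED_REPLY/IP_CT_ESTABLISHED_REPLY renames running through all of these netfilter files replace the `+ IP_CT_IS_REPLY` arithmetic with named constants. The enum itself is outside this diff; presumably it now reads along these lines, with the *_REPLY names aliasing the old sums so the numeric values are unchanged:

/* Assumed post-rename shape of the conntrack info enum. */
enum ip_conntrack_info {
	IP_CT_ESTABLISHED,		/* part of an existing connection */
	IP_CT_RELATED,			/* related, e.g. an ICMP error */
	IP_CT_NEW,			/* first packet of a new connection */
	IP_CT_IS_REPLY,			/* offset marking the reply direction */
	IP_CT_ESTABLISHED_REPLY = IP_CT_ESTABLISHED + IP_CT_IS_REPLY,
	IP_CT_RELATED_REPLY = IP_CT_RELATED + IP_CT_IS_REPLY,
	IP_CT_NEW_REPLY = IP_CT_NEW + IP_CT_IS_REPLY,
};
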
*/ NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED || - ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY)); + ctinfo == IP_CT_RELATED_REPLY)); NF_CT_ASSERT(par->out != NULL); return nf_nat_setup_info(ct, &mr->range[0], IP_NAT_MANIP_SRC); diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c index 7317bdf..a6e606e 100644 --- a/net/ipv4/netfilter/nf_nat_standalone.c +++ b/net/ipv4/netfilter/nf_nat_standalone.c @@ -88,7 +88,7 @@ nf_nat_fn(unsigned int hooknum, /* We never see fragments: conntrack defrags on pre-routing and local-out, and nf_nat_out protects post-routing. */ - NF_CT_ASSERT(!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET))); + NF_CT_ASSERT(!ip_is_fragment(ip_hdr(skb))); ct = nf_ct_get(skb, &ctinfo); /* Can't track? It's not due to stress, or conntrack would @@ -116,7 +116,7 @@ nf_nat_fn(unsigned int hooknum, switch (ctinfo) { case IP_CT_RELATED: - case IP_CT_RELATED+IP_CT_IS_REPLY: + case IP_CT_RELATED_REPLY: if (ip_hdr(skb)->protocol == IPPROTO_ICMP) { if (!nf_nat_icmp_reply_translation(ct, ctinfo, hooknum, skb)) @@ -144,7 +144,7 @@ nf_nat_fn(unsigned int hooknum, default: /* ESTABLISHED */ NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED || - ctinfo == (IP_CT_ESTABLISHED+IP_CT_IS_REPLY)); + ctinfo == IP_CT_ESTABLISHED_REPLY); } return nf_nat_packet(ct, ctinfo, hooknum, skb); diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 9aaa671..39b403f 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -41,7 +41,6 @@ #include <linux/proc_fs.h> #include <net/sock.h> #include <net/ping.h> -#include <net/icmp.h> #include <net/udp.h> #include <net/route.h> #include <net/inet_common.h> diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 52b0b95..f24c335 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1316,6 +1316,23 @@ reject_redirect: ; } +static bool peer_pmtu_expired(struct inet_peer *peer) +{ + unsigned long orig = ACCESS_ONCE(peer->pmtu_expires); + + return orig && + time_after_eq(jiffies, orig) && + cmpxchg(&peer->pmtu_expires, orig, 0) == orig; +} + +static bool peer_pmtu_cleaned(struct inet_peer *peer) +{ + unsigned long orig = ACCESS_ONCE(peer->pmtu_expires); + + return orig && + cmpxchg(&peer->pmtu_expires, orig, 0) == orig; +} + static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) { struct rtable *rt = (struct rtable *)dst; @@ -1331,14 +1348,8 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) rt_genid(dev_net(dst->dev))); rt_del(hash, rt); ret = NULL; - } else if (rt->peer && - rt->peer->pmtu_expires && - time_after_eq(jiffies, rt->peer->pmtu_expires)) { - unsigned long orig = rt->peer->pmtu_expires; - - if (cmpxchg(&rt->peer->pmtu_expires, orig, 0) == orig) - dst_metric_set(dst, RTAX_MTU, - rt->peer->pmtu_orig); + } else if (rt->peer && peer_pmtu_expired(rt->peer)) { + dst_metric_set(dst, RTAX_MTU, rt->peer->pmtu_orig); } } return ret; @@ -1531,8 +1542,10 @@ unsigned short ip_rt_frag_needed(struct net *net, const struct iphdr *iph, static void check_peer_pmtu(struct dst_entry *dst, struct inet_peer *peer) { - unsigned long expires = peer->pmtu_expires; + unsigned long expires = ACCESS_ONCE(peer->pmtu_expires); + if (!expires) + return; if (time_before(jiffies, expires)) { u32 orig_dst_mtu = dst_mtu(dst); if (peer->pmtu_learned < orig_dst_mtu) { @@ -1555,10 +1568,11 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu) rt_bind_peer(rt, rt->rt_dst, 1); peer = rt->peer; if (peer) { + unsigned long pmtu_expires = ACCESS_ONCE(peer->pmtu_expires); + if (mtu < ip_rt_min_pmtu) 
mtu = ip_rt_min_pmtu; - if (!peer->pmtu_expires || mtu < peer->pmtu_learned) { - unsigned long pmtu_expires; + if (!pmtu_expires || mtu < peer->pmtu_learned) { pmtu_expires = jiffies + ip_rt_mtu_expires; if (!pmtu_expires) @@ -1612,13 +1626,14 @@ static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) rt_bind_peer(rt, rt->rt_dst, 0); peer = rt->peer; - if (peer && peer->pmtu_expires) + if (peer) { check_peer_pmtu(dst, peer); - if (peer && peer->redirect_learned.a4 && - peer->redirect_learned.a4 != rt->rt_gateway) { - if (check_peer_redir(dst, peer)) - return NULL; + if (peer->redirect_learned.a4 && + peer->redirect_learned.a4 != rt->rt_gateway) { + if (check_peer_redir(dst, peer)) + return NULL; + } } rt->rt_peer_genid = rt_peer_genid(); @@ -1649,14 +1664,8 @@ static void ipv4_link_failure(struct sk_buff *skb) icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0); rt = skb_rtable(skb); - if (rt && - rt->peer && - rt->peer->pmtu_expires) { - unsigned long orig = rt->peer->pmtu_expires; - - if (cmpxchg(&rt->peer->pmtu_expires, orig, 0) == orig) - dst_metric_set(&rt->dst, RTAX_MTU, rt->peer->pmtu_orig); - } + if (rt && rt->peer && peer_pmtu_cleaned(rt->peer)) + dst_metric_set(&rt->dst, RTAX_MTU, rt->peer->pmtu_orig); } static int ip_rt_bug(struct sk_buff *skb) @@ -1770,8 +1779,7 @@ static void rt_init_metrics(struct rtable *rt, const struct flowi4 *fl4, sizeof(u32) * RTAX_MAX); dst_init_metrics(&rt->dst, peer->metrics, false); - if (peer->pmtu_expires) - check_peer_pmtu(&rt->dst, peer); + check_peer_pmtu(&rt->dst, peer); if (peer->redirect_learned.a4 && peer->redirect_learned.a4 != rt->rt_gateway) { rt->rt_gateway = peer->redirect_learned.a4; @@ -1894,9 +1902,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, hash = rt_hash(daddr, saddr, dev->ifindex, rt_genid(dev_net(dev))); rth = rt_intern_hash(hash, rth, skb, dev->ifindex); - err = 0; - if (IS_ERR(rth)) - err = PTR_ERR(rth); + return IS_ERR(rth) ? PTR_ERR(rth) : 0; e_nobufs: return -ENOBUFS; @@ -2775,7 +2781,8 @@ static int rt_fill_info(struct net *net, struct rtable *rt = skb_rtable(skb); struct rtmsg *r; struct nlmsghdr *nlh; - long expires; + long expires = 0; + const struct inet_peer *peer = rt->peer; u32 id = 0, ts = 0, tsage = 0, error; nlh = nlmsg_put(skb, pid, seq, event, sizeof(*r), flags); @@ -2823,15 +2830,16 @@ static int rt_fill_info(struct net *net, NLA_PUT_BE32(skb, RTA_MARK, rt->rt_mark); error = rt->dst.error; - expires = (rt->peer && rt->peer->pmtu_expires) ? 
- rt->peer->pmtu_expires - jiffies : 0; - if (rt->peer) { + if (peer) { inet_peer_refcheck(rt->peer); - id = atomic_read(&rt->peer->ip_id_count) & 0xffff; - if (rt->peer->tcp_ts_stamp) { - ts = rt->peer->tcp_ts; - tsage = get_seconds() - rt->peer->tcp_ts_stamp; + id = atomic_read(&peer->ip_id_count) & 0xffff; + if (peer->tcp_ts_stamp) { + ts = peer->tcp_ts; + tsage = get_seconds() - peer->tcp_ts_stamp; } + expires = ACCESS_ONCE(peer->pmtu_expires); + if (expires) + expires -= jiffies; } if (rt_is_input_route(rt)) { @@ -3295,7 +3303,7 @@ int __init ip_rt_init(void) xfrm_init(); xfrm4_init(ip_rt_max_size); #endif - rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL); + rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL, NULL); #ifdef CONFIG_SYSCTL register_pernet_subsys(&sysctl_route_ops); diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 2646149..92bb943 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -316,6 +316,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, ireq->wscale_ok = tcp_opt.wscale_ok; ireq->tstamp_ok = tcp_opt.saw_tstamp; req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0; + treq->snt_synack = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsecr : 0; /* We throwed the options of the initial SYN away, so we hope * the ACK carries the same options again (see RFC1122 4.2.3.8) diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 57d0752..69fd720 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -398,20 +398,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec_jiffies, }, { - .procname = "inet_peer_gc_mintime", - .data = &inet_peer_gc_mintime, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "inet_peer_gc_maxtime", - .data = &inet_peer_gc_maxtime, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { .procname = "tcp_orphan_retries", .data = &sysctl_tcp_orphan_retries, .maxlen = sizeof(int), diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index bef9f04..ea0d218 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -880,6 +880,11 @@ static void tcp_init_metrics(struct sock *sk) tp->snd_ssthresh = dst_metric(dst, RTAX_SSTHRESH); if (tp->snd_ssthresh > tp->snd_cwnd_clamp) tp->snd_ssthresh = tp->snd_cwnd_clamp; + } else { + /* ssthresh may have been reduced unnecessarily during + * 3WHS. Restore it back to its initial default. + */ + tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; } if (dst_metric(dst, RTAX_REORDERING) && tp->reordering != dst_metric(dst, RTAX_REORDERING)) { @@ -887,10 +892,7 @@ static void tcp_init_metrics(struct sock *sk) tp->reordering = dst_metric(dst, RTAX_REORDERING); } - if (dst_metric(dst, RTAX_RTT) == 0) - goto reset; - - if (!tp->srtt && dst_metric_rtt(dst, RTAX_RTT) < (TCP_TIMEOUT_INIT << 3)) + if (dst_metric(dst, RTAX_RTT) == 0 || tp->srtt == 0) goto reset; /* Initial rtt is determined from SYN,SYN-ACK. @@ -916,19 +918,26 @@ static void tcp_init_metrics(struct sock *sk) tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk)); } tcp_set_rto(sk); - if (inet_csk(sk)->icsk_rto < TCP_TIMEOUT_INIT && !tp->rx_opt.saw_tstamp) { reset: - /* Play conservative. If timestamps are not - * supported, TCP will fail to recalculate correct - * rtt, if initial rto is too small. FORGET ALL AND RESET! + if (tp->srtt == 0) { + /* RFC2988bis: We've failed to get a valid RTT sample from + * 3WHS. 
This is most likely due to retransmissions, + including spurious ones. Reset the RTO back to 3secs + from the more aggressive 1sec to avoid more spurious + retransmissions. */ - if (!tp->rx_opt.saw_tstamp && tp->srtt) { - tp->srtt = 0; - tp->mdev = tp->mdev_max = tp->rttvar = TCP_TIMEOUT_INIT; - inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT; - } + tp->mdev = tp->mdev_max = tp->rttvar = TCP_TIMEOUT_FALLBACK; + inet_csk(sk)->icsk_rto = TCP_TIMEOUT_FALLBACK; } - tp->snd_cwnd = tcp_init_cwnd(tp, dst); + /* Cut cwnd down to 1 per RFC5681 if SYN or SYN-ACK has been + * retransmitted. In light of RFC2988bis' more aggressive 1sec + * initRTO, we only reset cwnd when more than 1 SYN/SYN-ACK + * retransmission has occurred. + */ + if (tp->total_retrans > 1) + tp->snd_cwnd = 1; + else + tp->snd_cwnd = tcp_init_cwnd(tp, dst); tp->snd_cwnd_stamp = tcp_time_stamp; } @@ -3112,12 +3121,13 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag) tcp_xmit_retransmit_queue(sk); } -static void tcp_valid_rtt_meas(struct sock *sk, u32 seq_rtt) +void tcp_valid_rtt_meas(struct sock *sk, u32 seq_rtt) { tcp_rtt_estimator(sk, seq_rtt); tcp_set_rto(sk); inet_csk(sk)->icsk_backoff = 0; } +EXPORT_SYMBOL(tcp_valid_rtt_meas); /* Read draft-ietf-tcplw-high-performance before mucking * with this code. (Supersedes RFC1323) @@ -5806,12 +5816,6 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, tp->rx_opt.snd_wscale; tcp_init_wl(tp, TCP_SKB_CB(skb)->seq); - /* tcp_ack considers this ACK as duplicate * and does not calculate rtt. * Force it here. */ - tcp_ack_update_rtt(sk, 0, 0); - if (tp->rx_opt.tstamp_ok) tp->advmss -= TCPOLEN_TSTAMP_ALIGNED; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index a7d6671..955b8e6 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -429,8 +429,8 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) break; icsk->icsk_backoff--; - inet_csk(sk)->icsk_rto = __tcp_set_rto(tp) << - icsk->icsk_backoff; + inet_csk(sk)->icsk_rto = (tp->srtt ? __tcp_set_rto(tp) : + TCP_TIMEOUT_INIT) << icsk->icsk_backoff; tcp_bound_rto(sk); skb = tcp_write_queue_head(sk); @@ -1384,6 +1384,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) isn = tcp_v4_init_sequence(skb); } tcp_rsk(req)->snt_isn = isn; + tcp_rsk(req)->snt_synack = tcp_time_stamp; if (tcp_v4_send_synack(sk, dst, req, (struct request_values *)&tmp_ext) || @@ -1458,6 +1459,10 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newtp->advmss = tcp_sk(sk)->rx_opt.user_mss; tcp_initialize_rcv_mss(newsk); + if (tcp_rsk(req)->snt_synack) + tcp_valid_rtt_meas(newsk, + tcp_time_stamp - tcp_rsk(req)->snt_synack); + newtp->total_retrans = req->retrans; #ifdef CONFIG_TCP_MD5SIG /* Copy over the MD5 key from the original socket */ @@ -1589,6 +1594,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) goto discard; if (nsk != sk) { + sock_rps_save_rxhash(nsk, skb->rxhash); if (tcp_child_process(sk, nsk, skb)) { rsk = nsk; goto reset; @@ -1854,7 +1860,7 @@ static int tcp_v4_init_sock(struct sock *sk) * algorithms that we must have the following bandaid to talk * efficiently to them. -DaveM */ - tp->snd_cwnd = 2; + tp->snd_cwnd = TCP_INIT_CWND; /* See draft-stevens-tcpca-spec-01 for discussion of the * initialization of these values. 
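Taken together, the snt_synack additions in tcp_ipv4.c here, tcp_ipv6.c, both syncookie paths, and tcp_minisocks.c below form one feature: passive-side RTT sampling, where the SYN-ACK is stamped on transmit and the completing ACK yields a synack RTT fed to tcp_valid_rtt_meas() (newly exported above for exactly this purpose). A condensed sketch of the idea, detached from the kernel's request-sock plumbing:

typedef unsigned int u32;

/* Sketch: passive-open RTT from the SYN-ACK round trip. A stamp of 0
 * means "no valid sample", e.g. after a SYN-ACK retransmission that
 * timestamps could not disambiguate (see the tcp_check_req() hunk). */
struct synack_stamp {
	u32 snt_synack;
};

static void on_synack_sent(struct synack_stamp *s, u32 now)
{
	s->snt_synack = now;
}

static void on_handshake_ack(struct synack_stamp *s, u32 now,
			     void (*valid_rtt_meas)(u32 seq_rtt))
{
	if (s->snt_synack)
		valid_rtt_meas(now - s->snt_synack);
}
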
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 80b1f80..d2fe4e0 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -486,7 +486,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, * algorithms that we must have the following bandaid to talk * efficiently to them. -DaveM */ - newtp->snd_cwnd = 2; + newtp->snd_cwnd = TCP_INIT_CWND; newtp->snd_cwnd_cnt = 0; newtp->bytes_acked = 0; @@ -720,6 +720,10 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDEFERACCEPTDROP); return NULL; } + if (tmp_opt.saw_tstamp && tmp_opt.rcv_tsecr) + tcp_rsk(req)->snt_synack = tmp_opt.rcv_tsecr; + else if (req->retrans) /* don't take RTT sample if retrans && ~TS */ + tcp_rsk(req)->snt_synack = 0; /* OK, ACK is valid, create big socket and * feed this segment to it. It will repeat all diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index abca870..37aa9bf 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -105,6 +105,7 @@ #include <net/route.h> #include <net/checksum.h> #include <net/xfrm.h> +#include <trace/events/udp.h> #include "udp_impl.h" struct udp_table udp_table __read_mostly; @@ -1363,6 +1364,7 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) is_udplite); UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite); kfree_skb(skb); + trace_udp_fail_queue_rcv_skb(rc, sk); return -1; } diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 981e43e..fc5368a 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -117,7 +117,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) memset(fl4, 0, sizeof(struct flowi4)); fl4->flowi4_mark = skb->mark; - if (!(iph->frag_off & htons(IP_MF | IP_OFFSET))) { + if (!ip_is_fragment(iph)) { switch (iph->protocol) { case IPPROTO_UDP: case IPPROTO_UDPLITE: diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 498b927..05838c7 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1559,6 +1559,11 @@ static int addrconf_ifid_sit(u8 *eui, struct net_device *dev) return -1; } +static int addrconf_ifid_gre(u8 *eui, struct net_device *dev) +{ + return __ipv6_isatap_ifid(eui, *(__be32 *)dev->dev_addr); +} + static int ipv6_generate_eui64(u8 *eui, struct net_device *dev) { switch (dev->type) { @@ -1572,6 +1577,8 @@ static int ipv6_generate_eui64(u8 *eui, struct net_device *dev) return addrconf_ifid_infiniband(eui, dev); case ARPHRD_SIT: return addrconf_ifid_sit(eui, dev); + case ARPHRD_IPGRE: + return addrconf_ifid_gre(eui, dev); } return -1; } @@ -2423,6 +2430,29 @@ static void addrconf_sit_config(struct net_device *dev) } #endif +#if defined(CONFIG_NET_IPGRE) || defined(CONFIG_NET_IPGRE_MODULE) +static void addrconf_gre_config(struct net_device *dev) +{ + struct inet6_dev *idev; + struct in6_addr addr; + + pr_info("ipv6: addrconf_gre_config(%s)\n", dev->name); + + ASSERT_RTNL(); + + if ((idev = ipv6_find_idev(dev)) == NULL) { + printk(KERN_DEBUG "init gre: add_dev failed\n"); + return; + } + + ipv6_addr_set(&addr, htonl(0xFE800000), 0, 0, 0); + addrconf_prefix_route(&addr, 64, dev, 0, 0); + + if (!ipv6_generate_eui64(addr.s6_addr + 8, dev)) + addrconf_add_linklocal(idev, &addr); +} +#endif + static inline int ipv6_inherit_linklocal(struct inet6_dev *idev, struct net_device *link_dev) { @@ -2539,6 +2569,11 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, addrconf_sit_config(dev); break; #endif +#if defined(CONFIG_NET_IPGRE) || 
defined(CONFIG_NET_IPGRE_MODULE) + case ARPHRD_IPGRE: + addrconf_gre_config(dev); + break; +#endif case ARPHRD_TUNNEL6: addrconf_ip6_tnl_config(dev); break; @@ -4692,16 +4727,20 @@ int __init addrconf_init(void) if (err < 0) goto errout_af; - err = __rtnl_register(PF_INET6, RTM_GETLINK, NULL, inet6_dump_ifinfo); + err = __rtnl_register(PF_INET6, RTM_GETLINK, NULL, inet6_dump_ifinfo, + NULL); if (err < 0) goto errout; /* Only the first call to __rtnl_register can fail */ - __rtnl_register(PF_INET6, RTM_NEWADDR, inet6_rtm_newaddr, NULL); - __rtnl_register(PF_INET6, RTM_DELADDR, inet6_rtm_deladdr, NULL); - __rtnl_register(PF_INET6, RTM_GETADDR, inet6_rtm_getaddr, inet6_dump_ifaddr); - __rtnl_register(PF_INET6, RTM_GETMULTICAST, NULL, inet6_dump_ifmcaddr); - __rtnl_register(PF_INET6, RTM_GETANYCAST, NULL, inet6_dump_ifacaddr); + __rtnl_register(PF_INET6, RTM_NEWADDR, inet6_rtm_newaddr, NULL, NULL); + __rtnl_register(PF_INET6, RTM_DELADDR, inet6_rtm_deladdr, NULL, NULL); + __rtnl_register(PF_INET6, RTM_GETADDR, inet6_rtm_getaddr, + inet6_dump_ifaddr, NULL); + __rtnl_register(PF_INET6, RTM_GETMULTICAST, NULL, + inet6_dump_ifmcaddr, NULL); + __rtnl_register(PF_INET6, RTM_GETANYCAST, NULL, + inet6_dump_ifacaddr, NULL); ipv6_addr_label_rtnl_register(); diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index c8993e5..2d8ddba 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -592,8 +592,11 @@ out: void __init ipv6_addr_label_rtnl_register(void) { - __rtnl_register(PF_INET6, RTM_NEWADDRLABEL, ip6addrlbl_newdel, NULL); - __rtnl_register(PF_INET6, RTM_DELADDRLABEL, ip6addrlbl_newdel, NULL); - __rtnl_register(PF_INET6, RTM_GETADDRLABEL, ip6addrlbl_get, ip6addrlbl_dump); + __rtnl_register(PF_INET6, RTM_NEWADDRLABEL, ip6addrlbl_newdel, + NULL, NULL); + __rtnl_register(PF_INET6, RTM_DELADDRLABEL, ip6addrlbl_newdel, + NULL, NULL); + __rtnl_register(PF_INET6, RTM_GETADDRLABEL, ip6addrlbl_get, + ip6addrlbl_dump, NULL); } diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index b7919f9..d450a2f 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -272,6 +272,10 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if (addr_len < SIN6_LEN_RFC2133) return -EINVAL; + + if (addr->sin6_family != AF_INET6) + return -EINVAL; + addr_type = ipv6_addr_type(&addr->sin6_addr); if ((addr_type & IPV6_ADDR_MULTICAST) && sock->type == SOCK_STREAM) return -EINVAL; diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 4076a0b..3030bdf 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1586,7 +1586,8 @@ int __init fib6_init(void) if (ret) goto out_kmem_cache_create; - ret = __rtnl_register(PF_INET6, RTM_GETROUTE, NULL, inet6_dump_fib); + ret = __rtnl_register(PF_INET6, RTM_GETROUTE, NULL, inet6_dump_fib, + NULL); if (ret) goto out_unregister_subsys; out: diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 82a8099..705c828 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -1354,7 +1354,8 @@ int __init ip6_mr_init(void) goto add_proto_fail; } #endif - rtnl_register(RTNL_FAMILY_IP6MR, RTM_GETROUTE, NULL, ip6mr_rtm_dumproute); + rtnl_register(RTNL_FAMILY_IP6MR, RTM_GETROUTE, NULL, + ip6mr_rtm_dumproute, NULL); return 0; #ifdef CONFIG_IPV6_PIMSM_V2 add_proto_fail: diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index 413ab07..2493948 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -204,7 +204,8 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp) else pmsg->outdev_name[0] = 
'\0'; - if (entry->indev && entry->skb->dev) { + if (entry->indev && entry->skb->dev && + entry->skb->mac_header != entry->skb->network_header) { pmsg->hw_type = entry->skb->dev->type; pmsg->hw_addrlen = dev_parse_header(entry->skb, pmsg->hw_addr); } @@ -403,7 +404,8 @@ ipq_dev_drop(int ifindex) static inline void __ipq_rcv_skb(struct sk_buff *skb) { - int status, type, pid, flags, nlmsglen, skblen; + int status, type, pid, flags; + unsigned int nlmsglen, skblen; struct nlmsghdr *nlh; skblen = skb->len; diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index c8af58b..4111050 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -160,7 +160,7 @@ static unsigned int ipv6_confirm(unsigned int hooknum, /* This is where we call the helper: as the packet goes out. */ ct = nf_ct_get(skb, &ctinfo); - if (!ct || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY) + if (!ct || ctinfo == IP_CT_RELATED_REPLY) goto out; help = nfct_help(ct); diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index 1df3c8b..7c05e7e 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -177,7 +177,7 @@ icmpv6_error_message(struct net *net, struct nf_conn *tmpl, /* Update skb to refer to this connection */ skb->nfct = &nf_ct_tuplehash_to_ctrack(h)->ct_general; skb->nfctinfo = *ctinfo; - return -NF_ACCEPT; + return NF_ACCEPT; } static int diff --git a/net/ipv6/route.c b/net/ipv6/route.c index de2b1de..216ff31 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2925,9 +2925,9 @@ int __init ip6_route_init(void) goto xfrm6_init; ret = -ENOBUFS; - if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) || - __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) || - __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL)) + if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) || + __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) || + __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL)) goto fib6_rules_init; ret = register_netdevice_notifier(&ip6_route_dev_notifier); diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 8b9644a..89d5bf8 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -223,6 +223,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) ireq->wscale_ok = tcp_opt.wscale_ok; ireq->tstamp_ok = tcp_opt.saw_tstamp; req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0; + treq->snt_synack = tcp_opt.saw_tstamp ? 
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 8b9644a..89d5bf8 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -223,6 +223,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 	ireq->wscale_ok		= tcp_opt.wscale_ok;
 	ireq->tstamp_ok		= tcp_opt.saw_tstamp;
 	req->ts_recent		= tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0;
+	treq->snt_synack	= tcp_opt.saw_tstamp ? tcp_opt.rcv_tsecr : 0;
 
 	treq->rcv_isn = ntohl(th->seq) - 1;
 	treq->snt_isn = cookie;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index d1fd287..78aa534 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1341,6 +1341,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 	}
 have_isn:
 	tcp_rsk(req)->snt_isn = isn;
+	tcp_rsk(req)->snt_synack = tcp_time_stamp;
 
 	security_inet_conn_request(sk, skb, req);
 
@@ -1509,6 +1510,10 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 	tcp_sync_mss(newsk, dst_mtu(dst));
 	newtp->advmss = dst_metric_advmss(dst);
 	tcp_initialize_rcv_mss(newsk);
+	if (tcp_rsk(req)->snt_synack)
+		tcp_valid_rtt_meas(newsk,
+		    tcp_time_stamp - tcp_rsk(req)->snt_synack);
+	newtp->total_retrans = req->retrans;
 
 	newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
 	newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
@@ -1644,6 +1649,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
 	 * the new socket..
 	 */
 	if(nsk != sk) {
+		sock_rps_save_rxhash(nsk, skb->rxhash);
 		if (tcp_child_process(sk, nsk, skb))
 			goto reset;
 		if (opt_skb)
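The snt_synack changes above let TCP time the SYN-ACK round trip: the transmit time is stored in the request sock (or recovered from the echoed timestamp on the syncookie path) and, once the child socket is created, fed into the RTT estimator. A hedged, self-contained sketch of the arithmetic only; the EWMA below is the classic RFC 6298 form, not the kernel's exact fixed-point code:

typedef unsigned int u32;

static u32 synack_stamp;	/* clock tick when the SYN-ACK was sent */
static u32 srtt;		/* smoothed RTT, in the same tick units */

static void synack_sent(u32 now)
{
	synack_stamp = now;
}

static void final_ack_received(u32 now)
{
	u32 rtt;

	if (!synack_stamp)	/* 0 means "no usable sample" */
		return;
	rtt = now - synack_stamp;
	/* srtt <- 7/8 * srtt + 1/8 * rtt */
	srtt = srtt ? (7 * srtt + rtt) / 8 : rtt;
}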
diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c
index cc61697..c24f25a 100644
--- a/net/irda/af_irda.c
+++ b/net/irda/af_irda.c
@@ -369,7 +369,7 @@ static void irda_getvalue_confirm(int result, __u16 obj_id,
 {
 	struct irda_sock *self;
 
-	self = (struct irda_sock *) priv;
+	self = priv;
 	if (!self) {
 		IRDA_WARNING("%s: lost myself!\n", __func__);
 		return;
@@ -418,7 +418,7 @@ static void irda_selective_discovery_indication(discinfo_t *discovery,
 
 	IRDA_DEBUG(2, "%s()\n", __func__);
 
-	self = (struct irda_sock *) priv;
+	self = priv;
 	if (!self) {
 		IRDA_WARNING("%s: lost myself!\n", __func__);
 		return;
diff --git a/net/irda/ircomm/ircomm_tty_attach.c b/net/irda/ircomm/ircomm_tty_attach.c
index 3c17540..b65d66e 100644
--- a/net/irda/ircomm/ircomm_tty_attach.c
+++ b/net/irda/ircomm/ircomm_tty_attach.c
@@ -382,7 +382,7 @@ static void ircomm_tty_discovery_indication(discinfo_t *discovery,
 	info.daddr = discovery->daddr;
 	info.saddr = discovery->saddr;
 
-	self = (struct ircomm_tty_cb *) priv;
+	self = priv;
 	ircomm_tty_do_event(self, IRCOMM_TTY_DISCOVERY_INDICATION,
 			    NULL, &info);
 }
diff --git a/net/irda/irda_device.c b/net/irda/irda_device.c
index 25cc2e6..3eca35f 100644
--- a/net/irda/irda_device.c
+++ b/net/irda/irda_device.c
@@ -262,7 +262,7 @@ static void irda_task_timer_expired(void *data)
 
 	IRDA_DEBUG(2, "%s()\n", __func__);
 
-	task = (struct irda_task *) data;
+	task = data;
 
 	irda_task_kick(task);
 }
diff --git a/net/irda/iriap.c b/net/irda/iriap.c
index 3647753..e71e85b 100644
--- a/net/irda/iriap.c
+++ b/net/irda/iriap.c
@@ -87,6 +87,8 @@ static inline void iriap_start_watchdog_timer(struct iriap_cb *self,
 				 iriap_watchdog_timer_expired);
 }
 
+static struct lock_class_key irias_objects_key;
+
 /*
  * Function iriap_init (void)
  *
@@ -114,6 +116,9 @@ int __init iriap_init(void)
 		return -ENOMEM;
 	}
 
+	lockdep_set_class_and_name(&irias_objects->hb_spinlock, &irias_objects_key,
+				   "irias_objects");
+
 	/*
 	 * Register some default services for IrLMP
 	 */
@@ -300,7 +305,7 @@ static void iriap_disconnect_indication(void *instance, void *sap,
 
 	IRDA_DEBUG(4, "%s(), reason=%s\n", __func__, irlmp_reasons[reason]);
 
-	self = (struct iriap_cb *) instance;
+	self = instance;
 
 	IRDA_ASSERT(self != NULL, return;);
 	IRDA_ASSERT(self->magic == IAS_MAGIC, return;);
@@ -754,7 +759,7 @@ static void iriap_connect_confirm(void *instance, void *sap,
 {
 	struct iriap_cb *self;
 
-	self = (struct iriap_cb *) instance;
+	self = instance;
 
 	IRDA_ASSERT(self != NULL, return;);
 	IRDA_ASSERT(self->magic == IAS_MAGIC, return;);
@@ -786,7 +791,7 @@ static void iriap_connect_indication(void *instance, void *sap,
 
 	IRDA_DEBUG(1, "%s()\n", __func__);
 
-	self = (struct iriap_cb *) instance;
+	self = instance;
 
 	IRDA_ASSERT(skb != NULL, return;);
 	IRDA_ASSERT(self != NULL, goto out;);
@@ -834,7 +839,7 @@ static int iriap_data_indication(void *instance, void *sap,
 
 	IRDA_DEBUG(3, "%s()\n", __func__);
 
-	self = (struct iriap_cb *) instance;
+	self = instance;
 
 	IRDA_ASSERT(skb != NULL, return 0;);
 	IRDA_ASSERT(self != NULL, goto out;);
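The iriap change just above is a lockdep annotation rather than a behavioural fix: every hashbin shares a single spinlock class, so nested locking between two hashbins looks recursive to lockdep. Giving the irias_objects instance its own lock_class_key and name silences the false positive without changing the locking itself. The general pattern, sketched with generic names:

#include <linux/spinlock.h>
#include <linux/lockdep.h>

struct my_table {
	spinlock_t lock;
};

static struct lock_class_key my_table_key;

static void my_table_init(struct my_table *t)
{
	spin_lock_init(&t->lock);
	/* Put this one lock instance in its own lockdep class so
	 * nesting against sibling tables is not flagged. */
	lockdep_set_class_and_name(&t->lock, &my_table_key, "my_table");
}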
diff --git a/net/irda/irlan/irlan_client.c b/net/irda/irlan/irlan_client.c
index 7ed3af9..ba1a3fc 100644
--- a/net/irda/irlan/irlan_client.c
+++ b/net/irda/irlan/irlan_client.c
@@ -198,7 +198,7 @@ static int irlan_client_ctrl_data_indication(void *instance, void *sap,
 
 	IRDA_DEBUG(2, "%s()\n", __func__ );
 
-	self = (struct irlan_cb *) instance;
+	self = instance;
 
 	IRDA_ASSERT(self != NULL, return -1;);
 	IRDA_ASSERT(self->magic == IRLAN_MAGIC, return -1;);
@@ -226,8 +226,8 @@ static void irlan_client_ctrl_disconnect_indication(void *instance, void *sap,
 
 	IRDA_DEBUG(4, "%s(), reason=%d\n", __func__ , reason);
 
-	self = (struct irlan_cb *) instance;
-	tsap = (struct tsap_cb *) sap;
+	self = instance;
+	tsap = sap;
 
 	IRDA_ASSERT(self != NULL, return;);
 	IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
@@ -298,7 +298,7 @@ static void irlan_client_ctrl_connect_confirm(void *instance, void *sap,
 
 	IRDA_DEBUG(4, "%s()\n", __func__ );
 
-	self = (struct irlan_cb *) instance;
+	self = instance;
 
 	IRDA_ASSERT(self != NULL, return;);
 	IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
@@ -542,7 +542,7 @@ void irlan_client_get_value_confirm(int result, __u16 obj_id,
 
 	IRDA_ASSERT(priv != NULL, return;);
 
-	self = (struct irlan_cb *) priv;
+	self = priv;
 
 	IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
 
 	/* We probably don't need to make any more queries */
diff --git a/net/irda/irlan/irlan_common.c b/net/irda/irlan/irlan_common.c
index 6130f9d..7791176 100644
--- a/net/irda/irlan/irlan_common.c
+++ b/net/irda/irlan/irlan_common.c
@@ -317,8 +317,8 @@ static void irlan_connect_indication(void *instance, void *sap,
 
 	IRDA_DEBUG(2, "%s()\n", __func__ );
 
-	self = (struct irlan_cb *) instance;
-	tsap = (struct tsap_cb *) sap;
+	self = instance;
+	tsap = sap;
 
 	IRDA_ASSERT(self != NULL, return;);
 	IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
@@ -361,7 +361,7 @@ static void irlan_connect_confirm(void *instance, void *sap,
 {
 	struct irlan_cb *self;
 
-	self = (struct irlan_cb *) instance;
+	self = instance;
 
 	IRDA_ASSERT(self != NULL, return;);
 	IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
@@ -406,8 +406,8 @@ static void irlan_disconnect_indication(void *instance,
 
 	IRDA_DEBUG(0, "%s(), reason=%d\n", __func__ , reason);
 
-	self = (struct irlan_cb *) instance;
-	tsap = (struct tsap_cb *) sap;
+	self = instance;
+	tsap = sap;
 
 	IRDA_ASSERT(self != NULL, return;);
 	IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
diff --git a/net/irda/irlan/irlan_eth.c b/net/irda/irlan/irlan_eth.c
index 8ee1ff6..e8d5f44 100644
--- a/net/irda/irlan/irlan_eth.c
+++ b/net/irda/irlan/irlan_eth.c
@@ -272,7 +272,7 @@ void irlan_eth_flow_indication(void *instance, void *sap, LOCAL_FLOW flow)
 	struct irlan_cb *self;
 	struct net_device *dev;
 
-	self = (struct irlan_cb *) instance;
+	self = instance;
 
 	IRDA_ASSERT(self != NULL, return;);
 	IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
diff --git a/net/irda/irlan/irlan_provider.c b/net/irda/irlan/irlan_provider.c
index b8af74a..8b61cf0 100644
--- a/net/irda/irlan/irlan_provider.c
+++ b/net/irda/irlan/irlan_provider.c
@@ -73,7 +73,7 @@ static int irlan_provider_data_indication(void *instance, void *sap,
 
 	IRDA_DEBUG(4, "%s()\n", __func__ );
 
-	self = (struct irlan_cb *) instance;
+	self = instance;
 
 	IRDA_ASSERT(self != NULL, return -1;);
 	IRDA_ASSERT(self->magic == IRLAN_MAGIC, return -1;);
@@ -131,8 +131,8 @@ static void irlan_provider_connect_indication(void *instance, void *sap,
 
 	IRDA_DEBUG(0, "%s()\n", __func__ );
 
-	self = (struct irlan_cb *) instance;
-	tsap = (struct tsap_cb *) sap;
+	self = instance;
+	tsap = sap;
 
 	IRDA_ASSERT(self != NULL, return;);
 	IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
@@ -182,8 +182,8 @@ static void irlan_provider_disconnect_indication(void *instance, void *sap,
 
 	IRDA_DEBUG(4, "%s(), reason=%d\n", __func__ , reason);
 
-	self = (struct irlan_cb *) instance;
-	tsap = (struct tsap_cb *) sap;
+	self = instance;
+	tsap = sap;
 
 	IRDA_ASSERT(self != NULL, return;);
 	IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
diff --git a/net/irda/irqueue.c b/net/irda/irqueue.c
index 9715e6e..f06947c 100644
--- a/net/irda/irqueue.c
+++ b/net/irda/irqueue.c
@@ -780,7 +780,7 @@ void* hashbin_lock_find( hashbin_t* hashbin, long hashv, const char* name )
 	/*
 	 * Search for entry
 	 */
-	entry = (irda_queue_t* ) hashbin_find( hashbin, hashv, name );
+	entry = hashbin_find(hashbin, hashv, name);
 
 	/* Release lock */
 	spin_unlock_irqrestore(&hashbin->hb_spinlock, flags);
@@ -813,7 +813,7 @@ void* hashbin_find_next( hashbin_t* hashbin, long hashv, const char* name,
 	 * This allow to check if the current item is still in the
 	 * hashbin or has been removed.
 	 */
-	entry = (irda_queue_t* ) hashbin_find( hashbin, hashv, name );
+	entry = hashbin_find(hashbin, hashv, name);
 
 	/*
 	 * Trick hashbin_get_next() to return what we want
diff --git a/net/irda/irttp.c b/net/irda/irttp.c
index 9d9af46..285ccd6 100644
--- a/net/irda/irttp.c
+++ b/net/irda/irttp.c
@@ -350,7 +350,7 @@ static int irttp_param_max_sdu_size(void *instance, irda_param_t *param,
 {
 	struct tsap_cb *self;
 
-	self = (struct tsap_cb *) instance;
+	self = instance;
 
 	IRDA_ASSERT(self != NULL, return -1;);
 	IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return -1;);
@@ -879,7 +879,7 @@ static int irttp_udata_indication(void *instance, void *sap,
 
 	IRDA_DEBUG(4, "%s()\n", __func__);
 
-	self = (struct tsap_cb *) instance;
+	self = instance;
 
 	IRDA_ASSERT(self != NULL, return -1;);
 	IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return -1;);
@@ -914,7 +914,7 @@ static int irttp_data_indication(void *instance, void *sap,
 	unsigned long flags;
 	int n;
 
-	self = (struct tsap_cb *) instance;
+	self = instance;
 
 	n = skb->data[0] & 0x7f;     /* Extract the credits */
@@ -996,7 +996,7 @@ static void irttp_status_indication(void *instance,
 
 	IRDA_DEBUG(4, "%s()\n", __func__);
 
-	self = (struct tsap_cb *) instance;
+	self = instance;
 
 	IRDA_ASSERT(self != NULL, return;);
 	IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return;);
@@ -1025,7 +1025,7 @@ static void irttp_flow_indication(void *instance, void *sap, LOCAL_FLOW flow)
 {
 	struct tsap_cb *self;
 
-	self = (struct tsap_cb *) instance;
+	self = instance;
 
 	IRDA_ASSERT(self != NULL, return;);
 	IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return;);
@@ -1208,7 +1208,7 @@ static void irttp_connect_confirm(void *instance, void *sap,
 
 	IRDA_DEBUG(4, "%s()\n", __func__);
 
-	self = (struct tsap_cb *) instance;
+	self = instance;
 
 	IRDA_ASSERT(self != NULL, return;);
 	IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return;);
@@ -1292,13 +1292,13 @@ static void irttp_connect_indication(void *instance, void *sap,
 	__u8 plen;
 	__u8 n;
 
-	self = (struct tsap_cb *) instance;
+	self = instance;
 
 	IRDA_ASSERT(self != NULL, return;);
 	IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return;);
 
 	IRDA_ASSERT(skb != NULL, return;);
 
-	lsap = (struct lsap_cb *) sap;
+	lsap = sap;
 
 	self->max_seg_size = max_seg_size - TTP_HEADER;
 	self->max_header_size = max_header_size+TTP_HEADER;
@@ -1602,7 +1602,7 @@ static void irttp_disconnect_indication(void *instance, void *sap,
 
 	IRDA_DEBUG(4, "%s()\n", __func__);
 
-	self = (struct tsap_cb *) instance;
+	self = instance;
 
 	IRDA_ASSERT(self != NULL, return;);
 	IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return;);
diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c
index a15c015..7f91249 100644
--- a/net/iucv/iucv.c
+++ b/net/iucv/iucv.c
@@ -54,7 +54,7 @@
 #include <asm/atomic.h>
 #include <asm/ebcdic.h>
 #include <asm/io.h>
-#include <asm/s390_ext.h>
+#include <asm/irq.h>
 #include <asm/smp.h>
 
 /*
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 8f92cf8..1e733e9 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -621,7 +621,7 @@ static struct xfrm_state *pfkey_xfrm_state_lookup(struct net *net, const struct
 	unsigned short family;
 	xfrm_address_t *xaddr;
 
-	sa = (const struct sadb_sa *) ext_hdrs[SADB_EXT_SA-1];
+	sa = ext_hdrs[SADB_EXT_SA - 1];
 	if (sa == NULL)
 		return NULL;
 
@@ -630,7 +630,7 @@ static struct xfrm_state *pfkey_xfrm_state_lookup(struct net *net, const struct
 		return NULL;
 
 	/* sadb_address_len should be checked by caller */
-	addr = (const struct sadb_address *) ext_hdrs[SADB_EXT_ADDRESS_DST-1];
+	addr = ext_hdrs[SADB_EXT_ADDRESS_DST - 1];
 	if (addr == NULL)
 		return NULL;
 
@@ -1039,7 +1039,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net,
 	int err;
 
-	sa = (const struct sadb_sa *) ext_hdrs[SADB_EXT_SA-1];
+	sa = ext_hdrs[SADB_EXT_SA - 1];
 	if (!sa ||
 	    !present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC-1],
 				     ext_hdrs[SADB_EXT_ADDRESS_DST-1]))
@@ -1078,7 +1078,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net,
 	     sa->sadb_sa_encrypt > SADB_X_CALG_MAX) ||
 	    sa->sadb_sa_encrypt > SADB_EALG_MAX)
 		return ERR_PTR(-EINVAL);
-	key = (const struct sadb_key*) ext_hdrs[SADB_EXT_KEY_AUTH-1];
+	key = ext_hdrs[SADB_EXT_KEY_AUTH - 1];
 	if (key != NULL &&
 	    sa->sadb_sa_auth != SADB_X_AALG_NULL &&
 	    ((key->sadb_key_bits+7) / 8 == 0 ||
@@ -1105,14 +1105,14 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net,
 	if (sa->sadb_sa_flags & SADB_SAFLAGS_NOPMTUDISC)
 		x->props.flags |= XFRM_STATE_NOPMTUDISC;
 
-	lifetime = (const struct sadb_lifetime*) ext_hdrs[SADB_EXT_LIFETIME_HARD-1];
+	lifetime = ext_hdrs[SADB_EXT_LIFETIME_HARD - 1];
 	if (lifetime != NULL) {
 		x->lft.hard_packet_limit = _KEY2X(lifetime->sadb_lifetime_allocations);
 		x->lft.hard_byte_limit = _KEY2X(lifetime->sadb_lifetime_bytes);
 		x->lft.hard_add_expires_seconds = lifetime->sadb_lifetime_addtime;
 		x->lft.hard_use_expires_seconds = lifetime->sadb_lifetime_usetime;
 	}
-	lifetime = (const struct sadb_lifetime*) ext_hdrs[SADB_EXT_LIFETIME_SOFT-1];
+	lifetime = ext_hdrs[SADB_EXT_LIFETIME_SOFT - 1];
 	if (lifetime != NULL) {
 		x->lft.soft_packet_limit = _KEY2X(lifetime->sadb_lifetime_allocations);
 		x->lft.soft_byte_limit = _KEY2X(lifetime->sadb_lifetime_bytes);
@@ -1120,7 +1120,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net,
 		x->lft.soft_use_expires_seconds = lifetime->sadb_lifetime_usetime;
 	}
 
-	sec_ctx = (const struct sadb_x_sec_ctx *) ext_hdrs[SADB_X_EXT_SEC_CTX-1];
+	sec_ctx = ext_hdrs[SADB_X_EXT_SEC_CTX - 1];
 	if (sec_ctx != NULL) {
 		struct xfrm_user_sec_ctx *uctx = pfkey_sadb2xfrm_user_sec_ctx(sec_ctx);
 
@@ -1134,7 +1134,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net,
 		goto out;
 	}
 
-	key = (const struct sadb_key*) ext_hdrs[SADB_EXT_KEY_AUTH-1];
+	key = ext_hdrs[SADB_EXT_KEY_AUTH - 1];
 	if (sa->sadb_sa_auth) {
 		int keysize = 0;
 		struct xfrm_algo_desc *a = xfrm_aalg_get_byid(sa->sadb_sa_auth);
@@ -2219,7 +2219,7 @@ static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, const struct sadb_
 	if (xp->selector.dport)
 		xp->selector.dport_mask = htons(0xffff);
 
-	sec_ctx = (struct sadb_x_sec_ctx *) ext_hdrs[SADB_X_EXT_SEC_CTX-1];
+	sec_ctx = ext_hdrs[SADB_X_EXT_SEC_CTX - 1];
 	if (sec_ctx != NULL) {
 		struct xfrm_user_sec_ctx *uctx = pfkey_sadb2xfrm_user_sec_ctx(sec_ctx);
 
@@ -2323,7 +2323,7 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, const struct sa
 	if (sel.dport)
 		sel.dport_mask = htons(0xffff);
 
-	sec_ctx = (struct sadb_x_sec_ctx *) ext_hdrs[SADB_X_EXT_SEC_CTX-1];
+	sec_ctx = ext_hdrs[SADB_X_EXT_SEC_CTX - 1];
 	if (sec_ctx != NULL) {
 		struct xfrm_user_sec_ctx *uctx = pfkey_sadb2xfrm_user_sec_ctx(sec_ctx);
diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c
index b8dbae8..7613013 100644
--- a/net/l2tp/l2tp_debugfs.c
+++ b/net/l2tp/l2tp_debugfs.c
@@ -258,7 +258,7 @@ static int l2tp_dfs_seq_open(struct inode *inode, struct file *file)
 	 */
 	pd->net = get_net_ns_by_pid(current->pid);
 	if (IS_ERR(pd->net)) {
-		rc = -PTR_ERR(pd->net);
+		rc = PTR_ERR(pd->net);
 		goto err_free_pd;
 	}
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index b6466e7..d21e7eb 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -480,18 +480,16 @@ static int l2tp_ip_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *m
 	if (connected)
 		rt = (struct rtable *) __sk_dst_check(sk, 0);
 
+	rcu_read_lock();
 	if (rt == NULL) {
-		struct ip_options_rcu *inet_opt;
+		const struct ip_options_rcu *inet_opt;
 
-		rcu_read_lock();
 		inet_opt = rcu_dereference(inet->inet_opt);
 
 		/* Use correct destination address if we have options. */
 		if (inet_opt && inet_opt->opt.srr)
 			daddr = inet_opt->opt.faddr;
 
-		rcu_read_unlock();
-
 		/* If this fails, retransmit mechanism of transport layer will
 		 * keep trying until route appears or the connection times
 		 * itself out.
@@ -503,12 +501,20 @@ static int l2tp_ip_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *m
 			   sk->sk_bound_dev_if);
 		if (IS_ERR(rt))
 			goto no_route;
-		sk_setup_caps(sk, &rt->dst);
+		if (connected)
+			sk_setup_caps(sk, &rt->dst);
+		else
+			dst_release(&rt->dst); /* safe since we hold rcu_read_lock */
 	}
-	skb_dst_set(skb, dst_clone(&rt->dst));
+
+	/* We dont need to clone dst here, it is guaranteed to not disappear.
+	 * __dev_xmit_skb() might force a refcount if needed.
+	 */
+	skb_dst_set_noref(skb, &rt->dst);
 
 	/* Queue the packet to IP for output */
 	rc = ip_queue_xmit(skb, &inet->cork.fl);
+	rcu_read_unlock();
 
 error:
 	/* Update stats */
@@ -525,6 +531,7 @@ out:
 	return rc;
 
 no_route:
+	rcu_read_unlock();
 	IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
 	kfree_skb(skb);
 	rc = -EHOSTUNREACH;
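The l2tp_ip_sendmsg rework above widens the RCU read-side section to cover both the route lookup and the transmit, which is what makes skb_dst_set_noref() legal: the dst is attached to the skb without taking a refcount, on the guarantee that it cannot be freed before rcu_read_unlock(). The shape of the pattern, as a hedged sketch (my_route_output() and my_xmit() stand in for ip_route_output_ports() and ip_queue_xmit(); they are not from this patch):

static int send_one(struct sock *sk, struct sk_buff *skb)
{
	struct rtable *rt;
	int rc;

	rcu_read_lock();
	rt = my_route_output(sk);		/* route stays valid while in RCU */
	if (IS_ERR(rt)) {
		rcu_read_unlock();
		kfree_skb(skb);
		return PTR_ERR(rt);
	}
	/* No refcount taken; the xmit path may escalate to a real
	 * reference if the skb outlives this section. */
	skb_dst_set_noref(skb, &rt->dst);
	rc = my_xmit(skb);
	rcu_read_unlock();			/* only now may rt go away */
	return rc;
}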
diff --git a/net/netfilter/ipset/Kconfig b/net/netfilter/ipset/Kconfig
index 2c5b348..ba36c28 100644
--- a/net/netfilter/ipset/Kconfig
+++ b/net/netfilter/ipset/Kconfig
@@ -109,6 +109,16 @@ config IP_SET_HASH_NETPORT
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
+config IP_SET_HASH_NETIFACE
+	tristate "hash:net,iface set support"
+	depends on IP_SET
+	help
+	  This option adds the hash:net,iface set type support, by which
+	  one can store IPv4/IPv6 network address/prefix and
+	  interface name pairs as elements in a set.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
 config IP_SET_LIST_SET
 	tristate "list:set set support"
 	depends on IP_SET
diff --git a/net/netfilter/ipset/Makefile b/net/netfilter/ipset/Makefile
index 5adbdab..6e965ec 100644
--- a/net/netfilter/ipset/Makefile
+++ b/net/netfilter/ipset/Makefile
@@ -19,6 +19,7 @@ obj-$(CONFIG_IP_SET_HASH_IPPORTIP) += ip_set_hash_ipportip.o
 obj-$(CONFIG_IP_SET_HASH_IPPORTNET) += ip_set_hash_ipportnet.o
 obj-$(CONFIG_IP_SET_HASH_NET) += ip_set_hash_net.o
 obj-$(CONFIG_IP_SET_HASH_NETPORT) += ip_set_hash_netport.o
+obj-$(CONFIG_IP_SET_HASH_NETIFACE) += ip_set_hash_netiface.o
 
 # list types
 obj-$(CONFIG_IP_SET_LIST_SET) += ip_set_list_set.o
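From here on, every ipset type is converted to the same reworked callback API: the kernel-side kadt() now receives the xt_action_param plus a packed ip_set_adt_opt (family, dimensions, flags, per-rule timeout, command flags) instead of loose u8 arguments, and the userspace-side uadt() gains a retried flag so a range add interrupted by a hash resize can resume from the last stored position instead of restarting. A hedged sketch of the resume idea only, with illustrative names (add_element is a stub, not a kernel function):

typedef unsigned int u32;

static int add_element(u32 ip)
{
	return 0;	/* stub: insert ip into the set */
}

/* "next" records the last element handled before a resize forced
 * -EAGAIN; on the retried pass the loop starts there. */
static int example_range_add(u32 from, u32 to, u32 next, int retried)
{
	u32 ip = retried ? next : from;
	int ret = 0;

	for (; ip <= to; ip++) {
		ret = add_element(ip);
		if (ret)
			break;
	}
	return ret;
}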
diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c
index ba2d166..e3e7399 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ip.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ip.c
@@ -54,7 +54,7 @@ ip_to_id(const struct bitmap_ip *m, u32 ip)
 }
 
 static int
-bitmap_ip_test(struct ip_set *set, void *value, u32 timeout)
+bitmap_ip_test(struct ip_set *set, void *value, u32 timeout, u32 flags)
 {
 	const struct bitmap_ip *map = set->data;
 	u16 id = *(u16 *)value;
@@ -63,7 +63,7 @@ bitmap_ip_test(struct ip_set *set, void *value, u32 timeout)
 }
 
 static int
-bitmap_ip_add(struct ip_set *set, void *value, u32 timeout)
+bitmap_ip_add(struct ip_set *set, void *value, u32 timeout, u32 flags)
 {
 	struct bitmap_ip *map = set->data;
 	u16 id = *(u16 *)value;
@@ -75,7 +75,7 @@ bitmap_ip_add(struct ip_set *set, void *value, u32 timeout)
 }
 
 static int
-bitmap_ip_del(struct ip_set *set, void *value, u32 timeout)
+bitmap_ip_del(struct ip_set *set, void *value, u32 timeout, u32 flags)
 {
 	struct bitmap_ip *map = set->data;
 	u16 id = *(u16 *)value;
@@ -131,7 +131,7 @@ nla_put_failure:
 /* Timeout variant */
 
 static int
-bitmap_ip_ttest(struct ip_set *set, void *value, u32 timeout)
+bitmap_ip_ttest(struct ip_set *set, void *value, u32 timeout, u32 flags)
 {
 	const struct bitmap_ip *map = set->data;
 	const unsigned long *members = map->members;
@@ -141,13 +141,13 @@ bitmap_ip_ttest(struct ip_set *set, void *value, u32 timeout)
 }
 
 static int
-bitmap_ip_tadd(struct ip_set *set, void *value, u32 timeout)
+bitmap_ip_tadd(struct ip_set *set, void *value, u32 timeout, u32 flags)
 {
 	struct bitmap_ip *map = set->data;
 	unsigned long *members = map->members;
 	u16 id = *(u16 *)value;
 
-	if (ip_set_timeout_test(members[id]))
+	if (ip_set_timeout_test(members[id]) && !(flags & IPSET_FLAG_EXIST))
 		return -IPSET_ERR_EXIST;
 
 	members[id] = ip_set_timeout_set(timeout);
@@ -156,7 +156,7 @@ bitmap_ip_tadd(struct ip_set *set, void *value, u32 timeout)
 }
 
 static int
-bitmap_ip_tdel(struct ip_set *set, void *value, u32 timeout)
+bitmap_ip_tdel(struct ip_set *set, void *value, u32 timeout, u32 flags)
 {
 	struct bitmap_ip *map = set->data;
 	unsigned long *members = map->members;
@@ -219,24 +219,25 @@ nla_put_failure:
 
 static int
 bitmap_ip_kadt(struct ip_set *set, const struct sk_buff *skb,
-	       enum ipset_adt adt, u8 pf, u8 dim, u8 flags)
+	       const struct xt_action_param *par,
+	       enum ipset_adt adt, const struct ip_set_adt_opt *opt)
 {
 	struct bitmap_ip *map = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	u32 ip;
 
-	ip = ntohl(ip4addr(skb, flags & IPSET_DIM_ONE_SRC));
+	ip = ntohl(ip4addr(skb, opt->flags & IPSET_DIM_ONE_SRC));
 	if (ip < map->first_ip || ip > map->last_ip)
 		return -IPSET_ERR_BITMAP_RANGE;
 
 	ip = ip_to_id(map, ip);
 
-	return adtfn(set, &ip, map->timeout);
+	return adtfn(set, &ip, opt_timeout(opt, map), opt->cmdflags);
 }
 
 static int
 bitmap_ip_uadt(struct ip_set *set, struct nlattr *tb[],
-	       enum ipset_adt adt, u32 *lineno, u32 flags)
+	       enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
 {
 	struct bitmap_ip *map = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
@@ -266,7 +267,7 @@ bitmap_ip_uadt(struct ip_set *set, struct nlattr *tb[],
 
 	if (adt == IPSET_TEST) {
 		id = ip_to_id(map, ip);
-		return adtfn(set, &id, timeout);
+		return adtfn(set, &id, timeout, flags);
 	}
 
 	if (tb[IPSET_ATTR_IP_TO]) {
@@ -283,8 +284,7 @@ bitmap_ip_uadt(struct ip_set *set, struct nlattr *tb[],
 		if (cidr > 32)
 			return -IPSET_ERR_INVALID_CIDR;
-		ip &= ip_set_hostmask(cidr);
-		ip_to = ip | ~ip_set_hostmask(cidr);
+		ip_set_mask_from_to(ip, ip_to, cidr);
 	} else
 		ip_to = ip;
@@ -293,7 +293,7 @@ bitmap_ip_uadt(struct ip_set *set, struct nlattr *tb[],
 
 	for (; !before(ip_to, ip); ip += map->hosts) {
 		id = ip_to_id(map, ip);
-		ret = adtfn(set, &id, timeout);
+		ret = adtfn(set, &id, timeout, flags);
 
 		if (ret && !ip_set_eexist(ret, flags))
 			return ret;
@@ -478,7 +478,7 @@ bitmap_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
 		if (cidr >= 32)
 			return -IPSET_ERR_INVALID_CIDR;
-		last_ip = first_ip | ~ip_set_hostmask(cidr);
+		ip_set_mask_from_to(first_ip, last_ip, cidr);
 	} else
 		return -IPSET_ERR_PROTOCOL;
@@ -551,7 +551,8 @@ static struct ip_set_type bitmap_ip_type __read_mostly = {
 	.features	= IPSET_TYPE_IP,
 	.dimension	= IPSET_DIM_ONE,
 	.family		= AF_INET,
-	.revision	= 0,
+	.revision_min	= 0,
+	.revision_max	= 0,
 	.create		= bitmap_ip_create,
 	.create_policy	= {
 		[IPSET_ATTR_IP]		= { .type = NLA_NESTED },
diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
index a274300..56096f5 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
@@ -99,7 +99,7 @@ bitmap_ipmac_exist(const struct ipmac_telem *elem)
 /* Base variant */
 
 static int
-bitmap_ipmac_test(struct ip_set *set, void *value, u32 timeout)
+bitmap_ipmac_test(struct ip_set *set, void *value, u32 timeout, u32 flags)
 {
 	const struct bitmap_ipmac *map = set->data;
 	const struct ipmac *data = value;
@@ -117,7 +117,7 @@ bitmap_ipmac_test(struct ip_set *set, void *value, u32 timeout)
 }
 
 static int
-bitmap_ipmac_add(struct ip_set *set, void *value, u32 timeout)
+bitmap_ipmac_add(struct ip_set *set, void *value, u32 timeout, u32 flags)
 {
 	struct bitmap_ipmac *map = set->data;
 	const struct ipmac *data = value;
@@ -146,7 +146,7 @@ bitmap_ipmac_add(struct ip_set *set, void *value, u32 timeout)
 }
 
 static int
-bitmap_ipmac_del(struct ip_set *set, void *value, u32 timeout)
+bitmap_ipmac_del(struct ip_set *set, void *value, u32 timeout, u32 flags)
 {
 	struct bitmap_ipmac *map = set->data;
 	const struct ipmac *data = value;
@@ -212,7 +212,7 @@ nla_put_failure:
 /* Timeout variant */
 
 static int
-bitmap_ipmac_ttest(struct ip_set *set, void *value, u32 timeout)
+bitmap_ipmac_ttest(struct ip_set *set, void *value, u32 timeout, u32 flags)
 {
 	const struct bitmap_ipmac *map = set->data;
 	const struct ipmac *data = value;
@@ -231,15 +231,16 @@ bitmap_ipmac_ttest(struct ip_set *set, void *value, u32 timeout)
 }
 
 static int
-bitmap_ipmac_tadd(struct ip_set *set, void *value, u32 timeout)
+bitmap_ipmac_tadd(struct ip_set *set, void *value, u32 timeout, u32 flags)
 {
 	struct bitmap_ipmac *map = set->data;
 	const struct ipmac *data = value;
 	struct ipmac_telem *elem = bitmap_ipmac_elem(map, data->id);
+	bool flag_exist = flags & IPSET_FLAG_EXIST;
 
 	switch (elem->match) {
 	case MAC_UNSET:
-		if (!data->ether)
+		if (!(data->ether || flag_exist))
 			/* Already added without ethernet address */
 			return -IPSET_ERR_EXIST;
 		/* Fill the MAC address and activate the timer */
@@ -251,7 +252,7 @@ bitmap_ipmac_tadd(struct ip_set *set, void *value, u32 timeout)
 		elem->timeout = ip_set_timeout_set(timeout);
 		break;
 	case MAC_FILLED:
-		if (!bitmap_expired(map, data->id))
+		if (!(bitmap_expired(map, data->id) || flag_exist))
 			return -IPSET_ERR_EXIST;
 		/* Fall through */
 	case MAC_EMPTY:
@@ -273,7 +274,7 @@ bitmap_ipmac_tadd(struct ip_set *set, void *value, u32 timeout)
 }
 
 static int
-bitmap_ipmac_tdel(struct ip_set *set, void *value, u32 timeout)
+bitmap_ipmac_tdel(struct ip_set *set, void *value, u32 timeout, u32 flags)
 {
 	struct bitmap_ipmac *map = set->data;
 	const struct ipmac *data = value;
@@ -337,17 +338,18 @@ nla_put_failure:
 
 static int
 bitmap_ipmac_kadt(struct ip_set *set, const struct sk_buff *skb,
-		  enum ipset_adt adt, u8 pf, u8 dim, u8 flags)
+		  const struct xt_action_param *par,
+		  enum ipset_adt adt, const struct ip_set_adt_opt *opt)
 {
 	struct bitmap_ipmac *map = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct ipmac data;
 
 	/* MAC can be src only */
-	if (!(flags & IPSET_DIM_TWO_SRC))
+	if (!(opt->flags & IPSET_DIM_TWO_SRC))
 		return 0;
 
-	data.id = ntohl(ip4addr(skb, flags & IPSET_DIM_ONE_SRC));
+	data.id = ntohl(ip4addr(skb, opt->flags & IPSET_DIM_ONE_SRC));
 	if (data.id < map->first_ip || data.id > map->last_ip)
 		return -IPSET_ERR_BITMAP_RANGE;
 
@@ -359,12 +361,12 @@ bitmap_ipmac_kadt(struct ip_set *set, const struct sk_buff *skb,
 	data.id -= map->first_ip;
 	data.ether = eth_hdr(skb)->h_source;
 
-	return adtfn(set, &data, map->timeout);
+	return adtfn(set, &data, opt_timeout(opt, map), opt->cmdflags);
 }
 
 static int
 bitmap_ipmac_uadt(struct ip_set *set, struct nlattr *tb[],
-		  enum ipset_adt adt, u32 *lineno, u32 flags)
+		  enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
 {
 	const struct bitmap_ipmac *map = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
@@ -399,7 +401,7 @@ bitmap_ipmac_uadt(struct ip_set *set, struct nlattr *tb[],
 
 	data.id -= map->first_ip;
 
-	ret = adtfn(set, &data, timeout);
+	ret = adtfn(set, &data, timeout, flags);
 
 	return ip_set_eexist(ret, flags) ? 0 : ret;
 }
@@ -577,7 +579,7 @@ bitmap_ipmac_create(struct ip_set *set, struct nlattr *tb[],
 		if (cidr >= 32)
 			return -IPSET_ERR_INVALID_CIDR;
-		last_ip = first_ip | ~ip_set_hostmask(cidr);
+		ip_set_mask_from_to(first_ip, last_ip, cidr);
 	} else
 		return -IPSET_ERR_PROTOCOL;
@@ -622,7 +624,8 @@ static struct ip_set_type bitmap_ipmac_type = {
 	.features	= IPSET_TYPE_IP | IPSET_TYPE_MAC,
 	.dimension	= IPSET_DIM_TWO,
 	.family		= AF_INET,
-	.revision	= 0,
+	.revision_min	= 0,
+	.revision_max	= 0,
 	.create		= bitmap_ipmac_create,
 	.create_policy	= {
 		[IPSET_ATTR_IP]		= { .type = NLA_NESTED },
@@ -632,7 +635,8 @@ static struct ip_set_type bitmap_ipmac_type = {
 	},
 	.adt_policy	= {
 		[IPSET_ATTR_IP]		= { .type = NLA_NESTED },
-		[IPSET_ATTR_ETHER]	= { .type = NLA_BINARY, .len = ETH_ALEN },
+		[IPSET_ATTR_ETHER]	= { .type = NLA_BINARY,
+					    .len  = ETH_ALEN },
 		[IPSET_ATTR_TIMEOUT]	= { .type = NLA_U32 },
 		[IPSET_ATTR_LINENO]	= { .type = NLA_U32 },
 	},
diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c
index 6b38eb8..29ba93b 100644
--- a/net/netfilter/ipset/ip_set_bitmap_port.c
+++ b/net/netfilter/ipset/ip_set_bitmap_port.c
@@ -40,7 +40,7 @@ struct bitmap_port {
 /* Base variant */
 
 static int
-bitmap_port_test(struct ip_set *set, void *value, u32 timeout)
+bitmap_port_test(struct ip_set *set, void *value, u32 timeout, u32 flags)
 {
 	const struct bitmap_port *map = set->data;
 	u16 id = *(u16 *)value;
@@ -49,7 +49,7 @@ bitmap_port_test(struct ip_set *set, void *value, u32 timeout)
 }
 
 static int
-bitmap_port_add(struct ip_set *set, void *value, u32 timeout)
+bitmap_port_add(struct ip_set *set, void *value, u32 timeout, u32 flags)
 {
 	struct bitmap_port *map = set->data;
 	u16 id = *(u16 *)value;
@@ -61,7 +61,7 @@ bitmap_port_add(struct ip_set *set, void *value, u32 timeout)
 }
 
 static int
-bitmap_port_del(struct ip_set *set, void *value, u32 timeout)
+bitmap_port_del(struct ip_set *set, void *value, u32 timeout, u32 flags)
 {
 	struct bitmap_port *map = set->data;
 	u16 id = *(u16 *)value;
@@ -119,7 +119,7 @@ nla_put_failure:
 /* Timeout variant */
 
 static int
-bitmap_port_ttest(struct ip_set *set, void *value, u32 timeout)
+bitmap_port_ttest(struct ip_set *set, void *value, u32 timeout, u32 flags)
 {
 	const struct bitmap_port *map = set->data;
 	const unsigned long *members = map->members;
@@ -129,13 +129,13 @@ bitmap_port_ttest(struct ip_set *set, void *value, u32 timeout)
 }
 
 static int
-bitmap_port_tadd(struct ip_set *set, void *value, u32 timeout)
+bitmap_port_tadd(struct ip_set *set, void *value, u32 timeout, u32 flags)
 {
 	struct bitmap_port *map = set->data;
 	unsigned long *members = map->members;
 	u16 id = *(u16 *)value;
 
-	if (ip_set_timeout_test(members[id]))
+	if (ip_set_timeout_test(members[id]) && !(flags & IPSET_FLAG_EXIST))
 		return -IPSET_ERR_EXIST;
 
 	members[id] = ip_set_timeout_set(timeout);
@@ -144,7 +144,7 @@ bitmap_port_tadd(struct ip_set *set, void *value, u32 timeout)
 }
 
 static int
-bitmap_port_tdel(struct ip_set *set, void *value, u32 timeout)
+bitmap_port_tdel(struct ip_set *set, void *value, u32 timeout, u32 flags)
 {
 	struct bitmap_port *map = set->data;
 	unsigned long *members = map->members;
@@ -208,14 +208,16 @@ nla_put_failure:
 
 static int
 bitmap_port_kadt(struct ip_set *set, const struct sk_buff *skb,
-		 enum ipset_adt adt, u8 pf, u8 dim, u8 flags)
+		 const struct xt_action_param *par,
+		 enum ipset_adt adt, const struct ip_set_adt_opt *opt)
 {
 	struct bitmap_port *map = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	__be16 __port;
 	u16 port = 0;
 
-	if (!ip_set_get_ip_port(skb, pf, flags & IPSET_DIM_ONE_SRC, &__port))
+	if (!ip_set_get_ip_port(skb, opt->family,
+				opt->flags & IPSET_DIM_ONE_SRC, &__port))
 		return -EINVAL;
 
 	port = ntohs(__port);
@@ -225,12 +227,12 @@ bitmap_port_kadt(struct ip_set *set, const struct sk_buff *skb,
 
 	port -= map->first_port;
 
-	return adtfn(set, &port, map->timeout);
+	return adtfn(set, &port, opt_timeout(opt, map), opt->cmdflags);
 }
 
 static int
 bitmap_port_uadt(struct ip_set *set, struct nlattr *tb[],
-		 enum ipset_adt adt, u32 *lineno, u32 flags)
+		 enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
 {
 	struct bitmap_port *map = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
@@ -259,7 +261,7 @@ bitmap_port_uadt(struct ip_set *set, struct nlattr *tb[],
 
 	if (adt == IPSET_TEST) {
 		id = port - map->first_port;
-		return adtfn(set, &id, timeout);
+		return adtfn(set, &id, timeout, flags);
 	}
 
 	if (tb[IPSET_ATTR_PORT_TO]) {
@@ -277,7 +279,7 @@ bitmap_port_uadt(struct ip_set *set, struct nlattr *tb[],
 
 	for (; port <= port_to; port++) {
 		id = port - map->first_port;
-		ret = adtfn(set, &id, timeout);
+		ret = adtfn(set, &id, timeout, flags);
 
 		if (ret && !ip_set_eexist(ret, flags))
 			return ret;
@@ -482,7 +484,8 @@ static struct ip_set_type bitmap_port_type = {
 	.features	= IPSET_TYPE_PORT,
 	.dimension	= IPSET_DIM_ONE,
 	.family		= AF_UNSPEC,
-	.revision	= 0,
+	.revision_min	= 0,
+	.revision_max	= 0,
 	.create		= bitmap_port_create,
 	.create_policy	= {
 		[IPSET_ATTR_PORT]	= { .type = NLA_U16 },
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index 8041bef..d7e86ef 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -17,10 +17,10 @@
 #include <linux/spinlock.h>
 #include <linux/netlink.h>
 #include <linux/rculist.h>
-#include <linux/version.h>
 #include <net/netlink.h>
 
 #include <linux/netfilter.h>
+#include <linux/netfilter/x_tables.h>
 #include <linux/netfilter/nfnetlink.h>
 #include <linux/netfilter/ipset/ip_set.h>
 
@@ -70,7 +70,8 @@ find_set_type(const char *name, u8 family, u8 revision)
 	list_for_each_entry_rcu(type, &ip_set_type_list, list)
 		if (STREQ(type->name, name) &&
 		    (type->family == family || type->family == AF_UNSPEC) &&
-		    type->revision == revision)
+		    revision >= type->revision_min &&
+		    revision <= type->revision_max)
 			return type;
 	return NULL;
 }
@@ -135,10 +136,10 @@ find_set_type_minmax(const char *name, u8 family, u8 *min, u8 *max)
 		if (STREQ(type->name, name) &&
 		    (type->family == family || type->family == AF_UNSPEC)) {
 			found = true;
-			if (type->revision < *min)
-				*min = type->revision;
-			if (type->revision > *max)
-				*max = type->revision;
+			if (type->revision_min < *min)
+				*min = type->revision_min;
+			if (type->revision_max > *max)
+				*max = type->revision_max;
 		}
 	rcu_read_unlock();
 	if (found)
@@ -159,25 +160,27 @@ ip_set_type_register(struct ip_set_type *type)
 	int ret = 0;
 
 	if (type->protocol != IPSET_PROTOCOL) {
-		pr_warning("ip_set type %s, family %s, revision %u uses "
+		pr_warning("ip_set type %s, family %s, revision %u:%u uses "
 			   "wrong protocol version %u (want %u)\n",
 			   type->name, family_name(type->family),
-			   type->revision, type->protocol, IPSET_PROTOCOL);
+			   type->revision_min, type->revision_max,
+			   type->protocol, IPSET_PROTOCOL);
 		return -EINVAL;
 	}
 
 	ip_set_type_lock();
-	if (find_set_type(type->name, type->family, type->revision)) {
+	if (find_set_type(type->name, type->family, type->revision_min)) {
 		/* Duplicate! */
-		pr_warning("ip_set type %s, family %s, revision %u "
+		pr_warning("ip_set type %s, family %s with revision min %u "
 			   "already registered!\n", type->name,
-			   family_name(type->family), type->revision);
+			   family_name(type->family), type->revision_min);
 		ret = -EINVAL;
 		goto unlock;
 	}
 	list_add_rcu(&type->list, &ip_set_type_list);
-	pr_debug("type %s, family %s, revision %u registered.\n",
-		 type->name, family_name(type->family), type->revision);
+	pr_debug("type %s, family %s, revision %u:%u registered.\n",
+		 type->name, family_name(type->family),
+		 type->revision_min, type->revision_max);
 unlock:
 	ip_set_type_unlock();
 	return ret;
@@ -189,15 +192,15 @@ void
 ip_set_type_unregister(struct ip_set_type *type)
 {
 	ip_set_type_lock();
-	if (!find_set_type(type->name, type->family, type->revision)) {
-		pr_warning("ip_set type %s, family %s, revision %u "
+	if (!find_set_type(type->name, type->family, type->revision_min)) {
+		pr_warning("ip_set type %s, family %s with revision min %u "
 			   "not registered\n", type->name,
-			   family_name(type->family), type->revision);
+			   family_name(type->family), type->revision_min);
 		goto unlock;
 	}
 	list_del_rcu(&type->list);
-	pr_debug("type %s, family %s, revision %u unregistered.\n",
-		 type->name, family_name(type->family), type->revision);
+	pr_debug("type %s, family %s with revision min %u unregistered.\n",
+		 type->name, family_name(type->family), type->revision_min);
 unlock:
 	ip_set_type_unlock();
 
@@ -325,7 +328,8 @@ __ip_set_put(ip_set_id_t index)
 
 int
 ip_set_test(ip_set_id_t index, const struct sk_buff *skb,
-	    u8 family, u8 dim, u8 flags)
+	    const struct xt_action_param *par,
+	    const struct ip_set_adt_opt *opt)
 {
 	struct ip_set *set = ip_set_list[index];
 	int ret = 0;
@@ -333,19 +337,19 @@ ip_set_test(ip_set_id_t index, const struct sk_buff *skb,
 	BUG_ON(set == NULL);
 	pr_debug("set %s, index %u\n", set->name, index);
 
-	if (dim < set->type->dimension ||
-	    !(family == set->family || set->family == AF_UNSPEC))
+	if (opt->dim < set->type->dimension ||
+	    !(opt->family == set->family || set->family == AF_UNSPEC))
 		return 0;
 
 	read_lock_bh(&set->lock);
-	ret = set->variant->kadt(set, skb, IPSET_TEST, family, dim, flags);
+	ret = set->variant->kadt(set, skb, par, IPSET_TEST, opt);
 	read_unlock_bh(&set->lock);
 
 	if (ret == -EAGAIN) {
 		/* Type requests element to be completed */
 		pr_debug("element must be competed, ADD is triggered\n");
 		write_lock_bh(&set->lock);
-		set->variant->kadt(set, skb, IPSET_ADD, family, dim, flags);
+		set->variant->kadt(set, skb, par, IPSET_ADD, opt);
 		write_unlock_bh(&set->lock);
 		ret = 1;
 	}
@@ -357,7 +361,8 @@ EXPORT_SYMBOL_GPL(ip_set_test);
 
 int
 ip_set_add(ip_set_id_t index, const struct sk_buff *skb,
-	   u8 family, u8 dim, u8 flags)
+	   const struct xt_action_param *par,
+	   const struct ip_set_adt_opt *opt)
 {
 	struct ip_set *set = ip_set_list[index];
 	int ret;
@@ -365,12 +370,12 @@ ip_set_add(ip_set_id_t index, const struct sk_buff *skb,
 	BUG_ON(set == NULL);
 	pr_debug("set %s, index %u\n", set->name, index);
 
-	if (dim < set->type->dimension ||
-	    !(family == set->family || set->family == AF_UNSPEC))
+	if (opt->dim < set->type->dimension ||
+	    !(opt->family == set->family || set->family == AF_UNSPEC))
 		return 0;
 
 	write_lock_bh(&set->lock);
-	ret = set->variant->kadt(set, skb, IPSET_ADD, family, dim, flags);
+	ret = set->variant->kadt(set, skb, par, IPSET_ADD, opt);
 	write_unlock_bh(&set->lock);
 
 	return ret;
@@ -379,7 +384,8 @@ EXPORT_SYMBOL_GPL(ip_set_add);
 
 int
 ip_set_del(ip_set_id_t index, const struct sk_buff *skb,
-	   u8 family, u8 dim, u8 flags)
+	   const struct xt_action_param *par,
+	   const struct ip_set_adt_opt *opt)
 {
 	struct ip_set *set = ip_set_list[index];
 	int ret = 0;
@@ -387,12 +393,12 @@ ip_set_del(ip_set_id_t index, const struct sk_buff *skb,
 	BUG_ON(set == NULL);
 	pr_debug("set %s, index %u\n", set->name, index);
 
-	if (dim < set->type->dimension ||
-	    !(family == set->family || set->family == AF_UNSPEC))
+	if (opt->dim < set->type->dimension ||
+	    !(opt->family == set->family || set->family == AF_UNSPEC))
 		return 0;
 
 	write_lock_bh(&set->lock);
-	ret = set->variant->kadt(set, skb, IPSET_DEL, family, dim, flags);
+	ret = set->variant->kadt(set, skb, par, IPSET_DEL, opt);
 	write_unlock_bh(&set->lock);
 
 	return ret;
@@ -656,6 +662,7 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb,
 	rwlock_init(&set->lock);
 	strlcpy(set->name, name, IPSET_MAXNAMELEN);
 	set->family = family;
+	set->revision = revision;
 
 	/*
 	 * Next, check that we know the type, and take
@@ -675,8 +682,8 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb,
 	if (attr[IPSET_ATTR_DATA] &&
 	    nla_parse_nested(tb, IPSET_ATTR_CREATE_MAX, attr[IPSET_ATTR_DATA],
 			     set->type->create_policy)) {
-	    	ret = -IPSET_ERR_PROTOCOL;
-	    	goto put_out;
+		ret = -IPSET_ERR_PROTOCOL;
+		goto put_out;
 	}
 
 	ret = set->type->create(set, tb, flags);
@@ -696,7 +703,8 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb,
 		    (flags & IPSET_FLAG_EXIST) &&
 		    STREQ(set->type->name, clash->type->name) &&
 		    set->type->family == clash->type->family &&
-		    set->type->revision == clash->type->revision &&
+		    set->type->revision_min == clash->type->revision_min &&
+		    set->type->revision_max == clash->type->revision_max &&
 		    set->variant->same_set(set, clash))
 			ret = 0;
 		goto cleanup;
@@ -767,7 +775,7 @@ ip_set_destroy(struct sock *ctnl, struct sk_buff *skb,
 	if (!attr[IPSET_ATTR_SETNAME]) {
 		for (i = 0; i < ip_set_max; i++) {
 			if (ip_set_list[i] != NULL && ip_set_list[i]->ref) {
-				ret = IPSET_ERR_BUSY;
+				ret = -IPSET_ERR_BUSY;
 				goto out;
 			}
 		}
@@ -939,10 +947,13 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb,
 
 /* List/save set data */
 
-#define DUMP_INIT	0L
-#define DUMP_ALL	1L
-#define DUMP_ONE	2L
-#define DUMP_LAST	3L
+#define DUMP_INIT	0
+#define DUMP_ALL	1
+#define DUMP_ONE	2
+#define DUMP_LAST	3
+
+#define DUMP_TYPE(arg)		(((u32)(arg)) & 0x0000FFFF)
+#define DUMP_FLAGS(arg)		(((u32)(arg)) >> 16)
 
 static int
 ip_set_dump_done(struct netlink_callback *cb)
@@ -973,6 +984,7 @@ dump_init(struct netlink_callback *cb)
 	int min_len = NLMSG_SPACE(sizeof(struct nfgenmsg));
 	struct nlattr *cda[IPSET_ATTR_CMD_MAX+1];
 	struct nlattr *attr = (void *)nlh + min_len;
+	u32 dump_type;
 	ip_set_id_t index;
 
 	/* Second pass, so parser can't fail */
@@ -984,17 +996,22 @@ dump_init(struct netlink_callback *cb)
 	 *         [..]: type specific
 	 */
 
-	if (!cda[IPSET_ATTR_SETNAME]) {
-		cb->args[0] = DUMP_ALL;
-		return 0;
-	}
+	if (cda[IPSET_ATTR_SETNAME]) {
+		index = find_set_id(nla_data(cda[IPSET_ATTR_SETNAME]));
+		if (index == IPSET_INVALID_ID)
+			return -ENOENT;
 
-	index = find_set_id(nla_data(cda[IPSET_ATTR_SETNAME]));
-	if (index == IPSET_INVALID_ID)
-		return -ENOENT;
+		dump_type = DUMP_ONE;
+		cb->args[1] = index;
+	} else
+		dump_type = DUMP_ALL;
+
+	if (cda[IPSET_ATTR_FLAGS]) {
+		u32 f = ip_set_get_h32(cda[IPSET_ATTR_FLAGS]);
+		dump_type |= (f << 16);
+	}
+	cb->args[0] = dump_type;
 
-	cb->args[0] = DUMP_ONE;
-	cb->args[1] = index;
 	return 0;
 }
 
@@ -1005,9 +1022,10 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb)
 	struct ip_set *set = NULL;
 	struct nlmsghdr *nlh = NULL;
 	unsigned int flags = NETLINK_CB(cb->skb).pid ? NLM_F_MULTI : 0;
+	u32 dump_type, dump_flags;
 	int ret = 0;
 
-	if (cb->args[0] == DUMP_INIT) {
+	if (!cb->args[0]) {
 		ret = dump_init(cb);
 		if (ret < 0) {
 			nlh = nlmsg_hdr(cb->skb);
@@ -1022,14 +1040,17 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb)
 	if (cb->args[1] >= ip_set_max)
 		goto out;
 
-	max = cb->args[0] == DUMP_ONE ? cb->args[1] + 1 : ip_set_max;
+	dump_type = DUMP_TYPE(cb->args[0]);
+	dump_flags = DUMP_FLAGS(cb->args[0]);
+	max = dump_type == DUMP_ONE ? cb->args[1] + 1 : ip_set_max;
 dump_last:
-	pr_debug("args[0]: %ld args[1]: %ld\n", cb->args[0], cb->args[1]);
+	pr_debug("args[0]: %u %u args[1]: %ld\n",
+		 dump_type, dump_flags, cb->args[1]);
 	for (; cb->args[1] < max; cb->args[1]++) {
 		index = (ip_set_id_t) cb->args[1];
 		set = ip_set_list[index];
 		if (set == NULL) {
-			if (cb->args[0] == DUMP_ONE) {
+			if (dump_type == DUMP_ONE) {
 				ret = -ENOENT;
 				goto out;
 			}
@@ -1038,8 +1059,8 @@ dump_last:
 		/* When dumping all sets, we must dump "sorted"
 		 * so that lists (unions of sets) are dumped last.
 		 */
-		if (cb->args[0] != DUMP_ONE &&
-		    ((cb->args[0] == DUMP_ALL) ==
+		if (dump_type != DUMP_ONE &&
+		    ((dump_type == DUMP_ALL) ==
 		     !!(set->type->features & IPSET_DUMP_LAST)))
 			continue;
 		pr_debug("List set: %s\n", set->name);
@@ -1057,6 +1078,8 @@ dump_last:
 		}
 		NLA_PUT_U8(skb, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL);
 		NLA_PUT_STRING(skb, IPSET_ATTR_SETNAME, set->name);
+		if (dump_flags & IPSET_FLAG_LIST_SETNAME)
+			goto next_set;
 		switch (cb->args[2]) {
 		case 0:
 			/* Core header data */
@@ -1065,28 +1088,27 @@ dump_last:
 			NLA_PUT_U8(skb, IPSET_ATTR_FAMILY,
 				   set->family);
 			NLA_PUT_U8(skb, IPSET_ATTR_REVISION,
-				   set->type->revision);
+				   set->revision);
 			ret = set->variant->head(set, skb);
 			if (ret < 0)
 				goto release_refcount;
+			if (dump_flags & IPSET_FLAG_LIST_HEADER)
+				goto next_set;
 			/* Fall through and add elements */
 		default:
 			read_lock_bh(&set->lock);
 			ret = set->variant->list(set, skb, cb);
 			read_unlock_bh(&set->lock);
-			if (!cb->args[2]) {
+			if (!cb->args[2])
 				/* Set is done, proceed with next one */
-				if (cb->args[0] == DUMP_ONE)
-					cb->args[1] = IPSET_INVALID_ID;
-				else
-					cb->args[1]++;
-			}
+				goto next_set;
 			goto release_refcount;
 		}
 	}
 	/* If we dump all sets, continue with dumping last ones */
-	if (cb->args[0] == DUMP_ALL) {
-		cb->args[0] = DUMP_LAST;
+	if (dump_type == DUMP_ALL) {
+		dump_type = DUMP_LAST;
+		cb->args[0] = dump_type | (dump_flags << 16);
 		cb->args[1] = 0;
 		goto dump_last;
 	}
@@ -1094,6 +1116,11 @@ dump_last:
 
 nla_put_failure:
 	ret = -EFAULT;
+next_set:
+	if (dump_type == DUMP_ONE)
+		cb->args[1] = IPSET_INVALID_ID;
+	else
+		cb->args[1]++;
release_refcount:
 	/* If there was an error or set is done, release set */
 	if (ret || !cb->args[2]) {
@@ -1120,7 +1147,7 @@ ip_set_dump(struct sock *ctnl, struct sk_buff *skb,
 
 	return netlink_dump_start(ctnl, skb, nlh,
 				  ip_set_dump_start,
-				  ip_set_dump_done);
+				  ip_set_dump_done, 0);
 }
 
 /* Add, del and test */
@@ -1139,17 +1166,18 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set,
 	struct nlattr *tb[], enum ipset_adt adt,
 	u32 flags, bool use_lineno)
 {
-	int ret, retried = 0;
+	int ret;
 	u32 lineno = 0;
-	bool eexist = flags & IPSET_FLAG_EXIST;
+	bool eexist = flags & IPSET_FLAG_EXIST, retried = false;
 
 	do {
 		write_lock_bh(&set->lock);
-		ret = set->variant->uadt(set, tb, adt, &lineno, flags);
+		ret = set->variant->uadt(set, tb, adt, &lineno, flags, retried);
 		write_unlock_bh(&set->lock);
+		retried = true;
 	} while (ret == -EAGAIN &&
 		 set->variant->resize &&
-		 (ret = set->variant->resize(set, retried++)) == 0);
+		 (ret = set->variant->resize(set, retried)) == 0);
 
 	if (!ret || (ret == -IPSET_ERR_EXIST && eexist))
 		return 0;
@@ -1322,7 +1350,7 @@ ip_set_utest(struct sock *ctnl, struct sk_buff *skb,
 		return -IPSET_ERR_PROTOCOL;
 
 	read_lock_bh(&set->lock);
-	ret = set->variant->uadt(set, tb, IPSET_TEST, NULL, 0);
+	ret = set->variant->uadt(set, tb, IPSET_TEST, NULL, 0, 0);
 	read_unlock_bh(&set->lock);
 	/* Userspace can't trigger element to be re-added */
 	if (ret == -EAGAIN)
@@ -1365,7 +1393,7 @@ ip_set_header(struct sock *ctnl, struct sk_buff *skb,
 	NLA_PUT_STRING(skb2, IPSET_ATTR_SETNAME, set->name);
 	NLA_PUT_STRING(skb2, IPSET_ATTR_TYPENAME, set->type->name);
 	NLA_PUT_U8(skb2, IPSET_ATTR_FAMILY, set->family);
-	NLA_PUT_U8(skb2, IPSET_ATTR_REVISION, set->type->revision);
+	NLA_PUT_U8(skb2, IPSET_ATTR_REVISION, set->revision);
 	nlmsg_end(skb2, nlh2);
 
 	ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT);
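The core changes above replace the single per-type revision with a [revision_min, revision_max] range, so one module can serve every userspace revision it implements, and each set now remembers the revision it was created with (set->revision) for listing. The matching logic in sketch form (abbreviated struct, field names as in the patch):

typedef unsigned char u8;

struct set_type_stub {
	u8 revision_min;	/* oldest revision still supported */
	u8 revision_max;	/* newest revision implemented */
};

static int type_serves(const struct set_type_stub *t, u8 revision)
{
	/* One registration now covers the whole range instead of
	 * one entry per revision. */
	return revision >= t->revision_min && revision <= t->revision_max;
}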
diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c
index 43bcce2..fa80bb9 100644
--- a/net/netfilter/ipset/ip_set_hash_ip.c
+++ b/net/netfilter/ipset/ip_set_hash_ip.c
@@ -108,25 +108,32 @@ nla_put_failure:
 #define HOST_MASK	32
 #include <linux/netfilter/ipset/ip_set_ahash.h>
 
+static inline void
+hash_ip4_data_next(struct ip_set_hash *h, const struct hash_ip4_elem *d)
+{
+	h->next.ip = ntohl(d->ip);
+}
+
 static int
 hash_ip4_kadt(struct ip_set *set, const struct sk_buff *skb,
-	      enum ipset_adt adt, u8 pf, u8 dim, u8 flags)
+	      const struct xt_action_param *par,
+	      enum ipset_adt adt, const struct ip_set_adt_opt *opt)
 {
 	const struct ip_set_hash *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	__be32 ip;
 
-	ip4addrptr(skb, flags & IPSET_DIM_ONE_SRC, &ip);
+	ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &ip);
 	ip &= ip_set_netmask(h->netmask);
 	if (ip == 0)
 		return -EINVAL;
 
-	return adtfn(set, &ip, h->timeout);
+	return adtfn(set, &ip, opt_timeout(opt, h), opt->cmdflags);
 }
 
 static int
 hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[],
-	      enum ipset_adt adt, u32 *lineno, u32 flags)
+	      enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
 {
 	const struct ip_set_hash *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
@@ -157,7 +164,7 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[],
 		nip = htonl(ip);
 		if (nip == 0)
 			return -IPSET_ERR_HASH_ELEM;
-		return adtfn(set, &nip, timeout);
+		return adtfn(set, &nip, timeout, flags);
 	}
 
 	if (tb[IPSET_ATTR_IP_TO]) {
@@ -171,18 +178,19 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[],
 		if (cidr > 32)
 			return -IPSET_ERR_INVALID_CIDR;
-		ip &= ip_set_hostmask(cidr);
-		ip_to = ip | ~ip_set_hostmask(cidr);
+		ip_set_mask_from_to(ip, ip_to, cidr);
 	} else
 		ip_to = ip;
 
 	hosts = h->netmask == 32 ? 1 : 2 << (32 - h->netmask - 1);
 
+	if (retried)
+		ip = h->next.ip;
 	for (; !before(ip_to, ip); ip += hosts) {
 		nip = htonl(ip);
 		if (nip == 0)
 			return -IPSET_ERR_HASH_ELEM;
-		ret = adtfn(set, &nip, timeout);
+		ret = adtfn(set, &nip, timeout, flags);
 
 		if (ret && !ip_set_eexist(ret, flags))
 			return ret;
@@ -281,20 +289,26 @@ nla_put_failure:
 #define HOST_MASK	128
 #include <linux/netfilter/ipset/ip_set_ahash.h>
 
+static inline void
+hash_ip6_data_next(struct ip_set_hash *h, const struct hash_ip6_elem *d)
+{
+}
+
 static int
 hash_ip6_kadt(struct ip_set *set, const struct sk_buff *skb,
-	      enum ipset_adt adt, u8 pf, u8 dim, u8 flags)
+	      const struct xt_action_param *par,
+	      enum ipset_adt adt, const struct ip_set_adt_opt *opt)
 {
 	const struct ip_set_hash *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	union nf_inet_addr ip;
 
-	ip6addrptr(skb, flags & IPSET_DIM_ONE_SRC, &ip.in6);
+	ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &ip.in6);
 	ip6_netmask(&ip, h->netmask);
 	if (ipv6_addr_any(&ip.in6))
 		return -EINVAL;
 
-	return adtfn(set, &ip, h->timeout);
+	return adtfn(set, &ip, opt_timeout(opt, h), opt->cmdflags);
 }
 
 static const struct nla_policy hash_ip6_adt_policy[IPSET_ATTR_ADT_MAX + 1] = {
@@ -305,7 +319,7 @@ static const struct nla_policy hash_ip6_adt_policy[IPSET_ATTR_ADT_MAX + 1] = {
 
 static int
 hash_ip6_uadt(struct ip_set *set, struct nlattr *tb[],
-	      enum ipset_adt adt, u32 *lineno, u32 flags)
+	      enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
 {
 	const struct ip_set_hash *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
@@ -336,7 +350,7 @@ hash_ip6_uadt(struct ip_set *set, struct nlattr *tb[],
 		timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
 	}
 
-	ret = adtfn(set, &ip, timeout);
+	ret = adtfn(set, &ip, timeout, flags);
 
 	return ip_set_eexist(ret, flags) ? 0 : ret;
 }
@@ -428,7 +442,8 @@ static struct ip_set_type hash_ip_type __read_mostly = {
 	.features	= IPSET_TYPE_IP,
 	.dimension	= IPSET_DIM_ONE,
 	.family		= AF_UNSPEC,
-	.revision	= 0,
+	.revision_min	= 0,
+	.revision_max	= 0,
 	.create		= hash_ip_create,
 	.create_policy	= {
 		[IPSET_ATTR_HASHSIZE]	= { .type = NLA_U32 },
diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c
index 14281b6..bbf51b6 100644
--- a/net/netfilter/ipset/ip_set_hash_ipport.c
+++ b/net/netfilter/ipset/ip_set_hash_ipport.c
@@ -124,31 +124,40 @@ nla_put_failure:
 #define HOST_MASK	32
 #include <linux/netfilter/ipset/ip_set_ahash.h>
 
+static inline void
+hash_ipport4_data_next(struct ip_set_hash *h,
+		       const struct hash_ipport4_elem *d)
+{
+	h->next.ip = ntohl(d->ip);
+	h->next.port = ntohs(d->port);
+}
+
 static int
 hash_ipport4_kadt(struct ip_set *set, const struct sk_buff *skb,
-		  enum ipset_adt adt, u8 pf, u8 dim, u8 flags)
+		  const struct xt_action_param *par,
+		  enum ipset_adt adt, const struct ip_set_adt_opt *opt)
 {
 	const struct ip_set_hash *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_ipport4_elem data = { };
 
-	if (!ip_set_get_ip4_port(skb, flags & IPSET_DIM_TWO_SRC,
+	if (!ip_set_get_ip4_port(skb, opt->flags & IPSET_DIM_TWO_SRC,
 				 &data.port, &data.proto))
 		return -EINVAL;
 
-	ip4addrptr(skb, flags & IPSET_DIM_ONE_SRC, &data.ip);
+	ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &data.ip);
 
-	return adtfn(set, &data, h->timeout);
+	return adtfn(set, &data, opt_timeout(opt, h), opt->cmdflags);
 }
 
 static int
 hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
-		  enum ipset_adt adt, u32 *lineno, u32 flags)
+		  enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
 {
 	const struct ip_set_hash *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_ipport4_elem data = { };
-	u32 ip, ip_to, p, port, port_to;
+	u32 ip, ip_to, p = 0, port, port_to;
 	u32 timeout = h->timeout;
 	bool with_ports = false;
 	int ret;
@@ -192,7 +201,7 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
 	if (adt == IPSET_TEST ||
 	    !(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_CIDR] ||
 	      tb[IPSET_ATTR_PORT_TO])) {
-		ret = adtfn(set, &data, timeout);
+		ret = adtfn(set, &data, timeout, flags);
 		return ip_set_eexist(ret, flags) ? 0 : ret;
 	}
 
@@ -208,8 +217,7 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
 		if (cidr > 32)
 			return -IPSET_ERR_INVALID_CIDR;
-		ip &= ip_set_hostmask(cidr);
-		ip_to = ip | ~ip_set_hostmask(cidr);
+		ip_set_mask_from_to(ip, ip_to, cidr);
 	} else
 		ip_to = ip;
 
@@ -220,17 +228,21 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
 		swap(port, port_to);
 	}
 
-	for (; !before(ip_to, ip); ip++)
-		for (p = port; p <= port_to; p++) {
+	if (retried)
+		ip = h->next.ip;
+	for (; !before(ip_to, ip); ip++) {
+		p = retried && ip == h->next.ip ? h->next.port : port;
+		for (; p <= port_to; p++) {
 			data.ip = htonl(ip);
 			data.port = htons(p);
-			ret = adtfn(set, &data, timeout);
+			ret = adtfn(set, &data, timeout, flags);
 
 			if (ret && !ip_set_eexist(ret, flags))
 				return ret;
 			else
 				ret = 0;
 		}
+	}
 	return ret;
 }
@@ -328,26 +340,34 @@ nla_put_failure:
 #define HOST_MASK	128
 #include <linux/netfilter/ipset/ip_set_ahash.h>
 
+static inline void
+hash_ipport6_data_next(struct ip_set_hash *h,
+		       const struct hash_ipport6_elem *d)
+{
+	h->next.port = ntohs(d->port);
+}
+
 static int
 hash_ipport6_kadt(struct ip_set *set, const struct sk_buff *skb,
-		  enum ipset_adt adt, u8 pf, u8 dim, u8 flags)
+		  const struct xt_action_param *par,
+		  enum ipset_adt adt, const struct ip_set_adt_opt *opt)
 {
 	const struct ip_set_hash *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_ipport6_elem data = { };
 
-	if (!ip_set_get_ip6_port(skb, flags & IPSET_DIM_TWO_SRC,
+	if (!ip_set_get_ip6_port(skb, opt->flags & IPSET_DIM_TWO_SRC,
 				 &data.port, &data.proto))
 		return -EINVAL;
 
-	ip6addrptr(skb, flags & IPSET_DIM_ONE_SRC, &data.ip.in6);
+	ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &data.ip.in6);
 
-	return adtfn(set, &data, h->timeout);
+	return adtfn(set, &data, opt_timeout(opt, h), opt->cmdflags);
 }
 
 static int
 hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[],
-		  enum ipset_adt adt, u32 *lineno, u32 flags)
+		  enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
 {
 	const struct ip_set_hash *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
@@ -396,7 +416,7 @@ hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[],
 	}
 
 	if (adt == IPSET_TEST || !with_ports || !tb[IPSET_ATTR_PORT_TO]) {
-		ret = adtfn(set, &data, timeout);
+		ret = adtfn(set, &data, timeout, flags);
 		return ip_set_eexist(ret, flags) ? 0 : ret;
 	}
 
@@ -405,9 +425,11 @@ hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[],
 	if (port > port_to)
 		swap(port, port_to);
 
+	if (retried)
+		port = h->next.port;
 	for (; port <= port_to; port++) {
 		data.port = htons(port);
-		ret = adtfn(set, &data, timeout);
+		ret = adtfn(set, &data, timeout, flags);
 
 		if (ret && !ip_set_eexist(ret, flags))
 			return ret;
@@ -491,7 +513,8 @@ static struct ip_set_type hash_ipport_type __read_mostly = {
 	.features	= IPSET_TYPE_IP | IPSET_TYPE_PORT,
 	.dimension	= IPSET_DIM_TWO,
 	.family		= AF_UNSPEC,
-	.revision	= 1,
+	.revision_min	= 0,
+	.revision_max	= 1,	/* SCTP and UDPLITE support added */
 	.create		= hash_ipport_create,
 	.create_policy	= {
 		[IPSET_ATTR_HASHSIZE]	= { .type = NLA_U32 },
diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c
index 401c8a2..96525f5 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportip.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportip.c
@@ -127,32 +127,41 @@ nla_put_failure:
 #define HOST_MASK	32
 #include <linux/netfilter/ipset/ip_set_ahash.h>
 
+static inline void
+hash_ipportip4_data_next(struct ip_set_hash *h,
+			 const struct hash_ipportip4_elem *d)
+{
+	h->next.ip = ntohl(d->ip);
+	h->next.port = ntohs(d->port);
+}
+
 static int
 hash_ipportip4_kadt(struct ip_set *set, const struct sk_buff *skb,
-		    enum ipset_adt adt, u8 pf, u8 dim, u8 flags)
+		    const struct xt_action_param *par,
+		    enum ipset_adt adt, const struct ip_set_adt_opt *opt)
 {
 	const struct ip_set_hash *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_ipportip4_elem data = { };
 
-	if (!ip_set_get_ip4_port(skb, flags & IPSET_DIM_TWO_SRC,
+	if (!ip_set_get_ip4_port(skb, opt->flags & IPSET_DIM_TWO_SRC,
 				 &data.port, &data.proto))
 		return -EINVAL;
 
-	ip4addrptr(skb, flags & IPSET_DIM_ONE_SRC, &data.ip);
-	ip4addrptr(skb, flags & IPSET_DIM_THREE_SRC, &data.ip2);
+	ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &data.ip);
+	ip4addrptr(skb, opt->flags & IPSET_DIM_THREE_SRC, &data.ip2);
 
-	return adtfn(set, &data, h->timeout);
+	return adtfn(set, &data, opt_timeout(opt, h), opt->cmdflags);
 }
 
 static int
 hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
-		    enum ipset_adt adt, u32 *lineno, u32 flags)
+		    enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
 {
 	const struct ip_set_hash *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_ipportip4_elem data = { };
-	u32 ip, ip_to, p, port, port_to;
+	u32 ip, ip_to, p = 0, port, port_to;
 	u32 timeout = h->timeout;
 	bool with_ports = false;
 	int ret;
@@ -200,7 +209,7 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
 	if (adt == IPSET_TEST ||
 	    !(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_CIDR] ||
 	      tb[IPSET_ATTR_PORT_TO])) {
-		ret = adtfn(set, &data, timeout);
+		ret = adtfn(set, &data, timeout, flags);
 		return ip_set_eexist(ret, flags) ? 0 : ret;
 	}
 
@@ -216,8 +225,7 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
 		if (cidr > 32)
 			return -IPSET_ERR_INVALID_CIDR;
-		ip &= ip_set_hostmask(cidr);
-		ip_to = ip | ~ip_set_hostmask(cidr);
+		ip_set_mask_from_to(ip, ip_to, cidr);
 	} else
 		ip_to = ip;
 
@@ -228,17 +236,21 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
 		swap(port, port_to);
 	}
 
-	for (; !before(ip_to, ip); ip++)
-		for (p = port; p <= port_to; p++) {
+	if (retried)
+		ip = h->next.ip;
+	for (; !before(ip_to, ip); ip++) {
+		p = retried && ip == h->next.ip ? h->next.port : port;
+		for (; p <= port_to; p++) {
 			data.ip = htonl(ip);
 			data.port = htons(p);
-			ret = adtfn(set, &data, timeout);
+			ret = adtfn(set, &data, timeout, flags);
 
 			if (ret && !ip_set_eexist(ret, flags))
 				return ret;
 			else
 				ret = 0;
 		}
+	}
 	return ret;
 }
@@ -341,27 +353,35 @@ nla_put_failure:
 #define HOST_MASK	128
 #include <linux/netfilter/ipset/ip_set_ahash.h>
 
+static inline void
+hash_ipportip6_data_next(struct ip_set_hash *h,
+			 const struct hash_ipportip6_elem *d)
+{
+	h->next.port = ntohs(d->port);
+}
+
 static int
 hash_ipportip6_kadt(struct ip_set *set, const struct sk_buff *skb,
-		    enum ipset_adt adt, u8 pf, u8 dim, u8 flags)
+		    const struct xt_action_param *par,
+		    enum ipset_adt adt, const struct ip_set_adt_opt *opt)
 {
 	const struct ip_set_hash *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_ipportip6_elem data = { };
 
-	if (!ip_set_get_ip6_port(skb, flags & IPSET_DIM_TWO_SRC,
+	if (!ip_set_get_ip6_port(skb, opt->flags & IPSET_DIM_TWO_SRC,
 				 &data.port, &data.proto))
 		return -EINVAL;
 
-	ip6addrptr(skb, flags & IPSET_DIM_ONE_SRC, &data.ip.in6);
-	ip6addrptr(skb, flags & IPSET_DIM_THREE_SRC, &data.ip2.in6);
+	ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &data.ip.in6);
+	ip6addrptr(skb, opt->flags & IPSET_DIM_THREE_SRC, &data.ip2.in6);
 
-	return adtfn(set, &data, h->timeout);
+	return adtfn(set, &data, opt_timeout(opt, h), opt->cmdflags);
 }
 
 static int
 hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[],
-		    enum ipset_adt adt, u32 *lineno, u32 flags)
+		    enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
 {
 	const struct ip_set_hash *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
@@ -414,7 +434,7 @@ hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[],
 	}
 
 	if (adt == IPSET_TEST || !with_ports || !tb[IPSET_ATTR_PORT_TO]) {
-		ret = adtfn(set, &data, timeout);
+		ret = adtfn(set, &data, timeout, flags);
 		return ip_set_eexist(ret, flags) ? 0 : ret;
 	}
 
@@ -423,9 +443,11 @@ hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[],
 	if (port > port_to)
 		swap(port, port_to);
 
+	if (retried)
+		port = h->next.port;
 	for (; port <= port_to; port++) {
 		data.port = htons(port);
-		ret = adtfn(set, &data, timeout);
+		ret = adtfn(set, &data, timeout, flags);
 
 		if (ret && !ip_set_eexist(ret, flags))
 			return ret;
@@ -509,7 +531,8 @@ static struct ip_set_type hash_ipportip_type __read_mostly = {
 	.features	= IPSET_TYPE_IP | IPSET_TYPE_PORT | IPSET_TYPE_IP2,
 	.dimension	= IPSET_DIM_THREE,
 	.family		= AF_UNSPEC,
-	.revision	= 1,
+	.revision_min	= 0,
+	.revision_max	= 1,	/* SCTP and UDPLITE support added */
 	.create		= hash_ipportip_create,
 	.create_policy	= {
 		[IPSET_ATTR_HASHSIZE]	= { .type = NLA_U32 },
diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c
index 4743e54..d2d6ab8 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c
@@ -140,39 +140,51 @@ nla_put_failure:
 #define HOST_MASK	32
 #include <linux/netfilter/ipset/ip_set_ahash.h>
 
+static inline void
+hash_ipportnet4_data_next(struct ip_set_hash *h,
+			  const struct hash_ipportnet4_elem *d)
+{
+	h->next.ip = ntohl(d->ip);
+	h->next.port = ntohs(d->port);
+	h->next.ip2 = ntohl(d->ip2);
+}
+
 static int
 hash_ipportnet4_kadt(struct ip_set *set, const struct sk_buff *skb,
-		     enum ipset_adt adt, u8 pf, u8 dim, u8 flags)
+		     const struct xt_action_param *par,
+		     enum ipset_adt adt, const struct ip_set_adt_opt *opt)
 {
 	const struct ip_set_hash *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
-	struct hash_ipportnet4_elem data =
-		{ .cidr = h->nets[0].cidr || HOST_MASK };
+	struct hash_ipportnet4_elem data = {
+		.cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK
+	};
 
 	if (data.cidr == 0)
 		return -EINVAL;
 	if (adt == IPSET_TEST)
 		data.cidr = HOST_MASK;
 
-	if (!ip_set_get_ip4_port(skb, flags & IPSET_DIM_TWO_SRC,
+	if (!ip_set_get_ip4_port(skb, opt->flags & IPSET_DIM_TWO_SRC,
 				 &data.port, &data.proto))
 		return -EINVAL;
 
-	ip4addrptr(skb, flags & IPSET_DIM_ONE_SRC, &data.ip);
-	ip4addrptr(skb, flags & IPSET_DIM_THREE_SRC, &data.ip2);
+	ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &data.ip);
+	ip4addrptr(skb, opt->flags & IPSET_DIM_THREE_SRC, &data.ip2);
 	data.ip2 &= ip_set_netmask(data.cidr);
 
-	return adtfn(set, &data, h->timeout);
+	return adtfn(set, &data, opt_timeout(opt, h), opt->cmdflags);
 }
 
 static int
 hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
-		     enum ipset_adt adt, u32 *lineno, u32 flags)
+		     enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
 {
 	const struct ip_set_hash *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_ipportnet4_elem data = { .cidr = HOST_MASK };
-	u32 ip, ip_to, p, port, port_to;
+	u32 ip, ip_to, p = 0, port, port_to;
+	u32 ip2_from = 0, ip2_to, ip2_last, ip2;
 	u32 timeout = h->timeout;
 	bool with_ports = false;
 	int ret;
@@ -186,21 +198,19 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
 	if (tb[IPSET_ATTR_LINENO])
 		*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
 
-	ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &data.ip);
+	ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip);
 	if (ret)
 		return ret;
 
-	ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP2], &data.ip2);
+	ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP2], &ip2_from);
 	if (ret)
 		return ret;
 
-	if (tb[IPSET_ATTR_CIDR2])
+	if (tb[IPSET_ATTR_CIDR2]) {
 		data.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR2]);
-
-	if (!data.cidr)
-		return -IPSET_ERR_INVALID_CIDR;
-
-	data.ip2 &= ip_set_netmask(data.cidr);
+		if (!data.cidr)
+			return -IPSET_ERR_INVALID_CIDR;
+	}
 
 	if (tb[IPSET_ATTR_PORT])
 		data.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
@@ -225,14 +235,16 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
 		timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
 	}
 
+	with_ports = with_ports && tb[IPSET_ATTR_PORT_TO];
 	if (adt == IPSET_TEST ||
-	    !(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_CIDR] ||
-	      tb[IPSET_ATTR_PORT_TO])) {
-		ret = adtfn(set, &data, timeout);
+	    !(tb[IPSET_ATTR_CIDR] || tb[IPSET_ATTR_IP_TO] || with_ports ||
+	      tb[IPSET_ATTR_IP2_TO])) {
+		data.ip = htonl(ip);
+		data.ip2 = htonl(ip2_from & ip_set_hostmask(data.cidr));
+		ret = adtfn(set, &data, timeout, flags);
 		return ip_set_eexist(ret, flags) ? 0 : ret;
 	}
 
-	ip = ntohl(data.ip);
 	if (tb[IPSET_ATTR_IP_TO]) {
 		ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to);
 		if (ret)
@@ -244,29 +256,50 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
 		if (cidr > 32)
 			return -IPSET_ERR_INVALID_CIDR;
-		ip &= ip_set_hostmask(cidr);
-		ip_to = ip | ~ip_set_hostmask(cidr);
-	} else
-		ip_to = ip;
+		ip_set_mask_from_to(ip, ip_to, cidr);
+	}
 
 	port_to = port = ntohs(data.port);
-	if (with_ports && tb[IPSET_ATTR_PORT_TO]) {
+	if (tb[IPSET_ATTR_PORT_TO]) {
 		port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]);
 		if (port > port_to)
 			swap(port, port_to);
 	}
+	if (tb[IPSET_ATTR_IP2_TO]) {
+		ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP2_TO], &ip2_to);
+		if (ret)
+			return ret;
+		if (ip2_from > ip2_to)
+			swap(ip2_from, ip2_to);
+		if (ip2_from + UINT_MAX == ip2_to)
+			return -IPSET_ERR_HASH_RANGE;
+	} else {
+		ip_set_mask_from_to(ip2_from, ip2_to, data.cidr);
+	}
 
-	for (; !before(ip_to, ip); ip++)
-		for (p = port; p <= port_to; p++) {
-			data.ip = htonl(ip);
+	if (retried)
+		ip = h->next.ip;
+	for (; !before(ip_to, ip); ip++) {
+		data.ip = htonl(ip);
+		p = retried && ip == h->next.ip ? h->next.port : port;
+		for (; p <= port_to; p++) {
 			data.port = htons(p);
-			ret = adtfn(set, &data, timeout);
-
-			if (ret && !ip_set_eexist(ret, flags))
-				return ret;
-			else
-				ret = 0;
+			ip2 = retried && ip == h->next.ip && p == h->next.port
+				? h->next.ip2 : ip2_from;
+			while (!after(ip2, ip2_to)) {
+				data.ip2 = htonl(ip2);
+				ip2_last = ip_set_range_to_cidr(ip2, ip2_to,
+								&data.cidr);
+				ret = adtfn(set, &data, timeout, flags);
+
+				if (ret && !ip_set_eexist(ret, flags))
+					return ret;
+				else
+					ret = 0;
+				ip2 = ip2_last + 1;
+			}
 		}
+	}
 	return ret;
 }
@@ -388,34 +421,43 @@ nla_put_failure:
 #define HOST_MASK	128
 #include <linux/netfilter/ipset/ip_set_ahash.h>
 
+static inline void
+hash_ipportnet6_data_next(struct ip_set_hash *h,
+			  const struct hash_ipportnet6_elem *d)
+{
+	h->next.port = ntohs(d->port);
+}
+
 static int
 hash_ipportnet6_kadt(struct ip_set *set, const struct sk_buff *skb,
-		     enum ipset_adt adt, u8 pf, u8 dim, u8 flags)
+		     const struct xt_action_param *par,
+		     enum ipset_adt adt, const struct ip_set_adt_opt *opt)
 {
 	const struct ip_set_hash *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
-	struct hash_ipportnet6_elem data =
-		{ .cidr = h->nets[0].cidr || HOST_MASK };
+	struct hash_ipportnet6_elem data = {
+		.cidr = h->nets[0].cidr ?
ip_set_netmask(data.cidr); + if (!data.cidr) + return -IPSET_ERR_INVALID_CIDR; + } if (tb[IPSET_ATTR_PORT]) data.port = nla_get_be16(tb[IPSET_ATTR_PORT]); @@ -225,14 +235,16 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); } + with_ports = with_ports && tb[IPSET_ATTR_PORT_TO]; if (adt == IPSET_TEST || - !(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_CIDR] || - tb[IPSET_ATTR_PORT_TO])) { - ret = adtfn(set, &data, timeout); + !(tb[IPSET_ATTR_CIDR] || tb[IPSET_ATTR_IP_TO] || with_ports || + tb[IPSET_ATTR_IP2_TO])) { + data.ip = htonl(ip); + data.ip2 = htonl(ip2_from & ip_set_hostmask(data.cidr)); + ret = adtfn(set, &data, timeout, flags); return ip_set_eexist(ret, flags) ? 0 : ret; } - ip = ntohl(data.ip); if (tb[IPSET_ATTR_IP_TO]) { ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to); if (ret) @@ -244,29 +256,50 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], if (cidr > 32) return -IPSET_ERR_INVALID_CIDR; - ip &= ip_set_hostmask(cidr); - ip_to = ip | ~ip_set_hostmask(cidr); - } else - ip_to = ip; + ip_set_mask_from_to(ip, ip_to, cidr); + } port_to = port = ntohs(data.port); - if (with_ports && tb[IPSET_ATTR_PORT_TO]) { + if (tb[IPSET_ATTR_PORT_TO]) { port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]); if (port > port_to) swap(port, port_to); } + if (tb[IPSET_ATTR_IP2_TO]) { + ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP2_TO], &ip2_to); + if (ret) + return ret; + if (ip2_from > ip2_to) + swap(ip2_from, ip2_to); + if (ip2_from + UINT_MAX == ip2_to) + return -IPSET_ERR_HASH_RANGE; + } else { + ip_set_mask_from_to(ip2_from, ip2_to, data.cidr); + } - for (; !before(ip_to, ip); ip++) - for (p = port; p <= port_to; p++) { - data.ip = htonl(ip); + if (retried) + ip = h->next.ip; + for (; !before(ip_to, ip); ip++) { + data.ip = htonl(ip); + p = retried && ip == h->next.ip ? h->next.port : port; + for (; p <= port_to; p++) { data.port = htons(p); - ret = adtfn(set, &data, timeout); - - if (ret && !ip_set_eexist(ret, flags)) - return ret; - else - ret = 0; + ip2 = retried && ip == h->next.ip && p == h->next.port + ? h->next.ip2 : ip2_from; + while (!after(ip2, ip2_to)) { + data.ip2 = htonl(ip2); + ip2_last = ip_set_range_to_cidr(ip2, ip2_to, + &data.cidr); + ret = adtfn(set, &data, timeout, flags); + + if (ret && !ip_set_eexist(ret, flags)) + return ret; + else + ret = 0; + ip2 = ip2_last + 1; + } } + } return ret; } @@ -388,34 +421,43 @@ nla_put_failure: #define HOST_MASK 128 #include <linux/netfilter/ipset/ip_set_ahash.h> +static inline void +hash_ipportnet6_data_next(struct ip_set_hash *h, + const struct hash_ipportnet6_elem *d) +{ + h->next.port = ntohs(d->port); +} + static int hash_ipportnet6_kadt(struct ip_set *set, const struct sk_buff *skb, - enum ipset_adt adt, u8 pf, u8 dim, u8 flags) + const struct xt_action_param *par, + enum ipset_adt adt, const struct ip_set_adt_opt *opt) { const struct ip_set_hash *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - struct hash_ipportnet6_elem data = - { .cidr = h->nets[0].cidr || HOST_MASK }; + struct hash_ipportnet6_elem data = { + .cidr = h->nets[0].cidr ? 
h->nets[0].cidr : HOST_MASK + }; if (data.cidr == 0) return -EINVAL; if (adt == IPSET_TEST) data.cidr = HOST_MASK; - if (!ip_set_get_ip6_port(skb, flags & IPSET_DIM_TWO_SRC, + if (!ip_set_get_ip6_port(skb, opt->flags & IPSET_DIM_TWO_SRC, &data.port, &data.proto)) return -EINVAL; - ip6addrptr(skb, flags & IPSET_DIM_ONE_SRC, &data.ip.in6); - ip6addrptr(skb, flags & IPSET_DIM_THREE_SRC, &data.ip2.in6); + ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &data.ip.in6); + ip6addrptr(skb, opt->flags & IPSET_DIM_THREE_SRC, &data.ip2.in6); ip6_netmask(&data.ip2, data.cidr); - return adtfn(set, &data, h->timeout); + return adtfn(set, &data, opt_timeout(opt, h), opt->cmdflags); } static int hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[], - enum ipset_adt adt, u32 *lineno, u32 flags) + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { const struct ip_set_hash *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; @@ -432,6 +474,8 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[], tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_CIDR])) return -IPSET_ERR_PROTOCOL; + if (unlikely(tb[IPSET_ATTR_IP_TO])) + return -IPSET_ERR_HASH_RANGE_UNSUPPORTED; if (tb[IPSET_ATTR_LINENO]) *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); @@ -476,7 +520,7 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[], } if (adt == IPSET_TEST || !with_ports || !tb[IPSET_ATTR_PORT_TO]) { - ret = adtfn(set, &data, timeout); + ret = adtfn(set, &data, timeout, flags); return ip_set_eexist(ret, flags) ? 0 : ret; } @@ -485,9 +529,11 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[], if (port > port_to) swap(port, port_to); + if (retried) + port = h->next.port; for (; port <= port_to; port++) { data.port = htons(port); - ret = adtfn(set, &data, timeout); + ret = adtfn(set, &data, timeout, flags); if (ret && !ip_set_eexist(ret, flags)) return ret; @@ -574,7 +620,9 @@ static struct ip_set_type hash_ipportnet_type __read_mostly = { .features = IPSET_TYPE_IP | IPSET_TYPE_PORT | IPSET_TYPE_IP2, .dimension = IPSET_DIM_THREE, .family = AF_UNSPEC, - .revision = 1, + .revision_min = 0, + /* 1 SCTP and UDPLITE support added */ + .revision_max = 2, /* Range as input support for IPv4 added */ .create = hash_ipportnet_create, .create_policy = { [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, @@ -587,6 +635,7 @@ static struct ip_set_type hash_ipportnet_type __read_mostly = { [IPSET_ATTR_IP] = { .type = NLA_NESTED }, [IPSET_ATTR_IP_TO] = { .type = NLA_NESTED }, [IPSET_ATTR_IP2] = { .type = NLA_NESTED }, + [IPSET_ATTR_IP2_TO] = { .type = NLA_NESTED }, [IPSET_ATTR_PORT] = { .type = NLA_U16 }, [IPSET_ATTR_PORT_TO] = { .type = NLA_U16 }, [IPSET_ATTR_CIDR] = { .type = NLA_U8 }, diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c index c4db202..2d4b1f4 100644 --- a/net/netfilter/ipset/ip_set_hash_net.c +++ b/net/netfilter/ipset/ip_set_hash_net.c @@ -125,33 +125,44 @@ nla_put_failure: #define HOST_MASK 32 #include <linux/netfilter/ipset/ip_set_ahash.h> +static inline void +hash_net4_data_next(struct ip_set_hash *h, + const struct hash_net4_elem *d) +{ + h->next.ip = ntohl(d->ip); +} + static int hash_net4_kadt(struct ip_set *set, const struct sk_buff *skb, - enum ipset_adt adt, u8 pf, u8 dim, u8 flags) + const struct xt_action_param *par, + enum ipset_adt adt, const struct ip_set_adt_opt *opt) { const struct ip_set_hash *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - struct hash_net4_elem data = { .cidr = h->nets[0].cidr || HOST_MASK }; + struct hash_net4_elem 
data = { + .cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK + }; if (data.cidr == 0) return -EINVAL; if (adt == IPSET_TEST) data.cidr = HOST_MASK; - ip4addrptr(skb, flags & IPSET_DIM_ONE_SRC, &data.ip); + ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &data.ip); data.ip &= ip_set_netmask(data.cidr); - return adtfn(set, &data, h->timeout); + return adtfn(set, &data, opt_timeout(opt, h), opt->cmdflags); } static int hash_net4_uadt(struct ip_set *set, struct nlattr *tb[], - enum ipset_adt adt, u32 *lineno, u32 flags) + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { const struct ip_set_hash *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_net4_elem data = { .cidr = HOST_MASK }; u32 timeout = h->timeout; + u32 ip = 0, ip_to, last; int ret; if (unlikely(!tb[IPSET_ATTR_IP] || @@ -161,17 +172,15 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_LINENO]) *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); - ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &data.ip); + ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip); if (ret) return ret; - if (tb[IPSET_ATTR_CIDR]) + if (tb[IPSET_ATTR_CIDR]) { data.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); - - if (!data.cidr) - return -IPSET_ERR_INVALID_CIDR; - - data.ip &= ip_set_netmask(data.cidr); + if (!data.cidr) + return -IPSET_ERR_INVALID_CIDR; + } if (tb[IPSET_ATTR_TIMEOUT]) { if (!with_timeout(h->timeout)) @@ -179,9 +188,35 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[], timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); } - ret = adtfn(set, &data, timeout); + if (adt == IPSET_TEST || !tb[IPSET_ATTR_IP_TO]) { + data.ip = htonl(ip & ip_set_hostmask(data.cidr)); + ret = adtfn(set, &data, timeout, flags); + return ip_set_eexist(ret, flags) ? 0 : ret; + } - return ip_set_eexist(ret, flags) ? 0 : ret; + ip_to = ip; + if (tb[IPSET_ATTR_IP_TO]) { + ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to); + if (ret) + return ret; + if (ip_to < ip) + swap(ip, ip_to); + if (ip + UINT_MAX == ip_to) + return -IPSET_ERR_HASH_RANGE; + } + if (retried) + ip = h->next.ip; + while (!after(ip, ip_to)) { + data.ip = htonl(ip); + last = ip_set_range_to_cidr(ip, ip_to, &data.cidr); + ret = adtfn(set, &data, timeout, flags); + if (ret && !ip_set_eexist(ret, flags)) + return ret; + else + ret = 0; + ip = last + 1; + } + return ret; } static bool @@ -290,28 +325,37 @@ nla_put_failure: #define HOST_MASK 128 #include <linux/netfilter/ipset/ip_set_ahash.h> +static inline void +hash_net6_data_next(struct ip_set_hash *h, + const struct hash_net6_elem *d) +{ +} + static int hash_net6_kadt(struct ip_set *set, const struct sk_buff *skb, - enum ipset_adt adt, u8 pf, u8 dim, u8 flags) + const struct xt_action_param *par, + enum ipset_adt adt, const struct ip_set_adt_opt *opt) { const struct ip_set_hash *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - struct hash_net6_elem data = { .cidr = h->nets[0].cidr || HOST_MASK }; + struct hash_net6_elem data = { + .cidr = h->nets[0].cidr ? 
h->nets[0].cidr : HOST_MASK + }; if (data.cidr == 0) return -EINVAL; if (adt == IPSET_TEST) data.cidr = HOST_MASK; - ip6addrptr(skb, flags & IPSET_DIM_ONE_SRC, &data.ip.in6); + ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &data.ip.in6); ip6_netmask(&data.ip, data.cidr); - return adtfn(set, &data, h->timeout); + return adtfn(set, &data, opt_timeout(opt, h), opt->cmdflags); } static int hash_net6_uadt(struct ip_set *set, struct nlattr *tb[], - enum ipset_adt adt, u32 *lineno, u32 flags) + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { const struct ip_set_hash *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; @@ -322,6 +366,8 @@ hash_net6_uadt(struct ip_set *set, struct nlattr *tb[], if (unlikely(!tb[IPSET_ATTR_IP] || !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT))) return -IPSET_ERR_PROTOCOL; + if (unlikely(tb[IPSET_ATTR_IP_TO])) + return -IPSET_ERR_HASH_RANGE_UNSUPPORTED; if (tb[IPSET_ATTR_LINENO]) *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); @@ -344,7 +390,7 @@ hash_net6_uadt(struct ip_set *set, struct nlattr *tb[], timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); } - ret = adtfn(set, &data, timeout); + ret = adtfn(set, &data, timeout, flags); return ip_set_eexist(ret, flags) ? 0 : ret; } @@ -425,7 +471,8 @@ static struct ip_set_type hash_net_type __read_mostly = { .features = IPSET_TYPE_IP, .dimension = IPSET_DIM_ONE, .family = AF_UNSPEC, - .revision = 0, + .revision_min = 0, + .revision_max = 1, /* Range as input support for IPv4 added */ .create = hash_net_create, .create_policy = { [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, @@ -436,6 +483,7 @@ static struct ip_set_type hash_net_type __read_mostly = { }, .adt_policy = { [IPSET_ATTR_IP] = { .type = NLA_NESTED }, + [IPSET_ATTR_IP_TO] = { .type = NLA_NESTED }, [IPSET_ATTR_CIDR] = { .type = NLA_U8 }, [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, }, diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c new file mode 100644 index 0000000..3d6c53b --- /dev/null +++ b/net/netfilter/ipset/ip_set_hash_netiface.c @@ -0,0 +1,762 @@ +/* Copyright (C) 2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +/* Kernel module implementing an IP set type: the hash:net,iface type */ + +#include <linux/jhash.h> +#include <linux/module.h> +#include <linux/ip.h> +#include <linux/skbuff.h> +#include <linux/errno.h> +#include <linux/random.h> +#include <linux/rbtree.h> +#include <net/ip.h> +#include <net/ipv6.h> +#include <net/netlink.h> + +#include <linux/netfilter.h> +#include <linux/netfilter/ipset/pfxlen.h> +#include <linux/netfilter/ipset/ip_set.h> +#include <linux/netfilter/ipset/ip_set_timeout.h> +#include <linux/netfilter/ipset/ip_set_hash.h> + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); +MODULE_DESCRIPTION("hash:net,iface type of IP sets"); +MODULE_ALIAS("ip_set_hash:net,iface"); + +/* Interface name rbtree */ + +struct iface_node { + struct rb_node node; + char iface[IFNAMSIZ]; +}; + +#define iface_data(n) (rb_entry(n, struct iface_node, node)->iface) + +static inline long +ifname_compare(const char *_a, const char *_b) +{ + const long *a = (const long *)_a; + const long *b = (const long *)_b; + + BUILD_BUG_ON(IFNAMSIZ > 4 * sizeof(unsigned long)); + if (a[0] != b[0]) + return a[0] - b[0]; + if (IFNAMSIZ > sizeof(long)) { + if (a[1] != b[1]) + return a[1] - b[1]; + } + if (IFNAMSIZ > 2 * sizeof(long)) { + if (a[2] != b[2]) + return a[2] - b[2]; + } + if (IFNAMSIZ > 3 * sizeof(long)) { + if (a[3] != b[3]) + return a[3] - b[3]; + } + return 0; +} + +static void +rbtree_destroy(struct rb_root *root) +{ + struct rb_node *p, *n = root->rb_node; + struct iface_node *node; + + /* Non-recursive destroy, like in ext3 */ + while (n) { + if (n->rb_left) { + n = n->rb_left; + continue; + } + if (n->rb_right) { + n = n->rb_right; + continue; + } + p = rb_parent(n); + node = rb_entry(n, struct iface_node, node); + if (!p) + *root = RB_ROOT; + else if (p->rb_left == n) + p->rb_left = NULL; + else if (p->rb_right == n) + p->rb_right = NULL; + + kfree(node); + n = p; + } +} + +static int +iface_test(struct rb_root *root, const char **iface) +{ + struct rb_node *n = root->rb_node; + + while (n) { + const char *d = iface_data(n); + int res = ifname_compare(*iface, d); + + if (res < 0) + n = n->rb_left; + else if (res > 0) + n = n->rb_right; + else { + *iface = d; + return 1; + } + } + return 0; +} + +static int +iface_add(struct rb_root *root, const char **iface) +{ + struct rb_node **n = &(root->rb_node), *p = NULL; + struct iface_node *d; + + while (*n) { + char *ifname = iface_data(*n); + int res = ifname_compare(*iface, ifname); + + p = *n; + if (res < 0) + n = &((*n)->rb_left); + else if (res > 0) + n = &((*n)->rb_right); + else { + *iface = ifname; + return 0; + } + } + + d = kzalloc(sizeof(*d), GFP_ATOMIC); + if (!d) + return -ENOMEM; + strcpy(d->iface, *iface); + + rb_link_node(&d->node, p, n); + rb_insert_color(&d->node, root); + + *iface = d->iface; + return 0; +} + +/* Type specific function prefix */ +#define TYPE hash_netiface + +static bool +hash_netiface_same_set(const struct ip_set *a, const struct ip_set *b); + +#define hash_netiface4_same_set hash_netiface_same_set +#define hash_netiface6_same_set hash_netiface_same_set + +#define STREQ(a, b) (strcmp(a, b) == 0) + +/* The type variant functions: IPv4 */ + +/* Member elements without timeout */ +struct hash_netiface4_elem { + __be32 ip; + const char *iface; + u8 physdev; + u8 cidr; + u16 padding; +}; + +/* Member elements with timeout support */ +struct hash_netiface4_telem { + __be32 ip; + const char *iface; + u8 physdev; + u8 cidr; + u16 padding; + unsigned long timeout; +}; + +static inline 
bool +hash_netiface4_data_equal(const struct hash_netiface4_elem *ip1, + const struct hash_netiface4_elem *ip2) +{ + return ip1->ip == ip2->ip && + ip1->cidr == ip2->cidr && + ip1->physdev == ip2->physdev && + ip1->iface == ip2->iface; +} + +static inline bool +hash_netiface4_data_isnull(const struct hash_netiface4_elem *elem) +{ + return elem->cidr == 0; +} + +static inline void +hash_netiface4_data_copy(struct hash_netiface4_elem *dst, + const struct hash_netiface4_elem *src) { + dst->ip = src->ip; + dst->cidr = src->cidr; + dst->physdev = src->physdev; + dst->iface = src->iface; +} + +static inline void +hash_netiface4_data_netmask(struct hash_netiface4_elem *elem, u8 cidr) +{ + elem->ip &= ip_set_netmask(cidr); + elem->cidr = cidr; +} + +static inline void +hash_netiface4_data_zero_out(struct hash_netiface4_elem *elem) +{ + elem->cidr = 0; +} + +static bool +hash_netiface4_data_list(struct sk_buff *skb, + const struct hash_netiface4_elem *data) +{ + u32 flags = data->physdev ? IPSET_FLAG_PHYSDEV : 0; + + NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, data->ip); + NLA_PUT_U8(skb, IPSET_ATTR_CIDR, data->cidr); + NLA_PUT_STRING(skb, IPSET_ATTR_IFACE, data->iface); + if (flags) + NLA_PUT_NET32(skb, IPSET_ATTR_CADT_FLAGS, flags); + return 0; + +nla_put_failure: + return 1; +} + +static bool +hash_netiface4_data_tlist(struct sk_buff *skb, + const struct hash_netiface4_elem *data) +{ + const struct hash_netiface4_telem *tdata = + (const struct hash_netiface4_telem *)data; + u32 flags = data->physdev ? IPSET_FLAG_PHYSDEV : 0; + + NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, data->ip); + NLA_PUT_U8(skb, IPSET_ATTR_CIDR, data->cidr); + NLA_PUT_STRING(skb, IPSET_ATTR_IFACE, data->iface); + if (flags) + NLA_PUT_NET32(skb, IPSET_ATTR_CADT_FLAGS, flags); + NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, + htonl(ip_set_timeout_get(tdata->timeout))); + + return 0; + +nla_put_failure: + return 1; +} + +#define IP_SET_HASH_WITH_NETS +#define IP_SET_HASH_WITH_RBTREE + +#define PF 4 +#define HOST_MASK 32 +#include <linux/netfilter/ipset/ip_set_ahash.h> + +static inline void +hash_netiface4_data_next(struct ip_set_hash *h, + const struct hash_netiface4_elem *d) +{ + h->next.ip = ntohl(d->ip); +} + +static int +hash_netiface4_kadt(struct ip_set *set, const struct sk_buff *skb, + const struct xt_action_param *par, + enum ipset_adt adt, const struct ip_set_adt_opt *opt) +{ + struct ip_set_hash *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_netiface4_elem data = { + .cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK + }; + int ret; + + if (data.cidr == 0) + return -EINVAL; + if (adt == IPSET_TEST) + data.cidr = HOST_MASK; + + ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &data.ip); + data.ip &= ip_set_netmask(data.cidr); + +#define IFACE(dir) (par->dir ? par->dir->name : NULL) +#define PHYSDEV(dir) (nf_bridge->dir ? nf_bridge->dir->name : NULL) +#define SRCDIR (opt->flags & IPSET_DIM_TWO_SRC) + + if (opt->cmdflags & IPSET_FLAG_PHYSDEV) { +#ifdef CONFIG_BRIDGE_NETFILTER + const struct nf_bridge_info *nf_bridge = skb->nf_bridge; + + if (!nf_bridge) + return -EINVAL; + data.iface = SRCDIR ? PHYSDEV(physindev) : PHYSDEV(physoutdev); + data.physdev = 1; +#else + data.iface = NULL; +#endif + } else + data.iface = SRCDIR ? 
IFACE(in) : IFACE(out); + + if (!data.iface) + return -EINVAL; + ret = iface_test(&h->rbtree, &data.iface); + if (adt == IPSET_ADD) { + if (!ret) { + ret = iface_add(&h->rbtree, &data.iface); + if (ret) + return ret; + } + } else if (!ret) + return ret; + + return adtfn(set, &data, opt_timeout(opt, h), opt->cmdflags); +} + +static int +hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[], + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) +{ + struct ip_set_hash *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_netiface4_elem data = { .cidr = HOST_MASK }; + u32 ip = 0, ip_to, last; + u32 timeout = h->timeout; + char iface[IFNAMSIZ] = {}; + int ret; + + if (unlikely(!tb[IPSET_ATTR_IP] || + !tb[IPSET_ATTR_IFACE] || + !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS))) + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_LINENO]) + *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); + + ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip); + if (ret) + return ret; + + if (tb[IPSET_ATTR_CIDR]) { + data.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); + if (!data.cidr) + return -IPSET_ERR_INVALID_CIDR; + } + + if (tb[IPSET_ATTR_TIMEOUT]) { + if (!with_timeout(h->timeout)) + return -IPSET_ERR_TIMEOUT; + timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); + } + + strcpy(iface, nla_data(tb[IPSET_ATTR_IFACE])); + data.iface = iface; + ret = iface_test(&h->rbtree, &data.iface); + if (adt == IPSET_ADD) { + if (!ret) { + ret = iface_add(&h->rbtree, &data.iface); + if (ret) + return ret; + } + } else if (!ret) + return ret; + + if (tb[IPSET_ATTR_CADT_FLAGS]) { + u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); + if (cadt_flags & IPSET_FLAG_PHYSDEV) + data.physdev = 1; + } + + if (adt == IPSET_TEST || !tb[IPSET_ATTR_IP_TO]) { + data.ip = htonl(ip & ip_set_hostmask(data.cidr)); + ret = adtfn(set, &data, timeout, flags); + return ip_set_eexist(ret, flags) ? 
0 : ret; + } + + if (tb[IPSET_ATTR_IP_TO]) { + ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to); + if (ret) + return ret; + if (ip_to < ip) + swap(ip, ip_to); + if (ip + UINT_MAX == ip_to) + return -IPSET_ERR_HASH_RANGE; + } else { + ip_set_mask_from_to(ip, ip_to, data.cidr); + } + + if (retried) + ip = h->next.ip; + while (!after(ip, ip_to)) { + data.ip = htonl(ip); + last = ip_set_range_to_cidr(ip, ip_to, &data.cidr); + ret = adtfn(set, &data, timeout, flags); + + if (ret && !ip_set_eexist(ret, flags)) + return ret; + else + ret = 0; + ip = last + 1; + } + return ret; +} + +static bool +hash_netiface_same_set(const struct ip_set *a, const struct ip_set *b) +{ + const struct ip_set_hash *x = a->data; + const struct ip_set_hash *y = b->data; + + /* Resizing changes htable_bits, so we ignore it */ + return x->maxelem == y->maxelem && + x->timeout == y->timeout; +} + +/* The type variant functions: IPv6 */ + +struct hash_netiface6_elem { + union nf_inet_addr ip; + const char *iface; + u8 physdev; + u8 cidr; + u16 padding; +}; + +struct hash_netiface6_telem { + union nf_inet_addr ip; + const char *iface; + u8 physdev; + u8 cidr; + u16 padding; + unsigned long timeout; +}; + +static inline bool +hash_netiface6_data_equal(const struct hash_netiface6_elem *ip1, + const struct hash_netiface6_elem *ip2) +{ + return ipv6_addr_cmp(&ip1->ip.in6, &ip2->ip.in6) == 0 && + ip1->cidr == ip2->cidr && + ip1->physdev == ip2->physdev && + ip1->iface == ip2->iface; +} + +static inline bool +hash_netiface6_data_isnull(const struct hash_netiface6_elem *elem) +{ + return elem->cidr == 0; +} + +static inline void +hash_netiface6_data_copy(struct hash_netiface6_elem *dst, + const struct hash_netiface6_elem *src) +{ + memcpy(dst, src, sizeof(*dst)); +} + +static inline void +hash_netiface6_data_zero_out(struct hash_netiface6_elem *elem) +{ +} + +static inline void +ip6_netmask(union nf_inet_addr *ip, u8 prefix) +{ + ip->ip6[0] &= ip_set_netmask6(prefix)[0]; + ip->ip6[1] &= ip_set_netmask6(prefix)[1]; + ip->ip6[2] &= ip_set_netmask6(prefix)[2]; + ip->ip6[3] &= ip_set_netmask6(prefix)[3]; +} + +static inline void +hash_netiface6_data_netmask(struct hash_netiface6_elem *elem, u8 cidr) +{ + ip6_netmask(&elem->ip, cidr); + elem->cidr = cidr; +} + +static bool +hash_netiface6_data_list(struct sk_buff *skb, + const struct hash_netiface6_elem *data) +{ + u32 flags = data->physdev ? IPSET_FLAG_PHYSDEV : 0; + + NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP, &data->ip); + NLA_PUT_U8(skb, IPSET_ATTR_CIDR, data->cidr); + NLA_PUT_STRING(skb, IPSET_ATTR_IFACE, data->iface); + if (flags) + NLA_PUT_NET32(skb, IPSET_ATTR_CADT_FLAGS, flags); + return 0; + +nla_put_failure: + return 1; +} + +static bool +hash_netiface6_data_tlist(struct sk_buff *skb, + const struct hash_netiface6_elem *data) +{ + const struct hash_netiface6_telem *e = + (const struct hash_netiface6_telem *)data; + u32 flags = data->physdev ? 
IPSET_FLAG_PHYSDEV : 0; + + NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP, &e->ip); + NLA_PUT_U8(skb, IPSET_ATTR_CIDR, data->cidr); + NLA_PUT_STRING(skb, IPSET_ATTR_IFACE, data->iface); + if (flags) + NLA_PUT_NET32(skb, IPSET_ATTR_CADT_FLAGS, flags); + NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, + htonl(ip_set_timeout_get(e->timeout))); + return 0; + +nla_put_failure: + return 1; +} + +#undef PF +#undef HOST_MASK + +#define PF 6 +#define HOST_MASK 128 +#include <linux/netfilter/ipset/ip_set_ahash.h> + +static inline void +hash_netiface6_data_next(struct ip_set_hash *h, + const struct hash_netiface6_elem *d) +{ +} + +static int +hash_netiface6_kadt(struct ip_set *set, const struct sk_buff *skb, + const struct xt_action_param *par, + enum ipset_adt adt, const struct ip_set_adt_opt *opt) +{ + struct ip_set_hash *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_netiface6_elem data = { + .cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK + }; + int ret; + + if (data.cidr == 0) + return -EINVAL; + if (adt == IPSET_TEST) + data.cidr = HOST_MASK; + + ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &data.ip.in6); + ip6_netmask(&data.ip, data.cidr); + + if (opt->cmdflags & IPSET_FLAG_PHYSDEV) { +#ifdef CONFIG_BRIDGE_NETFILTER + const struct nf_bridge_info *nf_bridge = skb->nf_bridge; + + if (!nf_bridge) + return -EINVAL; + data.iface = SRCDIR ? PHYSDEV(physindev) : PHYSDEV(physoutdev); + data.physdev = 1; +#else + data.iface = NULL; +#endif + } else + data.iface = SRCDIR ? IFACE(in) : IFACE(out); + + if (!data.iface) + return -EINVAL; + ret = iface_test(&h->rbtree, &data.iface); + if (adt == IPSET_ADD) { + if (!ret) { + ret = iface_add(&h->rbtree, &data.iface); + if (ret) + return ret; + } + } else if (!ret) + return ret; + + return adtfn(set, &data, opt_timeout(opt, h), opt->cmdflags); +} + +static int +hash_netiface6_uadt(struct ip_set *set, struct nlattr *tb[], + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) +{ + struct ip_set_hash *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_netiface6_elem data = { .cidr = HOST_MASK }; + u32 timeout = h->timeout; + char iface[IFNAMSIZ] = {}; + int ret; + + if (unlikely(!tb[IPSET_ATTR_IP] || + !tb[IPSET_ATTR_IFACE] || + !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS))) + return -IPSET_ERR_PROTOCOL; + if (unlikely(tb[IPSET_ATTR_IP_TO])) + return -IPSET_ERR_HASH_RANGE_UNSUPPORTED; + + if (tb[IPSET_ATTR_LINENO]) + *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); + + ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &data.ip); + if (ret) + return ret; + + if (tb[IPSET_ATTR_CIDR]) + data.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); + if (!data.cidr) + return -IPSET_ERR_INVALID_CIDR; + ip6_netmask(&data.ip, data.cidr); + + if (tb[IPSET_ATTR_TIMEOUT]) { + if (!with_timeout(h->timeout)) + return -IPSET_ERR_TIMEOUT; + timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); + } + + strcpy(iface, nla_data(tb[IPSET_ATTR_IFACE])); + data.iface = iface; + ret = iface_test(&h->rbtree, &data.iface); + if (adt == IPSET_ADD) { + if (!ret) { + ret = iface_add(&h->rbtree, &data.iface); + if (ret) + return ret; + } + } else if (!ret) + return ret; + + if (tb[IPSET_ATTR_CADT_FLAGS]) { + u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); + if (cadt_flags & IPSET_FLAG_PHYSDEV) + data.physdev = 1; + } + + ret = adtfn(set, &data, timeout, flags); + + return ip_set_eexist(ret, flags) ? 
0 : ret; +} + +/* Create hash:ip type of sets */ + +static int +hash_netiface_create(struct ip_set *set, struct nlattr *tb[], u32 flags) +{ + struct ip_set_hash *h; + u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM; + u8 hbits; + + if (!(set->family == AF_INET || set->family == AF_INET6)) + return -IPSET_ERR_INVALID_FAMILY; + + if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT))) + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_HASHSIZE]) { + hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]); + if (hashsize < IPSET_MIMINAL_HASHSIZE) + hashsize = IPSET_MIMINAL_HASHSIZE; + } + + if (tb[IPSET_ATTR_MAXELEM]) + maxelem = ip_set_get_h32(tb[IPSET_ATTR_MAXELEM]); + + h = kzalloc(sizeof(*h) + + sizeof(struct ip_set_hash_nets) + * (set->family == AF_INET ? 32 : 128), GFP_KERNEL); + if (!h) + return -ENOMEM; + + h->maxelem = maxelem; + get_random_bytes(&h->initval, sizeof(h->initval)); + h->timeout = IPSET_NO_TIMEOUT; + + hbits = htable_bits(hashsize); + h->table = ip_set_alloc( + sizeof(struct htable) + + jhash_size(hbits) * sizeof(struct hbucket)); + if (!h->table) { + kfree(h); + return -ENOMEM; + } + h->table->htable_bits = hbits; + h->rbtree = RB_ROOT; + + set->data = h; + + if (tb[IPSET_ATTR_TIMEOUT]) { + h->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); + + set->variant = set->family == AF_INET + ? &hash_netiface4_tvariant : &hash_netiface6_tvariant; + + if (set->family == AF_INET) + hash_netiface4_gc_init(set); + else + hash_netiface6_gc_init(set); + } else { + set->variant = set->family == AF_INET + ? &hash_netiface4_variant : &hash_netiface6_variant; + } + + pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n", + set->name, jhash_size(h->table->htable_bits), + h->table->htable_bits, h->maxelem, set->data, h->table); + + return 0; +} + +static struct ip_set_type hash_netiface_type __read_mostly = { + .name = "hash:net,iface", + .protocol = IPSET_PROTOCOL, + .features = IPSET_TYPE_IP | IPSET_TYPE_IFACE, + .dimension = IPSET_DIM_TWO, + .family = AF_UNSPEC, + .revision_min = 0, + .create = hash_netiface_create, + .create_policy = { + [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, + [IPSET_ATTR_MAXELEM] = { .type = NLA_U32 }, + [IPSET_ATTR_PROBES] = { .type = NLA_U8 }, + [IPSET_ATTR_RESIZE] = { .type = NLA_U8 }, + [IPSET_ATTR_PROTO] = { .type = NLA_U8 }, + [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, + }, + .adt_policy = { + [IPSET_ATTR_IP] = { .type = NLA_NESTED }, + [IPSET_ATTR_IP_TO] = { .type = NLA_NESTED }, + [IPSET_ATTR_IFACE] = { .type = NLA_NUL_STRING, + .len = IPSET_MAXNAMELEN - 1 }, + [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, + [IPSET_ATTR_CIDR] = { .type = NLA_U8 }, + [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, + [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, + }, + .me = THIS_MODULE, +}; + +static int __init +hash_netiface_init(void) +{ + return ip_set_type_register(&hash_netiface_type); +} + +static void __exit +hash_netiface_fini(void) +{ + ip_set_type_unregister(&hash_netiface_type); +} + +module_init(hash_netiface_init); +module_exit(hash_netiface_fini); diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c index d2a4036..fe203d1 100644 --- a/net/netfilter/ipset/ip_set_hash_netport.c +++ b/net/netfilter/ipset/ip_set_hash_netport.c @@ -137,38 +137,48 @@ nla_put_failure: #define HOST_MASK 32 #include <linux/netfilter/ipset/ip_set_ahash.h> +static inline void 
+hash_netport4_data_next(struct ip_set_hash *h, + const struct hash_netport4_elem *d) +{ + h->next.ip = ntohl(d->ip); + h->next.port = ntohs(d->port); +} + static int hash_netport4_kadt(struct ip_set *set, const struct sk_buff *skb, - enum ipset_adt adt, u8 pf, u8 dim, u8 flags) + const struct xt_action_param *par, + enum ipset_adt adt, const struct ip_set_adt_opt *opt) { const struct ip_set_hash *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netport4_elem data = { - .cidr = h->nets[0].cidr || HOST_MASK }; + .cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK + }; if (data.cidr == 0) return -EINVAL; if (adt == IPSET_TEST) data.cidr = HOST_MASK; - if (!ip_set_get_ip4_port(skb, flags & IPSET_DIM_TWO_SRC, + if (!ip_set_get_ip4_port(skb, opt->flags & IPSET_DIM_TWO_SRC, &data.port, &data.proto)) return -EINVAL; - ip4addrptr(skb, flags & IPSET_DIM_ONE_SRC, &data.ip); + ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &data.ip); data.ip &= ip_set_netmask(data.cidr); - return adtfn(set, &data, h->timeout); + return adtfn(set, &data, opt_timeout(opt, h), opt->cmdflags); } static int hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], - enum ipset_adt adt, u32 *lineno, u32 flags) + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { const struct ip_set_hash *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netport4_elem data = { .cidr = HOST_MASK }; - u32 port, port_to; + u32 port, port_to, p = 0, ip = 0, ip_to, last; u32 timeout = h->timeout; bool with_ports = false; int ret; @@ -182,15 +192,15 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_LINENO]) *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); - ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &data.ip); + ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip); if (ret) return ret; - if (tb[IPSET_ATTR_CIDR]) + if (tb[IPSET_ATTR_CIDR]) { data.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); - if (!data.cidr) - return -IPSET_ERR_INVALID_CIDR; - data.ip &= ip_set_netmask(data.cidr); + if (!data.cidr) + return -IPSET_ERR_INVALID_CIDR; + } if (tb[IPSET_ATTR_PORT]) data.port = nla_get_be16(tb[IPSET_ATTR_PORT]); @@ -215,24 +225,47 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); } - if (adt == IPSET_TEST || !with_ports || !tb[IPSET_ATTR_PORT_TO]) { - ret = adtfn(set, &data, timeout); + with_ports = with_ports && tb[IPSET_ATTR_PORT_TO]; + if (adt == IPSET_TEST || !(with_ports || tb[IPSET_ATTR_IP_TO])) { + data.ip = htonl(ip & ip_set_hostmask(data.cidr)); + ret = adtfn(set, &data, timeout, flags); return ip_set_eexist(ret, flags) ? 
0 : ret; } - port = ntohs(data.port); - port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]); - if (port > port_to) - swap(port, port_to); - - for (; port <= port_to; port++) { - data.port = htons(port); - ret = adtfn(set, &data, timeout); - - if (ret && !ip_set_eexist(ret, flags)) + port = port_to = ntohs(data.port); + if (tb[IPSET_ATTR_PORT_TO]) { + port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]); + if (port_to < port) + swap(port, port_to); + } + if (tb[IPSET_ATTR_IP_TO]) { + ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to); + if (ret) return ret; - else - ret = 0; + if (ip_to < ip) + swap(ip, ip_to); + if (ip + UINT_MAX == ip_to) + return -IPSET_ERR_HASH_RANGE; + } else { + ip_set_mask_from_to(ip, ip_to, data.cidr); + } + + if (retried) + ip = h->next.ip; + while (!after(ip, ip_to)) { + data.ip = htonl(ip); + last = ip_set_range_to_cidr(ip, ip_to, &data.cidr); + p = retried && ip == h->next.ip ? h->next.port : port; + for (; p <= port_to; p++) { + data.port = htons(p); + ret = adtfn(set, &data, timeout, flags); + + if (ret && !ip_set_eexist(ret, flags)) + return ret; + else + ret = 0; + } + ip = last + 1; } return ret; } @@ -350,33 +383,42 @@ nla_put_failure: #define HOST_MASK 128 #include <linux/netfilter/ipset/ip_set_ahash.h> +static inline void +hash_netport6_data_next(struct ip_set_hash *h, + const struct hash_netport6_elem *d) +{ + h->next.port = ntohs(d->port); +} + static int hash_netport6_kadt(struct ip_set *set, const struct sk_buff *skb, - enum ipset_adt adt, u8 pf, u8 dim, u8 flags) + const struct xt_action_param *par, + enum ipset_adt adt, const struct ip_set_adt_opt *opt) { const struct ip_set_hash *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netport6_elem data = { - .cidr = h->nets[0].cidr || HOST_MASK }; + .cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK + }; if (data.cidr == 0) return -EINVAL; if (adt == IPSET_TEST) data.cidr = HOST_MASK; - if (!ip_set_get_ip6_port(skb, flags & IPSET_DIM_TWO_SRC, + if (!ip_set_get_ip6_port(skb, opt->flags & IPSET_DIM_TWO_SRC, &data.port, &data.proto)) return -EINVAL; - ip6addrptr(skb, flags & IPSET_DIM_ONE_SRC, &data.ip.in6); + ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &data.ip.in6); ip6_netmask(&data.ip, data.cidr); - return adtfn(set, &data, h->timeout); + return adtfn(set, &data, opt_timeout(opt, h), opt->cmdflags); } static int hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[], - enum ipset_adt adt, u32 *lineno, u32 flags) + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { const struct ip_set_hash *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; @@ -391,6 +433,8 @@ hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[], !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) || !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT))) return -IPSET_ERR_PROTOCOL; + if (unlikely(tb[IPSET_ATTR_IP_TO])) + return -IPSET_ERR_HASH_RANGE_UNSUPPORTED; if (tb[IPSET_ATTR_LINENO]) *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); @@ -429,7 +473,7 @@ hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[], } if (adt == IPSET_TEST || !with_ports || !tb[IPSET_ATTR_PORT_TO]) { - ret = adtfn(set, &data, timeout); + ret = adtfn(set, &data, timeout, flags); return ip_set_eexist(ret, flags) ? 
0 : ret; } @@ -438,9 +482,11 @@ hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[], if (port > port_to) swap(port, port_to); + if (retried) + port = h->next.port; for (; port <= port_to; port++) { data.port = htons(port); - ret = adtfn(set, &data, timeout); + ret = adtfn(set, &data, timeout, flags); if (ret && !ip_set_eexist(ret, flags)) return ret; @@ -526,7 +572,9 @@ static struct ip_set_type hash_netport_type __read_mostly = { .features = IPSET_TYPE_IP | IPSET_TYPE_PORT, .dimension = IPSET_DIM_TWO, .family = AF_UNSPEC, - .revision = 1, + .revision_min = 0, + /* 1 SCTP and UDPLITE support added */ + .revision_max = 2, /* Range as input support for IPv4 added */ .create = hash_netport_create, .create_policy = { [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, @@ -538,6 +586,7 @@ static struct ip_set_type hash_netport_type __read_mostly = { }, .adt_policy = { [IPSET_ATTR_IP] = { .type = NLA_NESTED }, + [IPSET_ATTR_IP_TO] = { .type = NLA_NESTED }, [IPSET_ATTR_PORT] = { .type = NLA_U16 }, [IPSET_ATTR_PORT_TO] = { .type = NLA_U16 }, [IPSET_ATTR_PROTO] = { .type = NLA_U8 }, diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index e9159e9..4d10819 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -72,7 +72,8 @@ list_set_expired(const struct list_set *map, u32 id) static int list_set_kadt(struct ip_set *set, const struct sk_buff *skb, - enum ipset_adt adt, u8 pf, u8 dim, u8 flags) + const struct xt_action_param *par, + enum ipset_adt adt, const struct ip_set_adt_opt *opt) { struct list_set *map = set->data; struct set_elem *elem; @@ -87,17 +88,17 @@ list_set_kadt(struct ip_set *set, const struct sk_buff *skb, continue; switch (adt) { case IPSET_TEST: - ret = ip_set_test(elem->id, skb, pf, dim, flags); + ret = ip_set_test(elem->id, skb, par, opt); if (ret > 0) return ret; break; case IPSET_ADD: - ret = ip_set_add(elem->id, skb, pf, dim, flags); + ret = ip_set_add(elem->id, skb, par, opt); if (ret == 0) return ret; break; case IPSET_DEL: - ret = ip_set_del(elem->id, skb, pf, dim, flags); + ret = ip_set_del(elem->id, skb, par, opt); if (ret == 0) return ret; break; @@ -109,15 +110,28 @@ list_set_kadt(struct ip_set *set, const struct sk_buff *skb, } static bool -next_id_eq(const struct list_set *map, u32 i, ip_set_id_t id) +id_eq(const struct list_set *map, u32 i, ip_set_id_t id) { const struct set_elem *elem; - if (i + 1 < map->size) { - elem = list_set_elem(map, i + 1); + if (i < map->size) { + elem = list_set_elem(map, i); + return elem->id == id; + } + + return 0; +} + +static bool +id_eq_timeout(const struct list_set *map, u32 i, ip_set_id_t id) +{ + const struct set_elem *elem; + + if (i < map->size) { + elem = list_set_elem(map, i); return !!(elem->id == id && !(with_timeout(map->timeout) && - list_set_expired(map, i + 1))); + list_set_expired(map, i))); } return 0; @@ -190,12 +204,26 @@ list_set_del(struct list_set *map, u32 i) return 0; } +static void +cleanup_entries(struct list_set *map) +{ + struct set_telem *e; + u32 i; + + for (i = 0; i < map->size; i++) { + e = list_set_telem(map, i); + if (e->id != IPSET_INVALID_ID && list_set_expired(map, i)) + list_set_del(map, i); + } +} + static int list_set_uadt(struct ip_set *set, struct nlattr *tb[], - enum ipset_adt adt, u32 *lineno, u32 flags) + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { struct list_set *map = set->data; bool with_timeout = with_timeout(map->timeout); + bool flag_exist = flags & IPSET_FLAG_EXIST; int before = 0; u32 
timeout = map->timeout; ip_set_id_t id, refid = IPSET_INVALID_ID; @@ -248,6 +276,8 @@ list_set_uadt(struct ip_set *set, struct nlattr *tb[], } timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); } + if (with_timeout && adt != IPSET_TEST) + cleanup_entries(map); switch (adt) { case IPSET_TEST: @@ -259,22 +289,37 @@ list_set_uadt(struct ip_set *set, struct nlattr *tb[], else if (with_timeout && list_set_expired(map, i)) continue; else if (before > 0 && elem->id == id) - ret = next_id_eq(map, i, refid); + ret = id_eq_timeout(map, i + 1, refid); else if (before < 0 && elem->id == refid) - ret = next_id_eq(map, i, id); + ret = id_eq_timeout(map, i + 1, id); else if (before == 0 && elem->id == id) ret = 1; } break; case IPSET_ADD: - for (i = 0; i < map->size && !ret; i++) { + for (i = 0; i < map->size; i++) { elem = list_set_elem(map, i); - if (elem->id == id && - !(with_timeout && list_set_expired(map, i))) + if (elem->id != id) + continue; + if (!(with_timeout && flag_exist)) { ret = -IPSET_ERR_EXIST; + goto finish; + } else { + struct set_telem *e = list_set_telem(map, i); + + if ((before > 1 && + !id_eq(map, i + 1, refid)) || + (before < 0 && + (i == 0 || !id_eq(map, i - 1, refid)))) { + ret = -IPSET_ERR_EXIST; + goto finish; + } + e->timeout = ip_set_timeout_set(timeout); + ip_set_put_byindex(id); + ret = 0; + goto finish; + } } - if (ret == -IPSET_ERR_EXIST) - break; ret = -IPSET_ERR_LIST_FULL; for (i = 0; i < map->size && ret == -IPSET_ERR_LIST_FULL; i++) { elem = list_set_elem(map, i); @@ -283,9 +328,7 @@ list_set_uadt(struct ip_set *set, struct nlattr *tb[], : list_set_add(map, i, id, timeout); else if (elem->id != refid) continue; - else if (with_timeout && list_set_expired(map, i)) - ret = -IPSET_ERR_REF_EXIST; - else if (before) + else if (before > 0) ret = list_set_add(map, i, id, timeout); else if (i + 1 < map->size) ret = list_set_add(map, i + 1, id, timeout); @@ -299,16 +342,12 @@ list_set_uadt(struct ip_set *set, struct nlattr *tb[], ret = before != 0 ? 
-IPSET_ERR_REF_EXIST : -IPSET_ERR_EXIST; break; - } else if (with_timeout && list_set_expired(map, i)) - continue; - else if (elem->id == id && - (before == 0 || - (before > 0 && - next_id_eq(map, i, refid)))) + } else if (elem->id == id && + (before == 0 || + (before > 0 && id_eq(map, i + 1, refid)))) ret = list_set_del(map, i); - else if (before < 0 && - elem->id == refid && - next_id_eq(map, i, id)) + else if (elem->id == refid && + before < 0 && id_eq(map, i + 1, id)) ret = list_set_del(map, i + 1); } break; @@ -454,15 +493,9 @@ list_set_gc(unsigned long ul_set) { struct ip_set *set = (struct ip_set *) ul_set; struct list_set *map = set->data; - struct set_telem *e; - u32 i; write_lock_bh(&set->lock); - for (i = 0; i < map->size; i++) { - e = list_set_telem(map, i); - if (e->id != IPSET_INVALID_ID && list_set_expired(map, i)) - list_set_del(map, i); - } + cleanup_entries(map); write_unlock_bh(&set->lock); map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ; @@ -543,7 +576,8 @@ static struct ip_set_type list_set_type __read_mostly = { .features = IPSET_TYPE_NAME | IPSET_DUMP_LAST, .dimension = IPSET_DIM_ONE, .family = AF_UNSPEC, - .revision = 0, + .revision_min = 0, + .revision_max = 0, .create = list_set_create, .create_policy = { [IPSET_ATTR_SIZE] = { .type = NLA_U32 }, diff --git a/net/netfilter/ipset/pfxlen.c b/net/netfilter/ipset/pfxlen.c index 23f8c81..bd13d66 100644 --- a/net/netfilter/ipset/pfxlen.c +++ b/net/netfilter/ipset/pfxlen.c @@ -148,7 +148,7 @@ const union nf_inet_addr ip_set_netmask_map[] = { EXPORT_SYMBOL_GPL(ip_set_netmask_map); #undef E -#define E(a, b, c, d) \ +#define E(a, b, c, d) \ {.ip6 = { (__force __be32) a, (__force __be32) b, \ (__force __be32) c, (__force __be32) d, \ } } @@ -289,3 +289,24 @@ const union nf_inet_addr ip_set_hostmask_map[] = { E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF), }; EXPORT_SYMBOL_GPL(ip_set_hostmask_map); + +/* Find the largest network which matches the range from left, in host order. 
*/ +u32 +ip_set_range_to_cidr(u32 from, u32 to, u8 *cidr) +{ + u32 last; + u8 i; + + for (i = 1; i < 32; i++) { + if ((from & ip_set_hostmask(i)) != from) + continue; + last = from | ~ip_set_hostmask(i); + if (!after(last, to)) { + *cidr = i; + return last; + } + } + *cidr = 32; + return from; +} +EXPORT_SYMBOL_GPL(ip_set_range_to_cidr); diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c index 059af31..fe6cb43 100644 --- a/net/netfilter/ipvs/ip_vs_app.c +++ b/net/netfilter/ipvs/ip_vs_app.c @@ -576,7 +576,7 @@ static const struct file_operations ip_vs_app_fops = { }; #endif -int __net_init __ip_vs_app_init(struct net *net) +int __net_init ip_vs_app_net_init(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); @@ -585,17 +585,7 @@ int __net_init __ip_vs_app_init(struct net *net) return 0; } -void __net_exit __ip_vs_app_cleanup(struct net *net) +void __net_exit ip_vs_app_net_cleanup(struct net *net) { proc_net_remove(net, "ip_vs_app"); } - -int __init ip_vs_app_init(void) -{ - return 0; -} - - -void ip_vs_app_cleanup(void) -{ -} diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index bf28ac2..12571fb 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -776,8 +776,16 @@ static void ip_vs_conn_expire(unsigned long data) if (cp->control) ip_vs_control_del(cp); - if (cp->flags & IP_VS_CONN_F_NFCT) + if (cp->flags & IP_VS_CONN_F_NFCT) { ip_vs_conn_drop_conntrack(cp); + /* Do not access conntracks during subsys cleanup + * because nf_conntrack_find_get can not be used after + * conntrack cleanup for the net. + */ + smp_rmb(); + if (ipvs->enable) + ip_vs_conn_drop_conntrack(cp); + } ip_vs_pe_put(cp->pe); kfree(cp->pe_data); @@ -1247,7 +1255,7 @@ flush_again: /* * per netns init and exit */ -int __net_init __ip_vs_conn_init(struct net *net) +int __net_init ip_vs_conn_net_init(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); @@ -1258,7 +1266,7 @@ int __net_init __ip_vs_conn_init(struct net *net) return 0; } -void __net_exit __ip_vs_conn_cleanup(struct net *net) +void __net_exit ip_vs_conn_net_cleanup(struct net *net) { /* flush all the connection entries first */ ip_vs_conn_flush(net); diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index bfa808f..4f77bb1 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -852,7 +852,7 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related, *related = 1; /* reassemble IP fragments */ - if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { + if (ip_is_fragment(ip_hdr(skb))) { if (ip_vs_gather_frags(skb, ip_vs_defrag_user(hooknum))) return NF_STOLEN; } @@ -1156,8 +1156,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); } else #endif - if (unlikely(ip_hdr(skb)->frag_off & htons(IP_MF|IP_OFFSET) && - !pp->dont_defrag)) { + if (unlikely(ip_is_fragment(ip_hdr(skb)) && !pp->dont_defrag)) { if (ip_vs_gather_frags(skb, ip_vs_defrag_user(hooknum))) return NF_STOLEN; @@ -1310,7 +1309,7 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) *related = 1; /* reassemble IP fragments */ - if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { + if (ip_is_fragment(ip_hdr(skb))) { if (ip_vs_gather_frags(skb, ip_vs_defrag_user(hooknum))) return NF_STOLEN; } @@ -1384,7 +1383,7 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) offset += 2 * sizeof(__u16); verdict = ip_vs_icmp_xmit(skb, cp, pp, 
offset, hooknum); - out: +out: __ip_vs_conn_put(cp); return verdict; @@ -1772,7 +1771,7 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { .owner = THIS_MODULE, .pf = PF_INET, .hooknum = NF_INET_LOCAL_IN, - .priority = 99, + .priority = NF_IP_PRI_NAT_SRC - 2, }, /* After packet filtering, forward packet through VS/DR, VS/TUN, * or VS/NAT(change destination), so that filtering rules can be @@ -1782,7 +1781,7 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { .owner = THIS_MODULE, .pf = PF_INET, .hooknum = NF_INET_LOCAL_IN, - .priority = 101, + .priority = NF_IP_PRI_NAT_SRC - 1, }, /* Before ip_vs_in, change source only for VS/NAT */ { @@ -1790,7 +1789,7 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { .owner = THIS_MODULE, .pf = PF_INET, .hooknum = NF_INET_LOCAL_OUT, - .priority = -99, + .priority = NF_IP_PRI_NAT_DST + 1, }, /* After mangle, schedule and forward local requests */ { @@ -1798,7 +1797,7 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { .owner = THIS_MODULE, .pf = PF_INET, .hooknum = NF_INET_LOCAL_OUT, - .priority = -98, + .priority = NF_IP_PRI_NAT_DST + 2, }, /* After packet filtering (but before ip_vs_out_icmp), catch icmp * destined for 0.0.0.0/0, which is for incoming IPVS connections */ @@ -1824,7 +1823,7 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { .owner = THIS_MODULE, .pf = PF_INET6, .hooknum = NF_INET_LOCAL_IN, - .priority = 99, + .priority = NF_IP6_PRI_NAT_SRC - 2, }, /* After packet filtering, forward packet through VS/DR, VS/TUN, * or VS/NAT(change destination), so that filtering rules can be @@ -1834,7 +1833,7 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { .owner = THIS_MODULE, .pf = PF_INET6, .hooknum = NF_INET_LOCAL_IN, - .priority = 101, + .priority = NF_IP6_PRI_NAT_SRC - 1, }, /* Before ip_vs_in, change source only for VS/NAT */ { @@ -1842,7 +1841,7 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { .owner = THIS_MODULE, .pf = PF_INET, .hooknum = NF_INET_LOCAL_OUT, - .priority = -99, + .priority = NF_IP6_PRI_NAT_DST + 1, }, /* After mangle, schedule and forward local requests */ { @@ -1850,7 +1849,7 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { .owner = THIS_MODULE, .pf = PF_INET6, .hooknum = NF_INET_LOCAL_OUT, - .priority = -98, + .priority = NF_IP6_PRI_NAT_DST + 2, }, /* After packet filtering (but before ip_vs_out_icmp), catch icmp * destined for 0.0.0.0/0, which is for incoming IPVS connections */ @@ -1891,22 +1890,22 @@ static int __net_init __ip_vs_init(struct net *net) atomic_inc(&ipvs_netns_cnt); net->ipvs = ipvs; - if (__ip_vs_estimator_init(net) < 0) + if (ip_vs_estimator_net_init(net) < 0) goto estimator_fail; - if (__ip_vs_control_init(net) < 0) + if (ip_vs_control_net_init(net) < 0) goto control_fail; - if (__ip_vs_protocol_init(net) < 0) + if (ip_vs_protocol_net_init(net) < 0) goto protocol_fail; - if (__ip_vs_app_init(net) < 0) + if (ip_vs_app_net_init(net) < 0) goto app_fail; - if (__ip_vs_conn_init(net) < 0) + if (ip_vs_conn_net_init(net) < 0) goto conn_fail; - if (__ip_vs_sync_init(net) < 0) + if (ip_vs_sync_net_init(net) < 0) goto sync_fail; printk(KERN_INFO "IPVS: Creating netns size=%zu id=%d\n", @@ -1917,27 +1916,27 @@ static int __net_init __ip_vs_init(struct net *net) */ sync_fail: - __ip_vs_conn_cleanup(net); + ip_vs_conn_net_cleanup(net); conn_fail: - __ip_vs_app_cleanup(net); + ip_vs_app_net_cleanup(net); app_fail: - __ip_vs_protocol_cleanup(net); + ip_vs_protocol_net_cleanup(net); protocol_fail: - __ip_vs_control_cleanup(net); + 
ip_vs_control_net_cleanup(net); control_fail: - __ip_vs_estimator_cleanup(net); + ip_vs_estimator_net_cleanup(net); estimator_fail: return -ENOMEM; } static void __net_exit __ip_vs_cleanup(struct net *net) { - __ip_vs_service_cleanup(net); /* ip_vs_flush() with locks */ - __ip_vs_conn_cleanup(net); - __ip_vs_app_cleanup(net); - __ip_vs_protocol_cleanup(net); - __ip_vs_control_cleanup(net); - __ip_vs_estimator_cleanup(net); + ip_vs_service_net_cleanup(net); /* ip_vs_flush() with locks */ + ip_vs_conn_net_cleanup(net); + ip_vs_app_net_cleanup(net); + ip_vs_protocol_net_cleanup(net); + ip_vs_control_net_cleanup(net); + ip_vs_estimator_net_cleanup(net); IP_VS_DBG(2, "ipvs netns %d released\n", net_ipvs(net)->gen); } @@ -1945,7 +1944,8 @@ static void __net_exit __ip_vs_dev_cleanup(struct net *net) { EnterFunction(2); net_ipvs(net)->enable = 0; /* Disable packet reception */ - __ip_vs_sync_cleanup(net); + smp_wmb(); + ip_vs_sync_net_cleanup(net); LeaveFunction(2); } @@ -1967,36 +1967,23 @@ static int __init ip_vs_init(void) { int ret; - ip_vs_estimator_init(); ret = ip_vs_control_init(); if (ret < 0) { pr_err("can't setup control.\n"); - goto cleanup_estimator; + goto exit; } ip_vs_protocol_init(); - ret = ip_vs_app_init(); - if (ret < 0) { - pr_err("can't setup application helper.\n"); - goto cleanup_protocol; - } - ret = ip_vs_conn_init(); if (ret < 0) { pr_err("can't setup connection table.\n"); - goto cleanup_app; - } - - ret = ip_vs_sync_init(); - if (ret < 0) { - pr_err("can't setup sync data.\n"); - goto cleanup_conn; + goto cleanup_protocol; } ret = register_pernet_subsys(&ipvs_core_ops); /* Alloc ip_vs struct */ if (ret < 0) - goto cleanup_sync; + goto cleanup_conn; ret = register_pernet_device(&ipvs_core_dev_ops); if (ret < 0) @@ -2016,17 +2003,12 @@ cleanup_dev: unregister_pernet_device(&ipvs_core_dev_ops); cleanup_sub: unregister_pernet_subsys(&ipvs_core_ops); -cleanup_sync: - ip_vs_sync_cleanup(); - cleanup_conn: +cleanup_conn: ip_vs_conn_cleanup(); - cleanup_app: - ip_vs_app_cleanup(); - cleanup_protocol: +cleanup_protocol: ip_vs_protocol_cleanup(); ip_vs_control_cleanup(); - cleanup_estimator: - ip_vs_estimator_cleanup(); +exit: return ret; } @@ -2035,12 +2017,9 @@ static void __exit ip_vs_cleanup(void) nf_unregister_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops)); unregister_pernet_device(&ipvs_core_dev_ops); unregister_pernet_subsys(&ipvs_core_ops); /* free ip_vs struct */ - ip_vs_sync_cleanup(); ip_vs_conn_cleanup(); - ip_vs_app_cleanup(); ip_vs_protocol_cleanup(); ip_vs_control_cleanup(); - ip_vs_estimator_cleanup(); pr_info("ipvs unloaded.\n"); } diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 699c79a..be43fd8 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -1334,9 +1334,9 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u) ip_vs_bind_pe(svc, pe); } - out_unlock: +out_unlock: write_unlock_bh(&__ip_vs_svc_lock); - out: +out: ip_vs_scheduler_put(old_sched); ip_vs_pe_put(old_pe); return ret; @@ -1483,7 +1483,7 @@ static int ip_vs_flush(struct net *net) * Delete service by {netns} in the service table. 
* Called by __ip_vs_cleanup() */ -void __ip_vs_service_cleanup(struct net *net) +void ip_vs_service_net_cleanup(struct net *net) { EnterFunction(2); /* Check for "full" addressed entries */ @@ -1662,7 +1662,7 @@ proc_do_sync_mode(ctl_table *table, int write, /* * IPVS sysctl table (under the /proc/sys/net/ipv4/vs/) * Do not change order or insert new entries without - * align with netns init in __ip_vs_control_init() + * align with netns init in ip_vs_control_net_init() */ static struct ctl_table vs_vars[] = { @@ -2469,7 +2469,7 @@ __ip_vs_get_service_entries(struct net *net, count++; } } - out: +out: return ret; } @@ -2707,7 +2707,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) ret = -EINVAL; } - out: +out: mutex_unlock(&__ip_vs_mutex); return ret; } @@ -3595,7 +3595,7 @@ static void ip_vs_genl_unregister(void) * per netns intit/exit func. */ #ifdef CONFIG_SYSCTL -int __net_init __ip_vs_control_init_sysctl(struct net *net) +int __net_init ip_vs_control_net_init_sysctl(struct net *net) { int idx; struct netns_ipvs *ipvs = net_ipvs(net); @@ -3654,7 +3654,7 @@ int __net_init __ip_vs_control_init_sysctl(struct net *net) return 0; } -void __net_init __ip_vs_control_cleanup_sysctl(struct net *net) +void __net_init ip_vs_control_net_cleanup_sysctl(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); @@ -3665,8 +3665,8 @@ void __net_init __ip_vs_control_cleanup_sysctl(struct net *net) #else -int __net_init __ip_vs_control_init_sysctl(struct net *net) { return 0; } -void __net_init __ip_vs_control_cleanup_sysctl(struct net *net) { } +int __net_init ip_vs_control_net_init_sysctl(struct net *net) { return 0; } +void __net_init ip_vs_control_net_cleanup_sysctl(struct net *net) { } #endif @@ -3674,7 +3674,7 @@ static struct notifier_block ip_vs_dst_notifier = { .notifier_call = ip_vs_dst_event, }; -int __net_init __ip_vs_control_init(struct net *net) +int __net_init ip_vs_control_net_init(struct net *net) { int idx; struct netns_ipvs *ipvs = net_ipvs(net); @@ -3702,7 +3702,7 @@ int __net_init __ip_vs_control_init(struct net *net) proc_net_fops_create(net, "ip_vs_stats_percpu", 0, &ip_vs_stats_percpu_fops); - if (__ip_vs_control_init_sysctl(net)) + if (ip_vs_control_net_init_sysctl(net)) goto err; return 0; @@ -3712,13 +3712,13 @@ err: return -ENOMEM; } -void __net_exit __ip_vs_control_cleanup(struct net *net) +void __net_exit ip_vs_control_net_cleanup(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); ip_vs_trash_cleanup(net); ip_vs_stop_estimator(net, &ipvs->tot_stats); - __ip_vs_control_cleanup_sysctl(net); + ip_vs_control_net_cleanup_sysctl(net); proc_net_remove(net, "ip_vs_stats_percpu"); proc_net_remove(net, "ip_vs_stats"); proc_net_remove(net, "ip_vs"); diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c index 508cce9..0fac601 100644 --- a/net/netfilter/ipvs/ip_vs_est.c +++ b/net/netfilter/ipvs/ip_vs_est.c @@ -192,7 +192,7 @@ void ip_vs_read_estimator(struct ip_vs_stats_user *dst, dst->outbps = (e->outbps + 0xF) >> 5; } -int __net_init __ip_vs_estimator_init(struct net *net) +int __net_init ip_vs_estimator_net_init(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); @@ -203,16 +203,7 @@ int __net_init __ip_vs_estimator_init(struct net *net) return 0; } -void __net_exit __ip_vs_estimator_cleanup(struct net *net) +void __net_exit ip_vs_estimator_net_cleanup(struct net *net) { del_timer_sync(&net_ipvs(net)->est_timer); } - -int __init ip_vs_estimator_init(void) -{ - return 0; -} - -void ip_vs_estimator_cleanup(void) -{ -} 
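[Editor's note] The ip_vs_est.c hunk above is representative of the whole series: each IPVS subsystem keeps only an ip_vs_*_net_init()/ip_vs_*_net_cleanup() pair that runs per network namespace, and the now-empty module-wide stubs (ip_vs_estimator_init(), ip_vs_sync_init(), ...) are deleted because register_pernet_subsys() already invokes the per-netns hooks for every namespace. The following is a minimal sketch of that pernet pattern; it is illustrative only, not part of this patch, and the example_* names are hypothetical.

#include <linux/module.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>

/* Per-namespace state; because .size is set below, the pernet core
 * allocates and frees this area for each struct net automatically. */
struct example_net {
	int counter;
};

static int example_net_id __read_mostly;

static int __net_init example_net_init(struct net *net)
{
	struct example_net *en = net_generic(net, example_net_id);

	en->counter = 0;	/* set up this namespace's instance */
	return 0;
}

static void __net_exit example_net_exit(struct net *net)
{
	/* tear down per-namespace resources here; the struct example_net
	 * area itself is released by the pernet core */
}

static struct pernet_operations example_net_ops = {
	.init = example_net_init,
	.exit = example_net_exit,
	.id   = &example_net_id,
	.size = sizeof(struct example_net),
};

static int __init example_init(void)
{
	/* runs example_net_init() for init_net and for every namespace
	 * created later, so no separate global constructor is needed */
	return register_pernet_subsys(&example_net_ops);
}

static void __exit example_exit(void)
{
	unregister_pernet_subsys(&example_net_ops);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");

This is the same division of labor as the __ip_vs_init()/__ip_vs_cleanup() pernet_operations in the hunks above: all per-netns setup and teardown lives in the pernet hooks, leaving ip_vs_init() with only the genuinely global control, protocol and connection-table bootstrapping.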
diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c index af63553..4490a32 100644 --- a/net/netfilter/ipvs/ip_vs_ftp.c +++ b/net/netfilter/ipvs/ip_vs_ftp.c @@ -44,8 +44,8 @@ #include <net/ip_vs.h> -#define SERVER_STRING "227 Entering Passive Mode (" -#define CLIENT_STRING "PORT " +#define SERVER_STRING "227 " +#define CLIENT_STRING "PORT" /* @@ -79,14 +79,17 @@ ip_vs_ftp_done_conn(struct ip_vs_app *app, struct ip_vs_conn *cp) /* * Get <addr,port> from the string "xxx.xxx.xxx.xxx,ppp,ppp", started - * with the "pattern" and terminated with the "term" character. + * with the "pattern", ignoring everything up to and including the + * "skip" character, and terminated with the "term" character. * <addr,port> is in network order. */ static int ip_vs_ftp_get_addrport(char *data, char *data_limit, - const char *pattern, size_t plen, char term, + const char *pattern, size_t plen, + char skip, char term, __be32 *addr, __be16 *port, char **start, char **end) { + char *s, c; unsigned char p[6]; int i = 0; @@ -101,19 +104,38 @@ static int ip_vs_ftp_get_addrport(char *data, char *data_limit, if (strnicmp(data, pattern, plen) != 0) { return 0; } - *start = data + plen; + s = data + plen; + if (skip) { + int found = 0; + + for (;; s++) { + if (s == data_limit) + return -1; + if (!found) { + if (*s == skip) + found = 1; + } else if (*s != skip) { + break; + } + } + } - for (data = *start; *data != term; data++) { + for (data = s; ; data++) { if (data == data_limit) return -1; + if (*data == term) + break; } *end = data; memset(p, 0, sizeof(p)); - for (data = *start; data != *end; data++) { - if (*data >= '0' && *data <= '9') { - p[i] = p[i]*10 + *data - '0'; - } else if (*data == ',' && i < 5) { + for (data = s; ; data++) { + c = *data; + if (c == term) + break; + if (c >= '0' && c <= '9') { + p[i] = p[i]*10 + c - '0'; + } else if (c == ',' && i < 5) { i++; } else { /* unexpected character */ @@ -124,8 +146,9 @@ static int ip_vs_ftp_get_addrport(char *data, char *data_limit, if (i != 5) return -1; - *addr = get_unaligned((__be32 *)p); - *port = get_unaligned((__be16 *)(p + 4)); + *start = s; + *addr = get_unaligned((__be32 *) p); + *port = get_unaligned((__be16 *) (p + 4)); return 1; } @@ -185,7 +208,8 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, if (ip_vs_ftp_get_addrport(data, data_limit, SERVER_STRING, - sizeof(SERVER_STRING)-1, ')', + sizeof(SERVER_STRING)-1, + '(', ')', &from.ip, &port, &start, &end) != 1) return 1; @@ -345,7 +369,7 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, */ if (ip_vs_ftp_get_addrport(data_start, data_limit, CLIENT_STRING, sizeof(CLIENT_STRING)-1, - '\r', &to.ip, &port, + ' ', '\r', &to.ip, &port, &start, &end) != 1) return 1; diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c index eb86028..52d073c 100644 --- a/net/netfilter/ipvs/ip_vs_proto.c +++ b/net/netfilter/ipvs/ip_vs_proto.c @@ -316,7 +316,7 @@ ip_vs_tcpudp_debug_packet(int af, struct ip_vs_protocol *pp, /* * per network name-space init */ -int __net_init __ip_vs_protocol_init(struct net *net) +int __net_init ip_vs_protocol_net_init(struct net *net) { #ifdef CONFIG_IP_VS_PROTO_TCP register_ip_vs_proto_netns(net, &ip_vs_protocol_tcp); @@ -336,7 +336,7 @@ int __net_init __ip_vs_protocol_init(struct net *net) return 0; } -void __net_exit __ip_vs_protocol_cleanup(struct net *net) +void __net_exit ip_vs_protocol_net_cleanup(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); struct ip_vs_proto_data *pd; diff --git a/net/netfilter/ipvs/ip_vs_sync.c 
b/net/netfilter/ipvs/ip_vs_sync.c index e292e5b..7ee7215 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -1663,7 +1663,7 @@ int stop_sync_thread(struct net *net, int state) /* * Initialize data struct for each netns */ -int __net_init __ip_vs_sync_init(struct net *net) +int __net_init ip_vs_sync_net_init(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); @@ -1677,7 +1677,7 @@ int __net_init __ip_vs_sync_init(struct net *net) return 0; } -void __ip_vs_sync_cleanup(struct net *net) +void ip_vs_sync_net_cleanup(struct net *net) { int retc; @@ -1689,12 +1689,3 @@ void __ip_vs_sync_cleanup(struct net *net) if (retc && retc != -ESRCH) pr_err("Failed to stop Backup Daemon\n"); } - -int __init ip_vs_sync_init(void) -{ - return 0; -} - -void ip_vs_sync_cleanup(void) -{ -} diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 2e1c11f..f7af8b8 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -850,7 +850,7 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl, /* It exists; we have (non-exclusive) reference. */ if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) { - *ctinfo = IP_CT_ESTABLISHED + IP_CT_IS_REPLY; + *ctinfo = IP_CT_ESTABLISHED_REPLY; /* Please set reply bit if this packet OK */ *set_reply = 1; } else { @@ -922,6 +922,9 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, ret = -ret; goto out; } + /* ICMP[v6] protocol trackers may assign one conntrack. */ + if (skb->nfct) + goto out; } ct = resolve_normal_ct(net, tmpl, skb, dataoff, pf, protonum, @@ -1143,7 +1146,7 @@ static void nf_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb) /* This ICMP is in reverse direction to the packet which caused it */ ct = nf_ct_get(skb, &ctinfo); if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) - ctinfo = IP_CT_RELATED + IP_CT_IS_REPLY; + ctinfo = IP_CT_RELATED_REPLY; else ctinfo = IP_CT_RELATED; diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c index e17cb7c..6f5801e 100644 --- a/net/netfilter/nf_conntrack_ftp.c +++ b/net/netfilter/nf_conntrack_ftp.c @@ -368,7 +368,7 @@ static int help(struct sk_buff *skb, /* Until there's been traffic both ways, don't look in packets. */ if (ctinfo != IP_CT_ESTABLISHED && - ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) { + ctinfo != IP_CT_ESTABLISHED_REPLY) { pr_debug("ftp: Conntrackinfo = %u\n", ctinfo); return NF_ACCEPT; } diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index 18b2ce5..f03c2d4 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -571,10 +571,9 @@ static int h245_help(struct sk_buff *skb, unsigned int protoff, int ret; /* Until there's been traffic both ways, don't look in packets. */ - if (ctinfo != IP_CT_ESTABLISHED && - ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) { + if (ctinfo != IP_CT_ESTABLISHED && ctinfo != IP_CT_ESTABLISHED_REPLY) return NF_ACCEPT; - } + pr_debug("nf_ct_h245: skblen = %u\n", skb->len); spin_lock_bh(&nf_h323_lock); @@ -1125,10 +1124,9 @@ static int q931_help(struct sk_buff *skb, unsigned int protoff, int ret; /* Until there's been traffic both ways, don't look in packets. 
*/ - if (ctinfo != IP_CT_ESTABLISHED && - ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) { + if (ctinfo != IP_CT_ESTABLISHED && ctinfo != IP_CT_ESTABLISHED_REPLY) return NF_ACCEPT; - } + pr_debug("nf_ct_q931: skblen = %u\n", skb->len); spin_lock_bh(&nf_h323_lock); diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c index b394aa3..4f9390b 100644 --- a/net/netfilter/nf_conntrack_irc.c +++ b/net/netfilter/nf_conntrack_irc.c @@ -125,8 +125,7 @@ static int help(struct sk_buff *skb, unsigned int protoff, return NF_ACCEPT; /* Until there's been traffic both ways, don't look in packets. */ - if (ctinfo != IP_CT_ESTABLISHED && - ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) + if (ctinfo != IP_CT_ESTABLISHED && ctinfo != IP_CT_ESTABLISHED_REPLY) return NF_ACCEPT; /* Not a full tcp header? */ diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 482e90c..7dec88a 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -970,7 +970,7 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb, if (nlh->nlmsg_flags & NLM_F_DUMP) return netlink_dump_start(ctnl, skb, nlh, ctnetlink_dump_table, - ctnetlink_done); + ctnetlink_done, 0); err = ctnetlink_parse_zone(cda[CTA_ZONE], &zone); if (err < 0) @@ -1840,7 +1840,7 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, if (nlh->nlmsg_flags & NLM_F_DUMP) { return netlink_dump_start(ctnl, skb, nlh, ctnetlink_exp_dump_table, - ctnetlink_exp_done); + ctnetlink_exp_done, 0); } err = ctnetlink_parse_zone(cda[CTA_EXPECT_ZONE], &zone); diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c index 0889448..2fd4565 100644 --- a/net/netfilter/nf_conntrack_pptp.c +++ b/net/netfilter/nf_conntrack_pptp.c @@ -519,8 +519,7 @@ conntrack_pptp_help(struct sk_buff *skb, unsigned int protoff, u_int16_t msg; /* don't do any tracking before tcp handshake complete */ - if (ctinfo != IP_CT_ESTABLISHED && - ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) + if (ctinfo != IP_CT_ESTABLISHED && ctinfo != IP_CT_ESTABLISHED_REPLY) return NF_ACCEPT; nexthdr_off = protoff; diff --git a/net/netfilter/nf_conntrack_sane.c b/net/netfilter/nf_conntrack_sane.c index d9e2773..8501823 100644 --- a/net/netfilter/nf_conntrack_sane.c +++ b/net/netfilter/nf_conntrack_sane.c @@ -78,7 +78,7 @@ static int help(struct sk_buff *skb, ct_sane_info = &nfct_help(ct)->help.ct_sane_info; /* Until there's been traffic both ways, don't look in packets. */ if (ctinfo != IP_CT_ESTABLISHED && - ctinfo != IP_CT_ESTABLISHED+IP_CT_IS_REPLY) + ctinfo != IP_CT_ESTABLISHED_REPLY) return NF_ACCEPT; /* Not a full tcp header? */ diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index cb5a285..93faf6a 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -1423,7 +1423,7 @@ static int sip_help_tcp(struct sk_buff *skb, unsigned int protoff, typeof(nf_nat_sip_seq_adjust_hook) nf_nat_sip_seq_adjust; if (ctinfo != IP_CT_ESTABLISHED && - ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) + ctinfo != IP_CT_ESTABLISHED_REPLY) return NF_ACCEPT; /* No Data ? 
*/ diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index e0ee010..2e7ccbb 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -456,7 +456,8 @@ __build_packet_message(struct nfulnl_instance *inst, if (skb->mark) NLA_PUT_BE32(inst->skb, NFULA_MARK, htonl(skb->mark)); - if (indev && skb->dev) { + if (indev && skb->dev && + skb->mac_header != skb->network_header) { struct nfulnl_msg_packet_hw phw; int len = dev_parse_header(skb, phw.hw_addr); if (len > 0) { diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index b83123f..fdd2faf 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -335,7 +335,8 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, if (entskb->mark) NLA_PUT_BE32(skb, NFQA_MARK, htonl(entskb->mark)); - if (indev && entskb->dev) { + if (indev && entskb->dev && + entskb->mac_header != entskb->network_header) { struct nfqnl_msg_packet_hw phw; int len = dev_parse_header(entskb, phw.hw_addr); if (len) { diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index 782e5198..0221d10 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -5,7 +5,7 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ - +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/gfp.h> #include <linux/skbuff.h> @@ -95,8 +95,11 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par) if (info->helper[0]) { ret = -ENOENT; proto = xt_ct_find_proto(par); - if (!proto) + if (!proto) { + pr_info("You must specify a L4 protocol, " + "and not use inversions on it.\n"); goto err3; + } ret = -ENOMEM; help = nf_ct_helper_ext_add(ct, GFP_KERNEL); @@ -107,8 +110,10 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par) help->helper = nf_conntrack_helper_try_module_get(info->helper, par->family, proto); - if (help->helper == NULL) + if (help->helper == NULL) { + pr_info("No such helper \"%s\"\n", info->helper); goto err3; + } } __set_bit(IPS_TEMPLATE_BIT, &ct->status); diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c index b3babae..0ec8138 100644 --- a/net/netfilter/xt_set.c +++ b/net/netfilter/xt_set.c @@ -13,7 +13,6 @@ #include <linux/module.h> #include <linux/skbuff.h> -#include <linux/version.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter/xt_set.h> @@ -29,23 +28,33 @@ MODULE_ALIAS("ip6t_SET"); static inline int match_set(ip_set_id_t index, const struct sk_buff *skb, - u8 pf, u8 dim, u8 flags, int inv) + const struct xt_action_param *par, + const struct ip_set_adt_opt *opt, int inv) { - if (ip_set_test(index, skb, pf, dim, flags)) + if (ip_set_test(index, skb, par, opt)) inv = !inv; return inv; } +#define ADT_OPT(n, f, d, fs, cfs, t) \ +const struct ip_set_adt_opt n = { \ + .family = f, \ + .dim = d, \ + .flags = fs, \ + .cmdflags = cfs, \ + .timeout = t, \ +} + /* Revision 0 interface: backward compatible with netfilter/iptables */ static bool set_match_v0(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_set_info_match_v0 *info = par->matchinfo; + ADT_OPT(opt, par->family, info->match_set.u.compat.dim, + info->match_set.u.compat.flags, 0, UINT_MAX); - return match_set(info->match_set.index, skb, par->family, - info->match_set.u.compat.dim, - info->match_set.u.compat.flags, + return match_set(info->match_set.index, skb, par, &opt, info->match_set.u.compat.flags & IPSET_INV_MATCH); } @@ -103,15 +112,15 @@ static 
unsigned int set_target_v0(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_set_info_target_v0 *info = par->targinfo; + ADT_OPT(add_opt, par->family, info->add_set.u.compat.dim, + info->add_set.u.compat.flags, 0, UINT_MAX); + ADT_OPT(del_opt, par->family, info->del_set.u.compat.dim, + info->del_set.u.compat.flags, 0, UINT_MAX); if (info->add_set.index != IPSET_INVALID_ID) - ip_set_add(info->add_set.index, skb, par->family, - info->add_set.u.compat.dim, - info->add_set.u.compat.flags); + ip_set_add(info->add_set.index, skb, par, &add_opt); if (info->del_set.index != IPSET_INVALID_ID) - ip_set_del(info->del_set.index, skb, par->family, - info->del_set.u.compat.dim, - info->del_set.u.compat.flags); + ip_set_del(info->del_set.index, skb, par, &del_opt); return XT_CONTINUE; } @@ -170,23 +179,23 @@ set_target_v0_destroy(const struct xt_tgdtor_param *par) ip_set_nfnl_put(info->del_set.index); } -/* Revision 1: current interface to netfilter/iptables */ +/* Revision 1 match and target */ static bool -set_match(const struct sk_buff *skb, struct xt_action_param *par) +set_match_v1(const struct sk_buff *skb, struct xt_action_param *par) { - const struct xt_set_info_match *info = par->matchinfo; + const struct xt_set_info_match_v1 *info = par->matchinfo; + ADT_OPT(opt, par->family, info->match_set.dim, + info->match_set.flags, 0, UINT_MAX); - return match_set(info->match_set.index, skb, par->family, - info->match_set.dim, - info->match_set.flags, + return match_set(info->match_set.index, skb, par, &opt, info->match_set.flags & IPSET_INV_MATCH); } static int -set_match_checkentry(const struct xt_mtchk_param *par) +set_match_v1_checkentry(const struct xt_mtchk_param *par) { - struct xt_set_info_match *info = par->matchinfo; + struct xt_set_info_match_v1 *info = par->matchinfo; ip_set_id_t index; index = ip_set_nfnl_get_byindex(info->match_set.index); @@ -207,36 +216,34 @@ set_match_checkentry(const struct xt_mtchk_param *par) } static void -set_match_destroy(const struct xt_mtdtor_param *par) +set_match_v1_destroy(const struct xt_mtdtor_param *par) { - struct xt_set_info_match *info = par->matchinfo; + struct xt_set_info_match_v1 *info = par->matchinfo; ip_set_nfnl_put(info->match_set.index); } static unsigned int -set_target(struct sk_buff *skb, const struct xt_action_param *par) +set_target_v1(struct sk_buff *skb, const struct xt_action_param *par) { - const struct xt_set_info_target *info = par->targinfo; + const struct xt_set_info_target_v1 *info = par->targinfo; + ADT_OPT(add_opt, par->family, info->add_set.dim, + info->add_set.flags, 0, UINT_MAX); + ADT_OPT(del_opt, par->family, info->del_set.dim, + info->del_set.flags, 0, UINT_MAX); if (info->add_set.index != IPSET_INVALID_ID) - ip_set_add(info->add_set.index, - skb, par->family, - info->add_set.dim, - info->add_set.flags); + ip_set_add(info->add_set.index, skb, par, &add_opt); if (info->del_set.index != IPSET_INVALID_ID) - ip_set_del(info->del_set.index, - skb, par->family, - info->del_set.dim, - info->del_set.flags); + ip_set_del(info->del_set.index, skb, par, &del_opt); return XT_CONTINUE; } static int -set_target_checkentry(const struct xt_tgchk_param *par) +set_target_v1_checkentry(const struct xt_tgchk_param *par) { - const struct xt_set_info_target *info = par->targinfo; + const struct xt_set_info_target_v1 *info = par->targinfo; ip_set_id_t index; if (info->add_set.index != IPSET_INVALID_ID) { @@ -273,9 +280,9 @@ set_target_checkentry(const struct xt_tgchk_param *par) } static void -set_target_destroy(const struct 
xt_tgdtor_param *par) +set_target_v1_destroy(const struct xt_tgdtor_param *par) { - const struct xt_set_info_target *info = par->targinfo; + const struct xt_set_info_target_v1 *info = par->targinfo; if (info->add_set.index != IPSET_INVALID_ID) ip_set_nfnl_put(info->add_set.index); @@ -283,6 +290,28 @@ set_target_destroy(const struct xt_tgdtor_param *par) ip_set_nfnl_put(info->del_set.index); } +/* Revision 2 target */ + +static unsigned int +set_target_v2(struct sk_buff *skb, const struct xt_action_param *par) +{ + const struct xt_set_info_target_v2 *info = par->targinfo; + ADT_OPT(add_opt, par->family, info->add_set.dim, + info->add_set.flags, info->flags, info->timeout); + ADT_OPT(del_opt, par->family, info->del_set.dim, + info->del_set.flags, 0, UINT_MAX); + + if (info->add_set.index != IPSET_INVALID_ID) + ip_set_add(info->add_set.index, skb, par, &add_opt); + if (info->del_set.index != IPSET_INVALID_ID) + ip_set_del(info->del_set.index, skb, par, &del_opt); + + return XT_CONTINUE; +} + +#define set_target_v2_checkentry set_target_v1_checkentry +#define set_target_v2_destroy set_target_v1_destroy + static struct xt_match set_matches[] __read_mostly = { { .name = "set", @@ -298,20 +327,20 @@ static struct xt_match set_matches[] __read_mostly = { .name = "set", .family = NFPROTO_IPV4, .revision = 1, - .match = set_match, - .matchsize = sizeof(struct xt_set_info_match), - .checkentry = set_match_checkentry, - .destroy = set_match_destroy, + .match = set_match_v1, + .matchsize = sizeof(struct xt_set_info_match_v1), + .checkentry = set_match_v1_checkentry, + .destroy = set_match_v1_destroy, .me = THIS_MODULE }, { .name = "set", .family = NFPROTO_IPV6, .revision = 1, - .match = set_match, - .matchsize = sizeof(struct xt_set_info_match), - .checkentry = set_match_checkentry, - .destroy = set_match_destroy, + .match = set_match_v1, + .matchsize = sizeof(struct xt_set_info_match_v1), + .checkentry = set_match_v1_checkentry, + .destroy = set_match_v1_destroy, .me = THIS_MODULE }, }; @@ -331,20 +360,40 @@ static struct xt_target set_targets[] __read_mostly = { .name = "SET", .revision = 1, .family = NFPROTO_IPV4, - .target = set_target, - .targetsize = sizeof(struct xt_set_info_target), - .checkentry = set_target_checkentry, - .destroy = set_target_destroy, + .target = set_target_v1, + .targetsize = sizeof(struct xt_set_info_target_v1), + .checkentry = set_target_v1_checkentry, + .destroy = set_target_v1_destroy, .me = THIS_MODULE }, { .name = "SET", .revision = 1, .family = NFPROTO_IPV6, - .target = set_target, - .targetsize = sizeof(struct xt_set_info_target), - .checkentry = set_target_checkentry, - .destroy = set_target_destroy, + .target = set_target_v1, + .targetsize = sizeof(struct xt_set_info_target_v1), + .checkentry = set_target_v1_checkentry, + .destroy = set_target_v1_destroy, + .me = THIS_MODULE + }, + { + .name = "SET", + .revision = 2, + .family = NFPROTO_IPV4, + .target = set_target_v2, + .targetsize = sizeof(struct xt_set_info_target_v2), + .checkentry = set_target_v2_checkentry, + .destroy = set_target_v2_destroy, + .me = THIS_MODULE + }, + { + .name = "SET", + .revision = 2, + .family = NFPROTO_IPV6, + .target = set_target_v2, + .targetsize = sizeof(struct xt_set_info_target_v2), + .checkentry = set_target_v2_checkentry, + .destroy = set_target_v2_destroy, .me = THIS_MODULE }, }; diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index 9cc4635..fe39f7e 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -143,9 +143,9 @@ socket_match(const 
struct sk_buff *skb, struct xt_action_param *par, ct = nf_ct_get(skb, &ctinfo); if (ct && !nf_ct_is_untracked(ct) && ((iph->protocol != IPPROTO_ICMP && - ctinfo == IP_CT_IS_REPLY + IP_CT_ESTABLISHED) || + ctinfo == IP_CT_ESTABLISHED_REPLY) || (iph->protocol == IPPROTO_ICMP && - ctinfo == IP_CT_IS_REPLY + IP_CT_RELATED)) && + ctinfo == IP_CT_RELATED_REPLY)) && (ct->status & IPS_SRC_NAT_DONE)) { daddr = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip; diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index 9c38658..8efd061 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -426,10 +426,9 @@ int netlbl_unlhsh_add(struct net *net, audit_info); switch (addr_len) { case sizeof(struct in_addr): { - struct in_addr *addr4, *mask4; + const struct in_addr *addr4 = addr; + const struct in_addr *mask4 = mask; - addr4 = (struct in_addr *)addr; - mask4 = (struct in_addr *)mask; ret_val = netlbl_unlhsh_add_addr4(iface, addr4, mask4, secid); if (audit_buf != NULL) netlbl_af4list_audit_addr(audit_buf, 1, @@ -440,10 +439,9 @@ int netlbl_unlhsh_add(struct net *net, } #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) case sizeof(struct in6_addr): { - struct in6_addr *addr6, *mask6; + const struct in6_addr *addr6 = addr; + const struct in6_addr *mask6 = mask; - addr6 = (struct in6_addr *)addr; - mask6 = (struct in6_addr *)mask; ret_val = netlbl_unlhsh_add_addr6(iface, addr6, mask6, secid); if (audit_buf != NULL) netlbl_af6list_audit_addr(audit_buf, 1, diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index a7ec851..0a4db02 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1659,13 +1659,10 @@ static int netlink_dump(struct sock *sk) { struct netlink_sock *nlk = nlk_sk(sk); struct netlink_callback *cb; - struct sk_buff *skb; + struct sk_buff *skb = NULL; struct nlmsghdr *nlh; int len, err = -ENOBUFS; - - skb = sock_rmalloc(sk, NLMSG_GOODSIZE, 0, GFP_KERNEL); - if (!skb) - goto errout; + int alloc_size; mutex_lock(nlk->cb_mutex); @@ -1675,6 +1672,12 @@ static int netlink_dump(struct sock *sk) goto errout_skb; } + alloc_size = max_t(int, cb->min_dump_alloc, NLMSG_GOODSIZE); + + skb = sock_rmalloc(sk, alloc_size, 0, GFP_KERNEL); + if (!skb) + goto errout_skb; + len = cb->dump(skb, cb); if (len > 0) { @@ -1715,7 +1718,6 @@ static int netlink_dump(struct sock *sk) errout_skb: mutex_unlock(nlk->cb_mutex); kfree_skb(skb); -errout: return err; } @@ -1723,7 +1725,8 @@ int netlink_dump_start(struct sock *ssk, struct sk_buff *skb, const struct nlmsghdr *nlh, int (*dump)(struct sk_buff *skb, struct netlink_callback *), - int (*done)(struct netlink_callback *)) + int (*done)(struct netlink_callback *), + u16 min_dump_alloc) { struct netlink_callback *cb; struct sock *sk; @@ -1737,6 +1740,7 @@ int netlink_dump_start(struct sock *ssk, struct sk_buff *skb, cb->dump = dump; cb->done = done; cb->nlh = nlh; + cb->min_dump_alloc = min_dump_alloc; atomic_inc(&skb->users); cb->skb = skb; diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 1781d99..482fa57 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -525,7 +525,7 @@ static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) genl_unlock(); err = netlink_dump_start(net->genl_sock, skb, nlh, - ops->dumpit, ops->done); + ops->dumpit, ops->done, 0); genl_lock(); return err; } diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 925f715..461b16f 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ 
-798,7 +798,13 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, getnstimeofday(&ts); h.h2->tp_sec = ts.tv_sec; h.h2->tp_nsec = ts.tv_nsec; - h.h2->tp_vlan_tci = vlan_tx_tag_get(skb); + if (vlan_tx_tag_present(skb)) { + h.h2->tp_vlan_tci = vlan_tx_tag_get(skb); + status |= TP_STATUS_VLAN_VALID; + } else { + h.h2->tp_vlan_tci = 0; + } + h.h2->tp_padding = 0; hdrlen = sizeof(*h.h2); break; default: @@ -969,7 +975,8 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) struct sk_buff *skb; struct net_device *dev; __be16 proto; - int ifindex, err, reserve = 0; + bool need_rls_dev = false; + int err, reserve = 0; void *ph; struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name; int tp_len, size_max; @@ -981,7 +988,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) err = -EBUSY; if (saddr == NULL) { - ifindex = po->ifindex; + dev = po->prot_hook.dev; proto = po->num; addr = NULL; } else { @@ -992,12 +999,12 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) + offsetof(struct sockaddr_ll, sll_addr))) goto out; - ifindex = saddr->sll_ifindex; proto = saddr->sll_protocol; addr = saddr->sll_addr; + dev = dev_get_by_index(sock_net(&po->sk), saddr->sll_ifindex); + need_rls_dev = true; } - dev = dev_get_by_index(sock_net(&po->sk), ifindex); err = -ENXIO; if (unlikely(dev == NULL)) goto out; @@ -1083,7 +1090,8 @@ out_status: __packet_set_status(po, ph, status); kfree_skb(skb); out_put: - dev_put(dev); + if (need_rls_dev) + dev_put(dev); out: mutex_unlock(&po->pg_vec_lock); return err; @@ -1121,8 +1129,9 @@ static int packet_snd(struct socket *sock, struct sk_buff *skb; struct net_device *dev; __be16 proto; + bool need_rls_dev = false; unsigned char *addr; - int ifindex, err, reserve = 0; + int err, reserve = 0; struct virtio_net_hdr vnet_hdr = { 0 }; int offset = 0; int vnet_hdr_len; @@ -1134,7 +1143,7 @@ static int packet_snd(struct socket *sock, */ if (saddr == NULL) { - ifindex = po->ifindex; + dev = po->prot_hook.dev; proto = po->num; addr = NULL; } else { @@ -1143,13 +1152,12 @@ static int packet_snd(struct socket *sock, goto out; if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr))) goto out; - ifindex = saddr->sll_ifindex; proto = saddr->sll_protocol; addr = saddr->sll_addr; + dev = dev_get_by_index(sock_net(sk), saddr->sll_ifindex); + need_rls_dev = true; } - - dev = dev_get_by_index(sock_net(sk), ifindex); err = -ENXIO; if (dev == NULL) goto out_unlock; @@ -1280,14 +1288,15 @@ static int packet_snd(struct socket *sock, if (err > 0 && (err = net_xmit_errno(err)) != 0) goto out_unlock; - dev_put(dev); + if (need_rls_dev) + dev_put(dev); return len; out_free: kfree_skb(skb); out_unlock: - if (dev) + if (dev && need_rls_dev) dev_put(dev); out: return err; @@ -1337,6 +1346,10 @@ static int packet_release(struct socket *sock) __dev_remove_pack(&po->prot_hook); __sock_put(sk); } + if (po->prot_hook.dev) { + dev_put(po->prot_hook.dev); + po->prot_hook.dev = NULL; + } spin_unlock(&po->bind_lock); packet_flush_mclist(sk); @@ -1390,6 +1403,8 @@ static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 protoc po->num = protocol; po->prot_hook.type = protocol; + if (po->prot_hook.dev) + dev_put(po->prot_hook.dev); po->prot_hook.dev = dev; po->ifindex = dev ? 
dev->ifindex : 0; @@ -1434,10 +1449,8 @@ static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, strlcpy(name, uaddr->sa_data, sizeof(name)); dev = dev_get_by_name(sock_net(sk), name); - if (dev) { + if (dev) err = packet_do_bind(sk, dev, pkt_sk(sk)->num); - dev_put(dev); - } return err; } @@ -1465,8 +1478,6 @@ static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len goto out; } err = packet_do_bind(sk, dev, sll->sll_protocol ? : pkt_sk(sk)->num); - if (dev) - dev_put(dev); out: return err; @@ -1675,6 +1686,8 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, vnet_hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; vnet_hdr.csum_start = skb_checksum_start_offset(skb); vnet_hdr.csum_offset = skb->csum_offset; + } else if (skb->ip_summed == CHECKSUM_UNNECESSARY) { + vnet_hdr.flags = VIRTIO_NET_HDR_F_DATA_VALID; } /* else everything is zero */ err = memcpy_toiovec(msg->msg_iov, (void *)&vnet_hdr, @@ -1725,8 +1738,13 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, aux.tp_snaplen = skb->len; aux.tp_mac = 0; aux.tp_net = skb_network_offset(skb); - aux.tp_vlan_tci = vlan_tx_tag_get(skb); - + if (vlan_tx_tag_present(skb)) { + aux.tp_vlan_tci = vlan_tx_tag_get(skb); + aux.tp_status |= TP_STATUS_VLAN_VALID; + } else { + aux.tp_vlan_tci = 0; + } + aux.tp_padding = 0; put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux); } @@ -2231,6 +2249,8 @@ static int packet_notifier(struct notifier_block *this, unsigned long msg, void } if (msg == NETDEV_UNREGISTER) { po->ifindex = -1; + if (po->prot_hook.dev) + dev_put(po->prot_hook.dev); po->prot_hook.dev = NULL; } spin_unlock(&po->bind_lock); diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c index 438accb..d61f676 100644 --- a/net/phonet/pn_netlink.c +++ b/net/phonet/pn_netlink.c @@ -289,15 +289,16 @@ out: int __init phonet_netlink_register(void) { - int err = __rtnl_register(PF_PHONET, RTM_NEWADDR, addr_doit, NULL); + int err = __rtnl_register(PF_PHONET, RTM_NEWADDR, addr_doit, + NULL, NULL); if (err) return err; /* Further __rtnl_register() cannot fail */ - __rtnl_register(PF_PHONET, RTM_DELADDR, addr_doit, NULL); - __rtnl_register(PF_PHONET, RTM_GETADDR, NULL, getaddr_dumpit); - __rtnl_register(PF_PHONET, RTM_NEWROUTE, route_doit, NULL); - __rtnl_register(PF_PHONET, RTM_DELROUTE, route_doit, NULL); - __rtnl_register(PF_PHONET, RTM_GETROUTE, NULL, route_dumpit); + __rtnl_register(PF_PHONET, RTM_DELADDR, addr_doit, NULL, NULL); + __rtnl_register(PF_PHONET, RTM_GETADDR, NULL, getaddr_dumpit, NULL); + __rtnl_register(PF_PHONET, RTM_NEWROUTE, route_doit, NULL, NULL); + __rtnl_register(PF_PHONET, RTM_DELROUTE, route_doit, NULL, NULL); + __rtnl_register(PF_PHONET, RTM_GETROUTE, NULL, route_dumpit, NULL); return 0; } diff --git a/net/rds/bind.c b/net/rds/bind.c index 2f6b3fc..637bde5 100644 --- a/net/rds/bind.c +++ b/net/rds/bind.c @@ -35,6 +35,7 @@ #include <linux/in.h> #include <linux/if_arp.h> #include <linux/jhash.h> +#include <linux/ratelimit.h> #include "rds.h" #define BIND_HASH_SIZE 1024 @@ -185,8 +186,7 @@ int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if (!trans) { ret = -EADDRNOTAVAIL; rds_remove_bound(rs); - if (printk_ratelimit()) - printk(KERN_INFO "RDS: rds_bind() could not find a transport, " + printk_ratelimited(KERN_INFO "RDS: rds_bind() could not find a transport, " "load rds_tcp or rds_rdma?\n"); goto out; } diff --git a/net/rds/ib.c b/net/rds/ib.c index cce19f95..3b83086 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c @@ -325,7 
+325,7 @@ static int rds_ib_laddr_check(__be32 addr) /* Create a CMA ID and try to bind it. This catches both * IB and iWARP capable NICs. */ - cm_id = rdma_create_id(NULL, NULL, RDMA_PS_TCP); + cm_id = rdma_create_id(NULL, NULL, RDMA_PS_TCP, IB_QPT_RC); if (IS_ERR(cm_id)) return PTR_ERR(cm_id); diff --git a/net/rds/ib.h b/net/rds/ib.h index 4297d92..edfaaaf 100644 --- a/net/rds/ib.h +++ b/net/rds/ib.h @@ -3,6 +3,7 @@ #include <rdma/ib_verbs.h> #include <rdma/rdma_cm.h> +#include <linux/interrupt.h> #include <linux/pci.h> #include <linux/slab.h> #include "rds.h" diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c index ee369d2..cd67026 100644 --- a/net/rds/ib_cm.c +++ b/net/rds/ib_cm.c @@ -34,6 +34,7 @@ #include <linux/in.h> #include <linux/slab.h> #include <linux/vmalloc.h> +#include <linux/ratelimit.h> #include "rds.h" #include "ib.h" @@ -435,13 +436,12 @@ static u32 rds_ib_protocol_compatible(struct rdma_cm_event *event) version = RDS_PROTOCOL_3_0; while ((common >>= 1) != 0) version++; - } else if (printk_ratelimit()) { - printk(KERN_NOTICE "RDS: Connection from %pI4 using " + } + printk_ratelimited(KERN_NOTICE "RDS: Connection from %pI4 using " "incompatible protocol version %u.%u\n", &dp->dp_saddr, dp->dp_protocol_major, dp->dp_protocol_minor); - } return version; } @@ -587,7 +587,7 @@ int rds_ib_conn_connect(struct rds_connection *conn) /* XXX I wonder what affect the port space has */ /* delegate cm event handler to rdma_transport */ ic->i_cm_id = rdma_create_id(rds_rdma_cm_event_handler, conn, - RDMA_PS_TCP); + RDMA_PS_TCP, IB_QPT_RC); if (IS_ERR(ic->i_cm_id)) { ret = PTR_ERR(ic->i_cm_id); ic->i_cm_id = NULL; diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c index 7c4dce8..e590949 100644 --- a/net/rds/ib_send.c +++ b/net/rds/ib_send.c @@ -34,6 +34,7 @@ #include <linux/in.h> #include <linux/device.h> #include <linux/dmapool.h> +#include <linux/ratelimit.h> #include "rds.h" #include "ib.h" @@ -207,8 +208,7 @@ static struct rds_message *rds_ib_send_unmap_op(struct rds_ib_connection *ic, } break; default: - if (printk_ratelimit()) - printk(KERN_NOTICE + printk_ratelimited(KERN_NOTICE "RDS/IB: %s: unexpected opcode 0x%x in WR!\n", __func__, send->s_wr.opcode); break; diff --git a/net/rds/iw.c b/net/rds/iw.c index 5a9676f..f747484 100644 --- a/net/rds/iw.c +++ b/net/rds/iw.c @@ -226,7 +226,7 @@ static int rds_iw_laddr_check(__be32 addr) /* Create a CMA ID and try to bind it. This catches both * IB and iWARP capable NICs. 
*/ - cm_id = rdma_create_id(NULL, NULL, RDMA_PS_TCP); + cm_id = rdma_create_id(NULL, NULL, RDMA_PS_TCP, IB_QPT_RC); if (IS_ERR(cm_id)) return PTR_ERR(cm_id); diff --git a/net/rds/iw.h b/net/rds/iw.h index 9015192..04ce3b1 100644 --- a/net/rds/iw.h +++ b/net/rds/iw.h @@ -1,6 +1,7 @@ #ifndef _RDS_IW_H #define _RDS_IW_H +#include <linux/interrupt.h> #include <rdma/ib_verbs.h> #include <rdma/rdma_cm.h> #include "rds.h" diff --git a/net/rds/iw_cm.c b/net/rds/iw_cm.c index 3a60a15..9556d28 100644 --- a/net/rds/iw_cm.c +++ b/net/rds/iw_cm.c @@ -34,6 +34,7 @@ #include <linux/in.h> #include <linux/slab.h> #include <linux/vmalloc.h> +#include <linux/ratelimit.h> #include "rds.h" #include "iw.h" @@ -258,8 +259,7 @@ static int rds_iw_setup_qp(struct rds_connection *conn) */ rds_iwdev = ib_get_client_data(dev, &rds_iw_client); if (!rds_iwdev) { - if (printk_ratelimit()) - printk(KERN_NOTICE "RDS/IW: No client_data for device %s\n", + printk_ratelimited(KERN_NOTICE "RDS/IW: No client_data for device %s\n", dev->name); return -EOPNOTSUPP; } @@ -365,13 +365,12 @@ static u32 rds_iw_protocol_compatible(const struct rds_iw_connect_private *dp) version = RDS_PROTOCOL_3_0; while ((common >>= 1) != 0) version++; - } else if (printk_ratelimit()) { - printk(KERN_NOTICE "RDS: Connection from %pI4 using " + } + printk_ratelimited(KERN_NOTICE "RDS: Connection from %pI4 using " "incompatible protocol version %u.%u\n", &dp->dp_saddr, dp->dp_protocol_major, dp->dp_protocol_minor); - } return version; } @@ -522,7 +521,7 @@ int rds_iw_conn_connect(struct rds_connection *conn) /* XXX I wonder what affect the port space has */ /* delegate cm event handler to rdma_transport */ ic->i_cm_id = rdma_create_id(rds_rdma_cm_event_handler, conn, - RDMA_PS_TCP); + RDMA_PS_TCP, IB_QPT_RC); if (IS_ERR(ic->i_cm_id)) { ret = PTR_ERR(ic->i_cm_id); ic->i_cm_id = NULL; diff --git a/net/rds/iw_rdma.c b/net/rds/iw_rdma.c index 6deaa77..8b77edb 100644 --- a/net/rds/iw_rdma.c +++ b/net/rds/iw_rdma.c @@ -32,6 +32,7 @@ */ #include <linux/kernel.h> #include <linux/slab.h> +#include <linux/ratelimit.h> #include "rds.h" #include "iw.h" @@ -729,8 +730,8 @@ static int rds_iw_rdma_build_fastreg(struct rds_iw_mapping *mapping) failed_wr = &f_wr; ret = ib_post_send(ibmr->cm_id->qp, &f_wr, &failed_wr); BUG_ON(failed_wr != &f_wr); - if (ret && printk_ratelimit()) - printk(KERN_WARNING "RDS/IW: %s:%d ib_post_send returned %d\n", + if (ret) + printk_ratelimited(KERN_WARNING "RDS/IW: %s:%d ib_post_send returned %d\n", __func__, __LINE__, ret); return ret; } @@ -751,8 +752,8 @@ static int rds_iw_rdma_fastreg_inv(struct rds_iw_mr *ibmr) failed_wr = &s_wr; ret = ib_post_send(ibmr->cm_id->qp, &s_wr, &failed_wr); - if (ret && printk_ratelimit()) { - printk(KERN_WARNING "RDS/IW: %s:%d ib_post_send returned %d\n", + if (ret) { + printk_ratelimited(KERN_WARNING "RDS/IW: %s:%d ib_post_send returned %d\n", __func__, __LINE__, ret); goto out; } diff --git a/net/rds/iw_send.c b/net/rds/iw_send.c index 545d8ee..e40c3c5 100644 --- a/net/rds/iw_send.c +++ b/net/rds/iw_send.c @@ -34,6 +34,7 @@ #include <linux/in.h> #include <linux/device.h> #include <linux/dmapool.h> +#include <linux/ratelimit.h> #include "rds.h" #include "iw.h" @@ -258,8 +259,7 @@ void rds_iw_send_cq_comp_handler(struct ib_cq *cq, void *context) * when the SEND completes. 
*/ break; default: - if (printk_ratelimit()) - printk(KERN_NOTICE + printk_ratelimited(KERN_NOTICE "RDS/IW: %s: unexpected opcode 0x%x in WR!\n", __func__, send->s_wr.opcode); break; diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c index 4195a05..f8760e1 100644 --- a/net/rds/rdma_transport.c +++ b/net/rds/rdma_transport.c @@ -158,7 +158,8 @@ static int rds_rdma_listen_init(void) struct rdma_cm_id *cm_id; int ret; - cm_id = rdma_create_id(rds_rdma_cm_event_handler, NULL, RDMA_PS_TCP); + cm_id = rdma_create_id(rds_rdma_cm_event_handler, NULL, RDMA_PS_TCP, + IB_QPT_RC); if (IS_ERR(cm_id)) { ret = PTR_ERR(cm_id); printk(KERN_ERR "RDS/RDMA: failed to setup listener, " diff --git a/net/rds/send.c b/net/rds/send.c index d58ae5f..aa57e22 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -35,6 +35,7 @@ #include <net/sock.h> #include <linux/in.h> #include <linux/list.h> +#include <linux/ratelimit.h> #include "rds.h" @@ -1006,16 +1007,14 @@ int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, goto out; if (rm->rdma.op_active && !conn->c_trans->xmit_rdma) { - if (printk_ratelimit()) - printk(KERN_NOTICE "rdma_op %p conn xmit_rdma %p\n", + printk_ratelimited(KERN_NOTICE "rdma_op %p conn xmit_rdma %p\n", &rm->rdma, conn->c_trans->xmit_rdma); ret = -EOPNOTSUPP; goto out; } if (rm->atomic.op_active && !conn->c_trans->xmit_atomic) { - if (printk_ratelimit()) - printk(KERN_NOTICE "atomic_op %p conn xmit_atomic %p\n", + printk_ratelimited(KERN_NOTICE "atomic_op %p conn xmit_atomic %p\n", &rm->atomic, conn->c_trans->xmit_atomic); ret = -EOPNOTSUPP; goto out; diff --git a/net/sched/act_api.c b/net/sched/act_api.c index a606025..2f64262 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -1115,9 +1115,10 @@ nlmsg_failure: static int __init tc_action_init(void) { - rtnl_register(PF_UNSPEC, RTM_NEWACTION, tc_ctl_action, NULL); - rtnl_register(PF_UNSPEC, RTM_DELACTION, tc_ctl_action, NULL); - rtnl_register(PF_UNSPEC, RTM_GETACTION, tc_ctl_action, tc_dump_action); + rtnl_register(PF_UNSPEC, RTM_NEWACTION, tc_ctl_action, NULL, NULL); + rtnl_register(PF_UNSPEC, RTM_DELACTION, tc_ctl_action, NULL, NULL); + rtnl_register(PF_UNSPEC, RTM_GETACTION, tc_ctl_action, tc_dump_action, + NULL); return 0; } diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index bb2c523..9563887 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -610,10 +610,10 @@ EXPORT_SYMBOL(tcf_exts_dump_stats); static int __init tc_filter_init(void) { - rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_ctl_tfilter, NULL); - rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_ctl_tfilter, NULL); + rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_ctl_tfilter, NULL, NULL); + rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_ctl_tfilter, NULL, NULL); rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_ctl_tfilter, - tc_dump_tfilter); + tc_dump_tfilter, NULL); return 0; } diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index 8ec0139..34533a5 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -121,7 +121,7 @@ static u32 flow_get_proto_src(struct sk_buff *skb) if (!pskb_network_may_pull(skb, sizeof(*iph))) break; iph = ip_hdr(skb); - if (iph->frag_off & htons(IP_MF | IP_OFFSET)) + if (ip_is_fragment(iph)) break; poff = proto_ports_offset(iph->protocol); if (poff >= 0 && @@ -163,7 +163,7 @@ static u32 flow_get_proto_dst(struct sk_buff *skb) if (!pskb_network_may_pull(skb, sizeof(*iph))) break; iph = ip_hdr(skb); - if (iph->frag_off & htons(IP_MF | IP_OFFSET)) + if (ip_is_fragment(iph)) break; poff = 
proto_ports_offset(iph->protocol); if (poff >= 0 && diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h index 402c44b..ed691b1 100644 --- a/net/sched/cls_rsvp.h +++ b/net/sched/cls_rsvp.h @@ -167,7 +167,7 @@ restart: dst = &nhptr->daddr; protocol = nhptr->protocol; xprt = ((u8 *)nhptr) + (nhptr->ihl<<2); - if (nhptr->frag_off & htons(IP_MF | IP_OFFSET)) + if (ip_is_fragment(nhptr)) return -1; #endif diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 6b86276..8182aef 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1792,12 +1792,12 @@ static int __init pktsched_init(void) register_qdisc(&pfifo_head_drop_qdisc_ops); register_qdisc(&mq_qdisc_ops); - rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL); - rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL); - rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc); - rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL); - rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL); - rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass); + rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL, NULL); + rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL, NULL); + rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc, NULL); + rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL, NULL); + rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL, NULL); + rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass, NULL); return 0; } diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index 3f08158..e25e490 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -5,6 +5,7 @@ #include <linux/module.h> #include <linux/slab.h> #include <linux/init.h> +#include <linux/interrupt.h> #include <linux/string.h> #include <linux/errno.h> #include <linux/skbuff.h> diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c index 06afbae..3422b25 100644 --- a/net/sched/sch_choke.c +++ b/net/sched/sch_choke.c @@ -181,7 +181,7 @@ static bool choke_match_flow(struct sk_buff *skb1, ip1->saddr != ip2->saddr || ip1->daddr != ip2->daddr) return false; - if ((ip1->frag_off | ip2->frag_off) & htons(IP_MF | IP_OFFSET)) + if (ip_is_fragment(ip1) | ip_is_fragment(ip2)) ip_proto = 0; off1 += ip1->ihl * 4; off2 += ip2->ihl * 4; diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index b1721d7..b4c6809 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -251,9 +251,8 @@ static void dev_watchdog(unsigned long arg) } if (some_queue_timedout) { - char drivername[64]; WARN_ONCE(1, KERN_INFO "NETDEV WATCHDOG: %s (%s): transmit queue %u timed out\n", - dev->name, netdev_drivername(dev, drivername, 64), i); + dev->name, netdev_drivername(dev), i); dev->netdev_ops->ndo_tx_timeout(dev); } if (!mod_timer(&dev->watchdog_timer, diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 69c35f6..eb3b9a8 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -13,6 +13,7 @@ * Catalin(ux aka Dino) BOIE <catab at umbrella dot ro> */ +#include <linux/mm.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/types.h> diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index b6ea6af..4536ee6 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -157,7 +157,7 @@ static unsigned int sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb) iph = ip_hdr(skb); h = (__force u32)iph->daddr; h2 = (__force u32)iph->saddr ^ iph->protocol; - if (iph->frag_off & htons(IP_MF | IP_OFFSET)) + if 
(ip_is_fragment(iph)) break; poff = proto_ports_offset(iph->protocol); if (poff >= 0 && diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 525f97c..dc16b90 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -280,6 +280,8 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a asoc->peer.asconf_capable = 0; if (sctp_addip_noauth) asoc->peer.asconf_capable = 1; + asoc->asconf_addr_del_pending = NULL; + asoc->src_out_of_asoc_ok = 0; /* Create an input queue. */ sctp_inq_init(&asoc->base.inqueue); @@ -444,15 +446,11 @@ void sctp_association_free(struct sctp_association *asoc) asoc->peer.transport_count = 0; - /* Free any cached ASCONF_ACK chunk. */ - sctp_assoc_free_asconf_acks(asoc); - - /* Free the ASCONF queue. */ - sctp_assoc_free_asconf_queue(asoc); + sctp_asconf_queue_teardown(asoc); - /* Free any cached ASCONF chunk. */ - if (asoc->addip_last_asconf) - sctp_chunk_free(asoc->addip_last_asconf); + /* Free pending address space being deleted */ + if (asoc->asconf_addr_del_pending != NULL) + kfree(asoc->asconf_addr_del_pending); /* AUTH - Free the endpoint shared keys */ sctp_auth_destroy_keys(&asoc->endpoint_shared_keys); @@ -1646,3 +1644,16 @@ struct sctp_chunk *sctp_assoc_lookup_asconf_ack( return NULL; } + +void sctp_asconf_queue_teardown(struct sctp_association *asoc) +{ + /* Free any cached ASCONF_ACK chunk. */ + sctp_assoc_free_asconf_acks(asoc); + + /* Free the ASCONF queue. */ + sctp_assoc_free_asconf_queue(asoc); + + /* Free any cached ASCONF chunk. */ + if (asoc->addip_last_asconf) + sctp_chunk_free(asoc->addip_last_asconf); +} diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c index 83e3011..4ece451 100644 --- a/net/sctp/bind_addr.c +++ b/net/sctp/bind_addr.c @@ -430,7 +430,7 @@ union sctp_addr *sctp_find_unmatch_addr(struct sctp_bind_addr *bp, list_for_each_entry(laddr, &bp->address_list, list) { addr_buf = (union sctp_addr *)addrs; for (i = 0; i < addrcnt; i++) { - addr = (union sctp_addr *)addr_buf; + addr = addr_buf; af = sctp_get_af_specific(addr->v4.sin_family); if (!af) break; @@ -534,6 +534,21 @@ int sctp_in_scope(const union sctp_addr *addr, sctp_scope_t scope) return 0; } +int sctp_is_ep_boundall(struct sock *sk) +{ + struct sctp_bind_addr *bp; + struct sctp_sockaddr_entry *addr; + + bp = &sctp_sk(sk)->ep->base.bind_addr; + if (sctp_list_single_entry(&bp->address_list)) { + addr = list_entry(bp->address_list.next, + struct sctp_sockaddr_entry, list); + if (sctp_is_any(sk, &addr->a)) + return 1; + } + return 0; +} + /******************************************************************** * 3rd Level Abstractions ********************************************************************/ diff --git a/net/sctp/input.c b/net/sctp/input.c index 741ed16..b7692aa 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -510,8 +510,7 @@ struct sock *sctp_err_lookup(int family, struct sk_buff *skb, * discard the packet. 
*/ if (vtag == 0) { - chunkhdr = (struct sctp_init_chunk *)((void *)sctphdr - + sizeof(struct sctphdr)); + chunkhdr = (void *)sctphdr + sizeof(struct sctphdr); if (len < sizeof(struct sctphdr) + sizeof(sctp_chunkhdr_t) + sizeof(__be32) || chunkhdr->chunk_hdr.type != SCTP_CID_INIT || diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 0bb0d7c..aabaee4 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -112,6 +112,7 @@ static int sctp_inet6addr_event(struct notifier_block *this, unsigned long ev, addr->valid = 1; spin_lock_bh(&sctp_local_addr_lock); list_add_tail_rcu(&addr->list, &sctp_local_addr_list); + sctp_addr_wq_mgmt(addr, SCTP_ADDR_NEW); spin_unlock_bh(&sctp_local_addr_lock); } break; @@ -122,6 +123,7 @@ static int sctp_inet6addr_event(struct notifier_block *this, unsigned long ev, if (addr->a.sa.sa_family == AF_INET6 && ipv6_addr_equal(&addr->a.v6.sin6_addr, &ifa->addr)) { + sctp_addr_wq_mgmt(addr, SCTP_ADDR_DEL); found = 1; addr->valid = 0; list_del_rcu(&addr->list); diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 1c88c89..edc7532 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -754,6 +754,16 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) */ list_for_each_entry_safe(chunk, tmp, &q->control_chunk_list, list) { + /* RFC 5061, 5.3 + * F1) This means that until such time as the ASCONF + * containing the add is acknowledged, the sender MUST + * NOT use the new IP address as a source for ANY SCTP + * packet except on carrying an ASCONF Chunk. + */ + if (asoc->src_out_of_asoc_ok && + chunk->chunk_hdr->type != SCTP_CID_ASCONF) + continue; + list_del_init(&chunk->list); /* Pick the right transport to use. */ @@ -881,6 +891,9 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) } } + if (q->asoc->src_out_of_asoc_ok) + goto sctp_flush_out; + /* Is it OK to send data chunks? */ switch (asoc->state) { case SCTP_STATE_COOKIE_ECHOED: diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 67380a2..ab5ded2 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -503,7 +503,9 @@ static void sctp_v4_get_dst(struct sctp_transport *t, union sctp_addr *saddr, sctp_v4_dst_saddr(&dst_saddr, fl4, htons(bp->port)); rcu_read_lock(); list_for_each_entry_rcu(laddr, &bp->address_list, list) { - if (!laddr->valid || (laddr->state != SCTP_ADDR_SRC)) + if (!laddr->valid || (laddr->state == SCTP_ADDR_DEL) || + (laddr->state != SCTP_ADDR_SRC && + !asoc->src_out_of_asoc_ok)) continue; if (sctp_v4_cmp_addr(&dst_saddr, &laddr->a)) goto out_unlock; @@ -623,6 +625,143 @@ static void sctp_v4_ecn_capable(struct sock *sk) INET_ECN_xmit(sk); } +void sctp_addr_wq_timeout_handler(unsigned long arg) +{ + struct sctp_sockaddr_entry *addrw, *temp; + struct sctp_sock *sp; + + spin_lock_bh(&sctp_addr_wq_lock); + + list_for_each_entry_safe(addrw, temp, &sctp_addr_waitq, list) { + SCTP_DEBUG_PRINTK_IPADDR("sctp_addrwq_timo_handler: the first ent in wq %p is ", + " for cmd %d at entry %p\n", &sctp_addr_waitq, &addrw->a, addrw->state, + addrw); + +#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) + /* Now we send an ASCONF for each association */ + /* Note. 
we currently don't handle link local IPv6 addresses */ + if (addrw->a.sa.sa_family == AF_INET6) { + struct in6_addr *in6; + + if (ipv6_addr_type(&addrw->a.v6.sin6_addr) & + IPV6_ADDR_LINKLOCAL) + goto free_next; + + in6 = (struct in6_addr *)&addrw->a.v6.sin6_addr; + if (ipv6_chk_addr(&init_net, in6, NULL, 0) == 0 && + addrw->state == SCTP_ADDR_NEW) { + unsigned long timeo_val; + + SCTP_DEBUG_PRINTK("sctp_timo_handler: this is on DAD, trying %d sec later\n", + SCTP_ADDRESS_TICK_DELAY); + timeo_val = jiffies; + timeo_val += msecs_to_jiffies(SCTP_ADDRESS_TICK_DELAY); + mod_timer(&sctp_addr_wq_timer, timeo_val); + break; + } + } +#endif + list_for_each_entry(sp, &sctp_auto_asconf_splist, auto_asconf_list) { + struct sock *sk; + + sk = sctp_opt2sk(sp); + /* ignore bound-specific endpoints */ + if (!sctp_is_ep_boundall(sk)) + continue; + sctp_bh_lock_sock(sk); + if (sctp_asconf_mgmt(sp, addrw) < 0) + SCTP_DEBUG_PRINTK("sctp_addrwq_timo_handler: sctp_asconf_mgmt failed\n"); + sctp_bh_unlock_sock(sk); + } +free_next: + list_del(&addrw->list); + kfree(addrw); + } + spin_unlock_bh(&sctp_addr_wq_lock); +} + +static void sctp_free_addr_wq(void) +{ + struct sctp_sockaddr_entry *addrw; + struct sctp_sockaddr_entry *temp; + + spin_lock_bh(&sctp_addr_wq_lock); + del_timer(&sctp_addr_wq_timer); + list_for_each_entry_safe(addrw, temp, &sctp_addr_waitq, list) { + list_del(&addrw->list); + kfree(addrw); + } + spin_unlock_bh(&sctp_addr_wq_lock); +} + +/* lookup the entry for the same address in the addr_waitq + * sctp_addr_wq MUST be locked + */ +static struct sctp_sockaddr_entry *sctp_addr_wq_lookup(struct sctp_sockaddr_entry *addr) +{ + struct sctp_sockaddr_entry *addrw; + + list_for_each_entry(addrw, &sctp_addr_waitq, list) { + if (addrw->a.sa.sa_family != addr->a.sa.sa_family) + continue; + if (addrw->a.sa.sa_family == AF_INET) { + if (addrw->a.v4.sin_addr.s_addr == + addr->a.v4.sin_addr.s_addr) + return addrw; + } else if (addrw->a.sa.sa_family == AF_INET6) { + if (ipv6_addr_equal(&addrw->a.v6.sin6_addr, + &addr->a.v6.sin6_addr)) + return addrw; + } + } + return NULL; +} + +void sctp_addr_wq_mgmt(struct sctp_sockaddr_entry *addr, int cmd) +{ + struct sctp_sockaddr_entry *addrw; + unsigned long timeo_val; + + /* first, we check if an opposite message already exists in the queue. + * If we find such a message, it is removed. 
+ * This operation is a bit stupid, but the DHCP client attaches the + * new address after a couple of additions and deletions of that address + */ + + spin_lock_bh(&sctp_addr_wq_lock); + /* Offsets existing events in addr_wq */ + addrw = sctp_addr_wq_lookup(addr); + if (addrw) { + if (addrw->state != cmd) { + SCTP_DEBUG_PRINTK_IPADDR("sctp_addr_wq_mgmt offsets existing entry for %d ", + " in wq %p\n", addrw->state, &addrw->a, + &sctp_addr_waitq); + list_del(&addrw->list); + kfree(addrw); + } + spin_unlock_bh(&sctp_addr_wq_lock); + return; + } + + /* OK, we have to add the new address to the wait queue */ + addrw = kmemdup(addr, sizeof(struct sctp_sockaddr_entry), GFP_ATOMIC); + if (addrw == NULL) { + spin_unlock_bh(&sctp_addr_wq_lock); + return; + } + addrw->state = cmd; + list_add_tail(&addrw->list, &sctp_addr_waitq); + SCTP_DEBUG_PRINTK_IPADDR("sctp_addr_wq_mgmt add new entry for cmd:%d ", + " in wq %p\n", addrw->state, &addrw->a, &sctp_addr_waitq); + + if (!timer_pending(&sctp_addr_wq_timer)) { + timeo_val = jiffies; + timeo_val += msecs_to_jiffies(SCTP_ADDRESS_TICK_DELAY); + mod_timer(&sctp_addr_wq_timer, timeo_val); + } + spin_unlock_bh(&sctp_addr_wq_lock); +} + /* Event handler for inet address addition/deletion events. * The sctp_local_addr_list needs to be protected by a spin lock since * multiple notifiers (say IPv4 and IPv6) may be running at the same @@ -650,6 +789,7 @@ static int sctp_inetaddr_event(struct notifier_block *this, unsigned long ev, addr->valid = 1; spin_lock_bh(&sctp_local_addr_lock); list_add_tail_rcu(&addr->list, &sctp_local_addr_list); + sctp_addr_wq_mgmt(addr, SCTP_ADDR_NEW); spin_unlock_bh(&sctp_local_addr_lock); } break; @@ -660,6 +800,7 @@ static int sctp_inetaddr_event(struct notifier_block *this, unsigned long ev, if (addr->a.sa.sa_family == AF_INET && addr->a.v4.sin_addr.s_addr == ifa->ifa_local) { + sctp_addr_wq_mgmt(addr, SCTP_ADDR_DEL); found = 1; addr->valid = 0; list_del_rcu(&addr->list); @@ -1242,6 +1383,7 @@ SCTP_STATIC __init int sctp_init(void) /* Disable ADDIP by default. */ sctp_addip_enable = 0; sctp_addip_noauth = 0; + sctp_default_auto_asconf = 0; /* Enable PR-SCTP by default. */ sctp_prsctp_enable = 1; @@ -1266,6 +1408,13 @@ SCTP_STATIC __init int sctp_init(void) spin_lock_init(&sctp_local_addr_lock); sctp_get_local_addr_list(); + /* Initialize the address event list */ + INIT_LIST_HEAD(&sctp_addr_waitq); + INIT_LIST_HEAD(&sctp_auto_asconf_splist); + spin_lock_init(&sctp_addr_wq_lock); + sctp_addr_wq_timer.expires = 0; + setup_timer(&sctp_addr_wq_timer, sctp_addr_wq_timeout_handler, 0); + status = sctp_v4_protosw_init(); if (status) @@ -1337,6 +1486,7 @@ SCTP_STATIC __exit void sctp_exit(void) /* Unregister with inet6/inet layers. */ sctp_v6_del_protocol(); sctp_v4_del_protocol(); + sctp_free_addr_wq(); /* Free the control endpoint. */ inet_ctl_sock_destroy(sctp_ctl_sock); diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 58eb27f..81db4e3 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -2768,11 +2768,12 @@ struct sctp_chunk *sctp_make_asconf_update_ip(struct sctp_association *asoc, int addr_param_len = 0; int totallen = 0; int i; + int del_pickup = 0; /* Get total length of all the address parameters. 
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 58eb27f..81db4e3 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -2768,11 +2768,12 @@ struct sctp_chunk *sctp_make_asconf_update_ip(struct sctp_association *asoc, int addr_param_len = 0; int totallen = 0; int i; + int del_pickup = 0; /* Get total length of all the address parameters. */ addr_buf = addrs; for (i = 0; i < addrcnt; i++) { - addr = (union sctp_addr *)addr_buf; + addr = addr_buf; af = sctp_get_af_specific(addr->v4.sin_family); addr_param_len = af->to_addr_param(addr, &addr_param); @@ -2780,6 +2781,13 @@ struct sctp_chunk *sctp_make_asconf_update_ip(struct sctp_association *asoc, totallen += addr_param_len; addr_buf += af->sockaddr_len; + if (asoc->asconf_addr_del_pending && !del_pickup) { + /* reuse the parameter length from the same scope one */ + totallen += paramlen; + totallen += addr_param_len; + del_pickup = 1; + SCTP_DEBUG_PRINTK("mkasconf_update_ip: picked same-scope del_pending addr, totallen for all addresses is %d\n", totallen); + } } /* Create an asconf chunk with the required length. */ @@ -2790,7 +2798,7 @@ struct sctp_chunk *sctp_make_asconf_update_ip(struct sctp_association *asoc, /* Add the address parameters to the asconf chunk. */ addr_buf = addrs; for (i = 0; i < addrcnt; i++) { - addr = (union sctp_addr *)addr_buf; + addr = addr_buf; af = sctp_get_af_specific(addr->v4.sin_family); addr_param_len = af->to_addr_param(addr, &addr_param); param.param_hdr.type = flags; @@ -2802,6 +2810,17 @@ struct sctp_chunk *sctp_make_asconf_update_ip(struct sctp_association *asoc, addr_buf += af->sockaddr_len; } + if (flags == SCTP_PARAM_ADD_IP && del_pickup) { + addr = asoc->asconf_addr_del_pending; + af = sctp_get_af_specific(addr->v4.sin_family); + addr_param_len = af->to_addr_param(addr, &addr_param); + param.param_hdr.type = SCTP_PARAM_DEL_IP; + param.param_hdr.length = htons(paramlen + addr_param_len); + param.crr_id = i; + + sctp_addto_chunk(retval, paramlen, &param); + sctp_addto_chunk(retval, addr_param_len, &addr_param); + } return retval; } @@ -2939,8 +2958,7 @@ static __be16 sctp_process_asconf_param(struct sctp_association *asoc, union sctp_addr addr; union sctp_addr_param *addr_param; - addr_param = (union sctp_addr_param *) - ((void *)asconf_param + sizeof(sctp_addip_param_t)); + addr_param = (void *)asconf_param + sizeof(sctp_addip_param_t); if (asconf_param->param_hdr.type != SCTP_PARAM_ADD_IP && asconf_param->param_hdr.type != SCTP_PARAM_DEL_IP && @@ -3014,7 +3032,7 @@ static __be16 sctp_process_asconf_param(struct sctp_association *asoc, * an Error Cause TLV set to the new error code 'Request to * Delete Source IP Address' */ - if (sctp_cmp_addr_exact(sctp_source(asconf), &addr)) + if (sctp_cmp_addr_exact(&asconf->source, &addr)) return SCTP_ERROR_DEL_SRC_IP; /* Section 4.2.2 @@ -3125,7 +3143,7 @@ struct sctp_chunk *sctp_process_asconf(struct sctp_association *asoc, * asconf parameter. */ length = ntohs(addr_param->p.length); - asconf_param = (sctp_addip_param_t *)((void *)addr_param + length); + asconf_param = (void *)addr_param + length; chunk_len -= length; /* create an ASCONF_ACK chunk. @@ -3166,8 +3184,7 @@ struct sctp_chunk *sctp_process_asconf(struct sctp_association *asoc, /* Move to the next ASCONF param. */ length = ntohs(asconf_param->param_hdr.length); - asconf_param = (sctp_addip_param_t *)((void *)asconf_param + - length); + asconf_param = (void *)asconf_param + length; chunk_len -= length; } @@ -3197,8 +3214,7 @@ static void sctp_asconf_param_success(struct sctp_association *asoc, struct sctp_transport *transport; struct sctp_sockaddr_entry *saddr; - addr_param = (union sctp_addr_param *) - ((void *)asconf_param + sizeof(sctp_addip_param_t)); + addr_param = (void *)asconf_param + sizeof(sctp_addip_param_t); /* We have checked the packet before, so we do not check again.
*/ af = sctp_get_af_specific(param_type2af(addr_param->p.type)); @@ -3224,6 +3240,11 @@ static void sctp_asconf_param_success(struct sctp_association *asoc, case SCTP_PARAM_DEL_IP: local_bh_disable(); sctp_del_bind_addr(bp, &addr); + if (asoc->asconf_addr_del_pending != NULL && + sctp_cmp_addr_exact(asoc->asconf_addr_del_pending, &addr)) { + kfree(asoc->asconf_addr_del_pending); + asoc->asconf_addr_del_pending = NULL; + } local_bh_enable(); list_for_each_entry(transport, &asoc->peer.transport_addr_list, transports) { @@ -3278,8 +3299,7 @@ static __be16 sctp_get_asconf_response(struct sctp_chunk *asconf_ack, return SCTP_ERROR_NO_ERROR; case SCTP_PARAM_ERR_CAUSE: length = sizeof(sctp_addip_param_t); - err_param = (sctp_errhdr_t *) - ((void *)asconf_ack_param + length); + err_param = (void *)asconf_ack_param + length; asconf_ack_len -= length; if (asconf_ack_len > 0) return err_param->cause; @@ -3292,8 +3312,7 @@ static __be16 sctp_get_asconf_response(struct sctp_chunk *asconf_ack, } length = ntohs(asconf_ack_param->param_hdr.length); - asconf_ack_param = (sctp_addip_param_t *) - ((void *)asconf_ack_param + length); + asconf_ack_param = (void *)asconf_ack_param + length; asconf_ack_len -= length; } @@ -3325,7 +3344,7 @@ int sctp_process_asconf_ack(struct sctp_association *asoc, * pointer to the first asconf parameter. */ length = ntohs(addr_param->p.length); - asconf_param = (sctp_addip_param_t *)((void *)addr_param + length); + asconf_param = (void *)addr_param + length; asconf_len -= length; /* ADDIP 4.1 @@ -3376,11 +3395,13 @@ int sctp_process_asconf_ack(struct sctp_association *asoc, * one. */ length = ntohs(asconf_param->param_hdr.length); - asconf_param = (sctp_addip_param_t *)((void *)asconf_param + - length); + asconf_param = (void *)asconf_param + length; asconf_len -= length; } + if (no_err && asoc->src_out_of_asoc_ok) + asoc->src_out_of_asoc_ok = 0; + /* Free the cached last sent asconf chunk. */ list_del_init(&asconf->transmitted_list); sctp_chunk_free(asconf); diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index d612ca1..1b2bb64 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -1201,7 +1201,7 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, int local_cork = 0; if (SCTP_EVENT_T_TIMEOUT != event_type) - chunk = (struct sctp_chunk *) event_arg; + chunk = event_arg; /* Note: This whole file is a huge candidate for rework. * For example, each command could either have its own handler, so @@ -1670,6 +1670,9 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, case SCTP_CMD_SEND_NEXT_ASCONF: sctp_cmd_send_asconf(asoc); break; + case SCTP_CMD_PURGE_ASCONF_QUEUE: + sctp_asconf_queue_teardown(asoc); + break; default: pr_warn("Impossible command: %u, %p\n", cmd->verb, cmd->obj.ptr); diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 7f4a4f8..a297283 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -1718,11 +1718,21 @@ static sctp_disposition_t sctp_sf_do_dupcook_a(const struct sctp_endpoint *ep, return SCTP_DISPOSITION_CONSUME; } - /* For now, fail any unsent/unacked data. Consider the optional - * choice of resending of this data. + /* For now, stop pending T3-rtx and SACK timers, fail any unsent/unacked + * data. Consider the optional choice of resending of this data. 
*/ + sctp_add_cmd_sf(commands, SCTP_CMD_T3_RTX_TIMERS_STOP, SCTP_NULL()); + sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, + SCTP_TO(SCTP_EVENT_TIMEOUT_SACK)); sctp_add_cmd_sf(commands, SCTP_CMD_PURGE_OUTQUEUE, SCTP_NULL()); + /* Stop pending T4-rto timer, teardown ASCONF queue, ASCONF-ACK queue + * and ASCONF-ACK cache. + */ + sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, + SCTP_TO(SCTP_EVENT_TIMEOUT_T4_RTO)); + sctp_add_cmd_sf(commands, SCTP_CMD_PURGE_ASCONF_QUEUE, SCTP_NULL()); + repl = sctp_make_cookie_ack(new_asoc, chunk); if (!repl) goto nomem; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 6766913..fd31b36 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -476,7 +476,7 @@ static int sctp_bindx_add(struct sock *sk, struct sockaddr *addrs, int addrcnt) /* The list may contain either IPv4 or IPv6 address; * determine the address length for walking thru the list. */ - sa_addr = (struct sockaddr *)addr_buf; + sa_addr = addr_buf; af = sctp_get_af_specific(sa_addr->sa_family); if (!af) { retval = -EINVAL; @@ -555,7 +555,7 @@ static int sctp_send_asconf_add_ip(struct sock *sk, */ addr_buf = addrs; for (i = 0; i < addrcnt; i++) { - addr = (union sctp_addr *)addr_buf; + addr = addr_buf; af = sctp_get_af_specific(addr->v4.sin_family); if (!af) { retval = -EINVAL; @@ -583,22 +583,35 @@ static int sctp_send_asconf_add_ip(struct sock *sk, goto out; } - retval = sctp_send_asconf(asoc, chunk); - if (retval) - goto out; - /* Add the new addresses to the bind address list with * use_as_src set to 0. */ addr_buf = addrs; for (i = 0; i < addrcnt; i++) { - addr = (union sctp_addr *)addr_buf; + addr = addr_buf; af = sctp_get_af_specific(addr->v4.sin_family); memcpy(&saveaddr, addr, af->sockaddr_len); retval = sctp_add_bind_addr(bp, &saveaddr, SCTP_ADDR_NEW, GFP_ATOMIC); addr_buf += af->sockaddr_len; } + if (asoc->src_out_of_asoc_ok) { + struct sctp_transport *trans; + + list_for_each_entry(trans, + &asoc->peer.transport_addr_list, transports) { + /* Clear the source and route cache */ + dst_release(trans->dst); + trans->cwnd = min(4*asoc->pathmtu, max_t(__u32, + 2*asoc->pathmtu, 4380)); + trans->ssthresh = asoc->peer.i.a_rwnd; + trans->rto = asoc->rto_initial; + trans->rtt = trans->srtt = trans->rttvar = 0; + sctp_transport_route(trans, NULL, + sctp_sk(asoc->base.sk)); + } + } + retval = sctp_send_asconf(asoc, chunk); } out: @@ -646,7 +659,7 @@ static int sctp_bindx_rem(struct sock *sk, struct sockaddr *addrs, int addrcnt) goto err_bindx_rem; } - sa_addr = (union sctp_addr *)addr_buf; + sa_addr = addr_buf; af = sctp_get_af_specific(sa_addr->sa.sa_family); if (!af) { retval = -EINVAL; @@ -715,7 +728,9 @@ static int sctp_send_asconf_del_ip(struct sock *sk, struct sctp_sockaddr_entry *saddr; int i; int retval = 0; + int stored = 0; + chunk = NULL; if (!sctp_addip_enable) return retval; @@ -743,7 +758,7 @@ static int sctp_send_asconf_del_ip(struct sock *sk, */ addr_buf = addrs; for (i = 0; i < addrcnt; i++) { - laddr = (union sctp_addr *)addr_buf; + laddr = addr_buf; af = sctp_get_af_specific(laddr->v4.sin_family); if (!af) { retval = -EINVAL; @@ -766,8 +781,37 @@ static int sctp_send_asconf_del_ip(struct sock *sk, bp = &asoc->base.bind_addr; laddr = sctp_find_unmatch_addr(bp, (union sctp_addr *)addrs, addrcnt, sp); - if (!laddr) - continue; + if ((laddr == NULL) && (addrcnt == 1)) { + if (asoc->asconf_addr_del_pending) + continue; + asoc->asconf_addr_del_pending = + kzalloc(sizeof(union sctp_addr), GFP_ATOMIC); + if (asoc->asconf_addr_del_pending == NULL) { + retval = -ENOMEM; + goto out; 
+ } + asoc->asconf_addr_del_pending->sa.sa_family = + addrs->sa_family; + asoc->asconf_addr_del_pending->v4.sin_port = + htons(bp->port); + if (addrs->sa_family == AF_INET) { + struct sockaddr_in *sin; + + sin = (struct sockaddr_in *)addrs; + asoc->asconf_addr_del_pending->v4.sin_addr.s_addr = sin->sin_addr.s_addr; + } else if (addrs->sa_family == AF_INET6) { + struct sockaddr_in6 *sin6; + + sin6 = (struct sockaddr_in6 *)addrs; + ipv6_addr_copy(&asoc->asconf_addr_del_pending->v6.sin6_addr, &sin6->sin6_addr); + } + SCTP_DEBUG_PRINTK_IPADDR("send_asconf_del_ip: keep the last address asoc: %p ", + " at %p\n", asoc, asoc->asconf_addr_del_pending, + asoc->asconf_addr_del_pending); + asoc->src_out_of_asoc_ok = 1; + stored = 1; + goto skip_mkasconf; + } /* We do not need RCU protection throughout this loop * because this is done under a socket lock from the @@ -780,12 +824,13 @@ static int sctp_send_asconf_del_ip(struct sock *sk, goto out; } +skip_mkasconf: /* Reset use_as_src flag for the addresses in the bind address * list that are to be deleted. */ addr_buf = addrs; for (i = 0; i < addrcnt; i++) { - laddr = (union sctp_addr *)addr_buf; + laddr = addr_buf; af = sctp_get_af_specific(laddr->v4.sin_family); list_for_each_entry(saddr, &bp->address_list, list) { if (sctp_cmp_addr_exact(&saddr->a, laddr)) @@ -805,12 +850,37 @@ static int sctp_send_asconf_del_ip(struct sock *sk, sctp_sk(asoc->base.sk)); } + if (stored) + /* We don't need to transmit ASCONF */ + continue; retval = sctp_send_asconf(asoc, chunk); } out: return retval; } +/* set addr events to assocs in the endpoint. ep and addr_wq must be locked */ +int sctp_asconf_mgmt(struct sctp_sock *sp, struct sctp_sockaddr_entry *addrw) +{ + struct sock *sk = sctp_opt2sk(sp); + union sctp_addr *addr; + struct sctp_af *af; + + /* It is safe to write port space in caller. */ + addr = &addrw->a; + addr->v4.sin_port = htons(sp->ep->base.bind_addr.port); + af = sctp_get_af_specific(addr->sa.sa_family); + if (!af) + return -EINVAL; + if (sctp_verify_addr(sk, addr, af->sockaddr_len)) + return -EINVAL; + + if (addrw->state == SCTP_ADDR_NEW) + return sctp_send_asconf_add_ip(sk, (struct sockaddr *)addr, 1); + else + return sctp_send_asconf_del_ip(sk, (struct sockaddr *)addr, 1); +} + /* Helper for tunneling sctp_bindx() requests through sctp_setsockopt() * * API 8.1 @@ -927,7 +997,7 @@ SCTP_STATIC int sctp_setsockopt_bindx(struct sock* sk, return -EINVAL; } - sa_addr = (struct sockaddr *)addr_buf; + sa_addr = addr_buf; af = sctp_get_af_specific(sa_addr->sa_family); /* If the address family is not supported or if this address @@ -1018,7 +1088,7 @@ static int __sctp_connect(struct sock* sk, goto out_free; } - sa_addr = (union sctp_addr *)addr_buf; + sa_addr = addr_buf; af = sctp_get_af_specific(sa_addr->sa.sa_family); /* If the address family is not supported or if this address @@ -3334,6 +3404,46 @@ static int sctp_setsockopt_del_key(struct sock *sk, } +/* + * 8.1.23 SCTP_AUTO_ASCONF + * + * This option will enable or disable the use of the automatic generation of + * ASCONF chunks to add and delete addresses to an existing association. Note + * that this option has two caveats namely: a) it only affects sockets that + * are bound to all addresses available to the SCTP stack, and b) the system + * administrator may have an overriding control that turns the ASCONF feature + * off no matter what setting the socket option may have. 
+ * This option expects an integer boolean flag, where a non-zero value turns on + * the option, and a zero value turns off the option. + * Note: in this implementation, the socket option overrides the default + * value set by sysctl, as in the FreeBSD implementation. + */ +static int sctp_setsockopt_auto_asconf(struct sock *sk, char __user *optval, + unsigned int optlen) +{ + int val; + struct sctp_sock *sp = sctp_sk(sk); + + if (optlen < sizeof(int)) + return -EINVAL; + if (get_user(val, (int __user *)optval)) + return -EFAULT; + if (!sctp_is_ep_boundall(sk) && val) + return -EINVAL; + if ((val && sp->do_auto_asconf) || (!val && !sp->do_auto_asconf)) + return 0; + + if (val == 0 && sp->do_auto_asconf) { + list_del(&sp->auto_asconf_list); + sp->do_auto_asconf = 0; + } else if (val && !sp->do_auto_asconf) { + list_add_tail(&sp->auto_asconf_list, + &sctp_auto_asconf_splist); + sp->do_auto_asconf = 1; + } + return 0; +} + /* API 6.2 setsockopt(), getsockopt() * @@ -3481,6 +3591,9 @@ SCTP_STATIC int sctp_setsockopt(struct sock *sk, int level, int optname, case SCTP_AUTH_DELETE_KEY: retval = sctp_setsockopt_del_key(sk, optval, optlen); break; + case SCTP_AUTO_ASCONF: + retval = sctp_setsockopt_auto_asconf(sk, optval, optlen); + break; default: retval = -ENOPROTOOPT; break; @@ -3763,6 +3876,12 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk) local_bh_disable(); percpu_counter_inc(&sctp_sockets_allocated); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); + if (sctp_default_auto_asconf) { + list_add_tail(&sp->auto_asconf_list, + &sctp_auto_asconf_splist); + sp->do_auto_asconf = 1; + } else + sp->do_auto_asconf = 0; local_bh_enable(); return 0; @@ -3771,13 +3890,17 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk) /* Cleanup any SCTP per socket resources. */ SCTP_STATIC void sctp_destroy_sock(struct sock *sk) { - struct sctp_endpoint *ep; + struct sctp_sock *sp; SCTP_DEBUG_PRINTK("sctp_destroy_sock(sk: %p)\n", sk); /* Release our hold on the endpoint. */ - ep = sctp_sk(sk)->ep; - sctp_endpoint_free(ep); + sp = sctp_sk(sk); + if (sp->do_auto_asconf) { + sp->do_auto_asconf = 0; + list_del(&sp->auto_asconf_list); + } + sctp_endpoint_free(sp->ep); local_bh_disable(); percpu_counter_dec(&sctp_sockets_allocated); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); @@ -5277,6 +5400,28 @@ static int sctp_getsockopt_assoc_number(struct sock *sk, int len, } /* + * 8.1.23 SCTP_AUTO_ASCONF + * See the corresponding setsockopt entry for a description + */ +static int sctp_getsockopt_auto_asconf(struct sock *sk, int len, + char __user *optval, int __user *optlen) +{ + int val = 0; + + if (len < sizeof(int)) + return -EINVAL; + + len = sizeof(int); + if (sctp_sk(sk)->do_auto_asconf && sctp_is_ep_boundall(sk)) + val = 1; + if (put_user(len, optlen)) + return -EFAULT; + if (copy_to_user(optval, &val, len)) + return -EFAULT; + return 0; +}
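From user space the new option behaves like any other SCTP socket option. A minimal sketch (assuming an lksctp-style <netinet/sctp.h> that defines SCTP_AUTO_ASCONF; the bind to INADDR_ANY reflects the bound-to-all requirement described above):

#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/sctp.h>	/* assumed to define SCTP_AUTO_ASCONF */

int main(void)
{
	int sd = socket(AF_INET, SOCK_STREAM, IPPROTO_SCTP);
	struct sockaddr_in sin;
	int on = 1, val = 0;
	socklen_t len = sizeof(val);

	/* The option is only accepted on sockets bound to all addresses */
	memset(&sin, 0, sizeof(sin));
	sin.sin_family = AF_INET;
	sin.sin_addr.s_addr = htonl(INADDR_ANY);
	bind(sd, (struct sockaddr *)&sin, sizeof(sin));

	if (setsockopt(sd, IPPROTO_SCTP, SCTP_AUTO_ASCONF, &on, sizeof(on)))
		perror("setsockopt(SCTP_AUTO_ASCONF)");
	if (!getsockopt(sd, IPPROTO_SCTP, SCTP_AUTO_ASCONF, &val, &len))
		printf("auto_asconf: %d\n", val);	/* expect 1 */
	return 0;
}

The system-wide default for new sockets comes from the default_auto_asconf sysctl added in net/sctp/sysctl.c below.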
+ +/* * 8.2.6. Get the Current Identifiers of Associations * (SCTP_GET_ASSOC_ID_LIST) * @@ -5460,6 +5605,9 @@ SCTP_STATIC int sctp_getsockopt(struct sock *sk, int level, int optname, case SCTP_GET_ASSOC_ID_LIST: retval = sctp_getsockopt_assoc_ids(sk, len, optval, optlen); break; + case SCTP_AUTO_ASCONF: + retval = sctp_getsockopt_auto_asconf(sk, len, optval, optlen); + break; default: retval = -ENOPROTOOPT; break; @@ -6512,6 +6660,7 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, struct sk_buff *skb, *tmp; struct sctp_ulpevent *event; struct sctp_bind_hashbucket *head; + struct list_head tmplist; /* Migrate socket buffer sizes and all the socket level options to the * new socket. @@ -6519,7 +6668,12 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, newsk->sk_sndbuf = oldsk->sk_sndbuf; newsk->sk_rcvbuf = oldsk->sk_rcvbuf; /* Brute force copy old sctp opt. */ - inet_sk_copy_descendant(newsk, oldsk); + if (oldsp->do_auto_asconf) { + memcpy(&tmplist, &newsp->auto_asconf_list, sizeof(tmplist)); + inet_sk_copy_descendant(newsk, oldsk); + memcpy(&newsp->auto_asconf_list, &tmplist, sizeof(tmplist)); + } else + inet_sk_copy_descendant(newsk, oldsk); /* Restore the ep value that was overwritten with the above structure * copy. diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index 50cb57f..6b39529 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -183,6 +183,13 @@ static ctl_table sctp_table[] = { .proc_handler = proc_dointvec, }, { + .procname = "default_auto_asconf", + .data = &sctp_default_auto_asconf, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { .procname = "prsctp_enable", .data = &sctp_prsctp_enable, .maxlen = sizeof(int), diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 67e3127..727e506 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -326,10 +326,12 @@ rpcauth_prune_expired(struct list_head *free, int nr_to_scan) * Run memory cache shrinker. */ static int -rpcauth_cache_shrinker(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask) +rpcauth_cache_shrinker(struct shrinker *shrink, struct shrink_control *sc) { LIST_HEAD(free); int res; + int nr_to_scan = sc->nr_to_scan; + gfp_t gfp_mask = sc->gfp_mask; if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL) return (nr_to_scan == 0) ? 0 : -1; @@ -624,7 +626,7 @@ rpcauth_refreshcred(struct rpc_task *task) if (err < 0) goto out; cred = task->tk_rqstp->rq_cred; - }; + } dprintk("RPC: %5u refreshing %s cred %p\n", task->tk_pid, cred->cr_auth->au_ops->au_name, cred); diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c index 0a9a2ec..c3b7533 100644 --- a/net/sunrpc/auth_gss/gss_krb5_mech.c +++ b/net/sunrpc/auth_gss/gss_krb5_mech.c @@ -43,6 +43,7 @@ #include <linux/sunrpc/gss_krb5.h> #include <linux/sunrpc/xdr.h> #include <linux/crypto.h> +#include <linux/sunrpc/gss_krb5_enctypes.h> #ifdef RPC_DEBUG # define RPCDBG_FACILITY RPCDBG_AUTH @@ -750,7 +751,7 @@ static struct gss_api_mech gss_kerberos_mech = { .gm_ops = &gss_kerberos_ops, .gm_pf_num = ARRAY_SIZE(gss_kerberos_pfs), .gm_pfs = gss_kerberos_pfs, - .gm_upcall_enctypes = "18,17,16,23,3,1,2", + .gm_upcall_enctypes = KRB5_SUPPORTED_ENCTYPES, }; static int __init init_kerberos_module(void) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 8d83f9d..b84d739 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -13,10 +13,6 @@ * and need to be refreshed, or when a packet was damaged in transit. * This may be have to be moved to the VFS layer.
* - * NB: BSD uses a more intelligent approach to guessing when a request - * or reply has been lost by keeping the RTO estimate for each procedure. - * We currently make do with a constant timeout value. - * * Copyright (C) 1992,1993 Rick Sladkey <jrs@world.std.com> * Copyright (C) 1995,1996 Olaf Kirch <okir@monad.swb.de> */ @@ -32,7 +28,9 @@ #include <linux/slab.h> #include <linux/utsname.h> #include <linux/workqueue.h> +#include <linux/in.h> #include <linux/in6.h> +#include <linux/un.h> #include <linux/sunrpc/clnt.h> #include <linux/sunrpc/rpc_pipe_fs.h> @@ -298,22 +296,27 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args) * up a string representation of the passed-in address. */ if (args->servername == NULL) { + struct sockaddr_un *sun = + (struct sockaddr_un *)args->address; + struct sockaddr_in *sin = + (struct sockaddr_in *)args->address; + struct sockaddr_in6 *sin6 = + (struct sockaddr_in6 *)args->address; + servername[0] = '\0'; switch (args->address->sa_family) { - case AF_INET: { - struct sockaddr_in *sin = - (struct sockaddr_in *)args->address; + case AF_LOCAL: + snprintf(servername, sizeof(servername), "%s", + sun->sun_path); + break; + case AF_INET: snprintf(servername, sizeof(servername), "%pI4", &sin->sin_addr.s_addr); break; - } - case AF_INET6: { - struct sockaddr_in6 *sin = - (struct sockaddr_in6 *)args->address; + case AF_INET6: snprintf(servername, sizeof(servername), "%pI6", - &sin->sin6_addr); + &sin6->sin6_addr); break; - } default: /* caller wants default server name, but * address family isn't recognized. */ diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index c652e4c..9a80a92 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -16,6 +16,7 @@ #include <linux/types.h> #include <linux/socket.h> +#include <linux/un.h> #include <linux/in.h> #include <linux/in6.h> #include <linux/kernel.h> @@ -32,6 +33,8 @@ # define RPCDBG_FACILITY RPCDBG_BIND #endif +#define RPCBIND_SOCK_PATHNAME "/var/run/rpcbind.sock" + #define RPCBIND_PROGRAM (100000u) #define RPCBIND_PORT (111u) @@ -158,20 +161,69 @@ static void rpcb_map_release(void *data) kfree(map); } -static const struct sockaddr_in rpcb_inaddr_loopback = { - .sin_family = AF_INET, - .sin_addr.s_addr = htonl(INADDR_LOOPBACK), - .sin_port = htons(RPCBIND_PORT), -}; +/* + * Returns zero on success, otherwise a negative errno value + * is returned. + */ +static int rpcb_create_local_unix(void) +{ + static const struct sockaddr_un rpcb_localaddr_rpcbind = { + .sun_family = AF_LOCAL, + .sun_path = RPCBIND_SOCK_PATHNAME, + }; + struct rpc_create_args args = { + .net = &init_net, + .protocol = XPRT_TRANSPORT_LOCAL, + .address = (struct sockaddr *)&rpcb_localaddr_rpcbind, + .addrsize = sizeof(rpcb_localaddr_rpcbind), + .servername = "localhost", + .program = &rpcb_program, + .version = RPCBVERS_2, + .authflavor = RPC_AUTH_NULL, + }; + struct rpc_clnt *clnt, *clnt4; + int result = 0; + + /* + * Because we requested an RPC PING at transport creation time, + * this works only if the user space portmapper is rpcbind, and + * it's listening on AF_LOCAL on the named socket. 
+ */ + clnt = rpc_create(&args); + if (IS_ERR(clnt)) { + dprintk("RPC: failed to create AF_LOCAL rpcbind " + "client (errno %ld).\n", PTR_ERR(clnt)); + result = -PTR_ERR(clnt); + goto out; + } + + clnt4 = rpc_bind_new_program(clnt, &rpcb_program, RPCBVERS_4); + if (IS_ERR(clnt4)) { + dprintk("RPC: failed to bind second program to " + "rpcbind v4 client (errno %ld).\n", + PTR_ERR(clnt4)); + clnt4 = NULL; + } + + /* Protected by rpcb_create_local_mutex */ + rpcb_local_clnt = clnt; + rpcb_local_clnt4 = clnt4; -static DEFINE_MUTEX(rpcb_create_local_mutex); +out: + return result; +} /* * Returns zero on success, otherwise a negative errno value * is returned. */ -static int rpcb_create_local(void) +static int rpcb_create_local_net(void) { + static const struct sockaddr_in rpcb_inaddr_loopback = { + .sin_family = AF_INET, + .sin_addr.s_addr = htonl(INADDR_LOOPBACK), + .sin_port = htons(RPCBIND_PORT), + }; struct rpc_create_args args = { .net = &init_net, .protocol = XPRT_TRANSPORT_TCP, @@ -186,13 +238,6 @@ static int rpcb_create_local(void) struct rpc_clnt *clnt, *clnt4; int result = 0; - if (rpcb_local_clnt) - return result; - - mutex_lock(&rpcb_create_local_mutex); - if (rpcb_local_clnt) - goto out; - clnt = rpc_create(&args); if (IS_ERR(clnt)) { dprintk("RPC: failed to create local rpcbind " @@ -214,10 +259,34 @@ static int rpcb_create_local(void) clnt4 = NULL; } + /* Protected by rpcb_create_local_mutex */ rpcb_local_clnt = clnt; rpcb_local_clnt4 = clnt4; out: + return result; +} + +/* + * Returns zero on success, otherwise a negative errno value + * is returned. + */ +static int rpcb_create_local(void) +{ + static DEFINE_MUTEX(rpcb_create_local_mutex); + int result = 0; + + if (rpcb_local_clnt) + return result; + + mutex_lock(&rpcb_create_local_mutex); + if (rpcb_local_clnt) + goto out; + + if (rpcb_create_local_unix() != 0) + result = rpcb_create_local_net(); + +out: mutex_unlock(&rpcb_create_local_mutex); return result; } diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 08e05a8..2b90292 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -942,6 +942,8 @@ static void svc_unregister(const struct svc_serv *serv) if (progp->pg_vers[i]->vs_hidden) continue; + dprintk("svc: attempting to unregister %sv%u\n", + progp->pg_name, i); __svc_unregister(progp->pg_prog, i, progp->pg_name); } } diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index b7d435c..af04f77 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -387,6 +387,33 @@ static int svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, int nr, return len; } +static int svc_partial_recvfrom(struct svc_rqst *rqstp, + struct kvec *iov, int nr, + int buflen, unsigned int base) +{ + size_t save_iovlen; + void __user *save_iovbase; + unsigned int i; + int ret; + + if (base == 0) + return svc_recvfrom(rqstp, iov, nr, buflen); + + for (i = 0; i < nr; i++) { + if (iov[i].iov_len > base) + break; + base -= iov[i].iov_len; + } + save_iovlen = iov[i].iov_len; + save_iovbase = iov[i].iov_base; + iov[i].iov_len -= base; + iov[i].iov_base += base; + ret = svc_recvfrom(rqstp, &iov[i], nr - i, buflen); + iov[i].iov_len = save_iovlen; + iov[i].iov_base = save_iovbase; + return ret; +} + /* * Set socket snd and rcv buffer lengths */ @@ -409,7 +436,6 @@ static void svc_sock_setbufsize(struct socket *sock, unsigned int snd, lock_sock(sock->sk); sock->sk->sk_sndbuf = snd * 2; sock->sk->sk_rcvbuf = rcv * 2; - sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK|SOCK_RCVBUF_LOCK; sock->sk->sk_write_space(sock->sk); release_sock(sock->sk); 
#endif @@ -884,6 +910,56 @@ failed: return NULL; } +static unsigned int svc_tcp_restore_pages(struct svc_sock *svsk, struct svc_rqst *rqstp) +{ + unsigned int i, len, npages; + + if (svsk->sk_tcplen <= sizeof(rpc_fraghdr)) + return 0; + len = svsk->sk_tcplen - sizeof(rpc_fraghdr); + npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; + for (i = 0; i < npages; i++) { + if (rqstp->rq_pages[i] != NULL) + put_page(rqstp->rq_pages[i]); + BUG_ON(svsk->sk_pages[i] == NULL); + rqstp->rq_pages[i] = svsk->sk_pages[i]; + svsk->sk_pages[i] = NULL; + } + rqstp->rq_arg.head[0].iov_base = page_address(rqstp->rq_pages[0]); + return len; +} + +static void svc_tcp_save_pages(struct svc_sock *svsk, struct svc_rqst *rqstp) +{ + unsigned int i, len, npages; + + if (svsk->sk_tcplen <= sizeof(rpc_fraghdr)) + return; + len = svsk->sk_tcplen - sizeof(rpc_fraghdr); + npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; + for (i = 0; i < npages; i++) { + svsk->sk_pages[i] = rqstp->rq_pages[i]; + rqstp->rq_pages[i] = NULL; + } +} + +static void svc_tcp_clear_pages(struct svc_sock *svsk) +{ + unsigned int i, len, npages; + + if (svsk->sk_tcplen <= sizeof(rpc_fraghdr)) + goto out; + len = svsk->sk_tcplen - sizeof(rpc_fraghdr); + npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; + for (i = 0; i < npages; i++) { + BUG_ON(svsk->sk_pages[i] == NULL); + put_page(svsk->sk_pages[i]); + svsk->sk_pages[i] = NULL; + } +out: + svsk->sk_tcplen = 0; +} + /* * Receive data. * If we haven't gotten the record length yet, get the next four bytes. @@ -893,31 +969,15 @@ failed: static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp) { struct svc_serv *serv = svsk->sk_xprt.xpt_server; + unsigned int want; int len; - if (test_and_clear_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags)) - /* sndbuf needs to have room for one request - * per thread, otherwise we can stall even when the - * network isn't a bottleneck. - * - * We count all threads rather than threads in a - * particular pool, which provides an upper bound - * on the number of threads which will access the socket. - * - * rcvbuf just needs to be able to hold a few requests. - * Normally they will be removed from the queue - * as soon a a complete request arrives. - */ - svc_sock_setbufsize(svsk->sk_sock, - (serv->sv_nrthreads+3) * serv->sv_max_mesg, - 3 * serv->sv_max_mesg); - clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); if (svsk->sk_tcplen < sizeof(rpc_fraghdr)) { - int want = sizeof(rpc_fraghdr) - svsk->sk_tcplen; struct kvec iov; + want = sizeof(rpc_fraghdr) - svsk->sk_tcplen; iov.iov_base = ((char *) &svsk->sk_reclen) + svsk->sk_tcplen; iov.iov_len = want; if ((len = svc_recvfrom(rqstp, &iov, 1, want)) < 0) @@ -927,7 +987,7 @@ static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp) if (len < want) { dprintk("svc: short recvfrom while reading record " "length (%d of %d)\n", len, want); - goto err_again; /* record header not complete */ + return -EAGAIN; } svsk->sk_reclen = ntohl(svsk->sk_reclen); @@ -954,83 +1014,75 @@ static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp) } } - /* Check whether enough data is available */ - len = svc_recv_available(svsk); - if (len < 0) - goto error; + if (svsk->sk_reclen < 8) + goto err_delete; /* client is nuts. 
*/ - if (len < svsk->sk_reclen) { - dprintk("svc: incomplete TCP record (%d of %d)\n", - len, svsk->sk_reclen); - goto err_again; /* record not complete */ - } len = svsk->sk_reclen; - set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); return len; - error: - if (len == -EAGAIN) - dprintk("RPC: TCP recv_record got EAGAIN\n"); +error: + dprintk("RPC: TCP recv_record got %d\n", len); return len; - err_delete: +err_delete: set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); - err_again: return -EAGAIN; } -static int svc_process_calldir(struct svc_sock *svsk, struct svc_rqst *rqstp, - struct rpc_rqst **reqpp, struct kvec *vec) +static int receive_cb_reply(struct svc_sock *svsk, struct svc_rqst *rqstp) { + struct rpc_xprt *bc_xprt = svsk->sk_xprt.xpt_bc_xprt; struct rpc_rqst *req = NULL; - u32 *p; - u32 xid; - u32 calldir; - int len; - - len = svc_recvfrom(rqstp, vec, 1, 8); - if (len < 0) - goto error; + struct kvec *src, *dst; + __be32 *p = (__be32 *)rqstp->rq_arg.head[0].iov_base; + __be32 xid; + __be32 calldir; - p = (u32 *)rqstp->rq_arg.head[0].iov_base; xid = *p++; calldir = *p; - if (calldir == 0) { - /* REQUEST is the most common case */ - vec[0] = rqstp->rq_arg.head[0]; - } else { - /* REPLY */ - struct rpc_xprt *bc_xprt = svsk->sk_xprt.xpt_bc_xprt; - - if (bc_xprt) - req = xprt_lookup_rqst(bc_xprt, xid); - - if (!req) { - printk(KERN_NOTICE - "%s: Got unrecognized reply: " - "calldir 0x%x xpt_bc_xprt %p xid %08x\n", - __func__, ntohl(calldir), - bc_xprt, xid); - vec[0] = rqstp->rq_arg.head[0]; - goto out; - } + if (bc_xprt) + req = xprt_lookup_rqst(bc_xprt, xid); - memcpy(&req->rq_private_buf, &req->rq_rcv_buf, - sizeof(struct xdr_buf)); - /* copy the xid and call direction */ - memcpy(req->rq_private_buf.head[0].iov_base, - rqstp->rq_arg.head[0].iov_base, 8); - vec[0] = req->rq_private_buf.head[0]; + if (!req) { + printk(KERN_NOTICE + "%s: Got unrecognized reply: " + "calldir 0x%x xpt_bc_xprt %p xid %08x\n", + __func__, ntohl(calldir), + bc_xprt, xid); + return -EAGAIN; } - out: - vec[0].iov_base += 8; - vec[0].iov_len -= 8; - len = svsk->sk_reclen - 8; - error: - *reqpp = req; - return len; + + memcpy(&req->rq_private_buf, &req->rq_rcv_buf, sizeof(struct xdr_buf)); + /* + * XXX!: cheating for now! Only copying HEAD. + * But we know this is good enough for now (in fact, for any + * callback reply in the foreseeable future). + */ + dst = &req->rq_private_buf.head[0]; + src = &rqstp->rq_arg.head[0]; + if (dst->iov_len < src->iov_len) + return -EAGAIN; /* whatever; just giving up. */ + memcpy(dst->iov_base, src->iov_base, src->iov_len); + xprt_complete_rqst(req->rq_task, svsk->sk_reclen); + rqstp->rq_arg.len = 0; + return 0; } +static int copy_pages_to_kvecs(struct kvec *vec, struct page **pages, int len) +{ + int i = 0; + int t = 0; + + while (t < len) { + vec[i].iov_base = page_address(pages[i]); + vec[i].iov_len = PAGE_SIZE; + i++; + t += PAGE_SIZE; + } + return i; +} + + /* * Receive data from a TCP socket.
*/ @@ -1041,8 +1093,10 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) struct svc_serv *serv = svsk->sk_xprt.xpt_server; int len; struct kvec *vec; - int pnum, vlen; - struct rpc_rqst *req = NULL; + unsigned int want, base; + __be32 *p; + __be32 calldir; + int pnum; dprintk("svc: tcp_recv %p data %d conn %d close %d\n", svsk, test_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags), @@ -1053,87 +1107,73 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) if (len < 0) goto error; + base = svc_tcp_restore_pages(svsk, rqstp); + want = svsk->sk_reclen - base; + vec = rqstp->rq_vec; - vec[0] = rqstp->rq_arg.head[0]; - vlen = PAGE_SIZE; - /* - * We have enough data for the whole tcp record. Let's try and read the - * first 8 bytes to get the xid and the call direction. We can use this - * to figure out if this is a call or a reply to a callback. If - * sk_reclen is < 8 (xid and calldir), then this is a malformed packet. - * In that case, don't bother with the calldir and just read the data. - * It will be rejected in svc_process. - */ - if (len >= 8) { - len = svc_process_calldir(svsk, rqstp, &req, vec); - if (len < 0) - goto err_again; - vlen -= 8; - } + pnum = copy_pages_to_kvecs(&vec[0], &rqstp->rq_pages[0], + svsk->sk_reclen); - pnum = 1; - while (vlen < len) { - vec[pnum].iov_base = (req) ? - page_address(req->rq_private_buf.pages[pnum - 1]) : - page_address(rqstp->rq_pages[pnum]); - vec[pnum].iov_len = PAGE_SIZE; - pnum++; - vlen += PAGE_SIZE; - } rqstp->rq_respages = &rqstp->rq_pages[pnum]; /* Now receive data */ - len = svc_recvfrom(rqstp, vec, pnum, len); - if (len < 0) - goto err_again; - - /* - * Account for the 8 bytes we read earlier - */ - len += 8; - - if (req) { - xprt_complete_rqst(req->rq_task, len); - len = 0; - goto out; + len = svc_partial_recvfrom(rqstp, vec, pnum, want, base); + if (len >= 0) + svsk->sk_tcplen += len; + if (len != want) { + if (len < 0 && len != -EAGAIN) + goto err_other; + svc_tcp_save_pages(svsk, rqstp); + dprintk("svc: incomplete TCP record (%d of %d)\n", + svsk->sk_tcplen, svsk->sk_reclen); + goto err_noclose; } - dprintk("svc: TCP complete record (%d bytes)\n", len); - rqstp->rq_arg.len = len; + + rqstp->rq_arg.len = svsk->sk_reclen; rqstp->rq_arg.page_base = 0; - if (len <= rqstp->rq_arg.head[0].iov_len) { - rqstp->rq_arg.head[0].iov_len = len; + if (rqstp->rq_arg.len <= rqstp->rq_arg.head[0].iov_len) { + rqstp->rq_arg.head[0].iov_len = rqstp->rq_arg.len; rqstp->rq_arg.page_len = 0; - } else { - rqstp->rq_arg.page_len = len - rqstp->rq_arg.head[0].iov_len; - } + } else + rqstp->rq_arg.page_len = rqstp->rq_arg.len - rqstp->rq_arg.head[0].iov_len; rqstp->rq_xprt_ctxt = NULL; rqstp->rq_prot = IPPROTO_TCP; -out: + p = (__be32 *)rqstp->rq_arg.head[0].iov_base; + calldir = p[1]; + if (calldir) + len = receive_cb_reply(svsk, rqstp); + /* Reset TCP read info */ svsk->sk_reclen = 0; svsk->sk_tcplen = 0; + /* If we have more data, signal svc_xprt_enqueue() to try again */ + if (svc_recv_available(svsk) > sizeof(rpc_fraghdr)) + set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); + + if (len < 0) + goto error; svc_xprt_copy_addrs(rqstp, &svsk->sk_xprt); if (serv->sv_stats) serv->sv_stats->nettcpcnt++; - return len; + dprintk("svc: TCP complete record (%d bytes)\n", rqstp->rq_arg.len); + return rqstp->rq_arg.len; -err_again: - if (len == -EAGAIN) { - dprintk("RPC: TCP recvfrom got EAGAIN\n"); - return len; - } error: - if (len != -EAGAIN) { - printk(KERN_NOTICE "%s: recvfrom returned errno %d\n", - svsk->sk_xprt.xpt_server->sv_name, -len); - set_bit(XPT_CLOSE, 
&svsk->sk_xprt.xpt_flags); - } + if (len != -EAGAIN) + goto err_other; + dprintk("RPC: TCP recvfrom got EAGAIN\n"); return -EAGAIN; +err_other: + printk(KERN_NOTICE "%s: recvfrom returned errno %d\n", + svsk->sk_xprt.xpt_server->sv_name, -len); + set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); +err_noclose: + return -EAGAIN; /* record not complete */ } /* @@ -1304,18 +1344,10 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv) svsk->sk_reclen = 0; svsk->sk_tcplen = 0; + memset(&svsk->sk_pages[0], 0, sizeof(svsk->sk_pages)); tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF; - /* initialise setting must have enough space to - * receive and respond to one request. - * svc_tcp_recvfrom will re-adjust if necessary - */ - svc_sock_setbufsize(svsk->sk_sock, - 3 * svsk->sk_xprt.xpt_server->sv_max_mesg, - 3 * svsk->sk_xprt.xpt_server->sv_max_mesg); - - set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags); set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); if (sk->sk_state != TCP_ESTABLISHED) set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); @@ -1379,8 +1411,14 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv, /* Initialize the socket */ if (sock->type == SOCK_DGRAM) svc_udp_init(svsk, serv); - else + else { + /* initialise setting must have enough space to + * receive and respond to one request. + */ + svc_sock_setbufsize(svsk->sk_sock, 4 * serv->sv_max_mesg, + 4 * serv->sv_max_mesg); svc_tcp_init(svsk, serv); + } dprintk("svc: svc_setup_socket created %p (inet %p)\n", svsk, svsk->sk_sk); @@ -1562,8 +1600,10 @@ static void svc_tcp_sock_detach(struct svc_xprt *xprt) svc_sock_detach(xprt); - if (!test_bit(XPT_LISTENER, &xprt->xpt_flags)) + if (!test_bit(XPT_LISTENER, &xprt->xpt_flags)) { + svc_tcp_clear_pages(svsk); kernel_sock_shutdown(svsk->sk_sock, SHUT_RDWR); + } } /* diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 679cd67..f008c14 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -638,6 +638,25 @@ void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p) } EXPORT_SYMBOL_GPL(xdr_init_decode); +/** + * xdr_init_decode_pages - Initialize an xdr_stream for decoding data.
+ * @xdr: pointer to xdr_stream struct + * @buf: pointer to XDR buffer from which to decode data + * @pages: list of pages to decode into + * @len: length in bytes of buffer in pages + */ +void xdr_init_decode_pages(struct xdr_stream *xdr, struct xdr_buf *buf, + struct page **pages, unsigned int len) +{ + memset(buf, 0, sizeof(*buf)); + buf->pages = pages; + buf->page_len = len; + buf->buflen = len; + buf->len = len; + xdr_init_decode(xdr, buf, NULL); +} +EXPORT_SYMBOL_GPL(xdr_init_decode_pages); + static __be32 * __xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes) { __be32 *p = xdr->p; diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 6c014dd..a385430 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -42,6 +42,7 @@ #include <linux/sunrpc/svc_xprt.h> #include <linux/sunrpc/debug.h> #include <linux/sunrpc/rpc_rdma.h> +#include <linux/interrupt.h> #include <linux/sched.h> #include <linux/slab.h> #include <linux/spinlock.h> @@ -695,7 +696,8 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv, return ERR_PTR(-ENOMEM); xprt = &cma_xprt->sc_xprt; - listen_id = rdma_create_id(rdma_listen_handler, cma_xprt, RDMA_PS_TCP); + listen_id = rdma_create_id(rdma_listen_handler, cma_xprt, RDMA_PS_TCP, + IB_QPT_RC); if (IS_ERR(listen_id)) { ret = PTR_ERR(listen_id); dprintk("svcrdma: rdma_create_id failed = %d\n", ret); diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index d4297dc..28236ba 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -47,6 +47,7 @@ * o buffer memory */ +#include <linux/interrupt.h> #include <linux/pci.h> /* for Tavor hack below */ #include <linux/slab.h> @@ -387,7 +388,7 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt, init_completion(&ia->ri_done); - id = rdma_create_id(rpcrdma_conn_upcall, xprt, RDMA_PS_TCP); + id = rdma_create_id(rpcrdma_conn_upcall, xprt, RDMA_PS_TCP, IB_QPT_RC); if (IS_ERR(id)) { rc = PTR_ERR(id); dprintk("RPC: %s: rdma_create_id() failed %i\n", diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index bf005d3..72abb73 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -19,6 +19,7 @@ */ #include <linux/types.h> +#include <linux/string.h> #include <linux/slab.h> #include <linux/module.h> #include <linux/capability.h> @@ -28,6 +29,7 @@ #include <linux/in.h> #include <linux/net.h> #include <linux/mm.h> +#include <linux/un.h> #include <linux/udp.h> #include <linux/tcp.h> #include <linux/sunrpc/clnt.h> @@ -45,6 +47,9 @@ #include <net/tcp.h> #include "sunrpc.h" + +static void xs_close(struct rpc_xprt *xprt); + /* * xprtsock tunables */ @@ -261,6 +266,11 @@ static inline struct sockaddr *xs_addr(struct rpc_xprt *xprt) return (struct sockaddr *) &xprt->addr; } +static inline struct sockaddr_un *xs_addr_un(struct rpc_xprt *xprt) +{ + return (struct sockaddr_un *) &xprt->addr; +} + static inline struct sockaddr_in *xs_addr_in(struct rpc_xprt *xprt) { return (struct sockaddr_in *) &xprt->addr; @@ -276,23 +286,34 @@ static void xs_format_common_peer_addresses(struct rpc_xprt *xprt) struct sockaddr *sap = xs_addr(xprt); struct sockaddr_in6 *sin6; struct sockaddr_in *sin; + struct sockaddr_un *sun; char buf[128]; - (void)rpc_ntop(sap, buf, sizeof(buf)); - xprt->address_strings[RPC_DISPLAY_ADDR] = kstrdup(buf, GFP_KERNEL); - switch (sap->sa_family) { + case AF_LOCAL: + sun = xs_addr_un(xprt); + strlcpy(buf, sun->sun_path, sizeof(buf)); + xprt->address_strings[RPC_DISPLAY_ADDR] = + 
kstrdup(buf, GFP_KERNEL); + break; case AF_INET: + (void)rpc_ntop(sap, buf, sizeof(buf)); + xprt->address_strings[RPC_DISPLAY_ADDR] = + kstrdup(buf, GFP_KERNEL); sin = xs_addr_in(xprt); snprintf(buf, sizeof(buf), "%08x", ntohl(sin->sin_addr.s_addr)); break; case AF_INET6: + (void)rpc_ntop(sap, buf, sizeof(buf)); + xprt->address_strings[RPC_DISPLAY_ADDR] = + kstrdup(buf, GFP_KERNEL); sin6 = xs_addr_in6(xprt); snprintf(buf, sizeof(buf), "%pi6", &sin6->sin6_addr); break; default: BUG(); } + xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = kstrdup(buf, GFP_KERNEL); } @@ -495,6 +516,70 @@ static int xs_nospace(struct rpc_task *task) return ret; } +/* + * Construct a stream transport record marker in @buf. + */ +static inline void xs_encode_stream_record_marker(struct xdr_buf *buf) +{ + u32 reclen = buf->len - sizeof(rpc_fraghdr); + rpc_fraghdr *base = buf->head[0].iov_base; + *base = cpu_to_be32(RPC_LAST_STREAM_FRAGMENT | reclen); +} + +/** + * xs_local_send_request - write an RPC request to an AF_LOCAL socket + * @task: RPC task that manages the state of an RPC request + * + * Return values: + * 0: The request has been sent + * EAGAIN: The socket was blocked, please call again later to + * complete the request + * ENOTCONN: Caller needs to invoke connect logic then call again + * other: Some other error occurred, the request was not sent + */ +static int xs_local_send_request(struct rpc_task *task) +{ + struct rpc_rqst *req = task->tk_rqstp; + struct rpc_xprt *xprt = req->rq_xprt; + struct sock_xprt *transport = + container_of(xprt, struct sock_xprt, xprt); + struct xdr_buf *xdr = &req->rq_snd_buf; + int status; + + xs_encode_stream_record_marker(&req->rq_snd_buf); + + xs_pktdump("packet data:", + req->rq_svec->iov_base, req->rq_svec->iov_len); + + status = xs_sendpages(transport->sock, NULL, 0, + xdr, req->rq_bytes_sent); + dprintk("RPC: %s(%u) = %d\n", + __func__, xdr->len - req->rq_bytes_sent, status); + if (likely(status >= 0)) { + req->rq_bytes_sent += status; + req->rq_xmit_bytes_sent += status; + if (likely(req->rq_bytes_sent >= req->rq_slen)) { + req->rq_bytes_sent = 0; + return 0; + } + status = -EAGAIN; + } + + switch (status) { + case -EAGAIN: + status = xs_nospace(task); + break; + default: + dprintk("RPC: sendmsg returned unrecognized error %d\n", + -status); + case -EPIPE: + xs_close(xprt); + status = -ENOTCONN; + } + + return status; +} + /** * xs_udp_send_request - write an RPC request to a UDP socket * @task: address of RPC task that manages the state of an RPC request @@ -574,13 +659,6 @@ static void xs_tcp_shutdown(struct rpc_xprt *xprt) kernel_sock_shutdown(sock, SHUT_WR); } -static inline void xs_encode_tcp_record_marker(struct xdr_buf *buf) -{ - u32 reclen = buf->len - sizeof(rpc_fraghdr); - rpc_fraghdr *base = buf->head[0].iov_base; - *base = htonl(RPC_LAST_STREAM_FRAGMENT | reclen); -} - /** * xs_tcp_send_request - write an RPC request to a TCP socket * @task: address of RPC task that manages the state of an RPC request @@ -603,7 +681,7 @@ static int xs_tcp_send_request(struct rpc_task *task) struct xdr_buf *xdr = &req->rq_snd_buf; int status; - xs_encode_tcp_record_marker(&req->rq_snd_buf); + xs_encode_stream_record_marker(&req->rq_snd_buf); xs_pktdump("packet data:", req->rq_svec->iov_base, @@ -785,6 +863,88 @@ static inline struct rpc_xprt *xprt_from_sock(struct sock *sk) return (struct rpc_xprt *) sk->sk_user_data; } +static int xs_local_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb) +{ + struct xdr_skb_reader desc = { + .skb = skb, + .offset =
sizeof(rpc_fraghdr), + .count = skb->len - sizeof(rpc_fraghdr), + }; + + if (xdr_partial_copy_from_skb(xdr, 0, &desc, xdr_skb_read_bits) < 0) + return -1; + if (desc.count) + return -1; + return 0; +} + +/** + * xs_local_data_ready - "data ready" callback for AF_LOCAL sockets + * @sk: socket with data to read + * @len: how much data to read + * + * Currently this assumes we can read the whole reply in a single gulp. + */ +static void xs_local_data_ready(struct sock *sk, int len) +{ + struct rpc_task *task; + struct rpc_xprt *xprt; + struct rpc_rqst *rovr; + struct sk_buff *skb; + int err, repsize, copied; + u32 _xid; + __be32 *xp; + + read_lock_bh(&sk->sk_callback_lock); + dprintk("RPC: %s...\n", __func__); + xprt = xprt_from_sock(sk); + if (xprt == NULL) + goto out; + + skb = skb_recv_datagram(sk, 0, 1, &err); + if (skb == NULL) + goto out; + + if (xprt->shutdown) + goto dropit; + + repsize = skb->len - sizeof(rpc_fraghdr); + if (repsize < 4) { + dprintk("RPC: impossible RPC reply size %d\n", repsize); + goto dropit; + } + + /* Copy the XID from the skb... */ + xp = skb_header_pointer(skb, sizeof(rpc_fraghdr), sizeof(_xid), &_xid); + if (xp == NULL) + goto dropit; + + /* Look up and lock the request corresponding to the given XID */ + spin_lock(&xprt->transport_lock); + rovr = xprt_lookup_rqst(xprt, *xp); + if (!rovr) + goto out_unlock; + task = rovr->rq_task; + + copied = rovr->rq_private_buf.buflen; + if (copied > repsize) + copied = repsize; + + if (xs_local_copy_to_xdr(&rovr->rq_private_buf, skb)) { + dprintk("RPC: sk_buff copy failed\n"); + goto out_unlock; + } + + xprt_complete_rqst(task, copied); + + out_unlock: + spin_unlock(&xprt->transport_lock); + dropit: + skb_free_datagram(sk, skb); + out: + read_unlock_bh(&sk->sk_callback_lock); +} + /** * xs_udp_data_ready - "data ready" callback for UDP sockets * @sk: socket with data to read @@ -1344,7 +1504,6 @@ static void xs_tcp_state_change(struct sock *sk) case TCP_CLOSE_WAIT: /* The server initiated a shutdown of the socket */ xprt_force_disconnect(xprt); - case TCP_SYN_SENT: xprt->connect_cookie++; case TCP_CLOSING: /* @@ -1571,11 +1730,31 @@ static int xs_bind(struct sock_xprt *transport, struct socket *sock) return err; } +/* + * We don't support autobind on AF_LOCAL sockets + */ +static void xs_local_rpcbind(struct rpc_task *task) +{ + xprt_set_bound(task->tk_xprt); +} + +static void xs_local_set_port(struct rpc_xprt *xprt, unsigned short port) +{ +} #ifdef CONFIG_DEBUG_LOCK_ALLOC static struct lock_class_key xs_key[2]; static struct lock_class_key xs_slock_key[2]; +static inline void xs_reclassify_socketu(struct socket *sock) +{ + struct sock *sk = sock->sk; + + BUG_ON(sock_owned_by_user(sk)); + sock_lock_init_class_and_name(sk, "slock-AF_LOCAL-RPC", + &xs_slock_key[1], "sk_lock-AF_LOCAL-RPC", &xs_key[1]); +} + static inline void xs_reclassify_socket4(struct socket *sock) { struct sock *sk = sock->sk; @@ -1597,6 +1776,9 @@ static inline void xs_reclassify_socket6(struct socket *sock) static inline void xs_reclassify_socket(int family, struct socket *sock) { switch (family) { + case AF_LOCAL: + xs_reclassify_socketu(sock); + break; case AF_INET: xs_reclassify_socket4(sock); break; @@ -1606,6 +1788,10 @@ static inline void xs_reclassify_socket(int family, struct socket *sock) } } #else +static inline void xs_reclassify_socketu(struct socket *sock) +{ +} + static inline void xs_reclassify_socket4(struct socket *sock) { } @@ -1644,6 +1830,94 @@ out: return ERR_PTR(err); } +static int xs_local_finish_connecting(struct rpc_xprt 
*xprt, + struct socket *sock) +{ + struct sock_xprt *transport = container_of(xprt, struct sock_xprt, + xprt); + + if (!transport->inet) { + struct sock *sk = sock->sk; + + write_lock_bh(&sk->sk_callback_lock); + + xs_save_old_callbacks(transport, sk); + + sk->sk_user_data = xprt; + sk->sk_data_ready = xs_local_data_ready; + sk->sk_write_space = xs_udp_write_space; + sk->sk_error_report = xs_error_report; + sk->sk_allocation = GFP_ATOMIC; + + xprt_clear_connected(xprt); + + /* Reset to new socket */ + transport->sock = sock; + transport->inet = sk; + + write_unlock_bh(&sk->sk_callback_lock); + } + + /* Tell the socket layer to start connecting... */ + xprt->stat.connect_count++; + xprt->stat.connect_start = jiffies; + return kernel_connect(sock, xs_addr(xprt), xprt->addrlen, 0); +} + +/** + * xs_local_setup_socket - create AF_LOCAL socket, connect to a local endpoint + * @work: queued work item carrying the socket transport to connect + * + * Invoked by a work queue tasklet. + */ +static void xs_local_setup_socket(struct work_struct *work) +{ + struct sock_xprt *transport = + container_of(work, struct sock_xprt, connect_worker.work); + struct rpc_xprt *xprt = &transport->xprt; + struct socket *sock; + int status = -EIO; + + if (xprt->shutdown) + goto out; + + clear_bit(XPRT_CONNECTION_ABORT, &xprt->state); + status = __sock_create(xprt->xprt_net, AF_LOCAL, + SOCK_STREAM, 0, &sock, 1); + if (status < 0) { + dprintk("RPC: can't create AF_LOCAL " + "transport socket (%d).\n", -status); + goto out; + } + xs_reclassify_socketu(sock); + + dprintk("RPC: worker connecting xprt %p via AF_LOCAL to %s\n", + xprt, xprt->address_strings[RPC_DISPLAY_ADDR]); + + status = xs_local_finish_connecting(xprt, sock); + switch (status) { + case 0: + dprintk("RPC: xprt %p connected to %s\n", + xprt, xprt->address_strings[RPC_DISPLAY_ADDR]); + xprt_set_connected(xprt); + break; + case -ENOENT: + dprintk("RPC: xprt %p: socket %s does not exist\n", + xprt, xprt->address_strings[RPC_DISPLAY_ADDR]); + break; + default: + printk(KERN_ERR "%s: unhandled error (%d) connecting to %s\n", + __func__, -status, + xprt->address_strings[RPC_DISPLAY_ADDR]); + } + +out: + xprt_clear_connecting(xprt); + xprt_wake_pending_tasks(xprt, status); +} + static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) { struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); @@ -1758,6 +2032,7 @@ static void xs_tcp_reuse_connection(struct sock_xprt *transport) static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) { struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); + int ret = -ENOTCONN; if (!transport->inet) { struct sock *sk = sock->sk; @@ -1789,12 +2064,22 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) } if (!xprt_bound(xprt)) - return -ENOTCONN; + goto out; /* Tell the socket layer to start connecting... */ xprt->stat.connect_count++; xprt->stat.connect_start = jiffies; - return kernel_connect(sock, xs_addr(xprt), xprt->addrlen, O_NONBLOCK); + ret = kernel_connect(sock, xs_addr(xprt), xprt->addrlen, O_NONBLOCK); + switch (ret) { + case 0: + case -EINPROGRESS: + /* SYN_SENT!
*/ + xprt->connect_cookie++; + if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO) + xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO; + } +out: + return ret; } /** @@ -1917,6 +2202,32 @@ static void xs_connect(struct rpc_task *task) } /** + * xs_local_print_stats - display AF_LOCAL socket-specific stats + * @xprt: rpc_xprt struct containing statistics + * @seq: output file + * + */ +static void xs_local_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) +{ + long idle_time = 0; + + if (xprt_connected(xprt)) + idle_time = (long)(jiffies - xprt->last_used) / HZ; + + seq_printf(seq, "\txprt:\tlocal %lu %lu %lu %ld %lu %lu %lu " + "%llu %llu\n", + xprt->stat.bind_count, + xprt->stat.connect_count, + xprt->stat.connect_time, + idle_time, + xprt->stat.sends, + xprt->stat.recvs, + xprt->stat.bad_xids, + xprt->stat.req_u, + xprt->stat.bklog_u); +} + +/** * xs_udp_print_stats - display UDP socket-specifc stats * @xprt: rpc_xprt struct containing statistics * @seq: output file @@ -2014,10 +2325,7 @@ static int bc_sendto(struct rpc_rqst *req) unsigned long headoff; unsigned long tailoff; - /* - * Set up the rpc header and record marker stuff - */ - xs_encode_tcp_record_marker(xbufp); + xs_encode_stream_record_marker(xbufp); tailoff = (unsigned long)xbufp->tail[0].iov_base & ~PAGE_MASK; headoff = (unsigned long)xbufp->head[0].iov_base & ~PAGE_MASK; @@ -2089,6 +2397,21 @@ static void bc_destroy(struct rpc_xprt *xprt) { } +static struct rpc_xprt_ops xs_local_ops = { + .reserve_xprt = xprt_reserve_xprt, + .release_xprt = xs_tcp_release_xprt, + .rpcbind = xs_local_rpcbind, + .set_port = xs_local_set_port, + .connect = xs_connect, + .buf_alloc = rpc_malloc, + .buf_free = rpc_free, + .send_request = xs_local_send_request, + .set_retrans_timeout = xprt_set_retrans_timeout_def, + .close = xs_close, + .destroy = xs_destroy, + .print_stats = xs_local_print_stats, +}; + static struct rpc_xprt_ops xs_udp_ops = { .set_buffer_size = xs_udp_set_buffer_size, .reserve_xprt = xprt_reserve_xprt_cong, @@ -2150,6 +2473,8 @@ static int xs_init_anyaddr(const int family, struct sockaddr *sap) }; switch (family) { + case AF_LOCAL: + break; case AF_INET: memcpy(sap, &sin, sizeof(sin)); break; @@ -2197,6 +2522,70 @@ static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args, return xprt; } +static const struct rpc_timeout xs_local_default_timeout = { + .to_initval = 10 * HZ, + .to_maxval = 10 * HZ, + .to_retries = 2, +}; + +/** + * xs_setup_local - Set up transport to use an AF_LOCAL socket + * @args: rpc transport creation arguments + * + * AF_LOCAL is a "tpi_cots_ord" transport, just like TCP + */ +static struct rpc_xprt *xs_setup_local(struct xprt_create *args) +{ + struct sockaddr_un *sun = (struct sockaddr_un *)args->dstaddr; + struct sock_xprt *transport; + struct rpc_xprt *xprt; + struct rpc_xprt *ret; + + xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries); + if (IS_ERR(xprt)) + return xprt; + transport = container_of(xprt, struct sock_xprt, xprt); + + xprt->prot = 0; + xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32); + xprt->max_payload = RPC_MAX_FRAGMENT_SIZE; + + xprt->bind_timeout = XS_BIND_TO; + xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO; + xprt->idle_timeout = XS_IDLE_DISC_TO; + + xprt->ops = &xs_local_ops; + xprt->timeout = &xs_local_default_timeout; + + switch (sun->sun_family) { + case AF_LOCAL: + if (sun->sun_path[0] != '/') { + dprintk("RPC: bad AF_LOCAL address: %s\n", + sun->sun_path); + ret = ERR_PTR(-EINVAL); + goto out_err; + } + xprt_set_bound(xprt); +
INIT_DELAYED_WORK(&transport->connect_worker, + xs_local_setup_socket); + xs_format_peer_addresses(xprt, "local", RPCBIND_NETID_LOCAL); + break; + default: + ret = ERR_PTR(-EAFNOSUPPORT); + goto out_err; + } + + dprintk("RPC: set up xprt to %s via AF_LOCAL\n", + xprt->address_strings[RPC_DISPLAY_ADDR]); + + if (try_module_get(THIS_MODULE)) + return xprt; + ret = ERR_PTR(-EINVAL); +out_err: + xprt_free(xprt); + return ret; +} + static const struct rpc_timeout xs_udp_default_timeout = { .to_initval = 5 * HZ, .to_maxval = 30 * HZ, @@ -2438,6 +2827,14 @@ out_err: return ret; } +static struct xprt_class xs_local_transport = { + .list = LIST_HEAD_INIT(xs_local_transport.list), + .name = "named UNIX socket", + .owner = THIS_MODULE, + .ident = XPRT_TRANSPORT_LOCAL, + .setup = xs_setup_local, +}; + static struct xprt_class xs_udp_transport = { .list = LIST_HEAD_INIT(xs_udp_transport.list), .name = "udp", @@ -2473,6 +2870,7 @@ int init_socket_xprt(void) sunrpc_table_header = register_sysctl_table(sunrpc_table); #endif + xprt_register_transport(&xs_local_transport); xprt_register_transport(&xs_udp_transport); xprt_register_transport(&xs_tcp_transport); xprt_register_transport(&xs_bc_tcp_transport); @@ -2493,6 +2891,7 @@ void cleanup_socket_xprt(void) } #endif + xprt_unregister_transport(&xs_local_transport); xprt_unregister_transport(&xs_udp_transport); xprt_unregister_transport(&xs_tcp_transport); xprt_unregister_transport(&xs_bc_tcp_transport); diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c index 47f1b86..b11ea69 100644 --- a/net/xfrm/xfrm_replay.c +++ b/net/xfrm/xfrm_replay.c @@ -265,7 +265,7 @@ static void xfrm_replay_advance_bmp(struct xfrm_state *x, __be32 net_seq) bitnr = bitnr & 0x1F; replay_esn->bmp[nr] |= (1U << bitnr); } else { - nr = replay_esn->replay_window >> 5; + nr = (replay_esn->replay_window - 1) >> 5; for (i = 0; i <= nr; i++) replay_esn->bmp[i] = 0; @@ -471,7 +471,7 @@ static void xfrm_replay_advance_esn(struct xfrm_state *x, __be32 net_seq) bitnr = bitnr & 0x1F; replay_esn->bmp[nr] |= (1U << bitnr); } else { - nr = replay_esn->replay_window >> 5; + nr = (replay_esn->replay_window - 1) >> 5; for (i = 0; i <= nr; i++) replay_esn->bmp[i] = 0; diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index c658cb3..0256b8a 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -2299,7 +2299,8 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) if (link->dump == NULL) return -EINVAL; - return netlink_dump_start(net->xfrm.nlsk, skb, nlh, link->dump, link->done); + return netlink_dump_start(net->xfrm.nlsk, skb, nlh, + link->dump, link->done, 0); } err = nlmsg_parse(nlh, xfrm_msg_min[type], attrs, XFRMA_MAX,
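A short note on the xfrm_replay change above: a replay window of W bits occupies ceil(W / 32) 32-bit bitmap words, so the highest valid word index is (W - 1) >> 5, not W >> 5. With the old expression, the clear loop "for (i = 0; i <= nr; i++)" wrote one word past the window whenever W is a multiple of 32. A standalone check of the arithmetic (my own sketch, not from the patch; it assumes replay_window is a multiple of 32, which is how the bitmap words are allocated):

#include <assert.h>

int main(void)
{
	unsigned int w;

	for (w = 32; w <= 4096; w += 32) {	/* window sizes, in bits */
		unsigned int nr_new = (w - 1) >> 5;	/* patched upper index */
		unsigned int nr_old = w >> 5;		/* old upper index */

		/* the clear loop runs i = 0..nr, touching nr + 1 words */
		assert(nr_new + 1 == w / 32);		/* exactly the window */
		assert(nr_old + 1 == w / 32 + 1);	/* one word too many */
	}
	return 0;
}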