371 files changed, 26197 insertions, 10659 deletions
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 6e64f7c..7850412 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -327,7 +327,7 @@ static void vlan_sync_address(struct net_device *dev,
 static void vlan_transfer_features(struct net_device *dev,
 				   struct net_device *vlandev)
 {
-	unsigned long old_features = vlandev->features;
+	u32 old_features = vlandev->features;
 
 	vlandev->features &= ~dev->vlan_features;
 	vlandev->features |= dev->features & dev->vlan_features;
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index be73753..ae610f0 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -625,6 +625,19 @@ static int vlan_dev_fcoe_get_wwn(struct net_device *dev, u64 *wwn, int type)
 		rc = ops->ndo_fcoe_get_wwn(real_dev, wwn, type);
 	return rc;
 }
+
+static int vlan_dev_fcoe_ddp_target(struct net_device *dev, u16 xid,
+				    struct scatterlist *sgl, unsigned int sgc)
+{
+	struct net_device *real_dev = vlan_dev_info(dev)->real_dev;
+	const struct net_device_ops *ops = real_dev->netdev_ops;
+	int rc = 0;
+
+	if (ops->ndo_fcoe_ddp_target)
+		rc = ops->ndo_fcoe_ddp_target(real_dev, xid, sgl, sgc);
+
+	return rc;
+}
 #endif
 
 static void vlan_dev_change_rx_flags(struct net_device *dev, int change)
@@ -858,6 +871,7 @@ static const struct net_device_ops vlan_netdev_ops = {
 	.ndo_fcoe_enable	= vlan_dev_fcoe_enable,
 	.ndo_fcoe_disable	= vlan_dev_fcoe_disable,
 	.ndo_fcoe_get_wwn	= vlan_dev_fcoe_get_wwn,
+	.ndo_fcoe_ddp_target	= vlan_dev_fcoe_ddp_target,
 #endif
 };
 
diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c
index 17c5ba7..29a54cc 100644
--- a/net/9p/trans_rdma.c
+++ b/net/9p/trans_rdma.c
@@ -59,7 +59,6 @@
 						 * safely advertise a maxsize
 						 * of 64k */
 
-#define P9_RDMA_MAX_SGE (P9_RDMA_MAXSIZE >> PAGE_SHIFT)
 /**
  * struct p9_trans_rdma - RDMA transport instance
  *
diff --git a/net/Kconfig b/net/Kconfig
index 7284062..79cabf1 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -221,6 +221,12 @@ config RPS
 	depends on SMP && SYSFS && USE_GENERIC_SMP_HELPERS
 	default y
 
+config RFS_ACCEL
+	boolean
+	depends on RPS && GENERIC_HARDIRQS
+	select CPU_RMAP
+	default y
+
 config XPS
 	boolean
 	depends on SMP && SYSFS && USE_GENERIC_SMP_HELPERS
diff --git a/net/atm/clip.c b/net/atm/clip.c
index d257da5..810a129 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -520,9 +520,9 @@ static int clip_setentry(struct atm_vcc *vcc, __be32 ip)
 		unlink_clip_vcc(clip_vcc);
 		return 0;
 	}
-	error = ip_route_output_key(&init_net, &rt, &fl);
-	if (error)
-		return error;
+	rt = ip_route_output_key(&init_net, &fl);
+	if (IS_ERR(rt))
+		return PTR_ERR(rt);
 	neigh = __neigh_lookup(&clip_tbl, &ip, rt->dst.dev, 1);
 	ip_rt_put(rt);
 	if (!neigh)
diff --git a/net/batman-adv/Makefile b/net/batman-adv/Makefile
index d936aec..2de93d0 100644
--- a/net/batman-adv/Makefile
+++ b/net/batman-adv/Makefile
@@ -1,5 +1,5 @@
 #
-# Copyright (C) 2007-2010 B.A.T.M.A.N. contributors:
+# Copyright (C) 2007-2011 B.A.T.M.A.N. contributors:
 #
 # Marek Lindner, Simon Wunderlich
 #
diff --git a/net/batman-adv/aggregation.c b/net/batman-adv/aggregation.c
index 3850a3e..af45d6b 100644
--- a/net/batman-adv/aggregation.c
+++ b/net/batman-adv/aggregation.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors:
+ * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
@@ -35,7 +35,7 @@ static bool can_aggregate_with(struct batman_packet *new_batman_packet,
 			       int packet_len,
 			       unsigned long send_time,
 			       bool directlink,
-			       struct batman_if *if_incoming,
+			       struct hard_iface *if_incoming,
 			       struct forw_packet *forw_packet)
 {
 	struct batman_packet *batman_packet =
@@ -99,7 +99,7 @@ static bool can_aggregate_with(struct batman_packet *new_batman_packet,
 /* create a new aggregated packet and add this packet to it */
 static void new_aggregated_packet(unsigned char *packet_buff, int packet_len,
 				  unsigned long send_time, bool direct_link,
-				  struct batman_if *if_incoming,
+				  struct hard_iface *if_incoming,
 				  int own_packet)
 {
 	struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface);
@@ -188,7 +188,7 @@ static void aggregate(struct forw_packet *forw_packet_aggr,
 
 void add_bat_packet_to_list(struct bat_priv *bat_priv,
 			    unsigned char *packet_buff, int packet_len,
-			    struct batman_if *if_incoming, char own_packet,
+			    struct hard_iface *if_incoming, char own_packet,
 			    unsigned long send_time)
 {
 	/**
@@ -247,7 +247,7 @@ void add_bat_packet_to_list(struct bat_priv *bat_priv,
 
 /* unpack the aggregated packets and process them one by one */
 void receive_aggr_bat_packet(struct ethhdr *ethhdr, unsigned char *packet_buff,
-			     int packet_len, struct batman_if *if_incoming)
+			     int packet_len, struct hard_iface *if_incoming)
 {
 	struct batman_packet *batman_packet;
 	int buff_pos = 0;
diff --git a/net/batman-adv/aggregation.h b/net/batman-adv/aggregation.h
index 71a91b3..0622042 100644
--- a/net/batman-adv/aggregation.h
+++ b/net/batman-adv/aggregation.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors:
+ * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
@@ -35,9 +35,9 @@ static inline int aggregated_packet(int buff_pos, int packet_len, int num_hna)
 
 void add_bat_packet_to_list(struct bat_priv *bat_priv,
 			    unsigned char *packet_buff, int packet_len,
-			    struct batman_if *if_incoming, char own_packet,
+			    struct hard_iface *if_incoming, char own_packet,
 			    unsigned long send_time);
 void receive_aggr_bat_packet(struct ethhdr *ethhdr, unsigned char *packet_buff,
-			     int packet_len, struct batman_if *if_incoming);
+			     int packet_len, struct hard_iface *if_incoming);
 
 #endif /* _NET_BATMAN_ADV_AGGREGATION_H_ */
diff --git a/net/batman-adv/bat_debugfs.c b/net/batman-adv/bat_debugfs.c
index 0ae81d0..0e9d435 100644
--- a/net/batman-adv/bat_debugfs.c
+++ b/net/batman-adv/bat_debugfs.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2010 B.A.T.M.A.N. contributors:
+ * Copyright (C) 2010-2011 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
  *
@@ -52,7 +52,6 @@ static void emit_log_char(struct debug_log *debug_log, char c)
 
 static int fdebug_log(struct debug_log *debug_log, char *fmt, ...)
 {
-	int printed_len;
 	va_list args;
 	static char debug_log_buf[256];
 	char *p;
@@ -62,8 +61,7 @@ static int fdebug_log(struct debug_log *debug_log, char *fmt, ...)
 
 	spin_lock_bh(&debug_log->lock);
 	va_start(args, fmt);
-	printed_len = vscnprintf(debug_log_buf, sizeof(debug_log_buf),
-				 fmt, args);
+	vscnprintf(debug_log_buf, sizeof(debug_log_buf), fmt, args);
 	va_end(args);
 
 	for (p = debug_log_buf; *p != 0; p++)
diff --git a/net/batman-adv/bat_debugfs.h b/net/batman-adv/bat_debugfs.h
index 72df532..bc9cda3f 100644
--- a/net/batman-adv/bat_debugfs.h
+++ b/net/batman-adv/bat_debugfs.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2010 B.A.T.M.A.N. contributors:
+ * Copyright (C) 2010-2011 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
  *
diff --git a/net/batman-adv/bat_sysfs.c b/net/batman-adv/bat_sysfs.c
index cd7bb51..e449bf6 100644
--- a/net/batman-adv/bat_sysfs.c
+++ b/net/batman-adv/bat_sysfs.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2010 B.A.T.M.A.N. contributors:
+ * Copyright (C) 2010-2011 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
  *
@@ -441,16 +441,16 @@ static ssize_t show_mesh_iface(struct kobject *kobj, struct attribute *attr,
 			       char *buff)
 {
 	struct net_device *net_dev = kobj_to_netdev(kobj);
-	struct batman_if *batman_if = get_batman_if_by_netdev(net_dev);
+	struct hard_iface *hard_iface = hardif_get_by_netdev(net_dev);
 	ssize_t length;
 
-	if (!batman_if)
+	if (!hard_iface)
 		return 0;
 
-	length = sprintf(buff, "%s\n", batman_if->if_status == IF_NOT_IN_USE ?
-			 "none" : batman_if->soft_iface->name);
+	length = sprintf(buff, "%s\n", hard_iface->if_status == IF_NOT_IN_USE ?
+			 "none" : hard_iface->soft_iface->name);
 
-	kref_put(&batman_if->refcount, hardif_free_ref);
+	hardif_free_ref(hard_iface);
 
 	return length;
 }
@@ -459,11 +459,11 @@ static ssize_t store_mesh_iface(struct kobject *kobj, struct attribute *attr,
 				char *buff, size_t count)
 {
 	struct net_device *net_dev = kobj_to_netdev(kobj);
-	struct batman_if *batman_if = get_batman_if_by_netdev(net_dev);
+	struct hard_iface *hard_iface = hardif_get_by_netdev(net_dev);
 	int status_tmp = -1;
-	int ret;
+	int ret = count;
 
-	if (!batman_if)
+	if (!hard_iface)
 		return count;
 
 	if (buff[count - 1] == '\n')
@@ -472,7 +472,7 @@ static ssize_t store_mesh_iface(struct kobject *kobj, struct attribute *attr,
 	if (strlen(buff) >= IFNAMSIZ) {
 		pr_err("Invalid parameter for 'mesh_iface' setting received: "
 		       "interface name too long '%s'\n", buff);
-		kref_put(&batman_if->refcount, hardif_free_ref);
+		hardif_free_ref(hard_iface);
 		return -EINVAL;
 	}
 
@@ -481,30 +481,31 @@ static ssize_t store_mesh_iface(struct kobject *kobj, struct attribute *attr,
 	else
 		status_tmp = IF_I_WANT_YOU;
 
-	if ((batman_if->if_status == status_tmp) || ((batman_if->soft_iface) &&
-	    (strncmp(batman_if->soft_iface->name, buff, IFNAMSIZ) == 0))) {
-		kref_put(&batman_if->refcount, hardif_free_ref);
-		return count;
-	}
+	if (hard_iface->if_status == status_tmp)
+		goto out;
+
+	if ((hard_iface->soft_iface) &&
+	    (strncmp(hard_iface->soft_iface->name, buff, IFNAMSIZ) == 0))
+		goto out;
 
 	if (status_tmp == IF_NOT_IN_USE) {
 		rtnl_lock();
-		hardif_disable_interface(batman_if);
+		hardif_disable_interface(hard_iface);
 		rtnl_unlock();
-		kref_put(&batman_if->refcount, hardif_free_ref);
-		return count;
+		goto out;
 	}
 
 	/* if the interface already is in use */
-	if (batman_if->if_status != IF_NOT_IN_USE) {
+	if (hard_iface->if_status != IF_NOT_IN_USE) {
 		rtnl_lock();
-		hardif_disable_interface(batman_if);
+		hardif_disable_interface(hard_iface);
 		rtnl_unlock();
 	}
 
-	ret = hardif_enable_interface(batman_if, buff);
-	kref_put(&batman_if->refcount, hardif_free_ref);
+	ret = hardif_enable_interface(hard_iface, buff);
 
+out:
+	hardif_free_ref(hard_iface);
 	return ret;
 }
 
@@ -512,13 +513,13 @@ static ssize_t show_iface_status(struct kobject *kobj, struct attribute *attr,
 				 char *buff)
 {
 	struct net_device *net_dev = kobj_to_netdev(kobj);
-	struct batman_if *batman_if = get_batman_if_by_netdev(net_dev);
+	struct hard_iface *hard_iface = hardif_get_by_netdev(net_dev);
 	ssize_t length;
 
-	if (!batman_if)
+	if (!hard_iface)
 		return 0;
 
-	switch (batman_if->if_status) {
+	switch (hard_iface->if_status) {
 	case IF_TO_BE_REMOVED:
 		length = sprintf(buff, "disabling\n");
 		break;
@@ -537,7 +538,7 @@ static ssize_t show_iface_status(struct kobject *kobj, struct attribute *attr,
 		break;
 	}
 
-	kref_put(&batman_if->refcount, hardif_free_ref);
+	hardif_free_ref(hard_iface);
 
 	return length;
 }
diff --git a/net/batman-adv/bat_sysfs.h b/net/batman-adv/bat_sysfs.h
index 7f186c0..02f1fa7 100644
--- a/net/batman-adv/bat_sysfs.h
+++ b/net/batman-adv/bat_sysfs.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2010 B.A.T.M.A.N. contributors:
+ * Copyright (C) 2010-2011 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
  *
diff --git a/net/batman-adv/bitarray.c b/net/batman-adv/bitarray.c
index bbcd8f7..ad2ca92 100644
--- a/net/batman-adv/bitarray.c
+++ b/net/batman-adv/bitarray.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2006-2010 B.A.T.M.A.N. contributors:
+ * Copyright (C) 2006-2011 B.A.T.M.A.N. contributors:
  *
  * Simon Wunderlich, Marek Lindner
  *
diff --git a/net/batman-adv/bitarray.h b/net/batman-adv/bitarray.h
index ac54017..769c246 100644
--- a/net/batman-adv/bitarray.h
+++ b/net/batman-adv/bitarray.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2006-2010 B.A.T.M.A.N. contributors:
+ * Copyright (C) 2006-2011 B.A.T.M.A.N. contributors:
  *
  * Simon Wunderlich, Marek Lindner
  *
diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index 0065ffb..3cc4355 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2009-2010 B.A.T.M.A.N. contributors:
+ * Copyright (C) 2009-2011 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
  *
@@ -28,58 +28,75 @@
 #include <linux/udp.h>
 #include <linux/if_vlan.h>
 
-static void gw_node_free_ref(struct kref *refcount)
+static void gw_node_free_rcu(struct rcu_head *rcu)
 {
 	struct gw_node *gw_node;
 
-	gw_node = container_of(refcount, struct gw_node, refcount);
+	gw_node = container_of(rcu, struct gw_node, rcu);
 	kfree(gw_node);
 }
 
-static void gw_node_free_rcu(struct rcu_head *rcu)
+static void gw_node_free_ref(struct gw_node *gw_node)
 {
-	struct gw_node *gw_node;
-
-	gw_node = container_of(rcu, struct gw_node, rcu);
-	kref_put(&gw_node->refcount, gw_node_free_ref);
+	if (atomic_dec_and_test(&gw_node->refcount))
+		call_rcu(&gw_node->rcu, gw_node_free_rcu);
 }
 
 void *gw_get_selected(struct bat_priv *bat_priv)
 {
-	struct gw_node *curr_gateway_tmp = bat_priv->curr_gw;
+	struct gw_node *curr_gateway_tmp;
+	struct orig_node *orig_node = NULL;
 
+	rcu_read_lock();
+	curr_gateway_tmp = rcu_dereference(bat_priv->curr_gw);
 	if (!curr_gateway_tmp)
-		return NULL;
+		goto out;
+
+	orig_node = curr_gateway_tmp->orig_node;
+	if (!orig_node)
+		goto out;
 
-	return curr_gateway_tmp->orig_node;
+	if (!atomic_inc_not_zero(&orig_node->refcount))
+		orig_node = NULL;
+
+out:
+	rcu_read_unlock();
+	return orig_node;
 }
 
 void gw_deselect(struct bat_priv *bat_priv)
 {
-	struct gw_node *gw_node = bat_priv->curr_gw;
+	struct gw_node *gw_node;
 
-	bat_priv->curr_gw = NULL;
+	spin_lock_bh(&bat_priv->gw_list_lock);
+	gw_node = rcu_dereference(bat_priv->curr_gw);
+	rcu_assign_pointer(bat_priv->curr_gw, NULL);
+	spin_unlock_bh(&bat_priv->gw_list_lock);
 
 	if (gw_node)
-		kref_put(&gw_node->refcount, gw_node_free_ref);
+		gw_node_free_ref(gw_node);
 }
 
-static struct gw_node *gw_select(struct bat_priv *bat_priv,
-			  struct gw_node *new_gw_node)
+static void gw_select(struct bat_priv *bat_priv, struct gw_node *new_gw_node)
 {
-	struct gw_node *curr_gw_node = bat_priv->curr_gw;
+	struct gw_node *curr_gw_node;
 
-	if (new_gw_node)
-		kref_get(&new_gw_node->refcount);
+	if (new_gw_node && !atomic_inc_not_zero(&new_gw_node->refcount))
+		new_gw_node = NULL;
+
+	spin_lock_bh(&bat_priv->gw_list_lock);
+	curr_gw_node = rcu_dereference(bat_priv->curr_gw);
+	rcu_assign_pointer(bat_priv->curr_gw, new_gw_node);
+	spin_unlock_bh(&bat_priv->gw_list_lock);
 
-	bat_priv->curr_gw = new_gw_node;
-	return curr_gw_node;
+	if (curr_gw_node)
+		gw_node_free_ref(curr_gw_node);
 }
 
 void gw_election(struct bat_priv *bat_priv)
 {
 	struct hlist_node *node;
-	struct gw_node *gw_node, *curr_gw_tmp = NULL, *old_gw_node = NULL;
+	struct gw_node *gw_node, *curr_gw, *curr_gw_tmp = NULL;
 	uint8_t max_tq = 0;
 	uint32_t max_gw_factor = 0, tmp_gw_factor = 0;
 	int down, up;
@@ -93,19 +110,23 @@ void gw_election(struct bat_priv *bat_priv)
 	if (atomic_read(&bat_priv->gw_mode) != GW_MODE_CLIENT)
 		return;
 
-	if (bat_priv->curr_gw)
+	rcu_read_lock();
+	curr_gw = rcu_dereference(bat_priv->curr_gw);
+	if (curr_gw) {
+		rcu_read_unlock();
 		return;
+	}
 
-	rcu_read_lock();
 	if (hlist_empty(&bat_priv->gw_list)) {
-		rcu_read_unlock();
 
-		if (bat_priv->curr_gw) {
+		if (curr_gw) {
+			rcu_read_unlock();
 			bat_dbg(DBG_BATMAN, bat_priv,
 				"Removing selected gateway - "
 				"no gateway in range\n");
 			gw_deselect(bat_priv);
-		}
+		} else
+			rcu_read_unlock();
 
 		return;
 	}
@@ -154,12 +175,12 @@ void gw_election(struct bat_priv *bat_priv)
 			max_gw_factor = tmp_gw_factor;
 	}
 
-	if (bat_priv->curr_gw != curr_gw_tmp) {
-		if ((bat_priv->curr_gw) && (!curr_gw_tmp))
+	if (curr_gw != curr_gw_tmp) {
+		if ((curr_gw) && (!curr_gw_tmp))
 			bat_dbg(DBG_BATMAN, bat_priv,
 				"Removing selected gateway - "
 				"no gateway in range\n");
-		else if ((!bat_priv->curr_gw) && (curr_gw_tmp))
+		else if ((!curr_gw) && (curr_gw_tmp))
 			bat_dbg(DBG_BATMAN, bat_priv,
 				"Adding route to gateway %pM "
 				"(gw_flags: %i, tq: %i)\n",
@@ -174,43 +195,43 @@ void gw_election(struct bat_priv *bat_priv)
 				curr_gw_tmp->orig_node->gw_flags,
 				curr_gw_tmp->orig_node->router->tq_avg);
 
-		old_gw_node = gw_select(bat_priv, curr_gw_tmp);
+		gw_select(bat_priv, curr_gw_tmp);
 	}
 
 	rcu_read_unlock();
-
-	/* the kfree() has to be outside of the rcu lock */
-	if (old_gw_node)
-		kref_put(&old_gw_node->refcount, gw_node_free_ref);
 }
 
 void gw_check_election(struct bat_priv *bat_priv, struct orig_node *orig_node)
 {
-	struct gw_node *curr_gateway_tmp = bat_priv->curr_gw;
+	struct gw_node *curr_gateway_tmp;
 	uint8_t gw_tq_avg, orig_tq_avg;
 
+	rcu_read_lock();
+	curr_gateway_tmp = rcu_dereference(bat_priv->curr_gw);
 	if (!curr_gateway_tmp)
-		return;
+		goto out_rcu;
 
 	if (!curr_gateway_tmp->orig_node)
-		goto deselect;
+		goto deselect_rcu;
 
 	if (!curr_gateway_tmp->orig_node->router)
-		goto deselect;
+		goto deselect_rcu;
 
 	/* this node already is the gateway */
 	if (curr_gateway_tmp->orig_node == orig_node)
-		return;
+		goto out_rcu;
 
 	if (!orig_node->router)
-		return;
+		goto out_rcu;
 
 	gw_tq_avg = curr_gateway_tmp->orig_node->router->tq_avg;
+	rcu_read_unlock();
+
 	orig_tq_avg = orig_node->router->tq_avg;
 
 	/* the TQ value has to be better */
 	if (orig_tq_avg < gw_tq_avg)
-		return;
+		goto out;
 
 	/**
 	 * if the routing class is greater than 3 the value tells us how much
@@ -218,15 +239,23 @@ void gw_check_election(struct bat_priv *bat_priv, struct orig_node *orig_node)
 	 **/
 	if ((atomic_read(&bat_priv->gw_sel_class) > 3) &&
 	    (orig_tq_avg - gw_tq_avg < atomic_read(&bat_priv->gw_sel_class)))
-		return;
+		goto out;
 
 	bat_dbg(DBG_BATMAN, bat_priv,
 		"Restarting gateway selection: better gateway found (tq curr: "
 		"%i, tq new: %i)\n",
 		gw_tq_avg, orig_tq_avg);
+	goto deselect;
 
+out_rcu:
+	rcu_read_unlock();
+	goto out;
+deselect_rcu:
+	rcu_read_unlock();
 deselect:
 	gw_deselect(bat_priv);
+out:
+	return;
 }
 
 static void gw_node_add(struct bat_priv *bat_priv,
@@ -242,7 +271,7 @@ static void gw_node_add(struct bat_priv *bat_priv,
 	memset(gw_node, 0, sizeof(struct gw_node));
 	INIT_HLIST_NODE(&gw_node->list);
 	gw_node->orig_node = orig_node;
-	kref_init(&gw_node->refcount);
+	atomic_set(&gw_node->refcount, 1);
 
 	spin_lock_bh(&bat_priv->gw_list_lock);
 	hlist_add_head_rcu(&gw_node->list, &bat_priv->gw_list);
@@ -283,7 +312,7 @@ void gw_node_update(struct bat_priv *bat_priv,
 				"Gateway %pM removed from gateway list\n",
 				orig_node->orig);
 
-			if (gw_node == bat_priv->curr_gw) {
+			if (gw_node == rcu_dereference(bat_priv->curr_gw)) {
 				rcu_read_unlock();
 				gw_deselect(bat_priv);
 				return;
@@ -321,11 +350,11 @@ void gw_node_purge(struct bat_priv *bat_priv)
 		    atomic_read(&bat_priv->mesh_state) == MESH_ACTIVE)
 			continue;
 
-		if (bat_priv->curr_gw == gw_node)
+		if (rcu_dereference(bat_priv->curr_gw) == gw_node)
 			gw_deselect(bat_priv);
 
 		hlist_del_rcu(&gw_node->list);
-		call_rcu(&gw_node->rcu, gw_node_free_rcu);
+		gw_node_free_ref(gw_node);
 	}
 
 
@@ -335,12 +364,16 @@ void gw_node_purge(struct bat_priv *bat_priv)
 static int _write_buffer_text(struct bat_priv *bat_priv,
 			      struct seq_file *seq, struct gw_node *gw_node)
 {
-	int down, up;
+	struct gw_node *curr_gw;
+	int down, up, ret;
 
 	gw_bandwidth_to_kbit(gw_node->orig_node->gw_flags, &down, &up);
 
-	return seq_printf(seq, "%s %pM (%3i) %pM [%10s]: %3i - %i%s/%i%s\n",
-		       (bat_priv->curr_gw == gw_node ? "=>" : "  "),
+	rcu_read_lock();
+	curr_gw = rcu_dereference(bat_priv->curr_gw);
+
+	ret = seq_printf(seq, "%s %pM (%3i) %pM [%10s]: %3i - %i%s/%i%s\n",
+		       (curr_gw == gw_node ? "=>" : "  "),
 		       gw_node->orig_node->orig,
 		       gw_node->orig_node->router->tq_avg,
 		       gw_node->orig_node->router->addr,
@@ -350,6 +383,9 @@ static int _write_buffer_text(struct bat_priv *bat_priv,
 		       (down > 2048 ? "MBit" : "KBit"),
 		       (up > 2048 ? up / 1024 : up),
 		       (up > 2048 ? "MBit" : "KBit"));
+
+	rcu_read_unlock();
+	return ret;
 }
 
 int gw_client_seq_print_text(struct seq_file *seq, void *offset)
@@ -470,8 +506,12 @@ int gw_is_target(struct bat_priv *bat_priv, struct sk_buff *skb)
 	if (atomic_read(&bat_priv->gw_mode) == GW_MODE_SERVER)
 		return -1;
 
-	if (!bat_priv->curr_gw)
+	rcu_read_lock();
+	if (!rcu_dereference(bat_priv->curr_gw)) {
+		rcu_read_unlock();
 		return 0;
+	}
+	rcu_read_unlock();
 
 	return 1;
 }
diff --git a/net/batman-adv/gateway_client.h b/net/batman-adv/gateway_client.h
index 4585e65..2aa4391 100644
--- a/net/batman-adv/gateway_client.h
+++ b/net/batman-adv/gateway_client.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2009-2010 B.A.T.M.A.N. contributors:
+ * Copyright (C) 2009-2011 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
  *
diff --git a/net/batman-adv/gateway_common.c b/net/batman-adv/gateway_common.c
index b962982..50d3a59 100644
--- a/net/batman-adv/gateway_common.c
+++ b/net/batman-adv/gateway_common.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2009-2010 B.A.T.M.A.N. contributors:
+ * Copyright (C) 2009-2011 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
  *
diff --git a/net/batman-adv/gateway_common.h b/net/batman-adv/gateway_common.h
index 5e728d0..55e527a 100644
--- a/net/batman-adv/gateway_common.h
+++ b/net/batman-adv/gateway_common.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2009-2010 B.A.T.M.A.N. contributors:
+ * Copyright (C) 2009-2011 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
  *
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index 4f95777..b3058e4 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors:
+ * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
@@ -31,36 +31,40 @@
 
 #include <linux/if_arp.h>
 
-/* protect update critical side of if_list - but not the content */
-static DEFINE_SPINLOCK(if_list_lock);
+/* protect update critical side of hardif_list - but not the content */
+static DEFINE_SPINLOCK(hardif_list_lock);
 
-static void hardif_free_rcu(struct rcu_head *rcu)
+
+static int batman_skb_recv(struct sk_buff *skb,
+			   struct net_device *dev,
+			   struct packet_type *ptype,
+			   struct net_device *orig_dev);
+
+void hardif_free_rcu(struct rcu_head *rcu)
 {
-	struct batman_if *batman_if;
+	struct hard_iface *hard_iface;
 
-	batman_if = container_of(rcu, struct batman_if, rcu);
-	dev_put(batman_if->net_dev);
-	kref_put(&batman_if->refcount, hardif_free_ref);
+	hard_iface = container_of(rcu, struct hard_iface, rcu);
+	dev_put(hard_iface->net_dev);
+	kfree(hard_iface);
 }
 
-struct batman_if *get_batman_if_by_netdev(struct net_device *net_dev)
+struct hard_iface *hardif_get_by_netdev(struct net_device *net_dev)
 {
-	struct batman_if *batman_if;
+	struct hard_iface *hard_iface;
 
 	rcu_read_lock();
-	list_for_each_entry_rcu(batman_if, &if_list, list) {
-		if (batman_if->net_dev == net_dev)
+	list_for_each_entry_rcu(hard_iface, &hardif_list, list) {
+		if (hard_iface->net_dev == net_dev &&
+		    atomic_inc_not_zero(&hard_iface->refcount))
 			goto out;
 	}
 
-	batman_if = NULL;
+	hard_iface = NULL;
 
 out:
-	if (batman_if)
-		kref_get(&batman_if->refcount);
-
 	rcu_read_unlock();
-	return batman_if;
+	return hard_iface;
 }
 
 static int is_valid_iface(struct net_device *net_dev)
@@ -75,13 +79,8 @@ static int is_valid_iface(struct net_device *net_dev)
 		return 0;
 
 	/* no batman over batman */
-#ifdef HAVE_NET_DEVICE_OPS
-	if (net_dev->netdev_ops->ndo_start_xmit == interface_tx)
-		return 0;
-#else
-	if (net_dev->hard_start_xmit == interface_tx)
+	if (softif_is_valid(net_dev))
 		return 0;
-#endif
 
 	/* Device is being bridged */
 	/* if (net_dev->priv_flags & IFF_BRIDGE_PORT)
@@ -90,27 +89,25 @@ static int is_valid_iface(struct net_device *net_dev)
 	return 1;
 }
 
-static struct batman_if *get_active_batman_if(struct net_device *soft_iface)
+static struct hard_iface *hardif_get_active(struct net_device *soft_iface)
 {
-	struct batman_if *batman_if;
+	struct hard_iface *hard_iface;
 
 	rcu_read_lock();
-	list_for_each_entry_rcu(batman_if, &if_list, list) {
-		if (batman_if->soft_iface != soft_iface)
+	list_for_each_entry_rcu(hard_iface, &hardif_list, list) {
+		if (hard_iface->soft_iface != soft_iface)
 			continue;
 
-		if (batman_if->if_status == IF_ACTIVE)
+		if (hard_iface->if_status == IF_ACTIVE &&
+		    atomic_inc_not_zero(&hard_iface->refcount))
 			goto out;
 	}
 
-	batman_if = NULL;
+	hard_iface = NULL;
 
 out:
-	if (batman_if)
-		kref_get(&batman_if->refcount);
-
 	rcu_read_unlock();
-	return batman_if;
+	return hard_iface;
 }
 
 static void update_primary_addr(struct bat_priv *bat_priv)
@@ -126,24 +123,24 @@ static void update_primary_addr(struct bat_priv *bat_priv)
 }
 
 static void set_primary_if(struct bat_priv *bat_priv,
-			   struct batman_if *batman_if)
+			   struct hard_iface *hard_iface)
 {
 	struct batman_packet *batman_packet;
-	struct batman_if *old_if;
+	struct hard_iface *old_if;
 
-	if (batman_if)
-		kref_get(&batman_if->refcount);
+	if (hard_iface && !atomic_inc_not_zero(&hard_iface->refcount))
+		hard_iface = NULL;
 
 	old_if = bat_priv->primary_if;
-	bat_priv->primary_if = batman_if;
+	bat_priv->primary_if = hard_iface;
 
 	if (old_if)
-		kref_put(&old_if->refcount, hardif_free_ref);
+		hardif_free_ref(old_if);
 
 	if (!bat_priv->primary_if)
 		return;
 
-	batman_packet = (struct batman_packet *)(batman_if->packet_buff);
+	batman_packet = (struct batman_packet *)(hard_iface->packet_buff);
 	batman_packet->flags = PRIMARIES_FIRST_HOP;
 	batman_packet->ttl = TTL;
 
@@ -156,42 +153,42 @@ static void set_primary_if(struct bat_priv *bat_priv,
 	atomic_set(&bat_priv->hna_local_changed, 1);
 }
 
-static bool hardif_is_iface_up(struct batman_if *batman_if)
+static bool hardif_is_iface_up(struct hard_iface *hard_iface)
 {
-	if (batman_if->net_dev->flags & IFF_UP)
+	if (hard_iface->net_dev->flags & IFF_UP)
 		return true;
 
 	return false;
 }
 
-static void update_mac_addresses(struct batman_if *batman_if)
+static void update_mac_addresses(struct hard_iface *hard_iface)
 {
-	memcpy(((struct batman_packet *)(batman_if->packet_buff))->orig,
-	       batman_if->net_dev->dev_addr, ETH_ALEN);
-	memcpy(((struct batman_packet *)(batman_if->packet_buff))->prev_sender,
-	       batman_if->net_dev->dev_addr, ETH_ALEN);
+	memcpy(((struct batman_packet *)(hard_iface->packet_buff))->orig,
+	       hard_iface->net_dev->dev_addr, ETH_ALEN);
+	memcpy(((struct batman_packet *)(hard_iface->packet_buff))->prev_sender,
+	       hard_iface->net_dev->dev_addr, ETH_ALEN);
 }
 
 static void check_known_mac_addr(struct net_device *net_dev)
 {
-	struct batman_if *batman_if;
+	struct hard_iface *hard_iface;
 
 	rcu_read_lock();
-	list_for_each_entry_rcu(batman_if, &if_list, list) {
-		if ((batman_if->if_status != IF_ACTIVE) &&
-		    (batman_if->if_status != IF_TO_BE_ACTIVATED))
+	list_for_each_entry_rcu(hard_iface, &hardif_list, list) {
+		if ((hard_iface->if_status != IF_ACTIVE) &&
+		    (hard_iface->if_status != IF_TO_BE_ACTIVATED))
 			continue;
 
-		if (batman_if->net_dev == net_dev)
+		if (hard_iface->net_dev == net_dev)
 			continue;
 
-		if (!compare_orig(batman_if->net_dev->dev_addr,
-				  net_dev->dev_addr))
+		if (!compare_eth(hard_iface->net_dev->dev_addr,
+				 net_dev->dev_addr))
 			continue;
 
 		pr_warning("The newly added mac address (%pM) already exists "
 			   "on: %s\n", net_dev->dev_addr,
-			   batman_if->net_dev->name);
+			   hard_iface->net_dev->name);
 		pr_warning("It is strongly recommended to keep mac addresses "
 			   "unique to avoid problems!\n");
 	}
@@ -201,7 +198,7 @@ static void check_known_mac_addr(struct net_device *net_dev)
 int hardif_min_mtu(struct net_device *soft_iface)
 {
 	struct bat_priv *bat_priv = netdev_priv(soft_iface);
-	struct batman_if *batman_if;
+	struct hard_iface *hard_iface;
 	/* allow big frames if all devices are capable to do so
 	 * (have MTU > 1500 + BAT_HEADER_LEN) */
 	int min_mtu = ETH_DATA_LEN;
@@ -210,15 +207,15 @@ int hardif_min_mtu(struct net_device *soft_iface)
 		goto out;
 
 	rcu_read_lock();
-	list_for_each_entry_rcu(batman_if, &if_list, list) {
-		if ((batman_if->if_status != IF_ACTIVE) &&
-		    (batman_if->if_status != IF_TO_BE_ACTIVATED))
+	list_for_each_entry_rcu(hard_iface, &hardif_list, list) {
+		if ((hard_iface->if_status != IF_ACTIVE) &&
+		    (hard_iface->if_status != IF_TO_BE_ACTIVATED))
 			continue;
 
-		if (batman_if->soft_iface != soft_iface)
+		if (hard_iface->soft_iface != soft_iface)
 			continue;
 
-		min_mtu = min_t(int, batman_if->net_dev->mtu - BAT_HEADER_LEN,
+		min_mtu = min_t(int, hard_iface->net_dev->mtu - BAT_HEADER_LEN,
 				min_mtu);
 	}
 	rcu_read_unlock();
@@ -236,77 +233,95 @@ void update_min_mtu(struct net_device *soft_iface)
 		soft_iface->mtu = min_mtu;
 }
 
-static void hardif_activate_interface(struct batman_if *batman_if)
+static void hardif_activate_interface(struct hard_iface *hard_iface)
 {
 	struct bat_priv *bat_priv;
 
-	if (batman_if->if_status != IF_INACTIVE)
+	if (hard_iface->if_status != IF_INACTIVE)
 		return;
 
-	bat_priv = netdev_priv(batman_if->soft_iface);
+	bat_priv = netdev_priv(hard_iface->soft_iface);
 
-	update_mac_addresses(batman_if);
-	batman_if->if_status = IF_TO_BE_ACTIVATED;
+	update_mac_addresses(hard_iface);
+	hard_iface->if_status = IF_TO_BE_ACTIVATED;
 
 	/**
 	 * the first active interface becomes our primary interface or
 	 * the next active interface after the old primay interface was removed
 	 */
 	if (!bat_priv->primary_if)
-		set_primary_if(bat_priv, batman_if);
+		set_primary_if(bat_priv, hard_iface);
 
-	bat_info(batman_if->soft_iface, "Interface activated: %s\n",
-		 batman_if->net_dev->name);
+	bat_info(hard_iface->soft_iface, "Interface activated: %s\n",
+		 hard_iface->net_dev->name);
 
-	update_min_mtu(batman_if->soft_iface);
+	update_min_mtu(hard_iface->soft_iface);
 	return;
 }
 
-static void hardif_deactivate_interface(struct batman_if *batman_if)
+static void hardif_deactivate_interface(struct hard_iface *hard_iface)
 {
-	if ((batman_if->if_status != IF_ACTIVE) &&
-	   (batman_if->if_status != IF_TO_BE_ACTIVATED))
+	if ((hard_iface->if_status != IF_ACTIVE) &&
+	    (hard_iface->if_status != IF_TO_BE_ACTIVATED))
 		return;
 
-	batman_if->if_status = IF_INACTIVE;
+	hard_iface->if_status = IF_INACTIVE;
 
-	bat_info(batman_if->soft_iface, "Interface deactivated: %s\n",
-		 batman_if->net_dev->name);
+	bat_info(hard_iface->soft_iface, "Interface deactivated: %s\n",
+		 hard_iface->net_dev->name);
 
-	update_min_mtu(batman_if->soft_iface);
+	update_min_mtu(hard_iface->soft_iface);
 }
 
-int hardif_enable_interface(struct batman_if *batman_if, char *iface_name)
+int hardif_enable_interface(struct hard_iface *hard_iface, char *iface_name)
 {
 	struct bat_priv *bat_priv;
 	struct batman_packet *batman_packet;
+	struct net_device *soft_iface;
+	int ret;
 
-	if (batman_if->if_status != IF_NOT_IN_USE)
+	if (hard_iface->if_status != IF_NOT_IN_USE)
 		goto out;
 
-	batman_if->soft_iface = dev_get_by_name(&init_net, iface_name);
+	if (!atomic_inc_not_zero(&hard_iface->refcount))
+		goto out;
 
-	if (!batman_if->soft_iface) {
-		batman_if->soft_iface = softif_create(iface_name);
+	soft_iface = dev_get_by_name(&init_net, iface_name);
 
-		if (!batman_if->soft_iface)
+	if (!soft_iface) {
+		soft_iface = softif_create(iface_name);
+
+		if (!soft_iface) {
+			ret = -ENOMEM;
 			goto err;
+		}
 
 		/* dev_get_by_name() increases the reference counter for us */
-		dev_hold(batman_if->soft_iface);
+		dev_hold(soft_iface);
+	}
+
+	if (!softif_is_valid(soft_iface)) {
+		pr_err("Can't create batman mesh interface %s: "
+		       "already exists as regular interface\n",
+		       soft_iface->name);
+		dev_put(soft_iface);
+		ret = -EINVAL;
+		goto err;
 	}
 
-	bat_priv = netdev_priv(batman_if->soft_iface);
-	batman_if->packet_len = BAT_PACKET_LEN;
-	batman_if->packet_buff = kmalloc(batman_if->packet_len, GFP_ATOMIC);
+	hard_iface->soft_iface = soft_iface;
+	bat_priv = netdev_priv(hard_iface->soft_iface);
+	hard_iface->packet_len = BAT_PACKET_LEN;
+	hard_iface->packet_buff = kmalloc(hard_iface->packet_len, GFP_ATOMIC);
 
-	if (!batman_if->packet_buff) {
-		bat_err(batman_if->soft_iface, "Can't add interface packet "
-			"(%s): out of memory\n", batman_if->net_dev->name);
+	if (!hard_iface->packet_buff) {
+		bat_err(hard_iface->soft_iface, "Can't add interface packet "
+			"(%s): out of memory\n", hard_iface->net_dev->name);
+		ret = -ENOMEM;
 		goto err;
 	}
 
-	batman_packet = (struct batman_packet *)(batman_if->packet_buff);
+	batman_packet = (struct batman_packet *)(hard_iface->packet_buff);
 	batman_packet->packet_type = BAT_PACKET;
 	batman_packet->version = COMPAT_VERSION;
 	batman_packet->flags = 0;
@@ -314,107 +329,107 @@ int hardif_enable_interface(struct batman_if *batman_if, char *iface_name)
 	batman_packet->tq = TQ_MAX_VALUE;
 	batman_packet->num_hna = 0;
 
-	batman_if->if_num = bat_priv->num_ifaces;
+	hard_iface->if_num = bat_priv->num_ifaces;
 	bat_priv->num_ifaces++;
-	batman_if->if_status = IF_INACTIVE;
-	orig_hash_add_if(batman_if, bat_priv->num_ifaces);
+	hard_iface->if_status = IF_INACTIVE;
+	orig_hash_add_if(hard_iface, bat_priv->num_ifaces);
 
-	batman_if->batman_adv_ptype.type = __constant_htons(ETH_P_BATMAN);
-	batman_if->batman_adv_ptype.func = batman_skb_recv;
-	batman_if->batman_adv_ptype.dev = batman_if->net_dev;
-	kref_get(&batman_if->refcount);
-	dev_add_pack(&batman_if->batman_adv_ptype);
+	hard_iface->batman_adv_ptype.type = __constant_htons(ETH_P_BATMAN);
+	hard_iface->batman_adv_ptype.func = batman_skb_recv;
+	hard_iface->batman_adv_ptype.dev = hard_iface->net_dev;
+	dev_add_pack(&hard_iface->batman_adv_ptype);
 
-	atomic_set(&batman_if->seqno, 1);
-	atomic_set(&batman_if->frag_seqno, 1);
-	bat_info(batman_if->soft_iface, "Adding interface: %s\n",
-		 batman_if->net_dev->name);
+	atomic_set(&hard_iface->seqno, 1);
+	atomic_set(&hard_iface->frag_seqno, 1);
+	bat_info(hard_iface->soft_iface, "Adding interface: %s\n",
+		 hard_iface->net_dev->name);
 
-	if (atomic_read(&bat_priv->fragmentation) && batman_if->net_dev->mtu <
+	if (atomic_read(&bat_priv->fragmentation) && hard_iface->net_dev->mtu <
 		ETH_DATA_LEN + BAT_HEADER_LEN)
-		bat_info(batman_if->soft_iface,
+		bat_info(hard_iface->soft_iface,
 			"The MTU of interface %s is too small (%i) to handle "
 			"the transport of batman-adv packets. Packets going "
 			"over this interface will be fragmented on layer2 "
 			"which could impact the performance. Setting the MTU "
 			"to %zi would solve the problem.\n",
-			batman_if->net_dev->name, batman_if->net_dev->mtu,
+			hard_iface->net_dev->name, hard_iface->net_dev->mtu,
 			ETH_DATA_LEN + BAT_HEADER_LEN);
 
-	if (!atomic_read(&bat_priv->fragmentation) && batman_if->net_dev->mtu <
+	if (!atomic_read(&bat_priv->fragmentation) && hard_iface->net_dev->mtu <
 		ETH_DATA_LEN + BAT_HEADER_LEN)
-		bat_info(batman_if->soft_iface,
+		bat_info(hard_iface->soft_iface,
 			"The MTU of interface %s is too small (%i) to handle "
 			"the transport of batman-adv packets. If you experience"
 			" problems getting traffic through try increasing the "
 			"MTU to %zi.\n",
-			batman_if->net_dev->name, batman_if->net_dev->mtu,
+			hard_iface->net_dev->name, hard_iface->net_dev->mtu,
 			ETH_DATA_LEN + BAT_HEADER_LEN);
 
-	if (hardif_is_iface_up(batman_if))
-		hardif_activate_interface(batman_if);
+	if (hardif_is_iface_up(hard_iface))
+		hardif_activate_interface(hard_iface);
 	else
-		bat_err(batman_if->soft_iface, "Not using interface %s "
+		bat_err(hard_iface->soft_iface, "Not using interface %s "
 			"(retrying later): interface not active\n",
-			batman_if->net_dev->name);
+			hard_iface->net_dev->name);
 
 	/* begin scheduling originator messages on that interface */
-	schedule_own_packet(batman_if);
+	schedule_own_packet(hard_iface);
 
 out:
 	return 0;
 
 err:
-	return -ENOMEM;
+	hardif_free_ref(hard_iface);
+	return ret;
 }
 
-void hardif_disable_interface(struct batman_if *batman_if)
+void hardif_disable_interface(struct hard_iface *hard_iface)
 {
-	struct bat_priv *bat_priv = netdev_priv(batman_if->soft_iface);
+	struct bat_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
 
-	if (batman_if->if_status == IF_ACTIVE)
-		hardif_deactivate_interface(batman_if);
+	if (hard_iface->if_status == IF_ACTIVE)
+		hardif_deactivate_interface(hard_iface);
 
-	if (batman_if->if_status != IF_INACTIVE)
+	if (hard_iface->if_status != IF_INACTIVE)
 		return;
 
-	bat_info(batman_if->soft_iface, "Removing interface: %s\n",
-		 batman_if->net_dev->name);
-	dev_remove_pack(&batman_if->batman_adv_ptype);
-	kref_put(&batman_if->refcount, hardif_free_ref);
+	bat_info(hard_iface->soft_iface, "Removing interface: %s\n",
+		 hard_iface->net_dev->name);
+	dev_remove_pack(&hard_iface->batman_adv_ptype);
 
 	bat_priv->num_ifaces--;
-	orig_hash_del_if(batman_if, bat_priv->num_ifaces);
+	orig_hash_del_if(hard_iface, bat_priv->num_ifaces);
 
-	if (batman_if == bat_priv->primary_if) {
-		struct batman_if *new_if;
+	if (hard_iface == bat_priv->primary_if) {
+		struct hard_iface *new_if;
 
-		new_if = get_active_batman_if(batman_if->soft_iface);
+		new_if = hardif_get_active(hard_iface->soft_iface);
 		set_primary_if(bat_priv, new_if);
 
 		if (new_if)
-			kref_put(&new_if->refcount, hardif_free_ref);
+			hardif_free_ref(new_if);
 	}
 
-	kfree(batman_if->packet_buff);
-	batman_if->packet_buff = NULL;
-	batman_if->if_status = IF_NOT_IN_USE;
+	kfree(hard_iface->packet_buff);
+	hard_iface->packet_buff = NULL;
+	hard_iface->if_status = IF_NOT_IN_USE;
 
-	/* delete all references to this batman_if */
+	/* delete all references to this hard_iface */
 	purge_orig_ref(bat_priv);
-	purge_outstanding_packets(bat_priv, batman_if);
-	dev_put(batman_if->soft_iface);
+	purge_outstanding_packets(bat_priv, hard_iface);
+	dev_put(hard_iface->soft_iface);
 
 	/* nobody uses this interface anymore */
 	if (!bat_priv->num_ifaces)
-		softif_destroy(batman_if->soft_iface);
+		softif_destroy(hard_iface->soft_iface);
 
-	batman_if->soft_iface = NULL;
+	hard_iface->soft_iface = NULL;
+	hardif_free_ref(hard_iface);
 }
 
-static struct batman_if *hardif_add_interface(struct net_device *net_dev)
+static struct hard_iface *hardif_add_interface(struct net_device *net_dev)
 {
-	struct batman_if *batman_if;
+	struct hard_iface *hard_iface;
 	int ret;
 
 	ret = is_valid_iface(net_dev);
@@ -423,73 +438,73 @@ static struct batman_if *hardif_add_interface(struct net_device *net_dev)
 
 	dev_hold(net_dev);
 
-	batman_if = kmalloc(sizeof(struct batman_if), GFP_ATOMIC);
-	if (!batman_if) {
+	hard_iface = kmalloc(sizeof(struct hard_iface), GFP_ATOMIC);
+	if (!hard_iface) {
 		pr_err("Can't add interface (%s): out of memory\n",
 		       net_dev->name);
 		goto release_dev;
 	}
 
-	ret = sysfs_add_hardif(&batman_if->hardif_obj, net_dev);
+	ret = sysfs_add_hardif(&hard_iface->hardif_obj, net_dev);
 	if (ret)
 		goto free_if;
 
-	batman_if->if_num = -1;
-	batman_if->net_dev = net_dev;
-	batman_if->soft_iface = NULL;
-	batman_if->if_status = IF_NOT_IN_USE;
-	INIT_LIST_HEAD(&batman_if->list);
-	kref_init(&batman_if->refcount);
+	hard_iface->if_num = -1;
+	hard_iface->net_dev = net_dev;
+	hard_iface->soft_iface = NULL;
+	hard_iface->if_status = IF_NOT_IN_USE;
+	INIT_LIST_HEAD(&hard_iface->list);
+	/* extra reference for return */
+	atomic_set(&hard_iface->refcount, 2);
 
-	check_known_mac_addr(batman_if->net_dev);
+	check_known_mac_addr(hard_iface->net_dev);
 
-	spin_lock(&if_list_lock);
-	list_add_tail_rcu(&batman_if->list, &if_list);
-	spin_unlock(&if_list_lock);
+	spin_lock(&hardif_list_lock);
+	list_add_tail_rcu(&hard_iface->list, &hardif_list);
+	spin_unlock(&hardif_list_lock);
 
-	/* extra reference for return */
-	kref_get(&batman_if->refcount);
-	return batman_if;
+	return hard_iface;
 
 free_if:
-	kfree(batman_if);
+	kfree(hard_iface);
 release_dev:
 	dev_put(net_dev);
 out:
 	return NULL;
 }
 
-static void hardif_remove_interface(struct batman_if *batman_if)
+static void hardif_remove_interface(struct hard_iface *hard_iface)
 {
 	/* first deactivate interface */
-	if (batman_if->if_status != IF_NOT_IN_USE)
-		hardif_disable_interface(batman_if);
+	if (hard_iface->if_status != IF_NOT_IN_USE)
+		hardif_disable_interface(hard_iface);
 
-	if (batman_if->if_status != IF_NOT_IN_USE)
+	if (hard_iface->if_status != IF_NOT_IN_USE)
 		return;
 
-	batman_if->if_status = IF_TO_BE_REMOVED;
-	sysfs_del_hardif(&batman_if->hardif_obj);
-	call_rcu(&batman_if->rcu, hardif_free_rcu);
+	hard_iface->if_status = IF_TO_BE_REMOVED;
+	sysfs_del_hardif(&hard_iface->hardif_obj);
+	hardif_free_ref(hard_iface);
 }
 
 void hardif_remove_interfaces(void)
 {
-	struct batman_if *batman_if, *batman_if_tmp;
+	struct hard_iface *hard_iface, *hard_iface_tmp;
 	struct list_head if_queue;
 
 	INIT_LIST_HEAD(&if_queue);
 
-	spin_lock(&if_list_lock);
-	list_for_each_entry_safe(batman_if, batman_if_tmp, &if_list, list) {
-		list_del_rcu(&batman_if->list);
-		list_add_tail(&batman_if->list, &if_queue);
+	spin_lock(&hardif_list_lock);
+	list_for_each_entry_safe(hard_iface, hard_iface_tmp,
+				 &hardif_list, list) {
+		list_del_rcu(&hard_iface->list);
+		list_add_tail(&hard_iface->list, &if_queue);
 	}
-	spin_unlock(&if_list_lock);
+	spin_unlock(&hardif_list_lock);
 
 	rtnl_lock();
-	list_for_each_entry_safe(batman_if, batman_if_tmp, &if_queue, list) {
-		hardif_remove_interface(batman_if);
+	list_for_each_entry_safe(hard_iface, hard_iface_tmp, &if_queue, list) {
+		hardif_remove_interface(hard_iface);
 	}
 	rtnl_unlock();
 }
@@ -498,43 +513,43 @@ static int hard_if_event(struct notifier_block *this,
 			 unsigned long event, void *ptr)
 {
 	struct net_device *net_dev = (struct net_device *)ptr;
-	struct batman_if *batman_if = get_batman_if_by_netdev(net_dev);
+	struct hard_iface *hard_iface = hardif_get_by_netdev(net_dev);
 	struct bat_priv *bat_priv;
 
-	if (!batman_if && event == NETDEV_REGISTER)
-		batman_if = hardif_add_interface(net_dev);
+	if (!hard_iface && event == NETDEV_REGISTER)
+		hard_iface = hardif_add_interface(net_dev);
 
-	if (!batman_if)
+	if (!hard_iface)
 		goto out;
 
 	switch (event) {
 	case NETDEV_UP:
-		hardif_activate_interface(batman_if);
+		hardif_activate_interface(hard_iface);
 		break;
 	case NETDEV_GOING_DOWN:
 	case NETDEV_DOWN:
-		hardif_deactivate_interface(batman_if);
+		hardif_deactivate_interface(hard_iface);
 		break;
 	case NETDEV_UNREGISTER:
-		spin_lock(&if_list_lock);
-		list_del_rcu(&batman_if->list);
-		spin_unlock(&if_list_lock);
+		spin_lock(&hardif_list_lock);
+		list_del_rcu(&hard_iface->list);
+		spin_unlock(&hardif_list_lock);
 
-		hardif_remove_interface(batman_if);
+		hardif_remove_interface(hard_iface);
 		break;
 	case NETDEV_CHANGEMTU:
-		if (batman_if->soft_iface)
-			update_min_mtu(batman_if->soft_iface);
+		if (hard_iface->soft_iface)
+			update_min_mtu(hard_iface->soft_iface);
 		break;
 	case NETDEV_CHANGEADDR:
-		if (batman_if->if_status == IF_NOT_IN_USE)
+		if (hard_iface->if_status == IF_NOT_IN_USE)
 			goto hardif_put;
 
-		check_known_mac_addr(batman_if->net_dev);
-		update_mac_addresses(batman_if);
+		check_known_mac_addr(hard_iface->net_dev);
+		update_mac_addresses(hard_iface);
 
-		bat_priv = netdev_priv(batman_if->soft_iface);
-		if (batman_if == bat_priv->primary_if)
+		bat_priv = netdev_priv(hard_iface->soft_iface);
+		if (hard_iface == bat_priv->primary_if)
 			update_primary_addr(bat_priv);
 		break;
 	default:
@@ -542,22 +557,23 @@ static int hard_if_event(struct notifier_block *this,
 	};
 
 hardif_put:
-	kref_put(&batman_if->refcount, hardif_free_ref);
+	hardif_free_ref(hard_iface);
 out:
 	return NOTIFY_DONE;
 }
 
 /* receive a packet with the batman ethertype coming on a hard
  * interface */
-int batman_skb_recv(struct sk_buff *skb, struct net_device *dev,
-	struct packet_type *ptype, struct net_device *orig_dev)
+static int batman_skb_recv(struct sk_buff *skb, struct net_device *dev,
+			   struct packet_type *ptype,
+			   struct net_device *orig_dev)
 {
 	struct bat_priv *bat_priv;
 	struct batman_packet *batman_packet;
-	struct batman_if *batman_if;
+	struct hard_iface *hard_iface;
 	int ret;
 
-	batman_if = container_of(ptype, struct batman_if, batman_adv_ptype);
+	hard_iface = container_of(ptype, struct hard_iface, batman_adv_ptype);
 	skb = skb_share_check(skb, GFP_ATOMIC);
 
 	/* skb was released by skb_share_check() */
@@ -573,16 +589,16 @@ int batman_skb_recv(struct sk_buff *skb, struct net_device *dev,
 				|| !skb_mac_header(skb)))
 		goto err_free;
 
-	if (!batman_if->soft_iface)
+	if (!hard_iface->soft_iface)
 		goto err_free;
 
-	bat_priv = netdev_priv(batman_if->soft_iface);
+	bat_priv = netdev_priv(hard_iface->soft_iface);
 
 	if (atomic_read(&bat_priv->mesh_state) != MESH_ACTIVE)
 		goto err_free;
 
 	/* discard frames on not active interfaces */
-	if (batman_if->if_status != IF_ACTIVE)
+	if (hard_iface->if_status != IF_ACTIVE)
 		goto err_free;
 
 	batman_packet = (struct batman_packet *)skb->data;
@@ -600,32 +616,32 @@ int batman_skb_recv(struct sk_buff *skb, struct net_device *dev,
 	switch (batman_packet->packet_type) {
 		/* batman originator packet */
 	case BAT_PACKET:
-		ret = recv_bat_packet(skb, batman_if);
+		ret = recv_bat_packet(skb, hard_iface);
 		break;
 
 		/* batman icmp packet */
 	case BAT_ICMP:
-		ret = recv_icmp_packet(skb, batman_if);
+		ret = recv_icmp_packet(skb, hard_iface);
 		break;
 
 		/* unicast packet */
 	case BAT_UNICAST:
-		ret = recv_unicast_packet(skb, batman_if);
+		ret = recv_unicast_packet(skb, hard_iface);
 		break;
 
 		/* fragmented unicast packet */
 	case BAT_UNICAST_FRAG:
-		ret = recv_ucast_frag_packet(skb, batman_if);
+		ret = recv_ucast_frag_packet(skb, hard_iface);
 		break;
 
 		/* broadcast packet */
 	case BAT_BCAST:
-		ret = recv_bcast_packet(skb, batman_if);
+		ret = recv_bcast_packet(skb, hard_iface);
 		break;
 
 		/* vis packet */
 	case BAT_VIS:
-		ret = recv_vis_packet(skb, batman_if);
+		ret = recv_vis_packet(skb, hard_iface);
 		break;
 	default:
 		ret = NET_RX_DROP;
diff --git a/net/batman-adv/hard-interface.h b/net/batman-adv/hard-interface.h
index 30ec3b8..a9ddf36 100644
--- a/net/batman-adv/hard-interface.h
+++ b/net/batman-adv/hard-interface.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors:
+ * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
@@ -31,23 +31,18 @@
 
 extern struct notifier_block hard_if_notifier;
 
-struct batman_if *get_batman_if_by_netdev(struct net_device *net_dev);
-int hardif_enable_interface(struct batman_if *batman_if, char *iface_name);
-void hardif_disable_interface(struct batman_if *batman_if);
+struct hard_iface *hardif_get_by_netdev(struct net_device *net_dev);
+int hardif_enable_interface(struct hard_iface *hard_iface, char *iface_name);
+void hardif_disable_interface(struct hard_iface *hard_iface);
 void hardif_remove_interfaces(void);
-int batman_skb_recv(struct sk_buff *skb,
-				struct net_device *dev,
-				struct packet_type *ptype,
-				struct net_device *orig_dev);
 int hardif_min_mtu(struct net_device *soft_iface);
 void update_min_mtu(struct net_device *soft_iface);
+void hardif_free_rcu(struct rcu_head *rcu);
 
-static inline void hardif_free_ref(struct kref *refcount)
+static inline void hardif_free_ref(struct hard_iface *hard_iface)
 {
-	struct batman_if *batman_if;
-
-	batman_if = container_of(refcount, struct batman_if, refcount);
-	kfree(batman_if);
+	if (atomic_dec_and_test(&hard_iface->refcount))
+		call_rcu(&hard_iface->rcu, hardif_free_rcu);
 }
 
 #endif /* _NET_BATMAN_ADV_HARD_INTERFACE_H_ */
diff --git a/net/batman-adv/hash.c b/net/batman-adv/hash.c
index 26e623e..c5213d8 100644
--- a/net/batman-adv/hash.c
+++ b/net/batman-adv/hash.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2006-2010 B.A.T.M.A.N. contributors:
+ * Copyright (C) 2006-2011 B.A.T.M.A.N. contributors:
  *
  * Simon Wunderlich, Marek Lindner
  *
@@ -27,13 +27,16 @@ static void hash_init(struct hashtable_t *hash)
 {
 	int i;
 
-	for (i = 0 ; i < hash->size; i++)
+	for (i = 0 ; i < hash->size; i++) {
 		INIT_HLIST_HEAD(&hash->table[i]);
+		spin_lock_init(&hash->list_locks[i]);
+	}
 }
 
 /* free only the hashtable and the hash itself. */
 void hash_destroy(struct hashtable_t *hash)
 {
+	kfree(hash->list_locks);
 	kfree(hash->table);
 	kfree(hash);
 }
@@ -43,20 +46,25 @@ struct hashtable_t *hash_new(int size)
 {
 	struct hashtable_t *hash;
 
-	hash = kmalloc(sizeof(struct hashtable_t) , GFP_ATOMIC);
-
+	hash = kmalloc(sizeof(struct hashtable_t), GFP_ATOMIC);
 	if (!hash)
 		return NULL;
 
-	hash->size = size;
 	hash->table = kmalloc(sizeof(struct element_t *) * size, GFP_ATOMIC);
+	if (!hash->table)
+		goto free_hash;
 
-	if (!hash->table) {
-		kfree(hash);
-		return NULL;
-	}
+	hash->list_locks = kmalloc(sizeof(spinlock_t) * size, GFP_ATOMIC);
+	if (!hash->list_locks)
+		goto free_table;
 
+	hash->size = size;
 	hash_init(hash);
-
 	return hash;
+
+free_table:
+	kfree(hash->table);
+free_hash:
+	kfree(hash);
+	return NULL;
 }
diff --git a/net/batman-adv/hash.h b/net/batman-adv/hash.h
index 09216ad..434822b 100644
--- a/net/batman-adv/hash.h
+++ b/net/batman-adv/hash.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2006-2010 B.A.T.M.A.N. contributors:
+ * Copyright (C) 2006-2011 B.A.T.M.A.N. contributors:
  *
  * Simon Wunderlich, Marek Lindner
  *
@@ -28,32 +28,23 @@
  * compare 2 element datas for their keys,
  * return 0 if same and not 0 if not
  * same */
-typedef int (*hashdata_compare_cb)(void *, void *);
+typedef int (*hashdata_compare_cb)(struct hlist_node *, void *);
 
 /* the hashfunction, should return an index
  * based on the key in the data of the first
  * argument and the size the second */
 typedef int (*hashdata_choose_cb)(void *, int);
-typedef void (*hashdata_free_cb)(void *, void *);
-
-struct element_t {
-	void *data;		/* pointer to the data */
-	struct hlist_node hlist;	/* bucket list pointer */
-};
+typedef void (*hashdata_free_cb)(struct hlist_node *, void *);
 
 struct hashtable_t {
-	struct hlist_head *table;   /* the hashtable itself, with the buckets */
+	struct hlist_head *table;   /* the hashtable itself with the buckets */
+	spinlock_t *list_locks;     /* spinlock for each hash list entry */
 	int size;		    /* size of hashtable */
 };
 
 /* allocates and clears the hash */
 struct hashtable_t *hash_new(int size);
 
-/* remove element if you already found the element you want to delete and don't
- * need the overhead to find it again with hash_remove().  But usually, you
- * don't want to use this function, as it fiddles with hash-internals. */
-void *hash_remove_element(struct hashtable_t *hash, struct element_t *elem);
-
 /* free only the hashtable and the hash itself. */
 void hash_destroy(struct hashtable_t *hash);
 
@@ -64,21 +55,22 @@ static inline void hash_delete(struct hashtable_t *hash,
 			       hashdata_free_cb free_cb, void *arg)
 {
 	struct hlist_head *head;
-	struct hlist_node *walk, *safe;
-	struct element_t *bucket;
+	struct hlist_node *node, *node_tmp;
+	spinlock_t *list_lock; /* spinlock to protect write access */
 	int i;
 
 	for (i = 0; i < hash->size; i++) {
 		head = &hash->table[i];
+		list_lock = &hash->list_locks[i];
 
-		hlist_for_each_safe(walk, safe, head) {
-			bucket = hlist_entry(walk, struct element_t, hlist);
-			if (free_cb)
-				free_cb(bucket->data, arg);
+		spin_lock_bh(list_lock);
+		hlist_for_each_safe(node, node_tmp, head) {
+			hlist_del_rcu(node);
 
-			hlist_del(walk);
-			kfree(bucket);
+			if (free_cb)
+				free_cb(node, arg);
 		}
+		spin_unlock_bh(list_lock);
 	}
 
 	hash_destroy(hash);
@@ -87,35 +79,41 @@ static inline void hash_delete(struct hashtable_t *hash,
 /* adds data to the hashtable. returns 0 on success, -1 on error */
 static inline int hash_add(struct hashtable_t *hash,
 			   hashdata_compare_cb compare,
-			   hashdata_choose_cb choose, void *data)
+			   hashdata_choose_cb choose,
+			   void *data, struct hlist_node *data_node)
 {
 	int index;
 	struct hlist_head *head;
-	struct hlist_node *walk, *safe;
-	struct element_t *bucket;
+	struct hlist_node *node;
+	spinlock_t *list_lock; /* spinlock to protect write access */
 
 	if (!hash)
-		return -1;
+		goto err;
 
 	index = choose(data, hash->size);
 	head = &hash->table[index];
+	list_lock = &hash->list_locks[index];
+
+	rcu_read_lock();
+	__hlist_for_each_rcu(node, head) {
+		if (!compare(node, data))
+			continue;
 
-	hlist_for_each_safe(walk, safe, head) {
-		bucket = hlist_entry(walk, struct element_t, hlist);
-		if (compare(bucket->data, data))
-			return -1;
+		goto err_unlock;
 	}
+	rcu_read_unlock();
 
 	/* no duplicate found in list, add new element */
-	bucket = kmalloc(sizeof(struct element_t), GFP_ATOMIC);
-
-	if (!bucket)
-		return -1;
-
-	bucket->data = data;
-	hlist_add_head(&bucket->hlist, head);
+	spin_lock_bh(list_lock);
+	hlist_add_head_rcu(data_node, head);
+	spin_unlock_bh(list_lock);
 
 	return 0;
+
+err_unlock:
+	rcu_read_unlock();
+err:
+	return -1;
 }
 
 /* removes data from hash, if found. returns pointer do data on success, so you
@@ -127,50 +125,25 @@ static inline void *hash_remove(struct hashtable_t *hash,
 				hashdata_choose_cb choose, void *data)
 {
 	size_t index;
-	struct hlist_node *walk;
-	struct element_t *bucket;
+	struct hlist_node *node;
 	struct hlist_head *head;
-	void *data_save;
+	void *data_save = NULL;
 
 	index = choose(data, hash->size);
 	head = &hash->table[index];
 
-	hlist_for_each_entry(bucket, walk, head, hlist) {
-		if (compare(bucket->data, data)) {
-			data_save = bucket->data;
-			hlist_del(walk);
-			kfree(bucket);
-			return data_save;
-		}
-	}
-
-	return NULL;
-}
-
-/* finds data, based on the key in keydata. returns the found data on success,
- * or NULL on error */
-static inline void *hash_find(struct hashtable_t *hash,
-			      hashdata_compare_cb compare,
-			      hashdata_choose_cb choose, void *keydata)
-{
-	int index;
-	struct hlist_head *head;
-	struct hlist_node *walk;
-	struct element_t *bucket;
-
-	if (!hash)
-		return NULL;
-
-	index = choose(keydata , hash->size);
-	head = &hash->table[index];
+	spin_lock_bh(&hash->list_locks[index]);
+	hlist_for_each(node, head) {
+		if (!compare(node, data))
+			continue;
 
-	hlist_for_each(walk, head) {
-		bucket = hlist_entry(walk, struct element_t, hlist);
-		if (compare(bucket->data, keydata))
-			return bucket->data;
+		data_save = node;
+		hlist_del_rcu(node);
+		break;
 	}
+	spin_unlock_bh(&hash->list_locks[index]);
 
-	return NULL;
+	return data_save;
 }
 
 #endif /* _NET_BATMAN_ADV_HASH_H_ */
diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c
index ecf6d7f..34ce56c 100644
--- a/net/batman-adv/icmp_socket.c
+++ b/net/batman-adv/icmp_socket.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors:
+ * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
  *
@@ -24,7 +24,6 @@
 #include <linux/slab.h>
 #include "icmp_socket.h"
 #include "send.h"
-#include "types.h"
 #include "hash.h"
 #include "originator.h"
 #include "hard-interface.h"
@@ -157,10 +156,9 @@ static ssize_t bat_socket_write(struct file *file, const char __user *buff,
 	struct sk_buff *skb;
 	struct icmp_packet_rr *icmp_packet;
 
-	struct orig_node *orig_node;
-	struct batman_if *batman_if;
+	struct orig_node *orig_node = NULL;
+	struct neigh_node *neigh_node = NULL;
 	size_t packet_len = sizeof(struct icmp_packet);
-	uint8_t dstaddr[ETH_ALEN];
 
 	if (len < sizeof(struct icmp_packet)) {
 		bat_dbg(DBG_BATMAN, bat_priv,
@@ -220,47 +218,52 @@ static ssize_t bat_socket_write(struct file *file, const char __user *buff,
 	if (atomic_read(&bat_priv->mesh_state) != MESH_ACTIVE)
 		goto dst_unreach;
 
-	spin_lock_bh(&bat_priv->orig_hash_lock);
-	orig_node = ((struct orig_node *)hash_find(bat_priv->orig_hash,
-						   compare_orig, choose_orig,
-						   icmp_packet->dst));
+	rcu_read_lock();
+	orig_node = orig_hash_find(bat_priv, icmp_packet->dst);
 
 	if (!orig_node)
 		goto unlock;
 
-	if (!orig_node->router)
+	neigh_node = orig_node->router;
+
+	if (!neigh_node)
 		goto unlock;
 
-	batman_if = orig_node->router->if_incoming;
-	memcpy(dstaddr, orig_node->router->addr, ETH_ALEN);
+	if (!atomic_inc_not_zero(&neigh_node->refcount)) {
+		neigh_node = NULL;
+		goto unlock;
+	}
 
-	spin_unlock_bh(&bat_priv->orig_hash_lock);
+	rcu_read_unlock();
 
-	if (!batman_if)
+	if (!neigh_node->if_incoming)
 		goto dst_unreach;
 
-	if (batman_if->if_status != IF_ACTIVE)
+	if (neigh_node->if_incoming->if_status != IF_ACTIVE)
 		goto dst_unreach;
 
 	memcpy(icmp_packet->orig,
 	       bat_priv->primary_if->net_dev->dev_addr, ETH_ALEN);
 
 	if (packet_len == sizeof(struct icmp_packet_rr))
-		memcpy(icmp_packet->rr, batman_if->net_dev->dev_addr, ETH_ALEN);
-
-
-	send_skb_packet(skb, batman_if, dstaddr);
+		memcpy(icmp_packet->rr,
+		       neigh_node->if_incoming->net_dev->dev_addr, ETH_ALEN);
 
+	send_skb_packet(skb, neigh_node->if_incoming, neigh_node->addr);
 	goto out;
 
 unlock:
-	spin_unlock_bh(&bat_priv->orig_hash_lock);
+	rcu_read_unlock();
 dst_unreach:
 	icmp_packet->msg_type = DESTINATION_UNREACHABLE;
 	bat_socket_add_packet(socket_client, icmp_packet, packet_len);
 free_skb:
 	kfree_skb(skb);
 out:
+	if (neigh_node)
+		neigh_node_free_ref(neigh_node);
+	if (orig_node)
+		orig_node_free_ref(orig_node);
 	return len;
 }
 
diff --git a/net/batman-adv/icmp_socket.h b/net/batman-adv/icmp_socket.h
index bf9b348..462b190 100644
--- a/net/batman-adv/icmp_socket.h
+++ b/net/batman-adv/icmp_socket.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors:
+ * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
  *
@@ -22,8 +22,6 @@
 #ifndef _NET_BATMAN_ADV_ICMP_SOCKET_H_
 #define _NET_BATMAN_ADV_ICMP_SOCKET_H_
 
-#include "types.h"
-
 #define ICMP_SOCKET "socket"
 
 void bat_socket_init(void);
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index b827f6a..709b33b 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors:
+ * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
@@ -30,11 +30,10 @@
 #include "translation-table.h"
 #include "hard-interface.h"
 #include "gateway_client.h"
-#include "types.h"
 #include "vis.h"
 #include "hash.h"
 
-struct list_head if_list;
+struct list_head hardif_list;
 
 unsigned char broadcast_addr[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
 
@@ -42,7 +41,7 @@ struct workqueue_struct *bat_event_workqueue;
 
 static int __init batman_init(void)
 {
-	INIT_LIST_HEAD(&if_list);
+	INIT_LIST_HEAD(&hardif_list);
 
 	/* the name should not be longer than 10 chars - see
 	 * http://lwn.net/Articles/23634/ */
@@ -80,7 +79,6 @@ int mesh_init(struct net_device *soft_iface)
 {
 	struct bat_priv *bat_priv = netdev_priv(soft_iface);
 
-	spin_lock_init(&bat_priv->orig_hash_lock);
 	spin_lock_init(&bat_priv->forw_bat_list_lock);
 	spin_lock_init(&bat_priv->forw_bcast_list_lock);
 	spin_lock_init(&bat_priv->hna_lhash_lock);
@@ -155,14 +153,14 @@ void dec_module_count(void)
 
 int is_my_mac(uint8_t *addr)
 {
-	struct batman_if *batman_if;
+	struct hard_iface *hard_iface;
 
 	rcu_read_lock();
-	list_for_each_entry_rcu(batman_if, &if_list, list) {
-		if (batman_if->if_status != IF_ACTIVE)
+	list_for_each_entry_rcu(hard_iface, &hardif_list, list) {
+		if (hard_iface->if_status != IF_ACTIVE)
 			continue;
 
-		if (compare_orig(batman_if->net_dev->dev_addr, addr)) {
+		if (compare_eth(hard_iface->net_dev->dev_addr, addr)) {
 			rcu_read_unlock();
 			return 1;
 		}
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index 65106fb..dc24869 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors:
+ * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
@@ -22,9 +22,6 @@
 #ifndef _NET_BATMAN_ADV_MAIN_H_
 #define _NET_BATMAN_ADV_MAIN_H_
 
-/* Kernel Programming */
-#define LINUX
-
 #define DRIVER_AUTHOR "Marek Lindner <lindner_marek@yahoo.de>, " \
 		      "Simon Wunderlich <siwu@hrz.tu-chemnitz.de>"
 #define DRIVER_DESC   "B.A.T.M.A.N. advanced"
@@ -54,7 +51,6 @@
 
 #define NUM_WORDS (TQ_LOCAL_WINDOW_SIZE / WORD_BIT_SIZE)
 
-#define PACKBUFF_SIZE 2000
 #define LOG_BUF_LEN 8192	  /* has to be a power of 2 */
 
 #define VIS_INTERVAL 5000	/* 5 seconds */
@@ -96,15 +92,11 @@
 #define DBG_ROUTES 2	/* route or hna added / changed / deleted */
 #define DBG_ALL 3
 
-#define LOG_BUF_LEN 8192          /* has to be a power of 2 */
-
 
 /*
  *  Vis
  */
 
-/* #define VIS_SUBCLUSTERS_DISABLED */
-
 /*
  * Kernel headers
  */
@@ -130,7 +122,7 @@
 #define REVISION_VERSION_STR " "REVISION_VERSION
 #endif
 
-extern struct list_head if_list;
+extern struct list_head hardif_list;
 
 extern unsigned char broadcast_addr[];
 extern struct workqueue_struct *bat_event_workqueue;
@@ -158,13 +150,6 @@ static inline void bat_dbg(char type __always_unused,
 }
 #endif
 
-#define bat_warning(net_dev, fmt, arg...)				\
-	do {								\
-		struct net_device *_netdev = (net_dev);                 \
-		struct bat_priv *_batpriv = netdev_priv(_netdev);       \
-		bat_dbg(DBG_ALL, _batpriv, fmt, ## arg);		\
-		pr_warning("%s: " fmt, _netdev->name, ## arg);		\
-	} while (0)
 #define bat_info(net_dev, fmt, arg...)					\
 	do {								\
 		struct net_device *_netdev = (net_dev);                 \
@@ -180,4 +165,14 @@ static inline void bat_dbg(char type __always_unused,
 		pr_err("%s: " fmt, _netdev->name, ## arg);		\
 	} while (0)
 
+/**
+ * returns 1 if they are the same ethernet addr
+ *
+ * note: can't use compare_ether_addr() as it requires aligned memory
+ */
+static inline int compare_eth(void *data1, void *data2)
+{
+	return (memcmp(data1, data2, ETH_ALEN) == 0 ? 1 : 0);
+}
+
 #endif /* _NET_BATMAN_ADV_MAIN_H_ */
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index 6b7fb6b..0b91330 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2009-2010 B.A.T.M.A.N. contributors:
+ * Copyright (C) 2009-2011 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
@@ -44,24 +44,36 @@ int originator_init(struct bat_priv *bat_priv)
 	if (bat_priv->orig_hash)
 		return 1;
 
-	spin_lock_bh(&bat_priv->orig_hash_lock);
 	bat_priv->orig_hash = hash_new(1024);
 
 	if (!bat_priv->orig_hash)
 		goto err;
 
-	spin_unlock_bh(&bat_priv->orig_hash_lock);
 	start_purge_timer(bat_priv);
 	return 1;
 
 err:
-	spin_unlock_bh(&bat_priv->orig_hash_lock);
 	return 0;
 }
 
-struct neigh_node *
-create_neighbor(struct orig_node *orig_node, struct orig_node *orig_neigh_node,
-		uint8_t *neigh, struct batman_if *if_incoming)
+static void neigh_node_free_rcu(struct rcu_head *rcu)
+{
+	struct neigh_node *neigh_node;
+
+	neigh_node = container_of(rcu, struct neigh_node, rcu);
+	kfree(neigh_node);
+}
+
+void neigh_node_free_ref(struct neigh_node *neigh_node)
+{
+	if (atomic_dec_and_test(&neigh_node->refcount))
+		call_rcu(&neigh_node->rcu, neigh_node_free_rcu);
+}
+
+struct neigh_node *create_neighbor(struct orig_node *orig_node,
+				   struct orig_node *orig_neigh_node,
+				   uint8_t *neigh,
+				   struct hard_iface *if_incoming)
 {
 	struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface);
 	struct neigh_node *neigh_node;
@@ -73,50 +85,94 @@ create_neighbor(struct orig_node *orig_node, struct orig_node *orig_neigh_node,
 	if (!neigh_node)
 		return NULL;
 
-	INIT_LIST_HEAD(&neigh_node->list);
+	INIT_HLIST_NODE(&neigh_node->list);
+	INIT_LIST_HEAD(&neigh_node->bonding_list);
 
 	memcpy(neigh_node->addr, neigh, ETH_ALEN);
 	neigh_node->orig_node = orig_neigh_node;
 	neigh_node->if_incoming = if_incoming;
 
-	list_add_tail(&neigh_node->list, &orig_node->neigh_list);
+	/* extra reference for return */
+	atomic_set(&neigh_node->refcount, 2);
+
+	spin_lock_bh(&orig_node->neigh_list_lock);
+	hlist_add_head_rcu(&neigh_node->list, &orig_node->neigh_list);
+	spin_unlock_bh(&orig_node->neigh_list_lock);
 	return neigh_node;
 }
 
-static void free_orig_node(void *data, void *arg)
+static void orig_node_free_rcu(struct rcu_head *rcu)
 {
-	struct list_head *list_pos, *list_pos_tmp;
-	struct neigh_node *neigh_node;
-	struct orig_node *orig_node = (struct orig_node *)data;
-	struct bat_priv *bat_priv = (struct bat_priv *)arg;
+	struct hlist_node *node, *node_tmp;
+	struct neigh_node *neigh_node, *tmp_neigh_node;
+	struct orig_node *orig_node;
 
-	/* for all neighbors towards this originator ... */
-	list_for_each_safe(list_pos, list_pos_tmp, &orig_node->neigh_list) {
-		neigh_node = list_entry(list_pos, struct neigh_node, list);
+	orig_node = container_of(rcu, struct orig_node, rcu);
+
+	spin_lock_bh(&orig_node->neigh_list_lock);
+
+	/* for all bonding members ... */
+	list_for_each_entry_safe(neigh_node, tmp_neigh_node,
+				 &orig_node->bond_list, bonding_list) {
+		list_del_rcu(&neigh_node->bonding_list);
+		neigh_node_free_ref(neigh_node);
+	}
 
-		list_del(list_pos);
-		kfree(neigh_node);
+	/* for all neighbors towards this originator ... */
+	hlist_for_each_entry_safe(neigh_node, node, node_tmp,
+				  &orig_node->neigh_list, list) {
+		hlist_del_rcu(&neigh_node->list);
+		neigh_node_free_ref(neigh_node);
 	}
 
+	spin_unlock_bh(&orig_node->neigh_list_lock);
+
 	frag_list_free(&orig_node->frag_list);
-	hna_global_del_orig(bat_priv, orig_node, "originator timed out");
+	hna_global_del_orig(orig_node->bat_priv, orig_node,
+			    "originator timed out");
 
 	kfree(orig_node->bcast_own);
 	kfree(orig_node->bcast_own_sum);
 	kfree(orig_node);
 }
 
+void orig_node_free_ref(struct orig_node *orig_node)
+{
+	if (atomic_dec_and_test(&orig_node->refcount))
+		call_rcu(&orig_node->rcu, orig_node_free_rcu);
+}
+
 void originator_free(struct bat_priv *bat_priv)
 {
-	if (!bat_priv->orig_hash)
+	struct hashtable_t *hash = bat_priv->orig_hash;
+	struct hlist_node *node, *node_tmp;
+	struct hlist_head *head;
+	spinlock_t *list_lock; /* spinlock to protect write access */
+	struct orig_node *orig_node;
+	int i;
+
+	if (!hash)
 		return;
 
 	cancel_delayed_work_sync(&bat_priv->orig_work);
 
-	spin_lock_bh(&bat_priv->orig_hash_lock);
-	hash_delete(bat_priv->orig_hash, free_orig_node, bat_priv);
 	bat_priv->orig_hash = NULL;
-	spin_unlock_bh(&bat_priv->orig_hash_lock);
+
+	for (i = 0; i < hash->size; i++) {
+		head = &hash->table[i];
+		list_lock = &hash->list_locks[i];
+
+		spin_lock_bh(list_lock);
+		hlist_for_each_entry_safe(orig_node, node, node_tmp,
+					  head, hash_entry) {
+
+			hlist_del_rcu(node);
+			orig_node_free_ref(orig_node);
+		}
+		spin_unlock_bh(list_lock);
+	}
+
+	hash_destroy(hash);
 }
 
 /* this function finds or creates an originator entry for the given
@@ -127,10 +183,7 @@ struct orig_node *get_orig_node(struct bat_priv *bat_priv, uint8_t *addr)
 	int size;
 	int hash_added;
 
-	orig_node = ((struct orig_node *)hash_find(bat_priv->orig_hash,
-						   compare_orig, choose_orig,
-						   addr));
-
+	orig_node = orig_hash_find(bat_priv, addr);
 	if (orig_node)
 		return orig_node;
 
@@ -141,8 +194,16 @@ struct orig_node *get_orig_node(struct bat_priv *bat_priv, uint8_t *addr)
 	if (!orig_node)
 		return NULL;
 
-	INIT_LIST_HEAD(&orig_node->neigh_list);
+	INIT_HLIST_HEAD(&orig_node->neigh_list);
+	INIT_LIST_HEAD(&orig_node->bond_list);
+	spin_lock_init(&orig_node->ogm_cnt_lock);
+	spin_lock_init(&orig_node->bcast_seqno_lock);
+	spin_lock_init(&orig_node->neigh_list_lock);
+
+	/* extra reference for return */
+	atomic_set(&orig_node->refcount, 2);
 
+	orig_node->bat_priv = bat_priv;
 	memcpy(orig_node->orig, addr, ETH_ALEN);
 	orig_node->router = NULL;
 	orig_node->hna_buff = NULL;
@@ -151,6 +212,8 @@ struct orig_node *get_orig_node(struct bat_priv *bat_priv, uint8_t *addr)
 	orig_node->batman_seqno_reset = jiffies - 1
 					- msecs_to_jiffies(RESET_PROTECTION_MS);
 
+	atomic_set(&orig_node->bond_candidates, 0);
+
 	size = bat_priv->num_ifaces * sizeof(unsigned long) * NUM_WORDS;
 
 	orig_node->bcast_own = kzalloc(size, GFP_ATOMIC);
@@ -166,8 +229,8 @@ struct orig_node *get_orig_node(struct bat_priv *bat_priv, uint8_t *addr)
 	if (!orig_node->bcast_own_sum)
 		goto free_bcast_own;
 
-	hash_added = hash_add(bat_priv->orig_hash, compare_orig, choose_orig,
-			      orig_node);
+	hash_added = hash_add(bat_priv->orig_hash, compare_orig,
+			      choose_orig, orig_node, &orig_node->hash_entry);
 	if (hash_added < 0)
 		goto free_bcast_own_sum;
 
@@ -185,23 +248,30 @@ static bool purge_orig_neighbors(struct bat_priv *bat_priv,
 				 struct orig_node *orig_node,
 				 struct neigh_node **best_neigh_node)
 {
-	struct list_head *list_pos, *list_pos_tmp;
+	struct hlist_node *node, *node_tmp;
 	struct neigh_node *neigh_node;
 	bool neigh_purged = false;
 
 	*best_neigh_node = NULL;
 
+	spin_lock_bh(&orig_node->neigh_list_lock);
+
 	/* for all neighbors towards this originator ... */
-	list_for_each_safe(list_pos, list_pos_tmp, &orig_node->neigh_list) {
-		neigh_node = list_entry(list_pos, struct neigh_node, list);
+	hlist_for_each_entry_safe(neigh_node, node, node_tmp,
+				  &orig_node->neigh_list, list) {
 
 		if ((time_after(jiffies,
 			neigh_node->last_valid + PURGE_TIMEOUT * HZ)) ||
 		    (neigh_node->if_incoming->if_status == IF_INACTIVE) ||
+		    (neigh_node->if_incoming->if_status == IF_NOT_IN_USE) ||
 		    (neigh_node->if_incoming->if_status == IF_TO_BE_REMOVED)) {
 
-			if (neigh_node->if_incoming->if_status ==
-							IF_TO_BE_REMOVED)
+			if ((neigh_node->if_incoming->if_status ==
+								IF_INACTIVE) ||
+			    (neigh_node->if_incoming->if_status ==
+							IF_NOT_IN_USE) ||
+			    (neigh_node->if_incoming->if_status ==
+							IF_TO_BE_REMOVED))
 				bat_dbg(DBG_BATMAN, bat_priv,
 					"neighbor purge: originator %pM, "
 					"neighbor: %pM, iface: %s\n",
@@ -215,14 +285,18 @@ static bool purge_orig_neighbors(struct bat_priv *bat_priv,
 					(neigh_node->last_valid / HZ));
 
 			neigh_purged = true;
-			list_del(list_pos);
-			kfree(neigh_node);
+
+			hlist_del_rcu(&neigh_node->list);
+			bonding_candidate_del(orig_node, neigh_node);
+			neigh_node_free_ref(neigh_node);
 		} else {
 			if ((!*best_neigh_node) ||
 			    (neigh_node->tq_avg > (*best_neigh_node)->tq_avg))
 				*best_neigh_node = neigh_node;
 		}
 	}
+
+	spin_unlock_bh(&orig_node->neigh_list_lock);
 	return neigh_purged;
 }
 
@@ -245,9 +319,6 @@ static bool purge_orig_node(struct bat_priv *bat_priv,
 				      best_neigh_node,
 				      orig_node->hna_buff,
 				      orig_node->hna_buff_len);
-			/* update bonding candidates, we could have lost
-			 * some candidates. */
-			update_bonding_candidates(bat_priv, orig_node);
 		}
 	}
 
@@ -257,40 +328,38 @@ static bool purge_orig_node(struct bat_priv *bat_priv,
 static void _purge_orig(struct bat_priv *bat_priv)
 {
 	struct hashtable_t *hash = bat_priv->orig_hash;
-	struct hlist_node *walk, *safe;
+	struct hlist_node *node, *node_tmp;
 	struct hlist_head *head;
-	struct element_t *bucket;
+	spinlock_t *list_lock; /* spinlock to protect write access */
 	struct orig_node *orig_node;
 	int i;
 
 	if (!hash)
 		return;
 
-	spin_lock_bh(&bat_priv->orig_hash_lock);
-
 	/* for all origins... */
 	for (i = 0; i < hash->size; i++) {
 		head = &hash->table[i];
+		list_lock = &hash->list_locks[i];
 
-		hlist_for_each_entry_safe(bucket, walk, safe, head, hlist) {
-			orig_node = bucket->data;
-
+		spin_lock_bh(list_lock);
+		hlist_for_each_entry_safe(orig_node, node, node_tmp,
+					  head, hash_entry) {
 			if (purge_orig_node(bat_priv, orig_node)) {
 				if (orig_node->gw_flags)
 					gw_node_delete(bat_priv, orig_node);
-				hlist_del(walk);
-				kfree(bucket);
-				free_orig_node(orig_node, bat_priv);
+				hlist_del_rcu(node);
+				orig_node_free_ref(orig_node);
+				continue;
 			}
 
 			if (time_after(jiffies, orig_node->last_frag_packet +
 						msecs_to_jiffies(FRAG_TIMEOUT)))
 				frag_list_free(&orig_node->frag_list);
 		}
+		spin_unlock_bh(list_lock);
 	}
 
-	spin_unlock_bh(&bat_priv->orig_hash_lock);
-
 	gw_node_purge(bat_priv);
 	gw_election(bat_priv);
 
@@ -318,9 +387,8 @@ int orig_seq_print_text(struct seq_file *seq, void *offset)
 	struct net_device *net_dev = (struct net_device *)seq->private;
 	struct bat_priv *bat_priv = netdev_priv(net_dev);
 	struct hashtable_t *hash = bat_priv->orig_hash;
-	struct hlist_node *walk;
+	struct hlist_node *node, *node_tmp;
 	struct hlist_head *head;
-	struct element_t *bucket;
 	struct orig_node *orig_node;
 	struct neigh_node *neigh_node;
 	int batman_count = 0;
@@ -348,14 +416,11 @@ int orig_seq_print_text(struct seq_file *seq, void *offset)
 		   "Originator", "last-seen", "#", TQ_MAX_VALUE, "Nexthop",
 		   "outgoingIF", "Potential nexthops");
 
-	spin_lock_bh(&bat_priv->orig_hash_lock);
-
 	for (i = 0; i < hash->size; i++) {
 		head = &hash->table[i];
 
-		hlist_for_each_entry(bucket, walk, head, hlist) {
-			orig_node = bucket->data;
-
+		rcu_read_lock();
+		hlist_for_each_entry_rcu(orig_node, node, head, hash_entry) {
 			if (!orig_node->router)
 				continue;
 
@@ -374,8 +439,8 @@ int orig_seq_print_text(struct seq_file *seq, void *offset)
 				   neigh_node->addr,
 				   neigh_node->if_incoming->net_dev->name);
 
-			list_for_each_entry(neigh_node, &orig_node->neigh_list,
-					    list) {
+			hlist_for_each_entry_rcu(neigh_node, node_tmp,
+						 &orig_node->neigh_list, list) {
 				seq_printf(seq, " %pM (%3i)", neigh_node->addr,
 						neigh_node->tq_avg);
 			}
@@ -383,10 +448,9 @@ int orig_seq_print_text(struct seq_file *seq, void *offset)
 			seq_printf(seq, "\n");
 			batman_count++;
 		}
+		rcu_read_unlock();
 	}
 
-	spin_unlock_bh(&bat_priv->orig_hash_lock);
-
 	if ((batman_count == 0))
 		seq_printf(seq, "No batman nodes in range ...\n");
 
@@ -423,36 +487,36 @@ static int orig_node_add_if(struct orig_node *orig_node, int max_if_num)
 	return 0;
 }
 
-int orig_hash_add_if(struct batman_if *batman_if, int max_if_num)
+int orig_hash_add_if(struct hard_iface *hard_iface, int max_if_num)
 {
-	struct bat_priv *bat_priv = netdev_priv(batman_if->soft_iface);
+	struct bat_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
 	struct hashtable_t *hash = bat_priv->orig_hash;
-	struct hlist_node *walk;
+	struct hlist_node *node;
 	struct hlist_head *head;
-	struct element_t *bucket;
 	struct orig_node *orig_node;
-	int i;
+	int i, ret;
 
 	/* resize all orig nodes because orig_node->bcast_own(_sum) depend on
 	 * if_num */
-	spin_lock_bh(&bat_priv->orig_hash_lock);
-
 	for (i = 0; i < hash->size; i++) {
 		head = &hash->table[i];
 
-		hlist_for_each_entry(bucket, walk, head, hlist) {
-			orig_node = bucket->data;
+		rcu_read_lock();
+		hlist_for_each_entry_rcu(orig_node, node, head, hash_entry) {
+			spin_lock_bh(&orig_node->ogm_cnt_lock);
+			ret = orig_node_add_if(orig_node, max_if_num);
+			spin_unlock_bh(&orig_node->ogm_cnt_lock);
 
-			if (orig_node_add_if(orig_node, max_if_num) == -1)
+			if (ret == -1)
 				goto err;
 		}
+		rcu_read_unlock();
 	}
 
-	spin_unlock_bh(&bat_priv->orig_hash_lock);
 	return 0;
 
 err:
-	spin_unlock_bh(&bat_priv->orig_hash_lock);
+	rcu_read_unlock();
 	return -ENOMEM;
 }
 
@@ -508,57 +572,55 @@ free_own_sum:
 	return 0;
 }
 
-int orig_hash_del_if(struct batman_if *batman_if, int max_if_num)
+int orig_hash_del_if(struct hard_iface *hard_iface, int max_if_num)
 {
-	struct bat_priv *bat_priv = netdev_priv(batman_if->soft_iface);
+	struct bat_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
 	struct hashtable_t *hash = bat_priv->orig_hash;
-	struct hlist_node *walk;
+	struct hlist_node *node;
 	struct hlist_head *head;
-	struct element_t *bucket;
-	struct batman_if *batman_if_tmp;
+	struct hard_iface *hard_iface_tmp;
 	struct orig_node *orig_node;
 	int i, ret;
 
 	/* resize all orig nodes because orig_node->bcast_own(_sum) depend on
 	 * if_num */
-	spin_lock_bh(&bat_priv->orig_hash_lock);
-
 	for (i = 0; i < hash->size; i++) {
 		head = &hash->table[i];
 
-		hlist_for_each_entry(bucket, walk, head, hlist) {
-			orig_node = bucket->data;
-
+		rcu_read_lock();
+		hlist_for_each_entry_rcu(orig_node, node, head, hash_entry) {
+			spin_lock_bh(&orig_node->ogm_cnt_lock);
 			ret = orig_node_del_if(orig_node, max_if_num,
-					batman_if->if_num);
+					hard_iface->if_num);
+			spin_unlock_bh(&orig_node->ogm_cnt_lock);
 
 			if (ret == -1)
 				goto err;
 		}
+		rcu_read_unlock();
 	}
 
 	/* renumber remaining batman interfaces _inside_ of orig_hash_lock */
 	rcu_read_lock();
-	list_for_each_entry_rcu(batman_if_tmp, &if_list, list) {
-		if (batman_if_tmp->if_status == IF_NOT_IN_USE)
+	list_for_each_entry_rcu(hard_iface_tmp, &hardif_list, list) {
+		if (hard_iface_tmp->if_status == IF_NOT_IN_USE)
 			continue;
 
-		if (batman_if == batman_if_tmp)
+		if (hard_iface == hard_iface_tmp)
 			continue;
 
-		if (batman_if->soft_iface != batman_if_tmp->soft_iface)
+		if (hard_iface->soft_iface != hard_iface_tmp->soft_iface)
 			continue;
 
-		if (batman_if_tmp->if_num > batman_if->if_num)
-			batman_if_tmp->if_num--;
+		if (hard_iface_tmp->if_num > hard_iface->if_num)
+			hard_iface_tmp->if_num--;
 	}
 	rcu_read_unlock();
 
-	batman_if->if_num = -1;
-	spin_unlock_bh(&bat_priv->orig_hash_lock);
+	hard_iface->if_num = -1;
 	return 0;
 
 err:
-	spin_unlock_bh(&bat_priv->orig_hash_lock);
+	rcu_read_unlock();
 	return -ENOMEM;
 }
diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h
index d474ceb..5cc0110 100644
--- a/net/batman-adv/originator.h
+++ b/net/batman-adv/originator.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors:
+ * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
@@ -22,21 +22,28 @@
 #ifndef _NET_BATMAN_ADV_ORIGINATOR_H_
 #define _NET_BATMAN_ADV_ORIGINATOR_H_
 
+#include "hash.h"
+
 int originator_init(struct bat_priv *bat_priv);
 void originator_free(struct bat_priv *bat_priv);
 void purge_orig_ref(struct bat_priv *bat_priv);
+void orig_node_free_ref(struct orig_node *orig_node);
 struct orig_node *get_orig_node(struct bat_priv *bat_priv, uint8_t *addr);
-struct neigh_node *
-create_neighbor(struct orig_node *orig_node, struct orig_node *orig_neigh_node,
-		uint8_t *neigh, struct batman_if *if_incoming);
+struct neigh_node *create_neighbor(struct orig_node *orig_node,
+				   struct orig_node *orig_neigh_node,
+				   uint8_t *neigh,
+				   struct hard_iface *if_incoming);
+void neigh_node_free_ref(struct neigh_node *neigh_node);
 int orig_seq_print_text(struct seq_file *seq, void *offset);
-int orig_hash_add_if(struct batman_if *batman_if, int max_if_num);
-int orig_hash_del_if(struct batman_if *batman_if, int max_if_num);
+int orig_hash_add_if(struct hard_iface *hard_iface, int max_if_num);
+int orig_hash_del_if(struct hard_iface *hard_iface, int max_if_num);
 
 
 /* returns 1 if they are the same originator */
-static inline int compare_orig(void *data1, void *data2)
+static inline int compare_orig(struct hlist_node *node, void *data2)
 {
+	void *data1 = container_of(node, struct orig_node, hash_entry);
+
 	return (memcmp(data1, data2, ETH_ALEN) == 0 ? 1 : 0);
 }
 
@@ -61,4 +68,35 @@ static inline int choose_orig(void *data, int32_t size)
 	return hash % size;
 }
 
+static inline struct orig_node *orig_hash_find(struct bat_priv *bat_priv,
+					       void *data)
+{
+	struct hashtable_t *hash = bat_priv->orig_hash;
+	struct hlist_head *head;
+	struct hlist_node *node;
+	struct orig_node *orig_node, *orig_node_tmp = NULL;
+	int index;
+
+	if (!hash)
+		return NULL;
+
+	index = choose_orig(data, hash->size);
+	head = &hash->table[index];
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(orig_node, node, head, hash_entry) {
+		if (!compare_eth(orig_node, data))
+			continue;
+
+		if (!atomic_inc_not_zero(&orig_node->refcount))
+			continue;
+
+		orig_node_tmp = orig_node;
+		break;
+	}
+	rcu_read_unlock();
+
+	return orig_node_tmp;
+}
+
 #endif /* _NET_BATMAN_ADV_ORIGINATOR_H_ */
diff --git a/net/batman-adv/packet.h b/net/batman-adv/packet.h
index 2284e81..e757187 100644
--- a/net/batman-adv/packet.h
+++ b/net/batman-adv/packet.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors:
+ * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
@@ -50,6 +50,7 @@
 
 /* fragmentation defines */
 #define UNI_FRAG_HEAD 0x01
+#define UNI_FRAG_LARGETAIL 0x02
 
 struct batman_packet {
 	uint8_t  packet_type;
diff --git a/net/batman-adv/ring_buffer.c b/net/batman-adv/ring_buffer.c
index defd37c..5bb6a61 100644
--- a/net/batman-adv/ring_buffer.c
+++ b/net/batman-adv/ring_buffer.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors:
+ * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
  *
diff --git a/net/batman-adv/ring_buffer.h b/net/batman-adv/ring_buffer.h
index 6b0cb9a..0395b27 100644
--- a/net/batman-adv/ring_buffer.h
+++ b/net/batman-adv/ring_buffer.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors:
+ * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
  *
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index 8828edd..c172f5d 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors:
+ * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
@@ -28,7 +28,6 @@
 #include "icmp_socket.h"
 #include "translation-table.h"
 #include "originator.h"
-#include "types.h"
 #include "ring_buffer.h"
 #include "vis.h"
 #include "aggregation.h"
@@ -36,35 +35,33 @@
 #include "gateway_client.h"
 #include "unicast.h"
 
-void slide_own_bcast_window(struct batman_if *batman_if)
+void slide_own_bcast_window(struct hard_iface *hard_iface)
 {
-	struct bat_priv *bat_priv = netdev_priv(batman_if->soft_iface);
+	struct bat_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
 	struct hashtable_t *hash = bat_priv->orig_hash;
-	struct hlist_node *walk;
+	struct hlist_node *node;
 	struct hlist_head *head;
-	struct element_t *bucket;
 	struct orig_node *orig_node;
 	unsigned long *word;
 	int i;
 	size_t word_index;
 
-	spin_lock_bh(&bat_priv->orig_hash_lock);
-
 	for (i = 0; i < hash->size; i++) {
 		head = &hash->table[i];
 
-		hlist_for_each_entry(bucket, walk, head, hlist) {
-			orig_node = bucket->data;
-			word_index = batman_if->if_num * NUM_WORDS;
+		rcu_read_lock();
+		hlist_for_each_entry_rcu(orig_node, node, head, hash_entry) {
+			spin_lock_bh(&orig_node->ogm_cnt_lock);
+			word_index = hard_iface->if_num * NUM_WORDS;
 			word = &(orig_node->bcast_own[word_index]);
 
 			bit_get_packet(bat_priv, word, 1, 0);
-			orig_node->bcast_own_sum[batman_if->if_num] =
+			orig_node->bcast_own_sum[hard_iface->if_num] =
 				bit_packet_count(word);
+			spin_unlock_bh(&orig_node->ogm_cnt_lock);
 		}
+		rcu_read_unlock();
 	}
-
-	spin_unlock_bh(&bat_priv->orig_hash_lock);
 }
 
 static void update_HNA(struct bat_priv *bat_priv, struct orig_node *orig_node,
@@ -90,6 +87,8 @@ static void update_route(struct bat_priv *bat_priv,
 			 struct neigh_node *neigh_node,
 			 unsigned char *hna_buff, int hna_buff_len)
 {
+	struct neigh_node *neigh_node_tmp;
+
 	/* route deleted */
 	if ((orig_node->router) && (!neigh_node)) {
 
@@ -116,7 +115,12 @@ static void update_route(struct bat_priv *bat_priv,
 			orig_node->router->addr);
 	}
 
+	if (neigh_node && !atomic_inc_not_zero(&neigh_node->refcount))
+		neigh_node = NULL;
+	neigh_node_tmp = orig_node->router;
 	orig_node->router = neigh_node;
+	if (neigh_node_tmp)
+		neigh_node_free_ref(neigh_node_tmp);
 }
 
 
@@ -139,73 +143,93 @@ void update_routes(struct bat_priv *bat_priv, struct orig_node *orig_node,
 static int is_bidirectional_neigh(struct orig_node *orig_node,
 				struct orig_node *orig_neigh_node,
 				struct batman_packet *batman_packet,
-				struct batman_if *if_incoming)
+				struct hard_iface *if_incoming)
 {
 	struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface);
-	struct neigh_node *neigh_node = NULL, *tmp_neigh_node = NULL;
+	struct neigh_node *neigh_node = NULL, *tmp_neigh_node;
+	struct hlist_node *node;
 	unsigned char total_count;
+	uint8_t orig_eq_count, neigh_rq_count, tq_own;
+	int tq_asym_penalty, ret = 0;
 
 	if (orig_node == orig_neigh_node) {
-		list_for_each_entry(tmp_neigh_node,
-				    &orig_node->neigh_list,
-				    list) {
-
-			if (compare_orig(tmp_neigh_node->addr,
-					 orig_neigh_node->orig) &&
-			    (tmp_neigh_node->if_incoming == if_incoming))
-				neigh_node = tmp_neigh_node;
+		rcu_read_lock();
+		hlist_for_each_entry_rcu(tmp_neigh_node, node,
+					 &orig_node->neigh_list, list) {
+
+			if (!compare_eth(tmp_neigh_node->addr,
+					 orig_neigh_node->orig))
+				continue;
+
+			if (tmp_neigh_node->if_incoming != if_incoming)
+				continue;
+
+			if (!atomic_inc_not_zero(&tmp_neigh_node->refcount))
+				continue;
+
+			neigh_node = tmp_neigh_node;
 		}
+		rcu_read_unlock();
 
 		if (!neigh_node)
 			neigh_node = create_neighbor(orig_node,
 						     orig_neigh_node,
 						     orig_neigh_node->orig,
 						     if_incoming);
-		/* create_neighbor failed, return 0 */
 		if (!neigh_node)
-			return 0;
+			goto out;
 
 		neigh_node->last_valid = jiffies;
 	} else {
 		/* find packet count of corresponding one hop neighbor */
-		list_for_each_entry(tmp_neigh_node,
-				    &orig_neigh_node->neigh_list, list) {
+		rcu_read_lock();
+		hlist_for_each_entry_rcu(tmp_neigh_node, node,
+					 &orig_neigh_node->neigh_list, list) {
 
-			if (compare_orig(tmp_neigh_node->addr,
-					 orig_neigh_node->orig) &&
-			    (tmp_neigh_node->if_incoming == if_incoming))
-				neigh_node = tmp_neigh_node;
+			if (!compare_eth(tmp_neigh_node->addr,
+					 orig_neigh_node->orig))
+				continue;
+
+			if (tmp_neigh_node->if_incoming != if_incoming)
+				continue;
+
+			if (!atomic_inc_not_zero(&tmp_neigh_node->refcount))
+				continue;
+
+			neigh_node = tmp_neigh_node;
 		}
+		rcu_read_unlock();
 
 		if (!neigh_node)
 			neigh_node = create_neighbor(orig_neigh_node,
 						     orig_neigh_node,
 						     orig_neigh_node->orig,
 						     if_incoming);
-		/* create_neighbor failed, return 0 */
 		if (!neigh_node)
-			return 0;
+			goto out;
 	}
 
 	orig_node->last_valid = jiffies;
 
+	spin_lock_bh(&orig_node->ogm_cnt_lock);
+	orig_eq_count = orig_neigh_node->bcast_own_sum[if_incoming->if_num];
+	neigh_rq_count = neigh_node->real_packet_count;
+	spin_unlock_bh(&orig_node->ogm_cnt_lock);
+
 	/* pay attention to not get a value bigger than 100 % */
-	total_count = (orig_neigh_node->bcast_own_sum[if_incoming->if_num] >
-		       neigh_node->real_packet_count ?
-		       neigh_node->real_packet_count :
-		       orig_neigh_node->bcast_own_sum[if_incoming->if_num]);
+	total_count = (orig_eq_count > neigh_rq_count ?
+		       neigh_rq_count : orig_eq_count);
 
 	/* if we have too few packets (too less data) we set tq_own to zero */
 	/* if we receive too few packets it is not considered bidirectional */
 	if ((total_count < TQ_LOCAL_BIDRECT_SEND_MINIMUM) ||
-	    (neigh_node->real_packet_count < TQ_LOCAL_BIDRECT_RECV_MINIMUM))
-		orig_neigh_node->tq_own = 0;
+	    (neigh_rq_count < TQ_LOCAL_BIDRECT_RECV_MINIMUM))
+		tq_own = 0;
 	else
 		/* neigh_node->real_packet_count is never zero as we
 		 * only purge old information when getting new
 		 * information */
-		orig_neigh_node->tq_own = (TQ_MAX_VALUE * total_count) /
-			neigh_node->real_packet_count;
+		tq_own = (TQ_MAX_VALUE * total_count) /	neigh_rq_count;
 
 	/*
 	 * 1 - ((1-x) ** 3), normalized to TQ_MAX_VALUE this does
@@ -213,20 +237,16 @@ static int is_bidirectional_neigh(struct orig_node *orig_node,
 	 * punishes asymmetric links more.  This will give a value
 	 * between 0 and TQ_MAX_VALUE
 	 */
-	orig_neigh_node->tq_asym_penalty =
-		TQ_MAX_VALUE -
-		(TQ_MAX_VALUE *
-		 (TQ_LOCAL_WINDOW_SIZE - neigh_node->real_packet_count) *
-		 (TQ_LOCAL_WINDOW_SIZE - neigh_node->real_packet_count) *
-		 (TQ_LOCAL_WINDOW_SIZE - neigh_node->real_packet_count)) /
-		(TQ_LOCAL_WINDOW_SIZE *
-		 TQ_LOCAL_WINDOW_SIZE *
-		 TQ_LOCAL_WINDOW_SIZE);
-
-	batman_packet->tq = ((batman_packet->tq *
-			      orig_neigh_node->tq_own *
-			      orig_neigh_node->tq_asym_penalty) /
-			     (TQ_MAX_VALUE * TQ_MAX_VALUE));
+	tq_asym_penalty = TQ_MAX_VALUE - (TQ_MAX_VALUE *
+				(TQ_LOCAL_WINDOW_SIZE - neigh_rq_count) *
+				(TQ_LOCAL_WINDOW_SIZE - neigh_rq_count) *
+				(TQ_LOCAL_WINDOW_SIZE - neigh_rq_count)) /
+					(TQ_LOCAL_WINDOW_SIZE *
+					 TQ_LOCAL_WINDOW_SIZE *
+					 TQ_LOCAL_WINDOW_SIZE);
+
+	batman_packet->tq = ((batman_packet->tq * tq_own * tq_asym_penalty) /
+						(TQ_MAX_VALUE * TQ_MAX_VALUE));
 
 	bat_dbg(DBG_BATMAN, bat_priv,
 		"bidirectional: "
@@ -234,34 +254,141 @@ static int is_bidirectional_neigh(struct orig_node *orig_node,
 		"real recv = %2i, local tq: %3i, asym_penalty: %3i, "
 		"total tq: %3i\n",
 		orig_node->orig, orig_neigh_node->orig, total_count,
-		neigh_node->real_packet_count, orig_neigh_node->tq_own,
-		orig_neigh_node->tq_asym_penalty, batman_packet->tq);
+		neigh_rq_count, tq_own,	tq_asym_penalty, batman_packet->tq);
 
 	/* if link has the minimum required transmission quality
 	 * consider it bidirectional */
 	if (batman_packet->tq >= TQ_TOTAL_BIDRECT_LIMIT)
-		return 1;
+		ret = 1;
 
-	return 0;
+out:
+	if (neigh_node)
+		neigh_node_free_ref(neigh_node);
+	return ret;
+}
+
+/* caller must hold the neigh_list_lock */
+void bonding_candidate_del(struct orig_node *orig_node,
+			   struct neigh_node *neigh_node)
+{
+	/* this neighbor is not part of our candidate list */
+	if (list_empty(&neigh_node->bonding_list))
+		goto out;
+
+	list_del_rcu(&neigh_node->bonding_list);
+	INIT_LIST_HEAD(&neigh_node->bonding_list);
+	neigh_node_free_ref(neigh_node);
+	atomic_dec(&orig_node->bond_candidates);
+
+out:
+	return;
+}
+
+static void bonding_candidate_add(struct orig_node *orig_node,
+				  struct neigh_node *neigh_node)
+{
+	struct hlist_node *node;
+	struct neigh_node *tmp_neigh_node;
+	uint8_t best_tq, interference_candidate = 0;
+
+	spin_lock_bh(&orig_node->neigh_list_lock);
+
+	/* only consider if it has the same primary address ...  */
+	if (!compare_eth(orig_node->orig,
+			 neigh_node->orig_node->primary_addr))
+		goto candidate_del;
+
+	if (!orig_node->router)
+		goto candidate_del;
+
+	best_tq = orig_node->router->tq_avg;
+
+	/* ... and is good enough to be considered */
+	if (neigh_node->tq_avg < best_tq - BONDING_TQ_THRESHOLD)
+		goto candidate_del;
+
+	/**
+	 * check if we have another candidate with the same mac address or
+	 * interface. If we do, we won't select this candidate because of
+	 * possible interference.
+	 */
+	hlist_for_each_entry_rcu(tmp_neigh_node, node,
+				 &orig_node->neigh_list, list) {
+
+		if (tmp_neigh_node == neigh_node)
+			continue;
+
+		/* we only care if the other candidate is even
+		* considered as candidate. */
+		if (list_empty(&tmp_neigh_node->bonding_list))
+			continue;
+
+		if ((neigh_node->if_incoming == tmp_neigh_node->if_incoming) ||
+		    (compare_eth(neigh_node->addr, tmp_neigh_node->addr))) {
+			interference_candidate = 1;
+			break;
+		}
+	}
+
+	/* don't care further if it is an interference candidate */
+	if (interference_candidate)
+		goto candidate_del;
+
+	/* this neighbor already is part of our candidate list */
+	if (!list_empty(&neigh_node->bonding_list))
+		goto out;
+
+	if (!atomic_inc_not_zero(&neigh_node->refcount))
+		goto out;
+
+	list_add_rcu(&neigh_node->bonding_list, &orig_node->bond_list);
+	atomic_inc(&orig_node->bond_candidates);
+	goto out;
+
+candidate_del:
+	bonding_candidate_del(orig_node, neigh_node);
+
+out:
+	spin_unlock_bh(&orig_node->neigh_list_lock);
+	return;
+}
+
+/* copy primary address for bonding */
+static void bonding_save_primary(struct orig_node *orig_node,
+				 struct orig_node *orig_neigh_node,
+				 struct batman_packet *batman_packet)
+{
+	if (!(batman_packet->flags & PRIMARIES_FIRST_HOP))
+		return;
+
+	memcpy(orig_neigh_node->primary_addr, orig_node->orig, ETH_ALEN);
 }
 
 static void update_orig(struct bat_priv *bat_priv,
 			struct orig_node *orig_node,
 			struct ethhdr *ethhdr,
 			struct batman_packet *batman_packet,
-			struct batman_if *if_incoming,
+			struct hard_iface *if_incoming,
 			unsigned char *hna_buff, int hna_buff_len,
 			char is_duplicate)
 {
 	struct neigh_node *neigh_node = NULL, *tmp_neigh_node = NULL;
+	struct orig_node *orig_node_tmp;
+	struct hlist_node *node;
 	int tmp_hna_buff_len;
+	uint8_t bcast_own_sum_orig, bcast_own_sum_neigh;
 
 	bat_dbg(DBG_BATMAN, bat_priv, "update_originator(): "
 		"Searching and updating originator entry of received packet\n");
 
-	list_for_each_entry(tmp_neigh_node, &orig_node->neigh_list, list) {
-		if (compare_orig(tmp_neigh_node->addr, ethhdr->h_source) &&
-		    (tmp_neigh_node->if_incoming == if_incoming)) {
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(tmp_neigh_node, node,
+				 &orig_node->neigh_list, list) {
+		if (compare_eth(tmp_neigh_node->addr, ethhdr->h_source) &&
+		    (tmp_neigh_node->if_incoming == if_incoming) &&
+		     atomic_inc_not_zero(&tmp_neigh_node->refcount)) {
+			if (neigh_node)
+				neigh_node_free_ref(neigh_node);
 			neigh_node = tmp_neigh_node;
 			continue;
 		}
@@ -280,16 +407,20 @@ static void update_orig(struct bat_priv *bat_priv,
 
 		orig_tmp = get_orig_node(bat_priv, ethhdr->h_source);
 		if (!orig_tmp)
-			return;
+			goto unlock;
 
 		neigh_node = create_neighbor(orig_node, orig_tmp,
 					     ethhdr->h_source, if_incoming);
+
+		orig_node_free_ref(orig_tmp);
 		if (!neigh_node)
-			return;
+			goto unlock;
 	} else
 		bat_dbg(DBG_BATMAN, bat_priv,
 			"Updating existing last-hop neighbor of originator\n");
 
+	rcu_read_unlock();
+
 	orig_node->flags = batman_packet->flags;
 	neigh_node->last_valid = jiffies;
 
@@ -303,6 +434,8 @@ static void update_orig(struct bat_priv *bat_priv,
 		neigh_node->last_ttl = batman_packet->ttl;
 	}
 
+	bonding_candidate_add(orig_node, neigh_node);
+
 	tmp_hna_buff_len = (hna_buff_len > batman_packet->num_hna * ETH_ALEN ?
 			    batman_packet->num_hna * ETH_ALEN : hna_buff_len);
 
@@ -319,10 +452,22 @@ static void update_orig(struct bat_priv *bat_priv,
 	/* if the TQ is the same and the link not more symetric we
 	 * won't consider it either */
 	if ((orig_node->router) &&
-	     ((neigh_node->tq_avg == orig_node->router->tq_avg) &&
-	     (orig_node->router->orig_node->bcast_own_sum[if_incoming->if_num]
-	      >= neigh_node->orig_node->bcast_own_sum[if_incoming->if_num])))
-		goto update_hna;
+	     (neigh_node->tq_avg == orig_node->router->tq_avg)) {
+		orig_node_tmp = orig_node->router->orig_node;
+		spin_lock_bh(&orig_node_tmp->ogm_cnt_lock);
+		bcast_own_sum_orig =
+			orig_node_tmp->bcast_own_sum[if_incoming->if_num];
+		spin_unlock_bh(&orig_node_tmp->ogm_cnt_lock);
+
+		orig_node_tmp = neigh_node->orig_node;
+		spin_lock_bh(&orig_node_tmp->ogm_cnt_lock);
+		bcast_own_sum_neigh =
+			orig_node_tmp->bcast_own_sum[if_incoming->if_num];
+		spin_unlock_bh(&orig_node_tmp->ogm_cnt_lock);
+
+		if (bcast_own_sum_orig >= bcast_own_sum_neigh)
+			goto update_hna;
+	}
 
 	update_routes(bat_priv, orig_node, neigh_node,
 		      hna_buff, tmp_hna_buff_len);
@@ -343,6 +488,14 @@ update_gw:
 	    (atomic_read(&bat_priv->gw_mode) == GW_MODE_CLIENT) &&
 	    (atomic_read(&bat_priv->gw_sel_class) > 2))
 		gw_check_election(bat_priv, orig_node);
+
+	goto out;
+
+unlock:
+	rcu_read_unlock();
+out:
+	if (neigh_node)
+		neigh_node_free_ref(neigh_node);
 }
 
 /* checks whether the host restarted and is in the protection time.
@@ -380,34 +533,38 @@ static int window_protected(struct bat_priv *bat_priv,
  */
 static char count_real_packets(struct ethhdr *ethhdr,
 			       struct batman_packet *batman_packet,
-			       struct batman_if *if_incoming)
+			       struct hard_iface *if_incoming)
 {
 	struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface);
 	struct orig_node *orig_node;
 	struct neigh_node *tmp_neigh_node;
+	struct hlist_node *node;
 	char is_duplicate = 0;
 	int32_t seq_diff;
 	int need_update = 0;
-	int set_mark;
+	int set_mark, ret = -1;
 
 	orig_node = get_orig_node(bat_priv, batman_packet->orig);
 	if (!orig_node)
 		return 0;
 
+	spin_lock_bh(&orig_node->ogm_cnt_lock);
 	seq_diff = batman_packet->seqno - orig_node->last_real_seqno;
 
 	/* signalize caller that the packet is to be dropped. */
 	if (window_protected(bat_priv, seq_diff,
 			     &orig_node->batman_seqno_reset))
-		return -1;
+		goto out;
 
-	list_for_each_entry(tmp_neigh_node, &orig_node->neigh_list, list) {
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(tmp_neigh_node, node,
+				 &orig_node->neigh_list, list) {
 
 		is_duplicate |= get_bit_status(tmp_neigh_node->real_bits,
 					       orig_node->last_real_seqno,
 					       batman_packet->seqno);
 
-		if (compare_orig(tmp_neigh_node->addr, ethhdr->h_source) &&
+		if (compare_eth(tmp_neigh_node->addr, ethhdr->h_source) &&
 		    (tmp_neigh_node->if_incoming == if_incoming))
 			set_mark = 1;
 		else
@@ -421,6 +578,7 @@ static char count_real_packets(struct ethhdr *ethhdr,
 		tmp_neigh_node->real_packet_count =
 			bit_packet_count(tmp_neigh_node->real_bits);
 	}
+	rcu_read_unlock();
 
 	if (need_update) {
 		bat_dbg(DBG_BATMAN, bat_priv,
@@ -429,123 +587,21 @@ static char count_real_packets(struct ethhdr *ethhdr,
 		orig_node->last_real_seqno = batman_packet->seqno;
 	}
 
-	return is_duplicate;
-}
-
-/* copy primary address for bonding */
-static void mark_bonding_address(struct bat_priv *bat_priv,
-				 struct orig_node *orig_node,
-				 struct orig_node *orig_neigh_node,
-				 struct batman_packet *batman_packet)
+	ret = is_duplicate;
 
-{
-	if (batman_packet->flags & PRIMARIES_FIRST_HOP)
-		memcpy(orig_neigh_node->primary_addr,
-		       orig_node->orig, ETH_ALEN);
-
-	return;
-}
-
-/* mark possible bond.candidates in the neighbor list */
-void update_bonding_candidates(struct bat_priv *bat_priv,
-			       struct orig_node *orig_node)
-{
-	int candidates;
-	int interference_candidate;
-	int best_tq;
-	struct neigh_node *tmp_neigh_node, *tmp_neigh_node2;
-	struct neigh_node *first_candidate, *last_candidate;
-
-	/* update the candidates for this originator */
-	if (!orig_node->router) {
-		orig_node->bond.candidates = 0;
-		return;
-	}
-
-	best_tq = orig_node->router->tq_avg;
-
-	/* update bond.candidates */
-
-	candidates = 0;
-
-	/* mark other nodes which also received "PRIMARIES FIRST HOP" packets
-	 * as "bonding partner" */
-
-	/* first, zero the list */
-	list_for_each_entry(tmp_neigh_node, &orig_node->neigh_list, list) {
-		tmp_neigh_node->next_bond_candidate = NULL;
-	}
-
-	first_candidate = NULL;
-	last_candidate = NULL;
-	list_for_each_entry(tmp_neigh_node, &orig_node->neigh_list, list) {
-
-		/* only consider if it has the same primary address ...  */
-		if (memcmp(orig_node->orig,
-				tmp_neigh_node->orig_node->primary_addr,
-				ETH_ALEN) != 0)
-			continue;
-
-		/* ... and is good enough to be considered */
-		if (tmp_neigh_node->tq_avg < best_tq - BONDING_TQ_THRESHOLD)
-			continue;
-
-		/* check if we have another candidate with the same
-		 * mac address or interface. If we do, we won't
-		 * select this candidate because of possible interference. */
-
-		interference_candidate = 0;
-		list_for_each_entry(tmp_neigh_node2,
-				&orig_node->neigh_list, list) {
-
-			if (tmp_neigh_node2 == tmp_neigh_node)
-				continue;
-
-			/* we only care if the other candidate is even
-			 * considered as candidate. */
-			if (!tmp_neigh_node2->next_bond_candidate)
-				continue;
-
-
-			if ((tmp_neigh_node->if_incoming ==
-				tmp_neigh_node2->if_incoming)
-				|| (memcmp(tmp_neigh_node->addr,
-				tmp_neigh_node2->addr, ETH_ALEN) == 0)) {
-
-				interference_candidate = 1;
-				break;
-			}
-		}
-		/* don't care further if it is an interference candidate */
-		if (interference_candidate)
-			continue;
-
-		if (!first_candidate) {
-			first_candidate = tmp_neigh_node;
-			tmp_neigh_node->next_bond_candidate = first_candidate;
-		} else
-			tmp_neigh_node->next_bond_candidate = last_candidate;
-
-		last_candidate = tmp_neigh_node;
-
-		candidates++;
-	}
-
-	if (candidates > 0) {
-		first_candidate->next_bond_candidate = last_candidate;
-		orig_node->bond.selected = first_candidate;
-	}
-
-	orig_node->bond.candidates = candidates;
+out:
+	spin_unlock_bh(&orig_node->ogm_cnt_lock);
+	orig_node_free_ref(orig_node);
+	return ret;
 }
 
 void receive_bat_packet(struct ethhdr *ethhdr,
-				struct batman_packet *batman_packet,
-				unsigned char *hna_buff, int hna_buff_len,
-				struct batman_if *if_incoming)
+			struct batman_packet *batman_packet,
+			unsigned char *hna_buff, int hna_buff_len,
+			struct hard_iface *if_incoming)
 {
 	struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface);
-	struct batman_if *batman_if;
+	struct hard_iface *hard_iface;
 	struct orig_node *orig_neigh_node, *orig_node;
 	char has_directlink_flag;
 	char is_my_addr = 0, is_my_orig = 0, is_my_oldorig = 0;
@@ -573,8 +629,8 @@ void receive_bat_packet(struct ethhdr *ethhdr,
 
 	has_directlink_flag = (batman_packet->flags & DIRECTLINK ? 1 : 0);
 
-	is_single_hop_neigh = (compare_orig(ethhdr->h_source,
-					    batman_packet->orig) ? 1 : 0);
+	is_single_hop_neigh = (compare_eth(ethhdr->h_source,
+					   batman_packet->orig) ? 1 : 0);
 
 	bat_dbg(DBG_BATMAN, bat_priv,
 		"Received BATMAN packet via NB: %pM, IF: %s [%pM] "
@@ -587,26 +643,26 @@ void receive_bat_packet(struct ethhdr *ethhdr,
 		has_directlink_flag);
 
 	rcu_read_lock();
-	list_for_each_entry_rcu(batman_if, &if_list, list) {
-		if (batman_if->if_status != IF_ACTIVE)
+	list_for_each_entry_rcu(hard_iface, &hardif_list, list) {
+		if (hard_iface->if_status != IF_ACTIVE)
 			continue;
 
-		if (batman_if->soft_iface != if_incoming->soft_iface)
+		if (hard_iface->soft_iface != if_incoming->soft_iface)
 			continue;
 
-		if (compare_orig(ethhdr->h_source,
-				 batman_if->net_dev->dev_addr))
+		if (compare_eth(ethhdr->h_source,
+				hard_iface->net_dev->dev_addr))
 			is_my_addr = 1;
 
-		if (compare_orig(batman_packet->orig,
-				 batman_if->net_dev->dev_addr))
+		if (compare_eth(batman_packet->orig,
+				hard_iface->net_dev->dev_addr))
 			is_my_orig = 1;
 
-		if (compare_orig(batman_packet->prev_sender,
-				 batman_if->net_dev->dev_addr))
+		if (compare_eth(batman_packet->prev_sender,
+				hard_iface->net_dev->dev_addr))
 			is_my_oldorig = 1;
 
-		if (compare_orig(ethhdr->h_source, broadcast_addr))
+		if (compare_eth(ethhdr->h_source, broadcast_addr))
 			is_broadcast = 1;
 	}
 	rcu_read_unlock();
@@ -638,7 +694,6 @@ void receive_bat_packet(struct ethhdr *ethhdr,
 		int offset;
 
 		orig_neigh_node = get_orig_node(bat_priv, ethhdr->h_source);
-
 		if (!orig_neigh_node)
 			return;
 
@@ -647,18 +702,22 @@ void receive_bat_packet(struct ethhdr *ethhdr,
 		/* if received seqno equals last send seqno save new
 		 * seqno for bidirectional check */
 		if (has_directlink_flag &&
-		    compare_orig(if_incoming->net_dev->dev_addr,
-				 batman_packet->orig) &&
+		    compare_eth(if_incoming->net_dev->dev_addr,
+				batman_packet->orig) &&
 		    (batman_packet->seqno - if_incoming_seqno + 2 == 0)) {
 			offset = if_incoming->if_num * NUM_WORDS;
+
+			spin_lock_bh(&orig_neigh_node->ogm_cnt_lock);
 			word = &(orig_neigh_node->bcast_own[offset]);
 			bit_mark(word, 0);
 			orig_neigh_node->bcast_own_sum[if_incoming->if_num] =
 				bit_packet_count(word);
+			spin_unlock_bh(&orig_neigh_node->ogm_cnt_lock);
 		}
 
 		bat_dbg(DBG_BATMAN, bat_priv, "Drop packet: "
 			"originator packet from myself (via neighbor)\n");
+		orig_node_free_ref(orig_neigh_node);
 		return;
 	}
 
@@ -679,27 +738,27 @@ void receive_bat_packet(struct ethhdr *ethhdr,
 		bat_dbg(DBG_BATMAN, bat_priv,
 			"Drop packet: packet within seqno protection time "
 			"(sender: %pM)\n", ethhdr->h_source);
-		return;
+		goto out;
 	}
 
 	if (batman_packet->tq == 0) {
 		bat_dbg(DBG_BATMAN, bat_priv,
 			"Drop packet: originator packet with tq equal 0\n");
-		return;
+		goto out;
 	}
 
 	/* avoid temporary routing loops */
 	if ((orig_node->router) &&
 	    (orig_node->router->orig_node->router) &&
-	    (compare_orig(orig_node->router->addr,
-			  batman_packet->prev_sender)) &&
-	    !(compare_orig(batman_packet->orig, batman_packet->prev_sender)) &&
-	    (compare_orig(orig_node->router->addr,
-			  orig_node->router->orig_node->router->addr))) {
+	    (compare_eth(orig_node->router->addr,
+			 batman_packet->prev_sender)) &&
+	    !(compare_eth(batman_packet->orig, batman_packet->prev_sender)) &&
+	    (compare_eth(orig_node->router->addr,
+			 orig_node->router->orig_node->router->addr))) {
 		bat_dbg(DBG_BATMAN, bat_priv,
 			"Drop packet: ignoring all rebroadcast packets that "
 			"may make me loop (sender: %pM)\n", ethhdr->h_source);
-		return;
+		goto out;
 	}
 
 	/* if sender is a direct neighbor the sender mac equals
@@ -708,19 +767,21 @@ void receive_bat_packet(struct ethhdr *ethhdr,
 			   orig_node :
 			   get_orig_node(bat_priv, ethhdr->h_source));
 	if (!orig_neigh_node)
-		return;
+		goto out;
 
 	/* drop packet if sender is not a direct neighbor and if we
 	 * don't route towards it */
 	if (!is_single_hop_neigh && (!orig_neigh_node->router)) {
 		bat_dbg(DBG_BATMAN, bat_priv,
 			"Drop packet: OGM via unknown neighbor!\n");
-		return;
+		goto out_neigh;
 	}
 
 	is_bidirectional = is_bidirectional_neigh(orig_node, orig_neigh_node,
 						batman_packet, if_incoming);
 
+	bonding_save_primary(orig_node, orig_neigh_node, batman_packet);
+
 	/* update ranking if it is not a duplicate or has the same
 	 * seqno and similar ttl as the non-duplicate */
 	if (is_bidirectional &&
@@ -730,10 +791,6 @@ void receive_bat_packet(struct ethhdr *ethhdr,
 		update_orig(bat_priv, orig_node, ethhdr, batman_packet,
 			    if_incoming, hna_buff, hna_buff_len, is_duplicate);
 
-	mark_bonding_address(bat_priv, orig_node,
-			     orig_neigh_node, batman_packet);
-	update_bonding_candidates(bat_priv, orig_node);
-
 	/* is single hop (direct) neighbor */
 	if (is_single_hop_neigh) {
 
@@ -743,31 +800,36 @@ void receive_bat_packet(struct ethhdr *ethhdr,
 
 		bat_dbg(DBG_BATMAN, bat_priv, "Forwarding packet: "
 			"rebroadcast neighbor packet with direct link flag\n");
-		return;
+		goto out_neigh;
 	}
 
 	/* multihop originator */
 	if (!is_bidirectional) {
 		bat_dbg(DBG_BATMAN, bat_priv,
 			"Drop packet: not received via bidirectional link\n");
-		return;
+		goto out_neigh;
 	}
 
 	if (is_duplicate) {
 		bat_dbg(DBG_BATMAN, bat_priv,
 			"Drop packet: duplicate packet received\n");
-		return;
+		goto out_neigh;
 	}
 
 	bat_dbg(DBG_BATMAN, bat_priv,
 		"Forwarding packet: rebroadcast originator packet\n");
 	schedule_forward_packet(orig_node, ethhdr, batman_packet,
 				0, hna_buff_len, if_incoming);
+
+out_neigh:
+	if ((orig_neigh_node) && (!is_single_hop_neigh))
+		orig_node_free_ref(orig_neigh_node);
+out:
+	orig_node_free_ref(orig_node);
 }
 
-int recv_bat_packet(struct sk_buff *skb, struct batman_if *batman_if)
+int recv_bat_packet(struct sk_buff *skb, struct hard_iface *hard_iface)
 {
-	struct bat_priv *bat_priv = netdev_priv(batman_if->soft_iface);
 	struct ethhdr *ethhdr;
 
 	/* drop packet if it has not necessary minimum size */
@@ -794,12 +856,10 @@ int recv_bat_packet(struct sk_buff *skb, struct batman_if *batman_if)
 
 	ethhdr = (struct ethhdr *)skb_mac_header(skb);
 
-	spin_lock_bh(&bat_priv->orig_hash_lock);
 	receive_aggr_bat_packet(ethhdr,
 				skb->data,
 				skb_headlen(skb),
-				batman_if);
-	spin_unlock_bh(&bat_priv->orig_hash_lock);
+				hard_iface);
 
 	kfree_skb(skb);
 	return NET_RX_SUCCESS;
@@ -808,135 +868,144 @@ int recv_bat_packet(struct sk_buff *skb, struct batman_if *batman_if)
 static int recv_my_icmp_packet(struct bat_priv *bat_priv,
 			       struct sk_buff *skb, size_t icmp_len)
 {
-	struct orig_node *orig_node;
+	struct orig_node *orig_node = NULL;
+	struct neigh_node *neigh_node = NULL;
 	struct icmp_packet_rr *icmp_packet;
-	struct ethhdr *ethhdr;
-	struct batman_if *batman_if;
-	int ret;
-	uint8_t dstaddr[ETH_ALEN];
+	int ret = NET_RX_DROP;
 
 	icmp_packet = (struct icmp_packet_rr *)skb->data;
-	ethhdr = (struct ethhdr *)skb_mac_header(skb);
 
 	/* add data to device queue */
 	if (icmp_packet->msg_type != ECHO_REQUEST) {
 		bat_socket_receive_packet(icmp_packet, icmp_len);
-		return NET_RX_DROP;
+		goto out;
 	}
 
 	if (!bat_priv->primary_if)
-		return NET_RX_DROP;
+		goto out;
 
 	/* answer echo request (ping) */
 	/* get routing information */
-	spin_lock_bh(&bat_priv->orig_hash_lock);
-	orig_node = ((struct orig_node *)hash_find(bat_priv->orig_hash,
-						   compare_orig, choose_orig,
-						   icmp_packet->orig));
-	ret = NET_RX_DROP;
-
-	if ((orig_node) && (orig_node->router)) {
-
-		/* don't lock while sending the packets ... we therefore
-		 * copy the required data before sending */
-		batman_if = orig_node->router->if_incoming;
-		memcpy(dstaddr, orig_node->router->addr, ETH_ALEN);
-		spin_unlock_bh(&bat_priv->orig_hash_lock);
-
-		/* create a copy of the skb, if needed, to modify it. */
-		if (skb_cow(skb, sizeof(struct ethhdr)) < 0)
-			return NET_RX_DROP;
+	rcu_read_lock();
+	orig_node = orig_hash_find(bat_priv, icmp_packet->orig);
 
-		icmp_packet = (struct icmp_packet_rr *)skb->data;
-		ethhdr = (struct ethhdr *)skb_mac_header(skb);
+	if (!orig_node)
+		goto unlock;
 
-		memcpy(icmp_packet->dst, icmp_packet->orig, ETH_ALEN);
-		memcpy(icmp_packet->orig,
-		       bat_priv->primary_if->net_dev->dev_addr, ETH_ALEN);
-		icmp_packet->msg_type = ECHO_REPLY;
-		icmp_packet->ttl = TTL;
+	neigh_node = orig_node->router;
 
-		send_skb_packet(skb, batman_if, dstaddr);
-		ret = NET_RX_SUCCESS;
+	if (!neigh_node)
+		goto unlock;
 
-	} else
-		spin_unlock_bh(&bat_priv->orig_hash_lock);
+	if (!atomic_inc_not_zero(&neigh_node->refcount)) {
+		neigh_node = NULL;
+		goto unlock;
+	}
+
+	rcu_read_unlock();
+
+	/* create a copy of the skb, if needed, to modify it. */
+	if (skb_cow(skb, sizeof(struct ethhdr)) < 0)
+		goto out;
+
+	icmp_packet = (struct icmp_packet_rr *)skb->data;
+
+	memcpy(icmp_packet->dst, icmp_packet->orig, ETH_ALEN);
+	memcpy(icmp_packet->orig,
+		bat_priv->primary_if->net_dev->dev_addr, ETH_ALEN);
+	icmp_packet->msg_type = ECHO_REPLY;
+	icmp_packet->ttl = TTL;
 
+	send_skb_packet(skb, neigh_node->if_incoming, neigh_node->addr);
+	ret = NET_RX_SUCCESS;
+	goto out;
+
+unlock:
+	rcu_read_unlock();
+out:
+	if (neigh_node)
+		neigh_node_free_ref(neigh_node);
+	if (orig_node)
+		orig_node_free_ref(orig_node);
 	return ret;
 }
 
 static int recv_icmp_ttl_exceeded(struct bat_priv *bat_priv,
-				  struct sk_buff *skb, size_t icmp_len)
+				  struct sk_buff *skb)
 {
-	struct orig_node *orig_node;
+	struct orig_node *orig_node = NULL;
+	struct neigh_node *neigh_node = NULL;
 	struct icmp_packet *icmp_packet;
-	struct ethhdr *ethhdr;
-	struct batman_if *batman_if;
-	int ret;
-	uint8_t dstaddr[ETH_ALEN];
+	int ret = NET_RX_DROP;
 
 	icmp_packet = (struct icmp_packet *)skb->data;
-	ethhdr = (struct ethhdr *)skb_mac_header(skb);
 
 	/* send TTL exceeded if packet is an echo request (traceroute) */
 	if (icmp_packet->msg_type != ECHO_REQUEST) {
 		pr_debug("Warning - can't forward icmp packet from %pM to "
 			 "%pM: ttl exceeded\n", icmp_packet->orig,
 			 icmp_packet->dst);
-		return NET_RX_DROP;
+		goto out;
 	}
 
 	if (!bat_priv->primary_if)
-		return NET_RX_DROP;
+		goto out;
 
 	/* get routing information */
-	spin_lock_bh(&bat_priv->orig_hash_lock);
-	orig_node = ((struct orig_node *)
-		     hash_find(bat_priv->orig_hash, compare_orig, choose_orig,
-			       icmp_packet->orig));
-	ret = NET_RX_DROP;
-
-	if ((orig_node) && (orig_node->router)) {
-
-		/* don't lock while sending the packets ... we therefore
-		 * copy the required data before sending */
-		batman_if = orig_node->router->if_incoming;
-		memcpy(dstaddr, orig_node->router->addr, ETH_ALEN);
-		spin_unlock_bh(&bat_priv->orig_hash_lock);
-
-		/* create a copy of the skb, if needed, to modify it. */
-		if (skb_cow(skb, sizeof(struct ethhdr)) < 0)
-			return NET_RX_DROP;
+	rcu_read_lock();
+	orig_node = orig_hash_find(bat_priv, icmp_packet->orig);
 
-		icmp_packet = (struct icmp_packet *) skb->data;
-		ethhdr = (struct ethhdr *)skb_mac_header(skb);
+	if (!orig_node)
+		goto unlock;
 
-		memcpy(icmp_packet->dst, icmp_packet->orig, ETH_ALEN);
-		memcpy(icmp_packet->orig,
-		       bat_priv->primary_if->net_dev->dev_addr, ETH_ALEN);
-		icmp_packet->msg_type = TTL_EXCEEDED;
-		icmp_packet->ttl = TTL;
+	neigh_node = orig_node->router;
 
-		send_skb_packet(skb, batman_if, dstaddr);
-		ret = NET_RX_SUCCESS;
+	if (!neigh_node)
+		goto unlock;
 
-	} else
-		spin_unlock_bh(&bat_priv->orig_hash_lock);
+	if (!atomic_inc_not_zero(&neigh_node->refcount)) {
+		neigh_node = NULL;
+		goto unlock;
+	}
 
+	rcu_read_unlock();
+
+	/* create a copy of the skb, if needed, to modify it. */
+	if (skb_cow(skb, sizeof(struct ethhdr)) < 0)
+		goto out;
+
+	icmp_packet = (struct icmp_packet *)skb->data;
+
+	memcpy(icmp_packet->dst, icmp_packet->orig, ETH_ALEN);
+	memcpy(icmp_packet->orig,
+		bat_priv->primary_if->net_dev->dev_addr, ETH_ALEN);
+	icmp_packet->msg_type = TTL_EXCEEDED;
+	icmp_packet->ttl = TTL;
+
+	send_skb_packet(skb, neigh_node->if_incoming, neigh_node->addr);
+	ret = NET_RX_SUCCESS;
+	goto out;
+
+unlock:
+	rcu_read_unlock();
+out:
+	if (neigh_node)
+		neigh_node_free_ref(neigh_node);
+	if (orig_node)
+		orig_node_free_ref(orig_node);
 	return ret;
 }
 
 
-int recv_icmp_packet(struct sk_buff *skb, struct batman_if *recv_if)
+int recv_icmp_packet(struct sk_buff *skb, struct hard_iface *recv_if)
 {
 	struct bat_priv *bat_priv = netdev_priv(recv_if->soft_iface);
 	struct icmp_packet_rr *icmp_packet;
 	struct ethhdr *ethhdr;
-	struct orig_node *orig_node;
-	struct batman_if *batman_if;
+	struct orig_node *orig_node = NULL;
+	struct neigh_node *neigh_node = NULL;
 	int hdr_size = sizeof(struct icmp_packet);
-	int ret;
-	uint8_t dstaddr[ETH_ALEN];
+	int ret = NET_RX_DROP;
 
 	/**
 	 * we truncate all incoming icmp packets if they don't match our size
@@ -946,21 +1015,21 @@ int recv_icmp_packet(struct sk_buff *skb, struct batman_if *recv_if)
 
 	/* drop packet if it has not necessary minimum size */
 	if (unlikely(!pskb_may_pull(skb, hdr_size)))
-		return NET_RX_DROP;
+		goto out;
 
 	ethhdr = (struct ethhdr *)skb_mac_header(skb);
 
 	/* packet with unicast indication but broadcast recipient */
 	if (is_broadcast_ether_addr(ethhdr->h_dest))
-		return NET_RX_DROP;
+		goto out;
 
 	/* packet with broadcast sender address */
 	if (is_broadcast_ether_addr(ethhdr->h_source))
-		return NET_RX_DROP;
+		goto out;
 
 	/* not for me */
 	if (!is_my_mac(ethhdr->h_dest))
-		return NET_RX_DROP;
+		goto out;
 
 	icmp_packet = (struct icmp_packet_rr *)skb->data;
 
@@ -978,53 +1047,61 @@ int recv_icmp_packet(struct sk_buff *skb, struct batman_if *recv_if)
 
 	/* TTL exceeded */
 	if (icmp_packet->ttl < 2)
-		return recv_icmp_ttl_exceeded(bat_priv, skb, hdr_size);
-
-	ret = NET_RX_DROP;
+		return recv_icmp_ttl_exceeded(bat_priv, skb);
 
 	/* get routing information */
-	spin_lock_bh(&bat_priv->orig_hash_lock);
-	orig_node = ((struct orig_node *)
-		     hash_find(bat_priv->orig_hash, compare_orig, choose_orig,
-			       icmp_packet->dst));
+	rcu_read_lock();
+	orig_node = orig_hash_find(bat_priv, icmp_packet->dst);
 
-	if ((orig_node) && (orig_node->router)) {
+	if (!orig_node)
+		goto unlock;
 
-		/* don't lock while sending the packets ... we therefore
-		 * copy the required data before sending */
-		batman_if = orig_node->router->if_incoming;
-		memcpy(dstaddr, orig_node->router->addr, ETH_ALEN);
-		spin_unlock_bh(&bat_priv->orig_hash_lock);
+	neigh_node = orig_node->router;
 
-		/* create a copy of the skb, if needed, to modify it. */
-		if (skb_cow(skb, sizeof(struct ethhdr)) < 0)
-			return NET_RX_DROP;
+	if (!neigh_node)
+		goto unlock;
 
-		icmp_packet = (struct icmp_packet_rr *)skb->data;
-		ethhdr = (struct ethhdr *)skb_mac_header(skb);
+	if (!atomic_inc_not_zero(&neigh_node->refcount)) {
+		neigh_node = NULL;
+		goto unlock;
+	}
 
-		/* decrement ttl */
-		icmp_packet->ttl--;
+	rcu_read_unlock();
 
-		/* route it */
-		send_skb_packet(skb, batman_if, dstaddr);
-		ret = NET_RX_SUCCESS;
+	/* create a copy of the skb, if needed, to modify it. */
+	if (skb_cow(skb, sizeof(struct ethhdr)) < 0)
+		goto out;
 
-	} else
-		spin_unlock_bh(&bat_priv->orig_hash_lock);
+	icmp_packet = (struct icmp_packet_rr *)skb->data;
+
+	/* decrement ttl */
+	icmp_packet->ttl--;
 
+	/* route it */
+	send_skb_packet(skb, neigh_node->if_incoming, neigh_node->addr);
+	ret = NET_RX_SUCCESS;
+	goto out;
+
+unlock:
+	rcu_read_unlock();
+out:
+	if (neigh_node)
+		neigh_node_free_ref(neigh_node);
+	if (orig_node)
+		orig_node_free_ref(orig_node);
 	return ret;
 }
 
 /* find a suitable router for this originator, and use
- * bonding if possible. */
+ * bonding if possible. increases the found neighbors
+ * refcount.*/
 struct neigh_node *find_router(struct bat_priv *bat_priv,
 			       struct orig_node *orig_node,
-			       struct batman_if *recv_if)
+			       struct hard_iface *recv_if)
 {
 	struct orig_node *primary_orig_node;
 	struct orig_node *router_orig;
-	struct neigh_node *router, *first_candidate, *best_router;
+	struct neigh_node *router, *first_candidate, *tmp_neigh_node;
 	static uint8_t zero_mac[ETH_ALEN] = {0, 0, 0, 0, 0, 0};
 	int bonding_enabled;
 
@@ -1036,78 +1113,128 @@ struct neigh_node *find_router(struct bat_priv *bat_priv,
 
 	/* without bonding, the first node should
 	 * always choose the default router. */
-
 	bonding_enabled = atomic_read(&bat_priv->bonding);
 
-	if ((!recv_if) && (!bonding_enabled))
-		return orig_node->router;
-
+	rcu_read_lock();
+	/* select default router to output */
+	router = orig_node->router;
 	router_orig = orig_node->router->orig_node;
+	if (!router_orig || !atomic_inc_not_zero(&router->refcount)) {
+		rcu_read_unlock();
+		return NULL;
+	}
+
+	if ((!recv_if) && (!bonding_enabled))
+		goto return_router;
 
 	/* if we have something in the primary_addr, we can search
 	 * for a potential bonding candidate. */
-	if (memcmp(router_orig->primary_addr, zero_mac, ETH_ALEN) == 0)
-		return orig_node->router;
+	if (compare_eth(router_orig->primary_addr, zero_mac))
+		goto return_router;
 
 	/* find the orig_node which has the primary interface. might
 	 * even be the same as our router_orig in many cases */
 
-	if (memcmp(router_orig->primary_addr,
-				router_orig->orig, ETH_ALEN) == 0) {
+	if (compare_eth(router_orig->primary_addr, router_orig->orig)) {
 		primary_orig_node = router_orig;
 	} else {
-		primary_orig_node = hash_find(bat_priv->orig_hash, compare_orig,
-					       choose_orig,
-					       router_orig->primary_addr);
-
+		primary_orig_node = orig_hash_find(bat_priv,
+						   router_orig->primary_addr);
 		if (!primary_orig_node)
-			return orig_node->router;
+			goto return_router;
+
+		orig_node_free_ref(primary_orig_node);
 	}
 
 	/* with less than 2 candidates, we can't do any
 	 * bonding and prefer the original router. */
-
-	if (primary_orig_node->bond.candidates < 2)
-		return orig_node->router;
+	if (atomic_read(&primary_orig_node->bond_candidates) < 2)
+		goto return_router;
 
 
 	/* all nodes between should choose a candidate which
 	 * is is not on the interface where the packet came
 	 * in. */
-	first_candidate = primary_orig_node->bond.selected;
-	router = first_candidate;
+
+	neigh_node_free_ref(router);
+	first_candidate = NULL;
+	router = NULL;
 
 	if (bonding_enabled) {
 		/* in the bonding case, send the packets in a round
 		 * robin fashion over the remaining interfaces. */
-		do {
+
+		list_for_each_entry_rcu(tmp_neigh_node,
+				&primary_orig_node->bond_list, bonding_list) {
+			if (!first_candidate)
+				first_candidate = tmp_neigh_node;
 			/* recv_if == NULL on the first node. */
-			if (router->if_incoming != recv_if)
+			if (tmp_neigh_node->if_incoming != recv_if &&
+			    atomic_inc_not_zero(&tmp_neigh_node->refcount)) {
+				router = tmp_neigh_node;
 				break;
+			}
+		}
+
+		/* use the first candidate if nothing was found. */
+		if (!router && first_candidate &&
+		    atomic_inc_not_zero(&first_candidate->refcount))
+			router = first_candidate;
 
-			router = router->next_bond_candidate;
-		} while (router != first_candidate);
+		if (!router) {
+			rcu_read_unlock();
+			return NULL;
+		}
 
-		primary_orig_node->bond.selected = router->next_bond_candidate;
+		/* selected should point to the next element
+		 * after the current router */
+		spin_lock_bh(&primary_orig_node->neigh_list_lock);
+		/* this is a list_move(), which unfortunately
+		 * does not exist as rcu version */
+		list_del_rcu(&primary_orig_node->bond_list);
+		list_add_rcu(&primary_orig_node->bond_list,
+				&router->bonding_list);
+		spin_unlock_bh(&primary_orig_node->neigh_list_lock);
 
 	} else {
 		/* if bonding is disabled, use the best of the
 		 * remaining candidates which are not using
 		 * this interface. */
-		best_router = first_candidate;
+		list_for_each_entry_rcu(tmp_neigh_node,
+			&primary_orig_node->bond_list, bonding_list) {
+			if (!first_candidate)
+				first_candidate = tmp_neigh_node;
 
-		do {
 			/* recv_if == NULL on the first node. */
-			if ((router->if_incoming != recv_if) &&
-				(router->tq_avg > best_router->tq_avg))
-					best_router = router;
+			if (tmp_neigh_node->if_incoming == recv_if)
+				continue;
 
-			router = router->next_bond_candidate;
-		} while (router != first_candidate);
+			if (!atomic_inc_not_zero(&tmp_neigh_node->refcount))
+				continue;
 
-		router = best_router;
-	}
+			/* if we don't have a router yet
+			 * or this one is better, choose it. */
+			if ((!router) ||
+			    (tmp_neigh_node->tq_avg > router->tq_avg)) {
+				/* decrement refcount of
+				 * previously selected router */
+				if (router)
+					neigh_node_free_ref(router);
+
+				router = tmp_neigh_node;
+				atomic_inc_not_zero(&router->refcount);
+			}
+
+			neigh_node_free_ref(tmp_neigh_node);
+		}
 
+		/* use the first candidate if nothing was found. */
+		if (!router && first_candidate &&
+		    atomic_inc_not_zero(&first_candidate->refcount))
+			router = first_candidate;
+	}
+return_router:
+	rcu_read_unlock();
 	return router;
 }
 
@@ -1136,17 +1263,14 @@ static int check_unicast_packet(struct sk_buff *skb, int hdr_size)
 	return 0;
 }
 
-int route_unicast_packet(struct sk_buff *skb, struct batman_if *recv_if,
-			 int hdr_size)
+int route_unicast_packet(struct sk_buff *skb, struct hard_iface *recv_if)
 {
 	struct bat_priv *bat_priv = netdev_priv(recv_if->soft_iface);
-	struct orig_node *orig_node;
-	struct neigh_node *router;
-	struct batman_if *batman_if;
-	uint8_t dstaddr[ETH_ALEN];
+	struct orig_node *orig_node = NULL;
+	struct neigh_node *neigh_node = NULL;
 	struct unicast_packet *unicast_packet;
 	struct ethhdr *ethhdr = (struct ethhdr *)skb_mac_header(skb);
-	int ret;
+	int ret = NET_RX_DROP;
 	struct sk_buff *new_skb;
 
 	unicast_packet = (struct unicast_packet *)skb->data;
@@ -1156,53 +1280,51 @@ int route_unicast_packet(struct sk_buff *skb, struct batman_if *recv_if,
 		pr_debug("Warning - can't forward unicast packet from %pM to "
 			 "%pM: ttl exceeded\n", ethhdr->h_source,
 			 unicast_packet->dest);
-		return NET_RX_DROP;
+		goto out;
 	}
 
 	/* get routing information */
-	spin_lock_bh(&bat_priv->orig_hash_lock);
-	orig_node = ((struct orig_node *)
-		     hash_find(bat_priv->orig_hash, compare_orig, choose_orig,
-			       unicast_packet->dest));
-
-	router = find_router(bat_priv, orig_node, recv_if);
+	rcu_read_lock();
+	orig_node = orig_hash_find(bat_priv, unicast_packet->dest);
 
-	if (!router) {
-		spin_unlock_bh(&bat_priv->orig_hash_lock);
-		return NET_RX_DROP;
-	}
+	if (!orig_node)
+		goto unlock;
 
-	/* don't lock while sending the packets ... we therefore
-	 * copy the required data before sending */
+	rcu_read_unlock();
 
-	batman_if = router->if_incoming;
-	memcpy(dstaddr, router->addr, ETH_ALEN);
+	/* find_router() increases neigh_nodes refcount if found. */
+	neigh_node = find_router(bat_priv, orig_node, recv_if);
 
-	spin_unlock_bh(&bat_priv->orig_hash_lock);
+	if (!neigh_node)
+		goto out;
 
 	/* create a copy of the skb, if needed, to modify it. */
 	if (skb_cow(skb, sizeof(struct ethhdr)) < 0)
-		return NET_RX_DROP;
+		goto out;
 
 	unicast_packet = (struct unicast_packet *)skb->data;
 
 	if (unicast_packet->packet_type == BAT_UNICAST &&
 	    atomic_read(&bat_priv->fragmentation) &&
-	    skb->len > batman_if->net_dev->mtu)
-		return frag_send_skb(skb, bat_priv, batman_if,
-				     dstaddr);
+	    skb->len > neigh_node->if_incoming->net_dev->mtu) {
+		ret = frag_send_skb(skb, bat_priv,
+				    neigh_node->if_incoming, neigh_node->addr);
+		goto out;
+	}
 
 	if (unicast_packet->packet_type == BAT_UNICAST_FRAG &&
-	    2 * skb->len - hdr_size <= batman_if->net_dev->mtu) {
+	    frag_can_reassemble(skb, neigh_node->if_incoming->net_dev->mtu)) {
 
 		ret = frag_reassemble_skb(skb, bat_priv, &new_skb);
 
 		if (ret == NET_RX_DROP)
-			return NET_RX_DROP;
+			goto out;
 
 		/* packet was buffered for late merge */
-		if (!new_skb)
-			return NET_RX_SUCCESS;
+		if (!new_skb) {
+			ret = NET_RX_SUCCESS;
+			goto out;
+		}
 
 		skb = new_skb;
 		unicast_packet = (struct unicast_packet *)skb->data;
@@ -1212,12 +1334,21 @@ int route_unicast_packet(struct sk_buff *skb, struct batman_if *recv_if,
 	unicast_packet->ttl--;
 
 	/* route it */
-	send_skb_packet(skb, batman_if, dstaddr);
+	send_skb_packet(skb, neigh_node->if_incoming, neigh_node->addr);
+	ret = NET_RX_SUCCESS;
+	goto out;
 
-	return NET_RX_SUCCESS;
+unlock:
+	rcu_read_unlock();
+out:
+	if (neigh_node)
+		neigh_node_free_ref(neigh_node);
+	if (orig_node)
+		orig_node_free_ref(orig_node);
+	return ret;
 }
 
-int recv_unicast_packet(struct sk_buff *skb, struct batman_if *recv_if)
+int recv_unicast_packet(struct sk_buff *skb, struct hard_iface *recv_if)
 {
 	struct unicast_packet *unicast_packet;
 	int hdr_size = sizeof(struct unicast_packet);
@@ -1233,10 +1364,10 @@ int recv_unicast_packet(struct sk_buff *skb, struct batman_if *recv_if)
 		return NET_RX_SUCCESS;
 	}
 
-	return route_unicast_packet(skb, recv_if, hdr_size);
+	return route_unicast_packet(skb, recv_if);
 }
 
-int recv_ucast_frag_packet(struct sk_buff *skb, struct batman_if *recv_if)
+int recv_ucast_frag_packet(struct sk_buff *skb, struct hard_iface *recv_if)
 {
 	struct bat_priv *bat_priv = netdev_priv(recv_if->soft_iface);
 	struct unicast_frag_packet *unicast_packet;
@@ -1266,89 +1397,96 @@ int recv_ucast_frag_packet(struct sk_buff *skb, struct batman_if *recv_if)
 		return NET_RX_SUCCESS;
 	}
 
-	return route_unicast_packet(skb, recv_if, hdr_size);
+	return route_unicast_packet(skb, recv_if);
 }
 
 
-int recv_bcast_packet(struct sk_buff *skb, struct batman_if *recv_if)
+int recv_bcast_packet(struct sk_buff *skb, struct hard_iface *recv_if)
 {
 	struct bat_priv *bat_priv = netdev_priv(recv_if->soft_iface);
-	struct orig_node *orig_node;
+	struct orig_node *orig_node = NULL;
 	struct bcast_packet *bcast_packet;
 	struct ethhdr *ethhdr;
 	int hdr_size = sizeof(struct bcast_packet);
+	int ret = NET_RX_DROP;
 	int32_t seq_diff;
 
 	/* drop packet if it has not necessary minimum size */
 	if (unlikely(!pskb_may_pull(skb, hdr_size)))
-		return NET_RX_DROP;
+		goto out;
 
 	ethhdr = (struct ethhdr *)skb_mac_header(skb);
 
 	/* packet with broadcast indication but unicast recipient */
 	if (!is_broadcast_ether_addr(ethhdr->h_dest))
-		return NET_RX_DROP;
+		goto out;
 
 	/* packet with broadcast sender address */
 	if (is_broadcast_ether_addr(ethhdr->h_source))
-		return NET_RX_DROP;
+		goto out;
 
 	/* ignore broadcasts sent by myself */
 	if (is_my_mac(ethhdr->h_source))
-		return NET_RX_DROP;
+		goto out;
 
 	bcast_packet = (struct bcast_packet *)skb->data;
 
 	/* ignore broadcasts originated by myself */
 	if (is_my_mac(bcast_packet->orig))
-		return NET_RX_DROP;
+		goto out;
 
 	if (bcast_packet->ttl < 2)
-		return NET_RX_DROP;
+		goto out;
 
-	spin_lock_bh(&bat_priv->orig_hash_lock);
-	orig_node = ((struct orig_node *)
-		     hash_find(bat_priv->orig_hash, compare_orig, choose_orig,
-			       bcast_packet->orig));
+	rcu_read_lock();
+	orig_node = orig_hash_find(bat_priv, bcast_packet->orig);
 
-	if (!orig_node) {
-		spin_unlock_bh(&bat_priv->orig_hash_lock);
-		return NET_RX_DROP;
-	}
+	if (!orig_node)
+		goto rcu_unlock;
+
+	rcu_read_unlock();
+
+	spin_lock_bh(&orig_node->bcast_seqno_lock);
 
 	/* check whether the packet is a duplicate */
-	if (get_bit_status(orig_node->bcast_bits,
-			   orig_node->last_bcast_seqno,
-			   ntohl(bcast_packet->seqno))) {
-		spin_unlock_bh(&bat_priv->orig_hash_lock);
-		return NET_RX_DROP;
-	}
+	if (get_bit_status(orig_node->bcast_bits, orig_node->last_bcast_seqno,
+			   ntohl(bcast_packet->seqno)))
+		goto spin_unlock;
 
 	seq_diff = ntohl(bcast_packet->seqno) - orig_node->last_bcast_seqno;
 
 	/* check whether the packet is old and the host just restarted. */
 	if (window_protected(bat_priv, seq_diff,
-			     &orig_node->bcast_seqno_reset)) {
-		spin_unlock_bh(&bat_priv->orig_hash_lock);
-		return NET_RX_DROP;
-	}
+			     &orig_node->bcast_seqno_reset))
+		goto spin_unlock;
 
 	/* mark broadcast in flood history, update window position
 	 * if required. */
 	if (bit_get_packet(bat_priv, orig_node->bcast_bits, seq_diff, 1))
 		orig_node->last_bcast_seqno = ntohl(bcast_packet->seqno);
 
-	spin_unlock_bh(&bat_priv->orig_hash_lock);
+	spin_unlock_bh(&orig_node->bcast_seqno_lock);
+
 	/* rebroadcast packet */
 	add_bcast_packet_to_list(bat_priv, skb);
 
 	/* broadcast for me */
 	interface_rx(recv_if->soft_iface, skb, recv_if, hdr_size);
+	ret = NET_RX_SUCCESS;
+	goto out;
 
-	return NET_RX_SUCCESS;
+rcu_unlock:
+	rcu_read_unlock();
+	goto out;
+spin_unlock:
+	spin_unlock_bh(&orig_node->bcast_seqno_lock);
+out:
+	if (orig_node)
+		orig_node_free_ref(orig_node);
+	return ret;
 }
 
-int recv_vis_packet(struct sk_buff *skb, struct batman_if *recv_if)
+int recv_vis_packet(struct sk_buff *skb, struct hard_iface *recv_if)
 {
 	struct vis_packet *vis_packet;
 	struct ethhdr *ethhdr;
diff --git a/net/batman-adv/routing.h b/net/batman-adv/routing.h
index f108f23..b5a064c 100644
--- a/net/batman-adv/routing.h
+++ b/net/batman-adv/routing.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors:
+ * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
@@ -22,27 +22,25 @@
 #ifndef _NET_BATMAN_ADV_ROUTING_H_
 #define _NET_BATMAN_ADV_ROUTING_H_
 
-#include "types.h"
-
-void slide_own_bcast_window(struct batman_if *batman_if);
+void slide_own_bcast_window(struct hard_iface *hard_iface);
 void receive_bat_packet(struct ethhdr *ethhdr,
 				struct batman_packet *batman_packet,
 				unsigned char *hna_buff, int hna_buff_len,
-				struct batman_if *if_incoming);
+				struct hard_iface *if_incoming);
 void update_routes(struct bat_priv *bat_priv, struct orig_node *orig_node,
 		   struct neigh_node *neigh_node, unsigned char *hna_buff,
 		   int hna_buff_len);
-int route_unicast_packet(struct sk_buff *skb, struct batman_if *recv_if,
-			 int hdr_size);
-int recv_icmp_packet(struct sk_buff *skb, struct batman_if *recv_if);
-int recv_unicast_packet(struct sk_buff *skb, struct batman_if *recv_if);
-int recv_ucast_frag_packet(struct sk_buff *skb, struct batman_if *recv_if);
-int recv_bcast_packet(struct sk_buff *skb, struct batman_if *recv_if);
-int recv_vis_packet(struct sk_buff *skb, struct batman_if *recv_if);
-int recv_bat_packet(struct sk_buff *skb, struct batman_if *recv_if);
+int route_unicast_packet(struct sk_buff *skb, struct hard_iface *recv_if);
+int recv_icmp_packet(struct sk_buff *skb, struct hard_iface *recv_if);
+int recv_unicast_packet(struct sk_buff *skb, struct hard_iface *recv_if);
+int recv_ucast_frag_packet(struct sk_buff *skb, struct hard_iface *recv_if);
+int recv_bcast_packet(struct sk_buff *skb, struct hard_iface *recv_if);
+int recv_vis_packet(struct sk_buff *skb, struct hard_iface *recv_if);
+int recv_bat_packet(struct sk_buff *skb, struct hard_iface *recv_if);
 struct neigh_node *find_router(struct bat_priv *bat_priv,
-		struct orig_node *orig_node, struct batman_if *recv_if);
-void update_bonding_candidates(struct bat_priv *bat_priv,
-			       struct orig_node *orig_node);
+			       struct orig_node *orig_node,
+			       struct hard_iface *recv_if);
+void bonding_candidate_del(struct orig_node *orig_node,
+			   struct neigh_node *neigh_node);
 
 #endif /* _NET_BATMAN_ADV_ROUTING_H_ */
diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c
index b89b9f7..d49e54d 100644
--- a/net/batman-adv/send.c
+++ b/net/batman-adv/send.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors:
+ * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
@@ -25,7 +25,6 @@
 #include "translation-table.h"
 #include "soft-interface.h"
 #include "hard-interface.h"
-#include "types.h"
 #include "vis.h"
 #include "aggregation.h"
 #include "gateway_common.h"
@@ -49,7 +48,7 @@ static unsigned long own_send_time(struct bat_priv *bat_priv)
 }
 
 /* when do we schedule a forwarded packet to be sent */
-static unsigned long forward_send_time(struct bat_priv *bat_priv)
+static unsigned long forward_send_time(void)
 {
 	return jiffies + msecs_to_jiffies(random32() % (JITTER/2));
 }
@@ -57,20 +56,20 @@ static unsigned long forward_send_time(struct bat_priv *bat_priv)
 /* send out an already prepared packet to the given address via the
  * specified batman interface */
 int send_skb_packet(struct sk_buff *skb,
-				struct batman_if *batman_if,
+				struct hard_iface *hard_iface,
 				uint8_t *dst_addr)
 {
 	struct ethhdr *ethhdr;
 
-	if (batman_if->if_status != IF_ACTIVE)
+	if (hard_iface->if_status != IF_ACTIVE)
 		goto send_skb_err;
 
-	if (unlikely(!batman_if->net_dev))
+	if (unlikely(!hard_iface->net_dev))
 		goto send_skb_err;
 
-	if (!(batman_if->net_dev->flags & IFF_UP)) {
+	if (!(hard_iface->net_dev->flags & IFF_UP)) {
 		pr_warning("Interface %s is not up - can't send packet via "
-			   "that interface!\n", batman_if->net_dev->name);
+			   "that interface!\n", hard_iface->net_dev->name);
 		goto send_skb_err;
 	}
 
@@ -81,7 +80,7 @@ int send_skb_packet(struct sk_buff *skb,
 	skb_reset_mac_header(skb);
 
 	ethhdr = (struct ethhdr *) skb_mac_header(skb);
-	memcpy(ethhdr->h_source, batman_if->net_dev->dev_addr, ETH_ALEN);
+	memcpy(ethhdr->h_source, hard_iface->net_dev->dev_addr, ETH_ALEN);
 	memcpy(ethhdr->h_dest, dst_addr, ETH_ALEN);
 	ethhdr->h_proto = __constant_htons(ETH_P_BATMAN);
 
@@ -89,7 +88,7 @@ int send_skb_packet(struct sk_buff *skb,
 	skb->priority = TC_PRIO_CONTROL;
 	skb->protocol = __constant_htons(ETH_P_BATMAN);
 
-	skb->dev = batman_if->net_dev;
+	skb->dev = hard_iface->net_dev;
 
 	/* dev_queue_xmit() returns a negative result on error.	 However on
 	 * congestion and traffic shaping, it drops and returns NET_XMIT_DROP
@@ -103,16 +102,16 @@ send_skb_err:
 
 /* Send a packet to a given interface */
 static void send_packet_to_if(struct forw_packet *forw_packet,
-			      struct batman_if *batman_if)
+			      struct hard_iface *hard_iface)
 {
-	struct bat_priv *bat_priv = netdev_priv(batman_if->soft_iface);
+	struct bat_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
 	char *fwd_str;
 	uint8_t packet_num;
 	int16_t buff_pos;
 	struct batman_packet *batman_packet;
 	struct sk_buff *skb;
 
-	if (batman_if->if_status != IF_ACTIVE)
+	if (hard_iface->if_status != IF_ACTIVE)
 		return;
 
 	packet_num = 0;
@@ -127,7 +126,7 @@ static void send_packet_to_if(struct forw_packet *forw_packet,
 		/* we might have aggregated direct link packets with an
 		 * ordinary base packet */
 		if ((forw_packet->direct_link_flags & (1 << packet_num)) &&
-		    (forw_packet->if_incoming == batman_if))
+		    (forw_packet->if_incoming == hard_iface))
 			batman_packet->flags |= DIRECTLINK;
 		else
 			batman_packet->flags &= ~DIRECTLINK;
@@ -143,7 +142,8 @@ static void send_packet_to_if(struct forw_packet *forw_packet,
 			batman_packet->tq, batman_packet->ttl,
 			(batman_packet->flags & DIRECTLINK ?
 			 "on" : "off"),
-			batman_if->net_dev->name, batman_if->net_dev->dev_addr);
+			hard_iface->net_dev->name,
+			hard_iface->net_dev->dev_addr);
 
 		buff_pos += sizeof(struct batman_packet) +
 			(batman_packet->num_hna * ETH_ALEN);
@@ -155,13 +155,13 @@ static void send_packet_to_if(struct forw_packet *forw_packet,
 	/* create clone because function is called more than once */
 	skb = skb_clone(forw_packet->skb, GFP_ATOMIC);
 	if (skb)
-		send_skb_packet(skb, batman_if, broadcast_addr);
+		send_skb_packet(skb, hard_iface, broadcast_addr);
 }
 
 /* send a batman packet */
 static void send_packet(struct forw_packet *forw_packet)
 {
-	struct batman_if *batman_if;
+	struct hard_iface *hard_iface;
 	struct net_device *soft_iface;
 	struct bat_priv *bat_priv;
 	struct batman_packet *batman_packet =
@@ -205,17 +205,17 @@ static void send_packet(struct forw_packet *forw_packet)
 
 	/* broadcast on every interface */
 	rcu_read_lock();
-	list_for_each_entry_rcu(batman_if, &if_list, list) {
-		if (batman_if->soft_iface != soft_iface)
+	list_for_each_entry_rcu(hard_iface, &hardif_list, list) {
+		if (hard_iface->soft_iface != soft_iface)
 			continue;
 
-		send_packet_to_if(forw_packet, batman_if);
+		send_packet_to_if(forw_packet, hard_iface);
 	}
 	rcu_read_unlock();
 }
 
 static void rebuild_batman_packet(struct bat_priv *bat_priv,
-				  struct batman_if *batman_if)
+				  struct hard_iface *hard_iface)
 {
 	int new_len;
 	unsigned char *new_buff;
@@ -227,7 +227,7 @@ static void rebuild_batman_packet(struct bat_priv *bat_priv,
 
 	/* keep old buffer if kmalloc should fail */
 	if (new_buff) {
-		memcpy(new_buff, batman_if->packet_buff,
+		memcpy(new_buff, hard_iface->packet_buff,
 		       sizeof(struct batman_packet));
 		batman_packet = (struct batman_packet *)new_buff;
 
@@ -235,21 +235,21 @@ static void rebuild_batman_packet(struct bat_priv *bat_priv,
 				new_buff + sizeof(struct batman_packet),
 				new_len - sizeof(struct batman_packet));
 
-		kfree(batman_if->packet_buff);
-		batman_if->packet_buff = new_buff;
-		batman_if->packet_len = new_len;
+		kfree(hard_iface->packet_buff);
+		hard_iface->packet_buff = new_buff;
+		hard_iface->packet_len = new_len;
 	}
 }
 
-void schedule_own_packet(struct batman_if *batman_if)
+void schedule_own_packet(struct hard_iface *hard_iface)
 {
-	struct bat_priv *bat_priv = netdev_priv(batman_if->soft_iface);
+	struct bat_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
 	unsigned long send_time;
 	struct batman_packet *batman_packet;
 	int vis_server;
 
-	if ((batman_if->if_status == IF_NOT_IN_USE) ||
-	    (batman_if->if_status == IF_TO_BE_REMOVED))
+	if ((hard_iface->if_status == IF_NOT_IN_USE) ||
+	    (hard_iface->if_status == IF_TO_BE_REMOVED))
 		return;
 
 	vis_server = atomic_read(&bat_priv->vis_mode);
@@ -261,51 +261,51 @@ void schedule_own_packet(struct batman_if *batman_if)
 	 * outdated packets (especially uninitialized mac addresses) in the
 	 * packet queue
 	 */
-	if (batman_if->if_status == IF_TO_BE_ACTIVATED)
-		batman_if->if_status = IF_ACTIVE;
+	if (hard_iface->if_status == IF_TO_BE_ACTIVATED)
+		hard_iface->if_status = IF_ACTIVE;
 
 	/* if local hna has changed and interface is a primary interface */
 	if ((atomic_read(&bat_priv->hna_local_changed)) &&
-	    (batman_if == bat_priv->primary_if))
-		rebuild_batman_packet(bat_priv, batman_if);
+	    (hard_iface == bat_priv->primary_if))
+		rebuild_batman_packet(bat_priv, hard_iface);
 
 	/**
 	 * NOTE: packet_buff might just have been re-allocated in
 	 * rebuild_batman_packet()
 	 */
-	batman_packet = (struct batman_packet *)batman_if->packet_buff;
+	batman_packet = (struct batman_packet *)hard_iface->packet_buff;
 
 	/* change sequence number to network order */
 	batman_packet->seqno =
-		htonl((uint32_t)atomic_read(&batman_if->seqno));
+		htonl((uint32_t)atomic_read(&hard_iface->seqno));
 
 	if (vis_server == VIS_TYPE_SERVER_SYNC)
 		batman_packet->flags |= VIS_SERVER;
 	else
 		batman_packet->flags &= ~VIS_SERVER;
 
-	if ((batman_if == bat_priv->primary_if) &&
+	if ((hard_iface == bat_priv->primary_if) &&
 	    (atomic_read(&bat_priv->gw_mode) == GW_MODE_SERVER))
 		batman_packet->gw_flags =
 				(uint8_t)atomic_read(&bat_priv->gw_bandwidth);
 	else
 		batman_packet->gw_flags = 0;
 
-	atomic_inc(&batman_if->seqno);
+	atomic_inc(&hard_iface->seqno);
 
-	slide_own_bcast_window(batman_if);
+	slide_own_bcast_window(hard_iface);
 	send_time = own_send_time(bat_priv);
 	add_bat_packet_to_list(bat_priv,
-			       batman_if->packet_buff,
-			       batman_if->packet_len,
-			       batman_if, 1, send_time);
+			       hard_iface->packet_buff,
+			       hard_iface->packet_len,
+			       hard_iface, 1, send_time);
 }
 
 void schedule_forward_packet(struct orig_node *orig_node,
 			     struct ethhdr *ethhdr,
 			     struct batman_packet *batman_packet,
 			     uint8_t directlink, int hna_buff_len,
-			     struct batman_if *if_incoming)
+			     struct hard_iface *if_incoming)
 {
 	struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface);
 	unsigned char in_tq, in_ttl, tq_avg = 0;
@@ -327,7 +327,7 @@ void schedule_forward_packet(struct orig_node *orig_node,
 	if ((orig_node->router) && (orig_node->router->tq_avg != 0)) {
 
 		/* rebroadcast ogm of best ranking neighbor as is */
-		if (!compare_orig(orig_node->router->addr, ethhdr->h_source)) {
+		if (!compare_eth(orig_node->router->addr, ethhdr->h_source)) {
 			batman_packet->tq = orig_node->router->tq_avg;
 
 			if (orig_node->router->last_ttl)
@@ -356,7 +356,7 @@ void schedule_forward_packet(struct orig_node *orig_node,
 	else
 		batman_packet->flags &= ~DIRECTLINK;
 
-	send_time = forward_send_time(bat_priv);
+	send_time = forward_send_time();
 	add_bat_packet_to_list(bat_priv,
 			       (unsigned char *)batman_packet,
 			       sizeof(struct batman_packet) + hna_buff_len,
@@ -444,7 +444,7 @@ out:
 
 static void send_outstanding_bcast_packet(struct work_struct *work)
 {
-	struct batman_if *batman_if;
+	struct hard_iface *hard_iface;
 	struct delayed_work *delayed_work =
 		container_of(work, struct delayed_work, work);
 	struct forw_packet *forw_packet =
@@ -462,14 +462,14 @@ static void send_outstanding_bcast_packet(struct work_struct *work)
 
 	/* rebroadcast packet */
 	rcu_read_lock();
-	list_for_each_entry_rcu(batman_if, &if_list, list) {
-		if (batman_if->soft_iface != soft_iface)
+	list_for_each_entry_rcu(hard_iface, &hardif_list, list) {
+		if (hard_iface->soft_iface != soft_iface)
 			continue;
 
 		/* send a copy of the saved skb */
 		skb1 = skb_clone(forw_packet->skb, GFP_ATOMIC);
 		if (skb1)
-			send_skb_packet(skb1, batman_if, broadcast_addr);
+			send_skb_packet(skb1, hard_iface, broadcast_addr);
 	}
 	rcu_read_unlock();
 
@@ -522,15 +522,15 @@ out:
 }
 
 void purge_outstanding_packets(struct bat_priv *bat_priv,
-			       struct batman_if *batman_if)
+			       struct hard_iface *hard_iface)
 {
 	struct forw_packet *forw_packet;
 	struct hlist_node *tmp_node, *safe_tmp_node;
 
-	if (batman_if)
+	if (hard_iface)
 		bat_dbg(DBG_BATMAN, bat_priv,
 			"purge_outstanding_packets(): %s\n",
-			batman_if->net_dev->name);
+			hard_iface->net_dev->name);
 	else
 		bat_dbg(DBG_BATMAN, bat_priv,
 			"purge_outstanding_packets()\n");
@@ -544,8 +544,8 @@ void purge_outstanding_packets(struct bat_priv *bat_priv,
 		 * if purge_outstanding_packets() was called with an argmument
 		 * we delete only packets belonging to the given interface
 		 */
-		if ((batman_if) &&
-		    (forw_packet->if_incoming != batman_if))
+		if ((hard_iface) &&
+		    (forw_packet->if_incoming != hard_iface))
 			continue;
 
 		spin_unlock_bh(&bat_priv->forw_bcast_list_lock);
@@ -568,8 +568,8 @@ void purge_outstanding_packets(struct bat_priv *bat_priv,
 		 * if purge_outstanding_packets() was called with an argmument
 		 * we delete only packets belonging to the given interface
 		 */
-		if ((batman_if) &&
-		    (forw_packet->if_incoming != batman_if))
+		if ((hard_iface) &&
+		    (forw_packet->if_incoming != hard_iface))
 			continue;
 
 		spin_unlock_bh(&bat_priv->forw_bat_list_lock);
diff --git a/net/batman-adv/send.h b/net/batman-adv/send.h
index c4cefa8..7b2ff19 100644
--- a/net/batman-adv/send.h
+++ b/net/batman-adv/send.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors:
+ * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
@@ -22,20 +22,18 @@
 #ifndef _NET_BATMAN_ADV_SEND_H_
 #define _NET_BATMAN_ADV_SEND_H_
 
-#include "types.h"
-
 int send_skb_packet(struct sk_buff *skb,
-				struct batman_if *batman_if,
+				struct hard_iface *hard_iface,
 				uint8_t *dst_addr);
-void schedule_own_packet(struct batman_if *batman_if);
+void schedule_own_packet(struct hard_iface *hard_iface);
 void schedule_forward_packet(struct orig_node *orig_node,
 			     struct ethhdr *ethhdr,
 			     struct batman_packet *batman_packet,
 			     uint8_t directlink, int hna_buff_len,
-			     struct batman_if *if_outgoing);
+			     struct hard_iface *if_outgoing);
 int add_bcast_packet_to_list(struct bat_priv *bat_priv, struct sk_buff *skb);
 void send_outstanding_bat_packet(struct work_struct *work);
 void purge_outstanding_packets(struct bat_priv *bat_priv,
-			       struct batman_if *batman_if);
+			       struct hard_iface *hard_iface);
 
 #endif /* _NET_BATMAN_ADV_SEND_H_ */
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index e89ede1..9ed2614 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors:
+ * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
@@ -26,18 +26,15 @@
 #include "send.h"
 #include "bat_debugfs.h"
 #include "translation-table.h"
-#include "types.h"
 #include "hash.h"
 #include "gateway_common.h"
 #include "gateway_client.h"
-#include "send.h"
 #include "bat_sysfs.h"
 #include <linux/slab.h>
 #include <linux/ethtool.h>
 #include <linux/etherdevice.h>
 #include <linux/if_vlan.h>
 #include "unicast.h"
-#include "routing.h"
 
 
 static int bat_get_settings(struct net_device *dev, struct ethtool_cmd *cmd);
@@ -79,20 +76,18 @@ int my_skb_head_push(struct sk_buff *skb, unsigned int len)
 	return 0;
 }
 
-static void softif_neigh_free_ref(struct kref *refcount)
+static void softif_neigh_free_rcu(struct rcu_head *rcu)
 {
 	struct softif_neigh *softif_neigh;
 
-	softif_neigh = container_of(refcount, struct softif_neigh, refcount);
+	softif_neigh = container_of(rcu, struct softif_neigh, rcu);
 	kfree(softif_neigh);
 }
 
-static void softif_neigh_free_rcu(struct rcu_head *rcu)
+static void softif_neigh_free_ref(struct softif_neigh *softif_neigh)
 {
-	struct softif_neigh *softif_neigh;
-
-	softif_neigh = container_of(rcu, struct softif_neigh, rcu);
-	kref_put(&softif_neigh->refcount, softif_neigh_free_ref);
+	if (atomic_dec_and_test(&softif_neigh->refcount))
+		call_rcu(&softif_neigh->rcu, softif_neigh_free_rcu);
 }
 
 void softif_neigh_purge(struct bat_priv *bat_priv)
@@ -119,11 +114,10 @@ void softif_neigh_purge(struct bat_priv *bat_priv)
 				 softif_neigh->addr, softif_neigh->vid);
 			softif_neigh_tmp = bat_priv->softif_neigh;
 			bat_priv->softif_neigh = NULL;
-			kref_put(&softif_neigh_tmp->refcount,
-				 softif_neigh_free_ref);
+			softif_neigh_free_ref(softif_neigh_tmp);
 		}
 
-		call_rcu(&softif_neigh->rcu, softif_neigh_free_rcu);
+		softif_neigh_free_ref(softif_neigh);
 	}
 
 	spin_unlock_bh(&bat_priv->softif_neigh_lock);
@@ -138,14 +132,17 @@ static struct softif_neigh *softif_neigh_get(struct bat_priv *bat_priv,
 	rcu_read_lock();
 	hlist_for_each_entry_rcu(softif_neigh, node,
 				 &bat_priv->softif_neigh_list, list) {
-		if (memcmp(softif_neigh->addr, addr, ETH_ALEN) != 0)
+		if (!compare_eth(softif_neigh->addr, addr))
 			continue;
 
 		if (softif_neigh->vid != vid)
 			continue;
 
+		if (!atomic_inc_not_zero(&softif_neigh->refcount))
+			continue;
+
 		softif_neigh->last_seen = jiffies;
-		goto found;
+		goto out;
 	}
 
 	softif_neigh = kzalloc(sizeof(struct softif_neigh), GFP_ATOMIC);
@@ -155,15 +152,14 @@ static struct softif_neigh *softif_neigh_get(struct bat_priv *bat_priv,
 	memcpy(softif_neigh->addr, addr, ETH_ALEN);
 	softif_neigh->vid = vid;
 	softif_neigh->last_seen = jiffies;
-	kref_init(&softif_neigh->refcount);
+	/* initialize with 2 - caller decrements counter by one */
+	atomic_set(&softif_neigh->refcount, 2);
 
 	INIT_HLIST_NODE(&softif_neigh->list);
 	spin_lock_bh(&bat_priv->softif_neigh_lock);
 	hlist_add_head_rcu(&softif_neigh->list, &bat_priv->softif_neigh_list);
 	spin_unlock_bh(&bat_priv->softif_neigh_lock);
 
-found:
-	kref_get(&softif_neigh->refcount);
 out:
 	rcu_read_unlock();
 	return softif_neigh;
@@ -175,8 +171,6 @@ int softif_neigh_seq_print_text(struct seq_file *seq, void *offset)
 	struct bat_priv *bat_priv = netdev_priv(net_dev);
 	struct softif_neigh *softif_neigh;
 	struct hlist_node *node;
-	size_t buf_size, pos;
-	char *buff;
 
 	if (!bat_priv->primary_if) {
 		return seq_printf(seq, "BATMAN mesh %s disabled - "
@@ -186,33 +180,15 @@ int softif_neigh_seq_print_text(struct seq_file *seq, void *offset)
 
 	seq_printf(seq, "Softif neighbor list (%s)\n", net_dev->name);
 
-	buf_size = 1;
-	/* Estimate length for: "   xx:xx:xx:xx:xx:xx\n" */
 	rcu_read_lock();
 	hlist_for_each_entry_rcu(softif_neigh, node,
 				 &bat_priv->softif_neigh_list, list)
-		buf_size += 30;
-	rcu_read_unlock();
-
-	buff = kmalloc(buf_size, GFP_ATOMIC);
-	if (!buff)
-		return -ENOMEM;
-
-	buff[0] = '\0';
-	pos = 0;
-
-	rcu_read_lock();
-	hlist_for_each_entry_rcu(softif_neigh, node,
-				 &bat_priv->softif_neigh_list, list) {
-		pos += snprintf(buff + pos, 31, "%s %pM (vid: %d)\n",
+		seq_printf(seq, "%s %pM (vid: %d)\n",
 				bat_priv->softif_neigh == softif_neigh
 				? "=>" : "  ", softif_neigh->addr,
 				softif_neigh->vid);
-	}
 	rcu_read_unlock();
 
-	seq_printf(seq, "%s", buff);
-	kfree(buff);
 	return 0;
 }
 
@@ -267,7 +243,7 @@ static void softif_batman_recv(struct sk_buff *skb, struct net_device *dev,
 			 softif_neigh->addr, softif_neigh->vid);
 		softif_neigh_tmp = bat_priv->softif_neigh;
 		bat_priv->softif_neigh = softif_neigh;
-		kref_put(&softif_neigh_tmp->refcount, softif_neigh_free_ref);
+		softif_neigh_free_ref(softif_neigh_tmp);
 		/* we need to hold the additional reference */
 		goto err;
 	}
@@ -285,7 +261,7 @@ static void softif_batman_recv(struct sk_buff *skb, struct net_device *dev,
 	}
 
 out:
-	kref_put(&softif_neigh->refcount, softif_neigh_free_ref);
+	softif_neigh_free_ref(softif_neigh);
 err:
 	kfree_skb(skb);
 	return;
@@ -438,7 +414,7 @@ end:
 }
 
 void interface_rx(struct net_device *soft_iface,
-		  struct sk_buff *skb, struct batman_if *recv_if,
+		  struct sk_buff *skb, struct hard_iface *recv_if,
 		  int hdr_size)
 {
 	struct bat_priv *bat_priv = netdev_priv(soft_iface);
@@ -486,7 +462,7 @@ void interface_rx(struct net_device *soft_iface,
 
 		memcpy(unicast_packet->dest,
 		       bat_priv->softif_neigh->addr, ETH_ALEN);
-		ret = route_unicast_packet(skb, recv_if, hdr_size);
+		ret = route_unicast_packet(skb, recv_if);
 		if (ret == NET_RX_DROP)
 			goto dropped;
 
@@ -646,6 +622,19 @@ void softif_destroy(struct net_device *soft_iface)
 	unregister_netdevice(soft_iface);
 }
 
+int softif_is_valid(struct net_device *net_dev)
+{
+#ifdef HAVE_NET_DEVICE_OPS
+	if (net_dev->netdev_ops->ndo_start_xmit == interface_tx)
+		return 1;
+#else
+	if (net_dev->hard_start_xmit == interface_tx)
+		return 1;
+#endif
+
+	return 0;
+}
+
 /* ethtool */
 static int bat_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
 {
diff --git a/net/batman-adv/soft-interface.h b/net/batman-adv/soft-interface.h
index 02b7733..4789b6f 100644
--- a/net/batman-adv/soft-interface.h
+++ b/net/batman-adv/soft-interface.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors:
+ * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
  *
@@ -27,9 +27,10 @@ int softif_neigh_seq_print_text(struct seq_file *seq, void *offset);
 void softif_neigh_purge(struct bat_priv *bat_priv);
 int interface_tx(struct sk_buff *skb, struct net_device *soft_iface);
 void interface_rx(struct net_device *soft_iface,
-		  struct sk_buff *skb, struct batman_if *recv_if,
+		  struct sk_buff *skb, struct hard_iface *recv_if,
 		  int hdr_size);
 struct net_device *softif_create(char *name);
 void softif_destroy(struct net_device *soft_iface);
+int softif_is_valid(struct net_device *net_dev);
 
 #endif /* _NET_BATMAN_ADV_SOFT_INTERFACE_H_ */
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index a633b5a4..8d15b48 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors:
+ * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
@@ -22,7 +22,6 @@
 #include "main.h"
 #include "translation-table.h"
 #include "soft-interface.h"
-#include "types.h"
 #include "hash.h"
 #include "originator.h"
 
@@ -31,12 +30,85 @@ static void _hna_global_del_orig(struct bat_priv *bat_priv,
 				 struct hna_global_entry *hna_global_entry,
 				 char *message);
 
+/* returns 1 if they are the same mac addr */
+static int compare_lhna(struct hlist_node *node, void *data2)
+{
+	void *data1 = container_of(node, struct hna_local_entry, hash_entry);
+
+	return (memcmp(data1, data2, ETH_ALEN) == 0 ? 1 : 0);
+}
+
+/* returns 1 if they are the same mac addr */
+static int compare_ghna(struct hlist_node *node, void *data2)
+{
+	void *data1 = container_of(node, struct hna_global_entry, hash_entry);
+
+	return (memcmp(data1, data2, ETH_ALEN) == 0 ? 1 : 0);
+}
+
 static void hna_local_start_timer(struct bat_priv *bat_priv)
 {
 	INIT_DELAYED_WORK(&bat_priv->hna_work, hna_local_purge);
 	queue_delayed_work(bat_event_workqueue, &bat_priv->hna_work, 10 * HZ);
 }
 
+static struct hna_local_entry *hna_local_hash_find(struct bat_priv *bat_priv,
+						   void *data)
+{
+	struct hashtable_t *hash = bat_priv->hna_local_hash;
+	struct hlist_head *head;
+	struct hlist_node *node;
+	struct hna_local_entry *hna_local_entry, *hna_local_entry_tmp = NULL;
+	int index;
+
+	if (!hash)
+		return NULL;
+
+	index = choose_orig(data, hash->size);
+	head = &hash->table[index];
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(hna_local_entry, node, head, hash_entry) {
+		if (!compare_eth(hna_local_entry, data))
+			continue;
+
+		hna_local_entry_tmp = hna_local_entry;
+		break;
+	}
+	rcu_read_unlock();
+
+	return hna_local_entry_tmp;
+}
+
+static struct hna_global_entry *hna_global_hash_find(struct bat_priv *bat_priv,
+						     void *data)
+{
+	struct hashtable_t *hash = bat_priv->hna_global_hash;
+	struct hlist_head *head;
+	struct hlist_node *node;
+	struct hna_global_entry *hna_global_entry;
+	struct hna_global_entry *hna_global_entry_tmp = NULL;
+	int index;
+
+	if (!hash)
+		return NULL;
+
+	index = choose_orig(data, hash->size);
+	head = &hash->table[index];
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(hna_global_entry, node, head, hash_entry) {
+		if (!compare_eth(hna_global_entry, data))
+			continue;
+
+		hna_global_entry_tmp = hna_global_entry;
+		break;
+	}
+	rcu_read_unlock();
+
+	return hna_global_entry_tmp;
+}
+
 int hna_local_init(struct bat_priv *bat_priv)
 {
 	if (bat_priv->hna_local_hash)
@@ -61,10 +133,7 @@ void hna_local_add(struct net_device *soft_iface, uint8_t *addr)
 	int required_bytes;
 
 	spin_lock_bh(&bat_priv->hna_lhash_lock);
-	hna_local_entry =
-		((struct hna_local_entry *)hash_find(bat_priv->hna_local_hash,
-						     compare_orig, choose_orig,
-						     addr));
+	hna_local_entry = hna_local_hash_find(bat_priv, addr);
 	spin_unlock_bh(&bat_priv->hna_lhash_lock);
 
 	if (hna_local_entry) {
@@ -100,15 +169,15 @@ void hna_local_add(struct net_device *soft_iface, uint8_t *addr)
 	hna_local_entry->last_seen = jiffies;
 
 	/* the batman interface mac address should never be purged */
-	if (compare_orig(addr, soft_iface->dev_addr))
+	if (compare_eth(addr, soft_iface->dev_addr))
 		hna_local_entry->never_purge = 1;
 	else
 		hna_local_entry->never_purge = 0;
 
 	spin_lock_bh(&bat_priv->hna_lhash_lock);
 
-	hash_add(bat_priv->hna_local_hash, compare_orig, choose_orig,
-		 hna_local_entry);
+	hash_add(bat_priv->hna_local_hash, compare_lhna, choose_orig,
+		 hna_local_entry, &hna_local_entry->hash_entry);
 	bat_priv->num_local_hna++;
 	atomic_set(&bat_priv->hna_local_changed, 1);
 
@@ -117,9 +186,7 @@ void hna_local_add(struct net_device *soft_iface, uint8_t *addr)
 	/* remove address from global hash if present */
 	spin_lock_bh(&bat_priv->hna_ghash_lock);
 
-	hna_global_entry = ((struct hna_global_entry *)
-				hash_find(bat_priv->hna_global_hash,
-					  compare_orig, choose_orig, addr));
+	hna_global_entry = hna_global_hash_find(bat_priv, addr);
 
 	if (hna_global_entry)
 		_hna_global_del_orig(bat_priv, hna_global_entry,
@@ -133,28 +200,27 @@ int hna_local_fill_buffer(struct bat_priv *bat_priv,
 {
 	struct hashtable_t *hash = bat_priv->hna_local_hash;
 	struct hna_local_entry *hna_local_entry;
-	struct element_t *bucket;
-	int i;
-	struct hlist_node *walk;
+	struct hlist_node *node;
 	struct hlist_head *head;
-	int count = 0;
+	int i, count = 0;
 
 	spin_lock_bh(&bat_priv->hna_lhash_lock);
 
 	for (i = 0; i < hash->size; i++) {
 		head = &hash->table[i];
 
-		hlist_for_each_entry(bucket, walk, head, hlist) {
-
+		rcu_read_lock();
+		hlist_for_each_entry_rcu(hna_local_entry, node,
+					 head, hash_entry) {
 			if (buff_len < (count + 1) * ETH_ALEN)
 				break;
 
-			hna_local_entry = bucket->data;
 			memcpy(buff + (count * ETH_ALEN), hna_local_entry->addr,
 			       ETH_ALEN);
 
 			count++;
 		}
+		rcu_read_unlock();
 	}
 
 	/* if we did not get all new local hnas see you next time  ;-) */
@@ -171,12 +237,11 @@ int hna_local_seq_print_text(struct seq_file *seq, void *offset)
 	struct bat_priv *bat_priv = netdev_priv(net_dev);
 	struct hashtable_t *hash = bat_priv->hna_local_hash;
 	struct hna_local_entry *hna_local_entry;
-	int i;
-	struct hlist_node *walk;
+	struct hlist_node *node;
 	struct hlist_head *head;
-	struct element_t *bucket;
 	size_t buf_size, pos;
 	char *buff;
+	int i;
 
 	if (!bat_priv->primary_if) {
 		return seq_printf(seq, "BATMAN mesh %s disabled - "
@@ -195,8 +260,10 @@ int hna_local_seq_print_text(struct seq_file *seq, void *offset)
 	for (i = 0; i < hash->size; i++) {
 		head = &hash->table[i];
 
-		hlist_for_each(walk, head)
+		rcu_read_lock();
+		__hlist_for_each_rcu(node, head)
 			buf_size += 21;
+		rcu_read_unlock();
 	}
 
 	buff = kmalloc(buf_size, GFP_ATOMIC);
@@ -204,18 +271,20 @@ int hna_local_seq_print_text(struct seq_file *seq, void *offset)
 		spin_unlock_bh(&bat_priv->hna_lhash_lock);
 		return -ENOMEM;
 	}
+
 	buff[0] = '\0';
 	pos = 0;
 
 	for (i = 0; i < hash->size; i++) {
 		head = &hash->table[i];
 
-		hlist_for_each_entry(bucket, walk, head, hlist) {
-			hna_local_entry = bucket->data;
-
+		rcu_read_lock();
+		hlist_for_each_entry_rcu(hna_local_entry, node,
+					 head, hash_entry) {
 			pos += snprintf(buff + pos, 22, " * %pM\n",
 					hna_local_entry->addr);
 		}
+		rcu_read_unlock();
 	}
 
 	spin_unlock_bh(&bat_priv->hna_lhash_lock);
@@ -225,9 +294,10 @@ int hna_local_seq_print_text(struct seq_file *seq, void *offset)
 	return 0;
 }
 
-static void _hna_local_del(void *data, void *arg)
+static void _hna_local_del(struct hlist_node *node, void *arg)
 {
 	struct bat_priv *bat_priv = (struct bat_priv *)arg;
+	void *data = container_of(node, struct hna_local_entry, hash_entry);
 
 	kfree(data);
 	bat_priv->num_local_hna--;
@@ -241,9 +311,9 @@ static void hna_local_del(struct bat_priv *bat_priv,
 	bat_dbg(DBG_ROUTES, bat_priv, "Deleting local hna entry (%pM): %s\n",
 		hna_local_entry->addr, message);
 
-	hash_remove(bat_priv->hna_local_hash, compare_orig, choose_orig,
+	hash_remove(bat_priv->hna_local_hash, compare_lhna, choose_orig,
 		    hna_local_entry->addr);
-	_hna_local_del(hna_local_entry, bat_priv);
+	_hna_local_del(&hna_local_entry->hash_entry, bat_priv);
 }
 
 void hna_local_remove(struct bat_priv *bat_priv,
@@ -253,9 +323,7 @@ void hna_local_remove(struct bat_priv *bat_priv,
 
 	spin_lock_bh(&bat_priv->hna_lhash_lock);
 
-	hna_local_entry = (struct hna_local_entry *)
-		hash_find(bat_priv->hna_local_hash, compare_orig, choose_orig,
-			  addr);
+	hna_local_entry = hna_local_hash_find(bat_priv, addr);
 
 	if (hna_local_entry)
 		hna_local_del(bat_priv, hna_local_entry, message);
@@ -271,27 +339,29 @@ static void hna_local_purge(struct work_struct *work)
 		container_of(delayed_work, struct bat_priv, hna_work);
 	struct hashtable_t *hash = bat_priv->hna_local_hash;
 	struct hna_local_entry *hna_local_entry;
-	int i;
-	struct hlist_node *walk, *safe;
+	struct hlist_node *node, *node_tmp;
 	struct hlist_head *head;
-	struct element_t *bucket;
 	unsigned long timeout;
+	int i;
 
 	spin_lock_bh(&bat_priv->hna_lhash_lock);
 
 	for (i = 0; i < hash->size; i++) {
 		head = &hash->table[i];
 
-		hlist_for_each_entry_safe(bucket, walk, safe, head, hlist) {
-			hna_local_entry = bucket->data;
+		hlist_for_each_entry_safe(hna_local_entry, node, node_tmp,
+					  head, hash_entry) {
+			if (hna_local_entry->never_purge)
+				continue;
 
 			timeout = hna_local_entry->last_seen;
 			timeout += LOCAL_HNA_TIMEOUT * HZ;
 
-			if ((!hna_local_entry->never_purge) &&
-			    time_after(jiffies, timeout))
-				hna_local_del(bat_priv, hna_local_entry,
-					"address timed out");
+			if (time_before(jiffies, timeout))
+				continue;
+
+			hna_local_del(bat_priv, hna_local_entry,
+				      "address timed out");
 		}
 	}
 
@@ -335,9 +405,7 @@ void hna_global_add_orig(struct bat_priv *bat_priv,
 		spin_lock_bh(&bat_priv->hna_ghash_lock);
 
 		hna_ptr = hna_buff + (hna_buff_count * ETH_ALEN);
-		hna_global_entry = (struct hna_global_entry *)
-			hash_find(bat_priv->hna_global_hash, compare_orig,
-				  choose_orig, hna_ptr);
+		hna_global_entry = hna_global_hash_find(bat_priv, hna_ptr);
 
 		if (!hna_global_entry) {
 			spin_unlock_bh(&bat_priv->hna_ghash_lock);
@@ -357,8 +425,9 @@ void hna_global_add_orig(struct bat_priv *bat_priv,
 				hna_global_entry->addr, orig_node->orig);
 
 			spin_lock_bh(&bat_priv->hna_ghash_lock);
-			hash_add(bat_priv->hna_global_hash, compare_orig,
-				 choose_orig, hna_global_entry);
+			hash_add(bat_priv->hna_global_hash, compare_ghna,
+				 choose_orig, hna_global_entry,
+				 &hna_global_entry->hash_entry);
 
 		}
 
@@ -369,9 +438,7 @@ void hna_global_add_orig(struct bat_priv *bat_priv,
 		spin_lock_bh(&bat_priv->hna_lhash_lock);
 
 		hna_ptr = hna_buff + (hna_buff_count * ETH_ALEN);
-		hna_local_entry = (struct hna_local_entry *)
-			hash_find(bat_priv->hna_local_hash, compare_orig,
-				  choose_orig, hna_ptr);
+		hna_local_entry = hna_local_hash_find(bat_priv, hna_ptr);
 
 		if (hna_local_entry)
 			hna_local_del(bat_priv, hna_local_entry,
@@ -401,12 +468,11 @@ int hna_global_seq_print_text(struct seq_file *seq, void *offset)
 	struct bat_priv *bat_priv = netdev_priv(net_dev);
 	struct hashtable_t *hash = bat_priv->hna_global_hash;
 	struct hna_global_entry *hna_global_entry;
-	int i;
-	struct hlist_node *walk;
+	struct hlist_node *node;
 	struct hlist_head *head;
-	struct element_t *bucket;
 	size_t buf_size, pos;
 	char *buff;
+	int i;
 
 	if (!bat_priv->primary_if) {
 		return seq_printf(seq, "BATMAN mesh %s disabled - "
@@ -424,8 +490,10 @@ int hna_global_seq_print_text(struct seq_file *seq, void *offset)
 	for (i = 0; i < hash->size; i++) {
 		head = &hash->table[i];
 
-		hlist_for_each(walk, head)
+		rcu_read_lock();
+		__hlist_for_each_rcu(node, head)
 			buf_size += 43;
+		rcu_read_unlock();
 	}
 
 	buff = kmalloc(buf_size, GFP_ATOMIC);
@@ -439,14 +507,15 @@ int hna_global_seq_print_text(struct seq_file *seq, void *offset)
 	for (i = 0; i < hash->size; i++) {
 		head = &hash->table[i];
 
-		hlist_for_each_entry(bucket, walk, head, hlist) {
-			hna_global_entry = bucket->data;
-
+		rcu_read_lock();
+		hlist_for_each_entry_rcu(hna_global_entry, node,
+					 head, hash_entry) {
 			pos += snprintf(buff + pos, 44,
 					" * %pM via %pM\n",
 					hna_global_entry->addr,
 					hna_global_entry->orig_node->orig);
 		}
+		rcu_read_unlock();
 	}
 
 	spin_unlock_bh(&bat_priv->hna_ghash_lock);
@@ -465,7 +534,7 @@ static void _hna_global_del_orig(struct bat_priv *bat_priv,
 		hna_global_entry->addr, hna_global_entry->orig_node->orig,
 		message);
 
-	hash_remove(bat_priv->hna_global_hash, compare_orig, choose_orig,
+	hash_remove(bat_priv->hna_global_hash, compare_ghna, choose_orig,
 		    hna_global_entry->addr);
 	kfree(hna_global_entry);
 }
@@ -484,9 +553,7 @@ void hna_global_del_orig(struct bat_priv *bat_priv,
 
 	while ((hna_buff_count + 1) * ETH_ALEN <= orig_node->hna_buff_len) {
 		hna_ptr = orig_node->hna_buff + (hna_buff_count * ETH_ALEN);
-		hna_global_entry = (struct hna_global_entry *)
-			hash_find(bat_priv->hna_global_hash, compare_orig,
-				  choose_orig, hna_ptr);
+		hna_global_entry = hna_global_hash_find(bat_priv, hna_ptr);
 
 		if ((hna_global_entry) &&
 		    (hna_global_entry->orig_node == orig_node))
@@ -503,8 +570,10 @@ void hna_global_del_orig(struct bat_priv *bat_priv,
 	orig_node->hna_buff = NULL;
 }
 
-static void hna_global_del(void *data, void *arg)
+static void hna_global_del(struct hlist_node *node, void *arg)
 {
+	void *data = container_of(node, struct hna_global_entry, hash_entry);
+
 	kfree(data);
 }
 
@@ -520,15 +589,20 @@ void hna_global_free(struct bat_priv *bat_priv)
 struct orig_node *transtable_search(struct bat_priv *bat_priv, uint8_t *addr)
 {
 	struct hna_global_entry *hna_global_entry;
+	struct orig_node *orig_node = NULL;
 
 	spin_lock_bh(&bat_priv->hna_ghash_lock);
-	hna_global_entry = (struct hna_global_entry *)
-				hash_find(bat_priv->hna_global_hash,
-					  compare_orig, choose_orig, addr);
-	spin_unlock_bh(&bat_priv->hna_ghash_lock);
+	hna_global_entry = hna_global_hash_find(bat_priv, addr);
 
 	if (!hna_global_entry)
-		return NULL;
+		goto out;
 
-	return hna_global_entry->orig_node;
+	if (!atomic_inc_not_zero(&hna_global_entry->orig_node->refcount))
+		goto out;
+
+	orig_node = hna_global_entry->orig_node;
+
+out:
+	spin_unlock_bh(&bat_priv->hna_ghash_lock);
+	return orig_node;
 }
diff --git a/net/batman-adv/translation-table.h b/net/batman-adv/translation-table.h
index 10c4c5c..f19931c 100644
--- a/net/batman-adv/translation-table.h
+++ b/net/batman-adv/translation-table.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors:
+ * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
@@ -22,8 +22,6 @@
 #ifndef _NET_BATMAN_ADV_TRANSLATION_TABLE_H_
 #define _NET_BATMAN_ADV_TRANSLATION_TABLE_H_
 
-#include "types.h"
-
 int hna_local_init(struct bat_priv *bat_priv);
 void hna_local_add(struct net_device *soft_iface, uint8_t *addr);
 void hna_local_remove(struct bat_priv *bat_priv,
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index bf3f6f5..83445cf 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors:
+ * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
@@ -33,7 +33,7 @@
 	 sizeof(struct bcast_packet))))
 
 
-struct batman_if {
+struct hard_iface {
 	struct list_head list;
 	int16_t if_num;
 	char if_status;
@@ -43,7 +43,7 @@ struct batman_if {
 	unsigned char *packet_buff;
 	int packet_len;
 	struct kobject *hardif_obj;
-	struct kref refcount;
+	atomic_t refcount;
 	struct packet_type batman_adv_ptype;
 	struct net_device *soft_iface;
 	struct rcu_head rcu;
@@ -70,8 +70,6 @@ struct orig_node {
 	struct neigh_node *router;
 	unsigned long *bcast_own;
 	uint8_t *bcast_own_sum;
-	uint8_t tq_own;
-	int tq_asym_penalty;
 	unsigned long last_valid;
 	unsigned long bcast_seqno_reset;
 	unsigned long batman_seqno_reset;
@@ -83,20 +81,28 @@ struct orig_node {
 	uint8_t last_ttl;
 	unsigned long bcast_bits[NUM_WORDS];
 	uint32_t last_bcast_seqno;
-	struct list_head neigh_list;
+	struct hlist_head neigh_list;
 	struct list_head frag_list;
+	spinlock_t neigh_list_lock; /* protects neighbor list */
+	atomic_t refcount;
+	struct rcu_head rcu;
+	struct hlist_node hash_entry;
+	struct bat_priv *bat_priv;
 	unsigned long last_frag_packet;
-	struct {
-		uint8_t candidates;
-		struct neigh_node *selected;
-	} bond;
+	spinlock_t ogm_cnt_lock; /* protects: bcast_own, bcast_own_sum,
+				  * neigh_node->real_bits,
+				  * neigh_node->real_packet_count */
+	spinlock_t bcast_seqno_lock; /* protects bcast_bits,
+				      *	 last_bcast_seqno */
+	atomic_t bond_candidates;
+	struct list_head bond_list;
 };
 
 struct gw_node {
 	struct hlist_node list;
 	struct orig_node *orig_node;
 	unsigned long deleted;
-	struct kref refcount;
+	atomic_t refcount;
 	struct rcu_head rcu;
 };
 
@@ -105,18 +111,20 @@ struct gw_node {
  *	@last_valid: when last packet via this neighbor was received
  */
 struct neigh_node {
-	struct list_head list;
+	struct hlist_node list;
 	uint8_t addr[ETH_ALEN];
 	uint8_t real_packet_count;
 	uint8_t tq_recv[TQ_GLOBAL_WINDOW_SIZE];
 	uint8_t tq_index;
 	uint8_t tq_avg;
 	uint8_t last_ttl;
-	struct neigh_node *next_bond_candidate;
+	struct list_head bonding_list;
 	unsigned long last_valid;
 	unsigned long real_bits[NUM_WORDS];
+	atomic_t refcount;
+	struct rcu_head rcu;
 	struct orig_node *orig_node;
-	struct batman_if *if_incoming;
+	struct hard_iface *if_incoming;
 };
 
 
@@ -140,7 +148,7 @@ struct bat_priv {
 	struct hlist_head softif_neigh_list;
 	struct softif_neigh *softif_neigh;
 	struct debug_log *debug_log;
-	struct batman_if *primary_if;
+	struct hard_iface *primary_if;
 	struct kobject *mesh_obj;
 	struct dentry *debug_dir;
 	struct hlist_head forw_bat_list;
@@ -151,12 +159,11 @@ struct bat_priv {
 	struct hashtable_t *hna_local_hash;
 	struct hashtable_t *hna_global_hash;
 	struct hashtable_t *vis_hash;
-	spinlock_t orig_hash_lock; /* protects orig_hash */
 	spinlock_t forw_bat_list_lock; /* protects forw_bat_list */
 	spinlock_t forw_bcast_list_lock; /* protects  */
 	spinlock_t hna_lhash_lock; /* protects hna_local_hash */
 	spinlock_t hna_ghash_lock; /* protects hna_global_hash */
-	spinlock_t gw_list_lock; /* protects gw_list */
+	spinlock_t gw_list_lock; /* protects gw_list and curr_gw */
 	spinlock_t vis_hash_lock; /* protects vis_hash */
 	spinlock_t vis_list_lock; /* protects vis_info::recv_list */
 	spinlock_t softif_neigh_lock; /* protects soft-interface neigh list */
@@ -165,7 +172,7 @@ struct bat_priv {
 	struct delayed_work hna_work;
 	struct delayed_work orig_work;
 	struct delayed_work vis_work;
-	struct gw_node *curr_gw;
+	struct gw_node __rcu *curr_gw;  /* rcu protected pointer */
 	struct vis_info *my_vis_info;
 };
 
@@ -188,11 +195,13 @@ struct hna_local_entry {
 	uint8_t addr[ETH_ALEN];
 	unsigned long last_seen;
 	char never_purge;
+	struct hlist_node hash_entry;
 };
 
 struct hna_global_entry {
 	uint8_t addr[ETH_ALEN];
 	struct orig_node *orig_node;
+	struct hlist_node hash_entry;
 };
 
 /**
@@ -208,7 +217,7 @@ struct forw_packet {
 	uint32_t direct_link_flags;
 	uint8_t num_packets;
 	struct delayed_work delayed_work;
-	struct batman_if *if_incoming;
+	struct hard_iface *if_incoming;
 };
 
 /* While scanning for vis-entries of a particular vis-originator
@@ -242,6 +251,7 @@ struct vis_info {
 			     * from.  we should not reply to them. */
 	struct list_head send_list;
 	struct kref refcount;
+	struct hlist_node hash_entry;
 	struct bat_priv *bat_priv;
 	/* this packet might be part of the vis send queue. */
 	struct sk_buff *skb_packet;
@@ -264,7 +274,7 @@ struct softif_neigh {
 	uint8_t addr[ETH_ALEN];
 	unsigned long last_seen;
 	short vid;
-	struct kref refcount;
+	atomic_t refcount;
 	struct rcu_head rcu;
 };
 
diff --git a/net/batman-adv/unicast.c b/net/batman-adv/unicast.c
index d1a6113..19f84bd 100644
--- a/net/batman-adv/unicast.c
+++ b/net/batman-adv/unicast.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2010 B.A.T.M.A.N. contributors:
+ * Copyright (C) 2010-2011 B.A.T.M.A.N. contributors:
  *
  * Andreas Langer
  *
@@ -39,8 +39,8 @@ static struct sk_buff *frag_merge_packet(struct list_head *head,
 		(struct unicast_frag_packet *)skb->data;
 	struct sk_buff *tmp_skb;
 	struct unicast_packet *unicast_packet;
-	int hdr_len = sizeof(struct unicast_packet),
-	    uni_diff = sizeof(struct unicast_frag_packet) - hdr_len;
+	int hdr_len = sizeof(struct unicast_packet);
+	int uni_diff = sizeof(struct unicast_frag_packet) - hdr_len;
 
 	/* set skb to the first part and tmp_skb to the second part */
 	if (up->flags & UNI_FRAG_HEAD) {
@@ -183,15 +183,10 @@ int frag_reassemble_skb(struct sk_buff *skb, struct bat_priv *bat_priv,
 		(struct unicast_frag_packet *)skb->data;
 
 	*new_skb = NULL;
-	spin_lock_bh(&bat_priv->orig_hash_lock);
-	orig_node = ((struct orig_node *)
-		    hash_find(bat_priv->orig_hash, compare_orig, choose_orig,
-			      unicast_packet->orig));
 
-	if (!orig_node) {
-		pr_debug("couldn't find originator in orig_hash\n");
+	orig_node = orig_hash_find(bat_priv, unicast_packet->orig);
+	if (!orig_node)
 		goto out;
-	}
 
 	orig_node->last_frag_packet = jiffies;
 
@@ -215,21 +210,24 @@ int frag_reassemble_skb(struct sk_buff *skb, struct bat_priv *bat_priv,
 	/* if not, merge failed */
 	if (*new_skb)
 		ret = NET_RX_SUCCESS;
-out:
-	spin_unlock_bh(&bat_priv->orig_hash_lock);
 
+out:
+	if (orig_node)
+		orig_node_free_ref(orig_node);
 	return ret;
 }
 
 int frag_send_skb(struct sk_buff *skb, struct bat_priv *bat_priv,
-		  struct batman_if *batman_if, uint8_t dstaddr[])
+		  struct hard_iface *hard_iface, uint8_t dstaddr[])
 {
 	struct unicast_packet tmp_uc, *unicast_packet;
 	struct sk_buff *frag_skb;
 	struct unicast_frag_packet *frag1, *frag2;
 	int uc_hdr_len = sizeof(struct unicast_packet);
 	int ucf_hdr_len = sizeof(struct unicast_frag_packet);
-	int data_len = skb->len;
+	int data_len = skb->len - uc_hdr_len;
+	int large_tail = 0;
+	uint16_t seqno;
 
 	if (!bat_priv->primary_if)
 		goto dropped;
@@ -237,10 +235,11 @@ int frag_send_skb(struct sk_buff *skb, struct bat_priv *bat_priv,
 	frag_skb = dev_alloc_skb(data_len - (data_len / 2) + ucf_hdr_len);
 	if (!frag_skb)
 		goto dropped;
+	skb_reserve(frag_skb, ucf_hdr_len);
 
 	unicast_packet = (struct unicast_packet *) skb->data;
 	memcpy(&tmp_uc, unicast_packet, uc_hdr_len);
-	skb_split(skb, frag_skb, data_len / 2);
+	skb_split(skb, frag_skb, data_len / 2 + uc_hdr_len);
 
 	if (my_skb_head_push(skb, ucf_hdr_len - uc_hdr_len) < 0 ||
 	    my_skb_head_push(frag_skb, ucf_hdr_len) < 0)
@@ -258,16 +257,18 @@ int frag_send_skb(struct sk_buff *skb, struct bat_priv *bat_priv,
 	memcpy(frag1->orig, bat_priv->primary_if->net_dev->dev_addr, ETH_ALEN);
 	memcpy(frag2, frag1, sizeof(struct unicast_frag_packet));
 
-	frag1->flags |= UNI_FRAG_HEAD;
-	frag2->flags &= ~UNI_FRAG_HEAD;
+	if (data_len & 1)
+		large_tail = UNI_FRAG_LARGETAIL;
+
+	frag1->flags = UNI_FRAG_HEAD | large_tail;
+	frag2->flags = large_tail;
 
-	frag1->seqno = htons((uint16_t)atomic_inc_return(
-			     &batman_if->frag_seqno));
-	frag2->seqno = htons((uint16_t)atomic_inc_return(
-			     &batman_if->frag_seqno));
+	seqno = atomic_add_return(2, &hard_iface->frag_seqno);
+	frag1->seqno = htons(seqno - 1);
+	frag2->seqno = htons(seqno);
 
-	send_skb_packet(skb, batman_if, dstaddr);
-	send_skb_packet(frag_skb, batman_if, dstaddr);
+	send_skb_packet(skb, hard_iface, dstaddr);
+	send_skb_packet(frag_skb, hard_iface, dstaddr);
 	return NET_RX_SUCCESS;
 
 drop_frag:
@@ -282,44 +283,36 @@ int unicast_send_skb(struct sk_buff *skb, struct bat_priv *bat_priv)
 	struct ethhdr *ethhdr = (struct ethhdr *)skb->data;
 	struct unicast_packet *unicast_packet;
 	struct orig_node *orig_node;
-	struct batman_if *batman_if;
-	struct neigh_node *router;
+	struct neigh_node *neigh_node;
 	int data_len = skb->len;
-	uint8_t dstaddr[6];
-
-	spin_lock_bh(&bat_priv->orig_hash_lock);
+	int ret = 1;
 
 	/* get routing information */
-	if (is_multicast_ether_addr(ethhdr->h_dest))
+	if (is_multicast_ether_addr(ethhdr->h_dest)) {
 		orig_node = (struct orig_node *)gw_get_selected(bat_priv);
-	else
-		orig_node = ((struct orig_node *)hash_find(bat_priv->orig_hash,
-							   compare_orig,
-							   choose_orig,
-							   ethhdr->h_dest));
-
-	/* check for hna host */
-	if (!orig_node)
-		orig_node = transtable_search(bat_priv, ethhdr->h_dest);
-
-	router = find_router(bat_priv, orig_node, NULL);
-
-	if (!router)
-		goto unlock;
+		if (orig_node)
+			goto find_router;
+	}
 
-	/* don't lock while sending the packets ... we therefore
-		* copy the required data before sending */
+	/* check for hna host - increases orig_node refcount */
+	orig_node = transtable_search(bat_priv, ethhdr->h_dest);
 
-	batman_if = router->if_incoming;
-	memcpy(dstaddr, router->addr, ETH_ALEN);
+find_router:
+	/**
+	 * find_router():
+	 *  - if orig_node is NULL it returns NULL
+	 *  - increases neigh_nodes refcount if found.
+	 */
+	neigh_node = find_router(bat_priv, orig_node, NULL);
 
-	spin_unlock_bh(&bat_priv->orig_hash_lock);
+	if (!neigh_node)
+		goto out;
 
-	if (batman_if->if_status != IF_ACTIVE)
-		goto dropped;
+	if (neigh_node->if_incoming->if_status != IF_ACTIVE)
+		goto out;
 
 	if (my_skb_head_push(skb, sizeof(struct unicast_packet)) < 0)
-		goto dropped;
+		goto out;
 
 	unicast_packet = (struct unicast_packet *)skb->data;
 
@@ -333,18 +326,24 @@ int unicast_send_skb(struct sk_buff *skb, struct bat_priv *bat_priv)
 
 	if (atomic_read(&bat_priv->fragmentation) &&
 	    data_len + sizeof(struct unicast_packet) >
-	    batman_if->net_dev->mtu) {
+				neigh_node->if_incoming->net_dev->mtu) {
 		/* send frag skb decreases ttl */
 		unicast_packet->ttl++;
-		return frag_send_skb(skb, bat_priv, batman_if,
-				     dstaddr);
+		ret = frag_send_skb(skb, bat_priv,
+				    neigh_node->if_incoming, neigh_node->addr);
+		goto out;
 	}
-	send_skb_packet(skb, batman_if, dstaddr);
-	return 0;
 
-unlock:
-	spin_unlock_bh(&bat_priv->orig_hash_lock);
-dropped:
-	kfree_skb(skb);
-	return 1;
+	send_skb_packet(skb, neigh_node->if_incoming, neigh_node->addr);
+	ret = 0;
+	goto out;
+
+out:
+	if (neigh_node)
+		neigh_node_free_ref(neigh_node);
+	if (orig_node)
+		orig_node_free_ref(orig_node);
+	if (ret == 1)
+		kfree_skb(skb);
+	return ret;
 }
diff --git a/net/batman-adv/unicast.h b/net/batman-adv/unicast.h
index e32b786..16ad7a9 100644
--- a/net/batman-adv/unicast.h
+++ b/net/batman-adv/unicast.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2010 B.A.T.M.A.N. contributors:
+ * Copyright (C) 2010-2011 B.A.T.M.A.N. contributors:
  *
  * Andreas Langer
  *
@@ -22,6 +22,8 @@
 #ifndef _NET_BATMAN_ADV_UNICAST_H_
 #define _NET_BATMAN_ADV_UNICAST_H_
 
+#include "packet.h"
+
 #define FRAG_TIMEOUT 10000	/* purge frag list entrys after time in ms */
 #define FRAG_BUFFER_SIZE 6	/* number of list elements in buffer */
 
@@ -30,6 +32,27 @@ int frag_reassemble_skb(struct sk_buff *skb, struct bat_priv *bat_priv,
 void frag_list_free(struct list_head *head);
 int unicast_send_skb(struct sk_buff *skb, struct bat_priv *bat_priv);
 int frag_send_skb(struct sk_buff *skb, struct bat_priv *bat_priv,
-		  struct batman_if *batman_if, uint8_t dstaddr[]);
+		  struct hard_iface *hard_iface, uint8_t dstaddr[]);
+
+static inline int frag_can_reassemble(struct sk_buff *skb, int mtu)
+{
+	struct unicast_frag_packet *unicast_packet;
+	int uneven_correction = 0;
+	unsigned int merged_size;
+
+	unicast_packet = (struct unicast_frag_packet *)skb->data;
+
+	if (unicast_packet->flags & UNI_FRAG_LARGETAIL) {
+		if (unicast_packet->flags & UNI_FRAG_HEAD)
+			uneven_correction = 1;
+		else
+			uneven_correction = -1;
+	}
+
+	merged_size = (skb->len - sizeof(struct unicast_frag_packet)) * 2;
+	merged_size += sizeof(struct unicast_packet) + uneven_correction;
+
+	return merged_size <= mtu;
+}
 
 #endif /* _NET_BATMAN_ADV_UNICAST_H_ */
diff --git a/net/batman-adv/vis.c b/net/batman-adv/vis.c
index de1022c..f90212f 100644
--- a/net/batman-adv/vis.c
+++ b/net/batman-adv/vis.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2008-2010 B.A.T.M.A.N. contributors:
+ * Copyright (C) 2008-2011 B.A.T.M.A.N. contributors:
  *
  * Simon Wunderlich
  *
@@ -68,15 +68,16 @@ static void free_info(struct kref *ref)
 }
 
 /* Compare two vis packets, used by the hashing algorithm */
-static int vis_info_cmp(void *data1, void *data2)
+static int vis_info_cmp(struct hlist_node *node, void *data2)
 {
 	struct vis_info *d1, *d2;
 	struct vis_packet *p1, *p2;
-	d1 = data1;
+
+	d1 = container_of(node, struct vis_info, hash_entry);
 	d2 = data2;
 	p1 = (struct vis_packet *)d1->skb_packet->data;
 	p2 = (struct vis_packet *)d2->skb_packet->data;
-	return compare_orig(p1->vis_orig, p2->vis_orig);
+	return compare_eth(p1->vis_orig, p2->vis_orig);
 }
 
 /* hash function to choose an entry in a hash table of given size */
@@ -104,6 +105,34 @@ static int vis_info_choose(void *data, int size)
 	return hash % size;
 }
 
+static struct vis_info *vis_hash_find(struct bat_priv *bat_priv,
+				      void *data)
+{
+	struct hashtable_t *hash = bat_priv->vis_hash;
+	struct hlist_head *head;
+	struct hlist_node *node;
+	struct vis_info *vis_info, *vis_info_tmp = NULL;
+	int index;
+
+	if (!hash)
+		return NULL;
+
+	index = vis_info_choose(data, hash->size);
+	head = &hash->table[index];
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(vis_info, node, head, hash_entry) {
+		if (!vis_info_cmp(node, data))
+			continue;
+
+		vis_info_tmp = vis_info;
+		break;
+	}
+	rcu_read_unlock();
+
+	return vis_info_tmp;
+}
+
 /* insert interface to the list of interfaces of one originator, if it
  * does not already exist in the list */
 static void vis_data_insert_interface(const uint8_t *interface,
@@ -114,7 +143,7 @@ static void vis_data_insert_interface(const uint8_t *interface,
 	struct hlist_node *pos;
 
 	hlist_for_each_entry(entry, pos, if_list, list) {
-		if (compare_orig(entry->addr, (void *)interface))
+		if (compare_eth(entry->addr, (void *)interface))
 			return;
 	}
 
@@ -166,7 +195,7 @@ static ssize_t vis_data_read_entry(char *buff, struct vis_info_entry *entry,
 	/* maximal length: max(4+17+2, 3+17+1+3+2) == 26 */
 	if (primary && entry->quality == 0)
 		return sprintf(buff, "HNA %pM, ", entry->dest);
-	else if (compare_orig(entry->src, src))
+	else if (compare_eth(entry->src, src))
 		return sprintf(buff, "TQ %pM %d, ", entry->dest,
 			       entry->quality);
 
@@ -175,9 +204,8 @@ static ssize_t vis_data_read_entry(char *buff, struct vis_info_entry *entry,
 
 int vis_seq_print_text(struct seq_file *seq, void *offset)
 {
-	struct hlist_node *walk;
+	struct hlist_node *node;
 	struct hlist_head *head;
-	struct element_t *bucket;
 	struct vis_info *info;
 	struct vis_packet *packet;
 	struct vis_info_entry *entries;
@@ -203,8 +231,8 @@ int vis_seq_print_text(struct seq_file *seq, void *offset)
 	for (i = 0; i < hash->size; i++) {
 		head = &hash->table[i];
 
-		hlist_for_each_entry(bucket, walk, head, hlist) {
-			info = bucket->data;
+		rcu_read_lock();
+		hlist_for_each_entry_rcu(info, node, head, hash_entry) {
 			packet = (struct vis_packet *)info->skb_packet->data;
 			entries = (struct vis_info_entry *)
 				((char *)packet + sizeof(struct vis_packet));
@@ -213,7 +241,7 @@ int vis_seq_print_text(struct seq_file *seq, void *offset)
 				if (entries[j].quality == 0)
 					continue;
 				compare =
-				 compare_orig(entries[j].src, packet->vis_orig);
+				 compare_eth(entries[j].src, packet->vis_orig);
 				vis_data_insert_interface(entries[j].src,
 							  &vis_if_list,
 							  compare);
@@ -223,7 +251,7 @@ int vis_seq_print_text(struct seq_file *seq, void *offset)
 				buf_size += 18 + 26 * packet->entries;
 
 				/* add primary/secondary records */
-				if (compare_orig(entry->addr, packet->vis_orig))
+				if (compare_eth(entry->addr, packet->vis_orig))
 					buf_size +=
 					  vis_data_count_prim_sec(&vis_if_list);
 
@@ -236,6 +264,7 @@ int vis_seq_print_text(struct seq_file *seq, void *offset)
 				kfree(entry);
 			}
 		}
+		rcu_read_unlock();
 	}
 
 	buff = kmalloc(buf_size, GFP_ATOMIC);
@@ -249,8 +278,8 @@ int vis_seq_print_text(struct seq_file *seq, void *offset)
 	for (i = 0; i < hash->size; i++) {
 		head = &hash->table[i];
 
-		hlist_for_each_entry(bucket, walk, head, hlist) {
-			info = bucket->data;
+		rcu_read_lock();
+		hlist_for_each_entry_rcu(info, node, head, hash_entry) {
 			packet = (struct vis_packet *)info->skb_packet->data;
 			entries = (struct vis_info_entry *)
 				((char *)packet + sizeof(struct vis_packet));
@@ -259,7 +288,7 @@ int vis_seq_print_text(struct seq_file *seq, void *offset)
 				if (entries[j].quality == 0)
 					continue;
 				compare =
-				 compare_orig(entries[j].src, packet->vis_orig);
+				 compare_eth(entries[j].src, packet->vis_orig);
 				vis_data_insert_interface(entries[j].src,
 							  &vis_if_list,
 							  compare);
@@ -277,7 +306,7 @@ int vis_seq_print_text(struct seq_file *seq, void *offset)
 							entry->primary);
 
 				/* add primary/secondary records */
-				if (compare_orig(entry->addr, packet->vis_orig))
+				if (compare_eth(entry->addr, packet->vis_orig))
 					buff_pos +=
 					 vis_data_read_prim_sec(buff + buff_pos,
 								&vis_if_list);
@@ -291,6 +320,7 @@ int vis_seq_print_text(struct seq_file *seq, void *offset)
 				kfree(entry);
 			}
 		}
+		rcu_read_unlock();
 	}
 
 	spin_unlock_bh(&bat_priv->vis_hash_lock);
@@ -345,7 +375,7 @@ static int recv_list_is_in(struct bat_priv *bat_priv,
 
 	spin_lock_bh(&bat_priv->vis_list_lock);
 	list_for_each_entry(entry, recv_list, list) {
-		if (memcmp(entry->mac, mac, ETH_ALEN) == 0) {
+		if (compare_eth(entry->mac, mac)) {
 			spin_unlock_bh(&bat_priv->vis_list_lock);
 			return 1;
 		}
@@ -381,8 +411,7 @@ static struct vis_info *add_packet(struct bat_priv *bat_priv,
 						     sizeof(struct vis_packet));
 
 	memcpy(search_packet->vis_orig, vis_packet->vis_orig, ETH_ALEN);
-	old_info = hash_find(bat_priv->vis_hash, vis_info_cmp, vis_info_choose,
-			     &search_elem);
+	old_info = vis_hash_find(bat_priv, &search_elem);
 	kfree_skb(search_elem.skb_packet);
 
 	if (old_info) {
@@ -442,7 +471,7 @@ static struct vis_info *add_packet(struct bat_priv *bat_priv,
 
 	/* try to add it */
 	hash_added = hash_add(bat_priv->vis_hash, vis_info_cmp, vis_info_choose,
-			      info);
+			      info, &info->hash_entry);
 	if (hash_added < 0) {
 		/* did not work (for some reason) */
 		kref_put(&info->refcount, free_info);
@@ -529,9 +558,8 @@ static int find_best_vis_server(struct bat_priv *bat_priv,
 				struct vis_info *info)
 {
 	struct hashtable_t *hash = bat_priv->orig_hash;
-	struct hlist_node *walk;
+	struct hlist_node *node;
 	struct hlist_head *head;
-	struct element_t *bucket;
 	struct orig_node *orig_node;
 	struct vis_packet *packet;
 	int best_tq = -1, i;
@@ -541,16 +569,17 @@ static int find_best_vis_server(struct bat_priv *bat_priv,
 	for (i = 0; i < hash->size; i++) {
 		head = &hash->table[i];
 
-		hlist_for_each_entry(bucket, walk, head, hlist) {
-			orig_node = bucket->data;
+		rcu_read_lock();
+		hlist_for_each_entry_rcu(orig_node, node, head, hash_entry) {
 			if ((orig_node) && (orig_node->router) &&
-			(orig_node->flags & VIS_SERVER) &&
-			(orig_node->router->tq_avg > best_tq)) {
+			    (orig_node->flags & VIS_SERVER) &&
+			    (orig_node->router->tq_avg > best_tq)) {
 				best_tq = orig_node->router->tq_avg;
 				memcpy(packet->target_orig, orig_node->orig,
 				       ETH_ALEN);
 			}
 		}
+		rcu_read_unlock();
 	}
 
 	return best_tq;
@@ -573,9 +602,8 @@ static bool vis_packet_full(struct vis_info *info)
 static int generate_vis_packet(struct bat_priv *bat_priv)
 {
 	struct hashtable_t *hash = bat_priv->orig_hash;
-	struct hlist_node *walk;
+	struct hlist_node *node;
 	struct hlist_head *head;
-	struct element_t *bucket;
 	struct orig_node *orig_node;
 	struct neigh_node *neigh_node;
 	struct vis_info *info = (struct vis_info *)bat_priv->my_vis_info;
@@ -587,7 +615,6 @@ static int generate_vis_packet(struct bat_priv *bat_priv)
 	info->first_seen = jiffies;
 	packet->vis_type = atomic_read(&bat_priv->vis_mode);
 
-	spin_lock_bh(&bat_priv->orig_hash_lock);
 	memcpy(packet->target_orig, broadcast_addr, ETH_ALEN);
 	packet->ttl = TTL;
 	packet->seqno = htonl(ntohl(packet->seqno) + 1);
@@ -597,23 +624,21 @@ static int generate_vis_packet(struct bat_priv *bat_priv)
 	if (packet->vis_type == VIS_TYPE_CLIENT_UPDATE) {
 		best_tq = find_best_vis_server(bat_priv, info);
 
-		if (best_tq < 0) {
-			spin_unlock_bh(&bat_priv->orig_hash_lock);
+		if (best_tq < 0)
 			return -1;
-		}
 	}
 
 	for (i = 0; i < hash->size; i++) {
 		head = &hash->table[i];
 
-		hlist_for_each_entry(bucket, walk, head, hlist) {
-			orig_node = bucket->data;
+		rcu_read_lock();
+		hlist_for_each_entry_rcu(orig_node, node, head, hash_entry) {
 			neigh_node = orig_node->router;
 
 			if (!neigh_node)
 				continue;
 
-			if (!compare_orig(neigh_node->addr, orig_node->orig))
+			if (!compare_eth(neigh_node->addr, orig_node->orig))
 				continue;
 
 			if (neigh_node->if_incoming->if_status != IF_ACTIVE)
@@ -632,23 +657,19 @@ static int generate_vis_packet(struct bat_priv *bat_priv)
 			entry->quality = neigh_node->tq_avg;
 			packet->entries++;
 
-			if (vis_packet_full(info)) {
-				spin_unlock_bh(&bat_priv->orig_hash_lock);
-				return 0;
-			}
+			if (vis_packet_full(info))
+				goto unlock;
 		}
+		rcu_read_unlock();
 	}
 
-	spin_unlock_bh(&bat_priv->orig_hash_lock);
-
 	hash = bat_priv->hna_local_hash;
 
 	spin_lock_bh(&bat_priv->hna_lhash_lock);
 	for (i = 0; i < hash->size; i++) {
 		head = &hash->table[i];
 
-		hlist_for_each_entry(bucket, walk, head, hlist) {
-			hna_local_entry = bucket->data;
+		hlist_for_each_entry(hna_local_entry, node, head, hash_entry) {
 			entry = (struct vis_info_entry *)
 					skb_put(info->skb_packet,
 						sizeof(*entry));
@@ -666,6 +687,10 @@ static int generate_vis_packet(struct bat_priv *bat_priv)
 
 	spin_unlock_bh(&bat_priv->hna_lhash_lock);
 	return 0;
+
+unlock:
+	rcu_read_unlock();
+	return 0;
 }
 
 /* free old vis packets. Must be called with this vis_hash_lock
@@ -674,25 +699,22 @@ static void purge_vis_packets(struct bat_priv *bat_priv)
 {
 	int i;
 	struct hashtable_t *hash = bat_priv->vis_hash;
-	struct hlist_node *walk, *safe;
+	struct hlist_node *node, *node_tmp;
 	struct hlist_head *head;
-	struct element_t *bucket;
 	struct vis_info *info;
 
 	for (i = 0; i < hash->size; i++) {
 		head = &hash->table[i];
 
-		hlist_for_each_entry_safe(bucket, walk, safe, head, hlist) {
-			info = bucket->data;
-
+		hlist_for_each_entry_safe(info, node, node_tmp,
+					  head, hash_entry) {
 			/* never purge own data. */
 			if (info == bat_priv->my_vis_info)
 				continue;
 
 			if (time_after(jiffies,
 				       info->first_seen + VIS_TIMEOUT * HZ)) {
-				hlist_del(walk);
-				kfree(bucket);
+				hlist_del(node);
 				send_list_del(info);
 				kref_put(&info->refcount, free_info);
 			}
@@ -704,27 +726,24 @@ static void broadcast_vis_packet(struct bat_priv *bat_priv,
 				 struct vis_info *info)
 {
 	struct hashtable_t *hash = bat_priv->orig_hash;
-	struct hlist_node *walk;
+	struct hlist_node *node;
 	struct hlist_head *head;
-	struct element_t *bucket;
 	struct orig_node *orig_node;
 	struct vis_packet *packet;
 	struct sk_buff *skb;
-	struct batman_if *batman_if;
+	struct hard_iface *hard_iface;
 	uint8_t dstaddr[ETH_ALEN];
 	int i;
 
 
-	spin_lock_bh(&bat_priv->orig_hash_lock);
 	packet = (struct vis_packet *)info->skb_packet->data;
 
 	/* send to all routers in range. */
 	for (i = 0; i < hash->size; i++) {
 		head = &hash->table[i];
 
-		hlist_for_each_entry(bucket, walk, head, hlist) {
-			orig_node = bucket->data;
-
+		rcu_read_lock();
+		hlist_for_each_entry_rcu(orig_node, node, head, hash_entry) {
 			/* if it's a vis server and reachable, send it. */
 			if ((!orig_node) || (!orig_node->router))
 				continue;
@@ -737,54 +756,61 @@ static void broadcast_vis_packet(struct bat_priv *bat_priv,
 				continue;
 
 			memcpy(packet->target_orig, orig_node->orig, ETH_ALEN);
-			batman_if = orig_node->router->if_incoming;
+			hard_iface = orig_node->router->if_incoming;
 			memcpy(dstaddr, orig_node->router->addr, ETH_ALEN);
-			spin_unlock_bh(&bat_priv->orig_hash_lock);
 
 			skb = skb_clone(info->skb_packet, GFP_ATOMIC);
 			if (skb)
-				send_skb_packet(skb, batman_if, dstaddr);
+				send_skb_packet(skb, hard_iface, dstaddr);
 
-			spin_lock_bh(&bat_priv->orig_hash_lock);
 		}
-
+		rcu_read_unlock();
 	}
-
-	spin_unlock_bh(&bat_priv->orig_hash_lock);
 }
 
 static void unicast_vis_packet(struct bat_priv *bat_priv,
 			       struct vis_info *info)
 {
 	struct orig_node *orig_node;
+	struct neigh_node *neigh_node = NULL;
 	struct sk_buff *skb;
 	struct vis_packet *packet;
-	struct batman_if *batman_if;
-	uint8_t dstaddr[ETH_ALEN];
 
-	spin_lock_bh(&bat_priv->orig_hash_lock);
 	packet = (struct vis_packet *)info->skb_packet->data;
-	orig_node = ((struct orig_node *)hash_find(bat_priv->orig_hash,
-						   compare_orig, choose_orig,
-						   packet->target_orig));
 
-	if ((!orig_node) || (!orig_node->router))
-		goto out;
+	rcu_read_lock();
+	orig_node = orig_hash_find(bat_priv, packet->target_orig);
 
-	/* don't lock while sending the packets ... we therefore
-	 * copy the required data before sending */
-	batman_if = orig_node->router->if_incoming;
-	memcpy(dstaddr, orig_node->router->addr, ETH_ALEN);
-	spin_unlock_bh(&bat_priv->orig_hash_lock);
+	if (!orig_node)
+		goto unlock;
+
+	neigh_node = orig_node->router;
+
+	if (!neigh_node)
+		goto unlock;
+
+	if (!atomic_inc_not_zero(&neigh_node->refcount)) {
+		neigh_node = NULL;
+		goto unlock;
+	}
+
+	rcu_read_unlock();
 
 	skb = skb_clone(info->skb_packet, GFP_ATOMIC);
 	if (skb)
-		send_skb_packet(skb, batman_if, dstaddr);
+		send_skb_packet(skb, neigh_node->if_incoming,
+				neigh_node->addr);
 
-	return;
+	goto out;
 
+unlock:
+	rcu_read_unlock();
 out:
-	spin_unlock_bh(&bat_priv->orig_hash_lock);
+	if (neigh_node)
+		neigh_node_free_ref(neigh_node);
+	if (orig_node)
+		orig_node_free_ref(orig_node);
+	return;
 }
 
 /* only send one vis packet. called from send_vis_packets() */
@@ -896,7 +922,8 @@ int vis_init(struct bat_priv *bat_priv)
 	INIT_LIST_HEAD(&bat_priv->vis_send_list);
 
 	hash_added = hash_add(bat_priv->vis_hash, vis_info_cmp, vis_info_choose,
-			      bat_priv->my_vis_info);
+			      bat_priv->my_vis_info,
+			      &bat_priv->my_vis_info->hash_entry);
 	if (hash_added < 0) {
 		pr_err("Can't add own vis packet into hash\n");
 		/* not in hash, need to remove it manually. */
@@ -918,10 +945,11 @@ err:
 }
 
 /* Decrease the reference count on a hash item info */
-static void free_info_ref(void *data, void *arg)
+static void free_info_ref(struct hlist_node *node, void *arg)
 {
-	struct vis_info *info = data;
+	struct vis_info *info;
 
+	info = container_of(node, struct vis_info, hash_entry);
 	send_list_del(info);
 	kref_put(&info->refcount, free_info);
 }
diff --git a/net/batman-adv/vis.h b/net/batman-adv/vis.h
index 2c3b330..31b820d 100644
--- a/net/batman-adv/vis.h
+++ b/net/batman-adv/vis.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2008-2010 B.A.T.M.A.N. contributors:
+ * Copyright (C) 2008-2011 B.A.T.M.A.N. contributors:
  *
  * Simon Wunderlich, Marek Lindner
  *
diff --git a/net/bluetooth/Kconfig b/net/bluetooth/Kconfig
index ed37168..6ae5ec5 100644
--- a/net/bluetooth/Kconfig
+++ b/net/bluetooth/Kconfig
@@ -27,31 +27,27 @@ menuconfig BT
 	  compile it as module (bluetooth).
 
 	  To use Linux Bluetooth subsystem, you will need several user-space
-	  utilities like hciconfig and hcid.  These utilities and updates to
-	  Bluetooth kernel modules are provided in the BlueZ packages.
-	  For more information, see <http://www.bluez.org/>.
+	  utilities like hciconfig and bluetoothd.  These utilities and updates
+	  to Bluetooth kernel modules are provided in the BlueZ packages.  For
+	  more information, see <http://www.bluez.org/>.
+
+if BT != n
 
 config BT_L2CAP
-	tristate "L2CAP protocol support"
-	depends on BT
+	bool "L2CAP protocol support"
 	select CRC16
 	help
 	  L2CAP (Logical Link Control and Adaptation Protocol) provides
 	  connection oriented and connection-less data transport.  L2CAP
 	  support is required for most Bluetooth applications.
 
-	  Say Y here to compile L2CAP support into the kernel or say M to
-	  compile it as module (l2cap).
-
 config BT_SCO
-	tristate "SCO links support"
-	depends on BT
+	bool "SCO links support"
 	help
 	  SCO link provides voice transport over Bluetooth.  SCO support is
 	  required for voice applications like Headset and Audio.
 
-	  Say Y here to compile SCO support into the kernel or say M to
-	  compile it as module (sco).
+endif
 
 source "net/bluetooth/rfcomm/Kconfig"
 
diff --git a/net/bluetooth/Makefile b/net/bluetooth/Makefile
index 250f954..f04fe9a 100644
--- a/net/bluetooth/Makefile
+++ b/net/bluetooth/Makefile
@@ -3,11 +3,11 @@
 #
 
 obj-$(CONFIG_BT)	+= bluetooth.o
-obj-$(CONFIG_BT_L2CAP)	+= l2cap.o
-obj-$(CONFIG_BT_SCO)	+= sco.o
 obj-$(CONFIG_BT_RFCOMM)	+= rfcomm/
 obj-$(CONFIG_BT_BNEP)	+= bnep/
 obj-$(CONFIG_BT_CMTP)	+= cmtp/
 obj-$(CONFIG_BT_HIDP)	+= hidp/
 
 bluetooth-y := af_bluetooth.o hci_core.o hci_conn.o hci_event.o mgmt.o hci_sock.o hci_sysfs.o lib.o
+bluetooth-$(CONFIG_BT_L2CAP)	+= l2cap_core.o l2cap_sock.o
+bluetooth-$(CONFIG_BT_SCO)	+= sco.o
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index c4cf3f5..88af9eb 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -40,7 +40,7 @@
 
 #include <net/bluetooth/bluetooth.h>
 
-#define VERSION "2.15"
+#define VERSION "2.16"
 
 /* Bluetooth sockets */
 #define BT_MAX_PROTO	8
@@ -199,14 +199,15 @@ struct sock *bt_accept_dequeue(struct sock *parent, struct socket *newsock)
 
 	BT_DBG("parent %p", parent);
 
+	local_bh_disable();
 	list_for_each_safe(p, n, &bt_sk(parent)->accept_q) {
 		sk = (struct sock *) list_entry(p, struct bt_sock, accept_q);
 
-		lock_sock(sk);
+		bh_lock_sock(sk);
 
 		/* FIXME: Is this check still needed */
 		if (sk->sk_state == BT_CLOSED) {
-			release_sock(sk);
+			bh_unlock_sock(sk);
 			bt_accept_unlink(sk);
 			continue;
 		}
@@ -216,12 +217,16 @@ struct sock *bt_accept_dequeue(struct sock *parent, struct socket *newsock)
 			bt_accept_unlink(sk);
 			if (newsock)
 				sock_graft(sk, newsock);
-			release_sock(sk);
+
+			bh_unlock_sock(sk);
+			local_bh_enable();
 			return sk;
 		}
 
-		release_sock(sk);
+		bh_unlock_sock(sk);
 	}
+	local_bh_enable();
+
 	return NULL;
 }
 EXPORT_SYMBOL(bt_accept_dequeue);
@@ -240,7 +245,8 @@ int bt_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
 	if (flags & (MSG_OOB))
 		return -EOPNOTSUPP;
 
-	if (!(skb = skb_recv_datagram(sk, flags, noblock, &err))) {
+	skb = skb_recv_datagram(sk, flags, noblock, &err);
+	if (!skb) {
 		if (sk->sk_shutdown & RCV_SHUTDOWN)
 			return 0;
 		return err;
@@ -323,7 +329,8 @@ int bt_sock_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
 			if (copied >= target)
 				break;
 
-			if ((err = sock_error(sk)) != 0)
+			err = sock_error(sk);
+			if (err)
 				break;
 			if (sk->sk_shutdown & RCV_SHUTDOWN)
 				break;
@@ -390,7 +397,7 @@ static inline unsigned int bt_accept_poll(struct sock *parent)
 	return 0;
 }
 
-unsigned int bt_sock_poll(struct file * file, struct socket *sock, poll_table *wait)
+unsigned int bt_sock_poll(struct file *file, struct socket *sock, poll_table *wait)
 {
 	struct sock *sk = sock->sk;
 	unsigned int mask = 0;
@@ -538,13 +545,41 @@ static int __init bt_init(void)
 
 	BT_INFO("HCI device and connection manager initialized");
 
-	hci_sock_init();
+	err = hci_sock_init();
+	if (err < 0)
+		goto error;
+
+	err = l2cap_init();
+	if (err < 0) {
+		hci_sock_cleanup();
+		goto sock_err;
+	}
+
+	err = sco_init();
+	if (err < 0) {
+		l2cap_exit();
+		goto sock_err;
+	}
 
 	return 0;
+
+sock_err:
+	hci_sock_cleanup();
+
+error:
+	sock_unregister(PF_BLUETOOTH);
+	bt_sysfs_cleanup();
+
+	return err;
 }
 
 static void __exit bt_exit(void)
 {
+
+	sco_exit();
+
+	l2cap_exit();
+
 	hci_sock_cleanup();
 
 	sock_unregister(PF_BLUETOOTH);
diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c
index 5868597..03d4d12 100644
--- a/net/bluetooth/bnep/core.c
+++ b/net/bluetooth/bnep/core.c
@@ -708,8 +708,6 @@ static int __init bnep_init(void)
 {
 	char flt[50] = "";
 
-	l2cap_load();
-
 #ifdef CONFIG_BT_BNEP_PROTO_FILTER
 	strcat(flt, "protocol ");
 #endif
diff --git a/net/bluetooth/bnep/sock.c b/net/bluetooth/bnep/sock.c
index 2862f53..d935da7 100644
--- a/net/bluetooth/bnep/sock.c
+++ b/net/bluetooth/bnep/sock.c
@@ -88,6 +88,7 @@ static int bnep_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long
 			sockfd_put(nsock);
 			return -EBADFD;
 		}
+		ca.device[sizeof(ca.device)-1] = 0;
 
 		err = bnep_add_connection(&ca, nsock);
 		if (!err) {
diff --git a/net/bluetooth/cmtp/capi.c b/net/bluetooth/cmtp/capi.c
index 3487cfe..67cff810 100644
--- a/net/bluetooth/cmtp/capi.c
+++ b/net/bluetooth/cmtp/capi.c
@@ -155,7 +155,8 @@ static void cmtp_send_interopmsg(struct cmtp_session *session,
 
 	BT_DBG("session %p subcmd 0x%02x appl %d msgnum %d", session, subcmd, appl, msgnum);
 
-	if (!(skb = alloc_skb(CAPI_MSG_BASELEN + 6 + len, GFP_ATOMIC))) {
+	skb = alloc_skb(CAPI_MSG_BASELEN + 6 + len, GFP_ATOMIC);
+	if (!skb) {
 		BT_ERR("Can't allocate memory for interoperability packet");
 		return;
 	}
diff --git a/net/bluetooth/cmtp/core.c b/net/bluetooth/cmtp/core.c
index 8e5f292..964ea91 100644
--- a/net/bluetooth/cmtp/core.c
+++ b/net/bluetooth/cmtp/core.c
@@ -115,7 +115,8 @@ static inline void cmtp_add_msgpart(struct cmtp_session *session, int id, const
 
 	size = (skb) ? skb->len + count : count;
 
-	if (!(nskb = alloc_skb(size, GFP_ATOMIC))) {
+	nskb = alloc_skb(size, GFP_ATOMIC);
+	if (!nskb) {
 		BT_ERR("Can't allocate memory for CAPI message");
 		return;
 	}
@@ -216,7 +217,8 @@ static void cmtp_process_transmit(struct cmtp_session *session)
 
 	BT_DBG("session %p", session);
 
-	if (!(nskb = alloc_skb(session->mtu, GFP_ATOMIC))) {
+	nskb = alloc_skb(session->mtu, GFP_ATOMIC);
+	if (!nskb) {
 		BT_ERR("Can't allocate memory for new frame");
 		return;
 	}
@@ -224,7 +226,8 @@ static void cmtp_process_transmit(struct cmtp_session *session)
 	while ((skb = skb_dequeue(&session->transmit))) {
 		struct cmtp_scb *scb = (void *) skb->cb;
 
-		if ((tail = (session->mtu - nskb->len)) < 5) {
+		tail = session->mtu - nskb->len;
+		if (tail < 5) {
 			cmtp_send_frame(session, nskb->data, nskb->len);
 			skb_trim(nskb, 0);
 			tail = session->mtu;
@@ -466,8 +469,6 @@ int cmtp_get_conninfo(struct cmtp_conninfo *ci)
 
 static int __init cmtp_init(void)
 {
-	l2cap_load();
-
 	BT_INFO("CMTP (CAPI Emulation) ver %s", VERSION);
 
 	cmtp_init_sockets();
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 99cd8d9..a050a69 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -45,6 +45,33 @@
 #include <net/bluetooth/bluetooth.h>
 #include <net/bluetooth/hci_core.h>
 
+static void hci_le_connect(struct hci_conn *conn)
+{
+	struct hci_dev *hdev = conn->hdev;
+	struct hci_cp_le_create_conn cp;
+
+	conn->state = BT_CONNECT;
+	conn->out = 1;
+	conn->link_mode |= HCI_LM_MASTER;
+
+	memset(&cp, 0, sizeof(cp));
+	cp.scan_interval = cpu_to_le16(0x0004);
+	cp.scan_window = cpu_to_le16(0x0004);
+	bacpy(&cp.peer_addr, &conn->dst);
+	cp.conn_interval_min = cpu_to_le16(0x0008);
+	cp.conn_interval_max = cpu_to_le16(0x0100);
+	cp.supervision_timeout = cpu_to_le16(0x0064);
+	cp.min_ce_len = cpu_to_le16(0x0001);
+	cp.max_ce_len = cpu_to_le16(0x0001);
+
+	hci_send_cmd(hdev, HCI_OP_LE_CREATE_CONN, sizeof(cp), &cp);
+}
+
+static void hci_le_connect_cancel(struct hci_conn *conn)
+{
+	hci_send_cmd(conn->hdev, HCI_OP_LE_CREATE_CONN_CANCEL, 0, NULL);
+}
+
 void hci_acl_connect(struct hci_conn *conn)
 {
 	struct hci_dev *hdev = conn->hdev;
@@ -156,6 +183,26 @@ void hci_setup_sync(struct hci_conn *conn, __u16 handle)
 	hci_send_cmd(hdev, HCI_OP_SETUP_SYNC_CONN, sizeof(cp), &cp);
 }
 
+void hci_le_conn_update(struct hci_conn *conn, u16 min, u16 max,
+					u16 latency, u16 to_multiplier)
+{
+	struct hci_cp_le_conn_update cp;
+	struct hci_dev *hdev = conn->hdev;
+
+	memset(&cp, 0, sizeof(cp));
+
+	cp.handle		= cpu_to_le16(conn->handle);
+	cp.conn_interval_min	= cpu_to_le16(min);
+	cp.conn_interval_max	= cpu_to_le16(max);
+	cp.conn_latency		= cpu_to_le16(latency);
+	cp.supervision_timeout	= cpu_to_le16(to_multiplier);
+	cp.min_ce_len		= cpu_to_le16(0x0001);
+	cp.max_ce_len		= cpu_to_le16(0x0001);
+
+	hci_send_cmd(hdev, HCI_OP_LE_CONN_UPDATE, sizeof(cp), &cp);
+}
+EXPORT_SYMBOL(hci_le_conn_update);
+
 /* Device _must_ be locked */
 void hci_sco_setup(struct hci_conn *conn, __u8 status)
 {
@@ -193,8 +240,12 @@ static void hci_conn_timeout(unsigned long arg)
 	switch (conn->state) {
 	case BT_CONNECT:
 	case BT_CONNECT2:
-		if (conn->type == ACL_LINK && conn->out)
-			hci_acl_connect_cancel(conn);
+		if (conn->out) {
+			if (conn->type == ACL_LINK)
+				hci_acl_connect_cancel(conn);
+			else if (conn->type == LE_LINK)
+				hci_le_connect_cancel(conn);
+		}
 		break;
 	case BT_CONFIG:
 	case BT_CONNECTED:
@@ -234,6 +285,7 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst)
 	conn->mode  = HCI_CM_ACTIVE;
 	conn->state = BT_OPEN;
 	conn->auth_type = HCI_AT_GENERAL_BONDING;
+	conn->io_capability = hdev->io_capability;
 
 	conn->power_save = 1;
 	conn->disc_timeout = HCI_DISCONN_TIMEOUT;
@@ -295,6 +347,11 @@ int hci_conn_del(struct hci_conn *conn)
 
 		/* Unacked frames */
 		hdev->acl_cnt += conn->sent;
+	} else if (conn->type == LE_LINK) {
+		if (hdev->le_pkts)
+			hdev->le_cnt += conn->sent;
+		else
+			hdev->acl_cnt += conn->sent;
 	} else {
 		struct hci_conn *acl = conn->link;
 		if (acl) {
@@ -360,15 +417,30 @@ struct hci_dev *hci_get_route(bdaddr_t *dst, bdaddr_t *src)
 }
 EXPORT_SYMBOL(hci_get_route);
 
-/* Create SCO or ACL connection.
+/* Create SCO, ACL or LE connection.
  * Device _must_ be locked */
 struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst, __u8 sec_level, __u8 auth_type)
 {
 	struct hci_conn *acl;
 	struct hci_conn *sco;
+	struct hci_conn *le;
 
 	BT_DBG("%s dst %s", hdev->name, batostr(dst));
 
+	if (type == LE_LINK) {
+		le = hci_conn_hash_lookup_ba(hdev, LE_LINK, dst);
+		if (!le)
+			le = hci_conn_add(hdev, LE_LINK, dst);
+		if (!le)
+			return NULL;
+		if (le->state == BT_OPEN)
+			hci_le_connect(le);
+
+		hci_conn_hold(le);
+
+		return le;
+	}
+
 	acl = hci_conn_hash_lookup_ba(hdev, ACL_LINK, dst);
 	if (!acl) {
 		acl = hci_conn_add(hdev, ACL_LINK, dst);
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 9c4541b..b372fb8 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -41,6 +41,7 @@
 #include <linux/interrupt.h>
 #include <linux/notifier.h>
 #include <linux/rfkill.h>
+#include <linux/timer.h>
 #include <net/sock.h>
 
 #include <asm/system.h>
@@ -50,6 +51,8 @@
 #include <net/bluetooth/bluetooth.h>
 #include <net/bluetooth/hci_core.h>
 
+#define AUTO_OFF_TIMEOUT 2000
+
 static void hci_cmd_task(unsigned long arg);
 static void hci_rx_task(unsigned long arg);
 static void hci_tx_task(unsigned long arg);
@@ -95,11 +98,10 @@ void hci_req_complete(struct hci_dev *hdev, __u16 cmd, int result)
 {
 	BT_DBG("%s command 0x%04x result 0x%2.2x", hdev->name, cmd, result);
 
-	/* If the request has set req_last_cmd (typical for multi-HCI
-	 * command requests) check if the completed command matches
-	 * this, and if not just return. Single HCI command requests
-	 * typically leave req_last_cmd as 0 */
-	if (hdev->req_last_cmd && cmd != hdev->req_last_cmd)
+	/* If this is the init phase check if the completed command matches
+	 * the last init command, and if not just return.
+	 */
+	if (test_bit(HCI_INIT, &hdev->flags) && hdev->init_last_cmd != cmd)
 		return;
 
 	if (hdev->req_status == HCI_REQ_PEND) {
@@ -122,7 +124,7 @@ static void hci_req_cancel(struct hci_dev *hdev, int err)
 
 /* Execute request and wait for completion. */
 static int __hci_request(struct hci_dev *hdev, void (*req)(struct hci_dev *hdev, unsigned long opt),
-				unsigned long opt, __u32 timeout)
+					unsigned long opt, __u32 timeout)
 {
 	DECLARE_WAITQUEUE(wait, current);
 	int err = 0;
@@ -156,7 +158,7 @@ static int __hci_request(struct hci_dev *hdev, void (*req)(struct hci_dev *hdev,
 		break;
 	}
 
-	hdev->req_last_cmd = hdev->req_status = hdev->req_result = 0;
+	hdev->req_status = hdev->req_result = 0;
 
 	BT_DBG("%s end: err %d", hdev->name, err);
 
@@ -164,7 +166,7 @@ static int __hci_request(struct hci_dev *hdev, void (*req)(struct hci_dev *hdev,
 }
 
 static inline int hci_request(struct hci_dev *hdev, void (*req)(struct hci_dev *hdev, unsigned long opt),
-				unsigned long opt, __u32 timeout)
+					unsigned long opt, __u32 timeout)
 {
 	int ret;
 
@@ -189,6 +191,7 @@ static void hci_reset_req(struct hci_dev *hdev, unsigned long opt)
 
 static void hci_init_req(struct hci_dev *hdev, unsigned long opt)
 {
+	struct hci_cp_delete_stored_link_key cp;
 	struct sk_buff *skb;
 	__le16 param;
 	__u8 flt_type;
@@ -252,15 +255,21 @@ static void hci_init_req(struct hci_dev *hdev, unsigned long opt)
 	flt_type = HCI_FLT_CLEAR_ALL;
 	hci_send_cmd(hdev, HCI_OP_SET_EVENT_FLT, 1, &flt_type);
 
-	/* Page timeout ~20 secs */
-	param = cpu_to_le16(0x8000);
-	hci_send_cmd(hdev, HCI_OP_WRITE_PG_TIMEOUT, 2, &param);
-
 	/* Connection accept timeout ~20 secs */
 	param = cpu_to_le16(0x7d00);
 	hci_send_cmd(hdev, HCI_OP_WRITE_CA_TIMEOUT, 2, &param);
 
-	hdev->req_last_cmd = HCI_OP_WRITE_CA_TIMEOUT;
+	bacpy(&cp.bdaddr, BDADDR_ANY);
+	cp.delete_all = 1;
+	hci_send_cmd(hdev, HCI_OP_DELETE_STORED_LINK_KEY, sizeof(cp), &cp);
+}
+
+static void hci_le_init_req(struct hci_dev *hdev, unsigned long opt)
+{
+	BT_DBG("%s", hdev->name);
+
+	/* Read LE buffer size */
+	hci_send_cmd(hdev, HCI_OP_LE_READ_BUFFER_SIZE, 0, NULL);
 }
 
 static void hci_scan_req(struct hci_dev *hdev, unsigned long opt)
@@ -429,7 +438,8 @@ int hci_inquiry(void __user *arg)
 	if (copy_from_user(&ir, ptr, sizeof(ir)))
 		return -EFAULT;
 
-	if (!(hdev = hci_dev_get(ir.dev_id)))
+	hdev = hci_dev_get(ir.dev_id);
+	if (!hdev)
 		return -ENODEV;
 
 	hci_dev_lock_bh(hdev);
@@ -455,7 +465,7 @@ int hci_inquiry(void __user *arg)
 	/* cache_dump can't sleep. Therefore we allocate temp buffer and then
 	 * copy it to the user space.
 	 */
-	buf = kmalloc(sizeof(struct inquiry_info) *max_rsp, GFP_KERNEL);
+	buf = kmalloc(sizeof(struct inquiry_info) * max_rsp, GFP_KERNEL);
 	if (!buf) {
 		err = -ENOMEM;
 		goto done;
@@ -489,7 +499,8 @@ int hci_dev_open(__u16 dev)
 	struct hci_dev *hdev;
 	int ret = 0;
 
-	if (!(hdev = hci_dev_get(dev)))
+	hdev = hci_dev_get(dev);
+	if (!hdev)
 		return -ENODEV;
 
 	BT_DBG("%s %p", hdev->name, hdev);
@@ -521,11 +532,15 @@ int hci_dev_open(__u16 dev)
 	if (!test_bit(HCI_RAW, &hdev->flags)) {
 		atomic_set(&hdev->cmd_cnt, 1);
 		set_bit(HCI_INIT, &hdev->flags);
+		hdev->init_last_cmd = 0;
 
-		//__hci_request(hdev, hci_reset_req, 0, HZ);
 		ret = __hci_request(hdev, hci_init_req, 0,
 					msecs_to_jiffies(HCI_INIT_TIMEOUT));
 
+		if (lmp_le_capable(hdev))
+			ret = __hci_request(hdev, hci_le_init_req, 0,
+					msecs_to_jiffies(HCI_INIT_TIMEOUT));
+
 		clear_bit(HCI_INIT, &hdev->flags);
 	}
 
@@ -533,6 +548,8 @@ int hci_dev_open(__u16 dev)
 		hci_dev_hold(hdev);
 		set_bit(HCI_UP, &hdev->flags);
 		hci_notify(hdev, HCI_DEV_UP);
+		if (!test_bit(HCI_SETUP, &hdev->flags))
+			mgmt_powered(hdev->id, 1);
 	} else {
 		/* Init failed, cleanup */
 		tasklet_kill(&hdev->rx_task);
@@ -606,6 +623,7 @@ static int hci_dev_do_close(struct hci_dev *hdev)
 
 	/* Drop last sent command */
 	if (hdev->sent_cmd) {
+		del_timer_sync(&hdev->cmd_timer);
 		kfree_skb(hdev->sent_cmd);
 		hdev->sent_cmd = NULL;
 	}
@@ -614,6 +632,8 @@ static int hci_dev_do_close(struct hci_dev *hdev)
 	 * and no tasks are scheduled. */
 	hdev->close(hdev);
 
+	mgmt_powered(hdev->id, 0);
+
 	/* Clear flags */
 	hdev->flags = 0;
 
@@ -664,7 +684,7 @@ int hci_dev_reset(__u16 dev)
 		hdev->flush(hdev);
 
 	atomic_set(&hdev->cmd_cnt, 1);
-	hdev->acl_cnt = 0; hdev->sco_cnt = 0;
+	hdev->acl_cnt = 0; hdev->sco_cnt = 0; hdev->le_cnt = 0;
 
 	if (!test_bit(HCI_RAW, &hdev->flags))
 		ret = __hci_request(hdev, hci_reset_req, 0,
@@ -793,9 +813,17 @@ int hci_get_dev_list(void __user *arg)
 	read_lock_bh(&hci_dev_list_lock);
 	list_for_each(p, &hci_dev_list) {
 		struct hci_dev *hdev;
+
 		hdev = list_entry(p, struct hci_dev, list);
+
+		hci_del_off_timer(hdev);
+
+		if (!test_bit(HCI_MGMT, &hdev->flags))
+			set_bit(HCI_PAIRABLE, &hdev->flags);
+
 		(dr + n)->dev_id  = hdev->id;
 		(dr + n)->dev_opt = hdev->flags;
+
 		if (++n >= dev_num)
 			break;
 	}
@@ -823,6 +851,11 @@ int hci_get_dev_info(void __user *arg)
 	if (!hdev)
 		return -ENODEV;
 
+	hci_del_off_timer(hdev);
+
+	if (!test_bit(HCI_MGMT, &hdev->flags))
+		set_bit(HCI_PAIRABLE, &hdev->flags);
+
 	strcpy(di.name, hdev->name);
 	di.bdaddr   = hdev->bdaddr;
 	di.type     = (hdev->bus & 0x0f) | (hdev->dev_type << 4);
@@ -891,6 +924,159 @@ void hci_free_dev(struct hci_dev *hdev)
 }
 EXPORT_SYMBOL(hci_free_dev);
 
+static void hci_power_on(struct work_struct *work)
+{
+	struct hci_dev *hdev = container_of(work, struct hci_dev, power_on);
+
+	BT_DBG("%s", hdev->name);
+
+	if (hci_dev_open(hdev->id) < 0)
+		return;
+
+	if (test_bit(HCI_AUTO_OFF, &hdev->flags))
+		mod_timer(&hdev->off_timer,
+				jiffies + msecs_to_jiffies(AUTO_OFF_TIMEOUT));
+
+	if (test_and_clear_bit(HCI_SETUP, &hdev->flags))
+		mgmt_index_added(hdev->id);
+}
+
+static void hci_power_off(struct work_struct *work)
+{
+	struct hci_dev *hdev = container_of(work, struct hci_dev, power_off);
+
+	BT_DBG("%s", hdev->name);
+
+	hci_dev_close(hdev->id);
+}
+
+static void hci_auto_off(unsigned long data)
+{
+	struct hci_dev *hdev = (struct hci_dev *) data;
+
+	BT_DBG("%s", hdev->name);
+
+	clear_bit(HCI_AUTO_OFF, &hdev->flags);
+
+	queue_work(hdev->workqueue, &hdev->power_off);
+}
+
+void hci_del_off_timer(struct hci_dev *hdev)
+{
+	BT_DBG("%s", hdev->name);
+
+	clear_bit(HCI_AUTO_OFF, &hdev->flags);
+	del_timer(&hdev->off_timer);
+}
+
+int hci_uuids_clear(struct hci_dev *hdev)
+{
+	struct list_head *p, *n;
+
+	list_for_each_safe(p, n, &hdev->uuids) {
+		struct bt_uuid *uuid;
+
+		uuid = list_entry(p, struct bt_uuid, list);
+
+		list_del(p);
+		kfree(uuid);
+	}
+
+	return 0;
+}
+
+int hci_link_keys_clear(struct hci_dev *hdev)
+{
+	struct list_head *p, *n;
+
+	list_for_each_safe(p, n, &hdev->link_keys) {
+		struct link_key *key;
+
+		key = list_entry(p, struct link_key, list);
+
+		list_del(p);
+		kfree(key);
+	}
+
+	return 0;
+}
+
+struct link_key *hci_find_link_key(struct hci_dev *hdev, bdaddr_t *bdaddr)
+{
+	struct list_head *p;
+
+	list_for_each(p, &hdev->link_keys) {
+		struct link_key *k;
+
+		k = list_entry(p, struct link_key, list);
+
+		if (bacmp(bdaddr, &k->bdaddr) == 0)
+			return k;
+	}
+
+	return NULL;
+}
+
+int hci_add_link_key(struct hci_dev *hdev, int new_key, bdaddr_t *bdaddr,
+						u8 *val, u8 type, u8 pin_len)
+{
+	struct link_key *key, *old_key;
+	u8 old_key_type;
+
+	old_key = hci_find_link_key(hdev, bdaddr);
+	if (old_key) {
+		old_key_type = old_key->type;
+		key = old_key;
+	} else {
+		old_key_type = 0xff;
+		key = kzalloc(sizeof(*key), GFP_ATOMIC);
+		if (!key)
+			return -ENOMEM;
+		list_add(&key->list, &hdev->link_keys);
+	}
+
+	BT_DBG("%s key for %s type %u", hdev->name, batostr(bdaddr), type);
+
+	bacpy(&key->bdaddr, bdaddr);
+	memcpy(key->val, val, 16);
+	key->type = type;
+	key->pin_len = pin_len;
+
+	if (new_key)
+		mgmt_new_key(hdev->id, key, old_key_type);
+
+	if (type == 0x06)
+		key->type = old_key_type;
+
+	return 0;
+}
+
+int hci_remove_link_key(struct hci_dev *hdev, bdaddr_t *bdaddr)
+{
+	struct link_key *key;
+
+	key = hci_find_link_key(hdev, bdaddr);
+	if (!key)
+		return -ENOENT;
+
+	BT_DBG("%s removing %s", hdev->name, batostr(bdaddr));
+
+	list_del(&key->list);
+	kfree(key);
+
+	return 0;
+}
+
+/* HCI command timer function */
+static void hci_cmd_timer(unsigned long arg)
+{
+	struct hci_dev *hdev = (void *) arg;
+
+	BT_ERR("%s command tx timeout", hdev->name);
+	atomic_set(&hdev->cmd_cnt, 1);
+	tasklet_schedule(&hdev->cmd_task);
+}
+
 /* Register HCI device */
 int hci_register_dev(struct hci_dev *hdev)
 {
@@ -923,6 +1109,7 @@ int hci_register_dev(struct hci_dev *hdev)
 	hdev->pkt_type  = (HCI_DM1 | HCI_DH1 | HCI_HV1);
 	hdev->esco_type = (ESCO_HV1);
 	hdev->link_mode = (HCI_LM_ACCEPT);
+	hdev->io_capability = 0x03; /* No Input No Output */
 
 	hdev->idle_timeout = 0;
 	hdev->sniff_max_interval = 800;
@@ -936,6 +1123,8 @@ int hci_register_dev(struct hci_dev *hdev)
 	skb_queue_head_init(&hdev->cmd_q);
 	skb_queue_head_init(&hdev->raw_q);
 
+	setup_timer(&hdev->cmd_timer, hci_cmd_timer, (unsigned long) hdev);
+
 	for (i = 0; i < NUM_REASSEMBLY; i++)
 		hdev->reassembly[i] = NULL;
 
@@ -948,6 +1137,14 @@ int hci_register_dev(struct hci_dev *hdev)
 
 	INIT_LIST_HEAD(&hdev->blacklist);
 
+	INIT_LIST_HEAD(&hdev->uuids);
+
+	INIT_LIST_HEAD(&hdev->link_keys);
+
+	INIT_WORK(&hdev->power_on, hci_power_on);
+	INIT_WORK(&hdev->power_off, hci_power_off);
+	setup_timer(&hdev->off_timer, hci_auto_off, (unsigned long) hdev);
+
 	memset(&hdev->stat, 0, sizeof(struct hci_dev_stats));
 
 	atomic_set(&hdev->promisc, 0);
@@ -969,7 +1166,10 @@ int hci_register_dev(struct hci_dev *hdev)
 		}
 	}
 
-	mgmt_index_added(hdev->id);
+	set_bit(HCI_AUTO_OFF, &hdev->flags);
+	set_bit(HCI_SETUP, &hdev->flags);
+	queue_work(hdev->workqueue, &hdev->power_on);
+
 	hci_notify(hdev, HCI_DEV_REG);
 
 	return id;
@@ -999,7 +1199,10 @@ int hci_unregister_dev(struct hci_dev *hdev)
 	for (i = 0; i < NUM_REASSEMBLY; i++)
 		kfree_skb(hdev->reassembly[i]);
 
-	mgmt_index_removed(hdev->id);
+	if (!test_bit(HCI_INIT, &hdev->flags) &&
+					!test_bit(HCI_SETUP, &hdev->flags))
+		mgmt_index_removed(hdev->id);
+
 	hci_notify(hdev, HCI_DEV_UNREG);
 
 	if (hdev->rfkill) {
@@ -1009,10 +1212,14 @@ int hci_unregister_dev(struct hci_dev *hdev)
 
 	hci_unregister_sysfs(hdev);
 
+	hci_del_off_timer(hdev);
+
 	destroy_workqueue(hdev->workqueue);
 
 	hci_dev_lock_bh(hdev);
 	hci_blacklist_clear(hdev);
+	hci_uuids_clear(hdev);
+	hci_link_keys_clear(hdev);
 	hci_dev_unlock_bh(hdev);
 
 	__hci_dev_put(hdev);
@@ -1313,7 +1520,7 @@ static int hci_send_frame(struct sk_buff *skb)
 		/* Time stamp */
 		__net_timestamp(skb);
 
-		hci_send_to_sock(hdev, skb);
+		hci_send_to_sock(hdev, skb, NULL);
 	}
 
 	/* Get rid of skb owner, prior to sending to the driver. */
@@ -1349,6 +1556,9 @@ int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, void *param)
 	bt_cb(skb)->pkt_type = HCI_COMMAND_PKT;
 	skb->dev = (void *) hdev;
 
+	if (test_bit(HCI_INIT, &hdev->flags))
+		hdev->init_last_cmd = opcode;
+
 	skb_queue_tail(&hdev->cmd_q, skb);
 	tasklet_schedule(&hdev->cmd_task);
 
@@ -1395,7 +1605,7 @@ void hci_send_acl(struct hci_conn *conn, struct sk_buff *skb, __u16 flags)
 
 	skb->dev = (void *) hdev;
 	bt_cb(skb)->pkt_type = HCI_ACLDATA_PKT;
-	hci_add_acl_hdr(skb, conn->handle, flags | ACL_START);
+	hci_add_acl_hdr(skb, conn->handle, flags);
 
 	list = skb_shinfo(skb)->frag_list;
 	if (!list) {
@@ -1413,12 +1623,15 @@ void hci_send_acl(struct hci_conn *conn, struct sk_buff *skb, __u16 flags)
 		spin_lock_bh(&conn->data_q.lock);
 
 		__skb_queue_tail(&conn->data_q, skb);
+
+		flags &= ~ACL_START;
+		flags |= ACL_CONT;
 		do {
 			skb = list; list = list->next;
 
 			skb->dev = (void *) hdev;
 			bt_cb(skb)->pkt_type = HCI_ACLDATA_PKT;
-			hci_add_acl_hdr(skb, conn->handle, flags | ACL_CONT);
+			hci_add_acl_hdr(skb, conn->handle, flags);
 
 			BT_DBG("%s frag %p len %d", hdev->name, skb, skb->len);
 
@@ -1486,8 +1699,25 @@ static inline struct hci_conn *hci_low_sent(struct hci_dev *hdev, __u8 type, int
 	}
 
 	if (conn) {
-		int cnt = (type == ACL_LINK ? hdev->acl_cnt : hdev->sco_cnt);
-		int q = cnt / num;
+		int cnt, q;
+
+		switch (conn->type) {
+		case ACL_LINK:
+			cnt = hdev->acl_cnt;
+			break;
+		case SCO_LINK:
+		case ESCO_LINK:
+			cnt = hdev->sco_cnt;
+			break;
+		case LE_LINK:
+			cnt = hdev->le_mtu ? hdev->le_cnt : hdev->acl_cnt;
+			break;
+		default:
+			cnt = 0;
+			BT_ERR("Unknown link type");
+		}
+
+		q = cnt / num;
 		*quote = q ? q : 1;
 	} else
 		*quote = 0;
@@ -1496,19 +1726,19 @@ static inline struct hci_conn *hci_low_sent(struct hci_dev *hdev, __u8 type, int
 	return conn;
 }
 
-static inline void hci_acl_tx_to(struct hci_dev *hdev)
+static inline void hci_link_tx_to(struct hci_dev *hdev, __u8 type)
 {
 	struct hci_conn_hash *h = &hdev->conn_hash;
 	struct list_head *p;
 	struct hci_conn  *c;
 
-	BT_ERR("%s ACL tx timeout", hdev->name);
+	BT_ERR("%s link tx timeout", hdev->name);
 
 	/* Kill stalled connections */
 	list_for_each(p, &h->list) {
 		c = list_entry(p, struct hci_conn, list);
-		if (c->type == ACL_LINK && c->sent) {
-			BT_ERR("%s killing stalled ACL connection %s",
+		if (c->type == type && c->sent) {
+			BT_ERR("%s killing stalled connection %s",
 				hdev->name, batostr(&c->dst));
 			hci_acl_disconn(c, 0x13);
 		}
@@ -1527,7 +1757,7 @@ static inline void hci_sched_acl(struct hci_dev *hdev)
 		/* ACL tx timeout must be longer than maximum
 		 * link supervision timeout (40.9 seconds) */
 		if (!hdev->acl_cnt && time_after(jiffies, hdev->acl_last_tx + HZ * 45))
-			hci_acl_tx_to(hdev);
+			hci_link_tx_to(hdev, ACL_LINK);
 	}
 
 	while (hdev->acl_cnt && (conn = hci_low_sent(hdev, ACL_LINK, &quote))) {
@@ -1586,6 +1816,40 @@ static inline void hci_sched_esco(struct hci_dev *hdev)
 	}
 }
 
+static inline void hci_sched_le(struct hci_dev *hdev)
+{
+	struct hci_conn *conn;
+	struct sk_buff *skb;
+	int quote, cnt;
+
+	BT_DBG("%s", hdev->name);
+
+	if (!test_bit(HCI_RAW, &hdev->flags)) {
+		/* LE tx timeout must be longer than maximum
+		 * link supervision timeout (40.9 seconds) */
+		if (!hdev->le_cnt && hdev->le_pkts &&
+				time_after(jiffies, hdev->le_last_tx + HZ * 45))
+			hci_link_tx_to(hdev, LE_LINK);
+	}
+
+	cnt = hdev->le_pkts ? hdev->le_cnt : hdev->acl_cnt;
+	while (cnt && (conn = hci_low_sent(hdev, LE_LINK, &quote))) {
+		while (quote-- && (skb = skb_dequeue(&conn->data_q))) {
+			BT_DBG("skb %p len %d", skb, skb->len);
+
+			hci_send_frame(skb);
+			hdev->le_last_tx = jiffies;
+
+			cnt--;
+			conn->sent++;
+		}
+	}
+	if (hdev->le_pkts)
+		hdev->le_cnt = cnt;
+	else
+		hdev->acl_cnt = cnt;
+}
+
 static void hci_tx_task(unsigned long arg)
 {
 	struct hci_dev *hdev = (struct hci_dev *) arg;
@@ -1593,7 +1857,8 @@ static void hci_tx_task(unsigned long arg)
 
 	read_lock(&hci_task_lock);
 
-	BT_DBG("%s acl %d sco %d", hdev->name, hdev->acl_cnt, hdev->sco_cnt);
+	BT_DBG("%s acl %d sco %d le %d", hdev->name, hdev->acl_cnt,
+		hdev->sco_cnt, hdev->le_cnt);
 
 	/* Schedule queues and send stuff to HCI driver */
 
@@ -1603,6 +1868,8 @@ static void hci_tx_task(unsigned long arg)
 
 	hci_sched_esco(hdev);
 
+	hci_sched_le(hdev);
+
 	/* Send next queued raw (unknown type) packet */
 	while ((skb = skb_dequeue(&hdev->raw_q)))
 		hci_send_frame(skb);
@@ -1700,7 +1967,7 @@ static void hci_rx_task(unsigned long arg)
 	while ((skb = skb_dequeue(&hdev->rx_q))) {
 		if (atomic_read(&hdev->promisc)) {
 			/* Send copy to the sockets */
-			hci_send_to_sock(hdev, skb);
+			hci_send_to_sock(hdev, skb, NULL);
 		}
 
 		if (test_bit(HCI_RAW, &hdev->flags)) {
@@ -1750,20 +2017,20 @@ static void hci_cmd_task(unsigned long arg)
 
 	BT_DBG("%s cmd %d", hdev->name, atomic_read(&hdev->cmd_cnt));
 
-	if (!atomic_read(&hdev->cmd_cnt) && time_after(jiffies, hdev->cmd_last_tx + HZ)) {
-		BT_ERR("%s command tx timeout", hdev->name);
-		atomic_set(&hdev->cmd_cnt, 1);
-	}
-
 	/* Send queued commands */
-	if (atomic_read(&hdev->cmd_cnt) && (skb = skb_dequeue(&hdev->cmd_q))) {
+	if (atomic_read(&hdev->cmd_cnt)) {
+		skb = skb_dequeue(&hdev->cmd_q);
+		if (!skb)
+			return;
+
 		kfree_skb(hdev->sent_cmd);
 
 		hdev->sent_cmd = skb_clone(skb, GFP_ATOMIC);
 		if (hdev->sent_cmd) {
 			atomic_dec(&hdev->cmd_cnt);
 			hci_send_frame(skb);
-			hdev->cmd_last_tx = jiffies;
+			mod_timer(&hdev->cmd_timer,
+				  jiffies + msecs_to_jiffies(HCI_CMD_TIMEOUT));
 		} else {
 			skb_queue_head(&hdev->cmd_q, skb);
 			tasklet_schedule(&hdev->cmd_task);
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index a290854..98b5764 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -274,15 +274,24 @@ static void hci_cc_write_scan_enable(struct hci_dev *hdev, struct sk_buff *skb)
 
 	if (!status) {
 		__u8 param = *((__u8 *) sent);
+		int old_pscan, old_iscan;
 
-		clear_bit(HCI_PSCAN, &hdev->flags);
-		clear_bit(HCI_ISCAN, &hdev->flags);
+		old_pscan = test_and_clear_bit(HCI_PSCAN, &hdev->flags);
+		old_iscan = test_and_clear_bit(HCI_ISCAN, &hdev->flags);
 
-		if (param & SCAN_INQUIRY)
+		if (param & SCAN_INQUIRY) {
 			set_bit(HCI_ISCAN, &hdev->flags);
+			if (!old_iscan)
+				mgmt_discoverable(hdev->id, 1);
+		} else if (old_iscan)
+			mgmt_discoverable(hdev->id, 0);
 
-		if (param & SCAN_PAGE)
+		if (param & SCAN_PAGE) {
 			set_bit(HCI_PSCAN, &hdev->flags);
+			if (!old_pscan)
+				mgmt_connectable(hdev->id, 1);
+		} else if (old_pscan)
+			mgmt_connectable(hdev->id, 0);
 	}
 
 	hci_req_complete(hdev, HCI_OP_WRITE_SCAN_ENABLE, status);
@@ -415,6 +424,115 @@ static void hci_cc_write_ssp_mode(struct hci_dev *hdev, struct sk_buff *skb)
 	hdev->ssp_mode = *((__u8 *) sent);
 }
 
+static u8 hci_get_inquiry_mode(struct hci_dev *hdev)
+{
+	if (hdev->features[6] & LMP_EXT_INQ)
+		return 2;
+
+	if (hdev->features[3] & LMP_RSSI_INQ)
+		return 1;
+
+	if (hdev->manufacturer == 11 && hdev->hci_rev == 0x00 &&
+						hdev->lmp_subver == 0x0757)
+		return 1;
+
+	if (hdev->manufacturer == 15) {
+		if (hdev->hci_rev == 0x03 && hdev->lmp_subver == 0x6963)
+			return 1;
+		if (hdev->hci_rev == 0x09 && hdev->lmp_subver == 0x6963)
+			return 1;
+		if (hdev->hci_rev == 0x00 && hdev->lmp_subver == 0x6965)
+			return 1;
+	}
+
+	if (hdev->manufacturer == 31 && hdev->hci_rev == 0x2005 &&
+						hdev->lmp_subver == 0x1805)
+		return 1;
+
+	return 0;
+}
+
+static void hci_setup_inquiry_mode(struct hci_dev *hdev)
+{
+	u8 mode;
+
+	mode = hci_get_inquiry_mode(hdev);
+
+	hci_send_cmd(hdev, HCI_OP_WRITE_INQUIRY_MODE, 1, &mode);
+}
+
+static void hci_setup_event_mask(struct hci_dev *hdev)
+{
+	/* The second byte is 0xff instead of 0x9f (two reserved bits
+	 * disabled) since a Broadcom 1.2 dongle doesn't respond to the
+	 * command otherwise */
+	u8 events[8] = { 0xff, 0xff, 0xfb, 0xff, 0x00, 0x00, 0x00, 0x00 };
+
+	/* Events for 1.2 and newer controllers */
+	if (hdev->lmp_ver > 1) {
+		events[4] |= 0x01; /* Flow Specification Complete */
+		events[4] |= 0x02; /* Inquiry Result with RSSI */
+		events[4] |= 0x04; /* Read Remote Extended Features Complete */
+		events[5] |= 0x08; /* Synchronous Connection Complete */
+		events[5] |= 0x10; /* Synchronous Connection Changed */
+	}
+
+	if (hdev->features[3] & LMP_RSSI_INQ)
+		events[4] |= 0x04; /* Inquiry Result with RSSI */
+
+	if (hdev->features[5] & LMP_SNIFF_SUBR)
+		events[5] |= 0x20; /* Sniff Subrating */
+
+	if (hdev->features[5] & LMP_PAUSE_ENC)
+		events[5] |= 0x80; /* Encryption Key Refresh Complete */
+
+	if (hdev->features[6] & LMP_EXT_INQ)
+		events[5] |= 0x40; /* Extended Inquiry Result */
+
+	if (hdev->features[6] & LMP_NO_FLUSH)
+		events[7] |= 0x01; /* Enhanced Flush Complete */
+
+	if (hdev->features[7] & LMP_LSTO)
+		events[6] |= 0x80; /* Link Supervision Timeout Changed */
+
+	if (hdev->features[6] & LMP_SIMPLE_PAIR) {
+		events[6] |= 0x01;	/* IO Capability Request */
+		events[6] |= 0x02;	/* IO Capability Response */
+		events[6] |= 0x04;	/* User Confirmation Request */
+		events[6] |= 0x08;	/* User Passkey Request */
+		events[6] |= 0x10;	/* Remote OOB Data Request */
+		events[6] |= 0x20;	/* Simple Pairing Complete */
+		events[7] |= 0x04;	/* User Passkey Notification */
+		events[7] |= 0x08;	/* Keypress Notification */
+		events[7] |= 0x10;	/* Remote Host Supported
+					 * Features Notification */
+	}
+
+	if (hdev->features[4] & LMP_LE)
+		events[7] |= 0x20;	/* LE Meta-Event */
+
+	hci_send_cmd(hdev, HCI_OP_SET_EVENT_MASK, sizeof(events), events);
+}
+
+static void hci_setup(struct hci_dev *hdev)
+{
+	hci_setup_event_mask(hdev);
+
+	if (hdev->lmp_ver > 1)
+		hci_send_cmd(hdev, HCI_OP_READ_LOCAL_COMMANDS, 0, NULL);
+
+	if (hdev->features[6] & LMP_SIMPLE_PAIR) {
+		u8 mode = 0x01;
+		hci_send_cmd(hdev, HCI_OP_WRITE_SSP_MODE, sizeof(mode), &mode);
+	}
+
+	if (hdev->features[3] & LMP_RSSI_INQ)
+		hci_setup_inquiry_mode(hdev);
+
+	if (hdev->features[7] & LMP_INQ_TX_PWR)
+		hci_send_cmd(hdev, HCI_OP_READ_INQ_RSP_TX_POWER, 0, NULL);
+}
+
 static void hci_cc_read_local_version(struct hci_dev *hdev, struct sk_buff *skb)
 {
 	struct hci_rp_read_local_version *rp = (void *) skb->data;
@@ -426,11 +544,34 @@ static void hci_cc_read_local_version(struct hci_dev *hdev, struct sk_buff *skb)
 
 	hdev->hci_ver = rp->hci_ver;
 	hdev->hci_rev = __le16_to_cpu(rp->hci_rev);
+	hdev->lmp_ver = rp->lmp_ver;
 	hdev->manufacturer = __le16_to_cpu(rp->manufacturer);
+	hdev->lmp_subver = __le16_to_cpu(rp->lmp_subver);
 
 	BT_DBG("%s manufacturer %d hci ver %d:%d", hdev->name,
 					hdev->manufacturer,
 					hdev->hci_ver, hdev->hci_rev);
+
+	if (test_bit(HCI_INIT, &hdev->flags))
+		hci_setup(hdev);
+}
+
+static void hci_setup_link_policy(struct hci_dev *hdev)
+{
+	u16 link_policy = 0;
+
+	if (hdev->features[0] & LMP_RSWITCH)
+		link_policy |= HCI_LP_RSWITCH;
+	if (hdev->features[0] & LMP_HOLD)
+		link_policy |= HCI_LP_HOLD;
+	if (hdev->features[0] & LMP_SNIFF)
+		link_policy |= HCI_LP_SNIFF;
+	if (hdev->features[1] & LMP_PARK)
+		link_policy |= HCI_LP_PARK;
+
+	link_policy = cpu_to_le16(link_policy);
+	hci_send_cmd(hdev, HCI_OP_WRITE_DEF_LINK_POLICY,
+					sizeof(link_policy), &link_policy);
 }
 
 static void hci_cc_read_local_commands(struct hci_dev *hdev, struct sk_buff *skb)
@@ -440,9 +581,15 @@ static void hci_cc_read_local_commands(struct hci_dev *hdev, struct sk_buff *skb
 	BT_DBG("%s status 0x%x", hdev->name, rp->status);
 
 	if (rp->status)
-		return;
+		goto done;
 
 	memcpy(hdev->commands, rp->commands, sizeof(hdev->commands));
+
+	if (test_bit(HCI_INIT, &hdev->flags) && (hdev->commands[5] & 0x10))
+		hci_setup_link_policy(hdev);
+
+done:
+	hci_req_complete(hdev, HCI_OP_READ_LOCAL_COMMANDS, rp->status);
 }
 
 static void hci_cc_read_local_features(struct hci_dev *hdev, struct sk_buff *skb)
@@ -548,6 +695,107 @@ static void hci_cc_write_ca_timeout(struct hci_dev *hdev, struct sk_buff *skb)
 	hci_req_complete(hdev, HCI_OP_WRITE_CA_TIMEOUT, status);
 }
 
+static void hci_cc_delete_stored_link_key(struct hci_dev *hdev,
+							struct sk_buff *skb)
+{
+	__u8 status = *((__u8 *) skb->data);
+
+	BT_DBG("%s status 0x%x", hdev->name, status);
+
+	hci_req_complete(hdev, HCI_OP_DELETE_STORED_LINK_KEY, status);
+}
+
+static void hci_cc_set_event_mask(struct hci_dev *hdev, struct sk_buff *skb)
+{
+	__u8 status = *((__u8 *) skb->data);
+
+	BT_DBG("%s status 0x%x", hdev->name, status);
+
+	hci_req_complete(hdev, HCI_OP_SET_EVENT_MASK, status);
+}
+
+static void hci_cc_write_inquiry_mode(struct hci_dev *hdev,
+							struct sk_buff *skb)
+{
+	__u8 status = *((__u8 *) skb->data);
+
+	BT_DBG("%s status 0x%x", hdev->name, status);
+
+	hci_req_complete(hdev, HCI_OP_WRITE_INQUIRY_MODE, status);
+}
+
+static void hci_cc_read_inq_rsp_tx_power(struct hci_dev *hdev,
+							struct sk_buff *skb)
+{
+	__u8 status = *((__u8 *) skb->data);
+
+	BT_DBG("%s status 0x%x", hdev->name, status);
+
+	hci_req_complete(hdev, HCI_OP_READ_INQ_RSP_TX_POWER, status);
+}
+
+static void hci_cc_set_event_flt(struct hci_dev *hdev, struct sk_buff *skb)
+{
+	__u8 status = *((__u8 *) skb->data);
+
+	BT_DBG("%s status 0x%x", hdev->name, status);
+
+	hci_req_complete(hdev, HCI_OP_SET_EVENT_FLT, status);
+}
+
+static void hci_cc_pin_code_reply(struct hci_dev *hdev, struct sk_buff *skb)
+{
+	struct hci_rp_pin_code_reply *rp = (void *) skb->data;
+	struct hci_cp_pin_code_reply *cp;
+	struct hci_conn *conn;
+
+	BT_DBG("%s status 0x%x", hdev->name, rp->status);
+
+	if (test_bit(HCI_MGMT, &hdev->flags))
+		mgmt_pin_code_reply_complete(hdev->id, &rp->bdaddr, rp->status);
+
+	if (rp->status != 0)
+		return;
+
+	cp = hci_sent_cmd_data(hdev, HCI_OP_PIN_CODE_REPLY);
+	if (!cp)
+		return;
+
+	conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &cp->bdaddr);
+	if (conn)
+		conn->pin_length = cp->pin_len;
+}
+
+static void hci_cc_pin_code_neg_reply(struct hci_dev *hdev, struct sk_buff *skb)
+{
+	struct hci_rp_pin_code_neg_reply *rp = (void *) skb->data;
+
+	BT_DBG("%s status 0x%x", hdev->name, rp->status);
+
+	if (test_bit(HCI_MGMT, &hdev->flags))
+		mgmt_pin_code_neg_reply_complete(hdev->id, &rp->bdaddr,
+								rp->status);
+}
+static void hci_cc_le_read_buffer_size(struct hci_dev *hdev,
+				       struct sk_buff *skb)
+{
+	struct hci_rp_le_read_buffer_size *rp = (void *) skb->data;
+
+	BT_DBG("%s status 0x%x", hdev->name, rp->status);
+
+	if (rp->status)
+		return;
+
+	hdev->le_mtu = __le16_to_cpu(rp->le_mtu);
+	hdev->le_pkts = rp->le_max_pkt;
+
+	hdev->le_cnt = hdev->le_pkts;
+
+	BT_DBG("%s le mtu %d:%d", hdev->name, hdev->le_mtu, hdev->le_pkts);
+
+	hci_req_complete(hdev, HCI_OP_LE_READ_BUFFER_SIZE, rp->status);
+}
+
 static inline void hci_cs_inquiry(struct hci_dev *hdev, __u8 status)
 {
 	BT_DBG("%s status 0x%x", hdev->name, status);
@@ -622,11 +870,14 @@ static void hci_cs_add_sco(struct hci_dev *hdev, __u8 status)
 	hci_dev_lock(hdev);
 
 	acl = hci_conn_hash_lookup_handle(hdev, handle);
-	if (acl && (sco = acl->link)) {
-		sco->state = BT_CLOSED;
+	if (acl) {
+		sco = acl->link;
+		if (sco) {
+			sco->state = BT_CLOSED;
 
-		hci_proto_connect_cfm(sco, status);
-		hci_conn_del(sco);
+			hci_proto_connect_cfm(sco, status);
+			hci_conn_del(sco);
+		}
 	}
 
 	hci_dev_unlock(hdev);
@@ -687,7 +938,7 @@ static void hci_cs_set_conn_encrypt(struct hci_dev *hdev, __u8 status)
 }
 
 static int hci_outgoing_auth_needed(struct hci_dev *hdev,
-						struct hci_conn *conn)
+							struct hci_conn *conn)
 {
 	if (conn->state != BT_CONFIG || !conn->out)
 		return 0;
@@ -808,11 +1059,14 @@ static void hci_cs_setup_sync_conn(struct hci_dev *hdev, __u8 status)
 	hci_dev_lock(hdev);
 
 	acl = hci_conn_hash_lookup_handle(hdev, handle);
-	if (acl && (sco = acl->link)) {
-		sco->state = BT_CLOSED;
+	if (acl) {
+		sco = acl->link;
+		if (sco) {
+			sco->state = BT_CLOSED;
 
-		hci_proto_connect_cfm(sco, status);
-		hci_conn_del(sco);
+			hci_proto_connect_cfm(sco, status);
+			hci_conn_del(sco);
+		}
 	}
 
 	hci_dev_unlock(hdev);
@@ -872,6 +1126,43 @@ static void hci_cs_exit_sniff_mode(struct hci_dev *hdev, __u8 status)
 	hci_dev_unlock(hdev);
 }
 
+static void hci_cs_le_create_conn(struct hci_dev *hdev, __u8 status)
+{
+	struct hci_cp_le_create_conn *cp;
+	struct hci_conn *conn;
+
+	BT_DBG("%s status 0x%x", hdev->name, status);
+
+	cp = hci_sent_cmd_data(hdev, HCI_OP_LE_CREATE_CONN);
+	if (!cp)
+		return;
+
+	hci_dev_lock(hdev);
+
+	conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, &cp->peer_addr);
+
+	BT_DBG("%s bdaddr %s conn %p", hdev->name, batostr(&cp->peer_addr),
+		conn);
+
+	if (status) {
+		if (conn && conn->state == BT_CONNECT) {
+			conn->state = BT_CLOSED;
+			hci_proto_connect_cfm(conn, status);
+			hci_conn_del(conn);
+		}
+	} else {
+		if (!conn) {
+			conn = hci_conn_add(hdev, LE_LINK, &cp->peer_addr);
+			if (conn)
+				conn->out = 1;
+			else
+				BT_ERR("No memory for new connection");
+		}
+	}
+
+	hci_dev_unlock(hdev);
+}
+
 static inline void hci_inquiry_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
 {
 	__u8 status = *((__u8 *) skb->data);
@@ -942,6 +1233,7 @@ static inline void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *s
 			conn->state = BT_CONFIG;
 			hci_conn_hold(conn);
 			conn->disc_timeout = HCI_DISCONN_TIMEOUT;
+			mgmt_connected(hdev->id, &ev->bdaddr);
 		} else
 			conn->state = BT_CONNECTED;
 
@@ -970,8 +1262,11 @@ static inline void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *s
 			hci_send_cmd(hdev, HCI_OP_CHANGE_CONN_PTYPE,
 							sizeof(cp), &cp);
 		}
-	} else
+	} else {
 		conn->state = BT_CLOSED;
+		if (conn->type == ACL_LINK)
+			mgmt_connect_failed(hdev->id, &ev->bdaddr, ev->status);
+	}
 
 	if (conn->type == ACL_LINK)
 		hci_sco_setup(conn, ev->status);
@@ -998,7 +1293,8 @@ static inline void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *sk
 
 	mask |= hci_proto_connect_ind(hdev, &ev->bdaddr, ev->link_type);
 
-	if ((mask & HCI_LM_ACCEPT) && !hci_blacklist_lookup(hdev, &ev->bdaddr)) {
+	if ((mask & HCI_LM_ACCEPT) &&
+			!hci_blacklist_lookup(hdev, &ev->bdaddr)) {
 		/* Connection accepted */
 		struct inquiry_entry *ie;
 		struct hci_conn *conn;
@@ -1068,19 +1364,26 @@ static inline void hci_disconn_complete_evt(struct hci_dev *hdev, struct sk_buff
 
 	BT_DBG("%s status %d", hdev->name, ev->status);
 
-	if (ev->status)
+	if (ev->status) {
+		mgmt_disconnect_failed(hdev->id);
 		return;
+	}
 
 	hci_dev_lock(hdev);
 
 	conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(ev->handle));
-	if (conn) {
-		conn->state = BT_CLOSED;
+	if (!conn)
+		goto unlock;
 
-		hci_proto_disconn_cfm(conn, ev->reason);
-		hci_conn_del(conn);
-	}
+	conn->state = BT_CLOSED;
+
+	if (conn->type == ACL_LINK)
+		mgmt_disconnected(hdev->id, &conn->dst);
 
+	hci_proto_disconn_cfm(conn, ev->reason);
+	hci_conn_del(conn);
+
+unlock:
 	hci_dev_unlock(hdev);
 }
 
@@ -1393,11 +1696,46 @@ static inline void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *sk
 		hci_cc_write_ca_timeout(hdev, skb);
 		break;
 
+	case HCI_OP_DELETE_STORED_LINK_KEY:
+		hci_cc_delete_stored_link_key(hdev, skb);
+		break;
+
+	case HCI_OP_SET_EVENT_MASK:
+		hci_cc_set_event_mask(hdev, skb);
+		break;
+
+	case HCI_OP_WRITE_INQUIRY_MODE:
+		hci_cc_write_inquiry_mode(hdev, skb);
+		break;
+
+	case HCI_OP_READ_INQ_RSP_TX_POWER:
+		hci_cc_read_inq_rsp_tx_power(hdev, skb);
+		break;
+
+	case HCI_OP_SET_EVENT_FLT:
+		hci_cc_set_event_flt(hdev, skb);
+		break;
+
+	case HCI_OP_PIN_CODE_REPLY:
+		hci_cc_pin_code_reply(hdev, skb);
+		break;
+
+	case HCI_OP_PIN_CODE_NEG_REPLY:
+		hci_cc_pin_code_neg_reply(hdev, skb);
+		break;
+
+	case HCI_OP_LE_READ_BUFFER_SIZE:
+		hci_cc_le_read_buffer_size(hdev, skb);
+		break;
+
 	default:
 		BT_DBG("%s opcode 0x%x", hdev->name, opcode);
 		break;
 	}
 
+	if (ev->opcode != HCI_OP_NOP)
+		del_timer(&hdev->cmd_timer);
+
 	if (ev->ncmd) {
 		atomic_set(&hdev->cmd_cnt, 1);
 		if (!skb_queue_empty(&hdev->cmd_q))
@@ -1459,11 +1797,23 @@ static inline void hci_cmd_status_evt(struct hci_dev *hdev, struct sk_buff *skb)
 		hci_cs_exit_sniff_mode(hdev, ev->status);
 		break;
 
+	case HCI_OP_DISCONNECT:
+		if (ev->status != 0)
+			mgmt_disconnect_failed(hdev->id);
+		break;
+
+	case HCI_OP_LE_CREATE_CONN:
+		hci_cs_le_create_conn(hdev, ev->status);
+		break;
+
 	default:
 		BT_DBG("%s opcode 0x%x", hdev->name, opcode);
 		break;
 	}
 
+	if (ev->opcode != HCI_OP_NOP)
+		del_timer(&hdev->cmd_timer);
+
 	if (ev->ncmd) {
 		atomic_set(&hdev->cmd_cnt, 1);
 		if (!skb_queue_empty(&hdev->cmd_q))
@@ -1529,6 +1879,16 @@ static inline void hci_num_comp_pkts_evt(struct hci_dev *hdev, struct sk_buff *s
 				hdev->acl_cnt += count;
 				if (hdev->acl_cnt > hdev->acl_pkts)
 					hdev->acl_cnt = hdev->acl_pkts;
+			} else if (conn->type == LE_LINK) {
+				if (hdev->le_pkts) {
+					hdev->le_cnt += count;
+					if (hdev->le_cnt > hdev->le_pkts)
+						hdev->le_cnt = hdev->le_pkts;
+				} else {
+					hdev->acl_cnt += count;
+					if (hdev->acl_cnt > hdev->acl_pkts)
+						hdev->acl_cnt = hdev->acl_pkts;
+				}
 			} else {
 				hdev->sco_cnt += count;
 				if (hdev->sco_cnt > hdev->sco_pkts)
@@ -1586,18 +1946,72 @@ static inline void hci_pin_code_request_evt(struct hci_dev *hdev, struct sk_buff
 		hci_conn_put(conn);
 	}
 
+	if (!test_bit(HCI_PAIRABLE, &hdev->flags))
+		hci_send_cmd(hdev, HCI_OP_PIN_CODE_NEG_REPLY,
+					sizeof(ev->bdaddr), &ev->bdaddr);
+
+	if (test_bit(HCI_MGMT, &hdev->flags))
+		mgmt_pin_code_request(hdev->id, &ev->bdaddr);
+
 	hci_dev_unlock(hdev);
 }
 
 static inline void hci_link_key_request_evt(struct hci_dev *hdev, struct sk_buff *skb)
 {
+	struct hci_ev_link_key_req *ev = (void *) skb->data;
+	struct hci_cp_link_key_reply cp;
+	struct hci_conn *conn;
+	struct link_key *key;
+
 	BT_DBG("%s", hdev->name);
+
+	if (!test_bit(HCI_LINK_KEYS, &hdev->flags))
+		return;
+
+	hci_dev_lock(hdev);
+
+	key = hci_find_link_key(hdev, &ev->bdaddr);
+	if (!key) {
+		BT_DBG("%s link key not found for %s", hdev->name,
+							batostr(&ev->bdaddr));
+		goto not_found;
+	}
+
+	BT_DBG("%s found key type %u for %s", hdev->name, key->type,
+							batostr(&ev->bdaddr));
+
+	if (!test_bit(HCI_DEBUG_KEYS, &hdev->flags) && key->type == 0x03) {
+		BT_DBG("%s ignoring debug key", hdev->name);
+		goto not_found;
+	}
+
+	conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr);
+
+	if (key->type == 0x04 && conn && conn->auth_type != 0xff &&
+						(conn->auth_type & 0x01)) {
+		BT_DBG("%s ignoring unauthenticated key", hdev->name);
+		goto not_found;
+	}
+
+	bacpy(&cp.bdaddr, &ev->bdaddr);
+	memcpy(cp.link_key, key->val, 16);
+
+	hci_send_cmd(hdev, HCI_OP_LINK_KEY_REPLY, sizeof(cp), &cp);
+
+	hci_dev_unlock(hdev);
+
+	return;
+
+not_found:
+	hci_send_cmd(hdev, HCI_OP_LINK_KEY_NEG_REPLY, 6, &ev->bdaddr);
+	hci_dev_unlock(hdev);
 }
 
 static inline void hci_link_key_notify_evt(struct hci_dev *hdev, struct sk_buff *skb)
 {
 	struct hci_ev_link_key_notify *ev = (void *) skb->data;
 	struct hci_conn *conn;
+	u8 pin_len = 0;
 
 	BT_DBG("%s", hdev->name);
 
@@ -1607,9 +2021,14 @@ static inline void hci_link_key_notify_evt(struct hci_dev *hdev, struct sk_buff
 	if (conn) {
 		hci_conn_hold(conn);
 		conn->disc_timeout = HCI_DISCONN_TIMEOUT;
+		pin_len = conn->pin_length;
 		hci_conn_put(conn);
 	}
 
+	if (test_bit(HCI_LINK_KEYS, &hdev->flags))
+		hci_add_link_key(hdev, 1, &ev->bdaddr, ev->link_key,
+							ev->key_type, pin_len);
+
 	hci_dev_unlock(hdev);
 }
 
@@ -1683,7 +2102,8 @@ static inline void hci_inquiry_result_with_rssi_evt(struct hci_dev *hdev, struct
 	hci_dev_lock(hdev);
 
 	if ((skb->len - 1) / num_rsp != sizeof(struct inquiry_info_with_rssi)) {
-		struct inquiry_info_with_rssi_and_pscan_mode *info = (void *) (skb->data + 1);
+		struct inquiry_info_with_rssi_and_pscan_mode *info;
+		info = (void *) (skb->data + 1);
 
 		for (; num_rsp; num_rsp--) {
 			bacpy(&data.bdaddr, &info->bdaddr);
@@ -1824,17 +2244,8 @@ static inline void hci_sync_conn_changed_evt(struct hci_dev *hdev, struct sk_buf
 static inline void hci_sniff_subrate_evt(struct hci_dev *hdev, struct sk_buff *skb)
 {
 	struct hci_ev_sniff_subrate *ev = (void *) skb->data;
-	struct hci_conn *conn;
 
 	BT_DBG("%s status %d", hdev->name, ev->status);
-
-	hci_dev_lock(hdev);
-
-	conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(ev->handle));
-	if (conn) {
-	}
-
-	hci_dev_unlock(hdev);
 }
 
 static inline void hci_extended_inquiry_result_evt(struct hci_dev *hdev, struct sk_buff *skb)
@@ -1852,12 +2263,12 @@ static inline void hci_extended_inquiry_result_evt(struct hci_dev *hdev, struct
 
 	for (; num_rsp; num_rsp--) {
 		bacpy(&data.bdaddr, &info->bdaddr);
-		data.pscan_rep_mode     = info->pscan_rep_mode;
-		data.pscan_period_mode  = info->pscan_period_mode;
-		data.pscan_mode         = 0x00;
+		data.pscan_rep_mode	= info->pscan_rep_mode;
+		data.pscan_period_mode	= info->pscan_period_mode;
+		data.pscan_mode		= 0x00;
 		memcpy(data.dev_class, info->dev_class, 3);
-		data.clock_offset       = info->clock_offset;
-		data.rssi               = info->rssi;
+		data.clock_offset	= info->clock_offset;
+		data.rssi		= info->rssi;
 		data.ssp_mode		= 0x01;
 		info++;
 		hci_inquiry_cache_update(hdev, &data);
@@ -1866,6 +2277,25 @@ static inline void hci_extended_inquiry_result_evt(struct hci_dev *hdev, struct
 	hci_dev_unlock(hdev);
 }
 
+static inline u8 hci_get_auth_req(struct hci_conn *conn)
+{
+	/* If remote requests dedicated bonding follow that lead */
+	if (conn->remote_auth == 0x02 || conn->remote_auth == 0x03) {
+		/* If both remote and local IO capabilities allow MITM
+		 * protection then require it, otherwise don't */
+		if (conn->remote_cap == 0x03 || conn->io_capability == 0x03)
+			return 0x02;
+		else
+			return 0x03;
+	}
+
+	/* If remote requests no-bonding follow that lead */
+	if (conn->remote_auth == 0x00 || conn->remote_auth == 0x01)
+		return 0x00;
+
+	return conn->auth_type;
+}
+
 static inline void hci_io_capa_request_evt(struct hci_dev *hdev, struct sk_buff *skb)
 {
 	struct hci_ev_io_capa_request *ev = (void *) skb->data;
@@ -1876,9 +2306,59 @@ static inline void hci_io_capa_request_evt(struct hci_dev *hdev, struct sk_buff
 	hci_dev_lock(hdev);
 
 	conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr);
-	if (conn)
-		hci_conn_hold(conn);
+	if (!conn)
+		goto unlock;
+
+	hci_conn_hold(conn);
+
+	if (!test_bit(HCI_MGMT, &hdev->flags))
+		goto unlock;
+
+	if (test_bit(HCI_PAIRABLE, &hdev->flags) ||
+			(conn->remote_auth & ~0x01) == HCI_AT_NO_BONDING) {
+		struct hci_cp_io_capability_reply cp;
+
+		bacpy(&cp.bdaddr, &ev->bdaddr);
+		cp.capability = conn->io_capability;
+		cp.oob_data = 0;
+		cp.authentication = hci_get_auth_req(conn);
+
+		hci_send_cmd(hdev, HCI_OP_IO_CAPABILITY_REPLY,
+							sizeof(cp), &cp);
+	} else {
+		struct hci_cp_io_capability_neg_reply cp;
+
+		bacpy(&cp.bdaddr, &ev->bdaddr);
+		cp.reason = 0x16; /* Pairing not allowed */
+
+		hci_send_cmd(hdev, HCI_OP_IO_CAPABILITY_NEG_REPLY,
+							sizeof(cp), &cp);
+	}
+
+unlock:
+	hci_dev_unlock(hdev);
+}
+
+static inline void hci_io_capa_reply_evt(struct hci_dev *hdev, struct sk_buff *skb)
+{
+	struct hci_ev_io_capa_reply *ev = (void *) skb->data;
+	struct hci_conn *conn;
+
+	BT_DBG("%s", hdev->name);
+
+	hci_dev_lock(hdev);
 
+	conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr);
+	if (!conn)
+		goto unlock;
+
+	hci_conn_hold(conn);
+
+	conn->remote_cap = ev->capability;
+	conn->remote_oob = ev->oob_data;
+	conn->remote_auth = ev->authentication;
+
+unlock:
 	hci_dev_unlock(hdev);
 }
 
@@ -1914,6 +2394,60 @@ static inline void hci_remote_host_features_evt(struct hci_dev *hdev, struct sk_
 	hci_dev_unlock(hdev);
 }
 
+static inline void hci_le_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
+{
+	struct hci_ev_le_conn_complete *ev = (void *) skb->data;
+	struct hci_conn *conn;
+
+	BT_DBG("%s status %d", hdev->name, ev->status);
+
+	hci_dev_lock(hdev);
+
+	conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, &ev->bdaddr);
+	if (!conn) {
+		conn = hci_conn_add(hdev, LE_LINK, &ev->bdaddr);
+		if (!conn) {
+			BT_ERR("No memory for new connection");
+			hci_dev_unlock(hdev);
+			return;
+		}
+	}
+
+	if (ev->status) {
+		hci_proto_connect_cfm(conn, ev->status);
+		conn->state = BT_CLOSED;
+		hci_conn_del(conn);
+		goto unlock;
+	}
+
+	conn->handle = __le16_to_cpu(ev->handle);
+	conn->state = BT_CONNECTED;
+
+	hci_conn_hold_device(conn);
+	hci_conn_add_sysfs(conn);
+
+	hci_proto_connect_cfm(conn, ev->status);
+
+unlock:
+	hci_dev_unlock(hdev);
+}
+
+static inline void hci_le_meta_evt(struct hci_dev *hdev, struct sk_buff *skb)
+{
+	struct hci_ev_le_meta *le_ev = (void *) skb->data;
+
+	skb_pull(skb, sizeof(*le_ev));
+
+	switch (le_ev->subevent) {
+	case HCI_EV_LE_CONN_COMPLETE:
+		hci_le_conn_complete_evt(hdev, skb);
+		break;
+
+	default:
+		break;
+	}
+}
+
 void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb)
 {
 	struct hci_event_hdr *hdr = (void *) skb->data;
@@ -2042,6 +2576,10 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb)
 		hci_io_capa_request_evt(hdev, skb);
 		break;
 
+	case HCI_EV_IO_CAPA_REPLY:
+		hci_io_capa_reply_evt(hdev, skb);
+		break;
+
 	case HCI_EV_SIMPLE_PAIR_COMPLETE:
 		hci_simple_pair_complete_evt(hdev, skb);
 		break;
@@ -2050,6 +2588,10 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb)
 		hci_remote_host_features_evt(hdev, skb);
 		break;
 
+	case HCI_EV_LE_META:
+		hci_le_meta_evt(hdev, skb);
+		break;
+
 	default:
 		BT_DBG("%s event 0x%x", hdev->name, event);
 		break;
@@ -2083,6 +2625,6 @@ void hci_si_event(struct hci_dev *hdev, int type, int dlen, void *data)
 
 	bt_cb(skb)->pkt_type = HCI_EVENT_PKT;
 	skb->dev = (void *) hdev;
-	hci_send_to_sock(hdev, skb);
+	hci_send_to_sock(hdev, skb, NULL);
 	kfree_skb(skb);
 }
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 29827c7..d50e961 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -85,7 +85,8 @@ static struct bt_sock_list hci_sk_list = {
 };
 
 /* Send frame to RAW socket */
-void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb)
+void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb,
+							struct sock *skip_sk)
 {
 	struct sock *sk;
 	struct hlist_node *node;
@@ -97,6 +98,9 @@ void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb)
 		struct hci_filter *flt;
 		struct sk_buff *nskb;
 
+		if (sk == skip_sk)
+			continue;
+
 		if (sk->sk_state != BT_BOUND || hci_pi(sk)->hdev != hdev)
 			continue;
 
diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c
index 5fce3d6..3c838a6 100644
--- a/net/bluetooth/hci_sysfs.c
+++ b/net/bluetooth/hci_sysfs.c
@@ -11,7 +11,7 @@
 
 static struct class *bt_class;
 
-struct dentry *bt_debugfs = NULL;
+struct dentry *bt_debugfs;
 EXPORT_SYMBOL_GPL(bt_debugfs);
 
 static inline char *link_typetostr(int type)
@@ -51,8 +51,8 @@ static ssize_t show_link_features(struct device *dev, struct device_attribute *a
 				conn->features[6], conn->features[7]);
 }
 
-#define LINK_ATTR(_name,_mode,_show,_store) \
-struct device_attribute link_attr_##_name = __ATTR(_name,_mode,_show,_store)
+#define LINK_ATTR(_name, _mode, _show, _store) \
+struct device_attribute link_attr_##_name = __ATTR(_name, _mode, _show, _store)
 
 static LINK_ATTR(type, S_IRUGO, show_link_type, NULL);
 static LINK_ATTR(address, S_IRUGO, show_link_address, NULL);
@@ -461,6 +461,56 @@ static const struct file_operations blacklist_fops = {
 	.llseek		= seq_lseek,
 	.release	= single_release,
 };
+
+static void print_bt_uuid(struct seq_file *f, u8 *uuid)
+{
+	u32 data0, data4;
+	u16 data1, data2, data3, data5;
+
+	memcpy(&data0, &uuid[0], 4);
+	memcpy(&data1, &uuid[4], 2);
+	memcpy(&data2, &uuid[6], 2);
+	memcpy(&data3, &uuid[8], 2);
+	memcpy(&data4, &uuid[10], 4);
+	memcpy(&data5, &uuid[14], 2);
+
+	seq_printf(f, "%.8x-%.4x-%.4x-%.4x-%.8x%.4x\n",
+				ntohl(data0), ntohs(data1), ntohs(data2),
+				ntohs(data3), ntohl(data4), ntohs(data5));
+}
+
+static int uuids_show(struct seq_file *f, void *p)
+{
+	struct hci_dev *hdev = f->private;
+	struct list_head *l;
+
+	hci_dev_lock_bh(hdev);
+
+	list_for_each(l, &hdev->uuids) {
+		struct bt_uuid *uuid;
+
+		uuid = list_entry(l, struct bt_uuid, list);
+
+		print_bt_uuid(f, uuid->uuid);
+	}
+
+	hci_dev_unlock_bh(hdev);
+
+	return 0;
+}
+
+static int uuids_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, uuids_show, inode->i_private);
+}
+
+static const struct file_operations uuids_fops = {
+	.open		= uuids_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
 int hci_register_sysfs(struct hci_dev *hdev)
 {
 	struct device *dev = &hdev->dev;
@@ -493,6 +543,8 @@ int hci_register_sysfs(struct hci_dev *hdev)
 	debugfs_create_file("blacklist", 0444, hdev->debugfs,
 						hdev, &blacklist_fops);
 
+	debugfs_create_file("uuids", 0444, hdev->debugfs, hdev, &uuids_fops);
+
 	return 0;
 }
 
diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index 29544c2..2429ca2 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -157,7 +157,8 @@ static int hidp_queue_event(struct hidp_session *session, struct input_dev *dev,
 
 	session->leds = newleds;
 
-	if (!(skb = alloc_skb(3, GFP_ATOMIC))) {
+	skb = alloc_skb(3, GFP_ATOMIC);
+	if (!skb) {
 		BT_ERR("Can't allocate memory for new frame");
 		return -ENOMEM;
 	}
@@ -250,7 +251,8 @@ static int __hidp_send_ctrl_message(struct hidp_session *session,
 
 	BT_DBG("session %p data %p size %d", session, data, size);
 
-	if (!(skb = alloc_skb(size + 1, GFP_ATOMIC))) {
+	skb = alloc_skb(size + 1, GFP_ATOMIC);
+	if (!skb) {
 		BT_ERR("Can't allocate memory for new frame");
 		return -ENOMEM;
 	}
@@ -283,7 +285,8 @@ static int hidp_queue_report(struct hidp_session *session,
 
 	BT_DBG("session %p hid %p data %p size %d", session, session->hid, data, size);
 
-	if (!(skb = alloc_skb(size + 1, GFP_ATOMIC))) {
+	skb = alloc_skb(size + 1, GFP_ATOMIC);
+	if (!skb) {
 		BT_ERR("Can't allocate memory for new frame");
 		return -ENOMEM;
 	}
@@ -1016,8 +1019,6 @@ static int __init hidp_init(void)
 {
 	int ret;
 
-	l2cap_load();
-
 	BT_INFO("HIDP (Human Interface Emulation) ver %s", VERSION);
 
 	ret = hid_register_driver(&hidp_driver);
diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap_core.c
index 675614e..efcef0d 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap_core.c
@@ -24,7 +24,7 @@
    SOFTWARE IS DISCLAIMED.
 */
 
-/* Bluetooth L2CAP core and sockets. */
+/* Bluetooth L2CAP core. */
 
 #include <linux/module.h>
 
@@ -55,79 +55,24 @@
 #include <net/bluetooth/hci_core.h>
 #include <net/bluetooth/l2cap.h>
 
-#define VERSION "2.15"
-
-static int disable_ertm;
+int disable_ertm;
 
 static u32 l2cap_feat_mask = L2CAP_FEAT_FIXED_CHAN;
 static u8 l2cap_fixed_chan[8] = { 0x02, };
 
-static const struct proto_ops l2cap_sock_ops;
-
 static struct workqueue_struct *_busy_wq;
 
-static struct bt_sock_list l2cap_sk_list = {
+struct bt_sock_list l2cap_sk_list = {
 	.lock = __RW_LOCK_UNLOCKED(l2cap_sk_list.lock)
 };
 
 static void l2cap_busy_work(struct work_struct *work);
 
-static void __l2cap_sock_close(struct sock *sk, int reason);
-static void l2cap_sock_close(struct sock *sk);
-static void l2cap_sock_kill(struct sock *sk);
-
-static int l2cap_build_conf_req(struct sock *sk, void *data);
 static struct sk_buff *l2cap_build_cmd(struct l2cap_conn *conn,
 				u8 code, u8 ident, u16 dlen, void *data);
 
 static int l2cap_ertm_data_rcv(struct sock *sk, struct sk_buff *skb);
 
-/* ---- L2CAP timers ---- */
-static void l2cap_sock_set_timer(struct sock *sk, long timeout)
-{
-	BT_DBG("sk %p state %d timeout %ld", sk, sk->sk_state, timeout);
-	sk_reset_timer(sk, &sk->sk_timer, jiffies + timeout);
-}
-
-static void l2cap_sock_clear_timer(struct sock *sk)
-{
-	BT_DBG("sock %p state %d", sk, sk->sk_state);
-	sk_stop_timer(sk, &sk->sk_timer);
-}
-
-static void l2cap_sock_timeout(unsigned long arg)
-{
-	struct sock *sk = (struct sock *) arg;
-	int reason;
-
-	BT_DBG("sock %p state %d", sk, sk->sk_state);
-
-	bh_lock_sock(sk);
-
-	if (sock_owned_by_user(sk)) {
-		/* sk is owned by user. Try again later */
-		l2cap_sock_set_timer(sk, HZ / 5);
-		bh_unlock_sock(sk);
-		sock_put(sk);
-		return;
-	}
-
-	if (sk->sk_state == BT_CONNECTED || sk->sk_state == BT_CONFIG)
-		reason = ECONNREFUSED;
-	else if (sk->sk_state == BT_CONNECT &&
-				l2cap_pi(sk)->sec_level != BT_SECURITY_SDP)
-		reason = ECONNREFUSED;
-	else
-		reason = ETIMEDOUT;
-
-	__l2cap_sock_close(sk, reason);
-
-	bh_unlock_sock(sk);
-
-	l2cap_sock_kill(sk);
-	sock_put(sk);
-}
-
 /* ---- L2CAP channels ---- */
 static struct sock *__l2cap_get_chan_by_dcid(struct l2cap_chan_list *l, u16 cid)
 {
@@ -236,8 +181,16 @@ static void __l2cap_chan_add(struct l2cap_conn *conn, struct sock *sk, struct so
 	l2cap_pi(sk)->conn = conn;
 
 	if (sk->sk_type == SOCK_SEQPACKET || sk->sk_type == SOCK_STREAM) {
-		/* Alloc CID for connection-oriented socket */
-		l2cap_pi(sk)->scid = l2cap_alloc_cid(l);
+		if (conn->hcon->type == LE_LINK) {
+			/* LE connection */
+			l2cap_pi(sk)->omtu = L2CAP_LE_DEFAULT_MTU;
+			l2cap_pi(sk)->scid = L2CAP_CID_LE_DATA;
+			l2cap_pi(sk)->dcid = L2CAP_CID_LE_DATA;
+		} else {
+			/* Alloc CID for connection-oriented socket */
+			l2cap_pi(sk)->scid = l2cap_alloc_cid(l);
+			l2cap_pi(sk)->omtu = L2CAP_DEFAULT_MTU;
+		}
 	} else if (sk->sk_type == SOCK_DGRAM) {
 		/* Connectionless socket */
 		l2cap_pi(sk)->scid = L2CAP_CID_CONN_LESS;
@@ -258,7 +211,7 @@ static void __l2cap_chan_add(struct l2cap_conn *conn, struct sock *sk, struct so
 
 /* Delete channel.
  * Must be called on the locked socket. */
-static void l2cap_chan_del(struct sock *sk, int err)
+void l2cap_chan_del(struct sock *sk, int err)
 {
 	struct l2cap_conn *conn = l2cap_pi(sk)->conn;
 	struct sock *parent = bt_sk(sk)->parent;
@@ -348,7 +301,7 @@ static inline int l2cap_check_security(struct sock *sk)
 								auth_type);
 }
 
-static inline u8 l2cap_get_ident(struct l2cap_conn *conn)
+u8 l2cap_get_ident(struct l2cap_conn *conn)
 {
 	u8 id;
 
@@ -370,16 +323,22 @@ static inline u8 l2cap_get_ident(struct l2cap_conn *conn)
 	return id;
 }
 
-static inline void l2cap_send_cmd(struct l2cap_conn *conn, u8 ident, u8 code, u16 len, void *data)
+void l2cap_send_cmd(struct l2cap_conn *conn, u8 ident, u8 code, u16 len, void *data)
 {
 	struct sk_buff *skb = l2cap_build_cmd(conn, code, ident, len, data);
+	u8 flags;
 
 	BT_DBG("code 0x%2.2x", code);
 
 	if (!skb)
 		return;
 
-	hci_send_acl(conn->hcon, skb, 0);
+	if (lmp_no_flush_capable(conn->hcon->hdev))
+		flags = ACL_START_NO_FLUSH;
+	else
+		flags = ACL_START;
+
+	hci_send_acl(conn->hcon, skb, flags);
 }
 
 static inline void l2cap_send_sframe(struct l2cap_pinfo *pi, u16 control)
@@ -389,6 +348,7 @@ static inline void l2cap_send_sframe(struct l2cap_pinfo *pi, u16 control)
 	struct l2cap_conn *conn = pi->conn;
 	struct sock *sk = (struct sock *)pi;
 	int count, hlen = L2CAP_HDR_SIZE + 2;
+	u8 flags;
 
 	if (sk->sk_state != BT_CONNECTED)
 		return;
@@ -425,7 +385,12 @@ static inline void l2cap_send_sframe(struct l2cap_pinfo *pi, u16 control)
 		put_unaligned_le16(fcs, skb_put(skb, 2));
 	}
 
-	hci_send_acl(pi->conn->hcon, skb, 0);
+	if (lmp_no_flush_capable(conn->hcon->hdev))
+		flags = ACL_START_NO_FLUSH;
+	else
+		flags = ACL_START;
+
+	hci_send_acl(pi->conn->hcon, skb, flags);
 }
 
 static inline void l2cap_send_rr_or_rnr(struct l2cap_pinfo *pi, u16 control)
@@ -496,7 +461,7 @@ static inline int l2cap_mode_supported(__u8 mode, __u32 feat_mask)
 	}
 }
 
-static void l2cap_send_disconn_req(struct l2cap_conn *conn, struct sock *sk, int err)
+void l2cap_send_disconn_req(struct l2cap_conn *conn, struct sock *sk, int err)
 {
 	struct l2cap_disconn_req req;
 
@@ -624,6 +589,82 @@ static void l2cap_conn_start(struct l2cap_conn *conn)
 	}
 }
 
+/* Find socket with cid and source bdaddr.
+ * Returns closest match, locked.
+ */
+static struct sock *l2cap_get_sock_by_scid(int state, __le16 cid, bdaddr_t *src)
+{
+	struct sock *s, *sk = NULL, *sk1 = NULL;
+	struct hlist_node *node;
+
+	read_lock(&l2cap_sk_list.lock);
+
+	sk_for_each(sk, node, &l2cap_sk_list.head) {
+		if (state && sk->sk_state != state)
+			continue;
+
+		if (l2cap_pi(sk)->scid == cid) {
+			/* Exact match. */
+			if (!bacmp(&bt_sk(sk)->src, src))
+				break;
+
+			/* Closest match */
+			if (!bacmp(&bt_sk(sk)->src, BDADDR_ANY))
+				sk1 = sk;
+		}
+	}
+	s = node ? sk : sk1;
+	if (s)
+		bh_lock_sock(s);
+	read_unlock(&l2cap_sk_list.lock);
+
+	return s;
+}
+
+static void l2cap_le_conn_ready(struct l2cap_conn *conn)
+{
+	struct l2cap_chan_list *list = &conn->chan_list;
+	struct sock *parent, *uninitialized_var(sk);
+
+	BT_DBG("");
+
+	/* Check if we have socket listening on cid */
+	parent = l2cap_get_sock_by_scid(BT_LISTEN, L2CAP_CID_LE_DATA,
+							conn->src);
+	if (!parent)
+		return;
+
+	/* Check for backlog size */
+	if (sk_acceptq_is_full(parent)) {
+		BT_DBG("backlog full %d", parent->sk_ack_backlog);
+		goto clean;
+	}
+
+	sk = l2cap_sock_alloc(sock_net(parent), NULL, BTPROTO_L2CAP, GFP_ATOMIC);
+	if (!sk)
+		goto clean;
+
+	write_lock_bh(&list->lock);
+
+	hci_conn_hold(conn->hcon);
+
+	l2cap_sock_init(sk, parent);
+	bacpy(&bt_sk(sk)->src, conn->src);
+	bacpy(&bt_sk(sk)->dst, conn->dst);
+
+	__l2cap_chan_add(conn, sk, parent);
+
+	l2cap_sock_set_timer(sk, sk->sk_sndtimeo);
+
+	sk->sk_state = BT_CONNECTED;
+	parent->sk_data_ready(parent, 0);
+
+	write_unlock_bh(&list->lock);
+
+clean:
+	bh_unlock_sock(parent);
+}
+
 static void l2cap_conn_ready(struct l2cap_conn *conn)
 {
 	struct l2cap_chan_list *l = &conn->chan_list;
@@ -631,11 +672,20 @@ static void l2cap_conn_ready(struct l2cap_conn *conn)
 
 	BT_DBG("conn %p", conn);
 
+	if (!conn->hcon->out && conn->hcon->type == LE_LINK)
+		l2cap_le_conn_ready(conn);
+
 	read_lock(&l->lock);
 
 	for (sk = l->head; sk; sk = l2cap_pi(sk)->next_c) {
 		bh_lock_sock(sk);
 
+		if (conn->hcon->type == LE_LINK) {
+			l2cap_sock_clear_timer(sk);
+			sk->sk_state = BT_CONNECTED;
+			sk->sk_state_change(sk);
+		}
+
 		if (sk->sk_type != SOCK_SEQPACKET &&
 				sk->sk_type != SOCK_STREAM) {
 			l2cap_sock_clear_timer(sk);
@@ -694,7 +744,11 @@ static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon, u8 status)
 
 	BT_DBG("hcon %p conn %p", hcon, conn);
 
-	conn->mtu = hcon->hdev->acl_mtu;
+	if (hcon->hdev->le_mtu && hcon->type == LE_LINK)
+		conn->mtu = hcon->hdev->le_mtu;
+	else
+		conn->mtu = hcon->hdev->acl_mtu;
+
 	conn->src = &hcon->hdev->bdaddr;
 	conn->dst = &hcon->dst;
 
@@ -703,7 +757,8 @@ static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon, u8 status)
 	spin_lock_init(&conn->lock);
 	rwlock_init(&conn->chan_list.lock);
 
-	setup_timer(&conn->info_timer, l2cap_info_timeout,
+	if (hcon->type != LE_LINK)
+		setup_timer(&conn->info_timer, l2cap_info_timeout,
 						(unsigned long) conn);
 
 	conn->disc_reason = 0x13;
@@ -747,17 +802,6 @@ static inline void l2cap_chan_add(struct l2cap_conn *conn, struct sock *sk, stru
 }
 
 /* ---- Socket interface ---- */
-static struct sock *__l2cap_get_sock_by_addr(__le16 psm, bdaddr_t *src)
-{
-	struct sock *sk;
-	struct hlist_node *node;
-	sk_for_each(sk, node, &l2cap_sk_list.head)
-		if (l2cap_pi(sk)->sport == psm && !bacmp(&bt_sk(sk)->src, src))
-			goto found;
-	sk = NULL;
-found:
-	return sk;
-}
 
 /* Find socket with psm and source bdaddr.
  * Returns closest match.
@@ -789,277 +833,7 @@ static struct sock *l2cap_get_sock_by_psm(int state, __le16 psm, bdaddr_t *src)
 	return node ? sk : sk1;
 }
 
-static void l2cap_sock_destruct(struct sock *sk)
-{
-	BT_DBG("sk %p", sk);
-
-	skb_queue_purge(&sk->sk_receive_queue);
-	skb_queue_purge(&sk->sk_write_queue);
-}
-
-static void l2cap_sock_cleanup_listen(struct sock *parent)
-{
-	struct sock *sk;
-
-	BT_DBG("parent %p", parent);
-
-	/* Close not yet accepted channels */
-	while ((sk = bt_accept_dequeue(parent, NULL)))
-		l2cap_sock_close(sk);
-
-	parent->sk_state = BT_CLOSED;
-	sock_set_flag(parent, SOCK_ZAPPED);
-}
-
-/* Kill socket (only if zapped and orphan)
- * Must be called on unlocked socket.
- */
-static void l2cap_sock_kill(struct sock *sk)
-{
-	if (!sock_flag(sk, SOCK_ZAPPED) || sk->sk_socket)
-		return;
-
-	BT_DBG("sk %p state %d", sk, sk->sk_state);
-
-	/* Kill poor orphan */
-	bt_sock_unlink(&l2cap_sk_list, sk);
-	sock_set_flag(sk, SOCK_DEAD);
-	sock_put(sk);
-}
-
-static void __l2cap_sock_close(struct sock *sk, int reason)
-{
-	BT_DBG("sk %p state %d socket %p", sk, sk->sk_state, sk->sk_socket);
-
-	switch (sk->sk_state) {
-	case BT_LISTEN:
-		l2cap_sock_cleanup_listen(sk);
-		break;
-
-	case BT_CONNECTED:
-	case BT_CONFIG:
-		if (sk->sk_type == SOCK_SEQPACKET ||
-				sk->sk_type == SOCK_STREAM) {
-			struct l2cap_conn *conn = l2cap_pi(sk)->conn;
-
-			l2cap_sock_set_timer(sk, sk->sk_sndtimeo);
-			l2cap_send_disconn_req(conn, sk, reason);
-		} else
-			l2cap_chan_del(sk, reason);
-		break;
-
-	case BT_CONNECT2:
-		if (sk->sk_type == SOCK_SEQPACKET ||
-				sk->sk_type == SOCK_STREAM) {
-			struct l2cap_conn *conn = l2cap_pi(sk)->conn;
-			struct l2cap_conn_rsp rsp;
-			__u16 result;
-
-			if (bt_sk(sk)->defer_setup)
-				result = L2CAP_CR_SEC_BLOCK;
-			else
-				result = L2CAP_CR_BAD_PSM;
-			sk->sk_state = BT_DISCONN;
-
-			rsp.scid   = cpu_to_le16(l2cap_pi(sk)->dcid);
-			rsp.dcid   = cpu_to_le16(l2cap_pi(sk)->scid);
-			rsp.result = cpu_to_le16(result);
-			rsp.status = cpu_to_le16(L2CAP_CS_NO_INFO);
-			l2cap_send_cmd(conn, l2cap_pi(sk)->ident,
-					L2CAP_CONN_RSP, sizeof(rsp), &rsp);
-		} else
-			l2cap_chan_del(sk, reason);
-		break;
-
-	case BT_CONNECT:
-	case BT_DISCONN:
-		l2cap_chan_del(sk, reason);
-		break;
-
-	default:
-		sock_set_flag(sk, SOCK_ZAPPED);
-		break;
-	}
-}
-
-/* Must be called on unlocked socket. */
-static void l2cap_sock_close(struct sock *sk)
-{
-	l2cap_sock_clear_timer(sk);
-	lock_sock(sk);
-	__l2cap_sock_close(sk, ECONNRESET);
-	release_sock(sk);
-	l2cap_sock_kill(sk);
-}
-
-static void l2cap_sock_init(struct sock *sk, struct sock *parent)
-{
-	struct l2cap_pinfo *pi = l2cap_pi(sk);
-
-	BT_DBG("sk %p", sk);
-
-	if (parent) {
-		sk->sk_type = parent->sk_type;
-		bt_sk(sk)->defer_setup = bt_sk(parent)->defer_setup;
-
-		pi->imtu = l2cap_pi(parent)->imtu;
-		pi->omtu = l2cap_pi(parent)->omtu;
-		pi->conf_state = l2cap_pi(parent)->conf_state;
-		pi->mode = l2cap_pi(parent)->mode;
-		pi->fcs  = l2cap_pi(parent)->fcs;
-		pi->max_tx = l2cap_pi(parent)->max_tx;
-		pi->tx_win = l2cap_pi(parent)->tx_win;
-		pi->sec_level = l2cap_pi(parent)->sec_level;
-		pi->role_switch = l2cap_pi(parent)->role_switch;
-		pi->force_reliable = l2cap_pi(parent)->force_reliable;
-	} else {
-		pi->imtu = L2CAP_DEFAULT_MTU;
-		pi->omtu = 0;
-		if (!disable_ertm && sk->sk_type == SOCK_STREAM) {
-			pi->mode = L2CAP_MODE_ERTM;
-			pi->conf_state |= L2CAP_CONF_STATE2_DEVICE;
-		} else {
-			pi->mode = L2CAP_MODE_BASIC;
-		}
-		pi->max_tx = L2CAP_DEFAULT_MAX_TX;
-		pi->fcs  = L2CAP_FCS_CRC16;
-		pi->tx_win = L2CAP_DEFAULT_TX_WINDOW;
-		pi->sec_level = BT_SECURITY_LOW;
-		pi->role_switch = 0;
-		pi->force_reliable = 0;
-	}
-
-	/* Default config options */
-	pi->conf_len = 0;
-	pi->flush_to = L2CAP_DEFAULT_FLUSH_TO;
-	skb_queue_head_init(TX_QUEUE(sk));
-	skb_queue_head_init(SREJ_QUEUE(sk));
-	skb_queue_head_init(BUSY_QUEUE(sk));
-	INIT_LIST_HEAD(SREJ_LIST(sk));
-}
-
-static struct proto l2cap_proto = {
-	.name		= "L2CAP",
-	.owner		= THIS_MODULE,
-	.obj_size	= sizeof(struct l2cap_pinfo)
-};
-
-static struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock, int proto, gfp_t prio)
-{
-	struct sock *sk;
-
-	sk = sk_alloc(net, PF_BLUETOOTH, prio, &l2cap_proto);
-	if (!sk)
-		return NULL;
-
-	sock_init_data(sock, sk);
-	INIT_LIST_HEAD(&bt_sk(sk)->accept_q);
-
-	sk->sk_destruct = l2cap_sock_destruct;
-	sk->sk_sndtimeo = msecs_to_jiffies(L2CAP_CONN_TIMEOUT);
-
-	sock_reset_flag(sk, SOCK_ZAPPED);
-
-	sk->sk_protocol = proto;
-	sk->sk_state = BT_OPEN;
-
-	setup_timer(&sk->sk_timer, l2cap_sock_timeout, (unsigned long) sk);
-
-	bt_sock_link(&l2cap_sk_list, sk);
-	return sk;
-}
-
-static int l2cap_sock_create(struct net *net, struct socket *sock, int protocol,
-			     int kern)
-{
-	struct sock *sk;
-
-	BT_DBG("sock %p", sock);
-
-	sock->state = SS_UNCONNECTED;
-
-	if (sock->type != SOCK_SEQPACKET && sock->type != SOCK_STREAM &&
-			sock->type != SOCK_DGRAM && sock->type != SOCK_RAW)
-		return -ESOCKTNOSUPPORT;
-
-	if (sock->type == SOCK_RAW && !kern && !capable(CAP_NET_RAW))
-		return -EPERM;
-
-	sock->ops = &l2cap_sock_ops;
-
-	sk = l2cap_sock_alloc(net, sock, protocol, GFP_ATOMIC);
-	if (!sk)
-		return -ENOMEM;
-
-	l2cap_sock_init(sk, NULL);
-	return 0;
-}
-
-static int l2cap_sock_bind(struct socket *sock, struct sockaddr *addr, int alen)
-{
-	struct sock *sk = sock->sk;
-	struct sockaddr_l2 la;
-	int len, err = 0;
-
-	BT_DBG("sk %p", sk);
-
-	if (!addr || addr->sa_family != AF_BLUETOOTH)
-		return -EINVAL;
-
-	memset(&la, 0, sizeof(la));
-	len = min_t(unsigned int, sizeof(la), alen);
-	memcpy(&la, addr, len);
-
-	if (la.l2_cid)
-		return -EINVAL;
-
-	lock_sock(sk);
-
-	if (sk->sk_state != BT_OPEN) {
-		err = -EBADFD;
-		goto done;
-	}
-
-	if (la.l2_psm) {
-		__u16 psm = __le16_to_cpu(la.l2_psm);
-
-		/* PSM must be odd and lsb of upper byte must be 0 */
-		if ((psm & 0x0101) != 0x0001) {
-			err = -EINVAL;
-			goto done;
-		}
-
-		/* Restrict usage of well-known PSMs */
-		if (psm < 0x1001 && !capable(CAP_NET_BIND_SERVICE)) {
-			err = -EACCES;
-			goto done;
-		}
-	}
-
-	write_lock_bh(&l2cap_sk_list.lock);
-
-	if (la.l2_psm && __l2cap_get_sock_by_addr(la.l2_psm, &la.l2_bdaddr)) {
-		err = -EADDRINUSE;
-	} else {
-		/* Save source address */
-		bacpy(&bt_sk(sk)->src, &la.l2_bdaddr);
-		l2cap_pi(sk)->psm   = la.l2_psm;
-		l2cap_pi(sk)->sport = la.l2_psm;
-		sk->sk_state = BT_BOUND;
-
-		if (__le16_to_cpu(la.l2_psm) == 0x0001 ||
-					__le16_to_cpu(la.l2_psm) == 0x0003)
-			l2cap_pi(sk)->sec_level = BT_SECURITY_SDP;
-	}
-
-	write_unlock_bh(&l2cap_sk_list.lock);
-
-done:
-	release_sock(sk);
-	return err;
-}
-
-static int l2cap_do_connect(struct sock *sk)
+int l2cap_do_connect(struct sock *sk)
 {
 	bdaddr_t *src = &bt_sk(sk)->src;
 	bdaddr_t *dst = &bt_sk(sk)->dst;
@@ -1082,8 +856,13 @@ static int l2cap_do_connect(struct sock *sk)
 
 	auth_type = l2cap_get_auth_type(sk);
 
-	hcon = hci_connect(hdev, ACL_LINK, dst,
+	if (l2cap_pi(sk)->dcid == L2CAP_CID_LE_DATA)
+		hcon = hci_connect(hdev, LE_LINK, dst,
+					l2cap_pi(sk)->sec_level, auth_type);
+	else
+		hcon = hci_connect(hdev, ACL_LINK, dst,
 					l2cap_pi(sk)->sec_level, auth_type);
+
 	if (!hcon)
 		goto done;
 
@@ -1119,230 +898,7 @@ done:
 	return err;
 }
 
-static int l2cap_sock_connect(struct socket *sock, struct sockaddr *addr, int alen, int flags)
-{
-	struct sock *sk = sock->sk;
-	struct sockaddr_l2 la;
-	int len, err = 0;
-
-	BT_DBG("sk %p", sk);
-
-	if (!addr || alen < sizeof(addr->sa_family) ||
-	    addr->sa_family != AF_BLUETOOTH)
-		return -EINVAL;
-
-	memset(&la, 0, sizeof(la));
-	len = min_t(unsigned int, sizeof(la), alen);
-	memcpy(&la, addr, len);
-
-	if (la.l2_cid)
-		return -EINVAL;
-
-	lock_sock(sk);
-
-	if ((sk->sk_type == SOCK_SEQPACKET || sk->sk_type == SOCK_STREAM)
-			&& !la.l2_psm) {
-		err = -EINVAL;
-		goto done;
-	}
-
-	switch (l2cap_pi(sk)->mode) {
-	case L2CAP_MODE_BASIC:
-		break;
-	case L2CAP_MODE_ERTM:
-	case L2CAP_MODE_STREAMING:
-		if (!disable_ertm)
-			break;
-		/* fall through */
-	default:
-		err = -ENOTSUPP;
-		goto done;
-	}
-
-	switch (sk->sk_state) {
-	case BT_CONNECT:
-	case BT_CONNECT2:
-	case BT_CONFIG:
-		/* Already connecting */
-		goto wait;
-
-	case BT_CONNECTED:
-		/* Already connected */
-		err = -EISCONN;
-		goto done;
-
-	case BT_OPEN:
-	case BT_BOUND:
-		/* Can connect */
-		break;
-
-	default:
-		err = -EBADFD;
-		goto done;
-	}
-
-	/* PSM must be odd and lsb of upper byte must be 0 */
-	if ((__le16_to_cpu(la.l2_psm) & 0x0101) != 0x0001 &&
-		sk->sk_type != SOCK_RAW) {
-		err = -EINVAL;
-		goto done;
-	}
-
-	/* Set destination address and psm */
-	bacpy(&bt_sk(sk)->dst, &la.l2_bdaddr);
-	l2cap_pi(sk)->psm = la.l2_psm;
-
-	err = l2cap_do_connect(sk);
-	if (err)
-		goto done;
-
-wait:
-	err = bt_sock_wait_state(sk, BT_CONNECTED,
-			sock_sndtimeo(sk, flags & O_NONBLOCK));
-done:
-	release_sock(sk);
-	return err;
-}
-
-static int l2cap_sock_listen(struct socket *sock, int backlog)
-{
-	struct sock *sk = sock->sk;
-	int err = 0;
-
-	BT_DBG("sk %p backlog %d", sk, backlog);
-
-	lock_sock(sk);
-
-	if ((sock->type != SOCK_SEQPACKET && sock->type != SOCK_STREAM)
-			|| sk->sk_state != BT_BOUND) {
-		err = -EBADFD;
-		goto done;
-	}
-
-	switch (l2cap_pi(sk)->mode) {
-	case L2CAP_MODE_BASIC:
-		break;
-	case L2CAP_MODE_ERTM:
-	case L2CAP_MODE_STREAMING:
-		if (!disable_ertm)
-			break;
-		/* fall through */
-	default:
-		err = -ENOTSUPP;
-		goto done;
-	}
-
-	if (!l2cap_pi(sk)->psm) {
-		bdaddr_t *src = &bt_sk(sk)->src;
-		u16 psm;
-
-		err = -EINVAL;
-
-		write_lock_bh(&l2cap_sk_list.lock);
-
-		for (psm = 0x1001; psm < 0x1100; psm += 2)
-			if (!__l2cap_get_sock_by_addr(cpu_to_le16(psm), src)) {
-				l2cap_pi(sk)->psm   = cpu_to_le16(psm);
-				l2cap_pi(sk)->sport = cpu_to_le16(psm);
-				err = 0;
-				break;
-			}
-
-		write_unlock_bh(&l2cap_sk_list.lock);
-
-		if (err < 0)
-			goto done;
-	}
-
-	sk->sk_max_ack_backlog = backlog;
-	sk->sk_ack_backlog = 0;
-	sk->sk_state = BT_LISTEN;
-
-done:
-	release_sock(sk);
-	return err;
-}
-
-static int l2cap_sock_accept(struct socket *sock, struct socket *newsock, int flags)
-{
-	DECLARE_WAITQUEUE(wait, current);
-	struct sock *sk = sock->sk, *nsk;
-	long timeo;
-	int err = 0;
-
-	lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
-
-	if (sk->sk_state != BT_LISTEN) {
-		err = -EBADFD;
-		goto done;
-	}
-
-	timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
-
-	BT_DBG("sk %p timeo %ld", sk, timeo);
-
-	/* Wait for an incoming connection. (wake-one). */
-	add_wait_queue_exclusive(sk_sleep(sk), &wait);
-	while (!(nsk = bt_accept_dequeue(sk, newsock))) {
-		set_current_state(TASK_INTERRUPTIBLE);
-		if (!timeo) {
-			err = -EAGAIN;
-			break;
-		}
-
-		release_sock(sk);
-		timeo = schedule_timeout(timeo);
-		lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
-
-		if (sk->sk_state != BT_LISTEN) {
-			err = -EBADFD;
-			break;
-		}
-
-		if (signal_pending(current)) {
-			err = sock_intr_errno(timeo);
-			break;
-		}
-	}
-	set_current_state(TASK_RUNNING);
-	remove_wait_queue(sk_sleep(sk), &wait);
-
-	if (err)
-		goto done;
-
-	newsock->state = SS_CONNECTED;
-
-	BT_DBG("new socket %p", nsk);
-
-done:
-	release_sock(sk);
-	return err;
-}
-
-static int l2cap_sock_getname(struct socket *sock, struct sockaddr *addr, int *len, int peer)
-{
-	struct sockaddr_l2 *la = (struct sockaddr_l2 *) addr;
-	struct sock *sk = sock->sk;
-
-	BT_DBG("sock %p, sk %p", sock, sk);
-
-	addr->sa_family = AF_BLUETOOTH;
-	*len = sizeof(struct sockaddr_l2);
-
-	if (peer) {
-		la->l2_psm = l2cap_pi(sk)->psm;
-		bacpy(&la->l2_bdaddr, &bt_sk(sk)->dst);
-		la->l2_cid = cpu_to_le16(l2cap_pi(sk)->dcid);
-	} else {
-		la->l2_psm = l2cap_pi(sk)->sport;
-		bacpy(&la->l2_bdaddr, &bt_sk(sk)->src);
-		la->l2_cid = cpu_to_le16(l2cap_pi(sk)->scid);
-	}
-
-	return 0;
-}
-
-static int __l2cap_wait_ack(struct sock *sk)
+int __l2cap_wait_ack(struct sock *sk)
 {
 	DECLARE_WAITQUEUE(wait, current);
 	int err = 0;
@@ -1428,16 +984,23 @@ static void l2cap_drop_acked_frames(struct sock *sk)
 		del_timer(&l2cap_pi(sk)->retrans_timer);
 }
 
-static inline void l2cap_do_send(struct sock *sk, struct sk_buff *skb)
+void l2cap_do_send(struct sock *sk, struct sk_buff *skb)
 {
 	struct l2cap_pinfo *pi = l2cap_pi(sk);
+	struct hci_conn *hcon = pi->conn->hcon;
+	u16 flags;
 
 	BT_DBG("sk %p, skb %p len %d", sk, skb, skb->len);
 
-	hci_send_acl(pi->conn->hcon, skb, 0);
+	if (!pi->flushable && lmp_no_flush_capable(hcon->hdev))
+		flags = ACL_START_NO_FLUSH;
+	else
+		flags = ACL_START;
+
+	hci_send_acl(hcon, skb, flags);
 }
 
-static void l2cap_streaming_send(struct sock *sk)
+void l2cap_streaming_send(struct sock *sk)
 {
 	struct sk_buff *skb;
 	struct l2cap_pinfo *pi = l2cap_pi(sk);
@@ -1506,7 +1069,7 @@ static void l2cap_retransmit_one_frame(struct sock *sk, u8 tx_seq)
 	l2cap_do_send(sk, tx_skb);
 }
 
-static int l2cap_ertm_send(struct sock *sk)
+int l2cap_ertm_send(struct sock *sk)
 {
 	struct sk_buff *skb, *tx_skb;
 	struct l2cap_pinfo *pi = l2cap_pi(sk);
@@ -1646,7 +1209,7 @@ static inline int l2cap_skbuff_fromiovec(struct sock *sk, struct msghdr *msg, in
 	return sent;
 }
 
-static struct sk_buff *l2cap_create_connless_pdu(struct sock *sk, struct msghdr *msg, size_t len)
+struct sk_buff *l2cap_create_connless_pdu(struct sock *sk, struct msghdr *msg, size_t len)
 {
 	struct l2cap_conn *conn = l2cap_pi(sk)->conn;
 	struct sk_buff *skb;
@@ -1675,7 +1238,7 @@ static struct sk_buff *l2cap_create_connless_pdu(struct sock *sk, struct msghdr
 	return skb;
 }
 
-static struct sk_buff *l2cap_create_basic_pdu(struct sock *sk, struct msghdr *msg, size_t len)
+struct sk_buff *l2cap_create_basic_pdu(struct sock *sk, struct msghdr *msg, size_t len)
 {
 	struct l2cap_conn *conn = l2cap_pi(sk)->conn;
 	struct sk_buff *skb;
@@ -1703,7 +1266,7 @@ static struct sk_buff *l2cap_create_basic_pdu(struct sock *sk, struct msghdr *ms
 	return skb;
 }
 
-static struct sk_buff *l2cap_create_iframe_pdu(struct sock *sk, struct msghdr *msg, size_t len, u16 control, u16 sdulen)
+struct sk_buff *l2cap_create_iframe_pdu(struct sock *sk, struct msghdr *msg, size_t len, u16 control, u16 sdulen)
 {
 	struct l2cap_conn *conn = l2cap_pi(sk)->conn;
 	struct sk_buff *skb;
@@ -1748,7 +1311,7 @@ static struct sk_buff *l2cap_create_iframe_pdu(struct sock *sk, struct msghdr *m
 	return skb;
 }
 
-static inline int l2cap_sar_segment_sdu(struct sock *sk, struct msghdr *msg, size_t len)
+int l2cap_sar_segment_sdu(struct sock *sk, struct msghdr *msg, size_t len)
 {
 	struct l2cap_pinfo *pi = l2cap_pi(sk);
 	struct sk_buff *skb;
@@ -1794,487 +1357,6 @@ static inline int l2cap_sar_segment_sdu(struct sock *sk, struct msghdr *msg, siz
 	return size;
 }
 
-static int l2cap_sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t len)
-{
-	struct sock *sk = sock->sk;
-	struct l2cap_pinfo *pi = l2cap_pi(sk);
-	struct sk_buff *skb;
-	u16 control;
-	int err;
-
-	BT_DBG("sock %p, sk %p", sock, sk);
-
-	err = sock_error(sk);
-	if (err)
-		return err;
-
-	if (msg->msg_flags & MSG_OOB)
-		return -EOPNOTSUPP;
-
-	lock_sock(sk);
-
-	if (sk->sk_state != BT_CONNECTED) {
-		err = -ENOTCONN;
-		goto done;
-	}
-
-	/* Connectionless channel */
-	if (sk->sk_type == SOCK_DGRAM) {
-		skb = l2cap_create_connless_pdu(sk, msg, len);
-		if (IS_ERR(skb)) {
-			err = PTR_ERR(skb);
-		} else {
-			l2cap_do_send(sk, skb);
-			err = len;
-		}
-		goto done;
-	}
-
-	switch (pi->mode) {
-	case L2CAP_MODE_BASIC:
-		/* Check outgoing MTU */
-		if (len > pi->omtu) {
-			err = -EMSGSIZE;
-			goto done;
-		}
-
-		/* Create a basic PDU */
-		skb = l2cap_create_basic_pdu(sk, msg, len);
-		if (IS_ERR(skb)) {
-			err = PTR_ERR(skb);
-			goto done;
-		}
-
-		l2cap_do_send(sk, skb);
-		err = len;
-		break;
-
-	case L2CAP_MODE_ERTM:
-	case L2CAP_MODE_STREAMING:
-		/* Entire SDU fits into one PDU */
-		if (len <= pi->remote_mps) {
-			control = L2CAP_SDU_UNSEGMENTED;
-			skb = l2cap_create_iframe_pdu(sk, msg, len, control, 0);
-			if (IS_ERR(skb)) {
-				err = PTR_ERR(skb);
-				goto done;
-			}
-			__skb_queue_tail(TX_QUEUE(sk), skb);
-
-			if (sk->sk_send_head == NULL)
-				sk->sk_send_head = skb;
-
-		} else {
-		/* Segment SDU into multiples PDUs */
-			err = l2cap_sar_segment_sdu(sk, msg, len);
-			if (err < 0)
-				goto done;
-		}
-
-		if (pi->mode == L2CAP_MODE_STREAMING) {
-			l2cap_streaming_send(sk);
-		} else {
-			if ((pi->conn_state & L2CAP_CONN_REMOTE_BUSY) &&
-					(pi->conn_state & L2CAP_CONN_WAIT_F)) {
-				err = len;
-				break;
-			}
-			err = l2cap_ertm_send(sk);
-		}
-
-		if (err >= 0)
-			err = len;
-		break;
-
-	default:
-		BT_DBG("bad state %1.1x", pi->mode);
-		err = -EBADFD;
-	}
-
-done:
-	release_sock(sk);
-	return err;
-}
-
-static int l2cap_sock_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t len, int flags)
-{
-	struct sock *sk = sock->sk;
-
-	lock_sock(sk);
-
-	if (sk->sk_state == BT_CONNECT2 && bt_sk(sk)->defer_setup) {
-		struct l2cap_conn_rsp rsp;
-		struct l2cap_conn *conn = l2cap_pi(sk)->conn;
-		u8 buf[128];
-
-		sk->sk_state = BT_CONFIG;
-
-		rsp.scid   = cpu_to_le16(l2cap_pi(sk)->dcid);
-		rsp.dcid   = cpu_to_le16(l2cap_pi(sk)->scid);
-		rsp.result = cpu_to_le16(L2CAP_CR_SUCCESS);
-		rsp.status = cpu_to_le16(L2CAP_CS_NO_INFO);
-		l2cap_send_cmd(l2cap_pi(sk)->conn, l2cap_pi(sk)->ident,
-					L2CAP_CONN_RSP, sizeof(rsp), &rsp);
-
-		if (l2cap_pi(sk)->conf_state & L2CAP_CONF_REQ_SENT) {
-			release_sock(sk);
-			return 0;
-		}
-
-		l2cap_pi(sk)->conf_state |= L2CAP_CONF_REQ_SENT;
-		l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ,
-				l2cap_build_conf_req(sk, buf), buf);
-		l2cap_pi(sk)->num_conf_req++;
-
-		release_sock(sk);
-		return 0;
-	}
-
-	release_sock(sk);
-
-	if (sock->type == SOCK_STREAM)
-		return bt_sock_stream_recvmsg(iocb, sock, msg, len, flags);
-
-	return bt_sock_recvmsg(iocb, sock, msg, len, flags);
-}
-
-static int l2cap_sock_setsockopt_old(struct socket *sock, int optname, char __user *optval, unsigned int optlen)
-{
-	struct sock *sk = sock->sk;
-	struct l2cap_options opts;
-	int len, err = 0;
-	u32 opt;
-
-	BT_DBG("sk %p", sk);
-
-	lock_sock(sk);
-
-	switch (optname) {
-	case L2CAP_OPTIONS:
-		if (sk->sk_state == BT_CONNECTED) {
-			err = -EINVAL;
-			break;
-		}
-
-		opts.imtu     = l2cap_pi(sk)->imtu;
-		opts.omtu     = l2cap_pi(sk)->omtu;
-		opts.flush_to = l2cap_pi(sk)->flush_to;
-		opts.mode     = l2cap_pi(sk)->mode;
-		opts.fcs      = l2cap_pi(sk)->fcs;
-		opts.max_tx   = l2cap_pi(sk)->max_tx;
-		opts.txwin_size = (__u16)l2cap_pi(sk)->tx_win;
-
-		len = min_t(unsigned int, sizeof(opts), optlen);
-		if (copy_from_user((char *) &opts, optval, len)) {
-			err = -EFAULT;
-			break;
-		}
-
-		if (opts.txwin_size > L2CAP_DEFAULT_TX_WINDOW) {
-			err = -EINVAL;
-			break;
-		}
-
-		l2cap_pi(sk)->mode = opts.mode;
-		switch (l2cap_pi(sk)->mode) {
-		case L2CAP_MODE_BASIC:
-			l2cap_pi(sk)->conf_state &= ~L2CAP_CONF_STATE2_DEVICE;
-			break;
-		case L2CAP_MODE_ERTM:
-		case L2CAP_MODE_STREAMING:
-			if (!disable_ertm)
-				break;
-			/* fall through */
-		default:
-			err = -EINVAL;
-			break;
-		}
-
-		l2cap_pi(sk)->imtu = opts.imtu;
-		l2cap_pi(sk)->omtu = opts.omtu;
-		l2cap_pi(sk)->fcs  = opts.fcs;
-		l2cap_pi(sk)->max_tx = opts.max_tx;
-		l2cap_pi(sk)->tx_win = (__u8)opts.txwin_size;
-		break;
-
-	case L2CAP_LM:
-		if (get_user(opt, (u32 __user *) optval)) {
-			err = -EFAULT;
-			break;
-		}
-
-		if (opt & L2CAP_LM_AUTH)
-			l2cap_pi(sk)->sec_level = BT_SECURITY_LOW;
-		if (opt & L2CAP_LM_ENCRYPT)
-			l2cap_pi(sk)->sec_level = BT_SECURITY_MEDIUM;
-		if (opt & L2CAP_LM_SECURE)
-			l2cap_pi(sk)->sec_level = BT_SECURITY_HIGH;
-
-		l2cap_pi(sk)->role_switch    = (opt & L2CAP_LM_MASTER);
-		l2cap_pi(sk)->force_reliable = (opt & L2CAP_LM_RELIABLE);
-		break;
-
-	default:
-		err = -ENOPROTOOPT;
-		break;
-	}
-
-	release_sock(sk);
-	return err;
-}
-
-static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen)
-{
-	struct sock *sk = sock->sk;
-	struct bt_security sec;
-	int len, err = 0;
-	u32 opt;
-
-	BT_DBG("sk %p", sk);
-
-	if (level == SOL_L2CAP)
-		return l2cap_sock_setsockopt_old(sock, optname, optval, optlen);
-
-	if (level != SOL_BLUETOOTH)
-		return -ENOPROTOOPT;
-
-	lock_sock(sk);
-
-	switch (optname) {
-	case BT_SECURITY:
-		if (sk->sk_type != SOCK_SEQPACKET && sk->sk_type != SOCK_STREAM
-				&& sk->sk_type != SOCK_RAW) {
-			err = -EINVAL;
-			break;
-		}
-
-		sec.level = BT_SECURITY_LOW;
-
-		len = min_t(unsigned int, sizeof(sec), optlen);
-		if (copy_from_user((char *) &sec, optval, len)) {
-			err = -EFAULT;
-			break;
-		}
-
-		if (sec.level < BT_SECURITY_LOW ||
-					sec.level > BT_SECURITY_HIGH) {
-			err = -EINVAL;
-			break;
-		}
-
-		l2cap_pi(sk)->sec_level = sec.level;
-		break;
-
-	case BT_DEFER_SETUP:
-		if (sk->sk_state != BT_BOUND && sk->sk_state != BT_LISTEN) {
-			err = -EINVAL;
-			break;
-		}
-
-		if (get_user(opt, (u32 __user *) optval)) {
-			err = -EFAULT;
-			break;
-		}
-
-		bt_sk(sk)->defer_setup = opt;
-		break;
-
-	default:
-		err = -ENOPROTOOPT;
-		break;
-	}
-
-	release_sock(sk);
-	return err;
-}
-
-static int l2cap_sock_getsockopt_old(struct socket *sock, int optname, char __user *optval, int __user *optlen)
-{
-	struct sock *sk = sock->sk;
-	struct l2cap_options opts;
-	struct l2cap_conninfo cinfo;
-	int len, err = 0;
-	u32 opt;
-
-	BT_DBG("sk %p", sk);
-
-	if (get_user(len, optlen))
-		return -EFAULT;
-
-	lock_sock(sk);
-
-	switch (optname) {
-	case L2CAP_OPTIONS:
-		opts.imtu     = l2cap_pi(sk)->imtu;
-		opts.omtu     = l2cap_pi(sk)->omtu;
-		opts.flush_to = l2cap_pi(sk)->flush_to;
-		opts.mode     = l2cap_pi(sk)->mode;
-		opts.fcs      = l2cap_pi(sk)->fcs;
-		opts.max_tx   = l2cap_pi(sk)->max_tx;
-		opts.txwin_size = (__u16)l2cap_pi(sk)->tx_win;
-
-		len = min_t(unsigned int, len, sizeof(opts));
-		if (copy_to_user(optval, (char *) &opts, len))
-			err = -EFAULT;
-
-		break;
-
-	case L2CAP_LM:
-		switch (l2cap_pi(sk)->sec_level) {
-		case BT_SECURITY_LOW:
-			opt = L2CAP_LM_AUTH;
-			break;
-		case BT_SECURITY_MEDIUM:
-			opt = L2CAP_LM_AUTH | L2CAP_LM_ENCRYPT;
-			break;
-		case BT_SECURITY_HIGH:
-			opt = L2CAP_LM_AUTH | L2CAP_LM_ENCRYPT |
-							L2CAP_LM_SECURE;
-			break;
-		default:
-			opt = 0;
-			break;
-		}
-
-		if (l2cap_pi(sk)->role_switch)
-			opt |= L2CAP_LM_MASTER;
-
-		if (l2cap_pi(sk)->force_reliable)
-			opt |= L2CAP_LM_RELIABLE;
-
-		if (put_user(opt, (u32 __user *) optval))
-			err = -EFAULT;
-		break;
-
-	case L2CAP_CONNINFO:
-		if (sk->sk_state != BT_CONNECTED &&
-					!(sk->sk_state == BT_CONNECT2 &&
-						bt_sk(sk)->defer_setup)) {
-			err = -ENOTCONN;
-			break;
-		}
-
-		cinfo.hci_handle = l2cap_pi(sk)->conn->hcon->handle;
-		memcpy(cinfo.dev_class, l2cap_pi(sk)->conn->hcon->dev_class, 3);
-
-		len = min_t(unsigned int, len, sizeof(cinfo));
-		if (copy_to_user(optval, (char *) &cinfo, len))
-			err = -EFAULT;
-
-		break;
-
-	default:
-		err = -ENOPROTOOPT;
-		break;
-	}
-
-	release_sock(sk);
-	return err;
-}
-
-static int l2cap_sock_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen)
-{
-	struct sock *sk = sock->sk;
-	struct bt_security sec;
-	int len, err = 0;
-
-	BT_DBG("sk %p", sk);
-
-	if (level == SOL_L2CAP)
-		return l2cap_sock_getsockopt_old(sock, optname, optval, optlen);
-
-	if (level != SOL_BLUETOOTH)
-		return -ENOPROTOOPT;
-
-	if (get_user(len, optlen))
-		return -EFAULT;
-
-	lock_sock(sk);
-
-	switch (optname) {
-	case BT_SECURITY:
-		if (sk->sk_type != SOCK_SEQPACKET && sk->sk_type != SOCK_STREAM
-				&& sk->sk_type != SOCK_RAW) {
-			err = -EINVAL;
-			break;
-		}
-
-		sec.level = l2cap_pi(sk)->sec_level;
-
-		len = min_t(unsigned int, len, sizeof(sec));
-		if (copy_to_user(optval, (char *) &sec, len))
-			err = -EFAULT;
-
-		break;
-
-	case BT_DEFER_SETUP:
-		if (sk->sk_state != BT_BOUND && sk->sk_state != BT_LISTEN) {
-			err = -EINVAL;
-			break;
-		}
-
-		if (put_user(bt_sk(sk)->defer_setup, (u32 __user *) optval))
-			err = -EFAULT;
-
-		break;
-
-	default:
-		err = -ENOPROTOOPT;
-		break;
-	}
-
-	release_sock(sk);
-	return err;
-}
-
-static int l2cap_sock_shutdown(struct socket *sock, int how)
-{
-	struct sock *sk = sock->sk;
-	int err = 0;
-
-	BT_DBG("sock %p, sk %p", sock, sk);
-
-	if (!sk)
-		return 0;
-
-	lock_sock(sk);
-	if (!sk->sk_shutdown) {
-		if (l2cap_pi(sk)->mode == L2CAP_MODE_ERTM)
-			err = __l2cap_wait_ack(sk);
-
-		sk->sk_shutdown = SHUTDOWN_MASK;
-		l2cap_sock_clear_timer(sk);
-		__l2cap_sock_close(sk, 0);
-
-		if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime)
-			err = bt_sock_wait_state(sk, BT_CLOSED,
-							sk->sk_lingertime);
-	}
-
-	if (!err && sk->sk_err)
-		err = -sk->sk_err;
-
-	release_sock(sk);
-	return err;
-}
-
-static int l2cap_sock_release(struct socket *sock)
-{
-	struct sock *sk = sock->sk;
-	int err;
-
-	BT_DBG("sock %p, sk %p", sock, sk);
-
-	if (!sk)
-		return 0;
-
-	err = l2cap_sock_shutdown(sock, 2);
-
-	sock_orphan(sk);
-	l2cap_sock_kill(sk);
-	return err;
-}
-
 static void l2cap_chan_ready(struct sock *sk)
 {
 	struct sock *parent = bt_sk(sk)->parent;
@@ -2346,7 +1428,11 @@ static struct sk_buff *l2cap_build_cmd(struct l2cap_conn *conn,
 
 	lh = (struct l2cap_hdr *) skb_put(skb, L2CAP_HDR_SIZE);
 	lh->len = cpu_to_le16(L2CAP_CMD_HDR_SIZE + dlen);
-	lh->cid = cpu_to_le16(L2CAP_CID_SIGNALING);
+
+	if (conn->hcon->type == LE_LINK)
+		lh->cid = cpu_to_le16(L2CAP_CID_LE_SIGNALING);
+	else
+		lh->cid = cpu_to_le16(L2CAP_CID_SIGNALING);
 
 	cmd = (struct l2cap_cmd_hdr *) skb_put(skb, L2CAP_CMD_HDR_SIZE);
 	cmd->code  = code;
@@ -2493,7 +1579,7 @@ static inline __u8 l2cap_select_mode(__u8 mode, __u16 remote_feat_mask)
 	}
 }
 
-static int l2cap_build_conf_req(struct sock *sk, void *data)
+int l2cap_build_conf_req(struct sock *sk, void *data)
 {
 	struct l2cap_pinfo *pi = l2cap_pi(sk);
 	struct l2cap_conf_req *req = data;
@@ -2518,11 +1604,11 @@ static int l2cap_build_conf_req(struct sock *sk, void *data)
 	}
 
 done:
+	if (pi->imtu != L2CAP_DEFAULT_MTU)
+		l2cap_add_conf_opt(&ptr, L2CAP_CONF_MTU, 2, pi->imtu);
+
 	switch (pi->mode) {
 	case L2CAP_MODE_BASIC:
-		if (pi->imtu != L2CAP_DEFAULT_MTU)
-			l2cap_add_conf_opt(&ptr, L2CAP_CONF_MTU, 2, pi->imtu);
-
 		if (!(pi->conn->feat_mask & L2CAP_FEAT_ERTM) &&
 				!(pi->conn->feat_mask & L2CAP_FEAT_STREAMING))
 			break;
@@ -2585,10 +1671,6 @@ done:
 		break;
 	}
 
-	/* FIXME: Need actual value of the flush timeout */
-	//if (flush_to != L2CAP_DEFAULT_FLUSH_TO)
-	//   l2cap_add_conf_opt(&ptr, L2CAP_CONF_FLUSH_TO, 2, pi->flush_to);
-
 	req->dcid  = cpu_to_le16(pi->dcid);
 	req->flags = cpu_to_le16(0);
 
@@ -3415,12 +2497,153 @@ static inline int l2cap_information_rsp(struct l2cap_conn *conn, struct l2cap_cm
 	return 0;
 }
 
-static inline void l2cap_sig_channel(struct l2cap_conn *conn, struct sk_buff *skb)
+static inline int l2cap_check_conn_param(u16 min, u16 max, u16 latency,
+							u16 to_multiplier)
+{
+	u16 max_latency;
+
+	if (min > max || min < 6 || max > 3200)
+		return -EINVAL;
+
+	if (to_multiplier < 10 || to_multiplier > 3200)
+		return -EINVAL;
+
+	if (max >= to_multiplier * 8)
+		return -EINVAL;
+
+	max_latency = (to_multiplier * 8 / max) - 1;
+	if (latency > 499 || latency > max_latency)
+		return -EINVAL;
+
+	return 0;
+}
+
+static inline int l2cap_conn_param_update_req(struct l2cap_conn *conn,
+					struct l2cap_cmd_hdr *cmd, u8 *data)
+{
+	struct hci_conn *hcon = conn->hcon;
+	struct l2cap_conn_param_update_req *req;
+	struct l2cap_conn_param_update_rsp rsp;
+	u16 min, max, latency, to_multiplier, cmd_len;
+	int err;
+
+	if (!(hcon->link_mode & HCI_LM_MASTER))
+		return -EINVAL;
+
+	cmd_len = __le16_to_cpu(cmd->len);
+	if (cmd_len != sizeof(struct l2cap_conn_param_update_req))
+		return -EPROTO;
+
+	req = (struct l2cap_conn_param_update_req *) data;
+	min		= __le16_to_cpu(req->min);
+	max		= __le16_to_cpu(req->max);
+	latency		= __le16_to_cpu(req->latency);
+	to_multiplier	= __le16_to_cpu(req->to_multiplier);
+
+	BT_DBG("min 0x%4.4x max 0x%4.4x latency: 0x%4.4x Timeout: 0x%4.4x",
+						min, max, latency, to_multiplier);
+
+	memset(&rsp, 0, sizeof(rsp));
+
+	err = l2cap_check_conn_param(min, max, latency, to_multiplier);
+	if (err)
+		rsp.result = cpu_to_le16(L2CAP_CONN_PARAM_REJECTED);
+	else
+		rsp.result = cpu_to_le16(L2CAP_CONN_PARAM_ACCEPTED);
+
+	l2cap_send_cmd(conn, cmd->ident, L2CAP_CONN_PARAM_UPDATE_RSP,
+							sizeof(rsp), &rsp);
+
+	if (!err)
+		hci_le_conn_update(hcon, min, max, latency, to_multiplier);
+
+	return 0;
+}
+
+static inline int l2cap_bredr_sig_cmd(struct l2cap_conn *conn,
+			struct l2cap_cmd_hdr *cmd, u16 cmd_len, u8 *data)
+{
+	int err = 0;
+
+	switch (cmd->code) {
+	case L2CAP_COMMAND_REJ:
+		l2cap_command_rej(conn, cmd, data);
+		break;
+
+	case L2CAP_CONN_REQ:
+		err = l2cap_connect_req(conn, cmd, data);
+		break;
+
+	case L2CAP_CONN_RSP:
+		err = l2cap_connect_rsp(conn, cmd, data);
+		break;
+
+	case L2CAP_CONF_REQ:
+		err = l2cap_config_req(conn, cmd, cmd_len, data);
+		break;
+
+	case L2CAP_CONF_RSP:
+		err = l2cap_config_rsp(conn, cmd, data);
+		break;
+
+	case L2CAP_DISCONN_REQ:
+		err = l2cap_disconnect_req(conn, cmd, data);
+		break;
+
+	case L2CAP_DISCONN_RSP:
+		err = l2cap_disconnect_rsp(conn, cmd, data);
+		break;
+
+	case L2CAP_ECHO_REQ:
+		l2cap_send_cmd(conn, cmd->ident, L2CAP_ECHO_RSP, cmd_len, data);
+		break;
+
+	case L2CAP_ECHO_RSP:
+		break;
+
+	case L2CAP_INFO_REQ:
+		err = l2cap_information_req(conn, cmd, data);
+		break;
+
+	case L2CAP_INFO_RSP:
+		err = l2cap_information_rsp(conn, cmd, data);
+		break;
+
+	default:
+		BT_ERR("Unknown BR/EDR signaling command 0x%2.2x", cmd->code);
+		err = -EINVAL;
+		break;
+	}
+
+	return err;
+}
+
+static inline int l2cap_le_sig_cmd(struct l2cap_conn *conn,
+					struct l2cap_cmd_hdr *cmd, u8 *data)
+{
+	switch (cmd->code) {
+	case L2CAP_COMMAND_REJ:
+		return 0;
+
+	case L2CAP_CONN_PARAM_UPDATE_REQ:
+		return l2cap_conn_param_update_req(conn, cmd, data);
+
+	case L2CAP_CONN_PARAM_UPDATE_RSP:
+		return 0;
+
+	default:
+		BT_ERR("Unknown LE signaling command 0x%2.2x", cmd->code);
+		return -EINVAL;
+	}
+}
+
+static inline void l2cap_sig_channel(struct l2cap_conn *conn,
+							struct sk_buff *skb)
 {
 	u8 *data = skb->data;
 	int len = skb->len;
 	struct l2cap_cmd_hdr cmd;
-	int err = 0;
+	int err;
 
 	l2cap_raw_recv(conn, skb);
 
@@ -3439,55 +2662,10 @@ static inline void l2cap_sig_channel(struct l2cap_conn *conn, struct sk_buff *sk
 			break;
 		}
 
-		switch (cmd.code) {
-		case L2CAP_COMMAND_REJ:
-			l2cap_command_rej(conn, &cmd, data);
-			break;
-
-		case L2CAP_CONN_REQ:
-			err = l2cap_connect_req(conn, &cmd, data);
-			break;
-
-		case L2CAP_CONN_RSP:
-			err = l2cap_connect_rsp(conn, &cmd, data);
-			break;
-
-		case L2CAP_CONF_REQ:
-			err = l2cap_config_req(conn, &cmd, cmd_len, data);
-			break;
-
-		case L2CAP_CONF_RSP:
-			err = l2cap_config_rsp(conn, &cmd, data);
-			break;
-
-		case L2CAP_DISCONN_REQ:
-			err = l2cap_disconnect_req(conn, &cmd, data);
-			break;
-
-		case L2CAP_DISCONN_RSP:
-			err = l2cap_disconnect_rsp(conn, &cmd, data);
-			break;
-
-		case L2CAP_ECHO_REQ:
-			l2cap_send_cmd(conn, cmd.ident, L2CAP_ECHO_RSP, cmd_len, data);
-			break;
-
-		case L2CAP_ECHO_RSP:
-			break;
-
-		case L2CAP_INFO_REQ:
-			err = l2cap_information_req(conn, &cmd, data);
-			break;
-
-		case L2CAP_INFO_RSP:
-			err = l2cap_information_rsp(conn, &cmd, data);
-			break;
-
-		default:
-			BT_ERR("Unknown signaling command 0x%2.2x", cmd.code);
-			err = -EINVAL;
-			break;
-		}
+		if (conn->hcon->type == LE_LINK)
+			err = l2cap_le_sig_cmd(conn, &cmd, data);
+		else
+			err = l2cap_bredr_sig_cmd(conn, &cmd, cmd_len, data);
 
 		if (err) {
 			struct l2cap_cmd_rej rej;
@@ -4484,6 +3662,7 @@ static void l2cap_recv_frame(struct l2cap_conn *conn, struct sk_buff *skb)
 	BT_DBG("len %d, cid 0x%4.4x", len, cid);
 
 	switch (cid) {
+	case L2CAP_CID_LE_SIGNALING:
 	case L2CAP_CID_SIGNALING:
 		l2cap_sig_channel(conn, skb);
 		break;
@@ -4541,7 +3720,7 @@ static int l2cap_connect_cfm(struct hci_conn *hcon, u8 status)
 
 	BT_DBG("hcon %p bdaddr %s status %d", hcon, batostr(&hcon->dst), status);
 
-	if (hcon->type != ACL_LINK)
+	if (!(hcon->type == ACL_LINK || hcon->type == LE_LINK))
 		return -EINVAL;
 
 	if (!status) {
@@ -4570,7 +3749,7 @@ static int l2cap_disconn_cfm(struct hci_conn *hcon, u8 reason)
 {
 	BT_DBG("hcon %p reason %d", hcon, reason);
 
-	if (hcon->type != ACL_LINK)
+	if (!(hcon->type == ACL_LINK || hcon->type == LE_LINK))
 		return -EINVAL;
 
 	l2cap_conn_del(hcon, bt_err(reason));
@@ -4673,12 +3852,15 @@ static int l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 fl
 {
 	struct l2cap_conn *conn = hcon->l2cap_data;
 
-	if (!conn && !(conn = l2cap_conn_add(hcon, 0)))
+	if (!conn)
+		conn = l2cap_conn_add(hcon, 0);
+
+	if (!conn)
 		goto drop;
 
 	BT_DBG("conn %p len %d flags 0x%x", conn, skb->len, flags);
 
-	if (flags & ACL_START) {
+	if (!(flags & ACL_CONT)) {
 		struct l2cap_hdr *hdr;
 		struct sock *sk;
 		u16 cid;
@@ -4784,12 +3966,13 @@ static int l2cap_debugfs_show(struct seq_file *f, void *p)
 	sk_for_each(sk, node, &l2cap_sk_list.head) {
 		struct l2cap_pinfo *pi = l2cap_pi(sk);
 
-		seq_printf(f, "%s %s %d %d 0x%4.4x 0x%4.4x %d %d %d\n",
+		seq_printf(f, "%s %s %d %d 0x%4.4x 0x%4.4x %d %d %d %d\n",
 					batostr(&bt_sk(sk)->src),
 					batostr(&bt_sk(sk)->dst),
 					sk->sk_state, __le16_to_cpu(pi->psm),
 					pi->scid, pi->dcid,
-					pi->imtu, pi->omtu, pi->sec_level);
+					pi->imtu, pi->omtu, pi->sec_level,
+					pi->mode);
 	}
 
 	read_unlock_bh(&l2cap_sk_list.lock);
@@ -4811,32 +3994,6 @@ static const struct file_operations l2cap_debugfs_fops = {
 
 static struct dentry *l2cap_debugfs;
 
-static const struct proto_ops l2cap_sock_ops = {
-	.family		= PF_BLUETOOTH,
-	.owner		= THIS_MODULE,
-	.release	= l2cap_sock_release,
-	.bind		= l2cap_sock_bind,
-	.connect	= l2cap_sock_connect,
-	.listen		= l2cap_sock_listen,
-	.accept		= l2cap_sock_accept,
-	.getname	= l2cap_sock_getname,
-	.sendmsg	= l2cap_sock_sendmsg,
-	.recvmsg	= l2cap_sock_recvmsg,
-	.poll		= bt_sock_poll,
-	.ioctl		= bt_sock_ioctl,
-	.mmap		= sock_no_mmap,
-	.socketpair	= sock_no_socketpair,
-	.shutdown	= l2cap_sock_shutdown,
-	.setsockopt	= l2cap_sock_setsockopt,
-	.getsockopt	= l2cap_sock_getsockopt
-};
-
-static const struct net_proto_family l2cap_sock_family_ops = {
-	.family	= PF_BLUETOOTH,
-	.owner	= THIS_MODULE,
-	.create	= l2cap_sock_create,
-};
-
 static struct hci_proto l2cap_hci_proto = {
 	.name		= "L2CAP",
 	.id		= HCI_PROTO_L2CAP,
@@ -4848,23 +4005,17 @@ static struct hci_proto l2cap_hci_proto = {
 	.recv_acldata	= l2cap_recv_acldata
 };
 
-static int __init l2cap_init(void)
+int __init l2cap_init(void)
 {
 	int err;
 
-	err = proto_register(&l2cap_proto, 0);
+	err = l2cap_init_sockets();
 	if (err < 0)
 		return err;
 
 	_busy_wq = create_singlethread_workqueue("l2cap");
 	if (!_busy_wq) {
-		proto_unregister(&l2cap_proto);
-		return -ENOMEM;
-	}
-
-	err = bt_sock_register(BTPROTO_L2CAP, &l2cap_sock_family_ops);
-	if (err < 0) {
-		BT_ERR("L2CAP socket registration failed");
+		err = -ENOMEM;
 		goto error;
 	}
 
@@ -4882,49 +4033,28 @@ static int __init l2cap_init(void)
 			BT_ERR("Failed to create L2CAP debug file");
 	}
 
-	BT_INFO("L2CAP ver %s", VERSION);
 	BT_INFO("L2CAP socket layer initialized");
 
 	return 0;
 
 error:
 	destroy_workqueue(_busy_wq);
-	proto_unregister(&l2cap_proto);
+	l2cap_cleanup_sockets();
 	return err;
 }
 
-static void __exit l2cap_exit(void)
+void l2cap_exit(void)
 {
 	debugfs_remove(l2cap_debugfs);
 
 	flush_workqueue(_busy_wq);
 	destroy_workqueue(_busy_wq);
 
-	if (bt_sock_unregister(BTPROTO_L2CAP) < 0)
-		BT_ERR("L2CAP socket unregistration failed");
-
 	if (hci_unregister_proto(&l2cap_hci_proto) < 0)
 		BT_ERR("L2CAP protocol unregistration failed");
 
-	proto_unregister(&l2cap_proto);
+	l2cap_cleanup_sockets();
 }
 
-void l2cap_load(void)
-{
-	/* Dummy function to trigger automatic L2CAP module loading by
-	 * other modules that use L2CAP sockets but don't use any other
-	 * symbols from it. */
-}
-EXPORT_SYMBOL(l2cap_load);
-
-module_init(l2cap_init);
-module_exit(l2cap_exit);
-
 module_param(disable_ertm, bool, 0644);
 MODULE_PARM_DESC(disable_ertm, "Disable enhanced retransmission mode");
-
-MODULE_AUTHOR("Marcel Holtmann <marcel@holtmann.org>");
-MODULE_DESCRIPTION("Bluetooth L2CAP ver " VERSION);
-MODULE_VERSION(VERSION);
-MODULE_LICENSE("GPL");
-MODULE_ALIAS("bt-proto-0");
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
new file mode 100644
index 0000000..fc85e7a
--- /dev/null
+++ b/net/bluetooth/l2cap_sock.c
@@ -0,0 +1,1156 @@
+/*
+   BlueZ - Bluetooth protocol stack for Linux
+   Copyright (C) 2000-2001 Qualcomm Incorporated
+   Copyright (C) 2009-2010 Gustavo F. Padovan <gustavo@padovan.org>
+   Copyright (C) 2010 Google Inc.
+
+   Written 2000,2001 by Maxim Krasnyansky <maxk@qualcomm.com>
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License version 2 as
+   published by the Free Software Foundation;
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.
+   IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY
+   CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES
+   WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+   ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+   OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+   ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS,
+   COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS
+   SOFTWARE IS DISCLAIMED.
+*/
+
+/* Bluetooth L2CAP sockets. */
+
+#include <net/bluetooth/bluetooth.h>
+#include <net/bluetooth/hci_core.h>
+#include <net/bluetooth/l2cap.h>
+
+/* ---- L2CAP timers ---- */
+static void l2cap_sock_timeout(unsigned long arg)
+{
+	struct sock *sk = (struct sock *) arg;
+	int reason;
+
+	BT_DBG("sock %p state %d", sk, sk->sk_state);
+
+	bh_lock_sock(sk);
+
+	if (sock_owned_by_user(sk)) {
+		/* sk is owned by user. Try again later */
+		l2cap_sock_set_timer(sk, HZ / 5);
+		bh_unlock_sock(sk);
+		sock_put(sk);
+		return;
+	}
+
+	if (sk->sk_state == BT_CONNECTED || sk->sk_state == BT_CONFIG)
+		reason = ECONNREFUSED;
+	else if (sk->sk_state == BT_CONNECT &&
+				l2cap_pi(sk)->sec_level != BT_SECURITY_SDP)
+		reason = ECONNREFUSED;
+	else
+		reason = ETIMEDOUT;
+
+	__l2cap_sock_close(sk, reason);
+
+	bh_unlock_sock(sk);
+
+	l2cap_sock_kill(sk);
+	sock_put(sk);
+}
+
+void l2cap_sock_set_timer(struct sock *sk, long timeout)
+{
+	BT_DBG("sk %p state %d timeout %ld", sk, sk->sk_state, timeout);
+	sk_reset_timer(sk, &sk->sk_timer, jiffies + timeout);
+}
+
+void l2cap_sock_clear_timer(struct sock *sk)
+{
+	BT_DBG("sock %p state %d", sk, sk->sk_state);
+	sk_stop_timer(sk, &sk->sk_timer);
+}
+
+static struct sock *__l2cap_get_sock_by_addr(__le16 psm, bdaddr_t *src)
+{
+	struct sock *sk;
+	struct hlist_node *node;
+	sk_for_each(sk, node, &l2cap_sk_list.head)
+		if (l2cap_pi(sk)->sport == psm && !bacmp(&bt_sk(sk)->src, src))
+			goto found;
+	sk = NULL;
+found:
+	return sk;
+}
+
+static int l2cap_sock_bind(struct socket *sock, struct sockaddr *addr, int alen)
+{
+	struct sock *sk = sock->sk;
+	struct sockaddr_l2 la;
+	int len, err = 0;
+
+	BT_DBG("sk %p", sk);
+
+	if (!addr || addr->sa_family != AF_BLUETOOTH)
+		return -EINVAL;
+
+	memset(&la, 0, sizeof(la));
+	len = min_t(unsigned int, sizeof(la), alen);
+	memcpy(&la, addr, len);
+
+	if (la.l2_cid && la.l2_psm)
+		return -EINVAL;
+
+	lock_sock(sk);
+
+	if (sk->sk_state != BT_OPEN) {
+		err = -EBADFD;
+		goto done;
+	}
+
+	if (la.l2_psm) {
+		__u16 psm = __le16_to_cpu(la.l2_psm);
+
+		/* PSM must be odd and lsb of upper byte must be 0 */
+		if ((psm & 0x0101) != 0x0001) {
+			err = -EINVAL;
+			goto done;
+		}
+
+		/* Restrict usage of well-known PSMs */
+		if (psm < 0x1001 && !capable(CAP_NET_BIND_SERVICE)) {
+			err = -EACCES;
+			goto done;
+		}
+	}
+
+	write_lock_bh(&l2cap_sk_list.lock);
+
+	if (la.l2_psm && __l2cap_get_sock_by_addr(la.l2_psm, &la.l2_bdaddr)) {
+		err = -EADDRINUSE;
+	} else {
+		/* Save source address */
+		bacpy(&bt_sk(sk)->src, &la.l2_bdaddr);
+		l2cap_pi(sk)->psm   = la.l2_psm;
+		l2cap_pi(sk)->sport = la.l2_psm;
+		sk->sk_state = BT_BOUND;
+
+		if (__le16_to_cpu(la.l2_psm) == 0x0001 ||
+					__le16_to_cpu(la.l2_psm) == 0x0003)
+			l2cap_pi(sk)->sec_level = BT_SECURITY_SDP;
+	}
+
+	if (la.l2_cid)
+		l2cap_pi(sk)->scid = la.l2_cid;
+
+	write_unlock_bh(&l2cap_sk_list.lock);
+
+done:
+	release_sock(sk);
+	return err;
+}
+
+static int l2cap_sock_connect(struct socket *sock, struct sockaddr *addr, int alen, int flags)
+{
+	struct sock *sk = sock->sk;
+	struct sockaddr_l2 la;
+	int len, err = 0;
+
+	BT_DBG("sk %p", sk);
+
+	if (!addr || alen < sizeof(addr->sa_family) ||
+	    addr->sa_family != AF_BLUETOOTH)
+		return -EINVAL;
+
+	memset(&la, 0, sizeof(la));
+	len = min_t(unsigned int, sizeof(la), alen);
+	memcpy(&la, addr, len);
+
+	if (la.l2_cid && la.l2_psm)
+		return -EINVAL;
+
+	lock_sock(sk);
+
+	if ((sk->sk_type == SOCK_SEQPACKET || sk->sk_type == SOCK_STREAM)
+			&& !(la.l2_psm || la.l2_cid)) {
+		err = -EINVAL;
+		goto done;
+	}
+
+	switch (l2cap_pi(sk)->mode) {
+	case L2CAP_MODE_BASIC:
+		break;
+	case L2CAP_MODE_ERTM:
+	case L2CAP_MODE_STREAMING:
+		if (!disable_ertm)
+			break;
+		/* fall through */
+	default:
+		err = -ENOTSUPP;
+		goto done;
+	}
+
+	switch (sk->sk_state) {
+	case BT_CONNECT:
+	case BT_CONNECT2:
+	case BT_CONFIG:
+		/* Already connecting */
+		goto wait;
+
+	case BT_CONNECTED:
+		/* Already connected */
+		err = -EISCONN;
+		goto done;
+
+	case BT_OPEN:
+	case BT_BOUND:
+		/* Can connect */
+		break;
+
+	default:
+		err = -EBADFD;
+		goto done;
+	}
+
+	/* PSM must be odd and lsb of upper byte must be 0 */
+	if ((__le16_to_cpu(la.l2_psm) & 0x0101) != 0x0001 &&
+				sk->sk_type != SOCK_RAW && !la.l2_cid) {
+		err = -EINVAL;
+		goto done;
+	}
+
+	/* Set destination address and psm */
+	bacpy(&bt_sk(sk)->dst, &la.l2_bdaddr);
+	l2cap_pi(sk)->psm = la.l2_psm;
+	l2cap_pi(sk)->dcid = la.l2_cid;
+
+	err = l2cap_do_connect(sk);
+	if (err)
+		goto done;
+
+wait:
+	err = bt_sock_wait_state(sk, BT_CONNECTED,
+			sock_sndtimeo(sk, flags & O_NONBLOCK));
+done:
+	release_sock(sk);
+	return err;
+}
+
+static int l2cap_sock_listen(struct socket *sock, int backlog)
+{
+	struct sock *sk = sock->sk;
+	int err = 0;
+
+	BT_DBG("sk %p backlog %d", sk, backlog);
+
+	lock_sock(sk);
+
+	if ((sock->type != SOCK_SEQPACKET && sock->type != SOCK_STREAM)
+			|| sk->sk_state != BT_BOUND) {
+		err = -EBADFD;
+		goto done;
+	}
+
+	switch (l2cap_pi(sk)->mode) {
+	case L2CAP_MODE_BASIC:
+		break;
+	case L2CAP_MODE_ERTM:
+	case L2CAP_MODE_STREAMING:
+		if (!disable_ertm)
+			break;
+		/* fall through */
+	default:
+		err = -ENOTSUPP;
+		goto done;
+	}
+
+	if (!l2cap_pi(sk)->psm && !l2cap_pi(sk)->dcid) {
+		bdaddr_t *src = &bt_sk(sk)->src;
+		u16 psm;
+
+		err = -EINVAL;
+
+		write_lock_bh(&l2cap_sk_list.lock);
+
+		for (psm = 0x1001; psm < 0x1100; psm += 2)
+			if (!__l2cap_get_sock_by_addr(cpu_to_le16(psm), src)) {
+				l2cap_pi(sk)->psm   = cpu_to_le16(psm);
+				l2cap_pi(sk)->sport = cpu_to_le16(psm);
+				err = 0;
+				break;
+			}
+
+		write_unlock_bh(&l2cap_sk_list.lock);
+
+		if (err < 0)
+			goto done;
+	}
+
+	sk->sk_max_ack_backlog = backlog;
+	sk->sk_ack_backlog = 0;
+	sk->sk_state = BT_LISTEN;
+
+done:
+	release_sock(sk);
+	return err;
+}
+
+static int l2cap_sock_accept(struct socket *sock, struct socket *newsock, int flags)
+{
+	DECLARE_WAITQUEUE(wait, current);
+	struct sock *sk = sock->sk, *nsk;
+	long timeo;
+	int err = 0;
+
+	lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
+
+	if (sk->sk_state != BT_LISTEN) {
+		err = -EBADFD;
+		goto done;
+	}
+
+	timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
+
+	BT_DBG("sk %p timeo %ld", sk, timeo);
+
+	/* Wait for an incoming connection. (wake-one). */
+	add_wait_queue_exclusive(sk_sleep(sk), &wait);
+	while (!(nsk = bt_accept_dequeue(sk, newsock))) {
+		set_current_state(TASK_INTERRUPTIBLE);
+		if (!timeo) {
+			err = -EAGAIN;
+			break;
+		}
+
+		release_sock(sk);
+		timeo = schedule_timeout(timeo);
+		lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
+
+		if (sk->sk_state != BT_LISTEN) {
+			err = -EBADFD;
+			break;
+		}
+
+		if (signal_pending(current)) {
+			err = sock_intr_errno(timeo);
+			break;
+		}
+	}
+	set_current_state(TASK_RUNNING);
+	remove_wait_queue(sk_sleep(sk), &wait);
+
+	if (err)
+		goto done;
+
+	newsock->state = SS_CONNECTED;
+
+	BT_DBG("new socket %p", nsk);
+
+done:
+	release_sock(sk);
+	return err;
+}
+
+static int l2cap_sock_getname(struct socket *sock, struct sockaddr *addr, int *len, int peer)
+{
+	struct sockaddr_l2 *la = (struct sockaddr_l2 *) addr;
+	struct sock *sk = sock->sk;
+
+	BT_DBG("sock %p, sk %p", sock, sk);
+
+	addr->sa_family = AF_BLUETOOTH;
+	*len = sizeof(struct sockaddr_l2);
+
+	if (peer) {
+		la->l2_psm = l2cap_pi(sk)->psm;
+		bacpy(&la->l2_bdaddr, &bt_sk(sk)->dst);
+		la->l2_cid = cpu_to_le16(l2cap_pi(sk)->dcid);
+	} else {
+		la->l2_psm = l2cap_pi(sk)->sport;
+		bacpy(&la->l2_bdaddr, &bt_sk(sk)->src);
+		la->l2_cid = cpu_to_le16(l2cap_pi(sk)->scid);
+	}
+
+	return 0;
+}
+
+static int l2cap_sock_getsockopt_old(struct socket *sock, int optname, char __user *optval, int __user *optlen)
+{
+	struct sock *sk = sock->sk;
+	struct l2cap_options opts;
+	struct l2cap_conninfo cinfo;
+	int len, err = 0;
+	u32 opt;
+
+	BT_DBG("sk %p", sk);
+
+	if (get_user(len, optlen))
+		return -EFAULT;
+
+	lock_sock(sk);
+
+	switch (optname) {
+	case L2CAP_OPTIONS:
+		memset(&opts, 0, sizeof(opts));
+		opts.imtu     = l2cap_pi(sk)->imtu;
+		opts.omtu     = l2cap_pi(sk)->omtu;
+		opts.flush_to = l2cap_pi(sk)->flush_to;
+		opts.mode     = l2cap_pi(sk)->mode;
+		opts.fcs      = l2cap_pi(sk)->fcs;
+		opts.max_tx   = l2cap_pi(sk)->max_tx;
+		opts.txwin_size = (__u16)l2cap_pi(sk)->tx_win;
+
+		len = min_t(unsigned int, len, sizeof(opts));
+		if (copy_to_user(optval, (char *) &opts, len))
+			err = -EFAULT;
+
+		break;
+
+	case L2CAP_LM:
+		switch (l2cap_pi(sk)->sec_level) {
+		case BT_SECURITY_LOW:
+			opt = L2CAP_LM_AUTH;
+			break;
+		case BT_SECURITY_MEDIUM:
+			opt = L2CAP_LM_AUTH | L2CAP_LM_ENCRYPT;
+			break;
+		case BT_SECURITY_HIGH:
+			opt = L2CAP_LM_AUTH | L2CAP_LM_ENCRYPT |
+							L2CAP_LM_SECURE;
+			break;
+		default:
+			opt = 0;
+			break;
+		}
+
+		if (l2cap_pi(sk)->role_switch)
+			opt |= L2CAP_LM_MASTER;
+
+		if (l2cap_pi(sk)->force_reliable)
+			opt |= L2CAP_LM_RELIABLE;
+
+		if (put_user(opt, (u32 __user *) optval))
+			err = -EFAULT;
+		break;
+
+	case L2CAP_CONNINFO:
+		if (sk->sk_state != BT_CONNECTED &&
+					!(sk->sk_state == BT_CONNECT2 &&
+						bt_sk(sk)->defer_setup)) {
+			err = -ENOTCONN;
+			break;
+		}
+
+		cinfo.hci_handle = l2cap_pi(sk)->conn->hcon->handle;
+		memcpy(cinfo.dev_class, l2cap_pi(sk)->conn->hcon->dev_class, 3);
+
+		len = min_t(unsigned int, len, sizeof(cinfo));
+		if (copy_to_user(optval, (char *) &cinfo, len))
+			err = -EFAULT;
+
+		break;
+
+	default:
+		err = -ENOPROTOOPT;
+		break;
+	}
+
+	release_sock(sk);
+	return err;
+}
+
+static int l2cap_sock_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen)
+{
+	struct sock *sk = sock->sk;
+	struct bt_security sec;
+	int len, err = 0;
+
+	BT_DBG("sk %p", sk);
+
+	if (level == SOL_L2CAP)
+		return l2cap_sock_getsockopt_old(sock, optname, optval, optlen);
+
+	if (level != SOL_BLUETOOTH)
+		return -ENOPROTOOPT;
+
+	if (get_user(len, optlen))
+		return -EFAULT;
+
+	lock_sock(sk);
+
+	switch (optname) {
+	case BT_SECURITY:
+		if (sk->sk_type != SOCK_SEQPACKET && sk->sk_type != SOCK_STREAM
+				&& sk->sk_type != SOCK_RAW) {
+			err = -EINVAL;
+			break;
+		}
+
+		sec.level = l2cap_pi(sk)->sec_level;
+
+		len = min_t(unsigned int, len, sizeof(sec));
+		if (copy_to_user(optval, (char *) &sec, len))
+			err = -EFAULT;
+
+		break;
+
+	case BT_DEFER_SETUP:
+		if (sk->sk_state != BT_BOUND && sk->sk_state != BT_LISTEN) {
+			err = -EINVAL;
+			break;
+		}
+
+		if (put_user(bt_sk(sk)->defer_setup, (u32 __user *) optval))
+			err = -EFAULT;
+
+		break;
+
+	case BT_FLUSHABLE:
+		if (put_user(l2cap_pi(sk)->flushable, (u32 __user *) optval))
+			err = -EFAULT;
+
+		break;
+
+	default:
+		err = -ENOPROTOOPT;
+		break;
+	}
+
+	release_sock(sk);
+	return err;
+}
+
+static int l2cap_sock_setsockopt_old(struct socket *sock, int optname, char __user *optval, unsigned int optlen)
+{
+	struct sock *sk = sock->sk;
+	struct l2cap_options opts;
+	int len, err = 0;
+	u32 opt;
+
+	BT_DBG("sk %p", sk);
+
+	lock_sock(sk);
+
+	switch (optname) {
+	case L2CAP_OPTIONS:
+		if (sk->sk_state == BT_CONNECTED) {
+			err = -EINVAL;
+			break;
+		}
+
+		opts.imtu     = l2cap_pi(sk)->imtu;
+		opts.omtu     = l2cap_pi(sk)->omtu;
+		opts.flush_to = l2cap_pi(sk)->flush_to;
+		opts.mode     = l2cap_pi(sk)->mode;
+		opts.fcs      = l2cap_pi(sk)->fcs;
+		opts.max_tx   = l2cap_pi(sk)->max_tx;
+		opts.txwin_size = (__u16)l2cap_pi(sk)->tx_win;
+
+		len = min_t(unsigned int, sizeof(opts), optlen);
+		if (copy_from_user((char *) &opts, optval, len)) {
+			err = -EFAULT;
+			break;
+		}
+
+		if (opts.txwin_size > L2CAP_DEFAULT_TX_WINDOW) {
+			err = -EINVAL;
+			break;
+		}
+
+		l2cap_pi(sk)->mode = opts.mode;
+		switch (l2cap_pi(sk)->mode) {
+		case L2CAP_MODE_BASIC:
+			l2cap_pi(sk)->conf_state &= ~L2CAP_CONF_STATE2_DEVICE;
+			break;
+		case L2CAP_MODE_ERTM:
+		case L2CAP_MODE_STREAMING:
+			if (!disable_ertm)
+				break;
+			/* fall through */
+		default:
+			err = -EINVAL;
+			break;
+		}
+
+		l2cap_pi(sk)->imtu = opts.imtu;
+		l2cap_pi(sk)->omtu = opts.omtu;
+		l2cap_pi(sk)->fcs  = opts.fcs;
+		l2cap_pi(sk)->max_tx = opts.max_tx;
+		l2cap_pi(sk)->tx_win = (__u8)opts.txwin_size;
+		break;
+
+	case L2CAP_LM:
+		if (get_user(opt, (u32 __user *) optval)) {
+			err = -EFAULT;
+			break;
+		}
+
+		if (opt & L2CAP_LM_AUTH)
+			l2cap_pi(sk)->sec_level = BT_SECURITY_LOW;
+		if (opt & L2CAP_LM_ENCRYPT)
+			l2cap_pi(sk)->sec_level = BT_SECURITY_MEDIUM;
+		if (opt & L2CAP_LM_SECURE)
+			l2cap_pi(sk)->sec_level = BT_SECURITY_HIGH;
+
+		l2cap_pi(sk)->role_switch    = (opt & L2CAP_LM_MASTER);
+		l2cap_pi(sk)->force_reliable = (opt & L2CAP_LM_RELIABLE);
+		break;
+
+	default:
+		err = -ENOPROTOOPT;
+		break;
+	}
+
+	release_sock(sk);
+	return err;
+}
+
+static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen)
+{
+	struct sock *sk = sock->sk;
+	struct bt_security sec;
+	int len, err = 0;
+	u32 opt;
+
+	BT_DBG("sk %p", sk);
+
+	if (level == SOL_L2CAP)
+		return l2cap_sock_setsockopt_old(sock, optname, optval, optlen);
+
+	if (level != SOL_BLUETOOTH)
+		return -ENOPROTOOPT;
+
+	lock_sock(sk);
+
+	switch (optname) {
+	case BT_SECURITY:
+		if (sk->sk_type != SOCK_SEQPACKET && sk->sk_type != SOCK_STREAM
+				&& sk->sk_type != SOCK_RAW) {
+			err = -EINVAL;
+			break;
+		}
+
+		sec.level = BT_SECURITY_LOW;
+
+		len = min_t(unsigned int, sizeof(sec), optlen);
+		if (copy_from_user((char *) &sec, optval, len)) {
+			err = -EFAULT;
+			break;
+		}
+
+		if (sec.level < BT_SECURITY_LOW ||
+					sec.level > BT_SECURITY_HIGH) {
+			err = -EINVAL;
+			break;
+		}
+
+		l2cap_pi(sk)->sec_level = sec.level;
+		break;
+
+	case BT_DEFER_SETUP:
+		if (sk->sk_state != BT_BOUND && sk->sk_state != BT_LISTEN) {
+			err = -EINVAL;
+			break;
+		}
+
+		if (get_user(opt, (u32 __user *) optval)) {
+			err = -EFAULT;
+			break;
+		}
+
+		bt_sk(sk)->defer_setup = opt;
+		break;
+
+	case BT_FLUSHABLE:
+		if (get_user(opt, (u32 __user *) optval)) {
+			err = -EFAULT;
+			break;
+		}
+
+		if (opt > BT_FLUSHABLE_ON) {
+			err = -EINVAL;
+			break;
+		}
+
+		if (opt == BT_FLUSHABLE_OFF) {
+			struct l2cap_conn *conn = l2cap_pi(sk)->conn;
+			/* proceed futher only when we have l2cap_conn and
+			   No Flush support in the LM */
+			if (!conn || !lmp_no_flush_capable(conn->hcon->hdev)) {
+				err = -EINVAL;
+				break;
+			}
+		}
+
+		l2cap_pi(sk)->flushable = opt;
+		break;
+
+	default:
+		err = -ENOPROTOOPT;
+		break;
+	}
+
+	release_sock(sk);
+	return err;
+}
+
+static int l2cap_sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t len)
+{
+	struct sock *sk = sock->sk;
+	struct l2cap_pinfo *pi = l2cap_pi(sk);
+	struct sk_buff *skb;
+	u16 control;
+	int err;
+
+	BT_DBG("sock %p, sk %p", sock, sk);
+
+	err = sock_error(sk);
+	if (err)
+		return err;
+
+	if (msg->msg_flags & MSG_OOB)
+		return -EOPNOTSUPP;
+
+	lock_sock(sk);
+
+	if (sk->sk_state != BT_CONNECTED) {
+		err = -ENOTCONN;
+		goto done;
+	}
+
+	/* Connectionless channel */
+	if (sk->sk_type == SOCK_DGRAM) {
+		skb = l2cap_create_connless_pdu(sk, msg, len);
+		if (IS_ERR(skb)) {
+			err = PTR_ERR(skb);
+		} else {
+			l2cap_do_send(sk, skb);
+			err = len;
+		}
+		goto done;
+	}
+
+	switch (pi->mode) {
+	case L2CAP_MODE_BASIC:
+		/* Check outgoing MTU */
+		if (len > pi->omtu) {
+			err = -EMSGSIZE;
+			goto done;
+		}
+
+		/* Create a basic PDU */
+		skb = l2cap_create_basic_pdu(sk, msg, len);
+		if (IS_ERR(skb)) {
+			err = PTR_ERR(skb);
+			goto done;
+		}
+
+		l2cap_do_send(sk, skb);
+		err = len;
+		break;
+
+	case L2CAP_MODE_ERTM:
+	case L2CAP_MODE_STREAMING:
+		/* Entire SDU fits into one PDU */
+		if (len <= pi->remote_mps) {
+			control = L2CAP_SDU_UNSEGMENTED;
+			skb = l2cap_create_iframe_pdu(sk, msg, len, control, 0);
+			if (IS_ERR(skb)) {
+				err = PTR_ERR(skb);
+				goto done;
+			}
+			__skb_queue_tail(TX_QUEUE(sk), skb);
+
+			if (sk->sk_send_head == NULL)
+				sk->sk_send_head = skb;
+
+		} else {
+		/* Segment SDU into multiples PDUs */
+			err = l2cap_sar_segment_sdu(sk, msg, len);
+			if (err < 0)
+				goto done;
+		}
+
+		if (pi->mode == L2CAP_MODE_STREAMING) {
+			l2cap_streaming_send(sk);
+		} else {
+			if ((pi->conn_state & L2CAP_CONN_REMOTE_BUSY) &&
+					(pi->conn_state & L2CAP_CONN_WAIT_F)) {
+				err = len;
+				break;
+			}
+			err = l2cap_ertm_send(sk);
+		}
+
+		if (err >= 0)
+			err = len;
+		break;
+
+	default:
+		BT_DBG("bad state %1.1x", pi->mode);
+		err = -EBADFD;
+	}
+
+done:
+	release_sock(sk);
+	return err;
+}
+
+static int l2cap_sock_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t len, int flags)
+{
+	struct sock *sk = sock->sk;
+
+	lock_sock(sk);
+
+	if (sk->sk_state == BT_CONNECT2 && bt_sk(sk)->defer_setup) {
+		struct l2cap_conn_rsp rsp;
+		struct l2cap_conn *conn = l2cap_pi(sk)->conn;
+		u8 buf[128];
+
+		sk->sk_state = BT_CONFIG;
+
+		rsp.scid   = cpu_to_le16(l2cap_pi(sk)->dcid);
+		rsp.dcid   = cpu_to_le16(l2cap_pi(sk)->scid);
+		rsp.result = cpu_to_le16(L2CAP_CR_SUCCESS);
+		rsp.status = cpu_to_le16(L2CAP_CS_NO_INFO);
+		l2cap_send_cmd(l2cap_pi(sk)->conn, l2cap_pi(sk)->ident,
+					L2CAP_CONN_RSP, sizeof(rsp), &rsp);
+
+		if (l2cap_pi(sk)->conf_state & L2CAP_CONF_REQ_SENT) {
+			release_sock(sk);
+			return 0;
+		}
+
+		l2cap_pi(sk)->conf_state |= L2CAP_CONF_REQ_SENT;
+		l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ,
+				l2cap_build_conf_req(sk, buf), buf);
+		l2cap_pi(sk)->num_conf_req++;
+
+		release_sock(sk);
+		return 0;
+	}
+
+	release_sock(sk);
+
+	if (sock->type == SOCK_STREAM)
+		return bt_sock_stream_recvmsg(iocb, sock, msg, len, flags);
+
+	return bt_sock_recvmsg(iocb, sock, msg, len, flags);
+}
+
+/* Kill socket (only if zapped and orphan)
+ * Must be called on unlocked socket.
+ */
+void l2cap_sock_kill(struct sock *sk)
+{
+	if (!sock_flag(sk, SOCK_ZAPPED) || sk->sk_socket)
+		return;
+
+	BT_DBG("sk %p state %d", sk, sk->sk_state);
+
+	/* Kill poor orphan */
+	bt_sock_unlink(&l2cap_sk_list, sk);
+	sock_set_flag(sk, SOCK_DEAD);
+	sock_put(sk);
+}
+
+/* Must be called on unlocked socket. */
+static void l2cap_sock_close(struct sock *sk)
+{
+	l2cap_sock_clear_timer(sk);
+	lock_sock(sk);
+	__l2cap_sock_close(sk, ECONNRESET);
+	release_sock(sk);
+	l2cap_sock_kill(sk);
+}
+
+static void l2cap_sock_cleanup_listen(struct sock *parent)
+{
+	struct sock *sk;
+
+	BT_DBG("parent %p", parent);
+
+	/* Close not yet accepted channels */
+	while ((sk = bt_accept_dequeue(parent, NULL)))
+		l2cap_sock_close(sk);
+
+	parent->sk_state = BT_CLOSED;
+	sock_set_flag(parent, SOCK_ZAPPED);
+}
+
+void __l2cap_sock_close(struct sock *sk, int reason)
+{
+	struct l2cap_conn *conn = l2cap_pi(sk)->conn;
+
+	BT_DBG("sk %p state %d socket %p", sk, sk->sk_state, sk->sk_socket);
+
+	switch (sk->sk_state) {
+	case BT_LISTEN:
+		l2cap_sock_cleanup_listen(sk);
+		break;
+
+	case BT_CONNECTED:
+	case BT_CONFIG:
+		if ((sk->sk_type == SOCK_SEQPACKET ||
+					sk->sk_type == SOCK_STREAM) &&
+					conn->hcon->type == ACL_LINK) {
+			l2cap_sock_set_timer(sk, sk->sk_sndtimeo);
+			l2cap_send_disconn_req(conn, sk, reason);
+		} else
+			l2cap_chan_del(sk, reason);
+		break;
+
+	case BT_CONNECT2:
+		if ((sk->sk_type == SOCK_SEQPACKET ||
+					sk->sk_type == SOCK_STREAM) &&
+					conn->hcon->type == ACL_LINK) {
+			struct l2cap_conn_rsp rsp;
+			__u16 result;
+
+			if (bt_sk(sk)->defer_setup)
+				result = L2CAP_CR_SEC_BLOCK;
+			else
+				result = L2CAP_CR_BAD_PSM;
+
+			rsp.scid   = cpu_to_le16(l2cap_pi(sk)->dcid);
+			rsp.dcid   = cpu_to_le16(l2cap_pi(sk)->scid);
+			rsp.result = cpu_to_le16(result);
+			rsp.status = cpu_to_le16(L2CAP_CS_NO_INFO);
+			l2cap_send_cmd(conn, l2cap_pi(sk)->ident,
+					L2CAP_CONN_RSP, sizeof(rsp), &rsp);
+		} else
+			l2cap_chan_del(sk, reason);
+		break;
+
+	case BT_CONNECT:
+	case BT_DISCONN:
+		l2cap_chan_del(sk, reason);
+		break;
+
+	default:
+		sock_set_flag(sk, SOCK_ZAPPED);
+		break;
+	}
+}
+
+static int l2cap_sock_shutdown(struct socket *sock, int how)
+{
+	struct sock *sk = sock->sk;
+	int err = 0;
+
+	BT_DBG("sock %p, sk %p", sock, sk);
+
+	if (!sk)
+		return 0;
+
+	lock_sock(sk);
+	if (!sk->sk_shutdown) {
+		if (l2cap_pi(sk)->mode == L2CAP_MODE_ERTM)
+			err = __l2cap_wait_ack(sk);
+
+		sk->sk_shutdown = SHUTDOWN_MASK;
+		l2cap_sock_clear_timer(sk);
+		__l2cap_sock_close(sk, 0);
+
+		if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime)
+			err = bt_sock_wait_state(sk, BT_CLOSED,
+							sk->sk_lingertime);
+	}
+
+	if (!err && sk->sk_err)
+		err = -sk->sk_err;
+
+	release_sock(sk);
+	return err;
+}
+
+static int l2cap_sock_release(struct socket *sock)
+{
+	struct sock *sk = sock->sk;
+	int err;
+
+	BT_DBG("sock %p, sk %p", sock, sk);
+
+	if (!sk)
+		return 0;
+
+	err = l2cap_sock_shutdown(sock, 2);
+
+	sock_orphan(sk);
+	l2cap_sock_kill(sk);
+	return err;
+}
+
+static void l2cap_sock_destruct(struct sock *sk)
+{
+	BT_DBG("sk %p", sk);
+
+	skb_queue_purge(&sk->sk_receive_queue);
+	skb_queue_purge(&sk->sk_write_queue);
+}
+
+void l2cap_sock_init(struct sock *sk, struct sock *parent)
+{
+	struct l2cap_pinfo *pi = l2cap_pi(sk);
+
+	BT_DBG("sk %p", sk);
+
+	if (parent) {
+		sk->sk_type = parent->sk_type;
+		bt_sk(sk)->defer_setup = bt_sk(parent)->defer_setup;
+
+		pi->imtu = l2cap_pi(parent)->imtu;
+		pi->omtu = l2cap_pi(parent)->omtu;
+		pi->conf_state = l2cap_pi(parent)->conf_state;
+		pi->mode = l2cap_pi(parent)->mode;
+		pi->fcs  = l2cap_pi(parent)->fcs;
+		pi->max_tx = l2cap_pi(parent)->max_tx;
+		pi->tx_win = l2cap_pi(parent)->tx_win;
+		pi->sec_level = l2cap_pi(parent)->sec_level;
+		pi->role_switch = l2cap_pi(parent)->role_switch;
+		pi->force_reliable = l2cap_pi(parent)->force_reliable;
+		pi->flushable = l2cap_pi(parent)->flushable;
+	} else {
+		pi->imtu = L2CAP_DEFAULT_MTU;
+		pi->omtu = 0;
+		if (!disable_ertm && sk->sk_type == SOCK_STREAM) {
+			pi->mode = L2CAP_MODE_ERTM;
+			pi->conf_state |= L2CAP_CONF_STATE2_DEVICE;
+		} else {
+			pi->mode = L2CAP_MODE_BASIC;
+		}
+		pi->max_tx = L2CAP_DEFAULT_MAX_TX;
+		pi->fcs  = L2CAP_FCS_CRC16;
+		pi->tx_win = L2CAP_DEFAULT_TX_WINDOW;
+		pi->sec_level = BT_SECURITY_LOW;
+		pi->role_switch = 0;
+		pi->force_reliable = 0;
+		pi->flushable = BT_FLUSHABLE_OFF;
+	}
+
+	/* Default config options */
+	pi->conf_len = 0;
+	pi->flush_to = L2CAP_DEFAULT_FLUSH_TO;
+	skb_queue_head_init(TX_QUEUE(sk));
+	skb_queue_head_init(SREJ_QUEUE(sk));
+	skb_queue_head_init(BUSY_QUEUE(sk));
+	INIT_LIST_HEAD(SREJ_LIST(sk));
+}
+
+static struct proto l2cap_proto = {
+	.name		= "L2CAP",
+	.owner		= THIS_MODULE,
+	.obj_size	= sizeof(struct l2cap_pinfo)
+};
+
+struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock, int proto, gfp_t prio)
+{
+	struct sock *sk;
+
+	sk = sk_alloc(net, PF_BLUETOOTH, prio, &l2cap_proto);
+	if (!sk)
+		return NULL;
+
+	sock_init_data(sock, sk);
+	INIT_LIST_HEAD(&bt_sk(sk)->accept_q);
+
+	sk->sk_destruct = l2cap_sock_destruct;
+	sk->sk_sndtimeo = msecs_to_jiffies(L2CAP_CONN_TIMEOUT);
+
+	sock_reset_flag(sk, SOCK_ZAPPED);
+
+	sk->sk_protocol = proto;
+	sk->sk_state = BT_OPEN;
+
+	setup_timer(&sk->sk_timer, l2cap_sock_timeout, (unsigned long) sk);
+
+	bt_sock_link(&l2cap_sk_list, sk);
+	return sk;
+}
+
+static int l2cap_sock_create(struct net *net, struct socket *sock, int protocol,
+			     int kern)
+{
+	struct sock *sk;
+
+	BT_DBG("sock %p", sock);
+
+	sock->state = SS_UNCONNECTED;
+
+	if (sock->type != SOCK_SEQPACKET && sock->type != SOCK_STREAM &&
+			sock->type != SOCK_DGRAM && sock->type != SOCK_RAW)
+		return -ESOCKTNOSUPPORT;
+
+	if (sock->type == SOCK_RAW && !kern && !capable(CAP_NET_RAW))
+		return -EPERM;
+
+	sock->ops = &l2cap_sock_ops;
+
+	sk = l2cap_sock_alloc(net, sock, protocol, GFP_ATOMIC);
+	if (!sk)
+		return -ENOMEM;
+
+	l2cap_sock_init(sk, NULL);
+	return 0;
+}
+
+const struct proto_ops l2cap_sock_ops = {
+	.family		= PF_BLUETOOTH,
+	.owner		= THIS_MODULE,
+	.release	= l2cap_sock_release,
+	.bind		= l2cap_sock_bind,
+	.connect	= l2cap_sock_connect,
+	.listen		= l2cap_sock_listen,
+	.accept		= l2cap_sock_accept,
+	.getname	= l2cap_sock_getname,
+	.sendmsg	= l2cap_sock_sendmsg,
+	.recvmsg	= l2cap_sock_recvmsg,
+	.poll		= bt_sock_poll,
+	.ioctl		= bt_sock_ioctl,
+	.mmap		= sock_no_mmap,
+	.socketpair	= sock_no_socketpair,
+	.shutdown	= l2cap_sock_shutdown,
+	.setsockopt	= l2cap_sock_setsockopt,
+	.getsockopt	= l2cap_sock_getsockopt
+};
+
+static const struct net_proto_family l2cap_sock_family_ops = {
+	.family	= PF_BLUETOOTH,
+	.owner	= THIS_MODULE,
+	.create	= l2cap_sock_create,
+};
+
+int __init l2cap_init_sockets(void)
+{
+	int err;
+
+	err = proto_register(&l2cap_proto, 0);
+	if (err < 0)
+		return err;
+
+	err = bt_sock_register(BTPROTO_L2CAP, &l2cap_sock_family_ops);
+	if (err < 0)
+		goto error;
+
+	BT_INFO("L2CAP socket layer initialized");
+
+	return 0;
+
+error:
+	BT_ERR("L2CAP socket registration failed");
+	proto_unregister(&l2cap_proto);
+	return err;
+}
+
+void l2cap_cleanup_sockets(void)
+{
+	if (bt_sock_unregister(BTPROTO_L2CAP) < 0)
+		BT_ERR("L2CAP socket unregistration failed");
+
+	proto_unregister(&l2cap_proto);
+}
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index f827fd90..f5ef7a3 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -22,7 +22,7 @@
 
 /* Bluetooth HCI Management interface */
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/unaligned.h>
 
 #include <net/bluetooth/bluetooth.h>
@@ -32,6 +32,16 @@
 #define MGMT_VERSION	0
 #define MGMT_REVISION	1
 
+struct pending_cmd {
+	struct list_head list;
+	__u16 opcode;
+	int index;
+	void *cmd;
+	struct sock *sk;
+};
+
+LIST_HEAD(cmd_list);
+
 static int cmd_status(struct sock *sk, u16 cmd, u8 status)
 {
 	struct sk_buff *skb;
@@ -59,29 +69,26 @@ static int cmd_status(struct sock *sk, u16 cmd, u8 status)
 	return 0;
 }
 
-static int read_version(struct sock *sk)
+static int cmd_complete(struct sock *sk, u16 cmd, void *rp, size_t rp_len)
 {
 	struct sk_buff *skb;
 	struct mgmt_hdr *hdr;
 	struct mgmt_ev_cmd_complete *ev;
-	struct mgmt_rp_read_version *rp;
 
 	BT_DBG("sock %p", sk);
 
-	skb = alloc_skb(sizeof(*hdr) + sizeof(*ev) + sizeof(*rp), GFP_ATOMIC);
+	skb = alloc_skb(sizeof(*hdr) + sizeof(*ev) + rp_len, GFP_ATOMIC);
 	if (!skb)
 		return -ENOMEM;
 
 	hdr = (void *) skb_put(skb, sizeof(*hdr));
-	hdr->opcode = cpu_to_le16(MGMT_EV_CMD_COMPLETE);
-	hdr->len = cpu_to_le16(sizeof(*ev) + sizeof(*rp));
 
-	ev = (void *) skb_put(skb, sizeof(*ev));
-	put_unaligned_le16(MGMT_OP_READ_VERSION, &ev->opcode);
+	hdr->opcode = cpu_to_le16(MGMT_EV_CMD_COMPLETE);
+	hdr->len = cpu_to_le16(sizeof(*ev) + rp_len);
 
-	rp = (void *) skb_put(skb, sizeof(*rp));
-	rp->version = MGMT_VERSION;
-	put_unaligned_le16(MGMT_REVISION, &rp->revision);
+	ev = (void *) skb_put(skb, sizeof(*ev) + rp_len);
+	put_unaligned_le16(cmd, &ev->opcode);
+	memcpy(ev->data, rp, rp_len);
 
 	if (sock_queue_rcv_skb(sk, skb) < 0)
 		kfree_skb(skb);
@@ -89,16 +96,25 @@ static int read_version(struct sock *sk)
 	return 0;
 }
 
+static int read_version(struct sock *sk)
+{
+	struct mgmt_rp_read_version rp;
+
+	BT_DBG("sock %p", sk);
+
+	rp.version = MGMT_VERSION;
+	put_unaligned_le16(MGMT_REVISION, &rp.revision);
+
+	return cmd_complete(sk, MGMT_OP_READ_VERSION, &rp, sizeof(rp));
+}
+
 static int read_index_list(struct sock *sk)
 {
-	struct sk_buff *skb;
-	struct mgmt_hdr *hdr;
-	struct mgmt_ev_cmd_complete *ev;
 	struct mgmt_rp_read_index_list *rp;
 	struct list_head *p;
-	size_t body_len;
+	size_t rp_len;
 	u16 count;
-	int i;
+	int i, err;
 
 	BT_DBG("sock %p", sk);
 
@@ -109,43 +125,43 @@ static int read_index_list(struct sock *sk)
 		count++;
 	}
 
-	body_len = sizeof(*ev) + sizeof(*rp) + (2 * count);
-	skb = alloc_skb(sizeof(*hdr) + body_len, GFP_ATOMIC);
-	if (!skb)
+	rp_len = sizeof(*rp) + (2 * count);
+	rp = kmalloc(rp_len, GFP_ATOMIC);
+	if (!rp) {
+		read_unlock(&hci_dev_list_lock);
 		return -ENOMEM;
+	}
 
-	hdr = (void *) skb_put(skb, sizeof(*hdr));
-	hdr->opcode = cpu_to_le16(MGMT_EV_CMD_COMPLETE);
-	hdr->len = cpu_to_le16(body_len);
-
-	ev = (void *) skb_put(skb, sizeof(*ev));
-	put_unaligned_le16(MGMT_OP_READ_INDEX_LIST, &ev->opcode);
-
-	rp = (void *) skb_put(skb, sizeof(*rp) + (2 * count));
 	put_unaligned_le16(count, &rp->num_controllers);
 
 	i = 0;
 	list_for_each(p, &hci_dev_list) {
 		struct hci_dev *d = list_entry(p, struct hci_dev, list);
+
+		hci_del_off_timer(d);
+
+		set_bit(HCI_MGMT, &d->flags);
+
+		if (test_bit(HCI_SETUP, &d->flags))
+			continue;
+
 		put_unaligned_le16(d->id, &rp->index[i++]);
 		BT_DBG("Added hci%u", d->id);
 	}
 
 	read_unlock(&hci_dev_list_lock);
 
-	if (sock_queue_rcv_skb(sk, skb) < 0)
-		kfree_skb(skb);
+	err = cmd_complete(sk, MGMT_OP_READ_INDEX_LIST, rp, rp_len);
 
-	return 0;
+	kfree(rp);
+
+	return err;
 }
 
 static int read_controller_info(struct sock *sk, unsigned char *data, u16 len)
 {
-	struct sk_buff *skb;
-	struct mgmt_hdr *hdr;
-	struct mgmt_ev_cmd_complete *ev;
-	struct mgmt_rp_read_info *rp;
-	struct mgmt_cp_read_info *cp;
+	struct mgmt_rp_read_info rp;
+	struct mgmt_cp_read_info *cp = (void *) data;
 	struct hci_dev *hdev;
 	u16 dev_id;
 
@@ -154,18 +170,333 @@ static int read_controller_info(struct sock *sk, unsigned char *data, u16 len)
 	if (len != 2)
 		return cmd_status(sk, MGMT_OP_READ_INFO, EINVAL);
 
-	skb = alloc_skb(sizeof(*hdr) + sizeof(*ev) + sizeof(*rp), GFP_ATOMIC);
+	dev_id = get_unaligned_le16(&cp->index);
+
+	BT_DBG("request for hci%u", dev_id);
+
+	hdev = hci_dev_get(dev_id);
+	if (!hdev)
+		return cmd_status(sk, MGMT_OP_READ_INFO, ENODEV);
+
+	hci_del_off_timer(hdev);
+
+	hci_dev_lock_bh(hdev);
+
+	set_bit(HCI_MGMT, &hdev->flags);
+
+	put_unaligned_le16(hdev->id, &rp.index);
+	rp.type = hdev->dev_type;
+
+	rp.powered = test_bit(HCI_UP, &hdev->flags);
+	rp.connectable = test_bit(HCI_PSCAN, &hdev->flags);
+	rp.discoverable = test_bit(HCI_ISCAN, &hdev->flags);
+	rp.pairable = test_bit(HCI_PSCAN, &hdev->flags);
+
+	if (test_bit(HCI_AUTH, &hdev->flags))
+		rp.sec_mode = 3;
+	else if (hdev->ssp_mode > 0)
+		rp.sec_mode = 4;
+	else
+		rp.sec_mode = 2;
+
+	bacpy(&rp.bdaddr, &hdev->bdaddr);
+	memcpy(rp.features, hdev->features, 8);
+	memcpy(rp.dev_class, hdev->dev_class, 3);
+	put_unaligned_le16(hdev->manufacturer, &rp.manufacturer);
+	rp.hci_ver = hdev->hci_ver;
+	put_unaligned_le16(hdev->hci_rev, &rp.hci_rev);
+
+	hci_dev_unlock_bh(hdev);
+	hci_dev_put(hdev);
+
+	return cmd_complete(sk, MGMT_OP_READ_INFO, &rp, sizeof(rp));
+}
+
+static void mgmt_pending_free(struct pending_cmd *cmd)
+{
+	sock_put(cmd->sk);
+	kfree(cmd->cmd);
+	kfree(cmd);
+}
+
+static int mgmt_pending_add(struct sock *sk, u16 opcode, int index,
+							void *data, u16 len)
+{
+	struct pending_cmd *cmd;
+
+	cmd = kmalloc(sizeof(*cmd), GFP_ATOMIC);
+	if (!cmd)
+		return -ENOMEM;
+
+	cmd->opcode = opcode;
+	cmd->index = index;
+
+	cmd->cmd = kmalloc(len, GFP_ATOMIC);
+	if (!cmd->cmd) {
+		kfree(cmd);
+		return -ENOMEM;
+	}
+
+	memcpy(cmd->cmd, data, len);
+
+	cmd->sk = sk;
+	sock_hold(sk);
+
+	list_add(&cmd->list, &cmd_list);
+
+	return 0;
+}
+
+static void mgmt_pending_foreach(u16 opcode, int index,
+				void (*cb)(struct pending_cmd *cmd, void *data),
+				void *data)
+{
+	struct list_head *p, *n;
+
+	list_for_each_safe(p, n, &cmd_list) {
+		struct pending_cmd *cmd;
+
+		cmd = list_entry(p, struct pending_cmd, list);
+
+		if (cmd->opcode != opcode)
+			continue;
+
+		if (index >= 0 && cmd->index != index)
+			continue;
+
+		cb(cmd, data);
+	}
+}
+
+static struct pending_cmd *mgmt_pending_find(u16 opcode, int index)
+{
+	struct list_head *p;
+
+	list_for_each(p, &cmd_list) {
+		struct pending_cmd *cmd;
+
+		cmd = list_entry(p, struct pending_cmd, list);
+
+		if (cmd->opcode != opcode)
+			continue;
+
+		if (index >= 0 && cmd->index != index)
+			continue;
+
+		return cmd;
+	}
+
+	return NULL;
+}
+
+static void mgmt_pending_remove(u16 opcode, int index)
+{
+	struct pending_cmd *cmd;
+
+	cmd = mgmt_pending_find(opcode, index);
+	if (cmd == NULL)
+		return;
+
+	list_del(&cmd->list);
+	mgmt_pending_free(cmd);
+}
+
+static int set_powered(struct sock *sk, unsigned char *data, u16 len)
+{
+	struct mgmt_mode *cp;
+	struct hci_dev *hdev;
+	u16 dev_id;
+	int ret, up;
+
+	cp = (void *) data;
+	dev_id = get_unaligned_le16(&cp->index);
+
+	BT_DBG("request for hci%u", dev_id);
+
+	hdev = hci_dev_get(dev_id);
+	if (!hdev)
+		return cmd_status(sk, MGMT_OP_SET_POWERED, ENODEV);
+
+	hci_dev_lock_bh(hdev);
+
+	up = test_bit(HCI_UP, &hdev->flags);
+	if ((cp->val && up) || (!cp->val && !up)) {
+		ret = cmd_status(sk, MGMT_OP_SET_POWERED, EALREADY);
+		goto failed;
+	}
+
+	if (mgmt_pending_find(MGMT_OP_SET_POWERED, dev_id)) {
+		ret = cmd_status(sk, MGMT_OP_SET_POWERED, EBUSY);
+		goto failed;
+	}
+
+	ret = mgmt_pending_add(sk, MGMT_OP_SET_POWERED, dev_id, data, len);
+	if (ret < 0)
+		goto failed;
+
+	if (cp->val)
+		queue_work(hdev->workqueue, &hdev->power_on);
+	else
+		queue_work(hdev->workqueue, &hdev->power_off);
+
+	ret = 0;
+
+failed:
+	hci_dev_unlock_bh(hdev);
+	hci_dev_put(hdev);
+	return ret;
+}
+
+static int set_discoverable(struct sock *sk, unsigned char *data, u16 len)
+{
+	struct mgmt_mode *cp;
+	struct hci_dev *hdev;
+	u16 dev_id;
+	u8 scan;
+	int err;
+
+	cp = (void *) data;
+	dev_id = get_unaligned_le16(&cp->index);
+
+	BT_DBG("request for hci%u", dev_id);
+
+	hdev = hci_dev_get(dev_id);
+	if (!hdev)
+		return cmd_status(sk, MGMT_OP_SET_DISCOVERABLE, ENODEV);
+
+	hci_dev_lock_bh(hdev);
+
+	if (!test_bit(HCI_UP, &hdev->flags)) {
+		err = cmd_status(sk, MGMT_OP_SET_DISCOVERABLE, ENETDOWN);
+		goto failed;
+	}
+
+	if (mgmt_pending_find(MGMT_OP_SET_DISCOVERABLE, dev_id) ||
+			mgmt_pending_find(MGMT_OP_SET_CONNECTABLE, dev_id)) {
+		err = cmd_status(sk, MGMT_OP_SET_DISCOVERABLE, EBUSY);
+		goto failed;
+	}
+
+	if (cp->val == test_bit(HCI_ISCAN, &hdev->flags) &&
+					test_bit(HCI_PSCAN, &hdev->flags)) {
+		err = cmd_status(sk, MGMT_OP_SET_DISCOVERABLE, EALREADY);
+		goto failed;
+	}
+
+	err = mgmt_pending_add(sk, MGMT_OP_SET_DISCOVERABLE, dev_id, data, len);
+	if (err < 0)
+		goto failed;
+
+	scan = SCAN_PAGE;
+
+	if (cp->val)
+		scan |= SCAN_INQUIRY;
+
+	err = hci_send_cmd(hdev, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan);
+	if (err < 0)
+		mgmt_pending_remove(MGMT_OP_SET_DISCOVERABLE, dev_id);
+
+failed:
+	hci_dev_unlock_bh(hdev);
+	hci_dev_put(hdev);
+
+	return err;
+}
+
+static int set_connectable(struct sock *sk, unsigned char *data, u16 len)
+{
+	struct mgmt_mode *cp;
+	struct hci_dev *hdev;
+	u16 dev_id;
+	u8 scan;
+	int err;
+
+	cp = (void *) data;
+	dev_id = get_unaligned_le16(&cp->index);
+
+	BT_DBG("request for hci%u", dev_id);
+
+	hdev = hci_dev_get(dev_id);
+	if (!hdev)
+		return cmd_status(sk, MGMT_OP_SET_CONNECTABLE, ENODEV);
+
+	hci_dev_lock_bh(hdev);
+
+	if (!test_bit(HCI_UP, &hdev->flags)) {
+		err = cmd_status(sk, MGMT_OP_SET_CONNECTABLE, ENETDOWN);
+		goto failed;
+	}
+
+	if (mgmt_pending_find(MGMT_OP_SET_DISCOVERABLE, dev_id) ||
+			mgmt_pending_find(MGMT_OP_SET_CONNECTABLE, dev_id)) {
+		err = cmd_status(sk, MGMT_OP_SET_CONNECTABLE, EBUSY);
+		goto failed;
+	}
+
+	if (cp->val == test_bit(HCI_PSCAN, &hdev->flags)) {
+		err = cmd_status(sk, MGMT_OP_SET_CONNECTABLE, EALREADY);
+		goto failed;
+	}
+
+	err = mgmt_pending_add(sk, MGMT_OP_SET_CONNECTABLE, dev_id, data, len);
+	if (err < 0)
+		goto failed;
+
+	if (cp->val)
+		scan = SCAN_PAGE;
+	else
+		scan = 0;
+
+	err = hci_send_cmd(hdev, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan);
+	if (err < 0)
+		mgmt_pending_remove(MGMT_OP_SET_CONNECTABLE, dev_id);
+
+failed:
+	hci_dev_unlock_bh(hdev);
+	hci_dev_put(hdev);
+
+	return err;
+}
+
+static int mgmt_event(u16 event, void *data, u16 data_len, struct sock *skip_sk)
+{
+	struct sk_buff *skb;
+	struct mgmt_hdr *hdr;
+
+	skb = alloc_skb(sizeof(*hdr) + data_len, GFP_ATOMIC);
 	if (!skb)
 		return -ENOMEM;
 
+	bt_cb(skb)->channel = HCI_CHANNEL_CONTROL;
+
 	hdr = (void *) skb_put(skb, sizeof(*hdr));
-	hdr->opcode = cpu_to_le16(MGMT_EV_CMD_COMPLETE);
-	hdr->len = cpu_to_le16(sizeof(*ev) + sizeof(*rp));
+	hdr->opcode = cpu_to_le16(event);
+	hdr->len = cpu_to_le16(data_len);
 
-	ev = (void *) skb_put(skb, sizeof(*ev));
-	put_unaligned_le16(MGMT_OP_READ_INFO, &ev->opcode);
+	memcpy(skb_put(skb, data_len), data, data_len);
 
-	rp = (void *) skb_put(skb, sizeof(*rp));
+	hci_send_to_sock(NULL, skb, skip_sk);
+	kfree_skb(skb);
+
+	return 0;
+}
+
+static int send_mode_rsp(struct sock *sk, u16 opcode, u16 index, u8 val)
+{
+	struct mgmt_mode rp;
+
+	put_unaligned_le16(index, &rp.index);
+	rp.val = val;
+
+	return cmd_complete(sk, opcode, &rp, sizeof(rp));
+}
+
+static int set_pairable(struct sock *sk, unsigned char *data, u16 len)
+{
+	struct mgmt_mode *cp, ev;
+	struct hci_dev *hdev;
+	u16 dev_id;
+	int err;
 
 	cp = (void *) data;
 	dev_id = get_unaligned_le16(&cp->index);
@@ -173,43 +504,547 @@ static int read_controller_info(struct sock *sk, unsigned char *data, u16 len)
 	BT_DBG("request for hci%u", dev_id);
 
 	hdev = hci_dev_get(dev_id);
-	if (!hdev) {
-		kfree_skb(skb);
-		return cmd_status(sk, MGMT_OP_READ_INFO, ENODEV);
+	if (!hdev)
+		return cmd_status(sk, MGMT_OP_SET_PAIRABLE, ENODEV);
+
+	hci_dev_lock_bh(hdev);
+
+	if (cp->val)
+		set_bit(HCI_PAIRABLE, &hdev->flags);
+	else
+		clear_bit(HCI_PAIRABLE, &hdev->flags);
+
+	err = send_mode_rsp(sk, MGMT_OP_SET_PAIRABLE, dev_id, cp->val);
+	if (err < 0)
+		goto failed;
+
+	put_unaligned_le16(dev_id, &ev.index);
+	ev.val = cp->val;
+
+	err = mgmt_event(MGMT_EV_PAIRABLE, &ev, sizeof(ev), sk);
+
+failed:
+	hci_dev_unlock_bh(hdev);
+	hci_dev_put(hdev);
+
+	return err;
+}
+
+static u8 get_service_classes(struct hci_dev *hdev)
+{
+	struct list_head *p;
+	u8 val = 0;
+
+	list_for_each(p, &hdev->uuids) {
+		struct bt_uuid *uuid = list_entry(p, struct bt_uuid, list);
+
+		val |= uuid->svc_hint;
+	}
+
+	return val;
+}
+
+static int update_class(struct hci_dev *hdev)
+{
+	u8 cod[3];
+
+	BT_DBG("%s", hdev->name);
+
+	if (test_bit(HCI_SERVICE_CACHE, &hdev->flags))
+		return 0;
+
+	cod[0] = hdev->minor_class;
+	cod[1] = hdev->major_class;
+	cod[2] = get_service_classes(hdev);
+
+	if (memcmp(cod, hdev->dev_class, 3) == 0)
+		return 0;
+
+	return hci_send_cmd(hdev, HCI_OP_WRITE_CLASS_OF_DEV, sizeof(cod), cod);
+}
+
+static int add_uuid(struct sock *sk, unsigned char *data, u16 len)
+{
+	struct mgmt_cp_add_uuid *cp;
+	struct hci_dev *hdev;
+	struct bt_uuid *uuid;
+	u16 dev_id;
+	int err;
+
+	cp = (void *) data;
+	dev_id = get_unaligned_le16(&cp->index);
+
+	BT_DBG("request for hci%u", dev_id);
+
+	hdev = hci_dev_get(dev_id);
+	if (!hdev)
+		return cmd_status(sk, MGMT_OP_ADD_UUID, ENODEV);
+
+	hci_dev_lock_bh(hdev);
+
+	uuid = kmalloc(sizeof(*uuid), GFP_ATOMIC);
+	if (!uuid) {
+		err = -ENOMEM;
+		goto failed;
 	}
 
+	memcpy(uuid->uuid, cp->uuid, 16);
+	uuid->svc_hint = cp->svc_hint;
+
+	list_add(&uuid->list, &hdev->uuids);
+
+	err = update_class(hdev);
+	if (err < 0)
+		goto failed;
+
+	err = cmd_complete(sk, MGMT_OP_ADD_UUID, &dev_id, sizeof(dev_id));
+
+failed:
+	hci_dev_unlock_bh(hdev);
+	hci_dev_put(hdev);
+
+	return err;
+}
+
+static int remove_uuid(struct sock *sk, unsigned char *data, u16 len)
+{
+	struct list_head *p, *n;
+	struct mgmt_cp_add_uuid *cp;
+	struct hci_dev *hdev;
+	u8 bt_uuid_any[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+	u16 dev_id;
+	int err, found;
+
+	cp = (void *) data;
+	dev_id = get_unaligned_le16(&cp->index);
+
+	BT_DBG("request for hci%u", dev_id);
+
+	hdev = hci_dev_get(dev_id);
+	if (!hdev)
+		return cmd_status(sk, MGMT_OP_REMOVE_UUID, ENODEV);
+
 	hci_dev_lock_bh(hdev);
 
-	put_unaligned_le16(hdev->id, &rp->index);
-	rp->type = hdev->dev_type;
+	if (memcmp(cp->uuid, bt_uuid_any, 16) == 0) {
+		err = hci_uuids_clear(hdev);
+		goto unlock;
+	}
 
-	rp->powered = test_bit(HCI_UP, &hdev->flags);
-	rp->discoverable = test_bit(HCI_ISCAN, &hdev->flags);
-	rp->pairable = test_bit(HCI_PSCAN, &hdev->flags);
+	found = 0;
 
-	if (test_bit(HCI_AUTH, &hdev->flags))
-		rp->sec_mode = 3;
-	else if (hdev->ssp_mode > 0)
-		rp->sec_mode = 4;
-	else
-		rp->sec_mode = 2;
+	list_for_each_safe(p, n, &hdev->uuids) {
+		struct bt_uuid *match = list_entry(p, struct bt_uuid, list);
 
-	bacpy(&rp->bdaddr, &hdev->bdaddr);
-	memcpy(rp->features, hdev->features, 8);
-	memcpy(rp->dev_class, hdev->dev_class, 3);
-	put_unaligned_le16(hdev->manufacturer, &rp->manufacturer);
-	rp->hci_ver = hdev->hci_ver;
-	put_unaligned_le16(hdev->hci_rev, &rp->hci_rev);
+		if (memcmp(match->uuid, cp->uuid, 16) != 0)
+			continue;
 
+		list_del(&match->list);
+		found++;
+	}
+
+	if (found == 0) {
+		err = cmd_status(sk, MGMT_OP_REMOVE_UUID, ENOENT);
+		goto unlock;
+	}
+
+	err = update_class(hdev);
+	if (err < 0)
+		goto unlock;
+
+	err = cmd_complete(sk, MGMT_OP_REMOVE_UUID, &dev_id, sizeof(dev_id));
+
+unlock:
 	hci_dev_unlock_bh(hdev);
 	hci_dev_put(hdev);
 
-	if (sock_queue_rcv_skb(sk, skb) < 0)
-		kfree_skb(skb);
+	return err;
+}
+
+static int set_dev_class(struct sock *sk, unsigned char *data, u16 len)
+{
+	struct hci_dev *hdev;
+	struct mgmt_cp_set_dev_class *cp;
+	u16 dev_id;
+	int err;
+
+	cp = (void *) data;
+	dev_id = get_unaligned_le16(&cp->index);
+
+	BT_DBG("request for hci%u", dev_id);
+
+	hdev = hci_dev_get(dev_id);
+	if (!hdev)
+		return cmd_status(sk, MGMT_OP_SET_DEV_CLASS, ENODEV);
+
+	hci_dev_lock_bh(hdev);
+
+	hdev->major_class = cp->major;
+	hdev->minor_class = cp->minor;
+
+	err = update_class(hdev);
+
+	if (err == 0)
+		err = cmd_complete(sk, MGMT_OP_SET_DEV_CLASS, &dev_id,
+							sizeof(dev_id));
+
+	hci_dev_unlock_bh(hdev);
+	hci_dev_put(hdev);
+
+	return err;
+}
+
+static int set_service_cache(struct sock *sk, unsigned char *data, u16 len)
+{
+	struct hci_dev *hdev;
+	struct mgmt_cp_set_service_cache *cp;
+	u16 dev_id;
+	int err;
+
+	cp = (void *) data;
+	dev_id = get_unaligned_le16(&cp->index);
+
+	hdev = hci_dev_get(dev_id);
+	if (!hdev)
+		return cmd_status(sk, MGMT_OP_SET_SERVICE_CACHE, ENODEV);
+
+	hci_dev_lock_bh(hdev);
+
+	BT_DBG("hci%u enable %d", dev_id, cp->enable);
+
+	if (cp->enable) {
+		set_bit(HCI_SERVICE_CACHE, &hdev->flags);
+		err = 0;
+	} else {
+		clear_bit(HCI_SERVICE_CACHE, &hdev->flags);
+		err = update_class(hdev);
+	}
+
+	if (err == 0)
+		err = cmd_complete(sk, MGMT_OP_SET_SERVICE_CACHE, &dev_id,
+							sizeof(dev_id));
+
+	hci_dev_unlock_bh(hdev);
+	hci_dev_put(hdev);
+
+	return err;
+}
+
+static int load_keys(struct sock *sk, unsigned char *data, u16 len)
+{
+	struct hci_dev *hdev;
+	struct mgmt_cp_load_keys *cp;
+	u16 dev_id, key_count, expected_len;
+	int i;
+
+	cp = (void *) data;
+	dev_id = get_unaligned_le16(&cp->index);
+	key_count = get_unaligned_le16(&cp->key_count);
+
+	expected_len = sizeof(*cp) + key_count * sizeof(struct mgmt_key_info);
+	if (expected_len != len) {
+		BT_ERR("load_keys: expected %u bytes, got %u bytes",
+							len, expected_len);
+		return -EINVAL;
+	}
+
+	hdev = hci_dev_get(dev_id);
+	if (!hdev)
+		return cmd_status(sk, MGMT_OP_LOAD_KEYS, ENODEV);
+
+	BT_DBG("hci%u debug_keys %u key_count %u", dev_id, cp->debug_keys,
+								key_count);
+
+	hci_dev_lock_bh(hdev);
+
+	hci_link_keys_clear(hdev);
+
+	set_bit(HCI_LINK_KEYS, &hdev->flags);
+
+	if (cp->debug_keys)
+		set_bit(HCI_DEBUG_KEYS, &hdev->flags);
+	else
+		clear_bit(HCI_DEBUG_KEYS, &hdev->flags);
+
+	for (i = 0; i < key_count; i++) {
+		struct mgmt_key_info *key = &cp->keys[i];
+
+		hci_add_link_key(hdev, 0, &key->bdaddr, key->val, key->type,
+								key->pin_len);
+	}
+
+	hci_dev_unlock_bh(hdev);
+	hci_dev_put(hdev);
 
 	return 0;
 }
 
+static int remove_key(struct sock *sk, unsigned char *data, u16 len)
+{
+	struct hci_dev *hdev;
+	struct mgmt_cp_remove_key *cp;
+	struct hci_conn *conn;
+	u16 dev_id;
+	int err;
+
+	cp = (void *) data;
+	dev_id = get_unaligned_le16(&cp->index);
+
+	hdev = hci_dev_get(dev_id);
+	if (!hdev)
+		return cmd_status(sk, MGMT_OP_REMOVE_KEY, ENODEV);
+
+	hci_dev_lock_bh(hdev);
+
+	err = hci_remove_link_key(hdev, &cp->bdaddr);
+	if (err < 0) {
+		err = cmd_status(sk, MGMT_OP_REMOVE_KEY, -err);
+		goto unlock;
+	}
+
+	err = 0;
+
+	if (!test_bit(HCI_UP, &hdev->flags) || !cp->disconnect)
+		goto unlock;
+
+	conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &cp->bdaddr);
+	if (conn) {
+		struct hci_cp_disconnect dc;
+
+		put_unaligned_le16(conn->handle, &dc.handle);
+		dc.reason = 0x13; /* Remote User Terminated Connection */
+		err = hci_send_cmd(hdev, HCI_OP_DISCONNECT, 0, NULL);
+	}
+
+unlock:
+	hci_dev_unlock_bh(hdev);
+	hci_dev_put(hdev);
+
+	return err;
+}
+
+static int disconnect(struct sock *sk, unsigned char *data, u16 len)
+{
+	struct hci_dev *hdev;
+	struct mgmt_cp_disconnect *cp;
+	struct hci_cp_disconnect dc;
+	struct hci_conn *conn;
+	u16 dev_id;
+	int err;
+
+	BT_DBG("");
+
+	cp = (void *) data;
+	dev_id = get_unaligned_le16(&cp->index);
+
+	hdev = hci_dev_get(dev_id);
+	if (!hdev)
+		return cmd_status(sk, MGMT_OP_DISCONNECT, ENODEV);
+
+	hci_dev_lock_bh(hdev);
+
+	if (!test_bit(HCI_UP, &hdev->flags)) {
+		err = cmd_status(sk, MGMT_OP_DISCONNECT, ENETDOWN);
+		goto failed;
+	}
+
+	if (mgmt_pending_find(MGMT_OP_DISCONNECT, dev_id)) {
+		err = cmd_status(sk, MGMT_OP_DISCONNECT, EBUSY);
+		goto failed;
+	}
+
+	conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &cp->bdaddr);
+	if (!conn) {
+		err = cmd_status(sk, MGMT_OP_DISCONNECT, ENOTCONN);
+		goto failed;
+	}
+
+	err = mgmt_pending_add(sk, MGMT_OP_DISCONNECT, dev_id, data, len);
+	if (err < 0)
+		goto failed;
+
+	put_unaligned_le16(conn->handle, &dc.handle);
+	dc.reason = 0x13; /* Remote User Terminated Connection */
+
+	err = hci_send_cmd(hdev, HCI_OP_DISCONNECT, sizeof(dc), &dc);
+	if (err < 0)
+		mgmt_pending_remove(MGMT_OP_DISCONNECT, dev_id);
+
+failed:
+	hci_dev_unlock_bh(hdev);
+	hci_dev_put(hdev);
+
+	return err;
+}
+
+static int get_connections(struct sock *sk, unsigned char *data, u16 len)
+{
+	struct mgmt_cp_get_connections *cp;
+	struct mgmt_rp_get_connections *rp;
+	struct hci_dev *hdev;
+	struct list_head *p;
+	size_t rp_len;
+	u16 dev_id, count;
+	int i, err;
+
+	BT_DBG("");
+
+	cp = (void *) data;
+	dev_id = get_unaligned_le16(&cp->index);
+
+	hdev = hci_dev_get(dev_id);
+	if (!hdev)
+		return cmd_status(sk, MGMT_OP_GET_CONNECTIONS, ENODEV);
+
+	hci_dev_lock_bh(hdev);
+
+	count = 0;
+	list_for_each(p, &hdev->conn_hash.list) {
+		count++;
+	}
+
+	rp_len = sizeof(*rp) + (count * sizeof(bdaddr_t));
+	rp = kmalloc(rp_len, GFP_ATOMIC);
+	if (!rp) {
+		err = -ENOMEM;
+		goto unlock;
+	}
+
+	put_unaligned_le16(dev_id, &rp->index);
+	put_unaligned_le16(count, &rp->conn_count);
+
+	read_lock(&hci_dev_list_lock);
+
+	i = 0;
+	list_for_each(p, &hdev->conn_hash.list) {
+		struct hci_conn *c = list_entry(p, struct hci_conn, list);
+
+		bacpy(&rp->conn[i++], &c->dst);
+	}
+
+	read_unlock(&hci_dev_list_lock);
+
+	err = cmd_complete(sk, MGMT_OP_GET_CONNECTIONS, rp, rp_len);
+
+unlock:
+	kfree(rp);
+	hci_dev_unlock_bh(hdev);
+	hci_dev_put(hdev);
+	return err;
+}
+
+static int pin_code_reply(struct sock *sk, unsigned char *data, u16 len)
+{
+	struct hci_dev *hdev;
+	struct mgmt_cp_pin_code_reply *cp;
+	struct hci_cp_pin_code_reply reply;
+	u16 dev_id;
+	int err;
+
+	BT_DBG("");
+
+	cp = (void *) data;
+	dev_id = get_unaligned_le16(&cp->index);
+
+	hdev = hci_dev_get(dev_id);
+	if (!hdev)
+		return cmd_status(sk, MGMT_OP_DISCONNECT, ENODEV);
+
+	hci_dev_lock_bh(hdev);
+
+	if (!test_bit(HCI_UP, &hdev->flags)) {
+		err = cmd_status(sk, MGMT_OP_PIN_CODE_REPLY, ENETDOWN);
+		goto failed;
+	}
+
+	err = mgmt_pending_add(sk, MGMT_OP_PIN_CODE_REPLY, dev_id, data, len);
+	if (err < 0)
+		goto failed;
+
+	bacpy(&reply.bdaddr, &cp->bdaddr);
+	reply.pin_len = cp->pin_len;
+	memcpy(reply.pin_code, cp->pin_code, 16);
+
+	err = hci_send_cmd(hdev, HCI_OP_PIN_CODE_REPLY, sizeof(reply), &reply);
+	if (err < 0)
+		mgmt_pending_remove(MGMT_OP_PIN_CODE_REPLY, dev_id);
+
+failed:
+	hci_dev_unlock_bh(hdev);
+	hci_dev_put(hdev);
+
+	return err;
+}
+
+static int pin_code_neg_reply(struct sock *sk, unsigned char *data, u16 len)
+{
+	struct hci_dev *hdev;
+	struct mgmt_cp_pin_code_neg_reply *cp;
+	u16 dev_id;
+	int err;
+
+	BT_DBG("");
+
+	cp = (void *) data;
+	dev_id = get_unaligned_le16(&cp->index);
+
+	hdev = hci_dev_get(dev_id);
+	if (!hdev)
+		return cmd_status(sk, MGMT_OP_PIN_CODE_NEG_REPLY, ENODEV);
+
+	hci_dev_lock_bh(hdev);
+
+	if (!test_bit(HCI_UP, &hdev->flags)) {
+		err = cmd_status(sk, MGMT_OP_PIN_CODE_NEG_REPLY, ENETDOWN);
+		goto failed;
+	}
+
+	err = mgmt_pending_add(sk, MGMT_OP_PIN_CODE_NEG_REPLY, dev_id,
+								data, len);
+	if (err < 0)
+		goto failed;
+
+	err = hci_send_cmd(hdev, HCI_OP_PIN_CODE_NEG_REPLY, sizeof(bdaddr_t),
+								&cp->bdaddr);
+	if (err < 0)
+		mgmt_pending_remove(MGMT_OP_PIN_CODE_NEG_REPLY, dev_id);
+
+failed:
+	hci_dev_unlock_bh(hdev);
+	hci_dev_put(hdev);
+
+	return err;
+}
+
+static int set_io_capability(struct sock *sk, unsigned char *data, u16 len)
+{
+	struct hci_dev *hdev;
+	struct mgmt_cp_set_io_capability *cp;
+	u16 dev_id;
+
+	BT_DBG("");
+
+	cp = (void *) data;
+	dev_id = get_unaligned_le16(&cp->index);
+
+	hdev = hci_dev_get(dev_id);
+	if (!hdev)
+		return cmd_status(sk, MGMT_OP_SET_IO_CAPABILITY, ENODEV);
+
+	hci_dev_lock_bh(hdev);
+
+	hdev->io_capability = cp->io_capability;
+
+	BT_DBG("%s IO capability set to 0x%02x", hdev->name,
+						hdev->io_capability);
+
+	hci_dev_unlock_bh(hdev);
+	hci_dev_put(hdev);
+
+	return cmd_complete(sk, MGMT_OP_SET_IO_CAPABILITY,
+						&dev_id, sizeof(dev_id));
+}
+
 int mgmt_control(struct sock *sk, struct msghdr *msg, size_t msglen)
 {
 	unsigned char *buf;
@@ -250,6 +1085,51 @@ int mgmt_control(struct sock *sk, struct msghdr *msg, size_t msglen)
 	case MGMT_OP_READ_INFO:
 		err = read_controller_info(sk, buf + sizeof(*hdr), len);
 		break;
+	case MGMT_OP_SET_POWERED:
+		err = set_powered(sk, buf + sizeof(*hdr), len);
+		break;
+	case MGMT_OP_SET_DISCOVERABLE:
+		err = set_discoverable(sk, buf + sizeof(*hdr), len);
+		break;
+	case MGMT_OP_SET_CONNECTABLE:
+		err = set_connectable(sk, buf + sizeof(*hdr), len);
+		break;
+	case MGMT_OP_SET_PAIRABLE:
+		err = set_pairable(sk, buf + sizeof(*hdr), len);
+		break;
+	case MGMT_OP_ADD_UUID:
+		err = add_uuid(sk, buf + sizeof(*hdr), len);
+		break;
+	case MGMT_OP_REMOVE_UUID:
+		err = remove_uuid(sk, buf + sizeof(*hdr), len);
+		break;
+	case MGMT_OP_SET_DEV_CLASS:
+		err = set_dev_class(sk, buf + sizeof(*hdr), len);
+		break;
+	case MGMT_OP_SET_SERVICE_CACHE:
+		err = set_service_cache(sk, buf + sizeof(*hdr), len);
+		break;
+	case MGMT_OP_LOAD_KEYS:
+		err = load_keys(sk, buf + sizeof(*hdr), len);
+		break;
+	case MGMT_OP_REMOVE_KEY:
+		err = remove_key(sk, buf + sizeof(*hdr), len);
+		break;
+	case MGMT_OP_DISCONNECT:
+		err = disconnect(sk, buf + sizeof(*hdr), len);
+		break;
+	case MGMT_OP_GET_CONNECTIONS:
+		err = get_connections(sk, buf + sizeof(*hdr), len);
+		break;
+	case MGMT_OP_PIN_CODE_REPLY:
+		err = pin_code_reply(sk, buf + sizeof(*hdr), len);
+		break;
+	case MGMT_OP_PIN_CODE_NEG_REPLY:
+		err = pin_code_neg_reply(sk, buf + sizeof(*hdr), len);
+		break;
+	case MGMT_OP_SET_IO_CAPABILITY:
+		err = set_io_capability(sk, buf + sizeof(*hdr), len);
+		break;
 	default:
 		BT_DBG("Unknown op %u", opcode);
 		err = cmd_status(sk, opcode, 0x01);
@@ -266,43 +1146,247 @@ done:
 	return err;
 }
 
-static int mgmt_event(u16 event, void *data, u16 data_len)
+int mgmt_index_added(u16 index)
 {
-	struct sk_buff *skb;
-	struct mgmt_hdr *hdr;
+	struct mgmt_ev_index_added ev;
 
-	skb = alloc_skb(sizeof(*hdr) + data_len, GFP_ATOMIC);
-	if (!skb)
-		return -ENOMEM;
+	put_unaligned_le16(index, &ev.index);
 
-	bt_cb(skb)->channel = HCI_CHANNEL_CONTROL;
+	return mgmt_event(MGMT_EV_INDEX_ADDED, &ev, sizeof(ev), NULL);
+}
 
-	hdr = (void *) skb_put(skb, sizeof(*hdr));
-	hdr->opcode = cpu_to_le16(event);
-	hdr->len = cpu_to_le16(data_len);
+int mgmt_index_removed(u16 index)
+{
+	struct mgmt_ev_index_added ev;
 
-	memcpy(skb_put(skb, data_len), data, data_len);
+	put_unaligned_le16(index, &ev.index);
 
-	hci_send_to_sock(NULL, skb);
-	kfree_skb(skb);
+	return mgmt_event(MGMT_EV_INDEX_REMOVED, &ev, sizeof(ev), NULL);
+}
 
-	return 0;
+struct cmd_lookup {
+	u8 val;
+	struct sock *sk;
+};
+
+static void mode_rsp(struct pending_cmd *cmd, void *data)
+{
+	struct mgmt_mode *cp = cmd->cmd;
+	struct cmd_lookup *match = data;
+
+	if (cp->val != match->val)
+		return;
+
+	send_mode_rsp(cmd->sk, cmd->opcode, cmd->index, cp->val);
+
+	list_del(&cmd->list);
+
+	if (match->sk == NULL) {
+		match->sk = cmd->sk;
+		sock_hold(match->sk);
+	}
+
+	mgmt_pending_free(cmd);
 }
 
-int mgmt_index_added(u16 index)
+int mgmt_powered(u16 index, u8 powered)
 {
-	struct mgmt_ev_index_added ev;
+	struct mgmt_mode ev;
+	struct cmd_lookup match = { powered, NULL };
+	int ret;
+
+	mgmt_pending_foreach(MGMT_OP_SET_POWERED, index, mode_rsp, &match);
 
 	put_unaligned_le16(index, &ev.index);
+	ev.val = powered;
+
+	ret = mgmt_event(MGMT_EV_POWERED, &ev, sizeof(ev), match.sk);
 
-	return mgmt_event(MGMT_EV_INDEX_ADDED, &ev, sizeof(ev));
+	if (match.sk)
+		sock_put(match.sk);
+
+	return ret;
 }
 
-int mgmt_index_removed(u16 index)
+int mgmt_discoverable(u16 index, u8 discoverable)
 {
-	struct mgmt_ev_index_added ev;
+	struct mgmt_mode ev;
+	struct cmd_lookup match = { discoverable, NULL };
+	int ret;
+
+	mgmt_pending_foreach(MGMT_OP_SET_DISCOVERABLE, index,
+							mode_rsp, &match);
+
+	put_unaligned_le16(index, &ev.index);
+	ev.val = discoverable;
+
+	ret = mgmt_event(MGMT_EV_DISCOVERABLE, &ev, sizeof(ev), match.sk);
+
+	if (match.sk)
+		sock_put(match.sk);
+
+	return ret;
+}
+
+int mgmt_connectable(u16 index, u8 connectable)
+{
+	struct mgmt_mode ev;
+	struct cmd_lookup match = { connectable, NULL };
+	int ret;
+
+	mgmt_pending_foreach(MGMT_OP_SET_CONNECTABLE, index, mode_rsp, &match);
+
+	put_unaligned_le16(index, &ev.index);
+	ev.val = connectable;
+
+	ret = mgmt_event(MGMT_EV_CONNECTABLE, &ev, sizeof(ev), match.sk);
+
+	if (match.sk)
+		sock_put(match.sk);
+
+	return ret;
+}
+
+int mgmt_new_key(u16 index, struct link_key *key, u8 old_key_type)
+{
+	struct mgmt_ev_new_key ev;
+
+	memset(&ev, 0, sizeof(ev));
+
+	put_unaligned_le16(index, &ev.index);
+
+	bacpy(&ev.key.bdaddr, &key->bdaddr);
+	ev.key.type = key->type;
+	memcpy(ev.key.val, key->val, 16);
+	ev.key.pin_len = key->pin_len;
+	ev.old_key_type = old_key_type;
+
+	return mgmt_event(MGMT_EV_NEW_KEY, &ev, sizeof(ev), NULL);
+}
+
+int mgmt_connected(u16 index, bdaddr_t *bdaddr)
+{
+	struct mgmt_ev_connected ev;
 
 	put_unaligned_le16(index, &ev.index);
+	bacpy(&ev.bdaddr, bdaddr);
 
-	return mgmt_event(MGMT_EV_INDEX_REMOVED, &ev, sizeof(ev));
+	return mgmt_event(MGMT_EV_CONNECTED, &ev, sizeof(ev), NULL);
+}
+
+static void disconnect_rsp(struct pending_cmd *cmd, void *data)
+{
+	struct mgmt_cp_disconnect *cp = cmd->cmd;
+	struct sock **sk = data;
+	struct mgmt_rp_disconnect rp;
+
+	put_unaligned_le16(cmd->index, &rp.index);
+	bacpy(&rp.bdaddr, &cp->bdaddr);
+
+	cmd_complete(cmd->sk, MGMT_OP_DISCONNECT, &rp, sizeof(rp));
+
+	*sk = cmd->sk;
+	sock_hold(*sk);
+
+	list_del(&cmd->list);
+	mgmt_pending_free(cmd);
+}
+
+int mgmt_disconnected(u16 index, bdaddr_t *bdaddr)
+{
+	struct mgmt_ev_disconnected ev;
+	struct sock *sk = NULL;
+	int err;
+
+	mgmt_pending_foreach(MGMT_OP_DISCONNECT, index, disconnect_rsp, &sk);
+
+	put_unaligned_le16(index, &ev.index);
+	bacpy(&ev.bdaddr, bdaddr);
+
+	err = mgmt_event(MGMT_EV_DISCONNECTED, &ev, sizeof(ev), sk);
+
+	if (sk)
+		sock_put(sk);
+
+	return err;
+}
+
+int mgmt_disconnect_failed(u16 index)
+{
+	struct pending_cmd *cmd;
+	int err;
+
+	cmd = mgmt_pending_find(MGMT_OP_DISCONNECT, index);
+	if (!cmd)
+		return -ENOENT;
+
+	err = cmd_status(cmd->sk, MGMT_OP_DISCONNECT, EIO);
+
+	list_del(&cmd->list);
+	mgmt_pending_free(cmd);
+
+	return err;
+}
+
+int mgmt_connect_failed(u16 index, bdaddr_t *bdaddr, u8 status)
+{
+	struct mgmt_ev_connect_failed ev;
+
+	put_unaligned_le16(index, &ev.index);
+	bacpy(&ev.bdaddr, bdaddr);
+	ev.status = status;
+
+	return mgmt_event(MGMT_EV_CONNECT_FAILED, &ev, sizeof(ev), NULL);
+}
+
+int mgmt_pin_code_request(u16 index, bdaddr_t *bdaddr)
+{
+	struct mgmt_ev_pin_code_request ev;
+
+	put_unaligned_le16(index, &ev.index);
+	bacpy(&ev.bdaddr, bdaddr);
+
+	return mgmt_event(MGMT_EV_PIN_CODE_REQUEST, &ev, sizeof(ev), NULL);
+}
+
+int mgmt_pin_code_reply_complete(u16 index, bdaddr_t *bdaddr, u8 status)
+{
+	struct pending_cmd *cmd;
+	int err;
+
+	cmd = mgmt_pending_find(MGMT_OP_PIN_CODE_REPLY, index);
+	if (!cmd)
+		return -ENOENT;
+
+	if (status != 0)
+		err = cmd_status(cmd->sk, MGMT_OP_PIN_CODE_REPLY, status);
+	else
+		err = cmd_complete(cmd->sk, MGMT_OP_PIN_CODE_REPLY,
+						bdaddr, sizeof(*bdaddr));
+
+	list_del(&cmd->list);
+	mgmt_pending_free(cmd);
+
+	return err;
+}
+
+int mgmt_pin_code_neg_reply_complete(u16 index, bdaddr_t *bdaddr, u8 status)
+{
+	struct pending_cmd *cmd;
+	int err;
+
+	cmd = mgmt_pending_find(MGMT_OP_PIN_CODE_NEG_REPLY, index);
+	if (!cmd)
+		return -ENOENT;
+
+	if (status != 0)
+		err = cmd_status(cmd->sk, MGMT_OP_PIN_CODE_NEG_REPLY, status);
+	else
+		err = cmd_complete(cmd->sk, MGMT_OP_PIN_CODE_NEG_REPLY,
+						bdaddr, sizeof(*bdaddr));
+
+	list_del(&cmd->list);
+	mgmt_pending_free(cmd);
+
+	return err;
 }
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index 6b83776..c997393 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -2154,8 +2154,6 @@ static int __init rfcomm_init(void)
 {
 	int err;
 
-	l2cap_load();
-
 	hci_register_cb(&rfcomm_cb);
 
 	rfcomm_thread = kthread_run(rfcomm_run, NULL, "krfcommd");
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index 960c6d1..c9348dd 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -50,8 +50,6 @@
 #include <net/bluetooth/hci_core.h>
 #include <net/bluetooth/sco.h>
 
-#define VERSION "0.6"
-
 static int disable_esco;
 
 static const struct proto_ops sco_sock_ops;
@@ -703,6 +701,7 @@ static int sco_sock_getsockopt_old(struct socket *sock, int optname, char __user
 			break;
 		}
 
+		memset(&cinfo, 0, sizeof(cinfo));
 		cinfo.hci_handle = sco_pi(sk)->conn->hcon->handle;
 		memcpy(cinfo.dev_class, sco_pi(sk)->conn->hcon->dev_class, 3);
 
@@ -1023,7 +1022,7 @@ static struct hci_proto sco_hci_proto = {
 	.recv_scodata	= sco_recv_scodata
 };
 
-static int __init sco_init(void)
+int __init sco_init(void)
 {
 	int err;
 
@@ -1051,7 +1050,6 @@ static int __init sco_init(void)
 			BT_ERR("Failed to create SCO debug file");
 	}
 
-	BT_INFO("SCO (Voice Link) ver %s", VERSION);
 	BT_INFO("SCO socket layer initialized");
 
 	return 0;
@@ -1061,7 +1059,7 @@ error:
 	return err;
 }
 
-static void __exit sco_exit(void)
+void __exit sco_exit(void)
 {
 	debugfs_remove(sco_debugfs);
 
@@ -1074,14 +1072,5 @@ static void __exit sco_exit(void)
 	proto_unregister(&sco_proto);
 }
 
-module_init(sco_init);
-module_exit(sco_exit);
-
 module_param(disable_esco, bool, 0644);
 MODULE_PARM_DESC(disable_esco, "Disable eSCO connection creation");
-
-MODULE_AUTHOR("Marcel Holtmann <marcel@holtmann.org>");
-MODULE_DESCRIPTION("Bluetooth SCO ver " VERSION);
-MODULE_VERSION(VERSION);
-MODULE_LICENSE("GPL");
-MODULE_ALIAS("bt-proto-2");
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 5564435..1461b19 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -297,6 +297,21 @@ void br_netpoll_disable(struct net_bridge_port *p)
 
 #endif
 
+static int br_add_slave(struct net_device *dev, struct net_device *slave_dev)
+
+{
+	struct net_bridge *br = netdev_priv(dev);
+
+	return br_add_if(br, slave_dev);
+}
+
+static int br_del_slave(struct net_device *dev, struct net_device *slave_dev)
+{
+	struct net_bridge *br = netdev_priv(dev);
+
+	return br_del_if(br, slave_dev);
+}
+
 static const struct ethtool_ops br_ethtool_ops = {
 	.get_drvinfo    = br_getinfo,
 	.get_link	= ethtool_op_get_link,
@@ -326,6 +341,8 @@ static const struct net_device_ops br_netdev_ops = {
 	.ndo_netpoll_cleanup	 = br_netpoll_cleanup,
 	.ndo_poll_controller	 = br_poll_controller,
 #endif
+	.ndo_add_slave		 = br_add_slave,
+	.ndo_del_slave		 = br_del_slave,
 };
 
 static void br_dev_free(struct net_device *dev)
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index d9d1e2b..dce8f00 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -148,6 +148,8 @@ static void del_nbp(struct net_bridge_port *p)
 
 	netdev_rx_handler_unregister(dev);
 
+	netdev_set_master(dev, NULL);
+
 	br_multicast_del_port(p);
 
 	kobject_uevent(&p->kobj, KOBJ_REMOVE);
@@ -365,7 +367,7 @@ int br_min_mtu(const struct net_bridge *br)
 void br_features_recompute(struct net_bridge *br)
 {
 	struct net_bridge_port *p;
-	unsigned long features, mask;
+	u32 features, mask;
 
 	features = mask = br->feature_mask;
 	if (list_empty(&br->port_list))
@@ -379,7 +381,7 @@ void br_features_recompute(struct net_bridge *br)
 	}
 
 done:
-	br->dev->features = netdev_fix_features(features, NULL);
+	br->dev->features = netdev_fix_features(br->dev, features);
 }
 
 /* called with RTNL */
@@ -429,10 +431,14 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
 	if (br_netpoll_info(br) && ((err = br_netpoll_enable(p))))
 		goto err3;
 
-	err = netdev_rx_handler_register(dev, br_handle_frame, p);
+	err = netdev_set_master(dev, br->dev);
 	if (err)
 		goto err3;
 
+	err = netdev_rx_handler_register(dev, br_handle_frame, p);
+	if (err)
+		goto err4;
+
 	dev->priv_flags |= IFF_BRIDGE_PORT;
 
 	dev_disable_lro(dev);
@@ -455,6 +461,9 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
 	kobject_uevent(&p->kobj, KOBJ_ADD);
 
 	return 0;
+
+err4:
+	netdev_set_master(dev, NULL);
 err3:
 	sysfs_remove_link(br->ifobj, p->dev->name);
 err2:
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 4b5b66d..45b57b1 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -428,14 +428,15 @@ static int br_nf_pre_routing_finish(struct sk_buff *skb)
 			if (err != -EHOSTUNREACH || !in_dev || IN_DEV_FORWARD(in_dev))
 				goto free_skb;
 
-			if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
+			rt = ip_route_output_key(dev_net(dev), &fl);
+			if (!IS_ERR(rt)) {
 				/* - Bridged-and-DNAT'ed traffic doesn't
 				 *   require ip_forwarding. */
-				if (((struct dst_entry *)rt)->dev == dev) {
-					skb_dst_set(skb, (struct dst_entry *)rt);
+				if (rt->dst.dev == dev) {
+					skb_dst_set(skb, &rt->dst);
 					goto bridged_dnat;
 				}
-				dst_release((struct dst_entry *)rt);
+				ip_rt_put(rt);
 			}
 free_skb:
 			kfree_skb(skb);
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 4e1b620..f7afc36 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -182,7 +182,7 @@ struct net_bridge
 	struct br_cpu_netstats __percpu *stats;
 	spinlock_t			hash_lock;
 	struct hlist_head		hash[BR_HASH_SIZE];
-	unsigned long			feature_mask;
+	u32				feature_mask;
 #ifdef CONFIG_BRIDGE_NETFILTER
 	struct rtable 			fake_rtable;
 	bool				nf_call_iptables;
diff --git a/net/bridge/netfilter/ebt_ip6.c b/net/bridge/netfilter/ebt_ip6.c
index 50a46af..2ed0056 100644
--- a/net/bridge/netfilter/ebt_ip6.c
+++ b/net/bridge/netfilter/ebt_ip6.c
@@ -22,9 +22,15 @@
 #include <linux/netfilter_bridge/ebtables.h>
 #include <linux/netfilter_bridge/ebt_ip6.h>
 
-struct tcpudphdr {
-	__be16 src;
-	__be16 dst;
+union pkthdr {
+	struct {
+		__be16 src;
+		__be16 dst;
+	} tcpudphdr;
+	struct {
+		u8 type;
+		u8 code;
+	} icmphdr;
 };
 
 static bool
@@ -33,8 +39,8 @@ ebt_ip6_mt(const struct sk_buff *skb, struct xt_action_param *par)
 	const struct ebt_ip6_info *info = par->matchinfo;
 	const struct ipv6hdr *ih6;
 	struct ipv6hdr _ip6h;
-	const struct tcpudphdr *pptr;
-	struct tcpudphdr _ports;
+	const union pkthdr *pptr;
+	union pkthdr _pkthdr;
 
 	ih6 = skb_header_pointer(skb, 0, sizeof(_ip6h), &_ip6h);
 	if (ih6 == NULL)
@@ -56,26 +62,34 @@ ebt_ip6_mt(const struct sk_buff *skb, struct xt_action_param *par)
 			return false;
 		if (FWINV(info->protocol != nexthdr, EBT_IP6_PROTO))
 			return false;
-		if (!(info->bitmask & EBT_IP6_DPORT) &&
-		    !(info->bitmask & EBT_IP6_SPORT))
+		if (!(info->bitmask & ( EBT_IP6_DPORT |
+					EBT_IP6_SPORT | EBT_IP6_ICMP6)))
 			return true;
-		pptr = skb_header_pointer(skb, offset_ph, sizeof(_ports),
-					  &_ports);
+
+		/* min icmpv6 headersize is 4, so sizeof(_pkthdr) is ok. */
+		pptr = skb_header_pointer(skb, offset_ph, sizeof(_pkthdr),
+					  &_pkthdr);
 		if (pptr == NULL)
 			return false;
 		if (info->bitmask & EBT_IP6_DPORT) {
-			u32 dst = ntohs(pptr->dst);
+			u16 dst = ntohs(pptr->tcpudphdr.dst);
 			if (FWINV(dst < info->dport[0] ||
 				  dst > info->dport[1], EBT_IP6_DPORT))
 				return false;
 		}
 		if (info->bitmask & EBT_IP6_SPORT) {
-			u32 src = ntohs(pptr->src);
+			u16 src = ntohs(pptr->tcpudphdr.src);
 			if (FWINV(src < info->sport[0] ||
 				  src > info->sport[1], EBT_IP6_SPORT))
 			return false;
 		}
-		return true;
+		if ((info->bitmask & EBT_IP6_ICMP6) &&
+		     FWINV(pptr->icmphdr.type < info->icmpv6_type[0] ||
+			   pptr->icmphdr.type > info->icmpv6_type[1] ||
+			   pptr->icmphdr.code < info->icmpv6_code[0] ||
+			   pptr->icmphdr.code > info->icmpv6_code[1],
+							EBT_IP6_ICMP6))
+			return false;
 	}
 	return true;
 }
@@ -103,6 +117,14 @@ static int ebt_ip6_mt_check(const struct xt_mtchk_param *par)
 		return -EINVAL;
 	if (info->bitmask & EBT_IP6_SPORT && info->sport[0] > info->sport[1])
 		return -EINVAL;
+	if (info->bitmask & EBT_IP6_ICMP6) {
+		if ((info->invflags & EBT_IP6_PROTO) ||
+		     info->protocol != IPPROTO_ICMPV6)
+			return -EINVAL;
+		if (info->icmpv6_type[0] > info->icmpv6_type[1] ||
+		    info->icmpv6_code[0] > info->icmpv6_code[1])
+			return -EINVAL;
+	}
 	return 0;
 }
 
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 16df053..893669c 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -1107,6 +1107,8 @@ static int do_replace(struct net *net, const void __user *user,
 	if (tmp.num_counters >= INT_MAX / sizeof(struct ebt_counter))
 		return -ENOMEM;
 
+	tmp.name[sizeof(tmp.name) - 1] = 0;
+
 	countersize = COUNTER_OFFSET(tmp.nentries) * nr_cpu_ids;
 	newinfo = vmalloc(sizeof(*newinfo) + countersize);
 	if (!newinfo)
@@ -1764,6 +1766,7 @@ static int compat_table_info(const struct ebt_table_info *info,
 
 	newinfo->entries_size = size;
 
+	xt_compat_init_offsets(AF_INET, info->nentries);
 	return EBT_ENTRY_ITERATE(entries, size, compat_calc_entry, info,
 							entries, newinfo);
 }
diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c
index c665de7..f1f98d9 100644
--- a/net/caif/cfcnfg.c
+++ b/net/caif/cfcnfg.c
@@ -23,10 +23,8 @@
 #include <asm/atomic.h>
 
 #define MAX_PHY_LAYERS 7
-#define PHY_NAME_LEN 20
 
 #define container_obj(layr) container_of(layr, struct cfcnfg, layer)
-#define RFM_FRAGMENT_SIZE 4030
 
 /* Information about CAIF physical interfaces held by Config Module in order
  * to manage physical interfaces
diff --git a/net/caif/cfdgml.c b/net/caif/cfdgml.c
index d3ed264..27dab26 100644
--- a/net/caif/cfdgml.c
+++ b/net/caif/cfdgml.c
@@ -18,7 +18,6 @@
 #define DGM_CMD_BIT  0x80
 #define DGM_FLOW_OFF 0x81
 #define DGM_FLOW_ON  0x80
-#define DGM_CTRL_PKT_SIZE 1
 #define DGM_MTU 1500
 
 static int cfdgml_receive(struct cflayer *layr, struct cfpkt *pkt);
diff --git a/net/caif/cfserl.c b/net/caif/cfserl.c
index 9297f7d..8303fe3 100644
--- a/net/caif/cfserl.c
+++ b/net/caif/cfserl.c
@@ -25,7 +25,6 @@ struct cfserl {
 	spinlock_t sync;
 	bool usestx;
 };
-#define STXLEN(layr) (layr->usestx ? 1 : 0)
 
 static int cfserl_receive(struct cflayer *layr, struct cfpkt *pkt);
 static int cfserl_transmit(struct cflayer *layr, struct cfpkt *pkt);
diff --git a/net/caif/cfutill.c b/net/caif/cfutill.c
index efad410..315c0d6 100644
--- a/net/caif/cfutill.c
+++ b/net/caif/cfutill.c
@@ -20,7 +20,7 @@
 #define UTIL_REMOTE_SHUTDOWN 0x82
 #define UTIL_FLOW_OFF 0x81
 #define UTIL_FLOW_ON  0x80
-#define UTIL_CTRL_PKT_SIZE 1
+
 static int cfutill_receive(struct cflayer *layr, struct cfpkt *pkt);
 static int cfutill_transmit(struct cflayer *layr, struct cfpkt *pkt);
 
diff --git a/net/caif/cfveil.c b/net/caif/cfveil.c
index 3b425b1..c3b1dec 100644
--- a/net/caif/cfveil.c
+++ b/net/caif/cfveil.c
@@ -17,7 +17,7 @@
 #define VEI_FLOW_OFF 0x81
 #define VEI_FLOW_ON  0x80
 #define VEI_SET_PIN  0x82
-#define VEI_CTRL_PKT_SIZE 1
+
 #define container_obj(layr) container_of(layr, struct cfsrvl, layer)
 
 static int cfvei_receive(struct cflayer *layr, struct cfpkt *pkt);
diff --git a/net/core/dev.c b/net/core/dev.c
index 6561021..0d39032 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -132,6 +132,7 @@
 #include <trace/events/skb.h>
 #include <linux/pci.h>
 #include <linux/inetdevice.h>
+#include <linux/cpu_rmap.h>
 
 #include "net-sysfs.h"
 
@@ -1297,7 +1298,7 @@ static int __dev_close(struct net_device *dev)
 	return retval;
 }
 
-int dev_close_many(struct list_head *head)
+static int dev_close_many(struct list_head *head)
 {
 	struct net_device *dev, *tmp;
 	LIST_HEAD(tmp_list);
@@ -1605,6 +1606,48 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
 	rcu_read_unlock();
 }
 
+/* netif_setup_tc - Handle tc mappings on real_num_tx_queues change
+ * @dev: Network device
+ * @txq: number of queues available
+ *
+ * If real_num_tx_queues is changed the tc mappings may no longer be
+ * valid. To resolve this verify the tc mapping remains valid and if
+ * not NULL the mapping. With no priorities mapping to this
+ * offset/count pair it will no longer be used. In the worst case TC0
+ * is invalid nothing can be done so disable priority mappings. If is
+ * expected that drivers will fix this mapping if they can before
+ * calling netif_set_real_num_tx_queues.
+ */
+static void netif_setup_tc(struct net_device *dev, unsigned int txq)
+{
+	int i;
+	struct netdev_tc_txq *tc = &dev->tc_to_txq[0];
+
+	/* If TC0 is invalidated disable TC mapping */
+	if (tc->offset + tc->count > txq) {
+		pr_warning("Number of in use tx queues changed "
+			   "invalidating tc mappings. Priority "
+			   "traffic classification disabled!\n");
+		dev->num_tc = 0;
+		return;
+	}
+
+	/* Invalidated prio to tc mappings set to TC0 */
+	for (i = 1; i < TC_BITMASK + 1; i++) {
+		int q = netdev_get_prio_tc_map(dev, i);
+
+		tc = &dev->tc_to_txq[q];
+		if (tc->offset + tc->count > txq) {
+			pr_warning("Number of in use tx queues "
+				   "changed. Priority %i to tc "
+				   "mapping %i is no longer valid "
+				   "setting map to 0\n",
+				   i, q);
+			netdev_set_prio_tc_map(dev, i, 0);
+		}
+	}
+}
+
 /*
  * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues
  * greater then real_num_tx_queues stale skbs on the qdisc must be flushed.
@@ -1616,7 +1659,8 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
 	if (txq < 1 || txq > dev->num_tx_queues)
 		return -EINVAL;
 
-	if (dev->reg_state == NETREG_REGISTERED) {
+	if (dev->reg_state == NETREG_REGISTERED ||
+	    dev->reg_state == NETREG_UNREGISTERING) {
 		ASSERT_RTNL();
 
 		rc = netdev_queue_update_kobjects(dev, dev->real_num_tx_queues,
@@ -1624,6 +1668,9 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
 		if (rc)
 			return rc;
 
+		if (dev->num_tc)
+			netif_setup_tc(dev, txq);
+
 		if (txq < dev->real_num_tx_queues)
 			qdisc_reset_all_tx_gt(dev, txq);
 	}
@@ -1823,7 +1870,7 @@ EXPORT_SYMBOL(skb_checksum_help);
  *	It may return NULL if the skb requires no segmentation.  This is
  *	only possible when GSO is used for verifying header integrity.
  */
-struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
+struct sk_buff *skb_gso_segment(struct sk_buff *skb, u32 features)
 {
 	struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
 	struct packet_type *ptype;
@@ -2011,7 +2058,7 @@ static bool can_checksum_protocol(unsigned long features, __be16 protocol)
 		 protocol == htons(ETH_P_FCOE)));
 }
 
-static int harmonize_features(struct sk_buff *skb, __be16 protocol, int features)
+static u32 harmonize_features(struct sk_buff *skb, __be16 protocol, u32 features)
 {
 	if (!can_checksum_protocol(features, protocol)) {
 		features &= ~NETIF_F_ALL_CSUM;
@@ -2023,10 +2070,10 @@ static int harmonize_features(struct sk_buff *skb, __be16 protocol, int features
 	return features;
 }
 
-int netif_skb_features(struct sk_buff *skb)
+u32 netif_skb_features(struct sk_buff *skb)
 {
 	__be16 protocol = skb->protocol;
-	int features = skb->dev->features;
+	u32 features = skb->dev->features;
 
 	if (protocol == htons(ETH_P_8021Q)) {
 		struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
@@ -2071,7 +2118,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 	int rc = NETDEV_TX_OK;
 
 	if (likely(!skb->next)) {
-		int features;
+		u32 features;
 
 		/*
 		 * If device doesnt need skb->dst, release it right now while
@@ -2173,6 +2220,8 @@ u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb,
 		  unsigned int num_tx_queues)
 {
 	u32 hash;
+	u16 qoffset = 0;
+	u16 qcount = num_tx_queues;
 
 	if (skb_rx_queue_recorded(skb)) {
 		hash = skb_get_rx_queue(skb);
@@ -2181,13 +2230,19 @@ u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb,
 		return hash;
 	}
 
+	if (dev->num_tc) {
+		u8 tc = netdev_get_prio_tc_map(dev, skb->priority);
+		qoffset = dev->tc_to_txq[tc].offset;
+		qcount = dev->tc_to_txq[tc].count;
+	}
+
 	if (skb->sk && skb->sk->sk_hash)
 		hash = skb->sk->sk_hash;
 	else
 		hash = (__force u16) skb->protocol ^ skb->rxhash;
 	hash = jhash_1word(hash, hashrnd);
 
-	return (u16) (((u64) hash * num_tx_queues) >> 32);
+	return (u16) (((u64) hash * qcount) >> 32) + qoffset;
 }
 EXPORT_SYMBOL(__skb_tx_hash);
 
@@ -2284,15 +2339,18 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 				 struct netdev_queue *txq)
 {
 	spinlock_t *root_lock = qdisc_lock(q);
-	bool contended = qdisc_is_running(q);
+	bool contended;
 	int rc;
 
+	qdisc_skb_cb(skb)->pkt_len = skb->len;
+	qdisc_calculate_pkt_len(skb, q);
 	/*
 	 * Heuristic to force contended enqueues to serialize on a
 	 * separate lock before trying to get qdisc main lock.
 	 * This permits __QDISC_STATE_RUNNING owner to get the lock more often
 	 * and dequeue packets faster.
 	 */
+	contended = qdisc_is_running(q);
 	if (unlikely(contended))
 		spin_lock(&q->busylock);
 
@@ -2310,7 +2368,6 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 		if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE))
 			skb_dst_force(skb);
 
-		qdisc_skb_cb(skb)->pkt_len = skb->len;
 		qdisc_bstats_update(q, skb);
 
 		if (sch_direct_xmit(skb, q, dev, txq, root_lock)) {
@@ -2325,7 +2382,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 		rc = NET_XMIT_SUCCESS;
 	} else {
 		skb_dst_force(skb);
-		rc = qdisc_enqueue_root(skb, q);
+		rc = q->enqueue(skb, q) & NET_XMIT_MASK;
 		if (qdisc_run_begin(q)) {
 			if (unlikely(contended)) {
 				spin_unlock(&q->busylock);
@@ -2544,6 +2601,54 @@ EXPORT_SYMBOL(__skb_get_rxhash);
 struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly;
 EXPORT_SYMBOL(rps_sock_flow_table);
 
+static struct rps_dev_flow *
+set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
+	    struct rps_dev_flow *rflow, u16 next_cpu)
+{
+	u16 tcpu;
+
+	tcpu = rflow->cpu = next_cpu;
+	if (tcpu != RPS_NO_CPU) {
+#ifdef CONFIG_RFS_ACCEL
+		struct netdev_rx_queue *rxqueue;
+		struct rps_dev_flow_table *flow_table;
+		struct rps_dev_flow *old_rflow;
+		u32 flow_id;
+		u16 rxq_index;
+		int rc;
+
+		/* Should we steer this flow to a different hardware queue? */
+		if (!skb_rx_queue_recorded(skb) || !dev->rx_cpu_rmap ||
+		    !(dev->features & NETIF_F_NTUPLE))
+			goto out;
+		rxq_index = cpu_rmap_lookup_index(dev->rx_cpu_rmap, next_cpu);
+		if (rxq_index == skb_get_rx_queue(skb))
+			goto out;
+
+		rxqueue = dev->_rx + rxq_index;
+		flow_table = rcu_dereference(rxqueue->rps_flow_table);
+		if (!flow_table)
+			goto out;
+		flow_id = skb->rxhash & flow_table->mask;
+		rc = dev->netdev_ops->ndo_rx_flow_steer(dev, skb,
+							rxq_index, flow_id);
+		if (rc < 0)
+			goto out;
+		old_rflow = rflow;
+		rflow = &flow_table->flows[flow_id];
+		rflow->cpu = next_cpu;
+		rflow->filter = rc;
+		if (old_rflow->filter == rflow->filter)
+			old_rflow->filter = RPS_NO_FILTER;
+	out:
+#endif
+		rflow->last_qtail =
+			per_cpu(softnet_data, tcpu).input_queue_head;
+	}
+
+	return rflow;
+}
+
 /*
  * get_rps_cpu is called from netif_receive_skb and returns the target
  * CPU from the RPS map of the receiving queue for a given skb.
@@ -2615,12 +2720,9 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
 		if (unlikely(tcpu != next_cpu) &&
 		    (tcpu == RPS_NO_CPU || !cpu_online(tcpu) ||
 		     ((int)(per_cpu(softnet_data, tcpu).input_queue_head -
-		      rflow->last_qtail)) >= 0)) {
-			tcpu = rflow->cpu = next_cpu;
-			if (tcpu != RPS_NO_CPU)
-				rflow->last_qtail = per_cpu(softnet_data,
-				    tcpu).input_queue_head;
-		}
+		      rflow->last_qtail)) >= 0))
+			rflow = set_rps_cpu(dev, skb, rflow, next_cpu);
+
 		if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) {
 			*rflowp = rflow;
 			cpu = tcpu;
@@ -2641,6 +2743,46 @@ done:
 	return cpu;
 }
 
+#ifdef CONFIG_RFS_ACCEL
+
+/**
+ * rps_may_expire_flow - check whether an RFS hardware filter may be removed
+ * @dev: Device on which the filter was set
+ * @rxq_index: RX queue index
+ * @flow_id: Flow ID passed to ndo_rx_flow_steer()
+ * @filter_id: Filter ID returned by ndo_rx_flow_steer()
+ *
+ * Drivers that implement ndo_rx_flow_steer() should periodically call
+ * this function for each installed filter and remove the filters for
+ * which it returns %true.
+ */
+bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index,
+			 u32 flow_id, u16 filter_id)
+{
+	struct netdev_rx_queue *rxqueue = dev->_rx + rxq_index;
+	struct rps_dev_flow_table *flow_table;
+	struct rps_dev_flow *rflow;
+	bool expire = true;
+	int cpu;
+
+	rcu_read_lock();
+	flow_table = rcu_dereference(rxqueue->rps_flow_table);
+	if (flow_table && flow_id <= flow_table->mask) {
+		rflow = &flow_table->flows[flow_id];
+		cpu = ACCESS_ONCE(rflow->cpu);
+		if (rflow->filter == filter_id && cpu != RPS_NO_CPU &&
+		    ((int)(per_cpu(softnet_data, cpu).input_queue_head -
+			   rflow->last_qtail) <
+		     (int)(10 * flow_table->mask)))
+			expire = false;
+	}
+	rcu_read_unlock();
+	return expire;
+}
+EXPORT_SYMBOL(rps_may_expire_flow);
+
+#endif /* CONFIG_RFS_ACCEL */
+
 /* Called from hardirq (IPI) context */
 static void rps_trigger_softirq(void *data)
 {
@@ -2962,64 +3104,31 @@ void netdev_rx_handler_unregister(struct net_device *dev)
 }
 EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister);
 
-static inline void skb_bond_set_mac_by_master(struct sk_buff *skb,
-					      struct net_device *master)
-{
-	if (skb->pkt_type == PACKET_HOST) {
-		u16 *dest = (u16 *) eth_hdr(skb)->h_dest;
-
-		memcpy(dest, master->dev_addr, ETH_ALEN);
-	}
-}
-
-/* On bonding slaves other than the currently active slave, suppress
- * duplicates except for 802.3ad ETH_P_SLOW, alb non-mcast/bcast, and
- * ARP on active-backup slaves with arp_validate enabled.
- */
-int __skb_bond_should_drop(struct sk_buff *skb, struct net_device *master)
+static void vlan_on_bond_hook(struct sk_buff *skb)
 {
-	struct net_device *dev = skb->dev;
-
-	if (master->priv_flags & IFF_MASTER_ARPMON)
-		dev->last_rx = jiffies;
-
-	if ((master->priv_flags & IFF_MASTER_ALB) &&
-	    (master->priv_flags & IFF_BRIDGE_PORT)) {
-		/* Do address unmangle. The local destination address
-		 * will be always the one master has. Provides the right
-		 * functionality in a bridge.
-		 */
-		skb_bond_set_mac_by_master(skb, master);
-	}
-
-	if (dev->priv_flags & IFF_SLAVE_INACTIVE) {
-		if ((dev->priv_flags & IFF_SLAVE_NEEDARP) &&
-		    skb->protocol == __cpu_to_be16(ETH_P_ARP))
-			return 0;
-
-		if (master->priv_flags & IFF_MASTER_ALB) {
-			if (skb->pkt_type != PACKET_BROADCAST &&
-			    skb->pkt_type != PACKET_MULTICAST)
-				return 0;
-		}
-		if (master->priv_flags & IFF_MASTER_8023AD &&
-		    skb->protocol == __cpu_to_be16(ETH_P_SLOW))
-			return 0;
+	/*
+	 * Make sure ARP frames received on VLAN interfaces stacked on
+	 * bonding interfaces still make their way to any base bonding
+	 * device that may have registered for a specific ptype.
+	 */
+	if (skb->dev->priv_flags & IFF_802_1Q_VLAN &&
+	    vlan_dev_real_dev(skb->dev)->priv_flags & IFF_BONDING &&
+	    skb->protocol == htons(ETH_P_ARP)) {
+		struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
 
-		return 1;
+		if (!skb2)
+			return;
+		skb2->dev = vlan_dev_real_dev(skb->dev);
+		netif_rx(skb2);
 	}
-	return 0;
 }
-EXPORT_SYMBOL(__skb_bond_should_drop);
 
 static int __netif_receive_skb(struct sk_buff *skb)
 {
 	struct packet_type *ptype, *pt_prev;
 	rx_handler_func_t *rx_handler;
 	struct net_device *orig_dev;
-	struct net_device *master;
-	struct net_device *null_or_orig;
-	struct net_device *orig_or_bond;
+	struct net_device *null_or_dev;
 	int ret = NET_RX_DROP;
 	__be16 type;
 
@@ -3034,28 +3143,8 @@ static int __netif_receive_skb(struct sk_buff *skb)
 
 	if (!skb->skb_iif)
 		skb->skb_iif = skb->dev->ifindex;
-
-	/*
-	 * bonding note: skbs received on inactive slaves should only
-	 * be delivered to pkt handlers that are exact matches.  Also
-	 * the deliver_no_wcard flag will be set.  If packet handlers
-	 * are sensitive to duplicate packets these skbs will need to
-	 * be dropped at the handler.
-	 */
-	null_or_orig = NULL;
 	orig_dev = skb->dev;
-	master = ACCESS_ONCE(orig_dev->master);
-	if (skb->deliver_no_wcard)
-		null_or_orig = orig_dev;
-	else if (master) {
-		if (skb_bond_should_drop(skb, master)) {
-			skb->deliver_no_wcard = 1;
-			null_or_orig = orig_dev; /* deliver only exact match */
-		} else
-			skb->dev = master;
-	}
 
-	__this_cpu_inc(softnet_data.processed);
 	skb_reset_network_header(skb);
 	skb_reset_transport_header(skb);
 	skb->mac_len = skb->network_header - skb->mac_header;
@@ -3064,6 +3153,10 @@ static int __netif_receive_skb(struct sk_buff *skb)
 
 	rcu_read_lock();
 
+another_round:
+
+	__this_cpu_inc(softnet_data.processed);
+
 #ifdef CONFIG_NET_CLS_ACT
 	if (skb->tc_verd & TC_NCLS) {
 		skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
@@ -3072,8 +3165,7 @@ static int __netif_receive_skb(struct sk_buff *skb)
 #endif
 
 	list_for_each_entry_rcu(ptype, &ptype_all, list) {
-		if (ptype->dev == null_or_orig || ptype->dev == skb->dev ||
-		    ptype->dev == orig_dev) {
+		if (!ptype->dev || ptype->dev == skb->dev) {
 			if (pt_prev)
 				ret = deliver_skb(skb, pt_prev, orig_dev);
 			pt_prev = ptype;
@@ -3087,16 +3179,20 @@ static int __netif_receive_skb(struct sk_buff *skb)
 ncls:
 #endif
 
-	/* Handle special case of bridge or macvlan */
 	rx_handler = rcu_dereference(skb->dev->rx_handler);
 	if (rx_handler) {
+		struct net_device *prev_dev;
+
 		if (pt_prev) {
 			ret = deliver_skb(skb, pt_prev, orig_dev);
 			pt_prev = NULL;
 		}
+		prev_dev = skb->dev;
 		skb = rx_handler(skb);
 		if (!skb)
 			goto out;
+		if (skb->dev != prev_dev)
+			goto another_round;
 	}
 
 	if (vlan_tx_tag_present(skb)) {
@@ -3111,24 +3207,17 @@ ncls:
 			goto out;
 	}
 
-	/*
-	 * Make sure frames received on VLAN interfaces stacked on
-	 * bonding interfaces still make their way to any base bonding
-	 * device that may have registered for a specific ptype.  The
-	 * handler may have to adjust skb->dev and orig_dev.
-	 */
-	orig_or_bond = orig_dev;
-	if ((skb->dev->priv_flags & IFF_802_1Q_VLAN) &&
-	    (vlan_dev_real_dev(skb->dev)->priv_flags & IFF_BONDING)) {
-		orig_or_bond = vlan_dev_real_dev(skb->dev);
-	}
+	vlan_on_bond_hook(skb);
+
+	/* deliver only exact match when indicated */
+	null_or_dev = skb->deliver_no_wcard ? skb->dev : NULL;
 
 	type = skb->protocol;
 	list_for_each_entry_rcu(ptype,
 			&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
-		if (ptype->type == type && (ptype->dev == null_or_orig ||
-		     ptype->dev == skb->dev || ptype->dev == orig_dev ||
-		     ptype->dev == orig_or_bond)) {
+		if (ptype->type == type &&
+		    (ptype->dev == null_or_dev || ptype->dev == skb->dev ||
+		     ptype->dev == orig_dev)) {
 			if (pt_prev)
 				ret = deliver_skb(skb, pt_prev, orig_dev);
 			pt_prev = ptype;
@@ -3925,12 +4014,15 @@ void *dev_seq_start(struct seq_file *seq, loff_t *pos)
 
 void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
-	struct net_device *dev = (v == SEQ_START_TOKEN) ?
-				  first_net_device(seq_file_net(seq)) :
-				  next_net_device((struct net_device *)v);
+	struct net_device *dev = v;
+
+	if (v == SEQ_START_TOKEN)
+		dev = first_net_device_rcu(seq_file_net(seq));
+	else
+		dev = next_net_device_rcu(dev);
 
 	++*pos;
-	return rcu_dereference(dev);
+	return dev;
 }
 
 void dev_seq_stop(struct seq_file *seq, void *v)
@@ -4214,15 +4306,14 @@ static int __init dev_proc_init(void)
 
 
 /**
- *	netdev_set_master	-	set up master/slave pair
+ *	netdev_set_master	-	set up master pointer
  *	@slave: slave device
  *	@master: new master device
  *
  *	Changes the master device of the slave. Pass %NULL to break the
  *	bonding. The caller must hold the RTNL semaphore. On a failure
  *	a negative errno code is returned. On success the reference counts
- *	are adjusted, %RTM_NEWLINK is sent to the routing socket and the
- *	function returns zero.
+ *	are adjusted and the function returns zero.
  */
 int netdev_set_master(struct net_device *slave, struct net_device *master)
 {
@@ -4242,6 +4333,29 @@ int netdev_set_master(struct net_device *slave, struct net_device *master)
 		synchronize_net();
 		dev_put(old);
 	}
+	return 0;
+}
+EXPORT_SYMBOL(netdev_set_master);
+
+/**
+ *	netdev_set_bond_master	-	set up bonding master/slave pair
+ *	@slave: slave device
+ *	@master: new master device
+ *
+ *	Changes the master device of the slave. Pass %NULL to break the
+ *	bonding. The caller must hold the RTNL semaphore. On a failure
+ *	a negative errno code is returned. On success %RTM_NEWLINK is sent
+ *	to the routing socket and the function returns zero.
+ */
+int netdev_set_bond_master(struct net_device *slave, struct net_device *master)
+{
+	int err;
+
+	ASSERT_RTNL();
+
+	err = netdev_set_master(slave, master);
+	if (err)
+		return err;
 	if (master)
 		slave->flags |= IFF_SLAVE;
 	else
@@ -4250,7 +4364,7 @@ int netdev_set_master(struct net_device *slave, struct net_device *master)
 	rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
 	return 0;
 }
-EXPORT_SYMBOL(netdev_set_master);
+EXPORT_SYMBOL(netdev_set_bond_master);
 
 static void dev_change_rx_flags(struct net_device *dev, int flags)
 {
@@ -4587,6 +4701,17 @@ int dev_set_mtu(struct net_device *dev, int new_mtu)
 EXPORT_SYMBOL(dev_set_mtu);
 
 /**
+ *	dev_set_group - Change group this device belongs to
+ *	@dev: device
+ *	@new_group: group this device should belong to
+ */
+void dev_set_group(struct net_device *dev, int new_group)
+{
+	dev->group = new_group;
+}
+EXPORT_SYMBOL(dev_set_group);
+
+/**
  *	dev_set_mac_address - Change Media Access Control Address
  *	@dev: device
  *	@sa: new address
@@ -5077,41 +5202,55 @@ static void rollback_registered(struct net_device *dev)
 	list_del(&single);
 }
 
-unsigned long netdev_fix_features(unsigned long features, const char *name)
+u32 netdev_fix_features(struct net_device *dev, u32 features)
 {
+	/* Fix illegal checksum combinations */
+	if ((features & NETIF_F_HW_CSUM) &&
+	    (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
+		netdev_info(dev, "mixed HW and IP checksum settings.\n");
+		features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
+	}
+
+	if ((features & NETIF_F_NO_CSUM) &&
+	    (features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
+		netdev_info(dev, "mixed no checksumming and other settings.\n");
+		features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM);
+	}
+
 	/* Fix illegal SG+CSUM combinations. */
 	if ((features & NETIF_F_SG) &&
 	    !(features & NETIF_F_ALL_CSUM)) {
-		if (name)
-			printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no "
-			       "checksum feature.\n", name);
+		netdev_info(dev,
+			    "Dropping NETIF_F_SG since no checksum feature.\n");
 		features &= ~NETIF_F_SG;
 	}
 
 	/* TSO requires that SG is present as well. */
 	if ((features & NETIF_F_TSO) && !(features & NETIF_F_SG)) {
-		if (name)
-			printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no "
-			       "SG feature.\n", name);
+		netdev_info(dev, "Dropping NETIF_F_TSO since no SG feature.\n");
 		features &= ~NETIF_F_TSO;
 	}
 
+	/* Software GSO depends on SG. */
+	if ((features & NETIF_F_GSO) && !(features & NETIF_F_SG)) {
+		netdev_info(dev, "Dropping NETIF_F_GSO since no SG feature.\n");
+		features &= ~NETIF_F_GSO;
+	}
+
+	/* UFO needs SG and checksumming */
 	if (features & NETIF_F_UFO) {
 		/* maybe split UFO into V4 and V6? */
 		if (!((features & NETIF_F_GEN_CSUM) ||
 		    (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))
 			    == (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
-			if (name)
-				printk(KERN_ERR "%s: Dropping NETIF_F_UFO "
-				       "since no checksum offload features.\n",
-				       name);
+			netdev_info(dev,
+				"Dropping NETIF_F_UFO since no checksum offload features.\n");
 			features &= ~NETIF_F_UFO;
 		}
 
 		if (!(features & NETIF_F_SG)) {
-			if (name)
-				printk(KERN_ERR "%s: Dropping NETIF_F_UFO "
-				       "since no NETIF_F_SG feature.\n", name);
+			netdev_info(dev,
+				"Dropping NETIF_F_UFO since no NETIF_F_SG feature.\n");
 			features &= ~NETIF_F_UFO;
 		}
 	}
@@ -5120,6 +5259,37 @@ unsigned long netdev_fix_features(unsigned long features, const char *name)
 }
 EXPORT_SYMBOL(netdev_fix_features);
 
+void netdev_update_features(struct net_device *dev)
+{
+	u32 features;
+	int err = 0;
+
+	features = netdev_get_wanted_features(dev);
+
+	if (dev->netdev_ops->ndo_fix_features)
+		features = dev->netdev_ops->ndo_fix_features(dev, features);
+
+	/* driver might be less strict about feature dependencies */
+	features = netdev_fix_features(dev, features);
+
+	if (dev->features == features)
+		return;
+
+	netdev_info(dev, "Features changed: 0x%08x -> 0x%08x\n",
+		dev->features, features);
+
+	if (dev->netdev_ops->ndo_set_features)
+		err = dev->netdev_ops->ndo_set_features(dev, features);
+
+	if (!err)
+		dev->features = features;
+	else if (err < 0)
+		netdev_err(dev,
+			"set_features() failed (%d); wanted 0x%08x, left 0x%08x\n",
+			err, features, dev->features);
+}
+EXPORT_SYMBOL(netdev_update_features);
+
 /**
  *	netif_stacked_transfer_operstate -	transfer operstate
  *	@rootdev: the root or lower level device to transfer state from
@@ -5254,27 +5424,19 @@ int register_netdevice(struct net_device *dev)
 	if (dev->iflink == -1)
 		dev->iflink = dev->ifindex;
 
-	/* Fix illegal checksum combinations */
-	if ((dev->features & NETIF_F_HW_CSUM) &&
-	    (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
-		printk(KERN_NOTICE "%s: mixed HW and IP checksum settings.\n",
-		       dev->name);
-		dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
-	}
+	/* Transfer changeable features to wanted_features and enable
+	 * software offloads (GSO and GRO).
+	 */
+	dev->hw_features |= NETIF_F_SOFT_FEATURES;
+	dev->features |= NETIF_F_SOFT_FEATURES;
+	dev->wanted_features = dev->features & dev->hw_features;
 
-	if ((dev->features & NETIF_F_NO_CSUM) &&
-	    (dev->features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
-		printk(KERN_NOTICE "%s: mixed no checksumming and other settings.\n",
-		       dev->name);
-		dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM);
+	/* Avoid warning from netdev_fix_features() for GSO without SG */
+	if (!(dev->wanted_features & NETIF_F_SG)) {
+		dev->wanted_features &= ~NETIF_F_GSO;
+		dev->features &= ~NETIF_F_GSO;
 	}
 
-	dev->features = netdev_fix_features(dev->features, dev->name);
-
-	/* Enable software GSO if SG is supported. */
-	if (dev->features & NETIF_F_SG)
-		dev->features |= NETIF_F_GSO;
-
 	/* Enable GRO and NETIF_F_HIGHDMA for vlans by default,
 	 * vlan_dev_init() will do the dev->features check, so these features
 	 * are enabled only if supported by underlying device.
@@ -5291,6 +5453,8 @@ int register_netdevice(struct net_device *dev)
 		goto err_uninit;
 	dev->reg_state = NETREG_REGISTERED;
 
+	netdev_update_features(dev);
+
 	/*
 	 *	Default initial state at registry is that the
 	 *	device is present.
@@ -5695,6 +5859,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
 #endif
 
 	strcpy(dev->name, name);
+	dev->group = INIT_NETDEV_GROUP;
 	return dev;
 
 free_all:
@@ -6009,8 +6174,7 @@ static int dev_cpu_callback(struct notifier_block *nfb,
  *	@one to the master device with current feature set @all.  Will not
  *	enable anything that is off in @mask. Returns the new feature set.
  */
-unsigned long netdev_increment_features(unsigned long all, unsigned long one,
-					unsigned long mask)
+u32 netdev_increment_features(u32 all, u32 one, u32 mask)
 {
 	/* If device needs checksumming, downgrade to it. */
 	if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM))
diff --git a/net/core/dst.c b/net/core/dst.c
index b99c7c7..91104d3 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -164,7 +164,9 @@ int dst_discard(struct sk_buff *skb)
 }
 EXPORT_SYMBOL(dst_discard);
 
-void *dst_alloc(struct dst_ops *ops)
+const u32 dst_default_metrics[RTAX_MAX];
+
+void *dst_alloc(struct dst_ops *ops, int initial_ref)
 {
 	struct dst_entry *dst;
 
@@ -175,11 +177,12 @@ void *dst_alloc(struct dst_ops *ops)
 	dst = kmem_cache_zalloc(ops->kmem_cachep, GFP_ATOMIC);
 	if (!dst)
 		return NULL;
-	atomic_set(&dst->__refcnt, 0);
+	atomic_set(&dst->__refcnt, initial_ref);
 	dst->ops = ops;
 	dst->lastuse = jiffies;
 	dst->path = dst;
 	dst->input = dst->output = dst_discard;
+	dst_init_metrics(dst, dst_default_metrics, true);
 #if RT_CACHE_DEBUG >= 2
 	atomic_inc(&dst_total);
 #endif
@@ -282,6 +285,42 @@ void dst_release(struct dst_entry *dst)
 }
 EXPORT_SYMBOL(dst_release);
 
+u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old)
+{
+	u32 *p = kmalloc(sizeof(u32) * RTAX_MAX, GFP_ATOMIC);
+
+	if (p) {
+		u32 *old_p = __DST_METRICS_PTR(old);
+		unsigned long prev, new;
+
+		memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
+
+		new = (unsigned long) p;
+		prev = cmpxchg(&dst->_metrics, old, new);
+
+		if (prev != old) {
+			kfree(p);
+			p = __DST_METRICS_PTR(prev);
+			if (prev & DST_METRICS_READ_ONLY)
+				p = NULL;
+		}
+	}
+	return p;
+}
+EXPORT_SYMBOL(dst_cow_metrics_generic);
+
+/* Caller asserts that dst_metrics_read_only(dst) is false.  */
+void __dst_destroy_metrics_generic(struct dst_entry *dst, unsigned long old)
+{
+	unsigned long prev, new;
+
+	new = (unsigned long) dst_default_metrics;
+	prev = cmpxchg(&dst->_metrics, old, new);
+	if (prev == old)
+		kfree(__DST_METRICS_PTR(old));
+}
+EXPORT_SYMBOL(__dst_destroy_metrics_generic);
+
 /**
  * skb_dst_set_noref - sets skb dst, without a reference
  * @skb: buffer
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index ff23029..c1a71bb 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -34,12 +34,6 @@ u32 ethtool_op_get_link(struct net_device *dev)
 }
 EXPORT_SYMBOL(ethtool_op_get_link);
 
-u32 ethtool_op_get_rx_csum(struct net_device *dev)
-{
-	return (dev->features & NETIF_F_ALL_CSUM) != 0;
-}
-EXPORT_SYMBOL(ethtool_op_get_rx_csum);
-
 u32 ethtool_op_get_tx_csum(struct net_device *dev)
 {
 	return (dev->features & NETIF_F_ALL_CSUM) != 0;
@@ -55,6 +49,7 @@ int ethtool_op_set_tx_csum(struct net_device *dev, u32 data)
 
 	return 0;
 }
+EXPORT_SYMBOL(ethtool_op_set_tx_csum);
 
 int ethtool_op_set_tx_hw_csum(struct net_device *dev, u32 data)
 {
@@ -171,6 +166,381 @@ EXPORT_SYMBOL(ethtool_ntuple_flush);
 
 /* Handlers for each ethtool command */
 
+#define ETHTOOL_DEV_FEATURE_WORDS	1
+
+static void ethtool_get_features_compat(struct net_device *dev,
+	struct ethtool_get_features_block *features)
+{
+	if (!dev->ethtool_ops)
+		return;
+
+	/* getting RX checksum */
+	if (dev->ethtool_ops->get_rx_csum)
+		if (dev->ethtool_ops->get_rx_csum(dev))
+			features[0].active |= NETIF_F_RXCSUM;
+
+	/* mark legacy-changeable features */
+	if (dev->ethtool_ops->set_sg)
+		features[0].available |= NETIF_F_SG;
+	if (dev->ethtool_ops->set_tx_csum)
+		features[0].available |= NETIF_F_ALL_CSUM;
+	if (dev->ethtool_ops->set_tso)
+		features[0].available |= NETIF_F_ALL_TSO;
+	if (dev->ethtool_ops->set_rx_csum)
+		features[0].available |= NETIF_F_RXCSUM;
+	if (dev->ethtool_ops->set_flags)
+		features[0].available |= flags_dup_features;
+}
+
+static int ethtool_set_feature_compat(struct net_device *dev,
+	int (*legacy_set)(struct net_device *, u32),
+	struct ethtool_set_features_block *features, u32 mask)
+{
+	u32 do_set;
+
+	if (!legacy_set)
+		return 0;
+
+	if (!(features[0].valid & mask))
+		return 0;
+
+	features[0].valid &= ~mask;
+
+	do_set = !!(features[0].requested & mask);
+
+	if (legacy_set(dev, do_set) < 0)
+		netdev_info(dev,
+			"Legacy feature change (%s) failed for 0x%08x\n",
+			do_set ? "set" : "clear", mask);
+
+	return 1;
+}
+
+static int ethtool_set_features_compat(struct net_device *dev,
+	struct ethtool_set_features_block *features)
+{
+	int compat;
+
+	if (!dev->ethtool_ops)
+		return 0;
+
+	compat  = ethtool_set_feature_compat(dev, dev->ethtool_ops->set_sg,
+		features, NETIF_F_SG);
+	compat |= ethtool_set_feature_compat(dev, dev->ethtool_ops->set_tx_csum,
+		features, NETIF_F_ALL_CSUM);
+	compat |= ethtool_set_feature_compat(dev, dev->ethtool_ops->set_tso,
+		features, NETIF_F_ALL_TSO);
+	compat |= ethtool_set_feature_compat(dev, dev->ethtool_ops->set_rx_csum,
+		features, NETIF_F_RXCSUM);
+	compat |= ethtool_set_feature_compat(dev, dev->ethtool_ops->set_flags,
+		features, flags_dup_features);
+
+	return compat;
+}
+
+static int ethtool_get_features(struct net_device *dev, void __user *useraddr)
+{
+	struct ethtool_gfeatures cmd = {
+		.cmd = ETHTOOL_GFEATURES,
+		.size = ETHTOOL_DEV_FEATURE_WORDS,
+	};
+	struct ethtool_get_features_block features[ETHTOOL_DEV_FEATURE_WORDS] = {
+		{
+			.available = dev->hw_features,
+			.requested = dev->wanted_features,
+			.active = dev->features,
+			.never_changed = NETIF_F_NEVER_CHANGE,
+		},
+	};
+	u32 __user *sizeaddr;
+	u32 copy_size;
+
+	ethtool_get_features_compat(dev, features);
+
+	sizeaddr = useraddr + offsetof(struct ethtool_gfeatures, size);
+	if (get_user(copy_size, sizeaddr))
+		return -EFAULT;
+
+	if (copy_size > ETHTOOL_DEV_FEATURE_WORDS)
+		copy_size = ETHTOOL_DEV_FEATURE_WORDS;
+
+	if (copy_to_user(useraddr, &cmd, sizeof(cmd)))
+		return -EFAULT;
+	useraddr += sizeof(cmd);
+	if (copy_to_user(useraddr, features, copy_size * sizeof(*features)))
+		return -EFAULT;
+
+	return 0;
+}
+
+static int ethtool_set_features(struct net_device *dev, void __user *useraddr)
+{
+	struct ethtool_sfeatures cmd;
+	struct ethtool_set_features_block features[ETHTOOL_DEV_FEATURE_WORDS];
+	int ret = 0;
+
+	if (copy_from_user(&cmd, useraddr, sizeof(cmd)))
+		return -EFAULT;
+	useraddr += sizeof(cmd);
+
+	if (cmd.size != ETHTOOL_DEV_FEATURE_WORDS)
+		return -EINVAL;
+
+	if (copy_from_user(features, useraddr, sizeof(features)))
+		return -EFAULT;
+
+	if (features[0].valid & ~NETIF_F_ETHTOOL_BITS)
+		return -EINVAL;
+
+	if (ethtool_set_features_compat(dev, features))
+		ret |= ETHTOOL_F_COMPAT;
+
+	if (features[0].valid & ~dev->hw_features) {
+		features[0].valid &= dev->hw_features;
+		ret |= ETHTOOL_F_UNSUPPORTED;
+	}
+
+	dev->wanted_features &= ~features[0].valid;
+	dev->wanted_features |= features[0].valid & features[0].requested;
+	netdev_update_features(dev);
+
+	if ((dev->wanted_features ^ dev->features) & features[0].valid)
+		ret |= ETHTOOL_F_WISH;
+
+	return ret;
+}
+
+static const char netdev_features_strings[ETHTOOL_DEV_FEATURE_WORDS * 32][ETH_GSTRING_LEN] = {
+	/* NETIF_F_SG */              "tx-scatter-gather",
+	/* NETIF_F_IP_CSUM */         "tx-checksum-ipv4",
+	/* NETIF_F_NO_CSUM */         "tx-checksum-unneeded",
+	/* NETIF_F_HW_CSUM */         "tx-checksum-ip-generic",
+	/* NETIF_F_IPV6_CSUM */       "tx_checksum-ipv6",
+	/* NETIF_F_HIGHDMA */         "highdma",
+	/* NETIF_F_FRAGLIST */        "tx-scatter-gather-fraglist",
+	/* NETIF_F_HW_VLAN_TX */      "tx-vlan-hw-insert",
+
+	/* NETIF_F_HW_VLAN_RX */      "rx-vlan-hw-parse",
+	/* NETIF_F_HW_VLAN_FILTER */  "rx-vlan-filter",
+	/* NETIF_F_VLAN_CHALLENGED */ "vlan-challenged",
+	/* NETIF_F_GSO */             "tx-generic-segmentation",
+	/* NETIF_F_LLTX */            "tx-lockless",
+	/* NETIF_F_NETNS_LOCAL */     "netns-local",
+	/* NETIF_F_GRO */             "rx-gro",
+	/* NETIF_F_LRO */             "rx-lro",
+
+	/* NETIF_F_TSO */             "tx-tcp-segmentation",
+	/* NETIF_F_UFO */             "tx-udp-fragmentation",
+	/* NETIF_F_GSO_ROBUST */      "tx-gso-robust",
+	/* NETIF_F_TSO_ECN */         "tx-tcp-ecn-segmentation",
+	/* NETIF_F_TSO6 */            "tx-tcp6-segmentation",
+	/* NETIF_F_FSO */             "tx-fcoe-segmentation",
+	"",
+	"",
+
+	/* NETIF_F_FCOE_CRC */        "tx-checksum-fcoe-crc",
+	/* NETIF_F_SCTP_CSUM */       "tx-checksum-sctp",
+	/* NETIF_F_FCOE_MTU */        "fcoe-mtu",
+	/* NETIF_F_NTUPLE */          "rx-ntuple-filter",
+	/* NETIF_F_RXHASH */          "rx-hashing",
+	/* NETIF_F_RXCSUM */          "rx-checksum",
+	"",
+	"",
+};
+
+static int __ethtool_get_sset_count(struct net_device *dev, int sset)
+{
+	const struct ethtool_ops *ops = dev->ethtool_ops;
+
+	if (sset == ETH_SS_FEATURES)
+		return ARRAY_SIZE(netdev_features_strings);
+
+	if (ops && ops->get_sset_count && ops->get_strings)
+		return ops->get_sset_count(dev, sset);
+	else
+		return -EOPNOTSUPP;
+}
+
+static void __ethtool_get_strings(struct net_device *dev,
+	u32 stringset, u8 *data)
+{
+	const struct ethtool_ops *ops = dev->ethtool_ops;
+
+	if (stringset == ETH_SS_FEATURES)
+		memcpy(data, netdev_features_strings,
+			sizeof(netdev_features_strings));
+	else
+		/* ops->get_strings is valid because checked earlier */
+		ops->get_strings(dev, stringset, data);
+}
+
+static u32 ethtool_get_feature_mask(u32 eth_cmd)
+{
+	/* feature masks of legacy discrete ethtool ops */
+
+	switch (eth_cmd) {
+	case ETHTOOL_GTXCSUM:
+	case ETHTOOL_STXCSUM:
+		return NETIF_F_ALL_CSUM | NETIF_F_SCTP_CSUM;
+	case ETHTOOL_GRXCSUM:
+	case ETHTOOL_SRXCSUM:
+		return NETIF_F_RXCSUM;
+	case ETHTOOL_GSG:
+	case ETHTOOL_SSG:
+		return NETIF_F_SG;
+	case ETHTOOL_GTSO:
+	case ETHTOOL_STSO:
+		return NETIF_F_ALL_TSO;
+	case ETHTOOL_GUFO:
+	case ETHTOOL_SUFO:
+		return NETIF_F_UFO;
+	case ETHTOOL_GGSO:
+	case ETHTOOL_SGSO:
+		return NETIF_F_GSO;
+	case ETHTOOL_GGRO:
+	case ETHTOOL_SGRO:
+		return NETIF_F_GRO;
+	default:
+		BUG();
+	}
+}
+
+static void *__ethtool_get_one_feature_actor(struct net_device *dev, u32 ethcmd)
+{
+	const struct ethtool_ops *ops = dev->ethtool_ops;
+
+	if (!ops)
+		return NULL;
+
+	switch (ethcmd) {
+	case ETHTOOL_GTXCSUM:
+		return ops->get_tx_csum;
+	case ETHTOOL_GRXCSUM:
+		return ops->get_rx_csum;
+	case ETHTOOL_SSG:
+		return ops->get_sg;
+	case ETHTOOL_STSO:
+		return ops->get_tso;
+	case ETHTOOL_SUFO:
+		return ops->get_ufo;
+	default:
+		return NULL;
+	}
+}
+
+static u32 __ethtool_get_rx_csum_oldbug(struct net_device *dev)
+{
+	return !!(dev->features & NETIF_F_ALL_CSUM);
+}
+
+static int ethtool_get_one_feature(struct net_device *dev,
+	char __user *useraddr, u32 ethcmd)
+{
+	u32 mask = ethtool_get_feature_mask(ethcmd);
+	struct ethtool_value edata = {
+		.cmd = ethcmd,
+		.data = !!(dev->features & mask),
+	};
+
+	/* compatibility with discrete get_ ops */
+	if (!(dev->hw_features & mask)) {
+		u32 (*actor)(struct net_device *);
+
+		actor = __ethtool_get_one_feature_actor(dev, ethcmd);
+
+		/* bug compatibility with old get_rx_csum */
+		if (ethcmd == ETHTOOL_GRXCSUM && !actor)
+			actor = __ethtool_get_rx_csum_oldbug;
+
+		if (actor)
+			edata.data = actor(dev);
+	}
+
+	if (copy_to_user(useraddr, &edata, sizeof(edata)))
+		return -EFAULT;
+	return 0;
+}
+
+static int __ethtool_set_tx_csum(struct net_device *dev, u32 data);
+static int __ethtool_set_rx_csum(struct net_device *dev, u32 data);
+static int __ethtool_set_sg(struct net_device *dev, u32 data);
+static int __ethtool_set_tso(struct net_device *dev, u32 data);
+static int __ethtool_set_ufo(struct net_device *dev, u32 data);
+
+static int ethtool_set_one_feature(struct net_device *dev,
+	void __user *useraddr, u32 ethcmd)
+{
+	struct ethtool_value edata;
+	u32 mask;
+
+	if (copy_from_user(&edata, useraddr, sizeof(edata)))
+		return -EFAULT;
+
+	mask = ethtool_get_feature_mask(ethcmd);
+	mask &= dev->hw_features;
+	if (mask) {
+		if (edata.data)
+			dev->wanted_features |= mask;
+		else
+			dev->wanted_features &= ~mask;
+
+		netdev_update_features(dev);
+		return 0;
+	}
+
+	/* Driver is not converted to ndo_fix_features or does not
+	 * support changing this offload. In the latter case it won't
+	 * have corresponding ethtool_ops field set.
+	 *
+	 * Following part is to be removed after all drivers advertise
+	 * their changeable features in netdev->hw_features and stop
+	 * using discrete offload setting ops.
+	 */
+
+	switch (ethcmd) {
+	case ETHTOOL_STXCSUM:
+		return __ethtool_set_tx_csum(dev, edata.data);
+	case ETHTOOL_SRXCSUM:
+		return __ethtool_set_rx_csum(dev, edata.data);
+	case ETHTOOL_SSG:
+		return __ethtool_set_sg(dev, edata.data);
+	case ETHTOOL_STSO:
+		return __ethtool_set_tso(dev, edata.data);
+	case ETHTOOL_SUFO:
+		return __ethtool_set_ufo(dev, edata.data);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static int __ethtool_set_flags(struct net_device *dev, u32 data)
+{
+	u32 changed;
+
+	if (data & ~flags_dup_features)
+		return -EINVAL;
+
+	/* legacy set_flags() op */
+	if (dev->ethtool_ops->set_flags) {
+		if (unlikely(dev->hw_features & flags_dup_features))
+			netdev_warn(dev,
+				"driver BUG: mixed hw_features and set_flags()\n");
+		return dev->ethtool_ops->set_flags(dev, data);
+	}
+
+	/* allow changing only bits set in hw_features */
+	changed = (data ^ dev->wanted_features) & flags_dup_features;
+	if (changed & ~dev->hw_features)
+		return (changed & dev->hw_features) ? -EINVAL : -EOPNOTSUPP;
+
+	dev->wanted_features =
+		(dev->wanted_features & ~changed) | data;
+
+	netdev_update_features(dev);
+
+	return 0;
+}
+
 static int ethtool_get_settings(struct net_device *dev, void __user *useraddr)
 {
 	struct ethtool_cmd cmd = { .cmd = ETHTOOL_GSET };
@@ -251,14 +621,10 @@ static noinline_for_stack int ethtool_get_sset_info(struct net_device *dev,
 						    void __user *useraddr)
 {
 	struct ethtool_sset_info info;
-	const struct ethtool_ops *ops = dev->ethtool_ops;
 	u64 sset_mask;
 	int i, idx = 0, n_bits = 0, ret, rc;
 	u32 *info_buf = NULL;
 
-	if (!ops->get_sset_count)
-		return -EOPNOTSUPP;
-
 	if (copy_from_user(&info, useraddr, sizeof(info)))
 		return -EFAULT;
 
@@ -285,7 +651,7 @@ static noinline_for_stack int ethtool_get_sset_info(struct net_device *dev,
 		if (!(sset_mask & (1ULL << i)))
 			continue;
 
-		rc = ops->get_sset_count(dev, i);
+		rc = __ethtool_get_sset_count(dev, i);
 		if (rc >= 0) {
 			info.sset_mask |= (1ULL << i);
 			info_buf[idx++] = rc;
@@ -1091,6 +1457,9 @@ static int __ethtool_set_sg(struct net_device *dev, u32 data)
 {
 	int err;
 
+	if (data && !(dev->features & NETIF_F_ALL_CSUM))
+		return -EINVAL;
+
 	if (!data && dev->ethtool_ops->set_tso) {
 		err = dev->ethtool_ops->set_tso(dev, 0);
 		if (err)
@@ -1105,145 +1474,55 @@ static int __ethtool_set_sg(struct net_device *dev, u32 data)
 	return dev->ethtool_ops->set_sg(dev, data);
 }
 
-static int ethtool_set_tx_csum(struct net_device *dev, char __user *useraddr)
+static int __ethtool_set_tx_csum(struct net_device *dev, u32 data)
 {
-	struct ethtool_value edata;
 	int err;
 
 	if (!dev->ethtool_ops->set_tx_csum)
 		return -EOPNOTSUPP;
 
-	if (copy_from_user(&edata, useraddr, sizeof(edata)))
-		return -EFAULT;
-
-	if (!edata.data && dev->ethtool_ops->set_sg) {
+	if (!data && dev->ethtool_ops->set_sg) {
 		err = __ethtool_set_sg(dev, 0);
 		if (err)
 			return err;
 	}
 
-	return dev->ethtool_ops->set_tx_csum(dev, edata.data);
+	return dev->ethtool_ops->set_tx_csum(dev, data);
 }
-EXPORT_SYMBOL(ethtool_op_set_tx_csum);
 
-static int ethtool_set_rx_csum(struct net_device *dev, char __user *useraddr)
+static int __ethtool_set_rx_csum(struct net_device *dev, u32 data)
 {
-	struct ethtool_value edata;
-
 	if (!dev->ethtool_ops->set_rx_csum)
 		return -EOPNOTSUPP;
 
-	if (copy_from_user(&edata, useraddr, sizeof(edata)))
-		return -EFAULT;
-
-	if (!edata.data && dev->ethtool_ops->set_sg)
+	if (!data)
 		dev->features &= ~NETIF_F_GRO;
 
-	return dev->ethtool_ops->set_rx_csum(dev, edata.data);
+	return dev->ethtool_ops->set_rx_csum(dev, data);
 }
 
-static int ethtool_set_sg(struct net_device *dev, char __user *useraddr)
+static int __ethtool_set_tso(struct net_device *dev, u32 data)
 {
-	struct ethtool_value edata;
-
-	if (!dev->ethtool_ops->set_sg)
-		return -EOPNOTSUPP;
-
-	if (copy_from_user(&edata, useraddr, sizeof(edata)))
-		return -EFAULT;
-
-	if (edata.data &&
-	    !(dev->features & NETIF_F_ALL_CSUM))
-		return -EINVAL;
-
-	return __ethtool_set_sg(dev, edata.data);
-}
-
-static int ethtool_set_tso(struct net_device *dev, char __user *useraddr)
-{
-	struct ethtool_value edata;
-
 	if (!dev->ethtool_ops->set_tso)
 		return -EOPNOTSUPP;
 
-	if (copy_from_user(&edata, useraddr, sizeof(edata)))
-		return -EFAULT;
-
-	if (edata.data && !(dev->features & NETIF_F_SG))
+	if (data && !(dev->features & NETIF_F_SG))
 		return -EINVAL;
 
-	return dev->ethtool_ops->set_tso(dev, edata.data);
+	return dev->ethtool_ops->set_tso(dev, data);
 }
 
-static int ethtool_set_ufo(struct net_device *dev, char __user *useraddr)
+static int __ethtool_set_ufo(struct net_device *dev, u32 data)
 {
-	struct ethtool_value edata;
-
 	if (!dev->ethtool_ops->set_ufo)
 		return -EOPNOTSUPP;
-	if (copy_from_user(&edata, useraddr, sizeof(edata)))
-		return -EFAULT;
-	if (edata.data && !(dev->features & NETIF_F_SG))
+	if (data && !(dev->features & NETIF_F_SG))
 		return -EINVAL;
-	if (edata.data && !((dev->features & NETIF_F_GEN_CSUM) ||
+	if (data && !((dev->features & NETIF_F_GEN_CSUM) ||
 		(dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))
 			== (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM)))
 		return -EINVAL;
-	return dev->ethtool_ops->set_ufo(dev, edata.data);
-}
-
-static int ethtool_get_gso(struct net_device *dev, char __user *useraddr)
-{
-	struct ethtool_value edata = { ETHTOOL_GGSO };
-
-	edata.data = dev->features & NETIF_F_GSO;
-	if (copy_to_user(useraddr, &edata, sizeof(edata)))
-		return -EFAULT;
-	return 0;
-}
-
-static int ethtool_set_gso(struct net_device *dev, char __user *useraddr)
-{
-	struct ethtool_value edata;
-
-	if (copy_from_user(&edata, useraddr, sizeof(edata)))
-		return -EFAULT;
-	if (edata.data)
-		dev->features |= NETIF_F_GSO;
-	else
-		dev->features &= ~NETIF_F_GSO;
-	return 0;
-}
-
-static int ethtool_get_gro(struct net_device *dev, char __user *useraddr)
-{
-	struct ethtool_value edata = { ETHTOOL_GGRO };
-
-	edata.data = dev->features & NETIF_F_GRO;
-	if (copy_to_user(useraddr, &edata, sizeof(edata)))
-		return -EFAULT;
-	return 0;
-}
-
-static int ethtool_set_gro(struct net_device *dev, char __user *useraddr)
-{
-	struct ethtool_value edata;
-
-	if (copy_from_user(&edata, useraddr, sizeof(edata)))
-		return -EFAULT;
-
-	if (edata.data) {
-		u32 rxcsum = dev->ethtool_ops->get_rx_csum ?
-				dev->ethtool_ops->get_rx_csum(dev) :
-				ethtool_op_get_rx_csum(dev);
-
-		if (!rxcsum)
-			return -EINVAL;
-		dev->features |= NETIF_F_GRO;
-	} else
-		dev->features &= ~NETIF_F_GRO;
-
-	return 0;
+	return dev->ethtool_ops->set_ufo(dev, data);
 }
 
 static int ethtool_self_test(struct net_device *dev, char __user *useraddr)
@@ -1287,17 +1566,13 @@ static int ethtool_self_test(struct net_device *dev, char __user *useraddr)
 static int ethtool_get_strings(struct net_device *dev, void __user *useraddr)
 {
 	struct ethtool_gstrings gstrings;
-	const struct ethtool_ops *ops = dev->ethtool_ops;
 	u8 *data;
 	int ret;
 
-	if (!ops->get_strings || !ops->get_sset_count)
-		return -EOPNOTSUPP;
-
 	if (copy_from_user(&gstrings, useraddr, sizeof(gstrings)))
 		return -EFAULT;
 
-	ret = ops->get_sset_count(dev, gstrings.string_set);
+	ret = __ethtool_get_sset_count(dev, gstrings.string_set);
 	if (ret < 0)
 		return ret;
 
@@ -1307,7 +1582,7 @@ static int ethtool_get_strings(struct net_device *dev, void __user *useraddr)
 	if (!data)
 		return -ENOMEM;
 
-	ops->get_strings(dev, gstrings.string_set, data);
+	__ethtool_get_strings(dev, gstrings.string_set, data);
 
 	ret = -EFAULT;
 	if (copy_to_user(useraddr, &gstrings, sizeof(gstrings)))
@@ -1317,7 +1592,7 @@ static int ethtool_get_strings(struct net_device *dev, void __user *useraddr)
 		goto out;
 	ret = 0;
 
- out:
+out:
 	kfree(data);
 	return ret;
 }
@@ -1458,7 +1733,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
 	void __user *useraddr = ifr->ifr_data;
 	u32 ethcmd;
 	int rc;
-	unsigned long old_features;
+	u32 old_features;
 
 	if (!dev || !netif_device_present(dev))
 		return -ENODEV;
@@ -1500,6 +1775,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
 	case ETHTOOL_GRXCLSRLCNT:
 	case ETHTOOL_GRXCLSRULE:
 	case ETHTOOL_GRXCLSRLALL:
+	case ETHTOOL_GFEATURES:
 		break;
 	default:
 		if (!capable(CAP_NET_ADMIN))
@@ -1570,42 +1846,6 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
 	case ETHTOOL_SPAUSEPARAM:
 		rc = ethtool_set_pauseparam(dev, useraddr);
 		break;
-	case ETHTOOL_GRXCSUM:
-		rc = ethtool_get_value(dev, useraddr, ethcmd,
-				       (dev->ethtool_ops->get_rx_csum ?
-					dev->ethtool_ops->get_rx_csum :
-					ethtool_op_get_rx_csum));
-		break;
-	case ETHTOOL_SRXCSUM:
-		rc = ethtool_set_rx_csum(dev, useraddr);
-		break;
-	case ETHTOOL_GTXCSUM:
-		rc = ethtool_get_value(dev, useraddr, ethcmd,
-				       (dev->ethtool_ops->get_tx_csum ?
-					dev->ethtool_ops->get_tx_csum :
-					ethtool_op_get_tx_csum));
-		break;
-	case ETHTOOL_STXCSUM:
-		rc = ethtool_set_tx_csum(dev, useraddr);
-		break;
-	case ETHTOOL_GSG:
-		rc = ethtool_get_value(dev, useraddr, ethcmd,
-				       (dev->ethtool_ops->get_sg ?
-					dev->ethtool_ops->get_sg :
-					ethtool_op_get_sg));
-		break;
-	case ETHTOOL_SSG:
-		rc = ethtool_set_sg(dev, useraddr);
-		break;
-	case ETHTOOL_GTSO:
-		rc = ethtool_get_value(dev, useraddr, ethcmd,
-				       (dev->ethtool_ops->get_tso ?
-					dev->ethtool_ops->get_tso :
-					ethtool_op_get_tso));
-		break;
-	case ETHTOOL_STSO:
-		rc = ethtool_set_tso(dev, useraddr);
-		break;
 	case ETHTOOL_TEST:
 		rc = ethtool_self_test(dev, useraddr);
 		break;
@@ -1621,21 +1861,6 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
 	case ETHTOOL_GPERMADDR:
 		rc = ethtool_get_perm_addr(dev, useraddr);
 		break;
-	case ETHTOOL_GUFO:
-		rc = ethtool_get_value(dev, useraddr, ethcmd,
-				       (dev->ethtool_ops->get_ufo ?
-					dev->ethtool_ops->get_ufo :
-					ethtool_op_get_ufo));
-		break;
-	case ETHTOOL_SUFO:
-		rc = ethtool_set_ufo(dev, useraddr);
-		break;
-	case ETHTOOL_GGSO:
-		rc = ethtool_get_gso(dev, useraddr);
-		break;
-	case ETHTOOL_SGSO:
-		rc = ethtool_set_gso(dev, useraddr);
-		break;
 	case ETHTOOL_GFLAGS:
 		rc = ethtool_get_value(dev, useraddr, ethcmd,
 				       (dev->ethtool_ops->get_flags ?
@@ -1643,8 +1868,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
 					ethtool_op_get_flags));
 		break;
 	case ETHTOOL_SFLAGS:
-		rc = ethtool_set_value(dev, useraddr,
-				       dev->ethtool_ops->set_flags);
+		rc = ethtool_set_value(dev, useraddr, __ethtool_set_flags);
 		break;
 	case ETHTOOL_GPFLAGS:
 		rc = ethtool_get_value(dev, useraddr, ethcmd,
@@ -1666,12 +1890,6 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
 	case ETHTOOL_SRXCLSRLINS:
 		rc = ethtool_set_rxnfc(dev, ethcmd, useraddr);
 		break;
-	case ETHTOOL_GGRO:
-		rc = ethtool_get_gro(dev, useraddr);
-		break;
-	case ETHTOOL_SGRO:
-		rc = ethtool_set_gro(dev, useraddr);
-		break;
 	case ETHTOOL_FLASHDEV:
 		rc = ethtool_flash_device(dev, useraddr);
 		break;
@@ -1693,6 +1911,30 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
 	case ETHTOOL_SRXFHINDIR:
 		rc = ethtool_set_rxfh_indir(dev, useraddr);
 		break;
+	case ETHTOOL_GFEATURES:
+		rc = ethtool_get_features(dev, useraddr);
+		break;
+	case ETHTOOL_SFEATURES:
+		rc = ethtool_set_features(dev, useraddr);
+		break;
+	case ETHTOOL_GTXCSUM:
+	case ETHTOOL_GRXCSUM:
+	case ETHTOOL_GSG:
+	case ETHTOOL_GTSO:
+	case ETHTOOL_GUFO:
+	case ETHTOOL_GGSO:
+	case ETHTOOL_GGRO:
+		rc = ethtool_get_one_feature(dev, useraddr, ethcmd);
+		break;
+	case ETHTOOL_STXCSUM:
+	case ETHTOOL_SRXCSUM:
+	case ETHTOOL_SSG:
+	case ETHTOOL_STSO:
+	case ETHTOOL_SUFO:
+	case ETHTOOL_SGSO:
+	case ETHTOOL_SGRO:
+		rc = ethtool_set_one_feature(dev, useraddr, ethcmd);
+		break;
 	default:
 		rc = -EOPNOTSUPP;
 	}
diff --git a/net/core/filter.c b/net/core/filter.c
index afc5837..232b187 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -142,14 +142,14 @@ int sk_filter(struct sock *sk, struct sk_buff *skb)
 	if (err)
 		return err;
 
-	rcu_read_lock_bh();
-	filter = rcu_dereference_bh(sk->sk_filter);
+	rcu_read_lock();
+	filter = rcu_dereference(sk->sk_filter);
 	if (filter) {
 		unsigned int pkt_len = sk_run_filter(skb, filter->insns);
 
 		err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM;
 	}
-	rcu_read_unlock_bh();
+	rcu_read_unlock();
 
 	return err;
 }
diff --git a/net/core/flow.c b/net/core/flow.c
index 127c8a7..990703b8 100644
--- a/net/core/flow.c
+++ b/net/core/flow.c
@@ -172,9 +172,9 @@ static void flow_new_hash_rnd(struct flow_cache *fc,
 
 static u32 flow_hash_code(struct flow_cache *fc,
 			  struct flow_cache_percpu *fcp,
-			  struct flowi *key)
+			  const struct flowi *key)
 {
-	u32 *k = (u32 *) key;
+	const u32 *k = (const u32 *) key;
 
 	return jhash2(k, (sizeof(*key) / sizeof(u32)), fcp->hash_rnd)
 		& (flow_cache_hash_size(fc) - 1);
@@ -186,17 +186,17 @@ typedef unsigned long flow_compare_t;
  * important assumptions that we can here, such as alignment and
  * constant size.
  */
-static int flow_key_compare(struct flowi *key1, struct flowi *key2)
+static int flow_key_compare(const struct flowi *key1, const struct flowi *key2)
 {
-	flow_compare_t *k1, *k1_lim, *k2;
+	const flow_compare_t *k1, *k1_lim, *k2;
 	const int n_elem = sizeof(struct flowi) / sizeof(flow_compare_t);
 
 	BUILD_BUG_ON(sizeof(struct flowi) % sizeof(flow_compare_t));
 
-	k1 = (flow_compare_t *) key1;
+	k1 = (const flow_compare_t *) key1;
 	k1_lim = k1 + n_elem;
 
-	k2 = (flow_compare_t *) key2;
+	k2 = (const flow_compare_t *) key2;
 
 	do {
 		if (*k1++ != *k2++)
@@ -207,7 +207,7 @@ static int flow_key_compare(struct flowi *key1, struct flowi *key2)
 }
 
 struct flow_cache_object *
-flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir,
+flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir,
 		  flow_resolve_t resolver, void *ctx)
 {
 	struct flow_cache *fc = &flow_cache_global;
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 60a9029..799f06e 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -316,7 +316,7 @@ static struct neigh_hash_table *neigh_hash_alloc(unsigned int entries)
 {
 	size_t size = entries * sizeof(struct neighbour *);
 	struct neigh_hash_table *ret;
-	struct neighbour **buckets;
+	struct neighbour __rcu **buckets;
 
 	ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
 	if (!ret)
@@ -324,14 +324,14 @@ static struct neigh_hash_table *neigh_hash_alloc(unsigned int entries)
 	if (size <= PAGE_SIZE)
 		buckets = kzalloc(size, GFP_ATOMIC);
 	else
-		buckets = (struct neighbour **)
+		buckets = (struct neighbour __rcu **)
 			  __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
 					   get_order(size));
 	if (!buckets) {
 		kfree(ret);
 		return NULL;
 	}
-	rcu_assign_pointer(ret->hash_buckets, buckets);
+	ret->hash_buckets = buckets;
 	ret->hash_mask = entries - 1;
 	get_random_bytes(&ret->hash_rnd, sizeof(ret->hash_rnd));
 	return ret;
@@ -343,7 +343,7 @@ static void neigh_hash_free_rcu(struct rcu_head *head)
 						    struct neigh_hash_table,
 						    rcu);
 	size_t size = (nht->hash_mask + 1) * sizeof(struct neighbour *);
-	struct neighbour **buckets = nht->hash_buckets;
+	struct neighbour __rcu **buckets = nht->hash_buckets;
 
 	if (size <= PAGE_SIZE)
 		kfree(buckets);
@@ -1540,7 +1540,7 @@ void neigh_table_init_no_netlink(struct neigh_table *tbl)
 		panic("cannot create neighbour proc dir entry");
 #endif
 
-	tbl->nht = neigh_hash_alloc(8);
+	RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(8));
 
 	phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
 	tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);
@@ -1602,7 +1602,8 @@ int neigh_table_clear(struct neigh_table *tbl)
 	}
 	write_unlock(&neigh_tbl_lock);
 
-	call_rcu(&tbl->nht->rcu, neigh_hash_free_rcu);
+	call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
+		 neigh_hash_free_rcu);
 	tbl->nht = NULL;
 
 	kfree(tbl->phash_buckets);
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index e23c01b..5ceb257 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -99,7 +99,7 @@ NETDEVICE_SHOW(addr_assign_type, fmt_dec);
 NETDEVICE_SHOW(addr_len, fmt_dec);
 NETDEVICE_SHOW(iflink, fmt_dec);
 NETDEVICE_SHOW(ifindex, fmt_dec);
-NETDEVICE_SHOW(features, fmt_long_hex);
+NETDEVICE_SHOW(features, fmt_hex);
 NETDEVICE_SHOW(type, fmt_dec);
 NETDEVICE_SHOW(link_mode, fmt_dec);
 
@@ -295,6 +295,20 @@ static ssize_t show_ifalias(struct device *dev,
 	return ret;
 }
 
+NETDEVICE_SHOW(group, fmt_dec);
+
+static int change_group(struct net_device *net, unsigned long new_group)
+{
+	dev_set_group(net, (int) new_group);
+	return 0;
+}
+
+static ssize_t store_group(struct device *dev, struct device_attribute *attr,
+			 const char *buf, size_t len)
+{
+	return netdev_store(dev, attr, buf, len, change_group);
+}
+
 static struct device_attribute net_class_attributes[] = {
 	__ATTR(addr_assign_type, S_IRUGO, show_addr_assign_type, NULL),
 	__ATTR(addr_len, S_IRUGO, show_addr_len, NULL),
@@ -316,6 +330,7 @@ static struct device_attribute net_class_attributes[] = {
 	__ATTR(flags, S_IRUGO | S_IWUSR, show_flags, store_flags),
 	__ATTR(tx_queue_len, S_IRUGO | S_IWUSR, show_tx_queue_len,
 	       store_tx_queue_len),
+	__ATTR(netdev_group, S_IRUGO | S_IWUSR, show_group, store_group),
 	{}
 };
 
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 02dc2cb..06be243 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -193,6 +193,17 @@ void netpoll_poll_dev(struct net_device *dev)
 
 	poll_napi(dev);
 
+	if (dev->priv_flags & IFF_SLAVE) {
+		if (dev->npinfo) {
+			struct net_device *bond_dev = dev->master;
+			struct sk_buff *skb;
+			while ((skb = skb_dequeue(&dev->npinfo->arp_tx))) {
+				skb->dev = bond_dev;
+				skb_queue_tail(&bond_dev->npinfo->arp_tx, skb);
+			}
+		}
+	}
+
 	service_arp_queue(dev->npinfo);
 
 	zap_completion_queue();
@@ -313,9 +324,7 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
 		     tries > 0; --tries) {
 			if (__netif_tx_trylock(txq)) {
 				if (!netif_tx_queue_stopped(txq)) {
-					dev->priv_flags |= IFF_IN_NETPOLL;
 					status = ops->ndo_start_xmit(skb, dev);
-					dev->priv_flags &= ~IFF_IN_NETPOLL;
 					if (status == NETDEV_TX_OK)
 						txq_trans_update(txq);
 				}
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index b5bada9..f0aec6c 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -251,6 +251,7 @@ struct pktgen_dev {
 	int max_pkt_size;	/* = ETH_ZLEN; */
 	int pkt_overhead;	/* overhead for MPLS, VLANs, IPSEC etc */
 	int nfrags;
+	struct page *page;
 	u64 delay;		/* nano-seconds */
 
 	__u64 count;		/* Default No packets to send */
@@ -1134,6 +1135,10 @@ static ssize_t pktgen_if_write(struct file *file,
 		if (node_possible(value)) {
 			pkt_dev->node = value;
 			sprintf(pg_result, "OK: node=%d", pkt_dev->node);
+			if (pkt_dev->page) {
+				put_page(pkt_dev->page);
+				pkt_dev->page = NULL;
+			}
 		}
 		else
 			sprintf(pg_result, "ERROR: node not possible");
@@ -2605,6 +2610,90 @@ static inline __be16 build_tci(unsigned int id, unsigned int cfi,
 	return htons(id | (cfi << 12) | (prio << 13));
 }
 
+static void pktgen_finalize_skb(struct pktgen_dev *pkt_dev, struct sk_buff *skb,
+				int datalen)
+{
+	struct timeval timestamp;
+	struct pktgen_hdr *pgh;
+
+	pgh = (struct pktgen_hdr *)skb_put(skb, sizeof(*pgh));
+	datalen -= sizeof(*pgh);
+
+	if (pkt_dev->nfrags <= 0) {
+		pgh = (struct pktgen_hdr *)skb_put(skb, datalen);
+		memset(pgh + 1, 0, datalen);
+	} else {
+		int frags = pkt_dev->nfrags;
+		int i, len;
+
+
+		if (frags > MAX_SKB_FRAGS)
+			frags = MAX_SKB_FRAGS;
+		len = datalen - frags * PAGE_SIZE;
+		if (len > 0) {
+			memset(skb_put(skb, len), 0, len);
+			datalen = frags * PAGE_SIZE;
+		}
+
+		i = 0;
+		while (datalen > 0) {
+			if (unlikely(!pkt_dev->page)) {
+				int node = numa_node_id();
+
+				if (pkt_dev->node >= 0 && (pkt_dev->flags & F_NODE))
+					node = pkt_dev->node;
+				pkt_dev->page = alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0);
+				if (!pkt_dev->page)
+					break;
+			}
+			skb_shinfo(skb)->frags[i].page = pkt_dev->page;
+			get_page(pkt_dev->page);
+			skb_shinfo(skb)->frags[i].page_offset = 0;
+			skb_shinfo(skb)->frags[i].size =
+			    (datalen < PAGE_SIZE ? datalen : PAGE_SIZE);
+			datalen -= skb_shinfo(skb)->frags[i].size;
+			skb->len += skb_shinfo(skb)->frags[i].size;
+			skb->data_len += skb_shinfo(skb)->frags[i].size;
+			i++;
+			skb_shinfo(skb)->nr_frags = i;
+		}
+
+		while (i < frags) {
+			int rem;
+
+			if (i == 0)
+				break;
+
+			rem = skb_shinfo(skb)->frags[i - 1].size / 2;
+			if (rem == 0)
+				break;
+
+			skb_shinfo(skb)->frags[i - 1].size -= rem;
+
+			skb_shinfo(skb)->frags[i] =
+			    skb_shinfo(skb)->frags[i - 1];
+			get_page(skb_shinfo(skb)->frags[i].page);
+			skb_shinfo(skb)->frags[i].page =
+			    skb_shinfo(skb)->frags[i - 1].page;
+			skb_shinfo(skb)->frags[i].page_offset +=
+			    skb_shinfo(skb)->frags[i - 1].size;
+			skb_shinfo(skb)->frags[i].size = rem;
+			i++;
+			skb_shinfo(skb)->nr_frags = i;
+		}
+	}
+
+	/* Stamp the time, and sequence number,
+	 * convert them to network byte order
+	 */
+	pgh->pgh_magic = htonl(PKTGEN_MAGIC);
+	pgh->seq_num = htonl(pkt_dev->seq_num);
+
+	do_gettimeofday(&timestamp);
+	pgh->tv_sec = htonl(timestamp.tv_sec);
+	pgh->tv_usec = htonl(timestamp.tv_usec);
+}
+
 static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
 					struct pktgen_dev *pkt_dev)
 {
@@ -2613,7 +2702,6 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
 	struct udphdr *udph;
 	int datalen, iplen;
 	struct iphdr *iph;
-	struct pktgen_hdr *pgh = NULL;
 	__be16 protocol = htons(ETH_P_IP);
 	__be32 *mpls;
 	__be16 *vlan_tci = NULL;                 /* Encapsulates priority and VLAN ID */
@@ -2729,76 +2817,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
 			   pkt_dev->pkt_overhead);
 	skb->dev = odev;
 	skb->pkt_type = PACKET_HOST;
-
-	if (pkt_dev->nfrags <= 0) {
-		pgh = (struct pktgen_hdr *)skb_put(skb, datalen);
-		memset(pgh + 1, 0, datalen - sizeof(struct pktgen_hdr));
-	} else {
-		int frags = pkt_dev->nfrags;
-		int i, len;
-
-		pgh = (struct pktgen_hdr *)(((char *)(udph)) + 8);
-
-		if (frags > MAX_SKB_FRAGS)
-			frags = MAX_SKB_FRAGS;
-		if (datalen > frags * PAGE_SIZE) {
-			len = datalen - frags * PAGE_SIZE;
-			memset(skb_put(skb, len), 0, len);
-			datalen = frags * PAGE_SIZE;
-		}
-
-		i = 0;
-		while (datalen > 0) {
-			struct page *page = alloc_pages(GFP_KERNEL | __GFP_ZERO, 0);
-			skb_shinfo(skb)->frags[i].page = page;
-			skb_shinfo(skb)->frags[i].page_offset = 0;
-			skb_shinfo(skb)->frags[i].size =
-			    (datalen < PAGE_SIZE ? datalen : PAGE_SIZE);
-			datalen -= skb_shinfo(skb)->frags[i].size;
-			skb->len += skb_shinfo(skb)->frags[i].size;
-			skb->data_len += skb_shinfo(skb)->frags[i].size;
-			i++;
-			skb_shinfo(skb)->nr_frags = i;
-		}
-
-		while (i < frags) {
-			int rem;
-
-			if (i == 0)
-				break;
-
-			rem = skb_shinfo(skb)->frags[i - 1].size / 2;
-			if (rem == 0)
-				break;
-
-			skb_shinfo(skb)->frags[i - 1].size -= rem;
-
-			skb_shinfo(skb)->frags[i] =
-			    skb_shinfo(skb)->frags[i - 1];
-			get_page(skb_shinfo(skb)->frags[i].page);
-			skb_shinfo(skb)->frags[i].page =
-			    skb_shinfo(skb)->frags[i - 1].page;
-			skb_shinfo(skb)->frags[i].page_offset +=
-			    skb_shinfo(skb)->frags[i - 1].size;
-			skb_shinfo(skb)->frags[i].size = rem;
-			i++;
-			skb_shinfo(skb)->nr_frags = i;
-		}
-	}
-
-	/* Stamp the time, and sequence number,
-	 * convert them to network byte order
-	 */
-	if (pgh) {
-		struct timeval timestamp;
-
-		pgh->pgh_magic = htonl(PKTGEN_MAGIC);
-		pgh->seq_num = htonl(pkt_dev->seq_num);
-
-		do_gettimeofday(&timestamp);
-		pgh->tv_sec = htonl(timestamp.tv_sec);
-		pgh->tv_usec = htonl(timestamp.tv_usec);
-	}
+	pktgen_finalize_skb(pkt_dev, skb, datalen);
 
 #ifdef CONFIG_XFRM
 	if (!process_ipsec(pkt_dev, skb, protocol))
@@ -2980,7 +2999,6 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
 	struct udphdr *udph;
 	int datalen;
 	struct ipv6hdr *iph;
-	struct pktgen_hdr *pgh = NULL;
 	__be16 protocol = htons(ETH_P_IPV6);
 	__be32 *mpls;
 	__be16 *vlan_tci = NULL;                 /* Encapsulates priority and VLAN ID */
@@ -3083,75 +3101,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
 	skb->dev = odev;
 	skb->pkt_type = PACKET_HOST;
 
-	if (pkt_dev->nfrags <= 0)
-		pgh = (struct pktgen_hdr *)skb_put(skb, datalen);
-	else {
-		int frags = pkt_dev->nfrags;
-		int i;
-
-		pgh = (struct pktgen_hdr *)(((char *)(udph)) + 8);
-
-		if (frags > MAX_SKB_FRAGS)
-			frags = MAX_SKB_FRAGS;
-		if (datalen > frags * PAGE_SIZE) {
-			skb_put(skb, datalen - frags * PAGE_SIZE);
-			datalen = frags * PAGE_SIZE;
-		}
-
-		i = 0;
-		while (datalen > 0) {
-			struct page *page = alloc_pages(GFP_KERNEL, 0);
-			skb_shinfo(skb)->frags[i].page = page;
-			skb_shinfo(skb)->frags[i].page_offset = 0;
-			skb_shinfo(skb)->frags[i].size =
-			    (datalen < PAGE_SIZE ? datalen : PAGE_SIZE);
-			datalen -= skb_shinfo(skb)->frags[i].size;
-			skb->len += skb_shinfo(skb)->frags[i].size;
-			skb->data_len += skb_shinfo(skb)->frags[i].size;
-			i++;
-			skb_shinfo(skb)->nr_frags = i;
-		}
-
-		while (i < frags) {
-			int rem;
-
-			if (i == 0)
-				break;
-
-			rem = skb_shinfo(skb)->frags[i - 1].size / 2;
-			if (rem == 0)
-				break;
-
-			skb_shinfo(skb)->frags[i - 1].size -= rem;
-
-			skb_shinfo(skb)->frags[i] =
-			    skb_shinfo(skb)->frags[i - 1];
-			get_page(skb_shinfo(skb)->frags[i].page);
-			skb_shinfo(skb)->frags[i].page =
-			    skb_shinfo(skb)->frags[i - 1].page;
-			skb_shinfo(skb)->frags[i].page_offset +=
-			    skb_shinfo(skb)->frags[i - 1].size;
-			skb_shinfo(skb)->frags[i].size = rem;
-			i++;
-			skb_shinfo(skb)->nr_frags = i;
-		}
-	}
-
-	/* Stamp the time, and sequence number,
-	 * convert them to network byte order
-	 * should we update cloned packets too ?
-	 */
-	if (pgh) {
-		struct timeval timestamp;
-
-		pgh->pgh_magic = htonl(PKTGEN_MAGIC);
-		pgh->seq_num = htonl(pkt_dev->seq_num);
-
-		do_gettimeofday(&timestamp);
-		pgh->tv_sec = htonl(timestamp.tv_sec);
-		pgh->tv_usec = htonl(timestamp.tv_usec);
-	}
-	/* pkt_dev->seq_num++; FF: you really mean this? */
+	pktgen_finalize_skb(pkt_dev, skb, datalen);
 
 	return skb;
 }
@@ -3884,6 +3834,8 @@ static int pktgen_remove_device(struct pktgen_thread *t,
 	free_SAs(pkt_dev);
 #endif
 	vfree(pkt_dev->flows);
+	if (pkt_dev->page)
+		put_page(pkt_dev->page);
 	kfree(pkt_dev);
 	return 0;
 }
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 2d65c6b..49f7ea5 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -868,6 +868,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 		   netif_running(dev) ? dev->operstate : IF_OPER_DOWN);
 	NLA_PUT_U8(skb, IFLA_LINKMODE, dev->link_mode);
 	NLA_PUT_U32(skb, IFLA_MTU, dev->mtu);
+	NLA_PUT_U32(skb, IFLA_GROUP, dev->group);
 
 	if (dev->ifindex != dev->iflink)
 		NLA_PUT_U32(skb, IFLA_LINK, dev->iflink);
@@ -1035,6 +1036,7 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = {
 	[IFLA_MAP]		= { .len = sizeof(struct rtnl_link_ifmap) },
 	[IFLA_MTU]		= { .type = NLA_U32 },
 	[IFLA_LINK]		= { .type = NLA_U32 },
+	[IFLA_MASTER]		= { .type = NLA_U32 },
 	[IFLA_TXQLEN]		= { .type = NLA_U32 },
 	[IFLA_WEIGHT]		= { .type = NLA_U32 },
 	[IFLA_OPERSTATE]	= { .type = NLA_U8 },
@@ -1177,6 +1179,41 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr *attr)
 	return err;
 }
 
+static int do_set_master(struct net_device *dev, int ifindex)
+{
+	struct net_device *master_dev;
+	const struct net_device_ops *ops;
+	int err;
+
+	if (dev->master) {
+		if (dev->master->ifindex == ifindex)
+			return 0;
+		ops = dev->master->netdev_ops;
+		if (ops->ndo_del_slave) {
+			err = ops->ndo_del_slave(dev->master, dev);
+			if (err)
+				return err;
+		} else {
+			return -EOPNOTSUPP;
+		}
+	}
+
+	if (ifindex) {
+		master_dev = __dev_get_by_index(dev_net(dev), ifindex);
+		if (!master_dev)
+			return -EINVAL;
+		ops = master_dev->netdev_ops;
+		if (ops->ndo_add_slave) {
+			err = ops->ndo_add_slave(master_dev, dev);
+			if (err)
+				return err;
+		} else {
+			return -EOPNOTSUPP;
+		}
+	}
+	return 0;
+}
+
 static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
 		      struct nlattr **tb, char *ifname, int modified)
 {
@@ -1264,6 +1301,11 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
 		modified = 1;
 	}
 
+	if (tb[IFLA_GROUP]) {
+		dev_set_group(dev, nla_get_u32(tb[IFLA_GROUP]));
+		modified = 1;
+	}
+
 	/*
 	 * Interface selected by interface index but interface
 	 * name provided implies that a name change has been
@@ -1295,6 +1337,13 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
 			goto errout;
 	}
 
+	if (tb[IFLA_MASTER]) {
+		err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER]));
+		if (err)
+			goto errout;
+		modified = 1;
+	}
+
 	if (tb[IFLA_TXQLEN])
 		dev->tx_queue_len = nla_get_u32(tb[IFLA_TXQLEN]);
 
@@ -1541,6 +1590,8 @@ struct net_device *rtnl_create_link(struct net *src_net, struct net *net,
 		set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE]));
 	if (tb[IFLA_LINKMODE])
 		dev->link_mode = nla_get_u8(tb[IFLA_LINKMODE]);
+	if (tb[IFLA_GROUP])
+		dev_set_group(dev, nla_get_u32(tb[IFLA_GROUP]));
 
 	return dev;
 
@@ -1551,6 +1602,24 @@ err:
 }
 EXPORT_SYMBOL(rtnl_create_link);
 
+static int rtnl_group_changelink(struct net *net, int group,
+		struct ifinfomsg *ifm,
+		struct nlattr **tb)
+{
+	struct net_device *dev;
+	int err;
+
+	for_each_netdev(net, dev) {
+		if (dev->group == group) {
+			err = do_setlink(dev, ifm, tb, NULL, 0);
+			if (err < 0)
+				return err;
+		}
+	}
+
+	return 0;
+}
+
 static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 {
 	struct net *net = sock_net(skb->sk);
@@ -1578,10 +1647,12 @@ replay:
 	ifm = nlmsg_data(nlh);
 	if (ifm->ifi_index > 0)
 		dev = __dev_get_by_index(net, ifm->ifi_index);
-	else if (ifname[0])
-		dev = __dev_get_by_name(net, ifname);
-	else
-		dev = NULL;
+	else {
+		if (ifname[0])
+			dev = __dev_get_by_name(net, ifname);
+		else
+			dev = NULL;
+	}
 
 	err = validate_linkmsg(dev, tb);
 	if (err < 0)
@@ -1645,8 +1716,13 @@ replay:
 			return do_setlink(dev, ifm, tb, ifname, modified);
 		}
 
-		if (!(nlh->nlmsg_flags & NLM_F_CREATE))
+		if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
+			if (ifm->ifi_index == 0 && tb[IFLA_GROUP])
+				return rtnl_group_changelink(net,
+						nla_get_u32(tb[IFLA_GROUP]),
+						ifm, tb);
 			return -ENODEV;
+		}
 
 		if (ifm->ifi_index)
 			return -EOPNOTSUPP;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index d883dcc..1eb526a8 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -2434,8 +2434,6 @@ int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb,
 			return -ENOMEM;
 
 		/* initialize the next frag */
-		sk->sk_sndmsg_page = page;
-		sk->sk_sndmsg_off = 0;
 		skb_fill_page_desc(skb, frg_cnt, page, 0, 0);
 		skb->truesize += PAGE_SIZE;
 		atomic_add(PAGE_SIZE, &sk->sk_wmem_alloc);
@@ -2455,7 +2453,6 @@ int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb,
 			return -EFAULT;
 
 		/* copy was successful so update the size parameters */
-		sk->sk_sndmsg_off += copy;
 		frag->size += copy;
 		skb->len += copy;
 		skb->data_len += copy;
@@ -2498,7 +2495,7 @@ EXPORT_SYMBOL_GPL(skb_pull_rcsum);
  *	a pointer to the first in a list of new skbs for the segments.
  *	In case of error it returns ERR_PTR(err).
  */
-struct sk_buff *skb_segment(struct sk_buff *skb, int features)
+struct sk_buff *skb_segment(struct sk_buff *skb, u32 features)
 {
 	struct sk_buff *segs = NULL;
 	struct sk_buff *tail = NULL;
@@ -2508,7 +2505,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features)
 	unsigned int offset = doffset;
 	unsigned int headroom;
 	unsigned int len;
-	int sg = features & NETIF_F_SG;
+	int sg = !!(features & NETIF_F_SG);
 	int nfrags = skb_shinfo(skb)->nr_frags;
 	int err = -ENOMEM;
 	int i = 0;
diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index c44348a..118392f 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -1224,6 +1224,59 @@ err:
 	return err;
 }
 
+static int dcbnl_build_peer_app(struct net_device *netdev, struct sk_buff* skb,
+				int app_nested_type, int app_info_type,
+				int app_entry_type)
+{
+	struct dcb_peer_app_info info;
+	struct dcb_app *table = NULL;
+	const struct dcbnl_rtnl_ops *ops = netdev->dcbnl_ops;
+	u16 app_count;
+	int err;
+
+
+	/**
+	 * retrieve the peer app configuration form the driver. If the driver
+	 * handlers fail exit without doing anything
+	 */
+	err = ops->peer_getappinfo(netdev, &info, &app_count);
+	if (!err && app_count) {
+		table = kmalloc(sizeof(struct dcb_app) * app_count, GFP_KERNEL);
+		if (!table)
+			return -ENOMEM;
+
+		err = ops->peer_getapptable(netdev, table);
+	}
+
+	if (!err) {
+		u16 i;
+		struct nlattr *app;
+
+		/**
+		 * build the message, from here on the only possible failure
+		 * is due to the skb size
+		 */
+		err = -EMSGSIZE;
+
+		app = nla_nest_start(skb, app_nested_type);
+		if (!app)
+			goto nla_put_failure;
+
+		if (app_info_type)
+			NLA_PUT(skb, app_info_type, sizeof(info), &info);
+
+		for (i = 0; i < app_count; i++)
+			NLA_PUT(skb, app_entry_type, sizeof(struct dcb_app),
+				&table[i]);
+
+		nla_nest_end(skb, app);
+	}
+	err = 0;
+
+nla_put_failure:
+	kfree(table);
+	return err;
+}
 
 /* Handle IEEE 802.1Qaz GET commands. */
 static int dcbnl_ieee_get(struct net_device *netdev, struct nlattr **tb,
@@ -1288,6 +1341,30 @@ static int dcbnl_ieee_get(struct net_device *netdev, struct nlattr **tb,
 	spin_unlock(&dcb_lock);
 	nla_nest_end(skb, app);
 
+	/* get peer info if available */
+	if (ops->ieee_peer_getets) {
+		struct ieee_ets ets;
+		err = ops->ieee_peer_getets(netdev, &ets);
+		if (!err)
+			NLA_PUT(skb, DCB_ATTR_IEEE_PEER_ETS, sizeof(ets), &ets);
+	}
+
+	if (ops->ieee_peer_getpfc) {
+		struct ieee_pfc pfc;
+		err = ops->ieee_peer_getpfc(netdev, &pfc);
+		if (!err)
+			NLA_PUT(skb, DCB_ATTR_IEEE_PEER_PFC, sizeof(pfc), &pfc);
+	}
+
+	if (ops->peer_getappinfo && ops->peer_getapptable) {
+		err = dcbnl_build_peer_app(netdev, skb,
+					   DCB_ATTR_IEEE_PEER_APP,
+					   DCB_ATTR_IEEE_APP_UNSPEC,
+					   DCB_ATTR_IEEE_APP);
+		if (err)
+			goto nla_put_failure;
+	}
+
 	nla_nest_end(skb, ieee);
 	nlmsg_end(skb, nlh);
 
@@ -1441,6 +1518,71 @@ err:
 	return ret;
 }
 
+/* Handle CEE DCBX GET commands. */
+static int dcbnl_cee_get(struct net_device *netdev, struct nlattr **tb,
+			 u32 pid, u32 seq, u16 flags)
+{
+	struct sk_buff *skb;
+	struct nlmsghdr *nlh;
+	struct dcbmsg *dcb;
+	struct nlattr *cee;
+	const struct dcbnl_rtnl_ops *ops = netdev->dcbnl_ops;
+	int err;
+
+	if (!ops)
+		return -EOPNOTSUPP;
+
+	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!skb)
+		return -ENOBUFS;
+
+	nlh = NLMSG_NEW(skb, pid, seq, RTM_GETDCB, sizeof(*dcb), flags);
+
+	dcb = NLMSG_DATA(nlh);
+	dcb->dcb_family = AF_UNSPEC;
+	dcb->cmd = DCB_CMD_CEE_GET;
+
+	NLA_PUT_STRING(skb, DCB_ATTR_IFNAME, netdev->name);
+
+	cee = nla_nest_start(skb, DCB_ATTR_CEE);
+	if (!cee)
+		goto nla_put_failure;
+
+	/* get peer info if available */
+	if (ops->cee_peer_getpg) {
+		struct cee_pg pg;
+		err = ops->cee_peer_getpg(netdev, &pg);
+		if (!err)
+			NLA_PUT(skb, DCB_ATTR_CEE_PEER_PG, sizeof(pg), &pg);
+	}
+
+	if (ops->cee_peer_getpfc) {
+		struct cee_pfc pfc;
+		err = ops->cee_peer_getpfc(netdev, &pfc);
+		if (!err)
+			NLA_PUT(skb, DCB_ATTR_CEE_PEER_PFC, sizeof(pfc), &pfc);
+	}
+
+	if (ops->peer_getappinfo && ops->peer_getapptable) {
+		err = dcbnl_build_peer_app(netdev, skb,
+					   DCB_ATTR_CEE_PEER_APP_TABLE,
+					   DCB_ATTR_CEE_PEER_APP_INFO,
+					   DCB_ATTR_CEE_PEER_APP);
+		if (err)
+			goto nla_put_failure;
+	}
+
+	nla_nest_end(skb, cee);
+	nlmsg_end(skb, nlh);
+
+	return rtnl_unicast(skb, &init_net, pid);
+nla_put_failure:
+	nlmsg_cancel(skb, nlh);
+nlmsg_failure:
+	kfree_skb(skb);
+	return -1;
+}
+
 static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 {
 	struct net *net = sock_net(skb->sk);
@@ -1570,6 +1712,10 @@ static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 		ret = dcbnl_setfeatcfg(netdev, tb, pid, nlh->nlmsg_seq,
 				       nlh->nlmsg_flags);
 		goto out;
+	case DCB_CMD_CEE_GET:
+		ret = dcbnl_cee_get(netdev, tb, pid, nlh->nlmsg_seq,
+				    nlh->nlmsg_flags);
+		goto out;
 	default:
 		goto errout;
 	}
diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index e96d5e8..fadecd2 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -583,6 +583,15 @@ done:
 	dccp_ackvec_parsed_cleanup(&hc->tx_av_chunks);
 }
 
+/*
+ * Convert RFC 3390 larger initial window into an equivalent number of packets.
+ * This is based on the numbers specified in RFC 5681, 3.1.
+ */
+static inline u32 rfc3390_bytes_to_packets(const u32 smss)
+{
+	return smss <= 1095 ? 4 : (smss > 2190 ? 2 : 3);
+}
+
 static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
 {
 	struct ccid2_hc_tx_sock *hc = ccid_priv(ccid);
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 45a434f..7882377 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -43,9 +43,9 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	struct inet_sock *inet = inet_sk(sk);
 	struct dccp_sock *dp = dccp_sk(sk);
 	const struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
+	__be16 orig_sport, orig_dport;
 	struct rtable *rt;
 	__be32 daddr, nexthop;
-	int tmp;
 	int err;
 
 	dp->dccps_role = DCCP_ROLE_CLIENT;
@@ -63,12 +63,14 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 		nexthop = inet->opt->faddr;
 	}
 
-	tmp = ip_route_connect(&rt, nexthop, inet->inet_saddr,
-			       RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
-			       IPPROTO_DCCP,
-			       inet->inet_sport, usin->sin_port, sk, 1);
-	if (tmp < 0)
-		return tmp;
+	orig_sport = inet->inet_sport;
+	orig_dport = usin->sin_port;
+	rt = ip_route_connect(nexthop, inet->inet_saddr,
+			      RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
+			      IPPROTO_DCCP,
+			      orig_sport, orig_dport, sk, true);
+	if (IS_ERR(rt))
+		return PTR_ERR(rt);
 
 	if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
 		ip_rt_put(rt);
@@ -99,11 +101,13 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	if (err != 0)
 		goto failure;
 
-	err = ip_route_newports(&rt, IPPROTO_DCCP, inet->inet_sport,
-				inet->inet_dport, sk);
-	if (err != 0)
+	rt = ip_route_newports(rt, IPPROTO_DCCP,
+			       orig_sport, orig_dport,
+			       inet->inet_sport, inet->inet_dport, sk);
+	if (IS_ERR(rt)) {
+		rt = NULL;
 		goto failure;
-
+	}
 	/* OK, now commit destination to socket.  */
 	sk_setup_caps(sk, &rt->dst);
 
@@ -471,7 +475,8 @@ static struct dst_entry* dccp_v4_route_skb(struct net *net, struct sock *sk,
 			  };
 
 	security_skb_classify_flow(skb, &fl);
-	if (ip_route_output_flow(net, &rt, &fl, sk, 0)) {
+	rt = ip_route_output_flow(net, &fl, sk);
+	if (IS_ERR(rt)) {
 		IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES);
 		return NULL;
 	}
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index dca711d..5efc57f 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -162,15 +162,9 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 			fl.fl_ip_sport = inet->inet_sport;
 			security_sk_classify_flow(sk, &fl);
 
-			err = ip6_dst_lookup(sk, &dst, &fl);
-			if (err) {
-				sk->sk_err_soft = -err;
-				goto out;
-			}
-
-			err = xfrm_lookup(net, &dst, &fl, sk, 0);
-			if (err < 0) {
-				sk->sk_err_soft = -err;
+			dst = ip6_dst_lookup_flow(sk, &fl, NULL, false);
+			if (IS_ERR(dst)) {
+				sk->sk_err_soft = -PTR_ERR(dst);
 				goto out;
 			}
 		} else
@@ -267,16 +261,12 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req,
 
 	final_p = fl6_update_dst(&fl, opt, &final);
 
-	err = ip6_dst_lookup(sk, &dst, &fl);
-	if (err)
-		goto done;
-
-	if (final_p)
-		ipv6_addr_copy(&fl.fl6_dst, final_p);
-
-	err = xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0);
-	if (err < 0)
+	dst = ip6_dst_lookup_flow(sk, &fl, final_p, false);
+	if (IS_ERR(dst)) {
+		err = PTR_ERR(dst);
+		dst = NULL;
 		goto done;
+	}
 
 	skb = dccp_make_response(sk, dst, req);
 	if (skb != NULL) {
@@ -338,14 +328,13 @@ static void dccp_v6_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb)
 	security_skb_classify_flow(rxskb, &fl);
 
 	/* sk = NULL, but it is safe for now. RST socket required. */
-	if (!ip6_dst_lookup(ctl_sk, &dst, &fl)) {
-		if (xfrm_lookup(net, &dst, &fl, NULL, 0) >= 0) {
-			skb_dst_set(skb, dst);
-			ip6_xmit(ctl_sk, skb, &fl, NULL);
-			DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS);
-			DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS);
-			return;
-		}
+	dst = ip6_dst_lookup_flow(ctl_sk, &fl, NULL, false);
+	if (!IS_ERR(dst)) {
+		skb_dst_set(skb, dst);
+		ip6_xmit(ctl_sk, skb, &fl, NULL);
+		DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS);
+		DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS);
+		return;
 	}
 
 	kfree_skb(skb);
@@ -484,7 +473,6 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
 	struct inet6_request_sock *ireq6 = inet6_rsk(req);
 	struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
 	struct inet_sock *newinet;
-	struct dccp_sock *newdp;
 	struct dccp6_sock *newdp6;
 	struct sock *newsk;
 	struct ipv6_txoptions *opt;
@@ -498,7 +486,6 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
 			return NULL;
 
 		newdp6 = (struct dccp6_sock *)newsk;
-		newdp = dccp_sk(newsk);
 		newinet = inet_sk(newsk);
 		newinet->pinet6 = &newdp6->inet6;
 		newnp = inet6_sk(newsk);
@@ -552,13 +539,8 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
 		fl.fl_ip_sport = inet_rsk(req)->loc_port;
 		security_sk_classify_flow(sk, &fl);
 
-		if (ip6_dst_lookup(sk, &dst, &fl))
-			goto out;
-
-		if (final_p)
-			ipv6_addr_copy(&fl.fl6_dst, final_p);
-
-		if ((xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0)
+		dst = ip6_dst_lookup_flow(sk, &fl, final_p, false);
+		if (IS_ERR(dst))
 			goto out;
 	}
 
@@ -578,7 +560,6 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
 	newdp6 = (struct dccp6_sock *)newsk;
 	newinet = inet_sk(newsk);
 	newinet->pinet6 = &newdp6->inet6;
-	newdp = dccp_sk(newsk);
 	newnp = inet6_sk(newsk);
 
 	memcpy(newnp, np, sizeof(struct ipv6_pinfo));
@@ -982,19 +963,10 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 
 	final_p = fl6_update_dst(&fl, np->opt, &final);
 
-	err = ip6_dst_lookup(sk, &dst, &fl);
-	if (err)
+	dst = ip6_dst_lookup_flow(sk, &fl, final_p, true);
+	if (IS_ERR(dst)) {
+		err = PTR_ERR(dst);
 		goto failure;
-
-	if (final_p)
-		ipv6_addr_copy(&fl.fl6_dst, final_p);
-
-	err = __xfrm_lookup(sock_net(sk), &dst, &fl, sk, XFRM_LOOKUP_WAIT);
-	if (err < 0) {
-		if (err == -EREMOTE)
-			err = ip6_dst_blackhole(sk, &dst, &fl);
-		if (err < 0)
-			goto failure;
 	}
 
 	if (saddr == NULL) {
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 5e63636..484fdbf 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -112,6 +112,7 @@ static int dn_dst_gc(struct dst_ops *ops);
 static struct dst_entry *dn_dst_check(struct dst_entry *, __u32);
 static unsigned int dn_dst_default_advmss(const struct dst_entry *dst);
 static unsigned int dn_dst_default_mtu(const struct dst_entry *dst);
+static void dn_dst_destroy(struct dst_entry *);
 static struct dst_entry *dn_dst_negative_advice(struct dst_entry *);
 static void dn_dst_link_failure(struct sk_buff *);
 static void dn_dst_update_pmtu(struct dst_entry *dst, u32 mtu);
@@ -133,11 +134,18 @@ static struct dst_ops dn_dst_ops = {
 	.check =		dn_dst_check,
 	.default_advmss =	dn_dst_default_advmss,
 	.default_mtu =		dn_dst_default_mtu,
+	.cow_metrics =		dst_cow_metrics_generic,
+	.destroy =		dn_dst_destroy,
 	.negative_advice =	dn_dst_negative_advice,
 	.link_failure =		dn_dst_link_failure,
 	.update_pmtu =		dn_dst_update_pmtu,
 };
 
+static void dn_dst_destroy(struct dst_entry *dst)
+{
+	dst_destroy_metrics_generic(dst);
+}
+
 static __inline__ unsigned dn_hash(__le16 src, __le16 dst)
 {
 	__u16 tmp = (__u16 __force)(src ^ dst);
@@ -814,14 +822,14 @@ static int dn_rt_set_next_hop(struct dn_route *rt, struct dn_fib_res *res)
 {
 	struct dn_fib_info *fi = res->fi;
 	struct net_device *dev = rt->dst.dev;
+	unsigned int mss_metric;
 	struct neighbour *n;
-	unsigned int metric;
 
 	if (fi) {
 		if (DN_FIB_RES_GW(*res) &&
 		    DN_FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
 			rt->rt_gateway = DN_FIB_RES_GW(*res);
-		dst_import_metrics(&rt->dst, fi->fib_metrics);
+		dst_init_metrics(&rt->dst, fi->fib_metrics, true);
 	}
 	rt->rt_type = res->type;
 
@@ -834,10 +842,10 @@ static int dn_rt_set_next_hop(struct dn_route *rt, struct dn_fib_res *res)
 
 	if (dst_metric(&rt->dst, RTAX_MTU) > rt->dst.dev->mtu)
 		dst_metric_set(&rt->dst, RTAX_MTU, rt->dst.dev->mtu);
-	metric = dst_metric_raw(&rt->dst, RTAX_ADVMSS);
-	if (metric) {
+	mss_metric = dst_metric_raw(&rt->dst, RTAX_ADVMSS);
+	if (mss_metric) {
 		unsigned int mss = dn_mss_from_pmtu(dev, dst_mtu(&rt->dst));
-		if (metric > mss)
+		if (mss_metric > mss)
 			dst_metric_set(&rt->dst, RTAX_ADVMSS, mss);
 	}
 	return 0;
@@ -1114,7 +1122,7 @@ make_route:
 	if (dev_out->flags & IFF_LOOPBACK)
 		flags |= RTCF_LOCAL;
 
-	rt = dst_alloc(&dn_dst_ops);
+	rt = dst_alloc(&dn_dst_ops, 0);
 	if (rt == NULL)
 		goto e_nobufs;
 
@@ -1214,7 +1222,11 @@ static int dn_route_output_key(struct dst_entry **pprt, struct flowi *flp, int f
 
 	err = __dn_route_output_key(pprt, flp, flags);
 	if (err == 0 && flp->proto) {
-		err = xfrm_lookup(&init_net, pprt, flp, NULL, 0);
+		*pprt = xfrm_lookup(&init_net, *pprt, flp, NULL, 0);
+		if (IS_ERR(*pprt)) {
+			err = PTR_ERR(*pprt);
+			*pprt = NULL;
+		}
 	}
 	return err;
 }
@@ -1225,8 +1237,13 @@ int dn_route_output_sock(struct dst_entry **pprt, struct flowi *fl, struct sock
 
 	err = __dn_route_output_key(pprt, fl, flags & MSG_TRYHARD);
 	if (err == 0 && fl->proto) {
-		err = xfrm_lookup(&init_net, pprt, fl, sk,
-				 (flags & MSG_DONTWAIT) ? 0 : XFRM_LOOKUP_WAIT);
+		if (!(flags & MSG_DONTWAIT))
+			fl->flags |= FLOWI_FLAG_CAN_SLEEP;
+		*pprt = xfrm_lookup(&init_net, *pprt, fl, sk, 0);
+		if (IS_ERR(*pprt)) {
+			err = PTR_ERR(*pprt);
+			*pprt = NULL;
+		}
 	}
 	return err;
 }
@@ -1375,7 +1392,7 @@ static int dn_route_input_slow(struct sk_buff *skb)
 	}
 
 make_route:
-	rt = dst_alloc(&dn_dst_ops);
+	rt = dst_alloc(&dn_dst_ops, 0);
 	if (rt == NULL)
 		goto e_nobufs;
 
diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c
index f2abd37..b66600b 100644
--- a/net/decnet/dn_table.c
+++ b/net/decnet/dn_table.c
@@ -59,7 +59,6 @@ struct dn_hash
 };
 
 #define dz_key_0(key)		((key).datum = 0)
-#define dz_prefix(key,dz)	((key).datum)
 
 #define for_nexthops(fi) { int nhsel; const struct dn_fib_nh *nh;\
 	for(nhsel = 0, nh = (fi)->fib_nh; nhsel < (fi)->fib_nhs; nh++, nhsel++)
diff --git a/net/dsa/mv88e6060.c b/net/dsa/mv88e6060.c
index 83277f4..8f4ff5a 100644
--- a/net/dsa/mv88e6060.c
+++ b/net/dsa/mv88e6060.c
@@ -18,7 +18,7 @@
 
 static int reg_read(struct dsa_switch *ds, int addr, int reg)
 {
-	return mdiobus_read(ds->master_mii_bus, addr, reg);
+	return mdiobus_read(ds->master_mii_bus, ds->pd->sw_addr + addr, reg);
 }
 
 #define REG_READ(addr, reg)					\
@@ -34,7 +34,8 @@ static int reg_read(struct dsa_switch *ds, int addr, int reg)
 
 static int reg_write(struct dsa_switch *ds, int addr, int reg, u16 val)
 {
-	return mdiobus_write(ds->master_mii_bus, addr, reg, val);
+	return mdiobus_write(ds->master_mii_bus, ds->pd->sw_addr + addr,
+			     reg, val);
 }
 
 #define REG_WRITE(addr, reg, val)				\
@@ -50,7 +51,7 @@ static char *mv88e6060_probe(struct mii_bus *bus, int sw_addr)
 {
 	int ret;
 
-	ret = mdiobus_read(bus, REG_PORT(0), 0x03);
+	ret = mdiobus_read(bus, sw_addr + REG_PORT(0), 0x03);
 	if (ret >= 0) {
 		ret &= 0xfff0;
 		if (ret == 0x0600)
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index a5a1050..cbb505b 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -55,45 +55,9 @@ config IP_ADVANCED_ROUTER
 
 	  If unsure, say N here.
 
-choice
-	prompt "Choose IP: FIB lookup algorithm (choose FIB_HASH if unsure)"
-	depends on IP_ADVANCED_ROUTER
-	default ASK_IP_FIB_HASH
-
-config ASK_IP_FIB_HASH
-	bool "FIB_HASH"
-	---help---
-	  Current FIB is very proven and good enough for most users.
-
-config IP_FIB_TRIE
-	bool "FIB_TRIE"
-	---help---
-	  Use new experimental LC-trie as FIB lookup algorithm.
-	  This improves lookup performance if you have a large
-	  number of routes.
-
-	  LC-trie is a longest matching prefix lookup algorithm which
-	  performs better than FIB_HASH for large routing tables.
-	  But, it consumes more memory and is more complex.
-
-	  LC-trie is described in:
-
-	  IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson
-	  IEEE Journal on Selected Areas in Communications, 17(6):1083-1092,
-	  June 1999
-
-	  An experimental study of compression methods for dynamic tries
-	  Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002.
-	  <http://www.csc.kth.se/~snilsson/software/dyntrie2/>
-
-endchoice
-
-config IP_FIB_HASH
-	def_bool ASK_IP_FIB_HASH || !IP_ADVANCED_ROUTER
-
 config IP_FIB_TRIE_STATS
 	bool "FIB TRIE statistics"
-	depends on IP_FIB_TRIE
+	depends on IP_ADVANCED_ROUTER
 	---help---
 	  Keep track of statistics on structure of FIB TRIE table.
 	  Useful for testing and measuring TRIE performance.
@@ -140,6 +104,9 @@ config IP_ROUTE_VERBOSE
 	  handled by the klogd daemon which is responsible for kernel messages
 	  ("man klogd").
 
+config IP_ROUTE_CLASSID
+	bool
+
 config IP_PNP
 	bool "IP: kernel level autoconfiguration"
 	help
@@ -657,4 +624,3 @@ config TCP_MD5SIG
 	  on the Internet.
 
 	  If unsure, say N.
-
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 4978d22..0dc772d 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -10,12 +10,10 @@ obj-y     := route.o inetpeer.o protocol.o \
 	     tcp_minisocks.o tcp_cong.o \
 	     datagram.o raw.o udp.o udplite.o \
 	     arp.o icmp.o devinet.o af_inet.o  igmp.o \
-	     fib_frontend.o fib_semantics.o \
+	     fib_frontend.o fib_semantics.o fib_trie.o \
 	     inet_fragment.o
 
 obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o
-obj-$(CONFIG_IP_FIB_HASH) += fib_hash.o
-obj-$(CONFIG_IP_FIB_TRIE) += fib_trie.o
 obj-$(CONFIG_PROC_FS) += proc.o
 obj-$(CONFIG_IP_MULTIPLE_TABLES) += fib_rules.o
 obj-$(CONFIG_IP_MROUTE) += ipmr.o
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 45b89d7..35a5020 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1101,23 +1101,20 @@ int sysctl_ip_dynaddr __read_mostly;
 static int inet_sk_reselect_saddr(struct sock *sk)
 {
 	struct inet_sock *inet = inet_sk(sk);
-	int err;
-	struct rtable *rt;
 	__be32 old_saddr = inet->inet_saddr;
-	__be32 new_saddr;
 	__be32 daddr = inet->inet_daddr;
+	struct rtable *rt;
+	__be32 new_saddr;
 
 	if (inet->opt && inet->opt->srr)
 		daddr = inet->opt->faddr;
 
 	/* Query new route. */
-	err = ip_route_connect(&rt, daddr, 0,
-			       RT_CONN_FLAGS(sk),
-			       sk->sk_bound_dev_if,
-			       sk->sk_protocol,
-			       inet->inet_sport, inet->inet_dport, sk, 0);
-	if (err)
-		return err;
+	rt = ip_route_connect(daddr, 0, RT_CONN_FLAGS(sk),
+			      sk->sk_bound_dev_if, sk->sk_protocol,
+			      inet->inet_sport, inet->inet_dport, sk, false);
+	if (IS_ERR(rt))
+		return PTR_ERR(rt);
 
 	sk_setup_caps(sk, &rt->dst);
 
@@ -1160,7 +1157,7 @@ int inet_sk_rebuild_header(struct sock *sk)
 	daddr = inet->inet_daddr;
 	if (inet->opt && inet->opt->srr)
 		daddr = inet->opt->faddr;
-{
+	{
 	struct flowi fl = {
 		.oif = sk->sk_bound_dev_if,
 		.mark = sk->sk_mark,
@@ -1174,11 +1171,14 @@ int inet_sk_rebuild_header(struct sock *sk)
 	};
 
 	security_sk_classify_flow(sk, &fl);
-	err = ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0);
-}
-	if (!err)
+	rt = ip_route_output_flow(sock_net(sk), &fl, sk);
+	}
+	if (!IS_ERR(rt)) {
+		err = 0;
 		sk_setup_caps(sk, &rt->dst);
-	else {
+	} else {
+		err = PTR_ERR(rt);
+
 		/* Routing failed... */
 		sk->sk_route_caps = 0;
 		/*
@@ -1231,7 +1231,7 @@ out:
 	return err;
 }
 
-static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int features)
+static struct sk_buff *inet_gso_segment(struct sk_buff *skb, u32 features)
 {
 	struct sk_buff *segs = ERR_PTR(-EINVAL);
 	struct iphdr *iph;
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index 86961be..325053d 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -201,7 +201,10 @@ static int ah_output(struct xfrm_state *x, struct sk_buff *skb)
 	top_iph->ttl = 0;
 	top_iph->check = 0;
 
-	ah->hdrlen  = (XFRM_ALIGN8(sizeof(*ah) + ahp->icv_trunc_len) >> 2) - 2;
+	if (x->props.flags & XFRM_STATE_ALIGN4)
+		ah->hdrlen  = (XFRM_ALIGN4(sizeof(*ah) + ahp->icv_trunc_len) >> 2) - 2;
+	else
+		ah->hdrlen  = (XFRM_ALIGN8(sizeof(*ah) + ahp->icv_trunc_len) >> 2) - 2;
 
 	ah->reserved = 0;
 	ah->spi = x->id.spi;
@@ -299,9 +302,15 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb)
 	nexthdr = ah->nexthdr;
 	ah_hlen = (ah->hdrlen + 2) << 2;
 
-	if (ah_hlen != XFRM_ALIGN8(sizeof(*ah) + ahp->icv_full_len) &&
-	    ah_hlen != XFRM_ALIGN8(sizeof(*ah) + ahp->icv_trunc_len))
-		goto out;
+	if (x->props.flags & XFRM_STATE_ALIGN4) {
+		if (ah_hlen != XFRM_ALIGN4(sizeof(*ah) + ahp->icv_full_len) &&
+		    ah_hlen != XFRM_ALIGN4(sizeof(*ah) + ahp->icv_trunc_len))
+			goto out;
+	} else {
+		if (ah_hlen != XFRM_ALIGN8(sizeof(*ah) + ahp->icv_full_len) &&
+		    ah_hlen != XFRM_ALIGN8(sizeof(*ah) + ahp->icv_trunc_len))
+			goto out;
+	}
 
 	if (!pskb_may_pull(skb, ah_hlen))
 		goto out;
@@ -450,8 +459,12 @@ static int ah_init_state(struct xfrm_state *x)
 
 	BUG_ON(ahp->icv_trunc_len > MAX_AH_AUTH_LEN);
 
-	x->props.header_len = XFRM_ALIGN8(sizeof(struct ip_auth_hdr) +
-					  ahp->icv_trunc_len);
+	if (x->props.flags & XFRM_STATE_ALIGN4)
+		x->props.header_len = XFRM_ALIGN4(sizeof(struct ip_auth_hdr) +
+						  ahp->icv_trunc_len);
+	else
+		x->props.header_len = XFRM_ALIGN8(sizeof(struct ip_auth_hdr) +
+						  ahp->icv_trunc_len);
 	if (x->props.mode == XFRM_MODE_TUNNEL)
 		x->props.header_len += sizeof(struct iphdr);
 	x->data = ahp;
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 7927589..fa9988d 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -440,7 +440,8 @@ static int arp_filter(__be32 sip, __be32 tip, struct net_device *dev)
 	/*unsigned long now; */
 	struct net *net = dev_net(dev);
 
-	if (ip_route_output_key(net, &rt, &fl) < 0)
+	rt = ip_route_output_key(net, &fl);
+	if (IS_ERR(rt))
 		return 1;
 	if (rt->dst.dev != dev) {
 		NET_INC_STATS_BH(net, LINUX_MIB_ARPFILTER);
@@ -1063,10 +1064,10 @@ static int arp_req_set(struct net *net, struct arpreq *r,
 	if (dev == NULL) {
 		struct flowi fl = { .fl4_dst = ip,
 				    .fl4_tos = RTO_ONLINK };
-		struct rtable *rt;
-		err = ip_route_output_key(net, &rt, &fl);
-		if (err != 0)
-			return err;
+		struct rtable *rt = ip_route_output_key(net, &fl);
+
+		if (IS_ERR(rt))
+			return PTR_ERR(rt);
 		dev = rt->dst.dev;
 		ip_rt_put(rt);
 		if (!dev)
@@ -1177,7 +1178,6 @@ static int arp_req_delete_public(struct net *net, struct arpreq *r,
 static int arp_req_delete(struct net *net, struct arpreq *r,
 			  struct net_device *dev)
 {
-	int err;
 	__be32 ip;
 
 	if (r->arp_flags & ATF_PUBL)
@@ -1187,10 +1187,9 @@ static int arp_req_delete(struct net *net, struct arpreq *r,
 	if (dev == NULL) {
 		struct flowi fl = { .fl4_dst = ip,
 				    .fl4_tos = RTO_ONLINK };
-		struct rtable *rt;
-		err = ip_route_output_key(net, &rt, &fl);
-		if (err != 0)
-			return err;
+		struct rtable *rt = ip_route_output_key(net, &fl);
+		if (IS_ERR(rt))
+			return PTR_ERR(rt);
 		dev = rt->dst.dev;
 		ip_rt_put(rt);
 		if (!dev)
diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
index 174be6c..85bd24c 100644
--- a/net/ipv4/datagram.c
+++ b/net/ipv4/datagram.c
@@ -46,11 +46,12 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 		if (!saddr)
 			saddr = inet->mc_addr;
 	}
-	err = ip_route_connect(&rt, usin->sin_addr.s_addr, saddr,
-			       RT_CONN_FLAGS(sk), oif,
-			       sk->sk_protocol,
-			       inet->inet_sport, usin->sin_port, sk, 1);
-	if (err) {
+	rt = ip_route_connect(usin->sin_addr.s_addr, saddr,
+			      RT_CONN_FLAGS(sk), oif,
+			      sk->sk_protocol,
+			      inet->inet_sport, usin->sin_port, sk, true);
+	if (IS_ERR(rt)) {
+		err = PTR_ERR(rt);
 		if (err == -ENETUNREACH)
 			IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
 		return err;
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 036652c..6d85800d 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -51,6 +51,7 @@
 #include <linux/inetdevice.h>
 #include <linux/igmp.h>
 #include <linux/slab.h>
+#include <linux/hash.h>
 #ifdef CONFIG_SYSCTL
 #include <linux/sysctl.h>
 #endif
@@ -92,6 +93,71 @@ static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
 	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
 };
 
+/* inet_addr_hash's shifting is dependent upon this IN4_ADDR_HSIZE
+ * value.  So if you change this define, make appropriate changes to
+ * inet_addr_hash as well.
+ */
+#define IN4_ADDR_HSIZE	256
+static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
+static DEFINE_SPINLOCK(inet_addr_hash_lock);
+
+static inline unsigned int inet_addr_hash(struct net *net, __be32 addr)
+{
+	u32 val = (__force u32) addr ^ hash_ptr(net, 8);
+
+	return ((val ^ (val >> 8) ^ (val >> 16) ^ (val >> 24)) &
+		(IN4_ADDR_HSIZE - 1));
+}
+
+static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
+{
+	unsigned int hash = inet_addr_hash(net, ifa->ifa_local);
+
+	spin_lock(&inet_addr_hash_lock);
+	hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
+	spin_unlock(&inet_addr_hash_lock);
+}
+
+static void inet_hash_remove(struct in_ifaddr *ifa)
+{
+	spin_lock(&inet_addr_hash_lock);
+	hlist_del_init_rcu(&ifa->hash);
+	spin_unlock(&inet_addr_hash_lock);
+}
+
+/**
+ * __ip_dev_find - find the first device with a given source address.
+ * @net: the net namespace
+ * @addr: the source address
+ * @devref: if true, take a reference on the found device
+ *
+ * If a caller uses devref=false, it should be protected by RCU, or RTNL
+ */
+struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
+{
+	unsigned int hash = inet_addr_hash(net, addr);
+	struct net_device *result = NULL;
+	struct in_ifaddr *ifa;
+	struct hlist_node *node;
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(ifa, node, &inet_addr_lst[hash], hash) {
+		struct net_device *dev = ifa->ifa_dev->dev;
+
+		if (!net_eq(dev_net(dev), net))
+			continue;
+		if (ifa->ifa_local == addr) {
+			result = dev;
+			break;
+		}
+	}
+	if (result && devref)
+		dev_hold(result);
+	rcu_read_unlock();
+	return result;
+}
+EXPORT_SYMBOL(__ip_dev_find);
+
 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
 
 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
@@ -265,6 +331,7 @@ static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
 			}
 
 			if (!do_promote) {
+				inet_hash_remove(ifa);
 				*ifap1 = ifa->ifa_next;
 
 				rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
@@ -281,6 +348,7 @@ static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
 	/* 2. Unlink it */
 
 	*ifap = ifa1->ifa_next;
+	inet_hash_remove(ifa1);
 
 	/* 3. Announce address deletion */
 
@@ -368,6 +436,8 @@ static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
 	ifa->ifa_next = *ifap;
 	*ifap = ifa;
 
+	inet_hash_insert(dev_net(in_dev->dev), ifa);
+
 	/* Send message first, then call notifier.
 	   Notifier will trigger FIB update, so that
 	   listeners of netlink will know about new ifaddr */
@@ -521,6 +591,7 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
 	if (tb[IFA_ADDRESS] == NULL)
 		tb[IFA_ADDRESS] = tb[IFA_LOCAL];
 
+	INIT_HLIST_NODE(&ifa->hash);
 	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
 	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
 	ifa->ifa_flags = ifm->ifa_flags;
@@ -728,6 +799,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
 		if (!ifa) {
 			ret = -ENOBUFS;
 			ifa = inet_alloc_ifa();
+			INIT_HLIST_NODE(&ifa->hash);
 			if (!ifa)
 				break;
 			if (colon)
@@ -1084,6 +1156,7 @@ static int inetdev_event(struct notifier_block *this, unsigned long event,
 			struct in_ifaddr *ifa = inet_alloc_ifa();
 
 			if (ifa) {
+				INIT_HLIST_NODE(&ifa->hash);
 				ifa->ifa_local =
 				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
 				ifa->ifa_prefixlen = 8;
@@ -1720,6 +1793,11 @@ static struct rtnl_af_ops inet_af_ops = {
 
 void __init devinet_init(void)
 {
+	int i;
+
+	for (i = 0; i < IN4_ADDR_HSIZE; i++)
+		INIT_HLIST_HEAD(&inet_addr_lst[i]);
+
 	register_pernet_subsys(&devinet_ops);
 
 	register_gifconf(PF_INET, inet_gifconf);
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 1d2cdd4..fe10bcd 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -51,11 +51,11 @@ static int __net_init fib4_rules_init(struct net *net)
 {
 	struct fib_table *local_table, *main_table;
 
-	local_table = fib_hash_table(RT_TABLE_LOCAL);
+	local_table = fib_trie_table(RT_TABLE_LOCAL);
 	if (local_table == NULL)
 		return -ENOMEM;
 
-	main_table  = fib_hash_table(RT_TABLE_MAIN);
+	main_table  = fib_trie_table(RT_TABLE_MAIN);
 	if (main_table == NULL)
 		goto fail;
 
@@ -82,7 +82,7 @@ struct fib_table *fib_new_table(struct net *net, u32 id)
 	if (tb)
 		return tb;
 
-	tb = fib_hash_table(id);
+	tb = fib_trie_table(id);
 	if (!tb)
 		return NULL;
 	h = id & (FIB_TABLE_HASHSZ - 1);
@@ -114,21 +114,6 @@ struct fib_table *fib_get_table(struct net *net, u32 id)
 }
 #endif /* CONFIG_IP_MULTIPLE_TABLES */
 
-void fib_select_default(struct net *net,
-			const struct flowi *flp, struct fib_result *res)
-{
-	struct fib_table *tb;
-	int table = RT_TABLE_MAIN;
-#ifdef CONFIG_IP_MULTIPLE_TABLES
-	if (res->r == NULL || res->r->action != FR_ACT_TO_TBL)
-		return;
-	table = res->r->table;
-#endif
-	tb = fib_get_table(net, table);
-	if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
-		fib_table_select_default(tb, flp, res);
-}
-
 static void fib_flush(struct net *net)
 {
 	int flushed = 0;
@@ -147,46 +132,6 @@ static void fib_flush(struct net *net)
 		rt_cache_flush(net, -1);
 }
 
-/**
- * __ip_dev_find - find the first device with a given source address.
- * @net: the net namespace
- * @addr: the source address
- * @devref: if true, take a reference on the found device
- *
- * If a caller uses devref=false, it should be protected by RCU, or RTNL
- */
-struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
-{
-	struct flowi fl = {
-		.fl4_dst = addr,
-	};
-	struct fib_result res = { 0 };
-	struct net_device *dev = NULL;
-	struct fib_table *local_table;
-
-#ifdef CONFIG_IP_MULTIPLE_TABLES
-	res.r = NULL;
-#endif
-
-	rcu_read_lock();
-	local_table = fib_get_table(net, RT_TABLE_LOCAL);
-	if (!local_table ||
-	    fib_table_lookup(local_table, &fl, &res, FIB_LOOKUP_NOREF)) {
-		rcu_read_unlock();
-		return NULL;
-	}
-	if (res.type != RTN_LOCAL)
-		goto out;
-	dev = FIB_RES_DEV(res);
-
-	if (dev && devref)
-		dev_hold(dev);
-out:
-	rcu_read_unlock();
-	return dev;
-}
-EXPORT_SYMBOL(__ip_dev_find);
-
 /*
  * Find address type as if only "dev" was present in the system. If
  * on_dev is NULL then all interfaces are taken into consideration.
@@ -248,19 +193,21 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
 			u32 *itag, u32 mark)
 {
 	struct in_device *in_dev;
-	struct flowi fl = {
-		.fl4_dst = src,
-		.fl4_src = dst,
-		.fl4_tos = tos,
-		.mark = mark,
-		.iif = oif
-	};
+	struct flowi fl;
 	struct fib_result res;
 	int no_addr, rpf, accept_local;
 	bool dev_match;
 	int ret;
 	struct net *net;
 
+	fl.oif = 0;
+	fl.iif = oif;
+	fl.mark = mark;
+	fl.fl4_dst = src;
+	fl.fl4_src = dst;
+	fl.fl4_tos = tos;
+	fl.fl4_scope = RT_SCOPE_UNIVERSE;
+
 	no_addr = rpf = accept_local = 0;
 	in_dev = __in_dev_get_rcu(dev);
 	if (in_dev) {
@@ -945,10 +892,12 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event,
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
 		fib_sync_up(dev);
 #endif
+		fib_update_nh_saddrs(dev);
 		rt_cache_flush(dev_net(dev), -1);
 		break;
 	case NETDEV_DOWN:
 		fib_del_ifaddr(ifa);
+		fib_update_nh_saddrs(dev);
 		if (ifa->ifa_dev->ifa_list == NULL) {
 			/* Last address was deleted from this interface.
 			 * Disable IP.
@@ -1101,5 +1050,5 @@ void __init ip_fib_init(void)
 	register_netdevice_notifier(&fib_netdev_notifier);
 	register_inetaddr_notifier(&fib_inetaddr_notifier);
 
-	fib_hash_init();
+	fib_trie_init();
 }
diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c
deleted file mode 100644
index b3acb04..0000000
--- a/net/ipv4/fib_hash.c
+++ /dev/null
@@ -1,1133 +0,0 @@
-/*
- * INET		An implementation of the TCP/IP protocol suite for the LINUX
- *		operating system.  INET is implemented using the  BSD Socket
- *		interface as the means of communication with the user level.
- *
- *		IPv4 FIB: lookup engine and maintenance routines.
- *
- * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
- *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
- */
-
-#include <asm/uaccess.h>
-#include <asm/system.h>
-#include <linux/bitops.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/socket.h>
-#include <linux/sockios.h>
-#include <linux/errno.h>
-#include <linux/in.h>
-#include <linux/inet.h>
-#include <linux/inetdevice.h>
-#include <linux/netdevice.h>
-#include <linux/if_arp.h>
-#include <linux/proc_fs.h>
-#include <linux/skbuff.h>
-#include <linux/netlink.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-
-#include <net/net_namespace.h>
-#include <net/ip.h>
-#include <net/protocol.h>
-#include <net/route.h>
-#include <net/tcp.h>
-#include <net/sock.h>
-#include <net/ip_fib.h>
-
-#include "fib_lookup.h"
-
-static struct kmem_cache *fn_hash_kmem __read_mostly;
-static struct kmem_cache *fn_alias_kmem __read_mostly;
-
-struct fib_node {
-	struct hlist_node	fn_hash;
-	struct list_head	fn_alias;
-	__be32			fn_key;
-	struct fib_alias        fn_embedded_alias;
-};
-
-#define EMBEDDED_HASH_SIZE (L1_CACHE_BYTES / sizeof(struct hlist_head))
-
-struct fn_zone {
-	struct fn_zone __rcu	*fz_next;	/* Next not empty zone	*/
-	struct hlist_head __rcu	*fz_hash;	/* Hash table pointer	*/
-	seqlock_t		fz_lock;
-	u32			fz_hashmask;	/* (fz_divisor - 1)	*/
-
-	u8			fz_order;	/* Zone order (0..32)	*/
-	u8			fz_revorder;	/* 32 - fz_order	*/
-	__be32			fz_mask;	/* inet_make_mask(order) */
-#define FZ_MASK(fz)		((fz)->fz_mask)
-
-	struct hlist_head	fz_embedded_hash[EMBEDDED_HASH_SIZE];
-
-	int			fz_nent;	/* Number of entries	*/
-	int			fz_divisor;	/* Hash size (mask+1)	*/
-};
-
-struct fn_hash {
-	struct fn_zone		*fn_zones[33];
-	struct fn_zone __rcu	*fn_zone_list;
-};
-
-static inline u32 fn_hash(__be32 key, struct fn_zone *fz)
-{
-	u32 h = ntohl(key) >> fz->fz_revorder;
-	h ^= (h>>20);
-	h ^= (h>>10);
-	h ^= (h>>5);
-	h &= fz->fz_hashmask;
-	return h;
-}
-
-static inline __be32 fz_key(__be32 dst, struct fn_zone *fz)
-{
-	return dst & FZ_MASK(fz);
-}
-
-static unsigned int fib_hash_genid;
-
-#define FZ_MAX_DIVISOR ((PAGE_SIZE<<MAX_ORDER) / sizeof(struct hlist_head))
-
-static struct hlist_head *fz_hash_alloc(int divisor)
-{
-	unsigned long size = divisor * sizeof(struct hlist_head);
-
-	if (size <= PAGE_SIZE)
-		return kzalloc(size, GFP_KERNEL);
-
-	return (struct hlist_head *)
-		__get_free_pages(GFP_KERNEL | __GFP_ZERO, get_order(size));
-}
-
-/* The fib hash lock must be held when this is called. */
-static inline void fn_rebuild_zone(struct fn_zone *fz,
-				   struct hlist_head *old_ht,
-				   int old_divisor)
-{
-	int i;
-
-	for (i = 0; i < old_divisor; i++) {
-		struct hlist_node *node, *n;
-		struct fib_node *f;
-
-		hlist_for_each_entry_safe(f, node, n, &old_ht[i], fn_hash) {
-			struct hlist_head *new_head;
-
-			hlist_del_rcu(&f->fn_hash);
-
-			new_head = rcu_dereference_protected(fz->fz_hash, 1) +
-				   fn_hash(f->fn_key, fz);
-			hlist_add_head_rcu(&f->fn_hash, new_head);
-		}
-	}
-}
-
-static void fz_hash_free(struct hlist_head *hash, int divisor)
-{
-	unsigned long size = divisor * sizeof(struct hlist_head);
-
-	if (size <= PAGE_SIZE)
-		kfree(hash);
-	else
-		free_pages((unsigned long)hash, get_order(size));
-}
-
-static void fn_rehash_zone(struct fn_zone *fz)
-{
-	struct hlist_head *ht, *old_ht;
-	int old_divisor, new_divisor;
-	u32 new_hashmask;
-
-	new_divisor = old_divisor = fz->fz_divisor;
-
-	switch (old_divisor) {
-	case EMBEDDED_HASH_SIZE:
-		new_divisor *= EMBEDDED_HASH_SIZE;
-		break;
-	case EMBEDDED_HASH_SIZE*EMBEDDED_HASH_SIZE:
-		new_divisor *= (EMBEDDED_HASH_SIZE/2);
-		break;
-	default:
-		if ((old_divisor << 1) > FZ_MAX_DIVISOR) {
-			printk(KERN_CRIT "route.c: bad divisor %d!\n", old_divisor);
-			return;
-		}
-		new_divisor = (old_divisor << 1);
-		break;
-	}
-
-	new_hashmask = (new_divisor - 1);
-
-#if RT_CACHE_DEBUG >= 2
-	printk(KERN_DEBUG "fn_rehash_zone: hash for zone %d grows from %d\n",
-	       fz->fz_order, old_divisor);
-#endif
-
-	ht = fz_hash_alloc(new_divisor);
-
-	if (ht)	{
-		struct fn_zone nfz;
-
-		memcpy(&nfz, fz, sizeof(nfz));
-
-		write_seqlock_bh(&fz->fz_lock);
-		old_ht = rcu_dereference_protected(fz->fz_hash, 1);
-		RCU_INIT_POINTER(nfz.fz_hash, ht);
-		nfz.fz_hashmask = new_hashmask;
-		nfz.fz_divisor = new_divisor;
-		fn_rebuild_zone(&nfz, old_ht, old_divisor);
-		fib_hash_genid++;
-		rcu_assign_pointer(fz->fz_hash, ht);
-		fz->fz_hashmask = new_hashmask;
-		fz->fz_divisor = new_divisor;
-		write_sequnlock_bh(&fz->fz_lock);
-
-		if (old_ht != fz->fz_embedded_hash) {
-			synchronize_rcu();
-			fz_hash_free(old_ht, old_divisor);
-		}
-	}
-}
-
-static void fn_free_node_rcu(struct rcu_head *head)
-{
-	struct fib_node *f = container_of(head, struct fib_node, fn_embedded_alias.rcu);
-
-	kmem_cache_free(fn_hash_kmem, f);
-}
-
-static inline void fn_free_node(struct fib_node *f)
-{
-	call_rcu(&f->fn_embedded_alias.rcu, fn_free_node_rcu);
-}
-
-static void fn_free_alias_rcu(struct rcu_head *head)
-{
-	struct fib_alias *fa = container_of(head, struct fib_alias, rcu);
-
-	kmem_cache_free(fn_alias_kmem, fa);
-}
-
-static inline void fn_free_alias(struct fib_alias *fa, struct fib_node *f)
-{
-	fib_release_info(fa->fa_info);
-	if (fa == &f->fn_embedded_alias)
-		fa->fa_info = NULL;
-	else
-		call_rcu(&fa->rcu, fn_free_alias_rcu);
-}
-
-static struct fn_zone *
-fn_new_zone(struct fn_hash *table, int z)
-{
-	int i;
-	struct fn_zone *fz = kzalloc(sizeof(struct fn_zone), GFP_KERNEL);
-	if (!fz)
-		return NULL;
-
-	seqlock_init(&fz->fz_lock);
-	fz->fz_divisor = z ? EMBEDDED_HASH_SIZE : 1;
-	fz->fz_hashmask = fz->fz_divisor - 1;
-	RCU_INIT_POINTER(fz->fz_hash, fz->fz_embedded_hash);
-	fz->fz_order = z;
-	fz->fz_revorder = 32 - z;
-	fz->fz_mask = inet_make_mask(z);
-
-	/* Find the first not empty zone with more specific mask */
-	for (i = z + 1; i <= 32; i++)
-		if (table->fn_zones[i])
-			break;
-	if (i > 32) {
-		/* No more specific masks, we are the first. */
-		rcu_assign_pointer(fz->fz_next,
-				   rtnl_dereference(table->fn_zone_list));
-		rcu_assign_pointer(table->fn_zone_list, fz);
-	} else {
-		rcu_assign_pointer(fz->fz_next,
-				   rtnl_dereference(table->fn_zones[i]->fz_next));
-		rcu_assign_pointer(table->fn_zones[i]->fz_next, fz);
-	}
-	table->fn_zones[z] = fz;
-	fib_hash_genid++;
-	return fz;
-}
-
-int fib_table_lookup(struct fib_table *tb,
-		     const struct flowi *flp, struct fib_result *res,
-		     int fib_flags)
-{
-	int err;
-	struct fn_zone *fz;
-	struct fn_hash *t = (struct fn_hash *)tb->tb_data;
-
-	rcu_read_lock();
-	for (fz = rcu_dereference(t->fn_zone_list);
-	     fz != NULL;
-	     fz = rcu_dereference(fz->fz_next)) {
-		struct hlist_head *head;
-		struct hlist_node *node;
-		struct fib_node *f;
-		__be32 k;
-		unsigned int seq;
-
-		do {
-			seq = read_seqbegin(&fz->fz_lock);
-			k = fz_key(flp->fl4_dst, fz);
-
-			head = rcu_dereference(fz->fz_hash) + fn_hash(k, fz);
-			hlist_for_each_entry_rcu(f, node, head, fn_hash) {
-				if (f->fn_key != k)
-					continue;
-
-				err = fib_semantic_match(&f->fn_alias,
-						 flp, res,
-						 fz->fz_order, fib_flags);
-				if (err <= 0)
-					goto out;
-			}
-		} while (read_seqretry(&fz->fz_lock, seq));
-	}
-	err = 1;
-out:
-	rcu_read_unlock();
-	return err;
-}
-
-void fib_table_select_default(struct fib_table *tb,
-			      const struct flowi *flp, struct fib_result *res)
-{
-	int order, last_idx;
-	struct hlist_node *node;
-	struct fib_node *f;
-	struct fib_info *fi = NULL;
-	struct fib_info *last_resort;
-	struct fn_hash *t = (struct fn_hash *)tb->tb_data;
-	struct fn_zone *fz = t->fn_zones[0];
-	struct hlist_head *head;
-
-	if (fz == NULL)
-		return;
-
-	last_idx = -1;
-	last_resort = NULL;
-	order = -1;
-
-	rcu_read_lock();
-	head = rcu_dereference(fz->fz_hash);
-	hlist_for_each_entry_rcu(f, node, head, fn_hash) {
-		struct fib_alias *fa;
-
-		list_for_each_entry_rcu(fa, &f->fn_alias, fa_list) {
-			struct fib_info *next_fi = fa->fa_info;
-
-			if (fa->fa_scope != res->scope ||
-			    fa->fa_type != RTN_UNICAST)
-				continue;
-
-			if (next_fi->fib_priority > res->fi->fib_priority)
-				break;
-			if (!next_fi->fib_nh[0].nh_gw ||
-			    next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK)
-				continue;
-
-			fib_alias_accessed(fa);
-
-			if (fi == NULL) {
-				if (next_fi != res->fi)
-					break;
-			} else if (!fib_detect_death(fi, order, &last_resort,
-						&last_idx, tb->tb_default)) {
-				fib_result_assign(res, fi);
-				tb->tb_default = order;
-				goto out;
-			}
-			fi = next_fi;
-			order++;
-		}
-	}
-
-	if (order <= 0 || fi == NULL) {
-		tb->tb_default = -1;
-		goto out;
-	}
-
-	if (!fib_detect_death(fi, order, &last_resort, &last_idx,
-				tb->tb_default)) {
-		fib_result_assign(res, fi);
-		tb->tb_default = order;
-		goto out;
-	}
-
-	if (last_idx >= 0)
-		fib_result_assign(res, last_resort);
-	tb->tb_default = last_idx;
-out:
-	rcu_read_unlock();
-}
-
-/* Insert node F to FZ. */
-static inline void fib_insert_node(struct fn_zone *fz, struct fib_node *f)
-{
-	struct hlist_head *head = rtnl_dereference(fz->fz_hash) + fn_hash(f->fn_key, fz);
-
-	hlist_add_head_rcu(&f->fn_hash, head);
-}
-
-/* Return the node in FZ matching KEY. */
-static struct fib_node *fib_find_node(struct fn_zone *fz, __be32 key)
-{
-	struct hlist_head *head = rtnl_dereference(fz->fz_hash) + fn_hash(key, fz);
-	struct hlist_node *node;
-	struct fib_node *f;
-
-	hlist_for_each_entry_rcu(f, node, head, fn_hash) {
-		if (f->fn_key == key)
-			return f;
-	}
-
-	return NULL;
-}
-
-
-static struct fib_alias *fib_fast_alloc(struct fib_node *f)
-{
-	struct fib_alias *fa = &f->fn_embedded_alias;
-
-	if (fa->fa_info != NULL)
-		fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
-	return fa;
-}
-
-/* Caller must hold RTNL. */
-int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
-{
-	struct fn_hash *table = (struct fn_hash *) tb->tb_data;
-	struct fib_node *new_f = NULL;
-	struct fib_node *f;
-	struct fib_alias *fa, *new_fa;
-	struct fn_zone *fz;
-	struct fib_info *fi;
-	u8 tos = cfg->fc_tos;
-	__be32 key;
-	int err;
-
-	if (cfg->fc_dst_len > 32)
-		return -EINVAL;
-
-	fz = table->fn_zones[cfg->fc_dst_len];
-	if (!fz && !(fz = fn_new_zone(table, cfg->fc_dst_len)))
-		return -ENOBUFS;
-
-	key = 0;
-	if (cfg->fc_dst) {
-		if (cfg->fc_dst & ~FZ_MASK(fz))
-			return -EINVAL;
-		key = fz_key(cfg->fc_dst, fz);
-	}
-
-	fi = fib_create_info(cfg);
-	if (IS_ERR(fi))
-		return PTR_ERR(fi);
-
-	if (fz->fz_nent > (fz->fz_divisor<<1) &&
-	    fz->fz_divisor < FZ_MAX_DIVISOR &&
-	    (cfg->fc_dst_len == 32 ||
-	     (1 << cfg->fc_dst_len) > fz->fz_divisor))
-		fn_rehash_zone(fz);
-
-	f = fib_find_node(fz, key);
-
-	if (!f)
-		fa = NULL;
-	else
-		fa = fib_find_alias(&f->fn_alias, tos, fi->fib_priority);
-
-	/* Now fa, if non-NULL, points to the first fib alias
-	 * with the same keys [prefix,tos,priority], if such key already
-	 * exists or to the node before which we will insert new one.
-	 *
-	 * If fa is NULL, we will need to allocate a new one and
-	 * insert to the head of f.
-	 *
-	 * If f is NULL, no fib node matched the destination key
-	 * and we need to allocate a new one of those as well.
-	 */
-
-	if (fa && fa->fa_tos == tos &&
-	    fa->fa_info->fib_priority == fi->fib_priority) {
-		struct fib_alias *fa_first, *fa_match;
-
-		err = -EEXIST;
-		if (cfg->fc_nlflags & NLM_F_EXCL)
-			goto out;
-
-		/* We have 2 goals:
-		 * 1. Find exact match for type, scope, fib_info to avoid
-		 * duplicate routes
-		 * 2. Find next 'fa' (or head), NLM_F_APPEND inserts before it
-		 */
-		fa_match = NULL;
-		fa_first = fa;
-		fa = list_entry(fa->fa_list.prev, struct fib_alias, fa_list);
-		list_for_each_entry_continue(fa, &f->fn_alias, fa_list) {
-			if (fa->fa_tos != tos)
-				break;
-			if (fa->fa_info->fib_priority != fi->fib_priority)
-				break;
-			if (fa->fa_type == cfg->fc_type &&
-			    fa->fa_scope == cfg->fc_scope &&
-			    fa->fa_info == fi) {
-				fa_match = fa;
-				break;
-			}
-		}
-
-		if (cfg->fc_nlflags & NLM_F_REPLACE) {
-			u8 state;
-
-			fa = fa_first;
-			if (fa_match) {
-				if (fa == fa_match)
-					err = 0;
-				goto out;
-			}
-			err = -ENOBUFS;
-			new_fa = fib_fast_alloc(f);
-			if (new_fa == NULL)
-				goto out;
-
-			new_fa->fa_tos = fa->fa_tos;
-			new_fa->fa_info = fi;
-			new_fa->fa_type = cfg->fc_type;
-			new_fa->fa_scope = cfg->fc_scope;
-			state = fa->fa_state;
-			new_fa->fa_state = state & ~FA_S_ACCESSED;
-			fib_hash_genid++;
-			list_replace_rcu(&fa->fa_list, &new_fa->fa_list);
-
-			fn_free_alias(fa, f);
-			if (state & FA_S_ACCESSED)
-				rt_cache_flush(cfg->fc_nlinfo.nl_net, -1);
-			rtmsg_fib(RTM_NEWROUTE, key, new_fa, cfg->fc_dst_len,
-				  tb->tb_id, &cfg->fc_nlinfo, NLM_F_REPLACE);
-			return 0;
-		}
-
-		/* Error if we find a perfect match which
-		 * uses the same scope, type, and nexthop
-		 * information.
-		 */
-		if (fa_match)
-			goto out;
-
-		if (!(cfg->fc_nlflags & NLM_F_APPEND))
-			fa = fa_first;
-	}
-
-	err = -ENOENT;
-	if (!(cfg->fc_nlflags & NLM_F_CREATE))
-		goto out;
-
-	err = -ENOBUFS;
-
-	if (!f) {
-		new_f = kmem_cache_zalloc(fn_hash_kmem, GFP_KERNEL);
-		if (new_f == NULL)
-			goto out;
-
-		INIT_HLIST_NODE(&new_f->fn_hash);
-		INIT_LIST_HEAD(&new_f->fn_alias);
-		new_f->fn_key = key;
-		f = new_f;
-	}
-
-	new_fa = fib_fast_alloc(f);
-	if (new_fa == NULL)
-		goto out;
-
-	new_fa->fa_info = fi;
-	new_fa->fa_tos = tos;
-	new_fa->fa_type = cfg->fc_type;
-	new_fa->fa_scope = cfg->fc_scope;
-	new_fa->fa_state = 0;
-
-	/*
-	 * Insert new entry to the list.
-	 */
-
-	if (new_f)
-		fib_insert_node(fz, new_f);
-	list_add_tail_rcu(&new_fa->fa_list,
-		 (fa ? &fa->fa_list : &f->fn_alias));
-	fib_hash_genid++;
-
-	if (new_f)
-		fz->fz_nent++;
-	rt_cache_flush(cfg->fc_nlinfo.nl_net, -1);
-
-	rtmsg_fib(RTM_NEWROUTE, key, new_fa, cfg->fc_dst_len, tb->tb_id,
-		  &cfg->fc_nlinfo, 0);
-	return 0;
-
-out:
-	if (new_f)
-		kmem_cache_free(fn_hash_kmem, new_f);
-	fib_release_info(fi);
-	return err;
-}
-
-int fib_table_delete(struct fib_table *tb, struct fib_config *cfg)
-{
-	struct fn_hash *table = (struct fn_hash *)tb->tb_data;
-	struct fib_node *f;
-	struct fib_alias *fa, *fa_to_delete;
-	struct fn_zone *fz;
-	__be32 key;
-
-	if (cfg->fc_dst_len > 32)
-		return -EINVAL;
-
-	if ((fz  = table->fn_zones[cfg->fc_dst_len]) == NULL)
-		return -ESRCH;
-
-	key = 0;
-	if (cfg->fc_dst) {
-		if (cfg->fc_dst & ~FZ_MASK(fz))
-			return -EINVAL;
-		key = fz_key(cfg->fc_dst, fz);
-	}
-
-	f = fib_find_node(fz, key);
-
-	if (!f)
-		fa = NULL;
-	else
-		fa = fib_find_alias(&f->fn_alias, cfg->fc_tos, 0);
-	if (!fa)
-		return -ESRCH;
-
-	fa_to_delete = NULL;
-	fa = list_entry(fa->fa_list.prev, struct fib_alias, fa_list);
-	list_for_each_entry_continue(fa, &f->fn_alias, fa_list) {
-		struct fib_info *fi = fa->fa_info;
-
-		if (fa->fa_tos != cfg->fc_tos)
-			break;
-
-		if ((!cfg->fc_type ||
-		     fa->fa_type == cfg->fc_type) &&
-		    (cfg->fc_scope == RT_SCOPE_NOWHERE ||
-		     fa->fa_scope == cfg->fc_scope) &&
-		    (!cfg->fc_protocol ||
-		     fi->fib_protocol == cfg->fc_protocol) &&
-		    fib_nh_match(cfg, fi) == 0) {
-			fa_to_delete = fa;
-			break;
-		}
-	}
-
-	if (fa_to_delete) {
-		int kill_fn;
-
-		fa = fa_to_delete;
-		rtmsg_fib(RTM_DELROUTE, key, fa, cfg->fc_dst_len,
-			  tb->tb_id, &cfg->fc_nlinfo, 0);
-
-		kill_fn = 0;
-		list_del_rcu(&fa->fa_list);
-		if (list_empty(&f->fn_alias)) {
-			hlist_del_rcu(&f->fn_hash);
-			kill_fn = 1;
-		}
-		fib_hash_genid++;
-
-		if (fa->fa_state & FA_S_ACCESSED)
-			rt_cache_flush(cfg->fc_nlinfo.nl_net, -1);
-		fn_free_alias(fa, f);
-		if (kill_fn) {
-			fn_free_node(f);
-			fz->fz_nent--;
-		}
-
-		return 0;
-	}
-	return -ESRCH;
-}
-
-static int fn_flush_list(struct fn_zone *fz, int idx)
-{
-	struct hlist_head *head = rtnl_dereference(fz->fz_hash) + idx;
-	struct hlist_node *node, *n;
-	struct fib_node *f;
-	int found = 0;
-
-	hlist_for_each_entry_safe(f, node, n, head, fn_hash) {
-		struct fib_alias *fa, *fa_node;
-		int kill_f;
-
-		kill_f = 0;
-		list_for_each_entry_safe(fa, fa_node, &f->fn_alias, fa_list) {
-			struct fib_info *fi = fa->fa_info;
-
-			if (fi && (fi->fib_flags&RTNH_F_DEAD)) {
-				list_del_rcu(&fa->fa_list);
-				if (list_empty(&f->fn_alias)) {
-					hlist_del_rcu(&f->fn_hash);
-					kill_f = 1;
-				}
-				fib_hash_genid++;
-
-				fn_free_alias(fa, f);
-				found++;
-			}
-		}
-		if (kill_f) {
-			fn_free_node(f);
-			fz->fz_nent--;
-		}
-	}
-	return found;
-}
-
-/* caller must hold RTNL. */
-int fib_table_flush(struct fib_table *tb)
-{
-	struct fn_hash *table = (struct fn_hash *) tb->tb_data;
-	struct fn_zone *fz;
-	int found = 0;
-
-	for (fz = rtnl_dereference(table->fn_zone_list);
-	     fz != NULL;
-	     fz = rtnl_dereference(fz->fz_next)) {
-		int i;
-
-		for (i = fz->fz_divisor - 1; i >= 0; i--)
-			found += fn_flush_list(fz, i);
-	}
-	return found;
-}
-
-void fib_free_table(struct fib_table *tb)
-{
-	struct fn_hash *table = (struct fn_hash *) tb->tb_data;
-	struct fn_zone *fz, *next;
-
-	next = table->fn_zone_list;
-	while (next != NULL) {
-		fz = next;
-		next = fz->fz_next;
-
-		if (fz->fz_hash != fz->fz_embedded_hash)
-			fz_hash_free(fz->fz_hash, fz->fz_divisor);
-
-		kfree(fz);
-	}
-
-	kfree(tb);
-}
-
-static inline int
-fn_hash_dump_bucket(struct sk_buff *skb, struct netlink_callback *cb,
-		     struct fib_table *tb,
-		     struct fn_zone *fz,
-		     struct hlist_head *head)
-{
-	struct hlist_node *node;
-	struct fib_node *f;
-	int i, s_i;
-
-	s_i = cb->args[4];
-	i = 0;
-	hlist_for_each_entry_rcu(f, node, head, fn_hash) {
-		struct fib_alias *fa;
-
-		list_for_each_entry_rcu(fa, &f->fn_alias, fa_list) {
-			if (i < s_i)
-				goto next;
-
-			if (fib_dump_info(skb, NETLINK_CB(cb->skb).pid,
-					  cb->nlh->nlmsg_seq,
-					  RTM_NEWROUTE,
-					  tb->tb_id,
-					  fa->fa_type,
-					  fa->fa_scope,
-					  f->fn_key,
-					  fz->fz_order,
-					  fa->fa_tos,
-					  fa->fa_info,
-					  NLM_F_MULTI) < 0) {
-				cb->args[4] = i;
-				return -1;
-			}
-next:
-			i++;
-		}
-	}
-	cb->args[4] = i;
-	return skb->len;
-}
-
-static inline int
-fn_hash_dump_zone(struct sk_buff *skb, struct netlink_callback *cb,
-		   struct fib_table *tb,
-		   struct fn_zone *fz)
-{
-	int h, s_h;
-	struct hlist_head *head = rcu_dereference(fz->fz_hash);
-
-	if (head == NULL)
-		return skb->len;
-	s_h = cb->args[3];
-	for (h = s_h; h < fz->fz_divisor; h++) {
-		if (hlist_empty(head + h))
-			continue;
-		if (fn_hash_dump_bucket(skb, cb, tb, fz, head + h) < 0) {
-			cb->args[3] = h;
-			return -1;
-		}
-		memset(&cb->args[4], 0,
-		       sizeof(cb->args) - 4*sizeof(cb->args[0]));
-	}
-	cb->args[3] = h;
-	return skb->len;
-}
-
-int fib_table_dump(struct fib_table *tb, struct sk_buff *skb,
-		   struct netlink_callback *cb)
-{
-	int m = 0, s_m;
-	struct fn_zone *fz;
-	struct fn_hash *table = (struct fn_hash *)tb->tb_data;
-
-	s_m = cb->args[2];
-	rcu_read_lock();
-	for (fz = rcu_dereference(table->fn_zone_list);
-	     fz != NULL;
-	     fz = rcu_dereference(fz->fz_next), m++) {
-		if (m < s_m)
-			continue;
-		if (fn_hash_dump_zone(skb, cb, tb, fz) < 0) {
-			cb->args[2] = m;
-			rcu_read_unlock();
-			return -1;
-		}
-		memset(&cb->args[3], 0,
-		       sizeof(cb->args) - 3*sizeof(cb->args[0]));
-	}
-	rcu_read_unlock();
-	cb->args[2] = m;
-	return skb->len;
-}
-
-void __init fib_hash_init(void)
-{
-	fn_hash_kmem = kmem_cache_create("ip_fib_hash", sizeof(struct fib_node),
-					 0, SLAB_PANIC, NULL);
-
-	fn_alias_kmem = kmem_cache_create("ip_fib_alias", sizeof(struct fib_alias),
-					  0, SLAB_PANIC, NULL);
-
-}
-
-struct fib_table *fib_hash_table(u32 id)
-{
-	struct fib_table *tb;
-
-	tb = kmalloc(sizeof(struct fib_table) + sizeof(struct fn_hash),
-		     GFP_KERNEL);
-	if (tb == NULL)
-		return NULL;
-
-	tb->tb_id = id;
-	tb->tb_default = -1;
-
-	memset(tb->tb_data, 0, sizeof(struct fn_hash));
-	return tb;
-}
-
-/* ------------------------------------------------------------------------ */
-#ifdef CONFIG_PROC_FS
-
-struct fib_iter_state {
-	struct seq_net_private p;
-	struct fn_zone	*zone;
-	int		bucket;
-	struct hlist_head *hash_head;
-	struct fib_node *fn;
-	struct fib_alias *fa;
-	loff_t pos;
-	unsigned int genid;
-	int valid;
-};
-
-static struct fib_alias *fib_get_first(struct seq_file *seq)
-{
-	struct fib_iter_state *iter = seq->private;
-	struct fib_table *main_table;
-	struct fn_hash *table;
-
-	main_table = fib_get_table(seq_file_net(seq), RT_TABLE_MAIN);
-	table = (struct fn_hash *)main_table->tb_data;
-
-	iter->bucket    = 0;
-	iter->hash_head = NULL;
-	iter->fn        = NULL;
-	iter->fa        = NULL;
-	iter->pos	= 0;
-	iter->genid	= fib_hash_genid;
-	iter->valid	= 1;
-
-	for (iter->zone = rcu_dereference(table->fn_zone_list);
-	     iter->zone != NULL;
-	     iter->zone = rcu_dereference(iter->zone->fz_next)) {
-		int maxslot;
-
-		if (!iter->zone->fz_nent)
-			continue;
-
-		iter->hash_head = rcu_dereference(iter->zone->fz_hash);
-		maxslot = iter->zone->fz_divisor;
-
-		for (iter->bucket = 0; iter->bucket < maxslot;
-		     ++iter->bucket, ++iter->hash_head) {
-			struct hlist_node *node;
-			struct fib_node *fn;
-
-			hlist_for_each_entry(fn, node, iter->hash_head, fn_hash) {
-				struct fib_alias *fa;
-
-				list_for_each_entry(fa, &fn->fn_alias, fa_list) {
-					iter->fn = fn;
-					iter->fa = fa;
-					goto out;
-				}
-			}
-		}
-	}
-out:
-	return iter->fa;
-}
-
-static struct fib_alias *fib_get_next(struct seq_file *seq)
-{
-	struct fib_iter_state *iter = seq->private;
-	struct fib_node *fn;
-	struct fib_alias *fa;
-
-	/* Advance FA, if any. */
-	fn = iter->fn;
-	fa = iter->fa;
-	if (fa) {
-		BUG_ON(!fn);
-		list_for_each_entry_continue(fa, &fn->fn_alias, fa_list) {
-			iter->fa = fa;
-			goto out;
-		}
-	}
-
-	fa = iter->fa = NULL;
-
-	/* Advance FN. */
-	if (fn) {
-		struct hlist_node *node = &fn->fn_hash;
-		hlist_for_each_entry_continue(fn, node, fn_hash) {
-			iter->fn = fn;
-
-			list_for_each_entry(fa, &fn->fn_alias, fa_list) {
-				iter->fa = fa;
-				goto out;
-			}
-		}
-	}
-
-	fn = iter->fn = NULL;
-
-	/* Advance hash chain. */
-	if (!iter->zone)
-		goto out;
-
-	for (;;) {
-		struct hlist_node *node;
-		int maxslot;
-
-		maxslot = iter->zone->fz_divisor;
-
-		while (++iter->bucket < maxslot) {
-			iter->hash_head++;
-
-			hlist_for_each_entry(fn, node, iter->hash_head, fn_hash) {
-				list_for_each_entry(fa, &fn->fn_alias, fa_list) {
-					iter->fn = fn;
-					iter->fa = fa;
-					goto out;
-				}
-			}
-		}
-
-		iter->zone = rcu_dereference(iter->zone->fz_next);
-
-		if (!iter->zone)
-			goto out;
-
-		iter->bucket = 0;
-		iter->hash_head = rcu_dereference(iter->zone->fz_hash);
-
-		hlist_for_each_entry(fn, node, iter->hash_head, fn_hash) {
-			list_for_each_entry(fa, &fn->fn_alias, fa_list) {
-				iter->fn = fn;
-				iter->fa = fa;
-				goto out;
-			}
-		}
-	}
-out:
-	iter->pos++;
-	return fa;
-}
-
-static struct fib_alias *fib_get_idx(struct seq_file *seq, loff_t pos)
-{
-	struct fib_iter_state *iter = seq->private;
-	struct fib_alias *fa;
-
-	if (iter->valid && pos >= iter->pos && iter->genid == fib_hash_genid) {
-		fa   = iter->fa;
-		pos -= iter->pos;
-	} else
-		fa = fib_get_first(seq);
-
-	if (fa)
-		while (pos && (fa = fib_get_next(seq)))
-			--pos;
-	return pos ? NULL : fa;
-}
-
-static void *fib_seq_start(struct seq_file *seq, loff_t *pos)
-	__acquires(RCU)
-{
-	void *v = NULL;
-
-	rcu_read_lock();
-	if (fib_get_table(seq_file_net(seq), RT_TABLE_MAIN))
-		v = *pos ? fib_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
-	return v;
-}
-
-static void *fib_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
-	++*pos;
-	return v == SEQ_START_TOKEN ? fib_get_first(seq) : fib_get_next(seq);
-}
-
-static void fib_seq_stop(struct seq_file *seq, void *v)
-	__releases(RCU)
-{
-	rcu_read_unlock();
-}
-
-static unsigned fib_flag_trans(int type, __be32 mask, struct fib_info *fi)
-{
-	static const unsigned type2flags[RTN_MAX + 1] = {
-		[7] = RTF_REJECT,
-		[8] = RTF_REJECT,
-	};
-	unsigned flags = type2flags[type];
-
-	if (fi && fi->fib_nh->nh_gw)
-		flags |= RTF_GATEWAY;
-	if (mask == htonl(0xFFFFFFFF))
-		flags |= RTF_HOST;
-	flags |= RTF_UP;
-	return flags;
-}
-
-/*
- *	This outputs /proc/net/route.
- *
- *	It always works in backward compatibility mode.
- *	The format of the file is not supposed to be changed.
- */
-static int fib_seq_show(struct seq_file *seq, void *v)
-{
-	struct fib_iter_state *iter;
-	int len;
-	__be32 prefix, mask;
-	unsigned flags;
-	struct fib_node *f;
-	struct fib_alias *fa;
-	struct fib_info *fi;
-
-	if (v == SEQ_START_TOKEN) {
-		seq_printf(seq, "%-127s\n", "Iface\tDestination\tGateway "
-			   "\tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU"
-			   "\tWindow\tIRTT");
-		goto out;
-	}
-
-	iter	= seq->private;
-	f	= iter->fn;
-	fa	= iter->fa;
-	fi	= fa->fa_info;
-	prefix	= f->fn_key;
-	mask	= FZ_MASK(iter->zone);
-	flags	= fib_flag_trans(fa->fa_type, mask, fi);
-	if (fi)
-		seq_printf(seq,
-			 "%s\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u%n",
-			 fi->fib_dev ? fi->fib_dev->name : "*", prefix,
-			 fi->fib_nh->nh_gw, flags, 0, 0, fi->fib_priority,
-			 mask, (fi->fib_advmss ? fi->fib_advmss + 40 : 0),
-			 fi->fib_window,
-			 fi->fib_rtt >> 3, &len);
-	else
-		seq_printf(seq,
-			 "*\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u%n",
-			 prefix, 0, flags, 0, 0, 0, mask, 0, 0, 0, &len);
-
-	seq_printf(seq, "%*s\n", 127 - len, "");
-out:
-	return 0;
-}
-
-static const struct seq_operations fib_seq_ops = {
-	.start  = fib_seq_start,
-	.next   = fib_seq_next,
-	.stop   = fib_seq_stop,
-	.show   = fib_seq_show,
-};
-
-static int fib_seq_open(struct inode *inode, struct file *file)
-{
-	return seq_open_net(inode, file, &fib_seq_ops,
-			    sizeof(struct fib_iter_state));
-}
-
-static const struct file_operations fib_seq_fops = {
-	.owner		= THIS_MODULE,
-	.open           = fib_seq_open,
-	.read           = seq_read,
-	.llseek         = seq_lseek,
-	.release	= seq_release_net,
-};
-
-int __net_init fib_proc_init(struct net *net)
-{
-	if (!proc_net_fops_create(net, "route", S_IRUGO, &fib_seq_fops))
-		return -ENOMEM;
-	return 0;
-}
-
-void __net_exit fib_proc_exit(struct net *net)
-{
-	proc_net_remove(net, "route");
-}
-#endif /* CONFIG_PROC_FS */
diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h
index c079cc0..84db2da 100644
--- a/net/ipv4/fib_lookup.h
+++ b/net/ipv4/fib_lookup.h
@@ -25,7 +25,7 @@ static inline void fib_alias_accessed(struct fib_alias *fa)
 }
 
 /* Exported by fib_semantics.c */
-extern int fib_semantic_match(struct list_head *head,
+extern int fib_semantic_match(struct fib_table *tb, struct list_head *head,
 			      const struct flowi *flp,
 			      struct fib_result *res, int prefixlen, int fib_flags);
 extern void fib_release_info(struct fib_info *);
@@ -51,4 +51,11 @@ static inline void fib_result_assign(struct fib_result *res,
 	res->fi = fi;
 }
 
+struct fib_prop {
+	int	error;
+	u8	scope;
+};
+
+extern const struct fib_prop fib_props[RTN_MAX + 1];
+
 #endif /* _FIB_LOOKUP_H */
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 7981a24..3018efb 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -41,13 +41,13 @@ struct fib4_rule {
 	__be32			srcmask;
 	__be32			dst;
 	__be32			dstmask;
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
 	u32			tclassid;
 #endif
 };
 
-#ifdef CONFIG_NET_CLS_ROUTE
-u32 fib_rules_tclass(struct fib_result *res)
+#ifdef CONFIG_IP_ROUTE_CLASSID
+u32 fib_rules_tclass(const struct fib_result *res)
 {
 	return res->r ? ((struct fib4_rule *) res->r)->tclassid : 0;
 }
@@ -165,7 +165,7 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
 	if (frh->dst_len)
 		rule4->dst = nla_get_be32(tb[FRA_DST]);
 
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
 	if (tb[FRA_FLOW])
 		rule4->tclassid = nla_get_u32(tb[FRA_FLOW]);
 #endif
@@ -195,7 +195,7 @@ static int fib4_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
 	if (frh->tos && (rule4->tos != frh->tos))
 		return 0;
 
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
 	if (tb[FRA_FLOW] && (rule4->tclassid != nla_get_u32(tb[FRA_FLOW])))
 		return 0;
 #endif
@@ -224,7 +224,7 @@ static int fib4_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
 	if (rule4->src_len)
 		NLA_PUT_BE32(skb, FRA_SRC, rule4->src);
 
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
 	if (rule4->tclassid)
 		NLA_PUT_U32(skb, FRA_FLOW, rule4->tclassid);
 #endif
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 12d3dc3..d73d758 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -49,7 +49,7 @@
 static DEFINE_SPINLOCK(fib_info_lock);
 static struct hlist_head *fib_info_hash;
 static struct hlist_head *fib_info_laddrhash;
-static unsigned int fib_hash_size;
+static unsigned int fib_info_hash_size;
 static unsigned int fib_info_cnt;
 
 #define DEVINDEX_HASHBITS 8
@@ -90,11 +90,7 @@ static DEFINE_SPINLOCK(fib_multipath_lock);
 #define endfor_nexthops(fi) }
 
 
-static const struct
-{
-	int	error;
-	u8	scope;
-} fib_props[RTN_MAX + 1] = {
+const struct fib_prop fib_props[RTN_MAX + 1] = {
 	[RTN_UNSPEC] = {
 		.error	= 0,
 		.scope	= RT_SCOPE_NOWHERE,
@@ -152,6 +148,8 @@ static void free_fib_info_rcu(struct rcu_head *head)
 {
 	struct fib_info *fi = container_of(head, struct fib_info, rcu);
 
+	if (fi->fib_metrics != (u32 *) dst_default_metrics)
+		kfree(fi->fib_metrics);
 	kfree(fi);
 }
 
@@ -200,7 +198,7 @@ static inline int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
 		    nh->nh_weight != onh->nh_weight ||
 #endif
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
 		    nh->nh_tclassid != onh->nh_tclassid ||
 #endif
 		    ((nh->nh_flags ^ onh->nh_flags) & ~RTNH_F_DEAD))
@@ -221,7 +219,7 @@ static inline unsigned int fib_devindex_hashfn(unsigned int val)
 
 static inline unsigned int fib_info_hashfn(const struct fib_info *fi)
 {
-	unsigned int mask = (fib_hash_size - 1);
+	unsigned int mask = (fib_info_hash_size - 1);
 	unsigned int val = fi->fib_nhs;
 
 	val ^= fi->fib_protocol;
@@ -422,7 +420,7 @@ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
 
 			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
 			nexthop_nh->nh_gw = nla ? nla_get_be32(nla) : 0;
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
 			nla = nla_find(attrs, attrlen, RTA_FLOW);
 			nexthop_nh->nh_tclassid = nla ? nla_get_u32(nla) : 0;
 #endif
@@ -476,7 +474,7 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi)
 			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
 			if (nla && nla_get_be32(nla) != nh->nh_gw)
 				return 1;
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
 			nla = nla_find(attrs, attrlen, RTA_FLOW);
 			if (nla && nla_get_u32(nla) != nh->nh_tclassid)
 				return 1;
@@ -613,14 +611,14 @@ out:
 
 static inline unsigned int fib_laddr_hashfn(__be32 val)
 {
-	unsigned int mask = (fib_hash_size - 1);
+	unsigned int mask = (fib_info_hash_size - 1);
 
 	return ((__force u32)val ^
 		((__force u32)val >> 7) ^
 		((__force u32)val >> 14)) & mask;
 }
 
-static struct hlist_head *fib_hash_alloc(int bytes)
+static struct hlist_head *fib_info_hash_alloc(int bytes)
 {
 	if (bytes <= PAGE_SIZE)
 		return kzalloc(bytes, GFP_KERNEL);
@@ -630,7 +628,7 @@ static struct hlist_head *fib_hash_alloc(int bytes)
 					 get_order(bytes));
 }
 
-static void fib_hash_free(struct hlist_head *hash, int bytes)
+static void fib_info_hash_free(struct hlist_head *hash, int bytes)
 {
 	if (!hash)
 		return;
@@ -641,18 +639,18 @@ static void fib_hash_free(struct hlist_head *hash, int bytes)
 		free_pages((unsigned long) hash, get_order(bytes));
 }
 
-static void fib_hash_move(struct hlist_head *new_info_hash,
-			  struct hlist_head *new_laddrhash,
-			  unsigned int new_size)
+static void fib_info_hash_move(struct hlist_head *new_info_hash,
+			       struct hlist_head *new_laddrhash,
+			       unsigned int new_size)
 {
 	struct hlist_head *old_info_hash, *old_laddrhash;
-	unsigned int old_size = fib_hash_size;
+	unsigned int old_size = fib_info_hash_size;
 	unsigned int i, bytes;
 
 	spin_lock_bh(&fib_info_lock);
 	old_info_hash = fib_info_hash;
 	old_laddrhash = fib_info_laddrhash;
-	fib_hash_size = new_size;
+	fib_info_hash_size = new_size;
 
 	for (i = 0; i < old_size; i++) {
 		struct hlist_head *head = &fib_info_hash[i];
@@ -693,8 +691,8 @@ static void fib_hash_move(struct hlist_head *new_info_hash,
 	spin_unlock_bh(&fib_info_lock);
 
 	bytes = old_size * sizeof(struct hlist_head *);
-	fib_hash_free(old_info_hash, bytes);
-	fib_hash_free(old_laddrhash, bytes);
+	fib_info_hash_free(old_info_hash, bytes);
+	fib_info_hash_free(old_laddrhash, bytes);
 }
 
 struct fib_info *fib_create_info(struct fib_config *cfg)
@@ -705,6 +703,9 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
 	int nhs = 1;
 	struct net *net = cfg->fc_nlinfo.nl_net;
 
+	if (cfg->fc_type > RTN_MAX)
+		goto err_inval;
+
 	/* Fast check to catch the most weird cases */
 	if (fib_props[cfg->fc_type].scope > cfg->fc_scope)
 		goto err_inval;
@@ -718,8 +719,8 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
 #endif
 
 	err = -ENOBUFS;
-	if (fib_info_cnt >= fib_hash_size) {
-		unsigned int new_size = fib_hash_size << 1;
+	if (fib_info_cnt >= fib_info_hash_size) {
+		unsigned int new_size = fib_info_hash_size << 1;
 		struct hlist_head *new_info_hash;
 		struct hlist_head *new_laddrhash;
 		unsigned int bytes;
@@ -727,21 +728,27 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
 		if (!new_size)
 			new_size = 1;
 		bytes = new_size * sizeof(struct hlist_head *);
-		new_info_hash = fib_hash_alloc(bytes);
-		new_laddrhash = fib_hash_alloc(bytes);
+		new_info_hash = fib_info_hash_alloc(bytes);
+		new_laddrhash = fib_info_hash_alloc(bytes);
 		if (!new_info_hash || !new_laddrhash) {
-			fib_hash_free(new_info_hash, bytes);
-			fib_hash_free(new_laddrhash, bytes);
+			fib_info_hash_free(new_info_hash, bytes);
+			fib_info_hash_free(new_laddrhash, bytes);
 		} else
-			fib_hash_move(new_info_hash, new_laddrhash, new_size);
+			fib_info_hash_move(new_info_hash, new_laddrhash, new_size);
 
-		if (!fib_hash_size)
+		if (!fib_info_hash_size)
 			goto failure;
 	}
 
 	fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
 	if (fi == NULL)
 		goto failure;
+	if (cfg->fc_mx) {
+		fi->fib_metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
+		if (!fi->fib_metrics)
+			goto failure;
+	} else
+		fi->fib_metrics = (u32 *) dst_default_metrics;
 	fib_info_cnt++;
 
 	fi->fib_net = hold_net(net);
@@ -779,7 +786,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
 			goto err_inval;
 		if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw)
 			goto err_inval;
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
 		if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow)
 			goto err_inval;
 #endif
@@ -792,7 +799,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
 		nh->nh_oif = cfg->fc_oif;
 		nh->nh_gw = cfg->fc_gw;
 		nh->nh_flags = cfg->fc_flags;
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
 		nh->nh_tclassid = cfg->fc_flow;
 #endif
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
@@ -804,6 +811,17 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
 		if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp)
 			goto err_inval;
 		goto link_it;
+	} else {
+		switch (cfg->fc_type) {
+		case RTN_UNICAST:
+		case RTN_LOCAL:
+		case RTN_BROADCAST:
+		case RTN_ANYCAST:
+		case RTN_MULTICAST:
+			break;
+		default:
+			goto err_inval;
+		}
 	}
 
 	if (cfg->fc_scope > RT_SCOPE_HOST)
@@ -835,6 +853,13 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
 				goto err_inval;
 	}
 
+	change_nexthops(fi) {
+		nexthop_nh->nh_cfg_scope = cfg->fc_scope;
+		nexthop_nh->nh_saddr = inet_select_addr(nexthop_nh->nh_dev,
+							nexthop_nh->nh_gw,
+							nexthop_nh->nh_cfg_scope);
+	} endfor_nexthops(fi)
+
 link_it:
 	ofi = fib_find_info(fi);
 	if (ofi) {
@@ -880,84 +905,6 @@ failure:
 	return ERR_PTR(err);
 }
 
-/* Note! fib_semantic_match intentionally uses  RCU list functions. */
-int fib_semantic_match(struct list_head *head, const struct flowi *flp,
-		       struct fib_result *res, int prefixlen, int fib_flags)
-{
-	struct fib_alias *fa;
-	int nh_sel = 0;
-
-	list_for_each_entry_rcu(fa, head, fa_list) {
-		int err;
-
-		if (fa->fa_tos &&
-		    fa->fa_tos != flp->fl4_tos)
-			continue;
-
-		if (fa->fa_scope < flp->fl4_scope)
-			continue;
-
-		fib_alias_accessed(fa);
-
-		err = fib_props[fa->fa_type].error;
-		if (err == 0) {
-			struct fib_info *fi = fa->fa_info;
-
-			if (fi->fib_flags & RTNH_F_DEAD)
-				continue;
-
-			switch (fa->fa_type) {
-			case RTN_UNICAST:
-			case RTN_LOCAL:
-			case RTN_BROADCAST:
-			case RTN_ANYCAST:
-			case RTN_MULTICAST:
-				for_nexthops(fi) {
-					if (nh->nh_flags & RTNH_F_DEAD)
-						continue;
-					if (!flp->oif || flp->oif == nh->nh_oif)
-						break;
-				}
-#ifdef CONFIG_IP_ROUTE_MULTIPATH
-				if (nhsel < fi->fib_nhs) {
-					nh_sel = nhsel;
-					goto out_fill_res;
-				}
-#else
-				if (nhsel < 1)
-					goto out_fill_res;
-#endif
-				endfor_nexthops(fi);
-				continue;
-
-			default:
-				pr_warning("fib_semantic_match bad type %#x\n",
-					   fa->fa_type);
-				return -EINVAL;
-			}
-		}
-		return err;
-	}
-	return 1;
-
-out_fill_res:
-	res->prefixlen = prefixlen;
-	res->nh_sel = nh_sel;
-	res->type = fa->fa_type;
-	res->scope = fa->fa_scope;
-	res->fi = fa->fa_info;
-	if (!(fib_flags & FIB_LOOKUP_NOREF))
-		atomic_inc(&res->fi->fib_clntref);
-	return 0;
-}
-
-/* Find appropriate source address to this destination */
-
-__be32 __fib_res_prefsrc(struct fib_result *res)
-{
-	return inet_select_addr(FIB_RES_DEV(*res), FIB_RES_GW(*res), res->scope);
-}
-
 int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
 		  u32 tb_id, u8 type, u8 scope, __be32 dst, int dst_len, u8 tos,
 		  struct fib_info *fi, unsigned int flags)
@@ -1002,7 +949,7 @@ int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
 
 		if (fi->fib_nh->nh_oif)
 			NLA_PUT_U32(skb, RTA_OIF, fi->fib_nh->nh_oif);
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
 		if (fi->fib_nh[0].nh_tclassid)
 			NLA_PUT_U32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid);
 #endif
@@ -1027,7 +974,7 @@ int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
 
 			if (nh->nh_gw)
 				NLA_PUT_BE32(skb, RTA_GATEWAY, nh->nh_gw);
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
 			if (nh->nh_tclassid)
 				NLA_PUT_U32(skb, RTA_FLOW, nh->nh_tclassid);
 #endif
@@ -1125,6 +1072,80 @@ int fib_sync_down_dev(struct net_device *dev, int force)
 	return ret;
 }
 
+/* Must be invoked inside of an RCU protected region.  */
+void fib_select_default(struct fib_result *res)
+{
+	struct fib_info *fi = NULL, *last_resort = NULL;
+	struct list_head *fa_head = res->fa_head;
+	struct fib_table *tb = res->table;
+	int order = -1, last_idx = -1;
+	struct fib_alias *fa;
+
+	list_for_each_entry_rcu(fa, fa_head, fa_list) {
+		struct fib_info *next_fi = fa->fa_info;
+
+		if (fa->fa_scope != res->scope ||
+		    fa->fa_type != RTN_UNICAST)
+			continue;
+
+		if (next_fi->fib_priority > res->fi->fib_priority)
+			break;
+		if (!next_fi->fib_nh[0].nh_gw ||
+		    next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK)
+			continue;
+
+		fib_alias_accessed(fa);
+
+		if (fi == NULL) {
+			if (next_fi != res->fi)
+				break;
+		} else if (!fib_detect_death(fi, order, &last_resort,
+					     &last_idx, tb->tb_default)) {
+			fib_result_assign(res, fi);
+			tb->tb_default = order;
+			goto out;
+		}
+		fi = next_fi;
+		order++;
+	}
+
+	if (order <= 0 || fi == NULL) {
+		tb->tb_default = -1;
+		goto out;
+	}
+
+	if (!fib_detect_death(fi, order, &last_resort, &last_idx,
+				tb->tb_default)) {
+		fib_result_assign(res, fi);
+		tb->tb_default = order;
+		goto out;
+	}
+
+	if (last_idx >= 0)
+		fib_result_assign(res, last_resort);
+	tb->tb_default = last_idx;
+out:
+	return;
+}
+
+void fib_update_nh_saddrs(struct net_device *dev)
+{
+	struct hlist_head *head;
+	struct hlist_node *node;
+	struct fib_nh *nh;
+	unsigned int hash;
+
+	hash = fib_devindex_hashfn(dev->ifindex);
+	head = &fib_info_devhash[hash];
+	hlist_for_each_entry(nh, node, head, nh_hash) {
+		if (nh->nh_dev != dev)
+			continue;
+		nh->nh_saddr = inet_select_addr(nh->nh_dev,
+						nh->nh_gw,
+						nh->nh_cfg_scope);
+	}
+}
+
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
 
 /*
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 0f28034..a4109a5 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -95,7 +95,7 @@ typedef unsigned int t_key;
 #define IS_TNODE(n) (!(n->parent & T_LEAF))
 #define IS_LEAF(n) (n->parent & T_LEAF)
 
-struct node {
+struct rt_trie_node {
 	unsigned long parent;
 	t_key key;
 };
@@ -126,7 +126,7 @@ struct tnode {
 		struct work_struct work;
 		struct tnode *tnode_free;
 	};
-	struct node *child[0];
+	struct rt_trie_node *child[0];
 };
 
 #ifdef CONFIG_IP_FIB_TRIE_STATS
@@ -151,16 +151,16 @@ struct trie_stat {
 };
 
 struct trie {
-	struct node *trie;
+	struct rt_trie_node *trie;
 #ifdef CONFIG_IP_FIB_TRIE_STATS
 	struct trie_use_stats stats;
 #endif
 };
 
-static void put_child(struct trie *t, struct tnode *tn, int i, struct node *n);
-static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n,
+static void put_child(struct trie *t, struct tnode *tn, int i, struct rt_trie_node *n);
+static void tnode_put_child_reorg(struct tnode *tn, int i, struct rt_trie_node *n,
 				  int wasfull);
-static struct node *resize(struct trie *t, struct tnode *tn);
+static struct rt_trie_node *resize(struct trie *t, struct tnode *tn);
 static struct tnode *inflate(struct trie *t, struct tnode *tn);
 static struct tnode *halve(struct trie *t, struct tnode *tn);
 /* tnodes to free after resize(); protected by RTNL */
@@ -177,12 +177,12 @@ static const int sync_pages = 128;
 static struct kmem_cache *fn_alias_kmem __read_mostly;
 static struct kmem_cache *trie_leaf_kmem __read_mostly;
 
-static inline struct tnode *node_parent(struct node *node)
+static inline struct tnode *node_parent(struct rt_trie_node *node)
 {
 	return (struct tnode *)(node->parent & ~NODE_TYPE_MASK);
 }
 
-static inline struct tnode *node_parent_rcu(struct node *node)
+static inline struct tnode *node_parent_rcu(struct rt_trie_node *node)
 {
 	struct tnode *ret = node_parent(node);
 
@@ -192,22 +192,22 @@ static inline struct tnode *node_parent_rcu(struct node *node)
 /* Same as rcu_assign_pointer
  * but that macro() assumes that value is a pointer.
  */
-static inline void node_set_parent(struct node *node, struct tnode *ptr)
+static inline void node_set_parent(struct rt_trie_node *node, struct tnode *ptr)
 {
 	smp_wmb();
 	node->parent = (unsigned long)ptr | NODE_TYPE(node);
 }
 
-static inline struct node *tnode_get_child(struct tnode *tn, unsigned int i)
+static inline struct rt_trie_node *tnode_get_child(struct tnode *tn, unsigned int i)
 {
 	BUG_ON(i >= 1U << tn->bits);
 
 	return tn->child[i];
 }
 
-static inline struct node *tnode_get_child_rcu(struct tnode *tn, unsigned int i)
+static inline struct rt_trie_node *tnode_get_child_rcu(struct tnode *tn, unsigned int i)
 {
-	struct node *ret = tnode_get_child(tn, i);
+	struct rt_trie_node *ret = tnode_get_child(tn, i);
 
 	return rcu_dereference_rtnl(ret);
 }
@@ -217,12 +217,12 @@ static inline int tnode_child_length(const struct tnode *tn)
 	return 1 << tn->bits;
 }
 
-static inline t_key mask_pfx(t_key k, unsigned short l)
+static inline t_key mask_pfx(t_key k, unsigned int l)
 {
 	return (l == 0) ? 0 : k >> (KEYLENGTH-l) << (KEYLENGTH-l);
 }
 
-static inline t_key tkey_extract_bits(t_key a, int offset, int bits)
+static inline t_key tkey_extract_bits(t_key a, unsigned int offset, unsigned int bits)
 {
 	if (offset < KEYLENGTH)
 		return ((t_key)(a << offset)) >> (KEYLENGTH - bits);
@@ -378,7 +378,7 @@ static void __tnode_free_rcu(struct rcu_head *head)
 {
 	struct tnode *tn = container_of(head, struct tnode, rcu);
 	size_t size = sizeof(struct tnode) +
-		      (sizeof(struct node *) << tn->bits);
+		      (sizeof(struct rt_trie_node *) << tn->bits);
 
 	if (size <= PAGE_SIZE)
 		kfree(tn);
@@ -402,7 +402,7 @@ static void tnode_free_safe(struct tnode *tn)
 	tn->tnode_free = tnode_free_head;
 	tnode_free_head = tn;
 	tnode_free_size += sizeof(struct tnode) +
-			   (sizeof(struct node *) << tn->bits);
+			   (sizeof(struct rt_trie_node *) << tn->bits);
 }
 
 static void tnode_free_flush(void)
@@ -443,7 +443,7 @@ static struct leaf_info *leaf_info_new(int plen)
 
 static struct tnode *tnode_new(t_key key, int pos, int bits)
 {
-	size_t sz = sizeof(struct tnode) + (sizeof(struct node *) << bits);
+	size_t sz = sizeof(struct tnode) + (sizeof(struct rt_trie_node *) << bits);
 	struct tnode *tn = tnode_alloc(sz);
 
 	if (tn) {
@@ -456,7 +456,7 @@ static struct tnode *tnode_new(t_key key, int pos, int bits)
 	}
 
 	pr_debug("AT %p s=%zu %zu\n", tn, sizeof(struct tnode),
-		 sizeof(struct node) << bits);
+		 sizeof(struct rt_trie_node) << bits);
 	return tn;
 }
 
@@ -465,7 +465,7 @@ static struct tnode *tnode_new(t_key key, int pos, int bits)
  * and no bits are skipped. See discussion in dyntree paper p. 6
  */
 
-static inline int tnode_full(const struct tnode *tn, const struct node *n)
+static inline int tnode_full(const struct tnode *tn, const struct rt_trie_node *n)
 {
 	if (n == NULL || IS_LEAF(n))
 		return 0;
@@ -474,7 +474,7 @@ static inline int tnode_full(const struct tnode *tn, const struct node *n)
 }
 
 static inline void put_child(struct trie *t, struct tnode *tn, int i,
-			     struct node *n)
+			     struct rt_trie_node *n)
 {
 	tnode_put_child_reorg(tn, i, n, -1);
 }
@@ -484,10 +484,10 @@ static inline void put_child(struct trie *t, struct tnode *tn, int i,
   * Update the value of full_children and empty_children.
   */
 
-static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n,
+static void tnode_put_child_reorg(struct tnode *tn, int i, struct rt_trie_node *n,
 				  int wasfull)
 {
-	struct node *chi = tn->child[i];
+	struct rt_trie_node *chi = tn->child[i];
 	int isfull;
 
 	BUG_ON(i >= 1<<tn->bits);
@@ -515,7 +515,7 @@ static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n,
 }
 
 #define MAX_WORK 10
-static struct node *resize(struct trie *t, struct tnode *tn)
+static struct rt_trie_node *resize(struct trie *t, struct tnode *tn)
 {
 	int i;
 	struct tnode *old_tn;
@@ -605,7 +605,7 @@ static struct node *resize(struct trie *t, struct tnode *tn)
 
 	/* Keep root node larger  */
 
-	if (!node_parent((struct node *)tn)) {
+	if (!node_parent((struct rt_trie_node *)tn)) {
 		inflate_threshold_use = inflate_threshold_root;
 		halve_threshold_use = halve_threshold_root;
 	} else {
@@ -635,7 +635,7 @@ static struct node *resize(struct trie *t, struct tnode *tn)
 
 	/* Return if at least one inflate is run */
 	if (max_work != MAX_WORK)
-		return (struct node *) tn;
+		return (struct rt_trie_node *) tn;
 
 	/*
 	 * Halve as long as the number of empty children in this
@@ -663,7 +663,7 @@ static struct node *resize(struct trie *t, struct tnode *tn)
 	if (tn->empty_children == tnode_child_length(tn) - 1) {
 one_child:
 		for (i = 0; i < tnode_child_length(tn); i++) {
-			struct node *n;
+			struct rt_trie_node *n;
 
 			n = tn->child[i];
 			if (!n)
@@ -676,7 +676,7 @@ one_child:
 			return n;
 		}
 	}
-	return (struct node *) tn;
+	return (struct rt_trie_node *) tn;
 }
 
 static struct tnode *inflate(struct trie *t, struct tnode *tn)
@@ -723,14 +723,14 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn)
 				goto nomem;
 			}
 
-			put_child(t, tn, 2*i, (struct node *) left);
-			put_child(t, tn, 2*i+1, (struct node *) right);
+			put_child(t, tn, 2*i, (struct rt_trie_node *) left);
+			put_child(t, tn, 2*i+1, (struct rt_trie_node *) right);
 		}
 	}
 
 	for (i = 0; i < olen; i++) {
 		struct tnode *inode;
-		struct node *node = tnode_get_child(oldtnode, i);
+		struct rt_trie_node *node = tnode_get_child(oldtnode, i);
 		struct tnode *left, *right;
 		int size, j;
 
@@ -825,7 +825,7 @@ nomem:
 static struct tnode *halve(struct trie *t, struct tnode *tn)
 {
 	struct tnode *oldtnode = tn;
-	struct node *left, *right;
+	struct rt_trie_node *left, *right;
 	int i;
 	int olen = tnode_child_length(tn);
 
@@ -856,7 +856,7 @@ static struct tnode *halve(struct trie *t, struct tnode *tn)
 			if (!newn)
 				goto nomem;
 
-			put_child(t, tn, i/2, (struct node *)newn);
+			put_child(t, tn, i/2, (struct rt_trie_node *)newn);
 		}
 
 	}
@@ -958,7 +958,7 @@ fib_find_node(struct trie *t, u32 key)
 {
 	int pos;
 	struct tnode *tn;
-	struct node *n;
+	struct rt_trie_node *n;
 
 	pos = 0;
 	n = rcu_dereference_rtnl(t->trie);
@@ -993,17 +993,17 @@ static void trie_rebalance(struct trie *t, struct tnode *tn)
 
 	key = tn->key;
 
-	while (tn != NULL && (tp = node_parent((struct node *)tn)) != NULL) {
+	while (tn != NULL && (tp = node_parent((struct rt_trie_node *)tn)) != NULL) {
 		cindex = tkey_extract_bits(key, tp->pos, tp->bits);
 		wasfull = tnode_full(tp, tnode_get_child(tp, cindex));
 		tn = (struct tnode *) resize(t, (struct tnode *)tn);
 
 		tnode_put_child_reorg((struct tnode *)tp, cindex,
-				      (struct node *)tn, wasfull);
+				      (struct rt_trie_node *)tn, wasfull);
 
-		tp = node_parent((struct node *) tn);
+		tp = node_parent((struct rt_trie_node *) tn);
 		if (!tp)
-			rcu_assign_pointer(t->trie, (struct node *)tn);
+			rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn);
 
 		tnode_free_flush();
 		if (!tp)
@@ -1015,7 +1015,7 @@ static void trie_rebalance(struct trie *t, struct tnode *tn)
 	if (IS_TNODE(tn))
 		tn = (struct tnode *)resize(t, (struct tnode *)tn);
 
-	rcu_assign_pointer(t->trie, (struct node *)tn);
+	rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn);
 	tnode_free_flush();
 }
 
@@ -1025,7 +1025,7 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen)
 {
 	int pos, newpos;
 	struct tnode *tp = NULL, *tn = NULL;
-	struct node *n;
+	struct rt_trie_node *n;
 	struct leaf *l;
 	int missbit;
 	struct list_head *fa_head = NULL;
@@ -1111,10 +1111,10 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen)
 	if (t->trie && n == NULL) {
 		/* Case 2: n is NULL, and will just insert a new leaf */
 
-		node_set_parent((struct node *)l, tp);
+		node_set_parent((struct rt_trie_node *)l, tp);
 
 		cindex = tkey_extract_bits(key, tp->pos, tp->bits);
-		put_child(t, (struct tnode *)tp, cindex, (struct node *)l);
+		put_child(t, (struct tnode *)tp, cindex, (struct rt_trie_node *)l);
 	} else {
 		/* Case 3: n is a LEAF or a TNODE and the key doesn't match. */
 		/*
@@ -1141,18 +1141,18 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen)
 			return NULL;
 		}
 
-		node_set_parent((struct node *)tn, tp);
+		node_set_parent((struct rt_trie_node *)tn, tp);
 
 		missbit = tkey_extract_bits(key, newpos, 1);
-		put_child(t, tn, missbit, (struct node *)l);
+		put_child(t, tn, missbit, (struct rt_trie_node *)l);
 		put_child(t, tn, 1-missbit, n);
 
 		if (tp) {
 			cindex = tkey_extract_bits(key, tp->pos, tp->bits);
 			put_child(t, (struct tnode *)tp, cindex,
-				  (struct node *)tn);
+				  (struct rt_trie_node *)tn);
 		} else {
-			rcu_assign_pointer(t->trie, (struct node *)tn);
+			rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn);
 			tp = tn;
 		}
 	}
@@ -1340,7 +1340,7 @@ err:
 }
 
 /* should be called with rcu_read_lock */
-static int check_leaf(struct trie *t, struct leaf *l,
+static int check_leaf(struct fib_table *tb, struct trie *t, struct leaf *l,
 		      t_key key,  const struct flowi *flp,
 		      struct fib_result *res, int fib_flags)
 {
@@ -1349,23 +1349,58 @@ static int check_leaf(struct trie *t, struct leaf *l,
 	struct hlist_node *node;
 
 	hlist_for_each_entry_rcu(li, node, hhead, hlist) {
-		int err;
+		struct fib_alias *fa;
 		int plen = li->plen;
 		__be32 mask = inet_make_mask(plen);
 
 		if (l->key != (key & ntohl(mask)))
 			continue;
 
-		err = fib_semantic_match(&li->falh, flp, res, plen, fib_flags);
+		list_for_each_entry_rcu(fa, &li->falh, fa_list) {
+			struct fib_info *fi = fa->fa_info;
+			int nhsel, err;
 
+			if (fa->fa_tos && fa->fa_tos != flp->fl4_tos)
+				continue;
+			if (fa->fa_scope < flp->fl4_scope)
+				continue;
+			fib_alias_accessed(fa);
+			err = fib_props[fa->fa_type].error;
+			if (err) {
 #ifdef CONFIG_IP_FIB_TRIE_STATS
-		if (err <= 0)
-			t->stats.semantic_match_passed++;
-		else
-			t->stats.semantic_match_miss++;
+				t->stats.semantic_match_miss++;
+#endif
+				return 1;
+			}
+			if (fi->fib_flags & RTNH_F_DEAD)
+				continue;
+			for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) {
+				const struct fib_nh *nh = &fi->fib_nh[nhsel];
+
+				if (nh->nh_flags & RTNH_F_DEAD)
+					continue;
+				if (flp->oif && flp->oif != nh->nh_oif)
+					continue;
+
+#ifdef CONFIG_IP_FIB_TRIE_STATS
+				t->stats.semantic_match_passed++;
+#endif
+				res->prefixlen = plen;
+				res->nh_sel = nhsel;
+				res->type = fa->fa_type;
+				res->scope = fa->fa_scope;
+				res->fi = fi;
+				res->table = tb;
+				res->fa_head = &li->falh;
+				if (!(fib_flags & FIB_LOOKUP_NOREF))
+					atomic_inc(&res->fi->fib_clntref);
+				return 0;
+			}
+		}
+
+#ifdef CONFIG_IP_FIB_TRIE_STATS
+		t->stats.semantic_match_miss++;
 #endif
-		if (err <= 0)
-			return err;
 	}
 
 	return 1;
@@ -1376,13 +1411,13 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi *flp,
 {
 	struct trie *t = (struct trie *) tb->tb_data;
 	int ret;
-	struct node *n;
+	struct rt_trie_node *n;
 	struct tnode *pn;
-	int pos, bits;
+	unsigned int pos, bits;
 	t_key key = ntohl(flp->fl4_dst);
-	int chopped_off;
+	unsigned int chopped_off;
 	t_key cindex = 0;
-	int current_prefix_length = KEYLENGTH;
+	unsigned int current_prefix_length = KEYLENGTH;
 	struct tnode *cn;
 	t_key pref_mismatch;
 
@@ -1398,7 +1433,7 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi *flp,
 
 	/* Just a leaf? */
 	if (IS_LEAF(n)) {
-		ret = check_leaf(t, (struct leaf *)n, key, flp, res, fib_flags);
+		ret = check_leaf(tb, t, (struct leaf *)n, key, flp, res, fib_flags);
 		goto found;
 	}
 
@@ -1423,7 +1458,7 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi *flp,
 		}
 
 		if (IS_LEAF(n)) {
-			ret = check_leaf(t, (struct leaf *)n, key, flp, res, fib_flags);
+			ret = check_leaf(tb, t, (struct leaf *)n, key, flp, res, fib_flags);
 			if (ret > 0)
 				goto backtrace;
 			goto found;
@@ -1541,7 +1576,7 @@ backtrace:
 		if (chopped_off <= pn->bits) {
 			cindex &= ~(1 << (chopped_off-1));
 		} else {
-			struct tnode *parent = node_parent_rcu((struct node *) pn);
+			struct tnode *parent = node_parent_rcu((struct rt_trie_node *) pn);
 			if (!parent)
 				goto failed;
 
@@ -1568,7 +1603,7 @@ found:
  */
 static void trie_leaf_remove(struct trie *t, struct leaf *l)
 {
-	struct tnode *tp = node_parent((struct node *) l);
+	struct tnode *tp = node_parent((struct rt_trie_node *) l);
 
 	pr_debug("entering trie_leaf_remove(%p)\n", l);
 
@@ -1706,7 +1741,7 @@ static int trie_flush_leaf(struct leaf *l)
  * Scan for the next right leaf starting at node p->child[idx]
  * Since we have back pointer, no recursion necessary.
  */
-static struct leaf *leaf_walk_rcu(struct tnode *p, struct node *c)
+static struct leaf *leaf_walk_rcu(struct tnode *p, struct rt_trie_node *c)
 {
 	do {
 		t_key idx;
@@ -1732,7 +1767,7 @@ static struct leaf *leaf_walk_rcu(struct tnode *p, struct node *c)
 		}
 
 		/* Node empty, walk back up to parent */
-		c = (struct node *) p;
+		c = (struct rt_trie_node *) p;
 	} while ((p = node_parent_rcu(c)) != NULL);
 
 	return NULL; /* Root of trie */
@@ -1753,7 +1788,7 @@ static struct leaf *trie_firstleaf(struct trie *t)
 
 static struct leaf *trie_nextleaf(struct leaf *l)
 {
-	struct node *c = (struct node *) l;
+	struct rt_trie_node *c = (struct rt_trie_node *) l;
 	struct tnode *p = node_parent_rcu(c);
 
 	if (!p)
@@ -1802,80 +1837,6 @@ void fib_free_table(struct fib_table *tb)
 	kfree(tb);
 }
 
-void fib_table_select_default(struct fib_table *tb,
-			      const struct flowi *flp,
-			      struct fib_result *res)
-{
-	struct trie *t = (struct trie *) tb->tb_data;
-	int order, last_idx;
-	struct fib_info *fi = NULL;
-	struct fib_info *last_resort;
-	struct fib_alias *fa = NULL;
-	struct list_head *fa_head;
-	struct leaf *l;
-
-	last_idx = -1;
-	last_resort = NULL;
-	order = -1;
-
-	rcu_read_lock();
-
-	l = fib_find_node(t, 0);
-	if (!l)
-		goto out;
-
-	fa_head = get_fa_head(l, 0);
-	if (!fa_head)
-		goto out;
-
-	if (list_empty(fa_head))
-		goto out;
-
-	list_for_each_entry_rcu(fa, fa_head, fa_list) {
-		struct fib_info *next_fi = fa->fa_info;
-
-		if (fa->fa_scope != res->scope ||
-		    fa->fa_type != RTN_UNICAST)
-			continue;
-
-		if (next_fi->fib_priority > res->fi->fib_priority)
-			break;
-		if (!next_fi->fib_nh[0].nh_gw ||
-		    next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK)
-			continue;
-
-		fib_alias_accessed(fa);
-
-		if (fi == NULL) {
-			if (next_fi != res->fi)
-				break;
-		} else if (!fib_detect_death(fi, order, &last_resort,
-					     &last_idx, tb->tb_default)) {
-			fib_result_assign(res, fi);
-			tb->tb_default = order;
-			goto out;
-		}
-		fi = next_fi;
-		order++;
-	}
-	if (order <= 0 || fi == NULL) {
-		tb->tb_default = -1;
-		goto out;
-	}
-
-	if (!fib_detect_death(fi, order, &last_resort, &last_idx,
-				tb->tb_default)) {
-		fib_result_assign(res, fi);
-		tb->tb_default = order;
-		goto out;
-	}
-	if (last_idx >= 0)
-		fib_result_assign(res, last_resort);
-	tb->tb_default = last_idx;
-out:
-	rcu_read_unlock();
-}
-
 static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah,
 			   struct fib_table *tb,
 			   struct sk_buff *skb, struct netlink_callback *cb)
@@ -1990,7 +1951,7 @@ int fib_table_dump(struct fib_table *tb, struct sk_buff *skb,
 	return skb->len;
 }
 
-void __init fib_hash_init(void)
+void __init fib_trie_init(void)
 {
 	fn_alias_kmem = kmem_cache_create("ip_fib_alias",
 					  sizeof(struct fib_alias),
@@ -2003,8 +1964,7 @@ void __init fib_hash_init(void)
 }
 
 
-/* Fix more generic FIB names for init later */
-struct fib_table *fib_hash_table(u32 id)
+struct fib_table *fib_trie_table(u32 id)
 {
 	struct fib_table *tb;
 	struct trie *t;
@@ -2036,7 +1996,7 @@ struct fib_trie_iter {
 	unsigned int depth;
 };
 
-static struct node *fib_trie_get_next(struct fib_trie_iter *iter)
+static struct rt_trie_node *fib_trie_get_next(struct fib_trie_iter *iter)
 {
 	struct tnode *tn = iter->tnode;
 	unsigned int cindex = iter->index;
@@ -2050,7 +2010,7 @@ static struct node *fib_trie_get_next(struct fib_trie_iter *iter)
 		 iter->tnode, iter->index, iter->depth);
 rescan:
 	while (cindex < (1<<tn->bits)) {
-		struct node *n = tnode_get_child_rcu(tn, cindex);
+		struct rt_trie_node *n = tnode_get_child_rcu(tn, cindex);
 
 		if (n) {
 			if (IS_LEAF(n)) {
@@ -2069,7 +2029,7 @@ rescan:
 	}
 
 	/* Current node exhausted, pop back up */
-	p = node_parent_rcu((struct node *)tn);
+	p = node_parent_rcu((struct rt_trie_node *)tn);
 	if (p) {
 		cindex = tkey_extract_bits(tn->key, p->pos, p->bits)+1;
 		tn = p;
@@ -2081,10 +2041,10 @@ rescan:
 	return NULL;
 }
 
-static struct node *fib_trie_get_first(struct fib_trie_iter *iter,
+static struct rt_trie_node *fib_trie_get_first(struct fib_trie_iter *iter,
 				       struct trie *t)
 {
-	struct node *n;
+	struct rt_trie_node *n;
 
 	if (!t)
 		return NULL;
@@ -2108,7 +2068,7 @@ static struct node *fib_trie_get_first(struct fib_trie_iter *iter,
 
 static void trie_collect_stats(struct trie *t, struct trie_stat *s)
 {
-	struct node *n;
+	struct rt_trie_node *n;
 	struct fib_trie_iter iter;
 
 	memset(s, 0, sizeof(*s));
@@ -2181,7 +2141,7 @@ static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat)
 	seq_putc(seq, '\n');
 	seq_printf(seq, "\tPointers: %u\n", pointers);
 
-	bytes += sizeof(struct node *) * pointers;
+	bytes += sizeof(struct rt_trie_node *) * pointers;
 	seq_printf(seq, "Null ptrs: %u\n", stat->nullpointers);
 	seq_printf(seq, "Total size: %u  kB\n", (bytes + 1023) / 1024);
 }
@@ -2262,7 +2222,7 @@ static const struct file_operations fib_triestat_fops = {
 	.release = single_release_net,
 };
 
-static struct node *fib_trie_get_idx(struct seq_file *seq, loff_t pos)
+static struct rt_trie_node *fib_trie_get_idx(struct seq_file *seq, loff_t pos)
 {
 	struct fib_trie_iter *iter = seq->private;
 	struct net *net = seq_file_net(seq);
@@ -2275,7 +2235,7 @@ static struct node *fib_trie_get_idx(struct seq_file *seq, loff_t pos)
 		struct fib_table *tb;
 
 		hlist_for_each_entry_rcu(tb, node, head, tb_hlist) {
-			struct node *n;
+			struct rt_trie_node *n;
 
 			for (n = fib_trie_get_first(iter,
 						    (struct trie *) tb->tb_data);
@@ -2304,7 +2264,7 @@ static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 	struct fib_table *tb = iter->tb;
 	struct hlist_node *tb_node;
 	unsigned int h;
-	struct node *n;
+	struct rt_trie_node *n;
 
 	++*pos;
 	/* next node in same table */
@@ -2390,7 +2350,7 @@ static inline const char *rtn_type(char *buf, size_t len, unsigned int t)
 static int fib_trie_seq_show(struct seq_file *seq, void *v)
 {
 	const struct fib_trie_iter *iter = seq->private;
-	struct node *n = v;
+	struct rt_trie_node *n = v;
 
 	if (!node_parent_rcu(n))
 		fib_table_print(seq, iter->tb);
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 4aa1b7f..1771ce6 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -233,48 +233,11 @@ static inline void icmp_xmit_unlock(struct sock *sk)
  *	Send an ICMP frame.
  */
 
-/*
- *	Check transmit rate limitation for given message.
- *	The rate information is held in the destination cache now.
- *	This function is generic and could be used for other purposes
- *	too. It uses a Token bucket filter as suggested by Alexey Kuznetsov.
- *
- *	Note that the same dst_entry fields are modified by functions in
- *	route.c too, but these work for packet destinations while xrlim_allow
- *	works for icmp destinations. This means the rate limiting information
- *	for one "ip object" is shared - and these ICMPs are twice limited:
- *	by source and by destination.
- *
- *	RFC 1812: 4.3.2.8 SHOULD be able to limit error message rate
- *			  SHOULD allow setting of rate limits
- *
- * 	Shared between ICMPv4 and ICMPv6.
- */
-#define XRLIM_BURST_FACTOR 6
-int xrlim_allow(struct dst_entry *dst, int timeout)
-{
-	unsigned long now, token = dst->rate_tokens;
-	int rc = 0;
-
-	now = jiffies;
-	token += now - dst->rate_last;
-	dst->rate_last = now;
-	if (token > XRLIM_BURST_FACTOR * timeout)
-		token = XRLIM_BURST_FACTOR * timeout;
-	if (token >= timeout) {
-		token -= timeout;
-		rc = 1;
-	}
-	dst->rate_tokens = token;
-	return rc;
-}
-EXPORT_SYMBOL(xrlim_allow);
-
-static inline int icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
+static inline bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
 		int type, int code)
 {
 	struct dst_entry *dst = &rt->dst;
-	int rc = 1;
+	bool rc = true;
 
 	if (type > NR_ICMP_TYPES)
 		goto out;
@@ -288,8 +251,12 @@ static inline int icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
 		goto out;
 
 	/* Limit if icmp type is enabled in ratemask. */
-	if ((1 << type) & net->ipv4.sysctl_icmp_ratemask)
-		rc = xrlim_allow(dst, net->ipv4.sysctl_icmp_ratelimit);
+	if ((1 << type) & net->ipv4.sysctl_icmp_ratemask) {
+		if (!rt->peer)
+			rt_bind_peer(rt, 1);
+		rc = inet_peer_xrlim_allow(rt->peer,
+					   net->ipv4.sysctl_icmp_ratelimit);
+	}
 out:
 	return rc;
 }
@@ -391,7 +358,8 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
 				    .fl4_tos = RT_TOS(ip_hdr(skb)->tos),
 				    .proto = IPPROTO_ICMP };
 		security_skb_classify_flow(skb, &fl);
-		if (ip_route_output_key(net, &rt, &fl))
+		rt = ip_route_output_key(net, &fl);
+		if (IS_ERR(rt))
 			goto out_unlock;
 	}
 	if (icmpv4_xrlim_allow(net, rt, icmp_param->data.icmph.type,
@@ -402,6 +370,94 @@ out_unlock:
 	icmp_xmit_unlock(sk);
 }
 
+static struct rtable *icmp_route_lookup(struct net *net, struct sk_buff *skb_in,
+					struct iphdr *iph,
+					__be32 saddr, u8 tos,
+					int type, int code,
+					struct icmp_bxm *param)
+{
+	struct flowi fl = {
+		.fl4_dst = (param->replyopts.srr ?
+			    param->replyopts.faddr : iph->saddr),
+		.fl4_src = saddr,
+		.fl4_tos = RT_TOS(tos),
+		.proto = IPPROTO_ICMP,
+		.fl_icmp_type = type,
+		.fl_icmp_code = code,
+	};
+	struct rtable *rt, *rt2;
+	int err;
+
+	security_skb_classify_flow(skb_in, &fl);
+	rt = __ip_route_output_key(net, &fl);
+	if (IS_ERR(rt))
+		return rt;
+
+	/* No need to clone since we're just using its address. */
+	rt2 = rt;
+
+	if (!fl.fl4_src)
+		fl.fl4_src = rt->rt_src;
+
+	rt = (struct rtable *) xfrm_lookup(net, &rt->dst, &fl, NULL, 0);
+	if (!IS_ERR(rt)) {
+		if (rt != rt2)
+			return rt;
+	} else if (PTR_ERR(rt) == -EPERM) {
+		rt = NULL;
+	} else
+		return rt;
+
+	err = xfrm_decode_session_reverse(skb_in, &fl, AF_INET);
+	if (err)
+		goto relookup_failed;
+
+	if (inet_addr_type(net, fl.fl4_src) == RTN_LOCAL) {
+		rt2 = __ip_route_output_key(net, &fl);
+		if (IS_ERR(rt2))
+			err = PTR_ERR(rt2);
+	} else {
+		struct flowi fl2 = {};
+		unsigned long orefdst;
+
+		fl2.fl4_dst = fl.fl4_src;
+		rt2 = ip_route_output_key(net, &fl2);
+		if (IS_ERR(rt2)) {
+			err = PTR_ERR(rt2);
+			goto relookup_failed;
+		}
+		/* Ugh! */
+		orefdst = skb_in->_skb_refdst; /* save old refdst */
+		err = ip_route_input(skb_in, fl.fl4_dst, fl.fl4_src,
+				     RT_TOS(tos), rt2->dst.dev);
+
+		dst_release(&rt2->dst);
+		rt2 = skb_rtable(skb_in);
+		skb_in->_skb_refdst = orefdst; /* restore old refdst */
+	}
+
+	if (err)
+		goto relookup_failed;
+
+	rt2 = (struct rtable *) xfrm_lookup(net, &rt2->dst, &fl, NULL, XFRM_LOOKUP_ICMP);
+	if (!IS_ERR(rt2)) {
+		dst_release(&rt->dst);
+		rt = rt2;
+	} else if (PTR_ERR(rt2) == -EPERM) {
+		if (rt)
+			dst_release(&rt->dst);
+		return rt2;
+	} else {
+		err = PTR_ERR(rt2);
+		goto relookup_failed;
+	}
+	return rt;
+
+relookup_failed:
+	if (rt)
+		return rt;
+	return ERR_PTR(err);
+}
 
 /*
  *	Send an ICMP message in response to a situation
@@ -507,7 +563,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
 		rcu_read_lock();
 		if (rt_is_input_route(rt) &&
 		    net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr)
-			dev = dev_get_by_index_rcu(net, rt->fl.iif);
+			dev = dev_get_by_index_rcu(net, rt->rt_iif);
 
 		if (dev)
 			saddr = inet_select_addr(dev, 0, RT_SCOPE_LINK);
@@ -539,86 +595,11 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
 	ipc.opt = &icmp_param.replyopts;
 	ipc.tx_flags = 0;
 
-	{
-		struct flowi fl = {
-			.fl4_dst = icmp_param.replyopts.srr ?
-				   icmp_param.replyopts.faddr : iph->saddr,
-			.fl4_src = saddr,
-			.fl4_tos = RT_TOS(tos),
-			.proto = IPPROTO_ICMP,
-			.fl_icmp_type = type,
-			.fl_icmp_code = code,
-		};
-		int err;
-		struct rtable *rt2;
-
-		security_skb_classify_flow(skb_in, &fl);
-		if (__ip_route_output_key(net, &rt, &fl))
-			goto out_unlock;
-
-		/* No need to clone since we're just using its address. */
-		rt2 = rt;
-
-		if (!fl.nl_u.ip4_u.saddr)
-			fl.nl_u.ip4_u.saddr = rt->rt_src;
-
-		err = xfrm_lookup(net, (struct dst_entry **)&rt, &fl, NULL, 0);
-		switch (err) {
-		case 0:
-			if (rt != rt2)
-				goto route_done;
-			break;
-		case -EPERM:
-			rt = NULL;
-			break;
-		default:
-			goto out_unlock;
-		}
-
-		if (xfrm_decode_session_reverse(skb_in, &fl, AF_INET))
-			goto relookup_failed;
-
-		if (inet_addr_type(net, fl.fl4_src) == RTN_LOCAL)
-			err = __ip_route_output_key(net, &rt2, &fl);
-		else {
-			struct flowi fl2 = {};
-			unsigned long orefdst;
-
-			fl2.fl4_dst = fl.fl4_src;
-			if (ip_route_output_key(net, &rt2, &fl2))
-				goto relookup_failed;
-
-			/* Ugh! */
-			orefdst = skb_in->_skb_refdst; /* save old refdst */
-			err = ip_route_input(skb_in, fl.fl4_dst, fl.fl4_src,
-					     RT_TOS(tos), rt2->dst.dev);
-
-			dst_release(&rt2->dst);
-			rt2 = skb_rtable(skb_in);
-			skb_in->_skb_refdst = orefdst; /* restore old refdst */
-		}
-
-		if (err)
-			goto relookup_failed;
-
-		err = xfrm_lookup(net, (struct dst_entry **)&rt2, &fl, NULL,
-				  XFRM_LOOKUP_ICMP);
-		switch (err) {
-		case 0:
-			dst_release(&rt->dst);
-			rt = rt2;
-			break;
-		case -EPERM:
-			goto ende;
-		default:
-relookup_failed:
-			if (!rt)
-				goto out_unlock;
-			break;
-		}
-	}
+	rt = icmp_route_lookup(net, skb_in, iph, saddr, tos,
+			       type, code, &icmp_param);
+	if (IS_ERR(rt))
+		goto out_unlock;
 
-route_done:
 	if (!icmpv4_xrlim_allow(net, rt, type, code))
 		goto ende;
 
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index e0e77e2..44ba906 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -325,7 +325,8 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
 		struct flowi fl = { .oif = dev->ifindex,
 				    .fl4_dst = IGMPV3_ALL_MCR,
 				    .proto = IPPROTO_IGMP };
-		if (ip_route_output_key(net, &rt, &fl)) {
+		rt = ip_route_output_key(net, &fl);
+		if (IS_ERR(rt)) {
 			kfree_skb(skb);
 			return NULL;
 		}
@@ -670,7 +671,8 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
 		struct flowi fl = { .oif = dev->ifindex,
 				    .fl4_dst = dst,
 				    .proto = IPPROTO_IGMP };
-		if (ip_route_output_key(net, &rt, &fl))
+		rt = ip_route_output_key(net, &fl);
+		if (IS_ERR(rt))
 			return -1;
 	}
 	if (rt->rt_src == 0) {
@@ -1440,7 +1442,6 @@ void ip_mc_destroy_dev(struct in_device *in_dev)
 static struct in_device *ip_mc_find_dev(struct net *net, struct ip_mreqn *imr)
 {
 	struct flowi fl = { .fl4_dst = imr->imr_multiaddr.s_addr };
-	struct rtable *rt;
 	struct net_device *dev = NULL;
 	struct in_device *idev = NULL;
 
@@ -1454,9 +1455,12 @@ static struct in_device *ip_mc_find_dev(struct net *net, struct ip_mreqn *imr)
 			return NULL;
 	}
 
-	if (!dev && !ip_route_output_key(net, &rt, &fl)) {
-		dev = rt->dst.dev;
-		ip_rt_put(rt);
+	if (!dev) {
+		struct rtable *rt = ip_route_output_key(net, &fl);
+		if (!IS_ERR(rt)) {
+			dev = rt->dst.dev;
+			ip_rt_put(rt);
+		}
 	}
 	if (dev) {
 		imr->imr_ifindex = dev->ifindex;
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 97e5fb7..e4e301a 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -369,7 +369,8 @@ struct dst_entry *inet_csk_route_req(struct sock *sk,
 	struct net *net = sock_net(sk);
 
 	security_req_classify_flow(req, &fl);
-	if (ip_route_output_flow(net, &rt, &fl, sk, 0))
+	rt = ip_route_output_flow(net, &fl, sk);
+	if (IS_ERR(rt))
 		goto no_route;
 	if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway)
 		goto route_err;
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index a96e656..6442c35 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -81,19 +81,19 @@ static const struct inet_peer peer_fake_node = {
 
 struct inet_peer_base {
 	struct inet_peer __rcu *root;
-	spinlock_t	lock;
+	seqlock_t	lock;
 	int		total;
 };
 
 static struct inet_peer_base v4_peers = {
 	.root		= peer_avl_empty_rcu,
-	.lock		= __SPIN_LOCK_UNLOCKED(v4_peers.lock),
+	.lock		= __SEQLOCK_UNLOCKED(v4_peers.lock),
 	.total		= 0,
 };
 
 static struct inet_peer_base v6_peers = {
 	.root		= peer_avl_empty_rcu,
-	.lock		= __SPIN_LOCK_UNLOCKED(v6_peers.lock),
+	.lock		= __SEQLOCK_UNLOCKED(v6_peers.lock),
 	.total		= 0,
 };
 
@@ -167,9 +167,9 @@ static int addr_compare(const struct inetpeer_addr *a,
 	int i, n = (a->family == AF_INET ? 1 : 4);
 
 	for (i = 0; i < n; i++) {
-		if (a->a6[i] == b->a6[i])
+		if (a->addr.a6[i] == b->addr.a6[i])
 			continue;
-		if (a->a6[i] < b->a6[i])
+		if (a->addr.a6[i] < b->addr.a6[i])
 			return -1;
 		return 1;
 	}
@@ -177,6 +177,9 @@ static int addr_compare(const struct inetpeer_addr *a,
 	return 0;
 }
 
+#define rcu_deref_locked(X, BASE)				\
+	rcu_dereference_protected(X, lockdep_is_held(&(BASE)->lock.lock))
+
 /*
  * Called with local BH disabled and the pool lock held.
  */
@@ -187,8 +190,7 @@ static int addr_compare(const struct inetpeer_addr *a,
 								\
 	stackptr = _stack;					\
 	*stackptr++ = &_base->root;				\
-	for (u = rcu_dereference_protected(_base->root,		\
-			lockdep_is_held(&_base->lock));		\
+	for (u = rcu_deref_locked(_base->root, _base);		\
 	     u != peer_avl_empty; ) {				\
 		int cmp = addr_compare(_daddr, &u->daddr);	\
 		if (cmp == 0)					\
@@ -198,23 +200,22 @@ static int addr_compare(const struct inetpeer_addr *a,
 		else						\
 			v = &u->avl_right;			\
 		*stackptr++ = v;				\
-		u = rcu_dereference_protected(*v,		\
-			lockdep_is_held(&_base->lock));		\
+		u = rcu_deref_locked(*v, _base);		\
 	}							\
 	u;							\
 })
 
 /*
- * Called with rcu_read_lock_bh()
+ * Called with rcu_read_lock()
  * Because we hold no lock against a writer, its quite possible we fall
  * in an endless loop.
  * But every pointer we follow is guaranteed to be valid thanks to RCU.
  * We exit from this function if number of links exceeds PEER_MAXDEPTH
  */
-static struct inet_peer *lookup_rcu_bh(const struct inetpeer_addr *daddr,
-				       struct inet_peer_base *base)
+static struct inet_peer *lookup_rcu(const struct inetpeer_addr *daddr,
+				    struct inet_peer_base *base)
 {
-	struct inet_peer *u = rcu_dereference_bh(base->root);
+	struct inet_peer *u = rcu_dereference(base->root);
 	int count = 0;
 
 	while (u != peer_avl_empty) {
@@ -230,9 +231,9 @@ static struct inet_peer *lookup_rcu_bh(const struct inetpeer_addr *daddr,
 			return u;
 		}
 		if (cmp == -1)
-			u = rcu_dereference_bh(u->avl_left);
+			u = rcu_dereference(u->avl_left);
 		else
-			u = rcu_dereference_bh(u->avl_right);
+			u = rcu_dereference(u->avl_right);
 		if (unlikely(++count == PEER_MAXDEPTH))
 			break;
 	}
@@ -246,13 +247,11 @@ static struct inet_peer *lookup_rcu_bh(const struct inetpeer_addr *daddr,
 	struct inet_peer __rcu **v;				\
 	*stackptr++ = &start->avl_left;				\
 	v = &start->avl_left;					\
-	for (u = rcu_dereference_protected(*v,			\
-			lockdep_is_held(&base->lock));		\
+	for (u = rcu_deref_locked(*v, base);			\
 	     u->avl_right != peer_avl_empty_rcu; ) {		\
 		v = &u->avl_right;				\
 		*stackptr++ = v;				\
-		u = rcu_dereference_protected(*v,		\
-			lockdep_is_held(&base->lock));		\
+		u = rcu_deref_locked(*v, base);			\
 	}							\
 	u;							\
 })
@@ -271,21 +270,16 @@ static void peer_avl_rebalance(struct inet_peer __rcu **stack[],
 
 	while (stackend > stack) {
 		nodep = *--stackend;
-		node = rcu_dereference_protected(*nodep,
-				lockdep_is_held(&base->lock));
-		l = rcu_dereference_protected(node->avl_left,
-				lockdep_is_held(&base->lock));
-		r = rcu_dereference_protected(node->avl_right,
-				lockdep_is_held(&base->lock));
+		node = rcu_deref_locked(*nodep, base);
+		l = rcu_deref_locked(node->avl_left, base);
+		r = rcu_deref_locked(node->avl_right, base);
 		lh = node_height(l);
 		rh = node_height(r);
 		if (lh > rh + 1) { /* l: RH+2 */
 			struct inet_peer *ll, *lr, *lrl, *lrr;
 			int lrh;
-			ll = rcu_dereference_protected(l->avl_left,
-				lockdep_is_held(&base->lock));
-			lr = rcu_dereference_protected(l->avl_right,
-				lockdep_is_held(&base->lock));
+			ll = rcu_deref_locked(l->avl_left, base);
+			lr = rcu_deref_locked(l->avl_right, base);
 			lrh = node_height(lr);
 			if (lrh <= node_height(ll)) {	/* ll: RH+1 */
 				RCU_INIT_POINTER(node->avl_left, lr);	/* lr: RH or RH+1 */
@@ -296,10 +290,8 @@ static void peer_avl_rebalance(struct inet_peer __rcu **stack[],
 				l->avl_height = node->avl_height + 1;
 				RCU_INIT_POINTER(*nodep, l);
 			} else { /* ll: RH, lr: RH+1 */
-				lrl = rcu_dereference_protected(lr->avl_left,
-					lockdep_is_held(&base->lock));	/* lrl: RH or RH-1 */
-				lrr = rcu_dereference_protected(lr->avl_right,
-					lockdep_is_held(&base->lock));	/* lrr: RH or RH-1 */
+				lrl = rcu_deref_locked(lr->avl_left, base);/* lrl: RH or RH-1 */
+				lrr = rcu_deref_locked(lr->avl_right, base);/* lrr: RH or RH-1 */
 				RCU_INIT_POINTER(node->avl_left, lrr);	/* lrr: RH or RH-1 */
 				RCU_INIT_POINTER(node->avl_right, r);	/* r: RH */
 				node->avl_height = rh + 1; /* node: RH+1 */
@@ -314,10 +306,8 @@ static void peer_avl_rebalance(struct inet_peer __rcu **stack[],
 		} else if (rh > lh + 1) { /* r: LH+2 */
 			struct inet_peer *rr, *rl, *rlr, *rll;
 			int rlh;
-			rr = rcu_dereference_protected(r->avl_right,
-				lockdep_is_held(&base->lock));
-			rl = rcu_dereference_protected(r->avl_left,
-				lockdep_is_held(&base->lock));
+			rr = rcu_deref_locked(r->avl_right, base);
+			rl = rcu_deref_locked(r->avl_left, base);
 			rlh = node_height(rl);
 			if (rlh <= node_height(rr)) {	/* rr: LH+1 */
 				RCU_INIT_POINTER(node->avl_right, rl);	/* rl: LH or LH+1 */
@@ -328,10 +318,8 @@ static void peer_avl_rebalance(struct inet_peer __rcu **stack[],
 				r->avl_height = node->avl_height + 1;
 				RCU_INIT_POINTER(*nodep, r);
 			} else { /* rr: RH, rl: RH+1 */
-				rlr = rcu_dereference_protected(rl->avl_right,
-					lockdep_is_held(&base->lock));	/* rlr: LH or LH-1 */
-				rll = rcu_dereference_protected(rl->avl_left,
-					lockdep_is_held(&base->lock));	/* rll: LH or LH-1 */
+				rlr = rcu_deref_locked(rl->avl_right, base);/* rlr: LH or LH-1 */
+				rll = rcu_deref_locked(rl->avl_left, base);/* rll: LH or LH-1 */
 				RCU_INIT_POINTER(node->avl_right, rll);	/* rll: LH or LH-1 */
 				RCU_INIT_POINTER(node->avl_left, l);	/* l: LH */
 				node->avl_height = lh + 1; /* node: LH+1 */
@@ -372,7 +360,7 @@ static void unlink_from_pool(struct inet_peer *p, struct inet_peer_base *base)
 
 	do_free = 0;
 
-	spin_lock_bh(&base->lock);
+	write_seqlock_bh(&base->lock);
 	/* Check the reference counter.  It was artificially incremented by 1
 	 * in cleanup() function to prevent sudden disappearing.  If we can
 	 * atomically (because of lockless readers) take this last reference,
@@ -392,8 +380,7 @@ static void unlink_from_pool(struct inet_peer *p, struct inet_peer_base *base)
 			/* look for a node to insert instead of p */
 			struct inet_peer *t;
 			t = lookup_rightempty(p, base);
-			BUG_ON(rcu_dereference_protected(*stackptr[-1],
-					lockdep_is_held(&base->lock)) != t);
+			BUG_ON(rcu_deref_locked(*stackptr[-1], base) != t);
 			**--stackptr = t->avl_left;
 			/* t is removed, t->daddr > x->daddr for any
 			 * x in p->avl_left subtree.
@@ -409,7 +396,7 @@ static void unlink_from_pool(struct inet_peer *p, struct inet_peer_base *base)
 		base->total--;
 		do_free = 1;
 	}
-	spin_unlock_bh(&base->lock);
+	write_sequnlock_bh(&base->lock);
 
 	if (do_free)
 		call_rcu_bh(&p->rcu, inetpeer_free_rcu);
@@ -477,13 +464,17 @@ struct inet_peer *inet_getpeer(struct inetpeer_addr *daddr, int create)
 	struct inet_peer __rcu **stack[PEER_MAXDEPTH], ***stackptr;
 	struct inet_peer_base *base = family_to_base(daddr->family);
 	struct inet_peer *p;
+	unsigned int sequence;
+	int invalidated;
 
 	/* Look up for the address quickly, lockless.
 	 * Because of a concurrent writer, we might not find an existing entry.
 	 */
-	rcu_read_lock_bh();
-	p = lookup_rcu_bh(daddr, base);
-	rcu_read_unlock_bh();
+	rcu_read_lock();
+	sequence = read_seqbegin(&base->lock);
+	p = lookup_rcu(daddr, base);
+	invalidated = read_seqretry(&base->lock, sequence);
+	rcu_read_unlock();
 
 	if (p) {
 		/* The existing node has been found.
@@ -493,14 +484,18 @@ struct inet_peer *inet_getpeer(struct inetpeer_addr *daddr, int create)
 		return p;
 	}
 
+	/* If no writer did a change during our lookup, we can return early. */
+	if (!create && !invalidated)
+		return NULL;
+
 	/* retry an exact lookup, taking the lock before.
 	 * At least, nodes should be hot in our cache.
 	 */
-	spin_lock_bh(&base->lock);
+	write_seqlock_bh(&base->lock);
 	p = lookup(daddr, stack, base);
 	if (p != peer_avl_empty) {
 		atomic_inc(&p->refcnt);
-		spin_unlock_bh(&base->lock);
+		write_sequnlock_bh(&base->lock);
 		/* Remove the entry from unused list if it was there. */
 		unlink_from_unused(p);
 		return p;
@@ -510,8 +505,13 @@ struct inet_peer *inet_getpeer(struct inetpeer_addr *daddr, int create)
 		p->daddr = *daddr;
 		atomic_set(&p->refcnt, 1);
 		atomic_set(&p->rid, 0);
-		atomic_set(&p->ip_id_count, secure_ip_id(daddr->a4));
+		atomic_set(&p->ip_id_count, secure_ip_id(daddr->addr.a4));
 		p->tcp_ts_stamp = 0;
+		p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW;
+		p->rate_tokens = 0;
+		p->rate_last = 0;
+		p->pmtu_expires = 0;
+		memset(&p->redirect_learned, 0, sizeof(p->redirect_learned));
 		INIT_LIST_HEAD(&p->unused);
 
 
@@ -519,7 +519,7 @@ struct inet_peer *inet_getpeer(struct inetpeer_addr *daddr, int create)
 		link_to_pool(p, base);
 		base->total++;
 	}
-	spin_unlock_bh(&base->lock);
+	write_sequnlock_bh(&base->lock);
 
 	if (base->total >= inet_peer_threshold)
 		/* Remove one less-recently-used entry. */
@@ -579,3 +579,44 @@ void inet_putpeer(struct inet_peer *p)
 	local_bh_enable();
 }
 EXPORT_SYMBOL_GPL(inet_putpeer);
+
+/*
+ *	Check transmit rate limitation for given message.
+ *	The rate information is held in the inet_peer entries now.
+ *	This function is generic and could be used for other purposes
+ *	too. It uses a Token bucket filter as suggested by Alexey Kuznetsov.
+ *
+ *	Note that the same inet_peer fields are modified by functions in
+ *	route.c too, but these work for packet destinations while xrlim_allow
+ *	works for icmp destinations. This means the rate limiting information
+ *	for one "ip object" is shared - and these ICMPs are twice limited:
+ *	by source and by destination.
+ *
+ *	RFC 1812: 4.3.2.8 SHOULD be able to limit error message rate
+ *			  SHOULD allow setting of rate limits
+ *
+ * 	Shared between ICMPv4 and ICMPv6.
+ */
+#define XRLIM_BURST_FACTOR 6
+bool inet_peer_xrlim_allow(struct inet_peer *peer, int timeout)
+{
+	unsigned long now, token;
+	bool rc = false;
+
+	if (!peer)
+		return true;
+
+	token = peer->rate_tokens;
+	now = jiffies;
+	token += now - peer->rate_last;
+	peer->rate_last = now;
+	if (token > XRLIM_BURST_FACTOR * timeout)
+		token = XRLIM_BURST_FACTOR * timeout;
+	if (token >= timeout) {
+		token -= timeout;
+		rc = true;
+	}
+	peer->rate_tokens = token;
+	return rc;
+}
+EXPORT_SYMBOL(inet_peer_xrlim_allow);
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index d1d0e2c..71465955 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -778,7 +778,8 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
 			.proto = IPPROTO_GRE,
 			.fl_gre_key = tunnel->parms.o_key
 		};
-		if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
+		rt = ip_route_output_key(dev_net(dev), &fl);
+		if (IS_ERR(rt)) {
 			dev->stats.tx_carrier_errors++;
 			goto tx_error;
 		}
@@ -953,9 +954,9 @@ static int ipgre_tunnel_bind_dev(struct net_device *dev)
 			.proto = IPPROTO_GRE,
 			.fl_gre_key = tunnel->parms.o_key
 		};
-		struct rtable *rt;
+		struct rtable *rt = ip_route_output_key(dev_net(dev), &fl);
 
-		if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
+		if (!IS_ERR(rt)) {
 			tdev = rt->dst.dev;
 			ip_rt_put(rt);
 		}
@@ -1215,9 +1216,9 @@ static int ipgre_open(struct net_device *dev)
 			.proto = IPPROTO_GRE,
 			.fl_gre_key = t->parms.o_key
 		};
-		struct rtable *rt;
+		struct rtable *rt = ip_route_output_key(dev_net(dev), &fl);
 
-		if (ip_route_output_key(dev_net(dev), &rt, &fl))
+		if (IS_ERR(rt))
 			return -EADDRNOTAVAIL;
 		dev = rt->dst.dev;
 		ip_rt_put(rt);
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index d859bcc..d7b2b09 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -340,7 +340,7 @@ static int ip_rcv_finish(struct sk_buff *skb)
 		}
 	}
 
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
 	if (unlikely(skb_dst(skb)->tclassid)) {
 		struct ip_rt_acct *st = this_cpu_ptr(ip_rt_acct);
 		u32 idx = skb_dst(skb)->tclassid;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 04c7b3b..171f483 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -355,7 +355,8 @@ int ip_queue_xmit(struct sk_buff *skb)
 			 * itself out.
 			 */
 			security_sk_classify_flow(sk, &fl);
-			if (ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0))
+			rt = ip_route_output_flow(sock_net(sk), &fl, sk);
+			if (IS_ERR(rt))
 				goto no_route;
 		}
 		sk_setup_caps(sk, &rt->dst);
@@ -733,6 +734,7 @@ csum_page(struct page *page, int offset, int copy)
 }
 
 static inline int ip_ufo_append_data(struct sock *sk,
+			struct sk_buff_head *queue,
 			int getfrag(void *from, char *to, int offset, int len,
 			       int odd, struct sk_buff *skb),
 			void *from, int length, int hh_len, int fragheaderlen,
@@ -745,7 +747,7 @@ static inline int ip_ufo_append_data(struct sock *sk,
 	 * device, so create one single skb packet containing complete
 	 * udp datagram
 	 */
-	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
+	if ((skb = skb_peek_tail(queue)) == NULL) {
 		skb = sock_alloc_send_skb(sk,
 			hh_len + fragheaderlen + transhdrlen + 20,
 			(flags & MSG_DONTWAIT), &err);
@@ -767,40 +769,28 @@ static inline int ip_ufo_append_data(struct sock *sk,
 
 		skb->ip_summed = CHECKSUM_PARTIAL;
 		skb->csum = 0;
-		sk->sk_sndmsg_off = 0;
 
 		/* specify the length of each IP datagram fragment */
 		skb_shinfo(skb)->gso_size = mtu - fragheaderlen;
 		skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
-		__skb_queue_tail(&sk->sk_write_queue, skb);
+		__skb_queue_tail(queue, skb);
 	}
 
 	return skb_append_datato_frags(sk, skb, getfrag, from,
 				       (length - transhdrlen));
 }
 
-/*
- *	ip_append_data() and ip_append_page() can make one large IP datagram
- *	from many pieces of data. Each pieces will be holded on the socket
- *	until ip_push_pending_frames() is called. Each piece can be a page
- *	or non-page data.
- *
- *	Not only UDP, other transport protocols - e.g. raw sockets - can use
- *	this interface potentially.
- *
- *	LATER: length must be adjusted by pad at tail, when it is required.
- */
-int ip_append_data(struct sock *sk,
-		   int getfrag(void *from, char *to, int offset, int len,
-			       int odd, struct sk_buff *skb),
-		   void *from, int length, int transhdrlen,
-		   struct ipcm_cookie *ipc, struct rtable **rtp,
-		   unsigned int flags)
+static int __ip_append_data(struct sock *sk, struct sk_buff_head *queue,
+			    struct inet_cork *cork,
+			    int getfrag(void *from, char *to, int offset,
+					int len, int odd, struct sk_buff *skb),
+			    void *from, int length, int transhdrlen,
+			    unsigned int flags)
 {
 	struct inet_sock *inet = inet_sk(sk);
 	struct sk_buff *skb;
 
-	struct ip_options *opt = NULL;
+	struct ip_options *opt = cork->opt;
 	int hh_len;
 	int exthdrlen;
 	int mtu;
@@ -809,58 +799,19 @@ int ip_append_data(struct sock *sk,
 	int offset = 0;
 	unsigned int maxfraglen, fragheaderlen;
 	int csummode = CHECKSUM_NONE;
-	struct rtable *rt;
+	struct rtable *rt = (struct rtable *)cork->dst;
 
-	if (flags&MSG_PROBE)
-		return 0;
-
-	if (skb_queue_empty(&sk->sk_write_queue)) {
-		/*
-		 * setup for corking.
-		 */
-		opt = ipc->opt;
-		if (opt) {
-			if (inet->cork.opt == NULL) {
-				inet->cork.opt = kmalloc(sizeof(struct ip_options) + 40, sk->sk_allocation);
-				if (unlikely(inet->cork.opt == NULL))
-					return -ENOBUFS;
-			}
-			memcpy(inet->cork.opt, opt, sizeof(struct ip_options)+opt->optlen);
-			inet->cork.flags |= IPCORK_OPT;
-			inet->cork.addr = ipc->addr;
-		}
-		rt = *rtp;
-		if (unlikely(!rt))
-			return -EFAULT;
-		/*
-		 * We steal reference to this route, caller should not release it
-		 */
-		*rtp = NULL;
-		inet->cork.fragsize = mtu = inet->pmtudisc == IP_PMTUDISC_PROBE ?
-					    rt->dst.dev->mtu :
-					    dst_mtu(rt->dst.path);
-		inet->cork.dst = &rt->dst;
-		inet->cork.length = 0;
-		sk->sk_sndmsg_page = NULL;
-		sk->sk_sndmsg_off = 0;
-		exthdrlen = rt->dst.header_len;
-		length += exthdrlen;
-		transhdrlen += exthdrlen;
-	} else {
-		rt = (struct rtable *)inet->cork.dst;
-		if (inet->cork.flags & IPCORK_OPT)
-			opt = inet->cork.opt;
+	exthdrlen = transhdrlen ? rt->dst.header_len : 0;
+	length += exthdrlen;
+	transhdrlen += exthdrlen;
+	mtu = cork->fragsize;
 
-		transhdrlen = 0;
-		exthdrlen = 0;
-		mtu = inet->cork.fragsize;
-	}
 	hh_len = LL_RESERVED_SPACE(rt->dst.dev);
 
 	fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
 	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;
 
-	if (inet->cork.length + length > 0xFFFF - fragheaderlen) {
+	if (cork->length + length > 0xFFFF - fragheaderlen) {
 		ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->inet_dport,
 			       mtu-exthdrlen);
 		return -EMSGSIZE;
@@ -876,15 +827,15 @@ int ip_append_data(struct sock *sk,
 	    !exthdrlen)
 		csummode = CHECKSUM_PARTIAL;
 
-	skb = skb_peek_tail(&sk->sk_write_queue);
+	skb = skb_peek_tail(queue);
 
-	inet->cork.length += length;
+	cork->length += length;
 	if (((length > mtu) || (skb && skb_is_gso(skb))) &&
 	    (sk->sk_protocol == IPPROTO_UDP) &&
 	    (rt->dst.dev->features & NETIF_F_UFO)) {
-		err = ip_ufo_append_data(sk, getfrag, from, length, hh_len,
-					 fragheaderlen, transhdrlen, mtu,
-					 flags);
+		err = ip_ufo_append_data(sk, queue, getfrag, from, length,
+					 hh_len, fragheaderlen, transhdrlen,
+					 mtu, flags);
 		if (err)
 			goto error;
 		return 0;
@@ -961,7 +912,7 @@ alloc_new_skb:
 				else
 					/* only the initial fragment is
 					   time stamped */
-					ipc->tx_flags = 0;
+					cork->tx_flags = 0;
 			}
 			if (skb == NULL)
 				goto error;
@@ -972,7 +923,7 @@ alloc_new_skb:
 			skb->ip_summed = csummode;
 			skb->csum = 0;
 			skb_reserve(skb, hh_len);
-			skb_shinfo(skb)->tx_flags = ipc->tx_flags;
+			skb_shinfo(skb)->tx_flags = cork->tx_flags;
 
 			/*
 			 *	Find where to start putting bytes.
@@ -1009,7 +960,7 @@ alloc_new_skb:
 			/*
 			 * Put the packet on the pending queue.
 			 */
-			__skb_queue_tail(&sk->sk_write_queue, skb);
+			__skb_queue_tail(queue, skb);
 			continue;
 		}
 
@@ -1029,8 +980,8 @@ alloc_new_skb:
 		} else {
 			int i = skb_shinfo(skb)->nr_frags;
 			skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
-			struct page *page = sk->sk_sndmsg_page;
-			int off = sk->sk_sndmsg_off;
+			struct page *page = cork->page;
+			int off = cork->off;
 			unsigned int left;
 
 			if (page && (left = PAGE_SIZE - off) > 0) {
@@ -1042,7 +993,7 @@ alloc_new_skb:
 						goto error;
 					}
 					get_page(page);
-					skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
+					skb_fill_page_desc(skb, i, page, off, 0);
 					frag = &skb_shinfo(skb)->frags[i];
 				}
 			} else if (i < MAX_SKB_FRAGS) {
@@ -1053,8 +1004,8 @@ alloc_new_skb:
 					err = -ENOMEM;
 					goto error;
 				}
-				sk->sk_sndmsg_page = page;
-				sk->sk_sndmsg_off = 0;
+				cork->page = page;
+				cork->off = 0;
 
 				skb_fill_page_desc(skb, i, page, 0, 0);
 				frag = &skb_shinfo(skb)->frags[i];
@@ -1066,7 +1017,7 @@ alloc_new_skb:
 				err = -EFAULT;
 				goto error;
 			}
-			sk->sk_sndmsg_off += copy;
+			cork->off += copy;
 			frag->size += copy;
 			skb->len += copy;
 			skb->data_len += copy;
@@ -1080,11 +1031,87 @@ alloc_new_skb:
 	return 0;
 
 error:
-	inet->cork.length -= length;
+	cork->length -= length;
 	IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS);
 	return err;
 }
 
+static int ip_setup_cork(struct sock *sk, struct inet_cork *cork,
+			 struct ipcm_cookie *ipc, struct rtable **rtp)
+{
+	struct inet_sock *inet = inet_sk(sk);
+	struct ip_options *opt;
+	struct rtable *rt;
+
+	/*
+	 * setup for corking.
+	 */
+	opt = ipc->opt;
+	if (opt) {
+		if (cork->opt == NULL) {
+			cork->opt = kmalloc(sizeof(struct ip_options) + 40,
+					    sk->sk_allocation);
+			if (unlikely(cork->opt == NULL))
+				return -ENOBUFS;
+		}
+		memcpy(cork->opt, opt, sizeof(struct ip_options) + opt->optlen);
+		cork->flags |= IPCORK_OPT;
+		cork->addr = ipc->addr;
+	}
+	rt = *rtp;
+	if (unlikely(!rt))
+		return -EFAULT;
+	/*
+	 * We steal reference to this route, caller should not release it
+	 */
+	*rtp = NULL;
+	cork->fragsize = inet->pmtudisc == IP_PMTUDISC_PROBE ?
+			 rt->dst.dev->mtu : dst_mtu(rt->dst.path);
+	cork->dst = &rt->dst;
+	cork->length = 0;
+	cork->tx_flags = ipc->tx_flags;
+	cork->page = NULL;
+	cork->off = 0;
+
+	return 0;
+}
+
+/*
+ *	ip_append_data() and ip_append_page() can make one large IP datagram
+ *	from many pieces of data. Each pieces will be holded on the socket
+ *	until ip_push_pending_frames() is called. Each piece can be a page
+ *	or non-page data.
+ *
+ *	Not only UDP, other transport protocols - e.g. raw sockets - can use
+ *	this interface potentially.
+ *
+ *	LATER: length must be adjusted by pad at tail, when it is required.
+ */
+int ip_append_data(struct sock *sk,
+		   int getfrag(void *from, char *to, int offset, int len,
+			       int odd, struct sk_buff *skb),
+		   void *from, int length, int transhdrlen,
+		   struct ipcm_cookie *ipc, struct rtable **rtp,
+		   unsigned int flags)
+{
+	struct inet_sock *inet = inet_sk(sk);
+	int err;
+
+	if (flags&MSG_PROBE)
+		return 0;
+
+	if (skb_queue_empty(&sk->sk_write_queue)) {
+		err = ip_setup_cork(sk, &inet->cork, ipc, rtp);
+		if (err)
+			return err;
+	} else {
+		transhdrlen = 0;
+	}
+
+	return __ip_append_data(sk, &sk->sk_write_queue, &inet->cork, getfrag,
+				from, length, transhdrlen, flags);
+}
+
 ssize_t	ip_append_page(struct sock *sk, struct page *page,
 		       int offset, size_t size, int flags)
 {
@@ -1228,40 +1255,41 @@ error:
 	return err;
 }
 
-static void ip_cork_release(struct inet_sock *inet)
+static void ip_cork_release(struct inet_cork *cork)
 {
-	inet->cork.flags &= ~IPCORK_OPT;
-	kfree(inet->cork.opt);
-	inet->cork.opt = NULL;
-	dst_release(inet->cork.dst);
-	inet->cork.dst = NULL;
+	cork->flags &= ~IPCORK_OPT;
+	kfree(cork->opt);
+	cork->opt = NULL;
+	dst_release(cork->dst);
+	cork->dst = NULL;
 }
 
 /*
  *	Combined all pending IP fragments on the socket as one IP datagram
  *	and push them out.
  */
-int ip_push_pending_frames(struct sock *sk)
+struct sk_buff *__ip_make_skb(struct sock *sk,
+			      struct sk_buff_head *queue,
+			      struct inet_cork *cork)
 {
 	struct sk_buff *skb, *tmp_skb;
 	struct sk_buff **tail_skb;
 	struct inet_sock *inet = inet_sk(sk);
 	struct net *net = sock_net(sk);
 	struct ip_options *opt = NULL;
-	struct rtable *rt = (struct rtable *)inet->cork.dst;
+	struct rtable *rt = (struct rtable *)cork->dst;
 	struct iphdr *iph;
 	__be16 df = 0;
 	__u8 ttl;
-	int err = 0;
 
-	if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
+	if ((skb = __skb_dequeue(queue)) == NULL)
 		goto out;
 	tail_skb = &(skb_shinfo(skb)->frag_list);
 
 	/* move skb->data to ip header from ext header */
 	if (skb->data < skb_network_header(skb))
 		__skb_pull(skb, skb_network_offset(skb));
-	while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
+	while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
 		__skb_pull(tmp_skb, skb_network_header_len(skb));
 		*tail_skb = tmp_skb;
 		tail_skb = &(tmp_skb->next);
@@ -1287,8 +1315,8 @@ int ip_push_pending_frames(struct sock *sk)
 	     ip_dont_fragment(sk, &rt->dst)))
 		df = htons(IP_DF);
 
-	if (inet->cork.flags & IPCORK_OPT)
-		opt = inet->cork.opt;
+	if (cork->flags & IPCORK_OPT)
+		opt = cork->opt;
 
 	if (rt->rt_type == RTN_MULTICAST)
 		ttl = inet->mc_ttl;
@@ -1300,7 +1328,7 @@ int ip_push_pending_frames(struct sock *sk)
 	iph->ihl = 5;
 	if (opt) {
 		iph->ihl += opt->optlen>>2;
-		ip_options_build(skb, opt, inet->cork.addr, rt, 0);
+		ip_options_build(skb, opt, cork->addr, rt, 0);
 	}
 	iph->tos = inet->tos;
 	iph->frag_off = df;
@@ -1316,44 +1344,95 @@ int ip_push_pending_frames(struct sock *sk)
 	 * Steal rt from cork.dst to avoid a pair of atomic_inc/atomic_dec
 	 * on dst refcount
 	 */
-	inet->cork.dst = NULL;
+	cork->dst = NULL;
 	skb_dst_set(skb, &rt->dst);
 
 	if (iph->protocol == IPPROTO_ICMP)
 		icmp_out_count(net, ((struct icmphdr *)
 			skb_transport_header(skb))->type);
 
-	/* Netfilter gets whole the not fragmented skb. */
+	ip_cork_release(cork);
+out:
+	return skb;
+}
+
+int ip_send_skb(struct sk_buff *skb)
+{
+	struct net *net = sock_net(skb->sk);
+	int err;
+
 	err = ip_local_out(skb);
 	if (err) {
 		if (err > 0)
 			err = net_xmit_errno(err);
 		if (err)
-			goto error;
+			IP_INC_STATS(net, IPSTATS_MIB_OUTDISCARDS);
 	}
 
-out:
-	ip_cork_release(inet);
 	return err;
+}
 
-error:
-	IP_INC_STATS(net, IPSTATS_MIB_OUTDISCARDS);
-	goto out;
+int ip_push_pending_frames(struct sock *sk)
+{
+	struct sk_buff *skb;
+
+	skb = ip_finish_skb(sk);
+	if (!skb)
+		return 0;
+
+	/* Netfilter gets whole the not fragmented skb. */
+	return ip_send_skb(skb);
 }
 
 /*
  *	Throw away all pending data on the socket.
  */
-void ip_flush_pending_frames(struct sock *sk)
+static void __ip_flush_pending_frames(struct sock *sk,
+				      struct sk_buff_head *queue,
+				      struct inet_cork *cork)
 {
 	struct sk_buff *skb;
 
-	while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL)
+	while ((skb = __skb_dequeue_tail(queue)) != NULL)
 		kfree_skb(skb);
 
-	ip_cork_release(inet_sk(sk));
+	ip_cork_release(cork);
+}
+
+void ip_flush_pending_frames(struct sock *sk)
+{
+	__ip_flush_pending_frames(sk, &sk->sk_write_queue, &inet_sk(sk)->cork);
 }
 
+struct sk_buff *ip_make_skb(struct sock *sk,
+			    int getfrag(void *from, char *to, int offset,
+					int len, int odd, struct sk_buff *skb),
+			    void *from, int length, int transhdrlen,
+			    struct ipcm_cookie *ipc, struct rtable **rtp,
+			    unsigned int flags)
+{
+	struct inet_cork cork = {};
+	struct sk_buff_head queue;
+	int err;
+
+	if (flags & MSG_PROBE)
+		return NULL;
+
+	__skb_queue_head_init(&queue);
+
+	err = ip_setup_cork(sk, &cork, ipc, rtp);
+	if (err)
+		return ERR_PTR(err);
+
+	err = __ip_append_data(sk, &queue, &cork, getfrag,
+			       from, length, transhdrlen, flags);
+	if (err) {
+		__ip_flush_pending_frames(sk, &queue, &cork);
+		return ERR_PTR(err);
+	}
+
+	return __ip_make_skb(sk, &queue, &cork);
+}
 
 /*
  *	Fetch data from kernel space and fill in checksum if needed.
@@ -1411,7 +1490,8 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar
 				    .proto = sk->sk_protocol,
 				    .flags = ip_reply_arg_flowi_flags(arg) };
 		security_skb_classify_flow(skb, &fl);
-		if (ip_route_output_key(sock_net(sk), &rt, &fl))
+		rt = ip_route_output_key(sock_net(sk), &fl);
+		if (IS_ERR(rt))
 			return;
 	}
 
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index a5f58e7..65008f4 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -469,7 +469,8 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 			.proto = IPPROTO_IPIP
 		};
 
-		if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
+		rt = ip_route_output_key(dev_net(dev), &fl);
+		if (IS_ERR(rt)) {
 			dev->stats.tx_carrier_errors++;
 			goto tx_error_icmp;
 		}
@@ -590,9 +591,9 @@ static void ipip_tunnel_bind_dev(struct net_device *dev)
 			.fl4_tos = RT_TOS(iph->tos),
 			.proto = IPPROTO_IPIP
 		};
-		struct rtable *rt;
+		struct rtable *rt = ip_route_output_key(dev_net(dev), &fl);
 
-		if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
+		if (!IS_ERR(rt)) {
 			tdev = rt->dst.dev;
 			ip_rt_put(rt);
 		}
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 8b65a12..74909ba 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1618,8 +1618,8 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
 			.fl4_tos = RT_TOS(iph->tos),
 			.proto = IPPROTO_IPIP
 		};
-
-		if (ip_route_output_key(net, &rt, &fl))
+		rt = ip_route_output_key(net, &fl);
+		if (IS_ERR(rt))
 			goto out_free;
 		encap = sizeof(struct iphdr);
 	} else {
@@ -1629,8 +1629,8 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
 			.fl4_tos = RT_TOS(iph->tos),
 			.proto = IPPROTO_IPIP
 		};
-
-		if (ip_route_output_key(net, &rt, &fl))
+		rt = ip_route_output_key(net, &fl);
+		if (IS_ERR(rt))
 			goto out_free;
 	}
 
@@ -1793,6 +1793,24 @@ dont_forward:
 	return 0;
 }
 
+static struct mr_table *ipmr_rt_fib_lookup(struct net *net, struct rtable *rt)
+{
+	struct flowi fl = {
+		.fl4_dst = rt->rt_key_dst,
+		.fl4_src = rt->rt_key_src,
+		.fl4_tos = rt->rt_tos,
+		.oif = rt->rt_oif,
+		.iif = rt->rt_iif,
+		.mark = rt->rt_mark,
+	};
+	struct mr_table *mrt;
+	int err;
+
+	err = ipmr_fib_lookup(net, &fl, &mrt);
+	if (err)
+		return ERR_PTR(err);
+	return mrt;
+}
 
 /*
  *	Multicast packets for forwarding arrive here
@@ -1805,7 +1823,6 @@ int ip_mr_input(struct sk_buff *skb)
 	struct net *net = dev_net(skb->dev);
 	int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL;
 	struct mr_table *mrt;
-	int err;
 
 	/* Packet is looped back after forward, it should not be
 	 * forwarded second time, but still can be delivered locally.
@@ -1813,12 +1830,11 @@ int ip_mr_input(struct sk_buff *skb)
 	if (IPCB(skb)->flags & IPSKB_FORWARDED)
 		goto dont_forward;
 
-	err = ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt);
-	if (err < 0) {
+	mrt = ipmr_rt_fib_lookup(net, skb_rtable(skb));
+	if (IS_ERR(mrt)) {
 		kfree_skb(skb);
-		return err;
+		return PTR_ERR(mrt);
 	}
-
 	if (!local) {
 		if (IPCB(skb)->opt.router_alert) {
 			if (ip_call_ra_chain(skb))
@@ -1946,9 +1962,9 @@ int pim_rcv_v1(struct sk_buff *skb)
 
 	pim = igmp_hdr(skb);
 
-	if (ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt) < 0)
+	mrt = ipmr_rt_fib_lookup(net, skb_rtable(skb));
+	if (IS_ERR(mrt))
 		goto drop;
-
 	if (!mrt->mroute_do_pim ||
 	    pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
 		goto drop;
@@ -1978,9 +1994,9 @@ static int pim_rcv(struct sk_buff *skb)
 	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
 		goto drop;
 
-	if (ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt) < 0)
+	mrt = ipmr_rt_fib_lookup(net, skb_rtable(skb));
+	if (IS_ERR(mrt))
 		goto drop;
-
 	if (__pim_rcv(mrt, skb, sizeof(*pim))) {
 drop:
 		kfree_skb(skb);
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index 994a1f2..67bf709 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -38,7 +38,8 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type)
 		fl.oif = skb->sk ? skb->sk->sk_bound_dev_if : 0;
 		fl.mark = skb->mark;
 		fl.flags = skb->sk ? inet_sk_flowi_flags(skb->sk) : 0;
-		if (ip_route_output_key(net, &rt, &fl) != 0)
+		rt = ip_route_output_key(net, &fl);
+		if (IS_ERR(rt))
 			return -1;
 
 		/* Drop old route. */
@@ -48,7 +49,8 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type)
 		/* non-local src, find valid iif to satisfy
 		 * rp-filter when calling ip_route_input. */
 		fl.fl4_dst = iph->saddr;
-		if (ip_route_output_key(net, &rt, &fl) != 0)
+		rt = ip_route_output_key(net, &fl);
+		if (IS_ERR(rt))
 			return -1;
 
 		orefdst = skb->_skb_refdst;
@@ -69,7 +71,8 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type)
 	    xfrm_decode_session(skb, &fl, AF_INET) == 0) {
 		struct dst_entry *dst = skb_dst(skb);
 		skb_dst_set(skb, NULL);
-		if (xfrm_lookup(net, &dst, &fl, skb->sk, 0))
+		dst = xfrm_lookup(net, dst, &fl, skb->sk, 0);
+		if (IS_ERR(dst))
 			return -1;
 		skb_dst_set(skb, dst);
 	}
@@ -102,7 +105,8 @@ int ip_xfrm_me_harder(struct sk_buff *skb)
 		dst = ((struct xfrm_dst *)dst)->route;
 	dst_hold(dst);
 
-	if (xfrm_lookup(dev_net(dst->dev), &dst, &fl, skb->sk, 0) < 0)
+	dst = xfrm_lookup(dev_net(dst->dev), dst, &fl, skb->sk, 0);
+	if (IS_ERR(dst))
 		return -1;
 
 	skb_dst_drop(skb);
@@ -219,7 +223,11 @@ static __sum16 nf_ip_checksum_partial(struct sk_buff *skb, unsigned int hook,
 
 static int nf_ip_route(struct dst_entry **dst, struct flowi *fl)
 {
-	return ip_route_output_key(&init_net, (struct rtable **)dst, fl);
+	struct rtable *rt = ip_route_output_key(&init_net, fl);
+	if (IS_ERR(rt))
+		return PTR_ERR(rt);
+	*dst = &rt->dst;
+	return 0;
 }
 
 static const struct nf_afinfo nf_ip_afinfo = {
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index babd1a2..f926a31 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -206,8 +206,9 @@ config IP_NF_TARGET_REDIRECT
 
 config NF_NAT_SNMP_BASIC
 	tristate "Basic SNMP-ALG support"
-	depends on NF_NAT
+	depends on NF_CONNTRACK_SNMP && NF_NAT
 	depends on NETFILTER_ADVANCED
+	default NF_NAT && NF_CONNTRACK_SNMP
 	---help---
 
 	  This module implements an Application Layer Gateway (ALG) for
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index e855fff..e95054c 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -866,6 +866,7 @@ static int compat_table_info(const struct xt_table_info *info,
 	memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
 	newinfo->initial_entries = 0;
 	loc_cpu_entry = info->entries[raw_smp_processor_id()];
+	xt_compat_init_offsets(NFPROTO_ARP, info->number);
 	xt_entry_foreach(iter, loc_cpu_entry, info->size) {
 		ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo);
 		if (ret != 0)
@@ -1333,6 +1334,7 @@ static int translate_compat_table(const char *name,
 	duprintf("translate_compat_table: size %u\n", info->size);
 	j = 0;
 	xt_compat_lock(NFPROTO_ARP);
+	xt_compat_init_offsets(NFPROTO_ARP, number);
 	/* Walk through entries, checking offsets. */
 	xt_entry_foreach(iter0, entry0, total_size) {
 		ret = check_compat_entry_size_and_hooks(iter0, info, &size,
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 652efea..ef7d7b9 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -1063,6 +1063,7 @@ static int compat_table_info(const struct xt_table_info *info,
 	memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
 	newinfo->initial_entries = 0;
 	loc_cpu_entry = info->entries[raw_smp_processor_id()];
+	xt_compat_init_offsets(AF_INET, info->number);
 	xt_entry_foreach(iter, loc_cpu_entry, info->size) {
 		ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo);
 		if (ret != 0)
@@ -1664,6 +1665,7 @@ translate_compat_table(struct net *net,
 	duprintf("translate_compat_table: size %u\n", info->size);
 	j = 0;
 	xt_compat_lock(AF_INET);
+	xt_compat_init_offsets(AF_INET, number);
 	/* Walk through entries, checking offsets. */
 	xt_entry_foreach(iter0, entry0, total_size) {
 		ret = check_compat_entry_size_and_hooks(iter0, info, &size,
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 1e26a48..403ca57 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -300,13 +300,8 @@ clusterip_tg(struct sk_buff *skb, const struct xt_action_param *par)
 	 * that the ->target() function isn't called after ->destroy() */
 
 	ct = nf_ct_get(skb, &ctinfo);
-	if (ct == NULL) {
-		pr_info("no conntrack!\n");
-			/* FIXME: need to drop invalid ones, since replies
-			 * to outgoing connections of other nodes will be
-			 * marked as INVALID */
+	if (ct == NULL)
 		return NF_DROP;
-	}
 
 	/* special case: ICMP error handling. conntrack distinguishes between
 	 * error messages (RELATED) and information requests (see below) */
diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c
index 72ffc8f..d76d6c9 100644
--- a/net/ipv4/netfilter/ipt_LOG.c
+++ b/net/ipv4/netfilter/ipt_LOG.c
@@ -442,8 +442,7 @@ ipt_log_packet(u_int8_t pf,
 	}
 #endif
 
-	/* MAC logging for input path only. */
-	if (in && !out)
+	if (in != NULL)
 		dump_mac_header(m, loginfo, skb);
 
 	dump_packet(m, loginfo, skb, 0);
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
index 294a2a3..aef5d1f 100644
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -60,7 +60,7 @@ ipt_mangle_out(struct sk_buff *skb, const struct net_device *out)
 	ret = ipt_do_table(skb, NF_INET_LOCAL_OUT, NULL, out,
 			   dev_net(out)->ipv4.iptable_mangle);
 	/* Reroute for ANY change. */
-	if (ret != NF_DROP && ret != NF_STOLEN && ret != NF_QUEUE) {
+	if (ret != NF_DROP && ret != NF_STOLEN) {
 		iph = ip_hdr(skb);
 
 		if (iph->saddr != saddr ||
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
index 63f60fc..5585980 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
@@ -20,6 +20,7 @@
 #include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_conntrack_expect.h>
 #include <net/netfilter/nf_conntrack_acct.h>
+#include <linux/rculist_nulls.h>
 
 struct ct_iter_state {
 	struct seq_net_private p;
@@ -35,7 +36,8 @@ static struct hlist_nulls_node *ct_get_first(struct seq_file *seq)
 	for (st->bucket = 0;
 	     st->bucket < net->ct.htable_size;
 	     st->bucket++) {
-		n = rcu_dereference(net->ct.hash[st->bucket].first);
+		n = rcu_dereference(
+			hlist_nulls_first_rcu(&net->ct.hash[st->bucket]));
 		if (!is_a_nulls(n))
 			return n;
 	}
@@ -48,13 +50,14 @@ static struct hlist_nulls_node *ct_get_next(struct seq_file *seq,
 	struct net *net = seq_file_net(seq);
 	struct ct_iter_state *st = seq->private;
 
-	head = rcu_dereference(head->next);
+	head = rcu_dereference(hlist_nulls_next_rcu(head));
 	while (is_a_nulls(head)) {
 		if (likely(get_nulls_value(head) == st->bucket)) {
 			if (++st->bucket >= net->ct.htable_size)
 				return NULL;
 		}
-		head = rcu_dereference(net->ct.hash[st->bucket].first);
+		head = rcu_dereference(
+			hlist_nulls_first_rcu(&net->ct.hash[st->bucket]));
 	}
 	return head;
 }
@@ -217,7 +220,8 @@ static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
 	struct hlist_node *n;
 
 	for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) {
-		n = rcu_dereference(net->ct.expect_hash[st->bucket].first);
+		n = rcu_dereference(
+			hlist_first_rcu(&net->ct.expect_hash[st->bucket]));
 		if (n)
 			return n;
 	}
@@ -230,11 +234,12 @@ static struct hlist_node *ct_expect_get_next(struct seq_file *seq,
 	struct net *net = seq_file_net(seq);
 	struct ct_expect_iter_state *st = seq->private;
 
-	head = rcu_dereference(head->next);
+	head = rcu_dereference(hlist_next_rcu(head));
 	while (head == NULL) {
 		if (++st->bucket >= nf_ct_expect_hsize)
 			return NULL;
-		head = rcu_dereference(net->ct.expect_hash[st->bucket].first);
+		head = rcu_dereference(
+			hlist_first_rcu(&net->ct.expect_hash[st->bucket]));
 	}
 	return head;
 }
diff --git a/net/ipv4/netfilter/nf_nat_amanda.c b/net/ipv4/netfilter/nf_nat_amanda.c
index 0f23b3f..703f366f 100644
--- a/net/ipv4/netfilter/nf_nat_amanda.c
+++ b/net/ipv4/netfilter/nf_nat_amanda.c
@@ -44,13 +44,13 @@ static unsigned int help(struct sk_buff *skb,
 
 	/* Try to get same port: if not, try to change it. */
 	for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) {
-		int ret;
+		int res;
 
 		exp->tuple.dst.u.tcp.port = htons(port);
-		ret = nf_ct_expect_related(exp);
-		if (ret == 0)
+		res = nf_ct_expect_related(exp);
+		if (res == 0)
 			break;
-		else if (ret != -EBUSY) {
+		else if (res != -EBUSY) {
 			port = 0;
 			break;
 		}
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index c04787c..21bcf47 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -221,7 +221,14 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
 	   manips not an issue.  */
 	if (maniptype == IP_NAT_MANIP_SRC &&
 	    !(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) {
-		if (find_appropriate_src(net, zone, orig_tuple, tuple, range)) {
+		/* try the original tuple first */
+		if (in_range(orig_tuple, range)) {
+			if (!nf_nat_used_tuple(orig_tuple, ct)) {
+				*tuple = *orig_tuple;
+				return;
+			}
+		} else if (find_appropriate_src(net, zone, orig_tuple, tuple,
+			   range)) {
 			pr_debug("get_unique_tuple: Found current src map\n");
 			if (!nf_nat_used_tuple(tuple, ct))
 				return;
@@ -266,7 +273,6 @@ nf_nat_setup_info(struct nf_conn *ct,
 	struct net *net = nf_ct_net(ct);
 	struct nf_conntrack_tuple curr_tuple, new_tuple;
 	struct nf_conn_nat *nat;
-	int have_to_hash = !(ct->status & IPS_NAT_DONE_MASK);
 
 	/* nat helper or nfctnetlink also setup binding */
 	nat = nfct_nat(ct);
@@ -306,8 +312,7 @@ nf_nat_setup_info(struct nf_conn *ct,
 			ct->status |= IPS_DST_NAT;
 	}
 
-	/* Place in source hash if this is the first time. */
-	if (have_to_hash) {
+	if (maniptype == IP_NAT_MANIP_SRC) {
 		unsigned int srchash;
 
 		srchash = hash_by_src(net, nf_ct_zone(ct),
@@ -323,9 +328,9 @@ nf_nat_setup_info(struct nf_conn *ct,
 
 	/* It's done. */
 	if (maniptype == IP_NAT_MANIP_DST)
-		set_bit(IPS_DST_NAT_DONE_BIT, &ct->status);
+		ct->status |= IPS_DST_NAT_DONE;
 	else
-		set_bit(IPS_SRC_NAT_DONE_BIT, &ct->status);
+		ct->status |= IPS_SRC_NAT_DONE;
 
 	return NF_ACCEPT;
 }
@@ -502,7 +507,10 @@ int nf_nat_protocol_register(const struct nf_nat_protocol *proto)
 	int ret = 0;
 
 	spin_lock_bh(&nf_nat_lock);
-	if (nf_nat_protos[proto->protonum] != &nf_nat_unknown_protocol) {
+	if (rcu_dereference_protected(
+			nf_nat_protos[proto->protonum],
+			lockdep_is_held(&nf_nat_lock)
+			) != &nf_nat_unknown_protocol) {
 		ret = -EBUSY;
 		goto out;
 	}
@@ -532,7 +540,7 @@ static void nf_nat_cleanup_conntrack(struct nf_conn *ct)
 	if (nat == NULL || nat->ct == NULL)
 		return;
 
-	NF_CT_ASSERT(nat->ct->status & IPS_NAT_DONE_MASK);
+	NF_CT_ASSERT(nat->ct->status & IPS_SRC_NAT_DONE);
 
 	spin_lock_bh(&nf_nat_lock);
 	hlist_del_rcu(&nat->bysource);
@@ -545,11 +553,10 @@ static void nf_nat_move_storage(void *new, void *old)
 	struct nf_conn_nat *old_nat = old;
 	struct nf_conn *ct = old_nat->ct;
 
-	if (!ct || !(ct->status & IPS_NAT_DONE_MASK))
+	if (!ct || !(ct->status & IPS_SRC_NAT_DONE))
 		return;
 
 	spin_lock_bh(&nf_nat_lock);
-	new_nat->ct = ct;
 	hlist_replace_rcu(&old_nat->bysource, &new_nat->bysource);
 	spin_unlock_bh(&nf_nat_lock);
 }
@@ -679,8 +686,7 @@ static int __net_init nf_nat_net_init(struct net *net)
 {
 	/* Leave them the same for the moment. */
 	net->ipv4.nat_htable_size = net->ct.htable_size;
-	net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&net->ipv4.nat_htable_size,
-						       &net->ipv4.nat_vmalloced, 0);
+	net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&net->ipv4.nat_htable_size, 0);
 	if (!net->ipv4.nat_bysource)
 		return -ENOMEM;
 	return 0;
@@ -702,8 +708,7 @@ static void __net_exit nf_nat_net_exit(struct net *net)
 {
 	nf_ct_iterate_cleanup(net, &clean_nat, NULL);
 	synchronize_rcu();
-	nf_ct_free_hashtable(net->ipv4.nat_bysource, net->ipv4.nat_vmalloced,
-			     net->ipv4.nat_htable_size);
+	nf_ct_free_hashtable(net->ipv4.nat_bysource, net->ipv4.nat_htable_size);
 }
 
 static struct pernet_operations nf_nat_net_ops = {
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c
index ee5f419..8812a02 100644
--- a/net/ipv4/netfilter/nf_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c
@@ -54,6 +54,7 @@
 #include <net/netfilter/nf_conntrack_expect.h>
 #include <net/netfilter/nf_conntrack_helper.h>
 #include <net/netfilter/nf_nat_helper.h>
+#include <linux/netfilter/nf_conntrack_snmp.h>
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>");
@@ -1310,9 +1311,9 @@ static int __init nf_nat_snmp_basic_init(void)
 {
 	int ret = 0;
 
-	ret = nf_conntrack_helper_register(&snmp_helper);
-	if (ret < 0)
-		return ret;
+	BUG_ON(nf_nat_snmp_hook != NULL);
+	rcu_assign_pointer(nf_nat_snmp_hook, help);
+
 	ret = nf_conntrack_helper_register(&snmp_trap_helper);
 	if (ret < 0) {
 		nf_conntrack_helper_unregister(&snmp_helper);
@@ -1323,7 +1324,7 @@ static int __init nf_nat_snmp_basic_init(void)
 
 static void __exit nf_nat_snmp_basic_fini(void)
 {
-	nf_conntrack_helper_unregister(&snmp_helper);
+	rcu_assign_pointer(nf_nat_snmp_hook, NULL);
 	nf_conntrack_helper_unregister(&snmp_trap_helper);
 }
 
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 6390ba2..467d570 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -555,7 +555,8 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 				    .fl4_tos = tos,
 				    .proto = inet->hdrincl ? IPPROTO_RAW :
 							     sk->sk_protocol,
-				  };
+				    .flags = FLOWI_FLAG_CAN_SLEEP,
+		};
 		if (!inet->hdrincl) {
 			err = raw_probe_proto_opt(&fl, msg);
 			if (err)
@@ -563,10 +564,12 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 		}
 
 		security_sk_classify_flow(sk, &fl);
-		err = ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 1);
+		rt = ip_route_output_flow(sock_net(sk), &fl, sk);
+		if (IS_ERR(rt)) {
+			err = PTR_ERR(rt);
+			goto done;
+		}
 	}
-	if (err)
-		goto done;
 
 	err = -EACCES;
 	if (rt->rt_flags & RTCF_BROADCAST && !sock_flag(sk, SOCK_BROADCAST))
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 6ed6603..ac32d8f 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -131,9 +131,6 @@ static int ip_rt_min_pmtu __read_mostly		= 512 + 20 + 20;
 static int ip_rt_min_advmss __read_mostly	= 256;
 static int rt_chain_length_max __read_mostly	= 20;
 
-static struct delayed_work expires_work;
-static unsigned long expires_ljiffies;
-
 /*
  *	Interface to generic destination cache.
  */
@@ -152,6 +149,41 @@ static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
 {
 }
 
+static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old)
+{
+	struct rtable *rt = (struct rtable *) dst;
+	struct inet_peer *peer;
+	u32 *p = NULL;
+
+	if (!rt->peer)
+		rt_bind_peer(rt, 1);
+
+	peer = rt->peer;
+	if (peer) {
+		u32 *old_p = __DST_METRICS_PTR(old);
+		unsigned long prev, new;
+
+		p = peer->metrics;
+		if (inet_metrics_new(peer))
+			memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
+
+		new = (unsigned long) p;
+		prev = cmpxchg(&dst->_metrics, old, new);
+
+		if (prev != old) {
+			p = __DST_METRICS_PTR(prev);
+			if (prev & DST_METRICS_READ_ONLY)
+				p = NULL;
+		} else {
+			if (rt->fi) {
+				fib_info_put(rt->fi);
+				rt->fi = NULL;
+			}
+		}
+	}
+	return p;
+}
+
 static struct dst_ops ipv4_dst_ops = {
 	.family =		AF_INET,
 	.protocol =		cpu_to_be16(ETH_P_IP),
@@ -159,6 +191,7 @@ static struct dst_ops ipv4_dst_ops = {
 	.check =		ipv4_dst_check,
 	.default_advmss =	ipv4_default_advmss,
 	.default_mtu =		ipv4_default_mtu,
+	.cow_metrics =		ipv4_cow_metrics,
 	.destroy =		ipv4_dst_destroy,
 	.ifdown =		ipv4_dst_ifdown,
 	.negative_advice =	ipv4_negative_advice,
@@ -391,7 +424,7 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v)
 			dst_metric(&r->dst, RTAX_WINDOW),
 			(int)((dst_metric(&r->dst, RTAX_RTT) >> 3) +
 			      dst_metric(&r->dst, RTAX_RTTVAR)),
-			r->fl.fl4_tos,
+			r->rt_tos,
 			r->dst.hh ? atomic_read(&r->dst.hh->hh_refcnt) : -1,
 			r->dst.hh ? (r->dst.hh->hh_output ==
 				       dev_queue_xmit) : 0,
@@ -514,7 +547,7 @@ static const struct file_operations rt_cpu_seq_fops = {
 	.release = seq_release,
 };
 
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
 static int rt_acct_proc_show(struct seq_file *m, void *v)
 {
 	struct ip_rt_acct *dst, *src;
@@ -567,14 +600,14 @@ static int __net_init ip_rt_do_proc_init(struct net *net)
 	if (!pde)
 		goto err2;
 
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
 	pde = proc_create("rt_acct", 0, net->proc_net, &rt_acct_proc_fops);
 	if (!pde)
 		goto err3;
 #endif
 	return 0;
 
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
 err3:
 	remove_proc_entry("rt_cache", net->proc_net_stat);
 #endif
@@ -588,7 +621,7 @@ static void __net_exit ip_rt_do_proc_exit(struct net *net)
 {
 	remove_proc_entry("rt_cache", net->proc_net_stat);
 	remove_proc_entry("rt_cache", net->proc_net);
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
 	remove_proc_entry("rt_acct", net->proc_net);
 #endif
 }
@@ -632,7 +665,7 @@ static inline int rt_fast_clean(struct rtable *rth)
 static inline int rt_valuable(struct rtable *rth)
 {
 	return (rth->rt_flags & (RTCF_REDIRECTED | RTCF_NOTIFY)) ||
-		rth->dst.expires;
+		(rth->peer && rth->peer->pmtu_expires);
 }
 
 static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long tmo2)
@@ -643,13 +676,7 @@ static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long t
 	if (atomic_read(&rth->dst.__refcnt))
 		goto out;
 
-	ret = 1;
-	if (rth->dst.expires &&
-	    time_after_eq(jiffies, rth->dst.expires))
-		goto out;
-
 	age = jiffies - rth->dst.lastuse;
-	ret = 0;
 	if ((age <= tmo1 && !rt_fast_clean(rth)) ||
 	    (age <= tmo2 && rt_valuable(rth)))
 		goto out;
@@ -684,22 +711,22 @@ static inline bool rt_caching(const struct net *net)
 		net->ipv4.sysctl_rt_cache_rebuild_count;
 }
 
-static inline bool compare_hash_inputs(const struct flowi *fl1,
-					const struct flowi *fl2)
+static inline bool compare_hash_inputs(const struct rtable *rt1,
+				       const struct rtable *rt2)
 {
-	return ((((__force u32)fl1->fl4_dst ^ (__force u32)fl2->fl4_dst) |
-		((__force u32)fl1->fl4_src ^ (__force u32)fl2->fl4_src) |
-		(fl1->iif ^ fl2->iif)) == 0);
+	return ((((__force u32)rt1->rt_key_dst ^ (__force u32)rt2->rt_key_dst) |
+		((__force u32)rt1->rt_key_src ^ (__force u32)rt2->rt_key_src) |
+		(rt1->rt_iif ^ rt2->rt_iif)) == 0);
 }
 
-static inline int compare_keys(struct flowi *fl1, struct flowi *fl2)
+static inline int compare_keys(struct rtable *rt1, struct rtable *rt2)
 {
-	return (((__force u32)fl1->fl4_dst ^ (__force u32)fl2->fl4_dst) |
-		((__force u32)fl1->fl4_src ^ (__force u32)fl2->fl4_src) |
-		(fl1->mark ^ fl2->mark) |
-		(*(u16 *)&fl1->fl4_tos ^ *(u16 *)&fl2->fl4_tos) |
-		(fl1->oif ^ fl2->oif) |
-		(fl1->iif ^ fl2->iif)) == 0;
+	return (((__force u32)rt1->rt_key_dst ^ (__force u32)rt2->rt_key_dst) |
+		((__force u32)rt1->rt_key_src ^ (__force u32)rt2->rt_key_src) |
+		(rt1->rt_mark ^ rt2->rt_mark) |
+		(rt1->rt_tos ^ rt2->rt_tos) |
+		(rt1->rt_oif ^ rt2->rt_oif) |
+		(rt1->rt_iif ^ rt2->rt_iif)) == 0;
 }
 
 static inline int compare_netns(struct rtable *rt1, struct rtable *rt2)
@@ -786,104 +813,13 @@ static int has_noalias(const struct rtable *head, const struct rtable *rth)
 	const struct rtable *aux = head;
 
 	while (aux != rth) {
-		if (compare_hash_inputs(&aux->fl, &rth->fl))
+		if (compare_hash_inputs(aux, rth))
 			return 0;
 		aux = rcu_dereference_protected(aux->dst.rt_next, 1);
 	}
 	return ONE;
 }
 
-static void rt_check_expire(void)
-{
-	static unsigned int rover;
-	unsigned int i = rover, goal;
-	struct rtable *rth;
-	struct rtable __rcu **rthp;
-	unsigned long samples = 0;
-	unsigned long sum = 0, sum2 = 0;
-	unsigned long delta;
-	u64 mult;
-
-	delta = jiffies - expires_ljiffies;
-	expires_ljiffies = jiffies;
-	mult = ((u64)delta) << rt_hash_log;
-	if (ip_rt_gc_timeout > 1)
-		do_div(mult, ip_rt_gc_timeout);
-	goal = (unsigned int)mult;
-	if (goal > rt_hash_mask)
-		goal = rt_hash_mask + 1;
-	for (; goal > 0; goal--) {
-		unsigned long tmo = ip_rt_gc_timeout;
-		unsigned long length;
-
-		i = (i + 1) & rt_hash_mask;
-		rthp = &rt_hash_table[i].chain;
-
-		if (need_resched())
-			cond_resched();
-
-		samples++;
-
-		if (rcu_dereference_raw(*rthp) == NULL)
-			continue;
-		length = 0;
-		spin_lock_bh(rt_hash_lock_addr(i));
-		while ((rth = rcu_dereference_protected(*rthp,
-					lockdep_is_held(rt_hash_lock_addr(i)))) != NULL) {
-			prefetch(rth->dst.rt_next);
-			if (rt_is_expired(rth)) {
-				*rthp = rth->dst.rt_next;
-				rt_free(rth);
-				continue;
-			}
-			if (rth->dst.expires) {
-				/* Entry is expired even if it is in use */
-				if (time_before_eq(jiffies, rth->dst.expires)) {
-nofree:
-					tmo >>= 1;
-					rthp = &rth->dst.rt_next;
-					/*
-					 * We only count entries on
-					 * a chain with equal hash inputs once
-					 * so that entries for different QOS
-					 * levels, and other non-hash input
-					 * attributes don't unfairly skew
-					 * the length computation
-					 */
-					length += has_noalias(rt_hash_table[i].chain, rth);
-					continue;
-				}
-			} else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout))
-				goto nofree;
-
-			/* Cleanup aged off entries. */
-			*rthp = rth->dst.rt_next;
-			rt_free(rth);
-		}
-		spin_unlock_bh(rt_hash_lock_addr(i));
-		sum += length;
-		sum2 += length*length;
-	}
-	if (samples) {
-		unsigned long avg = sum / samples;
-		unsigned long sd = int_sqrt(sum2 / samples - avg*avg);
-		rt_chain_length_max = max_t(unsigned long,
-					ip_rt_gc_elasticity,
-					(avg + 4*sd) >> FRACT_BITS);
-	}
-	rover = i;
-}
-
-/*
- * rt_worker_func() is run in process context.
- * we call rt_check_expire() to scan part of the hash table
- */
-static void rt_worker_func(struct work_struct *work)
-{
-	rt_check_expire();
-	schedule_delayed_work(&expires_work, ip_rt_gc_interval);
-}
-
 /*
  * Pertubation of rt_genid by a small quantity [1..256]
  * Using 8 bits of shuffling ensure we can call rt_cache_invalidate()
@@ -1078,8 +1014,8 @@ static int slow_chain_length(const struct rtable *head)
 	return length >> FRACT_BITS;
 }
 
-static int rt_intern_hash(unsigned hash, struct rtable *rt,
-			  struct rtable **rp, struct sk_buff *skb, int ifindex)
+static struct rtable *rt_intern_hash(unsigned hash, struct rtable *rt,
+				     struct sk_buff *skb, int ifindex)
 {
 	struct rtable	*rth, *cand;
 	struct rtable __rcu **rthp, **candp;
@@ -1120,7 +1056,7 @@ restart:
 					printk(KERN_WARNING
 					    "Neighbour table failure & not caching routes.\n");
 				ip_rt_put(rt);
-				return err;
+				return ERR_PTR(err);
 			}
 		}
 
@@ -1137,7 +1073,7 @@ restart:
 			rt_free(rth);
 			continue;
 		}
-		if (compare_keys(&rth->fl, &rt->fl) && compare_netns(rth, rt)) {
+		if (compare_keys(rth, rt) && compare_netns(rth, rt)) {
 			/* Put it first */
 			*rthp = rth->dst.rt_next;
 			/*
@@ -1157,11 +1093,9 @@ restart:
 			spin_unlock_bh(rt_hash_lock_addr(hash));
 
 			rt_drop(rt);
-			if (rp)
-				*rp = rth;
-			else
+			if (skb)
 				skb_dst_set(skb, &rth->dst);
-			return 0;
+			return rth;
 		}
 
 		if (!atomic_read(&rth->dst.__refcnt)) {
@@ -1202,7 +1136,7 @@ restart:
 			rt_emergency_hash_rebuild(net);
 			spin_unlock_bh(rt_hash_lock_addr(hash));
 
-			hash = rt_hash(rt->fl.fl4_dst, rt->fl.fl4_src,
+			hash = rt_hash(rt->rt_key_dst, rt->rt_key_src,
 					ifindex, rt_genid(net));
 			goto restart;
 		}
@@ -1218,7 +1152,7 @@ restart:
 
 			if (err != -ENOBUFS) {
 				rt_drop(rt);
-				return err;
+				return ERR_PTR(err);
 			}
 
 			/* Neighbour tables are full and nothing
@@ -1239,7 +1173,7 @@ restart:
 			if (net_ratelimit())
 				printk(KERN_WARNING "ipv4: Neighbour table overflow.\n");
 			rt_drop(rt);
-			return -ENOBUFS;
+			return ERR_PTR(-ENOBUFS);
 		}
 	}
 
@@ -1265,11 +1199,16 @@ restart:
 	spin_unlock_bh(rt_hash_lock_addr(hash));
 
 skip_hashing:
-	if (rp)
-		*rp = rt;
-	else
+	if (skb)
 		skb_dst_set(skb, &rt->dst);
-	return 0;
+	return rt;
+}
+
+static atomic_t __rt_peer_genid = ATOMIC_INIT(0);
+
+static u32 rt_peer_genid(void)
+{
+	return atomic_read(&__rt_peer_genid);
 }
 
 void rt_bind_peer(struct rtable *rt, int create)
@@ -1280,6 +1219,8 @@ void rt_bind_peer(struct rtable *rt, int create)
 
 	if (peer && cmpxchg(&rt->peer, NULL, peer) != NULL)
 		inet_putpeer(peer);
+	else
+		rt->rt_peer_genid = rt_peer_genid();
 }
 
 /*
@@ -1349,13 +1290,8 @@ static void rt_del(unsigned hash, struct rtable *rt)
 void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
 		    __be32 saddr, struct net_device *dev)
 {
-	int i, k;
 	struct in_device *in_dev = __in_dev_get_rcu(dev);
-	struct rtable *rth;
-	struct rtable __rcu **rthp;
-	__be32  skeys[2] = { saddr, 0 };
-	int  ikeys[2] = { dev->ifindex, 0 };
-	struct netevent_redirect netevent;
+	struct inet_peer *peer;
 	struct net *net;
 
 	if (!in_dev)
@@ -1367,9 +1303,6 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
 	    ipv4_is_zeronet(new_gw))
 		goto reject_redirect;
 
-	if (!rt_caching(net))
-		goto reject_redirect;
-
 	if (!IN_DEV_SHARED_MEDIA(in_dev)) {
 		if (!inet_addr_onlink(in_dev, new_gw, old_gw))
 			goto reject_redirect;
@@ -1380,91 +1313,13 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
 			goto reject_redirect;
 	}
 
-	for (i = 0; i < 2; i++) {
-		for (k = 0; k < 2; k++) {
-			unsigned hash = rt_hash(daddr, skeys[i], ikeys[k],
-						rt_genid(net));
-
-			rthp = &rt_hash_table[hash].chain;
-
-			while ((rth = rcu_dereference(*rthp)) != NULL) {
-				struct rtable *rt;
-
-				if (rth->fl.fl4_dst != daddr ||
-				    rth->fl.fl4_src != skeys[i] ||
-				    rth->fl.oif != ikeys[k] ||
-				    rt_is_input_route(rth) ||
-				    rt_is_expired(rth) ||
-				    !net_eq(dev_net(rth->dst.dev), net)) {
-					rthp = &rth->dst.rt_next;
-					continue;
-				}
-
-				if (rth->rt_dst != daddr ||
-				    rth->rt_src != saddr ||
-				    rth->dst.error ||
-				    rth->rt_gateway != old_gw ||
-				    rth->dst.dev != dev)
-					break;
-
-				dst_hold(&rth->dst);
-
-				rt = dst_alloc(&ipv4_dst_ops);
-				if (rt == NULL) {
-					ip_rt_put(rth);
-					return;
-				}
-
-				/* Copy all the information. */
-				*rt = *rth;
-				rt->dst.__use		= 1;
-				atomic_set(&rt->dst.__refcnt, 1);
-				rt->dst.child		= NULL;
-				if (rt->dst.dev)
-					dev_hold(rt->dst.dev);
-				rt->dst.obsolete	= -1;
-				rt->dst.lastuse	= jiffies;
-				rt->dst.path		= &rt->dst;
-				rt->dst.neighbour	= NULL;
-				rt->dst.hh		= NULL;
-#ifdef CONFIG_XFRM
-				rt->dst.xfrm		= NULL;
-#endif
-				rt->rt_genid		= rt_genid(net);
-				rt->rt_flags		|= RTCF_REDIRECTED;
-
-				/* Gateway is different ... */
-				rt->rt_gateway		= new_gw;
-
-				/* Redirect received -> path was valid */
-				dst_confirm(&rth->dst);
-
-				if (rt->peer)
-					atomic_inc(&rt->peer->refcnt);
-
-				if (arp_bind_neighbour(&rt->dst) ||
-				    !(rt->dst.neighbour->nud_state &
-					    NUD_VALID)) {
-					if (rt->dst.neighbour)
-						neigh_event_send(rt->dst.neighbour, NULL);
-					ip_rt_put(rth);
-					rt_drop(rt);
-					goto do_next;
-				}
+	peer = inet_getpeer_v4(daddr, 1);
+	if (peer) {
+		peer->redirect_learned.a4 = new_gw;
 
-				netevent.old = &rth->dst;
-				netevent.new = &rt->dst;
-				call_netevent_notifiers(NETEVENT_REDIRECT,
-							&netevent);
+		inet_putpeer(peer);
 
-				rt_del(hash, rth);
-				if (!rt_intern_hash(hash, rt, &rt, NULL, rt->fl.oif))
-					ip_rt_put(rt);
-				goto do_next;
-			}
-		do_next:
-			;
-		}
+		atomic_inc(&__rt_peer_genid);
 	}
 	return;
 
@@ -1488,18 +1343,24 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
 		if (dst->obsolete > 0) {
 			ip_rt_put(rt);
 			ret = NULL;
-		} else if ((rt->rt_flags & RTCF_REDIRECTED) ||
-			   (rt->dst.expires &&
-			    time_after_eq(jiffies, rt->dst.expires))) {
-			unsigned hash = rt_hash(rt->fl.fl4_dst, rt->fl.fl4_src,
-						rt->fl.oif,
+		} else if (rt->rt_flags & RTCF_REDIRECTED) {
+			unsigned hash = rt_hash(rt->rt_key_dst, rt->rt_key_src,
+						rt->rt_oif,
 						rt_genid(dev_net(dst->dev)));
 #if RT_CACHE_DEBUG >= 1
 			printk(KERN_DEBUG "ipv4_negative_advice: redirect to %pI4/%02x dropped\n",
-				&rt->rt_dst, rt->fl.fl4_tos);
+				&rt->rt_dst, rt->rt_tos);
 #endif
 			rt_del(hash, rt);
 			ret = NULL;
+		} else if (rt->peer &&
+			   rt->peer->pmtu_expires &&
+			   time_after_eq(jiffies, rt->peer->pmtu_expires)) {
+			unsigned long orig = rt->peer->pmtu_expires;
+
+			if (cmpxchg(&rt->peer->pmtu_expires, orig, 0) == orig)
+				dst_metric_set(dst, RTAX_MTU,
+					       rt->peer->pmtu_orig);
 		}
 	}
 	return ret;
@@ -1525,6 +1386,7 @@ void ip_rt_send_redirect(struct sk_buff *skb)
 {
 	struct rtable *rt = skb_rtable(skb);
 	struct in_device *in_dev;
+	struct inet_peer *peer;
 	int log_martians;
 
 	rcu_read_lock();
@@ -1536,33 +1398,41 @@ void ip_rt_send_redirect(struct sk_buff *skb)
 	log_martians = IN_DEV_LOG_MARTIANS(in_dev);
 	rcu_read_unlock();
 
+	if (!rt->peer)
+		rt_bind_peer(rt, 1);
+	peer = rt->peer;
+	if (!peer) {
+		icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway);
+		return;
+	}
+
 	/* No redirected packets during ip_rt_redirect_silence;
 	 * reset the algorithm.
 	 */
-	if (time_after(jiffies, rt->dst.rate_last + ip_rt_redirect_silence))
-		rt->dst.rate_tokens = 0;
+	if (time_after(jiffies, peer->rate_last + ip_rt_redirect_silence))
+		peer->rate_tokens = 0;
 
 	/* Too many ignored redirects; do not send anything
 	 * set dst.rate_last to the last seen redirected packet.
 	 */
-	if (rt->dst.rate_tokens >= ip_rt_redirect_number) {
-		rt->dst.rate_last = jiffies;
+	if (peer->rate_tokens >= ip_rt_redirect_number) {
+		peer->rate_last = jiffies;
 		return;
 	}
 
 	/* Check for load limit; set rate_last to the latest sent
 	 * redirect.
 	 */
-	if (rt->dst.rate_tokens == 0 ||
+	if (peer->rate_tokens == 0 ||
 	    time_after(jiffies,
-		       (rt->dst.rate_last +
-			(ip_rt_redirect_load << rt->dst.rate_tokens)))) {
+		       (peer->rate_last +
+			(ip_rt_redirect_load << peer->rate_tokens)))) {
 		icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway);
-		rt->dst.rate_last = jiffies;
-		++rt->dst.rate_tokens;
+		peer->rate_last = jiffies;
+		++peer->rate_tokens;
 #ifdef CONFIG_IP_ROUTE_VERBOSE
 		if (log_martians &&
-		    rt->dst.rate_tokens == ip_rt_redirect_number &&
+		    peer->rate_tokens == ip_rt_redirect_number &&
 		    net_ratelimit())
 			printk(KERN_WARNING "host %pI4/if%d ignores redirects for %pI4 to %pI4.\n",
 				&rt->rt_src, rt->rt_iif,
@@ -1574,7 +1444,9 @@ void ip_rt_send_redirect(struct sk_buff *skb)
 static int ip_error(struct sk_buff *skb)
 {
 	struct rtable *rt = skb_rtable(skb);
+	struct inet_peer *peer;
 	unsigned long now;
+	bool send;
 	int code;
 
 	switch (rt->dst.error) {
@@ -1594,15 +1466,24 @@ static int ip_error(struct sk_buff *skb)
 			break;
 	}
 
-	now = jiffies;
-	rt->dst.rate_tokens += now - rt->dst.rate_last;
-	if (rt->dst.rate_tokens > ip_rt_error_burst)
-		rt->dst.rate_tokens = ip_rt_error_burst;
-	rt->dst.rate_last = now;
-	if (rt->dst.rate_tokens >= ip_rt_error_cost) {
-		rt->dst.rate_tokens -= ip_rt_error_cost;
-		icmp_send(skb, ICMP_DEST_UNREACH, code, 0);
+	if (!rt->peer)
+		rt_bind_peer(rt, 1);
+	peer = rt->peer;
+
+	send = true;
+	if (peer) {
+		now = jiffies;
+		peer->rate_tokens += now - peer->rate_last;
+		if (peer->rate_tokens > ip_rt_error_burst)
+			peer->rate_tokens = ip_rt_error_burst;
+		peer->rate_last = now;
+		if (peer->rate_tokens >= ip_rt_error_cost)
+			peer->rate_tokens -= ip_rt_error_cost;
+		else
+			send = false;
 	}
+	if (send)
+		icmp_send(skb, ICMP_DEST_UNREACH, code, 0);
 
 out:	kfree_skb(skb);
 	return 0;
@@ -1630,88 +1511,130 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph,
 				 unsigned short new_mtu,
 				 struct net_device *dev)
 {
-	int i, k;
 	unsigned short old_mtu = ntohs(iph->tot_len);
-	struct rtable *rth;
-	int  ikeys[2] = { dev->ifindex, 0 };
-	__be32  skeys[2] = { iph->saddr, 0, };
-	__be32  daddr = iph->daddr;
 	unsigned short est_mtu = 0;
+	struct inet_peer *peer;
 
-	for (k = 0; k < 2; k++) {
-		for (i = 0; i < 2; i++) {
-			unsigned hash = rt_hash(daddr, skeys[i], ikeys[k],
-						rt_genid(net));
-
-			rcu_read_lock();
-			for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
-			     rth = rcu_dereference(rth->dst.rt_next)) {
-				unsigned short mtu = new_mtu;
-
-				if (rth->fl.fl4_dst != daddr ||
-				    rth->fl.fl4_src != skeys[i] ||
-				    rth->rt_dst != daddr ||
-				    rth->rt_src != iph->saddr ||
-				    rth->fl.oif != ikeys[k] ||
-				    rt_is_input_route(rth) ||
-				    dst_metric_locked(&rth->dst, RTAX_MTU) ||
-				    !net_eq(dev_net(rth->dst.dev), net) ||
-				    rt_is_expired(rth))
-					continue;
-
-				if (new_mtu < 68 || new_mtu >= old_mtu) {
+	peer = inet_getpeer_v4(iph->daddr, 1);
+	if (peer) {
+		unsigned short mtu = new_mtu;
 
-					/* BSD 4.2 compatibility hack :-( */
-					if (mtu == 0 &&
-					    old_mtu >= dst_mtu(&rth->dst) &&
-					    old_mtu >= 68 + (iph->ihl << 2))
-						old_mtu -= iph->ihl << 2;
+		if (new_mtu < 68 || new_mtu >= old_mtu) {
+			/* BSD 4.2 derived systems incorrectly adjust
+			 * tot_len by the IP header length, and report
+			 * a zero MTU in the ICMP message.
+			 */
+			if (mtu == 0 &&
+			    old_mtu >= 68 + (iph->ihl << 2))
+				old_mtu -= iph->ihl << 2;
+			mtu = guess_mtu(old_mtu);
+		}
 
-					mtu = guess_mtu(old_mtu);
-				}
-				if (mtu <= dst_mtu(&rth->dst)) {
-					if (mtu < dst_mtu(&rth->dst)) {
-						dst_confirm(&rth->dst);
-						if (mtu < ip_rt_min_pmtu) {
-							u32 lock = dst_metric(&rth->dst,
-									      RTAX_LOCK);
-							mtu = ip_rt_min_pmtu;
-							lock |= (1 << RTAX_MTU);
-							dst_metric_set(&rth->dst, RTAX_LOCK,
-								       lock);
-						}
-						dst_metric_set(&rth->dst, RTAX_MTU, mtu);
-						dst_set_expires(&rth->dst,
-							ip_rt_mtu_expires);
-					}
-					est_mtu = mtu;
-				}
-			}
-			rcu_read_unlock();
+		if (mtu < ip_rt_min_pmtu)
+			mtu = ip_rt_min_pmtu;
+		if (!peer->pmtu_expires || mtu < peer->pmtu_learned) {
+			est_mtu = mtu;
+			peer->pmtu_learned = mtu;
+			peer->pmtu_expires = jiffies + ip_rt_mtu_expires;
 		}
+
+		inet_putpeer(peer);
+
+		atomic_inc(&__rt_peer_genid);
 	}
 	return est_mtu ? : new_mtu;
 }
 
+static void check_peer_pmtu(struct dst_entry *dst, struct inet_peer *peer)
+{
+	unsigned long expires = peer->pmtu_expires;
+
+	if (time_before(expires, jiffies)) {
+		u32 orig_dst_mtu = dst_mtu(dst);
+		if (peer->pmtu_learned < orig_dst_mtu) {
+			if (!peer->pmtu_orig)
+				peer->pmtu_orig = dst_metric_raw(dst, RTAX_MTU);
+			dst_metric_set(dst, RTAX_MTU, peer->pmtu_learned);
+		}
+	} else if (cmpxchg(&peer->pmtu_expires, expires, 0) == expires)
+		dst_metric_set(dst, RTAX_MTU, peer->pmtu_orig);
+}
+
 static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
 {
-	if (dst_mtu(dst) > mtu && mtu >= 68 &&
-	    !(dst_metric_locked(dst, RTAX_MTU))) {
-		if (mtu < ip_rt_min_pmtu) {
-			u32 lock = dst_metric(dst, RTAX_LOCK);
+	struct rtable *rt = (struct rtable *) dst;
+	struct inet_peer *peer;
+
+	dst_confirm(dst);
+
+	if (!rt->peer)
+		rt_bind_peer(rt, 1);
+	peer = rt->peer;
+	if (peer) {
+		if (mtu < ip_rt_min_pmtu)
 			mtu = ip_rt_min_pmtu;
-			dst_metric_set(dst, RTAX_LOCK, lock | (1 << RTAX_MTU));
+		if (!peer->pmtu_expires || mtu < peer->pmtu_learned) {
+			peer->pmtu_learned = mtu;
+			peer->pmtu_expires = jiffies + ip_rt_mtu_expires;
+
+			atomic_inc(&__rt_peer_genid);
+			rt->rt_peer_genid = rt_peer_genid();
+
+			check_peer_pmtu(dst, peer);
 		}
-		dst_metric_set(dst, RTAX_MTU, mtu);
-		dst_set_expires(dst, ip_rt_mtu_expires);
-		call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
+		inet_putpeer(peer);
+	}
+}
+
+static int check_peer_redir(struct dst_entry *dst, struct inet_peer *peer)
+{
+	struct rtable *rt = (struct rtable *) dst;
+	__be32 orig_gw = rt->rt_gateway;
+
+	dst_confirm(&rt->dst);
+
+	neigh_release(rt->dst.neighbour);
+	rt->dst.neighbour = NULL;
+
+	rt->rt_gateway = peer->redirect_learned.a4;
+	if (arp_bind_neighbour(&rt->dst) ||
+	    !(rt->dst.neighbour->nud_state & NUD_VALID)) {
+		if (rt->dst.neighbour)
+			neigh_event_send(rt->dst.neighbour, NULL);
+		rt->rt_gateway = orig_gw;
+		return -EAGAIN;
+	} else {
+		rt->rt_flags |= RTCF_REDIRECTED;
+		call_netevent_notifiers(NETEVENT_NEIGH_UPDATE,
+					rt->dst.neighbour);
 	}
+	return 0;
 }
 
 static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie)
 {
-	if (rt_is_expired((struct rtable *)dst))
+	struct rtable *rt = (struct rtable *) dst;
+
+	if (rt_is_expired(rt))
 		return NULL;
+	if (rt->rt_peer_genid != rt_peer_genid()) {
+		struct inet_peer *peer;
+
+		if (!rt->peer)
+			rt_bind_peer(rt, 0);
+
+		peer = rt->peer;
+		if (peer && peer->pmtu_expires)
+			check_peer_pmtu(dst, peer);
+
+		if (peer && peer->redirect_learned.a4 &&
+		    peer->redirect_learned.a4 != rt->rt_gateway) {
+			if (check_peer_redir(dst, peer))
+				return NULL;
+		}
+
+		rt->rt_peer_genid = rt_peer_genid();
+	}
 	return dst;
 }
 
@@ -1720,6 +1643,10 @@ static void ipv4_dst_destroy(struct dst_entry *dst)
 	struct rtable *rt = (struct rtable *) dst;
 	struct inet_peer *peer = rt->peer;
 
+	if (rt->fi) {
+		fib_info_put(rt->fi);
+		rt->fi = NULL;
+	}
 	if (peer) {
 		rt->peer = NULL;
 		inet_putpeer(peer);
@@ -1734,8 +1661,14 @@ static void ipv4_link_failure(struct sk_buff *skb)
 	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
 
 	rt = skb_rtable(skb);
-	if (rt)
-		dst_set_expires(&rt->dst, 0);
+	if (rt &&
+	    rt->peer &&
+	    rt->peer->pmtu_expires) {
+		unsigned long orig = rt->peer->pmtu_expires;
+
+		if (cmpxchg(&rt->peer->pmtu_expires, orig, 0) == orig)
+			dst_metric_set(&rt->dst, RTAX_MTU, rt->peer->pmtu_orig);
+	}
 }
 
 static int ip_rt_bug(struct sk_buff *skb)
@@ -1764,8 +1697,17 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt)
 	if (rt_is_output_route(rt))
 		src = rt->rt_src;
 	else {
+		struct flowi fl = {
+			.fl4_dst = rt->rt_key_dst,
+			.fl4_src = rt->rt_key_src,
+			.fl4_tos = rt->rt_tos,
+			.oif = rt->rt_oif,
+			.iif = rt->rt_iif,
+			.mark = rt->rt_mark,
+		};
+
 		rcu_read_lock();
-		if (fib_lookup(dev_net(rt->dst.dev), &rt->fl, &res) == 0)
+		if (fib_lookup(dev_net(rt->dst.dev), &fl, &res) == 0)
 			src = FIB_RES_PREFSRC(res);
 		else
 			src = inet_select_addr(rt->dst.dev, rt->rt_gateway,
@@ -1775,7 +1717,7 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt)
 	memcpy(addr, &src, 4);
 }
 
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
 static void set_class_tag(struct rtable *rt, u32 tag)
 {
 	if (!(rt->dst.tclassid & 0xFFFF))
@@ -1815,17 +1757,54 @@ static unsigned int ipv4_default_mtu(const struct dst_entry *dst)
 	return mtu;
 }
 
-static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag)
+static void rt_init_metrics(struct rtable *rt, const struct flowi *oldflp,
+			    struct fib_info *fi)
+{
+	struct inet_peer *peer;
+	int create = 0;
+
+	/* If a peer entry exists for this destination, we must hook
+	 * it up in order to get at cached metrics.
+	 */
+	if (oldflp && (oldflp->flags & FLOWI_FLAG_PRECOW_METRICS))
+		create = 1;
+
+	rt->peer = peer = inet_getpeer_v4(rt->rt_dst, create);
+	if (peer) {
+		rt->rt_peer_genid = rt_peer_genid();
+		if (inet_metrics_new(peer))
+			memcpy(peer->metrics, fi->fib_metrics,
+			       sizeof(u32) * RTAX_MAX);
+		dst_init_metrics(&rt->dst, peer->metrics, false);
+
+		if (peer->pmtu_expires)
+			check_peer_pmtu(&rt->dst, peer);
+		if (peer->redirect_learned.a4 &&
+		    peer->redirect_learned.a4 != rt->rt_gateway) {
+			rt->rt_gateway = peer->redirect_learned.a4;
+			rt->rt_flags |= RTCF_REDIRECTED;
+		}
+	} else {
+		if (fi->fib_metrics != (u32 *) dst_default_metrics) {
+			rt->fi = fi;
+			atomic_inc(&fi->fib_clntref);
+		}
+		dst_init_metrics(&rt->dst, fi->fib_metrics, true);
+	}
+}
+
+static void rt_set_nexthop(struct rtable *rt, const struct flowi *oldflp,
+			   const struct fib_result *res,
+			   struct fib_info *fi, u16 type, u32 itag)
 {
 	struct dst_entry *dst = &rt->dst;
-	struct fib_info *fi = res->fi;
 
 	if (fi) {
 		if (FIB_RES_GW(*res) &&
 		    FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
 			rt->rt_gateway = FIB_RES_GW(*res);
-		dst_import_metrics(dst, fi->fib_metrics);
-#ifdef CONFIG_NET_CLS_ROUTE
+		rt_init_metrics(rt, oldflp, fi);
+#ifdef CONFIG_IP_ROUTE_CLASSID
 		dst->tclassid = FIB_RES_NH(*res).nh_tclassid;
 #endif
 	}
@@ -1835,13 +1814,26 @@ static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag)
 	if (dst_metric_raw(dst, RTAX_ADVMSS) > 65535 - 40)
 		dst_metric_set(dst, RTAX_ADVMSS, 65535 - 40);
 
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
 #ifdef CONFIG_IP_MULTIPLE_TABLES
 	set_class_tag(rt, fib_rules_tclass(res));
 #endif
 	set_class_tag(rt, itag);
 #endif
-	rt->rt_type = res->type;
+	rt->rt_type = type;
+}
+
+static struct rtable *rt_dst_alloc(bool nopolicy, bool noxfrm)
+{
+	struct rtable *rt = dst_alloc(&ipv4_dst_ops, 1);
+	if (rt) {
+		rt->dst.obsolete = -1;
+
+		rt->dst.flags = DST_HOST |
+			(nopolicy ? DST_NOPOLICY : 0) |
+			(noxfrm ? DST_NOXFRM : 0);
+	}
+	return rt;
 }
 
 /* called in rcu_read_lock() section */
@@ -1874,31 +1866,25 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 		if (err < 0)
 			goto e_err;
 	}
-	rth = dst_alloc(&ipv4_dst_ops);
+	rth = rt_dst_alloc(IN_DEV_CONF_GET(in_dev, NOPOLICY), false);
 	if (!rth)
 		goto e_nobufs;
 
 	rth->dst.output = ip_rt_bug;
-	rth->dst.obsolete = -1;
 
-	atomic_set(&rth->dst.__refcnt, 1);
-	rth->dst.flags= DST_HOST;
-	if (IN_DEV_CONF_GET(in_dev, NOPOLICY))
-		rth->dst.flags |= DST_NOPOLICY;
-	rth->fl.fl4_dst	= daddr;
+	rth->rt_key_dst	= daddr;
 	rth->rt_dst	= daddr;
-	rth->fl.fl4_tos	= tos;
-	rth->fl.mark    = skb->mark;
-	rth->fl.fl4_src	= saddr;
+	rth->rt_tos	= tos;
+	rth->rt_mark    = skb->mark;
+	rth->rt_key_src	= saddr;
 	rth->rt_src	= saddr;
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
 	rth->dst.tclassid = itag;
 #endif
-	rth->rt_iif	=
-	rth->fl.iif	= dev->ifindex;
+	rth->rt_iif	= dev->ifindex;
 	rth->dst.dev	= init_net.loopback_dev;
 	dev_hold(rth->dst.dev);
-	rth->fl.oif	= 0;
+	rth->rt_oif	= 0;
 	rth->rt_gateway	= daddr;
 	rth->rt_spec_dst= spec_dst;
 	rth->rt_genid	= rt_genid(dev_net(dev));
@@ -1916,7 +1902,10 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 	RT_CACHE_STAT_INC(in_slow_mc);
 
 	hash = rt_hash(daddr, saddr, dev->ifindex, rt_genid(dev_net(dev)));
-	return rt_intern_hash(hash, rth, NULL, skb, dev->ifindex);
+	rth = rt_intern_hash(hash, rth, skb, dev->ifindex);
+	err = 0;
+	if (IS_ERR(rth))
+		err = PTR_ERR(rth);
 
 e_nobufs:
 	return -ENOBUFS;
@@ -1959,7 +1948,7 @@ static void ip_handle_martian_source(struct net_device *dev,
 
 /* called in rcu_read_lock() section */
 static int __mkroute_input(struct sk_buff *skb,
-			   struct fib_result *res,
+			   const struct fib_result *res,
 			   struct in_device *in_dev,
 			   __be32 daddr, __be32 saddr, u32 tos,
 			   struct rtable **result)
@@ -2013,39 +2002,31 @@ static int __mkroute_input(struct sk_buff *skb,
 		}
 	}
 
-
-	rth = dst_alloc(&ipv4_dst_ops);
+	rth = rt_dst_alloc(IN_DEV_CONF_GET(in_dev, NOPOLICY),
+			   IN_DEV_CONF_GET(out_dev, NOXFRM));
 	if (!rth) {
 		err = -ENOBUFS;
 		goto cleanup;
 	}
 
-	atomic_set(&rth->dst.__refcnt, 1);
-	rth->dst.flags= DST_HOST;
-	if (IN_DEV_CONF_GET(in_dev, NOPOLICY))
-		rth->dst.flags |= DST_NOPOLICY;
-	if (IN_DEV_CONF_GET(out_dev, NOXFRM))
-		rth->dst.flags |= DST_NOXFRM;
-	rth->fl.fl4_dst	= daddr;
+	rth->rt_key_dst	= daddr;
 	rth->rt_dst	= daddr;
-	rth->fl.fl4_tos	= tos;
-	rth->fl.mark    = skb->mark;
-	rth->fl.fl4_src	= saddr;
+	rth->rt_tos	= tos;
+	rth->rt_mark    = skb->mark;
+	rth->rt_key_src	= saddr;
 	rth->rt_src	= saddr;
 	rth->rt_gateway	= daddr;
-	rth->rt_iif 	=
-		rth->fl.iif	= in_dev->dev->ifindex;
+	rth->rt_iif 	= in_dev->dev->ifindex;
 	rth->dst.dev	= (out_dev)->dev;
 	dev_hold(rth->dst.dev);
-	rth->fl.oif 	= 0;
+	rth->rt_oif 	= 0;
 	rth->rt_spec_dst= spec_dst;
 
-	rth->dst.obsolete = -1;
 	rth->dst.input = ip_forward;
 	rth->dst.output = ip_output;
 	rth->rt_genid = rt_genid(dev_net(rth->dst.dev));
 
-	rt_set_nexthop(rth, res, itag);
+	rt_set_nexthop(rth, NULL, res, res->fi, res->type, itag);
 
 	rth->rt_flags = flags;
 
@@ -2078,7 +2059,10 @@ static int ip_mkroute_input(struct sk_buff *skb,
 	/* put it into the cache */
 	hash = rt_hash(daddr, saddr, fl->iif,
 		       rt_genid(dev_net(rth->dst.dev)));
-	return rt_intern_hash(hash, rth, NULL, skb, fl->iif);
+	rth = rt_intern_hash(hash, rth, skb, fl->iif);
+	if (IS_ERR(rth))
+		return PTR_ERR(rth);
+	return 0;
 }
 
 /*
@@ -2097,12 +2081,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 {
 	struct fib_result res;
 	struct in_device *in_dev = __in_dev_get_rcu(dev);
-	struct flowi fl = { .fl4_dst	= daddr,
-			    .fl4_src	= saddr,
-			    .fl4_tos	= tos,
-			    .fl4_scope	= RT_SCOPE_UNIVERSE,
-			    .mark = skb->mark,
-			    .iif = dev->ifindex };
+	struct flowi fl;
 	unsigned	flags = 0;
 	u32		itag = 0;
 	struct rtable * rth;
@@ -2139,6 +2118,13 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 	/*
 	 *	Now we are ready to route packet.
 	 */
+	fl.oif = 0;
+	fl.iif = dev->ifindex;
+	fl.mark = skb->mark;
+	fl.fl4_dst = daddr;
+	fl.fl4_src = saddr;
+	fl.fl4_tos = tos;
+	fl.fl4_scope = RT_SCOPE_UNIVERSE;
 	err = fib_lookup(net, &fl, &res);
 	if (err != 0) {
 		if (!IN_DEV_FORWARD(in_dev))
@@ -2190,29 +2176,23 @@ brd_input:
 	RT_CACHE_STAT_INC(in_brd);
 
 local_input:
-	rth = dst_alloc(&ipv4_dst_ops);
+	rth = rt_dst_alloc(IN_DEV_CONF_GET(in_dev, NOPOLICY), false);
 	if (!rth)
 		goto e_nobufs;
 
 	rth->dst.output= ip_rt_bug;
-	rth->dst.obsolete = -1;
 	rth->rt_genid = rt_genid(net);
 
-	atomic_set(&rth->dst.__refcnt, 1);
-	rth->dst.flags= DST_HOST;
-	if (IN_DEV_CONF_GET(in_dev, NOPOLICY))
-		rth->dst.flags |= DST_NOPOLICY;
-	rth->fl.fl4_dst	= daddr;
+	rth->rt_key_dst	= daddr;
 	rth->rt_dst	= daddr;
-	rth->fl.fl4_tos	= tos;
-	rth->fl.mark    = skb->mark;
-	rth->fl.fl4_src	= saddr;
+	rth->rt_tos	= tos;
+	rth->rt_mark    = skb->mark;
+	rth->rt_key_src	= saddr;
 	rth->rt_src	= saddr;
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
 	rth->dst.tclassid = itag;
 #endif
-	rth->rt_iif	=
-	rth->fl.iif	= dev->ifindex;
+	rth->rt_iif	= dev->ifindex;
 	rth->dst.dev	= net->loopback_dev;
 	dev_hold(rth->dst.dev);
 	rth->rt_gateway	= daddr;
@@ -2226,7 +2206,10 @@ local_input:
 	}
 	rth->rt_type	= res.type;
 	hash = rt_hash(daddr, saddr, fl.iif, rt_genid(net));
-	err = rt_intern_hash(hash, rth, NULL, skb, fl.iif);
+	rth = rt_intern_hash(hash, rth, skb, fl.iif);
+	err = 0;
+	if (IS_ERR(rth))
+		err = PTR_ERR(rth);
 	goto out;
 
 no_route:
@@ -2288,12 +2271,12 @@ int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 
 	for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
 	     rth = rcu_dereference(rth->dst.rt_next)) {
-		if ((((__force u32)rth->fl.fl4_dst ^ (__force u32)daddr) |
-		     ((__force u32)rth->fl.fl4_src ^ (__force u32)saddr) |
-		     (rth->fl.iif ^ iif) |
-		     rth->fl.oif |
-		     (rth->fl.fl4_tos ^ tos)) == 0 &&
-		    rth->fl.mark == skb->mark &&
+		if ((((__force u32)rth->rt_key_dst ^ (__force u32)daddr) |
+		     ((__force u32)rth->rt_key_src ^ (__force u32)saddr) |
+		     (rth->rt_iif ^ iif) |
+		     rth->rt_oif |
+		     (rth->rt_tos ^ tos)) == 0 &&
+		    rth->rt_mark == skb->mark &&
 		    net_eq(dev_net(rth->dst.dev), net) &&
 		    !rt_is_expired(rth)) {
 			if (noref) {
@@ -2351,38 +2334,39 @@ skip_cache:
 EXPORT_SYMBOL(ip_route_input_common);
 
 /* called with rcu_read_lock() */
-static int __mkroute_output(struct rtable **result,
-			    struct fib_result *res,
-			    const struct flowi *fl,
-			    const struct flowi *oldflp,
-			    struct net_device *dev_out,
-			    unsigned flags)
+static struct rtable *__mkroute_output(const struct fib_result *res,
+				       const struct flowi *fl,
+				       const struct flowi *oldflp,
+				       struct net_device *dev_out,
+				       unsigned int flags)
 {
-	struct rtable *rth;
-	struct in_device *in_dev;
+	struct fib_info *fi = res->fi;
 	u32 tos = RT_FL_TOS(oldflp);
+	struct in_device *in_dev;
+	u16 type = res->type;
+	struct rtable *rth;
 
 	if (ipv4_is_loopback(fl->fl4_src) && !(dev_out->flags & IFF_LOOPBACK))
-		return -EINVAL;
+		return ERR_PTR(-EINVAL);
 
 	if (ipv4_is_lbcast(fl->fl4_dst))
-		res->type = RTN_BROADCAST;
+		type = RTN_BROADCAST;
 	else if (ipv4_is_multicast(fl->fl4_dst))
-		res->type = RTN_MULTICAST;
+		type = RTN_MULTICAST;
 	else if (ipv4_is_zeronet(fl->fl4_dst))
-		return -EINVAL;
+		return ERR_PTR(-EINVAL);
 
 	if (dev_out->flags & IFF_LOOPBACK)
 		flags |= RTCF_LOCAL;
 
 	in_dev = __in_dev_get_rcu(dev_out);
 	if (!in_dev)
-		return -EINVAL;
+		return ERR_PTR(-EINVAL);
 
-	if (res->type == RTN_BROADCAST) {
+	if (type == RTN_BROADCAST) {
 		flags |= RTCF_BROADCAST | RTCF_LOCAL;
-		res->fi = NULL;
-	} else if (res->type == RTN_MULTICAST) {
+		fi = NULL;
+	} else if (type == RTN_MULTICAST) {
 		flags |= RTCF_MULTICAST | RTCF_LOCAL;
 		if (!ip_check_mc(in_dev, oldflp->fl4_dst, oldflp->fl4_src,
 				 oldflp->proto))
@@ -2391,30 +2375,23 @@ static int __mkroute_output(struct rtable **result,
 		 * default one, but do not gateway in this case.
 		 * Yes, it is hack.
 		 */
-		if (res->fi && res->prefixlen < 4)
-			res->fi = NULL;
+		if (fi && res->prefixlen < 4)
+			fi = NULL;
 	}
 
-
-	rth = dst_alloc(&ipv4_dst_ops);
+	rth = rt_dst_alloc(IN_DEV_CONF_GET(in_dev, NOPOLICY),
+			   IN_DEV_CONF_GET(in_dev, NOXFRM));
 	if (!rth)
-		return -ENOBUFS;
-
-	atomic_set(&rth->dst.__refcnt, 1);
-	rth->dst.flags= DST_HOST;
-	if (IN_DEV_CONF_GET(in_dev, NOXFRM))
-		rth->dst.flags |= DST_NOXFRM;
-	if (IN_DEV_CONF_GET(in_dev, NOPOLICY))
-		rth->dst.flags |= DST_NOPOLICY;
-
-	rth->fl.fl4_dst	= oldflp->fl4_dst;
-	rth->fl.fl4_tos	= tos;
-	rth->fl.fl4_src	= oldflp->fl4_src;
-	rth->fl.oif	= oldflp->oif;
-	rth->fl.mark    = oldflp->mark;
+		return ERR_PTR(-ENOBUFS);
+
+	rth->rt_key_dst	= oldflp->fl4_dst;
+	rth->rt_tos	= tos;
+	rth->rt_key_src	= oldflp->fl4_src;
+	rth->rt_oif	= oldflp->oif;
+	rth->rt_mark    = oldflp->mark;
 	rth->rt_dst	= fl->fl4_dst;
 	rth->rt_src	= fl->fl4_src;
-	rth->rt_iif	= oldflp->oif ? : dev_out->ifindex;
+	rth->rt_iif	= 0;
 	/* get references to the devices that are to be hold by the routing
 	   cache entry */
 	rth->dst.dev	= dev_out;
@@ -2423,7 +2400,6 @@ static int __mkroute_output(struct rtable **result,
 	rth->rt_spec_dst= fl->fl4_src;
 
 	rth->dst.output=ip_output;
-	rth->dst.obsolete = -1;
 	rth->rt_genid = rt_genid(dev_net(dev_out));
 
 	RT_CACHE_STAT_INC(out_slow_tot);
@@ -2440,7 +2416,7 @@ static int __mkroute_output(struct rtable **result,
 			RT_CACHE_STAT_INC(out_slow_mc);
 		}
 #ifdef CONFIG_IP_MROUTE
-		if (res->type == RTN_MULTICAST) {
+		if (type == RTN_MULTICAST) {
 			if (IN_DEV_MFORWARD(in_dev) &&
 			    !ipv4_is_local_multicast(oldflp->fl4_dst)) {
 				rth->dst.input = ip_mr_input;
@@ -2450,31 +2426,10 @@ static int __mkroute_output(struct rtable **result,
 #endif
 	}
 
-	rt_set_nexthop(rth, res, 0);
+	rt_set_nexthop(rth, oldflp, res, fi, type, 0);
 
 	rth->rt_flags = flags;
-	*result = rth;
-	return 0;
-}
-
-/* called with rcu_read_lock() */
-static int ip_mkroute_output(struct rtable **rp,
-			     struct fib_result *res,
-			     const struct flowi *fl,
-			     const struct flowi *oldflp,
-			     struct net_device *dev_out,
-			     unsigned flags)
-{
-	struct rtable *rth = NULL;
-	int err = __mkroute_output(&rth, res, fl, oldflp, dev_out, flags);
-	unsigned hash;
-	if (err == 0) {
-		hash = rt_hash(oldflp->fl4_dst, oldflp->fl4_src, oldflp->oif,
-			       rt_genid(dev_net(dev_out)));
-		err = rt_intern_hash(hash, rth, rp, NULL, oldflp->oif);
-	}
-
-	return err;
+	return rth;
 }
 
 /*
@@ -2482,31 +2437,33 @@ static int ip_mkroute_output(struct rtable **rp,
  * called with rcu_read_lock();
  */
 
-static int ip_route_output_slow(struct net *net, struct rtable **rp,
-				const struct flowi *oldflp)
+static struct rtable *ip_route_output_slow(struct net *net,
+					   const struct flowi *oldflp)
 {
 	u32 tos	= RT_FL_TOS(oldflp);
-	struct flowi fl = { .fl4_dst = oldflp->fl4_dst,
-			    .fl4_src = oldflp->fl4_src,
-			    .fl4_tos = tos & IPTOS_RT_MASK,
-			    .fl4_scope = ((tos & RTO_ONLINK) ?
-					  RT_SCOPE_LINK : RT_SCOPE_UNIVERSE),
-			    .mark = oldflp->mark,
-			    .iif = net->loopback_dev->ifindex,
-			    .oif = oldflp->oif };
+	struct flowi fl;
 	struct fib_result res;
 	unsigned int flags = 0;
 	struct net_device *dev_out = NULL;
-	int err;
-
+	struct rtable *rth;
 
 	res.fi		= NULL;
 #ifdef CONFIG_IP_MULTIPLE_TABLES
 	res.r		= NULL;
 #endif
 
+	fl.oif = oldflp->oif;
+	fl.iif = net->loopback_dev->ifindex;
+	fl.mark = oldflp->mark;
+	fl.fl4_dst = oldflp->fl4_dst;
+	fl.fl4_src = oldflp->fl4_src;
+	fl.fl4_tos = tos & IPTOS_RT_MASK;
+	fl.fl4_scope = ((tos & RTO_ONLINK) ?
+			RT_SCOPE_LINK : RT_SCOPE_UNIVERSE);
+
+	rcu_read_lock();
 	if (oldflp->fl4_src) {
-		err = -EINVAL;
+		rth = ERR_PTR(-EINVAL);
 		if (ipv4_is_multicast(oldflp->fl4_src) ||
 		    ipv4_is_lbcast(oldflp->fl4_src) ||
 		    ipv4_is_zeronet(oldflp->fl4_src))
@@ -2557,13 +2514,13 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp,
 
 	if (oldflp->oif) {
 		dev_out = dev_get_by_index_rcu(net, oldflp->oif);
-		err = -ENODEV;
+		rth = ERR_PTR(-ENODEV);
 		if (dev_out == NULL)
 			goto out;
 
 		/* RACE: Check return value of inet_select_addr instead. */
 		if (!(dev_out->flags & IFF_UP) || !__in_dev_get_rcu(dev_out)) {
-			err = -ENETUNREACH;
+			rth = ERR_PTR(-ENETUNREACH);
 			goto out;
 		}
 		if (ipv4_is_local_multicast(oldflp->fl4_dst) ||
@@ -2621,7 +2578,7 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp,
 			res.type = RTN_UNICAST;
 			goto make_route;
 		}
-		err = -ENETUNREACH;
+		rth = ERR_PTR(-ENETUNREACH);
 		goto out;
 	}
 
@@ -2645,7 +2602,7 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp,
 	else
 #endif
 	if (!res.prefixlen && res.type == RTN_UNICAST && !fl.oif)
-		fib_select_default(net, &fl, &res);
+		fib_select_default(&res);
 
 	if (!fl.fl4_src)
 		fl.fl4_src = FIB_RES_PREFSRC(res);
@@ -2655,17 +2612,24 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp,
 
 
 make_route:
-	err = ip_mkroute_output(rp, &res, &fl, oldflp, dev_out, flags);
+	rth = __mkroute_output(&res, &fl, oldflp, dev_out, flags);
+	if (!IS_ERR(rth)) {
+		unsigned int hash;
 
-out:	return err;
+		hash = rt_hash(oldflp->fl4_dst, oldflp->fl4_src, oldflp->oif,
+			       rt_genid(dev_net(dev_out)));
+		rth = rt_intern_hash(hash, rth, NULL, oldflp->oif);
+	}
+
+out:
+	rcu_read_unlock();
+	return rth;
 }
 
-int __ip_route_output_key(struct net *net, struct rtable **rp,
-			  const struct flowi *flp)
+struct rtable *__ip_route_output_key(struct net *net, const struct flowi *flp)
 {
-	unsigned int hash;
-	int res;
 	struct rtable *rth;
+	unsigned int hash;
 
 	if (!rt_caching(net))
 		goto slow_output;
@@ -2675,30 +2639,26 @@ int __ip_route_output_key(struct net *net, struct rtable **rp,
 	rcu_read_lock_bh();
 	for (rth = rcu_dereference_bh(rt_hash_table[hash].chain); rth;
 		rth = rcu_dereference_bh(rth->dst.rt_next)) {
-		if (rth->fl.fl4_dst == flp->fl4_dst &&
-		    rth->fl.fl4_src == flp->fl4_src &&
+		if (rth->rt_key_dst == flp->fl4_dst &&
+		    rth->rt_key_src == flp->fl4_src &&
 		    rt_is_output_route(rth) &&
-		    rth->fl.oif == flp->oif &&
-		    rth->fl.mark == flp->mark &&
-		    !((rth->fl.fl4_tos ^ flp->fl4_tos) &
+		    rth->rt_oif == flp->oif &&
+		    rth->rt_mark == flp->mark &&
+		    !((rth->rt_tos ^ flp->fl4_tos) &
 			    (IPTOS_RT_MASK | RTO_ONLINK)) &&
 		    net_eq(dev_net(rth->dst.dev), net) &&
 		    !rt_is_expired(rth)) {
 			dst_use(&rth->dst, jiffies);
 			RT_CACHE_STAT_INC(out_hit);
 			rcu_read_unlock_bh();
-			*rp = rth;
-			return 0;
+			return rth;
 		}
 		RT_CACHE_STAT_INC(out_hlist_search);
 	}
 	rcu_read_unlock_bh();
 
 slow_output:
-	rcu_read_lock();
-	res = ip_route_output_slow(net, rp, flp);
-	rcu_read_unlock();
-	return res;
+	return ip_route_output_slow(net, flp);
 }
 EXPORT_SYMBOL_GPL(__ip_route_output_key);
 
@@ -2726,17 +2686,14 @@ static struct dst_ops ipv4_dst_blackhole_ops = {
 	.update_pmtu		=	ipv4_rt_blackhole_update_pmtu,
 };
 
-
-static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi *flp)
+struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig)
 {
-	struct rtable *ort = *rp;
-	struct rtable *rt = (struct rtable *)
-		dst_alloc(&ipv4_dst_blackhole_ops);
+	struct rtable *rt = dst_alloc(&ipv4_dst_blackhole_ops, 1);
+	struct rtable *ort = (struct rtable *) dst_orig;
 
 	if (rt) {
 		struct dst_entry *new = &rt->dst;
 
-		atomic_set(&new->__refcnt, 1);
 		new->__use = 1;
 		new->input = dst_discard;
 		new->output = dst_discard;
@@ -2746,7 +2703,12 @@ static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi
 		if (new->dev)
 			dev_hold(new->dev);
 
-		rt->fl = ort->fl;
+		rt->rt_key_dst = ort->rt_key_dst;
+		rt->rt_key_src = ort->rt_key_src;
+		rt->rt_tos = ort->rt_tos;
+		rt->rt_iif = ort->rt_iif;
+		rt->rt_oif = ort->rt_oif;
+		rt->rt_mark = ort->rt_mark;
 
 		rt->rt_genid = rt_genid(net);
 		rt->rt_flags = ort->rt_flags;
@@ -2759,46 +2721,38 @@ static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi
 		rt->peer = ort->peer;
 		if (rt->peer)
 			atomic_inc(&rt->peer->refcnt);
+		rt->fi = ort->fi;
+		if (rt->fi)
+			atomic_inc(&rt->fi->fib_clntref);
 
 		dst_free(new);
 	}
 
-	dst_release(&(*rp)->dst);
-	*rp = rt;
-	return rt ? 0 : -ENOMEM;
+	dst_release(dst_orig);
+
+	return rt ? &rt->dst : ERR_PTR(-ENOMEM);
 }
 
-int ip_route_output_flow(struct net *net, struct rtable **rp, struct flowi *flp,
-			 struct sock *sk, int flags)
+struct rtable *ip_route_output_flow(struct net *net, struct flowi *flp,
+				    struct sock *sk)
 {
-	int err;
+	struct rtable *rt = __ip_route_output_key(net, flp);
 
-	if ((err = __ip_route_output_key(net, rp, flp)) != 0)
-		return err;
+	if (IS_ERR(rt))
+		return rt;
 
 	if (flp->proto) {
 		if (!flp->fl4_src)
-			flp->fl4_src = (*rp)->rt_src;
+			flp->fl4_src = rt->rt_src;
 		if (!flp->fl4_dst)
-			flp->fl4_dst = (*rp)->rt_dst;
-		err = __xfrm_lookup(net, (struct dst_entry **)rp, flp, sk,
-				    flags ? XFRM_LOOKUP_WAIT : 0);
-		if (err == -EREMOTE)
-			err = ipv4_dst_blackhole(net, rp, flp);
-
-		return err;
+			flp->fl4_dst = rt->rt_dst;
+		rt = (struct rtable *) xfrm_lookup(net, &rt->dst, flp, sk, 0);
 	}
 
-	return 0;
+	return rt;
 }
 EXPORT_SYMBOL_GPL(ip_route_output_flow);
 
-int ip_route_output_key(struct net *net, struct rtable **rp, struct flowi *flp)
-{
-	return ip_route_output_flow(net, rp, flp, NULL, 0);
-}
-EXPORT_SYMBOL(ip_route_output_key);
-
 static int rt_fill_info(struct net *net,
 			struct sk_buff *skb, u32 pid, u32 seq, int event,
 			int nowait, unsigned int flags)
@@ -2817,7 +2771,7 @@ static int rt_fill_info(struct net *net,
 	r->rtm_family	 = AF_INET;
 	r->rtm_dst_len	= 32;
 	r->rtm_src_len	= 0;
-	r->rtm_tos	= rt->fl.fl4_tos;
+	r->rtm_tos	= rt->rt_tos;
 	r->rtm_table	= RT_TABLE_MAIN;
 	NLA_PUT_U32(skb, RTA_TABLE, RT_TABLE_MAIN);
 	r->rtm_type	= rt->rt_type;
@@ -2829,19 +2783,19 @@ static int rt_fill_info(struct net *net,
 
 	NLA_PUT_BE32(skb, RTA_DST, rt->rt_dst);
 
-	if (rt->fl.fl4_src) {
+	if (rt->rt_key_src) {
 		r->rtm_src_len = 32;
-		NLA_PUT_BE32(skb, RTA_SRC, rt->fl.fl4_src);
+		NLA_PUT_BE32(skb, RTA_SRC, rt->rt_key_src);
 	}
 	if (rt->dst.dev)
 		NLA_PUT_U32(skb, RTA_OIF, rt->dst.dev->ifindex);
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
 	if (rt->dst.tclassid)
 		NLA_PUT_U32(skb, RTA_FLOW, rt->dst.tclassid);
 #endif
 	if (rt_is_input_route(rt))
 		NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_spec_dst);
-	else if (rt->rt_src != rt->fl.fl4_src)
+	else if (rt->rt_src != rt->rt_key_src)
 		NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_src);
 
 	if (rt->rt_dst != rt->rt_gateway)
@@ -2850,11 +2804,12 @@ static int rt_fill_info(struct net *net,
 	if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
 		goto nla_put_failure;
 
-	if (rt->fl.mark)
-		NLA_PUT_BE32(skb, RTA_MARK, rt->fl.mark);
+	if (rt->rt_mark)
+		NLA_PUT_BE32(skb, RTA_MARK, rt->rt_mark);
 
 	error = rt->dst.error;
-	expires = rt->dst.expires ? rt->dst.expires - jiffies : 0;
+	expires = (rt->peer && rt->peer->pmtu_expires) ?
+		rt->peer->pmtu_expires - jiffies : 0;
 	if (rt->peer) {
 		inet_peer_refcheck(rt->peer);
 		id = atomic_read(&rt->peer->ip_id_count) & 0xffff;
@@ -2884,7 +2839,7 @@ static int rt_fill_info(struct net *net,
 			}
 		} else
 #endif
-			NLA_PUT_U32(skb, RTA_IIF, rt->fl.iif);
+			NLA_PUT_U32(skb, RTA_IIF, rt->rt_iif);
 	}
 
 	if (rtnl_put_cacheinfo(skb, &rt->dst, id, ts, tsage,
@@ -2965,7 +2920,11 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
 			.oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0,
 			.mark = mark,
 		};
-		err = ip_route_output_key(net, &rt, &fl);
+		rt = ip_route_output_key(net, &fl);
+
+		err = 0;
+		if (IS_ERR(rt))
+			err = PTR_ERR(rt);
 	}
 
 	if (err)
@@ -3256,9 +3215,9 @@ static __net_initdata struct pernet_operations rt_genid_ops = {
 };
 
 
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
 struct ip_rt_acct __percpu *ip_rt_acct __read_mostly;
-#endif /* CONFIG_NET_CLS_ROUTE */
+#endif /* CONFIG_IP_ROUTE_CLASSID */
 
 static __initdata unsigned long rhash_entries;
 static int __init set_rhash_entries(char *str)
@@ -3274,7 +3233,7 @@ int __init ip_rt_init(void)
 {
 	int rc = 0;
 
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
 	ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct));
 	if (!ip_rt_acct)
 		panic("IP: failed to allocate ip_rt_acct\n");
@@ -3311,14 +3270,6 @@ int __init ip_rt_init(void)
 	devinet_init();
 	ip_fib_init();
 
-	/* All the timers, started at system startup tend
-	   to synchronize. Perturb it a bit.
-	 */
-	INIT_DELAYED_WORK_DEFERRABLE(&expires_work, rt_worker_func);
-	expires_ljiffies = jiffies;
-	schedule_delayed_work(&expires_work,
-		net_random() % ip_rt_gc_interval + ip_rt_gc_interval);
-
 	if (ip_rt_proc_init())
 		printk(KERN_ERR "Unable to create route proc files\n");
 #ifdef CONFIG_XFRM
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 4751920..0ad6ddf 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -355,7 +355,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 				    .fl_ip_sport = th->dest,
 				    .fl_ip_dport = th->source };
 		security_req_classify_flow(req, &fl);
-		if (ip_route_output_key(sock_net(sk), &rt, &fl)) {
+		rt = ip_route_output_key(sock_net(sk), &fl);
+		if (IS_ERR(rt)) {
 			reqsk_free(req);
 			goto out;
 		}
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 6c11eec..b22d450 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -505,6 +505,15 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
 		else
 			answ = tp->write_seq - tp->snd_una;
 		break;
+	case SIOCOUTQNSD:
+		if (sk->sk_state == TCP_LISTEN)
+			return -EINVAL;
+
+		if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))
+			answ = 0;
+		else
+			answ = tp->write_seq - tp->snd_nxt;
+		break;
 	default:
 		return -ENOIOCTLCMD;
 	}
@@ -873,9 +882,7 @@ int tcp_sendpage(struct sock *sk, struct page *page, int offset,
 					flags);
 
 	lock_sock(sk);
-	TCP_CHECK_TIMER(sk);
 	res = do_tcp_sendpages(sk, &page, offset, size, flags);
-	TCP_CHECK_TIMER(sk);
 	release_sock(sk);
 	return res;
 }
@@ -916,7 +923,6 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 	long timeo;
 
 	lock_sock(sk);
-	TCP_CHECK_TIMER(sk);
 
 	flags = msg->msg_flags;
 	timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
@@ -1104,7 +1110,6 @@ wait_for_memory:
 out:
 	if (copied)
 		tcp_push(sk, flags, mss_now, tp->nonagle);
-	TCP_CHECK_TIMER(sk);
 	release_sock(sk);
 	return copied;
 
@@ -1123,7 +1128,6 @@ do_error:
 		goto out;
 out_err:
 	err = sk_stream_error(sk, flags, err);
-	TCP_CHECK_TIMER(sk);
 	release_sock(sk);
 	return err;
 }
@@ -1415,8 +1419,6 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 
 	lock_sock(sk);
 
-	TCP_CHECK_TIMER(sk);
-
 	err = -ENOTCONN;
 	if (sk->sk_state == TCP_LISTEN)
 		goto out;
@@ -1767,12 +1769,10 @@ skip_copy:
 	/* Clean up data we have read: This will do ACK frames. */
 	tcp_cleanup_rbuf(sk, copied);
 
-	TCP_CHECK_TIMER(sk);
 	release_sock(sk);
 	return copied;
 
 out:
-	TCP_CHECK_TIMER(sk);
 	release_sock(sk);
 	return err;
 
@@ -2653,7 +2653,7 @@ int compat_tcp_getsockopt(struct sock *sk, int level, int optname,
 EXPORT_SYMBOL(compat_tcp_getsockopt);
 #endif
 
-struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features)
+struct sk_buff *tcp_tso_segment(struct sk_buff *skb, u32 features)
 {
 	struct sk_buff *segs = ERR_PTR(-EINVAL);
 	struct tcphdr *th;
diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c
index 3b53fd1..6187eb4 100644
--- a/net/ipv4/tcp_bic.c
+++ b/net/ipv4/tcp_bic.c
@@ -209,7 +209,7 @@ static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt)
 }
 
 
-static struct tcp_congestion_ops bictcp = {
+static struct tcp_congestion_ops bictcp __read_mostly = {
 	.init		= bictcp_init,
 	.ssthresh	= bictcp_recalc_ssthresh,
 	.cong_avoid	= bictcp_cong_avoid,
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 71d5f2f..62f775c 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -405,7 +405,7 @@ static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt_us)
 		hystart_update(sk, delay);
 }
 
-static struct tcp_congestion_ops cubictcp = {
+static struct tcp_congestion_ops cubictcp __read_mostly = {
 	.init		= bictcp_init,
 	.ssthresh	= bictcp_recalc_ssthresh,
 	.cong_avoid	= bictcp_cong_avoid,
diff --git a/net/ipv4/tcp_highspeed.c b/net/ipv4/tcp_highspeed.c
index 8b6caaf..30f27f6 100644
--- a/net/ipv4/tcp_highspeed.c
+++ b/net/ipv4/tcp_highspeed.c
@@ -158,7 +158,7 @@ static u32 hstcp_ssthresh(struct sock *sk)
 }
 
 
-static struct tcp_congestion_ops tcp_highspeed = {
+static struct tcp_congestion_ops tcp_highspeed __read_mostly = {
 	.init		= hstcp_init,
 	.ssthresh	= hstcp_ssthresh,
 	.cong_avoid	= hstcp_cong_avoid,
diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c
index 7c94a49..c1a8175 100644
--- a/net/ipv4/tcp_htcp.c
+++ b/net/ipv4/tcp_htcp.c
@@ -284,7 +284,7 @@ static void htcp_state(struct sock *sk, u8 new_state)
 	}
 }
 
-static struct tcp_congestion_ops htcp = {
+static struct tcp_congestion_ops htcp __read_mostly = {
 	.init		= htcp_init,
 	.ssthresh	= htcp_recalc_ssthresh,
 	.cong_avoid	= htcp_cong_avoid,
diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c
index 377bc93..fe3ecf4 100644
--- a/net/ipv4/tcp_hybla.c
+++ b/net/ipv4/tcp_hybla.c
@@ -162,7 +162,7 @@ static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
 	tp->snd_cwnd = min_t(u32, tp->snd_cwnd, tp->snd_cwnd_clamp);
 }
 
-static struct tcp_congestion_ops tcp_hybla = {
+static struct tcp_congestion_ops tcp_hybla __read_mostly = {
 	.init		= hybla_init,
 	.ssthresh	= tcp_reno_ssthresh,
 	.min_cwnd	= tcp_reno_min_cwnd,
diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c
index 00ca688d..813b43a 100644
--- a/net/ipv4/tcp_illinois.c
+++ b/net/ipv4/tcp_illinois.c
@@ -322,7 +322,7 @@ static void tcp_illinois_info(struct sock *sk, u32 ext,
 	}
 }
 
-static struct tcp_congestion_ops tcp_illinois = {
+static struct tcp_congestion_ops tcp_illinois __read_mostly = {
 	.flags		= TCP_CONG_RTT_STAMP,
 	.init		= tcp_illinois_init,
 	.ssthresh	= tcp_illinois_ssthresh,
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 65f6c04..08ea735 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -817,7 +817,7 @@ __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst)
 	__u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0);
 
 	if (!cwnd)
-		cwnd = rfc3390_bytes_to_packets(tp->mss_cache);
+		cwnd = TCP_INIT_CWND;
 	return min_t(__u32, cwnd, tp->snd_cwnd_clamp);
 }
 
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 02f583b..f7e6c2c 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -149,9 +149,9 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	struct inet_sock *inet = inet_sk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
+	__be16 orig_sport, orig_dport;
 	struct rtable *rt;
 	__be32 daddr, nexthop;
-	int tmp;
 	int err;
 
 	if (addr_len < sizeof(struct sockaddr_in))
@@ -167,14 +167,17 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 		nexthop = inet->opt->faddr;
 	}
 
-	tmp = ip_route_connect(&rt, nexthop, inet->inet_saddr,
-			       RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
-			       IPPROTO_TCP,
-			       inet->inet_sport, usin->sin_port, sk, 1);
-	if (tmp < 0) {
-		if (tmp == -ENETUNREACH)
+	orig_sport = inet->inet_sport;
+	orig_dport = usin->sin_port;
+	rt = ip_route_connect(nexthop, inet->inet_saddr,
+			      RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
+			      IPPROTO_TCP,
+			      orig_sport, orig_dport, sk, true);
+	if (IS_ERR(rt)) {
+		err = PTR_ERR(rt);
+		if (err == -ENETUNREACH)
 			IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
-		return tmp;
+		return err;
 	}
 
 	if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
@@ -233,11 +236,14 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	if (err)
 		goto failure;
 
-	err = ip_route_newports(&rt, IPPROTO_TCP,
-				inet->inet_sport, inet->inet_dport, sk);
-	if (err)
+	rt = ip_route_newports(rt, IPPROTO_TCP,
+			       orig_sport, orig_dport,
+			       inet->inet_sport, inet->inet_dport, sk);
+	if (IS_ERR(rt)) {
+		err = PTR_ERR(rt);
+		rt = NULL;
 		goto failure;
-
+	}
 	/* OK, now commit destination to socket.  */
 	sk->sk_gso_type = SKB_GSO_TCPV4;
 	sk_setup_caps(sk, &rt->dst);
@@ -1341,7 +1347,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 		    tcp_death_row.sysctl_tw_recycle &&
 		    (dst = inet_csk_route_req(sk, req)) != NULL &&
 		    (peer = rt_get_peer((struct rtable *)dst)) != NULL &&
-		    peer->daddr.a4 == saddr) {
+		    peer->daddr.addr.a4 == saddr) {
 			inet_peer_refcheck(peer);
 			if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL &&
 			    (s32)(peer->tcp_ts - req->ts_recent) >
@@ -1556,12 +1562,10 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
 
 	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
 		sock_rps_save_rxhash(sk, skb->rxhash);
-		TCP_CHECK_TIMER(sk);
 		if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
 			rsk = sk;
 			goto reset;
 		}
-		TCP_CHECK_TIMER(sk);
 		return 0;
 	}
 
@@ -1583,13 +1587,10 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
 	} else
 		sock_rps_save_rxhash(sk, skb->rxhash);
 
-
-	TCP_CHECK_TIMER(sk);
 	if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
 		rsk = sk;
 		goto reset;
 	}
-	TCP_CHECK_TIMER(sk);
 	return 0;
 
 reset:
diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c
index de87037..656d431 100644
--- a/net/ipv4/tcp_lp.c
+++ b/net/ipv4/tcp_lp.c
@@ -313,7 +313,7 @@ static void tcp_lp_pkts_acked(struct sock *sk, u32 num_acked, s32 rtt_us)
 	lp->last_drop = tcp_time_stamp;
 }
 
-static struct tcp_congestion_ops tcp_lp = {
+static struct tcp_congestion_ops tcp_lp __read_mostly = {
 	.flags = TCP_CONG_RTT_STAMP,
 	.init = tcp_lp_init,
 	.ssthresh = tcp_reno_ssthresh,
diff --git a/net/ipv4/tcp_scalable.c b/net/ipv4/tcp_scalable.c
index a765137..8ce55b8 100644
--- a/net/ipv4/tcp_scalable.c
+++ b/net/ipv4/tcp_scalable.c
@@ -35,7 +35,7 @@ static u32 tcp_scalable_ssthresh(struct sock *sk)
 }
 
 
-static struct tcp_congestion_ops tcp_scalable = {
+static struct tcp_congestion_ops tcp_scalable __read_mostly = {
 	.ssthresh	= tcp_scalable_ssthresh,
 	.cong_avoid	= tcp_scalable_cong_avoid,
 	.min_cwnd	= tcp_reno_min_cwnd,
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 74a6aa0..ecd44b0 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -259,7 +259,6 @@ static void tcp_delack_timer(unsigned long data)
 		tcp_send_ack(sk);
 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKS);
 	}
-	TCP_CHECK_TIMER(sk);
 
 out:
 	if (tcp_memory_pressure)
@@ -481,7 +480,6 @@ static void tcp_write_timer(unsigned long data)
 		tcp_probe_timer(sk);
 		break;
 	}
-	TCP_CHECK_TIMER(sk);
 
 out:
 	sk_mem_reclaim(sk);
@@ -589,7 +587,6 @@ static void tcp_keepalive_timer (unsigned long data)
 		elapsed = keepalive_time_when(tp) - elapsed;
 	}
 
-	TCP_CHECK_TIMER(sk);
 	sk_mem_reclaim(sk);
 
 resched:
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
index c6743ee..80fa2bf 100644
--- a/net/ipv4/tcp_vegas.c
+++ b/net/ipv4/tcp_vegas.c
@@ -304,7 +304,7 @@ void tcp_vegas_get_info(struct sock *sk, u32 ext, struct sk_buff *skb)
 }
 EXPORT_SYMBOL_GPL(tcp_vegas_get_info);
 
-static struct tcp_congestion_ops tcp_vegas = {
+static struct tcp_congestion_ops tcp_vegas __read_mostly = {
 	.flags		= TCP_CONG_RTT_STAMP,
 	.init		= tcp_vegas_init,
 	.ssthresh	= tcp_reno_ssthresh,
diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c
index 38bc0b5..ac43cd7 100644
--- a/net/ipv4/tcp_veno.c
+++ b/net/ipv4/tcp_veno.c
@@ -201,7 +201,7 @@ static u32 tcp_veno_ssthresh(struct sock *sk)
 		return max(tp->snd_cwnd >> 1U, 2U);
 }
 
-static struct tcp_congestion_ops tcp_veno = {
+static struct tcp_congestion_ops tcp_veno __read_mostly = {
 	.flags		= TCP_CONG_RTT_STAMP,
 	.init		= tcp_veno_init,
 	.ssthresh	= tcp_veno_ssthresh,
diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c
index a534dda..1b91bf4 100644
--- a/net/ipv4/tcp_westwood.c
+++ b/net/ipv4/tcp_westwood.c
@@ -272,7 +272,7 @@ static void tcp_westwood_info(struct sock *sk, u32 ext,
 }
 
 
-static struct tcp_congestion_ops tcp_westwood = {
+static struct tcp_congestion_ops tcp_westwood __read_mostly = {
 	.init		= tcp_westwood_init,
 	.ssthresh	= tcp_reno_ssthresh,
 	.cong_avoid	= tcp_reno_cong_avoid,
diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c
index a0f2403..dc7f431 100644
--- a/net/ipv4/tcp_yeah.c
+++ b/net/ipv4/tcp_yeah.c
@@ -225,7 +225,7 @@ static u32 tcp_yeah_ssthresh(struct sock *sk) {
 	return tp->snd_cwnd - reduction;
 }
 
-static struct tcp_congestion_ops tcp_yeah = {
+static struct tcp_congestion_ops tcp_yeah __read_mostly = {
 	.flags		= TCP_CONG_RTT_STAMP,
 	.init		= tcp_yeah_init,
 	.ssthresh	= tcp_yeah_ssthresh,
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 8157b17..c9a73e5 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -663,75 +663,72 @@ void udp_flush_pending_frames(struct sock *sk)
 EXPORT_SYMBOL(udp_flush_pending_frames);
 
 /**
- * 	udp4_hwcsum_outgoing  -  handle outgoing HW checksumming
- * 	@sk: 	socket we are sending on
+ * 	udp4_hwcsum  -  handle outgoing HW checksumming
  * 	@skb: 	sk_buff containing the filled-in UDP header
  * 	        (checksum field must be zeroed out)
+ *	@src:	source IP address
+ *	@dst:	destination IP address
  */
-static void udp4_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb,
-				 __be32 src, __be32 dst, int len)
+static void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst)
 {
-	unsigned int offset;
 	struct udphdr *uh = udp_hdr(skb);
+	struct sk_buff *frags = skb_shinfo(skb)->frag_list;
+	int offset = skb_transport_offset(skb);
+	int len = skb->len - offset;
+	int hlen = len;
 	__wsum csum = 0;
 
-	if (skb_queue_len(&sk->sk_write_queue) == 1) {
+	if (!frags) {
 		/*
 		 * Only one fragment on the socket.
 		 */
 		skb->csum_start = skb_transport_header(skb) - skb->head;
 		skb->csum_offset = offsetof(struct udphdr, check);
-		uh->check = ~csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, 0);
+		uh->check = ~csum_tcpudp_magic(src, dst, len,
+					       IPPROTO_UDP, 0);
 	} else {
 		/*
 		 * HW-checksum won't work as there are two or more
 		 * fragments on the socket so that all csums of sk_buffs
 		 * should be together
 		 */
-		offset = skb_transport_offset(skb);
-		skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);
+		do {
+			csum = csum_add(csum, frags->csum);
+			hlen -= frags->len;
+		} while ((frags = frags->next));
 
+		csum = skb_checksum(skb, offset, hlen, csum);
 		skb->ip_summed = CHECKSUM_NONE;
 
-		skb_queue_walk(&sk->sk_write_queue, skb) {
-			csum = csum_add(csum, skb->csum);
-		}
-
 		uh->check = csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, csum);
 		if (uh->check == 0)
 			uh->check = CSUM_MANGLED_0;
 	}
 }
 
-/*
- * Push out all pending data as one UDP datagram. Socket is locked.
- */
-static int udp_push_pending_frames(struct sock *sk)
+static int udp_send_skb(struct sk_buff *skb, __be32 daddr, __be32 dport)
 {
-	struct udp_sock  *up = udp_sk(sk);
+	struct sock *sk = skb->sk;
 	struct inet_sock *inet = inet_sk(sk);
-	struct flowi *fl = &inet->cork.fl;
-	struct sk_buff *skb;
 	struct udphdr *uh;
+	struct rtable *rt = (struct rtable *)skb_dst(skb);
 	int err = 0;
 	int is_udplite = IS_UDPLITE(sk);
+	int offset = skb_transport_offset(skb);
+	int len = skb->len - offset;
 	__wsum csum = 0;
 
-	/* Grab the skbuff where UDP header space exists. */
-	if ((skb = skb_peek(&sk->sk_write_queue)) == NULL)
-		goto out;
-
 	/*
 	 * Create a UDP header
 	 */
 	uh = udp_hdr(skb);
-	uh->source = fl->fl_ip_sport;
-	uh->dest = fl->fl_ip_dport;
-	uh->len = htons(up->len);
+	uh->source = inet->inet_sport;
+	uh->dest = dport;
+	uh->len = htons(len);
 	uh->check = 0;
 
 	if (is_udplite)  				 /*     UDP-Lite      */
-		csum  = udplite_csum_outgoing(sk, skb);
+		csum = udplite_csum(skb);
 
 	else if (sk->sk_no_check == UDP_CSUM_NOXMIT) {   /* UDP csum disabled */
 
@@ -740,20 +737,20 @@ static int udp_push_pending_frames(struct sock *sk)
 
 	} else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */
 
-		udp4_hwcsum_outgoing(sk, skb, fl->fl4_src, fl->fl4_dst, up->len);
+		udp4_hwcsum(skb, rt->rt_src, daddr);
 		goto send;
 
-	} else						 /*   `normal' UDP    */
-		csum = udp_csum_outgoing(sk, skb);
+	} else
+		csum = udp_csum(skb);
 
 	/* add protocol-dependent pseudo-header */
-	uh->check = csum_tcpudp_magic(fl->fl4_src, fl->fl4_dst, up->len,
+	uh->check = csum_tcpudp_magic(rt->rt_src, daddr, len,
 				      sk->sk_protocol, csum);
 	if (uh->check == 0)
 		uh->check = CSUM_MANGLED_0;
 
 send:
-	err = ip_push_pending_frames(sk);
+	err = ip_send_skb(skb);
 	if (err) {
 		if (err == -ENOBUFS && !inet->recverr) {
 			UDP_INC_STATS_USER(sock_net(sk),
@@ -763,6 +760,26 @@ send:
 	} else
 		UDP_INC_STATS_USER(sock_net(sk),
 				   UDP_MIB_OUTDATAGRAMS, is_udplite);
+	return err;
+}
+
+/*
+ * Push out all pending data as one UDP datagram. Socket is locked.
+ */
+static int udp_push_pending_frames(struct sock *sk)
+{
+	struct udp_sock  *up = udp_sk(sk);
+	struct inet_sock *inet = inet_sk(sk);
+	struct flowi *fl = &inet->cork.fl;
+	struct sk_buff *skb;
+	int err = 0;
+
+	skb = ip_finish_skb(sk);
+	if (!skb)
+		goto out;
+
+	err = udp_send_skb(skb, fl->fl4_dst, fl->fl_ip_dport);
+
 out:
 	up->len = 0;
 	up->pending = 0;
@@ -785,6 +802,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 	int err, is_udplite = IS_UDPLITE(sk);
 	int corkreq = up->corkflag || msg->msg_flags&MSG_MORE;
 	int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);
+	struct sk_buff *skb;
 
 	if (len > 0xFFFF)
 		return -EMSGSIZE;
@@ -799,6 +817,8 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 	ipc.opt = NULL;
 	ipc.tx_flags = 0;
 
+	getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag;
+
 	if (up->pending) {
 		/*
 		 * There are pending frames.
@@ -894,14 +914,18 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 				    .fl4_src = saddr,
 				    .fl4_tos = tos,
 				    .proto = sk->sk_protocol,
-				    .flags = inet_sk_flowi_flags(sk),
+				    .flags = (inet_sk_flowi_flags(sk) |
+					      FLOWI_FLAG_CAN_SLEEP),
 				    .fl_ip_sport = inet->inet_sport,
-				    .fl_ip_dport = dport };
+				    .fl_ip_dport = dport
+		};
 		struct net *net = sock_net(sk);
 
 		security_sk_classify_flow(sk, &fl);
-		err = ip_route_output_flow(net, &rt, &fl, sk, 1);
-		if (err) {
+		rt = ip_route_output_flow(net, &fl, sk);
+		if (IS_ERR(rt)) {
+			err = PTR_ERR(rt);
+			rt = NULL;
 			if (err == -ENETUNREACH)
 				IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES);
 			goto out;
@@ -923,6 +947,17 @@ back_from_confirm:
 	if (!ipc.addr)
 		daddr = ipc.addr = rt->rt_dst;
 
+	/* Lockless fast path for the non-corking case. */
+	if (!corkreq) {
+		skb = ip_make_skb(sk, getfrag, msg->msg_iov, ulen,
+				  sizeof(struct udphdr), &ipc, &rt,
+				  msg->msg_flags);
+		err = PTR_ERR(skb);
+		if (skb && !IS_ERR(skb))
+			err = udp_send_skb(skb, daddr, dport);
+		goto out;
+	}
+
 	lock_sock(sk);
 	if (unlikely(up->pending)) {
 		/* The socket is already corked while preparing it. */
@@ -944,7 +979,6 @@ back_from_confirm:
 
 do_append_data:
 	up->len += ulen;
-	getfrag  =  is_udplite ?  udplite_getfrag : ip_generic_getfrag;
 	err = ip_append_data(sk, getfrag, msg->msg_iov, ulen,
 			sizeof(struct udphdr), &ipc, &rt,
 			corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags);
@@ -2199,7 +2233,7 @@ int udp4_ufo_send_check(struct sk_buff *skb)
 	return 0;
 }
 
-struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, int features)
+struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, u32 features)
 {
 	struct sk_buff *segs = ERR_PTR(-EINVAL);
 	unsigned int mss;
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index b057d40..c70c42e 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -19,25 +19,23 @@
 static struct xfrm_policy_afinfo xfrm4_policy_afinfo;
 
 static struct dst_entry *xfrm4_dst_lookup(struct net *net, int tos,
-					  xfrm_address_t *saddr,
-					  xfrm_address_t *daddr)
+					  const xfrm_address_t *saddr,
+					  const xfrm_address_t *daddr)
 {
 	struct flowi fl = {
 		.fl4_dst = daddr->a4,
 		.fl4_tos = tos,
 	};
-	struct dst_entry *dst;
 	struct rtable *rt;
-	int err;
 
 	if (saddr)
 		fl.fl4_src = saddr->a4;
 
-	err = __ip_route_output_key(net, &rt, &fl);
-	dst = &rt->dst;
-	if (err)
-		dst = ERR_PTR(err);
-	return dst;
+	rt = __ip_route_output_key(net, &fl);
+	if (!IS_ERR(rt))
+		return &rt->dst;
+
+	return ERR_CAST(rt);
 }
 
 static int xfrm4_get_saddr(struct net *net,
@@ -56,7 +54,7 @@ static int xfrm4_get_saddr(struct net *net,
 	return 0;
 }
 
-static int xfrm4_get_tos(struct flowi *fl)
+static int xfrm4_get_tos(const struct flowi *fl)
 {
 	return IPTOS_RT_MASK & fl->fl4_tos; /* Strip ECN bits */
 }
@@ -68,11 +66,16 @@ static int xfrm4_init_path(struct xfrm_dst *path, struct dst_entry *dst,
 }
 
 static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
-			  struct flowi *fl)
+			  const struct flowi *fl)
 {
 	struct rtable *rt = (struct rtable *)xdst->route;
 
-	xdst->u.rt.fl = *fl;
+	rt->rt_key_dst = fl->fl4_dst;
+	rt->rt_key_src = fl->fl4_src;
+	rt->rt_tos = fl->fl4_tos;
+	rt->rt_iif = fl->iif;
+	rt->rt_oif = fl->oif;
+	rt->rt_mark = fl->mark;
 
 	xdst->u.dst.dev = dev;
 	dev_hold(dev);
@@ -196,8 +199,11 @@ static void xfrm4_dst_destroy(struct dst_entry *dst)
 {
 	struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
 
+	dst_destroy_metrics_generic(dst);
+
 	if (likely(xdst->u.rt.peer))
 		inet_putpeer(xdst->u.rt.peer);
+
 	xfrm_dst_destroy(xdst);
 }
 
@@ -215,6 +221,7 @@ static struct dst_ops xfrm4_dst_ops = {
 	.protocol =		cpu_to_be16(ETH_P_IP),
 	.gc =			xfrm4_garbage_collect,
 	.update_pmtu =		xfrm4_update_pmtu,
+	.cow_metrics =		dst_cow_metrics_generic,
 	.destroy =		xfrm4_dst_destroy,
 	.ifdown =		xfrm4_dst_ifdown,
 	.local_out =		__ip_local_out,
@@ -230,6 +237,7 @@ static struct xfrm_policy_afinfo xfrm4_policy_afinfo = {
 	.get_tos =		xfrm4_get_tos,
 	.init_path =		xfrm4_init_path,
 	.fill_dst =		xfrm4_fill_dst,
+	.blackhole_route =	ipv4_blackhole_route,
 };
 
 #ifdef CONFIG_SYSCTL
diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c
index 4794762..983eff2 100644
--- a/net/ipv4/xfrm4_state.c
+++ b/net/ipv4/xfrm4_state.c
@@ -21,7 +21,7 @@ static int xfrm4_init_flags(struct xfrm_state *x)
 }
 
 static void
-__xfrm4_init_tempsel(struct xfrm_selector *sel, struct flowi *fl)
+__xfrm4_init_tempsel(struct xfrm_selector *sel, const struct flowi *fl)
 {
 	sel->daddr.a4 = fl->fl4_dst;
 	sel->saddr.a4 = fl->fl4_src;
@@ -37,8 +37,8 @@ __xfrm4_init_tempsel(struct xfrm_selector *sel, struct flowi *fl)
 }
 
 static void
-xfrm4_init_temprop(struct xfrm_state *x, struct xfrm_tmpl *tmpl,
-		   xfrm_address_t *daddr, xfrm_address_t *saddr)
+xfrm4_init_temprop(struct xfrm_state *x, const struct xfrm_tmpl *tmpl,
+		   const xfrm_address_t *daddr, const xfrm_address_t *saddr)
 {
 	x->id = tmpl->id;
 	if (x->id.daddr.a4 == 0)
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index fd6782e..3daaf3c 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -718,12 +718,9 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
 	struct inet6_ifaddr *ifa, *ifn;
 	struct inet6_dev *idev = ifp->idev;
 	int state;
-	int hash;
 	int deleted = 0, onlink = 0;
 	unsigned long expires = jiffies;
 
-	hash = ipv6_addr_hash(&ifp->addr);
-
 	spin_lock_bh(&ifp->state_lock);
 	state = ifp->state;
 	ifp->state = INET6_IFADDR_STATE_DEAD;
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 978e80e..a88b2e9 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -644,9 +644,8 @@ EXPORT_SYMBOL(inet6_unregister_protosw);
 
 int inet6_sk_rebuild_header(struct sock *sk)
 {
-	int err;
-	struct dst_entry *dst;
 	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct dst_entry *dst;
 
 	dst = __sk_dst_check(sk, np->dst_cookie);
 
@@ -668,17 +667,11 @@ int inet6_sk_rebuild_header(struct sock *sk)
 
 		final_p = fl6_update_dst(&fl, np->opt, &final);
 
-		err = ip6_dst_lookup(sk, &dst, &fl);
-		if (err) {
+		dst = ip6_dst_lookup_flow(sk, &fl, final_p, false);
+		if (IS_ERR(dst)) {
 			sk->sk_route_caps = 0;
-			return err;
-		}
-		if (final_p)
-			ipv6_addr_copy(&fl.fl6_dst, final_p);
-
-		if ((err = xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0) {
-			sk->sk_err_soft = -err;
-			return err;
+			sk->sk_err_soft = -PTR_ERR(dst);
+			return PTR_ERR(dst);
 		}
 
 		__ip6_dst_store(sk, dst, NULL, NULL);
@@ -772,7 +765,7 @@ out:
 	return err;
 }
 
-static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, int features)
+static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, u32 features)
 {
 	struct sk_buff *segs = ERR_PTR(-EINVAL);
 	struct ipv6hdr *ipv6h;
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 320bdb8..be3a781 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -162,18 +162,11 @@ ipv4_connected:
 	opt = flowlabel ? flowlabel->opt : np->opt;
 	final_p = fl6_update_dst(&fl, opt, &final);
 
-	err = ip6_dst_lookup(sk, &dst, &fl);
-	if (err)
+	dst = ip6_dst_lookup_flow(sk, &fl, final_p, true);
+	err = 0;
+	if (IS_ERR(dst)) {
+		err = PTR_ERR(dst);
 		goto out;
-	if (final_p)
-		ipv6_addr_copy(&fl.fl6_dst, final_p);
-
-	err = __xfrm_lookup(sock_net(sk), &dst, &fl, sk, XFRM_LOOKUP_WAIT);
-	if (err < 0) {
-		if (err == -EREMOTE)
-			err = ip6_dst_blackhole(sk, &dst, &fl);
-		if (err < 0)
-			goto out;
 	}
 
 	/* source address lookup done in ip6_dst_lookup */
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 03e62f9..5566595 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -157,20 +157,20 @@ static int is_ineligible(struct sk_buff *skb)
 /*
  * Check the ICMP output rate limit
  */
-static inline int icmpv6_xrlim_allow(struct sock *sk, u8 type,
-				     struct flowi *fl)
+static inline bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
+				      struct flowi *fl)
 {
 	struct dst_entry *dst;
 	struct net *net = sock_net(sk);
-	int res = 0;
+	bool res = false;
 
 	/* Informational messages are not limited. */
 	if (type & ICMPV6_INFOMSG_MASK)
-		return 1;
+		return true;
 
 	/* Do not limit pmtu discovery, it would break it. */
 	if (type == ICMPV6_PKT_TOOBIG)
-		return 1;
+		return true;
 
 	/*
 	 * Look up the output route.
@@ -182,7 +182,7 @@ static inline int icmpv6_xrlim_allow(struct sock *sk, u8 type,
 		IP6_INC_STATS(net, ip6_dst_idev(dst),
 			      IPSTATS_MIB_OUTNOROUTES);
 	} else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) {
-		res = 1;
+		res = true;
 	} else {
 		struct rt6_info *rt = (struct rt6_info *)dst;
 		int tmo = net->ipv6.sysctl.icmpv6_time;
@@ -191,7 +191,9 @@ static inline int icmpv6_xrlim_allow(struct sock *sk, u8 type,
 		if (rt->rt6i_dst.plen < 128)
 			tmo >>= ((128 - rt->rt6i_dst.plen)>>5);
 
-		res = xrlim_allow(dst, tmo);
+		if (!rt->rt6i_peer)
+			rt6_bind_peer(rt, 1);
+		res = inet_peer_xrlim_allow(rt->rt6i_peer, tmo);
 	}
 	dst_release(dst);
 	return res;
@@ -298,6 +300,68 @@ static void mip6_addr_swap(struct sk_buff *skb)
 static inline void mip6_addr_swap(struct sk_buff *skb) {}
 #endif
 
+static struct dst_entry *icmpv6_route_lookup(struct net *net, struct sk_buff *skb,
+					     struct sock *sk, struct flowi *fl)
+{
+	struct dst_entry *dst, *dst2;
+	struct flowi fl2;
+	int err;
+
+	err = ip6_dst_lookup(sk, &dst, fl);
+	if (err)
+		return ERR_PTR(err);
+
+	/*
+	 * We won't send icmp if the destination is known
+	 * anycast.
+	 */
+	if (((struct rt6_info *)dst)->rt6i_flags & RTF_ANYCAST) {
+		LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: acast source\n");
+		dst_release(dst);
+		return ERR_PTR(-EINVAL);
+	}
+
+	/* No need to clone since we're just using its address. */
+	dst2 = dst;
+
+	dst = xfrm_lookup(net, dst, fl, sk, 0);
+	if (!IS_ERR(dst)) {
+		if (dst != dst2)
+			return dst;
+	} else {
+		if (PTR_ERR(dst) == -EPERM)
+			dst = NULL;
+		else
+			return dst;
+	}
+
+	err = xfrm_decode_session_reverse(skb, &fl2, AF_INET6);
+	if (err)
+		goto relookup_failed;
+
+	err = ip6_dst_lookup(sk, &dst2, &fl2);
+	if (err)
+		goto relookup_failed;
+
+	dst2 = xfrm_lookup(net, dst2, &fl2, sk, XFRM_LOOKUP_ICMP);
+	if (!IS_ERR(dst2)) {
+		dst_release(dst);
+		dst = dst2;
+	} else {
+		err = PTR_ERR(dst2);
+		if (err == -EPERM) {
+			dst_release(dst);
+			return dst2;
+		} else
+			goto relookup_failed;
+	}
+
+relookup_failed:
+	if (dst)
+		return dst;
+	return ERR_PTR(err);
+}
+
 /*
  *	Send an ICMP message in response to a packet in error
  */
@@ -310,10 +374,8 @@ void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
 	struct ipv6_pinfo *np;
 	struct in6_addr *saddr = NULL;
 	struct dst_entry *dst;
-	struct dst_entry *dst2;
 	struct icmp6hdr tmp_hdr;
 	struct flowi fl;
-	struct flowi fl2;
 	struct icmpv6_msg msg;
 	int iif = 0;
 	int addr_type = 0;
@@ -406,57 +468,10 @@ void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
 	if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst))
 		fl.oif = np->mcast_oif;
 
-	err = ip6_dst_lookup(sk, &dst, &fl);
-	if (err)
-		goto out;
-
-	/*
-	 * We won't send icmp if the destination is known
-	 * anycast.
-	 */
-	if (((struct rt6_info *)dst)->rt6i_flags & RTF_ANYCAST) {
-		LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: acast source\n");
-		goto out_dst_release;
-	}
-
-	/* No need to clone since we're just using its address. */
-	dst2 = dst;
-
-	err = xfrm_lookup(net, &dst, &fl, sk, 0);
-	switch (err) {
-	case 0:
-		if (dst != dst2)
-			goto route_done;
-		break;
-	case -EPERM:
-		dst = NULL;
-		break;
-	default:
+	dst = icmpv6_route_lookup(net, skb, sk, &fl);
+	if (IS_ERR(dst))
 		goto out;
-	}
-
-	if (xfrm_decode_session_reverse(skb, &fl2, AF_INET6))
-		goto relookup_failed;
-
-	if (ip6_dst_lookup(sk, &dst2, &fl2))
-		goto relookup_failed;
-
-	err = xfrm_lookup(net, &dst2, &fl2, sk, XFRM_LOOKUP_ICMP);
-	switch (err) {
-	case 0:
-		dst_release(dst);
-		dst = dst2;
-		break;
-	case -EPERM:
-		goto out_dst_release;
-	default:
-relookup_failed:
-		if (!dst)
-			goto out;
-		break;
-	}
 
-route_done:
 	if (ipv6_addr_is_multicast(&fl.fl6_dst))
 		hlimit = np->mcast_hops;
 	else
@@ -543,7 +558,8 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
 	err = ip6_dst_lookup(sk, &dst, &fl);
 	if (err)
 		goto out;
-	if ((err = xfrm_lookup(net, &dst, &fl, sk, 0)) < 0)
+	dst = xfrm_lookup(net, dst, &fl, sk, 0);
+	if (IS_ERR(dst))
 		goto out;
 
 	if (ipv6_addr_is_multicast(&fl.fl6_dst))
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index d144e62..d687e13 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -74,13 +74,8 @@ struct dst_entry *inet6_csk_route_req(struct sock *sk,
 	fl.fl_ip_sport = inet_rsk(req)->loc_port;
 	security_req_classify_flow(req, &fl);
 
-	if (ip6_dst_lookup(sk, &dst, &fl))
-		return NULL;
-
-	if (final_p)
-		ipv6_addr_copy(&fl.fl6_dst, final_p);
-
-	if ((xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0)
+	dst = ip6_dst_lookup_flow(sk, &fl, final_p, false);
+	if (IS_ERR(dst))
 		return NULL;
 
 	return dst;
@@ -234,21 +229,13 @@ int inet6_csk_xmit(struct sk_buff *skb)
 	dst = __inet6_csk_dst_check(sk, np->dst_cookie);
 
 	if (dst == NULL) {
-		int err = ip6_dst_lookup(sk, &dst, &fl);
-
-		if (err) {
-			sk->sk_err_soft = -err;
-			kfree_skb(skb);
-			return err;
-		}
-
-		if (final_p)
-			ipv6_addr_copy(&fl.fl6_dst, final_p);
+		dst = ip6_dst_lookup_flow(sk, &fl, final_p, false);
 
-		if ((err = xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0) {
+		if (IS_ERR(dst)) {
+			sk->sk_err_soft = -PTR_ERR(dst);
 			sk->sk_route_caps = 0;
 			kfree_skb(skb);
-			return err;
+			return PTR_ERR(dst);
 		}
 
 		__inet6_csk_dst_store(sk, dst, NULL, NULL);
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 5f8d242..adaffaf 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -274,13 +274,10 @@ int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
 {
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct ipv6hdr *hdr;
-	int totlen;
 
 	skb->protocol = htons(ETH_P_IPV6);
 	skb->dev = dev;
 
-	totlen = len + sizeof(struct ipv6hdr);
-
 	skb_reset_network_header(skb);
 	skb_put(skb, sizeof(struct ipv6hdr));
 	hdr = ipv6_hdr(skb);
@@ -479,10 +476,13 @@ int ip6_forward(struct sk_buff *skb)
 		else
 			target = &hdr->daddr;
 
+		if (!rt->rt6i_peer)
+			rt6_bind_peer(rt, 1);
+
 		/* Limit redirects both by destination (here)
 		   and by source (inside ndisc_send_redirect)
 		 */
-		if (xrlim_allow(dst, 1*HZ))
+		if (inet_peer_xrlim_allow(rt->rt6i_peer, 1*HZ))
 			ndisc_send_redirect(skb, n, target);
 	} else {
 		int addrtype = ipv6_addr_type(&hdr->saddr);
@@ -1002,29 +1002,71 @@ int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
 EXPORT_SYMBOL_GPL(ip6_dst_lookup);
 
 /**
- *	ip6_sk_dst_lookup - perform socket cached route lookup on flow
+ *	ip6_dst_lookup_flow - perform route lookup on flow with ipsec
+ *	@sk: socket which provides route info
+ *	@fl: flow to lookup
+ *	@final_dst: final destination address for ipsec lookup
+ *	@can_sleep: we are in a sleepable context
+ *
+ *	This function performs a route lookup on the given flow.
+ *
+ *	It returns a valid dst pointer on success, or a pointer encoded
+ *	error code.
+ */
+struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi *fl,
+				      const struct in6_addr *final_dst,
+				      bool can_sleep)
+{
+	struct dst_entry *dst = NULL;
+	int err;
+
+	err = ip6_dst_lookup_tail(sk, &dst, fl);
+	if (err)
+		return ERR_PTR(err);
+	if (final_dst)
+		ipv6_addr_copy(&fl->fl6_dst, final_dst);
+	if (can_sleep)
+		fl->flags |= FLOWI_FLAG_CAN_SLEEP;
+
+	return xfrm_lookup(sock_net(sk), dst, fl, sk, 0);
+}
+EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
+
+/**
+ *	ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
  *	@sk: socket which provides the dst cache and route info
- *	@dst: pointer to dst_entry * for result
  *	@fl: flow to lookup
+ *	@final_dst: final destination address for ipsec lookup
+ *	@can_sleep: we are in a sleepable context
  *
  *	This function performs a route lookup on the given flow with the
  *	possibility of using the cached route in the socket if it is valid.
  *	It will take the socket dst lock when operating on the dst cache.
  *	As a result, this function can only be used in process context.
  *
- *	It returns zero on success, or a standard errno code on error.
+ *	It returns a valid dst pointer on success, or a pointer encoded
+ *	error code.
  */
-int ip6_sk_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
+struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi *fl,
+					 const struct in6_addr *final_dst,
+					 bool can_sleep)
 {
-	*dst = NULL;
-	if (sk) {
-		*dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
-		*dst = ip6_sk_dst_check(sk, *dst, fl);
-	}
+	struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
+	int err;
 
-	return ip6_dst_lookup_tail(sk, dst, fl);
+	dst = ip6_sk_dst_check(sk, dst, fl);
+
+	err = ip6_dst_lookup_tail(sk, &dst, fl);
+	if (err)
+		return ERR_PTR(err);
+	if (final_dst)
+		ipv6_addr_copy(&fl->fl6_dst, final_dst);
+	if (can_sleep)
+		fl->flags |= FLOWI_FLAG_CAN_SLEEP;
+
+	return xfrm_lookup(sock_net(sk), dst, fl, sk, 0);
 }
-EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup);
+EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
 
 static inline int ip6_ufo_append_data(struct sock *sk,
 			int getfrag(void *from, char *to, int offset, int len,
@@ -1061,7 +1103,6 @@ static inline int ip6_ufo_append_data(struct sock *sk,
 
 		skb->ip_summed = CHECKSUM_PARTIAL;
 		skb->csum = 0;
-		sk->sk_sndmsg_off = 0;
 	}
 
 	err = skb_append_datato_frags(sk,skb, getfrag, from,
@@ -1118,6 +1159,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
 	int err;
 	int offset = 0;
 	int csummode = CHECKSUM_NONE;
+	__u8 tx_flags = 0;
 
 	if (flags&MSG_PROBE)
 		return 0;
@@ -1202,6 +1244,13 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
 		}
 	}
 
+	/* For UDP, check if TX timestamp is enabled */
+	if (sk->sk_type == SOCK_DGRAM) {
+		err = sock_tx_timestamp(sk, &tx_flags);
+		if (err)
+			goto error;
+	}
+
 	/*
 	 * Let's try using as much space as possible.
 	 * Use MTU if total length of the message fits into the MTU.
@@ -1306,6 +1355,12 @@ alloc_new_skb:
 							   sk->sk_allocation);
 				if (unlikely(skb == NULL))
 					err = -ENOBUFS;
+				else {
+					/* Only the initial fragment
+					 * is time stamped.
+					 */
+					tx_flags = 0;
+				}
 			}
 			if (skb == NULL)
 				goto error;
@@ -1317,6 +1372,9 @@ alloc_new_skb:
 			/* reserve for fragmentation */
 			skb_reserve(skb, hh_len+sizeof(struct frag_hdr));
 
+			if (sk->sk_type == SOCK_DGRAM)
+				skb_shinfo(skb)->tx_flags = tx_flags;
+
 			/*
 			 *	Find where to start putting bytes
 			 */
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index e528a42..ea8d5e8 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -582,7 +582,8 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	fl.fl4_dst = eiph->saddr;
 	fl.fl4_tos = RT_TOS(eiph->tos);
 	fl.proto = IPPROTO_IPIP;
-	if (ip_route_output_key(dev_net(skb->dev), &rt, &fl))
+	rt = ip_route_output_key(dev_net(skb->dev), &fl);
+	if (IS_ERR(rt))
 		goto out;
 
 	skb2->dev = rt->dst.dev;
@@ -594,12 +595,14 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 		fl.fl4_dst = eiph->daddr;
 		fl.fl4_src = eiph->saddr;
 		fl.fl4_tos = eiph->tos;
-		if (ip_route_output_key(dev_net(skb->dev), &rt, &fl) ||
+		rt = ip_route_output_key(dev_net(skb->dev), &fl);
+		if (IS_ERR(rt) ||
 		    rt->dst.dev->type != ARPHRD_TUNNEL) {
-			ip_rt_put(rt);
+			if (!IS_ERR(rt))
+				ip_rt_put(rt);
 			goto out;
 		}
-		skb_dst_set(skb2, (struct dst_entry *)rt);
+		skb_dst_set(skb2, &rt->dst);
 	} else {
 		ip_rt_put(rt);
 		if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos,
@@ -904,8 +907,14 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
 	else {
 		dst = ip6_route_output(net, NULL, fl);
 
-		if (dst->error || xfrm_lookup(net, &dst, fl, NULL, 0) < 0)
+		if (dst->error)
 			goto tx_err_link_failure;
+		dst = xfrm_lookup(net, dst, fl, NULL, 0);
+		if (IS_ERR(dst)) {
+			err = PTR_ERR(dst);
+			dst = NULL;
+			goto tx_err_link_failure;
+		}
 	}
 
 	tdev = dst->dev;
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 0e1d53b..618f67cc 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -1039,7 +1039,6 @@ static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
 
 	while((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
 		if (ipv6_hdr(skb)->version == 0) {
-			int err;
 			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
 
 			if (__ip6mr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) {
@@ -1050,7 +1049,7 @@ static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
 				skb_trim(skb, nlh->nlmsg_len);
 				((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -EMSGSIZE;
 			}
-			err = rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
+			rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
 		} else
 			ip6_mr_forward(net, mrt, skb, c);
 	}
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 49f986d..f2c9b69 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -319,7 +319,6 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
 {
 	struct in6_addr *source, *group;
 	struct ipv6_mc_socklist *pmc;
-	struct net_device *dev;
 	struct inet6_dev *idev;
 	struct ipv6_pinfo *inet6 = inet6_sk(sk);
 	struct ip6_sf_socklist *psl;
@@ -341,7 +340,6 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
 		rcu_read_unlock();
 		return -ENODEV;
 	}
-	dev = idev->dev;
 
 	err = -EADDRNOTAVAIL;
 
@@ -455,7 +453,6 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf)
 {
 	struct in6_addr *group;
 	struct ipv6_mc_socklist *pmc;
-	struct net_device *dev;
 	struct inet6_dev *idev;
 	struct ipv6_pinfo *inet6 = inet6_sk(sk);
 	struct ip6_sf_socklist *newpsl, *psl;
@@ -478,7 +475,6 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf)
 		rcu_read_unlock();
 		return -ENODEV;
 	}
-	dev = idev->dev;
 
 	err = 0;
 
@@ -549,7 +545,6 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf,
 	struct in6_addr *group;
 	struct ipv6_mc_socklist *pmc;
 	struct inet6_dev *idev;
-	struct net_device *dev;
 	struct ipv6_pinfo *inet6 = inet6_sk(sk);
 	struct ip6_sf_socklist *psl;
 	struct net *net = sock_net(sk);
@@ -566,7 +561,6 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf,
 		rcu_read_unlock();
 		return -ENODEV;
 	}
-	dev = idev->dev;
 
 	err = -EADDRNOTAVAIL;
 	/*
@@ -1429,7 +1423,12 @@ static void mld_sendpack(struct sk_buff *skb)
 			 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
 			 skb->dev->ifindex);
 
-	err = xfrm_lookup(net, &dst, &fl, NULL, 0);
+	dst = xfrm_lookup(net, dst, &fl, NULL, 0);
+	err = 0;
+	if (IS_ERR(dst)) {
+		err = PTR_ERR(dst);
+		dst = NULL;
+	}
 	skb_dst_set(skb, dst);
 	if (err)
 		goto err_out;
@@ -1796,9 +1795,11 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
 			 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
 			 skb->dev->ifindex);
 
-	err = xfrm_lookup(net, &dst, &fl, NULL, 0);
-	if (err)
+	dst = xfrm_lookup(net, dst, &fl, NULL, 0);
+	if (IS_ERR(dst)) {
+		err = PTR_ERR(dst);
 		goto err_out;
+	}
 
 	skb_dst_set(skb, dst);
 	err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, skb->dev,
diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c
index d6e9599..f3e3ca9 100644
--- a/net/ipv6/mip6.c
+++ b/net/ipv6/mip6.c
@@ -203,7 +203,8 @@ static inline int mip6_report_rl_allow(struct timeval *stamp,
 	return allow;
 }
 
-static int mip6_destopt_reject(struct xfrm_state *x, struct sk_buff *skb, struct flowi *fl)
+static int mip6_destopt_reject(struct xfrm_state *x, struct sk_buff *skb,
+			       const struct flowi *fl)
 {
 	struct net *net = xs_net(x);
 	struct inet6_skb_parm *opt = (struct inet6_skb_parm *)skb->cb;
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 2342545..9360d3b 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -529,8 +529,8 @@ void ndisc_send_skb(struct sk_buff *skb,
 		return;
 	}
 
-	err = xfrm_lookup(net, &dst, &fl, NULL, 0);
-	if (err < 0) {
+	dst = xfrm_lookup(net, dst, &fl, NULL, 0);
+	if (IS_ERR(dst)) {
 		kfree_skb(skb);
 		return;
 	}
@@ -1542,8 +1542,8 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
 	if (dst == NULL)
 		return;
 
-	err = xfrm_lookup(net, &dst, &fl, NULL, 0);
-	if (err)
+	dst = xfrm_lookup(net, dst, &fl, NULL, 0);
+	if (IS_ERR(dst))
 		return;
 
 	rt = (struct rt6_info *) dst;
@@ -1553,7 +1553,9 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
 			   "ICMPv6 Redirect: destination is not a neighbour.\n");
 		goto release;
 	}
-	if (!xrlim_allow(dst, 1*HZ))
+	if (!rt->rt6i_peer)
+		rt6_bind_peer(rt, 1);
+	if (inet_peer_xrlim_allow(rt->rt6i_peer, 1*HZ))
 		goto release;
 
 	if (dev->addr_len) {
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index 35915e8..8d74116 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -39,7 +39,8 @@ int ip6_route_me_harder(struct sk_buff *skb)
 	if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
 	    xfrm_decode_session(skb, &fl, AF_INET6) == 0) {
 		skb_dst_set(skb, NULL);
-		if (xfrm_lookup(net, &dst, &fl, skb->sk, 0))
+		dst = xfrm_lookup(net, dst, &fl, skb->sk, 0);
+		if (IS_ERR(dst))
 			return -1;
 		skb_dst_set(skb, dst);
 	}
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 7d227c6..47b7b8d 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -1076,6 +1076,7 @@ static int compat_table_info(const struct xt_table_info *info,
 	memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
 	newinfo->initial_entries = 0;
 	loc_cpu_entry = info->entries[raw_smp_processor_id()];
+	xt_compat_init_offsets(AF_INET6, info->number);
 	xt_entry_foreach(iter, loc_cpu_entry, info->size) {
 		ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo);
 		if (ret != 0)
@@ -1679,6 +1680,7 @@ translate_compat_table(struct net *net,
 	duprintf("translate_compat_table: size %u\n", info->size);
 	j = 0;
 	xt_compat_lock(AF_INET6);
+	xt_compat_init_offsets(AF_INET6, number);
 	/* Walk through entries, checking offsets. */
 	xt_entry_foreach(iter0, entry0, total_size) {
 		ret = check_compat_entry_size_and_hooks(iter0, info, &size,
diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c
index de33803..e6af8d7 100644
--- a/net/ipv6/netfilter/ip6t_LOG.c
+++ b/net/ipv6/netfilter/ip6t_LOG.c
@@ -452,8 +452,7 @@ ip6t_log_packet(u_int8_t pf,
 	       in ? in->name : "",
 	       out ? out->name : "");
 
-	/* MAC logging for input path only. */
-	if (in && !out)
+	if (in != NULL)
 		dump_mac_header(m, loginfo, skb);
 
 	dump_packet(m, loginfo, skb, skb_network_offset(skb), 1);
diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c
index bf998fe..91f6a61 100644
--- a/net/ipv6/netfilter/ip6t_REJECT.c
+++ b/net/ipv6/netfilter/ip6t_REJECT.c
@@ -101,7 +101,8 @@ static void send_reset(struct net *net, struct sk_buff *oldskb)
 		dst_release(dst);
 		return;
 	}
-	if (xfrm_lookup(net, &dst, &fl, NULL, 0))
+	dst = xfrm_lookup(net, dst, &fl, NULL, 0);
+	if (IS_ERR(dst))
 		return;
 
 	hh_len = (dst->dev->hard_header_len + 15)&~15;
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 79d43aa..0857272 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -45,6 +45,7 @@
 #include <linux/netfilter_ipv6.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
 
 
 struct nf_ct_frag6_skb_cb
@@ -73,7 +74,7 @@ static struct inet_frags nf_frags;
 static struct netns_frags nf_init_frags;
 
 #ifdef CONFIG_SYSCTL
-struct ctl_table nf_ct_frag6_sysctl_table[] = {
+static struct ctl_table nf_ct_frag6_sysctl_table[] = {
 	{
 		.procname	= "nf_conntrack_frag6_timeout",
 		.data		= &nf_init_frags.timeout,
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index c5b0915..dc29b07 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -124,18 +124,18 @@ static __inline__ int icmpv6_filter(struct sock *sk, struct sk_buff *skb)
 }
 
 #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
-static int (*mh_filter)(struct sock *sock, struct sk_buff *skb);
+typedef int mh_filter_t(struct sock *sock, struct sk_buff *skb);
 
-int rawv6_mh_filter_register(int (*filter)(struct sock *sock,
-					   struct sk_buff *skb))
+static mh_filter_t __rcu *mh_filter __read_mostly;
+
+int rawv6_mh_filter_register(mh_filter_t filter)
 {
 	rcu_assign_pointer(mh_filter, filter);
 	return 0;
 }
 EXPORT_SYMBOL(rawv6_mh_filter_register);
 
-int rawv6_mh_filter_unregister(int (*filter)(struct sock *sock,
-					     struct sk_buff *skb))
+int rawv6_mh_filter_unregister(mh_filter_t filter)
 {
 	rcu_assign_pointer(mh_filter, NULL);
 	synchronize_rcu();
@@ -193,10 +193,10 @@ static int ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
 			 * policy is placed in rawv6_rcv() because it is
 			 * required for each socket.
 			 */
-			int (*filter)(struct sock *sock, struct sk_buff *skb);
+			mh_filter_t *filter;
 
 			filter = rcu_dereference(mh_filter);
-			filtered = filter ? filter(sk, skb) : 0;
+			filtered = filter ? (*filter)(sk, skb) : 0;
 			break;
 		}
 #endif
@@ -856,20 +856,11 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
 		fl.oif = np->mcast_oif;
 	security_sk_classify_flow(sk, &fl);
 
-	err = ip6_dst_lookup(sk, &dst, &fl);
-	if (err)
+	dst = ip6_dst_lookup_flow(sk, &fl, final_p, true);
+	if (IS_ERR(dst)) {
+		err = PTR_ERR(dst);
 		goto out;
-	if (final_p)
-		ipv6_addr_copy(&fl.fl6_dst, final_p);
-
-	err = __xfrm_lookup(sock_net(sk), &dst, &fl, sk, XFRM_LOOKUP_WAIT);
-	if (err < 0) {
-		if (err == -EREMOTE)
-			err = ip6_dst_blackhole(sk, &dst, &fl);
-		if (err < 0)
-			goto out;
 	}
-
 	if (hlimit < 0) {
 		if (ipv6_addr_is_multicast(&fl.fl6_dst))
 			hlimit = np->mcast_hops;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index e7db701..0012760 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -97,6 +97,36 @@ static struct rt6_info *rt6_get_route_info(struct net *net,
 					   struct in6_addr *gwaddr, int ifindex);
 #endif
 
+static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
+{
+	struct rt6_info *rt = (struct rt6_info *) dst;
+	struct inet_peer *peer;
+	u32 *p = NULL;
+
+	if (!rt->rt6i_peer)
+		rt6_bind_peer(rt, 1);
+
+	peer = rt->rt6i_peer;
+	if (peer) {
+		u32 *old_p = __DST_METRICS_PTR(old);
+		unsigned long prev, new;
+
+		p = peer->metrics;
+		if (inet_metrics_new(peer))
+			memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
+
+		new = (unsigned long) p;
+		prev = cmpxchg(&dst->_metrics, old, new);
+
+		if (prev != old) {
+			p = __DST_METRICS_PTR(prev);
+			if (prev & DST_METRICS_READ_ONLY)
+				p = NULL;
+		}
+	}
+	return p;
+}
+
 static struct dst_ops ip6_dst_ops_template = {
 	.family			=	AF_INET6,
 	.protocol		=	cpu_to_be16(ETH_P_IPV6),
@@ -105,6 +135,7 @@ static struct dst_ops ip6_dst_ops_template = {
 	.check			=	ip6_dst_check,
 	.default_advmss		=	ip6_default_advmss,
 	.default_mtu		=	ip6_default_mtu,
+	.cow_metrics		=	ipv6_cow_metrics,
 	.destroy		=	ip6_dst_destroy,
 	.ifdown			=	ip6_dst_ifdown,
 	.negative_advice	=	ip6_negative_advice,
@@ -132,6 +163,10 @@ static struct dst_ops ip6_dst_blackhole_ops = {
 	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
 };
 
+static const u32 ip6_template_metrics[RTAX_MAX] = {
+	[RTAX_HOPLIMIT - 1] = 255,
+};
+
 static struct rt6_info ip6_null_entry_template = {
 	.dst = {
 		.__refcnt	= ATOMIC_INIT(1),
@@ -187,7 +222,7 @@ static struct rt6_info ip6_blk_hole_entry_template = {
 /* allocate dst with ip6_dst_ops */
 static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops)
 {
-	return (struct rt6_info *)dst_alloc(ops);
+	return (struct rt6_info *)dst_alloc(ops, 0);
 }
 
 static void ip6_dst_destroy(struct dst_entry *dst)
@@ -206,6 +241,13 @@ static void ip6_dst_destroy(struct dst_entry *dst)
 	}
 }
 
+static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
+
+static u32 rt6_peer_genid(void)
+{
+	return atomic_read(&__rt6_peer_genid);
+}
+
 void rt6_bind_peer(struct rt6_info *rt, int create)
 {
 	struct inet_peer *peer;
@@ -213,6 +255,8 @@ void rt6_bind_peer(struct rt6_info *rt, int create)
 	peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
 	if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
 		inet_putpeer(peer);
+	else
+		rt->rt6i_peer_genid = rt6_peer_genid();
 }
 
 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
@@ -828,17 +872,15 @@ struct dst_entry * ip6_route_output(struct net *net, struct sock *sk,
 
 EXPORT_SYMBOL(ip6_route_output);
 
-int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
+struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
 {
-	struct rt6_info *ort = (struct rt6_info *) *dstp;
-	struct rt6_info *rt = (struct rt6_info *)
-		dst_alloc(&ip6_dst_blackhole_ops);
+	struct rt6_info *rt = dst_alloc(&ip6_dst_blackhole_ops, 1);
+	struct rt6_info *ort = (struct rt6_info *) dst_orig;
 	struct dst_entry *new = NULL;
 
 	if (rt) {
 		new = &rt->dst;
 
-		atomic_set(&new->__refcnt, 1);
 		new->__use = 1;
 		new->input = dst_discard;
 		new->output = dst_discard;
@@ -864,11 +906,9 @@ int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl
 		dst_free(new);
 	}
 
-	dst_release(*dstp);
-	*dstp = new;
-	return new ? 0 : -ENOMEM;
+	dst_release(dst_orig);
+	return new ? new : ERR_PTR(-ENOMEM);
 }
-EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
 
 /*
  *	Destination cache support functions
@@ -880,9 +920,14 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
 
 	rt = (struct rt6_info *) dst;
 
-	if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
+	if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
+		if (rt->rt6i_peer_genid != rt6_peer_genid()) {
+			if (!rt->rt6i_peer)
+				rt6_bind_peer(rt, 0);
+			rt->rt6i_peer_genid = rt6_peer_genid();
+		}
 		return dst;
-
+	}
 	return NULL;
 }
 
@@ -933,7 +978,6 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
 			dst_metric_set(dst, RTAX_FEATURES, features);
 		}
 		dst_metric_set(dst, RTAX_MTU, mtu);
-		call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
 	}
 }
 
@@ -1030,11 +1074,9 @@ out:
 
 int icmp6_dst_gc(void)
 {
-	struct dst_entry *dst, *next, **pprev;
+	struct dst_entry *dst, **pprev;
 	int more = 0;
 
-	next = NULL;
-
 	spin_lock_bh(&icmp6_dst_lock);
 	pprev = &icmp6_dst_gc_list;
 
@@ -1982,12 +2024,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
 	if (IS_ERR(neigh)) {
 		dst_free(&rt->dst);
 
-		/* We are casting this because that is the return
-		 * value type.  But an errno encoded pointer is the
-		 * same regardless of the underlying pointer type,
-		 * and that's what we are returning.  So this is OK.
-		 */
-		return (struct rt6_info *) neigh;
+		return ERR_CAST(neigh);
 	}
 	rt->rt6i_nexthop = neigh;
 
@@ -2689,7 +2726,8 @@ static int __net_init ip6_route_net_init(struct net *net)
 	net->ipv6.ip6_null_entry->dst.path =
 		(struct dst_entry *)net->ipv6.ip6_null_entry;
 	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
-	dst_metric_set(&net->ipv6.ip6_null_entry->dst, RTAX_HOPLIMIT, 255);
+	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
+			 ip6_template_metrics, true);
 
 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
 	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
@@ -2700,7 +2738,8 @@ static int __net_init ip6_route_net_init(struct net *net)
 	net->ipv6.ip6_prohibit_entry->dst.path =
 		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
 	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
-	dst_metric_set(&net->ipv6.ip6_prohibit_entry->dst, RTAX_HOPLIMIT, 255);
+	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
+			 ip6_template_metrics, true);
 
 	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
 					       sizeof(*net->ipv6.ip6_blk_hole_entry),
@@ -2710,7 +2749,8 @@ static int __net_init ip6_route_net_init(struct net *net)
 	net->ipv6.ip6_blk_hole_entry->dst.path =
 		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
 	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
-	dst_metric_set(&net->ipv6.ip6_blk_hole_entry->dst, RTAX_HOPLIMIT, 255);
+	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
+			 ip6_template_metrics, true);
 #endif
 
 	net->ipv6.sysctl.flush_delay = 0;
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index d2c16e1..3534cea 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -412,7 +412,7 @@ static void prl_list_destroy_rcu(struct rcu_head *head)
 
 	p = container_of(head, struct ip_tunnel_prl_entry, rcu_head);
 	do {
-		n = p->next;
+		n = rcu_dereference_protected(p->next, 1);
 		kfree(p);
 		p = n;
 	} while (p);
@@ -421,15 +421,17 @@ static void prl_list_destroy_rcu(struct rcu_head *head)
 static int
 ipip6_tunnel_del_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a)
 {
-	struct ip_tunnel_prl_entry *x, **p;
+	struct ip_tunnel_prl_entry *x;
+	struct ip_tunnel_prl_entry __rcu **p;
 	int err = 0;
 
 	ASSERT_RTNL();
 
 	if (a && a->addr != htonl(INADDR_ANY)) {
-		for (p = &t->prl; *p; p = &(*p)->next) {
-			if ((*p)->addr == a->addr) {
-				x = *p;
+		for (p = &t->prl;
+		     (x = rtnl_dereference(*p)) != NULL;
+		     p = &x->next) {
+			if (x->addr == a->addr) {
 				*p = x->next;
 				call_rcu(&x->rcu_head, prl_entry_destroy_rcu);
 				t->prl_count--;
@@ -438,9 +440,9 @@ ipip6_tunnel_del_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a)
 		}
 		err = -ENXIO;
 	} else {
-		if (t->prl) {
+		x = rtnl_dereference(t->prl);
+		if (x) {
 			t->prl_count = 0;
-			x = t->prl;
 			call_rcu(&x->rcu_head, prl_list_destroy_rcu);
 			t->prl = NULL;
 		}
@@ -736,7 +738,8 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
 				    .fl4_tos = RT_TOS(tos),
 				    .oif = tunnel->parms.link,
 				    .proto = IPPROTO_IPV6 };
-		if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
+		rt = ip_route_output_key(dev_net(dev), &fl);
+		if (IS_ERR(rt)) {
 			dev->stats.tx_carrier_errors++;
 			goto tx_error_icmp;
 		}
@@ -860,8 +863,9 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev)
 				    .fl4_tos = RT_TOS(iph->tos),
 				    .oif = tunnel->parms.link,
 				    .proto = IPPROTO_IPV6 };
-		struct rtable *rt;
-		if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
+		struct rtable *rt = ip_route_output_key(dev_net(dev), &fl);
+
+		if (!IS_ERR(rt)) {
 			tdev = rt->dst.dev;
 			ip_rt_put(rt);
 		}
@@ -1179,7 +1183,7 @@ static int __net_init ipip6_fb_tunnel_init(struct net_device *dev)
 	if (!dev->tstats)
 		return -ENOMEM;
 	dev_hold(dev);
-	sitn->tunnels_wc[0]	= tunnel;
+	rcu_assign_pointer(sitn->tunnels_wc[0], tunnel);
 	return 0;
 }
 
@@ -1196,11 +1200,12 @@ static void __net_exit sit_destroy_tunnels(struct sit_net *sitn, struct list_hea
 	for (prio = 1; prio < 4; prio++) {
 		int h;
 		for (h = 0; h < HASH_SIZE; h++) {
-			struct ip_tunnel *t = sitn->tunnels[prio][h];
+			struct ip_tunnel *t;
 
+			t = rtnl_dereference(sitn->tunnels[prio][h]);
 			while (t != NULL) {
 				unregister_netdevice_queue(t->dev, head);
-				t = t->next;
+				t = rtnl_dereference(t->next);
 			}
 		}
 	}
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 09fd34f..0b4cf35 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -243,12 +243,9 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 		fl.fl_ip_dport = inet_rsk(req)->rmt_port;
 		fl.fl_ip_sport = inet_sk(sk)->inet_sport;
 		security_req_classify_flow(req, &fl);
-		if (ip6_dst_lookup(sk, &dst, &fl))
-			goto out_free;
 
-		if (final_p)
-			ipv6_addr_copy(&fl.fl6_dst, final_p);
-		if ((xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0)
+		dst = ip6_dst_lookup_flow(sk, &fl, final_p, false);
+		if (IS_ERR(dst))
 			goto out_free;
 	}
 
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 20aa95e..e59a31c 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -255,18 +255,10 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 
 	security_sk_classify_flow(sk, &fl);
 
-	err = ip6_dst_lookup(sk, &dst, &fl);
-	if (err)
+	dst = ip6_dst_lookup_flow(sk, &fl, final_p, true);
+	if (IS_ERR(dst)) {
+		err = PTR_ERR(dst);
 		goto failure;
-	if (final_p)
-		ipv6_addr_copy(&fl.fl6_dst, final_p);
-
-	err = __xfrm_lookup(sock_net(sk), &dst, &fl, sk, XFRM_LOOKUP_WAIT);
-	if (err < 0) {
-		if (err == -EREMOTE)
-			err = ip6_dst_blackhole(sk, &dst, &fl);
-		if (err < 0)
-			goto failure;
 	}
 
 	if (saddr == NULL) {
@@ -385,7 +377,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	np = inet6_sk(sk);
 
 	if (type == ICMPV6_PKT_TOOBIG) {
-		struct dst_entry *dst = NULL;
+		struct dst_entry *dst;
 
 		if (sock_owned_by_user(sk))
 			goto out;
@@ -413,13 +405,9 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 			fl.fl_ip_sport = inet->inet_sport;
 			security_skb_classify_flow(skb, &fl);
 
-			if ((err = ip6_dst_lookup(sk, &dst, &fl))) {
-				sk->sk_err_soft = -err;
-				goto out;
-			}
-
-			if ((err = xfrm_lookup(net, &dst, &fl, sk, 0)) < 0) {
-				sk->sk_err_soft = -err;
+			dst = ip6_dst_lookup_flow(sk, &fl, NULL, false);
+			if (IS_ERR(dst)) {
+				sk->sk_err_soft = -PTR_ERR(dst);
 				goto out;
 			}
 
@@ -496,7 +484,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
 	struct in6_addr * final_p, final;
 	struct flowi fl;
 	struct dst_entry *dst;
-	int err = -1;
+	int err;
 
 	memset(&fl, 0, sizeof(fl));
 	fl.proto = IPPROTO_TCP;
@@ -512,15 +500,13 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
 	opt = np->opt;
 	final_p = fl6_update_dst(&fl, opt, &final);
 
-	err = ip6_dst_lookup(sk, &dst, &fl);
-	if (err)
+	dst = ip6_dst_lookup_flow(sk, &fl, final_p, false);
+	if (IS_ERR(dst)) {
+		err = PTR_ERR(dst);
 		goto done;
-	if (final_p)
-		ipv6_addr_copy(&fl.fl6_dst, final_p);
-	if ((err = xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0)
-		goto done;
-
+	}
 	skb = tcp_make_synack(sk, dst, req, rvp);
+	err = -ENOMEM;
 	if (skb) {
 		__tcp_v6_send_check(skb, &treq->loc_addr, &treq->rmt_addr);
 
@@ -1079,15 +1065,14 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win,
 	 * Underlying function will use this to retrieve the network
 	 * namespace
 	 */
-	if (!ip6_dst_lookup(ctl_sk, &dst, &fl)) {
-		if (xfrm_lookup(net, &dst, &fl, NULL, 0) >= 0) {
-			skb_dst_set(buff, dst);
-			ip6_xmit(ctl_sk, buff, &fl, NULL);
-			TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
-			if (rst)
-				TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
-			return;
-		}
+	dst = ip6_dst_lookup_flow(ctl_sk, &fl, NULL, false);
+	if (!IS_ERR(dst)) {
+		skb_dst_set(buff, dst);
+		ip6_xmit(ctl_sk, buff, &fl, NULL);
+		TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
+		if (rst)
+			TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
+		return;
 	}
 
 	kfree_skb(buff);
@@ -1323,7 +1308,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 		    tcp_death_row.sysctl_tw_recycle &&
 		    (dst = inet6_csk_route_req(sk, req)) != NULL &&
 		    (peer = rt6_get_peer((struct rt6_info *)dst)) != NULL &&
-		    ipv6_addr_equal((struct in6_addr *)peer->daddr.a6,
+		    ipv6_addr_equal((struct in6_addr *)peer->daddr.addr.a6,
 				    &treq->rmt_addr)) {
 			inet_peer_refcheck(peer);
 			if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL &&
@@ -1636,10 +1621,8 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
 		opt_skb = skb_clone(skb, GFP_ATOMIC);
 
 	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
-		TCP_CHECK_TIMER(sk);
 		if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len))
 			goto reset;
-		TCP_CHECK_TIMER(sk);
 		if (opt_skb)
 			goto ipv6_pktoptions;
 		return 0;
@@ -1667,10 +1650,8 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
 		}
 	}
 
-	TCP_CHECK_TIMER(sk);
 	if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len))
 		goto reset;
-	TCP_CHECK_TIMER(sk);
 	if (opt_skb)
 		goto ipv6_pktoptions;
 	return 0;
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 9a009c6..d86d7f6 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1125,18 +1125,11 @@ do_udp_sendmsg:
 
 	security_sk_classify_flow(sk, &fl);
 
-	err = ip6_sk_dst_lookup(sk, &dst, &fl);
-	if (err)
+	dst = ip6_sk_dst_lookup_flow(sk, &fl, final_p, true);
+	if (IS_ERR(dst)) {
+		err = PTR_ERR(dst);
+		dst = NULL;
 		goto out;
-	if (final_p)
-		ipv6_addr_copy(&fl.fl6_dst, final_p);
-
-	err = __xfrm_lookup(sock_net(sk), &dst, &fl, sk, XFRM_LOOKUP_WAIT);
-	if (err < 0) {
-		if (err == -EREMOTE)
-			err = ip6_dst_blackhole(sk, &dst, &fl);
-		if (err < 0)
-			goto out;
 	}
 
 	if (hlimit < 0) {
@@ -1299,7 +1292,7 @@ static int udp6_ufo_send_check(struct sk_buff *skb)
 	return 0;
 }
 
-static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, int features)
+static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, u32 features)
 {
 	struct sk_buff *segs = ERR_PTR(-EINVAL);
 	unsigned int mss;
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index da87428..48ce496 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -27,8 +27,8 @@
 static struct xfrm_policy_afinfo xfrm6_policy_afinfo;
 
 static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos,
-					  xfrm_address_t *saddr,
-					  xfrm_address_t *daddr)
+					  const xfrm_address_t *saddr,
+					  const xfrm_address_t *daddr)
 {
 	struct flowi fl = {};
 	struct dst_entry *dst;
@@ -67,7 +67,7 @@ static int xfrm6_get_saddr(struct net *net,
 	return 0;
 }
 
-static int xfrm6_get_tos(struct flowi *fl)
+static int xfrm6_get_tos(const struct flowi *fl)
 {
 	return 0;
 }
@@ -87,7 +87,7 @@ static int xfrm6_init_path(struct xfrm_dst *path, struct dst_entry *dst,
 }
 
 static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
-			  struct flowi *fl)
+			  const struct flowi *fl)
 {
 	struct rt6_info *rt = (struct rt6_info*)xdst->route;
 
@@ -220,6 +220,7 @@ static void xfrm6_dst_destroy(struct dst_entry *dst)
 
 	if (likely(xdst->u.rt6.rt6i_idev))
 		in6_dev_put(xdst->u.rt6.rt6i_idev);
+	dst_destroy_metrics_generic(dst);
 	if (likely(xdst->u.rt6.rt6i_peer))
 		inet_putpeer(xdst->u.rt6.rt6i_peer);
 	xfrm_dst_destroy(xdst);
@@ -257,6 +258,7 @@ static struct dst_ops xfrm6_dst_ops = {
 	.protocol =		cpu_to_be16(ETH_P_IPV6),
 	.gc =			xfrm6_garbage_collect,
 	.update_pmtu =		xfrm6_update_pmtu,
+	.cow_metrics =		dst_cow_metrics_generic,
 	.destroy =		xfrm6_dst_destroy,
 	.ifdown =		xfrm6_dst_ifdown,
 	.local_out =		__ip6_local_out,
@@ -272,6 +274,7 @@ static struct xfrm_policy_afinfo xfrm6_policy_afinfo = {
 	.get_tos =		xfrm6_get_tos,
 	.init_path =		xfrm6_init_path,
 	.fill_dst =		xfrm6_fill_dst,
+	.blackhole_route =	ip6_blackhole_route,
 };
 
 static int __init xfrm6_policy_init(void)
diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c
index a67575d..a02598e 100644
--- a/net/ipv6/xfrm6_state.c
+++ b/net/ipv6/xfrm6_state.c
@@ -20,7 +20,7 @@
 #include <net/addrconf.h>
 
 static void
-__xfrm6_init_tempsel(struct xfrm_selector *sel, struct flowi *fl)
+__xfrm6_init_tempsel(struct xfrm_selector *sel, const struct flowi *fl)
 {
 	/* Initialize temporary selector matching only
 	 * to current session. */
@@ -38,8 +38,8 @@ __xfrm6_init_tempsel(struct xfrm_selector *sel, struct flowi *fl)
 }
 
 static void
-xfrm6_init_temprop(struct xfrm_state *x, struct xfrm_tmpl *tmpl,
-		   xfrm_address_t *daddr, xfrm_address_t *saddr)
+xfrm6_init_temprop(struct xfrm_state *x, const struct xfrm_tmpl *tmpl,
+		   const xfrm_address_t *daddr, const xfrm_address_t *saddr)
 {
 	x->id = tmpl->id;
 	if (ipv6_addr_any((struct in6_addr*)&x->id.daddr))
diff --git a/net/key/af_key.c b/net/key/af_key.c
index d87c22d..7db86ff 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -70,7 +70,7 @@ static inline struct pfkey_sock *pfkey_sk(struct sock *sk)
 	return (struct pfkey_sock *)sk;
 }
 
-static int pfkey_can_dump(struct sock *sk)
+static int pfkey_can_dump(const struct sock *sk)
 {
 	if (3 * atomic_read(&sk->sk_rmem_alloc) <= 2 * sk->sk_rcvbuf)
 		return 1;
@@ -303,12 +303,13 @@ static int pfkey_do_dump(struct pfkey_sock *pfk)
 	return rc;
 }
 
-static inline void pfkey_hdr_dup(struct sadb_msg *new, struct sadb_msg *orig)
+static inline void pfkey_hdr_dup(struct sadb_msg *new,
+				 const struct sadb_msg *orig)
 {
 	*new = *orig;
 }
 
-static int pfkey_error(struct sadb_msg *orig, int err, struct sock *sk)
+static int pfkey_error(const struct sadb_msg *orig, int err, struct sock *sk)
 {
 	struct sk_buff *skb = alloc_skb(sizeof(struct sadb_msg) + 16, GFP_KERNEL);
 	struct sadb_msg *hdr;
@@ -369,13 +370,13 @@ static u8 sadb_ext_min_len[] = {
 };
 
 /* Verify sadb_address_{len,prefixlen} against sa_family.  */
-static int verify_address_len(void *p)
+static int verify_address_len(const void *p)
 {
-	struct sadb_address *sp = p;
-	struct sockaddr *addr = (struct sockaddr *)(sp + 1);
-	struct sockaddr_in *sin;
+	const struct sadb_address *sp = p;
+	const struct sockaddr *addr = (const struct sockaddr *)(sp + 1);
+	const struct sockaddr_in *sin;
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-	struct sockaddr_in6 *sin6;
+	const struct sockaddr_in6 *sin6;
 #endif
 	int len;
 
@@ -411,16 +412,16 @@ static int verify_address_len(void *p)
 	return 0;
 }
 
-static inline int pfkey_sec_ctx_len(struct sadb_x_sec_ctx *sec_ctx)
+static inline int pfkey_sec_ctx_len(const struct sadb_x_sec_ctx *sec_ctx)
 {
 	return DIV_ROUND_UP(sizeof(struct sadb_x_sec_ctx) +
 			    sec_ctx->sadb_x_ctx_len,
 			    sizeof(uint64_t));
 }
 
-static inline int verify_sec_ctx_len(void *p)
+static inline int verify_sec_ctx_len(const void *p)
 {
-	struct sadb_x_sec_ctx *sec_ctx = (struct sadb_x_sec_ctx *)p;
+	const struct sadb_x_sec_ctx *sec_ctx = p;
 	int len = sec_ctx->sadb_x_ctx_len;
 
 	if (len > PAGE_SIZE)
@@ -434,7 +435,7 @@ static inline int verify_sec_ctx_len(void *p)
 	return 0;
 }
 
-static inline struct xfrm_user_sec_ctx *pfkey_sadb2xfrm_user_sec_ctx(struct sadb_x_sec_ctx *sec_ctx)
+static inline struct xfrm_user_sec_ctx *pfkey_sadb2xfrm_user_sec_ctx(const struct sadb_x_sec_ctx *sec_ctx)
 {
 	struct xfrm_user_sec_ctx *uctx = NULL;
 	int ctx_size = sec_ctx->sadb_x_ctx_len;
@@ -455,16 +456,16 @@ static inline struct xfrm_user_sec_ctx *pfkey_sadb2xfrm_user_sec_ctx(struct sadb
 	return uctx;
 }
 
-static int present_and_same_family(struct sadb_address *src,
-				   struct sadb_address *dst)
+static int present_and_same_family(const struct sadb_address *src,
+				   const struct sadb_address *dst)
 {
-	struct sockaddr *s_addr, *d_addr;
+	const struct sockaddr *s_addr, *d_addr;
 
 	if (!src || !dst)
 		return 0;
 
-	s_addr = (struct sockaddr *)(src + 1);
-	d_addr = (struct sockaddr *)(dst + 1);
+	s_addr = (const struct sockaddr *)(src + 1);
+	d_addr = (const struct sockaddr *)(dst + 1);
 	if (s_addr->sa_family != d_addr->sa_family)
 		return 0;
 	if (s_addr->sa_family != AF_INET
@@ -477,15 +478,15 @@ static int present_and_same_family(struct sadb_address *src,
 	return 1;
 }
 
-static int parse_exthdrs(struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs)
+static int parse_exthdrs(struct sk_buff *skb, const struct sadb_msg *hdr, void **ext_hdrs)
 {
-	char *p = (char *) hdr;
+	const char *p = (char *) hdr;
 	int len = skb->len;
 
 	len -= sizeof(*hdr);
 	p += sizeof(*hdr);
 	while (len > 0) {
-		struct sadb_ext *ehdr = (struct sadb_ext *) p;
+		const struct sadb_ext *ehdr = (const struct sadb_ext *) p;
 		uint16_t ext_type;
 		int ext_len;
 
@@ -514,7 +515,7 @@ static int parse_exthdrs(struct sk_buff *skb, struct sadb_msg *hdr, void **ext_h
 				if (verify_sec_ctx_len(p))
 					return -EINVAL;
 			}
-			ext_hdrs[ext_type-1] = p;
+			ext_hdrs[ext_type-1] = (void *) p;
 		}
 		p   += ext_len;
 		len -= ext_len;
@@ -606,21 +607,21 @@ int pfkey_sockaddr_extract(const struct sockaddr *sa, xfrm_address_t *xaddr)
 }
 
 static
-int pfkey_sadb_addr2xfrm_addr(struct sadb_address *addr, xfrm_address_t *xaddr)
+int pfkey_sadb_addr2xfrm_addr(const struct sadb_address *addr, xfrm_address_t *xaddr)
 {
 	return pfkey_sockaddr_extract((struct sockaddr *)(addr + 1),
 				      xaddr);
 }
 
-static struct  xfrm_state *pfkey_xfrm_state_lookup(struct net *net, struct sadb_msg *hdr, void **ext_hdrs)
+static struct  xfrm_state *pfkey_xfrm_state_lookup(struct net *net, const struct sadb_msg *hdr, void * const *ext_hdrs)
 {
-	struct sadb_sa *sa;
-	struct sadb_address *addr;
+	const struct sadb_sa *sa;
+	const struct sadb_address *addr;
 	uint16_t proto;
 	unsigned short family;
 	xfrm_address_t *xaddr;
 
-	sa = (struct sadb_sa *) ext_hdrs[SADB_EXT_SA-1];
+	sa = (const struct sadb_sa *) ext_hdrs[SADB_EXT_SA-1];
 	if (sa == NULL)
 		return NULL;
 
@@ -629,18 +630,18 @@ static struct  xfrm_state *pfkey_xfrm_state_lookup(struct net *net, struct sadb_
 		return NULL;
 
 	/* sadb_address_len should be checked by caller */
-	addr = (struct sadb_address *) ext_hdrs[SADB_EXT_ADDRESS_DST-1];
+	addr = (const struct sadb_address *) ext_hdrs[SADB_EXT_ADDRESS_DST-1];
 	if (addr == NULL)
 		return NULL;
 
-	family = ((struct sockaddr *)(addr + 1))->sa_family;
+	family = ((const struct sockaddr *)(addr + 1))->sa_family;
 	switch (family) {
 	case AF_INET:
-		xaddr = (xfrm_address_t *)&((struct sockaddr_in *)(addr + 1))->sin_addr;
+		xaddr = (xfrm_address_t *)&((const struct sockaddr_in *)(addr + 1))->sin_addr;
 		break;
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 	case AF_INET6:
-		xaddr = (xfrm_address_t *)&((struct sockaddr_in6 *)(addr + 1))->sin6_addr;
+		xaddr = (xfrm_address_t *)&((const struct sockaddr_in6 *)(addr + 1))->sin6_addr;
 		break;
 #endif
 	default:
@@ -690,9 +691,9 @@ static inline int pfkey_mode_to_xfrm(int mode)
 	}
 }
 
-static unsigned int pfkey_sockaddr_fill(xfrm_address_t *xaddr, __be16 port,
-				       struct sockaddr *sa,
-				       unsigned short family)
+static unsigned int pfkey_sockaddr_fill(const xfrm_address_t *xaddr, __be16 port,
+					struct sockaddr *sa,
+					unsigned short family)
 {
 	switch (family) {
 	case AF_INET:
@@ -720,7 +721,7 @@ static unsigned int pfkey_sockaddr_fill(xfrm_address_t *xaddr, __be16 port,
 	return 0;
 }
 
-static struct sk_buff *__pfkey_xfrm_state2msg(struct xfrm_state *x,
+static struct sk_buff *__pfkey_xfrm_state2msg(const struct xfrm_state *x,
 					      int add_keys, int hsc)
 {
 	struct sk_buff *skb;
@@ -1010,7 +1011,7 @@ static struct sk_buff *__pfkey_xfrm_state2msg(struct xfrm_state *x,
 }
 
 
-static inline struct sk_buff *pfkey_xfrm_state2msg(struct xfrm_state *x)
+static inline struct sk_buff *pfkey_xfrm_state2msg(const struct xfrm_state *x)
 {
 	struct sk_buff *skb;
 
@@ -1019,26 +1020,26 @@ static inline struct sk_buff *pfkey_xfrm_state2msg(struct xfrm_state *x)
 	return skb;
 }
 
-static inline struct sk_buff *pfkey_xfrm_state2msg_expire(struct xfrm_state *x,
+static inline struct sk_buff *pfkey_xfrm_state2msg_expire(const struct xfrm_state *x,
 							  int hsc)
 {
 	return __pfkey_xfrm_state2msg(x, 0, hsc);
 }
 
 static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net,
-						struct sadb_msg *hdr,
-						void **ext_hdrs)
+						const struct sadb_msg *hdr,
+						void * const *ext_hdrs)
 {
 	struct xfrm_state *x;
-	struct sadb_lifetime *lifetime;
-	struct sadb_sa *sa;
-	struct sadb_key *key;
-	struct sadb_x_sec_ctx *sec_ctx;
+	const struct sadb_lifetime *lifetime;
+	const struct sadb_sa *sa;
+	const struct sadb_key *key;
+	const struct sadb_x_sec_ctx *sec_ctx;
 	uint16_t proto;
 	int err;
 
 
-	sa = (struct sadb_sa *) ext_hdrs[SADB_EXT_SA-1];
+	sa = (const struct sadb_sa *) ext_hdrs[SADB_EXT_SA-1];
 	if (!sa ||
 	    !present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC-1],
 				     ext_hdrs[SADB_EXT_ADDRESS_DST-1]))
@@ -1077,7 +1078,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net,
 	     sa->sadb_sa_encrypt > SADB_X_CALG_MAX) ||
 	    sa->sadb_sa_encrypt > SADB_EALG_MAX)
 		return ERR_PTR(-EINVAL);
-	key = (struct sadb_key*) ext_hdrs[SADB_EXT_KEY_AUTH-1];
+	key = (const struct sadb_key*) ext_hdrs[SADB_EXT_KEY_AUTH-1];
 	if (key != NULL &&
 	    sa->sadb_sa_auth != SADB_X_AALG_NULL &&
 	    ((key->sadb_key_bits+7) / 8 == 0 ||
@@ -1104,14 +1105,14 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net,
 	if (sa->sadb_sa_flags & SADB_SAFLAGS_NOPMTUDISC)
 		x->props.flags |= XFRM_STATE_NOPMTUDISC;
 
-	lifetime = (struct sadb_lifetime*) ext_hdrs[SADB_EXT_LIFETIME_HARD-1];
+	lifetime = (const struct sadb_lifetime*) ext_hdrs[SADB_EXT_LIFETIME_HARD-1];
 	if (lifetime != NULL) {
 		x->lft.hard_packet_limit = _KEY2X(lifetime->sadb_lifetime_allocations);
 		x->lft.hard_byte_limit = _KEY2X(lifetime->sadb_lifetime_bytes);
 		x->lft.hard_add_expires_seconds = lifetime->sadb_lifetime_addtime;
 		x->lft.hard_use_expires_seconds = lifetime->sadb_lifetime_usetime;
 	}
-	lifetime = (struct sadb_lifetime*) ext_hdrs[SADB_EXT_LIFETIME_SOFT-1];
+	lifetime = (const struct sadb_lifetime*) ext_hdrs[SADB_EXT_LIFETIME_SOFT-1];
 	if (lifetime != NULL) {
 		x->lft.soft_packet_limit = _KEY2X(lifetime->sadb_lifetime_allocations);
 		x->lft.soft_byte_limit = _KEY2X(lifetime->sadb_lifetime_bytes);
@@ -1119,7 +1120,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net,
 		x->lft.soft_use_expires_seconds = lifetime->sadb_lifetime_usetime;
 	}
 
-	sec_ctx = (struct sadb_x_sec_ctx *) ext_hdrs[SADB_X_EXT_SEC_CTX-1];
+	sec_ctx = (const struct sadb_x_sec_ctx *) ext_hdrs[SADB_X_EXT_SEC_CTX-1];
 	if (sec_ctx != NULL) {
 		struct xfrm_user_sec_ctx *uctx = pfkey_sadb2xfrm_user_sec_ctx(sec_ctx);
 
@@ -1133,7 +1134,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net,
 			goto out;
 	}
 
-	key = (struct sadb_key*) ext_hdrs[SADB_EXT_KEY_AUTH-1];
+	key = (const struct sadb_key*) ext_hdrs[SADB_EXT_KEY_AUTH-1];
 	if (sa->sadb_sa_auth) {
 		int keysize = 0;
 		struct xfrm_algo_desc *a = xfrm_aalg_get_byid(sa->sadb_sa_auth);
@@ -1202,7 +1203,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net,
 				  &x->id.daddr);
 
 	if (ext_hdrs[SADB_X_EXT_SA2-1]) {
-		struct sadb_x_sa2 *sa2 = (void*)ext_hdrs[SADB_X_EXT_SA2-1];
+		const struct sadb_x_sa2 *sa2 = ext_hdrs[SADB_X_EXT_SA2-1];
 		int mode = pfkey_mode_to_xfrm(sa2->sadb_x_sa2_mode);
 		if (mode < 0) {
 			err = -EINVAL;
@@ -1213,7 +1214,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net,
 	}
 
 	if (ext_hdrs[SADB_EXT_ADDRESS_PROXY-1]) {
-		struct sadb_address *addr = ext_hdrs[SADB_EXT_ADDRESS_PROXY-1];
+		const struct sadb_address *addr = ext_hdrs[SADB_EXT_ADDRESS_PROXY-1];
 
 		/* Nobody uses this, but we try. */
 		x->sel.family = pfkey_sadb_addr2xfrm_addr(addr, &x->sel.saddr);
@@ -1224,7 +1225,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net,
 		x->sel.family = x->props.family;
 
 	if (ext_hdrs[SADB_X_EXT_NAT_T_TYPE-1]) {
-		struct sadb_x_nat_t_type* n_type;
+		const struct sadb_x_nat_t_type* n_type;
 		struct xfrm_encap_tmpl *natt;
 
 		x->encap = kmalloc(sizeof(*x->encap), GFP_KERNEL);
@@ -1236,12 +1237,12 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net,
 		natt->encap_type = n_type->sadb_x_nat_t_type_type;
 
 		if (ext_hdrs[SADB_X_EXT_NAT_T_SPORT-1]) {
-			struct sadb_x_nat_t_port* n_port =
+			const struct sadb_x_nat_t_port *n_port =
 				ext_hdrs[SADB_X_EXT_NAT_T_SPORT-1];
 			natt->encap_sport = n_port->sadb_x_nat_t_port_port;
 		}
 		if (ext_hdrs[SADB_X_EXT_NAT_T_DPORT-1]) {
-			struct sadb_x_nat_t_port* n_port =
+			const struct sadb_x_nat_t_port *n_port =
 				ext_hdrs[SADB_X_EXT_NAT_T_DPORT-1];
 			natt->encap_dport = n_port->sadb_x_nat_t_port_port;
 		}
@@ -1261,12 +1262,12 @@ out:
 	return ERR_PTR(err);
 }
 
-static int pfkey_reserved(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs)
+static int pfkey_reserved(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs)
 {
 	return -EOPNOTSUPP;
 }
 
-static int pfkey_getspi(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs)
+static int pfkey_getspi(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs)
 {
 	struct net *net = sock_net(sk);
 	struct sk_buff *resp_skb;
@@ -1365,7 +1366,7 @@ static int pfkey_getspi(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h
 	return 0;
 }
 
-static int pfkey_acquire(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs)
+static int pfkey_acquire(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs)
 {
 	struct net *net = sock_net(sk);
 	struct xfrm_state *x;
@@ -1429,7 +1430,7 @@ static inline int event2keytype(int event)
 }
 
 /* ADD/UPD/DEL */
-static int key_notify_sa(struct xfrm_state *x, struct km_event *c)
+static int key_notify_sa(struct xfrm_state *x, const struct km_event *c)
 {
 	struct sk_buff *skb;
 	struct sadb_msg *hdr;
@@ -1453,7 +1454,7 @@ static int key_notify_sa(struct xfrm_state *x, struct km_event *c)
 	return 0;
 }
 
-static int pfkey_add(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs)
+static int pfkey_add(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs)
 {
 	struct net *net = sock_net(sk);
 	struct xfrm_state *x;
@@ -1492,7 +1493,7 @@ out:
 	return err;
 }
 
-static int pfkey_delete(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs)
+static int pfkey_delete(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs)
 {
 	struct net *net = sock_net(sk);
 	struct xfrm_state *x;
@@ -1534,7 +1535,7 @@ out:
 	return err;
 }
 
-static int pfkey_get(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs)
+static int pfkey_get(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs)
 {
 	struct net *net = sock_net(sk);
 	__u8 proto;
@@ -1570,7 +1571,7 @@ static int pfkey_get(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr,
 	return 0;
 }
 
-static struct sk_buff *compose_sadb_supported(struct sadb_msg *orig,
+static struct sk_buff *compose_sadb_supported(const struct sadb_msg *orig,
 					      gfp_t allocation)
 {
 	struct sk_buff *skb;
@@ -1642,7 +1643,7 @@ out_put_algs:
 	return skb;
 }
 
-static int pfkey_register(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs)
+static int pfkey_register(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs)
 {
 	struct pfkey_sock *pfk = pfkey_sk(sk);
 	struct sk_buff *supp_skb;
@@ -1671,7 +1672,7 @@ static int pfkey_register(struct sock *sk, struct sk_buff *skb, struct sadb_msg
 	return 0;
 }
 
-static int unicast_flush_resp(struct sock *sk, struct sadb_msg *ihdr)
+static int unicast_flush_resp(struct sock *sk, const struct sadb_msg *ihdr)
 {
 	struct sk_buff *skb;
 	struct sadb_msg *hdr;
@@ -1688,7 +1689,7 @@ static int unicast_flush_resp(struct sock *sk, struct sadb_msg *ihdr)
 	return pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ONE, sk, sock_net(sk));
 }
 
-static int key_notify_sa_flush(struct km_event *c)
+static int key_notify_sa_flush(const struct km_event *c)
 {
 	struct sk_buff *skb;
 	struct sadb_msg *hdr;
@@ -1710,7 +1711,7 @@ static int key_notify_sa_flush(struct km_event *c)
 	return 0;
 }
 
-static int pfkey_flush(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs)
+static int pfkey_flush(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs)
 {
 	struct net *net = sock_net(sk);
 	unsigned proto;
@@ -1784,7 +1785,7 @@ static void pfkey_dump_sa_done(struct pfkey_sock *pfk)
 	xfrm_state_walk_done(&pfk->dump.u.state);
 }
 
-static int pfkey_dump(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs)
+static int pfkey_dump(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs)
 {
 	u8 proto;
 	struct pfkey_sock *pfk = pfkey_sk(sk);
@@ -1805,19 +1806,29 @@ static int pfkey_dump(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr
 	return pfkey_do_dump(pfk);
 }
 
-static int pfkey_promisc(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs)
+static int pfkey_promisc(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs)
 {
 	struct pfkey_sock *pfk = pfkey_sk(sk);
 	int satype = hdr->sadb_msg_satype;
+	bool reset_errno = false;
 
 	if (hdr->sadb_msg_len == (sizeof(*hdr) / sizeof(uint64_t))) {
-		/* XXX we mangle packet... */
-		hdr->sadb_msg_errno = 0;
+		reset_errno = true;
 		if (satype != 0 && satype != 1)
 			return -EINVAL;
 		pfk->promisc = satype;
 	}
-	pfkey_broadcast(skb_clone(skb, GFP_KERNEL), GFP_KERNEL, BROADCAST_ALL, NULL, sock_net(sk));
+	if (reset_errno && skb_cloned(skb))
+		skb = skb_copy(skb, GFP_KERNEL);
+	else
+		skb = skb_clone(skb, GFP_KERNEL);
+
+	if (reset_errno && skb) {
+		struct sadb_msg *new_hdr = (struct sadb_msg *) skb->data;
+		new_hdr->sadb_msg_errno = 0;
+	}
+
+	pfkey_broadcast(skb, GFP_KERNEL, BROADCAST_ALL, NULL, sock_net(sk));
 	return 0;
 }
 
@@ -1921,7 +1932,7 @@ parse_ipsecrequests(struct xfrm_policy *xp, struct sadb_x_policy *pol)
 	return 0;
 }
 
-static inline int pfkey_xfrm_policy2sec_ctx_size(struct xfrm_policy *xp)
+static inline int pfkey_xfrm_policy2sec_ctx_size(const struct xfrm_policy *xp)
 {
   struct xfrm_sec_ctx *xfrm_ctx = xp->security;
 
@@ -1933,9 +1944,9 @@ static inline int pfkey_xfrm_policy2sec_ctx_size(struct xfrm_policy *xp)
 	return 0;
 }
 
-static int pfkey_xfrm_policy2msg_size(struct xfrm_policy *xp)
+static int pfkey_xfrm_policy2msg_size(const struct xfrm_policy *xp)
 {
-	struct xfrm_tmpl *t;
+	const struct xfrm_tmpl *t;
 	int sockaddr_size = pfkey_sockaddr_size(xp->family);
 	int socklen = 0;
 	int i;
@@ -1955,7 +1966,7 @@ static int pfkey_xfrm_policy2msg_size(struct xfrm_policy *xp)
 		pfkey_xfrm_policy2sec_ctx_size(xp);
 }
 
-static struct sk_buff * pfkey_xfrm_policy2msg_prep(struct xfrm_policy *xp)
+static struct sk_buff * pfkey_xfrm_policy2msg_prep(const struct xfrm_policy *xp)
 {
 	struct sk_buff *skb;
 	int size;
@@ -1969,7 +1980,7 @@ static struct sk_buff * pfkey_xfrm_policy2msg_prep(struct xfrm_policy *xp)
 	return skb;
 }
 
-static int pfkey_xfrm_policy2msg(struct sk_buff *skb, struct xfrm_policy *xp, int dir)
+static int pfkey_xfrm_policy2msg(struct sk_buff *skb, const struct xfrm_policy *xp, int dir)
 {
 	struct sadb_msg *hdr;
 	struct sadb_address *addr;
@@ -2065,8 +2076,8 @@ static int pfkey_xfrm_policy2msg(struct sk_buff *skb, struct xfrm_policy *xp, in
 	pol->sadb_x_policy_priority = xp->priority;
 
 	for (i=0; i<xp->xfrm_nr; i++) {
+		const struct xfrm_tmpl *t = xp->xfrm_vec + i;
 		struct sadb_x_ipsecrequest *rq;
-		struct xfrm_tmpl *t = xp->xfrm_vec + i;
 		int req_size;
 		int mode;
 
@@ -2123,7 +2134,7 @@ static int pfkey_xfrm_policy2msg(struct sk_buff *skb, struct xfrm_policy *xp, in
 	return 0;
 }
 
-static int key_notify_policy(struct xfrm_policy *xp, int dir, struct km_event *c)
+static int key_notify_policy(struct xfrm_policy *xp, int dir, const struct km_event *c)
 {
 	struct sk_buff *out_skb;
 	struct sadb_msg *out_hdr;
@@ -2152,7 +2163,7 @@ static int key_notify_policy(struct xfrm_policy *xp, int dir, struct km_event *c
 
 }
 
-static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs)
+static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs)
 {
 	struct net *net = sock_net(sk);
 	int err = 0;
@@ -2273,7 +2284,7 @@ out:
 	return err;
 }
 
-static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs)
+static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs)
 {
 	struct net *net = sock_net(sk);
 	int err;
@@ -2350,7 +2361,7 @@ out:
 	return err;
 }
 
-static int key_pol_get_resp(struct sock *sk, struct xfrm_policy *xp, struct sadb_msg *hdr, int dir)
+static int key_pol_get_resp(struct sock *sk, struct xfrm_policy *xp, const struct sadb_msg *hdr, int dir)
 {
 	int err;
 	struct sk_buff *out_skb;
@@ -2458,7 +2469,7 @@ static int ipsecrequests_to_migrate(struct sadb_x_ipsecrequest *rq1, int len,
 }
 
 static int pfkey_migrate(struct sock *sk, struct sk_buff *skb,
-			 struct sadb_msg *hdr, void **ext_hdrs)
+			 const struct sadb_msg *hdr, void * const *ext_hdrs)
 {
 	int i, len, ret, err = -EINVAL;
 	u8 dir;
@@ -2549,14 +2560,14 @@ static int pfkey_migrate(struct sock *sk, struct sk_buff *skb,
 }
 #else
 static int pfkey_migrate(struct sock *sk, struct sk_buff *skb,
-			 struct sadb_msg *hdr, void **ext_hdrs)
+			 const struct sadb_msg *hdr, void * const *ext_hdrs)
 {
 	return -ENOPROTOOPT;
 }
 #endif
 
 
-static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs)
+static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs)
 {
 	struct net *net = sock_net(sk);
 	unsigned int dir;
@@ -2644,7 +2655,7 @@ static void pfkey_dump_sp_done(struct pfkey_sock *pfk)
 	xfrm_policy_walk_done(&pfk->dump.u.policy);
 }
 
-static int pfkey_spddump(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs)
+static int pfkey_spddump(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs)
 {
 	struct pfkey_sock *pfk = pfkey_sk(sk);
 
@@ -2660,7 +2671,7 @@ static int pfkey_spddump(struct sock *sk, struct sk_buff *skb, struct sadb_msg *
 	return pfkey_do_dump(pfk);
 }
 
-static int key_notify_policy_flush(struct km_event *c)
+static int key_notify_policy_flush(const struct km_event *c)
 {
 	struct sk_buff *skb_out;
 	struct sadb_msg *hdr;
@@ -2680,7 +2691,7 @@ static int key_notify_policy_flush(struct km_event *c)
 
 }
 
-static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs)
+static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs)
 {
 	struct net *net = sock_net(sk);
 	struct km_event c;
@@ -2709,7 +2720,7 @@ static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, struct sadb_msg
 }
 
 typedef int (*pfkey_handler)(struct sock *sk, struct sk_buff *skb,
-			     struct sadb_msg *hdr, void **ext_hdrs);
+			     const struct sadb_msg *hdr, void * const *ext_hdrs);
 static pfkey_handler pfkey_funcs[SADB_MAX + 1] = {
 	[SADB_RESERVED]		= pfkey_reserved,
 	[SADB_GETSPI]		= pfkey_getspi,
@@ -2736,7 +2747,7 @@ static pfkey_handler pfkey_funcs[SADB_MAX + 1] = {
 	[SADB_X_MIGRATE]	= pfkey_migrate,
 };
 
-static int pfkey_process(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr)
+static int pfkey_process(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr)
 {
 	void *ext_hdrs[SADB_EXT_MAX];
 	int err;
@@ -2781,7 +2792,8 @@ static struct sadb_msg *pfkey_get_base_msg(struct sk_buff *skb, int *errp)
 	return hdr;
 }
 
-static inline int aalg_tmpl_set(struct xfrm_tmpl *t, struct xfrm_algo_desc *d)
+static inline int aalg_tmpl_set(const struct xfrm_tmpl *t,
+				const struct xfrm_algo_desc *d)
 {
 	unsigned int id = d->desc.sadb_alg_id;
 
@@ -2791,7 +2803,8 @@ static inline int aalg_tmpl_set(struct xfrm_tmpl *t, struct xfrm_algo_desc *d)
 	return (t->aalgos >> id) & 1;
 }
 
-static inline int ealg_tmpl_set(struct xfrm_tmpl *t, struct xfrm_algo_desc *d)
+static inline int ealg_tmpl_set(const struct xfrm_tmpl *t,
+				const struct xfrm_algo_desc *d)
 {
 	unsigned int id = d->desc.sadb_alg_id;
 
@@ -2801,12 +2814,12 @@ static inline int ealg_tmpl_set(struct xfrm_tmpl *t, struct xfrm_algo_desc *d)
 	return (t->ealgos >> id) & 1;
 }
 
-static int count_ah_combs(struct xfrm_tmpl *t)
+static int count_ah_combs(const struct xfrm_tmpl *t)
 {
 	int i, sz = 0;
 
 	for (i = 0; ; i++) {
-		struct xfrm_algo_desc *aalg = xfrm_aalg_get_byidx(i);
+		const struct xfrm_algo_desc *aalg = xfrm_aalg_get_byidx(i);
 		if (!aalg)
 			break;
 		if (aalg_tmpl_set(t, aalg) && aalg->available)
@@ -2815,12 +2828,12 @@ static int count_ah_combs(struct xfrm_tmpl *t)
 	return sz + sizeof(struct sadb_prop);
 }
 
-static int count_esp_combs(struct xfrm_tmpl *t)
+static int count_esp_combs(const struct xfrm_tmpl *t)
 {
 	int i, k, sz = 0;
 
 	for (i = 0; ; i++) {
-		struct xfrm_algo_desc *ealg = xfrm_ealg_get_byidx(i);
+		const struct xfrm_algo_desc *ealg = xfrm_ealg_get_byidx(i);
 		if (!ealg)
 			break;
 
@@ -2828,7 +2841,7 @@ static int count_esp_combs(struct xfrm_tmpl *t)
 			continue;
 
 		for (k = 1; ; k++) {
-			struct xfrm_algo_desc *aalg = xfrm_aalg_get_byidx(k);
+			const struct xfrm_algo_desc *aalg = xfrm_aalg_get_byidx(k);
 			if (!aalg)
 				break;
 
@@ -2839,7 +2852,7 @@ static int count_esp_combs(struct xfrm_tmpl *t)
 	return sz + sizeof(struct sadb_prop);
 }
 
-static void dump_ah_combs(struct sk_buff *skb, struct xfrm_tmpl *t)
+static void dump_ah_combs(struct sk_buff *skb, const struct xfrm_tmpl *t)
 {
 	struct sadb_prop *p;
 	int i;
@@ -2851,7 +2864,7 @@ static void dump_ah_combs(struct sk_buff *skb, struct xfrm_tmpl *t)
 	memset(p->sadb_prop_reserved, 0, sizeof(p->sadb_prop_reserved));
 
 	for (i = 0; ; i++) {
-		struct xfrm_algo_desc *aalg = xfrm_aalg_get_byidx(i);
+		const struct xfrm_algo_desc *aalg = xfrm_aalg_get_byidx(i);
 		if (!aalg)
 			break;
 
@@ -2871,7 +2884,7 @@ static void dump_ah_combs(struct sk_buff *skb, struct xfrm_tmpl *t)
 	}
 }
 
-static void dump_esp_combs(struct sk_buff *skb, struct xfrm_tmpl *t)
+static void dump_esp_combs(struct sk_buff *skb, const struct xfrm_tmpl *t)
 {
 	struct sadb_prop *p;
 	int i, k;
@@ -2883,7 +2896,7 @@ static void dump_esp_combs(struct sk_buff *skb, struct xfrm_tmpl *t)
 	memset(p->sadb_prop_reserved, 0, sizeof(p->sadb_prop_reserved));
 
 	for (i=0; ; i++) {
-		struct xfrm_algo_desc *ealg = xfrm_ealg_get_byidx(i);
+		const struct xfrm_algo_desc *ealg = xfrm_ealg_get_byidx(i);
 		if (!ealg)
 			break;
 
@@ -2892,7 +2905,7 @@ static void dump_esp_combs(struct sk_buff *skb, struct xfrm_tmpl *t)
 
 		for (k = 1; ; k++) {
 			struct sadb_comb *c;
-			struct xfrm_algo_desc *aalg = xfrm_aalg_get_byidx(k);
+			const struct xfrm_algo_desc *aalg = xfrm_aalg_get_byidx(k);
 			if (!aalg)
 				break;
 			if (!(aalg_tmpl_set(t, aalg) && aalg->available))
@@ -2914,12 +2927,12 @@ static void dump_esp_combs(struct sk_buff *skb, struct xfrm_tmpl *t)
 	}
 }
 
-static int key_notify_policy_expire(struct xfrm_policy *xp, struct km_event *c)
+static int key_notify_policy_expire(struct xfrm_policy *xp, const struct km_event *c)
 {
 	return 0;
 }
 
-static int key_notify_sa_expire(struct xfrm_state *x, struct km_event *c)
+static int key_notify_sa_expire(struct xfrm_state *x, const struct km_event *c)
 {
 	struct sk_buff *out_skb;
 	struct sadb_msg *out_hdr;
@@ -2949,7 +2962,7 @@ static int key_notify_sa_expire(struct xfrm_state *x, struct km_event *c)
 	return 0;
 }
 
-static int pfkey_send_notify(struct xfrm_state *x, struct km_event *c)
+static int pfkey_send_notify(struct xfrm_state *x, const struct km_event *c)
 {
 	struct net *net = x ? xs_net(x) : c->net;
 	struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id);
@@ -2976,7 +2989,7 @@ static int pfkey_send_notify(struct xfrm_state *x, struct km_event *c)
 	return 0;
 }
 
-static int pfkey_send_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c)
+static int pfkey_send_policy_notify(struct xfrm_policy *xp, int dir, const struct km_event *c)
 {
 	if (xp && xp->type != XFRM_POLICY_TYPE_MAIN)
 		return 0;
@@ -3318,7 +3331,7 @@ static int pfkey_send_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr,
 
 #ifdef CONFIG_NET_KEY_MIGRATE
 static int set_sadb_address(struct sk_buff *skb, int sasize, int type,
-			    struct xfrm_selector *sel)
+			    const struct xfrm_selector *sel)
 {
 	struct sadb_address *addr;
 	addr = (struct sadb_address *)skb_put(skb, sizeof(struct sadb_address) + sasize);
@@ -3348,7 +3361,7 @@ static int set_sadb_address(struct sk_buff *skb, int sasize, int type,
 }
 
 
-static int set_sadb_kmaddress(struct sk_buff *skb, struct xfrm_kmaddress *k)
+static int set_sadb_kmaddress(struct sk_buff *skb, const struct xfrm_kmaddress *k)
 {
 	struct sadb_x_kmaddress *kma;
 	u8 *sa;
@@ -3376,7 +3389,7 @@ static int set_sadb_kmaddress(struct sk_buff *skb, struct xfrm_kmaddress *k)
 static int set_ipsecrequest(struct sk_buff *skb,
 			    uint8_t proto, uint8_t mode, int level,
 			    uint32_t reqid, uint8_t family,
-			    xfrm_address_t *src, xfrm_address_t *dst)
+			    const xfrm_address_t *src, const xfrm_address_t *dst)
 {
 	struct sadb_x_ipsecrequest *rq;
 	u8 *sa;
@@ -3404,9 +3417,9 @@ static int set_ipsecrequest(struct sk_buff *skb,
 #endif
 
 #ifdef CONFIG_NET_KEY_MIGRATE
-static int pfkey_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type,
-			      struct xfrm_migrate *m, int num_bundles,
-			      struct xfrm_kmaddress *k)
+static int pfkey_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
+			      const struct xfrm_migrate *m, int num_bundles,
+			      const struct xfrm_kmaddress *k)
 {
 	int i;
 	int sasize_sel;
@@ -3415,7 +3428,7 @@ static int pfkey_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type,
 	struct sk_buff *skb;
 	struct sadb_msg *hdr;
 	struct sadb_x_policy *pol;
-	struct xfrm_migrate *mp;
+	const struct xfrm_migrate *mp;
 
 	if (type != XFRM_POLICY_TYPE_MAIN)
 		return 0;
@@ -3513,9 +3526,9 @@ err:
 	return -EINVAL;
 }
 #else
-static int pfkey_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type,
-			      struct xfrm_migrate *m, int num_bundles,
-			      struct xfrm_kmaddress *k)
+static int pfkey_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
+			      const struct xfrm_migrate *m, int num_bundles,
+			      const struct xfrm_kmaddress *k)
 {
 	return -ENOPROTOOPT;
 }
@@ -3655,6 +3668,7 @@ static int pfkey_seq_show(struct seq_file *f, void *v)
 }
 
 static void *pfkey_seq_start(struct seq_file *f, loff_t *ppos)
+	__acquires(rcu)
 {
 	struct net *net = seq_file_net(f);
 	struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id);
@@ -3672,6 +3686,7 @@ static void *pfkey_seq_next(struct seq_file *f, void *v, loff_t *ppos)
 }
 
 static void pfkey_seq_stop(struct seq_file *f, void *v)
+	__releases(rcu)
 {
 	rcu_read_unlock();
 }
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index 110efb7..2a698ff 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -320,11 +320,12 @@ static int l2tp_ip_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len
 	if (ipv4_is_multicast(lsa->l2tp_addr.s_addr))
 		goto out;
 
-	rc = ip_route_connect(&rt, lsa->l2tp_addr.s_addr, saddr,
+	rt = ip_route_connect(lsa->l2tp_addr.s_addr, saddr,
 			      RT_CONN_FLAGS(sk), oif,
 			      IPPROTO_L2TP,
-			      0, 0, sk, 1);
-	if (rc) {
+			      0, 0, sk, true);
+	if (IS_ERR(rt)) {
+		rc = PTR_ERR(rt);
 		if (rc == -ENETUNREACH)
 			IP_INC_STATS_BH(&init_net, IPSTATS_MIB_OUTNOROUTES);
 		goto out;
@@ -489,7 +490,8 @@ static int l2tp_ip_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *m
 			 * itself out.
 			 */
 			security_sk_classify_flow(sk, &fl);
-			if (ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0))
+			rt = ip_route_output_flow(sock_net(sk), &fl, sk);
+			if (IS_ERR(rt))
 				goto no_route;
 		}
 		sk_setup_caps(sk, &rt->dst);
diff --git a/net/llc/llc_input.c b/net/llc/llc_input.c
index f996874..058f1e9 100644
--- a/net/llc/llc_input.c
+++ b/net/llc/llc_input.c
@@ -181,25 +181,26 @@ int llc_rcv(struct sk_buff *skb, struct net_device *dev,
 	 * LLC functionality
 	 */
 	rcv = rcu_dereference(sap->rcv_func);
-	if (rcv) {
-		struct sk_buff *cskb = skb_clone(skb, GFP_ATOMIC);
-		if (cskb)
-			rcv(cskb, dev, pt, orig_dev);
-	}
 	dest = llc_pdu_type(skb);
-	if (unlikely(!dest || !llc_type_handlers[dest - 1]))
-		goto drop_put;
-	llc_type_handlers[dest - 1](sap, skb);
-out_put:
+	if (unlikely(!dest || !llc_type_handlers[dest - 1])) {
+		if (rcv)
+			rcv(skb, dev, pt, orig_dev);
+		else
+			kfree_skb(skb);
+	} else {
+		if (rcv) {
+			struct sk_buff *cskb = skb_clone(skb, GFP_ATOMIC);
+			if (cskb)
+				rcv(cskb, dev, pt, orig_dev);
+		}
+		llc_type_handlers[dest - 1](sap, skb);
+	}
 	llc_sap_put(sap);
 out:
 	return 0;
 drop:
 	kfree_skb(skb);
 	goto out;
-drop_put:
-	kfree_skb(skb);
-	goto out_put;
 handle_station:
 	if (!llc_station_handler)
 		goto drop;
diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig
index c766056..513f85c 100644
--- a/net/mac80211/Kconfig
+++ b/net/mac80211/Kconfig
@@ -17,7 +17,7 @@ comment "CFG80211 needs to be enabled for MAC80211"
 if MAC80211 != n
 
 config MAC80211_HAS_RC
-	def_bool n
+	bool
 
 config MAC80211_RC_PID
 	bool "PID controller based rate control algorithm" if EXPERT
@@ -78,7 +78,7 @@ config MAC80211_RC_DEFAULT
 endif
 
 comment "Some wireless drivers require a rate control algorithm"
-	depends on MAC80211_HAS_RC=n
+	depends on MAC80211 && MAC80211_HAS_RC=n
 
 config MAC80211_MESH
 	bool "Enable mac80211 mesh networking (pre-802.11s) support"
diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c
index 227ca82..0c9d0c0 100644
--- a/net/mac80211/agg-rx.c
+++ b/net/mac80211/agg-rx.c
@@ -76,7 +76,7 @@ void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid,
 #endif /* CONFIG_MAC80211_HT_DEBUG */
 
 	if (drv_ampdu_action(local, sta->sdata, IEEE80211_AMPDU_RX_STOP,
-			     &sta->sta, tid, NULL))
+			     &sta->sta, tid, NULL, 0))
 		printk(KERN_DEBUG "HW problem - can not stop rx "
 				"aggregation for tid %d\n", tid);
 
@@ -232,6 +232,9 @@ void ieee80211_process_addba_request(struct ieee80211_local *local,
 	if (buf_size == 0)
 		buf_size = IEEE80211_MAX_AMPDU_BUF;
 
+	/* make sure the size doesn't exceed the maximum supported by the hw */
+	if (buf_size > local->hw.max_rx_aggregation_subframes)
+		buf_size = local->hw.max_rx_aggregation_subframes;
 
 	/* examine state machine */
 	mutex_lock(&sta->ampdu_mlme.mtx);
@@ -287,7 +290,7 @@ void ieee80211_process_addba_request(struct ieee80211_local *local,
 	}
 
 	ret = drv_ampdu_action(local, sta->sdata, IEEE80211_AMPDU_RX_START,
-			       &sta->sta, tid, &start_seq_num);
+			       &sta->sta, tid, &start_seq_num, 0);
 #ifdef CONFIG_MAC80211_HT_DEBUG
 	printk(KERN_DEBUG "Rx A-MPDU request on tid %d result %d\n", tid, ret);
 #endif /* CONFIG_MAC80211_HT_DEBUG */
diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c
index 9cc472c..63d852c 100644
--- a/net/mac80211/agg-tx.c
+++ b/net/mac80211/agg-tx.c
@@ -190,7 +190,7 @@ int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid,
 
 	ret = drv_ampdu_action(local, sta->sdata,
 			       IEEE80211_AMPDU_TX_STOP,
-			       &sta->sta, tid, NULL);
+			       &sta->sta, tid, NULL, 0);
 
 	/* HW shall not deny going back to legacy */
 	if (WARN_ON(ret)) {
@@ -311,7 +311,7 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid)
 	start_seq_num = sta->tid_seq[tid] >> 4;
 
 	ret = drv_ampdu_action(local, sdata, IEEE80211_AMPDU_TX_START,
-			       &sta->sta, tid, &start_seq_num);
+			       &sta->sta, tid, &start_seq_num, 0);
 	if (ret) {
 #ifdef CONFIG_MAC80211_HT_DEBUG
 		printk(KERN_DEBUG "BA request denied - HW unavailable for"
@@ -342,7 +342,8 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid)
 	/* send AddBA request */
 	ieee80211_send_addba_request(sdata, sta->sta.addr, tid,
 				     tid_tx->dialog_token, start_seq_num,
-				     0x40, tid_tx->timeout);
+				     local->hw.max_tx_aggregation_subframes,
+				     tid_tx->timeout);
 }
 
 int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid,
@@ -487,7 +488,8 @@ static void ieee80211_agg_tx_operational(struct ieee80211_local *local,
 
 	drv_ampdu_action(local, sta->sdata,
 			 IEEE80211_AMPDU_TX_OPERATIONAL,
-			 &sta->sta, tid, NULL);
+			 &sta->sta, tid, NULL,
+			 sta->ampdu_mlme.tid_tx[tid]->buf_size);
 
 	/*
 	 * synchronize with TX path, while splicing the TX path
@@ -742,9 +744,11 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local,
 {
 	struct tid_ampdu_tx *tid_tx;
 	u16 capab, tid;
+	u8 buf_size;
 
 	capab = le16_to_cpu(mgmt->u.action.u.addba_resp.capab);
 	tid = (capab & IEEE80211_ADDBA_PARAM_TID_MASK) >> 2;
+	buf_size = (capab & IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK) >> 6;
 
 	mutex_lock(&sta->ampdu_mlme.mtx);
 
@@ -767,12 +771,23 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local,
 
 	if (le16_to_cpu(mgmt->u.action.u.addba_resp.status)
 			== WLAN_STATUS_SUCCESS) {
+		/*
+		 * IEEE 802.11-2007 7.3.1.14:
+		 * In an ADDBA Response frame, when the Status Code field
+		 * is set to 0, the Buffer Size subfield is set to a value
+		 * of at least 1.
+		 */
+		if (!buf_size)
+			goto out;
+
 		if (test_and_set_bit(HT_AGG_STATE_RESPONSE_RECEIVED,
 				     &tid_tx->state)) {
 			/* ignore duplicate response */
 			goto out;
 		}
 
+		tid_tx->buf_size = buf_size;
+
 		if (test_bit(HT_AGG_STATE_DRV_READY, &tid_tx->state))
 			ieee80211_agg_tx_operational(local, sta, tid);
 
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 9cd73b1..7b701dc 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -316,6 +316,17 @@ static int ieee80211_config_default_mgmt_key(struct wiphy *wiphy,
 	return 0;
 }
 
+static void rate_idx_to_bitrate(struct rate_info *rate, struct sta_info *sta, int idx)
+{
+	if (!(rate->flags & RATE_INFO_FLAGS_MCS)) {
+		struct ieee80211_supported_band *sband;
+		sband = sta->local->hw.wiphy->bands[
+				sta->local->hw.conf.channel->band];
+		rate->legacy = sband->bitrates[idx].bitrate;
+	} else
+		rate->mcs = idx;
+}
+
 static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)
 {
 	struct ieee80211_sub_if_data *sdata = sta->sdata;
@@ -330,6 +341,7 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)
 			STATION_INFO_TX_RETRIES |
 			STATION_INFO_TX_FAILED |
 			STATION_INFO_TX_BITRATE |
+			STATION_INFO_RX_BITRATE |
 			STATION_INFO_RX_DROP_MISC;
 
 	sinfo->inactive_time = jiffies_to_msecs(jiffies - sta->last_rx);
@@ -355,15 +367,16 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)
 		sinfo->txrate.flags |= RATE_INFO_FLAGS_40_MHZ_WIDTH;
 	if (sta->last_tx_rate.flags & IEEE80211_TX_RC_SHORT_GI)
 		sinfo->txrate.flags |= RATE_INFO_FLAGS_SHORT_GI;
+	rate_idx_to_bitrate(&sinfo->txrate, sta, sta->last_tx_rate.idx);
 
-	if (!(sta->last_tx_rate.flags & IEEE80211_TX_RC_MCS)) {
-		struct ieee80211_supported_band *sband;
-		sband = sta->local->hw.wiphy->bands[
-				sta->local->hw.conf.channel->band];
-		sinfo->txrate.legacy =
-			sband->bitrates[sta->last_tx_rate.idx].bitrate;
-	} else
-		sinfo->txrate.mcs = sta->last_tx_rate.idx;
+	sinfo->rxrate.flags = 0;
+	if (sta->last_rx_rate_flag & RX_FLAG_HT)
+		sinfo->rxrate.flags |= RATE_INFO_FLAGS_MCS;
+	if (sta->last_rx_rate_flag & RX_FLAG_40MHZ)
+		sinfo->rxrate.flags |= RATE_INFO_FLAGS_40_MHZ_WIDTH;
+	if (sta->last_rx_rate_flag & RX_FLAG_SHORT_GI)
+		sinfo->rxrate.flags |= RATE_INFO_FLAGS_SHORT_GI;
+	rate_idx_to_bitrate(&sinfo->rxrate, sta, sta->last_rx_rate_idx);
 
 	if (ieee80211_vif_is_mesh(&sdata->vif)) {
 #ifdef CONFIG_MAC80211_MESH
@@ -1215,6 +1228,9 @@ static int ieee80211_set_channel(struct wiphy *wiphy,
 {
 	struct ieee80211_local *local = wiphy_priv(wiphy);
 	struct ieee80211_sub_if_data *sdata = NULL;
+	struct ieee80211_channel *old_oper;
+	enum nl80211_channel_type old_oper_type;
+	enum nl80211_channel_type old_vif_oper_type= NL80211_CHAN_NO_HT;
 
 	if (netdev)
 		sdata = IEEE80211_DEV_TO_SUB_IF(netdev);
@@ -1232,13 +1248,23 @@ static int ieee80211_set_channel(struct wiphy *wiphy,
 		break;
 	}
 
-	local->oper_channel = chan;
+	if (sdata)
+		old_vif_oper_type = sdata->vif.bss_conf.channel_type;
+	old_oper_type = local->_oper_channel_type;
 
 	if (!ieee80211_set_channel_type(local, sdata, channel_type))
 		return -EBUSY;
 
-	ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL);
-	if (sdata && sdata->vif.type != NL80211_IFTYPE_MONITOR)
+	old_oper = local->oper_channel;
+	local->oper_channel = chan;
+
+	/* Update driver if changes were actually made. */
+	if ((old_oper != local->oper_channel) ||
+	    (old_oper_type != local->_oper_channel_type))
+		ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL);
+
+	if ((sdata && sdata->vif.type != NL80211_IFTYPE_MONITOR) &&
+	    old_vif_oper_type != sdata->vif.bss_conf.channel_type)
 		ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_HT);
 
 	return 0;
@@ -1274,8 +1300,11 @@ static int ieee80211_scan(struct wiphy *wiphy,
 	case NL80211_IFTYPE_P2P_GO:
 		if (sdata->local->ops->hw_scan)
 			break;
-		/* FIXME: implement NoA while scanning in software */
-		return -EOPNOTSUPP;
+		/*
+		 * FIXME: implement NoA while scanning in software,
+		 * for now fall through to allow scanning only when
+		 * beaconing hasn't been configured yet
+		 */
 	case NL80211_IFTYPE_AP:
 		if (sdata->u.ap.beacon)
 			return -EOPNOTSUPP;
@@ -1784,6 +1813,33 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct net_device *dev,
 
 	*cookie = (unsigned long) skb;
 
+	if (is_offchan && local->ops->offchannel_tx) {
+		int ret;
+
+		IEEE80211_SKB_CB(skb)->band = chan->band;
+
+		mutex_lock(&local->mtx);
+
+		if (local->hw_offchan_tx_cookie) {
+			mutex_unlock(&local->mtx);
+			return -EBUSY;
+		}
+
+		/* TODO: bitrate control, TX processing? */
+		ret = drv_offchannel_tx(local, skb, chan, channel_type, wait);
+
+		if (ret == 0)
+			local->hw_offchan_tx_cookie = *cookie;
+		mutex_unlock(&local->mtx);
+
+		/*
+		 * Allow driver to return 1 to indicate it wants to have the
+		 * frame transmitted with a remain_on_channel + regular TX.
+		 */
+		if (ret != 1)
+			return ret;
+	}
+
 	if (is_offchan && local->ops->remain_on_channel) {
 		unsigned int duration;
 		int ret;
@@ -1847,6 +1903,7 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct net_device *dev,
 
 	wk->type = IEEE80211_WORK_OFFCHANNEL_TX;
 	wk->chan = chan;
+	wk->chan_type = channel_type;
 	wk->sdata = sdata;
 	wk->done = ieee80211_offchan_tx_done;
 	wk->offchan_tx.frame = skb;
@@ -1869,6 +1926,18 @@ static int ieee80211_mgmt_tx_cancel_wait(struct wiphy *wiphy,
 
 	mutex_lock(&local->mtx);
 
+	if (local->ops->offchannel_tx_cancel_wait &&
+	    local->hw_offchan_tx_cookie == cookie) {
+		ret = drv_offchannel_tx_cancel_wait(local);
+
+		if (!ret)
+			local->hw_offchan_tx_cookie = 0;
+
+		mutex_unlock(&local->mtx);
+
+		return ret;
+	}
+
 	if (local->ops->cancel_remain_on_channel) {
 		cookie ^= 2;
 		ret = ieee80211_cancel_remain_on_channel_hw(local, cookie);
diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c
index 1f02e59..51f0d78 100644
--- a/net/mac80211/debugfs.c
+++ b/net/mac80211/debugfs.c
@@ -60,6 +60,10 @@ static const struct file_operations name## _ops = {			\
 	debugfs_create_file(#name, mode, phyd, local, &name## _ops);
 
 
+DEBUGFS_READONLY_FILE(user_power, "%d",
+		      local->user_power_level);
+DEBUGFS_READONLY_FILE(power, "%d",
+		      local->hw.conf.power_level);
 DEBUGFS_READONLY_FILE(frequency, "%d",
 		      local->hw.conf.channel->center_freq);
 DEBUGFS_READONLY_FILE(total_ps_buffered, "%d",
@@ -391,6 +395,8 @@ void debugfs_hw_add(struct ieee80211_local *local)
 	DEBUGFS_ADD(uapsd_queues);
 	DEBUGFS_ADD(uapsd_max_sp_len);
 	DEBUGFS_ADD(channel_type);
+	DEBUGFS_ADD(user_power);
+	DEBUGFS_ADD(power);
 
 	statsd = debugfs_create_dir("statistics", phyd);
 
diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c
index 2dabdf7..dacace6 100644
--- a/net/mac80211/debugfs_netdev.c
+++ b/net/mac80211/debugfs_netdev.c
@@ -36,7 +36,7 @@ static ssize_t ieee80211_if_read(
 		ret = (*format)(sdata, buf, sizeof(buf));
 	read_unlock(&dev_base_lock);
 
-	if (ret != -EINVAL)
+	if (ret >= 0)
 		ret = simple_read_from_buffer(userbuf, count, ppos, buf, ret);
 
 	return ret;
@@ -81,6 +81,8 @@ static ssize_t ieee80211_if_fmt_##name(					\
 		IEEE80211_IF_FMT(name, field, "%d\n")
 #define IEEE80211_IF_FMT_HEX(name, field)				\
 		IEEE80211_IF_FMT(name, field, "%#x\n")
+#define IEEE80211_IF_FMT_LHEX(name, field)				\
+		IEEE80211_IF_FMT(name, field, "%#lx\n")
 #define IEEE80211_IF_FMT_SIZE(name, field)				\
 		IEEE80211_IF_FMT(name, field, "%zd\n")
 
@@ -145,6 +147,9 @@ IEEE80211_IF_FILE(rc_rateidx_mask_2ghz, rc_rateidx_mask[IEEE80211_BAND_2GHZ],
 		  HEX);
 IEEE80211_IF_FILE(rc_rateidx_mask_5ghz, rc_rateidx_mask[IEEE80211_BAND_5GHZ],
 		  HEX);
+IEEE80211_IF_FILE(flags, flags, HEX);
+IEEE80211_IF_FILE(state, state, LHEX);
+IEEE80211_IF_FILE(channel_type, vif.bss_conf.channel_type, DEC);
 
 /* STA attributes */
 IEEE80211_IF_FILE(bssid, u.mgd.bssid, MAC);
@@ -216,6 +221,104 @@ static ssize_t ieee80211_if_parse_smps(struct ieee80211_sub_if_data *sdata,
 
 __IEEE80211_IF_FILE_W(smps);
 
+static ssize_t ieee80211_if_fmt_tkip_mic_test(
+	const struct ieee80211_sub_if_data *sdata, char *buf, int buflen)
+{
+	return -EOPNOTSUPP;
+}
+
+static int hwaddr_aton(const char *txt, u8 *addr)
+{
+	int i;
+
+	for (i = 0; i < ETH_ALEN; i++) {
+		int a, b;
+
+		a = hex_to_bin(*txt++);
+		if (a < 0)
+			return -1;
+		b = hex_to_bin(*txt++);
+		if (b < 0)
+			return -1;
+		*addr++ = (a << 4) | b;
+		if (i < 5 && *txt++ != ':')
+			return -1;
+	}
+
+	return 0;
+}
+
+static ssize_t ieee80211_if_parse_tkip_mic_test(
+	struct ieee80211_sub_if_data *sdata, const char *buf, int buflen)
+{
+	struct ieee80211_local *local = sdata->local;
+	u8 addr[ETH_ALEN];
+	struct sk_buff *skb;
+	struct ieee80211_hdr *hdr;
+	__le16 fc;
+
+	/*
+	 * Assume colon-delimited MAC address with possible white space
+	 * following.
+	 */
+	if (buflen < 3 * ETH_ALEN - 1)
+		return -EINVAL;
+	if (hwaddr_aton(buf, addr) < 0)
+		return -EINVAL;
+
+	if (!ieee80211_sdata_running(sdata))
+		return -ENOTCONN;
+
+	skb = dev_alloc_skb(local->hw.extra_tx_headroom + 24 + 100);
+	if (!skb)
+		return -ENOMEM;
+	skb_reserve(skb, local->hw.extra_tx_headroom);
+
+	hdr = (struct ieee80211_hdr *) skb_put(skb, 24);
+	memset(hdr, 0, 24);
+	fc = cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_DATA);
+
+	switch (sdata->vif.type) {
+	case NL80211_IFTYPE_AP:
+		fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS);
+		/* DA BSSID SA */
+		memcpy(hdr->addr1, addr, ETH_ALEN);
+		memcpy(hdr->addr2, sdata->vif.addr, ETH_ALEN);
+		memcpy(hdr->addr3, sdata->vif.addr, ETH_ALEN);
+		break;
+	case NL80211_IFTYPE_STATION:
+		fc |= cpu_to_le16(IEEE80211_FCTL_TODS);
+		/* BSSID SA DA */
+		if (sdata->vif.bss_conf.bssid == NULL) {
+			dev_kfree_skb(skb);
+			return -ENOTCONN;
+		}
+		memcpy(hdr->addr1, sdata->vif.bss_conf.bssid, ETH_ALEN);
+		memcpy(hdr->addr2, sdata->vif.addr, ETH_ALEN);
+		memcpy(hdr->addr3, addr, ETH_ALEN);
+		break;
+	default:
+		dev_kfree_skb(skb);
+		return -EOPNOTSUPP;
+	}
+	hdr->frame_control = fc;
+
+	/*
+	 * Add some length to the test frame to make it look bit more valid.
+	 * The exact contents does not matter since the recipient is required
+	 * to drop this because of the Michael MIC failure.
+	 */
+	memset(skb_put(skb, 50), 0, 50);
+
+	IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_TKIP_MIC_FAILURE;
+
+	ieee80211_tx_skb(sdata, skb);
+
+	return buflen;
+}
+
+__IEEE80211_IF_FILE_W(tkip_mic_test);
+
 /* AP attributes */
 IEEE80211_IF_FILE(num_sta_ps, u.ap.num_sta_ps, ATOMIC);
 IEEE80211_IF_FILE(dtim_count, u.ap.dtim_count, DEC);
@@ -283,6 +386,9 @@ IEEE80211_IF_FILE(dot11MeshHWMPRootMode,
 static void add_sta_files(struct ieee80211_sub_if_data *sdata)
 {
 	DEBUGFS_ADD(drop_unencrypted);
+	DEBUGFS_ADD(flags);
+	DEBUGFS_ADD(state);
+	DEBUGFS_ADD(channel_type);
 	DEBUGFS_ADD(rc_rateidx_mask_2ghz);
 	DEBUGFS_ADD(rc_rateidx_mask_5ghz);
 
@@ -291,22 +397,30 @@ static void add_sta_files(struct ieee80211_sub_if_data *sdata)
 	DEBUGFS_ADD(last_beacon);
 	DEBUGFS_ADD(ave_beacon);
 	DEBUGFS_ADD_MODE(smps, 0600);
+	DEBUGFS_ADD_MODE(tkip_mic_test, 0200);
 }
 
 static void add_ap_files(struct ieee80211_sub_if_data *sdata)
 {
 	DEBUGFS_ADD(drop_unencrypted);
+	DEBUGFS_ADD(flags);
+	DEBUGFS_ADD(state);
+	DEBUGFS_ADD(channel_type);
 	DEBUGFS_ADD(rc_rateidx_mask_2ghz);
 	DEBUGFS_ADD(rc_rateidx_mask_5ghz);
 
 	DEBUGFS_ADD(num_sta_ps);
 	DEBUGFS_ADD(dtim_count);
 	DEBUGFS_ADD(num_buffered_multicast);
+	DEBUGFS_ADD_MODE(tkip_mic_test, 0200);
 }
 
 static void add_wds_files(struct ieee80211_sub_if_data *sdata)
 {
 	DEBUGFS_ADD(drop_unencrypted);
+	DEBUGFS_ADD(flags);
+	DEBUGFS_ADD(state);
+	DEBUGFS_ADD(channel_type);
 	DEBUGFS_ADD(rc_rateidx_mask_2ghz);
 	DEBUGFS_ADD(rc_rateidx_mask_5ghz);
 
@@ -316,12 +430,18 @@ static void add_wds_files(struct ieee80211_sub_if_data *sdata)
 static void add_vlan_files(struct ieee80211_sub_if_data *sdata)
 {
 	DEBUGFS_ADD(drop_unencrypted);
+	DEBUGFS_ADD(flags);
+	DEBUGFS_ADD(state);
+	DEBUGFS_ADD(channel_type);
 	DEBUGFS_ADD(rc_rateidx_mask_2ghz);
 	DEBUGFS_ADD(rc_rateidx_mask_5ghz);
 }
 
 static void add_monitor_files(struct ieee80211_sub_if_data *sdata)
 {
+	DEBUGFS_ADD(flags);
+	DEBUGFS_ADD(state);
+	DEBUGFS_ADD(channel_type);
 }
 
 #ifdef CONFIG_MAC80211_MESH
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index 98d5899..3729296 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -5,9 +5,9 @@
 #include "ieee80211_i.h"
 #include "driver-trace.h"
 
-static inline int drv_tx(struct ieee80211_local *local, struct sk_buff *skb)
+static inline void drv_tx(struct ieee80211_local *local, struct sk_buff *skb)
 {
-	return local->ops->tx(&local->hw, skb);
+	local->ops->tx(&local->hw, skb);
 }
 
 static inline int drv_start(struct ieee80211_local *local)
@@ -382,17 +382,17 @@ static inline int drv_ampdu_action(struct ieee80211_local *local,
 				   struct ieee80211_sub_if_data *sdata,
 				   enum ieee80211_ampdu_mlme_action action,
 				   struct ieee80211_sta *sta, u16 tid,
-				   u16 *ssn)
+				   u16 *ssn, u8 buf_size)
 {
 	int ret = -EOPNOTSUPP;
 
 	might_sleep();
 
-	trace_drv_ampdu_action(local, sdata, action, sta, tid, ssn);
+	trace_drv_ampdu_action(local, sdata, action, sta, tid, ssn, buf_size);
 
 	if (local->ops->ampdu_action)
 		ret = local->ops->ampdu_action(&local->hw, &sdata->vif, action,
-					       sta, tid, ssn);
+					       sta, tid, ssn, buf_size);
 
 	trace_drv_return_int(local, ret);
 
@@ -495,4 +495,35 @@ static inline int drv_cancel_remain_on_channel(struct ieee80211_local *local)
 	return ret;
 }
 
+static inline int drv_offchannel_tx(struct ieee80211_local *local,
+				    struct sk_buff *skb,
+				    struct ieee80211_channel *chan,
+				    enum nl80211_channel_type channel_type,
+				    unsigned int wait)
+{
+	int ret;
+
+	might_sleep();
+
+	trace_drv_offchannel_tx(local, skb, chan, channel_type, wait);
+	ret = local->ops->offchannel_tx(&local->hw, skb, chan,
+					channel_type, wait);
+	trace_drv_return_int(local, ret);
+
+	return ret;
+}
+
+static inline int drv_offchannel_tx_cancel_wait(struct ieee80211_local *local)
+{
+	int ret;
+
+	might_sleep();
+
+	trace_drv_offchannel_tx_cancel_wait(local);
+	ret = local->ops->offchannel_tx_cancel_wait(&local->hw);
+	trace_drv_return_int(local, ret);
+
+	return ret;
+}
+
 #endif /* __MAC80211_DRIVER_OPS */
diff --git a/net/mac80211/driver-trace.h b/net/mac80211/driver-trace.h
index 49c8421..520fe24 100644
--- a/net/mac80211/driver-trace.h
+++ b/net/mac80211/driver-trace.h
@@ -9,6 +9,11 @@
 #undef TRACE_EVENT
 #define TRACE_EVENT(name, proto, ...) \
 static inline void trace_ ## name(proto) {}
+#undef DECLARE_EVENT_CLASS
+#define DECLARE_EVENT_CLASS(...)
+#undef DEFINE_EVENT
+#define DEFINE_EVENT(evt_class, name, proto, ...) \
+static inline void trace_ ## name(proto) {}
 #endif
 
 #undef TRACE_SYSTEM
@@ -38,7 +43,7 @@ static inline void trace_ ## name(proto) {}
  * Tracing for driver callbacks.
  */
 
-TRACE_EVENT(drv_return_void,
+DECLARE_EVENT_CLASS(local_only_evt,
 	TP_PROTO(struct ieee80211_local *local),
 	TP_ARGS(local),
 	TP_STRUCT__entry(
@@ -50,6 +55,11 @@ TRACE_EVENT(drv_return_void,
 	TP_printk(LOCAL_PR_FMT, LOCAL_PR_ARG)
 );
 
+DEFINE_EVENT(local_only_evt, drv_return_void,
+	TP_PROTO(struct ieee80211_local *local),
+	TP_ARGS(local)
+);
+
 TRACE_EVENT(drv_return_int,
 	TP_PROTO(struct ieee80211_local *local, int ret),
 	TP_ARGS(local, ret),
@@ -78,40 +88,14 @@ TRACE_EVENT(drv_return_u64,
 	TP_printk(LOCAL_PR_FMT " - %llu", LOCAL_PR_ARG, __entry->ret)
 );
 
-TRACE_EVENT(drv_start,
+DEFINE_EVENT(local_only_evt, drv_start,
 	TP_PROTO(struct ieee80211_local *local),
-
-	TP_ARGS(local),
-
-	TP_STRUCT__entry(
-		LOCAL_ENTRY
-	),
-
-	TP_fast_assign(
-		LOCAL_ASSIGN;
-	),
-
-	TP_printk(
-		LOCAL_PR_FMT, LOCAL_PR_ARG
-	)
+	TP_ARGS(local)
 );
 
-TRACE_EVENT(drv_stop,
+DEFINE_EVENT(local_only_evt, drv_stop,
 	TP_PROTO(struct ieee80211_local *local),
-
-	TP_ARGS(local),
-
-	TP_STRUCT__entry(
-		LOCAL_ENTRY
-	),
-
-	TP_fast_assign(
-		LOCAL_ASSIGN;
-	),
-
-	TP_printk(
-		LOCAL_PR_FMT, LOCAL_PR_ARG
-	)
+	TP_ARGS(local)
 );
 
 TRACE_EVENT(drv_add_interface,
@@ -439,40 +423,14 @@ TRACE_EVENT(drv_hw_scan,
 	)
 );
 
-TRACE_EVENT(drv_sw_scan_start,
+DEFINE_EVENT(local_only_evt, drv_sw_scan_start,
 	TP_PROTO(struct ieee80211_local *local),
-
-	TP_ARGS(local),
-
-	TP_STRUCT__entry(
-		LOCAL_ENTRY
-	),
-
-	TP_fast_assign(
-		LOCAL_ASSIGN;
-	),
-
-	TP_printk(
-		LOCAL_PR_FMT, LOCAL_PR_ARG
-	)
+	TP_ARGS(local)
 );
 
-TRACE_EVENT(drv_sw_scan_complete,
+DEFINE_EVENT(local_only_evt, drv_sw_scan_complete,
 	TP_PROTO(struct ieee80211_local *local),
-
-	TP_ARGS(local),
-
-	TP_STRUCT__entry(
-		LOCAL_ENTRY
-	),
-
-	TP_fast_assign(
-		LOCAL_ASSIGN;
-	),
-
-	TP_printk(
-		LOCAL_PR_FMT, LOCAL_PR_ARG
-	)
+	TP_ARGS(local)
 );
 
 TRACE_EVENT(drv_get_stats,
@@ -702,23 +660,9 @@ TRACE_EVENT(drv_conf_tx,
 	)
 );
 
-TRACE_EVENT(drv_get_tsf,
+DEFINE_EVENT(local_only_evt, drv_get_tsf,
 	TP_PROTO(struct ieee80211_local *local),
-
-	TP_ARGS(local),
-
-	TP_STRUCT__entry(
-		LOCAL_ENTRY
-	),
-
-	TP_fast_assign(
-		LOCAL_ASSIGN;
-	),
-
-	TP_printk(
-		LOCAL_PR_FMT,
-		LOCAL_PR_ARG
-	)
+	TP_ARGS(local)
 );
 
 TRACE_EVENT(drv_set_tsf,
@@ -742,41 +686,14 @@ TRACE_EVENT(drv_set_tsf,
 	)
 );
 
-TRACE_EVENT(drv_reset_tsf,
+DEFINE_EVENT(local_only_evt, drv_reset_tsf,
 	TP_PROTO(struct ieee80211_local *local),
-
-	TP_ARGS(local),
-
-	TP_STRUCT__entry(
-		LOCAL_ENTRY
-	),
-
-	TP_fast_assign(
-		LOCAL_ASSIGN;
-	),
-
-	TP_printk(
-		LOCAL_PR_FMT, LOCAL_PR_ARG
-	)
+	TP_ARGS(local)
 );
 
-TRACE_EVENT(drv_tx_last_beacon,
+DEFINE_EVENT(local_only_evt, drv_tx_last_beacon,
 	TP_PROTO(struct ieee80211_local *local),
-
-	TP_ARGS(local),
-
-	TP_STRUCT__entry(
-		LOCAL_ENTRY
-	),
-
-	TP_fast_assign(
-		LOCAL_ASSIGN;
-	),
-
-	TP_printk(
-		LOCAL_PR_FMT,
-		LOCAL_PR_ARG
-	)
+	TP_ARGS(local)
 );
 
 TRACE_EVENT(drv_ampdu_action,
@@ -784,9 +701,9 @@ TRACE_EVENT(drv_ampdu_action,
 		 struct ieee80211_sub_if_data *sdata,
 		 enum ieee80211_ampdu_mlme_action action,
 		 struct ieee80211_sta *sta, u16 tid,
-		 u16 *ssn),
+		 u16 *ssn, u8 buf_size),
 
-	TP_ARGS(local, sdata, action, sta, tid, ssn),
+	TP_ARGS(local, sdata, action, sta, tid, ssn, buf_size),
 
 	TP_STRUCT__entry(
 		LOCAL_ENTRY
@@ -794,6 +711,7 @@ TRACE_EVENT(drv_ampdu_action,
 		__field(u32, action)
 		__field(u16, tid)
 		__field(u16, ssn)
+		__field(u8, buf_size)
 		VIF_ENTRY
 	),
 
@@ -804,11 +722,13 @@ TRACE_EVENT(drv_ampdu_action,
 		__entry->action = action;
 		__entry->tid = tid;
 		__entry->ssn = ssn ? *ssn : 0;
+		__entry->buf_size = buf_size;
 	),
 
 	TP_printk(
-		LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT " action:%d tid:%d",
-		LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG, __entry->action, __entry->tid
+		LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT " action:%d tid:%d buf:%d",
+		LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG, __entry->action,
+		__entry->tid, __entry->buf_size
 	)
 );
 
@@ -959,24 +879,44 @@ TRACE_EVENT(drv_remain_on_channel,
 	)
 );
 
-TRACE_EVENT(drv_cancel_remain_on_channel,
+DEFINE_EVENT(local_only_evt, drv_cancel_remain_on_channel,
 	TP_PROTO(struct ieee80211_local *local),
+	TP_ARGS(local)
+);
 
-	TP_ARGS(local),
+TRACE_EVENT(drv_offchannel_tx,
+	TP_PROTO(struct ieee80211_local *local, struct sk_buff *skb,
+		 struct ieee80211_channel *chan,
+		 enum nl80211_channel_type channel_type,
+		 unsigned int wait),
+
+	TP_ARGS(local, skb, chan, channel_type, wait),
 
 	TP_STRUCT__entry(
 		LOCAL_ENTRY
+		__field(int, center_freq)
+		__field(int, channel_type)
+		__field(unsigned int, wait)
 	),
 
 	TP_fast_assign(
 		LOCAL_ASSIGN;
+		__entry->center_freq = chan->center_freq;
+		__entry->channel_type = channel_type;
+		__entry->wait = wait;
 	),
 
 	TP_printk(
-		LOCAL_PR_FMT, LOCAL_PR_ARG
+		LOCAL_PR_FMT " freq:%dMHz, wait:%dms",
+		LOCAL_PR_ARG, __entry->center_freq, __entry->wait
 	)
 );
 
+DEFINE_EVENT(local_only_evt, drv_offchannel_tx_cancel_wait,
+	TP_PROTO(struct ieee80211_local *local),
+	TP_ARGS(local)
+);
+
 /*
  * Tracing for API calls that drivers call.
  */
@@ -1069,23 +1009,9 @@ TRACE_EVENT(api_stop_tx_ba_cb,
 	)
 );
 
-TRACE_EVENT(api_restart_hw,
+DEFINE_EVENT(local_only_evt, api_restart_hw,
 	TP_PROTO(struct ieee80211_local *local),
-
-	TP_ARGS(local),
-
-	TP_STRUCT__entry(
-		LOCAL_ENTRY
-	),
-
-	TP_fast_assign(
-		LOCAL_ASSIGN;
-	),
-
-	TP_printk(
-		LOCAL_PR_FMT,
-		LOCAL_PR_ARG
-	)
+	TP_ARGS(local)
 );
 
 TRACE_EVENT(api_beacon_loss,
@@ -1214,40 +1140,14 @@ TRACE_EVENT(api_chswitch_done,
 	)
 );
 
-TRACE_EVENT(api_ready_on_channel,
+DEFINE_EVENT(local_only_evt, api_ready_on_channel,
 	TP_PROTO(struct ieee80211_local *local),
-
-	TP_ARGS(local),
-
-	TP_STRUCT__entry(
-		LOCAL_ENTRY
-	),
-
-	TP_fast_assign(
-		LOCAL_ASSIGN;
-	),
-
-	TP_printk(
-		LOCAL_PR_FMT, LOCAL_PR_ARG
-	)
+	TP_ARGS(local)
 );
 
-TRACE_EVENT(api_remain_on_channel_expired,
+DEFINE_EVENT(local_only_evt, api_remain_on_channel_expired,
 	TP_PROTO(struct ieee80211_local *local),
-
-	TP_ARGS(local),
-
-	TP_STRUCT__entry(
-		LOCAL_ENTRY
-	),
-
-	TP_fast_assign(
-		LOCAL_ASSIGN;
-	),
-
-	TP_printk(
-		LOCAL_PR_FMT, LOCAL_PR_ARG
-	)
+	TP_ARGS(local)
 );
 
 /*
diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index 75d679d..b9e4b9b 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -66,6 +66,9 @@ void ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_supported_band *sband,
 	/* own MCS TX capabilities */
 	tx_mcs_set_cap = sband->ht_cap.mcs.tx_params;
 
+	/* Copy peer MCS TX capabilities, the driver might need them. */
+	ht_cap->mcs.tx_params = ht_cap_ie->mcs.tx_params;
+
 	/* can we TX with MCS rates? */
 	if (!(tx_mcs_set_cap & IEEE80211_HT_MCS_TX_DEFINED))
 		return;
@@ -79,7 +82,7 @@ void ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_supported_band *sband,
 		max_tx_streams = IEEE80211_HT_MCS_TX_MAX_STREAMS;
 
 	/*
-	 * 802.11n D5.0 20.3.5 / 20.6 says:
+	 * 802.11n-2009 20.3.5 / 20.6 says:
 	 * - indices 0 to 7 and 32 are single spatial stream
 	 * - 8 to 31 are multiple spatial streams using equal modulation
 	 *   [8..15 for two streams, 16..23 for three and 24..31 for four]
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index 53c7077..3e81af1 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -31,7 +31,6 @@
 #define IEEE80211_IBSS_JOIN_TIMEOUT (7 * HZ)
 
 #define IEEE80211_IBSS_MERGE_INTERVAL (30 * HZ)
-#define IEEE80211_IBSS_MERGE_DELAY 0x400000
 #define IEEE80211_IBSS_INACTIVITY_LIMIT (60 * HZ)
 
 #define IEEE80211_IBSS_MAX_STA_ENTRIES 128
@@ -270,7 +269,8 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
 	enum ieee80211_band band = rx_status->band;
 
 	if (elems->ds_params && elems->ds_params_len == 1)
-		freq = ieee80211_channel_to_frequency(elems->ds_params[0]);
+		freq = ieee80211_channel_to_frequency(elems->ds_params[0],
+						      band);
 	else
 		freq = rx_status->freq;
 
@@ -354,7 +354,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
 	if (memcmp(cbss->bssid, sdata->u.ibss.bssid, ETH_ALEN) == 0)
 		goto put_bss;
 
-	if (rx_status->flag & RX_FLAG_TSFT) {
+	if (rx_status->flag & RX_FLAG_MACTIME_MPDU) {
 		/*
 		 * For correct IBSS merging we need mactime; since mactime is
 		 * defined as the time the first data symbol of the frame hits
@@ -396,10 +396,6 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
 	       jiffies);
 #endif
 
-	/* give slow hardware some time to do the TSF sync */
-	if (rx_timestamp < IEEE80211_IBSS_MERGE_DELAY)
-		goto put_bss;
-
 	if (beacon_timestamp > rx_timestamp) {
 #ifdef CONFIG_MAC80211_IBSS_DEBUG
 		printk(KERN_DEBUG "%s: beacon TSF higher than "
@@ -663,12 +659,13 @@ static void ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata)
 }
 
 static void ieee80211_rx_mgmt_probe_req(struct ieee80211_sub_if_data *sdata,
-					struct ieee80211_mgmt *mgmt,
-					size_t len)
+					struct sk_buff *req)
 {
+	struct ieee80211_rx_status *rx_status = IEEE80211_SKB_RXCB(req);
+	struct ieee80211_mgmt *mgmt = (void *)req->data;
 	struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;
 	struct ieee80211_local *local = sdata->local;
-	int tx_last_beacon;
+	int tx_last_beacon, len = req->len;
 	struct sk_buff *skb;
 	struct ieee80211_mgmt *resp;
 	u8 *pos, *end;
@@ -688,7 +685,7 @@ static void ieee80211_rx_mgmt_probe_req(struct ieee80211_sub_if_data *sdata,
 	       mgmt->bssid, tx_last_beacon);
 #endif /* CONFIG_MAC80211_IBSS_DEBUG */
 
-	if (!tx_last_beacon)
+	if (!tx_last_beacon && !(rx_status->rx_flags & IEEE80211_RX_RA_MATCH))
 		return;
 
 	if (memcmp(mgmt->bssid, ifibss->bssid, ETH_ALEN) != 0 &&
@@ -785,7 +782,7 @@ void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
 
 	switch (fc & IEEE80211_FCTL_STYPE) {
 	case IEEE80211_STYPE_PROBE_REQ:
-		ieee80211_rx_mgmt_probe_req(sdata, mgmt, skb->len);
+		ieee80211_rx_mgmt_probe_req(sdata, skb);
 		break;
 	case IEEE80211_STYPE_PROBE_RESP:
 		ieee80211_rx_mgmt_probe_resp(sdata, mgmt, skb->len,
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 533fd32..a404017 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -225,6 +225,7 @@ struct ieee80211_if_ap {
 	struct sk_buff_head ps_bc_buf;
 	atomic_t num_sta_ps; /* number of stations in PS mode */
 	int dtim_count;
+	bool dtim_bc_mc;
 };
 
 struct ieee80211_if_wds {
@@ -654,8 +655,6 @@ struct tpt_led_trigger {
  *	well be on the operating channel
  * @SCAN_HW_SCANNING: The hardware is scanning for us, we have no way to
  *	determine if we are on the operating channel or not
- * @SCAN_OFF_CHANNEL: We're off our operating channel for scanning,
- *	gets only set in conjunction with SCAN_SW_SCANNING
  * @SCAN_COMPLETED: Set for our scan work function when the driver reported
  *	that the scan completed.
  * @SCAN_ABORTED: Set for our scan work function when the driver reported
@@ -664,7 +663,6 @@ struct tpt_led_trigger {
 enum {
 	SCAN_SW_SCANNING,
 	SCAN_HW_SCANNING,
-	SCAN_OFF_CHANNEL,
 	SCAN_COMPLETED,
 	SCAN_ABORTED,
 };
@@ -959,6 +957,7 @@ struct ieee80211_local {
 	unsigned int hw_roc_duration;
 	u32 hw_roc_cookie;
 	bool hw_roc_for_tx;
+	unsigned long hw_offchan_tx_cookie;
 
 	/* dummy netdev for use w/ NAPI */
 	struct net_device napi_dev;
@@ -1068,8 +1067,6 @@ void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata,
 void ieee80211_configure_filter(struct ieee80211_local *local);
 u32 ieee80211_reset_erp_info(struct ieee80211_sub_if_data *sdata);
 
-extern bool ieee80211_disable_40mhz_24ghz;
-
 /* STA code */
 void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata);
 int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata,
@@ -1147,10 +1144,14 @@ void ieee80211_rx_bss_put(struct ieee80211_local *local,
 			  struct ieee80211_bss *bss);
 
 /* off-channel helpers */
-void ieee80211_offchannel_stop_beaconing(struct ieee80211_local *local);
-void ieee80211_offchannel_stop_station(struct ieee80211_local *local);
+bool ieee80211_cfg_on_oper_channel(struct ieee80211_local *local);
+void ieee80211_offchannel_enable_all_ps(struct ieee80211_local *local,
+					bool tell_ap);
+void ieee80211_offchannel_stop_vifs(struct ieee80211_local *local,
+				    bool offchannel_ps_enable);
 void ieee80211_offchannel_return(struct ieee80211_local *local,
-				 bool enable_beaconing);
+				 bool enable_beaconing,
+				 bool offchannel_ps_disable);
 void ieee80211_hw_roc_setup(struct ieee80211_local *local);
 
 /* interface handling */
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 7a10a8d..4054399 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -382,6 +382,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
 	struct sk_buff *skb, *tmp;
 	u32 hw_reconf_flags = 0;
 	int i;
+	enum nl80211_channel_type orig_ct;
 
 	if (local->scan_sdata == sdata)
 		ieee80211_scan_cancel(local);
@@ -542,8 +543,14 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
 		hw_reconf_flags = 0;
 	}
 
+	/* Re-calculate channel-type, in case there are multiple vifs
+	 * on different channel types.
+	 */
+	orig_ct = local->_oper_channel_type;
+	ieee80211_set_channel_type(local, NULL, NL80211_CHAN_NO_HT);
+
 	/* do after stop to avoid reconfiguring when we stop anyway */
-	if (hw_reconf_flags)
+	if (hw_reconf_flags || (orig_ct != local->_oper_channel_type))
 		ieee80211_hw_config(local, hw_reconf_flags);
 
 	spin_lock_irqsave(&local->queue_stop_reason_lock, flags);
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index a46ff06..2543e48 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -34,7 +34,7 @@
 #include "debugfs.h"
 
 
-bool ieee80211_disable_40mhz_24ghz;
+static bool ieee80211_disable_40mhz_24ghz;
 module_param(ieee80211_disable_40mhz_24ghz, bool, 0644);
 MODULE_PARM_DESC(ieee80211_disable_40mhz_24ghz,
 		 "Disable 40MHz support in the 2.4GHz band");
@@ -98,6 +98,47 @@ static void ieee80211_reconfig_filter(struct work_struct *work)
 	ieee80211_configure_filter(local);
 }
 
+/*
+ * Returns true if we are logically configured to be on
+ * the operating channel AND the hardware-conf is currently
+ * configured on the operating channel.  Compares channel-type
+ * as well.
+ */
+bool ieee80211_cfg_on_oper_channel(struct ieee80211_local *local)
+{
+	struct ieee80211_channel *chan, *scan_chan;
+	enum nl80211_channel_type channel_type;
+
+	/* This logic needs to match logic in ieee80211_hw_config */
+	if (local->scan_channel) {
+		chan = local->scan_channel;
+		/* If scanning on oper channel, use whatever channel-type
+		 * is currently in use.
+		 */
+		if (chan == local->oper_channel)
+			channel_type = local->_oper_channel_type;
+		else
+			channel_type = NL80211_CHAN_NO_HT;
+	} else if (local->tmp_channel) {
+		chan = scan_chan = local->tmp_channel;
+		channel_type = local->tmp_channel_type;
+	} else {
+		chan = local->oper_channel;
+		channel_type = local->_oper_channel_type;
+	}
+
+	if (chan != local->oper_channel ||
+	    channel_type != local->_oper_channel_type)
+		return false;
+
+	/* Check current hardware-config against oper_channel. */
+	if ((local->oper_channel != local->hw.conf.channel) ||
+	    (local->_oper_channel_type != local->hw.conf.channel_type))
+		return false;
+
+	return true;
+}
+
 int ieee80211_hw_config(struct ieee80211_local *local, u32 changed)
 {
 	struct ieee80211_channel *chan, *scan_chan;
@@ -110,21 +151,33 @@ int ieee80211_hw_config(struct ieee80211_local *local, u32 changed)
 
 	scan_chan = local->scan_channel;
 
+	/* If this off-channel logic ever changes,  ieee80211_on_oper_channel
+	 * may need to change as well.
+	 */
 	offchannel_flag = local->hw.conf.flags & IEEE80211_CONF_OFFCHANNEL;
 	if (scan_chan) {
 		chan = scan_chan;
-		channel_type = NL80211_CHAN_NO_HT;
-		local->hw.conf.flags |= IEEE80211_CONF_OFFCHANNEL;
-	} else if (local->tmp_channel &&
-		   local->oper_channel != local->tmp_channel) {
+		/* If scanning on oper channel, use whatever channel-type
+		 * is currently in use.
+		 */
+		if (chan == local->oper_channel)
+			channel_type = local->_oper_channel_type;
+		else
+			channel_type = NL80211_CHAN_NO_HT;
+	} else if (local->tmp_channel) {
 		chan = scan_chan = local->tmp_channel;
 		channel_type = local->tmp_channel_type;
-		local->hw.conf.flags |= IEEE80211_CONF_OFFCHANNEL;
 	} else {
 		chan = local->oper_channel;
 		channel_type = local->_oper_channel_type;
-		local->hw.conf.flags &= ~IEEE80211_CONF_OFFCHANNEL;
 	}
+
+	if (chan != local->oper_channel ||
+	    channel_type != local->_oper_channel_type)
+		local->hw.conf.flags |= IEEE80211_CONF_OFFCHANNEL;
+	else
+		local->hw.conf.flags &= ~IEEE80211_CONF_OFFCHANNEL;
+
 	offchannel_flag ^= local->hw.conf.flags & IEEE80211_CONF_OFFCHANNEL;
 
 	if (offchannel_flag || chan != local->hw.conf.channel ||
@@ -146,7 +199,8 @@ int ieee80211_hw_config(struct ieee80211_local *local, u32 changed)
 		changed |= IEEE80211_CONF_CHANGE_SMPS;
 	}
 
-	if (scan_chan)
+	if ((local->scanning & SCAN_SW_SCANNING) ||
+	    (local->scanning & SCAN_HW_SCANNING))
 		power = chan->max_power;
 	else
 		power = local->power_constr_level ?
@@ -231,7 +285,7 @@ void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata,
 
 	if (changed & BSS_CHANGED_BEACON_ENABLED) {
 		if (local->quiescing || !ieee80211_sdata_running(sdata) ||
-		    test_bit(SCAN_SW_SCANNING, &local->scanning)) {
+		    test_bit(SDATA_STATE_OFFCHANNEL, &sdata->state)) {
 			sdata->vif.bss_conf.enable_beacon = false;
 		} else {
 			/*
@@ -554,6 +608,7 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len,
 	local->hw.queues = 1;
 	local->hw.max_rates = 1;
 	local->hw.max_report_rates = 0;
+	local->hw.max_rx_aggregation_subframes = IEEE80211_MAX_AMPDU_BUF;
 	local->hw.conf.long_frame_max_tx_count = wiphy->retry_long;
 	local->hw.conf.short_frame_max_tx_count = wiphy->retry_short;
 	local->user_power_level = -1;
@@ -668,6 +723,18 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
 		}
 		channels += sband->n_channels;
 
+		/*
+		 * Since ieee80211_disable_40mhz_24ghz is global, we can
+		 * modify the sband's ht data even if the driver uses a
+		 * global structure for that.
+		 */
+		if (ieee80211_disable_40mhz_24ghz &&
+		    band == IEEE80211_BAND_2GHZ &&
+		    sband->ht_cap.ht_supported) {
+			sband->ht_cap.cap &= ~IEEE80211_HT_CAP_SUP_WIDTH_20_40;
+			sband->ht_cap.cap &= ~IEEE80211_HT_CAP_SGI_40;
+		}
+
 		if (max_bitrates < sband->n_bitrates)
 			max_bitrates = sband->n_bitrates;
 		supp_ht = supp_ht || sband->ht_cap.ht_supported;
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index ca3af46..2a57cc0 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -574,7 +574,7 @@ static void ieee80211_mesh_rx_bcn_presp(struct ieee80211_sub_if_data *sdata,
 			       &elems);
 
 	if (elems.ds_params && elems.ds_params_len == 1)
-		freq = ieee80211_channel_to_frequency(elems.ds_params[0]);
+		freq = ieee80211_channel_to_frequency(elems.ds_params[0], band);
 	else
 		freq = rx_status->freq;
 
@@ -645,7 +645,7 @@ void ieee80211_mesh_work(struct ieee80211_sub_if_data *sdata)
 	if (test_and_clear_bit(MESH_WORK_GROW_MPATH_TABLE, &ifmsh->wrkq_flags))
 		mesh_mpath_table_grow();
 
-	if (test_and_clear_bit(MESH_WORK_GROW_MPATH_TABLE, &ifmsh->wrkq_flags))
+	if (test_and_clear_bit(MESH_WORK_GROW_MPP_TABLE, &ifmsh->wrkq_flags))
 		mesh_mpp_table_grow();
 
 	if (test_and_clear_bit(MESH_WORK_HOUSEKEEPING, &ifmsh->wrkq_flags))
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index c9ceb4d..cc984bd 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -28,8 +28,15 @@
 #include "rate.h"
 #include "led.h"
 
-#define IEEE80211_MAX_NULLFUNC_TRIES 2
-#define IEEE80211_MAX_PROBE_TRIES 5
+static int max_nullfunc_tries = 2;
+module_param(max_nullfunc_tries, int, 0644);
+MODULE_PARM_DESC(max_nullfunc_tries,
+		 "Maximum nullfunc tx tries before disconnecting (reason 4).");
+
+static int max_probe_tries = 5;
+module_param(max_probe_tries, int, 0644);
+MODULE_PARM_DESC(max_probe_tries,
+		 "Maximum probe tries before disconnecting (reason 4).");
 
 /*
  * Beacon loss timeout is calculated as N frames times the
@@ -51,7 +58,11 @@
  * a probe request because of beacon loss or for
  * checking the connection still works.
  */
-#define IEEE80211_PROBE_WAIT		(HZ / 2)
+static int probe_wait_ms = 500;
+module_param(probe_wait_ms, int, 0644);
+MODULE_PARM_DESC(probe_wait_ms,
+		 "Maximum time(ms) to wait for probe response"
+		 " before disconnecting (reason 4).");
 
 /*
  * Weight given to the latest Beacon frame when calculating average signal
@@ -134,6 +145,9 @@ void ieee80211_sta_reset_conn_monitor(struct ieee80211_sub_if_data *sdata)
 {
 	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
 
+	if (unlikely(!sdata->u.mgd.associated))
+		return;
+
 	if (sdata->local->hw.flags & IEEE80211_HW_CONNECTION_MONITOR)
 		return;
 
@@ -161,6 +175,7 @@ static u32 ieee80211_enable_ht(struct ieee80211_sub_if_data *sdata,
 	struct ieee80211_supported_band *sband;
 	struct sta_info *sta;
 	u32 changed = 0;
+	int hti_cfreq;
 	u16 ht_opmode;
 	bool enable_ht = true;
 	enum nl80211_channel_type prev_chantype;
@@ -174,10 +189,27 @@ static u32 ieee80211_enable_ht(struct ieee80211_sub_if_data *sdata,
 	if (!sband->ht_cap.ht_supported)
 		enable_ht = false;
 
-	/* check that channel matches the right operating channel */
-	if (local->hw.conf.channel->center_freq !=
-	    ieee80211_channel_to_frequency(hti->control_chan))
-		enable_ht = false;
+	if (enable_ht) {
+		hti_cfreq = ieee80211_channel_to_frequency(hti->control_chan,
+							   sband->band);
+		/* check that channel matches the right operating channel */
+		if (local->hw.conf.channel->center_freq != hti_cfreq) {
+			/* Some APs mess this up, evidently.
+			 * Netgear WNDR3700 sometimes reports 4 higher than
+			 * the actual channel, for instance.
+			 */
+			printk(KERN_DEBUG
+			       "%s: Wrong control channel in association"
+			       " response: configured center-freq: %d"
+			       " hti-cfreq: %d  hti->control_chan: %d"
+			       " band: %d.  Disabling HT.\n",
+			       sdata->name,
+			       local->hw.conf.channel->center_freq,
+			       hti_cfreq, hti->control_chan,
+			       sband->band);
+			enable_ht = false;
+		}
+	}
 
 	if (enable_ht) {
 		channel_type = NL80211_CHAN_HT20;
@@ -429,7 +461,8 @@ void ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
 		container_of((void *)bss, struct cfg80211_bss, priv);
 	struct ieee80211_channel *new_ch;
 	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
-	int new_freq = ieee80211_channel_to_frequency(sw_elem->new_ch_num);
+	int new_freq = ieee80211_channel_to_frequency(sw_elem->new_ch_num,
+						      cbss->channel->band);
 
 	ASSERT_MGD_MTX(ifmgd);
 
@@ -600,6 +633,14 @@ void ieee80211_recalc_ps(struct ieee80211_local *local, s32 latency)
 	list_for_each_entry(sdata, &local->interfaces, list) {
 		if (!ieee80211_sdata_running(sdata))
 			continue;
+		if (sdata->vif.type == NL80211_IFTYPE_AP) {
+			/* If an AP vif is found, then disable PS
+			 * by setting the count to zero thereby setting
+			 * ps_sdata to NULL.
+			 */
+			count = 0;
+			break;
+		}
 		if (sdata->vif.type != NL80211_IFTYPE_STATION)
 			continue;
 		found = sdata;
@@ -700,9 +741,19 @@ void ieee80211_dynamic_ps_enable_work(struct work_struct *work)
 		return;
 
 	if ((local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK) &&
-	    (!(ifmgd->flags & IEEE80211_STA_NULLFUNC_ACKED)))
+	    (!(ifmgd->flags & IEEE80211_STA_NULLFUNC_ACKED))) {
+		netif_tx_stop_all_queues(sdata->dev);
+		/*
+		 * Flush all the frames queued in the driver before
+		 * going to power save
+		 */
+		drv_flush(local, false);
 		ieee80211_send_nullfunc(local, sdata, 1);
 
+		/* Flush once again to get the tx status of nullfunc frame */
+		drv_flush(local, false);
+	}
+
 	if (!((local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) &&
 	      (local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK)) ||
 	    (ifmgd->flags & IEEE80211_STA_NULLFUNC_ACKED)) {
@@ -710,6 +761,8 @@ void ieee80211_dynamic_ps_enable_work(struct work_struct *work)
 		local->hw.conf.flags |= IEEE80211_CONF_PS;
 		ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS);
 	}
+
+	netif_tx_start_all_queues(sdata->dev);
 }
 
 void ieee80211_dynamic_ps_timer(unsigned long data)
@@ -1033,12 +1086,6 @@ void ieee80211_sta_rx_notify(struct ieee80211_sub_if_data *sdata,
 	if (is_multicast_ether_addr(hdr->addr1))
 		return;
 
-	/*
-	 * In case we receive frames after disassociation.
-	 */
-	if (!sdata->u.mgd.associated)
-		return;
-
 	ieee80211_sta_reset_conn_monitor(sdata);
 }
 
@@ -1095,7 +1142,7 @@ static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata)
 	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
 	const u8 *ssid;
 	u8 *dst = ifmgd->associated->bssid;
-	u8 unicast_limit = max(1, IEEE80211_MAX_PROBE_TRIES - 3);
+	u8 unicast_limit = max(1, max_probe_tries - 3);
 
 	/*
 	 * Try sending broadcast probe requests for the last three
@@ -1121,7 +1168,7 @@ static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata)
 	}
 
 	ifmgd->probe_send_count++;
-	ifmgd->probe_timeout = jiffies + IEEE80211_PROBE_WAIT;
+	ifmgd->probe_timeout = jiffies + msecs_to_jiffies(probe_wait_ms);
 	run_again(ifmgd, ifmgd->probe_timeout);
 }
 
@@ -1222,7 +1269,8 @@ static void __ieee80211_connection_loss(struct ieee80211_sub_if_data *sdata)
 
 	memcpy(bssid, ifmgd->associated->bssid, ETH_ALEN);
 
-	printk(KERN_DEBUG "Connection to AP %pM lost.\n", bssid);
+	printk(KERN_DEBUG "%s: Connection to AP %pM lost.\n",
+	       sdata->name, bssid);
 
 	ieee80211_set_disassoc(sdata, true, true);
 	mutex_unlock(&ifmgd->mtx);
@@ -1525,7 +1573,8 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
 	}
 
 	if (elems->ds_params && elems->ds_params_len == 1)
-		freq = ieee80211_channel_to_frequency(elems->ds_params[0]);
+		freq = ieee80211_channel_to_frequency(elems->ds_params[0],
+						      rx_status->band);
 	else
 		freq = rx_status->freq;
 
@@ -1966,9 +2015,9 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata)
 		memcpy(bssid, ifmgd->associated->bssid, ETH_ALEN);
 
 		if (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS)
-			max_tries = IEEE80211_MAX_NULLFUNC_TRIES;
+			max_tries = max_nullfunc_tries;
 		else
-			max_tries = IEEE80211_MAX_PROBE_TRIES;
+			max_tries = max_probe_tries;
 
 		/* ACK received for nullfunc probing frame */
 		if (!ifmgd->probe_send_count)
@@ -1978,9 +2027,9 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata)
 #ifdef CONFIG_MAC80211_VERBOSE_DEBUG
 				wiphy_debug(local->hw.wiphy,
 					    "%s: No ack for nullfunc frame to"
-					    " AP %pM, try %d\n",
+					    " AP %pM, try %d/%i\n",
 					    sdata->name, bssid,
-					    ifmgd->probe_send_count);
+					    ifmgd->probe_send_count, max_tries);
 #endif
 				ieee80211_mgd_probe_ap_send(sdata);
 			} else {
@@ -2000,17 +2049,17 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata)
 				    "%s: Failed to send nullfunc to AP %pM"
 				    " after %dms, disconnecting.\n",
 				    sdata->name,
-				    bssid, (1000 * IEEE80211_PROBE_WAIT)/HZ);
+				    bssid, probe_wait_ms);
 #endif
 			ieee80211_sta_connection_lost(sdata, bssid);
 		} else if (ifmgd->probe_send_count < max_tries) {
 #ifdef CONFIG_MAC80211_VERBOSE_DEBUG
 			wiphy_debug(local->hw.wiphy,
 				    "%s: No probe response from AP %pM"
-				    " after %dms, try %d\n",
+				    " after %dms, try %d/%i\n",
 				    sdata->name,
-				    bssid, (1000 * IEEE80211_PROBE_WAIT)/HZ,
-				    ifmgd->probe_send_count);
+				    bssid, probe_wait_ms,
+				    ifmgd->probe_send_count, max_tries);
 #endif
 			ieee80211_mgd_probe_ap_send(sdata);
 		} else {
@@ -2022,7 +2071,7 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata)
 				    "%s: No probe response from AP %pM"
 				    " after %dms, disconnecting.\n",
 				    sdata->name,
-				    bssid, (1000 * IEEE80211_PROBE_WAIT)/HZ);
+				    bssid, probe_wait_ms);
 
 			ieee80211_sta_connection_lost(sdata, bssid);
 		}
@@ -2260,6 +2309,7 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata,
 	else
 		wk->type = IEEE80211_WORK_DIRECT_PROBE;
 	wk->chan = req->bss->channel;
+	wk->chan_type = NL80211_CHAN_NO_HT;
 	wk->sdata = sdata;
 	wk->done = ieee80211_probe_auth_done;
 
@@ -2409,6 +2459,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
 		memcpy(wk->assoc.prev_bssid, req->prev_bssid, ETH_ALEN);
 
 	wk->chan = req->bss->channel;
+	wk->chan_type = NL80211_CHAN_NO_HT;
 	wk->sdata = sdata;
 	wk->done = ieee80211_assoc_done;
 	if (!bss->dtim_period &&
diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c
index b4e5267..13427b1 100644
--- a/net/mac80211/offchannel.c
+++ b/net/mac80211/offchannel.c
@@ -17,10 +17,14 @@
 #include "driver-trace.h"
 
 /*
- * inform AP that we will go to sleep so that it will buffer the frames
- * while we scan
+ * Tell our hardware to disable PS.
+ * Optionally inform AP that we will go to sleep so that it will buffer
+ * the frames while we are doing off-channel work.  This is optional
+ * because we *may* be doing work on-operating channel, and want our
+ * hardware unconditionally awake, but still let the AP send us normal frames.
  */
-static void ieee80211_offchannel_ps_enable(struct ieee80211_sub_if_data *sdata)
+static void ieee80211_offchannel_ps_enable(struct ieee80211_sub_if_data *sdata,
+					   bool tell_ap)
 {
 	struct ieee80211_local *local = sdata->local;
 	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
@@ -41,8 +45,8 @@ static void ieee80211_offchannel_ps_enable(struct ieee80211_sub_if_data *sdata)
 		ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS);
 	}
 
-	if (!(local->offchannel_ps_enabled) ||
-	    !(local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK))
+	if (tell_ap && (!local->offchannel_ps_enabled ||
+			!(local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK)))
 		/*
 		 * If power save was enabled, no need to send a nullfunc
 		 * frame because AP knows that we are sleeping. But if the
@@ -77,6 +81,9 @@ static void ieee80211_offchannel_ps_disable(struct ieee80211_sub_if_data *sdata)
 		 * we are sleeping, let's just enable power save mode in
 		 * hardware.
 		 */
+		/* TODO:  Only set hardware if CONF_PS changed?
+		 * TODO:  Should we set offchannel_ps_enabled to false?
+		 */
 		local->hw.conf.flags |= IEEE80211_CONF_PS;
 		ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS);
 	} else if (local->hw.conf.dynamic_ps_timeout > 0) {
@@ -95,63 +102,61 @@ static void ieee80211_offchannel_ps_disable(struct ieee80211_sub_if_data *sdata)
 	ieee80211_sta_reset_conn_monitor(sdata);
 }
 
-void ieee80211_offchannel_stop_beaconing(struct ieee80211_local *local)
+void ieee80211_offchannel_stop_vifs(struct ieee80211_local *local,
+				    bool offchannel_ps_enable)
 {
 	struct ieee80211_sub_if_data *sdata;
 
+	/*
+	 * notify the AP about us leaving the channel and stop all
+	 * STA interfaces.
+	 */
 	mutex_lock(&local->iflist_mtx);
 	list_for_each_entry(sdata, &local->interfaces, list) {
 		if (!ieee80211_sdata_running(sdata))
 			continue;
 
-		/* disable beaconing */
+		if (sdata->vif.type != NL80211_IFTYPE_MONITOR)
+			set_bit(SDATA_STATE_OFFCHANNEL, &sdata->state);
+
+		/* Check to see if we should disable beaconing. */
 		if (sdata->vif.type == NL80211_IFTYPE_AP ||
 		    sdata->vif.type == NL80211_IFTYPE_ADHOC ||
 		    sdata->vif.type == NL80211_IFTYPE_MESH_POINT)
 			ieee80211_bss_info_change_notify(
 				sdata, BSS_CHANGED_BEACON_ENABLED);
 
-		/*
-		 * only handle non-STA interfaces here, STA interfaces
-		 * are handled in ieee80211_offchannel_stop_station(),
-		 * e.g., from the background scan state machine.
-		 *
-		 * In addition, do not stop monitor interface to allow it to be
-		 * used from user space controlled off-channel operations.
-		 */
-		if (sdata->vif.type != NL80211_IFTYPE_STATION &&
-		    sdata->vif.type != NL80211_IFTYPE_MONITOR) {
-			set_bit(SDATA_STATE_OFFCHANNEL, &sdata->state);
+		if (sdata->vif.type != NL80211_IFTYPE_MONITOR) {
 			netif_tx_stop_all_queues(sdata->dev);
+			if (offchannel_ps_enable &&
+			    (sdata->vif.type == NL80211_IFTYPE_STATION) &&
+			    sdata->u.mgd.associated)
+				ieee80211_offchannel_ps_enable(sdata, true);
 		}
 	}
 	mutex_unlock(&local->iflist_mtx);
 }
 
-void ieee80211_offchannel_stop_station(struct ieee80211_local *local)
+void ieee80211_offchannel_enable_all_ps(struct ieee80211_local *local,
+					bool tell_ap)
 {
 	struct ieee80211_sub_if_data *sdata;
 
-	/*
-	 * notify the AP about us leaving the channel and stop all STA interfaces
-	 */
 	mutex_lock(&local->iflist_mtx);
 	list_for_each_entry(sdata, &local->interfaces, list) {
 		if (!ieee80211_sdata_running(sdata))
 			continue;
 
-		if (sdata->vif.type == NL80211_IFTYPE_STATION) {
-			set_bit(SDATA_STATE_OFFCHANNEL, &sdata->state);
-			netif_tx_stop_all_queues(sdata->dev);
-			if (sdata->u.mgd.associated)
-				ieee80211_offchannel_ps_enable(sdata);
-		}
+		if (sdata->vif.type == NL80211_IFTYPE_STATION &&
+		    sdata->u.mgd.associated)
+			ieee80211_offchannel_ps_enable(sdata, tell_ap);
 	}
 	mutex_unlock(&local->iflist_mtx);
 }
 
 void ieee80211_offchannel_return(struct ieee80211_local *local,
-				 bool enable_beaconing)
+				 bool enable_beaconing,
+				 bool offchannel_ps_disable)
 {
 	struct ieee80211_sub_if_data *sdata;
 
@@ -161,7 +166,8 @@ void ieee80211_offchannel_return(struct ieee80211_local *local,
 			continue;
 
 		/* Tell AP we're back */
-		if (sdata->vif.type == NL80211_IFTYPE_STATION) {
+		if (offchannel_ps_disable &&
+		    sdata->vif.type == NL80211_IFTYPE_STATION) {
 			if (sdata->u.mgd.associated)
 				ieee80211_offchannel_ps_disable(sdata);
 		}
@@ -181,7 +187,7 @@ void ieee80211_offchannel_return(struct ieee80211_local *local,
 			netif_tx_wake_all_queues(sdata->dev);
 		}
 
-		/* re-enable beaconing */
+		/* Check to see if we should re-enable beaconing */
 		if (enable_beaconing &&
 		    (sdata->vif.type == NL80211_IFTYPE_AP ||
 		     sdata->vif.type == NL80211_IFTYPE_ADHOC ||
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index a6701ed..5c1930ba 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -77,7 +77,7 @@ ieee80211_rx_radiotap_len(struct ieee80211_local *local,
 	/* always present fields */
 	len = sizeof(struct ieee80211_radiotap_header) + 9;
 
-	if (status->flag & RX_FLAG_TSFT)
+	if (status->flag & RX_FLAG_MACTIME_MPDU)
 		len += 8;
 	if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM)
 		len += 1;
@@ -85,6 +85,9 @@ ieee80211_rx_radiotap_len(struct ieee80211_local *local,
 	if (len & 1) /* padding for RX_FLAGS if necessary */
 		len++;
 
+	if (status->flag & RX_FLAG_HT) /* HT info */
+		len += 3;
+
 	return len;
 }
 
@@ -120,7 +123,7 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
 	/* the order of the following fields is important */
 
 	/* IEEE80211_RADIOTAP_TSFT */
-	if (status->flag & RX_FLAG_TSFT) {
+	if (status->flag & RX_FLAG_MACTIME_MPDU) {
 		put_unaligned_le64(status->mactime, pos);
 		rthdr->it_present |=
 			cpu_to_le32(1 << IEEE80211_RADIOTAP_TSFT);
@@ -139,11 +142,8 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
 	/* IEEE80211_RADIOTAP_RATE */
 	if (status->flag & RX_FLAG_HT) {
 		/*
-		 * TODO: add following information into radiotap header once
-		 * suitable fields are defined for it:
-		 * - MCS index (status->rate_idx)
-		 * - HT40 (status->flag & RX_FLAG_40MHZ)
-		 * - short-GI (status->flag & RX_FLAG_SHORT_GI)
+		 * MCS information is a separate field in radiotap,
+		 * added below.
 		 */
 		*pos = 0;
 	} else {
@@ -193,6 +193,20 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
 		rx_flags |= IEEE80211_RADIOTAP_F_RX_BADPLCP;
 	put_unaligned_le16(rx_flags, pos);
 	pos += 2;
+
+	if (status->flag & RX_FLAG_HT) {
+		rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_MCS);
+		*pos++ = IEEE80211_RADIOTAP_MCS_HAVE_MCS |
+			 IEEE80211_RADIOTAP_MCS_HAVE_GI |
+			 IEEE80211_RADIOTAP_MCS_HAVE_BW;
+		*pos = 0;
+		if (status->flag & RX_FLAG_SHORT_GI)
+			*pos |= IEEE80211_RADIOTAP_MCS_SGI;
+		if (status->flag & RX_FLAG_40MHZ)
+			*pos |= IEEE80211_RADIOTAP_MCS_BW_40;
+		pos++;
+		*pos++ = status->rate_idx;
+	}
 }
 
 /*
@@ -392,16 +406,10 @@ ieee80211_rx_h_passive_scan(struct ieee80211_rx_data *rx)
 	if (likely(!(status->rx_flags & IEEE80211_RX_IN_SCAN)))
 		return RX_CONTINUE;
 
-	if (test_bit(SCAN_HW_SCANNING, &local->scanning))
+	if (test_bit(SCAN_HW_SCANNING, &local->scanning) ||
+	    test_bit(SCAN_SW_SCANNING, &local->scanning))
 		return ieee80211_scan_rx(rx->sdata, skb);
 
-	if (test_bit(SCAN_SW_SCANNING, &local->scanning)) {
-		/* drop all the other packets during a software scan anyway */
-		if (ieee80211_scan_rx(rx->sdata, skb) != RX_QUEUED)
-			dev_kfree_skb(skb);
-		return RX_QUEUED;
-	}
-
 	/* scanning finished during invoking of handlers */
 	I802_DEBUG_INC(local->rx_handlers_drop_passive_scan);
 	return RX_DROP_UNUSABLE;
@@ -798,7 +806,7 @@ ieee80211_rx_h_check(struct ieee80211_rx_data *rx)
 				rx->local->dot11FrameDuplicateCount++;
 				rx->sta->num_duplicates++;
 			}
-			return RX_DROP_MONITOR;
+			return RX_DROP_UNUSABLE;
 		} else
 			rx->sta->last_seq_ctrl[rx->queue] = hdr->seq_ctrl;
 	}
@@ -824,18 +832,8 @@ ieee80211_rx_h_check(struct ieee80211_rx_data *rx)
 		      ieee80211_is_pspoll(hdr->frame_control)) &&
 		     rx->sdata->vif.type != NL80211_IFTYPE_ADHOC &&
 		     rx->sdata->vif.type != NL80211_IFTYPE_WDS &&
-		     (!rx->sta || !test_sta_flags(rx->sta, WLAN_STA_ASSOC)))) {
-		if ((!ieee80211_has_fromds(hdr->frame_control) &&
-		     !ieee80211_has_tods(hdr->frame_control) &&
-		     ieee80211_is_data(hdr->frame_control)) ||
-		    !(status->rx_flags & IEEE80211_RX_RA_MATCH)) {
-			/* Drop IBSS frames and frames for other hosts
-			 * silently. */
-			return RX_DROP_MONITOR;
-		}
-
+		     (!rx->sta || !test_sta_flags(rx->sta, WLAN_STA_ASSOC))))
 		return RX_DROP_MONITOR;
-	}
 
 	return RX_CONTINUE;
 }
@@ -1088,7 +1086,8 @@ static void ap_sta_ps_start(struct sta_info *sta)
 
 	atomic_inc(&sdata->bss->num_sta_ps);
 	set_sta_flags(sta, WLAN_STA_PS_STA);
-	drv_sta_notify(local, sdata, STA_NOTIFY_SLEEP, &sta->sta);
+	if (!(local->hw.flags & IEEE80211_HW_AP_LINK_PS))
+		drv_sta_notify(local, sdata, STA_NOTIFY_SLEEP, &sta->sta);
 #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG
 	printk(KERN_DEBUG "%s: STA %pM aid %d enters power save mode\n",
 	       sdata->name, sta->sta.addr, sta->sta.aid);
@@ -1117,6 +1116,27 @@ static void ap_sta_ps_end(struct sta_info *sta)
 	ieee80211_sta_ps_deliver_wakeup(sta);
 }
 
+int ieee80211_sta_ps_transition(struct ieee80211_sta *sta, bool start)
+{
+	struct sta_info *sta_inf = container_of(sta, struct sta_info, sta);
+	bool in_ps;
+
+	WARN_ON(!(sta_inf->local->hw.flags & IEEE80211_HW_AP_LINK_PS));
+
+	/* Don't let the same PS state be set twice */
+	in_ps = test_sta_flags(sta_inf, WLAN_STA_PS_STA);
+	if ((start && in_ps) || (!start && !in_ps))
+		return -EINVAL;
+
+	if (start)
+		ap_sta_ps_start(sta_inf);
+	else
+		ap_sta_ps_end(sta_inf);
+
+	return 0;
+}
+EXPORT_SYMBOL(ieee80211_sta_ps_transition);
+
 static ieee80211_rx_result debug_noinline
 ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx)
 {
@@ -1136,14 +1156,23 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx)
 	if (rx->sdata->vif.type == NL80211_IFTYPE_ADHOC) {
 		u8 *bssid = ieee80211_get_bssid(hdr, rx->skb->len,
 						NL80211_IFTYPE_ADHOC);
-		if (compare_ether_addr(bssid, rx->sdata->u.ibss.bssid) == 0)
+		if (compare_ether_addr(bssid, rx->sdata->u.ibss.bssid) == 0) {
 			sta->last_rx = jiffies;
+			if (ieee80211_is_data(hdr->frame_control)) {
+				sta->last_rx_rate_idx = status->rate_idx;
+				sta->last_rx_rate_flag = status->flag;
+			}
+		}
 	} else if (!is_multicast_ether_addr(hdr->addr1)) {
 		/*
 		 * Mesh beacons will update last_rx when if they are found to
 		 * match the current local configuration when processed.
 		 */
 		sta->last_rx = jiffies;
+		if (ieee80211_is_data(hdr->frame_control)) {
+			sta->last_rx_rate_idx = status->rate_idx;
+			sta->last_rx_rate_flag = status->flag;
+		}
 	}
 
 	if (!(status->rx_flags & IEEE80211_RX_RA_MATCH))
@@ -1161,7 +1190,8 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx)
 	 * Change STA power saving mode only at the end of a frame
 	 * exchange sequence.
 	 */
-	if (!ieee80211_has_morefrags(hdr->frame_control) &&
+	if (!(sta->local->hw.flags & IEEE80211_HW_AP_LINK_PS) &&
+	    !ieee80211_has_morefrags(hdr->frame_control) &&
 	    !(status->rx_flags & IEEE80211_RX_DEFERRED_RELEASE) &&
 	    (rx->sdata->vif.type == NL80211_IFTYPE_AP ||
 	     rx->sdata->vif.type == NL80211_IFTYPE_AP_VLAN)) {
@@ -1556,17 +1586,36 @@ __ieee80211_data_to_8023(struct ieee80211_rx_data *rx)
 {
 	struct ieee80211_sub_if_data *sdata = rx->sdata;
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data;
+	bool check_port_control = false;
+	struct ethhdr *ehdr;
+	int ret;
 
 	if (ieee80211_has_a4(hdr->frame_control) &&
 	    sdata->vif.type == NL80211_IFTYPE_AP_VLAN && !sdata->u.vlan.sta)
 		return -1;
 
+	if (sdata->vif.type == NL80211_IFTYPE_STATION &&
+	    !!sdata->u.mgd.use_4addr != !!ieee80211_has_a4(hdr->frame_control)) {
+
+		if (!sdata->u.mgd.use_4addr)
+			return -1;
+		else
+			check_port_control = true;
+	}
+
 	if (is_multicast_ether_addr(hdr->addr1) &&
-	    ((sdata->vif.type == NL80211_IFTYPE_AP_VLAN && sdata->u.vlan.sta) ||
-	     (sdata->vif.type == NL80211_IFTYPE_STATION && sdata->u.mgd.use_4addr)))
+	    sdata->vif.type == NL80211_IFTYPE_AP_VLAN && sdata->u.vlan.sta)
 		return -1;
 
-	return ieee80211_data_to_8023(rx->skb, sdata->vif.addr, sdata->vif.type);
+	ret = ieee80211_data_to_8023(rx->skb, sdata->vif.addr, sdata->vif.type);
+	if (ret < 0 || !check_port_control)
+		return ret;
+
+	ehdr = (struct ethhdr *) rx->skb->data;
+	if (ehdr->h_proto != rx->sdata->control_port_protocol)
+		return -1;
+
+	return 0;
 }
 
 /*
@@ -1893,7 +1942,10 @@ ieee80211_rx_h_data(struct ieee80211_rx_data *rx)
 	dev->stats.rx_bytes += rx->skb->len;
 
 	if (local->ps_sdata && local->hw.conf.dynamic_ps_timeout > 0 &&
-	    !is_multicast_ether_addr(((struct ethhdr *)rx->skb->data)->h_dest)) {
+	    !is_multicast_ether_addr(
+		    ((struct ethhdr *)rx->skb->data)->h_dest) &&
+	    (!local->scanning &&
+	     !test_bit(SDATA_STATE_OFFCHANNEL, &sdata->state))) {
 			mod_timer(&local->dynamic_ps_timer, jiffies +
 			 msecs_to_jiffies(local->hw.conf.dynamic_ps_timeout));
 	}
@@ -2590,7 +2642,8 @@ static int prepare_for_handlers(struct ieee80211_rx_data *rx,
 			return 0;
 		if (!multicast &&
 		    compare_ether_addr(sdata->vif.addr, hdr->addr1) != 0) {
-			if (!(sdata->dev->flags & IFF_PROMISC))
+			if (!(sdata->dev->flags & IFF_PROMISC) ||
+			    sdata->u.mgd.use_4addr)
 				return 0;
 			status->rx_flags &= ~IEEE80211_RX_RA_MATCH;
 		}
@@ -2639,7 +2692,8 @@ static int prepare_for_handlers(struct ieee80211_rx_data *rx,
 				return 0;
 		} else if (!ieee80211_bssid_match(bssid,
 					sdata->vif.addr)) {
-			if (!(status->rx_flags & IEEE80211_RX_IN_SCAN))
+			if (!(status->rx_flags & IEEE80211_RX_IN_SCAN) &&
+			    !ieee80211_is_beacon(hdr->frame_control))
 				return 0;
 			status->rx_flags &= ~IEEE80211_RX_RA_MATCH;
 		}
@@ -2692,7 +2746,7 @@ static bool ieee80211_prepare_and_rx_handle(struct ieee80211_rx_data *rx,
 		if (!skb) {
 			if (net_ratelimit())
 				wiphy_debug(local->hw.wiphy,
-					"failed to copy multicast frame for %s\n",
+					"failed to copy skb for %s\n",
 					sdata->name);
 			return true;
 		}
@@ -2730,7 +2784,7 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw,
 		local->dot11ReceivedFragmentCount++;
 
 	if (unlikely(test_bit(SCAN_HW_SCANNING, &local->scanning) ||
-		     test_bit(SCAN_OFF_CHANNEL, &local->scanning)))
+		     test_bit(SCAN_SW_SCANNING, &local->scanning)))
 		status->rx_flags |= IEEE80211_RX_IN_SCAN;
 
 	if (ieee80211_is_mgmt(fc))
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index fb274db..8429545 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -196,7 +196,8 @@ ieee80211_scan_rx(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb)
 	ieee802_11_parse_elems(elements, skb->len - baselen, &elems);
 
 	if (elems.ds_params && elems.ds_params_len == 1)
-		freq = ieee80211_channel_to_frequency(elems.ds_params[0]);
+		freq = ieee80211_channel_to_frequency(elems.ds_params[0],
+						      rx_status->band);
 	else
 		freq = rx_status->freq;
 
@@ -211,6 +212,14 @@ ieee80211_scan_rx(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb)
 	if (bss)
 		ieee80211_rx_bss_put(sdata->local, bss);
 
+	/* If we are on-operating-channel, and this packet is for the
+	 * current channel, pass the pkt on up the stack so that
+	 * the rest of the stack can make use of it.
+	 */
+	if (ieee80211_cfg_on_oper_channel(sdata->local)
+	    && (channel == sdata->local->oper_channel))
+		return RX_CONTINUE;
+
 	dev_kfree_skb(skb);
 	return RX_QUEUED;
 }
@@ -292,15 +301,35 @@ static void __ieee80211_scan_completed_finish(struct ieee80211_hw *hw,
 					      bool was_hw_scan)
 {
 	struct ieee80211_local *local = hw_to_local(hw);
+	bool on_oper_chan;
+	bool enable_beacons = false;
+
+	mutex_lock(&local->mtx);
+	on_oper_chan = ieee80211_cfg_on_oper_channel(local);
+
+	WARN_ON(local->scanning & (SCAN_SW_SCANNING | SCAN_HW_SCANNING));
+
+	if (was_hw_scan || !on_oper_chan) {
+		if (WARN_ON(local->scan_channel))
+			local->scan_channel = NULL;
+		ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL);
+	} else
+		/* Set power back to normal operating levels. */
+		ieee80211_hw_config(local, 0);
 
-	ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL);
 	if (!was_hw_scan) {
+		bool on_oper_chan2;
 		ieee80211_configure_filter(local);
 		drv_sw_scan_complete(local);
-		ieee80211_offchannel_return(local, true);
+		on_oper_chan2 = ieee80211_cfg_on_oper_channel(local);
+		/* We should always be on-channel at this point. */
+		WARN_ON(!on_oper_chan2);
+		if (on_oper_chan2 && (on_oper_chan != on_oper_chan2))
+			enable_beacons = true;
+
+		ieee80211_offchannel_return(local, enable_beacons, true);
 	}
 
-	mutex_lock(&local->mtx);
 	ieee80211_recalc_idle(local);
 	mutex_unlock(&local->mtx);
 
@@ -340,16 +369,21 @@ static int ieee80211_start_sw_scan(struct ieee80211_local *local)
 	 */
 	drv_sw_scan_start(local);
 
-	ieee80211_offchannel_stop_beaconing(local);
-
 	local->leave_oper_channel_time = 0;
 	local->next_scan_state = SCAN_DECISION;
 	local->scan_channel_idx = 0;
 
-	drv_flush(local, false);
+	/* We always want to use off-channel PS, even if we
+	 * are not really leaving oper-channel.  Don't
+	 * tell the AP though, as long as we are on-channel.
+	 */
+	ieee80211_offchannel_enable_all_ps(local, false);
 
 	ieee80211_configure_filter(local);
 
+	/* We need to set power level at maximum rate for scanning. */
+	ieee80211_hw_config(local, 0);
+
 	ieee80211_queue_delayed_work(&local->hw,
 				     &local->scan_work,
 				     IEEE80211_CHANNEL_TIME);
@@ -486,7 +520,20 @@ static void ieee80211_scan_state_decision(struct ieee80211_local *local,
 	}
 	mutex_unlock(&local->iflist_mtx);
 
-	if (local->scan_channel) {
+	next_chan = local->scan_req->channels[local->scan_channel_idx];
+
+	if (ieee80211_cfg_on_oper_channel(local)) {
+		/* We're currently on operating channel. */
+		if (next_chan == local->oper_channel)
+			/* We don't need to move off of operating channel. */
+			local->next_scan_state = SCAN_SET_CHANNEL;
+		else
+			/*
+			 * We do need to leave operating channel, as next
+			 * scan is somewhere else.
+			 */
+			local->next_scan_state = SCAN_LEAVE_OPER_CHANNEL;
+	} else {
 		/*
 		 * we're currently scanning a different channel, let's
 		 * see if we can scan another channel without interfering
@@ -502,7 +549,6 @@ static void ieee80211_scan_state_decision(struct ieee80211_local *local,
 		 *
 		 * Otherwise switch back to the operating channel.
 		 */
-		next_chan = local->scan_req->channels[local->scan_channel_idx];
 
 		bad_latency = time_after(jiffies +
 				ieee80211_scan_get_channel_time(next_chan),
@@ -520,12 +566,6 @@ static void ieee80211_scan_state_decision(struct ieee80211_local *local,
 			local->next_scan_state = SCAN_ENTER_OPER_CHANNEL;
 		else
 			local->next_scan_state = SCAN_SET_CHANNEL;
-	} else {
-		/*
-		 * we're on the operating channel currently, let's
-		 * leave that channel now to scan another one
-		 */
-		local->next_scan_state = SCAN_LEAVE_OPER_CHANNEL;
 	}
 
 	*next_delay = 0;
@@ -534,9 +574,10 @@ static void ieee80211_scan_state_decision(struct ieee80211_local *local,
 static void ieee80211_scan_state_leave_oper_channel(struct ieee80211_local *local,
 						    unsigned long *next_delay)
 {
-	ieee80211_offchannel_stop_station(local);
-
-	__set_bit(SCAN_OFF_CHANNEL, &local->scanning);
+	/* PS will already be in off-channel mode,
+	 * we do that once at the beginning of scanning.
+	 */
+	ieee80211_offchannel_stop_vifs(local, false);
 
 	/*
 	 * What if the nullfunc frames didn't arrive?
@@ -559,15 +600,15 @@ static void ieee80211_scan_state_enter_oper_channel(struct ieee80211_local *loca
 {
 	/* switch back to the operating channel */
 	local->scan_channel = NULL;
-	ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL);
+	if (!ieee80211_cfg_on_oper_channel(local))
+		ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL);
 
 	/*
-	 * Only re-enable station mode interface now; beaconing will be
-	 * re-enabled once the full scan has been completed.
+	 * Re-enable vifs and beaconing.  Leave PS
+	 * in off-channel state..will put that back
+	 * on-channel at the end of scanning.
 	 */
-	ieee80211_offchannel_return(local, false);
-
-	__clear_bit(SCAN_OFF_CHANNEL, &local->scanning);
+	ieee80211_offchannel_return(local, true, false);
 
 	*next_delay = HZ / 5;
 	local->next_scan_state = SCAN_DECISION;
@@ -583,8 +624,11 @@ static void ieee80211_scan_state_set_channel(struct ieee80211_local *local,
 	chan = local->scan_req->channels[local->scan_channel_idx];
 
 	local->scan_channel = chan;
-	if (ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL))
-		skip = 1;
+
+	/* Only call hw-config if we really need to change channels. */
+	if (chan != local->hw.conf.channel)
+		if (ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL))
+			skip = 1;
 
 	/* advance state machine to next channel/band */
 	local->scan_channel_idx++;
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index c426504..5a11078 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -899,7 +899,8 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta)
 	struct ieee80211_local *local = sdata->local;
 	int sent, buffered;
 
-	drv_sta_notify(local, sdata, STA_NOTIFY_AWAKE, &sta->sta);
+	if (!(local->hw.flags & IEEE80211_HW_AP_LINK_PS))
+		drv_sta_notify(local, sdata, STA_NOTIFY_AWAKE, &sta->sta);
 
 	if (!skb_queue_empty(&sta->ps_tx_buf))
 		sta_info_clear_tim_bit(sta);
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index bbdd2a8..5768114 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -82,6 +82,7 @@ enum ieee80211_sta_info_flags {
  * @state: session state (see above)
  * @stop_initiator: initiator of a session stop
  * @tx_stop: TX DelBA frame when stopping
+ * @buf_size: reorder buffer size at receiver
  *
  * This structure's lifetime is managed by RCU, assignments to
  * the array holding it must hold the aggregation mutex.
@@ -101,6 +102,7 @@ struct tid_ampdu_tx {
 	u8 dialog_token;
 	u8 stop_initiator;
 	bool tx_stop;
+	u8 buf_size;
 };
 
 /**
@@ -207,6 +209,8 @@ enum plink_state {
  * @rate_ctrl_priv: rate control private per-STA pointer
  * @last_tx_rate: rate used for last transmit, to report to userspace as
  *	"the" transmit rate
+ * @last_rx_rate_idx: rx status rate index of the last data packet
+ * @last_rx_rate_flag: rx status flag of the last data packet
  * @lock: used for locking all fields that require locking, see comments
  *	in the header file.
  * @flaglock: spinlock for flags accesses
@@ -309,6 +313,8 @@ struct sta_info {
 	unsigned long tx_bytes;
 	unsigned long tx_fragments;
 	struct ieee80211_tx_rate last_tx_rate;
+	int last_rx_rate_idx;
+	int last_rx_rate_flag;
 	u16 tid_seq[IEEE80211_QOS_CTL_TID_MASK + 1];
 
 	/*
diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index 071ac95..b936dd2 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -98,6 +98,10 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local,
 	 *  (b) always process RX events before TX status events if ordering
 	 *      can be unknown, for example with different interrupt status
 	 *	bits.
+	 *  (c) if PS mode transitions are manual (i.e. the flag
+	 *      %IEEE80211_HW_AP_LINK_PS is set), always process PS state
+	 *      changes before calling TX status events if ordering can be
+	 *	unknown.
 	 */
 	if (test_sta_flags(sta, WLAN_STA_PS_STA) &&
 	    skb_queue_len(&sta->tx_filtered) < STA_MAX_TX_BUFFER) {
@@ -314,8 +318,6 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
 		if (info->flags & IEEE80211_TX_STAT_ACK) {
 			local->ps_sdata->u.mgd.flags |=
 					IEEE80211_STA_NULLFUNC_ACKED;
-			ieee80211_queue_work(&local->hw,
-					&local->dynamic_ps_enable_work);
 		} else
 			mod_timer(&local->dynamic_ps_timer, jiffies +
 					msecs_to_jiffies(10));
@@ -339,6 +341,10 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
 			cookie = local->hw_roc_cookie ^ 2;
 			local->hw_roc_skb_for_status = NULL;
 		}
+
+		if (cookie == local->hw_offchan_tx_cookie)
+			local->hw_offchan_tx_cookie = 0;
+
 		cfg80211_mgmt_tx_status(
 			skb->dev, cookie, skb->data, skb->len,
 			!!(info->flags & IEEE80211_TX_STAT_ACK), GFP_ATOMIC);
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index b0beaa5..081dcaf 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -33,10 +33,6 @@
 #include "wme.h"
 #include "rate.h"
 
-#define IEEE80211_TX_OK		0
-#define IEEE80211_TX_AGAIN	1
-#define IEEE80211_TX_PENDING	2
-
 /* misc utils */
 
 static __le16 ieee80211_duration(struct ieee80211_tx_data *tx, int group_addr,
@@ -236,6 +232,7 @@ ieee80211_tx_h_dynamic_ps(struct ieee80211_tx_data *tx)
 	if (local->hw.conf.flags & IEEE80211_CONF_PS) {
 		ieee80211_stop_queues_by_reason(&local->hw,
 						IEEE80211_QUEUE_STOP_REASON_PS);
+		ifmgd->flags &= ~IEEE80211_STA_NULLFUNC_ACKED;
 		ieee80211_queue_work(&local->hw,
 				     &local->dynamic_ps_disable_work);
 	}
@@ -257,7 +254,8 @@ ieee80211_tx_h_check_assoc(struct ieee80211_tx_data *tx)
 	if (unlikely(info->flags & IEEE80211_TX_CTL_INJECTED))
 		return TX_CONTINUE;
 
-	if (unlikely(test_bit(SCAN_OFF_CHANNEL, &tx->local->scanning)) &&
+	if (unlikely(test_bit(SCAN_SW_SCANNING, &tx->local->scanning)) &&
+	    test_bit(SDATA_STATE_OFFCHANNEL, &tx->sdata->state) &&
 	    !ieee80211_is_probe_req(hdr->frame_control) &&
 	    !ieee80211_is_nullfunc(hdr->frame_control))
 		/*
@@ -1283,16 +1281,17 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata,
 	return TX_CONTINUE;
 }
 
-static int __ieee80211_tx(struct ieee80211_local *local,
-			  struct sk_buff **skbp,
-			  struct sta_info *sta,
-			  bool txpending)
+/*
+ * Returns false if the frame couldn't be transmitted but was queued instead.
+ */
+static bool __ieee80211_tx(struct ieee80211_local *local, struct sk_buff **skbp,
+			   struct sta_info *sta, bool txpending)
 {
 	struct sk_buff *skb = *skbp, *next;
 	struct ieee80211_tx_info *info;
 	struct ieee80211_sub_if_data *sdata;
 	unsigned long flags;
-	int ret, len;
+	int len;
 	bool fragm = false;
 
 	while (skb) {
@@ -1300,13 +1299,37 @@ static int __ieee80211_tx(struct ieee80211_local *local,
 		__le16 fc;
 
 		spin_lock_irqsave(&local->queue_stop_reason_lock, flags);
-		ret = IEEE80211_TX_OK;
 		if (local->queue_stop_reasons[q] ||
-		    (!txpending && !skb_queue_empty(&local->pending[q])))
-			ret = IEEE80211_TX_PENDING;
+		    (!txpending && !skb_queue_empty(&local->pending[q]))) {
+			/*
+			 * Since queue is stopped, queue up frames for later
+			 * transmission from the tx-pending tasklet when the
+			 * queue is woken again.
+			 */
+
+			do {
+				next = skb->next;
+				skb->next = NULL;
+				/*
+				 * NB: If txpending is true, next must already
+				 * be NULL since we must've gone through this
+				 * loop before already; therefore we can just
+				 * queue the frame to the head without worrying
+				 * about reordering of fragments.
+				 */
+				if (unlikely(txpending))
+					__skb_queue_head(&local->pending[q],
+							 skb);
+				else
+					__skb_queue_tail(&local->pending[q],
+							 skb);
+			} while ((skb = next));
+
+			spin_unlock_irqrestore(&local->queue_stop_reason_lock,
+					       flags);
+			return false;
+		}
 		spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
-		if (ret != IEEE80211_TX_OK)
-			return ret;
 
 		info = IEEE80211_SKB_CB(skb);
 
@@ -1341,15 +1364,7 @@ static int __ieee80211_tx(struct ieee80211_local *local,
 			info->control.sta = NULL;
 
 		fc = ((struct ieee80211_hdr *)skb->data)->frame_control;
-		ret = drv_tx(local, skb);
-		if (WARN_ON(ret != NETDEV_TX_OK && skb->len != len)) {
-			dev_kfree_skb(skb);
-			ret = NETDEV_TX_OK;
-		}
-		if (ret != NETDEV_TX_OK) {
-			info->control.vif = &sdata->vif;
-			return IEEE80211_TX_AGAIN;
-		}
+		drv_tx(local, skb);
 
 		ieee80211_tpt_led_trig_tx(local, fc, len);
 		*skbp = skb = next;
@@ -1357,7 +1372,7 @@ static int __ieee80211_tx(struct ieee80211_local *local,
 		fragm = true;
 	}
 
-	return IEEE80211_TX_OK;
+	return true;
 }
 
 /*
@@ -1394,7 +1409,8 @@ static int invoke_tx_handlers(struct ieee80211_tx_data *tx)
 	/* handlers after fragment must be aware of tx info fragmentation! */
 	CALL_TXH(ieee80211_tx_h_stats);
 	CALL_TXH(ieee80211_tx_h_encrypt);
-	CALL_TXH(ieee80211_tx_h_calculate_duration);
+	if (!(tx->local->hw.flags & IEEE80211_HW_HAS_RATE_CONTROL))
+		CALL_TXH(ieee80211_tx_h_calculate_duration);
 #undef CALL_TXH
 
  txh_done:
@@ -1416,23 +1432,24 @@ static int invoke_tx_handlers(struct ieee80211_tx_data *tx)
 	return 0;
 }
 
-static void ieee80211_tx(struct ieee80211_sub_if_data *sdata,
+/*
+ * Returns false if the frame couldn't be transmitted but was queued instead.
+ */
+static bool ieee80211_tx(struct ieee80211_sub_if_data *sdata,
 			 struct sk_buff *skb, bool txpending)
 {
 	struct ieee80211_local *local = sdata->local;
 	struct ieee80211_tx_data tx;
 	ieee80211_tx_result res_prepare;
 	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
-	struct sk_buff *next;
-	unsigned long flags;
-	int ret, retries;
 	u16 queue;
+	bool result = true;
 
 	queue = skb_get_queue_mapping(skb);
 
 	if (unlikely(skb->len < 10)) {
 		dev_kfree_skb(skb);
-		return;
+		return true;
 	}
 
 	rcu_read_lock();
@@ -1442,85 +1459,19 @@ static void ieee80211_tx(struct ieee80211_sub_if_data *sdata,
 
 	if (unlikely(res_prepare == TX_DROP)) {
 		dev_kfree_skb(skb);
-		rcu_read_unlock();
-		return;
+		goto out;
 	} else if (unlikely(res_prepare == TX_QUEUED)) {
-		rcu_read_unlock();
-		return;
+		goto out;
 	}
 
 	tx.channel = local->hw.conf.channel;
 	info->band = tx.channel->band;
 
-	if (invoke_tx_handlers(&tx))
-		goto out;
-
-	retries = 0;
- retry:
-	ret = __ieee80211_tx(local, &tx.skb, tx.sta, txpending);
-	switch (ret) {
-	case IEEE80211_TX_OK:
-		break;
-	case IEEE80211_TX_AGAIN:
-		/*
-		 * Since there are no fragmented frames on A-MPDU
-		 * queues, there's no reason for a driver to reject
-		 * a frame there, warn and drop it.
-		 */
-		if (WARN_ON(info->flags & IEEE80211_TX_CTL_AMPDU))
-			goto drop;
-		/* fall through */
-	case IEEE80211_TX_PENDING:
-		skb = tx.skb;
-
-		spin_lock_irqsave(&local->queue_stop_reason_lock, flags);
-
-		if (local->queue_stop_reasons[queue] ||
-		    !skb_queue_empty(&local->pending[queue])) {
-			/*
-			 * if queue is stopped, queue up frames for later
-			 * transmission from the tasklet
-			 */
-			do {
-				next = skb->next;
-				skb->next = NULL;
-				if (unlikely(txpending))
-					__skb_queue_head(&local->pending[queue],
-							 skb);
-				else
-					__skb_queue_tail(&local->pending[queue],
-							 skb);
-			} while ((skb = next));
-
-			spin_unlock_irqrestore(&local->queue_stop_reason_lock,
-					       flags);
-		} else {
-			/*
-			 * otherwise retry, but this is a race condition or
-			 * a driver bug (which we warn about if it persists)
-			 */
-			spin_unlock_irqrestore(&local->queue_stop_reason_lock,
-					       flags);
-
-			retries++;
-			if (WARN(retries > 10, "tx refused but queue active\n"))
-				goto drop;
-			goto retry;
-		}
-	}
+	if (!invoke_tx_handlers(&tx))
+		result = __ieee80211_tx(local, &tx.skb, tx.sta, txpending);
  out:
 	rcu_read_unlock();
-	return;
-
- drop:
-	rcu_read_unlock();
-
-	skb = tx.skb;
-	while (skb) {
-		next = skb->next;
-		dev_kfree_skb(skb);
-		skb = next;
-	}
+	return result;
 }
 
 /* device xmit handlers */
@@ -1750,7 +1701,7 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb,
 	__le16 fc;
 	struct ieee80211_hdr hdr;
 	struct ieee80211s_hdr mesh_hdr __maybe_unused;
-	struct mesh_path *mppath = NULL;
+	struct mesh_path __maybe_unused *mppath = NULL;
 	const u8 *encaps_data;
 	int encaps_len, skip_header_bytes;
 	int nh_pos, h_pos;
@@ -1815,19 +1766,19 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb,
 			mppath = mpp_path_lookup(skb->data, sdata);
 
 		/*
-		 * Do not use address extension, if it is a packet from
-		 * the same interface and the destination is not being
-		 * proxied by any other mest point.
+		 * Use address extension if it is a packet from
+		 * another interface or if we know the destination
+		 * is being proxied by a portal (i.e. portal address
+		 * differs from proxied address)
 		 */
 		if (compare_ether_addr(sdata->vif.addr,
 				       skb->data + ETH_ALEN) == 0 &&
-		    (!mppath || !compare_ether_addr(mppath->mpp, skb->data))) {
+		    !(mppath && compare_ether_addr(mppath->mpp, skb->data))) {
 			hdrlen = ieee80211_fill_mesh_addresses(&hdr, &fc,
 					skb->data, skb->data + ETH_ALEN);
 			meshhdrlen = ieee80211_new_mesh_header(&mesh_hdr,
 					sdata, NULL, NULL);
 		} else {
-			/* packet from other interface */
 			int is_mesh_mcast = 1;
 			const u8 *mesh_da;
 
@@ -2067,6 +2018,11 @@ void ieee80211_clear_tx_pending(struct ieee80211_local *local)
 		skb_queue_purge(&local->pending[i]);
 }
 
+/*
+ * Returns false if the frame couldn't be transmitted but was queued instead,
+ * which in this case means re-queued -- take as an indication to stop sending
+ * more pending frames.
+ */
 static bool ieee80211_tx_pending_skb(struct ieee80211_local *local,
 				     struct sk_buff *skb)
 {
@@ -2074,20 +2030,17 @@ static bool ieee80211_tx_pending_skb(struct ieee80211_local *local,
 	struct ieee80211_sub_if_data *sdata;
 	struct sta_info *sta;
 	struct ieee80211_hdr *hdr;
-	int ret;
-	bool result = true;
+	bool result;
 
 	sdata = vif_to_sdata(info->control.vif);
 
 	if (info->flags & IEEE80211_TX_INTFL_NEED_TXPROCESSING) {
-		ieee80211_tx(sdata, skb, true);
+		result = ieee80211_tx(sdata, skb, true);
 	} else {
 		hdr = (struct ieee80211_hdr *)skb->data;
 		sta = sta_info_get(sdata, hdr->addr1);
 
-		ret = __ieee80211_tx(local, &skb, sta, true);
-		if (ret != IEEE80211_TX_OK)
-			result = false;
+		result = __ieee80211_tx(local, &skb, sta, true);
 	}
 
 	return result;
@@ -2129,8 +2082,6 @@ void ieee80211_tx_pending(unsigned long data)
 						flags);
 
 			txok = ieee80211_tx_pending_skb(local, skb);
-			if (!txok)
-				__skb_queue_head(&local->pending[i], skb);
 			spin_lock_irqsave(&local->queue_stop_reason_lock,
 					  flags);
 			if (!txok)
@@ -2178,6 +2129,8 @@ static void ieee80211_beacon_add_tim(struct ieee80211_if_ap *bss,
 	if (bss->dtim_count == 0 && !skb_queue_empty(&bss->ps_bc_buf))
 		aid0 = 1;
 
+	bss->dtim_bc_mc = aid0 == 1;
+
 	if (have_bits) {
 		/* Find largest even number N1 so that bits numbered 1 through
 		 * (N1 x 8) - 1 in the bitmap are 0 and number N2 so that bits
@@ -2241,7 +2194,7 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw,
 	if (sdata->vif.type == NL80211_IFTYPE_AP) {
 		ap = &sdata->u.ap;
 		beacon = rcu_dereference(ap->beacon);
-		if (ap && beacon) {
+		if (beacon) {
 			/*
 			 * headroom, head length,
 			 * tail length and maximum TIM length
@@ -2302,6 +2255,11 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw,
 		struct ieee80211_mgmt *mgmt;
 		u8 *pos;
 
+#ifdef CONFIG_MAC80211_MESH
+		if (!sdata->u.mesh.mesh_id_len)
+			goto out;
+#endif
+
 		/* headroom, head length, tail length and maximum TIM length */
 		skb = dev_alloc_skb(local->tx_headroom + 400 +
 				sdata->u.mesh.vendor_ie_len);
@@ -2543,7 +2501,7 @@ ieee80211_get_buffered_bc(struct ieee80211_hw *hw,
 	if (sdata->vif.type != NL80211_IFTYPE_AP || !beacon || !beacon->head)
 		goto out;
 
-	if (bss->dtim_count != 0)
+	if (bss->dtim_count != 0 || !bss->dtim_bc_mc)
 		goto out; /* send buffered bc/mc only after DTIM beacon */
 
 	while (1) {
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index d036597..556647a 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -986,12 +986,6 @@ int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer,
 		u16 cap = sband->ht_cap.cap;
 		__le16 tmp;
 
-		if (ieee80211_disable_40mhz_24ghz &&
-		    sband->band == IEEE80211_BAND_2GHZ) {
-			cap &= ~IEEE80211_HT_CAP_SUP_WIDTH_20_40;
-			cap &= ~IEEE80211_HT_CAP_SGI_40;
-		}
-
 		*pos++ = WLAN_EID_HT_CAPABILITY;
 		*pos++ = sizeof(struct ieee80211_ht_cap);
 		memset(pos, 0, sizeof(struct ieee80211_ht_cap));
diff --git a/net/mac80211/work.c b/net/mac80211/work.c
index 36305e0..204f0a4 100644
--- a/net/mac80211/work.c
+++ b/net/mac80211/work.c
@@ -126,12 +126,6 @@ static void ieee80211_add_ht_ie(struct sk_buff *skb, const u8 *ht_info_ie,
 
 	/* determine capability flags */
 
-	if (ieee80211_disable_40mhz_24ghz &&
-	    sband->band == IEEE80211_BAND_2GHZ) {
-		cap &= ~IEEE80211_HT_CAP_SUP_WIDTH_20_40;
-		cap &= ~IEEE80211_HT_CAP_SGI_40;
-	}
-
 	switch (ht_info->ht_param & IEEE80211_HT_PARAM_CHA_SEC_OFFSET) {
 	case IEEE80211_HT_PARAM_CHA_SEC_ABOVE:
 		if (flags & IEEE80211_CHAN_NO_HT40PLUS) {
@@ -874,6 +868,44 @@ static void ieee80211_work_rx_queued_mgmt(struct ieee80211_local *local,
 	kfree_skb(skb);
 }
 
+static bool ieee80211_work_ct_coexists(enum nl80211_channel_type wk_ct,
+				       enum nl80211_channel_type oper_ct)
+{
+	switch (wk_ct) {
+	case NL80211_CHAN_NO_HT:
+		return true;
+	case NL80211_CHAN_HT20:
+		if (oper_ct != NL80211_CHAN_NO_HT)
+			return true;
+		return false;
+	case NL80211_CHAN_HT40MINUS:
+	case NL80211_CHAN_HT40PLUS:
+		return (wk_ct == oper_ct);
+	}
+	WARN_ON(1); /* shouldn't get here */
+	return false;
+}
+
+static enum nl80211_channel_type
+ieee80211_calc_ct(enum nl80211_channel_type wk_ct,
+		  enum nl80211_channel_type oper_ct)
+{
+	switch (wk_ct) {
+	case NL80211_CHAN_NO_HT:
+		return oper_ct;
+	case NL80211_CHAN_HT20:
+		if (oper_ct != NL80211_CHAN_NO_HT)
+			return oper_ct;
+		return wk_ct;
+	case NL80211_CHAN_HT40MINUS:
+	case NL80211_CHAN_HT40PLUS:
+		return wk_ct;
+	}
+	WARN_ON(1); /* shouldn't get here */
+	return wk_ct;
+}
+
+
 static void ieee80211_work_timer(unsigned long data)
 {
 	struct ieee80211_local *local = (void *) data;
@@ -924,18 +956,52 @@ static void ieee80211_work_work(struct work_struct *work)
 		}
 
 		if (!started && !local->tmp_channel) {
+			bool on_oper_chan;
+			bool tmp_chan_changed = false;
+			bool on_oper_chan2;
+			enum nl80211_channel_type wk_ct;
+			on_oper_chan = ieee80211_cfg_on_oper_channel(local);
+
+			/* Work with existing channel type if possible. */
+			wk_ct = wk->chan_type;
+			if (wk->chan == local->hw.conf.channel)
+				wk_ct = ieee80211_calc_ct(wk->chan_type,
+						local->hw.conf.channel_type);
+
+			if (local->tmp_channel)
+				if ((local->tmp_channel != wk->chan) ||
+				    (local->tmp_channel_type != wk_ct))
+					tmp_chan_changed = true;
+
+			local->tmp_channel = wk->chan;
+			local->tmp_channel_type = wk_ct;
 			/*
-			 * TODO: could optimize this by leaving the
-			 *	 station vifs in awake mode if they
-			 *	 happen to be on the same channel as
-			 *	 the requested channel
+			 * Leave the station vifs in awake mode if they
+			 * happen to be on the same channel as
+			 * the requested channel.
 			 */
-			ieee80211_offchannel_stop_beaconing(local);
-			ieee80211_offchannel_stop_station(local);
+			on_oper_chan2 = ieee80211_cfg_on_oper_channel(local);
+			if (on_oper_chan != on_oper_chan2) {
+				if (on_oper_chan2) {
+					/* going off oper channel, PS too */
+					ieee80211_offchannel_stop_vifs(local,
+								       true);
+					ieee80211_hw_config(local, 0);
+				} else {
+					/* going on channel, but leave PS
+					 * off-channel. */
+					ieee80211_hw_config(local, 0);
+					ieee80211_offchannel_return(local,
+								    true,
+								    false);
+				}
+			} else if (tmp_chan_changed)
+				/* Still off-channel, but on some other
+				 * channel, so update hardware.
+				 * PS should already be off-channel.
+				 */
+				ieee80211_hw_config(local, 0);
 
-			local->tmp_channel = wk->chan;
-			local->tmp_channel_type = wk->chan_type;
-			ieee80211_hw_config(local, 0);
 			started = true;
 			wk->timeout = jiffies;
 		}
@@ -1005,15 +1071,34 @@ static void ieee80211_work_work(struct work_struct *work)
 			continue;
 		if (wk->chan != local->tmp_channel)
 			continue;
-		if (wk->chan_type != local->tmp_channel_type)
+		if (ieee80211_work_ct_coexists(wk->chan_type,
+					       local->tmp_channel_type))
 			continue;
 		remain_off_channel = true;
 	}
 
 	if (!remain_off_channel && local->tmp_channel) {
+		bool on_oper_chan = ieee80211_cfg_on_oper_channel(local);
 		local->tmp_channel = NULL;
-		ieee80211_hw_config(local, 0);
-		ieee80211_offchannel_return(local, true);
+		/* If tmp_channel wasn't operating channel, then
+		 * we need to go back on-channel.
+		 * NOTE:  If we can ever be here while scannning,
+		 * or if the hw_config() channel config logic changes,
+		 * then we may need to do a more thorough check to see if
+		 * we still need to do a hardware config.  Currently,
+		 * we cannot be here while scanning, however.
+		 */
+		if (ieee80211_cfg_on_oper_channel(local) && !on_oper_chan)
+			ieee80211_hw_config(local, 0);
+
+		/* At the least, we need to disable offchannel_ps,
+		 * so just go ahead and run the entire offchannel
+		 * return logic here.  We *could* skip enabling
+		 * beaconing if we were already on-oper-channel
+		 * as a future optimization.
+		 */
+		ieee80211_offchannel_return(local, true, true);
+
 		/* give connection some time to breathe */
 		run_again(local, jiffies + HZ/2);
 	}
diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
index bee230d..f1765de 100644
--- a/net/mac80211/wpa.c
+++ b/net/mac80211/wpa.c
@@ -26,13 +26,12 @@
 ieee80211_tx_result
 ieee80211_tx_h_michael_mic_add(struct ieee80211_tx_data *tx)
 {
-	u8 *data, *key, *mic, key_offset;
+	u8 *data, *key, *mic;
 	size_t data_len;
 	unsigned int hdrlen;
 	struct ieee80211_hdr *hdr;
 	struct sk_buff *skb = tx->skb;
 	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
-	int authenticator;
 	int tail;
 
 	hdr = (struct ieee80211_hdr *)skb->data;
@@ -47,6 +46,11 @@ ieee80211_tx_h_michael_mic_add(struct ieee80211_tx_data *tx)
 	data = skb->data + hdrlen;
 	data_len = skb->len - hdrlen;
 
+	if (unlikely(info->flags & IEEE80211_TX_INTFL_TKIP_MIC_FAILURE)) {
+		/* Need to use software crypto for the test */
+		info->control.hw_key = NULL;
+	}
+
 	if (info->control.hw_key &&
 	    !(tx->flags & IEEE80211_TX_FRAGMENTED) &&
 	    !(tx->key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIC)) {
@@ -62,17 +66,11 @@ ieee80211_tx_h_michael_mic_add(struct ieee80211_tx_data *tx)
 		    skb_headroom(skb) < TKIP_IV_LEN))
 		return TX_DROP;
 
-#if 0
-	authenticator = fc & IEEE80211_FCTL_FROMDS; /* FIX */
-#else
-	authenticator = 1;
-#endif
-	key_offset = authenticator ?
-		NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY :
-		NL80211_TKIP_DATA_OFFSET_RX_MIC_KEY;
-	key = &tx->key->conf.key[key_offset];
+	key = &tx->key->conf.key[NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY];
 	mic = skb_put(skb, MICHAEL_MIC_LEN);
 	michael_mic(key, hdr, data, data_len, mic);
+	if (unlikely(info->flags & IEEE80211_TX_INTFL_TKIP_MIC_FAILURE))
+		mic[0]++;
 
 	return TX_CONTINUE;
 }
@@ -81,14 +79,13 @@ ieee80211_tx_h_michael_mic_add(struct ieee80211_tx_data *tx)
 ieee80211_rx_result
 ieee80211_rx_h_michael_mic_verify(struct ieee80211_rx_data *rx)
 {
-	u8 *data, *key = NULL, key_offset;
+	u8 *data, *key = NULL;
 	size_t data_len;
 	unsigned int hdrlen;
 	u8 mic[MICHAEL_MIC_LEN];
 	struct sk_buff *skb = rx->skb;
 	struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
-	int authenticator = 1, wpa_test = 0;
 
 	/* No way to verify the MIC if the hardware stripped it */
 	if (status->flag & RX_FLAG_MMIC_STRIPPED)
@@ -106,17 +103,9 @@ ieee80211_rx_h_michael_mic_verify(struct ieee80211_rx_data *rx)
 	data = skb->data + hdrlen;
 	data_len = skb->len - hdrlen - MICHAEL_MIC_LEN;
 
-#if 0
-	authenticator = fc & IEEE80211_FCTL_TODS; /* FIX */
-#else
-	authenticator = 1;
-#endif
-	key_offset = authenticator ?
-		NL80211_TKIP_DATA_OFFSET_RX_MIC_KEY :
-		NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY;
-	key = &rx->key->conf.key[key_offset];
+	key = &rx->key->conf.key[NL80211_TKIP_DATA_OFFSET_RX_MIC_KEY];
 	michael_mic(key, hdr, data, data_len, mic);
-	if (memcmp(mic, data + data_len, MICHAEL_MIC_LEN) != 0 || wpa_test) {
+	if (memcmp(mic, data + data_len, MICHAEL_MIC_LEN) != 0) {
 		if (!(status->rx_flags & IEEE80211_RX_RA_MATCH))
 			return RX_DROP_UNUSABLE;
 
@@ -208,7 +197,7 @@ ieee80211_rx_result
 ieee80211_crypto_tkip_decrypt(struct ieee80211_rx_data *rx)
 {
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) rx->skb->data;
-	int hdrlen, res, hwaccel = 0, wpa_test = 0;
+	int hdrlen, res, hwaccel = 0;
 	struct ieee80211_key *key = rx->key;
 	struct sk_buff *skb = rx->skb;
 	struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
@@ -235,7 +224,7 @@ ieee80211_crypto_tkip_decrypt(struct ieee80211_rx_data *rx)
 					  hdr->addr1, hwaccel, rx->queue,
 					  &rx->tkip_iv32,
 					  &rx->tkip_iv16);
-	if (res != TKIP_DECRYPT_OK || wpa_test)
+	if (res != TKIP_DECRYPT_OK)
 		return RX_DROP_UNUSABLE;
 
 	/* Trim ICV */
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 1534f2b..82a6e0d 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -85,6 +85,17 @@ config NF_CONNTRACK_EVENTS
 
 	  If unsure, say `N'.
 
+config NF_CONNTRACK_TIMESTAMP
+	bool  'Connection tracking timestamping'
+	depends on NETFILTER_ADVANCED
+	help
+	  This option enables support for connection tracking timestamping.
+	  This allows you to store the flow start-time and to obtain
+	  the flow-stop time (once it has been destroyed) via Connection
+	  tracking events.
+
+	  If unsure, say `N'.
+
 config NF_CT_PROTO_DCCP
 	tristate 'DCCP protocol connection tracking support (EXPERIMENTAL)'
 	depends on EXPERIMENTAL
@@ -185,9 +196,13 @@ config NF_CONNTRACK_IRC
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
+config NF_CONNTRACK_BROADCAST
+	tristate
+
 config NF_CONNTRACK_NETBIOS_NS
 	tristate "NetBIOS name service protocol support"
 	depends on NETFILTER_ADVANCED
+	select NF_CONNTRACK_BROADCAST
 	help
 	  NetBIOS name service requests are sent as broadcast messages from an
 	  unprivileged port and responded to with unicast messages to the
@@ -204,6 +219,21 @@ config NF_CONNTRACK_NETBIOS_NS
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
+config NF_CONNTRACK_SNMP
+	tristate "SNMP service protocol support"
+	depends on NETFILTER_ADVANCED
+	select NF_CONNTRACK_BROADCAST
+	help
+	  SNMP service requests are sent as broadcast messages from an
+	  unprivileged port and responded to with unicast messages to the
+	  same port. This make them hard to firewall properly because connection
+	  tracking doesn't deal with broadcasts. This helper tracks locally
+	  originating SNMP service requests and the corresponding
+	  responses. It relies on correct IP address configuration, specifically
+	  netmask and broadcast address.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
 config NF_CONNTRACK_PPTP
 	tristate "PPtP protocol support"
 	depends on NETFILTER_ADVANCED
@@ -322,10 +352,32 @@ config NETFILTER_XT_CONNMARK
 	ctmark), similarly to the packet mark (nfmark). Using this
 	target and match, you can set and match on this mark.
 
+config NETFILTER_XT_SET
+	tristate 'set target and match support'
+	depends on IP_SET
+	depends on NETFILTER_ADVANCED
+	help
+	  This option adds the "SET" target and "set" match.
+
+	  Using this target and match, you can add/delete and match
+	  elements in the sets created by ipset(8).
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
 # alphabetically ordered list of targets
 
 comment "Xtables targets"
 
+config NETFILTER_XT_TARGET_AUDIT
+	tristate "AUDIT target support"
+	depends on AUDIT
+	depends on NETFILTER_ADVANCED
+	---help---
+	  This option adds a 'AUDIT' target, which can be used to create
+	  audit records for packets dropped/accepted.
+
+	  To compileit as a module, choose M here. If unsure, say N.
+
 config NETFILTER_XT_TARGET_CHECKSUM
 	tristate "CHECKSUM target support"
 	depends on IP_NF_MANGLE || IP6_NF_MANGLE
@@ -477,6 +529,7 @@ config NETFILTER_XT_TARGET_NFLOG
 config NETFILTER_XT_TARGET_NFQUEUE
 	tristate '"NFQUEUE" target Support'
 	depends on NETFILTER_ADVANCED
+	select NETFILTER_NETLINK_QUEUE
 	help
 	  This target replaced the old obsolete QUEUE target.
 
@@ -685,6 +738,15 @@ config NETFILTER_XT_MATCH_DCCP
 	  If you want to compile it as a module, say M here and read
 	  <file:Documentation/kbuild/modules.txt>.  If unsure, say `N'.
 
+config NETFILTER_XT_MATCH_DEVGROUP
+	tristate '"devgroup" match support'
+	depends on NETFILTER_ADVANCED
+	help
+	  This options adds a `devgroup' match, which allows to match on the
+	  device group a network device is assigned to.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
 config NETFILTER_XT_MATCH_DSCP
 	tristate '"dscp" and "tos" match support'
 	depends on NETFILTER_ADVANCED
@@ -886,7 +948,7 @@ config NETFILTER_XT_MATCH_RATEEST
 config NETFILTER_XT_MATCH_REALM
 	tristate  '"realm" match support'
 	depends on NETFILTER_ADVANCED
-	select NET_CLS_ROUTE
+	select IP_ROUTE_CLASSID
 	help
 	  This option adds a `realm' match, which allows you to use the realm
 	  key from the routing subsystem inside iptables.
@@ -1011,4 +1073,6 @@ endif # NETFILTER_XTABLES
 
 endmenu
 
+source "net/netfilter/ipset/Kconfig"
+
 source "net/netfilter/ipvs/Kconfig"
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 441050f..d57a890 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -1,6 +1,7 @@
 netfilter-objs := core.o nf_log.o nf_queue.o nf_sockopt.o
 
 nf_conntrack-y	:= nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_expect.o nf_conntrack_helper.o nf_conntrack_proto.o nf_conntrack_l3proto_generic.o nf_conntrack_proto_generic.o nf_conntrack_proto_tcp.o nf_conntrack_proto_udp.o nf_conntrack_extend.o nf_conntrack_acct.o
+nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMESTAMP) += nf_conntrack_timestamp.o
 nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o
 
 obj-$(CONFIG_NETFILTER) = netfilter.o
@@ -28,7 +29,9 @@ obj-$(CONFIG_NF_CONNTRACK_AMANDA) += nf_conntrack_amanda.o
 obj-$(CONFIG_NF_CONNTRACK_FTP) += nf_conntrack_ftp.o
 obj-$(CONFIG_NF_CONNTRACK_H323) += nf_conntrack_h323.o
 obj-$(CONFIG_NF_CONNTRACK_IRC) += nf_conntrack_irc.o
+obj-$(CONFIG_NF_CONNTRACK_BROADCAST) += nf_conntrack_broadcast.o
 obj-$(CONFIG_NF_CONNTRACK_NETBIOS_NS) += nf_conntrack_netbios_ns.o
+obj-$(CONFIG_NF_CONNTRACK_SNMP) += nf_conntrack_snmp.o
 obj-$(CONFIG_NF_CONNTRACK_PPTP) += nf_conntrack_pptp.o
 obj-$(CONFIG_NF_CONNTRACK_SANE) += nf_conntrack_sane.o
 obj-$(CONFIG_NF_CONNTRACK_SIP) += nf_conntrack_sip.o
@@ -43,8 +46,10 @@ obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o
 # combos
 obj-$(CONFIG_NETFILTER_XT_MARK) += xt_mark.o
 obj-$(CONFIG_NETFILTER_XT_CONNMARK) += xt_connmark.o
+obj-$(CONFIG_NETFILTER_XT_SET) += xt_set.o
 
 # targets
+obj-$(CONFIG_NETFILTER_XT_TARGET_AUDIT) += xt_AUDIT.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_CHECKSUM) += xt_CHECKSUM.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIFY) += xt_CLASSIFY.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o
@@ -72,6 +77,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_CONNLIMIT) += xt_connlimit.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_CONNTRACK) += xt_conntrack.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_CPU) += xt_cpu.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_DCCP) += xt_dccp.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_DEVGROUP) += xt_devgroup.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_DSCP) += xt_dscp.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_ESP) += xt_esp.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_HASHLIMIT) += xt_hashlimit.o
@@ -101,5 +107,8 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_TCPMSS) += xt_tcpmss.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_TIME) += xt_time.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_U32) += xt_u32.o
 
+# ipset
+obj-$(CONFIG_IP_SET) += ipset/
+
 # IPVS
 obj-$(CONFIG_IP_VS) += ipvs/
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 4aa614b..899b71c 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -176,13 +176,21 @@ next_hook:
 		ret = 1;
 	} else if ((verdict & NF_VERDICT_MASK) == NF_DROP) {
 		kfree_skb(skb);
-		ret = -(verdict >> NF_VERDICT_BITS);
+		ret = NF_DROP_GETERR(verdict);
 		if (ret == 0)
 			ret = -EPERM;
 	} else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) {
-		if (!nf_queue(skb, elem, pf, hook, indev, outdev, okfn,
-			      verdict >> NF_VERDICT_BITS))
-			goto next_hook;
+		ret = nf_queue(skb, elem, pf, hook, indev, outdev, okfn,
+			       verdict >> NF_VERDICT_QBITS);
+		if (ret < 0) {
+			if (ret == -ECANCELED)
+				goto next_hook;
+			if (ret == -ESRCH &&
+			   (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS))
+				goto next_hook;
+			kfree_skb(skb);
+		}
+		ret = 0;
 	}
 	rcu_read_unlock();
 	return ret;
@@ -215,7 +223,7 @@ EXPORT_SYMBOL(skb_make_writable);
 /* This does not belong here, but locally generated errors need it if connection
    tracking in use: without this, connection may not be in hash table, and hence
    manufactured ICMP or RST packets will not be associated with it. */
-void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *);
+void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *) __rcu __read_mostly;
 EXPORT_SYMBOL(ip_ct_attach);
 
 void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb)
@@ -232,7 +240,7 @@ void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb)
 }
 EXPORT_SYMBOL(nf_ct_attach);
 
-void (*nf_ct_destroy)(struct nf_conntrack *);
+void (*nf_ct_destroy)(struct nf_conntrack *) __rcu __read_mostly;
 EXPORT_SYMBOL(nf_ct_destroy);
 
 void nf_conntrack_destroy(struct nf_conntrack *nfct)
diff --git a/net/netfilter/ipset/Kconfig b/net/netfilter/ipset/Kconfig
new file mode 100644
index 0000000..2c5b348
--- /dev/null
+++ b/net/netfilter/ipset/Kconfig
@@ -0,0 +1,122 @@
+menuconfig IP_SET
+	tristate "IP set support"
+	depends on INET && NETFILTER
+	depends on NETFILTER_NETLINK
+	help
+	  This option adds IP set support to the kernel.
+	  In order to define and use the sets, you need the userspace utility
+	  ipset(8). You can use the sets in netfilter via the "set" match
+	  and "SET" target.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+if IP_SET
+
+config IP_SET_MAX
+	int "Maximum number of IP sets"
+	default 256
+	range 2 65534
+	depends on IP_SET
+	help
+	  You can define here default value of the maximum number 
+	  of IP sets for the kernel.
+
+	  The value can be overriden by the 'max_sets' module
+	  parameter of the 'ip_set' module.
+
+config IP_SET_BITMAP_IP
+	tristate "bitmap:ip set support"
+	depends on IP_SET
+	help
+	  This option adds the bitmap:ip set type support, by which one
+	  can store IPv4 addresses (or network addresse) from a range.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+config IP_SET_BITMAP_IPMAC
+	tristate "bitmap:ip,mac set support"
+	depends on IP_SET
+	help
+	  This option adds the bitmap:ip,mac set type support, by which one
+	  can store IPv4 address and (source) MAC address pairs from a range.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+config IP_SET_BITMAP_PORT
+	tristate "bitmap:port set support"
+	depends on IP_SET
+	help
+	  This option adds the bitmap:port set type support, by which one
+	  can store TCP/UDP port numbers from a range.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+config IP_SET_HASH_IP
+	tristate "hash:ip set support"
+	depends on IP_SET
+	help
+	  This option adds the hash:ip set type support, by which one
+	  can store arbitrary IPv4 or IPv6 addresses (or network addresses)
+	  in a set.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+config IP_SET_HASH_IPPORT
+	tristate "hash:ip,port set support"
+	depends on IP_SET
+	help
+	  This option adds the hash:ip,port set type support, by which one
+	  can store IPv4/IPv6 address and protocol/port pairs.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+config IP_SET_HASH_IPPORTIP
+	tristate "hash:ip,port,ip set support"
+	depends on IP_SET
+	help
+	  This option adds the hash:ip,port,ip set type support, by which
+	  one can store IPv4/IPv6 address, protocol/port, and IPv4/IPv6
+	  address triples in a set.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+config IP_SET_HASH_IPPORTNET
+	tristate "hash:ip,port,net set support"
+	depends on IP_SET
+	help
+	  This option adds the hash:ip,port,net set type support, by which
+	  one can store IPv4/IPv6 address, protocol/port, and IPv4/IPv6
+	  network address/prefix triples in a set.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+config IP_SET_HASH_NET
+	tristate "hash:net set support"
+	depends on IP_SET
+	help
+	  This option adds the hash:net set type support, by which
+	  one can store IPv4/IPv6 network address/prefix elements in a set.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+config IP_SET_HASH_NETPORT
+	tristate "hash:net,port set support"
+	depends on IP_SET
+	help
+	  This option adds the hash:net,port set type support, by which
+	  one can store IPv4/IPv6 network address/prefix and
+	  protocol/port pairs as elements in a set.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+config IP_SET_LIST_SET
+	tristate "list:set set support"
+	depends on IP_SET
+	help
+	  This option adds the list:set set type support. In this
+	  kind of set one can store the name of other sets and it forms
+	  an ordered union of the member sets.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
+endif # IP_SET
diff --git a/net/netfilter/ipset/Makefile b/net/netfilter/ipset/Makefile
new file mode 100644
index 0000000..5adbdab
--- /dev/null
+++ b/net/netfilter/ipset/Makefile
@@ -0,0 +1,24 @@
+#
+# Makefile for the ipset modules
+#
+
+ip_set-y := ip_set_core.o ip_set_getport.o pfxlen.o
+
+# ipset core
+obj-$(CONFIG_IP_SET) += ip_set.o
+
+# bitmap types
+obj-$(CONFIG_IP_SET_BITMAP_IP) += ip_set_bitmap_ip.o
+obj-$(CONFIG_IP_SET_BITMAP_IPMAC) += ip_set_bitmap_ipmac.o
+obj-$(CONFIG_IP_SET_BITMAP_PORT) += ip_set_bitmap_port.o
+
+# hash types
+obj-$(CONFIG_IP_SET_HASH_IP) += ip_set_hash_ip.o
+obj-$(CONFIG_IP_SET_HASH_IPPORT) += ip_set_hash_ipport.o
+obj-$(CONFIG_IP_SET_HASH_IPPORTIP) += ip_set_hash_ipportip.o
+obj-$(CONFIG_IP_SET_HASH_IPPORTNET) += ip_set_hash_ipportnet.o
+obj-$(CONFIG_IP_SET_HASH_NET) += ip_set_hash_net.o
+obj-$(CONFIG_IP_SET_HASH_NETPORT) += ip_set_hash_netport.o
+
+# list types
+obj-$(CONFIG_IP_SET_LIST_SET) += ip_set_list_set.o
diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c
new file mode 100644
index 0000000..bca9699
--- /dev/null
+++ b/net/netfilter/ipset/ip_set_bitmap_ip.c
@@ -0,0 +1,587 @@
+/* Copyright (C) 2000-2002 Joakim Axelsson <gozem@linux.nu>
+ *                         Patrick Schaaf <bof@bof.de>
+ * Copyright (C) 2003-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/* Kernel module implementing an IP set type: the bitmap:ip type */
+
+#include <linux/module.h>
+#include <linux/ip.h>
+#include <linux/skbuff.h>
+#include <linux/errno.h>
+#include <linux/bitops.h>
+#include <linux/spinlock.h>
+#include <linux/netlink.h>
+#include <linux/jiffies.h>
+#include <linux/timer.h>
+#include <net/netlink.h>
+#include <net/tcp.h>
+
+#include <linux/netfilter/ipset/pfxlen.h>
+#include <linux/netfilter/ipset/ip_set.h>
+#include <linux/netfilter/ipset/ip_set_bitmap.h>
+#define IP_SET_BITMAP_TIMEOUT
+#include <linux/netfilter/ipset/ip_set_timeout.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
+MODULE_DESCRIPTION("bitmap:ip type of IP sets");
+MODULE_ALIAS("ip_set_bitmap:ip");
+
+/* Type structure */
+struct bitmap_ip {
+	void *members;		/* the set members */
+	u32 first_ip;		/* host byte order, included in range */
+	u32 last_ip;		/* host byte order, included in range */
+	u32 elements;		/* number of max elements in the set */
+	u32 hosts;		/* number of hosts in a subnet */
+	size_t memsize;		/* members size */
+	u8 netmask;		/* subnet netmask */
+	u32 timeout;		/* timeout parameter */
+	struct timer_list gc;	/* garbage collection */
+};
+
+/* Base variant */
+
+static inline u32
+ip_to_id(const struct bitmap_ip *m, u32 ip)
+{
+	return ((ip & ip_set_hostmask(m->netmask)) - m->first_ip)/m->hosts;
+}
+
+static int
+bitmap_ip_test(struct ip_set *set, void *value, u32 timeout)
+{
+	const struct bitmap_ip *map = set->data;
+	u16 id = *(u16 *)value;
+
+	return !!test_bit(id, map->members);
+}
+
+static int
+bitmap_ip_add(struct ip_set *set, void *value, u32 timeout)
+{
+	struct bitmap_ip *map = set->data;
+	u16 id = *(u16 *)value;
+
+	if (test_and_set_bit(id, map->members))
+		return -IPSET_ERR_EXIST;
+
+	return 0;
+}
+
+static int
+bitmap_ip_del(struct ip_set *set, void *value, u32 timeout)
+{
+	struct bitmap_ip *map = set->data;
+	u16 id = *(u16 *)value;
+
+	if (!test_and_clear_bit(id, map->members))
+		return -IPSET_ERR_EXIST;
+
+	return 0;
+}
+
+static int
+bitmap_ip_list(const struct ip_set *set,
+	       struct sk_buff *skb, struct netlink_callback *cb)
+{
+	const struct bitmap_ip *map = set->data;
+	struct nlattr *atd, *nested;
+	u32 id, first = cb->args[2];
+
+	atd = ipset_nest_start(skb, IPSET_ATTR_ADT);
+	if (!atd)
+		return -EMSGSIZE;
+	for (; cb->args[2] < map->elements; cb->args[2]++) {
+		id = cb->args[2];
+		if (!test_bit(id, map->members))
+			continue;
+		nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
+		if (!nested) {
+			if (id == first) {
+				nla_nest_cancel(skb, atd);
+				return -EMSGSIZE;
+			} else
+				goto nla_put_failure;
+		}
+		NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP,
+				htonl(map->first_ip + id * map->hosts));
+		ipset_nest_end(skb, nested);
+	}
+	ipset_nest_end(skb, atd);
+	/* Set listing finished */
+	cb->args[2] = 0;
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(skb, nested);
+	ipset_nest_end(skb, atd);
+	if (unlikely(id == first)) {
+		cb->args[2] = 0;
+		return -EMSGSIZE;
+	}
+	return 0;
+}
+
+/* Timeout variant */
+
+static int
+bitmap_ip_ttest(struct ip_set *set, void *value, u32 timeout)
+{
+	const struct bitmap_ip *map = set->data;
+	const unsigned long *members = map->members;
+	u16 id = *(u16 *)value;
+
+	return ip_set_timeout_test(members[id]);
+}
+
+static int
+bitmap_ip_tadd(struct ip_set *set, void *value, u32 timeout)
+{
+	struct bitmap_ip *map = set->data;
+	unsigned long *members = map->members;
+	u16 id = *(u16 *)value;
+
+	if (ip_set_timeout_test(members[id]))
+		return -IPSET_ERR_EXIST;
+
+	members[id] = ip_set_timeout_set(timeout);
+
+	return 0;
+}
+
+static int
+bitmap_ip_tdel(struct ip_set *set, void *value, u32 timeout)
+{
+	struct bitmap_ip *map = set->data;
+	unsigned long *members = map->members;
+	u16 id = *(u16 *)value;
+	int ret = -IPSET_ERR_EXIST;
+
+	if (ip_set_timeout_test(members[id]))
+		ret = 0;
+
+	members[id] = IPSET_ELEM_UNSET;
+	return ret;
+}
+
+static int
+bitmap_ip_tlist(const struct ip_set *set,
+		struct sk_buff *skb, struct netlink_callback *cb)
+{
+	const struct bitmap_ip *map = set->data;
+	struct nlattr *adt, *nested;
+	u32 id, first = cb->args[2];
+	const unsigned long *members = map->members;
+
+	adt = ipset_nest_start(skb, IPSET_ATTR_ADT);
+	if (!adt)
+		return -EMSGSIZE;
+	for (; cb->args[2] < map->elements; cb->args[2]++) {
+		id = cb->args[2];
+		if (!ip_set_timeout_test(members[id]))
+			continue;
+		nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
+		if (!nested) {
+			if (id == first) {
+				nla_nest_cancel(skb, adt);
+				return -EMSGSIZE;
+			} else
+				goto nla_put_failure;
+		}
+		NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP,
+				htonl(map->first_ip + id * map->hosts));
+		NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT,
+			      htonl(ip_set_timeout_get(members[id])));
+		ipset_nest_end(skb, nested);
+	}
+	ipset_nest_end(skb, adt);
+
+	/* Set listing finished */
+	cb->args[2] = 0;
+
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(skb, nested);
+	ipset_nest_end(skb, adt);
+	if (unlikely(id == first)) {
+		cb->args[2] = 0;
+		return -EMSGSIZE;
+	}
+	return 0;
+}
+
+static int
+bitmap_ip_kadt(struct ip_set *set, const struct sk_buff *skb,
+	       enum ipset_adt adt, u8 pf, u8 dim, u8 flags)
+{
+	struct bitmap_ip *map = set->data;
+	ipset_adtfn adtfn = set->variant->adt[adt];
+	u32 ip;
+
+	ip = ntohl(ip4addr(skb, flags & IPSET_DIM_ONE_SRC));
+	if (ip < map->first_ip || ip > map->last_ip)
+		return -IPSET_ERR_BITMAP_RANGE;
+
+	ip = ip_to_id(map, ip);
+
+	return adtfn(set, &ip, map->timeout);
+}
+
+static int
+bitmap_ip_uadt(struct ip_set *set, struct nlattr *tb[],
+	       enum ipset_adt adt, u32 *lineno, u32 flags)
+{
+	struct bitmap_ip *map = set->data;
+	ipset_adtfn adtfn = set->variant->adt[adt];
+	u32 timeout = map->timeout;
+	u32 ip, ip_to, id;
+	int ret = 0;
+
+	if (unlikely(!tb[IPSET_ATTR_IP] ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT)))
+		return -IPSET_ERR_PROTOCOL;
+
+	if (tb[IPSET_ATTR_LINENO])
+		*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
+	ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip);
+	if (ret)
+		return ret;
+
+	if (ip < map->first_ip || ip > map->last_ip)
+		return -IPSET_ERR_BITMAP_RANGE;
+
+	if (tb[IPSET_ATTR_TIMEOUT]) {
+		if (!with_timeout(map->timeout))
+			return -IPSET_ERR_TIMEOUT;
+		timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
+	}
+
+	if (adt == IPSET_TEST) {
+		id = ip_to_id(map, ip);
+		return adtfn(set, &id, timeout);
+	}
+
+	if (tb[IPSET_ATTR_IP_TO]) {
+		ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to);
+		if (ret)
+			return ret;
+		if (ip > ip_to) {
+			swap(ip, ip_to);
+			if (ip < map->first_ip)
+				return -IPSET_ERR_BITMAP_RANGE;
+		}
+	} else if (tb[IPSET_ATTR_CIDR]) {
+		u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
+
+		if (cidr > 32)
+			return -IPSET_ERR_INVALID_CIDR;
+		ip &= ip_set_hostmask(cidr);
+		ip_to = ip | ~ip_set_hostmask(cidr);
+	} else
+		ip_to = ip;
+
+	if (ip_to > map->last_ip)
+		return -IPSET_ERR_BITMAP_RANGE;
+
+	for (; !before(ip_to, ip); ip += map->hosts) {
+		id = ip_to_id(map, ip);
+		ret = adtfn(set, &id, timeout);;
+
+		if (ret && !ip_set_eexist(ret, flags))
+			return ret;
+		else
+			ret = 0;
+	}
+	return ret;
+}
+
+static void
+bitmap_ip_destroy(struct ip_set *set)
+{
+	struct bitmap_ip *map = set->data;
+
+	if (with_timeout(map->timeout))
+		del_timer_sync(&map->gc);
+
+	ip_set_free(map->members);
+	kfree(map);
+
+	set->data = NULL;
+}
+
+static void
+bitmap_ip_flush(struct ip_set *set)
+{
+	struct bitmap_ip *map = set->data;
+
+	memset(map->members, 0, map->memsize);
+}
+
+static int
+bitmap_ip_head(struct ip_set *set, struct sk_buff *skb)
+{
+	const struct bitmap_ip *map = set->data;
+	struct nlattr *nested;
+
+	nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
+	if (!nested)
+		goto nla_put_failure;
+	NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, htonl(map->first_ip));
+	NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP_TO, htonl(map->last_ip));
+	if (map->netmask != 32)
+		NLA_PUT_U8(skb, IPSET_ATTR_NETMASK, map->netmask);
+	NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES,
+		      htonl(atomic_read(&set->ref) - 1));
+	NLA_PUT_NET32(skb, IPSET_ATTR_MEMSIZE,
+		      htonl(sizeof(*map) + map->memsize));
+	if (with_timeout(map->timeout))
+		NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, htonl(map->timeout));
+	ipset_nest_end(skb, nested);
+
+	return 0;
+nla_put_failure:
+	return -EMSGSIZE;
+}
+
+static bool
+bitmap_ip_same_set(const struct ip_set *a, const struct ip_set *b)
+{
+	const struct bitmap_ip *x = a->data;
+	const struct bitmap_ip *y = b->data;
+
+	return x->first_ip == y->first_ip &&
+	       x->last_ip == y->last_ip &&
+	       x->netmask == y->netmask &&
+	       x->timeout == y->timeout;
+}
+
+static const struct ip_set_type_variant bitmap_ip = {
+	.kadt	= bitmap_ip_kadt,
+	.uadt	= bitmap_ip_uadt,
+	.adt	= {
+		[IPSET_ADD] = bitmap_ip_add,
+		[IPSET_DEL] = bitmap_ip_del,
+		[IPSET_TEST] = bitmap_ip_test,
+	},
+	.destroy = bitmap_ip_destroy,
+	.flush	= bitmap_ip_flush,
+	.head	= bitmap_ip_head,
+	.list	= bitmap_ip_list,
+	.same_set = bitmap_ip_same_set,
+};
+
+static const struct ip_set_type_variant bitmap_tip = {
+	.kadt	= bitmap_ip_kadt,
+	.uadt	= bitmap_ip_uadt,
+	.adt	= {
+		[IPSET_ADD] = bitmap_ip_tadd,
+		[IPSET_DEL] = bitmap_ip_tdel,
+		[IPSET_TEST] = bitmap_ip_ttest,
+	},
+	.destroy = bitmap_ip_destroy,
+	.flush	= bitmap_ip_flush,
+	.head	= bitmap_ip_head,
+	.list	= bitmap_ip_tlist,
+	.same_set = bitmap_ip_same_set,
+};
+
+static void
+bitmap_ip_gc(unsigned long ul_set)
+{
+	struct ip_set *set = (struct ip_set *) ul_set;
+	struct bitmap_ip *map = set->data;
+	unsigned long *table = map->members;
+	u32 id;
+
+	/* We run parallel with other readers (test element)
+	 * but adding/deleting new entries is locked out */
+	read_lock_bh(&set->lock);
+	for (id = 0; id < map->elements; id++)
+		if (ip_set_timeout_expired(table[id]))
+			table[id] = IPSET_ELEM_UNSET;
+	read_unlock_bh(&set->lock);
+
+	map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ;
+	add_timer(&map->gc);
+}
+
+static void
+bitmap_ip_gc_init(struct ip_set *set)
+{
+	struct bitmap_ip *map = set->data;
+
+	init_timer(&map->gc);
+	map->gc.data = (unsigned long) set;
+	map->gc.function = bitmap_ip_gc;
+	map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ;
+	add_timer(&map->gc);
+}
+
+/* Create bitmap:ip type of sets */
+
+static bool
+init_map_ip(struct ip_set *set, struct bitmap_ip *map,
+	    u32 first_ip, u32 last_ip,
+	    u32 elements, u32 hosts, u8 netmask)
+{
+	map->members = ip_set_alloc(map->memsize);
+	if (!map->members)
+		return false;
+	map->first_ip = first_ip;
+	map->last_ip = last_ip;
+	map->elements = elements;
+	map->hosts = hosts;
+	map->netmask = netmask;
+	map->timeout = IPSET_NO_TIMEOUT;
+
+	set->data = map;
+	set->family = AF_INET;
+
+	return true;
+}
+
+static int
+bitmap_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
+{
+	struct bitmap_ip *map;
+	u32 first_ip, last_ip, hosts, elements;
+	u8 netmask = 32;
+	int ret;
+
+	if (unlikely(!tb[IPSET_ATTR_IP] ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT)))
+		return -IPSET_ERR_PROTOCOL;
+
+	ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &first_ip);
+	if (ret)
+		return ret;
+
+	if (tb[IPSET_ATTR_IP_TO]) {
+		ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &last_ip);
+		if (ret)
+			return ret;
+		if (first_ip > last_ip) {
+			u32 tmp = first_ip;
+
+			first_ip = last_ip;
+			last_ip = tmp;
+		}
+	} else if (tb[IPSET_ATTR_CIDR]) {
+		u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
+
+		if (cidr >= 32)
+			return -IPSET_ERR_INVALID_CIDR;
+		last_ip = first_ip | ~ip_set_hostmask(cidr);
+	} else
+		return -IPSET_ERR_PROTOCOL;
+
+	if (tb[IPSET_ATTR_NETMASK]) {
+		netmask = nla_get_u8(tb[IPSET_ATTR_NETMASK]);
+
+		if (netmask > 32)
+			return -IPSET_ERR_INVALID_NETMASK;
+
+		first_ip &= ip_set_hostmask(netmask);
+		last_ip |= ~ip_set_hostmask(netmask);
+	}
+
+	if (netmask == 32) {
+		hosts = 1;
+		elements = last_ip - first_ip + 1;
+	} else {
+		u8 mask_bits;
+		u32 mask;
+
+		mask = range_to_mask(first_ip, last_ip, &mask_bits);
+
+		if ((!mask && (first_ip || last_ip != 0xFFFFFFFF)) ||
+		    netmask <= mask_bits)
+			return -IPSET_ERR_BITMAP_RANGE;
+
+		pr_debug("mask_bits %u, netmask %u\n", mask_bits, netmask);
+		hosts = 2 << (32 - netmask - 1);
+		elements = 2 << (netmask - mask_bits - 1);
+	}
+	if (elements > IPSET_BITMAP_MAX_RANGE + 1)
+		return -IPSET_ERR_BITMAP_RANGE_SIZE;
+
+	pr_debug("hosts %u, elements %u\n", hosts, elements);
+
+	map = kzalloc(sizeof(*map), GFP_KERNEL);
+	if (!map)
+		return -ENOMEM;
+
+	if (tb[IPSET_ATTR_TIMEOUT]) {
+		map->memsize = elements * sizeof(unsigned long);
+
+		if (!init_map_ip(set, map, first_ip, last_ip,
+				 elements, hosts, netmask)) {
+			kfree(map);
+			return -ENOMEM;
+		}
+
+		map->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
+		set->variant = &bitmap_tip;
+
+		bitmap_ip_gc_init(set);
+	} else {
+		map->memsize = bitmap_bytes(0, elements - 1);
+
+		if (!init_map_ip(set, map, first_ip, last_ip,
+				 elements, hosts, netmask)) {
+			kfree(map);
+			return -ENOMEM;
+		}
+
+		set->variant = &bitmap_ip;
+	}
+	return 0;
+}
+
+static struct ip_set_type bitmap_ip_type __read_mostly = {
+	.name		= "bitmap:ip",
+	.protocol	= IPSET_PROTOCOL,
+	.features	= IPSET_TYPE_IP,
+	.dimension	= IPSET_DIM_ONE,
+	.family		= AF_INET,
+	.revision	= 0,
+	.create		= bitmap_ip_create,
+	.create_policy	= {
+		[IPSET_ATTR_IP]		= { .type = NLA_NESTED },
+		[IPSET_ATTR_IP_TO]	= { .type = NLA_NESTED },
+		[IPSET_ATTR_CIDR]	= { .type = NLA_U8 },
+		[IPSET_ATTR_NETMASK]	= { .type = NLA_U8  },
+		[IPSET_ATTR_TIMEOUT]	= { .type = NLA_U32 },
+	},
+	.adt_policy	= {
+		[IPSET_ATTR_IP]		= { .type = NLA_NESTED },
+		[IPSET_ATTR_IP_TO]	= { .type = NLA_NESTED },
+		[IPSET_ATTR_CIDR]	= { .type = NLA_U8 },
+		[IPSET_ATTR_TIMEOUT]	= { .type = NLA_U32 },
+		[IPSET_ATTR_LINENO]	= { .type = NLA_U32 },
+	},
+	.me		= THIS_MODULE,
+};
+
+static int __init
+bitmap_ip_init(void)
+{
+	return ip_set_type_register(&bitmap_ip_type);
+}
+
+static void __exit
+bitmap_ip_fini(void)
+{
+	ip_set_type_unregister(&bitmap_ip_type);
+}
+
+module_init(bitmap_ip_init);
+module_exit(bitmap_ip_fini);
diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
new file mode 100644
index 0000000..5e79017
--- /dev/null
+++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
@@ -0,0 +1,652 @@
+/* Copyright (C) 2000-2002 Joakim Axelsson <gozem@linux.nu>
+ *                         Patrick Schaaf <bof@bof.de>
+ *			   Martin Josefsson <gandalf@wlug.westbo.se>
+ * Copyright (C) 2003-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/* Kernel module implementing an IP set type: the bitmap:ip,mac type */
+
+#include <linux/module.h>
+#include <linux/ip.h>
+#include <linux/etherdevice.h>
+#include <linux/skbuff.h>
+#include <linux/errno.h>
+#include <linux/if_ether.h>
+#include <linux/netlink.h>
+#include <linux/jiffies.h>
+#include <linux/timer.h>
+#include <net/netlink.h>
+
+#include <linux/netfilter/ipset/pfxlen.h>
+#include <linux/netfilter/ipset/ip_set.h>
+#include <linux/netfilter/ipset/ip_set_timeout.h>
+#include <linux/netfilter/ipset/ip_set_bitmap.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
+MODULE_DESCRIPTION("bitmap:ip,mac type of IP sets");
+MODULE_ALIAS("ip_set_bitmap:ip,mac");
+
+enum {
+	MAC_EMPTY,		/* element is not set */
+	MAC_FILLED,		/* element is set with MAC */
+	MAC_UNSET,		/* element is set, without MAC */
+};
+
+/* Type structure */
+struct bitmap_ipmac {
+	void *members;		/* the set members */
+	u32 first_ip;		/* host byte order, included in range */
+	u32 last_ip;		/* host byte order, included in range */
+	u32 timeout;		/* timeout value */
+	struct timer_list gc;	/* garbage collector */
+	size_t dsize;		/* size of element */
+};
+
+/* ADT structure for generic function args */
+struct ipmac {
+	u32 id;			/* id in array */
+	unsigned char *ether;	/* ethernet address */
+};
+
+/* Member element without and with timeout */
+
+struct ipmac_elem {
+	unsigned char ether[ETH_ALEN];
+	unsigned char match;
+} __attribute__ ((aligned));
+
+struct ipmac_telem {
+	unsigned char ether[ETH_ALEN];
+	unsigned char match;
+	unsigned long timeout;
+} __attribute__ ((aligned));
+
+static inline void *
+bitmap_ipmac_elem(const struct bitmap_ipmac *map, u32 id)
+{
+	return (void *)((char *)map->members + id * map->dsize);
+}
+
+static inline bool
+bitmap_timeout(const struct bitmap_ipmac *map, u32 id)
+{
+	const struct ipmac_telem *elem = bitmap_ipmac_elem(map, id);
+
+	return ip_set_timeout_test(elem->timeout);
+}
+
+static inline bool
+bitmap_expired(const struct bitmap_ipmac *map, u32 id)
+{
+	const struct ipmac_telem *elem = bitmap_ipmac_elem(map, id);
+
+	return ip_set_timeout_expired(elem->timeout);
+}
+
+static inline int
+bitmap_ipmac_exist(const struct ipmac_telem *elem)
+{
+	return elem->match == MAC_UNSET ||
+	       (elem->match == MAC_FILLED &&
+		!ip_set_timeout_expired(elem->timeout));
+}
+
+/* Base variant */
+
+static int
+bitmap_ipmac_test(struct ip_set *set, void *value, u32 timeout)
+{
+	const struct bitmap_ipmac *map = set->data;
+	const struct ipmac *data = value;
+	const struct ipmac_elem *elem = bitmap_ipmac_elem(map, data->id);
+
+	switch (elem->match) {
+	case MAC_UNSET:
+		/* Trigger kernel to fill out the ethernet address */
+		return -EAGAIN;
+	case MAC_FILLED:
+		return data->ether == NULL ||
+		       compare_ether_addr(data->ether, elem->ether) == 0;
+	}
+	return 0;
+}
+
+static int
+bitmap_ipmac_add(struct ip_set *set, void *value, u32 timeout)
+{
+	struct bitmap_ipmac *map = set->data;
+	const struct ipmac *data = value;
+	struct ipmac_elem *elem = bitmap_ipmac_elem(map, data->id);
+
+	switch (elem->match) {
+	case MAC_UNSET:
+		if (!data->ether)
+			/* Already added without ethernet address */
+			return -IPSET_ERR_EXIST;
+		/* Fill the MAC address */
+		memcpy(elem->ether, data->ether, ETH_ALEN);
+		elem->match = MAC_FILLED;
+		break;
+	case MAC_FILLED:
+		return -IPSET_ERR_EXIST;
+	case MAC_EMPTY:
+		if (data->ether) {
+			memcpy(elem->ether, data->ether, ETH_ALEN);
+			elem->match = MAC_FILLED;
+		} else
+			elem->match = MAC_UNSET;
+	}
+
+	return 0;
+}
+
+static int
+bitmap_ipmac_del(struct ip_set *set, void *value, u32 timeout)
+{
+	struct bitmap_ipmac *map = set->data;
+	const struct ipmac *data = value;
+	struct ipmac_elem *elem = bitmap_ipmac_elem(map, data->id);
+
+	if (elem->match == MAC_EMPTY)
+		return -IPSET_ERR_EXIST;
+
+	elem->match = MAC_EMPTY;
+
+	return 0;
+}
+
+static int
+bitmap_ipmac_list(const struct ip_set *set,
+		  struct sk_buff *skb, struct netlink_callback *cb)
+{
+	const struct bitmap_ipmac *map = set->data;
+	const struct ipmac_elem *elem;
+	struct nlattr *atd, *nested;
+	u32 id, first = cb->args[2];
+	u32 last = map->last_ip - map->first_ip;
+
+	atd = ipset_nest_start(skb, IPSET_ATTR_ADT);
+	if (!atd)
+		return -EMSGSIZE;
+	for (; cb->args[2] <= last; cb->args[2]++) {
+		id = cb->args[2];
+		elem = bitmap_ipmac_elem(map, id);
+		if (elem->match == MAC_EMPTY)
+			continue;
+		nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
+		if (!nested) {
+			if (id == first) {
+				nla_nest_cancel(skb, atd);
+				return -EMSGSIZE;
+			} else
+				goto nla_put_failure;
+		}
+		NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP,
+				htonl(map->first_ip + id));
+		if (elem->match == MAC_FILLED)
+			NLA_PUT(skb, IPSET_ATTR_ETHER, ETH_ALEN,
+				elem->ether);
+		ipset_nest_end(skb, nested);
+	}
+	ipset_nest_end(skb, atd);
+	/* Set listing finished */
+	cb->args[2] = 0;
+
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(skb, nested);
+	ipset_nest_end(skb, atd);
+	if (unlikely(id == first)) {
+		cb->args[2] = 0;
+		return -EMSGSIZE;
+	}
+	return 0;
+}
+
+/* Timeout variant */
+
+static int
+bitmap_ipmac_ttest(struct ip_set *set, void *value, u32 timeout)
+{
+	const struct bitmap_ipmac *map = set->data;
+	const struct ipmac *data = value;
+	const struct ipmac_elem *elem = bitmap_ipmac_elem(map, data->id);
+
+	switch (elem->match) {
+	case MAC_UNSET:
+		/* Trigger kernel to fill out the ethernet address */
+		return -EAGAIN;
+	case MAC_FILLED:
+		return (data->ether == NULL ||
+			compare_ether_addr(data->ether, elem->ether) == 0) &&
+		       !bitmap_expired(map, data->id);
+	}
+	return 0;
+}
+
+static int
+bitmap_ipmac_tadd(struct ip_set *set, void *value, u32 timeout)
+{
+	struct bitmap_ipmac *map = set->data;
+	const struct ipmac *data = value;
+	struct ipmac_telem *elem = bitmap_ipmac_elem(map, data->id);
+
+	switch (elem->match) {
+	case MAC_UNSET:
+		if (!data->ether)
+			/* Already added without ethernet address */
+			return -IPSET_ERR_EXIST;
+		/* Fill the MAC address and activate the timer */
+		memcpy(elem->ether, data->ether, ETH_ALEN);
+		elem->match = MAC_FILLED;
+		if (timeout == map->timeout)
+			/* Timeout was not specified, get stored one */
+			timeout = elem->timeout;
+		elem->timeout = ip_set_timeout_set(timeout);
+		break;
+	case MAC_FILLED:
+		if (!bitmap_expired(map, data->id))
+			return -IPSET_ERR_EXIST;
+		/* Fall through */
+	case MAC_EMPTY:
+		if (data->ether) {
+			memcpy(elem->ether, data->ether, ETH_ALEN);
+			elem->match = MAC_FILLED;
+		} else
+			elem->match = MAC_UNSET;
+		/* If MAC is unset yet, we store plain timeout value
+		 * because the timer is not activated yet
+		 * and we can reuse it later when MAC is filled out,
+		 * possibly by the kernel */
+		elem->timeout = data->ether ? ip_set_timeout_set(timeout)
+					    : timeout;
+		break;
+	}
+
+	return 0;
+}
+
+static int
+bitmap_ipmac_tdel(struct ip_set *set, void *value, u32 timeout)
+{
+	struct bitmap_ipmac *map = set->data;
+	const struct ipmac *data = value;
+	struct ipmac_telem *elem = bitmap_ipmac_elem(map, data->id);
+
+	if (elem->match == MAC_EMPTY || bitmap_expired(map, data->id))
+		return -IPSET_ERR_EXIST;
+
+	elem->match = MAC_EMPTY;
+
+	return 0;
+}
+
+static int
+bitmap_ipmac_tlist(const struct ip_set *set,
+		   struct sk_buff *skb, struct netlink_callback *cb)
+{
+	const struct bitmap_ipmac *map = set->data;
+	const struct ipmac_telem *elem;
+	struct nlattr *atd, *nested;
+	u32 id, first = cb->args[2];
+	u32 timeout, last = map->last_ip - map->first_ip;
+
+	atd = ipset_nest_start(skb, IPSET_ATTR_ADT);
+	if (!atd)
+		return -EMSGSIZE;
+	for (; cb->args[2] <= last; cb->args[2]++) {
+		id = cb->args[2];
+		elem = bitmap_ipmac_elem(map, id);
+		if (!bitmap_ipmac_exist(elem))
+			continue;
+		nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
+		if (!nested) {
+			if (id == first) {
+				nla_nest_cancel(skb, atd);
+				return -EMSGSIZE;
+			} else
+				goto nla_put_failure;
+		}
+		NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP,
+				htonl(map->first_ip + id));
+		if (elem->match == MAC_FILLED)
+			NLA_PUT(skb, IPSET_ATTR_ETHER, ETH_ALEN,
+				elem->ether);
+		timeout = elem->match == MAC_UNSET ? elem->timeout
+				: ip_set_timeout_get(elem->timeout);
+		NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, htonl(timeout));
+		ipset_nest_end(skb, nested);
+	}
+	ipset_nest_end(skb, atd);
+	/* Set listing finished */
+	cb->args[2] = 0;
+
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(skb, nested);
+	ipset_nest_end(skb, atd);
+	return -EMSGSIZE;
+}
+
+static int
+bitmap_ipmac_kadt(struct ip_set *set, const struct sk_buff *skb,
+		  enum ipset_adt adt, u8 pf, u8 dim, u8 flags)
+{
+	struct bitmap_ipmac *map = set->data;
+	ipset_adtfn adtfn = set->variant->adt[adt];
+	struct ipmac data;
+
+	data.id = ntohl(ip4addr(skb, flags & IPSET_DIM_ONE_SRC));
+	if (data.id < map->first_ip || data.id > map->last_ip)
+		return -IPSET_ERR_BITMAP_RANGE;
+
+	/* Backward compatibility: we don't check the second flag */
+	if (skb_mac_header(skb) < skb->head ||
+	    (skb_mac_header(skb) + ETH_HLEN) > skb->data)
+		return -EINVAL;
+
+	data.id -= map->first_ip;
+	data.ether = eth_hdr(skb)->h_source;
+
+	return adtfn(set, &data, map->timeout);
+}
+
+static int
+bitmap_ipmac_uadt(struct ip_set *set, struct nlattr *tb[],
+		  enum ipset_adt adt, u32 *lineno, u32 flags)
+{
+	const struct bitmap_ipmac *map = set->data;
+	ipset_adtfn adtfn = set->variant->adt[adt];
+	struct ipmac data;
+	u32 timeout = map->timeout;
+	int ret = 0;
+
+	if (unlikely(!tb[IPSET_ATTR_IP] ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT)))
+		return -IPSET_ERR_PROTOCOL;
+
+	if (tb[IPSET_ATTR_LINENO])
+		*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
+	ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &data.id);
+	if (ret)
+		return ret;
+
+	if (data.id < map->first_ip || data.id > map->last_ip)
+		return -IPSET_ERR_BITMAP_RANGE;
+
+	if (tb[IPSET_ATTR_ETHER])
+		data.ether = nla_data(tb[IPSET_ATTR_ETHER]);
+	else
+		data.ether = NULL;
+
+	if (tb[IPSET_ATTR_TIMEOUT]) {
+		if (!with_timeout(map->timeout))
+			return -IPSET_ERR_TIMEOUT;
+		timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
+	}
+
+	data.id -= map->first_ip;
+
+	ret = adtfn(set, &data, timeout);
+
+	return ip_set_eexist(ret, flags) ? 0 : ret;
+}
+
+static void
+bitmap_ipmac_destroy(struct ip_set *set)
+{
+	struct bitmap_ipmac *map = set->data;
+
+	if (with_timeout(map->timeout))
+		del_timer_sync(&map->gc);
+
+	ip_set_free(map->members);
+	kfree(map);
+
+	set->data = NULL;
+}
+
+static void
+bitmap_ipmac_flush(struct ip_set *set)
+{
+	struct bitmap_ipmac *map = set->data;
+
+	memset(map->members, 0,
+	       (map->last_ip - map->first_ip + 1) * map->dsize);
+}
+
+static int
+bitmap_ipmac_head(struct ip_set *set, struct sk_buff *skb)
+{
+	const struct bitmap_ipmac *map = set->data;
+	struct nlattr *nested;
+
+	nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
+	if (!nested)
+		goto nla_put_failure;
+	NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, htonl(map->first_ip));
+	NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP_TO, htonl(map->last_ip));
+	NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES,
+		      htonl(atomic_read(&set->ref) - 1));
+	NLA_PUT_NET32(skb, IPSET_ATTR_MEMSIZE,
+		      htonl(sizeof(*map)
+			    + (map->last_ip - map->first_ip + 1) * map->dsize));
+	if (with_timeout(map->timeout))
+		NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, htonl(map->timeout));
+	ipset_nest_end(skb, nested);
+
+	return 0;
+nla_put_failure:
+	return -EMSGSIZE;
+}
+
+static bool
+bitmap_ipmac_same_set(const struct ip_set *a, const struct ip_set *b)
+{
+	const struct bitmap_ipmac *x = a->data;
+	const struct bitmap_ipmac *y = b->data;
+
+	return x->first_ip == y->first_ip &&
+	       x->last_ip == y->last_ip &&
+	       x->timeout == y->timeout;
+}
+
+static const struct ip_set_type_variant bitmap_ipmac = {
+	.kadt	= bitmap_ipmac_kadt,
+	.uadt	= bitmap_ipmac_uadt,
+	.adt	= {
+		[IPSET_ADD] = bitmap_ipmac_add,
+		[IPSET_DEL] = bitmap_ipmac_del,
+		[IPSET_TEST] = bitmap_ipmac_test,
+	},
+	.destroy = bitmap_ipmac_destroy,
+	.flush	= bitmap_ipmac_flush,
+	.head	= bitmap_ipmac_head,
+	.list	= bitmap_ipmac_list,
+	.same_set = bitmap_ipmac_same_set,
+};
+
+static const struct ip_set_type_variant bitmap_tipmac = {
+	.kadt	= bitmap_ipmac_kadt,
+	.uadt	= bitmap_ipmac_uadt,
+	.adt	= {
+		[IPSET_ADD] = bitmap_ipmac_tadd,
+		[IPSET_DEL] = bitmap_ipmac_tdel,
+		[IPSET_TEST] = bitmap_ipmac_ttest,
+	},
+	.destroy = bitmap_ipmac_destroy,
+	.flush	= bitmap_ipmac_flush,
+	.head	= bitmap_ipmac_head,
+	.list	= bitmap_ipmac_tlist,
+	.same_set = bitmap_ipmac_same_set,
+};
+
+static void
+bitmap_ipmac_gc(unsigned long ul_set)
+{
+	struct ip_set *set = (struct ip_set *) ul_set;
+	struct bitmap_ipmac *map = set->data;
+	struct ipmac_telem *elem;
+	u32 id, last = map->last_ip - map->first_ip;
+
+	/* We run parallel with other readers (test element)
+	 * but adding/deleting new entries is locked out */
+	read_lock_bh(&set->lock);
+	for (id = 0; id <= last; id++) {
+		elem = bitmap_ipmac_elem(map, id);
+		if (elem->match == MAC_FILLED &&
+		    ip_set_timeout_expired(elem->timeout))
+			elem->match = MAC_EMPTY;
+	}
+	read_unlock_bh(&set->lock);
+
+	map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ;
+	add_timer(&map->gc);
+}
+
+static void
+bitmap_ipmac_gc_init(struct ip_set *set)
+{
+	struct bitmap_ipmac *map = set->data;
+
+	init_timer(&map->gc);
+	map->gc.data = (unsigned long) set;
+	map->gc.function = bitmap_ipmac_gc;
+	map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ;
+	add_timer(&map->gc);
+}
+
+/* Create bitmap:ip,mac type of sets */
+
+static bool
+init_map_ipmac(struct ip_set *set, struct bitmap_ipmac *map,
+	       u32 first_ip, u32 last_ip)
+{
+	map->members = ip_set_alloc((last_ip - first_ip + 1) * map->dsize);
+	if (!map->members)
+		return false;
+	map->first_ip = first_ip;
+	map->last_ip = last_ip;
+	map->timeout = IPSET_NO_TIMEOUT;
+
+	set->data = map;
+	set->family = AF_INET;
+
+	return true;
+}
+
+static int
+bitmap_ipmac_create(struct ip_set *set, struct nlattr *tb[],
+		    u32 flags)
+{
+	u32 first_ip, last_ip, elements;
+	struct bitmap_ipmac *map;
+	int ret;
+
+	if (unlikely(!tb[IPSET_ATTR_IP] ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT)))
+		return -IPSET_ERR_PROTOCOL;
+
+	ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &first_ip);
+	if (ret)
+		return ret;
+
+	if (tb[IPSET_ATTR_IP_TO]) {
+		ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &last_ip);
+		if (ret)
+			return ret;
+		if (first_ip > last_ip) {
+			u32 tmp = first_ip;
+
+			first_ip = last_ip;
+			last_ip = tmp;
+		}
+	} else if (tb[IPSET_ATTR_CIDR]) {
+		u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
+
+		if (cidr >= 32)
+			return -IPSET_ERR_INVALID_CIDR;
+		last_ip = first_ip | ~ip_set_hostmask(cidr);
+	} else
+		return -IPSET_ERR_PROTOCOL;
+
+	elements = last_ip - first_ip + 1;
+
+	if (elements > IPSET_BITMAP_MAX_RANGE + 1)
+		return -IPSET_ERR_BITMAP_RANGE_SIZE;
+
+	map = kzalloc(sizeof(*map), GFP_KERNEL);
+	if (!map)
+		return -ENOMEM;
+
+	if (tb[IPSET_ATTR_TIMEOUT]) {
+		map->dsize = sizeof(struct ipmac_telem);
+
+		if (!init_map_ipmac(set, map, first_ip, last_ip)) {
+			kfree(map);
+			return -ENOMEM;
+		}
+
+		map->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
+
+		set->variant = &bitmap_tipmac;
+
+		bitmap_ipmac_gc_init(set);
+	} else {
+		map->dsize = sizeof(struct ipmac_elem);
+
+		if (!init_map_ipmac(set, map, first_ip, last_ip)) {
+			kfree(map);
+			return -ENOMEM;
+		}
+		set->variant = &bitmap_ipmac;
+
+	}
+	return 0;
+}
+
+static struct ip_set_type bitmap_ipmac_type = {
+	.name		= "bitmap:ip,mac",
+	.protocol	= IPSET_PROTOCOL,
+	.features	= IPSET_TYPE_IP | IPSET_TYPE_MAC,
+	.dimension	= IPSET_DIM_TWO,
+	.family		= AF_INET,
+	.revision	= 0,
+	.create		= bitmap_ipmac_create,
+	.create_policy	= {
+		[IPSET_ATTR_IP]		= { .type = NLA_NESTED },
+		[IPSET_ATTR_IP_TO]	= { .type = NLA_NESTED },
+		[IPSET_ATTR_CIDR]	= { .type = NLA_U8 },
+		[IPSET_ATTR_TIMEOUT]	= { .type = NLA_U32 },
+	},
+	.adt_policy	= {
+		[IPSET_ATTR_IP]		= { .type = NLA_NESTED },
+		[IPSET_ATTR_ETHER]	= { .type = NLA_BINARY, .len  = ETH_ALEN },
+		[IPSET_ATTR_TIMEOUT]	= { .type = NLA_U32 },
+		[IPSET_ATTR_LINENO]	= { .type = NLA_U32 },
+	},
+	.me		= THIS_MODULE,
+};
+
+static int __init
+bitmap_ipmac_init(void)
+{
+	return ip_set_type_register(&bitmap_ipmac_type);
+}
+
+static void __exit
+bitmap_ipmac_fini(void)
+{
+	ip_set_type_unregister(&bitmap_ipmac_type);
+}
+
+module_init(bitmap_ipmac_init);
+module_exit(bitmap_ipmac_fini);
diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c
new file mode 100644
index 0000000..165f09b
--- /dev/null
+++ b/net/netfilter/ipset/ip_set_bitmap_port.c
@@ -0,0 +1,515 @@
+/* Copyright (C) 2003-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/* Kernel module implementing an IP set type: the bitmap:port type */
+
+#include <linux/module.h>
+#include <linux/ip.h>
+#include <linux/skbuff.h>
+#include <linux/errno.h>
+#include <linux/netlink.h>
+#include <linux/jiffies.h>
+#include <linux/timer.h>
+#include <net/netlink.h>
+
+#include <linux/netfilter/ipset/ip_set.h>
+#include <linux/netfilter/ipset/ip_set_bitmap.h>
+#include <linux/netfilter/ipset/ip_set_getport.h>
+#define IP_SET_BITMAP_TIMEOUT
+#include <linux/netfilter/ipset/ip_set_timeout.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
+MODULE_DESCRIPTION("bitmap:port type of IP sets");
+MODULE_ALIAS("ip_set_bitmap:port");
+
+/* Type structure */
+struct bitmap_port {
+	void *members;		/* the set members */
+	u16 first_port;		/* host byte order, included in range */
+	u16 last_port;		/* host byte order, included in range */
+	size_t memsize;		/* members size */
+	u32 timeout;		/* timeout parameter */
+	struct timer_list gc;	/* garbage collection */
+};
+
+/* Base variant */
+
+static int
+bitmap_port_test(struct ip_set *set, void *value, u32 timeout)
+{
+	const struct bitmap_port *map = set->data;
+	u16 id = *(u16 *)value;
+
+	return !!test_bit(id, map->members);
+}
+
+static int
+bitmap_port_add(struct ip_set *set, void *value, u32 timeout)
+{
+	struct bitmap_port *map = set->data;
+	u16 id = *(u16 *)value;
+
+	if (test_and_set_bit(id, map->members))
+		return -IPSET_ERR_EXIST;
+
+	return 0;
+}
+
+static int
+bitmap_port_del(struct ip_set *set, void *value, u32 timeout)
+{
+	struct bitmap_port *map = set->data;
+	u16 id = *(u16 *)value;
+
+	if (!test_and_clear_bit(id, map->members))
+		return -IPSET_ERR_EXIST;
+
+	return 0;
+}
+
+static int
+bitmap_port_list(const struct ip_set *set,
+		 struct sk_buff *skb, struct netlink_callback *cb)
+{
+	const struct bitmap_port *map = set->data;
+	struct nlattr *atd, *nested;
+	u16 id, first = cb->args[2];
+	u16 last = map->last_port - map->first_port;
+
+	atd = ipset_nest_start(skb, IPSET_ATTR_ADT);
+	if (!atd)
+		return -EMSGSIZE;
+	for (; cb->args[2] <= last; cb->args[2]++) {
+		id = cb->args[2];
+		if (!test_bit(id, map->members))
+			continue;
+		nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
+		if (!nested) {
+			if (id == first) {
+				nla_nest_cancel(skb, atd);
+				return -EMSGSIZE;
+			} else
+				goto nla_put_failure;
+		}
+		NLA_PUT_NET16(skb, IPSET_ATTR_PORT,
+			      htons(map->first_port + id));
+		ipset_nest_end(skb, nested);
+	}
+	ipset_nest_end(skb, atd);
+	/* Set listing finished */
+	cb->args[2] = 0;
+
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(skb, nested);
+	ipset_nest_end(skb, atd);
+	if (unlikely(id == first)) {
+		cb->args[2] = 0;
+		return -EMSGSIZE;
+	}
+	return 0;
+}
+
+/* Timeout variant */
+
+static int
+bitmap_port_ttest(struct ip_set *set, void *value, u32 timeout)
+{
+	const struct bitmap_port *map = set->data;
+	const unsigned long *members = map->members;
+	u16 id = *(u16 *)value;
+
+	return ip_set_timeout_test(members[id]);
+}
+
+static int
+bitmap_port_tadd(struct ip_set *set, void *value, u32 timeout)
+{
+	struct bitmap_port *map = set->data;
+	unsigned long *members = map->members;
+	u16 id = *(u16 *)value;
+
+	if (ip_set_timeout_test(members[id]))
+		return -IPSET_ERR_EXIST;
+
+	members[id] = ip_set_timeout_set(timeout);
+
+	return 0;
+}
+
+static int
+bitmap_port_tdel(struct ip_set *set, void *value, u32 timeout)
+{
+	struct bitmap_port *map = set->data;
+	unsigned long *members = map->members;
+	u16 id = *(u16 *)value;
+	int ret = -IPSET_ERR_EXIST;
+
+	if (ip_set_timeout_test(members[id]))
+		ret = 0;
+
+	members[id] = IPSET_ELEM_UNSET;
+	return ret;
+}
+
+static int
+bitmap_port_tlist(const struct ip_set *set,
+		  struct sk_buff *skb, struct netlink_callback *cb)
+{
+	const struct bitmap_port *map = set->data;
+	struct nlattr *adt, *nested;
+	u16 id, first = cb->args[2];
+	u16 last = map->last_port - map->first_port;
+	const unsigned long *members = map->members;
+
+	adt = ipset_nest_start(skb, IPSET_ATTR_ADT);
+	if (!adt)
+		return -EMSGSIZE;
+	for (; cb->args[2] <= last; cb->args[2]++) {
+		id = cb->args[2];
+		if (!ip_set_timeout_test(members[id]))
+			continue;
+		nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
+		if (!nested) {
+			if (id == first) {
+				nla_nest_cancel(skb, adt);
+				return -EMSGSIZE;
+			} else
+				goto nla_put_failure;
+		}
+		NLA_PUT_NET16(skb, IPSET_ATTR_PORT,
+			      htons(map->first_port + id));
+		NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT,
+			      htonl(ip_set_timeout_get(members[id])));
+		ipset_nest_end(skb, nested);
+	}
+	ipset_nest_end(skb, adt);
+
+	/* Set listing finished */
+	cb->args[2] = 0;
+
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(skb, nested);
+	ipset_nest_end(skb, adt);
+	if (unlikely(id == first)) {
+		cb->args[2] = 0;
+		return -EMSGSIZE;
+	}
+	return 0;
+}
+
+static int
+bitmap_port_kadt(struct ip_set *set, const struct sk_buff *skb,
+		 enum ipset_adt adt, u8 pf, u8 dim, u8 flags)
+{
+	struct bitmap_port *map = set->data;
+	ipset_adtfn adtfn = set->variant->adt[adt];
+	__be16 __port;
+	u16 port = 0;
+
+	if (!ip_set_get_ip_port(skb, pf, flags & IPSET_DIM_ONE_SRC, &__port))
+		return -EINVAL;
+
+	port = ntohs(__port);
+
+	if (port < map->first_port || port > map->last_port)
+		return -IPSET_ERR_BITMAP_RANGE;
+
+	port -= map->first_port;
+
+	return adtfn(set, &port, map->timeout);
+}
+
+static int
+bitmap_port_uadt(struct ip_set *set, struct nlattr *tb[],
+		 enum ipset_adt adt, u32 *lineno, u32 flags)
+{
+	struct bitmap_port *map = set->data;
+	ipset_adtfn adtfn = set->variant->adt[adt];
+	u32 timeout = map->timeout;
+	u32 port;	/* wraparound */
+	u16 id, port_to;
+	int ret = 0;
+
+	if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT)))
+		return -IPSET_ERR_PROTOCOL;
+
+	if (tb[IPSET_ATTR_LINENO])
+		*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
+	port = ip_set_get_h16(tb[IPSET_ATTR_PORT]);
+	if (port < map->first_port || port > map->last_port)
+		return -IPSET_ERR_BITMAP_RANGE;
+
+	if (tb[IPSET_ATTR_TIMEOUT]) {
+		if (!with_timeout(map->timeout))
+			return -IPSET_ERR_TIMEOUT;
+		timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
+	}
+
+	if (adt == IPSET_TEST) {
+		id = port - map->first_port;
+		return adtfn(set, &id, timeout);
+	}
+
+	if (tb[IPSET_ATTR_PORT_TO]) {
+		port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]);
+		if (port > port_to) {
+			swap(port, port_to);
+			if (port < map->first_port)
+				return -IPSET_ERR_BITMAP_RANGE;
+		}
+	} else
+		port_to = port;
+
+	if (port_to > map->last_port)
+		return -IPSET_ERR_BITMAP_RANGE;
+
+	for (; port <= port_to; port++) {
+		id = port - map->first_port;
+		ret = adtfn(set, &id, timeout);
+
+		if (ret && !ip_set_eexist(ret, flags))
+			return ret;
+		else
+			ret = 0;
+	}
+	return ret;
+}
+
+static void
+bitmap_port_destroy(struct ip_set *set)
+{
+	struct bitmap_port *map = set->data;
+
+	if (with_timeout(map->timeout))
+		del_timer_sync(&map->gc);
+
+	ip_set_free(map->members);
+	kfree(map);
+
+	set->data = NULL;
+}
+
+static void
+bitmap_port_flush(struct ip_set *set)
+{
+	struct bitmap_port *map = set->data;
+
+	memset(map->members, 0, map->memsize);
+}
+
+static int
+bitmap_port_head(struct ip_set *set, struct sk_buff *skb)
+{
+	const struct bitmap_port *map = set->data;
+	struct nlattr *nested;
+
+	nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
+	if (!nested)
+		goto nla_put_failure;
+	NLA_PUT_NET16(skb, IPSET_ATTR_PORT, htons(map->first_port));
+	NLA_PUT_NET16(skb, IPSET_ATTR_PORT_TO, htons(map->last_port));
+	NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES,
+		      htonl(atomic_read(&set->ref) - 1));
+	NLA_PUT_NET32(skb, IPSET_ATTR_MEMSIZE,
+		      htonl(sizeof(*map) + map->memsize));
+	if (with_timeout(map->timeout))
+		NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, htonl(map->timeout));
+	ipset_nest_end(skb, nested);
+
+	return 0;
+nla_put_failure:
+	return -EMSGSIZE;
+}
+
+static bool
+bitmap_port_same_set(const struct ip_set *a, const struct ip_set *b)
+{
+	const struct bitmap_port *x = a->data;
+	const struct bitmap_port *y = b->data;
+
+	return x->first_port == y->first_port &&
+	       x->last_port == y->last_port &&
+	       x->timeout == y->timeout;
+}
+
+static const struct ip_set_type_variant bitmap_port = {
+	.kadt	= bitmap_port_kadt,
+	.uadt	= bitmap_port_uadt,
+	.adt	= {
+		[IPSET_ADD] = bitmap_port_add,
+		[IPSET_DEL] = bitmap_port_del,
+		[IPSET_TEST] = bitmap_port_test,
+	},
+	.destroy = bitmap_port_destroy,
+	.flush	= bitmap_port_flush,
+	.head	= bitmap_port_head,
+	.list	= bitmap_port_list,
+	.same_set = bitmap_port_same_set,
+};
+
+static const struct ip_set_type_variant bitmap_tport = {
+	.kadt	= bitmap_port_kadt,
+	.uadt	= bitmap_port_uadt,
+	.adt	= {
+		[IPSET_ADD] = bitmap_port_tadd,
+		[IPSET_DEL] = bitmap_port_tdel,
+		[IPSET_TEST] = bitmap_port_ttest,
+	},
+	.destroy = bitmap_port_destroy,
+	.flush	= bitmap_port_flush,
+	.head	= bitmap_port_head,
+	.list	= bitmap_port_tlist,
+	.same_set = bitmap_port_same_set,
+};
+
+static void
+bitmap_port_gc(unsigned long ul_set)
+{
+	struct ip_set *set = (struct ip_set *) ul_set;
+	struct bitmap_port *map = set->data;
+	unsigned long *table = map->members;
+	u32 id;	/* wraparound */
+	u16 last = map->last_port - map->first_port;
+
+	/* We run parallel with other readers (test element)
+	 * but adding/deleting new entries is locked out */
+	read_lock_bh(&set->lock);
+	for (id = 0; id <= last; id++)
+		if (ip_set_timeout_expired(table[id]))
+			table[id] = IPSET_ELEM_UNSET;
+	read_unlock_bh(&set->lock);
+
+	map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ;
+	add_timer(&map->gc);
+}
+
+static void
+bitmap_port_gc_init(struct ip_set *set)
+{
+	struct bitmap_port *map = set->data;
+
+	init_timer(&map->gc);
+	map->gc.data = (unsigned long) set;
+	map->gc.function = bitmap_port_gc;
+	map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ;
+	add_timer(&map->gc);
+}
+
+/* Create bitmap:ip type of sets */
+
+static bool
+init_map_port(struct ip_set *set, struct bitmap_port *map,
+	      u16 first_port, u16 last_port)
+{
+	map->members = ip_set_alloc(map->memsize);
+	if (!map->members)
+		return false;
+	map->first_port = first_port;
+	map->last_port = last_port;
+	map->timeout = IPSET_NO_TIMEOUT;
+
+	set->data = map;
+	set->family = AF_UNSPEC;
+
+	return true;
+}
+
+static int
+bitmap_port_create(struct ip_set *set, struct nlattr *tb[],
+		 u32 flags)
+{
+	struct bitmap_port *map;
+	u16 first_port, last_port;
+
+	if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
+		     !ip_set_attr_netorder(tb, IPSET_ATTR_PORT_TO) ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT)))
+		return -IPSET_ERR_PROTOCOL;
+
+	first_port = ip_set_get_h16(tb[IPSET_ATTR_PORT]);
+	last_port = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]);
+	if (first_port > last_port) {
+		u16 tmp = first_port;
+
+		first_port = last_port;
+		last_port = tmp;
+	}
+
+	map = kzalloc(sizeof(*map), GFP_KERNEL);
+	if (!map)
+		return -ENOMEM;
+
+	if (tb[IPSET_ATTR_TIMEOUT]) {
+		map->memsize = (last_port - first_port + 1)
+			       * sizeof(unsigned long);
+
+		if (!init_map_port(set, map, first_port, last_port)) {
+			kfree(map);
+			return -ENOMEM;
+		}
+
+		map->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
+		set->variant = &bitmap_tport;
+
+		bitmap_port_gc_init(set);
+	} else {
+		map->memsize = bitmap_bytes(0, last_port - first_port);
+		pr_debug("memsize: %zu\n", map->memsize);
+		if (!init_map_port(set, map, first_port, last_port)) {
+			kfree(map);
+			return -ENOMEM;
+		}
+
+		set->variant = &bitmap_port;
+	}
+	return 0;
+}
+
+static struct ip_set_type bitmap_port_type = {
+	.name		= "bitmap:port",
+	.protocol	= IPSET_PROTOCOL,
+	.features	= IPSET_TYPE_PORT,
+	.dimension	= IPSET_DIM_ONE,
+	.family		= AF_UNSPEC,
+	.revision	= 0,
+	.create		= bitmap_port_create,
+	.create_policy	= {
+		[IPSET_ATTR_PORT]	= { .type = NLA_U16 },
+		[IPSET_ATTR_PORT_TO]	= { .type = NLA_U16 },
+		[IPSET_ATTR_TIMEOUT]	= { .type = NLA_U32 },
+	},
+	.adt_policy	= {
+		[IPSET_ATTR_PORT]	= { .type = NLA_U16 },
+		[IPSET_ATTR_PORT_TO]	= { .type = NLA_U16 },
+		[IPSET_ATTR_TIMEOUT]	= { .type = NLA_U32 },
+		[IPSET_ATTR_LINENO]	= { .type = NLA_U32 },
+	},
+	.me		= THIS_MODULE,
+};
+
+static int __init
+bitmap_port_init(void)
+{
+	return ip_set_type_register(&bitmap_port_type);
+}
+
+static void __exit
+bitmap_port_fini(void)
+{
+	ip_set_type_unregister(&bitmap_port_type);
+}
+
+module_init(bitmap_port_init);
+module_exit(bitmap_port_fini);
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
new file mode 100644
index 0000000..8b1a54c
--- /dev/null
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -0,0 +1,1671 @@
+/* Copyright (C) 2000-2002 Joakim Axelsson <gozem@linux.nu>
+ *                         Patrick Schaaf <bof@bof.de>
+ * Copyright (C) 2003-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/* Kernel module for IP set management */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/ip.h>
+#include <linux/skbuff.h>
+#include <linux/spinlock.h>
+#include <linux/netlink.h>
+#include <linux/rculist.h>
+#include <linux/version.h>
+#include <net/netlink.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/ipset/ip_set.h>
+
+static LIST_HEAD(ip_set_type_list);		/* all registered set types */
+static DEFINE_MUTEX(ip_set_type_mutex);		/* protects ip_set_type_list */
+
+static struct ip_set **ip_set_list;		/* all individual sets */
+static ip_set_id_t ip_set_max = CONFIG_IP_SET_MAX; /* max number of sets */
+
+#define STREQ(a, b)	(strncmp(a, b, IPSET_MAXNAMELEN) == 0)
+
+static unsigned int max_sets;
+
+module_param(max_sets, int, 0600);
+MODULE_PARM_DESC(max_sets, "maximal number of sets");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
+MODULE_DESCRIPTION("core IP set support");
+MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET);
+
+/*
+ * The set types are implemented in modules and registered set types
+ * can be found in ip_set_type_list. Adding/deleting types is
+ * serialized by ip_set_type_mutex.
+ */
+
+static inline void
+ip_set_type_lock(void)
+{
+	mutex_lock(&ip_set_type_mutex);
+}
+
+static inline void
+ip_set_type_unlock(void)
+{
+	mutex_unlock(&ip_set_type_mutex);
+}
+
+/* Register and deregister settype */
+
+static struct ip_set_type *
+find_set_type(const char *name, u8 family, u8 revision)
+{
+	struct ip_set_type *type;
+
+	list_for_each_entry_rcu(type, &ip_set_type_list, list)
+		if (STREQ(type->name, name) &&
+		    (type->family == family || type->family == AF_UNSPEC) &&
+		    type->revision == revision)
+			return type;
+	return NULL;
+}
+
+/* Unlock, try to load a set type module and lock again */
+static int
+try_to_load_type(const char *name)
+{
+	nfnl_unlock();
+	pr_debug("try to load ip_set_%s\n", name);
+	if (request_module("ip_set_%s", name) < 0) {
+		pr_warning("Can't find ip_set type %s\n", name);
+		nfnl_lock();
+		return -IPSET_ERR_FIND_TYPE;
+	}
+	nfnl_lock();
+	return -EAGAIN;
+}
+
+/* Find a set type and reference it */
+static int
+find_set_type_get(const char *name, u8 family, u8 revision,
+		  struct ip_set_type **found)
+{
+	rcu_read_lock();
+	*found = find_set_type(name, family, revision);
+	if (*found) {
+		int err = !try_module_get((*found)->me);
+		rcu_read_unlock();
+		return err ? -EFAULT : 0;
+	}
+	rcu_read_unlock();
+
+	return try_to_load_type(name);
+}
+
+/* Find a given set type by name and family.
+ * If we succeeded, the supported minimal and maximum revisions are
+ * filled out.
+ */
+static int
+find_set_type_minmax(const char *name, u8 family, u8 *min, u8 *max)
+{
+	struct ip_set_type *type;
+	bool found = false;
+
+	*min = *max = 0;
+	rcu_read_lock();
+	list_for_each_entry_rcu(type, &ip_set_type_list, list)
+		if (STREQ(type->name, name) &&
+		    (type->family == family || type->family == AF_UNSPEC)) {
+			found = true;
+			if (type->revision < *min)
+				*min = type->revision;
+			else if (type->revision > *max)
+				*max = type->revision;
+		}
+	rcu_read_unlock();
+	if (found)
+		return 0;
+
+	return try_to_load_type(name);
+}
+
+#define family_name(f)	((f) == AF_INET ? "inet" : \
+			 (f) == AF_INET6 ? "inet6" : "any")
+
+/* Register a set type structure. The type is identified by
+ * the unique triple of name, family and revision.
+ */
+int
+ip_set_type_register(struct ip_set_type *type)
+{
+	int ret = 0;
+
+	if (type->protocol != IPSET_PROTOCOL) {
+		pr_warning("ip_set type %s, family %s, revision %u uses "
+			   "wrong protocol version %u (want %u)\n",
+			   type->name, family_name(type->family),
+			   type->revision, type->protocol, IPSET_PROTOCOL);
+		return -EINVAL;
+	}
+
+	ip_set_type_lock();
+	if (find_set_type(type->name, type->family, type->revision)) {
+		/* Duplicate! */
+		pr_warning("ip_set type %s, family %s, revision %u "
+			   "already registered!\n", type->name,
+			   family_name(type->family), type->revision);
+		ret = -EINVAL;
+		goto unlock;
+	}
+	list_add_rcu(&type->list, &ip_set_type_list);
+	pr_debug("type %s, family %s, revision %u registered.\n",
+		 type->name, family_name(type->family), type->revision);
+unlock:
+	ip_set_type_unlock();
+	return ret;
+}
+EXPORT_SYMBOL_GPL(ip_set_type_register);
+
+/* Unregister a set type. There's a small race with ip_set_create */
+void
+ip_set_type_unregister(struct ip_set_type *type)
+{
+	ip_set_type_lock();
+	if (!find_set_type(type->name, type->family, type->revision)) {
+		pr_warning("ip_set type %s, family %s, revision %u "
+			   "not registered\n", type->name,
+			   family_name(type->family), type->revision);
+		goto unlock;
+	}
+	list_del_rcu(&type->list);
+	pr_debug("type %s, family %s, revision %u unregistered.\n",
+		 type->name, family_name(type->family), type->revision);
+unlock:
+	ip_set_type_unlock();
+
+	synchronize_rcu();
+}
+EXPORT_SYMBOL_GPL(ip_set_type_unregister);
+
+/* Utility functions */
+void *
+ip_set_alloc(size_t size)
+{
+	void *members = NULL;
+
+	if (size < KMALLOC_MAX_SIZE)
+		members = kzalloc(size, GFP_KERNEL | __GFP_NOWARN);
+
+	if (members) {
+		pr_debug("%p: allocated with kmalloc\n", members);
+		return members;
+	}
+
+	members = vzalloc(size);
+	if (!members)
+		return NULL;
+	pr_debug("%p: allocated with vmalloc\n", members);
+
+	return members;
+}
+EXPORT_SYMBOL_GPL(ip_set_alloc);
+
+void
+ip_set_free(void *members)
+{
+	pr_debug("%p: free with %s\n", members,
+		 is_vmalloc_addr(members) ? "vfree" : "kfree");
+	if (is_vmalloc_addr(members))
+		vfree(members);
+	else
+		kfree(members);
+}
+EXPORT_SYMBOL_GPL(ip_set_free);
+
+static inline bool
+flag_nested(const struct nlattr *nla)
+{
+	return nla->nla_type & NLA_F_NESTED;
+}
+
+static const struct nla_policy ipaddr_policy[IPSET_ATTR_IPADDR_MAX + 1] = {
+	[IPSET_ATTR_IPADDR_IPV4]	= { .type = NLA_U32 },
+	[IPSET_ATTR_IPADDR_IPV6]	= { .type = NLA_BINARY,
+					    .len = sizeof(struct in6_addr) },
+};
+
+int
+ip_set_get_ipaddr4(struct nlattr *nla,  __be32 *ipaddr)
+{
+	struct nlattr *tb[IPSET_ATTR_IPADDR_MAX+1];
+
+	if (unlikely(!flag_nested(nla)))
+		return -IPSET_ERR_PROTOCOL;
+	if (nla_parse_nested(tb, IPSET_ATTR_IPADDR_MAX, nla, ipaddr_policy))
+		return -IPSET_ERR_PROTOCOL;
+	if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_IPADDR_IPV4)))
+		return -IPSET_ERR_PROTOCOL;
+
+	*ipaddr = nla_get_be32(tb[IPSET_ATTR_IPADDR_IPV4]);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ip_set_get_ipaddr4);
+
+int
+ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr)
+{
+	struct nlattr *tb[IPSET_ATTR_IPADDR_MAX+1];
+
+	if (unlikely(!flag_nested(nla)))
+		return -IPSET_ERR_PROTOCOL;
+
+	if (nla_parse_nested(tb, IPSET_ATTR_IPADDR_MAX, nla, ipaddr_policy))
+		return -IPSET_ERR_PROTOCOL;
+	if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_IPADDR_IPV6)))
+		return -IPSET_ERR_PROTOCOL;
+
+	memcpy(ipaddr, nla_data(tb[IPSET_ATTR_IPADDR_IPV6]),
+		sizeof(struct in6_addr));
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ip_set_get_ipaddr6);
+
+/*
+ * Creating/destroying/renaming/swapping affect the existence and
+ * the properties of a set. All of these can be executed from userspace
+ * only and serialized by the nfnl mutex indirectly from nfnetlink.
+ *
+ * Sets are identified by their index in ip_set_list and the index
+ * is used by the external references (set/SET netfilter modules).
+ *
+ * The set behind an index may change by swapping only, from userspace.
+ */
+
+static inline void
+__ip_set_get(ip_set_id_t index)
+{
+	atomic_inc(&ip_set_list[index]->ref);
+}
+
+static inline void
+__ip_set_put(ip_set_id_t index)
+{
+	atomic_dec(&ip_set_list[index]->ref);
+}
+
+/*
+ * Add, del and test set entries from kernel.
+ *
+ * The set behind the index must exist and must be referenced
+ * so it can't be destroyed (or changed) under our foot.
+ */
+
+int
+ip_set_test(ip_set_id_t index, const struct sk_buff *skb,
+	    u8 family, u8 dim, u8 flags)
+{
+	struct ip_set *set = ip_set_list[index];
+	int ret = 0;
+
+	BUG_ON(set == NULL || atomic_read(&set->ref) == 0);
+	pr_debug("set %s, index %u\n", set->name, index);
+
+	if (dim < set->type->dimension ||
+	    !(family == set->family || set->family == AF_UNSPEC))
+		return 0;
+
+	read_lock_bh(&set->lock);
+	ret = set->variant->kadt(set, skb, IPSET_TEST, family, dim, flags);
+	read_unlock_bh(&set->lock);
+
+	if (ret == -EAGAIN) {
+		/* Type requests element to be completed */
+		pr_debug("element must be competed, ADD is triggered\n");
+		write_lock_bh(&set->lock);
+		set->variant->kadt(set, skb, IPSET_ADD, family, dim, flags);
+		write_unlock_bh(&set->lock);
+		ret = 1;
+	}
+
+	/* Convert error codes to nomatch */
+	return (ret < 0 ? 0 : ret);
+}
+EXPORT_SYMBOL_GPL(ip_set_test);
+
+int
+ip_set_add(ip_set_id_t index, const struct sk_buff *skb,
+	   u8 family, u8 dim, u8 flags)
+{
+	struct ip_set *set = ip_set_list[index];
+	int ret;
+
+	BUG_ON(set == NULL || atomic_read(&set->ref) == 0);
+	pr_debug("set %s, index %u\n", set->name, index);
+
+	if (dim < set->type->dimension ||
+	    !(family == set->family || set->family == AF_UNSPEC))
+		return 0;
+
+	write_lock_bh(&set->lock);
+	ret = set->variant->kadt(set, skb, IPSET_ADD, family, dim, flags);
+	write_unlock_bh(&set->lock);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(ip_set_add);
+
+int
+ip_set_del(ip_set_id_t index, const struct sk_buff *skb,
+	   u8 family, u8 dim, u8 flags)
+{
+	struct ip_set *set = ip_set_list[index];
+	int ret = 0;
+
+	BUG_ON(set == NULL || atomic_read(&set->ref) == 0);
+	pr_debug("set %s, index %u\n", set->name, index);
+
+	if (dim < set->type->dimension ||
+	    !(family == set->family || set->family == AF_UNSPEC))
+		return 0;
+
+	write_lock_bh(&set->lock);
+	ret = set->variant->kadt(set, skb, IPSET_DEL, family, dim, flags);
+	write_unlock_bh(&set->lock);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(ip_set_del);
+
+/*
+ * Find set by name, reference it once. The reference makes sure the
+ * thing pointed to, does not go away under our feet.
+ *
+ * The nfnl mutex must already be activated.
+ */
+ip_set_id_t
+ip_set_get_byname(const char *name, struct ip_set **set)
+{
+	ip_set_id_t i, index = IPSET_INVALID_ID;
+	struct ip_set *s;
+
+	for (i = 0; i < ip_set_max; i++) {
+		s = ip_set_list[i];
+		if (s != NULL && STREQ(s->name, name)) {
+			__ip_set_get(i);
+			index = i;
+			*set = s;
+		}
+	}
+
+	return index;
+}
+EXPORT_SYMBOL_GPL(ip_set_get_byname);
+
+/*
+ * If the given set pointer points to a valid set, decrement
+ * reference count by 1. The caller shall not assume the index
+ * to be valid, after calling this function.
+ *
+ * The nfnl mutex must already be activated.
+ */
+void
+ip_set_put_byindex(ip_set_id_t index)
+{
+	if (ip_set_list[index] != NULL) {
+		BUG_ON(atomic_read(&ip_set_list[index]->ref) == 0);
+		__ip_set_put(index);
+	}
+}
+EXPORT_SYMBOL_GPL(ip_set_put_byindex);
+
+/*
+ * Get the name of a set behind a set index.
+ * We assume the set is referenced, so it does exist and
+ * can't be destroyed. The set cannot be renamed due to
+ * the referencing either.
+ *
+ * The nfnl mutex must already be activated.
+ */
+const char *
+ip_set_name_byindex(ip_set_id_t index)
+{
+	const struct ip_set *set = ip_set_list[index];
+
+	BUG_ON(set == NULL);
+	BUG_ON(atomic_read(&set->ref) == 0);
+
+	/* Referenced, so it's safe */
+	return set->name;
+}
+EXPORT_SYMBOL_GPL(ip_set_name_byindex);
+
+/*
+ * Routines to call by external subsystems, which do not
+ * call nfnl_lock for us.
+ */
+
+/*
+ * Find set by name, reference it once. The reference makes sure the
+ * thing pointed to, does not go away under our feet.
+ *
+ * The nfnl mutex is used in the function.
+ */
+ip_set_id_t
+ip_set_nfnl_get(const char *name)
+{
+	struct ip_set *s;
+	ip_set_id_t index;
+
+	nfnl_lock();
+	index = ip_set_get_byname(name, &s);
+	nfnl_unlock();
+
+	return index;
+}
+EXPORT_SYMBOL_GPL(ip_set_nfnl_get);
+
+/*
+ * Find set by index, reference it once. The reference makes sure the
+ * thing pointed to, does not go away under our feet.
+ *
+ * The nfnl mutex is used in the function.
+ */
+ip_set_id_t
+ip_set_nfnl_get_byindex(ip_set_id_t index)
+{
+	if (index > ip_set_max)
+		return IPSET_INVALID_ID;
+
+	nfnl_lock();
+	if (ip_set_list[index])
+		__ip_set_get(index);
+	else
+		index = IPSET_INVALID_ID;
+	nfnl_unlock();
+
+	return index;
+}
+EXPORT_SYMBOL_GPL(ip_set_nfnl_get_byindex);
+
+/*
+ * If the given set pointer points to a valid set, decrement
+ * reference count by 1. The caller shall not assume the index
+ * to be valid, after calling this function.
+ *
+ * The nfnl mutex is used in the function.
+ */
+void
+ip_set_nfnl_put(ip_set_id_t index)
+{
+	nfnl_lock();
+	if (ip_set_list[index] != NULL) {
+		BUG_ON(atomic_read(&ip_set_list[index]->ref) == 0);
+		__ip_set_put(index);
+	}
+	nfnl_unlock();
+}
+EXPORT_SYMBOL_GPL(ip_set_nfnl_put);
+
+/*
+ * Communication protocol with userspace over netlink.
+ *
+ * We already locked by nfnl_lock.
+ */
+
+static inline bool
+protocol_failed(const struct nlattr * const tb[])
+{
+	return !tb[IPSET_ATTR_PROTOCOL] ||
+	       nla_get_u8(tb[IPSET_ATTR_PROTOCOL]) != IPSET_PROTOCOL;
+}
+
+static inline u32
+flag_exist(const struct nlmsghdr *nlh)
+{
+	return nlh->nlmsg_flags & NLM_F_EXCL ? 0 : IPSET_FLAG_EXIST;
+}
+
+static struct nlmsghdr *
+start_msg(struct sk_buff *skb, u32 pid, u32 seq, unsigned int flags,
+	  enum ipset_cmd cmd)
+{
+	struct nlmsghdr *nlh;
+	struct nfgenmsg *nfmsg;
+
+	nlh = nlmsg_put(skb, pid, seq, cmd | (NFNL_SUBSYS_IPSET << 8),
+			sizeof(*nfmsg), flags);
+	if (nlh == NULL)
+		return NULL;
+
+	nfmsg = nlmsg_data(nlh);
+	nfmsg->nfgen_family = AF_INET;
+	nfmsg->version = NFNETLINK_V0;
+	nfmsg->res_id = 0;
+
+	return nlh;
+}
+
+/* Create a set */
+
+static const struct nla_policy ip_set_create_policy[IPSET_ATTR_CMD_MAX + 1] = {
+	[IPSET_ATTR_PROTOCOL]	= { .type = NLA_U8 },
+	[IPSET_ATTR_SETNAME]	= { .type = NLA_NUL_STRING,
+				    .len = IPSET_MAXNAMELEN - 1 },
+	[IPSET_ATTR_TYPENAME]	= { .type = NLA_NUL_STRING,
+				    .len = IPSET_MAXNAMELEN - 1},
+	[IPSET_ATTR_REVISION]	= { .type = NLA_U8 },
+	[IPSET_ATTR_FAMILY]	= { .type = NLA_U8 },
+	[IPSET_ATTR_DATA]	= { .type = NLA_NESTED },
+};
+
+static ip_set_id_t
+find_set_id(const char *name)
+{
+	ip_set_id_t i, index = IPSET_INVALID_ID;
+	const struct ip_set *set;
+
+	for (i = 0; index == IPSET_INVALID_ID && i < ip_set_max; i++) {
+		set = ip_set_list[i];
+		if (set != NULL && STREQ(set->name, name))
+			index = i;
+	}
+	return index;
+}
+
+static inline struct ip_set *
+find_set(const char *name)
+{
+	ip_set_id_t index = find_set_id(name);
+
+	return index == IPSET_INVALID_ID ? NULL : ip_set_list[index];
+}
+
+static int
+find_free_id(const char *name, ip_set_id_t *index, struct ip_set **set)
+{
+	ip_set_id_t i;
+
+	*index = IPSET_INVALID_ID;
+	for (i = 0;  i < ip_set_max; i++) {
+		if (ip_set_list[i] == NULL) {
+			if (*index == IPSET_INVALID_ID)
+				*index = i;
+		} else if (STREQ(name, ip_set_list[i]->name)) {
+			/* Name clash */
+			*set = ip_set_list[i];
+			return -EEXIST;
+		}
+	}
+	if (*index == IPSET_INVALID_ID)
+		/* No free slot remained */
+		return -IPSET_ERR_MAX_SETS;
+	return 0;
+}
+
+static int
+ip_set_create(struct sock *ctnl, struct sk_buff *skb,
+	      const struct nlmsghdr *nlh,
+	      const struct nlattr * const attr[])
+{
+	struct ip_set *set, *clash;
+	ip_set_id_t index = IPSET_INVALID_ID;
+	struct nlattr *tb[IPSET_ATTR_CREATE_MAX+1] = {};
+	const char *name, *typename;
+	u8 family, revision;
+	u32 flags = flag_exist(nlh);
+	int ret = 0;
+
+	if (unlikely(protocol_failed(attr) ||
+		     attr[IPSET_ATTR_SETNAME] == NULL ||
+		     attr[IPSET_ATTR_TYPENAME] == NULL ||
+		     attr[IPSET_ATTR_REVISION] == NULL ||
+		     attr[IPSET_ATTR_FAMILY] == NULL ||
+		     (attr[IPSET_ATTR_DATA] != NULL &&
+		      !flag_nested(attr[IPSET_ATTR_DATA]))))
+		return -IPSET_ERR_PROTOCOL;
+
+	name = nla_data(attr[IPSET_ATTR_SETNAME]);
+	typename = nla_data(attr[IPSET_ATTR_TYPENAME]);
+	family = nla_get_u8(attr[IPSET_ATTR_FAMILY]);
+	revision = nla_get_u8(attr[IPSET_ATTR_REVISION]);
+	pr_debug("setname: %s, typename: %s, family: %s, revision: %u\n",
+		 name, typename, family_name(family), revision);
+
+	/*
+	 * First, and without any locks, allocate and initialize
+	 * a normal base set structure.
+	 */
+	set = kzalloc(sizeof(struct ip_set), GFP_KERNEL);
+	if (!set)
+		return -ENOMEM;
+	rwlock_init(&set->lock);
+	strlcpy(set->name, name, IPSET_MAXNAMELEN);
+	atomic_set(&set->ref, 0);
+	set->family = family;
+
+	/*
+	 * Next, check that we know the type, and take
+	 * a reference on the type, to make sure it stays available
+	 * while constructing our new set.
+	 *
+	 * After referencing the type, we try to create the type
+	 * specific part of the set without holding any locks.
+	 */
+	ret = find_set_type_get(typename, family, revision, &(set->type));
+	if (ret)
+		goto out;
+
+	/*
+	 * Without holding any locks, create private part.
+	 */
+	if (attr[IPSET_ATTR_DATA] &&
+	    nla_parse_nested(tb, IPSET_ATTR_CREATE_MAX, attr[IPSET_ATTR_DATA],
+			     set->type->create_policy)) {
+	    	ret = -IPSET_ERR_PROTOCOL;
+	    	goto put_out;
+	}
+
+	ret = set->type->create(set, tb, flags);
+	if (ret != 0)
+		goto put_out;
+
+	/* BTW, ret==0 here. */
+
+	/*
+	 * Here, we have a valid, constructed set and we are protected
+	 * by nfnl_lock. Find the first free index in ip_set_list and
+	 * check clashing.
+	 */
+	if ((ret = find_free_id(set->name, &index, &clash)) != 0) {
+		/* If this is the same set and requested, ignore error */
+		if (ret == -EEXIST &&
+		    (flags & IPSET_FLAG_EXIST) &&
+		    STREQ(set->type->name, clash->type->name) &&
+		    set->type->family == clash->type->family &&
+		    set->type->revision == clash->type->revision &&
+		    set->variant->same_set(set, clash))
+			ret = 0;
+		goto cleanup;
+	}
+
+	/*
+	 * Finally! Add our shiny new set to the list, and be done.
+	 */
+	pr_debug("create: '%s' created with index %u!\n", set->name, index);
+	ip_set_list[index] = set;
+
+	return ret;
+
+cleanup:
+	set->variant->destroy(set);
+put_out:
+	module_put(set->type->me);
+out:
+	kfree(set);
+	return ret;
+}
+
+/* Destroy sets */
+
+static const struct nla_policy
+ip_set_setname_policy[IPSET_ATTR_CMD_MAX + 1] = {
+	[IPSET_ATTR_PROTOCOL]	= { .type = NLA_U8 },
+	[IPSET_ATTR_SETNAME]	= { .type = NLA_NUL_STRING,
+				    .len = IPSET_MAXNAMELEN - 1 },
+};
+
+static void
+ip_set_destroy_set(ip_set_id_t index)
+{
+	struct ip_set *set = ip_set_list[index];
+
+	pr_debug("set: %s\n",  set->name);
+	ip_set_list[index] = NULL;
+
+	/* Must call it without holding any lock */
+	set->variant->destroy(set);
+	module_put(set->type->me);
+	kfree(set);
+}
+
+static int
+ip_set_destroy(struct sock *ctnl, struct sk_buff *skb,
+	       const struct nlmsghdr *nlh,
+	       const struct nlattr * const attr[])
+{
+	ip_set_id_t i;
+
+	if (unlikely(protocol_failed(attr)))
+		return -IPSET_ERR_PROTOCOL;
+
+	/* References are protected by the nfnl mutex */
+	if (!attr[IPSET_ATTR_SETNAME]) {
+		for (i = 0; i < ip_set_max; i++) {
+			if (ip_set_list[i] != NULL &&
+			    (atomic_read(&ip_set_list[i]->ref)))
+				return -IPSET_ERR_BUSY;
+		}
+		for (i = 0; i < ip_set_max; i++) {
+			if (ip_set_list[i] != NULL)
+				ip_set_destroy_set(i);
+		}
+	} else {
+		i = find_set_id(nla_data(attr[IPSET_ATTR_SETNAME]));
+		if (i == IPSET_INVALID_ID)
+			return -ENOENT;
+		else if (atomic_read(&ip_set_list[i]->ref))
+			return -IPSET_ERR_BUSY;
+
+		ip_set_destroy_set(i);
+	}
+	return 0;
+}
+
+/* Flush sets */
+
+static void
+ip_set_flush_set(struct ip_set *set)
+{
+	pr_debug("set: %s\n",  set->name);
+
+	write_lock_bh(&set->lock);
+	set->variant->flush(set);
+	write_unlock_bh(&set->lock);
+}
+
+static int
+ip_set_flush(struct sock *ctnl, struct sk_buff *skb,
+	     const struct nlmsghdr *nlh,
+	     const struct nlattr * const attr[])
+{
+	ip_set_id_t i;
+
+	if (unlikely(protocol_failed(attr)))
+		return -EPROTO;
+
+	if (!attr[IPSET_ATTR_SETNAME]) {
+		for (i = 0; i < ip_set_max; i++)
+			if (ip_set_list[i] != NULL)
+				ip_set_flush_set(ip_set_list[i]);
+	} else {
+		i = find_set_id(nla_data(attr[IPSET_ATTR_SETNAME]));
+		if (i == IPSET_INVALID_ID)
+			return -ENOENT;
+
+		ip_set_flush_set(ip_set_list[i]);
+	}
+
+	return 0;
+}
+
+/* Rename a set */
+
+static const struct nla_policy
+ip_set_setname2_policy[IPSET_ATTR_CMD_MAX + 1] = {
+	[IPSET_ATTR_PROTOCOL]	= { .type = NLA_U8 },
+	[IPSET_ATTR_SETNAME]	= { .type = NLA_NUL_STRING,
+				    .len = IPSET_MAXNAMELEN - 1 },
+	[IPSET_ATTR_SETNAME2]	= { .type = NLA_NUL_STRING,
+				    .len = IPSET_MAXNAMELEN - 1 },
+};
+
+static int
+ip_set_rename(struct sock *ctnl, struct sk_buff *skb,
+	      const struct nlmsghdr *nlh,
+	      const struct nlattr * const attr[])
+{
+	struct ip_set *set;
+	const char *name2;
+	ip_set_id_t i;
+
+	if (unlikely(protocol_failed(attr) ||
+		     attr[IPSET_ATTR_SETNAME] == NULL ||
+		     attr[IPSET_ATTR_SETNAME2] == NULL))
+		return -IPSET_ERR_PROTOCOL;
+
+	set = find_set(nla_data(attr[IPSET_ATTR_SETNAME]));
+	if (set == NULL)
+		return -ENOENT;
+	if (atomic_read(&set->ref) != 0)
+		return -IPSET_ERR_REFERENCED;
+
+	name2 = nla_data(attr[IPSET_ATTR_SETNAME2]);
+	for (i = 0; i < ip_set_max; i++) {
+		if (ip_set_list[i] != NULL &&
+		    STREQ(ip_set_list[i]->name, name2))
+			return -IPSET_ERR_EXIST_SETNAME2;
+	}
+	strncpy(set->name, name2, IPSET_MAXNAMELEN);
+
+	return 0;
+}
+
+/* Swap two sets so that name/index points to the other.
+ * References and set names are also swapped.
+ *
+ * We are protected by the nfnl mutex and references are
+ * manipulated only by holding the mutex. The kernel interfaces
+ * do not hold the mutex but the pointer settings are atomic
+ * so the ip_set_list always contains valid pointers to the sets.
+ */
+
+static int
+ip_set_swap(struct sock *ctnl, struct sk_buff *skb,
+	    const struct nlmsghdr *nlh,
+	    const struct nlattr * const attr[])
+{
+	struct ip_set *from, *to;
+	ip_set_id_t from_id, to_id;
+	char from_name[IPSET_MAXNAMELEN];
+	u32 from_ref;
+
+	if (unlikely(protocol_failed(attr) ||
+		     attr[IPSET_ATTR_SETNAME] == NULL ||
+		     attr[IPSET_ATTR_SETNAME2] == NULL))
+		return -IPSET_ERR_PROTOCOL;
+
+	from_id = find_set_id(nla_data(attr[IPSET_ATTR_SETNAME]));
+	if (from_id == IPSET_INVALID_ID)
+		return -ENOENT;
+
+	to_id = find_set_id(nla_data(attr[IPSET_ATTR_SETNAME2]));
+	if (to_id == IPSET_INVALID_ID)
+		return -IPSET_ERR_EXIST_SETNAME2;
+
+	from = ip_set_list[from_id];
+	to = ip_set_list[to_id];
+
+	/* Features must not change.
+	 * Not an artifical restriction anymore, as we must prevent
+	 * possible loops created by swapping in setlist type of sets. */
+	if (!(from->type->features == to->type->features &&
+	      from->type->family == to->type->family))
+		return -IPSET_ERR_TYPE_MISMATCH;
+
+	/* No magic here: ref munging protected by the nfnl_lock */
+	strncpy(from_name, from->name, IPSET_MAXNAMELEN);
+	from_ref = atomic_read(&from->ref);
+
+	strncpy(from->name, to->name, IPSET_MAXNAMELEN);
+	atomic_set(&from->ref, atomic_read(&to->ref));
+	strncpy(to->name, from_name, IPSET_MAXNAMELEN);
+	atomic_set(&to->ref, from_ref);
+
+	ip_set_list[from_id] = to;
+	ip_set_list[to_id] = from;
+
+	return 0;
+}
+
+/* List/save set data */
+
+#define DUMP_INIT	0L
+#define DUMP_ALL	1L
+#define DUMP_ONE	2L
+#define DUMP_LAST	3L
+
+static int
+ip_set_dump_done(struct netlink_callback *cb)
+{
+	if (cb->args[2]) {
+		pr_debug("release set %s\n", ip_set_list[cb->args[1]]->name);
+		__ip_set_put((ip_set_id_t) cb->args[1]);
+	}
+	return 0;
+}
+
+static inline void
+dump_attrs(struct nlmsghdr *nlh)
+{
+	const struct nlattr *attr;
+	int rem;
+
+	pr_debug("dump nlmsg\n");
+	nlmsg_for_each_attr(attr, nlh, sizeof(struct nfgenmsg), rem) {
+		pr_debug("type: %u, len %u\n", nla_type(attr), attr->nla_len);
+	}
+}
+
+static int
+dump_init(struct netlink_callback *cb)
+{
+	struct nlmsghdr *nlh = nlmsg_hdr(cb->skb);
+	int min_len = NLMSG_SPACE(sizeof(struct nfgenmsg));
+	struct nlattr *cda[IPSET_ATTR_CMD_MAX+1];
+	struct nlattr *attr = (void *)nlh + min_len;
+	ip_set_id_t index;
+
+	/* Second pass, so parser can't fail */
+	nla_parse(cda, IPSET_ATTR_CMD_MAX,
+		  attr, nlh->nlmsg_len - min_len, ip_set_setname_policy);
+
+	/* cb->args[0] : dump single set/all sets
+	 *         [1] : set index
+	 *         [..]: type specific
+	 */
+
+	if (!cda[IPSET_ATTR_SETNAME]) {
+		cb->args[0] = DUMP_ALL;
+		return 0;
+	}
+
+	index = find_set_id(nla_data(cda[IPSET_ATTR_SETNAME]));
+	if (index == IPSET_INVALID_ID)
+		return -ENOENT;
+
+	cb->args[0] = DUMP_ONE;
+	cb->args[1] = index;
+	return 0;
+}
+
+static int
+ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	ip_set_id_t index = IPSET_INVALID_ID, max;
+	struct ip_set *set = NULL;
+	struct nlmsghdr *nlh = NULL;
+	unsigned int flags = NETLINK_CB(cb->skb).pid ? NLM_F_MULTI : 0;
+	int ret = 0;
+
+	if (cb->args[0] == DUMP_INIT) {
+		ret = dump_init(cb);
+		if (ret < 0) {
+			nlh = nlmsg_hdr(cb->skb);
+			/* We have to create and send the error message
+			 * manually :-( */
+			if (nlh->nlmsg_flags & NLM_F_ACK)
+				netlink_ack(cb->skb, nlh, ret);
+			return ret;
+		}
+	}
+
+	if (cb->args[1] >= ip_set_max)
+		goto out;
+
+	pr_debug("args[0]: %ld args[1]: %ld\n", cb->args[0], cb->args[1]);
+	max = cb->args[0] == DUMP_ONE ? cb->args[1] + 1 : ip_set_max;
+	for (; cb->args[1] < max; cb->args[1]++) {
+		index = (ip_set_id_t) cb->args[1];
+		set = ip_set_list[index];
+		if (set == NULL) {
+			if (cb->args[0] == DUMP_ONE) {
+				ret = -ENOENT;
+				goto out;
+			}
+			continue;
+		}
+		/* When dumping all sets, we must dump "sorted"
+		 * so that lists (unions of sets) are dumped last.
+		 */
+		if (cb->args[0] != DUMP_ONE &&
+		    !((cb->args[0] == DUMP_ALL) ^
+		      (set->type->features & IPSET_DUMP_LAST)))
+			continue;
+		pr_debug("List set: %s\n", set->name);
+		if (!cb->args[2]) {
+			/* Start listing: make sure set won't be destroyed */
+			pr_debug("reference set\n");
+			__ip_set_get(index);
+		}
+		nlh = start_msg(skb, NETLINK_CB(cb->skb).pid,
+				cb->nlh->nlmsg_seq, flags,
+				IPSET_CMD_LIST);
+		if (!nlh) {
+			ret = -EMSGSIZE;
+			goto release_refcount;
+		}
+		NLA_PUT_U8(skb, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL);
+		NLA_PUT_STRING(skb, IPSET_ATTR_SETNAME, set->name);
+		switch (cb->args[2]) {
+		case 0:
+			/* Core header data */
+			NLA_PUT_STRING(skb, IPSET_ATTR_TYPENAME,
+				       set->type->name);
+			NLA_PUT_U8(skb, IPSET_ATTR_FAMILY,
+				   set->family);
+			NLA_PUT_U8(skb, IPSET_ATTR_REVISION,
+				   set->type->revision);
+			ret = set->variant->head(set, skb);
+			if (ret < 0)
+				goto release_refcount;
+			/* Fall through and add elements */
+		default:
+			read_lock_bh(&set->lock);
+			ret = set->variant->list(set, skb, cb);
+			read_unlock_bh(&set->lock);
+			if (!cb->args[2]) {
+				/* Set is done, proceed with next one */
+				if (cb->args[0] == DUMP_ONE)
+					cb->args[1] = IPSET_INVALID_ID;
+				else
+					cb->args[1]++;
+			}
+			goto release_refcount;
+		}
+	}
+	goto out;
+
+nla_put_failure:
+	ret = -EFAULT;
+release_refcount:
+	/* If there was an error or set is done, release set */
+	if (ret || !cb->args[2]) {
+		pr_debug("release set %s\n", ip_set_list[index]->name);
+		__ip_set_put(index);
+	}
+
+	/* If we dump all sets, continue with dumping last ones */
+	if (cb->args[0] == DUMP_ALL && cb->args[1] >= max && !cb->args[2])
+		cb->args[0] = DUMP_LAST;
+
+out:
+	if (nlh) {
+		nlmsg_end(skb, nlh);
+		pr_debug("nlmsg_len: %u\n", nlh->nlmsg_len);
+		dump_attrs(nlh);
+	}
+
+	return ret < 0 ? ret : skb->len;
+}
+
+static int
+ip_set_dump(struct sock *ctnl, struct sk_buff *skb,
+	    const struct nlmsghdr *nlh,
+	    const struct nlattr * const attr[])
+{
+	if (unlikely(protocol_failed(attr)))
+		return -IPSET_ERR_PROTOCOL;
+
+	return netlink_dump_start(ctnl, skb, nlh,
+				  ip_set_dump_start,
+				  ip_set_dump_done);
+}
+
+/* Add, del and test */
+
+static const struct nla_policy ip_set_adt_policy[IPSET_ATTR_CMD_MAX + 1] = {
+	[IPSET_ATTR_PROTOCOL]	= { .type = NLA_U8 },
+	[IPSET_ATTR_SETNAME]	= { .type = NLA_NUL_STRING,
+				    .len = IPSET_MAXNAMELEN - 1 },
+	[IPSET_ATTR_LINENO]	= { .type = NLA_U32 },
+	[IPSET_ATTR_DATA]	= { .type = NLA_NESTED },
+	[IPSET_ATTR_ADT]	= { .type = NLA_NESTED },
+};
+
+static int
+call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set,
+	struct nlattr *tb[], enum ipset_adt adt,
+	u32 flags, bool use_lineno)
+{
+	int ret, retried = 0;
+	u32 lineno = 0;
+	bool eexist = flags & IPSET_FLAG_EXIST;
+
+	do {
+		write_lock_bh(&set->lock);
+		ret = set->variant->uadt(set, tb, adt, &lineno, flags);
+		write_unlock_bh(&set->lock);
+	} while (ret == -EAGAIN &&
+		 set->variant->resize &&
+		 (ret = set->variant->resize(set, retried++)) == 0);
+
+	if (!ret || (ret == -IPSET_ERR_EXIST && eexist))
+		return 0;
+	if (lineno && use_lineno) {
+		/* Error in restore/batch mode: send back lineno */
+		struct nlmsghdr *rep, *nlh = nlmsg_hdr(skb);
+		struct sk_buff *skb2;
+		struct nlmsgerr *errmsg;
+		size_t payload = sizeof(*errmsg) + nlmsg_len(nlh);
+		int min_len = NLMSG_SPACE(sizeof(struct nfgenmsg));
+		struct nlattr *cda[IPSET_ATTR_CMD_MAX+1];
+		struct nlattr *cmdattr;
+		u32 *errline;
+
+		skb2 = nlmsg_new(payload, GFP_KERNEL);
+		if (skb2 == NULL)
+			return -ENOMEM;
+		rep = __nlmsg_put(skb2, NETLINK_CB(skb).pid,
+				  nlh->nlmsg_seq, NLMSG_ERROR, payload, 0);
+		errmsg = nlmsg_data(rep);
+		errmsg->error = ret;
+		memcpy(&errmsg->msg, nlh, nlh->nlmsg_len);
+		cmdattr = (void *)&errmsg->msg + min_len;
+
+		nla_parse(cda, IPSET_ATTR_CMD_MAX,
+			  cmdattr, nlh->nlmsg_len - min_len,
+			  ip_set_adt_policy);
+
+		errline = nla_data(cda[IPSET_ATTR_LINENO]);
+
+		*errline = lineno;
+
+		netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT);
+		/* Signal netlink not to send its ACK/errmsg.  */
+		return -EINTR;
+	}
+
+	return ret;
+}
+
+static int
+ip_set_uadd(struct sock *ctnl, struct sk_buff *skb,
+	    const struct nlmsghdr *nlh,
+	    const struct nlattr * const attr[])
+{
+	struct ip_set *set;
+	struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {};
+	const struct nlattr *nla;
+	u32 flags = flag_exist(nlh);
+	bool use_lineno;
+	int ret = 0;
+
+	if (unlikely(protocol_failed(attr) ||
+		     attr[IPSET_ATTR_SETNAME] == NULL ||
+		     !((attr[IPSET_ATTR_DATA] != NULL) ^
+		       (attr[IPSET_ATTR_ADT] != NULL)) ||
+		     (attr[IPSET_ATTR_DATA] != NULL &&
+		      !flag_nested(attr[IPSET_ATTR_DATA])) ||
+		     (attr[IPSET_ATTR_ADT] != NULL &&
+		      (!flag_nested(attr[IPSET_ATTR_ADT]) ||
+		       attr[IPSET_ATTR_LINENO] == NULL))))
+		return -IPSET_ERR_PROTOCOL;
+
+	set = find_set(nla_data(attr[IPSET_ATTR_SETNAME]));
+	if (set == NULL)
+		return -ENOENT;
+
+	use_lineno = !!attr[IPSET_ATTR_LINENO];
+	if (attr[IPSET_ATTR_DATA]) {
+		if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX,
+				     attr[IPSET_ATTR_DATA],
+				     set->type->adt_policy))
+			return -IPSET_ERR_PROTOCOL;
+		ret = call_ad(ctnl, skb, set, tb, IPSET_ADD, flags,
+			      use_lineno);
+	} else {
+		int nla_rem;
+
+		nla_for_each_nested(nla, attr[IPSET_ATTR_ADT], nla_rem) {
+			memset(tb, 0, sizeof(tb));
+			if (nla_type(nla) != IPSET_ATTR_DATA ||
+			    !flag_nested(nla) ||
+			    nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, nla,
+					     set->type->adt_policy))
+				return -IPSET_ERR_PROTOCOL;
+			ret = call_ad(ctnl, skb, set, tb, IPSET_ADD,
+				      flags, use_lineno);
+			if (ret < 0)
+				return ret;
+		}
+	}
+	return ret;
+}
+
+static int
+ip_set_udel(struct sock *ctnl, struct sk_buff *skb,
+	    const struct nlmsghdr *nlh,
+	    const struct nlattr * const attr[])
+{
+	struct ip_set *set;
+	struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {};
+	const struct nlattr *nla;
+	u32 flags = flag_exist(nlh);
+	bool use_lineno;
+	int ret = 0;
+
+	if (unlikely(protocol_failed(attr) ||
+		     attr[IPSET_ATTR_SETNAME] == NULL ||
+		     !((attr[IPSET_ATTR_DATA] != NULL) ^
+		       (attr[IPSET_ATTR_ADT] != NULL)) ||
+		     (attr[IPSET_ATTR_DATA] != NULL &&
+		      !flag_nested(attr[IPSET_ATTR_DATA])) ||
+		     (attr[IPSET_ATTR_ADT] != NULL &&
+		      (!flag_nested(attr[IPSET_ATTR_ADT]) ||
+		       attr[IPSET_ATTR_LINENO] == NULL))))
+		return -IPSET_ERR_PROTOCOL;
+
+	set = find_set(nla_data(attr[IPSET_ATTR_SETNAME]));
+	if (set == NULL)
+		return -ENOENT;
+
+	use_lineno = !!attr[IPSET_ATTR_LINENO];
+	if (attr[IPSET_ATTR_DATA]) {
+		if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX,
+				     attr[IPSET_ATTR_DATA],
+				     set->type->adt_policy))
+			return -IPSET_ERR_PROTOCOL;
+		ret = call_ad(ctnl, skb, set, tb, IPSET_DEL, flags,
+			      use_lineno);
+	} else {
+		int nla_rem;
+
+		nla_for_each_nested(nla, attr[IPSET_ATTR_ADT], nla_rem) {
+			memset(tb, 0, sizeof(*tb));
+			if (nla_type(nla) != IPSET_ATTR_DATA ||
+			    !flag_nested(nla) ||
+			    nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, nla,
+					     set->type->adt_policy))
+				return -IPSET_ERR_PROTOCOL;
+			ret = call_ad(ctnl, skb, set, tb, IPSET_DEL,
+				      flags, use_lineno);
+			if (ret < 0)
+				return ret;
+		}
+	}
+	return ret;
+}
+
+static int
+ip_set_utest(struct sock *ctnl, struct sk_buff *skb,
+	     const struct nlmsghdr *nlh,
+	     const struct nlattr * const attr[])
+{
+	struct ip_set *set;
+	struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {};
+	int ret = 0;
+
+	if (unlikely(protocol_failed(attr) ||
+		     attr[IPSET_ATTR_SETNAME] == NULL ||
+		     attr[IPSET_ATTR_DATA] == NULL ||
+		     !flag_nested(attr[IPSET_ATTR_DATA])))
+		return -IPSET_ERR_PROTOCOL;
+
+	set = find_set(nla_data(attr[IPSET_ATTR_SETNAME]));
+	if (set == NULL)
+		return -ENOENT;
+
+	if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, attr[IPSET_ATTR_DATA],
+			     set->type->adt_policy))
+		return -IPSET_ERR_PROTOCOL;
+
+	read_lock_bh(&set->lock);
+	ret = set->variant->uadt(set, tb, IPSET_TEST, NULL, 0);
+	read_unlock_bh(&set->lock);
+	/* Userspace can't trigger element to be re-added */
+	if (ret == -EAGAIN)
+		ret = 1;
+
+	return ret < 0 ? ret : ret > 0 ? 0 : -IPSET_ERR_EXIST;
+}
+
+/* Get headed data of a set */
+
+static int
+ip_set_header(struct sock *ctnl, struct sk_buff *skb,
+	      const struct nlmsghdr *nlh,
+	      const struct nlattr * const attr[])
+{
+	const struct ip_set *set;
+	struct sk_buff *skb2;
+	struct nlmsghdr *nlh2;
+	ip_set_id_t index;
+	int ret = 0;
+
+	if (unlikely(protocol_failed(attr) ||
+		     attr[IPSET_ATTR_SETNAME] == NULL))
+		return -IPSET_ERR_PROTOCOL;
+
+	index = find_set_id(nla_data(attr[IPSET_ATTR_SETNAME]));
+	if (index == IPSET_INVALID_ID)
+		return -ENOENT;
+	set = ip_set_list[index];
+
+	skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (skb2 == NULL)
+		return -ENOMEM;
+
+	nlh2 = start_msg(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq, 0,
+			 IPSET_CMD_HEADER);
+	if (!nlh2)
+		goto nlmsg_failure;
+	NLA_PUT_U8(skb2, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL);
+	NLA_PUT_STRING(skb2, IPSET_ATTR_SETNAME, set->name);
+	NLA_PUT_STRING(skb2, IPSET_ATTR_TYPENAME, set->type->name);
+	NLA_PUT_U8(skb2, IPSET_ATTR_FAMILY, set->family);
+	NLA_PUT_U8(skb2, IPSET_ATTR_REVISION, set->type->revision);
+	nlmsg_end(skb2, nlh2);
+
+	ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT);
+	if (ret < 0)
+		return ret;
+
+	return 0;
+
+nla_put_failure:
+	nlmsg_cancel(skb2, nlh2);
+nlmsg_failure:
+	kfree_skb(skb2);
+	return -EMSGSIZE;
+}
+
+/* Get type data */
+
+static const struct nla_policy ip_set_type_policy[IPSET_ATTR_CMD_MAX + 1] = {
+	[IPSET_ATTR_PROTOCOL]	= { .type = NLA_U8 },
+	[IPSET_ATTR_TYPENAME]	= { .type = NLA_NUL_STRING,
+				    .len = IPSET_MAXNAMELEN - 1 },
+	[IPSET_ATTR_FAMILY]	= { .type = NLA_U8 },
+};
+
+static int
+ip_set_type(struct sock *ctnl, struct sk_buff *skb,
+	    const struct nlmsghdr *nlh,
+	    const struct nlattr * const attr[])
+{
+	struct sk_buff *skb2;
+	struct nlmsghdr *nlh2;
+	u8 family, min, max;
+	const char *typename;
+	int ret = 0;
+
+	if (unlikely(protocol_failed(attr) ||
+		     attr[IPSET_ATTR_TYPENAME] == NULL ||
+		     attr[IPSET_ATTR_FAMILY] == NULL))
+		return -IPSET_ERR_PROTOCOL;
+
+	family = nla_get_u8(attr[IPSET_ATTR_FAMILY]);
+	typename = nla_data(attr[IPSET_ATTR_TYPENAME]);
+	ret = find_set_type_minmax(typename, family, &min, &max);
+	if (ret)
+		return ret;
+
+	skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (skb2 == NULL)
+		return -ENOMEM;
+
+	nlh2 = start_msg(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq, 0,
+			 IPSET_CMD_TYPE);
+	if (!nlh2)
+		goto nlmsg_failure;
+	NLA_PUT_U8(skb2, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL);
+	NLA_PUT_STRING(skb2, IPSET_ATTR_TYPENAME, typename);
+	NLA_PUT_U8(skb2, IPSET_ATTR_FAMILY, family);
+	NLA_PUT_U8(skb2, IPSET_ATTR_REVISION, max);
+	NLA_PUT_U8(skb2, IPSET_ATTR_REVISION_MIN, min);
+	nlmsg_end(skb2, nlh2);
+
+	pr_debug("Send TYPE, nlmsg_len: %u\n", nlh2->nlmsg_len);
+	ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT);
+	if (ret < 0)
+		return ret;
+
+	return 0;
+
+nla_put_failure:
+	nlmsg_cancel(skb2, nlh2);
+nlmsg_failure:
+	kfree_skb(skb2);
+	return -EMSGSIZE;
+}
+
+/* Get protocol version */
+
+static const struct nla_policy
+ip_set_protocol_policy[IPSET_ATTR_CMD_MAX + 1] = {
+	[IPSET_ATTR_PROTOCOL]	= { .type = NLA_U8 },
+};
+
+static int
+ip_set_protocol(struct sock *ctnl, struct sk_buff *skb,
+		const struct nlmsghdr *nlh,
+		const struct nlattr * const attr[])
+{
+	struct sk_buff *skb2;
+	struct nlmsghdr *nlh2;
+	int ret = 0;
+
+	if (unlikely(attr[IPSET_ATTR_PROTOCOL] == NULL))
+		return -IPSET_ERR_PROTOCOL;
+
+	skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (skb2 == NULL)
+		return -ENOMEM;
+
+	nlh2 = start_msg(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq, 0,
+			 IPSET_CMD_PROTOCOL);
+	if (!nlh2)
+		goto nlmsg_failure;
+	NLA_PUT_U8(skb2, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL);
+	nlmsg_end(skb2, nlh2);
+
+	ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT);
+	if (ret < 0)
+		return ret;
+
+	return 0;
+
+nla_put_failure:
+	nlmsg_cancel(skb2, nlh2);
+nlmsg_failure:
+	kfree_skb(skb2);
+	return -EMSGSIZE;
+}
+
+static const struct nfnl_callback ip_set_netlink_subsys_cb[IPSET_MSG_MAX] = {
+	[IPSET_CMD_CREATE]	= {
+		.call		= ip_set_create,
+		.attr_count	= IPSET_ATTR_CMD_MAX,
+		.policy		= ip_set_create_policy,
+	},
+	[IPSET_CMD_DESTROY]	= {
+		.call		= ip_set_destroy,
+		.attr_count	= IPSET_ATTR_CMD_MAX,
+		.policy		= ip_set_setname_policy,
+	},
+	[IPSET_CMD_FLUSH]	= {
+		.call		= ip_set_flush,
+		.attr_count	= IPSET_ATTR_CMD_MAX,
+		.policy		= ip_set_setname_policy,
+	},
+	[IPSET_CMD_RENAME]	= {
+		.call		= ip_set_rename,
+		.attr_count	= IPSET_ATTR_CMD_MAX,
+		.policy		= ip_set_setname2_policy,
+	},
+	[IPSET_CMD_SWAP]	= {
+		.call		= ip_set_swap,
+		.attr_count	= IPSET_ATTR_CMD_MAX,
+		.policy		= ip_set_setname2_policy,
+	},
+	[IPSET_CMD_LIST]	= {
+		.call		= ip_set_dump,
+		.attr_count	= IPSET_ATTR_CMD_MAX,
+		.policy		= ip_set_setname_policy,
+	},
+	[IPSET_CMD_SAVE]	= {
+		.call		= ip_set_dump,
+		.attr_count	= IPSET_ATTR_CMD_MAX,
+		.policy		= ip_set_setname_policy,
+	},
+	[IPSET_CMD_ADD]	= {
+		.call		= ip_set_uadd,
+		.attr_count	= IPSET_ATTR_CMD_MAX,
+		.policy		= ip_set_adt_policy,
+	},
+	[IPSET_CMD_DEL]	= {
+		.call		= ip_set_udel,
+		.attr_count	= IPSET_ATTR_CMD_MAX,
+		.policy		= ip_set_adt_policy,
+	},
+	[IPSET_CMD_TEST]	= {
+		.call		= ip_set_utest,
+		.attr_count	= IPSET_ATTR_CMD_MAX,
+		.policy		= ip_set_adt_policy,
+	},
+	[IPSET_CMD_HEADER]	= {
+		.call		= ip_set_header,
+		.attr_count	= IPSET_ATTR_CMD_MAX,
+		.policy		= ip_set_setname_policy,
+	},
+	[IPSET_CMD_TYPE]	= {
+		.call		= ip_set_type,
+		.attr_count	= IPSET_ATTR_CMD_MAX,
+		.policy		= ip_set_type_policy,
+	},
+	[IPSET_CMD_PROTOCOL]	= {
+		.call		= ip_set_protocol,
+		.attr_count	= IPSET_ATTR_CMD_MAX,
+		.policy		= ip_set_protocol_policy,
+	},
+};
+
+static struct nfnetlink_subsystem ip_set_netlink_subsys __read_mostly = {
+	.name		= "ip_set",
+	.subsys_id	= NFNL_SUBSYS_IPSET,
+	.cb_count	= IPSET_MSG_MAX,
+	.cb		= ip_set_netlink_subsys_cb,
+};
+
+/* Interface to iptables/ip6tables */
+
+static int
+ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len)
+{
+	unsigned *op;
+	void *data;
+	int copylen = *len, ret = 0;
+
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+	if (optval != SO_IP_SET)
+		return -EBADF;
+	if (*len < sizeof(unsigned))
+		return -EINVAL;
+
+	data = vmalloc(*len);
+	if (!data)
+		return -ENOMEM;
+	if (copy_from_user(data, user, *len) != 0) {
+		ret = -EFAULT;
+		goto done;
+	}
+	op = (unsigned *) data;
+
+	if (*op < IP_SET_OP_VERSION) {
+		/* Check the version at the beginning of operations */
+		struct ip_set_req_version *req_version = data;
+		if (req_version->version != IPSET_PROTOCOL) {
+			ret = -EPROTO;
+			goto done;
+		}
+	}
+
+	switch (*op) {
+	case IP_SET_OP_VERSION: {
+		struct ip_set_req_version *req_version = data;
+
+		if (*len != sizeof(struct ip_set_req_version)) {
+			ret = -EINVAL;
+			goto done;
+		}
+
+		req_version->version = IPSET_PROTOCOL;
+		ret = copy_to_user(user, req_version,
+				   sizeof(struct ip_set_req_version));
+		goto done;
+	}
+	case IP_SET_OP_GET_BYNAME: {
+		struct ip_set_req_get_set *req_get = data;
+
+		if (*len != sizeof(struct ip_set_req_get_set)) {
+			ret = -EINVAL;
+			goto done;
+		}
+		req_get->set.name[IPSET_MAXNAMELEN - 1] = '\0';
+		nfnl_lock();
+		req_get->set.index = find_set_id(req_get->set.name);
+		nfnl_unlock();
+		goto copy;
+	}
+	case IP_SET_OP_GET_BYINDEX: {
+		struct ip_set_req_get_set *req_get = data;
+
+		if (*len != sizeof(struct ip_set_req_get_set) ||
+		    req_get->set.index >= ip_set_max) {
+			ret = -EINVAL;
+			goto done;
+		}
+		nfnl_lock();
+		strncpy(req_get->set.name,
+			ip_set_list[req_get->set.index]
+				? ip_set_list[req_get->set.index]->name : "",
+			IPSET_MAXNAMELEN);
+		nfnl_unlock();
+		goto copy;
+	}
+	default:
+		ret = -EBADMSG;
+		goto done;
+	}	/* end of switch(op) */
+
+copy:
+	ret = copy_to_user(user, data, copylen);
+
+done:
+	vfree(data);
+	if (ret > 0)
+		ret = 0;
+	return ret;
+}
+
+static struct nf_sockopt_ops so_set __read_mostly = {
+	.pf		= PF_INET,
+	.get_optmin	= SO_IP_SET,
+	.get_optmax	= SO_IP_SET + 1,
+	.get		= &ip_set_sockfn_get,
+	.owner		= THIS_MODULE,
+};
+
+static int __init
+ip_set_init(void)
+{
+	int ret;
+
+	if (max_sets)
+		ip_set_max = max_sets;
+	if (ip_set_max >= IPSET_INVALID_ID)
+		ip_set_max = IPSET_INVALID_ID - 1;
+
+	ip_set_list = kzalloc(sizeof(struct ip_set *) * ip_set_max,
+			      GFP_KERNEL);
+	if (!ip_set_list) {
+		pr_err("ip_set: Unable to create ip_set_list\n");
+		return -ENOMEM;
+	}
+
+	ret = nfnetlink_subsys_register(&ip_set_netlink_subsys);
+	if (ret != 0) {
+		pr_err("ip_set: cannot register with nfnetlink.\n");
+		kfree(ip_set_list);
+		return ret;
+	}
+	ret = nf_register_sockopt(&so_set);
+	if (ret != 0) {
+		pr_err("SO_SET registry failed: %d\n", ret);
+		nfnetlink_subsys_unregister(&ip_set_netlink_subsys);
+		kfree(ip_set_list);
+		return ret;
+	}
+
+	pr_notice("ip_set: protocol %u\n", IPSET_PROTOCOL);
+	return 0;
+}
+
+static void __exit
+ip_set_fini(void)
+{
+	/* There can't be any existing set */
+	nf_unregister_sockopt(&so_set);
+	nfnetlink_subsys_unregister(&ip_set_netlink_subsys);
+	kfree(ip_set_list);
+	pr_debug("these are the famous last words\n");
+}
+
+module_init(ip_set_init);
+module_exit(ip_set_fini);
diff --git a/net/netfilter/ipset/ip_set_getport.c b/net/netfilter/ipset/ip_set_getport.c
new file mode 100644
index 0000000..8d52272
--- /dev/null
+++ b/net/netfilter/ipset/ip_set_getport.c
@@ -0,0 +1,141 @@
+/* Copyright (C) 2003-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/* Get Layer-4 data from the packets */
+
+#include <linux/ip.h>
+#include <linux/skbuff.h>
+#include <linux/icmp.h>
+#include <linux/icmpv6.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+
+#include <linux/netfilter/ipset/ip_set_getport.h>
+
+/* We must handle non-linear skbs */
+static bool
+get_port(const struct sk_buff *skb, int protocol, unsigned int protooff,
+	 bool src, __be16 *port, u8 *proto)
+{
+	switch (protocol) {
+	case IPPROTO_TCP: {
+		struct tcphdr _tcph;
+		const struct tcphdr *th;
+
+		th = skb_header_pointer(skb, protooff, sizeof(_tcph), &_tcph);
+		if (th == NULL)
+			/* No choice either */
+			return false;
+
+		*port = src ? th->source : th->dest;
+		break;
+	}
+	case IPPROTO_UDP: {
+		struct udphdr _udph;
+		const struct udphdr *uh;
+
+		uh = skb_header_pointer(skb, protooff, sizeof(_udph), &_udph);
+		if (uh == NULL)
+			/* No choice either */
+			return false;
+
+		*port = src ? uh->source : uh->dest;
+		break;
+	}
+	case IPPROTO_ICMP: {
+		struct icmphdr _ich;
+		const struct icmphdr *ic;
+
+		ic = skb_header_pointer(skb, protooff, sizeof(_ich), &_ich);
+		if (ic == NULL)
+			return false;
+
+		*port = (__force __be16)htons((ic->type << 8) | ic->code);
+		break;
+	}
+	case IPPROTO_ICMPV6: {
+		struct icmp6hdr _ich;
+		const struct icmp6hdr *ic;
+
+		ic = skb_header_pointer(skb, protooff, sizeof(_ich), &_ich);
+		if (ic == NULL)
+			return false;
+
+		*port = (__force __be16)
+			htons((ic->icmp6_type << 8) | ic->icmp6_code);
+		break;
+	}
+	default:
+		break;
+	}
+	*proto = protocol;
+
+	return true;
+}
+
+bool
+ip_set_get_ip4_port(const struct sk_buff *skb, bool src,
+		    __be16 *port, u8 *proto)
+{
+	const struct iphdr *iph = ip_hdr(skb);
+	unsigned int protooff = ip_hdrlen(skb);
+	int protocol = iph->protocol;
+
+	/* See comments at tcp_match in ip_tables.c */
+	if (protocol <= 0 || (ntohs(iph->frag_off) & IP_OFFSET))
+		return false;
+
+	return get_port(skb, protocol, protooff, src, port, proto);
+}
+EXPORT_SYMBOL_GPL(ip_set_get_ip4_port);
+
+#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
+bool
+ip_set_get_ip6_port(const struct sk_buff *skb, bool src,
+		    __be16 *port, u8 *proto)
+{
+	int protoff;
+	u8 nexthdr;
+
+	nexthdr = ipv6_hdr(skb)->nexthdr;
+	protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr);
+	if (protoff < 0)
+		return false;
+
+	return get_port(skb, nexthdr, protoff, src, port, proto);
+}
+EXPORT_SYMBOL_GPL(ip_set_get_ip6_port);
+#endif
+
+bool
+ip_set_get_ip_port(const struct sk_buff *skb, u8 pf, bool src, __be16 *port)
+{
+	bool ret;
+	u8 proto;
+
+	switch (pf) {
+	case AF_INET:
+		ret = ip_set_get_ip4_port(skb, src, port, &proto);
+		break;
+	case AF_INET6:
+		ret = ip_set_get_ip6_port(skb, src, port, &proto);
+		break;
+	default:
+		return false;
+	}
+	if (!ret)
+		return ret;
+	switch (proto) {
+	case IPPROTO_TCP:
+	case IPPROTO_UDP:
+		return true;
+	default:
+		return false;
+	}
+}
+EXPORT_SYMBOL_GPL(ip_set_get_ip_port);
diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c
new file mode 100644
index 0000000..43bcce2
--- /dev/null
+++ b/net/netfilter/ipset/ip_set_hash_ip.c
@@ -0,0 +1,464 @@
+/* Copyright (C) 2003-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/* Kernel module implementing an IP set type: the hash:ip type */
+
+#include <linux/jhash.h>
+#include <linux/module.h>
+#include <linux/ip.h>
+#include <linux/skbuff.h>
+#include <linux/errno.h>
+#include <linux/random.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+#include <net/netlink.h>
+#include <net/tcp.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter/ipset/pfxlen.h>
+#include <linux/netfilter/ipset/ip_set.h>
+#include <linux/netfilter/ipset/ip_set_timeout.h>
+#include <linux/netfilter/ipset/ip_set_hash.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
+MODULE_DESCRIPTION("hash:ip type of IP sets");
+MODULE_ALIAS("ip_set_hash:ip");
+
+/* Type specific function prefix */
+#define TYPE		hash_ip
+
+static bool
+hash_ip_same_set(const struct ip_set *a, const struct ip_set *b);
+
+#define hash_ip4_same_set	hash_ip_same_set
+#define hash_ip6_same_set	hash_ip_same_set
+
+/* The type variant functions: IPv4 */
+
+/* Member elements without timeout */
+struct hash_ip4_elem {
+	__be32 ip;
+};
+
+/* Member elements with timeout support */
+struct hash_ip4_telem {
+	__be32 ip;
+	unsigned long timeout;
+};
+
+static inline bool
+hash_ip4_data_equal(const struct hash_ip4_elem *ip1,
+		    const struct hash_ip4_elem *ip2)
+{
+	return ip1->ip == ip2->ip;
+}
+
+static inline bool
+hash_ip4_data_isnull(const struct hash_ip4_elem *elem)
+{
+	return elem->ip == 0;
+}
+
+static inline void
+hash_ip4_data_copy(struct hash_ip4_elem *dst, const struct hash_ip4_elem *src)
+{
+	dst->ip = src->ip;
+}
+
+/* Zero valued IP addresses cannot be stored */
+static inline void
+hash_ip4_data_zero_out(struct hash_ip4_elem *elem)
+{
+	elem->ip = 0;
+}
+
+static inline bool
+hash_ip4_data_list(struct sk_buff *skb, const struct hash_ip4_elem *data)
+{
+	NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, data->ip);
+	return 0;
+
+nla_put_failure:
+	return 1;
+}
+
+static bool
+hash_ip4_data_tlist(struct sk_buff *skb, const struct hash_ip4_elem *data)
+{
+	const struct hash_ip4_telem *tdata =
+		(const struct hash_ip4_telem *)data;
+
+	NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, tdata->ip);
+	NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT,
+		      htonl(ip_set_timeout_get(tdata->timeout)));
+
+	return 0;
+
+nla_put_failure:
+	return 1;
+}
+
+#define IP_SET_HASH_WITH_NETMASK
+#define PF		4
+#define HOST_MASK	32
+#include <linux/netfilter/ipset/ip_set_ahash.h>
+
+static int
+hash_ip4_kadt(struct ip_set *set, const struct sk_buff *skb,
+	      enum ipset_adt adt, u8 pf, u8 dim, u8 flags)
+{
+	const struct ip_set_hash *h = set->data;
+	ipset_adtfn adtfn = set->variant->adt[adt];
+	__be32 ip;
+
+	ip4addrptr(skb, flags & IPSET_DIM_ONE_SRC, &ip);
+	ip &= ip_set_netmask(h->netmask);
+	if (ip == 0)
+		return -EINVAL;
+
+	return adtfn(set, &ip, h->timeout);
+}
+
+static int
+hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[],
+	      enum ipset_adt adt, u32 *lineno, u32 flags)
+{
+	const struct ip_set_hash *h = set->data;
+	ipset_adtfn adtfn = set->variant->adt[adt];
+	u32 ip, ip_to, hosts, timeout = h->timeout;
+	__be32 nip;
+	int ret = 0;
+
+	if (unlikely(!tb[IPSET_ATTR_IP] ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT)))
+		return -IPSET_ERR_PROTOCOL;
+
+	if (tb[IPSET_ATTR_LINENO])
+		*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
+	ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip);
+	if (ret)
+		return ret;
+
+	ip &= ip_set_hostmask(h->netmask);
+
+	if (tb[IPSET_ATTR_TIMEOUT]) {
+		if (!with_timeout(h->timeout))
+			return -IPSET_ERR_TIMEOUT;
+		timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
+	}
+
+	if (adt == IPSET_TEST) {
+		nip = htonl(ip);
+		if (nip == 0)
+			return -IPSET_ERR_HASH_ELEM;
+		return adtfn(set, &nip, timeout);
+	}
+
+	if (tb[IPSET_ATTR_IP_TO]) {
+		ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to);
+		if (ret)
+			return ret;
+		if (ip > ip_to)
+			swap(ip, ip_to);
+	} else if (tb[IPSET_ATTR_CIDR]) {
+		u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
+
+		if (cidr > 32)
+			return -IPSET_ERR_INVALID_CIDR;
+		ip &= ip_set_hostmask(cidr);
+		ip_to = ip | ~ip_set_hostmask(cidr);
+	} else
+		ip_to = ip;
+
+	hosts = h->netmask == 32 ? 1 : 2 << (32 - h->netmask - 1);
+
+	for (; !before(ip_to, ip); ip += hosts) {
+		nip = htonl(ip);
+		if (nip == 0)
+			return -IPSET_ERR_HASH_ELEM;
+		ret = adtfn(set, &nip, timeout);
+
+		if (ret && !ip_set_eexist(ret, flags))
+			return ret;
+		else
+			ret = 0;
+	}
+	return ret;
+}
+
+static bool
+hash_ip_same_set(const struct ip_set *a, const struct ip_set *b)
+{
+	const struct ip_set_hash *x = a->data;
+	const struct ip_set_hash *y = b->data;
+
+	/* Resizing changes htable_bits, so we ignore it */
+	return x->maxelem == y->maxelem &&
+	       x->timeout == y->timeout &&
+	       x->netmask == y->netmask;
+}
+
+/* The type variant functions: IPv6 */
+
+struct hash_ip6_elem {
+	union nf_inet_addr ip;
+};
+
+struct hash_ip6_telem {
+	union nf_inet_addr ip;
+	unsigned long timeout;
+};
+
+static inline bool
+hash_ip6_data_equal(const struct hash_ip6_elem *ip1,
+		    const struct hash_ip6_elem *ip2)
+{
+	return ipv6_addr_cmp(&ip1->ip.in6, &ip2->ip.in6) == 0;
+}
+
+static inline bool
+hash_ip6_data_isnull(const struct hash_ip6_elem *elem)
+{
+	return ipv6_addr_any(&elem->ip.in6);
+}
+
+static inline void
+hash_ip6_data_copy(struct hash_ip6_elem *dst, const struct hash_ip6_elem *src)
+{
+	ipv6_addr_copy(&dst->ip.in6, &src->ip.in6);
+}
+
+static inline void
+hash_ip6_data_zero_out(struct hash_ip6_elem *elem)
+{
+	ipv6_addr_set(&elem->ip.in6, 0, 0, 0, 0);
+}
+
+static inline void
+ip6_netmask(union nf_inet_addr *ip, u8 prefix)
+{
+	ip->ip6[0] &= ip_set_netmask6(prefix)[0];
+	ip->ip6[1] &= ip_set_netmask6(prefix)[1];
+	ip->ip6[2] &= ip_set_netmask6(prefix)[2];
+	ip->ip6[3] &= ip_set_netmask6(prefix)[3];
+}
+
+static bool
+hash_ip6_data_list(struct sk_buff *skb, const struct hash_ip6_elem *data)
+{
+	NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP, &data->ip);
+	return 0;
+
+nla_put_failure:
+	return 1;
+}
+
+static bool
+hash_ip6_data_tlist(struct sk_buff *skb, const struct hash_ip6_elem *data)
+{
+	const struct hash_ip6_telem *e =
+		(const struct hash_ip6_telem *)data;
+
+	NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP, &e->ip);
+	NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT,
+		      htonl(ip_set_timeout_get(e->timeout)));
+	return 0;
+
+nla_put_failure:
+	return 1;
+}
+
+#undef PF
+#undef HOST_MASK
+
+#define PF		6
+#define HOST_MASK	128
+#include <linux/netfilter/ipset/ip_set_ahash.h>
+
+static int
+hash_ip6_kadt(struct ip_set *set, const struct sk_buff *skb,
+	      enum ipset_adt adt, u8 pf, u8 dim, u8 flags)
+{
+	const struct ip_set_hash *h = set->data;
+	ipset_adtfn adtfn = set->variant->adt[adt];
+	union nf_inet_addr ip;
+
+	ip6addrptr(skb, flags & IPSET_DIM_ONE_SRC, &ip.in6);
+	ip6_netmask(&ip, h->netmask);
+	if (ipv6_addr_any(&ip.in6))
+		return -EINVAL;
+
+	return adtfn(set, &ip, h->timeout);
+}
+
+static const struct nla_policy hash_ip6_adt_policy[IPSET_ATTR_ADT_MAX + 1] = {
+	[IPSET_ATTR_IP]		= { .type = NLA_NESTED },
+	[IPSET_ATTR_TIMEOUT]	= { .type = NLA_U32 },
+	[IPSET_ATTR_LINENO]	= { .type = NLA_U32 },
+};
+
+static int
+hash_ip6_uadt(struct ip_set *set, struct nlattr *tb[],
+	      enum ipset_adt adt, u32 *lineno, u32 flags)
+{
+	const struct ip_set_hash *h = set->data;
+	ipset_adtfn adtfn = set->variant->adt[adt];
+	union nf_inet_addr ip;
+	u32 timeout = h->timeout;
+	int ret;
+
+	if (unlikely(!tb[IPSET_ATTR_IP] ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
+		     tb[IPSET_ATTR_IP_TO] ||
+		     tb[IPSET_ATTR_CIDR]))
+		return -IPSET_ERR_PROTOCOL;
+
+	if (tb[IPSET_ATTR_LINENO])
+		*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
+	ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &ip);
+	if (ret)
+		return ret;
+
+	ip6_netmask(&ip, h->netmask);
+	if (ipv6_addr_any(&ip.in6))
+		return -IPSET_ERR_HASH_ELEM;
+
+	if (tb[IPSET_ATTR_TIMEOUT]) {
+		if (!with_timeout(h->timeout))
+			return -IPSET_ERR_TIMEOUT;
+		timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
+	}
+
+	ret = adtfn(set, &ip, timeout);
+
+	return ip_set_eexist(ret, flags) ? 0 : ret;
+}
+
+/* Create hash:ip type of sets */
+
+static int
+hash_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
+{
+	u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM;
+	u8 netmask, hbits;
+	struct ip_set_hash *h;
+
+	if (!(set->family == AF_INET || set->family == AF_INET6))
+		return -IPSET_ERR_INVALID_FAMILY;
+	netmask = set->family == AF_INET ? 32 : 128;
+	pr_debug("Create set %s with family %s\n",
+		 set->name, set->family == AF_INET ? "inet" : "inet6");
+
+	if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT)))
+		return -IPSET_ERR_PROTOCOL;
+
+	if (tb[IPSET_ATTR_HASHSIZE]) {
+		hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]);
+		if (hashsize < IPSET_MIMINAL_HASHSIZE)
+			hashsize = IPSET_MIMINAL_HASHSIZE;
+	}
+
+	if (tb[IPSET_ATTR_MAXELEM])
+		maxelem = ip_set_get_h32(tb[IPSET_ATTR_MAXELEM]);
+
+	if (tb[IPSET_ATTR_NETMASK]) {
+		netmask = nla_get_u8(tb[IPSET_ATTR_NETMASK]);
+
+		if ((set->family == AF_INET && netmask > 32) ||
+		    (set->family == AF_INET6 && netmask > 128) ||
+		    netmask == 0)
+			return -IPSET_ERR_INVALID_NETMASK;
+	}
+
+	h = kzalloc(sizeof(*h), GFP_KERNEL);
+	if (!h)
+		return -ENOMEM;
+
+	h->maxelem = maxelem;
+	h->netmask = netmask;
+	get_random_bytes(&h->initval, sizeof(h->initval));
+	h->timeout = IPSET_NO_TIMEOUT;
+
+	hbits = htable_bits(hashsize);
+	h->table = ip_set_alloc(
+			sizeof(struct htable)
+			+ jhash_size(hbits) * sizeof(struct hbucket));
+	if (!h->table) {
+		kfree(h);
+		return -ENOMEM;
+	}
+	h->table->htable_bits = hbits;
+
+	set->data = h;
+
+	if (tb[IPSET_ATTR_TIMEOUT]) {
+		h->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
+
+		set->variant = set->family == AF_INET
+			? &hash_ip4_tvariant : &hash_ip6_tvariant;
+
+		if (set->family == AF_INET)
+			hash_ip4_gc_init(set);
+		else
+			hash_ip6_gc_init(set);
+	} else {
+		set->variant = set->family == AF_INET
+			? &hash_ip4_variant : &hash_ip6_variant;
+	}
+
+	pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n",
+		 set->name, jhash_size(h->table->htable_bits),
+		 h->table->htable_bits, h->maxelem, set->data, h->table);
+
+	return 0;
+}
+
+static struct ip_set_type hash_ip_type __read_mostly = {
+	.name		= "hash:ip",
+	.protocol	= IPSET_PROTOCOL,
+	.features	= IPSET_TYPE_IP,
+	.dimension	= IPSET_DIM_ONE,
+	.family		= AF_UNSPEC,
+	.revision	= 0,
+	.create		= hash_ip_create,
+	.create_policy	= {
+		[IPSET_ATTR_HASHSIZE]	= { .type = NLA_U32 },
+		[IPSET_ATTR_MAXELEM]	= { .type = NLA_U32 },
+		[IPSET_ATTR_PROBES]	= { .type = NLA_U8 },
+		[IPSET_ATTR_RESIZE]	= { .type = NLA_U8  },
+		[IPSET_ATTR_TIMEOUT]	= { .type = NLA_U32 },
+		[IPSET_ATTR_NETMASK]	= { .type = NLA_U8  },
+	},
+	.adt_policy	= {
+		[IPSET_ATTR_IP]		= { .type = NLA_NESTED },
+		[IPSET_ATTR_IP_TO]	= { .type = NLA_NESTED },
+		[IPSET_ATTR_CIDR]	= { .type = NLA_U8 },
+		[IPSET_ATTR_TIMEOUT]	= { .type = NLA_U32 },
+		[IPSET_ATTR_LINENO]	= { .type = NLA_U32 },
+	},
+	.me		= THIS_MODULE,
+};
+
+static int __init
+hash_ip_init(void)
+{
+	return ip_set_type_register(&hash_ip_type);
+}
+
+static void __exit
+hash_ip_fini(void)
+{
+	ip_set_type_unregister(&hash_ip_type);
+}
+
+module_init(hash_ip_init);
+module_exit(hash_ip_fini);
diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c
new file mode 100644
index 0000000..adbe787
--- /dev/null
+++ b/net/netfilter/ipset/ip_set_hash_ipport.c
@@ -0,0 +1,544 @@
+/* Copyright (C) 2003-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/* Kernel module implementing an IP set type: the hash:ip,port type */
+
+#include <linux/jhash.h>
+#include <linux/module.h>
+#include <linux/ip.h>
+#include <linux/skbuff.h>
+#include <linux/errno.h>
+#include <linux/random.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+#include <net/netlink.h>
+#include <net/tcp.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter/ipset/pfxlen.h>
+#include <linux/netfilter/ipset/ip_set.h>
+#include <linux/netfilter/ipset/ip_set_timeout.h>
+#include <linux/netfilter/ipset/ip_set_getport.h>
+#include <linux/netfilter/ipset/ip_set_hash.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
+MODULE_DESCRIPTION("hash:ip,port type of IP sets");
+MODULE_ALIAS("ip_set_hash:ip,port");
+
+/* Type specific function prefix */
+#define TYPE		hash_ipport
+
+static bool
+hash_ipport_same_set(const struct ip_set *a, const struct ip_set *b);
+
+#define hash_ipport4_same_set	hash_ipport_same_set
+#define hash_ipport6_same_set	hash_ipport_same_set
+
+/* The type variant functions: IPv4 */
+
+/* Member elements without timeout */
+struct hash_ipport4_elem {
+	__be32 ip;
+	__be16 port;
+	u8 proto;
+	u8 padding;
+};
+
+/* Member elements with timeout support */
+struct hash_ipport4_telem {
+	__be32 ip;
+	__be16 port;
+	u8 proto;
+	u8 padding;
+	unsigned long timeout;
+};
+
+static inline bool
+hash_ipport4_data_equal(const struct hash_ipport4_elem *ip1,
+			const struct hash_ipport4_elem *ip2)
+{
+	return ip1->ip == ip2->ip &&
+	       ip1->port == ip2->port &&
+	       ip1->proto == ip2->proto;
+}
+
+static inline bool
+hash_ipport4_data_isnull(const struct hash_ipport4_elem *elem)
+{
+	return elem->proto == 0;
+}
+
+static inline void
+hash_ipport4_data_copy(struct hash_ipport4_elem *dst,
+		       const struct hash_ipport4_elem *src)
+{
+	dst->ip = src->ip;
+	dst->port = src->port;
+	dst->proto = src->proto;
+}
+
+static inline void
+hash_ipport4_data_zero_out(struct hash_ipport4_elem *elem)
+{
+	elem->proto = 0;
+}
+
+static bool
+hash_ipport4_data_list(struct sk_buff *skb,
+		       const struct hash_ipport4_elem *data)
+{
+	NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, data->ip);
+	NLA_PUT_NET16(skb, IPSET_ATTR_PORT, data->port);
+	NLA_PUT_U8(skb, IPSET_ATTR_PROTO, data->proto);
+	return 0;
+
+nla_put_failure:
+	return 1;
+}
+
+static bool
+hash_ipport4_data_tlist(struct sk_buff *skb,
+			const struct hash_ipport4_elem *data)
+{
+	const struct hash_ipport4_telem *tdata =
+		(const struct hash_ipport4_telem *)data;
+
+	NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, tdata->ip);
+	NLA_PUT_NET16(skb, IPSET_ATTR_PORT, tdata->port);
+	NLA_PUT_U8(skb, IPSET_ATTR_PROTO, data->proto);
+	NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT,
+		      htonl(ip_set_timeout_get(tdata->timeout)));
+
+	return 0;
+
+nla_put_failure:
+	return 1;
+}
+
+#define PF		4
+#define HOST_MASK	32
+#include <linux/netfilter/ipset/ip_set_ahash.h>
+
+static int
+hash_ipport4_kadt(struct ip_set *set, const struct sk_buff *skb,
+		  enum ipset_adt adt, u8 pf, u8 dim, u8 flags)
+{
+	const struct ip_set_hash *h = set->data;
+	ipset_adtfn adtfn = set->variant->adt[adt];
+	struct hash_ipport4_elem data = { };
+
+	if (!ip_set_get_ip4_port(skb, flags & IPSET_DIM_TWO_SRC,
+				 &data.port, &data.proto))
+		return -EINVAL;
+
+	ip4addrptr(skb, flags & IPSET_DIM_ONE_SRC, &data.ip);
+
+	return adtfn(set, &data, h->timeout);
+}
+
+static int
+hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
+		  enum ipset_adt adt, u32 *lineno, u32 flags)
+{
+	const struct ip_set_hash *h = set->data;
+	ipset_adtfn adtfn = set->variant->adt[adt];
+	struct hash_ipport4_elem data = { };
+	u32 ip, ip_to, p, port, port_to;
+	u32 timeout = h->timeout;
+	int ret;
+
+	if (unlikely(!tb[IPSET_ATTR_IP] ||
+		     !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT)))
+		return -IPSET_ERR_PROTOCOL;
+
+	if (tb[IPSET_ATTR_LINENO])
+		*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
+	ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &data.ip);
+	if (ret)
+		return ret;
+
+	if (tb[IPSET_ATTR_PORT])
+		data.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
+	else
+		return -IPSET_ERR_PROTOCOL;
+
+	if (tb[IPSET_ATTR_PROTO]) {
+		data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
+
+		if (data.proto == 0)
+			return -IPSET_ERR_INVALID_PROTO;
+	} else
+		return -IPSET_ERR_MISSING_PROTO;
+
+	switch (data.proto) {
+	case IPPROTO_UDP:
+	case IPPROTO_TCP:
+	case IPPROTO_ICMP:
+		break;
+	default:
+		data.port = 0;
+		break;
+	}
+
+	if (tb[IPSET_ATTR_TIMEOUT]) {
+		if (!with_timeout(h->timeout))
+			return -IPSET_ERR_TIMEOUT;
+		timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
+	}
+
+	if (adt == IPSET_TEST ||
+	    !(data.proto == IPPROTO_TCP || data.proto == IPPROTO_UDP) ||
+	    !(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_CIDR] ||
+	      tb[IPSET_ATTR_PORT_TO])) {
+		ret = adtfn(set, &data, timeout);
+		return ip_set_eexist(ret, flags) ? 0 : ret;
+	}
+
+	ip = ntohl(data.ip);
+	if (tb[IPSET_ATTR_IP_TO]) {
+		ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to);
+		if (ret)
+			return ret;
+		if (ip > ip_to)
+			swap(ip, ip_to);
+	} else if (tb[IPSET_ATTR_CIDR]) {
+		u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
+
+		if (cidr > 32)
+			return -IPSET_ERR_INVALID_CIDR;
+		ip &= ip_set_hostmask(cidr);
+		ip_to = ip | ~ip_set_hostmask(cidr);
+	} else
+		ip_to = ip;
+
+	port = ntohs(data.port);
+	if (tb[IPSET_ATTR_PORT_TO]) {
+		port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]);
+		if (port > port_to)
+			swap(port, port_to);
+	} else
+		port_to = port;
+
+	for (; !before(ip_to, ip); ip++)
+		for (p = port; p <= port_to; p++) {
+			data.ip = htonl(ip);
+			data.port = htons(p);
+			ret = adtfn(set, &data, timeout);
+
+			if (ret && !ip_set_eexist(ret, flags))
+				return ret;
+			else
+				ret = 0;
+		}
+	return ret;
+}
+
+static bool
+hash_ipport_same_set(const struct ip_set *a, const struct ip_set *b)
+{
+	const struct ip_set_hash *x = a->data;
+	const struct ip_set_hash *y = b->data;
+
+	/* Resizing changes htable_bits, so we ignore it */
+	return x->maxelem == y->maxelem &&
+	       x->timeout == y->timeout;
+}
+
+/* The type variant functions: IPv6 */
+
+struct hash_ipport6_elem {
+	union nf_inet_addr ip;
+	__be16 port;
+	u8 proto;
+	u8 padding;
+};
+
+struct hash_ipport6_telem {
+	union nf_inet_addr ip;
+	__be16 port;
+	u8 proto;
+	u8 padding;
+	unsigned long timeout;
+};
+
+static inline bool
+hash_ipport6_data_equal(const struct hash_ipport6_elem *ip1,
+			const struct hash_ipport6_elem *ip2)
+{
+	return ipv6_addr_cmp(&ip1->ip.in6, &ip2->ip.in6) == 0 &&
+	       ip1->port == ip2->port &&
+	       ip1->proto == ip2->proto;
+}
+
+static inline bool
+hash_ipport6_data_isnull(const struct hash_ipport6_elem *elem)
+{
+	return elem->proto == 0;
+}
+
+static inline void
+hash_ipport6_data_copy(struct hash_ipport6_elem *dst,
+		       const struct hash_ipport6_elem *src)
+{
+	memcpy(dst, src, sizeof(*dst));
+}
+
+static inline void
+hash_ipport6_data_zero_out(struct hash_ipport6_elem *elem)
+{
+	elem->proto = 0;
+}
+
+static bool
+hash_ipport6_data_list(struct sk_buff *skb,
+		       const struct hash_ipport6_elem *data)
+{
+	NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP, &data->ip);
+	NLA_PUT_NET16(skb, IPSET_ATTR_PORT, data->port);
+	NLA_PUT_U8(skb, IPSET_ATTR_PROTO, data->proto);
+	return 0;
+
+nla_put_failure:
+	return 1;
+}
+
+static bool
+hash_ipport6_data_tlist(struct sk_buff *skb,
+			const struct hash_ipport6_elem *data)
+{
+	const struct hash_ipport6_telem *e =
+		(const struct hash_ipport6_telem *)data;
+
+	NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP, &e->ip);
+	NLA_PUT_NET16(skb, IPSET_ATTR_PORT, data->port);
+	NLA_PUT_U8(skb, IPSET_ATTR_PROTO, data->proto);
+	NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT,
+		      htonl(ip_set_timeout_get(e->timeout)));
+	return 0;
+
+nla_put_failure:
+	return 1;
+}
+
+#undef PF
+#undef HOST_MASK
+
+#define PF		6
+#define HOST_MASK	128
+#include <linux/netfilter/ipset/ip_set_ahash.h>
+
+static int
+hash_ipport6_kadt(struct ip_set *set, const struct sk_buff *skb,
+		  enum ipset_adt adt, u8 pf, u8 dim, u8 flags)
+{
+	const struct ip_set_hash *h = set->data;
+	ipset_adtfn adtfn = set->variant->adt[adt];
+	struct hash_ipport6_elem data = { };
+
+	if (!ip_set_get_ip6_port(skb, flags & IPSET_DIM_TWO_SRC,
+				 &data.port, &data.proto))
+		return -EINVAL;
+
+	ip6addrptr(skb, flags & IPSET_DIM_ONE_SRC, &data.ip.in6);
+
+	return adtfn(set, &data, h->timeout);
+}
+
+static int
+hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[],
+		  enum ipset_adt adt, u32 *lineno, u32 flags)
+{
+	const struct ip_set_hash *h = set->data;
+	ipset_adtfn adtfn = set->variant->adt[adt];
+	struct hash_ipport6_elem data = { };
+	u32 port, port_to;
+	u32 timeout = h->timeout;
+	int ret;
+
+	if (unlikely(!tb[IPSET_ATTR_IP] ||
+		     !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
+		     tb[IPSET_ATTR_IP_TO] ||
+		     tb[IPSET_ATTR_CIDR]))
+		return -IPSET_ERR_PROTOCOL;
+
+	if (tb[IPSET_ATTR_LINENO])
+		*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
+	ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &data.ip);
+	if (ret)
+		return ret;
+
+	if (tb[IPSET_ATTR_PORT])
+		data.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
+	else
+		return -IPSET_ERR_PROTOCOL;
+
+	if (tb[IPSET_ATTR_PROTO]) {
+		data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
+
+		if (data.proto == 0)
+			return -IPSET_ERR_INVALID_PROTO;
+	} else
+		return -IPSET_ERR_MISSING_PROTO;
+
+	switch (data.proto) {
+	case IPPROTO_UDP:
+	case IPPROTO_TCP:
+	case IPPROTO_ICMPV6:
+		break;
+	default:
+		data.port = 0;
+		break;
+	}
+
+	if (tb[IPSET_ATTR_TIMEOUT]) {
+		if (!with_timeout(h->timeout))
+			return -IPSET_ERR_TIMEOUT;
+		timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
+	}
+
+	if (adt == IPSET_TEST ||
+	    !(data.proto == IPPROTO_TCP || data.proto == IPPROTO_UDP) ||
+	    !tb[IPSET_ATTR_PORT_TO]) {
+		ret = adtfn(set, &data, timeout);
+		return ip_set_eexist(ret, flags) ? 0 : ret;
+	}
+
+	port = ntohs(data.port);
+	port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]);
+	if (port > port_to)
+		swap(port, port_to);
+
+	for (; port <= port_to; port++) {
+		data.port = htons(port);
+		ret = adtfn(set, &data, timeout);
+
+		if (ret && !ip_set_eexist(ret, flags))
+			return ret;
+		else
+			ret = 0;
+	}
+	return ret;
+}
+
+/* Create hash:ip type of sets */
+
+static int
+hash_ipport_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
+{
+	struct ip_set_hash *h;
+	u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM;
+	u8 hbits;
+
+	if (!(set->family == AF_INET || set->family == AF_INET6))
+		return -IPSET_ERR_INVALID_FAMILY;
+
+	if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT)))
+		return -IPSET_ERR_PROTOCOL;
+
+	if (tb[IPSET_ATTR_HASHSIZE]) {
+		hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]);
+		if (hashsize < IPSET_MIMINAL_HASHSIZE)
+			hashsize = IPSET_MIMINAL_HASHSIZE;
+	}
+
+	if (tb[IPSET_ATTR_MAXELEM])
+		maxelem = ip_set_get_h32(tb[IPSET_ATTR_MAXELEM]);
+
+	h = kzalloc(sizeof(*h), GFP_KERNEL);
+	if (!h)
+		return -ENOMEM;
+
+	h->maxelem = maxelem;
+	get_random_bytes(&h->initval, sizeof(h->initval));
+	h->timeout = IPSET_NO_TIMEOUT;
+
+	hbits = htable_bits(hashsize);
+	h->table = ip_set_alloc(
+			sizeof(struct htable)
+			+ jhash_size(hbits) * sizeof(struct hbucket));
+	if (!h->table) {
+		kfree(h);
+		return -ENOMEM;
+	}
+	h->table->htable_bits = hbits;
+
+	set->data = h;
+
+	if (tb[IPSET_ATTR_TIMEOUT]) {
+		h->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
+
+		set->variant = set->family == AF_INET
+			? &hash_ipport4_tvariant : &hash_ipport6_tvariant;
+
+		if (set->family == AF_INET)
+			hash_ipport4_gc_init(set);
+		else
+			hash_ipport6_gc_init(set);
+	} else {
+		set->variant = set->family == AF_INET
+			? &hash_ipport4_variant : &hash_ipport6_variant;
+	}
+
+	pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n",
+		 set->name, jhash_size(h->table->htable_bits),
+		 h->table->htable_bits, h->maxelem, set->data, h->table);
+
+	return 0;
+}
+
+static struct ip_set_type hash_ipport_type __read_mostly = {
+	.name		= "hash:ip,port",
+	.protocol	= IPSET_PROTOCOL,
+	.features	= IPSET_TYPE_IP | IPSET_TYPE_PORT,
+	.dimension	= IPSET_DIM_TWO,
+	.family		= AF_UNSPEC,
+	.revision	= 0,
+	.create		= hash_ipport_create,
+	.create_policy	= {
+		[IPSET_ATTR_HASHSIZE]	= { .type = NLA_U32 },
+		[IPSET_ATTR_MAXELEM]	= { .type = NLA_U32 },
+		[IPSET_ATTR_PROBES]	= { .type = NLA_U8 },
+		[IPSET_ATTR_RESIZE]	= { .type = NLA_U8  },
+		[IPSET_ATTR_PROTO]	= { .type = NLA_U8 },
+		[IPSET_ATTR_TIMEOUT]	= { .type = NLA_U32 },
+	},
+	.adt_policy	= {
+		[IPSET_ATTR_IP]		= { .type = NLA_NESTED },
+		[IPSET_ATTR_IP_TO]	= { .type = NLA_NESTED },
+		[IPSET_ATTR_PORT]	= { .type = NLA_U16 },
+		[IPSET_ATTR_PORT_TO]	= { .type = NLA_U16 },
+		[IPSET_ATTR_CIDR]	= { .type = NLA_U8 },
+		[IPSET_ATTR_PROTO]	= { .type = NLA_U8 },
+		[IPSET_ATTR_TIMEOUT]	= { .type = NLA_U32 },
+		[IPSET_ATTR_LINENO]	= { .type = NLA_U32 },
+	},
+	.me		= THIS_MODULE,
+};
+
+static int __init
+hash_ipport_init(void)
+{
+	return ip_set_type_register(&hash_ipport_type);
+}
+
+static void __exit
+hash_ipport_fini(void)
+{
+	ip_set_type_unregister(&hash_ipport_type);
+}
+
+module_init(hash_ipport_init);
+module_exit(hash_ipport_fini);
diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c
new file mode 100644
index 0000000..22e23ab
--- /dev/null
+++ b/net/netfilter/ipset/ip_set_hash_ipportip.c
@@ -0,0 +1,562 @@
+/* Copyright (C) 2003-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/* Kernel module implementing an IP set type: the hash:ip,port,ip type */
+
+#include <linux/jhash.h>
+#include <linux/module.h>
+#include <linux/ip.h>
+#include <linux/skbuff.h>
+#include <linux/errno.h>
+#include <linux/random.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+#include <net/netlink.h>
+#include <net/tcp.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter/ipset/pfxlen.h>
+#include <linux/netfilter/ipset/ip_set.h>
+#include <linux/netfilter/ipset/ip_set_timeout.h>
+#include <linux/netfilter/ipset/ip_set_getport.h>
+#include <linux/netfilter/ipset/ip_set_hash.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
+MODULE_DESCRIPTION("hash:ip,port,ip type of IP sets");
+MODULE_ALIAS("ip_set_hash:ip,port,ip");
+
+/* Type specific function prefix */
+#define TYPE		hash_ipportip
+
+static bool
+hash_ipportip_same_set(const struct ip_set *a, const struct ip_set *b);
+
+#define hash_ipportip4_same_set	hash_ipportip_same_set
+#define hash_ipportip6_same_set	hash_ipportip_same_set
+
+/* The type variant functions: IPv4 */
+
+/* Member elements without timeout */
+struct hash_ipportip4_elem {
+	__be32 ip;
+	__be32 ip2;
+	__be16 port;
+	u8 proto;
+	u8 padding;
+};
+
+/* Member elements with timeout support */
+struct hash_ipportip4_telem {
+	__be32 ip;
+	__be32 ip2;
+	__be16 port;
+	u8 proto;
+	u8 padding;
+	unsigned long timeout;
+};
+
+static inline bool
+hash_ipportip4_data_equal(const struct hash_ipportip4_elem *ip1,
+			  const struct hash_ipportip4_elem *ip2)
+{
+	return ip1->ip == ip2->ip &&
+	       ip1->ip2 == ip2->ip2 &&
+	       ip1->port == ip2->port &&
+	       ip1->proto == ip2->proto;
+}
+
+static inline bool
+hash_ipportip4_data_isnull(const struct hash_ipportip4_elem *elem)
+{
+	return elem->proto == 0;
+}
+
+static inline void
+hash_ipportip4_data_copy(struct hash_ipportip4_elem *dst,
+			 const struct hash_ipportip4_elem *src)
+{
+	memcpy(dst, src, sizeof(*dst));
+}
+
+static inline void
+hash_ipportip4_data_zero_out(struct hash_ipportip4_elem *elem)
+{
+	elem->proto = 0;
+}
+
+static bool
+hash_ipportip4_data_list(struct sk_buff *skb,
+		       const struct hash_ipportip4_elem *data)
+{
+	NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, data->ip);
+	NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP2, data->ip2);
+	NLA_PUT_NET16(skb, IPSET_ATTR_PORT, data->port);
+	NLA_PUT_U8(skb, IPSET_ATTR_PROTO, data->proto);
+	return 0;
+
+nla_put_failure:
+	return 1;
+}
+
+static bool
+hash_ipportip4_data_tlist(struct sk_buff *skb,
+			const struct hash_ipportip4_elem *data)
+{
+	const struct hash_ipportip4_telem *tdata =
+		(const struct hash_ipportip4_telem *)data;
+
+	NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, tdata->ip);
+	NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP2, tdata->ip2);
+	NLA_PUT_NET16(skb, IPSET_ATTR_PORT, tdata->port);
+	NLA_PUT_U8(skb, IPSET_ATTR_PROTO, data->proto);
+	NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT,
+		      htonl(ip_set_timeout_get(tdata->timeout)));
+
+	return 0;
+
+nla_put_failure:
+	return 1;
+}
+
+#define PF		4
+#define HOST_MASK	32
+#include <linux/netfilter/ipset/ip_set_ahash.h>
+
+static int
+hash_ipportip4_kadt(struct ip_set *set, const struct sk_buff *skb,
+		    enum ipset_adt adt, u8 pf, u8 dim, u8 flags)
+{
+	const struct ip_set_hash *h = set->data;
+	ipset_adtfn adtfn = set->variant->adt[adt];
+	struct hash_ipportip4_elem data = { };
+
+	if (!ip_set_get_ip4_port(skb, flags & IPSET_DIM_TWO_SRC,
+				 &data.port, &data.proto))
+		return -EINVAL;
+
+	ip4addrptr(skb, flags & IPSET_DIM_ONE_SRC, &data.ip);
+	ip4addrptr(skb, flags & IPSET_DIM_THREE_SRC, &data.ip2);
+
+	return adtfn(set, &data, h->timeout);
+}
+
+static int
+hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
+		    enum ipset_adt adt, u32 *lineno, u32 flags)
+{
+	const struct ip_set_hash *h = set->data;
+	ipset_adtfn adtfn = set->variant->adt[adt];
+	struct hash_ipportip4_elem data = { };
+	u32 ip, ip_to, p, port, port_to;
+	u32 timeout = h->timeout;
+	int ret;
+
+	if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] ||
+		     !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT)))
+		return -IPSET_ERR_PROTOCOL;
+
+	if (tb[IPSET_ATTR_LINENO])
+		*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
+	ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &data.ip);
+	if (ret)
+		return ret;
+
+	ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP2], &data.ip2);
+	if (ret)
+		return ret;
+
+	if (tb[IPSET_ATTR_PORT])
+		data.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
+	else
+		return -IPSET_ERR_PROTOCOL;
+
+	if (tb[IPSET_ATTR_PROTO]) {
+		data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
+
+		if (data.proto == 0)
+			return -IPSET_ERR_INVALID_PROTO;
+	} else
+		return -IPSET_ERR_MISSING_PROTO;
+
+	switch (data.proto) {
+	case IPPROTO_UDP:
+	case IPPROTO_TCP:
+	case IPPROTO_ICMP:
+		break;
+	default:
+		data.port = 0;
+		break;
+	}
+
+	if (tb[IPSET_ATTR_TIMEOUT]) {
+		if (!with_timeout(h->timeout))
+			return -IPSET_ERR_TIMEOUT;
+		timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
+	}
+
+	if (adt == IPSET_TEST ||
+	    !(data.proto == IPPROTO_TCP || data.proto == IPPROTO_UDP) ||
+	    !(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_CIDR] ||
+	      tb[IPSET_ATTR_PORT_TO])) {
+		ret = adtfn(set, &data, timeout);
+		return ip_set_eexist(ret, flags) ? 0 : ret;
+	}
+
+	ip = ntohl(data.ip);
+	if (tb[IPSET_ATTR_IP_TO]) {
+		ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to);
+		if (ret)
+			return ret;
+		if (ip > ip_to)
+			swap(ip, ip_to);
+	} else if (tb[IPSET_ATTR_CIDR]) {
+		u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
+
+		if (cidr > 32)
+			return -IPSET_ERR_INVALID_CIDR;
+		ip &= ip_set_hostmask(cidr);
+		ip_to = ip | ~ip_set_hostmask(cidr);
+	} else
+		ip_to = ip;
+
+	port = ntohs(data.port);
+	if (tb[IPSET_ATTR_PORT_TO]) {
+		port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]);
+		if (port > port_to)
+			swap(port, port_to);
+	} else
+		port_to = port;
+
+	for (; !before(ip_to, ip); ip++)
+		for (p = port; p <= port_to; p++) {
+			data.ip = htonl(ip);
+			data.port = htons(p);
+			ret = adtfn(set, &data, timeout);
+
+			if (ret && !ip_set_eexist(ret, flags))
+				return ret;
+			else
+				ret = 0;
+		}
+	return ret;
+}
+
+static bool
+hash_ipportip_same_set(const struct ip_set *a, const struct ip_set *b)
+{
+	const struct ip_set_hash *x = a->data;
+	const struct ip_set_hash *y = b->data;
+
+	/* Resizing changes htable_bits, so we ignore it */
+	return x->maxelem == y->maxelem &&
+	       x->timeout == y->timeout;
+}
+
+/* The type variant functions: IPv6 */
+
+struct hash_ipportip6_elem {
+	union nf_inet_addr ip;
+	union nf_inet_addr ip2;
+	__be16 port;
+	u8 proto;
+	u8 padding;
+};
+
+struct hash_ipportip6_telem {
+	union nf_inet_addr ip;
+	union nf_inet_addr ip2;
+	__be16 port;
+	u8 proto;
+	u8 padding;
+	unsigned long timeout;
+};
+
+static inline bool
+hash_ipportip6_data_equal(const struct hash_ipportip6_elem *ip1,
+			  const struct hash_ipportip6_elem *ip2)
+{
+	return ipv6_addr_cmp(&ip1->ip.in6, &ip2->ip.in6) == 0 &&
+	       ipv6_addr_cmp(&ip1->ip2.in6, &ip2->ip2.in6) == 0 &&
+	       ip1->port == ip2->port &&
+	       ip1->proto == ip2->proto;
+}
+
+static inline bool
+hash_ipportip6_data_isnull(const struct hash_ipportip6_elem *elem)
+{
+	return elem->proto == 0;
+}
+
+static inline void
+hash_ipportip6_data_copy(struct hash_ipportip6_elem *dst,
+			 const struct hash_ipportip6_elem *src)
+{
+	memcpy(dst, src, sizeof(*dst));
+}
+
+static inline void
+hash_ipportip6_data_zero_out(struct hash_ipportip6_elem *elem)
+{
+	elem->proto = 0;
+}
+
+static bool
+hash_ipportip6_data_list(struct sk_buff *skb,
+			 const struct hash_ipportip6_elem *data)
+{
+	NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP, &data->ip);
+	NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP2, &data->ip2);
+	NLA_PUT_NET16(skb, IPSET_ATTR_PORT, data->port);
+	NLA_PUT_U8(skb, IPSET_ATTR_PROTO, data->proto);
+	return 0;
+
+nla_put_failure:
+	return 1;
+}
+
+static bool
+hash_ipportip6_data_tlist(struct sk_buff *skb,
+			  const struct hash_ipportip6_elem *data)
+{
+	const struct hash_ipportip6_telem *e =
+		(const struct hash_ipportip6_telem *)data;
+
+	NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP, &e->ip);
+	NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP2, &data->ip2);
+	NLA_PUT_NET16(skb, IPSET_ATTR_PORT, data->port);
+	NLA_PUT_U8(skb, IPSET_ATTR_PROTO, data->proto);
+	NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT,
+		      htonl(ip_set_timeout_get(e->timeout)));
+	return 0;
+
+nla_put_failure:
+	return 1;
+}
+
+#undef PF
+#undef HOST_MASK
+
+#define PF		6
+#define HOST_MASK	128
+#include <linux/netfilter/ipset/ip_set_ahash.h>
+
+static int
+hash_ipportip6_kadt(struct ip_set *set, const struct sk_buff *skb,
+		    enum ipset_adt adt, u8 pf, u8 dim, u8 flags)
+{
+	const struct ip_set_hash *h = set->data;
+	ipset_adtfn adtfn = set->variant->adt[adt];
+	struct hash_ipportip6_elem data = { };
+
+	if (!ip_set_get_ip6_port(skb, flags & IPSET_DIM_TWO_SRC,
+				 &data.port, &data.proto))
+		return -EINVAL;
+
+	ip6addrptr(skb, flags & IPSET_DIM_ONE_SRC, &data.ip.in6);
+	ip6addrptr(skb, flags & IPSET_DIM_THREE_SRC, &data.ip2.in6);
+
+	return adtfn(set, &data, h->timeout);
+}
+
+static int
+hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[],
+		    enum ipset_adt adt, u32 *lineno, u32 flags)
+{
+	const struct ip_set_hash *h = set->data;
+	ipset_adtfn adtfn = set->variant->adt[adt];
+	struct hash_ipportip6_elem data = { };
+	u32 port, port_to;
+	u32 timeout = h->timeout;
+	int ret;
+
+	if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] ||
+		     !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
+		     tb[IPSET_ATTR_IP_TO] ||
+		     tb[IPSET_ATTR_CIDR]))
+		return -IPSET_ERR_PROTOCOL;
+
+	if (tb[IPSET_ATTR_LINENO])
+		*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
+	ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &data.ip);
+	if (ret)
+		return ret;
+
+	ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP2], &data.ip2);
+	if (ret)
+		return ret;
+
+	if (tb[IPSET_ATTR_PORT])
+		data.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
+	else
+		return -IPSET_ERR_PROTOCOL;
+
+	if (tb[IPSET_ATTR_PROTO]) {
+		data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
+
+		if (data.proto == 0)
+			return -IPSET_ERR_INVALID_PROTO;
+	} else
+		return -IPSET_ERR_MISSING_PROTO;
+
+	switch (data.proto) {
+	case IPPROTO_UDP:
+	case IPPROTO_TCP:
+	case IPPROTO_ICMPV6:
+		break;
+	default:
+		data.port = 0;
+		break;
+	}
+
+	if (tb[IPSET_ATTR_TIMEOUT]) {
+		if (!with_timeout(h->timeout))
+			return -IPSET_ERR_TIMEOUT;
+		timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
+	}
+
+	if (adt == IPSET_TEST ||
+	    !(data.proto == IPPROTO_TCP || data.proto == IPPROTO_UDP) ||
+	    !tb[IPSET_ATTR_PORT_TO]) {
+		ret = adtfn(set, &data, timeout);
+		return ip_set_eexist(ret, flags) ? 0 : ret;
+	}
+
+	port = ntohs(data.port);
+	port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]);
+	if (port > port_to)
+		swap(port, port_to);
+
+	for (; port <= port_to; port++) {
+		data.port = htons(port);
+		ret = adtfn(set, &data, timeout);
+
+		if (ret && !ip_set_eexist(ret, flags))
+			return ret;
+		else
+			ret = 0;
+	}
+	return ret;
+}
+
+/* Create hash:ip type of sets */
+
+static int
+hash_ipportip_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
+{
+	struct ip_set_hash *h;
+	u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM;
+	u8 hbits;
+
+	if (!(set->family == AF_INET || set->family == AF_INET6))
+		return -IPSET_ERR_INVALID_FAMILY;
+
+	if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT)))
+		return -IPSET_ERR_PROTOCOL;
+
+	if (tb[IPSET_ATTR_HASHSIZE]) {
+		hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]);
+		if (hashsize < IPSET_MIMINAL_HASHSIZE)
+			hashsize = IPSET_MIMINAL_HASHSIZE;
+	}
+
+	if (tb[IPSET_ATTR_MAXELEM])
+		maxelem = ip_set_get_h32(tb[IPSET_ATTR_MAXELEM]);
+
+	h = kzalloc(sizeof(*h), GFP_KERNEL);
+	if (!h)
+		return -ENOMEM;
+
+	h->maxelem = maxelem;
+	get_random_bytes(&h->initval, sizeof(h->initval));
+	h->timeout = IPSET_NO_TIMEOUT;
+
+	hbits = htable_bits(hashsize);
+	h->table = ip_set_alloc(
+			sizeof(struct htable)
+			+ jhash_size(hbits) * sizeof(struct hbucket));
+	if (!h->table) {
+		kfree(h);
+		return -ENOMEM;
+	}
+	h->table->htable_bits = hbits;
+
+	set->data = h;
+
+	if (tb[IPSET_ATTR_TIMEOUT]) {
+		h->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
+
+		set->variant = set->family == AF_INET
+			? &hash_ipportip4_tvariant : &hash_ipportip6_tvariant;
+
+		if (set->family == AF_INET)
+			hash_ipportip4_gc_init(set);
+		else
+			hash_ipportip6_gc_init(set);
+	} else {
+		set->variant = set->family == AF_INET
+			? &hash_ipportip4_variant : &hash_ipportip6_variant;
+	}
+
+	pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n",
+		 set->name, jhash_size(h->table->htable_bits),
+		 h->table->htable_bits, h->maxelem, set->data, h->table);
+
+	return 0;
+}
+
+static struct ip_set_type hash_ipportip_type __read_mostly = {
+	.name		= "hash:ip,port,ip",
+	.protocol	= IPSET_PROTOCOL,
+	.features	= IPSET_TYPE_IP | IPSET_TYPE_PORT | IPSET_TYPE_IP2,
+	.dimension	= IPSET_DIM_THREE,
+	.family		= AF_UNSPEC,
+	.revision	= 0,
+	.create		= hash_ipportip_create,
+	.create_policy	= {
+		[IPSET_ATTR_HASHSIZE]	= { .type = NLA_U32 },
+		[IPSET_ATTR_MAXELEM]	= { .type = NLA_U32 },
+		[IPSET_ATTR_PROBES]	= { .type = NLA_U8 },
+		[IPSET_ATTR_RESIZE]	= { .type = NLA_U8  },
+		[IPSET_ATTR_TIMEOUT]	= { .type = NLA_U32 },
+	},
+	.adt_policy	= {
+		[IPSET_ATTR_IP]		= { .type = NLA_NESTED },
+		[IPSET_ATTR_IP_TO]	= { .type = NLA_NESTED },
+		[IPSET_ATTR_IP2]	= { .type = NLA_NESTED },
+		[IPSET_ATTR_PORT]	= { .type = NLA_U16 },
+		[IPSET_ATTR_PORT_TO]	= { .type = NLA_U16 },
+		[IPSET_ATTR_CIDR]	= { .type = NLA_U8 },
+		[IPSET_ATTR_PROTO]	= { .type = NLA_U8 },
+		[IPSET_ATTR_TIMEOUT]	= { .type = NLA_U32 },
+		[IPSET_ATTR_LINENO]	= { .type = NLA_U32 },
+	},
+	.me		= THIS_MODULE,
+};
+
+static int __init
+hash_ipportip_init(void)
+{
+	return ip_set_type_register(&hash_ipportip_type);
+}
+
+static void __exit
+hash_ipportip_fini(void)
+{
+	ip_set_type_unregister(&hash_ipportip_type);
+}
+
+module_init(hash_ipportip_init);
+module_exit(hash_ipportip_fini);
diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c
new file mode 100644
index 0000000..6033e8b
--- /dev/null
+++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c
@@ -0,0 +1,628 @@
+/* Copyright (C) 2003-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/* Kernel module implementing an IP set type: the hash:ip,port,net type */
+
+#include <linux/jhash.h>
+#include <linux/module.h>
+#include <linux/ip.h>
+#include <linux/skbuff.h>
+#include <linux/errno.h>
+#include <linux/random.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+#include <net/netlink.h>
+#include <net/tcp.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter/ipset/pfxlen.h>
+#include <linux/netfilter/ipset/ip_set.h>
+#include <linux/netfilter/ipset/ip_set_timeout.h>
+#include <linux/netfilter/ipset/ip_set_getport.h>
+#include <linux/netfilter/ipset/ip_set_hash.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
+MODULE_DESCRIPTION("hash:ip,port,net type of IP sets");
+MODULE_ALIAS("ip_set_hash:ip,port,net");
+
+/* Type specific function prefix */
+#define TYPE		hash_ipportnet
+
+static bool
+hash_ipportnet_same_set(const struct ip_set *a, const struct ip_set *b);
+
+#define hash_ipportnet4_same_set	hash_ipportnet_same_set
+#define hash_ipportnet6_same_set	hash_ipportnet_same_set
+
+/* The type variant functions: IPv4 */
+
+/* Member elements without timeout */
+struct hash_ipportnet4_elem {
+	__be32 ip;
+	__be32 ip2;
+	__be16 port;
+	u8 cidr;
+	u8 proto;
+};
+
+/* Member elements with timeout support */
+struct hash_ipportnet4_telem {
+	__be32 ip;
+	__be32 ip2;
+	__be16 port;
+	u8 cidr;
+	u8 proto;
+	unsigned long timeout;
+};
+
+static inline bool
+hash_ipportnet4_data_equal(const struct hash_ipportnet4_elem *ip1,
+			   const struct hash_ipportnet4_elem *ip2)
+{
+	return ip1->ip == ip2->ip &&
+	       ip1->ip2 == ip2->ip2 &&
+	       ip1->cidr == ip2->cidr &&
+	       ip1->port == ip2->port &&
+	       ip1->proto == ip2->proto;
+}
+
+static inline bool
+hash_ipportnet4_data_isnull(const struct hash_ipportnet4_elem *elem)
+{
+	return elem->proto == 0;
+}
+
+static inline void
+hash_ipportnet4_data_copy(struct hash_ipportnet4_elem *dst,
+			  const struct hash_ipportnet4_elem *src)
+{
+	memcpy(dst, src, sizeof(*dst));
+}
+
+static inline void
+hash_ipportnet4_data_netmask(struct hash_ipportnet4_elem *elem, u8 cidr)
+{
+	elem->ip2 &= ip_set_netmask(cidr);
+	elem->cidr = cidr;
+}
+
+static inline void
+hash_ipportnet4_data_zero_out(struct hash_ipportnet4_elem *elem)
+{
+	elem->proto = 0;
+}
+
+static bool
+hash_ipportnet4_data_list(struct sk_buff *skb,
+			  const struct hash_ipportnet4_elem *data)
+{
+	NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, data->ip);
+	NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP2, data->ip2);
+	NLA_PUT_NET16(skb, IPSET_ATTR_PORT, data->port);
+	NLA_PUT_U8(skb, IPSET_ATTR_CIDR2, data->cidr);
+	NLA_PUT_U8(skb, IPSET_ATTR_PROTO, data->proto);
+	return 0;
+
+nla_put_failure:
+	return 1;
+}
+
+static bool
+hash_ipportnet4_data_tlist(struct sk_buff *skb,
+			   const struct hash_ipportnet4_elem *data)
+{
+	const struct hash_ipportnet4_telem *tdata =
+		(const struct hash_ipportnet4_telem *)data;
+
+	NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, tdata->ip);
+	NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP2, tdata->ip2);
+	NLA_PUT_NET16(skb, IPSET_ATTR_PORT, tdata->port);
+	NLA_PUT_U8(skb, IPSET_ATTR_CIDR2, data->cidr);
+	NLA_PUT_U8(skb, IPSET_ATTR_PROTO, data->proto);
+	NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT,
+		      htonl(ip_set_timeout_get(tdata->timeout)));
+
+	return 0;
+
+nla_put_failure:
+	return 1;
+}
+
+#define IP_SET_HASH_WITH_PROTO
+#define IP_SET_HASH_WITH_NETS
+
+#define PF		4
+#define HOST_MASK	32
+#include <linux/netfilter/ipset/ip_set_ahash.h>
+
+static int
+hash_ipportnet4_kadt(struct ip_set *set, const struct sk_buff *skb,
+		     enum ipset_adt adt, u8 pf, u8 dim, u8 flags)
+{
+	const struct ip_set_hash *h = set->data;
+	ipset_adtfn adtfn = set->variant->adt[adt];
+	struct hash_ipportnet4_elem data =
+		{ .cidr = h->nets[0].cidr || HOST_MASK };
+
+	if (data.cidr == 0)
+		return -EINVAL;
+	if (adt == IPSET_TEST)
+		data.cidr = HOST_MASK;
+
+	if (!ip_set_get_ip4_port(skb, flags & IPSET_DIM_TWO_SRC,
+				 &data.port, &data.proto))
+		return -EINVAL;
+
+	ip4addrptr(skb, flags & IPSET_DIM_ONE_SRC, &data.ip);
+	ip4addrptr(skb, flags & IPSET_DIM_THREE_SRC, &data.ip2);
+	data.ip2 &= ip_set_netmask(data.cidr);
+
+	return adtfn(set, &data, h->timeout);
+}
+
+static int
+hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
+		     enum ipset_adt adt, u32 *lineno, u32 flags)
+{
+	const struct ip_set_hash *h = set->data;
+	ipset_adtfn adtfn = set->variant->adt[adt];
+	struct hash_ipportnet4_elem data = { .cidr = HOST_MASK };
+	u32 ip, ip_to, p, port, port_to;
+	u32 timeout = h->timeout;
+	int ret;
+
+	if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] ||
+		     !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT)))
+		return -IPSET_ERR_PROTOCOL;
+
+	if (tb[IPSET_ATTR_LINENO])
+		*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
+	ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &data.ip);
+	if (ret)
+		return ret;
+
+	ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP2], &data.ip2);
+	if (ret)
+		return ret;
+
+	if (tb[IPSET_ATTR_CIDR2])
+		data.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR2]);
+
+	if (!data.cidr)
+		return -IPSET_ERR_INVALID_CIDR;
+
+	data.ip2 &= ip_set_netmask(data.cidr);
+
+	if (tb[IPSET_ATTR_PORT])
+		data.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
+	else
+		return -IPSET_ERR_PROTOCOL;
+
+	if (tb[IPSET_ATTR_PROTO]) {
+		data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
+
+		if (data.proto == 0)
+			return -IPSET_ERR_INVALID_PROTO;
+	} else
+		return -IPSET_ERR_MISSING_PROTO;
+
+	switch (data.proto) {
+	case IPPROTO_UDP:
+	case IPPROTO_TCP:
+	case IPPROTO_ICMP:
+		break;
+	default:
+		data.port = 0;
+		break;
+	}
+
+	if (tb[IPSET_ATTR_TIMEOUT]) {
+		if (!with_timeout(h->timeout))
+			return -IPSET_ERR_TIMEOUT;
+		timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
+	}
+
+	if (adt == IPSET_TEST ||
+	    !(data.proto == IPPROTO_TCP || data.proto == IPPROTO_UDP) ||
+	    !(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_CIDR] ||
+	      tb[IPSET_ATTR_PORT_TO])) {
+		ret = adtfn(set, &data, timeout);
+		return ip_set_eexist(ret, flags) ? 0 : ret;
+	}
+
+	ip = ntohl(data.ip);
+	if (tb[IPSET_ATTR_IP_TO]) {
+		ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to);
+		if (ret)
+			return ret;
+		if (ip > ip_to)
+			swap(ip, ip_to);
+	} else if (tb[IPSET_ATTR_CIDR]) {
+		u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
+
+		if (cidr > 32)
+			return -IPSET_ERR_INVALID_CIDR;
+		ip &= ip_set_hostmask(cidr);
+		ip_to = ip | ~ip_set_hostmask(cidr);
+	} else
+		ip_to = ip;
+
+	port = ntohs(data.port);
+	if (tb[IPSET_ATTR_PORT_TO]) {
+		port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]);
+		if (port > port_to)
+			swap(port, port_to);
+	} else
+		port_to = port;
+
+	for (; !before(ip_to, ip); ip++)
+		for (p = port; p <= port_to; p++) {
+			data.ip = htonl(ip);
+			data.port = htons(p);
+			ret = adtfn(set, &data, timeout);
+
+			if (ret && !ip_set_eexist(ret, flags))
+				return ret;
+			else
+				ret = 0;
+		}
+	return ret;
+}
+
+static bool
+hash_ipportnet_same_set(const struct ip_set *a, const struct ip_set *b)
+{
+	const struct ip_set_hash *x = a->data;
+	const struct ip_set_hash *y = b->data;
+
+	/* Resizing changes htable_bits, so we ignore it */
+	return x->maxelem == y->maxelem &&
+	       x->timeout == y->timeout;
+}
+
+/* The type variant functions: IPv6 */
+
+struct hash_ipportnet6_elem {
+	union nf_inet_addr ip;
+	union nf_inet_addr ip2;
+	__be16 port;
+	u8 cidr;
+	u8 proto;
+};
+
+struct hash_ipportnet6_telem {
+	union nf_inet_addr ip;
+	union nf_inet_addr ip2;
+	__be16 port;
+	u8 cidr;
+	u8 proto;
+	unsigned long timeout;
+};
+
+static inline bool
+hash_ipportnet6_data_equal(const struct hash_ipportnet6_elem *ip1,
+			   const struct hash_ipportnet6_elem *ip2)
+{
+	return ipv6_addr_cmp(&ip1->ip.in6, &ip2->ip.in6) == 0 &&
+	       ipv6_addr_cmp(&ip1->ip2.in6, &ip2->ip2.in6) == 0 &&
+	       ip1->cidr == ip2->cidr &&
+	       ip1->port == ip2->port &&
+	       ip1->proto == ip2->proto;
+}
+
+static inline bool
+hash_ipportnet6_data_isnull(const struct hash_ipportnet6_elem *elem)
+{
+	return elem->proto == 0;
+}
+
+static inline void
+hash_ipportnet6_data_copy(struct hash_ipportnet6_elem *dst,
+			  const struct hash_ipportnet6_elem *src)
+{
+	memcpy(dst, src, sizeof(*dst));
+}
+
+static inline void
+hash_ipportnet6_data_zero_out(struct hash_ipportnet6_elem *elem)
+{
+	elem->proto = 0;
+}
+
+static inline void
+ip6_netmask(union nf_inet_addr *ip, u8 prefix)
+{
+	ip->ip6[0] &= ip_set_netmask6(prefix)[0];
+	ip->ip6[1] &= ip_set_netmask6(prefix)[1];
+	ip->ip6[2] &= ip_set_netmask6(prefix)[2];
+	ip->ip6[3] &= ip_set_netmask6(prefix)[3];
+}
+
+static inline void
+hash_ipportnet6_data_netmask(struct hash_ipportnet6_elem *elem, u8 cidr)
+{
+	ip6_netmask(&elem->ip2, cidr);
+	elem->cidr = cidr;
+}
+
+static bool
+hash_ipportnet6_data_list(struct sk_buff *skb,
+			  const struct hash_ipportnet6_elem *data)
+{
+	NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP, &data->ip);
+	NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP2, &data->ip2);
+	NLA_PUT_NET16(skb, IPSET_ATTR_PORT, data->port);
+	NLA_PUT_U8(skb, IPSET_ATTR_CIDR2, data->cidr);
+	NLA_PUT_U8(skb, IPSET_ATTR_PROTO, data->proto);
+	return 0;
+
+nla_put_failure:
+	return 1;
+}
+
+static bool
+hash_ipportnet6_data_tlist(struct sk_buff *skb,
+			   const struct hash_ipportnet6_elem *data)
+{
+	const struct hash_ipportnet6_telem *e =
+		(const struct hash_ipportnet6_telem *)data;
+
+	NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP, &e->ip);
+	NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP2, &data->ip2);
+	NLA_PUT_NET16(skb, IPSET_ATTR_PORT, data->port);
+	NLA_PUT_U8(skb, IPSET_ATTR_CIDR2, data->cidr);
+	NLA_PUT_U8(skb, IPSET_ATTR_PROTO, data->proto);
+	NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT,
+		      htonl(ip_set_timeout_get(e->timeout)));
+	return 0;
+
+nla_put_failure:
+	return 1;
+}
+
+#undef PF
+#undef HOST_MASK
+
+#define PF		6
+#define HOST_MASK	128
+#include <linux/netfilter/ipset/ip_set_ahash.h>
+
+static int
+hash_ipportnet6_kadt(struct ip_set *set, const struct sk_buff *skb,
+		     enum ipset_adt adt, u8 pf, u8 dim, u8 flags)
+{
+	const struct ip_set_hash *h = set->data;
+	ipset_adtfn adtfn = set->variant->adt[adt];
+	struct hash_ipportnet6_elem data =
+		{ .cidr = h->nets[0].cidr || HOST_MASK };
+
+	if (data.cidr == 0)
+		return -EINVAL;
+	if (adt == IPSET_TEST)
+		data.cidr = HOST_MASK;
+
+	if (!ip_set_get_ip6_port(skb, flags & IPSET_DIM_TWO_SRC,
+				 &data.port, &data.proto))
+		return -EINVAL;
+
+	ip6addrptr(skb, flags & IPSET_DIM_ONE_SRC, &data.ip.in6);
+	ip6addrptr(skb, flags & IPSET_DIM_THREE_SRC, &data.ip2.in6);
+	ip6_netmask(&data.ip2, data.cidr);
+
+	return adtfn(set, &data, h->timeout);
+}
+
+static int
+hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[],
+		     enum ipset_adt adt, u32 *lineno, u32 flags)
+{
+	const struct ip_set_hash *h = set->data;
+	ipset_adtfn adtfn = set->variant->adt[adt];
+	struct hash_ipportnet6_elem data = { .cidr = HOST_MASK };
+	u32 port, port_to;
+	u32 timeout = h->timeout;
+	int ret;
+
+	if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] ||
+		     !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
+		     tb[IPSET_ATTR_IP_TO] ||
+		     tb[IPSET_ATTR_CIDR]))
+		return -IPSET_ERR_PROTOCOL;
+
+	if (tb[IPSET_ATTR_LINENO])
+		*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
+	ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &data.ip);
+	if (ret)
+		return ret;
+
+	ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP2], &data.ip2);
+	if (ret)
+		return ret;
+
+	if (tb[IPSET_ATTR_CIDR2])
+		data.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR2]);
+
+	if (!data.cidr)
+		return -IPSET_ERR_INVALID_CIDR;
+
+	ip6_netmask(&data.ip2, data.cidr);
+
+	if (tb[IPSET_ATTR_PORT])
+		data.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
+	else
+		return -IPSET_ERR_PROTOCOL;
+
+	if (tb[IPSET_ATTR_PROTO]) {
+		data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
+
+		if (data.proto == 0)
+			return -IPSET_ERR_INVALID_PROTO;
+	} else
+		return -IPSET_ERR_MISSING_PROTO;
+
+	switch (data.proto) {
+	case IPPROTO_UDP:
+	case IPPROTO_TCP:
+	case IPPROTO_ICMPV6:
+		break;
+	default:
+		data.port = 0;
+		break;
+	}
+
+	if (tb[IPSET_ATTR_TIMEOUT]) {
+		if (!with_timeout(h->timeout))
+			return -IPSET_ERR_TIMEOUT;
+		timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
+	}
+
+	if (adt == IPSET_TEST ||
+	    !(data.proto == IPPROTO_TCP || data.proto == IPPROTO_UDP) ||
+	    !tb[IPSET_ATTR_PORT_TO]) {
+		ret = adtfn(set, &data, timeout);
+		return ip_set_eexist(ret, flags) ? 0 : ret;
+	}
+
+	port = ntohs(data.port);
+	port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]);
+	if (port > port_to)
+		swap(port, port_to);
+
+	for (; port <= port_to; port++) {
+		data.port = htons(port);
+		ret = adtfn(set, &data, timeout);
+
+		if (ret && !ip_set_eexist(ret, flags))
+			return ret;
+		else
+			ret = 0;
+	}
+	return ret;
+}
+
+/* Create hash:ip type of sets */
+
+static int
+hash_ipportnet_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
+{
+	struct ip_set_hash *h;
+	u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM;
+	u8 hbits;
+
+	if (!(set->family == AF_INET || set->family == AF_INET6))
+		return -IPSET_ERR_INVALID_FAMILY;
+
+	if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT)))
+		return -IPSET_ERR_PROTOCOL;
+
+	if (tb[IPSET_ATTR_HASHSIZE]) {
+		hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]);
+		if (hashsize < IPSET_MIMINAL_HASHSIZE)
+			hashsize = IPSET_MIMINAL_HASHSIZE;
+	}
+
+	if (tb[IPSET_ATTR_MAXELEM])
+		maxelem = ip_set_get_h32(tb[IPSET_ATTR_MAXELEM]);
+
+	h = kzalloc(sizeof(*h)
+		    + sizeof(struct ip_set_hash_nets)
+		      * (set->family == AF_INET ? 32 : 128), GFP_KERNEL);
+	if (!h)
+		return -ENOMEM;
+
+	h->maxelem = maxelem;
+	get_random_bytes(&h->initval, sizeof(h->initval));
+	h->timeout = IPSET_NO_TIMEOUT;
+
+	hbits = htable_bits(hashsize);
+	h->table = ip_set_alloc(
+			sizeof(struct htable)
+			+ jhash_size(hbits) * sizeof(struct hbucket));
+	if (!h->table) {
+		kfree(h);
+		return -ENOMEM;
+	}
+	h->table->htable_bits = hbits;
+
+	set->data = h;
+
+	if (tb[IPSET_ATTR_TIMEOUT]) {
+		h->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
+
+		set->variant = set->family == AF_INET
+			? &hash_ipportnet4_tvariant
+			: &hash_ipportnet6_tvariant;
+
+		if (set->family == AF_INET)
+			hash_ipportnet4_gc_init(set);
+		else
+			hash_ipportnet6_gc_init(set);
+	} else {
+		set->variant = set->family == AF_INET
+			? &hash_ipportnet4_variant : &hash_ipportnet6_variant;
+	}
+
+	pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n",
+		 set->name, jhash_size(h->table->htable_bits),
+		 h->table->htable_bits, h->maxelem, set->data, h->table);
+
+	return 0;
+}
+
+static struct ip_set_type hash_ipportnet_type __read_mostly = {
+	.name		= "hash:ip,port,net",
+	.protocol	= IPSET_PROTOCOL,
+	.features	= IPSET_TYPE_IP | IPSET_TYPE_PORT | IPSET_TYPE_IP2,
+	.dimension	= IPSET_DIM_THREE,
+	.family		= AF_UNSPEC,
+	.revision	= 0,
+	.create		= hash_ipportnet_create,
+	.create_policy	= {
+		[IPSET_ATTR_HASHSIZE]	= { .type = NLA_U32 },
+		[IPSET_ATTR_MAXELEM]	= { .type = NLA_U32 },
+		[IPSET_ATTR_PROBES]	= { .type = NLA_U8 },
+		[IPSET_ATTR_RESIZE]	= { .type = NLA_U8  },
+		[IPSET_ATTR_TIMEOUT]	= { .type = NLA_U32 },
+	},
+	.adt_policy	= {
+		[IPSET_ATTR_IP]		= { .type = NLA_NESTED },
+		[IPSET_ATTR_IP_TO]	= { .type = NLA_NESTED },
+		[IPSET_ATTR_IP2]	= { .type = NLA_NESTED },
+		[IPSET_ATTR_PORT]	= { .type = NLA_U16 },
+		[IPSET_ATTR_PORT_TO]	= { .type = NLA_U16 },
+		[IPSET_ATTR_CIDR]	= { .type = NLA_U8 },
+		[IPSET_ATTR_CIDR2]	= { .type = NLA_U8 },
+		[IPSET_ATTR_PROTO]	= { .type = NLA_U8 },
+		[IPSET_ATTR_TIMEOUT]	= { .type = NLA_U32 },
+		[IPSET_ATTR_LINENO]	= { .type = NLA_U32 },
+	},
+	.me		= THIS_MODULE,
+};
+
+static int __init
+hash_ipportnet_init(void)
+{
+	return ip_set_type_register(&hash_ipportnet_type);
+}
+
+static void __exit
+hash_ipportnet_fini(void)
+{
+	ip_set_type_unregister(&hash_ipportnet_type);
+}
+
+module_init(hash_ipportnet_init);
+module_exit(hash_ipportnet_fini);
diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c
new file mode 100644
index 0000000..c4db202
--- /dev/null
+++ b/net/netfilter/ipset/ip_set_hash_net.c
@@ -0,0 +1,458 @@
+/* Copyright (C) 2003-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/* Kernel module implementing an IP set type: the hash:net type */
+
+#include <linux/jhash.h>
+#include <linux/module.h>
+#include <linux/ip.h>
+#include <linux/skbuff.h>
+#include <linux/errno.h>
+#include <linux/random.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+#include <net/netlink.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter/ipset/pfxlen.h>
+#include <linux/netfilter/ipset/ip_set.h>
+#include <linux/netfilter/ipset/ip_set_timeout.h>
+#include <linux/netfilter/ipset/ip_set_hash.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
+MODULE_DESCRIPTION("hash:net type of IP sets");
+MODULE_ALIAS("ip_set_hash:net");
+
+/* Type specific function prefix */
+#define TYPE		hash_net
+
+static bool
+hash_net_same_set(const struct ip_set *a, const struct ip_set *b);
+
+#define hash_net4_same_set	hash_net_same_set
+#define hash_net6_same_set	hash_net_same_set
+
+/* The type variant functions: IPv4 */
+
+/* Member elements without timeout */
+struct hash_net4_elem {
+	__be32 ip;
+	u16 padding0;
+	u8 padding1;
+	u8 cidr;
+};
+
+/* Member elements with timeout support */
+struct hash_net4_telem {
+	__be32 ip;
+	u16 padding0;
+	u8 padding1;
+	u8 cidr;
+	unsigned long timeout;
+};
+
+static inline bool
+hash_net4_data_equal(const struct hash_net4_elem *ip1,
+		    const struct hash_net4_elem *ip2)
+{
+	return ip1->ip == ip2->ip && ip1->cidr == ip2->cidr;
+}
+
+static inline bool
+hash_net4_data_isnull(const struct hash_net4_elem *elem)
+{
+	return elem->cidr == 0;
+}
+
+static inline void
+hash_net4_data_copy(struct hash_net4_elem *dst,
+		    const struct hash_net4_elem *src)
+{
+	dst->ip = src->ip;
+	dst->cidr = src->cidr;
+}
+
+static inline void
+hash_net4_data_netmask(struct hash_net4_elem *elem, u8 cidr)
+{
+	elem->ip &= ip_set_netmask(cidr);
+	elem->cidr = cidr;
+}
+
+/* Zero CIDR values cannot be stored */
+static inline void
+hash_net4_data_zero_out(struct hash_net4_elem *elem)
+{
+	elem->cidr = 0;
+}
+
+static bool
+hash_net4_data_list(struct sk_buff *skb, const struct hash_net4_elem *data)
+{
+	NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, data->ip);
+	NLA_PUT_U8(skb, IPSET_ATTR_CIDR, data->cidr);
+	return 0;
+
+nla_put_failure:
+	return 1;
+}
+
+static bool
+hash_net4_data_tlist(struct sk_buff *skb, const struct hash_net4_elem *data)
+{
+	const struct hash_net4_telem *tdata =
+		(const struct hash_net4_telem *)data;
+
+	NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, tdata->ip);
+	NLA_PUT_U8(skb, IPSET_ATTR_CIDR, tdata->cidr);
+	NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT,
+		      htonl(ip_set_timeout_get(tdata->timeout)));
+
+	return 0;
+
+nla_put_failure:
+	return 1;
+}
+
+#define IP_SET_HASH_WITH_NETS
+
+#define PF		4
+#define HOST_MASK	32
+#include <linux/netfilter/ipset/ip_set_ahash.h>
+
+static int
+hash_net4_kadt(struct ip_set *set, const struct sk_buff *skb,
+	       enum ipset_adt adt, u8 pf, u8 dim, u8 flags)
+{
+	const struct ip_set_hash *h = set->data;
+	ipset_adtfn adtfn = set->variant->adt[adt];
+	struct hash_net4_elem data = { .cidr = h->nets[0].cidr || HOST_MASK };
+
+	if (data.cidr == 0)
+		return -EINVAL;
+	if (adt == IPSET_TEST)
+		data.cidr = HOST_MASK;
+
+	ip4addrptr(skb, flags & IPSET_DIM_ONE_SRC, &data.ip);
+	data.ip &= ip_set_netmask(data.cidr);
+
+	return adtfn(set, &data, h->timeout);
+}
+
+static int
+hash_net4_uadt(struct ip_set *set, struct nlattr *tb[],
+	       enum ipset_adt adt, u32 *lineno, u32 flags)
+{
+	const struct ip_set_hash *h = set->data;
+	ipset_adtfn adtfn = set->variant->adt[adt];
+	struct hash_net4_elem data = { .cidr = HOST_MASK };
+	u32 timeout = h->timeout;
+	int ret;
+
+	if (unlikely(!tb[IPSET_ATTR_IP] ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT)))
+		return -IPSET_ERR_PROTOCOL;
+
+	if (tb[IPSET_ATTR_LINENO])
+		*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
+	ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &data.ip);
+	if (ret)
+		return ret;
+
+	if (tb[IPSET_ATTR_CIDR])
+		data.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
+
+	if (!data.cidr)
+		return -IPSET_ERR_INVALID_CIDR;
+
+	data.ip &= ip_set_netmask(data.cidr);
+
+	if (tb[IPSET_ATTR_TIMEOUT]) {
+		if (!with_timeout(h->timeout))
+			return -IPSET_ERR_TIMEOUT;
+		timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
+	}
+
+	ret = adtfn(set, &data, timeout);
+
+	return ip_set_eexist(ret, flags) ? 0 : ret;
+}
+
+static bool
+hash_net_same_set(const struct ip_set *a, const struct ip_set *b)
+{
+	const struct ip_set_hash *x = a->data;
+	const struct ip_set_hash *y = b->data;
+
+	/* Resizing changes htable_bits, so we ignore it */
+	return x->maxelem == y->maxelem &&
+	       x->timeout == y->timeout;
+}
+
+/* The type variant functions: IPv6 */
+
+struct hash_net6_elem {
+	union nf_inet_addr ip;
+	u16 padding0;
+	u8 padding1;
+	u8 cidr;
+};
+
+struct hash_net6_telem {
+	union nf_inet_addr ip;
+	u16 padding0;
+	u8 padding1;
+	u8 cidr;
+	unsigned long timeout;
+};
+
+static inline bool
+hash_net6_data_equal(const struct hash_net6_elem *ip1,
+		     const struct hash_net6_elem *ip2)
+{
+	return ipv6_addr_cmp(&ip1->ip.in6, &ip2->ip.in6) == 0 &&
+	       ip1->cidr == ip2->cidr;
+}
+
+static inline bool
+hash_net6_data_isnull(const struct hash_net6_elem *elem)
+{
+	return elem->cidr == 0;
+}
+
+static inline void
+hash_net6_data_copy(struct hash_net6_elem *dst,
+		    const struct hash_net6_elem *src)
+{
+	ipv6_addr_copy(&dst->ip.in6, &src->ip.in6);
+	dst->cidr = src->cidr;
+}
+
+static inline void
+hash_net6_data_zero_out(struct hash_net6_elem *elem)
+{
+	elem->cidr = 0;
+}
+
+static inline void
+ip6_netmask(union nf_inet_addr *ip, u8 prefix)
+{
+	ip->ip6[0] &= ip_set_netmask6(prefix)[0];
+	ip->ip6[1] &= ip_set_netmask6(prefix)[1];
+	ip->ip6[2] &= ip_set_netmask6(prefix)[2];
+	ip->ip6[3] &= ip_set_netmask6(prefix)[3];
+}
+
+static inline void
+hash_net6_data_netmask(struct hash_net6_elem *elem, u8 cidr)
+{
+	ip6_netmask(&elem->ip, cidr);
+	elem->cidr = cidr;
+}
+
+static bool
+hash_net6_data_list(struct sk_buff *skb, const struct hash_net6_elem *data)
+{
+	NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP, &data->ip);
+	NLA_PUT_U8(skb, IPSET_ATTR_CIDR, data->cidr);
+	return 0;
+
+nla_put_failure:
+	return 1;
+}
+
+static bool
+hash_net6_data_tlist(struct sk_buff *skb, const struct hash_net6_elem *data)
+{
+	const struct hash_net6_telem *e =
+		(const struct hash_net6_telem *)data;
+
+	NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP, &e->ip);
+	NLA_PUT_U8(skb, IPSET_ATTR_CIDR, e->cidr);
+	NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT,
+		      htonl(ip_set_timeout_get(e->timeout)));
+	return 0;
+
+nla_put_failure:
+	return 1;
+}
+
+#undef PF
+#undef HOST_MASK
+
+#define PF		6
+#define HOST_MASK	128
+#include <linux/netfilter/ipset/ip_set_ahash.h>
+
+static int
+hash_net6_kadt(struct ip_set *set, const struct sk_buff *skb,
+	       enum ipset_adt adt, u8 pf, u8 dim, u8 flags)
+{
+	const struct ip_set_hash *h = set->data;
+	ipset_adtfn adtfn = set->variant->adt[adt];
+	struct hash_net6_elem data = { .cidr = h->nets[0].cidr || HOST_MASK };
+
+	if (data.cidr == 0)
+		return -EINVAL;
+	if (adt == IPSET_TEST)
+		data.cidr = HOST_MASK;
+
+	ip6addrptr(skb, flags & IPSET_DIM_ONE_SRC, &data.ip.in6);
+	ip6_netmask(&data.ip, data.cidr);
+
+	return adtfn(set, &data, h->timeout);
+}
+
+static int
+hash_net6_uadt(struct ip_set *set, struct nlattr *tb[],
+	       enum ipset_adt adt, u32 *lineno, u32 flags)
+{
+	const struct ip_set_hash *h = set->data;
+	ipset_adtfn adtfn = set->variant->adt[adt];
+	struct hash_net6_elem data = { .cidr = HOST_MASK };
+	u32 timeout = h->timeout;
+	int ret;
+
+	if (unlikely(!tb[IPSET_ATTR_IP] ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT)))
+		return -IPSET_ERR_PROTOCOL;
+
+	if (tb[IPSET_ATTR_LINENO])
+		*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
+	ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &data.ip);
+	if (ret)
+		return ret;
+
+	if (tb[IPSET_ATTR_CIDR])
+		data.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
+
+	if (!data.cidr)
+		return -IPSET_ERR_INVALID_CIDR;
+
+	ip6_netmask(&data.ip, data.cidr);
+
+	if (tb[IPSET_ATTR_TIMEOUT]) {
+		if (!with_timeout(h->timeout))
+			return -IPSET_ERR_TIMEOUT;
+		timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
+	}
+
+	ret = adtfn(set, &data, timeout);
+
+	return ip_set_eexist(ret, flags) ? 0 : ret;
+}
+
+/* Create hash:ip type of sets */
+
+static int
+hash_net_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
+{
+	u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM;
+	struct ip_set_hash *h;
+	u8 hbits;
+
+	if (!(set->family == AF_INET || set->family == AF_INET6))
+		return -IPSET_ERR_INVALID_FAMILY;
+
+	if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT)))
+		return -IPSET_ERR_PROTOCOL;
+
+	if (tb[IPSET_ATTR_HASHSIZE]) {
+		hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]);
+		if (hashsize < IPSET_MIMINAL_HASHSIZE)
+			hashsize = IPSET_MIMINAL_HASHSIZE;
+	}
+
+	if (tb[IPSET_ATTR_MAXELEM])
+		maxelem = ip_set_get_h32(tb[IPSET_ATTR_MAXELEM]);
+
+	h = kzalloc(sizeof(*h)
+		    + sizeof(struct ip_set_hash_nets)
+		      * (set->family == AF_INET ? 32 : 128), GFP_KERNEL);
+	if (!h)
+		return -ENOMEM;
+
+	h->maxelem = maxelem;
+	get_random_bytes(&h->initval, sizeof(h->initval));
+	h->timeout = IPSET_NO_TIMEOUT;
+
+	hbits = htable_bits(hashsize);
+	h->table = ip_set_alloc(
+			sizeof(struct htable)
+			+ jhash_size(hbits) * sizeof(struct hbucket));
+	if (!h->table) {
+		kfree(h);
+		return -ENOMEM;
+	}
+	h->table->htable_bits = hbits;
+
+	set->data = h;
+
+	if (tb[IPSET_ATTR_TIMEOUT]) {
+		h->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
+
+		set->variant = set->family == AF_INET
+			? &hash_net4_tvariant : &hash_net6_tvariant;
+
+		if (set->family == AF_INET)
+			hash_net4_gc_init(set);
+		else
+			hash_net6_gc_init(set);
+	} else {
+		set->variant = set->family == AF_INET
+			? &hash_net4_variant : &hash_net6_variant;
+	}
+
+	pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n",
+		 set->name, jhash_size(h->table->htable_bits),
+		 h->table->htable_bits, h->maxelem, set->data, h->table);
+
+	return 0;
+}
+
+static struct ip_set_type hash_net_type __read_mostly = {
+	.name		= "hash:net",
+	.protocol	= IPSET_PROTOCOL,
+	.features	= IPSET_TYPE_IP,
+	.dimension	= IPSET_DIM_ONE,
+	.family		= AF_UNSPEC,
+	.revision	= 0,
+	.create		= hash_net_create,
+	.create_policy	= {
+		[IPSET_ATTR_HASHSIZE]	= { .type = NLA_U32 },
+		[IPSET_ATTR_MAXELEM]	= { .type = NLA_U32 },
+		[IPSET_ATTR_PROBES]	= { .type = NLA_U8 },
+		[IPSET_ATTR_RESIZE]	= { .type = NLA_U8  },
+		[IPSET_ATTR_TIMEOUT]	= { .type = NLA_U32 },
+	},
+	.adt_policy	= {
+		[IPSET_ATTR_IP]		= { .type = NLA_NESTED },
+		[IPSET_ATTR_CIDR]	= { .type = NLA_U8 },
+		[IPSET_ATTR_TIMEOUT]	= { .type = NLA_U32 },
+	},
+	.me		= THIS_MODULE,
+};
+
+static int __init
+hash_net_init(void)
+{
+	return ip_set_type_register(&hash_net_type);
+}
+
+static void __exit
+hash_net_fini(void)
+{
+	ip_set_type_unregister(&hash_net_type);
+}
+
+module_init(hash_net_init);
+module_exit(hash_net_fini);
diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c
new file mode 100644
index 0000000..34a1656
--- /dev/null
+++ b/net/netfilter/ipset/ip_set_hash_netport.c
@@ -0,0 +1,578 @@
+/* Copyright (C) 2003-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/* Kernel module implementing an IP set type: the hash:net,port type */
+
+#include <linux/jhash.h>
+#include <linux/module.h>
+#include <linux/ip.h>
+#include <linux/skbuff.h>
+#include <linux/errno.h>
+#include <linux/random.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+#include <net/netlink.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter/ipset/pfxlen.h>
+#include <linux/netfilter/ipset/ip_set.h>
+#include <linux/netfilter/ipset/ip_set_timeout.h>
+#include <linux/netfilter/ipset/ip_set_getport.h>
+#include <linux/netfilter/ipset/ip_set_hash.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
+MODULE_DESCRIPTION("hash:net,port type of IP sets");
+MODULE_ALIAS("ip_set_hash:net,port");
+
+/* Type specific function prefix */
+#define TYPE		hash_netport
+
+static bool
+hash_netport_same_set(const struct ip_set *a, const struct ip_set *b);
+
+#define hash_netport4_same_set	hash_netport_same_set
+#define hash_netport6_same_set	hash_netport_same_set
+
+/* The type variant functions: IPv4 */
+
+/* Member elements without timeout */
+struct hash_netport4_elem {
+	__be32 ip;
+	__be16 port;
+	u8 proto;
+	u8 cidr;
+};
+
+/* Member elements with timeout support */
+struct hash_netport4_telem {
+	__be32 ip;
+	__be16 port;
+	u8 proto;
+	u8 cidr;
+	unsigned long timeout;
+};
+
+static inline bool
+hash_netport4_data_equal(const struct hash_netport4_elem *ip1,
+			 const struct hash_netport4_elem *ip2)
+{
+	return ip1->ip == ip2->ip &&
+	       ip1->port == ip2->port &&
+	       ip1->proto == ip2->proto &&
+	       ip1->cidr == ip2->cidr;
+}
+
+static inline bool
+hash_netport4_data_isnull(const struct hash_netport4_elem *elem)
+{
+	return elem->proto == 0;
+}
+
+static inline void
+hash_netport4_data_copy(struct hash_netport4_elem *dst,
+			const struct hash_netport4_elem *src)
+{
+	dst->ip = src->ip;
+	dst->port = src->port;
+	dst->proto = src->proto;
+	dst->cidr = src->cidr;
+}
+
+static inline void
+hash_netport4_data_netmask(struct hash_netport4_elem *elem, u8 cidr)
+{
+	elem->ip &= ip_set_netmask(cidr);
+	elem->cidr = cidr;
+}
+
+static inline void
+hash_netport4_data_zero_out(struct hash_netport4_elem *elem)
+{
+	elem->proto = 0;
+}
+
+static bool
+hash_netport4_data_list(struct sk_buff *skb,
+			const struct hash_netport4_elem *data)
+{
+	NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, data->ip);
+	NLA_PUT_NET16(skb, IPSET_ATTR_PORT, data->port);
+	NLA_PUT_U8(skb, IPSET_ATTR_CIDR, data->cidr);
+	NLA_PUT_U8(skb, IPSET_ATTR_PROTO, data->proto);
+	return 0;
+
+nla_put_failure:
+	return 1;
+}
+
+static bool
+hash_netport4_data_tlist(struct sk_buff *skb,
+			 const struct hash_netport4_elem *data)
+{
+	const struct hash_netport4_telem *tdata =
+		(const struct hash_netport4_telem *)data;
+
+	NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, tdata->ip);
+	NLA_PUT_NET16(skb, IPSET_ATTR_PORT, tdata->port);
+	NLA_PUT_U8(skb, IPSET_ATTR_CIDR, data->cidr);
+	NLA_PUT_U8(skb, IPSET_ATTR_PROTO, data->proto);
+	NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT,
+		      htonl(ip_set_timeout_get(tdata->timeout)));
+
+	return 0;
+
+nla_put_failure:
+	return 1;
+}
+
+#define IP_SET_HASH_WITH_PROTO
+#define IP_SET_HASH_WITH_NETS
+
+#define PF		4
+#define HOST_MASK	32
+#include <linux/netfilter/ipset/ip_set_ahash.h>
+
+static int
+hash_netport4_kadt(struct ip_set *set, const struct sk_buff *skb,
+		   enum ipset_adt adt, u8 pf, u8 dim, u8 flags)
+{
+	const struct ip_set_hash *h = set->data;
+	ipset_adtfn adtfn = set->variant->adt[adt];
+	struct hash_netport4_elem data = {
+		.cidr = h->nets[0].cidr || HOST_MASK };
+
+	if (data.cidr == 0)
+		return -EINVAL;
+	if (adt == IPSET_TEST)
+		data.cidr = HOST_MASK;
+
+	if (!ip_set_get_ip4_port(skb, flags & IPSET_DIM_TWO_SRC,
+				 &data.port, &data.proto))
+		return -EINVAL;
+
+	ip4addrptr(skb, flags & IPSET_DIM_ONE_SRC, &data.ip);
+	data.ip &= ip_set_netmask(data.cidr);
+
+	return adtfn(set, &data, h->timeout);
+}
+
+static int
+hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
+		   enum ipset_adt adt, u32 *lineno, u32 flags)
+{
+	const struct ip_set_hash *h = set->data;
+	ipset_adtfn adtfn = set->variant->adt[adt];
+	struct hash_netport4_elem data = { .cidr = HOST_MASK };
+	u32 port, port_to;
+	u32 timeout = h->timeout;
+	int ret;
+
+	if (unlikely(!tb[IPSET_ATTR_IP] ||
+		     !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT)))
+		return -IPSET_ERR_PROTOCOL;
+
+	if (tb[IPSET_ATTR_LINENO])
+		*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
+	ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &data.ip);
+	if (ret)
+		return ret;
+
+	if (tb[IPSET_ATTR_CIDR])
+		data.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
+	if (!data.cidr)
+		return -IPSET_ERR_INVALID_CIDR;
+	data.ip &= ip_set_netmask(data.cidr);
+
+	if (tb[IPSET_ATTR_PORT])
+		data.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
+	else
+		return -IPSET_ERR_PROTOCOL;
+
+	if (tb[IPSET_ATTR_PROTO]) {
+		data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
+
+		if (data.proto == 0)
+			return -IPSET_ERR_INVALID_PROTO;
+	} else
+		return -IPSET_ERR_MISSING_PROTO;
+
+	switch (data.proto) {
+	case IPPROTO_UDP:
+	case IPPROTO_TCP:
+	case IPPROTO_ICMP:
+		break;
+	default:
+		data.port = 0;
+		break;
+	}
+
+	if (tb[IPSET_ATTR_TIMEOUT]) {
+		if (!with_timeout(h->timeout))
+			return -IPSET_ERR_TIMEOUT;
+		timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
+	}
+
+	if (adt == IPSET_TEST ||
+	    !(data.proto == IPPROTO_TCP || data.proto == IPPROTO_UDP) ||
+	    !tb[IPSET_ATTR_PORT_TO]) {
+		ret = adtfn(set, &data, timeout);
+		return ip_set_eexist(ret, flags) ? 0 : ret;
+	}
+
+	port = ntohs(data.port);
+	port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]);
+	if (port > port_to)
+		swap(port, port_to);
+
+	for (; port <= port_to; port++) {
+		data.port = htons(port);
+		ret = adtfn(set, &data, timeout);
+
+		if (ret && !ip_set_eexist(ret, flags))
+			return ret;
+		else
+			ret = 0;
+	}
+	return ret;
+}
+
+static bool
+hash_netport_same_set(const struct ip_set *a, const struct ip_set *b)
+{
+	const struct ip_set_hash *x = a->data;
+	const struct ip_set_hash *y = b->data;
+
+	/* Resizing changes htable_bits, so we ignore it */
+	return x->maxelem == y->maxelem &&
+	       x->timeout == y->timeout;
+}
+
+/* The type variant functions: IPv6 */
+
+struct hash_netport6_elem {
+	union nf_inet_addr ip;
+	__be16 port;
+	u8 proto;
+	u8 cidr;
+};
+
+struct hash_netport6_telem {
+	union nf_inet_addr ip;
+	__be16 port;
+	u8 proto;
+	u8 cidr;
+	unsigned long timeout;
+};
+
+static inline bool
+hash_netport6_data_equal(const struct hash_netport6_elem *ip1,
+			 const struct hash_netport6_elem *ip2)
+{
+	return ipv6_addr_cmp(&ip1->ip.in6, &ip2->ip.in6) == 0 &&
+	       ip1->port == ip2->port &&
+	       ip1->proto == ip2->proto &&
+	       ip1->cidr == ip2->cidr;
+}
+
+static inline bool
+hash_netport6_data_isnull(const struct hash_netport6_elem *elem)
+{
+	return elem->proto == 0;
+}
+
+static inline void
+hash_netport6_data_copy(struct hash_netport6_elem *dst,
+			const struct hash_netport6_elem *src)
+{
+	memcpy(dst, src, sizeof(*dst));
+}
+
+static inline void
+hash_netport6_data_zero_out(struct hash_netport6_elem *elem)
+{
+	elem->proto = 0;
+}
+
+static inline void
+ip6_netmask(union nf_inet_addr *ip, u8 prefix)
+{
+	ip->ip6[0] &= ip_set_netmask6(prefix)[0];
+	ip->ip6[1] &= ip_set_netmask6(prefix)[1];
+	ip->ip6[2] &= ip_set_netmask6(prefix)[2];
+	ip->ip6[3] &= ip_set_netmask6(prefix)[3];
+}
+
+static inline void
+hash_netport6_data_netmask(struct hash_netport6_elem *elem, u8 cidr)
+{
+	ip6_netmask(&elem->ip, cidr);
+	elem->cidr = cidr;
+}
+
+static bool
+hash_netport6_data_list(struct sk_buff *skb,
+			const struct hash_netport6_elem *data)
+{
+	NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP, &data->ip);
+	NLA_PUT_NET16(skb, IPSET_ATTR_PORT, data->port);
+	NLA_PUT_U8(skb, IPSET_ATTR_CIDR, data->cidr);
+	NLA_PUT_U8(skb, IPSET_ATTR_PROTO, data->proto);
+	return 0;
+
+nla_put_failure:
+	return 1;
+}
+
+static bool
+hash_netport6_data_tlist(struct sk_buff *skb,
+			 const struct hash_netport6_elem *data)
+{
+	const struct hash_netport6_telem *e =
+		(const struct hash_netport6_telem *)data;
+
+	NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP, &e->ip);
+	NLA_PUT_NET16(skb, IPSET_ATTR_PORT, data->port);
+	NLA_PUT_U8(skb, IPSET_ATTR_CIDR, data->cidr);
+	NLA_PUT_U8(skb, IPSET_ATTR_PROTO, data->proto);
+	NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT,
+		      htonl(ip_set_timeout_get(e->timeout)));
+	return 0;
+
+nla_put_failure:
+	return 1;
+}
+
+#undef PF
+#undef HOST_MASK
+
+#define PF		6
+#define HOST_MASK	128
+#include <linux/netfilter/ipset/ip_set_ahash.h>
+
+static int
+hash_netport6_kadt(struct ip_set *set, const struct sk_buff *skb,
+		   enum ipset_adt adt, u8 pf, u8 dim, u8 flags)
+{
+	const struct ip_set_hash *h = set->data;
+	ipset_adtfn adtfn = set->variant->adt[adt];
+	struct hash_netport6_elem data = {
+		.cidr = h->nets[0].cidr || HOST_MASK };
+
+	if (data.cidr == 0)
+		return -EINVAL;
+	if (adt == IPSET_TEST)
+		data.cidr = HOST_MASK;
+
+	if (!ip_set_get_ip6_port(skb, flags & IPSET_DIM_TWO_SRC,
+				 &data.port, &data.proto))
+		return -EINVAL;
+
+	ip6addrptr(skb, flags & IPSET_DIM_ONE_SRC, &data.ip.in6);
+	ip6_netmask(&data.ip, data.cidr);
+
+	return adtfn(set, &data, h->timeout);
+}
+
+static int
+hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[],
+		   enum ipset_adt adt, u32 *lineno, u32 flags)
+{
+	const struct ip_set_hash *h = set->data;
+	ipset_adtfn adtfn = set->variant->adt[adt];
+	struct hash_netport6_elem data = { .cidr = HOST_MASK };
+	u32 port, port_to;
+	u32 timeout = h->timeout;
+	int ret;
+
+	if (unlikely(!tb[IPSET_ATTR_IP] ||
+		     !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT)))
+		return -IPSET_ERR_PROTOCOL;
+
+	if (tb[IPSET_ATTR_LINENO])
+		*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
+	ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &data.ip);
+	if (ret)
+		return ret;
+
+	if (tb[IPSET_ATTR_CIDR])
+		data.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
+	if (!data.cidr)
+		return -IPSET_ERR_INVALID_CIDR;
+	ip6_netmask(&data.ip, data.cidr);
+
+	if (tb[IPSET_ATTR_PORT])
+		data.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
+	else
+		return -IPSET_ERR_PROTOCOL;
+
+	if (tb[IPSET_ATTR_PROTO]) {
+		data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
+
+		if (data.proto == 0)
+			return -IPSET_ERR_INVALID_PROTO;
+	} else
+		return -IPSET_ERR_MISSING_PROTO;
+
+	switch (data.proto) {
+	case IPPROTO_UDP:
+	case IPPROTO_TCP:
+	case IPPROTO_ICMPV6:
+		break;
+	default:
+		data.port = 0;
+		break;
+	}
+
+	if (tb[IPSET_ATTR_TIMEOUT]) {
+		if (!with_timeout(h->timeout))
+			return -IPSET_ERR_TIMEOUT;
+		timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
+	}
+
+	if (adt == IPSET_TEST ||
+	    !(data.proto == IPPROTO_TCP || data.proto == IPPROTO_UDP) ||
+	    !tb[IPSET_ATTR_PORT_TO]) {
+		ret = adtfn(set, &data, timeout);
+		return ip_set_eexist(ret, flags) ? 0 : ret;
+	}
+
+	port = ntohs(data.port);
+	port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]);
+	if (port > port_to)
+		swap(port, port_to);
+
+	for (; port <= port_to; port++) {
+		data.port = htons(port);
+		ret = adtfn(set, &data, timeout);
+
+		if (ret && !ip_set_eexist(ret, flags))
+			return ret;
+		else
+			ret = 0;
+	}
+	return ret;
+}
+
+/* Create hash:ip type of sets */
+
+static int
+hash_netport_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
+{
+	struct ip_set_hash *h;
+	u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM;
+	u8 hbits;
+
+	if (!(set->family == AF_INET || set->family == AF_INET6))
+		return -IPSET_ERR_INVALID_FAMILY;
+
+	if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT)))
+		return -IPSET_ERR_PROTOCOL;
+
+	if (tb[IPSET_ATTR_HASHSIZE]) {
+		hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]);
+		if (hashsize < IPSET_MIMINAL_HASHSIZE)
+			hashsize = IPSET_MIMINAL_HASHSIZE;
+	}
+
+	if (tb[IPSET_ATTR_MAXELEM])
+		maxelem = ip_set_get_h32(tb[IPSET_ATTR_MAXELEM]);
+
+	h = kzalloc(sizeof(*h)
+		    + sizeof(struct ip_set_hash_nets)
+		      * (set->family == AF_INET ? 32 : 128), GFP_KERNEL);
+	if (!h)
+		return -ENOMEM;
+
+	h->maxelem = maxelem;
+	get_random_bytes(&h->initval, sizeof(h->initval));
+	h->timeout = IPSET_NO_TIMEOUT;
+
+	hbits = htable_bits(hashsize);
+	h->table = ip_set_alloc(
+			sizeof(struct htable)
+			+ jhash_size(hbits) * sizeof(struct hbucket));
+	if (!h->table) {
+		kfree(h);
+		return -ENOMEM;
+	}
+	h->table->htable_bits = hbits;
+
+	set->data = h;
+
+	if (tb[IPSET_ATTR_TIMEOUT]) {
+		h->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
+
+		set->variant = set->family == AF_INET
+			? &hash_netport4_tvariant : &hash_netport6_tvariant;
+
+		if (set->family == AF_INET)
+			hash_netport4_gc_init(set);
+		else
+			hash_netport6_gc_init(set);
+	} else {
+		set->variant = set->family == AF_INET
+			? &hash_netport4_variant : &hash_netport6_variant;
+	}
+
+	pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n",
+		 set->name, jhash_size(h->table->htable_bits),
+		 h->table->htable_bits, h->maxelem, set->data, h->table);
+
+	return 0;
+}
+
+static struct ip_set_type hash_netport_type __read_mostly = {
+	.name		= "hash:net,port",
+	.protocol	= IPSET_PROTOCOL,
+	.features	= IPSET_TYPE_IP | IPSET_TYPE_PORT,
+	.dimension	= IPSET_DIM_TWO,
+	.family		= AF_UNSPEC,
+	.revision	= 0,
+	.create		= hash_netport_create,
+	.create_policy	= {
+		[IPSET_ATTR_HASHSIZE]	= { .type = NLA_U32 },
+		[IPSET_ATTR_MAXELEM]	= { .type = NLA_U32 },
+		[IPSET_ATTR_PROBES]	= { .type = NLA_U8 },
+		[IPSET_ATTR_RESIZE]	= { .type = NLA_U8  },
+		[IPSET_ATTR_PROTO]	= { .type = NLA_U8 },
+		[IPSET_ATTR_TIMEOUT]	= { .type = NLA_U32 },
+	},
+	.adt_policy	= {
+		[IPSET_ATTR_IP]		= { .type = NLA_NESTED },
+		[IPSET_ATTR_PORT]	= { .type = NLA_U16 },
+		[IPSET_ATTR_PORT_TO]	= { .type = NLA_U16 },
+		[IPSET_ATTR_PROTO]	= { .type = NLA_U8 },
+		[IPSET_ATTR_CIDR]	= { .type = NLA_U8 },
+		[IPSET_ATTR_TIMEOUT]	= { .type = NLA_U32 },
+		[IPSET_ATTR_LINENO]	= { .type = NLA_U32 },
+	},
+	.me		= THIS_MODULE,
+};
+
+static int __init
+hash_netport_init(void)
+{
+	return ip_set_type_register(&hash_netport_type);
+}
+
+static void __exit
+hash_netport_fini(void)
+{
+	ip_set_type_unregister(&hash_netport_type);
+}
+
+module_init(hash_netport_init);
+module_exit(hash_netport_fini);
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
new file mode 100644
index 0000000..a47c329
--- /dev/null
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -0,0 +1,584 @@
+/* Copyright (C) 2008-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/* Kernel module implementing an IP set type: the list:set type */
+
+#include <linux/module.h>
+#include <linux/ip.h>
+#include <linux/skbuff.h>
+#include <linux/errno.h>
+
+#include <linux/netfilter/ipset/ip_set.h>
+#include <linux/netfilter/ipset/ip_set_timeout.h>
+#include <linux/netfilter/ipset/ip_set_list.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
+MODULE_DESCRIPTION("list:set type of IP sets");
+MODULE_ALIAS("ip_set_list:set");
+
+/* Member elements without and with timeout */
+struct set_elem {
+	ip_set_id_t id;
+};
+
+struct set_telem {
+	ip_set_id_t id;
+	unsigned long timeout;
+};
+
+/* Type structure */
+struct list_set {
+	size_t dsize;		/* element size */
+	u32 size;		/* size of set list array */
+	u32 timeout;		/* timeout value */
+	struct timer_list gc;	/* garbage collection */
+	struct set_elem members[0]; /* the set members */
+};
+
+static inline struct set_elem *
+list_set_elem(const struct list_set *map, u32 id)
+{
+	return (struct set_elem *)((char *)map->members + id * map->dsize);
+}
+
+static inline bool
+list_set_timeout(const struct list_set *map, u32 id)
+{
+	const struct set_telem *elem =
+		(const struct set_telem *) list_set_elem(map, id);
+
+	return ip_set_timeout_test(elem->timeout);
+}
+
+static inline bool
+list_set_expired(const struct list_set *map, u32 id)
+{
+	const struct set_telem *elem =
+		(const struct set_telem *) list_set_elem(map, id);
+
+	return ip_set_timeout_expired(elem->timeout);
+}
+
+static inline int
+list_set_exist(const struct set_telem *elem)
+{
+	return elem->id != IPSET_INVALID_ID &&
+	       !ip_set_timeout_expired(elem->timeout);
+}
+
+/* Set list without and with timeout */
+
+static int
+list_set_kadt(struct ip_set *set, const struct sk_buff *skb,
+	      enum ipset_adt adt, u8 pf, u8 dim, u8 flags)
+{
+	struct list_set *map = set->data;
+	struct set_elem *elem;
+	u32 i;
+	int ret;
+
+	for (i = 0; i < map->size; i++) {
+		elem = list_set_elem(map, i);
+		if (elem->id == IPSET_INVALID_ID)
+			return 0;
+		if (with_timeout(map->timeout) && list_set_expired(map, i))
+			continue;
+		switch (adt) {
+		case IPSET_TEST:
+			ret = ip_set_test(elem->id, skb, pf, dim, flags);
+			if (ret > 0)
+				return ret;
+			break;
+		case IPSET_ADD:
+			ret = ip_set_add(elem->id, skb, pf, dim, flags);
+			if (ret == 0)
+				return ret;
+			break;
+		case IPSET_DEL:
+			ret = ip_set_del(elem->id, skb, pf, dim, flags);
+			if (ret == 0)
+				return ret;
+			break;
+		default:
+			break;
+		}
+	}
+	return -EINVAL;
+}
+
+static bool
+next_id_eq(const struct list_set *map, u32 i, ip_set_id_t id)
+{
+	const struct set_elem *elem;
+
+	if (i + 1 < map->size) {
+		elem = list_set_elem(map, i + 1);
+		return !!(elem->id == id &&
+			  !(with_timeout(map->timeout) &&
+			    list_set_expired(map, i + 1)));
+	}
+
+	return 0;
+}
+
+static void
+list_elem_add(struct list_set *map, u32 i, ip_set_id_t id)
+{
+	struct set_elem *e;
+
+	for (; i < map->size; i++) {
+		e = list_set_elem(map, i);
+		swap(e->id, id);
+		if (e->id == IPSET_INVALID_ID)
+			break;
+	}
+}
+
+static void
+list_elem_tadd(struct list_set *map, u32 i, ip_set_id_t id,
+	       unsigned long timeout)
+{
+	struct set_telem *e;
+
+	for (; i < map->size; i++) {
+		e = (struct set_telem *)list_set_elem(map, i);
+		swap(e->id, id);
+		if (e->id == IPSET_INVALID_ID)
+			break;
+		swap(e->timeout, timeout);
+	}
+}
+
+static int
+list_set_add(struct list_set *map, u32 i, ip_set_id_t id,
+	     unsigned long timeout)
+{
+	const struct set_elem *e = list_set_elem(map, i);
+
+	if (i == map->size - 1 && e->id != IPSET_INVALID_ID)
+		/* Last element replaced: e.g. add new,before,last */
+		ip_set_put_byindex(e->id);
+	if (with_timeout(map->timeout))
+		list_elem_tadd(map, i, id, timeout);
+	else
+		list_elem_add(map, i, id);
+
+	return 0;
+}
+
+static int
+list_set_del(struct list_set *map, ip_set_id_t id, u32 i)
+{
+	struct set_elem *a = list_set_elem(map, i), *b;
+
+	ip_set_put_byindex(id);
+
+	for (; i < map->size - 1; i++) {
+		b = list_set_elem(map, i + 1);
+		a->id = b->id;
+		if (with_timeout(map->timeout))
+			((struct set_telem *)a)->timeout =
+				((struct set_telem *)b)->timeout;
+		a = b;
+		if (a->id == IPSET_INVALID_ID)
+			break;
+	}
+	/* Last element */
+	a->id = IPSET_INVALID_ID;
+	return 0;
+}
+
+static int
+list_set_uadt(struct ip_set *set, struct nlattr *tb[],
+	      enum ipset_adt adt, u32 *lineno, u32 flags)
+{
+	struct list_set *map = set->data;
+	bool with_timeout = with_timeout(map->timeout);
+	int before = 0;
+	u32 timeout = map->timeout;
+	ip_set_id_t id, refid = IPSET_INVALID_ID;
+	const struct set_elem *elem;
+	struct ip_set *s;
+	u32 i;
+	int ret = 0;
+
+	if (unlikely(!tb[IPSET_ATTR_NAME] ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
+		return -IPSET_ERR_PROTOCOL;
+
+	if (tb[IPSET_ATTR_LINENO])
+		*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
+	id = ip_set_get_byname(nla_data(tb[IPSET_ATTR_NAME]), &s);
+	if (id == IPSET_INVALID_ID)
+		return -IPSET_ERR_NAME;
+	/* "Loop detection" */
+	if (s->type->features & IPSET_TYPE_NAME) {
+		ret = -IPSET_ERR_LOOP;
+		goto finish;
+	}
+
+	if (tb[IPSET_ATTR_CADT_FLAGS]) {
+		u32 f = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
+		before = f & IPSET_FLAG_BEFORE;
+	}
+
+	if (before && !tb[IPSET_ATTR_NAMEREF]) {
+		ret = -IPSET_ERR_BEFORE;
+		goto finish;
+	}
+
+	if (tb[IPSET_ATTR_NAMEREF]) {
+		refid = ip_set_get_byname(nla_data(tb[IPSET_ATTR_NAMEREF]),
+					  &s);
+		if (refid == IPSET_INVALID_ID) {
+			ret = -IPSET_ERR_NAMEREF;
+			goto finish;
+		}
+		if (!before)
+			before = -1;
+	}
+	if (tb[IPSET_ATTR_TIMEOUT]) {
+		if (!with_timeout) {
+			ret = -IPSET_ERR_TIMEOUT;
+			goto finish;
+		}
+		timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
+	}
+
+	switch (adt) {
+	case IPSET_TEST:
+		for (i = 0; i < map->size && !ret; i++) {
+			elem = list_set_elem(map, i);
+			if (elem->id == IPSET_INVALID_ID ||
+			    (before != 0 && i + 1 >= map->size))
+				break;
+			else if (with_timeout && list_set_expired(map, i))
+				continue;
+			else if (before > 0 && elem->id == id)
+				ret = next_id_eq(map, i, refid);
+			else if (before < 0 && elem->id == refid)
+				ret = next_id_eq(map, i, id);
+			else if (before == 0 && elem->id == id)
+				ret = 1;
+		}
+		break;
+	case IPSET_ADD:
+		for (i = 0; i < map->size && !ret; i++) {
+			elem = list_set_elem(map, i);
+			if (elem->id == id &&
+			    !(with_timeout && list_set_expired(map, i)))
+				ret = -IPSET_ERR_EXIST;
+		}
+		if (ret == -IPSET_ERR_EXIST)
+			break;
+		ret = -IPSET_ERR_LIST_FULL;
+		for (i = 0; i < map->size && ret == -IPSET_ERR_LIST_FULL; i++) {
+			elem = list_set_elem(map, i);
+			if (elem->id == IPSET_INVALID_ID)
+				ret = before != 0 ? -IPSET_ERR_REF_EXIST
+					: list_set_add(map, i, id, timeout);
+			else if (elem->id != refid)
+				continue;
+			else if (with_timeout && list_set_expired(map, i))
+				ret = -IPSET_ERR_REF_EXIST;
+			else if (before)
+				ret = list_set_add(map, i, id, timeout);
+			else if (i + 1 < map->size)
+				ret = list_set_add(map, i + 1, id, timeout);
+		}
+		break;
+	case IPSET_DEL:
+		ret = -IPSET_ERR_EXIST;
+		for (i = 0; i < map->size && ret == -IPSET_ERR_EXIST; i++) {
+			elem = list_set_elem(map, i);
+			if (elem->id == IPSET_INVALID_ID) {
+				ret = before != 0 ? -IPSET_ERR_REF_EXIST
+						  : -IPSET_ERR_EXIST;
+				break;
+			} else if (with_timeout && list_set_expired(map, i))
+				continue;
+			else if (elem->id == id &&
+				 (before == 0 ||
+				  (before > 0 &&
+				   next_id_eq(map, i, refid))))
+				ret = list_set_del(map, id, i);
+			else if (before < 0 &&
+				 elem->id == refid &&
+				 next_id_eq(map, i, id))
+				ret = list_set_del(map, id, i + 1);
+		}
+		break;
+	default:
+		break;
+	}
+
+finish:
+	if (refid != IPSET_INVALID_ID)
+		ip_set_put_byindex(refid);
+	if (adt != IPSET_ADD || ret)
+		ip_set_put_byindex(id);
+
+	return ip_set_eexist(ret, flags) ? 0 : ret;
+}
+
+static void
+list_set_flush(struct ip_set *set)
+{
+	struct list_set *map = set->data;
+	struct set_elem *elem;
+	u32 i;
+
+	for (i = 0; i < map->size; i++) {
+		elem = list_set_elem(map, i);
+		if (elem->id != IPSET_INVALID_ID) {
+			ip_set_put_byindex(elem->id);
+			elem->id = IPSET_INVALID_ID;
+		}
+	}
+}
+
+static void
+list_set_destroy(struct ip_set *set)
+{
+	struct list_set *map = set->data;
+
+	if (with_timeout(map->timeout))
+		del_timer_sync(&map->gc);
+	list_set_flush(set);
+	kfree(map);
+
+	set->data = NULL;
+}
+
+static int
+list_set_head(struct ip_set *set, struct sk_buff *skb)
+{
+	const struct list_set *map = set->data;
+	struct nlattr *nested;
+
+	nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
+	if (!nested)
+		goto nla_put_failure;
+	NLA_PUT_NET32(skb, IPSET_ATTR_SIZE, htonl(map->size));
+	if (with_timeout(map->timeout))
+		NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, htonl(map->timeout));
+	NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES,
+		      htonl(atomic_read(&set->ref) - 1));
+	NLA_PUT_NET32(skb, IPSET_ATTR_MEMSIZE,
+		      htonl(sizeof(*map) + map->size * map->dsize));
+	ipset_nest_end(skb, nested);
+
+	return 0;
+nla_put_failure:
+	return -EMSGSIZE;
+}
+
+static int
+list_set_list(const struct ip_set *set,
+	      struct sk_buff *skb, struct netlink_callback *cb)
+{
+	const struct list_set *map = set->data;
+	struct nlattr *atd, *nested;
+	u32 i, first = cb->args[2];
+	const struct set_elem *e;
+
+	atd = ipset_nest_start(skb, IPSET_ATTR_ADT);
+	if (!atd)
+		return -EMSGSIZE;
+	for (; cb->args[2] < map->size; cb->args[2]++) {
+		i = cb->args[2];
+		e = list_set_elem(map, i);
+		if (e->id == IPSET_INVALID_ID)
+			goto finish;
+		if (with_timeout(map->timeout) && list_set_expired(map, i))
+			continue;
+		nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
+		if (!nested) {
+			if (i == first) {
+				nla_nest_cancel(skb, atd);
+				return -EMSGSIZE;
+			} else
+				goto nla_put_failure;
+		}
+		NLA_PUT_STRING(skb, IPSET_ATTR_NAME,
+			       ip_set_name_byindex(e->id));
+		if (with_timeout(map->timeout)) {
+			const struct set_telem *te =
+				(const struct set_telem *) e;
+			NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT,
+				      htonl(ip_set_timeout_get(te->timeout)));
+		}
+		ipset_nest_end(skb, nested);
+	}
+finish:
+	ipset_nest_end(skb, atd);
+	/* Set listing finished */
+	cb->args[2] = 0;
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(skb, nested);
+	ipset_nest_end(skb, atd);
+	if (unlikely(i == first)) {
+		cb->args[2] = 0;
+		return -EMSGSIZE;
+	}
+	return 0;
+}
+
+static bool
+list_set_same_set(const struct ip_set *a, const struct ip_set *b)
+{
+	const struct list_set *x = a->data;
+	const struct list_set *y = b->data;
+
+	return x->size == y->size &&
+	       x->timeout == y->timeout;
+}
+
+static const struct ip_set_type_variant list_set = {
+	.kadt	= list_set_kadt,
+	.uadt	= list_set_uadt,
+	.destroy = list_set_destroy,
+	.flush	= list_set_flush,
+	.head	= list_set_head,
+	.list	= list_set_list,
+	.same_set = list_set_same_set,
+};
+
+static void
+list_set_gc(unsigned long ul_set)
+{
+	struct ip_set *set = (struct ip_set *) ul_set;
+	struct list_set *map = set->data;
+	struct set_telem *e;
+	u32 i;
+
+	/* We run parallel with other readers (test element)
+	 * but adding/deleting new entries is locked out */
+	read_lock_bh(&set->lock);
+	for (i = map->size - 1; i >= 0; i--) {
+		e = (struct set_telem *) list_set_elem(map, i);
+		if (e->id != IPSET_INVALID_ID &&
+		    list_set_expired(map, i))
+			list_set_del(map, e->id, i);
+	}
+	read_unlock_bh(&set->lock);
+
+	map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ;
+	add_timer(&map->gc);
+}
+
+static void
+list_set_gc_init(struct ip_set *set)
+{
+	struct list_set *map = set->data;
+
+	init_timer(&map->gc);
+	map->gc.data = (unsigned long) set;
+	map->gc.function = list_set_gc;
+	map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ;
+	add_timer(&map->gc);
+}
+
+/* Create list:set type of sets */
+
+static bool
+init_list_set(struct ip_set *set, u32 size, size_t dsize,
+	      unsigned long timeout)
+{
+	struct list_set *map;
+	struct set_elem *e;
+	u32 i;
+
+	map = kzalloc(sizeof(*map) + size * dsize, GFP_KERNEL);
+	if (!map)
+		return false;
+
+	map->size = size;
+	map->dsize = dsize;
+	map->timeout = timeout;
+	set->data = map;
+
+	for (i = 0; i < size; i++) {
+		e = list_set_elem(map, i);
+		e->id = IPSET_INVALID_ID;
+	}
+
+	return true;
+}
+
+static int
+list_set_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
+{
+	u32 size = IP_SET_LIST_DEFAULT_SIZE;
+
+	if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_SIZE) ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT)))
+		return -IPSET_ERR_PROTOCOL;
+
+	if (tb[IPSET_ATTR_SIZE])
+		size = ip_set_get_h32(tb[IPSET_ATTR_SIZE]);
+	if (size < IP_SET_LIST_MIN_SIZE)
+		size = IP_SET_LIST_MIN_SIZE;
+
+	if (tb[IPSET_ATTR_TIMEOUT]) {
+		if (!init_list_set(set, size, sizeof(struct set_telem),
+				   ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT])))
+			return -ENOMEM;
+
+		list_set_gc_init(set);
+	} else {
+		if (!init_list_set(set, size, sizeof(struct set_elem),
+				   IPSET_NO_TIMEOUT))
+			return -ENOMEM;
+	}
+	set->variant = &list_set;
+	return 0;
+}
+
+static struct ip_set_type list_set_type __read_mostly = {
+	.name		= "list:set",
+	.protocol	= IPSET_PROTOCOL,
+	.features	= IPSET_TYPE_NAME | IPSET_DUMP_LAST,
+	.dimension	= IPSET_DIM_ONE,
+	.family		= AF_UNSPEC,
+	.revision	= 0,
+	.create		= list_set_create,
+	.create_policy	= {
+		[IPSET_ATTR_SIZE]	= { .type = NLA_U32 },
+		[IPSET_ATTR_TIMEOUT]	= { .type = NLA_U32 },
+	},
+	.adt_policy	= {
+		[IPSET_ATTR_NAME]	= { .type = NLA_STRING,
+					    .len = IPSET_MAXNAMELEN },
+		[IPSET_ATTR_NAMEREF]	= { .type = NLA_STRING,
+					    .len = IPSET_MAXNAMELEN },
+		[IPSET_ATTR_TIMEOUT]	= { .type = NLA_U32 },
+		[IPSET_ATTR_LINENO]	= { .type = NLA_U32 },
+		[IPSET_ATTR_CADT_FLAGS]	= { .type = NLA_U32 },
+	},
+	.me		= THIS_MODULE,
+};
+
+static int __init
+list_set_init(void)
+{
+	return ip_set_type_register(&list_set_type);
+}
+
+static void __exit
+list_set_fini(void)
+{
+	ip_set_type_unregister(&list_set_type);
+}
+
+module_init(list_set_init);
+module_exit(list_set_fini);
diff --git a/net/netfilter/ipset/pfxlen.c b/net/netfilter/ipset/pfxlen.c
new file mode 100644
index 0000000..23f8c81
--- /dev/null
+++ b/net/netfilter/ipset/pfxlen.c
@@ -0,0 +1,291 @@
+#include <linux/netfilter/ipset/pfxlen.h>
+
+/*
+ * Prefixlen maps for fast conversions, by Jan Engelhardt.
+ */
+
+#define E(a, b, c, d) \
+	{.ip6 = { \
+		__constant_htonl(a), __constant_htonl(b), \
+		__constant_htonl(c), __constant_htonl(d), \
+	} }
+
+/*
+ * This table works for both IPv4 and IPv6;
+ * just use prefixlen_netmask_map[prefixlength].ip.
+ */
+const union nf_inet_addr ip_set_netmask_map[] = {
+	E(0x00000000, 0x00000000, 0x00000000, 0x00000000),
+	E(0x80000000, 0x00000000, 0x00000000, 0x00000000),
+	E(0xC0000000, 0x00000000, 0x00000000, 0x00000000),
+	E(0xE0000000, 0x00000000, 0x00000000, 0x00000000),
+	E(0xF0000000, 0x00000000, 0x00000000, 0x00000000),
+	E(0xF8000000, 0x00000000, 0x00000000, 0x00000000),
+	E(0xFC000000, 0x00000000, 0x00000000, 0x00000000),
+	E(0xFE000000, 0x00000000, 0x00000000, 0x00000000),
+	E(0xFF000000, 0x00000000, 0x00000000, 0x00000000),
+	E(0xFF800000, 0x00000000, 0x00000000, 0x00000000),
+	E(0xFFC00000, 0x00000000, 0x00000000, 0x00000000),
+	E(0xFFE00000, 0x00000000, 0x00000000, 0x00000000),
+	E(0xFFF00000, 0x00000000, 0x00000000, 0x00000000),
+	E(0xFFF80000, 0x00000000, 0x00000000, 0x00000000),
+	E(0xFFFC0000, 0x00000000, 0x00000000, 0x00000000),
+	E(0xFFFE0000, 0x00000000, 0x00000000, 0x00000000),
+	E(0xFFFF0000, 0x00000000, 0x00000000, 0x00000000),
+	E(0xFFFF8000, 0x00000000, 0x00000000, 0x00000000),
+	E(0xFFFFC000, 0x00000000, 0x00000000, 0x00000000),
+	E(0xFFFFE000, 0x00000000, 0x00000000, 0x00000000),
+	E(0xFFFFF000, 0x00000000, 0x00000000, 0x00000000),
+	E(0xFFFFF800, 0x00000000, 0x00000000, 0x00000000),
+	E(0xFFFFFC00, 0x00000000, 0x00000000, 0x00000000),
+	E(0xFFFFFE00, 0x00000000, 0x00000000, 0x00000000),
+	E(0xFFFFFF00, 0x00000000, 0x00000000, 0x00000000),
+	E(0xFFFFFF80, 0x00000000, 0x00000000, 0x00000000),
+	E(0xFFFFFFC0, 0x00000000, 0x00000000, 0x00000000),
+	E(0xFFFFFFE0, 0x00000000, 0x00000000, 0x00000000),
+	E(0xFFFFFFF0, 0x00000000, 0x00000000, 0x00000000),
+	E(0xFFFFFFF8, 0x00000000, 0x00000000, 0x00000000),
+	E(0xFFFFFFFC, 0x00000000, 0x00000000, 0x00000000),
+	E(0xFFFFFFFE, 0x00000000, 0x00000000, 0x00000000),
+	E(0xFFFFFFFF, 0x00000000, 0x00000000, 0x00000000),
+	E(0xFFFFFFFF, 0x80000000, 0x00000000, 0x00000000),
+	E(0xFFFFFFFF, 0xC0000000, 0x00000000, 0x00000000),
+	E(0xFFFFFFFF, 0xE0000000, 0x00000000, 0x00000000),
+	E(0xFFFFFFFF, 0xF0000000, 0x00000000, 0x00000000),
+	E(0xFFFFFFFF, 0xF8000000, 0x00000000, 0x00000000),
+	E(0xFFFFFFFF, 0xFC000000, 0x00000000, 0x00000000),
+	E(0xFFFFFFFF, 0xFE000000, 0x00000000, 0x00000000),
+	E(0xFFFFFFFF, 0xFF000000, 0x00000000, 0x00000000),
+	E(0xFFFFFFFF, 0xFF800000, 0x00000000, 0x00000000),
+	E(0xFFFFFFFF, 0xFFC00000, 0x00000000, 0x00000000),
+	E(0xFFFFFFFF, 0xFFE00000, 0x00000000, 0x00000000),
+	E(0xFFFFFFFF, 0xFFF00000, 0x00000000, 0x00000000),
+	E(0xFFFFFFFF, 0xFFF80000, 0x00000000, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFC0000, 0x00000000, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFE0000, 0x00000000, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFF0000, 0x00000000, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFF8000, 0x00000000, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFFC000, 0x00000000, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFFE000, 0x00000000, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFFF000, 0x00000000, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFFF800, 0x00000000, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFFFC00, 0x00000000, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFFFE00, 0x00000000, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFFFF00, 0x00000000, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFFFF80, 0x00000000, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFFFFC0, 0x00000000, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFFFFE0, 0x00000000, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFFFFF0, 0x00000000, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFFFFF8, 0x00000000, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFFFFFC, 0x00000000, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFFFFFE, 0x00000000, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0x80000000, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xC0000000, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xE0000000, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xF0000000, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xF8000000, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFC000000, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFE000000, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFF000000, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFF800000, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFC00000, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFE00000, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFF00000, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFF80000, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFC0000, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFE0000, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF0000, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF8000, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFC000, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE000, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF000, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF800, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFC00, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFE00, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF00, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF80, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFC0, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFE0, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF0, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF8, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFC, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x80000000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xC0000000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xE0000000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xF0000000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xF8000000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFC000000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFE000000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFF000000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFF800000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFC00000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFE00000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFF00000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFF80000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFC0000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFE0000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF0000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF8000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFC000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF800),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFC00),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFE00),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF00),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF80),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFC0),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFE0),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF0),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF8),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFC),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF),
+};
+EXPORT_SYMBOL_GPL(ip_set_netmask_map);
+
+#undef  E
+#define E(a, b, c, d) 						\
+	{.ip6 = { (__force __be32) a, (__force __be32) b,	\
+		  (__force __be32) c, (__force __be32) d,	\
+	} }
+
+/*
+ * This table works for both IPv4 and IPv6;
+ * just use prefixlen_hostmask_map[prefixlength].ip.
+ */
+const union nf_inet_addr ip_set_hostmask_map[] = {
+	E(0x00000000, 0x00000000, 0x00000000, 0x00000000),
+	E(0x80000000, 0x00000000, 0x00000000, 0x00000000),
+	E(0xC0000000, 0x00000000, 0x00000000, 0x00000000),
+	E(0xE0000000, 0x00000000, 0x00000000, 0x00000000),
+	E(0xF0000000, 0x00000000, 0x00000000, 0x00000000),
+	E(0xF8000000, 0x00000000, 0x00000000, 0x00000000),
+	E(0xFC000000, 0x00000000, 0x00000000, 0x00000000),
+	E(0xFE000000, 0x00000000, 0x00000000, 0x00000000),
+	E(0xFF000000, 0x00000000, 0x00000000, 0x00000000),
+	E(0xFF800000, 0x00000000, 0x00000000, 0x00000000),
+	E(0xFFC00000, 0x00000000, 0x00000000, 0x00000000),
+	E(0xFFE00000, 0x00000000, 0x00000000, 0x00000000),
+	E(0xFFF00000, 0x00000000, 0x00000000, 0x00000000),
+	E(0xFFF80000, 0x00000000, 0x00000000, 0x00000000),
+	E(0xFFFC0000, 0x00000000, 0x00000000, 0x00000000),
+	E(0xFFFE0000, 0x00000000, 0x00000000, 0x00000000),
+	E(0xFFFF0000, 0x00000000, 0x00000000, 0x00000000),
+	E(0xFFFF8000, 0x00000000, 0x00000000, 0x00000000),
+	E(0xFFFFC000, 0x00000000, 0x00000000, 0x00000000),
+	E(0xFFFFE000, 0x00000000, 0x00000000, 0x00000000),
+	E(0xFFFFF000, 0x00000000, 0x00000000, 0x00000000),
+	E(0xFFFFF800, 0x00000000, 0x00000000, 0x00000000),
+	E(0xFFFFFC00, 0x00000000, 0x00000000, 0x00000000),
+	E(0xFFFFFE00, 0x00000000, 0x00000000, 0x00000000),
+	E(0xFFFFFF00, 0x00000000, 0x00000000, 0x00000000),
+	E(0xFFFFFF80, 0x00000000, 0x00000000, 0x00000000),
+	E(0xFFFFFFC0, 0x00000000, 0x00000000, 0x00000000),
+	E(0xFFFFFFE0, 0x00000000, 0x00000000, 0x00000000),
+	E(0xFFFFFFF0, 0x00000000, 0x00000000, 0x00000000),
+	E(0xFFFFFFF8, 0x00000000, 0x00000000, 0x00000000),
+	E(0xFFFFFFFC, 0x00000000, 0x00000000, 0x00000000),
+	E(0xFFFFFFFE, 0x00000000, 0x00000000, 0x00000000),
+	E(0xFFFFFFFF, 0x00000000, 0x00000000, 0x00000000),
+	E(0xFFFFFFFF, 0x80000000, 0x00000000, 0x00000000),
+	E(0xFFFFFFFF, 0xC0000000, 0x00000000, 0x00000000),
+	E(0xFFFFFFFF, 0xE0000000, 0x00000000, 0x00000000),
+	E(0xFFFFFFFF, 0xF0000000, 0x00000000, 0x00000000),
+	E(0xFFFFFFFF, 0xF8000000, 0x00000000, 0x00000000),
+	E(0xFFFFFFFF, 0xFC000000, 0x00000000, 0x00000000),
+	E(0xFFFFFFFF, 0xFE000000, 0x00000000, 0x00000000),
+	E(0xFFFFFFFF, 0xFF000000, 0x00000000, 0x00000000),
+	E(0xFFFFFFFF, 0xFF800000, 0x00000000, 0x00000000),
+	E(0xFFFFFFFF, 0xFFC00000, 0x00000000, 0x00000000),
+	E(0xFFFFFFFF, 0xFFE00000, 0x00000000, 0x00000000),
+	E(0xFFFFFFFF, 0xFFF00000, 0x00000000, 0x00000000),
+	E(0xFFFFFFFF, 0xFFF80000, 0x00000000, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFC0000, 0x00000000, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFE0000, 0x00000000, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFF0000, 0x00000000, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFF8000, 0x00000000, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFFC000, 0x00000000, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFFE000, 0x00000000, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFFF000, 0x00000000, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFFF800, 0x00000000, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFFFC00, 0x00000000, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFFFE00, 0x00000000, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFFFF00, 0x00000000, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFFFF80, 0x00000000, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFFFFC0, 0x00000000, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFFFFE0, 0x00000000, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFFFFF0, 0x00000000, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFFFFF8, 0x00000000, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFFFFFC, 0x00000000, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFFFFFE, 0x00000000, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0x80000000, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xC0000000, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xE0000000, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xF0000000, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xF8000000, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFC000000, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFE000000, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFF000000, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFF800000, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFC00000, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFE00000, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFF00000, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFF80000, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFC0000, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFE0000, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF0000, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF8000, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFC000, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE000, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF000, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF800, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFC00, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFE00, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF00, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF80, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFC0, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFE0, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF0, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF8, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFC, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x80000000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xC0000000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xE0000000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xF0000000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xF8000000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFC000000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFE000000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFF000000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFF800000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFC00000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFE00000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFF00000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFF80000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFC0000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFE0000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF0000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF8000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFC000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF000),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF800),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFC00),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFE00),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF00),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF80),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFC0),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFE0),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF0),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF8),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFC),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE),
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF),
+};
+EXPORT_SYMBOL_GPL(ip_set_hostmask_map);
diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c
index a475ede..5c48ffb 100644
--- a/net/netfilter/ipvs/ip_vs_app.c
+++ b/net/netfilter/ipvs/ip_vs_app.c
@@ -43,11 +43,6 @@ EXPORT_SYMBOL(register_ip_vs_app);
 EXPORT_SYMBOL(unregister_ip_vs_app);
 EXPORT_SYMBOL(register_ip_vs_app_inc);
 
-/* ipvs application list head */
-static LIST_HEAD(ip_vs_app_list);
-static DEFINE_MUTEX(__ip_vs_app_mutex);
-
-
 /*
  *	Get an ip_vs_app object
  */
@@ -67,7 +62,8 @@ static inline void ip_vs_app_put(struct ip_vs_app *app)
  *	Allocate/initialize app incarnation and register it in proto apps.
  */
 static int
-ip_vs_app_inc_new(struct ip_vs_app *app, __u16 proto, __u16 port)
+ip_vs_app_inc_new(struct net *net, struct ip_vs_app *app, __u16 proto,
+		  __u16 port)
 {
 	struct ip_vs_protocol *pp;
 	struct ip_vs_app *inc;
@@ -98,7 +94,7 @@ ip_vs_app_inc_new(struct ip_vs_app *app, __u16 proto, __u16 port)
 		}
 	}
 
-	ret = pp->register_app(inc);
+	ret = pp->register_app(net, inc);
 	if (ret)
 		goto out;
 
@@ -119,7 +115,7 @@ ip_vs_app_inc_new(struct ip_vs_app *app, __u16 proto, __u16 port)
  *	Release app incarnation
  */
 static void
-ip_vs_app_inc_release(struct ip_vs_app *inc)
+ip_vs_app_inc_release(struct net *net, struct ip_vs_app *inc)
 {
 	struct ip_vs_protocol *pp;
 
@@ -127,7 +123,7 @@ ip_vs_app_inc_release(struct ip_vs_app *inc)
 		return;
 
 	if (pp->unregister_app)
-		pp->unregister_app(inc);
+		pp->unregister_app(net, inc);
 
 	IP_VS_DBG(9, "%s App %s:%u unregistered\n",
 		  pp->name, inc->name, ntohs(inc->port));
@@ -168,15 +164,17 @@ void ip_vs_app_inc_put(struct ip_vs_app *inc)
  *	Register an application incarnation in protocol applications
  */
 int
-register_ip_vs_app_inc(struct ip_vs_app *app, __u16 proto, __u16 port)
+register_ip_vs_app_inc(struct net *net, struct ip_vs_app *app, __u16 proto,
+		       __u16 port)
 {
+	struct netns_ipvs *ipvs = net_ipvs(net);
 	int result;
 
-	mutex_lock(&__ip_vs_app_mutex);
+	mutex_lock(&ipvs->app_mutex);
 
-	result = ip_vs_app_inc_new(app, proto, port);
+	result = ip_vs_app_inc_new(net, app, proto, port);
 
-	mutex_unlock(&__ip_vs_app_mutex);
+	mutex_unlock(&ipvs->app_mutex);
 
 	return result;
 }
@@ -185,16 +183,17 @@ register_ip_vs_app_inc(struct ip_vs_app *app, __u16 proto, __u16 port)
 /*
  *	ip_vs_app registration routine
  */
-int register_ip_vs_app(struct ip_vs_app *app)
+int register_ip_vs_app(struct net *net, struct ip_vs_app *app)
 {
+	struct netns_ipvs *ipvs = net_ipvs(net);
 	/* increase the module use count */
 	ip_vs_use_count_inc();
 
-	mutex_lock(&__ip_vs_app_mutex);
+	mutex_lock(&ipvs->app_mutex);
 
-	list_add(&app->a_list, &ip_vs_app_list);
+	list_add(&app->a_list, &ipvs->app_list);
 
-	mutex_unlock(&__ip_vs_app_mutex);
+	mutex_unlock(&ipvs->app_mutex);
 
 	return 0;
 }
@@ -204,19 +203,20 @@ int register_ip_vs_app(struct ip_vs_app *app)
  *	ip_vs_app unregistration routine
  *	We are sure there are no app incarnations attached to services
  */
-void unregister_ip_vs_app(struct ip_vs_app *app)
+void unregister_ip_vs_app(struct net *net, struct ip_vs_app *app)
 {
+	struct netns_ipvs *ipvs = net_ipvs(net);
 	struct ip_vs_app *inc, *nxt;
 
-	mutex_lock(&__ip_vs_app_mutex);
+	mutex_lock(&ipvs->app_mutex);
 
 	list_for_each_entry_safe(inc, nxt, &app->incs_list, a_list) {
-		ip_vs_app_inc_release(inc);
+		ip_vs_app_inc_release(net, inc);
 	}
 
 	list_del(&app->a_list);
 
-	mutex_unlock(&__ip_vs_app_mutex);
+	mutex_unlock(&ipvs->app_mutex);
 
 	/* decrease the module use count */
 	ip_vs_use_count_dec();
@@ -226,7 +226,8 @@ void unregister_ip_vs_app(struct ip_vs_app *app)
 /*
  *	Bind ip_vs_conn to its ip_vs_app (called by cp constructor)
  */
-int ip_vs_bind_app(struct ip_vs_conn *cp, struct ip_vs_protocol *pp)
+int ip_vs_bind_app(struct ip_vs_conn *cp,
+		   struct ip_vs_protocol *pp)
 {
 	return pp->app_conn_bind(cp);
 }
@@ -481,11 +482,11 @@ int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb)
  *	/proc/net/ip_vs_app entry function
  */
 
-static struct ip_vs_app *ip_vs_app_idx(loff_t pos)
+static struct ip_vs_app *ip_vs_app_idx(struct netns_ipvs *ipvs, loff_t pos)
 {
 	struct ip_vs_app *app, *inc;
 
-	list_for_each_entry(app, &ip_vs_app_list, a_list) {
+	list_for_each_entry(app, &ipvs->app_list, a_list) {
 		list_for_each_entry(inc, &app->incs_list, a_list) {
 			if (pos-- == 0)
 				return inc;
@@ -497,19 +498,24 @@ static struct ip_vs_app *ip_vs_app_idx(loff_t pos)
 
 static void *ip_vs_app_seq_start(struct seq_file *seq, loff_t *pos)
 {
-	mutex_lock(&__ip_vs_app_mutex);
+	struct net *net = seq_file_net(seq);
+	struct netns_ipvs *ipvs = net_ipvs(net);
 
-	return *pos ? ip_vs_app_idx(*pos - 1) : SEQ_START_TOKEN;
+	mutex_lock(&ipvs->app_mutex);
+
+	return *pos ? ip_vs_app_idx(ipvs, *pos - 1) : SEQ_START_TOKEN;
 }
 
 static void *ip_vs_app_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
 	struct ip_vs_app *inc, *app;
 	struct list_head *e;
+	struct net *net = seq_file_net(seq);
+	struct netns_ipvs *ipvs = net_ipvs(net);
 
 	++*pos;
 	if (v == SEQ_START_TOKEN)
-		return ip_vs_app_idx(0);
+		return ip_vs_app_idx(ipvs, 0);
 
 	inc = v;
 	app = inc->app;
@@ -518,7 +524,7 @@ static void *ip_vs_app_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 		return list_entry(e, struct ip_vs_app, a_list);
 
 	/* go on to next application */
-	for (e = app->a_list.next; e != &ip_vs_app_list; e = e->next) {
+	for (e = app->a_list.next; e != &ipvs->app_list; e = e->next) {
 		app = list_entry(e, struct ip_vs_app, a_list);
 		list_for_each_entry(inc, &app->incs_list, a_list) {
 			return inc;
@@ -529,7 +535,9 @@ static void *ip_vs_app_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 
 static void ip_vs_app_seq_stop(struct seq_file *seq, void *v)
 {
-	mutex_unlock(&__ip_vs_app_mutex);
+	struct netns_ipvs *ipvs = net_ipvs(seq_file_net(seq));
+
+	mutex_unlock(&ipvs->app_mutex);
 }
 
 static int ip_vs_app_seq_show(struct seq_file *seq, void *v)
@@ -557,7 +565,8 @@ static const struct seq_operations ip_vs_app_seq_ops = {
 
 static int ip_vs_app_open(struct inode *inode, struct file *file)
 {
-	return seq_open(file, &ip_vs_app_seq_ops);
+	return seq_open_net(inode, file, &ip_vs_app_seq_ops,
+			    sizeof(struct seq_net_private));
 }
 
 static const struct file_operations ip_vs_app_fops = {
@@ -569,15 +578,36 @@ static const struct file_operations ip_vs_app_fops = {
 };
 #endif
 
-int __init ip_vs_app_init(void)
+static int __net_init __ip_vs_app_init(struct net *net)
 {
-	/* we will replace it with proc_net_ipvs_create() soon */
-	proc_net_fops_create(&init_net, "ip_vs_app", 0, &ip_vs_app_fops);
+	struct netns_ipvs *ipvs = net_ipvs(net);
+
+	INIT_LIST_HEAD(&ipvs->app_list);
+	__mutex_init(&ipvs->app_mutex, "ipvs->app_mutex", &ipvs->app_key);
+	proc_net_fops_create(net, "ip_vs_app", 0, &ip_vs_app_fops);
 	return 0;
 }
 
+static void __net_exit __ip_vs_app_cleanup(struct net *net)
+{
+	proc_net_remove(net, "ip_vs_app");
+}
+
+static struct pernet_operations ip_vs_app_ops = {
+	.init = __ip_vs_app_init,
+	.exit = __ip_vs_app_cleanup,
+};
+
+int __init ip_vs_app_init(void)
+{
+	int rv;
+
+	rv = register_pernet_subsys(&ip_vs_app_ops);
+	return rv;
+}
+
 
 void ip_vs_app_cleanup(void)
 {
-	proc_net_remove(&init_net, "ip_vs_app");
+	unregister_pernet_subsys(&ip_vs_app_ops);
 }
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index e9adecd..9c2a517 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -48,35 +48,32 @@
 /*
  * Connection hash size. Default is what was selected at compile time.
 */
-int ip_vs_conn_tab_bits = CONFIG_IP_VS_TAB_BITS;
+static int ip_vs_conn_tab_bits = CONFIG_IP_VS_TAB_BITS;
 module_param_named(conn_tab_bits, ip_vs_conn_tab_bits, int, 0444);
 MODULE_PARM_DESC(conn_tab_bits, "Set connections' hash size");
 
 /* size and mask values */
-int ip_vs_conn_tab_size;
-int ip_vs_conn_tab_mask;
+int ip_vs_conn_tab_size __read_mostly;
+static int ip_vs_conn_tab_mask __read_mostly;
 
 /*
  *  Connection hash table: for input and output packets lookups of IPVS
  */
-static struct list_head *ip_vs_conn_tab;
+static struct hlist_head *ip_vs_conn_tab __read_mostly;
 
 /*  SLAB cache for IPVS connections */
 static struct kmem_cache *ip_vs_conn_cachep __read_mostly;
 
-/*  counter for current IPVS connections */
-static atomic_t ip_vs_conn_count = ATOMIC_INIT(0);
-
 /*  counter for no client port connections */
 static atomic_t ip_vs_conn_no_cport_cnt = ATOMIC_INIT(0);
 
 /* random value for IPVS connection hash */
-static unsigned int ip_vs_conn_rnd;
+static unsigned int ip_vs_conn_rnd __read_mostly;
 
 /*
  *  Fine locking granularity for big connection hash table
  */
-#define CT_LOCKARRAY_BITS  4
+#define CT_LOCKARRAY_BITS  5
 #define CT_LOCKARRAY_SIZE  (1<<CT_LOCKARRAY_BITS)
 #define CT_LOCKARRAY_MASK  (CT_LOCKARRAY_SIZE-1)
 
@@ -133,19 +130,19 @@ static inline void ct_write_unlock_bh(unsigned key)
 /*
  *	Returns hash value for IPVS connection entry
  */
-static unsigned int ip_vs_conn_hashkey(int af, unsigned proto,
+static unsigned int ip_vs_conn_hashkey(struct net *net, int af, unsigned proto,
 				       const union nf_inet_addr *addr,
 				       __be16 port)
 {
 #ifdef CONFIG_IP_VS_IPV6
 	if (af == AF_INET6)
-		return jhash_3words(jhash(addr, 16, ip_vs_conn_rnd),
-				    (__force u32)port, proto, ip_vs_conn_rnd)
-			& ip_vs_conn_tab_mask;
+		return (jhash_3words(jhash(addr, 16, ip_vs_conn_rnd),
+				    (__force u32)port, proto, ip_vs_conn_rnd) ^
+			((size_t)net>>8)) & ip_vs_conn_tab_mask;
 #endif
-	return jhash_3words((__force u32)addr->ip, (__force u32)port, proto,
-			    ip_vs_conn_rnd)
-		& ip_vs_conn_tab_mask;
+	return (jhash_3words((__force u32)addr->ip, (__force u32)port, proto,
+			    ip_vs_conn_rnd) ^
+		((size_t)net>>8)) & ip_vs_conn_tab_mask;
 }
 
 static unsigned int ip_vs_conn_hashkey_param(const struct ip_vs_conn_param *p,
@@ -166,18 +163,18 @@ static unsigned int ip_vs_conn_hashkey_param(const struct ip_vs_conn_param *p,
 		port = p->vport;
 	}
 
-	return ip_vs_conn_hashkey(p->af, p->protocol, addr, port);
+	return ip_vs_conn_hashkey(p->net, p->af, p->protocol, addr, port);
 }
 
 static unsigned int ip_vs_conn_hashkey_conn(const struct ip_vs_conn *cp)
 {
 	struct ip_vs_conn_param p;
 
-	ip_vs_conn_fill_param(cp->af, cp->protocol, &cp->caddr, cp->cport,
-			      NULL, 0, &p);
+	ip_vs_conn_fill_param(ip_vs_conn_net(cp), cp->af, cp->protocol,
+			      &cp->caddr, cp->cport, NULL, 0, &p);
 
-	if (cp->dest && cp->dest->svc->pe) {
-		p.pe = cp->dest->svc->pe;
+	if (cp->pe) {
+		p.pe = cp->pe;
 		p.pe_data = cp->pe_data;
 		p.pe_data_len = cp->pe_data_len;
 	}
@@ -186,7 +183,7 @@ static unsigned int ip_vs_conn_hashkey_conn(const struct ip_vs_conn *cp)
 }
 
 /*
- *	Hashes ip_vs_conn in ip_vs_conn_tab by proto,addr,port.
+ *	Hashes ip_vs_conn in ip_vs_conn_tab by netns,proto,addr,port.
  *	returns bool success.
  */
 static inline int ip_vs_conn_hash(struct ip_vs_conn *cp)
@@ -204,7 +201,7 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp)
 	spin_lock(&cp->lock);
 
 	if (!(cp->flags & IP_VS_CONN_F_HASHED)) {
-		list_add(&cp->c_list, &ip_vs_conn_tab[hash]);
+		hlist_add_head(&cp->c_list, &ip_vs_conn_tab[hash]);
 		cp->flags |= IP_VS_CONN_F_HASHED;
 		atomic_inc(&cp->refcnt);
 		ret = 1;
@@ -237,7 +234,7 @@ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp)
 	spin_lock(&cp->lock);
 
 	if (cp->flags & IP_VS_CONN_F_HASHED) {
-		list_del(&cp->c_list);
+		hlist_del(&cp->c_list);
 		cp->flags &= ~IP_VS_CONN_F_HASHED;
 		atomic_dec(&cp->refcnt);
 		ret = 1;
@@ -262,18 +259,20 @@ __ip_vs_conn_in_get(const struct ip_vs_conn_param *p)
 {
 	unsigned hash;
 	struct ip_vs_conn *cp;
+	struct hlist_node *n;
 
 	hash = ip_vs_conn_hashkey_param(p, false);
 
 	ct_read_lock(hash);
 
-	list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
+	hlist_for_each_entry(cp, n, &ip_vs_conn_tab[hash], c_list) {
 		if (cp->af == p->af &&
+		    p->cport == cp->cport && p->vport == cp->vport &&
 		    ip_vs_addr_equal(p->af, p->caddr, &cp->caddr) &&
 		    ip_vs_addr_equal(p->af, p->vaddr, &cp->vaddr) &&
-		    p->cport == cp->cport && p->vport == cp->vport &&
 		    ((!p->cport) ^ (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) &&
-		    p->protocol == cp->protocol) {
+		    p->protocol == cp->protocol &&
+		    ip_vs_conn_net_eq(cp, p->net)) {
 			/* HIT */
 			atomic_inc(&cp->refcnt);
 			ct_read_unlock(hash);
@@ -313,23 +312,23 @@ ip_vs_conn_fill_param_proto(int af, const struct sk_buff *skb,
 			    struct ip_vs_conn_param *p)
 {
 	__be16 _ports[2], *pptr;
+	struct net *net = skb_net(skb);
 
 	pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
 	if (pptr == NULL)
 		return 1;
 
 	if (likely(!inverse))
-		ip_vs_conn_fill_param(af, iph->protocol, &iph->saddr, pptr[0],
-				      &iph->daddr, pptr[1], p);
+		ip_vs_conn_fill_param(net, af, iph->protocol, &iph->saddr,
+				      pptr[0], &iph->daddr, pptr[1], p);
 	else
-		ip_vs_conn_fill_param(af, iph->protocol, &iph->daddr, pptr[1],
-				      &iph->saddr, pptr[0], p);
+		ip_vs_conn_fill_param(net, af, iph->protocol, &iph->daddr,
+				      pptr[1], &iph->saddr, pptr[0], p);
 	return 0;
 }
 
 struct ip_vs_conn *
 ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb,
-			struct ip_vs_protocol *pp,
 			const struct ip_vs_iphdr *iph,
 			unsigned int proto_off, int inverse)
 {
@@ -347,14 +346,17 @@ struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p)
 {
 	unsigned hash;
 	struct ip_vs_conn *cp;
+	struct hlist_node *n;
 
 	hash = ip_vs_conn_hashkey_param(p, false);
 
 	ct_read_lock(hash);
 
-	list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
+	hlist_for_each_entry(cp, n, &ip_vs_conn_tab[hash], c_list) {
+		if (!ip_vs_conn_net_eq(cp, p->net))
+			continue;
 		if (p->pe_data && p->pe->ct_match) {
-			if (p->pe->ct_match(p, cp))
+			if (p->pe == cp->pe && p->pe->ct_match(p, cp))
 				goto out;
 			continue;
 		}
@@ -394,6 +396,7 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p)
 {
 	unsigned hash;
 	struct ip_vs_conn *cp, *ret=NULL;
+	struct hlist_node *n;
 
 	/*
 	 *	Check for "full" addressed entries
@@ -402,12 +405,13 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p)
 
 	ct_read_lock(hash);
 
-	list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
+	hlist_for_each_entry(cp, n, &ip_vs_conn_tab[hash], c_list) {
 		if (cp->af == p->af &&
+		    p->vport == cp->cport && p->cport == cp->dport &&
 		    ip_vs_addr_equal(p->af, p->vaddr, &cp->caddr) &&
 		    ip_vs_addr_equal(p->af, p->caddr, &cp->daddr) &&
-		    p->vport == cp->cport && p->cport == cp->dport &&
-		    p->protocol == cp->protocol) {
+		    p->protocol == cp->protocol &&
+		    ip_vs_conn_net_eq(cp, p->net)) {
 			/* HIT */
 			atomic_inc(&cp->refcnt);
 			ret = cp;
@@ -428,7 +432,6 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p)
 
 struct ip_vs_conn *
 ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb,
-			 struct ip_vs_protocol *pp,
 			 const struct ip_vs_iphdr *iph,
 			 unsigned int proto_off, int inverse)
 {
@@ -611,9 +614,9 @@ struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp)
 	struct ip_vs_dest *dest;
 
 	if ((cp) && (!cp->dest)) {
-		dest = ip_vs_find_dest(cp->af, &cp->daddr, cp->dport,
-				       &cp->vaddr, cp->vport,
-				       cp->protocol);
+		dest = ip_vs_find_dest(ip_vs_conn_net(cp), cp->af, &cp->daddr,
+				       cp->dport, &cp->vaddr, cp->vport,
+				       cp->protocol, cp->fwmark);
 		ip_vs_bind_dest(cp, dest);
 		return dest;
 	} else
@@ -686,13 +689,14 @@ static inline void ip_vs_unbind_dest(struct ip_vs_conn *cp)
 int ip_vs_check_template(struct ip_vs_conn *ct)
 {
 	struct ip_vs_dest *dest = ct->dest;
+	struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(ct));
 
 	/*
 	 * Checking the dest server status.
 	 */
 	if ((dest == NULL) ||
 	    !(dest->flags & IP_VS_DEST_F_AVAILABLE) ||
-	    (sysctl_ip_vs_expire_quiescent_template &&
+	    (ipvs->sysctl_expire_quiescent_template &&
 	     (atomic_read(&dest->weight) == 0))) {
 		IP_VS_DBG_BUF(9, "check_template: dest not available for "
 			      "protocol %s s:%s:%d v:%s:%d "
@@ -730,6 +734,7 @@ int ip_vs_check_template(struct ip_vs_conn *ct)
 static void ip_vs_conn_expire(unsigned long data)
 {
 	struct ip_vs_conn *cp = (struct ip_vs_conn *)data;
+	struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp));
 
 	cp->timeout = 60*HZ;
 
@@ -765,13 +770,14 @@ static void ip_vs_conn_expire(unsigned long data)
 		if (cp->flags & IP_VS_CONN_F_NFCT)
 			ip_vs_conn_drop_conntrack(cp);
 
+		ip_vs_pe_put(cp->pe);
 		kfree(cp->pe_data);
 		if (unlikely(cp->app != NULL))
 			ip_vs_unbind_app(cp);
 		ip_vs_unbind_dest(cp);
 		if (cp->flags & IP_VS_CONN_F_NO_CPORT)
 			atomic_dec(&ip_vs_conn_no_cport_cnt);
-		atomic_dec(&ip_vs_conn_count);
+		atomic_dec(&ipvs->conn_count);
 
 		kmem_cache_free(ip_vs_conn_cachep, cp);
 		return;
@@ -802,10 +808,12 @@ void ip_vs_conn_expire_now(struct ip_vs_conn *cp)
 struct ip_vs_conn *
 ip_vs_conn_new(const struct ip_vs_conn_param *p,
 	       const union nf_inet_addr *daddr, __be16 dport, unsigned flags,
-	       struct ip_vs_dest *dest)
+	       struct ip_vs_dest *dest, __u32 fwmark)
 {
 	struct ip_vs_conn *cp;
-	struct ip_vs_protocol *pp = ip_vs_proto_get(p->protocol);
+	struct netns_ipvs *ipvs = net_ipvs(p->net);
+	struct ip_vs_proto_data *pd = ip_vs_proto_data_get(p->net,
+							   p->protocol);
 
 	cp = kmem_cache_zalloc(ip_vs_conn_cachep, GFP_ATOMIC);
 	if (cp == NULL) {
@@ -813,8 +821,9 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
 		return NULL;
 	}
 
-	INIT_LIST_HEAD(&cp->c_list);
+	INIT_HLIST_NODE(&cp->c_list);
 	setup_timer(&cp->timer, ip_vs_conn_expire, (unsigned long)cp);
+	ip_vs_conn_net_set(cp, p->net);
 	cp->af		   = p->af;
 	cp->protocol	   = p->protocol;
 	ip_vs_addr_copy(p->af, &cp->caddr, p->caddr);
@@ -826,7 +835,10 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
 			&cp->daddr, daddr);
 	cp->dport          = dport;
 	cp->flags	   = flags;
-	if (flags & IP_VS_CONN_F_TEMPLATE && p->pe_data) {
+	cp->fwmark         = fwmark;
+	if (flags & IP_VS_CONN_F_TEMPLATE && p->pe) {
+		ip_vs_pe_get(p->pe);
+		cp->pe = p->pe;
 		cp->pe_data = p->pe_data;
 		cp->pe_data_len = p->pe_data_len;
 	}
@@ -842,7 +854,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
 	atomic_set(&cp->n_control, 0);
 	atomic_set(&cp->in_pkts, 0);
 
-	atomic_inc(&ip_vs_conn_count);
+	atomic_inc(&ipvs->conn_count);
 	if (flags & IP_VS_CONN_F_NO_CPORT)
 		atomic_inc(&ip_vs_conn_no_cport_cnt);
 
@@ -861,8 +873,8 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
 #endif
 		ip_vs_bind_xmit(cp);
 
-	if (unlikely(pp && atomic_read(&pp->appcnt)))
-		ip_vs_bind_app(cp, pp);
+	if (unlikely(pd && atomic_read(&pd->appcnt)))
+		ip_vs_bind_app(cp, pd->pp);
 
 	/*
 	 * Allow conntrack to be preserved. By default, conntrack
@@ -871,7 +883,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
 	 * IP_VS_CONN_F_ONE_PACKET too.
 	 */
 
-	if (ip_vs_conntrack_enabled())
+	if (ip_vs_conntrack_enabled(ipvs))
 		cp->flags |= IP_VS_CONN_F_NFCT;
 
 	/* Hash it in the ip_vs_conn_tab finally */
@@ -884,18 +896,24 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
  *	/proc/net/ip_vs_conn entries
  */
 #ifdef CONFIG_PROC_FS
+struct ip_vs_iter_state {
+	struct seq_net_private	p;
+	struct hlist_head	*l;
+};
 
 static void *ip_vs_conn_array(struct seq_file *seq, loff_t pos)
 {
 	int idx;
 	struct ip_vs_conn *cp;
+	struct ip_vs_iter_state *iter = seq->private;
+	struct hlist_node *n;
 
 	for (idx = 0; idx < ip_vs_conn_tab_size; idx++) {
 		ct_read_lock_bh(idx);
-		list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) {
+		hlist_for_each_entry(cp, n, &ip_vs_conn_tab[idx], c_list) {
 			if (pos-- == 0) {
-				seq->private = &ip_vs_conn_tab[idx];
-			return cp;
+				iter->l = &ip_vs_conn_tab[idx];
+				return cp;
 			}
 		}
 		ct_read_unlock_bh(idx);
@@ -906,14 +924,18 @@ static void *ip_vs_conn_array(struct seq_file *seq, loff_t pos)
 
 static void *ip_vs_conn_seq_start(struct seq_file *seq, loff_t *pos)
 {
-	seq->private = NULL;
+	struct ip_vs_iter_state *iter = seq->private;
+
+	iter->l = NULL;
 	return *pos ? ip_vs_conn_array(seq, *pos - 1) :SEQ_START_TOKEN;
 }
 
 static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
 	struct ip_vs_conn *cp = v;
-	struct list_head *e, *l = seq->private;
+	struct ip_vs_iter_state *iter = seq->private;
+	struct hlist_node *e;
+	struct hlist_head *l = iter->l;
 	int idx;
 
 	++*pos;
@@ -921,27 +943,28 @@ static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 		return ip_vs_conn_array(seq, 0);
 
 	/* more on same hash chain? */
-	if ((e = cp->c_list.next) != l)
-		return list_entry(e, struct ip_vs_conn, c_list);
+	if ((e = cp->c_list.next))
+		return hlist_entry(e, struct ip_vs_conn, c_list);
 
 	idx = l - ip_vs_conn_tab;
 	ct_read_unlock_bh(idx);
 
 	while (++idx < ip_vs_conn_tab_size) {
 		ct_read_lock_bh(idx);
-		list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) {
-			seq->private = &ip_vs_conn_tab[idx];
+		hlist_for_each_entry(cp, e, &ip_vs_conn_tab[idx], c_list) {
+			iter->l = &ip_vs_conn_tab[idx];
 			return cp;
 		}
 		ct_read_unlock_bh(idx);
 	}
-	seq->private = NULL;
+	iter->l = NULL;
 	return NULL;
 }
 
 static void ip_vs_conn_seq_stop(struct seq_file *seq, void *v)
 {
-	struct list_head *l = seq->private;
+	struct ip_vs_iter_state *iter = seq->private;
+	struct hlist_head *l = iter->l;
 
 	if (l)
 		ct_read_unlock_bh(l - ip_vs_conn_tab);
@@ -955,18 +978,19 @@ static int ip_vs_conn_seq_show(struct seq_file *seq, void *v)
    "Pro FromIP   FPrt ToIP     TPrt DestIP   DPrt State       Expires PEName PEData\n");
 	else {
 		const struct ip_vs_conn *cp = v;
+		struct net *net = seq_file_net(seq);
 		char pe_data[IP_VS_PENAME_MAXLEN + IP_VS_PEDATA_MAXLEN + 3];
 		size_t len = 0;
 
-		if (cp->dest && cp->pe_data &&
-		    cp->dest->svc->pe->show_pe_data) {
+		if (!ip_vs_conn_net_eq(cp, net))
+			return 0;
+		if (cp->pe_data) {
 			pe_data[0] = ' ';
-			len = strlen(cp->dest->svc->pe->name);
-			memcpy(pe_data + 1, cp->dest->svc->pe->name, len);
+			len = strlen(cp->pe->name);
+			memcpy(pe_data + 1, cp->pe->name, len);
 			pe_data[len + 1] = ' ';
 			len += 2;
-			len += cp->dest->svc->pe->show_pe_data(cp,
-							       pe_data + len);
+			len += cp->pe->show_pe_data(cp, pe_data + len);
 		}
 		pe_data[len] = '\0';
 
@@ -1004,7 +1028,8 @@ static const struct seq_operations ip_vs_conn_seq_ops = {
 
 static int ip_vs_conn_open(struct inode *inode, struct file *file)
 {
-	return seq_open(file, &ip_vs_conn_seq_ops);
+	return seq_open_net(inode, file, &ip_vs_conn_seq_ops,
+			    sizeof(struct ip_vs_iter_state));
 }
 
 static const struct file_operations ip_vs_conn_fops = {
@@ -1031,6 +1056,10 @@ static int ip_vs_conn_sync_seq_show(struct seq_file *seq, void *v)
    "Pro FromIP   FPrt ToIP     TPrt DestIP   DPrt State       Origin Expires\n");
 	else {
 		const struct ip_vs_conn *cp = v;
+		struct net *net = seq_file_net(seq);
+
+		if (!ip_vs_conn_net_eq(cp, net))
+			return 0;
 
 #ifdef CONFIG_IP_VS_IPV6
 		if (cp->af == AF_INET6)
@@ -1067,7 +1096,8 @@ static const struct seq_operations ip_vs_conn_sync_seq_ops = {
 
 static int ip_vs_conn_sync_open(struct inode *inode, struct file *file)
 {
-	return seq_open(file, &ip_vs_conn_sync_seq_ops);
+	return seq_open_net(inode, file, &ip_vs_conn_sync_seq_ops,
+			    sizeof(struct ip_vs_iter_state));
 }
 
 static const struct file_operations ip_vs_conn_sync_fops = {
@@ -1113,7 +1143,7 @@ static inline int todrop_entry(struct ip_vs_conn *cp)
 }
 
 /* Called from keventd and must protect itself from softirqs */
-void ip_vs_random_dropentry(void)
+void ip_vs_random_dropentry(struct net *net)
 {
 	int idx;
 	struct ip_vs_conn *cp;
@@ -1123,17 +1153,19 @@ void ip_vs_random_dropentry(void)
 	 */
 	for (idx = 0; idx < (ip_vs_conn_tab_size>>5); idx++) {
 		unsigned hash = net_random() & ip_vs_conn_tab_mask;
+		struct hlist_node *n;
 
 		/*
 		 *  Lock is actually needed in this loop.
 		 */
 		ct_write_lock_bh(hash);
 
-		list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
+		hlist_for_each_entry(cp, n, &ip_vs_conn_tab[hash], c_list) {
 			if (cp->flags & IP_VS_CONN_F_TEMPLATE)
 				/* connection template */
 				continue;
-
+			if (!ip_vs_conn_net_eq(cp, net))
+				continue;
 			if (cp->protocol == IPPROTO_TCP) {
 				switch(cp->state) {
 				case IP_VS_TCP_S_SYN_RECV:
@@ -1168,20 +1200,24 @@ void ip_vs_random_dropentry(void)
 /*
  *      Flush all the connection entries in the ip_vs_conn_tab
  */
-static void ip_vs_conn_flush(void)
+static void ip_vs_conn_flush(struct net *net)
 {
 	int idx;
 	struct ip_vs_conn *cp;
+	struct netns_ipvs *ipvs = net_ipvs(net);
 
-  flush_again:
+flush_again:
 	for (idx = 0; idx < ip_vs_conn_tab_size; idx++) {
+		struct hlist_node *n;
+
 		/*
 		 *  Lock is actually needed in this loop.
 		 */
 		ct_write_lock_bh(idx);
 
-		list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) {
-
+		hlist_for_each_entry(cp, n, &ip_vs_conn_tab[idx], c_list) {
+			if (!ip_vs_conn_net_eq(cp, net))
+				continue;
 			IP_VS_DBG(4, "del connection\n");
 			ip_vs_conn_expire_now(cp);
 			if (cp->control) {
@@ -1194,16 +1230,41 @@ static void ip_vs_conn_flush(void)
 
 	/* the counter may be not NULL, because maybe some conn entries
 	   are run by slow timer handler or unhashed but still referred */
-	if (atomic_read(&ip_vs_conn_count) != 0) {
+	if (atomic_read(&ipvs->conn_count) != 0) {
 		schedule();
 		goto flush_again;
 	}
 }
+/*
+ * per netns init and exit
+ */
+int __net_init __ip_vs_conn_init(struct net *net)
+{
+	struct netns_ipvs *ipvs = net_ipvs(net);
+
+	atomic_set(&ipvs->conn_count, 0);
+
+	proc_net_fops_create(net, "ip_vs_conn", 0, &ip_vs_conn_fops);
+	proc_net_fops_create(net, "ip_vs_conn_sync", 0, &ip_vs_conn_sync_fops);
+	return 0;
+}
 
+static void __net_exit __ip_vs_conn_cleanup(struct net *net)
+{
+	/* flush all the connection entries first */
+	ip_vs_conn_flush(net);
+	proc_net_remove(net, "ip_vs_conn");
+	proc_net_remove(net, "ip_vs_conn_sync");
+}
+static struct pernet_operations ipvs_conn_ops = {
+	.init = __ip_vs_conn_init,
+	.exit = __ip_vs_conn_cleanup,
+};
 
 int __init ip_vs_conn_init(void)
 {
 	int idx;
+	int retc;
 
 	/* Compute size and mask */
 	ip_vs_conn_tab_size = 1 << ip_vs_conn_tab_bits;
@@ -1212,8 +1273,7 @@ int __init ip_vs_conn_init(void)
 	/*
 	 * Allocate the connection hash table and initialize its list heads
 	 */
-	ip_vs_conn_tab = vmalloc(ip_vs_conn_tab_size *
-				 sizeof(struct list_head));
+	ip_vs_conn_tab = vmalloc(ip_vs_conn_tab_size * sizeof(*ip_vs_conn_tab));
 	if (!ip_vs_conn_tab)
 		return -ENOMEM;
 
@@ -1233,32 +1293,25 @@ int __init ip_vs_conn_init(void)
 	IP_VS_DBG(0, "Each connection entry needs %Zd bytes at least\n",
 		  sizeof(struct ip_vs_conn));
 
-	for (idx = 0; idx < ip_vs_conn_tab_size; idx++) {
-		INIT_LIST_HEAD(&ip_vs_conn_tab[idx]);
-	}
+	for (idx = 0; idx < ip_vs_conn_tab_size; idx++)
+		INIT_HLIST_HEAD(&ip_vs_conn_tab[idx]);
 
 	for (idx = 0; idx < CT_LOCKARRAY_SIZE; idx++)  {
 		rwlock_init(&__ip_vs_conntbl_lock_array[idx].l);
 	}
 
-	proc_net_fops_create(&init_net, "ip_vs_conn", 0, &ip_vs_conn_fops);
-	proc_net_fops_create(&init_net, "ip_vs_conn_sync", 0, &ip_vs_conn_sync_fops);
+	retc = register_pernet_subsys(&ipvs_conn_ops);
 
 	/* calculate the random value for connection hash */
 	get_random_bytes(&ip_vs_conn_rnd, sizeof(ip_vs_conn_rnd));
 
-	return 0;
+	return retc;
 }
 
-
 void ip_vs_conn_cleanup(void)
 {
-	/* flush all the connection entries first */
-	ip_vs_conn_flush();
-
+	unregister_pernet_subsys(&ipvs_conn_ops);
 	/* Release the empty cache */
 	kmem_cache_destroy(ip_vs_conn_cachep);
-	proc_net_remove(&init_net, "ip_vs_conn");
-	proc_net_remove(&init_net, "ip_vs_conn_sync");
 	vfree(ip_vs_conn_tab);
 }
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index b4e51e9..2d1f932 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -41,6 +41,7 @@
 #include <net/icmp.h>                   /* for icmp_send */
 #include <net/route.h>
 #include <net/ip6_checksum.h>
+#include <net/netns/generic.h>		/* net_generic() */
 
 #include <linux/netfilter.h>
 #include <linux/netfilter_ipv4.h>
@@ -68,6 +69,12 @@ EXPORT_SYMBOL(ip_vs_conn_put);
 EXPORT_SYMBOL(ip_vs_get_debug_level);
 #endif
 
+int ip_vs_net_id __read_mostly;
+#ifdef IP_VS_GENERIC_NETNS
+EXPORT_SYMBOL(ip_vs_net_id);
+#endif
+/* netns cnt used for uniqueness */
+static atomic_t ipvs_netns_cnt = ATOMIC_INIT(0);
 
 /* ID used in ICMP lookups */
 #define icmp_id(icmph)          (((icmph)->un).echo.id)
@@ -108,21 +115,28 @@ static inline void
 ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
 {
 	struct ip_vs_dest *dest = cp->dest;
+	struct netns_ipvs *ipvs = net_ipvs(skb_net(skb));
+
 	if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) {
-		spin_lock(&dest->stats.lock);
-		dest->stats.ustats.inpkts++;
-		dest->stats.ustats.inbytes += skb->len;
-		spin_unlock(&dest->stats.lock);
-
-		spin_lock(&dest->svc->stats.lock);
-		dest->svc->stats.ustats.inpkts++;
-		dest->svc->stats.ustats.inbytes += skb->len;
-		spin_unlock(&dest->svc->stats.lock);
-
-		spin_lock(&ip_vs_stats.lock);
-		ip_vs_stats.ustats.inpkts++;
-		ip_vs_stats.ustats.inbytes += skb->len;
-		spin_unlock(&ip_vs_stats.lock);
+		struct ip_vs_cpu_stats *s;
+
+		s = this_cpu_ptr(dest->stats.cpustats);
+		s->ustats.inpkts++;
+		u64_stats_update_begin(&s->syncp);
+		s->ustats.inbytes += skb->len;
+		u64_stats_update_end(&s->syncp);
+
+		s = this_cpu_ptr(dest->svc->stats.cpustats);
+		s->ustats.inpkts++;
+		u64_stats_update_begin(&s->syncp);
+		s->ustats.inbytes += skb->len;
+		u64_stats_update_end(&s->syncp);
+
+		s = this_cpu_ptr(ipvs->cpustats);
+		s->ustats.inpkts++;
+		u64_stats_update_begin(&s->syncp);
+		s->ustats.inbytes += skb->len;
+		u64_stats_update_end(&s->syncp);
 	}
 }
 
@@ -131,21 +145,28 @@ static inline void
 ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
 {
 	struct ip_vs_dest *dest = cp->dest;
+	struct netns_ipvs *ipvs = net_ipvs(skb_net(skb));
+
 	if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) {
-		spin_lock(&dest->stats.lock);
-		dest->stats.ustats.outpkts++;
-		dest->stats.ustats.outbytes += skb->len;
-		spin_unlock(&dest->stats.lock);
-
-		spin_lock(&dest->svc->stats.lock);
-		dest->svc->stats.ustats.outpkts++;
-		dest->svc->stats.ustats.outbytes += skb->len;
-		spin_unlock(&dest->svc->stats.lock);
-
-		spin_lock(&ip_vs_stats.lock);
-		ip_vs_stats.ustats.outpkts++;
-		ip_vs_stats.ustats.outbytes += skb->len;
-		spin_unlock(&ip_vs_stats.lock);
+		struct ip_vs_cpu_stats *s;
+
+		s = this_cpu_ptr(dest->stats.cpustats);
+		s->ustats.outpkts++;
+		u64_stats_update_begin(&s->syncp);
+		s->ustats.outbytes += skb->len;
+		u64_stats_update_end(&s->syncp);
+
+		s = this_cpu_ptr(dest->svc->stats.cpustats);
+		s->ustats.outpkts++;
+		u64_stats_update_begin(&s->syncp);
+		s->ustats.outbytes += skb->len;
+		u64_stats_update_end(&s->syncp);
+
+		s = this_cpu_ptr(ipvs->cpustats);
+		s->ustats.outpkts++;
+		u64_stats_update_begin(&s->syncp);
+		s->ustats.outbytes += skb->len;
+		u64_stats_update_end(&s->syncp);
 	}
 }
 
@@ -153,41 +174,44 @@ ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
 static inline void
 ip_vs_conn_stats(struct ip_vs_conn *cp, struct ip_vs_service *svc)
 {
-	spin_lock(&cp->dest->stats.lock);
-	cp->dest->stats.ustats.conns++;
-	spin_unlock(&cp->dest->stats.lock);
+	struct netns_ipvs *ipvs = net_ipvs(svc->net);
+	struct ip_vs_cpu_stats *s;
+
+	s = this_cpu_ptr(cp->dest->stats.cpustats);
+	s->ustats.conns++;
 
-	spin_lock(&svc->stats.lock);
-	svc->stats.ustats.conns++;
-	spin_unlock(&svc->stats.lock);
+	s = this_cpu_ptr(svc->stats.cpustats);
+	s->ustats.conns++;
 
-	spin_lock(&ip_vs_stats.lock);
-	ip_vs_stats.ustats.conns++;
-	spin_unlock(&ip_vs_stats.lock);
+	s = this_cpu_ptr(ipvs->cpustats);
+	s->ustats.conns++;
 }
 
 
 static inline int
 ip_vs_set_state(struct ip_vs_conn *cp, int direction,
 		const struct sk_buff *skb,
-		struct ip_vs_protocol *pp)
+		struct ip_vs_proto_data *pd)
 {
-	if (unlikely(!pp->state_transition))
+	if (unlikely(!pd->pp->state_transition))
 		return 0;
-	return pp->state_transition(cp, direction, skb, pp);
+	return pd->pp->state_transition(cp, direction, skb, pd);
 }
 
-static inline void
+static inline int
 ip_vs_conn_fill_param_persist(const struct ip_vs_service *svc,
 			      struct sk_buff *skb, int protocol,
 			      const union nf_inet_addr *caddr, __be16 cport,
 			      const union nf_inet_addr *vaddr, __be16 vport,
 			      struct ip_vs_conn_param *p)
 {
-	ip_vs_conn_fill_param(svc->af, protocol, caddr, cport, vaddr, vport, p);
+	ip_vs_conn_fill_param(svc->net, svc->af, protocol, caddr, cport, vaddr,
+			      vport, p);
 	p->pe = svc->pe;
 	if (p->pe && p->pe->fill_param)
-		p->pe->fill_param(p, skb);
+		return p->pe->fill_param(p, skb);
+
+	return 0;
 }
 
 /*
@@ -200,7 +224,7 @@ ip_vs_conn_fill_param_persist(const struct ip_vs_service *svc,
 static struct ip_vs_conn *
 ip_vs_sched_persist(struct ip_vs_service *svc,
 		    struct sk_buff *skb,
-		    __be16 ports[2])
+		    __be16 src_port, __be16 dst_port, int *ignored)
 {
 	struct ip_vs_conn *cp = NULL;
 	struct ip_vs_iphdr iph;
@@ -224,8 +248,8 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
 
 	IP_VS_DBG_BUF(6, "p-schedule: src %s:%u dest %s:%u "
 		      "mnet %s\n",
-		      IP_VS_DBG_ADDR(svc->af, &iph.saddr), ntohs(ports[0]),
-		      IP_VS_DBG_ADDR(svc->af, &iph.daddr), ntohs(ports[1]),
+		      IP_VS_DBG_ADDR(svc->af, &iph.saddr), ntohs(src_port),
+		      IP_VS_DBG_ADDR(svc->af, &iph.daddr), ntohs(dst_port),
 		      IP_VS_DBG_ADDR(svc->af, &snet));
 
 	/*
@@ -247,14 +271,14 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
 		const union nf_inet_addr fwmark = { .ip = htonl(svc->fwmark) };
 		__be16 vport = 0;
 
-		if (ports[1] == svc->port) {
+		if (dst_port == svc->port) {
 			/* non-FTP template:
 			 * <protocol, caddr, 0, vaddr, vport, daddr, dport>
 			 * FTP template:
 			 * <protocol, caddr, 0, vaddr, 0, daddr, 0>
 			 */
 			if (svc->port != FTPPORT)
-				vport = ports[1];
+				vport = dst_port;
 		} else {
 			/* Note: persistent fwmark-based services and
 			 * persistent port zero service are handled here.
@@ -268,24 +292,31 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
 				vaddr = &fwmark;
 			}
 		}
-		ip_vs_conn_fill_param_persist(svc, skb, protocol, &snet, 0,
-					      vaddr, vport, &param);
+		/* return *ignored = -1 so NF_DROP can be used */
+		if (ip_vs_conn_fill_param_persist(svc, skb, protocol, &snet, 0,
+						  vaddr, vport, &param) < 0) {
+			*ignored = -1;
+			return NULL;
+		}
 	}
 
 	/* Check if a template already exists */
 	ct = ip_vs_ct_in_get(&param);
 	if (!ct || !ip_vs_check_template(ct)) {
-		/* No template found or the dest of the connection
+		/*
+		 * No template found or the dest of the connection
 		 * template is not available.
+		 * return *ignored=0 i.e. ICMP and NF_DROP
 		 */
 		dest = svc->scheduler->schedule(svc, skb);
 		if (!dest) {
 			IP_VS_DBG(1, "p-schedule: no dest found.\n");
 			kfree(param.pe_data);
+			*ignored = 0;
 			return NULL;
 		}
 
-		if (ports[1] == svc->port && svc->port != FTPPORT)
+		if (dst_port == svc->port && svc->port != FTPPORT)
 			dport = dest->port;
 
 		/* Create a template
@@ -293,9 +324,10 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
 		 * and thus param.pe_data will be destroyed
 		 * when the template expires */
 		ct = ip_vs_conn_new(&param, &dest->addr, dport,
-				    IP_VS_CONN_F_TEMPLATE, dest);
+				    IP_VS_CONN_F_TEMPLATE, dest, skb->mark);
 		if (ct == NULL) {
 			kfree(param.pe_data);
+			*ignored = -1;
 			return NULL;
 		}
 
@@ -306,7 +338,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
 		kfree(param.pe_data);
 	}
 
-	dport = ports[1];
+	dport = dst_port;
 	if (dport == svc->port && dest->port)
 		dport = dest->port;
 
@@ -317,11 +349,13 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
 	/*
 	 *    Create a new connection according to the template
 	 */
-	ip_vs_conn_fill_param(svc->af, iph.protocol, &iph.saddr, ports[0],
-			      &iph.daddr, ports[1], &param);
-	cp = ip_vs_conn_new(&param, &dest->addr, dport, flags, dest);
+	ip_vs_conn_fill_param(svc->net, svc->af, iph.protocol, &iph.saddr,
+			      src_port, &iph.daddr, dst_port, &param);
+
+	cp = ip_vs_conn_new(&param, &dest->addr, dport, flags, dest, skb->mark);
 	if (cp == NULL) {
 		ip_vs_conn_put(ct);
+		*ignored = -1;
 		return NULL;
 	}
 
@@ -341,11 +375,27 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
  *  It selects a server according to the virtual service, and
  *  creates a connection entry.
  *  Protocols supported: TCP, UDP
+ *
+ *  Usage of *ignored
+ *
+ * 1 :   protocol tried to schedule (eg. on SYN), found svc but the
+ *       svc/scheduler decides that this packet should be accepted with
+ *       NF_ACCEPT because it must not be scheduled.
+ *
+ * 0 :   scheduler can not find destination, so try bypass or
+ *       return ICMP and then NF_DROP (ip_vs_leave).
+ *
+ * -1 :  scheduler tried to schedule but fatal error occurred, eg.
+ *       ip_vs_conn_new failure (ENOMEM) or ip_vs_sip_fill_param
+ *       failure such as missing Call-ID, ENOMEM on skb_linearize
+ *       or pe_data. In this case we should return NF_DROP without
+ *       any attempts to send ICMP with ip_vs_leave.
  */
 struct ip_vs_conn *
 ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
-	       struct ip_vs_protocol *pp, int *ignored)
+	       struct ip_vs_proto_data *pd, int *ignored)
 {
+	struct ip_vs_protocol *pp = pd->pp;
 	struct ip_vs_conn *cp = NULL;
 	struct ip_vs_iphdr iph;
 	struct ip_vs_dest *dest;
@@ -371,12 +421,10 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
 	}
 
 	/*
-	 * Do not schedule replies from local real server. It is risky
-	 * for fwmark services but mostly for persistent services.
+	 *    Do not schedule replies from local real server.
 	 */
 	if ((!skb->dev || skb->dev->flags & IFF_LOOPBACK) &&
-	    (svc->flags & IP_VS_SVC_F_PERSISTENT || svc->fwmark) &&
-	    (cp = pp->conn_in_get(svc->af, skb, pp, &iph, iph.len, 1))) {
+	    (cp = pp->conn_in_get(svc->af, skb, &iph, iph.len, 1))) {
 		IP_VS_DBG_PKT(12, svc->af, pp, skb, 0,
 			      "Not scheduling reply for existing connection");
 		__ip_vs_conn_put(cp);
@@ -386,10 +434,10 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
 	/*
 	 *    Persistent service
 	 */
-	if (svc->flags & IP_VS_SVC_F_PERSISTENT) {
-		*ignored = 0;
-		return ip_vs_sched_persist(svc, skb, pptr);
-	}
+	if (svc->flags & IP_VS_SVC_F_PERSISTENT)
+		return ip_vs_sched_persist(svc, skb, pptr[0], pptr[1], ignored);
+
+	*ignored = 0;
 
 	/*
 	 *    Non-persistent service
@@ -402,8 +450,6 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
 		return NULL;
 	}
 
-	*ignored = 0;
-
 	dest = svc->scheduler->schedule(svc, skb);
 	if (dest == NULL) {
 		IP_VS_DBG(1, "Schedule: no dest found.\n");
@@ -419,13 +465,17 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
 	 */
 	{
 		struct ip_vs_conn_param p;
-		ip_vs_conn_fill_param(svc->af, iph.protocol, &iph.saddr,
-				      pptr[0], &iph.daddr, pptr[1], &p);
+
+		ip_vs_conn_fill_param(svc->net, svc->af, iph.protocol,
+				      &iph.saddr, pptr[0], &iph.daddr, pptr[1],
+				      &p);
 		cp = ip_vs_conn_new(&p, &dest->addr,
 				    dest->port ? dest->port : pptr[1],
-				    flags, dest);
-		if (!cp)
+				    flags, dest, skb->mark);
+		if (!cp) {
+			*ignored = -1;
 			return NULL;
+		}
 	}
 
 	IP_VS_DBG_BUF(6, "Schedule fwd:%c c:%s:%u v:%s:%u "
@@ -447,11 +497,14 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
  *  no destination is available for a new connection.
  */
 int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
-		struct ip_vs_protocol *pp)
+		struct ip_vs_proto_data *pd)
 {
+	struct net *net;
+	struct netns_ipvs *ipvs;
 	__be16 _ports[2], *pptr;
 	struct ip_vs_iphdr iph;
 	int unicast;
+
 	ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph);
 
 	pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports);
@@ -459,18 +512,20 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
 		ip_vs_service_put(svc);
 		return NF_DROP;
 	}
+	net = skb_net(skb);
 
 #ifdef CONFIG_IP_VS_IPV6
 	if (svc->af == AF_INET6)
 		unicast = ipv6_addr_type(&iph.daddr.in6) & IPV6_ADDR_UNICAST;
 	else
 #endif
-		unicast = (inet_addr_type(&init_net, iph.daddr.ip) == RTN_UNICAST);
+		unicast = (inet_addr_type(net, iph.daddr.ip) == RTN_UNICAST);
 
 	/* if it is fwmark-based service, the cache_bypass sysctl is up
 	   and the destination is a non-local unicast, then create
 	   a cache_bypass connection entry */
-	if (sysctl_ip_vs_cache_bypass && svc->fwmark && unicast) {
+	ipvs = net_ipvs(net);
+	if (ipvs->sysctl_cache_bypass && svc->fwmark && unicast) {
 		int ret, cs;
 		struct ip_vs_conn *cp;
 		unsigned int flags = (svc->flags & IP_VS_SVC_F_ONEPACKET &&
@@ -484,12 +539,12 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
 		IP_VS_DBG(6, "%s(): create a cache_bypass entry\n", __func__);
 		{
 			struct ip_vs_conn_param p;
-			ip_vs_conn_fill_param(svc->af, iph.protocol,
+			ip_vs_conn_fill_param(svc->net, svc->af, iph.protocol,
 					      &iph.saddr, pptr[0],
 					      &iph.daddr, pptr[1], &p);
 			cp = ip_vs_conn_new(&p, &daddr, 0,
 					    IP_VS_CONN_F_BYPASS | flags,
-					    NULL);
+					    NULL, skb->mark);
 			if (!cp)
 				return NF_DROP;
 		}
@@ -498,10 +553,10 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
 		ip_vs_in_stats(cp, skb);
 
 		/* set state */
-		cs = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pp);
+		cs = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pd);
 
 		/* transmit the first SYN packet */
-		ret = cp->packet_xmit(skb, cp, pp);
+		ret = cp->packet_xmit(skb, cp, pd->pp);
 		/* do not touch skb anymore */
 
 		atomic_inc(&cp->in_pkts);
@@ -674,7 +729,7 @@ void ip_vs_nat_icmp_v6(struct sk_buff *skb, struct ip_vs_protocol *pp,
 #endif
 
 /* Handle relevant response ICMP messages - forward to the right
- * destination host. Used for NAT and local client.
+ * destination host.
  */
 static int handle_response_icmp(int af, struct sk_buff *skb,
 				union nf_inet_addr *snet,
@@ -682,6 +737,7 @@ static int handle_response_icmp(int af, struct sk_buff *skb,
 				struct ip_vs_protocol *pp,
 				unsigned int offset, unsigned int ihl)
 {
+	struct netns_ipvs *ipvs;
 	unsigned int verdict = NF_DROP;
 
 	if (IP_VS_FWD_METHOD(cp) != 0) {
@@ -703,6 +759,8 @@ static int handle_response_icmp(int af, struct sk_buff *skb,
 	if (!skb_make_writable(skb, offset))
 		goto out;
 
+	ipvs = net_ipvs(skb_net(skb));
+
 #ifdef CONFIG_IP_VS_IPV6
 	if (af == AF_INET6)
 		ip_vs_nat_icmp_v6(skb, pp, cp, 1);
@@ -712,11 +770,11 @@ static int handle_response_icmp(int af, struct sk_buff *skb,
 
 #ifdef CONFIG_IP_VS_IPV6
 	if (af == AF_INET6) {
-		if (sysctl_ip_vs_snat_reroute && ip6_route_me_harder(skb) != 0)
+		if (ipvs->sysctl_snat_reroute && ip6_route_me_harder(skb) != 0)
 			goto out;
 	} else
 #endif
-		if ((sysctl_ip_vs_snat_reroute ||
+		if ((ipvs->sysctl_snat_reroute ||
 		     skb_rtable(skb)->rt_flags & RTCF_LOCAL) &&
 		    ip_route_me_harder(skb, RTN_LOCAL) != 0)
 			goto out;
@@ -808,7 +866,7 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related,
 
 	ip_vs_fill_iphdr(AF_INET, cih, &ciph);
 	/* The embedded headers contain source and dest in reverse order */
-	cp = pp->conn_out_get(AF_INET, skb, pp, &ciph, offset, 1);
+	cp = pp->conn_out_get(AF_INET, skb, &ciph, offset, 1);
 	if (!cp)
 		return NF_ACCEPT;
 
@@ -885,7 +943,7 @@ static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related,
 
 	ip_vs_fill_iphdr(AF_INET6, cih, &ciph);
 	/* The embedded headers contain source and dest in reverse order */
-	cp = pp->conn_out_get(AF_INET6, skb, pp, &ciph, offset, 1);
+	cp = pp->conn_out_get(AF_INET6, skb, &ciph, offset, 1);
 	if (!cp)
 		return NF_ACCEPT;
 
@@ -921,12 +979,14 @@ static inline int is_tcp_reset(const struct sk_buff *skb, int nh_len)
 }
 
 /* Handle response packets: rewrite addresses and send away...
- * Used for NAT and local client.
  */
 static unsigned int
-handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
+handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
 		struct ip_vs_conn *cp, int ihl)
 {
+	struct ip_vs_protocol *pp = pd->pp;
+	struct netns_ipvs *ipvs;
+
 	IP_VS_DBG_PKT(11, af, pp, skb, 0, "Outgoing packet");
 
 	if (!skb_make_writable(skb, ihl))
@@ -961,13 +1021,15 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
 	 * if it came from this machine itself.  So re-compute
 	 * the routing information.
 	 */
+	ipvs = net_ipvs(skb_net(skb));
+
 #ifdef CONFIG_IP_VS_IPV6
 	if (af == AF_INET6) {
-		if (sysctl_ip_vs_snat_reroute && ip6_route_me_harder(skb) != 0)
+		if (ipvs->sysctl_snat_reroute && ip6_route_me_harder(skb) != 0)
 			goto drop;
 	} else
 #endif
-		if ((sysctl_ip_vs_snat_reroute ||
+		if ((ipvs->sysctl_snat_reroute ||
 		     skb_rtable(skb)->rt_flags & RTCF_LOCAL) &&
 		    ip_route_me_harder(skb, RTN_LOCAL) != 0)
 			goto drop;
@@ -975,7 +1037,7 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
 	IP_VS_DBG_PKT(10, af, pp, skb, 0, "After SNAT");
 
 	ip_vs_out_stats(cp, skb);
-	ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pp);
+	ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pd);
 	skb->ipvs_property = 1;
 	if (!(cp->flags & IP_VS_CONN_F_NFCT))
 		ip_vs_notrack(skb);
@@ -999,9 +1061,12 @@ drop:
 static unsigned int
 ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
 {
+	struct net *net = NULL;
 	struct ip_vs_iphdr iph;
 	struct ip_vs_protocol *pp;
+	struct ip_vs_proto_data *pd;
 	struct ip_vs_conn *cp;
+	struct netns_ipvs *ipvs;
 
 	EnterFunction(11);
 
@@ -1022,6 +1087,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
 	if (unlikely(!skb_dst(skb)))
 		return NF_ACCEPT;
 
+	net = skb_net(skb);
 	ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
 #ifdef CONFIG_IP_VS_IPV6
 	if (af == AF_INET6) {
@@ -1045,9 +1111,10 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
 			ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
 		}
 
-	pp = ip_vs_proto_get(iph.protocol);
-	if (unlikely(!pp))
+	pd = ip_vs_proto_data_get(net, iph.protocol);
+	if (unlikely(!pd))
 		return NF_ACCEPT;
+	pp = pd->pp;
 
 	/* reassemble IP fragments */
 #ifdef CONFIG_IP_VS_IPV6
@@ -1073,11 +1140,12 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
 	/*
 	 * Check if the packet belongs to an existing entry
 	 */
-	cp = pp->conn_out_get(af, skb, pp, &iph, iph.len, 0);
+	cp = pp->conn_out_get(af, skb, &iph, iph.len, 0);
+	ipvs = net_ipvs(net);
 
 	if (likely(cp))
-		return handle_response(af, skb, pp, cp, iph.len);
-	if (sysctl_ip_vs_nat_icmp_send &&
+		return handle_response(af, skb, pd, cp, iph.len);
+	if (ipvs->sysctl_nat_icmp_send &&
 	    (pp->protocol == IPPROTO_TCP ||
 	     pp->protocol == IPPROTO_UDP ||
 	     pp->protocol == IPPROTO_SCTP)) {
@@ -1087,7 +1155,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
 					  sizeof(_ports), _ports);
 		if (pptr == NULL)
 			return NF_ACCEPT;	/* Not for me */
-		if (ip_vs_lookup_real_service(af, iph.protocol,
+		if (ip_vs_lookup_real_service(net, af, iph.protocol,
 					      &iph.saddr,
 					      pptr[0])) {
 			/*
@@ -1202,14 +1270,15 @@ ip_vs_local_reply6(unsigned int hooknum, struct sk_buff *skb,
 static int
 ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
 {
+	struct net *net = NULL;
 	struct iphdr *iph;
 	struct icmphdr	_icmph, *ic;
 	struct iphdr	_ciph, *cih;	/* The ip header contained within the ICMP */
 	struct ip_vs_iphdr ciph;
 	struct ip_vs_conn *cp;
 	struct ip_vs_protocol *pp;
+	struct ip_vs_proto_data *pd;
 	unsigned int offset, ihl, verdict;
-	union nf_inet_addr snet;
 
 	*related = 1;
 
@@ -1249,9 +1318,11 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
 	if (cih == NULL)
 		return NF_ACCEPT; /* The packet looks wrong, ignore */
 
-	pp = ip_vs_proto_get(cih->protocol);
-	if (!pp)
+	net = skb_net(skb);
+	pd = ip_vs_proto_data_get(net, cih->protocol);
+	if (!pd)
 		return NF_ACCEPT;
+	pp = pd->pp;
 
 	/* Is the embedded protocol header present? */
 	if (unlikely(cih->frag_off & htons(IP_OFFSET) &&
@@ -1265,18 +1336,9 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
 
 	ip_vs_fill_iphdr(AF_INET, cih, &ciph);
 	/* The embedded headers contain source and dest in reverse order */
-	cp = pp->conn_in_get(AF_INET, skb, pp, &ciph, offset, 1);
-	if (!cp) {
-		/* The packet could also belong to a local client */
-		cp = pp->conn_out_get(AF_INET, skb, pp, &ciph, offset, 1);
-		if (cp) {
-			snet.ip = iph->saddr;
-			return handle_response_icmp(AF_INET, skb, &snet,
-						    cih->protocol, cp, pp,
-						    offset, ihl);
-		}
+	cp = pp->conn_in_get(AF_INET, skb, &ciph, offset, 1);
+	if (!cp)
 		return NF_ACCEPT;
-	}
 
 	verdict = NF_DROP;
 
@@ -1312,6 +1374,7 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
 static int
 ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
 {
+	struct net *net = NULL;
 	struct ipv6hdr *iph;
 	struct icmp6hdr	_icmph, *ic;
 	struct ipv6hdr	_ciph, *cih;	/* The ip header contained
@@ -1319,8 +1382,8 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
 	struct ip_vs_iphdr ciph;
 	struct ip_vs_conn *cp;
 	struct ip_vs_protocol *pp;
+	struct ip_vs_proto_data *pd;
 	unsigned int offset, verdict;
-	union nf_inet_addr snet;
 	struct rt6_info *rt;
 
 	*related = 1;
@@ -1361,9 +1424,11 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
 	if (cih == NULL)
 		return NF_ACCEPT; /* The packet looks wrong, ignore */
 
-	pp = ip_vs_proto_get(cih->nexthdr);
-	if (!pp)
+	net = skb_net(skb);
+	pd = ip_vs_proto_data_get(net, cih->nexthdr);
+	if (!pd)
 		return NF_ACCEPT;
+	pp = pd->pp;
 
 	/* Is the embedded protocol header present? */
 	/* TODO: we don't support fragmentation at the moment anyways */
@@ -1377,19 +1442,9 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
 
 	ip_vs_fill_iphdr(AF_INET6, cih, &ciph);
 	/* The embedded headers contain source and dest in reverse order */
-	cp = pp->conn_in_get(AF_INET6, skb, pp, &ciph, offset, 1);
-	if (!cp) {
-		/* The packet could also belong to a local client */
-		cp = pp->conn_out_get(AF_INET6, skb, pp, &ciph, offset, 1);
-		if (cp) {
-			ipv6_addr_copy(&snet.in6, &iph->saddr);
-			return handle_response_icmp(AF_INET6, skb, &snet,
-						    cih->nexthdr,
-						    cp, pp, offset,
-						    sizeof(struct ipv6hdr));
-		}
+	cp = pp->conn_in_get(AF_INET6, skb, &ciph, offset, 1);
+	if (!cp)
 		return NF_ACCEPT;
-	}
 
 	verdict = NF_DROP;
 
@@ -1423,10 +1478,13 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
 static unsigned int
 ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
 {
+	struct net *net;
 	struct ip_vs_iphdr iph;
 	struct ip_vs_protocol *pp;
+	struct ip_vs_proto_data *pd;
 	struct ip_vs_conn *cp;
 	int ret, restart, pkts;
+	struct netns_ipvs *ipvs;
 
 	/* Already marked as IPVS request or reply? */
 	if (skb->ipvs_property)
@@ -1480,20 +1538,21 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
 			ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
 		}
 
+	net = skb_net(skb);
 	/* Protocol supported? */
-	pp = ip_vs_proto_get(iph.protocol);
-	if (unlikely(!pp))
+	pd = ip_vs_proto_data_get(net, iph.protocol);
+	if (unlikely(!pd))
 		return NF_ACCEPT;
-
+	pp = pd->pp;
 	/*
 	 * Check if the packet belongs to an existing connection entry
 	 */
-	cp = pp->conn_in_get(af, skb, pp, &iph, iph.len, 0);
+	cp = pp->conn_in_get(af, skb, &iph, iph.len, 0);
 
 	if (unlikely(!cp)) {
 		int v;
 
-		if (!pp->conn_schedule(af, skb, pp, &v, &cp))
+		if (!pp->conn_schedule(af, skb, pd, &v, &cp))
 			return v;
 	}
 
@@ -1505,12 +1564,13 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
 	}
 
 	IP_VS_DBG_PKT(11, af, pp, skb, 0, "Incoming packet");
-
+	net = skb_net(skb);
+	ipvs = net_ipvs(net);
 	/* Check the server status */
 	if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) {
 		/* the destination server is not available */
 
-		if (sysctl_ip_vs_expire_nodest_conn) {
+		if (ipvs->sysctl_expire_nodest_conn) {
 			/* try to expire the connection immediately */
 			ip_vs_conn_expire_now(cp);
 		}
@@ -1521,7 +1581,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
 	}
 
 	ip_vs_in_stats(cp, skb);
-	restart = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pp);
+	restart = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pd);
 	if (cp->packet_xmit)
 		ret = cp->packet_xmit(skb, cp, pp);
 		/* do not touch skb anymore */
@@ -1535,35 +1595,41 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
 	 *
 	 * Sync connection if it is about to close to
 	 * encorage the standby servers to update the connections timeout
+	 *
+	 * For ONE_PKT let ip_vs_sync_conn() do the filter work.
 	 */
-	pkts = atomic_add_return(1, &cp->in_pkts);
-	if (af == AF_INET && (ip_vs_sync_state & IP_VS_STATE_MASTER) &&
+
+	if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
+		pkts = ipvs->sysctl_sync_threshold[0];
+	else
+		pkts = atomic_add_return(1, &cp->in_pkts);
+
+	if ((ipvs->sync_state & IP_VS_STATE_MASTER) &&
 	    cp->protocol == IPPROTO_SCTP) {
 		if ((cp->state == IP_VS_SCTP_S_ESTABLISHED &&
-			(pkts % sysctl_ip_vs_sync_threshold[1]
-			 == sysctl_ip_vs_sync_threshold[0])) ||
+			(pkts % ipvs->sysctl_sync_threshold[1]
+			 == ipvs->sysctl_sync_threshold[0])) ||
 				(cp->old_state != cp->state &&
 				 ((cp->state == IP_VS_SCTP_S_CLOSED) ||
 				  (cp->state == IP_VS_SCTP_S_SHUT_ACK_CLI) ||
 				  (cp->state == IP_VS_SCTP_S_SHUT_ACK_SER)))) {
-			ip_vs_sync_conn(cp);
+			ip_vs_sync_conn(net, cp);
 			goto out;
 		}
 	}
 
 	/* Keep this block last: TCP and others with pp->num_states <= 1 */
-	else if (af == AF_INET &&
-	    (ip_vs_sync_state & IP_VS_STATE_MASTER) &&
+	else if ((ipvs->sync_state & IP_VS_STATE_MASTER) &&
 	    (((cp->protocol != IPPROTO_TCP ||
 	       cp->state == IP_VS_TCP_S_ESTABLISHED) &&
-	      (pkts % sysctl_ip_vs_sync_threshold[1]
-	       == sysctl_ip_vs_sync_threshold[0])) ||
+	      (pkts % ipvs->sysctl_sync_threshold[1]
+	       == ipvs->sysctl_sync_threshold[0])) ||
 	     ((cp->protocol == IPPROTO_TCP) && (cp->old_state != cp->state) &&
 	      ((cp->state == IP_VS_TCP_S_FIN_WAIT) ||
 	       (cp->state == IP_VS_TCP_S_CLOSE) ||
 	       (cp->state == IP_VS_TCP_S_CLOSE_WAIT) ||
 	       (cp->state == IP_VS_TCP_S_TIME_WAIT)))))
-		ip_vs_sync_conn(cp);
+		ip_vs_sync_conn(net, cp);
 out:
 	cp->old_state = cp->state;
 
@@ -1782,7 +1848,39 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
 	},
 #endif
 };
+/*
+ *	Initialize IP Virtual Server netns mem.
+ */
+static int __net_init __ip_vs_init(struct net *net)
+{
+	struct netns_ipvs *ipvs;
 
+	ipvs = net_generic(net, ip_vs_net_id);
+	if (ipvs == NULL) {
+		pr_err("%s(): no memory.\n", __func__);
+		return -ENOMEM;
+	}
+	ipvs->net = net;
+	/* Counters used for creating unique names */
+	ipvs->gen = atomic_read(&ipvs_netns_cnt);
+	atomic_inc(&ipvs_netns_cnt);
+	net->ipvs = ipvs;
+	printk(KERN_INFO "IPVS: Creating netns size=%zu id=%d\n",
+			 sizeof(struct netns_ipvs), ipvs->gen);
+	return 0;
+}
+
+static void __net_exit __ip_vs_cleanup(struct net *net)
+{
+	IP_VS_DBG(10, "ipvs netns %d released\n", net_ipvs(net)->gen);
+}
+
+static struct pernet_operations ipvs_core_ops = {
+	.init = __ip_vs_init,
+	.exit = __ip_vs_cleanup,
+	.id   = &ip_vs_net_id,
+	.size = sizeof(struct netns_ipvs),
+};
 
 /*
  *	Initialize IP Virtual Server
@@ -1791,8 +1889,11 @@ static int __init ip_vs_init(void)
 {
 	int ret;
 
-	ip_vs_estimator_init();
+	ret = register_pernet_subsys(&ipvs_core_ops);	/* Alloc ip_vs struct */
+	if (ret < 0)
+		return ret;
 
+	ip_vs_estimator_init();
 	ret = ip_vs_control_init();
 	if (ret < 0) {
 		pr_err("can't setup control.\n");
@@ -1813,15 +1914,23 @@ static int __init ip_vs_init(void)
 		goto cleanup_app;
 	}
 
+	ret = ip_vs_sync_init();
+	if (ret < 0) {
+		pr_err("can't setup sync data.\n");
+		goto cleanup_conn;
+	}
+
 	ret = nf_register_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
 	if (ret < 0) {
 		pr_err("can't register hooks.\n");
-		goto cleanup_conn;
+		goto cleanup_sync;
 	}
 
 	pr_info("ipvs loaded.\n");
 	return ret;
 
+cleanup_sync:
+	ip_vs_sync_cleanup();
   cleanup_conn:
 	ip_vs_conn_cleanup();
   cleanup_app:
@@ -1831,17 +1940,20 @@ static int __init ip_vs_init(void)
 	ip_vs_control_cleanup();
   cleanup_estimator:
 	ip_vs_estimator_cleanup();
+	unregister_pernet_subsys(&ipvs_core_ops);	/* free ip_vs struct */
 	return ret;
 }
 
 static void __exit ip_vs_cleanup(void)
 {
 	nf_unregister_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
+	ip_vs_sync_cleanup();
 	ip_vs_conn_cleanup();
 	ip_vs_app_cleanup();
 	ip_vs_protocol_cleanup();
 	ip_vs_control_cleanup();
 	ip_vs_estimator_cleanup();
+	unregister_pernet_subsys(&ipvs_core_ops);	/* free ip_vs struct */
 	pr_info("ipvs unloaded.\n");
 }
 
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index ba98e13..d69ec26 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -38,6 +38,7 @@
 #include <linux/mutex.h>
 
 #include <net/net_namespace.h>
+#include <linux/nsproxy.h>
 #include <net/ip.h>
 #ifdef CONFIG_IP_VS_IPV6
 #include <net/ipv6.h>
@@ -57,42 +58,7 @@ static DEFINE_MUTEX(__ip_vs_mutex);
 /* lock for service table */
 static DEFINE_RWLOCK(__ip_vs_svc_lock);
 
-/* lock for table with the real services */
-static DEFINE_RWLOCK(__ip_vs_rs_lock);
-
-/* lock for state and timeout tables */
-static DEFINE_SPINLOCK(ip_vs_securetcp_lock);
-
-/* lock for drop entry handling */
-static DEFINE_SPINLOCK(__ip_vs_dropentry_lock);
-
-/* lock for drop packet handling */
-static DEFINE_SPINLOCK(__ip_vs_droppacket_lock);
-
-/* 1/rate drop and drop-entry variables */
-int ip_vs_drop_rate = 0;
-int ip_vs_drop_counter = 0;
-static atomic_t ip_vs_dropentry = ATOMIC_INIT(0);
-
-/* number of virtual services */
-static int ip_vs_num_services = 0;
-
 /* sysctl variables */
-static int sysctl_ip_vs_drop_entry = 0;
-static int sysctl_ip_vs_drop_packet = 0;
-static int sysctl_ip_vs_secure_tcp = 0;
-static int sysctl_ip_vs_amemthresh = 1024;
-static int sysctl_ip_vs_am_droprate = 10;
-int sysctl_ip_vs_cache_bypass = 0;
-int sysctl_ip_vs_expire_nodest_conn = 0;
-int sysctl_ip_vs_expire_quiescent_template = 0;
-int sysctl_ip_vs_sync_threshold[2] = { 3, 50 };
-int sysctl_ip_vs_nat_icmp_send = 0;
-#ifdef CONFIG_IP_VS_NFCT
-int sysctl_ip_vs_conntrack;
-#endif
-int sysctl_ip_vs_snat_reroute = 1;
-
 
 #ifdef CONFIG_IP_VS_DEBUG
 static int sysctl_ip_vs_debug_level = 0;
@@ -105,7 +71,8 @@ int ip_vs_get_debug_level(void)
 
 #ifdef CONFIG_IP_VS_IPV6
 /* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */
-static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr)
+static int __ip_vs_addr_is_local_v6(struct net *net,
+				    const struct in6_addr *addr)
 {
 	struct rt6_info *rt;
 	struct flowi fl = {
@@ -114,7 +81,7 @@ static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr)
 		.fl6_src = { .s6_addr32 = {0, 0, 0, 0} },
 	};
 
-	rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl);
+	rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl);
 	if (rt && rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK))
 			return 1;
 
@@ -125,7 +92,7 @@ static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr)
  *	update_defense_level is called from keventd and from sysctl,
  *	so it needs to protect itself from softirqs
  */
-static void update_defense_level(void)
+static void update_defense_level(struct netns_ipvs *ipvs)
 {
 	struct sysinfo i;
 	static int old_secure_tcp = 0;
@@ -141,73 +108,73 @@ static void update_defense_level(void)
 	/* si_swapinfo(&i); */
 	/* availmem = availmem - (i.totalswap - i.freeswap); */
 
-	nomem = (availmem < sysctl_ip_vs_amemthresh);
+	nomem = (availmem < ipvs->sysctl_amemthresh);
 
 	local_bh_disable();
 
 	/* drop_entry */
-	spin_lock(&__ip_vs_dropentry_lock);
-	switch (sysctl_ip_vs_drop_entry) {
+	spin_lock(&ipvs->dropentry_lock);
+	switch (ipvs->sysctl_drop_entry) {
 	case 0:
-		atomic_set(&ip_vs_dropentry, 0);
+		atomic_set(&ipvs->dropentry, 0);
 		break;
 	case 1:
 		if (nomem) {
-			atomic_set(&ip_vs_dropentry, 1);
-			sysctl_ip_vs_drop_entry = 2;
+			atomic_set(&ipvs->dropentry, 1);
+			ipvs->sysctl_drop_entry = 2;
 		} else {
-			atomic_set(&ip_vs_dropentry, 0);
+			atomic_set(&ipvs->dropentry, 0);
 		}
 		break;
 	case 2:
 		if (nomem) {
-			atomic_set(&ip_vs_dropentry, 1);
+			atomic_set(&ipvs->dropentry, 1);
 		} else {
-			atomic_set(&ip_vs_dropentry, 0);
-			sysctl_ip_vs_drop_entry = 1;
+			atomic_set(&ipvs->dropentry, 0);
+			ipvs->sysctl_drop_entry = 1;
 		};
 		break;
 	case 3:
-		atomic_set(&ip_vs_dropentry, 1);
+		atomic_set(&ipvs->dropentry, 1);
 		break;
 	}
-	spin_unlock(&__ip_vs_dropentry_lock);
+	spin_unlock(&ipvs->dropentry_lock);
 
 	/* drop_packet */
-	spin_lock(&__ip_vs_droppacket_lock);
-	switch (sysctl_ip_vs_drop_packet) {
+	spin_lock(&ipvs->droppacket_lock);
+	switch (ipvs->sysctl_drop_packet) {
 	case 0:
-		ip_vs_drop_rate = 0;
+		ipvs->drop_rate = 0;
 		break;
 	case 1:
 		if (nomem) {
-			ip_vs_drop_rate = ip_vs_drop_counter
-				= sysctl_ip_vs_amemthresh /
-				(sysctl_ip_vs_amemthresh-availmem);
-			sysctl_ip_vs_drop_packet = 2;
+			ipvs->drop_rate = ipvs->drop_counter
+				= ipvs->sysctl_amemthresh /
+				(ipvs->sysctl_amemthresh-availmem);
+			ipvs->sysctl_drop_packet = 2;
 		} else {
-			ip_vs_drop_rate = 0;
+			ipvs->drop_rate = 0;
 		}
 		break;
 	case 2:
 		if (nomem) {
-			ip_vs_drop_rate = ip_vs_drop_counter
-				= sysctl_ip_vs_amemthresh /
-				(sysctl_ip_vs_amemthresh-availmem);
+			ipvs->drop_rate = ipvs->drop_counter
+				= ipvs->sysctl_amemthresh /
+				(ipvs->sysctl_amemthresh-availmem);
 		} else {
-			ip_vs_drop_rate = 0;
-			sysctl_ip_vs_drop_packet = 1;
+			ipvs->drop_rate = 0;
+			ipvs->sysctl_drop_packet = 1;
 		}
 		break;
 	case 3:
-		ip_vs_drop_rate = sysctl_ip_vs_am_droprate;
+		ipvs->drop_rate = ipvs->sysctl_am_droprate;
 		break;
 	}
-	spin_unlock(&__ip_vs_droppacket_lock);
+	spin_unlock(&ipvs->droppacket_lock);
 
 	/* secure_tcp */
-	spin_lock(&ip_vs_securetcp_lock);
-	switch (sysctl_ip_vs_secure_tcp) {
+	spin_lock(&ipvs->securetcp_lock);
+	switch (ipvs->sysctl_secure_tcp) {
 	case 0:
 		if (old_secure_tcp >= 2)
 			to_change = 0;
@@ -216,7 +183,7 @@ static void update_defense_level(void)
 		if (nomem) {
 			if (old_secure_tcp < 2)
 				to_change = 1;
-			sysctl_ip_vs_secure_tcp = 2;
+			ipvs->sysctl_secure_tcp = 2;
 		} else {
 			if (old_secure_tcp >= 2)
 				to_change = 0;
@@ -229,7 +196,7 @@ static void update_defense_level(void)
 		} else {
 			if (old_secure_tcp >= 2)
 				to_change = 0;
-			sysctl_ip_vs_secure_tcp = 1;
+			ipvs->sysctl_secure_tcp = 1;
 		}
 		break;
 	case 3:
@@ -237,10 +204,11 @@ static void update_defense_level(void)
 			to_change = 1;
 		break;
 	}
-	old_secure_tcp = sysctl_ip_vs_secure_tcp;
+	old_secure_tcp = ipvs->sysctl_secure_tcp;
 	if (to_change >= 0)
-		ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp>1);
-	spin_unlock(&ip_vs_securetcp_lock);
+		ip_vs_protocol_timeout_change(ipvs,
+					      ipvs->sysctl_secure_tcp > 1);
+	spin_unlock(&ipvs->securetcp_lock);
 
 	local_bh_enable();
 }
@@ -250,16 +218,16 @@ static void update_defense_level(void)
  *	Timer for checking the defense
  */
 #define DEFENSE_TIMER_PERIOD	1*HZ
-static void defense_work_handler(struct work_struct *work);
-static DECLARE_DELAYED_WORK(defense_work, defense_work_handler);
 
 static void defense_work_handler(struct work_struct *work)
 {
-	update_defense_level();
-	if (atomic_read(&ip_vs_dropentry))
-		ip_vs_random_dropentry();
+	struct netns_ipvs *ipvs =
+		container_of(work, struct netns_ipvs, defense_work.work);
 
-	schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
+	update_defense_level(ipvs);
+	if (atomic_read(&ipvs->dropentry))
+		ip_vs_random_dropentry(ipvs->net);
+	schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
 }
 
 int
@@ -287,33 +255,13 @@ static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
 /* the service table hashed by fwmark */
 static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
 
-/*
- *	Hash table: for real service lookups
- */
-#define IP_VS_RTAB_BITS 4
-#define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
-#define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
-
-static struct list_head ip_vs_rtable[IP_VS_RTAB_SIZE];
-
-/*
- *	Trash for destinations
- */
-static LIST_HEAD(ip_vs_dest_trash);
-
-/*
- *	FTP & NULL virtual service counters
- */
-static atomic_t ip_vs_ftpsvc_counter = ATOMIC_INIT(0);
-static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0);
-
 
 /*
  *	Returns hash value for virtual service
  */
-static __inline__ unsigned
-ip_vs_svc_hashkey(int af, unsigned proto, const union nf_inet_addr *addr,
-		  __be16 port)
+static inline unsigned
+ip_vs_svc_hashkey(struct net *net, int af, unsigned proto,
+		  const union nf_inet_addr *addr, __be16 port)
 {
 	register unsigned porth = ntohs(port);
 	__be32 addr_fold = addr->ip;
@@ -323,6 +271,7 @@ ip_vs_svc_hashkey(int af, unsigned proto, const union nf_inet_addr *addr,
 		addr_fold = addr->ip6[0]^addr->ip6[1]^
 			    addr->ip6[2]^addr->ip6[3];
 #endif
+	addr_fold ^= ((size_t)net>>8);
 
 	return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
 		& IP_VS_SVC_TAB_MASK;
@@ -331,13 +280,13 @@ ip_vs_svc_hashkey(int af, unsigned proto, const union nf_inet_addr *addr,
 /*
  *	Returns hash value of fwmark for virtual service lookup
  */
-static __inline__ unsigned ip_vs_svc_fwm_hashkey(__u32 fwmark)
+static inline unsigned ip_vs_svc_fwm_hashkey(struct net *net, __u32 fwmark)
 {
-	return fwmark & IP_VS_SVC_TAB_MASK;
+	return (((size_t)net>>8) ^ fwmark) & IP_VS_SVC_TAB_MASK;
 }
 
 /*
- *	Hashes a service in the ip_vs_svc_table by <proto,addr,port>
+ *	Hashes a service in the ip_vs_svc_table by <netns,proto,addr,port>
  *	or in the ip_vs_svc_fwm_table by fwmark.
  *	Should be called with locked tables.
  */
@@ -353,16 +302,16 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc)
 
 	if (svc->fwmark == 0) {
 		/*
-		 *  Hash it by <protocol,addr,port> in ip_vs_svc_table
+		 *  Hash it by <netns,protocol,addr,port> in ip_vs_svc_table
 		 */
-		hash = ip_vs_svc_hashkey(svc->af, svc->protocol, &svc->addr,
-					 svc->port);
+		hash = ip_vs_svc_hashkey(svc->net, svc->af, svc->protocol,
+					 &svc->addr, svc->port);
 		list_add(&svc->s_list, &ip_vs_svc_table[hash]);
 	} else {
 		/*
-		 *  Hash it by fwmark in ip_vs_svc_fwm_table
+		 *  Hash it by fwmark in svc_fwm_table
 		 */
-		hash = ip_vs_svc_fwm_hashkey(svc->fwmark);
+		hash = ip_vs_svc_fwm_hashkey(svc->net, svc->fwmark);
 		list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
 	}
 
@@ -374,7 +323,7 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc)
 
 
 /*
- *	Unhashes a service from ip_vs_svc_table/ip_vs_svc_fwm_table.
+ *	Unhashes a service from svc_table / svc_fwm_table.
  *	Should be called with locked tables.
  */
 static int ip_vs_svc_unhash(struct ip_vs_service *svc)
@@ -386,10 +335,10 @@ static int ip_vs_svc_unhash(struct ip_vs_service *svc)
 	}
 
 	if (svc->fwmark == 0) {
-		/* Remove it from the ip_vs_svc_table table */
+		/* Remove it from the svc_table table */
 		list_del(&svc->s_list);
 	} else {
-		/* Remove it from the ip_vs_svc_fwm_table table */
+		/* Remove it from the svc_fwm_table table */
 		list_del(&svc->f_list);
 	}
 
@@ -400,23 +349,24 @@ static int ip_vs_svc_unhash(struct ip_vs_service *svc)
 
 
 /*
- *	Get service by {proto,addr,port} in the service table.
+ *	Get service by {netns, proto,addr,port} in the service table.
  */
 static inline struct ip_vs_service *
-__ip_vs_service_find(int af, __u16 protocol, const union nf_inet_addr *vaddr,
-		    __be16 vport)
+__ip_vs_service_find(struct net *net, int af, __u16 protocol,
+		     const union nf_inet_addr *vaddr, __be16 vport)
 {
 	unsigned hash;
 	struct ip_vs_service *svc;
 
 	/* Check for "full" addressed entries */
-	hash = ip_vs_svc_hashkey(af, protocol, vaddr, vport);
+	hash = ip_vs_svc_hashkey(net, af, protocol, vaddr, vport);
 
 	list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
 		if ((svc->af == af)
 		    && ip_vs_addr_equal(af, &svc->addr, vaddr)
 		    && (svc->port == vport)
-		    && (svc->protocol == protocol)) {
+		    && (svc->protocol == protocol)
+		    && net_eq(svc->net, net)) {
 			/* HIT */
 			return svc;
 		}
@@ -430,16 +380,17 @@ __ip_vs_service_find(int af, __u16 protocol, const union nf_inet_addr *vaddr,
  *	Get service by {fwmark} in the service table.
  */
 static inline struct ip_vs_service *
-__ip_vs_svc_fwm_find(int af, __u32 fwmark)
+__ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark)
 {
 	unsigned hash;
 	struct ip_vs_service *svc;
 
 	/* Check for fwmark addressed entries */
-	hash = ip_vs_svc_fwm_hashkey(fwmark);
+	hash = ip_vs_svc_fwm_hashkey(net, fwmark);
 
 	list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
-		if (svc->fwmark == fwmark && svc->af == af) {
+		if (svc->fwmark == fwmark && svc->af == af
+		    && net_eq(svc->net, net)) {
 			/* HIT */
 			return svc;
 		}
@@ -449,42 +400,44 @@ __ip_vs_svc_fwm_find(int af, __u32 fwmark)
 }
 
 struct ip_vs_service *
-ip_vs_service_get(int af, __u32 fwmark, __u16 protocol,
+ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol,
 		  const union nf_inet_addr *vaddr, __be16 vport)
 {
 	struct ip_vs_service *svc;
+	struct netns_ipvs *ipvs = net_ipvs(net);
 
 	read_lock(&__ip_vs_svc_lock);
 
 	/*
 	 *	Check the table hashed by fwmark first
 	 */
-	if (fwmark && (svc = __ip_vs_svc_fwm_find(af, fwmark)))
+	svc = __ip_vs_svc_fwm_find(net, af, fwmark);
+	if (fwmark && svc)
 		goto out;
 
 	/*
 	 *	Check the table hashed by <protocol,addr,port>
 	 *	for "full" addressed entries
 	 */
-	svc = __ip_vs_service_find(af, protocol, vaddr, vport);
+	svc = __ip_vs_service_find(net, af, protocol, vaddr, vport);
 
 	if (svc == NULL
 	    && protocol == IPPROTO_TCP
-	    && atomic_read(&ip_vs_ftpsvc_counter)
+	    && atomic_read(&ipvs->ftpsvc_counter)
 	    && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
 		/*
 		 * Check if ftp service entry exists, the packet
 		 * might belong to FTP data connections.
 		 */
-		svc = __ip_vs_service_find(af, protocol, vaddr, FTPPORT);
+		svc = __ip_vs_service_find(net, af, protocol, vaddr, FTPPORT);
 	}
 
 	if (svc == NULL
-	    && atomic_read(&ip_vs_nullsvc_counter)) {
+	    && atomic_read(&ipvs->nullsvc_counter)) {
 		/*
 		 * Check if the catch-all port (port zero) exists
 		 */
-		svc = __ip_vs_service_find(af, protocol, vaddr, 0);
+		svc = __ip_vs_service_find(net, af, protocol, vaddr, 0);
 	}
 
   out:
@@ -519,6 +472,7 @@ __ip_vs_unbind_svc(struct ip_vs_dest *dest)
 			      svc->fwmark,
 			      IP_VS_DBG_ADDR(svc->af, &svc->addr),
 			      ntohs(svc->port), atomic_read(&svc->usecnt));
+		free_percpu(svc->stats.cpustats);
 		kfree(svc);
 	}
 }
@@ -545,10 +499,10 @@ static inline unsigned ip_vs_rs_hashkey(int af,
 }
 
 /*
- *	Hashes ip_vs_dest in ip_vs_rtable by <proto,addr,port>.
+ *	Hashes ip_vs_dest in rs_table by <proto,addr,port>.
  *	should be called with locked tables.
  */
-static int ip_vs_rs_hash(struct ip_vs_dest *dest)
+static int ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest)
 {
 	unsigned hash;
 
@@ -562,19 +516,19 @@ static int ip_vs_rs_hash(struct ip_vs_dest *dest)
 	 */
 	hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
 
-	list_add(&dest->d_list, &ip_vs_rtable[hash]);
+	list_add(&dest->d_list, &ipvs->rs_table[hash]);
 
 	return 1;
 }
 
 /*
- *	UNhashes ip_vs_dest from ip_vs_rtable.
+ *	UNhashes ip_vs_dest from rs_table.
  *	should be called with locked tables.
  */
 static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
 {
 	/*
-	 * Remove it from the ip_vs_rtable table.
+	 * Remove it from the rs_table table.
 	 */
 	if (!list_empty(&dest->d_list)) {
 		list_del(&dest->d_list);
@@ -588,10 +542,11 @@ static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
  *	Lookup real service by <proto,addr,port> in the real service table.
  */
 struct ip_vs_dest *
-ip_vs_lookup_real_service(int af, __u16 protocol,
+ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol,
 			  const union nf_inet_addr *daddr,
 			  __be16 dport)
 {
+	struct netns_ipvs *ipvs = net_ipvs(net);
 	unsigned hash;
 	struct ip_vs_dest *dest;
 
@@ -601,19 +556,19 @@ ip_vs_lookup_real_service(int af, __u16 protocol,
 	 */
 	hash = ip_vs_rs_hashkey(af, daddr, dport);
 
-	read_lock(&__ip_vs_rs_lock);
-	list_for_each_entry(dest, &ip_vs_rtable[hash], d_list) {
+	read_lock(&ipvs->rs_lock);
+	list_for_each_entry(dest, &ipvs->rs_table[hash], d_list) {
 		if ((dest->af == af)
 		    && ip_vs_addr_equal(af, &dest->addr, daddr)
 		    && (dest->port == dport)
 		    && ((dest->protocol == protocol) ||
 			dest->vfwmark)) {
 			/* HIT */
-			read_unlock(&__ip_vs_rs_lock);
+			read_unlock(&ipvs->rs_lock);
 			return dest;
 		}
 	}
-	read_unlock(&__ip_vs_rs_lock);
+	read_unlock(&ipvs->rs_lock);
 
 	return NULL;
 }
@@ -652,15 +607,16 @@ ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
  * ip_vs_lookup_real_service() looked promissing, but
  * seems not working as expected.
  */
-struct ip_vs_dest *ip_vs_find_dest(int af, const union nf_inet_addr *daddr,
+struct ip_vs_dest *ip_vs_find_dest(struct net  *net, int af,
+				   const union nf_inet_addr *daddr,
 				   __be16 dport,
 				   const union nf_inet_addr *vaddr,
-				   __be16 vport, __u16 protocol)
+				   __be16 vport, __u16 protocol, __u32 fwmark)
 {
 	struct ip_vs_dest *dest;
 	struct ip_vs_service *svc;
 
-	svc = ip_vs_service_get(af, 0, protocol, vaddr, vport);
+	svc = ip_vs_service_get(net, af, fwmark, protocol, vaddr, vport);
 	if (!svc)
 		return NULL;
 	dest = ip_vs_lookup_dest(svc, daddr, dport);
@@ -685,11 +641,12 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
 		     __be16 dport)
 {
 	struct ip_vs_dest *dest, *nxt;
+	struct netns_ipvs *ipvs = net_ipvs(svc->net);
 
 	/*
 	 * Find the destination in trash
 	 */
-	list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
+	list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) {
 		IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
 			      "dest->refcnt=%d\n",
 			      dest->vfwmark,
@@ -720,6 +677,7 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
 			list_del(&dest->n_list);
 			ip_vs_dst_reset(dest);
 			__ip_vs_unbind_svc(dest);
+			free_percpu(dest->stats.cpustats);
 			kfree(dest);
 		}
 	}
@@ -737,14 +695,16 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
  *  are expired, and the refcnt of each destination in the trash must
  *  be 1, so we simply release them here.
  */
-static void ip_vs_trash_cleanup(void)
+static void ip_vs_trash_cleanup(struct net *net)
 {
 	struct ip_vs_dest *dest, *nxt;
+	struct netns_ipvs *ipvs = net_ipvs(net);
 
-	list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
+	list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) {
 		list_del(&dest->n_list);
 		ip_vs_dst_reset(dest);
 		__ip_vs_unbind_svc(dest);
+		free_percpu(dest->stats.cpustats);
 		kfree(dest);
 	}
 }
@@ -768,6 +728,7 @@ static void
 __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
 		    struct ip_vs_dest_user_kern *udest, int add)
 {
+	struct netns_ipvs *ipvs = net_ipvs(svc->net);
 	int conn_flags;
 
 	/* set the weight and the flags */
@@ -780,12 +741,12 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
 		conn_flags |= IP_VS_CONN_F_NOOUTPUT;
 	} else {
 		/*
-		 *    Put the real service in ip_vs_rtable if not present.
+		 *    Put the real service in rs_table if not present.
 		 *    For now only for NAT!
 		 */
-		write_lock_bh(&__ip_vs_rs_lock);
-		ip_vs_rs_hash(dest);
-		write_unlock_bh(&__ip_vs_rs_lock);
+		write_lock_bh(&ipvs->rs_lock);
+		ip_vs_rs_hash(ipvs, dest);
+		write_unlock_bh(&ipvs->rs_lock);
 	}
 	atomic_set(&dest->conn_flags, conn_flags);
 
@@ -813,7 +774,7 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
 	spin_unlock_bh(&dest->dst_lock);
 
 	if (add)
-		ip_vs_new_estimator(&dest->stats);
+		ip_vs_new_estimator(svc->net, &dest->stats);
 
 	write_lock_bh(&__ip_vs_svc_lock);
 
@@ -850,12 +811,12 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
 		atype = ipv6_addr_type(&udest->addr.in6);
 		if ((!(atype & IPV6_ADDR_UNICAST) ||
 			atype & IPV6_ADDR_LINKLOCAL) &&
-			!__ip_vs_addr_is_local_v6(&udest->addr.in6))
+			!__ip_vs_addr_is_local_v6(svc->net, &udest->addr.in6))
 			return -EINVAL;
 	} else
 #endif
 	{
-		atype = inet_addr_type(&init_net, udest->addr.ip);
+		atype = inet_addr_type(svc->net, udest->addr.ip);
 		if (atype != RTN_LOCAL && atype != RTN_UNICAST)
 			return -EINVAL;
 	}
@@ -865,6 +826,11 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
 		pr_err("%s(): no memory.\n", __func__);
 		return -ENOMEM;
 	}
+	dest->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
+	if (!dest->stats.cpustats) {
+		pr_err("%s() alloc_percpu failed\n", __func__);
+		goto err_alloc;
+	}
 
 	dest->af = svc->af;
 	dest->protocol = svc->protocol;
@@ -888,6 +854,10 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
 
 	LeaveFunction(2);
 	return 0;
+
+err_alloc:
+	kfree(dest);
+	return -ENOMEM;
 }
 
 
@@ -1006,16 +976,18 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
 /*
  *	Delete a destination (must be already unlinked from the service)
  */
-static void __ip_vs_del_dest(struct ip_vs_dest *dest)
+static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest)
 {
-	ip_vs_kill_estimator(&dest->stats);
+	struct netns_ipvs *ipvs = net_ipvs(net);
+
+	ip_vs_kill_estimator(net, &dest->stats);
 
 	/*
 	 *  Remove it from the d-linked list with the real services.
 	 */
-	write_lock_bh(&__ip_vs_rs_lock);
+	write_lock_bh(&ipvs->rs_lock);
 	ip_vs_rs_unhash(dest);
-	write_unlock_bh(&__ip_vs_rs_lock);
+	write_unlock_bh(&ipvs->rs_lock);
 
 	/*
 	 *  Decrease the refcnt of the dest, and free the dest
@@ -1034,6 +1006,7 @@ static void __ip_vs_del_dest(struct ip_vs_dest *dest)
 		   and only one user context can update virtual service at a
 		   time, so the operation here is OK */
 		atomic_dec(&dest->svc->refcnt);
+		free_percpu(dest->stats.cpustats);
 		kfree(dest);
 	} else {
 		IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
@@ -1041,7 +1014,7 @@ static void __ip_vs_del_dest(struct ip_vs_dest *dest)
 			      IP_VS_DBG_ADDR(dest->af, &dest->addr),
 			      ntohs(dest->port),
 			      atomic_read(&dest->refcnt));
-		list_add(&dest->n_list, &ip_vs_dest_trash);
+		list_add(&dest->n_list, &ipvs->dest_trash);
 		atomic_inc(&dest->refcnt);
 	}
 }
@@ -1105,7 +1078,7 @@ ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
 	/*
 	 *	Delete the destination
 	 */
-	__ip_vs_del_dest(dest);
+	__ip_vs_del_dest(svc->net, dest);
 
 	LeaveFunction(2);
 
@@ -1117,13 +1090,14 @@ ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
  *	Add a service into the service hash table
  */
 static int
-ip_vs_add_service(struct ip_vs_service_user_kern *u,
+ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
 		  struct ip_vs_service **svc_p)
 {
 	int ret = 0;
 	struct ip_vs_scheduler *sched = NULL;
 	struct ip_vs_pe *pe = NULL;
 	struct ip_vs_service *svc = NULL;
+	struct netns_ipvs *ipvs = net_ipvs(net);
 
 	/* increase the module use count */
 	ip_vs_use_count_inc();
@@ -1137,7 +1111,7 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u,
 	}
 
 	if (u->pe_name && *u->pe_name) {
-		pe = ip_vs_pe_get(u->pe_name);
+		pe = ip_vs_pe_getbyname(u->pe_name);
 		if (pe == NULL) {
 			pr_info("persistence engine module ip_vs_pe_%s "
 				"not found\n", u->pe_name);
@@ -1159,6 +1133,11 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u,
 		ret = -ENOMEM;
 		goto out_err;
 	}
+	svc->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
+	if (!svc->stats.cpustats) {
+		pr_err("%s() alloc_percpu failed\n", __func__);
+		goto out_err;
+	}
 
 	/* I'm the first user of the service */
 	atomic_set(&svc->usecnt, 0);
@@ -1172,6 +1151,7 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u,
 	svc->flags = u->flags;
 	svc->timeout = u->timeout * HZ;
 	svc->netmask = u->netmask;
+	svc->net = net;
 
 	INIT_LIST_HEAD(&svc->destinations);
 	rwlock_init(&svc->sched_lock);
@@ -1189,15 +1169,15 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u,
 
 	/* Update the virtual service counters */
 	if (svc->port == FTPPORT)
-		atomic_inc(&ip_vs_ftpsvc_counter);
+		atomic_inc(&ipvs->ftpsvc_counter);
 	else if (svc->port == 0)
-		atomic_inc(&ip_vs_nullsvc_counter);
+		atomic_inc(&ipvs->nullsvc_counter);
 
-	ip_vs_new_estimator(&svc->stats);
+	ip_vs_new_estimator(net, &svc->stats);
 
 	/* Count only IPv4 services for old get/setsockopt interface */
 	if (svc->af == AF_INET)
-		ip_vs_num_services++;
+		ipvs->num_services++;
 
 	/* Hash the service into the service table */
 	write_lock_bh(&__ip_vs_svc_lock);
@@ -1207,6 +1187,7 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u,
 	*svc_p = svc;
 	return 0;
 
+
  out_err:
 	if (svc != NULL) {
 		ip_vs_unbind_scheduler(svc);
@@ -1215,6 +1196,8 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u,
 			ip_vs_app_inc_put(svc->inc);
 			local_bh_enable();
 		}
+		if (svc->stats.cpustats)
+			free_percpu(svc->stats.cpustats);
 		kfree(svc);
 	}
 	ip_vs_scheduler_put(sched);
@@ -1248,7 +1231,7 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
 	old_sched = sched;
 
 	if (u->pe_name && *u->pe_name) {
-		pe = ip_vs_pe_get(u->pe_name);
+		pe = ip_vs_pe_getbyname(u->pe_name);
 		if (pe == NULL) {
 			pr_info("persistence engine module ip_vs_pe_%s "
 				"not found\n", u->pe_name);
@@ -1334,14 +1317,15 @@ static void __ip_vs_del_service(struct ip_vs_service *svc)
 	struct ip_vs_dest *dest, *nxt;
 	struct ip_vs_scheduler *old_sched;
 	struct ip_vs_pe *old_pe;
+	struct netns_ipvs *ipvs = net_ipvs(svc->net);
 
 	pr_info("%s: enter\n", __func__);
 
 	/* Count only IPv4 services for old get/setsockopt interface */
 	if (svc->af == AF_INET)
-		ip_vs_num_services--;
+		ipvs->num_services--;
 
-	ip_vs_kill_estimator(&svc->stats);
+	ip_vs_kill_estimator(svc->net, &svc->stats);
 
 	/* Unbind scheduler */
 	old_sched = svc->scheduler;
@@ -1364,16 +1348,16 @@ static void __ip_vs_del_service(struct ip_vs_service *svc)
 	 */
 	list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
 		__ip_vs_unlink_dest(svc, dest, 0);
-		__ip_vs_del_dest(dest);
+		__ip_vs_del_dest(svc->net, dest);
 	}
 
 	/*
 	 *    Update the virtual service counters
 	 */
 	if (svc->port == FTPPORT)
-		atomic_dec(&ip_vs_ftpsvc_counter);
+		atomic_dec(&ipvs->ftpsvc_counter);
 	else if (svc->port == 0)
-		atomic_dec(&ip_vs_nullsvc_counter);
+		atomic_dec(&ipvs->nullsvc_counter);
 
 	/*
 	 *    Free the service if nobody refers to it
@@ -1383,6 +1367,7 @@ static void __ip_vs_del_service(struct ip_vs_service *svc)
 			      svc->fwmark,
 			      IP_VS_DBG_ADDR(svc->af, &svc->addr),
 			      ntohs(svc->port), atomic_read(&svc->usecnt));
+		free_percpu(svc->stats.cpustats);
 		kfree(svc);
 	}
 
@@ -1428,17 +1413,19 @@ static int ip_vs_del_service(struct ip_vs_service *svc)
 /*
  *	Flush all the virtual services
  */
-static int ip_vs_flush(void)
+static int ip_vs_flush(struct net *net)
 {
 	int idx;
 	struct ip_vs_service *svc, *nxt;
 
 	/*
-	 * Flush the service table hashed by <protocol,addr,port>
+	 * Flush the service table hashed by <netns,protocol,addr,port>
 	 */
 	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
-		list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], s_list) {
-			ip_vs_unlink_service(svc);
+		list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx],
+					 s_list) {
+			if (net_eq(svc->net, net))
+				ip_vs_unlink_service(svc);
 		}
 	}
 
@@ -1448,7 +1435,8 @@ static int ip_vs_flush(void)
 	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
 		list_for_each_entry_safe(svc, nxt,
 					 &ip_vs_svc_fwm_table[idx], f_list) {
-			ip_vs_unlink_service(svc);
+			if (net_eq(svc->net, net))
+				ip_vs_unlink_service(svc);
 		}
 	}
 
@@ -1472,24 +1460,26 @@ static int ip_vs_zero_service(struct ip_vs_service *svc)
 	return 0;
 }
 
-static int ip_vs_zero_all(void)
+static int ip_vs_zero_all(struct net *net)
 {
 	int idx;
 	struct ip_vs_service *svc;
 
 	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
 		list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
-			ip_vs_zero_service(svc);
+			if (net_eq(svc->net, net))
+				ip_vs_zero_service(svc);
 		}
 	}
 
 	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
 		list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
-			ip_vs_zero_service(svc);
+			if (net_eq(svc->net, net))
+				ip_vs_zero_service(svc);
 		}
 	}
 
-	ip_vs_zero_stats(&ip_vs_stats);
+	ip_vs_zero_stats(net_ipvs(net)->tot_stats);
 	return 0;
 }
 
@@ -1498,6 +1488,7 @@ static int
 proc_do_defense_mode(ctl_table *table, int write,
 		     void __user *buffer, size_t *lenp, loff_t *ppos)
 {
+	struct net *net = current->nsproxy->net_ns;
 	int *valp = table->data;
 	int val = *valp;
 	int rc;
@@ -1508,7 +1499,7 @@ proc_do_defense_mode(ctl_table *table, int write,
 			/* Restore the correct value */
 			*valp = val;
 		} else {
-			update_defense_level();
+			update_defense_level(net_ipvs(net));
 		}
 	}
 	return rc;
@@ -1534,45 +1525,54 @@ proc_do_sync_threshold(ctl_table *table, int write,
 	return rc;
 }
 
+static int
+proc_do_sync_mode(ctl_table *table, int write,
+		     void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	int *valp = table->data;
+	int val = *valp;
+	int rc;
+
+	rc = proc_dointvec(table, write, buffer, lenp, ppos);
+	if (write && (*valp != val)) {
+		if ((*valp < 0) || (*valp > 1)) {
+			/* Restore the correct value */
+			*valp = val;
+		} else {
+			struct net *net = current->nsproxy->net_ns;
+			ip_vs_sync_switch_mode(net, val);
+		}
+	}
+	return rc;
+}
 
 /*
  *	IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
+ *	Do not change order or insert new entries without
+ *	align with netns init in __ip_vs_control_init()
  */
 
 static struct ctl_table vs_vars[] = {
 	{
 		.procname	= "amemthresh",
-		.data		= &sysctl_ip_vs_amemthresh,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
 	},
-#ifdef CONFIG_IP_VS_DEBUG
-	{
-		.procname	= "debug_level",
-		.data		= &sysctl_ip_vs_debug_level,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-#endif
 	{
 		.procname	= "am_droprate",
-		.data		= &sysctl_ip_vs_am_droprate,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
 	},
 	{
 		.procname	= "drop_entry",
-		.data		= &sysctl_ip_vs_drop_entry,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_do_defense_mode,
 	},
 	{
 		.procname	= "drop_packet",
-		.data		= &sysctl_ip_vs_drop_packet,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_do_defense_mode,
@@ -1580,7 +1580,6 @@ static struct ctl_table vs_vars[] = {
 #ifdef CONFIG_IP_VS_NFCT
 	{
 		.procname	= "conntrack",
-		.data		= &sysctl_ip_vs_conntrack,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
@@ -1588,18 +1587,62 @@ static struct ctl_table vs_vars[] = {
 #endif
 	{
 		.procname	= "secure_tcp",
-		.data		= &sysctl_ip_vs_secure_tcp,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_do_defense_mode,
 	},
 	{
 		.procname	= "snat_reroute",
-		.data		= &sysctl_ip_vs_snat_reroute,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
 	},
+	{
+		.procname	= "sync_version",
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_do_sync_mode,
+	},
+	{
+		.procname	= "cache_bypass",
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname	= "expire_nodest_conn",
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname	= "expire_quiescent_template",
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname	= "sync_threshold",
+		.maxlen		=
+			sizeof(((struct netns_ipvs *)0)->sysctl_sync_threshold),
+		.mode		= 0644,
+		.proc_handler	= proc_do_sync_threshold,
+	},
+	{
+		.procname	= "nat_icmp_send",
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+#ifdef CONFIG_IP_VS_DEBUG
+	{
+		.procname	= "debug_level",
+		.data		= &sysctl_ip_vs_debug_level,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+#endif
 #if 0
 	{
 		.procname	= "timeout_established",
@@ -1686,41 +1729,6 @@ static struct ctl_table vs_vars[] = {
 		.proc_handler	= proc_dointvec_jiffies,
 	},
 #endif
-	{
-		.procname	= "cache_bypass",
-		.data		= &sysctl_ip_vs_cache_bypass,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-	{
-		.procname	= "expire_nodest_conn",
-		.data		= &sysctl_ip_vs_expire_nodest_conn,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-	{
-		.procname	= "expire_quiescent_template",
-		.data		= &sysctl_ip_vs_expire_quiescent_template,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-	{
-		.procname	= "sync_threshold",
-		.data		= &sysctl_ip_vs_sync_threshold,
-		.maxlen		= sizeof(sysctl_ip_vs_sync_threshold),
-		.mode		= 0644,
-		.proc_handler	= proc_do_sync_threshold,
-	},
-	{
-		.procname	= "nat_icmp_send",
-		.data		= &sysctl_ip_vs_nat_icmp_send,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
 	{ }
 };
 
@@ -1732,11 +1740,10 @@ const struct ctl_path net_vs_ctl_path[] = {
 };
 EXPORT_SYMBOL_GPL(net_vs_ctl_path);
 
-static struct ctl_table_header * sysctl_header;
-
 #ifdef CONFIG_PROC_FS
 
 struct ip_vs_iter {
+	struct seq_net_private p;  /* Do not move this, netns depends upon it*/
 	struct list_head *table;
 	int bucket;
 };
@@ -1763,6 +1770,7 @@ static inline const char *ip_vs_fwd_name(unsigned flags)
 /* Get the Nth entry in the two lists */
 static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
 {
+	struct net *net = seq_file_net(seq);
 	struct ip_vs_iter *iter = seq->private;
 	int idx;
 	struct ip_vs_service *svc;
@@ -1770,7 +1778,7 @@ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
 	/* look in hash by protocol */
 	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
 		list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
-			if (pos-- == 0){
+			if (net_eq(svc->net, net) && pos-- == 0) {
 				iter->table = ip_vs_svc_table;
 				iter->bucket = idx;
 				return svc;
@@ -1781,7 +1789,7 @@ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
 	/* keep looking in fwmark */
 	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
 		list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
-			if (pos-- == 0) {
+			if (net_eq(svc->net, net) && pos-- == 0) {
 				iter->table = ip_vs_svc_fwm_table;
 				iter->bucket = idx;
 				return svc;
@@ -1935,7 +1943,7 @@ static const struct seq_operations ip_vs_info_seq_ops = {
 
 static int ip_vs_info_open(struct inode *inode, struct file *file)
 {
-	return seq_open_private(file, &ip_vs_info_seq_ops,
+	return seq_open_net(inode, file, &ip_vs_info_seq_ops,
 			sizeof(struct ip_vs_iter));
 }
 
@@ -1949,13 +1957,11 @@ static const struct file_operations ip_vs_info_fops = {
 
 #endif
 
-struct ip_vs_stats ip_vs_stats = {
-	.lock = __SPIN_LOCK_UNLOCKED(ip_vs_stats.lock),
-};
-
 #ifdef CONFIG_PROC_FS
 static int ip_vs_stats_show(struct seq_file *seq, void *v)
 {
+	struct net *net = seq_file_single_net(seq);
+	struct ip_vs_stats *tot_stats = net_ipvs(net)->tot_stats;
 
 /*               01234567 01234567 01234567 0123456701234567 0123456701234567 */
 	seq_puts(seq,
@@ -1963,29 +1969,29 @@ static int ip_vs_stats_show(struct seq_file *seq, void *v)
 	seq_printf(seq,
 		   "   Conns  Packets  Packets            Bytes            Bytes\n");
 
-	spin_lock_bh(&ip_vs_stats.lock);
-	seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.ustats.conns,
-		   ip_vs_stats.ustats.inpkts, ip_vs_stats.ustats.outpkts,
-		   (unsigned long long) ip_vs_stats.ustats.inbytes,
-		   (unsigned long long) ip_vs_stats.ustats.outbytes);
+	spin_lock_bh(&tot_stats->lock);
+	seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", tot_stats->ustats.conns,
+		   tot_stats->ustats.inpkts, tot_stats->ustats.outpkts,
+		   (unsigned long long) tot_stats->ustats.inbytes,
+		   (unsigned long long) tot_stats->ustats.outbytes);
 
 /*                 01234567 01234567 01234567 0123456701234567 0123456701234567 */
 	seq_puts(seq,
 		   " Conns/s   Pkts/s   Pkts/s          Bytes/s          Bytes/s\n");
 	seq_printf(seq,"%8X %8X %8X %16X %16X\n",
-			ip_vs_stats.ustats.cps,
-			ip_vs_stats.ustats.inpps,
-			ip_vs_stats.ustats.outpps,
-			ip_vs_stats.ustats.inbps,
-			ip_vs_stats.ustats.outbps);
-	spin_unlock_bh(&ip_vs_stats.lock);
+			tot_stats->ustats.cps,
+			tot_stats->ustats.inpps,
+			tot_stats->ustats.outpps,
+			tot_stats->ustats.inbps,
+			tot_stats->ustats.outbps);
+	spin_unlock_bh(&tot_stats->lock);
 
 	return 0;
 }
 
 static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
 {
-	return single_open(file, ip_vs_stats_show, NULL);
+	return single_open_net(inode, file, ip_vs_stats_show);
 }
 
 static const struct file_operations ip_vs_stats_fops = {
@@ -1996,13 +2002,70 @@ static const struct file_operations ip_vs_stats_fops = {
 	.release = single_release,
 };
 
+static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v)
+{
+	struct net *net = seq_file_single_net(seq);
+	struct ip_vs_stats *tot_stats = net_ipvs(net)->tot_stats;
+	int i;
+
+/*               01234567 01234567 01234567 0123456701234567 0123456701234567 */
+	seq_puts(seq,
+		 "       Total Incoming Outgoing         Incoming         Outgoing\n");
+	seq_printf(seq,
+		   "CPU    Conns  Packets  Packets            Bytes            Bytes\n");
+
+	for_each_possible_cpu(i) {
+		struct ip_vs_cpu_stats *u = per_cpu_ptr(net->ipvs->cpustats, i);
+		seq_printf(seq, "%3X %8X %8X %8X %16LX %16LX\n",
+			    i, u->ustats.conns, u->ustats.inpkts,
+			    u->ustats.outpkts, (__u64)u->ustats.inbytes,
+			    (__u64)u->ustats.outbytes);
+	}
+
+	spin_lock_bh(&tot_stats->lock);
+	seq_printf(seq, "  ~ %8X %8X %8X %16LX %16LX\n\n",
+		   tot_stats->ustats.conns, tot_stats->ustats.inpkts,
+		   tot_stats->ustats.outpkts,
+		   (unsigned long long) tot_stats->ustats.inbytes,
+		   (unsigned long long) tot_stats->ustats.outbytes);
+
+/*                 01234567 01234567 01234567 0123456701234567 0123456701234567 */
+	seq_puts(seq,
+		   "     Conns/s   Pkts/s   Pkts/s          Bytes/s          Bytes/s\n");
+	seq_printf(seq, "    %8X %8X %8X %16X %16X\n",
+			tot_stats->ustats.cps,
+			tot_stats->ustats.inpps,
+			tot_stats->ustats.outpps,
+			tot_stats->ustats.inbps,
+			tot_stats->ustats.outbps);
+	spin_unlock_bh(&tot_stats->lock);
+
+	return 0;
+}
+
+static int ip_vs_stats_percpu_seq_open(struct inode *inode, struct file *file)
+{
+	return single_open_net(inode, file, ip_vs_stats_percpu_show);
+}
+
+static const struct file_operations ip_vs_stats_percpu_fops = {
+	.owner = THIS_MODULE,
+	.open = ip_vs_stats_percpu_seq_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
 #endif
 
 /*
  *	Set timeout values for tcp tcpfin udp in the timeout_table.
  */
-static int ip_vs_set_timeout(struct ip_vs_timeout_user *u)
+static int ip_vs_set_timeout(struct net *net, struct ip_vs_timeout_user *u)
 {
+#if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
+	struct ip_vs_proto_data *pd;
+#endif
+
 	IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
 		  u->tcp_timeout,
 		  u->tcp_fin_timeout,
@@ -2010,19 +2073,22 @@ static int ip_vs_set_timeout(struct ip_vs_timeout_user *u)
 
 #ifdef CONFIG_IP_VS_PROTO_TCP
 	if (u->tcp_timeout) {
-		ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED]
+		pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
+		pd->timeout_table[IP_VS_TCP_S_ESTABLISHED]
 			= u->tcp_timeout * HZ;
 	}
 
 	if (u->tcp_fin_timeout) {
-		ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT]
+		pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
+		pd->timeout_table[IP_VS_TCP_S_FIN_WAIT]
 			= u->tcp_fin_timeout * HZ;
 	}
 #endif
 
 #ifdef CONFIG_IP_VS_PROTO_UDP
 	if (u->udp_timeout) {
-		ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL]
+		pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
+		pd->timeout_table[IP_VS_UDP_S_NORMAL]
 			= u->udp_timeout * HZ;
 	}
 #endif
@@ -2087,6 +2153,7 @@ static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
 static int
 do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
 {
+	struct net *net = sock_net(sk);
 	int ret;
 	unsigned char arg[MAX_ARG_LEN];
 	struct ip_vs_service_user *usvc_compat;
@@ -2121,19 +2188,20 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
 
 	if (cmd == IP_VS_SO_SET_FLUSH) {
 		/* Flush the virtual service */
-		ret = ip_vs_flush();
+		ret = ip_vs_flush(net);
 		goto out_unlock;
 	} else if (cmd == IP_VS_SO_SET_TIMEOUT) {
 		/* Set timeout values for (tcp tcpfin udp) */
-		ret = ip_vs_set_timeout((struct ip_vs_timeout_user *)arg);
+		ret = ip_vs_set_timeout(net, (struct ip_vs_timeout_user *)arg);
 		goto out_unlock;
 	} else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
 		struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
-		ret = start_sync_thread(dm->state, dm->mcast_ifn, dm->syncid);
+		ret = start_sync_thread(net, dm->state, dm->mcast_ifn,
+					dm->syncid);
 		goto out_unlock;
 	} else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
 		struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
-		ret = stop_sync_thread(dm->state);
+		ret = stop_sync_thread(net, dm->state);
 		goto out_unlock;
 	}
 
@@ -2148,7 +2216,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
 	if (cmd == IP_VS_SO_SET_ZERO) {
 		/* if no service address is set, zero counters in all */
 		if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
-			ret = ip_vs_zero_all();
+			ret = ip_vs_zero_all(net);
 			goto out_unlock;
 		}
 	}
@@ -2165,10 +2233,10 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
 
 	/* Lookup the exact service by <protocol, addr, port> or fwmark */
 	if (usvc.fwmark == 0)
-		svc = __ip_vs_service_find(usvc.af, usvc.protocol,
+		svc = __ip_vs_service_find(net, usvc.af, usvc.protocol,
 					   &usvc.addr, usvc.port);
 	else
-		svc = __ip_vs_svc_fwm_find(usvc.af, usvc.fwmark);
+		svc = __ip_vs_svc_fwm_find(net, usvc.af, usvc.fwmark);
 
 	if (cmd != IP_VS_SO_SET_ADD
 	    && (svc == NULL || svc->protocol != usvc.protocol)) {
@@ -2181,7 +2249,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
 		if (svc != NULL)
 			ret = -EEXIST;
 		else
-			ret = ip_vs_add_service(&usvc, &svc);
+			ret = ip_vs_add_service(net, &usvc, &svc);
 		break;
 	case IP_VS_SO_SET_EDIT:
 		ret = ip_vs_edit_service(svc, &usvc);
@@ -2241,7 +2309,8 @@ ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
 }
 
 static inline int
-__ip_vs_get_service_entries(const struct ip_vs_get_services *get,
+__ip_vs_get_service_entries(struct net *net,
+			    const struct ip_vs_get_services *get,
 			    struct ip_vs_get_services __user *uptr)
 {
 	int idx, count=0;
@@ -2252,7 +2321,7 @@ __ip_vs_get_service_entries(const struct ip_vs_get_services *get,
 	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
 		list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
 			/* Only expose IPv4 entries to old interface */
-			if (svc->af != AF_INET)
+			if (svc->af != AF_INET || !net_eq(svc->net, net))
 				continue;
 
 			if (count >= get->num_services)
@@ -2271,7 +2340,7 @@ __ip_vs_get_service_entries(const struct ip_vs_get_services *get,
 	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
 		list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
 			/* Only expose IPv4 entries to old interface */
-			if (svc->af != AF_INET)
+			if (svc->af != AF_INET || !net_eq(svc->net, net))
 				continue;
 
 			if (count >= get->num_services)
@@ -2291,7 +2360,7 @@ __ip_vs_get_service_entries(const struct ip_vs_get_services *get,
 }
 
 static inline int
-__ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
+__ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,
 			 struct ip_vs_get_dests __user *uptr)
 {
 	struct ip_vs_service *svc;
@@ -2299,9 +2368,9 @@ __ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
 	int ret = 0;
 
 	if (get->fwmark)
-		svc = __ip_vs_svc_fwm_find(AF_INET, get->fwmark);
+		svc = __ip_vs_svc_fwm_find(net, AF_INET, get->fwmark);
 	else
-		svc = __ip_vs_service_find(AF_INET, get->protocol, &addr,
+		svc = __ip_vs_service_find(net, AF_INET, get->protocol, &addr,
 					   get->port);
 
 	if (svc) {
@@ -2336,17 +2405,21 @@ __ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
 }
 
 static inline void
-__ip_vs_get_timeouts(struct ip_vs_timeout_user *u)
+__ip_vs_get_timeouts(struct net *net, struct ip_vs_timeout_user *u)
 {
+#if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
+	struct ip_vs_proto_data *pd;
+#endif
+
 #ifdef CONFIG_IP_VS_PROTO_TCP
-	u->tcp_timeout =
-		ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
-	u->tcp_fin_timeout =
-		ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
+	pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
+	u->tcp_timeout = pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
+	u->tcp_fin_timeout = pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
 #endif
 #ifdef CONFIG_IP_VS_PROTO_UDP
+	pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
 	u->udp_timeout =
-		ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
+			pd->timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
 #endif
 }
 
@@ -2375,7 +2448,10 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
 	unsigned char arg[128];
 	int ret = 0;
 	unsigned int copylen;
+	struct net *net = sock_net(sk);
+	struct netns_ipvs *ipvs = net_ipvs(net);
 
+	BUG_ON(!net);
 	if (!capable(CAP_NET_ADMIN))
 		return -EPERM;
 
@@ -2418,7 +2494,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
 		struct ip_vs_getinfo info;
 		info.version = IP_VS_VERSION_CODE;
 		info.size = ip_vs_conn_tab_size;
-		info.num_services = ip_vs_num_services;
+		info.num_services = ipvs->num_services;
 		if (copy_to_user(user, &info, sizeof(info)) != 0)
 			ret = -EFAULT;
 	}
@@ -2437,7 +2513,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
 			ret = -EINVAL;
 			goto out;
 		}
-		ret = __ip_vs_get_service_entries(get, user);
+		ret = __ip_vs_get_service_entries(net, get, user);
 	}
 	break;
 
@@ -2450,10 +2526,11 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
 		entry = (struct ip_vs_service_entry *)arg;
 		addr.ip = entry->addr;
 		if (entry->fwmark)
-			svc = __ip_vs_svc_fwm_find(AF_INET, entry->fwmark);
+			svc = __ip_vs_svc_fwm_find(net, AF_INET, entry->fwmark);
 		else
-			svc = __ip_vs_service_find(AF_INET, entry->protocol,
-						   &addr, entry->port);
+			svc = __ip_vs_service_find(net, AF_INET,
+						   entry->protocol, &addr,
+						   entry->port);
 		if (svc) {
 			ip_vs_copy_service(entry, svc);
 			if (copy_to_user(user, entry, sizeof(*entry)) != 0)
@@ -2476,7 +2553,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
 			ret = -EINVAL;
 			goto out;
 		}
-		ret = __ip_vs_get_dest_entries(get, user);
+		ret = __ip_vs_get_dest_entries(net, get, user);
 	}
 	break;
 
@@ -2484,7 +2561,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
 	{
 		struct ip_vs_timeout_user t;
 
-		__ip_vs_get_timeouts(&t);
+		__ip_vs_get_timeouts(net, &t);
 		if (copy_to_user(user, &t, sizeof(t)) != 0)
 			ret = -EFAULT;
 	}
@@ -2495,15 +2572,17 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
 		struct ip_vs_daemon_user d[2];
 
 		memset(&d, 0, sizeof(d));
-		if (ip_vs_sync_state & IP_VS_STATE_MASTER) {
+		if (ipvs->sync_state & IP_VS_STATE_MASTER) {
 			d[0].state = IP_VS_STATE_MASTER;
-			strlcpy(d[0].mcast_ifn, ip_vs_master_mcast_ifn, sizeof(d[0].mcast_ifn));
-			d[0].syncid = ip_vs_master_syncid;
+			strlcpy(d[0].mcast_ifn, ipvs->master_mcast_ifn,
+				sizeof(d[0].mcast_ifn));
+			d[0].syncid = ipvs->master_syncid;
 		}
-		if (ip_vs_sync_state & IP_VS_STATE_BACKUP) {
+		if (ipvs->sync_state & IP_VS_STATE_BACKUP) {
 			d[1].state = IP_VS_STATE_BACKUP;
-			strlcpy(d[1].mcast_ifn, ip_vs_backup_mcast_ifn, sizeof(d[1].mcast_ifn));
-			d[1].syncid = ip_vs_backup_syncid;
+			strlcpy(d[1].mcast_ifn, ipvs->backup_mcast_ifn,
+				sizeof(d[1].mcast_ifn));
+			d[1].syncid = ipvs->backup_syncid;
 		}
 		if (copy_to_user(user, &d, sizeof(d)) != 0)
 			ret = -EFAULT;
@@ -2542,6 +2621,7 @@ static struct genl_family ip_vs_genl_family = {
 	.name		= IPVS_GENL_NAME,
 	.version	= IPVS_GENL_VERSION,
 	.maxattr	= IPVS_CMD_MAX,
+	.netnsok        = true,         /* Make ipvsadm to work on netns */
 };
 
 /* Policy used for first-level command attributes */
@@ -2696,11 +2776,12 @@ static int ip_vs_genl_dump_services(struct sk_buff *skb,
 	int idx = 0, i;
 	int start = cb->args[0];
 	struct ip_vs_service *svc;
+	struct net *net = skb_sknet(skb);
 
 	mutex_lock(&__ip_vs_mutex);
 	for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
 		list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
-			if (++idx <= start)
+			if (++idx <= start || !net_eq(svc->net, net))
 				continue;
 			if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
 				idx--;
@@ -2711,7 +2792,7 @@ static int ip_vs_genl_dump_services(struct sk_buff *skb,
 
 	for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
 		list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
-			if (++idx <= start)
+			if (++idx <= start || !net_eq(svc->net, net))
 				continue;
 			if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
 				idx--;
@@ -2727,7 +2808,8 @@ nla_put_failure:
 	return skb->len;
 }
 
-static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc,
+static int ip_vs_genl_parse_service(struct net *net,
+				    struct ip_vs_service_user_kern *usvc,
 				    struct nlattr *nla, int full_entry,
 				    struct ip_vs_service **ret_svc)
 {
@@ -2770,9 +2852,9 @@ static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc,
 	}
 
 	if (usvc->fwmark)
-		svc = __ip_vs_svc_fwm_find(usvc->af, usvc->fwmark);
+		svc = __ip_vs_svc_fwm_find(net, usvc->af, usvc->fwmark);
 	else
-		svc = __ip_vs_service_find(usvc->af, usvc->protocol,
+		svc = __ip_vs_service_find(net, usvc->af, usvc->protocol,
 					   &usvc->addr, usvc->port);
 	*ret_svc = svc;
 
@@ -2809,13 +2891,14 @@ static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc,
 	return 0;
 }
 
-static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla)
+static struct ip_vs_service *ip_vs_genl_find_service(struct net *net,
+						     struct nlattr *nla)
 {
 	struct ip_vs_service_user_kern usvc;
 	struct ip_vs_service *svc;
 	int ret;
 
-	ret = ip_vs_genl_parse_service(&usvc, nla, 0, &svc);
+	ret = ip_vs_genl_parse_service(net, &usvc, nla, 0, &svc);
 	return ret ? ERR_PTR(ret) : svc;
 }
 
@@ -2883,6 +2966,7 @@ static int ip_vs_genl_dump_dests(struct sk_buff *skb,
 	struct ip_vs_service *svc;
 	struct ip_vs_dest *dest;
 	struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
+	struct net *net = skb_sknet(skb);
 
 	mutex_lock(&__ip_vs_mutex);
 
@@ -2891,7 +2975,8 @@ static int ip_vs_genl_dump_dests(struct sk_buff *skb,
 			IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
 		goto out_err;
 
-	svc = ip_vs_genl_find_service(attrs[IPVS_CMD_ATTR_SERVICE]);
+
+	svc = ip_vs_genl_find_service(net, attrs[IPVS_CMD_ATTR_SERVICE]);
 	if (IS_ERR(svc) || svc == NULL)
 		goto out_err;
 
@@ -3005,20 +3090,23 @@ nla_put_failure:
 static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
 				   struct netlink_callback *cb)
 {
+	struct net *net = skb_net(skb);
+	struct netns_ipvs *ipvs = net_ipvs(net);
+
 	mutex_lock(&__ip_vs_mutex);
-	if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
+	if ((ipvs->sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
 		if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
-					   ip_vs_master_mcast_ifn,
-					   ip_vs_master_syncid, cb) < 0)
+					   ipvs->master_mcast_ifn,
+					   ipvs->master_syncid, cb) < 0)
 			goto nla_put_failure;
 
 		cb->args[0] = 1;
 	}
 
-	if ((ip_vs_sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
+	if ((ipvs->sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
 		if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
-					   ip_vs_backup_mcast_ifn,
-					   ip_vs_backup_syncid, cb) < 0)
+					   ipvs->backup_mcast_ifn,
+					   ipvs->backup_syncid, cb) < 0)
 			goto nla_put_failure;
 
 		cb->args[1] = 1;
@@ -3030,31 +3118,33 @@ nla_put_failure:
 	return skb->len;
 }
 
-static int ip_vs_genl_new_daemon(struct nlattr **attrs)
+static int ip_vs_genl_new_daemon(struct net *net, struct nlattr **attrs)
 {
 	if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
 	      attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
 	      attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
 		return -EINVAL;
 
-	return start_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
+	return start_sync_thread(net,
+				 nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
 				 nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
 				 nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
 }
 
-static int ip_vs_genl_del_daemon(struct nlattr **attrs)
+static int ip_vs_genl_del_daemon(struct net *net, struct nlattr **attrs)
 {
 	if (!attrs[IPVS_DAEMON_ATTR_STATE])
 		return -EINVAL;
 
-	return stop_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
+	return stop_sync_thread(net,
+				nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
 }
 
-static int ip_vs_genl_set_config(struct nlattr **attrs)
+static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs)
 {
 	struct ip_vs_timeout_user t;
 
-	__ip_vs_get_timeouts(&t);
+	__ip_vs_get_timeouts(net, &t);
 
 	if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
 		t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
@@ -3066,7 +3156,7 @@ static int ip_vs_genl_set_config(struct nlattr **attrs)
 	if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
 		t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
 
-	return ip_vs_set_timeout(&t);
+	return ip_vs_set_timeout(net, &t);
 }
 
 static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
@@ -3076,16 +3166,20 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
 	struct ip_vs_dest_user_kern udest;
 	int ret = 0, cmd;
 	int need_full_svc = 0, need_full_dest = 0;
+	struct net *net;
+	struct netns_ipvs *ipvs;
 
+	net = skb_sknet(skb);
+	ipvs = net_ipvs(net);
 	cmd = info->genlhdr->cmd;
 
 	mutex_lock(&__ip_vs_mutex);
 
 	if (cmd == IPVS_CMD_FLUSH) {
-		ret = ip_vs_flush();
+		ret = ip_vs_flush(net);
 		goto out;
 	} else if (cmd == IPVS_CMD_SET_CONFIG) {
-		ret = ip_vs_genl_set_config(info->attrs);
+		ret = ip_vs_genl_set_config(net, info->attrs);
 		goto out;
 	} else if (cmd == IPVS_CMD_NEW_DAEMON ||
 		   cmd == IPVS_CMD_DEL_DAEMON) {
@@ -3101,13 +3195,13 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
 		}
 
 		if (cmd == IPVS_CMD_NEW_DAEMON)
-			ret = ip_vs_genl_new_daemon(daemon_attrs);
+			ret = ip_vs_genl_new_daemon(net, daemon_attrs);
 		else
-			ret = ip_vs_genl_del_daemon(daemon_attrs);
+			ret = ip_vs_genl_del_daemon(net, daemon_attrs);
 		goto out;
 	} else if (cmd == IPVS_CMD_ZERO &&
 		   !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
-		ret = ip_vs_zero_all();
+		ret = ip_vs_zero_all(net);
 		goto out;
 	}
 
@@ -3117,7 +3211,7 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
 	if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
 		need_full_svc = 1;
 
-	ret = ip_vs_genl_parse_service(&usvc,
+	ret = ip_vs_genl_parse_service(net, &usvc,
 				       info->attrs[IPVS_CMD_ATTR_SERVICE],
 				       need_full_svc, &svc);
 	if (ret)
@@ -3147,7 +3241,7 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
 	switch (cmd) {
 	case IPVS_CMD_NEW_SERVICE:
 		if (svc == NULL)
-			ret = ip_vs_add_service(&usvc, &svc);
+			ret = ip_vs_add_service(net, &usvc, &svc);
 		else
 			ret = -EEXIST;
 		break;
@@ -3185,7 +3279,11 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
 	struct sk_buff *msg;
 	void *reply;
 	int ret, cmd, reply_cmd;
+	struct net *net;
+	struct netns_ipvs *ipvs;
 
+	net = skb_sknet(skb);
+	ipvs = net_ipvs(net);
 	cmd = info->genlhdr->cmd;
 
 	if (cmd == IPVS_CMD_GET_SERVICE)
@@ -3214,7 +3312,8 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
 	{
 		struct ip_vs_service *svc;
 
-		svc = ip_vs_genl_find_service(info->attrs[IPVS_CMD_ATTR_SERVICE]);
+		svc = ip_vs_genl_find_service(net,
+					      info->attrs[IPVS_CMD_ATTR_SERVICE]);
 		if (IS_ERR(svc)) {
 			ret = PTR_ERR(svc);
 			goto out_err;
@@ -3234,7 +3333,7 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
 	{
 		struct ip_vs_timeout_user t;
 
-		__ip_vs_get_timeouts(&t);
+		__ip_vs_get_timeouts(net, &t);
 #ifdef CONFIG_IP_VS_PROTO_TCP
 		NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
 		NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
@@ -3380,62 +3479,173 @@ static void ip_vs_genl_unregister(void)
 
 /* End of Generic Netlink interface definitions */
 
+/*
+ * per netns intit/exit func.
+ */
+int __net_init __ip_vs_control_init(struct net *net)
+{
+	int idx;
+	struct netns_ipvs *ipvs = net_ipvs(net);
+	struct ctl_table *tbl;
+
+	atomic_set(&ipvs->dropentry, 0);
+	spin_lock_init(&ipvs->dropentry_lock);
+	spin_lock_init(&ipvs->droppacket_lock);
+	spin_lock_init(&ipvs->securetcp_lock);
+	ipvs->rs_lock = __RW_LOCK_UNLOCKED(ipvs->rs_lock);
+
+	/* Initialize rs_table */
+	for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
+		INIT_LIST_HEAD(&ipvs->rs_table[idx]);
+
+	INIT_LIST_HEAD(&ipvs->dest_trash);
+	atomic_set(&ipvs->ftpsvc_counter, 0);
+	atomic_set(&ipvs->nullsvc_counter, 0);
+
+	/* procfs stats */
+	ipvs->tot_stats = kzalloc(sizeof(struct ip_vs_stats), GFP_KERNEL);
+	if (ipvs->tot_stats == NULL) {
+		pr_err("%s(): no memory.\n", __func__);
+		return -ENOMEM;
+	}
+	ipvs->cpustats = alloc_percpu(struct ip_vs_cpu_stats);
+	if (!ipvs->cpustats) {
+		pr_err("%s() alloc_percpu failed\n", __func__);
+		goto err_alloc;
+	}
+	spin_lock_init(&ipvs->tot_stats->lock);
+
+	proc_net_fops_create(net, "ip_vs", 0, &ip_vs_info_fops);
+	proc_net_fops_create(net, "ip_vs_stats", 0, &ip_vs_stats_fops);
+	proc_net_fops_create(net, "ip_vs_stats_percpu", 0,
+			     &ip_vs_stats_percpu_fops);
+
+	if (!net_eq(net, &init_net)) {
+		tbl = kmemdup(vs_vars, sizeof(vs_vars), GFP_KERNEL);
+		if (tbl == NULL)
+			goto err_dup;
+	} else
+		tbl = vs_vars;
+	/* Initialize sysctl defaults */
+	idx = 0;
+	ipvs->sysctl_amemthresh = 1024;
+	tbl[idx++].data = &ipvs->sysctl_amemthresh;
+	ipvs->sysctl_am_droprate = 10;
+	tbl[idx++].data = &ipvs->sysctl_am_droprate;
+	tbl[idx++].data = &ipvs->sysctl_drop_entry;
+	tbl[idx++].data = &ipvs->sysctl_drop_packet;
+#ifdef CONFIG_IP_VS_NFCT
+	tbl[idx++].data = &ipvs->sysctl_conntrack;
+#endif
+	tbl[idx++].data = &ipvs->sysctl_secure_tcp;
+	ipvs->sysctl_snat_reroute = 1;
+	tbl[idx++].data = &ipvs->sysctl_snat_reroute;
+	ipvs->sysctl_sync_ver = 1;
+	tbl[idx++].data = &ipvs->sysctl_sync_ver;
+	tbl[idx++].data = &ipvs->sysctl_cache_bypass;
+	tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn;
+	tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template;
+	ipvs->sysctl_sync_threshold[0] = 3;
+	ipvs->sysctl_sync_threshold[1] = 50;
+	tbl[idx].data = &ipvs->sysctl_sync_threshold;
+	tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold);
+	tbl[idx++].data = &ipvs->sysctl_nat_icmp_send;
+
+
+#ifdef CONFIG_SYSCTL
+	ipvs->sysctl_hdr = register_net_sysctl_table(net, net_vs_ctl_path,
+						     tbl);
+	if (ipvs->sysctl_hdr == NULL) {
+		if (!net_eq(net, &init_net))
+			kfree(tbl);
+		goto err_dup;
+	}
+#endif
+	ip_vs_new_estimator(net, ipvs->tot_stats);
+	ipvs->sysctl_tbl = tbl;
+	/* Schedule defense work */
+	INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler);
+	schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
+	return 0;
+
+err_dup:
+	free_percpu(ipvs->cpustats);
+err_alloc:
+	kfree(ipvs->tot_stats);
+	return -ENOMEM;
+}
+
+static void __net_exit __ip_vs_control_cleanup(struct net *net)
+{
+	struct netns_ipvs *ipvs = net_ipvs(net);
+
+	ip_vs_trash_cleanup(net);
+	ip_vs_kill_estimator(net, ipvs->tot_stats);
+	cancel_delayed_work_sync(&ipvs->defense_work);
+	cancel_work_sync(&ipvs->defense_work.work);
+#ifdef CONFIG_SYSCTL
+	unregister_net_sysctl_table(ipvs->sysctl_hdr);
+#endif
+	proc_net_remove(net, "ip_vs_stats_percpu");
+	proc_net_remove(net, "ip_vs_stats");
+	proc_net_remove(net, "ip_vs");
+	free_percpu(ipvs->cpustats);
+	kfree(ipvs->tot_stats);
+}
+
+static struct pernet_operations ipvs_control_ops = {
+	.init = __ip_vs_control_init,
+	.exit = __ip_vs_control_cleanup,
+};
 
 int __init ip_vs_control_init(void)
 {
-	int ret;
 	int idx;
+	int ret;
 
 	EnterFunction(2);
 
-	/* Initialize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable */
+	/* Initialize svc_table, ip_vs_svc_fwm_table, rs_table */
 	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++)  {
 		INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
 		INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
 	}
-	for(idx = 0; idx < IP_VS_RTAB_SIZE; idx++)  {
-		INIT_LIST_HEAD(&ip_vs_rtable[idx]);
+
+	ret = register_pernet_subsys(&ipvs_control_ops);
+	if (ret) {
+		pr_err("cannot register namespace.\n");
+		goto err;
 	}
-	smp_wmb();
+
+	smp_wmb();	/* Do we really need it now ? */
 
 	ret = nf_register_sockopt(&ip_vs_sockopts);
 	if (ret) {
 		pr_err("cannot register sockopt.\n");
-		return ret;
+		goto err_net;
 	}
 
 	ret = ip_vs_genl_register();
 	if (ret) {
 		pr_err("cannot register Generic Netlink interface.\n");
 		nf_unregister_sockopt(&ip_vs_sockopts);
-		return ret;
+		goto err_net;
 	}
 
-	proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops);
-	proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops);
-
-	sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars);
-
-	ip_vs_new_estimator(&ip_vs_stats);
-
-	/* Hook the defense timer */
-	schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
-
 	LeaveFunction(2);
 	return 0;
+
+err_net:
+	unregister_pernet_subsys(&ipvs_control_ops);
+err:
+	return ret;
 }
 
 
 void ip_vs_control_cleanup(void)
 {
 	EnterFunction(2);
-	ip_vs_trash_cleanup();
-	cancel_delayed_work_sync(&defense_work);
-	cancel_work_sync(&defense_work.work);
-	ip_vs_kill_estimator(&ip_vs_stats);
-	unregister_sysctl_table(sysctl_header);
-	proc_net_remove(&init_net, "ip_vs_stats");
-	proc_net_remove(&init_net, "ip_vs");
+	unregister_pernet_subsys(&ipvs_control_ops);
 	ip_vs_genl_unregister();
 	nf_unregister_sockopt(&ip_vs_sockopts);
 	LeaveFunction(2);
diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c
index ff28801..f560a05 100644
--- a/net/netfilter/ipvs/ip_vs_est.c
+++ b/net/netfilter/ipvs/ip_vs_est.c
@@ -8,8 +8,12 @@
  *              as published by the Free Software Foundation; either version
  *              2 of the License, or (at your option) any later version.
  *
- * Changes:
- *
+ * Changes:     Hans Schillstrom <hans.schillstrom@ericsson.com>
+ *              Network name space (netns) aware.
+ *              Global data moved to netns i.e struct netns_ipvs
+ *              Affected data: est_list and est_lock.
+ *              estimation_timer() runs with timer per netns.
+ *              get_stats()) do the per cpu summing.
  */
 
 #define KMSG_COMPONENT "IPVS"
@@ -48,11 +52,42 @@
  */
 
 
-static void estimation_timer(unsigned long arg);
+/*
+ * Make a summary from each cpu
+ */
+static void ip_vs_read_cpu_stats(struct ip_vs_stats_user *sum,
+				 struct ip_vs_cpu_stats *stats)
+{
+	int i;
+
+	for_each_possible_cpu(i) {
+		struct ip_vs_cpu_stats *s = per_cpu_ptr(stats, i);
+		unsigned int start;
+		__u64 inbytes, outbytes;
+		if (i) {
+			sum->conns += s->ustats.conns;
+			sum->inpkts += s->ustats.inpkts;
+			sum->outpkts += s->ustats.outpkts;
+			do {
+				start = u64_stats_fetch_begin_bh(&s->syncp);
+				inbytes = s->ustats.inbytes;
+				outbytes = s->ustats.outbytes;
+			} while (u64_stats_fetch_retry_bh(&s->syncp, start));
+			sum->inbytes += inbytes;
+			sum->outbytes += outbytes;
+		} else {
+			sum->conns = s->ustats.conns;
+			sum->inpkts = s->ustats.inpkts;
+			sum->outpkts = s->ustats.outpkts;
+			do {
+				start = u64_stats_fetch_begin_bh(&s->syncp);
+				sum->inbytes = s->ustats.inbytes;
+				sum->outbytes = s->ustats.outbytes;
+			} while (u64_stats_fetch_retry_bh(&s->syncp, start));
+		}
+	}
+}
 
-static LIST_HEAD(est_list);
-static DEFINE_SPINLOCK(est_lock);
-static DEFINE_TIMER(est_timer, estimation_timer, 0, 0);
 
 static void estimation_timer(unsigned long arg)
 {
@@ -62,11 +97,16 @@ static void estimation_timer(unsigned long arg)
 	u32 n_inpkts, n_outpkts;
 	u64 n_inbytes, n_outbytes;
 	u32 rate;
+	struct net *net = (struct net *)arg;
+	struct netns_ipvs *ipvs;
 
-	spin_lock(&est_lock);
-	list_for_each_entry(e, &est_list, list) {
+	ipvs = net_ipvs(net);
+	ip_vs_read_cpu_stats(&ipvs->tot_stats->ustats, ipvs->cpustats);
+	spin_lock(&ipvs->est_lock);
+	list_for_each_entry(e, &ipvs->est_list, list) {
 		s = container_of(e, struct ip_vs_stats, est);
 
+		ip_vs_read_cpu_stats(&s->ustats, s->cpustats);
 		spin_lock(&s->lock);
 		n_conns = s->ustats.conns;
 		n_inpkts = s->ustats.inpkts;
@@ -75,38 +115,39 @@ static void estimation_timer(unsigned long arg)
 		n_outbytes = s->ustats.outbytes;
 
 		/* scaled by 2^10, but divided 2 seconds */
-		rate = (n_conns - e->last_conns)<<9;
+		rate = (n_conns - e->last_conns) << 9;
 		e->last_conns = n_conns;
-		e->cps += ((long)rate - (long)e->cps)>>2;
-		s->ustats.cps = (e->cps+0x1FF)>>10;
+		e->cps += ((long)rate - (long)e->cps) >> 2;
+		s->ustats.cps = (e->cps + 0x1FF) >> 10;
 
-		rate = (n_inpkts - e->last_inpkts)<<9;
+		rate = (n_inpkts - e->last_inpkts) << 9;
 		e->last_inpkts = n_inpkts;
-		e->inpps += ((long)rate - (long)e->inpps)>>2;
-		s->ustats.inpps = (e->inpps+0x1FF)>>10;
+		e->inpps += ((long)rate - (long)e->inpps) >> 2;
+		s->ustats.inpps = (e->inpps + 0x1FF) >> 10;
 
-		rate = (n_outpkts - e->last_outpkts)<<9;
+		rate = (n_outpkts - e->last_outpkts) << 9;
 		e->last_outpkts = n_outpkts;
-		e->outpps += ((long)rate - (long)e->outpps)>>2;
-		s->ustats.outpps = (e->outpps+0x1FF)>>10;
+		e->outpps += ((long)rate - (long)e->outpps) >> 2;
+		s->ustats.outpps = (e->outpps + 0x1FF) >> 10;
 
-		rate = (n_inbytes - e->last_inbytes)<<4;
+		rate = (n_inbytes - e->last_inbytes) << 4;
 		e->last_inbytes = n_inbytes;
-		e->inbps += ((long)rate - (long)e->inbps)>>2;
-		s->ustats.inbps = (e->inbps+0xF)>>5;
+		e->inbps += ((long)rate - (long)e->inbps) >> 2;
+		s->ustats.inbps = (e->inbps + 0xF) >> 5;
 
-		rate = (n_outbytes - e->last_outbytes)<<4;
+		rate = (n_outbytes - e->last_outbytes) << 4;
 		e->last_outbytes = n_outbytes;
-		e->outbps += ((long)rate - (long)e->outbps)>>2;
-		s->ustats.outbps = (e->outbps+0xF)>>5;
+		e->outbps += ((long)rate - (long)e->outbps) >> 2;
+		s->ustats.outbps = (e->outbps + 0xF) >> 5;
 		spin_unlock(&s->lock);
 	}
-	spin_unlock(&est_lock);
-	mod_timer(&est_timer, jiffies + 2*HZ);
+	spin_unlock(&ipvs->est_lock);
+	mod_timer(&ipvs->est_timer, jiffies + 2*HZ);
 }
 
-void ip_vs_new_estimator(struct ip_vs_stats *stats)
+void ip_vs_new_estimator(struct net *net, struct ip_vs_stats *stats)
 {
+	struct netns_ipvs *ipvs = net_ipvs(net);
 	struct ip_vs_estimator *est = &stats->est;
 
 	INIT_LIST_HEAD(&est->list);
@@ -126,18 +167,19 @@ void ip_vs_new_estimator(struct ip_vs_stats *stats)
 	est->last_outbytes = stats->ustats.outbytes;
 	est->outbps = stats->ustats.outbps<<5;
 
-	spin_lock_bh(&est_lock);
-	list_add(&est->list, &est_list);
-	spin_unlock_bh(&est_lock);
+	spin_lock_bh(&ipvs->est_lock);
+	list_add(&est->list, &ipvs->est_list);
+	spin_unlock_bh(&ipvs->est_lock);
 }
 
-void ip_vs_kill_estimator(struct ip_vs_stats *stats)
+void ip_vs_kill_estimator(struct net *net, struct ip_vs_stats *stats)
 {
+	struct netns_ipvs *ipvs = net_ipvs(net);
 	struct ip_vs_estimator *est = &stats->est;
 
-	spin_lock_bh(&est_lock);
+	spin_lock_bh(&ipvs->est_lock);
 	list_del(&est->list);
-	spin_unlock_bh(&est_lock);
+	spin_unlock_bh(&ipvs->est_lock);
 }
 
 void ip_vs_zero_estimator(struct ip_vs_stats *stats)
@@ -157,13 +199,35 @@ void ip_vs_zero_estimator(struct ip_vs_stats *stats)
 	est->outbps = 0;
 }
 
-int __init ip_vs_estimator_init(void)
+static int __net_init __ip_vs_estimator_init(struct net *net)
 {
-	mod_timer(&est_timer, jiffies + 2 * HZ);
+	struct netns_ipvs *ipvs = net_ipvs(net);
+
+	INIT_LIST_HEAD(&ipvs->est_list);
+	spin_lock_init(&ipvs->est_lock);
+	setup_timer(&ipvs->est_timer, estimation_timer, (unsigned long)net);
+	mod_timer(&ipvs->est_timer, jiffies + 2 * HZ);
 	return 0;
 }
 
+static void __net_exit __ip_vs_estimator_exit(struct net *net)
+{
+	del_timer_sync(&net_ipvs(net)->est_timer);
+}
+static struct pernet_operations ip_vs_app_ops = {
+	.init = __ip_vs_estimator_init,
+	.exit = __ip_vs_estimator_exit,
+};
+
+int __init ip_vs_estimator_init(void)
+{
+	int rv;
+
+	rv = register_pernet_subsys(&ip_vs_app_ops);
+	return rv;
+}
+
 void ip_vs_estimator_cleanup(void)
 {
-	del_timer_sync(&est_timer);
+	unregister_pernet_subsys(&ip_vs_app_ops);
 }
diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c
index 7545500..6b5dd6d 100644
--- a/net/netfilter/ipvs/ip_vs_ftp.c
+++ b/net/netfilter/ipvs/ip_vs_ftp.c
@@ -157,6 +157,7 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
 	int ret = 0;
 	enum ip_conntrack_info ctinfo;
 	struct nf_conn *ct;
+	struct net *net;
 
 #ifdef CONFIG_IP_VS_IPV6
 	/* This application helper doesn't work with IPv6 yet,
@@ -197,18 +198,20 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
 		 */
 		{
 			struct ip_vs_conn_param p;
-			ip_vs_conn_fill_param(AF_INET, iph->protocol,
-					      &from, port, &cp->caddr, 0, &p);
+			ip_vs_conn_fill_param(ip_vs_conn_net(cp), AF_INET,
+					      iph->protocol, &from, port,
+					      &cp->caddr, 0, &p);
 			n_cp = ip_vs_conn_out_get(&p);
 		}
 		if (!n_cp) {
 			struct ip_vs_conn_param p;
-			ip_vs_conn_fill_param(AF_INET, IPPROTO_TCP, &cp->caddr,
+			ip_vs_conn_fill_param(ip_vs_conn_net(cp),
+					      AF_INET, IPPROTO_TCP, &cp->caddr,
 					      0, &cp->vaddr, port, &p);
 			n_cp = ip_vs_conn_new(&p, &from, port,
 					      IP_VS_CONN_F_NO_CPORT |
 					      IP_VS_CONN_F_NFCT,
-					      cp->dest);
+					      cp->dest, skb->mark);
 			if (!n_cp)
 				return 0;
 
@@ -257,8 +260,9 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
 		 * would be adjusted twice.
 		 */
 
+		net = skb_net(skb);
 		cp->app_data = NULL;
-		ip_vs_tcp_conn_listen(n_cp);
+		ip_vs_tcp_conn_listen(net, n_cp);
 		ip_vs_conn_put(n_cp);
 		return ret;
 	}
@@ -287,6 +291,7 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
 	union nf_inet_addr to;
 	__be16 port;
 	struct ip_vs_conn *n_cp;
+	struct net *net;
 
 #ifdef CONFIG_IP_VS_IPV6
 	/* This application helper doesn't work with IPv6 yet,
@@ -358,14 +363,15 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
 
 	{
 		struct ip_vs_conn_param p;
-		ip_vs_conn_fill_param(AF_INET, iph->protocol, &to, port,
-				      &cp->vaddr, htons(ntohs(cp->vport)-1),
-				      &p);
+		ip_vs_conn_fill_param(ip_vs_conn_net(cp), AF_INET,
+				      iph->protocol, &to, port, &cp->vaddr,
+				      htons(ntohs(cp->vport)-1), &p);
 		n_cp = ip_vs_conn_in_get(&p);
 		if (!n_cp) {
 			n_cp = ip_vs_conn_new(&p, &cp->daddr,
 					      htons(ntohs(cp->dport)-1),
-					      IP_VS_CONN_F_NFCT, cp->dest);
+					      IP_VS_CONN_F_NFCT, cp->dest,
+					      skb->mark);
 			if (!n_cp)
 				return 0;
 
@@ -377,7 +383,8 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
 	/*
 	 *	Move tunnel to listen state
 	 */
-	ip_vs_tcp_conn_listen(n_cp);
+	net = skb_net(skb);
+	ip_vs_tcp_conn_listen(net, n_cp);
 	ip_vs_conn_put(n_cp);
 
 	return 1;
@@ -398,23 +405,22 @@ static struct ip_vs_app ip_vs_ftp = {
 	.pkt_in =	ip_vs_ftp_in,
 };
 
-
 /*
- *	ip_vs_ftp initialization
+ *	per netns ip_vs_ftp initialization
  */
-static int __init ip_vs_ftp_init(void)
+static int __net_init __ip_vs_ftp_init(struct net *net)
 {
 	int i, ret;
 	struct ip_vs_app *app = &ip_vs_ftp;
 
-	ret = register_ip_vs_app(app);
+	ret = register_ip_vs_app(net, app);
 	if (ret)
 		return ret;
 
 	for (i=0; i<IP_VS_APP_MAX_PORTS; i++) {
 		if (!ports[i])
 			continue;
-		ret = register_ip_vs_app_inc(app, app->protocol, ports[i]);
+		ret = register_ip_vs_app_inc(net, app, app->protocol, ports[i]);
 		if (ret)
 			break;
 		pr_info("%s: loaded support on port[%d] = %d\n",
@@ -422,18 +428,39 @@ static int __init ip_vs_ftp_init(void)
 	}
 
 	if (ret)
-		unregister_ip_vs_app(app);
+		unregister_ip_vs_app(net, app);
 
 	return ret;
 }
+/*
+ *	netns exit
+ */
+static void __ip_vs_ftp_exit(struct net *net)
+{
+	struct ip_vs_app *app = &ip_vs_ftp;
+
+	unregister_ip_vs_app(net, app);
+}
+
+static struct pernet_operations ip_vs_ftp_ops = {
+	.init = __ip_vs_ftp_init,
+	.exit = __ip_vs_ftp_exit,
+};
 
+int __init ip_vs_ftp_init(void)
+{
+	int rv;
+
+	rv = register_pernet_subsys(&ip_vs_ftp_ops);
+	return rv;
+}
 
 /*
  *	ip_vs_ftp finish.
  */
 static void __exit ip_vs_ftp_exit(void)
 {
-	unregister_ip_vs_app(&ip_vs_ftp);
+	unregister_pernet_subsys(&ip_vs_ftp_ops);
 }
 
 
diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c
index 9323f89..6bf7a80 100644
--- a/net/netfilter/ipvs/ip_vs_lblc.c
+++ b/net/netfilter/ipvs/ip_vs_lblc.c
@@ -70,7 +70,6 @@
  *    entries that haven't been touched for a day.
  */
 #define COUNT_FOR_FULL_EXPIRATION   30
-static int sysctl_ip_vs_lblc_expiration = 24*60*60*HZ;
 
 
 /*
@@ -117,7 +116,7 @@ struct ip_vs_lblc_table {
 static ctl_table vs_vars_table[] = {
 	{
 		.procname	= "lblc_expiration",
-		.data		= &sysctl_ip_vs_lblc_expiration,
+		.data		= NULL,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_jiffies,
@@ -125,8 +124,6 @@ static ctl_table vs_vars_table[] = {
 	{ }
 };
 
-static struct ctl_table_header * sysctl_header;
-
 static inline void ip_vs_lblc_free(struct ip_vs_lblc_entry *en)
 {
 	list_del(&en->list);
@@ -248,6 +245,7 @@ static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc)
 	struct ip_vs_lblc_entry *en, *nxt;
 	unsigned long now = jiffies;
 	int i, j;
+	struct netns_ipvs *ipvs = net_ipvs(svc->net);
 
 	for (i=0, j=tbl->rover; i<IP_VS_LBLC_TAB_SIZE; i++) {
 		j = (j + 1) & IP_VS_LBLC_TAB_MASK;
@@ -255,7 +253,8 @@ static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc)
 		write_lock(&svc->sched_lock);
 		list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) {
 			if (time_before(now,
-					en->lastuse + sysctl_ip_vs_lblc_expiration))
+					en->lastuse +
+					ipvs->sysctl_lblc_expiration))
 				continue;
 
 			ip_vs_lblc_free(en);
@@ -390,12 +389,7 @@ __ip_vs_lblc_schedule(struct ip_vs_service *svc)
 	int loh, doh;
 
 	/*
-	 * We think the overhead of processing active connections is fifty
-	 * times higher than that of inactive connections in average. (This
-	 * fifty times might not be accurate, we will change it later.) We
-	 * use the following formula to estimate the overhead:
-	 *                dest->activeconns*50 + dest->inactconns
-	 * and the load:
+	 * We use the following formula to estimate the load:
 	 *                (dest overhead) / dest->weight
 	 *
 	 * Remember -- no floats in kernel mode!!!
@@ -411,8 +405,7 @@ __ip_vs_lblc_schedule(struct ip_vs_service *svc)
 			continue;
 		if (atomic_read(&dest->weight) > 0) {
 			least = dest;
-			loh = atomic_read(&least->activeconns) * 50
-				+ atomic_read(&least->inactconns);
+			loh = ip_vs_dest_conn_overhead(least);
 			goto nextstage;
 		}
 	}
@@ -426,8 +419,7 @@ __ip_vs_lblc_schedule(struct ip_vs_service *svc)
 		if (dest->flags & IP_VS_DEST_F_OVERLOAD)
 			continue;
 
-		doh = atomic_read(&dest->activeconns) * 50
-			+ atomic_read(&dest->inactconns);
+		doh = ip_vs_dest_conn_overhead(dest);
 		if (loh * atomic_read(&dest->weight) >
 		    doh * atomic_read(&least->weight)) {
 			least = dest;
@@ -511,7 +503,7 @@ ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
 	/* No cache entry or it is invalid, time to schedule */
 	dest = __ip_vs_lblc_schedule(svc);
 	if (!dest) {
-		IP_VS_ERR_RL("LBLC: no destination available\n");
+		ip_vs_scheduler_err(svc, "no destination available");
 		return NULL;
 	}
 
@@ -543,23 +535,73 @@ static struct ip_vs_scheduler ip_vs_lblc_scheduler =
 	.schedule =		ip_vs_lblc_schedule,
 };
 
+/*
+ *  per netns init.
+ */
+static int __net_init __ip_vs_lblc_init(struct net *net)
+{
+	struct netns_ipvs *ipvs = net_ipvs(net);
+
+	if (!net_eq(net, &init_net)) {
+		ipvs->lblc_ctl_table = kmemdup(vs_vars_table,
+						sizeof(vs_vars_table),
+						GFP_KERNEL);
+		if (ipvs->lblc_ctl_table == NULL)
+			return -ENOMEM;
+	} else
+		ipvs->lblc_ctl_table = vs_vars_table;
+	ipvs->sysctl_lblc_expiration = 24*60*60*HZ;
+	ipvs->lblc_ctl_table[0].data = &ipvs->sysctl_lblc_expiration;
+
+#ifdef CONFIG_SYSCTL
+	ipvs->lblc_ctl_header =
+		register_net_sysctl_table(net, net_vs_ctl_path,
+					  ipvs->lblc_ctl_table);
+	if (!ipvs->lblc_ctl_header) {
+		if (!net_eq(net, &init_net))
+			kfree(ipvs->lblc_ctl_table);
+		return -ENOMEM;
+	}
+#endif
+
+	return 0;
+}
+
+static void __net_exit __ip_vs_lblc_exit(struct net *net)
+{
+	struct netns_ipvs *ipvs = net_ipvs(net);
+
+#ifdef CONFIG_SYSCTL
+	unregister_net_sysctl_table(ipvs->lblc_ctl_header);
+#endif
+
+	if (!net_eq(net, &init_net))
+		kfree(ipvs->lblc_ctl_table);
+}
+
+static struct pernet_operations ip_vs_lblc_ops = {
+	.init = __ip_vs_lblc_init,
+	.exit = __ip_vs_lblc_exit,
+};
 
 static int __init ip_vs_lblc_init(void)
 {
 	int ret;
 
-	sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars_table);
+	ret = register_pernet_subsys(&ip_vs_lblc_ops);
+	if (ret)
+		return ret;
+
 	ret = register_ip_vs_scheduler(&ip_vs_lblc_scheduler);
 	if (ret)
-		unregister_sysctl_table(sysctl_header);
+		unregister_pernet_subsys(&ip_vs_lblc_ops);
 	return ret;
 }
 
-
 static void __exit ip_vs_lblc_cleanup(void)
 {
-	unregister_sysctl_table(sysctl_header);
 	unregister_ip_vs_scheduler(&ip_vs_lblc_scheduler);
+	unregister_pernet_subsys(&ip_vs_lblc_ops);
 }
 
 
diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c
index dbeed8e..0063176 100644
--- a/net/netfilter/ipvs/ip_vs_lblcr.c
+++ b/net/netfilter/ipvs/ip_vs_lblcr.c
@@ -70,8 +70,6 @@
  *    entries that haven't been touched for a day.
  */
 #define COUNT_FOR_FULL_EXPIRATION   30
-static int sysctl_ip_vs_lblcr_expiration = 24*60*60*HZ;
-
 
 /*
  *     for IPVS lblcr entry hash table
@@ -180,8 +178,7 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set)
 
 		if ((atomic_read(&least->weight) > 0)
 		    && (least->flags & IP_VS_DEST_F_AVAILABLE)) {
-			loh = atomic_read(&least->activeconns) * 50
-				+ atomic_read(&least->inactconns);
+			loh = ip_vs_dest_conn_overhead(least);
 			goto nextstage;
 		}
 	}
@@ -194,8 +191,7 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set)
 		if (dest->flags & IP_VS_DEST_F_OVERLOAD)
 			continue;
 
-		doh = atomic_read(&dest->activeconns) * 50
-			+ atomic_read(&dest->inactconns);
+		doh = ip_vs_dest_conn_overhead(dest);
 		if ((loh * atomic_read(&dest->weight) >
 		     doh * atomic_read(&least->weight))
 		    && (dest->flags & IP_VS_DEST_F_AVAILABLE)) {
@@ -230,8 +226,7 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set)
 	list_for_each_entry(e, &set->list, list) {
 		most = e->dest;
 		if (atomic_read(&most->weight) > 0) {
-			moh = atomic_read(&most->activeconns) * 50
-				+ atomic_read(&most->inactconns);
+			moh = ip_vs_dest_conn_overhead(most);
 			goto nextstage;
 		}
 	}
@@ -241,8 +236,7 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set)
   nextstage:
 	list_for_each_entry(e, &set->list, list) {
 		dest = e->dest;
-		doh = atomic_read(&dest->activeconns) * 50
-			+ atomic_read(&dest->inactconns);
+		doh = ip_vs_dest_conn_overhead(dest);
 		/* moh/mw < doh/dw ==> moh*dw < doh*mw, where mw,dw>0 */
 		if ((moh * atomic_read(&dest->weight) <
 		     doh * atomic_read(&most->weight))
@@ -296,7 +290,7 @@ struct ip_vs_lblcr_table {
 static ctl_table vs_vars_table[] = {
 	{
 		.procname	= "lblcr_expiration",
-		.data		= &sysctl_ip_vs_lblcr_expiration,
+		.data		= NULL,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_jiffies,
@@ -304,8 +298,6 @@ static ctl_table vs_vars_table[] = {
 	{ }
 };
 
-static struct ctl_table_header * sysctl_header;
-
 static inline void ip_vs_lblcr_free(struct ip_vs_lblcr_entry *en)
 {
 	list_del(&en->list);
@@ -425,14 +417,15 @@ static inline void ip_vs_lblcr_full_check(struct ip_vs_service *svc)
 	unsigned long now = jiffies;
 	int i, j;
 	struct ip_vs_lblcr_entry *en, *nxt;
+	struct netns_ipvs *ipvs = net_ipvs(svc->net);
 
 	for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) {
 		j = (j + 1) & IP_VS_LBLCR_TAB_MASK;
 
 		write_lock(&svc->sched_lock);
 		list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) {
-			if (time_after(en->lastuse+sysctl_ip_vs_lblcr_expiration,
-				       now))
+			if (time_after(en->lastuse
+					+ ipvs->sysctl_lblcr_expiration, now))
 				continue;
 
 			ip_vs_lblcr_free(en);
@@ -566,12 +559,7 @@ __ip_vs_lblcr_schedule(struct ip_vs_service *svc)
 	int loh, doh;
 
 	/*
-	 * We think the overhead of processing active connections is fifty
-	 * times higher than that of inactive connections in average. (This
-	 * fifty times might not be accurate, we will change it later.) We
-	 * use the following formula to estimate the overhead:
-	 *                dest->activeconns*50 + dest->inactconns
-	 * and the load:
+	 * We use the following formula to estimate the load:
 	 *                (dest overhead) / dest->weight
 	 *
 	 * Remember -- no floats in kernel mode!!!
@@ -588,8 +576,7 @@ __ip_vs_lblcr_schedule(struct ip_vs_service *svc)
 
 		if (atomic_read(&dest->weight) > 0) {
 			least = dest;
-			loh = atomic_read(&least->activeconns) * 50
-				+ atomic_read(&least->inactconns);
+			loh = ip_vs_dest_conn_overhead(least);
 			goto nextstage;
 		}
 	}
@@ -603,8 +590,7 @@ __ip_vs_lblcr_schedule(struct ip_vs_service *svc)
 		if (dest->flags & IP_VS_DEST_F_OVERLOAD)
 			continue;
 
-		doh = atomic_read(&dest->activeconns) * 50
-			+ atomic_read(&dest->inactconns);
+		doh = ip_vs_dest_conn_overhead(dest);
 		if (loh * atomic_read(&dest->weight) >
 		    doh * atomic_read(&least->weight)) {
 			least = dest;
@@ -664,6 +650,7 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
 	read_lock(&svc->sched_lock);
 	en = ip_vs_lblcr_get(svc->af, tbl, &iph.daddr);
 	if (en) {
+		struct netns_ipvs *ipvs = net_ipvs(svc->net);
 		/* We only hold a read lock, but this is atomic */
 		en->lastuse = jiffies;
 
@@ -675,7 +662,7 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
 		/* More than one destination + enough time passed by, cleanup */
 		if (atomic_read(&en->set.size) > 1 &&
 				time_after(jiffies, en->set.lastmod +
-				sysctl_ip_vs_lblcr_expiration)) {
+				ipvs->sysctl_lblcr_expiration)) {
 			struct ip_vs_dest *m;
 
 			write_lock(&en->set.lock);
@@ -694,7 +681,7 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
 		/* The cache entry is invalid, time to schedule */
 		dest = __ip_vs_lblcr_schedule(svc);
 		if (!dest) {
-			IP_VS_ERR_RL("LBLCR: no destination available\n");
+			ip_vs_scheduler_err(svc, "no destination available");
 			read_unlock(&svc->sched_lock);
 			return NULL;
 		}
@@ -744,23 +731,73 @@ static struct ip_vs_scheduler ip_vs_lblcr_scheduler =
 	.schedule =		ip_vs_lblcr_schedule,
 };
 
+/*
+ *  per netns init.
+ */
+static int __net_init __ip_vs_lblcr_init(struct net *net)
+{
+	struct netns_ipvs *ipvs = net_ipvs(net);
+
+	if (!net_eq(net, &init_net)) {
+		ipvs->lblcr_ctl_table = kmemdup(vs_vars_table,
+						sizeof(vs_vars_table),
+						GFP_KERNEL);
+		if (ipvs->lblcr_ctl_table == NULL)
+			return -ENOMEM;
+	} else
+		ipvs->lblcr_ctl_table = vs_vars_table;
+	ipvs->sysctl_lblcr_expiration = 24*60*60*HZ;
+	ipvs->lblcr_ctl_table[0].data = &ipvs->sysctl_lblcr_expiration;
+
+#ifdef CONFIG_SYSCTL
+	ipvs->lblcr_ctl_header =
+		register_net_sysctl_table(net, net_vs_ctl_path,
+					  ipvs->lblcr_ctl_table);
+	if (!ipvs->lblcr_ctl_header) {
+		if (!net_eq(net, &init_net))
+			kfree(ipvs->lblcr_ctl_table);
+		return -ENOMEM;
+	}
+#endif
+
+	return 0;
+}
+
+static void __net_exit __ip_vs_lblcr_exit(struct net *net)
+{
+	struct netns_ipvs *ipvs = net_ipvs(net);
+
+#ifdef CONFIG_SYSCTL
+	unregister_net_sysctl_table(ipvs->lblcr_ctl_header);
+#endif
+
+	if (!net_eq(net, &init_net))
+		kfree(ipvs->lblcr_ctl_table);
+}
+
+static struct pernet_operations ip_vs_lblcr_ops = {
+	.init = __ip_vs_lblcr_init,
+	.exit = __ip_vs_lblcr_exit,
+};
 
 static int __init ip_vs_lblcr_init(void)
 {
 	int ret;
 
-	sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars_table);
+	ret = register_pernet_subsys(&ip_vs_lblcr_ops);
+	if (ret)
+		return ret;
+
 	ret = register_ip_vs_scheduler(&ip_vs_lblcr_scheduler);
 	if (ret)
-		unregister_sysctl_table(sysctl_header);
+		unregister_pernet_subsys(&ip_vs_lblcr_ops);
 	return ret;
 }
 
-
 static void __exit ip_vs_lblcr_cleanup(void)
 {
-	unregister_sysctl_table(sysctl_header);
 	unregister_ip_vs_scheduler(&ip_vs_lblcr_scheduler);
+	unregister_pernet_subsys(&ip_vs_lblcr_ops);
 }
 
 
diff --git a/net/netfilter/ipvs/ip_vs_lc.c b/net/netfilter/ipvs/ip_vs_lc.c
index 4f69db1..f391819 100644
--- a/net/netfilter/ipvs/ip_vs_lc.c
+++ b/net/netfilter/ipvs/ip_vs_lc.c
@@ -22,22 +22,6 @@
 
 #include <net/ip_vs.h>
 
-
-static inline unsigned int
-ip_vs_lc_dest_overhead(struct ip_vs_dest *dest)
-{
-	/*
-	 * We think the overhead of processing active connections is 256
-	 * times higher than that of inactive connections in average. (This
-	 * 256 times might not be accurate, we will change it later) We
-	 * use the following formula to estimate the overhead now:
-	 *		  dest->activeconns*256 + dest->inactconns
-	 */
-	return (atomic_read(&dest->activeconns) << 8) +
-		atomic_read(&dest->inactconns);
-}
-
-
 /*
  *	Least Connection scheduling
  */
@@ -62,7 +46,7 @@ ip_vs_lc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
 		if ((dest->flags & IP_VS_DEST_F_OVERLOAD) ||
 		    atomic_read(&dest->weight) == 0)
 			continue;
-		doh = ip_vs_lc_dest_overhead(dest);
+		doh = ip_vs_dest_conn_overhead(dest);
 		if (!least || doh < loh) {
 			least = dest;
 			loh = doh;
@@ -70,7 +54,7 @@ ip_vs_lc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
 	}
 
 	if (!least)
-		IP_VS_ERR_RL("LC: no destination available\n");
+		ip_vs_scheduler_err(svc, "no destination available");
 	else
 		IP_VS_DBG_BUF(6, "LC: server %s:%u activeconns %d "
 			      "inactconns %d\n",
diff --git a/net/netfilter/ipvs/ip_vs_nfct.c b/net/netfilter/ipvs/ip_vs_nfct.c
index 4680647..f454c80 100644
--- a/net/netfilter/ipvs/ip_vs_nfct.c
+++ b/net/netfilter/ipvs/ip_vs_nfct.c
@@ -141,6 +141,7 @@ static void ip_vs_nfct_expect_callback(struct nf_conn *ct,
 	struct nf_conntrack_tuple *orig, new_reply;
 	struct ip_vs_conn *cp;
 	struct ip_vs_conn_param p;
+	struct net *net = nf_ct_net(ct);
 
 	if (exp->tuple.src.l3num != PF_INET)
 		return;
@@ -155,7 +156,7 @@ static void ip_vs_nfct_expect_callback(struct nf_conn *ct,
 
 	/* RS->CLIENT */
 	orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
-	ip_vs_conn_fill_param(exp->tuple.src.l3num, orig->dst.protonum,
+	ip_vs_conn_fill_param(net, exp->tuple.src.l3num, orig->dst.protonum,
 			      &orig->src.u3, orig->src.u.tcp.port,
 			      &orig->dst.u3, orig->dst.u.tcp.port, &p);
 	cp = ip_vs_conn_out_get(&p);
@@ -268,7 +269,8 @@ void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp)
 		" for conn " FMT_CONN "\n",
 		__func__, ARG_TUPLE(&tuple), ARG_CONN(cp));
 
-	h = nf_conntrack_find_get(&init_net, NF_CT_DEFAULT_ZONE, &tuple);
+	h = nf_conntrack_find_get(ip_vs_conn_net(cp), NF_CT_DEFAULT_ZONE,
+				  &tuple);
 	if (h) {
 		ct = nf_ct_tuplehash_to_ctrack(h);
 		/* Show what happens instead of calling nf_ct_kill() */
diff --git a/net/netfilter/ipvs/ip_vs_nq.c b/net/netfilter/ipvs/ip_vs_nq.c
index c413e18..984d9c1 100644
--- a/net/netfilter/ipvs/ip_vs_nq.c
+++ b/net/netfilter/ipvs/ip_vs_nq.c
@@ -99,7 +99,7 @@ ip_vs_nq_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
 	}
 
 	if (!least) {
-		IP_VS_ERR_RL("NQ: no destination available\n");
+		ip_vs_scheduler_err(svc, "no destination available");
 		return NULL;
 	}
 
diff --git a/net/netfilter/ipvs/ip_vs_pe.c b/net/netfilter/ipvs/ip_vs_pe.c
index 3414af7..5cf859c 100644
--- a/net/netfilter/ipvs/ip_vs_pe.c
+++ b/net/netfilter/ipvs/ip_vs_pe.c
@@ -29,12 +29,11 @@ void ip_vs_unbind_pe(struct ip_vs_service *svc)
 }
 
 /* Get pe in the pe list by name */
-static struct ip_vs_pe *
-ip_vs_pe_getbyname(const char *pe_name)
+struct ip_vs_pe *__ip_vs_pe_getbyname(const char *pe_name)
 {
 	struct ip_vs_pe *pe;
 
-	IP_VS_DBG(2, "%s(): pe_name \"%s\"\n", __func__,
+	IP_VS_DBG(10, "%s(): pe_name \"%s\"\n", __func__,
 		  pe_name);
 
 	spin_lock_bh(&ip_vs_pe_lock);
@@ -60,28 +59,22 @@ ip_vs_pe_getbyname(const char *pe_name)
 }
 
 /* Lookup pe and try to load it if it doesn't exist */
-struct ip_vs_pe *ip_vs_pe_get(const char *name)
+struct ip_vs_pe *ip_vs_pe_getbyname(const char *name)
 {
 	struct ip_vs_pe *pe;
 
 	/* Search for the pe by name */
-	pe = ip_vs_pe_getbyname(name);
+	pe = __ip_vs_pe_getbyname(name);
 
 	/* If pe not found, load the module and search again */
 	if (!pe) {
 		request_module("ip_vs_pe_%s", name);
-		pe = ip_vs_pe_getbyname(name);
+		pe = __ip_vs_pe_getbyname(name);
 	}
 
 	return pe;
 }
 
-void ip_vs_pe_put(struct ip_vs_pe *pe)
-{
-	if (pe && pe->module)
-		module_put(pe->module);
-}
-
 /* Register a pe in the pe list */
 int register_ip_vs_pe(struct ip_vs_pe *pe)
 {
diff --git a/net/netfilter/ipvs/ip_vs_pe_sip.c b/net/netfilter/ipvs/ip_vs_pe_sip.c
index b8b4e96..0d83bc0 100644
--- a/net/netfilter/ipvs/ip_vs_pe_sip.c
+++ b/net/netfilter/ipvs/ip_vs_pe_sip.c
@@ -71,6 +71,7 @@ ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb)
 	struct ip_vs_iphdr iph;
 	unsigned int dataoff, datalen, matchoff, matchlen;
 	const char *dptr;
+	int retc;
 
 	ip_vs_fill_iphdr(p->af, skb_network_header(skb), &iph);
 
@@ -83,6 +84,8 @@ ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb)
 	if (dataoff >= skb->len)
 		return -EINVAL;
 
+	if ((retc=skb_linearize(skb)) < 0)
+		return retc;
 	dptr = skb->data + dataoff;
 	datalen = skb->len - dataoff;
 
diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c
index c539983..17484a4 100644
--- a/net/netfilter/ipvs/ip_vs_proto.c
+++ b/net/netfilter/ipvs/ip_vs_proto.c
@@ -60,6 +60,35 @@ static int __used __init register_ip_vs_protocol(struct ip_vs_protocol *pp)
 	return 0;
 }
 
+#if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP) || \
+    defined(CONFIG_IP_VS_PROTO_SCTP) || defined(CONFIG_IP_VS_PROTO_AH) || \
+    defined(CONFIG_IP_VS_PROTO_ESP)
+/*
+ *	register an ipvs protocols netns related data
+ */
+static int
+register_ip_vs_proto_netns(struct net *net, struct ip_vs_protocol *pp)
+{
+	struct netns_ipvs *ipvs = net_ipvs(net);
+	unsigned hash = IP_VS_PROTO_HASH(pp->protocol);
+	struct ip_vs_proto_data *pd =
+			kzalloc(sizeof(struct ip_vs_proto_data), GFP_ATOMIC);
+
+	if (!pd) {
+		pr_err("%s(): no memory.\n", __func__);
+		return -ENOMEM;
+	}
+	pd->pp = pp;	/* For speed issues */
+	pd->next = ipvs->proto_data_table[hash];
+	ipvs->proto_data_table[hash] = pd;
+	atomic_set(&pd->appcnt, 0);	/* Init app counter */
+
+	if (pp->init_netns != NULL)
+		pp->init_netns(net, pd);
+
+	return 0;
+}
+#endif
 
 /*
  *	unregister an ipvs protocol
@@ -82,6 +111,29 @@ static int unregister_ip_vs_protocol(struct ip_vs_protocol *pp)
 	return -ESRCH;
 }
 
+/*
+ *	unregister an ipvs protocols netns data
+ */
+static int
+unregister_ip_vs_proto_netns(struct net *net, struct ip_vs_proto_data *pd)
+{
+	struct netns_ipvs *ipvs = net_ipvs(net);
+	struct ip_vs_proto_data **pd_p;
+	unsigned hash = IP_VS_PROTO_HASH(pd->pp->protocol);
+
+	pd_p = &ipvs->proto_data_table[hash];
+	for (; *pd_p; pd_p = &(*pd_p)->next) {
+		if (*pd_p == pd) {
+			*pd_p = pd->next;
+			if (pd->pp->exit_netns != NULL)
+				pd->pp->exit_netns(net, pd);
+			kfree(pd);
+			return 0;
+		}
+	}
+
+	return -ESRCH;
+}
 
 /*
  *	get ip_vs_protocol object by its proto.
@@ -100,19 +152,44 @@ struct ip_vs_protocol * ip_vs_proto_get(unsigned short proto)
 }
 EXPORT_SYMBOL(ip_vs_proto_get);
 
+/*
+ *	get ip_vs_protocol object data by netns and proto
+ */
+struct ip_vs_proto_data *
+__ipvs_proto_data_get(struct netns_ipvs *ipvs, unsigned short proto)
+{
+	struct ip_vs_proto_data *pd;
+	unsigned hash = IP_VS_PROTO_HASH(proto);
+
+	for (pd = ipvs->proto_data_table[hash]; pd; pd = pd->next) {
+		if (pd->pp->protocol == proto)
+			return pd;
+	}
+
+	return NULL;
+}
+
+struct ip_vs_proto_data *
+ip_vs_proto_data_get(struct net *net, unsigned short proto)
+{
+	struct netns_ipvs *ipvs = net_ipvs(net);
+
+	return __ipvs_proto_data_get(ipvs, proto);
+}
+EXPORT_SYMBOL(ip_vs_proto_data_get);
 
 /*
  *	Propagate event for state change to all protocols
  */
-void ip_vs_protocol_timeout_change(int flags)
+void ip_vs_protocol_timeout_change(struct netns_ipvs *ipvs, int flags)
 {
-	struct ip_vs_protocol *pp;
+	struct ip_vs_proto_data *pd;
 	int i;
 
 	for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) {
-		for (pp = ip_vs_proto_table[i]; pp; pp = pp->next) {
-			if (pp->timeout_change)
-				pp->timeout_change(pp, flags);
+		for (pd = ipvs->proto_data_table[i]; pd; pd = pd->next) {
+			if (pd->pp->timeout_change)
+				pd->pp->timeout_change(pd, flags);
 		}
 	}
 }
@@ -236,6 +313,46 @@ ip_vs_tcpudp_debug_packet(int af, struct ip_vs_protocol *pp,
 		ip_vs_tcpudp_debug_packet_v4(pp, skb, offset, msg);
 }
 
+/*
+ * per network name-space init
+ */
+static int __net_init __ip_vs_protocol_init(struct net *net)
+{
+#ifdef CONFIG_IP_VS_PROTO_TCP
+	register_ip_vs_proto_netns(net, &ip_vs_protocol_tcp);
+#endif
+#ifdef CONFIG_IP_VS_PROTO_UDP
+	register_ip_vs_proto_netns(net, &ip_vs_protocol_udp);
+#endif
+#ifdef CONFIG_IP_VS_PROTO_SCTP
+	register_ip_vs_proto_netns(net, &ip_vs_protocol_sctp);
+#endif
+#ifdef CONFIG_IP_VS_PROTO_AH
+	register_ip_vs_proto_netns(net, &ip_vs_protocol_ah);
+#endif
+#ifdef CONFIG_IP_VS_PROTO_ESP
+	register_ip_vs_proto_netns(net, &ip_vs_protocol_esp);
+#endif
+	return 0;
+}
+
+static void __net_exit __ip_vs_protocol_cleanup(struct net *net)
+{
+	struct netns_ipvs *ipvs = net_ipvs(net);
+	struct ip_vs_proto_data *pd;
+	int i;
+
+	/* unregister all the ipvs proto data for this netns */
+	for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) {
+		while ((pd = ipvs->proto_data_table[i]) != NULL)
+			unregister_ip_vs_proto_netns(net, pd);
+	}
+}
+
+static struct pernet_operations ipvs_proto_ops = {
+	.init = __ip_vs_protocol_init,
+	.exit = __ip_vs_protocol_cleanup,
+};
 
 int __init ip_vs_protocol_init(void)
 {
@@ -265,6 +382,7 @@ int __init ip_vs_protocol_init(void)
 	REGISTER_PROTOCOL(&ip_vs_protocol_esp);
 #endif
 	pr_info("Registered protocols (%s)\n", &protocols[2]);
+	return register_pernet_subsys(&ipvs_proto_ops);
 
 	return 0;
 }
@@ -275,6 +393,7 @@ void ip_vs_protocol_cleanup(void)
 	struct ip_vs_protocol *pp;
 	int i;
 
+	unregister_pernet_subsys(&ipvs_proto_ops);
 	/* unregister all the ipvs protocols */
 	for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) {
 		while ((pp = ip_vs_proto_table[i]) != NULL)
diff --git a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
index 3a04611..5b8eb8b 100644
--- a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
@@ -41,28 +41,30 @@ struct isakmp_hdr {
 #define PORT_ISAKMP	500
 
 static void
-ah_esp_conn_fill_param_proto(int af, const struct ip_vs_iphdr *iph,
-			     int inverse, struct ip_vs_conn_param *p)
+ah_esp_conn_fill_param_proto(struct net *net, int af,
+			     const struct ip_vs_iphdr *iph, int inverse,
+			     struct ip_vs_conn_param *p)
 {
 	if (likely(!inverse))
-		ip_vs_conn_fill_param(af, IPPROTO_UDP,
+		ip_vs_conn_fill_param(net, af, IPPROTO_UDP,
 				      &iph->saddr, htons(PORT_ISAKMP),
 				      &iph->daddr, htons(PORT_ISAKMP), p);
 	else
-		ip_vs_conn_fill_param(af, IPPROTO_UDP,
+		ip_vs_conn_fill_param(net, af, IPPROTO_UDP,
 				      &iph->daddr, htons(PORT_ISAKMP),
 				      &iph->saddr, htons(PORT_ISAKMP), p);
 }
 
 static struct ip_vs_conn *
-ah_esp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
+ah_esp_conn_in_get(int af, const struct sk_buff *skb,
 		   const struct ip_vs_iphdr *iph, unsigned int proto_off,
 		   int inverse)
 {
 	struct ip_vs_conn *cp;
 	struct ip_vs_conn_param p;
+	struct net *net = skb_net(skb);
 
-	ah_esp_conn_fill_param_proto(af, iph, inverse, &p);
+	ah_esp_conn_fill_param_proto(net, af, iph, inverse, &p);
 	cp = ip_vs_conn_in_get(&p);
 	if (!cp) {
 		/*
@@ -72,7 +74,7 @@ ah_esp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
 		IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for outin packet "
 			      "%s%s %s->%s\n",
 			      inverse ? "ICMP+" : "",
-			      pp->name,
+			      ip_vs_proto_get(iph->protocol)->name,
 			      IP_VS_DBG_ADDR(af, &iph->saddr),
 			      IP_VS_DBG_ADDR(af, &iph->daddr));
 	}
@@ -83,21 +85,21 @@ ah_esp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
 
 static struct ip_vs_conn *
 ah_esp_conn_out_get(int af, const struct sk_buff *skb,
-		    struct ip_vs_protocol *pp,
 		    const struct ip_vs_iphdr *iph,
 		    unsigned int proto_off,
 		    int inverse)
 {
 	struct ip_vs_conn *cp;
 	struct ip_vs_conn_param p;
+	struct net *net = skb_net(skb);
 
-	ah_esp_conn_fill_param_proto(af, iph, inverse, &p);
+	ah_esp_conn_fill_param_proto(net, af, iph, inverse, &p);
 	cp = ip_vs_conn_out_get(&p);
 	if (!cp) {
 		IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for inout packet "
 			      "%s%s %s->%s\n",
 			      inverse ? "ICMP+" : "",
-			      pp->name,
+			      ip_vs_proto_get(iph->protocol)->name,
 			      IP_VS_DBG_ADDR(af, &iph->saddr),
 			      IP_VS_DBG_ADDR(af, &iph->daddr));
 	}
@@ -107,7 +109,7 @@ ah_esp_conn_out_get(int af, const struct sk_buff *skb,
 
 
 static int
-ah_esp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
+ah_esp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
 		     int *verdict, struct ip_vs_conn **cpp)
 {
 	/*
@@ -117,26 +119,14 @@ ah_esp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
 	return 0;
 }
 
-static void ah_esp_init(struct ip_vs_protocol *pp)
-{
-	/* nothing to do now */
-}
-
-
-static void ah_esp_exit(struct ip_vs_protocol *pp)
-{
-	/* nothing to do now */
-}
-
-
 #ifdef CONFIG_IP_VS_PROTO_AH
 struct ip_vs_protocol ip_vs_protocol_ah = {
 	.name =			"AH",
 	.protocol =		IPPROTO_AH,
 	.num_states =		1,
 	.dont_defrag =		1,
-	.init =			ah_esp_init,
-	.exit =			ah_esp_exit,
+	.init =			NULL,
+	.exit =			NULL,
 	.conn_schedule =	ah_esp_conn_schedule,
 	.conn_in_get =		ah_esp_conn_in_get,
 	.conn_out_get =		ah_esp_conn_out_get,
@@ -149,7 +139,6 @@ struct ip_vs_protocol ip_vs_protocol_ah = {
 	.app_conn_bind =	NULL,
 	.debug_packet =		ip_vs_tcpudp_debug_packet,
 	.timeout_change =	NULL,		/* ISAKMP */
-	.set_state_timeout =	NULL,
 };
 #endif
 
@@ -159,8 +148,8 @@ struct ip_vs_protocol ip_vs_protocol_esp = {
 	.protocol =		IPPROTO_ESP,
 	.num_states =		1,
 	.dont_defrag =		1,
-	.init =			ah_esp_init,
-	.exit =			ah_esp_exit,
+	.init =			NULL,
+	.exit =			NULL,
 	.conn_schedule =	ah_esp_conn_schedule,
 	.conn_in_get =		ah_esp_conn_in_get,
 	.conn_out_get =		ah_esp_conn_out_get,
diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c
index 1ea96bcd..b027ccc 100644
--- a/net/netfilter/ipvs/ip_vs_proto_sctp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c
@@ -9,9 +9,10 @@
 #include <net/ip_vs.h>
 
 static int
-sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
+sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
 		   int *verdict, struct ip_vs_conn **cpp)
 {
+	struct net *net;
 	struct ip_vs_service *svc;
 	sctp_chunkhdr_t _schunkh, *sch;
 	sctp_sctphdr_t *sh, _sctph;
@@ -27,13 +28,13 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
 				 sizeof(_schunkh), &_schunkh);
 	if (sch == NULL)
 		return 0;
-
+	net = skb_net(skb);
 	if ((sch->type == SCTP_CID_INIT) &&
-	    (svc = ip_vs_service_get(af, skb->mark, iph.protocol,
+	    (svc = ip_vs_service_get(net, af, skb->mark, iph.protocol,
 				     &iph.daddr, sh->dest))) {
 		int ignored;
 
-		if (ip_vs_todrop()) {
+		if (ip_vs_todrop(net_ipvs(net))) {
 			/*
 			 * It seems that we are very loaded.
 			 * We have to drop this packet :(
@@ -46,14 +47,19 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
 		 * Let the virtual server select a real server for the
 		 * incoming connection, and create a connection entry.
 		 */
-		*cpp = ip_vs_schedule(svc, skb, pp, &ignored);
-		if (!*cpp && !ignored) {
-			*verdict = ip_vs_leave(svc, skb, pp);
+		*cpp = ip_vs_schedule(svc, skb, pd, &ignored);
+		if (!*cpp && ignored <= 0) {
+			if (!ignored)
+				*verdict = ip_vs_leave(svc, skb, pd);
+			else {
+				ip_vs_service_put(svc);
+				*verdict = NF_DROP;
+			}
 			return 0;
 		}
 		ip_vs_service_put(svc);
 	}
-
+	/* NF_ACCEPT */
 	return 1;
 }
 
@@ -856,7 +862,7 @@ static struct ipvs_sctp_nextstate
 /*
  *      Timeout table[state]
  */
-static int sctp_timeouts[IP_VS_SCTP_S_LAST + 1] = {
+static const int sctp_timeouts[IP_VS_SCTP_S_LAST + 1] = {
 	[IP_VS_SCTP_S_NONE]         =     2 * HZ,
 	[IP_VS_SCTP_S_INIT_CLI]     =     1 * 60 * HZ,
 	[IP_VS_SCTP_S_INIT_SER]     =     1 * 60 * HZ,
@@ -900,20 +906,8 @@ static const char *sctp_state_name(int state)
 	return "?";
 }
 
-static void sctp_timeout_change(struct ip_vs_protocol *pp, int flags)
-{
-}
-
-static int
-sctp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to)
-{
-
-return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_SCTP_S_LAST,
-				sctp_state_name_table, sname, to);
-}
-
 static inline int
-set_sctp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
+set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp,
 		int direction, const struct sk_buff *skb)
 {
 	sctp_chunkhdr_t _sctpch, *sch;
@@ -971,7 +965,7 @@ set_sctp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
 
 		IP_VS_DBG_BUF(8, "%s %s  %s:%d->"
 				"%s:%d state: %s->%s conn->refcnt:%d\n",
-				pp->name,
+				pd->pp->name,
 				((direction == IP_VS_DIR_OUTPUT) ?
 				 "output " : "input "),
 				IP_VS_DBG_ADDR(cp->af, &cp->daddr),
@@ -995,75 +989,73 @@ set_sctp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
 			}
 		}
 	}
+	if (likely(pd))
+		cp->timeout = pd->timeout_table[cp->state = next_state];
+	else	/* What to do ? */
+		cp->timeout = sctp_timeouts[cp->state = next_state];
 
-	 cp->timeout = pp->timeout_table[cp->state = next_state];
-
-	 return 1;
+	return 1;
 }
 
 static int
 sctp_state_transition(struct ip_vs_conn *cp, int direction,
-		const struct sk_buff *skb, struct ip_vs_protocol *pp)
+		const struct sk_buff *skb, struct ip_vs_proto_data *pd)
 {
 	int ret = 0;
 
 	spin_lock(&cp->lock);
-	ret = set_sctp_state(pp, cp, direction, skb);
+	ret = set_sctp_state(pd, cp, direction, skb);
 	spin_unlock(&cp->lock);
 
 	return ret;
 }
 
-/*
- *      Hash table for SCTP application incarnations
- */
-#define SCTP_APP_TAB_BITS        4
-#define SCTP_APP_TAB_SIZE        (1 << SCTP_APP_TAB_BITS)
-#define SCTP_APP_TAB_MASK        (SCTP_APP_TAB_SIZE - 1)
-
-static struct list_head sctp_apps[SCTP_APP_TAB_SIZE];
-static DEFINE_SPINLOCK(sctp_app_lock);
-
 static inline __u16 sctp_app_hashkey(__be16 port)
 {
 	return (((__force u16)port >> SCTP_APP_TAB_BITS) ^ (__force u16)port)
 		& SCTP_APP_TAB_MASK;
 }
 
-static int sctp_register_app(struct ip_vs_app *inc)
+static int sctp_register_app(struct net *net, struct ip_vs_app *inc)
 {
 	struct ip_vs_app *i;
 	__u16 hash;
 	__be16 port = inc->port;
 	int ret = 0;
+	struct netns_ipvs *ipvs = net_ipvs(net);
+	struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_SCTP);
 
 	hash = sctp_app_hashkey(port);
 
-	spin_lock_bh(&sctp_app_lock);
-	list_for_each_entry(i, &sctp_apps[hash], p_list) {
+	spin_lock_bh(&ipvs->sctp_app_lock);
+	list_for_each_entry(i, &ipvs->sctp_apps[hash], p_list) {
 		if (i->port == port) {
 			ret = -EEXIST;
 			goto out;
 		}
 	}
-	list_add(&inc->p_list, &sctp_apps[hash]);
-	atomic_inc(&ip_vs_protocol_sctp.appcnt);
+	list_add(&inc->p_list, &ipvs->sctp_apps[hash]);
+	atomic_inc(&pd->appcnt);
 out:
-	spin_unlock_bh(&sctp_app_lock);
+	spin_unlock_bh(&ipvs->sctp_app_lock);
 
 	return ret;
 }
 
-static void sctp_unregister_app(struct ip_vs_app *inc)
+static void sctp_unregister_app(struct net *net, struct ip_vs_app *inc)
 {
-	spin_lock_bh(&sctp_app_lock);
-	atomic_dec(&ip_vs_protocol_sctp.appcnt);
+	struct netns_ipvs *ipvs = net_ipvs(net);
+	struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_SCTP);
+
+	spin_lock_bh(&ipvs->sctp_app_lock);
+	atomic_dec(&pd->appcnt);
 	list_del(&inc->p_list);
-	spin_unlock_bh(&sctp_app_lock);
+	spin_unlock_bh(&ipvs->sctp_app_lock);
 }
 
 static int sctp_app_conn_bind(struct ip_vs_conn *cp)
 {
+	struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp));
 	int hash;
 	struct ip_vs_app *inc;
 	int result = 0;
@@ -1074,12 +1066,12 @@ static int sctp_app_conn_bind(struct ip_vs_conn *cp)
 	/* Lookup application incarnations and bind the right one */
 	hash = sctp_app_hashkey(cp->vport);
 
-	spin_lock(&sctp_app_lock);
-	list_for_each_entry(inc, &sctp_apps[hash], p_list) {
+	spin_lock(&ipvs->sctp_app_lock);
+	list_for_each_entry(inc, &ipvs->sctp_apps[hash], p_list) {
 		if (inc->port == cp->vport) {
 			if (unlikely(!ip_vs_app_inc_get(inc)))
 				break;
-			spin_unlock(&sctp_app_lock);
+			spin_unlock(&ipvs->sctp_app_lock);
 
 			IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->"
 					"%s:%u to app %s on port %u\n",
@@ -1095,43 +1087,50 @@ static int sctp_app_conn_bind(struct ip_vs_conn *cp)
 			goto out;
 		}
 	}
-	spin_unlock(&sctp_app_lock);
+	spin_unlock(&ipvs->sctp_app_lock);
 out:
 	return result;
 }
 
-static void ip_vs_sctp_init(struct ip_vs_protocol *pp)
+/* ---------------------------------------------
+ *   timeouts is netns related now.
+ * ---------------------------------------------
+ */
+static void __ip_vs_sctp_init(struct net *net, struct ip_vs_proto_data *pd)
 {
-	IP_VS_INIT_HASH_TABLE(sctp_apps);
-	pp->timeout_table = sctp_timeouts;
-}
+	struct netns_ipvs *ipvs = net_ipvs(net);
 
+	ip_vs_init_hash_table(ipvs->sctp_apps, SCTP_APP_TAB_SIZE);
+	spin_lock_init(&ipvs->sctp_app_lock);
+	pd->timeout_table = ip_vs_create_timeout_table((int *)sctp_timeouts,
+							sizeof(sctp_timeouts));
+}
 
-static void ip_vs_sctp_exit(struct ip_vs_protocol *pp)
+static void __ip_vs_sctp_exit(struct net *net, struct ip_vs_proto_data *pd)
 {
-
+	kfree(pd->timeout_table);
 }
 
 struct ip_vs_protocol ip_vs_protocol_sctp = {
-	.name = "SCTP",
-	.protocol = IPPROTO_SCTP,
-	.num_states = IP_VS_SCTP_S_LAST,
-	.dont_defrag = 0,
-	.appcnt = ATOMIC_INIT(0),
-	.init = ip_vs_sctp_init,
-	.exit = ip_vs_sctp_exit,
-	.register_app = sctp_register_app,
+	.name		= "SCTP",
+	.protocol	= IPPROTO_SCTP,
+	.num_states	= IP_VS_SCTP_S_LAST,
+	.dont_defrag	= 0,
+	.init		= NULL,
+	.exit		= NULL,
+	.init_netns	= __ip_vs_sctp_init,
+	.exit_netns	= __ip_vs_sctp_exit,
+	.register_app	= sctp_register_app,
 	.unregister_app = sctp_unregister_app,
-	.conn_schedule = sctp_conn_schedule,
-	.conn_in_get = ip_vs_conn_in_get_proto,
-	.conn_out_get = ip_vs_conn_out_get_proto,
-	.snat_handler = sctp_snat_handler,
-	.dnat_handler = sctp_dnat_handler,
-	.csum_check = sctp_csum_check,
-	.state_name = sctp_state_name,
+	.conn_schedule	= sctp_conn_schedule,
+	.conn_in_get	= ip_vs_conn_in_get_proto,
+	.conn_out_get	= ip_vs_conn_out_get_proto,
+	.snat_handler	= sctp_snat_handler,
+	.dnat_handler	= sctp_dnat_handler,
+	.csum_check	= sctp_csum_check,
+	.state_name	= sctp_state_name,
 	.state_transition = sctp_state_transition,
-	.app_conn_bind = sctp_app_conn_bind,
-	.debug_packet = ip_vs_tcpudp_debug_packet,
-	.timeout_change = sctp_timeout_change,
-	.set_state_timeout = sctp_set_state_timeout,
+	.app_conn_bind	= sctp_app_conn_bind,
+	.debug_packet	= ip_vs_tcpudp_debug_packet,
+	.timeout_change	= NULL,
 };
diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c
index f6c5200..c0cc341 100644
--- a/net/netfilter/ipvs/ip_vs_proto_tcp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c
@@ -9,8 +9,12 @@
  *              as published by the Free Software Foundation; either version
  *              2 of the License, or (at your option) any later version.
  *
- * Changes:
+ * Changes:     Hans Schillstrom <hans.schillstrom@ericsson.com>
  *
+ *              Network name space (netns) aware.
+ *              Global data moved to netns i.e struct netns_ipvs
+ *              tcp_timeouts table has copy per netns in a hash table per
+ *              protocol ip_vs_proto_data and is handled by netns
  */
 
 #define KMSG_COMPONENT "IPVS"
@@ -28,9 +32,10 @@
 #include <net/ip_vs.h>
 
 static int
-tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
+tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
 		  int *verdict, struct ip_vs_conn **cpp)
 {
+	struct net *net;
 	struct ip_vs_service *svc;
 	struct tcphdr _tcph, *th;
 	struct ip_vs_iphdr iph;
@@ -42,14 +47,14 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
 		*verdict = NF_DROP;
 		return 0;
 	}
-
+	net = skb_net(skb);
 	/* No !th->ack check to allow scheduling on SYN+ACK for Active FTP */
 	if (th->syn &&
-	    (svc = ip_vs_service_get(af, skb->mark, iph.protocol, &iph.daddr,
-				     th->dest))) {
+	    (svc = ip_vs_service_get(net, af, skb->mark, iph.protocol,
+				     &iph.daddr, th->dest))) {
 		int ignored;
 
-		if (ip_vs_todrop()) {
+		if (ip_vs_todrop(net_ipvs(net))) {
 			/*
 			 * It seems that we are very loaded.
 			 * We have to drop this packet :(
@@ -63,13 +68,19 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
 		 * Let the virtual server select a real server for the
 		 * incoming connection, and create a connection entry.
 		 */
-		*cpp = ip_vs_schedule(svc, skb, pp, &ignored);
-		if (!*cpp && !ignored) {
-			*verdict = ip_vs_leave(svc, skb, pp);
+		*cpp = ip_vs_schedule(svc, skb, pd, &ignored);
+		if (!*cpp && ignored <= 0) {
+			if (!ignored)
+				*verdict = ip_vs_leave(svc, skb, pd);
+			else {
+				ip_vs_service_put(svc);
+				*verdict = NF_DROP;
+			}
 			return 0;
 		}
 		ip_vs_service_put(svc);
 	}
+	/* NF_ACCEPT */
 	return 1;
 }
 
@@ -338,7 +349,7 @@ static const int tcp_state_off[IP_VS_DIR_LAST] = {
 /*
  *	Timeout table[state]
  */
-static int tcp_timeouts[IP_VS_TCP_S_LAST+1] = {
+static const int tcp_timeouts[IP_VS_TCP_S_LAST+1] = {
 	[IP_VS_TCP_S_NONE]		=	2*HZ,
 	[IP_VS_TCP_S_ESTABLISHED]	=	15*60*HZ,
 	[IP_VS_TCP_S_SYN_SENT]		=	2*60*HZ,
@@ -437,10 +448,7 @@ static struct tcp_states_t tcp_states_dos [] = {
 /*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},
 };
 
-static struct tcp_states_t *tcp_state_table = tcp_states;
-
-
-static void tcp_timeout_change(struct ip_vs_protocol *pp, int flags)
+static void tcp_timeout_change(struct ip_vs_proto_data *pd, int flags)
 {
 	int on = (flags & 1);		/* secure_tcp */
 
@@ -450,14 +458,7 @@ static void tcp_timeout_change(struct ip_vs_protocol *pp, int flags)
 	** for most if not for all of the applications. Something
 	** like "capabilities" (flags) for each object.
 	*/
-	tcp_state_table = (on? tcp_states_dos : tcp_states);
-}
-
-static int
-tcp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to)
-{
-	return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_TCP_S_LAST,
-				       tcp_state_name_table, sname, to);
+	pd->tcp_state_table = (on ? tcp_states_dos : tcp_states);
 }
 
 static inline int tcp_state_idx(struct tcphdr *th)
@@ -474,7 +475,7 @@ static inline int tcp_state_idx(struct tcphdr *th)
 }
 
 static inline void
-set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
+set_tcp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp,
 	      int direction, struct tcphdr *th)
 {
 	int state_idx;
@@ -497,7 +498,8 @@ set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
 		goto tcp_state_out;
 	}
 
-	new_state = tcp_state_table[state_off+state_idx].next_state[cp->state];
+	new_state =
+		pd->tcp_state_table[state_off+state_idx].next_state[cp->state];
 
   tcp_state_out:
 	if (new_state != cp->state) {
@@ -505,7 +507,7 @@ set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
 
 		IP_VS_DBG_BUF(8, "%s %s [%c%c%c%c] %s:%d->"
 			      "%s:%d state: %s->%s conn->refcnt:%d\n",
-			      pp->name,
+			      pd->pp->name,
 			      ((state_off == TCP_DIR_OUTPUT) ?
 			       "output " : "input "),
 			      th->syn ? 'S' : '.',
@@ -535,17 +537,19 @@ set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
 		}
 	}
 
-	cp->timeout = pp->timeout_table[cp->state = new_state];
+	if (likely(pd))
+		cp->timeout = pd->timeout_table[cp->state = new_state];
+	else	/* What to do ? */
+		cp->timeout = tcp_timeouts[cp->state = new_state];
 }
 
-
 /*
  *	Handle state transitions
  */
 static int
 tcp_state_transition(struct ip_vs_conn *cp, int direction,
 		     const struct sk_buff *skb,
-		     struct ip_vs_protocol *pp)
+		     struct ip_vs_proto_data *pd)
 {
 	struct tcphdr _tcph, *th;
 
@@ -560,23 +564,12 @@ tcp_state_transition(struct ip_vs_conn *cp, int direction,
 		return 0;
 
 	spin_lock(&cp->lock);
-	set_tcp_state(pp, cp, direction, th);
+	set_tcp_state(pd, cp, direction, th);
 	spin_unlock(&cp->lock);
 
 	return 1;
 }
 
-
-/*
- *	Hash table for TCP application incarnations
- */
-#define	TCP_APP_TAB_BITS	4
-#define	TCP_APP_TAB_SIZE	(1 << TCP_APP_TAB_BITS)
-#define	TCP_APP_TAB_MASK	(TCP_APP_TAB_SIZE - 1)
-
-static struct list_head tcp_apps[TCP_APP_TAB_SIZE];
-static DEFINE_SPINLOCK(tcp_app_lock);
-
 static inline __u16 tcp_app_hashkey(__be16 port)
 {
 	return (((__force u16)port >> TCP_APP_TAB_BITS) ^ (__force u16)port)
@@ -584,44 +577,50 @@ static inline __u16 tcp_app_hashkey(__be16 port)
 }
 
 
-static int tcp_register_app(struct ip_vs_app *inc)
+static int tcp_register_app(struct net *net, struct ip_vs_app *inc)
 {
 	struct ip_vs_app *i;
 	__u16 hash;
 	__be16 port = inc->port;
 	int ret = 0;
+	struct netns_ipvs *ipvs = net_ipvs(net);
+	struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
 
 	hash = tcp_app_hashkey(port);
 
-	spin_lock_bh(&tcp_app_lock);
-	list_for_each_entry(i, &tcp_apps[hash], p_list) {
+	spin_lock_bh(&ipvs->tcp_app_lock);
+	list_for_each_entry(i, &ipvs->tcp_apps[hash], p_list) {
 		if (i->port == port) {
 			ret = -EEXIST;
 			goto out;
 		}
 	}
-	list_add(&inc->p_list, &tcp_apps[hash]);
-	atomic_inc(&ip_vs_protocol_tcp.appcnt);
+	list_add(&inc->p_list, &ipvs->tcp_apps[hash]);
+	atomic_inc(&pd->appcnt);
 
   out:
-	spin_unlock_bh(&tcp_app_lock);
+	spin_unlock_bh(&ipvs->tcp_app_lock);
 	return ret;
 }
 
 
 static void
-tcp_unregister_app(struct ip_vs_app *inc)
+tcp_unregister_app(struct net *net, struct ip_vs_app *inc)
 {
-	spin_lock_bh(&tcp_app_lock);
-	atomic_dec(&ip_vs_protocol_tcp.appcnt);
+	struct netns_ipvs *ipvs = net_ipvs(net);
+	struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
+
+	spin_lock_bh(&ipvs->tcp_app_lock);
+	atomic_dec(&pd->appcnt);
 	list_del(&inc->p_list);
-	spin_unlock_bh(&tcp_app_lock);
+	spin_unlock_bh(&ipvs->tcp_app_lock);
 }
 
 
 static int
 tcp_app_conn_bind(struct ip_vs_conn *cp)
 {
+	struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp));
 	int hash;
 	struct ip_vs_app *inc;
 	int result = 0;
@@ -633,12 +632,12 @@ tcp_app_conn_bind(struct ip_vs_conn *cp)
 	/* Lookup application incarnations and bind the right one */
 	hash = tcp_app_hashkey(cp->vport);
 
-	spin_lock(&tcp_app_lock);
-	list_for_each_entry(inc, &tcp_apps[hash], p_list) {
+	spin_lock(&ipvs->tcp_app_lock);
+	list_for_each_entry(inc, &ipvs->tcp_apps[hash], p_list) {
 		if (inc->port == cp->vport) {
 			if (unlikely(!ip_vs_app_inc_get(inc)))
 				break;
-			spin_unlock(&tcp_app_lock);
+			spin_unlock(&ipvs->tcp_app_lock);
 
 			IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->"
 				      "%s:%u to app %s on port %u\n",
@@ -655,7 +654,7 @@ tcp_app_conn_bind(struct ip_vs_conn *cp)
 			goto out;
 		}
 	}
-	spin_unlock(&tcp_app_lock);
+	spin_unlock(&ipvs->tcp_app_lock);
 
   out:
 	return result;
@@ -665,24 +664,35 @@ tcp_app_conn_bind(struct ip_vs_conn *cp)
 /*
  *	Set LISTEN timeout. (ip_vs_conn_put will setup timer)
  */
-void ip_vs_tcp_conn_listen(struct ip_vs_conn *cp)
+void ip_vs_tcp_conn_listen(struct net *net, struct ip_vs_conn *cp)
 {
+	struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
+
 	spin_lock(&cp->lock);
 	cp->state = IP_VS_TCP_S_LISTEN;
-	cp->timeout = ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_LISTEN];
+	cp->timeout = (pd ? pd->timeout_table[IP_VS_TCP_S_LISTEN]
+			   : tcp_timeouts[IP_VS_TCP_S_LISTEN]);
 	spin_unlock(&cp->lock);
 }
 
-
-static void ip_vs_tcp_init(struct ip_vs_protocol *pp)
+/* ---------------------------------------------
+ *   timeouts is netns related now.
+ * ---------------------------------------------
+ */
+static void __ip_vs_tcp_init(struct net *net, struct ip_vs_proto_data *pd)
 {
-	IP_VS_INIT_HASH_TABLE(tcp_apps);
-	pp->timeout_table = tcp_timeouts;
-}
+	struct netns_ipvs *ipvs = net_ipvs(net);
 
+	ip_vs_init_hash_table(ipvs->tcp_apps, TCP_APP_TAB_SIZE);
+	spin_lock_init(&ipvs->tcp_app_lock);
+	pd->timeout_table = ip_vs_create_timeout_table((int *)tcp_timeouts,
+							sizeof(tcp_timeouts));
+	pd->tcp_state_table =  tcp_states;
+}
 
-static void ip_vs_tcp_exit(struct ip_vs_protocol *pp)
+static void __ip_vs_tcp_exit(struct net *net, struct ip_vs_proto_data *pd)
 {
+	kfree(pd->timeout_table);
 }
 
 
@@ -691,9 +701,10 @@ struct ip_vs_protocol ip_vs_protocol_tcp = {
 	.protocol =		IPPROTO_TCP,
 	.num_states =		IP_VS_TCP_S_LAST,
 	.dont_defrag =		0,
-	.appcnt =		ATOMIC_INIT(0),
-	.init =			ip_vs_tcp_init,
-	.exit =			ip_vs_tcp_exit,
+	.init =			NULL,
+	.exit =			NULL,
+	.init_netns =		__ip_vs_tcp_init,
+	.exit_netns =		__ip_vs_tcp_exit,
 	.register_app =		tcp_register_app,
 	.unregister_app =	tcp_unregister_app,
 	.conn_schedule =	tcp_conn_schedule,
@@ -707,5 +718,4 @@ struct ip_vs_protocol ip_vs_protocol_tcp = {
 	.app_conn_bind =	tcp_app_conn_bind,
 	.debug_packet =		ip_vs_tcpudp_debug_packet,
 	.timeout_change =	tcp_timeout_change,
-	.set_state_timeout =	tcp_set_state_timeout,
 };
diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c
index 9d106a0..f1282cb 100644
--- a/net/netfilter/ipvs/ip_vs_proto_udp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_udp.c
@@ -9,7 +9,8 @@
  *              as published by the Free Software Foundation; either version
  *              2 of the License, or (at your option) any later version.
  *
- * Changes:
+ * Changes:     Hans Schillstrom <hans.schillstrom@ericsson.com>
+ *              Network name space (netns) aware.
  *
  */
 
@@ -28,9 +29,10 @@
 #include <net/ip6_checksum.h>
 
 static int
-udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
+udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
 		  int *verdict, struct ip_vs_conn **cpp)
 {
+	struct net *net;
 	struct ip_vs_service *svc;
 	struct udphdr _udph, *uh;
 	struct ip_vs_iphdr iph;
@@ -42,13 +44,13 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
 		*verdict = NF_DROP;
 		return 0;
 	}
-
-	svc = ip_vs_service_get(af, skb->mark, iph.protocol,
+	net = skb_net(skb);
+	svc = ip_vs_service_get(net, af, skb->mark, iph.protocol,
 				&iph.daddr, uh->dest);
 	if (svc) {
 		int ignored;
 
-		if (ip_vs_todrop()) {
+		if (ip_vs_todrop(net_ipvs(net))) {
 			/*
 			 * It seems that we are very loaded.
 			 * We have to drop this packet :(
@@ -62,13 +64,19 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
 		 * Let the virtual server select a real server for the
 		 * incoming connection, and create a connection entry.
 		 */
-		*cpp = ip_vs_schedule(svc, skb, pp, &ignored);
-		if (!*cpp && !ignored) {
-			*verdict = ip_vs_leave(svc, skb, pp);
+		*cpp = ip_vs_schedule(svc, skb, pd, &ignored);
+		if (!*cpp && ignored <= 0) {
+			if (!ignored)
+				*verdict = ip_vs_leave(svc, skb, pd);
+			else {
+				ip_vs_service_put(svc);
+				*verdict = NF_DROP;
+			}
 			return 0;
 		}
 		ip_vs_service_put(svc);
 	}
+	/* NF_ACCEPT */
 	return 1;
 }
 
@@ -338,19 +346,6 @@ udp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
 	return 1;
 }
 
-
-/*
- *	Note: the caller guarantees that only one of register_app,
- *	unregister_app or app_conn_bind is called each time.
- */
-
-#define	UDP_APP_TAB_BITS	4
-#define	UDP_APP_TAB_SIZE	(1 << UDP_APP_TAB_BITS)
-#define	UDP_APP_TAB_MASK	(UDP_APP_TAB_SIZE - 1)
-
-static struct list_head udp_apps[UDP_APP_TAB_SIZE];
-static DEFINE_SPINLOCK(udp_app_lock);
-
 static inline __u16 udp_app_hashkey(__be16 port)
 {
 	return (((__force u16)port >> UDP_APP_TAB_BITS) ^ (__force u16)port)
@@ -358,44 +353,50 @@ static inline __u16 udp_app_hashkey(__be16 port)
 }
 
 
-static int udp_register_app(struct ip_vs_app *inc)
+static int udp_register_app(struct net *net, struct ip_vs_app *inc)
 {
 	struct ip_vs_app *i;
 	__u16 hash;
 	__be16 port = inc->port;
 	int ret = 0;
+	struct netns_ipvs *ipvs = net_ipvs(net);
+	struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
 
 	hash = udp_app_hashkey(port);
 
 
-	spin_lock_bh(&udp_app_lock);
-	list_for_each_entry(i, &udp_apps[hash], p_list) {
+	spin_lock_bh(&ipvs->udp_app_lock);
+	list_for_each_entry(i, &ipvs->udp_apps[hash], p_list) {
 		if (i->port == port) {
 			ret = -EEXIST;
 			goto out;
 		}
 	}
-	list_add(&inc->p_list, &udp_apps[hash]);
-	atomic_inc(&ip_vs_protocol_udp.appcnt);
+	list_add(&inc->p_list, &ipvs->udp_apps[hash]);
+	atomic_inc(&pd->appcnt);
 
   out:
-	spin_unlock_bh(&udp_app_lock);
+	spin_unlock_bh(&ipvs->udp_app_lock);
 	return ret;
 }
 
 
 static void
-udp_unregister_app(struct ip_vs_app *inc)
+udp_unregister_app(struct net *net, struct ip_vs_app *inc)
 {
-	spin_lock_bh(&udp_app_lock);
-	atomic_dec(&ip_vs_protocol_udp.appcnt);
+	struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
+	struct netns_ipvs *ipvs = net_ipvs(net);
+
+	spin_lock_bh(&ipvs->udp_app_lock);
+	atomic_dec(&pd->appcnt);
 	list_del(&inc->p_list);
-	spin_unlock_bh(&udp_app_lock);
+	spin_unlock_bh(&ipvs->udp_app_lock);
 }
 
 
 static int udp_app_conn_bind(struct ip_vs_conn *cp)
 {
+	struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp));
 	int hash;
 	struct ip_vs_app *inc;
 	int result = 0;
@@ -407,12 +408,12 @@ static int udp_app_conn_bind(struct ip_vs_conn *cp)
 	/* Lookup application incarnations and bind the right one */
 	hash = udp_app_hashkey(cp->vport);
 
-	spin_lock(&udp_app_lock);
-	list_for_each_entry(inc, &udp_apps[hash], p_list) {
+	spin_lock(&ipvs->udp_app_lock);
+	list_for_each_entry(inc, &ipvs->udp_apps[hash], p_list) {
 		if (inc->port == cp->vport) {
 			if (unlikely(!ip_vs_app_inc_get(inc)))
 				break;
-			spin_unlock(&udp_app_lock);
+			spin_unlock(&ipvs->udp_app_lock);
 
 			IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->"
 				      "%s:%u to app %s on port %u\n",
@@ -429,14 +430,14 @@ static int udp_app_conn_bind(struct ip_vs_conn *cp)
 			goto out;
 		}
 	}
-	spin_unlock(&udp_app_lock);
+	spin_unlock(&ipvs->udp_app_lock);
 
   out:
 	return result;
 }
 
 
-static int udp_timeouts[IP_VS_UDP_S_LAST+1] = {
+static const int udp_timeouts[IP_VS_UDP_S_LAST+1] = {
 	[IP_VS_UDP_S_NORMAL]		=	5*60*HZ,
 	[IP_VS_UDP_S_LAST]		=	2*HZ,
 };
@@ -446,14 +447,6 @@ static const char *const udp_state_name_table[IP_VS_UDP_S_LAST+1] = {
 	[IP_VS_UDP_S_LAST]		=	"BUG!",
 };
 
-
-static int
-udp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to)
-{
-	return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_UDP_S_LAST,
-				       udp_state_name_table, sname, to);
-}
-
 static const char * udp_state_name(int state)
 {
 	if (state >= IP_VS_UDP_S_LAST)
@@ -464,20 +457,30 @@ static const char * udp_state_name(int state)
 static int
 udp_state_transition(struct ip_vs_conn *cp, int direction,
 		     const struct sk_buff *skb,
-		     struct ip_vs_protocol *pp)
+		     struct ip_vs_proto_data *pd)
 {
-	cp->timeout = pp->timeout_table[IP_VS_UDP_S_NORMAL];
+	if (unlikely(!pd)) {
+		pr_err("UDP no ns data\n");
+		return 0;
+	}
+
+	cp->timeout = pd->timeout_table[IP_VS_UDP_S_NORMAL];
 	return 1;
 }
 
-static void udp_init(struct ip_vs_protocol *pp)
+static void __udp_init(struct net *net, struct ip_vs_proto_data *pd)
 {
-	IP_VS_INIT_HASH_TABLE(udp_apps);
-	pp->timeout_table = udp_timeouts;
+	struct netns_ipvs *ipvs = net_ipvs(net);
+
+	ip_vs_init_hash_table(ipvs->udp_apps, UDP_APP_TAB_SIZE);
+	spin_lock_init(&ipvs->udp_app_lock);
+	pd->timeout_table = ip_vs_create_timeout_table((int *)udp_timeouts,
+							sizeof(udp_timeouts));
 }
 
-static void udp_exit(struct ip_vs_protocol *pp)
+static void __udp_exit(struct net *net, struct ip_vs_proto_data *pd)
 {
+	kfree(pd->timeout_table);
 }
 
 
@@ -486,8 +489,10 @@ struct ip_vs_protocol ip_vs_protocol_udp = {
 	.protocol =		IPPROTO_UDP,
 	.num_states =		IP_VS_UDP_S_LAST,
 	.dont_defrag =		0,
-	.init =			udp_init,
-	.exit =			udp_exit,
+	.init =			NULL,
+	.exit =			NULL,
+	.init_netns =		__udp_init,
+	.exit_netns =		__udp_exit,
 	.conn_schedule =	udp_conn_schedule,
 	.conn_in_get =		ip_vs_conn_in_get_proto,
 	.conn_out_get =		ip_vs_conn_out_get_proto,
@@ -501,5 +506,4 @@ struct ip_vs_protocol ip_vs_protocol_udp = {
 	.app_conn_bind =	udp_app_conn_bind,
 	.debug_packet =		ip_vs_tcpudp_debug_packet,
 	.timeout_change =	NULL,
-	.set_state_timeout =	udp_set_state_timeout,
 };
diff --git a/net/netfilter/ipvs/ip_vs_rr.c b/net/netfilter/ipvs/ip_vs_rr.c
index e210f37..c49b388 100644
--- a/net/netfilter/ipvs/ip_vs_rr.c
+++ b/net/netfilter/ipvs/ip_vs_rr.c
@@ -72,7 +72,7 @@ ip_vs_rr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
 		q = q->next;
 	} while (q != p);
 	write_unlock(&svc->sched_lock);
-	IP_VS_ERR_RL("RR: no destination available\n");
+	ip_vs_scheduler_err(svc, "no destination available");
 	return NULL;
 
   out:
diff --git a/net/netfilter/ipvs/ip_vs_sched.c b/net/netfilter/ipvs/ip_vs_sched.c
index 076ebe0..08dbdd5 100644
--- a/net/netfilter/ipvs/ip_vs_sched.c
+++ b/net/netfilter/ipvs/ip_vs_sched.c
@@ -29,6 +29,7 @@
 
 #include <net/ip_vs.h>
 
+EXPORT_SYMBOL(ip_vs_scheduler_err);
 /*
  *  IPVS scheduler list
  */
@@ -146,6 +147,30 @@ void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler)
 		module_put(scheduler->module);
 }
 
+/*
+ * Common error output helper for schedulers
+ */
+
+void ip_vs_scheduler_err(struct ip_vs_service *svc, const char *msg)
+{
+	if (svc->fwmark) {
+		IP_VS_ERR_RL("%s: FWM %u 0x%08X - %s\n",
+			     svc->scheduler->name, svc->fwmark,
+			     svc->fwmark, msg);
+#ifdef CONFIG_IP_VS_IPV6
+	} else if (svc->af == AF_INET6) {
+		IP_VS_ERR_RL("%s: %s [%pI6]:%d - %s\n",
+			     svc->scheduler->name,
+			     ip_vs_proto_name(svc->protocol),
+			     &svc->addr.in6, ntohs(svc->port), msg);
+#endif
+	} else {
+		IP_VS_ERR_RL("%s: %s %pI4:%d - %s\n",
+			     svc->scheduler->name,
+			     ip_vs_proto_name(svc->protocol),
+			     &svc->addr.ip, ntohs(svc->port), msg);
+	}
+}
 
 /*
  *  Register a scheduler in the scheduler list
diff --git a/net/netfilter/ipvs/ip_vs_sed.c b/net/netfilter/ipvs/ip_vs_sed.c
index 1ab75a9..89ead246 100644
--- a/net/netfilter/ipvs/ip_vs_sed.c
+++ b/net/netfilter/ipvs/ip_vs_sed.c
@@ -87,7 +87,7 @@ ip_vs_sed_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
 			goto nextstage;
 		}
 	}
-	IP_VS_ERR_RL("SED: no destination available\n");
+	ip_vs_scheduler_err(svc, "no destination available");
 	return NULL;
 
 	/*
diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c
index e6cc174..b5e2556 100644
--- a/net/netfilter/ipvs/ip_vs_sh.c
+++ b/net/netfilter/ipvs/ip_vs_sh.c
@@ -223,7 +223,7 @@ ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
 	    || !(dest->flags & IP_VS_DEST_F_AVAILABLE)
 	    || atomic_read(&dest->weight) <= 0
 	    || is_overloaded(dest)) {
-		IP_VS_ERR_RL("SH: no destination available\n");
+		ip_vs_scheduler_err(svc, "no destination available");
 		return NULL;
 	}
 
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index ab85aed..fecf24d 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -5,6 +5,18 @@
  *              high-performance and highly available server based on a
  *              cluster of servers.
  *
+ * Version 1,   is capable of handling both version 0 and 1 messages.
+ *              Version 0 is the plain old format.
+ *              Note Version 0 receivers will just drop Ver 1 messages.
+ *              Version 1 is capable of handle IPv6, Persistence data,
+ *              time-outs, and firewall marks.
+ *              In ver.1 "ip_vs_sync_conn_options" will be sent in netw. order.
+ *              Ver. 0 can be turned on by sysctl -w net.ipv4.vs.sync_version=0
+ *
+ * Definitions  Message: is a complete datagram
+ *              Sync_conn: is a part of a Message
+ *              Param Data is an option to a Sync_conn.
+ *
  * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
  *
  * ip_vs_sync:  sync connection info from master load balancer to backups
@@ -15,6 +27,8 @@
  *	Alexandre Cassen	:	Added SyncID support for incoming sync
  *					messages filtering.
  *	Justin Ossevoort	:	Fix endian problem on sync message size.
+ *	Hans Schillstrom	:	Added Version 1: i.e. IPv6,
+ *					Persistence support, fwmark and time-out.
  */
 
 #define KMSG_COMPONENT "IPVS"
@@ -35,6 +49,8 @@
 #include <linux/wait.h>
 #include <linux/kernel.h>
 
+#include <asm/unaligned.h>		/* Used for ntoh_seq and hton_seq */
+
 #include <net/ip.h>
 #include <net/sock.h>
 
@@ -43,11 +59,13 @@
 #define IP_VS_SYNC_GROUP 0xe0000051    /* multicast addr - 224.0.0.81 */
 #define IP_VS_SYNC_PORT  8848          /* multicast port */
 
+#define SYNC_PROTO_VER  1		/* Protocol version in header */
 
 /*
  *	IPVS sync connection entry
+ *	Version 0, i.e. original version.
  */
-struct ip_vs_sync_conn {
+struct ip_vs_sync_conn_v0 {
 	__u8			reserved;
 
 	/* Protocol, addresses and port numbers */
@@ -71,41 +89,159 @@ struct ip_vs_sync_conn_options {
 	struct ip_vs_seq        out_seq;        /* outgoing seq. struct */
 };
 
+/*
+     Sync Connection format (sync_conn)
+
+       0                   1                   2                   3
+       0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+      |    Type       |    Protocol   | Ver.  |        Size           |
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+      |                             Flags                             |
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+      |            State              |         cport                 |
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+      |            vport              |         dport                 |
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+      |                             fwmark                            |
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+      |                             timeout  (in sec.)                |
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+      |                              ...                              |
+      |                        IP-Addresses  (v4 or v6)               |
+      |                              ...                              |
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+  Optional Parameters.
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+      | Param. Type    | Param. Length |   Param. data                |
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+                               |
+      |                              ...                              |
+      |                               +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+      |                               | Param Type    | Param. Length |
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+      |                           Param  data                         |
+      |         Last Param data should be padded for 32 bit alignment |
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+*/
+
+/*
+ *  Type 0, IPv4 sync connection format
+ */
+struct ip_vs_sync_v4 {
+	__u8			type;
+	__u8			protocol;	/* Which protocol (TCP/UDP) */
+	__be16			ver_size;	/* Version msb 4 bits */
+	/* Flags and state transition */
+	__be32			flags;		/* status flags */
+	__be16			state;		/* state info 	*/
+	/* Protocol, addresses and port numbers */
+	__be16			cport;
+	__be16			vport;
+	__be16			dport;
+	__be32			fwmark;		/* Firewall mark from skb */
+	__be32			timeout;	/* cp timeout */
+	__be32			caddr;		/* client address */
+	__be32			vaddr;		/* virtual address */
+	__be32			daddr;		/* destination address */
+	/* The sequence options start here */
+	/* PE data padded to 32bit alignment after seq. options */
+};
+/*
+ * Type 2 messages IPv6
+ */
+struct ip_vs_sync_v6 {
+	__u8			type;
+	__u8			protocol;	/* Which protocol (TCP/UDP) */
+	__be16			ver_size;	/* Version msb 4 bits */
+	/* Flags and state transition */
+	__be32			flags;		/* status flags */
+	__be16			state;		/* state info 	*/
+	/* Protocol, addresses and port numbers */
+	__be16			cport;
+	__be16			vport;
+	__be16			dport;
+	__be32			fwmark;		/* Firewall mark from skb */
+	__be32			timeout;	/* cp timeout */
+	struct in6_addr		caddr;		/* client address */
+	struct in6_addr		vaddr;		/* virtual address */
+	struct in6_addr		daddr;		/* destination address */
+	/* The sequence options start here */
+	/* PE data padded to 32bit alignment after seq. options */
+};
+
+union ip_vs_sync_conn {
+	struct ip_vs_sync_v4	v4;
+	struct ip_vs_sync_v6	v6;
+};
+
+/* Bits in Type field in above */
+#define STYPE_INET6		0
+#define STYPE_F_INET6		(1 << STYPE_INET6)
+
+#define SVER_SHIFT		12		/* Shift to get version */
+#define SVER_MASK		0x0fff		/* Mask to strip version */
+
+#define IPVS_OPT_SEQ_DATA	1
+#define IPVS_OPT_PE_DATA	2
+#define IPVS_OPT_PE_NAME	3
+#define IPVS_OPT_PARAM		7
+
+#define IPVS_OPT_F_SEQ_DATA	(1 << (IPVS_OPT_SEQ_DATA-1))
+#define IPVS_OPT_F_PE_DATA	(1 << (IPVS_OPT_PE_DATA-1))
+#define IPVS_OPT_F_PE_NAME	(1 << (IPVS_OPT_PE_NAME-1))
+#define IPVS_OPT_F_PARAM	(1 << (IPVS_OPT_PARAM-1))
+
 struct ip_vs_sync_thread_data {
+	struct net *net;
 	struct socket *sock;
 	char *buf;
 };
 
-#define SIMPLE_CONN_SIZE  (sizeof(struct ip_vs_sync_conn))
+/* Version 0 definition of packet sizes */
+#define SIMPLE_CONN_SIZE  (sizeof(struct ip_vs_sync_conn_v0))
 #define FULL_CONN_SIZE  \
-(sizeof(struct ip_vs_sync_conn) + sizeof(struct ip_vs_sync_conn_options))
+(sizeof(struct ip_vs_sync_conn_v0) + sizeof(struct ip_vs_sync_conn_options))
 
 
 /*
-  The master mulitcasts messages to the backup load balancers in the
-  following format.
+  The master mulitcasts messages (Datagrams) to the backup load balancers
+  in the following format.
+
+ Version 1:
+  Note, first byte should be Zero, so ver 0 receivers will drop the packet.
 
        0                   1                   2                   3
        0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-      |  Count Conns  |    SyncID     |            Size               |
+      |      0        |    SyncID     |            Size               |
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+      |  Count Conns  |    Version    |    Reserved, set to Zero      |
       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
       |                                                               |
       |                    IPVS Sync Connection (1)                   |
       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
       |                            .                                  |
-      |                            .                                  |
+      ~                            .                                  ~
       |                            .                                  |
       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
       |                                                               |
       |                    IPVS Sync Connection (n)                   |
       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+ Version 0 Header
+       0                   1                   2                   3
+       0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+      |  Count Conns  |    SyncID     |            Size               |
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+      |                    IPVS Sync Connection (1)                   |
 */
 
 #define SYNC_MESG_HEADER_LEN	4
 #define MAX_CONNS_PER_SYNCBUFF	255 /* nr_conns in ip_vs_sync_mesg is 8 bit */
 
-struct ip_vs_sync_mesg {
+/* Version 0 header */
+struct ip_vs_sync_mesg_v0 {
 	__u8                    nr_conns;
 	__u8                    syncid;
 	__u16                   size;
@@ -113,9 +249,16 @@ struct ip_vs_sync_mesg {
 	/* ip_vs_sync_conn entries start here */
 };
 
-/* the maximum length of sync (sending/receiving) message */
-static int sync_send_mesg_maxlen;
-static int sync_recv_mesg_maxlen;
+/* Version 1 header */
+struct ip_vs_sync_mesg {
+	__u8			reserved;	/* must be zero */
+	__u8			syncid;
+	__u16			size;
+	__u8			nr_conns;
+	__s8			version;	/* SYNC_PROTO_VER  */
+	__u16			spare;
+	/* ip_vs_sync_conn entries start here */
+};
 
 struct ip_vs_sync_buff {
 	struct list_head        list;
@@ -127,28 +270,6 @@ struct ip_vs_sync_buff {
 	unsigned char           *end;
 };
 
-
-/* the sync_buff list head and the lock */
-static LIST_HEAD(ip_vs_sync_queue);
-static DEFINE_SPINLOCK(ip_vs_sync_lock);
-
-/* current sync_buff for accepting new conn entries */
-static struct ip_vs_sync_buff   *curr_sb = NULL;
-static DEFINE_SPINLOCK(curr_sb_lock);
-
-/* ipvs sync daemon state */
-volatile int ip_vs_sync_state = IP_VS_STATE_NONE;
-volatile int ip_vs_master_syncid = 0;
-volatile int ip_vs_backup_syncid = 0;
-
-/* multicast interface name */
-char ip_vs_master_mcast_ifn[IP_VS_IFNAME_MAXLEN];
-char ip_vs_backup_mcast_ifn[IP_VS_IFNAME_MAXLEN];
-
-/* sync daemon tasks */
-static struct task_struct *sync_master_thread;
-static struct task_struct *sync_backup_thread;
-
 /* multicast addr */
 static struct sockaddr_in mcast_addr = {
 	.sin_family		= AF_INET,
@@ -156,41 +277,71 @@ static struct sockaddr_in mcast_addr = {
 	.sin_addr.s_addr	= cpu_to_be32(IP_VS_SYNC_GROUP),
 };
 
+/*
+ * Copy of struct ip_vs_seq
+ * From unaligned network order to aligned host order
+ */
+static void ntoh_seq(struct ip_vs_seq *no, struct ip_vs_seq *ho)
+{
+	ho->init_seq       = get_unaligned_be32(&no->init_seq);
+	ho->delta          = get_unaligned_be32(&no->delta);
+	ho->previous_delta = get_unaligned_be32(&no->previous_delta);
+}
+
+/*
+ * Copy of struct ip_vs_seq
+ * From Aligned host order to unaligned network order
+ */
+static void hton_seq(struct ip_vs_seq *ho, struct ip_vs_seq *no)
+{
+	put_unaligned_be32(ho->init_seq, &no->init_seq);
+	put_unaligned_be32(ho->delta, &no->delta);
+	put_unaligned_be32(ho->previous_delta, &no->previous_delta);
+}
 
-static inline struct ip_vs_sync_buff *sb_dequeue(void)
+static inline struct ip_vs_sync_buff *sb_dequeue(struct netns_ipvs *ipvs)
 {
 	struct ip_vs_sync_buff *sb;
 
-	spin_lock_bh(&ip_vs_sync_lock);
-	if (list_empty(&ip_vs_sync_queue)) {
+	spin_lock_bh(&ipvs->sync_lock);
+	if (list_empty(&ipvs->sync_queue)) {
 		sb = NULL;
 	} else {
-		sb = list_entry(ip_vs_sync_queue.next,
+		sb = list_entry(ipvs->sync_queue.next,
 				struct ip_vs_sync_buff,
 				list);
 		list_del(&sb->list);
 	}
-	spin_unlock_bh(&ip_vs_sync_lock);
+	spin_unlock_bh(&ipvs->sync_lock);
 
 	return sb;
 }
 
-static inline struct ip_vs_sync_buff * ip_vs_sync_buff_create(void)
+/*
+ * Create a new sync buffer for Version 1 proto.
+ */
+static inline struct ip_vs_sync_buff *
+ip_vs_sync_buff_create(struct netns_ipvs *ipvs)
 {
 	struct ip_vs_sync_buff *sb;
 
 	if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC)))
 		return NULL;
 
-	if (!(sb->mesg=kmalloc(sync_send_mesg_maxlen, GFP_ATOMIC))) {
+	sb->mesg = kmalloc(ipvs->send_mesg_maxlen, GFP_ATOMIC);
+	if (!sb->mesg) {
 		kfree(sb);
 		return NULL;
 	}
+	sb->mesg->reserved = 0;  /* old nr_conns i.e. must be zeo now */
+	sb->mesg->version = SYNC_PROTO_VER;
+	sb->mesg->syncid = ipvs->master_syncid;
+	sb->mesg->size = sizeof(struct ip_vs_sync_mesg);
 	sb->mesg->nr_conns = 0;
-	sb->mesg->syncid = ip_vs_master_syncid;
-	sb->mesg->size = 4;
-	sb->head = (unsigned char *)sb->mesg + 4;
-	sb->end = (unsigned char *)sb->mesg + sync_send_mesg_maxlen;
+	sb->mesg->spare = 0;
+	sb->head = (unsigned char *)sb->mesg + sizeof(struct ip_vs_sync_mesg);
+	sb->end = (unsigned char *)sb->mesg + ipvs->send_mesg_maxlen;
+
 	sb->firstuse = jiffies;
 	return sb;
 }
@@ -201,14 +352,16 @@ static inline void ip_vs_sync_buff_release(struct ip_vs_sync_buff *sb)
 	kfree(sb);
 }
 
-static inline void sb_queue_tail(struct ip_vs_sync_buff *sb)
+static inline void sb_queue_tail(struct netns_ipvs *ipvs)
 {
-	spin_lock(&ip_vs_sync_lock);
-	if (ip_vs_sync_state & IP_VS_STATE_MASTER)
-		list_add_tail(&sb->list, &ip_vs_sync_queue);
+	struct ip_vs_sync_buff *sb = ipvs->sync_buff;
+
+	spin_lock(&ipvs->sync_lock);
+	if (ipvs->sync_state & IP_VS_STATE_MASTER)
+		list_add_tail(&sb->list, &ipvs->sync_queue);
 	else
 		ip_vs_sync_buff_release(sb);
-	spin_unlock(&ip_vs_sync_lock);
+	spin_unlock(&ipvs->sync_lock);
 }
 
 /*
@@ -216,36 +369,101 @@ static inline void sb_queue_tail(struct ip_vs_sync_buff *sb)
  *	than the specified time or the specified time is zero.
  */
 static inline struct ip_vs_sync_buff *
-get_curr_sync_buff(unsigned long time)
+get_curr_sync_buff(struct netns_ipvs *ipvs, unsigned long time)
 {
 	struct ip_vs_sync_buff *sb;
 
-	spin_lock_bh(&curr_sb_lock);
-	if (curr_sb && (time == 0 ||
-			time_before(jiffies - curr_sb->firstuse, time))) {
-		sb = curr_sb;
-		curr_sb = NULL;
+	spin_lock_bh(&ipvs->sync_buff_lock);
+	if (ipvs->sync_buff &&
+	    time_after_eq(jiffies - ipvs->sync_buff->firstuse, time)) {
+		sb = ipvs->sync_buff;
+		ipvs->sync_buff = NULL;
 	} else
 		sb = NULL;
-	spin_unlock_bh(&curr_sb_lock);
+	spin_unlock_bh(&ipvs->sync_buff_lock);
 	return sb;
 }
 
+/*
+ * Switch mode from sending version 0 or 1
+ *  - must handle sync_buf
+ */
+void ip_vs_sync_switch_mode(struct net *net, int mode)
+{
+	struct netns_ipvs *ipvs = net_ipvs(net);
+
+	if (!(ipvs->sync_state & IP_VS_STATE_MASTER))
+		return;
+	if (mode == ipvs->sysctl_sync_ver || !ipvs->sync_buff)
+		return;
+
+	spin_lock_bh(&ipvs->sync_buff_lock);
+	/* Buffer empty ? then let buf_create do the job  */
+	if (ipvs->sync_buff->mesg->size <=  sizeof(struct ip_vs_sync_mesg)) {
+		kfree(ipvs->sync_buff);
+		ipvs->sync_buff = NULL;
+	} else {
+		spin_lock_bh(&ipvs->sync_lock);
+		if (ipvs->sync_state & IP_VS_STATE_MASTER)
+			list_add_tail(&ipvs->sync_buff->list,
+				      &ipvs->sync_queue);
+		else
+			ip_vs_sync_buff_release(ipvs->sync_buff);
+		spin_unlock_bh(&ipvs->sync_lock);
+	}
+	spin_unlock_bh(&ipvs->sync_buff_lock);
+}
 
 /*
+ * Create a new sync buffer for Version 0 proto.
+ */
+static inline struct ip_vs_sync_buff *
+ip_vs_sync_buff_create_v0(struct netns_ipvs *ipvs)
+{
+	struct ip_vs_sync_buff *sb;
+	struct ip_vs_sync_mesg_v0 *mesg;
+
+	if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC)))
+		return NULL;
+
+	sb->mesg = kmalloc(ipvs->send_mesg_maxlen, GFP_ATOMIC);
+	if (!sb->mesg) {
+		kfree(sb);
+		return NULL;
+	}
+	mesg = (struct ip_vs_sync_mesg_v0 *)sb->mesg;
+	mesg->nr_conns = 0;
+	mesg->syncid = ipvs->master_syncid;
+	mesg->size = sizeof(struct ip_vs_sync_mesg_v0);
+	sb->head = (unsigned char *)mesg + sizeof(struct ip_vs_sync_mesg_v0);
+	sb->end = (unsigned char *)mesg + ipvs->send_mesg_maxlen;
+	sb->firstuse = jiffies;
+	return sb;
+}
+
+/*
+ *      Version 0 , could be switched in by sys_ctl.
  *      Add an ip_vs_conn information into the current sync_buff.
- *      Called by ip_vs_in.
  */
-void ip_vs_sync_conn(struct ip_vs_conn *cp)
+void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp)
 {
-	struct ip_vs_sync_mesg *m;
-	struct ip_vs_sync_conn *s;
+	struct netns_ipvs *ipvs = net_ipvs(net);
+	struct ip_vs_sync_mesg_v0 *m;
+	struct ip_vs_sync_conn_v0 *s;
 	int len;
 
-	spin_lock(&curr_sb_lock);
-	if (!curr_sb) {
-		if (!(curr_sb=ip_vs_sync_buff_create())) {
-			spin_unlock(&curr_sb_lock);
+	if (unlikely(cp->af != AF_INET))
+		return;
+	/* Do not sync ONE PACKET */
+	if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
+		return;
+
+	spin_lock(&ipvs->sync_buff_lock);
+	if (!ipvs->sync_buff) {
+		ipvs->sync_buff =
+			ip_vs_sync_buff_create_v0(ipvs);
+		if (!ipvs->sync_buff) {
+			spin_unlock(&ipvs->sync_buff_lock);
 			pr_err("ip_vs_sync_buff_create failed.\n");
 			return;
 		}
@@ -253,10 +471,11 @@ void ip_vs_sync_conn(struct ip_vs_conn *cp)
 
 	len = (cp->flags & IP_VS_CONN_F_SEQ_MASK) ? FULL_CONN_SIZE :
 		SIMPLE_CONN_SIZE;
-	m = curr_sb->mesg;
-	s = (struct ip_vs_sync_conn *)curr_sb->head;
+	m = (struct ip_vs_sync_mesg_v0 *)ipvs->sync_buff->mesg;
+	s = (struct ip_vs_sync_conn_v0 *)ipvs->sync_buff->head;
 
 	/* copy members */
+	s->reserved = 0;
 	s->protocol = cp->protocol;
 	s->cport = cp->cport;
 	s->vport = cp->vport;
@@ -274,83 +493,366 @@ void ip_vs_sync_conn(struct ip_vs_conn *cp)
 
 	m->nr_conns++;
 	m->size += len;
-	curr_sb->head += len;
+	ipvs->sync_buff->head += len;
 
 	/* check if there is a space for next one */
-	if (curr_sb->head+FULL_CONN_SIZE > curr_sb->end) {
-		sb_queue_tail(curr_sb);
-		curr_sb = NULL;
+	if (ipvs->sync_buff->head + FULL_CONN_SIZE > ipvs->sync_buff->end) {
+		sb_queue_tail(ipvs);
+		ipvs->sync_buff = NULL;
 	}
-	spin_unlock(&curr_sb_lock);
+	spin_unlock(&ipvs->sync_buff_lock);
 
 	/* synchronize its controller if it has */
 	if (cp->control)
-		ip_vs_sync_conn(cp->control);
+		ip_vs_sync_conn(net, cp->control);
+}
+
+/*
+ *      Add an ip_vs_conn information into the current sync_buff.
+ *      Called by ip_vs_in.
+ *      Sending Version 1 messages
+ */
+void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp)
+{
+	struct netns_ipvs *ipvs = net_ipvs(net);
+	struct ip_vs_sync_mesg *m;
+	union ip_vs_sync_conn *s;
+	__u8 *p;
+	unsigned int len, pe_name_len, pad;
+
+	/* Handle old version of the protocol */
+	if (ipvs->sysctl_sync_ver == 0) {
+		ip_vs_sync_conn_v0(net, cp);
+		return;
+	}
+	/* Do not sync ONE PACKET */
+	if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
+		goto control;
+sloop:
+	/* Sanity checks */
+	pe_name_len = 0;
+	if (cp->pe_data_len) {
+		if (!cp->pe_data || !cp->dest) {
+			IP_VS_ERR_RL("SYNC, connection pe_data invalid\n");
+			return;
+		}
+		pe_name_len = strnlen(cp->pe->name, IP_VS_PENAME_MAXLEN);
+	}
+
+	spin_lock(&ipvs->sync_buff_lock);
+
+#ifdef CONFIG_IP_VS_IPV6
+	if (cp->af == AF_INET6)
+		len = sizeof(struct ip_vs_sync_v6);
+	else
+#endif
+		len = sizeof(struct ip_vs_sync_v4);
+
+	if (cp->flags & IP_VS_CONN_F_SEQ_MASK)
+		len += sizeof(struct ip_vs_sync_conn_options) + 2;
+
+	if (cp->pe_data_len)
+		len += cp->pe_data_len + 2;	/* + Param hdr field */
+	if (pe_name_len)
+		len += pe_name_len + 2;
+
+	/* check if there is a space for this one  */
+	pad = 0;
+	if (ipvs->sync_buff) {
+		pad = (4 - (size_t)ipvs->sync_buff->head) & 3;
+		if (ipvs->sync_buff->head + len + pad > ipvs->sync_buff->end) {
+			sb_queue_tail(ipvs);
+			ipvs->sync_buff = NULL;
+			pad = 0;
+		}
+	}
+
+	if (!ipvs->sync_buff) {
+		ipvs->sync_buff = ip_vs_sync_buff_create(ipvs);
+		if (!ipvs->sync_buff) {
+			spin_unlock(&ipvs->sync_buff_lock);
+			pr_err("ip_vs_sync_buff_create failed.\n");
+			return;
+		}
+	}
+
+	m = ipvs->sync_buff->mesg;
+	p = ipvs->sync_buff->head;
+	ipvs->sync_buff->head += pad + len;
+	m->size += pad + len;
+	/* Add ev. padding from prev. sync_conn */
+	while (pad--)
+		*(p++) = 0;
+
+	s = (union ip_vs_sync_conn *)p;
+
+	/* Set message type  & copy members */
+	s->v4.type = (cp->af == AF_INET6 ? STYPE_F_INET6 : 0);
+	s->v4.ver_size = htons(len & SVER_MASK);	/* Version 0 */
+	s->v4.flags = htonl(cp->flags & ~IP_VS_CONN_F_HASHED);
+	s->v4.state = htons(cp->state);
+	s->v4.protocol = cp->protocol;
+	s->v4.cport = cp->cport;
+	s->v4.vport = cp->vport;
+	s->v4.dport = cp->dport;
+	s->v4.fwmark = htonl(cp->fwmark);
+	s->v4.timeout = htonl(cp->timeout / HZ);
+	m->nr_conns++;
+
+#ifdef CONFIG_IP_VS_IPV6
+	if (cp->af == AF_INET6) {
+		p += sizeof(struct ip_vs_sync_v6);
+		ipv6_addr_copy(&s->v6.caddr, &cp->caddr.in6);
+		ipv6_addr_copy(&s->v6.vaddr, &cp->vaddr.in6);
+		ipv6_addr_copy(&s->v6.daddr, &cp->daddr.in6);
+	} else
+#endif
+	{
+		p += sizeof(struct ip_vs_sync_v4);	/* options ptr */
+		s->v4.caddr = cp->caddr.ip;
+		s->v4.vaddr = cp->vaddr.ip;
+		s->v4.daddr = cp->daddr.ip;
+	}
+	if (cp->flags & IP_VS_CONN_F_SEQ_MASK) {
+		*(p++) = IPVS_OPT_SEQ_DATA;
+		*(p++) = sizeof(struct ip_vs_sync_conn_options);
+		hton_seq((struct ip_vs_seq *)p, &cp->in_seq);
+		p += sizeof(struct ip_vs_seq);
+		hton_seq((struct ip_vs_seq *)p, &cp->out_seq);
+		p += sizeof(struct ip_vs_seq);
+	}
+	/* Handle pe data */
+	if (cp->pe_data_len && cp->pe_data) {
+		*(p++) = IPVS_OPT_PE_DATA;
+		*(p++) = cp->pe_data_len;
+		memcpy(p, cp->pe_data, cp->pe_data_len);
+		p += cp->pe_data_len;
+		if (pe_name_len) {
+			/* Add PE_NAME */
+			*(p++) = IPVS_OPT_PE_NAME;
+			*(p++) = pe_name_len;
+			memcpy(p, cp->pe->name, pe_name_len);
+			p += pe_name_len;
+		}
+	}
+
+	spin_unlock(&ipvs->sync_buff_lock);
+
+control:
+	/* synchronize its controller if it has */
+	cp = cp->control;
+	if (!cp)
+		return;
+	/*
+	 * Reduce sync rate for templates
+	 * i.e only increment in_pkts for Templates.
+	 */
+	if (cp->flags & IP_VS_CONN_F_TEMPLATE) {
+		int pkts = atomic_add_return(1, &cp->in_pkts);
+
+		if (pkts % ipvs->sysctl_sync_threshold[1] != 1)
+			return;
+	}
+	goto sloop;
 }
 
+/*
+ *  fill_param used by version 1
+ */
 static inline int
-ip_vs_conn_fill_param_sync(int af, int protocol,
-			   const union nf_inet_addr *caddr, __be16 cport,
-			   const union nf_inet_addr *vaddr, __be16 vport,
-			   struct ip_vs_conn_param *p)
+ip_vs_conn_fill_param_sync(struct net *net, int af, union ip_vs_sync_conn *sc,
+			   struct ip_vs_conn_param *p,
+			   __u8 *pe_data, unsigned int pe_data_len,
+			   __u8 *pe_name, unsigned int pe_name_len)
 {
-	/* XXX: Need to take into account persistence engine */
-	ip_vs_conn_fill_param(af, protocol, caddr, cport, vaddr, vport, p);
+#ifdef CONFIG_IP_VS_IPV6
+	if (af == AF_INET6)
+		ip_vs_conn_fill_param(net, af, sc->v6.protocol,
+				      (const union nf_inet_addr *)&sc->v6.caddr,
+				      sc->v6.cport,
+				      (const union nf_inet_addr *)&sc->v6.vaddr,
+				      sc->v6.vport, p);
+	else
+#endif
+		ip_vs_conn_fill_param(net, af, sc->v4.protocol,
+				      (const union nf_inet_addr *)&sc->v4.caddr,
+				      sc->v4.cport,
+				      (const union nf_inet_addr *)&sc->v4.vaddr,
+				      sc->v4.vport, p);
+	/* Handle pe data */
+	if (pe_data_len) {
+		if (pe_name_len) {
+			char buff[IP_VS_PENAME_MAXLEN+1];
+
+			memcpy(buff, pe_name, pe_name_len);
+			buff[pe_name_len]=0;
+			p->pe = __ip_vs_pe_getbyname(buff);
+			if (!p->pe) {
+				IP_VS_DBG(3, "BACKUP, no %s engine found/loaded\n",
+					     buff);
+				return 1;
+			}
+		} else {
+			IP_VS_ERR_RL("BACKUP, Invalid PE parameters\n");
+			return 1;
+		}
+
+		p->pe_data = kmalloc(pe_data_len, GFP_ATOMIC);
+		if (!p->pe_data) {
+			if (p->pe->module)
+				module_put(p->pe->module);
+			return -ENOMEM;
+		}
+		memcpy(p->pe_data, pe_data, pe_data_len);
+		p->pe_data_len = pe_data_len;
+	}
 	return 0;
 }
 
 /*
- *      Process received multicast message and create the corresponding
- *      ip_vs_conn entries.
+ *  Connection Add / Update.
+ *  Common for version 0 and 1 reception of backup sync_conns.
+ *  Param: ...
+ *         timeout is in sec.
  */
-static void ip_vs_process_message(const char *buffer, const size_t buflen)
+static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,
+			    unsigned int flags, unsigned int state,
+			    unsigned int protocol, unsigned int type,
+			    const union nf_inet_addr *daddr, __be16 dport,
+			    unsigned long timeout, __u32 fwmark,
+			    struct ip_vs_sync_conn_options *opt)
 {
-	struct ip_vs_sync_mesg *m = (struct ip_vs_sync_mesg *)buffer;
-	struct ip_vs_sync_conn *s;
-	struct ip_vs_sync_conn_options *opt;
-	struct ip_vs_conn *cp;
-	struct ip_vs_protocol *pp;
 	struct ip_vs_dest *dest;
-	struct ip_vs_conn_param param;
-	char *p;
-	int i;
+	struct ip_vs_conn *cp;
+	struct netns_ipvs *ipvs = net_ipvs(net);
 
-	if (buflen < sizeof(struct ip_vs_sync_mesg)) {
-		IP_VS_ERR_RL("sync message header too short\n");
-		return;
-	}
+	if (!(flags & IP_VS_CONN_F_TEMPLATE))
+		cp = ip_vs_conn_in_get(param);
+	else
+		cp = ip_vs_ct_in_get(param);
 
-	/* Convert size back to host byte order */
-	m->size = ntohs(m->size);
+	if (cp && param->pe_data) 	/* Free pe_data */
+		kfree(param->pe_data);
+	if (!cp) {
+		/*
+		 * Find the appropriate destination for the connection.
+		 * If it is not found the connection will remain unbound
+		 * but still handled.
+		 */
+		dest = ip_vs_find_dest(net, type, daddr, dport, param->vaddr,
+				       param->vport, protocol, fwmark);
 
-	if (buflen != m->size) {
-		IP_VS_ERR_RL("bogus sync message size\n");
-		return;
+		/*  Set the approprite ativity flag */
+		if (protocol == IPPROTO_TCP) {
+			if (state != IP_VS_TCP_S_ESTABLISHED)
+				flags |= IP_VS_CONN_F_INACTIVE;
+			else
+				flags &= ~IP_VS_CONN_F_INACTIVE;
+		} else if (protocol == IPPROTO_SCTP) {
+			if (state != IP_VS_SCTP_S_ESTABLISHED)
+				flags |= IP_VS_CONN_F_INACTIVE;
+			else
+				flags &= ~IP_VS_CONN_F_INACTIVE;
+		}
+		cp = ip_vs_conn_new(param, daddr, dport, flags, dest, fwmark);
+		if (dest)
+			atomic_dec(&dest->refcnt);
+		if (!cp) {
+			if (param->pe_data)
+				kfree(param->pe_data);
+			IP_VS_DBG(2, "BACKUP, add new conn. failed\n");
+			return;
+		}
+	} else if (!cp->dest) {
+		dest = ip_vs_try_bind_dest(cp);
+		if (dest)
+			atomic_dec(&dest->refcnt);
+	} else if ((cp->dest) && (cp->protocol == IPPROTO_TCP) &&
+		(cp->state != state)) {
+		/* update active/inactive flag for the connection */
+		dest = cp->dest;
+		if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
+			(state != IP_VS_TCP_S_ESTABLISHED)) {
+			atomic_dec(&dest->activeconns);
+			atomic_inc(&dest->inactconns);
+			cp->flags |= IP_VS_CONN_F_INACTIVE;
+		} else if ((cp->flags & IP_VS_CONN_F_INACTIVE) &&
+			(state == IP_VS_TCP_S_ESTABLISHED)) {
+			atomic_inc(&dest->activeconns);
+			atomic_dec(&dest->inactconns);
+			cp->flags &= ~IP_VS_CONN_F_INACTIVE;
+		}
+	} else if ((cp->dest) && (cp->protocol == IPPROTO_SCTP) &&
+		(cp->state != state)) {
+		dest = cp->dest;
+		if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
+		(state != IP_VS_SCTP_S_ESTABLISHED)) {
+			atomic_dec(&dest->activeconns);
+			atomic_inc(&dest->inactconns);
+			cp->flags &= ~IP_VS_CONN_F_INACTIVE;
+		}
 	}
 
-	/* SyncID sanity check */
-	if (ip_vs_backup_syncid != 0 && m->syncid != ip_vs_backup_syncid) {
-		IP_VS_DBG(7, "Ignoring incoming msg with syncid = %d\n",
-			  m->syncid);
-		return;
+	if (opt)
+		memcpy(&cp->in_seq, opt, sizeof(*opt));
+	atomic_set(&cp->in_pkts, ipvs->sysctl_sync_threshold[0]);
+	cp->state = state;
+	cp->old_state = cp->state;
+	/*
+	 * For Ver 0 messages style
+	 *  - Not possible to recover the right timeout for templates
+	 *  - can not find the right fwmark
+	 *    virtual service. If needed, we can do it for
+	 *    non-fwmark persistent services.
+	 * Ver 1 messages style.
+	 *  - No problem.
+	 */
+	if (timeout) {
+		if (timeout > MAX_SCHEDULE_TIMEOUT / HZ)
+			timeout = MAX_SCHEDULE_TIMEOUT / HZ;
+		cp->timeout = timeout*HZ;
+	} else {
+		struct ip_vs_proto_data *pd;
+
+		pd = ip_vs_proto_data_get(net, protocol);
+		if (!(flags & IP_VS_CONN_F_TEMPLATE) && pd && pd->timeout_table)
+			cp->timeout = pd->timeout_table[state];
+		else
+			cp->timeout = (3*60*HZ);
 	}
+	ip_vs_conn_put(cp);
+}
 
-	p = (char *)buffer + sizeof(struct ip_vs_sync_mesg);
+/*
+ *  Process received multicast message for Version 0
+ */
+static void ip_vs_process_message_v0(struct net *net, const char *buffer,
+				     const size_t buflen)
+{
+	struct ip_vs_sync_mesg_v0 *m = (struct ip_vs_sync_mesg_v0 *)buffer;
+	struct ip_vs_sync_conn_v0 *s;
+	struct ip_vs_sync_conn_options *opt;
+	struct ip_vs_protocol *pp;
+	struct ip_vs_conn_param param;
+	char *p;
+	int i;
+
+	p = (char *)buffer + sizeof(struct ip_vs_sync_mesg_v0);
 	for (i=0; i<m->nr_conns; i++) {
 		unsigned flags, state;
 
 		if (p + SIMPLE_CONN_SIZE > buffer+buflen) {
-			IP_VS_ERR_RL("bogus conn in sync message\n");
+			IP_VS_ERR_RL("BACKUP v0, bogus conn\n");
 			return;
 		}
-		s = (struct ip_vs_sync_conn *) p;
+		s = (struct ip_vs_sync_conn_v0 *) p;
 		flags = ntohs(s->flags) | IP_VS_CONN_F_SYNC;
 		flags &= ~IP_VS_CONN_F_HASHED;
 		if (flags & IP_VS_CONN_F_SEQ_MASK) {
 			opt = (struct ip_vs_sync_conn_options *)&s[1];
 			p += FULL_CONN_SIZE;
 			if (p > buffer+buflen) {
-				IP_VS_ERR_RL("bogus conn options in sync message\n");
+				IP_VS_ERR_RL("BACKUP v0, Dropping buffer bogus conn options\n");
 				return;
 			}
 		} else {
@@ -362,118 +864,286 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen)
 		if (!(flags & IP_VS_CONN_F_TEMPLATE)) {
 			pp = ip_vs_proto_get(s->protocol);
 			if (!pp) {
-				IP_VS_ERR_RL("Unsupported protocol %u in sync msg\n",
+				IP_VS_DBG(2, "BACKUP v0, Unsupported protocol %u\n",
 					s->protocol);
 				continue;
 			}
 			if (state >= pp->num_states) {
-				IP_VS_DBG(2, "Invalid %s state %u in sync msg\n",
+				IP_VS_DBG(2, "BACKUP v0, Invalid %s state %u\n",
 					pp->name, state);
 				continue;
 			}
 		} else {
 			/* protocol in templates is not used for state/timeout */
-			pp = NULL;
 			if (state > 0) {
-				IP_VS_DBG(2, "Invalid template state %u in sync msg\n",
+				IP_VS_DBG(2, "BACKUP v0, Invalid template state %u\n",
 					state);
 				state = 0;
 			}
 		}
 
-		{
-			if (ip_vs_conn_fill_param_sync(AF_INET, s->protocol,
-					      (union nf_inet_addr *)&s->caddr,
-					      s->cport,
-					      (union nf_inet_addr *)&s->vaddr,
-					      s->vport, &param)) {
-				pr_err("ip_vs_conn_fill_param_sync failed");
-				return;
+		ip_vs_conn_fill_param(net, AF_INET, s->protocol,
+				      (const union nf_inet_addr *)&s->caddr,
+				      s->cport,
+				      (const union nf_inet_addr *)&s->vaddr,
+				      s->vport, &param);
+
+		/* Send timeout as Zero */
+		ip_vs_proc_conn(net, &param, flags, state, s->protocol, AF_INET,
+				(union nf_inet_addr *)&s->daddr, s->dport,
+				0, 0, opt);
+	}
+}
+
+/*
+ * Handle options
+ */
+static inline int ip_vs_proc_seqopt(__u8 *p, unsigned int plen,
+				    __u32 *opt_flags,
+				    struct ip_vs_sync_conn_options *opt)
+{
+	struct ip_vs_sync_conn_options *topt;
+
+	topt = (struct ip_vs_sync_conn_options *)p;
+
+	if (plen != sizeof(struct ip_vs_sync_conn_options)) {
+		IP_VS_DBG(2, "BACKUP, bogus conn options length\n");
+		return -EINVAL;
+	}
+	if (*opt_flags & IPVS_OPT_F_SEQ_DATA) {
+		IP_VS_DBG(2, "BACKUP, conn options found twice\n");
+		return -EINVAL;
+	}
+	ntoh_seq(&topt->in_seq, &opt->in_seq);
+	ntoh_seq(&topt->out_seq, &opt->out_seq);
+	*opt_flags |= IPVS_OPT_F_SEQ_DATA;
+	return 0;
+}
+
+static int ip_vs_proc_str(__u8 *p, unsigned int plen, unsigned int *data_len,
+			  __u8 **data, unsigned int maxlen,
+			  __u32 *opt_flags, __u32 flag)
+{
+	if (plen > maxlen) {
+		IP_VS_DBG(2, "BACKUP, bogus par.data len > %d\n", maxlen);
+		return -EINVAL;
+	}
+	if (*opt_flags & flag) {
+		IP_VS_DBG(2, "BACKUP, Par.data found twice 0x%x\n", flag);
+		return -EINVAL;
+	}
+	*data_len = plen;
+	*data = p;
+	*opt_flags |= flag;
+	return 0;
+}
+/*
+ *   Process a Version 1 sync. connection
+ */
+static inline int ip_vs_proc_sync_conn(struct net *net, __u8 *p, __u8 *msg_end)
+{
+	struct ip_vs_sync_conn_options opt;
+	union  ip_vs_sync_conn *s;
+	struct ip_vs_protocol *pp;
+	struct ip_vs_conn_param param;
+	__u32 flags;
+	unsigned int af, state, pe_data_len=0, pe_name_len=0;
+	__u8 *pe_data=NULL, *pe_name=NULL;
+	__u32 opt_flags=0;
+	int retc=0;
+
+	s = (union ip_vs_sync_conn *) p;
+
+	if (s->v6.type & STYPE_F_INET6) {
+#ifdef CONFIG_IP_VS_IPV6
+		af = AF_INET6;
+		p += sizeof(struct ip_vs_sync_v6);
+#else
+		IP_VS_DBG(3,"BACKUP, IPv6 msg received, and IPVS is not compiled for IPv6\n");
+		retc = 10;
+		goto out;
+#endif
+	} else if (!s->v4.type) {
+		af = AF_INET;
+		p += sizeof(struct ip_vs_sync_v4);
+	} else {
+		return -10;
+	}
+	if (p > msg_end)
+		return -20;
+
+	/* Process optional params check Type & Len. */
+	while (p < msg_end) {
+		int ptype;
+		int plen;
+
+		if (p+2 > msg_end)
+			return -30;
+		ptype = *(p++);
+		plen  = *(p++);
+
+		if (!plen || ((p + plen) > msg_end))
+			return -40;
+		/* Handle seq option  p = param data */
+		switch (ptype & ~IPVS_OPT_F_PARAM) {
+		case IPVS_OPT_SEQ_DATA:
+			if (ip_vs_proc_seqopt(p, plen, &opt_flags, &opt))
+				return -50;
+			break;
+
+		case IPVS_OPT_PE_DATA:
+			if (ip_vs_proc_str(p, plen, &pe_data_len, &pe_data,
+					   IP_VS_PEDATA_MAXLEN, &opt_flags,
+					   IPVS_OPT_F_PE_DATA))
+				return -60;
+			break;
+
+		case IPVS_OPT_PE_NAME:
+			if (ip_vs_proc_str(p, plen,&pe_name_len, &pe_name,
+					   IP_VS_PENAME_MAXLEN, &opt_flags,
+					   IPVS_OPT_F_PE_NAME))
+				return -70;
+			break;
+
+		default:
+			/* Param data mandatory ? */
+			if (!(ptype & IPVS_OPT_F_PARAM)) {
+				IP_VS_DBG(3, "BACKUP, Unknown mandatory param %d found\n",
+					  ptype & ~IPVS_OPT_F_PARAM);
+				retc = 20;
+				goto out;
 			}
-			if (!(flags & IP_VS_CONN_F_TEMPLATE))
-				cp = ip_vs_conn_in_get(&param);
-			else
-				cp = ip_vs_ct_in_get(&param);
 		}
-		if (!cp) {
-			/*
-			 * Find the appropriate destination for the connection.
-			 * If it is not found the connection will remain unbound
-			 * but still handled.
-			 */
-			dest = ip_vs_find_dest(AF_INET,
-					       (union nf_inet_addr *)&s->daddr,
-					       s->dport,
-					       (union nf_inet_addr *)&s->vaddr,
-					       s->vport,
-					       s->protocol);
-			/*  Set the approprite ativity flag */
-			if (s->protocol == IPPROTO_TCP) {
-				if (state != IP_VS_TCP_S_ESTABLISHED)
-					flags |= IP_VS_CONN_F_INACTIVE;
-				else
-					flags &= ~IP_VS_CONN_F_INACTIVE;
-			} else if (s->protocol == IPPROTO_SCTP) {
-				if (state != IP_VS_SCTP_S_ESTABLISHED)
-					flags |= IP_VS_CONN_F_INACTIVE;
-				else
-					flags &= ~IP_VS_CONN_F_INACTIVE;
+		p += plen;  /* Next option */
+	}
+
+	/* Get flags and Mask off unsupported */
+	flags  = ntohl(s->v4.flags) & IP_VS_CONN_F_BACKUP_MASK;
+	flags |= IP_VS_CONN_F_SYNC;
+	state = ntohs(s->v4.state);
+
+	if (!(flags & IP_VS_CONN_F_TEMPLATE)) {
+		pp = ip_vs_proto_get(s->v4.protocol);
+		if (!pp) {
+			IP_VS_DBG(3,"BACKUP, Unsupported protocol %u\n",
+				s->v4.protocol);
+			retc = 30;
+			goto out;
+		}
+		if (state >= pp->num_states) {
+			IP_VS_DBG(3, "BACKUP, Invalid %s state %u\n",
+				pp->name, state);
+			retc = 40;
+			goto out;
+		}
+	} else {
+		/* protocol in templates is not used for state/timeout */
+		if (state > 0) {
+			IP_VS_DBG(3, "BACKUP, Invalid template state %u\n",
+				state);
+			state = 0;
+		}
+	}
+	if (ip_vs_conn_fill_param_sync(net, af, s, &param, pe_data,
+				       pe_data_len, pe_name, pe_name_len)) {
+		retc = 50;
+		goto out;
+	}
+	/* If only IPv4, just silent skip IPv6 */
+	if (af == AF_INET)
+		ip_vs_proc_conn(net, &param, flags, state, s->v4.protocol, af,
+				(union nf_inet_addr *)&s->v4.daddr, s->v4.dport,
+				ntohl(s->v4.timeout), ntohl(s->v4.fwmark),
+				(opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL)
+				);
+#ifdef CONFIG_IP_VS_IPV6
+	else
+		ip_vs_proc_conn(net, &param, flags, state, s->v6.protocol, af,
+				(union nf_inet_addr *)&s->v6.daddr, s->v6.dport,
+				ntohl(s->v6.timeout), ntohl(s->v6.fwmark),
+				(opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL)
+				);
+#endif
+	return 0;
+	/* Error exit */
+out:
+	IP_VS_DBG(2, "BACKUP, Single msg dropped err:%d\n", retc);
+	return retc;
+
+}
+/*
+ *      Process received multicast message and create the corresponding
+ *      ip_vs_conn entries.
+ *      Handles Version 0 & 1
+ */
+static void ip_vs_process_message(struct net *net, __u8 *buffer,
+				  const size_t buflen)
+{
+	struct netns_ipvs *ipvs = net_ipvs(net);
+	struct ip_vs_sync_mesg *m2 = (struct ip_vs_sync_mesg *)buffer;
+	__u8 *p, *msg_end;
+	int i, nr_conns;
+
+	if (buflen < sizeof(struct ip_vs_sync_mesg_v0)) {
+		IP_VS_DBG(2, "BACKUP, message header too short\n");
+		return;
+	}
+	/* Convert size back to host byte order */
+	m2->size = ntohs(m2->size);
+
+	if (buflen != m2->size) {
+		IP_VS_DBG(2, "BACKUP, bogus message size\n");
+		return;
+	}
+	/* SyncID sanity check */
+	if (ipvs->backup_syncid != 0 && m2->syncid != ipvs->backup_syncid) {
+		IP_VS_DBG(7, "BACKUP, Ignoring syncid = %d\n", m2->syncid);
+		return;
+	}
+	/* Handle version 1  message */
+	if ((m2->version == SYNC_PROTO_VER) && (m2->reserved == 0)
+	    && (m2->spare == 0)) {
+
+		msg_end = buffer + sizeof(struct ip_vs_sync_mesg);
+		nr_conns = m2->nr_conns;
+
+		for (i=0; i<nr_conns; i++) {
+			union ip_vs_sync_conn *s;
+			unsigned size;
+			int retc;
+
+			p = msg_end;
+			if (p + sizeof(s->v4) > buffer+buflen) {
+				IP_VS_ERR_RL("BACKUP, Dropping buffer, to small\n");
+				return;
 			}
-			cp = ip_vs_conn_new(&param,
-					    (union nf_inet_addr *)&s->daddr,
-					    s->dport, flags, dest);
-			if (dest)
-				atomic_dec(&dest->refcnt);
-			if (!cp) {
-				pr_err("ip_vs_conn_new failed\n");
+			s = (union ip_vs_sync_conn *)p;
+			size = ntohs(s->v4.ver_size) & SVER_MASK;
+			msg_end = p + size;
+			/* Basic sanity checks */
+			if (msg_end  > buffer+buflen) {
+				IP_VS_ERR_RL("BACKUP, Dropping buffer, msg > buffer\n");
 				return;
 			}
-		} else if (!cp->dest) {
-			dest = ip_vs_try_bind_dest(cp);
-			if (dest)
-				atomic_dec(&dest->refcnt);
-		} else if ((cp->dest) && (cp->protocol == IPPROTO_TCP) &&
-			   (cp->state != state)) {
-			/* update active/inactive flag for the connection */
-			dest = cp->dest;
-			if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
-				(state != IP_VS_TCP_S_ESTABLISHED)) {
-				atomic_dec(&dest->activeconns);
-				atomic_inc(&dest->inactconns);
-				cp->flags |= IP_VS_CONN_F_INACTIVE;
-			} else if ((cp->flags & IP_VS_CONN_F_INACTIVE) &&
-				(state == IP_VS_TCP_S_ESTABLISHED)) {
-				atomic_inc(&dest->activeconns);
-				atomic_dec(&dest->inactconns);
-				cp->flags &= ~IP_VS_CONN_F_INACTIVE;
+			if (ntohs(s->v4.ver_size) >> SVER_SHIFT) {
+				IP_VS_ERR_RL("BACKUP, Dropping buffer, Unknown version %d\n",
+					      ntohs(s->v4.ver_size) >> SVER_SHIFT);
+				return;
 			}
-		} else if ((cp->dest) && (cp->protocol == IPPROTO_SCTP) &&
-			   (cp->state != state)) {
-			dest = cp->dest;
-			if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
-			     (state != IP_VS_SCTP_S_ESTABLISHED)) {
-			    atomic_dec(&dest->activeconns);
-			    atomic_inc(&dest->inactconns);
-			    cp->flags &= ~IP_VS_CONN_F_INACTIVE;
+			/* Process a single sync_conn */
+			retc = ip_vs_proc_sync_conn(net, p, msg_end);
+			if (retc < 0) {
+				IP_VS_ERR_RL("BACKUP, Dropping buffer, Err: %d in decoding\n",
+					     retc);
+				return;
 			}
+			/* Make sure we have 32 bit alignment */
+			msg_end = p + ((size + 3) & ~3);
 		}
-
-		if (opt)
-			memcpy(&cp->in_seq, opt, sizeof(*opt));
-		atomic_set(&cp->in_pkts, sysctl_ip_vs_sync_threshold[0]);
-		cp->state = state;
-		cp->old_state = cp->state;
-		/*
-		 * We can not recover the right timeout for templates
-		 * in all cases, we can not find the right fwmark
-		 * virtual service. If needed, we can do it for
-		 * non-fwmark persistent services.
-		 */
-		if (!(flags & IP_VS_CONN_F_TEMPLATE) && pp->timeout_table)
-			cp->timeout = pp->timeout_table[state];
-		else
-			cp->timeout = (3*60*HZ);
-		ip_vs_conn_put(cp);
+	} else {
+		/* Old type of message */
+		ip_vs_process_message_v0(net, buffer, buflen);
+		return;
 	}
 }
 
@@ -511,8 +1181,10 @@ static int set_mcast_if(struct sock *sk, char *ifname)
 {
 	struct net_device *dev;
 	struct inet_sock *inet = inet_sk(sk);
+	struct net *net = sock_net(sk);
 
-	if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL)
+	dev = __dev_get_by_name(net, ifname);
+	if (!dev)
 		return -ENODEV;
 
 	if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if)
@@ -531,30 +1203,33 @@ static int set_mcast_if(struct sock *sk, char *ifname)
  *	Set the maximum length of sync message according to the
  *	specified interface's MTU.
  */
-static int set_sync_mesg_maxlen(int sync_state)
+static int set_sync_mesg_maxlen(struct net *net, int sync_state)
 {
+	struct netns_ipvs *ipvs = net_ipvs(net);
 	struct net_device *dev;
 	int num;
 
 	if (sync_state == IP_VS_STATE_MASTER) {
-		if ((dev = __dev_get_by_name(&init_net, ip_vs_master_mcast_ifn)) == NULL)
+		dev = __dev_get_by_name(net, ipvs->master_mcast_ifn);
+		if (!dev)
 			return -ENODEV;
 
 		num = (dev->mtu - sizeof(struct iphdr) -
 		       sizeof(struct udphdr) -
 		       SYNC_MESG_HEADER_LEN - 20) / SIMPLE_CONN_SIZE;
-		sync_send_mesg_maxlen = SYNC_MESG_HEADER_LEN +
+		ipvs->send_mesg_maxlen = SYNC_MESG_HEADER_LEN +
 			SIMPLE_CONN_SIZE * min(num, MAX_CONNS_PER_SYNCBUFF);
 		IP_VS_DBG(7, "setting the maximum length of sync sending "
-			  "message %d.\n", sync_send_mesg_maxlen);
+			  "message %d.\n", ipvs->send_mesg_maxlen);
 	} else if (sync_state == IP_VS_STATE_BACKUP) {
-		if ((dev = __dev_get_by_name(&init_net, ip_vs_backup_mcast_ifn)) == NULL)
+		dev = __dev_get_by_name(net, ipvs->backup_mcast_ifn);
+		if (!dev)
 			return -ENODEV;
 
-		sync_recv_mesg_maxlen = dev->mtu -
+		ipvs->recv_mesg_maxlen = dev->mtu -
 			sizeof(struct iphdr) - sizeof(struct udphdr);
 		IP_VS_DBG(7, "setting the maximum length of sync receiving "
-			  "message %d.\n", sync_recv_mesg_maxlen);
+			  "message %d.\n", ipvs->recv_mesg_maxlen);
 	}
 
 	return 0;
@@ -569,6 +1244,7 @@ static int set_sync_mesg_maxlen(int sync_state)
 static int
 join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname)
 {
+	struct net *net = sock_net(sk);
 	struct ip_mreqn mreq;
 	struct net_device *dev;
 	int ret;
@@ -576,7 +1252,8 @@ join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname)
 	memset(&mreq, 0, sizeof(mreq));
 	memcpy(&mreq.imr_multiaddr, addr, sizeof(struct in_addr));
 
-	if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL)
+	dev = __dev_get_by_name(net, ifname);
+	if (!dev)
 		return -ENODEV;
 	if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if)
 		return -EINVAL;
@@ -593,11 +1270,13 @@ join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname)
 
 static int bind_mcastif_addr(struct socket *sock, char *ifname)
 {
+	struct net *net = sock_net(sock->sk);
 	struct net_device *dev;
 	__be32 addr;
 	struct sockaddr_in sin;
 
-	if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL)
+	dev = __dev_get_by_name(net, ifname);
+	if (!dev)
 		return -ENODEV;
 
 	addr = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
@@ -619,19 +1298,20 @@ static int bind_mcastif_addr(struct socket *sock, char *ifname)
 /*
  *      Set up sending multicast socket over UDP
  */
-static struct socket * make_send_sock(void)
+static struct socket *make_send_sock(struct net *net)
 {
+	struct netns_ipvs *ipvs = net_ipvs(net);
 	struct socket *sock;
 	int result;
 
 	/* First create a socket */
-	result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
+	result = __sock_create(net, PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock, 1);
 	if (result < 0) {
 		pr_err("Error during creation of socket; terminating\n");
 		return ERR_PTR(result);
 	}
 
-	result = set_mcast_if(sock->sk, ip_vs_master_mcast_ifn);
+	result = set_mcast_if(sock->sk, ipvs->master_mcast_ifn);
 	if (result < 0) {
 		pr_err("Error setting outbound mcast interface\n");
 		goto error;
@@ -640,7 +1320,7 @@ static struct socket * make_send_sock(void)
 	set_mcast_loop(sock->sk, 0);
 	set_mcast_ttl(sock->sk, 1);
 
-	result = bind_mcastif_addr(sock, ip_vs_master_mcast_ifn);
+	result = bind_mcastif_addr(sock, ipvs->master_mcast_ifn);
 	if (result < 0) {
 		pr_err("Error binding address of the mcast interface\n");
 		goto error;
@@ -664,13 +1344,14 @@ static struct socket * make_send_sock(void)
 /*
  *      Set up receiving multicast socket over UDP
  */
-static struct socket * make_receive_sock(void)
+static struct socket *make_receive_sock(struct net *net)
 {
+	struct netns_ipvs *ipvs = net_ipvs(net);
 	struct socket *sock;
 	int result;
 
 	/* First create a socket */
-	result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
+	result = __sock_create(net, PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock, 1);
 	if (result < 0) {
 		pr_err("Error during creation of socket; terminating\n");
 		return ERR_PTR(result);
@@ -689,7 +1370,7 @@ static struct socket * make_receive_sock(void)
 	/* join the multicast group */
 	result = join_mcast_group(sock->sk,
 			(struct in_addr *) &mcast_addr.sin_addr,
-			ip_vs_backup_mcast_ifn);
+			ipvs->backup_mcast_ifn);
 	if (result < 0) {
 		pr_err("Error joining to the multicast group\n");
 		goto error;
@@ -760,20 +1441,21 @@ ip_vs_receive(struct socket *sock, char *buffer, const size_t buflen)
 static int sync_thread_master(void *data)
 {
 	struct ip_vs_sync_thread_data *tinfo = data;
+	struct netns_ipvs *ipvs = net_ipvs(tinfo->net);
 	struct ip_vs_sync_buff *sb;
 
 	pr_info("sync thread started: state = MASTER, mcast_ifn = %s, "
 		"syncid = %d\n",
-		ip_vs_master_mcast_ifn, ip_vs_master_syncid);
+		ipvs->master_mcast_ifn, ipvs->master_syncid);
 
 	while (!kthread_should_stop()) {
-		while ((sb = sb_dequeue())) {
+		while ((sb = sb_dequeue(ipvs))) {
 			ip_vs_send_sync_msg(tinfo->sock, sb->mesg);
 			ip_vs_sync_buff_release(sb);
 		}
 
-		/* check if entries stay in curr_sb for 2 seconds */
-		sb = get_curr_sync_buff(2 * HZ);
+		/* check if entries stay in ipvs->sync_buff for 2 seconds */
+		sb = get_curr_sync_buff(ipvs, 2 * HZ);
 		if (sb) {
 			ip_vs_send_sync_msg(tinfo->sock, sb->mesg);
 			ip_vs_sync_buff_release(sb);
@@ -783,14 +1465,13 @@ static int sync_thread_master(void *data)
 	}
 
 	/* clean up the sync_buff queue */
-	while ((sb=sb_dequeue())) {
+	while ((sb = sb_dequeue(ipvs)))
 		ip_vs_sync_buff_release(sb);
-	}
 
 	/* clean up the current sync_buff */
-	if ((sb = get_curr_sync_buff(0))) {
+	sb = get_curr_sync_buff(ipvs, 0);
+	if (sb)
 		ip_vs_sync_buff_release(sb);
-	}
 
 	/* release the sending multicast socket */
 	sock_release(tinfo->sock);
@@ -803,11 +1484,12 @@ static int sync_thread_master(void *data)
 static int sync_thread_backup(void *data)
 {
 	struct ip_vs_sync_thread_data *tinfo = data;
+	struct netns_ipvs *ipvs = net_ipvs(tinfo->net);
 	int len;
 
 	pr_info("sync thread started: state = BACKUP, mcast_ifn = %s, "
 		"syncid = %d\n",
-		ip_vs_backup_mcast_ifn, ip_vs_backup_syncid);
+		ipvs->backup_mcast_ifn, ipvs->backup_syncid);
 
 	while (!kthread_should_stop()) {
 		wait_event_interruptible(*sk_sleep(tinfo->sock->sk),
@@ -817,7 +1499,7 @@ static int sync_thread_backup(void *data)
 		/* do we have data now? */
 		while (!skb_queue_empty(&(tinfo->sock->sk->sk_receive_queue))) {
 			len = ip_vs_receive(tinfo->sock, tinfo->buf,
-					sync_recv_mesg_maxlen);
+					ipvs->recv_mesg_maxlen);
 			if (len <= 0) {
 				pr_err("receiving message error\n");
 				break;
@@ -826,7 +1508,7 @@ static int sync_thread_backup(void *data)
 			/* disable bottom half, because it accesses the data
 			   shared by softirq while getting/creating conns */
 			local_bh_disable();
-			ip_vs_process_message(tinfo->buf, len);
+			ip_vs_process_message(tinfo->net, tinfo->buf, len);
 			local_bh_enable();
 		}
 	}
@@ -840,41 +1522,42 @@ static int sync_thread_backup(void *data)
 }
 
 
-int start_sync_thread(int state, char *mcast_ifn, __u8 syncid)
+int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid)
 {
 	struct ip_vs_sync_thread_data *tinfo;
 	struct task_struct **realtask, *task;
 	struct socket *sock;
+	struct netns_ipvs *ipvs = net_ipvs(net);
 	char *name, *buf = NULL;
 	int (*threadfn)(void *data);
 	int result = -ENOMEM;
 
 	IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current));
 	IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %Zd bytes\n",
-		  sizeof(struct ip_vs_sync_conn));
+		  sizeof(struct ip_vs_sync_conn_v0));
 
 	if (state == IP_VS_STATE_MASTER) {
-		if (sync_master_thread)
+		if (ipvs->master_thread)
 			return -EEXIST;
 
-		strlcpy(ip_vs_master_mcast_ifn, mcast_ifn,
-			sizeof(ip_vs_master_mcast_ifn));
-		ip_vs_master_syncid = syncid;
-		realtask = &sync_master_thread;
-		name = "ipvs_syncmaster";
+		strlcpy(ipvs->master_mcast_ifn, mcast_ifn,
+			sizeof(ipvs->master_mcast_ifn));
+		ipvs->master_syncid = syncid;
+		realtask = &ipvs->master_thread;
+		name = "ipvs_master:%d";
 		threadfn = sync_thread_master;
-		sock = make_send_sock();
+		sock = make_send_sock(net);
 	} else if (state == IP_VS_STATE_BACKUP) {
-		if (sync_backup_thread)
+		if (ipvs->backup_thread)
 			return -EEXIST;
 
-		strlcpy(ip_vs_backup_mcast_ifn, mcast_ifn,
-			sizeof(ip_vs_backup_mcast_ifn));
-		ip_vs_backup_syncid = syncid;
-		realtask = &sync_backup_thread;
-		name = "ipvs_syncbackup";
+		strlcpy(ipvs->backup_mcast_ifn, mcast_ifn,
+			sizeof(ipvs->backup_mcast_ifn));
+		ipvs->backup_syncid = syncid;
+		realtask = &ipvs->backup_thread;
+		name = "ipvs_backup:%d";
 		threadfn = sync_thread_backup;
-		sock = make_receive_sock();
+		sock = make_receive_sock(net);
 	} else {
 		return -EINVAL;
 	}
@@ -884,9 +1567,9 @@ int start_sync_thread(int state, char *mcast_ifn, __u8 syncid)
 		goto out;
 	}
 
-	set_sync_mesg_maxlen(state);
+	set_sync_mesg_maxlen(net, state);
 	if (state == IP_VS_STATE_BACKUP) {
-		buf = kmalloc(sync_recv_mesg_maxlen, GFP_KERNEL);
+		buf = kmalloc(ipvs->recv_mesg_maxlen, GFP_KERNEL);
 		if (!buf)
 			goto outsocket;
 	}
@@ -895,10 +1578,11 @@ int start_sync_thread(int state, char *mcast_ifn, __u8 syncid)
 	if (!tinfo)
 		goto outbuf;
 
+	tinfo->net = net;
 	tinfo->sock = sock;
 	tinfo->buf = buf;
 
-	task = kthread_run(threadfn, tinfo, name);
+	task = kthread_run(threadfn, tinfo, name, ipvs->gen);
 	if (IS_ERR(task)) {
 		result = PTR_ERR(task);
 		goto outtinfo;
@@ -906,7 +1590,7 @@ int start_sync_thread(int state, char *mcast_ifn, __u8 syncid)
 
 	/* mark as active */
 	*realtask = task;
-	ip_vs_sync_state |= state;
+	ipvs->sync_state |= state;
 
 	/* increase the module use count */
 	ip_vs_use_count_inc();
@@ -924,16 +1608,18 @@ out:
 }
 
 
-int stop_sync_thread(int state)
+int stop_sync_thread(struct net *net, int state)
 {
+	struct netns_ipvs *ipvs = net_ipvs(net);
+
 	IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current));
 
 	if (state == IP_VS_STATE_MASTER) {
-		if (!sync_master_thread)
+		if (!ipvs->master_thread)
 			return -ESRCH;
 
 		pr_info("stopping master sync thread %d ...\n",
-			task_pid_nr(sync_master_thread));
+			task_pid_nr(ipvs->master_thread));
 
 		/*
 		 * The lock synchronizes with sb_queue_tail(), so that we don't
@@ -941,21 +1627,21 @@ int stop_sync_thread(int state)
 		 * progress of stopping the master sync daemon.
 		 */
 
-		spin_lock_bh(&ip_vs_sync_lock);
-		ip_vs_sync_state &= ~IP_VS_STATE_MASTER;
-		spin_unlock_bh(&ip_vs_sync_lock);
-		kthread_stop(sync_master_thread);
-		sync_master_thread = NULL;
+		spin_lock_bh(&ipvs->sync_lock);
+		ipvs->sync_state &= ~IP_VS_STATE_MASTER;
+		spin_unlock_bh(&ipvs->sync_lock);
+		kthread_stop(ipvs->master_thread);
+		ipvs->master_thread = NULL;
 	} else if (state == IP_VS_STATE_BACKUP) {
-		if (!sync_backup_thread)
+		if (!ipvs->backup_thread)
 			return -ESRCH;
 
 		pr_info("stopping backup sync thread %d ...\n",
-			task_pid_nr(sync_backup_thread));
+			task_pid_nr(ipvs->backup_thread));
 
-		ip_vs_sync_state &= ~IP_VS_STATE_BACKUP;
-		kthread_stop(sync_backup_thread);
-		sync_backup_thread = NULL;
+		ipvs->sync_state &= ~IP_VS_STATE_BACKUP;
+		kthread_stop(ipvs->backup_thread);
+		ipvs->backup_thread = NULL;
 	} else {
 		return -EINVAL;
 	}
@@ -965,3 +1651,42 @@ int stop_sync_thread(int state)
 
 	return 0;
 }
+
+/*
+ * Initialize data struct for each netns
+ */
+static int __net_init __ip_vs_sync_init(struct net *net)
+{
+	struct netns_ipvs *ipvs = net_ipvs(net);
+
+	INIT_LIST_HEAD(&ipvs->sync_queue);
+	spin_lock_init(&ipvs->sync_lock);
+	spin_lock_init(&ipvs->sync_buff_lock);
+
+	ipvs->sync_mcast_addr.sin_family = AF_INET;
+	ipvs->sync_mcast_addr.sin_port = cpu_to_be16(IP_VS_SYNC_PORT);
+	ipvs->sync_mcast_addr.sin_addr.s_addr = cpu_to_be32(IP_VS_SYNC_GROUP);
+	return 0;
+}
+
+static void __ip_vs_sync_cleanup(struct net *net)
+{
+	stop_sync_thread(net, IP_VS_STATE_MASTER);
+	stop_sync_thread(net, IP_VS_STATE_BACKUP);
+}
+
+static struct pernet_operations ipvs_sync_ops = {
+	.init = __ip_vs_sync_init,
+	.exit = __ip_vs_sync_cleanup,
+};
+
+
+int __init ip_vs_sync_init(void)
+{
+	return register_pernet_subsys(&ipvs_sync_ops);
+}
+
+void ip_vs_sync_cleanup(void)
+{
+	unregister_pernet_subsys(&ipvs_sync_ops);
+}
diff --git a/net/netfilter/ipvs/ip_vs_wlc.c b/net/netfilter/ipvs/ip_vs_wlc.c
index bbddfdb..bc1bfc4 100644
--- a/net/netfilter/ipvs/ip_vs_wlc.c
+++ b/net/netfilter/ipvs/ip_vs_wlc.c
@@ -27,22 +27,6 @@
 
 #include <net/ip_vs.h>
 
-
-static inline unsigned int
-ip_vs_wlc_dest_overhead(struct ip_vs_dest *dest)
-{
-	/*
-	 * We think the overhead of processing active connections is 256
-	 * times higher than that of inactive connections in average. (This
-	 * 256 times might not be accurate, we will change it later) We
-	 * use the following formula to estimate the overhead now:
-	 *		  dest->activeconns*256 + dest->inactconns
-	 */
-	return (atomic_read(&dest->activeconns) << 8) +
-		atomic_read(&dest->inactconns);
-}
-
-
 /*
  *	Weighted Least Connection scheduling
  */
@@ -71,11 +55,11 @@ ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
 		if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) &&
 		    atomic_read(&dest->weight) > 0) {
 			least = dest;
-			loh = ip_vs_wlc_dest_overhead(least);
+			loh = ip_vs_dest_conn_overhead(least);
 			goto nextstage;
 		}
 	}
-	IP_VS_ERR_RL("WLC: no destination available\n");
+	ip_vs_scheduler_err(svc, "no destination available");
 	return NULL;
 
 	/*
@@ -85,7 +69,7 @@ ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
 	list_for_each_entry_continue(dest, &svc->destinations, n_list) {
 		if (dest->flags & IP_VS_DEST_F_OVERLOAD)
 			continue;
-		doh = ip_vs_wlc_dest_overhead(dest);
+		doh = ip_vs_dest_conn_overhead(dest);
 		if (loh * atomic_read(&dest->weight) >
 		    doh * atomic_read(&least->weight)) {
 			least = dest;
diff --git a/net/netfilter/ipvs/ip_vs_wrr.c b/net/netfilter/ipvs/ip_vs_wrr.c
index 30db633..1ef41f5 100644
--- a/net/netfilter/ipvs/ip_vs_wrr.c
+++ b/net/netfilter/ipvs/ip_vs_wrr.c
@@ -147,8 +147,9 @@ ip_vs_wrr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
 
 			if (mark->cl == mark->cl->next) {
 				/* no dest entry */
-				IP_VS_ERR_RL("WRR: no destination available: "
-					     "no destinations present\n");
+				ip_vs_scheduler_err(svc,
+					"no destination available: "
+					"no destinations present");
 				dest = NULL;
 				goto out;
 			}
@@ -162,8 +163,8 @@ ip_vs_wrr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
 				 */
 				if (mark->cw == 0) {
 					mark->cl = &svc->destinations;
-					IP_VS_ERR_RL("WRR: no destination "
-						     "available\n");
+					ip_vs_scheduler_err(svc,
+						"no destination available");
 					dest = NULL;
 					goto out;
 				}
@@ -185,8 +186,9 @@ ip_vs_wrr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
 			/* back to the start, and no dest is found.
 			   It is only possible when all dests are OVERLOADED */
 			dest = NULL;
-			IP_VS_ERR_RL("WRR: no destination available: "
-				     "all destinations are overloaded\n");
+			ip_vs_scheduler_err(svc,
+				"no destination available: "
+				"all destinations are overloaded");
 			goto out;
 		}
 	}
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 5325a3fb..878f6dd 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -43,6 +43,13 @@
 
 #include <net/ip_vs.h>
 
+enum {
+	IP_VS_RT_MODE_LOCAL	= 1, /* Allow local dest */
+	IP_VS_RT_MODE_NON_LOCAL	= 2, /* Allow non-local dest */
+	IP_VS_RT_MODE_RDR	= 4, /* Allow redirect from remote daddr to
+				      * local
+				      */
+};
 
 /*
  *      Destination cache to speed up outgoing route lookup
@@ -77,11 +84,7 @@ __ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos)
 	return dst;
 }
 
-/*
- * Get route to destination or remote server
- * rt_mode: flags, &1=Allow local dest, &2=Allow non-local dest,
- *	    &4=Allow redirect from remote daddr to local
- */
+/* Get route to destination or remote server */
 static struct rtable *
 __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest,
 		   __be32 daddr, u32 rtos, int rt_mode)
@@ -100,7 +103,8 @@ __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest,
 				.fl4_tos = rtos,
 			};
 
-			if (ip_route_output_key(net, &rt, &fl)) {
+			rt = ip_route_output_key(net, &fl);
+			if (IS_ERR(rt)) {
 				spin_unlock(&dest->dst_lock);
 				IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n",
 					     &dest->addr.ip);
@@ -118,7 +122,8 @@ __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest,
 			.fl4_tos = rtos,
 		};
 
-		if (ip_route_output_key(net, &rt, &fl)) {
+		rt = ip_route_output_key(net, &fl);
+		if (IS_ERR(rt)) {
 			IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n",
 				     &daddr);
 			return NULL;
@@ -126,15 +131,16 @@ __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest,
 	}
 
 	local = rt->rt_flags & RTCF_LOCAL;
-	if (!((local ? 1 : 2) & rt_mode)) {
+	if (!((local ? IP_VS_RT_MODE_LOCAL : IP_VS_RT_MODE_NON_LOCAL) &
+	      rt_mode)) {
 		IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI4\n",
 			     (rt->rt_flags & RTCF_LOCAL) ?
 			     "local":"non-local", &rt->rt_dst);
 		ip_rt_put(rt);
 		return NULL;
 	}
-	if (local && !(rt_mode & 4) && !((ort = skb_rtable(skb)) &&
-					 ort->rt_flags & RTCF_LOCAL)) {
+	if (local && !(rt_mode & IP_VS_RT_MODE_RDR) &&
+	    !((ort = skb_rtable(skb)) && ort->rt_flags & RTCF_LOCAL)) {
 		IP_VS_DBG_RL("Redirect from non-local address %pI4 to local "
 			     "requires NAT method, dest: %pI4\n",
 			     &ip_hdr(skb)->daddr, &rt->rt_dst);
@@ -175,9 +181,9 @@ __ip_vs_reroute_locally(struct sk_buff *skb)
 			.fl4_tos = RT_TOS(iph->tos),
 			.mark = skb->mark,
 		};
-		struct rtable *rt;
 
-		if (ip_route_output_key(net, &rt, &fl))
+		rt = ip_route_output_key(net, &fl);
+		if (IS_ERR(rt))
 			return 0;
 		if (!(rt->rt_flags & RTCF_LOCAL)) {
 			ip_rt_put(rt);
@@ -215,8 +221,13 @@ __ip_vs_route_output_v6(struct net *net, struct in6_addr *daddr,
 	    ipv6_dev_get_saddr(net, ip6_dst_idev(dst)->dev,
 			       &fl.fl6_dst, 0, &fl.fl6_src) < 0)
 		goto out_err;
-	if (do_xfrm && xfrm_lookup(net, &dst, &fl, NULL, 0) < 0)
-		goto out_err;
+	if (do_xfrm) {
+		dst = xfrm_lookup(net, dst, &fl, NULL, 0);
+		if (IS_ERR(dst)) {
+			dst = NULL;
+			goto out_err;
+		}
+	}
 	ipv6_addr_copy(ret_saddr, &fl.fl6_src);
 	return dst;
 
@@ -384,13 +395,14 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 
 	EnterFunction(10);
 
-	if (!(rt = __ip_vs_get_out_rt(skb, NULL, iph->daddr,
-				      RT_TOS(iph->tos), 2)))
+	if (!(rt = __ip_vs_get_out_rt(skb, NULL, iph->daddr, RT_TOS(iph->tos),
+				      IP_VS_RT_MODE_NON_LOCAL)))
 		goto tx_error_icmp;
 
 	/* MTU checking */
 	mtu = dst_mtu(&rt->dst);
-	if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) {
+	if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF)) &&
+	    !skb_is_gso(skb)) {
 		ip_rt_put(rt);
 		icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
 		IP_VS_DBG_RL("%s(): frag needed\n", __func__);
@@ -443,7 +455,7 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 
 	/* MTU checking */
 	mtu = dst_mtu(&rt->dst);
-	if (skb->len > mtu) {
+	if (skb->len > mtu && !skb_is_gso(skb)) {
 		if (!skb->dev) {
 			struct net *net = dev_net(skb_dst(skb)->dev);
 
@@ -512,7 +524,10 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	}
 
 	if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
-				      RT_TOS(iph->tos), 1|2|4)))
+				      RT_TOS(iph->tos),
+				      IP_VS_RT_MODE_LOCAL |
+					IP_VS_RT_MODE_NON_LOCAL |
+					IP_VS_RT_MODE_RDR)))
 		goto tx_error_icmp;
 	local = rt->rt_flags & RTCF_LOCAL;
 	/*
@@ -543,7 +558,8 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 
 	/* MTU checking */
 	mtu = dst_mtu(&rt->dst);
-	if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) {
+	if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF)) &&
+	    !skb_is_gso(skb)) {
 		icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
 		IP_VS_DBG_RL_PKT(0, AF_INET, pp, skb, 0,
 				 "ip_vs_nat_xmit(): frag needed for");
@@ -658,7 +674,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 
 	/* MTU checking */
 	mtu = dst_mtu(&rt->dst);
-	if (skb->len > mtu) {
+	if (skb->len > mtu && !skb_is_gso(skb)) {
 		if (!skb->dev) {
 			struct net *net = dev_net(skb_dst(skb)->dev);
 
@@ -754,7 +770,8 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	EnterFunction(10);
 
 	if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
-				      RT_TOS(tos), 1|2)))
+				      RT_TOS(tos), IP_VS_RT_MODE_LOCAL |
+						   IP_VS_RT_MODE_NON_LOCAL)))
 		goto tx_error_icmp;
 	if (rt->rt_flags & RTCF_LOCAL) {
 		ip_rt_put(rt);
@@ -773,8 +790,8 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 
 	df |= (old_iph->frag_off & htons(IP_DF));
 
-	if ((old_iph->frag_off & htons(IP_DF))
-	    && mtu < ntohs(old_iph->tot_len)) {
+	if ((old_iph->frag_off & htons(IP_DF) &&
+	    mtu < ntohs(old_iph->tot_len) && !skb_is_gso(skb))) {
 		icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
 		IP_VS_DBG_RL("%s(): frag needed\n", __func__);
 		goto tx_error_put;
@@ -886,7 +903,8 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 	if (skb_dst(skb))
 		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
 
-	if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr)) {
+	if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr) &&
+	    !skb_is_gso(skb)) {
 		if (!skb->dev) {
 			struct net *net = dev_net(skb_dst(skb)->dev);
 
@@ -982,7 +1000,9 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	EnterFunction(10);
 
 	if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
-				      RT_TOS(iph->tos), 1|2)))
+				      RT_TOS(iph->tos),
+				      IP_VS_RT_MODE_LOCAL |
+					IP_VS_RT_MODE_NON_LOCAL)))
 		goto tx_error_icmp;
 	if (rt->rt_flags & RTCF_LOCAL) {
 		ip_rt_put(rt);
@@ -991,7 +1011,8 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 
 	/* MTU checking */
 	mtu = dst_mtu(&rt->dst);
-	if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu) {
+	if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu &&
+	    !skb_is_gso(skb)) {
 		icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
 		ip_rt_put(rt);
 		IP_VS_DBG_RL("%s(): frag needed\n", __func__);
@@ -1125,7 +1146,10 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	 */
 
 	if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
-				      RT_TOS(ip_hdr(skb)->tos), 1|2|4)))
+				      RT_TOS(ip_hdr(skb)->tos),
+				      IP_VS_RT_MODE_LOCAL |
+					IP_VS_RT_MODE_NON_LOCAL |
+					IP_VS_RT_MODE_RDR)))
 		goto tx_error_icmp;
 	local = rt->rt_flags & RTCF_LOCAL;
 
@@ -1158,7 +1182,8 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 
 	/* MTU checking */
 	mtu = dst_mtu(&rt->dst);
-	if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF))) {
+	if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF)) &&
+	    !skb_is_gso(skb)) {
 		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
 		IP_VS_DBG_RL("%s(): frag needed\n", __func__);
 		goto tx_error_put;
@@ -1272,7 +1297,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 
 	/* MTU checking */
 	mtu = dst_mtu(&rt->dst);
-	if (skb->len > mtu) {
+	if (skb->len > mtu && !skb_is_gso(skb)) {
 		if (!skb->dev) {
 			struct net *net = dev_net(skb_dst(skb)->dev);
 
diff --git a/net/netfilter/nf_conntrack_broadcast.c b/net/netfilter/nf_conntrack_broadcast.c
new file mode 100644
index 0000000..4e99cca
--- /dev/null
+++ b/net/netfilter/nf_conntrack_broadcast.c
@@ -0,0 +1,82 @@
+/*
+ *      broadcast connection tracking helper
+ *
+ *      (c) 2005 Patrick McHardy <kaber@trash.net>
+ *
+ *      This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/ip.h>
+#include <net/route.h>
+#include <linux/inetdevice.h>
+#include <linux/skbuff.h>
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_expect.h>
+
+int nf_conntrack_broadcast_help(struct sk_buff *skb,
+				unsigned int protoff,
+				struct nf_conn *ct,
+				enum ip_conntrack_info ctinfo,
+				unsigned int timeout)
+{
+	struct nf_conntrack_expect *exp;
+	struct iphdr *iph = ip_hdr(skb);
+	struct rtable *rt = skb_rtable(skb);
+	struct in_device *in_dev;
+	struct nf_conn_help *help = nfct_help(ct);
+	__be32 mask = 0;
+
+	/* we're only interested in locally generated packets */
+	if (skb->sk == NULL)
+		goto out;
+	if (rt == NULL || !(rt->rt_flags & RTCF_BROADCAST))
+		goto out;
+	if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
+		goto out;
+
+	rcu_read_lock();
+	in_dev = __in_dev_get_rcu(rt->dst.dev);
+	if (in_dev != NULL) {
+		for_primary_ifa(in_dev) {
+			if (ifa->ifa_broadcast == iph->daddr) {
+				mask = ifa->ifa_mask;
+				break;
+			}
+		} endfor_ifa(in_dev);
+	}
+	rcu_read_unlock();
+
+	if (mask == 0)
+		goto out;
+
+	exp = nf_ct_expect_alloc(ct);
+	if (exp == NULL)
+		goto out;
+
+	exp->tuple                = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
+	exp->tuple.src.u.udp.port = help->helper->tuple.src.u.udp.port;
+
+	exp->mask.src.u3.ip       = mask;
+	exp->mask.src.u.udp.port  = htons(0xFFFF);
+
+	exp->expectfn             = NULL;
+	exp->flags                = NF_CT_EXPECT_PERMANENT;
+	exp->class		  = NF_CT_EXPECT_CLASS_DEFAULT;
+	exp->helper               = NULL;
+
+	nf_ct_expect_related(exp);
+	nf_ct_expect_put(exp);
+
+	nf_ct_refresh(ct, skb, timeout * HZ);
+out:
+	return NF_ACCEPT;
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_broadcast_help);
+
+MODULE_LICENSE("GPL");
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 84f4fcc..2f454ef 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -43,6 +43,7 @@
 #include <net/netfilter/nf_conntrack_acct.h>
 #include <net/netfilter/nf_conntrack_ecache.h>
 #include <net/netfilter/nf_conntrack_zones.h>
+#include <net/netfilter/nf_conntrack_timestamp.h>
 #include <net/netfilter/nf_nat.h>
 #include <net/netfilter/nf_nat_core.h>
 
@@ -282,6 +283,11 @@ EXPORT_SYMBOL_GPL(nf_ct_insert_dying_list);
 static void death_by_timeout(unsigned long ul_conntrack)
 {
 	struct nf_conn *ct = (void *)ul_conntrack;
+	struct nf_conn_tstamp *tstamp;
+
+	tstamp = nf_conn_tstamp_find(ct);
+	if (tstamp && tstamp->stop == 0)
+		tstamp->stop = ktime_to_ns(ktime_get_real());
 
 	if (!test_bit(IPS_DYING_BIT, &ct->status) &&
 	    unlikely(nf_conntrack_event(IPCT_DESTROY, ct) < 0)) {
@@ -419,6 +425,7 @@ __nf_conntrack_confirm(struct sk_buff *skb)
 	struct nf_conntrack_tuple_hash *h;
 	struct nf_conn *ct;
 	struct nf_conn_help *help;
+	struct nf_conn_tstamp *tstamp;
 	struct hlist_nulls_node *n;
 	enum ip_conntrack_info ctinfo;
 	struct net *net;
@@ -486,8 +493,16 @@ __nf_conntrack_confirm(struct sk_buff *skb)
 	ct->timeout.expires += jiffies;
 	add_timer(&ct->timeout);
 	atomic_inc(&ct->ct_general.use);
-	set_bit(IPS_CONFIRMED_BIT, &ct->status);
+	ct->status |= IPS_CONFIRMED;
+
+	/* set conntrack timestamp, if enabled. */
+	tstamp = nf_conn_tstamp_find(ct);
+	if (tstamp) {
+		if (skb->tstamp.tv64 == 0)
+			__net_timestamp((struct sk_buff *)skb);
 
+		tstamp->start = ktime_to_ns(skb->tstamp);
+	}
 	/* Since the lookup is lockless, hash insertion must be done after
 	 * starting the timer and setting the CONFIRMED bit. The RCU barriers
 	 * guarantee that no other CPU can find the conntrack before the above
@@ -655,7 +670,8 @@ __nf_conntrack_alloc(struct net *net, u16 zone,
 	 * and ct->tuplehash[IP_CT_DIR_REPLY].hnnode.next unchanged.
 	 */
 	memset(&ct->tuplehash[IP_CT_DIR_MAX], 0,
-	       sizeof(*ct) - offsetof(struct nf_conn, tuplehash[IP_CT_DIR_MAX]));
+	       offsetof(struct nf_conn, proto) -
+	       offsetof(struct nf_conn, tuplehash[IP_CT_DIR_MAX]));
 	spin_lock_init(&ct->lock);
 	ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig;
 	ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode.pprev = NULL;
@@ -745,6 +761,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
 	}
 
 	nf_ct_acct_ext_add(ct, GFP_ATOMIC);
+	nf_ct_tstamp_ext_add(ct, GFP_ATOMIC);
 
 	ecache = tmpl ? nf_ct_ecache_find(tmpl) : NULL;
 	nf_ct_ecache_ext_add(ct, ecache ? ecache->ctmask : 0,
@@ -1192,6 +1209,11 @@ struct __nf_ct_flush_report {
 static int kill_report(struct nf_conn *i, void *data)
 {
 	struct __nf_ct_flush_report *fr = (struct __nf_ct_flush_report *)data;
+	struct nf_conn_tstamp *tstamp;
+
+	tstamp = nf_conn_tstamp_find(i);
+	if (tstamp && tstamp->stop == 0)
+		tstamp->stop = ktime_to_ns(ktime_get_real());
 
 	/* If we fail to deliver the event, death_by_timeout() will retry */
 	if (nf_conntrack_event_report(IPCT_DESTROY, i,
@@ -1208,9 +1230,9 @@ static int kill_all(struct nf_conn *i, void *data)
 	return 1;
 }
 
-void nf_ct_free_hashtable(void *hash, int vmalloced, unsigned int size)
+void nf_ct_free_hashtable(void *hash, unsigned int size)
 {
-	if (vmalloced)
+	if (is_vmalloc_addr(hash))
 		vfree(hash);
 	else
 		free_pages((unsigned long)hash,
@@ -1277,8 +1299,7 @@ static void nf_conntrack_cleanup_net(struct net *net)
 		goto i_see_dead_people;
 	}
 
-	nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc,
-			     net->ct.htable_size);
+	nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size);
 	nf_conntrack_ecache_fini(net);
 	nf_conntrack_acct_fini(net);
 	nf_conntrack_expect_fini(net);
@@ -1307,21 +1328,18 @@ void nf_conntrack_cleanup(struct net *net)
 	}
 }
 
-void *nf_ct_alloc_hashtable(unsigned int *sizep, int *vmalloced, int nulls)
+void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls)
 {
 	struct hlist_nulls_head *hash;
 	unsigned int nr_slots, i;
 	size_t sz;
 
-	*vmalloced = 0;
-
 	BUILD_BUG_ON(sizeof(struct hlist_nulls_head) != sizeof(struct hlist_head));
 	nr_slots = *sizep = roundup(*sizep, PAGE_SIZE / sizeof(struct hlist_nulls_head));
 	sz = nr_slots * sizeof(struct hlist_nulls_head);
 	hash = (void *)__get_free_pages(GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO,
 					get_order(sz));
 	if (!hash) {
-		*vmalloced = 1;
 		printk(KERN_WARNING "nf_conntrack: falling back to vmalloc.\n");
 		hash = __vmalloc(sz, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO,
 				 PAGE_KERNEL);
@@ -1337,7 +1355,7 @@ EXPORT_SYMBOL_GPL(nf_ct_alloc_hashtable);
 
 int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
 {
-	int i, bucket, vmalloced, old_vmalloced;
+	int i, bucket;
 	unsigned int hashsize, old_size;
 	struct hlist_nulls_head *hash, *old_hash;
 	struct nf_conntrack_tuple_hash *h;
@@ -1354,7 +1372,7 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
 	if (!hashsize)
 		return -EINVAL;
 
-	hash = nf_ct_alloc_hashtable(&hashsize, &vmalloced, 1);
+	hash = nf_ct_alloc_hashtable(&hashsize, 1);
 	if (!hash)
 		return -ENOMEM;
 
@@ -1376,15 +1394,13 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
 		}
 	}
 	old_size = init_net.ct.htable_size;
-	old_vmalloced = init_net.ct.hash_vmalloc;
 	old_hash = init_net.ct.hash;
 
 	init_net.ct.htable_size = nf_conntrack_htable_size = hashsize;
-	init_net.ct.hash_vmalloc = vmalloced;
 	init_net.ct.hash = hash;
 	spin_unlock_bh(&nf_conntrack_lock);
 
-	nf_ct_free_hashtable(old_hash, old_vmalloced, old_size);
+	nf_ct_free_hashtable(old_hash, old_size);
 	return 0;
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_set_hashsize);
@@ -1497,8 +1513,7 @@ static int nf_conntrack_init_net(struct net *net)
 	}
 
 	net->ct.htable_size = nf_conntrack_htable_size;
-	net->ct.hash = nf_ct_alloc_hashtable(&net->ct.htable_size,
-					     &net->ct.hash_vmalloc, 1);
+	net->ct.hash = nf_ct_alloc_hashtable(&net->ct.htable_size, 1);
 	if (!net->ct.hash) {
 		ret = -ENOMEM;
 		printk(KERN_ERR "Unable to create nf_conntrack_hash\n");
@@ -1510,6 +1525,9 @@ static int nf_conntrack_init_net(struct net *net)
 	ret = nf_conntrack_acct_init(net);
 	if (ret < 0)
 		goto err_acct;
+	ret = nf_conntrack_tstamp_init(net);
+	if (ret < 0)
+		goto err_tstamp;
 	ret = nf_conntrack_ecache_init(net);
 	if (ret < 0)
 		goto err_ecache;
@@ -1517,12 +1535,13 @@ static int nf_conntrack_init_net(struct net *net)
 	return 0;
 
 err_ecache:
+	nf_conntrack_tstamp_fini(net);
+err_tstamp:
 	nf_conntrack_acct_fini(net);
 err_acct:
 	nf_conntrack_expect_fini(net);
 err_expect:
-	nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc,
-			     net->ct.htable_size);
+	nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size);
 err_hash:
 	kmem_cache_destroy(net->ct.nf_conntrack_cachep);
 err_cache:
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index a20fb0b..cd1e8e0 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -319,7 +319,8 @@ static void nf_ct_expect_insert(struct nf_conntrack_expect *exp)
 	const struct nf_conntrack_expect_policy *p;
 	unsigned int h = nf_ct_expect_dst_hash(&exp->tuple);
 
-	atomic_inc(&exp->use);
+	/* two references : one for hash insert, one for the timer */
+	atomic_add(2, &exp->use);
 
 	if (master_help) {
 		hlist_add_head(&exp->lnode, &master_help->expectations);
@@ -333,12 +334,14 @@ static void nf_ct_expect_insert(struct nf_conntrack_expect *exp)
 	setup_timer(&exp->timeout, nf_ct_expectation_timed_out,
 		    (unsigned long)exp);
 	if (master_help) {
-		p = &master_help->helper->expect_policy[exp->class];
+		p = &rcu_dereference_protected(
+				master_help->helper,
+				lockdep_is_held(&nf_conntrack_lock)
+				)->expect_policy[exp->class];
 		exp->timeout.expires = jiffies + p->timeout * HZ;
 	}
 	add_timer(&exp->timeout);
 
-	atomic_inc(&exp->use);
 	NF_CT_STAT_INC(net, expect_create);
 }
 
@@ -369,7 +372,10 @@ static inline int refresh_timer(struct nf_conntrack_expect *i)
 	if (!del_timer(&i->timeout))
 		return 0;
 
-	p = &master_help->helper->expect_policy[i->class];
+	p = &rcu_dereference_protected(
+		master_help->helper,
+		lockdep_is_held(&nf_conntrack_lock)
+		)->expect_policy[i->class];
 	i->timeout.expires = jiffies + p->timeout * HZ;
 	add_timer(&i->timeout);
 	return 1;
@@ -407,7 +413,10 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect)
 	}
 	/* Will be over limit? */
 	if (master_help) {
-		p = &master_help->helper->expect_policy[expect->class];
+		p = &rcu_dereference_protected(
+			master_help->helper,
+			lockdep_is_held(&nf_conntrack_lock)
+			)->expect_policy[expect->class];
 		if (p->max_expected &&
 		    master_help->expecting[expect->class] >= p->max_expected) {
 			evict_oldest_expect(master, expect);
@@ -478,7 +487,7 @@ static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
 	struct hlist_node *n;
 
 	for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) {
-		n = rcu_dereference(net->ct.expect_hash[st->bucket].first);
+		n = rcu_dereference(hlist_first_rcu(&net->ct.expect_hash[st->bucket]));
 		if (n)
 			return n;
 	}
@@ -491,11 +500,11 @@ static struct hlist_node *ct_expect_get_next(struct seq_file *seq,
 	struct net *net = seq_file_net(seq);
 	struct ct_expect_iter_state *st = seq->private;
 
-	head = rcu_dereference(head->next);
+	head = rcu_dereference(hlist_next_rcu(head));
 	while (head == NULL) {
 		if (++st->bucket >= nf_ct_expect_hsize)
 			return NULL;
-		head = rcu_dereference(net->ct.expect_hash[st->bucket].first);
+		head = rcu_dereference(hlist_first_rcu(&net->ct.expect_hash[st->bucket]));
 	}
 	return head;
 }
@@ -630,8 +639,7 @@ int nf_conntrack_expect_init(struct net *net)
 	}
 
 	net->ct.expect_count = 0;
-	net->ct.expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize,
-						  &net->ct.expect_vmalloc, 0);
+	net->ct.expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize, 0);
 	if (net->ct.expect_hash == NULL)
 		goto err1;
 
@@ -653,8 +661,7 @@ err3:
 	if (net_eq(net, &init_net))
 		kmem_cache_destroy(nf_ct_expect_cachep);
 err2:
-	nf_ct_free_hashtable(net->ct.expect_hash, net->ct.expect_vmalloc,
-			     nf_ct_expect_hsize);
+	nf_ct_free_hashtable(net->ct.expect_hash, nf_ct_expect_hsize);
 err1:
 	return err;
 }
@@ -666,6 +673,5 @@ void nf_conntrack_expect_fini(struct net *net)
 		rcu_barrier(); /* Wait for call_rcu() before destroy */
 		kmem_cache_destroy(nf_ct_expect_cachep);
 	}
-	nf_ct_free_hashtable(net->ct.expect_hash, net->ct.expect_vmalloc,
-			     nf_ct_expect_hsize);
+	nf_ct_free_hashtable(net->ct.expect_hash, nf_ct_expect_hsize);
 }
diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c
index bd82450..80a23ed 100644
--- a/net/netfilter/nf_conntrack_extend.c
+++ b/net/netfilter/nf_conntrack_extend.c
@@ -140,15 +140,16 @@ static void update_alloc_size(struct nf_ct_ext_type *type)
 	/* This assumes that extended areas in conntrack for the types
 	   whose NF_CT_EXT_F_PREALLOC bit set are allocated in order */
 	for (i = min; i <= max; i++) {
-		t1 = nf_ct_ext_types[i];
+		t1 = rcu_dereference_protected(nf_ct_ext_types[i],
+				lockdep_is_held(&nf_ct_ext_type_mutex));
 		if (!t1)
 			continue;
 
-		t1->alloc_size = sizeof(struct nf_ct_ext)
-				 + ALIGN(sizeof(struct nf_ct_ext), t1->align)
-				 + t1->len;
+		t1->alloc_size = ALIGN(sizeof(struct nf_ct_ext), t1->align) +
+				 t1->len;
 		for (j = 0; j < NF_CT_EXT_NUM; j++) {
-			t2 = nf_ct_ext_types[j];
+			t2 = rcu_dereference_protected(nf_ct_ext_types[j],
+				lockdep_is_held(&nf_ct_ext_type_mutex));
 			if (t2 == NULL || t2 == t1 ||
 			    (t2->flags & NF_CT_EXT_F_PREALLOC) == 0)
 				continue;
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 59e1a4c..1bdfea3 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -33,7 +33,6 @@ static DEFINE_MUTEX(nf_ct_helper_mutex);
 static struct hlist_head *nf_ct_helper_hash __read_mostly;
 static unsigned int nf_ct_helper_hsize __read_mostly;
 static unsigned int nf_ct_helper_count __read_mostly;
-static int nf_ct_helper_vmalloc;
 
 
 /* Stupid hash, but collision free for the default registrations of the
@@ -158,7 +157,10 @@ static inline int unhelp(struct nf_conntrack_tuple_hash *i,
 	struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(i);
 	struct nf_conn_help *help = nfct_help(ct);
 
-	if (help && help->helper == me) {
+	if (help && rcu_dereference_protected(
+			help->helper,
+			lockdep_is_held(&nf_conntrack_lock)
+			) == me) {
 		nf_conntrack_event(IPCT_HELPER, ct);
 		rcu_assign_pointer(help->helper, NULL);
 	}
@@ -210,7 +212,10 @@ static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me,
 		hlist_for_each_entry_safe(exp, n, next,
 					  &net->ct.expect_hash[i], hnode) {
 			struct nf_conn_help *help = nfct_help(exp->master);
-			if ((help->helper == me || exp->helper == me) &&
+			if ((rcu_dereference_protected(
+					help->helper,
+					lockdep_is_held(&nf_conntrack_lock)
+					) == me || exp->helper == me) &&
 			    del_timer(&exp->timeout)) {
 				nf_ct_unlink_expect(exp);
 				nf_ct_expect_put(exp);
@@ -261,8 +266,7 @@ int nf_conntrack_helper_init(void)
 	int err;
 
 	nf_ct_helper_hsize = 1; /* gets rounded up to use one page */
-	nf_ct_helper_hash = nf_ct_alloc_hashtable(&nf_ct_helper_hsize,
-						  &nf_ct_helper_vmalloc, 0);
+	nf_ct_helper_hash = nf_ct_alloc_hashtable(&nf_ct_helper_hsize, 0);
 	if (!nf_ct_helper_hash)
 		return -ENOMEM;
 
@@ -273,14 +277,12 @@ int nf_conntrack_helper_init(void)
 	return 0;
 
 err1:
-	nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_vmalloc,
-			     nf_ct_helper_hsize);
+	nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_hsize);
 	return err;
 }
 
 void nf_conntrack_helper_fini(void)
 {
 	nf_ct_extend_unregister(&helper_extend);
-	nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_vmalloc,
-			     nf_ct_helper_hsize);
+	nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_hsize);
 }
diff --git a/net/netfilter/nf_conntrack_netbios_ns.c b/net/netfilter/nf_conntrack_netbios_ns.c
index aadde01..4c8f30a 100644
--- a/net/netfilter/nf_conntrack_netbios_ns.c
+++ b/net/netfilter/nf_conntrack_netbios_ns.c
@@ -18,14 +18,7 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/init.h>
-#include <linux/skbuff.h>
-#include <linux/netdevice.h>
-#include <linux/inetdevice.h>
-#include <linux/if_addr.h>
 #include <linux/in.h>
-#include <linux/ip.h>
-#include <linux/netfilter.h>
-#include <net/route.h>
 
 #include <net/netfilter/nf_conntrack.h>
 #include <net/netfilter/nf_conntrack_helper.h>
@@ -40,75 +33,26 @@ MODULE_ALIAS("ip_conntrack_netbios_ns");
 MODULE_ALIAS_NFCT_HELPER("netbios_ns");
 
 static unsigned int timeout __read_mostly = 3;
-module_param(timeout, uint, 0400);
+module_param(timeout, uint, S_IRUSR);
 MODULE_PARM_DESC(timeout, "timeout for master connection/replies in seconds");
 
-static int help(struct sk_buff *skb, unsigned int protoff,
-		struct nf_conn *ct, enum ip_conntrack_info ctinfo)
-{
-	struct nf_conntrack_expect *exp;
-	struct iphdr *iph = ip_hdr(skb);
-	struct rtable *rt = skb_rtable(skb);
-	struct in_device *in_dev;
-	__be32 mask = 0;
-
-	/* we're only interested in locally generated packets */
-	if (skb->sk == NULL)
-		goto out;
-	if (rt == NULL || !(rt->rt_flags & RTCF_BROADCAST))
-		goto out;
-	if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
-		goto out;
-
-	rcu_read_lock();
-	in_dev = __in_dev_get_rcu(rt->dst.dev);
-	if (in_dev != NULL) {
-		for_primary_ifa(in_dev) {
-			if (ifa->ifa_broadcast == iph->daddr) {
-				mask = ifa->ifa_mask;
-				break;
-			}
-		} endfor_ifa(in_dev);
-	}
-	rcu_read_unlock();
-
-	if (mask == 0)
-		goto out;
-
-	exp = nf_ct_expect_alloc(ct);
-	if (exp == NULL)
-		goto out;
-
-	exp->tuple                = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
-	exp->tuple.src.u.udp.port = htons(NMBD_PORT);
-
-	exp->mask.src.u3.ip       = mask;
-	exp->mask.src.u.udp.port  = htons(0xFFFF);
-
-	exp->expectfn             = NULL;
-	exp->flags                = NF_CT_EXPECT_PERMANENT;
-	exp->class		  = NF_CT_EXPECT_CLASS_DEFAULT;
-	exp->helper               = NULL;
-
-	nf_ct_expect_related(exp);
-	nf_ct_expect_put(exp);
-
-	nf_ct_refresh(ct, skb, timeout * HZ);
-out:
-	return NF_ACCEPT;
-}
-
 static struct nf_conntrack_expect_policy exp_policy = {
 	.max_expected	= 1,
 };
 
+static int netbios_ns_help(struct sk_buff *skb, unsigned int protoff,
+		   struct nf_conn *ct, enum ip_conntrack_info ctinfo)
+{
+	return nf_conntrack_broadcast_help(skb, protoff, ct, ctinfo, timeout);
+}
+
 static struct nf_conntrack_helper helper __read_mostly = {
 	.name			= "netbios-ns",
-	.tuple.src.l3num	= AF_INET,
+	.tuple.src.l3num	= NFPROTO_IPV4,
 	.tuple.src.u.udp.port	= cpu_to_be16(NMBD_PORT),
 	.tuple.dst.protonum	= IPPROTO_UDP,
 	.me			= THIS_MODULE,
-	.help			= help,
+	.help			= netbios_ns_help,
 	.expect_policy		= &exp_policy,
 };
 
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index eead9db..30bf8a1 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -42,6 +42,7 @@
 #include <net/netfilter/nf_conntrack_tuple.h>
 #include <net/netfilter/nf_conntrack_acct.h>
 #include <net/netfilter/nf_conntrack_zones.h>
+#include <net/netfilter/nf_conntrack_timestamp.h>
 #ifdef CONFIG_NF_NAT_NEEDED
 #include <net/netfilter/nf_nat_core.h>
 #include <net/netfilter/nf_nat_protocol.h>
@@ -230,6 +231,33 @@ nla_put_failure:
 	return -1;
 }
 
+static int
+ctnetlink_dump_timestamp(struct sk_buff *skb, const struct nf_conn *ct)
+{
+	struct nlattr *nest_count;
+	const struct nf_conn_tstamp *tstamp;
+
+	tstamp = nf_conn_tstamp_find(ct);
+	if (!tstamp)
+		return 0;
+
+	nest_count = nla_nest_start(skb, CTA_TIMESTAMP | NLA_F_NESTED);
+	if (!nest_count)
+		goto nla_put_failure;
+
+	NLA_PUT_BE64(skb, CTA_TIMESTAMP_START, cpu_to_be64(tstamp->start));
+	if (tstamp->stop != 0) {
+		NLA_PUT_BE64(skb, CTA_TIMESTAMP_STOP,
+			     cpu_to_be64(tstamp->stop));
+	}
+	nla_nest_end(skb, nest_count);
+
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
 #ifdef CONFIG_NF_CONNTRACK_MARK
 static inline int
 ctnetlink_dump_mark(struct sk_buff *skb, const struct nf_conn *ct)
@@ -404,6 +432,7 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
 	    ctnetlink_dump_timeout(skb, ct) < 0 ||
 	    ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 ||
 	    ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0 ||
+	    ctnetlink_dump_timestamp(skb, ct) < 0 ||
 	    ctnetlink_dump_protoinfo(skb, ct) < 0 ||
 	    ctnetlink_dump_helpinfo(skb, ct) < 0 ||
 	    ctnetlink_dump_mark(skb, ct) < 0 ||
@@ -471,6 +500,18 @@ ctnetlink_secctx_size(const struct nf_conn *ct)
 }
 
 static inline size_t
+ctnetlink_timestamp_size(const struct nf_conn *ct)
+{
+#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
+	if (!nf_ct_ext_exist(ct, NF_CT_EXT_TSTAMP))
+		return 0;
+	return nla_total_size(0) + 2 * nla_total_size(sizeof(uint64_t));
+#else
+	return 0;
+#endif
+}
+
+static inline size_t
 ctnetlink_nlmsg_size(const struct nf_conn *ct)
 {
 	return NLMSG_ALIGN(sizeof(struct nfgenmsg))
@@ -481,6 +522,7 @@ ctnetlink_nlmsg_size(const struct nf_conn *ct)
 	       + nla_total_size(sizeof(u_int32_t)) /* CTA_ID */
 	       + nla_total_size(sizeof(u_int32_t)) /* CTA_STATUS */
 	       + ctnetlink_counters_size(ct)
+	       + ctnetlink_timestamp_size(ct)
 	       + nla_total_size(sizeof(u_int32_t)) /* CTA_TIMEOUT */
 	       + nla_total_size(0) /* CTA_PROTOINFO */
 	       + nla_total_size(0) /* CTA_HELP */
@@ -571,7 +613,8 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
 
 	if (events & (1 << IPCT_DESTROY)) {
 		if (ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 ||
-		    ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0)
+		    ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0 ||
+		    ctnetlink_dump_timestamp(skb, ct) < 0)
 			goto nla_put_failure;
 	} else {
 		if (ctnetlink_dump_timeout(skb, ct) < 0)
@@ -761,7 +804,7 @@ static const struct nla_policy tuple_nla_policy[CTA_TUPLE_MAX+1] = {
 static int
 ctnetlink_parse_tuple(const struct nlattr * const cda[],
 		      struct nf_conntrack_tuple *tuple,
-		      enum ctattr_tuple type, u_int8_t l3num)
+		      enum ctattr_type type, u_int8_t l3num)
 {
 	struct nlattr *tb[CTA_TUPLE_MAX+1];
 	int err;
@@ -1358,6 +1401,7 @@ ctnetlink_create_conntrack(struct net *net, u16 zone,
 	}
 
 	nf_ct_acct_ext_add(ct, GFP_ATOMIC);
+	nf_ct_tstamp_ext_add(ct, GFP_ATOMIC);
 	nf_ct_ecache_ext_add(ct, 0, 0, GFP_ATOMIC);
 	/* we must add conntrack extensions before confirmation. */
 	ct->status |= IPS_CONFIRMED;
@@ -1376,6 +1420,7 @@ ctnetlink_create_conntrack(struct net *net, u16 zone,
 	}
 #endif
 
+	memset(&ct->proto, 0, sizeof(ct->proto));
 	if (cda[CTA_PROTOINFO]) {
 		err = ctnetlink_change_protoinfo(ct, cda);
 		if (err < 0)
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index dc7bb74..5701c8d 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -166,6 +166,7 @@ static void nf_ct_l3proto_unregister_sysctl(struct nf_conntrack_l3proto *l3proto
 int nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto)
 {
 	int ret = 0;
+	struct nf_conntrack_l3proto *old;
 
 	if (proto->l3proto >= AF_MAX)
 		return -EBUSY;
@@ -174,7 +175,9 @@ int nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto)
 		return -EINVAL;
 
 	mutex_lock(&nf_ct_proto_mutex);
-	if (nf_ct_l3protos[proto->l3proto] != &nf_conntrack_l3proto_generic) {
+	old = rcu_dereference_protected(nf_ct_l3protos[proto->l3proto],
+					lockdep_is_held(&nf_ct_proto_mutex));
+	if (old != &nf_conntrack_l3proto_generic) {
 		ret = -EBUSY;
 		goto out_unlock;
 	}
@@ -201,7 +204,9 @@ void nf_conntrack_l3proto_unregister(struct nf_conntrack_l3proto *proto)
 	BUG_ON(proto->l3proto >= AF_MAX);
 
 	mutex_lock(&nf_ct_proto_mutex);
-	BUG_ON(nf_ct_l3protos[proto->l3proto] != proto);
+	BUG_ON(rcu_dereference_protected(nf_ct_l3protos[proto->l3proto],
+					 lockdep_is_held(&nf_ct_proto_mutex)
+					 ) != proto);
 	rcu_assign_pointer(nf_ct_l3protos[proto->l3proto],
 			   &nf_conntrack_l3proto_generic);
 	nf_ct_l3proto_unregister_sysctl(proto);
@@ -279,7 +284,7 @@ int nf_conntrack_l4proto_register(struct nf_conntrack_l4proto *l4proto)
 	mutex_lock(&nf_ct_proto_mutex);
 	if (!nf_ct_protos[l4proto->l3proto]) {
 		/* l3proto may be loaded latter. */
-		struct nf_conntrack_l4proto **proto_array;
+		struct nf_conntrack_l4proto __rcu **proto_array;
 		int i;
 
 		proto_array = kmalloc(MAX_NF_CT_PROTO *
@@ -291,7 +296,7 @@ int nf_conntrack_l4proto_register(struct nf_conntrack_l4proto *l4proto)
 		}
 
 		for (i = 0; i < MAX_NF_CT_PROTO; i++)
-			proto_array[i] = &nf_conntrack_l4proto_generic;
+			RCU_INIT_POINTER(proto_array[i], &nf_conntrack_l4proto_generic);
 
 		/* Before making proto_array visible to lockless readers,
 		 * we must make sure its content is committed to memory.
@@ -299,8 +304,10 @@ int nf_conntrack_l4proto_register(struct nf_conntrack_l4proto *l4proto)
 		smp_wmb();
 
 		nf_ct_protos[l4proto->l3proto] = proto_array;
-	} else if (nf_ct_protos[l4proto->l3proto][l4proto->l4proto] !=
-					&nf_conntrack_l4proto_generic) {
+	} else if (rcu_dereference_protected(
+			nf_ct_protos[l4proto->l3proto][l4proto->l4proto],
+			lockdep_is_held(&nf_ct_proto_mutex)
+			) != &nf_conntrack_l4proto_generic) {
 		ret = -EBUSY;
 		goto out_unlock;
 	}
@@ -331,7 +338,10 @@ void nf_conntrack_l4proto_unregister(struct nf_conntrack_l4proto *l4proto)
 	BUG_ON(l4proto->l3proto >= PF_MAX);
 
 	mutex_lock(&nf_ct_proto_mutex);
-	BUG_ON(nf_ct_protos[l4proto->l3proto][l4proto->l4proto] != l4proto);
+	BUG_ON(rcu_dereference_protected(
+			nf_ct_protos[l4proto->l3proto][l4proto->l4proto],
+			lockdep_is_held(&nf_ct_proto_mutex)
+			) != l4proto);
 	rcu_assign_pointer(nf_ct_protos[l4proto->l3proto][l4proto->l4proto],
 			   &nf_conntrack_l4proto_generic);
 	nf_ct_l4proto_unregister_sysctl(l4proto);
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index 5292560..9ae57c5 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -452,6 +452,9 @@ static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb,
 	ct->proto.dccp.role[IP_CT_DIR_ORIGINAL] = CT_DCCP_ROLE_CLIENT;
 	ct->proto.dccp.role[IP_CT_DIR_REPLY] = CT_DCCP_ROLE_SERVER;
 	ct->proto.dccp.state = CT_DCCP_NONE;
+	ct->proto.dccp.last_pkt = DCCP_PKT_REQUEST;
+	ct->proto.dccp.last_dir = IP_CT_DIR_ORIGINAL;
+	ct->proto.dccp.handshake_seq = 0;
 	return true;
 
 out_invalid:
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index c6049c2..6f4ee70 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -413,6 +413,7 @@ static bool sctp_new(struct nf_conn *ct, const struct sk_buff *skb,
 	    test_bit(SCTP_CID_COOKIE_ACK, map))
 		return false;
 
+	memset(&ct->proto.sctp, 0, sizeof(ct->proto.sctp));
 	new_state = SCTP_CONNTRACK_MAX;
 	for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) {
 		/* Don't need lock here: this conntrack not in circulation yet */
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 3fb2b73..37bf943 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -227,11 +227,11 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
  *	sCL -> sIV
  */
 /* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
-/*synack*/ { sIV, sSR, sSR, sIG, sIG, sIG, sIG, sIG, sIG, sSR },
+/*synack*/ { sIV, sSR, sIG, sIG, sIG, sIG, sIG, sIG, sIG, sSR },
 /*
  *	sSS -> sSR	Standard open.
  *	sS2 -> sSR	Simultaneous open
- *	sSR -> sSR	Retransmitted SYN/ACK.
+ *	sSR -> sIG	Retransmitted SYN/ACK, ignore it.
  *	sES -> sIG	Late retransmitted SYN/ACK?
  *	sFW -> sIG	Might be SYN/ACK answering ignored SYN
  *	sCW -> sIG
@@ -1066,9 +1066,7 @@ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
 	BUG_ON(th == NULL);
 
 	/* Don't need lock here: this conntrack not in circulation yet */
-	new_state
-		= tcp_conntracks[0][get_conntrack_index(th)]
-		[TCP_CONNTRACK_NONE];
+	new_state = tcp_conntracks[0][get_conntrack_index(th)][TCP_CONNTRACK_NONE];
 
 	/* Invalid: delete conntrack */
 	if (new_state >= TCP_CONNTRACK_MAX) {
@@ -1077,6 +1075,7 @@ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
 	}
 
 	if (new_state == TCP_CONNTRACK_SYN_SENT) {
+		memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp));
 		/* SYN packet */
 		ct->proto.tcp.seen[0].td_end =
 			segment_seq_plus_len(ntohl(th->seq), skb->len,
@@ -1088,11 +1087,11 @@ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
 			ct->proto.tcp.seen[0].td_end;
 
 		tcp_options(skb, dataoff, th, &ct->proto.tcp.seen[0]);
-		ct->proto.tcp.seen[1].flags = 0;
 	} else if (nf_ct_tcp_loose == 0) {
 		/* Don't try to pick up connections. */
 		return false;
 	} else {
+		memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp));
 		/*
 		 * We are in the middle of a connection,
 		 * its history is lost for us.
@@ -1107,7 +1106,6 @@ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
 		ct->proto.tcp.seen[0].td_maxend =
 			ct->proto.tcp.seen[0].td_end +
 			ct->proto.tcp.seen[0].td_maxwin;
-		ct->proto.tcp.seen[0].td_scale = 0;
 
 		/* We assume SACK and liberal window checking to handle
 		 * window scaling */
@@ -1116,13 +1114,7 @@ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
 					      IP_CT_TCP_FLAG_BE_LIBERAL;
 	}
 
-	ct->proto.tcp.seen[1].td_end = 0;
-	ct->proto.tcp.seen[1].td_maxend = 0;
-	ct->proto.tcp.seen[1].td_maxwin = 0;
-	ct->proto.tcp.seen[1].td_scale = 0;
-
 	/* tcp_packet will set them */
-	ct->proto.tcp.state = TCP_CONNTRACK_NONE;
 	ct->proto.tcp.last_index = TCP_NONE_SET;
 
 	pr_debug("tcp_new: sender end=%u maxend=%u maxwin=%u scale=%i "
diff --git a/net/netfilter/nf_conntrack_snmp.c b/net/netfilter/nf_conntrack_snmp.c
new file mode 100644
index 0000000..6e545e2
--- /dev/null
+++ b/net/netfilter/nf_conntrack_snmp.c
@@ -0,0 +1,77 @@
+/*
+ *      SNMP service broadcast connection tracking helper
+ *
+ *      (c) 2011 Jiri Olsa <jolsa@redhat.com>
+ *
+ *      This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/in.h>
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_expect.h>
+
+#define SNMP_PORT	161
+
+MODULE_AUTHOR("Jiri Olsa <jolsa@redhat.com>");
+MODULE_DESCRIPTION("SNMP service broadcast connection tracking helper");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_NFCT_HELPER("snmp");
+
+static unsigned int timeout __read_mostly = 30;
+module_param(timeout, uint, S_IRUSR);
+MODULE_PARM_DESC(timeout, "timeout for master connection/replies in seconds");
+
+int (*nf_nat_snmp_hook)(struct sk_buff *skb,
+			unsigned int protoff,
+			struct nf_conn *ct,
+			enum ip_conntrack_info ctinfo);
+EXPORT_SYMBOL_GPL(nf_nat_snmp_hook);
+
+static int snmp_conntrack_help(struct sk_buff *skb, unsigned int protoff,
+		struct nf_conn *ct, enum ip_conntrack_info ctinfo)
+{
+	typeof(nf_nat_snmp_hook) nf_nat_snmp;
+
+	nf_conntrack_broadcast_help(skb, protoff, ct, ctinfo, timeout);
+
+	nf_nat_snmp = rcu_dereference(nf_nat_snmp_hook);
+	if (nf_nat_snmp && ct->status & IPS_NAT_MASK)
+		return nf_nat_snmp(skb, protoff, ct, ctinfo);
+
+	return NF_ACCEPT;
+}
+
+static struct nf_conntrack_expect_policy exp_policy = {
+	.max_expected	= 1,
+};
+
+static struct nf_conntrack_helper helper __read_mostly = {
+	.name			= "snmp",
+	.tuple.src.l3num	= NFPROTO_IPV4,
+	.tuple.src.u.udp.port	= cpu_to_be16(SNMP_PORT),
+	.tuple.dst.protonum	= IPPROTO_UDP,
+	.me			= THIS_MODULE,
+	.help			= snmp_conntrack_help,
+	.expect_policy		= &exp_policy,
+};
+
+static int __init nf_conntrack_snmp_init(void)
+{
+	exp_policy.timeout = timeout;
+	return nf_conntrack_helper_register(&helper);
+}
+
+static void __exit nf_conntrack_snmp_fini(void)
+{
+	nf_conntrack_helper_unregister(&helper);
+}
+
+module_init(nf_conntrack_snmp_init);
+module_exit(nf_conntrack_snmp_fini);
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index b4d7f0f..0ae1428 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -29,6 +29,8 @@
 #include <net/netfilter/nf_conntrack_helper.h>
 #include <net/netfilter/nf_conntrack_acct.h>
 #include <net/netfilter/nf_conntrack_zones.h>
+#include <net/netfilter/nf_conntrack_timestamp.h>
+#include <linux/rculist_nulls.h>
 
 MODULE_LICENSE("GPL");
 
@@ -45,6 +47,7 @@ EXPORT_SYMBOL_GPL(print_tuple);
 struct ct_iter_state {
 	struct seq_net_private p;
 	unsigned int bucket;
+	u_int64_t time_now;
 };
 
 static struct hlist_nulls_node *ct_get_first(struct seq_file *seq)
@@ -56,7 +59,7 @@ static struct hlist_nulls_node *ct_get_first(struct seq_file *seq)
 	for (st->bucket = 0;
 	     st->bucket < net->ct.htable_size;
 	     st->bucket++) {
-		n = rcu_dereference(net->ct.hash[st->bucket].first);
+		n = rcu_dereference(hlist_nulls_first_rcu(&net->ct.hash[st->bucket]));
 		if (!is_a_nulls(n))
 			return n;
 	}
@@ -69,13 +72,15 @@ static struct hlist_nulls_node *ct_get_next(struct seq_file *seq,
 	struct net *net = seq_file_net(seq);
 	struct ct_iter_state *st = seq->private;
 
-	head = rcu_dereference(head->next);
+	head = rcu_dereference(hlist_nulls_next_rcu(head));
 	while (is_a_nulls(head)) {
 		if (likely(get_nulls_value(head) == st->bucket)) {
 			if (++st->bucket >= net->ct.htable_size)
 				return NULL;
 		}
-		head = rcu_dereference(net->ct.hash[st->bucket].first);
+		head = rcu_dereference(
+				hlist_nulls_first_rcu(
+					&net->ct.hash[st->bucket]));
 	}
 	return head;
 }
@@ -93,6 +98,9 @@ static struct hlist_nulls_node *ct_get_idx(struct seq_file *seq, loff_t pos)
 static void *ct_seq_start(struct seq_file *seq, loff_t *pos)
 	__acquires(RCU)
 {
+	struct ct_iter_state *st = seq->private;
+
+	st->time_now = ktime_to_ns(ktime_get_real());
 	rcu_read_lock();
 	return ct_get_idx(seq, *pos);
 }
@@ -132,6 +140,34 @@ static inline int ct_show_secctx(struct seq_file *s, const struct nf_conn *ct)
 }
 #endif
 
+#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
+static int ct_show_delta_time(struct seq_file *s, const struct nf_conn *ct)
+{
+	struct ct_iter_state *st = s->private;
+	struct nf_conn_tstamp *tstamp;
+	s64 delta_time;
+
+	tstamp = nf_conn_tstamp_find(ct);
+	if (tstamp) {
+		delta_time = st->time_now - tstamp->start;
+		if (delta_time > 0)
+			delta_time = div_s64(delta_time, NSEC_PER_SEC);
+		else
+			delta_time = 0;
+
+		return seq_printf(s, "delta-time=%llu ",
+				  (unsigned long long)delta_time);
+	}
+	return 0;
+}
+#else
+static inline int
+ct_show_delta_time(struct seq_file *s, const struct nf_conn *ct)
+{
+	return 0;
+}
+#endif
+
 /* return 0 on success, 1 in case of error */
 static int ct_seq_show(struct seq_file *s, void *v)
 {
@@ -200,6 +236,9 @@ static int ct_seq_show(struct seq_file *s, void *v)
 		goto release;
 #endif
 
+	if (ct_show_delta_time(s, ct))
+		goto release;
+
 	if (seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use)))
 		goto release;
 
diff --git a/net/netfilter/nf_conntrack_timestamp.c b/net/netfilter/nf_conntrack_timestamp.c
new file mode 100644
index 0000000..af7dd31
--- /dev/null
+++ b/net/netfilter/nf_conntrack_timestamp.c
@@ -0,0 +1,120 @@
+/*
+ * (C) 2010 Pablo Neira Ayuso <pablo@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation (or any later at your option).
+ */
+
+#include <linux/netfilter.h>
+#include <linux/slab.h>
+#include <linux/kernel.h>
+#include <linux/moduleparam.h>
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_extend.h>
+#include <net/netfilter/nf_conntrack_timestamp.h>
+
+static int nf_ct_tstamp __read_mostly;
+
+module_param_named(tstamp, nf_ct_tstamp, bool, 0644);
+MODULE_PARM_DESC(tstamp, "Enable connection tracking flow timestamping.");
+
+#ifdef CONFIG_SYSCTL
+static struct ctl_table tstamp_sysctl_table[] = {
+	{
+		.procname	= "nf_conntrack_timestamp",
+		.data		= &init_net.ct.sysctl_tstamp,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{}
+};
+#endif /* CONFIG_SYSCTL */
+
+static struct nf_ct_ext_type tstamp_extend __read_mostly = {
+	.len	= sizeof(struct nf_conn_tstamp),
+	.align	= __alignof__(struct nf_conn_tstamp),
+	.id	= NF_CT_EXT_TSTAMP,
+};
+
+#ifdef CONFIG_SYSCTL
+static int nf_conntrack_tstamp_init_sysctl(struct net *net)
+{
+	struct ctl_table *table;
+
+	table = kmemdup(tstamp_sysctl_table, sizeof(tstamp_sysctl_table),
+			GFP_KERNEL);
+	if (!table)
+		goto out;
+
+	table[0].data = &net->ct.sysctl_tstamp;
+
+	net->ct.tstamp_sysctl_header = register_net_sysctl_table(net,
+			nf_net_netfilter_sysctl_path, table);
+	if (!net->ct.tstamp_sysctl_header) {
+		printk(KERN_ERR "nf_ct_tstamp: can't register to sysctl.\n");
+		goto out_register;
+	}
+	return 0;
+
+out_register:
+	kfree(table);
+out:
+	return -ENOMEM;
+}
+
+static void nf_conntrack_tstamp_fini_sysctl(struct net *net)
+{
+	struct ctl_table *table;
+
+	table = net->ct.tstamp_sysctl_header->ctl_table_arg;
+	unregister_net_sysctl_table(net->ct.tstamp_sysctl_header);
+	kfree(table);
+}
+#else
+static int nf_conntrack_tstamp_init_sysctl(struct net *net)
+{
+	return 0;
+}
+
+static void nf_conntrack_tstamp_fini_sysctl(struct net *net)
+{
+}
+#endif
+
+int nf_conntrack_tstamp_init(struct net *net)
+{
+	int ret;
+
+	net->ct.sysctl_tstamp = nf_ct_tstamp;
+
+	if (net_eq(net, &init_net)) {
+		ret = nf_ct_extend_register(&tstamp_extend);
+		if (ret < 0) {
+			printk(KERN_ERR "nf_ct_tstamp: Unable to register "
+					"extension\n");
+			goto out_extend_register;
+		}
+	}
+
+	ret = nf_conntrack_tstamp_init_sysctl(net);
+	if (ret < 0)
+		goto out_sysctl;
+
+	return 0;
+
+out_sysctl:
+	if (net_eq(net, &init_net))
+		nf_ct_extend_unregister(&tstamp_extend);
+out_extend_register:
+	return ret;
+}
+
+void nf_conntrack_tstamp_fini(struct net *net)
+{
+	nf_conntrack_tstamp_fini_sysctl(net);
+	if (net_eq(net, &init_net))
+		nf_ct_extend_unregister(&tstamp_extend);
+}
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index 9181699..20714ed 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -165,7 +165,8 @@ static int seq_show(struct seq_file *s, void *v)
 	struct nf_logger *t;
 	int ret;
 
-	logger = nf_loggers[*pos];
+	logger = rcu_dereference_protected(nf_loggers[*pos],
+					   lockdep_is_held(&nf_log_mutex));
 
 	if (!logger)
 		ret = seq_printf(s, "%2lld NONE (", *pos);
@@ -253,7 +254,8 @@ static int nf_log_proc_dostring(ctl_table *table, int write,
 		mutex_unlock(&nf_log_mutex);
 	} else {
 		mutex_lock(&nf_log_mutex);
-		logger = nf_loggers[tindex];
+		logger = rcu_dereference_protected(nf_loggers[tindex],
+						   lockdep_is_held(&nf_log_mutex));
 		if (!logger)
 			table->data = "NONE";
 		else
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index 74aebed5..5ab22e2 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -27,14 +27,17 @@ static DEFINE_MUTEX(queue_handler_mutex);
 int nf_register_queue_handler(u_int8_t pf, const struct nf_queue_handler *qh)
 {
 	int ret;
+	const struct nf_queue_handler *old;
 
 	if (pf >= ARRAY_SIZE(queue_handler))
 		return -EINVAL;
 
 	mutex_lock(&queue_handler_mutex);
-	if (queue_handler[pf] == qh)
+	old = rcu_dereference_protected(queue_handler[pf],
+					lockdep_is_held(&queue_handler_mutex));
+	if (old == qh)
 		ret = -EEXIST;
-	else if (queue_handler[pf])
+	else if (old)
 		ret = -EBUSY;
 	else {
 		rcu_assign_pointer(queue_handler[pf], qh);
@@ -49,11 +52,15 @@ EXPORT_SYMBOL(nf_register_queue_handler);
 /* The caller must flush their queue before this */
 int nf_unregister_queue_handler(u_int8_t pf, const struct nf_queue_handler *qh)
 {
+	const struct nf_queue_handler *old;
+
 	if (pf >= ARRAY_SIZE(queue_handler))
 		return -EINVAL;
 
 	mutex_lock(&queue_handler_mutex);
-	if (queue_handler[pf] && queue_handler[pf] != qh) {
+	old = rcu_dereference_protected(queue_handler[pf],
+					lockdep_is_held(&queue_handler_mutex));
+	if (old && old != qh) {
 		mutex_unlock(&queue_handler_mutex);
 		return -EINVAL;
 	}
@@ -73,7 +80,10 @@ void nf_unregister_queue_handlers(const struct nf_queue_handler *qh)
 
 	mutex_lock(&queue_handler_mutex);
 	for (pf = 0; pf < ARRAY_SIZE(queue_handler); pf++)  {
-		if (queue_handler[pf] == qh)
+		if (rcu_dereference_protected(
+				queue_handler[pf],
+				lockdep_is_held(&queue_handler_mutex)
+				) == qh)
 			rcu_assign_pointer(queue_handler[pf], NULL);
 	}
 	mutex_unlock(&queue_handler_mutex);
@@ -115,7 +125,7 @@ static int __nf_queue(struct sk_buff *skb,
 		      int (*okfn)(struct sk_buff *),
 		      unsigned int queuenum)
 {
-	int status;
+	int status = -ENOENT;
 	struct nf_queue_entry *entry = NULL;
 #ifdef CONFIG_BRIDGE_NETFILTER
 	struct net_device *physindev;
@@ -128,16 +138,20 @@ static int __nf_queue(struct sk_buff *skb,
 	rcu_read_lock();
 
 	qh = rcu_dereference(queue_handler[pf]);
-	if (!qh)
+	if (!qh) {
+		status = -ESRCH;
 		goto err_unlock;
+	}
 
 	afinfo = nf_get_afinfo(pf);
 	if (!afinfo)
 		goto err_unlock;
 
 	entry = kmalloc(sizeof(*entry) + afinfo->route_key_size, GFP_ATOMIC);
-	if (!entry)
+	if (!entry) {
+		status = -ENOMEM;
 		goto err_unlock;
+	}
 
 	*entry = (struct nf_queue_entry) {
 		.skb	= skb,
@@ -151,11 +165,9 @@ static int __nf_queue(struct sk_buff *skb,
 
 	/* If it's going away, ignore hook. */
 	if (!try_module_get(entry->elem->owner)) {
-		rcu_read_unlock();
-		kfree(entry);
-		return 0;
+		status = -ECANCELED;
+		goto err_unlock;
 	}
-
 	/* Bump dev refs so they don't vanish while packet is out */
 	if (indev)
 		dev_hold(indev);
@@ -182,14 +194,13 @@ static int __nf_queue(struct sk_buff *skb,
 		goto err;
 	}
 
-	return 1;
+	return 0;
 
 err_unlock:
 	rcu_read_unlock();
 err:
-	kfree_skb(skb);
 	kfree(entry);
-	return 1;
+	return status;
 }
 
 int nf_queue(struct sk_buff *skb,
@@ -201,6 +212,8 @@ int nf_queue(struct sk_buff *skb,
 	     unsigned int queuenum)
 {
 	struct sk_buff *segs;
+	int err;
+	unsigned int queued;
 
 	if (!skb_is_gso(skb))
 		return __nf_queue(skb, elem, pf, hook, indev, outdev, okfn,
@@ -216,20 +229,35 @@ int nf_queue(struct sk_buff *skb,
 	}
 
 	segs = skb_gso_segment(skb, 0);
-	kfree_skb(skb);
+	/* Does not use PTR_ERR to limit the number of error codes that can be
+	 * returned by nf_queue.  For instance, callers rely on -ECANCELED to mean
+	 * 'ignore this hook'.
+	 */
 	if (IS_ERR(segs))
-		return 1;
+		return -EINVAL;
 
+	queued = 0;
+	err = 0;
 	do {
 		struct sk_buff *nskb = segs->next;
 
 		segs->next = NULL;
-		if (!__nf_queue(segs, elem, pf, hook, indev, outdev, okfn,
-				queuenum))
+		if (err == 0)
+			err = __nf_queue(segs, elem, pf, hook, indev,
+					   outdev, okfn, queuenum);
+		if (err == 0)
+			queued++;
+		else
 			kfree_skb(segs);
 		segs = nskb;
 	} while (segs);
-	return 1;
+
+	/* also free orig skb if only some segments were queued */
+	if (unlikely(err && queued))
+		err = 0;
+	if (err == 0)
+		kfree_skb(skb);
+	return err;
 }
 
 void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
@@ -237,6 +265,7 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
 	struct sk_buff *skb = entry->skb;
 	struct list_head *elem = &entry->elem->list;
 	const struct nf_afinfo *afinfo;
+	int err;
 
 	rcu_read_lock();
 
@@ -270,10 +299,17 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
 		local_bh_enable();
 		break;
 	case NF_QUEUE:
-		if (!__nf_queue(skb, elem, entry->pf, entry->hook,
-				entry->indev, entry->outdev, entry->okfn,
-				verdict >> NF_VERDICT_BITS))
-			goto next_hook;
+		err = __nf_queue(skb, elem, entry->pf, entry->hook,
+				 entry->indev, entry->outdev, entry->okfn,
+				 verdict >> NF_VERDICT_QBITS);
+		if (err < 0) {
+			if (err == -ECANCELED)
+				goto next_hook;
+			if (err == -ESRCH &&
+			   (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS))
+				goto next_hook;
+			kfree_skb(skb);
+		}
 		break;
 	case NF_STOLEN:
 	default:
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 6a1572b..985e9b7 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -376,7 +376,6 @@ __build_packet_message(struct nfulnl_instance *inst,
 			unsigned int hooknum,
 			const struct net_device *indev,
 			const struct net_device *outdev,
-			const struct nf_loginfo *li,
 			const char *prefix, unsigned int plen)
 {
 	struct nfulnl_msg_packet_hdr pmsg;
@@ -652,7 +651,7 @@ nfulnl_log_packet(u_int8_t pf,
 	inst->qlen++;
 
 	__build_packet_message(inst, skb, data_len, pf,
-				hooknum, in, out, li, prefix, plen);
+				hooknum, in, out, prefix, plen);
 
 	if (inst->qlen >= qthreshold)
 		__nfulnl_flush(inst);
@@ -874,19 +873,19 @@ static struct hlist_node *get_first(struct iter_state *st)
 
 	for (st->bucket = 0; st->bucket < INSTANCE_BUCKETS; st->bucket++) {
 		if (!hlist_empty(&instance_table[st->bucket]))
-			return rcu_dereference_bh(instance_table[st->bucket].first);
+			return rcu_dereference_bh(hlist_first_rcu(&instance_table[st->bucket]));
 	}
 	return NULL;
 }
 
 static struct hlist_node *get_next(struct iter_state *st, struct hlist_node *h)
 {
-	h = rcu_dereference_bh(h->next);
+	h = rcu_dereference_bh(hlist_next_rcu(h));
 	while (!h) {
 		if (++st->bucket >= INSTANCE_BUCKETS)
 			return NULL;
 
-		h = rcu_dereference_bh(instance_table[st->bucket].first);
+		h = rcu_dereference_bh(hlist_first_rcu(&instance_table[st->bucket]));
 	}
 	return h;
 }
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 68e67d1..b83123f 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -387,25 +387,31 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
 {
 	struct sk_buff *nskb;
 	struct nfqnl_instance *queue;
-	int err;
+	int err = -ENOBUFS;
 
 	/* rcu_read_lock()ed by nf_hook_slow() */
 	queue = instance_lookup(queuenum);
-	if (!queue)
+	if (!queue) {
+		err = -ESRCH;
 		goto err_out;
+	}
 
-	if (queue->copy_mode == NFQNL_COPY_NONE)
+	if (queue->copy_mode == NFQNL_COPY_NONE) {
+		err = -EINVAL;
 		goto err_out;
+	}
 
 	nskb = nfqnl_build_packet_message(queue, entry);
-	if (nskb == NULL)
+	if (nskb == NULL) {
+		err = -ENOMEM;
 		goto err_out;
-
+	}
 	spin_lock_bh(&queue->lock);
 
-	if (!queue->peer_pid)
+	if (!queue->peer_pid) {
+		err = -EINVAL;
 		goto err_out_free_nskb;
-
+	}
 	if (queue->queue_total >= queue->queue_maxlen) {
 		queue->queue_dropped++;
 		if (net_ratelimit())
@@ -432,7 +438,7 @@ err_out_free_nskb:
 err_out_unlock:
 	spin_unlock_bh(&queue->lock);
 err_out:
-	return -1;
+	return err;
 }
 
 static int
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index c942376..0a77d2f 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -23,6 +23,7 @@
 #include <linux/mutex.h>
 #include <linux/mm.h>
 #include <linux/slab.h>
+#include <linux/audit.h>
 #include <net/net_namespace.h>
 
 #include <linux/netfilter/x_tables.h>
@@ -38,9 +39,8 @@ MODULE_DESCRIPTION("{ip,ip6,arp,eb}_tables backend module");
 #define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1))
 
 struct compat_delta {
-	struct compat_delta *next;
-	unsigned int offset;
-	int delta;
+	unsigned int offset; /* offset in kernel */
+	int delta; /* delta in 32bit user land */
 };
 
 struct xt_af {
@@ -49,7 +49,9 @@ struct xt_af {
 	struct list_head target;
 #ifdef CONFIG_COMPAT
 	struct mutex compat_mutex;
-	struct compat_delta *compat_offsets;
+	struct compat_delta *compat_tab;
+	unsigned int number; /* number of slots in compat_tab[] */
+	unsigned int cur; /* number of used slots in compat_tab[] */
 #endif
 };
 
@@ -414,54 +416,67 @@ int xt_check_match(struct xt_mtchk_param *par,
 EXPORT_SYMBOL_GPL(xt_check_match);
 
 #ifdef CONFIG_COMPAT
-int xt_compat_add_offset(u_int8_t af, unsigned int offset, short delta)
+int xt_compat_add_offset(u_int8_t af, unsigned int offset, int delta)
 {
-	struct compat_delta *tmp;
+	struct xt_af *xp = &xt[af];
 
-	tmp = kmalloc(sizeof(struct compat_delta), GFP_KERNEL);
-	if (!tmp)
-		return -ENOMEM;
+	if (!xp->compat_tab) {
+		if (!xp->number)
+			return -EINVAL;
+		xp->compat_tab = vmalloc(sizeof(struct compat_delta) * xp->number);
+		if (!xp->compat_tab)
+			return -ENOMEM;
+		xp->cur = 0;
+	}
 
-	tmp->offset = offset;
-	tmp->delta = delta;
+	if (xp->cur >= xp->number)
+		return -EINVAL;
 
-	if (xt[af].compat_offsets) {
-		tmp->next = xt[af].compat_offsets->next;
-		xt[af].compat_offsets->next = tmp;
-	} else {
-		xt[af].compat_offsets = tmp;
-		tmp->next = NULL;
-	}
+	if (xp->cur)
+		delta += xp->compat_tab[xp->cur - 1].delta;
+	xp->compat_tab[xp->cur].offset = offset;
+	xp->compat_tab[xp->cur].delta = delta;
+	xp->cur++;
 	return 0;
 }
 EXPORT_SYMBOL_GPL(xt_compat_add_offset);
 
 void xt_compat_flush_offsets(u_int8_t af)
 {
-	struct compat_delta *tmp, *next;
-
-	if (xt[af].compat_offsets) {
-		for (tmp = xt[af].compat_offsets; tmp; tmp = next) {
-			next = tmp->next;
-			kfree(tmp);
-		}
-		xt[af].compat_offsets = NULL;
+	if (xt[af].compat_tab) {
+		vfree(xt[af].compat_tab);
+		xt[af].compat_tab = NULL;
+		xt[af].number = 0;
 	}
 }
 EXPORT_SYMBOL_GPL(xt_compat_flush_offsets);
 
 int xt_compat_calc_jump(u_int8_t af, unsigned int offset)
 {
-	struct compat_delta *tmp;
-	int delta;
-
-	for (tmp = xt[af].compat_offsets, delta = 0; tmp; tmp = tmp->next)
-		if (tmp->offset < offset)
-			delta += tmp->delta;
-	return delta;
+	struct compat_delta *tmp = xt[af].compat_tab;
+	int mid, left = 0, right = xt[af].cur - 1;
+
+	while (left <= right) {
+		mid = (left + right) >> 1;
+		if (offset > tmp[mid].offset)
+			left = mid + 1;
+		else if (offset < tmp[mid].offset)
+			right = mid - 1;
+		else
+			return mid ? tmp[mid - 1].delta : 0;
+	}
+	WARN_ON_ONCE(1);
+	return 0;
 }
 EXPORT_SYMBOL_GPL(xt_compat_calc_jump);
 
+void xt_compat_init_offsets(u_int8_t af, unsigned int number)
+{
+	xt[af].number = number;
+	xt[af].cur = 0;
+}
+EXPORT_SYMBOL(xt_compat_init_offsets);
+
 int xt_compat_match_offset(const struct xt_match *match)
 {
 	u_int16_t csize = match->compatsize ? : match->matchsize;
@@ -820,6 +835,21 @@ xt_replace_table(struct xt_table *table,
 	 */
 	local_bh_enable();
 
+#ifdef CONFIG_AUDIT
+	if (audit_enabled) {
+		struct audit_buffer *ab;
+
+		ab = audit_log_start(current->audit_context, GFP_KERNEL,
+				     AUDIT_NETFILTER_CFG);
+		if (ab) {
+			audit_log_format(ab, "table=%s family=%u entries=%u",
+					 table->name, table->af,
+					 private->number);
+			audit_log_end(ab);
+		}
+	}
+#endif
+
 	return private;
 }
 EXPORT_SYMBOL_GPL(xt_replace_table);
@@ -1338,7 +1368,7 @@ static int __init xt_init(void)
 		mutex_init(&xt[i].mutex);
 #ifdef CONFIG_COMPAT
 		mutex_init(&xt[i].compat_mutex);
-		xt[i].compat_offsets = NULL;
+		xt[i].compat_tab = NULL;
 #endif
 		INIT_LIST_HEAD(&xt[i].target);
 		INIT_LIST_HEAD(&xt[i].match);
diff --git a/net/netfilter/xt_AUDIT.c b/net/netfilter/xt_AUDIT.c
new file mode 100644
index 0000000..81802d2
--- /dev/null
+++ b/net/netfilter/xt_AUDIT.c
@@ -0,0 +1,204 @@
+/*
+ * Creates audit record for dropped/accepted packets
+ *
+ * (C) 2010-2011 Thomas Graf <tgraf@redhat.com>
+ * (C) 2010-2011 Red Hat, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+*/
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/audit.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/if_arp.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_AUDIT.h>
+#include <net/ipv6.h>
+#include <net/ip.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Thomas Graf <tgraf@redhat.com>");
+MODULE_DESCRIPTION("Xtables: creates audit records for dropped/accepted packets");
+MODULE_ALIAS("ipt_AUDIT");
+MODULE_ALIAS("ip6t_AUDIT");
+MODULE_ALIAS("ebt_AUDIT");
+MODULE_ALIAS("arpt_AUDIT");
+
+static void audit_proto(struct audit_buffer *ab, struct sk_buff *skb,
+			unsigned int proto, unsigned int offset)
+{
+	switch (proto) {
+	case IPPROTO_TCP:
+	case IPPROTO_UDP:
+	case IPPROTO_UDPLITE: {
+		const __be16 *pptr;
+		__be16 _ports[2];
+
+		pptr = skb_header_pointer(skb, offset, sizeof(_ports), _ports);
+		if (pptr == NULL) {
+			audit_log_format(ab, " truncated=1");
+			return;
+		}
+
+		audit_log_format(ab, " sport=%hu dport=%hu",
+				 ntohs(pptr[0]), ntohs(pptr[1]));
+		}
+		break;
+
+	case IPPROTO_ICMP:
+	case IPPROTO_ICMPV6: {
+		const u8 *iptr;
+		u8 _ih[2];
+
+		iptr = skb_header_pointer(skb, offset, sizeof(_ih), &_ih);
+		if (iptr == NULL) {
+			audit_log_format(ab, " truncated=1");
+			return;
+		}
+
+		audit_log_format(ab, " icmptype=%hhu icmpcode=%hhu",
+				 iptr[0], iptr[1]);
+
+		}
+		break;
+	}
+}
+
+static void audit_ip4(struct audit_buffer *ab, struct sk_buff *skb)
+{
+	struct iphdr _iph;
+	const struct iphdr *ih;
+
+	ih = skb_header_pointer(skb, 0, sizeof(_iph), &_iph);
+	if (!ih) {
+		audit_log_format(ab, " truncated=1");
+		return;
+	}
+
+	audit_log_format(ab, " saddr=%pI4 daddr=%pI4 ipid=%hu proto=%hhu",
+		&ih->saddr, &ih->daddr, ntohs(ih->id), ih->protocol);
+
+	if (ntohs(ih->frag_off) & IP_OFFSET) {
+		audit_log_format(ab, " frag=1");
+		return;
+	}
+
+	audit_proto(ab, skb, ih->protocol, ih->ihl * 4);
+}
+
+static void audit_ip6(struct audit_buffer *ab, struct sk_buff *skb)
+{
+	struct ipv6hdr _ip6h;
+	const struct ipv6hdr *ih;
+	u8 nexthdr;
+	int offset;
+
+	ih = skb_header_pointer(skb, skb_network_offset(skb), sizeof(_ip6h), &_ip6h);
+	if (!ih) {
+		audit_log_format(ab, " truncated=1");
+		return;
+	}
+
+	nexthdr = ih->nexthdr;
+	offset = ipv6_skip_exthdr(skb, skb_network_offset(skb) + sizeof(_ip6h),
+				  &nexthdr);
+
+	audit_log_format(ab, " saddr=%pI6c daddr=%pI6c proto=%hhu",
+			 &ih->saddr, &ih->daddr, nexthdr);
+
+	if (offset)
+		audit_proto(ab, skb, nexthdr, offset);
+}
+
+static unsigned int
+audit_tg(struct sk_buff *skb, const struct xt_action_param *par)
+{
+	const struct xt_audit_info *info = par->targinfo;
+	struct audit_buffer *ab;
+
+	ab = audit_log_start(NULL, GFP_ATOMIC, AUDIT_NETFILTER_PKT);
+	if (ab == NULL)
+		goto errout;
+
+	audit_log_format(ab, "action=%hhu hook=%u len=%u inif=%s outif=%s",
+			 info->type, par->hooknum, skb->len,
+			 par->in ? par->in->name : "?",
+			 par->out ? par->out->name : "?");
+
+	if (skb->mark)
+		audit_log_format(ab, " mark=%#x", skb->mark);
+
+	if (skb->dev && skb->dev->type == ARPHRD_ETHER) {
+		audit_log_format(ab, " smac=%pM dmac=%pM macproto=0x%04x",
+				 eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest,
+				 ntohs(eth_hdr(skb)->h_proto));
+
+		if (par->family == NFPROTO_BRIDGE) {
+			switch (eth_hdr(skb)->h_proto) {
+			case __constant_htons(ETH_P_IP):
+				audit_ip4(ab, skb);
+				break;
+
+			case __constant_htons(ETH_P_IPV6):
+				audit_ip6(ab, skb);
+				break;
+			}
+		}
+	}
+
+	switch (par->family) {
+	case NFPROTO_IPV4:
+		audit_ip4(ab, skb);
+		break;
+
+	case NFPROTO_IPV6:
+		audit_ip6(ab, skb);
+		break;
+	}
+
+	audit_log_end(ab);
+
+errout:
+	return XT_CONTINUE;
+}
+
+static int audit_tg_check(const struct xt_tgchk_param *par)
+{
+	const struct xt_audit_info *info = par->targinfo;
+
+	if (info->type > XT_AUDIT_TYPE_MAX) {
+		pr_info("Audit type out of range (valid range: 0..%hhu)\n",
+			XT_AUDIT_TYPE_MAX);
+		return -ERANGE;
+	}
+
+	return 0;
+}
+
+static struct xt_target audit_tg_reg __read_mostly = {
+	.name		= "AUDIT",
+	.family		= NFPROTO_UNSPEC,
+	.target		= audit_tg,
+	.targetsize	= sizeof(struct xt_audit_info),
+	.checkentry	= audit_tg_check,
+	.me		= THIS_MODULE,
+};
+
+static int __init audit_tg_init(void)
+{
+	return xt_register_target(&audit_tg_reg);
+}
+
+static void __exit audit_tg_exit(void)
+{
+	xt_unregister_target(&audit_tg_reg);
+}
+
+module_init(audit_tg_init);
+module_exit(audit_tg_exit);
diff --git a/net/netfilter/xt_CLASSIFY.c b/net/netfilter/xt_CLASSIFY.c
index c2c0e4a..af9c4da 100644
--- a/net/netfilter/xt_CLASSIFY.c
+++ b/net/netfilter/xt_CLASSIFY.c
@@ -19,12 +19,14 @@
 #include <linux/netfilter_ipv6.h>
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter/xt_CLASSIFY.h>
+#include <linux/netfilter_arp.h>
 
 MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("Xtables: Qdisc classification");
 MODULE_ALIAS("ipt_CLASSIFY");
 MODULE_ALIAS("ip6t_CLASSIFY");
+MODULE_ALIAS("arpt_CLASSIFY");
 
 static unsigned int
 classify_tg(struct sk_buff *skb, const struct xt_action_param *par)
@@ -35,26 +37,36 @@ classify_tg(struct sk_buff *skb, const struct xt_action_param *par)
 	return XT_CONTINUE;
 }
 
-static struct xt_target classify_tg_reg __read_mostly = {
-	.name       = "CLASSIFY",
-	.revision   = 0,
-	.family     = NFPROTO_UNSPEC,
-	.table      = "mangle",
-	.hooks      = (1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_FORWARD) |
-		      (1 << NF_INET_POST_ROUTING),
-	.target     = classify_tg,
-	.targetsize = sizeof(struct xt_classify_target_info),
-	.me         = THIS_MODULE,
+static struct xt_target classify_tg_reg[] __read_mostly = {
+	{
+		.name       = "CLASSIFY",
+		.revision   = 0,
+		.family     = NFPROTO_UNSPEC,
+		.hooks      = (1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_FORWARD) |
+		              (1 << NF_INET_POST_ROUTING),
+		.target     = classify_tg,
+		.targetsize = sizeof(struct xt_classify_target_info),
+		.me         = THIS_MODULE,
+	},
+	{
+		.name       = "CLASSIFY",
+		.revision   = 0,
+		.family     = NFPROTO_ARP,
+		.hooks      = (1 << NF_ARP_OUT) | (1 << NF_ARP_FORWARD),
+		.target     = classify_tg,
+		.targetsize = sizeof(struct xt_classify_target_info),
+		.me         = THIS_MODULE,
+	},
 };
 
 static int __init classify_tg_init(void)
 {
-	return xt_register_target(&classify_tg_reg);
+	return xt_register_targets(classify_tg_reg, ARRAY_SIZE(classify_tg_reg));
 }
 
 static void __exit classify_tg_exit(void)
 {
-	xt_unregister_target(&classify_tg_reg);
+	xt_unregister_targets(classify_tg_reg, ARRAY_SIZE(classify_tg_reg));
 }
 
 module_init(classify_tg_init);
diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c
index be1f22e..3bdd443 100644
--- a/net/netfilter/xt_IDLETIMER.c
+++ b/net/netfilter/xt_IDLETIMER.c
@@ -313,3 +313,5 @@ MODULE_AUTHOR("Timo Teras <ext-timo.teras@nokia.com>");
 MODULE_AUTHOR("Luciano Coelho <luciano.coelho@nokia.com>");
 MODULE_DESCRIPTION("Xtables: idle time monitor");
 MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("ipt_IDLETIMER");
+MODULE_ALIAS("ip6t_IDLETIMER");
diff --git a/net/netfilter/xt_LED.c b/net/netfilter/xt_LED.c
index a414050..993de2b 100644
--- a/net/netfilter/xt_LED.c
+++ b/net/netfilter/xt_LED.c
@@ -31,6 +31,8 @@
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Adam Nielsen <a.nielsen@shikadi.net>");
 MODULE_DESCRIPTION("Xtables: trigger LED devices on packet match");
+MODULE_ALIAS("ipt_LED");
+MODULE_ALIAS("ip6t_LED");
 
 static LIST_HEAD(xt_led_triggers);
 static DEFINE_MUTEX(xt_led_mutex);
diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c
index 039cce1..d4f4b5d 100644
--- a/net/netfilter/xt_NFQUEUE.c
+++ b/net/netfilter/xt_NFQUEUE.c
@@ -72,18 +72,31 @@ nfqueue_tg_v1(struct sk_buff *skb, const struct xt_action_param *par)
 
 	if (info->queues_total > 1) {
 		if (par->family == NFPROTO_IPV4)
-			queue = hash_v4(skb) % info->queues_total + queue;
+			queue = (((u64) hash_v4(skb) * info->queues_total) >>
+				 32) + queue;
 #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
 		else if (par->family == NFPROTO_IPV6)
-			queue = hash_v6(skb) % info->queues_total + queue;
+			queue = (((u64) hash_v6(skb) * info->queues_total) >>
+				 32) + queue;
 #endif
 	}
 	return NF_QUEUE_NR(queue);
 }
 
-static int nfqueue_tg_v1_check(const struct xt_tgchk_param *par)
+static unsigned int
+nfqueue_tg_v2(struct sk_buff *skb, const struct xt_action_param *par)
 {
-	const struct xt_NFQ_info_v1 *info = par->targinfo;
+	const struct xt_NFQ_info_v2 *info = par->targinfo;
+	unsigned int ret = nfqueue_tg_v1(skb, par);
+
+	if (info->bypass)
+		ret |= NF_VERDICT_FLAG_QUEUE_BYPASS;
+	return ret;
+}
+
+static int nfqueue_tg_check(const struct xt_tgchk_param *par)
+{
+	const struct xt_NFQ_info_v2 *info = par->targinfo;
 	u32 maxid;
 
 	if (unlikely(!rnd_inited)) {
@@ -100,6 +113,8 @@ static int nfqueue_tg_v1_check(const struct xt_tgchk_param *par)
 		       info->queues_total, maxid);
 		return -ERANGE;
 	}
+	if (par->target->revision == 2 && info->bypass > 1)
+		return -EINVAL;
 	return 0;
 }
 
@@ -115,11 +130,20 @@ static struct xt_target nfqueue_tg_reg[] __read_mostly = {
 		.name		= "NFQUEUE",
 		.revision	= 1,
 		.family		= NFPROTO_UNSPEC,
-		.checkentry	= nfqueue_tg_v1_check,
+		.checkentry	= nfqueue_tg_check,
 		.target		= nfqueue_tg_v1,
 		.targetsize	= sizeof(struct xt_NFQ_info_v1),
 		.me		= THIS_MODULE,
 	},
+	{
+		.name		= "NFQUEUE",
+		.revision	= 2,
+		.family		= NFPROTO_UNSPEC,
+		.checkentry	= nfqueue_tg_check,
+		.target		= nfqueue_tg_v2,
+		.targetsize	= sizeof(struct xt_NFQ_info_v2),
+		.me		= THIS_MODULE,
+	},
 };
 
 static int __init nfqueue_tg_init(void)
diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c
index 5128a6c..624725b 100644
--- a/net/netfilter/xt_TEE.c
+++ b/net/netfilter/xt_TEE.c
@@ -73,7 +73,8 @@ tee_tg_route4(struct sk_buff *skb, const struct xt_tee_tginfo *info)
 	fl.fl4_dst = info->gw.ip;
 	fl.fl4_tos = RT_TOS(iph->tos);
 	fl.fl4_scope = RT_SCOPE_UNIVERSE;
-	if (ip_route_output_key(net, &rt, &fl) != 0)
+	rt = ip_route_output_key(net, &fl);
+	if (IS_ERR(rt))
 		return false;
 
 	skb_dst_drop(skb);
diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c
index 5c5b6b9..e029c48 100644
--- a/net/netfilter/xt_connlimit.c
+++ b/net/netfilter/xt_connlimit.c
@@ -185,18 +185,24 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
 	int connections;
 
 	ct = nf_ct_get(skb, &ctinfo);
-	if (ct != NULL)
-		tuple_ptr = &ct->tuplehash[0].tuple;
-	else if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb),
-				    par->family, &tuple))
+	if (ct != NULL) {
+		if (info->flags & XT_CONNLIMIT_DADDR)
+			tuple_ptr = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
+		else
+			tuple_ptr = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
+	} else if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb),
+				    par->family, &tuple)) {
 		goto hotdrop;
+	}
 
 	if (par->family == NFPROTO_IPV6) {
 		const struct ipv6hdr *iph = ipv6_hdr(skb);
-		memcpy(&addr.ip6, &iph->saddr, sizeof(iph->saddr));
+		memcpy(&addr.ip6, (info->flags & XT_CONNLIMIT_DADDR) ?
+		       &iph->daddr : &iph->saddr, sizeof(addr.ip6));
 	} else {
 		const struct iphdr *iph = ip_hdr(skb);
-		addr.ip = iph->saddr;
+		addr.ip = (info->flags & XT_CONNLIMIT_DADDR) ?
+			  iph->daddr : iph->saddr;
 	}
 
 	spin_lock_bh(&info->data->lock);
@@ -204,13 +210,12 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
 	                         &info->mask, par->family);
 	spin_unlock_bh(&info->data->lock);
 
-	if (connections < 0) {
+	if (connections < 0)
 		/* kmalloc failed, drop it entirely */
-		par->hotdrop = true;
-		return false;
-	}
+		goto hotdrop;
 
-	return (connections > info->limit) ^ info->inverse;
+	return (connections > info->limit) ^
+	       !!(info->flags & XT_CONNLIMIT_INVERT);
 
  hotdrop:
 	par->hotdrop = true;
@@ -268,25 +273,38 @@ static void connlimit_mt_destroy(const struct xt_mtdtor_param *par)
 	kfree(info->data);
 }
 
-static struct xt_match connlimit_mt_reg __read_mostly = {
-	.name       = "connlimit",
-	.revision   = 0,
-	.family     = NFPROTO_UNSPEC,
-	.checkentry = connlimit_mt_check,
-	.match      = connlimit_mt,
-	.matchsize  = sizeof(struct xt_connlimit_info),
-	.destroy    = connlimit_mt_destroy,
-	.me         = THIS_MODULE,
+static struct xt_match connlimit_mt_reg[] __read_mostly = {
+	{
+		.name       = "connlimit",
+		.revision   = 0,
+		.family     = NFPROTO_UNSPEC,
+		.checkentry = connlimit_mt_check,
+		.match      = connlimit_mt,
+		.matchsize  = sizeof(struct xt_connlimit_info),
+		.destroy    = connlimit_mt_destroy,
+		.me         = THIS_MODULE,
+	},
+	{
+		.name       = "connlimit",
+		.revision   = 1,
+		.family     = NFPROTO_UNSPEC,
+		.checkentry = connlimit_mt_check,
+		.match      = connlimit_mt,
+		.matchsize  = sizeof(struct xt_connlimit_info),
+		.destroy    = connlimit_mt_destroy,
+		.me         = THIS_MODULE,
+	},
 };
 
 static int __init connlimit_mt_init(void)
 {
-	return xt_register_match(&connlimit_mt_reg);
+	return xt_register_matches(connlimit_mt_reg,
+	       ARRAY_SIZE(connlimit_mt_reg));
 }
 
 static void __exit connlimit_mt_exit(void)
 {
-	xt_unregister_match(&connlimit_mt_reg);
+	xt_unregister_matches(connlimit_mt_reg, ARRAY_SIZE(connlimit_mt_reg));
 }
 
 module_init(connlimit_mt_init);
diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c
index e536710..2c0086a 100644
--- a/net/netfilter/xt_conntrack.c
+++ b/net/netfilter/xt_conntrack.c
@@ -112,6 +112,54 @@ ct_proto_port_check(const struct xt_conntrack_mtinfo2 *info,
 	return true;
 }
 
+static inline bool
+port_match(u16 min, u16 max, u16 port, bool invert)
+{
+	return (port >= min && port <= max) ^ invert;
+}
+
+static inline bool
+ct_proto_port_check_v3(const struct xt_conntrack_mtinfo3 *info,
+		       const struct nf_conn *ct)
+{
+	const struct nf_conntrack_tuple *tuple;
+
+	tuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
+	if ((info->match_flags & XT_CONNTRACK_PROTO) &&
+	    (nf_ct_protonum(ct) == info->l4proto) ^
+	    !(info->invert_flags & XT_CONNTRACK_PROTO))
+		return false;
+
+	/* Shortcut to match all recognized protocols by using ->src.all. */
+	if ((info->match_flags & XT_CONNTRACK_ORIGSRC_PORT) &&
+	    !port_match(info->origsrc_port, info->origsrc_port_high,
+			ntohs(tuple->src.u.all),
+			info->invert_flags & XT_CONNTRACK_ORIGSRC_PORT))
+		return false;
+
+	if ((info->match_flags & XT_CONNTRACK_ORIGDST_PORT) &&
+	    !port_match(info->origdst_port, info->origdst_port_high,
+			ntohs(tuple->dst.u.all),
+			info->invert_flags & XT_CONNTRACK_ORIGDST_PORT))
+		return false;
+
+	tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
+
+	if ((info->match_flags & XT_CONNTRACK_REPLSRC_PORT) &&
+	    !port_match(info->replsrc_port, info->replsrc_port_high,
+			ntohs(tuple->src.u.all),
+			info->invert_flags & XT_CONNTRACK_REPLSRC_PORT))
+		return false;
+
+	if ((info->match_flags & XT_CONNTRACK_REPLDST_PORT) &&
+	    !port_match(info->repldst_port, info->repldst_port_high,
+			ntohs(tuple->dst.u.all),
+			info->invert_flags & XT_CONNTRACK_REPLDST_PORT))
+		return false;
+
+	return true;
+}
+
 static bool
 conntrack_mt(const struct sk_buff *skb, struct xt_action_param *par,
              u16 state_mask, u16 status_mask)
@@ -170,8 +218,13 @@ conntrack_mt(const struct sk_buff *skb, struct xt_action_param *par,
 		    !(info->invert_flags & XT_CONNTRACK_REPLDST))
 			return false;
 
-	if (!ct_proto_port_check(info, ct))
-		return false;
+	if (par->match->revision != 3) {
+		if (!ct_proto_port_check(info, ct))
+			return false;
+	} else {
+		if (!ct_proto_port_check_v3(par->matchinfo, ct))
+			return false;
+	}
 
 	if ((info->match_flags & XT_CONNTRACK_STATUS) &&
 	    (!!(status_mask & ct->status) ^
@@ -207,10 +260,23 @@ conntrack_mt_v2(const struct sk_buff *skb, struct xt_action_param *par)
 	return conntrack_mt(skb, par, info->state_mask, info->status_mask);
 }
 
+static bool
+conntrack_mt_v3(const struct sk_buff *skb, struct xt_action_param *par)
+{
+	const struct xt_conntrack_mtinfo3 *info = par->matchinfo;
+
+	return conntrack_mt(skb, par, info->state_mask, info->status_mask);
+}
+
 static int conntrack_mt_check(const struct xt_mtchk_param *par)
 {
 	int ret;
 
+	if (strcmp(par->table, "raw") == 0) {
+		pr_info("state is undetermined at the time of raw table\n");
+		return -EINVAL;
+	}
+
 	ret = nf_ct_l3proto_try_module_get(par->family);
 	if (ret < 0)
 		pr_info("cannot load conntrack support for proto=%u\n",
@@ -244,6 +310,16 @@ static struct xt_match conntrack_mt_reg[] __read_mostly = {
 		.destroy    = conntrack_mt_destroy,
 		.me         = THIS_MODULE,
 	},
+	{
+		.name       = "conntrack",
+		.revision   = 3,
+		.family     = NFPROTO_UNSPEC,
+		.matchsize  = sizeof(struct xt_conntrack_mtinfo3),
+		.match      = conntrack_mt_v3,
+		.checkentry = conntrack_mt_check,
+		.destroy    = conntrack_mt_destroy,
+		.me         = THIS_MODULE,
+	},
 };
 
 static int __init conntrack_mt_init(void)
diff --git a/net/netfilter/xt_cpu.c b/net/netfilter/xt_cpu.c
index b39db8a..c7a2e54 100644
--- a/net/netfilter/xt_cpu.c
+++ b/net/netfilter/xt_cpu.c
@@ -22,6 +22,8 @@
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Eric Dumazet <eric.dumazet@gmail.com>");
 MODULE_DESCRIPTION("Xtables: CPU match");
+MODULE_ALIAS("ipt_cpu");
+MODULE_ALIAS("ip6t_cpu");
 
 static int cpu_mt_check(const struct xt_mtchk_param *par)
 {
diff --git a/net/netfilter/xt_devgroup.c b/net/netfilter/xt_devgroup.c
new file mode 100644
index 0000000..d9202cd
--- /dev/null
+++ b/net/netfilter/xt_devgroup.c
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2011 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+
+#include <linux/netfilter/xt_devgroup.h>
+#include <linux/netfilter/x_tables.h>
+
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Xtables: Device group match");
+MODULE_ALIAS("ipt_devgroup");
+MODULE_ALIAS("ip6t_devgroup");
+
+static bool devgroup_mt(const struct sk_buff *skb, struct xt_action_param *par)
+{
+	const struct xt_devgroup_info *info = par->matchinfo;
+
+	if (info->flags & XT_DEVGROUP_MATCH_SRC &&
+	    (((info->src_group ^ par->in->group) & info->src_mask ? 1 : 0) ^
+	     ((info->flags & XT_DEVGROUP_INVERT_SRC) ? 1 : 0)))
+		return false;
+
+	if (info->flags & XT_DEVGROUP_MATCH_DST &&
+	    (((info->dst_group ^ par->out->group) & info->dst_mask ? 1 : 0) ^
+	     ((info->flags & XT_DEVGROUP_INVERT_DST) ? 1 : 0)))
+		return false;
+
+	return true;
+}
+
+static int devgroup_mt_checkentry(const struct xt_mtchk_param *par)
+{
+	const struct xt_devgroup_info *info = par->matchinfo;
+
+	if (info->flags & ~(XT_DEVGROUP_MATCH_SRC | XT_DEVGROUP_INVERT_SRC |
+			    XT_DEVGROUP_MATCH_DST | XT_DEVGROUP_INVERT_DST))
+		return -EINVAL;
+
+	if (info->flags & XT_DEVGROUP_MATCH_SRC &&
+	    par->hook_mask & ~((1 << NF_INET_PRE_ROUTING) |
+			       (1 << NF_INET_LOCAL_IN) |
+			       (1 << NF_INET_FORWARD)))
+		return -EINVAL;
+
+	if (info->flags & XT_DEVGROUP_MATCH_DST &&
+	    par->hook_mask & ~((1 << NF_INET_FORWARD) |
+			       (1 << NF_INET_LOCAL_OUT) |
+			       (1 << NF_INET_POST_ROUTING)))
+		return -EINVAL;
+
+	return 0;
+}
+
+static struct xt_match devgroup_mt_reg __read_mostly = {
+	.name		= "devgroup",
+	.match		= devgroup_mt,
+	.checkentry	= devgroup_mt_checkentry,
+	.matchsize	= sizeof(struct xt_devgroup_info),
+	.family		= NFPROTO_UNSPEC,
+	.me		= THIS_MODULE
+};
+
+static int __init devgroup_mt_init(void)
+{
+	return xt_register_match(&devgroup_mt_reg);
+}
+
+static void __exit devgroup_mt_exit(void)
+{
+	xt_unregister_match(&devgroup_mt_reg);
+}
+
+module_init(devgroup_mt_init);
+module_exit(devgroup_mt_exit);
diff --git a/net/netfilter/xt_iprange.c b/net/netfilter/xt_iprange.c
index 73c33a4..b46626c 100644
--- a/net/netfilter/xt_iprange.c
+++ b/net/netfilter/xt_iprange.c
@@ -31,7 +31,7 @@ iprange_mt4(const struct sk_buff *skb, struct xt_action_param *par)
 			pr_debug("src IP %pI4 NOT in range %s%pI4-%pI4\n",
 			         &iph->saddr,
 			         (info->flags & IPRANGE_SRC_INV) ? "(INV) " : "",
-			         &info->src_max.ip,
+			         &info->src_min.ip,
 			         &info->src_max.ip);
 			return false;
 		}
@@ -76,15 +76,27 @@ iprange_mt6(const struct sk_buff *skb, struct xt_action_param *par)
 		m  = iprange_ipv6_lt(&iph->saddr, &info->src_min.in6);
 		m |= iprange_ipv6_lt(&info->src_max.in6, &iph->saddr);
 		m ^= !!(info->flags & IPRANGE_SRC_INV);
-		if (m)
+		if (m) {
+			pr_debug("src IP %pI6 NOT in range %s%pI6-%pI6\n",
+				 &iph->saddr,
+				 (info->flags & IPRANGE_SRC_INV) ? "(INV) " : "",
+				 &info->src_min.in6,
+				 &info->src_max.in6);
 			return false;
+		}
 	}
 	if (info->flags & IPRANGE_DST) {
 		m  = iprange_ipv6_lt(&iph->daddr, &info->dst_min.in6);
 		m |= iprange_ipv6_lt(&info->dst_max.in6, &iph->daddr);
 		m ^= !!(info->flags & IPRANGE_DST_INV);
-		if (m)
+		if (m) {
+			pr_debug("dst IP %pI6 NOT in range %s%pI6-%pI6\n",
+				 &iph->daddr,
+				 (info->flags & IPRANGE_DST_INV) ? "(INV) " : "",
+				 &info->dst_min.in6,
+				 &info->dst_max.in6);
 			return false;
+		}
 	}
 	return true;
 }
diff --git a/net/netfilter/xt_ipvs.c b/net/netfilter/xt_ipvs.c
index 9127a3d..bb10b07 100644
--- a/net/netfilter/xt_ipvs.c
+++ b/net/netfilter/xt_ipvs.c
@@ -85,7 +85,7 @@ ipvs_mt(const struct sk_buff *skb, struct xt_action_param *par)
 	/*
 	 * Check if the packet belongs to an existing entry
 	 */
-	cp = pp->conn_out_get(family, skb, pp, &iph, iph.len, 1 /* inverse */);
+	cp = pp->conn_out_get(family, skb, &iph, iph.len, 1 /* inverse */);
 	if (unlikely(cp == NULL)) {
 		match = false;
 		goto out;
diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c
new file mode 100644
index 0000000..061d48c
--- /dev/null
+++ b/net/netfilter/xt_set.c
@@ -0,0 +1,359 @@
+/* Copyright (C) 2000-2002 Joakim Axelsson <gozem@linux.nu>
+ *                         Patrick Schaaf <bof@bof.de>
+ *                         Martin Josefsson <gandalf@wlug.westbo.se>
+ * Copyright (C) 2003-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/* Kernel module which implements the set match and SET target
+ * for netfilter/iptables. */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/version.h>
+
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_set.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
+MODULE_DESCRIPTION("Xtables: IP set match and target module");
+MODULE_ALIAS("xt_SET");
+MODULE_ALIAS("ipt_set");
+MODULE_ALIAS("ip6t_set");
+MODULE_ALIAS("ipt_SET");
+MODULE_ALIAS("ip6t_SET");
+
+static inline int
+match_set(ip_set_id_t index, const struct sk_buff *skb,
+	  u8 pf, u8 dim, u8 flags, int inv)
+{
+	if (ip_set_test(index, skb, pf, dim, flags))
+		inv = !inv;
+	return inv;
+}
+
+/* Revision 0 interface: backward compatible with netfilter/iptables */
+
+static bool
+set_match_v0(const struct sk_buff *skb, struct xt_action_param *par)
+{
+	const struct xt_set_info_match_v0 *info = par->matchinfo;
+
+	return match_set(info->match_set.index, skb, par->family,
+			 info->match_set.u.compat.dim,
+			 info->match_set.u.compat.flags,
+			 info->match_set.u.compat.flags & IPSET_INV_MATCH);
+}
+
+static void
+compat_flags(struct xt_set_info_v0 *info)
+{
+	u_int8_t i;
+
+	/* Fill out compatibility data according to enum ip_set_kopt */
+	info->u.compat.dim = IPSET_DIM_ZERO;
+	if (info->u.flags[0] & IPSET_MATCH_INV)
+		info->u.compat.flags |= IPSET_INV_MATCH;
+	for (i = 0; i < IPSET_DIM_MAX-1 && info->u.flags[i]; i++) {
+		info->u.compat.dim++;
+		if (info->u.flags[i] & IPSET_SRC)
+			info->u.compat.flags |= (1<<info->u.compat.dim);
+	}
+}
+
+static int
+set_match_v0_checkentry(const struct xt_mtchk_param *par)
+{
+	struct xt_set_info_match_v0 *info = par->matchinfo;
+	ip_set_id_t index;
+
+	index = ip_set_nfnl_get_byindex(info->match_set.index);
+
+	if (index == IPSET_INVALID_ID) {
+		pr_warning("Cannot find set indentified by id %u to match\n",
+			   info->match_set.index);
+		return -ENOENT;
+	}
+	if (info->match_set.u.flags[IPSET_DIM_MAX-1] != 0) {
+		pr_warning("Protocol error: set match dimension "
+			   "is over the limit!\n");
+		return -ERANGE;
+	}
+
+	/* Fill out compatibility data */
+	compat_flags(&info->match_set);
+
+	return 0;
+}
+
+static void
+set_match_v0_destroy(const struct xt_mtdtor_param *par)
+{
+	struct xt_set_info_match_v0 *info = par->matchinfo;
+
+	ip_set_nfnl_put(info->match_set.index);
+}
+
+static unsigned int
+set_target_v0(struct sk_buff *skb, const struct xt_action_param *par)
+{
+	const struct xt_set_info_target_v0 *info = par->targinfo;
+
+	if (info->add_set.index != IPSET_INVALID_ID)
+		ip_set_add(info->add_set.index, skb, par->family,
+			   info->add_set.u.compat.dim,
+			   info->add_set.u.compat.flags);
+	if (info->del_set.index != IPSET_INVALID_ID)
+		ip_set_del(info->del_set.index, skb, par->family,
+			   info->del_set.u.compat.dim,
+			   info->del_set.u.compat.flags);
+
+	return XT_CONTINUE;
+}
+
+static int
+set_target_v0_checkentry(const struct xt_tgchk_param *par)
+{
+	struct xt_set_info_target_v0 *info = par->targinfo;
+	ip_set_id_t index;
+
+	if (info->add_set.index != IPSET_INVALID_ID) {
+		index = ip_set_nfnl_get_byindex(info->add_set.index);
+		if (index == IPSET_INVALID_ID) {
+			pr_warning("Cannot find add_set index %u as target\n",
+				   info->add_set.index);
+			return -ENOENT;
+		}
+	}
+
+	if (info->del_set.index != IPSET_INVALID_ID) {
+		index = ip_set_nfnl_get_byindex(info->del_set.index);
+		if (index == IPSET_INVALID_ID) {
+			pr_warning("Cannot find del_set index %u as target\n",
+				   info->del_set.index);
+			return -ENOENT;
+		}
+	}
+	if (info->add_set.u.flags[IPSET_DIM_MAX-1] != 0 ||
+	    info->del_set.u.flags[IPSET_DIM_MAX-1] != 0) {
+		pr_warning("Protocol error: SET target dimension "
+			   "is over the limit!\n");
+		return -ERANGE;
+	}
+
+	/* Fill out compatibility data */
+	compat_flags(&info->add_set);
+	compat_flags(&info->del_set);
+
+	return 0;
+}
+
+static void
+set_target_v0_destroy(const struct xt_tgdtor_param *par)
+{
+	const struct xt_set_info_target_v0 *info = par->targinfo;
+
+	if (info->add_set.index != IPSET_INVALID_ID)
+		ip_set_nfnl_put(info->add_set.index);
+	if (info->del_set.index != IPSET_INVALID_ID)
+		ip_set_nfnl_put(info->del_set.index);
+}
+
+/* Revision 1: current interface to netfilter/iptables */
+
+static bool
+set_match(const struct sk_buff *skb, struct xt_action_param *par)
+{
+	const struct xt_set_info_match *info = par->matchinfo;
+
+	return match_set(info->match_set.index, skb, par->family,
+			 info->match_set.dim,
+			 info->match_set.flags,
+			 info->match_set.flags & IPSET_INV_MATCH);
+}
+
+static int
+set_match_checkentry(const struct xt_mtchk_param *par)
+{
+	struct xt_set_info_match *info = par->matchinfo;
+	ip_set_id_t index;
+
+	index = ip_set_nfnl_get_byindex(info->match_set.index);
+
+	if (index == IPSET_INVALID_ID) {
+		pr_warning("Cannot find set indentified by id %u to match\n",
+			   info->match_set.index);
+		return -ENOENT;
+	}
+	if (info->match_set.dim > IPSET_DIM_MAX) {
+		pr_warning("Protocol error: set match dimension "
+			   "is over the limit!\n");
+		return -ERANGE;
+	}
+
+	return 0;
+}
+
+static void
+set_match_destroy(const struct xt_mtdtor_param *par)
+{
+	struct xt_set_info_match *info = par->matchinfo;
+
+	ip_set_nfnl_put(info->match_set.index);
+}
+
+static unsigned int
+set_target(struct sk_buff *skb, const struct xt_action_param *par)
+{
+	const struct xt_set_info_target *info = par->targinfo;
+
+	if (info->add_set.index != IPSET_INVALID_ID)
+		ip_set_add(info->add_set.index,
+			   skb, par->family,
+			   info->add_set.dim,
+			   info->add_set.flags);
+	if (info->del_set.index != IPSET_INVALID_ID)
+		ip_set_del(info->del_set.index,
+			   skb, par->family,
+			   info->add_set.dim,
+			   info->del_set.flags);
+
+	return XT_CONTINUE;
+}
+
+static int
+set_target_checkentry(const struct xt_tgchk_param *par)
+{
+	const struct xt_set_info_target *info = par->targinfo;
+	ip_set_id_t index;
+
+	if (info->add_set.index != IPSET_INVALID_ID) {
+		index = ip_set_nfnl_get_byindex(info->add_set.index);
+		if (index == IPSET_INVALID_ID) {
+			pr_warning("Cannot find add_set index %u as target\n",
+				   info->add_set.index);
+			return -ENOENT;
+		}
+	}
+
+	if (info->del_set.index != IPSET_INVALID_ID) {
+		index = ip_set_nfnl_get_byindex(info->del_set.index);
+		if (index == IPSET_INVALID_ID) {
+			pr_warning("Cannot find del_set index %u as target\n",
+				   info->del_set.index);
+			return -ENOENT;
+		}
+	}
+	if (info->add_set.dim > IPSET_DIM_MAX ||
+	    info->del_set.flags > IPSET_DIM_MAX) {
+		pr_warning("Protocol error: SET target dimension "
+			   "is over the limit!\n");
+		return -ERANGE;
+	}
+
+	return 0;
+}
+
+static void
+set_target_destroy(const struct xt_tgdtor_param *par)
+{
+	const struct xt_set_info_target *info = par->targinfo;
+
+	if (info->add_set.index != IPSET_INVALID_ID)
+		ip_set_nfnl_put(info->add_set.index);
+	if (info->del_set.index != IPSET_INVALID_ID)
+		ip_set_nfnl_put(info->del_set.index);
+}
+
+static struct xt_match set_matches[] __read_mostly = {
+	{
+		.name		= "set",
+		.family		= NFPROTO_IPV4,
+		.revision	= 0,
+		.match		= set_match_v0,
+		.matchsize	= sizeof(struct xt_set_info_match_v0),
+		.checkentry	= set_match_v0_checkentry,
+		.destroy	= set_match_v0_destroy,
+		.me		= THIS_MODULE
+	},
+	{
+		.name		= "set",
+		.family		= NFPROTO_IPV4,
+		.revision	= 1,
+		.match		= set_match,
+		.matchsize	= sizeof(struct xt_set_info_match),
+		.checkentry	= set_match_checkentry,
+		.destroy	= set_match_destroy,
+		.me		= THIS_MODULE
+	},
+	{
+		.name		= "set",
+		.family		= NFPROTO_IPV6,
+		.revision	= 1,
+		.match		= set_match,
+		.matchsize	= sizeof(struct xt_set_info_match),
+		.checkentry	= set_match_checkentry,
+		.destroy	= set_match_destroy,
+		.me		= THIS_MODULE
+	},
+};
+
+static struct xt_target set_targets[] __read_mostly = {
+	{
+		.name		= "SET",
+		.revision	= 0,
+		.family		= NFPROTO_IPV4,
+		.target		= set_target_v0,
+		.targetsize	= sizeof(struct xt_set_info_target_v0),
+		.checkentry	= set_target_v0_checkentry,
+		.destroy	= set_target_v0_destroy,
+		.me		= THIS_MODULE
+	},
+	{
+		.name		= "SET",
+		.revision	= 1,
+		.family		= NFPROTO_IPV4,
+		.target		= set_target,
+		.targetsize	= sizeof(struct xt_set_info_target),
+		.checkentry	= set_target_checkentry,
+		.destroy	= set_target_destroy,
+		.me		= THIS_MODULE
+	},
+	{
+		.name		= "SET",
+		.revision	= 1,
+		.family		= NFPROTO_IPV6,
+		.target		= set_target,
+		.targetsize	= sizeof(struct xt_set_info_target),
+		.checkentry	= set_target_checkentry,
+		.destroy	= set_target_destroy,
+		.me		= THIS_MODULE
+	},
+};
+
+static int __init xt_set_init(void)
+{
+	int ret = xt_register_matches(set_matches, ARRAY_SIZE(set_matches));
+
+	if (!ret) {
+		ret = xt_register_targets(set_targets,
+					  ARRAY_SIZE(set_targets));
+		if (ret)
+			xt_unregister_matches(set_matches,
+					      ARRAY_SIZE(set_matches));
+	}
+	return ret;
+}
+
+static void __exit xt_set_fini(void)
+{
+	xt_unregister_matches(set_matches, ARRAY_SIZE(set_matches));
+	xt_unregister_targets(set_targets, ARRAY_SIZE(set_targets));
+}
+
+module_init(xt_set_init);
+module_exit(xt_set_fini);
diff --git a/net/netlabel/netlabel_user.h b/net/netlabel/netlabel_user.h
index 6caef8b..f4fc4c9 100644
--- a/net/netlabel/netlabel_user.h
+++ b/net/netlabel/netlabel_user.h
@@ -49,9 +49,9 @@
 static inline void netlbl_netlink_auditinfo(struct sk_buff *skb,
 					    struct netlbl_audit *audit_info)
 {
-	audit_info->secid = NETLINK_CB(skb).sid;
-	audit_info->loginuid = NETLINK_CB(skb).loginuid;
-	audit_info->sessionid = NETLINK_CB(skb).sessionid;
+	security_task_getsecid(current, &audit_info->secid);
+	audit_info->loginuid = audit_get_loginuid(current);
+	audit_info->sessionid = audit_get_sessionid(current);
 }
 
 /* NetLabel NETLINK I/O functions */
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 1f92459..c8f35b5 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1362,17 +1362,8 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
 
 	NETLINK_CB(skb).pid	= nlk->pid;
 	NETLINK_CB(skb).dst_group = dst_group;
-	NETLINK_CB(skb).loginuid = audit_get_loginuid(current);
-	NETLINK_CB(skb).sessionid = audit_get_sessionid(current);
-	security_task_getsecid(current, &(NETLINK_CB(skb).sid));
 	memcpy(NETLINK_CREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
 
-	/* What can I do? Netlink is asynchronous, so that
-	   we will have to save current capabilities to
-	   check them, when this message will be delivered
-	   to corresponding kernel module.   --ANK (980802)
-	 */
-
 	err = -EFAULT;
 	if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)) {
 		kfree_skb(skb);
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 91cb1d7..b5362e9 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -164,7 +164,6 @@ struct packet_mreq_max {
 static int packet_set_ring(struct sock *sk, struct tpacket_req *req,
 		int closing, int tx_ring);
 
-#define PGV_FROM_VMALLOC 1
 struct pgv {
 	char *buffer;
 };
@@ -466,7 +465,7 @@ retry:
 	 */
 
 	err = -EMSGSIZE;
-	if (len > dev->mtu + dev->hard_header_len)
+	if (len > dev->mtu + dev->hard_header_len + VLAN_HLEN)
 		goto out_unlock;
 
 	if (!skb) {
@@ -497,6 +496,19 @@ retry:
 		goto retry;
 	}
 
+	if (len > (dev->mtu + dev->hard_header_len)) {
+		/* Earlier code assumed this would be a VLAN pkt,
+		 * double-check this now that we have the actual
+		 * packet in hand.
+		 */
+		struct ethhdr *ehdr;
+		skb_reset_mac_header(skb);
+		ehdr = eth_hdr(skb);
+		if (ehdr->h_proto != htons(ETH_P_8021Q)) {
+			err = -EMSGSIZE;
+			goto out_unlock;
+		}
+	}
 
 	skb->protocol = proto;
 	skb->dev = dev;
@@ -523,11 +535,11 @@ static inline unsigned int run_filter(const struct sk_buff *skb,
 {
 	struct sk_filter *filter;
 
-	rcu_read_lock_bh();
-	filter = rcu_dereference_bh(sk->sk_filter);
+	rcu_read_lock();
+	filter = rcu_dereference(sk->sk_filter);
 	if (filter != NULL)
 		res = sk_run_filter(skb, filter->insns);
-	rcu_read_unlock_bh();
+	rcu_read_unlock();
 
 	return res;
 }
@@ -954,7 +966,6 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
 
 static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
 {
-	struct socket *sock;
 	struct sk_buff *skb;
 	struct net_device *dev;
 	__be16 proto;
@@ -966,8 +977,6 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
 	int len_sum = 0;
 	int status = 0;
 
-	sock = po->sk.sk_socket;
-
 	mutex_lock(&po->pg_vec_lock);
 
 	err = -EBUSY;
@@ -1200,7 +1209,7 @@ static int packet_snd(struct socket *sock,
 	}
 
 	err = -EMSGSIZE;
-	if (!gso_type && (len > dev->mtu+reserve))
+	if (!gso_type && (len > dev->mtu + reserve + VLAN_HLEN))
 		goto out_unlock;
 
 	err = -ENOBUFS;
@@ -1225,6 +1234,20 @@ static int packet_snd(struct socket *sock,
 	if (err < 0)
 		goto out_free;
 
+	if (!gso_type && (len > dev->mtu + reserve)) {
+		/* Earlier code assumed this would be a VLAN pkt,
+		 * double-check this now that we have the actual
+		 * packet in hand.
+		 */
+		struct ethhdr *ehdr;
+		skb_reset_mac_header(skb);
+		ehdr = eth_hdr(skb);
+		if (ehdr->h_proto != htons(ETH_P_8021Q)) {
+			err = -EMSGSIZE;
+			goto out_free;
+		}
+	}
+
 	skb->protocol = proto;
 	skb->dev = dev;
 	skb->priority = sk->sk_priority;
diff --git a/net/phonet/Kconfig b/net/phonet/Kconfig
index 0d9b8a2..6ec7d55 100644
--- a/net/phonet/Kconfig
+++ b/net/phonet/Kconfig
@@ -14,15 +14,3 @@ config PHONET
 
 	  To compile this driver as a module, choose M here: the module
 	  will be called phonet. If unsure, say N.
-
-config PHONET_PIPECTRLR
-	bool "Phonet Pipe Controller (EXPERIMENTAL)"
-	depends on PHONET && EXPERIMENTAL
-	default N
-	help
-	  The Pipe Controller implementation in Phonet stack to support Pipe
-	  data with Nokia Slim modems like WG2.5 used on ST-Ericsson U8500
-	  platform.
-
-	  This option is incompatible with older Nokia modems.
-	  Say N here unless you really know what you are doing.
diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c
index 1072b2c..c6fffd9 100644
--- a/net/phonet/af_phonet.c
+++ b/net/phonet/af_phonet.c
@@ -110,6 +110,7 @@ static int pn_socket_create(struct net *net, struct socket *sock, int protocol,
 	sk->sk_protocol = protocol;
 	pn = pn_sk(sk);
 	pn->sobject = 0;
+	pn->dobject = 0;
 	pn->resource = 0;
 	sk->sk_prot->init(sk);
 	err = 0;
@@ -194,11 +195,7 @@ static int pn_send(struct sk_buff *skb, struct net_device *dev,
 	if (skb->pkt_type == PACKET_LOOPBACK) {
 		skb_reset_mac_header(skb);
 		skb_orphan(skb);
-		if (irq)
-			netif_rx(skb);
-		else
-			netif_rx_ni(skb);
-		err = 0;
+		err = (irq ? netif_rx(skb) : netif_rx_ni(skb)) ? -ENOBUFS : 0;
 	} else {
 		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
 					NULL, NULL, skb->len);
@@ -207,6 +204,8 @@ static int pn_send(struct sk_buff *skb, struct net_device *dev,
 			goto drop;
 		}
 		err = dev_queue_xmit(skb);
+		if (unlikely(err > 0))
+			err = net_xmit_errno(err);
 	}
 
 	return err;
@@ -242,8 +241,18 @@ int pn_skb_send(struct sock *sk, struct sk_buff *skb,
 	struct net_device *dev;
 	struct pn_sock *pn = pn_sk(sk);
 	int err;
-	u16 src;
-	u8 daddr = pn_sockaddr_get_addr(target), saddr = PN_NO_ADDR;
+	u16 src, dst;
+	u8 daddr, saddr, res;
+
+	src = pn->sobject;
+	if (target != NULL) {
+		dst = pn_sockaddr_get_object(target);
+		res = pn_sockaddr_get_resource(target);
+	} else {
+		dst = pn->dobject;
+		res = pn->resource;
+	}
+	daddr = pn_addr(dst);
 
 	err = -EHOSTUNREACH;
 	if (sk->sk_bound_dev_if)
@@ -251,10 +260,9 @@ int pn_skb_send(struct sock *sk, struct sk_buff *skb,
 	else if (phonet_address_lookup(net, daddr) == 0) {
 		dev = phonet_device_get(net);
 		skb->pkt_type = PACKET_LOOPBACK;
-	} else if (pn_sockaddr_get_object(target) == 0) {
+	} else if (dst == 0) {
 		/* Resource routing (small race until phonet_rcv()) */
-		struct sock *sk = pn_find_sock_by_res(net,
-							target->spn_resource);
+		struct sock *sk = pn_find_sock_by_res(net, res);
 		if (sk)	{
 			sock_put(sk);
 			dev = phonet_device_get(net);
@@ -271,12 +279,10 @@ int pn_skb_send(struct sock *sk, struct sk_buff *skb,
 	if (saddr == PN_NO_ADDR)
 		goto drop;
 
-	src = pn->sobject;
 	if (!pn_addr(src))
 		src = pn_object(saddr, pn_obj(src));
 
-	err = pn_send(skb, dev, pn_sockaddr_get_object(target),
-			src, pn_sockaddr_get_resource(target), 0);
+	err = pn_send(skb, dev, dst, src, res, 0);
 	dev_put(dev);
 	return err;
 
diff --git a/net/phonet/pep.c b/net/phonet/pep.c
index 3e60f2e..68e635f 100644
--- a/net/phonet/pep.c
+++ b/net/phonet/pep.c
@@ -42,7 +42,7 @@
  * TCP_ESTABLISHED	connected pipe in enabled state
  *
  * pep_sock locking:
- *  - sk_state, ackq, hlist: sock lock needed
+ *  - sk_state, hlist: sock lock needed
  *  - listener: read only
  *  - pipe_handle: read only
  */
@@ -50,11 +50,6 @@
 #define CREDITS_MAX	10
 #define CREDITS_THR	7
 
-static const struct sockaddr_pn pipe_srv = {
-	.spn_family = AF_PHONET,
-	.spn_resource = 0xD9, /* pipe service */
-};
-
 #define pep_sb_size(s) (((s) + 5) & ~3) /* 2-bytes head, 32-bits aligned */
 
 /* Get the next TLV sub-block. */
@@ -82,236 +77,95 @@ static unsigned char *pep_get_sb(struct sk_buff *skb, u8 *ptype, u8 *plen,
 	return data;
 }
 
-static int pep_reply(struct sock *sk, struct sk_buff *oskb,
-			u8 code, const void *data, int len, gfp_t priority)
+static struct sk_buff *pep_alloc_skb(struct sock *sk, const void *payload,
+					int len, gfp_t priority)
 {
-	const struct pnpipehdr *oph = pnp_hdr(oskb);
-	struct pnpipehdr *ph;
-	struct sk_buff *skb;
-
-	skb = alloc_skb(MAX_PNPIPE_HEADER + len, priority);
+	struct sk_buff *skb = alloc_skb(MAX_PNPIPE_HEADER + len, priority);
 	if (!skb)
-		return -ENOMEM;
+		return NULL;
 	skb_set_owner_w(skb, sk);
 
 	skb_reserve(skb, MAX_PNPIPE_HEADER);
 	__skb_put(skb, len);
-	skb_copy_to_linear_data(skb, data, len);
-	__skb_push(skb, sizeof(*ph));
+	skb_copy_to_linear_data(skb, payload, len);
+	__skb_push(skb, sizeof(struct pnpipehdr));
 	skb_reset_transport_header(skb);
-	ph = pnp_hdr(skb);
-	ph->utid = oph->utid;
-	ph->message_id = oph->message_id + 1; /* REQ -> RESP */
-	ph->pipe_handle = oph->pipe_handle;
-	ph->error_code = code;
-
-	return pn_skb_send(sk, skb, &pipe_srv);
-}
-
-#define PAD 0x00
-
-#ifdef CONFIG_PHONET_PIPECTRLR
-static u8 pipe_negotiate_fc(u8 *host_fc, u8 *remote_fc, int len)
-{
-	int i, j;
-	u8 base_fc, final_fc;
-
-	for (i = 0; i < len; i++) {
-		base_fc = host_fc[i];
-		for (j = 0; j < len; j++) {
-			if (remote_fc[j] == base_fc) {
-				final_fc = base_fc;
-				goto done;
-			}
-		}
-	}
-	return -EINVAL;
-
-done:
-	return final_fc;
-
-}
-
-static int pipe_get_flow_info(struct sock *sk, struct sk_buff *skb,
-		u8 *pref_rx_fc, u8 *req_tx_fc)
-{
-	struct pnpipehdr *hdr;
-	u8 n_sb;
-
-	if (!pskb_may_pull(skb, sizeof(*hdr) + 4))
-		return -EINVAL;
-
-	hdr = pnp_hdr(skb);
-	n_sb = hdr->data[4];
-
-	__skb_pull(skb, sizeof(*hdr) + 4);
-	while (n_sb > 0) {
-		u8 type, buf[3], len = sizeof(buf);
-		u8 *data = pep_get_sb(skb, &type, &len, buf);
-
-		if (data == NULL)
-			return -EINVAL;
-
-		switch (type) {
-		case PN_PIPE_SB_REQUIRED_FC_TX:
-			if (len < 3 || (data[2] | data[3] | data[4]) > 3)
-				break;
-			req_tx_fc[0] = data[2];
-			req_tx_fc[1] = data[3];
-			req_tx_fc[2] = data[4];
-			break;
-
-		case PN_PIPE_SB_PREFERRED_FC_RX:
-			if (len < 3 || (data[2] | data[3] | data[4]) > 3)
-				break;
-			pref_rx_fc[0] = data[2];
-			pref_rx_fc[1] = data[3];
-			pref_rx_fc[2] = data[4];
-			break;
-
-		}
-		n_sb--;
-	}
-	return 0;
+	return skb;
 }
 
-static int pipe_handler_send_req(struct sock *sk, u8 utid,
-		u8 msg_id, gfp_t priority)
+static int pep_reply(struct sock *sk, struct sk_buff *oskb, u8 code,
+			const void *data, int len, gfp_t priority)
 {
-	int len;
+	const struct pnpipehdr *oph = pnp_hdr(oskb);
 	struct pnpipehdr *ph;
 	struct sk_buff *skb;
-	struct pep_sock *pn = pep_sk(sk);
-
-	static const u8 data[4] = {
-		PAD, PAD, PAD, PAD,
-	};
+	struct sockaddr_pn peer;
 
-	switch (msg_id) {
-	case PNS_PEP_CONNECT_REQ:
-		len = sizeof(data);
-		break;
-
-	case PNS_PEP_DISCONNECT_REQ:
-	case PNS_PEP_ENABLE_REQ:
-	case PNS_PEP_DISABLE_REQ:
-		len = 0;
-		break;
-
-	default:
-		return -EINVAL;
-	}
-
-	skb = alloc_skb(MAX_PNPIPE_HEADER + len, priority);
+	skb = pep_alloc_skb(sk, data, len, priority);
 	if (!skb)
 		return -ENOMEM;
-	skb_set_owner_w(skb, sk);
 
-	skb_reserve(skb, MAX_PNPIPE_HEADER);
-	if (len) {
-		__skb_put(skb, len);
-		skb_copy_to_linear_data(skb, data, len);
-	}
-	__skb_push(skb, sizeof(*ph));
-	skb_reset_transport_header(skb);
 	ph = pnp_hdr(skb);
-	ph->utid = utid;
-	ph->message_id = msg_id;
-	ph->pipe_handle = pn->pipe_handle;
-	ph->error_code = PN_PIPE_NO_ERROR;
+	ph->utid = oph->utid;
+	ph->message_id = oph->message_id + 1; /* REQ -> RESP */
+	ph->pipe_handle = oph->pipe_handle;
+	ph->error_code = code;
 
-	return pn_skb_send(sk, skb, &pn->remote_pep);
+	pn_skb_get_src_sockaddr(oskb, &peer);
+	return pn_skb_send(sk, skb, &peer);
 }
 
-static int pipe_handler_send_created_ind(struct sock *sk,
-		u8 utid, u8 msg_id)
+static int pep_indicate(struct sock *sk, u8 id, u8 code,
+			const void *data, int len, gfp_t priority)
 {
-	int err_code;
+	struct pep_sock *pn = pep_sk(sk);
 	struct pnpipehdr *ph;
 	struct sk_buff *skb;
 
-	struct pep_sock *pn = pep_sk(sk);
-	static u8 data[4] = {
-		0x03, 0x04,
-	};
-	data[2] = pn->tx_fc;
-	data[3] = pn->rx_fc;
-
-	/*
-	 * actually, below is number of sub-blocks and not error code.
-	 * Pipe_created_ind message format does not have any
-	 * error code field. However, the Phonet stack will always send
-	 * an error code as part of pnpipehdr. So, use that err_code to
-	 * specify the number of sub-blocks.
-	 */
-	err_code = 0x01;
-
-	skb = alloc_skb(MAX_PNPIPE_HEADER + sizeof(data), GFP_ATOMIC);
+	skb = pep_alloc_skb(sk, data, len, priority);
 	if (!skb)
 		return -ENOMEM;
-	skb_set_owner_w(skb, sk);
 
-	skb_reserve(skb, MAX_PNPIPE_HEADER);
-	__skb_put(skb, sizeof(data));
-	skb_copy_to_linear_data(skb, data, sizeof(data));
-	__skb_push(skb, sizeof(*ph));
-	skb_reset_transport_header(skb);
 	ph = pnp_hdr(skb);
-	ph->utid = utid;
-	ph->message_id = msg_id;
+	ph->utid = 0;
+	ph->message_id = id;
 	ph->pipe_handle = pn->pipe_handle;
-	ph->error_code = err_code;
-
-	return pn_skb_send(sk, skb, &pn->remote_pep);
+	ph->data[0] = code;
+	return pn_skb_send(sk, skb, NULL);
 }
 
-static int pipe_handler_send_ind(struct sock *sk, u8 utid, u8 msg_id)
+#define PAD 0x00
+
+static int pipe_handler_request(struct sock *sk, u8 id, u8 code,
+				const void *data, int len)
 {
-	int err_code;
+	struct pep_sock *pn = pep_sk(sk);
 	struct pnpipehdr *ph;
 	struct sk_buff *skb;
-	struct pep_sock *pn = pep_sk(sk);
-
-	/*
-	 * actually, below is a filler.
-	 * Pipe_enabled/disabled_ind message format does not have any
-	 * error code field. However, the Phonet stack will always send
-	 * an error code as part of pnpipehdr. So, use that err_code to
-	 * specify the filler value.
-	 */
-	err_code = 0x0;
 
-	skb = alloc_skb(MAX_PNPIPE_HEADER, GFP_ATOMIC);
+	skb = pep_alloc_skb(sk, data, len, GFP_KERNEL);
 	if (!skb)
 		return -ENOMEM;
-	skb_set_owner_w(skb, sk);
 
-	skb_reserve(skb, MAX_PNPIPE_HEADER);
-	__skb_push(skb, sizeof(*ph));
-	skb_reset_transport_header(skb);
 	ph = pnp_hdr(skb);
-	ph->utid = utid;
-	ph->message_id = msg_id;
+	ph->utid = id; /* whatever */
+	ph->message_id = id;
 	ph->pipe_handle = pn->pipe_handle;
-	ph->error_code = err_code;
-
-	return pn_skb_send(sk, skb, &pn->remote_pep);
+	ph->data[0] = code;
+	return pn_skb_send(sk, skb, NULL);
 }
 
-static int pipe_handler_enable_pipe(struct sock *sk, int enable)
+static int pipe_handler_send_created_ind(struct sock *sk)
 {
-	int utid, req;
-
-	if (enable) {
-		utid = PNS_PIPE_ENABLE_UTID;
-		req = PNS_PEP_ENABLE_REQ;
-	} else {
-		utid = PNS_PIPE_DISABLE_UTID;
-		req = PNS_PEP_DISABLE_REQ;
-	}
-	return pipe_handler_send_req(sk, utid, req, GFP_ATOMIC);
+	struct pep_sock *pn = pep_sk(sk);
+	u8 data[4] = {
+		PN_PIPE_SB_NEGOTIATED_FC, pep_sb_size(2),
+		pn->tx_fc, pn->rx_fc,
+	};
+
+	return pep_indicate(sk, PNS_PIPE_CREATED_IND, 1 /* sub-blocks */,
+				data, 4, GFP_ATOMIC);
 }
-#endif
 
 static int pep_accept_conn(struct sock *sk, struct sk_buff *skb)
 {
@@ -334,11 +188,12 @@ static int pep_accept_conn(struct sock *sk, struct sk_buff *skb)
 				GFP_KERNEL);
 }
 
-static int pep_reject_conn(struct sock *sk, struct sk_buff *skb, u8 code)
+static int pep_reject_conn(struct sock *sk, struct sk_buff *skb, u8 code,
+				gfp_t priority)
 {
 	static const u8 data[4] = { PAD, PAD, PAD, 0 /* sub-blocks */ };
 	WARN_ON(code == PN_PIPE_NO_ERROR);
-	return pep_reply(sk, skb, code, data, sizeof(data), GFP_ATOMIC);
+	return pep_reply(sk, skb, code, data, sizeof(data), priority);
 }
 
 /* Control requests are not sent by the pipe service and have a specific
@@ -350,23 +205,21 @@ static int pep_ctrlreq_error(struct sock *sk, struct sk_buff *oskb, u8 code,
 	struct sk_buff *skb;
 	struct pnpipehdr *ph;
 	struct sockaddr_pn dst;
+	u8 data[4] = {
+		oph->data[0], /* PEP type */
+		code, /* error code, at an unusual offset */
+		PAD, PAD,
+	};
 
-	skb = alloc_skb(MAX_PNPIPE_HEADER + 4, priority);
+	skb = pep_alloc_skb(sk, data, 4, priority);
 	if (!skb)
 		return -ENOMEM;
-	skb_set_owner_w(skb, sk);
-
-	skb_reserve(skb, MAX_PHONET_HEADER);
-	ph = (struct pnpipehdr *)skb_put(skb, sizeof(*ph) + 4);
 
+	ph = pnp_hdr(skb);
 	ph->utid = oph->utid;
 	ph->message_id = PNS_PEP_CTRL_RESP;
 	ph->pipe_handle = oph->pipe_handle;
 	ph->data[0] = oph->data[1]; /* CTRL id */
-	ph->data[1] = oph->data[0]; /* PEP type */
-	ph->data[2] = code; /* error code, at an usual offset */
-	ph->data[3] = PAD;
-	ph->data[4] = PAD;
 
 	pn_skb_get_src_sockaddr(oskb, &dst);
 	return pn_skb_send(sk, skb, &dst);
@@ -374,38 +227,15 @@ static int pep_ctrlreq_error(struct sock *sk, struct sk_buff *oskb, u8 code,
 
 static int pipe_snd_status(struct sock *sk, u8 type, u8 status, gfp_t priority)
 {
-	struct pep_sock *pn = pep_sk(sk);
-	struct pnpipehdr *ph;
-	struct sk_buff *skb;
+	u8 data[4] = { type, PAD, PAD, status };
 
-	skb = alloc_skb(MAX_PNPIPE_HEADER + 4, priority);
-	if (!skb)
-		return -ENOMEM;
-	skb_set_owner_w(skb, sk);
-
-	skb_reserve(skb, MAX_PNPIPE_HEADER + 4);
-	__skb_push(skb, sizeof(*ph) + 4);
-	skb_reset_transport_header(skb);
-	ph = pnp_hdr(skb);
-	ph->utid = 0;
-	ph->message_id = PNS_PEP_STATUS_IND;
-	ph->pipe_handle = pn->pipe_handle;
-	ph->pep_type = PN_PEP_TYPE_COMMON;
-	ph->data[1] = type;
-	ph->data[2] = PAD;
-	ph->data[3] = PAD;
-	ph->data[4] = status;
-
-#ifdef CONFIG_PHONET_PIPECTRLR
-	return pn_skb_send(sk, skb, &pn->remote_pep);
-#else
-	return pn_skb_send(sk, skb, &pipe_srv);
-#endif
+	return pep_indicate(sk, PNS_PEP_STATUS_IND, PN_PEP_TYPE_COMMON,
+				data, 4, priority);
 }
 
 /* Send our RX flow control information to the sender.
  * Socket must be locked. */
-static void pipe_grant_credits(struct sock *sk)
+static void pipe_grant_credits(struct sock *sk, gfp_t priority)
 {
 	struct pep_sock *pn = pep_sk(sk);
 
@@ -415,16 +245,16 @@ static void pipe_grant_credits(struct sock *sk)
 	case PN_LEGACY_FLOW_CONTROL: /* TODO */
 		break;
 	case PN_ONE_CREDIT_FLOW_CONTROL:
-		pipe_snd_status(sk, PN_PEP_IND_FLOW_CONTROL,
-				PEP_IND_READY, GFP_ATOMIC);
-		pn->rx_credits = 1;
+		if (pipe_snd_status(sk, PN_PEP_IND_FLOW_CONTROL,
+					PEP_IND_READY, priority) == 0)
+			pn->rx_credits = 1;
 		break;
 	case PN_MULTI_CREDIT_FLOW_CONTROL:
 		if ((pn->rx_credits + CREDITS_THR) > CREDITS_MAX)
 			break;
 		if (pipe_snd_status(sk, PN_PEP_IND_ID_MCFC_GRANT_CREDITS,
 					CREDITS_MAX - pn->rx_credits,
-					GFP_ATOMIC) == 0)
+					priority) == 0)
 			pn->rx_credits = CREDITS_MAX;
 		break;
 	}
@@ -522,7 +352,7 @@ static int pipe_do_rcv(struct sock *sk, struct sk_buff *skb)
 
 	switch (hdr->message_id) {
 	case PNS_PEP_CONNECT_REQ:
-		pep_reject_conn(sk, skb, PN_PIPE_ERR_PEP_IN_USE);
+		pep_reject_conn(sk, skb, PN_PIPE_ERR_PEP_IN_USE, GFP_ATOMIC);
 		break;
 
 	case PNS_PEP_DISCONNECT_REQ:
@@ -532,35 +362,11 @@ static int pipe_do_rcv(struct sock *sk, struct sk_buff *skb)
 			sk->sk_state_change(sk);
 		break;
 
-#ifdef CONFIG_PHONET_PIPECTRLR
-	case PNS_PEP_DISCONNECT_RESP:
-		pn->pipe_state = PIPE_IDLE;
-		sk->sk_state = TCP_CLOSE;
-		break;
-#endif
-
 	case PNS_PEP_ENABLE_REQ:
 		/* Wait for PNS_PIPE_(ENABLED|REDIRECTED)_IND */
 		pep_reply(sk, skb, PN_PIPE_NO_ERROR, NULL, 0, GFP_ATOMIC);
 		break;
 
-#ifdef CONFIG_PHONET_PIPECTRLR
-	case PNS_PEP_ENABLE_RESP:
-		pn->pipe_state = PIPE_ENABLED;
-		pipe_handler_send_ind(sk, PNS_PIPE_ENABLED_IND_UTID,
-				PNS_PIPE_ENABLED_IND);
-
-		if (!pn_flow_safe(pn->tx_fc)) {
-			atomic_set(&pn->tx_credits, 1);
-			sk->sk_write_space(sk);
-		}
-		if (sk->sk_state == TCP_ESTABLISHED)
-			break; /* Nothing to do */
-		sk->sk_state = TCP_ESTABLISHED;
-		pipe_grant_credits(sk);
-		break;
-#endif
-
 	case PNS_PEP_RESET_REQ:
 		switch (hdr->state_after_reset) {
 		case PN_PIPE_DISABLE:
@@ -579,17 +385,6 @@ static int pipe_do_rcv(struct sock *sk, struct sk_buff *skb)
 		pep_reply(sk, skb, PN_PIPE_NO_ERROR, NULL, 0, GFP_ATOMIC);
 		break;
 
-#ifdef CONFIG_PHONET_PIPECTRLR
-	case PNS_PEP_DISABLE_RESP:
-		pn->pipe_state = PIPE_DISABLED;
-		atomic_set(&pn->tx_credits, 0);
-		pipe_handler_send_ind(sk, PNS_PIPE_DISABLED_IND_UTID,
-				PNS_PIPE_DISABLED_IND);
-		sk->sk_state = TCP_SYN_RECV;
-		pn->rx_credits = 0;
-		break;
-#endif
-
 	case PNS_PEP_CTRL_REQ:
 		if (skb_queue_len(&pn->ctrlreq_queue) >= PNPIPE_CTRLREQ_MAX) {
 			atomic_inc(&sk->sk_drops);
@@ -607,7 +402,8 @@ static int pipe_do_rcv(struct sock *sk, struct sk_buff *skb)
 		if (!pn_flow_safe(pn->rx_fc)) {
 			err = sock_queue_rcv_skb(sk, skb);
 			if (!err)
-				return 0;
+				return NET_RX_SUCCESS;
+			err = -ENOBUFS;
 			break;
 		}
 
@@ -645,7 +441,7 @@ static int pipe_do_rcv(struct sock *sk, struct sk_buff *skb)
 		if (sk->sk_state == TCP_ESTABLISHED)
 			break; /* Nothing to do */
 		sk->sk_state = TCP_ESTABLISHED;
-		pipe_grant_credits(sk);
+		pipe_grant_credits(sk, GFP_ATOMIC);
 		break;
 
 	case PNS_PIPE_DISABLED_IND:
@@ -660,7 +456,7 @@ static int pipe_do_rcv(struct sock *sk, struct sk_buff *skb)
 	}
 out:
 	kfree_skb(skb);
-	return err;
+	return (err == -ENOBUFS) ? NET_RX_DROP : NET_RX_SUCCESS;
 
 queue:
 	skb->dev = NULL;
@@ -669,7 +465,7 @@ queue:
 	skb_queue_tail(queue, skb);
 	if (!sock_flag(sk, SOCK_DEAD))
 		sk->sk_data_ready(sk, err);
-	return 0;
+	return NET_RX_SUCCESS;
 }
 
 /* Destroy connected sock. */
@@ -681,133 +477,126 @@ static void pipe_destruct(struct sock *sk)
 	skb_queue_purge(&pn->ctrlreq_queue);
 }
 
-#ifdef CONFIG_PHONET_PIPECTRLR
-static int pep_connresp_rcv(struct sock *sk, struct sk_buff *skb)
+static u8 pipe_negotiate_fc(const u8 *fcs, unsigned n)
 {
-	struct pep_sock *pn = pep_sk(sk);
-	u8 host_pref_rx_fc[3] = {3, 2, 1}, host_req_tx_fc[3] = {3, 2, 1};
-	u8 remote_pref_rx_fc[3], remote_req_tx_fc[3];
-	u8 negotiated_rx_fc, negotiated_tx_fc;
-	int ret;
-
-	pipe_get_flow_info(sk, skb, remote_pref_rx_fc,
-			remote_req_tx_fc);
-	negotiated_tx_fc = pipe_negotiate_fc(remote_req_tx_fc,
-			host_pref_rx_fc,
-			sizeof(host_pref_rx_fc));
-	negotiated_rx_fc = pipe_negotiate_fc(host_req_tx_fc,
-			remote_pref_rx_fc,
-			sizeof(host_pref_rx_fc));
-
-	pn->pipe_state = PIPE_DISABLED;
-	sk->sk_state = TCP_SYN_RECV;
-	sk->sk_backlog_rcv = pipe_do_rcv;
-	sk->sk_destruct = pipe_destruct;
-	pn->rx_credits = 0;
-	pn->rx_fc = negotiated_rx_fc;
-	pn->tx_fc = negotiated_tx_fc;
-	sk->sk_state_change(sk);
+	unsigned i;
+	u8 final_fc = PN_NO_FLOW_CONTROL;
 
-	ret = pipe_handler_send_created_ind(sk,
-			PNS_PIPE_CREATED_IND_UTID,
-			PNS_PIPE_CREATED_IND
-			);
+	for (i = 0; i < n; i++) {
+		u8 fc = fcs[i];
 
-	return ret;
+		if (fc > final_fc && fc < PN_MAX_FLOW_CONTROL)
+			final_fc = fc;
+	}
+	return final_fc;
 }
-#endif
 
-static int pep_connreq_rcv(struct sock *sk, struct sk_buff *skb)
+static int pep_connresp_rcv(struct sock *sk, struct sk_buff *skb)
 {
-	struct sock *newsk;
-	struct pep_sock *newpn, *pn = pep_sk(sk);
+	struct pep_sock *pn = pep_sk(sk);
 	struct pnpipehdr *hdr;
-	struct sockaddr_pn dst;
-	u16 peer_type;
-	u8 pipe_handle, enabled, n_sb;
-	u8 aligned = 0;
+	u8 n_sb;
 
 	if (!pskb_pull(skb, sizeof(*hdr) + 4))
 		return -EINVAL;
 
 	hdr = pnp_hdr(skb);
-	pipe_handle = hdr->pipe_handle;
-	switch (hdr->state_after_connect) {
-	case PN_PIPE_DISABLE:
-		enabled = 0;
-		break;
-	case PN_PIPE_ENABLE:
-		enabled = 1;
-		break;
-	default:
-		pep_reject_conn(sk, skb, PN_PIPE_ERR_INVALID_PARAM);
-		return -EINVAL;
-	}
-	peer_type = hdr->other_pep_type << 8;
-
-	if (unlikely(sk->sk_state != TCP_LISTEN) || sk_acceptq_is_full(sk)) {
-		pep_reject_conn(sk, skb, PN_PIPE_ERR_PEP_IN_USE);
-		return -ENOBUFS;
-	}
+	if (hdr->error_code != PN_PIPE_NO_ERROR)
+		return -ECONNREFUSED;
 
-	/* Parse sub-blocks (options) */
+	/* Parse sub-blocks */
 	n_sb = hdr->data[4];
 	while (n_sb > 0) {
-		u8 type, buf[1], len = sizeof(buf);
+		u8 type, buf[6], len = sizeof(buf);
 		const u8 *data = pep_get_sb(skb, &type, &len, buf);
 
 		if (data == NULL)
 			return -EINVAL;
+
 		switch (type) {
-		case PN_PIPE_SB_CONNECT_REQ_PEP_SUB_TYPE:
-			if (len < 1)
-				return -EINVAL;
-			peer_type = (peer_type & 0xff00) | data[0];
+		case PN_PIPE_SB_REQUIRED_FC_TX:
+			if (len < 2 || len < data[0])
+				break;
+			pn->tx_fc = pipe_negotiate_fc(data + 2, len - 2);
 			break;
-		case PN_PIPE_SB_ALIGNED_DATA:
-			aligned = data[0] != 0;
+
+		case PN_PIPE_SB_PREFERRED_FC_RX:
+			if (len < 2 || len < data[0])
+				break;
+			pn->rx_fc = pipe_negotiate_fc(data + 2, len - 2);
 			break;
+
 		}
 		n_sb--;
 	}
 
-	skb = skb_clone(skb, GFP_ATOMIC);
-	if (!skb)
-		return -ENOMEM;
+	return pipe_handler_send_created_ind(sk);
+}
 
-	/* Create a new to-be-accepted sock */
-	newsk = sk_alloc(sock_net(sk), PF_PHONET, GFP_ATOMIC, sk->sk_prot);
-	if (!newsk) {
-		kfree_skb(skb);
-		return -ENOMEM;
-	}
-	sock_init_data(NULL, newsk);
-	newsk->sk_state = TCP_SYN_RECV;
-	newsk->sk_backlog_rcv = pipe_do_rcv;
-	newsk->sk_protocol = sk->sk_protocol;
-	newsk->sk_destruct = pipe_destruct;
+/* Queue an skb to an actively connected sock.
+ * Socket lock must be held. */
+static int pipe_handler_do_rcv(struct sock *sk, struct sk_buff *skb)
+{
+	struct pep_sock *pn = pep_sk(sk);
+	struct pnpipehdr *hdr = pnp_hdr(skb);
+	int err = NET_RX_SUCCESS;
 
-	newpn = pep_sk(newsk);
-	pn_skb_get_dst_sockaddr(skb, &dst);
-	newpn->pn_sk.sobject = pn_sockaddr_get_object(&dst);
-	newpn->pn_sk.resource = pn->pn_sk.resource;
-	skb_queue_head_init(&newpn->ctrlreq_queue);
-	newpn->pipe_handle = pipe_handle;
-	atomic_set(&newpn->tx_credits, 0);
-	newpn->peer_type = peer_type;
-	newpn->rx_credits = 0;
-	newpn->rx_fc = newpn->tx_fc = PN_LEGACY_FLOW_CONTROL;
-	newpn->init_enable = enabled;
-	newpn->aligned = aligned;
+	switch (hdr->message_id) {
+	case PNS_PIPE_ALIGNED_DATA:
+		__skb_pull(skb, 1);
+		/* fall through */
+	case PNS_PIPE_DATA:
+		__skb_pull(skb, 3); /* Pipe data header */
+		if (!pn_flow_safe(pn->rx_fc)) {
+			err = sock_queue_rcv_skb(sk, skb);
+			if (!err)
+				return NET_RX_SUCCESS;
+			err = NET_RX_DROP;
+			break;
+		}
 
-	BUG_ON(!skb_queue_empty(&newsk->sk_receive_queue));
-	skb_queue_head(&newsk->sk_receive_queue, skb);
-	if (!sock_flag(sk, SOCK_DEAD))
-		sk->sk_data_ready(sk, 0);
+		if (pn->rx_credits == 0) {
+			atomic_inc(&sk->sk_drops);
+			err = NET_RX_DROP;
+			break;
+		}
+		pn->rx_credits--;
+		skb->dev = NULL;
+		skb_set_owner_r(skb, sk);
+		err = skb->len;
+		skb_queue_tail(&sk->sk_receive_queue, skb);
+		if (!sock_flag(sk, SOCK_DEAD))
+			sk->sk_data_ready(sk, err);
+		return NET_RX_SUCCESS;
 
-	sk_acceptq_added(sk);
-	sk_add_node(newsk, &pn->ackq);
-	return 0;
+	case PNS_PEP_CONNECT_RESP:
+		if (sk->sk_state != TCP_SYN_SENT)
+			break;
+		if (!sock_flag(sk, SOCK_DEAD))
+			sk->sk_state_change(sk);
+		if (pep_connresp_rcv(sk, skb)) {
+			sk->sk_state = TCP_CLOSE_WAIT;
+			break;
+		}
+
+		sk->sk_state = TCP_ESTABLISHED;
+		if (!pn_flow_safe(pn->tx_fc)) {
+			atomic_set(&pn->tx_credits, 1);
+			sk->sk_write_space(sk);
+		}
+		pipe_grant_credits(sk, GFP_ATOMIC);
+		break;
+
+	case PNS_PEP_DISCONNECT_RESP:
+		/* sock should already be dead, nothing to do */
+		break;
+
+	case PNS_PEP_STATUS_IND:
+		pipe_rcv_status(sk, skb);
+		break;
+	}
+	kfree_skb(skb);
+	return err;
 }
 
 /* Listening sock must be locked */
@@ -847,7 +636,6 @@ static int pep_do_rcv(struct sock *sk, struct sk_buff *skb)
 	struct sock *sknode;
 	struct pnpipehdr *hdr;
 	struct sockaddr_pn dst;
-	int err = NET_RX_SUCCESS;
 	u8 pipe_handle;
 
 	if (!pskb_may_pull(skb, sizeof(*hdr)))
@@ -865,26 +653,18 @@ static int pep_do_rcv(struct sock *sk, struct sk_buff *skb)
 	if (sknode)
 		return sk_receive_skb(sknode, skb, 1);
 
-	/* Look for a pipe handle pending accept */
-	sknode = pep_find_pipe(&pn->ackq, &dst, pipe_handle);
-	if (sknode) {
-		sock_put(sknode);
-		if (net_ratelimit())
-			printk(KERN_WARNING"Phonet unconnected PEP ignored");
-		err = NET_RX_DROP;
-		goto drop;
-	}
-
 	switch (hdr->message_id) {
 	case PNS_PEP_CONNECT_REQ:
-		err = pep_connreq_rcv(sk, skb);
-		break;
-
-#ifdef CONFIG_PHONET_PIPECTRLR
-	case PNS_PEP_CONNECT_RESP:
-		err = pep_connresp_rcv(sk, skb);
-		break;
-#endif
+		if (sk->sk_state != TCP_LISTEN || sk_acceptq_is_full(sk)) {
+			pep_reject_conn(sk, skb, PN_PIPE_ERR_PEP_IN_USE,
+					GFP_ATOMIC);
+			break;
+		}
+		skb_queue_head(&sk->sk_receive_queue, skb);
+		sk_acceptq_added(sk);
+		if (!sock_flag(sk, SOCK_DEAD))
+			sk->sk_data_ready(sk, 0);
+		return NET_RX_SUCCESS;
 
 	case PNS_PEP_DISCONNECT_REQ:
 		pep_reply(sk, skb, PN_PIPE_NO_ERROR, NULL, 0, GFP_ATOMIC);
@@ -898,12 +678,17 @@ static int pep_do_rcv(struct sock *sk, struct sk_buff *skb)
 	case PNS_PEP_ENABLE_REQ:
 	case PNS_PEP_DISABLE_REQ:
 		/* invalid handle is not even allowed here! */
+		break;
+
 	default:
-		err = NET_RX_DROP;
+		if ((1 << sk->sk_state)
+				& ~(TCPF_CLOSE|TCPF_LISTEN|TCPF_CLOSE_WAIT))
+			/* actively connected socket */
+			return pipe_handler_do_rcv(sk, skb);
 	}
 drop:
 	kfree_skb(skb);
-	return err;
+	return NET_RX_SUCCESS;
 }
 
 static int pipe_do_remove(struct sock *sk)
@@ -912,20 +697,16 @@ static int pipe_do_remove(struct sock *sk)
 	struct pnpipehdr *ph;
 	struct sk_buff *skb;
 
-	skb = alloc_skb(MAX_PNPIPE_HEADER, GFP_KERNEL);
+	skb = pep_alloc_skb(sk, NULL, 0, GFP_KERNEL);
 	if (!skb)
 		return -ENOMEM;
 
-	skb_reserve(skb, MAX_PNPIPE_HEADER);
-	__skb_push(skb, sizeof(*ph));
-	skb_reset_transport_header(skb);
 	ph = pnp_hdr(skb);
 	ph->utid = 0;
 	ph->message_id = PNS_PIPE_REMOVE_REQ;
 	ph->pipe_handle = pn->pipe_handle;
 	ph->data[0] = PAD;
-
-	return pn_skb_send(sk, skb, &pipe_srv);
+	return pn_skb_send(sk, skb, NULL);
 }
 
 /* associated socket ceases to exist */
@@ -938,29 +719,15 @@ static void pep_sock_close(struct sock *sk, long timeout)
 	sk_common_release(sk);
 
 	lock_sock(sk);
-	if (sk->sk_state == TCP_LISTEN) {
-		/* Destroy the listen queue */
-		struct sock *sknode;
-		struct hlist_node *p, *n;
-
-		sk_for_each_safe(sknode, p, n, &pn->ackq)
-			sk_del_node_init(sknode);
-		sk->sk_state = TCP_CLOSE;
-	} else if ((1 << sk->sk_state) & (TCPF_SYN_RECV|TCPF_ESTABLISHED))
-		/* Forcefully remove dangling Phonet pipe */
-		pipe_do_remove(sk);
-
-#ifdef CONFIG_PHONET_PIPECTRLR
-	if (pn->pipe_state != PIPE_IDLE) {
-		/* send pep disconnect request */
-		pipe_handler_send_req(sk,
-				PNS_PEP_DISCONNECT_UTID, PNS_PEP_DISCONNECT_REQ,
-				GFP_KERNEL);
-
-		pn->pipe_state = PIPE_IDLE;
-		sk->sk_state = TCP_CLOSE;
+	if ((1 << sk->sk_state) & (TCPF_SYN_RECV|TCPF_ESTABLISHED)) {
+		if (sk->sk_backlog_rcv == pipe_do_rcv)
+			/* Forcefully remove dangling Phonet pipe */
+			pipe_do_remove(sk);
+		else
+			pipe_handler_request(sk, PNS_PEP_DISCONNECT_REQ, PAD,
+						NULL, 0);
 	}
-#endif
+	sk->sk_state = TCP_CLOSE;
 
 	ifindex = pn->ifindex;
 	pn->ifindex = 0;
@@ -971,86 +738,141 @@ static void pep_sock_close(struct sock *sk, long timeout)
 	sock_put(sk);
 }
 
-static int pep_wait_connreq(struct sock *sk, int noblock)
+static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp)
 {
-	struct task_struct *tsk = current;
-	struct pep_sock *pn = pep_sk(sk);
-	long timeo = sock_rcvtimeo(sk, noblock);
-
-	for (;;) {
-		DEFINE_WAIT(wait);
+	struct pep_sock *pn = pep_sk(sk), *newpn;
+	struct sock *newsk = NULL;
+	struct sk_buff *skb;
+	struct pnpipehdr *hdr;
+	struct sockaddr_pn dst, src;
+	int err;
+	u16 peer_type;
+	u8 pipe_handle, enabled, n_sb;
+	u8 aligned = 0;
 
-		if (sk->sk_state != TCP_LISTEN)
-			return -EINVAL;
-		if (!hlist_empty(&pn->ackq))
-			break;
-		if (!timeo)
-			return -EWOULDBLOCK;
-		if (signal_pending(tsk))
-			return sock_intr_errno(timeo);
+	skb = skb_recv_datagram(sk, 0, flags & O_NONBLOCK, errp);
+	if (!skb)
+		return NULL;
 
-		prepare_to_wait_exclusive(sk_sleep(sk), &wait,
-						TASK_INTERRUPTIBLE);
-		release_sock(sk);
-		timeo = schedule_timeout(timeo);
-		lock_sock(sk);
-		finish_wait(sk_sleep(sk), &wait);
+	lock_sock(sk);
+	if (sk->sk_state != TCP_LISTEN) {
+		err = -EINVAL;
+		goto drop;
 	}
+	sk_acceptq_removed(sk);
 
-	return 0;
-}
+	err = -EPROTO;
+	if (!pskb_may_pull(skb, sizeof(*hdr) + 4))
+		goto drop;
 
-static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp)
-{
-	struct pep_sock *pn = pep_sk(sk);
-	struct sock *newsk = NULL;
-	struct sk_buff *oskb;
-	int err;
+	hdr = pnp_hdr(skb);
+	pipe_handle = hdr->pipe_handle;
+	switch (hdr->state_after_connect) {
+	case PN_PIPE_DISABLE:
+		enabled = 0;
+		break;
+	case PN_PIPE_ENABLE:
+		enabled = 1;
+		break;
+	default:
+		pep_reject_conn(sk, skb, PN_PIPE_ERR_INVALID_PARAM,
+				GFP_KERNEL);
+		goto drop;
+	}
+	peer_type = hdr->other_pep_type << 8;
 
-	lock_sock(sk);
-	err = pep_wait_connreq(sk, flags & O_NONBLOCK);
-	if (err)
-		goto out;
+	/* Parse sub-blocks (options) */
+	n_sb = hdr->data[4];
+	while (n_sb > 0) {
+		u8 type, buf[1], len = sizeof(buf);
+		const u8 *data = pep_get_sb(skb, &type, &len, buf);
 
-	newsk = __sk_head(&pn->ackq);
+		if (data == NULL)
+			goto drop;
+		switch (type) {
+		case PN_PIPE_SB_CONNECT_REQ_PEP_SUB_TYPE:
+			if (len < 1)
+				goto drop;
+			peer_type = (peer_type & 0xff00) | data[0];
+			break;
+		case PN_PIPE_SB_ALIGNED_DATA:
+			aligned = data[0] != 0;
+			break;
+		}
+		n_sb--;
+	}
 
-	oskb = skb_dequeue(&newsk->sk_receive_queue);
-	err = pep_accept_conn(newsk, oskb);
-	if (err) {
-		skb_queue_head(&newsk->sk_receive_queue, oskb);
+	/* Check for duplicate pipe handle */
+	newsk = pep_find_pipe(&pn->hlist, &dst, pipe_handle);
+	if (unlikely(newsk)) {
+		__sock_put(newsk);
 		newsk = NULL;
-		goto out;
+		pep_reject_conn(sk, skb, PN_PIPE_ERR_PEP_IN_USE, GFP_KERNEL);
+		goto drop;
+	}
+
+	/* Create a new to-be-accepted sock */
+	newsk = sk_alloc(sock_net(sk), PF_PHONET, GFP_KERNEL, sk->sk_prot);
+	if (!newsk) {
+		pep_reject_conn(sk, skb, PN_PIPE_ERR_OVERLOAD, GFP_KERNEL);
+		err = -ENOBUFS;
+		goto drop;
 	}
-	kfree_skb(oskb);
 
+	sock_init_data(NULL, newsk);
+	newsk->sk_state = TCP_SYN_RECV;
+	newsk->sk_backlog_rcv = pipe_do_rcv;
+	newsk->sk_protocol = sk->sk_protocol;
+	newsk->sk_destruct = pipe_destruct;
+
+	newpn = pep_sk(newsk);
+	pn_skb_get_dst_sockaddr(skb, &dst);
+	pn_skb_get_src_sockaddr(skb, &src);
+	newpn->pn_sk.sobject = pn_sockaddr_get_object(&dst);
+	newpn->pn_sk.dobject = pn_sockaddr_get_object(&src);
+	newpn->pn_sk.resource = pn_sockaddr_get_resource(&dst);
 	sock_hold(sk);
-	pep_sk(newsk)->listener = sk;
+	newpn->listener = sk;
+	skb_queue_head_init(&newpn->ctrlreq_queue);
+	newpn->pipe_handle = pipe_handle;
+	atomic_set(&newpn->tx_credits, 0);
+	newpn->ifindex = 0;
+	newpn->peer_type = peer_type;
+	newpn->rx_credits = 0;
+	newpn->rx_fc = newpn->tx_fc = PN_LEGACY_FLOW_CONTROL;
+	newpn->init_enable = enabled;
+	newpn->aligned = aligned;
 
-	sock_hold(newsk);
-	sk_del_node_init(newsk);
-	sk_acceptq_removed(sk);
+	err = pep_accept_conn(newsk, skb);
+	if (err) {
+		sock_put(newsk);
+		newsk = NULL;
+		goto drop;
+	}
 	sk_add_node(newsk, &pn->hlist);
-	__sock_put(newsk);
-
-out:
+drop:
 	release_sock(sk);
+	kfree_skb(skb);
 	*errp = err;
 	return newsk;
 }
 
-#ifdef CONFIG_PHONET_PIPECTRLR
 static int pep_sock_connect(struct sock *sk, struct sockaddr *addr, int len)
 {
 	struct pep_sock *pn = pep_sk(sk);
-	struct sockaddr_pn *spn =  (struct sockaddr_pn *)addr;
-
-	memcpy(&pn->remote_pep, spn, sizeof(struct sockaddr_pn));
+	int err;
+	u8 data[4] = { 0 /* sub-blocks */, PAD, PAD, PAD };
 
-	return pipe_handler_send_req(sk,
-			PNS_PEP_CONNECT_UTID, PNS_PEP_CONNECT_REQ,
-			GFP_ATOMIC);
+	pn->pipe_handle = 1; /* anything but INVALID_HANDLE */
+	err = pipe_handler_request(sk, PNS_PEP_CONNECT_REQ,
+					PN_PIPE_ENABLE, data, 4);
+	if (err) {
+		pn->pipe_handle = PN_PIPE_INVALID_HANDLE;
+		return err;
+	}
+	sk->sk_state = TCP_SYN_SENT;
+	return 0;
 }
-#endif
 
 static int pep_ioctl(struct sock *sk, int cmd, unsigned long arg)
 {
@@ -1081,10 +903,18 @@ static int pep_init(struct sock *sk)
 {
 	struct pep_sock *pn = pep_sk(sk);
 
-	INIT_HLIST_HEAD(&pn->ackq);
+	sk->sk_destruct = pipe_destruct;
 	INIT_HLIST_HEAD(&pn->hlist);
+	pn->listener = NULL;
 	skb_queue_head_init(&pn->ctrlreq_queue);
+	atomic_set(&pn->tx_credits, 0);
+	pn->ifindex = 0;
+	pn->peer_type = 0;
 	pn->pipe_handle = PN_PIPE_INVALID_HANDLE;
+	pn->rx_credits = 0;
+	pn->rx_fc = pn->tx_fc = PN_LEGACY_FLOW_CONTROL;
+	pn->init_enable = 1;
+	pn->aligned = 0;
 	return 0;
 }
 
@@ -1103,18 +933,6 @@ static int pep_setsockopt(struct sock *sk, int level, int optname,
 
 	lock_sock(sk);
 	switch (optname) {
-#ifdef CONFIG_PHONET_PIPECTRLR
-	case PNPIPE_PIPE_HANDLE:
-		if (val) {
-			if (pn->pipe_state > PIPE_IDLE) {
-				err = -EFAULT;
-				break;
-			}
-			pn->pipe_handle = val;
-			break;
-		}
-#endif
-
 	case PNPIPE_ENCAP:
 		if (val && val != PNPIPE_ENCAP_IP) {
 			err = -EINVAL;
@@ -1141,16 +959,6 @@ static int pep_setsockopt(struct sock *sk, int level, int optname,
 		}
 		goto out_norel;
 
-#ifdef CONFIG_PHONET_PIPECTRLR
-	case PNPIPE_ENABLE:
-		if (pn->pipe_state <= PIPE_IDLE) {
-			err = -ENOTCONN;
-			break;
-		}
-		err = pipe_handler_enable_pipe(sk, val);
-		break;
-#endif
-
 	default:
 		err = -ENOPROTOOPT;
 	}
@@ -1180,13 +988,11 @@ static int pep_getsockopt(struct sock *sk, int level, int optname,
 		val = pn->ifindex;
 		break;
 
-#ifdef CONFIG_PHONET_PIPECTRLR
-	case PNPIPE_ENABLE:
-		if (pn->pipe_state <= PIPE_IDLE)
-			return -ENOTCONN;
-		val = pn->pipe_state != PIPE_DISABLED;
+	case PNPIPE_HANDLE:
+		val = pn->pipe_handle;
+		if (val == PN_PIPE_INVALID_HANDLE)
+			return -EINVAL;
 		break;
-#endif
 
 	default:
 		return -ENOPROTOOPT;
@@ -1222,11 +1028,7 @@ static int pipe_skb_send(struct sock *sk, struct sk_buff *skb)
 	} else
 		ph->message_id = PNS_PIPE_DATA;
 	ph->pipe_handle = pn->pipe_handle;
-#ifdef CONFIG_PHONET_PIPECTRLR
-	err = pn_skb_send(sk, skb, &pn->remote_pep);
-#else
-	err = pn_skb_send(sk, skb, &pipe_srv);
-#endif
+	err = pn_skb_send(sk, skb, NULL);
 
 	if (err && pn_flow_safe(pn->tx_fc))
 		atomic_inc(&pn->tx_credits);
@@ -1355,7 +1157,7 @@ struct sk_buff *pep_read(struct sock *sk)
 	struct sk_buff *skb = skb_dequeue(&sk->sk_receive_queue);
 
 	if (sk->sk_state == TCP_ESTABLISHED)
-		pipe_grant_credits(sk);
+		pipe_grant_credits(sk, GFP_ATOMIC);
 	return skb;
 }
 
@@ -1400,7 +1202,7 @@ static int pep_recvmsg(struct kiocb *iocb, struct sock *sk,
 	}
 
 	if (sk->sk_state == TCP_ESTABLISHED)
-		pipe_grant_credits(sk);
+		pipe_grant_credits(sk, GFP_KERNEL);
 	release_sock(sk);
 copy:
 	msg->msg_flags |= MSG_EOR;
@@ -1424,9 +1226,9 @@ static void pep_sock_unhash(struct sock *sk)
 
 	lock_sock(sk);
 
-#ifndef CONFIG_PHONET_PIPECTRLR
-	if ((1 << sk->sk_state) & ~(TCPF_CLOSE|TCPF_LISTEN)) {
+	if (pn->listener != NULL) {
 		skparent = pn->listener;
+		pn->listener = NULL;
 		release_sock(sk);
 
 		pn = pep_sk(skparent);
@@ -1434,7 +1236,7 @@ static void pep_sock_unhash(struct sock *sk)
 		sk_del_node_init(sk);
 		sk = skparent;
 	}
-#endif
+
 	/* Unhash a listening sock only when it is closed
 	 * and all of its active connected pipes are closed. */
 	if (hlist_empty(&pn->hlist))
@@ -1448,9 +1250,7 @@ static void pep_sock_unhash(struct sock *sk)
 static struct proto pep_proto = {
 	.close		= pep_sock_close,
 	.accept		= pep_sock_accept,
-#ifdef CONFIG_PHONET_PIPECTRLR
 	.connect	= pep_sock_connect,
-#endif
 	.ioctl		= pep_ioctl,
 	.init		= pep_init,
 	.setsockopt	= pep_setsockopt,
diff --git a/net/phonet/socket.c b/net/phonet/socket.c
index 25f746d..b1adafa 100644
--- a/net/phonet/socket.c
+++ b/net/phonet/socket.c
@@ -225,15 +225,18 @@ static int pn_socket_autobind(struct socket *sock)
 	return 0; /* socket was already bound */
 }
 
-#ifdef CONFIG_PHONET_PIPECTRLR
 static int pn_socket_connect(struct socket *sock, struct sockaddr *addr,
 		int len, int flags)
 {
 	struct sock *sk = sock->sk;
+	struct pn_sock *pn = pn_sk(sk);
 	struct sockaddr_pn *spn = (struct sockaddr_pn *)addr;
-	long timeo;
+	struct task_struct *tsk = current;
+	long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
 	int err;
 
+	if (pn_socket_autobind(sock))
+		return -ENOBUFS;
 	if (len < sizeof(struct sockaddr_pn))
 		return -EINVAL;
 	if (spn->spn_family != AF_PHONET)
@@ -243,82 +246,61 @@ static int pn_socket_connect(struct socket *sock, struct sockaddr *addr,
 
 	switch (sock->state) {
 	case SS_UNCONNECTED:
-		sk->sk_state = TCP_CLOSE;
-		break;
-	case SS_CONNECTING:
-		switch (sk->sk_state) {
-		case TCP_SYN_RECV:
-			sock->state = SS_CONNECTED;
+		if (sk->sk_state != TCP_CLOSE) {
 			err = -EISCONN;
 			goto out;
-		case TCP_CLOSE:
-			err = -EALREADY;
-			if (flags & O_NONBLOCK)
-				goto out;
-			goto wait_connect;
 		}
 		break;
-	case SS_CONNECTED:
-		switch (sk->sk_state) {
-		case TCP_SYN_RECV:
-			err = -EISCONN;
-			goto out;
-		case TCP_CLOSE:
-			sock->state = SS_UNCONNECTED;
-			break;
-		}
-		break;
-	case SS_DISCONNECTING:
-	case SS_FREE:
-		break;
+	case SS_CONNECTING:
+		err = -EALREADY;
+		goto out;
+	default:
+		err = -EISCONN;
+		goto out;
 	}
-	sk->sk_state = TCP_CLOSE;
-	sk_stream_kill_queues(sk);
 
+	pn->dobject = pn_sockaddr_get_object(spn);
+	pn->resource = pn_sockaddr_get_resource(spn);
 	sock->state = SS_CONNECTING;
+
 	err = sk->sk_prot->connect(sk, addr, len);
-	if (err < 0) {
+	if (err) {
 		sock->state = SS_UNCONNECTED;
-		sk->sk_state = TCP_CLOSE;
+		pn->dobject = 0;
 		goto out;
 	}
 
-	err = -EINPROGRESS;
-wait_connect:
-	if (sk->sk_state != TCP_SYN_RECV && (flags & O_NONBLOCK))
-		goto out;
-
-	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
-	release_sock(sk);
-
-	err = -ERESTARTSYS;
-	timeo = wait_event_interruptible_timeout(*sk_sleep(sk),
-			sk->sk_state != TCP_CLOSE,
-			timeo);
-
-	lock_sock(sk);
-	if (timeo < 0)
-		goto out; /* -ERESTARTSYS */
+	while (sk->sk_state == TCP_SYN_SENT) {
+		DEFINE_WAIT(wait);
 
-	err = -ETIMEDOUT;
-	if (timeo == 0 && sk->sk_state != TCP_SYN_RECV)
-		goto out;
+		if (!timeo) {
+			err = -EINPROGRESS;
+			goto out;
+		}
+		if (signal_pending(tsk)) {
+			err = sock_intr_errno(timeo);
+			goto out;
+		}
 
-	if (sk->sk_state != TCP_SYN_RECV) {
-		sock->state = SS_UNCONNECTED;
-		err = sock_error(sk);
-		if (!err)
-			err = -ECONNREFUSED;
-		goto out;
+		prepare_to_wait_exclusive(sk_sleep(sk), &wait,
+						TASK_INTERRUPTIBLE);
+		release_sock(sk);
+		timeo = schedule_timeout(timeo);
+		lock_sock(sk);
+		finish_wait(sk_sleep(sk), &wait);
 	}
-	sock->state = SS_CONNECTED;
-	err = 0;
 
+	if ((1 << sk->sk_state) & (TCPF_SYN_RECV|TCPF_ESTABLISHED))
+		err = 0;
+	else if (sk->sk_state == TCP_CLOSE_WAIT)
+		err = -ECONNRESET;
+	else
+		err = -ECONNREFUSED;
+	sock->state = err ? SS_UNCONNECTED : SS_CONNECTED;
 out:
 	release_sock(sk);
 	return err;
 }
-#endif
 
 static int pn_socket_accept(struct socket *sock, struct socket *newsock,
 				int flags)
@@ -327,6 +309,9 @@ static int pn_socket_accept(struct socket *sock, struct socket *newsock,
 	struct sock *newsk;
 	int err;
 
+	if (unlikely(sk->sk_state != TCP_LISTEN))
+		return -EINVAL;
+
 	newsk = sk->sk_prot->accept(sk, flags, &err);
 	if (!newsk)
 		return err;
@@ -363,13 +348,8 @@ static unsigned int pn_socket_poll(struct file *file, struct socket *sock,
 
 	poll_wait(file, sk_sleep(sk), wait);
 
-	switch (sk->sk_state) {
-	case TCP_LISTEN:
-		return hlist_empty(&pn->ackq) ? 0 : POLLIN;
-	case TCP_CLOSE:
+	if (sk->sk_state == TCP_CLOSE)
 		return POLLERR;
-	}
-
 	if (!skb_queue_empty(&sk->sk_receive_queue))
 		mask |= POLLIN | POLLRDNORM;
 	if (!skb_queue_empty(&pn->ctrlreq_queue))
@@ -428,19 +408,19 @@ static int pn_socket_listen(struct socket *sock, int backlog)
 	struct sock *sk = sock->sk;
 	int err = 0;
 
-	if (sock->state != SS_UNCONNECTED)
-		return -EINVAL;
 	if (pn_socket_autobind(sock))
 		return -ENOBUFS;
 
 	lock_sock(sk);
-	if (sk->sk_state != TCP_CLOSE) {
+	if (sock->state != SS_UNCONNECTED) {
 		err = -EINVAL;
 		goto out;
 	}
 
-	sk->sk_state = TCP_LISTEN;
-	sk->sk_ack_backlog = 0;
+	if (sk->sk_state != TCP_LISTEN) {
+		sk->sk_state = TCP_LISTEN;
+		sk->sk_ack_backlog = 0;
+	}
 	sk->sk_max_ack_backlog = backlog;
 out:
 	release_sock(sk);
@@ -488,11 +468,7 @@ const struct proto_ops phonet_stream_ops = {
 	.owner		= THIS_MODULE,
 	.release	= pn_socket_release,
 	.bind		= pn_socket_bind,
-#ifdef CONFIG_PHONET_PIPECTRLR
 	.connect	= pn_socket_connect,
-#else
-	.connect	= sock_no_connect,
-#endif
 	.socketpair	= sock_no_socketpair,
 	.accept		= pn_socket_accept,
 	.getname	= pn_socket_getname,
@@ -633,8 +609,8 @@ static int pn_sock_seq_show(struct seq_file *seq, void *v)
 
 		seq_printf(seq, "%2d %04X:%04X:%02X %02X %08X:%08X %5d %lu "
 			"%d %p %d%n",
-			sk->sk_protocol, pn->sobject, 0, pn->resource,
-			sk->sk_state,
+			sk->sk_protocol, pn->sobject, pn->dobject,
+			pn->resource, sk->sk_state,
 			sk_wmem_alloc_get(sk), sk_rmem_alloc_get(sk),
 			sock_i_uid(sk), sock_i_ino(sk),
 			atomic_read(&sk->sk_refcnt), sk,
diff --git a/net/rds/rds.h b/net/rds/rds.h
index 9542449..da8adac 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -50,7 +50,6 @@ rdsdebug(char *fmt, ...)
 #define RDS_FRAG_SIZE	((unsigned int)(1 << RDS_FRAG_SHIFT))
 
 #define RDS_CONG_MAP_BYTES	(65536 / 8)
-#define RDS_CONG_MAP_LONGS	(RDS_CONG_MAP_BYTES / sizeof(unsigned long))
 #define RDS_CONG_MAP_PAGES	(PAGE_ALIGN(RDS_CONG_MAP_BYTES) / PAGE_SIZE)
 #define RDS_CONG_MAP_PAGE_BITS	(PAGE_SIZE * 8)
 
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index d952e7e..5ee0c62 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -803,7 +803,6 @@ static int rose_connect(struct socket *sock, struct sockaddr *uaddr, int addr_le
 
 		rose_insert_socket(sk);		/* Finish the bind */
 	}
-rose_try_next_neigh:
 	rose->dest_addr   = addr->srose_addr;
 	rose->dest_call   = addr->srose_call;
 	rose->rand        = ((long)rose & 0xFFFF) + rose->lci;
@@ -865,12 +864,6 @@ rose_try_next_neigh:
 	}
 
 	if (sk->sk_state != TCP_ESTABLISHED) {
-	/* Try next neighbour */
-		rose->neighbour = rose_get_neigh(&addr->srose_addr, &cause, &diagnostic, 0);
-		if (rose->neighbour)
-			goto rose_try_next_neigh;
-
-		/* No more neighbours */
 		sock->state = SS_UNCONNECTED;
 		err = sock_error(sk);	/* Always set at this point */
 		goto out_release;
diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c
index b4fdaac..88a77e9 100644
--- a/net/rose/rose_route.c
+++ b/net/rose/rose_route.c
@@ -674,29 +674,34 @@ struct rose_route *rose_route_free_lci(unsigned int lci, struct rose_neigh *neig
  *	Find a neighbour or a route given a ROSE address.
  */
 struct rose_neigh *rose_get_neigh(rose_address *addr, unsigned char *cause,
-	unsigned char *diagnostic, int new)
+	unsigned char *diagnostic, int route_frame)
 {
 	struct rose_neigh *res = NULL;
 	struct rose_node *node;
 	int failed = 0;
 	int i;
 
-	if (!new) spin_lock_bh(&rose_node_list_lock);
+	if (!route_frame) spin_lock_bh(&rose_node_list_lock);
 	for (node = rose_node_list; node != NULL; node = node->next) {
 		if (rosecmpm(addr, &node->address, node->mask) == 0) {
 			for (i = 0; i < node->count; i++) {
-				if (new) {
-					if (node->neighbour[i]->restarted) {
-						res = node->neighbour[i];
-						goto out;
-					}
+				if (node->neighbour[i]->restarted) {
+					res = node->neighbour[i];
+					goto out;
 				}
-				else {
+			}
+		}
+	}
+	if (!route_frame) { /* connect request */
+		for (node = rose_node_list; node != NULL; node = node->next) {
+			if (rosecmpm(addr, &node->address, node->mask) == 0) {
+				for (i = 0; i < node->count; i++) {
 					if (!rose_ftimer_running(node->neighbour[i])) {
 						res = node->neighbour[i];
+						failed = 0;
 						goto out;
-					} else
-						failed = 1;
+					}
+					failed = 1;
 				}
 			}
 		}
@@ -711,8 +716,7 @@ struct rose_neigh *rose_get_neigh(rose_address *addr, unsigned char *cause,
 	}
 
 out:
-	if (!new) spin_unlock_bh(&rose_node_list_lock);
-
+	if (!route_frame) spin_unlock_bh(&rose_node_list_lock);
 	return res;
 }
 
diff --git a/net/rxrpc/ar-peer.c b/net/rxrpc/ar-peer.c
index a53fb25..3620c56 100644
--- a/net/rxrpc/ar-peer.c
+++ b/net/rxrpc/ar-peer.c
@@ -37,7 +37,6 @@ static void rxrpc_assess_MTU_size(struct rxrpc_peer *peer)
 {
 	struct rtable *rt;
 	struct flowi fl;
-	int ret;
 
 	peer->if_mtu = 1500;
 
@@ -58,9 +57,9 @@ static void rxrpc_assess_MTU_size(struct rxrpc_peer *peer)
 		BUG();
 	}
 
-	ret = ip_route_output_key(&init_net, &rt, &fl);
-	if (ret < 0) {
-		_leave(" [route err %d]", ret);
+	rt = ip_route_output_key(&init_net, &fl);
+	if (IS_ERR(rt)) {
+		_leave(" [route err %ld]", PTR_ERR(rt));
 		return;
 	}
 
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index f04d4a4..a7a5583d 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -126,6 +126,17 @@ config NET_SCH_RED
 	  To compile this code as a module, choose M here: the
 	  module will be called sch_red.
 
+config NET_SCH_SFB
+	tristate "Stochastic Fair Blue (SFB)"
+	---help---
+	  Say Y here if you want to use the Stochastic Fair Blue (SFB)
+	  packet scheduling algorithm.
+
+	  See the top of <file:net/sched/sch_sfb.c> for more details.
+
+	  To compile this code as a module, choose M here: the
+	  module will be called sch_sfb.
+
 config NET_SCH_SFQ
 	tristate "Stochastic Fairness Queueing (SFQ)"
 	---help---
@@ -205,6 +216,29 @@ config NET_SCH_DRR
 
 	  If unsure, say N.
 
+config NET_SCH_MQPRIO
+	tristate "Multi-queue priority scheduler (MQPRIO)"
+	help
+	  Say Y here if you want to use the Multi-queue Priority scheduler.
+	  This scheduler allows QOS to be offloaded on NICs that have support
+	  for offloading QOS schedulers.
+
+	  To compile this driver as a module, choose M here: the module will
+	  be called sch_mqprio.
+
+	  If unsure, say N.
+
+config NET_SCH_CHOKE
+	tristate "CHOose and Keep responsive flow scheduler (CHOKE)"
+	help
+	  Say Y here if you want to use the CHOKe packet scheduler (CHOose
+	  and Keep for responsive flows, CHOose and Kill for unresponsive
+	  flows). This is a variation of RED which trys to penalize flows
+	  that monopolize the queue.
+
+	  To compile this code as a module, choose M here: the
+	  module will be called sch_choke.
+
 config NET_SCH_INGRESS
 	tristate "Ingress Qdisc"
 	depends on NET_CLS_ACT
@@ -243,7 +277,7 @@ config NET_CLS_TCINDEX
 
 config NET_CLS_ROUTE4
 	tristate "Routing decision (ROUTE)"
-	select NET_CLS_ROUTE
+	select IP_ROUTE_CLASSID
 	select NET_CLS
 	---help---
 	  If you say Y here, you will be able to classify packets
@@ -252,9 +286,6 @@ config NET_CLS_ROUTE4
 	  To compile this code as a module, choose M here: the
 	  module will be called cls_route.
 
-config NET_CLS_ROUTE
-	bool
-
 config NET_CLS_FW
 	tristate "Netfilter mark (FW)"
 	select NET_CLS
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 960f5db..2e77b8d 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -24,6 +24,7 @@ obj-$(CONFIG_NET_SCH_RED)	+= sch_red.o
 obj-$(CONFIG_NET_SCH_GRED)	+= sch_gred.o
 obj-$(CONFIG_NET_SCH_INGRESS)	+= sch_ingress.o 
 obj-$(CONFIG_NET_SCH_DSMARK)	+= sch_dsmark.o
+obj-$(CONFIG_NET_SCH_SFB)	+= sch_sfb.o
 obj-$(CONFIG_NET_SCH_SFQ)	+= sch_sfq.o
 obj-$(CONFIG_NET_SCH_TBF)	+= sch_tbf.o
 obj-$(CONFIG_NET_SCH_TEQL)	+= sch_teql.o
@@ -32,6 +33,9 @@ obj-$(CONFIG_NET_SCH_MULTIQ)	+= sch_multiq.o
 obj-$(CONFIG_NET_SCH_ATM)	+= sch_atm.o
 obj-$(CONFIG_NET_SCH_NETEM)	+= sch_netem.o
 obj-$(CONFIG_NET_SCH_DRR)	+= sch_drr.o
+obj-$(CONFIG_NET_SCH_MQPRIO)	+= sch_mqprio.o
+obj-$(CONFIG_NET_SCH_CHOKE)	+= sch_choke.o
+
 obj-$(CONFIG_NET_CLS_U32)	+= cls_u32.o
 obj-$(CONFIG_NET_CLS_ROUTE4)	+= cls_route.o
 obj-$(CONFIG_NET_CLS_FW)	+= cls_fw.o
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 23b25f8..15873e1 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -78,7 +78,7 @@ static int tcf_dump_walker(struct sk_buff *skb, struct netlink_callback *cb,
 			   struct tc_action *a, struct tcf_hashinfo *hinfo)
 {
 	struct tcf_common *p;
-	int err = 0, index = -1,i = 0, s_i = 0, n_i = 0;
+	int err = 0, index = -1, i = 0, s_i = 0, n_i = 0;
 	struct nlattr *nest;
 
 	read_lock_bh(hinfo->lock);
@@ -126,7 +126,7 @@ static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a,
 {
 	struct tcf_common *p, *s_p;
 	struct nlattr *nest;
-	int i= 0, n_i = 0;
+	int i = 0, n_i = 0;
 
 	nest = nla_nest_start(skb, a->order);
 	if (nest == NULL)
@@ -138,7 +138,7 @@ static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a,
 		while (p != NULL) {
 			s_p = p->tcfc_next;
 			if (ACT_P_DELETED == tcf_hash_release(p, 0, hinfo))
-				 module_put(a->ops->owner);
+				module_put(a->ops->owner);
 			n_i++;
 			p = s_p;
 		}
@@ -447,7 +447,8 @@ tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
 	nest = nla_nest_start(skb, TCA_OPTIONS);
 	if (nest == NULL)
 		goto nla_put_failure;
-	if ((err = tcf_action_dump_old(skb, a, bind, ref)) > 0) {
+	err = tcf_action_dump_old(skb, a, bind, ref);
+	if (err > 0) {
 		nla_nest_end(skb, nest);
 		return err;
 	}
@@ -491,7 +492,7 @@ struct tc_action *tcf_action_init_1(struct nlattr *nla, struct nlattr *est,
 	struct tc_action *a;
 	struct tc_action_ops *a_o;
 	char act_name[IFNAMSIZ];
-	struct nlattr *tb[TCA_ACT_MAX+1];
+	struct nlattr *tb[TCA_ACT_MAX + 1];
 	struct nlattr *kind;
 	int err;
 
@@ -549,9 +550,9 @@ struct tc_action *tcf_action_init_1(struct nlattr *nla, struct nlattr *est,
 		goto err_free;
 
 	/* module count goes up only when brand new policy is created
-	   if it exists and is only bound to in a_o->init() then
-	   ACT_P_CREATED is not returned (a zero is).
-	*/
+	 * if it exists and is only bound to in a_o->init() then
+	 * ACT_P_CREATED is not returned (a zero is).
+	 */
 	if (err != ACT_P_CREATED)
 		module_put(a_o->owner);
 	a->ops = a_o;
@@ -569,7 +570,7 @@ err_out:
 struct tc_action *tcf_action_init(struct nlattr *nla, struct nlattr *est,
 				  char *name, int ovr, int bind)
 {
-	struct nlattr *tb[TCA_ACT_MAX_PRIO+1];
+	struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
 	struct tc_action *head = NULL, *act, *act_prev = NULL;
 	int err;
 	int i;
@@ -697,7 +698,7 @@ act_get_notify(struct net *net, u32 pid, struct nlmsghdr *n,
 static struct tc_action *
 tcf_action_get_1(struct nlattr *nla, struct nlmsghdr *n, u32 pid)
 {
-	struct nlattr *tb[TCA_ACT_MAX+1];
+	struct nlattr *tb[TCA_ACT_MAX + 1];
 	struct tc_action *a;
 	int index;
 	int err;
@@ -770,7 +771,7 @@ static int tca_action_flush(struct net *net, struct nlattr *nla,
 	struct tcamsg *t;
 	struct netlink_callback dcb;
 	struct nlattr *nest;
-	struct nlattr *tb[TCA_ACT_MAX+1];
+	struct nlattr *tb[TCA_ACT_MAX + 1];
 	struct nlattr *kind;
 	struct tc_action *a = create_a(0);
 	int err = -ENOMEM;
@@ -821,7 +822,8 @@ static int tca_action_flush(struct net *net, struct nlattr *nla,
 	nlh->nlmsg_flags |= NLM_F_ROOT;
 	module_put(a->ops->owner);
 	kfree(a);
-	err = rtnetlink_send(skb, net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
+	err = rtnetlink_send(skb, net, pid, RTNLGRP_TC,
+			     n->nlmsg_flags & NLM_F_ECHO);
 	if (err > 0)
 		return 0;
 
@@ -842,14 +844,14 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
 	      u32 pid, int event)
 {
 	int i, ret;
-	struct nlattr *tb[TCA_ACT_MAX_PRIO+1];
+	struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
 	struct tc_action *head = NULL, *act, *act_prev = NULL;
 
 	ret = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL);
 	if (ret < 0)
 		return ret;
 
-	if (event == RTM_DELACTION && n->nlmsg_flags&NLM_F_ROOT) {
+	if (event == RTM_DELACTION && n->nlmsg_flags & NLM_F_ROOT) {
 		if (tb[1] != NULL)
 			return tca_action_flush(net, tb[1], n, pid);
 		else
@@ -892,7 +894,7 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
 		/* now do the delete */
 		tcf_action_destroy(head, 0);
 		ret = rtnetlink_send(skb, net, pid, RTNLGRP_TC,
-				     n->nlmsg_flags&NLM_F_ECHO);
+				     n->nlmsg_flags & NLM_F_ECHO);
 		if (ret > 0)
 			return 0;
 		return ret;
@@ -936,7 +938,7 @@ static int tcf_add_notify(struct net *net, struct tc_action *a,
 	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
 	NETLINK_CB(skb).dst_group = RTNLGRP_TC;
 
-	err = rtnetlink_send(skb, net, pid, RTNLGRP_TC, flags&NLM_F_ECHO);
+	err = rtnetlink_send(skb, net, pid, RTNLGRP_TC, flags & NLM_F_ECHO);
 	if (err > 0)
 		err = 0;
 	return err;
@@ -967,7 +969,7 @@ tcf_action_add(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
 
 	/* dump then free all the actions after update; inserted policy
 	 * stays intact
-	 * */
+	 */
 	ret = tcf_add_notify(net, act, pid, seq, RTM_NEWACTION, n->nlmsg_flags);
 	for (a = act; a; a = act) {
 		act = a->next;
@@ -993,8 +995,7 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 		return -EINVAL;
 	}
 
-	/* n->nlmsg_flags&NLM_F_CREATE
-	 * */
+	/* n->nlmsg_flags & NLM_F_CREATE */
 	switch (n->nlmsg_type) {
 	case RTM_NEWACTION:
 		/* we are going to assume all other flags
@@ -1003,7 +1004,7 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 		 * but since we want avoid ambiguity (eg when flags
 		 * is zero) then just set this
 		 */
-		if (n->nlmsg_flags&NLM_F_REPLACE)
+		if (n->nlmsg_flags & NLM_F_REPLACE)
 			ovr = 1;
 replay:
 		ret = tcf_action_add(net, tca[TCA_ACT_TAB], n, pid, ovr);
@@ -1028,7 +1029,7 @@ replay:
 static struct nlattr *
 find_dump_kind(const struct nlmsghdr *n)
 {
-	struct nlattr *tb1, *tb2[TCA_ACT_MAX+1];
+	struct nlattr *tb1, *tb2[TCA_ACT_MAX + 1];
 	struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
 	struct nlattr *nla[TCAA_MAX + 1];
 	struct nlattr *kind;
@@ -1071,9 +1072,8 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
 	}
 
 	a_o = tc_lookup_action(kind);
-	if (a_o == NULL) {
+	if (a_o == NULL)
 		return 0;
-	}
 
 	memset(&a, 0, sizeof(struct tc_action));
 	a.ops = a_o;
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index 83ddfc0..6cdf9ab 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -63,7 +63,7 @@ static int tcf_csum_init(struct nlattr *nla, struct nlattr *est,
 	if (nla == NULL)
 		return -EINVAL;
 
-	err = nla_parse_nested(tb, TCA_CSUM_MAX, nla,csum_policy);
+	err = nla_parse_nested(tb, TCA_CSUM_MAX, nla, csum_policy);
 	if (err < 0)
 		return err;
 
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index c2ed90a..2b4ab4b 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -50,7 +50,7 @@ static int gact_determ(struct tcf_gact *gact)
 }
 
 typedef int (*g_rand)(struct tcf_gact *gact);
-static g_rand gact_rand[MAX_RAND]= { NULL, gact_net_rand, gact_determ };
+static g_rand gact_rand[MAX_RAND] = { NULL, gact_net_rand, gact_determ };
 #endif /* CONFIG_GACT_PROB */
 
 static const struct nla_policy gact_policy[TCA_GACT_MAX + 1] = {
@@ -89,7 +89,7 @@ static int tcf_gact_init(struct nlattr *nla, struct nlattr *est,
 		pc = tcf_hash_create(parm->index, est, a, sizeof(*gact),
 				     bind, &gact_idx_gen, &gact_hash_info);
 		if (IS_ERR(pc))
-		    return PTR_ERR(pc);
+			return PTR_ERR(pc);
 		ret = ACT_P_CREATED;
 	} else {
 		if (!ovr) {
@@ -205,9 +205,9 @@ MODULE_LICENSE("GPL");
 static int __init gact_init_module(void)
 {
 #ifdef CONFIG_GACT_PROB
-	printk(KERN_INFO "GACT probability on\n");
+	pr_info("GACT probability on\n");
 #else
-	printk(KERN_INFO "GACT probability NOT on\n");
+	pr_info("GACT probability NOT on\n");
 #endif
 	return tcf_register_action(&act_gact_ops);
 }
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index c2a7c20..9fc211a 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -138,7 +138,7 @@ static int tcf_ipt_init(struct nlattr *nla, struct nlattr *est,
 		pc = tcf_hash_create(index, est, a, sizeof(*ipt), bind,
 				     &ipt_idx_gen, &ipt_hash_info);
 		if (IS_ERR(pc))
-		    return PTR_ERR(pc);
+			return PTR_ERR(pc);
 		ret = ACT_P_CREATED;
 	} else {
 		if (!ovr) {
@@ -162,7 +162,8 @@ static int tcf_ipt_init(struct nlattr *nla, struct nlattr *est,
 	if (unlikely(!t))
 		goto err2;
 
-	if ((err = ipt_init_target(t, tname, hook)) < 0)
+	err = ipt_init_target(t, tname, hook);
+	if (err < 0)
 		goto err3;
 
 	spin_lock_bh(&ipt->tcf_lock);
@@ -212,8 +213,9 @@ static int tcf_ipt(struct sk_buff *skb, struct tc_action *a,
 	bstats_update(&ipt->tcf_bstats, skb);
 
 	/* yes, we have to worry about both in and out dev
-	 worry later - danger - this API seems to have changed
-	 from earlier kernels */
+	 * worry later - danger - this API seems to have changed
+	 * from earlier kernels
+	 */
 	par.in       = skb->dev;
 	par.out      = NULL;
 	par.hooknum  = ipt->tcfi_hook;
@@ -253,9 +255,9 @@ static int tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind, int
 	struct tc_cnt c;
 
 	/* for simple targets kernel size == user size
-	** user name = target name
-	** for foolproof you need to not assume this
-	*/
+	 * user name = target name
+	 * for foolproof you need to not assume this
+	 */
 
 	t = kmemdup(ipt->tcfi_t, ipt->tcfi_t->u.user.target_size, GFP_ATOMIC);
 	if (unlikely(!t))
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index d765067..961386e 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -41,13 +41,13 @@ static struct tcf_hashinfo mirred_hash_info = {
 	.lock	=	&mirred_lock,
 };
 
-static inline int tcf_mirred_release(struct tcf_mirred *m, int bind)
+static int tcf_mirred_release(struct tcf_mirred *m, int bind)
 {
 	if (m) {
 		if (bind)
 			m->tcf_bindcnt--;
 		m->tcf_refcnt--;
-		if(!m->tcf_bindcnt && m->tcf_refcnt <= 0) {
+		if (!m->tcf_bindcnt && m->tcf_refcnt <= 0) {
 			list_del(&m->tcfm_list);
 			if (m->tcfm_dev)
 				dev_put(m->tcfm_dev);
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index 178a4bd..762b027 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -69,7 +69,7 @@ static int tcf_nat_init(struct nlattr *nla, struct nlattr *est,
 		pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind,
 				     &nat_idx_gen, &nat_hash_info);
 		if (IS_ERR(pc))
-		    return PTR_ERR(pc);
+			return PTR_ERR(pc);
 		p = to_tcf_nat(pc);
 		ret = ACT_P_CREATED;
 	} else {
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 445bef7..50c7c06 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -70,7 +70,7 @@ static int tcf_pedit_init(struct nlattr *nla, struct nlattr *est,
 		pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind,
 				     &pedit_idx_gen, &pedit_hash_info);
 		if (IS_ERR(pc))
-		    return PTR_ERR(pc);
+			return PTR_ERR(pc);
 		p = to_pedit(pc);
 		keys = kmalloc(ksize, GFP_KERNEL);
 		if (keys == NULL) {
@@ -127,11 +127,9 @@ static int tcf_pedit(struct sk_buff *skb, struct tc_action *a,
 	int i, munged = 0;
 	unsigned int off;
 
-	if (skb_cloned(skb)) {
-		if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) {
-			return p->tcf_action;
-		}
-	}
+	if (skb_cloned(skb) &&
+	    pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+		return p->tcf_action;
 
 	off = skb_network_offset(skb);
 
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index e2f08b1..8a16307 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -22,8 +22,8 @@
 #include <net/act_api.h>
 #include <net/netlink.h>
 
-#define L2T(p,L)   qdisc_l2t((p)->tcfp_R_tab, L)
-#define L2T_P(p,L) qdisc_l2t((p)->tcfp_P_tab, L)
+#define L2T(p, L)   qdisc_l2t((p)->tcfp_R_tab, L)
+#define L2T_P(p, L) qdisc_l2t((p)->tcfp_P_tab, L)
 
 #define POL_TAB_MASK     15
 static struct tcf_common *tcf_police_ht[POL_TAB_MASK + 1];
@@ -37,8 +37,7 @@ static struct tcf_hashinfo police_hash_info = {
 };
 
 /* old policer structure from before tc actions */
-struct tc_police_compat
-{
+struct tc_police_compat {
 	u32			index;
 	int			action;
 	u32			limit;
@@ -139,7 +138,7 @@ static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = {
 static int tcf_act_police_locate(struct nlattr *nla, struct nlattr *est,
 				 struct tc_action *a, int ovr, int bind)
 {
-	unsigned h;
+	unsigned int h;
 	int ret = 0, err;
 	struct nlattr *tb[TCA_POLICE_MAX + 1];
 	struct tc_police *parm;
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 7287cff..a34a22d 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -47,7 +47,7 @@ static int tcf_simp(struct sk_buff *skb, struct tc_action *a, struct tcf_result
 	/* print policy string followed by _ then packet count
 	 * Example if this was the 3rd packet and the string was "hello"
 	 * then it would look like "hello_3" (without quotes)
-	 **/
+	 */
 	pr_info("simple: %s_%d\n",
 	       (char *)d->tcfd_defdata, d->tcf_bstats.packets);
 	spin_unlock(&d->tcf_lock);
@@ -125,7 +125,7 @@ static int tcf_simp_init(struct nlattr *nla, struct nlattr *est,
 		pc = tcf_hash_create(parm->index, est, a, sizeof(*d), bind,
 				     &simp_idx_gen, &simp_hash_info);
 		if (IS_ERR(pc))
-		    return PTR_ERR(pc);
+			return PTR_ERR(pc);
 
 		d = to_defact(pc);
 		ret = alloc_defdata(d, defdata);
@@ -149,7 +149,7 @@ static int tcf_simp_init(struct nlattr *nla, struct nlattr *est,
 	return ret;
 }
 
-static inline int tcf_simp_cleanup(struct tc_action *a, int bind)
+static int tcf_simp_cleanup(struct tc_action *a, int bind)
 {
 	struct tcf_defact *d = a->priv;
 
@@ -158,8 +158,8 @@ static inline int tcf_simp_cleanup(struct tc_action *a, int bind)
 	return 0;
 }
 
-static inline int tcf_simp_dump(struct sk_buff *skb, struct tc_action *a,
-				int bind, int ref)
+static int tcf_simp_dump(struct sk_buff *skb, struct tc_action *a,
+			 int bind, int ref)
 {
 	unsigned char *b = skb_tail_pointer(skb);
 	struct tcf_defact *d = a->priv;
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index 836f5fe..5f6f0c7 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -113,7 +113,7 @@ static int tcf_skbedit_init(struct nlattr *nla, struct nlattr *est,
 		pc = tcf_hash_create(parm->index, est, a, sizeof(*d), bind,
 				     &skbedit_idx_gen, &skbedit_hash_info);
 		if (IS_ERR(pc))
-		    return PTR_ERR(pc);
+			return PTR_ERR(pc);
 
 		d = to_skbedit(pc);
 		ret = ACT_P_CREATED;
@@ -144,7 +144,7 @@ static int tcf_skbedit_init(struct nlattr *nla, struct nlattr *est,
 	return ret;
 }
 
-static inline int tcf_skbedit_cleanup(struct tc_action *a, int bind)
+static int tcf_skbedit_cleanup(struct tc_action *a, int bind)
 {
 	struct tcf_skbedit *d = a->priv;
 
@@ -153,8 +153,8 @@ static inline int tcf_skbedit_cleanup(struct tc_action *a, int bind)
 	return 0;
 }
 
-static inline int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a,
-				int bind, int ref)
+static int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a,
+			    int bind, int ref)
 {
 	unsigned char *b = skb_tail_pointer(skb);
 	struct tcf_skbedit *d = a->priv;
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 5fd0c28..bb2c523 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -85,7 +85,7 @@ int unregister_tcf_proto_ops(struct tcf_proto_ops *ops)
 	int rc = -ENOENT;
 
 	write_lock(&cls_mod_lock);
-	for (tp = &tcf_proto_base; (t=*tp) != NULL; tp = &t->next)
+	for (tp = &tcf_proto_base; (t = *tp) != NULL; tp = &t->next)
 		if (t == ops)
 			break;
 
@@ -111,7 +111,7 @@ static inline u32 tcf_auto_prio(struct tcf_proto *tp)
 	u32 first = TC_H_MAKE(0xC0000000U, 0U);
 
 	if (tp)
-		first = tp->prio-1;
+		first = tp->prio - 1;
 
 	return first;
 }
@@ -149,7 +149,8 @@ replay:
 
 	if (prio == 0) {
 		/* If no priority is given, user wants we allocated it. */
-		if (n->nlmsg_type != RTM_NEWTFILTER || !(n->nlmsg_flags&NLM_F_CREATE))
+		if (n->nlmsg_type != RTM_NEWTFILTER ||
+		    !(n->nlmsg_flags & NLM_F_CREATE))
 			return -ENOENT;
 		prio = TC_H_MAKE(0x80000000U, 0U);
 	}
@@ -176,7 +177,8 @@ replay:
 	}
 
 	/* Is it classful? */
-	if ((cops = q->ops->cl_ops) == NULL)
+	cops = q->ops->cl_ops;
+	if (!cops)
 		return -EINVAL;
 
 	if (cops->tcf_chain == NULL)
@@ -196,10 +198,11 @@ replay:
 		goto errout;
 
 	/* Check the chain for existence of proto-tcf with this priority */
-	for (back = chain; (tp=*back) != NULL; back = &tp->next) {
+	for (back = chain; (tp = *back) != NULL; back = &tp->next) {
 		if (tp->prio >= prio) {
 			if (tp->prio == prio) {
-				if (!nprio || (tp->protocol != protocol && protocol))
+				if (!nprio ||
+				    (tp->protocol != protocol && protocol))
 					goto errout;
 			} else
 				tp = NULL;
@@ -216,7 +219,8 @@ replay:
 			goto errout;
 
 		err = -ENOENT;
-		if (n->nlmsg_type != RTM_NEWTFILTER || !(n->nlmsg_flags&NLM_F_CREATE))
+		if (n->nlmsg_type != RTM_NEWTFILTER ||
+		    !(n->nlmsg_flags & NLM_F_CREATE))
 			goto errout;
 
 
@@ -420,7 +424,8 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
 
 	if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm)))
 		return skb->len;
-	if ((dev = __dev_get_by_index(net, tcm->tcm_ifindex)) == NULL)
+	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
+	if (!dev)
 		return skb->len;
 
 	if (!tcm->tcm_parent)
@@ -429,7 +434,8 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
 		q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent));
 	if (!q)
 		goto out;
-	if ((cops = q->ops->cl_ops) == NULL)
+	cops = q->ops->cl_ops;
+	if (!cops)
 		goto errout;
 	if (cops->tcf_chain == NULL)
 		goto errout;
@@ -444,8 +450,9 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
 
 	s_t = cb->args[0];
 
-	for (tp=*chain, t=0; tp; tp = tp->next, t++) {
-		if (t < s_t) continue;
+	for (tp = *chain, t = 0; tp; tp = tp->next, t++) {
+		if (t < s_t)
+			continue;
 		if (TC_H_MAJ(tcm->tcm_info) &&
 		    TC_H_MAJ(tcm->tcm_info) != tp->prio)
 			continue;
@@ -468,10 +475,10 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
 		arg.skb = skb;
 		arg.cb = cb;
 		arg.w.stop = 0;
-		arg.w.skip = cb->args[1]-1;
+		arg.w.skip = cb->args[1] - 1;
 		arg.w.count = 0;
 		tp->ops->walk(tp, &arg.w);
-		cb->args[1] = arg.w.count+1;
+		cb->args[1] = arg.w.count + 1;
 		if (arg.w.stop)
 			break;
 	}
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index f23d915..8be8872 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -21,14 +21,12 @@
 #include <net/act_api.h>
 #include <net/pkt_cls.h>
 
-struct basic_head
-{
+struct basic_head {
 	u32			hgenerator;
 	struct list_head	flist;
 };
 
-struct basic_filter
-{
+struct basic_filter {
 	u32			handle;
 	struct tcf_exts		exts;
 	struct tcf_ematch_tree	ematches;
@@ -92,8 +90,7 @@ static int basic_init(struct tcf_proto *tp)
 	return 0;
 }
 
-static inline void basic_delete_filter(struct tcf_proto *tp,
-				       struct basic_filter *f)
+static void basic_delete_filter(struct tcf_proto *tp, struct basic_filter *f)
 {
 	tcf_unbind_filter(tp, &f->res);
 	tcf_exts_destroy(tp, &f->exts);
@@ -135,9 +132,9 @@ static const struct nla_policy basic_policy[TCA_BASIC_MAX + 1] = {
 	[TCA_BASIC_EMATCHES]	= { .type = NLA_NESTED },
 };
 
-static inline int basic_set_parms(struct tcf_proto *tp, struct basic_filter *f,
-				  unsigned long base, struct nlattr **tb,
-				  struct nlattr *est)
+static int basic_set_parms(struct tcf_proto *tp, struct basic_filter *f,
+			   unsigned long base, struct nlattr **tb,
+			   struct nlattr *est)
 {
 	int err = -EINVAL;
 	struct tcf_exts e;
@@ -203,7 +200,7 @@ static int basic_change(struct tcf_proto *tp, unsigned long base, u32 handle,
 		} while (--i > 0 && basic_get(tp, head->hgenerator));
 
 		if (i <= 0) {
-			printk(KERN_ERR "Insufficient number of handles\n");
+			pr_err("Insufficient number of handles\n");
 			goto errout;
 		}
 
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index d49c40f..32a3351 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -56,7 +56,8 @@ static struct cgroup_subsys_state *cgrp_create(struct cgroup_subsys *ss,
 {
 	struct cgroup_cls_state *cs;
 
-	if (!(cs = kzalloc(sizeof(*cs), GFP_KERNEL)))
+	cs = kzalloc(sizeof(*cs), GFP_KERNEL);
+	if (!cs)
 		return ERR_PTR(-ENOMEM);
 
 	if (cgrp->parent)
@@ -94,8 +95,7 @@ static int cgrp_populate(struct cgroup_subsys *ss, struct cgroup *cgrp)
 	return cgroup_add_files(cgrp, ss, ss_files, ARRAY_SIZE(ss_files));
 }
 
-struct cls_cgroup_head
-{
+struct cls_cgroup_head {
 	u32			handle;
 	struct tcf_exts		exts;
 	struct tcf_ematch_tree	ematches;
@@ -166,7 +166,7 @@ static int cls_cgroup_change(struct tcf_proto *tp, unsigned long base,
 			     u32 handle, struct nlattr **tca,
 			     unsigned long *arg)
 {
-	struct nlattr *tb[TCA_CGROUP_MAX+1];
+	struct nlattr *tb[TCA_CGROUP_MAX + 1];
 	struct cls_cgroup_head *head = tp->root;
 	struct tcf_ematch_tree t;
 	struct tcf_exts e;
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 5b271a1..8ec0139 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -121,7 +121,7 @@ static u32 flow_get_proto_src(struct sk_buff *skb)
 		if (!pskb_network_may_pull(skb, sizeof(*iph)))
 			break;
 		iph = ip_hdr(skb);
-		if (iph->frag_off & htons(IP_MF|IP_OFFSET))
+		if (iph->frag_off & htons(IP_MF | IP_OFFSET))
 			break;
 		poff = proto_ports_offset(iph->protocol);
 		if (poff >= 0 &&
@@ -163,7 +163,7 @@ static u32 flow_get_proto_dst(struct sk_buff *skb)
 		if (!pskb_network_may_pull(skb, sizeof(*iph)))
 			break;
 		iph = ip_hdr(skb);
-		if (iph->frag_off & htons(IP_MF|IP_OFFSET))
+		if (iph->frag_off & htons(IP_MF | IP_OFFSET))
 			break;
 		poff = proto_ports_offset(iph->protocol);
 		if (poff >= 0 &&
@@ -276,7 +276,7 @@ fallback:
 
 static u32 flow_get_rtclassid(const struct sk_buff *skb)
 {
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
 	if (skb_dst(skb))
 		return skb_dst(skb)->tclassid;
 #endif
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index 93b0a7b..26e7bc4 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -31,14 +31,12 @@
 
 #define HTSIZE (PAGE_SIZE/sizeof(struct fw_filter *))
 
-struct fw_head
-{
+struct fw_head {
 	struct fw_filter *ht[HTSIZE];
 	u32 mask;
 };
 
-struct fw_filter
-{
+struct fw_filter {
 	struct fw_filter	*next;
 	u32			id;
 	struct tcf_result	res;
@@ -53,7 +51,7 @@ static const struct tcf_ext_map fw_ext_map = {
 	.police = TCA_FW_POLICE
 };
 
-static __inline__ int fw_hash(u32 handle)
+static inline int fw_hash(u32 handle)
 {
 	if (HTSIZE == 4096)
 		return ((handle >> 24) & 0xFFF) ^
@@ -82,14 +80,14 @@ static __inline__ int fw_hash(u32 handle)
 static int fw_classify(struct sk_buff *skb, struct tcf_proto *tp,
 			  struct tcf_result *res)
 {
-	struct fw_head *head = (struct fw_head*)tp->root;
+	struct fw_head *head = (struct fw_head *)tp->root;
 	struct fw_filter *f;
 	int r;
 	u32 id = skb->mark;
 
 	if (head != NULL) {
 		id &= head->mask;
-		for (f=head->ht[fw_hash(id)]; f; f=f->next) {
+		for (f = head->ht[fw_hash(id)]; f; f = f->next) {
 			if (f->id == id) {
 				*res = f->res;
 #ifdef CONFIG_NET_CLS_IND
@@ -105,7 +103,8 @@ static int fw_classify(struct sk_buff *skb, struct tcf_proto *tp,
 		}
 	} else {
 		/* old method */
-		if (id && (TC_H_MAJ(id) == 0 || !(TC_H_MAJ(id^tp->q->handle)))) {
+		if (id && (TC_H_MAJ(id) == 0 ||
+			   !(TC_H_MAJ(id ^ tp->q->handle)))) {
 			res->classid = id;
 			res->class = 0;
 			return 0;
@@ -117,13 +116,13 @@ static int fw_classify(struct sk_buff *skb, struct tcf_proto *tp,
 
 static unsigned long fw_get(struct tcf_proto *tp, u32 handle)
 {
-	struct fw_head *head = (struct fw_head*)tp->root;
+	struct fw_head *head = (struct fw_head *)tp->root;
 	struct fw_filter *f;
 
 	if (head == NULL)
 		return 0;
 
-	for (f=head->ht[fw_hash(handle)]; f; f=f->next) {
+	for (f = head->ht[fw_hash(handle)]; f; f = f->next) {
 		if (f->id == handle)
 			return (unsigned long)f;
 	}
@@ -139,8 +138,7 @@ static int fw_init(struct tcf_proto *tp)
 	return 0;
 }
 
-static inline void
-fw_delete_filter(struct tcf_proto *tp, struct fw_filter *f)
+static void fw_delete_filter(struct tcf_proto *tp, struct fw_filter *f)
 {
 	tcf_unbind_filter(tp, &f->res);
 	tcf_exts_destroy(tp, &f->exts);
@@ -156,8 +154,8 @@ static void fw_destroy(struct tcf_proto *tp)
 	if (head == NULL)
 		return;
 
-	for (h=0; h<HTSIZE; h++) {
-		while ((f=head->ht[h]) != NULL) {
+	for (h = 0; h < HTSIZE; h++) {
+		while ((f = head->ht[h]) != NULL) {
 			head->ht[h] = f->next;
 			fw_delete_filter(tp, f);
 		}
@@ -167,14 +165,14 @@ static void fw_destroy(struct tcf_proto *tp)
 
 static int fw_delete(struct tcf_proto *tp, unsigned long arg)
 {
-	struct fw_head *head = (struct fw_head*)tp->root;
-	struct fw_filter *f = (struct fw_filter*)arg;
+	struct fw_head *head = (struct fw_head *)tp->root;
+	struct fw_filter *f = (struct fw_filter *)arg;
 	struct fw_filter **fp;
 
 	if (head == NULL || f == NULL)
 		goto out;
 
-	for (fp=&head->ht[fw_hash(f->id)]; *fp; fp = &(*fp)->next) {
+	for (fp = &head->ht[fw_hash(f->id)]; *fp; fp = &(*fp)->next) {
 		if (*fp == f) {
 			tcf_tree_lock(tp);
 			*fp = f->next;
@@ -240,7 +238,7 @@ static int fw_change(struct tcf_proto *tp, unsigned long base,
 		     struct nlattr **tca,
 		     unsigned long *arg)
 {
-	struct fw_head *head = (struct fw_head*)tp->root;
+	struct fw_head *head = (struct fw_head *)tp->root;
 	struct fw_filter *f = (struct fw_filter *) *arg;
 	struct nlattr *opt = tca[TCA_OPTIONS];
 	struct nlattr *tb[TCA_FW_MAX + 1];
@@ -302,7 +300,7 @@ errout:
 
 static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg)
 {
-	struct fw_head *head = (struct fw_head*)tp->root;
+	struct fw_head *head = (struct fw_head *)tp->root;
 	int h;
 
 	if (head == NULL)
@@ -332,7 +330,7 @@ static int fw_dump(struct tcf_proto *tp, unsigned long fh,
 		   struct sk_buff *skb, struct tcmsg *t)
 {
 	struct fw_head *head = (struct fw_head *)tp->root;
-	struct fw_filter *f = (struct fw_filter*)fh;
+	struct fw_filter *f = (struct fw_filter *)fh;
 	unsigned char *b = skb_tail_pointer(skb);
 	struct nlattr *nest;
 
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index 694dcd8..a9079053 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -23,34 +23,30 @@
 #include <net/pkt_cls.h>
 
 /*
-   1. For now we assume that route tags < 256.
-      It allows to use direct table lookups, instead of hash tables.
-   2. For now we assume that "from TAG" and "fromdev DEV" statements
-      are mutually  exclusive.
-   3. "to TAG from ANY" has higher priority, than "to ANY from XXX"
+ * 1. For now we assume that route tags < 256.
+ *    It allows to use direct table lookups, instead of hash tables.
+ * 2. For now we assume that "from TAG" and "fromdev DEV" statements
+ *    are mutually  exclusive.
+ * 3. "to TAG from ANY" has higher priority, than "to ANY from XXX"
  */
 
-struct route4_fastmap
-{
+struct route4_fastmap {
 	struct route4_filter	*filter;
 	u32			id;
 	int			iif;
 };
 
-struct route4_head
-{
+struct route4_head {
 	struct route4_fastmap	fastmap[16];
-	struct route4_bucket	*table[256+1];
+	struct route4_bucket	*table[256 + 1];
 };
 
-struct route4_bucket
-{
+struct route4_bucket {
 	/* 16 FROM buckets + 16 IIF buckets + 1 wildcard bucket */
-	struct route4_filter	*ht[16+16+1];
+	struct route4_filter	*ht[16 + 16 + 1];
 };
 
-struct route4_filter
-{
+struct route4_filter {
 	struct route4_filter	*next;
 	u32			id;
 	int			iif;
@@ -61,20 +57,20 @@ struct route4_filter
 	struct route4_bucket	*bkt;
 };
 
-#define ROUTE4_FAILURE ((struct route4_filter*)(-1L))
+#define ROUTE4_FAILURE ((struct route4_filter *)(-1L))
 
 static const struct tcf_ext_map route_ext_map = {
 	.police = TCA_ROUTE4_POLICE,
 	.action = TCA_ROUTE4_ACT
 };
 
-static __inline__ int route4_fastmap_hash(u32 id, int iif)
+static inline int route4_fastmap_hash(u32 id, int iif)
 {
-	return id&0xF;
+	return id & 0xF;
 }
 
-static inline
-void route4_reset_fastmap(struct Qdisc *q, struct route4_head *head, u32 id)
+static void
+route4_reset_fastmap(struct Qdisc *q, struct route4_head *head, u32 id)
 {
 	spinlock_t *root_lock = qdisc_root_sleeping_lock(q);
 
@@ -83,32 +79,33 @@ void route4_reset_fastmap(struct Qdisc *q, struct route4_head *head, u32 id)
 	spin_unlock_bh(root_lock);
 }
 
-static inline void
+static void
 route4_set_fastmap(struct route4_head *head, u32 id, int iif,
 		   struct route4_filter *f)
 {
 	int h = route4_fastmap_hash(id, iif);
+
 	head->fastmap[h].id = id;
 	head->fastmap[h].iif = iif;
 	head->fastmap[h].filter = f;
 }
 
-static __inline__ int route4_hash_to(u32 id)
+static inline int route4_hash_to(u32 id)
 {
-	return id&0xFF;
+	return id & 0xFF;
 }
 
-static __inline__ int route4_hash_from(u32 id)
+static inline int route4_hash_from(u32 id)
 {
-	return (id>>16)&0xF;
+	return (id >> 16) & 0xF;
 }
 
-static __inline__ int route4_hash_iif(int iif)
+static inline int route4_hash_iif(int iif)
 {
-	return 16 + ((iif>>16)&0xF);
+	return 16 + ((iif >> 16) & 0xF);
 }
 
-static __inline__ int route4_hash_wild(void)
+static inline int route4_hash_wild(void)
 {
 	return 32;
 }
@@ -131,21 +128,22 @@ static __inline__ int route4_hash_wild(void)
 static int route4_classify(struct sk_buff *skb, struct tcf_proto *tp,
 			   struct tcf_result *res)
 {
-	struct route4_head *head = (struct route4_head*)tp->root;
+	struct route4_head *head = (struct route4_head *)tp->root;
 	struct dst_entry *dst;
 	struct route4_bucket *b;
 	struct route4_filter *f;
 	u32 id, h;
 	int iif, dont_cache = 0;
 
-	if ((dst = skb_dst(skb)) == NULL)
+	dst = skb_dst(skb);
+	if (!dst)
 		goto failure;
 
 	id = dst->tclassid;
 	if (head == NULL)
 		goto old_method;
 
-	iif = ((struct rtable*)dst)->fl.iif;
+	iif = ((struct rtable *)dst)->rt_iif;
 
 	h = route4_fastmap_hash(id, iif);
 	if (id == head->fastmap[h].id &&
@@ -161,7 +159,8 @@ static int route4_classify(struct sk_buff *skb, struct tcf_proto *tp,
 	h = route4_hash_to(id);
 
 restart:
-	if ((b = head->table[h]) != NULL) {
+	b = head->table[h];
+	if (b) {
 		for (f = b->ht[route4_hash_from(id)]; f; f = f->next)
 			if (f->id == id)
 				ROUTE4_APPLY_RESULT();
@@ -197,8 +196,9 @@ old_method:
 
 static inline u32 to_hash(u32 id)
 {
-	u32 h = id&0xFF;
-	if (id&0x8000)
+	u32 h = id & 0xFF;
+
+	if (id & 0x8000)
 		h += 256;
 	return h;
 }
@@ -211,17 +211,17 @@ static inline u32 from_hash(u32 id)
 	if (!(id & 0x8000)) {
 		if (id > 255)
 			return 256;
-		return id&0xF;
+		return id & 0xF;
 	}
-	return 16 + (id&0xF);
+	return 16 + (id & 0xF);
 }
 
 static unsigned long route4_get(struct tcf_proto *tp, u32 handle)
 {
-	struct route4_head *head = (struct route4_head*)tp->root;
+	struct route4_head *head = (struct route4_head *)tp->root;
 	struct route4_bucket *b;
 	struct route4_filter *f;
-	unsigned h1, h2;
+	unsigned int h1, h2;
 
 	if (!head)
 		return 0;
@@ -230,11 +230,12 @@ static unsigned long route4_get(struct tcf_proto *tp, u32 handle)
 	if (h1 > 256)
 		return 0;
 
-	h2 = from_hash(handle>>16);
+	h2 = from_hash(handle >> 16);
 	if (h2 > 32)
 		return 0;
 
-	if ((b = head->table[h1]) != NULL) {
+	b = head->table[h1];
+	if (b) {
 		for (f = b->ht[h2]; f; f = f->next)
 			if (f->handle == handle)
 				return (unsigned long)f;
@@ -251,7 +252,7 @@ static int route4_init(struct tcf_proto *tp)
 	return 0;
 }
 
-static inline void
+static void
 route4_delete_filter(struct tcf_proto *tp, struct route4_filter *f)
 {
 	tcf_unbind_filter(tp, &f->res);
@@ -267,11 +268,12 @@ static void route4_destroy(struct tcf_proto *tp)
 	if (head == NULL)
 		return;
 
-	for (h1=0; h1<=256; h1++) {
+	for (h1 = 0; h1 <= 256; h1++) {
 		struct route4_bucket *b;
 
-		if ((b = head->table[h1]) != NULL) {
-			for (h2=0; h2<=32; h2++) {
+		b = head->table[h1];
+		if (b) {
+			for (h2 = 0; h2 <= 32; h2++) {
 				struct route4_filter *f;
 
 				while ((f = b->ht[h2]) != NULL) {
@@ -287,9 +289,9 @@ static void route4_destroy(struct tcf_proto *tp)
 
 static int route4_delete(struct tcf_proto *tp, unsigned long arg)
 {
-	struct route4_head *head = (struct route4_head*)tp->root;
-	struct route4_filter **fp, *f = (struct route4_filter*)arg;
-	unsigned h = 0;
+	struct route4_head *head = (struct route4_head *)tp->root;
+	struct route4_filter **fp, *f = (struct route4_filter *)arg;
+	unsigned int h = 0;
 	struct route4_bucket *b;
 	int i;
 
@@ -299,7 +301,7 @@ static int route4_delete(struct tcf_proto *tp, unsigned long arg)
 	h = f->handle;
 	b = f->bkt;
 
-	for (fp = &b->ht[from_hash(h>>16)]; *fp; fp = &(*fp)->next) {
+	for (fp = &b->ht[from_hash(h >> 16)]; *fp; fp = &(*fp)->next) {
 		if (*fp == f) {
 			tcf_tree_lock(tp);
 			*fp = f->next;
@@ -310,7 +312,7 @@ static int route4_delete(struct tcf_proto *tp, unsigned long arg)
 
 			/* Strip tree */
 
-			for (i=0; i<=32; i++)
+			for (i = 0; i <= 32; i++)
 				if (b->ht[i])
 					return 0;
 
@@ -380,7 +382,8 @@ static int route4_set_parms(struct tcf_proto *tp, unsigned long base,
 	}
 
 	h1 = to_hash(nhandle);
-	if ((b = head->table[h1]) == NULL) {
+	b = head->table[h1];
+	if (!b) {
 		err = -ENOBUFS;
 		b = kzalloc(sizeof(struct route4_bucket), GFP_KERNEL);
 		if (b == NULL)
@@ -391,6 +394,7 @@ static int route4_set_parms(struct tcf_proto *tp, unsigned long base,
 		tcf_tree_unlock(tp);
 	} else {
 		unsigned int h2 = from_hash(nhandle >> 16);
+
 		err = -EEXIST;
 		for (fp = b->ht[h2]; fp; fp = fp->next)
 			if (fp->handle == f->handle)
@@ -444,7 +448,8 @@ static int route4_change(struct tcf_proto *tp, unsigned long base,
 	if (err < 0)
 		return err;
 
-	if ((f = (struct route4_filter*)*arg) != NULL) {
+	f = (struct route4_filter *)*arg;
+	if (f) {
 		if (f->handle != handle && handle)
 			return -EINVAL;
 
@@ -481,7 +486,7 @@ static int route4_change(struct tcf_proto *tp, unsigned long base,
 
 reinsert:
 	h = from_hash(f->handle >> 16);
-	for (fp = &f->bkt->ht[h]; (f1=*fp) != NULL; fp = &f1->next)
+	for (fp = &f->bkt->ht[h]; (f1 = *fp) != NULL; fp = &f1->next)
 		if (f->handle < f1->handle)
 			break;
 
@@ -492,7 +497,8 @@ reinsert:
 	if (old_handle && f->handle != old_handle) {
 		th = to_hash(old_handle);
 		h = from_hash(old_handle >> 16);
-		if ((b = head->table[th]) != NULL) {
+		b = head->table[th];
+		if (b) {
 			for (fp = &b->ht[h]; *fp; fp = &(*fp)->next) {
 				if (*fp == f) {
 					*fp = f->next;
@@ -515,7 +521,7 @@ errout:
 static void route4_walk(struct tcf_proto *tp, struct tcf_walker *arg)
 {
 	struct route4_head *head = tp->root;
-	unsigned h, h1;
+	unsigned int h, h1;
 
 	if (head == NULL)
 		arg->stop = 1;
@@ -549,7 +555,7 @@ static void route4_walk(struct tcf_proto *tp, struct tcf_walker *arg)
 static int route4_dump(struct tcf_proto *tp, unsigned long fh,
 		       struct sk_buff *skb, struct tcmsg *t)
 {
-	struct route4_filter *f = (struct route4_filter*)fh;
+	struct route4_filter *f = (struct route4_filter *)fh;
 	unsigned char *b = skb_tail_pointer(skb);
 	struct nlattr *nest;
 	u32 id;
@@ -563,15 +569,15 @@ static int route4_dump(struct tcf_proto *tp, unsigned long fh,
 	if (nest == NULL)
 		goto nla_put_failure;
 
-	if (!(f->handle&0x8000)) {
-		id = f->id&0xFF;
+	if (!(f->handle & 0x8000)) {
+		id = f->id & 0xFF;
 		NLA_PUT_U32(skb, TCA_ROUTE4_TO, id);
 	}
-	if (f->handle&0x80000000) {
-		if ((f->handle>>16) != 0xFFFF)
+	if (f->handle & 0x80000000) {
+		if ((f->handle >> 16) != 0xFFFF)
 			NLA_PUT_U32(skb, TCA_ROUTE4_IIF, f->iif);
 	} else {
-		id = f->id>>16;
+		id = f->id >> 16;
 		NLA_PUT_U32(skb, TCA_ROUTE4_FROM, id);
 	}
 	if (f->res.classid)
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index 425a179..402c44b 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -66,28 +66,25 @@
    powerful classification engine.  */
 
 
-struct rsvp_head
-{
+struct rsvp_head {
 	u32			tmap[256/32];
 	u32			hgenerator;
 	u8			tgenerator;
 	struct rsvp_session	*ht[256];
 };
 
-struct rsvp_session
-{
+struct rsvp_session {
 	struct rsvp_session	*next;
 	__be32			dst[RSVP_DST_LEN];
 	struct tc_rsvp_gpi 	dpi;
 	u8			protocol;
 	u8			tunnelid;
 	/* 16 (src,sport) hash slots, and one wildcard source slot */
-	struct rsvp_filter	*ht[16+1];
+	struct rsvp_filter	*ht[16 + 1];
 };
 
 
-struct rsvp_filter
-{
+struct rsvp_filter {
 	struct rsvp_filter	*next;
 	__be32			src[RSVP_DST_LEN];
 	struct tc_rsvp_gpi	spi;
@@ -100,17 +97,19 @@ struct rsvp_filter
 	struct rsvp_session	*sess;
 };
 
-static __inline__ unsigned hash_dst(__be32 *dst, u8 protocol, u8 tunnelid)
+static inline unsigned int hash_dst(__be32 *dst, u8 protocol, u8 tunnelid)
 {
-	unsigned h = (__force __u32)dst[RSVP_DST_LEN-1];
+	unsigned int h = (__force __u32)dst[RSVP_DST_LEN - 1];
+
 	h ^= h>>16;
 	h ^= h>>8;
 	return (h ^ protocol ^ tunnelid) & 0xFF;
 }
 
-static __inline__ unsigned hash_src(__be32 *src)
+static inline unsigned int hash_src(__be32 *src)
 {
-	unsigned h = (__force __u32)src[RSVP_DST_LEN-1];
+	unsigned int h = (__force __u32)src[RSVP_DST_LEN-1];
+
 	h ^= h>>16;
 	h ^= h>>8;
 	h ^= h>>4;
@@ -134,10 +133,10 @@ static struct tcf_ext_map rsvp_ext_map = {
 static int rsvp_classify(struct sk_buff *skb, struct tcf_proto *tp,
 			 struct tcf_result *res)
 {
-	struct rsvp_session **sht = ((struct rsvp_head*)tp->root)->ht;
+	struct rsvp_session **sht = ((struct rsvp_head *)tp->root)->ht;
 	struct rsvp_session *s;
 	struct rsvp_filter *f;
-	unsigned h1, h2;
+	unsigned int h1, h2;
 	__be32 *dst, *src;
 	u8 protocol;
 	u8 tunnelid = 0;
@@ -162,13 +161,13 @@ restart:
 	src = &nhptr->saddr.s6_addr32[0];
 	dst = &nhptr->daddr.s6_addr32[0];
 	protocol = nhptr->nexthdr;
-	xprt = ((u8*)nhptr) + sizeof(struct ipv6hdr);
+	xprt = ((u8 *)nhptr) + sizeof(struct ipv6hdr);
 #else
 	src = &nhptr->saddr;
 	dst = &nhptr->daddr;
 	protocol = nhptr->protocol;
-	xprt = ((u8*)nhptr) + (nhptr->ihl<<2);
-	if (nhptr->frag_off & htons(IP_MF|IP_OFFSET))
+	xprt = ((u8 *)nhptr) + (nhptr->ihl<<2);
+	if (nhptr->frag_off & htons(IP_MF | IP_OFFSET))
 		return -1;
 #endif
 
@@ -176,10 +175,10 @@ restart:
 	h2 = hash_src(src);
 
 	for (s = sht[h1]; s; s = s->next) {
-		if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] &&
+		if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN - 1] &&
 		    protocol == s->protocol &&
 		    !(s->dpi.mask &
-		      (*(u32*)(xprt+s->dpi.offset)^s->dpi.key)) &&
+		      (*(u32 *)(xprt + s->dpi.offset) ^ s->dpi.key)) &&
 #if RSVP_DST_LEN == 4
 		    dst[0] == s->dst[0] &&
 		    dst[1] == s->dst[1] &&
@@ -188,8 +187,8 @@ restart:
 		    tunnelid == s->tunnelid) {
 
 			for (f = s->ht[h2]; f; f = f->next) {
-				if (src[RSVP_DST_LEN-1] == f->src[RSVP_DST_LEN-1] &&
-				    !(f->spi.mask & (*(u32*)(xprt+f->spi.offset)^f->spi.key))
+				if (src[RSVP_DST_LEN-1] == f->src[RSVP_DST_LEN - 1] &&
+				    !(f->spi.mask & (*(u32 *)(xprt + f->spi.offset) ^ f->spi.key))
 #if RSVP_DST_LEN == 4
 				    &&
 				    src[0] == f->src[0] &&
@@ -205,7 +204,7 @@ matched:
 						return 0;
 
 					tunnelid = f->res.classid;
-					nhptr = (void*)(xprt + f->tunnelhdr - sizeof(*nhptr));
+					nhptr = (void *)(xprt + f->tunnelhdr - sizeof(*nhptr));
 					goto restart;
 				}
 			}
@@ -224,11 +223,11 @@ matched:
 
 static unsigned long rsvp_get(struct tcf_proto *tp, u32 handle)
 {
-	struct rsvp_session **sht = ((struct rsvp_head*)tp->root)->ht;
+	struct rsvp_session **sht = ((struct rsvp_head *)tp->root)->ht;
 	struct rsvp_session *s;
 	struct rsvp_filter *f;
-	unsigned h1 = handle&0xFF;
-	unsigned h2 = (handle>>8)&0xFF;
+	unsigned int h1 = handle & 0xFF;
+	unsigned int h2 = (handle >> 8) & 0xFF;
 
 	if (h2 > 16)
 		return 0;
@@ -258,7 +257,7 @@ static int rsvp_init(struct tcf_proto *tp)
 	return -ENOBUFS;
 }
 
-static inline void
+static void
 rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f)
 {
 	tcf_unbind_filter(tp, &f->res);
@@ -277,13 +276,13 @@ static void rsvp_destroy(struct tcf_proto *tp)
 
 	sht = data->ht;
 
-	for (h1=0; h1<256; h1++) {
+	for (h1 = 0; h1 < 256; h1++) {
 		struct rsvp_session *s;
 
 		while ((s = sht[h1]) != NULL) {
 			sht[h1] = s->next;
 
-			for (h2=0; h2<=16; h2++) {
+			for (h2 = 0; h2 <= 16; h2++) {
 				struct rsvp_filter *f;
 
 				while ((f = s->ht[h2]) != NULL) {
@@ -299,13 +298,13 @@ static void rsvp_destroy(struct tcf_proto *tp)
 
 static int rsvp_delete(struct tcf_proto *tp, unsigned long arg)
 {
-	struct rsvp_filter **fp, *f = (struct rsvp_filter*)arg;
-	unsigned h = f->handle;
+	struct rsvp_filter **fp, *f = (struct rsvp_filter *)arg;
+	unsigned int h = f->handle;
 	struct rsvp_session **sp;
 	struct rsvp_session *s = f->sess;
 	int i;
 
-	for (fp = &s->ht[(h>>8)&0xFF]; *fp; fp = &(*fp)->next) {
+	for (fp = &s->ht[(h >> 8) & 0xFF]; *fp; fp = &(*fp)->next) {
 		if (*fp == f) {
 			tcf_tree_lock(tp);
 			*fp = f->next;
@@ -314,12 +313,12 @@ static int rsvp_delete(struct tcf_proto *tp, unsigned long arg)
 
 			/* Strip tree */
 
-			for (i=0; i<=16; i++)
+			for (i = 0; i <= 16; i++)
 				if (s->ht[i])
 					return 0;
 
 			/* OK, session has no flows */
-			for (sp = &((struct rsvp_head*)tp->root)->ht[h&0xFF];
+			for (sp = &((struct rsvp_head *)tp->root)->ht[h & 0xFF];
 			     *sp; sp = &(*sp)->next) {
 				if (*sp == s) {
 					tcf_tree_lock(tp);
@@ -337,13 +336,14 @@ static int rsvp_delete(struct tcf_proto *tp, unsigned long arg)
 	return 0;
 }
 
-static unsigned gen_handle(struct tcf_proto *tp, unsigned salt)
+static unsigned int gen_handle(struct tcf_proto *tp, unsigned salt)
 {
 	struct rsvp_head *data = tp->root;
 	int i = 0xFFFF;
 
 	while (i-- > 0) {
 		u32 h;
+
 		if ((data->hgenerator += 0x10000) == 0)
 			data->hgenerator = 0x10000;
 		h = data->hgenerator|salt;
@@ -355,10 +355,10 @@ static unsigned gen_handle(struct tcf_proto *tp, unsigned salt)
 
 static int tunnel_bts(struct rsvp_head *data)
 {
-	int n = data->tgenerator>>5;
-	u32 b = 1<<(data->tgenerator&0x1F);
+	int n = data->tgenerator >> 5;
+	u32 b = 1 << (data->tgenerator & 0x1F);
 
-	if (data->tmap[n]&b)
+	if (data->tmap[n] & b)
 		return 0;
 	data->tmap[n] |= b;
 	return 1;
@@ -372,10 +372,10 @@ static void tunnel_recycle(struct rsvp_head *data)
 
 	memset(tmap, 0, sizeof(tmap));
 
-	for (h1=0; h1<256; h1++) {
+	for (h1 = 0; h1 < 256; h1++) {
 		struct rsvp_session *s;
 		for (s = sht[h1]; s; s = s->next) {
-			for (h2=0; h2<=16; h2++) {
+			for (h2 = 0; h2 <= 16; h2++) {
 				struct rsvp_filter *f;
 
 				for (f = s->ht[h2]; f; f = f->next) {
@@ -395,8 +395,8 @@ static u32 gen_tunnel(struct rsvp_head *data)
 {
 	int i, k;
 
-	for (k=0; k<2; k++) {
-		for (i=255; i>0; i--) {
+	for (k = 0; k < 2; k++) {
+		for (i = 255; i > 0; i--) {
 			if (++data->tgenerator == 0)
 				data->tgenerator = 1;
 			if (tunnel_bts(data))
@@ -428,7 +428,7 @@ static int rsvp_change(struct tcf_proto *tp, unsigned long base,
 	struct nlattr *opt = tca[TCA_OPTIONS-1];
 	struct nlattr *tb[TCA_RSVP_MAX + 1];
 	struct tcf_exts e;
-	unsigned h1, h2;
+	unsigned int h1, h2;
 	__be32 *dst;
 	int err;
 
@@ -443,7 +443,8 @@ static int rsvp_change(struct tcf_proto *tp, unsigned long base,
 	if (err < 0)
 		return err;
 
-	if ((f = (struct rsvp_filter*)*arg) != NULL) {
+	f = (struct rsvp_filter *)*arg;
+	if (f) {
 		/* Node exists: adjust only classid */
 
 		if (f->handle != handle && handle)
@@ -500,7 +501,7 @@ static int rsvp_change(struct tcf_proto *tp, unsigned long base,
 			goto errout;
 	}
 
-	for (sp = &data->ht[h1]; (s=*sp) != NULL; sp = &s->next) {
+	for (sp = &data->ht[h1]; (s = *sp) != NULL; sp = &s->next) {
 		if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] &&
 		    pinfo && pinfo->protocol == s->protocol &&
 		    memcmp(&pinfo->dpi, &s->dpi, sizeof(s->dpi)) == 0 &&
@@ -523,7 +524,7 @@ insert:
 			tcf_exts_change(tp, &f->exts, &e);
 
 			for (fp = &s->ht[h2]; *fp; fp = &(*fp)->next)
-				if (((*fp)->spi.mask&f->spi.mask) != f->spi.mask)
+				if (((*fp)->spi.mask & f->spi.mask) != f->spi.mask)
 					break;
 			f->next = *fp;
 			wmb();
@@ -567,7 +568,7 @@ errout2:
 static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg)
 {
 	struct rsvp_head *head = tp->root;
-	unsigned h, h1;
+	unsigned int h, h1;
 
 	if (arg->stop)
 		return;
@@ -598,7 +599,7 @@ static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg)
 static int rsvp_dump(struct tcf_proto *tp, unsigned long fh,
 		     struct sk_buff *skb, struct tcmsg *t)
 {
-	struct rsvp_filter *f = (struct rsvp_filter*)fh;
+	struct rsvp_filter *f = (struct rsvp_filter *)fh;
 	struct rsvp_session *s;
 	unsigned char *b = skb_tail_pointer(skb);
 	struct nlattr *nest;
@@ -624,7 +625,7 @@ static int rsvp_dump(struct tcf_proto *tp, unsigned long fh,
 	NLA_PUT(skb, TCA_RSVP_PINFO, sizeof(pinfo), &pinfo);
 	if (f->res.classid)
 		NLA_PUT_U32(skb, TCA_RSVP_CLASSID, f->res.classid);
-	if (((f->handle>>8)&0xFF) != 16)
+	if (((f->handle >> 8) & 0xFF) != 16)
 		NLA_PUT(skb, TCA_RSVP_SRC, sizeof(f->src), f->src);
 
 	if (tcf_exts_dump(skb, &f->exts, &rsvp_ext_map) < 0)
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index 20ef330..36667fa 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -249,7 +249,7 @@ tcindex_set_parms(struct tcf_proto *tp, unsigned long base, u32 handle,
 		 * of the hashing index is below the threshold.
 		 */
 		if ((cp.mask >> cp.shift) < PERFECT_HASH_THRESHOLD)
-			cp.hash = (cp.mask >> cp.shift)+1;
+			cp.hash = (cp.mask >> cp.shift) + 1;
 		else
 			cp.hash = DEFAULT_HASH_SIZE;
 	}
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index b0c2a82..3b93fc0c 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -42,8 +42,7 @@
 #include <net/act_api.h>
 #include <net/pkt_cls.h>
 
-struct tc_u_knode
-{
+struct tc_u_knode {
 	struct tc_u_knode	*next;
 	u32			handle;
 	struct tc_u_hnode	*ht_up;
@@ -63,19 +62,17 @@ struct tc_u_knode
 	struct tc_u32_sel	sel;
 };
 
-struct tc_u_hnode
-{
+struct tc_u_hnode {
 	struct tc_u_hnode	*next;
 	u32			handle;
 	u32			prio;
 	struct tc_u_common	*tp_c;
 	int			refcnt;
-	unsigned		divisor;
+	unsigned int		divisor;
 	struct tc_u_knode	*ht[1];
 };
 
-struct tc_u_common
-{
+struct tc_u_common {
 	struct tc_u_hnode	*hlist;
 	struct Qdisc		*q;
 	int			refcnt;
@@ -87,9 +84,11 @@ static const struct tcf_ext_map u32_ext_map = {
 	.police = TCA_U32_POLICE
 };
 
-static __inline__ unsigned u32_hash_fold(__be32 key, struct tc_u32_sel *sel, u8 fshift)
+static inline unsigned int u32_hash_fold(__be32 key,
+					 const struct tc_u32_sel *sel,
+					 u8 fshift)
 {
-	unsigned h = ntohl(key & sel->hmask)>>fshift;
+	unsigned int h = ntohl(key & sel->hmask) >> fshift;
 
 	return h;
 }
@@ -101,7 +100,7 @@ static int u32_classify(struct sk_buff *skb, struct tcf_proto *tp, struct tcf_re
 		unsigned int	  off;
 	} stack[TC_U32_MAXDEPTH];
 
-	struct tc_u_hnode *ht = (struct tc_u_hnode*)tp->root;
+	struct tc_u_hnode *ht = (struct tc_u_hnode *)tp->root;
 	unsigned int off = skb_network_offset(skb);
 	struct tc_u_knode *n;
 	int sdepth = 0;
@@ -120,7 +119,7 @@ next_knode:
 		struct tc_u32_key *key = n->sel.keys;
 
 #ifdef CONFIG_CLS_U32_PERF
-		n->pf->rcnt +=1;
+		n->pf->rcnt += 1;
 		j = 0;
 #endif
 
@@ -133,14 +132,14 @@ next_knode:
 		}
 #endif
 
-		for (i = n->sel.nkeys; i>0; i--, key++) {
+		for (i = n->sel.nkeys; i > 0; i--, key++) {
 			int toff = off + key->off + (off2 & key->offmask);
-			__be32 *data, _data;
+			__be32 *data, hdata;
 
 			if (skb_headroom(skb) + toff > INT_MAX)
 				goto out;
 
-			data = skb_header_pointer(skb, toff, 4, &_data);
+			data = skb_header_pointer(skb, toff, 4, &hdata);
 			if (!data)
 				goto out;
 			if ((*data ^ key->val) & key->mask) {
@@ -148,13 +147,13 @@ next_knode:
 				goto next_knode;
 			}
 #ifdef CONFIG_CLS_U32_PERF
-			n->pf->kcnts[j] +=1;
+			n->pf->kcnts[j] += 1;
 			j++;
 #endif
 		}
 		if (n->ht_down == NULL) {
 check_terminal:
-			if (n->sel.flags&TC_U32_TERMINAL) {
+			if (n->sel.flags & TC_U32_TERMINAL) {
 
 				*res = n->res;
 #ifdef CONFIG_NET_CLS_IND
@@ -164,7 +163,7 @@ check_terminal:
 				}
 #endif
 #ifdef CONFIG_CLS_U32_PERF
-				n->pf->rhit +=1;
+				n->pf->rhit += 1;
 #endif
 				r = tcf_exts_exec(skb, &n->exts, res);
 				if (r < 0) {
@@ -188,26 +187,26 @@ check_terminal:
 		ht = n->ht_down;
 		sel = 0;
 		if (ht->divisor) {
-			__be32 *data, _data;
+			__be32 *data, hdata;
 
 			data = skb_header_pointer(skb, off + n->sel.hoff, 4,
-						  &_data);
+						  &hdata);
 			if (!data)
 				goto out;
 			sel = ht->divisor & u32_hash_fold(*data, &n->sel,
 							  n->fshift);
 		}
-		if (!(n->sel.flags&(TC_U32_VAROFFSET|TC_U32_OFFSET|TC_U32_EAT)))
+		if (!(n->sel.flags & (TC_U32_VAROFFSET | TC_U32_OFFSET | TC_U32_EAT)))
 			goto next_ht;
 
-		if (n->sel.flags&(TC_U32_OFFSET|TC_U32_VAROFFSET)) {
+		if (n->sel.flags & (TC_U32_OFFSET | TC_U32_VAROFFSET)) {
 			off2 = n->sel.off + 3;
 			if (n->sel.flags & TC_U32_VAROFFSET) {
-				__be16 *data, _data;
+				__be16 *data, hdata;
 
 				data = skb_header_pointer(skb,
 							  off + n->sel.offoff,
-							  2, &_data);
+							  2, &hdata);
 				if (!data)
 					goto out;
 				off2 += ntohs(n->sel.offmask & *data) >>
@@ -215,7 +214,7 @@ check_terminal:
 			}
 			off2 &= ~3;
 		}
-		if (n->sel.flags&TC_U32_EAT) {
+		if (n->sel.flags & TC_U32_EAT) {
 			off += off2;
 			off2 = 0;
 		}
@@ -236,11 +235,11 @@ out:
 
 deadloop:
 	if (net_ratelimit())
-		printk(KERN_WARNING "cls_u32: dead loop\n");
+		pr_warning("cls_u32: dead loop\n");
 	return -1;
 }
 
-static __inline__ struct tc_u_hnode *
+static struct tc_u_hnode *
 u32_lookup_ht(struct tc_u_common *tp_c, u32 handle)
 {
 	struct tc_u_hnode *ht;
@@ -252,10 +251,10 @@ u32_lookup_ht(struct tc_u_common *tp_c, u32 handle)
 	return ht;
 }
 
-static __inline__ struct tc_u_knode *
+static struct tc_u_knode *
 u32_lookup_key(struct tc_u_hnode *ht, u32 handle)
 {
-	unsigned sel;
+	unsigned int sel;
 	struct tc_u_knode *n = NULL;
 
 	sel = TC_U32_HASH(handle);
@@ -300,7 +299,7 @@ static u32 gen_new_htid(struct tc_u_common *tp_c)
 	do {
 		if (++tp_c->hgenerator == 0x7FF)
 			tp_c->hgenerator = 1;
-	} while (--i>0 && u32_lookup_ht(tp_c, (tp_c->hgenerator|0x800)<<20));
+	} while (--i > 0 && u32_lookup_ht(tp_c, (tp_c->hgenerator|0x800)<<20));
 
 	return i > 0 ? (tp_c->hgenerator|0x800)<<20 : 0;
 }
@@ -378,9 +377,9 @@ static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode* key)
 static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
 {
 	struct tc_u_knode *n;
-	unsigned h;
+	unsigned int h;
 
-	for (h=0; h<=ht->divisor; h++) {
+	for (h = 0; h <= ht->divisor; h++) {
 		while ((n = ht->ht[h]) != NULL) {
 			ht->ht[h] = n->next;
 
@@ -446,13 +445,13 @@ static void u32_destroy(struct tcf_proto *tp)
 
 static int u32_delete(struct tcf_proto *tp, unsigned long arg)
 {
-	struct tc_u_hnode *ht = (struct tc_u_hnode*)arg;
+	struct tc_u_hnode *ht = (struct tc_u_hnode *)arg;
 
 	if (ht == NULL)
 		return 0;
 
 	if (TC_U32_KEY(ht->handle))
-		return u32_delete_key(tp, (struct tc_u_knode*)ht);
+		return u32_delete_key(tp, (struct tc_u_knode *)ht);
 
 	if (tp->root == ht)
 		return -EINVAL;
@@ -470,14 +469,14 @@ static int u32_delete(struct tcf_proto *tp, unsigned long arg)
 static u32 gen_new_kid(struct tc_u_hnode *ht, u32 handle)
 {
 	struct tc_u_knode *n;
-	unsigned i = 0x7FF;
+	unsigned int i = 0x7FF;
 
-	for (n=ht->ht[TC_U32_HASH(handle)]; n; n = n->next)
+	for (n = ht->ht[TC_U32_HASH(handle)]; n; n = n->next)
 		if (i < TC_U32_NODE(n->handle))
 			i = TC_U32_NODE(n->handle);
 	i++;
 
-	return handle|(i>0xFFF ? 0xFFF : i);
+	return handle | (i > 0xFFF ? 0xFFF : i);
 }
 
 static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = {
@@ -566,7 +565,8 @@ static int u32_change(struct tcf_proto *tp, unsigned long base, u32 handle,
 	if (err < 0)
 		return err;
 
-	if ((n = (struct tc_u_knode*)*arg) != NULL) {
+	n = (struct tc_u_knode *)*arg;
+	if (n) {
 		if (TC_U32_KEY(n->handle) == 0)
 			return -EINVAL;
 
@@ -574,7 +574,7 @@ static int u32_change(struct tcf_proto *tp, unsigned long base, u32 handle,
 	}
 
 	if (tb[TCA_U32_DIVISOR]) {
-		unsigned divisor = nla_get_u32(tb[TCA_U32_DIVISOR]);
+		unsigned int divisor = nla_get_u32(tb[TCA_U32_DIVISOR]);
 
 		if (--divisor > 0x100)
 			return -EINVAL;
@@ -585,7 +585,7 @@ static int u32_change(struct tcf_proto *tp, unsigned long base, u32 handle,
 			if (handle == 0)
 				return -ENOMEM;
 		}
-		ht = kzalloc(sizeof(*ht) + divisor*sizeof(void*), GFP_KERNEL);
+		ht = kzalloc(sizeof(*ht) + divisor*sizeof(void *), GFP_KERNEL);
 		if (ht == NULL)
 			return -ENOBUFS;
 		ht->tp_c = tp_c;
@@ -683,7 +683,7 @@ static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg)
 	struct tc_u_common *tp_c = tp->data;
 	struct tc_u_hnode *ht;
 	struct tc_u_knode *n;
-	unsigned h;
+	unsigned int h;
 
 	if (arg->stop)
 		return;
@@ -717,7 +717,7 @@ static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg)
 static int u32_dump(struct tcf_proto *tp, unsigned long fh,
 		     struct sk_buff *skb, struct tcmsg *t)
 {
-	struct tc_u_knode *n = (struct tc_u_knode*)fh;
+	struct tc_u_knode *n = (struct tc_u_knode *)fh;
 	struct nlattr *nest;
 
 	if (n == NULL)
@@ -730,8 +730,9 @@ static int u32_dump(struct tcf_proto *tp, unsigned long fh,
 		goto nla_put_failure;
 
 	if (TC_U32_KEY(n->handle) == 0) {
-		struct tc_u_hnode *ht = (struct tc_u_hnode*)fh;
-		u32 divisor = ht->divisor+1;
+		struct tc_u_hnode *ht = (struct tc_u_hnode *)fh;
+		u32 divisor = ht->divisor + 1;
+
 		NLA_PUT_U32(skb, TCA_U32_DIVISOR, divisor);
 	} else {
 		NLA_PUT(skb, TCA_U32_SEL,
@@ -755,7 +756,7 @@ static int u32_dump(struct tcf_proto *tp, unsigned long fh,
 			goto nla_put_failure;
 
 #ifdef CONFIG_NET_CLS_IND
-		if(strlen(n->indev))
+		if (strlen(n->indev))
 			NLA_PUT_STRING(skb, TCA_U32_INDEV, n->indev);
 #endif
 #ifdef CONFIG_CLS_U32_PERF
diff --git a/net/sched/em_cmp.c b/net/sched/em_cmp.c
index bc45039..1c8360a 100644
--- a/net/sched/em_cmp.c
+++ b/net/sched/em_cmp.c
@@ -33,40 +33,41 @@ static int em_cmp_match(struct sk_buff *skb, struct tcf_ematch *em,
 		return 0;
 
 	switch (cmp->align) {
-		case TCF_EM_ALIGN_U8:
-			val = *ptr;
-			break;
+	case TCF_EM_ALIGN_U8:
+		val = *ptr;
+		break;
 
-		case TCF_EM_ALIGN_U16:
-			val = get_unaligned_be16(ptr);
+	case TCF_EM_ALIGN_U16:
+		val = get_unaligned_be16(ptr);
 
-			if (cmp_needs_transformation(cmp))
-				val = be16_to_cpu(val);
-			break;
+		if (cmp_needs_transformation(cmp))
+			val = be16_to_cpu(val);
+		break;
 
-		case TCF_EM_ALIGN_U32:
-			/* Worth checking boundries? The branching seems
-			 * to get worse. Visit again. */
-			val = get_unaligned_be32(ptr);
+	case TCF_EM_ALIGN_U32:
+		/* Worth checking boundries? The branching seems
+		 * to get worse. Visit again.
+		 */
+		val = get_unaligned_be32(ptr);
 
-			if (cmp_needs_transformation(cmp))
-				val = be32_to_cpu(val);
-			break;
+		if (cmp_needs_transformation(cmp))
+			val = be32_to_cpu(val);
+		break;
 
-		default:
-			return 0;
+	default:
+		return 0;
 	}
 
 	if (cmp->mask)
 		val &= cmp->mask;
 
 	switch (cmp->opnd) {
-		case TCF_EM_OPND_EQ:
-			return val == cmp->val;
-		case TCF_EM_OPND_LT:
-			return val < cmp->val;
-		case TCF_EM_OPND_GT:
-			return val > cmp->val;
+	case TCF_EM_OPND_EQ:
+		return val == cmp->val;
+	case TCF_EM_OPND_LT:
+		return val < cmp->val;
+	case TCF_EM_OPND_GT:
+		return val > cmp->val;
 	}
 
 	return 0;
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
index 34da5e2..a4de67e 100644
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -73,21 +73,18 @@
 #include <net/pkt_cls.h>
 #include <net/sock.h>
 
-struct meta_obj
-{
+struct meta_obj {
 	unsigned long		value;
 	unsigned int		len;
 };
 
-struct meta_value
-{
+struct meta_value {
 	struct tcf_meta_val	hdr;
 	unsigned long		val;
 	unsigned int		len;
 };
 
-struct meta_match
-{
+struct meta_match {
 	struct meta_value	lvalue;
 	struct meta_value	rvalue;
 };
@@ -255,7 +252,7 @@ META_COLLECTOR(int_rtclassid)
 	if (unlikely(skb_dst(skb) == NULL))
 		*err = -1;
 	else
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
 		dst->value = skb_dst(skb)->tclassid;
 #else
 		dst->value = 0;
@@ -267,7 +264,7 @@ META_COLLECTOR(int_rtiif)
 	if (unlikely(skb_rtable(skb) == NULL))
 		*err = -1;
 	else
-		dst->value = skb_rtable(skb)->fl.iif;
+		dst->value = skb_rtable(skb)->rt_iif;
 }
 
 /**************************************************************************
@@ -404,7 +401,7 @@ META_COLLECTOR(int_sk_sndbuf)
 META_COLLECTOR(int_sk_alloc)
 {
 	SKIP_NONLOCAL(skb);
-	dst->value = skb->sk->sk_allocation;
+	dst->value = (__force int) skb->sk->sk_allocation;
 }
 
 META_COLLECTOR(int_sk_route_caps)
@@ -483,8 +480,7 @@ META_COLLECTOR(int_sk_write_pend)
  * Meta value collectors assignment table
  **************************************************************************/
 
-struct meta_ops
-{
+struct meta_ops {
 	void		(*get)(struct sk_buff *, struct tcf_pkt_info *,
 			       struct meta_value *, struct meta_obj *, int *);
 };
@@ -494,7 +490,7 @@ struct meta_ops
 
 /* Meta value operations table listing all meta value collectors and
  * assigns them to a type and meta id. */
-static struct meta_ops __meta_ops[TCF_META_TYPE_MAX+1][TCF_META_ID_MAX+1] = {
+static struct meta_ops __meta_ops[TCF_META_TYPE_MAX + 1][TCF_META_ID_MAX + 1] = {
 	[TCF_META_TYPE_VAR] = {
 		[META_ID(DEV)]			= META_FUNC(var_dev),
 		[META_ID(SK_BOUND_IF)] 		= META_FUNC(var_sk_bound_if),
@@ -550,7 +546,7 @@ static struct meta_ops __meta_ops[TCF_META_TYPE_MAX+1][TCF_META_ID_MAX+1] = {
 	}
 };
 
-static inline struct meta_ops * meta_ops(struct meta_value *val)
+static inline struct meta_ops *meta_ops(struct meta_value *val)
 {
 	return &__meta_ops[meta_type(val)][meta_id(val)];
 }
@@ -649,9 +645,8 @@ static int meta_int_dump(struct sk_buff *skb, struct meta_value *v, int tlv)
 {
 	if (v->len == sizeof(unsigned long))
 		NLA_PUT(skb, tlv, sizeof(unsigned long), &v->val);
-	else if (v->len == sizeof(u32)) {
+	else if (v->len == sizeof(u32))
 		NLA_PUT_U32(skb, tlv, v->val);
-	}
 
 	return 0;
 
@@ -663,8 +658,7 @@ nla_put_failure:
  * Type specific operations table
  **************************************************************************/
 
-struct meta_type_ops
-{
+struct meta_type_ops {
 	void	(*destroy)(struct meta_value *);
 	int	(*compare)(struct meta_obj *, struct meta_obj *);
 	int	(*change)(struct meta_value *, struct nlattr *);
@@ -672,7 +666,7 @@ struct meta_type_ops
 	int	(*dump)(struct sk_buff *, struct meta_value *, int);
 };
 
-static struct meta_type_ops __meta_type_ops[TCF_META_TYPE_MAX+1] = {
+static struct meta_type_ops __meta_type_ops[TCF_META_TYPE_MAX + 1] = {
 	[TCF_META_TYPE_VAR] = {
 		.destroy = meta_var_destroy,
 		.compare = meta_var_compare,
@@ -688,7 +682,7 @@ static struct meta_type_ops __meta_type_ops[TCF_META_TYPE_MAX+1] = {
 	}
 };
 
-static inline struct meta_type_ops * meta_type_ops(struct meta_value *v)
+static inline struct meta_type_ops *meta_type_ops(struct meta_value *v)
 {
 	return &__meta_type_ops[meta_type(v)];
 }
@@ -713,7 +707,7 @@ static int meta_get(struct sk_buff *skb, struct tcf_pkt_info *info,
 		return err;
 
 	if (meta_type_ops(v)->apply_extras)
-	    meta_type_ops(v)->apply_extras(v, dst);
+		meta_type_ops(v)->apply_extras(v, dst);
 
 	return 0;
 }
@@ -732,12 +726,12 @@ static int em_meta_match(struct sk_buff *skb, struct tcf_ematch *m,
 	r = meta_type_ops(&meta->lvalue)->compare(&l_value, &r_value);
 
 	switch (meta->lvalue.hdr.op) {
-		case TCF_EM_OPND_EQ:
-			return !r;
-		case TCF_EM_OPND_LT:
-			return r < 0;
-		case TCF_EM_OPND_GT:
-			return r > 0;
+	case TCF_EM_OPND_EQ:
+		return !r;
+	case TCF_EM_OPND_LT:
+		return r < 0;
+	case TCF_EM_OPND_GT:
+		return r > 0;
 	}
 
 	return 0;
@@ -771,7 +765,7 @@ static inline int meta_change_data(struct meta_value *dst, struct nlattr *nla)
 
 static inline int meta_is_supported(struct meta_value *val)
 {
-	return (!meta_id(val) || meta_ops(val)->get);
+	return !meta_id(val) || meta_ops(val)->get;
 }
 
 static const struct nla_policy meta_policy[TCA_EM_META_MAX + 1] = {
diff --git a/net/sched/em_nbyte.c b/net/sched/em_nbyte.c
index 1a4176a..a3bed07 100644
--- a/net/sched/em_nbyte.c
+++ b/net/sched/em_nbyte.c
@@ -18,8 +18,7 @@
 #include <linux/tc_ematch/tc_em_nbyte.h>
 #include <net/pkt_cls.h>
 
-struct nbyte_data
-{
+struct nbyte_data {
 	struct tcf_em_nbyte	hdr;
 	char			pattern[0];
 };
diff --git a/net/sched/em_text.c b/net/sched/em_text.c
index ea8f566..15d353d 100644
--- a/net/sched/em_text.c
+++ b/net/sched/em_text.c
@@ -19,8 +19,7 @@
 #include <linux/tc_ematch/tc_em_text.h>
 #include <net/pkt_cls.h>
 
-struct text_match
-{
+struct text_match {
 	u16			from_offset;
 	u16			to_offset;
 	u8			from_layer;
diff --git a/net/sched/em_u32.c b/net/sched/em_u32.c
index 953f147..797bdb8 100644
--- a/net/sched/em_u32.c
+++ b/net/sched/em_u32.c
@@ -35,7 +35,7 @@ static int em_u32_match(struct sk_buff *skb, struct tcf_ematch *em,
 	if (!tcf_valid_offset(skb, ptr, sizeof(u32)))
 		return 0;
 
-	return !(((*(__be32*) ptr)  ^ key->val) & key->mask);
+	return !(((*(__be32 *) ptr)  ^ key->val) & key->mask);
 }
 
 static struct tcf_ematch_ops em_u32_ops = {
diff --git a/net/sched/ematch.c b/net/sched/ematch.c
index 5e37da9..88d93eb 100644
--- a/net/sched/ematch.c
+++ b/net/sched/ematch.c
@@ -93,7 +93,7 @@
 static LIST_HEAD(ematch_ops);
 static DEFINE_RWLOCK(ematch_mod_lock);
 
-static inline struct tcf_ematch_ops * tcf_em_lookup(u16 kind)
+static struct tcf_ematch_ops *tcf_em_lookup(u16 kind)
 {
 	struct tcf_ematch_ops *e = NULL;
 
@@ -163,8 +163,8 @@ void tcf_em_unregister(struct tcf_ematch_ops *ops)
 }
 EXPORT_SYMBOL(tcf_em_unregister);
 
-static inline struct tcf_ematch * tcf_em_get_match(struct tcf_ematch_tree *tree,
-						   int index)
+static inline struct tcf_ematch *tcf_em_get_match(struct tcf_ematch_tree *tree,
+						  int index)
 {
 	return &tree->matches[index];
 }
@@ -184,7 +184,8 @@ static int tcf_em_validate(struct tcf_proto *tp,
 
 	if (em_hdr->kind == TCF_EM_CONTAINER) {
 		/* Special ematch called "container", carries an index
-		 * referencing an external ematch sequence. */
+		 * referencing an external ematch sequence.
+		 */
 		u32 ref;
 
 		if (data_len < sizeof(ref))
@@ -195,7 +196,8 @@ static int tcf_em_validate(struct tcf_proto *tp,
 			goto errout;
 
 		/* We do not allow backward jumps to avoid loops and jumps
-		 * to our own position are of course illegal. */
+		 * to our own position are of course illegal.
+		 */
 		if (ref <= idx)
 			goto errout;
 
@@ -208,7 +210,8 @@ static int tcf_em_validate(struct tcf_proto *tp,
 		 * which automatically releases the reference again, therefore
 		 * the module MUST not be given back under any circumstances
 		 * here. Be aware, the destroy function assumes that the
-		 * module is held if the ops field is non zero. */
+		 * module is held if the ops field is non zero.
+		 */
 		em->ops = tcf_em_lookup(em_hdr->kind);
 
 		if (em->ops == NULL) {
@@ -221,7 +224,8 @@ static int tcf_em_validate(struct tcf_proto *tp,
 			if (em->ops) {
 				/* We dropped the RTNL mutex in order to
 				 * perform the module load. Tell the caller
-				 * to replay the request. */
+				 * to replay the request.
+				 */
 				module_put(em->ops->owner);
 				err = -EAGAIN;
 			}
@@ -230,7 +234,8 @@ static int tcf_em_validate(struct tcf_proto *tp,
 		}
 
 		/* ematch module provides expected length of data, so we
-		 * can do a basic sanity check. */
+		 * can do a basic sanity check.
+		 */
 		if (em->ops->datalen && data_len < em->ops->datalen)
 			goto errout;
 
@@ -246,7 +251,8 @@ static int tcf_em_validate(struct tcf_proto *tp,
 			 * TCF_EM_SIMPLE may be specified stating that the
 			 * data only consists of a u32 integer and the module
 			 * does not expected a memory reference but rather
-			 * the value carried. */
+			 * the value carried.
+			 */
 			if (em_hdr->flags & TCF_EM_SIMPLE) {
 				if (data_len < sizeof(u32))
 					goto errout;
@@ -334,7 +340,8 @@ int tcf_em_tree_validate(struct tcf_proto *tp, struct nlattr *nla,
 	 * The array of rt attributes is parsed in the order as they are
 	 * provided, their type must be incremental from 1 to n. Even
 	 * if it does not serve any real purpose, a failure of sticking
-	 * to this policy will result in parsing failure. */
+	 * to this policy will result in parsing failure.
+	 */
 	for (idx = 0; nla_ok(rt_match, list_len); idx++) {
 		err = -EINVAL;
 
@@ -359,7 +366,8 @@ int tcf_em_tree_validate(struct tcf_proto *tp, struct nlattr *nla,
 	/* Check if the number of matches provided by userspace actually
 	 * complies with the array of matches. The number was used for
 	 * the validation of references and a mismatch could lead to
-	 * undefined references during the matching process. */
+	 * undefined references during the matching process.
+	 */
 	if (idx != tree_hdr->nmatches) {
 		err = -EINVAL;
 		goto errout_abort;
@@ -449,7 +457,7 @@ int tcf_em_tree_dump(struct sk_buff *skb, struct tcf_ematch_tree *tree, int tlv)
 			.flags = em->flags
 		};
 
-		NLA_PUT(skb, i+1, sizeof(em_hdr), &em_hdr);
+		NLA_PUT(skb, i + 1, sizeof(em_hdr), &em_hdr);
 
 		if (em->ops && em->ops->dump) {
 			if (em->ops->dump(skb, em) < 0)
@@ -478,6 +486,7 @@ static inline int tcf_em_match(struct sk_buff *skb, struct tcf_ematch *em,
 			       struct tcf_pkt_info *info)
 {
 	int r = em->ops->match(skb, em, info);
+
 	return tcf_em_is_inverted(em) ? !r : r;
 }
 
@@ -527,8 +536,8 @@ pop_stack:
 
 stack_overflow:
 	if (net_ratelimit())
-		printk(KERN_WARNING "tc ematch: local stack overflow,"
-			" increase NET_EMATCH_STACK\n");
+		pr_warning("tc ematch: local stack overflow,"
+			   " increase NET_EMATCH_STACK\n");
 	return -1;
 }
 EXPORT_SYMBOL(__tcf_em_tree_match);
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index b22ca2d..7490f3f 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -187,7 +187,7 @@ int unregister_qdisc(struct Qdisc_ops *qops)
 	int err = -ENOENT;
 
 	write_lock(&qdisc_mod_lock);
-	for (qp = &qdisc_base; (q=*qp)!=NULL; qp = &q->next)
+	for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
 		if (q == qops)
 			break;
 	if (q) {
@@ -321,7 +321,9 @@ void qdisc_put_rtab(struct qdisc_rate_table *tab)
 	if (!tab || --tab->refcnt)
 		return;
 
-	for (rtabp = &qdisc_rtab_list; (rtab=*rtabp) != NULL; rtabp = &rtab->next) {
+	for (rtabp = &qdisc_rtab_list;
+	     (rtab = *rtabp) != NULL;
+	     rtabp = &rtab->next) {
 		if (rtab == tab) {
 			*rtabp = rtab->next;
 			kfree(rtab);
@@ -396,6 +398,11 @@ static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt)
 	return stab;
 }
 
+static void stab_kfree_rcu(struct rcu_head *head)
+{
+	kfree(container_of(head, struct qdisc_size_table, rcu));
+}
+
 void qdisc_put_stab(struct qdisc_size_table *tab)
 {
 	if (!tab)
@@ -405,7 +412,7 @@ void qdisc_put_stab(struct qdisc_size_table *tab)
 
 	if (--tab->refcnt == 0) {
 		list_del(&tab->list);
-		kfree(tab);
+		call_rcu_bh(&tab->rcu, stab_kfree_rcu);
 	}
 
 	spin_unlock(&qdisc_stab_lock);
@@ -428,7 +435,7 @@ nla_put_failure:
 	return -1;
 }
 
-void qdisc_calculate_pkt_len(struct sk_buff *skb, struct qdisc_size_table *stab)
+void __qdisc_calculate_pkt_len(struct sk_buff *skb, const struct qdisc_size_table *stab)
 {
 	int pkt_len, slot;
 
@@ -454,14 +461,13 @@ out:
 		pkt_len = 1;
 	qdisc_skb_cb(skb)->pkt_len = pkt_len;
 }
-EXPORT_SYMBOL(qdisc_calculate_pkt_len);
+EXPORT_SYMBOL(__qdisc_calculate_pkt_len);
 
 void qdisc_warn_nonwc(char *txt, struct Qdisc *qdisc)
 {
 	if (!(qdisc->flags & TCQ_F_WARN_NONWC)) {
-		printk(KERN_WARNING
-		       "%s: %s qdisc %X: is non-work-conserving?\n",
-		       txt, qdisc->ops->id, qdisc->handle >> 16);
+		pr_warn("%s: %s qdisc %X: is non-work-conserving?\n",
+			txt, qdisc->ops->id, qdisc->handle >> 16);
 		qdisc->flags |= TCQ_F_WARN_NONWC;
 	}
 }
@@ -472,7 +478,7 @@ static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
 	struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
 						 timer);
 
-	wd->qdisc->flags &= ~TCQ_F_THROTTLED;
+	qdisc_unthrottled(wd->qdisc);
 	__netif_schedule(qdisc_root(wd->qdisc));
 
 	return HRTIMER_NORESTART;
@@ -494,7 +500,7 @@ void qdisc_watchdog_schedule(struct qdisc_watchdog *wd, psched_time_t expires)
 		     &qdisc_root_sleeping(wd->qdisc)->state))
 		return;
 
-	wd->qdisc->flags |= TCQ_F_THROTTLED;
+	qdisc_throttled(wd->qdisc);
 	time = ktime_set(0, 0);
 	time = ktime_add_ns(time, PSCHED_TICKS2NS(expires));
 	hrtimer_start(&wd->timer, time, HRTIMER_MODE_ABS);
@@ -504,7 +510,7 @@ EXPORT_SYMBOL(qdisc_watchdog_schedule);
 void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
 {
 	hrtimer_cancel(&wd->timer);
-	wd->qdisc->flags &= ~TCQ_F_THROTTLED;
+	qdisc_unthrottled(wd->qdisc);
 }
 EXPORT_SYMBOL(qdisc_watchdog_cancel);
 
@@ -625,7 +631,7 @@ static u32 qdisc_alloc_handle(struct net_device *dev)
 			autohandle = TC_H_MAKE(0x80000000U, 0);
 	} while	(qdisc_lookup(dev, autohandle) && --i > 0);
 
-	return i>0 ? autohandle : 0;
+	return i > 0 ? autohandle : 0;
 }
 
 void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
@@ -834,7 +840,7 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
 				err = PTR_ERR(stab);
 				goto err_out4;
 			}
-			sch->stab = stab;
+			rcu_assign_pointer(sch->stab, stab);
 		}
 		if (tca[TCA_RATE]) {
 			spinlock_t *root_lock;
@@ -874,7 +880,7 @@ err_out4:
 	 * Any broken qdiscs that would require a ops->reset() here?
 	 * The qdisc was never in action so it shouldn't be necessary.
 	 */
-	qdisc_put_stab(sch->stab);
+	qdisc_put_stab(rtnl_dereference(sch->stab));
 	if (ops->destroy)
 		ops->destroy(sch);
 	goto err_out3;
@@ -882,7 +888,7 @@ err_out4:
 
 static int qdisc_change(struct Qdisc *sch, struct nlattr **tca)
 {
-	struct qdisc_size_table *stab = NULL;
+	struct qdisc_size_table *ostab, *stab = NULL;
 	int err = 0;
 
 	if (tca[TCA_OPTIONS]) {
@@ -899,8 +905,9 @@ static int qdisc_change(struct Qdisc *sch, struct nlattr **tca)
 			return PTR_ERR(stab);
 	}
 
-	qdisc_put_stab(sch->stab);
-	sch->stab = stab;
+	ostab = rtnl_dereference(sch->stab);
+	rcu_assign_pointer(sch->stab, stab);
+	qdisc_put_stab(ostab);
 
 	if (tca[TCA_RATE]) {
 		/* NB: ignores errors from replace_estimator
@@ -915,9 +922,8 @@ out:
 	return 0;
 }
 
-struct check_loop_arg
-{
-	struct qdisc_walker 	w;
+struct check_loop_arg {
+	struct qdisc_walker	w;
 	struct Qdisc		*p;
 	int			depth;
 };
@@ -970,7 +976,8 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 	struct Qdisc *p = NULL;
 	int err;
 
-	if ((dev = __dev_get_by_index(net, tcm->tcm_ifindex)) == NULL)
+	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
+	if (!dev)
 		return -ENODEV;
 
 	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
@@ -980,12 +987,12 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 	if (clid) {
 		if (clid != TC_H_ROOT) {
 			if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
-				if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL)
+				p = qdisc_lookup(dev, TC_H_MAJ(clid));
+				if (!p)
 					return -ENOENT;
 				q = qdisc_leaf(p, clid);
-			} else { /* ingress */
-				if (dev_ingress_queue(dev))
-					q = dev_ingress_queue(dev)->qdisc_sleeping;
+			} else if (dev_ingress_queue(dev)) {
+				q = dev_ingress_queue(dev)->qdisc_sleeping;
 			}
 		} else {
 			q = dev->qdisc;
@@ -996,7 +1003,8 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 		if (tcm->tcm_handle && q->handle != tcm->tcm_handle)
 			return -EINVAL;
 	} else {
-		if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL)
+		q = qdisc_lookup(dev, tcm->tcm_handle);
+		if (!q)
 			return -ENOENT;
 	}
 
@@ -1008,7 +1016,8 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 			return -EINVAL;
 		if (q->handle == 0)
 			return -ENOENT;
-		if ((err = qdisc_graft(dev, p, skb, n, clid, NULL, q)) != 0)
+		err = qdisc_graft(dev, p, skb, n, clid, NULL, q);
+		if (err != 0)
 			return err;
 	} else {
 		qdisc_notify(net, skb, n, clid, NULL, q);
@@ -1017,7 +1026,7 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 }
 
 /*
-   Create/change qdisc.
+ * Create/change qdisc.
  */
 
 static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
@@ -1036,7 +1045,8 @@ replay:
 	clid = tcm->tcm_parent;
 	q = p = NULL;
 
-	if ((dev = __dev_get_by_index(net, tcm->tcm_ifindex)) == NULL)
+	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
+	if (!dev)
 		return -ENODEV;
 
 	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
@@ -1046,12 +1056,12 @@ replay:
 	if (clid) {
 		if (clid != TC_H_ROOT) {
 			if (clid != TC_H_INGRESS) {
-				if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL)
+				p = qdisc_lookup(dev, TC_H_MAJ(clid));
+				if (!p)
 					return -ENOENT;
 				q = qdisc_leaf(p, clid);
-			} else { /* ingress */
-				if (dev_ingress_queue_create(dev))
-					q = dev_ingress_queue(dev)->qdisc_sleeping;
+			} else if (dev_ingress_queue_create(dev)) {
+				q = dev_ingress_queue(dev)->qdisc_sleeping;
 			}
 		} else {
 			q = dev->qdisc;
@@ -1063,13 +1073,14 @@ replay:
 
 		if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
 			if (tcm->tcm_handle) {
-				if (q && !(n->nlmsg_flags&NLM_F_REPLACE))
+				if (q && !(n->nlmsg_flags & NLM_F_REPLACE))
 					return -EEXIST;
 				if (TC_H_MIN(tcm->tcm_handle))
 					return -EINVAL;
-				if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL)
+				q = qdisc_lookup(dev, tcm->tcm_handle);
+				if (!q)
 					goto create_n_graft;
-				if (n->nlmsg_flags&NLM_F_EXCL)
+				if (n->nlmsg_flags & NLM_F_EXCL)
 					return -EEXIST;
 				if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
 					return -EINVAL;
@@ -1079,7 +1090,7 @@ replay:
 				atomic_inc(&q->refcnt);
 				goto graft;
 			} else {
-				if (q == NULL)
+				if (!q)
 					goto create_n_graft;
 
 				/* This magic test requires explanation.
@@ -1101,9 +1112,9 @@ replay:
 				 *   For now we select create/graft, if
 				 *   user gave KIND, which does not match existing.
 				 */
-				if ((n->nlmsg_flags&NLM_F_CREATE) &&
-				    (n->nlmsg_flags&NLM_F_REPLACE) &&
-				    ((n->nlmsg_flags&NLM_F_EXCL) ||
+				if ((n->nlmsg_flags & NLM_F_CREATE) &&
+				    (n->nlmsg_flags & NLM_F_REPLACE) &&
+				    ((n->nlmsg_flags & NLM_F_EXCL) ||
 				     (tca[TCA_KIND] &&
 				      nla_strcmp(tca[TCA_KIND], q->ops->id))))
 					goto create_n_graft;
@@ -1118,7 +1129,7 @@ replay:
 	/* Change qdisc parameters */
 	if (q == NULL)
 		return -ENOENT;
-	if (n->nlmsg_flags&NLM_F_EXCL)
+	if (n->nlmsg_flags & NLM_F_EXCL)
 		return -EEXIST;
 	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
 		return -EINVAL;
@@ -1128,7 +1139,7 @@ replay:
 	return err;
 
 create_n_graft:
-	if (!(n->nlmsg_flags&NLM_F_CREATE))
+	if (!(n->nlmsg_flags & NLM_F_CREATE))
 		return -ENOENT;
 	if (clid == TC_H_INGRESS) {
 		if (dev_ingress_queue(dev))
@@ -1175,6 +1186,7 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
 	struct nlmsghdr  *nlh;
 	unsigned char *b = skb_tail_pointer(skb);
 	struct gnet_dump d;
+	struct qdisc_size_table *stab;
 
 	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
 	tcm = NLMSG_DATA(nlh);
@@ -1190,7 +1202,8 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
 		goto nla_put_failure;
 	q->qstats.qlen = q->q.qlen;
 
-	if (q->stab && qdisc_dump_stab(skb, q->stab) < 0)
+	stab = rtnl_dereference(q->stab);
+	if (stab && qdisc_dump_stab(skb, stab) < 0)
 		goto nla_put_failure;
 
 	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
@@ -1234,16 +1247,19 @@ static int qdisc_notify(struct net *net, struct sk_buff *oskb,
 		return -ENOBUFS;
 
 	if (old && !tc_qdisc_dump_ignore(old)) {
-		if (tc_fill_qdisc(skb, old, clid, pid, n->nlmsg_seq, 0, RTM_DELQDISC) < 0)
+		if (tc_fill_qdisc(skb, old, clid, pid, n->nlmsg_seq,
+				  0, RTM_DELQDISC) < 0)
 			goto err_out;
 	}
 	if (new && !tc_qdisc_dump_ignore(new)) {
-		if (tc_fill_qdisc(skb, new, clid, pid, n->nlmsg_seq, old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
+		if (tc_fill_qdisc(skb, new, clid, pid, n->nlmsg_seq,
+				  old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
 			goto err_out;
 	}
 
 	if (skb->len)
-		return rtnetlink_send(skb, net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
+		return rtnetlink_send(skb, net, pid, RTNLGRP_TC,
+				      n->nlmsg_flags & NLM_F_ECHO);
 
 err_out:
 	kfree_skb(skb);
@@ -1275,7 +1291,7 @@ static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
 			q_idx++;
 			continue;
 		}
-		if (!tc_qdisc_dump_ignore(q) && 
+		if (!tc_qdisc_dump_ignore(q) &&
 		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid,
 				  cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
 			goto done;
@@ -1356,7 +1372,8 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 	u32 qid = TC_H_MAJ(clid);
 	int err;
 
-	if ((dev = __dev_get_by_index(net, tcm->tcm_ifindex)) == NULL)
+	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
+	if (!dev)
 		return -ENODEV;
 
 	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
@@ -1391,9 +1408,9 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 			qid = dev->qdisc->handle;
 
 		/* Now qid is genuine qdisc handle consistent
-		   both with parent and child.
-
-		   TC_H_MAJ(pid) still may be unspecified, complete it now.
+		 * both with parent and child.
+		 *
+		 * TC_H_MAJ(pid) still may be unspecified, complete it now.
 		 */
 		if (pid)
 			pid = TC_H_MAKE(qid, pid);
@@ -1403,7 +1420,8 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 	}
 
 	/* OK. Locate qdisc */
-	if ((q = qdisc_lookup(dev, qid)) == NULL)
+	q = qdisc_lookup(dev, qid);
+	if (!q)
 		return -ENOENT;
 
 	/* An check that it supports classes */
@@ -1423,13 +1441,14 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 
 	if (cl == 0) {
 		err = -ENOENT;
-		if (n->nlmsg_type != RTM_NEWTCLASS || !(n->nlmsg_flags&NLM_F_CREATE))
+		if (n->nlmsg_type != RTM_NEWTCLASS ||
+		    !(n->nlmsg_flags & NLM_F_CREATE))
 			goto out;
 	} else {
 		switch (n->nlmsg_type) {
 		case RTM_NEWTCLASS:
 			err = -EEXIST;
-			if (n->nlmsg_flags&NLM_F_EXCL)
+			if (n->nlmsg_flags & NLM_F_EXCL)
 				goto out;
 			break;
 		case RTM_DELTCLASS:
@@ -1521,14 +1540,14 @@ static int tclass_notify(struct net *net, struct sk_buff *oskb,
 		return -EINVAL;
 	}
 
-	return rtnetlink_send(skb, net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
+	return rtnetlink_send(skb, net, pid, RTNLGRP_TC,
+			      n->nlmsg_flags & NLM_F_ECHO);
 }
 
-struct qdisc_dump_args
-{
-	struct qdisc_walker w;
-	struct sk_buff *skb;
-	struct netlink_callback *cb;
+struct qdisc_dump_args {
+	struct qdisc_walker	w;
+	struct sk_buff		*skb;
+	struct netlink_callback	*cb;
 };
 
 static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walker *arg)
@@ -1590,7 +1609,7 @@ static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
 
 static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
 {
-	struct tcmsg *tcm = (struct tcmsg*)NLMSG_DATA(cb->nlh);
+	struct tcmsg *tcm = (struct tcmsg *)NLMSG_DATA(cb->nlh);
 	struct net *net = sock_net(skb->sk);
 	struct netdev_queue *dev_queue;
 	struct net_device *dev;
@@ -1598,7 +1617,8 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
 
 	if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm)))
 		return 0;
-	if ((dev = dev_get_by_index(net, tcm->tcm_ifindex)) == NULL)
+	dev = dev_get_by_index(net, tcm->tcm_ifindex);
+	if (!dev)
 		return 0;
 
 	s_t = cb->args[0];
@@ -1621,19 +1641,22 @@ done:
 }
 
 /* Main classifier routine: scans classifier chain attached
-   to this qdisc, (optionally) tests for protocol and asks
-   specific classifiers.
+ * to this qdisc, (optionally) tests for protocol and asks
+ * specific classifiers.
  */
 int tc_classify_compat(struct sk_buff *skb, struct tcf_proto *tp,
 		       struct tcf_result *res)
 {
 	__be16 protocol = skb->protocol;
-	int err = 0;
+	int err;
 
 	for (; tp; tp = tp->next) {
-		if ((tp->protocol == protocol ||
-		     tp->protocol == htons(ETH_P_ALL)) &&
-		    (err = tp->classify(skb, tp, res)) >= 0) {
+		if (tp->protocol != protocol &&
+		    tp->protocol != htons(ETH_P_ALL))
+			continue;
+		err = tp->classify(skb, tp, res);
+
+		if (err >= 0) {
 #ifdef CONFIG_NET_CLS_ACT
 			if (err != TC_ACT_RECLASSIFY && skb->tc_verd)
 				skb->tc_verd = SET_TC_VERD(skb->tc_verd, 0);
@@ -1649,12 +1672,12 @@ int tc_classify(struct sk_buff *skb, struct tcf_proto *tp,
 		struct tcf_result *res)
 {
 	int err = 0;
-	__be16 protocol;
 #ifdef CONFIG_NET_CLS_ACT
+	__be16 protocol;
 	struct tcf_proto *otp = tp;
 reclassify:
-#endif
 	protocol = skb->protocol;
+#endif
 
 	err = tc_classify_compat(skb, tp, res);
 #ifdef CONFIG_NET_CLS_ACT
@@ -1664,11 +1687,11 @@ reclassify:
 
 		if (verd++ >= MAX_REC_LOOP) {
 			if (net_ratelimit())
-				printk(KERN_NOTICE
-				       "%s: packet reclassify loop"
+				pr_notice("%s: packet reclassify loop"
 					  " rule prio %u protocol %02x\n",
-				       tp->q->ops->id,
-				       tp->prio & 0xffff, ntohs(tp->protocol));
+					  tp->q->ops->id,
+					  tp->prio & 0xffff,
+					  ntohs(tp->protocol));
 			return TC_ACT_SHOT;
 		}
 		skb->tc_verd = SET_TC_VERD(skb->tc_verd, verd);
@@ -1761,7 +1784,7 @@ static int __init pktsched_init(void)
 
 	err = register_pernet_subsys(&psched_net_ops);
 	if (err) {
-		printk(KERN_ERR "pktsched_init: "
+		pr_err("pktsched_init: "
 		       "cannot initialize per netns operations\n");
 		return err;
 	}
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index 943d733..3f08158 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -319,7 +319,7 @@ static int atm_tc_delete(struct Qdisc *sch, unsigned long arg)
 	 * creation), and one for the reference held when calling delete.
 	 */
 	if (flow->ref < 2) {
-		printk(KERN_ERR "atm_tc_delete: flow->ref == %d\n", flow->ref);
+		pr_err("atm_tc_delete: flow->ref == %d\n", flow->ref);
 		return -EINVAL;
 	}
 	if (flow->ref > 2)
@@ -384,12 +384,12 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 			}
 		}
 		flow = NULL;
-	done:
-		;		
+done:
+		;
 	}
-	if (!flow)
+	if (!flow) {
 		flow = &p->link;
-	else {
+	} else {
 		if (flow->vcc)
 			ATM_SKB(skb)->atm_options = flow->vcc->atm_options;
 		/*@@@ looks good ... but it's not supposed to work :-) */
@@ -576,8 +576,7 @@ static void atm_tc_destroy(struct Qdisc *sch)
 
 	list_for_each_entry_safe(flow, tmp, &p->flows, list) {
 		if (flow->ref > 1)
-			printk(KERN_ERR "atm_destroy: %p->ref = %d\n", flow,
-			       flow->ref);
+			pr_err("atm_destroy: %p->ref = %d\n", flow, flow->ref);
 		atm_tc_put(sch, (unsigned long)flow);
 	}
 	tasklet_kill(&p->task);
@@ -616,9 +615,8 @@ static int atm_tc_dump_class(struct Qdisc *sch, unsigned long cl,
 	}
 	if (flow->excess)
 		NLA_PUT_U32(skb, TCA_ATM_EXCESS, flow->classid);
-	else {
+	else
 		NLA_PUT_U32(skb, TCA_ATM_EXCESS, 0);
-	}
 
 	nla_nest_end(skb, nest);
 	return skb->len;
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 5f63ec5..24d94c0 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -72,8 +72,7 @@
 struct cbq_sched_data;
 
 
-struct cbq_class
-{
+struct cbq_class {
 	struct Qdisc_class_common common;
 	struct cbq_class	*next_alive;	/* next class with backlog in this priority band */
 
@@ -139,19 +138,18 @@ struct cbq_class
 	int			refcnt;
 	int			filters;
 
-	struct cbq_class 	*defaults[TC_PRIO_MAX+1];
+	struct cbq_class	*defaults[TC_PRIO_MAX + 1];
 };
 
-struct cbq_sched_data
-{
+struct cbq_sched_data {
 	struct Qdisc_class_hash	clhash;			/* Hash table of all classes */
-	int			nclasses[TC_CBQ_MAXPRIO+1];
-	unsigned		quanta[TC_CBQ_MAXPRIO+1];
+	int			nclasses[TC_CBQ_MAXPRIO + 1];
+	unsigned int		quanta[TC_CBQ_MAXPRIO + 1];
 
 	struct cbq_class	link;
 
-	unsigned		activemask;
-	struct cbq_class	*active[TC_CBQ_MAXPRIO+1];	/* List of all classes
+	unsigned int		activemask;
+	struct cbq_class	*active[TC_CBQ_MAXPRIO + 1];	/* List of all classes
 								   with backlog */
 
 #ifdef CONFIG_NET_CLS_ACT
@@ -162,7 +160,7 @@ struct cbq_sched_data
 	int			tx_len;
 	psched_time_t		now;		/* Cached timestamp */
 	psched_time_t		now_rt;		/* Cached real time */
-	unsigned		pmask;
+	unsigned int		pmask;
 
 	struct hrtimer		delay_timer;
 	struct qdisc_watchdog	watchdog;	/* Watchdog timer,
@@ -175,9 +173,9 @@ struct cbq_sched_data
 };
 
 
-#define L2T(cl,len)	qdisc_l2t((cl)->R_tab,len)
+#define L2T(cl, len)	qdisc_l2t((cl)->R_tab, len)
 
-static __inline__ struct cbq_class *
+static inline struct cbq_class *
 cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
 {
 	struct Qdisc_class_common *clc;
@@ -193,25 +191,27 @@ cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
 static struct cbq_class *
 cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
 {
-	struct cbq_class *cl, *new;
+	struct cbq_class *cl;
 
-	for (cl = this->tparent; cl; cl = cl->tparent)
-		if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this)
-			return new;
+	for (cl = this->tparent; cl; cl = cl->tparent) {
+		struct cbq_class *new = cl->defaults[TC_PRIO_BESTEFFORT];
 
+		if (new != NULL && new != this)
+			return new;
+	}
 	return NULL;
 }
 
 #endif
 
 /* Classify packet. The procedure is pretty complicated, but
-   it allows us to combine link sharing and priority scheduling
-   transparently.
-
-   Namely, you can put link sharing rules (f.e. route based) at root of CBQ,
-   so that it resolves to split nodes. Then packets are classified
-   by logical priority, or a more specific classifier may be attached
-   to the split node.
+ * it allows us to combine link sharing and priority scheduling
+ * transparently.
+ *
+ * Namely, you can put link sharing rules (f.e. route based) at root of CBQ,
+ * so that it resolves to split nodes. Then packets are classified
+ * by logical priority, or a more specific classifier may be attached
+ * to the split node.
  */
 
 static struct cbq_class *
@@ -227,7 +227,7 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
 	/*
 	 *  Step 1. If skb->priority points to one of our classes, use it.
 	 */
-	if (TC_H_MAJ(prio^sch->handle) == 0 &&
+	if (TC_H_MAJ(prio ^ sch->handle) == 0 &&
 	    (cl = cbq_class_lookup(q, prio)) != NULL)
 		return cl;
 
@@ -243,10 +243,11 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
 		    (result = tc_classify_compat(skb, head->filter_list, &res)) < 0)
 			goto fallback;
 
-		if ((cl = (void*)res.class) == NULL) {
+		cl = (void *)res.class;
+		if (!cl) {
 			if (TC_H_MAJ(res.classid))
 				cl = cbq_class_lookup(q, res.classid);
-			else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL)
+			else if ((cl = defmap[res.classid & TC_PRIO_MAX]) == NULL)
 				cl = defmap[TC_PRIO_BESTEFFORT];
 
 			if (cl == NULL || cl->level >= head->level)
@@ -282,7 +283,7 @@ fallback:
 	 * Step 4. No success...
 	 */
 	if (TC_H_MAJ(prio) == 0 &&
-	    !(cl = head->defaults[prio&TC_PRIO_MAX]) &&
+	    !(cl = head->defaults[prio & TC_PRIO_MAX]) &&
 	    !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
 		return head;
 
@@ -290,12 +291,12 @@ fallback:
 }
 
 /*
-   A packet has just been enqueued on the empty class.
-   cbq_activate_class adds it to the tail of active class list
-   of its priority band.
+ * A packet has just been enqueued on the empty class.
+ * cbq_activate_class adds it to the tail of active class list
+ * of its priority band.
  */
 
-static __inline__ void cbq_activate_class(struct cbq_class *cl)
+static inline void cbq_activate_class(struct cbq_class *cl)
 {
 	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
 	int prio = cl->cpriority;
@@ -314,9 +315,9 @@ static __inline__ void cbq_activate_class(struct cbq_class *cl)
 }
 
 /*
-   Unlink class from active chain.
-   Note that this same procedure is done directly in cbq_dequeue*
-   during round-robin procedure.
+ * Unlink class from active chain.
+ * Note that this same procedure is done directly in cbq_dequeue*
+ * during round-robin procedure.
  */
 
 static void cbq_deactivate_class(struct cbq_class *this)
@@ -350,7 +351,7 @@ cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
 {
 	int toplevel = q->toplevel;
 
-	if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) {
+	if (toplevel > cl->level && !(qdisc_is_throttled(cl->q))) {
 		psched_time_t now;
 		psched_tdiff_t incr;
 
@@ -363,7 +364,7 @@ cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
 				q->toplevel = cl->level;
 				return;
 			}
-		} while ((cl=cl->borrow) != NULL && toplevel > cl->level);
+		} while ((cl = cl->borrow) != NULL && toplevel > cl->level);
 	}
 }
 
@@ -417,11 +418,11 @@ static void cbq_ovl_classic(struct cbq_class *cl)
 		delay += cl->offtime;
 
 		/*
-		   Class goes to sleep, so that it will have no
-		   chance to work avgidle. Let's forgive it 8)
-
-		   BTW cbq-2.0 has a crap in this
-		   place, apparently they forgot to shift it by cl->ewma_log.
+		 * Class goes to sleep, so that it will have no
+		 * chance to work avgidle. Let's forgive it 8)
+		 *
+		 * BTW cbq-2.0 has a crap in this
+		 * place, apparently they forgot to shift it by cl->ewma_log.
 		 */
 		if (cl->avgidle < 0)
 			delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
@@ -438,8 +439,8 @@ static void cbq_ovl_classic(struct cbq_class *cl)
 		q->wd_expires = delay;
 
 	/* Dirty work! We must schedule wakeups based on
-	   real available rate, rather than leaf rate,
-	   which may be tiny (even zero).
+	 * real available rate, rather than leaf rate,
+	 * which may be tiny (even zero).
 	 */
 	if (q->toplevel == TC_CBQ_MAXLEVEL) {
 		struct cbq_class *b;
@@ -459,7 +460,7 @@ static void cbq_ovl_classic(struct cbq_class *cl)
 }
 
 /* TC_CBQ_OVL_RCLASSIC: penalize by offtime classes in hierarchy, when
-   they go overlimit
+ * they go overlimit
  */
 
 static void cbq_ovl_rclassic(struct cbq_class *cl)
@@ -594,7 +595,7 @@ static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
 	struct Qdisc *sch = q->watchdog.qdisc;
 	psched_time_t now;
 	psched_tdiff_t delay = 0;
-	unsigned pmask;
+	unsigned int pmask;
 
 	now = psched_get_time();
 
@@ -623,7 +624,7 @@ static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
 		hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS);
 	}
 
-	sch->flags &= ~TCQ_F_THROTTLED;
+	qdisc_unthrottled(sch);
 	__netif_schedule(qdisc_root(sch));
 	return HRTIMER_NORESTART;
 }
@@ -663,15 +664,15 @@ static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
 #endif
 
 /*
-   It is mission critical procedure.
-
-   We "regenerate" toplevel cutoff, if transmitting class
-   has backlog and it is not regulated. It is not part of
-   original CBQ description, but looks more reasonable.
-   Probably, it is wrong. This question needs further investigation.
-*/
+ * It is mission critical procedure.
+ *
+ * We "regenerate" toplevel cutoff, if transmitting class
+ * has backlog and it is not regulated. It is not part of
+ * original CBQ description, but looks more reasonable.
+ * Probably, it is wrong. This question needs further investigation.
+ */
 
-static __inline__ void
+static inline void
 cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
 		    struct cbq_class *borrowed)
 {
@@ -682,7 +683,7 @@ cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
 					q->toplevel = borrowed->level;
 					return;
 				}
-			} while ((borrowed=borrowed->borrow) != NULL);
+			} while ((borrowed = borrowed->borrow) != NULL);
 		}
 #if 0
 	/* It is not necessary now. Uncommenting it
@@ -710,10 +711,10 @@ cbq_update(struct cbq_sched_data *q)
 		cl->bstats.bytes += len;
 
 		/*
-		   (now - last) is total time between packet right edges.
-		   (last_pktlen/rate) is "virtual" busy time, so that
-
-			 idle = (now - last) - last_pktlen/rate
+		 * (now - last) is total time between packet right edges.
+		 * (last_pktlen/rate) is "virtual" busy time, so that
+		 *
+		 *	idle = (now - last) - last_pktlen/rate
 		 */
 
 		idle = q->now - cl->last;
@@ -723,9 +724,9 @@ cbq_update(struct cbq_sched_data *q)
 			idle -= L2T(cl, len);
 
 		/* true_avgidle := (1-W)*true_avgidle + W*idle,
-		   where W=2^{-ewma_log}. But cl->avgidle is scaled:
-		   cl->avgidle == true_avgidle/W,
-		   hence:
+		 * where W=2^{-ewma_log}. But cl->avgidle is scaled:
+		 * cl->avgidle == true_avgidle/W,
+		 * hence:
 		 */
 			avgidle += idle - (avgidle>>cl->ewma_log);
 		}
@@ -739,22 +740,22 @@ cbq_update(struct cbq_sched_data *q)
 			cl->avgidle = avgidle;
 
 			/* Calculate expected time, when this class
-			   will be allowed to send.
-			   It will occur, when:
-			   (1-W)*true_avgidle + W*delay = 0, i.e.
-			   idle = (1/W - 1)*(-true_avgidle)
-			   or
-			   idle = (1 - W)*(-cl->avgidle);
+			 * will be allowed to send.
+			 * It will occur, when:
+			 * (1-W)*true_avgidle + W*delay = 0, i.e.
+			 * idle = (1/W - 1)*(-true_avgidle)
+			 * or
+			 * idle = (1 - W)*(-cl->avgidle);
 			 */
 			idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);
 
 			/*
-			   That is not all.
-			   To maintain the rate allocated to the class,
-			   we add to undertime virtual clock,
-			   necessary to complete transmitted packet.
-			   (len/phys_bandwidth has been already passed
-			   to the moment of cbq_update)
+			 * That is not all.
+			 * To maintain the rate allocated to the class,
+			 * we add to undertime virtual clock,
+			 * necessary to complete transmitted packet.
+			 * (len/phys_bandwidth has been already passed
+			 * to the moment of cbq_update)
 			 */
 
 			idle -= L2T(&q->link, len);
@@ -776,7 +777,7 @@ cbq_update(struct cbq_sched_data *q)
 	cbq_update_toplevel(q, this, q->tx_borrowed);
 }
 
-static __inline__ struct cbq_class *
+static inline struct cbq_class *
 cbq_under_limit(struct cbq_class *cl)
 {
 	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
@@ -792,16 +793,17 @@ cbq_under_limit(struct cbq_class *cl)
 
 	do {
 		/* It is very suspicious place. Now overlimit
-		   action is generated for not bounded classes
-		   only if link is completely congested.
-		   Though it is in agree with ancestor-only paradigm,
-		   it looks very stupid. Particularly,
-		   it means that this chunk of code will either
-		   never be called or result in strong amplification
-		   of burstiness. Dangerous, silly, and, however,
-		   no another solution exists.
+		 * action is generated for not bounded classes
+		 * only if link is completely congested.
+		 * Though it is in agree with ancestor-only paradigm,
+		 * it looks very stupid. Particularly,
+		 * it means that this chunk of code will either
+		 * never be called or result in strong amplification
+		 * of burstiness. Dangerous, silly, and, however,
+		 * no another solution exists.
 		 */
-		if ((cl = cl->borrow) == NULL) {
+		cl = cl->borrow;
+		if (!cl) {
 			this_cl->qstats.overlimits++;
 			this_cl->overlimit(this_cl);
 			return NULL;
@@ -814,7 +816,7 @@ cbq_under_limit(struct cbq_class *cl)
 	return cl;
 }
 
-static __inline__ struct sk_buff *
+static inline struct sk_buff *
 cbq_dequeue_prio(struct Qdisc *sch, int prio)
 {
 	struct cbq_sched_data *q = qdisc_priv(sch);
@@ -838,7 +840,7 @@ cbq_dequeue_prio(struct Qdisc *sch, int prio)
 
 			if (cl->deficit <= 0) {
 				/* Class exhausted its allotment per
-				   this round. Switch to the next one.
+				 * this round. Switch to the next one.
 				 */
 				deficit = 1;
 				cl->deficit += cl->quantum;
@@ -848,8 +850,8 @@ cbq_dequeue_prio(struct Qdisc *sch, int prio)
 			skb = cl->q->dequeue(cl->q);
 
 			/* Class did not give us any skb :-(
-			   It could occur even if cl->q->q.qlen != 0
-			   f.e. if cl->q == "tbf"
+			 * It could occur even if cl->q->q.qlen != 0
+			 * f.e. if cl->q == "tbf"
 			 */
 			if (skb == NULL)
 				goto skip_class;
@@ -878,7 +880,7 @@ cbq_dequeue_prio(struct Qdisc *sch, int prio)
 skip_class:
 			if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
 				/* Class is empty or penalized.
-				   Unlink it from active chain.
+				 * Unlink it from active chain.
 				 */
 				cl_prev->next_alive = cl->next_alive;
 				cl->next_alive = NULL;
@@ -917,14 +919,14 @@ next_class:
 	return NULL;
 }
 
-static __inline__ struct sk_buff *
+static inline struct sk_buff *
 cbq_dequeue_1(struct Qdisc *sch)
 {
 	struct cbq_sched_data *q = qdisc_priv(sch);
 	struct sk_buff *skb;
-	unsigned activemask;
+	unsigned int activemask;
 
-	activemask = q->activemask&0xFF;
+	activemask = q->activemask & 0xFF;
 	while (activemask) {
 		int prio = ffz(~activemask);
 		activemask &= ~(1<<prio);
@@ -949,11 +951,11 @@ cbq_dequeue(struct Qdisc *sch)
 	if (q->tx_class) {
 		psched_tdiff_t incr2;
 		/* Time integrator. We calculate EOS time
-		   by adding expected packet transmission time.
-		   If real time is greater, we warp artificial clock,
-		   so that:
-
-		   cbq_time = max(real_time, work);
+		 * by adding expected packet transmission time.
+		 * If real time is greater, we warp artificial clock,
+		 * so that:
+		 *
+		 * cbq_time = max(real_time, work);
 		 */
 		incr2 = L2T(&q->link, q->tx_len);
 		q->now += incr2;
@@ -971,27 +973,27 @@ cbq_dequeue(struct Qdisc *sch)
 		if (skb) {
 			qdisc_bstats_update(sch, skb);
 			sch->q.qlen--;
-			sch->flags &= ~TCQ_F_THROTTLED;
+			qdisc_unthrottled(sch);
 			return skb;
 		}
 
 		/* All the classes are overlimit.
-
-		   It is possible, if:
-
-		   1. Scheduler is empty.
-		   2. Toplevel cutoff inhibited borrowing.
-		   3. Root class is overlimit.
-
-		   Reset 2d and 3d conditions and retry.
-
-		   Note, that NS and cbq-2.0 are buggy, peeking
-		   an arbitrary class is appropriate for ancestor-only
-		   sharing, but not for toplevel algorithm.
-
-		   Our version is better, but slower, because it requires
-		   two passes, but it is unavoidable with top-level sharing.
-		*/
+		 *
+		 * It is possible, if:
+		 *
+		 * 1. Scheduler is empty.
+		 * 2. Toplevel cutoff inhibited borrowing.
+		 * 3. Root class is overlimit.
+		 *
+		 * Reset 2d and 3d conditions and retry.
+		 *
+		 * Note, that NS and cbq-2.0 are buggy, peeking
+		 * an arbitrary class is appropriate for ancestor-only
+		 * sharing, but not for toplevel algorithm.
+		 *
+		 * Our version is better, but slower, because it requires
+		 * two passes, but it is unavoidable with top-level sharing.
+		 */
 
 		if (q->toplevel == TC_CBQ_MAXLEVEL &&
 		    q->link.undertime == PSCHED_PASTPERFECT)
@@ -1002,7 +1004,8 @@ cbq_dequeue(struct Qdisc *sch)
 	}
 
 	/* No packets in scheduler or nobody wants to give them to us :-(
-	   Sigh... start watchdog timer in the last case. */
+	 * Sigh... start watchdog timer in the last case.
+	 */
 
 	if (sch->q.qlen) {
 		sch->qstats.overlimits++;
@@ -1024,13 +1027,14 @@ static void cbq_adjust_levels(struct cbq_class *this)
 		int level = 0;
 		struct cbq_class *cl;
 
-		if ((cl = this->children) != NULL) {
+		cl = this->children;
+		if (cl) {
 			do {
 				if (cl->level > level)
 					level = cl->level;
 			} while ((cl = cl->sibling) != this->children);
 		}
-		this->level = level+1;
+		this->level = level + 1;
 	} while ((this = this->tparent) != NULL);
 }
 
@@ -1046,14 +1050,15 @@ static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
 	for (h = 0; h < q->clhash.hashsize; h++) {
 		hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
 			/* BUGGGG... Beware! This expression suffer of
-			   arithmetic overflows!
+			 * arithmetic overflows!
 			 */
 			if (cl->priority == prio) {
 				cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
 					q->quanta[prio];
 			}
 			if (cl->quantum <= 0 || cl->quantum>32*qdisc_dev(cl->qdisc)->mtu) {
-				printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->common.classid, cl->quantum);
+				pr_warning("CBQ: class %08x has bad quantum==%ld, repaired.\n",
+					   cl->common.classid, cl->quantum);
 				cl->quantum = qdisc_dev(cl->qdisc)->mtu/2 + 1;
 			}
 		}
@@ -1064,18 +1069,18 @@ static void cbq_sync_defmap(struct cbq_class *cl)
 {
 	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
 	struct cbq_class *split = cl->split;
-	unsigned h;
+	unsigned int h;
 	int i;
 
 	if (split == NULL)
 		return;
 
-	for (i=0; i<=TC_PRIO_MAX; i++) {
-		if (split->defaults[i] == cl && !(cl->defmap&(1<<i)))
+	for (i = 0; i <= TC_PRIO_MAX; i++) {
+		if (split->defaults[i] == cl && !(cl->defmap & (1<<i)))
 			split->defaults[i] = NULL;
 	}
 
-	for (i=0; i<=TC_PRIO_MAX; i++) {
+	for (i = 0; i <= TC_PRIO_MAX; i++) {
 		int level = split->level;
 
 		if (split->defaults[i])
@@ -1088,7 +1093,7 @@ static void cbq_sync_defmap(struct cbq_class *cl)
 			hlist_for_each_entry(c, n, &q->clhash.hash[h],
 					     common.hnode) {
 				if (c->split == split && c->level < level &&
-				    c->defmap&(1<<i)) {
+				    c->defmap & (1<<i)) {
 					split->defaults[i] = c;
 					level = c->level;
 				}
@@ -1102,7 +1107,8 @@ static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 ma
 	struct cbq_class *split = NULL;
 
 	if (splitid == 0) {
-		if ((split = cl->split) == NULL)
+		split = cl->split;
+		if (!split)
 			return;
 		splitid = split->common.classid;
 	}
@@ -1120,9 +1126,9 @@ static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 ma
 		cl->defmap = 0;
 		cbq_sync_defmap(cl);
 		cl->split = split;
-		cl->defmap = def&mask;
+		cl->defmap = def & mask;
 	} else
-		cl->defmap = (cl->defmap&~mask)|(def&mask);
+		cl->defmap = (cl->defmap & ~mask) | (def & mask);
 
 	cbq_sync_defmap(cl);
 }
@@ -1135,7 +1141,7 @@ static void cbq_unlink_class(struct cbq_class *this)
 	qdisc_class_hash_remove(&q->clhash, &this->common);
 
 	if (this->tparent) {
-		clp=&this->sibling;
+		clp = &this->sibling;
 		cl = *clp;
 		do {
 			if (cl == this) {
@@ -1174,7 +1180,7 @@ static void cbq_link_class(struct cbq_class *this)
 	}
 }
 
-static unsigned int cbq_drop(struct Qdisc* sch)
+static unsigned int cbq_drop(struct Qdisc *sch)
 {
 	struct cbq_sched_data *q = qdisc_priv(sch);
 	struct cbq_class *cl, *cl_head;
@@ -1182,7 +1188,8 @@ static unsigned int cbq_drop(struct Qdisc* sch)
 	unsigned int len;
 
 	for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
-		if ((cl_head = q->active[prio]) == NULL)
+		cl_head = q->active[prio];
+		if (!cl_head)
 			continue;
 
 		cl = cl_head;
@@ -1199,13 +1206,13 @@ static unsigned int cbq_drop(struct Qdisc* sch)
 }
 
 static void
-cbq_reset(struct Qdisc* sch)
+cbq_reset(struct Qdisc *sch)
 {
 	struct cbq_sched_data *q = qdisc_priv(sch);
 	struct cbq_class *cl;
 	struct hlist_node *n;
 	int prio;
-	unsigned h;
+	unsigned int h;
 
 	q->activemask = 0;
 	q->pmask = 0;
@@ -1237,21 +1244,21 @@ cbq_reset(struct Qdisc* sch)
 
 static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
 {
-	if (lss->change&TCF_CBQ_LSS_FLAGS) {
-		cl->share = (lss->flags&TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
-		cl->borrow = (lss->flags&TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
+	if (lss->change & TCF_CBQ_LSS_FLAGS) {
+		cl->share = (lss->flags & TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
+		cl->borrow = (lss->flags & TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
 	}
-	if (lss->change&TCF_CBQ_LSS_EWMA)
+	if (lss->change & TCF_CBQ_LSS_EWMA)
 		cl->ewma_log = lss->ewma_log;
-	if (lss->change&TCF_CBQ_LSS_AVPKT)
+	if (lss->change & TCF_CBQ_LSS_AVPKT)
 		cl->avpkt = lss->avpkt;
-	if (lss->change&TCF_CBQ_LSS_MINIDLE)
+	if (lss->change & TCF_CBQ_LSS_MINIDLE)
 		cl->minidle = -(long)lss->minidle;
-	if (lss->change&TCF_CBQ_LSS_MAXIDLE) {
+	if (lss->change & TCF_CBQ_LSS_MAXIDLE) {
 		cl->maxidle = lss->maxidle;
 		cl->avgidle = lss->maxidle;
 	}
-	if (lss->change&TCF_CBQ_LSS_OFFTIME)
+	if (lss->change & TCF_CBQ_LSS_OFFTIME)
 		cl->offtime = lss->offtime;
 	return 0;
 }
@@ -1279,10 +1286,10 @@ static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
 	if (wrr->weight)
 		cl->weight = wrr->weight;
 	if (wrr->priority) {
-		cl->priority = wrr->priority-1;
+		cl->priority = wrr->priority - 1;
 		cl->cpriority = cl->priority;
 		if (cl->priority >= cl->priority2)
-			cl->priority2 = TC_CBQ_MAXPRIO-1;
+			cl->priority2 = TC_CBQ_MAXPRIO - 1;
 	}
 
 	cbq_addprio(q, cl);
@@ -1299,10 +1306,10 @@ static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
 		cl->overlimit = cbq_ovl_delay;
 		break;
 	case TC_CBQ_OVL_LOWPRIO:
-		if (ovl->priority2-1 >= TC_CBQ_MAXPRIO ||
-		    ovl->priority2-1 <= cl->priority)
+		if (ovl->priority2 - 1 >= TC_CBQ_MAXPRIO ||
+		    ovl->priority2 - 1 <= cl->priority)
 			return -EINVAL;
-		cl->priority2 = ovl->priority2-1;
+		cl->priority2 = ovl->priority2 - 1;
 		cl->overlimit = cbq_ovl_lowprio;
 		break;
 	case TC_CBQ_OVL_DROP:
@@ -1381,9 +1388,9 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
 	if (!q->link.q)
 		q->link.q = &noop_qdisc;
 
-	q->link.priority = TC_CBQ_MAXPRIO-1;
-	q->link.priority2 = TC_CBQ_MAXPRIO-1;
-	q->link.cpriority = TC_CBQ_MAXPRIO-1;
+	q->link.priority = TC_CBQ_MAXPRIO - 1;
+	q->link.priority2 = TC_CBQ_MAXPRIO - 1;
+	q->link.cpriority = TC_CBQ_MAXPRIO - 1;
 	q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
 	q->link.overlimit = cbq_ovl_classic;
 	q->link.allot = psched_mtu(qdisc_dev(sch));
@@ -1414,7 +1421,7 @@ put_rtab:
 	return err;
 }
 
-static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
+static int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
 {
 	unsigned char *b = skb_tail_pointer(skb);
 
@@ -1426,7 +1433,7 @@ nla_put_failure:
 	return -1;
 }
 
-static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
+static int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
 {
 	unsigned char *b = skb_tail_pointer(skb);
 	struct tc_cbq_lssopt opt;
@@ -1451,15 +1458,15 @@ nla_put_failure:
 	return -1;
 }
 
-static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
+static int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
 {
 	unsigned char *b = skb_tail_pointer(skb);
 	struct tc_cbq_wrropt opt;
 
 	opt.flags = 0;
 	opt.allot = cl->allot;
-	opt.priority = cl->priority+1;
-	opt.cpriority = cl->cpriority+1;
+	opt.priority = cl->priority + 1;
+	opt.cpriority = cl->cpriority + 1;
 	opt.weight = cl->weight;
 	NLA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
 	return skb->len;
@@ -1469,13 +1476,13 @@ nla_put_failure:
 	return -1;
 }
 
-static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
+static int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
 {
 	unsigned char *b = skb_tail_pointer(skb);
 	struct tc_cbq_ovl opt;
 
 	opt.strategy = cl->ovl_strategy;
-	opt.priority2 = cl->priority2+1;
+	opt.priority2 = cl->priority2 + 1;
 	opt.pad = 0;
 	opt.penalty = cl->penalty;
 	NLA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
@@ -1486,7 +1493,7 @@ nla_put_failure:
 	return -1;
 }
 
-static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
+static int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
 {
 	unsigned char *b = skb_tail_pointer(skb);
 	struct tc_cbq_fopt opt;
@@ -1505,7 +1512,7 @@ nla_put_failure:
 }
 
 #ifdef CONFIG_NET_CLS_ACT
-static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
+static int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
 {
 	unsigned char *b = skb_tail_pointer(skb);
 	struct tc_cbq_police opt;
@@ -1569,7 +1576,7 @@ static int
 cbq_dump_class(struct Qdisc *sch, unsigned long arg,
 	       struct sk_buff *skb, struct tcmsg *tcm)
 {
-	struct cbq_class *cl = (struct cbq_class*)arg;
+	struct cbq_class *cl = (struct cbq_class *)arg;
 	struct nlattr *nest;
 
 	if (cl->tparent)
@@ -1597,7 +1604,7 @@ cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
 	struct gnet_dump *d)
 {
 	struct cbq_sched_data *q = qdisc_priv(sch);
-	struct cbq_class *cl = (struct cbq_class*)arg;
+	struct cbq_class *cl = (struct cbq_class *)arg;
 
 	cl->qstats.qlen = cl->q->q.qlen;
 	cl->xstats.avgidle = cl->avgidle;
@@ -1617,7 +1624,7 @@ cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
 static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
 		     struct Qdisc **old)
 {
-	struct cbq_class *cl = (struct cbq_class*)arg;
+	struct cbq_class *cl = (struct cbq_class *)arg;
 
 	if (new == NULL) {
 		new = qdisc_create_dflt(sch->dev_queue,
@@ -1640,10 +1647,9 @@ static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
 	return 0;
 }
 
-static struct Qdisc *
-cbq_leaf(struct Qdisc *sch, unsigned long arg)
+static struct Qdisc *cbq_leaf(struct Qdisc *sch, unsigned long arg)
 {
-	struct cbq_class *cl = (struct cbq_class*)arg;
+	struct cbq_class *cl = (struct cbq_class *)arg;
 
 	return cl->q;
 }
@@ -1682,13 +1688,12 @@ static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
 		kfree(cl);
 }
 
-static void
-cbq_destroy(struct Qdisc* sch)
+static void cbq_destroy(struct Qdisc *sch)
 {
 	struct cbq_sched_data *q = qdisc_priv(sch);
 	struct hlist_node *n, *next;
 	struct cbq_class *cl;
-	unsigned h;
+	unsigned int h;
 
 #ifdef CONFIG_NET_CLS_ACT
 	q->rx_class = NULL;
@@ -1712,7 +1717,7 @@ cbq_destroy(struct Qdisc* sch)
 
 static void cbq_put(struct Qdisc *sch, unsigned long arg)
 {
-	struct cbq_class *cl = (struct cbq_class*)arg;
+	struct cbq_class *cl = (struct cbq_class *)arg;
 
 	if (--cl->refcnt == 0) {
 #ifdef CONFIG_NET_CLS_ACT
@@ -1735,7 +1740,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
 {
 	int err;
 	struct cbq_sched_data *q = qdisc_priv(sch);
-	struct cbq_class *cl = (struct cbq_class*)*arg;
+	struct cbq_class *cl = (struct cbq_class *)*arg;
 	struct nlattr *opt = tca[TCA_OPTIONS];
 	struct nlattr *tb[TCA_CBQ_MAX + 1];
 	struct cbq_class *parent;
@@ -1827,13 +1832,14 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
 
 	if (classid) {
 		err = -EINVAL;
-		if (TC_H_MAJ(classid^sch->handle) || cbq_class_lookup(q, classid))
+		if (TC_H_MAJ(classid ^ sch->handle) ||
+		    cbq_class_lookup(q, classid))
 			goto failure;
 	} else {
 		int i;
-		classid = TC_H_MAKE(sch->handle,0x8000);
+		classid = TC_H_MAKE(sch->handle, 0x8000);
 
-		for (i=0; i<0x8000; i++) {
+		for (i = 0; i < 0x8000; i++) {
 			if (++q->hgenerator >= 0x8000)
 				q->hgenerator = 1;
 			if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
@@ -1890,11 +1896,11 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
 	cl->minidle = -0x7FFFFFFF;
 	cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
 	cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
-	if (cl->ewma_log==0)
+	if (cl->ewma_log == 0)
 		cl->ewma_log = q->link.ewma_log;
-	if (cl->maxidle==0)
+	if (cl->maxidle == 0)
 		cl->maxidle = q->link.maxidle;
-	if (cl->avpkt==0)
+	if (cl->avpkt == 0)
 		cl->avpkt = q->link.avpkt;
 	cl->overlimit = cbq_ovl_classic;
 	if (tb[TCA_CBQ_OVL_STRATEGY])
@@ -1920,7 +1926,7 @@ failure:
 static int cbq_delete(struct Qdisc *sch, unsigned long arg)
 {
 	struct cbq_sched_data *q = qdisc_priv(sch);
-	struct cbq_class *cl = (struct cbq_class*)arg;
+	struct cbq_class *cl = (struct cbq_class *)arg;
 	unsigned int qlen;
 
 	if (cl->filters || cl->children || cl == &q->link)
@@ -1978,7 +1984,7 @@ static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
 				     u32 classid)
 {
 	struct cbq_sched_data *q = qdisc_priv(sch);
-	struct cbq_class *p = (struct cbq_class*)parent;
+	struct cbq_class *p = (struct cbq_class *)parent;
 	struct cbq_class *cl = cbq_class_lookup(q, classid);
 
 	if (cl) {
@@ -1992,7 +1998,7 @@ static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
 
 static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
 {
-	struct cbq_class *cl = (struct cbq_class*)arg;
+	struct cbq_class *cl = (struct cbq_class *)arg;
 
 	cl->filters--;
 }
@@ -2002,7 +2008,7 @@ static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
 	struct cbq_sched_data *q = qdisc_priv(sch);
 	struct cbq_class *cl;
 	struct hlist_node *n;
-	unsigned h;
+	unsigned int h;
 
 	if (arg->stop)
 		return;
diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c
new file mode 100644
index 0000000..06afbae
--- /dev/null
+++ b/net/sched/sch_choke.c
@@ -0,0 +1,688 @@
+/*
+ * net/sched/sch_choke.c	CHOKE scheduler
+ *
+ * Copyright (c) 2011 Stephen Hemminger <shemminger@vyatta.com>
+ * Copyright (c) 2011 Eric Dumazet <eric.dumazet@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/reciprocal_div.h>
+#include <linux/vmalloc.h>
+#include <net/pkt_sched.h>
+#include <net/inet_ecn.h>
+#include <net/red.h>
+#include <linux/ip.h>
+#include <net/ip.h>
+#include <linux/ipv6.h>
+#include <net/ipv6.h>
+
+/*
+   CHOKe stateless AQM for fair bandwidth allocation
+   =================================================
+
+   CHOKe (CHOose and Keep for responsive flows, CHOose and Kill for
+   unresponsive flows) is a variant of RED that penalizes misbehaving flows but
+   maintains no flow state. The difference from RED is an additional step
+   during the enqueuing process. If average queue size is over the
+   low threshold (qmin), a packet is chosen at random from the queue.
+   If both the new and chosen packet are from the same flow, both
+   are dropped. Unlike RED, CHOKe is not really a "classful" qdisc because it
+   needs to access packets in queue randomly. It has a minimal class
+   interface to allow overriding the builtin flow classifier with
+   filters.
+
+   Source:
+   R. Pan, B. Prabhakar, and K. Psounis, "CHOKe, A Stateless
+   Active Queue Management Scheme for Approximating Fair Bandwidth Allocation",
+   IEEE INFOCOM, 2000.
+
+   A. Tang, J. Wang, S. Low, "Understanding CHOKe: Throughput and Spatial
+   Characteristics", IEEE/ACM Transactions on Networking, 2004
+
+ */
+
+/* Upper bound on size of sk_buff table (packets) */
+#define CHOKE_MAX_QUEUE	(128*1024 - 1)
+
+struct choke_sched_data {
+/* Parameters */
+	u32		 limit;
+	unsigned char	 flags;
+
+	struct red_parms parms;
+
+/* Variables */
+	struct tcf_proto *filter_list;
+	struct {
+		u32	prob_drop;	/* Early probability drops */
+		u32	prob_mark;	/* Early probability marks */
+		u32	forced_drop;	/* Forced drops, qavg > max_thresh */
+		u32	forced_mark;	/* Forced marks, qavg > max_thresh */
+		u32	pdrop;          /* Drops due to queue limits */
+		u32	other;          /* Drops due to drop() calls */
+		u32	matched;	/* Drops to flow match */
+	} stats;
+
+	unsigned int	 head;
+	unsigned int	 tail;
+
+	unsigned int	 tab_mask; /* size - 1 */
+
+	struct sk_buff **tab;
+};
+
+/* deliver a random number between 0 and N - 1 */
+static u32 random_N(unsigned int N)
+{
+	return reciprocal_divide(random32(), N);
+}
+
+/* number of elements in queue including holes */
+static unsigned int choke_len(const struct choke_sched_data *q)
+{
+	return (q->tail - q->head) & q->tab_mask;
+}
+
+/* Is ECN parameter configured */
+static int use_ecn(const struct choke_sched_data *q)
+{
+	return q->flags & TC_RED_ECN;
+}
+
+/* Should packets over max just be dropped (versus marked) */
+static int use_harddrop(const struct choke_sched_data *q)
+{
+	return q->flags & TC_RED_HARDDROP;
+}
+
+/* Move head pointer forward to skip over holes */
+static void choke_zap_head_holes(struct choke_sched_data *q)
+{
+	do {
+		q->head = (q->head + 1) & q->tab_mask;
+		if (q->head == q->tail)
+			break;
+	} while (q->tab[q->head] == NULL);
+}
+
+/* Move tail pointer backwards to reuse holes */
+static void choke_zap_tail_holes(struct choke_sched_data *q)
+{
+	do {
+		q->tail = (q->tail - 1) & q->tab_mask;
+		if (q->head == q->tail)
+			break;
+	} while (q->tab[q->tail] == NULL);
+}
+
+/* Drop packet from queue array by creating a "hole" */
+static void choke_drop_by_idx(struct Qdisc *sch, unsigned int idx)
+{
+	struct choke_sched_data *q = qdisc_priv(sch);
+	struct sk_buff *skb = q->tab[idx];
+
+	q->tab[idx] = NULL;
+
+	if (idx == q->head)
+		choke_zap_head_holes(q);
+	if (idx == q->tail)
+		choke_zap_tail_holes(q);
+
+	sch->qstats.backlog -= qdisc_pkt_len(skb);
+	qdisc_drop(skb, sch);
+	qdisc_tree_decrease_qlen(sch, 1);
+	--sch->q.qlen;
+}
+
+/*
+ * Compare flow of two packets
+ *  Returns true only if source and destination address and port match.
+ *          false for special cases
+ */
+static bool choke_match_flow(struct sk_buff *skb1,
+			     struct sk_buff *skb2)
+{
+	int off1, off2, poff;
+	const u32 *ports1, *ports2;
+	u8 ip_proto;
+	__u32 hash1;
+
+	if (skb1->protocol != skb2->protocol)
+		return false;
+
+	/* Use hash value as quick check
+	 * Assumes that __skb_get_rxhash makes IP header and ports linear
+	 */
+	hash1 = skb_get_rxhash(skb1);
+	if (!hash1 || hash1 != skb_get_rxhash(skb2))
+		return false;
+
+	/* Probably match, but be sure to avoid hash collisions */
+	off1 = skb_network_offset(skb1);
+	off2 = skb_network_offset(skb2);
+
+	switch (skb1->protocol) {
+	case __constant_htons(ETH_P_IP): {
+		const struct iphdr *ip1, *ip2;
+
+		ip1 = (const struct iphdr *) (skb1->data + off1);
+		ip2 = (const struct iphdr *) (skb2->data + off2);
+
+		ip_proto = ip1->protocol;
+		if (ip_proto != ip2->protocol ||
+		    ip1->saddr != ip2->saddr || ip1->daddr != ip2->daddr)
+			return false;
+
+		if ((ip1->frag_off | ip2->frag_off) & htons(IP_MF | IP_OFFSET))
+			ip_proto = 0;
+		off1 += ip1->ihl * 4;
+		off2 += ip2->ihl * 4;
+		break;
+	}
+
+	case __constant_htons(ETH_P_IPV6): {
+		const struct ipv6hdr *ip1, *ip2;
+
+		ip1 = (const struct ipv6hdr *) (skb1->data + off1);
+		ip2 = (const struct ipv6hdr *) (skb2->data + off2);
+
+		ip_proto = ip1->nexthdr;
+		if (ip_proto != ip2->nexthdr ||
+		    ipv6_addr_cmp(&ip1->saddr, &ip2->saddr) ||
+		    ipv6_addr_cmp(&ip1->daddr, &ip2->daddr))
+			return false;
+		off1 += 40;
+		off2 += 40;
+	}
+
+	default: /* Maybe compare MAC header here? */
+		return false;
+	}
+
+	poff = proto_ports_offset(ip_proto);
+	if (poff < 0)
+		return true;
+
+	off1 += poff;
+	off2 += poff;
+
+	ports1 = (__force u32 *)(skb1->data + off1);
+	ports2 = (__force u32 *)(skb2->data + off2);
+	return *ports1 == *ports2;
+}
+
+struct choke_skb_cb {
+	u16 classid;
+};
+
+static inline struct choke_skb_cb *choke_skb_cb(const struct sk_buff *skb)
+{
+	BUILD_BUG_ON(sizeof(skb->cb) <
+		sizeof(struct qdisc_skb_cb) + sizeof(struct choke_skb_cb));
+	return (struct choke_skb_cb *)qdisc_skb_cb(skb)->data;
+}
+
+static inline void choke_set_classid(struct sk_buff *skb, u16 classid)
+{
+	choke_skb_cb(skb)->classid = classid;
+}
+
+static u16 choke_get_classid(const struct sk_buff *skb)
+{
+	return choke_skb_cb(skb)->classid;
+}
+
+/*
+ * Classify flow using either:
+ *  1. pre-existing classification result in skb
+ *  2. fast internal classification
+ *  3. use TC filter based classification
+ */
+static bool choke_classify(struct sk_buff *skb,
+			   struct Qdisc *sch, int *qerr)
+
+{
+	struct choke_sched_data *q = qdisc_priv(sch);
+	struct tcf_result res;
+	int result;
+
+	result = tc_classify(skb, q->filter_list, &res);
+	if (result >= 0) {
+#ifdef CONFIG_NET_CLS_ACT
+		switch (result) {
+		case TC_ACT_STOLEN:
+		case TC_ACT_QUEUED:
+			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
+		case TC_ACT_SHOT:
+			return false;
+		}
+#endif
+		choke_set_classid(skb, TC_H_MIN(res.classid));
+		return true;
+	}
+
+	return false;
+}
+
+/*
+ * Select a packet at random from queue
+ * HACK: since queue can have holes from previous deletion; retry several
+ *   times to find a random skb but then just give up and return the head
+ * Will return NULL if queue is empty (q->head == q->tail)
+ */
+static struct sk_buff *choke_peek_random(const struct choke_sched_data *q,
+					 unsigned int *pidx)
+{
+	struct sk_buff *skb;
+	int retrys = 3;
+
+	do {
+		*pidx = (q->head + random_N(choke_len(q))) & q->tab_mask;
+		skb = q->tab[*pidx];
+		if (skb)
+			return skb;
+	} while (--retrys > 0);
+
+	return q->tab[*pidx = q->head];
+}
+
+/*
+ * Compare new packet with random packet in queue
+ * returns true if matched and sets *pidx
+ */
+static bool choke_match_random(const struct choke_sched_data *q,
+			       struct sk_buff *nskb,
+			       unsigned int *pidx)
+{
+	struct sk_buff *oskb;
+
+	if (q->head == q->tail)
+		return false;
+
+	oskb = choke_peek_random(q, pidx);
+	if (q->filter_list)
+		return choke_get_classid(nskb) == choke_get_classid(oskb);
+
+	return choke_match_flow(oskb, nskb);
+}
+
+static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch)
+{
+	struct choke_sched_data *q = qdisc_priv(sch);
+	struct red_parms *p = &q->parms;
+	int ret = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
+
+	if (q->filter_list) {
+		/* If using external classifiers, get result and record it. */
+		if (!choke_classify(skb, sch, &ret))
+			goto other_drop;	/* Packet was eaten by filter */
+	}
+
+	/* Compute average queue usage (see RED) */
+	p->qavg = red_calc_qavg(p, sch->q.qlen);
+	if (red_is_idling(p))
+		red_end_of_idle_period(p);
+
+	/* Is queue small? */
+	if (p->qavg <= p->qth_min)
+		p->qcount = -1;
+	else {
+		unsigned int idx;
+
+		/* Draw a packet at random from queue and compare flow */
+		if (choke_match_random(q, skb, &idx)) {
+			q->stats.matched++;
+			choke_drop_by_idx(sch, idx);
+			goto congestion_drop;
+		}
+
+		/* Queue is large, always mark/drop */
+		if (p->qavg > p->qth_max) {
+			p->qcount = -1;
+
+			sch->qstats.overlimits++;
+			if (use_harddrop(q) || !use_ecn(q) ||
+			    !INET_ECN_set_ce(skb)) {
+				q->stats.forced_drop++;
+				goto congestion_drop;
+			}
+
+			q->stats.forced_mark++;
+		} else if (++p->qcount) {
+			if (red_mark_probability(p, p->qavg)) {
+				p->qcount = 0;
+				p->qR = red_random(p);
+
+				sch->qstats.overlimits++;
+				if (!use_ecn(q) || !INET_ECN_set_ce(skb)) {
+					q->stats.prob_drop++;
+					goto congestion_drop;
+				}
+
+				q->stats.prob_mark++;
+			}
+		} else
+			p->qR = red_random(p);
+	}
+
+	/* Admit new packet */
+	if (sch->q.qlen < q->limit) {
+		q->tab[q->tail] = skb;
+		q->tail = (q->tail + 1) & q->tab_mask;
+		++sch->q.qlen;
+		sch->qstats.backlog += qdisc_pkt_len(skb);
+		return NET_XMIT_SUCCESS;
+	}
+
+	q->stats.pdrop++;
+	sch->qstats.drops++;
+	kfree_skb(skb);
+	return NET_XMIT_DROP;
+
+ congestion_drop:
+	qdisc_drop(skb, sch);
+	return NET_XMIT_CN;
+
+ other_drop:
+	if (ret & __NET_XMIT_BYPASS)
+		sch->qstats.drops++;
+	kfree_skb(skb);
+	return ret;
+}
+
+static struct sk_buff *choke_dequeue(struct Qdisc *sch)
+{
+	struct choke_sched_data *q = qdisc_priv(sch);
+	struct sk_buff *skb;
+
+	if (q->head == q->tail) {
+		if (!red_is_idling(&q->parms))
+			red_start_of_idle_period(&q->parms);
+		return NULL;
+	}
+
+	skb = q->tab[q->head];
+	q->tab[q->head] = NULL;
+	choke_zap_head_holes(q);
+	--sch->q.qlen;
+	sch->qstats.backlog -= qdisc_pkt_len(skb);
+	qdisc_bstats_update(sch, skb);
+
+	return skb;
+}
+
+static unsigned int choke_drop(struct Qdisc *sch)
+{
+	struct choke_sched_data *q = qdisc_priv(sch);
+	unsigned int len;
+
+	len = qdisc_queue_drop(sch);
+	if (len > 0)
+		q->stats.other++;
+	else {
+		if (!red_is_idling(&q->parms))
+			red_start_of_idle_period(&q->parms);
+	}
+
+	return len;
+}
+
+static void choke_reset(struct Qdisc *sch)
+{
+	struct choke_sched_data *q = qdisc_priv(sch);
+
+	red_restart(&q->parms);
+}
+
+static const struct nla_policy choke_policy[TCA_CHOKE_MAX + 1] = {
+	[TCA_CHOKE_PARMS]	= { .len = sizeof(struct tc_red_qopt) },
+	[TCA_CHOKE_STAB]	= { .len = RED_STAB_SIZE },
+};
+
+
+static void choke_free(void *addr)
+{
+	if (addr) {
+		if (is_vmalloc_addr(addr))
+			vfree(addr);
+		else
+			kfree(addr);
+	}
+}
+
+static int choke_change(struct Qdisc *sch, struct nlattr *opt)
+{
+	struct choke_sched_data *q = qdisc_priv(sch);
+	struct nlattr *tb[TCA_CHOKE_MAX + 1];
+	const struct tc_red_qopt *ctl;
+	int err;
+	struct sk_buff **old = NULL;
+	unsigned int mask;
+
+	if (opt == NULL)
+		return -EINVAL;
+
+	err = nla_parse_nested(tb, TCA_CHOKE_MAX, opt, choke_policy);
+	if (err < 0)
+		return err;
+
+	if (tb[TCA_CHOKE_PARMS] == NULL ||
+	    tb[TCA_CHOKE_STAB] == NULL)
+		return -EINVAL;
+
+	ctl = nla_data(tb[TCA_CHOKE_PARMS]);
+
+	if (ctl->limit > CHOKE_MAX_QUEUE)
+		return -EINVAL;
+
+	mask = roundup_pow_of_two(ctl->limit + 1) - 1;
+	if (mask != q->tab_mask) {
+		struct sk_buff **ntab;
+
+		ntab = kcalloc(mask + 1, sizeof(struct sk_buff *), GFP_KERNEL);
+		if (!ntab)
+			ntab = vzalloc((mask + 1) * sizeof(struct sk_buff *));
+		if (!ntab)
+			return -ENOMEM;
+
+		sch_tree_lock(sch);
+		old = q->tab;
+		if (old) {
+			unsigned int oqlen = sch->q.qlen, tail = 0;
+
+			while (q->head != q->tail) {
+				struct sk_buff *skb = q->tab[q->head];
+
+				q->head = (q->head + 1) & q->tab_mask;
+				if (!skb)
+					continue;
+				if (tail < mask) {
+					ntab[tail++] = skb;
+					continue;
+				}
+				sch->qstats.backlog -= qdisc_pkt_len(skb);
+				--sch->q.qlen;
+				qdisc_drop(skb, sch);
+			}
+			qdisc_tree_decrease_qlen(sch, oqlen - sch->q.qlen);
+			q->head = 0;
+			q->tail = tail;
+		}
+
+		q->tab_mask = mask;
+		q->tab = ntab;
+	} else
+		sch_tree_lock(sch);
+
+	q->flags = ctl->flags;
+	q->limit = ctl->limit;
+
+	red_set_parms(&q->parms, ctl->qth_min, ctl->qth_max, ctl->Wlog,
+		      ctl->Plog, ctl->Scell_log,
+		      nla_data(tb[TCA_CHOKE_STAB]));
+
+	if (q->head == q->tail)
+		red_end_of_idle_period(&q->parms);
+
+	sch_tree_unlock(sch);
+	choke_free(old);
+	return 0;
+}
+
+static int choke_init(struct Qdisc *sch, struct nlattr *opt)
+{
+	return choke_change(sch, opt);
+}
+
+static int choke_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+	struct choke_sched_data *q = qdisc_priv(sch);
+	struct nlattr *opts = NULL;
+	struct tc_red_qopt opt = {
+		.limit		= q->limit,
+		.flags		= q->flags,
+		.qth_min	= q->parms.qth_min >> q->parms.Wlog,
+		.qth_max	= q->parms.qth_max >> q->parms.Wlog,
+		.Wlog		= q->parms.Wlog,
+		.Plog		= q->parms.Plog,
+		.Scell_log	= q->parms.Scell_log,
+	};
+
+	opts = nla_nest_start(skb, TCA_OPTIONS);
+	if (opts == NULL)
+		goto nla_put_failure;
+
+	NLA_PUT(skb, TCA_CHOKE_PARMS, sizeof(opt), &opt);
+	return nla_nest_end(skb, opts);
+
+nla_put_failure:
+	nla_nest_cancel(skb, opts);
+	return -EMSGSIZE;
+}
+
+static int choke_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
+{
+	struct choke_sched_data *q = qdisc_priv(sch);
+	struct tc_choke_xstats st = {
+		.early	= q->stats.prob_drop + q->stats.forced_drop,
+		.marked	= q->stats.prob_mark + q->stats.forced_mark,
+		.pdrop	= q->stats.pdrop,
+		.other	= q->stats.other,
+		.matched = q->stats.matched,
+	};
+
+	return gnet_stats_copy_app(d, &st, sizeof(st));
+}
+
+static void choke_destroy(struct Qdisc *sch)
+{
+	struct choke_sched_data *q = qdisc_priv(sch);
+
+	tcf_destroy_chain(&q->filter_list);
+	choke_free(q->tab);
+}
+
+static struct Qdisc *choke_leaf(struct Qdisc *sch, unsigned long arg)
+{
+	return NULL;
+}
+
+static unsigned long choke_get(struct Qdisc *sch, u32 classid)
+{
+	return 0;
+}
+
+static void choke_put(struct Qdisc *q, unsigned long cl)
+{
+}
+
+static unsigned long choke_bind(struct Qdisc *sch, unsigned long parent,
+				u32 classid)
+{
+	return 0;
+}
+
+static struct tcf_proto **choke_find_tcf(struct Qdisc *sch, unsigned long cl)
+{
+	struct choke_sched_data *q = qdisc_priv(sch);
+
+	if (cl)
+		return NULL;
+	return &q->filter_list;
+}
+
+static int choke_dump_class(struct Qdisc *sch, unsigned long cl,
+			  struct sk_buff *skb, struct tcmsg *tcm)
+{
+	tcm->tcm_handle |= TC_H_MIN(cl);
+	return 0;
+}
+
+static void choke_walk(struct Qdisc *sch, struct qdisc_walker *arg)
+{
+	if (!arg->stop) {
+		if (arg->fn(sch, 1, arg) < 0) {
+			arg->stop = 1;
+			return;
+		}
+		arg->count++;
+	}
+}
+
+static const struct Qdisc_class_ops choke_class_ops = {
+	.leaf		=	choke_leaf,
+	.get		=	choke_get,
+	.put		=	choke_put,
+	.tcf_chain	=	choke_find_tcf,
+	.bind_tcf	=	choke_bind,
+	.unbind_tcf	=	choke_put,
+	.dump		=	choke_dump_class,
+	.walk		=	choke_walk,
+};
+
+static struct sk_buff *choke_peek_head(struct Qdisc *sch)
+{
+	struct choke_sched_data *q = qdisc_priv(sch);
+
+	return (q->head != q->tail) ? q->tab[q->head] : NULL;
+}
+
+static struct Qdisc_ops choke_qdisc_ops __read_mostly = {
+	.id		=	"choke",
+	.priv_size	=	sizeof(struct choke_sched_data),
+
+	.enqueue	=	choke_enqueue,
+	.dequeue	=	choke_dequeue,
+	.peek		=	choke_peek_head,
+	.drop		=	choke_drop,
+	.init		=	choke_init,
+	.destroy	=	choke_destroy,
+	.reset		=	choke_reset,
+	.change		=	choke_change,
+	.dump		=	choke_dump,
+	.dump_stats	=	choke_dump_stats,
+	.owner		=	THIS_MODULE,
+};
+
+static int __init choke_module_init(void)
+{
+	return register_qdisc(&choke_qdisc_ops);
+}
+
+static void __exit choke_module_exit(void)
+{
+	unregister_qdisc(&choke_qdisc_ops);
+}
+
+module_init(choke_module_init)
+module_exit(choke_module_exit)
+
+MODULE_LICENSE("GPL");
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 0f7bf3f..2c79020 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -137,10 +137,10 @@ static int dsmark_change(struct Qdisc *sch, u32 classid, u32 parent,
 		mask = nla_get_u8(tb[TCA_DSMARK_MASK]);
 
 	if (tb[TCA_DSMARK_VALUE])
-		p->value[*arg-1] = nla_get_u8(tb[TCA_DSMARK_VALUE]);
+		p->value[*arg - 1] = nla_get_u8(tb[TCA_DSMARK_VALUE]);
 
 	if (tb[TCA_DSMARK_MASK])
-		p->mask[*arg-1] = mask;
+		p->mask[*arg - 1] = mask;
 
 	err = 0;
 
@@ -155,8 +155,8 @@ static int dsmark_delete(struct Qdisc *sch, unsigned long arg)
 	if (!dsmark_valid_index(p, arg))
 		return -EINVAL;
 
-	p->mask[arg-1] = 0xff;
-	p->value[arg-1] = 0;
+	p->mask[arg - 1] = 0xff;
+	p->value[arg - 1] = 0;
 
 	return 0;
 }
@@ -175,7 +175,7 @@ static void dsmark_walk(struct Qdisc *sch, struct qdisc_walker *walker)
 		if (p->mask[i] == 0xff && !p->value[i])
 			goto ignore;
 		if (walker->count >= walker->skip) {
-			if (walker->fn(sch, i+1, walker) < 0) {
+			if (walker->fn(sch, i + 1, walker) < 0) {
 				walker->stop = 1;
 				break;
 			}
@@ -304,9 +304,8 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch)
 		 * and don't need yet another qdisc as a bypass.
 		 */
 		if (p->mask[index] != 0xff || p->value[index])
-			printk(KERN_WARNING
-			       "dsmark_dequeue: unsupported protocol %d\n",
-			       ntohs(skb->protocol));
+			pr_warning("dsmark_dequeue: unsupported protocol %d\n",
+				   ntohs(skb->protocol));
 		break;
 	}
 
@@ -424,14 +423,14 @@ static int dsmark_dump_class(struct Qdisc *sch, unsigned long cl,
 	if (!dsmark_valid_index(p, cl))
 		return -EINVAL;
 
-	tcm->tcm_handle = TC_H_MAKE(TC_H_MAJ(sch->handle), cl-1);
+	tcm->tcm_handle = TC_H_MAKE(TC_H_MAJ(sch->handle), cl - 1);
 	tcm->tcm_info = p->q->handle;
 
 	opts = nla_nest_start(skb, TCA_OPTIONS);
 	if (opts == NULL)
 		goto nla_put_failure;
-	NLA_PUT_U8(skb, TCA_DSMARK_MASK, p->mask[cl-1]);
-	NLA_PUT_U8(skb, TCA_DSMARK_VALUE, p->value[cl-1]);
+	NLA_PUT_U8(skb, TCA_DSMARK_MASK, p->mask[cl - 1]);
+	NLA_PUT_U8(skb, TCA_DSMARK_VALUE, p->value[cl - 1]);
 
 	return nla_nest_end(skb, opts);
 
diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c
index d468b47..66effe2 100644
--- a/net/sched/sch_fifo.c
+++ b/net/sched/sch_fifo.c
@@ -19,36 +19,25 @@
 
 /* 1 band FIFO pseudo-"scheduler" */
 
-struct fifo_sched_data
+static int bfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 {
-	u32 limit;
-};
-
-static int bfifo_enqueue(struct sk_buff *skb, struct Qdisc* sch)
-{
-	struct fifo_sched_data *q = qdisc_priv(sch);
-
-	if (likely(sch->qstats.backlog + qdisc_pkt_len(skb) <= q->limit))
+	if (likely(sch->qstats.backlog + qdisc_pkt_len(skb) <= sch->limit))
 		return qdisc_enqueue_tail(skb, sch);
 
 	return qdisc_reshape_fail(skb, sch);
 }
 
-static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc* sch)
+static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 {
-	struct fifo_sched_data *q = qdisc_priv(sch);
-
-	if (likely(skb_queue_len(&sch->q) < q->limit))
+	if (likely(skb_queue_len(&sch->q) < sch->limit))
 		return qdisc_enqueue_tail(skb, sch);
 
 	return qdisc_reshape_fail(skb, sch);
 }
 
-static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc* sch)
+static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 {
-	struct fifo_sched_data *q = qdisc_priv(sch);
-
-	if (likely(skb_queue_len(&sch->q) < q->limit))
+	if (likely(skb_queue_len(&sch->q) < sch->limit))
 		return qdisc_enqueue_tail(skb, sch);
 
 	/* queue full, remove one skb to fulfill the limit */
@@ -61,31 +50,40 @@ static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc* sch)
 
 static int fifo_init(struct Qdisc *sch, struct nlattr *opt)
 {
-	struct fifo_sched_data *q = qdisc_priv(sch);
+	bool bypass;
+	bool is_bfifo = sch->ops == &bfifo_qdisc_ops;
 
 	if (opt == NULL) {
 		u32 limit = qdisc_dev(sch)->tx_queue_len ? : 1;
 
-		if (sch->ops == &bfifo_qdisc_ops)
+		if (is_bfifo)
 			limit *= psched_mtu(qdisc_dev(sch));
 
-		q->limit = limit;
+		sch->limit = limit;
 	} else {
 		struct tc_fifo_qopt *ctl = nla_data(opt);
 
 		if (nla_len(opt) < sizeof(*ctl))
 			return -EINVAL;
 
-		q->limit = ctl->limit;
+		sch->limit = ctl->limit;
 	}
 
+	if (is_bfifo)
+		bypass = sch->limit >= psched_mtu(qdisc_dev(sch));
+	else
+		bypass = sch->limit >= 1;
+
+	if (bypass)
+		sch->flags |= TCQ_F_CAN_BYPASS;
+	else
+		sch->flags &= ~TCQ_F_CAN_BYPASS;
 	return 0;
 }
 
 static int fifo_dump(struct Qdisc *sch, struct sk_buff *skb)
 {
-	struct fifo_sched_data *q = qdisc_priv(sch);
-	struct tc_fifo_qopt opt = { .limit = q->limit };
+	struct tc_fifo_qopt opt = { .limit = sch->limit };
 
 	NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
 	return skb->len;
@@ -96,7 +94,7 @@ nla_put_failure:
 
 struct Qdisc_ops pfifo_qdisc_ops __read_mostly = {
 	.id		=	"pfifo",
-	.priv_size	=	sizeof(struct fifo_sched_data),
+	.priv_size	=	0,
 	.enqueue	=	pfifo_enqueue,
 	.dequeue	=	qdisc_dequeue_head,
 	.peek		=	qdisc_peek_head,
@@ -111,7 +109,7 @@ EXPORT_SYMBOL(pfifo_qdisc_ops);
 
 struct Qdisc_ops bfifo_qdisc_ops __read_mostly = {
 	.id		=	"bfifo",
-	.priv_size	=	sizeof(struct fifo_sched_data),
+	.priv_size	=	0,
 	.enqueue	=	bfifo_enqueue,
 	.dequeue	=	qdisc_dequeue_head,
 	.peek		=	qdisc_peek_head,
@@ -126,7 +124,7 @@ EXPORT_SYMBOL(bfifo_qdisc_ops);
 
 struct Qdisc_ops pfifo_head_drop_qdisc_ops __read_mostly = {
 	.id		=	"pfifo_head_drop",
-	.priv_size	=	sizeof(struct fifo_sched_data),
+	.priv_size	=	0,
 	.enqueue	=	pfifo_tail_enqueue,
 	.dequeue	=	qdisc_dequeue_head,
 	.peek		=	qdisc_peek_head,
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 1bc6980..c84b659 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -87,8 +87,8 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb,
 		 */
 		kfree_skb(skb);
 		if (net_ratelimit())
-			printk(KERN_WARNING "Dead loop on netdevice %s, "
-			       "fix it urgently!\n", dev_queue->dev->name);
+			pr_warning("Dead loop on netdevice %s, fix it urgently!\n",
+				   dev_queue->dev->name);
 		ret = qdisc_qlen(q);
 	} else {
 		/*
@@ -137,8 +137,8 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
 	} else {
 		/* Driver returned NETDEV_TX_BUSY - requeue skb */
 		if (unlikely (ret != NETDEV_TX_BUSY && net_ratelimit()))
-			printk(KERN_WARNING "BUG %s code %d qlen %d\n",
-			       dev->name, ret, q->q.qlen);
+			pr_warning("BUG %s code %d qlen %d\n",
+				   dev->name, ret, q->q.qlen);
 
 		ret = dev_requeue_skb(skb, q);
 	}
@@ -412,8 +412,9 @@ static struct Qdisc noqueue_qdisc = {
 };
 
 
-static const u8 prio2band[TC_PRIO_MAX+1] =
-	{ 1, 2, 2, 2, 1, 2, 0, 0 , 1, 1, 1, 1, 1, 1, 1, 1 };
+static const u8 prio2band[TC_PRIO_MAX + 1] = {
+	1, 2, 2, 2, 1, 2, 0, 0 , 1, 1, 1, 1, 1, 1, 1, 1
+};
 
 /* 3-band FIFO queue: old style, but should be a bit faster than
    generic prio+fifo combination.
@@ -445,7 +446,7 @@ static inline struct sk_buff_head *band2list(struct pfifo_fast_priv *priv,
 	return priv->q + band;
 }
 
-static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc* qdisc)
+static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc)
 {
 	if (skb_queue_len(&qdisc->q) < qdisc_dev(qdisc)->tx_queue_len) {
 		int band = prio2band[skb->priority & TC_PRIO_MAX];
@@ -460,7 +461,7 @@ static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc* qdisc)
 	return qdisc_drop(skb, qdisc);
 }
 
-static struct sk_buff *pfifo_fast_dequeue(struct Qdisc* qdisc)
+static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc)
 {
 	struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
 	int band = bitmap2band[priv->bitmap];
@@ -479,7 +480,7 @@ static struct sk_buff *pfifo_fast_dequeue(struct Qdisc* qdisc)
 	return NULL;
 }
 
-static struct sk_buff *pfifo_fast_peek(struct Qdisc* qdisc)
+static struct sk_buff *pfifo_fast_peek(struct Qdisc *qdisc)
 {
 	struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
 	int band = bitmap2band[priv->bitmap];
@@ -493,7 +494,7 @@ static struct sk_buff *pfifo_fast_peek(struct Qdisc* qdisc)
 	return NULL;
 }
 
-static void pfifo_fast_reset(struct Qdisc* qdisc)
+static void pfifo_fast_reset(struct Qdisc *qdisc)
 {
 	int prio;
 	struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
@@ -510,7 +511,7 @@ static int pfifo_fast_dump(struct Qdisc *qdisc, struct sk_buff *skb)
 {
 	struct tc_prio_qopt opt = { .bands = PFIFO_FAST_BANDS };
 
-	memcpy(&opt.priomap, prio2band, TC_PRIO_MAX+1);
+	memcpy(&opt.priomap, prio2band, TC_PRIO_MAX + 1);
 	NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
 	return skb->len;
 
@@ -526,6 +527,8 @@ static int pfifo_fast_init(struct Qdisc *qdisc, struct nlattr *opt)
 	for (prio = 0; prio < PFIFO_FAST_BANDS; prio++)
 		skb_queue_head_init(band2list(priv, prio));
 
+	/* Can by-pass the queue discipline */
+	qdisc->flags |= TCQ_F_CAN_BYPASS;
 	return 0;
 }
 
@@ -540,27 +543,32 @@ struct Qdisc_ops pfifo_fast_ops __read_mostly = {
 	.dump		=	pfifo_fast_dump,
 	.owner		=	THIS_MODULE,
 };
+EXPORT_SYMBOL(pfifo_fast_ops);
 
 struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
 			  struct Qdisc_ops *ops)
 {
 	void *p;
 	struct Qdisc *sch;
-	unsigned int size;
+	unsigned int size = QDISC_ALIGN(sizeof(*sch)) + ops->priv_size;
 	int err = -ENOBUFS;
 
-	/* ensure that the Qdisc and the private data are 64-byte aligned */
-	size = QDISC_ALIGN(sizeof(*sch));
-	size += ops->priv_size + (QDISC_ALIGNTO - 1);
-
 	p = kzalloc_node(size, GFP_KERNEL,
 			 netdev_queue_numa_node_read(dev_queue));
 
 	if (!p)
 		goto errout;
 	sch = (struct Qdisc *) QDISC_ALIGN((unsigned long) p);
-	sch->padded = (char *) sch - (char *) p;
-
+	/* if we got non aligned memory, ask more and do alignment ourself */
+	if (sch != p) {
+		kfree(p);
+		p = kzalloc_node(size + QDISC_ALIGNTO - 1, GFP_KERNEL,
+				 netdev_queue_numa_node_read(dev_queue));
+		if (!p)
+			goto errout;
+		sch = (struct Qdisc *) QDISC_ALIGN((unsigned long) p);
+		sch->padded = (char *) sch - (char *) p;
+	}
 	INIT_LIST_HEAD(&sch->list);
 	skb_queue_head_init(&sch->q);
 	spin_lock_init(&sch->busylock);
@@ -630,7 +638,7 @@ void qdisc_destroy(struct Qdisc *qdisc)
 #ifdef CONFIG_NET_SCHED
 	qdisc_list_del(qdisc);
 
-	qdisc_put_stab(qdisc->stab);
+	qdisc_put_stab(rtnl_dereference(qdisc->stab));
 #endif
 	gen_kill_estimator(&qdisc->bstats, &qdisc->rate_est);
 	if (ops->reset)
@@ -674,25 +682,21 @@ struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue,
 
 	return oqdisc;
 }
+EXPORT_SYMBOL(dev_graft_qdisc);
 
 static void attach_one_default_qdisc(struct net_device *dev,
 				     struct netdev_queue *dev_queue,
 				     void *_unused)
 {
-	struct Qdisc *qdisc;
+	struct Qdisc *qdisc = &noqueue_qdisc;
 
 	if (dev->tx_queue_len) {
 		qdisc = qdisc_create_dflt(dev_queue,
 					  &pfifo_fast_ops, TC_H_ROOT);
 		if (!qdisc) {
-			printk(KERN_INFO "%s: activation failed\n", dev->name);
+			netdev_info(dev, "activation failed\n");
 			return;
 		}
-
-		/* Can by-pass the queue discipline for default qdisc */
-		qdisc->flags |= TCQ_F_CAN_BYPASS;
-	} else {
-		qdisc =  &noqueue_qdisc;
 	}
 	dev_queue->qdisc_sleeping = qdisc;
 }
@@ -761,6 +765,7 @@ void dev_activate(struct net_device *dev)
 		dev_watchdog_up(dev);
 	}
 }
+EXPORT_SYMBOL(dev_activate);
 
 static void dev_deactivate_queue(struct net_device *dev,
 				 struct netdev_queue *dev_queue,
@@ -841,6 +846,7 @@ void dev_deactivate(struct net_device *dev)
 	dev_deactivate_many(&single);
 	list_del(&single);
 }
+EXPORT_SYMBOL(dev_deactivate);
 
 static void dev_init_scheduler_queue(struct net_device *dev,
 				     struct netdev_queue *dev_queue,
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
index 51dcc2a..b9493a0 100644
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -32,8 +32,7 @@
 struct gred_sched_data;
 struct gred_sched;
 
-struct gred_sched_data
-{
+struct gred_sched_data {
 	u32		limit;		/* HARD maximal queue length	*/
 	u32      	DP;		/* the drop pramaters */
 	u32		bytesin;	/* bytes seen on virtualQ so far*/
@@ -50,8 +49,7 @@ enum {
 	GRED_RIO_MODE,
 };
 
-struct gred_sched
-{
+struct gred_sched {
 	struct gred_sched_data *tab[MAX_DPs];
 	unsigned long	flags;
 	u32		red_flags;
@@ -150,17 +148,18 @@ static inline int gred_use_harddrop(struct gred_sched *t)
 	return t->red_flags & TC_RED_HARDDROP;
 }
 
-static int gred_enqueue(struct sk_buff *skb, struct Qdisc* sch)
+static int gred_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 {
-	struct gred_sched_data *q=NULL;
-	struct gred_sched *t= qdisc_priv(sch);
+	struct gred_sched_data *q = NULL;
+	struct gred_sched *t = qdisc_priv(sch);
 	unsigned long qavg = 0;
 	u16 dp = tc_index_to_dp(skb);
 
-	if (dp >= t->DPs  || (q = t->tab[dp]) == NULL) {
+	if (dp >= t->DPs || (q = t->tab[dp]) == NULL) {
 		dp = t->def;
 
-		if ((q = t->tab[dp]) == NULL) {
+		q = t->tab[dp];
+		if (!q) {
 			/* Pass through packets not assigned to a DP
 			 * if no default DP has been configured. This
 			 * allows for DP flows to be left untouched.
@@ -183,7 +182,7 @@ static int gred_enqueue(struct sk_buff *skb, struct Qdisc* sch)
 		for (i = 0; i < t->DPs; i++) {
 			if (t->tab[i] && t->tab[i]->prio < q->prio &&
 			    !red_is_idling(&t->tab[i]->parms))
-				qavg +=t->tab[i]->parms.qavg;
+				qavg += t->tab[i]->parms.qavg;
 		}
 
 	}
@@ -203,28 +202,28 @@ static int gred_enqueue(struct sk_buff *skb, struct Qdisc* sch)
 		gred_store_wred_set(t, q);
 
 	switch (red_action(&q->parms, q->parms.qavg + qavg)) {
-		case RED_DONT_MARK:
-			break;
-
-		case RED_PROB_MARK:
-			sch->qstats.overlimits++;
-			if (!gred_use_ecn(t) || !INET_ECN_set_ce(skb)) {
-				q->stats.prob_drop++;
-				goto congestion_drop;
-			}
-
-			q->stats.prob_mark++;
-			break;
-
-		case RED_HARD_MARK:
-			sch->qstats.overlimits++;
-			if (gred_use_harddrop(t) || !gred_use_ecn(t) ||
-			    !INET_ECN_set_ce(skb)) {
-				q->stats.forced_drop++;
-				goto congestion_drop;
-			}
-			q->stats.forced_mark++;
-			break;
+	case RED_DONT_MARK:
+		break;
+
+	case RED_PROB_MARK:
+		sch->qstats.overlimits++;
+		if (!gred_use_ecn(t) || !INET_ECN_set_ce(skb)) {
+			q->stats.prob_drop++;
+			goto congestion_drop;
+		}
+
+		q->stats.prob_mark++;
+		break;
+
+	case RED_HARD_MARK:
+		sch->qstats.overlimits++;
+		if (gred_use_harddrop(t) || !gred_use_ecn(t) ||
+		    !INET_ECN_set_ce(skb)) {
+			q->stats.forced_drop++;
+			goto congestion_drop;
+		}
+		q->stats.forced_mark++;
+		break;
 	}
 
 	if (q->backlog + qdisc_pkt_len(skb) <= q->limit) {
@@ -241,7 +240,7 @@ congestion_drop:
 	return NET_XMIT_CN;
 }
 
-static struct sk_buff *gred_dequeue(struct Qdisc* sch)
+static struct sk_buff *gred_dequeue(struct Qdisc *sch)
 {
 	struct sk_buff *skb;
 	struct gred_sched *t = qdisc_priv(sch);
@@ -254,9 +253,9 @@ static struct sk_buff *gred_dequeue(struct Qdisc* sch)
 
 		if (dp >= t->DPs || (q = t->tab[dp]) == NULL) {
 			if (net_ratelimit())
-				printk(KERN_WARNING "GRED: Unable to relocate "
-				       "VQ 0x%x after dequeue, screwing up "
-				       "backlog.\n", tc_index_to_dp(skb));
+				pr_warning("GRED: Unable to relocate VQ 0x%x "
+					   "after dequeue, screwing up "
+					   "backlog.\n", tc_index_to_dp(skb));
 		} else {
 			q->backlog -= qdisc_pkt_len(skb);
 
@@ -273,7 +272,7 @@ static struct sk_buff *gred_dequeue(struct Qdisc* sch)
 	return NULL;
 }
 
-static unsigned int gred_drop(struct Qdisc* sch)
+static unsigned int gred_drop(struct Qdisc *sch)
 {
 	struct sk_buff *skb;
 	struct gred_sched *t = qdisc_priv(sch);
@@ -286,9 +285,9 @@ static unsigned int gred_drop(struct Qdisc* sch)
 
 		if (dp >= t->DPs || (q = t->tab[dp]) == NULL) {
 			if (net_ratelimit())
-				printk(KERN_WARNING "GRED: Unable to relocate "
-				       "VQ 0x%x while dropping, screwing up "
-				       "backlog.\n", tc_index_to_dp(skb));
+				pr_warning("GRED: Unable to relocate VQ 0x%x "
+					   "while dropping, screwing up "
+					   "backlog.\n", tc_index_to_dp(skb));
 		} else {
 			q->backlog -= len;
 			q->stats.other++;
@@ -308,7 +307,7 @@ static unsigned int gred_drop(struct Qdisc* sch)
 
 }
 
-static void gred_reset(struct Qdisc* sch)
+static void gred_reset(struct Qdisc *sch)
 {
 	int i;
 	struct gred_sched *t = qdisc_priv(sch);
@@ -369,8 +368,8 @@ static inline int gred_change_table_def(struct Qdisc *sch, struct nlattr *dps)
 
 	for (i = table->DPs; i < MAX_DPs; i++) {
 		if (table->tab[i]) {
-			printk(KERN_WARNING "GRED: Warning: Destroying "
-			       "shadowed VQ 0x%x\n", i);
+			pr_warning("GRED: Warning: Destroying "
+				   "shadowed VQ 0x%x\n", i);
 			gred_destroy_vq(table->tab[i]);
 			table->tab[i] = NULL;
 		}
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 14a799de..6488e64 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -81,8 +81,7 @@
  *   that are expensive on 32-bit architectures.
  */
 
-struct internal_sc
-{
+struct internal_sc {
 	u64	sm1;	/* scaled slope of the 1st segment */
 	u64	ism1;	/* scaled inverse-slope of the 1st segment */
 	u64	dx;	/* the x-projection of the 1st segment */
@@ -92,8 +91,7 @@ struct internal_sc
 };
 
 /* runtime service curve */
-struct runtime_sc
-{
+struct runtime_sc {
 	u64	x;	/* current starting position on x-axis */
 	u64	y;	/* current starting position on y-axis */
 	u64	sm1;	/* scaled slope of the 1st segment */
@@ -104,15 +102,13 @@ struct runtime_sc
 	u64	ism2;	/* scaled inverse-slope of the 2nd segment */
 };
 
-enum hfsc_class_flags
-{
+enum hfsc_class_flags {
 	HFSC_RSC = 0x1,
 	HFSC_FSC = 0x2,
 	HFSC_USC = 0x4
 };
 
-struct hfsc_class
-{
+struct hfsc_class {
 	struct Qdisc_class_common cl_common;
 	unsigned int	refcnt;		/* usage count */
 
@@ -140,8 +136,8 @@ struct hfsc_class
 	u64	cl_cumul;		/* cumulative work in bytes done by
 					   real-time criteria */
 
-	u64 	cl_d;			/* deadline*/
-	u64 	cl_e;			/* eligible time */
+	u64	cl_d;			/* deadline*/
+	u64	cl_e;			/* eligible time */
 	u64	cl_vt;			/* virtual time */
 	u64	cl_f;			/* time when this class will fit for
 					   link-sharing, max(myf, cfmin) */
@@ -176,8 +172,7 @@ struct hfsc_class
 	unsigned long	cl_nactive;	/* number of active children */
 };
 
-struct hfsc_sched
-{
+struct hfsc_sched {
 	u16	defcls;				/* default class id */
 	struct hfsc_class root;			/* root class */
 	struct Qdisc_class_hash clhash;		/* class hash */
@@ -693,7 +688,7 @@ init_vf(struct hfsc_class *cl, unsigned int len)
 		if (go_active) {
 			n = rb_last(&cl->cl_parent->vt_tree);
 			if (n != NULL) {
-				max_cl = rb_entry(n, struct hfsc_class,vt_node);
+				max_cl = rb_entry(n, struct hfsc_class, vt_node);
 				/*
 				 * set vt to the average of the min and max
 				 * classes.  if the parent's period didn't
@@ -1177,8 +1172,10 @@ hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
 			return NULL;
 		}
 #endif
-		if ((cl = (struct hfsc_class *)res.class) == NULL) {
-			if ((cl = hfsc_find_class(res.classid, sch)) == NULL)
+		cl = (struct hfsc_class *)res.class;
+		if (!cl) {
+			cl = hfsc_find_class(res.classid, sch);
+			if (!cl)
 				break; /* filter selected invalid classid */
 			if (cl->level >= head->level)
 				break; /* filter may only point downwards */
@@ -1316,7 +1313,7 @@ hfsc_dump_sc(struct sk_buff *skb, int attr, struct internal_sc *sc)
 	return -1;
 }
 
-static inline int
+static int
 hfsc_dump_curves(struct sk_buff *skb, struct hfsc_class *cl)
 {
 	if ((cl->cl_flags & HFSC_RSC) &&
@@ -1420,7 +1417,8 @@ hfsc_schedule_watchdog(struct Qdisc *sch)
 	struct hfsc_class *cl;
 	u64 next_time = 0;
 
-	if ((cl = eltree_get_minel(q)) != NULL)
+	cl = eltree_get_minel(q);
+	if (cl)
 		next_time = cl->cl_e;
 	if (q->root.cl_cfmin != 0) {
 		if (next_time == 0 || next_time > q->root.cl_cfmin)
@@ -1625,7 +1623,8 @@ hfsc_dequeue(struct Qdisc *sch)
 	 * find the class with the minimum deadline among
 	 * the eligible classes.
 	 */
-	if ((cl = eltree_get_mindl(q, cur_time)) != NULL) {
+	cl = eltree_get_mindl(q, cur_time);
+	if (cl) {
 		realtime = 1;
 	} else {
 		/*
@@ -1664,7 +1663,7 @@ hfsc_dequeue(struct Qdisc *sch)
 		set_passive(cl);
 	}
 
-	sch->flags &= ~TCQ_F_THROTTLED;
+	qdisc_unthrottled(sch);
 	qdisc_bstats_update(sch, skb);
 	sch->q.qlen--;
 
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index fc12fe6..e1429a8 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -99,9 +99,10 @@ struct htb_class {
 			struct rb_root feed[TC_HTB_NUMPRIO];	/* feed trees */
 			struct rb_node *ptr[TC_HTB_NUMPRIO];	/* current class ptr */
 			/* When class changes from state 1->2 and disconnects from
-			   parent's feed then we lost ptr value and start from the
-			   first child again. Here we store classid of the
-			   last valid ptr (used when ptr is NULL). */
+			 * parent's feed then we lost ptr value and start from the
+			 * first child again. Here we store classid of the
+			 * last valid ptr (used when ptr is NULL).
+			 */
 			u32 last_ptr_id[TC_HTB_NUMPRIO];
 		} inner;
 	} un;
@@ -185,7 +186,7 @@ static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
  * have no valid leaf we try to use MAJOR:default leaf. It still unsuccessfull
  * then finish and return direct queue.
  */
-#define HTB_DIRECT (struct htb_class*)-1
+#define HTB_DIRECT ((struct htb_class *)-1L)
 
 static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
 				      int *qerr)
@@ -197,11 +198,13 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
 	int result;
 
 	/* allow to select class by setting skb->priority to valid classid;
-	   note that nfmark can be used too by attaching filter fw with no
-	   rules in it */
+	 * note that nfmark can be used too by attaching filter fw with no
+	 * rules in it
+	 */
 	if (skb->priority == sch->handle)
 		return HTB_DIRECT;	/* X:0 (direct flow) selected */
-	if ((cl = htb_find(skb->priority, sch)) != NULL && cl->level == 0)
+	cl = htb_find(skb->priority, sch);
+	if (cl && cl->level == 0)
 		return cl;
 
 	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
@@ -216,10 +219,12 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
 			return NULL;
 		}
 #endif
-		if ((cl = (void *)res.class) == NULL) {
+		cl = (void *)res.class;
+		if (!cl) {
 			if (res.classid == sch->handle)
 				return HTB_DIRECT;	/* X:0 (direct flow) */
-			if ((cl = htb_find(res.classid, sch)) == NULL)
+			cl = htb_find(res.classid, sch);
+			if (!cl)
 				break;	/* filter selected invalid classid */
 		}
 		if (!cl->level)
@@ -378,7 +383,8 @@ static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl)
 
 			if (p->un.inner.feed[prio].rb_node)
 				/* parent already has its feed in use so that
-				   reset bit in mask as parent is already ok */
+				 * reset bit in mask as parent is already ok
+				 */
 				mask &= ~(1 << prio);
 
 			htb_add_to_id_tree(p->un.inner.feed + prio, cl, prio);
@@ -413,8 +419,9 @@ static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl)
 
 			if (p->un.inner.ptr[prio] == cl->node + prio) {
 				/* we are removing child which is pointed to from
-				   parent feed - forget the pointer but remember
-				   classid */
+				 * parent feed - forget the pointer but remember
+				 * classid
+				 */
 				p->un.inner.last_ptr_id[prio] = cl->common.classid;
 				p->un.inner.ptr[prio] = NULL;
 			}
@@ -663,8 +670,9 @@ static psched_time_t htb_do_events(struct htb_sched *q, int level,
 				   unsigned long start)
 {
 	/* don't run for longer than 2 jiffies; 2 is used instead of
-	   1 to simplify things when jiffy is going to be incremented
-	   too soon */
+	 * 1 to simplify things when jiffy is going to be incremented
+	 * too soon
+	 */
 	unsigned long stop_at = start + 2;
 	while (time_before(jiffies, stop_at)) {
 		struct htb_class *cl;
@@ -687,7 +695,7 @@ static psched_time_t htb_do_events(struct htb_sched *q, int level,
 
 	/* too much load - let's continue after a break for scheduling */
 	if (!(q->warned & HTB_WARN_TOOMANYEVENTS)) {
-		printk(KERN_WARNING "htb: too many events!\n");
+		pr_warning("htb: too many events!\n");
 		q->warned |= HTB_WARN_TOOMANYEVENTS;
 	}
 
@@ -695,7 +703,8 @@ static psched_time_t htb_do_events(struct htb_sched *q, int level,
 }
 
 /* Returns class->node+prio from id-tree where classe's id is >= id. NULL
-   is no such one exists. */
+ * is no such one exists.
+ */
 static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n,
 					      u32 id)
 {
@@ -739,12 +748,14 @@ static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio,
 	for (i = 0; i < 65535; i++) {
 		if (!*sp->pptr && *sp->pid) {
 			/* ptr was invalidated but id is valid - try to recover
-			   the original or next ptr */
+			 * the original or next ptr
+			 */
 			*sp->pptr =
 			    htb_id_find_next_upper(prio, sp->root, *sp->pid);
 		}
 		*sp->pid = 0;	/* ptr is valid now so that remove this hint as it
-				   can become out of date quickly */
+				 * can become out of date quickly
+				 */
 		if (!*sp->pptr) {	/* we are at right end; rewind & go up */
 			*sp->pptr = sp->root;
 			while ((*sp->pptr)->rb_left)
@@ -772,7 +783,8 @@ static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio,
 }
 
 /* dequeues packet at given priority and level; call only if
-   you are sure that there is active class at prio/level */
+ * you are sure that there is active class at prio/level
+ */
 static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, int prio,
 					int level)
 {
@@ -789,9 +801,10 @@ next:
 			return NULL;
 
 		/* class can be empty - it is unlikely but can be true if leaf
-		   qdisc drops packets in enqueue routine or if someone used
-		   graft operation on the leaf since last dequeue;
-		   simply deactivate and skip such class */
+		 * qdisc drops packets in enqueue routine or if someone used
+		 * graft operation on the leaf since last dequeue;
+		 * simply deactivate and skip such class
+		 */
 		if (unlikely(cl->un.leaf.q->q.qlen == 0)) {
 			struct htb_class *next;
 			htb_deactivate(q, cl);
@@ -831,7 +844,8 @@ next:
 					  ptr[0]) + prio);
 		}
 		/* this used to be after charge_class but this constelation
-		   gives us slightly better performance */
+		 * gives us slightly better performance
+		 */
 		if (!cl->un.leaf.q->q.qlen)
 			htb_deactivate(q, cl);
 		htb_charge_class(q, cl, level, skb);
@@ -852,7 +866,7 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch)
 	if (skb != NULL) {
 ok:
 		qdisc_bstats_update(sch, skb);
-		sch->flags &= ~TCQ_F_THROTTLED;
+		qdisc_unthrottled(sch);
 		sch->q.qlen--;
 		return skb;
 	}
@@ -883,6 +897,7 @@ ok:
 		m = ~q->row_mask[level];
 		while (m != (int)(-1)) {
 			int prio = ffz(m);
+
 			m |= 1 << prio;
 			skb = htb_dequeue_tree(q, prio, level);
 			if (likely(skb != NULL))
@@ -987,13 +1002,12 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt)
 		return err;
 
 	if (tb[TCA_HTB_INIT] == NULL) {
-		printk(KERN_ERR "HTB: hey probably you have bad tc tool ?\n");
+		pr_err("HTB: hey probably you have bad tc tool ?\n");
 		return -EINVAL;
 	}
 	gopt = nla_data(tb[TCA_HTB_INIT]);
 	if (gopt->version != HTB_VER >> 16) {
-		printk(KERN_ERR
-		       "HTB: need tc/htb version %d (minor is %d), you have %d\n",
+		pr_err("HTB: need tc/htb version %d (minor is %d), you have %d\n",
 		       HTB_VER >> 16, HTB_VER & 0xffff, gopt->version);
 		return -EINVAL;
 	}
@@ -1206,9 +1220,10 @@ static void htb_destroy(struct Qdisc *sch)
 	cancel_work_sync(&q->work);
 	qdisc_watchdog_cancel(&q->watchdog);
 	/* This line used to be after htb_destroy_class call below
-	   and surprisingly it worked in 2.4. But it must precede it
-	   because filter need its target class alive to be able to call
-	   unbind_filter on it (without Oops). */
+	 * and surprisingly it worked in 2.4. But it must precede it
+	 * because filter need its target class alive to be able to call
+	 * unbind_filter on it (without Oops).
+	 */
 	tcf_destroy_chain(&q->filter_list);
 
 	for (i = 0; i < q->clhash.hashsize; i++) {
@@ -1342,11 +1357,12 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
 
 		/* check maximal depth */
 		if (parent && parent->parent && parent->parent->level < 2) {
-			printk(KERN_ERR "htb: tree is too deep\n");
+			pr_err("htb: tree is too deep\n");
 			goto failure;
 		}
 		err = -ENOBUFS;
-		if ((cl = kzalloc(sizeof(*cl), GFP_KERNEL)) == NULL)
+		cl = kzalloc(sizeof(*cl), GFP_KERNEL);
+		if (!cl)
 			goto failure;
 
 		err = gen_new_estimator(&cl->bstats, &cl->rate_est,
@@ -1366,8 +1382,9 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
 			RB_CLEAR_NODE(&cl->node[prio]);
 
 		/* create leaf qdisc early because it uses kmalloc(GFP_KERNEL)
-		   so that can't be used inside of sch_tree_lock
-		   -- thanks to Karlis Peisenieks */
+		 * so that can't be used inside of sch_tree_lock
+		 * -- thanks to Karlis Peisenieks
+		 */
 		new_q = qdisc_create_dflt(sch->dev_queue,
 					  &pfifo_qdisc_ops, classid);
 		sch_tree_lock(sch);
@@ -1419,17 +1436,18 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
 	}
 
 	/* it used to be a nasty bug here, we have to check that node
-	   is really leaf before changing cl->un.leaf ! */
+	 * is really leaf before changing cl->un.leaf !
+	 */
 	if (!cl->level) {
 		cl->quantum = rtab->rate.rate / q->rate2quantum;
 		if (!hopt->quantum && cl->quantum < 1000) {
-			printk(KERN_WARNING
+			pr_warning(
 			       "HTB: quantum of class %X is small. Consider r2q change.\n",
 			       cl->common.classid);
 			cl->quantum = 1000;
 		}
 		if (!hopt->quantum && cl->quantum > 200000) {
-			printk(KERN_WARNING
+			pr_warning(
 			       "HTB: quantum of class %X is big. Consider r2q change.\n",
 			       cl->common.classid);
 			cl->quantum = 200000;
@@ -1478,13 +1496,13 @@ static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent,
 	struct htb_class *cl = htb_find(classid, sch);
 
 	/*if (cl && !cl->level) return 0;
-	   The line above used to be there to prevent attaching filters to
-	   leaves. But at least tc_index filter uses this just to get class
-	   for other reasons so that we have to allow for it.
-	   ----
-	   19.6.2002 As Werner explained it is ok - bind filter is just
-	   another way to "lock" the class - unlike "get" this lock can
-	   be broken by class during destroy IIUC.
+	 * The line above used to be there to prevent attaching filters to
+	 * leaves. But at least tc_index filter uses this just to get class
+	 * for other reasons so that we have to allow for it.
+	 * ----
+	 * 19.6.2002 As Werner explained it is ok - bind filter is just
+	 * another way to "lock" the class - unlike "get" this lock can
+	 * be broken by class during destroy IIUC.
 	 */
 	if (cl)
 		cl->filter_cnt++;
diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c
index ecc302f..ec5cbc8 100644
--- a/net/sched/sch_mq.c
+++ b/net/sched/sch_mq.c
@@ -61,7 +61,6 @@ static int mq_init(struct Qdisc *sch, struct nlattr *opt)
 						    TC_H_MIN(ntx + 1)));
 		if (qdisc == NULL)
 			goto err;
-		qdisc->flags |= TCQ_F_CAN_BYPASS;
 		priv->qdiscs[ntx] = qdisc;
 	}
 
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
new file mode 100644
index 0000000..ea17cbe
--- /dev/null
+++ b/net/sched/sch_mqprio.c
@@ -0,0 +1,418 @@
+/*
+ * net/sched/sch_mqprio.c
+ *
+ * Copyright (c) 2010 John Fastabend <john.r.fastabend@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/skbuff.h>
+#include <net/netlink.h>
+#include <net/pkt_sched.h>
+#include <net/sch_generic.h>
+
+struct mqprio_sched {
+	struct Qdisc		**qdiscs;
+	int hw_owned;
+};
+
+static void mqprio_destroy(struct Qdisc *sch)
+{
+	struct net_device *dev = qdisc_dev(sch);
+	struct mqprio_sched *priv = qdisc_priv(sch);
+	unsigned int ntx;
+
+	if (priv->qdiscs) {
+		for (ntx = 0;
+		     ntx < dev->num_tx_queues && priv->qdiscs[ntx];
+		     ntx++)
+			qdisc_destroy(priv->qdiscs[ntx]);
+		kfree(priv->qdiscs);
+	}
+
+	if (priv->hw_owned && dev->netdev_ops->ndo_setup_tc)
+		dev->netdev_ops->ndo_setup_tc(dev, 0);
+	else
+		netdev_set_num_tc(dev, 0);
+}
+
+static int mqprio_parse_opt(struct net_device *dev, struct tc_mqprio_qopt *qopt)
+{
+	int i, j;
+
+	/* Verify num_tc is not out of max range */
+	if (qopt->num_tc > TC_MAX_QUEUE)
+		return -EINVAL;
+
+	/* Verify priority mapping uses valid tcs */
+	for (i = 0; i < TC_BITMASK + 1; i++) {
+		if (qopt->prio_tc_map[i] >= qopt->num_tc)
+			return -EINVAL;
+	}
+
+	/* net_device does not support requested operation */
+	if (qopt->hw && !dev->netdev_ops->ndo_setup_tc)
+		return -EINVAL;
+
+	/* if hw owned qcount and qoffset are taken from LLD so
+	 * no reason to verify them here
+	 */
+	if (qopt->hw)
+		return 0;
+
+	for (i = 0; i < qopt->num_tc; i++) {
+		unsigned int last = qopt->offset[i] + qopt->count[i];
+
+		/* Verify the queue count is in tx range being equal to the
+		 * real_num_tx_queues indicates the last queue is in use.
+		 */
+		if (qopt->offset[i] >= dev->real_num_tx_queues ||
+		    !qopt->count[i] ||
+		    last > dev->real_num_tx_queues)
+			return -EINVAL;
+
+		/* Verify that the offset and counts do not overlap */
+		for (j = i + 1; j < qopt->num_tc; j++) {
+			if (last > qopt->offset[j])
+				return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
+static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
+{
+	struct net_device *dev = qdisc_dev(sch);
+	struct mqprio_sched *priv = qdisc_priv(sch);
+	struct netdev_queue *dev_queue;
+	struct Qdisc *qdisc;
+	int i, err = -EOPNOTSUPP;
+	struct tc_mqprio_qopt *qopt = NULL;
+
+	BUILD_BUG_ON(TC_MAX_QUEUE != TC_QOPT_MAX_QUEUE);
+	BUILD_BUG_ON(TC_BITMASK != TC_QOPT_BITMASK);
+
+	if (sch->parent != TC_H_ROOT)
+		return -EOPNOTSUPP;
+
+	if (!netif_is_multiqueue(dev))
+		return -EOPNOTSUPP;
+
+	if (nla_len(opt) < sizeof(*qopt))
+		return -EINVAL;
+
+	qopt = nla_data(opt);
+	if (mqprio_parse_opt(dev, qopt))
+		return -EINVAL;
+
+	/* pre-allocate qdisc, attachment can't fail */
+	priv->qdiscs = kcalloc(dev->num_tx_queues, sizeof(priv->qdiscs[0]),
+			       GFP_KERNEL);
+	if (priv->qdiscs == NULL) {
+		err = -ENOMEM;
+		goto err;
+	}
+
+	for (i = 0; i < dev->num_tx_queues; i++) {
+		dev_queue = netdev_get_tx_queue(dev, i);
+		qdisc = qdisc_create_dflt(dev_queue, &pfifo_fast_ops,
+					  TC_H_MAKE(TC_H_MAJ(sch->handle),
+						    TC_H_MIN(i + 1)));
+		if (qdisc == NULL) {
+			err = -ENOMEM;
+			goto err;
+		}
+		priv->qdiscs[i] = qdisc;
+	}
+
+	/* If the mqprio options indicate that hardware should own
+	 * the queue mapping then run ndo_setup_tc otherwise use the
+	 * supplied and verified mapping
+	 */
+	if (qopt->hw) {
+		priv->hw_owned = 1;
+		err = dev->netdev_ops->ndo_setup_tc(dev, qopt->num_tc);
+		if (err)
+			goto err;
+	} else {
+		netdev_set_num_tc(dev, qopt->num_tc);
+		for (i = 0; i < qopt->num_tc; i++)
+			netdev_set_tc_queue(dev, i,
+					    qopt->count[i], qopt->offset[i]);
+	}
+
+	/* Always use supplied priority mappings */
+	for (i = 0; i < TC_BITMASK + 1; i++)
+		netdev_set_prio_tc_map(dev, i, qopt->prio_tc_map[i]);
+
+	sch->flags |= TCQ_F_MQROOT;
+	return 0;
+
+err:
+	mqprio_destroy(sch);
+	return err;
+}
+
+static void mqprio_attach(struct Qdisc *sch)
+{
+	struct net_device *dev = qdisc_dev(sch);
+	struct mqprio_sched *priv = qdisc_priv(sch);
+	struct Qdisc *qdisc;
+	unsigned int ntx;
+
+	/* Attach underlying qdisc */
+	for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
+		qdisc = priv->qdiscs[ntx];
+		qdisc = dev_graft_qdisc(qdisc->dev_queue, qdisc);
+		if (qdisc)
+			qdisc_destroy(qdisc);
+	}
+	kfree(priv->qdiscs);
+	priv->qdiscs = NULL;
+}
+
+static struct netdev_queue *mqprio_queue_get(struct Qdisc *sch,
+					     unsigned long cl)
+{
+	struct net_device *dev = qdisc_dev(sch);
+	unsigned long ntx = cl - 1 - netdev_get_num_tc(dev);
+
+	if (ntx >= dev->num_tx_queues)
+		return NULL;
+	return netdev_get_tx_queue(dev, ntx);
+}
+
+static int mqprio_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new,
+		    struct Qdisc **old)
+{
+	struct net_device *dev = qdisc_dev(sch);
+	struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl);
+
+	if (!dev_queue)
+		return -EINVAL;
+
+	if (dev->flags & IFF_UP)
+		dev_deactivate(dev);
+
+	*old = dev_graft_qdisc(dev_queue, new);
+
+	if (dev->flags & IFF_UP)
+		dev_activate(dev);
+
+	return 0;
+}
+
+static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+	struct net_device *dev = qdisc_dev(sch);
+	struct mqprio_sched *priv = qdisc_priv(sch);
+	unsigned char *b = skb_tail_pointer(skb);
+	struct tc_mqprio_qopt opt = { 0 };
+	struct Qdisc *qdisc;
+	unsigned int i;
+
+	sch->q.qlen = 0;
+	memset(&sch->bstats, 0, sizeof(sch->bstats));
+	memset(&sch->qstats, 0, sizeof(sch->qstats));
+
+	for (i = 0; i < dev->num_tx_queues; i++) {
+		qdisc = netdev_get_tx_queue(dev, i)->qdisc;
+		spin_lock_bh(qdisc_lock(qdisc));
+		sch->q.qlen		+= qdisc->q.qlen;
+		sch->bstats.bytes	+= qdisc->bstats.bytes;
+		sch->bstats.packets	+= qdisc->bstats.packets;
+		sch->qstats.qlen	+= qdisc->qstats.qlen;
+		sch->qstats.backlog	+= qdisc->qstats.backlog;
+		sch->qstats.drops	+= qdisc->qstats.drops;
+		sch->qstats.requeues	+= qdisc->qstats.requeues;
+		sch->qstats.overlimits	+= qdisc->qstats.overlimits;
+		spin_unlock_bh(qdisc_lock(qdisc));
+	}
+
+	opt.num_tc = netdev_get_num_tc(dev);
+	memcpy(opt.prio_tc_map, dev->prio_tc_map, sizeof(opt.prio_tc_map));
+	opt.hw = priv->hw_owned;
+
+	for (i = 0; i < netdev_get_num_tc(dev); i++) {
+		opt.count[i] = dev->tc_to_txq[i].count;
+		opt.offset[i] = dev->tc_to_txq[i].offset;
+	}
+
+	NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
+
+	return skb->len;
+nla_put_failure:
+	nlmsg_trim(skb, b);
+	return -1;
+}
+
+static struct Qdisc *mqprio_leaf(struct Qdisc *sch, unsigned long cl)
+{
+	struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl);
+
+	if (!dev_queue)
+		return NULL;
+
+	return dev_queue->qdisc_sleeping;
+}
+
+static unsigned long mqprio_get(struct Qdisc *sch, u32 classid)
+{
+	struct net_device *dev = qdisc_dev(sch);
+	unsigned int ntx = TC_H_MIN(classid);
+
+	if (ntx > dev->num_tx_queues + netdev_get_num_tc(dev))
+		return 0;
+	return ntx;
+}
+
+static void mqprio_put(struct Qdisc *sch, unsigned long cl)
+{
+}
+
+static int mqprio_dump_class(struct Qdisc *sch, unsigned long cl,
+			 struct sk_buff *skb, struct tcmsg *tcm)
+{
+	struct net_device *dev = qdisc_dev(sch);
+
+	if (cl <= netdev_get_num_tc(dev)) {
+		tcm->tcm_parent = TC_H_ROOT;
+		tcm->tcm_info = 0;
+	} else {
+		int i;
+		struct netdev_queue *dev_queue;
+
+		dev_queue = mqprio_queue_get(sch, cl);
+		tcm->tcm_parent = 0;
+		for (i = 0; i < netdev_get_num_tc(dev); i++) {
+			struct netdev_tc_txq tc = dev->tc_to_txq[i];
+			int q_idx = cl - netdev_get_num_tc(dev);
+
+			if (q_idx > tc.offset &&
+			    q_idx <= tc.offset + tc.count) {
+				tcm->tcm_parent =
+					TC_H_MAKE(TC_H_MAJ(sch->handle),
+						  TC_H_MIN(i + 1));
+				break;
+			}
+		}
+		tcm->tcm_info = dev_queue->qdisc_sleeping->handle;
+	}
+	tcm->tcm_handle |= TC_H_MIN(cl);
+	return 0;
+}
+
+static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
+				   struct gnet_dump *d)
+	__releases(d->lock)
+	__acquires(d->lock)
+{
+	struct net_device *dev = qdisc_dev(sch);
+
+	if (cl <= netdev_get_num_tc(dev)) {
+		int i;
+		struct Qdisc *qdisc;
+		struct gnet_stats_queue qstats = {0};
+		struct gnet_stats_basic_packed bstats = {0};
+		struct netdev_tc_txq tc = dev->tc_to_txq[cl - 1];
+
+		/* Drop lock here it will be reclaimed before touching
+		 * statistics this is required because the d->lock we
+		 * hold here is the look on dev_queue->qdisc_sleeping
+		 * also acquired below.
+		 */
+		spin_unlock_bh(d->lock);
+
+		for (i = tc.offset; i < tc.offset + tc.count; i++) {
+			qdisc = netdev_get_tx_queue(dev, i)->qdisc;
+			spin_lock_bh(qdisc_lock(qdisc));
+			bstats.bytes      += qdisc->bstats.bytes;
+			bstats.packets    += qdisc->bstats.packets;
+			qstats.qlen       += qdisc->qstats.qlen;
+			qstats.backlog    += qdisc->qstats.backlog;
+			qstats.drops      += qdisc->qstats.drops;
+			qstats.requeues   += qdisc->qstats.requeues;
+			qstats.overlimits += qdisc->qstats.overlimits;
+			spin_unlock_bh(qdisc_lock(qdisc));
+		}
+		/* Reclaim root sleeping lock before completing stats */
+		spin_lock_bh(d->lock);
+		if (gnet_stats_copy_basic(d, &bstats) < 0 ||
+		    gnet_stats_copy_queue(d, &qstats) < 0)
+			return -1;
+	} else {
+		struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl);
+
+		sch = dev_queue->qdisc_sleeping;
+		sch->qstats.qlen = sch->q.qlen;
+		if (gnet_stats_copy_basic(d, &sch->bstats) < 0 ||
+		    gnet_stats_copy_queue(d, &sch->qstats) < 0)
+			return -1;
+	}
+	return 0;
+}
+
+static void mqprio_walk(struct Qdisc *sch, struct qdisc_walker *arg)
+{
+	struct net_device *dev = qdisc_dev(sch);
+	unsigned long ntx;
+
+	if (arg->stop)
+		return;
+
+	/* Walk hierarchy with a virtual class per tc */
+	arg->count = arg->skip;
+	for (ntx = arg->skip;
+	     ntx < dev->num_tx_queues + netdev_get_num_tc(dev);
+	     ntx++) {
+		if (arg->fn(sch, ntx + 1, arg) < 0) {
+			arg->stop = 1;
+			break;
+		}
+		arg->count++;
+	}
+}
+
+static const struct Qdisc_class_ops mqprio_class_ops = {
+	.graft		= mqprio_graft,
+	.leaf		= mqprio_leaf,
+	.get		= mqprio_get,
+	.put		= mqprio_put,
+	.walk		= mqprio_walk,
+	.dump		= mqprio_dump_class,
+	.dump_stats	= mqprio_dump_class_stats,
+};
+
+static struct Qdisc_ops mqprio_qdisc_ops __read_mostly = {
+	.cl_ops		= &mqprio_class_ops,
+	.id		= "mqprio",
+	.priv_size	= sizeof(struct mqprio_sched),
+	.init		= mqprio_init,
+	.destroy	= mqprio_destroy,
+	.attach		= mqprio_attach,
+	.dump		= mqprio_dump,
+	.owner		= THIS_MODULE,
+};
+
+static int __init mqprio_module_init(void)
+{
+	return register_qdisc(&mqprio_qdisc_ops);
+}
+
+static void __exit mqprio_module_exit(void)
+{
+	unregister_qdisc(&mqprio_qdisc_ops);
+}
+
+module_init(mqprio_module_init);
+module_exit(mqprio_module_exit);
+
+MODULE_LICENSE("GPL");
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index 436a2e7..edc1950 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -156,7 +156,7 @@ static unsigned int multiq_drop(struct Qdisc *sch)
 	unsigned int len;
 	struct Qdisc *qdisc;
 
-	for (band = q->bands-1; band >= 0; band--) {
+	for (band = q->bands - 1; band >= 0; band--) {
 		qdisc = q->queues[band];
 		if (qdisc->ops->drop) {
 			len = qdisc->ops->drop(qdisc);
@@ -265,7 +265,7 @@ static int multiq_init(struct Qdisc *sch, struct nlattr *opt)
 	for (i = 0; i < q->max_bands; i++)
 		q->queues[i] = &noop_qdisc;
 
-	err = multiq_tune(sch,opt);
+	err = multiq_tune(sch, opt);
 
 	if (err)
 		kfree(q->queues);
@@ -346,7 +346,7 @@ static int multiq_dump_class(struct Qdisc *sch, unsigned long cl,
 	struct multiq_sched_data *q = qdisc_priv(sch);
 
 	tcm->tcm_handle |= TC_H_MIN(cl);
-	tcm->tcm_info = q->queues[cl-1]->handle;
+	tcm->tcm_info = q->queues[cl - 1]->handle;
 	return 0;
 }
 
@@ -378,7 +378,7 @@ static void multiq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
 			arg->count++;
 			continue;
 		}
-		if (arg->fn(sch, band+1, arg) < 0) {
+		if (arg->fn(sch, band + 1, arg) < 0) {
 			arg->stop = 1;
 			break;
 		}
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 6a3006b..edbbf7a 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -19,12 +19,13 @@
 #include <linux/kernel.h>
 #include <linux/errno.h>
 #include <linux/skbuff.h>
+#include <linux/vmalloc.h>
 #include <linux/rtnetlink.h>
 
 #include <net/netlink.h>
 #include <net/pkt_sched.h>
 
-#define VERSION "1.2"
+#define VERSION "1.3"
 
 /*	Network Emulation Queuing algorithm.
 	====================================
@@ -47,6 +48,20 @@
 	 layering other disciplines.  It does not need to do bandwidth
 	 control either since that can be handled by using token
 	 bucket or other rate control.
+
+     Correlated Loss Generator models
+
+	Added generation of correlated loss according to the
+	"Gilbert-Elliot" model, a 4-state markov model.
+
+	References:
+	[1] NetemCLG Home http://netgroup.uniroma2.it/NetemCLG
+	[2] S. Salsano, F. Ludovici, A. Ordine, "Definition of a general
+	and intuitive loss model for packet networks and its implementation
+	in the Netem module in the Linux kernel", available in [1]
+
+	Authors: Stefano Salsano <stefano.salsano at uniroma2.it
+		 Fabio Ludovici <fabio.ludovici at yahoo.it>
 */
 
 struct netem_sched_data {
@@ -73,6 +88,26 @@ struct netem_sched_data {
 		u32  size;
 		s16 table[0];
 	} *delay_dist;
+
+	enum  {
+		CLG_RANDOM,
+		CLG_4_STATES,
+		CLG_GILB_ELL,
+	} loss_model;
+
+	/* Correlated Loss Generation models */
+	struct clgstate {
+		/* state of the Markov chain */
+		u8 state;
+
+		/* 4-states and Gilbert-Elliot models */
+		u32 a1;	/* p13 for 4-states or p for GE */
+		u32 a2;	/* p31 for 4-states or r for GE */
+		u32 a3;	/* p32 for 4-states or h for GE */
+		u32 a4;	/* p14 for 4-states or 1-k for GE */
+		u32 a5; /* p23 used only in 4-states */
+	} clg;
+
 };
 
 /* Time stamp put into socket buffer control block */
@@ -115,6 +150,122 @@ static u32 get_crandom(struct crndstate *state)
 	return answer;
 }
 
+/* loss_4state - 4-state model loss generator
+ * Generates losses according to the 4-state Markov chain adopted in
+ * the GI (General and Intuitive) loss model.
+ */
+static bool loss_4state(struct netem_sched_data *q)
+{
+	struct clgstate *clg = &q->clg;
+	u32 rnd = net_random();
+
+	/*
+	 * Makes a comparision between rnd and the transition
+	 * probabilities outgoing from the current state, then decides the
+	 * next state and if the next packet has to be transmitted or lost.
+	 * The four states correspond to:
+	 *   1 => successfully transmitted packets within a gap period
+	 *   4 => isolated losses within a gap period
+	 *   3 => lost packets within a burst period
+	 *   2 => successfully transmitted packets within a burst period
+	 */
+	switch (clg->state) {
+	case 1:
+		if (rnd < clg->a4) {
+			clg->state = 4;
+			return true;
+		} else if (clg->a4 < rnd && rnd < clg->a1) {
+			clg->state = 3;
+			return true;
+		} else if (clg->a1 < rnd)
+			clg->state = 1;
+
+		break;
+	case 2:
+		if (rnd < clg->a5) {
+			clg->state = 3;
+			return true;
+		} else
+			clg->state = 2;
+
+		break;
+	case 3:
+		if (rnd < clg->a3)
+			clg->state = 2;
+		else if (clg->a3 < rnd && rnd < clg->a2 + clg->a3) {
+			clg->state = 1;
+			return true;
+		} else if (clg->a2 + clg->a3 < rnd) {
+			clg->state = 3;
+			return true;
+		}
+		break;
+	case 4:
+		clg->state = 1;
+		break;
+	}
+
+	return false;
+}
+
+/* loss_gilb_ell - Gilbert-Elliot model loss generator
+ * Generates losses according to the Gilbert-Elliot loss model or
+ * its special cases  (Gilbert or Simple Gilbert)
+ *
+ * Makes a comparision between random number and the transition
+ * probabilities outgoing from the current state, then decides the
+ * next state. A second random number is extracted and the comparision
+ * with the loss probability of the current state decides if the next
+ * packet will be transmitted or lost.
+ */
+static bool loss_gilb_ell(struct netem_sched_data *q)
+{
+	struct clgstate *clg = &q->clg;
+
+	switch (clg->state) {
+	case 1:
+		if (net_random() < clg->a1)
+			clg->state = 2;
+		if (net_random() < clg->a4)
+			return true;
+	case 2:
+		if (net_random() < clg->a2)
+			clg->state = 1;
+		if (clg->a3 > net_random())
+			return true;
+	}
+
+	return false;
+}
+
+static bool loss_event(struct netem_sched_data *q)
+{
+	switch (q->loss_model) {
+	case CLG_RANDOM:
+		/* Random packet drop 0 => none, ~0 => all */
+		return q->loss && q->loss >= get_crandom(&q->loss_cor);
+
+	case CLG_4_STATES:
+		/* 4state loss model algorithm (used also for GI model)
+		* Extracts a value from the markov 4 state loss generator,
+		* if it is 1 drops a packet and if needed writes the event in
+		* the kernel logs
+		*/
+		return loss_4state(q);
+
+	case CLG_GILB_ELL:
+		/* Gilbert-Elliot loss model algorithm
+		* Extracts a value from the Gilbert-Elliot loss generator,
+		* if it is 1 drops a packet and if needed writes the event in
+		* the kernel logs
+		*/
+		return loss_gilb_ell(q);
+	}
+
+	return false;	/* not reached */
+}
+
+
 /* tabledist - return a pseudo-randomly distributed value with mean mu and
  * std deviation sigma.  Uses table lookup to approximate the desired
  * distribution, and a uniformly-distributed pseudo-random source.
@@ -161,14 +312,12 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	int ret;
 	int count = 1;
 
-	pr_debug("netem_enqueue skb=%p\n", skb);
-
 	/* Random duplication */
 	if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor))
 		++count;
 
-	/* Random packet drop 0 => none, ~0 => all */
-	if (q->loss && q->loss >= get_crandom(&q->loss_cor))
+	/* Drop packet? */
+	if (loss_event(q))
 		--count;
 
 	if (count == 0) {
@@ -211,8 +360,8 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	}
 
 	cb = netem_skb_cb(skb);
-	if (q->gap == 0 || 		/* not doing reordering */
-	    q->counter < q->gap || 	/* inside last reordering gap */
+	if (q->gap == 0 ||		/* not doing reordering */
+	    q->counter < q->gap ||	/* inside last reordering gap */
 	    q->reorder < get_crandom(&q->reorder_cor)) {
 		psched_time_t now;
 		psched_tdiff_t delay;
@@ -238,17 +387,18 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		ret = NET_XMIT_SUCCESS;
 	}
 
-	if (likely(ret == NET_XMIT_SUCCESS)) {
-		sch->q.qlen++;
-	} else if (net_xmit_drop_count(ret)) {
-		sch->qstats.drops++;
+	if (ret != NET_XMIT_SUCCESS) {
+		if (net_xmit_drop_count(ret)) {
+			sch->qstats.drops++;
+			return ret;
+		}
 	}
 
-	pr_debug("netem: enqueue ret %d\n", ret);
-	return ret;
+	sch->q.qlen++;
+	return NET_XMIT_SUCCESS;
 }
 
-static unsigned int netem_drop(struct Qdisc* sch)
+static unsigned int netem_drop(struct Qdisc *sch)
 {
 	struct netem_sched_data *q = qdisc_priv(sch);
 	unsigned int len = 0;
@@ -265,7 +415,7 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch)
 	struct netem_sched_data *q = qdisc_priv(sch);
 	struct sk_buff *skb;
 
-	if (sch->flags & TCQ_F_THROTTLED)
+	if (qdisc_is_throttled(sch))
 		return NULL;
 
 	skb = q->qdisc->ops->peek(q->qdisc);
@@ -287,9 +437,10 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch)
 			if (G_TC_FROM(skb->tc_verd) & AT_INGRESS)
 				skb->tstamp.tv64 = 0;
 #endif
-			pr_debug("netem_dequeue: return skb=%p\n", skb);
-			qdisc_bstats_update(sch, skb);
+
 			sch->q.qlen--;
+			qdisc_unthrottled(sch);
+			qdisc_bstats_update(sch, skb);
 			return skb;
 		}
 
@@ -308,6 +459,16 @@ static void netem_reset(struct Qdisc *sch)
 	qdisc_watchdog_cancel(&q->watchdog);
 }
 
+static void dist_free(struct disttable *d)
+{
+	if (d) {
+		if (is_vmalloc_addr(d))
+			vfree(d);
+		else
+			kfree(d);
+	}
+}
+
 /*
  * Distribution data is a variable size payload containing
  * signed 16 bit values.
@@ -315,16 +476,20 @@ static void netem_reset(struct Qdisc *sch)
 static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr)
 {
 	struct netem_sched_data *q = qdisc_priv(sch);
-	unsigned long n = nla_len(attr)/sizeof(__s16);
+	size_t n = nla_len(attr)/sizeof(__s16);
 	const __s16 *data = nla_data(attr);
 	spinlock_t *root_lock;
 	struct disttable *d;
 	int i;
+	size_t s;
 
-	if (n > 65536)
+	if (n > NETEM_DIST_MAX)
 		return -EINVAL;
 
-	d = kmalloc(sizeof(*d) + n*sizeof(d->table[0]), GFP_KERNEL);
+	s = sizeof(struct disttable) + n * sizeof(s16);
+	d = kmalloc(s, GFP_KERNEL);
+	if (!d)
+		d = vmalloc(s);
 	if (!d)
 		return -ENOMEM;
 
@@ -335,7 +500,7 @@ static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr)
 	root_lock = qdisc_root_sleeping_lock(sch);
 
 	spin_lock_bh(root_lock);
-	kfree(q->delay_dist);
+	dist_free(q->delay_dist);
 	q->delay_dist = d;
 	spin_unlock_bh(root_lock);
 	return 0;
@@ -369,10 +534,66 @@ static void get_corrupt(struct Qdisc *sch, const struct nlattr *attr)
 	init_crandom(&q->corrupt_cor, r->correlation);
 }
 
+static int get_loss_clg(struct Qdisc *sch, const struct nlattr *attr)
+{
+	struct netem_sched_data *q = qdisc_priv(sch);
+	const struct nlattr *la;
+	int rem;
+
+	nla_for_each_nested(la, attr, rem) {
+		u16 type = nla_type(la);
+
+		switch(type) {
+		case NETEM_LOSS_GI: {
+			const struct tc_netem_gimodel *gi = nla_data(la);
+
+			if (nla_len(la) != sizeof(struct tc_netem_gimodel)) {
+				pr_info("netem: incorrect gi model size\n");
+				return -EINVAL;
+			}
+
+			q->loss_model = CLG_4_STATES;
+
+			q->clg.state = 1;
+			q->clg.a1 = gi->p13;
+			q->clg.a2 = gi->p31;
+			q->clg.a3 = gi->p32;
+			q->clg.a4 = gi->p14;
+			q->clg.a5 = gi->p23;
+			break;
+		}
+
+		case NETEM_LOSS_GE: {
+			const struct tc_netem_gemodel *ge = nla_data(la);
+
+			if (nla_len(la) != sizeof(struct tc_netem_gemodel)) {
+				pr_info("netem: incorrect gi model size\n");
+				return -EINVAL;
+			}
+
+			q->loss_model = CLG_GILB_ELL;
+			q->clg.state = 1;
+			q->clg.a1 = ge->p;
+			q->clg.a2 = ge->r;
+			q->clg.a3 = ge->h;
+			q->clg.a4 = ge->k1;
+			break;
+		}
+
+		default:
+			pr_info("netem: unknown loss type %u\n", type);
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
 static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = {
 	[TCA_NETEM_CORR]	= { .len = sizeof(struct tc_netem_corr) },
 	[TCA_NETEM_REORDER]	= { .len = sizeof(struct tc_netem_reorder) },
 	[TCA_NETEM_CORRUPT]	= { .len = sizeof(struct tc_netem_corrupt) },
+	[TCA_NETEM_LOSS]	= { .type = NLA_NESTED },
 };
 
 static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla,
@@ -380,11 +601,15 @@ static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla,
 {
 	int nested_len = nla_len(nla) - NLA_ALIGN(len);
 
-	if (nested_len < 0)
+	if (nested_len < 0) {
+		pr_info("netem: invalid attributes len %d\n", nested_len);
 		return -EINVAL;
+	}
+
 	if (nested_len >= nla_attr_size(0))
 		return nla_parse(tb, maxtype, nla_data(nla) + NLA_ALIGN(len),
 				 nested_len, policy);
+
 	memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1));
 	return 0;
 }
@@ -407,7 +632,7 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt)
 
 	ret = fifo_set_limit(q->qdisc, qopt->limit);
 	if (ret) {
-		pr_debug("netem: can't set fifo limit\n");
+		pr_info("netem: can't set fifo limit\n");
 		return ret;
 	}
 
@@ -440,7 +665,11 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt)
 	if (tb[TCA_NETEM_CORRUPT])
 		get_corrupt(sch, tb[TCA_NETEM_CORRUPT]);
 
-	return 0;
+	q->loss_model = CLG_RANDOM;
+	if (tb[TCA_NETEM_LOSS])
+		ret = get_loss_clg(sch, tb[TCA_NETEM_LOSS]);
+
+	return ret;
 }
 
 /*
@@ -535,16 +764,17 @@ static int netem_init(struct Qdisc *sch, struct nlattr *opt)
 
 	qdisc_watchdog_init(&q->watchdog, sch);
 
+	q->loss_model = CLG_RANDOM;
 	q->qdisc = qdisc_create_dflt(sch->dev_queue, &tfifo_qdisc_ops,
 				     TC_H_MAKE(sch->handle, 1));
 	if (!q->qdisc) {
-		pr_debug("netem: qdisc create failed\n");
+		pr_notice("netem: qdisc create tfifo qdisc failed\n");
 		return -ENOMEM;
 	}
 
 	ret = netem_change(sch, opt);
 	if (ret) {
-		pr_debug("netem: change failed\n");
+		pr_info("netem: change failed\n");
 		qdisc_destroy(q->qdisc);
 	}
 	return ret;
@@ -556,14 +786,61 @@ static void netem_destroy(struct Qdisc *sch)
 
 	qdisc_watchdog_cancel(&q->watchdog);
 	qdisc_destroy(q->qdisc);
-	kfree(q->delay_dist);
+	dist_free(q->delay_dist);
+}
+
+static int dump_loss_model(const struct netem_sched_data *q,
+			   struct sk_buff *skb)
+{
+	struct nlattr *nest;
+
+	nest = nla_nest_start(skb, TCA_NETEM_LOSS);
+	if (nest == NULL)
+		goto nla_put_failure;
+
+	switch (q->loss_model) {
+	case CLG_RANDOM:
+		/* legacy loss model */
+		nla_nest_cancel(skb, nest);
+		return 0;	/* no data */
+
+	case CLG_4_STATES: {
+		struct tc_netem_gimodel gi = {
+			.p13 = q->clg.a1,
+			.p31 = q->clg.a2,
+			.p32 = q->clg.a3,
+			.p14 = q->clg.a4,
+			.p23 = q->clg.a5,
+		};
+
+		NLA_PUT(skb, NETEM_LOSS_GI, sizeof(gi), &gi);
+		break;
+	}
+	case CLG_GILB_ELL: {
+		struct tc_netem_gemodel ge = {
+			.p = q->clg.a1,
+			.r = q->clg.a2,
+			.h = q->clg.a3,
+			.k1 = q->clg.a4,
+		};
+
+		NLA_PUT(skb, NETEM_LOSS_GE, sizeof(ge), &ge);
+		break;
+	}
+	}
+
+	nla_nest_end(skb, nest);
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(skb, nest);
+	return -1;
 }
 
 static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
 {
 	const struct netem_sched_data *q = qdisc_priv(sch);
-	unsigned char *b = skb_tail_pointer(skb);
-	struct nlattr *nla = (struct nlattr *) b;
+	struct nlattr *nla = (struct nlattr *) skb_tail_pointer(skb);
 	struct tc_netem_qopt qopt;
 	struct tc_netem_corr cor;
 	struct tc_netem_reorder reorder;
@@ -590,17 +867,87 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
 	corrupt.correlation = q->corrupt_cor.rho;
 	NLA_PUT(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt);
 
-	nla->nla_len = skb_tail_pointer(skb) - b;
+	if (dump_loss_model(q, skb) != 0)
+		goto nla_put_failure;
 
-	return skb->len;
+	return nla_nest_end(skb, nla);
 
 nla_put_failure:
-	nlmsg_trim(skb, b);
+	nlmsg_trim(skb, nla);
 	return -1;
 }
 
+static int netem_dump_class(struct Qdisc *sch, unsigned long cl,
+			  struct sk_buff *skb, struct tcmsg *tcm)
+{
+	struct netem_sched_data *q = qdisc_priv(sch);
+
+	if (cl != 1) 	/* only one class */
+		return -ENOENT;
+
+	tcm->tcm_handle |= TC_H_MIN(1);
+	tcm->tcm_info = q->qdisc->handle;
+
+	return 0;
+}
+
+static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
+		     struct Qdisc **old)
+{
+	struct netem_sched_data *q = qdisc_priv(sch);
+
+	if (new == NULL)
+		new = &noop_qdisc;
+
+	sch_tree_lock(sch);
+	*old = q->qdisc;
+	q->qdisc = new;
+	qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
+	qdisc_reset(*old);
+	sch_tree_unlock(sch);
+
+	return 0;
+}
+
+static struct Qdisc *netem_leaf(struct Qdisc *sch, unsigned long arg)
+{
+	struct netem_sched_data *q = qdisc_priv(sch);
+	return q->qdisc;
+}
+
+static unsigned long netem_get(struct Qdisc *sch, u32 classid)
+{
+	return 1;
+}
+
+static void netem_put(struct Qdisc *sch, unsigned long arg)
+{
+}
+
+static void netem_walk(struct Qdisc *sch, struct qdisc_walker *walker)
+{
+	if (!walker->stop) {
+		if (walker->count >= walker->skip)
+			if (walker->fn(sch, 1, walker) < 0) {
+				walker->stop = 1;
+				return;
+			}
+		walker->count++;
+	}
+}
+
+static const struct Qdisc_class_ops netem_class_ops = {
+	.graft		=	netem_graft,
+	.leaf		=	netem_leaf,
+	.get		=	netem_get,
+	.put		=	netem_put,
+	.walk		=	netem_walk,
+	.dump		=	netem_dump_class,
+};
+
 static struct Qdisc_ops netem_qdisc_ops __read_mostly = {
 	.id		=	"netem",
+	.cl_ops		=	&netem_class_ops,
 	.priv_size	=	sizeof(struct netem_sched_data),
 	.enqueue	=	netem_enqueue,
 	.dequeue	=	netem_dequeue,
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index fbd710d..2a318f2 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -22,8 +22,7 @@
 #include <net/pkt_sched.h>
 
 
-struct prio_sched_data
-{
+struct prio_sched_data {
 	int bands;
 	struct tcf_proto *filter_list;
 	u8  prio2band[TC_PRIO_MAX+1];
@@ -54,7 +53,7 @@ prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
 		if (!q->filter_list || err < 0) {
 			if (TC_H_MAJ(band))
 				band = 0;
-			return q->queues[q->prio2band[band&TC_PRIO_MAX]];
+			return q->queues[q->prio2band[band & TC_PRIO_MAX]];
 		}
 		band = res.classid;
 	}
@@ -106,7 +105,7 @@ static struct sk_buff *prio_peek(struct Qdisc *sch)
 	return NULL;
 }
 
-static struct sk_buff *prio_dequeue(struct Qdisc* sch)
+static struct sk_buff *prio_dequeue(struct Qdisc *sch)
 {
 	struct prio_sched_data *q = qdisc_priv(sch);
 	int prio;
@@ -124,7 +123,7 @@ static struct sk_buff *prio_dequeue(struct Qdisc* sch)
 
 }
 
-static unsigned int prio_drop(struct Qdisc* sch)
+static unsigned int prio_drop(struct Qdisc *sch)
 {
 	struct prio_sched_data *q = qdisc_priv(sch);
 	int prio;
@@ -143,24 +142,24 @@ static unsigned int prio_drop(struct Qdisc* sch)
 
 
 static void
-prio_reset(struct Qdisc* sch)
+prio_reset(struct Qdisc *sch)
 {
 	int prio;
 	struct prio_sched_data *q = qdisc_priv(sch);
 
-	for (prio=0; prio<q->bands; prio++)
+	for (prio = 0; prio < q->bands; prio++)
 		qdisc_reset(q->queues[prio]);
 	sch->q.qlen = 0;
 }
 
 static void
-prio_destroy(struct Qdisc* sch)
+prio_destroy(struct Qdisc *sch)
 {
 	int prio;
 	struct prio_sched_data *q = qdisc_priv(sch);
 
 	tcf_destroy_chain(&q->filter_list);
-	for (prio=0; prio<q->bands; prio++)
+	for (prio = 0; prio < q->bands; prio++)
 		qdisc_destroy(q->queues[prio]);
 }
 
@@ -177,7 +176,7 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt)
 	if (qopt->bands > TCQ_PRIO_BANDS || qopt->bands < 2)
 		return -EINVAL;
 
-	for (i=0; i<=TC_PRIO_MAX; i++) {
+	for (i = 0; i <= TC_PRIO_MAX; i++) {
 		if (qopt->priomap[i] >= qopt->bands)
 			return -EINVAL;
 	}
@@ -186,7 +185,7 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt)
 	q->bands = qopt->bands;
 	memcpy(q->prio2band, qopt->priomap, TC_PRIO_MAX+1);
 
-	for (i=q->bands; i<TCQ_PRIO_BANDS; i++) {
+	for (i = q->bands; i < TCQ_PRIO_BANDS; i++) {
 		struct Qdisc *child = q->queues[i];
 		q->queues[i] = &noop_qdisc;
 		if (child != &noop_qdisc) {
@@ -196,9 +195,10 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt)
 	}
 	sch_tree_unlock(sch);
 
-	for (i=0; i<q->bands; i++) {
+	for (i = 0; i < q->bands; i++) {
 		if (q->queues[i] == &noop_qdisc) {
 			struct Qdisc *child, *old;
+
 			child = qdisc_create_dflt(sch->dev_queue,
 						  &pfifo_qdisc_ops,
 						  TC_H_MAKE(sch->handle, i + 1));
@@ -224,7 +224,7 @@ static int prio_init(struct Qdisc *sch, struct nlattr *opt)
 	struct prio_sched_data *q = qdisc_priv(sch);
 	int i;
 
-	for (i=0; i<TCQ_PRIO_BANDS; i++)
+	for (i = 0; i < TCQ_PRIO_BANDS; i++)
 		q->queues[i] = &noop_qdisc;
 
 	if (opt == NULL) {
@@ -232,7 +232,7 @@ static int prio_init(struct Qdisc *sch, struct nlattr *opt)
 	} else {
 		int err;
 
-		if ((err= prio_tune(sch, opt)) != 0)
+		if ((err = prio_tune(sch, opt)) != 0)
 			return err;
 	}
 	return 0;
@@ -245,7 +245,7 @@ static int prio_dump(struct Qdisc *sch, struct sk_buff *skb)
 	struct tc_prio_qopt opt;
 
 	opt.bands = q->bands;
-	memcpy(&opt.priomap, q->prio2band, TC_PRIO_MAX+1);
+	memcpy(&opt.priomap, q->prio2band, TC_PRIO_MAX + 1);
 
 	NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
 
@@ -342,7 +342,7 @@ static void prio_walk(struct Qdisc *sch, struct qdisc_walker *arg)
 			arg->count++;
 			continue;
 		}
-		if (arg->fn(sch, prio+1, arg) < 0) {
+		if (arg->fn(sch, prio + 1, arg) < 0) {
 			arg->stop = 1;
 			break;
 		}
@@ -350,7 +350,7 @@ static void prio_walk(struct Qdisc *sch, struct qdisc_walker *arg)
 	}
 }
 
-static struct tcf_proto ** prio_find_tcf(struct Qdisc *sch, unsigned long cl)
+static struct tcf_proto **prio_find_tcf(struct Qdisc *sch, unsigned long cl)
 {
 	struct prio_sched_data *q = qdisc_priv(sch);
 
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 9f98dbd..6649463 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -36,8 +36,7 @@
 	if RED works correctly.
  */
 
-struct red_sched_data
-{
+struct red_sched_data {
 	u32			limit;		/* HARD maximal queue length */
 	unsigned char		flags;
 	struct red_parms	parms;
@@ -55,7 +54,7 @@ static inline int red_use_harddrop(struct red_sched_data *q)
 	return q->flags & TC_RED_HARDDROP;
 }
 
-static int red_enqueue(struct sk_buff *skb, struct Qdisc* sch)
+static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 {
 	struct red_sched_data *q = qdisc_priv(sch);
 	struct Qdisc *child = q->qdisc;
@@ -67,29 +66,29 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc* sch)
 		red_end_of_idle_period(&q->parms);
 
 	switch (red_action(&q->parms, q->parms.qavg)) {
-		case RED_DONT_MARK:
-			break;
-
-		case RED_PROB_MARK:
-			sch->qstats.overlimits++;
-			if (!red_use_ecn(q) || !INET_ECN_set_ce(skb)) {
-				q->stats.prob_drop++;
-				goto congestion_drop;
-			}
-
-			q->stats.prob_mark++;
-			break;
-
-		case RED_HARD_MARK:
-			sch->qstats.overlimits++;
-			if (red_use_harddrop(q) || !red_use_ecn(q) ||
-			    !INET_ECN_set_ce(skb)) {
-				q->stats.forced_drop++;
-				goto congestion_drop;
-			}
-
-			q->stats.forced_mark++;
-			break;
+	case RED_DONT_MARK:
+		break;
+
+	case RED_PROB_MARK:
+		sch->qstats.overlimits++;
+		if (!red_use_ecn(q) || !INET_ECN_set_ce(skb)) {
+			q->stats.prob_drop++;
+			goto congestion_drop;
+		}
+
+		q->stats.prob_mark++;
+		break;
+
+	case RED_HARD_MARK:
+		sch->qstats.overlimits++;
+		if (red_use_harddrop(q) || !red_use_ecn(q) ||
+		    !INET_ECN_set_ce(skb)) {
+			q->stats.forced_drop++;
+			goto congestion_drop;
+		}
+
+		q->stats.forced_mark++;
+		break;
 	}
 
 	ret = qdisc_enqueue(skb, child);
@@ -106,7 +105,7 @@ congestion_drop:
 	return NET_XMIT_CN;
 }
 
-static struct sk_buff * red_dequeue(struct Qdisc* sch)
+static struct sk_buff *red_dequeue(struct Qdisc *sch)
 {
 	struct sk_buff *skb;
 	struct red_sched_data *q = qdisc_priv(sch);
@@ -123,7 +122,7 @@ static struct sk_buff * red_dequeue(struct Qdisc* sch)
 	return skb;
 }
 
-static struct sk_buff * red_peek(struct Qdisc* sch)
+static struct sk_buff *red_peek(struct Qdisc *sch)
 {
 	struct red_sched_data *q = qdisc_priv(sch);
 	struct Qdisc *child = q->qdisc;
@@ -131,7 +130,7 @@ static struct sk_buff * red_peek(struct Qdisc* sch)
 	return child->ops->peek(child);
 }
 
-static unsigned int red_drop(struct Qdisc* sch)
+static unsigned int red_drop(struct Qdisc *sch)
 {
 	struct red_sched_data *q = qdisc_priv(sch);
 	struct Qdisc *child = q->qdisc;
@@ -150,7 +149,7 @@ static unsigned int red_drop(struct Qdisc* sch)
 	return 0;
 }
 
-static void red_reset(struct Qdisc* sch)
+static void red_reset(struct Qdisc *sch)
 {
 	struct red_sched_data *q = qdisc_priv(sch);
 
@@ -217,7 +216,7 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt)
 	return 0;
 }
 
-static int red_init(struct Qdisc* sch, struct nlattr *opt)
+static int red_init(struct Qdisc *sch, struct nlattr *opt)
 {
 	struct red_sched_data *q = qdisc_priv(sch);
 
diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c
new file mode 100644
index 0000000..0a833d0
--- /dev/null
+++ b/net/sched/sch_sfb.c
@@ -0,0 +1,709 @@
+/*
+ * net/sched/sch_sfb.c	  Stochastic Fair Blue
+ *
+ * Copyright (c) 2008-2011 Juliusz Chroboczek <jch@pps.jussieu.fr>
+ * Copyright (c) 2011 Eric Dumazet <eric.dumazet@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * W. Feng, D. Kandlur, D. Saha, K. Shin. Blue:
+ * A New Class of Active Queue Management Algorithms.
+ * U. Michigan CSE-TR-387-99, April 1999.
+ *
+ * http://www.thefengs.com/wuchang/blue/CSE-TR-387-99.pdf
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/skbuff.h>
+#include <linux/random.h>
+#include <linux/jhash.h>
+#include <net/ip.h>
+#include <net/pkt_sched.h>
+#include <net/inet_ecn.h>
+
+/*
+ * SFB uses two B[l][n] : L x N arrays of bins (L levels, N bins per level)
+ * This implementation uses L = 8 and N = 16
+ * This permits us to split one 32bit hash (provided per packet by rxhash or
+ * external classifier) into 8 subhashes of 4 bits.
+ */
+#define SFB_BUCKET_SHIFT 4
+#define SFB_NUMBUCKETS	(1 << SFB_BUCKET_SHIFT) /* N bins per Level */
+#define SFB_BUCKET_MASK (SFB_NUMBUCKETS - 1)
+#define SFB_LEVELS	(32 / SFB_BUCKET_SHIFT) /* L */
+
+/* SFB algo uses a virtual queue, named "bin" */
+struct sfb_bucket {
+	u16		qlen; /* length of virtual queue */
+	u16		p_mark; /* marking probability */
+};
+
+/* We use a double buffering right before hash change
+ * (Section 4.4 of SFB reference : moving hash functions)
+ */
+struct sfb_bins {
+	u32		  perturbation; /* jhash perturbation */
+	struct sfb_bucket bins[SFB_LEVELS][SFB_NUMBUCKETS];
+};
+
+struct sfb_sched_data {
+	struct Qdisc	*qdisc;
+	struct tcf_proto *filter_list;
+	unsigned long	rehash_interval;
+	unsigned long	warmup_time;	/* double buffering warmup time in jiffies */
+	u32		max;
+	u32		bin_size;	/* maximum queue length per bin */
+	u32		increment;	/* d1 */
+	u32		decrement;	/* d2 */
+	u32		limit;		/* HARD maximal queue length */
+	u32		penalty_rate;
+	u32		penalty_burst;
+	u32		tokens_avail;
+	unsigned long	rehash_time;
+	unsigned long	token_time;
+
+	u8		slot;		/* current active bins (0 or 1) */
+	bool		double_buffering;
+	struct sfb_bins bins[2];
+
+	struct {
+		u32	earlydrop;
+		u32	penaltydrop;
+		u32	bucketdrop;
+		u32	queuedrop;
+		u32	childdrop;	/* drops in child qdisc */
+		u32	marked;		/* ECN mark */
+	} stats;
+};
+
+/*
+ * Each queued skb might be hashed on one or two bins
+ * We store in skb_cb the two hash values.
+ * (A zero value means double buffering was not used)
+ */
+struct sfb_skb_cb {
+	u32 hashes[2];
+};
+
+static inline struct sfb_skb_cb *sfb_skb_cb(const struct sk_buff *skb)
+{
+	BUILD_BUG_ON(sizeof(skb->cb) <
+		sizeof(struct qdisc_skb_cb) + sizeof(struct sfb_skb_cb));
+	return (struct sfb_skb_cb *)qdisc_skb_cb(skb)->data;
+}
+
+/*
+ * If using 'internal' SFB flow classifier, hash comes from skb rxhash
+ * If using external classifier, hash comes from the classid.
+ */
+static u32 sfb_hash(const struct sk_buff *skb, u32 slot)
+{
+	return sfb_skb_cb(skb)->hashes[slot];
+}
+
+/* Probabilities are coded as Q0.16 fixed-point values,
+ * with 0xFFFF representing 65535/65536 (almost 1.0)
+ * Addition and subtraction are saturating in [0, 65535]
+ */
+static u32 prob_plus(u32 p1, u32 p2)
+{
+	u32 res = p1 + p2;
+
+	return min_t(u32, res, SFB_MAX_PROB);
+}
+
+static u32 prob_minus(u32 p1, u32 p2)
+{
+	return p1 > p2 ? p1 - p2 : 0;
+}
+
+static void increment_one_qlen(u32 sfbhash, u32 slot, struct sfb_sched_data *q)
+{
+	int i;
+	struct sfb_bucket *b = &q->bins[slot].bins[0][0];
+
+	for (i = 0; i < SFB_LEVELS; i++) {
+		u32 hash = sfbhash & SFB_BUCKET_MASK;
+
+		sfbhash >>= SFB_BUCKET_SHIFT;
+		if (b[hash].qlen < 0xFFFF)
+			b[hash].qlen++;
+		b += SFB_NUMBUCKETS; /* next level */
+	}
+}
+
+static void increment_qlen(const struct sk_buff *skb, struct sfb_sched_data *q)
+{
+	u32 sfbhash;
+
+	sfbhash = sfb_hash(skb, 0);
+	if (sfbhash)
+		increment_one_qlen(sfbhash, 0, q);
+
+	sfbhash = sfb_hash(skb, 1);
+	if (sfbhash)
+		increment_one_qlen(sfbhash, 1, q);
+}
+
+static void decrement_one_qlen(u32 sfbhash, u32 slot,
+			       struct sfb_sched_data *q)
+{
+	int i;
+	struct sfb_bucket *b = &q->bins[slot].bins[0][0];
+
+	for (i = 0; i < SFB_LEVELS; i++) {
+		u32 hash = sfbhash & SFB_BUCKET_MASK;
+
+		sfbhash >>= SFB_BUCKET_SHIFT;
+		if (b[hash].qlen > 0)
+			b[hash].qlen--;
+		b += SFB_NUMBUCKETS; /* next level */
+	}
+}
+
+static void decrement_qlen(const struct sk_buff *skb, struct sfb_sched_data *q)
+{
+	u32 sfbhash;
+
+	sfbhash = sfb_hash(skb, 0);
+	if (sfbhash)
+		decrement_one_qlen(sfbhash, 0, q);
+
+	sfbhash = sfb_hash(skb, 1);
+	if (sfbhash)
+		decrement_one_qlen(sfbhash, 1, q);
+}
+
+static void decrement_prob(struct sfb_bucket *b, struct sfb_sched_data *q)
+{
+	b->p_mark = prob_minus(b->p_mark, q->decrement);
+}
+
+static void increment_prob(struct sfb_bucket *b, struct sfb_sched_data *q)
+{
+	b->p_mark = prob_plus(b->p_mark, q->increment);
+}
+
+static void sfb_zero_all_buckets(struct sfb_sched_data *q)
+{
+	memset(&q->bins, 0, sizeof(q->bins));
+}
+
+/*
+ * compute max qlen, max p_mark, and avg p_mark
+ */
+static u32 sfb_compute_qlen(u32 *prob_r, u32 *avgpm_r, const struct sfb_sched_data *q)
+{
+	int i;
+	u32 qlen = 0, prob = 0, totalpm = 0;
+	const struct sfb_bucket *b = &q->bins[q->slot].bins[0][0];
+
+	for (i = 0; i < SFB_LEVELS * SFB_NUMBUCKETS; i++) {
+		if (qlen < b->qlen)
+			qlen = b->qlen;
+		totalpm += b->p_mark;
+		if (prob < b->p_mark)
+			prob = b->p_mark;
+		b++;
+	}
+	*prob_r = prob;
+	*avgpm_r = totalpm / (SFB_LEVELS * SFB_NUMBUCKETS);
+	return qlen;
+}
+
+
+static void sfb_init_perturbation(u32 slot, struct sfb_sched_data *q)
+{
+	q->bins[slot].perturbation = net_random();
+}
+
+static void sfb_swap_slot(struct sfb_sched_data *q)
+{
+	sfb_init_perturbation(q->slot, q);
+	q->slot ^= 1;
+	q->double_buffering = false;
+}
+
+/* Non elastic flows are allowed to use part of the bandwidth, expressed
+ * in "penalty_rate" packets per second, with "penalty_burst" burst
+ */
+static bool sfb_rate_limit(struct sk_buff *skb, struct sfb_sched_data *q)
+{
+	if (q->penalty_rate == 0 || q->penalty_burst == 0)
+		return true;
+
+	if (q->tokens_avail < 1) {
+		unsigned long age = min(10UL * HZ, jiffies - q->token_time);
+
+		q->tokens_avail = (age * q->penalty_rate) / HZ;
+		if (q->tokens_avail > q->penalty_burst)
+			q->tokens_avail = q->penalty_burst;
+		q->token_time = jiffies;
+		if (q->tokens_avail < 1)
+			return true;
+	}
+
+	q->tokens_avail--;
+	return false;
+}
+
+static bool sfb_classify(struct sk_buff *skb, struct sfb_sched_data *q,
+			 int *qerr, u32 *salt)
+{
+	struct tcf_result res;
+	int result;
+
+	result = tc_classify(skb, q->filter_list, &res);
+	if (result >= 0) {
+#ifdef CONFIG_NET_CLS_ACT
+		switch (result) {
+		case TC_ACT_STOLEN:
+		case TC_ACT_QUEUED:
+			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
+		case TC_ACT_SHOT:
+			return false;
+		}
+#endif
+		*salt = TC_H_MIN(res.classid);
+		return true;
+	}
+	return false;
+}
+
+static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
+{
+
+	struct sfb_sched_data *q = qdisc_priv(sch);
+	struct Qdisc *child = q->qdisc;
+	int i;
+	u32 p_min = ~0;
+	u32 minqlen = ~0;
+	u32 r, slot, salt, sfbhash;
+	int ret = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
+
+	if (q->rehash_interval > 0) {
+		unsigned long limit = q->rehash_time + q->rehash_interval;
+
+		if (unlikely(time_after(jiffies, limit))) {
+			sfb_swap_slot(q);
+			q->rehash_time = jiffies;
+		} else if (unlikely(!q->double_buffering && q->warmup_time > 0 &&
+				    time_after(jiffies, limit - q->warmup_time))) {
+			q->double_buffering = true;
+		}
+	}
+
+	if (q->filter_list) {
+		/* If using external classifiers, get result and record it. */
+		if (!sfb_classify(skb, q, &ret, &salt))
+			goto other_drop;
+	} else {
+		salt = skb_get_rxhash(skb);
+	}
+
+	slot = q->slot;
+
+	sfbhash = jhash_1word(salt, q->bins[slot].perturbation);
+	if (!sfbhash)
+		sfbhash = 1;
+	sfb_skb_cb(skb)->hashes[slot] = sfbhash;
+
+	for (i = 0; i < SFB_LEVELS; i++) {
+		u32 hash = sfbhash & SFB_BUCKET_MASK;
+		struct sfb_bucket *b = &q->bins[slot].bins[i][hash];
+
+		sfbhash >>= SFB_BUCKET_SHIFT;
+		if (b->qlen == 0)
+			decrement_prob(b, q);
+		else if (b->qlen >= q->bin_size)
+			increment_prob(b, q);
+		if (minqlen > b->qlen)
+			minqlen = b->qlen;
+		if (p_min > b->p_mark)
+			p_min = b->p_mark;
+	}
+
+	slot ^= 1;
+	sfb_skb_cb(skb)->hashes[slot] = 0;
+
+	if (unlikely(minqlen >= q->max || sch->q.qlen >= q->limit)) {
+		sch->qstats.overlimits++;
+		if (minqlen >= q->max)
+			q->stats.bucketdrop++;
+		else
+			q->stats.queuedrop++;
+		goto drop;
+	}
+
+	if (unlikely(p_min >= SFB_MAX_PROB)) {
+		/* Inelastic flow */
+		if (q->double_buffering) {
+			sfbhash = jhash_1word(salt, q->bins[slot].perturbation);
+			if (!sfbhash)
+				sfbhash = 1;
+			sfb_skb_cb(skb)->hashes[slot] = sfbhash;
+
+			for (i = 0; i < SFB_LEVELS; i++) {
+				u32 hash = sfbhash & SFB_BUCKET_MASK;
+				struct sfb_bucket *b = &q->bins[slot].bins[i][hash];
+
+				sfbhash >>= SFB_BUCKET_SHIFT;
+				if (b->qlen == 0)
+					decrement_prob(b, q);
+				else if (b->qlen >= q->bin_size)
+					increment_prob(b, q);
+			}
+		}
+		if (sfb_rate_limit(skb, q)) {
+			sch->qstats.overlimits++;
+			q->stats.penaltydrop++;
+			goto drop;
+		}
+		goto enqueue;
+	}
+
+	r = net_random() & SFB_MAX_PROB;
+
+	if (unlikely(r < p_min)) {
+		if (unlikely(p_min > SFB_MAX_PROB / 2)) {
+			/* If we're marking that many packets, then either
+			 * this flow is unresponsive, or we're badly congested.
+			 * In either case, we want to start dropping packets.
+			 */
+			if (r < (p_min - SFB_MAX_PROB / 2) * 2) {
+				q->stats.earlydrop++;
+				goto drop;
+			}
+		}
+		if (INET_ECN_set_ce(skb)) {
+			q->stats.marked++;
+		} else {
+			q->stats.earlydrop++;
+			goto drop;
+		}
+	}
+
+enqueue:
+	ret = qdisc_enqueue(skb, child);
+	if (likely(ret == NET_XMIT_SUCCESS)) {
+		sch->q.qlen++;
+		increment_qlen(skb, q);
+	} else if (net_xmit_drop_count(ret)) {
+		q->stats.childdrop++;
+		sch->qstats.drops++;
+	}
+	return ret;
+
+drop:
+	qdisc_drop(skb, sch);
+	return NET_XMIT_CN;
+other_drop:
+	if (ret & __NET_XMIT_BYPASS)
+		sch->qstats.drops++;
+	kfree_skb(skb);
+	return ret;
+}
+
+static struct sk_buff *sfb_dequeue(struct Qdisc *sch)
+{
+	struct sfb_sched_data *q = qdisc_priv(sch);
+	struct Qdisc *child = q->qdisc;
+	struct sk_buff *skb;
+
+	skb = child->dequeue(q->qdisc);
+
+	if (skb) {
+		qdisc_bstats_update(sch, skb);
+		sch->q.qlen--;
+		decrement_qlen(skb, q);
+	}
+
+	return skb;
+}
+
+static struct sk_buff *sfb_peek(struct Qdisc *sch)
+{
+	struct sfb_sched_data *q = qdisc_priv(sch);
+	struct Qdisc *child = q->qdisc;
+
+	return child->ops->peek(child);
+}
+
+/* No sfb_drop -- impossible since the child doesn't return the dropped skb. */
+
+static void sfb_reset(struct Qdisc *sch)
+{
+	struct sfb_sched_data *q = qdisc_priv(sch);
+
+	qdisc_reset(q->qdisc);
+	sch->q.qlen = 0;
+	q->slot = 0;
+	q->double_buffering = false;
+	sfb_zero_all_buckets(q);
+	sfb_init_perturbation(0, q);
+}
+
+static void sfb_destroy(struct Qdisc *sch)
+{
+	struct sfb_sched_data *q = qdisc_priv(sch);
+
+	tcf_destroy_chain(&q->filter_list);
+	qdisc_destroy(q->qdisc);
+}
+
+static const struct nla_policy sfb_policy[TCA_SFB_MAX + 1] = {
+	[TCA_SFB_PARMS]	= { .len = sizeof(struct tc_sfb_qopt) },
+};
+
+static const struct tc_sfb_qopt sfb_default_ops = {
+	.rehash_interval = 600 * MSEC_PER_SEC,
+	.warmup_time = 60 * MSEC_PER_SEC,
+	.limit = 0,
+	.max = 25,
+	.bin_size = 20,
+	.increment = (SFB_MAX_PROB + 500) / 1000, /* 0.1 % */
+	.decrement = (SFB_MAX_PROB + 3000) / 6000,
+	.penalty_rate = 10,
+	.penalty_burst = 20,
+};
+
+static int sfb_change(struct Qdisc *sch, struct nlattr *opt)
+{
+	struct sfb_sched_data *q = qdisc_priv(sch);
+	struct Qdisc *child;
+	struct nlattr *tb[TCA_SFB_MAX + 1];
+	const struct tc_sfb_qopt *ctl = &sfb_default_ops;
+	u32 limit;
+	int err;
+
+	if (opt) {
+		err = nla_parse_nested(tb, TCA_SFB_MAX, opt, sfb_policy);
+		if (err < 0)
+			return -EINVAL;
+
+		if (tb[TCA_SFB_PARMS] == NULL)
+			return -EINVAL;
+
+		ctl = nla_data(tb[TCA_SFB_PARMS]);
+	}
+
+	limit = ctl->limit;
+	if (limit == 0)
+		limit = max_t(u32, qdisc_dev(sch)->tx_queue_len, 1);
+
+	child = fifo_create_dflt(sch, &pfifo_qdisc_ops, limit);
+	if (IS_ERR(child))
+		return PTR_ERR(child);
+
+	sch_tree_lock(sch);
+
+	qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen);
+	qdisc_destroy(q->qdisc);
+	q->qdisc = child;
+
+	q->rehash_interval = msecs_to_jiffies(ctl->rehash_interval);
+	q->warmup_time = msecs_to_jiffies(ctl->warmup_time);
+	q->rehash_time = jiffies;
+	q->limit = limit;
+	q->increment = ctl->increment;
+	q->decrement = ctl->decrement;
+	q->max = ctl->max;
+	q->bin_size = ctl->bin_size;
+	q->penalty_rate = ctl->penalty_rate;
+	q->penalty_burst = ctl->penalty_burst;
+	q->tokens_avail = ctl->penalty_burst;
+	q->token_time = jiffies;
+
+	q->slot = 0;
+	q->double_buffering = false;
+	sfb_zero_all_buckets(q);
+	sfb_init_perturbation(0, q);
+	sfb_init_perturbation(1, q);
+
+	sch_tree_unlock(sch);
+
+	return 0;
+}
+
+static int sfb_init(struct Qdisc *sch, struct nlattr *opt)
+{
+	struct sfb_sched_data *q = qdisc_priv(sch);
+
+	q->qdisc = &noop_qdisc;
+	return sfb_change(sch, opt);
+}
+
+static int sfb_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+	struct sfb_sched_data *q = qdisc_priv(sch);
+	struct nlattr *opts;
+	struct tc_sfb_qopt opt = {
+		.rehash_interval = jiffies_to_msecs(q->rehash_interval),
+		.warmup_time = jiffies_to_msecs(q->warmup_time),
+		.limit = q->limit,
+		.max = q->max,
+		.bin_size = q->bin_size,
+		.increment = q->increment,
+		.decrement = q->decrement,
+		.penalty_rate = q->penalty_rate,
+		.penalty_burst = q->penalty_burst,
+	};
+
+	sch->qstats.backlog = q->qdisc->qstats.backlog;
+	opts = nla_nest_start(skb, TCA_OPTIONS);
+	NLA_PUT(skb, TCA_SFB_PARMS, sizeof(opt), &opt);
+	return nla_nest_end(skb, opts);
+
+nla_put_failure:
+	nla_nest_cancel(skb, opts);
+	return -EMSGSIZE;
+}
+
+static int sfb_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
+{
+	struct sfb_sched_data *q = qdisc_priv(sch);
+	struct tc_sfb_xstats st = {
+		.earlydrop = q->stats.earlydrop,
+		.penaltydrop = q->stats.penaltydrop,
+		.bucketdrop = q->stats.bucketdrop,
+		.queuedrop = q->stats.queuedrop,
+		.childdrop = q->stats.childdrop,
+		.marked = q->stats.marked,
+	};
+
+	st.maxqlen = sfb_compute_qlen(&st.maxprob, &st.avgprob, q);
+
+	return gnet_stats_copy_app(d, &st, sizeof(st));
+}
+
+static int sfb_dump_class(struct Qdisc *sch, unsigned long cl,
+			  struct sk_buff *skb, struct tcmsg *tcm)
+{
+	return -ENOSYS;
+}
+
+static int sfb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
+		     struct Qdisc **old)
+{
+	struct sfb_sched_data *q = qdisc_priv(sch);
+
+	if (new == NULL)
+		new = &noop_qdisc;
+
+	sch_tree_lock(sch);
+	*old = q->qdisc;
+	q->qdisc = new;
+	qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
+	qdisc_reset(*old);
+	sch_tree_unlock(sch);
+	return 0;
+}
+
+static struct Qdisc *sfb_leaf(struct Qdisc *sch, unsigned long arg)
+{
+	struct sfb_sched_data *q = qdisc_priv(sch);
+
+	return q->qdisc;
+}
+
+static unsigned long sfb_get(struct Qdisc *sch, u32 classid)
+{
+	return 1;
+}
+
+static void sfb_put(struct Qdisc *sch, unsigned long arg)
+{
+}
+
+static int sfb_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
+			    struct nlattr **tca, unsigned long *arg)
+{
+	return -ENOSYS;
+}
+
+static int sfb_delete(struct Qdisc *sch, unsigned long cl)
+{
+	return -ENOSYS;
+}
+
+static void sfb_walk(struct Qdisc *sch, struct qdisc_walker *walker)
+{
+	if (!walker->stop) {
+		if (walker->count >= walker->skip)
+			if (walker->fn(sch, 1, walker) < 0) {
+				walker->stop = 1;
+				return;
+			}
+		walker->count++;
+	}
+}
+
+static struct tcf_proto **sfb_find_tcf(struct Qdisc *sch, unsigned long cl)
+{
+	struct sfb_sched_data *q = qdisc_priv(sch);
+
+	if (cl)
+		return NULL;
+	return &q->filter_list;
+}
+
+static unsigned long sfb_bind(struct Qdisc *sch, unsigned long parent,
+			      u32 classid)
+{
+	return 0;
+}
+
+
+static const struct Qdisc_class_ops sfb_class_ops = {
+	.graft		=	sfb_graft,
+	.leaf		=	sfb_leaf,
+	.get		=	sfb_get,
+	.put		=	sfb_put,
+	.change		=	sfb_change_class,
+	.delete		=	sfb_delete,
+	.walk		=	sfb_walk,
+	.tcf_chain	=	sfb_find_tcf,
+	.bind_tcf	=	sfb_bind,
+	.unbind_tcf	=	sfb_put,
+	.dump		=	sfb_dump_class,
+};
+
+static struct Qdisc_ops sfb_qdisc_ops __read_mostly = {
+	.id		=	"sfb",
+	.priv_size	=	sizeof(struct sfb_sched_data),
+	.cl_ops		=	&sfb_class_ops,
+	.enqueue	=	sfb_enqueue,
+	.dequeue	=	sfb_dequeue,
+	.peek		=	sfb_peek,
+	.init		=	sfb_init,
+	.reset		=	sfb_reset,
+	.destroy	=	sfb_destroy,
+	.change		=	sfb_change,
+	.dump		=	sfb_dump,
+	.dump_stats	=	sfb_dump_stats,
+	.owner		=	THIS_MODULE,
+};
+
+static int __init sfb_module_init(void)
+{
+	return register_qdisc(&sfb_qdisc_ops);
+}
+
+static void __exit sfb_module_exit(void)
+{
+	unregister_qdisc(&sfb_qdisc_ops);
+}
+
+module_init(sfb_module_init)
+module_exit(sfb_module_exit)
+
+MODULE_DESCRIPTION("Stochastic Fair Blue queue discipline");
+MODULE_AUTHOR("Juliusz Chroboczek");
+MODULE_AUTHOR("Eric Dumazet");
+MODULE_LICENSE("GPL");
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index edea8ce..c2e628d 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -21,6 +21,7 @@
 #include <linux/skbuff.h>
 #include <linux/jhash.h>
 #include <linux/slab.h>
+#include <linux/vmalloc.h>
 #include <net/ip.h>
 #include <net/netlink.h>
 #include <net/pkt_sched.h>
@@ -76,7 +77,8 @@
 #define SFQ_DEPTH		128 /* max number of packets per flow */
 #define SFQ_SLOTS		128 /* max number of flows */
 #define SFQ_EMPTY_SLOT		255
-#define SFQ_HASH_DIVISOR	1024
+#define SFQ_DEFAULT_HASH_DIVISOR 1024
+
 /* We use 16 bits to store allot, and want to handle packets up to 64K
  * Scale allot by 8 (1<<3) so that no overflow occurs.
  */
@@ -92,8 +94,7 @@ typedef unsigned char sfq_index;
  * while following values [SFQ_SLOTS ... SFQ_SLOTS + SFQ_DEPTH - 1]
  * are 'pointers' to dep[] array
  */
-struct sfq_head
-{
+struct sfq_head {
 	sfq_index	next;
 	sfq_index	prev;
 };
@@ -108,13 +109,12 @@ struct sfq_slot {
 	short		allot; /* credit for this slot */
 };
 
-struct sfq_sched_data
-{
+struct sfq_sched_data {
 /* Parameters */
 	int		perturb_period;
-	unsigned	quantum;	/* Allotment per round: MUST BE >= MTU */
+	unsigned int	quantum;	/* Allotment per round: MUST BE >= MTU */
 	int		limit;
-
+	unsigned int	divisor;	/* number of slots in hash table */
 /* Variables */
 	struct tcf_proto *filter_list;
 	struct timer_list perturb_timer;
@@ -122,7 +122,7 @@ struct sfq_sched_data
 	sfq_index	cur_depth;	/* depth of longest slot */
 	unsigned short  scaled_quantum; /* SFQ_ALLOT_SIZE(quantum) */
 	struct sfq_slot *tail;		/* current slot in round */
-	sfq_index	ht[SFQ_HASH_DIVISOR];	/* Hash table */
+	sfq_index	*ht;		/* Hash table (divisor slots) */
 	struct sfq_slot	slots[SFQ_SLOTS];
 	struct sfq_head	dep[SFQ_DEPTH];	/* Linked list of slots, indexed by depth */
 };
@@ -137,12 +137,12 @@ static inline struct sfq_head *sfq_dep_head(struct sfq_sched_data *q, sfq_index
 	return &q->dep[val - SFQ_SLOTS];
 }
 
-static __inline__ unsigned sfq_fold_hash(struct sfq_sched_data *q, u32 h, u32 h1)
+static unsigned int sfq_fold_hash(struct sfq_sched_data *q, u32 h, u32 h1)
 {
-	return jhash_2words(h, h1, q->perturbation) & (SFQ_HASH_DIVISOR - 1);
+	return jhash_2words(h, h1, q->perturbation) & (q->divisor - 1);
 }
 
-static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb)
+static unsigned int sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb)
 {
 	u32 h, h2;
 
@@ -157,13 +157,13 @@ static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb)
 		iph = ip_hdr(skb);
 		h = (__force u32)iph->daddr;
 		h2 = (__force u32)iph->saddr ^ iph->protocol;
-		if (iph->frag_off & htons(IP_MF|IP_OFFSET))
+		if (iph->frag_off & htons(IP_MF | IP_OFFSET))
 			break;
 		poff = proto_ports_offset(iph->protocol);
 		if (poff >= 0 &&
 		    pskb_network_may_pull(skb, iph->ihl * 4 + 4 + poff)) {
 			iph = ip_hdr(skb);
-			h2 ^= *(u32*)((void *)iph + iph->ihl * 4 + poff);
+			h2 ^= *(u32 *)((void *)iph + iph->ihl * 4 + poff);
 		}
 		break;
 	}
@@ -181,7 +181,7 @@ static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb)
 		if (poff >= 0 &&
 		    pskb_network_may_pull(skb, sizeof(*iph) + 4 + poff)) {
 			iph = ipv6_hdr(skb);
-			h2 ^= *(u32*)((void *)iph + sizeof(*iph) + poff);
+			h2 ^= *(u32 *)((void *)iph + sizeof(*iph) + poff);
 		}
 		break;
 	}
@@ -203,7 +203,7 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
 
 	if (TC_H_MAJ(skb->priority) == sch->handle &&
 	    TC_H_MIN(skb->priority) > 0 &&
-	    TC_H_MIN(skb->priority) <= SFQ_HASH_DIVISOR)
+	    TC_H_MIN(skb->priority) <= q->divisor)
 		return TC_H_MIN(skb->priority);
 
 	if (!q->filter_list)
@@ -221,7 +221,7 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
 			return 0;
 		}
 #endif
-		if (TC_H_MIN(res.classid) <= SFQ_HASH_DIVISOR)
+		if (TC_H_MIN(res.classid) <= q->divisor)
 			return TC_H_MIN(res.classid);
 	}
 	return 0;
@@ -491,13 +491,18 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
 	if (opt->nla_len < nla_attr_size(sizeof(*ctl)))
 		return -EINVAL;
 
+	if (ctl->divisor &&
+	    (!is_power_of_2(ctl->divisor) || ctl->divisor > 65536))
+		return -EINVAL;
+
 	sch_tree_lock(sch);
 	q->quantum = ctl->quantum ? : psched_mtu(qdisc_dev(sch));
 	q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum);
 	q->perturb_period = ctl->perturb_period * HZ;
 	if (ctl->limit)
 		q->limit = min_t(u32, ctl->limit, SFQ_DEPTH - 1);
-
+	if (ctl->divisor)
+		q->divisor = ctl->divisor;
 	qlen = sch->q.qlen;
 	while (sch->q.qlen > q->limit)
 		sfq_drop(sch);
@@ -515,15 +520,13 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
 static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
 {
 	struct sfq_sched_data *q = qdisc_priv(sch);
+	size_t sz;
 	int i;
 
 	q->perturb_timer.function = sfq_perturbation;
 	q->perturb_timer.data = (unsigned long)sch;
 	init_timer_deferrable(&q->perturb_timer);
 
-	for (i = 0; i < SFQ_HASH_DIVISOR; i++)
-		q->ht[i] = SFQ_EMPTY_SLOT;
-
 	for (i = 0; i < SFQ_DEPTH; i++) {
 		q->dep[i].next = i + SFQ_SLOTS;
 		q->dep[i].prev = i + SFQ_SLOTS;
@@ -532,6 +535,7 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
 	q->limit = SFQ_DEPTH - 1;
 	q->cur_depth = 0;
 	q->tail = NULL;
+	q->divisor = SFQ_DEFAULT_HASH_DIVISOR;
 	if (opt == NULL) {
 		q->quantum = psched_mtu(qdisc_dev(sch));
 		q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum);
@@ -543,10 +547,23 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
 			return err;
 	}
 
+	sz = sizeof(q->ht[0]) * q->divisor;
+	q->ht = kmalloc(sz, GFP_KERNEL);
+	if (!q->ht && sz > PAGE_SIZE)
+		q->ht = vmalloc(sz);
+	if (!q->ht)
+		return -ENOMEM;
+	for (i = 0; i < q->divisor; i++)
+		q->ht[i] = SFQ_EMPTY_SLOT;
+
 	for (i = 0; i < SFQ_SLOTS; i++) {
 		slot_queue_init(&q->slots[i]);
 		sfq_link(q, i);
 	}
+	if (q->limit >= 1)
+		sch->flags |= TCQ_F_CAN_BYPASS;
+	else
+		sch->flags &= ~TCQ_F_CAN_BYPASS;
 	return 0;
 }
 
@@ -557,6 +574,10 @@ static void sfq_destroy(struct Qdisc *sch)
 	tcf_destroy_chain(&q->filter_list);
 	q->perturb_period = 0;
 	del_timer_sync(&q->perturb_timer);
+	if (is_vmalloc_addr(q->ht))
+		vfree(q->ht);
+	else
+		kfree(q->ht);
 }
 
 static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb)
@@ -569,7 +590,7 @@ static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb)
 	opt.perturb_period = q->perturb_period / HZ;
 
 	opt.limit = q->limit;
-	opt.divisor = SFQ_HASH_DIVISOR;
+	opt.divisor = q->divisor;
 	opt.flows = q->limit;
 
 	NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
@@ -594,6 +615,8 @@ static unsigned long sfq_get(struct Qdisc *sch, u32 classid)
 static unsigned long sfq_bind(struct Qdisc *sch, unsigned long parent,
 			      u32 classid)
 {
+	/* we cannot bypass queue discipline anymore */
+	sch->flags &= ~TCQ_F_CAN_BYPASS;
 	return 0;
 }
 
@@ -647,7 +670,7 @@ static void sfq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
 	if (arg->stop)
 		return;
 
-	for (i = 0; i < SFQ_HASH_DIVISOR; i++) {
+	for (i = 0; i < q->divisor; i++) {
 		if (q->ht[i] == SFQ_EMPTY_SLOT ||
 		    arg->count < arg->skip) {
 			arg->count++;
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index e931658..1dcfb52 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -97,8 +97,7 @@
 	changed the limit is not effective anymore.
 */
 
-struct tbf_sched_data
-{
+struct tbf_sched_data {
 /* Parameters */
 	u32		limit;		/* Maximal length of backlog: bytes */
 	u32		buffer;		/* Token bucket depth/rate: MUST BE >= MTU/B */
@@ -115,10 +114,10 @@ struct tbf_sched_data
 	struct qdisc_watchdog watchdog;	/* Watchdog timer */
 };
 
-#define L2T(q,L)   qdisc_l2t((q)->R_tab,L)
-#define L2T_P(q,L) qdisc_l2t((q)->P_tab,L)
+#define L2T(q, L)   qdisc_l2t((q)->R_tab, L)
+#define L2T_P(q, L) qdisc_l2t((q)->P_tab, L)
 
-static int tbf_enqueue(struct sk_buff *skb, struct Qdisc* sch)
+static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 {
 	struct tbf_sched_data *q = qdisc_priv(sch);
 	int ret;
@@ -137,7 +136,7 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc* sch)
 	return NET_XMIT_SUCCESS;
 }
 
-static unsigned int tbf_drop(struct Qdisc* sch)
+static unsigned int tbf_drop(struct Qdisc *sch)
 {
 	struct tbf_sched_data *q = qdisc_priv(sch);
 	unsigned int len = 0;
@@ -149,7 +148,7 @@ static unsigned int tbf_drop(struct Qdisc* sch)
 	return len;
 }
 
-static struct sk_buff *tbf_dequeue(struct Qdisc* sch)
+static struct sk_buff *tbf_dequeue(struct Qdisc *sch)
 {
 	struct tbf_sched_data *q = qdisc_priv(sch);
 	struct sk_buff *skb;
@@ -185,7 +184,7 @@ static struct sk_buff *tbf_dequeue(struct Qdisc* sch)
 			q->tokens = toks;
 			q->ptokens = ptoks;
 			sch->q.qlen--;
-			sch->flags &= ~TCQ_F_THROTTLED;
+			qdisc_unthrottled(sch);
 			qdisc_bstats_update(sch, skb);
 			return skb;
 		}
@@ -209,7 +208,7 @@ static struct sk_buff *tbf_dequeue(struct Qdisc* sch)
 	return NULL;
 }
 
-static void tbf_reset(struct Qdisc* sch)
+static void tbf_reset(struct Qdisc *sch)
 {
 	struct tbf_sched_data *q = qdisc_priv(sch);
 
@@ -227,7 +226,7 @@ static const struct nla_policy tbf_policy[TCA_TBF_MAX + 1] = {
 	[TCA_TBF_PTAB]	= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
 };
 
-static int tbf_change(struct Qdisc* sch, struct nlattr *opt)
+static int tbf_change(struct Qdisc *sch, struct nlattr *opt)
 {
 	int err;
 	struct tbf_sched_data *q = qdisc_priv(sch);
@@ -236,7 +235,7 @@ static int tbf_change(struct Qdisc* sch, struct nlattr *opt)
 	struct qdisc_rate_table *rtab = NULL;
 	struct qdisc_rate_table *ptab = NULL;
 	struct Qdisc *child = NULL;
-	int max_size,n;
+	int max_size, n;
 
 	err = nla_parse_nested(tb, TCA_TBF_PTAB, opt, tbf_policy);
 	if (err < 0)
@@ -259,15 +258,18 @@ static int tbf_change(struct Qdisc* sch, struct nlattr *opt)
 	}
 
 	for (n = 0; n < 256; n++)
-		if (rtab->data[n] > qopt->buffer) break;
-	max_size = (n << qopt->rate.cell_log)-1;
+		if (rtab->data[n] > qopt->buffer)
+			break;
+	max_size = (n << qopt->rate.cell_log) - 1;
 	if (ptab) {
 		int size;
 
 		for (n = 0; n < 256; n++)
-			if (ptab->data[n] > qopt->mtu) break;
-		size = (n << qopt->peakrate.cell_log)-1;
-		if (size < max_size) max_size = size;
+			if (ptab->data[n] > qopt->mtu)
+				break;
+		size = (n << qopt->peakrate.cell_log) - 1;
+		if (size < max_size)
+			max_size = size;
 	}
 	if (max_size < 0)
 		goto done;
@@ -310,7 +312,7 @@ done:
 	return err;
 }
 
-static int tbf_init(struct Qdisc* sch, struct nlattr *opt)
+static int tbf_init(struct Qdisc *sch, struct nlattr *opt)
 {
 	struct tbf_sched_data *q = qdisc_priv(sch);
 
@@ -422,8 +424,7 @@ static void tbf_walk(struct Qdisc *sch, struct qdisc_walker *walker)
 	}
 }
 
-static const struct Qdisc_class_ops tbf_class_ops =
-{
+static const struct Qdisc_class_ops tbf_class_ops = {
 	.graft		=	tbf_graft,
 	.leaf		=	tbf_leaf,
 	.get		=	tbf_get,
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index d84e732..45cd300 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -53,8 +53,7 @@
       which will not break load balancing, though native slave
       traffic will have the highest priority.  */
 
-struct teql_master
-{
+struct teql_master {
 	struct Qdisc_ops qops;
 	struct net_device *dev;
 	struct Qdisc *slaves;
@@ -65,22 +64,21 @@ struct teql_master
 	unsigned long	tx_dropped;
 };
 
-struct teql_sched_data
-{
+struct teql_sched_data {
 	struct Qdisc *next;
 	struct teql_master *m;
 	struct neighbour *ncache;
 	struct sk_buff_head q;
 };
 
-#define NEXT_SLAVE(q) (((struct teql_sched_data*)qdisc_priv(q))->next)
+#define NEXT_SLAVE(q) (((struct teql_sched_data *)qdisc_priv(q))->next)
 
-#define FMASK (IFF_BROADCAST|IFF_POINTOPOINT)
+#define FMASK (IFF_BROADCAST | IFF_POINTOPOINT)
 
 /* "teql*" qdisc routines */
 
 static int
-teql_enqueue(struct sk_buff *skb, struct Qdisc* sch)
+teql_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 {
 	struct net_device *dev = qdisc_dev(sch);
 	struct teql_sched_data *q = qdisc_priv(sch);
@@ -96,7 +94,7 @@ teql_enqueue(struct sk_buff *skb, struct Qdisc* sch)
 }
 
 static struct sk_buff *
-teql_dequeue(struct Qdisc* sch)
+teql_dequeue(struct Qdisc *sch)
 {
 	struct teql_sched_data *dat = qdisc_priv(sch);
 	struct netdev_queue *dat_queue;
@@ -118,13 +116,13 @@ teql_dequeue(struct Qdisc* sch)
 }
 
 static struct sk_buff *
-teql_peek(struct Qdisc* sch)
+teql_peek(struct Qdisc *sch)
 {
 	/* teql is meant to be used as root qdisc */
 	return NULL;
 }
 
-static __inline__ void
+static inline void
 teql_neigh_release(struct neighbour *n)
 {
 	if (n)
@@ -132,7 +130,7 @@ teql_neigh_release(struct neighbour *n)
 }
 
 static void
-teql_reset(struct Qdisc* sch)
+teql_reset(struct Qdisc *sch)
 {
 	struct teql_sched_data *dat = qdisc_priv(sch);
 
@@ -142,13 +140,14 @@ teql_reset(struct Qdisc* sch)
 }
 
 static void
-teql_destroy(struct Qdisc* sch)
+teql_destroy(struct Qdisc *sch)
 {
 	struct Qdisc *q, *prev;
 	struct teql_sched_data *dat = qdisc_priv(sch);
 	struct teql_master *master = dat->m;
 
-	if ((prev = master->slaves) != NULL) {
+	prev = master->slaves;
+	if (prev) {
 		do {
 			q = NEXT_SLAVE(prev);
 			if (q == sch) {
@@ -180,7 +179,7 @@ teql_destroy(struct Qdisc* sch)
 static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt)
 {
 	struct net_device *dev = qdisc_dev(sch);
-	struct teql_master *m = (struct teql_master*)sch->ops;
+	struct teql_master *m = (struct teql_master *)sch->ops;
 	struct teql_sched_data *q = qdisc_priv(sch);
 
 	if (dev->hard_header_len > m->dev->hard_header_len)
@@ -291,7 +290,8 @@ restart:
 	nores = 0;
 	busy = 0;
 
-	if ((q = start) == NULL)
+	q = start;
+	if (!q)
 		goto drop;
 
 	do {
@@ -356,10 +356,10 @@ drop:
 
 static int teql_master_open(struct net_device *dev)
 {
-	struct Qdisc * q;
+	struct Qdisc *q;
 	struct teql_master *m = netdev_priv(dev);
 	int mtu = 0xFFFE;
-	unsigned flags = IFF_NOARP|IFF_MULTICAST;
+	unsigned int flags = IFF_NOARP | IFF_MULTICAST;
 
 	if (m->slaves == NULL)
 		return -EUNATCH;
@@ -427,7 +427,7 @@ static int teql_master_mtu(struct net_device *dev, int new_mtu)
 		do {
 			if (new_mtu > qdisc_dev(q)->mtu)
 				return -EINVAL;
-		} while ((q=NEXT_SLAVE(q)) != m->slaves);
+		} while ((q = NEXT_SLAVE(q)) != m->slaves);
 	}
 
 	dev->mtu = new_mtu;
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 5f1fb8b..6b04287 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -1089,7 +1089,6 @@ static void sctp_assoc_bh_rcv(struct work_struct *work)
 			     base.inqueue.immediate);
 	struct sctp_endpoint *ep;
 	struct sctp_chunk *chunk;
-	struct sock *sk;
 	struct sctp_inq *inqueue;
 	int state;
 	sctp_subtype_t subtype;
@@ -1097,7 +1096,6 @@ static void sctp_assoc_bh_rcv(struct work_struct *work)
 
 	/* The association should be held so we should be safe. */
 	ep = asoc->ep;
-	sk = asoc->base.sk;
 
 	inqueue = &asoc->base.inqueue;
 	sctp_association_hold(asoc);
diff --git a/net/sctp/input.c b/net/sctp/input.c
index ea21924..826661b 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -948,14 +948,11 @@ static struct sctp_association *__sctp_rcv_init_lookup(struct sk_buff *skb,
 	union sctp_addr addr;
 	union sctp_addr *paddr = &addr;
 	struct sctphdr *sh = sctp_hdr(skb);
-	sctp_chunkhdr_t *ch;
 	union sctp_params params;
 	sctp_init_chunk_t *init;
 	struct sctp_transport *transport;
 	struct sctp_af *af;
 
-	ch = (sctp_chunkhdr_t *) skb->data;
-
 	/*
 	 * This code will NOT touch anything inside the chunk--it is
 	 * strictly READ-ONLY.
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index 8c6d379..26dc0051 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -545,13 +545,11 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt,
 	struct sctp_transport *transport = pkt->transport;
 	sctp_xmit_t status;
 	struct sctp_chunk *chunk, *chunk1;
-	struct sctp_association *asoc;
 	int fast_rtx;
 	int error = 0;
 	int timer = 0;
 	int done = 0;
 
-	asoc = q->asoc;
 	lqueue = &q->retransmit;
 	fast_rtx = q->fast_rtx;
 
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index e58f947..4e55e6c 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -491,9 +491,9 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc,
 	SCTP_DEBUG_PRINTK("%s: DST:%pI4, SRC:%pI4 - ",
 			  __func__, &fl.fl4_dst, &fl.fl4_src);
 
-	if (!ip_route_output_key(&init_net, &rt, &fl)) {
+	rt = ip_route_output_key(&init_net, &fl);
+	if (!IS_ERR(rt))
 		dst = &rt->dst;
-	}
 
 	/* If there is no association or if a source address is passed, no
 	 * more validation is required.
@@ -535,7 +535,8 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc,
 		    (AF_INET == laddr->a.sa.sa_family)) {
 			fl.fl4_src = laddr->a.v4.sin_addr.s_addr;
 			fl.fl_ip_sport = laddr->a.v4.sin_port;
-			if (!ip_route_output_key(&init_net, &rt, &fl)) {
+			rt = ip_route_output_key(&init_net, &fl);
+			if (!IS_ERR(rt)) {
 				dst = &rt->dst;
 				goto out_unlock;
 			}
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index b23428f..de98665 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -3375,7 +3375,6 @@ struct sctp_chunk *sctp_make_fwdtsn(const struct sctp_association *asoc,
 				    struct sctp_fwdtsn_skip *skiplist)
 {
 	struct sctp_chunk *retval = NULL;
-	struct sctp_fwdtsn_chunk *ftsn_chunk;
 	struct sctp_fwdtsn_hdr ftsn_hdr;
 	struct sctp_fwdtsn_skip skip;
 	size_t hint;
@@ -3388,8 +3387,6 @@ struct sctp_chunk *sctp_make_fwdtsn(const struct sctp_association *asoc,
 	if (!retval)
 		return NULL;
 
-	ftsn_chunk = (struct sctp_fwdtsn_chunk *)retval->subh.fwdtsn_hdr;
-
 	ftsn_hdr.new_cum_tsn = htonl(new_cum_tsn);
 	retval->subh.fwdtsn_hdr =
 		sctp_addto_chunk(retval, sizeof(ftsn_hdr), &ftsn_hdr);
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 8e02550..3951a10 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -2928,7 +2928,6 @@ static int sctp_setsockopt_peer_primary_addr(struct sock *sk, char __user *optva
 					     unsigned int optlen)
 {
 	struct sctp_sock	*sp;
-	struct sctp_endpoint	*ep;
 	struct sctp_association	*asoc = NULL;
 	struct sctp_setpeerprim	prim;
 	struct sctp_chunk	*chunk;
@@ -2936,7 +2935,6 @@ static int sctp_setsockopt_peer_primary_addr(struct sock *sk, char __user *optva
 	int 			err;
 
 	sp = sctp_sk(sk);
-	ep = sp->ep;
 
 	if (!sctp_addip_enable)
 		return -EPERM;
@@ -6102,15 +6100,16 @@ static void __sctp_write_space(struct sctp_association *asoc)
 			wake_up_interruptible(&asoc->wait);
 
 		if (sctp_writeable(sk)) {
-			if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk)))
-				wake_up_interruptible(sk_sleep(sk));
+			wait_queue_head_t *wq = sk_sleep(sk);
+
+			if (wq && waitqueue_active(wq))
+				wake_up_interruptible(wq);
 
 			/* Note that we try to include the Async I/O support
 			 * here by modeling from the current TCP/UDP code.
 			 * We have not tested with it yet.
 			 */
-			if (sock->wq->fasync_list &&
-			    !(sk->sk_shutdown & SEND_SHUTDOWN))
+			if (!(sk->sk_shutdown & SEND_SHUTDOWN))
 				sock_wake_async(sock,
 						SOCK_WAKE_SPACE, POLL_OUT);
 		}
diff --git a/net/sctp/tsnmap.c b/net/sctp/tsnmap.c
index 747d541..f1e40ceb 100644
--- a/net/sctp/tsnmap.c
+++ b/net/sctp/tsnmap.c
@@ -344,7 +344,7 @@ __u16 sctp_tsnmap_num_gabs(struct sctp_tsnmap *map,
 
 	/* Refresh the gap ack information. */
 	if (sctp_tsnmap_has_gap(map)) {
-		__u16 start, end;
+		__u16 start = 0, end = 0;
 		sctp_tsnmap_iter_init(map, &iter);
 		while (sctp_tsnmap_next_gap_ack(map, &iter,
 						&start,
diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c
index c7f7e49..1767818 100644
--- a/net/sctp/ulpqueue.c
+++ b/net/sctp/ulpqueue.c
@@ -105,11 +105,8 @@ int sctp_ulpq_tail_data(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk,
 			gfp_t gfp)
 {
 	struct sk_buff_head temp;
-	sctp_data_chunk_t *hdr;
 	struct sctp_ulpevent *event;
 
-	hdr = (sctp_data_chunk_t *) chunk->chunk_hdr;
-
 	/* Create an event from the incoming chunk. */
 	event = sctp_ulpevent_make_rcvmsg(chunk->asoc, chunk, gfp);
 	if (!event)
@@ -743,11 +740,9 @@ static void sctp_ulpq_retrieve_ordered(struct sctp_ulpq *ulpq,
 	struct sk_buff *pos, *tmp;
 	struct sctp_ulpevent *cevent;
 	struct sctp_stream *in;
-	__u16 sid, csid;
-	__u16 ssn, cssn;
+	__u16 sid, csid, cssn;
 
 	sid = event->stream;
-	ssn = event->ssn;
 	in  = &ulpq->asoc->ssnmap->in;
 
 	event_list = (struct sk_buff_head *) sctp_event2skb(event)->prev;
diff --git a/net/socket.c b/net/socket.c
index ac2219f..937d0fc 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -240,17 +240,19 @@ static struct kmem_cache *sock_inode_cachep __read_mostly;
 static struct inode *sock_alloc_inode(struct super_block *sb)
 {
 	struct socket_alloc *ei;
+	struct socket_wq *wq;
 
 	ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
 	if (!ei)
 		return NULL;
-	ei->socket.wq = kmalloc(sizeof(struct socket_wq), GFP_KERNEL);
-	if (!ei->socket.wq) {
+	wq = kmalloc(sizeof(*wq), GFP_KERNEL);
+	if (!wq) {
 		kmem_cache_free(sock_inode_cachep, ei);
 		return NULL;
 	}
-	init_waitqueue_head(&ei->socket.wq->wait);
-	ei->socket.wq->fasync_list = NULL;
+	init_waitqueue_head(&wq->wait);
+	wq->fasync_list = NULL;
+	RCU_INIT_POINTER(ei->socket.wq, wq);
 
 	ei->socket.state = SS_UNCONNECTED;
 	ei->socket.flags = 0;
@@ -273,9 +275,11 @@ static void wq_free_rcu(struct rcu_head *head)
 static void sock_destroy_inode(struct inode *inode)
 {
 	struct socket_alloc *ei;
+	struct socket_wq *wq;
 
 	ei = container_of(inode, struct socket_alloc, vfs_inode);
-	call_rcu(&ei->socket.wq->rcu, wq_free_rcu);
+	wq = rcu_dereference_protected(ei->socket.wq, 1);
+	call_rcu(&wq->rcu, wq_free_rcu);
 	kmem_cache_free(sock_inode_cachep, ei);
 }
 
@@ -524,7 +528,7 @@ void sock_release(struct socket *sock)
 		module_put(owner);
 	}
 
-	if (sock->wq->fasync_list)
+	if (rcu_dereference_protected(sock->wq, 1)->fasync_list)
 		printk(KERN_ERR "sock_release: fasync list not empty!\n");
 
 	percpu_sub(sockets_in_use, 1);
@@ -1108,15 +1112,16 @@ static int sock_fasync(int fd, struct file *filp, int on)
 {
 	struct socket *sock = filp->private_data;
 	struct sock *sk = sock->sk;
+	struct socket_wq *wq;
 
 	if (sk == NULL)
 		return -EINVAL;
 
 	lock_sock(sk);
+	wq = rcu_dereference_protected(sock->wq, sock_owned_by_user(sk));
+	fasync_helper(fd, filp, on, &wq->fasync_list);
 
-	fasync_helper(fd, filp, on, &sock->wq->fasync_list);
-
-	if (!sock->wq->fasync_list)
+	if (!wq->fasync_list)
 		sock_reset_flag(sk, SOCK_FASYNC);
 	else
 		sock_set_flag(sk, SOCK_FASYNC);
@@ -2643,7 +2648,8 @@ static int bond_ioctl(struct net *net, unsigned int cmd,
 
 		old_fs = get_fs();
 		set_fs(KERNEL_DS);
-		err = dev_ioctl(net, cmd, &kifr);
+		err = dev_ioctl(net, cmd,
+				(struct ifreq __user __force *) &kifr);
 		set_fs(old_fs);
 
 		return err;
@@ -2752,7 +2758,7 @@ static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
 
 	old_fs = get_fs();
 	set_fs(KERNEL_DS);
-	err = dev_ioctl(net, cmd, (void __user *)&ifr);
+	err = dev_ioctl(net, cmd, (void  __user __force *)&ifr);
 	set_fs(old_fs);
 
 	if (cmd == SIOCGIFMAP && !err) {
@@ -2857,7 +2863,8 @@ static int routing_ioctl(struct net *net, struct socket *sock,
 		ret |= __get_user(rtdev, &(ur4->rt_dev));
 		if (rtdev) {
 			ret |= copy_from_user(devname, compat_ptr(rtdev), 15);
-			r4.rt_dev = devname; devname[15] = 0;
+			r4.rt_dev = (char __user __force *)devname;
+			devname[15] = 0;
 		} else
 			r4.rt_dev = NULL;
 
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index d802e94..b7d435c 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -420,6 +420,7 @@ static void svc_sock_setbufsize(struct socket *sock, unsigned int snd,
 static void svc_udp_data_ready(struct sock *sk, int count)
 {
 	struct svc_sock	*svsk = (struct svc_sock *)sk->sk_user_data;
+	wait_queue_head_t *wq = sk_sleep(sk);
 
 	if (svsk) {
 		dprintk("svc: socket %p(inet %p), count=%d, busy=%d\n",
@@ -428,8 +429,8 @@ static void svc_udp_data_ready(struct sock *sk, int count)
 		set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
 		svc_xprt_enqueue(&svsk->sk_xprt);
 	}
-	if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk)))
-		wake_up_interruptible(sk_sleep(sk));
+	if (wq && waitqueue_active(wq))
+		wake_up_interruptible(wq);
 }
 
 /*
@@ -438,6 +439,7 @@ static void svc_udp_data_ready(struct sock *sk, int count)
 static void svc_write_space(struct sock *sk)
 {
 	struct svc_sock	*svsk = (struct svc_sock *)(sk->sk_user_data);
+	wait_queue_head_t *wq = sk_sleep(sk);
 
 	if (svsk) {
 		dprintk("svc: socket %p(inet %p), write_space busy=%d\n",
@@ -445,10 +447,10 @@ static void svc_write_space(struct sock *sk)
 		svc_xprt_enqueue(&svsk->sk_xprt);
 	}
 
-	if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) {
+	if (wq && waitqueue_active(wq)) {
 		dprintk("RPC svc_write_space: someone sleeping on %p\n",
 		       svsk);
-		wake_up_interruptible(sk_sleep(sk));
+		wake_up_interruptible(wq);
 	}
 }
 
@@ -739,6 +741,7 @@ static void svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv)
 static void svc_tcp_listen_data_ready(struct sock *sk, int count_unused)
 {
 	struct svc_sock	*svsk = (struct svc_sock *)sk->sk_user_data;
+	wait_queue_head_t *wq;
 
 	dprintk("svc: socket %p TCP (listen) state change %d\n",
 		sk, sk->sk_state);
@@ -761,8 +764,9 @@ static void svc_tcp_listen_data_ready(struct sock *sk, int count_unused)
 			printk("svc: socket %p: no user data\n", sk);
 	}
 
-	if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk)))
-		wake_up_interruptible_all(sk_sleep(sk));
+	wq = sk_sleep(sk);
+	if (wq && waitqueue_active(wq))
+		wake_up_interruptible_all(wq);
 }
 
 /*
@@ -771,6 +775,7 @@ static void svc_tcp_listen_data_ready(struct sock *sk, int count_unused)
 static void svc_tcp_state_change(struct sock *sk)
 {
 	struct svc_sock	*svsk = (struct svc_sock *)sk->sk_user_data;
+	wait_queue_head_t *wq = sk_sleep(sk);
 
 	dprintk("svc: socket %p TCP (connected) state change %d (svsk %p)\n",
 		sk, sk->sk_state, sk->sk_user_data);
@@ -781,13 +786,14 @@ static void svc_tcp_state_change(struct sock *sk)
 		set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
 		svc_xprt_enqueue(&svsk->sk_xprt);
 	}
-	if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk)))
-		wake_up_interruptible_all(sk_sleep(sk));
+	if (wq && waitqueue_active(wq))
+		wake_up_interruptible_all(wq);
 }
 
 static void svc_tcp_data_ready(struct sock *sk, int count)
 {
 	struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data;
+	wait_queue_head_t *wq = sk_sleep(sk);
 
 	dprintk("svc: socket %p TCP data ready (svsk %p)\n",
 		sk, sk->sk_user_data);
@@ -795,8 +801,8 @@ static void svc_tcp_data_ready(struct sock *sk, int count)
 		set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
 		svc_xprt_enqueue(&svsk->sk_xprt);
 	}
-	if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk)))
-		wake_up_interruptible(sk_sleep(sk));
+	if (wq && waitqueue_active(wq))
+		wake_up_interruptible(wq);
 }
 
 /*
@@ -1531,6 +1537,7 @@ static void svc_sock_detach(struct svc_xprt *xprt)
 {
 	struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
 	struct sock *sk = svsk->sk_sk;
+	wait_queue_head_t *wq;
 
 	dprintk("svc: svc_sock_detach(%p)\n", svsk);
 
@@ -1539,8 +1546,9 @@ static void svc_sock_detach(struct svc_xprt *xprt)
 	sk->sk_data_ready = svsk->sk_odata;
 	sk->sk_write_space = svsk->sk_owspace;
 
-	if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk)))
-		wake_up_interruptible(sk_sleep(sk));
+	wq = sk_sleep(sk);
+	if (wq && waitqueue_active(wq))
+		wake_up_interruptible(wq);
 }
 
 /*
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index 70ab5ef..7dc1dc7 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 2004-2006, Ericsson AB
  * Copyright (c) 2004, Intel Corporation.
- * Copyright (c) 2005, Wind River Systems
+ * Copyright (c) 2005, 2010-2011, Wind River Systems
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -61,8 +61,8 @@
  */
 
 struct bcbearer_pair {
-	struct bearer *primary;
-	struct bearer *secondary;
+	struct tipc_bearer *primary;
+	struct tipc_bearer *secondary;
 };
 
 /**
@@ -81,7 +81,7 @@ struct bcbearer_pair {
  */
 
 struct bcbearer {
-	struct bearer bearer;
+	struct tipc_bearer bearer;
 	struct media media;
 	struct bcbearer_pair bpairs[MAX_BEARERS];
 	struct bcbearer_pair bpairs_temp[TIPC_MAX_LINK_PRI + 1];
@@ -93,6 +93,7 @@ struct bcbearer {
  * struct bclink - link used for broadcast messages
  * @link: (non-standard) broadcast link structure
  * @node: (non-standard) node structure representing b'cast link's peer node
+ * @retransmit_to: node that most recently requested a retransmit
  *
  * Handles sequence numbering, fragmentation, bundling, etc.
  */
@@ -100,6 +101,7 @@ struct bcbearer {
 struct bclink {
 	struct link link;
 	struct tipc_node node;
+	struct tipc_node *retransmit_to;
 };
 
 
@@ -184,6 +186,17 @@ static int bclink_ack_allowed(u32 n)
 
 
 /**
+ * tipc_bclink_retransmit_to - get most recent node to request retransmission
+ *
+ * Called with bc_lock locked
+ */
+
+struct tipc_node *tipc_bclink_retransmit_to(void)
+{
+	return bclink->retransmit_to;
+}
+
+/**
  * bclink_retransmit_pkt - retransmit broadcast packets
  * @after: sequence number of last packet to *not* retransmit
  * @to: sequence number of last packet to retransmit
@@ -285,6 +298,7 @@ static void bclink_send_nack(struct tipc_node *n_ptr)
 		msg = buf_msg(buf);
 		tipc_msg_init(msg, BCAST_PROTOCOL, STATE_MSG,
 			 INT_H_SIZE, n_ptr->addr);
+		msg_set_non_seq(msg, 1);
 		msg_set_mc_netid(msg, tipc_net_id);
 		msg_set_bcast_ack(msg, mod(n_ptr->bclink.last_in));
 		msg_set_bcgap_after(msg, n_ptr->bclink.gap_after);
@@ -405,8 +419,6 @@ int tipc_bclink_send_msg(struct sk_buff *buf)
 	else
 		bclink_set_last_sent();
 
-	if (bcl->out_queue_size > bcl->stats.max_queue_sz)
-		bcl->stats.max_queue_sz = bcl->out_queue_size;
 	bcl->stats.queue_sz_counts++;
 	bcl->stats.accu_queue_sz += bcl->out_queue_size;
 
@@ -444,10 +456,9 @@ void tipc_bclink_recv_pkt(struct sk_buff *buf)
 			tipc_node_unlock(node);
 			spin_lock_bh(&bc_lock);
 			bcl->stats.recv_nacks++;
-			bcl->owner->next = node;   /* remember requestor */
+			bclink->retransmit_to = node;
 			bclink_retransmit_pkt(msg_bcgap_after(msg),
 					      msg_bcgap_to(msg));
-			bcl->owner->next = NULL;
 			spin_unlock_bh(&bc_lock);
 		} else {
 			tipc_bclink_peek_nack(msg_destnode(msg),
@@ -574,8 +585,8 @@ static int tipc_bcbearer_send(struct sk_buff *buf,
 	bcbearer->remains = tipc_bcast_nmap;
 
 	for (bp_index = 0; bp_index < MAX_BEARERS; bp_index++) {
-		struct bearer *p = bcbearer->bpairs[bp_index].primary;
-		struct bearer *s = bcbearer->bpairs[bp_index].secondary;
+		struct tipc_bearer *p = bcbearer->bpairs[bp_index].primary;
+		struct tipc_bearer *s = bcbearer->bpairs[bp_index].secondary;
 
 		if (!p)
 			break;	/* no more bearers to try */
@@ -584,11 +595,11 @@ static int tipc_bcbearer_send(struct sk_buff *buf,
 		if (bcbearer->remains_new.count == bcbearer->remains.count)
 			continue;	/* bearer pair doesn't add anything */
 
-		if (p->publ.blocked ||
-		    p->media->send_msg(buf, &p->publ, &p->media->bcast_addr)) {
+		if (p->blocked ||
+		    p->media->send_msg(buf, p, &p->media->bcast_addr)) {
 			/* unable to send on primary bearer */
-			if (!s || s->publ.blocked ||
-			    s->media->send_msg(buf, &s->publ,
+			if (!s || s->blocked ||
+			    s->media->send_msg(buf, s,
 					       &s->media->bcast_addr)) {
 				/* unable to send on either bearer */
 				continue;
@@ -633,7 +644,7 @@ void tipc_bcbearer_sort(void)
 	memset(bp_temp, 0, sizeof(bcbearer->bpairs_temp));
 
 	for (b_index = 0; b_index < MAX_BEARERS; b_index++) {
-		struct bearer *b = &tipc_bearers[b_index];
+		struct tipc_bearer *b = &tipc_bearers[b_index];
 
 		if (!b->active || !b->nodes.count)
 			continue;
@@ -682,12 +693,12 @@ void tipc_bcbearer_sort(void)
 
 void tipc_bcbearer_push(void)
 {
-	struct bearer *b_ptr;
+	struct tipc_bearer *b_ptr;
 
 	spin_lock_bh(&bc_lock);
 	b_ptr = &bcbearer->bearer;
-	if (b_ptr->publ.blocked) {
-		b_ptr->publ.blocked = 0;
+	if (b_ptr->blocked) {
+		b_ptr->blocked = 0;
 		tipc_bearer_lock_push(b_ptr);
 	}
 	spin_unlock_bh(&bc_lock);
diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h
index 51f8c53..500c97f 100644
--- a/net/tipc/bcast.h
+++ b/net/tipc/bcast.h
@@ -2,7 +2,7 @@
  * net/tipc/bcast.h: Include file for TIPC broadcast code
  *
  * Copyright (c) 2003-2006, Ericsson AB
- * Copyright (c) 2005, Wind River Systems
+ * Copyright (c) 2005, 2010-2011, Wind River Systems
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -90,6 +90,7 @@ void tipc_port_list_free(struct port_list *pl_ptr);
 
 int  tipc_bclink_init(void);
 void tipc_bclink_stop(void);
+struct tipc_node *tipc_bclink_retransmit_to(void);
 void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked);
 int  tipc_bclink_send_msg(struct sk_buff *buf);
 void tipc_bclink_recv_pkt(struct sk_buff *buf);
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 837b7a4..f2839b0 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -2,7 +2,7 @@
  * net/tipc/bearer.c: TIPC bearer code
  *
  * Copyright (c) 1996-2006, Ericsson AB
- * Copyright (c) 2004-2006, Wind River Systems
+ * Copyright (c) 2004-2006, 2010-2011, Wind River Systems
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -44,7 +44,7 @@
 static struct media media_list[MAX_MEDIA];
 static u32 media_count;
 
-struct bearer tipc_bearers[MAX_BEARERS];
+struct tipc_bearer tipc_bearers[MAX_BEARERS];
 
 /**
  * media_name_valid - validate media name
@@ -278,13 +278,13 @@ static int bearer_name_validate(const char *name,
  * bearer_find - locates bearer object with matching bearer name
  */
 
-static struct bearer *bearer_find(const char *name)
+static struct tipc_bearer *bearer_find(const char *name)
 {
-	struct bearer *b_ptr;
+	struct tipc_bearer *b_ptr;
 	u32 i;
 
 	for (i = 0, b_ptr = tipc_bearers; i < MAX_BEARERS; i++, b_ptr++) {
-		if (b_ptr->active && (!strcmp(b_ptr->publ.name, name)))
+		if (b_ptr->active && (!strcmp(b_ptr->name, name)))
 			return b_ptr;
 	}
 	return NULL;
@@ -294,16 +294,16 @@ static struct bearer *bearer_find(const char *name)
  * tipc_bearer_find_interface - locates bearer object with matching interface name
  */
 
-struct bearer *tipc_bearer_find_interface(const char *if_name)
+struct tipc_bearer *tipc_bearer_find_interface(const char *if_name)
 {
-	struct bearer *b_ptr;
+	struct tipc_bearer *b_ptr;
 	char *b_if_name;
 	u32 i;
 
 	for (i = 0, b_ptr = tipc_bearers; i < MAX_BEARERS; i++, b_ptr++) {
 		if (!b_ptr->active)
 			continue;
-		b_if_name = strchr(b_ptr->publ.name, ':') + 1;
+		b_if_name = strchr(b_ptr->name, ':') + 1;
 		if (!strcmp(b_if_name, if_name))
 			return b_ptr;
 	}
@@ -318,7 +318,7 @@ struct sk_buff *tipc_bearer_get_names(void)
 {
 	struct sk_buff *buf;
 	struct media *m_ptr;
-	struct bearer *b_ptr;
+	struct tipc_bearer *b_ptr;
 	int i, j;
 
 	buf = tipc_cfg_reply_alloc(MAX_BEARERS * TLV_SPACE(TIPC_MAX_BEARER_NAME));
@@ -331,8 +331,8 @@ struct sk_buff *tipc_bearer_get_names(void)
 			b_ptr = &tipc_bearers[j];
 			if (b_ptr->active && (b_ptr->media == m_ptr)) {
 				tipc_cfg_append_tlv(buf, TIPC_TLV_BEARER_NAME,
-						    b_ptr->publ.name,
-						    strlen(b_ptr->publ.name) + 1);
+						    b_ptr->name,
+						    strlen(b_ptr->name) + 1);
 			}
 		}
 	}
@@ -340,14 +340,14 @@ struct sk_buff *tipc_bearer_get_names(void)
 	return buf;
 }
 
-void tipc_bearer_add_dest(struct bearer *b_ptr, u32 dest)
+void tipc_bearer_add_dest(struct tipc_bearer *b_ptr, u32 dest)
 {
 	tipc_nmap_add(&b_ptr->nodes, dest);
 	tipc_disc_update_link_req(b_ptr->link_req);
 	tipc_bcbearer_sort();
 }
 
-void tipc_bearer_remove_dest(struct bearer *b_ptr, u32 dest)
+void tipc_bearer_remove_dest(struct tipc_bearer *b_ptr, u32 dest)
 {
 	tipc_nmap_remove(&b_ptr->nodes, dest);
 	tipc_disc_update_link_req(b_ptr->link_req);
@@ -362,12 +362,12 @@ void tipc_bearer_remove_dest(struct bearer *b_ptr, u32 dest)
  * bearer.lock must be taken before calling
  * Returns binary true(1) ore false(0)
  */
-static int bearer_push(struct bearer *b_ptr)
+static int bearer_push(struct tipc_bearer *b_ptr)
 {
 	u32 res = 0;
 	struct link *ln, *tln;
 
-	if (b_ptr->publ.blocked)
+	if (b_ptr->blocked)
 		return 0;
 
 	while (!list_empty(&b_ptr->cong_links) && (res != PUSH_FAILED)) {
@@ -382,13 +382,13 @@ static int bearer_push(struct bearer *b_ptr)
 	return list_empty(&b_ptr->cong_links);
 }
 
-void tipc_bearer_lock_push(struct bearer *b_ptr)
+void tipc_bearer_lock_push(struct tipc_bearer *b_ptr)
 {
 	int res;
 
-	spin_lock_bh(&b_ptr->publ.lock);
+	spin_lock_bh(&b_ptr->lock);
 	res = bearer_push(b_ptr);
-	spin_unlock_bh(&b_ptr->publ.lock);
+	spin_unlock_bh(&b_ptr->lock);
 	if (res)
 		tipc_bcbearer_push();
 }
@@ -398,16 +398,14 @@ void tipc_bearer_lock_push(struct bearer *b_ptr)
  * Interrupt enabling new requests after bearer congestion or blocking:
  * See bearer_send().
  */
-void tipc_continue(struct tipc_bearer *tb_ptr)
+void tipc_continue(struct tipc_bearer *b_ptr)
 {
-	struct bearer *b_ptr = (struct bearer *)tb_ptr;
-
-	spin_lock_bh(&b_ptr->publ.lock);
+	spin_lock_bh(&b_ptr->lock);
 	b_ptr->continue_count++;
 	if (!list_empty(&b_ptr->cong_links))
 		tipc_k_signal((Handler)tipc_bearer_lock_push, (unsigned long)b_ptr);
-	b_ptr->publ.blocked = 0;
-	spin_unlock_bh(&b_ptr->publ.lock);
+	b_ptr->blocked = 0;
+	spin_unlock_bh(&b_ptr->lock);
 }
 
 /*
@@ -418,7 +416,7 @@ void tipc_continue(struct tipc_bearer *tb_ptr)
  * bearer.lock is busy
  */
 
-static void tipc_bearer_schedule_unlocked(struct bearer *b_ptr, struct link *l_ptr)
+static void tipc_bearer_schedule_unlocked(struct tipc_bearer *b_ptr, struct link *l_ptr)
 {
 	list_move_tail(&l_ptr->link_list, &b_ptr->cong_links);
 }
@@ -431,11 +429,11 @@ static void tipc_bearer_schedule_unlocked(struct bearer *b_ptr, struct link *l_p
  * bearer.lock is free
  */
 
-void tipc_bearer_schedule(struct bearer *b_ptr, struct link *l_ptr)
+void tipc_bearer_schedule(struct tipc_bearer *b_ptr, struct link *l_ptr)
 {
-	spin_lock_bh(&b_ptr->publ.lock);
+	spin_lock_bh(&b_ptr->lock);
 	tipc_bearer_schedule_unlocked(b_ptr, l_ptr);
-	spin_unlock_bh(&b_ptr->publ.lock);
+	spin_unlock_bh(&b_ptr->lock);
 }
 
 
@@ -444,18 +442,18 @@ void tipc_bearer_schedule(struct bearer *b_ptr, struct link *l_ptr)
  * and if there is, try to resolve it before returning.
  * 'tipc_net_lock' is read_locked when this function is called
  */
-int tipc_bearer_resolve_congestion(struct bearer *b_ptr, struct link *l_ptr)
+int tipc_bearer_resolve_congestion(struct tipc_bearer *b_ptr, struct link *l_ptr)
 {
 	int res = 1;
 
 	if (list_empty(&b_ptr->cong_links))
 		return 1;
-	spin_lock_bh(&b_ptr->publ.lock);
+	spin_lock_bh(&b_ptr->lock);
 	if (!bearer_push(b_ptr)) {
 		tipc_bearer_schedule_unlocked(b_ptr, l_ptr);
 		res = 0;
 	}
-	spin_unlock_bh(&b_ptr->publ.lock);
+	spin_unlock_bh(&b_ptr->lock);
 	return res;
 }
 
@@ -463,9 +461,9 @@ int tipc_bearer_resolve_congestion(struct bearer *b_ptr, struct link *l_ptr)
  * tipc_bearer_congested - determines if bearer is currently congested
  */
 
-int tipc_bearer_congested(struct bearer *b_ptr, struct link *l_ptr)
+int tipc_bearer_congested(struct tipc_bearer *b_ptr, struct link *l_ptr)
 {
-	if (unlikely(b_ptr->publ.blocked))
+	if (unlikely(b_ptr->blocked))
 		return 1;
 	if (likely(list_empty(&b_ptr->cong_links)))
 		return 0;
@@ -478,7 +476,7 @@ int tipc_bearer_congested(struct bearer *b_ptr, struct link *l_ptr)
 
 int tipc_enable_bearer(const char *name, u32 bcast_scope, u32 priority)
 {
-	struct bearer *b_ptr;
+	struct tipc_bearer *b_ptr;
 	struct media *m_ptr;
 	struct bearer_name b_name;
 	char addr_string[16];
@@ -528,7 +526,7 @@ restart:
 			bearer_id = i;
 			continue;
 		}
-		if (!strcmp(name, tipc_bearers[i].publ.name)) {
+		if (!strcmp(name, tipc_bearers[i].name)) {
 			warn("Bearer <%s> rejected, already enabled\n", name);
 			goto failed;
 		}
@@ -551,8 +549,8 @@ restart:
 	}
 
 	b_ptr = &tipc_bearers[bearer_id];
-	strcpy(b_ptr->publ.name, name);
-	res = m_ptr->enable_bearer(&b_ptr->publ);
+	strcpy(b_ptr->name, name);
+	res = m_ptr->enable_bearer(b_ptr);
 	if (res) {
 		warn("Bearer <%s> rejected, enable failure (%d)\n", name, -res);
 		goto failed;
@@ -568,9 +566,9 @@ restart:
 	INIT_LIST_HEAD(&b_ptr->links);
 	if (m_ptr->bcast) {
 		b_ptr->link_req = tipc_disc_init_link_req(b_ptr, &m_ptr->bcast_addr,
-							  bcast_scope, 2);
+							  bcast_scope);
 	}
-	spin_lock_init(&b_ptr->publ.lock);
+	spin_lock_init(&b_ptr->lock);
 	write_unlock_bh(&tipc_net_lock);
 	info("Enabled bearer <%s>, discovery domain %s, priority %u\n",
 	     name, tipc_addr_string_fill(addr_string, bcast_scope), priority);
@@ -587,7 +585,7 @@ failed:
 
 int tipc_block_bearer(const char *name)
 {
-	struct bearer *b_ptr = NULL;
+	struct tipc_bearer *b_ptr = NULL;
 	struct link *l_ptr;
 	struct link *temp_l_ptr;
 
@@ -600,8 +598,8 @@ int tipc_block_bearer(const char *name)
 	}
 
 	info("Blocking bearer <%s>\n", name);
-	spin_lock_bh(&b_ptr->publ.lock);
-	b_ptr->publ.blocked = 1;
+	spin_lock_bh(&b_ptr->lock);
+	b_ptr->blocked = 1;
 	list_for_each_entry_safe(l_ptr, temp_l_ptr, &b_ptr->links, link_list) {
 		struct tipc_node *n_ptr = l_ptr->owner;
 
@@ -609,7 +607,7 @@ int tipc_block_bearer(const char *name)
 		tipc_link_reset(l_ptr);
 		spin_unlock_bh(&n_ptr->lock);
 	}
-	spin_unlock_bh(&b_ptr->publ.lock);
+	spin_unlock_bh(&b_ptr->lock);
 	read_unlock_bh(&tipc_net_lock);
 	return 0;
 }
@@ -620,27 +618,27 @@ int tipc_block_bearer(const char *name)
  * Note: This routine assumes caller holds tipc_net_lock.
  */
 
-static void bearer_disable(struct bearer *b_ptr)
+static void bearer_disable(struct tipc_bearer *b_ptr)
 {
 	struct link *l_ptr;
 	struct link *temp_l_ptr;
 
-	info("Disabling bearer <%s>\n", b_ptr->publ.name);
+	info("Disabling bearer <%s>\n", b_ptr->name);
 	tipc_disc_stop_link_req(b_ptr->link_req);
-	spin_lock_bh(&b_ptr->publ.lock);
+	spin_lock_bh(&b_ptr->lock);
 	b_ptr->link_req = NULL;
-	b_ptr->publ.blocked = 1;
-	b_ptr->media->disable_bearer(&b_ptr->publ);
+	b_ptr->blocked = 1;
+	b_ptr->media->disable_bearer(b_ptr);
 	list_for_each_entry_safe(l_ptr, temp_l_ptr, &b_ptr->links, link_list) {
 		tipc_link_delete(l_ptr);
 	}
-	spin_unlock_bh(&b_ptr->publ.lock);
-	memset(b_ptr, 0, sizeof(struct bearer));
+	spin_unlock_bh(&b_ptr->lock);
+	memset(b_ptr, 0, sizeof(struct tipc_bearer));
 }
 
 int tipc_disable_bearer(const char *name)
 {
-	struct bearer *b_ptr;
+	struct tipc_bearer *b_ptr;
 	int res;
 
 	write_lock_bh(&tipc_net_lock);
diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h
index 85f451d..255dea6 100644
--- a/net/tipc/bearer.h
+++ b/net/tipc/bearer.h
@@ -2,7 +2,7 @@
  * net/tipc/bearer.h: Include file for TIPC bearer code
  *
  * Copyright (c) 1996-2006, Ericsson AB
- * Copyright (c) 2005, Wind River Systems
+ * Copyright (c) 2005, 2010-2011, Wind River Systems
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -61,26 +61,7 @@ struct tipc_media_addr {
 	} dev_addr;
 };
 
-/**
- * struct tipc_bearer - TIPC bearer info available to media code
- * @usr_handle: pointer to additional media-specific information about bearer
- * @mtu: max packet size bearer can support
- * @blocked: non-zero if bearer is blocked
- * @lock: spinlock for controlling access to bearer
- * @addr: media-specific address associated with bearer
- * @name: bearer name (format = media:interface)
- *
- * Note: TIPC initializes "name" and "lock" fields; media code is responsible
- * for initialization all other fields when a bearer is enabled.
- */
-struct tipc_bearer {
-	void *usr_handle;
-	u32 mtu;
-	int blocked;
-	spinlock_t lock;
-	struct tipc_media_addr addr;
-	char name[TIPC_MAX_BEARER_NAME];
-};
+struct tipc_bearer;
 
 /**
  * struct media - TIPC media information available to internal users
@@ -115,8 +96,13 @@ struct media {
 };
 
 /**
- * struct bearer - TIPC bearer information available to internal users
- * @publ: bearer information available to privileged users
+ * struct tipc_bearer - TIPC bearer structure
+ * @usr_handle: pointer to additional media-specific information about bearer
+ * @mtu: max packet size bearer can support
+ * @blocked: non-zero if bearer is blocked
+ * @lock: spinlock for controlling access to bearer
+ * @addr: media-specific address associated with bearer
+ * @name: bearer name (format = media:interface)
  * @media: ptr to media structure associated with bearer
  * @priority: default link priority for bearer
  * @detect_scope: network address mask used during automatic link creation
@@ -128,10 +114,18 @@ struct media {
  * @active: non-zero if bearer structure is represents a bearer
  * @net_plane: network plane ('A' through 'H') currently associated with bearer
  * @nodes: indicates which nodes in cluster can be reached through bearer
+ *
+ * Note: media-specific code is responsible for initialization of the fields
+ * indicated below when a bearer is enabled; TIPC's generic bearer code takes
+ * care of initializing all other fields.
  */
-
-struct bearer {
-	struct tipc_bearer publ;
+struct tipc_bearer {
+	void *usr_handle;			/* initalized by media */
+	u32 mtu;				/* initalized by media */
+	int blocked;				/* initalized by media */
+	struct tipc_media_addr addr;		/* initalized by media */
+	char name[TIPC_MAX_BEARER_NAME];
+	spinlock_t lock;
 	struct media *media;
 	u32 priority;
 	u32 detect_scope;
@@ -152,7 +146,7 @@ struct bearer_name {
 
 struct link;
 
-extern struct bearer tipc_bearers[];
+extern struct tipc_bearer tipc_bearers[];
 
 /*
  * TIPC routines available to supported media types
@@ -186,14 +180,14 @@ void tipc_media_addr_printf(struct print_buf *pb, struct tipc_media_addr *a);
 struct sk_buff *tipc_media_get_names(void);
 
 struct sk_buff *tipc_bearer_get_names(void);
-void tipc_bearer_add_dest(struct bearer *b_ptr, u32 dest);
-void tipc_bearer_remove_dest(struct bearer *b_ptr, u32 dest);
-void tipc_bearer_schedule(struct bearer *b_ptr, struct link *l_ptr);
-struct bearer *tipc_bearer_find_interface(const char *if_name);
-int tipc_bearer_resolve_congestion(struct bearer *b_ptr, struct link *l_ptr);
-int tipc_bearer_congested(struct bearer *b_ptr, struct link *l_ptr);
+void tipc_bearer_add_dest(struct tipc_bearer *b_ptr, u32 dest);
+void tipc_bearer_remove_dest(struct tipc_bearer *b_ptr, u32 dest);
+void tipc_bearer_schedule(struct tipc_bearer *b_ptr, struct link *l_ptr);
+struct tipc_bearer *tipc_bearer_find_interface(const char *if_name);
+int tipc_bearer_resolve_congestion(struct tipc_bearer *b_ptr, struct link *l_ptr);
+int tipc_bearer_congested(struct tipc_bearer *b_ptr, struct link *l_ptr);
 void tipc_bearer_stop(void);
-void tipc_bearer_lock_push(struct bearer *b_ptr);
+void tipc_bearer_lock_push(struct tipc_bearer *b_ptr);
 
 
 /**
@@ -214,10 +208,11 @@ void tipc_bearer_lock_push(struct bearer *b_ptr);
  * and let TIPC's link code deal with the undelivered message.
  */
 
-static inline int tipc_bearer_send(struct bearer *b_ptr, struct sk_buff *buf,
+static inline int tipc_bearer_send(struct tipc_bearer *b_ptr,
+				   struct sk_buff *buf,
 				   struct tipc_media_addr *dest)
 {
-	return !b_ptr->media->send_msg(buf, &b_ptr->publ, dest);
+	return !b_ptr->media->send_msg(buf, b_ptr, dest);
 }
 
 #endif	/* _TIPC_BEARER_H */
diff --git a/net/tipc/core.c b/net/tipc/core.c
index e071579..2da1fc7 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -2,7 +2,7 @@
  * net/tipc/core.c: TIPC module code
  *
  * Copyright (c) 2003-2006, Ericsson AB
- * Copyright (c) 2005-2006, Wind River Systems
+ * Copyright (c) 2005-2006, 2010-2011, Wind River Systems
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -57,7 +57,6 @@
 
 int tipc_mode = TIPC_NOT_RUNNING;
 int tipc_random;
-atomic_t tipc_user_count = ATOMIC_INIT(0);
 
 const char tipc_alphabet[] =
 	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_.";
diff --git a/net/tipc/core.h b/net/tipc/core.h
index 9971585..37544d9 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -2,7 +2,7 @@
  * net/tipc/core.h: Include file for TIPC global declarations
  *
  * Copyright (c) 2005-2006, Ericsson AB
- * Copyright (c) 2005-2007, Wind River Systems
+ * Copyright (c) 2005-2007, 2010-2011, Wind River Systems
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -161,7 +161,6 @@ extern int tipc_remote_management;
 extern int tipc_mode;
 extern int tipc_random;
 extern const char tipc_alphabet[];
-extern atomic_t tipc_user_count;
 
 
 /*
diff --git a/net/tipc/discover.c b/net/tipc/discover.c
index fa026bd..09ce231 100644
--- a/net/tipc/discover.c
+++ b/net/tipc/discover.c
@@ -2,7 +2,7 @@
  * net/tipc/discover.c
  *
  * Copyright (c) 2003-2006, Ericsson AB
- * Copyright (c) 2005-2006, Wind River Systems
+ * Copyright (c) 2005-2006, 2010-2011, Wind River Systems
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -57,7 +57,7 @@
  * @timer_intv: current interval between requests (in ms)
  */
 struct link_req {
-	struct bearer *bearer;
+	struct tipc_bearer *bearer;
 	struct tipc_media_addr dest;
 	struct sk_buff *buf;
 	struct timer_list timer;
@@ -67,15 +67,13 @@ struct link_req {
 /**
  * tipc_disc_init_msg - initialize a link setup message
  * @type: message type (request or response)
- * @req_links: number of links associated with message
  * @dest_domain: network domain of node(s) which should respond to message
  * @b_ptr: ptr to bearer issuing message
  */
 
 static struct sk_buff *tipc_disc_init_msg(u32 type,
-					  u32 req_links,
 					  u32 dest_domain,
-					  struct bearer *b_ptr)
+					  struct tipc_bearer *b_ptr)
 {
 	struct sk_buff *buf = tipc_buf_acquire(DSC_H_SIZE);
 	struct tipc_msg *msg;
@@ -84,10 +82,9 @@ static struct sk_buff *tipc_disc_init_msg(u32 type,
 		msg = buf_msg(buf);
 		tipc_msg_init(msg, LINK_CONFIG, type, DSC_H_SIZE, dest_domain);
 		msg_set_non_seq(msg, 1);
-		msg_set_req_links(msg, req_links);
 		msg_set_dest_domain(msg, dest_domain);
 		msg_set_bc_netid(msg, tipc_net_id);
-		msg_set_media_addr(msg, &b_ptr->publ.addr);
+		msg_set_media_addr(msg, &b_ptr->addr);
 	}
 	return buf;
 }
@@ -99,7 +96,7 @@ static struct sk_buff *tipc_disc_init_msg(u32 type,
  * @media_addr: media address advertised by duplicated node
  */
 
-static void disc_dupl_alert(struct bearer *b_ptr, u32 node_addr,
+static void disc_dupl_alert(struct tipc_bearer *b_ptr, u32 node_addr,
 			    struct tipc_media_addr *media_addr)
 {
 	char node_addr_str[16];
@@ -111,7 +108,7 @@ static void disc_dupl_alert(struct bearer *b_ptr, u32 node_addr,
 	tipc_media_addr_printf(&pb, media_addr);
 	tipc_printbuf_validate(&pb);
 	warn("Duplicate %s using %s seen on <%s>\n",
-	     node_addr_str, media_addr_str, b_ptr->publ.name);
+	     node_addr_str, media_addr_str, b_ptr->name);
 }
 
 /**
@@ -120,7 +117,7 @@ static void disc_dupl_alert(struct bearer *b_ptr, u32 node_addr,
  * @b_ptr: bearer that message arrived on
  */
 
-void tipc_disc_recv_msg(struct sk_buff *buf, struct bearer *b_ptr)
+void tipc_disc_recv_msg(struct sk_buff *buf, struct tipc_bearer *b_ptr)
 {
 	struct link *link;
 	struct tipc_media_addr media_addr;
@@ -140,7 +137,7 @@ void tipc_disc_recv_msg(struct sk_buff *buf, struct bearer *b_ptr)
 	if (!tipc_addr_node_valid(orig))
 		return;
 	if (orig == tipc_own_addr) {
-		if (memcmp(&media_addr, &b_ptr->publ.addr, sizeof(media_addr)))
+		if (memcmp(&media_addr, &b_ptr->addr, sizeof(media_addr)))
 			disc_dupl_alert(b_ptr, tipc_own_addr, &media_addr);
 		return;
 	}
@@ -191,9 +188,9 @@ void tipc_disc_recv_msg(struct sk_buff *buf, struct bearer *b_ptr)
 		spin_unlock_bh(&n_ptr->lock);
 		if ((type == DSC_RESP_MSG) || link_fully_up)
 			return;
-		rbuf = tipc_disc_init_msg(DSC_RESP_MSG, 1, orig, b_ptr);
+		rbuf = tipc_disc_init_msg(DSC_RESP_MSG, orig, b_ptr);
 		if (rbuf != NULL) {
-			b_ptr->media->send_msg(rbuf, &b_ptr->publ, &media_addr);
+			b_ptr->media->send_msg(rbuf, b_ptr, &media_addr);
 			buf_discard(rbuf);
 		}
 	}
@@ -249,9 +246,9 @@ void tipc_disc_update_link_req(struct link_req *req)
 
 static void disc_timeout(struct link_req *req)
 {
-	spin_lock_bh(&req->bearer->publ.lock);
+	spin_lock_bh(&req->bearer->lock);
 
-	req->bearer->media->send_msg(req->buf, &req->bearer->publ, &req->dest);
+	req->bearer->media->send_msg(req->buf, req->bearer, &req->dest);
 
 	if ((req->timer_intv == TIPC_LINK_REQ_SLOW) ||
 	    (req->timer_intv == TIPC_LINK_REQ_FAST)) {
@@ -266,7 +263,7 @@ static void disc_timeout(struct link_req *req)
 	}
 	k_start_timer(&req->timer, req->timer_intv);
 
-	spin_unlock_bh(&req->bearer->publ.lock);
+	spin_unlock_bh(&req->bearer->lock);
 }
 
 /**
@@ -274,15 +271,13 @@ static void disc_timeout(struct link_req *req)
  * @b_ptr: ptr to bearer issuing requests
  * @dest: destination address for request messages
  * @dest_domain: network domain of node(s) which should respond to message
- * @req_links: max number of desired links
  *
  * Returns pointer to link request structure, or NULL if unable to create.
  */
 
-struct link_req *tipc_disc_init_link_req(struct bearer *b_ptr,
+struct link_req *tipc_disc_init_link_req(struct tipc_bearer *b_ptr,
 					 const struct tipc_media_addr *dest,
-					 u32 dest_domain,
-					 u32 req_links)
+					 u32 dest_domain)
 {
 	struct link_req *req;
 
@@ -290,7 +285,7 @@ struct link_req *tipc_disc_init_link_req(struct bearer *b_ptr,
 	if (!req)
 		return NULL;
 
-	req->buf = tipc_disc_init_msg(DSC_REQ_MSG, req_links, dest_domain, b_ptr);
+	req->buf = tipc_disc_init_msg(DSC_REQ_MSG, dest_domain, b_ptr);
 	if (!req->buf) {
 		kfree(req);
 		return NULL;
diff --git a/net/tipc/discover.h b/net/tipc/discover.h
index d2c3cff..e48a167 100644
--- a/net/tipc/discover.h
+++ b/net/tipc/discover.h
@@ -2,7 +2,7 @@
  * net/tipc/discover.h
  *
  * Copyright (c) 2003-2006, Ericsson AB
- * Copyright (c) 2005, Wind River Systems
+ * Copyright (c) 2005, 2010-2011, Wind River Systems
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -39,13 +39,12 @@
 
 struct link_req;
 
-struct link_req *tipc_disc_init_link_req(struct bearer *b_ptr,
+struct link_req *tipc_disc_init_link_req(struct tipc_bearer *b_ptr,
 					 const struct tipc_media_addr *dest,
-					 u32 dest_domain,
-					 u32 req_links);
+					 u32 dest_domain);
 void tipc_disc_update_link_req(struct link_req *req);
 void tipc_disc_stop_link_req(struct link_req *req);
 
-void tipc_disc_recv_msg(struct sk_buff *buf, struct bearer *b_ptr);
+void tipc_disc_recv_msg(struct sk_buff *buf, struct tipc_bearer *b_ptr);
 
 #endif
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 18702f5..89fbb6d6 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -2,7 +2,7 @@
  * net/tipc/link.c: TIPC link code
  *
  * Copyright (c) 1996-2007, Ericsson AB
- * Copyright (c) 2004-2007, Wind River Systems
+ * Copyright (c) 2004-2007, 2010-2011, Wind River Systems
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -90,7 +90,7 @@ static void link_handle_out_of_seq_msg(struct link *l_ptr,
 static void link_recv_proto_msg(struct link *l_ptr, struct sk_buff *buf);
 static int  link_recv_changeover_msg(struct link **l_ptr, struct sk_buff **buf);
 static void link_set_supervision_props(struct link *l_ptr, u32 tolerance);
-static int  link_send_sections_long(struct port *sender,
+static int  link_send_sections_long(struct tipc_port *sender,
 				    struct iovec const *msg_sect,
 				    u32 num_sect, u32 destnode);
 static void link_check_defragm_bufs(struct link *l_ptr);
@@ -113,7 +113,7 @@ static void link_init_max_pkt(struct link *l_ptr)
 {
 	u32 max_pkt;
 
-	max_pkt = (l_ptr->b_ptr->publ.mtu & ~3);
+	max_pkt = (l_ptr->b_ptr->mtu & ~3);
 	if (max_pkt > MAX_MSG_SIZE)
 		max_pkt = MAX_MSG_SIZE;
 
@@ -246,9 +246,6 @@ static void link_timeout(struct link *l_ptr)
 	l_ptr->stats.accu_queue_sz += l_ptr->out_queue_size;
 	l_ptr->stats.queue_sz_counts++;
 
-	if (l_ptr->out_queue_size > l_ptr->stats.max_queue_sz)
-		l_ptr->stats.max_queue_sz = l_ptr->out_queue_size;
-
 	if (l_ptr->first_out) {
 		struct tipc_msg *msg = buf_msg(l_ptr->first_out);
 		u32 length = msg_size(msg);
@@ -303,7 +300,7 @@ static void link_set_timer(struct link *l_ptr, u32 time)
  * Returns pointer to link.
  */
 
-struct link *tipc_link_create(struct bearer *b_ptr, const u32 peer,
+struct link *tipc_link_create(struct tipc_bearer *b_ptr, const u32 peer,
 			      const struct tipc_media_addr *media_addr)
 {
 	struct link *l_ptr;
@@ -317,7 +314,7 @@ struct link *tipc_link_create(struct bearer *b_ptr, const u32 peer,
 	}
 
 	l_ptr->addr = peer;
-	if_name = strchr(b_ptr->publ.name, ':') + 1;
+	if_name = strchr(b_ptr->name, ':') + 1;
 	sprintf(l_ptr->name, "%u.%u.%u:%s-%u.%u.%u:",
 		tipc_zone(tipc_own_addr), tipc_cluster(tipc_own_addr),
 		tipc_node(tipc_own_addr),
@@ -391,7 +388,9 @@ void tipc_link_delete(struct link *l_ptr)
 
 static void link_start(struct link *l_ptr)
 {
+	tipc_node_lock(l_ptr->owner);
 	link_state_event(l_ptr, STARTING_EVT);
+	tipc_node_unlock(l_ptr->owner);
 }
 
 /**
@@ -406,7 +405,7 @@ static void link_start(struct link *l_ptr)
 
 static int link_schedule_port(struct link *l_ptr, u32 origport, u32 sz)
 {
-	struct port *p_ptr;
+	struct tipc_port *p_ptr;
 
 	spin_lock_bh(&tipc_port_list_lock);
 	p_ptr = tipc_port_lock(origport);
@@ -415,7 +414,7 @@ static int link_schedule_port(struct link *l_ptr, u32 origport, u32 sz)
 			goto exit;
 		if (!list_empty(&p_ptr->wait_list))
 			goto exit;
-		p_ptr->publ.congested = 1;
+		p_ptr->congested = 1;
 		p_ptr->waiting_pkts = 1 + ((sz - 1) / l_ptr->max_pkt);
 		list_add_tail(&p_ptr->wait_list, &l_ptr->waiting_ports);
 		l_ptr->stats.link_congs++;
@@ -428,8 +427,8 @@ exit:
 
 void tipc_link_wakeup_ports(struct link *l_ptr, int all)
 {
-	struct port *p_ptr;
-	struct port *temp_p_ptr;
+	struct tipc_port *p_ptr;
+	struct tipc_port *temp_p_ptr;
 	int win = l_ptr->queue_limit[0] - l_ptr->out_queue_size;
 
 	if (all)
@@ -445,11 +444,11 @@ void tipc_link_wakeup_ports(struct link *l_ptr, int all)
 		if (win <= 0)
 			break;
 		list_del_init(&p_ptr->wait_list);
-		spin_lock_bh(p_ptr->publ.lock);
-		p_ptr->publ.congested = 0;
-		p_ptr->wakeup(&p_ptr->publ);
+		spin_lock_bh(p_ptr->lock);
+		p_ptr->congested = 0;
+		p_ptr->wakeup(p_ptr);
 		win -= p_ptr->waiting_pkts;
-		spin_unlock_bh(p_ptr->publ.lock);
+		spin_unlock_bh(p_ptr->lock);
 	}
 
 exit:
@@ -824,7 +823,10 @@ static void link_add_to_outqueue(struct link *l_ptr,
 		l_ptr->last_out = buf;
 	} else
 		l_ptr->first_out = l_ptr->last_out = buf;
+
 	l_ptr->out_queue_size++;
+	if (l_ptr->out_queue_size > l_ptr->stats.max_queue_sz)
+		l_ptr->stats.max_queue_sz = l_ptr->out_queue_size;
 }
 
 /*
@@ -867,9 +869,6 @@ int tipc_link_send_buf(struct link *l_ptr, struct sk_buff *buf)
 
 	/* Packet can be queued or sent: */
 
-	if (queue_size > l_ptr->stats.max_queue_sz)
-		l_ptr->stats.max_queue_sz = queue_size;
-
 	if (likely(!tipc_bearer_congested(l_ptr->b_ptr, l_ptr) &&
 		   !link_congested(l_ptr))) {
 		link_add_to_outqueue(l_ptr, buf, msg);
@@ -1027,12 +1026,12 @@ int tipc_send_buf_fast(struct sk_buff *buf, u32 destnode)
  * except for total message length.
  * Returns user data length or errno.
  */
-int tipc_link_send_sections_fast(struct port *sender,
+int tipc_link_send_sections_fast(struct tipc_port *sender,
 				 struct iovec const *msg_sect,
 				 const u32 num_sect,
 				 u32 destaddr)
 {
-	struct tipc_msg *hdr = &sender->publ.phdr;
+	struct tipc_msg *hdr = &sender->phdr;
 	struct link *l_ptr;
 	struct sk_buff *buf;
 	struct tipc_node *node;
@@ -1045,7 +1044,7 @@ again:
 	 * (Must not hold any locks while building message.)
 	 */
 
-	res = tipc_msg_build(hdr, msg_sect, num_sect, sender->publ.max_pkt,
+	res = tipc_msg_build(hdr, msg_sect, num_sect, sender->max_pkt,
 			!sender->user_port, &buf);
 
 	read_lock_bh(&tipc_net_lock);
@@ -1056,7 +1055,7 @@ again:
 		if (likely(l_ptr)) {
 			if (likely(buf)) {
 				res = link_send_buf_fast(l_ptr, buf,
-							 &sender->publ.max_pkt);
+							 &sender->max_pkt);
 				if (unlikely(res < 0))
 					buf_discard(buf);
 exit:
@@ -1075,7 +1074,7 @@ exit:
 			if (link_congested(l_ptr) ||
 			    !list_empty(&l_ptr->b_ptr->cong_links)) {
 				res = link_schedule_port(l_ptr,
-							 sender->publ.ref, res);
+							 sender->ref, res);
 				goto exit;
 			}
 
@@ -1084,12 +1083,12 @@ exit:
 			 * then re-try fast path or fragment the message
 			 */
 
-			sender->publ.max_pkt = l_ptr->max_pkt;
+			sender->max_pkt = l_ptr->max_pkt;
 			tipc_node_unlock(node);
 			read_unlock_bh(&tipc_net_lock);
 
 
-			if ((msg_hdr_sz(hdr) + res) <= sender->publ.max_pkt)
+			if ((msg_hdr_sz(hdr) + res) <= sender->max_pkt)
 				goto again;
 
 			return link_send_sections_long(sender, msg_sect,
@@ -1123,14 +1122,14 @@ exit:
  *
  * Returns user data length or errno.
  */
-static int link_send_sections_long(struct port *sender,
+static int link_send_sections_long(struct tipc_port *sender,
 				   struct iovec const *msg_sect,
 				   u32 num_sect,
 				   u32 destaddr)
 {
 	struct link *l_ptr;
 	struct tipc_node *node;
-	struct tipc_msg *hdr = &sender->publ.phdr;
+	struct tipc_msg *hdr = &sender->phdr;
 	u32 dsz = msg_data_sz(hdr);
 	u32 max_pkt, fragm_sz, rest;
 	struct tipc_msg fragm_hdr;
@@ -1142,7 +1141,7 @@ static int link_send_sections_long(struct port *sender,
 
 again:
 	fragm_no = 1;
-	max_pkt = sender->publ.max_pkt - INT_H_SIZE;
+	max_pkt = sender->max_pkt - INT_H_SIZE;
 		/* leave room for tunnel header in case of link changeover */
 	fragm_sz = max_pkt - INT_H_SIZE;
 		/* leave room for fragmentation header in each fragment */
@@ -1157,7 +1156,7 @@ again:
 
 	tipc_msg_init(&fragm_hdr, MSG_FRAGMENTER, FIRST_FRAGMENT,
 		 INT_H_SIZE, msg_destnode(hdr));
-	msg_set_link_selector(&fragm_hdr, sender->publ.ref);
+	msg_set_link_selector(&fragm_hdr, sender->ref);
 	msg_set_size(&fragm_hdr, max_pkt);
 	msg_set_fragm_no(&fragm_hdr, 1);
 
@@ -1238,13 +1237,13 @@ error:
 	node = tipc_node_find(destaddr);
 	if (likely(node)) {
 		tipc_node_lock(node);
-		l_ptr = node->active_links[sender->publ.ref & 1];
+		l_ptr = node->active_links[sender->ref & 1];
 		if (!l_ptr) {
 			tipc_node_unlock(node);
 			goto reject;
 		}
 		if (l_ptr->max_pkt < max_pkt) {
-			sender->publ.max_pkt = l_ptr->max_pkt;
+			sender->max_pkt = l_ptr->max_pkt;
 			tipc_node_unlock(node);
 			for (; buf_chain; buf_chain = buf) {
 				buf = buf_chain->next;
@@ -1441,7 +1440,7 @@ static void link_retransmit_failure(struct link *l_ptr, struct sk_buff *buf)
 		info("Outstanding acks: %lu\n",
 		     (unsigned long) TIPC_SKB_CB(buf)->handle);
 
-		n_ptr = l_ptr->owner->next;
+		n_ptr = tipc_bclink_retransmit_to();
 		tipc_node_lock(n_ptr);
 
 		tipc_addr_string_fill(addr_string, n_ptr->addr);
@@ -1595,11 +1594,10 @@ static int link_recv_buf_validate(struct sk_buff *buf)
  * structure (i.e. cannot be NULL), but bearer can be inactive.
  */
 
-void tipc_recv_msg(struct sk_buff *head, struct tipc_bearer *tb_ptr)
+void tipc_recv_msg(struct sk_buff *head, struct tipc_bearer *b_ptr)
 {
 	read_lock_bh(&tipc_net_lock);
 	while (head) {
-		struct bearer *b_ptr = (struct bearer *)tb_ptr;
 		struct tipc_node *n_ptr;
 		struct link *l_ptr;
 		struct sk_buff *crs;
@@ -1950,6 +1948,7 @@ void tipc_link_send_proto_msg(struct link *l_ptr, u32 msg_typ, int probe_msg,
 		msg_set_ack(msg, mod(l_ptr->reset_checkpoint - 1));
 		msg_set_seq_gap(msg, 0);
 		msg_set_next_sent(msg, 1);
+		msg_set_probe(msg, 0);
 		msg_set_link_tolerance(msg, l_ptr->tolerance);
 		msg_set_linkprio(msg, l_ptr->priority);
 		msg_set_max_pkt(msg, l_ptr->max_pkt_target);
@@ -2618,6 +2617,9 @@ static void link_check_defragm_bufs(struct link *l_ptr)
 
 static void link_set_supervision_props(struct link *l_ptr, u32 tolerance)
 {
+	if ((tolerance < TIPC_MIN_LINK_TOL) || (tolerance > TIPC_MAX_LINK_TOL))
+		return;
+
 	l_ptr->tolerance = tolerance;
 	l_ptr->continuity_interval =
 		((tolerance / 4) > 500) ? 500 : tolerance / 4;
@@ -2658,7 +2660,7 @@ void tipc_link_set_queue_limits(struct link *l_ptr, u32 window)
 static struct link *link_find_link(const char *name, struct tipc_node **node)
 {
 	struct link_name link_name_parts;
-	struct bearer *b_ptr;
+	struct tipc_bearer *b_ptr;
 	struct link *l_ptr;
 
 	if (!link_name_validate(name, &link_name_parts))
@@ -2961,7 +2963,7 @@ static void link_print(struct link *l_ptr, const char *str)
 
 	tipc_printf(buf, str);
 	tipc_printf(buf, "Link %x<%s>:",
-		    l_ptr->addr, l_ptr->b_ptr->publ.name);
+		    l_ptr->addr, l_ptr->b_ptr->name);
 
 #ifdef CONFIG_TIPC_DEBUG
 	if (link_reset_reset(l_ptr) || link_reset_unknown(l_ptr))
@@ -2981,9 +2983,9 @@ static void link_print(struct link *l_ptr, const char *str)
 		     != (l_ptr->out_queue_size - 1)) ||
 		    (l_ptr->last_out->next != NULL)) {
 			tipc_printf(buf, "\nSend queue inconsistency\n");
-			tipc_printf(buf, "first_out= %x ", l_ptr->first_out);
-			tipc_printf(buf, "next_out= %x ", l_ptr->next_out);
-			tipc_printf(buf, "last_out= %x ", l_ptr->last_out);
+			tipc_printf(buf, "first_out= %p ", l_ptr->first_out);
+			tipc_printf(buf, "next_out= %p ", l_ptr->next_out);
+			tipc_printf(buf, "last_out= %p ", l_ptr->last_out);
 		}
 	} else
 		tipc_printf(buf, "[]");
diff --git a/net/tipc/link.h b/net/tipc/link.h
index 70967e6..a7794e7 100644
--- a/net/tipc/link.h
+++ b/net/tipc/link.h
@@ -2,7 +2,7 @@
  * net/tipc/link.h: Include file for TIPC link code
  *
  * Copyright (c) 1995-2006, Ericsson AB
- * Copyright (c) 2004-2005, Wind River Systems
+ * Copyright (c) 2004-2005, 2010-2011, Wind River Systems
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -122,7 +122,7 @@ struct link {
 	u32 checkpoint;
 	u32 peer_session;
 	u32 peer_bearer_id;
-	struct bearer *b_ptr;
+	struct tipc_bearer *b_ptr;
 	u32 tolerance;
 	u32 continuity_interval;
 	u32 abort_limit;
@@ -196,24 +196,18 @@ struct link {
 		u32 bearer_congs;
 		u32 deferred_recv;
 		u32 duplicates;
-
-		/* for statistical profiling of send queue size */
-
-		u32 max_queue_sz;
-		u32 accu_queue_sz;
-		u32 queue_sz_counts;
-
-		/* for statistical profiling of message lengths */
-
-		u32 msg_length_counts;
-		u32 msg_lengths_total;
-		u32 msg_length_profile[7];
+		u32 max_queue_sz;	/* send queue size high water mark */
+		u32 accu_queue_sz;	/* used for send queue size profiling */
+		u32 queue_sz_counts;	/* used for send queue size profiling */
+		u32 msg_length_counts;	/* used for message length profiling */
+		u32 msg_lengths_total;	/* used for message length profiling */
+		u32 msg_length_profile[7]; /* used for msg. length profiling */
 	} stats;
 };
 
-struct port;
+struct tipc_port;
 
-struct link *tipc_link_create(struct bearer *b_ptr, const u32 peer,
+struct link *tipc_link_create(struct tipc_bearer *b_ptr, const u32 peer,
 			      const struct tipc_media_addr *media_addr);
 void tipc_link_delete(struct link *l_ptr);
 void tipc_link_changeover(struct link *l_ptr);
@@ -230,7 +224,7 @@ void tipc_link_reset(struct link *l_ptr);
 int tipc_link_send(struct sk_buff *buf, u32 dest, u32 selector);
 int tipc_link_send_buf(struct link *l_ptr, struct sk_buff *buf);
 u32 tipc_link_get_max_pkt(u32 dest, u32 selector);
-int tipc_link_send_sections_fast(struct port *sender,
+int tipc_link_send_sections_fast(struct tipc_port *sender,
 				 struct iovec const *msg_sect,
 				 const u32 num_sect,
 				 u32 destnode);
diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index bb6180c..0787e12 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -2,7 +2,7 @@
  * net/tipc/msg.c: TIPC message header routines
  *
  * Copyright (c) 2000-2006, Ericsson AB
- * Copyright (c) 2005, Wind River Systems
+ * Copyright (c) 2005, 2010-2011, Wind River Systems
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -381,20 +381,15 @@ void tipc_msg_dbg(struct print_buf *buf, struct tipc_msg *msg, const char *str)
 			tipc_printf(buf, ":OPRT(%u):", msg_origport(msg));
 			tipc_printf(buf, ":DPRT(%u):", msg_destport(msg));
 		}
-		if (msg_routed(msg) && !msg_non_seq(msg))
-			tipc_printf(buf, ":TSEQN(%u)", msg_transp_seqno(msg));
 	}
 	if (msg_user(msg) == NAME_DISTRIBUTOR) {
 		tipc_printf(buf, ":ONOD(%x):", msg_orignode(msg));
 		tipc_printf(buf, ":DNOD(%x):", msg_destnode(msg));
-		if (msg_routed(msg))
-			tipc_printf(buf, ":CSEQN(%u)", msg_transp_seqno(msg));
 	}
 
 	if (msg_user(msg) ==  LINK_CONFIG) {
 		u32 *raw = (u32 *)msg;
 		struct tipc_media_addr *orig = (struct tipc_media_addr *)&raw[5];
-		tipc_printf(buf, ":REQL(%u):", msg_req_links(msg));
 		tipc_printf(buf, ":DDOM(%x):", msg_dest_domain(msg));
 		tipc_printf(buf, ":NETID(%u):", msg_bc_netid(msg));
 		tipc_media_addr_printf(buf, orig);
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index 92c4c4f..9d643a1 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -2,7 +2,7 @@
  * net/tipc/msg.h: Include file for TIPC message header routines
  *
  * Copyright (c) 2000-2007, Ericsson AB
- * Copyright (c) 2005-2008, Wind River Systems
+ * Copyright (c) 2005-2008, 2010-2011, Wind River Systems
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -438,11 +438,6 @@ static inline void msg_set_nametype(struct tipc_msg *m, u32 n)
 	msg_set_word(m, 8, n);
 }
 
-static inline u32 msg_transp_seqno(struct tipc_msg *m)
-{
-	return msg_word(m, 8);
-}
-
 static inline void msg_set_timestamp(struct tipc_msg *m, u32 n)
 {
 	msg_set_word(m, 8, n);
@@ -453,11 +448,6 @@ static inline u32 msg_timestamp(struct tipc_msg *m)
 	return msg_word(m, 8);
 }
 
-static inline void msg_set_transp_seqno(struct tipc_msg *m, u32 n)
-{
-	msg_set_word(m, 8, n);
-}
-
 static inline u32 msg_nameinst(struct tipc_msg *m)
 {
 	return msg_word(m, 9);
@@ -577,16 +567,6 @@ static inline void msg_set_seq_gap(struct tipc_msg *m, u32 n)
 	msg_set_bits(m, 1, 16, 0x1fff, n);
 }
 
-static inline u32 msg_req_links(struct tipc_msg *m)
-{
-	return msg_bits(m, 1, 16, 0xfff);
-}
-
-static inline void msg_set_req_links(struct tipc_msg *m, u32 n)
-{
-	msg_set_bits(m, 1, 16, 0xfff, n);
-}
-
 
 /*
  * Word 2
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 3af53e3..e4dba1d 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -2,7 +2,7 @@
  * net/tipc/node.c: TIPC node management routines
  *
  * Copyright (c) 2000-2006, Ericsson AB
- * Copyright (c) 2005-2006, Wind River Systems
+ * Copyright (c) 2005-2006, 2010-2011, Wind River Systems
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -238,7 +238,7 @@ struct tipc_node *tipc_node_attach_link(struct link *l_ptr)
 			return n_ptr;
 		}
 		err("Attempt to establish second link on <%s> to %s\n",
-		    l_ptr->b_ptr->publ.name,
+		    l_ptr->b_ptr->name,
 		    tipc_addr_string_fill(addr_string, l_ptr->addr));
 	}
 	return NULL;
diff --git a/net/tipc/port.c b/net/tipc/port.c
index 067bab2..6ff78f9 100644
--- a/net/tipc/port.c
+++ b/net/tipc/port.c
@@ -2,7 +2,7 @@
  * net/tipc/port.c: TIPC port code
  *
  * Copyright (c) 1992-2007, Ericsson AB
- * Copyright (c) 2004-2008, Wind River Systems
+ * Copyright (c) 2004-2008, 2010-2011, Wind River Systems
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -54,33 +54,19 @@ static DEFINE_SPINLOCK(queue_lock);
 
 static LIST_HEAD(ports);
 static void port_handle_node_down(unsigned long ref);
-static struct sk_buff *port_build_self_abort_msg(struct port *, u32 err);
-static struct sk_buff *port_build_peer_abort_msg(struct port *, u32 err);
+static struct sk_buff *port_build_self_abort_msg(struct tipc_port *, u32 err);
+static struct sk_buff *port_build_peer_abort_msg(struct tipc_port *, u32 err);
 static void port_timeout(unsigned long ref);
 
 
-static u32 port_peernode(struct port *p_ptr)
+static u32 port_peernode(struct tipc_port *p_ptr)
 {
-	return msg_destnode(&p_ptr->publ.phdr);
+	return msg_destnode(&p_ptr->phdr);
 }
 
-static u32 port_peerport(struct port *p_ptr)
+static u32 port_peerport(struct tipc_port *p_ptr)
 {
-	return msg_destport(&p_ptr->publ.phdr);
-}
-
-static u32 port_out_seqno(struct port *p_ptr)
-{
-	return msg_transp_seqno(&p_ptr->publ.phdr);
-}
-
-static void port_incr_out_seqno(struct port *p_ptr)
-{
-	struct tipc_msg *m = &p_ptr->publ.phdr;
-
-	if (likely(!msg_routed(m)))
-		return;
-	msg_set_transp_seqno(m, (msg_transp_seqno(m) + 1));
+	return msg_destport(&p_ptr->phdr);
 }
 
 /**
@@ -94,7 +80,7 @@ int tipc_multicast(u32 ref, struct tipc_name_seq const *seq,
 	struct sk_buff *buf;
 	struct sk_buff *ibuf = NULL;
 	struct port_list dports = {0, NULL, };
-	struct port *oport = tipc_port_deref(ref);
+	struct tipc_port *oport = tipc_port_deref(ref);
 	int ext_targets;
 	int res;
 
@@ -103,7 +89,7 @@ int tipc_multicast(u32 ref, struct tipc_name_seq const *seq,
 
 	/* Create multicast message */
 
-	hdr = &oport->publ.phdr;
+	hdr = &oport->phdr;
 	msg_set_type(hdr, TIPC_MCAST_MSG);
 	msg_set_nametype(hdr, seq->type);
 	msg_set_namelower(hdr, seq->lower);
@@ -211,7 +197,7 @@ struct tipc_port *tipc_createport_raw(void *usr_handle,
 			void (*wakeup)(struct tipc_port *),
 			const u32 importance)
 {
-	struct port *p_ptr;
+	struct tipc_port *p_ptr;
 	struct tipc_msg *msg;
 	u32 ref;
 
@@ -220,21 +206,19 @@ struct tipc_port *tipc_createport_raw(void *usr_handle,
 		warn("Port creation failed, no memory\n");
 		return NULL;
 	}
-	ref = tipc_ref_acquire(p_ptr, &p_ptr->publ.lock);
+	ref = tipc_ref_acquire(p_ptr, &p_ptr->lock);
 	if (!ref) {
 		warn("Port creation failed, reference table exhausted\n");
 		kfree(p_ptr);
 		return NULL;
 	}
 
-	p_ptr->publ.usr_handle = usr_handle;
-	p_ptr->publ.max_pkt = MAX_PKT_DEFAULT;
-	p_ptr->publ.ref = ref;
-	msg = &p_ptr->publ.phdr;
+	p_ptr->usr_handle = usr_handle;
+	p_ptr->max_pkt = MAX_PKT_DEFAULT;
+	p_ptr->ref = ref;
+	msg = &p_ptr->phdr;
 	tipc_msg_init(msg, importance, TIPC_NAMED_MSG, LONG_H_SIZE, 0);
 	msg_set_origport(msg, ref);
-	p_ptr->last_in_seqno = 41;
-	p_ptr->sent = 1;
 	INIT_LIST_HEAD(&p_ptr->wait_list);
 	INIT_LIST_HEAD(&p_ptr->subscription.nodesub_list);
 	p_ptr->dispatcher = dispatcher;
@@ -246,12 +230,12 @@ struct tipc_port *tipc_createport_raw(void *usr_handle,
 	INIT_LIST_HEAD(&p_ptr->port_list);
 	list_add_tail(&p_ptr->port_list, &ports);
 	spin_unlock_bh(&tipc_port_list_lock);
-	return &(p_ptr->publ);
+	return p_ptr;
 }
 
 int tipc_deleteport(u32 ref)
 {
-	struct port *p_ptr;
+	struct tipc_port *p_ptr;
 	struct sk_buff *buf = NULL;
 
 	tipc_withdraw(ref, 0, NULL);
@@ -263,7 +247,7 @@ int tipc_deleteport(u32 ref)
 	tipc_port_unlock(p_ptr);
 
 	k_cancel_timer(&p_ptr->timer);
-	if (p_ptr->publ.connected) {
+	if (p_ptr->connected) {
 		buf = port_build_peer_abort_msg(p_ptr, TIPC_ERR_NO_PORT);
 		tipc_nodesub_unsubscribe(&p_ptr->subscription);
 	}
@@ -279,14 +263,14 @@ int tipc_deleteport(u32 ref)
 	return 0;
 }
 
-static int port_unreliable(struct port *p_ptr)
+static int port_unreliable(struct tipc_port *p_ptr)
 {
-	return msg_src_droppable(&p_ptr->publ.phdr);
+	return msg_src_droppable(&p_ptr->phdr);
 }
 
 int tipc_portunreliable(u32 ref, unsigned int *isunreliable)
 {
-	struct port *p_ptr;
+	struct tipc_port *p_ptr;
 
 	p_ptr = tipc_port_lock(ref);
 	if (!p_ptr)
@@ -298,24 +282,24 @@ int tipc_portunreliable(u32 ref, unsigned int *isunreliable)
 
 int tipc_set_portunreliable(u32 ref, unsigned int isunreliable)
 {
-	struct port *p_ptr;
+	struct tipc_port *p_ptr;
 
 	p_ptr = tipc_port_lock(ref);
 	if (!p_ptr)
 		return -EINVAL;
-	msg_set_src_droppable(&p_ptr->publ.phdr, (isunreliable != 0));
+	msg_set_src_droppable(&p_ptr->phdr, (isunreliable != 0));
 	tipc_port_unlock(p_ptr);
 	return 0;
 }
 
-static int port_unreturnable(struct port *p_ptr)
+static int port_unreturnable(struct tipc_port *p_ptr)
 {
-	return msg_dest_droppable(&p_ptr->publ.phdr);
+	return msg_dest_droppable(&p_ptr->phdr);
 }
 
 int tipc_portunreturnable(u32 ref, unsigned int *isunrejectable)
 {
-	struct port *p_ptr;
+	struct tipc_port *p_ptr;
 
 	p_ptr = tipc_port_lock(ref);
 	if (!p_ptr)
@@ -327,12 +311,12 @@ int tipc_portunreturnable(u32 ref, unsigned int *isunrejectable)
 
 int tipc_set_portunreturnable(u32 ref, unsigned int isunrejectable)
 {
-	struct port *p_ptr;
+	struct tipc_port *p_ptr;
 
 	p_ptr = tipc_port_lock(ref);
 	if (!p_ptr)
 		return -EINVAL;
-	msg_set_dest_droppable(&p_ptr->publ.phdr, (isunrejectable != 0));
+	msg_set_dest_droppable(&p_ptr->phdr, (isunrejectable != 0));
 	tipc_port_unlock(p_ptr);
 	return 0;
 }
@@ -345,7 +329,7 @@ int tipc_set_portunreturnable(u32 ref, unsigned int isunrejectable)
 static struct sk_buff *port_build_proto_msg(u32 destport, u32 destnode,
 					    u32 origport, u32 orignode,
 					    u32 usr, u32 type, u32 err,
-					    u32 seqno, u32 ack)
+					    u32 ack)
 {
 	struct sk_buff *buf;
 	struct tipc_msg *msg;
@@ -358,7 +342,6 @@ static struct sk_buff *port_build_proto_msg(u32 destport, u32 destnode,
 		msg_set_destport(msg, destport);
 		msg_set_origport(msg, origport);
 		msg_set_orignode(msg, orignode);
-		msg_set_transp_seqno(msg, seqno);
 		msg_set_msgcnt(msg, ack);
 	}
 	return buf;
@@ -413,10 +396,10 @@ int tipc_reject_msg(struct sk_buff *buf, u32 err)
 	/* send self-abort message when rejecting on a connected port */
 	if (msg_connected(msg)) {
 		struct sk_buff *abuf = NULL;
-		struct port *p_ptr = tipc_port_lock(msg_destport(msg));
+		struct tipc_port *p_ptr = tipc_port_lock(msg_destport(msg));
 
 		if (p_ptr) {
-			if (p_ptr->publ.connected)
+			if (p_ptr->connected)
 				abuf = port_build_self_abort_msg(p_ptr, err);
 			tipc_port_unlock(p_ptr);
 		}
@@ -429,7 +412,7 @@ int tipc_reject_msg(struct sk_buff *buf, u32 err)
 	return data_sz;
 }
 
-int tipc_port_reject_sections(struct port *p_ptr, struct tipc_msg *hdr,
+int tipc_port_reject_sections(struct tipc_port *p_ptr, struct tipc_msg *hdr,
 			      struct iovec const *msg_sect, u32 num_sect,
 			      int err)
 {
@@ -446,13 +429,13 @@ int tipc_port_reject_sections(struct port *p_ptr, struct tipc_msg *hdr,
 
 static void port_timeout(unsigned long ref)
 {
-	struct port *p_ptr = tipc_port_lock(ref);
+	struct tipc_port *p_ptr = tipc_port_lock(ref);
 	struct sk_buff *buf = NULL;
 
 	if (!p_ptr)
 		return;
 
-	if (!p_ptr->publ.connected) {
+	if (!p_ptr->connected) {
 		tipc_port_unlock(p_ptr);
 		return;
 	}
@@ -463,14 +446,12 @@ static void port_timeout(unsigned long ref)
 	} else {
 		buf = port_build_proto_msg(port_peerport(p_ptr),
 					   port_peernode(p_ptr),
-					   p_ptr->publ.ref,
+					   p_ptr->ref,
 					   tipc_own_addr,
 					   CONN_MANAGER,
 					   CONN_PROBE,
 					   TIPC_OK,
-					   port_out_seqno(p_ptr),
 					   0);
-		port_incr_out_seqno(p_ptr);
 		p_ptr->probing_state = PROBING;
 		k_start_timer(&p_ptr->timer, p_ptr->probing_interval);
 	}
@@ -481,7 +462,7 @@ static void port_timeout(unsigned long ref)
 
 static void port_handle_node_down(unsigned long ref)
 {
-	struct port *p_ptr = tipc_port_lock(ref);
+	struct tipc_port *p_ptr = tipc_port_lock(ref);
 	struct sk_buff *buf = NULL;
 
 	if (!p_ptr)
@@ -492,73 +473,71 @@ static void port_handle_node_down(unsigned long ref)
 }
 
 
-static struct sk_buff *port_build_self_abort_msg(struct port *p_ptr, u32 err)
+static struct sk_buff *port_build_self_abort_msg(struct tipc_port *p_ptr, u32 err)
 {
-	u32 imp = msg_importance(&p_ptr->publ.phdr);
+	u32 imp = msg_importance(&p_ptr->phdr);
 
-	if (!p_ptr->publ.connected)
+	if (!p_ptr->connected)
 		return NULL;
 	if (imp < TIPC_CRITICAL_IMPORTANCE)
 		imp++;
-	return port_build_proto_msg(p_ptr->publ.ref,
+	return port_build_proto_msg(p_ptr->ref,
 				    tipc_own_addr,
 				    port_peerport(p_ptr),
 				    port_peernode(p_ptr),
 				    imp,
 				    TIPC_CONN_MSG,
 				    err,
-				    p_ptr->last_in_seqno + 1,
 				    0);
 }
 
 
-static struct sk_buff *port_build_peer_abort_msg(struct port *p_ptr, u32 err)
+static struct sk_buff *port_build_peer_abort_msg(struct tipc_port *p_ptr, u32 err)
 {
-	u32 imp = msg_importance(&p_ptr->publ.phdr);
+	u32 imp = msg_importance(&p_ptr->phdr);
 
-	if (!p_ptr->publ.connected)
+	if (!p_ptr->connected)
 		return NULL;
 	if (imp < TIPC_CRITICAL_IMPORTANCE)
 		imp++;
 	return port_build_proto_msg(port_peerport(p_ptr),
 				    port_peernode(p_ptr),
-				    p_ptr->publ.ref,
+				    p_ptr->ref,
 				    tipc_own_addr,
 				    imp,
 				    TIPC_CONN_MSG,
 				    err,
-				    port_out_seqno(p_ptr),
 				    0);
 }
 
 void tipc_port_recv_proto_msg(struct sk_buff *buf)
 {
 	struct tipc_msg *msg = buf_msg(buf);
-	struct port *p_ptr = tipc_port_lock(msg_destport(msg));
+	struct tipc_port *p_ptr = tipc_port_lock(msg_destport(msg));
 	u32 err = TIPC_OK;
 	struct sk_buff *r_buf = NULL;
 	struct sk_buff *abort_buf = NULL;
 
 	if (!p_ptr) {
 		err = TIPC_ERR_NO_PORT;
-	} else if (p_ptr->publ.connected) {
+	} else if (p_ptr->connected) {
 		if ((port_peernode(p_ptr) != msg_orignode(msg)) ||
 		    (port_peerport(p_ptr) != msg_origport(msg))) {
 			err = TIPC_ERR_NO_PORT;
 		} else if (msg_type(msg) == CONN_ACK) {
 			int wakeup = tipc_port_congested(p_ptr) &&
-				     p_ptr->publ.congested &&
+				     p_ptr->congested &&
 				     p_ptr->wakeup;
 			p_ptr->acked += msg_msgcnt(msg);
 			if (tipc_port_congested(p_ptr))
 				goto exit;
-			p_ptr->publ.congested = 0;
+			p_ptr->congested = 0;
 			if (!wakeup)
 				goto exit;
-			p_ptr->wakeup(&p_ptr->publ);
+			p_ptr->wakeup(p_ptr);
 			goto exit;
 		}
-	} else if (p_ptr->publ.published) {
+	} else if (p_ptr->published) {
 		err = TIPC_ERR_NO_PORT;
 	}
 	if (err) {
@@ -569,7 +548,6 @@ void tipc_port_recv_proto_msg(struct sk_buff *buf)
 					     TIPC_HIGH_IMPORTANCE,
 					     TIPC_CONN_MSG,
 					     err,
-					     0,
 					     0);
 		goto exit;
 	}
@@ -583,11 +561,9 @@ void tipc_port_recv_proto_msg(struct sk_buff *buf)
 					     CONN_MANAGER,
 					     CONN_PROBE_REPLY,
 					     TIPC_OK,
-					     port_out_seqno(p_ptr),
 					     0);
 	}
 	p_ptr->probing_state = CONFIRMED;
-	port_incr_out_seqno(p_ptr);
 exit:
 	if (p_ptr)
 		tipc_port_unlock(p_ptr);
@@ -596,29 +572,29 @@ exit:
 	buf_discard(buf);
 }
 
-static void port_print(struct port *p_ptr, struct print_buf *buf, int full_id)
+static void port_print(struct tipc_port *p_ptr, struct print_buf *buf, int full_id)
 {
 	struct publication *publ;
 
 	if (full_id)
 		tipc_printf(buf, "<%u.%u.%u:%u>:",
 			    tipc_zone(tipc_own_addr), tipc_cluster(tipc_own_addr),
-			    tipc_node(tipc_own_addr), p_ptr->publ.ref);
+			    tipc_node(tipc_own_addr), p_ptr->ref);
 	else
-		tipc_printf(buf, "%-10u:", p_ptr->publ.ref);
+		tipc_printf(buf, "%-10u:", p_ptr->ref);
 
-	if (p_ptr->publ.connected) {
+	if (p_ptr->connected) {
 		u32 dport = port_peerport(p_ptr);
 		u32 destnode = port_peernode(p_ptr);
 
 		tipc_printf(buf, " connected to <%u.%u.%u:%u>",
 			    tipc_zone(destnode), tipc_cluster(destnode),
 			    tipc_node(destnode), dport);
-		if (p_ptr->publ.conn_type != 0)
+		if (p_ptr->conn_type != 0)
 			tipc_printf(buf, " via {%u,%u}",
-				    p_ptr->publ.conn_type,
-				    p_ptr->publ.conn_instance);
-	} else if (p_ptr->publ.published) {
+				    p_ptr->conn_type,
+				    p_ptr->conn_instance);
+	} else if (p_ptr->published) {
 		tipc_printf(buf, " bound to");
 		list_for_each_entry(publ, &p_ptr->publications, pport_list) {
 			if (publ->lower == publ->upper)
@@ -639,7 +615,7 @@ struct sk_buff *tipc_port_get_ports(void)
 	struct sk_buff *buf;
 	struct tlv_desc *rep_tlv;
 	struct print_buf pb;
-	struct port *p_ptr;
+	struct tipc_port *p_ptr;
 	int str_len;
 
 	buf = tipc_cfg_reply_alloc(TLV_SPACE(MAX_PORT_QUERY));
@@ -650,9 +626,9 @@ struct sk_buff *tipc_port_get_ports(void)
 	tipc_printbuf_init(&pb, TLV_DATA(rep_tlv), MAX_PORT_QUERY);
 	spin_lock_bh(&tipc_port_list_lock);
 	list_for_each_entry(p_ptr, &ports, port_list) {
-		spin_lock_bh(p_ptr->publ.lock);
+		spin_lock_bh(p_ptr->lock);
 		port_print(p_ptr, &pb, 0);
-		spin_unlock_bh(p_ptr->publ.lock);
+		spin_unlock_bh(p_ptr->lock);
 	}
 	spin_unlock_bh(&tipc_port_list_lock);
 	str_len = tipc_printbuf_validate(&pb);
@@ -665,12 +641,12 @@ struct sk_buff *tipc_port_get_ports(void)
 
 void tipc_port_reinit(void)
 {
-	struct port *p_ptr;
+	struct tipc_port *p_ptr;
 	struct tipc_msg *msg;
 
 	spin_lock_bh(&tipc_port_list_lock);
 	list_for_each_entry(p_ptr, &ports, port_list) {
-		msg = &p_ptr->publ.phdr;
+		msg = &p_ptr->phdr;
 		if (msg_orignode(msg) == tipc_own_addr)
 			break;
 		msg_set_prevnode(msg, tipc_own_addr);
@@ -695,7 +671,7 @@ static void port_dispatcher_sigh(void *dummy)
 	spin_unlock_bh(&queue_lock);
 
 	while (buf) {
-		struct port *p_ptr;
+		struct tipc_port *p_ptr;
 		struct user_port *up_ptr;
 		struct tipc_portid orig;
 		struct tipc_name_seq dseq;
@@ -720,8 +696,8 @@ static void port_dispatcher_sigh(void *dummy)
 		orig.node = msg_orignode(msg);
 		up_ptr = p_ptr->user_port;
 		usr_handle = up_ptr->usr_handle;
-		connected = p_ptr->publ.connected;
-		published = p_ptr->publ.published;
+		connected = p_ptr->connected;
+		published = p_ptr->published;
 
 		if (unlikely(msg_errcode(msg)))
 			goto err;
@@ -732,6 +708,7 @@ static void port_dispatcher_sigh(void *dummy)
 				tipc_conn_msg_event cb = up_ptr->conn_msg_cb;
 				u32 peer_port = port_peerport(p_ptr);
 				u32 peer_node = port_peernode(p_ptr);
+				u32 dsz;
 
 				tipc_port_unlock(p_ptr);
 				if (unlikely(!cb))
@@ -742,13 +719,14 @@ static void port_dispatcher_sigh(void *dummy)
 				} else if ((msg_origport(msg) != peer_port) ||
 					   (msg_orignode(msg) != peer_node))
 					goto reject;
-				if (unlikely(++p_ptr->publ.conn_unacked >=
-					     TIPC_FLOW_CONTROL_WIN))
+				dsz = msg_data_sz(msg);
+				if (unlikely(dsz &&
+					     (++p_ptr->conn_unacked >=
+					      TIPC_FLOW_CONTROL_WIN)))
 					tipc_acknowledge(dref,
-							 p_ptr->publ.conn_unacked);
+							 p_ptr->conn_unacked);
 				skb_pull(buf, msg_hdr_sz(msg));
-				cb(usr_handle, dref, &buf, msg_data(msg),
-				   msg_data_sz(msg));
+				cb(usr_handle, dref, &buf, msg_data(msg), dsz);
 				break;
 			}
 		case TIPC_DIRECT_MSG:{
@@ -872,7 +850,7 @@ static u32 port_dispatcher(struct tipc_port *dummy, struct sk_buff *buf)
 
 static void port_wakeup_sh(unsigned long ref)
 {
-	struct port *p_ptr;
+	struct tipc_port *p_ptr;
 	struct user_port *up_ptr;
 	tipc_continue_event cb = NULL;
 	void *uh = NULL;
@@ -898,14 +876,14 @@ static void port_wakeup(struct tipc_port *p_ptr)
 
 void tipc_acknowledge(u32 ref, u32 ack)
 {
-	struct port *p_ptr;
+	struct tipc_port *p_ptr;
 	struct sk_buff *buf = NULL;
 
 	p_ptr = tipc_port_lock(ref);
 	if (!p_ptr)
 		return;
-	if (p_ptr->publ.connected) {
-		p_ptr->publ.conn_unacked -= ack;
+	if (p_ptr->connected) {
+		p_ptr->conn_unacked -= ack;
 		buf = port_build_proto_msg(port_peerport(p_ptr),
 					   port_peernode(p_ptr),
 					   ref,
@@ -913,7 +891,6 @@ void tipc_acknowledge(u32 ref, u32 ack)
 					   CONN_MANAGER,
 					   CONN_ACK,
 					   TIPC_OK,
-					   port_out_seqno(p_ptr),
 					   ack);
 	}
 	tipc_port_unlock(p_ptr);
@@ -936,14 +913,14 @@ int tipc_createport(void *usr_handle,
 		    u32 *portref)
 {
 	struct user_port *up_ptr;
-	struct port *p_ptr;
+	struct tipc_port *p_ptr;
 
 	up_ptr = kmalloc(sizeof(*up_ptr), GFP_ATOMIC);
 	if (!up_ptr) {
 		warn("Port creation failed, no memory\n");
 		return -ENOMEM;
 	}
-	p_ptr = (struct port *)tipc_createport_raw(NULL, port_dispatcher,
+	p_ptr = (struct tipc_port *)tipc_createport_raw(NULL, port_dispatcher,
 						   port_wakeup, importance);
 	if (!p_ptr) {
 		kfree(up_ptr);
@@ -952,7 +929,7 @@ int tipc_createport(void *usr_handle,
 
 	p_ptr->user_port = up_ptr;
 	up_ptr->usr_handle = usr_handle;
-	up_ptr->ref = p_ptr->publ.ref;
+	up_ptr->ref = p_ptr->ref;
 	up_ptr->err_cb = error_cb;
 	up_ptr->named_err_cb = named_error_cb;
 	up_ptr->conn_err_cb = conn_error_cb;
@@ -960,26 +937,26 @@ int tipc_createport(void *usr_handle,
 	up_ptr->named_msg_cb = named_msg_cb;
 	up_ptr->conn_msg_cb = conn_msg_cb;
 	up_ptr->continue_event_cb = continue_event_cb;
-	*portref = p_ptr->publ.ref;
+	*portref = p_ptr->ref;
 	tipc_port_unlock(p_ptr);
 	return 0;
 }
 
 int tipc_portimportance(u32 ref, unsigned int *importance)
 {
-	struct port *p_ptr;
+	struct tipc_port *p_ptr;
 
 	p_ptr = tipc_port_lock(ref);
 	if (!p_ptr)
 		return -EINVAL;
-	*importance = (unsigned int)msg_importance(&p_ptr->publ.phdr);
+	*importance = (unsigned int)msg_importance(&p_ptr->phdr);
 	tipc_port_unlock(p_ptr);
 	return 0;
 }
 
 int tipc_set_portimportance(u32 ref, unsigned int imp)
 {
-	struct port *p_ptr;
+	struct tipc_port *p_ptr;
 
 	if (imp > TIPC_CRITICAL_IMPORTANCE)
 		return -EINVAL;
@@ -987,7 +964,7 @@ int tipc_set_portimportance(u32 ref, unsigned int imp)
 	p_ptr = tipc_port_lock(ref);
 	if (!p_ptr)
 		return -EINVAL;
-	msg_set_importance(&p_ptr->publ.phdr, (u32)imp);
+	msg_set_importance(&p_ptr->phdr, (u32)imp);
 	tipc_port_unlock(p_ptr);
 	return 0;
 }
@@ -995,7 +972,7 @@ int tipc_set_portimportance(u32 ref, unsigned int imp)
 
 int tipc_publish(u32 ref, unsigned int scope, struct tipc_name_seq const *seq)
 {
-	struct port *p_ptr;
+	struct tipc_port *p_ptr;
 	struct publication *publ;
 	u32 key;
 	int res = -EINVAL;
@@ -1004,7 +981,7 @@ int tipc_publish(u32 ref, unsigned int scope, struct tipc_name_seq const *seq)
 	if (!p_ptr)
 		return -EINVAL;
 
-	if (p_ptr->publ.connected)
+	if (p_ptr->connected)
 		goto exit;
 	if (seq->lower > seq->upper)
 		goto exit;
@@ -1016,11 +993,11 @@ int tipc_publish(u32 ref, unsigned int scope, struct tipc_name_seq const *seq)
 		goto exit;
 	}
 	publ = tipc_nametbl_publish(seq->type, seq->lower, seq->upper,
-				    scope, p_ptr->publ.ref, key);
+				    scope, p_ptr->ref, key);
 	if (publ) {
 		list_add(&publ->pport_list, &p_ptr->publications);
 		p_ptr->pub_count++;
-		p_ptr->publ.published = 1;
+		p_ptr->published = 1;
 		res = 0;
 	}
 exit:
@@ -1030,7 +1007,7 @@ exit:
 
 int tipc_withdraw(u32 ref, unsigned int scope, struct tipc_name_seq const *seq)
 {
-	struct port *p_ptr;
+	struct tipc_port *p_ptr;
 	struct publication *publ;
 	struct publication *tpubl;
 	int res = -EINVAL;
@@ -1063,37 +1040,36 @@ int tipc_withdraw(u32 ref, unsigned int scope, struct tipc_name_seq const *seq)
 		}
 	}
 	if (list_empty(&p_ptr->publications))
-		p_ptr->publ.published = 0;
+		p_ptr->published = 0;
 	tipc_port_unlock(p_ptr);
 	return res;
 }
 
 int tipc_connect2port(u32 ref, struct tipc_portid const *peer)
 {
-	struct port *p_ptr;
+	struct tipc_port *p_ptr;
 	struct tipc_msg *msg;
 	int res = -EINVAL;
 
 	p_ptr = tipc_port_lock(ref);
 	if (!p_ptr)
 		return -EINVAL;
-	if (p_ptr->publ.published || p_ptr->publ.connected)
+	if (p_ptr->published || p_ptr->connected)
 		goto exit;
 	if (!peer->ref)
 		goto exit;
 
-	msg = &p_ptr->publ.phdr;
+	msg = &p_ptr->phdr;
 	msg_set_destnode(msg, peer->node);
 	msg_set_destport(msg, peer->ref);
 	msg_set_orignode(msg, tipc_own_addr);
-	msg_set_origport(msg, p_ptr->publ.ref);
-	msg_set_transp_seqno(msg, 42);
+	msg_set_origport(msg, p_ptr->ref);
 	msg_set_type(msg, TIPC_CONN_MSG);
 	msg_set_hdr_sz(msg, SHORT_H_SIZE);
 
 	p_ptr->probing_interval = PROBING_INTERVAL;
 	p_ptr->probing_state = CONFIRMED;
-	p_ptr->publ.connected = 1;
+	p_ptr->connected = 1;
 	k_start_timer(&p_ptr->timer, p_ptr->probing_interval);
 
 	tipc_nodesub_subscribe(&p_ptr->subscription, peer->node,
@@ -1102,7 +1078,7 @@ int tipc_connect2port(u32 ref, struct tipc_portid const *peer)
 	res = 0;
 exit:
 	tipc_port_unlock(p_ptr);
-	p_ptr->publ.max_pkt = tipc_link_get_max_pkt(peer->node, ref);
+	p_ptr->max_pkt = tipc_link_get_max_pkt(peer->node, ref);
 	return res;
 }
 
@@ -1120,7 +1096,7 @@ int tipc_disconnect_port(struct tipc_port *tp_ptr)
 		tp_ptr->connected = 0;
 		/* let timer expire on it's own to avoid deadlock! */
 		tipc_nodesub_unsubscribe(
-			&((struct port *)tp_ptr)->subscription);
+			&((struct tipc_port *)tp_ptr)->subscription);
 		res = 0;
 	} else {
 		res = -ENOTCONN;
@@ -1135,7 +1111,7 @@ int tipc_disconnect_port(struct tipc_port *tp_ptr)
 
 int tipc_disconnect(u32 ref)
 {
-	struct port *p_ptr;
+	struct tipc_port *p_ptr;
 	int res;
 
 	p_ptr = tipc_port_lock(ref);
@@ -1151,15 +1127,15 @@ int tipc_disconnect(u32 ref)
  */
 int tipc_shutdown(u32 ref)
 {
-	struct port *p_ptr;
+	struct tipc_port *p_ptr;
 	struct sk_buff *buf = NULL;
 
 	p_ptr = tipc_port_lock(ref);
 	if (!p_ptr)
 		return -EINVAL;
 
-	if (p_ptr->publ.connected) {
-		u32 imp = msg_importance(&p_ptr->publ.phdr);
+	if (p_ptr->connected) {
+		u32 imp = msg_importance(&p_ptr->phdr);
 		if (imp < TIPC_CRITICAL_IMPORTANCE)
 			imp++;
 		buf = port_build_proto_msg(port_peerport(p_ptr),
@@ -1169,7 +1145,6 @@ int tipc_shutdown(u32 ref)
 					   imp,
 					   TIPC_CONN_MSG,
 					   TIPC_CONN_SHUTDOWN,
-					   port_out_seqno(p_ptr),
 					   0);
 	}
 	tipc_port_unlock(p_ptr);
@@ -1182,13 +1157,13 @@ int tipc_shutdown(u32 ref)
  *                        message for this node.
  */
 
-static int tipc_port_recv_sections(struct port *sender, unsigned int num_sect,
+static int tipc_port_recv_sections(struct tipc_port *sender, unsigned int num_sect,
 				   struct iovec const *msg_sect)
 {
 	struct sk_buff *buf;
 	int res;
 
-	res = tipc_msg_build(&sender->publ.phdr, msg_sect, num_sect,
+	res = tipc_msg_build(&sender->phdr, msg_sect, num_sect,
 			MAX_MSG_SIZE, !sender->user_port, &buf);
 	if (likely(buf))
 		tipc_port_recv_msg(buf);
@@ -1201,15 +1176,15 @@ static int tipc_port_recv_sections(struct port *sender, unsigned int num_sect,
 
 int tipc_send(u32 ref, unsigned int num_sect, struct iovec const *msg_sect)
 {
-	struct port *p_ptr;
+	struct tipc_port *p_ptr;
 	u32 destnode;
 	int res;
 
 	p_ptr = tipc_port_deref(ref);
-	if (!p_ptr || !p_ptr->publ.connected)
+	if (!p_ptr || !p_ptr->connected)
 		return -EINVAL;
 
-	p_ptr->publ.congested = 1;
+	p_ptr->congested = 1;
 	if (!tipc_port_congested(p_ptr)) {
 		destnode = port_peernode(p_ptr);
 		if (likely(destnode != tipc_own_addr))
@@ -1219,14 +1194,14 @@ int tipc_send(u32 ref, unsigned int num_sect, struct iovec const *msg_sect)
 			res = tipc_port_recv_sections(p_ptr, num_sect, msg_sect);
 
 		if (likely(res != -ELINKCONG)) {
-			port_incr_out_seqno(p_ptr);
-			p_ptr->publ.congested = 0;
-			p_ptr->sent++;
+			p_ptr->congested = 0;
+			if (res > 0)
+				p_ptr->sent++;
 			return res;
 		}
 	}
 	if (port_unreliable(p_ptr)) {
-		p_ptr->publ.congested = 0;
+		p_ptr->congested = 0;
 		/* Just calculate msg length and return */
 		return tipc_msg_calc_data_size(msg_sect, num_sect);
 	}
@@ -1240,17 +1215,17 @@ int tipc_send(u32 ref, unsigned int num_sect, struct iovec const *msg_sect)
 int tipc_send2name(u32 ref, struct tipc_name const *name, unsigned int domain,
 	   unsigned int num_sect, struct iovec const *msg_sect)
 {
-	struct port *p_ptr;
+	struct tipc_port *p_ptr;
 	struct tipc_msg *msg;
 	u32 destnode = domain;
 	u32 destport;
 	int res;
 
 	p_ptr = tipc_port_deref(ref);
-	if (!p_ptr || p_ptr->publ.connected)
+	if (!p_ptr || p_ptr->connected)
 		return -EINVAL;
 
-	msg = &p_ptr->publ.phdr;
+	msg = &p_ptr->phdr;
 	msg_set_type(msg, TIPC_NAMED_MSG);
 	msg_set_orignode(msg, tipc_own_addr);
 	msg_set_origport(msg, ref);
@@ -1263,13 +1238,17 @@ int tipc_send2name(u32 ref, struct tipc_name const *name, unsigned int domain,
 	msg_set_destport(msg, destport);
 
 	if (likely(destport)) {
-		p_ptr->sent++;
 		if (likely(destnode == tipc_own_addr))
-			return tipc_port_recv_sections(p_ptr, num_sect, msg_sect);
-		res = tipc_link_send_sections_fast(p_ptr, msg_sect, num_sect,
-						   destnode);
-		if (likely(res != -ELINKCONG))
+			res = tipc_port_recv_sections(p_ptr, num_sect,
+						      msg_sect);
+		else
+			res = tipc_link_send_sections_fast(p_ptr, msg_sect,
+							   num_sect, destnode);
+		if (likely(res != -ELINKCONG)) {
+			if (res > 0)
+				p_ptr->sent++;
 			return res;
+		}
 		if (port_unreliable(p_ptr)) {
 			/* Just calculate msg length and return */
 			return tipc_msg_calc_data_size(msg_sect, num_sect);
@@ -1287,27 +1266,32 @@ int tipc_send2name(u32 ref, struct tipc_name const *name, unsigned int domain,
 int tipc_send2port(u32 ref, struct tipc_portid const *dest,
 	   unsigned int num_sect, struct iovec const *msg_sect)
 {
-	struct port *p_ptr;
+	struct tipc_port *p_ptr;
 	struct tipc_msg *msg;
 	int res;
 
 	p_ptr = tipc_port_deref(ref);
-	if (!p_ptr || p_ptr->publ.connected)
+	if (!p_ptr || p_ptr->connected)
 		return -EINVAL;
 
-	msg = &p_ptr->publ.phdr;
+	msg = &p_ptr->phdr;
 	msg_set_type(msg, TIPC_DIRECT_MSG);
 	msg_set_orignode(msg, tipc_own_addr);
 	msg_set_origport(msg, ref);
 	msg_set_destnode(msg, dest->node);
 	msg_set_destport(msg, dest->ref);
 	msg_set_hdr_sz(msg, DIR_MSG_H_SIZE);
-	p_ptr->sent++;
+
 	if (dest->node == tipc_own_addr)
-		return tipc_port_recv_sections(p_ptr, num_sect, msg_sect);
-	res = tipc_link_send_sections_fast(p_ptr, msg_sect, num_sect, dest->node);
-	if (likely(res != -ELINKCONG))
+		res =  tipc_port_recv_sections(p_ptr, num_sect, msg_sect);
+	else
+		res = tipc_link_send_sections_fast(p_ptr, msg_sect, num_sect,
+						   dest->node);
+	if (likely(res != -ELINKCONG)) {
+		if (res > 0)
+			p_ptr->sent++;
 		return res;
+	}
 	if (port_unreliable(p_ptr)) {
 		/* Just calculate msg length and return */
 		return tipc_msg_calc_data_size(msg_sect, num_sect);
@@ -1322,15 +1306,15 @@ int tipc_send2port(u32 ref, struct tipc_portid const *dest,
 int tipc_send_buf2port(u32 ref, struct tipc_portid const *dest,
 	       struct sk_buff *buf, unsigned int dsz)
 {
-	struct port *p_ptr;
+	struct tipc_port *p_ptr;
 	struct tipc_msg *msg;
 	int res;
 
-	p_ptr = (struct port *)tipc_ref_deref(ref);
-	if (!p_ptr || p_ptr->publ.connected)
+	p_ptr = (struct tipc_port *)tipc_ref_deref(ref);
+	if (!p_ptr || p_ptr->connected)
 		return -EINVAL;
 
-	msg = &p_ptr->publ.phdr;
+	msg = &p_ptr->phdr;
 	msg_set_type(msg, TIPC_DIRECT_MSG);
 	msg_set_orignode(msg, tipc_own_addr);
 	msg_set_origport(msg, ref);
@@ -1343,12 +1327,16 @@ int tipc_send_buf2port(u32 ref, struct tipc_portid const *dest,
 
 	skb_push(buf, DIR_MSG_H_SIZE);
 	skb_copy_to_linear_data(buf, msg, DIR_MSG_H_SIZE);
-	p_ptr->sent++;
+
 	if (dest->node == tipc_own_addr)
-		return tipc_port_recv_msg(buf);
-	res = tipc_send_buf_fast(buf, dest->node);
-	if (likely(res != -ELINKCONG))
+		res = tipc_port_recv_msg(buf);
+	else
+		res = tipc_send_buf_fast(buf, dest->node);
+	if (likely(res != -ELINKCONG)) {
+		if (res > 0)
+			p_ptr->sent++;
 		return res;
+	}
 	if (port_unreliable(p_ptr))
 		return dsz;
 	return -ELINKCONG;
diff --git a/net/tipc/port.h b/net/tipc/port.h
index 8e84b98..87b9424 100644
--- a/net/tipc/port.h
+++ b/net/tipc/port.h
@@ -2,7 +2,7 @@
  * net/tipc/port.h: Include file for TIPC port code
  *
  * Copyright (c) 1994-2007, Ericsson AB
- * Copyright (c) 2004-2007, Wind River Systems
+ * Copyright (c) 2004-2007, 2010-2011, Wind River Systems
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -95,7 +95,7 @@ struct user_port {
 };
 
 /**
- * struct tipc_port - TIPC port info available to socket API
+ * struct tipc_port - TIPC port structure
  * @usr_handle: pointer to additional user-defined information about port
  * @lock: pointer to spinlock for controlling access to port
  * @connected: non-zero if port is currently connected to a peer port
@@ -107,43 +107,33 @@ struct user_port {
  * @max_pkt: maximum packet size "hint" used when building messages sent by port
  * @ref: unique reference to port in TIPC object registry
  * @phdr: preformatted message header used when sending messages
- */
-struct tipc_port {
-	void *usr_handle;
-	spinlock_t *lock;
-	int connected;
-	u32 conn_type;
-	u32 conn_instance;
-	u32 conn_unacked;
-	int published;
-	u32 congested;
-	u32 max_pkt;
-	u32 ref;
-	struct tipc_msg phdr;
-};
-
-/**
- * struct port - TIPC port structure
- * @publ: TIPC port info available to privileged users
  * @port_list: adjacent ports in TIPC's global list of ports
  * @dispatcher: ptr to routine which handles received messages
  * @wakeup: ptr to routine to call when port is no longer congested
  * @user_port: ptr to user port associated with port (if any)
  * @wait_list: adjacent ports in list of ports waiting on link congestion
  * @waiting_pkts:
- * @sent:
- * @acked:
+ * @sent: # of non-empty messages sent by port
+ * @acked: # of non-empty message acknowledgements from connected port's peer
  * @publications: list of publications for port
  * @pub_count: total # of publications port has made during its lifetime
  * @probing_state:
  * @probing_interval:
- * @last_in_seqno:
  * @timer_ref:
  * @subscription: "node down" subscription used to terminate failed connections
  */
-
-struct port {
-	struct tipc_port publ;
+struct tipc_port {
+	void *usr_handle;
+	spinlock_t *lock;
+	int connected;
+	u32 conn_type;
+	u32 conn_instance;
+	u32 conn_unacked;
+	int published;
+	u32 congested;
+	u32 max_pkt;
+	u32 ref;
+	struct tipc_msg phdr;
 	struct list_head port_list;
 	u32 (*dispatcher)(struct tipc_port *, struct sk_buff *);
 	void (*wakeup)(struct tipc_port *);
@@ -156,7 +146,6 @@ struct port {
 	u32 pub_count;
 	u32 probing_state;
 	u32 probing_interval;
-	u32 last_in_seqno;
 	struct timer_list timer;
 	struct tipc_node_subscr subscription;
 };
@@ -230,7 +219,7 @@ int tipc_send_buf2port(u32 portref, struct tipc_portid const *dest,
 int tipc_multicast(u32 portref, struct tipc_name_seq const *seq,
 		unsigned int section_count, struct iovec const *msg);
 
-int tipc_port_reject_sections(struct port *p_ptr, struct tipc_msg *hdr,
+int tipc_port_reject_sections(struct tipc_port *p_ptr, struct tipc_msg *hdr,
 			      struct iovec const *msg_sect, u32 num_sect,
 			      int err);
 struct sk_buff *tipc_port_get_ports(void);
@@ -242,9 +231,9 @@ void tipc_port_reinit(void);
  * tipc_port_lock - lock port instance referred to and return its pointer
  */
 
-static inline struct port *tipc_port_lock(u32 ref)
+static inline struct tipc_port *tipc_port_lock(u32 ref)
 {
-	return (struct port *)tipc_ref_lock(ref);
+	return (struct tipc_port *)tipc_ref_lock(ref);
 }
 
 /**
@@ -253,27 +242,27 @@ static inline struct port *tipc_port_lock(u32 ref)
  * Can use pointer instead of tipc_ref_unlock() since port is already locked.
  */
 
-static inline void tipc_port_unlock(struct port *p_ptr)
+static inline void tipc_port_unlock(struct tipc_port *p_ptr)
 {
-	spin_unlock_bh(p_ptr->publ.lock);
+	spin_unlock_bh(p_ptr->lock);
 }
 
-static inline struct port *tipc_port_deref(u32 ref)
+static inline struct tipc_port *tipc_port_deref(u32 ref)
 {
-	return (struct port *)tipc_ref_deref(ref);
+	return (struct tipc_port *)tipc_ref_deref(ref);
 }
 
-static inline u32 tipc_peer_port(struct port *p_ptr)
+static inline u32 tipc_peer_port(struct tipc_port *p_ptr)
 {
-	return msg_destport(&p_ptr->publ.phdr);
+	return msg_destport(&p_ptr->phdr);
 }
 
-static inline u32 tipc_peer_node(struct port *p_ptr)
+static inline u32 tipc_peer_node(struct tipc_port *p_ptr)
 {
-	return msg_destnode(&p_ptr->publ.phdr);
+	return msg_destnode(&p_ptr->phdr);
 }
 
-static inline int tipc_port_congested(struct port *p_ptr)
+static inline int tipc_port_congested(struct tipc_port *p_ptr)
 {
 	return (p_ptr->sent - p_ptr->acked) >= (TIPC_FLOW_CONTROL_WIN * 2);
 }
@@ -284,7 +273,7 @@ static inline int tipc_port_congested(struct port *p_ptr)
 
 static inline int tipc_port_recv_msg(struct sk_buff *buf)
 {
-	struct port *p_ptr;
+	struct tipc_port *p_ptr;
 	struct tipc_msg *msg = buf_msg(buf);
 	u32 destport = msg_destport(msg);
 	u32 dsz = msg_data_sz(msg);
@@ -299,7 +288,7 @@ static inline int tipc_port_recv_msg(struct sk_buff *buf)
 	/* validate destination & pass to port, otherwise reject message */
 	p_ptr = tipc_port_lock(destport);
 	if (likely(p_ptr)) {
-		if (likely(p_ptr->publ.connected)) {
+		if (likely(p_ptr->connected)) {
 			if ((unlikely(msg_origport(msg) != tipc_peer_port(p_ptr))) ||
 			    (unlikely(msg_orignode(msg) != tipc_peer_node(p_ptr))) ||
 			    (unlikely(!msg_connected(msg)))) {
@@ -308,7 +297,7 @@ static inline int tipc_port_recv_msg(struct sk_buff *buf)
 				goto reject;
 			}
 		}
-		err = p_ptr->dispatcher(&p_ptr->publ, buf);
+		err = p_ptr->dispatcher(p_ptr, buf);
 		tipc_port_unlock(p_ptr);
 		if (likely(!err))
 			return dsz;
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 2b02a3a..125dcb0 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -2,7 +2,7 @@
  * net/tipc/socket.c: TIPC socket API
  *
  * Copyright (c) 2001-2007, Ericsson AB
- * Copyright (c) 2004-2008, Wind River Systems
+ * Copyright (c) 2004-2008, 2010-2011, Wind River Systems
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -241,7 +241,6 @@ static int tipc_create(struct net *net, struct socket *sock, int protocol,
 			tipc_set_portunreliable(tp_ptr->ref, 1);
 	}
 
-	atomic_inc(&tipc_user_count);
 	return 0;
 }
 
@@ -321,7 +320,6 @@ static int release(struct socket *sock)
 	sock_put(sk);
 	sock->sk = NULL;
 
-	atomic_dec(&tipc_user_count);
 	return res;
 }
 
@@ -495,6 +493,8 @@ static int dest_name_check(struct sockaddr_tipc *dest, struct msghdr *m)
 	if (likely(dest->addr.name.name.type != TIPC_CFG_SRV))
 		return -EACCES;
 
+	if (!m->msg_iovlen || (m->msg_iov[0].iov_len < sizeof(hdr)))
+		return -EMSGSIZE;
 	if (copy_from_user(&hdr, m->msg_iov[0].iov_base, sizeof(hdr)))
 		return -EFAULT;
 	if ((ntohs(hdr.tcm_type) & 0xC000) && (!capable(CAP_NET_ADMIN)))
diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index ca04479..aae9eae 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -2,7 +2,7 @@
  * net/tipc/subscr.c: TIPC network topology service
  *
  * Copyright (c) 2000-2006, Ericsson AB
- * Copyright (c) 2005-2007, Wind River Systems
+ * Copyright (c) 2005-2007, 2010-2011, Wind River Systems
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -160,7 +160,7 @@ void tipc_subscr_report_overlap(struct subscription *sub,
 
 static void subscr_timeout(struct subscription *sub)
 {
-	struct port *server_port;
+	struct tipc_port *server_port;
 
 	/* Validate server port reference (in case subscriber is terminating) */
 
@@ -472,8 +472,6 @@ static void subscr_named_msg_event(void *usr_handle,
 				   struct tipc_portid const *orig,
 				   struct tipc_name_seq const *dest)
 {
-	static struct iovec msg_sect = {NULL, 0};
-
 	struct subscriber *subscriber;
 	u32 server_port_ref;
 
@@ -508,7 +506,7 @@ static void subscr_named_msg_event(void *usr_handle,
 
 	/* Lock server port (& save lock address for future use) */
 
-	subscriber->lock = tipc_port_lock(subscriber->port_ref)->publ.lock;
+	subscriber->lock = tipc_port_lock(subscriber->port_ref)->lock;
 
 	/* Add subscriber to topology server's subscriber list */
 
@@ -523,7 +521,7 @@ static void subscr_named_msg_event(void *usr_handle,
 
 	/* Send an ACK- to complete connection handshaking */
 
-	tipc_send(server_port_ref, 1, &msg_sect);
+	tipc_send(server_port_ref, 0, NULL);
 
 	/* Handle optional subscription request */
 
@@ -542,7 +540,6 @@ int tipc_subscr_start(void)
 	spin_lock_init(&topsrv.lock);
 	INIT_LIST_HEAD(&topsrv.subscriber_list);
 
-	spin_lock_bh(&topsrv.lock);
 	res = tipc_createport(NULL,
 			      TIPC_CRITICAL_IMPORTANCE,
 			      NULL,
@@ -563,12 +560,10 @@ int tipc_subscr_start(void)
 		goto failed;
 	}
 
-	spin_unlock_bh(&topsrv.lock);
 	return 0;
 
 failed:
 	err("Failed to create subscription service\n");
-	spin_unlock_bh(&topsrv.lock);
 	return res;
 }
 
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 437a99e..de87018 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1171,7 +1171,7 @@ restart:
 	newsk->sk_type		= sk->sk_type;
 	init_peercred(newsk);
 	newu = unix_sk(newsk);
-	newsk->sk_wq		= &newu->peer_wq;
+	RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
 	otheru = unix_sk(other);
 
 	/* copy address information from listening to new sock*/
@@ -1475,6 +1475,12 @@ restart:
 			goto out_free;
 	}
 
+	if (sk_filter(other, skb) < 0) {
+		/* Toss the packet but do not return any error to the sender */
+		err = len;
+		goto out_free;
+	}
+
 	unix_state_lock(other);
 	err = -EPERM;
 	if (!unix_may_send(sk, other))
@@ -1561,7 +1567,6 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
 	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
 	struct sock *sk = sock->sk;
 	struct sock *other = NULL;
-	struct sockaddr_un *sunaddr = msg->msg_name;
 	int err, size;
 	struct sk_buff *skb;
 	int sent = 0;
@@ -1584,7 +1589,6 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
 		err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
 		goto out_err;
 	} else {
-		sunaddr = NULL;
 		err = -ENOTCONN;
 		other = unix_peer(sk);
 		if (!other)
@@ -1987,36 +1991,38 @@ static int unix_shutdown(struct socket *sock, int mode)
 
 	mode = (mode+1)&(RCV_SHUTDOWN|SEND_SHUTDOWN);
 
-	if (mode) {
-		unix_state_lock(sk);
-		sk->sk_shutdown |= mode;
-		other = unix_peer(sk);
-		if (other)
-			sock_hold(other);
-		unix_state_unlock(sk);
-		sk->sk_state_change(sk);
-
-		if (other &&
-			(sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
-
-			int peer_mode = 0;
-
-			if (mode&RCV_SHUTDOWN)
-				peer_mode |= SEND_SHUTDOWN;
-			if (mode&SEND_SHUTDOWN)
-				peer_mode |= RCV_SHUTDOWN;
-			unix_state_lock(other);
-			other->sk_shutdown |= peer_mode;
-			unix_state_unlock(other);
-			other->sk_state_change(other);
-			if (peer_mode == SHUTDOWN_MASK)
-				sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
-			else if (peer_mode & RCV_SHUTDOWN)
-				sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
-		}
-		if (other)
-			sock_put(other);
+	if (!mode)
+		return 0;
+
+	unix_state_lock(sk);
+	sk->sk_shutdown |= mode;
+	other = unix_peer(sk);
+	if (other)
+		sock_hold(other);
+	unix_state_unlock(sk);
+	sk->sk_state_change(sk);
+
+	if (other &&
+		(sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
+
+		int peer_mode = 0;
+
+		if (mode&RCV_SHUTDOWN)
+			peer_mode |= SEND_SHUTDOWN;
+		if (mode&SEND_SHUTDOWN)
+			peer_mode |= RCV_SHUTDOWN;
+		unix_state_lock(other);
+		other->sk_shutdown |= peer_mode;
+		unix_state_unlock(other);
+		other->sk_state_change(other);
+		if (peer_mode == SHUTDOWN_MASK)
+			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
+		else if (peer_mode & RCV_SHUTDOWN)
+			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
 	}
+	if (other)
+		sock_put(other);
+
 	return 0;
 }
 
diff --git a/net/wanrouter/wanmain.c b/net/wanrouter/wanmain.c
index 74944a2..788a12c 100644
--- a/net/wanrouter/wanmain.c
+++ b/net/wanrouter/wanmain.c
@@ -59,8 +59,6 @@
 #include <asm/uaccess.h>        /* copy_to/from_user */
 #include <linux/init.h>         /* __initfunc et al. */
 
-#define KMEM_SAFETYZONE 8
-
 #define DEV_TO_SLAVE(dev)	(*((struct net_device **)netdev_priv(dev)))
 
 /*
diff --git a/net/wireless/core.c b/net/wireless/core.c
index e9a5f8c..fe01de2 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -718,13 +718,6 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb,
 			wdev->ps = false;
 		/* allow mac80211 to determine the timeout */
 		wdev->ps_timeout = -1;
-		if (rdev->ops->set_power_mgmt)
-			if (rdev->ops->set_power_mgmt(wdev->wiphy, dev,
-						      wdev->ps,
-						      wdev->ps_timeout)) {
-				/* assume this means it's off */
-				wdev->ps = false;
-			}
 
 		if (!dev->ethtool_ops)
 			dev->ethtool_ops = &cfg80211_ethtool_ops;
@@ -813,6 +806,19 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb,
 		rdev->opencount++;
 		mutex_unlock(&rdev->devlist_mtx);
 		cfg80211_unlock_rdev(rdev);
+
+		/*
+		 * Configure power management to the driver here so that its
+		 * correctly set also after interface type changes etc.
+		 */
+		if (wdev->iftype == NL80211_IFTYPE_STATION &&
+		    rdev->ops->set_power_mgmt)
+			if (rdev->ops->set_power_mgmt(wdev->wiphy, dev,
+						      wdev->ps,
+						      wdev->ps_timeout)) {
+				/* assume this means it's off */
+				wdev->ps = false;
+			}
 		break;
 	case NETDEV_UNREGISTER:
 		/*
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 9b62710..4ebce42 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -1968,13 +1968,41 @@ static int parse_station_flags(struct genl_info *info,
 	return 0;
 }
 
+static bool nl80211_put_sta_rate(struct sk_buff *msg, struct rate_info *info,
+				 int attr)
+{
+	struct nlattr *rate;
+	u16 bitrate;
+
+	rate = nla_nest_start(msg, attr);
+	if (!rate)
+		goto nla_put_failure;
+
+	/* cfg80211_calculate_bitrate will return 0 for mcs >= 32 */
+	bitrate = cfg80211_calculate_bitrate(info);
+	if (bitrate > 0)
+		NLA_PUT_U16(msg, NL80211_RATE_INFO_BITRATE, bitrate);
+
+	if (info->flags & RATE_INFO_FLAGS_MCS)
+		NLA_PUT_U8(msg, NL80211_RATE_INFO_MCS, info->mcs);
+	if (info->flags & RATE_INFO_FLAGS_40_MHZ_WIDTH)
+		NLA_PUT_FLAG(msg, NL80211_RATE_INFO_40_MHZ_WIDTH);
+	if (info->flags & RATE_INFO_FLAGS_SHORT_GI)
+		NLA_PUT_FLAG(msg, NL80211_RATE_INFO_SHORT_GI);
+
+	nla_nest_end(msg, rate);
+	return true;
+
+nla_put_failure:
+	return false;
+}
+
 static int nl80211_send_station(struct sk_buff *msg, u32 pid, u32 seq,
 				int flags, struct net_device *dev,
 				const u8 *mac_addr, struct station_info *sinfo)
 {
 	void *hdr;
-	struct nlattr *sinfoattr, *txrate;
-	u16 bitrate;
+	struct nlattr *sinfoattr;
 
 	hdr = nl80211hdr_put(msg, pid, seq, flags, NL80211_CMD_NEW_STATION);
 	if (!hdr)
@@ -2013,24 +2041,14 @@ static int nl80211_send_station(struct sk_buff *msg, u32 pid, u32 seq,
 		NLA_PUT_U8(msg, NL80211_STA_INFO_SIGNAL_AVG,
 			   sinfo->signal_avg);
 	if (sinfo->filled & STATION_INFO_TX_BITRATE) {
-		txrate = nla_nest_start(msg, NL80211_STA_INFO_TX_BITRATE);
-		if (!txrate)
+		if (!nl80211_put_sta_rate(msg, &sinfo->txrate,
+					  NL80211_STA_INFO_TX_BITRATE))
+			goto nla_put_failure;
+	}
+	if (sinfo->filled & STATION_INFO_RX_BITRATE) {
+		if (!nl80211_put_sta_rate(msg, &sinfo->rxrate,
+					  NL80211_STA_INFO_RX_BITRATE))
 			goto nla_put_failure;
-
-		/* cfg80211_calculate_bitrate will return 0 for mcs >= 32 */
-		bitrate = cfg80211_calculate_bitrate(&sinfo->txrate);
-		if (bitrate > 0)
-			NLA_PUT_U16(msg, NL80211_RATE_INFO_BITRATE, bitrate);
-
-		if (sinfo->txrate.flags & RATE_INFO_FLAGS_MCS)
-			NLA_PUT_U8(msg, NL80211_RATE_INFO_MCS,
-				    sinfo->txrate.mcs);
-		if (sinfo->txrate.flags & RATE_INFO_FLAGS_40_MHZ_WIDTH)
-			NLA_PUT_FLAG(msg, NL80211_RATE_INFO_40_MHZ_WIDTH);
-		if (sinfo->txrate.flags & RATE_INFO_FLAGS_SHORT_GI)
-			NLA_PUT_FLAG(msg, NL80211_RATE_INFO_SHORT_GI);
-
-		nla_nest_end(msg, txrate);
 	}
 	if (sinfo->filled & STATION_INFO_RX_PACKETS)
 		NLA_PUT_U32(msg, NL80211_STA_INFO_RX_PACKETS,
@@ -2718,7 +2736,7 @@ static int nl80211_get_mesh_config(struct sk_buff *skb,
 	hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0,
 			     NL80211_CMD_GET_MESH_CONFIG);
 	if (!hdr)
-		goto nla_put_failure;
+		goto out;
 	pinfoattr = nla_nest_start(msg, NL80211_ATTR_MESH_CONFIG);
 	if (!pinfoattr)
 		goto nla_put_failure;
@@ -2759,6 +2777,7 @@ static int nl80211_get_mesh_config(struct sk_buff *skb,
 
  nla_put_failure:
 	genlmsg_cancel(msg, hdr);
+ out:
 	nlmsg_free(msg);
 	return -ENOBUFS;
 }
@@ -2954,7 +2973,7 @@ static int nl80211_get_reg(struct sk_buff *skb, struct genl_info *info)
 	hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0,
 			     NL80211_CMD_GET_REG);
 	if (!hdr)
-		goto nla_put_failure;
+		goto put_failure;
 
 	NLA_PUT_STRING(msg, NL80211_ATTR_REG_ALPHA2,
 		cfg80211_regdomain->alpha2);
@@ -3001,6 +3020,7 @@ static int nl80211_get_reg(struct sk_buff *skb, struct genl_info *info)
 
 nla_put_failure:
 	genlmsg_cancel(msg, hdr);
+put_failure:
 	nlmsg_free(msg);
 	err = -EMSGSIZE;
 out:
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 37693b6..c565689 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -1801,9 +1801,9 @@ void regulatory_hint_disconnect(void)
 
 static bool freq_is_chan_12_13_14(u16 freq)
 {
-	if (freq == ieee80211_channel_to_frequency(12) ||
-	    freq == ieee80211_channel_to_frequency(13) ||
-	    freq == ieee80211_channel_to_frequency(14))
+	if (freq == ieee80211_channel_to_frequency(12, IEEE80211_BAND_2GHZ) ||
+	    freq == ieee80211_channel_to_frequency(13, IEEE80211_BAND_2GHZ) ||
+	    freq == ieee80211_channel_to_frequency(14, IEEE80211_BAND_2GHZ))
 		return true;
 	return false;
 }
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 7620ae2..6a750bc 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -29,29 +29,37 @@ ieee80211_get_response_rate(struct ieee80211_supported_band *sband,
 }
 EXPORT_SYMBOL(ieee80211_get_response_rate);
 
-int ieee80211_channel_to_frequency(int chan)
+int ieee80211_channel_to_frequency(int chan, enum ieee80211_band band)
 {
-	if (chan < 14)
-		return 2407 + chan * 5;
-
-	if (chan == 14)
-		return 2484;
-
-	/* FIXME: 802.11j 17.3.8.3.2 */
-	return (chan + 1000) * 5;
+	/* see 802.11 17.3.8.3.2 and Annex J
+	 * there are overlapping channel numbers in 5GHz and 2GHz bands */
+	if (band == IEEE80211_BAND_5GHZ) {
+		if (chan >= 182 && chan <= 196)
+			return 4000 + chan * 5;
+		else
+			return 5000 + chan * 5;
+	} else { /* IEEE80211_BAND_2GHZ */
+		if (chan == 14)
+			return 2484;
+		else if (chan < 14)
+			return 2407 + chan * 5;
+		else
+			return 0; /* not supported */
+	}
 }
 EXPORT_SYMBOL(ieee80211_channel_to_frequency);
 
 int ieee80211_frequency_to_channel(int freq)
 {
+	/* see 802.11 17.3.8.3.2 and Annex J */
 	if (freq == 2484)
 		return 14;
-
-	if (freq < 2484)
+	else if (freq < 2484)
 		return (freq - 2407) / 5;
-
-	/* FIXME: 802.11j 17.3.8.3.2 */
-	return freq/5 - 1000;
+	else if (freq >= 4910 && freq <= 4980)
+		return (freq - 4000) / 5;
+	else
+		return (freq - 5000) / 5;
 }
 EXPORT_SYMBOL(ieee80211_frequency_to_channel);
 
@@ -159,12 +167,15 @@ int cfg80211_validate_key_settings(struct cfg80211_registered_device *rdev,
 
 	/*
 	 * Disallow pairwise keys with non-zero index unless it's WEP
-	 * (because current deployments use pairwise WEP keys with
-	 * non-zero indizes but 802.11i clearly specifies to use zero)
+	 * or a vendor specific cipher (because current deployments use
+	 * pairwise WEP keys with non-zero indices and for vendor specific
+	 * ciphers this should be validated in the driver or hardware level
+	 * - but 802.11i clearly specifies to use zero)
 	 */
 	if (pairwise && key_idx &&
-	    params->cipher != WLAN_CIPHER_SUITE_WEP40 &&
-	    params->cipher != WLAN_CIPHER_SUITE_WEP104)
+	    ((params->cipher == WLAN_CIPHER_SUITE_TKIP) ||
+	     (params->cipher == WLAN_CIPHER_SUITE_CCMP) ||
+	     (params->cipher == WLAN_CIPHER_SUITE_AES_CMAC)))
 		return -EINVAL;
 
 	switch (params->cipher) {
diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c
index d112f03..0bf169b 100644
--- a/net/wireless/wext-compat.c
+++ b/net/wireless/wext-compat.c
@@ -267,9 +267,12 @@ int cfg80211_wext_freq(struct wiphy *wiphy, struct iw_freq *freq)
 	 * -EINVAL for impossible things.
 	 */
 	if (freq->e == 0) {
+		enum ieee80211_band band = IEEE80211_BAND_2GHZ;
 		if (freq->m < 0)
 			return 0;
-		return ieee80211_channel_to_frequency(freq->m);
+		if (freq->m > 14)
+			band = IEEE80211_BAND_5GHZ;
+		return ieee80211_channel_to_frequency(freq->m, band);
 	} else {
 		int i, div = 1000000;
 		for (i = 0; i < freq->e; i++)
diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c
index 8b4d6e3..58064d9 100644
--- a/net/xfrm/xfrm_algo.c
+++ b/net/xfrm/xfrm_algo.c
@@ -618,21 +618,21 @@ static int xfrm_alg_name_match(const struct xfrm_algo_desc *entry,
 			(entry->compat && !strcmp(name, entry->compat)));
 }
 
-struct xfrm_algo_desc *xfrm_aalg_get_byname(char *name, int probe)
+struct xfrm_algo_desc *xfrm_aalg_get_byname(const char *name, int probe)
 {
 	return xfrm_find_algo(&xfrm_aalg_list, xfrm_alg_name_match, name,
 			      probe);
 }
 EXPORT_SYMBOL_GPL(xfrm_aalg_get_byname);
 
-struct xfrm_algo_desc *xfrm_ealg_get_byname(char *name, int probe)
+struct xfrm_algo_desc *xfrm_ealg_get_byname(const char *name, int probe)
 {
 	return xfrm_find_algo(&xfrm_ealg_list, xfrm_alg_name_match, name,
 			      probe);
 }
 EXPORT_SYMBOL_GPL(xfrm_ealg_get_byname);
 
-struct xfrm_algo_desc *xfrm_calg_get_byname(char *name, int probe)
+struct xfrm_algo_desc *xfrm_calg_get_byname(const char *name, int probe)
 {
 	return xfrm_find_algo(&xfrm_calg_list, xfrm_alg_name_match, name,
 			      probe);
@@ -654,7 +654,7 @@ static int xfrm_aead_name_match(const struct xfrm_algo_desc *entry,
 	       !strcmp(name, entry->name);
 }
 
-struct xfrm_algo_desc *xfrm_aead_get_byname(char *name, int icv_len, int probe)
+struct xfrm_algo_desc *xfrm_aead_get_byname(const char *name, int icv_len, int probe)
 {
 	struct xfrm_aead_name data = {
 		.name = name,
diff --git a/net/xfrm/xfrm_hash.h b/net/xfrm/xfrm_hash.h
index 8e69533..7199d78 100644
--- a/net/xfrm/xfrm_hash.h
+++ b/net/xfrm/xfrm_hash.h
@@ -4,29 +4,32 @@
 #include <linux/xfrm.h>
 #include <linux/socket.h>
 
-static inline unsigned int __xfrm4_addr_hash(xfrm_address_t *addr)
+static inline unsigned int __xfrm4_addr_hash(const xfrm_address_t *addr)
 {
 	return ntohl(addr->a4);
 }
 
-static inline unsigned int __xfrm6_addr_hash(xfrm_address_t *addr)
+static inline unsigned int __xfrm6_addr_hash(const xfrm_address_t *addr)
 {
 	return ntohl(addr->a6[2] ^ addr->a6[3]);
 }
 
-static inline unsigned int __xfrm4_daddr_saddr_hash(xfrm_address_t *daddr, xfrm_address_t *saddr)
+static inline unsigned int __xfrm4_daddr_saddr_hash(const xfrm_address_t *daddr,
+						    const xfrm_address_t *saddr)
 {
 	u32 sum = (__force u32)daddr->a4 + (__force u32)saddr->a4;
 	return ntohl((__force __be32)sum);
 }
 
-static inline unsigned int __xfrm6_daddr_saddr_hash(xfrm_address_t *daddr, xfrm_address_t *saddr)
+static inline unsigned int __xfrm6_daddr_saddr_hash(const xfrm_address_t *daddr,
+						    const xfrm_address_t *saddr)
 {
 	return ntohl(daddr->a6[2] ^ daddr->a6[3] ^
 		     saddr->a6[2] ^ saddr->a6[3]);
 }
 
-static inline unsigned int __xfrm_dst_hash(xfrm_address_t *daddr, xfrm_address_t *saddr,
+static inline unsigned int __xfrm_dst_hash(const xfrm_address_t *daddr,
+					   const xfrm_address_t *saddr,
 					   u32 reqid, unsigned short family,
 					   unsigned int hmask)
 {
@@ -42,8 +45,8 @@ static inline unsigned int __xfrm_dst_hash(xfrm_address_t *daddr, xfrm_address_t
 	return (h ^ (h >> 16)) & hmask;
 }
 
-static inline unsigned __xfrm_src_hash(xfrm_address_t *daddr,
-				       xfrm_address_t *saddr,
+static inline unsigned __xfrm_src_hash(const xfrm_address_t *daddr,
+				       const xfrm_address_t *saddr,
 				       unsigned short family,
 				       unsigned int hmask)
 {
@@ -60,8 +63,8 @@ static inline unsigned __xfrm_src_hash(xfrm_address_t *daddr,
 }
 
 static inline unsigned int
-__xfrm_spi_hash(xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family,
-		unsigned int hmask)
+__xfrm_spi_hash(const xfrm_address_t *daddr, __be32 spi, u8 proto,
+		unsigned short family, unsigned int hmask)
 {
 	unsigned int h = (__force u32)spi ^ proto;
 	switch (family) {
@@ -80,10 +83,11 @@ static inline unsigned int __idx_hash(u32 index, unsigned int hmask)
 	return (index ^ (index >> 8)) & hmask;
 }
 
-static inline unsigned int __sel_hash(struct xfrm_selector *sel, unsigned short family, unsigned int hmask)
+static inline unsigned int __sel_hash(const struct xfrm_selector *sel,
+				      unsigned short family, unsigned int hmask)
 {
-	xfrm_address_t *daddr = &sel->daddr;
-	xfrm_address_t *saddr = &sel->saddr;
+	const xfrm_address_t *daddr = &sel->daddr;
+	const xfrm_address_t *saddr = &sel->saddr;
 	unsigned int h = 0;
 
 	switch (family) {
@@ -107,7 +111,9 @@ static inline unsigned int __sel_hash(struct xfrm_selector *sel, unsigned short
 	return h & hmask;
 }
 
-static inline unsigned int __addr_hash(xfrm_address_t *daddr, xfrm_address_t *saddr, unsigned short family, unsigned int hmask)
+static inline unsigned int __addr_hash(const xfrm_address_t *daddr,
+				       const xfrm_address_t *saddr,
+				       unsigned short family, unsigned int hmask)
 {
 	unsigned int h = 0;
 
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 6459588..b1932a6 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -51,14 +51,14 @@ static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo);
 static void xfrm_init_pmtu(struct dst_entry *dst);
 static int stale_bundle(struct dst_entry *dst);
 static int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *xdst,
-			  struct flowi *fl, int family, int strict);
+			  const struct flowi *fl, int family);
 
 
 static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
 						int dir);
 
 static inline int
-__xfrm4_selector_match(struct xfrm_selector *sel, struct flowi *fl)
+__xfrm4_selector_match(const struct xfrm_selector *sel, const struct flowi *fl)
 {
 	return  addr_match(&fl->fl4_dst, &sel->daddr, sel->prefixlen_d) &&
 		addr_match(&fl->fl4_src, &sel->saddr, sel->prefixlen_s) &&
@@ -69,7 +69,7 @@ __xfrm4_selector_match(struct xfrm_selector *sel, struct flowi *fl)
 }
 
 static inline int
-__xfrm6_selector_match(struct xfrm_selector *sel, struct flowi *fl)
+__xfrm6_selector_match(const struct xfrm_selector *sel, const struct flowi *fl)
 {
 	return  addr_match(&fl->fl6_dst, &sel->daddr, sel->prefixlen_d) &&
 		addr_match(&fl->fl6_src, &sel->saddr, sel->prefixlen_s) &&
@@ -79,8 +79,8 @@ __xfrm6_selector_match(struct xfrm_selector *sel, struct flowi *fl)
 		(fl->oif == sel->ifindex || !sel->ifindex);
 }
 
-int xfrm_selector_match(struct xfrm_selector *sel, struct flowi *fl,
-		    unsigned short family)
+int xfrm_selector_match(const struct xfrm_selector *sel, const struct flowi *fl,
+			unsigned short family)
 {
 	switch (family) {
 	case AF_INET:
@@ -92,8 +92,8 @@ int xfrm_selector_match(struct xfrm_selector *sel, struct flowi *fl,
 }
 
 static inline struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos,
-						  xfrm_address_t *saddr,
-						  xfrm_address_t *daddr,
+						  const xfrm_address_t *saddr,
+						  const xfrm_address_t *daddr,
 						  int family)
 {
 	struct xfrm_policy_afinfo *afinfo;
@@ -311,7 +311,9 @@ static inline unsigned int idx_hash(struct net *net, u32 index)
 	return __idx_hash(index, net->xfrm.policy_idx_hmask);
 }
 
-static struct hlist_head *policy_hash_bysel(struct net *net, struct xfrm_selector *sel, unsigned short family, int dir)
+static struct hlist_head *policy_hash_bysel(struct net *net,
+					    const struct xfrm_selector *sel,
+					    unsigned short family, int dir)
 {
 	unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;
 	unsigned int hash = __sel_hash(sel, family, hmask);
@@ -321,7 +323,10 @@ static struct hlist_head *policy_hash_bysel(struct net *net, struct xfrm_selecto
 		net->xfrm.policy_bydst[dir].table + hash);
 }
 
-static struct hlist_head *policy_hash_direct(struct net *net, xfrm_address_t *daddr, xfrm_address_t *saddr, unsigned short family, int dir)
+static struct hlist_head *policy_hash_direct(struct net *net,
+					     const xfrm_address_t *daddr,
+					     const xfrm_address_t *saddr,
+					     unsigned short family, int dir)
 {
 	unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;
 	unsigned int hash = __addr_hash(daddr, saddr, family, hmask);
@@ -864,10 +869,11 @@ EXPORT_SYMBOL(xfrm_policy_walk_done);
  *
  * Returns 0 if policy found, else an -errno.
  */
-static int xfrm_policy_match(struct xfrm_policy *pol, struct flowi *fl,
+static int xfrm_policy_match(const struct xfrm_policy *pol,
+			     const struct flowi *fl,
 			     u8 type, u16 family, int dir)
 {
-	struct xfrm_selector *sel = &pol->selector;
+	const struct xfrm_selector *sel = &pol->selector;
 	int match, ret = -ESRCH;
 
 	if (pol->family != family ||
@@ -884,12 +890,12 @@ static int xfrm_policy_match(struct xfrm_policy *pol, struct flowi *fl,
 }
 
 static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type,
-						     struct flowi *fl,
+						     const struct flowi *fl,
 						     u16 family, u8 dir)
 {
 	int err;
 	struct xfrm_policy *pol, *ret;
-	xfrm_address_t *daddr, *saddr;
+	const xfrm_address_t *daddr, *saddr;
 	struct hlist_node *entry;
 	struct hlist_head *chain;
 	u32 priority = ~0U;
@@ -941,7 +947,7 @@ fail:
 }
 
 static struct xfrm_policy *
-__xfrm_policy_lookup(struct net *net, struct flowi *fl, u16 family, u8 dir)
+__xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir)
 {
 #ifdef CONFIG_XFRM_SUB_POLICY
 	struct xfrm_policy *pol;
@@ -954,7 +960,7 @@ __xfrm_policy_lookup(struct net *net, struct flowi *fl, u16 family, u8 dir)
 }
 
 static struct flow_cache_object *
-xfrm_policy_lookup(struct net *net, struct flowi *fl, u16 family,
+xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family,
 		   u8 dir, struct flow_cache_object *old_obj, void *ctx)
 {
 	struct xfrm_policy *pol;
@@ -990,7 +996,8 @@ static inline int policy_to_flow_dir(int dir)
 	}
 }
 
-static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl)
+static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir,
+						 const struct flowi *fl)
 {
 	struct xfrm_policy *pol;
 
@@ -1098,7 +1105,7 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
 	return 0;
 }
 
-static struct xfrm_policy *clone_policy(struct xfrm_policy *old, int dir)
+static struct xfrm_policy *clone_policy(const struct xfrm_policy *old, int dir)
 {
 	struct xfrm_policy *newp = xfrm_policy_alloc(xp_net(old), GFP_ATOMIC);
 
@@ -1157,9 +1164,8 @@ xfrm_get_saddr(struct net *net, xfrm_address_t *local, xfrm_address_t *remote,
 /* Resolve list of templates for the flow, given policy. */
 
 static int
-xfrm_tmpl_resolve_one(struct xfrm_policy *policy, struct flowi *fl,
-		      struct xfrm_state **xfrm,
-		      unsigned short family)
+xfrm_tmpl_resolve_one(struct xfrm_policy *policy, const struct flowi *fl,
+		      struct xfrm_state **xfrm, unsigned short family)
 {
 	struct net *net = xp_net(policy);
 	int nx;
@@ -1214,9 +1220,8 @@ fail:
 }
 
 static int
-xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, struct flowi *fl,
-		  struct xfrm_state **xfrm,
-		  unsigned short family)
+xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, const struct flowi *fl,
+		  struct xfrm_state **xfrm, unsigned short family)
 {
 	struct xfrm_state *tp[XFRM_MAX_DEPTH];
 	struct xfrm_state **tpp = (npols > 1) ? tp : xfrm;
@@ -1256,7 +1261,7 @@ xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, struct flowi *fl,
  * still valid.
  */
 
-static inline int xfrm_get_tos(struct flowi *fl, int family)
+static inline int xfrm_get_tos(const struct flowi *fl, int family)
 {
 	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
 	int tos;
@@ -1340,7 +1345,7 @@ static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family)
 	default:
 		BUG();
 	}
-	xdst = dst_alloc(dst_ops);
+	xdst = dst_alloc(dst_ops, 0);
 	xfrm_policy_put_afinfo(afinfo);
 
 	if (likely(xdst))
@@ -1369,7 +1374,7 @@ static inline int xfrm_init_path(struct xfrm_dst *path, struct dst_entry *dst,
 }
 
 static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
-				struct flowi *fl)
+				const struct flowi *fl)
 {
 	struct xfrm_policy_afinfo *afinfo =
 		xfrm_policy_get_afinfo(xdst->u.dst.ops->family);
@@ -1392,7 +1397,7 @@ static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
 
 static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
 					    struct xfrm_state **xfrm, int nx,
-					    struct flowi *fl,
+					    const struct flowi *fl,
 					    struct dst_entry *dst)
 {
 	struct net *net = xp_net(policy);
@@ -1508,7 +1513,7 @@ free_dst:
 }
 
 static int inline
-xfrm_dst_alloc_copy(void **target, void *src, int size)
+xfrm_dst_alloc_copy(void **target, const void *src, int size)
 {
 	if (!*target) {
 		*target = kmalloc(size, GFP_ATOMIC);
@@ -1520,7 +1525,7 @@ xfrm_dst_alloc_copy(void **target, void *src, int size)
 }
 
 static int inline
-xfrm_dst_update_parent(struct dst_entry *dst, struct xfrm_selector *sel)
+xfrm_dst_update_parent(struct dst_entry *dst, const struct xfrm_selector *sel)
 {
 #ifdef CONFIG_XFRM_SUB_POLICY
 	struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
@@ -1532,7 +1537,7 @@ xfrm_dst_update_parent(struct dst_entry *dst, struct xfrm_selector *sel)
 }
 
 static int inline
-xfrm_dst_update_origin(struct dst_entry *dst, struct flowi *fl)
+xfrm_dst_update_origin(struct dst_entry *dst, const struct flowi *fl)
 {
 #ifdef CONFIG_XFRM_SUB_POLICY
 	struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
@@ -1542,7 +1547,7 @@ xfrm_dst_update_origin(struct dst_entry *dst, struct flowi *fl)
 #endif
 }
 
-static int xfrm_expand_policies(struct flowi *fl, u16 family,
+static int xfrm_expand_policies(const struct flowi *fl, u16 family,
 				struct xfrm_policy **pols,
 				int *num_pols, int *num_xfrms)
 {
@@ -1588,7 +1593,7 @@ static int xfrm_expand_policies(struct flowi *fl, u16 family,
 
 static struct xfrm_dst *
 xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
-			       struct flowi *fl, u16 family,
+			       const struct flowi *fl, u16 family,
 			       struct dst_entry *dst_orig)
 {
 	struct net *net = xp_net(pols[0]);
@@ -1631,7 +1636,7 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
 }
 
 static struct flow_cache_object *
-xfrm_bundle_lookup(struct net *net, struct flowi *fl, u16 family, u8 dir,
+xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir,
 		   struct flow_cache_object *oldflo, void *ctx)
 {
 	struct dst_entry *dst_orig = (struct dst_entry *)ctx;
@@ -1730,18 +1735,36 @@ error:
 	return ERR_PTR(err);
 }
 
+static struct dst_entry *make_blackhole(struct net *net, u16 family,
+					struct dst_entry *dst_orig)
+{
+	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
+	struct dst_entry *ret;
+
+	if (!afinfo) {
+		dst_release(dst_orig);
+		ret = ERR_PTR(-EINVAL);
+	} else {
+		ret = afinfo->blackhole_route(net, dst_orig);
+	}
+	xfrm_policy_put_afinfo(afinfo);
+
+	return ret;
+}
+
 /* Main function: finds/creates a bundle for given flow.
  *
  * At the moment we eat a raw IP route. Mostly to speed up lookups
  * on interfaces with disabled IPsec.
  */
-int __xfrm_lookup(struct net *net, struct dst_entry **dst_p, struct flowi *fl,
-		  struct sock *sk, int flags)
+struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
+			      const struct flowi *fl,
+			      struct sock *sk, int flags)
 {
 	struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
 	struct flow_cache_object *flo;
 	struct xfrm_dst *xdst;
-	struct dst_entry *dst, *dst_orig = *dst_p, *route;
+	struct dst_entry *dst, *route;
 	u16 family = dst_orig->ops->family;
 	u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT);
 	int i, err, num_pols, num_xfrms = 0, drop_pols = 0;
@@ -1823,9 +1846,10 @@ restart:
 			dst_release(dst);
 			xfrm_pols_put(pols, drop_pols);
 			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);
-			return -EREMOTE;
+
+			return make_blackhole(net, family, dst_orig);
 		}
-		if (flags & XFRM_LOOKUP_WAIT) {
+		if (fl->flags & FLOWI_FLAG_CAN_SLEEP) {
 			DECLARE_WAITQUEUE(wait, current);
 
 			add_wait_queue(&net->xfrm.km_waitq, &wait);
@@ -1867,47 +1891,33 @@ no_transform:
 		goto error;
 	} else if (num_xfrms > 0) {
 		/* Flow transformed */
-		*dst_p = dst;
 		dst_release(dst_orig);
 	} else {
 		/* Flow passes untransformed */
 		dst_release(dst);
+		dst = dst_orig;
 	}
 ok:
 	xfrm_pols_put(pols, drop_pols);
-	return 0;
+	return dst;
 
 nopol:
-	if (!(flags & XFRM_LOOKUP_ICMP))
+	if (!(flags & XFRM_LOOKUP_ICMP)) {
+		dst = dst_orig;
 		goto ok;
+	}
 	err = -ENOENT;
 error:
 	dst_release(dst);
 dropdst:
 	dst_release(dst_orig);
-	*dst_p = NULL;
 	xfrm_pols_put(pols, drop_pols);
-	return err;
-}
-EXPORT_SYMBOL(__xfrm_lookup);
-
-int xfrm_lookup(struct net *net, struct dst_entry **dst_p, struct flowi *fl,
-		struct sock *sk, int flags)
-{
-	int err = __xfrm_lookup(net, dst_p, fl, sk, flags);
-
-	if (err == -EREMOTE) {
-		dst_release(*dst_p);
-		*dst_p = NULL;
-		err = -EAGAIN;
-	}
-
-	return err;
+	return ERR_PTR(err);
 }
 EXPORT_SYMBOL(xfrm_lookup);
 
 static inline int
-xfrm_secpath_reject(int idx, struct sk_buff *skb, struct flowi *fl)
+xfrm_secpath_reject(int idx, struct sk_buff *skb, const struct flowi *fl)
 {
 	struct xfrm_state *x;
 
@@ -1926,7 +1936,7 @@ xfrm_secpath_reject(int idx, struct sk_buff *skb, struct flowi *fl)
  */
 
 static inline int
-xfrm_state_ok(struct xfrm_tmpl *tmpl, struct xfrm_state *x,
+xfrm_state_ok(const struct xfrm_tmpl *tmpl, const struct xfrm_state *x,
 	      unsigned short family)
 {
 	if (xfrm_state_kern(x))
@@ -1949,7 +1959,7 @@ xfrm_state_ok(struct xfrm_tmpl *tmpl, struct xfrm_state *x,
  * Otherwise "-2 - errored_index" is returned.
  */
 static inline int
-xfrm_policy_ok(struct xfrm_tmpl *tmpl, struct sec_path *sp, int start,
+xfrm_policy_ok(const struct xfrm_tmpl *tmpl, const struct sec_path *sp, int start,
 	       unsigned short family)
 {
 	int idx = start;
@@ -1987,7 +1997,7 @@ int __xfrm_decode_session(struct sk_buff *skb, struct flowi *fl,
 }
 EXPORT_SYMBOL(__xfrm_decode_session);
 
-static inline int secpath_has_nontransport(struct sec_path *sp, int k, int *idxp)
+static inline int secpath_has_nontransport(const struct sec_path *sp, int k, int *idxp)
 {
 	for (; k < sp->len; k++) {
 		if (sp->xvec[k]->props.mode != XFRM_MODE_TRANSPORT) {
@@ -2162,7 +2172,7 @@ int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)
 	struct net *net = dev_net(skb->dev);
 	struct flowi fl;
 	struct dst_entry *dst;
-	int res;
+	int res = 0;
 
 	if (xfrm_decode_session(skb, &fl, family) < 0) {
 		XFRM_INC_STATS(net, LINUX_MIB_XFRMFWDHDRERROR);
@@ -2170,9 +2180,12 @@ int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)
 	}
 
 	skb_dst_force(skb);
-	dst = skb_dst(skb);
 
-	res = xfrm_lookup(net, &dst, &fl, NULL, 0) == 0;
+	dst = xfrm_lookup(net, skb_dst(skb), &fl, NULL, 0);
+	if (IS_ERR(dst)) {
+		res = 1;
+		dst = NULL;
+	}
 	skb_dst_set(skb, dst);
 	return res;
 }
@@ -2210,7 +2223,7 @@ static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie)
 
 static int stale_bundle(struct dst_entry *dst)
 {
-	return !xfrm_bundle_ok(NULL, (struct xfrm_dst *)dst, NULL, AF_UNSPEC, 0);
+	return !xfrm_bundle_ok(NULL, (struct xfrm_dst *)dst, NULL, AF_UNSPEC);
 }
 
 void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev)
@@ -2283,7 +2296,7 @@ static void xfrm_init_pmtu(struct dst_entry *dst)
  */
 
 static int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first,
-		struct flowi *fl, int family, int strict)
+			  const struct flowi *fl, int family)
 {
 	struct dst_entry *dst = &first->u.dst;
 	struct xfrm_dst *last;
@@ -2320,11 +2333,6 @@ static int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first,
 		    xdst->policy_genid != atomic_read(&xdst->pols[0]->genid))
 			return 0;
 
-		if (strict && fl &&
-		    !(dst->xfrm->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL) &&
-		    !xfrm_state_addr_flow_check(dst->xfrm, fl, family))
-			return 0;
-
 		mtu = dst_mtu(dst->child);
 		if (xdst->child_mtu_cached != mtu) {
 			last = xdst;
@@ -2735,8 +2743,8 @@ EXPORT_SYMBOL_GPL(xfrm_audit_policy_delete);
 #endif
 
 #ifdef CONFIG_XFRM_MIGRATE
-static int xfrm_migrate_selector_match(struct xfrm_selector *sel_cmp,
-				       struct xfrm_selector *sel_tgt)
+static int xfrm_migrate_selector_match(const struct xfrm_selector *sel_cmp,
+				       const struct xfrm_selector *sel_tgt)
 {
 	if (sel_cmp->proto == IPSEC_ULPROTO_ANY) {
 		if (sel_tgt->family == sel_cmp->family &&
@@ -2756,7 +2764,7 @@ static int xfrm_migrate_selector_match(struct xfrm_selector *sel_cmp,
 	return 0;
 }
 
-static struct xfrm_policy * xfrm_migrate_policy_find(struct xfrm_selector *sel,
+static struct xfrm_policy * xfrm_migrate_policy_find(const struct xfrm_selector *sel,
 						     u8 dir, u8 type)
 {
 	struct xfrm_policy *pol, *ret = NULL;
@@ -2792,7 +2800,7 @@ static struct xfrm_policy * xfrm_migrate_policy_find(struct xfrm_selector *sel,
 	return ret;
 }
 
-static int migrate_tmpl_match(struct xfrm_migrate *m, struct xfrm_tmpl *t)
+static int migrate_tmpl_match(const struct xfrm_migrate *m, const struct xfrm_tmpl *t)
 {
 	int match = 0;
 
@@ -2862,7 +2870,7 @@ static int xfrm_policy_migrate(struct xfrm_policy *pol,
 	return 0;
 }
 
-static int xfrm_migrate_check(struct xfrm_migrate *m, int num_migrate)
+static int xfrm_migrate_check(const struct xfrm_migrate *m, int num_migrate)
 {
 	int i, j;
 
@@ -2896,7 +2904,7 @@ static int xfrm_migrate_check(struct xfrm_migrate *m, int num_migrate)
 	return 0;
 }
 
-int xfrm_migrate(struct xfrm_selector *sel, u8 dir, u8 type,
+int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
 		 struct xfrm_migrate *m, int num_migrate,
 		 struct xfrm_kmaddress *k)
 {
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 220ebc0..81221d9 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -50,8 +50,8 @@ static void xfrm_audit_state_replay(struct xfrm_state *x,
 #endif /* CONFIG_AUDITSYSCALL */
 
 static inline unsigned int xfrm_dst_hash(struct net *net,
-					 xfrm_address_t *daddr,
-					 xfrm_address_t *saddr,
+					 const xfrm_address_t *daddr,
+					 const xfrm_address_t *saddr,
 					 u32 reqid,
 					 unsigned short family)
 {
@@ -59,15 +59,16 @@ static inline unsigned int xfrm_dst_hash(struct net *net,
 }
 
 static inline unsigned int xfrm_src_hash(struct net *net,
-					 xfrm_address_t *daddr,
-					 xfrm_address_t *saddr,
+					 const xfrm_address_t *daddr,
+					 const xfrm_address_t *saddr,
 					 unsigned short family)
 {
 	return __xfrm_src_hash(daddr, saddr, family, net->xfrm.state_hmask);
 }
 
 static inline unsigned int
-xfrm_spi_hash(struct net *net, xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family)
+xfrm_spi_hash(struct net *net, const xfrm_address_t *daddr,
+	      __be32 spi, u8 proto, unsigned short family)
 {
 	return __xfrm_spi_hash(daddr, spi, proto, family, net->xfrm.state_hmask);
 }
@@ -656,9 +657,9 @@ void xfrm_sad_getinfo(struct net *net, struct xfrmk_sadinfo *si)
 EXPORT_SYMBOL(xfrm_sad_getinfo);
 
 static int
-xfrm_init_tempstate(struct xfrm_state *x, struct flowi *fl,
-		    struct xfrm_tmpl *tmpl,
-		    xfrm_address_t *daddr, xfrm_address_t *saddr,
+xfrm_init_tempstate(struct xfrm_state *x, const struct flowi *fl,
+		    const struct xfrm_tmpl *tmpl,
+		    const xfrm_address_t *daddr, const xfrm_address_t *saddr,
 		    unsigned short family)
 {
 	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
@@ -677,7 +678,10 @@ xfrm_init_tempstate(struct xfrm_state *x, struct flowi *fl,
 	return 0;
 }
 
-static struct xfrm_state *__xfrm_state_lookup(struct net *net, u32 mark, xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family)
+static struct xfrm_state *__xfrm_state_lookup(struct net *net, u32 mark,
+					      const xfrm_address_t *daddr,
+					      __be32 spi, u8 proto,
+					      unsigned short family)
 {
 	unsigned int h = xfrm_spi_hash(net, daddr, spi, proto, family);
 	struct xfrm_state *x;
@@ -699,7 +703,10 @@ static struct xfrm_state *__xfrm_state_lookup(struct net *net, u32 mark, xfrm_ad
 	return NULL;
 }
 
-static struct xfrm_state *__xfrm_state_lookup_byaddr(struct net *net, u32 mark, xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family)
+static struct xfrm_state *__xfrm_state_lookup_byaddr(struct net *net, u32 mark,
+						     const xfrm_address_t *daddr,
+						     const xfrm_address_t *saddr,
+						     u8 proto, unsigned short family)
 {
 	unsigned int h = xfrm_src_hash(net, daddr, saddr, family);
 	struct xfrm_state *x;
@@ -746,8 +753,7 @@ static void xfrm_hash_grow_check(struct net *net, int have_hash_collision)
 }
 
 static void xfrm_state_look_at(struct xfrm_policy *pol, struct xfrm_state *x,
-			       struct flowi *fl, unsigned short family,
-			       xfrm_address_t *daddr, xfrm_address_t *saddr,
+			       const struct flowi *fl, unsigned short family,
 			       struct xfrm_state **best, int *acq_in_progress,
 			       int *error)
 {
@@ -784,8 +790,8 @@ static void xfrm_state_look_at(struct xfrm_policy *pol, struct xfrm_state *x,
 }
 
 struct xfrm_state *
-xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
-		struct flowi *fl, struct xfrm_tmpl *tmpl,
+xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
+		const struct flowi *fl, struct xfrm_tmpl *tmpl,
 		struct xfrm_policy *pol, int *err,
 		unsigned short family)
 {
@@ -813,7 +819,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
 		    tmpl->mode == x->props.mode &&
 		    tmpl->id.proto == x->id.proto &&
 		    (tmpl->id.spi == x->id.spi || !tmpl->id.spi))
-			xfrm_state_look_at(pol, x, fl, encap_family, daddr, saddr,
+			xfrm_state_look_at(pol, x, fl, encap_family,
 					   &best, &acquire_in_progress, &error);
 	}
 	if (best)
@@ -829,7 +835,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
 		    tmpl->mode == x->props.mode &&
 		    tmpl->id.proto == x->id.proto &&
 		    (tmpl->id.spi == x->id.spi || !tmpl->id.spi))
-			xfrm_state_look_at(pol, x, fl, encap_family, daddr, saddr,
+			xfrm_state_look_at(pol, x, fl, encap_family,
 					   &best, &acquire_in_progress, &error);
 	}
 
@@ -991,7 +997,11 @@ void xfrm_state_insert(struct xfrm_state *x)
 EXPORT_SYMBOL(xfrm_state_insert);
 
 /* xfrm_state_lock is held */
-static struct xfrm_state *__find_acq_core(struct net *net, struct xfrm_mark *m, unsigned short family, u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create)
+static struct xfrm_state *__find_acq_core(struct net *net, struct xfrm_mark *m,
+					  unsigned short family, u8 mode,
+					  u32 reqid, u8 proto,
+					  const xfrm_address_t *daddr,
+					  const xfrm_address_t *saddr, int create)
 {
 	unsigned int h = xfrm_dst_hash(net, daddr, saddr, reqid, family);
 	struct hlist_node *entry;
@@ -1369,7 +1379,7 @@ int xfrm_state_check_expire(struct xfrm_state *x)
 EXPORT_SYMBOL(xfrm_state_check_expire);
 
 struct xfrm_state *
-xfrm_state_lookup(struct net *net, u32 mark, xfrm_address_t *daddr, __be32 spi,
+xfrm_state_lookup(struct net *net, u32 mark, const xfrm_address_t *daddr, __be32 spi,
 		  u8 proto, unsigned short family)
 {
 	struct xfrm_state *x;
@@ -1383,7 +1393,7 @@ EXPORT_SYMBOL(xfrm_state_lookup);
 
 struct xfrm_state *
 xfrm_state_lookup_byaddr(struct net *net, u32 mark,
-			 xfrm_address_t *daddr, xfrm_address_t *saddr,
+			 const xfrm_address_t *daddr, const xfrm_address_t *saddr,
 			 u8 proto, unsigned short family)
 {
 	struct xfrm_state *x;
@@ -1397,7 +1407,7 @@ EXPORT_SYMBOL(xfrm_state_lookup_byaddr);
 
 struct xfrm_state *
 xfrm_find_acq(struct net *net, struct xfrm_mark *mark, u8 mode, u32 reqid, u8 proto,
-	      xfrm_address_t *daddr, xfrm_address_t *saddr,
+	      const xfrm_address_t *daddr, const xfrm_address_t *saddr,
 	      int create, unsigned short family)
 {
 	struct xfrm_state *x;
@@ -1727,7 +1737,7 @@ void xfrm_replay_advance(struct xfrm_state *x, __be32 net_seq)
 static LIST_HEAD(xfrm_km_list);
 static DEFINE_RWLOCK(xfrm_km_lock);
 
-void km_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c)
+void km_policy_notify(struct xfrm_policy *xp, int dir, const struct km_event *c)
 {
 	struct xfrm_mgr *km;
 
@@ -1738,7 +1748,7 @@ void km_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c)
 	read_unlock(&xfrm_km_lock);
 }
 
-void km_state_notify(struct xfrm_state *x, struct km_event *c)
+void km_state_notify(struct xfrm_state *x, const struct km_event *c)
 {
 	struct xfrm_mgr *km;
 	read_lock(&xfrm_km_lock);
@@ -1819,9 +1829,9 @@ void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 pid)
 EXPORT_SYMBOL(km_policy_expired);
 
 #ifdef CONFIG_XFRM_MIGRATE
-int km_migrate(struct xfrm_selector *sel, u8 dir, u8 type,
-	       struct xfrm_migrate *m, int num_migrate,
-	       struct xfrm_kmaddress *k)
+int km_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
+	       const struct xfrm_migrate *m, int num_migrate,
+	       const struct xfrm_kmaddress *k)
 {
 	int err = -EINVAL;
 	int ret;
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 6129196..468ab60 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -234,7 +234,7 @@ out:
 }
 
 static int attach_one_algo(struct xfrm_algo **algpp, u8 *props,
-			   struct xfrm_algo_desc *(*get_byname)(char *, int),
+			   struct xfrm_algo_desc *(*get_byname)(const char *, int),
 			   struct nlattr *rta)
 {
 	struct xfrm_algo *p, *ualg;
@@ -497,9 +497,9 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
 	struct xfrm_state *x;
 	int err;
 	struct km_event c;
-	uid_t loginuid = NETLINK_CB(skb).loginuid;
-	u32 sessionid = NETLINK_CB(skb).sessionid;
-	u32 sid = NETLINK_CB(skb).sid;
+	uid_t loginuid = audit_get_loginuid(current);
+	u32 sessionid = audit_get_sessionid(current);
+	u32 sid;
 
 	err = verify_newsa_info(p, attrs);
 	if (err)
@@ -515,6 +515,7 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
 	else
 		err = xfrm_state_update(x);
 
+	security_task_getsecid(current, &sid);
 	xfrm_audit_state_add(x, err ? 0 : 1, loginuid, sessionid, sid);
 
 	if (err < 0) {
@@ -575,9 +576,9 @@ static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
 	int err = -ESRCH;
 	struct km_event c;
 	struct xfrm_usersa_id *p = nlmsg_data(nlh);
-	uid_t loginuid = NETLINK_CB(skb).loginuid;
-	u32 sessionid = NETLINK_CB(skb).sessionid;
-	u32 sid = NETLINK_CB(skb).sid;
+	uid_t loginuid = audit_get_loginuid(current);
+	u32 sessionid = audit_get_sessionid(current);
+	u32 sid;
 
 	x = xfrm_user_state_lookup(net, p, attrs, &err);
 	if (x == NULL)
@@ -602,6 +603,7 @@ static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
 	km_state_notify(x, &c);
 
 out:
+	security_task_getsecid(current, &sid);
 	xfrm_audit_state_delete(x, err ? 0 : 1, loginuid, sessionid, sid);
 	xfrm_state_put(x);
 	return err;
@@ -1265,9 +1267,9 @@ static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
 	struct km_event c;
 	int err;
 	int excl;
-	uid_t loginuid = NETLINK_CB(skb).loginuid;
-	u32 sessionid = NETLINK_CB(skb).sessionid;
-	u32 sid = NETLINK_CB(skb).sid;
+	uid_t loginuid = audit_get_loginuid(current);
+	u32 sessionid = audit_get_sessionid(current);
+	u32 sid;
 
 	err = verify_newpolicy_info(p);
 	if (err)
@@ -1286,6 +1288,7 @@ static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
 	 * a type XFRM_MSG_UPDPOLICY - JHS */
 	excl = nlh->nlmsg_type == XFRM_MSG_NEWPOLICY;
 	err = xfrm_policy_insert(p->dir, xp, excl);
+	security_task_getsecid(current, &sid);
 	xfrm_audit_policy_add(xp, err ? 0 : 1, loginuid, sessionid, sid);
 
 	if (err) {
@@ -1522,10 +1525,11 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
 					    NETLINK_CB(skb).pid);
 		}
 	} else {
-		uid_t loginuid = NETLINK_CB(skb).loginuid;
-		u32 sessionid = NETLINK_CB(skb).sessionid;
-		u32 sid = NETLINK_CB(skb).sid;
+		uid_t loginuid = audit_get_loginuid(current);
+		u32 sessionid = audit_get_sessionid(current);
+		u32 sid;
 
+		security_task_getsecid(current, &sid);
 		xfrm_audit_policy_delete(xp, err ? 0 : 1, loginuid, sessionid,
 					 sid);
 
@@ -1553,9 +1557,9 @@ static int xfrm_flush_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
 	struct xfrm_audit audit_info;
 	int err;
 
-	audit_info.loginuid = NETLINK_CB(skb).loginuid;
-	audit_info.sessionid = NETLINK_CB(skb).sessionid;
-	audit_info.secid = NETLINK_CB(skb).sid;
+	audit_info.loginuid = audit_get_loginuid(current);
+	audit_info.sessionid = audit_get_sessionid(current);
+	security_task_getsecid(current, &audit_info.secid);
 	err = xfrm_state_flush(net, p->proto, &audit_info);
 	if (err) {
 		if (err == -ESRCH) /* empty table */
@@ -1582,7 +1586,7 @@ static inline size_t xfrm_aevent_msgsize(void)
 	       + nla_total_size(4); /* XFRM_AE_ETHR */
 }
 
-static int build_aevent(struct sk_buff *skb, struct xfrm_state *x, struct km_event *c)
+static int build_aevent(struct sk_buff *skb, struct xfrm_state *x, const struct km_event *c)
 {
 	struct xfrm_aevent_id *id;
 	struct nlmsghdr *nlh;
@@ -1720,9 +1724,9 @@ static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
 	if (err)
 		return err;
 
-	audit_info.loginuid = NETLINK_CB(skb).loginuid;
-	audit_info.sessionid = NETLINK_CB(skb).sessionid;
-	audit_info.secid = NETLINK_CB(skb).sid;
+	audit_info.loginuid = audit_get_loginuid(current);
+	audit_info.sessionid = audit_get_sessionid(current);
+	security_task_getsecid(current, &audit_info.secid);
 	err = xfrm_policy_flush(net, type, &audit_info);
 	if (err) {
 		if (err == -ESRCH) /* empty table */
@@ -1789,9 +1793,11 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh,
 
 	err = 0;
 	if (up->hard) {
-		uid_t loginuid = NETLINK_CB(skb).loginuid;
-		uid_t sessionid = NETLINK_CB(skb).sessionid;
-		u32 sid = NETLINK_CB(skb).sid;
+		uid_t loginuid = audit_get_loginuid(current);
+		u32 sessionid = audit_get_sessionid(current);
+		u32 sid;
+
+		security_task_getsecid(current, &sid);
 		xfrm_policy_delete(xp, p->dir);
 		xfrm_audit_policy_delete(xp, 1, loginuid, sessionid, sid);
 
@@ -1830,9 +1836,11 @@ static int xfrm_add_sa_expire(struct sk_buff *skb, struct nlmsghdr *nlh,
 	km_state_expired(x, ue->hard, current->pid);
 
 	if (ue->hard) {
-		uid_t loginuid = NETLINK_CB(skb).loginuid;
-		uid_t sessionid = NETLINK_CB(skb).sessionid;
-		u32 sid = NETLINK_CB(skb).sid;
+		uid_t loginuid = audit_get_loginuid(current);
+		u32 sessionid = audit_get_sessionid(current);
+		u32 sid;
+
+		security_task_getsecid(current, &sid);
 		__xfrm_state_delete(x);
 		xfrm_audit_state_delete(x, 1, loginuid, sessionid, sid);
 	}
@@ -1986,7 +1994,7 @@ static int xfrm_do_migrate(struct sk_buff *skb, struct nlmsghdr *nlh,
 #endif
 
 #ifdef CONFIG_XFRM_MIGRATE
-static int copy_to_user_migrate(struct xfrm_migrate *m, struct sk_buff *skb)
+static int copy_to_user_migrate(const struct xfrm_migrate *m, struct sk_buff *skb)
 {
 	struct xfrm_user_migrate um;
 
@@ -2004,7 +2012,7 @@ static int copy_to_user_migrate(struct xfrm_migrate *m, struct sk_buff *skb)
 	return nla_put(skb, XFRMA_MIGRATE, sizeof(um), &um);
 }
 
-static int copy_to_user_kmaddress(struct xfrm_kmaddress *k, struct sk_buff *skb)
+static int copy_to_user_kmaddress(const struct xfrm_kmaddress *k, struct sk_buff *skb)
 {
 	struct xfrm_user_kmaddress uk;
 
@@ -2025,11 +2033,11 @@ static inline size_t xfrm_migrate_msgsize(int num_migrate, int with_kma)
 	      + userpolicy_type_attrsize();
 }
 
-static int build_migrate(struct sk_buff *skb, struct xfrm_migrate *m,
-			 int num_migrate, struct xfrm_kmaddress *k,
-			 struct xfrm_selector *sel, u8 dir, u8 type)
+static int build_migrate(struct sk_buff *skb, const struct xfrm_migrate *m,
+			 int num_migrate, const struct xfrm_kmaddress *k,
+			 const struct xfrm_selector *sel, u8 dir, u8 type)
 {
-	struct xfrm_migrate *mp;
+	const struct xfrm_migrate *mp;
 	struct xfrm_userpolicy_id *pol_id;
 	struct nlmsghdr *nlh;
 	int i;
@@ -2061,9 +2069,9 @@ nlmsg_failure:
 	return -EMSGSIZE;
 }
 
-static int xfrm_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type,
-			     struct xfrm_migrate *m, int num_migrate,
-			     struct xfrm_kmaddress *k)
+static int xfrm_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
+			     const struct xfrm_migrate *m, int num_migrate,
+			     const struct xfrm_kmaddress *k)
 {
 	struct net *net = &init_net;
 	struct sk_buff *skb;
@@ -2079,9 +2087,9 @@ static int xfrm_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type,
 	return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_MIGRATE, GFP_ATOMIC);
 }
 #else
-static int xfrm_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type,
-			     struct xfrm_migrate *m, int num_migrate,
-			     struct xfrm_kmaddress *k)
+static int xfrm_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
+			     const struct xfrm_migrate *m, int num_migrate,
+			     const struct xfrm_kmaddress *k)
 {
 	return -ENOPROTOOPT;
 }
@@ -2220,7 +2228,7 @@ static inline size_t xfrm_expire_msgsize(void)
 	       + nla_total_size(sizeof(struct xfrm_mark));
 }
 
-static int build_expire(struct sk_buff *skb, struct xfrm_state *x, struct km_event *c)
+static int build_expire(struct sk_buff *skb, struct xfrm_state *x, const struct km_event *c)
 {
 	struct xfrm_user_expire *ue;
 	struct nlmsghdr *nlh;
@@ -2242,7 +2250,7 @@ nla_put_failure:
 	return -EMSGSIZE;
 }
 
-static int xfrm_exp_state_notify(struct xfrm_state *x, struct km_event *c)
+static int xfrm_exp_state_notify(struct xfrm_state *x, const struct km_event *c)
 {
 	struct net *net = xs_net(x);
 	struct sk_buff *skb;
@@ -2259,7 +2267,7 @@ static int xfrm_exp_state_notify(struct xfrm_state *x, struct km_event *c)
 	return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_EXPIRE, GFP_ATOMIC);
 }
 
-static int xfrm_aevent_state_notify(struct xfrm_state *x, struct km_event *c)
+static int xfrm_aevent_state_notify(struct xfrm_state *x, const struct km_event *c)
 {
 	struct net *net = xs_net(x);
 	struct sk_buff *skb;
@@ -2274,7 +2282,7 @@ static int xfrm_aevent_state_notify(struct xfrm_state *x, struct km_event *c)
 	return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_AEVENTS, GFP_ATOMIC);
 }
 
-static int xfrm_notify_sa_flush(struct km_event *c)
+static int xfrm_notify_sa_flush(const struct km_event *c)
 {
 	struct net *net = c->net;
 	struct xfrm_usersa_flush *p;
@@ -2330,7 +2338,7 @@ static inline size_t xfrm_sa_len(struct xfrm_state *x)
 	return l;
 }
 
-static int xfrm_notify_sa(struct xfrm_state *x, struct km_event *c)
+static int xfrm_notify_sa(struct xfrm_state *x, const struct km_event *c)
 {
 	struct net *net = xs_net(x);
 	struct xfrm_usersa_info *p;
@@ -2387,7 +2395,7 @@ nla_put_failure:
 	return -1;
 }
 
-static int xfrm_send_state_notify(struct xfrm_state *x, struct km_event *c)
+static int xfrm_send_state_notify(struct xfrm_state *x, const struct km_event *c)
 {
 
 	switch (c->event) {
@@ -2546,7 +2554,7 @@ static inline size_t xfrm_polexpire_msgsize(struct xfrm_policy *xp)
 }
 
 static int build_polexpire(struct sk_buff *skb, struct xfrm_policy *xp,
-			   int dir, struct km_event *c)
+			   int dir, const struct km_event *c)
 {
 	struct xfrm_user_polexpire *upe;
 	struct nlmsghdr *nlh;
@@ -2576,7 +2584,7 @@ nlmsg_failure:
 	return -EMSGSIZE;
 }
 
-static int xfrm_exp_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c)
+static int xfrm_exp_policy_notify(struct xfrm_policy *xp, int dir, const struct km_event *c)
 {
 	struct net *net = xp_net(xp);
 	struct sk_buff *skb;
@@ -2591,7 +2599,7 @@ static int xfrm_exp_policy_notify(struct xfrm_policy *xp, int dir, struct km_eve
 	return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_EXPIRE, GFP_ATOMIC);
 }
 
-static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, struct km_event *c)
+static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, const struct km_event *c)
 {
 	struct net *net = xp_net(xp);
 	struct xfrm_userpolicy_info *p;
@@ -2656,7 +2664,7 @@ nlmsg_failure:
 	return -1;
 }
 
-static int xfrm_notify_policy_flush(struct km_event *c)
+static int xfrm_notify_policy_flush(const struct km_event *c)
 {
 	struct net *net = c->net;
 	struct nlmsghdr *nlh;
@@ -2681,7 +2689,7 @@ nlmsg_failure:
 	return -1;
 }
 
-static int xfrm_send_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c)
+static int xfrm_send_policy_notify(struct xfrm_policy *xp, int dir, const struct km_event *c)
 {
 
 	switch (c->event) {