From 84c61d92bb6e9048eecc0738a83f1bf66f053026 Mon Sep 17 00:00:00 2001
From: Johan Hedberg <johan.hedberg@intel.com>
Date: Fri, 1 Aug 2014 11:13:30 +0300
Subject: Bluetooth: Add convenience function to check for pending power off

There are several situations where we're interested in knowing whether
we're currently in the process of powering off an adapter. This patch
adds a convenience function for the purpose and makes it public since
we'll soon need to access it from hci_event.c as well.

Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_core.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index b5d5af3..8394abc 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -1351,6 +1351,7 @@ void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type,
 void mgmt_remote_name(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type,
 		      u8 addr_type, s8 rssi, u8 *name, u8 name_len);
 void mgmt_discovering(struct hci_dev *hdev, u8 discovering);
+bool mgmt_powering_down(struct hci_dev *hdev);
 void mgmt_new_ltk(struct hci_dev *hdev, struct smp_ltk *key, bool persistent);
 void mgmt_new_irk(struct hci_dev *hdev, struct smp_irk *irk);
 void mgmt_new_csrk(struct hci_dev *hdev, struct smp_csrk *csrk,
-- 
cgit v1.1


From 432df05eb1e57adfc46df08abbedca6c3b8862f7 Mon Sep 17 00:00:00 2001
From: Johan Hedberg <johan.hedberg@intel.com>
Date: Fri, 1 Aug 2014 11:13:31 +0300
Subject: Bluetooth: Create unified helper function for updating page scan

Similar to our hci_update_background_scan() function we can simplify a
lot of code by creating a unified helper function for doing page scan
updates. This patch adds such a function to hci_core.c and updates all
the relevant places to use it.

Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_core.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 8394abc..cc2eb77 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -968,6 +968,9 @@ void hci_conn_del_sysfs(struct hci_conn *conn);
 #define lmp_host_le_capable(dev)   (!!((dev)->features[1][0] & LMP_HOST_LE))
 #define lmp_host_le_br_capable(dev) (!!((dev)->features[1][0] & LMP_HOST_LE_BREDR))
 
+#define hdev_is_powered(hdev) (test_bit(HCI_UP, &hdev->flags) && \
+				!test_bit(HCI_AUTO_OFF, &hdev->dev_flags))
+
 /* ----- HCI protocols ----- */
 #define HCI_PROTO_DEFER             0x01
 
@@ -1256,6 +1259,8 @@ bool hci_req_pending(struct hci_dev *hdev);
 void hci_req_add_le_scan_disable(struct hci_request *req);
 void hci_req_add_le_passive_scan(struct hci_request *req);
 
+void hci_update_page_scan(struct hci_dev *hdev, struct hci_request *req);
+
 struct sk_buff *__hci_cmd_sync(struct hci_dev *hdev, u16 opcode, u32 plen,
 			       const void *param, u32 timeout);
 struct sk_buff *__hci_cmd_sync_ev(struct hci_dev *hdev, u16 opcode, u32 plen,
-- 
cgit v1.1


From d52deb17489b8155e031fb1a9f116c602d719e11 Mon Sep 17 00:00:00 2001
From: Johan Hedberg <johan.hedberg@intel.com>
Date: Thu, 7 Aug 2014 22:56:44 +0300
Subject: Bluetooth: Resume BT_CONNECTED state after LE security elevation

The LE ATT socket uses a special trick where it temporarily sets
BT_CONFIG state for the duration of a security level elevation. In order
to not require special hacks for going back to BT_CONNECTED state in the
l2cap_core.c code the most reasonable place to resume the state is the
resume callback. This patch adds a new flag to track the pending
security level change and ensures that the state is set back to
BT_CONNECTED in the resume callback in case the flag is set.

Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/l2cap.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h
index 8df15ad..4a51e75 100644
--- a/include/net/bluetooth/l2cap.h
+++ b/include/net/bluetooth/l2cap.h
@@ -708,6 +708,7 @@ enum {
 	FLAG_EFS_ENABLE,
 	FLAG_DEFER_SETUP,
 	FLAG_LE_CONN_REQ_SENT,
+	FLAG_PENDING_SECURITY,
 };
 
 enum {
-- 
cgit v1.1


From f193844c51e88ea3d2137bb0c1d38d27d37691a2 Mon Sep 17 00:00:00 2001
From: Johan Hedberg <johan.hedberg@intel.com>
Date: Fri, 8 Aug 2014 09:37:15 +0300
Subject: Bluetooth: Add more L2CAP convenience callbacks

In preparation for converting SMP to use l2cap_chan it's useful to add a
few more callback helpers so that smp.c won't need to define all of its
own.

Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/l2cap.h | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

(limited to 'include')

diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h
index 4a51e75..a72965f 100644
--- a/include/net/bluetooth/l2cap.h
+++ b/include/net/bluetooth/l2cap.h
@@ -838,18 +838,43 @@ static inline struct l2cap_chan *l2cap_chan_no_new_connection(struct l2cap_chan
 	return NULL;
 }
 
+static inline int l2cap_chan_no_recv(struct l2cap_chan *chan, struct sk_buff *skb)
+{
+	return -ENOSYS;
+}
+
+static inline struct sk_buff *l2cap_chan_no_alloc_skb(struct l2cap_chan *chan,
+						      unsigned long hdr_len,
+						      unsigned long len, int nb)
+{
+	return ERR_PTR(-ENOSYS);
+}
+
 static inline void l2cap_chan_no_teardown(struct l2cap_chan *chan, int err)
 {
 }
 
+static inline void l2cap_chan_no_close(struct l2cap_chan *chan)
+{
+}
+
 static inline void l2cap_chan_no_ready(struct l2cap_chan *chan)
 {
 }
 
+static inline void l2cap_chan_no_state_change(struct l2cap_chan *chan,
+					      int state, int err)
+{
+}
+
 static inline void l2cap_chan_no_defer(struct l2cap_chan *chan)
 {
 }
 
+static inline void l2cap_chan_no_suspend(struct l2cap_chan *chan)
+{
+}
+
 static inline void l2cap_chan_no_resume(struct l2cap_chan *chan)
 {
 }
-- 
cgit v1.1


From 70db83c4bcdc1447bbcb318389561c90d7056b18 Mon Sep 17 00:00:00 2001
From: Johan Hedberg <johan.hedberg@intel.com>
Date: Fri, 8 Aug 2014 09:37:16 +0300
Subject: Bluetooth: Add SMP L2CAP channel skeleton

This patch creates the initial SMP L2CAP channels and a skeleton for
their callbacks. There is one per-adapter channel created upon adapter
registration, and then one channel per-connection created through the
new_connection callback. The channels are registered with the reserved
CID 0x1f for now in order to not conflict with existing SMP
functionality. Once everything is in place the value can be changed to
what it should be, i.e. L2CAP_CID_SMP.

Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_core.h | 1 +
 include/net/bluetooth/l2cap.h    | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index cc2eb77..2571fc1 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -303,6 +303,7 @@ struct hci_dev {
 	__u32			req_result;
 
 	struct crypto_blkcipher	*tfm_aes;
+	void			*smp_data;
 
 	struct discovery_state	discovery;
 	struct hci_conn_hash	conn_hash;
diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h
index a72965f..1a037ba 100644
--- a/include/net/bluetooth/l2cap.h
+++ b/include/net/bluetooth/l2cap.h
@@ -637,6 +637,7 @@ struct l2cap_conn {
 
 	struct delayed_work	security_timer;
 	struct smp_chan		*smp_chan;
+	struct l2cap_chan	*smp;
 
 	struct list_head	chan_l;
 	struct mutex		chan_lock;
-- 
cgit v1.1


From defce9e83666658d4420d65e45ab1ad190992f72 Mon Sep 17 00:00:00 2001
From: Johan Hedberg <johan.hedberg@intel.com>
Date: Fri, 8 Aug 2014 09:37:17 +0300
Subject: Bluetooth: Make AES crypto context private to SMP

Now that we have per-adapter SMP data thanks to the root SMP L2CAP
channel we can take advantage of it and attach the AES crypto context
(only used for SMP) to it. This means that the smp_irk_matches() and
smp_generate_rpa() function can be converted to internally handle the
AES context.

Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_core.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 2571fc1..5f0b77b 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -302,7 +302,6 @@ struct hci_dev {
 	__u32			req_status;
 	__u32			req_result;
 
-	struct crypto_blkcipher	*tfm_aes;
 	void			*smp_data;
 
 	struct discovery_state	discovery;
-- 
cgit v1.1


From 5d88cc73dded31a93fcc4821f33a8c3d755bf454 Mon Sep 17 00:00:00 2001
From: Johan Hedberg <johan.hedberg@intel.com>
Date: Fri, 8 Aug 2014 09:37:18 +0300
Subject: Bluetooth: Convert SMP to use l2cap_chan infrastructure

Now that we have all the necessary pieces in place we can fully convert
SMP to use the L2CAP channel infrastructure. This patch adds the
necessary callbacks and removes the now unneeded conn->smp_chan pointer.

One notable behavioral change in this patch comes from the following
code snippet:

-       case L2CAP_CID_SMP:
-               if (smp_sig_channel(conn, skb))
-                       l2cap_conn_del(conn->hcon, EACCES);

This piece of code was essentially forcing a disconnection if garbage
SMP data was received. The l2cap_conn_del() function is private to
l2cap_conn.c so we don't have access to it anymore when using the L2CAP
channel callbacks. Therefore, the behavior of the new code is simply to
return errors in the recv() callback (which is simply the old
smp_sig_channel()), but no disconnection will occur.

Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/l2cap.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h
index 1a037ba..bda6252 100644
--- a/include/net/bluetooth/l2cap.h
+++ b/include/net/bluetooth/l2cap.h
@@ -636,7 +636,6 @@ struct l2cap_conn {
 	__u8			disc_reason;
 
 	struct delayed_work	security_timer;
-	struct smp_chan		*smp_chan;
 	struct l2cap_chan	*smp;
 
 	struct list_head	chan_l;
-- 
cgit v1.1


From dec5b49235e2526d7aacf5b93ea48f5e30c2f7c3 Mon Sep 17 00:00:00 2001
From: Johan Hedberg <johan.hedberg@intel.com>
Date: Mon, 11 Aug 2014 22:06:37 +0300
Subject: Bluetooth: Add public l2cap_conn_shutdown() API to request
 disconnection

Since we no-longer do special handling of SMP within l2cap_core.c we
don't have any code for calling l2cap_conn_del() when smp.c doesn't like
the data it gets. At the same time we cannot simply export
l2cap_conn_del() since it will try to lock the channels it calls into
whereas we already hold the lock in the smp.c l2cap_chan callbacks (i.e.
it'd lead to a deadlock).

This patch adds a new l2cap_conn_shutdown() API which is very similar to
l2cap_conn_del() except that it defers the call to l2cap_conn_del()
through a workqueue, thereby making it safe to use it from an L2CAP
channel callback.

Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/l2cap.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h
index bda6252..40f3486 100644
--- a/include/net/bluetooth/l2cap.h
+++ b/include/net/bluetooth/l2cap.h
@@ -625,6 +625,9 @@ struct l2cap_conn {
 
 	struct delayed_work	info_timer;
 
+	int			disconn_err;
+	struct work_struct	disconn_work;
+
 	struct sk_buff		*rx_skb;
 	__u32			rx_len;
 	__u8			tx_ident;
@@ -944,6 +947,7 @@ void l2cap_logical_cfm(struct l2cap_chan *chan, struct hci_chan *hchan,
 		       u8 status);
 void __l2cap_physical_cfm(struct l2cap_chan *chan, int result);
 
+void l2cap_conn_shutdown(struct l2cap_conn *conn, int err);
 void l2cap_conn_get(struct l2cap_conn *conn);
 void l2cap_conn_put(struct l2cap_conn *conn);
 
-- 
cgit v1.1


From 276d807317dead63ef2f13aa46e3c17d57ba0713 Mon Sep 17 00:00:00 2001
From: Johan Hedberg <johan.hedberg@intel.com>
Date: Mon, 11 Aug 2014 22:06:41 +0300
Subject: Bluetooth: Remove unused l2cap_conn->security_timer

Now that there are no-longer any users for l2cap_conn->security_timer we
can go ahead and simply remove it. The patch makes initialization of the
conn->info_timer unconditional since it's better not to leave any
l2cap_conn data structures uninitialized no matter what the underlying
transport.

Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/l2cap.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h
index 40f3486..cedda39 100644
--- a/include/net/bluetooth/l2cap.h
+++ b/include/net/bluetooth/l2cap.h
@@ -638,7 +638,6 @@ struct l2cap_conn {
 
 	__u8			disc_reason;
 
-	struct delayed_work	security_timer;
 	struct l2cap_chan	*smp;
 
 	struct list_head	chan_l;
-- 
cgit v1.1


From a74a8c846fb699f3277c0c21278bd4c414074b4a Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Tue, 22 Jul 2014 14:50:47 +0200
Subject: mac80211: don't duplicate station QoS capability data

We currently track the QoS capability twice: for all peer stations
in the WLAN_STA_WME flag, and for any clients associated to an AP
interface separately for drivers in the sta->sta.wme field.

Remove the WLAN_STA_WME flag and track the capability only in the
driver-visible field, getting rid of the limitation that the field
is only valid in AP mode.

Reviewed-by: Arik Nemtsov <arik@wizery.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index dae2e24..1cd8444 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -1405,7 +1405,7 @@ struct ieee80211_sta_rates {
  * @supp_rates: Bitmap of supported rates (per band)
  * @ht_cap: HT capabilities of this STA; restricted to our own capabilities
  * @vht_cap: VHT capabilities of this STA; restricted to our own capabilities
- * @wme: indicates whether the STA supports WME. Only valid during AP-mode.
+ * @wme: indicates whether the STA supports QoS/WME.
  * @drv_priv: data area for driver use, will always be aligned to
  *	sizeof(void *), size is determined in hw information.
  * @uapsd_queues: bitmap of queues configured for uapsd. Only valid
-- 
cgit v1.1


From e91ded8db57472c20b59b2242b100764cc152a10 Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier@gentoo.org>
Date: Mon, 4 Aug 2014 04:50:41 -0400
Subject: uapi: netfilter_arp: use __u8 instead of u_int8_t

Similarly, the u_int8_t type is non-standard and not defined.  Change
it to use __u8 like the rest of the netfilter headers.

Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter_arp/arpt_mangle.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/linux/netfilter_arp/arpt_mangle.h b/include/uapi/linux/netfilter_arp/arpt_mangle.h
index 250f502..8c2b16a 100644
--- a/include/uapi/linux/netfilter_arp/arpt_mangle.h
+++ b/include/uapi/linux/netfilter_arp/arpt_mangle.h
@@ -13,7 +13,7 @@ struct arpt_mangle
 	union {
 		struct in_addr tgt_ip;
 	} u_t;
-	u_int8_t flags;
+	__u8 flags;
 	int target;
 };
 
-- 
cgit v1.1


From d4261e5650004d6d51137553ea5433d5828562dc Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@resnulli.us>
Date: Tue, 19 Aug 2014 16:02:12 +0200
Subject: bonding: create netlink event when bonding option is changed

Userspace needs to be notified if one changes some option.

Signed-off-by: Jiri Pirko <jiri@resnulli.us>
Acked-by: Veaceslav Falico <vfalico@gmail.com>
Acked-by: Andy Gospodarek <gospo@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 3837739..7e2b0b8 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1982,6 +1982,7 @@ struct pcpu_sw_netstats {
 #define NETDEV_CHANGEUPPER	0x0015
 #define NETDEV_RESEND_IGMP	0x0016
 #define NETDEV_PRECHANGEMTU	0x0017 /* notify before mtu change happened */
+#define NETDEV_CHANGEINFODATA	0x0018
 
 int register_netdevice_notifier(struct notifier_block *nb);
 int unregister_netdevice_notifier(struct notifier_block *nb);
-- 
cgit v1.1


From d2de875c6d4cbec8a99c880160181a3ed5b9992e Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 22 Aug 2014 18:32:09 -0700
Subject: net: use ktime_get_ns() and ktime_get_real_ns() helpers

ktime_get_ns() replaces ktime_to_ns(ktime_get())

ktime_get_real_ns() replaces ktime_to_ns(ktime_get_real())

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/codel.h     | 2 +-
 include/net/pkt_sched.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/codel.h b/include/net/codel.h
index fe0eab3..aeee280 100644
--- a/include/net/codel.h
+++ b/include/net/codel.h
@@ -66,7 +66,7 @@ typedef s32 codel_tdiff_t;
 
 static inline codel_time_t codel_get_time(void)
 {
-	u64 ns = ktime_to_ns(ktime_get());
+	u64 ns = ktime_get_ns();
 
 	return ns >> CODEL_SHIFT;
 }
diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index ec030cd..8bbe626 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -50,7 +50,7 @@ typedef long	psched_tdiff_t;
 
 static inline psched_time_t psched_get_time(void)
 {
-	return PSCHED_NS2TICKS(ktime_to_ns(ktime_get()));
+	return PSCHED_NS2TICKS(ktime_get_ns());
 }
 
 static inline psched_tdiff_t
-- 
cgit v1.1


From 989e04c5bc3ff77d65e1f0d87bf7904dfa30d41c Mon Sep 17 00:00:00 2001
From: Yuchung Cheng <ycheng@google.com>
Date: Fri, 22 Aug 2014 14:15:22 -0700
Subject: tcp: improve undo on timeout

Upon timeout, undo (via both timestamps/Eifel and DSACKs) was
disabled if any retransmits were still in flight.  The concern was
perhaps that spurious retransmission sent in a previous recovery
episode may trigger DSACKs to falsely undo the current recovery.

However, this inadvertently misses undo opportunities (using either
TCP timestamps or DSACKs) when timeout occurs during a loss episode,
i.e.  recurring timeouts or timeout during fast recovery. In these
cases some retransmissions will be in flight but we should allow
undo. Furthermore, we should only reset undo_marker and undo_retrans
upon timeout if we are starting a new recovery episode. Finally,
when we do reset our undo state, we now do so in a manner similar
to tcp_enter_recovery(), so that we require a DSACK for each of
the outstsanding retransmissions. This will achieve the original
goal by requiring that we receive the same number of DSACKs as
retransmissions.

This patch increases the undo events by 50% on Google servers.

Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index fa5258f..e567f0d 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -276,7 +276,7 @@ struct tcp_sock {
 	u32	retrans_stamp;	/* Timestamp of the last retransmit,
 				 * also used in SYN-SENT to remember stamp of
 				 * the first SYN. */
-	u32	undo_marker;	/* tracking retrans started here. */
+	u32	undo_marker;	/* snd_una upon a new recovery episode. */
 	int	undo_retrans;	/* number of undoable retransmissions. */
 	u32	total_retrans;	/* Total retransmits for entire connection */
 
-- 
cgit v1.1


From 3af20efc0f83cdc65ce56ec108c0e81f602364df Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Fri, 22 Aug 2014 18:55:39 -0700
Subject: net: phy: broadcom: extract all registers to brcmphy.h

Commit 439d39a9ac8fbbba9c04581361188f33f21ced50 ("net: phy: broadcom:
extract register definitions") added a bunch of registers to brcmphy.h
but left some to broadcom.c, move all of them to the header file since
the BCM54xx and BCM7xxx PHY drivers do share all of these registers.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/brcmphy.h | 103 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 103 insertions(+)

(limited to 'include')

diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h
index 61219b9..be31bf9 100644
--- a/include/linux/brcmphy.h
+++ b/include/linux/brcmphy.h
@@ -92,4 +92,107 @@
 
 #define MII_BCM54XX_AUXCTL_SHDWSEL_AUXCTL	0x0000
 
+/*
+ * Broadcom LED source encodings.  These are used in BCM5461, BCM5481,
+ * BCM5482, and possibly some others.
+ */
+#define BCM_LED_SRC_LINKSPD1	0x0
+#define BCM_LED_SRC_LINKSPD2	0x1
+#define BCM_LED_SRC_XMITLED	0x2
+#define BCM_LED_SRC_ACTIVITYLED	0x3
+#define BCM_LED_SRC_FDXLED	0x4
+#define BCM_LED_SRC_SLAVE	0x5
+#define BCM_LED_SRC_INTR	0x6
+#define BCM_LED_SRC_QUALITY	0x7
+#define BCM_LED_SRC_RCVLED	0x8
+#define BCM_LED_SRC_MULTICOLOR1	0xa
+#define BCM_LED_SRC_OPENSHORT	0xb
+#define BCM_LED_SRC_OFF		0xe	/* Tied high */
+#define BCM_LED_SRC_ON		0xf	/* Tied low */
+
+
+/*
+ * BCM5482: Shadow registers
+ * Shadow values go into bits [14:10] of register 0x1c to select a shadow
+ * register to access.
+ */
+/* 00101: Spare Control Register 3 */
+#define BCM54XX_SHD_SCR3		0x05
+#define  BCM54XX_SHD_SCR3_DEF_CLK125	0x0001
+#define  BCM54XX_SHD_SCR3_DLLAPD_DIS	0x0002
+#define  BCM54XX_SHD_SCR3_TRDDAPD	0x0004
+
+/* 01010: Auto Power-Down */
+#define BCM54XX_SHD_APD			0x0a
+#define  BCM54XX_SHD_APD_EN		0x0020
+
+#define BCM5482_SHD_LEDS1	0x0d	/* 01101: LED Selector 1 */
+					/* LED3 / ~LINKSPD[2] selector */
+#define BCM5482_SHD_LEDS1_LED3(src)	((src & 0xf) << 4)
+					/* LED1 / ~LINKSPD[1] selector */
+#define BCM5482_SHD_LEDS1_LED1(src)	((src & 0xf) << 0)
+#define BCM54XX_SHD_RGMII_MODE	0x0b	/* 01011: RGMII Mode Selector */
+#define BCM5482_SHD_SSD		0x14	/* 10100: Secondary SerDes control */
+#define BCM5482_SHD_SSD_LEDM	0x0008	/* SSD LED Mode enable */
+#define BCM5482_SHD_SSD_EN	0x0001	/* SSD enable */
+#define BCM5482_SHD_MODE	0x1f	/* 11111: Mode Control Register */
+#define BCM5482_SHD_MODE_1000BX	0x0001	/* Enable 1000BASE-X registers */
+
+
+/*
+ * EXPANSION SHADOW ACCESS REGISTERS.  (PHY REG 0x15, 0x16, and 0x17)
+ */
+#define MII_BCM54XX_EXP_AADJ1CH0		0x001f
+#define  MII_BCM54XX_EXP_AADJ1CH0_SWP_ABCD_OEN	0x0200
+#define  MII_BCM54XX_EXP_AADJ1CH0_SWSEL_THPF	0x0100
+#define MII_BCM54XX_EXP_AADJ1CH3		0x601f
+#define  MII_BCM54XX_EXP_AADJ1CH3_ADCCKADJ	0x0002
+#define MII_BCM54XX_EXP_EXP08			0x0F08
+#define  MII_BCM54XX_EXP_EXP08_RJCT_2MHZ	0x0001
+#define  MII_BCM54XX_EXP_EXP08_EARLY_DAC_WAKE	0x0200
+#define MII_BCM54XX_EXP_EXP75			0x0f75
+#define  MII_BCM54XX_EXP_EXP75_VDACCTRL		0x003c
+#define  MII_BCM54XX_EXP_EXP75_CM_OSC		0x0001
+#define MII_BCM54XX_EXP_EXP96			0x0f96
+#define  MII_BCM54XX_EXP_EXP96_MYST		0x0010
+#define MII_BCM54XX_EXP_EXP97			0x0f97
+#define  MII_BCM54XX_EXP_EXP97_MYST		0x0c0c
+
+/*
+ * BCM5482: Secondary SerDes registers
+ */
+#define BCM5482_SSD_1000BX_CTL		0x00	/* 1000BASE-X Control */
+#define BCM5482_SSD_1000BX_CTL_PWRDOWN	0x0800	/* Power-down SSD */
+#define BCM5482_SSD_SGMII_SLAVE		0x15	/* SGMII Slave Register */
+#define BCM5482_SSD_SGMII_SLAVE_EN	0x0002	/* Slave mode enable */
+#define BCM5482_SSD_SGMII_SLAVE_AD	0x0001	/* Slave auto-detection */
+
+
+/*****************************************************************************/
+/* Fast Ethernet Transceiver definitions. */
+/*****************************************************************************/
+
+#define MII_BRCM_FET_INTREG		0x1a	/* Interrupt register */
+#define MII_BRCM_FET_IR_MASK		0x0100	/* Mask all interrupts */
+#define MII_BRCM_FET_IR_LINK_EN		0x0200	/* Link status change enable */
+#define MII_BRCM_FET_IR_SPEED_EN	0x0400	/* Link speed change enable */
+#define MII_BRCM_FET_IR_DUPLEX_EN	0x0800	/* Duplex mode change enable */
+#define MII_BRCM_FET_IR_ENABLE		0x4000	/* Interrupt enable */
+
+#define MII_BRCM_FET_BRCMTEST		0x1f	/* Brcm test register */
+#define MII_BRCM_FET_BT_SRE		0x0080	/* Shadow register enable */
+
+
+/*** Shadow register definitions ***/
+
+#define MII_BRCM_FET_SHDW_MISCCTRL	0x10	/* Shadow misc ctrl */
+#define MII_BRCM_FET_SHDW_MC_FAME	0x4000	/* Force Auto MDIX enable */
+
+#define MII_BRCM_FET_SHDW_AUXMODE4	0x1a	/* Auxiliary mode 4 */
+#define MII_BRCM_FET_SHDW_AM4_LED_MASK	0x0003
+#define MII_BRCM_FET_SHDW_AM4_LED_MODE1 0x0001
+
+#define MII_BRCM_FET_SHDW_AUXSTAT2	0x1b	/* Auxiliary status 2 */
+#define MII_BRCM_FET_SHDW_AS2_APDE	0x0020	/* Auto power down enable */
+
 #endif /* _LINUX_BRCMPHY_H */
-- 
cgit v1.1


From 705314797b8b997554b7e9d0ea7b65a497356e53 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Fri, 22 Aug 2014 18:55:40 -0700
Subject: net: phy: broadcom: move shadow 0x1C register accessors to brcmphy.h

The shadow register 0x1C is used both by the BCM54xxx PHYs and the
BCM7xxx internal PHYs, move the accessors to a common location so both
drivers can use them.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/brcmphy.h | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

(limited to 'include')

diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h
index be31bf9..722cf26 100644
--- a/include/linux/brcmphy.h
+++ b/include/linux/brcmphy.h
@@ -195,4 +195,24 @@
 #define MII_BRCM_FET_SHDW_AUXSTAT2	0x1b	/* Auxiliary status 2 */
 #define MII_BRCM_FET_SHDW_AS2_APDE	0x0020	/* Auto power down enable */
 
+/*
+ * Indirect register access functions for the 1000BASE-T/100BASE-TX/10BASE-T
+ * 0x1c shadow registers.
+ */
+static inline int bcm54xx_shadow_read(struct phy_device *phydev, u16 shadow)
+{
+	phy_write(phydev, MII_BCM54XX_SHD, MII_BCM54XX_SHD_VAL(shadow));
+	return MII_BCM54XX_SHD_DATA(phy_read(phydev, MII_BCM54XX_SHD));
+}
+
+static inline int bcm54xx_shadow_write(struct phy_device *phydev, u16 shadow,
+				       u16 val)
+{
+	return phy_write(phydev, MII_BCM54XX_SHD,
+			 MII_BCM54XX_SHD_WRITE |
+			 MII_BCM54XX_SHD_VAL(shadow) |
+			 MII_BCM54XX_SHD_DATA(val));
+}
+
+
 #endif /* _LINUX_BRCMPHY_H */
-- 
cgit v1.1


From 66ce7fb9807b036058aa380bfd2b3851ae25ce39 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Fri, 22 Aug 2014 18:55:43 -0700
Subject: net: phy: export phy_{read,write}_mmd_indirect

Some PHY drivers might need to access Clause 45 registers in Clause 22
compatibility mode to e.g: properly advertise EEE support when disabled
by default.

Export these two helper functions: phy_read_mmd_indirect() and
phy_write_mmd_indirect() for drivers to use them.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/phy.h | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

(limited to 'include')

diff --git a/include/linux/phy.h b/include/linux/phy.h
index ed39956..d090cfc 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -598,6 +598,19 @@ static inline int phy_read_mmd(struct phy_device *phydev, int devad, u32 regnum)
 }
 
 /**
+ * phy_read_mmd_indirect - reads data from the MMD registers
+ * @phydev: The PHY device bus
+ * @prtad: MMD Address
+ * @devad: MMD DEVAD
+ * @addr: PHY address on the MII bus
+ *
+ * Description: it reads data from the MMD registers (clause 22 to access to
+ * clause 45) of the specified phy address.
+ */
+int phy_read_mmd_indirect(struct phy_device *phydev, int prtad,
+			  int devad, int addr);
+
+/**
  * phy_read - Convenience function for reading a given PHY register
  * @phydev: the phy_device struct
  * @regnum: register number to read
@@ -668,6 +681,20 @@ static inline int phy_write_mmd(struct phy_device *phydev, int devad,
 	return mdiobus_write(phydev->bus, phydev->addr, regnum, val);
 }
 
+/**
+ * phy_write_mmd_indirect - writes data to the MMD registers
+ * @phydev: The PHY device
+ * @prtad: MMD Address
+ * @devad: MMD DEVAD
+ * @addr: PHY address on the MII bus
+ * @data: data to write in the MMD register
+ *
+ * Description: Write data from the MMD registers of the specified
+ * phy address.
+ */
+void phy_write_mmd_indirect(struct phy_device *phydev, int prtad,
+			    int devad, int addr, u32 data);
+
 struct phy_device *phy_device_create(struct mii_bus *bus, int addr, int phy_id,
 				     bool is_c45,
 				     struct phy_c45_device_ids *c45_ids);
-- 
cgit v1.1


From b8f9a02924bbeb0c46ca4c19561cbe765b80e264 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Fri, 22 Aug 2014 18:55:45 -0700
Subject: net: phy: bcm7xxx: enable EEE at the PHY level

The 28nm Gigabit PHY on BCM7xxx chips comes out of reset with absolutely
no EEE capabilities, such that we would actually return that we do not
support EEE when accessing 3.20 (MDIO_PCS_EEE_ABLE) registers.

Poke through the vendor-specific C45 register to enable EEE globally at
the PHY level, and advertise supported EEE modes.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/brcmphy.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h
index 722cf26..ee1431d9 100644
--- a/include/linux/brcmphy.h
+++ b/include/linux/brcmphy.h
@@ -214,5 +214,8 @@ static inline int bcm54xx_shadow_write(struct phy_device *phydev, u16 shadow,
 			 MII_BCM54XX_SHD_DATA(val));
 }
 
+#define BRCM_CL45VEN_EEE_CONTROL	0x803d
+#define LPI_FEATURE_EN			0x8000
+#define LPI_FEATURE_EN_DIG1000X		0x4000
 
 #endif /* _LINUX_BRCMPHY_H */
-- 
cgit v1.1


From 690e36e726d00d2528bc569809048adf61550d80 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Sat, 23 Aug 2014 12:13:41 -0700
Subject: net: Allow raw buffers to be passed into the flow dissector.

Drivers, and perhaps other entities we have not yet considered,
sometimes want to know how deep the protocol headers go before
deciding how large of an SKB to allocate and how much of the packet to
place into the linear SKB area.

For example, consider a driver which has a device which DMAs into
pools of pages and then tells the driver where the data went in the
DMA descriptor(s).  The driver can then build an SKB and reference
most of the data via SKB fragments (which are page/offset/length
triplets).

However at least some of the front of the packet should be placed into
the linear SKB area, which comes before the fragments, so that packet
processing can get at the headers efficiently.  The first thing each
protocol layer is going to do is a "pskb_may_pull()" so we might as
well aggregate as much of this as possible while we're building the
SKB in the driver.

Part of supporting this is that we don't have an SKB yet, so we want
to be able to let the flow dissector operate on a raw buffer in order
to compute the offset of the end of the headers.

So now we have a __skb_flow_dissect() which takes an explicit data
pointer and length.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h  | 18 ++++++++++++------
 include/net/flow_keys.h | 14 ++++++++++++--
 2 files changed, 24 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index abde271..18ddf96 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -2567,20 +2567,26 @@ __wsum __skb_checksum(const struct sk_buff *skb, int offset, int len,
 __wsum skb_checksum(const struct sk_buff *skb, int offset, int len,
 		    __wsum csum);
 
-static inline void *skb_header_pointer(const struct sk_buff *skb, int offset,
-				       int len, void *buffer)
+static inline void *__skb_header_pointer(const struct sk_buff *skb, int offset,
+					 int len, void *data, int hlen, void *buffer)
 {
-	int hlen = skb_headlen(skb);
-
 	if (hlen - offset >= len)
-		return skb->data + offset;
+		return data + offset;
 
-	if (skb_copy_bits(skb, offset, buffer, len) < 0)
+	if (!skb ||
+	    skb_copy_bits(skb, offset, buffer, len) < 0)
 		return NULL;
 
 	return buffer;
 }
 
+static inline void *skb_header_pointer(const struct sk_buff *skb, int offset,
+				       int len, void *buffer)
+{
+	return __skb_header_pointer(skb, offset, len, skb->data,
+				    skb_headlen(skb), buffer);
+}
+
 /**
  *	skb_needs_linearize - check if we need to linearize a given skb
  *			      depending on the given device features.
diff --git a/include/net/flow_keys.h b/include/net/flow_keys.h
index 6667a05..4040f63 100644
--- a/include/net/flow_keys.h
+++ b/include/net/flow_keys.h
@@ -27,7 +27,17 @@ struct flow_keys {
 	u8 ip_proto;
 };
 
-bool skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow);
-__be32 skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto);
+bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow,
+			void *data, int hlen);
+static inline bool skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow)
+{
+	return __skb_flow_dissect(skb, flow, NULL, 0);
+}
+__be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto,
+			    void *data, int hlen_proto);
+static inline __be32 skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto)
+{
+	return __skb_flow_get_ports(skb, thoff, ip_proto, NULL, 0);
+}
 u32 flow_hash_from_keys(struct flow_keys *keys);
 #endif
-- 
cgit v1.1


From e2a093ff0dbfa4c5d99f25241cf33325e9691d91 Mon Sep 17 00:00:00 2001
From: Ana Rey <anarey@gmail.com>
Date: Wed, 6 Aug 2014 13:52:49 +0200
Subject: netfilter: nft_meta: add pkttype support

Add pkttype support for ip, ipv6 and inet families of tables.

This allows you to fetch the meta packet type based on the link layer
information. The loopback traffic is a special case, the packet type
is guessed from the network layer header.

No special handling for bridge and arp since we're not going to see
such traffic in the loopback interface.

Joint work with Alvaro Neira Ayuso <alvaroneay@gmail.com>

Signed-off-by: Alvaro Neira Ayuso <alvaroneay@gmail.com>
Signed-off-by: Ana Rey <anarey@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter/nf_tables.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 801bdd1..98144cd 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -571,6 +571,7 @@ enum nft_exthdr_attributes {
  * @NFT_META_L4PROTO: layer 4 protocol number
  * @NFT_META_BRI_IIFNAME: packet input bridge interface name
  * @NFT_META_BRI_OIFNAME: packet output bridge interface name
+ * @NFT_META_PKTTYPE: packet type (skb->pkt_type), special handling for loopback
  */
 enum nft_meta_keys {
 	NFT_META_LEN,
@@ -592,6 +593,7 @@ enum nft_meta_keys {
 	NFT_META_L4PROTO,
 	NFT_META_BRI_IIFNAME,
 	NFT_META_BRI_OIFNAME,
+	NFT_META_PKTTYPE,
 };
 
 /**
-- 
cgit v1.1


From afc5be3079796b024823bad42dc5ebf716453575 Mon Sep 17 00:00:00 2001
From: Ana Rey <anarey@gmail.com>
Date: Sun, 24 Aug 2014 14:08:36 +0200
Subject: netfilter: nft_meta: Add cpu attribute support

Add cpu support to meta expresion.

This allows you to match packets with cpu number.

Signed-off-by: Ana Rey <anarey@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter/nf_tables.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 98144cd..c9b6f00 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -572,6 +572,7 @@ enum nft_exthdr_attributes {
  * @NFT_META_BRI_IIFNAME: packet input bridge interface name
  * @NFT_META_BRI_OIFNAME: packet output bridge interface name
  * @NFT_META_PKTTYPE: packet type (skb->pkt_type), special handling for loopback
+ * @NFT_META_CPU: cpu id through smp_processor_id()
  */
 enum nft_meta_keys {
 	NFT_META_LEN,
@@ -594,6 +595,7 @@ enum nft_meta_keys {
 	NFT_META_BRI_IIFNAME,
 	NFT_META_BRI_OIFNAME,
 	NFT_META_PKTTYPE,
+	NFT_META_CPU,
 };
 
 /**
-- 
cgit v1.1


From 1b05756c48ea07ced9604ef01d11194d936da163 Mon Sep 17 00:00:00 2001
From: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Date: Tue, 5 Aug 2014 22:02:34 +0200
Subject: netfilter: ipset: Fix warn: integer overflows 'sizeof(*map) + size *
 set->dsize'

Dan Carpenter reported that the static checker emits the warning

        net/netfilter/ipset/ip_set_list_set.c:600 init_list_set()
        warn: integer overflows 'sizeof(*map) + size * set->dsize'

Limit the maximal number of elements in list type of sets.

Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
---
 include/linux/netfilter/ipset/ip_set_list.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/netfilter/ipset/ip_set_list.h b/include/linux/netfilter/ipset/ip_set_list.h
index 68c2aea..fe2622a 100644
--- a/include/linux/netfilter/ipset/ip_set_list.h
+++ b/include/linux/netfilter/ipset/ip_set_list.h
@@ -6,5 +6,6 @@
 
 #define IP_SET_LIST_DEFAULT_SIZE	8
 #define IP_SET_LIST_MIN_SIZE		4
+#define IP_SET_LIST_MAX_SIZE		65536
 
 #endif /* __IP_SET_LIST_H */
-- 
cgit v1.1


From 573e8fca255a27e3573b51f9b183d62641c47a3d Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Fri, 22 Aug 2014 13:33:47 -0700
Subject: net: skb_gro_checksum_* functions

Add skb_gro_checksum_validate, skb_gro_checksum_validate_zero_check,
and skb_gro_checksum_simple_validate, and __skb_gro_checksum_complete.
These are the cognates of the normal checksum functions but are used
in the gro_receive path and operate on GRO related fields in sk_buffs.

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 76 +++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 74 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 7e2b0b8..eb73444 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1883,7 +1883,13 @@ struct napi_gro_cb {
 	u16	proto;
 
 	/* Used in udp_gro_receive */
-	u16	udp_mark;
+	u8	udp_mark:1;
+
+	/* GRO checksum is valid */
+	u8	csum_valid:1;
+
+	/* Number encapsulation layers crossed */
+	u8	encapsulation;
 
 	/* used to support CHECKSUM_COMPLETE for tunneling protocols */
 	__wsum	csum;
@@ -2154,11 +2160,77 @@ static inline void *skb_gro_network_header(struct sk_buff *skb)
 static inline void skb_gro_postpull_rcsum(struct sk_buff *skb,
 					const void *start, unsigned int len)
 {
-	if (skb->ip_summed == CHECKSUM_COMPLETE)
+	if (NAPI_GRO_CB(skb)->csum_valid)
 		NAPI_GRO_CB(skb)->csum = csum_sub(NAPI_GRO_CB(skb)->csum,
 						  csum_partial(start, len, 0));
 }
 
+/* GRO checksum functions. These are logical equivalents of the normal
+ * checksum functions (in skbuff.h) except that they operate on the GRO
+ * offsets and fields in sk_buff.
+ */
+
+__sum16 __skb_gro_checksum_complete(struct sk_buff *skb);
+
+static inline bool __skb_gro_checksum_validate_needed(struct sk_buff *skb,
+						      bool zero_okay,
+						      __sum16 check)
+{
+	return (skb->ip_summed != CHECKSUM_PARTIAL &&
+		(skb->ip_summed != CHECKSUM_UNNECESSARY ||
+		 (NAPI_GRO_CB(skb)->encapsulation > skb->encapsulation)) &&
+		(!zero_okay || check));
+}
+
+static inline __sum16 __skb_gro_checksum_validate_complete(struct sk_buff *skb,
+							   __wsum psum)
+{
+	if (NAPI_GRO_CB(skb)->csum_valid &&
+	    !csum_fold(csum_add(psum, NAPI_GRO_CB(skb)->csum)))
+		return 0;
+
+	NAPI_GRO_CB(skb)->csum = psum;
+
+	return __skb_gro_checksum_complete(skb);
+}
+
+/* Update skb for CHECKSUM_UNNECESSARY when we verified a top level
+ * checksum or an encapsulated one during GRO. This saves work
+ * if we fallback to normal path with the packet.
+ */
+static inline void skb_gro_incr_csum_unnecessary(struct sk_buff *skb)
+{
+	if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
+		if (NAPI_GRO_CB(skb)->encapsulation)
+			skb->encapsulation = 1;
+	} else if (skb->ip_summed != CHECKSUM_PARTIAL) {
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+		skb->encapsulation = 0;
+	}
+}
+
+#define __skb_gro_checksum_validate(skb, proto, zero_okay, check,	\
+				    compute_pseudo)			\
+({									\
+	__sum16 __ret = 0;						\
+	if (__skb_gro_checksum_validate_needed(skb, zero_okay, check))	\
+		__ret = __skb_gro_checksum_validate_complete(skb,	\
+				compute_pseudo(skb, proto));		\
+	if (!__ret)							\
+		skb_gro_incr_csum_unnecessary(skb);			\
+	__ret;								\
+})
+
+#define skb_gro_checksum_validate(skb, proto, compute_pseudo)		\
+	__skb_gro_checksum_validate(skb, proto, false, 0, compute_pseudo)
+
+#define skb_gro_checksum_validate_zero_check(skb, proto, check,		\
+					     compute_pseudo)		\
+	__skb_gro_checksum_validate(skb, proto, true, check, compute_pseudo)
+
+#define skb_gro_checksum_simple_validate(skb)				\
+	__skb_gro_checksum_validate(skb, 0, false, 0, null_compute_pseudo)
+
 static inline int dev_hard_header(struct sk_buff *skb, struct net_device *dev,
 				  unsigned short type,
 				  const void *daddr, const void *saddr,
-- 
cgit v1.1


From 1933a7852ce6a81349855431b25122d7666bbfca Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Fri, 22 Aug 2014 13:34:04 -0700
Subject: net: add gro_compute_pseudo functions

Add inet_gro_compute_pseudo and ip6_gro_compute_pseudo. These are
the logical equivalents of inet_compute_pseudo and ip6_compute_pseudo
for GRO path. The IP header is taken from skb_gro_network_header.

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip.h           | 8 ++++++++
 include/net/ip6_checksum.h | 8 ++++++++
 2 files changed, 16 insertions(+)

(limited to 'include')

diff --git a/include/net/ip.h b/include/net/ip.h
index db4a771..c8fd611 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -364,6 +364,14 @@ static inline void inet_set_txhash(struct sock *sk)
 	sk->sk_txhash = flow_hash_from_keys(&keys);
 }
 
+static inline __wsum inet_gro_compute_pseudo(struct sk_buff *skb, int proto)
+{
+	const struct iphdr *iph = skb_gro_network_header(skb);
+
+	return csum_tcpudp_nofold(iph->saddr, iph->daddr,
+				  skb_gro_len(skb), proto, 0);
+}
+
 /*
  *	Map a multicast IP onto multicast MAC for type ethernet.
  */
diff --git a/include/net/ip6_checksum.h b/include/net/ip6_checksum.h
index 55236cb..1a49b73 100644
--- a/include/net/ip6_checksum.h
+++ b/include/net/ip6_checksum.h
@@ -48,6 +48,14 @@ static inline __wsum ip6_compute_pseudo(struct sk_buff *skb, int proto)
 					    skb->len, proto, 0));
 }
 
+static inline __wsum ip6_gro_compute_pseudo(struct sk_buff *skb, int proto)
+{
+	const struct ipv6hdr *iph = skb_gro_network_header(skb);
+
+	return ~csum_unfold(csum_ipv6_magic(&iph->saddr, &iph->daddr,
+					    skb_gro_len(skb), proto, 0));
+}
+
 static __inline__ __sum16 tcp_v6_check(int len,
 				   const struct in6_addr *saddr,
 				   const struct in6_addr *daddr,
-- 
cgit v1.1


From 57c67ff4bd92af634f7c91c40eb02a96dd785dda Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Fri, 22 Aug 2014 13:34:44 -0700
Subject: udp: additional GRO support

Implement GRO for UDPv6. Add UDP checksum verification in gro_receive
for both UDP4 and UDP6 calling skb_gro_checksum_validate_zero_check.

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/udp.h | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'include')

diff --git a/include/net/udp.h b/include/net/udp.h
index 70f9413..16f4e80 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -158,6 +158,24 @@ static inline __sum16 udp_v4_check(int len, __be32 saddr,
 void udp_set_csum(bool nocheck, struct sk_buff *skb,
 		  __be32 saddr, __be32 daddr, int len);
 
+struct sk_buff **udp_gro_receive(struct sk_buff **head, struct sk_buff *skb,
+				 struct udphdr *uh);
+int udp_gro_complete(struct sk_buff *skb, int nhoff);
+
+static inline struct udphdr *udp_gro_udphdr(struct sk_buff *skb)
+{
+	struct udphdr *uh;
+	unsigned int hlen, off;
+
+	off  = skb_gro_offset(skb);
+	hlen = off + sizeof(*uh);
+	uh   = skb_gro_header_fast(skb, off);
+	if (skb_gro_header_hard(skb, hlen))
+		uh = skb_gro_header_slow(skb, hlen, off);
+
+	return uh;
+}
+
 /* hash routines shared between UDPv4/6 and UDP-Litev4/6 */
 static inline void udp_lib_hash(struct sock *sk)
 {
-- 
cgit v1.1


From a98406e22c12e514bac28fec0a49dc793edaf3a8 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <dborkman@redhat.com>
Date: Sat, 23 Aug 2014 17:03:28 +0200
Subject: random32: improvements to prandom_bytes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch addresses a couple of minor items, mostly addesssing
prandom_bytes(): 1) prandom_bytes{,_state}() should use size_t
for length arguments, 2) We can use put_unaligned() when filling
the array instead of open coding it [ perhaps some archs will
further benefit from their own arch specific implementation when
GCC cannot make up for it ], 3) Fix a typo, 4) Better use unsigned
int as type for getting the arch seed, 5) Make use of
prandom_u32_max() for timer slack.

Regarding the change to put_unaligned(), callers of prandom_bytes()
which internally invoke prandom_bytes_state(), don't bother as
they expect the array to be filled randomly and don't have any
control of the internal state what-so-ever (that's also why we
have periodic reseeding there, etc), so they really don't care.

Now for the direct callers of prandom_bytes_state(), which
are solely located in test cases for MTD devices, that is,
drivers/mtd/tests/{oobtest.c,pagetest.c,subpagetest.c}:

These tests basically fill a test write-vector through
prandom_bytes_state() with an a-priori defined seed each time
and write that to a MTD device. Later on, they set up a read-vector
and read back that blocks from the device. So in the verification
phase, the write-vector is being re-setup [ so same seed and
prandom_bytes_state() called ], and then memcmp()'ed against the
read-vector to check if the data is the same.

Akinobu, Lothar and I also tested this patch and it runs through
the 3 relevant MTD test cases w/o any errors on the nandsim device
(simulator for MTD devs) for x86_64, ppc64, ARM (i.MX28, i.MX53
and i.MX6):

  # modprobe nandsim first_id_byte=0x20 second_id_byte=0xac \
                     third_id_byte=0x00 fourth_id_byte=0x15
  # modprobe mtd_oobtest dev=0
  # modprobe mtd_pagetest dev=0
  # modprobe mtd_subpagetest dev=0

We also don't have any users depending directly on a particular
result of the PRNG (except the PRNG self-test itself), and that's
just fine as it e.g. allowed us easily to do things like upgrading
from taus88 to taus113.

Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Tested-by: Akinobu Mita <akinobu.mita@gmail.com>
Tested-by: Lothar Waßmann <LW@KARO-electronics.de>
Cc: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/random.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/random.h b/include/linux/random.h
index 57fbbff..b05856e 100644
--- a/include/linux/random.h
+++ b/include/linux/random.h
@@ -26,7 +26,7 @@ unsigned int get_random_int(void);
 unsigned long randomize_range(unsigned long start, unsigned long end, unsigned long len);
 
 u32 prandom_u32(void);
-void prandom_bytes(void *buf, int nbytes);
+void prandom_bytes(void *buf, size_t nbytes);
 void prandom_seed(u32 seed);
 void prandom_reseed_late(void);
 
@@ -35,7 +35,7 @@ struct rnd_state {
 };
 
 u32 prandom_u32_state(struct rnd_state *state);
-void prandom_bytes_state(struct rnd_state *state, void *buf, int nbytes);
+void prandom_bytes_state(struct rnd_state *state, void *buf, size_t nbytes);
 
 /**
  * prandom_u32_max - returns a pseudo-random number in interval [0, ep_ro)
-- 
cgit v1.1


From 4798248e4e023170e937a65a1d30fcc52496dd42 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Fri, 22 Aug 2014 16:21:53 -0700
Subject: net: Add ops->ndo_xmit_flush()

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 35 +++++++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index eb73444..220c509 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -782,6 +782,19 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev,
  *        (can also return NETDEV_TX_LOCKED iff NETIF_F_LLTX)
  *	Required can not be NULL.
  *
+ * void (*ndo_xmit_flush)(struct net_device *dev, u16 queue);
+ *	A driver implements this function when it wishes to support
+ *	deferred TX queue flushing.  The idea is that the expensive
+ *	operation to trigger TX queue processing can be done after
+ *	N calls to ndo_start_xmit rather than being done every single
+ *	time.  In this regime ndo_start_xmit will be called one or more
+ *	times, and then a final ndo_xmit_flush call will be made to
+ *	have the driver tell the device about the new pending TX queue
+ *	entries.  The kernel keeps track of which queues need flushing
+ *	by monitoring skb->queue_mapping of the packets it submits to
+ *	ndo_start_xmit.  This is the queue value that will be passed
+ *	to ndo_xmit_flush.
+ *
  * u16 (*ndo_select_queue)(struct net_device *dev, struct sk_buff *skb,
  *                         void *accel_priv, select_queue_fallback_t fallback);
  *	Called to decide which queue to when device supports multiple
@@ -1005,6 +1018,7 @@ struct net_device_ops {
 	int			(*ndo_stop)(struct net_device *dev);
 	netdev_tx_t		(*ndo_start_xmit) (struct sk_buff *skb,
 						   struct net_device *dev);
+	void			(*ndo_xmit_flush)(struct net_device *dev, u16 queue);
 	u16			(*ndo_select_queue)(struct net_device *dev,
 						    struct sk_buff *skb,
 						    void *accel_priv,
@@ -3430,6 +3444,27 @@ int __init dev_proc_init(void);
 #define dev_proc_init() 0
 #endif
 
+static inline netdev_tx_t __netdev_start_xmit(const struct net_device_ops *ops,
+					      struct sk_buff *skb, struct net_device *dev)
+{
+	netdev_tx_t ret;
+	u16 q;
+
+	q = skb->queue_mapping;
+	ret = ops->ndo_start_xmit(skb, dev);
+	if (dev_xmit_complete(ret) && ops->ndo_xmit_flush)
+		ops->ndo_xmit_flush(dev, q);
+
+	return ret;
+}
+
+static inline netdev_tx_t netdev_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	const struct net_device_ops *ops = dev->netdev_ops;
+
+	return __netdev_start_xmit(ops, skb, dev);
+}
+
 int netdev_class_create_file_ns(struct class_attribute *class_attr,
 				const void *ns);
 void netdev_class_remove_file_ns(struct class_attribute *class_attr,
-- 
cgit v1.1


From 48ea526a6877d605c961aa37fae33f3227b29424 Mon Sep 17 00:00:00 2001
From: Amir Vadai <amirv@mellanox.com>
Date: Mon, 25 Aug 2014 16:06:53 +0300
Subject: net/mlx4: Use is_kdump_kernel() to detect kdump kernel

Use is_kdump_kernel() to detect kdump kernel, instead of reset_devices.

Signed-off-by: Amir Vadai <amirv@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mlx4/device.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 071f6b2..783dd09 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -38,6 +38,7 @@
 #include <linux/completion.h>
 #include <linux/radix-tree.h>
 #include <linux/cpu_rmap.h>
+#include <linux/crash_dump.h>
 
 #include <linux/atomic.h>
 
@@ -1275,7 +1276,7 @@ int mlx4_mr_rereg_mem_write(struct mlx4_dev *dev, struct mlx4_mr *mr,
 /* Returns true if running in low memory profile (kdump kernel) */
 static inline bool mlx4_low_memory_profile(void)
 {
-	return reset_devices;
+	return is_kdump_kernel();
 }
 
 #endif /* MLX4_DEVICE_H */
-- 
cgit v1.1


From 0b725a2ca61bedc33a2a63d0451d528b268cf975 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Mon, 25 Aug 2014 15:51:53 -0700
Subject: net: Remove ndo_xmit_flush netdev operation, use signalling instead.

As reported by Jesper Dangaard Brouer, for high packet rates the
overhead of having another indirect call in the TX path is
non-trivial.

There is the indirect call itself, and then there is all of the
reloading of the state to refetch the tail pointer value and
then write the device register.

Move to a more passive scheme, which requires very light modifications
to the device drivers.

The signal is a new skb->xmit_more value, if it is non-zero it means
that more SKBs are pending to be transmitted on the same queue as the
current SKB.  And therefore, the driver may elide the tail pointer
update.

Right now skb->xmit_more is always zero.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 25 ++-----------------------
 include/linux/skbuff.h    |  2 ++
 2 files changed, 4 insertions(+), 23 deletions(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 220c509..039b237 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -782,19 +782,6 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev,
  *        (can also return NETDEV_TX_LOCKED iff NETIF_F_LLTX)
  *	Required can not be NULL.
  *
- * void (*ndo_xmit_flush)(struct net_device *dev, u16 queue);
- *	A driver implements this function when it wishes to support
- *	deferred TX queue flushing.  The idea is that the expensive
- *	operation to trigger TX queue processing can be done after
- *	N calls to ndo_start_xmit rather than being done every single
- *	time.  In this regime ndo_start_xmit will be called one or more
- *	times, and then a final ndo_xmit_flush call will be made to
- *	have the driver tell the device about the new pending TX queue
- *	entries.  The kernel keeps track of which queues need flushing
- *	by monitoring skb->queue_mapping of the packets it submits to
- *	ndo_start_xmit.  This is the queue value that will be passed
- *	to ndo_xmit_flush.
- *
  * u16 (*ndo_select_queue)(struct net_device *dev, struct sk_buff *skb,
  *                         void *accel_priv, select_queue_fallback_t fallback);
  *	Called to decide which queue to when device supports multiple
@@ -1018,7 +1005,6 @@ struct net_device_ops {
 	int			(*ndo_stop)(struct net_device *dev);
 	netdev_tx_t		(*ndo_start_xmit) (struct sk_buff *skb,
 						   struct net_device *dev);
-	void			(*ndo_xmit_flush)(struct net_device *dev, u16 queue);
 	u16			(*ndo_select_queue)(struct net_device *dev,
 						    struct sk_buff *skb,
 						    void *accel_priv,
@@ -3447,15 +3433,8 @@ int __init dev_proc_init(void);
 static inline netdev_tx_t __netdev_start_xmit(const struct net_device_ops *ops,
 					      struct sk_buff *skb, struct net_device *dev)
 {
-	netdev_tx_t ret;
-	u16 q;
-
-	q = skb->queue_mapping;
-	ret = ops->ndo_start_xmit(skb, dev);
-	if (dev_xmit_complete(ret) && ops->ndo_xmit_flush)
-		ops->ndo_xmit_flush(dev, q);
-
-	return ret;
+	skb->xmit_more = 0;
+	return ops->ndo_start_xmit(skb, dev);
 }
 
 static inline netdev_tx_t netdev_start_xmit(struct sk_buff *skb, struct net_device *dev)
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 18ddf96..9b3802a 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -452,6 +452,7 @@ static inline u32 skb_mstamp_us_delta(const struct skb_mstamp *t1,
  *	@tc_verd: traffic control verdict
  *	@hash: the packet hash
  *	@queue_mapping: Queue mapping for multiqueue devices
+ *	@xmit_more: More SKBs are pending for this queue
  *	@ndisc_nodetype: router type (from link layer)
  *	@ooo_okay: allow the mapping of a socket to a queue to be changed
  *	@l4_hash: indicate hash is a canonical 4-tuple hash over transport
@@ -558,6 +559,7 @@ struct sk_buff {
 
 	__u16			queue_mapping;
 	kmemcheck_bitfield_begin(flags2);
+	__u8			xmit_more:1;
 #ifdef CONFIG_IPV6_NDISC_NODETYPE
 	__u8			ndisc_nodetype:2;
 #endif
-- 
cgit v1.1


From 453a940ea725d692282f9e66475cec0d1b1e12f2 Mon Sep 17 00:00:00 2001
From: WANG Cong <xiyou.wangcong@gmail.com>
Date: Mon, 25 Aug 2014 17:03:47 -0700
Subject: net: make skb an optional parameter for__skb_flow_dissect()

Fixes: commit 690e36e726d00d2 (net: Allow raw buffers to be passed into the flow dissector)
Cc: David S. Miller <davem@davemloft.net>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/flow_keys.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/flow_keys.h b/include/net/flow_keys.h
index 4040f63..9a03f73 100644
--- a/include/net/flow_keys.h
+++ b/include/net/flow_keys.h
@@ -28,10 +28,10 @@ struct flow_keys {
 };
 
 bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow,
-			void *data, int hlen);
+			void *data, __be16 proto, int nhoff, int hlen);
 static inline bool skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow)
 {
-	return __skb_flow_dissect(skb, flow, NULL, 0);
+	return __skb_flow_dissect(skb, flow, NULL, 0, 0, 0);
 }
 __be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto,
 			    void *data, int hlen_proto);
-- 
cgit v1.1


From 970fdfa89babb5a6f1a3d345e8cb54d92c1e3a8f Mon Sep 17 00:00:00 2001
From: Vladimir Kondratiev <qca_vkondrat@qca.qualcomm.com>
Date: Mon, 11 Aug 2014 03:29:57 -0700
Subject: cfg80211: remove @gfp parameter from cfg80211_rx_mgmt()

In the cfg80211_rx_mgmt(), parameter @gfp was used for the memory allocation.
But, memory get allocated under spin_lock_bh(), this implies atomic context.
So, one can't use GFP_KERNEL, only variants with no __GFP_WAIT. Actually, in all
occurrences GFP_ATOMIC is used (wil6210 use GFP_KERNEL by mistake),
and it should be this way or warning triggered in the memory allocation code.

Remove @gfp parameter as no actual choice exist, and use hard coded
GFP_ATOMIC for memory allocation.

Signed-off-by: Vladimir Kondratiev <qca_vkondrat@qca.qualcomm.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 0a080c4..7b8dac3 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -4412,7 +4412,6 @@ void cfg80211_conn_failed(struct net_device *dev, const u8 *mac_addr,
  * @buf: Management frame (header + body)
  * @len: length of the frame data
  * @flags: flags, as defined in enum nl80211_rxmgmt_flags
- * @gfp: context flags
  *
  * This function is called whenever an Action frame is received for a station
  * mode interface, but is not processed in kernel.
@@ -4423,7 +4422,7 @@ void cfg80211_conn_failed(struct net_device *dev, const u8 *mac_addr,
  * driver is responsible for rejecting the frame.
  */
 bool cfg80211_rx_mgmt(struct wireless_dev *wdev, int freq, int sig_dbm,
-		      const u8 *buf, size_t len, u32 flags, gfp_t gfp);
+		      const u8 *buf, size_t len, u32 flags);
 
 /**
  * cfg80211_mgmt_tx_status - notification of TX status for management frame
-- 
cgit v1.1


From 170fd0b1f6108b48df4369afa0ee29a83e922748 Mon Sep 17 00:00:00 2001
From: Andrei Otcheretianski <andrei.otcheretianski@intel.com>
Date: Wed, 30 Jul 2014 14:36:18 +0300
Subject: ieee80211: Support parsing TPC report element in action frames

TPC report element is contained in spectrum management's tpc report
action frames and in radio measurement's link measurement report
action frames. Add a function which checks whether an action frame
contains this element. This may be needed by the drivers in order
to set the correct tx power value in these frames.

Signed-off-by: Andrei Otcheretianski <andrei.otcheretianski@intel.com>
Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h | 65 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 65 insertions(+)

(limited to 'include')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 63ab3873..8018c91 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -838,6 +838,16 @@ enum ieee80211_vht_opmode_bits {
 
 #define WLAN_SA_QUERY_TR_ID_LEN 2
 
+/**
+ * struct ieee80211_tpc_report_ie
+ *
+ * This structure refers to "TPC Report element"
+ */
+struct ieee80211_tpc_report_ie {
+	u8 tx_power;
+	u8 link_margin;
+} __packed;
+
 struct ieee80211_mgmt {
 	__le16 frame_control;
 	__le16 duration;
@@ -973,6 +983,13 @@ struct ieee80211_mgmt {
 					u8 action_code;
 					u8 operating_mode;
 				} __packed vht_opmode_notif;
+				struct {
+					u8 action_code;
+					u8 dialog_token;
+					u8 tpc_elem_id;
+					u8 tpc_elem_length;
+					struct ieee80211_tpc_report_ie tpc;
+				} __packed tpc_report;
 			} u;
 		} __packed action;
 	} u;
@@ -1865,6 +1882,7 @@ enum ieee80211_category {
 	WLAN_CATEGORY_DLS = 2,
 	WLAN_CATEGORY_BACK = 3,
 	WLAN_CATEGORY_PUBLIC = 4,
+	WLAN_CATEGORY_RADIO_MEASUREMENT = 5,
 	WLAN_CATEGORY_HT = 7,
 	WLAN_CATEGORY_SA_QUERY = 8,
 	WLAN_CATEGORY_PROTECTED_DUAL_OF_ACTION = 9,
@@ -2378,4 +2396,51 @@ static inline bool ieee80211_check_tim(const struct ieee80211_tim_ie *tim,
 #define TU_TO_JIFFIES(x)	(usecs_to_jiffies((x) * 1024))
 #define TU_TO_EXP_TIME(x)	(jiffies + TU_TO_JIFFIES(x))
 
+/**
+ * ieee80211_action_contains_tpc - checks if the frame contains TPC element
+ * @skb: the skb containing the frame, length will be checked
+ *
+ * This function checks if it's either TPC report action frame or Link
+ * Measurement report action frame as defined in IEEE Std. 802.11-2012 8.5.2.5
+ * and 8.5.7.5 accordingly.
+ */
+static inline bool ieee80211_action_contains_tpc(struct sk_buff *skb)
+{
+	struct ieee80211_mgmt *mgmt = (void *)skb->data;
+
+	if (!ieee80211_is_action(mgmt->frame_control))
+		return false;
+
+	if (skb->len < IEEE80211_MIN_ACTION_SIZE +
+		       sizeof(mgmt->u.action.u.tpc_report))
+		return false;
+
+	/*
+	 * TPC report - check that:
+	 * category = 0 (Spectrum Management) or 5 (Radio Measurement)
+	 * spectrum management action = 3 (TPC/Link Measurement report)
+	 * TPC report EID = 35
+	 * TPC report element length = 2
+	 *
+	 * The spectrum management's tpc_report struct is used here both for
+	 * parsing tpc_report and radio measurement's link measurement report
+	 * frame, since the relevant part is identical in both frames.
+	 */
+	if (mgmt->u.action.category != WLAN_CATEGORY_SPECTRUM_MGMT &&
+	    mgmt->u.action.category != WLAN_CATEGORY_RADIO_MEASUREMENT)
+		return false;
+
+	/* both spectrum mgmt and link measurement have same action code */
+	if (mgmt->u.action.u.tpc_report.action_code !=
+	    WLAN_ACTION_SPCT_TPC_RPRT)
+		return false;
+
+	if (mgmt->u.action.u.tpc_report.tpc_elem_id != WLAN_EID_TPC_REPORT ||
+	    mgmt->u.action.u.tpc_report.tpc_elem_length !=
+	    sizeof(struct ieee80211_tpc_report_ie))
+		return false;
+
+	return true;
+}
+
 #endif /* LINUX_IEEE80211_H */
-- 
cgit v1.1


From ca34e3b5c808385b175650605faa29e71e91991b Mon Sep 17 00:00:00 2001
From: Ido Yariv <ido@wizery.com>
Date: Tue, 29 Jul 2014 15:38:53 +0300
Subject: mac80211: Fix accounting of the tailroom-needed counter

When hw acceleration is enabled, the GENERATE_IV or PUT_IV_SPACE flags
will only require headroom space. Consequently, the tailroom-needed
counter can safely be decremented.

Signed-off-by: Ido Yariv <idox.yariv@intel.com>
Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 1cd8444..1fbed0a 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -1226,7 +1226,8 @@ struct ieee80211_vif *wdev_to_ieee80211_vif(struct wireless_dev *wdev);
  *
  * @IEEE80211_KEY_FLAG_GENERATE_IV: This flag should be set by the
  *	driver to indicate that it requires IV generation for this
- *	particular key.
+ *	particular key. Setting this flag does not necessarily mean that SKBs
+ *	will have sufficient tailroom for ICV or MIC.
  * @IEEE80211_KEY_FLAG_GENERATE_MMIC: This flag should be set by
  *	the driver for a TKIP key if it requires Michael MIC
  *	generation in software.
@@ -1238,7 +1239,9 @@ struct ieee80211_vif *wdev_to_ieee80211_vif(struct wireless_dev *wdev);
  * @IEEE80211_KEY_FLAG_PUT_IV_SPACE: This flag should be set by the driver
  *	if space should be prepared for the IV, but the IV
  *	itself should not be generated. Do not set together with
- *	@IEEE80211_KEY_FLAG_GENERATE_IV on the same key.
+ *	@IEEE80211_KEY_FLAG_GENERATE_IV on the same key. Setting this flag does
+ *	not necessarily mean that SKBs will have sufficient tailroom for ICV or
+ *	MIC.
  * @IEEE80211_KEY_FLAG_RX_MGMT: This key will be used to decrypt received
  *	management frames. The flag can help drivers that have a hardware
  *	crypto implementation that doesn't deal with management frames
-- 
cgit v1.1


From c70f59a2a007c57843195a93c3b7308204e0a5ab Mon Sep 17 00:00:00 2001
From: Ido Yariv <ido@wizery.com>
Date: Tue, 29 Jul 2014 15:39:14 +0300
Subject: mac80211: don't resize skbs needlessly

Header-less cloned skbs with sufficient headroom need not be cloned
unless the tailroom is going to be modified.

Fix ieee80211_skb_resize so it would only resize cloned skbs if either
the header isn't released or the tailroom is going to be modified.

Some drivers might have assumed that skbs are never cloned, so add a HW
flag that explicitly permits cloned TX skbs. Drivers which do not modify
TX skbs should set this flag to avoid copying skbs.

Signed-off-by: Ido Yariv <idox.yariv@intel.com>
Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 1fbed0a..c9b2bec 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -1609,6 +1609,9 @@ struct ieee80211_tx_control {
  *	is not enabled the default action is to disconnect when getting the
  *	CSA frame.
  *
+ * @IEEE80211_HW_SUPPORTS_CLONED_SKBS: The driver will never modify the payload
+ *	or tailroom of TX skbs without copying them first.
+ *
  * @IEEE80211_SINGLE_HW_SCAN_ON_ALL_BANDS: The HW supports scanning on all bands
  *	in one command, mac80211 doesn't have to run separate scans per band.
  */
@@ -1642,7 +1645,7 @@ enum ieee80211_hw_flags {
 	IEEE80211_HW_TIMING_BEACON_ONLY			= 1<<26,
 	IEEE80211_HW_SUPPORTS_HT_CCK_RATES		= 1<<27,
 	IEEE80211_HW_CHANCTX_STA_CSA			= 1<<28,
-	/* bit 29 unused */
+	IEEE80211_HW_SUPPORTS_CLONED_SKBS		= 1<<29,
 	IEEE80211_SINGLE_HW_SCAN_ON_ALL_BANDS		= 1<<30,
 };
 
-- 
cgit v1.1


From 0e227084aee36b3ba27b4fc9cd9e425be6ce2ab8 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Tue, 12 Aug 2014 20:34:30 +0200
Subject: cfg80211: clarify BSS probe response vs. beacon data

There are a few possible cases of where BSS data came from:
 1) only a beacon has been received
 2) only a probe response has been received
 3) the driver didn't report what it received (this happens when
    using cfg80211_inform_bss[_width]())
 4) both probe response and beacon data has been received

Unfortunately, in the userspace API, a few things weren't there:
 a) there was no way to differentiate cases 1) and 4) above
    without comparing the data of the IEs
 b) the TSF was always from the last frame, instead of being
    exposed for beacon/probe response separately like IEs

Fix this by
   i) exporting a new flag attribute that indicates whether or
      not probe response data has been received - this addresses (a)
  ii) exporting a BEACON_TSF attribute that holds the beacon's TSF
      if a beacon has been received
 iii) not exporting the beacon attributes in case (3) above as that
      would just lead userspace into thinking the data actually came
      from a beacon when that isn't clear

To implement this, track inside the IEs struct whether or not it
(definitely) came from a beacon.

Reported-by: William Seto
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h       |  2 ++
 include/uapi/linux/nl80211.h | 16 ++++++++++++++--
 2 files changed, 16 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 7b8dac3..77b85a8 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1503,12 +1503,14 @@ enum cfg80211_signal_type {
  * @tsf: TSF contained in the frame that carried these IEs
  * @rcu_head: internal use, for freeing
  * @len: length of the IEs
+ * @from_beacon: these IEs are known to come from a beacon
  * @data: IE data
  */
 struct cfg80211_bss_ies {
 	u64 tsf;
 	struct rcu_head rcu_head;
 	int len;
+	bool from_beacon;
 	u8 data[];
 };
 
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index f1db15b..d097568 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -3055,14 +3055,20 @@ enum nl80211_bss_scan_width {
  * @NL80211_BSS_BSSID: BSSID of the BSS (6 octets)
  * @NL80211_BSS_FREQUENCY: frequency in MHz (u32)
  * @NL80211_BSS_TSF: TSF of the received probe response/beacon (u64)
+ *	(if @NL80211_BSS_PRESP_DATA is present then this is known to be
+ *	from a probe response, otherwise it may be from the same beacon
+ *	that the NL80211_BSS_BEACON_TSF will be from)
  * @NL80211_BSS_BEACON_INTERVAL: beacon interval of the (I)BSS (u16)
  * @NL80211_BSS_CAPABILITY: capability field (CPU order, u16)
  * @NL80211_BSS_INFORMATION_ELEMENTS: binary attribute containing the
  *	raw information elements from the probe response/beacon (bin);
- *	if the %NL80211_BSS_BEACON_IES attribute is present, the IEs here are
- *	from a Probe Response frame; otherwise they are from a Beacon frame.
+ *	if the %NL80211_BSS_BEACON_IES attribute is present and the data is
+ *	different then the IEs here are from a Probe Response frame; otherwise
+ *	they are from a Beacon frame.
  *	However, if the driver does not indicate the source of the IEs, these
  *	IEs may be from either frame subtype.
+ *	If present, the @NL80211_BSS_PRESP_DATA attribute indicates that the
+ *	data here is known to be from a probe response, without any heuristics.
  * @NL80211_BSS_SIGNAL_MBM: signal strength of probe response/beacon
  *	in mBm (100 * dBm) (s32)
  * @NL80211_BSS_SIGNAL_UNSPEC: signal strength of the probe response/beacon
@@ -3074,6 +3080,10 @@ enum nl80211_bss_scan_width {
  *	yet been received
  * @NL80211_BSS_CHAN_WIDTH: channel width of the control channel
  *	(u32, enum nl80211_bss_scan_width)
+ * @NL80211_BSS_BEACON_TSF: TSF of the last received beacon (u64)
+ *	(not present if no beacon frame has been received yet)
+ * @NL80211_BSS_PRESP_DATA: the data in @NL80211_BSS_INFORMATION_ELEMENTS and
+ *	@NL80211_BSS_TSF is known to be from a probe response (flag attribute)
  * @__NL80211_BSS_AFTER_LAST: internal
  * @NL80211_BSS_MAX: highest BSS attribute
  */
@@ -3091,6 +3101,8 @@ enum nl80211_bss {
 	NL80211_BSS_SEEN_MS_AGO,
 	NL80211_BSS_BEACON_IES,
 	NL80211_BSS_CHAN_WIDTH,
+	NL80211_BSS_BEACON_TSF,
+	NL80211_BSS_PRESP_DATA,
 
 	/* keep last */
 	__NL80211_BSS_AFTER_LAST,
-- 
cgit v1.1


From 5bc8c1f2b070bab82ed738f98ecfac725e33c57f Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Tue, 12 Aug 2014 21:01:28 +0200
Subject: cfg80211: allow passing frame type to cfg80211_inform_bss()

When using the cfg80211_inform_bss[_width]() functions drivers
cannot currently indicate whether the data was received in a
beacon or probe response. Fix that by passing a new enum that
indicates such (or unknown).

For good measure, use it in ath6kl.

Acked-by: Kalle Valo <kvalo@qca.qualcomm.com> [ath6kl]
Acked-by: Arend van Spriel <arend@broadcom.com> [brcmfmac]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 77b85a8..ab21299 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -3767,11 +3767,25 @@ cfg80211_inform_bss_frame(struct wiphy *wiphy,
 }
 
 /**
- * cfg80211_inform_bss - inform cfg80211 of a new BSS
+ * enum cfg80211_bss_frame_type - frame type that the BSS data came from
+ * @CFG80211_BSS_FTYPE_UNKNOWN: driver doesn't know whether the data is
+ *	from a beacon or probe response
+ * @CFG80211_BSS_FTYPE_BEACON: data comes from a beacon
+ * @CFG80211_BSS_FTYPE_PRESP: data comes from a probe response
+ */
+enum cfg80211_bss_frame_type {
+	CFG80211_BSS_FTYPE_UNKNOWN,
+	CFG80211_BSS_FTYPE_BEACON,
+	CFG80211_BSS_FTYPE_PRESP,
+};
+
+/**
+ * cfg80211_inform_bss_width - inform cfg80211 of a new BSS
  *
  * @wiphy: the wiphy reporting the BSS
  * @rx_channel: The channel the frame was received on
  * @scan_width: width of the control channel
+ * @ftype: frame type (if known)
  * @bssid: the BSSID of the BSS
  * @tsf: the TSF sent by the peer in the beacon/probe response (or 0)
  * @capability: the capability field sent by the peer
@@ -3791,6 +3805,7 @@ struct cfg80211_bss * __must_check
 cfg80211_inform_bss_width(struct wiphy *wiphy,
 			  struct ieee80211_channel *rx_channel,
 			  enum nl80211_bss_scan_width scan_width,
+			  enum cfg80211_bss_frame_type ftype,
 			  const u8 *bssid, u64 tsf, u16 capability,
 			  u16 beacon_interval, const u8 *ie, size_t ielen,
 			  s32 signal, gfp_t gfp);
@@ -3798,12 +3813,13 @@ cfg80211_inform_bss_width(struct wiphy *wiphy,
 static inline struct cfg80211_bss * __must_check
 cfg80211_inform_bss(struct wiphy *wiphy,
 		    struct ieee80211_channel *rx_channel,
+		    enum cfg80211_bss_frame_type ftype,
 		    const u8 *bssid, u64 tsf, u16 capability,
 		    u16 beacon_interval, const u8 *ie, size_t ielen,
 		    s32 signal, gfp_t gfp)
 {
 	return cfg80211_inform_bss_width(wiphy, rx_channel,
-					 NL80211_BSS_CHAN_WIDTH_20,
+					 NL80211_BSS_CHAN_WIDTH_20, ftype,
 					 bssid, tsf, capability,
 					 beacon_interval, ie, ielen, signal,
 					 gfp);
-- 
cgit v1.1


From f111f780ae1abf4cdc464f24293be90c010a04f6 Mon Sep 17 00:00:00 2001
From: Alexey Perevalov <a.perevalov@samsung.com>
Date: Wed, 20 Aug 2014 22:03:18 +0400
Subject: netfilter: nfnetlink_acct: add filter support to nfacct counter
 list/reset

You can use this to skip accounting objects when listing/resetting
via NFNL_MSG_ACCT_GET/NFNL_MSG_ACCT_GET_CTRZERO messages with the
NLM_F_DUMP netlink flag. The filtering covers the following cases:

1. No filter specified. In this case, the client will get old behaviour,
2. List/reset counter object only: In this case, you have to use
   NFACCT_F_QUOTA as mask and value 0.
3. List/reset quota objects only: You have to use NFACCT_F_QUOTA_PKTS
   as mask and value - the same, for byte based quota mask should be
   NFACCT_F_QUOTA_BYTES and value - the same.

If you want to obtain the object with any quota type
(ie. NFACCT_F_QUOTA_PKTS|NFACCT_F_QUOTA_BYTES), you need to perform
two dump requests, one to obtain NFACCT_F_QUOTA_PKTS objects and
another for NFACCT_F_QUOTA_BYTES.

Signed-off-by: Alexey Perevalov <a.perevalov@samsung.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter/nfnetlink_acct.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/netfilter/nfnetlink_acct.h b/include/uapi/linux/netfilter/nfnetlink_acct.h
index 51404ec..f3e34db 100644
--- a/include/uapi/linux/netfilter/nfnetlink_acct.h
+++ b/include/uapi/linux/netfilter/nfnetlink_acct.h
@@ -28,9 +28,17 @@ enum nfnl_acct_type {
 	NFACCT_USE,
 	NFACCT_FLAGS,
 	NFACCT_QUOTA,
+	NFACCT_FILTER,
 	__NFACCT_MAX
 };
 #define NFACCT_MAX (__NFACCT_MAX - 1)
 
+enum nfnl_attr_filter_type {
+	NFACCT_FILTER_UNSPEC,
+	NFACCT_FILTER_MASK,
+	NFACCT_FILTER_VALUE,
+	__NFACCT_FILTER_MAX
+};
+#define NFACCT_FILTER_MAX (__NFACCT_FILTER_MAX - 1)
 
 #endif /* _UAPI_NFNL_ACCT_H_ */
-- 
cgit v1.1


From 5c21403d74af2c9cd635a34c2f9199681a5b813e Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Wed, 27 Aug 2014 14:39:04 -0700
Subject: net: Update sk_buff flag bit availability comment.

We lost one when xmit_more was added.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 9b3802a..b69b7b5 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -580,7 +580,7 @@ struct sk_buff {
 	__u8			encap_hdr_csum:1;
 	__u8			csum_valid:1;
 	__u8			csum_complete_sw:1;
-	/* 2/4 bit hole (depending on ndisc_nodetype presence) */
+	/* 1/3 bit hole (depending on ndisc_nodetype presence) */
 	kmemcheck_bitfield_end(flags2);
 
 #if defined CONFIG_NET_DMA || defined CONFIG_NET_RX_BUSY_POLL
-- 
cgit v1.1


From 3e8a72d1dae374cf6fc1dba97cec663585845ff9 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Wed, 27 Aug 2014 17:04:46 -0700
Subject: net: dsa: reduce number of protocol hooks

DSA is currently registering one packet_type function per EtherType it
needs to intercept in the receive path of a DSA-enabled Ethernet device.
Right now we have three of them: trailer, DSA and eDSA, and there might
be more in the future, this will not scale to the addition of new
protocols.

This patch proceeds with adding a new layer of abstraction and two new
functions:

dsa_switch_rcv() which will dispatch into the tag-protocol specific
receive function implemented by net/dsa/tag_*.c

dsa_slave_xmit() which will dispatch into the tag-protocol specific
transmit function implemented by net/dsa/tag_*.c

When we do create the per-port slave network devices, we iterate over
the switch protocol to assign the DSA-specific receive and transmit
operations.

A new fake ethertype value is used: ETH_P_XDSA to illustrate the fact
that this is no longer going to look like ETH_P_DSA or ETH_P_TRAILER
like it used to be.

This allows us to greatly simplify the check in eth_type_trans() and
always override the skb->protocol with ETH_P_XDSA for Ethernet switches
tagged protocol, while also reducing the number repetitive slave
netdevice_ops assignments.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h     | 28 ++++++++++++----------------
 include/net/dsa.h             | 20 +++-----------------
 include/uapi/linux/if_ether.h |  1 +
 3 files changed, 16 insertions(+), 33 deletions(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 039b237..1875dc7 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1781,24 +1781,13 @@ void dev_net_set(struct net_device *dev, struct net *net)
 #endif
 }
 
-static inline bool netdev_uses_dsa_tags(struct net_device *dev)
+static inline bool netdev_uses_dsa(struct net_device *dev)
 {
-#ifdef CONFIG_NET_DSA_TAG_DSA
-	if (dev->dsa_ptr != NULL)
-		return dsa_uses_dsa_tags(dev->dsa_ptr);
-#endif
-
-	return 0;
-}
-
-static inline bool netdev_uses_trailer_tags(struct net_device *dev)
-{
-#ifdef CONFIG_NET_DSA_TAG_TRAILER
-	if (dev->dsa_ptr != NULL)
-		return dsa_uses_trailer_tags(dev->dsa_ptr);
+#ifdef CONFIG_NET_DSA
+	return dev->dsa_ptr != NULL;
+#else
+	return false;
 #endif
-
-	return 0;
 }
 
 /**
@@ -1933,6 +1922,13 @@ struct udp_offload {
 	struct offload_callbacks callbacks;
 };
 
+struct dsa_device_ops {
+	netdev_tx_t (*xmit)(struct sk_buff *skb, struct net_device *dev);
+	int (*rcv)(struct sk_buff *skb, struct net_device *dev,
+		   struct packet_type *pt, struct net_device *orig_dev);
+};
+
+
 /* often modified stats are per cpu, other are shared (netdev->stats) */
 struct pcpu_sw_netstats {
 	u64     rx_packets;
diff --git a/include/net/dsa.h b/include/net/dsa.h
index 6efce38..6e26f1e 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -59,6 +59,8 @@ struct dsa_platform_data {
 	struct dsa_chip_data	*chip;
 };
 
+struct dsa_device_ops;
+
 struct dsa_switch_tree {
 	/*
 	 * Configuration data for the platform device that owns
@@ -71,6 +73,7 @@ struct dsa_switch_tree {
 	 * protocol to use.
 	 */
 	struct net_device	*master_netdev;
+	const struct dsa_device_ops	*ops;
 	__be16			tag_protocol;
 
 	/*
@@ -186,21 +189,4 @@ static inline void *ds_to_priv(struct dsa_switch *ds)
 	return (void *)(ds + 1);
 }
 
-/*
- * The original DSA tag format and some other tag formats have no
- * ethertype, which means that we need to add a little hack to the
- * networking receive path to make sure that received frames get
- * the right ->protocol assigned to them when one of those tag
- * formats is in use.
- */
-static inline bool dsa_uses_dsa_tags(struct dsa_switch_tree *dst)
-{
-	return !!(dst->tag_protocol == htons(ETH_P_DSA));
-}
-
-static inline bool dsa_uses_trailer_tags(struct dsa_switch_tree *dst)
-{
-	return !!(dst->tag_protocol == htons(ETH_P_TRAILER));
-}
-
 #endif
diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h
index 0f8210b..aa63ed0 100644
--- a/include/uapi/linux/if_ether.h
+++ b/include/uapi/linux/if_ether.h
@@ -128,6 +128,7 @@
 #define ETH_P_PHONET	0x00F5		/* Nokia Phonet frames          */
 #define ETH_P_IEEE802154 0x00F6		/* IEEE802.15.4 frame		*/
 #define ETH_P_CAIF	0x00F7		/* ST-Ericsson CAIF protocol	*/
+#define ETH_P_XDSA	0x00F8		/* Multiplexed DSA protocol	*/
 
 /*
  *	This is an Ethernet frame header.
-- 
cgit v1.1


From 464c3668f065baeacfffa9d421959d21069389fe Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Wed, 27 Aug 2014 17:04:48 -0700
Subject: net: phy: provide stub for fixed_phy_set_link_update

In preparation for updating the DSA code and avoid using ifdefs there,
provide an empty stub for fixed_phy_set_link_update when
CONFIG_FIXED_PHY is not selected.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/phy_fixed.h | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/phy_fixed.h b/include/linux/phy_fixed.h
index ae612ac..9411386 100644
--- a/include/linux/phy_fixed.h
+++ b/include/linux/phy_fixed.h
@@ -18,6 +18,9 @@ extern int fixed_phy_register(unsigned int irq,
 			      struct fixed_phy_status *status,
 			      struct device_node *np);
 extern void fixed_phy_del(int phy_addr);
+extern int fixed_phy_set_link_update(struct phy_device *phydev,
+			int (*link_update)(struct net_device *,
+					   struct fixed_phy_status *));
 #else
 static inline int fixed_phy_add(unsigned int irq, int phy_id,
 				struct fixed_phy_status *status)
@@ -34,14 +37,12 @@ static inline int fixed_phy_del(int phy_addr)
 {
 	return -ENODEV;
 }
-#endif /* CONFIG_FIXED_PHY */
-
-/*
- * This function issued only by fixed_phy-aware drivers, no need
- * protect it with #ifdef
- */
-extern int fixed_phy_set_link_update(struct phy_device *phydev,
+static inline int fixed_phy_set_link_update(struct phy_device *phydev,
 			int (*link_update)(struct net_device *,
-					   struct fixed_phy_status *));
+					   struct fixed_phy_status *))
+{
+	return -ENODEV;
+}
+#endif /* CONFIG_FIXED_PHY */
 
 #endif /* __PHY_FIXED_H */
-- 
cgit v1.1


From fa981d9af82e08f316ed25ed43078f995cc4be0a Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Wed, 27 Aug 2014 17:04:49 -0700
Subject: net: dsa: provide a switch device device tree node pointer

We might need to fetch additional resources from the device tree node
pointer, such as register ranges or other properties. Keep a device_node
pointer around for this.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dsa.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include')

diff --git a/include/net/dsa.h b/include/net/dsa.h
index 6e26f1e..decc627 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -15,6 +15,7 @@
 #include <linux/list.h>
 #include <linux/timer.h>
 #include <linux/workqueue.h>
+#include <linux/of.h>
 
 #define DSA_MAX_SWITCHES	4
 #define DSA_MAX_PORTS		12
@@ -26,6 +27,12 @@ struct dsa_chip_data {
 	struct device	*mii_bus;
 	int		sw_addr;
 
+	/* Device tree node pointer for this specific switch chip
+	 * used during switch setup in case additional properties
+	 * and resources needs to be used
+	 */
+	struct device_node *of_node;
+
 	/*
 	 * The names of the switch's ports.  Use "cpu" to
 	 * designate the switch port that the cpu is connected to,
-- 
cgit v1.1


From bd47497a0171b96264927e3377254db13b9fe3e3 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Wed, 27 Aug 2014 17:04:50 -0700
Subject: net: dsa: retain a per-port device_node pointer

We will later use the per-port device_node pointer to fetch a bunch of
port-specific properties.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dsa.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/net/dsa.h b/include/net/dsa.h
index decc627..597875d 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -41,6 +41,7 @@ struct dsa_chip_data {
 	 * or any other string to indicate this is a physical port.
 	 */
 	char		*port_names[DSA_MAX_PORTS];
+	struct device_node *port_dn[DSA_MAX_PORTS];
 
 	/*
 	 * An array (with nr_chips elements) of which element [a]
-- 
cgit v1.1


From 0d8bcdd383b8865e752a7e8edb4712c2e3902052 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Wed, 27 Aug 2014 17:04:51 -0700
Subject: net: dsa: allow for more complex PHY setups

Modify the DSA slave interface to be bound to an arbitray PHY, not just
the ones that are available as child PHY devices of the switch MDIO bus.

This allows us for instance to have external PHYs connected to a
separate MDIO bus, but yet also connected to a given switch port.

Under certain configurations, the physical port mask might not be a 1:1
mapping to the MII PHYs mask. This is the case, if e.g: Port 1 of the
switch is used and connects to a PHY at a MDIO address different than 1.

Introduce a phys_mii_mask variable which allows driver to implement and
divert their own MDIO read/writes operations for a subset of the MDIO
PHY addresses.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dsa.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/net/dsa.h b/include/net/dsa.h
index 597875d..dc35745 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -130,6 +130,7 @@ struct dsa_switch {
 	 */
 	u32			dsa_port_mask;
 	u32			phys_port_mask;
+	u32			phys_mii_mask;
 	struct mii_bus		*slave_mii_bus;
 	struct net_device	*ports[DSA_MAX_PORTS];
 };
-- 
cgit v1.1


From 5aed85cec29882d1c4b4b2a01cb75a99efdbe4ed Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Wed, 27 Aug 2014 17:04:52 -0700
Subject: net: dsa: allow switches to work without tagging

In case switch port tagging is disabled (voluntarily, or the switch just
does not support it), allow us to continue using the defined set of
dsa_device_ops in net/dsa/slave.c.

We introduce dsa_protocol_is_tagged() to check whether we need to
override skb->protocol and go through the DSA-specifif packet_type
function, or if we just go on and receive the SKB through the normal
path.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 6 +++---
 include/net/dsa.h         | 5 +++++
 2 files changed, 8 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 1875dc7..4298013 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1784,10 +1784,10 @@ void dev_net_set(struct net_device *dev, struct net *net)
 static inline bool netdev_uses_dsa(struct net_device *dev)
 {
 #ifdef CONFIG_NET_DSA
-	return dev->dsa_ptr != NULL;
-#else
-	return false;
+	if (dev->dsa_ptr != NULL)
+		return dsa_uses_tagged_protocol(dev->dsa_ptr);
 #endif
+	return false;
 }
 
 /**
diff --git a/include/net/dsa.h b/include/net/dsa.h
index dc35745..1035f64 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -198,4 +198,9 @@ static inline void *ds_to_priv(struct dsa_switch *ds)
 	return (void *)(ds + 1);
 }
 
+static inline bool dsa_uses_tagged_protocol(struct dsa_switch_tree *dst)
+{
+	return dst->tag_protocol != 0;
+}
+
 #endif
-- 
cgit v1.1


From ec9436baedb689668c409cfc8b69eb9573b0d661 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Wed, 27 Aug 2014 17:04:53 -0700
Subject: net: dsa: allow drivers to do link adjustment

Whenever libphy determines that the link status of a given PHY/port has
changed, allow to call into the switch driver link adjustment callback
so proper actions can be taken care of by the switch driver upon link
notification.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dsa.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include')

diff --git a/include/net/dsa.h b/include/net/dsa.h
index 1035f64..2d383592 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -16,6 +16,7 @@
 #include <linux/timer.h>
 #include <linux/workqueue.h>
 #include <linux/of.h>
+#include <linux/phy.h>
 
 #define DSA_MAX_SWITCHES	4
 #define DSA_MAX_PORTS		12
@@ -182,6 +183,12 @@ struct dsa_switch_driver {
 	void	(*poll_link)(struct dsa_switch *ds);
 
 	/*
+	 * Link state adjustment (called from libphy)
+	 */
+	void	(*adjust_link)(struct dsa_switch *ds, int port,
+				struct phy_device *phydev);
+
+	/*
 	 * ethtool hardware statistics.
 	 */
 	void	(*get_strings)(struct dsa_switch *ds, int port, uint8_t *data);
-- 
cgit v1.1


From ce31b31c68e7e39f29b1257581fbd08ce3ca5589 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Wed, 27 Aug 2014 17:04:54 -0700
Subject: net: dsa: allow updating fixed PHY link information

Allow switch drivers to hook a PHY link update callback to perform
port-specific link work.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dsa.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/net/dsa.h b/include/net/dsa.h
index 2d383592..2c9563f 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -17,6 +17,7 @@
 #include <linux/workqueue.h>
 #include <linux/of.h>
 #include <linux/phy.h>
+#include <linux/phy_fixed.h>
 
 #define DSA_MAX_SWITCHES	4
 #define DSA_MAX_PORTS		12
@@ -187,6 +188,8 @@ struct dsa_switch_driver {
 	 */
 	void	(*adjust_link)(struct dsa_switch *ds, int port,
 				struct phy_device *phydev);
+	void	(*fixed_link_update)(struct dsa_switch *ds, int port,
+				struct fixed_phy_status *st);
 
 	/*
 	 * ethtool hardware statistics.
-- 
cgit v1.1


From 5037d532b83d7325a2743dffe82882a64697a8e8 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Wed, 27 Aug 2014 17:04:55 -0700
Subject: net: dsa: add Broadcom tag RX/TX handler

Add support for the 4-bytes Broadcom tag that built-in switches such as
the Starfighter 2 might insert when receiving packets, or that we need
to insert while targetting specific switch ports. We use a fake local
EtherType value for this 4-bytes switch tag: ETH_P_BRCMTAG to make sure
we can assign DSA-specific network operations within the DSA drivers.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dsa.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/net/dsa.h b/include/net/dsa.h
index 2c9563f..9771292 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -19,6 +19,11 @@
 #include <linux/phy.h>
 #include <linux/phy_fixed.h>
 
+/* Not an official ethertype value, used only internally for DSA
+ * demultiplexing
+ */
+#define ETH_P_BRCMTAG		(ETH_P_XDSA + 1)
+
 #define DSA_MAX_SWITCHES	4
 #define DSA_MAX_PORTS		12
 
-- 
cgit v1.1


From 97fdaab4699de3a2a91001efef60bb0622de1c53 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Tue, 26 Aug 2014 13:15:25 -0700
Subject: net: phy: broadcom: fix PHY_BCM_OUI_4

PHY_BCM_OUI_4 is missing two significant digits that actually make it an
OUI, add those missing bits so it becomes usable again for matching.

Fixes: b560a58c45c6 ("net: phy: add Broadcom BCM7xxx internal PHY driver")
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/brcmphy.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h
index ee1431d9..921f17c 100644
--- a/include/linux/brcmphy.h
+++ b/include/linux/brcmphy.h
@@ -21,7 +21,7 @@
 #define PHY_BCM_OUI_1			0x00206000
 #define PHY_BCM_OUI_2			0x0143bc00
 #define PHY_BCM_OUI_3			0x03625c00
-#define PHY_BCM_OUI_4			0x600d0000
+#define PHY_BCM_OUI_4			0x600d8400
 #define PHY_BCM_OUI_5			0x03625e00
 
 
-- 
cgit v1.1


From 11bf2bbd596add62a86a74fc7aedc0b86c6ec154 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Tue, 26 Aug 2014 13:15:26 -0700
Subject: net: phy: broadcom: add new Broadcom OUI

Broadcom started to use a new OUI for its 2013 and newer products:
D4-01-29 which translates into 0xae025000 for a 32-bits OUI, add its
definition.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/brcmphy.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h
index 921f17c..cbcfad3 100644
--- a/include/linux/brcmphy.h
+++ b/include/linux/brcmphy.h
@@ -23,7 +23,7 @@
 #define PHY_BCM_OUI_3			0x03625c00
 #define PHY_BCM_OUI_4			0x600d8400
 #define PHY_BCM_OUI_5			0x03625e00
-
+#define PHY_BCM_OUI_6			0xae025000
 
 #define PHY_BCM_FLAGS_MODE_COPPER	0x00000001
 #define PHY_BCM_FLAGS_MODE_1000BX	0x00000002
-- 
cgit v1.1


From 430ad68ffb5fa632a277162e5995cd6f7a39fb78 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Tue, 26 Aug 2014 13:15:27 -0700
Subject: net: phy: bcm7xxx: add BCM7250 and BCM7364 PHY entries

Add two new entries to the Broadcom BCM7xxx internal PHY driver for
BCM7250 and BCM7364 chips. Those chips share the usual 28nm process
Gigabit PHY sequence and require the same workarounds so far.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/brcmphy.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h
index cbcfad3..5bd35cc 100644
--- a/include/linux/brcmphy.h
+++ b/include/linux/brcmphy.h
@@ -13,6 +13,8 @@
 #define PHY_ID_BCM5461			0x002060c0
 #define PHY_ID_BCM57780			0x03625d90
 
+#define PHY_ID_BCM7250			0xae025280
+#define PHY_ID_BCM7364			0xae025260
 #define PHY_ID_BCM7366			0x600d8490
 #define PHY_ID_BCM7439			0x600d8480
 #define PHY_ID_BCM7445			0x600d8510
-- 
cgit v1.1


From 10c51b56232d24f150e39884a9e749fd99cbc60c Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <dborkman@redhat.com>
Date: Wed, 27 Aug 2014 11:11:27 +0200
Subject: net: add skb_get_tx_queue() helper

Replace occurences of skb_get_queue_mapping() and follow-up
netdev_get_tx_queue() with an actual helper function.

Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 4298013..dfc1d8b 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1747,6 +1747,12 @@ struct netdev_queue *netdev_get_tx_queue(const struct net_device *dev,
 	return &dev->_tx[index];
 }
 
+static inline struct netdev_queue *skb_get_tx_queue(const struct net_device *dev,
+						    const struct sk_buff *skb)
+{
+	return netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
+}
+
 static inline void netdev_for_each_tx_queue(struct net_device *dev,
 					    void (*f)(struct net_device *,
 						      struct netdev_queue *,
-- 
cgit v1.1


From 18fe8db5f2b53e4ac67b47048f24f50c57a2a759 Mon Sep 17 00:00:00 2001
From: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Date: Thu, 28 Aug 2014 13:44:31 +0200
Subject: include/linux/cycx_x25.h: Remove unused header

The header file include/linux/cycx_x25.h does not seem to be used
anywhere. It was orphaned by 6fcdf4facb "wanrouter: delete now
orphaned header content, files/drivers". Remove it.

Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/cycx_x25.h | 125 -----------------------------------------------
 1 file changed, 125 deletions(-)
 delete mode 100644 include/linux/cycx_x25.h

(limited to 'include')

diff --git a/include/linux/cycx_x25.h b/include/linux/cycx_x25.h
deleted file mode 100644
index 362bf19..0000000
--- a/include/linux/cycx_x25.h
+++ /dev/null
@@ -1,125 +0,0 @@
-#ifndef	_CYCX_X25_H
-#define	_CYCX_X25_H
-/*
-* cycx_x25.h	Cyclom X.25 firmware API definitions.
-*
-* Author:	Arnaldo Carvalho de Melo <acme@conectiva.com.br>
-*
-* Copyright:	(c) 1998-2003 Arnaldo Carvalho de Melo
-*
-* Based on sdla_x25.h by Gene Kozin <74604.152@compuserve.com>
-*
-*		This program is free software; you can redistribute it and/or
-*		modify it under the terms of the GNU General Public License
-*		as published by the Free Software Foundation; either version
-*		2 of the License, or (at your option) any later version.
-* ============================================================================
-* 2000/04/02	acme		dprintk and cycx_debug
-* 1999/01/03	acme		judicious use of data types
-* 1999/01/02	acme		#define X25_ACK_N3	0x4411
-* 1998/12/28	acme		cleanup: lot'o'things removed
-*					 commands listed,
-*					 TX25Cmd & TX25Config structs
-*					 typedef'ed
-*/
-#ifndef PACKED
-#define PACKED __attribute__((packed))
-#endif 
-
-/* X.25 shared memory layout. */
-#define	X25_MBOX_OFFS	0x300	/* general mailbox block */
-#define	X25_RXMBOX_OFFS	0x340	/* receive mailbox */
-
-/* Debug */
-#define dprintk(level, format, a...) if (cycx_debug >= level) printk(format, ##a)
-
-extern unsigned int cycx_debug;
-
-/* Data Structures */
-/* X.25 Command Block. */
-struct cycx_x25_cmd {
-	u16 command;
-	u16 link;	/* values: 0 or 1 */
-	u16 len;	/* values: 0 thru 0x205 (517) */
-	u32 buf;
-} PACKED;
-
-/* Defines for the 'command' field. */
-#define X25_CONNECT_REQUEST             0x4401
-#define X25_CONNECT_RESPONSE            0x4402
-#define X25_DISCONNECT_REQUEST          0x4403
-#define X25_DISCONNECT_RESPONSE         0x4404
-#define X25_DATA_REQUEST                0x4405
-#define X25_ACK_TO_VC			0x4406
-#define X25_INTERRUPT_RESPONSE          0x4407
-#define X25_CONFIG                      0x4408
-#define X25_CONNECT_INDICATION          0x4409
-#define X25_CONNECT_CONFIRM             0x440A
-#define X25_DISCONNECT_INDICATION       0x440B
-#define X25_DISCONNECT_CONFIRM          0x440C
-#define X25_DATA_INDICATION             0x440E
-#define X25_INTERRUPT_INDICATION        0x440F
-#define X25_ACK_FROM_VC			0x4410
-#define X25_ACK_N3			0x4411
-#define X25_CONNECT_COLLISION           0x4413
-#define X25_N3WIN                       0x4414
-#define X25_LINE_ON                     0x4415
-#define X25_LINE_OFF                    0x4416
-#define X25_RESET_REQUEST               0x4417
-#define X25_LOG                         0x4500
-#define X25_STATISTIC                   0x4600
-#define X25_TRACE                       0x4700
-#define X25_N2TRACEXC                   0x4702
-#define X25_N3TRACEXC                   0x4703
-
-/**
- *	struct cycx_x25_config - cyclom2x x25 firmware configuration
- *	@link - link number
- *	@speed - line speed
- *	@clock - internal/external
- *	@n2 - # of level 2 retransm.(values: 1 thru FF)
- *	@n2win - level 2 window (values: 1 thru 7)
- *	@n3win - level 3 window (values: 1 thru 7)
- *	@nvc - # of logical channels (values: 1 thru 64)
- *	@pktlen - level 3 packet length - log base 2 of size
- *	@locaddr - my address
- *	@remaddr - remote address
- *	@t1 - time, in seconds
- *	@t2 - time, in seconds
- *	@t21 - time, in seconds
- *	@npvc - # of permanent virt. circuits (1 thru nvc)
- *	@t23 - time, in seconds
- *	@flags - see dosx25.doc, in portuguese, for details
- */
-struct cycx_x25_config {
-	u8  link;
-	u8  speed;
-	u8  clock;
-	u8  n2;
-	u8  n2win;
-	u8  n3win;
-	u8  nvc;
-	u8  pktlen;
-	u8  locaddr;
-	u8  remaddr;
-	u16 t1;
-	u16 t2;
-	u8  t21;
-	u8  npvc;
-	u8  t23;
-	u8  flags;
-} PACKED;
-
-struct cycx_x25_stats {
-	u16 rx_crc_errors;
-	u16 rx_over_errors;
-	u16 n2_tx_frames;
-	u16 n2_rx_frames;
-	u16 tx_timeouts;
-	u16 rx_timeouts;
-	u16 n3_tx_packets;
-	u16 n3_rx_packets;
-	u16 tx_aborts;
-	u16 rx_aborts;
-} PACKED;
-#endif	/* _CYCX_X25_H */
-- 
cgit v1.1


From fbd74659d4513816a6249b0db491e8d831803520 Mon Sep 17 00:00:00 2001
From: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Date: Thu, 28 Aug 2014 13:44:32 +0200
Subject: include/linux/i82593.h: Remove unused header

The header file include/linux/i82593.h does not seem to be used
anywhere. It was orphaned by 8a594170 "drivers/net: delete intel
i825xx based znet notebook driver". Remove it.

Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/i82593.h | 229 -------------------------------------------------
 1 file changed, 229 deletions(-)
 delete mode 100644 include/linux/i82593.h

(limited to 'include')

diff --git a/include/linux/i82593.h b/include/linux/i82593.h
deleted file mode 100644
index afac5c7..0000000
--- a/include/linux/i82593.h
+++ /dev/null
@@ -1,229 +0,0 @@
-/*
- * Definitions for Intel 82593 CSMA/CD Core LAN Controller
- * The definitions are taken from the 1992 users manual with Intel
- * order number 297125-001.
- *
- * /usr/src/pc/RCS/i82593.h,v 1.1 1996/07/17 15:23:12 root Exp
- *
- * Copyright 1994, Anders Klemets <klemets@it.kth.se>
- *
- * HISTORY
- * i82593.h,v
- * Revision 1.4  2005/11/4  09:15:00  baroniunas
- * Modified copyright with permission of author as follows:
- *
- *   "If I82539.H is the only file with my copyright statement
- *    that is included in the Source Forge project, then you have
- *    my approval to change the copyright statement to be a GPL
- *    license, in the way you proposed on October 10."
- *
- * Revision 1.1  1996/07/17 15:23:12  root
- * Initial revision
- *
- * Revision 1.3  1995/04/05  15:13:58  adj
- * Initial alpha release
- *
- * Revision 1.2  1994/06/16  23:57:31  klemets
- * Mirrored all the fields in the configuration block.
- *
- * Revision 1.1  1994/06/02  20:25:34  klemets
- * Initial revision
- *
- *
- */
-#ifndef	_I82593_H
-#define	_I82593_H
-
-/* Intel 82593 CSMA/CD Core LAN Controller */
-
-/* Port 0 Command Register definitions */
-
-/* Execution operations */
-#define OP0_NOP			0	/* CHNL = 0 */
-#define OP0_SWIT_TO_PORT_1	0	/* CHNL = 1 */
-#define OP0_IA_SETUP		1
-#define OP0_CONFIGURE		2
-#define OP0_MC_SETUP		3
-#define OP0_TRANSMIT		4
-#define OP0_TDR			5
-#define OP0_DUMP		6
-#define OP0_DIAGNOSE		7
-#define OP0_TRANSMIT_NO_CRC	9
-#define OP0_RETRANSMIT		12
-#define OP0_ABORT		13
-/* Reception operations */
-#define OP0_RCV_ENABLE		8
-#define OP0_RCV_DISABLE		10
-#define OP0_STOP_RCV		11
-/* Status pointer control operations */
-#define OP0_FIX_PTR		15	/* CHNL = 1 */
-#define OP0_RLS_PTR		15	/* CHNL = 0 */
-#define OP0_RESET		14
-
-#define CR0_CHNL		(1 << 4)	/* 0=Channel 0, 1=Channel 1 */
-#define CR0_STATUS_0		0x00
-#define CR0_STATUS_1		0x20
-#define CR0_STATUS_2		0x40
-#define CR0_STATUS_3		0x60
-#define CR0_INT_ACK		(1 << 7)	/* 0=No ack, 1=acknowledge */
-
-/* Port 0 Status Register definitions */
-
-#define SR0_NO_RESULT		0		/* dummy */
-#define SR0_EVENT_MASK		0x0f
-#define SR0_IA_SETUP_DONE	1
-#define SR0_CONFIGURE_DONE	2
-#define SR0_MC_SETUP_DONE	3
-#define SR0_TRANSMIT_DONE	4
-#define SR0_TDR_DONE		5
-#define SR0_DUMP_DONE		6
-#define SR0_DIAGNOSE_PASSED	7
-#define SR0_TRANSMIT_NO_CRC_DONE 9
-#define SR0_RETRANSMIT_DONE	12
-#define SR0_EXECUTION_ABORTED	13
-#define SR0_END_OF_FRAME	8
-#define SR0_RECEPTION_ABORTED	10
-#define SR0_DIAGNOSE_FAILED	15
-#define SR0_STOP_REG_HIT	11
-
-#define SR0_CHNL		(1 << 4)
-#define SR0_EXECUTION		(1 << 5)
-#define SR0_RECEPTION		(1 << 6)
-#define SR0_INTERRUPT		(1 << 7)
-#define SR0_BOTH_RX_TX		(SR0_EXECUTION | SR0_RECEPTION)
-
-#define SR3_EXEC_STATE_MASK	0x03
-#define SR3_EXEC_IDLE		0
-#define SR3_TX_ABORT_IN_PROGRESS 1
-#define SR3_EXEC_ACTIVE		2
-#define SR3_ABORT_IN_PROGRESS	3
-#define SR3_EXEC_CHNL		(1 << 2)
-#define SR3_STP_ON_NO_RSRC	(1 << 3)
-#define SR3_RCVING_NO_RSRC	(1 << 4)
-#define SR3_RCV_STATE_MASK	0x60
-#define SR3_RCV_IDLE		0x00
-#define SR3_RCV_READY		0x20
-#define SR3_RCV_ACTIVE		0x40
-#define SR3_RCV_STOP_IN_PROG	0x60
-#define SR3_RCV_CHNL		(1 << 7)
-
-/* Port 1 Command Register definitions */
-
-#define OP1_NOP			0
-#define OP1_SWIT_TO_PORT_0	1
-#define OP1_INT_DISABLE		2
-#define OP1_INT_ENABLE		3
-#define OP1_SET_TS		5
-#define OP1_RST_TS		7
-#define OP1_POWER_DOWN		8
-#define OP1_RESET_RING_MNGMT	11
-#define OP1_RESET		14
-#define OP1_SEL_RST		15
-
-#define CR1_STATUS_4		0x00
-#define CR1_STATUS_5		0x20
-#define CR1_STATUS_6		0x40
-#define CR1_STOP_REG_UPDATE	(1 << 7)
-
-/* Receive frame status bits */
-
-#define	RX_RCLD			(1 << 0)
-#define RX_IA_MATCH		(1 << 1)
-#define	RX_NO_AD_MATCH		(1 << 2)
-#define RX_NO_SFD		(1 << 3)
-#define RX_SRT_FRM		(1 << 7)
-#define RX_OVRRUN		(1 << 8)
-#define RX_ALG_ERR		(1 << 10)
-#define RX_CRC_ERR		(1 << 11)
-#define RX_LEN_ERR		(1 << 12)
-#define RX_RCV_OK		(1 << 13)
-#define RX_TYP_LEN		(1 << 15)
-
-/* Transmit status bits */
-
-#define TX_NCOL_MASK		0x0f
-#define TX_FRTL			(1 << 4)
-#define TX_MAX_COL		(1 << 5)
-#define TX_HRT_BEAT		(1 << 6)
-#define TX_DEFER		(1 << 7)
-#define TX_UND_RUN		(1 << 8)
-#define TX_LOST_CTS		(1 << 9)
-#define TX_LOST_CRS		(1 << 10)
-#define TX_LTCOL		(1 << 11)
-#define TX_OK			(1 << 13)
-#define TX_COLL			(1 << 15)
-
-struct i82593_conf_block {
-  u_char fifo_limit : 4,
-  	 forgnesi   : 1,
-  	 fifo_32    : 1,
-  	 d6mod      : 1,
-  	 throttle_enb : 1;
-  u_char throttle   : 6,
-	 cntrxint   : 1,
-	 contin	    : 1;
-  u_char addr_len   : 3,
-  	 acloc 	    : 1,
- 	 preamb_len : 2,
-  	 loopback   : 2;
-  u_char lin_prio   : 3,
-	 tbofstop   : 1,
-	 exp_prio   : 3,
-	 bof_met    : 1;
-  u_char	    : 4,
-	 ifrm_spc   : 4;
-  u_char	    : 5,
-	 slottim_low : 3;
-  u_char slottim_hi : 3,
-		    : 1,
-	 max_retr   : 4;
-  u_char prmisc     : 1,
-	 bc_dis     : 1,
-  		    : 1,
-	 crs_1	    : 1,
-	 nocrc_ins  : 1,
-	 crc_1632   : 1,
-  	 	    : 1,
-  	 crs_cdt    : 1;
-  u_char cs_filter  : 3,
-	 crs_src    : 1,
-	 cd_filter  : 3,
-		    : 1;
-  u_char	    : 2,
-  	 min_fr_len : 6;
-  u_char lng_typ    : 1,
-	 lng_fld    : 1,
-	 rxcrc_xf   : 1,
-	 artx	    : 1,
-	 sarec	    : 1,
-	 tx_jabber  : 1,	/* why is this called max_len in the manual? */
-	 hash_1	    : 1,
-  	 lbpkpol    : 1;
-  u_char	    : 6,
-  	 fdx	    : 1,
-  	  	    : 1;
-  u_char dummy_6    : 6,	/* supposed to be ones */
-  	 mult_ia    : 1,
-  	 dis_bof    : 1;
-  u_char dummy_1    : 1,	/* supposed to be one */
-	 tx_ifs_retrig : 2,
-	 mc_all     : 1,
-	 rcv_mon    : 2,
-	 frag_acpt  : 1,
-  	 tstrttrs   : 1;
-  u_char fretx	    : 1,
-	 runt_eop   : 1,
-	 hw_sw_pin  : 1,
-	 big_endn   : 1,
-	 syncrqs    : 1,
-	 sttlen     : 1,
-	 tx_eop     : 1,
-  	 rx_eop	    : 1;
-  u_char rbuf_size  : 5,
-	 rcvstop    : 1,
-  	 	    : 2;
-};
-
-#define I82593_MAX_MULTICAST_ADDRESSES	128	/* Hardware hashed filter */
-
-#endif /* _I82593_H */
-- 
cgit v1.1


From 6fb7c3778f0fba0bad099c30e834c413c4f8bcb5 Mon Sep 17 00:00:00 2001
From: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Date: Thu, 28 Aug 2014 13:44:33 +0200
Subject: include/linux/phonedev.h: Remove unused header

The header file include/linux/phonedev.h does not seem to be used
anywhere. It was orphaned by 7326446c "Staging: remove telephony
drivers". Remove it.

Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/phonedev.h | 25 -------------------------
 1 file changed, 25 deletions(-)
 delete mode 100644 include/linux/phonedev.h

(limited to 'include')

diff --git a/include/linux/phonedev.h b/include/linux/phonedev.h
deleted file mode 100644
index 4269de9..0000000
--- a/include/linux/phonedev.h
+++ /dev/null
@@ -1,25 +0,0 @@
-#ifndef __LINUX_PHONEDEV_H
-#define __LINUX_PHONEDEV_H
-
-#include <linux/types.h>
-
-#ifdef __KERNEL__
-
-#include <linux/poll.h>
-
-struct phone_device {
-	struct phone_device *next;
-	const struct file_operations *f_op;
-	int (*open) (struct phone_device *, struct file *);
-	int board;		/* Device private index */
-	int minor;
-};
-
-extern int phonedev_init(void);
-#define PHONE_MAJOR	100
-extern int phone_register_device(struct phone_device *, int unit);
-#define PHONE_UNIT_ANY	-1
-extern void phone_unregister_device(struct phone_device *);
-
-#endif
-#endif
-- 
cgit v1.1


From 918bbc4ffdb84e9d2696315e427a6c43de65bc01 Mon Sep 17 00:00:00 2001
From: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Date: Thu, 28 Aug 2014 13:44:34 +0200
Subject: include/rxrpc/types.h: Remove unused header

The header file include/rxrpc/types.h does not seem to be used
anywhere. It was orphaned by 63b6be55 "[AF_RXRPC]: Delete the old
RxRPC code.". Remove it.

Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/rxrpc/types.h | 41 -----------------------------------------
 1 file changed, 41 deletions(-)
 delete mode 100644 include/rxrpc/types.h

(limited to 'include')

diff --git a/include/rxrpc/types.h b/include/rxrpc/types.h
deleted file mode 100644
index 30d48f6..0000000
--- a/include/rxrpc/types.h
+++ /dev/null
@@ -1,41 +0,0 @@
-/* types.h: Rx types
- *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#ifndef _LINUX_RXRPC_TYPES_H
-#define _LINUX_RXRPC_TYPES_H
-
-#include <linux/types.h>
-#include <linux/list.h>
-#include <linux/socket.h>
-#include <linux/in.h>
-#include <linux/spinlock.h>
-#include <linux/atomic.h>
-
-typedef uint32_t	rxrpc_seq_t;	/* Rx message sequence number */
-typedef uint32_t	rxrpc_serial_t;	/* Rx message serial number */
-typedef __be32	rxrpc_seq_net_t; /* on-the-wire Rx message sequence number */
-typedef __be32	rxrpc_serial_net_t; /* on-the-wire Rx message serial number */
-
-struct rxrpc_call;
-struct rxrpc_connection;
-struct rxrpc_header;
-struct rxrpc_message;
-struct rxrpc_operation;
-struct rxrpc_peer;
-struct rxrpc_service;
-typedef struct rxrpc_timer rxrpc_timer_t;
-struct rxrpc_transport;
-
-typedef void (*rxrpc_call_attn_func_t)(struct rxrpc_call *call);
-typedef void (*rxrpc_call_error_func_t)(struct rxrpc_call *call);
-typedef void (*rxrpc_call_aemap_func_t)(struct rxrpc_call *call);
-
-#endif /* _LINUX_RXRPC_TYPES_H */
-- 
cgit v1.1


From de20fe8e2cc3c4ca13fdb529e6720d9d199333fe Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Wed, 27 Aug 2014 21:26:35 -0700
Subject: net: Allocate a new 16 bits for flags in skbuff

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index b69b7b5..3c9574c 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -598,6 +598,10 @@ struct sk_buff {
 		__u32		reserved_tailroom;
 	};
 
+	kmemcheck_bitfield_begin(flags3);
+	/* 16 bit hole */
+	kmemcheck_bitfield_end(flags3);
+
 	__be16			inner_protocol;
 	__u16			inner_transport_header;
 	__u16			inner_network_header;
-- 
cgit v1.1


From 77cffe23c1f88835f6bd7b47bfa0c060c2969828 Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Wed, 27 Aug 2014 21:26:46 -0700
Subject: net: Clarification of CHECKSUM_UNNECESSARY

This patch:
 - Clarifies the specific requirements of devices returning
   CHECKSUM_UNNECESSARY (comments in skbuff.h).
 - Adds csum_level field to skbuff. This is used to express how
   many checksums are covered by CHECKSUM_UNNECESSARY (stores n - 1).
   This replaces the overloading of skb->encapsulation, that field is
   is now only used to indicate inner headers are valid.
 - Change __skb_checksum_validate_needed to "consume" each checksum
   as indicated by csum_level as layers of the the packet are parsed.
 - Remove skb_pop_rcv_encapsulation, no longer needed in the new
   csum_level model.

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 74 ++++++++++++++++++++++++++++++++++----------------
 1 file changed, 51 insertions(+), 23 deletions(-)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 3c9574c..c93b585 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -47,11 +47,29 @@
  *
  *   The hardware you're dealing with doesn't calculate the full checksum
  *   (as in CHECKSUM_COMPLETE), but it does parse headers and verify checksums
- *   for specific protocols e.g. TCP/UDP/SCTP, then, for such packets it will
- *   set CHECKSUM_UNNECESSARY if their checksums are okay. skb->csum is still
- *   undefined in this case though. It is a bad option, but, unfortunately,
- *   nowadays most vendors do this. Apparently with the secret goal to sell
- *   you new devices, when you will add new protocol to your host, f.e. IPv6 8)
+ *   for specific protocols. For such packets it will set CHECKSUM_UNNECESSARY
+ *   if their checksums are okay. skb->csum is still undefined in this case
+ *   though. It is a bad option, but, unfortunately, nowadays most vendors do
+ *   this. Apparently with the secret goal to sell you new devices, when you
+ *   will add new protocol to your host, f.e. IPv6 8)
+ *
+ *   CHECKSUM_UNNECESSARY is applicable to following protocols:
+ *     TCP: IPv6 and IPv4.
+ *     UDP: IPv4 and IPv6. A device may apply CHECKSUM_UNNECESSARY to a
+ *       zero UDP checksum for either IPv4 or IPv6, the networking stack
+ *       may perform further validation in this case.
+ *     GRE: only if the checksum is present in the header.
+ *     SCTP: indicates the CRC in SCTP header has been validated.
+ *
+ *   skb->csum_level indicates the number of consecutive checksums found in
+ *   the packet minus one that have been verified as CHECKSUM_UNNECESSARY.
+ *   For instance if a device receives an IPv6->UDP->GRE->IPv4->TCP packet
+ *   and a device is able to verify the checksums for UDP (possibly zero),
+ *   GRE (checksum flag is set), and TCP-- skb->csum_level would be set to
+ *   two. If the device were only able to verify the UDP checksum and not
+ *   GRE, either because it doesn't support GRE checksum of because GRE
+ *   checksum is bad, skb->csum_level would be set to zero (TCP checksum is
+ *   not considered in this case).
  *
  * CHECKSUM_COMPLETE:
  *
@@ -112,6 +130,9 @@
 #define CHECKSUM_COMPLETE	2
 #define CHECKSUM_PARTIAL	3
 
+/* Maximum value in skb->csum_level */
+#define SKB_MAX_CSUM_LEVEL	3
+
 #define SKB_DATA_ALIGN(X)	ALIGN(X, SMP_CACHE_BYTES)
 #define SKB_WITH_OVERHEAD(X)	\
 	((X) - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
@@ -571,11 +592,7 @@ struct sk_buff {
 	__u8			wifi_acked:1;
 	__u8			no_fcs:1;
 	__u8			head_frag:1;
-	/* Encapsulation protocol and NIC drivers should use
-	 * this flag to indicate to each other if the skb contains
-	 * encapsulated packet or not and maybe use the inner packet
-	 * headers if needed
-	 */
+	/* Indicates the inner headers are valid in the skbuff. */
 	__u8			encapsulation:1;
 	__u8			encap_hdr_csum:1;
 	__u8			csum_valid:1;
@@ -599,7 +616,8 @@ struct sk_buff {
 	};
 
 	kmemcheck_bitfield_begin(flags3);
-	/* 16 bit hole */
+	__u8			csum_level:2;
+	/* 14 bit hole */
 	kmemcheck_bitfield_end(flags3);
 
 	__be16			inner_protocol;
@@ -1866,18 +1884,6 @@ static inline int pskb_network_may_pull(struct sk_buff *skb, unsigned int len)
 	return pskb_may_pull(skb, skb_network_offset(skb) + len);
 }
 
-static inline void skb_pop_rcv_encapsulation(struct sk_buff *skb)
-{
-	/* Only continue with checksum unnecessary if device indicated
-	 * it is valid across encapsulation (skb->encapsulation was set).
-	 */
-	if (skb->ip_summed == CHECKSUM_UNNECESSARY && !skb->encapsulation)
-		skb->ip_summed = CHECKSUM_NONE;
-
-	skb->encapsulation = 0;
-	skb->csum_valid = 0;
-}
-
 /*
  * CPUs often take a performance hit when accessing unaligned memory
  * locations. The actual performance hit varies, it can be small if the
@@ -2798,6 +2804,27 @@ static inline __sum16 skb_checksum_complete(struct sk_buff *skb)
 	       0 : __skb_checksum_complete(skb);
 }
 
+static inline void __skb_decr_checksum_unnecessary(struct sk_buff *skb)
+{
+	if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
+		if (skb->csum_level == 0)
+			skb->ip_summed = CHECKSUM_NONE;
+		else
+			skb->csum_level--;
+	}
+}
+
+static inline void __skb_incr_checksum_unnecessary(struct sk_buff *skb)
+{
+	if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
+		if (skb->csum_level < SKB_MAX_CSUM_LEVEL)
+			skb->csum_level++;
+	} else if (skb->ip_summed == CHECKSUM_NONE) {
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+		skb->csum_level = 0;
+	}
+}
+
 /* Check if we need to perform checksum complete validation.
  *
  * Returns true if checksum complete is needed, false otherwise
@@ -2809,6 +2836,7 @@ static inline bool __skb_checksum_validate_needed(struct sk_buff *skb,
 {
 	if (skb_csum_unnecessary(skb) || (zero_okay && !check)) {
 		skb->csum_valid = 1;
+		__skb_decr_checksum_unnecessary(skb);
 		return false;
 	}
 
-- 
cgit v1.1


From 662880f4420340aad4f9a62a349c6c9d4faa1a5d Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Wed, 27 Aug 2014 21:26:56 -0700
Subject: net: Allow GRO to use and set levels of checksum unnecessary

Allow GRO path to "consume" checksums provided in CHECKSUM_UNNECESSARY
and to report new checksums verfied for use in fallback to normal
path.

Change GRO checksum path to track csum_level using a csum_cnt field
in NAPI_GRO_CB. On GRO initialization, if ip_summed is
CHECKSUM_UNNECESSARY set NAPI_GRO_CB(skb)->csum_cnt to
skb->csum_level + 1. For each checksum verified, decrement
NAPI_GRO_CB(skb)->csum_cnt while its greater than zero. If a checksum
is verfied and NAPI_GRO_CB(skb)->csum_cnt == 0, we have verified a
deeper checksum than originally indicated in skbuf so increment
csum_level (or initialize to CHECKSUM_UNNECESSARY if ip_summed is
CHECKSUM_NONE or CHECKSUM_COMPLETE).

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 26 ++++++++++++--------------
 1 file changed, 12 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index dfc1d8b..456eb1f 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1883,8 +1883,8 @@ struct napi_gro_cb {
 	/* GRO checksum is valid */
 	u8	csum_valid:1;
 
-	/* Number encapsulation layers crossed */
-	u8	encapsulation;
+	/* Number of checksums via CHECKSUM_UNNECESSARY */
+	u8	csum_cnt:3;
 
 	/* used to support CHECKSUM_COMPLETE for tunneling protocols */
 	__wsum	csum;
@@ -2179,8 +2179,7 @@ static inline bool __skb_gro_checksum_validate_needed(struct sk_buff *skb,
 						      __sum16 check)
 {
 	return (skb->ip_summed != CHECKSUM_PARTIAL &&
-		(skb->ip_summed != CHECKSUM_UNNECESSARY ||
-		 (NAPI_GRO_CB(skb)->encapsulation > skb->encapsulation)) &&
+		NAPI_GRO_CB(skb)->csum_cnt == 0 &&
 		(!zero_okay || check));
 }
 
@@ -2196,18 +2195,17 @@ static inline __sum16 __skb_gro_checksum_validate_complete(struct sk_buff *skb,
 	return __skb_gro_checksum_complete(skb);
 }
 
-/* Update skb for CHECKSUM_UNNECESSARY when we verified a top level
- * checksum or an encapsulated one during GRO. This saves work
- * if we fallback to normal path with the packet.
- */
 static inline void skb_gro_incr_csum_unnecessary(struct sk_buff *skb)
 {
-	if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
-		if (NAPI_GRO_CB(skb)->encapsulation)
-			skb->encapsulation = 1;
-	} else if (skb->ip_summed != CHECKSUM_PARTIAL) {
-		skb->ip_summed = CHECKSUM_UNNECESSARY;
-		skb->encapsulation = 0;
+	if (NAPI_GRO_CB(skb)->csum_cnt > 0) {
+		/* Consume a checksum from CHECKSUM_UNNECESSARY */
+		NAPI_GRO_CB(skb)->csum_cnt--;
+	} else {
+		/* Update skb for CHECKSUM_UNNECESSARY and csum_level when we
+		 * verified a new top level checksum or an encapsulated one
+		 * during GRO. This saves work if we fallback to normal path.
+		 */
+		__skb_incr_checksum_unnecessary(skb);
 	}
 }
 
-- 
cgit v1.1


From cfdbeeafdbbdbc006f700e92cbad2cb5d4529f3d Mon Sep 17 00:00:00 2001
From: Vincent Cuissard <cuissard@marvell.com>
Date: Tue, 22 Jul 2014 19:48:38 +0200
Subject: NFC: NCI: Add support of ISO15693

Update nci.h to respect latest NCI specification proposal
(stop using proprietary opcodes). Handle ISO15693 parameters
in NCI_RF_ACTIVATED_NTF handler.

Signed-off-by: Vincent Cuissard <cuissard@marvell.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 include/net/nfc/nci.h | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/net/nfc/nci.h b/include/net/nfc/nci.h
index fbfa4e4..9eca9ae 100644
--- a/include/net/nfc/nci.h
+++ b/include/net/nfc/nci.h
@@ -2,6 +2,7 @@
  *  The NFC Controller Interface is the communication protocol between an
  *  NFC Controller (NFCC) and a Device Host (DH).
  *
+ *  Copyright (C) 2014 Marvell International Ltd.
  *  Copyright (C) 2011 Texas Instruments, Inc.
  *
  *  Written by Ilan Elias <ilane@ti.com>
@@ -65,19 +66,18 @@
 #define NCI_NFC_F_PASSIVE_POLL_MODE				0x02
 #define NCI_NFC_A_ACTIVE_POLL_MODE				0x03
 #define NCI_NFC_F_ACTIVE_POLL_MODE				0x05
-#define NCI_NFC_15693_PASSIVE_POLL_MODE				0x06
+#define NCI_NFC_V_PASSIVE_POLL_MODE				0x06
 #define NCI_NFC_A_PASSIVE_LISTEN_MODE				0x80
 #define NCI_NFC_B_PASSIVE_LISTEN_MODE				0x81
 #define NCI_NFC_F_PASSIVE_LISTEN_MODE				0x82
 #define NCI_NFC_A_ACTIVE_LISTEN_MODE				0x83
 #define NCI_NFC_F_ACTIVE_LISTEN_MODE				0x85
-#define NCI_NFC_15693_PASSIVE_LISTEN_MODE			0x86
 
 /* NCI RF Technologies */
 #define NCI_NFC_RF_TECHNOLOGY_A					0x00
 #define NCI_NFC_RF_TECHNOLOGY_B					0x01
 #define NCI_NFC_RF_TECHNOLOGY_F					0x02
-#define NCI_NFC_RF_TECHNOLOGY_15693				0x03
+#define NCI_NFC_RF_TECHNOLOGY_V					0x03
 
 /* NCI Bit Rates */
 #define NCI_NFC_BIT_RATE_106					0x00
@@ -87,6 +87,7 @@
 #define NCI_NFC_BIT_RATE_1695					0x04
 #define NCI_NFC_BIT_RATE_3390					0x05
 #define NCI_NFC_BIT_RATE_6780					0x06
+#define NCI_NFC_BIT_RATE_26					0x20
 
 /* NCI RF Protocols */
 #define NCI_RF_PROTOCOL_UNKNOWN					0x00
@@ -95,6 +96,7 @@
 #define NCI_RF_PROTOCOL_T3T					0x03
 #define NCI_RF_PROTOCOL_ISO_DEP					0x04
 #define NCI_RF_PROTOCOL_NFC_DEP					0x05
+#define NCI_RF_PROTOCOL_T5T					0x06
 
 /* NCI RF Interfaces */
 #define NCI_RF_INTERFACE_NFCEE_DIRECT				0x00
@@ -328,6 +330,12 @@ struct rf_tech_specific_params_nfcf_poll {
 	__u8	sensf_res[18];	/* 16 or 18 Bytes */
 } __packed;
 
+struct rf_tech_specific_params_nfcv_poll {
+	__u8	res_flags;
+	__u8	dsfid;
+	__u8	uid[8];	/* 8 Bytes */
+} __packed;
+
 struct nci_rf_discover_ntf {
 	__u8	rf_discovery_id;
 	__u8	rf_protocol;
@@ -338,6 +346,7 @@ struct nci_rf_discover_ntf {
 		struct rf_tech_specific_params_nfca_poll nfca_poll;
 		struct rf_tech_specific_params_nfcb_poll nfcb_poll;
 		struct rf_tech_specific_params_nfcf_poll nfcf_poll;
+		struct rf_tech_specific_params_nfcv_poll nfcv_poll;
 	} rf_tech_specific_params;
 
 	__u8	ntf_type;
@@ -372,6 +381,7 @@ struct nci_rf_intf_activated_ntf {
 		struct rf_tech_specific_params_nfca_poll nfca_poll;
 		struct rf_tech_specific_params_nfcb_poll nfcb_poll;
 		struct rf_tech_specific_params_nfcf_poll nfcf_poll;
+		struct rf_tech_specific_params_nfcv_poll nfcv_poll;
 	} rf_tech_specific_params;
 
 	__u8	data_exch_rf_tech_and_mode;
-- 
cgit v1.1


From 10b3ad8c21bb4b135768c30dd4c51a1c744da699 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Fri, 29 Aug 2014 21:07:24 -0700
Subject: net: Do txq_trans_update() in netdev_start_xmit()

That way we don't have to audit every call site to make sure it is
doing this properly.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 456eb1f..1617180 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3437,11 +3437,17 @@ static inline netdev_tx_t __netdev_start_xmit(const struct net_device_ops *ops,
 	return ops->ndo_start_xmit(skb, dev);
 }
 
-static inline netdev_tx_t netdev_start_xmit(struct sk_buff *skb, struct net_device *dev)
+static inline netdev_tx_t netdev_start_xmit(struct sk_buff *skb, struct net_device *dev,
+					    struct netdev_queue *txq)
 {
 	const struct net_device_ops *ops = dev->netdev_ops;
+	int rc;
 
-	return __netdev_start_xmit(ops, skb, dev);
+	rc = __netdev_start_xmit(ops, skb, dev);
+	if (rc == NETDEV_TX_OK)
+		txq_trans_update(txq);
+
+	return rc;
 }
 
 int netdev_class_create_file_ns(struct class_attribute *class_attr,
-- 
cgit v1.1


From fa2dbdc253c2aee2a760c64de454cb62469ec11d Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Fri, 29 Aug 2014 21:55:22 -0700
Subject: net: Pass a "more" indication down into netdev_start_xmit() code
 paths.

For now it will always be false.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 1617180..5050218 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3431,19 +3431,20 @@ int __init dev_proc_init(void);
 #endif
 
 static inline netdev_tx_t __netdev_start_xmit(const struct net_device_ops *ops,
-					      struct sk_buff *skb, struct net_device *dev)
+					      struct sk_buff *skb, struct net_device *dev,
+					      bool more)
 {
-	skb->xmit_more = 0;
+	skb->xmit_more = more ? 1 : 0;
 	return ops->ndo_start_xmit(skb, dev);
 }
 
 static inline netdev_tx_t netdev_start_xmit(struct sk_buff *skb, struct net_device *dev,
-					    struct netdev_queue *txq)
+					    struct netdev_queue *txq, bool more)
 {
 	const struct net_device_ops *ops = dev->netdev_ops;
 	int rc;
 
-	rc = __netdev_start_xmit(ops, skb, dev);
+	rc = __netdev_start_xmit(ops, skb, dev, more);
 	if (rc == NETDEV_TX_OK)
 		txq_trans_update(txq);
 
-- 
cgit v1.1


From 50cbe9ab5f8d92d2d4a327b56e96559d8f63a1fa Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Sat, 30 Aug 2014 19:13:51 -0700
Subject: net: Validate xmit SKBs right when we pull them out of the qdisc.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 5050218..47c49ba 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2827,6 +2827,7 @@ int dev_set_mac_address(struct net_device *, struct sockaddr *);
 int dev_change_carrier(struct net_device *, bool new_carrier);
 int dev_get_phys_port_id(struct net_device *dev,
 			 struct netdev_phys_port_id *ppid);
+struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device *dev);
 int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 			struct netdev_queue *txq);
 int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb);
-- 
cgit v1.1


From ce93718fb7cdbc064c3000ff59e4d3200bdfa744 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Sat, 30 Aug 2014 19:22:20 -0700
Subject: net: Don't keep around original SKB when we software segment GSO
 frames.

Just maintain the list properly by returning the head of the remaining
SKB list from dev_hard_start_xmit().

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 47c49ba..202c25a 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2828,8 +2828,8 @@ int dev_change_carrier(struct net_device *, bool new_carrier);
 int dev_get_phys_port_id(struct net_device *dev,
 			 struct netdev_phys_port_id *ppid);
 struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device *dev);
-int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
-			struct netdev_queue *txq);
+struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
+				    struct netdev_queue *txq, int *ret);
 int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb);
 int dev_forward_skb(struct net_device *dev, struct sk_buff *skb);
 bool is_skb_forwardable(struct net_device *dev, struct sk_buff *skb);
-- 
cgit v1.1


From 5a21232983aa7acfe7fd26170832a9e0a4a7b4ae Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Sun, 31 Aug 2014 15:12:41 -0700
Subject: net: Support for csum_bad in skbuff

This flag indicates that an invalid checksum was detected in the
packet. __skb_mark_checksum_bad helper function was added to set this.

Checksums can be marked bad from a driver or the GRO path (the latter
is implemented in this patch). csum_bad is checked in
__skb_checksum_validate_complete (i.e. calling that when ip_summed ==
CHECKSUM_NONE).

csum_bad works in conjunction with ip_summed value. In the case that
ip_summed is CHECKSUM_NONE and csum_bad is set, this implies that the
first (or next) checksum encountered in the packet is bad. When
ip_summed is CHECKSUM_UNNECESSARY, the first checksum after the last
one validated is bad. For example, if ip_summed == CHECKSUM_UNNECESSARY,
csum_level == 1, and csum_bad is set-- then the third checksum in the
packet is bad. In the normal path, the packet will be dropped when
processing the protocol layer of the bad checksum:
__skb_decr_checksum_unnecessary called twice for the good checksums
changing ip_summed to CHECKSUM_NONE so that
__skb_checksum_validate_complete is called to validate the third
checksum and that will fail since csum_bad is set.

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  4 +++-
 include/linux/skbuff.h    | 21 ++++++++++++++++++++-
 2 files changed, 23 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 202c25a..a0ab6d9 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2216,7 +2216,9 @@ static inline void skb_gro_incr_csum_unnecessary(struct sk_buff *skb)
 	if (__skb_gro_checksum_validate_needed(skb, zero_okay, check))	\
 		__ret = __skb_gro_checksum_validate_complete(skb,	\
 				compute_pseudo(skb, proto));		\
-	if (!__ret)							\
+	if (__ret)							\
+		__skb_mark_checksum_bad(skb);				\
+	else								\
 		skb_gro_incr_csum_unnecessary(skb);			\
 	__ret;								\
 })
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index c93b585..23710a2 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -617,7 +617,8 @@ struct sk_buff {
 
 	kmemcheck_bitfield_begin(flags3);
 	__u8			csum_level:2;
-	/* 14 bit hole */
+	__u8			csum_bad:1;
+	/* 13 bit hole */
 	kmemcheck_bitfield_end(flags3);
 
 	__be16			inner_protocol;
@@ -2825,6 +2826,21 @@ static inline void __skb_incr_checksum_unnecessary(struct sk_buff *skb)
 	}
 }
 
+static inline void __skb_mark_checksum_bad(struct sk_buff *skb)
+{
+	/* Mark current checksum as bad (typically called from GRO
+	 * path). In the case that ip_summed is CHECKSUM_NONE
+	 * this must be the first checksum encountered in the packet.
+	 * When ip_summed is CHECKSUM_UNNECESSARY, this is the first
+	 * checksum after the last one validated. For UDP, a zero
+	 * checksum can not be marked as bad.
+	 */
+
+	if (skb->ip_summed == CHECKSUM_NONE ||
+	    skb->ip_summed == CHECKSUM_UNNECESSARY)
+		skb->csum_bad = 1;
+}
+
 /* Check if we need to perform checksum complete validation.
  *
  * Returns true if checksum complete is needed, false otherwise
@@ -2866,6 +2882,9 @@ static inline __sum16 __skb_checksum_validate_complete(struct sk_buff *skb,
 			skb->csum_valid = 1;
 			return 0;
 		}
+	} else if (skb->csum_bad) {
+		/* ip_summed == CHECKSUM_NONE in this case */
+		return 1;
 	}
 
 	skb->csum = psum;
-- 
cgit v1.1


From d96535a17dbbafd567961d14c08c0984ddda9c3c Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Sun, 31 Aug 2014 15:12:42 -0700
Subject: net: Infrastructure for checksum unnecessary conversions

For normal path, added skb_checksum_try_convert which is called
to attempt to convert CHECKSUM_UNNECESSARY to CHECKSUM_COMPLETE. The
primary condition to allow this is that ip_summed is CHECKSUM_NONE
and csum_valid is true, which will be the state after consuming
a CHECKSUM_UNNECESSARY.

For GRO path, added skb_gro_checksum_try_convert which is the GRO
analogue of skb_checksum_try_convert. The primary condition to allow
this is that NAPI_GRO_CB(skb)->csum_cnt == 0 and
NAPI_GRO_CB(skb)->csum_valid is set. This implies that we have consumed
all available CHECKSUM_UNNECESSARY checksums in the GRO path.

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 20 ++++++++++++++++++++
 include/linux/skbuff.h    | 20 ++++++++++++++++++++
 2 files changed, 40 insertions(+)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index a0ab6d9..5be20a7 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2233,6 +2233,26 @@ static inline void skb_gro_incr_csum_unnecessary(struct sk_buff *skb)
 #define skb_gro_checksum_simple_validate(skb)				\
 	__skb_gro_checksum_validate(skb, 0, false, 0, null_compute_pseudo)
 
+static inline bool __skb_gro_checksum_convert_check(struct sk_buff *skb)
+{
+	return (NAPI_GRO_CB(skb)->csum_cnt == 0 &&
+		!NAPI_GRO_CB(skb)->csum_valid);
+}
+
+static inline void __skb_gro_checksum_convert(struct sk_buff *skb,
+					      __sum16 check, __wsum pseudo)
+{
+	NAPI_GRO_CB(skb)->csum = ~pseudo;
+	NAPI_GRO_CB(skb)->csum_valid = 1;
+}
+
+#define skb_gro_checksum_try_convert(skb, proto, check, compute_pseudo)	\
+do {									\
+	if (__skb_gro_checksum_convert_check(skb))			\
+		__skb_gro_checksum_convert(skb, check,			\
+					   compute_pseudo(skb, proto));	\
+} while (0)
+
 static inline int dev_hard_header(struct sk_buff *skb, struct net_device *dev,
 				  unsigned short type,
 				  const void *daddr, const void *saddr,
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 23710a2..02529fc 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -2942,6 +2942,26 @@ static inline __wsum null_compute_pseudo(struct sk_buff *skb, int proto)
 #define skb_checksum_simple_validate(skb)				\
 	__skb_checksum_validate(skb, 0, true, false, 0, null_compute_pseudo)
 
+static inline bool __skb_checksum_convert_check(struct sk_buff *skb)
+{
+	return (skb->ip_summed == CHECKSUM_NONE &&
+		skb->csum_valid && !skb->csum_bad);
+}
+
+static inline void __skb_checksum_convert(struct sk_buff *skb,
+					  __sum16 check, __wsum pseudo)
+{
+	skb->csum = ~pseudo;
+	skb->ip_summed = CHECKSUM_COMPLETE;
+}
+
+#define skb_checksum_try_convert(skb, proto, check, compute_pseudo)	\
+do {									\
+	if (__skb_checksum_convert_check(skb))				\
+		__skb_checksum_convert(skb, check,			\
+				       compute_pseudo(skb, proto));	\
+} while (0)
+
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 void nf_conntrack_destroy(struct nf_conntrack *nfct);
 static inline void nf_conntrack_put(struct nf_conntrack *nfct)
-- 
cgit v1.1


From 2abb7cdc0dc84e99b76ef983a1ae1978922aa9b3 Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Sun, 31 Aug 2014 15:12:43 -0700
Subject: udp: Add support for doing checksum unnecessary conversion

Add support for doing CHECKSUM_UNNECESSARY to CHECKSUM_COMPLETE
conversion in UDP tunneling path.

In the normal UDP path, we call skb_checksum_try_convert after locating
the UDP socket. The check is that checksum conversion is enabled for
the socket (new flag in UDP socket) and that checksum field is
non-zero.

In the UDP GRO path, we call skb_gro_checksum_try_convert after
checksum is validated and checksum field is non-zero. Since this is
already in GRO we assume that checksum conversion is always wanted.

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/udp.h | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/udp.h b/include/linux/udp.h
index 247cfdc..ee32775 100644
--- a/include/linux/udp.h
+++ b/include/linux/udp.h
@@ -49,7 +49,11 @@ struct udp_sock {
 	unsigned int	 corkflag;	/* Cork is required */
 	__u8		 encap_type;	/* Is this an Encapsulation socket? */
 	unsigned char	 no_check6_tx:1,/* Send zero UDP6 checksums on TX? */
-			 no_check6_rx:1;/* Allow zero UDP6 checksums on RX? */
+			 no_check6_rx:1,/* Allow zero UDP6 checksums on RX? */
+			 convert_csum:1;/* On receive, convert checksum
+					 * unnecessary to checksum complete
+					 * if possible.
+					 */
 	/*
 	 * Following member retains the information to create a UDP header
 	 * when the socket is uncorked.
@@ -98,6 +102,16 @@ static inline bool udp_get_no_check6_rx(struct sock *sk)
 	return udp_sk(sk)->no_check6_rx;
 }
 
+static inline void udp_set_convert_csum(struct sock *sk, bool val)
+{
+	udp_sk(sk)->convert_csum = val;
+}
+
+static inline bool udp_get_convert_csum(struct sock *sk)
+{
+	return udp_sk(sk)->convert_csum;
+}
+
 #define udp_portaddr_for_each_entry(__sk, node, list) \
 	hlist_nulls_for_each_entry(__sk, node, list, __sk_common.skc_portaddr_node)
 
-- 
cgit v1.1


From 364a9e93243d1785f310c0964af0e24bf1adac03 Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Sun, 31 Aug 2014 21:30:27 -0400
Subject: sock: deduplicate errqueue dequeue

sk->sk_error_queue is dequeued in four locations. All share the
exact same logic. Deduplicate.

Also collapse the two critical sections for dequeue (at the top of
the recv handler) and signal (at the bottom).

This moves signal generation for the next packet forward, which should
be harmless.

It also changes the behavior if the recv handler exits early with an
error. Previously, a signal for follow-up packets on the errqueue
would then not be scheduled. The new behavior, to always signal, is
arguably a bug fix.

For rxrpc, the change causes the same function to be called repeatedly
for each queued packet (because the recv handler == sk_error_report).
It is likely that all packets will fail for the same reason (e.g.,
memory exhaustion).

This code runs without sk_lock held, so it is not safe to trust that
sk->sk_err is immutable inbetween releasing q->lock and the subsequent
test. Introduce int err just to avoid this potential race.

Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/net/sock.h b/include/net/sock.h
index 7f2ab72..3fde613 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2041,6 +2041,7 @@ void sk_stop_timer(struct sock *sk, struct timer_list *timer);
 int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
 
 int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb);
+struct sk_buff *sock_dequeue_err_skb(struct sock *sk);
 
 /*
  *	Recover an error report and clear atomically
-- 
cgit v1.1


From b58555f1767c9f4e330fcf168e4e753d2d9196e0 Mon Sep 17 00:00:00 2001
From: Christophe Gouault <christophe.gouault@6wind.com>
Date: Fri, 29 Aug 2014 16:16:04 +0200
Subject: xfrm: hash prefixed policies based on preflen thresholds

The idea is an extension of the current policy hashing.

Today only non-prefixed policies are stored in a hash table. This
patch relaxes the constraints, and hashes policies whose prefix
lengths are greater or equal to a configurable threshold.

Each hash table (one per direction) maintains its own set of IPv4 and
IPv6 thresholds (dbits4, sbits4, dbits6, sbits6), by default (32, 32,
128, 128).

Example, if the output hash table is configured with values (16, 24,
56, 64):

ip xfrm policy add dir out src 10.22.0.0/20 dst 10.24.1.0/24 ... => hashed
ip xfrm policy add dir out src 10.22.0.0/16 dst 10.24.1.1/32 ... => hashed
ip xfrm policy add dir out src 10.22.0.0/16 dst 10.24.0.0/16 ... => unhashed

ip xfrm policy add dir out \
    src 3ffe:304:124:2200::/60 dst 3ffe:304:124:2401::/64 ...    => hashed
ip xfrm policy add dir out \
    src 3ffe:304:124:2200::/56 dst 3ffe:304:124:2401::2/128 ...  => hashed
ip xfrm policy add dir out \
    src 3ffe:304:124:2200::/56 dst 3ffe:304:124:2400::/56 ...    => unhashed

The high order bits of the addresses (up to the threshold) are used to
compute the hash key.

Signed-off-by: Christophe Gouault <christophe.gouault@6wind.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/net/netns/xfrm.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h
index 3492434..41902a8 100644
--- a/include/net/netns/xfrm.h
+++ b/include/net/netns/xfrm.h
@@ -13,6 +13,10 @@ struct ctl_table_header;
 struct xfrm_policy_hash {
 	struct hlist_head	*table;
 	unsigned int		hmask;
+	u8			dbits4;
+	u8			sbits4;
+	u8			dbits6;
+	u8			sbits6;
 };
 
 struct netns_xfrm {
-- 
cgit v1.1


From 880a6fab8f6ba5b5abe59ea68533202ddea1012c Mon Sep 17 00:00:00 2001
From: Christophe Gouault <christophe.gouault@6wind.com>
Date: Fri, 29 Aug 2014 16:16:05 +0200
Subject: xfrm: configure policy hash table thresholds by netlink

Enable to specify local and remote prefix length thresholds for the
policy hash table via a netlink XFRM_MSG_NEWSPDINFO message.

prefix length thresholds are specified by XFRMA_SPD_IPV4_HTHRESH and
XFRMA_SPD_IPV6_HTHRESH optional attributes (struct xfrmu_spdhthresh).

example:

    struct xfrmu_spdhthresh thresh4 = {
        .lbits = 0;
        .rbits = 24;
    };
    struct xfrmu_spdhthresh thresh6 = {
        .lbits = 0;
        .rbits = 56;
    };
    struct nlmsghdr *hdr;
    struct nl_msg *msg;

    msg = nlmsg_alloc();
    hdr = nlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, XFRMA_SPD_IPV4_HTHRESH, sizeof(__u32), NLM_F_REQUEST);
    nla_put(msg, XFRMA_SPD_IPV4_HTHRESH, sizeof(thresh4), &thresh4);
    nla_put(msg, XFRMA_SPD_IPV6_HTHRESH, sizeof(thresh6), &thresh6);
    nla_send_auto(sk, msg);

The numbers are the policy selector minimum prefix lengths to put a
policy in the hash table.

- lbits is the local threshold (source address for out policies,
  destination address for in and fwd policies).

- rbits is the remote threshold (destination address for out
  policies, source address for in and fwd policies).

The default values are:

XFRMA_SPD_IPV4_HTHRESH: 32 32
XFRMA_SPD_IPV6_HTHRESH: 128 128

Dynamic re-building of the SPD is performed when the thresholds values
are changed.

The current thresholds can be read via a XFRM_MSG_GETSPDINFO request:
the kernel replies to XFRM_MSG_GETSPDINFO requests by an
XFRM_MSG_NEWSPDINFO message, with both attributes
XFRMA_SPD_IPV4_HTHRESH and XFRMA_SPD_IPV6_HTHRESH.

Signed-off-by: Christophe Gouault <christophe.gouault@6wind.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/net/netns/xfrm.h  | 10 ++++++++++
 include/net/xfrm.h        |  1 +
 include/uapi/linux/xfrm.h |  7 +++++++
 3 files changed, 18 insertions(+)

(limited to 'include')

diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h
index 41902a8..9da7982 100644
--- a/include/net/netns/xfrm.h
+++ b/include/net/netns/xfrm.h
@@ -19,6 +19,15 @@ struct xfrm_policy_hash {
 	u8			sbits6;
 };
 
+struct xfrm_policy_hthresh {
+	struct work_struct	work;
+	seqlock_t		lock;
+	u8			lbits4;
+	u8			rbits4;
+	u8			lbits6;
+	u8			rbits6;
+};
+
 struct netns_xfrm {
 	struct list_head	state_all;
 	/*
@@ -45,6 +54,7 @@ struct netns_xfrm {
 	struct xfrm_policy_hash	policy_bydst[XFRM_POLICY_MAX * 2];
 	unsigned int		policy_count[XFRM_POLICY_MAX * 2];
 	struct work_struct	policy_hash_work;
+	struct xfrm_policy_hthresh policy_hthresh;
 
 
 	struct sock		*nlsk;
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 721e9c3..dc4865e 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1591,6 +1591,7 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark,
 struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8, int dir,
 				     u32 id, int delete, int *err);
 int xfrm_policy_flush(struct net *net, u8 type, bool task_valid);
+void xfrm_policy_hash_rebuild(struct net *net);
 u32 xfrm_get_acqseq(void);
 int verify_spi_info(u8 proto, u32 min, u32 max);
 int xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi);
diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h
index 25e5dd9..02d5125 100644
--- a/include/uapi/linux/xfrm.h
+++ b/include/uapi/linux/xfrm.h
@@ -328,6 +328,8 @@ enum xfrm_spdattr_type_t {
 	XFRMA_SPD_UNSPEC,
 	XFRMA_SPD_INFO,
 	XFRMA_SPD_HINFO,
+	XFRMA_SPD_IPV4_HTHRESH,
+	XFRMA_SPD_IPV6_HTHRESH,
 	__XFRMA_SPD_MAX
 
 #define XFRMA_SPD_MAX (__XFRMA_SPD_MAX - 1)
@@ -347,6 +349,11 @@ struct xfrmu_spdhinfo {
 	__u32 spdhmcnt;
 };
 
+struct xfrmu_spdhthresh {
+	__u8 lbits;
+	__u8 rbits;
+};
+
 struct xfrm_usersa_info {
 	struct xfrm_selector		sel;
 	struct xfrm_id			id;
-- 
cgit v1.1


From 30766f4c2d60dd2a3fc67b7114174c417f43f4c6 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Tue, 5 Aug 2014 20:02:42 +0200
Subject: netfilter: nat: move specific NAT IPv4 to core

Move the specific NAT IPv4 core functions that are called from the
hooks from iptable_nat.c to nf_nat_l3proto_ipv4.c. This prepares the
ground to allow iptables and nft to use the same NAT engine code that
comes in a follow up patch.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_nat_l3proto.h | 38 ++++++++++++++++++++++++++++++++++
 1 file changed, 38 insertions(+)

(limited to 'include')

diff --git a/include/net/netfilter/nf_nat_l3proto.h b/include/net/netfilter/nf_nat_l3proto.h
index 5a2919b..bc2d515 100644
--- a/include/net/netfilter/nf_nat_l3proto.h
+++ b/include/net/netfilter/nf_nat_l3proto.h
@@ -42,6 +42,44 @@ const struct nf_nat_l3proto *__nf_nat_l3proto_find(u8 l3proto);
 int nf_nat_icmp_reply_translation(struct sk_buff *skb, struct nf_conn *ct,
 				  enum ip_conntrack_info ctinfo,
 				  unsigned int hooknum);
+
+unsigned int nf_nat_ipv4_in(const struct nf_hook_ops *ops, struct sk_buff *skb,
+			    const struct net_device *in,
+			    const struct net_device *out,
+			    unsigned int (*do_chain)(const struct nf_hook_ops *ops,
+						     struct sk_buff *skb,
+						     const struct net_device *in,
+						     const struct net_device *out,
+						     struct nf_conn *ct));
+
+unsigned int nf_nat_ipv4_out(const struct nf_hook_ops *ops, struct sk_buff *skb,
+			     const struct net_device *in,
+			     const struct net_device *out,
+			     unsigned int (*do_chain)(const struct nf_hook_ops *ops,
+						      struct sk_buff *skb,
+						      const struct net_device *in,
+						      const struct net_device *out,
+						      struct nf_conn *ct));
+
+unsigned int nf_nat_ipv4_local_fn(const struct nf_hook_ops *ops,
+				  struct sk_buff *skb,
+				  const struct net_device *in,
+				  const struct net_device *out,
+				  unsigned int (*do_chain)(const struct nf_hook_ops *ops,
+							   struct sk_buff *skb,
+							   const struct net_device *in,
+							   const struct net_device *out,
+							   struct nf_conn *ct));
+
+unsigned int nf_nat_ipv4_fn(const struct nf_hook_ops *ops, struct sk_buff *skb,
+			    const struct net_device *in,
+			    const struct net_device *out,
+			    unsigned int (*do_chain)(const struct nf_hook_ops *ops,
+						     struct sk_buff *skb,
+						     const struct net_device *in,
+						     const struct net_device *out,
+						     struct nf_conn *ct));
+
 int nf_nat_icmpv6_reply_translation(struct sk_buff *skb, struct nf_conn *ct,
 				    enum ip_conntrack_info ctinfo,
 				    unsigned int hooknum, unsigned int hdrlen);
-- 
cgit v1.1


From 940001762ac514810e305aab356983829e5fa82a Mon Sep 17 00:00:00 2001
From: Ying Xue <ying.xue@windriver.com>
Date: Wed, 3 Sep 2014 09:22:36 +0800
Subject: lib/rhashtable: allow user to set the minimum shifts of shrinking

Although rhashtable library allows user to specify a quiet big size
for user's created hash table, the table may be shrunk to a
very small size - HASH_MIN_SIZE(4) after object is removed from
the table at the first time. Subsequently, even if the total amount
of objects saved in the table is quite lower than user's initial
setting in a long time, the hash table size is still dynamically
adjusted by rhashtable_shrink() or rhashtable_expand() each time
object is inserted or removed from the table. However, as
synchronize_rcu() has to be called when table is shrunk or
expanded by the two functions, we should permit user to set the
minimum table size through configuring the minimum number of shifts
according to user specific requirement, avoiding these expensive
actions of shrinking or expanding because of calling synchronize_rcu().

Signed-off-by: Ying Xue <ying.xue@windriver.com>
Acked-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rhashtable.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index 36826c0..fb298e9d 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -44,6 +44,7 @@ struct rhashtable;
  * @head_offset: Offset of rhash_head in struct to be hashed
  * @hash_rnd: Seed to use while hashing
  * @max_shift: Maximum number of shifts while expanding
+ * @min_shift: Minimum number of shifts while shrinking
  * @hashfn: Function to hash key
  * @obj_hashfn: Function to hash object
  * @grow_decision: If defined, may return true if table should expand
@@ -57,6 +58,7 @@ struct rhashtable_params {
 	size_t			head_offset;
 	u32			hash_rnd;
 	size_t			max_shift;
+	size_t			min_shift;
 	rht_hashfn_t		hashfn;
 	rht_obj_hashfn_t	obj_hashfn;
 	bool			(*grow_decision)(const struct rhashtable *ht,
-- 
cgit v1.1


From 87fed556d08d21dd7dd3e0222c94c187e4c2d5e2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <zajec5@gmail.com>
Date: Wed, 3 Sep 2014 10:35:13 +0200
Subject: bcma: get info about flash type SoC booted from
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There is an ongoing work on cleaning MIPS's nvram support so it could be
re-used on other platforms (bcm53xx to say precisely).
This will require a bit of extra logic in bcma this patch implements.

Signed-off-by: Rafał Miłecki <zajec5@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/bcma/bcma_regs.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/linux/bcma/bcma_regs.h b/include/linux/bcma/bcma_regs.h
index 917dcd7..e64ae7b 100644
--- a/include/linux/bcma/bcma_regs.h
+++ b/include/linux/bcma/bcma_regs.h
@@ -39,6 +39,11 @@
 #define  BCMA_RESET_CTL_RESET		0x0001
 #define BCMA_RESET_ST			0x0804
 
+#define BCMA_NS_ROM_IOST_BOOT_DEV_MASK	0x0003
+#define BCMA_NS_ROM_IOST_BOOT_DEV_NOR	0x0000
+#define BCMA_NS_ROM_IOST_BOOT_DEV_NAND	0x0001
+#define BCMA_NS_ROM_IOST_BOOT_DEV_ROM	0x0002
+
 /* BCMA PCI config space registers. */
 #define BCMA_PCI_PMCSR			0x44
 #define  BCMA_PCI_PE			0x100
-- 
cgit v1.1


From 2f711939d2ea9dfaecebecd1324d2ec7a7a21f65 Mon Sep 17 00:00:00 2001
From: Hannes Frederic Sowa <hannes@stressinduktion.org>
Date: Tue, 2 Sep 2014 15:49:25 +0200
Subject: ipv6: add sysctl_mld_qrv to configure query robustness variable

This patch adds a new sysctl_mld_qrv knob to configure the mldv1/v2 query
robustness variable. It specifies how many retransmit of unsolicited mld
retransmit should happen. Admins might want to tune this on lossy links.

Also reset mld state on interface down/up, so we pick up new sysctl
settings during interface up event.

IPv6 certification requests this knob to be available.

I didn't make this knob netns specific, as it is mostly a setting in a
physical environment and should be per host.

Cc: Flavio Leitner <fbl@redhat.com>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Acked-by: Flavio Leitner <fbl@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ipv6.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index a2db816..7e247e9 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -121,6 +121,7 @@ struct frag_hdr {
 
 /* sysctls */
 extern int sysctl_mld_max_msf;
+extern int sysctl_mld_qrv;
 
 #define _DEVINC(net, statname, modifier, idev, field)			\
 ({									\
-- 
cgit v1.1


From a9fe8e29945d56f35235a3a0fba99b4cf181d211 Mon Sep 17 00:00:00 2001
From: Hannes Frederic Sowa <hannes@stressinduktion.org>
Date: Tue, 2 Sep 2014 15:49:26 +0200
Subject: ipv4: implement igmp_qrv sysctl to tune igmp robustness variable

As in IPv6 people might increase the igmp query robustness variable to
make sure unsolicited state change reports aren't lost on the network. Add
and document this new knob to igmp code.

RFCs allow tuning this parameter back to first IGMP RFC, so we also use
this setting for all counters, including source specific multicast.

Also take over sysctl value when upping the interface and don't reuse
the last one seen on the interface.

Cc: Flavio Leitner <fbl@redhat.com>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Acked-by: Flavio Leitner <fbl@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/igmp.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/igmp.h b/include/linux/igmp.h
index f47550d..2c677af 100644
--- a/include/linux/igmp.h
+++ b/include/linux/igmp.h
@@ -39,6 +39,7 @@ static inline struct igmpv3_query *
 
 extern int sysctl_igmp_max_memberships;
 extern int sysctl_igmp_max_msf;
+extern int sysctl_igmp_qrv;
 
 struct ip_sf_socklist {
 	unsigned int		sl_max;
-- 
cgit v1.1


From d98ad83ee86e523cc00cbf425f456fbd14b4fdc4 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Wed, 3 Sep 2014 15:24:57 +0300
Subject: mac80211: add Intel Mobile Communications copyright

Our legal structure changed at some point (see wikipedia), but
we forgot to immediately switch over to the new copyright
notice.

For files that we have modified in the time since the change,
add the proper copyright notice now.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index c9b2bec..db4975c 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -4,6 +4,7 @@
  * Copyright 2002-2005, Devicescape Software, Inc.
  * Copyright 2006-2007	Jiri Benc <jbenc@suse.cz>
  * Copyright 2007-2010	Johannes Berg <johannes@sipsolutions.net>
+ * Copyright 2013-2014  Intel Mobile Communications GmbH
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
-- 
cgit v1.1


From 2740f0cf8ec8bc7ee6a58f68841759e367dda98f Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Wed, 3 Sep 2014 15:24:58 +0300
Subject: cfg80211: add Intel Mobile Communications copyright

Our legal structure changed at some point (see wikipedia), but
we forgot to immediately switch over to the new copyright
notice.

For files that we have modified in the time since the change,
add the proper copyright notice now.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h | 1 +
 include/net/cfg80211.h    | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 8018c91..77d4f26 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -6,6 +6,7 @@
  * Copyright (c) 2002-2003, Jouni Malinen <jkmaline@cc.hut.fi>
  * Copyright (c) 2005, Devicescape Software, Inc.
  * Copyright (c) 2006, Michael Wu <flamingice@sourmilk.net>
+ * Copyright (c) 2013 - 2014 Intel Mobile Communications GmbH
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index ab21299..6be68bb 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -4,6 +4,7 @@
  * 802.11 device and configuration interface
  *
  * Copyright 2006-2010	Johannes Berg <johannes@sipsolutions.net>
+ * Copyright 2013-2014 Intel Mobile Communications GmbH
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
-- 
cgit v1.1


From 1c7e23bf50264a251de53ad9fb1604683b801258 Mon Sep 17 00:00:00 2001
From: Assaf Krauss <assaf.krauss@intel.com>
Date: Wed, 3 Sep 2014 15:25:00 +0300
Subject: nl80211: Allow declaring RRM-related features

Radio Resource Measurement (RRM) is a bundle of features which will
require the entire stack to participate.
In this patch, the driver is given the opportunity to advertise the
device's support for these RRM-related features, using feature flags:
1. Support for Quiet IEs.
2. Support for adding DS Parameter Set IE to probe requests.
3. Support for adding WFA TPC Report IE to probe requests.
4. Support for inserting tx power value to tx-ed packets at a fixed
   offset. This is used in action frames, such as RRM's Link
   Measurement Report, where the actual tx power should be reported
   in the frame.

Signed-off-by: Assaf Krauss <assaf.krauss@intel.com>
Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/uapi/linux/nl80211.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index d097568..c166d3d 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -3968,6 +3968,16 @@ enum nl80211_ap_sme_features {
  * @NL80211_FEATURE_AP_MODE_CHAN_WIDTH_CHANGE: This driver supports dynamic
  *	channel bandwidth change (e.g., HT 20 <-> 40 MHz channel) during the
  *	lifetime of a BSS.
+ * @NL80211_FEATURE_DS_PARAM_SET_IE_IN_PROBES: This device adds a DS Parameter
+ *	Set IE to probe requests.
+ * @NL80211_FEATURE_WFA_TPC_IE_IN_PROBES: This device adds a WFA TPC Report IE
+ *	to probe requests.
+ * @NL80211_FEATURE_QUIET: This device, in client mode, supports Quiet Period
+ *	requests sent to it by an AP.
+ * @NL80211_FEATURE_TX_POWER_INSERTION: This device is capable of inserting the
+ *	current tx power value into the TPC Report IE in the spectrum
+ *	management TPC Report action frame, and in the Radio Measurement Link
+ *	Measurement Report action frame.
  */
 enum nl80211_feature_flags {
 	NL80211_FEATURE_SK_TX_STATUS			= 1 << 0,
@@ -3989,6 +3999,10 @@ enum nl80211_feature_flags {
 	NL80211_FEATURE_USERSPACE_MPM			= 1 << 16,
 	NL80211_FEATURE_ACTIVE_MONITOR			= 1 << 17,
 	NL80211_FEATURE_AP_MODE_CHAN_WIDTH_CHANGE	= 1 << 18,
+	NL80211_FEATURE_DS_PARAM_SET_IE_IN_PROBES	= 1 << 19,
+	NL80211_FEATURE_WFA_TPC_IE_IN_PROBES		= 1 << 20,
+	NL80211_FEATURE_QUIET				= 1 << 21,
+	NL80211_FEATURE_TX_POWER_INSERTION		= 1 << 22,
 };
 
 /**
-- 
cgit v1.1


From bab5ab7d2a5466406e8003d038cc7ce6b2d5d804 Mon Sep 17 00:00:00 2001
From: Assaf Krauss <assaf.krauss@intel.com>
Date: Wed, 3 Sep 2014 15:25:01 +0300
Subject: nl80211: Add flag attribute for RRM connections

Add a flag attribute to use in associations, for tagging the target
connection as supporting RRM. It is the responsibility of upper
layers to set this flag only if both the underlying device, and the
target network indeed support RRM.
To be used in ASSOCIATE and CONNECT commands.

Signed-off-by: Assaf Krauss <assaf.krauss@intel.com>
Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h       |  2 ++
 include/uapi/linux/nl80211.h | 13 +++++++++++++
 2 files changed, 15 insertions(+)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 6be68bb..a887eeb 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1608,10 +1608,12 @@ struct cfg80211_auth_request {
  *
  * @ASSOC_REQ_DISABLE_HT:  Disable HT (802.11n)
  * @ASSOC_REQ_DISABLE_VHT:  Disable VHT
+ * @ASSOC_REQ_USE_RRM: Declare RRM capability in this association
  */
 enum cfg80211_assoc_req_flags {
 	ASSOC_REQ_DISABLE_HT		= BIT(0),
 	ASSOC_REQ_DISABLE_VHT		= BIT(1),
+	ASSOC_REQ_USE_RRM		= BIT(2),
 };
 
 /**
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index c166d3d..de69d3d 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -1594,6 +1594,17 @@ enum nl80211_commands {
  * @NL80211_ATTR_TDLS_INITIATOR: flag attribute indicating the current end is
  *	the TDLS link initiator.
  *
+ * @NL80211_ATTR_USE_RRM: flag for indicating whether the current connection
+ *	shall support Radio Resource Measurements (11k). This attribute can be
+ *	used with %NL80211_CMD_ASSOCIATE and %NL80211_CMD_CONNECT requests.
+ *	User space applications are expected to use this flag only if the
+ *	underlying device supports these minimal RRM features:
+ *		%NL80211_FEATURE_DS_PARAM_SET_IE_IN_PROBES,
+ *		%NL80211_FEATURE_QUIET,
+ *	If this flag is used, driver must add the Power Capabilities IE to the
+ *	association request. In addition, it must also set the RRM capability
+ *	flag in the association request's Capability Info field.
+ *
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
  */
@@ -1936,6 +1947,8 @@ enum nl80211_attrs {
 
 	NL80211_ATTR_TDLS_INITIATOR,
 
+	NL80211_ATTR_USE_RRM,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
-- 
cgit v1.1


From 3057dbfdab1b86a77ed6d512fc857b032f78663b Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo.bianconi83@gmail.com>
Date: Thu, 4 Sep 2014 23:57:40 +0200
Subject: cfg80211: enable dynack through nl80211

Enable ACK timeout estimation algorithm (dynack) using mac80211
set_coverage_class API. Dynack is activated passing coverage class equals to -1
to lower drivers and it is automatically disabled setting valid value for
coverage class.
Define NL80211_ATTR_WIPHY_DYN_ACK flag attribute to enable dynack from
userspace. In order to activate dynack NL80211_FEATURE_ACKTO_ESTIMATION feature
flag must be set by lower drivers to indicate dynack capability.

Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi83@gmail.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h       |  2 ++
 include/uapi/linux/nl80211.h | 12 ++++++++++++
 2 files changed, 14 insertions(+)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index a887eeb..0d17ec9 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1805,6 +1805,7 @@ struct cfg80211_connect_params {
  * @WIPHY_PARAM_FRAG_THRESHOLD: wiphy->frag_threshold has changed
  * @WIPHY_PARAM_RTS_THRESHOLD: wiphy->rts_threshold has changed
  * @WIPHY_PARAM_COVERAGE_CLASS: coverage class changed
+ * @WIPHY_PARAM_DYN_ACK: dynack has been enabled
  */
 enum wiphy_params_flags {
 	WIPHY_PARAM_RETRY_SHORT		= 1 << 0,
@@ -1812,6 +1813,7 @@ enum wiphy_params_flags {
 	WIPHY_PARAM_FRAG_THRESHOLD	= 1 << 2,
 	WIPHY_PARAM_RTS_THRESHOLD	= 1 << 3,
 	WIPHY_PARAM_COVERAGE_CLASS	= 1 << 4,
+	WIPHY_PARAM_DYN_ACK		= 1 << 5,
 };
 
 /*
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index de69d3d..29c4399 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -1605,6 +1605,12 @@ enum nl80211_commands {
  *	association request. In addition, it must also set the RRM capability
  *	flag in the association request's Capability Info field.
  *
+ * @NL80211_ATTR_WIPHY_DYN_ACK: flag attribute used to enable ACK timeout
+ *	estimation algorithm (dynack). In order to activate dynack
+ *	%NL80211_FEATURE_ACKTO_ESTIMATION feature flag must be set by lower
+ *	drivers to indicate dynack capability. Dynack is automatically disabled
+ *	setting valid value for coverage class.
+ *
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
  */
@@ -1949,6 +1955,8 @@ enum nl80211_attrs {
 
 	NL80211_ATTR_USE_RRM,
 
+	NL80211_ATTR_WIPHY_DYN_ACK,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
@@ -3991,6 +3999,9 @@ enum nl80211_ap_sme_features {
  *	current tx power value into the TPC Report IE in the spectrum
  *	management TPC Report action frame, and in the Radio Measurement Link
  *	Measurement Report action frame.
+ * @NL80211_FEATURE_ACKTO_ESTIMATION: This driver supports dynamic ACK timeout
+ *	estimation (dynack). %NL80211_ATTR_WIPHY_DYN_ACK flag attribute is used
+ *	to enable dynack.
  */
 enum nl80211_feature_flags {
 	NL80211_FEATURE_SK_TX_STATUS			= 1 << 0,
@@ -4016,6 +4027,7 @@ enum nl80211_feature_flags {
 	NL80211_FEATURE_WFA_TPC_IE_IN_PROBES		= 1 << 20,
 	NL80211_FEATURE_QUIET				= 1 << 21,
 	NL80211_FEATURE_TX_POWER_INSERTION		= 1 << 22,
+	NL80211_FEATURE_ACKTO_ESTIMATION		= 1 << 23,
 };
 
 /**
-- 
cgit v1.1


From a4bcaf5556da649f0160e60fa7b4bb2c29801c12 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo.bianconi83@gmail.com>
Date: Thu, 4 Sep 2014 23:57:41 +0200
Subject: mac80211: extend set_coverage_class signature

Extend mac80211 set_coverage_class API in order to enable ACK timeout
estimation algorithm (dynack) passing coverage class equals to -1
to lower drivers. Synchronize set_coverage_class routine signature with
mac80211 function pointer for p54, ath9k, ath9k_htc and ath5k drivers.

Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi83@gmail.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index db4975c..c6e6a68 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -2673,7 +2673,9 @@ enum ieee80211_roc_type {
  *
  * @set_coverage_class: Set slot time for given coverage class as specified
  *	in IEEE 802.11-2007 section 17.3.8.6 and modify ACK timeout
- *	accordingly. This callback is not required and may sleep.
+ *	accordingly; coverage class equals to -1 to enable ACK timeout
+ *	estimation algorithm (dynack). To disable dynack set valid value for
+ *	coverage class. This callback is not required and may sleep.
  *
  * @testmode_cmd: Implement a cfg80211 test mode command. The passed @vif may
  *	be %NULL. The callback can sleep.
@@ -2957,7 +2959,7 @@ struct ieee80211_ops {
 	int (*get_survey)(struct ieee80211_hw *hw, int idx,
 		struct survey_info *survey);
 	void (*rfkill_poll)(struct ieee80211_hw *hw);
-	void (*set_coverage_class)(struct ieee80211_hw *hw, u8 coverage_class);
+	void (*set_coverage_class)(struct ieee80211_hw *hw, s16 coverage_class);
 #ifdef CONFIG_NL80211_TESTMODE
 	int (*testmode_cmd)(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
 			    void *data, int len);
-- 
cgit v1.1


From 60a3b2253c413cf601783b070507d7dd6620c954 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <dborkman@redhat.com>
Date: Tue, 2 Sep 2014 22:53:44 +0200
Subject: net: bpf: make eBPF interpreter images read-only

With eBPF getting more extended and exposure to user space is on it's way,
hardening the memory range the interpreter uses to steer its command flow
seems appropriate.  This patch moves the to be interpreted bytecode to
read-only pages.

In case we execute a corrupted BPF interpreter image for some reason e.g.
caused by an attacker which got past a verifier stage, it would not only
provide arbitrary read/write memory access but arbitrary function calls
as well. After setting up the BPF interpreter image, its contents do not
change until destruction time, thus we can setup the image on immutable
made pages in order to mitigate modifications to that code. The idea
is derived from commit 314beb9bcabf ("x86: bpf_jit_comp: secure bpf jit
against spraying attacks").

This is possible because bpf_prog is not part of sk_filter anymore.
After setup bpf_prog cannot be altered during its life-time. This prevents
any modifications to the entire bpf_prog structure (incl. function/JIT
image pointer).

Every eBPF program (including classic BPF that are migrated) have to call
bpf_prog_select_runtime() to select either interpreter or a JIT image
as a last setup step, and they all are being freed via bpf_prog_free(),
including non-JIT. Therefore, we can easily integrate this into the
eBPF life-time, plus since we directly allocate a bpf_prog, we have no
performance penalty.

Tested with seccomp and test_bpf testsuite in JIT/non-JIT mode and manual
inspection of kernel_page_tables.  Brad Spengler proposed the same idea
via Twitter during development of this patch.

Joint work with Hannes Frederic Sowa.

Suggested-by: Brad Spengler <spender@grsecurity.net>
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Cc: Alexei Starovoitov <ast@plumgrid.com>
Cc: Kees Cook <keescook@chromium.org>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/filter.h | 49 ++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 44 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index a5227ab..c789945 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -9,6 +9,11 @@
 #include <linux/skbuff.h>
 #include <linux/workqueue.h>
 #include <uapi/linux/filter.h>
+#include <asm/cacheflush.h>
+
+struct sk_buff;
+struct sock;
+struct seccomp_data;
 
 /* Internally used and optimized filter representation with extended
  * instruction set based on top of classic BPF.
@@ -320,20 +325,23 @@ struct sock_fprog_kern {
 	struct sock_filter	*filter;
 };
 
-struct sk_buff;
-struct sock;
-struct seccomp_data;
+struct bpf_work_struct {
+	struct bpf_prog *prog;
+	struct work_struct work;
+};
 
 struct bpf_prog {
+	u32			pages;		/* Number of allocated pages */
 	u32			jited:1,	/* Is our filter JIT'ed? */
 				len:31;		/* Number of filter blocks */
 	struct sock_fprog_kern	*orig_prog;	/* Original BPF program */
+	struct bpf_work_struct	*work;		/* Deferred free work struct */
 	unsigned int		(*bpf_func)(const struct sk_buff *skb,
 					    const struct bpf_insn *filter);
+	/* Instructions for interpreter */
 	union {
 		struct sock_filter	insns[0];
 		struct bpf_insn		insnsi[0];
-		struct work_struct	work;
 	};
 };
 
@@ -353,6 +361,26 @@ static inline unsigned int bpf_prog_size(unsigned int proglen)
 
 #define bpf_classic_proglen(fprog) (fprog->len * sizeof(fprog->filter[0]))
 
+#ifdef CONFIG_DEBUG_SET_MODULE_RONX
+static inline void bpf_prog_lock_ro(struct bpf_prog *fp)
+{
+	set_memory_ro((unsigned long)fp, fp->pages);
+}
+
+static inline void bpf_prog_unlock_ro(struct bpf_prog *fp)
+{
+	set_memory_rw((unsigned long)fp, fp->pages);
+}
+#else
+static inline void bpf_prog_lock_ro(struct bpf_prog *fp)
+{
+}
+
+static inline void bpf_prog_unlock_ro(struct bpf_prog *fp)
+{
+}
+#endif /* CONFIG_DEBUG_SET_MODULE_RONX */
+
 int sk_filter(struct sock *sk, struct sk_buff *skb);
 
 void bpf_prog_select_runtime(struct bpf_prog *fp);
@@ -361,6 +389,17 @@ void bpf_prog_free(struct bpf_prog *fp);
 int bpf_convert_filter(struct sock_filter *prog, int len,
 		       struct bpf_insn *new_prog, int *new_len);
 
+struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags);
+struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size,
+				  gfp_t gfp_extra_flags);
+void __bpf_prog_free(struct bpf_prog *fp);
+
+static inline void bpf_prog_unlock_free(struct bpf_prog *fp)
+{
+	bpf_prog_unlock_ro(fp);
+	__bpf_prog_free(fp);
+}
+
 int bpf_prog_create(struct bpf_prog **pfp, struct sock_fprog_kern *fprog);
 void bpf_prog_destroy(struct bpf_prog *fp);
 
@@ -450,7 +489,7 @@ static inline void bpf_jit_compile(struct bpf_prog *fp)
 
 static inline void bpf_jit_free(struct bpf_prog *fp)
 {
-	kfree(fp);
+	bpf_prog_unlock_free(fp);
 }
 #endif /* CONFIG_BPF_JIT */
 
-- 
cgit v1.1


From f0db9b073415848709dd59a6394969882f517da9 Mon Sep 17 00:00:00 2001
From: Govindarajulu Varadarajan <_govind@gmx.com>
Date: Wed, 3 Sep 2014 03:17:20 +0530
Subject: ethtool: Add generic options for tunables

This patch adds new ethtool cmd, ETHTOOL_GTUNABLE & ETHTOOL_STUNABLE for getting
tunable values from driver.

Add get_tunable and set_tunable to ethtool_ops. Driver implements these
functions for getting/setting tunable value.

Signed-off-by: Govindarajulu Varadarajan <_govind@gmx.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ethtool.h      |  4 ++++
 include/uapi/linux/ethtool.h | 28 ++++++++++++++++++++++++++++
 2 files changed, 32 insertions(+)

(limited to 'include')

diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index e658229..c1a2d60 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -257,6 +257,10 @@ struct ethtool_ops {
 				     struct ethtool_eeprom *, u8 *);
 	int	(*get_eee)(struct net_device *, struct ethtool_eee *);
 	int	(*set_eee)(struct net_device *, struct ethtool_eee *);
+	int	(*get_tunable)(struct net_device *,
+			       const struct ethtool_tunable *, void *);
+	int	(*set_tunable)(struct net_device *,
+			       const struct ethtool_tunable *, const void *);
 
 
 };
diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index e3c7a71..7a364f2 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -209,6 +209,32 @@ struct ethtool_value {
 	__u32	data;
 };
 
+enum tunable_id {
+	ETHTOOL_ID_UNSPEC,
+	ETHTOOL_RX_COPYBREAK,
+};
+
+enum tunable_type_id {
+	ETHTOOL_TUNABLE_UNSPEC,
+	ETHTOOL_TUNABLE_U8,
+	ETHTOOL_TUNABLE_U16,
+	ETHTOOL_TUNABLE_U32,
+	ETHTOOL_TUNABLE_U64,
+	ETHTOOL_TUNABLE_STRING,
+	ETHTOOL_TUNABLE_S8,
+	ETHTOOL_TUNABLE_S16,
+	ETHTOOL_TUNABLE_S32,
+	ETHTOOL_TUNABLE_S64,
+};
+
+struct ethtool_tunable {
+	__u32	cmd;
+	__u32	id;
+	__u32	type_id;
+	__u32	len;
+	void	*data[0];
+};
+
 /**
  * struct ethtool_regs - hardware register dump
  * @cmd: Command number = %ETHTOOL_GREGS
@@ -1152,6 +1178,8 @@ enum ethtool_sfeatures_retval_bits {
 
 #define ETHTOOL_GRSSH		0x00000046 /* Get RX flow hash configuration */
 #define ETHTOOL_SRSSH		0x00000047 /* Set RX flow hash configuration */
+#define ETHTOOL_GTUNABLE	0x00000048 /* Get tunable configuration */
+#define ETHTOOL_STUNABLE	0x00000049 /* Set tunable configuration */
 
 /* compatibility with older code */
 #define SPARC_ETH_GSET		ETHTOOL_GSET
-- 
cgit v1.1


From caa415270c732505240bb60171c44a7838c555e8 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 3 Sep 2014 22:21:56 -0700
Subject: ipv4: fix a race in update_or_create_fnhe()

nh_exceptions is effectively used under rcu, but lacks proper
barriers. Between kzalloc() and setting of nh->nh_exceptions(),
we need a proper memory barrier.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Fixes: 4895c771c7f00 ("ipv4: Add FIB nexthop exceptions.")
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip_fib.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 9922093..f30fd55 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -87,7 +87,7 @@ struct fib_nh {
 	int			nh_saddr_genid;
 	struct rtable __rcu * __percpu *nh_pcpu_rth_output;
 	struct rtable __rcu	*nh_rth_input;
-	struct fnhe_hash_bucket	*nh_exceptions;
+	struct fnhe_hash_bucket	__rcu *nh_exceptions;
 };
 
 /*
-- 
cgit v1.1


From d546c621542df9e45eedc91f35356e887ac63b7b Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 4 Sep 2014 08:21:31 -0700
Subject: ipv4: harden fnhe_hashfun()

Lets make this hash function a bit secure, as ICMP attacks are still
in the wild.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip_fib.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index f30fd55..dc9d2a2 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -65,7 +65,8 @@ struct fnhe_hash_bucket {
 	struct fib_nh_exception __rcu	*chain;
 };
 
-#define FNHE_HASH_SIZE		2048
+#define FNHE_HASH_SHIFT		11
+#define FNHE_HASH_SIZE		(1 << FNHE_HASH_SHIFT)
 #define FNHE_RECLAIM_DEPTH	5
 
 struct fib_nh {
-- 
cgit v1.1


From 62bccb8cdb69051b95a55ab0c489e3cab261c8ef Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@intel.com>
Date: Thu, 4 Sep 2014 13:31:35 -0400
Subject: net-timestamp: Make the clone operation stand-alone from phy
 timestamping

The phy timestamping takes a different path than the regular timestamping
does in that it will create a clone first so that the packets needing to be
timestamped can be placed in a queue, or the context block could be used.

In order to support these use cases I am pulling the core of the code out
so it can be used in other drivers beyond just phy devices.

In addition I have added a destructor named sock_efree which is meant to
provide a simple way for dropping the reference to skb exceptions that
aren't part of either the receive or send windows for the socket, and I
have removed some duplication in spots where this destructor could be used
in place of sock_edemux.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 2 ++
 include/net/sock.h     | 1 +
 2 files changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 02529fc..1cf0cfa 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -2690,6 +2690,8 @@ static inline ktime_t net_invalid_timestamp(void)
 	return ktime_set(0, 0);
 }
 
+struct sk_buff *skb_clone_sk(struct sk_buff *skb);
+
 #ifdef CONFIG_NETWORK_PHY_TIMESTAMPING
 
 void skb_clone_tx_timestamp(struct sk_buff *skb);
diff --git a/include/net/sock.h b/include/net/sock.h
index 3fde613..e02be37 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1574,6 +1574,7 @@ struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
 void sock_wfree(struct sk_buff *skb);
 void skb_orphan_partial(struct sk_buff *skb);
 void sock_rfree(struct sk_buff *skb);
+void sock_efree(struct sk_buff *skb);
 void sock_edemux(struct sk_buff *skb);
 
 int sock_setsockopt(struct socket *sock, int level, int op,
-- 
cgit v1.1


From 82eabd9eb2ec1603282a2c3f74dfcb6fe0aaea0e Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@intel.com>
Date: Thu, 4 Sep 2014 13:32:11 -0400
Subject: net: merge cases where sock_efree and sock_edemux are the same
 function

Since sock_efree and sock_demux are essentially the same code for non-TCP
sockets and the case where CONFIG_INET is not defined we can combine the
code or replace the call to sock_edemux in several spots.  As a result we
can avoid a bit of unnecessary code or code duplication.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/net/sock.h b/include/net/sock.h
index e02be37..ad23e80 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1575,7 +1575,11 @@ void sock_wfree(struct sk_buff *skb);
 void skb_orphan_partial(struct sk_buff *skb);
 void sock_rfree(struct sk_buff *skb);
 void sock_efree(struct sk_buff *skb);
+#ifdef CONFIG_INET
 void sock_edemux(struct sk_buff *skb);
+#else
+#define sock_edemux(skb) sock_efree(skb)
+#endif
 
 int sock_setsockopt(struct socket *sock, int level, int op,
 		    char __user *optval, unsigned int optlen);
-- 
cgit v1.1


From 56193d1bce2b2759cb4bdcc00cd05544894a0c90 Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@intel.com>
Date: Fri, 5 Sep 2014 19:20:26 -0400
Subject: net: Add function for parsing the header length out of linear
 ethernet frames

This patch updates some of the flow_dissector api so that it can be used to
parse the length of ethernet buffers stored in fragments.  Most of the
changes needed were to __skb_get_poff as it needed to be updated to support
sending a linear buffer instead of a skb.

I have split __skb_get_poff into two functions, the first is skb_get_poff
and it retains the functionality of the original __skb_get_poff.  The other
function is __skb_get_poff which now works much like __skb_flow_dissect in
relation to skb_flow_dissect in that it provides the same functionality but
works with just a data buffer and hlen instead of needing an skb.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/etherdevice.h | 1 +
 include/linux/skbuff.h      | 4 +++-
 include/net/flow_keys.h     | 2 ++
 3 files changed, 6 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h
index 9c5529d..733980f 100644
--- a/include/linux/etherdevice.h
+++ b/include/linux/etherdevice.h
@@ -29,6 +29,7 @@
 #include <asm/bitsperlong.h>
 
 #ifdef __KERNEL__
+u32 eth_get_headlen(void *data, unsigned int max_len);
 __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev);
 extern const struct header_ops eth_header_ops;
 
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 1cf0cfa..07c9fdd 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -3218,7 +3218,9 @@ bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off);
 
 int skb_checksum_setup(struct sk_buff *skb, bool recalculate);
 
-u32 __skb_get_poff(const struct sk_buff *skb);
+u32 skb_get_poff(const struct sk_buff *skb);
+u32 __skb_get_poff(const struct sk_buff *skb, void *data,
+		   const struct flow_keys *keys, int hlen);
 
 /**
  * skb_head_is_locked - Determine if the skb->head is locked down
diff --git a/include/net/flow_keys.h b/include/net/flow_keys.h
index 9a03f73..7ee2df0 100644
--- a/include/net/flow_keys.h
+++ b/include/net/flow_keys.h
@@ -40,4 +40,6 @@ static inline __be32 skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8
 	return __skb_flow_get_ports(skb, thoff, ip_proto, NULL, 0);
 }
 u32 flow_hash_from_keys(struct flow_keys *keys);
+unsigned int flow_get_hlen(const unsigned char *data, unsigned int max_len,
+			   __be16 protocol);
 #endif
-- 
cgit v1.1


From 04317dafd11dd7b0ec19b85f098414abae6ed5f7 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 5 Sep 2014 15:33:32 -0700
Subject: tcp: introduce TCP_SKB_CB(skb)->tcp_tw_isn

TCP_SKB_CB(skb)->when has different meaning in output and input paths.

In output path, it contains a timestamp.
In input path, it contains an ISN, chosen by tcp_timewait_state_process()

Lets add a different name to ease code comprehension.

Note that 'when' field will disappear in following patch,
as skb_mstamp already contains timestamp, the anonymous
union will promptly disappear as well.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 590e01a..0cd7d2c 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -698,7 +698,12 @@ struct tcp_skb_cb {
 	} header;	/* For incoming frames		*/
 	__u32		seq;		/* Starting sequence number	*/
 	__u32		end_seq;	/* SEQ + FIN + SYN + datalen	*/
-	__u32		when;		/* used to compute rtt's	*/
+	union {
+		/* used in output path */
+		__u32		when;	/* used to compute rtt's	*/
+		/* used in input path */
+		__u32		tcp_tw_isn; /* isn chosen by tcp_timewait_state_process() */
+	};
 	__u8		tcp_flags;	/* TCP header flags. (tcp[13])	*/
 
 	__u8		sacked;		/* State flags for SACK/FACK.	*/
-- 
cgit v1.1


From 7faee5c0d514162853a343d93e4a0b6bb8bfec21 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 5 Sep 2014 15:33:33 -0700
Subject: tcp: remove TCP_SKB_CB(skb)->when

After commit 740b0f1841f6 ("tcp: switch rtt estimations to usec resolution"),
we no longer need to maintain timestamps in two different fields.

TCP_SKB_CB(skb)->when can be removed, as same information sits in skb_mstamp.stamp_jiffies

Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 0cd7d2c..a4201ef 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -672,6 +672,12 @@ void tcp_send_window_probe(struct sock *sk);
  */
 #define tcp_time_stamp		((__u32)(jiffies))
 
+static inline u32 tcp_skb_timestamp(const struct sk_buff *skb)
+{
+	return skb->skb_mstamp.stamp_jiffies;
+}
+
+
 #define tcp_flag_byte(th) (((u_int8_t *)th)[13])
 
 #define TCPHDR_FIN 0x01
@@ -698,12 +704,7 @@ struct tcp_skb_cb {
 	} header;	/* For incoming frames		*/
 	__u32		seq;		/* Starting sequence number	*/
 	__u32		end_seq;	/* SEQ + FIN + SYN + datalen	*/
-	union {
-		/* used in output path */
-		__u32		when;	/* used to compute rtt's	*/
-		/* used in input path */
-		__u32		tcp_tw_isn; /* isn chosen by tcp_timewait_state_process() */
-	};
+	__u32		tcp_tw_isn;	/* isn chosen by tcp_timewait_state_process() */
 	__u8		tcp_flags;	/* TCP header flags. (tcp[13])	*/
 
 	__u8		sacked;		/* State flags for SACK/FACK.	*/
-- 
cgit v1.1


From c8d6591752e96c550cb98b781326d72d8eedcc79 Mon Sep 17 00:00:00 2001
From: "Steinar H. Gunderson" <sgunderson@bigfoot.com>
Date: Wed, 3 Sep 2014 06:48:37 -0700
Subject: mac80211: support DTPC IE (from Cisco Client eXtensions)

Linux already supports 802.11h, where the access point can tell the
client to reduce its transmission power. However, 802.11h is only
defined for 5 GHz, where the need for this is much smaller than on
2.4 GHz.

Cisco has their own solution, called DTPC (Dynamic Transmit Power
Control). Cisco APs on a controller sometimes but not always send
802.11h; they always send DTPC, even on 2.4 GHz. This patch adds support
for parsing and honoring the DTPC IE in addition to the 802.11h
element (they do not always contain the same limits, so both must
be honored); the format is not documented, but very simple.

Tested (on top of wireless.git and on 3.16.1) against a Cisco Aironet
1142 joined to a Cisco 2504 WLC, by setting various transmit power
levels for the given access points and observing the results.
The Wireshark 802.11 dissector agrees with the interpretation of the
element, except for negative numbers, which seem to never happen
anyway.

Signed-off-by: Steinar H. Gunderson <sgunderson@bigfoot.com>
Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
---
 include/linux/ieee80211.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 77d4f26..61a09f0 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -1824,7 +1824,8 @@ enum ieee80211_eid {
 	WLAN_EID_DMG_TSPEC = 146,
 	WLAN_EID_DMG_AT = 147,
 	WLAN_EID_DMG_CAP = 148,
-	/* 149-150 reserved for Cisco */
+	/* 149 reserved for Cisco */
+	WLAN_EID_CISCO_VENDOR_SPECIFIC = 150,
 	WLAN_EID_DMG_OPERATION = 151,
 	WLAN_EID_DMG_BSS_PARAM_CHANGE = 152,
 	WLAN_EID_DMG_BEAM_REFINEMENT = 153,
-- 
cgit v1.1


From c16900cf285ca240f0f84117bf8b88a03c55469b Mon Sep 17 00:00:00 2001
From: Johan Hedberg <johan.hedberg@intel.com>
Date: Fri, 15 Aug 2014 21:17:06 +0300
Subject: Bluetooth: Fix hci_conn reference counting for fixed channels

Now that SMP has been converted to use fixed channels we've got a bit of
a problem with the hci_conn reference counting. So far the L2CAP code
has kept a reference for each L2CAP channel that was notified of the
connection. With SMP however this would mean that the connection is
never dropped even though there are no other users of it. Furthermore,
SMP already does its own hci_conn reference counting internally,
starting from a security or pairing request and ending with the key
distribution.

This patch makes L2CAP fixed channels default to the L2CAP core not
keeping a hci_conn reference for them. A new FLAG_HOLD_HCI_CONN flag is
added so that L2CAP users can declare an exception to this rule and hold
a reference even for their fixed channels. One such exception is the
L2CAP socket layer which does want a reference for each socket (e.g. an
ATT socket which uses a fixed channel).

Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/l2cap.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h
index cedda39..1558ecc 100644
--- a/include/net/bluetooth/l2cap.h
+++ b/include/net/bluetooth/l2cap.h
@@ -711,6 +711,7 @@ enum {
 	FLAG_DEFER_SETUP,
 	FLAG_LE_CONN_REQ_SENT,
 	FLAG_PENDING_SECURITY,
+	FLAG_HOLD_HCI_CONN,
 };
 
 enum {
-- 
cgit v1.1


From 51bb8457ddfa74ede52bf8c02054dea831d59fff Mon Sep 17 00:00:00 2001
From: Johan Hedberg <johan.hedberg@intel.com>
Date: Fri, 15 Aug 2014 21:06:57 +0300
Subject: Bluetooth: Improve *_get() functions to return the object type

It's natural to have *_get() functions that increment the reference
count of an object to return the object type itself. This way it's
simple to make a copy of the object pointer and increase the reference
count in a single step. This patch updates two such get() functions,
namely hci_conn_get() and l2cap_conn_get(), and updates the users to
take advantage of the new API.

Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_core.h | 3 ++-
 include/net/bluetooth/l2cap.h    | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index b0ded13..aa75eee 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -756,9 +756,10 @@ void hci_le_conn_failed(struct hci_conn *conn, u8 status);
  * _get()/_drop() in it, but require the caller to have a valid ref (FIXME).
  */
 
-static inline void hci_conn_get(struct hci_conn *conn)
+static inline struct hci_conn *hci_conn_get(struct hci_conn *conn)
 {
 	get_device(&conn->dev);
+	return conn;
 }
 
 static inline void hci_conn_put(struct hci_conn *conn)
diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h
index 1558ecc..8f1652e 100644
--- a/include/net/bluetooth/l2cap.h
+++ b/include/net/bluetooth/l2cap.h
@@ -948,7 +948,7 @@ void l2cap_logical_cfm(struct l2cap_chan *chan, struct hci_chan *hchan,
 void __l2cap_physical_cfm(struct l2cap_chan *chan, int result);
 
 void l2cap_conn_shutdown(struct l2cap_conn *conn, int err);
-void l2cap_conn_get(struct l2cap_conn *conn);
+struct l2cap_conn *l2cap_conn_get(struct l2cap_conn *conn);
 void l2cap_conn_put(struct l2cap_conn *conn);
 
 int l2cap_register_user(struct l2cap_conn *conn, struct l2cap_user *user);
-- 
cgit v1.1


From eb78d7e53d144995b9e023b151de19fa40af72f3 Mon Sep 17 00:00:00 2001
From: Johan Hedberg <johan.hedberg@intel.com>
Date: Mon, 18 Aug 2014 00:41:41 +0300
Subject: Bluetooth: Use zero timeout for immediate scheduling

There's no point in passing a "small" timeout to queue_delayed_work() to
try to get the callback faster scheduled. Passing 0 is perfectly valid
and will cause a shortcut to a direct queue_work().

Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_core.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index aa75eee..18c24f6 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -791,7 +791,7 @@ static inline void hci_conn_drop(struct hci_conn *conn)
 				if (!conn->out)
 					timeo *= 2;
 			} else {
-				timeo = msecs_to_jiffies(10);
+				timeo = 0;
 			}
 			break;
 
@@ -800,7 +800,7 @@ static inline void hci_conn_drop(struct hci_conn *conn)
 			break;
 
 		default:
-			timeo = msecs_to_jiffies(10);
+			timeo = 0;
 			break;
 		}
 
-- 
cgit v1.1


From f94b665dcf15324f5ac8aa639e47be0829b6409d Mon Sep 17 00:00:00 2001
From: Johan Hedberg <johan.hedberg@intel.com>
Date: Mon, 18 Aug 2014 00:41:44 +0300
Subject: Bluetooth: Ignore incoming data after initiating disconnection

When hci_chan_del is called the disconnection routines get scheduled
through a workqueue. If there's any incoming ACL data before the
routines get executed there's a chance that a new hci_chan is created
and the disconnection never happens. This patch adds a new hci_conn flag
to indicate that we're in the process of driving the connection down. We
set the flag in hci_chan_del and check for it in hci_chan_create so that
no new channels are created for the same connection.

Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_core.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 18c24f6..dbe7364 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -553,6 +553,7 @@ enum {
 	HCI_CONN_FIPS,
 	HCI_CONN_STK_ENCRYPT,
 	HCI_CONN_AUTH_INITIATOR,
+	HCI_CONN_DROP,
 };
 
 static inline bool hci_conn_ssp_enabled(struct hci_conn *conn)
-- 
cgit v1.1


From b04afa0c280b7e7ced88692251d75a78c8fcb2a7 Mon Sep 17 00:00:00 2001
From: Johan Hedberg <johan.hedberg@intel.com>
Date: Mon, 18 Aug 2014 20:33:30 +0300
Subject: Bluetooth: Remove unused l2cap_conn_shutdown API

Now that there are no more users of the l2cap_conn_shutdown API (since
smp.c switched to using hci_disconnect) we can simply remove it along
with all of it's l2cap_conn variables.

Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/l2cap.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h
index 8f1652e..be25edd 100644
--- a/include/net/bluetooth/l2cap.h
+++ b/include/net/bluetooth/l2cap.h
@@ -625,9 +625,6 @@ struct l2cap_conn {
 
 	struct delayed_work	info_timer;
 
-	int			disconn_err;
-	struct work_struct	disconn_work;
-
 	struct sk_buff		*rx_skb;
 	__u32			rx_len;
 	__u8			tx_ident;
@@ -947,7 +944,6 @@ void l2cap_logical_cfm(struct l2cap_chan *chan, struct hci_chan *hchan,
 		       u8 status);
 void __l2cap_physical_cfm(struct l2cap_chan *chan, int result);
 
-void l2cap_conn_shutdown(struct l2cap_conn *conn, int err);
 struct l2cap_conn *l2cap_conn_get(struct l2cap_conn *conn);
 void l2cap_conn_put(struct l2cap_conn *conn);
 
-- 
cgit v1.1


From e3b679d56caa2bc555dee646a6ac5861631e7a28 Mon Sep 17 00:00:00 2001
From: Johan Hedberg <johan.hedberg@intel.com>
Date: Mon, 18 Aug 2014 20:33:32 +0300
Subject: Bluetooth: Update hci_disconnect() to return an error value

We'll soon use hci_disconnect() from places that are interested to know
whether the hci_send_cmd() really succeeded or not. This patch updates
hci_disconnect() to pass on any error returned from hci_send_cmd().

Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_core.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index dbe7364..2b6e04d3 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -703,7 +703,7 @@ static inline struct hci_conn *hci_conn_hash_lookup_state(struct hci_dev *hdev,
 	return NULL;
 }
 
-void hci_disconnect(struct hci_conn *conn, __u8 reason);
+int hci_disconnect(struct hci_conn *conn, __u8 reason);
 bool hci_setup_sync(struct hci_conn *conn, __u16 handle);
 void hci_sco_setup(struct hci_conn *conn, __u8 status);
 
-- 
cgit v1.1


From f3d82d0c8ec025fc113408e3ad5775fed5a060ff Mon Sep 17 00:00:00 2001
From: Johan Hedberg <johan.hedberg@intel.com>
Date: Fri, 5 Sep 2014 22:19:50 +0300
Subject: Bluetooth: Move identity address update behind a workqueue

The identity address update of all channels for an l2cap_conn needs to
take the lock for each channel, i.e. it's safest to do this by a
separate workqueue callback.

Previously this was partially solved by moving the entire SMP key
distribution behind a workqueue. However, if we want SMP context locking
to be correct and safe we should always use the l2cap_chan lock when
accessing it, meaning even smp_distribute_keys needs to take that lock
which would once again create a dead lock when updating the identity
address.

The simplest way to solve this is to have l2cap_conn manage the deferred
work which is what this patch does. A subsequent patch will remove the
now unnecessary SMP key distribution work struct.

Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/l2cap.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h
index be25edd..ead99f0 100644
--- a/include/net/bluetooth/l2cap.h
+++ b/include/net/bluetooth/l2cap.h
@@ -633,6 +633,8 @@ struct l2cap_conn {
 	struct sk_buff_head	pending_rx;
 	struct work_struct	pending_rx_work;
 
+	struct work_struct	id_addr_update_work;
+
 	__u8			disc_reason;
 
 	struct l2cap_chan	*smp;
@@ -937,7 +939,6 @@ int l2cap_ertm_init(struct l2cap_chan *chan);
 void l2cap_chan_add(struct l2cap_conn *conn, struct l2cap_chan *chan);
 void __l2cap_chan_add(struct l2cap_conn *conn, struct l2cap_chan *chan);
 void l2cap_chan_del(struct l2cap_chan *chan, int err);
-void l2cap_conn_update_id_addr(struct hci_conn *hcon);
 void l2cap_send_conn_req(struct l2cap_chan *chan);
 void l2cap_move_start(struct l2cap_chan *chan);
 void l2cap_logical_cfm(struct l2cap_chan *chan, struct hci_chan *hchan,
-- 
cgit v1.1


From fc75cc8684d21d3649b28c4c37d4ce3f000759e4 Mon Sep 17 00:00:00 2001
From: Johan Hedberg <johan.hedberg@intel.com>
Date: Fri, 5 Sep 2014 22:19:52 +0300
Subject: Bluetooth: Fix locking of the SMP context

Before the move the l2cap_chan the SMP context (smp_chan) didn't have
any kind of proper locking. The best there existed was the
HCI_CONN_LE_SMP_PEND flag which was used to enable mutual exclusion for
potential multiple creators of the SMP context.

Now that SMP has been converted to use the l2cap_chan infrastructure and
since the SMP context is directly mapped to a corresponding l2cap_chan
we get the SMP context locking essentially for free through the
l2cap_chan lock. For all callbacks that l2cap_core.c makes for each
channel implementation (smp.c in the case of SMP) the l2cap_chan lock is
held through l2cap_chan_lock(chan).

Since the calls from l2cap_core.c to smp.c are covered the only missing
piece to have the locking implemented properly is to ensure that the
lock is held for any other call path that may access the SMP context.
This means user responses through mgmt.c, requests to elevate the
security of a connection through hci_conn.c, as well as any deferred
work through workqueues.

This patch adds the necessary locking to all these other code paths that
try to access the SMP context. Since mutual exclusion for the l2cap_chan
access is now covered from all directions the patch also removes
unnecessary HCI_CONN_LE_SMP_PEND flag (once we've acquired the chan lock
we can simply check whether chan->smp is set to know if there's an SMP
context).

Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_core.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 2b6e04d3..045d913 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -539,7 +539,6 @@ enum {
 	HCI_CONN_RSWITCH_PEND,
 	HCI_CONN_MODE_CHANGE_PEND,
 	HCI_CONN_SCO_SETUP_PEND,
-	HCI_CONN_LE_SMP_PEND,
 	HCI_CONN_MGMT_CONNECTED,
 	HCI_CONN_SSP_ENABLED,
 	HCI_CONN_SC_ENABLED,
-- 
cgit v1.1


From a7f26b7e1ee73ac9e766c430fea5af658d839954 Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Mon, 8 Sep 2014 19:08:34 -0400
Subject: inet: remove dead inetpeer sequence code

inetpeer sequence numbers are no longer incremented, so no need to
check and flush the tree. The function that increments the sequence
number was already dead code and removed in in "ipv4: remove unused
function" (068a6e18). Remove the code that checks for a change, too.

Verifying that v4_seq and v6_seq are never incremented and thus that
flush_check compares bp->flush_seq to 0 is trivial.

The second part of the change removes flush_check completely even
though bp->flush_seq is exactly !0 once, at initialization. This
change is correct because the time this branch is true is when
bp->root == peer_avl_empty_rcu, in which the branch and
inetpeer_invalidate_tree are a NOOP.

Signed-off-by: Willem de Bruijn <willemb@google.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inetpeer.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h
index 01d590e..80479ab 100644
--- a/include/net/inetpeer.h
+++ b/include/net/inetpeer.h
@@ -61,7 +61,6 @@ struct inet_peer {
 struct inet_peer_base {
 	struct inet_peer __rcu	*root;
 	seqlock_t		lock;
-	u32			flush_seq;
 	int			total;
 };
 
-- 
cgit v1.1


From e1e930f591bfd9604c3077f0af5c390f4f890259 Mon Sep 17 00:00:00 2001
From: Johan Hedberg <johan.hedberg@intel.com>
Date: Mon, 8 Sep 2014 17:09:49 -0700
Subject: Bluetooth: Fix mgmt pairing failure when authentication fails

Whether through HCI with BR/EDR or SMP with LE when authentication fails
we should also notify any pending Pair Device mgmt command. This patch
updates the mgmt_auth_failed function to take the actual hci_conn object
and makes sure that any pending pairing command is notified and cleaned
up appropriately.

Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_core.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 045d913..206b92b 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -1342,8 +1342,7 @@ int mgmt_user_passkey_neg_reply_complete(struct hci_dev *hdev, bdaddr_t *bdaddr,
 int mgmt_user_passkey_notify(struct hci_dev *hdev, bdaddr_t *bdaddr,
 			     u8 link_type, u8 addr_type, u32 passkey,
 			     u8 entered);
-void mgmt_auth_failed(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type,
-		      u8 addr_type, u8 status);
+void mgmt_auth_failed(struct hci_conn *conn, u8 status);
 void mgmt_auth_enable_complete(struct hci_dev *hdev, u8 status);
 void mgmt_ssp_enable_complete(struct hci_dev *hdev, u8 enable, u8 status);
 void mgmt_sc_enable_complete(struct hci_dev *hdev, u8 enable, u8 status);
-- 
cgit v1.1


From 2a5538e9aa4929329813bee69922c9ae4990fcad Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Mon, 25 Aug 2014 12:05:27 +0200
Subject: netfilter: nat: move specific NAT IPv6 to core

Move the specific NAT IPv6 core functions that are called from the
hooks from ip6table_nat.c to nf_nat_l3proto_ipv6.c. This prepares the
ground to allow iptables and nft to use the same NAT engine code that
comes in a follow up patch.

This also renames nf_nat_ipv6_fn to nft_nat_ipv6_fn in
net/ipv6/netfilter/nft_chain_nat_ipv6.c to avoid a compilation breakage.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_nat_l3proto.h | 37 ++++++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)

(limited to 'include')

diff --git a/include/net/netfilter/nf_nat_l3proto.h b/include/net/netfilter/nf_nat_l3proto.h
index bc2d515..340c013 100644
--- a/include/net/netfilter/nf_nat_l3proto.h
+++ b/include/net/netfilter/nf_nat_l3proto.h
@@ -84,4 +84,41 @@ int nf_nat_icmpv6_reply_translation(struct sk_buff *skb, struct nf_conn *ct,
 				    enum ip_conntrack_info ctinfo,
 				    unsigned int hooknum, unsigned int hdrlen);
 
+unsigned int nf_nat_ipv6_in(const struct nf_hook_ops *ops, struct sk_buff *skb,
+			    const struct net_device *in,
+			    const struct net_device *out,
+			    unsigned int (*do_chain)(const struct nf_hook_ops *ops,
+						     struct sk_buff *skb,
+						     const struct net_device *in,
+						     const struct net_device *out,
+						     struct nf_conn *ct));
+
+unsigned int nf_nat_ipv6_out(const struct nf_hook_ops *ops, struct sk_buff *skb,
+			     const struct net_device *in,
+			     const struct net_device *out,
+			     unsigned int (*do_chain)(const struct nf_hook_ops *ops,
+						      struct sk_buff *skb,
+						      const struct net_device *in,
+						      const struct net_device *out,
+						      struct nf_conn *ct));
+
+unsigned int nf_nat_ipv6_local_fn(const struct nf_hook_ops *ops,
+				  struct sk_buff *skb,
+				  const struct net_device *in,
+				  const struct net_device *out,
+				  unsigned int (*do_chain)(const struct nf_hook_ops *ops,
+							   struct sk_buff *skb,
+							   const struct net_device *in,
+							   const struct net_device *out,
+							   struct nf_conn *ct));
+
+unsigned int nf_nat_ipv6_fn(const struct nf_hook_ops *ops, struct sk_buff *skb,
+			    const struct net_device *in,
+			    const struct net_device *out,
+			    unsigned int (*do_chain)(const struct nf_hook_ops *ops,
+						     struct sk_buff *skb,
+						     const struct net_device *in,
+						     const struct net_device *out,
+						     struct nf_conn *ct));
+
 #endif /* _NF_NAT_L3PROTO_H */
-- 
cgit v1.1


From 3045d76070abe725dbb7fd8ff39c27b820d5a7eb Mon Sep 17 00:00:00 2001
From: Ana Rey <anarey@gmail.com>
Date: Tue, 2 Sep 2014 20:36:14 +0200
Subject: netfilter: nf_tables: add devgroup support in meta expresion

Add devgroup support to let us match device group of a packets incoming
or outgoing interface.

Signed-off-by: Ana Rey <anarey@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter/nf_tables.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index c9b6f00..c000947 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -573,6 +573,8 @@ enum nft_exthdr_attributes {
  * @NFT_META_BRI_OIFNAME: packet output bridge interface name
  * @NFT_META_PKTTYPE: packet type (skb->pkt_type), special handling for loopback
  * @NFT_META_CPU: cpu id through smp_processor_id()
+ * @NFT_META_IIFGROUP: packet input interface group
+ * @NFT_META_OIFGROUP: packet output interface group
  */
 enum nft_meta_keys {
 	NFT_META_LEN,
@@ -596,6 +598,8 @@ enum nft_meta_keys {
 	NFT_META_BRI_OIFNAME,
 	NFT_META_PKTTYPE,
 	NFT_META_CPU,
+	NFT_META_IIFGROUP,
+	NFT_META_OIFGROUP,
 };
 
 /**
-- 
cgit v1.1


From e42eff8a32f8b7bde88ea3c5a56391407cbe84f3 Mon Sep 17 00:00:00 2001
From: Arturo Borrero <arturo.borrero.glez@gmail.com>
Date: Thu, 4 Sep 2014 14:06:14 +0200
Subject: netfilter: nft_nat: include a flag attribute

Both SNAT and DNAT (and the upcoming masquerade) can have additional
configuration parameters, such as port randomization and NAT addressing
persistence. We can cover these scenarios by simply adding a flag
attribute for userspace to fill when needed.

The flags to use are defined in include/uapi/linux/netfilter/nf_nat.h:

 NF_NAT_RANGE_MAP_IPS
 NF_NAT_RANGE_PROTO_SPECIFIED
 NF_NAT_RANGE_PROTO_RANDOM
 NF_NAT_RANGE_PERSISTENT
 NF_NAT_RANGE_PROTO_RANDOM_FULLY
 NF_NAT_RANGE_PROTO_RANDOM_ALL

The caller must take care of not messing up with the flags, as they are
added unconditionally to the final resulting nf_nat_range.

Signed-off-by: Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter/nf_nat.h    | 5 +++++
 include/uapi/linux/netfilter/nf_tables.h | 2 ++
 2 files changed, 7 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/netfilter/nf_nat.h b/include/uapi/linux/netfilter/nf_nat.h
index 1ad3659..0880781 100644
--- a/include/uapi/linux/netfilter/nf_nat.h
+++ b/include/uapi/linux/netfilter/nf_nat.h
@@ -13,6 +13,11 @@
 #define NF_NAT_RANGE_PROTO_RANDOM_ALL		\
 	(NF_NAT_RANGE_PROTO_RANDOM | NF_NAT_RANGE_PROTO_RANDOM_FULLY)
 
+#define NF_NAT_RANGE_MASK					\
+	(NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED |	\
+	 NF_NAT_RANGE_PROTO_RANDOM | NF_NAT_RANGE_PERSISTENT |	\
+	 NF_NAT_RANGE_PROTO_RANDOM_FULLY)
+
 struct nf_nat_ipv4_range {
 	unsigned int			flags;
 	__be32				min_ip;
diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index c000947..6022c6e 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -785,6 +785,7 @@ enum nft_nat_types {
  * @NFTA_NAT_REG_ADDR_MAX: source register of address range end (NLA_U32: nft_registers)
  * @NFTA_NAT_REG_PROTO_MIN: source register of proto range start (NLA_U32: nft_registers)
  * @NFTA_NAT_REG_PROTO_MAX: source register of proto range end (NLA_U32: nft_registers)
+ * @NFTA_NAT_FLAGS: NAT flags (see NF_NAT_RANGE_* in linux/netfilter/nf_nat.h) (NLA_U32)
  */
 enum nft_nat_attributes {
 	NFTA_NAT_UNSPEC,
@@ -794,6 +795,7 @@ enum nft_nat_attributes {
 	NFTA_NAT_REG_ADDR_MAX,
 	NFTA_NAT_REG_PROTO_MIN,
 	NFTA_NAT_REG_PROTO_MAX,
+	NFTA_NAT_FLAGS,
 	__NFTA_NAT_MAX
 };
 #define NFTA_NAT_MAX		(__NFTA_NAT_MAX - 1)
-- 
cgit v1.1


From 8dd33cc93ec92b8460ed2ad98c6db39276f6a72b Mon Sep 17 00:00:00 2001
From: Arturo Borrero <arturo.borrero.glez@gmail.com>
Date: Thu, 4 Sep 2014 14:06:33 +0200
Subject: netfilter: nf_nat: generalize IPv4 masquerading support for nf_tables

Let's refactor the code so we can reach the masquerade functionality
from outside the xt context (ie. nftables).

The patch includes the addition of an atomic counter to the masquerade
notifier: the stuff to be done by the notifier is the same for xt and
nftables. Therefore, only one notification handler is needed.

This factorization only involves IPv4; a similar patch follows to
handle IPv6.

Signed-off-by: Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/ipv4/nf_nat_masquerade.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)
 create mode 100644 include/net/netfilter/ipv4/nf_nat_masquerade.h

(limited to 'include')

diff --git a/include/net/netfilter/ipv4/nf_nat_masquerade.h b/include/net/netfilter/ipv4/nf_nat_masquerade.h
new file mode 100644
index 0000000..a9c001c
--- /dev/null
+++ b/include/net/netfilter/ipv4/nf_nat_masquerade.h
@@ -0,0 +1,14 @@
+#ifndef _NF_NAT_MASQUERADE_IPV4_H_
+#define _NF_NAT_MASQUERADE_IPV4_H_
+
+#include <net/netfilter/nf_nat.h>
+
+unsigned int
+nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum,
+		       const struct nf_nat_range *range,
+		       const struct net_device *out);
+
+void nf_nat_masquerade_ipv4_register_notifier(void);
+void nf_nat_masquerade_ipv4_unregister_notifier(void);
+
+#endif /*_NF_NAT_MASQUERADE_IPV4_H_ */
-- 
cgit v1.1


From be6b635cd674add9410efa9ac6f03e0040848b12 Mon Sep 17 00:00:00 2001
From: Arturo Borrero <arturo.borrero.glez@gmail.com>
Date: Thu, 4 Sep 2014 14:06:49 +0200
Subject: netfilter: nf_nat: generalize IPv6 masquerading support for nf_tables

Let's refactor the code so we can reach the masquerade functionality
from outside the xt context (ie. nftables).

The patch includes the addition of an atomic counter to the masquerade
notifier: the stuff to be done by the notifier is the same for xt and
nftables. Therefore, only one notification handler is needed.

This factorization only involves IPv6; a similar patch exists to
handle IPv4.

Signed-off-by: Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/ipv6/nf_nat_masquerade.h | 10 ++++++++++
 1 file changed, 10 insertions(+)
 create mode 100644 include/net/netfilter/ipv6/nf_nat_masquerade.h

(limited to 'include')

diff --git a/include/net/netfilter/ipv6/nf_nat_masquerade.h b/include/net/netfilter/ipv6/nf_nat_masquerade.h
new file mode 100644
index 0000000..0a13396
--- /dev/null
+++ b/include/net/netfilter/ipv6/nf_nat_masquerade.h
@@ -0,0 +1,10 @@
+#ifndef _NF_NAT_MASQUERADE_IPV6_H_
+#define _NF_NAT_MASQUERADE_IPV6_H_
+
+unsigned int
+nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range *range,
+		       const struct net_device *out);
+void nf_nat_masquerade_ipv6_register_notifier(void);
+void nf_nat_masquerade_ipv6_unregister_notifier(void);
+
+#endif /* _NF_NAT_MASQUERADE_IPV6_H_ */
-- 
cgit v1.1


From 9ba1f726bec090399eb9bb9157eb32dedc8e8c45 Mon Sep 17 00:00:00 2001
From: Arturo Borrero <arturo.borrero.glez@gmail.com>
Date: Mon, 8 Sep 2014 13:45:00 +0200
Subject: netfilter: nf_tables: add new nft_masq expression

The nft_masq expression is intended to perform NAT in the masquerade flavour.

We decided to have the masquerade functionality in a separated expression other
than nft_nat.

Signed-off-by: Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nft_masq.h         | 16 ++++++++++++++++
 include/uapi/linux/netfilter/nf_tables.h | 11 +++++++++++
 2 files changed, 27 insertions(+)
 create mode 100644 include/net/netfilter/nft_masq.h

(limited to 'include')

diff --git a/include/net/netfilter/nft_masq.h b/include/net/netfilter/nft_masq.h
new file mode 100644
index 0000000..c72729f
--- /dev/null
+++ b/include/net/netfilter/nft_masq.h
@@ -0,0 +1,16 @@
+#ifndef _NFT_MASQ_H_
+#define _NFT_MASQ_H_
+
+struct nft_masq {
+	u32	flags;
+};
+
+extern const struct nla_policy nft_masq_policy[];
+
+int nft_masq_init(const struct nft_ctx *ctx,
+		  const struct nft_expr *expr,
+		  const struct nlattr * const tb[]);
+
+int nft_masq_dump(struct sk_buff *skb, const struct nft_expr *expr);
+
+#endif /* _NFT_MASQ_H_ */
diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 6022c6e..eeec0ae 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -800,4 +800,15 @@ enum nft_nat_attributes {
 };
 #define NFTA_NAT_MAX		(__NFTA_NAT_MAX - 1)
 
+/**
+ * enum nft_masq_attributes - nf_tables masquerade expression attributes
+ *
+ * @NFTA_MASQ_FLAGS: NAT flags (see NF_NAT_RANGE_* in linux/netfilter/nf_nat.h) (NLA_U32)
+ */
+enum nft_masq_attributes {
+	NFTA_MASQ_FLAGS,
+	__NFTA_MASQ_MAX
+};
+#define NFTA_MASQ_MAX		(__NFTA_MASQ_MAX - 1)
+
 #endif /* _LINUX_NF_TABLES_H */
-- 
cgit v1.1


From 02ab695bb37ee9ad515df0d0790d5977505dd04a Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@plumgrid.com>
Date: Thu, 4 Sep 2014 22:17:17 -0700
Subject: net: filter: add "load 64-bit immediate" eBPF instruction

add BPF_LD_IMM64 instruction to load 64-bit immediate value into a register.
All previous instructions were 8-byte. This is first 16-byte instruction.
Two consecutive 'struct bpf_insn' blocks are interpreted as single instruction:
insn[0].code = BPF_LD | BPF_DW | BPF_IMM
insn[0].dst_reg = destination register
insn[0].imm = lower 32-bit
insn[1].code = 0
insn[1].imm = upper 32-bit
All unused fields must be zero.

Classic BPF has similar instruction: BPF_LD | BPF_W | BPF_IMM
which loads 32-bit immediate value into a register.

x64 JITs it as single 'movabsq %rax, imm64'
arm64 may JIT as sequence of four 'movk x0, #imm16, lsl #shift' insn

Note that old eBPF programs are binary compatible with new interpreter.

It helps eBPF programs load 64-bit constant into a register with one
instruction instead of using two registers and 4 instructions:
BPF_MOV32_IMM(R1, imm32)
BPF_ALU64_IMM(BPF_LSH, R1, 32)
BPF_MOV32_IMM(R2, imm32)
BPF_ALU64_REG(BPF_OR, R1, R2)

User space generated programs will use this instruction to load constants only.

To tell kernel that user space needs a pointer the _pseudo_ variant of
this instruction may be added later, which will use extra bits of encoding
to indicate what type of pointer user space is asking kernel to provide.
For example 'off' or 'src_reg' fields can be used for such purpose.
src_reg = 1 could mean that user space is asking kernel to validate and
load in-kernel map pointer.
src_reg = 2 could mean that user space needs readonly data section pointer
src_reg = 3 could mean that user space needs a pointer to per-cpu local data
All such future pseudo instructions will not be carrying the actual pointer
as part of the instruction, but rather will be treated as a request to kernel
to provide one. The kernel will verify the request_for_a_pointer, then
will drop _pseudo_ marking and will store actual internal pointer inside
the instruction, so the end result is the interpreter and JITs never
see pseudo BPF_LD_IMM64 insns and only operate on generic BPF_LD_IMM64 that
loads 64-bit immediate into a register. User space never operates on direct
pointers and verifier can easily recognize request_for_pointer vs other
instructions.

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/filter.h | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'include')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index c789945..bf323da 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -166,6 +166,24 @@ enum {
 		.off   = 0,					\
 		.imm   = IMM })
 
+/* BPF_LD_IMM64 macro encodes single 'load 64-bit immediate' insn */
+#define BPF_LD_IMM64(DST, IMM)					\
+	BPF_LD_IMM64_RAW(DST, 0, IMM)
+
+#define BPF_LD_IMM64_RAW(DST, SRC, IMM)				\
+	((struct bpf_insn) {					\
+		.code  = BPF_LD | BPF_DW | BPF_IMM,		\
+		.dst_reg = DST,					\
+		.src_reg = SRC,					\
+		.off   = 0,					\
+		.imm   = (__u32) (IMM) }),			\
+	((struct bpf_insn) {					\
+		.code  = 0, /* zero is reserved opcode */	\
+		.dst_reg = 0,					\
+		.src_reg = 0,					\
+		.off   = 0,					\
+		.imm   = ((__u64) (IMM)) >> 32 })
+
 /* Short form of mov based on type, BPF_X: dst_reg = src_reg, BPF_K: dst_reg = imm32 */
 
 #define BPF_MOV64_RAW(TYPE, DST, SRC, IMM)			\
-- 
cgit v1.1


From daedfb22451dd02b35c0549566cbb7cc06bdd53b Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@plumgrid.com>
Date: Thu, 4 Sep 2014 22:17:18 -0700
Subject: net: filter: split filter.h and expose eBPF to user space

allow user space to generate eBPF programs

uapi/linux/bpf.h: eBPF instruction set definition

linux/filter.h: the rest

This patch only moves macro definitions, but practically it freezes existing
eBPF instruction set, though new instructions can still be added in the future.

These eBPF definitions cannot go into uapi/linux/filter.h, since the names
may conflict with existing applications.

Full eBPF ISA description is in Documentation/networking/filter.txt

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Acked-by: Daniel Borkmann <dborkman@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/filter.h    | 56 +---------------------------------------
 include/uapi/linux/Kbuild |  1 +
 include/uapi/linux/bpf.h  | 65 +++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 67 insertions(+), 55 deletions(-)
 create mode 100644 include/uapi/linux/bpf.h

(limited to 'include')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index bf323da..8f82ef3 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -10,58 +10,12 @@
 #include <linux/workqueue.h>
 #include <uapi/linux/filter.h>
 #include <asm/cacheflush.h>
+#include <uapi/linux/bpf.h>
 
 struct sk_buff;
 struct sock;
 struct seccomp_data;
 
-/* Internally used and optimized filter representation with extended
- * instruction set based on top of classic BPF.
- */
-
-/* instruction classes */
-#define BPF_ALU64	0x07	/* alu mode in double word width */
-
-/* ld/ldx fields */
-#define BPF_DW		0x18	/* double word */
-#define BPF_XADD	0xc0	/* exclusive add */
-
-/* alu/jmp fields */
-#define BPF_MOV		0xb0	/* mov reg to reg */
-#define BPF_ARSH	0xc0	/* sign extending arithmetic shift right */
-
-/* change endianness of a register */
-#define BPF_END		0xd0	/* flags for endianness conversion: */
-#define BPF_TO_LE	0x00	/* convert to little-endian */
-#define BPF_TO_BE	0x08	/* convert to big-endian */
-#define BPF_FROM_LE	BPF_TO_LE
-#define BPF_FROM_BE	BPF_TO_BE
-
-#define BPF_JNE		0x50	/* jump != */
-#define BPF_JSGT	0x60	/* SGT is signed '>', GT in x86 */
-#define BPF_JSGE	0x70	/* SGE is signed '>=', GE in x86 */
-#define BPF_CALL	0x80	/* function call */
-#define BPF_EXIT	0x90	/* function return */
-
-/* Register numbers */
-enum {
-	BPF_REG_0 = 0,
-	BPF_REG_1,
-	BPF_REG_2,
-	BPF_REG_3,
-	BPF_REG_4,
-	BPF_REG_5,
-	BPF_REG_6,
-	BPF_REG_7,
-	BPF_REG_8,
-	BPF_REG_9,
-	BPF_REG_10,
-	__MAX_BPF_REG,
-};
-
-/* BPF has 10 general purpose 64-bit registers and stack frame. */
-#define MAX_BPF_REG	__MAX_BPF_REG
-
 /* ArgX, context and stack frame pointer register positions. Note,
  * Arg1, Arg2, Arg3, etc are used as argument mappings of function
  * calls in BPF_CALL instruction.
@@ -322,14 +276,6 @@ enum {
 #define SK_RUN_FILTER(filter, ctx) \
 	(*filter->prog->bpf_func)(ctx, filter->prog->insnsi)
 
-struct bpf_insn {
-	__u8	code;		/* opcode */
-	__u8	dst_reg:4;	/* dest register */
-	__u8	src_reg:4;	/* source register */
-	__s16	off;		/* signed offset */
-	__s32	imm;		/* signed immediate constant */
-};
-
 #ifdef CONFIG_COMPAT
 /* A struct sock_filter is architecture independent. */
 struct compat_sock_fprog {
diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild
index 24e9033..fb3f7b6 100644
--- a/include/uapi/linux/Kbuild
+++ b/include/uapi/linux/Kbuild
@@ -67,6 +67,7 @@ header-y += bfs_fs.h
 header-y += binfmts.h
 header-y += blkpg.h
 header-y += blktrace_api.h
+header-y += bpf.h
 header-y += bpqether.h
 header-y += bsg.h
 header-y += btrfs.h
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
new file mode 100644
index 0000000..479ed0b
--- /dev/null
+++ b/include/uapi/linux/bpf.h
@@ -0,0 +1,65 @@
+/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#ifndef _UAPI__LINUX_BPF_H__
+#define _UAPI__LINUX_BPF_H__
+
+#include <linux/types.h>
+
+/* Extended instruction set based on top of classic BPF */
+
+/* instruction classes */
+#define BPF_ALU64	0x07	/* alu mode in double word width */
+
+/* ld/ldx fields */
+#define BPF_DW		0x18	/* double word */
+#define BPF_XADD	0xc0	/* exclusive add */
+
+/* alu/jmp fields */
+#define BPF_MOV		0xb0	/* mov reg to reg */
+#define BPF_ARSH	0xc0	/* sign extending arithmetic shift right */
+
+/* change endianness of a register */
+#define BPF_END		0xd0	/* flags for endianness conversion: */
+#define BPF_TO_LE	0x00	/* convert to little-endian */
+#define BPF_TO_BE	0x08	/* convert to big-endian */
+#define BPF_FROM_LE	BPF_TO_LE
+#define BPF_FROM_BE	BPF_TO_BE
+
+#define BPF_JNE		0x50	/* jump != */
+#define BPF_JSGT	0x60	/* SGT is signed '>', GT in x86 */
+#define BPF_JSGE	0x70	/* SGE is signed '>=', GE in x86 */
+#define BPF_CALL	0x80	/* function call */
+#define BPF_EXIT	0x90	/* function return */
+
+/* Register numbers */
+enum {
+	BPF_REG_0 = 0,
+	BPF_REG_1,
+	BPF_REG_2,
+	BPF_REG_3,
+	BPF_REG_4,
+	BPF_REG_5,
+	BPF_REG_6,
+	BPF_REG_7,
+	BPF_REG_8,
+	BPF_REG_9,
+	BPF_REG_10,
+	__MAX_BPF_REG,
+};
+
+/* BPF has 10 general purpose 64-bit registers and stack frame. */
+#define MAX_BPF_REG	__MAX_BPF_REG
+
+struct bpf_insn {
+	__u8	code;		/* opcode */
+	__u8	dst_reg:4;	/* dest register */
+	__u8	src_reg:4;	/* source register */
+	__s16	off;		/* signed offset */
+	__s32	imm;		/* signed immediate constant */
+};
+
+#endif /* _UAPI__LINUX_BPF_H__ */
-- 
cgit v1.1


From 49a601589caaf0e93194c0cc9b4ecddbe75dd2d5 Mon Sep 17 00:00:00 2001
From: Vincent Bernat <vincent@bernat.im>
Date: Fri, 5 Sep 2014 15:09:03 +0200
Subject: net/ipv4: bind ip_nonlocal_bind to current netns

net.ipv4.ip_nonlocal_bind sysctl was global to all network
namespaces. This patch allows to set a different value for each
network namespace.

Signed-off-by: Vincent Bernat <vincent@bernat.im>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip.h         | 2 --
 include/net/netns/ipv4.h | 1 +
 2 files changed, 1 insertion(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/ip.h b/include/net/ip.h
index c8fd611..14bfc8e 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -229,8 +229,6 @@ static inline int inet_is_local_reserved_port(struct net *net, int port)
 }
 #endif
 
-extern int sysctl_ip_nonlocal_bind;
-
 /* From inetpeer.c */
 extern int inet_peer_threshold;
 extern int inet_peer_minttl;
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index aec5e12..24945ce 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -76,6 +76,7 @@ struct netns_ipv4 {
 	int sysctl_tcp_ecn;
 	int sysctl_ip_no_pmtu_disc;
 	int sysctl_ip_fwd_use_pmtu;
+	int sysctl_ip_nonlocal_bind;
 
 	int sysctl_fwmark_reflect;
 	int sysctl_tcp_fwmark_accept;
-- 
cgit v1.1


From e5c3ea5c668033b303e7ac835d7d91da32d97958 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@resnulli.us>
Date: Fri, 5 Sep 2014 15:51:31 +0200
Subject: bridge: implement rtnl_link_ops->get_size and
 rtnl_link_ops->fill_info

Allow rtnetlink users to get bridge master info in IFLA_INFO_DATA attr
This initial part implements forward_delay, hello_time, max_age options.

Signed-off-by: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/if_link.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index ff95760..c80f95f 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -215,6 +215,18 @@ enum in6_addr_gen_mode {
 	IN6_ADDR_GEN_MODE_NONE,
 };
 
+/* Bridge section */
+
+enum {
+	IFLA_BR_UNSPEC,
+	IFLA_BR_FORWARD_DELAY,
+	IFLA_BR_HELLO_TIME,
+	IFLA_BR_MAX_AGE,
+	__IFLA_BR_MAX,
+};
+
+#define IFLA_BR_MAX	(__IFLA_BR_MAX - 1)
+
 enum {
 	BRIDGE_MODE_UNSPEC,
 	BRIDGE_MODE_HAIRPIN,
-- 
cgit v1.1


From dc8ecdd3a3fccf73fcb07711cde064ce5727f9d1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <zajec5@gmail.com>
Date: Mon, 1 Sep 2014 23:11:06 +0200
Subject: bcma: move bus struct setup into early part of host specific code
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This change is important for SoC host. In future we will want to know
chip ID (needed for early MIPS boot) before doing cores scanning.

Signed-off-by: Rafał Miłecki <zajec5@gmail.com>
Acked-by: Hauke Mehrtens <hauke@hauke-m.de>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/bcma/bcma.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/bcma/bcma.h b/include/linux/bcma/bcma.h
index 0272e49..c1ba87d 100644
--- a/include/linux/bcma/bcma.h
+++ b/include/linux/bcma/bcma.h
@@ -332,7 +332,6 @@ struct bcma_bus {
 	struct bcma_device *mapped_core;
 	struct list_head cores;
 	u8 nr_cores;
-	u8 init_done:1;
 	u8 num;
 
 	struct bcma_drv_cc drv_cc;
-- 
cgit v1.1


From a395135ddebb0a06052b84c309eb6cb68b79c797 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <zajec5@gmail.com>
Date: Mon, 1 Sep 2014 23:11:07 +0200
Subject: bcma: use separated function to initialize bus on SoC
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This is required to split SoC bus init into two phases. The later one
(which includes scanning) should be called when kalloc is available.

Cc: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Rafał Miłecki <zajec5@gmail.com>
Acked-by: Hauke Mehrtens <hauke@hauke-m.de>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/bcma/bcma_soc.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/bcma/bcma_soc.h b/include/linux/bcma/bcma_soc.h
index 4203c55..f24d245 100644
--- a/include/linux/bcma/bcma_soc.h
+++ b/include/linux/bcma/bcma_soc.h
@@ -10,6 +10,7 @@ struct bcma_soc {
 };
 
 int __init bcma_host_soc_register(struct bcma_soc *soc);
+int __init bcma_host_soc_init(struct bcma_soc *soc);
 
 int bcma_bus_register(struct bcma_bus *bus);
 
-- 
cgit v1.1


From 23a2f39c8f4035eade7f226eb7ada30c78d9eee3 Mon Sep 17 00:00:00 2001
From: Hauke Mehrtens <hauke@hauke-m.de>
Date: Mon, 8 Sep 2014 22:53:35 +0200
Subject: bcma: store more alternative addresses

Each core could have more than one alternative address. There are cores
with 8 alternative addresses for different functions. The PHY control
in the Chip common B core is done through the 2. alternative address
and not the first one.

Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>
CC: linux-usb@vger.kernel.org
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/bcma/bcma.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/bcma/bcma.h b/include/linux/bcma/bcma.h
index c1ba87d..7fc16c9 100644
--- a/include/linux/bcma/bcma.h
+++ b/include/linux/bcma/bcma.h
@@ -267,7 +267,7 @@ struct bcma_device {
 	u8 core_unit;
 
 	u32 addr;
-	u32 addr1;
+	u32 addr_s[8];
 	u32 wrap;
 
 	void __iomem *io_addr;
-- 
cgit v1.1


From 1716bcf3f76fe71e98d4851a3eb73ea3d93d4773 Mon Sep 17 00:00:00 2001
From: Hauke Mehrtens <hauke@hauke-m.de>
Date: Mon, 8 Sep 2014 22:53:36 +0200
Subject: bcma: add support for chipcommon B core

This core is used on BCM4708 to configure the PCIe and USB3 PHYs and it
contains the addresses to the Device Management unit. This will be used
by the PCIe driver first.

Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/bcma/bcma.h                   | 1 +
 include/linux/bcma/bcma_driver_chipcommon.h | 8 ++++++++
 2 files changed, 9 insertions(+)

(limited to 'include')

diff --git a/include/linux/bcma/bcma.h b/include/linux/bcma/bcma.h
index 7fc16c9..6345979 100644
--- a/include/linux/bcma/bcma.h
+++ b/include/linux/bcma/bcma.h
@@ -335,6 +335,7 @@ struct bcma_bus {
 	u8 num;
 
 	struct bcma_drv_cc drv_cc;
+	struct bcma_drv_cc_b drv_cc_b;
 	struct bcma_drv_pci drv_pci[2];
 	struct bcma_drv_pcie2 drv_pcie2;
 	struct bcma_drv_mips drv_mips;
diff --git a/include/linux/bcma/bcma_driver_chipcommon.h b/include/linux/bcma/bcma_driver_chipcommon.h
index 63d105c..db6fa21 100644
--- a/include/linux/bcma/bcma_driver_chipcommon.h
+++ b/include/linux/bcma/bcma_driver_chipcommon.h
@@ -644,6 +644,12 @@ struct bcma_drv_cc {
 #endif
 };
 
+struct bcma_drv_cc_b {
+	struct bcma_device *core;
+	u8 setup_done:1;
+	void __iomem *mii;
+};
+
 /* Register access */
 #define bcma_cc_read32(cc, offset) \
 	bcma_read32((cc)->core, offset)
@@ -699,4 +705,6 @@ extern void bcma_pmu_spuravoid_pllupdate(struct bcma_drv_cc *cc, int spuravoid);
 
 extern u32 bcma_pmu_get_bus_clock(struct bcma_drv_cc *cc);
 
+void bcma_chipco_b_mii_write(struct bcma_drv_cc_b *ccb, u32 offset, u32 value);
+
 #endif /* LINUX_BCMA_DRIVER_CC_H_ */
-- 
cgit v1.1


From 738cbe72adc5c8f2016c4c68aa5162631d4f27e1 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <dborkman@redhat.com>
Date: Mon, 8 Sep 2014 08:04:47 +0200
Subject: net: bpf: consolidate JIT binary allocator

Introduced in commit 314beb9bcabf ("x86: bpf_jit_comp: secure bpf jit
against spraying attacks") and later on replicated in aa2d2c73c21f
("s390/bpf,jit: address randomize and write protect jit code") for
s390 architecture, write protection for BPF JIT images got added and
a random start address of the JIT code, so that it's not on a page
boundary anymore.

Since both use a very similar allocator for the BPF binary header,
we can consolidate this code into the BPF core as it's mostly JIT
independant anyway.

This will also allow for future archs that support DEBUG_SET_MODULE_RONX
to just reuse instead of reimplementing it.

JIT tested on x86_64 and s390x with BPF test suite.

Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/filter.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'include')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 8f82ef3..868764f 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -289,6 +289,11 @@ struct sock_fprog_kern {
 	struct sock_filter	*filter;
 };
 
+struct bpf_binary_header {
+	unsigned int pages;
+	u8 image[];
+};
+
 struct bpf_work_struct {
 	struct bpf_prog *prog;
 	struct work_struct work;
@@ -358,6 +363,14 @@ struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size,
 				  gfp_t gfp_extra_flags);
 void __bpf_prog_free(struct bpf_prog *fp);
 
+typedef void (*bpf_jit_fill_hole_t)(void *area, unsigned int size);
+
+struct bpf_binary_header *
+bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr,
+		     unsigned int alignment,
+		     bpf_jit_fill_hole_t bpf_fill_ill_insns);
+void bpf_jit_binary_free(struct bpf_binary_header *hdr);
+
 static inline void bpf_prog_unlock_free(struct bpf_prog *fp)
 {
 	bpf_prog_unlock_ro(fp);
-- 
cgit v1.1


From 286aad3c4014ca825c447e07e24f8929e6d266d2 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <dborkman@redhat.com>
Date: Mon, 8 Sep 2014 08:04:49 +0200
Subject: net: bpf: be friendly to kmemcheck

Reported by Mikulas Patocka, kmemcheck currently barks out a
false positive since we don't have special kmemcheck annotation
for bitfields used in bpf_prog structure.

We currently have jited:1, len:31 and thus when accessing len
while CONFIG_KMEMCHECK enabled, kmemcheck throws a warning that
we're reading uninitialized memory.

As we don't need the whole bit universe for pages member, we
can just split it to u16 and use a bool flag for jited instead
of a bitfield.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/filter.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 868764f..4b59ede 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -300,9 +300,9 @@ struct bpf_work_struct {
 };
 
 struct bpf_prog {
-	u32			pages;		/* Number of allocated pages */
-	u32			jited:1,	/* Is our filter JIT'ed? */
-				len:31;		/* Number of filter blocks */
+	u16			pages;		/* Number of allocated pages */
+	bool			jited;		/* Is our filter JIT'ed? */
+	u32			len;		/* Number of filter blocks */
 	struct sock_fprog_kern	*orig_prog;	/* Original BPF program */
 	struct bpf_work_struct	*work;		/* Deferred free work struct */
 	unsigned int		(*bpf_func)(const struct sk_buff *skb,
-- 
cgit v1.1


From 67cc0d4077951295f42bed63805e91b46c24477b Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Mon, 8 Sep 2014 19:58:58 -0400
Subject: net-timestamp: optimize sock_tx_timestamp default path

Few packets have timestamping enabled. Exit sock_tx_timestamp quickly
in this common case.

Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/sock.h b/include/net/sock.h
index 049ab1b..515a4d0 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2199,6 +2199,8 @@ static inline void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
 		sk->sk_stamp = skb->tstamp;
 }
 
+void __sock_tx_timestamp(const struct sock *sk, __u8 *tx_flags);
+
 /**
  * sock_tx_timestamp - checks whether the outgoing packet is to be time stamped
  * @sk:		socket sending this packet
@@ -2206,7 +2208,13 @@ static inline void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
  *
  * Note : callers should take care of initial *tx_flags value (usually 0)
  */
-void sock_tx_timestamp(const struct sock *sk, __u8 *tx_flags);
+static inline void sock_tx_timestamp(const struct sock *sk, __u8 *tx_flags)
+{
+	if (unlikely(sk->sk_tsflags))
+		__sock_tx_timestamp(sk, tx_flags);
+	if (unlikely(sock_flag(sk, SOCK_WIFI_STATUS)))
+		*tx_flags |= SKBTX_WIFI_STATUS;
+}
 
 /**
  * sk_eat_skb - Release a skb if it is no longer needed
-- 
cgit v1.1


From b954d83421d51d822c42e5ab7b65069b25ad3005 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <dborkman@redhat.com>
Date: Wed, 10 Sep 2014 15:01:02 +0200
Subject: net: bpf: only build bpf_jit_binary_{alloc, free}() when jit selected

Since BPF JIT depends on the availability of module_alloc() and
module_free() helpers (HAVE_BPF_JIT and MODULES), we better build
that code only in case we have BPF_JIT in our config enabled, just
like with other JIT code. Fixes builds for arm/marzen_defconfig
and sh/rsk7269_defconfig.

====================
kernel/built-in.o: In function `bpf_jit_binary_alloc':
/home/cwang/linux/kernel/bpf/core.c:144: undefined reference to `module_alloc'
kernel/built-in.o: In function `bpf_jit_binary_free':
/home/cwang/linux/kernel/bpf/core.c:164: undefined reference to `module_free'
make: *** [vmlinux] Error 1
====================

Reported-by: Fengguang Wu <fengguang.wu@intel.com>
Fixes: 738cbe72adc5 ("net: bpf: consolidate JIT binary allocator")
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/filter.h | 78 +++++++++++++++++++++++++-------------------------
 1 file changed, 39 insertions(+), 39 deletions(-)

(limited to 'include')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 4b59ede..1a0bc6d 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -4,12 +4,18 @@
 #ifndef __LINUX_FILTER_H__
 #define __LINUX_FILTER_H__
 
+#include <stdarg.h>
+
 #include <linux/atomic.h>
 #include <linux/compat.h>
 #include <linux/skbuff.h>
+#include <linux/linkage.h>
+#include <linux/printk.h>
 #include <linux/workqueue.h>
-#include <uapi/linux/filter.h>
+
 #include <asm/cacheflush.h>
+
+#include <uapi/linux/filter.h>
 #include <uapi/linux/bpf.h>
 
 struct sk_buff;
@@ -363,14 +369,6 @@ struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size,
 				  gfp_t gfp_extra_flags);
 void __bpf_prog_free(struct bpf_prog *fp);
 
-typedef void (*bpf_jit_fill_hole_t)(void *area, unsigned int size);
-
-struct bpf_binary_header *
-bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr,
-		     unsigned int alignment,
-		     bpf_jit_fill_hole_t bpf_fill_ill_insns);
-void bpf_jit_binary_free(struct bpf_binary_header *hdr);
-
 static inline void bpf_prog_unlock_free(struct bpf_prog *fp)
 {
 	bpf_prog_unlock_ro(fp);
@@ -393,6 +391,38 @@ void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp);
 u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
 void bpf_int_jit_compile(struct bpf_prog *fp);
 
+#ifdef CONFIG_BPF_JIT
+typedef void (*bpf_jit_fill_hole_t)(void *area, unsigned int size);
+
+struct bpf_binary_header *
+bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr,
+		     unsigned int alignment,
+		     bpf_jit_fill_hole_t bpf_fill_ill_insns);
+void bpf_jit_binary_free(struct bpf_binary_header *hdr);
+
+void bpf_jit_compile(struct bpf_prog *fp);
+void bpf_jit_free(struct bpf_prog *fp);
+
+static inline void bpf_jit_dump(unsigned int flen, unsigned int proglen,
+				u32 pass, void *image)
+{
+	pr_err("flen=%u proglen=%u pass=%u image=%pK\n",
+	       flen, proglen, pass, image);
+	if (image)
+		print_hex_dump(KERN_ERR, "JIT code: ", DUMP_PREFIX_OFFSET,
+			       16, 1, image, proglen, false);
+}
+#else
+static inline void bpf_jit_compile(struct bpf_prog *fp)
+{
+}
+
+static inline void bpf_jit_free(struct bpf_prog *fp)
+{
+	bpf_prog_unlock_free(fp);
+}
+#endif /* CONFIG_BPF_JIT */
+
 #define BPF_ANC		BIT(15)
 
 static inline u16 bpf_anc_helper(const struct sock_filter *ftest)
@@ -440,36 +470,6 @@ static inline void *bpf_load_pointer(const struct sk_buff *skb, int k,
 	return bpf_internal_load_pointer_neg_helper(skb, k, size);
 }
 
-#ifdef CONFIG_BPF_JIT
-#include <stdarg.h>
-#include <linux/linkage.h>
-#include <linux/printk.h>
-
-void bpf_jit_compile(struct bpf_prog *fp);
-void bpf_jit_free(struct bpf_prog *fp);
-
-static inline void bpf_jit_dump(unsigned int flen, unsigned int proglen,
-				u32 pass, void *image)
-{
-	pr_err("flen=%u proglen=%u pass=%u image=%pK\n",
-	       flen, proglen, pass, image);
-	if (image)
-		print_hex_dump(KERN_ERR, "JIT code: ", DUMP_PREFIX_OFFSET,
-			       16, 1, image, proglen, false);
-}
-#else
-#include <linux/slab.h>
-
-static inline void bpf_jit_compile(struct bpf_prog *fp)
-{
-}
-
-static inline void bpf_jit_free(struct bpf_prog *fp)
-{
-	bpf_prog_unlock_free(fp);
-}
-#endif /* CONFIG_BPF_JIT */
-
 static inline int bpf_tell_extensions(void)
 {
 	return SKF_AD_MAX;
-- 
cgit v1.1


From 78f686cae0c67a2edd167cbbe2f36017f0fa4b30 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Wed, 10 Sep 2014 22:28:06 +0300
Subject: cfg80211: don't put kek/kck/replay counter on the stack

There's no need to put the values on the stack, just pass a
pointer to the data in the nl80211 message. This reduces stack
usage and avoids potential issues with putting sensitive data
on the stack.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 0d17ec9..c2c710c 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1980,14 +1980,12 @@ struct cfg80211_wowlan_wakeup {
 
 /**
  * struct cfg80211_gtk_rekey_data - rekey data
- * @kek: key encryption key
- * @kck: key confirmation key
- * @replay_ctr: replay counter
+ * @kek: key encryption key (NL80211_KEK_LEN bytes)
+ * @kck: key confirmation key (NL80211_KCK_LEN bytes)
+ * @replay_ctr: replay counter (NL80211_REPLAY_CTR_LEN bytes)
  */
 struct cfg80211_gtk_rekey_data {
-	u8 kek[NL80211_KEK_LEN];
-	u8 kck[NL80211_KCK_LEN];
-	u8 replay_ctr[NL80211_REPLAY_CTR_LEN];
+	const u8 *kek, *kck, *replay_ctr;
 };
 
 /**
-- 
cgit v1.1


From 960d01acf62747d6518694f92be5b06f67473833 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Tue, 9 Sep 2014 22:55:35 +0300
Subject: cfg80211: add WMM traffic stream API

Add nl80211 and driver API to validate, add and delete traffic
streams with appropriate settings.

The API calls for userspace doing the action frame handshake
with the peer, and then allows only to set up the parameters
in the driver. To avoid setting up a session only to tear it
down again, the validate API is provided, but the real usage
later can still fail so userspace must be prepared for that.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h    |  4 ++++
 include/net/cfg80211.h       | 23 ++++++++++++++++++++++-
 include/uapi/linux/nl80211.h | 28 ++++++++++++++++++++++++++++
 3 files changed, 54 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 61a09f0..b1be39c 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -166,8 +166,12 @@ static inline u16 ieee80211_sn_sub(u16 sn1, u16 sn2)
 
 #define IEEE80211_MAX_MESH_ID_LEN	32
 
+#define IEEE80211_FIRST_TSPEC_TSID	8
 #define IEEE80211_NUM_TIDS		16
 
+/* number of user priorities 802.11 uses */
+#define IEEE80211_NUM_UPS		8
+
 #define IEEE80211_QOS_CTL_LEN		2
 /* 1d tag mask */
 #define IEEE80211_QOS_CTL_TAG1D_MASK		0x0007
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index c2c710c..a13beab1 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -2318,6 +2318,17 @@ struct cfg80211_qos_map {
  * @set_ap_chanwidth: Set the AP (including P2P GO) mode channel width for the
  *	given interface This is used e.g. for dynamic HT 20/40 MHz channel width
  *	changes during the lifetime of the BSS.
+ *
+ * @add_tx_ts: validate (if admitted_time is 0) or add a TX TS to the device
+ *	with the given parameters; action frame exchange has been handled by
+ *	userspace so this just has to modify the TX path to take the TS into
+ *	account.
+ *	If the admitted time is 0 just validate the parameters to make sure
+ *	the session can be created at all; it is valid to just always return
+ *	success for that but that may result in inefficient behaviour (handshake
+ *	with the peer followed by immediate teardown when the addition is later
+ *	rejected)
+ * @del_tx_ts: remove an existing TX TS
  */
 struct cfg80211_ops {
 	int	(*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow);
@@ -2558,6 +2569,12 @@ struct cfg80211_ops {
 
 	int	(*set_ap_chanwidth)(struct wiphy *wiphy, struct net_device *dev,
 				    struct cfg80211_chan_def *chandef);
+
+	int	(*add_tx_ts)(struct wiphy *wiphy, struct net_device *dev,
+			     u8 tsid, const u8 *peer, u8 user_prio,
+			     u16 admitted_time);
+	int	(*del_tx_ts)(struct wiphy *wiphy, struct net_device *dev,
+			     u8 tsid, const u8 *peer);
 };
 
 /*
@@ -2604,9 +2621,13 @@ struct cfg80211_ops {
  * @WIPHY_FLAG_SUPPORTS_5_10_MHZ: Device supports 5 MHz and 10 MHz channels.
  * @WIPHY_FLAG_HAS_CHANNEL_SWITCH: Device supports channel switch in
  *	beaconing mode (AP, IBSS, Mesh, ...).
+ * @WIPHY_FLAG_SUPPORTS_WMM_ADMISSION: the device supports setting up WMM
+ *	TSPEC sessions (TID aka TSID 0-7) with the NL80211_CMD_ADD_TX_TS
+ *	command. Standard IEEE 802.11 TSPEC setup is not yet supported, it
+ *	needs to be able to handle Block-Ack agreements and other things.
  */
 enum wiphy_flags {
-	/* use hole at 0 */
+	WIPHY_FLAG_SUPPORTS_WMM_ADMISSION	= BIT(0),
 	/* use hole at 1 */
 	/* use hole at 2 */
 	WIPHY_FLAG_NETNS_OK			= BIT(3),
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 29c4399..e5b8caf 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -722,6 +722,22 @@
  *	QoS mapping is relevant for IP packets, it is only valid during an
  *	association. This is cleared on disassociation and AP restart.
  *
+ * @NL80211_CMD_ADD_TX_TS: Ask the kernel to add a traffic stream for the given
+ *	%NL80211_ATTR_TSID and %NL80211_ATTR_MAC with %NL80211_ATTR_USER_PRIO
+ *	and %NL80211_ATTR_ADMITTED_TIME parameters.
+ *	Note that the action frame handshake with the AP shall be handled by
+ *	userspace via the normal management RX/TX framework, this only sets
+ *	up the TX TS in the driver/device.
+ *	If the admitted time attribute is not added then the request just checks
+ *	if a subsequent setup could be successful, the intent is to use this to
+ *	avoid setting up a session with the AP when local restrictions would
+ *	make that impossible. However, the subsequent "real" setup may still
+ *	fail even if the check was successful.
+ * @NL80211_CMD_DEL_TX_TS: Remove an existing TS with the %NL80211_ATTR_TSID
+ *	and %NL80211_ATTR_MAC parameters. It isn't necessary to call this
+ *	before removing a station entry entirely, or before disassociating
+ *	or similar, cleanup will happen in the driver/device in this case.
+ *
  * @NL80211_CMD_MAX: highest used command number
  * @__NL80211_CMD_AFTER_LAST: internal use
  */
@@ -893,6 +909,9 @@ enum nl80211_commands {
 
 	NL80211_CMD_SET_QOS_MAP,
 
+	NL80211_CMD_ADD_TX_TS,
+	NL80211_CMD_DEL_TX_TS,
+
 	/* add new commands above here */
 
 	/* used to define NL80211_CMD_MAX below */
@@ -1611,6 +1630,11 @@ enum nl80211_commands {
  *	drivers to indicate dynack capability. Dynack is automatically disabled
  *	setting valid value for coverage class.
  *
+ * @NL80211_ATTR_TSID: a TSID value (u8 attribute)
+ * @NL80211_ATTR_USER_PRIO: user priority value (u8 attribute)
+ * @NL80211_ATTR_ADMITTED_TIME: admitted time in units of 32 microseconds
+ *	(per second) (u16 attribute)
+ *
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
  */
@@ -1957,6 +1981,10 @@ enum nl80211_attrs {
 
 	NL80211_ATTR_WIPHY_DYN_ACK,
 
+	NL80211_ATTR_TSID,
+	NL80211_ATTR_USER_PRIO,
+	NL80211_ATTR_ADMITTED_TIME,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
-- 
cgit v1.1


From b0b6aa2c8e0d0e34f7658d5cc1e4fbb59f701c42 Mon Sep 17 00:00:00 2001
From: Eliad Peller <eliad@wizery.com>
Date: Tue, 9 Sep 2014 17:09:45 +0300
Subject: cfg80211/mac80211: add wmm info to assoc event

Userspace might need to know what queues are configured
for uapsd (e.g. for setting proper default values in tspecs).

Add this bitmap to the association event (inside wmm
nested attribute)

Add additional parameter to cfg80211_rx_assoc_resp,
and update its callers.

Signed-off-by: Eliad Peller <eliadx.peller@intel.com>
Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index a13beab1..2ed1f80 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -3944,6 +3944,7 @@ void cfg80211_auth_timeout(struct net_device *dev, const u8 *addr);
  *	moves to cfg80211 in this call
  * @buf: authentication frame (header + body)
  * @len: length of the frame data
+ * @uapsd_queues: bitmap of ACs configured to uapsd. -1 if n/a.
  *
  * After being asked to associate via cfg80211_ops::assoc() the driver must
  * call either this function or cfg80211_auth_timeout().
@@ -3952,7 +3953,8 @@ void cfg80211_auth_timeout(struct net_device *dev, const u8 *addr);
  */
 void cfg80211_rx_assoc_resp(struct net_device *dev,
 			    struct cfg80211_bss *bss,
-			    const u8 *buf, size_t len);
+			    const u8 *buf, size_t len,
+			    int uapsd_queues);
 
 /**
  * cfg80211_assoc_timeout - notification of timed out association
-- 
cgit v1.1


From 18998c381b19bfc3c285361ff6200ded7444aa2c Mon Sep 17 00:00:00 2001
From: Eliad Peller <eliad@wizery.com>
Date: Wed, 10 Sep 2014 14:07:34 +0300
Subject: cfg80211: allow requesting SMPS mode on ap start

Add feature bits to indicate device support for
static-smps and dynamic-smps modes.

Add a new NL80211_ATTR_SMPS_MODE attribue to allow
configuring the smps mode to be used by the ap
(e.g. configuring to ap to dynamic smps mode will
reduce power consumption while having minor effect
on throughput)

Signed-off-by: Eliad Peller <eliad@wizery.com>
Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h       |  2 ++
 include/uapi/linux/nl80211.h | 33 +++++++++++++++++++++++++++++++++
 2 files changed, 35 insertions(+)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 2ed1f80..a2ddcf2 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -664,6 +664,7 @@ struct cfg80211_acl_data {
  * @crypto: crypto settings
  * @privacy: the BSS uses privacy
  * @auth_type: Authentication type (algorithm)
+ * @smps_mode: SMPS mode
  * @inactivity_timeout: time in seconds to determine station's inactivity.
  * @p2p_ctwindow: P2P CT Window
  * @p2p_opp_ps: P2P opportunistic PS
@@ -682,6 +683,7 @@ struct cfg80211_ap_settings {
 	struct cfg80211_crypto_settings crypto;
 	bool privacy;
 	enum nl80211_auth_type auth_type;
+	enum nl80211_smps_mode smps_mode;
 	int inactivity_timeout;
 	u8 p2p_ctwindow;
 	bool p2p_opp_ps;
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index e5b8caf..4b28dc0 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -1635,6 +1635,9 @@ enum nl80211_commands {
  * @NL80211_ATTR_ADMITTED_TIME: admitted time in units of 32 microseconds
  *	(per second) (u16 attribute)
  *
+ * @NL80211_ATTR_SMPS_MODE: SMPS mode to use (ap mode). see
+ *	&enum nl80211_smps_mode.
+ *
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
  */
@@ -1985,6 +1988,8 @@ enum nl80211_attrs {
 	NL80211_ATTR_USER_PRIO,
 	NL80211_ATTR_ADMITTED_TIME,
 
+	NL80211_ATTR_SMPS_MODE,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
@@ -4030,6 +4035,13 @@ enum nl80211_ap_sme_features {
  * @NL80211_FEATURE_ACKTO_ESTIMATION: This driver supports dynamic ACK timeout
  *	estimation (dynack). %NL80211_ATTR_WIPHY_DYN_ACK flag attribute is used
  *	to enable dynack.
+ * @NL80211_FEATURE_STATIC_SMPS: Device supports static spatial
+ *	multiplexing powersave, ie. can turn off all but one chain
+ *	even on HT connections that should be using more chains.
+ * @NL80211_FEATURE_DYNAMIC_SMPS: Device supports dynamic spatial
+ *	multiplexing powersave, ie. can turn off all but one chain
+ *	and then wake the rest up as required after, for example,
+ *	rts/cts handshake.
  */
 enum nl80211_feature_flags {
 	NL80211_FEATURE_SK_TX_STATUS			= 1 << 0,
@@ -4056,6 +4068,8 @@ enum nl80211_feature_flags {
 	NL80211_FEATURE_QUIET				= 1 << 21,
 	NL80211_FEATURE_TX_POWER_INSERTION		= 1 << 22,
 	NL80211_FEATURE_ACKTO_ESTIMATION		= 1 << 23,
+	NL80211_FEATURE_STATIC_SMPS			= 1 << 24,
+	NL80211_FEATURE_DYNAMIC_SMPS			= 1 << 25,
 };
 
 /**
@@ -4130,6 +4144,25 @@ enum nl80211_acl_policy {
 };
 
 /**
+ * enum nl80211_smps_mode - SMPS mode
+ *
+ * Requested SMPS mode (for AP mode)
+ *
+ * @NL80211_SMPS_OFF: SMPS off (use all antennas).
+ * @NL80211_SMPS_STATIC: static SMPS (use a single antenna)
+ * @NL80211_SMPS_DYNAMIC: dynamic smps (start with a single antenna and
+ *	turn on other antennas after CTS/RTS).
+ */
+enum nl80211_smps_mode {
+	NL80211_SMPS_OFF,
+	NL80211_SMPS_STATIC,
+	NL80211_SMPS_DYNAMIC,
+
+	__NL80211_SMPS_AFTER_LAST,
+	NL80211_SMPS_MAX = __NL80211_SMPS_AFTER_LAST - 1
+};
+
+/**
  * enum nl80211_radar_event - type of radar event for DFS operation
  *
  * Type of event to be used with NL80211_ATTR_RADAR_EVENT to inform userspace
-- 
cgit v1.1


From 0d8614b4b926d0f657d15d7eb5125bcb24b9fd41 Mon Sep 17 00:00:00 2001
From: Eliad Peller <eliad@wizery.com>
Date: Wed, 10 Sep 2014 14:07:36 +0300
Subject: mac80211: replace SMPS hw flags with wiphy feature bits

Use the new static_smps / dynamic_smps feature bits
instead of mac80211-internal hw flags.

Signed-off-by: Eliad Peller <eliad@wizery.com>
Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h | 13 +------------
 1 file changed, 1 insertion(+), 12 deletions(-)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index c6e6a68..0ad1f47 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -1537,16 +1537,6 @@ struct ieee80211_tx_control {
  * @IEEE80211_HW_MFP_CAPABLE:
  *	Hardware supports management frame protection (MFP, IEEE 802.11w).
  *
- * @IEEE80211_HW_SUPPORTS_STATIC_SMPS:
- *	Hardware supports static spatial multiplexing powersave,
- *	ie. can turn off all but one chain even on HT connections
- *	that should be using more chains.
- *
- * @IEEE80211_HW_SUPPORTS_DYNAMIC_SMPS:
- *	Hardware supports dynamic spatial multiplexing powersave,
- *	ie. can turn off all but one chain and then wake the rest
- *	up as required after, for example, rts/cts handshake.
- *
  * @IEEE80211_HW_SUPPORTS_UAPSD:
  *	Hardware supports Unscheduled Automatic Power Save Delivery
  *	(U-APSD) in managed mode. The mode is configured with
@@ -1632,8 +1622,7 @@ enum ieee80211_hw_flags {
 	IEEE80211_HW_SUPPORTS_DYNAMIC_PS		= 1<<12,
 	IEEE80211_HW_MFP_CAPABLE			= 1<<13,
 	IEEE80211_HW_WANT_MONITOR_VIF			= 1<<14,
-	IEEE80211_HW_SUPPORTS_STATIC_SMPS		= 1<<15,
-	IEEE80211_HW_SUPPORTS_DYNAMIC_SMPS		= 1<<16,
+	/* free slots */
 	IEEE80211_HW_SUPPORTS_UAPSD			= 1<<17,
 	IEEE80211_HW_REPORTS_TX_ACK_STATUS		= 1<<18,
 	IEEE80211_HW_CONNECTION_MONITOR			= 1<<19,
-- 
cgit v1.1


From 67981fefb20e717cea55b42f9081a833fa46b3be Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Thu, 11 Sep 2014 16:55:04 +0200
Subject: netfilter: fix compilation of masquerading without
 IP_NF_TARGET_MASQUERADE
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

 CONFIG_NF_NAT_MASQUERADE_IPV6=m
 # CONFIG_IP6_NF_TARGET_MASQUERADE is not set

results in:

net/ipv6/netfilter/nf_nat_masquerade_ipv6.c: In function ‘nf_nat_masquerade_ipv6’:
net/ipv6/netfilter/nf_nat_masquerade_ipv6.c:41:14: error: ‘struct nf_conn_nat’ has no member named ‘masq_index’
  nfct_nat(ct)->masq_index = out->ifindex;
              ^
net/ipv6/netfilter/nf_nat_masquerade_ipv6.c: In function ‘device_cmp’:
net/ipv6/netfilter/nf_nat_masquerade_ipv6.c:61:12: error: ‘const struct nf_conn_nat’ has no member named ‘masq_index’
  return nat->masq_index == (int)(long)ifindex;
            ^
net/ipv6/netfilter/nf_nat_masquerade_ipv6.c:62:1: warning: control
reaches end of non-void function [-Wreturn-type]
 }
 ^
make[3]: *** [net/ipv6/netfilter/nf_nat_masquerade_ipv6.o] Error 1

Fix this by using the new NF_NAT_MASQUERADE_IPV4 and _IPV6 symbols
in include/net/netfilter/nf_nat.h.

Reported-by: Jim Davis <jim.epost@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_nat.h | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h
index a71dd33..344b1ab 100644
--- a/include/net/netfilter/nf_nat.h
+++ b/include/net/netfilter/nf_nat.h
@@ -32,10 +32,8 @@ struct nf_conn_nat {
 	struct hlist_node bysource;
 	struct nf_conn *ct;
 	union nf_conntrack_nat_help help;
-#if defined(CONFIG_IP_NF_TARGET_MASQUERADE) || \
-    defined(CONFIG_IP_NF_TARGET_MASQUERADE_MODULE) || \
-    defined(CONFIG_IP6_NF_TARGET_MASQUERADE) || \
-    defined(CONFIG_IP6_NF_TARGET_MASQUERADE_MODULE)
+#if IS_ENABLED(CONFIG_NF_NAT_MASQUERADE_IPV4) || \
+    IS_ENABLED(CONFIG_NF_NAT_MASQUERADE_IPV6)
 	int masq_index;
 #endif
 };
@@ -68,8 +66,8 @@ static inline bool nf_nat_oif_changed(unsigned int hooknum,
 				      struct nf_conn_nat *nat,
 				      const struct net_device *out)
 {
-#if IS_ENABLED(CONFIG_IP_NF_TARGET_MASQUERADE) || \
-    IS_ENABLED(CONFIG_IP6_NF_TARGET_MASQUERADE)
+#if IS_ENABLED(CONFIG_NF_NAT_MASQUERADE_IPV4) || \
+    IS_ENABLED(CONFIG_NF_NAT_MASQUERADE_IPV6)
 	return nat->masq_index && hooknum == NF_INET_POST_ROUTING &&
 	       CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL &&
 	       nat->masq_index != out->ifindex;
-- 
cgit v1.1


From 39e393bb4f653d38aea40190e1aa9a49062eed4d Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Thu, 11 Sep 2014 11:02:39 +0200
Subject: netfilter: nf_tables: add NFTA_MASQ_UNSPEC to nft_masq_attributes

To keep this consistent with other nft_*_attributes.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter/nf_tables.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index eeec0ae..66d66dd 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -806,6 +806,7 @@ enum nft_nat_attributes {
  * @NFTA_MASQ_FLAGS: NAT flags (see NF_NAT_RANGE_* in linux/netfilter/nf_nat.h) (NLA_U32)
  */
 enum nft_masq_attributes {
+	NFTA_MASQ_UNSPEC,
 	NFTA_MASQ_FLAGS,
 	__NFTA_MASQ_MAX
 };
-- 
cgit v1.1


From 46e5da40aec256155cfedee96dd21a75da941f2c Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Fri, 12 Sep 2014 20:04:52 -0700
Subject: net: qdisc: use rcu prefix and silence sparse warnings

Add __rcu notation to qdisc handling by doing this we can make
smatch output more legible. And anyways some of the cases should
be using rcu_dereference() see qdisc_all_tx_empty(),
qdisc_tx_chainging(), and so on.

Also *wake_queue() API is commonly called from driver timer routines
without rcu lock or rtnl lock. So I added rcu_read_lock() blocks
around netif_wake_subqueue and netif_tx_wake_queue.

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 29 ++++-------------------------
 include/net/sch_generic.h | 21 +++++++++++++++------
 2 files changed, 19 insertions(+), 31 deletions(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index ba72f6b..ae721f5 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -543,7 +543,7 @@ struct netdev_queue {
  * read mostly part
  */
 	struct net_device	*dev;
-	struct Qdisc		*qdisc;
+	struct Qdisc __rcu	*qdisc;
 	struct Qdisc		*qdisc_sleeping;
 #ifdef CONFIG_SYSFS
 	struct kobject		kobj;
@@ -2356,12 +2356,7 @@ static inline void input_queue_tail_incr_save(struct softnet_data *sd,
 DECLARE_PER_CPU_ALIGNED(struct softnet_data, softnet_data);
 
 void __netif_schedule(struct Qdisc *q);
-
-static inline void netif_schedule_queue(struct netdev_queue *txq)
-{
-	if (!(txq->state & QUEUE_STATE_ANY_XOFF))
-		__netif_schedule(txq->qdisc);
-}
+void netif_schedule_queue(struct netdev_queue *txq);
 
 static inline void netif_tx_schedule_all(struct net_device *dev)
 {
@@ -2397,11 +2392,7 @@ static inline void netif_tx_start_all_queues(struct net_device *dev)
 	}
 }
 
-static inline void netif_tx_wake_queue(struct netdev_queue *dev_queue)
-{
-	if (test_and_clear_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state))
-		__netif_schedule(dev_queue->qdisc);
-}
+void netif_tx_wake_queue(struct netdev_queue *dev_queue);
 
 /**
  *	netif_wake_queue - restart transmit
@@ -2673,19 +2664,7 @@ static inline bool netif_subqueue_stopped(const struct net_device *dev,
 	return __netif_subqueue_stopped(dev, skb_get_queue_mapping(skb));
 }
 
-/**
- *	netif_wake_subqueue - allow sending packets on subqueue
- *	@dev: network device
- *	@queue_index: sub queue index
- *
- * Resume individual transmit queue of a device with multiple transmit queues.
- */
-static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index)
-{
-	struct netdev_queue *txq = netdev_get_tx_queue(dev, queue_index);
-	if (test_and_clear_bit(__QUEUE_STATE_DRV_XOFF, &txq->state))
-		__netif_schedule(txq->qdisc);
-}
+void netif_wake_subqueue(struct net_device *dev, u16 queue_index);
 
 #ifdef CONFIG_XPS
 int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index a3cfb8e..56838ab 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -259,7 +259,9 @@ static inline spinlock_t *qdisc_lock(struct Qdisc *qdisc)
 
 static inline struct Qdisc *qdisc_root(const struct Qdisc *qdisc)
 {
-	return qdisc->dev_queue->qdisc;
+	struct Qdisc *q = rcu_dereference_rtnl(qdisc->dev_queue->qdisc);
+
+	return q;
 }
 
 static inline struct Qdisc *qdisc_root_sleeping(const struct Qdisc *qdisc)
@@ -384,7 +386,7 @@ static inline void qdisc_reset_all_tx_gt(struct net_device *dev, unsigned int i)
 	struct Qdisc *qdisc;
 
 	for (; i < dev->num_tx_queues; i++) {
-		qdisc = netdev_get_tx_queue(dev, i)->qdisc;
+		qdisc = rtnl_dereference(netdev_get_tx_queue(dev, i)->qdisc);
 		if (qdisc) {
 			spin_lock_bh(qdisc_lock(qdisc));
 			qdisc_reset(qdisc);
@@ -402,13 +404,18 @@ static inline void qdisc_reset_all_tx(struct net_device *dev)
 static inline bool qdisc_all_tx_empty(const struct net_device *dev)
 {
 	unsigned int i;
+
+	rcu_read_lock();
 	for (i = 0; i < dev->num_tx_queues; i++) {
 		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
-		const struct Qdisc *q = txq->qdisc;
+		const struct Qdisc *q = rcu_dereference(txq->qdisc);
 
-		if (q->q.qlen)
+		if (q->q.qlen) {
+			rcu_read_unlock();
 			return false;
+		}
 	}
+	rcu_read_unlock();
 	return true;
 }
 
@@ -416,9 +423,10 @@ static inline bool qdisc_all_tx_empty(const struct net_device *dev)
 static inline bool qdisc_tx_changing(const struct net_device *dev)
 {
 	unsigned int i;
+
 	for (i = 0; i < dev->num_tx_queues; i++) {
 		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
-		if (txq->qdisc != txq->qdisc_sleeping)
+		if (rcu_access_pointer(txq->qdisc) != txq->qdisc_sleeping)
 			return true;
 	}
 	return false;
@@ -428,9 +436,10 @@ static inline bool qdisc_tx_changing(const struct net_device *dev)
 static inline bool qdisc_tx_is_noop(const struct net_device *dev)
 {
 	unsigned int i;
+
 	for (i = 0; i < dev->num_tx_queues; i++) {
 		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
-		if (txq->qdisc != &noop_qdisc)
+		if (rcu_access_pointer(txq->qdisc) != &noop_qdisc)
 			return false;
 	}
 	return true;
-- 
cgit v1.1


From 25d8c0d55f241ce2d360df1bea48e23a55836ee6 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Fri, 12 Sep 2014 20:05:27 -0700
Subject: net: rcu-ify tcf_proto

rcu'ify tcf_proto this allows calling tc_classify() without holding
any locks. Updaters are protected by RTNL.

This patch prepares the core net_sched infrastracture for running
the classifier/action chains without holding the qdisc lock however
it does nothing to ensure cls_xxx and act_xxx types also work without
locking. Additional patches are required to address the fall out.

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 56838ab..1e89b9a 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -143,7 +143,7 @@ struct Qdisc_class_ops {
 	void			(*walk)(struct Qdisc *, struct qdisc_walker * arg);
 
 	/* Filter manipulation */
-	struct tcf_proto **	(*tcf_chain)(struct Qdisc *, unsigned long);
+	struct tcf_proto __rcu ** (*tcf_chain)(struct Qdisc *, unsigned long);
 	unsigned long		(*bind_tcf)(struct Qdisc *, unsigned long,
 					u32 classid);
 	void			(*unbind_tcf)(struct Qdisc *, unsigned long);
@@ -212,8 +212,8 @@ struct tcf_proto_ops {
 
 struct tcf_proto {
 	/* Fast access part */
-	struct tcf_proto	*next;
-	void			*root;
+	struct tcf_proto __rcu	*next;
+	void __rcu		*root;
 	int			(*classify)(struct sk_buff *,
 					    const struct tcf_proto *,
 					    struct tcf_result *);
@@ -225,6 +225,7 @@ struct tcf_proto {
 	struct Qdisc		*q;
 	void			*data;
 	const struct tcf_proto_ops	*ops;
+	struct rcu_head		rcu;
 };
 
 struct qdisc_skb_cb {
@@ -378,7 +379,7 @@ struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue,
 void __qdisc_calculate_pkt_len(struct sk_buff *skb,
 			       const struct qdisc_size_table *stab);
 void tcf_destroy(struct tcf_proto *tp);
-void tcf_destroy_chain(struct tcf_proto **fl);
+void tcf_destroy_chain(struct tcf_proto __rcu **fl);
 
 /* Reset all TX qdiscs greater then index of a device.  */
 static inline void qdisc_reset_all_tx_gt(struct net_device *dev, unsigned int i)
-- 
cgit v1.1


From 331b72922c5f58d48fd5500acadc91777cc31970 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Fri, 12 Sep 2014 20:08:20 -0700
Subject: net: sched: RCU cls_tcindex

Make cls_tcindex RCU safe.

This patch addds a new RCU routine rcu_dereference_bh_rtnl() to check
caller either holds the rcu read lock or RTNL. This is needed to
handle the case where tcindex_lookup() is being called in both cases.

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rtnetlink.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include')

diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index 167bae7..6cacbce 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -47,6 +47,16 @@ static inline int lockdep_rtnl_is_held(void)
 	rcu_dereference_check(p, lockdep_rtnl_is_held())
 
 /**
+ * rcu_dereference_bh_rtnl - rcu_dereference_bh with debug checking
+ * @p: The pointer to read, prior to dereference
+ *
+ * Do an rcu_dereference_bh(p), but check caller either holds rcu_read_lock_bh()
+ * or RTNL. Note : Please prefer rtnl_dereference() or rcu_dereference_bh()
+ */
+#define rcu_dereference_bh_rtnl(p)				\
+	rcu_dereference_bh_check(p, lockdep_rtnl_is_held())
+
+/**
  * rtnl_dereference - fetch RCU pointer when updates are prevented by RTNL
  * @p: The pointer to read, prior to dereferencing
  *
-- 
cgit v1.1


From 6c555490e0ce885a9caf0a045db69382a3ccbc9c Mon Sep 17 00:00:00 2001
From: WANG Cong <xiyou.wangcong@gmail.com>
Date: Thu, 11 Sep 2014 15:35:09 -0700
Subject: ipv6: drop useless rcu_read_lock() in anycast

These code is now protected by rtnl lock, rcu read lock
is useless now.

Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index ae721f5..ee38b94 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2083,8 +2083,8 @@ void __dev_remove_pack(struct packet_type *pt);
 void dev_add_offload(struct packet_offload *po);
 void dev_remove_offload(struct packet_offload *po);
 
-struct net_device *dev_get_by_flags_rcu(struct net *net, unsigned short flags,
-					unsigned short mask);
+struct net_device *__dev_get_by_flags(struct net *net, unsigned short flags,
+				      unsigned short mask);
 struct net_device *dev_get_by_name(struct net *net, const char *name);
 struct net_device *dev_get_by_name_rcu(struct net *net, const char *name);
 struct net_device *__dev_get_by_name(struct net *net, const char *name);
-- 
cgit v1.1


From 013b4d90387a5dca54281263e0d4650db97bd67c Mon Sep 17 00:00:00 2001
From: WANG Cong <xiyou.wangcong@gmail.com>
Date: Thu, 11 Sep 2014 15:35:11 -0700
Subject: ipv6: clean up ipv6_dev_ac_inc()

Make it accept inet6_dev, and rename it to __ipv6_dev_ac_inc()
to reflect this change.

Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/addrconf.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index f679877..9b1d42e 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -202,7 +202,7 @@ int ipv6_sock_ac_drop(struct sock *sk, int ifindex,
 		      const struct in6_addr *addr);
 void ipv6_sock_ac_close(struct sock *sk);
 
-int ipv6_dev_ac_inc(struct net_device *dev, const struct in6_addr *addr);
+int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr);
 int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr);
 bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev,
 			 const struct in6_addr *addr);
-- 
cgit v1.1


From ac7a04c33dd7f8e429df4b929ba3a3e8e729cc89 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Thu, 11 Sep 2014 21:18:09 -0700
Subject: net: dsa: change tag_protocol to an enum

Now that we introduced an additional multiplexing/demultiplexing layer
with commit 3e8a72d1dae37 ("net: dsa: reduce number of protocol hooks")
that lives within the DSA code, we no longer need to have a given switch
driver tag_protocol be an actual ethertype value, instead, we can
replace it with an enum: dsa_tag_protocol.

Do this replacement in the drivers, which allows us to get rid of the
cpu_to_be16()/htons() dance, and remove ETH_P_BRCMTAG since we do not
need it anymore.

Suggested-by: Alexander Duyck <alexander.duyck@gmail.com>
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dsa.h | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/net/dsa.h b/include/net/dsa.h
index 9771292..8a8a5d9 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -19,10 +19,13 @@
 #include <linux/phy.h>
 #include <linux/phy_fixed.h>
 
-/* Not an official ethertype value, used only internally for DSA
- * demultiplexing
- */
-#define ETH_P_BRCMTAG		(ETH_P_XDSA + 1)
+enum dsa_tag_protocol {
+	DSA_TAG_PROTO_NONE = 0,
+	DSA_TAG_PROTO_DSA,
+	DSA_TAG_PROTO_TRAILER,
+	DSA_TAG_PROTO_EDSA,
+	DSA_TAG_PROTO_BRCM,
+};
 
 #define DSA_MAX_SWITCHES	4
 #define DSA_MAX_PORTS		12
@@ -89,7 +92,7 @@ struct dsa_switch_tree {
 	 */
 	struct net_device	*master_netdev;
 	const struct dsa_device_ops	*ops;
-	__be16			tag_protocol;
+	enum dsa_tag_protocol	tag_protocol;
 
 	/*
 	 * The switch and port to which the CPU is attached.
@@ -166,7 +169,7 @@ static inline u8 dsa_upstream_port(struct dsa_switch *ds)
 struct dsa_switch_driver {
 	struct list_head	list;
 
-	__be16			tag_protocol;
+	enum dsa_tag_protocol	tag_protocol;
 	int			priv_size;
 
 	/*
@@ -215,7 +218,7 @@ static inline void *ds_to_priv(struct dsa_switch *ds)
 
 static inline bool dsa_uses_tagged_protocol(struct dsa_switch_tree *dst)
 {
-	return dst->tag_protocol != 0;
+	return dst->tag_protocol != DSA_TAG_PROTO_NONE;
 }
 
 #endif
-- 
cgit v1.1


From 233577a22089facf5271ab5e845b2262047c971f Mon Sep 17 00:00:00 2001
From: Hannes Frederic Sowa <hannes@stressinduktion.org>
Date: Fri, 12 Sep 2014 14:04:43 +0200
Subject: net: filter: constify detection of pkt_type_offset

Currently we have 2 pkt_type_offset functions doing the same thing and
spread across the architecture files. Remove those and replace them
with a PKT_TYPE_OFFSET macro helper which gets the constant value from a
zero sized sk_buff member right in front of the bitfield with offsetof.
This new offset marker does not change size of struct sk_buff.

Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Markos Chandras <markos.chandras@imgtec.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Daniel Borkmann <dborkman@redhat.com>
Cc: Alexei Starovoitov <alexei.starovoitov@gmail.com>
Signed-off-by: Denis Kirjanov <kda@linux-powerpc.org>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Acked-by: Daniel Borkmann <dborkman@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 07c9fdd..756e3d0 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -548,6 +548,16 @@ struct sk_buff {
 				ip_summed:2,
 				nohdr:1,
 				nfctinfo:3;
+
+/* if you move pkt_type around you also must adapt those constants */
+#ifdef __BIG_ENDIAN_BITFIELD
+#define PKT_TYPE_MAX	(7 << 5)
+#else
+#define PKT_TYPE_MAX	7
+#endif
+#define PKT_TYPE_OFFSET()	offsetof(struct sk_buff, __pkt_type_offset)
+
+	__u8			__pkt_type_offset[0];
 	__u8			pkt_type:3,
 				fclone:2,
 				ipvs_property:1,
-- 
cgit v1.1


From 3fc8867740b4a0bf56f372c6f5ddd14970962fb1 Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@intel.com>
Date: Fri, 12 Sep 2014 23:12:46 -0700
Subject: netdevice: Support DSA tagging when DSA is built as a module

This change corrects an error seen when DSA tagging is built as a module.
Without this change it is not possible to get XDSA tagged frames as the
test for tagging is stripped by the #ifdef check.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Acked-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index ee38b94..f9e81d1 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1789,7 +1789,7 @@ void dev_net_set(struct net_device *dev, struct net *net)
 
 static inline bool netdev_uses_dsa(struct net_device *dev)
 {
-#ifdef CONFIG_NET_DSA
+#if IS_ENABLED(CONFIG_NET_DSA)
 	if (dev->dsa_ptr != NULL)
 		return dsa_uses_tagged_protocol(dev->dsa_ptr);
 #endif
-- 
cgit v1.1


From 43e73e4e2ad05d9bf3b438cfbe1e71b57a85f26c Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Sun, 14 Sep 2014 23:06:28 +0200
Subject: Bluetooth: Provide HCI command opcode information to driver

The Bluetooth core already does processing of the HCI command header
and puts it together before sending it to the driver. It is not really
efficient for the driver to look at the HCI command header again in
case it has to make certain decisions about certain commands. To make
this easier, just provide the opcode as part of the SKB control buffer
information. The extra information about the opcode is optional and
only provided for HCI commands.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
---
 include/net/bluetooth/bluetooth.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h
index 373000d..7e666d0 100644
--- a/include/net/bluetooth/bluetooth.h
+++ b/include/net/bluetooth/bluetooth.h
@@ -284,6 +284,7 @@ struct hci_req_ctrl {
 struct bt_skb_cb {
 	__u8 pkt_type;
 	__u8 incoming;
+	__u16 opcode;
 	__u16 expect;
 	__u8 force_active;
 	struct l2cap_chan *chan;
-- 
cgit v1.1


From 0e9871e3f79fd17c691b50a9669220c54ff084a2 Mon Sep 17 00:00:00 2001
From: Anton Danilov <littlesmilingcloud@gmail.com>
Date: Thu, 28 Aug 2014 10:11:27 +0400
Subject: netfilter: ipset: Add skbinfo extension kernel support in the ipset
 core.

Skbinfo extension provides mapping of metainformation with lookup in the ipset tables.
This patch defines the flags, the constants, the functions and the structures
for the data type independent support of the extension.
Note the firewall mark stores in the kernel structures as two 32bit values,
but transfered through netlink as one 64bit value.

Signed-off-by: Anton Danilov <littlesmilingcloud@gmail.com>
Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
---
 include/linux/netfilter/ipset/ip_set.h      | 56 ++++++++++++++++++++++++++++-
 include/uapi/linux/netfilter/ipset/ip_set.h | 12 +++++++
 2 files changed, 67 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index 96afc29..b97aac5 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -57,6 +57,8 @@ enum ip_set_extension {
 	IPSET_EXT_COUNTER = (1 << IPSET_EXT_BIT_COUNTER),
 	IPSET_EXT_BIT_COMMENT = 2,
 	IPSET_EXT_COMMENT = (1 << IPSET_EXT_BIT_COMMENT),
+	IPSET_EXT_BIT_SKBINFO = 3,
+	IPSET_EXT_SKBINFO = (1 << IPSET_EXT_BIT_SKBINFO),
 	/* Mark set with an extension which needs to call destroy */
 	IPSET_EXT_BIT_DESTROY = 7,
 	IPSET_EXT_DESTROY = (1 << IPSET_EXT_BIT_DESTROY),
@@ -65,12 +67,14 @@ enum ip_set_extension {
 #define SET_WITH_TIMEOUT(s)	((s)->extensions & IPSET_EXT_TIMEOUT)
 #define SET_WITH_COUNTER(s)	((s)->extensions & IPSET_EXT_COUNTER)
 #define SET_WITH_COMMENT(s)	((s)->extensions & IPSET_EXT_COMMENT)
+#define SET_WITH_SKBINFO(s)	((s)->extensions & IPSET_EXT_SKBINFO)
 #define SET_WITH_FORCEADD(s)	((s)->flags & IPSET_CREATE_FLAG_FORCEADD)
 
 /* Extension id, in size order */
 enum ip_set_ext_id {
 	IPSET_EXT_ID_COUNTER = 0,
 	IPSET_EXT_ID_TIMEOUT,
+	IPSET_EXT_ID_SKBINFO,
 	IPSET_EXT_ID_COMMENT,
 	IPSET_EXT_ID_MAX,
 };
@@ -92,6 +96,10 @@ struct ip_set_ext {
 	u64 packets;
 	u64 bytes;
 	u32 timeout;
+	u32 skbmark;
+	u32 skbmarkmask;
+	u32 skbprio;
+	u16 skbqueue;
 	char *comment;
 };
 
@@ -104,6 +112,13 @@ struct ip_set_comment {
 	char *str;
 };
 
+struct ip_set_skbinfo {
+	u32 skbmark;
+	u32 skbmarkmask;
+	u32 skbprio;
+	u16 skbqueue;
+};
+
 struct ip_set;
 
 #define ext_timeout(e, s)	\
@@ -112,7 +127,8 @@ struct ip_set;
 (struct ip_set_counter *)(((void *)(e)) + (s)->offset[IPSET_EXT_ID_COUNTER])
 #define ext_comment(e, s)	\
 (struct ip_set_comment *)(((void *)(e)) + (s)->offset[IPSET_EXT_ID_COMMENT])
-
+#define ext_skbinfo(e, s)	\
+(struct ip_set_skbinfo *)(((void *)(e)) + (s)->offset[IPSET_EXT_ID_SKBINFO])
 
 typedef int (*ipset_adtfn)(struct ip_set *set, void *value,
 			   const struct ip_set_ext *ext,
@@ -256,6 +272,8 @@ ip_set_put_flags(struct sk_buff *skb, struct ip_set *set)
 		cadt_flags |= IPSET_FLAG_WITH_COUNTERS;
 	if (SET_WITH_COMMENT(set))
 		cadt_flags |= IPSET_FLAG_WITH_COMMENT;
+	if (SET_WITH_SKBINFO(set))
+		cadt_flags |= IPSET_FLAG_WITH_SKBINFO;
 	if (SET_WITH_FORCEADD(set))
 		cadt_flags |= IPSET_FLAG_WITH_FORCEADD;
 
@@ -304,6 +322,39 @@ ip_set_update_counter(struct ip_set_counter *counter,
 	}
 }
 
+static inline void
+ip_set_get_skbinfo(struct ip_set_skbinfo *skbinfo,
+		      const struct ip_set_ext *ext,
+		      struct ip_set_ext *mext, u32 flags)
+{
+		mext->skbmark = skbinfo->skbmark;
+		mext->skbmarkmask = skbinfo->skbmarkmask;
+		mext->skbprio = skbinfo->skbprio;
+		mext->skbqueue = skbinfo->skbqueue;
+}
+static inline bool
+ip_set_put_skbinfo(struct sk_buff *skb, struct ip_set_skbinfo *skbinfo)
+{
+	return nla_put_net64(skb, IPSET_ATTR_SKBMARK,
+			     cpu_to_be64((u64)skbinfo->skbmark << 32 |
+					 skbinfo->skbmarkmask)) ||
+	       nla_put_net32(skb, IPSET_ATTR_SKBPRIO,
+			     cpu_to_be32(skbinfo->skbprio)) ||
+	       nla_put_net16(skb, IPSET_ATTR_SKBQUEUE,
+			     cpu_to_be16(skbinfo->skbqueue));
+
+}
+
+static inline void
+ip_set_init_skbinfo(struct ip_set_skbinfo *skbinfo,
+		    const struct ip_set_ext *ext)
+{
+	skbinfo->skbmark = ext->skbmark;
+	skbinfo->skbmarkmask = ext->skbmarkmask;
+	skbinfo->skbprio = ext->skbprio;
+	skbinfo->skbqueue = ext->skbqueue;
+}
+
 static inline bool
 ip_set_put_counter(struct sk_buff *skb, struct ip_set_counter *counter)
 {
@@ -497,6 +548,9 @@ ip_set_put_extensions(struct sk_buff *skb, const struct ip_set *set,
 	if (SET_WITH_COMMENT(set) &&
 	    ip_set_put_comment(skb, ext_comment(e, set)))
 		return -EMSGSIZE;
+	if (SET_WITH_SKBINFO(set) &&
+	    ip_set_put_skbinfo(skb, ext_skbinfo(e, set)))
+		return -EMSGSIZE;
 	return 0;
 }
 
diff --git a/include/uapi/linux/netfilter/ipset/ip_set.h b/include/uapi/linux/netfilter/ipset/ip_set.h
index 78c2f2e..ca03119 100644
--- a/include/uapi/linux/netfilter/ipset/ip_set.h
+++ b/include/uapi/linux/netfilter/ipset/ip_set.h
@@ -115,6 +115,9 @@ enum {
 	IPSET_ATTR_BYTES,
 	IPSET_ATTR_PACKETS,
 	IPSET_ATTR_COMMENT,
+	IPSET_ATTR_SKBMARK,
+	IPSET_ATTR_SKBPRIO,
+	IPSET_ATTR_SKBQUEUE,
 	__IPSET_ATTR_ADT_MAX,
 };
 #define IPSET_ATTR_ADT_MAX	(__IPSET_ATTR_ADT_MAX - 1)
@@ -147,6 +150,7 @@ enum ipset_errno {
 	IPSET_ERR_COUNTER,
 	IPSET_ERR_COMMENT,
 	IPSET_ERR_INVALID_MARKMASK,
+	IPSET_ERR_SKBINFO,
 
 	/* Type specific error codes */
 	IPSET_ERR_TYPE_SPECIFIC = 4352,
@@ -170,6 +174,12 @@ enum ipset_cmd_flags {
 	IPSET_FLAG_MATCH_COUNTERS = (1 << IPSET_FLAG_BIT_MATCH_COUNTERS),
 	IPSET_FLAG_BIT_RETURN_NOMATCH = 7,
 	IPSET_FLAG_RETURN_NOMATCH = (1 << IPSET_FLAG_BIT_RETURN_NOMATCH),
+	IPSET_FLAG_BIT_MAP_SKBMARK = 8,
+	IPSET_FLAG_MAP_SKBMARK = (1 << IPSET_FLAG_BIT_MAP_SKBMARK),
+	IPSET_FLAG_BIT_MAP_SKBPRIO = 9,
+	IPSET_FLAG_MAP_SKBPRIO = (1 << IPSET_FLAG_BIT_MAP_SKBPRIO),
+	IPSET_FLAG_BIT_MAP_SKBQUEUE = 10,
+	IPSET_FLAG_MAP_SKBQUEUE = (1 << IPSET_FLAG_BIT_MAP_SKBQUEUE),
 	IPSET_FLAG_CMD_MAX = 15,
 };
 
@@ -187,6 +197,8 @@ enum ipset_cadt_flags {
 	IPSET_FLAG_WITH_COMMENT = (1 << IPSET_FLAG_BIT_WITH_COMMENT),
 	IPSET_FLAG_BIT_WITH_FORCEADD = 5,
 	IPSET_FLAG_WITH_FORCEADD = (1 << IPSET_FLAG_BIT_WITH_FORCEADD),
+	IPSET_FLAG_BIT_WITH_SKBINFO = 6,
+	IPSET_FLAG_WITH_SKBINFO = (1 << IPSET_FLAG_BIT_WITH_SKBINFO),
 	IPSET_FLAG_CADT_MAX	= 15,
 };
 
-- 
cgit v1.1


From 76cea4109ca89dea218fdc652d2e1535fd9b5fc7 Mon Sep 17 00:00:00 2001
From: Anton Danilov <littlesmilingcloud@gmail.com>
Date: Tue, 2 Sep 2014 14:21:20 +0400
Subject: netfilter: ipset: Add skbinfo extension support to SET target.

Signed-off-by: Anton Danilov <littlesmilingcloud@gmail.com>
Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
---
 include/uapi/linux/netfilter/xt_set.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/netfilter/xt_set.h b/include/uapi/linux/netfilter/xt_set.h
index 964d3d4..d6a1df1 100644
--- a/include/uapi/linux/netfilter/xt_set.h
+++ b/include/uapi/linux/netfilter/xt_set.h
@@ -71,4 +71,14 @@ struct xt_set_info_match_v3 {
 	__u32 flags;
 };
 
+/* Revision 3 target */
+
+struct xt_set_info_target_v3 {
+	struct xt_set_info add_set;
+	struct xt_set_info del_set;
+	struct xt_set_info map_set;
+	__u32 flags;
+	__u32 timeout;
+};
+
 #endif /*_XT_SET_H*/
-- 
cgit v1.1


From aef96193fe7b2791c4a3b19fe75426b929769471 Mon Sep 17 00:00:00 2001
From: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Date: Mon, 15 Sep 2014 17:30:54 +0200
Subject: netfilter: ipset: send nonzero skbinfo extensions only

Do not send zero valued skbinfo extensions to userspace at listing.

Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
---
 include/linux/netfilter/ipset/ip_set.h | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index b97aac5..f1606fa 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -335,13 +335,17 @@ ip_set_get_skbinfo(struct ip_set_skbinfo *skbinfo,
 static inline bool
 ip_set_put_skbinfo(struct sk_buff *skb, struct ip_set_skbinfo *skbinfo)
 {
-	return nla_put_net64(skb, IPSET_ATTR_SKBMARK,
-			     cpu_to_be64((u64)skbinfo->skbmark << 32 |
-					 skbinfo->skbmarkmask)) ||
-	       nla_put_net32(skb, IPSET_ATTR_SKBPRIO,
-			     cpu_to_be32(skbinfo->skbprio)) ||
-	       nla_put_net16(skb, IPSET_ATTR_SKBQUEUE,
-			     cpu_to_be16(skbinfo->skbqueue));
+	/* Send nonzero parameters only */
+	return ((skbinfo->skbmark || skbinfo->skbmarkmask) &&
+		nla_put_net64(skb, IPSET_ATTR_SKBMARK,
+			      cpu_to_be64((u64)skbinfo->skbmark << 32 |
+					  skbinfo->skbmarkmask))) ||
+	       (skbinfo->skbprio &&
+	        nla_put_net32(skb, IPSET_ATTR_SKBPRIO,
+			      cpu_to_be32(skbinfo->skbprio))) ||
+	       (skbinfo->skbqueue &&
+	        nla_put_net16(skb, IPSET_ATTR_SKBQUEUE,
+			     cpu_to_be16(skbinfo->skbqueue)));
 
 }
 
-- 
cgit v1.1


From 5075314e4e4b559cc37675ad8a721a89bccd6284 Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@intel.com>
Date: Mon, 15 Sep 2014 13:00:19 -0400
Subject: dsa: Split ops up, and avoid assigning tag_protocol and receive
 separately

This change addresses several issues.

First, it was possible to set tag_protocol without setting the ops pointer.
To correct that I have reordered things so that rcv is now populated before
we set tag_protocol.

Second, it didn't make much sense to keep setting the device ops each time a
new slave was registered.  So by moving the receive portion out into root
switch initialization that issue should be addressed.

Third, I wanted to avoid sending tags if the rcv pointer was not registered
so I changed the tag check to verify if the rcv function pointer is set on
the root tree.  If it is then we start sending DSA tagged frames.

Finally I split the device ops pointer in the structures into two spots.  I
placed the rcv function pointer in the root switch since this makes it
easiest to access from there, and I placed the xmit function pointer in the
slave for the same reason.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  7 -------
 include/net/dsa.h         | 10 ++++++----
 2 files changed, 6 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index f9e81d1..28d4378 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1928,13 +1928,6 @@ struct udp_offload {
 	struct offload_callbacks callbacks;
 };
 
-struct dsa_device_ops {
-	netdev_tx_t (*xmit)(struct sk_buff *skb, struct net_device *dev);
-	int (*rcv)(struct sk_buff *skb, struct net_device *dev,
-		   struct packet_type *pt, struct net_device *orig_dev);
-};
-
-
 /* often modified stats are per cpu, other are shared (netdev->stats) */
 struct pcpu_sw_netstats {
 	u64     rx_packets;
diff --git a/include/net/dsa.h b/include/net/dsa.h
index 8a8a5d9..a55c4e6 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -77,7 +77,7 @@ struct dsa_platform_data {
 	struct dsa_chip_data	*chip;
 };
 
-struct dsa_device_ops;
+struct packet_type;
 
 struct dsa_switch_tree {
 	/*
@@ -91,7 +91,10 @@ struct dsa_switch_tree {
 	 * protocol to use.
 	 */
 	struct net_device	*master_netdev;
-	const struct dsa_device_ops	*ops;
+	int			(*rcv)(struct sk_buff *skb,
+				       struct net_device *dev,
+				       struct packet_type *pt,
+				       struct net_device *orig_dev);
 	enum dsa_tag_protocol	tag_protocol;
 
 	/*
@@ -218,7 +221,6 @@ static inline void *ds_to_priv(struct dsa_switch *ds)
 
 static inline bool dsa_uses_tagged_protocol(struct dsa_switch_tree *dst)
 {
-	return dst->tag_protocol != DSA_TAG_PROTO_NONE;
+	return dst->rcv != NULL;
 }
-
 #endif
-- 
cgit v1.1


From b4d2394d01bc642e95b2cba956d908423c1bef77 Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@intel.com>
Date: Mon, 15 Sep 2014 13:00:27 -0400
Subject: dsa: Replace mii_bus with a generic host device

This change makes it so that instead of passing and storing a mii_bus we
instead pass and store a host_dev.  From there we can test to determine the
exact type of device, and can verify it is the correct device for our switch.

So for example it would be possible to pass a device pointer from a pci_dev
and instead of checking for a PHY ID we could check for a vendor and/or device
ID.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dsa.h | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/net/dsa.h b/include/net/dsa.h
index a55c4e6..c779e9b 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -34,7 +34,7 @@ struct dsa_chip_data {
 	/*
 	 * How to access the switch configuration registers.
 	 */
-	struct device	*mii_bus;
+	struct device	*host_dev;
 	int		sw_addr;
 
 	/* Device tree node pointer for this specific switch chip
@@ -134,9 +134,9 @@ struct dsa_switch {
 	struct dsa_switch_driver	*drv;
 
 	/*
-	 * Reference to mii bus to use.
+	 * Reference to host device to use.
 	 */
-	struct mii_bus		*master_mii_bus;
+	struct device		*master_dev;
 
 	/*
 	 * Slave mii_bus and devices for the individual ports.
@@ -178,7 +178,7 @@ struct dsa_switch_driver {
 	/*
 	 * Probing and setup.
 	 */
-	char	*(*probe)(struct mii_bus *bus, int sw_addr);
+	char	*(*probe)(struct device *host_dev, int sw_addr);
 	int	(*setup)(struct dsa_switch *ds);
 	int	(*set_addr)(struct dsa_switch *ds, u8 *addr);
 
@@ -213,6 +213,7 @@ struct dsa_switch_driver {
 
 void register_switch_driver(struct dsa_switch_driver *type);
 void unregister_switch_driver(struct dsa_switch_driver *type);
+struct mii_bus *dsa_host_dev_to_mii_bus(struct device *dev);
 
 static inline void *ds_to_priv(struct dsa_switch *ds)
 {
-- 
cgit v1.1


From 6cff339bbd5f9eda7a5e8a521f91a88d046e6d0c Mon Sep 17 00:00:00 2001
From: Alex Gartrell <agartrell@fb.com>
Date: Tue, 9 Sep 2014 16:40:20 -0700
Subject: ipvs: Add destination address family to netlink interface

This is necessary to support heterogeneous pools.  For example, if you have
an ipv6 addressed network, you'll want to be able to forward ipv4 traffic
into it.

This patch enforces that destination address family is the same as service
family, as none of the forwarding mechanisms support anything else.

For the old setsockopt mechanism, we simply set the dest address family to
AF_INET as we do with the service.

Signed-off-by: Alex Gartrell <agartrell@fb.com>
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 include/net/ip_vs.h        | 3 +++
 include/uapi/linux/ip_vs.h | 3 +++
 2 files changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 624a8a5..b7e2b62 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -648,6 +648,9 @@ struct ip_vs_dest_user_kern {
 	/* thresholds for active connections */
 	u32			u_threshold;	/* upper threshold */
 	u32			l_threshold;	/* lower threshold */
+
+	/* Address family of addr */
+	u16			af;
 };
 
 
diff --git a/include/uapi/linux/ip_vs.h b/include/uapi/linux/ip_vs.h
index fbcffe8..cabe95d 100644
--- a/include/uapi/linux/ip_vs.h
+++ b/include/uapi/linux/ip_vs.h
@@ -384,6 +384,9 @@ enum {
 	IPVS_DEST_ATTR_PERSIST_CONNS,	/* persistent connections */
 
 	IPVS_DEST_ATTR_STATS,		/* nested attribute for dest stats */
+
+	IPVS_DEST_ATTR_ADDR_FAMILY,	/* Address family of address */
+
 	__IPVS_DEST_ATTR_MAX,
 };
 
-- 
cgit v1.1


From 655eef103d0bd99f540a52f7ede032e120756846 Mon Sep 17 00:00:00 2001
From: Alex Gartrell <agartrell@fb.com>
Date: Tue, 9 Sep 2014 16:40:21 -0700
Subject: ipvs: Supply destination addr family to ip_vs_{lookup_dest,find_dest}

We need to remove the assumption that virtual address family is the same as
real address family in order to support heterogeneous services (that is,
services with v4 vips and v6 backends or the opposite).

Signed-off-by: Alex Gartrell <agartrell@fb.com>
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 include/net/ip_vs.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index b7e2b62..2fa1155 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -1399,8 +1399,9 @@ void ip_vs_unregister_nl_ioctl(void);
 int ip_vs_control_init(void);
 void ip_vs_control_cleanup(void);
 struct ip_vs_dest *
-ip_vs_find_dest(struct net *net, int af, const union nf_inet_addr *daddr,
-		__be16 dport, const union nf_inet_addr *vaddr, __be16 vport,
+ip_vs_find_dest(struct net *net, int svc_af, int dest_af,
+		const union nf_inet_addr *daddr, __be16 dport,
+		const union nf_inet_addr *vaddr, __be16 vport,
 		__u16 protocol, __u32 fwmark, __u32 flags);
 void ip_vs_try_bind_dest(struct ip_vs_conn *cp);
 
-- 
cgit v1.1


From ba38528aae6ee2d22226c6a78727ddc13512b068 Mon Sep 17 00:00:00 2001
From: Alex Gartrell <agartrell@fb.com>
Date: Tue, 9 Sep 2014 16:40:23 -0700
Subject: ipvs: Supply destination address family to ip_vs_conn_new

The assumption that dest af is equal to service af is now unreliable, so we
must specify it manually so as not to copy just the first 4 bytes of a v6
address or doing an illegal read of 16 butes on a v6 address.

We "lie" in two places: for synchronization (which we will explicitly
disallow from happening when we have heterogeneous pools) and for black
hole addresses where there's no real dest.

Signed-off-by: Alex Gartrell <agartrell@fb.com>
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 include/net/ip_vs.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 2fa1155..7600dbe 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -535,6 +535,7 @@ struct ip_vs_conn {
 	union nf_inet_addr      daddr;          /* destination address */
 	volatile __u32          flags;          /* status flags */
 	__u16                   protocol;       /* Which protocol (TCP/UDP) */
+	__u16			daf;		/* Address family of the dest */
 #ifdef CONFIG_NET_NS
 	struct net              *net;           /* Name space */
 #endif
@@ -1213,7 +1214,7 @@ static inline void __ip_vs_conn_put(struct ip_vs_conn *cp)
 void ip_vs_conn_put(struct ip_vs_conn *cp);
 void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport);
 
-struct ip_vs_conn *ip_vs_conn_new(const struct ip_vs_conn_param *p,
+struct ip_vs_conn *ip_vs_conn_new(const struct ip_vs_conn_param *p, int dest_af,
 				  const union nf_inet_addr *daddr,
 				  __be16 dport, unsigned int flags,
 				  struct ip_vs_dest *dest, __u32 fwmark);
-- 
cgit v1.1


From 391f503d69779867f05e9296ae523e9002c2d7ee Mon Sep 17 00:00:00 2001
From: Alex Gartrell <agartrell@fb.com>
Date: Tue, 9 Sep 2014 16:40:24 -0700
Subject: ipvs: prevent mixing heterogeneous pools and synchronization

The synchronization protocol is not compatible with heterogeneous pools, so
we need to verify that we're not turning both on at the same time.

Signed-off-by: Alex Gartrell <agartrell@fb.com>
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 include/net/ip_vs.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 7600dbe..576d7f0 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -990,6 +990,10 @@ struct netns_ipvs {
 	char			backup_mcast_ifn[IP_VS_IFNAME_MAXLEN];
 	/* net name space ptr */
 	struct net		*net;            /* Needed by timer routines */
+	/* Number of heterogeneous destinations, needed because
+	 * heterogeneous are not supported when synchronization is
+	 * enabled */
+	unsigned int		mixed_address_family_dests;
 };
 
 #define DEFAULT_SYNC_THRESHOLD	3
-- 
cgit v1.1


From 971427f353f3c42c8dcef62e7124440df68eb809 Mon Sep 17 00:00:00 2001
From: Andy Zhou <azhou@nicira.com>
Date: Mon, 15 Sep 2014 19:37:25 -0700
Subject: openvswitch: Add recirc and hash action.

Recirc action allows a packet to reenter openvswitch processing.
currently openvswitch lookup flow for packet received and execute
set of actions on that packet, with help of recirc action we can
process/modify the packet and recirculate it back in openvswitch
for another pass.

OVS hash action calculates 5-tupple hash and set hash in flow-key
hash. This can be used along with recirculation for distributing
packets among different ports for bond devices.
For example:
OVS bonding can use following actions:
Match on: bond flow; Action: hash, recirc(id)
Match on: recirc-id == id and hash lower bits == a;
          Action: output port_bond_a

Signed-off-by: Andy Zhou <azhou@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
---
 include/uapi/linux/openvswitch.h | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
index a794d1d..f7fc507 100644
--- a/include/uapi/linux/openvswitch.h
+++ b/include/uapi/linux/openvswitch.h
@@ -289,6 +289,9 @@ enum ovs_key_attr {
 	OVS_KEY_ATTR_TUNNEL,    /* Nested set of ovs_tunnel attributes */
 	OVS_KEY_ATTR_SCTP,      /* struct ovs_key_sctp */
 	OVS_KEY_ATTR_TCP_FLAGS,	/* be16 TCP flags. */
+	OVS_KEY_ATTR_DP_HASH,      /* u32 hash value. Value 0 indicates the hash
+				   is not computed by the datapath. */
+	OVS_KEY_ATTR_RECIRC_ID, /* u32 recirc id */
 
 #ifdef __KERNEL__
 	OVS_KEY_ATTR_IPV4_TUNNEL,  /* struct ovs_key_ipv4_tunnel */
@@ -493,6 +496,27 @@ struct ovs_action_push_vlan {
 	__be16 vlan_tci;	/* 802.1Q TCI (VLAN ID and priority). */
 };
 
+/* Data path hash algorithm for computing Datapath hash.
+ *
+ * The algorithm type only specifies the fields in a flow
+ * will be used as part of the hash. Each datapath is free
+ * to use its own hash algorithm. The hash value will be
+ * opaque to the user space daemon.
+ */
+enum ovs_hash_alg {
+	OVS_HASH_ALG_L4,
+};
+
+/*
+ * struct ovs_action_hash - %OVS_ACTION_ATTR_HASH action argument.
+ * @hash_alg: Algorithm used to compute hash prior to recirculation.
+ * @hash_basis: basis used for computing hash.
+ */
+struct ovs_action_hash {
+	uint32_t  hash_alg;     /* One of ovs_hash_alg. */
+	uint32_t  hash_basis;
+};
+
 /**
  * enum ovs_action_attr - Action types.
  *
@@ -521,6 +545,8 @@ enum ovs_action_attr {
 	OVS_ACTION_ATTR_PUSH_VLAN,    /* struct ovs_action_push_vlan. */
 	OVS_ACTION_ATTR_POP_VLAN,     /* No argument. */
 	OVS_ACTION_ATTR_SAMPLE,       /* Nested OVS_SAMPLE_ATTR_*. */
+	OVS_ACTION_ATTR_RECIRC,       /* u32 recirc_id. */
+	OVS_ACTION_ATTR_HASH,	      /* struct ovs_action_hash. */
 	__OVS_ACTION_ATTR_MAX
 };
 
-- 
cgit v1.1


From 0097db06f5ab2df1756bc4cbf4395593024d87a1 Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Tue, 16 Sep 2014 21:36:09 +0200
Subject: Bluetooth: Remove exported hci_recv_fragment function

The hci_recv_fragment function is no longer used by any driver and thus
do not export it. In fact it is not even needed by the core and it can
be removed altogether.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
---
 include/net/bluetooth/hci_core.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 206b92b..37ff1ae 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -926,7 +926,6 @@ int hci_remove_remote_oob_data(struct hci_dev *hdev, bdaddr_t *bdaddr);
 void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb);
 
 int hci_recv_frame(struct hci_dev *hdev, struct sk_buff *skb);
-int hci_recv_fragment(struct hci_dev *hdev, int type, void *data, int count);
 int hci_recv_stream_fragment(struct hci_dev *hdev, void *data, int count);
 
 void hci_init_sysfs(struct hci_dev *hdev);
-- 
cgit v1.1


From 689f1c9de2abbd76fda224d12cea5f43568a4335 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Thu, 18 Sep 2014 16:38:18 +0800
Subject: ipsec: Remove obsolete MAX_AH_AUTH_LEN

While tracking down the MAX_AH_AUTH_LEN crash in an old kernel
I thought that this limit was rather arbitrary and we should
just get rid of it.

In fact it seems that we've already done all the work needed
to remove it apart from actually removing it.  This limit was
there in order to limit stack usage.  Since we've already
switched over to allocating scratch space using kmalloc, there
is no longer any need to limit the authentication length.

This patch kills all references to it, including the BUG_ONs
that led me here.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/net/ah.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include')

diff --git a/include/net/ah.h b/include/net/ah.h
index ca95b98..4e2dfa4 100644
--- a/include/net/ah.h
+++ b/include/net/ah.h
@@ -3,9 +3,6 @@
 
 #include <linux/skbuff.h>
 
-/* This is the maximum truncated ICV length that we know of. */
-#define MAX_AH_AUTH_LEN	64
-
 struct crypto_ahash;
 
 struct ah_data {
-- 
cgit v1.1


From 84d7fce693884897c6196cc98228a2ad56ae2a9a Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Thu, 4 Sep 2014 14:30:22 +0200
Subject: netfilter: nf_tables: export rule-set generation ID

This patch exposes the ruleset generation ID in three ways:

1) The new command NFT_MSG_GETGEN that exposes the 32-bits ruleset
   generation ID. This ID is incremented in every commit and it
   should be large enough to avoid wraparound problems.

2) The less significant 16-bits of the generation ID are exposed through
   the nfgenmsg->res_id header field. This allows us to quickly catch
   if the ruleset has change between two consecutive list dumps from
   different object lists (in this specific case I think the risk of
   wraparound is unlikely).

3) Userspace subscribers may receive notifications of new rule-set
   generation after every commit. This also provides an alternative
   way to monitor the generation ID. If the events are lost, the
   userspace process hits a overrun error, so it knows that it is
   working with a stale ruleset anyway.

Patrick spotted that rule-set transformations in userspace may take
quite some time. In that case, it annotates the 32-bits generation ID
before fetching the rule-set, then:

1) it compares it to what we obtain after the transformation to
   make sure it is not working with a stale rule-set and no wraparound
   has ocurred.

2) it subscribes to ruleset notifications, so it can watch for new
   generation ID.

This is complementary to the NLM_F_DUMP_INTR approach, which allows
us to detect an interference in the middle one single list dumping.
There is no way to explicitly check that an interference has occurred
between two list dumps from the kernel, since it doesn't know how
many lists the userspace client is actually going to dump.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter/nf_tables.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 66d66dd..b72ccfe 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -51,6 +51,8 @@ enum nft_verdicts {
  * @NFT_MSG_NEWSETELEM: create a new set element (enum nft_set_elem_attributes)
  * @NFT_MSG_GETSETELEM: get a set element (enum nft_set_elem_attributes)
  * @NFT_MSG_DELSETELEM: delete a set element (enum nft_set_elem_attributes)
+ * @NFT_MSG_NEWGEN: announce a new generation, only for events (enum nft_gen_attributes)
+ * @NFT_MSG_GETGEN: get the rule-set generation (enum nft_gen_attributes)
  */
 enum nf_tables_msg_types {
 	NFT_MSG_NEWTABLE,
@@ -68,6 +70,8 @@ enum nf_tables_msg_types {
 	NFT_MSG_NEWSETELEM,
 	NFT_MSG_GETSETELEM,
 	NFT_MSG_DELSETELEM,
+	NFT_MSG_NEWGEN,
+	NFT_MSG_GETGEN,
 	NFT_MSG_MAX,
 };
 
@@ -812,4 +816,16 @@ enum nft_masq_attributes {
 };
 #define NFTA_MASQ_MAX		(__NFTA_MASQ_MAX - 1)
 
+/**
+ * enum nft_gen_attributes - nf_tables ruleset generation attributes
+ *
+ * @NFTA_GEN_ID: Ruleset generation ID (NLA_U32)
+ */
+enum nft_gen_attributes {
+	NFTA_GEN_UNSPEC,
+	NFTA_GEN_ID,
+	__NFTA_GEN_MAX
+};
+#define NFTA_GEN_MAX		(__NFTA_GEN_MAX - 1)
+
 #endif /* _LINUX_NF_TABLES_H */
-- 
cgit v1.1


From fd384412e199b62c3ddaabd18dce86d0e164c5b9 Mon Sep 17 00:00:00 2001
From: Andy Zhou <azhou@nicira.com>
Date: Tue, 16 Sep 2014 17:31:16 -0700
Subject: udp_tunnel: Seperate ipv6 functions into its own file.

Add ip6_udp_tunnel.c for ipv6 UDP tunnel functions to avoid ifdefs
in udp_tunnel.c

Signed-off-by: Andy Zhou <azhou@nicira.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/udp_tunnel.h | 28 ++++++++++++++++++++++++++--
 1 file changed, 26 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h
index ffd69cb..0b9e017 100644
--- a/include/net/udp_tunnel.h
+++ b/include/net/udp_tunnel.h
@@ -26,7 +26,31 @@ struct udp_port_cfg {
 				use_udp6_rx_checksums:1;
 };
 
-int udp_sock_create(struct net *net, struct udp_port_cfg *cfg,
-		    struct socket **sockp);
+int udp_sock_create4(struct net *net, struct udp_port_cfg *cfg,
+		     struct socket **sockp);
+
+#if IS_ENABLED(CONFIG_IPV6)
+int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg,
+		     struct socket **sockp);
+#else
+static inline int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg,
+				   struct socket **sockp)
+{
+	return 0;
+}
+#endif
+
+static inline int udp_sock_create(struct net *net,
+				  struct udp_port_cfg *cfg,
+				  struct socket **sockp)
+{
+	if (cfg->family == AF_INET)
+		return udp_sock_create4(net, cfg, sockp);
+
+	if (cfg->family == AF_INET6)
+		return udp_sock_create6(net, cfg, sockp);
+
+	return -EPFNOSUPPORT;
+}
 
 #endif
-- 
cgit v1.1


From 6a93cc9052748c6355ec9d5b6c38b77f85f1cb0d Mon Sep 17 00:00:00 2001
From: Andy Zhou <azhou@nicira.com>
Date: Tue, 16 Sep 2014 17:31:17 -0700
Subject: udp-tunnel: Add a few more UDP tunnel APIs

Added a few more UDP tunnel APIs that can be shared by UDP based
tunnel protocol implementation. The main ones are highlighted below.

setup_udp_tunnel_sock() configures UDP listener socket for
receiving UDP encapsulated packets.

udp_tunnel_xmit_skb() and upd_tunnel6_xmit_skb() transmit skb
using UDP encapsulation.

udp_tunnel_sock_release() closes the UDP tunnel listener socket.

Signed-off-by: Andy Zhou <azhou@nicira.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/udp_tunnel.h | 57 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 57 insertions(+)

(limited to 'include')

diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h
index 0b9e017..a47790b 100644
--- a/include/net/udp_tunnel.h
+++ b/include/net/udp_tunnel.h
@@ -1,6 +1,14 @@
 #ifndef __NET_UDP_TUNNEL_H
 #define __NET_UDP_TUNNEL_H
 
+#include <net/ip_tunnels.h>
+#include <net/udp.h>
+
+#if IS_ENABLED(CONFIG_IPV6)
+#include <net/ipv6.h>
+#include <net/addrconf.h>
+#endif
+
 struct udp_port_cfg {
 	u8			family;
 
@@ -53,4 +61,53 @@ static inline int udp_sock_create(struct net *net,
 	return -EPFNOSUPPORT;
 }
 
+typedef int (*udp_tunnel_encap_rcv_t)(struct sock *sk, struct sk_buff *skb);
+typedef void (*udp_tunnel_encap_destroy_t)(struct sock *sk);
+
+struct udp_tunnel_sock_cfg {
+	void *sk_user_data;     /* user data used by encap_rcv call back */
+	/* Used for setting up udp_sock fields, see udp.h for details */
+	__u8  encap_type;
+	udp_tunnel_encap_rcv_t encap_rcv;
+	udp_tunnel_encap_destroy_t encap_destroy;
+};
+
+/* Setup the given (UDP) sock to receive UDP encapsulated packets */
+void setup_udp_tunnel_sock(struct net *net, struct socket *sock,
+			   struct udp_tunnel_sock_cfg *sock_cfg);
+
+/* Transmit the skb using UDP encapsulation. */
+int udp_tunnel_xmit_skb(struct socket *sock, struct rtable *rt,
+			struct sk_buff *skb, __be32 src, __be32 dst,
+			__u8 tos, __u8 ttl, __be16 df, __be16 src_port,
+			__be16 dst_port, bool xnet);
+
+#if IS_ENABLED(CONFIG_IPV6)
+int udp_tunnel6_xmit_skb(struct socket *sock, struct dst_entry *dst,
+			 struct sk_buff *skb, struct net_device *dev,
+			 struct in6_addr *saddr, struct in6_addr *daddr,
+			 __u8 prio, __u8 ttl, __be16 src_port,
+			 __be16 dst_port);
+#endif
+
+void udp_tunnel_sock_release(struct socket *sock);
+
+static inline struct sk_buff *udp_tunnel_handle_offloads(struct sk_buff *skb,
+							 bool udp_csum)
+{
+	int type = udp_csum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL;
+
+	return iptunnel_handle_offloads(skb, udp_csum, type);
+}
+
+static inline void udp_tunnel_encap_enable(struct socket *sock)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+	if (sock->sk->sk_family == PF_INET6)
+		ipv6_stub->udpv6_encap_enable();
+	else
+#endif
+		udp_encap_enable();
+}
+
 #endif
-- 
cgit v1.1


From 2e4e44107176d552f8bb1bb76053e850e3809841 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 17 Sep 2014 04:49:49 -0700
Subject: net: add alloc_skb_with_frags() helper

Extract from sock_alloc_send_pskb() code building skb with frags,
so that we can reuse this in other contexts.

Intent is to use it from tcp_send_rcvq(), tcp_collapse(), ...

We also want to replace some skb_linearize() calls to a more reliable
strategy in pathological cases where we need to reduce number of frags.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 756e3d0..f1bfa37 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -769,6 +769,12 @@ static inline struct sk_buff *alloc_skb(unsigned int size,
 	return __alloc_skb(size, priority, 0, NUMA_NO_NODE);
 }
 
+struct sk_buff *alloc_skb_with_frags(unsigned long header_len,
+				     unsigned long data_len,
+				     int max_page_order,
+				     int *errcode,
+				     gfp_t gfp_mask);
+
 static inline struct sk_buff *alloc_skb_fclone(unsigned int size,
 					       gfp_t priority)
 {
-- 
cgit v1.1


From bb7d93496f7ac203f7c3e9678000d1c83eb4e0ba Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Fri, 19 Sep 2014 13:07:50 -0700
Subject: net: phy: broadcom: add helper for PHY revision and patch level

The Broadcom BCM7xxx internal PHYs do not contain any useful revision
information in the low 4-bits of their MII_PHYSID2 (MII register 3)
which could allow us to properly identify them.

As a result, we need the actual hardware block integrating these PHYs:
GENET or the SF2 switch to tell us what revision they are built with. To
assist with that, add two helper macros for fetching the the PHY
revision and patch level from the struct phy_device::dev_flags.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/brcmphy.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h
index 5bd35cc..e4ee9c6 100644
--- a/include/linux/brcmphy.h
+++ b/include/linux/brcmphy.h
@@ -41,6 +41,8 @@
 #define PHY_BRCM_DIS_TXCRXC_NOENRGY	0x00008000
 /* Broadcom BCM7xxx specific workarounds */
 #define PHY_BRCM_100MBPS_WAR		0x00010000
+#define PHY_BRCM_7XXX_REV(x)		(((x) >> 8) & 0xff)
+#define PHY_BRCM_7XXX_PATCH(x)		((x) & 0xff)
 #define PHY_BCM_FLAGS_VALID		0x80000000
 
 /* Broadcom BCM54XX register definitions, common to most Broadcom PHYs */
-- 
cgit v1.1


From 80780a54ecded1647e661ababde13554a149f7f3 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Fri, 19 Sep 2014 13:07:52 -0700
Subject: net: bcmgenet: remove PHY_BRCM_100MBPS_WAR

Now that we have removed the need for the PHY_BRCM_100MBPS_WAR flag, we
can remove it from the GENET driver and the broadcom shared header file.
The PHY driver checks the PHY supported bitmask instead.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/brcmphy.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h
index e4ee9c6..3f626fe 100644
--- a/include/linux/brcmphy.h
+++ b/include/linux/brcmphy.h
@@ -40,7 +40,6 @@
 #define PHY_BRCM_CLEAR_RGMII_MODE	0x00004000
 #define PHY_BRCM_DIS_TXCRXC_NOENRGY	0x00008000
 /* Broadcom BCM7xxx specific workarounds */
-#define PHY_BRCM_100MBPS_WAR		0x00010000
 #define PHY_BRCM_7XXX_REV(x)		(((x) >> 8) & 0xff)
 #define PHY_BRCM_7XXX_PATCH(x)		((x) & 0xff)
 #define PHY_BCM_FLAGS_VALID		0x80000000
-- 
cgit v1.1


From 6819563e646a7f3692836daefd12cd86c697759f Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Fri, 19 Sep 2014 13:07:54 -0700
Subject: net: dsa: allow switch drivers to specify phy_device::dev_flags

Some switch drivers (e.g: bcm_sf2) may have to communicate specific
workarounds or flags towards the PHY device driver. Allow switches
driver to be delegated that task by introducing a get_phy_flags()
callback which will do just that.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dsa.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/net/dsa.h b/include/net/dsa.h
index c779e9b..e020b8a 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -181,6 +181,7 @@ struct dsa_switch_driver {
 	char	*(*probe)(struct device *host_dev, int sw_addr);
 	int	(*setup)(struct dsa_switch *ds);
 	int	(*set_addr)(struct dsa_switch *ds, u8 *addr);
+	u32	(*get_phy_flags)(struct dsa_switch *ds, int port);
 
 	/*
 	 * Access to the switch's PHY registers.
-- 
cgit v1.1


From 23461551c00628c3f3fe9cf837bf53cf8f212b63 Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Wed, 17 Sep 2014 12:25:56 -0700
Subject: fou: Support for foo-over-udp RX path

This patch provides a receive path for foo-over-udp. This allows
direct encapsulation of IP protocols over UDP. The bound destination
port is used to map to an IP protocol, and the XFRM framework
(udp_encap_rcv) is used to receive encapsulated packets. Upon
reception, the encapsulation header is logically removed (pointer
to transport header is advanced) and the packet is reinjected into
the receive path with the IP protocol indicated by the mapping.

Netlink is used to configure FOU ports. The configuration information
includes the port number to bind to and the IP protocol corresponding
to that port.

This should support GRE/UDP
(http://tools.ietf.org/html/draft-yong-tsvwg-gre-in-udp-encap-02),
as will as the other IP tunneling protocols (IPIP, SIT).

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/fou.h | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)
 create mode 100644 include/uapi/linux/fou.h

(limited to 'include')

diff --git a/include/uapi/linux/fou.h b/include/uapi/linux/fou.h
new file mode 100644
index 0000000..e03376d
--- /dev/null
+++ b/include/uapi/linux/fou.h
@@ -0,0 +1,32 @@
+/* fou.h - FOU Interface */
+
+#ifndef _UAPI_LINUX_FOU_H
+#define _UAPI_LINUX_FOU_H
+
+/* NETLINK_GENERIC related info
+ */
+#define FOU_GENL_NAME		"fou"
+#define FOU_GENL_VERSION	0x1
+
+enum {
+	FOU_ATTR_UNSPEC,
+	FOU_ATTR_PORT,				/* u16 */
+	FOU_ATTR_AF,				/* u8 */
+	FOU_ATTR_IPPROTO,			/* u8 */
+
+	__FOU_ATTR_MAX,
+};
+
+#define FOU_ATTR_MAX		(__FOU_ATTR_MAX - 1)
+
+enum {
+	FOU_CMD_UNSPEC,
+	FOU_CMD_ADD,
+	FOU_CMD_DEL,
+
+	__FOU_CMD_MAX,
+};
+
+#define FOU_CMD_MAX	(__FOU_CMD_MAX - 1)
+
+#endif /* _UAPI_LINUX_FOU_H */
-- 
cgit v1.1


From afe93325bc02a5b2dea0cd7d78225de692265e6e Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Wed, 17 Sep 2014 12:25:57 -0700
Subject: fou: Add GRO support

Implement fou_gro_receive and fou_gro_complete, and populate these
in the correponsing udp_offloads for the socket. Added ipproto to
udp_offloads and pass this from UDP to the fou GRO routine in proto
field of napi_gro_cb structure.

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 28d4378..4354b43 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1874,7 +1874,7 @@ struct napi_gro_cb {
 	/* jiffies when first packet was created/queued */
 	unsigned long age;
 
-	/* Used in ipv6_gro_receive() */
+	/* Used in ipv6_gro_receive() and foo-over-udp */
 	u16	proto;
 
 	/* Used in udp_gro_receive */
@@ -1925,6 +1925,7 @@ struct packet_offload {
 
 struct udp_offload {
 	__be16			 port;
+	u8			 ipproto;
 	struct offload_callbacks callbacks;
 };
 
-- 
cgit v1.1


From 56328486539ddd07cbaafec7a542a2c8a3043623 Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Wed, 17 Sep 2014 12:25:58 -0700
Subject: net: Changes to ip_tunnel to support foo-over-udp encapsulation

This patch changes IP tunnel to support (secondary) encapsulation,
Foo-over-UDP. Changes include:

1) Adding tun_hlen as the tunnel header length, encap_hlen as the
   encapsulation header length, and hlen becomes the grand total
   of these.
2) Added common netlink define to support FOU encapsulation.
3) Routines to perform FOU encapsulation.

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip_tunnels.h       | 19 ++++++++++++++++++-
 include/uapi/linux/if_tunnel.h | 12 ++++++++++++
 2 files changed, 30 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index 8dd8cab..7f538ba 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -10,6 +10,7 @@
 #include <net/gro_cells.h>
 #include <net/inet_ecn.h>
 #include <net/ip.h>
+#include <net/netns/generic.h>
 #include <net/rtnetlink.h>
 
 #if IS_ENABLED(CONFIG_IPV6)
@@ -31,6 +32,13 @@ struct ip_tunnel_6rd_parm {
 };
 #endif
 
+struct ip_tunnel_encap {
+	__u16			type;
+	__u16			flags;
+	__be16			sport;
+	__be16			dport;
+};
+
 struct ip_tunnel_prl_entry {
 	struct ip_tunnel_prl_entry __rcu *next;
 	__be32				addr;
@@ -56,13 +64,18 @@ struct ip_tunnel {
 	/* These four fields used only by GRE */
 	__u32		i_seqno;	/* The last seen seqno	*/
 	__u32		o_seqno;	/* The last output seqno */
-	int		hlen;		/* Precalculated header length */
+	int		tun_hlen;	/* Precalculated header length */
 	int		mlink;
 
 	struct ip_tunnel_dst __percpu *dst_cache;
 
 	struct ip_tunnel_parm parms;
 
+	int		encap_hlen;	/* Encap header length (FOU,GUE) */
+	struct ip_tunnel_encap encap;
+
+	int		hlen;		/* tun_hlen + encap_hlen */
+
 	/* for SIT */
 #ifdef CONFIG_IPV6_SIT_6RD
 	struct ip_tunnel_6rd_parm ip6rd;
@@ -114,6 +127,8 @@ void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops);
 void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
 		    const struct iphdr *tnl_params, const u8 protocol);
 int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd);
+int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t,
+		    u8 *protocol, struct flowi4 *fl4);
 int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu);
 
 struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev,
@@ -131,6 +146,8 @@ int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
 		      struct ip_tunnel_parm *p);
 void ip_tunnel_setup(struct net_device *dev, int net_id);
 void ip_tunnel_dst_reset_all(struct ip_tunnel *t);
+int ip_tunnel_encap_setup(struct ip_tunnel *t,
+			  struct ip_tunnel_encap *ipencap);
 
 /* Extract dsfield from inner protocol */
 static inline u8 ip_tunnel_get_dsfield(const struct iphdr *iph,
diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h
index 3bce9e9..9fedca7 100644
--- a/include/uapi/linux/if_tunnel.h
+++ b/include/uapi/linux/if_tunnel.h
@@ -53,10 +53,22 @@ enum {
 	IFLA_IPTUN_6RD_RELAY_PREFIX,
 	IFLA_IPTUN_6RD_PREFIXLEN,
 	IFLA_IPTUN_6RD_RELAY_PREFIXLEN,
+	IFLA_IPTUN_ENCAP_TYPE,
+	IFLA_IPTUN_ENCAP_FLAGS,
+	IFLA_IPTUN_ENCAP_SPORT,
+	IFLA_IPTUN_ENCAP_DPORT,
 	__IFLA_IPTUN_MAX,
 };
 #define IFLA_IPTUN_MAX	(__IFLA_IPTUN_MAX - 1)
 
+enum tunnel_encap_types {
+	TUNNEL_ENCAP_NONE,
+	TUNNEL_ENCAP_FOU,
+};
+
+#define TUNNEL_ENCAP_FLAG_CSUM		(1<<0)
+#define TUNNEL_ENCAP_FLAG_CSUM6		(1<<1)
+
 /* SIT-mode i_flags */
 #define	SIT_ISATAP	0x0001
 
-- 
cgit v1.1


From 4565e9919cda747815547e2e5d7b78f15efbffdf Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Wed, 17 Sep 2014 12:26:01 -0700
Subject: gre: Setup and TX path for gre/UDP foo-over-udp encapsulation

Added netlink attrs to configure FOU encapsulation for GRE, netlink
handling of these flags, and properly adjust MTU for encapsulation.
ip_tunnel_encap is called from ip_tunnel_xmit to actually perform FOU
encapsulation.

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/if_tunnel.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h
index 9fedca7..7c832af 100644
--- a/include/uapi/linux/if_tunnel.h
+++ b/include/uapi/linux/if_tunnel.h
@@ -106,6 +106,10 @@ enum {
 	IFLA_GRE_ENCAP_LIMIT,
 	IFLA_GRE_FLOWINFO,
 	IFLA_GRE_FLAGS,
+	IFLA_GRE_ENCAP_TYPE,
+	IFLA_GRE_ENCAP_FLAGS,
+	IFLA_GRE_ENCAP_SPORT,
+	IFLA_GRE_ENCAP_DPORT,
 	__IFLA_GRE_MAX,
 };
 
-- 
cgit v1.1


From 54003f119c26573d3bb86a5efc64f3e5fd43b8c6 Mon Sep 17 00:00:00 2001
From: Sabrina Dubroca <sd@queasysnail.net>
Date: Wed, 17 Sep 2014 23:23:12 +0200
Subject: net: fix sparse warnings in SNMP_UPD_PO_STATS(_BH)

ptr used to be a non __percpu pointer (result of a this_cpu_ptr
assignment, 7d720c3e4f0c4 ("percpu: add __percpu sparse annotations to
net")). Since d25398df59b56 ("net: avoid reloads in SNMP_UPD_PO_STATS"),
that's no longer the case, SNMP_UPD_PO_STATS uses this_cpu_add and ptr
is now __percpu.

Silence sparse warnings by preserving the original type and
annotation, and remove the out-of-date comment.

warning: incorrect type in initializer (different address spaces)
   expected unsigned long long *ptr
   got unsigned long long [noderef] <asn:3>*<noident>
warning: incorrect type in initializer (different address spaces)
   expected void const [noderef] <asn:3>*__vpp_verify
   got unsigned long long *<noident>
warning: incorrect type in initializer (different address spaces)
   expected void const [noderef] <asn:3>*__vpp_verify
   got unsigned long long *<noident>

Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/snmp.h | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/net/snmp.h b/include/net/snmp.h
index f1f27fd..8fd2f49 100644
--- a/include/net/snmp.h
+++ b/include/net/snmp.h
@@ -146,19 +146,15 @@ struct linux_xfrm_mib {
 
 #define SNMP_ADD_STATS(mib, field, addend)	\
 			this_cpu_add(mib->mibs[field], addend)
-/*
- * Use "__typeof__(*mib) *ptr" instead of "__typeof__(mib) ptr"
- * to make @ptr a non-percpu pointer.
- */
 #define SNMP_UPD_PO_STATS(mib, basefield, addend)	\
 	do { \
-		__typeof__(*mib->mibs) *ptr = mib->mibs;	\
+		__typeof__((mib->mibs) + 0) ptr = mib->mibs;	\
 		this_cpu_inc(ptr[basefield##PKTS]);		\
 		this_cpu_add(ptr[basefield##OCTETS], addend);	\
 	} while (0)
 #define SNMP_UPD_PO_STATS_BH(mib, basefield, addend)	\
 	do { \
-		__typeof__(*mib->mibs) *ptr = mib->mibs;	\
+		__typeof__((mib->mibs) + 0) ptr = mib->mibs;	\
 		__this_cpu_inc(ptr[basefield##PKTS]);		\
 		__this_cpu_add(ptr[basefield##OCTETS], addend);	\
 	} while (0)
-- 
cgit v1.1


From 77507aa249aecd06fa25ad058b64481e46887a01 Mon Sep 17 00:00:00 2001
From: Ido Shamay <idos@mellanox.com>
Date: Thu, 18 Sep 2014 11:50:59 +0300
Subject: net/mlx4_core: Enable CQE/EQE stride support

This feature is intended for archs having cache line larger then 64B.

Since our CQE/EQEs are generally 64B in those systems, HW will write
twice to the same cache line consecutively, causing pipe locks due to
he hazard prevention mechanism. For elements in a cyclic buffer, writes
are consecutive, so entries smaller than a cache line should be
avoided, especially if they are written at a high rate.

Reduce consecutive writes to same cache line in CQs/EQs, by allowing the
driver to increase the distance between entries so that each will reside
in a different cache line. Until the introduction of this feature, there
were two types of CQE/EQE:

1. 32B stride and context in the [0-31] segment
2. 64B stride and context in the [32-63] segment

This feature introduces two additional types:

3. 128B stride and context in the [0-31] segment (128B cache line)
4. 256B stride and context in the [0-31] segment (256B cache line)

Modify the mlx4_core driver to query the device for the CQE/EQE cache
line stride capability and to enable that capability when the host
cache line size is larger than 64 bytes (supported cache lines are
128B and 256B).

The mlx4 IB driver and libmlx4 need not be aware of this change. The PF
context behaviour is changed to require this change in VF drivers
running on such archs.

Signed-off-by: Ido Shamay <idos@mellanox.com>
Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mlx4/device.h | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 1befd8d..7bcefe7 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -185,19 +185,24 @@ enum {
 	MLX4_DEV_CAP_FLAG2_DMFS_IPOIB		= 1LL <<  9,
 	MLX4_DEV_CAP_FLAG2_VXLAN_OFFLOADS	= 1LL <<  10,
 	MLX4_DEV_CAP_FLAG2_MAD_DEMUX		= 1LL <<  11,
+	MLX4_DEV_CAP_FLAG2_CQE_STRIDE		= 1LL <<  12,
+	MLX4_DEV_CAP_FLAG2_EQE_STRIDE		= 1LL <<  13
 };
 
 enum {
 	MLX4_DEV_CAP_64B_EQE_ENABLED	= 1LL << 0,
-	MLX4_DEV_CAP_64B_CQE_ENABLED	= 1LL << 1
+	MLX4_DEV_CAP_64B_CQE_ENABLED	= 1LL << 1,
+	MLX4_DEV_CAP_CQE_STRIDE_ENABLED	= 1LL << 2,
+	MLX4_DEV_CAP_EQE_STRIDE_ENABLED	= 1LL << 3
 };
 
 enum {
-	MLX4_USER_DEV_CAP_64B_CQE	= 1L << 0
+	MLX4_USER_DEV_CAP_LARGE_CQE	= 1L << 0
 };
 
 enum {
-	MLX4_FUNC_CAP_64B_EQE_CQE	= 1L << 0
+	MLX4_FUNC_CAP_64B_EQE_CQE	= 1L << 0,
+	MLX4_FUNC_CAP_EQE_CQE_STRIDE	= 1L << 1
 };
 
 
-- 
cgit v1.1


From 2446254915a7d6f08bba9a755a34cc0402880472 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Thu, 18 Sep 2014 17:31:22 -0700
Subject: net: dsa: allow switch drivers to implement suspend/resume hooks

Add an abstraction layer to suspend/resume switch devices, doing the
following split:

- suspend/resume the slave network devices and their corresponding PHY
  devices
- suspend/resume the switch hardware using switch driver callbacks

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dsa.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/net/dsa.h b/include/net/dsa.h
index e020b8a..846dce4 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -210,6 +210,12 @@ struct dsa_switch_driver {
 	void	(*get_ethtool_stats)(struct dsa_switch *ds,
 				     int port, uint64_t *data);
 	int	(*get_sset_count)(struct dsa_switch *ds);
+
+	/*
+	 * Suspend and resume
+	 */
+	int	(*suspend)(struct dsa_switch *ds);
+	int	(*resume)(struct dsa_switch *ds);
 };
 
 void register_switch_driver(struct dsa_switch_driver *type);
-- 
cgit v1.1


From 19e57c4e6dc6b82a3204b801f4c5f27c7d007559 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Thu, 18 Sep 2014 17:31:24 -0700
Subject: net: dsa: add {get, set}_wol callbacks to slave devices

Allow switch drivers to implement per-port Wake-on-LAN getter and
setters.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dsa.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include')

diff --git a/include/net/dsa.h b/include/net/dsa.h
index 846dce4..d8054fb 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -212,6 +212,14 @@ struct dsa_switch_driver {
 	int	(*get_sset_count)(struct dsa_switch *ds);
 
 	/*
+	 * ethtool Wake-on-LAN
+	 */
+	void	(*get_wol)(struct dsa_switch *ds, int port,
+			   struct ethtool_wolinfo *w);
+	int	(*set_wol)(struct dsa_switch *ds, int port,
+			   struct ethtool_wolinfo *w);
+
+	/*
 	 * Suspend and resume
 	 */
 	int	(*suspend)(struct dsa_switch *ds);
-- 
cgit v1.1


From 35f7aa5309c048bb70e58571942795fa9411ce6a Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <dborkman@redhat.com>
Date: Sat, 20 Sep 2014 14:03:55 +0200
Subject: ipv6: mld: answer mldv2 queries with mldv1 reports in mldv1 fallback

RFC2710 (MLDv1), section 3.7. says:

  The length of a received MLD message is computed by taking the
  IPv6 Payload Length value and subtracting the length of any IPv6
  extension headers present between the IPv6 header and the MLD
  message. If that length is greater than 24 octets, that indicates
  that there are other fields present *beyond* the fields described
  above, perhaps belonging to a *future backwards-compatible* version
  of MLD. An implementation of the version of MLD specified in this
  document *MUST NOT* send an MLD message longer than 24 octets and
  MUST ignore anything past the first 24 octets of a received MLD
  message.

RFC3810 (MLDv2), section 8.2.1. states for *listeners* regarding
presence of MLDv1 routers:

  In order to be compatible with MLDv1 routers, MLDv2 hosts MUST
  operate in version 1 compatibility mode. [...] When Host
  Compatibility Mode is MLDv2, a host acts using the MLDv2 protocol
  on that interface. When Host Compatibility Mode is MLDv1, a host
  acts in MLDv1 compatibility mode, using *only* the MLDv1 protocol,
  on that interface. [...]

While section 8.3.1. specifies *router* behaviour regarding presence
of MLDv1 routers:

  MLDv2 routers may be placed on a network where there is at least
  one MLDv1 router. The following requirements apply:

  If an MLDv1 router is present on the link, the Querier MUST use
  the *lowest* version of MLD present on the network. This must be
  administratively assured. Routers that desire to be compatible
  with MLDv1 MUST have a configuration option to act in MLDv1 mode;
  if an MLDv1 router is present on the link, the system administrator
  must explicitly configure all MLDv2 routers to act in MLDv1 mode.
  When in MLDv1 mode, the Querier MUST send periodic General Queries
  truncated at the Multicast Address field (i.e., 24 bytes long),
  and SHOULD also warn about receiving an MLDv2 Query (such warnings
  must be rate-limited). The Querier MUST also fill in the Maximum
  Response Delay in the Maximum Response Code field, i.e., the
  exponential algorithm described in section 5.1.3. is not used. [...]

That means that we should not get queries from different versions of
MLD. When there's a MLDv1 router present, MLDv2 enforces truncation
and MRC == MRD (both fields are overlapping within the 24 octet range).

Section 8.3.2. specifies behaviour in the presence of MLDv1 multicast
address *listeners*:

  MLDv2 routers may be placed on a network where there are hosts
  that have not yet been upgraded to MLDv2. In order to be compatible
  with MLDv1 hosts, MLDv2 routers MUST operate in version 1 compatibility
  mode. MLDv2 routers keep a compatibility mode per multicast address
  record. The compatibility mode of a multicast address is determined
  from the Multicast Address Compatibility Mode variable, which can be
  in one of the two following states: MLDv1 or MLDv2.

  The Multicast Address Compatibility Mode of a multicast address
  record is set to MLDv1 whenever an MLDv1 Multicast Listener Report is
  *received* for that multicast address. At the same time, the Older
  Version Host Present timer for the multicast address is set to Older
  Version Host Present Timeout seconds. The timer is re-set whenever a
  new MLDv1 Report is received for that multicast address. If the Older
  Version Host Present timer expires, the router switches back to
  Multicast Address Compatibility Mode of MLDv2 for that multicast
  address. [...]

That means, what can happen is the following scenario, that hosts can
act in MLDv1 compatibility mode when they previously have received an
MLDv1 query (or, simply operate in MLDv1 mode-only); and at the same
time, an MLDv2 router could start up and transmits MLDv2 startup query
messages while being unaware of the current operational mode.

Given RFC2710, section 3.7 we would need to answer to that with an MLDv1
listener report, so that the router according to RFC3810, section 8.3.2.
would receive that and internally switch to MLDv1 compatibility as well.

Right now, I believe since the initial implementation of MLDv2, Linux
hosts would just silently drop such MLDv2 queries instead of replying
with an MLDv1 listener report, which would prevent a MLDv2 router going
into fallback mode (until it receives other MLDv1 queries).

Since the mapping of MRC to MRD in exactly such cases can make use of
the exponential algorithm from 5.1.3, we cannot [strictly speaking] be
aware in MLDv1 of the encoding in MRC, it seems also not mentioned by
the RFC. Since encodings are the same up to 32767, assume in such a
situation this value as a hard upper limit we would clamp. We have asked
one of the RFC authors on that regard, and he mentioned that there seem
not to be any implementations that make use of that exponential algorithm
on startup messages. In any case, this patch fixes this MLD
interoperability issue.

Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/mld.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/mld.h b/include/net/mld.h
index faa1d16..01d7513 100644
--- a/include/net/mld.h
+++ b/include/net/mld.h
@@ -88,12 +88,15 @@ struct mld2_query {
 #define MLDV2_QQIC_EXP(value)	(((value) >> 4) & 0x07)
 #define MLDV2_QQIC_MAN(value)	((value) & 0x0f)
 
+#define MLD_EXP_MIN_LIMIT	32768UL
+#define MLDV1_MRD_MAX_COMPAT	(MLD_EXP_MIN_LIMIT - 1)
+
 static inline unsigned long mldv2_mrc(const struct mld2_query *mlh2)
 {
 	/* RFC3810, 5.1.3. Maximum Response Code */
 	unsigned long ret, mc_mrc = ntohs(mlh2->mld2q_mrc);
 
-	if (mc_mrc < 32768) {
+	if (mc_mrc < MLD_EXP_MIN_LIMIT) {
 		ret = mc_mrc;
 	} else {
 		unsigned long mc_man, mc_exp;
-- 
cgit v1.1


From fcdd1cf4dd63aecf86c987d7f4ec7187be5c2fbc Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 22 Sep 2014 13:19:44 -0700
Subject: tcp: avoid possible arithmetic overflows

icsk_rto is a 32bit field, and icsk_backoff can reach 15 by default,
or more if some sysctl (eg tcp_retries2) are changed.

Better use 64bit to perform icsk_rto << icsk_backoff operations

As Joe Perches suggested, add a helper for this.

Yuchung spotted the tcp_v4_err() case.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_connection_sock.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include')

diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index 5fbe656..848e85c 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -242,6 +242,15 @@ static inline void inet_csk_reset_xmit_timer(struct sock *sk, const int what,
 #endif
 }
 
+static inline unsigned long
+inet_csk_rto_backoff(const struct inet_connection_sock *icsk,
+		     unsigned long max_when)
+{
+        u64 when = (u64)icsk->icsk_rto << icsk->icsk_backoff;
+
+        return (unsigned long)min_t(u64, when, max_when);
+}
+
 struct sock *inet_csk_accept(struct sock *sk, int flags, int *err);
 
 struct request_sock *inet_csk_search_req(const struct sock *sk,
-- 
cgit v1.1


From 4cdf507d54525842dfd9f6313fdafba039084046 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 19 Sep 2014 07:38:40 -0700
Subject: icmp: add a global rate limitation

Current ICMP rate limiting uses inetpeer cache, which is an RBL tree
protected by a lock, meaning that hosts can be stuck hard if all cpus
want to check ICMP limits.

When say a DNS or NTP server process is restarted, inetpeer tree grows
quick and machine comes to its knees.

iptables can not help because the bottleneck happens before ICMP
messages are even cooked and sent.

This patch adds a new global limitation, using a token bucket filter,
controlled by two new sysctl :

icmp_msgs_per_sec - INTEGER
    Limit maximal number of ICMP packets sent per second from this host.
    Only messages whose type matches icmp_ratemask are
    controlled by this limit.
    Default: 1000

icmp_msgs_burst - INTEGER
    icmp_msgs_per_sec controls number of ICMP packets sent per second,
    while icmp_msgs_burst controls the burst size of these packets.
    Default: 50

Note that if we really want to send millions of ICMP messages per
second, we might extend idea and infra added in commit 04ca6973f7c1a
("ip: make IP identifiers less predictable") :
add a token bucket in the ip_idents hash and no longer rely on inetpeer.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/net/ip.h b/include/net/ip.h
index 14bfc8e..fcd9068 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -548,6 +548,10 @@ void ip_icmp_error(struct sock *sk, struct sk_buff *skb, int err, __be16 port,
 void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 dport,
 		    u32 info);
 
+bool icmp_global_allow(void);
+extern int sysctl_icmp_msgs_per_sec;
+extern int sysctl_icmp_msgs_burst;
+
 #ifdef CONFIG_PROC_FS
 int ip_misc_proc_init(void);
 #endif
-- 
cgit v1.1


From 9e87f9a9c4c4754508b2c2638fbde9e10c7a103b Mon Sep 17 00:00:00 2001
From: Christophe Ricard <christophe.ricard@gmail.com>
Date: Sat, 13 Sep 2014 10:28:49 +0200
Subject: NFC: nci: Add support for proprietary RF Protocols

In NFC Forum NCI specification, some RF Protocol values are
reserved for proprietary use (from 0x80 to 0xfe).
Some CLF vendor may need to use one value within this range
for specific technology.
Furthermore, some CLF may not becompliant with NFC Froum NCI
specification 2.0 and therefore will not support RF Protocol
value 0x06 for PROTOCOL_T5T as mention in a draft specification
and in a recent push.

Adding get_rf_protocol handle to the nci_ops structure will
help to set the correct technology to target.

Signed-off-by: Christophe Ricard <christophe-h.ricard@st.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 include/net/nfc/nci_core.h | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/net/nfc/nci_core.h b/include/net/nfc/nci_core.h
index 1f9a0f5..75d10e6 100644
--- a/include/net/nfc/nci_core.h
+++ b/include/net/nfc/nci_core.h
@@ -64,10 +64,11 @@ enum nci_state {
 struct nci_dev;
 
 struct nci_ops {
-	int (*open)(struct nci_dev *ndev);
-	int (*close)(struct nci_dev *ndev);
-	int (*send)(struct nci_dev *ndev, struct sk_buff *skb);
-	int (*setup)(struct nci_dev *ndev);
+	int   (*open)(struct nci_dev *ndev);
+	int   (*close)(struct nci_dev *ndev);
+	int   (*send)(struct nci_dev *ndev, struct sk_buff *skb);
+	int   (*setup)(struct nci_dev *ndev);
+	__u32 (*get_rfprotocol)(struct nci_dev *ndev, __u8 rf_protocol);
 };
 
 #define NCI_MAX_SUPPORTED_RF_INTERFACES		4
-- 
cgit v1.1


From 2b0bf6c85a4940e00516f68ff7103329abf8512d Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Mon, 22 Sep 2014 11:17:41 -0700
Subject: Bluetooth: Convert bt_<level> logging functions to return void

No caller or macro uses the return value so make all
the functions return void.

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/bluetooth.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h
index 7e666d0..58695ff 100644
--- a/include/net/bluetooth/bluetooth.h
+++ b/include/net/bluetooth/bluetooth.h
@@ -120,9 +120,9 @@ struct bt_voice {
 #define BT_RCVMTU		13
 
 __printf(1, 2)
-int bt_info(const char *fmt, ...);
+void bt_info(const char *fmt, ...);
 __printf(1, 2)
-int bt_err(const char *fmt, ...);
+void bt_err(const char *fmt, ...);
 
 #define BT_INFO(fmt, ...)	bt_info(fmt "\n", ##__VA_ARGS__)
 #define BT_ERR(fmt, ...)	bt_err(fmt "\n", ##__VA_ARGS__)
-- 
cgit v1.1


From d41c15cf95bd91b9c333f6f749670e22c8a47ad9 Mon Sep 17 00:00:00 2001
From: Johan Hedberg <johan.hedberg@intel.com>
Date: Wed, 24 Sep 2014 13:14:46 +0300
Subject: Bluetooth: Fix reason code used for rejecting SCO connections

The core specification defines valid values for the
HCI_Reject_Synchronous_Connection_Request command to be 0x0D-0x0F. So
far the code has been using HCI_ERROR_REMOTE_USER_TERM (0x13) which is
not a valid value and is therefore being rejected by some controllers:

 > HCI Event: Connect Request (0x04) plen 10
	bdaddr 40:6F:2A:6A:E5:E0 class 0x000000 type eSCO
 < HCI Command: Reject Synchronous Connection (0x01|0x002a) plen 7
	bdaddr 40:6F:2A:6A:E5:E0 reason 0x13
	Reason: Remote User Terminated Connection
 > HCI Event: Command Status (0x0f) plen 4
	Reject Synchronous Connection (0x01|0x002a) status 0x12 ncmd 1
	Error: Invalid HCI Command Parameters

This patch introduces a new define for a value from the valid range
(0x0d == Connection Rejected Due To Limited Resources) and uses it
instead for rejecting incoming connections.

Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 3f8547f..6e8f249 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -385,6 +385,7 @@ enum {
 #define HCI_ERROR_AUTH_FAILURE		0x05
 #define HCI_ERROR_MEMORY_EXCEEDED	0x07
 #define HCI_ERROR_CONNECTION_TIMEOUT	0x08
+#define HCI_ERROR_REJ_LIMITED_RESOURCES	0x0d
 #define HCI_ERROR_REJ_BAD_ADDR		0x0f
 #define HCI_ERROR_REMOTE_USER_TERM	0x13
 #define HCI_ERROR_REMOTE_LOW_RESOURCES	0x14
-- 
cgit v1.1


From 53e50398968d43338c4d932114e68bc099fc5fbd Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Sat, 20 Sep 2014 14:52:30 -0700
Subject: net: Remove gso_send_check as an offload callback

The send_check logic was only interesting in cases of TCP offload and
UDP UFO where the checksum needed to be initialized to the pseudo
header checksum. Now we've moved that logic into the related
gso_segment functions so gso_send_check is no longer needed.

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 4354b43..9f5d293 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1911,7 +1911,6 @@ struct packet_type {
 struct offload_callbacks {
 	struct sk_buff		*(*gso_segment)(struct sk_buff *skb,
 						netdev_features_t features);
-	int			(*gso_send_check)(struct sk_buff *skb);
 	struct sk_buff		**(*gro_receive)(struct sk_buff **head,
 					       struct sk_buff *skb);
 	int			(*gro_complete)(struct sk_buff *skb, int nhoff);
-- 
cgit v1.1


From 7276ca3fa23864133f5ee7431c51546d9b7f695f Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Mon, 22 Sep 2014 13:28:16 +0200
Subject: netfilter: bridge: nf_bridge_copy_header as static inline in header

Move nf_bridge_copy_header() as static inline in netfilter_bridge.h
header file. This patch prepares the modularization of the br_netfilter
code.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter_bridge.h | 48 +++++++++++++++++++++++++++++++---------
 1 file changed, 38 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h
index 8ab1c27..fe996d5 100644
--- a/include/linux/netfilter_bridge.h
+++ b/include/linux/netfilter_bridge.h
@@ -24,16 +24,6 @@ enum nf_br_hook_priorities {
 #define BRNF_8021Q			0x10
 #define BRNF_PPPoE			0x20
 
-/* Only used in br_forward.c */
-int nf_bridge_copy_header(struct sk_buff *skb);
-static inline int nf_bridge_maybe_copy_header(struct sk_buff *skb)
-{
-	if (skb->nf_bridge &&
-	    skb->nf_bridge->mask & (BRNF_BRIDGED | BRNF_BRIDGED_DNAT))
-		return nf_bridge_copy_header(skb);
-  	return 0;
-}
-
 static inline unsigned int nf_bridge_encap_header_len(const struct sk_buff *skb)
 {
 	switch (skb->protocol) {
@@ -46,6 +36,44 @@ static inline unsigned int nf_bridge_encap_header_len(const struct sk_buff *skb)
 	}
 }
 
+static inline void nf_bridge_update_protocol(struct sk_buff *skb)
+{
+	if (skb->nf_bridge->mask & BRNF_8021Q)
+		skb->protocol = htons(ETH_P_8021Q);
+	else if (skb->nf_bridge->mask & BRNF_PPPoE)
+		skb->protocol = htons(ETH_P_PPP_SES);
+}
+
+/* Fill in the header for fragmented IP packets handled by
+ * the IPv4 connection tracking code.
+ *
+ * Only used in br_forward.c
+ */
+static inline int nf_bridge_copy_header(struct sk_buff *skb)
+{
+	int err;
+	unsigned int header_size;
+
+	nf_bridge_update_protocol(skb);
+	header_size = ETH_HLEN + nf_bridge_encap_header_len(skb);
+	err = skb_cow_head(skb, header_size);
+	if (err)
+		return err;
+
+	skb_copy_to_linear_data_offset(skb, -header_size,
+				       skb->nf_bridge->data, header_size);
+	__skb_push(skb, nf_bridge_encap_header_len(skb));
+	return 0;
+}
+
+static inline int nf_bridge_maybe_copy_header(struct sk_buff *skb)
+{
+	if (skb->nf_bridge &&
+	    skb->nf_bridge->mask & (BRNF_BRIDGED | BRNF_BRIDGED_DNAT))
+		return nf_bridge_copy_header(skb);
+  	return 0;
+}
+
 static inline unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb)
 {
 	if (unlikely(skb->nf_bridge->mask & BRNF_PPPoE))
-- 
cgit v1.1


From 34666d467cbf1e2e3c7bb15a63eccfb582cdd71f Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Thu, 18 Sep 2014 11:29:03 +0200
Subject: netfilter: bridge: move br_netfilter out of the core

Jesper reported that br_netfilter always registers the hooks since
this is part of the bridge core. This harms performance for people that
don't need this.

This patch modularizes br_netfilter so it can be rmmod'ed, thus,
the hooks can be unregistered. I think the bridge netfilter should have
been a separated module since the beginning, Patrick agreed on that.

Note that this is breaking compatibility for users that expect that
bridge netfilter is going to be available after explicitly 'modprobe
bridge' or via automatic load through brctl.

However, the damage can be easily undone by modprobing br_netfilter.
The bridge core also spots a message to provide a clue to people that
didn't notice that this has been deprecated.

On top of that, the plan is that nftables will not rely on this software
layer, but integrate the connection tracking into the bridge layer to
enable stateful filtering and NAT, which is was bridge netfilter users
seem to require.

This patch still keeps the fake_dst_ops in the bridge core, since this
is required by when the bridge port is initialized. So we can safely
modprobe/rmmod br_netfilter anytime.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Acked-by: Florian Westphal <fw@strlen.de>
---
 include/linux/netfilter_bridge.h       |  2 +-
 include/linux/skbuff.h                 | 12 ++++++------
 include/net/neighbour.h                |  2 +-
 include/net/netfilter/ipv4/nf_reject.h |  2 +-
 include/net/netfilter/ipv6/nf_reject.h |  2 +-
 5 files changed, 10 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h
index fe996d5..c755e49 100644
--- a/include/linux/netfilter_bridge.h
+++ b/include/linux/netfilter_bridge.h
@@ -15,7 +15,7 @@ enum nf_br_hook_priorities {
 	NF_BR_PRI_LAST = INT_MAX,
 };
 
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
 
 #define BRNF_PKT_TYPE			0x01
 #define BRNF_BRIDGED_DNAT		0x02
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 07c9fdd..c4ff43f 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -156,7 +156,7 @@ struct nf_conntrack {
 };
 #endif
 
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
 struct nf_bridge_info {
 	atomic_t		use;
 	unsigned int		mask;
@@ -560,7 +560,7 @@ struct sk_buff {
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 	struct nf_conntrack	*nfct;
 #endif
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
 	struct nf_bridge_info	*nf_bridge;
 #endif
 
@@ -2977,7 +2977,7 @@ static inline void nf_conntrack_get(struct nf_conntrack *nfct)
 		atomic_inc(&nfct->use);
 }
 #endif
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
 static inline void nf_bridge_put(struct nf_bridge_info *nf_bridge)
 {
 	if (nf_bridge && atomic_dec_and_test(&nf_bridge->use))
@@ -2995,7 +2995,7 @@ static inline void nf_reset(struct sk_buff *skb)
 	nf_conntrack_put(skb->nfct);
 	skb->nfct = NULL;
 #endif
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
 	nf_bridge_put(skb->nf_bridge);
 	skb->nf_bridge = NULL;
 #endif
@@ -3016,7 +3016,7 @@ static inline void __nf_copy(struct sk_buff *dst, const struct sk_buff *src)
 	nf_conntrack_get(src->nfct);
 	dst->nfctinfo = src->nfctinfo;
 #endif
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
 	dst->nf_bridge  = src->nf_bridge;
 	nf_bridge_get(src->nf_bridge);
 #endif
@@ -3030,7 +3030,7 @@ static inline void nf_copy(struct sk_buff *dst, const struct sk_buff *src)
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 	nf_conntrack_put(dst->nfct);
 #endif
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
 	nf_bridge_put(dst->nf_bridge);
 #endif
 	__nf_copy(dst, src);
diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index 47f4254..f60558d 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -373,7 +373,7 @@ static inline int neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
 	return 0;
 }
 
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
 static inline int neigh_hh_bridge(struct hh_cache *hh, struct sk_buff *skb)
 {
 	unsigned int seq, hh_alen;
diff --git a/include/net/netfilter/ipv4/nf_reject.h b/include/net/netfilter/ipv4/nf_reject.h
index 931fbf8..f713b5a 100644
--- a/include/net/netfilter/ipv4/nf_reject.h
+++ b/include/net/netfilter/ipv4/nf_reject.h
@@ -98,7 +98,7 @@ static void nf_send_reset(struct sk_buff *oldskb, int hook)
 
 	nf_ct_attach(nskb, oldskb);
 
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
 	/* If we use ip_local_out for bridged traffic, the MAC source on
 	 * the RST will be ours, instead of the destination's.  This confuses
 	 * some routers/firewalls, and they drop the packet.  So we need to
diff --git a/include/net/netfilter/ipv6/nf_reject.h b/include/net/netfilter/ipv6/nf_reject.h
index 710d17e..7a10cfc 100644
--- a/include/net/netfilter/ipv6/nf_reject.h
+++ b/include/net/netfilter/ipv6/nf_reject.h
@@ -147,7 +147,7 @@ static void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook)
 
 	nf_ct_attach(nskb, oldskb);
 
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
 	/* If we use ip6_local_out for bridged traffic, the MAC source on
 	 * the RST will be ours, instead of the destination's.  This confuses
 	 * some routers/firewalls, and they drop the packet.  So we need to
-- 
cgit v1.1


From 99c55f7d47c0dc6fc64729f37bf435abf43f4c60 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@plumgrid.com>
Date: Fri, 26 Sep 2014 00:16:57 -0700
Subject: bpf: introduce BPF syscall and maps

BPF syscall is a multiplexor for a range of different operations on eBPF.
This patch introduces syscall with single command to create a map.
Next patch adds commands to access maps.

'maps' is a generic storage of different types for sharing data between kernel
and userspace.

Userspace example:
/* this syscall wrapper creates a map with given type and attributes
 * and returns map_fd on success.
 * use close(map_fd) to delete the map
 */
int bpf_create_map(enum bpf_map_type map_type, int key_size,
                   int value_size, int max_entries)
{
    union bpf_attr attr = {
        .map_type = map_type,
        .key_size = key_size,
        .value_size = value_size,
        .max_entries = max_entries
    };

    return bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
}

'union bpf_attr' is backwards compatible with future extensions.

More details in Documentation/networking/filter.txt and in manpage

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/bpf.h      | 41 +++++++++++++++++++++++++++++++++++++++++
 include/uapi/linux/bpf.h | 23 +++++++++++++++++++++++
 2 files changed, 64 insertions(+)
 create mode 100644 include/linux/bpf.h

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
new file mode 100644
index 0000000..48014a7
--- /dev/null
+++ b/include/linux/bpf.h
@@ -0,0 +1,41 @@
+/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#ifndef _LINUX_BPF_H
+#define _LINUX_BPF_H 1
+
+#include <uapi/linux/bpf.h>
+#include <linux/workqueue.h>
+
+struct bpf_map;
+
+/* map is generic key/value storage optionally accesible by eBPF programs */
+struct bpf_map_ops {
+	/* funcs callable from userspace (via syscall) */
+	struct bpf_map *(*map_alloc)(union bpf_attr *attr);
+	void (*map_free)(struct bpf_map *);
+};
+
+struct bpf_map {
+	atomic_t refcnt;
+	enum bpf_map_type map_type;
+	u32 key_size;
+	u32 value_size;
+	u32 max_entries;
+	struct bpf_map_ops *ops;
+	struct work_struct work;
+};
+
+struct bpf_map_type_list {
+	struct list_head list_node;
+	struct bpf_map_ops *ops;
+	enum bpf_map_type type;
+};
+
+void bpf_register_map_type(struct bpf_map_type_list *tl);
+void bpf_map_put(struct bpf_map *map);
+
+#endif /* _LINUX_BPF_H */
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 479ed0b..f58a10f 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -62,4 +62,27 @@ struct bpf_insn {
 	__s32	imm;		/* signed immediate constant */
 };
 
+/* BPF syscall commands */
+enum bpf_cmd {
+	/* create a map with given type and attributes
+	 * fd = bpf(BPF_MAP_CREATE, union bpf_attr *, u32 size)
+	 * returns fd or negative error
+	 * map is deleted when fd is closed
+	 */
+	BPF_MAP_CREATE,
+};
+
+enum bpf_map_type {
+	BPF_MAP_TYPE_UNSPEC,
+};
+
+union bpf_attr {
+	struct { /* anonymous struct used by BPF_MAP_CREATE command */
+		__u32	map_type;	/* one of enum bpf_map_type */
+		__u32	key_size;	/* size of key in bytes */
+		__u32	value_size;	/* size of value in bytes */
+		__u32	max_entries;	/* max number of entries in a map */
+	};
+} __attribute__((aligned(8)));
+
 #endif /* _UAPI__LINUX_BPF_H__ */
-- 
cgit v1.1


From 749730ce42a2121e1c88350d69478bff3994b10a Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@plumgrid.com>
Date: Fri, 26 Sep 2014 00:16:58 -0700
Subject: bpf: enable bpf syscall on x64 and i386

done as separate commit to ease conflict resolution

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/syscalls.h          | 3 ++-
 include/uapi/asm-generic/unistd.h | 4 +++-
 2 files changed, 5 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 0f86d85..bda9b81 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -65,6 +65,7 @@ struct old_linux_dirent;
 struct perf_event_attr;
 struct file_handle;
 struct sigaltstack;
+union bpf_attr;
 
 #include <linux/types.h>
 #include <linux/aio_abi.h>
@@ -875,5 +876,5 @@ asmlinkage long sys_seccomp(unsigned int op, unsigned int flags,
 			    const char __user *uargs);
 asmlinkage long sys_getrandom(char __user *buf, size_t count,
 			      unsigned int flags);
-
+asmlinkage long sys_bpf(int cmd, union bpf_attr *attr, unsigned int size);
 #endif
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index 11d11bc..22749c1 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -705,9 +705,11 @@ __SYSCALL(__NR_seccomp, sys_seccomp)
 __SYSCALL(__NR_getrandom, sys_getrandom)
 #define __NR_memfd_create 279
 __SYSCALL(__NR_memfd_create, sys_memfd_create)
+#define __NR_bpf 280
+__SYSCALL(__NR_bpf, sys_bpf)
 
 #undef __NR_syscalls
-#define __NR_syscalls 280
+#define __NR_syscalls 281
 
 /*
  * All syscalls below here should go away really,
-- 
cgit v1.1


From db20fd2b01087bdfbe30bce314a198eefedcc42e Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@plumgrid.com>
Date: Fri, 26 Sep 2014 00:16:59 -0700
Subject: bpf: add lookup/update/delete/iterate methods to BPF maps

'maps' is a generic storage of different types for sharing data between kernel
and userspace.

The maps are accessed from user space via BPF syscall, which has commands:

- create a map with given type and attributes
  fd = bpf(BPF_MAP_CREATE, union bpf_attr *attr, u32 size)
  returns fd or negative error

- lookup key in a given map referenced by fd
  err = bpf(BPF_MAP_LOOKUP_ELEM, union bpf_attr *attr, u32 size)
  using attr->map_fd, attr->key, attr->value
  returns zero and stores found elem into value or negative error

- create or update key/value pair in a given map
  err = bpf(BPF_MAP_UPDATE_ELEM, union bpf_attr *attr, u32 size)
  using attr->map_fd, attr->key, attr->value
  returns zero or negative error

- find and delete element by key in a given map
  err = bpf(BPF_MAP_DELETE_ELEM, union bpf_attr *attr, u32 size)
  using attr->map_fd, attr->key

- iterate map elements (based on input key return next_key)
  err = bpf(BPF_MAP_GET_NEXT_KEY, union bpf_attr *attr, u32 size)
  using attr->map_fd, attr->key, attr->next_key

- close(fd) deletes the map

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/bpf.h      |  8 ++++++++
 include/uapi/linux/bpf.h | 38 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 46 insertions(+)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 48014a7..2887f3f 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -9,6 +9,7 @@
 
 #include <uapi/linux/bpf.h>
 #include <linux/workqueue.h>
+#include <linux/file.h>
 
 struct bpf_map;
 
@@ -17,6 +18,12 @@ struct bpf_map_ops {
 	/* funcs callable from userspace (via syscall) */
 	struct bpf_map *(*map_alloc)(union bpf_attr *attr);
 	void (*map_free)(struct bpf_map *);
+	int (*map_get_next_key)(struct bpf_map *map, void *key, void *next_key);
+
+	/* funcs callable from userspace and from eBPF programs */
+	void *(*map_lookup_elem)(struct bpf_map *map, void *key);
+	int (*map_update_elem)(struct bpf_map *map, void *key, void *value);
+	int (*map_delete_elem)(struct bpf_map *map, void *key);
 };
 
 struct bpf_map {
@@ -37,5 +44,6 @@ struct bpf_map_type_list {
 
 void bpf_register_map_type(struct bpf_map_type_list *tl);
 void bpf_map_put(struct bpf_map *map);
+struct bpf_map *bpf_map_get(struct fd f);
 
 #endif /* _LINUX_BPF_H */
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index f58a10f..395cabd 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -70,6 +70,35 @@ enum bpf_cmd {
 	 * map is deleted when fd is closed
 	 */
 	BPF_MAP_CREATE,
+
+	/* lookup key in a given map
+	 * err = bpf(BPF_MAP_LOOKUP_ELEM, union bpf_attr *attr, u32 size)
+	 * Using attr->map_fd, attr->key, attr->value
+	 * returns zero and stores found elem into value
+	 * or negative error
+	 */
+	BPF_MAP_LOOKUP_ELEM,
+
+	/* create or update key/value pair in a given map
+	 * err = bpf(BPF_MAP_UPDATE_ELEM, union bpf_attr *attr, u32 size)
+	 * Using attr->map_fd, attr->key, attr->value
+	 * returns zero or negative error
+	 */
+	BPF_MAP_UPDATE_ELEM,
+
+	/* find and delete elem by key in a given map
+	 * err = bpf(BPF_MAP_DELETE_ELEM, union bpf_attr *attr, u32 size)
+	 * Using attr->map_fd, attr->key
+	 * returns zero or negative error
+	 */
+	BPF_MAP_DELETE_ELEM,
+
+	/* lookup key in a given map and return next key
+	 * err = bpf(BPF_MAP_GET_NEXT_KEY, union bpf_attr *attr, u32 size)
+	 * Using attr->map_fd, attr->key, attr->next_key
+	 * returns zero and stores next key or negative error
+	 */
+	BPF_MAP_GET_NEXT_KEY,
 };
 
 enum bpf_map_type {
@@ -83,6 +112,15 @@ union bpf_attr {
 		__u32	value_size;	/* size of value in bytes */
 		__u32	max_entries;	/* max number of entries in a map */
 	};
+
+	struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
+		__u32		map_fd;
+		__aligned_u64	key;
+		union {
+			__aligned_u64 value;
+			__aligned_u64 next_key;
+		};
+	};
 } __attribute__((aligned(8)));
 
 #endif /* _UAPI__LINUX_BPF_H__ */
-- 
cgit v1.1


From 09756af46893c18839062976c3252e93a1beeba7 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@plumgrid.com>
Date: Fri, 26 Sep 2014 00:17:00 -0700
Subject: bpf: expand BPF syscall with program load/unload

eBPF programs are similar to kernel modules. They are loaded by the user
process and automatically unloaded when process exits. Each eBPF program is
a safe run-to-completion set of instructions. eBPF verifier statically
determines that the program terminates and is safe to execute.

The following syscall wrapper can be used to load the program:
int bpf_prog_load(enum bpf_prog_type prog_type,
                  const struct bpf_insn *insns, int insn_cnt,
                  const char *license)
{
    union bpf_attr attr = {
        .prog_type = prog_type,
        .insns = ptr_to_u64(insns),
        .insn_cnt = insn_cnt,
        .license = ptr_to_u64(license),
    };

    return bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
}
where 'insns' is an array of eBPF instructions and 'license' is a string
that must be GPL compatible to call helper functions marked gpl_only

Upon succesful load the syscall returns prog_fd.
Use close(prog_fd) to unload the program.

User space tests and examples follow in the later patches

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/bpf.h      | 38 ++++++++++++++++++++++++++++++++++++++
 include/linux/filter.h   |  8 ++------
 include/uapi/linux/bpf.h | 26 ++++++++++++++++++++++++++
 3 files changed, 66 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 2887f3f..9297918 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -46,4 +46,42 @@ void bpf_register_map_type(struct bpf_map_type_list *tl);
 void bpf_map_put(struct bpf_map *map);
 struct bpf_map *bpf_map_get(struct fd f);
 
+/* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs
+ * to in-kernel helper functions and for adjusting imm32 field in BPF_CALL
+ * instructions after verifying
+ */
+struct bpf_func_proto {
+	u64 (*func)(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
+	bool gpl_only;
+};
+
+struct bpf_verifier_ops {
+	/* return eBPF function prototype for verification */
+	const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
+};
+
+struct bpf_prog_type_list {
+	struct list_head list_node;
+	struct bpf_verifier_ops *ops;
+	enum bpf_prog_type type;
+};
+
+void bpf_register_prog_type(struct bpf_prog_type_list *tl);
+
+struct bpf_prog;
+
+struct bpf_prog_aux {
+	atomic_t refcnt;
+	bool is_gpl_compatible;
+	enum bpf_prog_type prog_type;
+	struct bpf_verifier_ops *ops;
+	struct bpf_map **used_maps;
+	u32 used_map_cnt;
+	struct bpf_prog *prog;
+	struct work_struct work;
+};
+
+void bpf_prog_put(struct bpf_prog *prog);
+struct bpf_prog *bpf_prog_get(u32 ufd);
+
 #endif /* _LINUX_BPF_H */
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 1a0bc6d..4ffc095 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -21,6 +21,7 @@
 struct sk_buff;
 struct sock;
 struct seccomp_data;
+struct bpf_prog_aux;
 
 /* ArgX, context and stack frame pointer register positions. Note,
  * Arg1, Arg2, Arg3, etc are used as argument mappings of function
@@ -300,17 +301,12 @@ struct bpf_binary_header {
 	u8 image[];
 };
 
-struct bpf_work_struct {
-	struct bpf_prog *prog;
-	struct work_struct work;
-};
-
 struct bpf_prog {
 	u16			pages;		/* Number of allocated pages */
 	bool			jited;		/* Is our filter JIT'ed? */
 	u32			len;		/* Number of filter blocks */
 	struct sock_fprog_kern	*orig_prog;	/* Original BPF program */
-	struct bpf_work_struct	*work;		/* Deferred free work struct */
+	struct bpf_prog_aux	*aux;		/* Auxiliary fields */
 	unsigned int		(*bpf_func)(const struct sk_buff *skb,
 					    const struct bpf_insn *filter);
 	/* Instructions for interpreter */
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 395cabd..424f442 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -99,12 +99,23 @@ enum bpf_cmd {
 	 * returns zero and stores next key or negative error
 	 */
 	BPF_MAP_GET_NEXT_KEY,
+
+	/* verify and load eBPF program
+	 * prog_fd = bpf(BPF_PROG_LOAD, union bpf_attr *attr, u32 size)
+	 * Using attr->prog_type, attr->insns, attr->license
+	 * returns fd or negative error
+	 */
+	BPF_PROG_LOAD,
 };
 
 enum bpf_map_type {
 	BPF_MAP_TYPE_UNSPEC,
 };
 
+enum bpf_prog_type {
+	BPF_PROG_TYPE_UNSPEC,
+};
+
 union bpf_attr {
 	struct { /* anonymous struct used by BPF_MAP_CREATE command */
 		__u32	map_type;	/* one of enum bpf_map_type */
@@ -121,6 +132,21 @@ union bpf_attr {
 			__aligned_u64 next_key;
 		};
 	};
+
+	struct { /* anonymous struct used by BPF_PROG_LOAD command */
+		__u32		prog_type;	/* one of enum bpf_prog_type */
+		__u32		insn_cnt;
+		__aligned_u64	insns;
+		__aligned_u64	license;
+	};
 } __attribute__((aligned(8)));
 
+/* integer value in 'imm' field of BPF_CALL instruction selects which helper
+ * function eBPF program intends to call
+ */
+enum bpf_func_id {
+	BPF_FUNC_unspec,
+	__BPF_FUNC_MAX_ID,
+};
+
 #endif /* _UAPI__LINUX_BPF_H__ */
-- 
cgit v1.1


From 51580e798cb61b0fc63fa3aa6c5c975375aa0550 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@plumgrid.com>
Date: Fri, 26 Sep 2014 00:17:02 -0700
Subject: bpf: verifier (add docs)

this patch adds all of eBPF verfier documentation and empty bpf_check()

The end goal for the verifier is to statically check safety of the program.

Verifier will catch:
- loops
- out of range jumps
- unreachable instructions
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention

More details in Documentation/networking/filter.txt

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/bpf.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 9297918..9dfeb36 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -83,5 +83,7 @@ struct bpf_prog_aux {
 
 void bpf_prog_put(struct bpf_prog *prog);
 struct bpf_prog *bpf_prog_get(u32 ufd);
+/* verify correctness of eBPF program */
+int bpf_check(struct bpf_prog *fp, union bpf_attr *attr);
 
 #endif /* _LINUX_BPF_H */
-- 
cgit v1.1


From cbd357008604925355ae7b54a09137dabb81b580 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@plumgrid.com>
Date: Fri, 26 Sep 2014 00:17:03 -0700
Subject: bpf: verifier (add ability to receive verification log)

add optional attributes for BPF_PROG_LOAD syscall:
union bpf_attr {
    struct {
	...
	__u32         log_level; /* verbosity level of eBPF verifier */
	__u32         log_size;  /* size of user buffer */
	__aligned_u64 log_buf;   /* user supplied 'char *buffer' */
    };
};

when log_level > 0 the verifier will return its verification log in the user
supplied buffer 'log_buf' which can be used by program author to analyze why
verifier rejected given program.

'Understanding eBPF verifier messages' section of Documentation/networking/filter.txt
provides several examples of these messages, like the program:

  BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
  BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
  BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
  BPF_LD_MAP_FD(BPF_REG_1, 0),
  BPF_CALL_FUNC(BPF_FUNC_map_lookup_elem),
  BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
  BPF_ST_MEM(BPF_DW, BPF_REG_0, 4, 0),
  BPF_EXIT_INSN(),

will be rejected with the following multi-line message in log_buf:

  0: (7a) *(u64 *)(r10 -8) = 0
  1: (bf) r2 = r10
  2: (07) r2 += -8
  3: (b7) r1 = 0
  4: (85) call 1
  5: (15) if r0 == 0x0 goto pc+1
   R0=map_ptr R10=fp
  6: (7a) *(u64 *)(r0 +4) = 0
  misaligned access off 4 size 8

The format of the output can change at any time as verifier evolves.

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/bpf.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 424f442..31b0ac2 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -138,6 +138,9 @@ union bpf_attr {
 		__u32		insn_cnt;
 		__aligned_u64	insns;
 		__aligned_u64	license;
+		__u32		log_level;	/* verbosity level of verifier */
+		__u32		log_size;	/* size of user buffer */
+		__aligned_u64	log_buf;	/* user supplied buffer */
 	};
 } __attribute__((aligned(8)));
 
-- 
cgit v1.1


From 0246e64d9a5fcd4805198de59b9b5cf1f974eb41 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@plumgrid.com>
Date: Fri, 26 Sep 2014 00:17:04 -0700
Subject: bpf: handle pseudo BPF_LD_IMM64 insn

eBPF programs passed from userspace are using pseudo BPF_LD_IMM64 instructions
to refer to process-local map_fd. Scan the program for such instructions and
if FDs are valid, convert them to 'struct bpf_map' pointers which will be used
by verifier to check access to maps in bpf_map_lookup/update() calls.
If program passes verifier, convert pseudo BPF_LD_IMM64 into generic by dropping
BPF_PSEUDO_MAP_FD flag.

Note that eBPF interpreter is generic and knows nothing about pseudo insns.

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/filter.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 4ffc095..ca95abd 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -145,6 +145,12 @@ struct bpf_prog_aux;
 		.off   = 0,					\
 		.imm   = ((__u64) (IMM)) >> 32 })
 
+#define BPF_PSEUDO_MAP_FD	1
+
+/* pseudo BPF_LD_IMM64 insn used to refer to process-local map_fd */
+#define BPF_LD_MAP_FD(DST, MAP_FD)				\
+	BPF_LD_IMM64_RAW(DST, BPF_PSEUDO_MAP_FD, MAP_FD)
+
 /* Short form of mov based on type, BPF_X: dst_reg = src_reg, BPF_K: dst_reg = imm32 */
 
 #define BPF_MOV64_RAW(TYPE, DST, SRC, IMM)			\
-- 
cgit v1.1


From 17a5267067f3c372fec9ffb798d6eaba6b5e6a4c Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@plumgrid.com>
Date: Fri, 26 Sep 2014 00:17:06 -0700
Subject: bpf: verifier (add verifier core)

This patch adds verifier core which simulates execution of every insn and
records the state of registers and program stack. Every branch instruction seen
during simulation is pushed into state stack. When verifier reaches BPF_EXIT,
it pops the state from the stack and continues until it reaches BPF_EXIT again.
For program:
1: bpf_mov r1, xxx
2: if (r1 == 0) goto 5
3: bpf_mov r0, 1
4: goto 6
5: bpf_mov r0, 2
6: bpf_exit
The verifier will walk insns: 1, 2, 3, 4, 6
then it will pop the state recorded at insn#2 and will continue: 5, 6

This way it walks all possible paths through the program and checks all
possible values of registers. While doing so, it checks for:
- invalid instructions
- uninitialized register access
- uninitialized stack access
- misaligned stack access
- out of range stack access
- invalid calling convention
- instruction encoding is not using reserved fields

Kernel subsystem configures the verifier with two callbacks:

- bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
  that provides information to the verifer which fields of 'ctx'
  are accessible (remember 'ctx' is the first argument to eBPF program)

- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
  returns argument constraints of kernel helper functions that eBPF program
  may call, so that verifier can checks that R1-R5 types match the prototype

More details in Documentation/networking/filter.txt and in kernel/bpf/verifier.c

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/bpf.h | 47 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 47 insertions(+)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 9dfeb36..3cf9175 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -46,6 +46,31 @@ void bpf_register_map_type(struct bpf_map_type_list *tl);
 void bpf_map_put(struct bpf_map *map);
 struct bpf_map *bpf_map_get(struct fd f);
 
+/* function argument constraints */
+enum bpf_arg_type {
+	ARG_ANYTHING = 0,	/* any argument is ok */
+
+	/* the following constraints used to prototype
+	 * bpf_map_lookup/update/delete_elem() functions
+	 */
+	ARG_CONST_MAP_PTR,	/* const argument used as pointer to bpf_map */
+	ARG_PTR_TO_MAP_KEY,	/* pointer to stack used as map key */
+	ARG_PTR_TO_MAP_VALUE,	/* pointer to stack used as map value */
+
+	/* the following constraints used to prototype bpf_memcmp() and other
+	 * functions that access data on eBPF program stack
+	 */
+	ARG_PTR_TO_STACK,	/* any pointer to eBPF program stack */
+	ARG_CONST_STACK_SIZE,	/* number of bytes accessed from stack */
+};
+
+/* type of values returned from helper functions */
+enum bpf_return_type {
+	RET_INTEGER,			/* function returns integer */
+	RET_VOID,			/* function doesn't return anything */
+	RET_PTR_TO_MAP_VALUE_OR_NULL,	/* returns a pointer to map elem value or NULL */
+};
+
 /* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs
  * to in-kernel helper functions and for adjusting imm32 field in BPF_CALL
  * instructions after verifying
@@ -53,11 +78,33 @@ struct bpf_map *bpf_map_get(struct fd f);
 struct bpf_func_proto {
 	u64 (*func)(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
 	bool gpl_only;
+	enum bpf_return_type ret_type;
+	enum bpf_arg_type arg1_type;
+	enum bpf_arg_type arg2_type;
+	enum bpf_arg_type arg3_type;
+	enum bpf_arg_type arg4_type;
+	enum bpf_arg_type arg5_type;
+};
+
+/* bpf_context is intentionally undefined structure. Pointer to bpf_context is
+ * the first argument to eBPF programs.
+ * For socket filters: 'struct bpf_context *' == 'struct sk_buff *'
+ */
+struct bpf_context;
+
+enum bpf_access_type {
+	BPF_READ = 1,
+	BPF_WRITE = 2
 };
 
 struct bpf_verifier_ops {
 	/* return eBPF function prototype for verification */
 	const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
+
+	/* return true if 'size' wide access at offset 'off' within bpf_context
+	 * with 'type' (read or write) is allowed
+	 */
+	bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
 };
 
 struct bpf_prog_type_list {
-- 
cgit v1.1


From 6ea754eb761d9e7a8ac6fa462b05f9e4cf04fb6c Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Mon, 22 Sep 2014 11:10:50 -0700
Subject: net: Change netdev_<level> logging functions to return void

No caller or macro uses the return value so make all
the functions return void.

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 9f5d293..9b7fbac 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3612,22 +3612,22 @@ static inline const char *netdev_reg_state(const struct net_device *dev)
 }
 
 __printf(3, 4)
-int netdev_printk(const char *level, const struct net_device *dev,
-		  const char *format, ...);
+void netdev_printk(const char *level, const struct net_device *dev,
+		   const char *format, ...);
 __printf(2, 3)
-int netdev_emerg(const struct net_device *dev, const char *format, ...);
+void netdev_emerg(const struct net_device *dev, const char *format, ...);
 __printf(2, 3)
-int netdev_alert(const struct net_device *dev, const char *format, ...);
+void netdev_alert(const struct net_device *dev, const char *format, ...);
 __printf(2, 3)
-int netdev_crit(const struct net_device *dev, const char *format, ...);
+void netdev_crit(const struct net_device *dev, const char *format, ...);
 __printf(2, 3)
-int netdev_err(const struct net_device *dev, const char *format, ...);
+void netdev_err(const struct net_device *dev, const char *format, ...);
 __printf(2, 3)
-int netdev_warn(const struct net_device *dev, const char *format, ...);
+void netdev_warn(const struct net_device *dev, const char *format, ...);
 __printf(2, 3)
-int netdev_notice(const struct net_device *dev, const char *format, ...);
+void netdev_notice(const struct net_device *dev, const char *format, ...);
 __printf(2, 3)
-int netdev_info(const struct net_device *dev, const char *format, ...);
+void netdev_info(const struct net_device *dev, const char *format, ...);
 
 #define MODULE_ALIAS_NETDEV(device) \
 	MODULE_ALIAS("netdev-" device)
@@ -3645,7 +3645,6 @@ do {								\
 ({								\
 	if (0)							\
 		netdev_printk(KERN_DEBUG, __dev, format, ##args); \
-	0;							\
 })
 #endif
 
-- 
cgit v1.1


From f4a775d14489a801a5b8b0540e23ab82e2703091 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 22 Sep 2014 16:29:32 -0700
Subject: net: introduce __skb_header_release()

While profiling TCP stack, I noticed one useless atomic operation
in tcp_sendmsg(), caused by skb_header_release().

It turns out all current skb_header_release() users have a fresh skb,
that no other user can see, so we can avoid one atomic operation.

Introduce __skb_header_release() to clearly document this.

This gave me a 1.5 % improvement on TCP_RR workload.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index f1bfa37..8eaa624 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1083,6 +1083,7 @@ static inline int skb_header_cloned(const struct sk_buff *skb)
  *	Drop a reference to the header part of the buffer.  This is done
  *	by acquiring a payload reference.  You must not read from the header
  *	part of skb->data after this.
+ *	Note : Check if you can use __skb_header_release() instead.
  */
 static inline void skb_header_release(struct sk_buff *skb)
 {
@@ -1092,6 +1093,20 @@ static inline void skb_header_release(struct sk_buff *skb)
 }
 
 /**
+ *	__skb_header_release - release reference to header
+ *	@skb: buffer to operate on
+ *
+ *	Variant of skb_header_release() assuming skb is private to caller.
+ *	We can avoid one atomic operation.
+ */
+static inline void __skb_header_release(struct sk_buff *skb)
+{
+	skb->nohdr = 1;
+	atomic_set(&skb_shinfo(skb)->dataref, 1 + (1 << SKB_DATAREF_SHIFT));
+}
+
+
+/**
  *	skb_shared - is the buffer shared
  *	@skb: buffer to check
  *
-- 
cgit v1.1


From 4565af0d406bed44bc8756230eae8f7caa5e0334 Mon Sep 17 00:00:00 2001
From: LEROY Christophe <christophe.leroy@c-s.fr>
Date: Tue, 23 Sep 2014 10:54:32 +0200
Subject: net: optimise csum_replace4()

csum_partial() is a generic function which is not optimised for small fixed
length calculations, and its use requires to store "from" and "to" values in
memory while we already have them available in registers. This also has impact,
especially on RISC processors. In the same spirit as the change done by
Eric Dumazet on csum_replace2(), this patch rewrites inet_proto_csum_replace4()
taking into account RFC1624.

I spotted during a NATted tcp transfert that csum_partial() is one of top 5
consuming functions (around 8%), and the second user of csum_partial() is
inet_proto_csum_replace4().

I have proposed the same modification to inet_proto_csum_replace4() in another
patch.

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/checksum.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/net/checksum.h b/include/net/checksum.h
index 87cb190..6465bae8 100644
--- a/include/net/checksum.h
+++ b/include/net/checksum.h
@@ -122,9 +122,7 @@ static inline __wsum csum_partial_ext(const void *buff, int len, __wsum sum)
 
 static inline void csum_replace4(__sum16 *sum, __be32 from, __be32 to)
 {
-	__be32 diff[] = { ~from, to };
-
-	*sum = csum_fold(csum_partial(diff, sizeof(diff), ~csum_unfold(*sum)));
+	*sum = csum_fold(csum_add(csum_sub(~csum_unfold(*sum), from), to));
 }
 
 /* Implements RFC 1624 (Incremental Internet Checksum)
-- 
cgit v1.1


From 24a2d43d8886f5a29c3cf108927f630c545a9a38 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sat, 27 Sep 2014 09:50:55 -0700
Subject: ipv4: rename ip_options_echo to __ip_options_echo()

ip_options_echo() assumes struct ip_options is provided in &IPCB(skb)->opt
Lets break this assumption, but provide a helper to not change all call points.

ip_send_unicast_reply() gets a new struct ip_options pointer.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip.h | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/net/ip.h b/include/net/ip.h
index fcd9068..0bb6207 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -180,8 +180,10 @@ static inline __u8 ip_reply_arg_flowi_flags(const struct ip_reply_arg *arg)
 	return (arg->flags & IP_REPLY_ARG_NOSRCCHECK) ? FLOWI_FLAG_ANYSRC : 0;
 }
 
-void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, __be32 daddr,
-			   __be32 saddr, const struct ip_reply_arg *arg,
+void ip_send_unicast_reply(struct net *net, struct sk_buff *skb,
+			   const struct ip_options *sopt,
+			   __be32 daddr, __be32 saddr,
+			   const struct ip_reply_arg *arg,
 			   unsigned int len);
 
 #define IP_INC_STATS(net, field)	SNMP_INC_STATS64((net)->mib.ip_statistics, field)
@@ -511,7 +513,14 @@ int ip_forward(struct sk_buff *skb);
  
 void ip_options_build(struct sk_buff *skb, struct ip_options *opt,
 		      __be32 daddr, struct rtable *rt, int is_frag);
-int ip_options_echo(struct ip_options *dopt, struct sk_buff *skb);
+
+int __ip_options_echo(struct ip_options *dopt, struct sk_buff *skb,
+		      const struct ip_options *sopt);
+static inline int ip_options_echo(struct ip_options *dopt, struct sk_buff *skb)
+{
+	return __ip_options_echo(dopt, skb, &IPCB(skb)->opt);
+}
+
 void ip_options_fragment(struct sk_buff *skb);
 int ip_options_compile(struct net *net, struct ip_options *opt,
 		       struct sk_buff *skb);
-- 
cgit v1.1


From a224772db8420ecb7ce91a9ba5d535ee3a50d982 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sat, 27 Sep 2014 09:50:56 -0700
Subject: ipv6: add a struct inet6_skb_parm param to ipv6_opt_accepted()

ipv6_opt_accepted() assumes IP6CB(skb) holds the struct inet6_skb_parm
that it needs. Lets not assume this, as TCP stack might use a different
place.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ipv6.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 7e247e9..97f4720 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -288,7 +288,8 @@ struct ipv6_txoptions *ipv6_renew_options(struct sock *sk,
 struct ipv6_txoptions *ipv6_fixup_options(struct ipv6_txoptions *opt_space,
 					  struct ipv6_txoptions *opt);
 
-bool ipv6_opt_accepted(const struct sock *sk, const struct sk_buff *skb);
+bool ipv6_opt_accepted(const struct sock *sk, const struct sk_buff *skb,
+		       const struct inet6_skb_parm *opt);
 
 static inline bool ipv6_accept_ra(struct inet6_dev *idev)
 {
-- 
cgit v1.1


From 971f10eca186cab238c49daa91f703c5a001b0b1 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sat, 27 Sep 2014 09:50:57 -0700
Subject: tcp: better TCP_SKB_CB layout to reduce cache line misses

TCP maintains lists of skb in write queue, and in receive queues
(in order and out of order queues)

Scanning these lists both in input and output path usually requires
access to skb->next, TCP_SKB_CB(skb)->seq, and TCP_SKB_CB(skb)->end_seq

These fields are currently in two different cache lines, meaning we
waste lot of memory bandwidth when these queues are big and flows
have either packet drops or packet reorders.

We can move TCP_SKB_CB(skb)->header at the end of TCP_SKB_CB, because
this header is not used in fast path. This allows TCP to search much faster
in the skb lists.

Even with regular flows, we save one cache line miss in fast path.

Thanks to Christoph Paasch for noticing we need to cleanup
skb->cb[] (IPCB/IP6CB) before entering IP stack in tx path,
and that I forgot IPCB use in tcp_v4_hnd_req() and tcp_v4_save_options().

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index a4201ef..4dc6641 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -696,12 +696,6 @@ static inline u32 tcp_skb_timestamp(const struct sk_buff *skb)
  * If this grows please adjust skbuff.h:skbuff->cb[xxx] size appropriately.
  */
 struct tcp_skb_cb {
-	union {
-		struct inet_skb_parm	h4;
-#if IS_ENABLED(CONFIG_IPV6)
-		struct inet6_skb_parm	h6;
-#endif
-	} header;	/* For incoming frames		*/
 	__u32		seq;		/* Starting sequence number	*/
 	__u32		end_seq;	/* SEQ + FIN + SYN + datalen	*/
 	__u32		tcp_tw_isn;	/* isn chosen by tcp_timewait_state_process() */
@@ -720,6 +714,12 @@ struct tcp_skb_cb {
 	__u8		ip_dsfield;	/* IPv4 tos or IPv6 dsfield	*/
 	/* 1 byte hole */
 	__u32		ack_seq;	/* Sequence number ACK'd	*/
+	union {
+		struct inet_skb_parm	h4;
+#if IS_ENABLED(CONFIG_IPV6)
+		struct inet6_skb_parm	h6;
+#endif
+	} header;	/* For incoming frames		*/
 };
 
 #define TCP_SKB_CB(__skb)	((struct tcp_skb_cb *)&((__skb)->cb[0]))
-- 
cgit v1.1


From cd7d8498c9a5d510c64db38d9f4f4fbc41790f09 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 24 Sep 2014 04:11:22 -0700
Subject: tcp: change tcp_skb_pcount() location

Our goal is to access no more than one cache line access per skb in
a write or receive queue when doing the various walks.

After recent TCP_SKB_CB() reorganizations, it is almost done.

Last part is tcp_skb_pcount() which currently uses
skb_shinfo(skb)->gso_segs, which is a terrible choice, because it needs
3 cache lines in current kernel (skb->head, skb->end, and
shinfo->gso_segs are all in 3 different cache lines, far from skb->cb)

This very simple patch reuses space currently taken by tcp_tw_isn
only in input path, as tcp_skb_pcount is only needed for skb stored in
write queue.

This considerably speeds up tcp_ack(), granted we avoid shinfo->tx_flags
to get SKBTX_ACK_TSTAMP, which seems possible.

This also speeds up all sack processing in general.

This speeds up tcp_sendmsg() because it no longer has to access/dirty
shinfo.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h | 23 +++++++++++++++++++++--
 1 file changed, 21 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 4dc6641..02a9a2c 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -698,7 +698,16 @@ static inline u32 tcp_skb_timestamp(const struct sk_buff *skb)
 struct tcp_skb_cb {
 	__u32		seq;		/* Starting sequence number	*/
 	__u32		end_seq;	/* SEQ + FIN + SYN + datalen	*/
-	__u32		tcp_tw_isn;	/* isn chosen by tcp_timewait_state_process() */
+	union {
+		/* Note : tcp_tw_isn is used in input path only
+		 *	  (isn chosen by tcp_timewait_state_process())
+		 *
+		 * 	  tcp_gso_segs is used in write queue only,
+		 *	  cf tcp_skb_pcount()
+		 */
+		__u32		tcp_tw_isn;
+		__u32		tcp_gso_segs;
+	};
 	__u8		tcp_flags;	/* TCP header flags. (tcp[13])	*/
 
 	__u8		sacked;		/* State flags for SACK/FACK.	*/
@@ -746,7 +755,17 @@ TCP_ECN_create_request(struct request_sock *req, const struct sk_buff *skb,
  */
 static inline int tcp_skb_pcount(const struct sk_buff *skb)
 {
-	return skb_shinfo(skb)->gso_segs;
+	return TCP_SKB_CB(skb)->tcp_gso_segs;
+}
+
+static inline void tcp_skb_pcount_set(struct sk_buff *skb, int segs)
+{
+	TCP_SKB_CB(skb)->tcp_gso_segs = segs;
+}
+
+static inline void tcp_skb_pcount_add(struct sk_buff *skb, int segs)
+{
+	TCP_SKB_CB(skb)->tcp_gso_segs += segs;
 }
 
 /* This is valid iff tcp_skb_pcount() > 1. */
-- 
cgit v1.1


From b2f2af21e37f6d12bd735c27da8942331aa9b3d7 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Wed, 24 Sep 2014 17:05:18 -0700
Subject: net: dsa: allow enabling and disable switch ports

Whenever a per-port network device is used/unused, invoke the switch
driver port_enable/port_disable callbacks to allow saving as much power
as possible by disabling unused parts of the switch (RX/TX logic, memory
arrays, PHYs...). We supply a PHY device argument to make sure the
switch driver can act on the PHY device if needed (like putting/taking
the PHY out of deep low power mode).

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dsa.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include')

diff --git a/include/net/dsa.h b/include/net/dsa.h
index d8054fb..4f664fe 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -224,6 +224,14 @@ struct dsa_switch_driver {
 	 */
 	int	(*suspend)(struct dsa_switch *ds);
 	int	(*resume)(struct dsa_switch *ds);
+
+	/*
+	 * Port enable/disable
+	 */
+	int	(*port_enable)(struct dsa_switch *ds, int port,
+			       struct phy_device *phy);
+	void	(*port_disable)(struct dsa_switch *ds, int port,
+				struct phy_device *phy);
 };
 
 void register_switch_driver(struct dsa_switch_driver *type);
-- 
cgit v1.1


From 7905288f093ad924004609bb89a7ce1597892726 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Wed, 24 Sep 2014 17:05:21 -0700
Subject: net: dsa: allow switches driver to implement get/set EEE

Allow switches driver to query and enable/disable EEE on a per-port
basis by implementing the ethtool_{get,set}_eee settings and delegating
these operations to the switch driver.

set_eee() will need to coordinate with the PHY driver to make sure that
EEE is enabled, the link-partner supports it and the auto-negotiation
result is satisfactory.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dsa.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include')

diff --git a/include/net/dsa.h b/include/net/dsa.h
index 4f664fe..58ad8c6 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -232,6 +232,15 @@ struct dsa_switch_driver {
 			       struct phy_device *phy);
 	void	(*port_disable)(struct dsa_switch *ds, int port,
 				struct phy_device *phy);
+
+	/*
+	 * EEE setttings
+	 */
+	int	(*set_eee)(struct dsa_switch *ds, int port,
+			   struct phy_device *phydev,
+			   struct ethtool_eee *e);
+	int	(*get_eee)(struct dsa_switch *ds, int port,
+			   struct ethtool_eee *e);
 };
 
 void register_switch_driver(struct dsa_switch_driver *type);
-- 
cgit v1.1


From 18d0264f630e200772bf236ac5747c47e908501e Mon Sep 17 00:00:00 2001
From: WANG Cong <xiyou.wangcong@gmail.com>
Date: Thu, 25 Sep 2014 10:26:37 -0700
Subject: net_sched: remove the first parameter from tcf_exts_destroy()

Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Acked-by: Jamal Hadi Salim <hadi@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/pkt_cls.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 6da46dc..73f9532 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -137,7 +137,7 @@ tcf_exts_exec(struct sk_buff *skb, struct tcf_exts *exts,
 int tcf_exts_validate(struct net *net, struct tcf_proto *tp,
 		      struct nlattr **tb, struct nlattr *rate_tlv,
 		      struct tcf_exts *exts, bool ovr);
-void tcf_exts_destroy(struct tcf_proto *tp, struct tcf_exts *exts);
+void tcf_exts_destroy(struct tcf_exts *exts);
 void tcf_exts_change(struct tcf_proto *tp, struct tcf_exts *dst,
 		     struct tcf_exts *src);
 int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts);
-- 
cgit v1.1


From 3d9a0d2f8212879407e58d67f460d8920eb6543d Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 25 Sep 2014 23:04:56 -0700
Subject: dql: dql_queued() should write first to reduce bus transactions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

While doing high throughput test on a BQL enabled NIC,
I found a very high cost in ndo_start_xmit() when accessing BQL data.

It turned out the problem was caused by compiler trying to be
smart, but involving a bad MESI transaction :

  0.05 │  mov    0xc0(%rax),%edi    // LOAD dql->num_queued
  0.48 │  mov    %edx,0xc8(%rax)    // STORE dql->last_obj_cnt = count
 58.23 │  add    %edx,%edi
  0.58 │  cmp    %edi,0xc4(%rax)
  0.76 │  mov    %edi,0xc0(%rax)    // STORE dql->num_queued += count
  0.72 │  js     bd8

I got an incredible 10 % gain [1] by making sure cpu do not attempt
to get the cache line in Shared mode, but directly requests for
ownership.

New code :
	mov    %edx,0xc8(%rax)  // STORE dql->last_obj_cnt = count
	add    %edx,0xc0(%rax)  // RMW   dql->num_queued += count
	mov    0xc4(%rax),%ecx  // LOAD dql->adj_limit
	mov    0xc0(%rax),%edx  // LOAD dql->num_queued
	cmp    %edx,%ecx

The TX completion was running from another cpu, with high interrupts
rate.

Note that I am using barrier() as a soft hint, as mb() here could be
too heavy cost.

[1] This was a netperf TCP_STREAM with TSO disabled, but GSO enabled.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/dynamic_queue_limits.h | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/dynamic_queue_limits.h b/include/linux/dynamic_queue_limits.h
index 5621547..a4be703 100644
--- a/include/linux/dynamic_queue_limits.h
+++ b/include/linux/dynamic_queue_limits.h
@@ -73,14 +73,22 @@ static inline void dql_queued(struct dql *dql, unsigned int count)
 {
 	BUG_ON(count > DQL_MAX_OBJECT);
 
-	dql->num_queued += count;
 	dql->last_obj_cnt = count;
+
+	/* We want to force a write first, so that cpu do not attempt
+	 * to get cache line containing last_obj_cnt, num_queued, adj_limit
+	 * in Shared state, but directly does a Request For Ownership
+	 * It is only a hint, we use barrier() only.
+	 */
+	barrier();
+
+	dql->num_queued += count;
 }
 
 /* Returns how many objects can be queued, < 0 indicates over limit. */
 static inline int dql_avail(const struct dql *dql)
 {
-	return dql->adj_limit - dql->num_queued;
+	return ACCESS_ONCE(dql->adj_limit) - ACCESS_ONCE(dql->num_queued);
 }
 
 /* Record number of completed objects and recalculate the limit. */
-- 
cgit v1.1


From 55d8694fa82c9b5858ae5a78a210353961f908f9 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 26 Sep 2014 22:37:32 +0200
Subject: net: tcp: assign tcp cong_ops when tcp sk is created

Split assignment and initialization from one into two functions.

This is required by followup patches that add Datacenter TCP
(DCTCP) congestion control algorithm - we need to be able to
determine if the connection is moderated by DCTCP before the
3WHS has finished.

As we walk the available congestion control list during the
assignment, we are always guaranteed to have Reno present as
it's fixed compiled-in. Therefore, since we're doing the
early assignment, we don't have a real use for the Reno alias
tcp_init_congestion_ops anymore and can thus remove it.

Actual usage of the congestion control operations are being
made after the 3WHS has finished, in some cases however we
can access get_info() via diag if implemented, therefore we
need to zero out the private area for those modules.

Joint work with Daniel Borkmann and Glenn Judd.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Signed-off-by: Glenn Judd <glenn.judd@morganstanley.com>
Acked-by: Stephen Hemminger <stephen@networkplumber.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 02a9a2c..f99b0c0 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -824,6 +824,7 @@ struct tcp_congestion_ops {
 int tcp_register_congestion_control(struct tcp_congestion_ops *type);
 void tcp_unregister_congestion_control(struct tcp_congestion_ops *type);
 
+void tcp_assign_congestion_control(struct sock *sk);
 void tcp_init_congestion_control(struct sock *sk);
 void tcp_cleanup_congestion_control(struct sock *sk);
 int tcp_set_default_congestion_control(const char *name);
@@ -835,7 +836,6 @@ int tcp_set_congestion_control(struct sock *sk, const char *name);
 int tcp_slow_start(struct tcp_sock *tp, u32 acked);
 void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w);
 
-extern struct tcp_congestion_ops tcp_init_congestion_ops;
 u32 tcp_reno_ssthresh(struct sock *sk);
 void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked);
 extern struct tcp_congestion_ops tcp_reno;
-- 
cgit v1.1


From 30e502a34b8b21fae2c789da102bd9f6e99fef83 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <dborkman@redhat.com>
Date: Fri, 26 Sep 2014 22:37:33 +0200
Subject: net: tcp: add flag for ca to indicate that ECN is required

This patch adds a flag to TCP congestion algorithms that allows
for requesting to mark IPv4/IPv6 sockets with transport as ECN
capable, that is, ECT(0), when required by a congestion algorithm.

It is currently used and needed in DataCenter TCP (DCTCP), as it
requires both peers to assert ECT on all IP packets sent - it
uses ECN feedback (i.e. CE, Congestion Encountered information)
from switches inside the data center to derive feedback to the
end hosts.

Therefore, simply add a new flag to icsk_ca_ops. Note that DCTCP's
algorithm/behaviour slightly diverges from RFC3168, therefore this
is only (!) enabled iff the assigned congestion control ops module
has requested this. By that, we can tightly couple this logic really
only to the provided congestion control ops.

Joint work with Florian Westphal and Glenn Judd.

Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Glenn Judd <glenn.judd@morganstanley.com>
Acked-by: Stephen Hemminger <stephen@networkplumber.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h | 61 +++++++++++++++++++++++++++++++++++++++----------------
 1 file changed, 44 insertions(+), 17 deletions(-)

(limited to 'include')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index f99b0c0..a12f145 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -733,23 +733,6 @@ struct tcp_skb_cb {
 
 #define TCP_SKB_CB(__skb)	((struct tcp_skb_cb *)&((__skb)->cb[0]))
 
-/* RFC3168 : 6.1.1 SYN packets must not have ECT/ECN bits set
- *
- * If we receive a SYN packet with these bits set, it means a network is
- * playing bad games with TOS bits. In order to avoid possible false congestion
- * notifications, we disable TCP ECN negociation.
- */
-static inline void
-TCP_ECN_create_request(struct request_sock *req, const struct sk_buff *skb,
-		struct net *net)
-{
-	const struct tcphdr *th = tcp_hdr(skb);
-
-	if (net->ipv4.sysctl_tcp_ecn && th->ece && th->cwr &&
-	    INET_ECN_is_not_ect(TCP_SKB_CB(skb)->ip_dsfield))
-		inet_rsk(req)->ecn_ok = 1;
-}
-
 /* Due to TSO, an SKB can be composed of multiple actual
  * packets.  To keep these tracked properly, we use this.
  */
@@ -791,7 +774,10 @@ enum tcp_ca_event {
 #define TCP_CA_MAX	128
 #define TCP_CA_BUF_MAX	(TCP_CA_NAME_MAX*TCP_CA_MAX)
 
+/* Algorithm can be set on socket without CAP_NET_ADMIN privileges */
 #define TCP_CONG_NON_RESTRICTED 0x1
+/* Requires ECN/ECT set on all packets */
+#define TCP_CONG_NEEDS_ECN	0x2
 
 struct tcp_congestion_ops {
 	struct list_head	list;
@@ -840,6 +826,13 @@ u32 tcp_reno_ssthresh(struct sock *sk);
 void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked);
 extern struct tcp_congestion_ops tcp_reno;
 
+static inline bool tcp_ca_needs_ecn(const struct sock *sk)
+{
+	const struct inet_connection_sock *icsk = inet_csk(sk);
+
+	return icsk->icsk_ca_ops->flags & TCP_CONG_NEEDS_ECN;
+}
+
 static inline void tcp_set_ca_state(struct sock *sk, const u8 ca_state)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
@@ -857,6 +850,40 @@ static inline void tcp_ca_event(struct sock *sk, const enum tcp_ca_event event)
 		icsk->icsk_ca_ops->cwnd_event(sk, event);
 }
 
+/* RFC3168 : 6.1.1 SYN packets must not have ECT/ECN bits set
+ *
+ * If we receive a SYN packet with these bits set, it means a
+ * network is playing bad games with TOS bits. In order to
+ * avoid possible false congestion notifications, we disable
+ * TCP ECN negociation.
+ *
+ * Exception: tcp_ca wants ECN. This is required for DCTCP
+ * congestion control; it requires setting ECT on all packets,
+ * including SYN. We inverse the test in this case: If our
+ * local socket wants ECN, but peer only set ece/cwr (but not
+ * ECT in IP header) its probably a non-DCTCP aware sender.
+ */
+static inline void
+TCP_ECN_create_request(struct request_sock *req, const struct sk_buff *skb,
+		       const struct sock *listen_sk)
+{
+	const struct tcphdr *th = tcp_hdr(skb);
+	const struct net *net = sock_net(listen_sk);
+	bool th_ecn = th->ece && th->cwr;
+	bool ect, need_ecn;
+
+	if (!th_ecn)
+		return;
+
+	ect = !INET_ECN_is_not_ect(TCP_SKB_CB(skb)->ip_dsfield);
+	need_ecn = tcp_ca_needs_ecn(listen_sk);
+
+	if (!ect && !need_ecn && net->ipv4.sysctl_tcp_ecn)
+		inet_rsk(req)->ecn_ok = 1;
+	else if (ect && need_ecn)
+		inet_rsk(req)->ecn_ok = 1;
+}
+
 /* These functions determine how the current flow behaves in respect of SACK
  * handling. SACK is negotiated with the peer, and therefore it can vary
  * between different flows.
-- 
cgit v1.1


From 7354c8c389d18719dd71cc810da70b0921d66694 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 26 Sep 2014 22:37:34 +0200
Subject: net: tcp: split ack slow/fast events from cwnd_event

The congestion control ops "cwnd_event" currently supports
CA_EVENT_FAST_ACK and CA_EVENT_SLOW_ACK events (among others).
Both FAST and SLOW_ACK are only used by Westwood congestion
control algorithm.

This removes both flags from cwnd_event and adds a new
in_ack_event callback for this. The goal is to be able to
provide more detailed information about ACKs, such as whether
ECE flag was set, or whether the ACK resulted in a window
update.

It is required for DataCenter TCP (DCTCP) congestion control
algorithm as it makes a different choice depending on ECE being
set or not.

Joint work with Daniel Borkmann and Glenn Judd.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Signed-off-by: Glenn Judd <glenn.judd@morganstanley.com>
Acked-by: Stephen Hemminger <stephen@networkplumber.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index a12f145..7ec6a28 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -763,8 +763,10 @@ enum tcp_ca_event {
 	CA_EVENT_CWND_RESTART,	/* congestion window restart */
 	CA_EVENT_COMPLETE_CWR,	/* end of congestion recovery */
 	CA_EVENT_LOSS,		/* loss timeout */
-	CA_EVENT_FAST_ACK,	/* in sequence ack */
-	CA_EVENT_SLOW_ACK,	/* other ack */
+};
+
+enum tcp_ca_ack_event_flags {
+	CA_ACK_SLOWPATH = (1 << 0),
 };
 
 /*
@@ -796,6 +798,8 @@ struct tcp_congestion_ops {
 	void (*set_state)(struct sock *sk, u8 new_state);
 	/* call when cwnd event occurs (optional) */
 	void (*cwnd_event)(struct sock *sk, enum tcp_ca_event ev);
+	/* call when ack arrives (optional) */
+	void (*in_ack_event)(struct sock *sk, u32 flags);
 	/* new value of cwnd after loss (optional) */
 	u32  (*undo_cwnd)(struct sock *sk);
 	/* hook for packet ack accounting (optional) */
-- 
cgit v1.1


From 9890092e46b2996bb85f7f973e69424cb5c07bc0 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 26 Sep 2014 22:37:35 +0200
Subject: net: tcp: more detailed ACK events and events for CE marked packets

DataCenter TCP (DCTCP) determines cwnd growth based on ECN information
and ACK properties, e.g. ACK that updates window is treated differently
than DUPACK.

Also DCTCP needs information whether ACK was delayed ACK. Furthermore,
DCTCP also implements a CE state machine that keeps track of CE markings
of incoming packets.

Therefore, extend the congestion control framework to provide these
event types, so that DCTCP can be properly implemented as a normal
congestion algorithm module outside of the core stack.

Joint work with Daniel Borkmann and Glenn Judd.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Signed-off-by: Glenn Judd <glenn.judd@morganstanley.com>
Acked-by: Stephen Hemminger <stephen@networkplumber.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 7ec6a28..1f57c53 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -763,10 +763,17 @@ enum tcp_ca_event {
 	CA_EVENT_CWND_RESTART,	/* congestion window restart */
 	CA_EVENT_COMPLETE_CWR,	/* end of congestion recovery */
 	CA_EVENT_LOSS,		/* loss timeout */
+	CA_EVENT_ECN_NO_CE,	/* ECT set, but not CE marked */
+	CA_EVENT_ECN_IS_CE,	/* received CE marked IP packet */
+	CA_EVENT_DELAYED_ACK,	/* Delayed ack is sent */
+	CA_EVENT_NON_DELAYED_ACK,
 };
 
+/* Information about inbound ACK, passed to cong_ops->in_ack_event() */
 enum tcp_ca_ack_event_flags {
-	CA_ACK_SLOWPATH = (1 << 0),
+	CA_ACK_SLOWPATH		= (1 << 0),	/* In slow path processing */
+	CA_ACK_WIN_UPDATE	= (1 << 1),	/* ACK updated window */
+	CA_ACK_ECE		= (1 << 2),	/* ECE bit is set on ack */
 };
 
 /*
-- 
cgit v1.1


From e3118e8359bb7c59555aca60c725106e6d78c5ce Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <dborkman@redhat.com>
Date: Fri, 26 Sep 2014 22:37:36 +0200
Subject: net: tcp: add DCTCP congestion control algorithm

This work adds the DataCenter TCP (DCTCP) congestion control
algorithm [1], which has been first published at SIGCOMM 2010 [2],
resp. follow-up analysis at SIGMETRICS 2011 [3] (and also, more
recently as an informational IETF draft available at [4]).

DCTCP is an enhancement to the TCP congestion control algorithm for
data center networks. Typical data center workloads are i.e.
i) partition/aggregate (queries; bursty, delay sensitive), ii) short
messages e.g. 50KB-1MB (for coordination and control state; delay
sensitive), and iii) large flows e.g. 1MB-100MB (data update;
throughput sensitive). DCTCP has therefore been designed for such
environments to provide/achieve the following three requirements:

  * High burst tolerance (incast due to partition/aggregate)
  * Low latency (short flows, queries)
  * High throughput (continuous data updates, large file
    transfers) with commodity, shallow buffered switches

The basic idea of its design consists of two fundamentals: i) on the
switch side, packets are being marked when its internal queue
length > threshold K (K is chosen so that a large enough headroom
for marked traffic is still available in the switch queue); ii) the
sender/host side maintains a moving average of the fraction of marked
packets, so each RTT, F is being updated as follows:

 F := X / Y, where X is # of marked ACKs, Y is total # of ACKs
 alpha := (1 - g) * alpha + g * F, where g is a smoothing constant

The resulting alpha (iow: probability that switch queue is congested)
is then being used in order to adaptively decrease the congestion
window W:

 W := (1 - (alpha / 2)) * W

The means for receiving marked packets resp. marking them on switch
side in DCTCP is the use of ECN.

RFC3168 describes a mechanism for using Explicit Congestion Notification
from the switch for early detection of congestion, rather than waiting
for segment loss to occur.

However, this method only detects the presence of congestion, not
the *extent*. In the presence of mild congestion, it reduces the TCP
congestion window too aggressively and unnecessarily affects the
throughput of long flows [4].

DCTCP, as mentioned, enhances Explicit Congestion Notification (ECN)
processing to estimate the fraction of bytes that encounter congestion,
rather than simply detecting that some congestion has occurred. DCTCP
then scales the TCP congestion window based on this estimate [4],
thus it can derive multibit feedback from the information present in
the single-bit sequence of marks in its control law. And thus act in
*proportion* to the extent of congestion, not its *presence*.

Switches therefore set the Congestion Experienced (CE) codepoint in
packets when internal queue lengths exceed threshold K. Resulting,
DCTCP delivers the same or better throughput than normal TCP, while
using 90% less buffer space.

It was found in [2] that DCTCP enables the applications to handle 10x
the current background traffic, without impacting foreground traffic.
Moreover, a 10x increase in foreground traffic did not cause any
timeouts, and thus largely eliminates TCP incast collapse problems.

The algorithm itself has already seen deployments in large production
data centers since then.

We did a long-term stress-test and analysis in a data center, short
summary of our TCP incast tests with iperf compared to cubic:

This test measured DCTCP throughput and latency and compared it with
CUBIC throughput and latency for an incast scenario. In this test, 19
senders sent at maximum rate to a single receiver. The receiver simply
ran iperf -s.

The senders ran iperf -c <receiver> -t 30. All senders started
simultaneously (using local clocks synchronized by ntp).

This test was repeated multiple times. Below shows the results from a
single test. Other tests are similar. (DCTCP results were extremely
consistent, CUBIC results show some variance induced by the TCP timeouts
that CUBIC encountered.)

For this test, we report statistics on the number of TCP timeouts,
flow throughput, and traffic latency.

1) Timeouts (total over all flows, and per flow summaries):

            CUBIC            DCTCP
  Total     3227             25
  Mean       169.842          1.316
  Median     183              1
  Max        207              5
  Min        123              0
  Stddev      28.991          1.600

Timeout data is taken by measuring the net change in netstat -s
"other TCP timeouts" reported. As a result, the timeout measurements
above are not restricted to the test traffic, and we believe that it
is likely that all of the "DCTCP timeouts" are actually timeouts for
non-test traffic. We report them nevertheless. CUBIC will also include
some non-test timeouts, but they are drawfed by bona fide test traffic
timeouts for CUBIC. Clearly DCTCP does an excellent job of preventing
TCP timeouts. DCTCP reduces timeouts by at least two orders of
magnitude and may well have eliminated them in this scenario.

2) Throughput (per flow in Mbps):

            CUBIC            DCTCP
  Mean      521.684          521.895
  Median    464              523
  Max       776              527
  Min       403              519
  Stddev    105.891            2.601
  Fairness    0.962            0.999

Throughput data was simply the average throughput for each flow
reported by iperf. By avoiding TCP timeouts, DCTCP is able to
achieve much better per-flow results. In CUBIC, many flows
experience TCP timeouts which makes flow throughput unpredictable and
unfair. DCTCP, on the other hand, provides very clean predictable
throughput without incurring TCP timeouts. Thus, the standard deviation
of CUBIC throughput is dramatically higher than the standard deviation
of DCTCP throughput.

Mean throughput is nearly identical because even though cubic flows
suffer TCP timeouts, other flows will step in and fill the unused
bandwidth. Note that this test is something of a best case scenario
for incast under CUBIC: it allows other flows to fill in for flows
experiencing a timeout. Under situations where the receiver is issuing
requests and then waiting for all flows to complete, flows cannot fill
in for timed out flows and throughput will drop dramatically.

3) Latency (in ms):

            CUBIC            DCTCP
  Mean      4.0088           0.04219
  Median    4.055            0.0395
  Max       4.2              0.085
  Min       3.32             0.028
  Stddev    0.1666           0.01064

Latency for each protocol was computed by running "ping -i 0.2
<receiver>" from a single sender to the receiver during the incast
test. For DCTCP, "ping -Q 0x6 -i 0.2 <receiver>" was used to ensure
that traffic traversed the DCTCP queue and was not dropped when the
queue size was greater than the marking threshold. The summary
statistics above are over all ping metrics measured between the single
sender, receiver pair.

The latency results for this test show a dramatic difference between
CUBIC and DCTCP. CUBIC intentionally overflows the switch buffer
which incurs the maximum queue latency (more buffer memory will lead
to high latency.) DCTCP, on the other hand, deliberately attempts to
keep queue occupancy low. The result is a two orders of magnitude
reduction of latency with DCTCP - even with a switch with relatively
little RAM. Switches with larger amounts of RAM will incur increasing
amounts of latency for CUBIC, but not for DCTCP.

4) Convergence and stability test:

This test measured the time that DCTCP took to fairly redistribute
bandwidth when a new flow commences. It also measured DCTCP's ability
to remain stable at a fair bandwidth distribution. DCTCP is compared
with CUBIC for this test.

At the commencement of this test, a single flow is sending at maximum
rate (near 10 Gbps) to a single receiver. One second after that first
flow commences, a new flow from a distinct server begins sending to
the same receiver as the first flow. After the second flow has sent
data for 10 seconds, the second flow is terminated. The first flow
sends for an additional second. Ideally, the bandwidth would be evenly
shared as soon as the second flow starts, and recover as soon as it
stops.

The results of this test are shown below. Note that the flow bandwidth
for the two flows was measured near the same time, but not
simultaneously.

DCTCP performs nearly perfectly within the measurement limitations
of this test: bandwidth is quickly distributed fairly between the two
flows, remains stable throughout the duration of the test, and
recovers quickly. CUBIC, in contrast, is slow to divide the bandwidth
fairly, and has trouble remaining stable.

  CUBIC                      DCTCP

  Seconds  Flow 1  Flow 2    Seconds  Flow 1  Flow 2
   0       9.93    0          0       9.92    0
   0.5     9.87    0          0.5     9.86    0
   1       8.73    2.25       1       6.46    4.88
   1.5     7.29    2.8        1.5     4.9     4.99
   2       6.96    3.1        2       4.92    4.94
   2.5     6.67    3.34       2.5     4.93    5
   3       6.39    3.57       3       4.92    4.99
   3.5     6.24    3.75       3.5     4.94    4.74
   4       6       3.94       4       5.34    4.71
   4.5     5.88    4.09       4.5     4.99    4.97
   5       5.27    4.98       5       4.83    5.01
   5.5     4.93    5.04       5.5     4.89    4.99
   6       4.9     4.99       6       4.92    5.04
   6.5     4.93    5.1        6.5     4.91    4.97
   7       4.28    5.8        7       4.97    4.97
   7.5     4.62    4.91       7.5     4.99    4.82
   8       5.05    4.45       8       5.16    4.76
   8.5     5.93    4.09       8.5     4.94    4.98
   9       5.73    4.2        9       4.92    5.02
   9.5     5.62    4.32       9.5     4.87    5.03
  10       6.12    3.2       10       4.91    5.01
  10.5     6.91    3.11      10.5     4.87    5.04
  11       8.48    0         11       8.49    4.94
  11.5     9.87    0         11.5     9.9     0

SYN/ACK ECT test:

This test demonstrates the importance of ECT on SYN and SYN-ACK packets
by measuring the connection probability in the presence of competing
flows for a DCTCP connection attempt *without* ECT in the SYN packet.
The test was repeated five times for each number of competing flows.

              Competing Flows  1 |    2 |    4 |    8 |   16
                               ------------------------------
Mean Connection Probability    1 | 0.67 | 0.45 | 0.28 |    0
Median Connection Probability  1 | 0.65 | 0.45 | 0.25 |    0

As the number of competing flows moves beyond 1, the connection
probability drops rapidly.

Enabling DCTCP with this patch requires the following steps:

DCTCP must be running both on the sender and receiver side in your
data center, i.e.:

  sysctl -w net.ipv4.tcp_congestion_control=dctcp

Also, ECN functionality must be enabled on all switches in your
data center for DCTCP to work. The default ECN marking threshold (K)
heuristic on the switch for DCTCP is e.g., 20 packets (30KB) at
1Gbps, and 65 packets (~100KB) at 10Gbps (K > 1/7 * C * RTT, [4]).

In above tests, for each switch port, traffic was segregated into two
queues. For any packet with a DSCP of 0x01 - or equivalently a TOS of
0x04 - the packet was placed into the DCTCP queue. All other packets
were placed into the default drop-tail queue. For the DCTCP queue,
RED/ECN marking was enabled, here, with a marking threshold of 75 KB.
More details however, we refer you to the paper [2] under section 3).

There are no code changes required to applications running in user
space. DCTCP has been implemented in full *isolation* of the rest of
the TCP code as its own congestion control module, so that it can run
without a need to expose code to the core of the TCP stack, and thus
nothing changes for non-DCTCP users.

Changes in the CA framework code are minimal, and DCTCP algorithm
operates on mechanisms that are already available in most Silicon.
The gain (dctcp_shift_g) is currently a fixed constant (1/16) from
the paper, but we leave the option that it can be chosen carefully
to a different value by the user.

In case DCTCP is being used and ECN support on peer site is off,
DCTCP falls back after 3WHS to operate in normal TCP Reno mode.

ss {-4,-6} -t -i diag interface:

  ... dctcp wscale:7,7 rto:203 rtt:2.349/0.026 mss:1448 cwnd:2054
  ssthresh:1102 ce_state 0 alpha 15 ab_ecn 0 ab_tot 735584
  send 10129.2Mbps pacing_rate 20254.1Mbps unacked:1822 retrans:0/15
  reordering:101 rcv_space:29200

  ... dctcp-reno wscale:7,7 rto:201 rtt:0.711/1.327 ato:40 mss:1448
  cwnd:10 ssthresh:1102 fallback_mode send 162.9Mbps pacing_rate
  325.5Mbps rcv_rtt:1.5 rcv_space:29200

More information about DCTCP can be found in [1-4].

  [1] http://simula.stanford.edu/~alizade/Site/DCTCP.html
  [2] http://simula.stanford.edu/~alizade/Site/DCTCP_files/dctcp-final.pdf
  [3] http://simula.stanford.edu/~alizade/Site/DCTCP_files/dctcp_analysis-full.pdf
  [4] http://tools.ietf.org/html/draft-bensley-tcpm-dctcp-00

Joint work with Florian Westphal and Glenn Judd.

Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Glenn Judd <glenn.judd@morganstanley.com>
Acked-by: Stephen Hemminger <stephen@networkplumber.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/inet_diag.h | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/inet_diag.h b/include/uapi/linux/inet_diag.h
index bbde90f..d65c0a0 100644
--- a/include/uapi/linux/inet_diag.h
+++ b/include/uapi/linux/inet_diag.h
@@ -110,10 +110,10 @@ enum {
 	INET_DIAG_TCLASS,
 	INET_DIAG_SKMEMINFO,
 	INET_DIAG_SHUTDOWN,
+	INET_DIAG_DCTCPINFO,
 };
 
-#define INET_DIAG_MAX INET_DIAG_SHUTDOWN
-
+#define INET_DIAG_MAX INET_DIAG_DCTCPINFO
 
 /* INET_DIAG_MEM */
 
@@ -133,5 +133,14 @@ struct tcpvegas_info {
 	__u32	tcpv_minrtt;
 };
 
+/* INET_DIAG_DCTCPINFO */
+
+struct tcp_dctcp_info {
+	__u16	dctcp_enabled;
+	__u16	dctcp_ce_state;
+	__u32	dctcp_alpha;
+	__u32	dctcp_ab_ecn;
+	__u32	dctcp_ab_tot;
+};
 
 #endif /* _UAPI_INET_DIAG_H_ */
-- 
cgit v1.1


From 9363dc4b599949bde338cdaba1cf7cac243e4e97 Mon Sep 17 00:00:00 2001
From: Arturo Borrero <arturo.borrero.glez@gmail.com>
Date: Tue, 23 Sep 2014 13:30:41 +0200
Subject: netfilter: nf_tables: store and dump set policy

We want to know in which cases the user explicitly sets the policy
options. In that case, we also want to dump back the info.

Signed-off-by: Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index c4d8619..3d72923 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -241,6 +241,7 @@ void nft_unregister_set(struct nft_set_ops *ops);
  * 	@dtype: data type (verdict or numeric type defined by userspace)
  * 	@size: maximum set size
  * 	@nelems: number of elements
+ *	@policy: set parameterization (see enum nft_set_policies)
  * 	@ops: set ops
  * 	@flags: set flags
  * 	@klen: key length
@@ -255,6 +256,7 @@ struct nft_set {
 	u32				dtype;
 	u32				size;
 	u32				nelems;
+	u16				policy;
 	/* runtime data below here */
 	const struct nft_set_ops	*ops ____cacheline_aligned;
 	u16				flags;
-- 
cgit v1.1


From b1937227316417aa7568d01e6fa1f272e98fb890 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sun, 28 Sep 2014 22:18:47 -0700
Subject: net: reorganize sk_buff for faster __copy_skb_header()

With proliferation of bit fields in sk_buff, __copy_skb_header() became
quite expensive, showing as the most expensive function in a GSO
workload.

__copy_skb_header() performance is also critical for non GSO TCP
operations, as it is used from skb_clone()

This patch carefully moves all the fields that were not copied in a
separate zone : cloned, nohdr, fclone, peeked, head_frag, xmit_more

Then I moved all other fields and all other copied fields in a section
delimited by headers_start[0]/headers_end[0] section so that we
can use a single memcpy() call, inlined by compiler using long
word load/stores.

I also tried to make all copies in the natural orders of sk_buff,
to help hardware prefetching.

I made sure sk_buff size did not change.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 133 ++++++++++++++++++++++++++-----------------------
 1 file changed, 72 insertions(+), 61 deletions(-)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 8eaa624..b6cced3 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -527,27 +527,41 @@ struct sk_buff {
 	char			cb[48] __aligned(8);
 
 	unsigned long		_skb_refdst;
+	void			(*destructor)(struct sk_buff *skb);
 #ifdef CONFIG_XFRM
 	struct	sec_path	*sp;
 #endif
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+	struct nf_conntrack	*nfct;
+#endif
+#ifdef CONFIG_BRIDGE_NETFILTER
+	struct nf_bridge_info	*nf_bridge;
+#endif
 	unsigned int		len,
 				data_len;
 	__u16			mac_len,
 				hdr_len;
-	union {
-		__wsum		csum;
-		struct {
-			__u16	csum_start;
-			__u16	csum_offset;
-		};
-	};
-	__u32			priority;
+
+	/* Following fields are _not_ copied in __copy_skb_header()
+	 * Note that queue_mapping is here mostly to fill a hole.
+	 */
 	kmemcheck_bitfield_begin(flags1);
-	__u8			ignore_df:1,
-				cloned:1,
-				ip_summed:2,
+	__u16			queue_mapping;
+	__u8			cloned:1,
 				nohdr:1,
-				nfctinfo:3;
+				fclone:2,
+				peeked:1,
+				head_frag:1,
+				xmit_more:1;
+	/* one bit hole */
+	kmemcheck_bitfield_end(flags1);
+
+
+
+	/* fields enclosed in headers_start/headers_end are copied
+	 * using a single memcpy() in __copy_skb_header()
+	 */
+	__u32			headers_start[0];
 
 /* if you move pkt_type around you also must adapt those constants */
 #ifdef __BIG_ENDIAN_BITFIELD
@@ -558,58 +572,53 @@ struct sk_buff {
 #define PKT_TYPE_OFFSET()	offsetof(struct sk_buff, __pkt_type_offset)
 
 	__u8			__pkt_type_offset[0];
-	__u8			pkt_type:3,
-				fclone:2,
-				ipvs_property:1,
-				peeked:1,
-				nf_trace:1;
-	kmemcheck_bitfield_end(flags1);
-	__be16			protocol;
-
-	void			(*destructor)(struct sk_buff *skb);
-#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
-	struct nf_conntrack	*nfct;
-#endif
-#ifdef CONFIG_BRIDGE_NETFILTER
-	struct nf_bridge_info	*nf_bridge;
-#endif
-
-	int			skb_iif;
-
-	__u32			hash;
-
-	__be16			vlan_proto;
-	__u16			vlan_tci;
-
-#ifdef CONFIG_NET_SCHED
-	__u16			tc_index;	/* traffic control index */
-#ifdef CONFIG_NET_CLS_ACT
-	__u16			tc_verd;	/* traffic control verdict */
-#endif
-#endif
-
-	__u16			queue_mapping;
-	kmemcheck_bitfield_begin(flags2);
-	__u8			xmit_more:1;
-#ifdef CONFIG_IPV6_NDISC_NODETYPE
-	__u8			ndisc_nodetype:2;
-#endif
+	__u8			pkt_type:3;
 	__u8			pfmemalloc:1;
+	__u8			ignore_df:1;
+	__u8			nfctinfo:3;
+
+	__u8			nf_trace:1;
+	__u8			ip_summed:2;
 	__u8			ooo_okay:1;
 	__u8			l4_hash:1;
 	__u8			sw_hash:1;
 	__u8			wifi_acked_valid:1;
 	__u8			wifi_acked:1;
+
 	__u8			no_fcs:1;
-	__u8			head_frag:1;
 	/* Indicates the inner headers are valid in the skbuff. */
 	__u8			encapsulation:1;
 	__u8			encap_hdr_csum:1;
 	__u8			csum_valid:1;
 	__u8			csum_complete_sw:1;
-	/* 1/3 bit hole (depending on ndisc_nodetype presence) */
-	kmemcheck_bitfield_end(flags2);
+	__u8			csum_level:2;
+	__u8			csum_bad:1;
 
+#ifdef CONFIG_IPV6_NDISC_NODETYPE
+	__u8			ndisc_nodetype:2;
+#endif
+	__u8			ipvs_property:1;
+	/* 5 or 7 bit hole */
+
+#ifdef CONFIG_NET_SCHED
+	__u16			tc_index;	/* traffic control index */
+#ifdef CONFIG_NET_CLS_ACT
+	__u16			tc_verd;	/* traffic control verdict */
+#endif
+#endif
+
+	union {
+		__wsum		csum;
+		struct {
+			__u16	csum_start;
+			__u16	csum_offset;
+		};
+	};
+	__u32			priority;
+	int			skb_iif;
+	__u32			hash;
+	__be16			vlan_proto;
+	__u16			vlan_tci;
 #if defined CONFIG_NET_DMA || defined CONFIG_NET_RX_BUSY_POLL
 	union {
 		unsigned int	napi_id;
@@ -625,19 +634,18 @@ struct sk_buff {
 		__u32		reserved_tailroom;
 	};
 
-	kmemcheck_bitfield_begin(flags3);
-	__u8			csum_level:2;
-	__u8			csum_bad:1;
-	/* 13 bit hole */
-	kmemcheck_bitfield_end(flags3);
-
 	__be16			inner_protocol;
 	__u16			inner_transport_header;
 	__u16			inner_network_header;
 	__u16			inner_mac_header;
+
+	__be16			protocol;
 	__u16			transport_header;
 	__u16			network_header;
 	__u16			mac_header;
+
+	__u32			headers_end[0];
+
 	/* These elements must be at the end, see alloc_skb() for details.  */
 	sk_buff_data_t		tail;
 	sk_buff_data_t		end;
@@ -3040,19 +3048,22 @@ static inline void nf_reset_trace(struct sk_buff *skb)
 }
 
 /* Note: This doesn't put any conntrack and bridge info in dst. */
-static inline void __nf_copy(struct sk_buff *dst, const struct sk_buff *src)
+static inline void __nf_copy(struct sk_buff *dst, const struct sk_buff *src,
+			     bool copy)
 {
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 	dst->nfct = src->nfct;
 	nf_conntrack_get(src->nfct);
-	dst->nfctinfo = src->nfctinfo;
+	if (copy)
+		dst->nfctinfo = src->nfctinfo;
 #endif
 #ifdef CONFIG_BRIDGE_NETFILTER
 	dst->nf_bridge  = src->nf_bridge;
 	nf_bridge_get(src->nf_bridge);
 #endif
 #if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) || defined(CONFIG_NF_TABLES)
-	dst->nf_trace = src->nf_trace;
+	if (copy)
+		dst->nf_trace = src->nf_trace;
 #endif
 }
 
@@ -3064,7 +3075,7 @@ static inline void nf_copy(struct sk_buff *dst, const struct sk_buff *src)
 #ifdef CONFIG_BRIDGE_NETFILTER
 	nf_bridge_put(dst->nf_bridge);
 #endif
-	__nf_copy(dst, src);
+	__nf_copy(dst, src, true);
 }
 
 #ifdef CONFIG_NETWORK_SECMARK
-- 
cgit v1.1


From 8c14f9c70327a6fb75534c4c61d7ea9c82ccf78f Mon Sep 17 00:00:00 2001
From: Michael Grzeschik <m.grzeschik@pengutronix.de>
Date: Mon, 29 Sep 2014 11:55:36 +0200
Subject: ARCNET: add com20020 PCI IDs with metadata

This patch adds metadata for the com20020 to prepare for devices with
multiple io address areas with multi card interfaces.

Signed-off-by: Michael Grzeschik <m.grzeschik@pengutronix.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/com20020.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'include')

diff --git a/include/linux/com20020.h b/include/linux/com20020.h
index 5dcfb94..6a1ceca 100644
--- a/include/linux/com20020.h
+++ b/include/linux/com20020.h
@@ -41,6 +41,22 @@ extern const struct net_device_ops com20020_netdev_ops;
 #define BUS_ALIGN  1
 #endif
 
+#define PLX_PCI_MAX_CARDS 1
+
+struct com20020_pci_channel_map {
+	u32 bar;
+	u32 offset;
+	u32 size;               /* 0x00 - auto, e.g. length of entire bar */
+};
+
+struct com20020_pci_card_info {
+	const char *name;
+	int devcount;
+
+	struct com20020_pci_channel_map chan_map_tbl[PLX_PCI_MAX_CARDS];
+
+	unsigned int flags;
+};
 
 #define _INTMASK  (ioaddr+BUS_ALIGN*0)	/* writable */
 #define _STATUS   (ioaddr+BUS_ALIGN*0)	/* readable */
-- 
cgit v1.1


From c51da42a6346c0c747e70a4f5ae873da1150a784 Mon Sep 17 00:00:00 2001
From: Michael Grzeschik <m.grzeschik@pengutronix.de>
Date: Mon, 29 Sep 2014 11:55:37 +0200
Subject: ARCNET: add support for multi interfaces on com20020

The com20020-pci driver is currently designed to instance
one netdev with one pci device. This patch adds support to
instance many cards with one pci device, depending on the device
data in the private data.

Signed-off-by: Michael Grzeschik <m.grzeschik@pengutronix.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/com20020.h | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/com20020.h b/include/linux/com20020.h
index 6a1ceca..8589899 100644
--- a/include/linux/com20020.h
+++ b/include/linux/com20020.h
@@ -41,7 +41,7 @@ extern const struct net_device_ops com20020_netdev_ops;
 #define BUS_ALIGN  1
 #endif
 
-#define PLX_PCI_MAX_CARDS 1
+#define PLX_PCI_MAX_CARDS 2
 
 struct com20020_pci_channel_map {
 	u32 bar;
@@ -58,6 +58,19 @@ struct com20020_pci_card_info {
 	unsigned int flags;
 };
 
+struct com20020_priv {
+	struct com20020_pci_card_info *ci;
+	struct list_head list_dev;
+};
+
+struct com20020_dev {
+	struct list_head list;
+	struct net_device *dev;
+
+	struct com20020_priv *pci_priv;
+	int index;
+};
+
 #define _INTMASK  (ioaddr+BUS_ALIGN*0)	/* writable */
 #define _STATUS   (ioaddr+BUS_ALIGN*0)	/* readable */
 #define _COMMAND  (ioaddr+BUS_ALIGN*1)	/* standard arcnet commands */
-- 
cgit v1.1


From d82bd1229885d550d03926cfa937703f6caa3cc0 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Mon, 29 Sep 2014 13:08:29 +0200
Subject: tcp: move TCP_ECN_create_request out of header

After Octavian Purdilas tcp ipv4/ipv6 unification work this helper only
has a single callsite.

While at it, convert name to lowercase, suggested by Stephen.

Suggested-by: Stephen Hemminger <stephen@networkplumber.org>
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h | 34 ----------------------------------
 1 file changed, 34 deletions(-)

(limited to 'include')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 1f57c53..545a79a 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -861,40 +861,6 @@ static inline void tcp_ca_event(struct sock *sk, const enum tcp_ca_event event)
 		icsk->icsk_ca_ops->cwnd_event(sk, event);
 }
 
-/* RFC3168 : 6.1.1 SYN packets must not have ECT/ECN bits set
- *
- * If we receive a SYN packet with these bits set, it means a
- * network is playing bad games with TOS bits. In order to
- * avoid possible false congestion notifications, we disable
- * TCP ECN negociation.
- *
- * Exception: tcp_ca wants ECN. This is required for DCTCP
- * congestion control; it requires setting ECT on all packets,
- * including SYN. We inverse the test in this case: If our
- * local socket wants ECN, but peer only set ece/cwr (but not
- * ECT in IP header) its probably a non-DCTCP aware sender.
- */
-static inline void
-TCP_ECN_create_request(struct request_sock *req, const struct sk_buff *skb,
-		       const struct sock *listen_sk)
-{
-	const struct tcphdr *th = tcp_hdr(skb);
-	const struct net *net = sock_net(listen_sk);
-	bool th_ecn = th->ece && th->cwr;
-	bool ect, need_ecn;
-
-	if (!th_ecn)
-		return;
-
-	ect = !INET_ECN_is_not_ect(TCP_SKB_CB(skb)->ip_dsfield);
-	need_ecn = tcp_ca_needs_ecn(listen_sk);
-
-	if (!ect && !need_ecn && net->ipv4.sysctl_tcp_ecn)
-		inet_rsk(req)->ecn_ok = 1;
-	else if (ect && need_ecn)
-		inet_rsk(req)->ecn_ok = 1;
-}
-
 /* These functions determine how the current flow behaves in respect of SACK
  * handling. SACK is negotiated with the peer, and therefore it can vary
  * between different flows.
-- 
cgit v1.1


From 79cf79abce71eb7dbc40e2f3121048ca5405cb47 Mon Sep 17 00:00:00 2001
From: Michael Braun <michael-dev@fami-braun.de>
Date: Thu, 25 Sep 2014 16:31:08 +0200
Subject: macvlan: add source mode

This patch adds a new mode of operation to macvlan, called "source".
It allows one to set a list of allowed mac address, which is used
to match against source mac address from received frames on underlying
interface.
This enables creating mac based VLAN associations, instead of standard
port or tag based. The feature is useful to deploy 802.1x mac based
behavior, where drivers of underlying interfaces doesn't allows that.

Configuration is done through the netlink interface using e.g.:
 ip link add link eth0 name macvlan0 type macvlan mode source
 ip link add link eth0 name macvlan1 type macvlan mode source
 ip link set link dev macvlan0 type macvlan macaddr add 00:11:11:11:11:11
 ip link set link dev macvlan0 type macvlan macaddr add 00:22:22:22:22:22
 ip link set link dev macvlan0 type macvlan macaddr add 00:33:33:33:33:33
 ip link set link dev macvlan1 type macvlan macaddr add 00:33:33:33:33:33
 ip link set link dev macvlan1 type macvlan macaddr add 00:44:44:44:44:44

This allows clients with MAC addresses 00:11:11:11:11:11,
00:22:22:22:22:22 to be part of only VLAN associated with macvlan0
interface. Clients with MAC addresses 00:44:44:44:44:44 with only VLAN
associated with macvlan1 interface. And client with MAC address
00:33:33:33:33:33 to be associated with both VLANs.

Based on work of Stefan Gula <steweg@gmail.com>

v8: last version of Stefan Gula for Kernel 3.2.1
v9: rework onto linux-next 2014-03-12 by Michael Braun
    add MACADDR_SET command, enable to configure mac for source mode
    while creating interface
v10:
  - reduce indention level
  - rename source_list to source_entry
  - use aligned 64bit ether address
  - use hash_64 instead of addr[5]
v11:
  - rebase for 3.14 / linux-next 20.04.2014
v12
  - rebase for linux-next 2014-09-25

Signed-off-by: Michael Braun <michael-dev@fami-braun.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_macvlan.h   |  1 +
 include/uapi/linux/if_link.h | 12 ++++++++++++
 2 files changed, 13 insertions(+)

(limited to 'include')

diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h
index 6b2c7cf..6f6929e 100644
--- a/include/linux/if_macvlan.h
+++ b/include/linux/if_macvlan.h
@@ -60,6 +60,7 @@ struct macvlan_dev {
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	struct netpoll		*netpoll;
 #endif
+	unsigned int		macaddr_count;
 };
 
 static inline void macvlan_count_rx(const struct macvlan_dev *vlan,
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index c80f95f..0bdb77e 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -303,6 +303,10 @@ enum {
 	IFLA_MACVLAN_UNSPEC,
 	IFLA_MACVLAN_MODE,
 	IFLA_MACVLAN_FLAGS,
+	IFLA_MACVLAN_MACADDR_MODE,
+	IFLA_MACVLAN_MACADDR,
+	IFLA_MACVLAN_MACADDR_DATA,
+	IFLA_MACVLAN_MACADDR_COUNT,
 	__IFLA_MACVLAN_MAX,
 };
 
@@ -313,6 +317,14 @@ enum macvlan_mode {
 	MACVLAN_MODE_VEPA    = 2, /* talk to other ports through ext bridge */
 	MACVLAN_MODE_BRIDGE  = 4, /* talk to bridge ports directly */
 	MACVLAN_MODE_PASSTHRU = 8,/* take over the underlying device */
+	MACVLAN_MODE_SOURCE  = 16,/* use source MAC address list to assign */
+};
+
+enum macvlan_macaddr_mode {
+	MACVLAN_MACADDR_ADD,
+	MACVLAN_MACADDR_DEL,
+	MACVLAN_MACADDR_FLUSH,
+	MACVLAN_MACADDR_SET,
 };
 
 #define MACVLAN_FLAG_NOPROMISC	1
-- 
cgit v1.1


From 22e0f8b9322cb1a48b1357e8f4ae6f5a9eca8cfa Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Sun, 28 Sep 2014 11:52:56 -0700
Subject: net: sched: make bstats per cpu and estimator RCU safe

In order to run qdisc's without locking statistics and estimators
need to be handled correctly.

To resolve bstats make the statistics per cpu. And because this is
only needed for qdiscs that are running without locks which is not
the case for most qdiscs in the near future only create percpu
stats when qdiscs set the TCQ_F_CPUSTATS flag.

Next because estimators use the bstats to calculate packets per
second and bytes per second the estimator code paths are updated
to use the per cpu statistics.

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/gen_stats.h   | 11 +++++++++++
 include/net/sch_generic.h | 22 +++++++++++++++++++++-
 2 files changed, 32 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/gen_stats.h b/include/net/gen_stats.h
index ea4271d..ce3c128 100644
--- a/include/net/gen_stats.h
+++ b/include/net/gen_stats.h
@@ -6,6 +6,11 @@
 #include <linux/rtnetlink.h>
 #include <linux/pkt_sched.h>
 
+struct gnet_stats_basic_cpu {
+	struct gnet_stats_basic_packed bstats;
+	struct u64_stats_sync syncp;
+};
+
 struct gnet_dump {
 	spinlock_t *      lock;
 	struct sk_buff *  skb;
@@ -27,7 +32,11 @@ int gnet_stats_start_copy_compat(struct sk_buff *skb, int type,
 				 spinlock_t *lock, struct gnet_dump *d);
 
 int gnet_stats_copy_basic(struct gnet_dump *d,
+			  struct gnet_stats_basic_cpu __percpu *cpu,
 			  struct gnet_stats_basic_packed *b);
+void __gnet_stats_copy_basic(struct gnet_stats_basic_packed *bstats,
+			     struct gnet_stats_basic_cpu __percpu *cpu,
+			     struct gnet_stats_basic_packed *b);
 int gnet_stats_copy_rate_est(struct gnet_dump *d,
 			     const struct gnet_stats_basic_packed *b,
 			     struct gnet_stats_rate_est64 *r);
@@ -37,11 +46,13 @@ int gnet_stats_copy_app(struct gnet_dump *d, void *st, int len);
 int gnet_stats_finish_copy(struct gnet_dump *d);
 
 int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
+		      struct gnet_stats_basic_cpu __percpu *cpu_bstats,
 		      struct gnet_stats_rate_est64 *rate_est,
 		      spinlock_t *stats_lock, struct nlattr *opt);
 void gen_kill_estimator(struct gnet_stats_basic_packed *bstats,
 			struct gnet_stats_rate_est64 *rate_est);
 int gen_replace_estimator(struct gnet_stats_basic_packed *bstats,
+			  struct gnet_stats_basic_cpu __percpu *cpu_bstats,
 			  struct gnet_stats_rate_est64 *rate_est,
 			  spinlock_t *stats_lock, struct nlattr *opt);
 bool gen_estimator_active(const struct gnet_stats_basic_packed *bstats,
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index e65b8e0..4b93511 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -6,6 +6,7 @@
 #include <linux/rcupdate.h>
 #include <linux/pkt_sched.h>
 #include <linux/pkt_cls.h>
+#include <linux/percpu.h>
 #include <net/gen_stats.h>
 #include <net/rtnetlink.h>
 
@@ -58,6 +59,7 @@ struct Qdisc {
 				      * multiqueue device.
 				      */
 #define TCQ_F_WARN_NONWC	(1 << 16)
+#define TCQ_F_CPUSTATS		0x20 /* run using percpu statistics */
 	u32			limit;
 	const struct Qdisc_ops	*ops;
 	struct qdisc_size_table	__rcu *stab;
@@ -83,7 +85,10 @@ struct Qdisc {
 	 */
 	unsigned long		state;
 	struct sk_buff_head	q;
-	struct gnet_stats_basic_packed bstats;
+	union {
+		struct gnet_stats_basic_packed bstats;
+		struct gnet_stats_basic_cpu __percpu *cpu_bstats;
+	} __packed;
 	unsigned int		__state;
 	struct gnet_stats_queue	qstats;
 	struct rcu_head		rcu_head;
@@ -487,6 +492,10 @@ static inline int qdisc_enqueue_root(struct sk_buff *skb, struct Qdisc *sch)
 	return qdisc_enqueue(skb, sch) & NET_XMIT_MASK;
 }
 
+static inline bool qdisc_is_percpu_stats(const struct Qdisc *q)
+{
+	return q->flags & TCQ_F_CPUSTATS;
+}
 
 static inline void bstats_update(struct gnet_stats_basic_packed *bstats,
 				 const struct sk_buff *skb)
@@ -495,6 +504,17 @@ static inline void bstats_update(struct gnet_stats_basic_packed *bstats,
 	bstats->packets += skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 1;
 }
 
+static inline void qdisc_bstats_update_cpu(struct Qdisc *sch,
+					   const struct sk_buff *skb)
+{
+	struct gnet_stats_basic_cpu *bstats =
+				this_cpu_ptr(sch->cpu_bstats);
+
+	u64_stats_update_begin(&bstats->syncp);
+	bstats_update(&bstats->bstats, skb);
+	u64_stats_update_end(&bstats->syncp);
+}
+
 static inline void qdisc_bstats_update(struct Qdisc *sch,
 				       const struct sk_buff *skb)
 {
-- 
cgit v1.1


From 25331d6ce42bcf4b34b6705fce4da15c3fabe62f Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Sun, 28 Sep 2014 11:53:29 -0700
Subject: net: sched: implement qstat helper routines

This adds helpers to manipulate qstats logic and replaces locations
that touch the counters directly. This simplifies future patches
to push qstats onto per cpu counters.

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h | 39 +++++++++++++++++++++++++++++++++------
 1 file changed, 33 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 4b93511..23a0f0f 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -521,11 +521,38 @@ static inline void qdisc_bstats_update(struct Qdisc *sch,
 	bstats_update(&sch->bstats, skb);
 }
 
+static inline void qdisc_qstats_backlog_dec(struct Qdisc *sch,
+					    const struct sk_buff *skb)
+{
+	sch->qstats.backlog -= qdisc_pkt_len(skb);
+}
+
+static inline void qdisc_qstats_backlog_inc(struct Qdisc *sch,
+					    const struct sk_buff *skb)
+{
+	sch->qstats.backlog += qdisc_pkt_len(skb);
+}
+
+static inline void __qdisc_qstats_drop(struct Qdisc *sch, int count)
+{
+	sch->qstats.drops += count;
+}
+
+static inline void qdisc_qstats_drop(struct Qdisc *sch)
+{
+	sch->qstats.drops++;
+}
+
+static inline void qdisc_qstats_overlimit(struct Qdisc *sch)
+{
+	sch->qstats.overlimits++;
+}
+
 static inline int __qdisc_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch,
 				       struct sk_buff_head *list)
 {
 	__skb_queue_tail(list, skb);
-	sch->qstats.backlog += qdisc_pkt_len(skb);
+	qdisc_qstats_backlog_inc(sch, skb);
 
 	return NET_XMIT_SUCCESS;
 }
@@ -541,7 +568,7 @@ static inline struct sk_buff *__qdisc_dequeue_head(struct Qdisc *sch,
 	struct sk_buff *skb = __skb_dequeue(list);
 
 	if (likely(skb != NULL)) {
-		sch->qstats.backlog -= qdisc_pkt_len(skb);
+		qdisc_qstats_backlog_dec(sch, skb);
 		qdisc_bstats_update(sch, skb);
 	}
 
@@ -560,7 +587,7 @@ static inline unsigned int __qdisc_queue_drop_head(struct Qdisc *sch,
 
 	if (likely(skb != NULL)) {
 		unsigned int len = qdisc_pkt_len(skb);
-		sch->qstats.backlog -= len;
+		qdisc_qstats_backlog_dec(sch, skb);
 		kfree_skb(skb);
 		return len;
 	}
@@ -579,7 +606,7 @@ static inline struct sk_buff *__qdisc_dequeue_tail(struct Qdisc *sch,
 	struct sk_buff *skb = __skb_dequeue_tail(list);
 
 	if (likely(skb != NULL))
-		sch->qstats.backlog -= qdisc_pkt_len(skb);
+		qdisc_qstats_backlog_dec(sch, skb);
 
 	return skb;
 }
@@ -661,14 +688,14 @@ static inline unsigned int qdisc_queue_drop(struct Qdisc *sch)
 static inline int qdisc_drop(struct sk_buff *skb, struct Qdisc *sch)
 {
 	kfree_skb(skb);
-	sch->qstats.drops++;
+	qdisc_qstats_drop(sch);
 
 	return NET_XMIT_DROP;
 }
 
 static inline int qdisc_reshape_fail(struct sk_buff *skb, struct Qdisc *sch)
 {
-	sch->qstats.drops++;
+	qdisc_qstats_drop(sch);
 
 #ifdef CONFIG_NET_CLS_ACT
 	if (sch->reshape_fail == NULL || sch->reshape_fail(skb, sch))
-- 
cgit v1.1


From 6401585366326fc0ecbc372ec60d1a15cd8be2f5 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Sun, 28 Sep 2014 11:53:57 -0700
Subject: net: sched: restrict use of qstats qlen

This removes the use of qstats->qlen variable from the classifiers
and makes it an explicit argument to gnet_stats_copy_queue().

The qlen represents the qdisc queue length and is packed into
the qstats at the last moment before passnig to user space. By
handling it explicitely we avoid, in the percpu stats case, having
to figure out which per_cpu variable to put it in.

It would probably be best to remove it from qstats completely
but qstats is a user space ABI and can't be broken. A future
patch could make an internal only qstats structure that would
avoid having to allocate an additional u32 variable on the
Qdisc struct. This would make the qstats struct 128bits instead
of 128+32.

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/gen_stats.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/gen_stats.h b/include/net/gen_stats.h
index ce3c128..de9b3dd 100644
--- a/include/net/gen_stats.h
+++ b/include/net/gen_stats.h
@@ -40,7 +40,8 @@ void __gnet_stats_copy_basic(struct gnet_stats_basic_packed *bstats,
 int gnet_stats_copy_rate_est(struct gnet_dump *d,
 			     const struct gnet_stats_basic_packed *b,
 			     struct gnet_stats_rate_est64 *r);
-int gnet_stats_copy_queue(struct gnet_dump *d, struct gnet_stats_queue *q);
+int gnet_stats_copy_queue(struct gnet_dump *d,
+			  struct gnet_stats_queue *q, __u32 len);
 int gnet_stats_copy_app(struct gnet_dump *d, void *st, int len);
 
 int gnet_stats_finish_copy(struct gnet_dump *d);
-- 
cgit v1.1


From b0ab6f92752b9f9d8da980506e9df3bd9dcd7ed3 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Sun, 28 Sep 2014 11:54:24 -0700
Subject: net: sched: enable per cpu qstats

After previous patches to simplify qstats the qstats can be
made per cpu with a packed union in Qdisc struct.

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/gen_stats.h   |  3 ++-
 include/net/sch_generic.h | 12 +++++++++++-
 2 files changed, 13 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/gen_stats.h b/include/net/gen_stats.h
index de9b3dd..cbafa37 100644
--- a/include/net/gen_stats.h
+++ b/include/net/gen_stats.h
@@ -41,7 +41,8 @@ int gnet_stats_copy_rate_est(struct gnet_dump *d,
 			     const struct gnet_stats_basic_packed *b,
 			     struct gnet_stats_rate_est64 *r);
 int gnet_stats_copy_queue(struct gnet_dump *d,
-			  struct gnet_stats_queue *q, __u32 len);
+			  struct gnet_stats_queue __percpu *cpu_q,
+			  struct gnet_stats_queue *q, __u32 qlen);
 int gnet_stats_copy_app(struct gnet_dump *d, void *st, int len);
 
 int gnet_stats_finish_copy(struct gnet_dump *d);
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 23a0f0f..f126698 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -90,7 +90,10 @@ struct Qdisc {
 		struct gnet_stats_basic_cpu __percpu *cpu_bstats;
 	} __packed;
 	unsigned int		__state;
-	struct gnet_stats_queue	qstats;
+	union {
+		struct gnet_stats_queue	qstats;
+		struct gnet_stats_queue	__percpu *cpu_qstats;
+	} __packed;
 	struct rcu_head		rcu_head;
 	int			padded;
 	atomic_t		refcnt;
@@ -543,6 +546,13 @@ static inline void qdisc_qstats_drop(struct Qdisc *sch)
 	sch->qstats.drops++;
 }
 
+static inline void qdisc_qstats_drop_cpu(struct Qdisc *sch)
+{
+	struct gnet_stats_queue *qstats = this_cpu_ptr(sch->cpu_qstats);
+
+	qstats->drops++;
+}
+
 static inline void qdisc_qstats_overlimit(struct Qdisc *sch)
 {
 	sch->qstats.overlimits++;
-- 
cgit v1.1


From 2101e533f41a90b25bee17ce969734e26eb0eb55 Mon Sep 17 00:00:00 2001
From: Hauke Mehrtens <hauke@hauke-m.de>
Date: Fri, 26 Sep 2014 00:09:19 +0200
Subject: bcma: register bcma as device tree driver

This driver is used by the bcm53xx ARM SoC code. Now it is possible to
give the address of the chipcommon core in device tree and bcma will
search for all the other cores.

Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/bcma/bcma.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/bcma/bcma.h b/include/linux/bcma/bcma.h
index 6345979..729f48e 100644
--- a/include/linux/bcma/bcma.h
+++ b/include/linux/bcma/bcma.h
@@ -323,6 +323,8 @@ struct bcma_bus {
 		struct pci_dev *host_pci;
 		/* Pointer to the SDIO device (only for BCMA_HOSTTYPE_SDIO) */
 		struct sdio_func *host_sdio;
+		/* Pointer to platform device (only for BCMA_HOSTTYPE_SOC) */
+		struct platform_device *host_pdev;
 	};
 
 	struct bcma_chipinfo chipinfo;
-- 
cgit v1.1


From e1c00e10e92c04aa637126db2e59b092bd4878f8 Mon Sep 17 00:00:00 2001
From: Majd Dibbiny <majd@mellanox.com>
Date: Tue, 30 Sep 2014 12:03:48 +0300
Subject: net/mlx4_core: New init and exit flow for mlx4_core

In the new flow, we separate the pci initialization and teardown
from the initialization and teardown of the other resources.

__mlx4_init_one handles the pci resources initialization. It then
calls mlx4_load_one to initialize the remainder of the resources.

When removing a device, mlx4_remove_one is invoked. However, now
mlx4_remove_one calls mlx4_unload_one to free all the resources except the pci
resources. When mlx4_unload_one returns, mlx4_remove_one then frees the
pci resources.

The above separation will allow us to implement 'reset flow' in the future.
It will also enable more EQs for VFs and is a pre-step to the modern API to
enable/disable SRIOV.

Also added nvfs; an integer array of size MLX4_MAX_PORTS + 1; to the mlx4_dev
struct. This new field is used to avoid parsing the num_vfs module parameter
each time the mlx4_restart_one is called.

Signed-off-by: Majd Dibbiny <majd@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mlx4/device.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 03b5608..b2f8ab9 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -707,6 +707,7 @@ struct mlx4_dev {
 	u64			regid_promisc_array[MLX4_MAX_PORTS + 1];
 	u64			regid_allmulti_array[MLX4_MAX_PORTS + 1];
 	struct mlx4_vf_dev     *dev_vfs;
+	int                     nvfs[MLX4_MAX_PORTS + 1];
 };
 
 struct mlx4_eqe {
-- 
cgit v1.1


From a12a601ed163578084a48708ae376805f79a1ccf Mon Sep 17 00:00:00 2001
From: Li RongQing <roy.qing.li@gmail.com>
Date: Tue, 30 Sep 2014 09:49:55 +0800
Subject: tcp: Change tcp_slow_start function to return void

No caller uses the return value, so make this function return void.

Signed-off-by: Li RongQing <roy.qing.li@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 545a79a..ba2f9d0 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -830,7 +830,7 @@ void tcp_get_available_congestion_control(char *buf, size_t len);
 void tcp_get_allowed_congestion_control(char *buf, size_t len);
 int tcp_set_allowed_congestion_control(char *allowed);
 int tcp_set_congestion_control(struct sock *sk, const char *name);
-int tcp_slow_start(struct tcp_sock *tp, u32 acked);
+void tcp_slow_start(struct tcp_sock *tp, u32 acked);
 void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w);
 
 u32 tcp_reno_ssthresh(struct sock *sk);
-- 
cgit v1.1


From d0bf4a9e92b9a93ffeeacbd7b6cb83e0ee3dc2ef Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 29 Sep 2014 13:29:15 -0700
Subject: net: cleanup and document skb fclone layout

Lets use a proper structure to clearly document and implement
skb fast clones.

Then, we might experiment more easily alternative layouts.

This patch adds a new skb_fclone_busy() helper, used by tcp and xfrm,
to stop leaking of implementation details.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 262efdb..d8f7d74 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -781,6 +781,31 @@ struct sk_buff *alloc_skb_with_frags(unsigned long header_len,
 				     int *errcode,
 				     gfp_t gfp_mask);
 
+/* Layout of fast clones : [skb1][skb2][fclone_ref] */
+struct sk_buff_fclones {
+	struct sk_buff	skb1;
+
+	struct sk_buff	skb2;
+
+	atomic_t	fclone_ref;
+};
+
+/**
+ *	skb_fclone_busy - check if fclone is busy
+ *	@skb: buffer
+ *
+ * Returns true is skb is a fast clone, and its clone is not freed.
+ */
+static inline bool skb_fclone_busy(const struct sk_buff *skb)
+{
+	const struct sk_buff_fclones *fclones;
+
+	fclones = container_of(skb, struct sk_buff_fclones, skb1);
+
+	return skb->fclone == SKB_FCLONE_ORIG &&
+	       fclones->skb2.fclone == SKB_FCLONE_CLONE;
+}
+
 static inline struct sk_buff *alloc_skb_fclone(unsigned int size,
 					       gfp_t priority)
 {
-- 
cgit v1.1


From 8bce6d7d0d1ede22af334ee241841e9278365278 Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Mon, 29 Sep 2014 20:22:29 -0700
Subject: udp: Generalize skb_udp_segment

skb_udp_segment is the function called from udp4_ufo_fragment to
segment a UDP tunnel packet. This function currently assumes
segmentation is transparent Ethernet bridging (i.e. VXLAN
encapsulation). This patch generalizes the function to
operate on either Ethertype or IP protocol.

The inner_protocol field must be set to the protocol of the inner
header. This can now be either an Ethertype or an IP protocol
(in a union). A new flag in the skbuff indicates which type is
effective. skb_set_inner_protocol and skb_set_inner_ipproto
helper functions were added to set the inner_protocol. These
functions are called from the point where the tunnel encapsulation
is occuring.

When skb_udp_tunnel_segment is called, the function to segment the
inner packet is selected based on the inner IP or Ethertype. In the
case of an IP protocol encapsulation, the function is derived from
inet[6]_offloads. In the case of Ethertype, skb->protocol is
set to the inner_protocol and skb_mac_gso_segment is called. (GRE
currently does this, but it might be possible to lookup the protocol
in offload_base and call the appropriate segmenation function
directly).

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 26 ++++++++++++++++++++++++--
 include/net/udp.h      |  3 ++-
 2 files changed, 26 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index d8f7d74..7c5036d 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -596,7 +596,8 @@ struct sk_buff {
 	__u8			ndisc_nodetype:2;
 #endif
 	__u8			ipvs_property:1;
-	/* 5 or 7 bit hole */
+	__u8			inner_protocol_type:1;
+	/* 4 or 6 bit hole */
 
 #ifdef CONFIG_NET_SCHED
 	__u16			tc_index;	/* traffic control index */
@@ -632,7 +633,11 @@ struct sk_buff {
 		__u32		reserved_tailroom;
 	};
 
-	__be16			inner_protocol;
+	union {
+		__be16		inner_protocol;
+		__u8		inner_ipproto;
+	};
+
 	__u16			inner_transport_header;
 	__u16			inner_network_header;
 	__u16			inner_mac_header;
@@ -1762,6 +1767,23 @@ static inline void skb_reserve(struct sk_buff *skb, int len)
 	skb->tail += len;
 }
 
+#define ENCAP_TYPE_ETHER	0
+#define ENCAP_TYPE_IPPROTO	1
+
+static inline void skb_set_inner_protocol(struct sk_buff *skb,
+					  __be16 protocol)
+{
+	skb->inner_protocol = protocol;
+	skb->inner_protocol_type = ENCAP_TYPE_ETHER;
+}
+
+static inline void skb_set_inner_ipproto(struct sk_buff *skb,
+					 __u8 ipproto)
+{
+	skb->inner_ipproto = ipproto;
+	skb->inner_protocol_type = ENCAP_TYPE_IPPROTO;
+}
+
 static inline void skb_reset_inner_headers(struct sk_buff *skb)
 {
 	skb->inner_mac_header = skb->mac_header;
diff --git a/include/net/udp.h b/include/net/udp.h
index 16f4e80..07f9b70 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -239,7 +239,8 @@ int udp_ioctl(struct sock *sk, int cmd, unsigned long arg);
 int udp_disconnect(struct sock *sk, int flags);
 unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait);
 struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
-				       netdev_features_t features);
+				       netdev_features_t features,
+				       bool is_ipv6);
 int udp_lib_getsockopt(struct sock *sk, int level, int optname,
 		       char __user *optval, int __user *optlen);
 int udp_lib_setsockopt(struct sock *sk, int level, int optname,
-- 
cgit v1.1


From a0efb80ce3abacfd22a4284c3730924fc2f1f077 Mon Sep 17 00:00:00 2001
From: WANG Cong <xiyou.wangcong@gmail.com>
Date: Tue, 30 Sep 2014 16:07:24 -0700
Subject: net_sched: avoid calling tcf_unbind_filter() in call_rcu callback

This fixes the following crash:

[   63.976822] general protection fault: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC
[   63.980094] CPU: 1 PID: 15 Comm: ksoftirqd/1 Not tainted 3.17.0-rc6+ #648
[   63.980094] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
[   63.980094] task: ffff880117dea690 ti: ffff880117dfc000 task.ti: ffff880117dfc000
[   63.980094] RIP: 0010:[<ffffffff817e6d07>]  [<ffffffff817e6d07>] u32_destroy_key+0x27/0x6d
[   63.980094] RSP: 0018:ffff880117dffcc0  EFLAGS: 00010202
[   63.980094] RAX: ffff880117dea690 RBX: ffff8800d02e0820 RCX: 0000000000000000
[   63.980094] RDX: 0000000000000001 RSI: 0000000000000002 RDI: 6b6b6b6b6b6b6b6b
[   63.980094] RBP: ffff880117dffcd0 R08: 0000000000000000 R09: 0000000000000000
[   63.980094] R10: 00006c0900006ba8 R11: 00006ba100006b9d R12: 0000000000000001
[   63.980094] R13: ffff8800d02e0898 R14: ffffffff817e6d4d R15: ffff880117387a30
[   63.980094] FS:  0000000000000000(0000) GS:ffff88011a800000(0000) knlGS:0000000000000000
[   63.980094] CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
[   63.980094] CR2: 00007f07e6732fed CR3: 000000011665b000 CR4: 00000000000006e0
[   63.980094] Stack:
[   63.980094]  ffff88011a9cd300 ffffffff82051ac0 ffff880117dffce0 ffffffff817e6d68
[   63.980094]  ffff880117dffd70 ffffffff810cb4c7 ffffffff810cb3cd ffff880117dfffd8
[   63.980094]  ffff880117dea690 ffff880117dea690 ffff880117dfffd8 000000000000000a
[   63.980094] Call Trace:
[   63.980094]  [<ffffffff817e6d68>] u32_delete_key_freepf_rcu+0x1b/0x1d
[   63.980094]  [<ffffffff810cb4c7>] rcu_process_callbacks+0x3bb/0x691
[   63.980094]  [<ffffffff810cb3cd>] ? rcu_process_callbacks+0x2c1/0x691
[   63.980094]  [<ffffffff817e6d4d>] ? u32_destroy_key+0x6d/0x6d
[   63.980094]  [<ffffffff810780a4>] __do_softirq+0x142/0x323
[   63.980094]  [<ffffffff810782a8>] run_ksoftirqd+0x23/0x53
[   63.980094]  [<ffffffff81092126>] smpboot_thread_fn+0x203/0x221
[   63.980094]  [<ffffffff81091f23>] ? smpboot_unpark_thread+0x33/0x33
[   63.980094]  [<ffffffff8108e44d>] kthread+0xc9/0xd1
[   63.980094]  [<ffffffff819e00ea>] ? do_wait_for_common+0xf8/0x125
[   63.980094]  [<ffffffff8108e384>] ? __kthread_parkme+0x61/0x61
[   63.980094]  [<ffffffff819e43ec>] ret_from_fork+0x7c/0xb0
[   63.980094]  [<ffffffff8108e384>] ? __kthread_parkme+0x61/0x61

tp could be freed in call_rcu callback too, the order is not guaranteed.

John Fastabend says:

====================
Its worth noting why this is safe. Any running schedulers will either
read the valid class field or it will be zeroed.

All schedulers today when the class is 0 do a lookup using the
same call used by the tcf_exts_bind(). So even if we have a running
classifier hit the null class pointer it will do a lookup and get
to the same result. This is particularly fragile at the moment because
the only way to verify this is to audit the schedulers call sites.
====================

Cc: John Fastabend <john.r.fastabend@intel.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Acked-by: John Fastabend <john.r.fastabend@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/pkt_cls.h | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 73f9532..ef44ad9 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -20,11 +20,7 @@ int unregister_tcf_proto_ops(struct tcf_proto_ops *ops);
 static inline unsigned long
 __cls_set_class(unsigned long *clp, unsigned long cl)
 {
-	unsigned long old_cl;
- 
-	old_cl = *clp;
-	*clp = cl;
-	return old_cl;
+	return xchg(clp, cl);
 }
 
 static inline unsigned long
-- 
cgit v1.1


From d068b02cfdfc27f5962ec82ec5568b706f599edc Mon Sep 17 00:00:00 2001
From: Petri Gynther <pgynther@google.com>
Date: Wed, 1 Oct 2014 11:58:02 -0700
Subject: net: phy: add BCM7425 and BCM7429 PHYs

Signed-off-by: Petri Gynther <pgynther@google.com>
Acked-by: Florian Fainelli <f.fainelli@gmai.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/brcmphy.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h
index 3f626fe..7ccd928 100644
--- a/include/linux/brcmphy.h
+++ b/include/linux/brcmphy.h
@@ -16,6 +16,8 @@
 #define PHY_ID_BCM7250			0xae025280
 #define PHY_ID_BCM7364			0xae025260
 #define PHY_ID_BCM7366			0x600d8490
+#define PHY_ID_BCM7425			0x03625e60
+#define PHY_ID_BCM7429			0x600d8730
 #define PHY_ID_BCM7439			0x600d8480
 #define PHY_ID_BCM7445			0x600d8510
 
-- 
cgit v1.1


From 51b0a5d8c21a91801bbef9bcc8639dc0b206c6cd Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Fri, 26 Sep 2014 14:35:14 +0200
Subject: netfilter: nft_reject: introduce icmp code abstraction for inet and
 bridge

This patch introduces the NFT_REJECT_ICMPX_UNREACH type which provides
an abstraction to the ICMP and ICMPv6 codes that you can use from the
inet and bridge tables, they are:

* NFT_REJECT_ICMPX_NO_ROUTE: no route to host - network unreachable
* NFT_REJECT_ICMPX_PORT_UNREACH: port unreachable
* NFT_REJECT_ICMPX_HOST_UNREACH: host unreachable
* NFT_REJECT_ICMPX_ADMIN_PROHIBITED: administratevely prohibited

You can still use the specific codes when restricting the rule to match
the corresponding layer 3 protocol.

I decided to not overload the existing NFT_REJECT_ICMP_UNREACH to have
different semantics depending on the table family and to allow the user
to specify ICMP family specific codes if they restrict it to the
corresponding family.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/ipv4/nf_reject.h   |  1 +
 include/net/netfilter/nft_reject.h       |  9 ++-------
 include/uapi/linux/netfilter/nf_tables.h | 21 +++++++++++++++++++++
 3 files changed, 24 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/ipv4/nf_reject.h b/include/net/netfilter/ipv4/nf_reject.h
index f713b5a..8ce0638 100644
--- a/include/net/netfilter/ipv4/nf_reject.h
+++ b/include/net/netfilter/ipv4/nf_reject.h
@@ -5,6 +5,7 @@
 #include <net/tcp.h>
 #include <net/route.h>
 #include <net/dst.h>
+#include <net/icmp.h>
 
 static inline void nf_send_unreach(struct sk_buff *skb_in, int code)
 {
diff --git a/include/net/netfilter/nft_reject.h b/include/net/netfilter/nft_reject.h
index 36b0da2..60fa153 100644
--- a/include/net/netfilter/nft_reject.h
+++ b/include/net/netfilter/nft_reject.h
@@ -14,12 +14,7 @@ int nft_reject_init(const struct nft_ctx *ctx,
 
 int nft_reject_dump(struct sk_buff *skb, const struct nft_expr *expr);
 
-void nft_reject_ipv4_eval(const struct nft_expr *expr,
-			  struct nft_data data[NFT_REG_MAX + 1],
-			  const struct nft_pktinfo *pkt);
-
-void nft_reject_ipv6_eval(const struct nft_expr *expr,
-			  struct nft_data data[NFT_REG_MAX + 1],
-			  const struct nft_pktinfo *pkt);
+int nft_reject_icmp_code(u8 code);
+int nft_reject_icmpv6_code(u8 code);
 
 #endif
diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index b72ccfe..c26df67 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -749,13 +749,34 @@ enum nft_queue_attributes {
  *
  * @NFT_REJECT_ICMP_UNREACH: reject using ICMP unreachable
  * @NFT_REJECT_TCP_RST: reject using TCP RST
+ * @NFT_REJECT_ICMPX_UNREACH: abstracted ICMP unreachable for bridge and inet
  */
 enum nft_reject_types {
 	NFT_REJECT_ICMP_UNREACH,
 	NFT_REJECT_TCP_RST,
+	NFT_REJECT_ICMPX_UNREACH,
 };
 
 /**
+ * enum nft_reject_code - Generic reject codes for IPv4/IPv6
+ *
+ * @NFT_REJECT_ICMPX_NO_ROUTE: no route to host / network unreachable
+ * @NFT_REJECT_ICMPX_PORT_UNREACH: port unreachable
+ * @NFT_REJECT_ICMPX_HOST_UNREACH: host unreachable
+ * @NFT_REJECT_ICMPX_ADMIN_PROHIBITED: administratively prohibited
+ *
+ * These codes are mapped to real ICMP and ICMPv6 codes.
+ */
+enum nft_reject_inet_code {
+	NFT_REJECT_ICMPX_NO_ROUTE	= 0,
+	NFT_REJECT_ICMPX_PORT_UNREACH,
+	NFT_REJECT_ICMPX_HOST_UNREACH,
+	NFT_REJECT_ICMPX_ADMIN_PROHIBITED,
+	__NFT_REJECT_ICMPX_MAX
+};
+#define NFT_REJECT_ICMPX_MAX	(__NFT_REJECT_ICMPX_MAX + 1)
+
+/**
  * enum nft_reject_attributes - nf_tables reject expression netlink attributes
  *
  * @NFTA_REJECT_TYPE: packet type to use (NLA_U32: nft_reject_types)
-- 
cgit v1.1


From c8d7b98bec43faaa6583c3135030be5eb4693acb Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Fri, 26 Sep 2014 14:35:15 +0200
Subject: netfilter: move nf_send_resetX() code to nf_reject_ipvX modules

Move nf_send_reset() and nf_send_reset6() to nf_reject_ipv4 and
nf_reject_ipv6 respectively. This code is shared by x_tables and
nf_tables.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/ipv4/nf_reject.h | 118 +--------------------------------
 1 file changed, 1 insertion(+), 117 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/ipv4/nf_reject.h b/include/net/netfilter/ipv4/nf_reject.h
index 8ce0638..e842719 100644
--- a/include/net/netfilter/ipv4/nf_reject.h
+++ b/include/net/netfilter/ipv4/nf_reject.h
@@ -1,10 +1,6 @@
 #ifndef _IPV4_NF_REJECT_H
 #define _IPV4_NF_REJECT_H
 
-#include <net/ip.h>
-#include <net/tcp.h>
-#include <net/route.h>
-#include <net/dst.h>
 #include <net/icmp.h>
 
 static inline void nf_send_unreach(struct sk_buff *skb_in, int code)
@@ -12,118 +8,6 @@ static inline void nf_send_unreach(struct sk_buff *skb_in, int code)
 	icmp_send(skb_in, ICMP_DEST_UNREACH, code, 0);
 }
 
-/* Send RST reply */
-static void nf_send_reset(struct sk_buff *oldskb, int hook)
-{
-	struct sk_buff *nskb;
-	const struct iphdr *oiph;
-	struct iphdr *niph;
-	const struct tcphdr *oth;
-	struct tcphdr _otcph, *tcph;
-
-	/* IP header checks: fragment. */
-	if (ip_hdr(oldskb)->frag_off & htons(IP_OFFSET))
-		return;
-
-	oth = skb_header_pointer(oldskb, ip_hdrlen(oldskb),
-				 sizeof(_otcph), &_otcph);
-	if (oth == NULL)
-		return;
-
-	/* No RST for RST. */
-	if (oth->rst)
-		return;
-
-	if (skb_rtable(oldskb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
-		return;
-
-	/* Check checksum */
-	if (nf_ip_checksum(oldskb, hook, ip_hdrlen(oldskb), IPPROTO_TCP))
-		return;
-	oiph = ip_hdr(oldskb);
-
-	nskb = alloc_skb(sizeof(struct iphdr) + sizeof(struct tcphdr) +
-			 LL_MAX_HEADER, GFP_ATOMIC);
-	if (!nskb)
-		return;
-
-	skb_reserve(nskb, LL_MAX_HEADER);
-
-	skb_reset_network_header(nskb);
-	niph = (struct iphdr *)skb_put(nskb, sizeof(struct iphdr));
-	niph->version	= 4;
-	niph->ihl	= sizeof(struct iphdr) / 4;
-	niph->tos	= 0;
-	niph->id	= 0;
-	niph->frag_off	= htons(IP_DF);
-	niph->protocol	= IPPROTO_TCP;
-	niph->check	= 0;
-	niph->saddr	= oiph->daddr;
-	niph->daddr	= oiph->saddr;
-
-	skb_reset_transport_header(nskb);
-	tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr));
-	memset(tcph, 0, sizeof(*tcph));
-	tcph->source	= oth->dest;
-	tcph->dest	= oth->source;
-	tcph->doff	= sizeof(struct tcphdr) / 4;
-
-	if (oth->ack)
-		tcph->seq = oth->ack_seq;
-	else {
-		tcph->ack_seq = htonl(ntohl(oth->seq) + oth->syn + oth->fin +
-				      oldskb->len - ip_hdrlen(oldskb) -
-				      (oth->doff << 2));
-		tcph->ack = 1;
-	}
-
-	tcph->rst	= 1;
-	tcph->check = ~tcp_v4_check(sizeof(struct tcphdr), niph->saddr,
-				    niph->daddr, 0);
-	nskb->ip_summed = CHECKSUM_PARTIAL;
-	nskb->csum_start = (unsigned char *)tcph - nskb->head;
-	nskb->csum_offset = offsetof(struct tcphdr, check);
-
-	/* ip_route_me_harder expects skb->dst to be set */
-	skb_dst_set_noref(nskb, skb_dst(oldskb));
-
-	nskb->protocol = htons(ETH_P_IP);
-	if (ip_route_me_harder(nskb, RTN_UNSPEC))
-		goto free_nskb;
-
-	niph->ttl	= ip4_dst_hoplimit(skb_dst(nskb));
-
-	/* "Never happens" */
-	if (nskb->len > dst_mtu(skb_dst(nskb)))
-		goto free_nskb;
-
-	nf_ct_attach(nskb, oldskb);
-
-#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
-	/* If we use ip_local_out for bridged traffic, the MAC source on
-	 * the RST will be ours, instead of the destination's.  This confuses
-	 * some routers/firewalls, and they drop the packet.  So we need to
-	 * build the eth header using the original destination's MAC as the
-	 * source, and send the RST packet directly.
-	 */
-	if (oldskb->nf_bridge) {
-		struct ethhdr *oeth = eth_hdr(oldskb);
-		nskb->dev = oldskb->nf_bridge->physindev;
-		niph->tot_len = htons(nskb->len);
-		ip_send_check(niph);
-		if (dev_hard_header(nskb, nskb->dev, ntohs(nskb->protocol),
-				    oeth->h_source, oeth->h_dest, nskb->len) < 0)
-			goto free_nskb;
-		dev_queue_xmit(nskb);
-	} else
-#endif
-		ip_local_out(nskb);
-
-	return;
-
- free_nskb:
-	kfree_skb(nskb);
-}
-
+void nf_send_reset(struct sk_buff *oldskb, int hook);
 
 #endif /* _IPV4_NF_REJECT_H */
-- 
cgit v1.1


From 4b7fd5d97ee6e599247b4a55122ca6ba80c8148d Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Thu, 2 Oct 2014 11:13:21 +0200
Subject: netfilter: explicit module dependency between br_netfilter and
 physdev

You can use physdev to match the physical interface enslaved to the
bridge device. This information is stored in skb->nf_bridge and it is
set up by br_netfilter. So, this is only available when iptables is
used from the bridge netfilter path.

Since 34666d4 ("netfilter: bridge: move br_netfilter out of the core"),
the br_netfilter code is modular. To reduce the impact of this change,
we can autoload the br_netfilter if the physdev match is used since
we assume that the users need br_netfilter in place.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/br_netfilter.h | 6 ++++++
 1 file changed, 6 insertions(+)
 create mode 100644 include/net/netfilter/br_netfilter.h

(limited to 'include')

diff --git a/include/net/netfilter/br_netfilter.h b/include/net/netfilter/br_netfilter.h
new file mode 100644
index 0000000..2aa6048
--- /dev/null
+++ b/include/net/netfilter/br_netfilter.h
@@ -0,0 +1,6 @@
+#ifndef _BR_NETFILTER_H_
+#define _BR_NETFILTER_H_
+
+void br_netfilter_enable(void);
+
+#endif /* _BR_NETFILTER_H_ */
-- 
cgit v1.1


From 07dcc686fa8f6667dec4696804cdb43a90267b9a Mon Sep 17 00:00:00 2001
From: Simon Horman <horms@verge.net.au>
Date: Tue, 30 Sep 2014 10:50:06 +0900
Subject: ipvs: Clean up comment style in ip_vs.h

* Consistently use the multi-line comment style for networking code:

  /* This
   * That
   * The other thing
   */

* Use single-line comment style for comments with only one line of text.

* In general follow the leading '*' of each line of a comment with a
  single space and then text.

* Add missing line break between functions, remove double line break,
  align comments to previous lines whenever possible.

Reported-by: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Signed-off-by: Simon Horman <horms@verge.net.au>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/ip_vs.h | 214 ++++++++++++++++++----------------------------------
 1 file changed, 75 insertions(+), 139 deletions(-)

(limited to 'include')

diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 576d7f0..615b20b 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -1,6 +1,5 @@
-/*
- *      IP Virtual Server
- *      data structure and functionality definitions
+/* IP Virtual Server
+ * data structure and functionality definitions
  */
 
 #ifndef _NET_IP_VS_H
@@ -12,7 +11,7 @@
 
 #include <linux/list.h>                 /* for struct list_head */
 #include <linux/spinlock.h>             /* for struct rwlock_t */
-#include <linux/atomic.h>                 /* for struct atomic_t */
+#include <linux/atomic.h>               /* for struct atomic_t */
 #include <linux/compiler.h>
 #include <linux/timer.h>
 #include <linux/bug.h>
@@ -30,15 +29,13 @@
 #endif
 #include <net/net_namespace.h>		/* Netw namespace */
 
-/*
- * Generic access of ipvs struct
- */
+/* Generic access of ipvs struct */
 static inline struct netns_ipvs *net_ipvs(struct net* net)
 {
 	return net->ipvs;
 }
-/*
- * Get net ptr from skb in traffic cases
+
+/* Get net ptr from skb in traffic cases
  * use skb_sknet when call is from userland (ioctl or netlink)
  */
 static inline struct net *skb_net(const struct sk_buff *skb)
@@ -90,8 +87,8 @@ static inline struct net *skb_sknet(const struct sk_buff *skb)
 	return &init_net;
 #endif
 }
-/*
- * This one needed for single_open_net since net is stored directly in
+
+/* This one needed for single_open_net since net is stored directly in
  * private not as a struct i.e. seq_file_net can't be used.
  */
 static inline struct net *seq_file_single_net(struct seq_file *seq)
@@ -108,7 +105,7 @@ extern int ip_vs_conn_tab_size;
 
 struct ip_vs_iphdr {
 	__u32 len;	/* IPv4 simply where L4 starts
-			   IPv6 where L4 Transport Header starts */
+			 * IPv6 where L4 Transport Header starts */
 	__u16 fragoffs; /* IPv6 fragment offset, 0 if first frag (or not frag)*/
 	__s16 protocol;
 	__s32 flags;
@@ -304,16 +301,11 @@ static inline const char *ip_vs_dbg_addr(int af, char *buf, size_t buf_len,
 #define LeaveFunction(level)   do {} while (0)
 #endif
 
-
-/*
- *      The port number of FTP service (in network order).
- */
+/* The port number of FTP service (in network order). */
 #define FTPPORT  cpu_to_be16(21)
 #define FTPDATA  cpu_to_be16(20)
 
-/*
- *      TCP State Values
- */
+/* TCP State Values */
 enum {
 	IP_VS_TCP_S_NONE = 0,
 	IP_VS_TCP_S_ESTABLISHED,
@@ -329,25 +321,19 @@ enum {
 	IP_VS_TCP_S_LAST
 };
 
-/*
- *	UDP State Values
- */
+/* UDP State Values */
 enum {
 	IP_VS_UDP_S_NORMAL,
 	IP_VS_UDP_S_LAST,
 };
 
-/*
- *	ICMP State Values
- */
+/* ICMP State Values */
 enum {
 	IP_VS_ICMP_S_NORMAL,
 	IP_VS_ICMP_S_LAST,
 };
 
-/*
- *	SCTP State Values
- */
+/* SCTP State Values */
 enum ip_vs_sctp_states {
 	IP_VS_SCTP_S_NONE,
 	IP_VS_SCTP_S_INIT1,
@@ -366,21 +352,18 @@ enum ip_vs_sctp_states {
 	IP_VS_SCTP_S_LAST
 };
 
-/*
- *	Delta sequence info structure
- *	Each ip_vs_conn has 2 (output AND input seq. changes).
- *      Only used in the VS/NAT.
+/* Delta sequence info structure
+ * Each ip_vs_conn has 2 (output AND input seq. changes).
+ * Only used in the VS/NAT.
  */
 struct ip_vs_seq {
 	__u32			init_seq;	/* Add delta from this seq */
 	__u32			delta;		/* Delta in sequence numbers */
 	__u32			previous_delta;	/* Delta in sequence numbers
-						   before last resized pkt */
+						 * before last resized pkt */
 };
 
-/*
- * counters per cpu
- */
+/* counters per cpu */
 struct ip_vs_counters {
 	__u32		conns;		/* connections scheduled */
 	__u32		inpkts;		/* incoming packets */
@@ -388,17 +371,13 @@ struct ip_vs_counters {
 	__u64		inbytes;	/* incoming bytes */
 	__u64		outbytes;	/* outgoing bytes */
 };
-/*
- * Stats per cpu
- */
+/* Stats per cpu */
 struct ip_vs_cpu_stats {
 	struct ip_vs_counters   ustats;
 	struct u64_stats_sync   syncp;
 };
 
-/*
- *	IPVS statistics objects
- */
+/* IPVS statistics objects */
 struct ip_vs_estimator {
 	struct list_head	list;
 
@@ -491,9 +470,7 @@ struct ip_vs_protocol {
 	void (*timeout_change)(struct ip_vs_proto_data *pd, int flags);
 };
 
-/*
- * protocol data per netns
- */
+/* protocol data per netns */
 struct ip_vs_proto_data {
 	struct ip_vs_proto_data	*next;
 	struct ip_vs_protocol	*pp;
@@ -520,9 +497,7 @@ struct ip_vs_conn_param {
 	__u8				pe_data_len;
 };
 
-/*
- *	IP_VS structure allocated for each dynamically scheduled connection
- */
+/* IP_VS structure allocated for each dynamically scheduled connection */
 struct ip_vs_conn {
 	struct hlist_node	c_list;         /* hashed list heads */
 	/* Protocol, addresses and port numbers */
@@ -561,17 +536,18 @@ struct ip_vs_conn {
 	struct ip_vs_dest       *dest;          /* real server */
 	atomic_t                in_pkts;        /* incoming packet counter */
 
-	/* packet transmitter for different forwarding methods.  If it
-	   mangles the packet, it must return NF_DROP or better NF_STOLEN,
-	   otherwise this must be changed to a sk_buff **.
-	   NF_ACCEPT can be returned when destination is local.
+	/* Packet transmitter for different forwarding methods.  If it
+	 * mangles the packet, it must return NF_DROP or better NF_STOLEN,
+	 * otherwise this must be changed to a sk_buff **.
+	 * NF_ACCEPT can be returned when destination is local.
 	 */
 	int (*packet_xmit)(struct sk_buff *skb, struct ip_vs_conn *cp,
 			   struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph);
 
 	/* Note: we can group the following members into a structure,
-	   in order to save more space, and the following members are
-	   only used in VS/NAT anyway */
+	 * in order to save more space, and the following members are
+	 * only used in VS/NAT anyway
+	 */
 	struct ip_vs_app        *app;           /* bound ip_vs_app object */
 	void                    *app_data;      /* Application private data */
 	struct ip_vs_seq        in_seq;         /* incoming seq. struct */
@@ -584,9 +560,7 @@ struct ip_vs_conn {
 	struct rcu_head		rcu_head;
 };
 
-/*
- *  To save some memory in conn table when name space is disabled.
- */
+/* To save some memory in conn table when name space is disabled. */
 static inline struct net *ip_vs_conn_net(const struct ip_vs_conn *cp)
 {
 #ifdef CONFIG_NET_NS
@@ -595,6 +569,7 @@ static inline struct net *ip_vs_conn_net(const struct ip_vs_conn *cp)
 	return &init_net;
 #endif
 }
+
 static inline void ip_vs_conn_net_set(struct ip_vs_conn *cp, struct net *net)
 {
 #ifdef CONFIG_NET_NS
@@ -612,13 +587,12 @@ static inline int ip_vs_conn_net_eq(const struct ip_vs_conn *cp,
 #endif
 }
 
-/*
- *	Extended internal versions of struct ip_vs_service_user and
- *	ip_vs_dest_user for IPv6 support.
+/* Extended internal versions of struct ip_vs_service_user and ip_vs_dest_user
+ * for IPv6 support.
  *
- *	We need these to conveniently pass around service and destination
- *	options, but unfortunately, we also need to keep the old definitions to
- *	maintain userspace backwards compatibility for the setsockopt interface.
+ * We need these to conveniently pass around service and destination
+ * options, but unfortunately, we also need to keep the old definitions to
+ * maintain userspace backwards compatibility for the setsockopt interface.
  */
 struct ip_vs_service_user_kern {
 	/* virtual service addresses */
@@ -656,8 +630,8 @@ struct ip_vs_dest_user_kern {
 
 
 /*
- *	The information about the virtual service offered to the net
- *	and the forwarding entries
+ * The information about the virtual service offered to the net and the
+ * forwarding entries.
  */
 struct ip_vs_service {
 	struct hlist_node	s_list;   /* for normal service table */
@@ -697,9 +671,8 @@ struct ip_vs_dest_dst {
 	struct rcu_head		rcu_head;
 };
 
-/*
- *	The real server destination forwarding entry
- *	with ip address, port number, and so on.
+/* The real server destination forwarding entry with ip address, port number,
+ * and so on.
  */
 struct ip_vs_dest {
 	struct list_head	n_list;   /* for the dests in the service */
@@ -738,10 +711,7 @@ struct ip_vs_dest {
 	unsigned int		in_rs_table:1;	/* we are in rs_table */
 };
 
-
-/*
- *	The scheduler object
- */
+/* The scheduler object */
 struct ip_vs_scheduler {
 	struct list_head	n_list;		/* d-linked list head */
 	char			*name;		/* scheduler name */
@@ -781,9 +751,7 @@ struct ip_vs_pe {
 	int (*show_pe_data)(const struct ip_vs_conn *cp, char *buf);
 };
 
-/*
- *	The application module object (a.k.a. app incarnation)
- */
+/* The application module object (a.k.a. app incarnation) */
 struct ip_vs_app {
 	struct list_head	a_list;		/* member in app list */
 	int			type;		/* IP_VS_APP_TYPE_xxx */
@@ -799,16 +767,14 @@ struct ip_vs_app {
 	atomic_t		usecnt;		/* usage counter */
 	struct rcu_head		rcu_head;
 
-	/*
-	 * output hook: Process packet in inout direction, diff set for TCP.
+	/* output hook: Process packet in inout direction, diff set for TCP.
 	 * Return: 0=Error, 1=Payload Not Mangled/Mangled but checksum is ok,
 	 *	   2=Mangled but checksum was not updated
 	 */
 	int (*pkt_out)(struct ip_vs_app *, struct ip_vs_conn *,
 		       struct sk_buff *, int *diff);
 
-	/*
-	 * input hook: Process packet in outin direction, diff set for TCP.
+	/* input hook: Process packet in outin direction, diff set for TCP.
 	 * Return: 0=Error, 1=Payload Not Mangled/Mangled but checksum is ok,
 	 *	   2=Mangled but checksum was not updated
 	 */
@@ -867,9 +833,7 @@ struct ipvs_master_sync_state {
 struct netns_ipvs {
 	int			gen;		/* Generation */
 	int			enable;		/* enable like nf_hooks do */
-	/*
-	 *	Hash table: for real service lookups
-	 */
+	/* Hash table: for real service lookups */
 	#define IP_VS_RTAB_BITS 4
 	#define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
 	#define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
@@ -903,7 +867,7 @@ struct netns_ipvs {
 	struct list_head	sctp_apps[SCTP_APP_TAB_SIZE];
 #endif
 	/* ip_vs_conn */
-	atomic_t		conn_count;      /*  connection counter */
+	atomic_t		conn_count;      /* connection counter */
 
 	/* ip_vs_ctl */
 	struct ip_vs_stats		tot_stats;  /* Statistics & est. */
@@ -990,9 +954,9 @@ struct netns_ipvs {
 	char			backup_mcast_ifn[IP_VS_IFNAME_MAXLEN];
 	/* net name space ptr */
 	struct net		*net;            /* Needed by timer routines */
-	/* Number of heterogeneous destinations, needed because
-	 * heterogeneous are not supported when synchronization is
-	 * enabled */
+	/* Number of heterogeneous destinations, needed becaus heterogeneous
+	 * are not supported when synchronization is enabled.
+	 */
 	unsigned int		mixed_address_family_dests;
 };
 
@@ -1147,9 +1111,8 @@ static inline int sysctl_backup_only(struct netns_ipvs *ipvs)
 
 #endif
 
-/*
- *      IPVS core functions
- *      (from ip_vs_core.c)
+/* IPVS core functions
+ * (from ip_vs_core.c)
  */
 const char *ip_vs_proto_name(unsigned int proto);
 void ip_vs_init_hash_table(struct list_head *table, int rows);
@@ -1157,11 +1120,9 @@ void ip_vs_init_hash_table(struct list_head *table, int rows);
 
 #define IP_VS_APP_TYPE_FTP	1
 
-/*
- *     ip_vs_conn handling functions
- *     (from ip_vs_conn.c)
+/* ip_vs_conn handling functions
+ * (from ip_vs_conn.c)
  */
-
 enum {
 	IP_VS_DIR_INPUT = 0,
 	IP_VS_DIR_OUTPUT,
@@ -1292,9 +1253,7 @@ ip_vs_control_add(struct ip_vs_conn *cp, struct ip_vs_conn *ctl_cp)
 	atomic_inc(&ctl_cp->n_control);
 }
 
-/*
- * IPVS netns init & cleanup functions
- */
+/* IPVS netns init & cleanup functions */
 int ip_vs_estimator_net_init(struct net *net);
 int ip_vs_control_net_init(struct net *net);
 int ip_vs_protocol_net_init(struct net *net);
@@ -1309,9 +1268,8 @@ void ip_vs_estimator_net_cleanup(struct net *net);
 void ip_vs_sync_net_cleanup(struct net *net);
 void ip_vs_service_net_cleanup(struct net *net);
 
-/*
- *      IPVS application functions
- *      (from ip_vs_app.c)
+/* IPVS application functions
+ * (from ip_vs_app.c)
  */
 #define IP_VS_APP_MAX_PORTS  8
 struct ip_vs_app *register_ip_vs_app(struct net *net, struct ip_vs_app *app);
@@ -1331,9 +1289,7 @@ int unregister_ip_vs_pe(struct ip_vs_pe *pe);
 struct ip_vs_pe *ip_vs_pe_getbyname(const char *name);
 struct ip_vs_pe *__ip_vs_pe_getbyname(const char *pe_name);
 
-/*
- * Use a #define to avoid all of module.h just for these trivial ops
- */
+/* Use a #define to avoid all of module.h just for these trivial ops */
 #define ip_vs_pe_get(pe)			\
 	if (pe && pe->module)			\
 		__module_get(pe->module);
@@ -1342,9 +1298,7 @@ struct ip_vs_pe *__ip_vs_pe_getbyname(const char *pe_name);
 	if (pe && pe->module)			\
 		module_put(pe->module);
 
-/*
- *	IPVS protocol functions (from ip_vs_proto.c)
- */
+/* IPVS protocol functions (from ip_vs_proto.c) */
 int ip_vs_protocol_init(void);
 void ip_vs_protocol_cleanup(void);
 void ip_vs_protocol_timeout_change(struct netns_ipvs *ipvs, int flags);
@@ -1362,9 +1316,8 @@ extern struct ip_vs_protocol ip_vs_protocol_esp;
 extern struct ip_vs_protocol ip_vs_protocol_ah;
 extern struct ip_vs_protocol ip_vs_protocol_sctp;
 
-/*
- *      Registering/unregistering scheduler functions
- *      (from ip_vs_sched.c)
+/* Registering/unregistering scheduler functions
+ * (from ip_vs_sched.c)
  */
 int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler);
 int unregister_ip_vs_scheduler(struct ip_vs_scheduler *scheduler);
@@ -1383,10 +1336,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
 
 void ip_vs_scheduler_err(struct ip_vs_service *svc, const char *msg);
 
-
-/*
- *      IPVS control data and functions (from ip_vs_ctl.c)
- */
+/* IPVS control data and functions (from ip_vs_ctl.c) */
 extern struct ip_vs_stats ip_vs_stats;
 extern int sysctl_ip_vs_sync_ver;
 
@@ -1427,26 +1377,21 @@ static inline void ip_vs_dest_put_and_free(struct ip_vs_dest *dest)
 		kfree(dest);
 }
 
-/*
- *      IPVS sync daemon data and function prototypes
- *      (from ip_vs_sync.c)
+/* IPVS sync daemon data and function prototypes
+ * (from ip_vs_sync.c)
  */
 int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid);
 int stop_sync_thread(struct net *net, int state);
 void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp, int pkts);
 
-/*
- *      IPVS rate estimator prototypes (from ip_vs_est.c)
- */
+/* IPVS rate estimator prototypes (from ip_vs_est.c) */
 void ip_vs_start_estimator(struct net *net, struct ip_vs_stats *stats);
 void ip_vs_stop_estimator(struct net *net, struct ip_vs_stats *stats);
 void ip_vs_zero_estimator(struct ip_vs_stats *stats);
 void ip_vs_read_estimator(struct ip_vs_stats_user *dst,
 			  struct ip_vs_stats *stats);
 
-/*
- *	Various IPVS packet transmitters (from ip_vs_xmit.c)
- */
+/* Various IPVS packet transmitters (from ip_vs_xmit.c) */
 int ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 		    struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph);
 int ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
@@ -1477,12 +1422,10 @@ int ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 #endif
 
 #ifdef CONFIG_SYSCTL
-/*
- *	This is a simple mechanism to ignore packets when
- *	we are loaded. Just set ip_vs_drop_rate to 'n' and
- *	we start to drop 1/rate of the packets
+/* This is a simple mechanism to ignore packets when
+ * we are loaded. Just set ip_vs_drop_rate to 'n' and
+ * we start to drop 1/rate of the packets
  */
-
 static inline int ip_vs_todrop(struct netns_ipvs *ipvs)
 {
 	if (!ipvs->drop_rate)
@@ -1496,9 +1439,7 @@ static inline int ip_vs_todrop(struct netns_ipvs *ipvs)
 static inline int ip_vs_todrop(struct netns_ipvs *ipvs) { return 0; }
 #endif
 
-/*
- *      ip_vs_fwd_tag returns the forwarding tag of the connection
- */
+/* ip_vs_fwd_tag returns the forwarding tag of the connection */
 #define IP_VS_FWD_METHOD(cp)  (cp->flags & IP_VS_CONN_F_FWD_MASK)
 
 static inline char ip_vs_fwd_tag(struct ip_vs_conn *cp)
@@ -1557,9 +1498,7 @@ static inline __wsum ip_vs_check_diff2(__be16 old, __be16 new, __wsum oldsum)
 	return csum_partial(diff, sizeof(diff), oldsum);
 }
 
-/*
- * Forget current conntrack (unconfirmed) and attach notrack entry
- */
+/* Forget current conntrack (unconfirmed) and attach notrack entry */
 static inline void ip_vs_notrack(struct sk_buff *skb)
 {
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
@@ -1576,9 +1515,8 @@ static inline void ip_vs_notrack(struct sk_buff *skb)
 }
 
 #ifdef CONFIG_IP_VS_NFCT
-/*
- *      Netfilter connection tracking
- *      (from ip_vs_nfct.c)
+/* Netfilter connection tracking
+ * (from ip_vs_nfct.c)
  */
 static inline int ip_vs_conntrack_enabled(struct netns_ipvs *ipvs)
 {
@@ -1617,14 +1555,12 @@ static inline int ip_vs_confirm_conntrack(struct sk_buff *skb)
 static inline void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp)
 {
 }
-/* CONFIG_IP_VS_NFCT */
-#endif
+#endif /* CONFIG_IP_VS_NFCT */
 
 static inline int
 ip_vs_dest_conn_overhead(struct ip_vs_dest *dest)
 {
-	/*
-	 * We think the overhead of processing active connections is 256
+	/* We think the overhead of processing active connections is 256
 	 * times higher than that of inactive connections in average. (This
 	 * 256 times might not be accurate, we will change it later) We
 	 * use the following formula to estimate the overhead now:
-- 
cgit v1.1


From dba4b74d2da8798626e2b702ad3f452671e335f7 Mon Sep 17 00:00:00 2001
From: Vladimir Kondratiev <qca_vkondrat@qca.qualcomm.com>
Date: Wed, 1 Oct 2014 15:05:25 +0300
Subject: wil6210: atomic I/O for the card memory

Introduce netdev IOCTLs, to be used by the debug tools.

Allows to read/write single dword value or
memory block, aligned to dword
Different address modes supported:
- BAR offset
- Firmware "linker" address
- target's AHB bus

Signed-off-by: Vladimir Kondratiev <qca_vkondrat@qca.qualcomm.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/uapi/linux/wil6210_uapi.h | 87 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 87 insertions(+)
 create mode 100644 include/uapi/linux/wil6210_uapi.h

(limited to 'include')

diff --git a/include/uapi/linux/wil6210_uapi.h b/include/uapi/linux/wil6210_uapi.h
new file mode 100644
index 0000000..6a3cddd
--- /dev/null
+++ b/include/uapi/linux/wil6210_uapi.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2014 Qualcomm Atheros, Inc.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef __WIL6210_UAPI_H__
+#define __WIL6210_UAPI_H__
+
+#if !defined(__KERNEL__)
+#define __user
+#endif
+
+#include <linux/sockios.h>
+
+/* Numbers SIOCDEVPRIVATE and SIOCDEVPRIVATE + 1
+ * are used by Android devices to implement PNO (preferred network offload).
+ * Albeit it is temporary solution, use different numbers to avoid conflicts
+ */
+
+/**
+ * Perform 32-bit I/O operation to the card memory
+ *
+ * User code should arrange data in memory like this:
+ *
+ *	struct wil_memio io;
+ *	struct ifreq ifr = {
+ *		.ifr_data = &io,
+ *	};
+ */
+#define WIL_IOCTL_MEMIO (SIOCDEVPRIVATE + 2)
+
+/**
+ * Perform block I/O operation to the card memory
+ *
+ * User code should arrange data in memory like this:
+ *
+ *	void *buf;
+ *	struct wil_memio_block io = {
+ *		.block = buf,
+ *	};
+ *	struct ifreq ifr = {
+ *		.ifr_data = &io,
+ *	};
+ */
+#define WIL_IOCTL_MEMIO_BLOCK (SIOCDEVPRIVATE + 3)
+
+/**
+ * operation to perform
+ *
+ * @wil_mmio_op_mask - bits defining operation,
+ * @wil_mmio_addr_mask - bits defining addressing mode
+ */
+enum wil_memio_op {
+	wil_mmio_read = 0,
+	wil_mmio_write = 1,
+	wil_mmio_op_mask = 0xff,
+	wil_mmio_addr_linker = 0 << 8,
+	wil_mmio_addr_ahb = 1 << 8,
+	wil_mmio_addr_bar = 2 << 8,
+	wil_mmio_addr_mask = 0xff00,
+};
+
+struct wil_memio {
+	uint32_t op; /* enum wil_memio_op */
+	uint32_t addr; /* should be 32-bit aligned */
+	uint32_t val;
+};
+
+struct wil_memio_block {
+	uint32_t op; /* enum wil_memio_op */
+	uint32_t addr; /* should be 32-bit aligned */
+	uint32_t size; /* should be multiple of 4 */
+	void __user *block; /* block address */
+};
+
+#endif /* __WIL6210_UAPI_H__ */
-- 
cgit v1.1


From 5772e9a3463b264cee5a4e73ef586ad482d7ba48 Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer <brouer@redhat.com>
Date: Wed, 1 Oct 2014 22:35:59 +0200
Subject: qdisc: bulk dequeue support for qdiscs with TCQ_F_ONETXQUEUE

Based on DaveM's recent API work on dev_hard_start_xmit(), that allows
sending/processing an entire skb list.

This patch implements qdisc bulk dequeue, by allowing multiple packets
to be dequeued in dequeue_skb().

The optimization principle for this is two fold, (1) to amortize
locking cost and (2) avoid expensive tailptr update for notifying HW.
 (1) Several packets are dequeued while holding the qdisc root_lock,
amortizing locking cost over several packet.  The dequeued SKB list is
processed under the TXQ lock in dev_hard_start_xmit(), thus also
amortizing the cost of the TXQ lock.
 (2) Further more, dev_hard_start_xmit() will utilize the skb->xmit_more
API to delay HW tailptr update, which also reduces the cost per
packet.

One restriction of the new API is that every SKB must belong to the
same TXQ.  This patch takes the easy way out, by restricting bulk
dequeue to qdisc's with the TCQ_F_ONETXQUEUE flag, that specifies the
qdisc only have attached a single TXQ.

Some detail about the flow; dev_hard_start_xmit() will process the skb
list, and transmit packets individually towards the driver (see
xmit_one()).  In case the driver stops midway in the list, the
remaining skb list is returned by dev_hard_start_xmit().  In
sch_direct_xmit() this returned list is requeued by dev_requeue_skb().

To avoid overshooting the HW limits, which results in requeuing, the
patch limits the amount of bytes dequeued, based on the drivers BQL
limits.  In-effect bulking will only happen for BQL enabled drivers.

Small amounts for extra HoL blocking (2x MTU/0.24ms) were
measured at 100Mbit/s, with bulking 8 packets, but the
oscillating nature of the measurement indicate something, like
sched latency might be causing this effect. More comparisons
show, that this oscillation goes away occationally. Thus, we
disregard this artifact completely and remove any "magic" bulking
limit.

For now, as a conservative approach, stop bulking when seeing TSO and
segmented GSO packets.  They already benefit from bulking on their own.
A followup patch add this, to allow easier bisect-ability for finding
regressions.

Jointed work with Hannes, Daniel and Florian.

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'include')

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index f126698..d17ed6f 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -7,6 +7,7 @@
 #include <linux/pkt_sched.h>
 #include <linux/pkt_cls.h>
 #include <linux/percpu.h>
+#include <linux/dynamic_queue_limits.h>
 #include <net/gen_stats.h>
 #include <net/rtnetlink.h>
 
@@ -119,6 +120,21 @@ static inline void qdisc_run_end(struct Qdisc *qdisc)
 	qdisc->__state &= ~__QDISC___STATE_RUNNING;
 }
 
+static inline bool qdisc_may_bulk(const struct Qdisc *qdisc)
+{
+	return qdisc->flags & TCQ_F_ONETXQUEUE;
+}
+
+static inline int qdisc_avail_bulklimit(const struct netdev_queue *txq)
+{
+#ifdef CONFIG_BQL
+	/* Non-BQL migrated drivers will return 0, too. */
+	return dql_avail(&txq->dql);
+#else
+	return 0;
+#endif
+}
+
 static inline bool qdisc_is_throttled(const struct Qdisc *qdisc)
 {
 	return test_bit(__QDISC_STATE_THROTTLED, &qdisc->state) ? true : false;
-- 
cgit v1.1


From 55a93b3ea780908b7d1b3a8cf1976223a9268d78 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 3 Oct 2014 15:31:07 -0700
Subject: qdisc: validate skb without holding lock

Validation of skb can be pretty expensive :

GSO segmentation and/or checksum computations.

We can do this without holding qdisc lock, so that other cpus
can queue additional packets.

Trick is that requeued packets were already validated, so we carry
a boolean so that sch_direct_xmit() can validate a fresh skb list,
or directly use an old one.

Tested on 40Gb NIC (8 TX queues) and 200 concurrent flows, 48 threads
host.

Turning TSO on or off had no effect on throughput, only few more cpu
cycles. Lock contention on qdisc lock disappeared.

Same if disabling TX checksum offload.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 2 +-
 include/net/pkt_sched.h   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 9b7fbac..910fb17 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2821,7 +2821,7 @@ int dev_set_mac_address(struct net_device *, struct sockaddr *);
 int dev_change_carrier(struct net_device *, bool new_carrier);
 int dev_get_phys_port_id(struct net_device *dev,
 			 struct netdev_phys_port_id *ppid);
-struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device *dev);
+struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev);
 struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 				    struct netdev_queue *txq, int *ret);
 int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb);
diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index 8bbe626..e4b3c82 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -99,7 +99,7 @@ void qdisc_put_stab(struct qdisc_size_table *tab);
 void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc);
 int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
 		    struct net_device *dev, struct netdev_queue *txq,
-		    spinlock_t *root_lock);
+		    spinlock_t *root_lock, bool validate);
 
 void __qdisc_run(struct Qdisc *q);
 
-- 
cgit v1.1


From c7a08ac7ee68b9af0d5af99c7b34b574cac4d144 Mon Sep 17 00:00:00 2001
From: Eli Cohen <eli@mellanox.com>
Date: Thu, 2 Oct 2014 12:19:42 +0300
Subject: net/mlx5_core: Update device capabilities handling

Rearrange struct mlx5_caps so it has a "gen" field to represent the current
capabilities configured for the device. Max capabilities can also be queried
from the device. Also update capabilities struct to contain more fields as per
the latest revision if firmware specification.

Signed-off-by: Eli Cohen <eli@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mlx5/device.h | 24 +++++++++++++++---------
 include/linux/mlx5/driver.h | 28 +++++++++++++++++++++++++---
 2 files changed, 40 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 3349471..dce01fd 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -71,6 +71,11 @@ enum {
 };
 
 enum {
+	MLX5_MIN_PKEY_TABLE_SIZE = 128,
+	MLX5_MAX_LOG_PKEY_TABLE  = 5,
+};
+
+enum {
 	MLX5_PERM_LOCAL_READ	= 1 << 2,
 	MLX5_PERM_LOCAL_WRITE	= 1 << 3,
 	MLX5_PERM_REMOTE_READ	= 1 << 4,
@@ -184,10 +189,10 @@ enum {
 	MLX5_DEV_CAP_FLAG_CQ_MODER	= 1LL << 29,
 	MLX5_DEV_CAP_FLAG_RESIZE_CQ	= 1LL << 30,
 	MLX5_DEV_CAP_FLAG_RESIZE_SRQ	= 1LL << 32,
+	MLX5_DEV_CAP_FLAG_DCT		= 1LL << 37,
 	MLX5_DEV_CAP_FLAG_REMOTE_FENCE	= 1LL << 38,
 	MLX5_DEV_CAP_FLAG_TLP_HINTS	= 1LL << 39,
 	MLX5_DEV_CAP_FLAG_SIG_HAND_OVER	= 1LL << 40,
-	MLX5_DEV_CAP_FLAG_DCT		= 1LL << 41,
 	MLX5_DEV_CAP_FLAG_CMDIF_CSUM	= 3LL << 46,
 };
 
@@ -243,10 +248,14 @@ enum {
 };
 
 enum {
-	MLX5_CAP_OFF_DCT		= 41,
 	MLX5_CAP_OFF_CMDIF_CSUM		= 46,
 };
 
+enum {
+	HCA_CAP_OPMOD_GET_MAX	= 0,
+	HCA_CAP_OPMOD_GET_CUR	= 1,
+};
+
 struct mlx5_inbox_hdr {
 	__be16		opcode;
 	u8		rsvd[4];
@@ -303,9 +312,10 @@ struct mlx5_hca_cap {
 	u8	log_max_ra_req_qp;
 	u8	rsvd10;
 	u8	log_max_ra_res_qp;
-	u8	rsvd11[4];
+	u8	pad_cap;
+	u8	rsvd11[3];
 	__be16	max_qp_count;
-	__be16	rsvd12;
+	__be16	pkey_table_size;
 	u8	rsvd13;
 	u8	local_ca_ack_delay;
 	u8	rsvd14;
@@ -335,11 +345,7 @@ struct mlx5_hca_cap {
 	u8	log_max_xrcd;
 	u8	rsvd25[42];
 	__be16  log_uar_page_sz;
-	u8	rsvd26[28];
-	u8	log_max_atomic_size_qp;
-	u8	rsvd27[2];
-	u8	log_max_atomic_size_dc;
-	u8	rsvd28[76];
+	u8	rsvd26[108];
 };
 
 
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index b88e9b4..45a2add 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -335,23 +335,30 @@ struct mlx5_port_caps {
 	int	pkey_table_len;
 };
 
-struct mlx5_caps {
+struct mlx5_general_caps {
 	u8	log_max_eq;
 	u8	log_max_cq;
 	u8	log_max_qp;
 	u8	log_max_mkey;
 	u8	log_max_pd;
 	u8	log_max_srq;
+	u8	log_max_strq;
+	u8	log_max_mrw_sz;
+	u8	log_max_bsf_list_size;
+	u8	log_max_klm_list_size;
 	u32	max_cqes;
 	int	max_wqes;
+	u32	max_eqes;
+	u32	max_indirection;
 	int	max_sq_desc_sz;
 	int	max_rq_desc_sz;
+	int	max_dc_sq_desc_sz;
 	u64	flags;
 	u16	stat_rate_support;
 	int	log_max_msg;
 	int	num_ports;
-	int	max_ra_res_qp;
-	int	max_ra_req_qp;
+	u8	log_max_ra_res_qp;
+	u8	log_max_ra_req_qp;
 	int	max_srq_wqes;
 	int	bf_reg_size;
 	int	bf_regs_per_page;
@@ -363,6 +370,19 @@ struct mlx5_caps {
 	u8	log_max_mcg;
 	u32	max_qp_mcg;
 	int	min_page_sz;
+	int	pd_cap;
+	u32	max_qp_counters;
+	u32	pkey_table_size;
+	u8	log_max_ra_req_dc;
+	u8	log_max_ra_res_dc;
+	u32	uar_sz;
+	u8	min_log_pg_sz;
+	u8	log_max_xrcd;
+	u16	log_uar_page_sz;
+};
+
+struct mlx5_caps {
+	struct mlx5_general_caps gen;
 };
 
 struct mlx5_cmd_mailbox {
@@ -695,6 +715,8 @@ void mlx5_cmd_cleanup(struct mlx5_core_dev *dev);
 void mlx5_cmd_use_events(struct mlx5_core_dev *dev);
 void mlx5_cmd_use_polling(struct mlx5_core_dev *dev);
 int mlx5_cmd_status_to_err(struct mlx5_outbox_hdr *hdr);
+int mlx5_core_get_caps(struct mlx5_core_dev *dev, struct mlx5_caps *caps,
+		       u16 opmod);
 int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
 		  int out_size);
 int mlx5_cmd_exec_cb(struct mlx5_core_dev *dev, void *in, int in_size,
-- 
cgit v1.1


From d29b796adada8780db3512c4a34b339f9aeef1ae Mon Sep 17 00:00:00 2001
From: Eli Cohen <eli@mellanox.com>
Date: Thu, 2 Oct 2014 12:19:43 +0300
Subject: net/mlx5_core: Use hardware registers description header file

Add an auto generated header file that describes hardware registers along with
set of macros that set/get values. The macros do static checks to avoid
overflow, handle endianess, and overall provide a clean way to code commands.
Currently the header file is small and we will add structs as we make use of
the macros.
A few commands were removed from the commands enum since they are not supported
currently and will be added when support is available.

Signed-off-by: Eli Cohen <eli@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mlx5/device.h   |  44 +++++++++++++
 include/linux/mlx5/driver.h   |  76 +---------------------
 include/linux/mlx5/mlx5_ifc.h | 143 ++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 188 insertions(+), 75 deletions(-)
 create mode 100644 include/linux/mlx5/mlx5_ifc.h

(limited to 'include')

diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index dce01fd..0032687 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -44,6 +44,50 @@
 #error Host endianness not defined
 #endif
 
+/* helper macros */
+#define __mlx5_nullp(typ) ((struct mlx5_ifc_##typ##_bits *)0)
+#define __mlx5_bit_sz(typ, fld) sizeof(__mlx5_nullp(typ)->fld)
+#define __mlx5_bit_off(typ, fld) ((unsigned)(unsigned long)(&(__mlx5_nullp(typ)->fld)))
+#define __mlx5_dw_off(typ, fld) (__mlx5_bit_off(typ, fld) / 32)
+#define __mlx5_64_off(typ, fld) (__mlx5_bit_off(typ, fld) / 64)
+#define __mlx5_dw_bit_off(typ, fld) (32 - __mlx5_bit_sz(typ, fld) - (__mlx5_bit_off(typ, fld) & 0x1f))
+#define __mlx5_mask(typ, fld) ((u32)((1ull << __mlx5_bit_sz(typ, fld)) - 1))
+#define __mlx5_dw_mask(typ, fld) (__mlx5_mask(typ, fld) << __mlx5_dw_bit_off(typ, fld))
+#define __mlx5_st_sz_bits(typ) sizeof(struct mlx5_ifc_##typ##_bits)
+
+#define MLX5_FLD_SZ_BYTES(typ, fld) (__mlx5_bit_sz(typ, fld) / 8)
+#define MLX5_ST_SZ_BYTES(typ) (sizeof(struct mlx5_ifc_##typ##_bits) / 8)
+#define MLX5_ST_SZ_DW(typ) (sizeof(struct mlx5_ifc_##typ##_bits) / 32)
+#define MLX5_BYTE_OFF(typ, fld) (__mlx5_bit_off(typ, fld) / 8)
+#define MLX5_ADDR_OF(typ, p, fld) ((char *)(p) + MLX5_BYTE_OFF(typ, fld))
+
+/* insert a value to a struct */
+#define MLX5_SET(typ, p, fld, v) do { \
+	BUILD_BUG_ON(__mlx5_st_sz_bits(typ) % 32);             \
+	*((__be32 *)(p) + __mlx5_dw_off(typ, fld)) = \
+	cpu_to_be32((be32_to_cpu(*((__be32 *)(p) + __mlx5_dw_off(typ, fld))) & \
+		     (~__mlx5_dw_mask(typ, fld))) | (((v) & __mlx5_mask(typ, fld)) \
+		     << __mlx5_dw_bit_off(typ, fld))); \
+} while (0)
+
+#define MLX5_GET(typ, p, fld) ((be32_to_cpu(*((__be32 *)(p) +\
+__mlx5_dw_off(typ, fld))) >> __mlx5_dw_bit_off(typ, fld)) & \
+__mlx5_mask(typ, fld))
+
+#define MLX5_GET_PR(typ, p, fld) ({ \
+	u32 ___t = MLX5_GET(typ, p, fld); \
+	pr_debug(#fld " = 0x%x\n", ___t); \
+	___t; \
+})
+
+#define MLX5_SET64(typ, p, fld, v) do { \
+	BUILD_BUG_ON(__mlx5_bit_sz(typ, fld) != 64); \
+	BUILD_BUG_ON(__mlx5_bit_off(typ, fld) % 64); \
+	*((__be64 *)(p) + __mlx5_64_off(typ, fld)) = cpu_to_be64(v); \
+} while (0)
+
+#define MLX5_GET64(typ, p, fld) be64_to_cpu(*((__be64 *)(p) + __mlx5_64_off(typ, fld)))
+
 enum {
 	MLX5_MAX_COMMANDS		= 32,
 	MLX5_CMD_DATA_BLOCK_SIZE	= 512,
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 45a2add..6f48dc7 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -44,6 +44,7 @@
 
 #include <linux/mlx5/device.h>
 #include <linux/mlx5/doorbell.h>
+#include <linux/mlx5/mlx5_ifc.h>
 
 enum {
 	MLX5_BOARD_ID_LEN = 64,
@@ -99,81 +100,6 @@ enum {
 };
 
 enum {
-	MLX5_CMD_OP_QUERY_HCA_CAP		= 0x100,
-	MLX5_CMD_OP_QUERY_ADAPTER		= 0x101,
-	MLX5_CMD_OP_INIT_HCA			= 0x102,
-	MLX5_CMD_OP_TEARDOWN_HCA		= 0x103,
-	MLX5_CMD_OP_ENABLE_HCA			= 0x104,
-	MLX5_CMD_OP_DISABLE_HCA			= 0x105,
-	MLX5_CMD_OP_QUERY_PAGES			= 0x107,
-	MLX5_CMD_OP_MANAGE_PAGES		= 0x108,
-	MLX5_CMD_OP_SET_HCA_CAP			= 0x109,
-
-	MLX5_CMD_OP_CREATE_MKEY			= 0x200,
-	MLX5_CMD_OP_QUERY_MKEY			= 0x201,
-	MLX5_CMD_OP_DESTROY_MKEY		= 0x202,
-	MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS	= 0x203,
-
-	MLX5_CMD_OP_CREATE_EQ			= 0x301,
-	MLX5_CMD_OP_DESTROY_EQ			= 0x302,
-	MLX5_CMD_OP_QUERY_EQ			= 0x303,
-
-	MLX5_CMD_OP_CREATE_CQ			= 0x400,
-	MLX5_CMD_OP_DESTROY_CQ			= 0x401,
-	MLX5_CMD_OP_QUERY_CQ			= 0x402,
-	MLX5_CMD_OP_MODIFY_CQ			= 0x403,
-
-	MLX5_CMD_OP_CREATE_QP			= 0x500,
-	MLX5_CMD_OP_DESTROY_QP			= 0x501,
-	MLX5_CMD_OP_RST2INIT_QP			= 0x502,
-	MLX5_CMD_OP_INIT2RTR_QP			= 0x503,
-	MLX5_CMD_OP_RTR2RTS_QP			= 0x504,
-	MLX5_CMD_OP_RTS2RTS_QP			= 0x505,
-	MLX5_CMD_OP_SQERR2RTS_QP		= 0x506,
-	MLX5_CMD_OP_2ERR_QP			= 0x507,
-	MLX5_CMD_OP_RTS2SQD_QP			= 0x508,
-	MLX5_CMD_OP_SQD2RTS_QP			= 0x509,
-	MLX5_CMD_OP_2RST_QP			= 0x50a,
-	MLX5_CMD_OP_QUERY_QP			= 0x50b,
-	MLX5_CMD_OP_CONF_SQP			= 0x50c,
-	MLX5_CMD_OP_MAD_IFC			= 0x50d,
-	MLX5_CMD_OP_INIT2INIT_QP		= 0x50e,
-	MLX5_CMD_OP_SUSPEND_QP			= 0x50f,
-	MLX5_CMD_OP_UNSUSPEND_QP		= 0x510,
-	MLX5_CMD_OP_SQD2SQD_QP			= 0x511,
-	MLX5_CMD_OP_ALLOC_QP_COUNTER_SET	= 0x512,
-	MLX5_CMD_OP_DEALLOC_QP_COUNTER_SET	= 0x513,
-	MLX5_CMD_OP_QUERY_QP_COUNTER_SET	= 0x514,
-
-	MLX5_CMD_OP_CREATE_PSV			= 0x600,
-	MLX5_CMD_OP_DESTROY_PSV			= 0x601,
-	MLX5_CMD_OP_QUERY_PSV			= 0x602,
-	MLX5_CMD_OP_QUERY_SIG_RULE_TABLE	= 0x603,
-	MLX5_CMD_OP_QUERY_BLOCK_SIZE_TABLE	= 0x604,
-
-	MLX5_CMD_OP_CREATE_SRQ			= 0x700,
-	MLX5_CMD_OP_DESTROY_SRQ			= 0x701,
-	MLX5_CMD_OP_QUERY_SRQ			= 0x702,
-	MLX5_CMD_OP_ARM_RQ			= 0x703,
-	MLX5_CMD_OP_RESIZE_SRQ			= 0x704,
-
-	MLX5_CMD_OP_ALLOC_PD			= 0x800,
-	MLX5_CMD_OP_DEALLOC_PD			= 0x801,
-	MLX5_CMD_OP_ALLOC_UAR			= 0x802,
-	MLX5_CMD_OP_DEALLOC_UAR			= 0x803,
-
-	MLX5_CMD_OP_ATTACH_TO_MCG		= 0x806,
-	MLX5_CMD_OP_DETACH_FROM_MCG		= 0x807,
-
-
-	MLX5_CMD_OP_ALLOC_XRCD			= 0x80e,
-	MLX5_CMD_OP_DEALLOC_XRCD		= 0x80f,
-
-	MLX5_CMD_OP_ACCESS_REG			= 0x805,
-	MLX5_CMD_OP_MAX				= 0x810,
-};
-
-enum {
 	MLX5_REG_PCAP		 = 0x5001,
 	MLX5_REG_PMTU		 = 0x5003,
 	MLX5_REG_PTYS		 = 0x5004,
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
new file mode 100644
index 0000000..df3bd9b
--- /dev/null
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -0,0 +1,143 @@
+/*
+ * Copyright (c) 2014, Mellanox Technologies inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef MLX5_IFC_H
+#define MLX5_IFC_H
+
+enum {
+	MLX5_CMD_OP_QUERY_HCA_CAP                 = 0x100,
+	MLX5_CMD_OP_QUERY_ADAPTER                 = 0x101,
+	MLX5_CMD_OP_INIT_HCA                      = 0x102,
+	MLX5_CMD_OP_TEARDOWN_HCA                  = 0x103,
+	MLX5_CMD_OP_ENABLE_HCA                    = 0x104,
+	MLX5_CMD_OP_DISABLE_HCA                   = 0x105,
+	MLX5_CMD_OP_QUERY_PAGES                   = 0x107,
+	MLX5_CMD_OP_MANAGE_PAGES                  = 0x108,
+	MLX5_CMD_OP_SET_HCA_CAP                   = 0x109,
+	MLX5_CMD_OP_CREATE_MKEY                   = 0x200,
+	MLX5_CMD_OP_QUERY_MKEY                    = 0x201,
+	MLX5_CMD_OP_DESTROY_MKEY                  = 0x202,
+	MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS        = 0x203,
+	MLX5_CMD_OP_PAGE_FAULT_RESUME             = 0x204,
+	MLX5_CMD_OP_CREATE_EQ                     = 0x301,
+	MLX5_CMD_OP_DESTROY_EQ                    = 0x302,
+	MLX5_CMD_OP_QUERY_EQ                      = 0x303,
+	MLX5_CMD_OP_GEN_EQE                       = 0x304,
+	MLX5_CMD_OP_CREATE_CQ                     = 0x400,
+	MLX5_CMD_OP_DESTROY_CQ                    = 0x401,
+	MLX5_CMD_OP_QUERY_CQ                      = 0x402,
+	MLX5_CMD_OP_MODIFY_CQ                     = 0x403,
+	MLX5_CMD_OP_CREATE_QP                     = 0x500,
+	MLX5_CMD_OP_DESTROY_QP                    = 0x501,
+	MLX5_CMD_OP_RST2INIT_QP                   = 0x502,
+	MLX5_CMD_OP_INIT2RTR_QP                   = 0x503,
+	MLX5_CMD_OP_RTR2RTS_QP                    = 0x504,
+	MLX5_CMD_OP_RTS2RTS_QP                    = 0x505,
+	MLX5_CMD_OP_SQERR2RTS_QP                  = 0x506,
+	MLX5_CMD_OP_2ERR_QP                       = 0x507,
+	MLX5_CMD_OP_2RST_QP                       = 0x50a,
+	MLX5_CMD_OP_QUERY_QP                      = 0x50b,
+	MLX5_CMD_OP_INIT2INIT_QP                  = 0x50e,
+	MLX5_CMD_OP_CREATE_PSV                    = 0x600,
+	MLX5_CMD_OP_DESTROY_PSV                   = 0x601,
+	MLX5_CMD_OP_CREATE_SRQ                    = 0x700,
+	MLX5_CMD_OP_DESTROY_SRQ                   = 0x701,
+	MLX5_CMD_OP_QUERY_SRQ                     = 0x702,
+	MLX5_CMD_OP_ARM_RQ                        = 0x703,
+	MLX5_CMD_OP_RESIZE_SRQ                    = 0x704,
+	MLX5_CMD_OP_CREATE_DCT                    = 0x710,
+	MLX5_CMD_OP_DESTROY_DCT                   = 0x711,
+	MLX5_CMD_OP_DRAIN_DCT                     = 0x712,
+	MLX5_CMD_OP_QUERY_DCT                     = 0x713,
+	MLX5_CMD_OP_ARM_DCT_FOR_KEY_VIOLATION     = 0x714,
+	MLX5_CMD_OP_QUERY_VPORT_STATE             = 0x750,
+	MLX5_CMD_OP_MODIFY_VPORT_STATE            = 0x751,
+	MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT       = 0x752,
+	MLX5_CMD_OP_MODIFY_ESW_VPORT_CONTEXT      = 0x753,
+	MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT       = 0x754,
+	MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT      = 0x755,
+	MLX5_CMD_OP_QUERY_RCOE_ADDRESS            = 0x760,
+	MLX5_CMD_OP_SET_ROCE_ADDRESS              = 0x761,
+	MLX5_CMD_OP_QUERY_VPORT_COUNTER           = 0x770,
+	MLX5_CMD_OP_ALLOC_Q_COUNTER               = 0x771,
+	MLX5_CMD_OP_DEALLOC_Q_COUNTER             = 0x772,
+	MLX5_CMD_OP_QUERY_Q_COUNTER               = 0x773,
+	MLX5_CMD_OP_ALLOC_PD                      = 0x800,
+	MLX5_CMD_OP_DEALLOC_PD                    = 0x801,
+	MLX5_CMD_OP_ALLOC_UAR                     = 0x802,
+	MLX5_CMD_OP_DEALLOC_UAR                   = 0x803,
+	MLX5_CMD_OP_CONFIG_INT_MODERATION         = 0x804,
+	MLX5_CMD_OP_ACCESS_REG                    = 0x805,
+	MLX5_CMD_OP_ATTACH_TO_MCG                 = 0x806,
+	MLX5_CMD_OP_DETACH_FROM_MCG               = 0x807,
+	MLX5_CMD_OP_GET_DROPPED_PACKET_LOG        = 0x80a,
+	MLX5_CMD_OP_MAD_IFC                       = 0x50d,
+	MLX5_CMD_OP_QUERY_MAD_DEMUX               = 0x80b,
+	MLX5_CMD_OP_SET_MAD_DEMUX                 = 0x80c,
+	MLX5_CMD_OP_NOP                           = 0x80d,
+	MLX5_CMD_OP_ALLOC_XRCD                    = 0x80e,
+	MLX5_CMD_OP_DEALLOC_XRCD                  = 0x80f,
+	MLX5_CMD_OP_SET_BURST_SIZE                = 0x812,
+	MLX5_CMD_OP_QUERY_BURST_SZIE              = 0x813,
+	MLX5_CMD_OP_ACTIVATE_TRACER               = 0x814,
+	MLX5_CMD_OP_DEACTIVATE_TRACER             = 0x815,
+	MLX5_CMD_OP_CREATE_SNIFFER_RULE           = 0x820,
+	MLX5_CMD_OP_DESTROY_SNIFFER_RULE          = 0x821,
+	MLX5_CMD_OP_QUERY_CONG_PARAMS             = 0x822,
+	MLX5_CMD_OP_MODIFY_CONG_PARAMS            = 0x823,
+	MLX5_CMD_OP_QUERY_CONG_STATISTICS         = 0x824,
+	MLX5_CMD_OP_CREATE_TIR                    = 0x900,
+	MLX5_CMD_OP_MODIFY_TIR                    = 0x901,
+	MLX5_CMD_OP_DESTROY_TIR                   = 0x902,
+	MLX5_CMD_OP_QUERY_TIR                     = 0x903,
+	MLX5_CMD_OP_CREATE_TIS                    = 0x912,
+	MLX5_CMD_OP_MODIFY_TIS                    = 0x913,
+	MLX5_CMD_OP_DESTROY_TIS                   = 0x914,
+	MLX5_CMD_OP_QUERY_TIS                     = 0x915,
+	MLX5_CMD_OP_CREATE_SQ                     = 0x904,
+	MLX5_CMD_OP_MODIFY_SQ                     = 0x905,
+	MLX5_CMD_OP_DESTROY_SQ                    = 0x906,
+	MLX5_CMD_OP_QUERY_SQ                      = 0x907,
+	MLX5_CMD_OP_CREATE_RQ                     = 0x908,
+	MLX5_CMD_OP_MODIFY_RQ                     = 0x909,
+	MLX5_CMD_OP_DESTROY_RQ                    = 0x90a,
+	MLX5_CMD_OP_QUERY_RQ                      = 0x90b,
+	MLX5_CMD_OP_CREATE_RMP                    = 0x90c,
+	MLX5_CMD_OP_MODIFY_RMP                    = 0x90d,
+	MLX5_CMD_OP_DESTROY_RMP                   = 0x90e,
+	MLX5_CMD_OP_QUERY_RMP                     = 0x90f,
+	MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY          = 0x910,
+	MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY        = 0x911,
+	MLX5_CMD_OP_MAX				  = 0x911
+};
+
+#endif /* MLX5_IFC_H */
-- 
cgit v1.1


From b775516b042f9e35f856bd2914afefd9d23021d7 Mon Sep 17 00:00:00 2001
From: Eli Cohen <eli@mellanox.com>
Date: Thu, 2 Oct 2014 12:19:44 +0300
Subject: net/mlx5_core: use set/get macros in device caps

Transform device capabilities related commands to use set/get macros to
manipulate command mailboxes.

Signed-off-by: Eli Cohen <eli@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mlx5/device.h   |  92 -------------------
 include/linux/mlx5/driver.h   |   1 +
 include/linux/mlx5/mlx5_ifc.h | 206 ++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 207 insertions(+), 92 deletions(-)

(limited to 'include')

diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 0032687..1d67fd3 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -327,98 +327,6 @@ struct mlx5_cmd_query_adapter_mbox_out {
 	u8			vsd_psid[16];
 };
 
-struct mlx5_hca_cap {
-	u8	rsvd1[16];
-	u8	log_max_srq_sz;
-	u8	log_max_qp_sz;
-	u8	rsvd2;
-	u8	log_max_qp;
-	u8	log_max_strq_sz;
-	u8	log_max_srqs;
-	u8	rsvd4[2];
-	u8	rsvd5;
-	u8	log_max_cq_sz;
-	u8	rsvd6;
-	u8	log_max_cq;
-	u8	log_max_eq_sz;
-	u8	log_max_mkey;
-	u8	rsvd7;
-	u8	log_max_eq;
-	u8	max_indirection;
-	u8	log_max_mrw_sz;
-	u8	log_max_bsf_list_sz;
-	u8	log_max_klm_list_sz;
-	u8	rsvd_8_0;
-	u8	log_max_ra_req_dc;
-	u8	rsvd_8_1;
-	u8	log_max_ra_res_dc;
-	u8	rsvd9;
-	u8	log_max_ra_req_qp;
-	u8	rsvd10;
-	u8	log_max_ra_res_qp;
-	u8	pad_cap;
-	u8	rsvd11[3];
-	__be16	max_qp_count;
-	__be16	pkey_table_size;
-	u8	rsvd13;
-	u8	local_ca_ack_delay;
-	u8	rsvd14;
-	u8	num_ports;
-	u8	log_max_msg;
-	u8	rsvd15[3];
-	__be16	stat_rate_support;
-	u8	rsvd16[2];
-	__be64	flags;
-	u8	rsvd17;
-	u8	uar_sz;
-	u8	rsvd18;
-	u8	log_pg_sz;
-	__be16	bf_log_bf_reg_size;
-	u8	rsvd19[4];
-	__be16	max_desc_sz_sq;
-	u8	rsvd20[2];
-	__be16	max_desc_sz_rq;
-	u8	rsvd21[2];
-	__be16	max_desc_sz_sq_dc;
-	__be32	max_qp_mcg;
-	u8	rsvd22[3];
-	u8	log_max_mcg;
-	u8	rsvd23;
-	u8	log_max_pd;
-	u8	rsvd24;
-	u8	log_max_xrcd;
-	u8	rsvd25[42];
-	__be16  log_uar_page_sz;
-	u8	rsvd26[108];
-};
-
-
-struct mlx5_cmd_query_hca_cap_mbox_in {
-	struct mlx5_inbox_hdr	hdr;
-	u8			rsvd[8];
-};
-
-
-struct mlx5_cmd_query_hca_cap_mbox_out {
-	struct mlx5_outbox_hdr	hdr;
-	u8			rsvd0[8];
-	struct mlx5_hca_cap     hca_cap;
-};
-
-
-struct mlx5_cmd_set_hca_cap_mbox_in {
-	struct mlx5_inbox_hdr	hdr;
-	u8			rsvd[8];
-	struct mlx5_hca_cap     hca_cap;
-};
-
-
-struct mlx5_cmd_set_hca_cap_mbox_out {
-	struct mlx5_outbox_hdr	hdr;
-	u8			rsvd0[8];
-};
-
-
 struct mlx5_cmd_init_hca_mbox_in {
 	struct mlx5_inbox_hdr	hdr;
 	u8			rsvd0[2];
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 6f48dc7..c439f9c 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -641,6 +641,7 @@ void mlx5_cmd_cleanup(struct mlx5_core_dev *dev);
 void mlx5_cmd_use_events(struct mlx5_core_dev *dev);
 void mlx5_cmd_use_polling(struct mlx5_core_dev *dev);
 int mlx5_cmd_status_to_err(struct mlx5_outbox_hdr *hdr);
+int mlx5_cmd_status_to_err_v2(void *ptr);
 int mlx5_core_get_caps(struct mlx5_core_dev *dev, struct mlx5_caps *caps,
 		       u16 opmod);
 int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index df3bd9b..5f48b8f 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -140,4 +140,210 @@ enum {
 	MLX5_CMD_OP_MAX				  = 0x911
 };
 
+struct mlx5_ifc_cmd_hca_cap_bits {
+	u8         reserved_0[0x80];
+
+	u8         log_max_srq_sz[0x8];
+	u8         log_max_qp_sz[0x8];
+	u8         reserved_1[0xb];
+	u8         log_max_qp[0x5];
+
+	u8         log_max_strq_sz[0x8];
+	u8         reserved_2[0x3];
+	u8         log_max_srqs[0x5];
+	u8         reserved_3[0x10];
+
+	u8         reserved_4[0x8];
+	u8         log_max_cq_sz[0x8];
+	u8         reserved_5[0xb];
+	u8         log_max_cq[0x5];
+
+	u8         log_max_eq_sz[0x8];
+	u8         reserved_6[0x2];
+	u8         log_max_mkey[0x6];
+	u8         reserved_7[0xc];
+	u8         log_max_eq[0x4];
+
+	u8         max_indirection[0x8];
+	u8         reserved_8[0x1];
+	u8         log_max_mrw_sz[0x7];
+	u8         reserved_9[0x2];
+	u8         log_max_bsf_list_size[0x6];
+	u8         reserved_10[0x2];
+	u8         log_max_klm_list_size[0x6];
+
+	u8         reserved_11[0xa];
+	u8         log_max_ra_req_dc[0x6];
+	u8         reserved_12[0xa];
+	u8         log_max_ra_res_dc[0x6];
+
+	u8         reserved_13[0xa];
+	u8         log_max_ra_req_qp[0x6];
+	u8         reserved_14[0xa];
+	u8         log_max_ra_res_qp[0x6];
+
+	u8         pad_cap[0x1];
+	u8         cc_query_allowed[0x1];
+	u8         cc_modify_allowed[0x1];
+	u8         reserved_15[0x1d];
+
+	u8         reserved_16[0x6];
+	u8         max_qp_cnt[0xa];
+	u8         pkey_table_size[0x10];
+
+	u8         eswitch_owner[0x1];
+	u8         reserved_17[0xa];
+	u8         local_ca_ack_delay[0x5];
+	u8         reserved_18[0x8];
+	u8         num_ports[0x8];
+
+	u8         reserved_19[0x3];
+	u8         log_max_msg[0x5];
+	u8         reserved_20[0x18];
+
+	u8         stat_rate_support[0x10];
+	u8         reserved_21[0x10];
+
+	u8         reserved_22[0x10];
+	u8         cmdif_checksum[0x2];
+	u8         sigerr_cqe[0x1];
+	u8         reserved_23[0x1];
+	u8         wq_signature[0x1];
+	u8         sctr_data_cqe[0x1];
+	u8         reserved_24[0x1];
+	u8         sho[0x1];
+	u8         tph[0x1];
+	u8         rf[0x1];
+	u8         dc[0x1];
+	u8         reserved_25[0x2];
+	u8         roce[0x1];
+	u8         atomic[0x1];
+	u8         rsz_srq[0x1];
+
+	u8         cq_oi[0x1];
+	u8         cq_resize[0x1];
+	u8         cq_moderation[0x1];
+	u8         sniffer_rule_flow[0x1];
+	u8         sniffer_rule_vport[0x1];
+	u8         sniffer_rule_phy[0x1];
+	u8         reserved_26[0x1];
+	u8         pg[0x1];
+	u8         block_lb_mc[0x1];
+	u8         reserved_27[0x3];
+	u8         cd[0x1];
+	u8         reserved_28[0x1];
+	u8         apm[0x1];
+	u8         reserved_29[0x7];
+	u8         qkv[0x1];
+	u8         pkv[0x1];
+	u8         reserved_30[0x4];
+	u8         xrc[0x1];
+	u8         ud[0x1];
+	u8         uc[0x1];
+	u8         rc[0x1];
+
+	u8         reserved_31[0xa];
+	u8         uar_sz[0x6];
+	u8         reserved_32[0x8];
+	u8         log_pg_sz[0x8];
+
+	u8         bf[0x1];
+	u8         reserved_33[0xa];
+	u8         log_bf_reg_size[0x5];
+	u8         reserved_34[0x10];
+
+	u8         reserved_35[0x10];
+	u8         max_wqe_sz_sq[0x10];
+
+	u8         reserved_36[0x10];
+	u8         max_wqe_sz_rq[0x10];
+
+	u8         reserved_37[0x10];
+	u8         max_wqe_sz_sq_dc[0x10];
+
+	u8         reserved_38[0x7];
+	u8         max_qp_mcg[0x19];
+
+	u8         reserved_39[0x18];
+	u8         log_max_mcg[0x8];
+
+	u8         reserved_40[0xb];
+	u8         log_max_pd[0x5];
+	u8         reserved_41[0xb];
+	u8         log_max_xrcd[0x5];
+
+	u8         reserved_42[0x20];
+
+	u8         reserved_43[0x3];
+	u8         log_max_rq[0x5];
+	u8         reserved_44[0x3];
+	u8         log_max_sq[0x5];
+	u8         reserved_45[0x3];
+	u8         log_max_tir[0x5];
+	u8         reserved_46[0x3];
+	u8         log_max_tis[0x5];
+
+	u8         reserved_47[0x13];
+	u8         log_max_rq_per_tir[0x5];
+	u8         reserved_48[0x3];
+	u8         log_max_tis_per_sq[0x5];
+
+	u8         reserved_49[0xe0];
+
+	u8         reserved_50[0x10];
+	u8         log_uar_page_sz[0x10];
+
+	u8         reserved_51[0x100];
+
+	u8         reserved_52[0x1f];
+	u8         cqe_zip[0x1];
+
+	u8         cqe_zip_timeout[0x10];
+	u8         cqe_zip_max_num[0x10];
+
+	u8         reserved_53[0x220];
+};
+
+struct mlx5_ifc_set_hca_cap_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x40];
+
+	struct mlx5_ifc_cmd_hca_cap_bits hca_capability_struct;
+};
+
+struct mlx5_ifc_query_hca_cap_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x40];
+};
+
+struct mlx5_ifc_query_hca_cap_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x40];
+
+	u8         capability_struct[256][0x8];
+};
+
+struct mlx5_ifc_set_hca_cap_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x40];
+};
+
 #endif /* MLX5_IFC_H */
-- 
cgit v1.1


From 5903325a64834211daf63a62db3b35ee580cb8bf Mon Sep 17 00:00:00 2001
From: Eli Cohen <eli@mellanox.com>
Date: Thu, 2 Oct 2014 12:19:45 +0300
Subject: net/mlx5_core: Identify resources by their type

This patch puts a common part as the first field of mlx5_core_qp. This field is
used to identify which resource generated an event. This is required since upcoming
new resource types such as DC targets are allocated for the same numerical space
as regular QPs and may generate the same events. By searching the resource in the
same table we can then look at the common field to identify the resource.

Signed-off-by: Eli Cohen <eli@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mlx5/driver.h | 13 ++++++++++++-
 include/linux/mlx5/qp.h     |  3 +--
 2 files changed, 13 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index c439f9c..246310d 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -375,6 +375,16 @@ struct mlx5_core_mr {
 	u32			pd;
 };
 
+enum mlx5_res_type {
+	MLX5_RES_QP,
+};
+
+struct mlx5_core_rsc_common {
+	enum mlx5_res_type	res;
+	atomic_t		refcount;
+	struct completion	free;
+};
+
 struct mlx5_core_srq {
 	u32		srqn;
 	int		max;
@@ -700,7 +710,7 @@ int mlx5_eq_init(struct mlx5_core_dev *dev);
 void mlx5_eq_cleanup(struct mlx5_core_dev *dev);
 void mlx5_fill_page_array(struct mlx5_buf *buf, __be64 *pas);
 void mlx5_cq_completion(struct mlx5_core_dev *dev, u32 cqn);
-void mlx5_qp_event(struct mlx5_core_dev *dev, u32 qpn, int event_type);
+void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type);
 void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type);
 struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn);
 void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, unsigned long vector);
@@ -737,6 +747,7 @@ void mlx5_cmdif_debugfs_cleanup(struct mlx5_core_dev *dev);
 int mlx5_core_create_psv(struct mlx5_core_dev *dev, u32 pdn,
 			 int npsvs, u32 *sig_index);
 int mlx5_core_destroy_psv(struct mlx5_core_dev *dev, int psv_num);
+void mlx5_core_put_rsc(struct mlx5_core_rsc_common *common);
 
 static inline u32 mlx5_mkey_to_idx(u32 mkey)
 {
diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h
index 9709b30..7c4c0f1f 100644
--- a/include/linux/mlx5/qp.h
+++ b/include/linux/mlx5/qp.h
@@ -342,10 +342,9 @@ struct mlx5_stride_block_ctrl_seg {
 };
 
 struct mlx5_core_qp {
+	struct mlx5_core_rsc_common	common; /* must be first */
 	void (*event)		(struct mlx5_core_qp *, int);
 	int			qpn;
-	atomic_t		refcount;
-	struct completion	free;
 	struct mlx5_rsc_debug	*dbg;
 	int			pid;
 };
-- 
cgit v1.1


From efc98d08e1ec4fd131f794370b274dceaf32c958 Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Fri, 3 Oct 2014 15:48:08 -0700
Subject: fou: eliminate IPv4,v6 specific GRO functions

This patch removes fou[46]_gro_receive and fou[46]_gro_complete
functions. The v4 or v6 variants were chosen for the UDP offloads
based on the address family of the socket this is not necessary
or correct. Alternatively, this patch adds is_ipv6 to napi_gro_skb.
This is set in udp6_gro_receive and unset in udp4_gro_receive. In
fou_gro_receive the value is used to select the correct inet_offloads
for the protocol of the outer IP header.

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 910fb17..22d54b9 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1886,6 +1886,9 @@ struct napi_gro_cb {
 	/* Number of checksums via CHECKSUM_UNNECESSARY */
 	u8	csum_cnt:3;
 
+	/* Used in foo-over-udp, set in udp[46]_gro_receive */
+	u8	is_ipv6:1;
+
 	/* used to support CHECKSUM_COMPLETE for tunneling protocols */
 	__wsum	csum;
 
-- 
cgit v1.1


From 37dd0247797b168ad1cc7f5dbec825a1ee66535b Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Fri, 3 Oct 2014 15:48:09 -0700
Subject: gue: Receive side for Generic UDP Encapsulation

This patch adds support receiving for GUE packets in the fou module. The
fou module now supports direct foo-over-udp (no encapsulation header)
and GUE. To support this a type parameter is added to the fou netlink
parameters.

For a GUE socket we define gue_udp_recv, gue_gro_receive, and
gue_gro_complete to handle the specifics of the GUE protocol. Most
of the code to manage and configure sockets is common with the fou.

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/gue.h        | 23 +++++++++++++++++++++++
 include/uapi/linux/fou.h |  7 +++++++
 2 files changed, 30 insertions(+)
 create mode 100644 include/net/gue.h

(limited to 'include')

diff --git a/include/net/gue.h b/include/net/gue.h
new file mode 100644
index 0000000..b6c3327
--- /dev/null
+++ b/include/net/gue.h
@@ -0,0 +1,23 @@
+#ifndef __NET_GUE_H
+#define __NET_GUE_H
+
+struct guehdr {
+	union {
+		struct {
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+			__u8	hlen:4,
+			version:4;
+#elif defined (__BIG_ENDIAN_BITFIELD)
+			__u8	version:4,
+				hlen:4;
+#else
+#error  "Please fix <asm/byteorder.h>"
+#endif
+			__u8    next_hdr;
+			__u16   flags;
+		};
+		__u32 word;
+	};
+};
+
+#endif
diff --git a/include/uapi/linux/fou.h b/include/uapi/linux/fou.h
index e03376d..8df0689 100644
--- a/include/uapi/linux/fou.h
+++ b/include/uapi/linux/fou.h
@@ -13,6 +13,7 @@ enum {
 	FOU_ATTR_PORT,				/* u16 */
 	FOU_ATTR_AF,				/* u8 */
 	FOU_ATTR_IPPROTO,			/* u8 */
+	FOU_ATTR_TYPE,				/* u8 */
 
 	__FOU_ATTR_MAX,
 };
@@ -27,6 +28,12 @@ enum {
 	__FOU_CMD_MAX,
 };
 
+enum {
+	FOU_ENCAP_UNSPEC,
+	FOU_ENCAP_DIRECT,
+	FOU_ENCAP_GUE,
+};
+
 #define FOU_CMD_MAX	(__FOU_CMD_MAX - 1)
 
 #endif /* _UAPI_LINUX_FOU_H */
-- 
cgit v1.1


From bc1fc390e1728672b5b343b85185fcc1fe41043b Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Fri, 3 Oct 2014 15:48:10 -0700
Subject: ip_tunnel: Add GUE support

This patch allows configuring IPIP, sit, and GRE tunnels to use GUE.
This is very similar to fou excpet that we need to insert the GUE header
in addition to the UDP header on transmit.

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/if_tunnel.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h
index 7c832af..280d9e0 100644
--- a/include/uapi/linux/if_tunnel.h
+++ b/include/uapi/linux/if_tunnel.h
@@ -64,6 +64,7 @@ enum {
 enum tunnel_encap_types {
 	TUNNEL_ENCAP_NONE,
 	TUNNEL_ENCAP_FOU,
+	TUNNEL_ENCAP_GUE,
 };
 
 #define TUNNEL_ENCAP_FLAG_CSUM		(1<<0)
-- 
cgit v1.1


From c8753d55afb436fd6a25c8bbe8d783f6dcf1c9f8 Mon Sep 17 00:00:00 2001
From: Vijay Subramanian <subramanian.vijay@gmail.com>
Date: Thu, 2 Oct 2014 10:00:43 -0700
Subject: net: Cleanup skb cloning by adding SKB_FCLONE_FREE

SKB_FCLONE_UNAVAILABLE has overloaded meaning depending on type of skb.
1: If skb is allocated from head_cache, it indicates fclone is not available.
2: If skb is a companion fclone skb (allocated from fclone_cache), it indicates
it is available to be used.

To avoid confusion for case 2 above, this patch  replaces
SKB_FCLONE_UNAVAILABLE with SKB_FCLONE_FREE where appropriate. For fclone
companion skbs, this indicates it is free for use.

SKB_FCLONE_UNAVAILABLE will now simply indicate skb is from head_cache and
cannot / will not have a companion fclone.

Signed-off-by: Vijay Subramanian <subramanian.vijay@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 7c5036d..3a5ec76 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -339,9 +339,10 @@ struct skb_shared_info {
 
 
 enum {
-	SKB_FCLONE_UNAVAILABLE,
-	SKB_FCLONE_ORIG,
-	SKB_FCLONE_CLONE,
+	SKB_FCLONE_UNAVAILABLE,	/* skb has no fclone (from head_cache) */
+	SKB_FCLONE_ORIG,	/* orig skb (from fclone_cache) */
+	SKB_FCLONE_CLONE,	/* companion fclone skb (from fclone_cache) */
+	SKB_FCLONE_FREE,	/* this companion fclone skb is available */
 };
 
 enum {
-- 
cgit v1.1


From dd3619f2ed5bd5ffce90f4fd8361ccd46d59b9b6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?S=C3=A9bastien=20Barr=C3=A9?= <sebastien.barre@uclouvain.be>
Date: Thu, 2 Oct 2014 21:15:22 +0200
Subject: Removed unused inet6 address state
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

the inet6 state INET6_IFADDR_STATE_UP only appeared in its definition.

Cc: Christoph Paasch <christoph.paasch@uclouvain.be>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Sébastien Barré <sebastien.barre@uclouvain.be>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/if_inet6.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h
index d07b1a6..55a8d40 100644
--- a/include/net/if_inet6.h
+++ b/include/net/if_inet6.h
@@ -35,7 +35,6 @@ enum {
 	INET6_IFADDR_STATE_DAD,
 	INET6_IFADDR_STATE_POSTDAD,
 	INET6_IFADDR_STATE_ERRDAD,
-	INET6_IFADDR_STATE_UP,
 	INET6_IFADDR_STATE_DEAD,
 };
 
-- 
cgit v1.1


From bdf6fa52f01b941d4a80372d56de465bdbbd1d23 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vyasevich@gmail.com>
Date: Fri, 3 Oct 2014 18:16:20 -0400
Subject: sctp: handle association restarts when the socket is closed.

Currently association restarts do not take into consideration the
state of the socket.  When a restart happens, the current assocation
simply transitions into established state.  This creates a condition
where a remote system, through a the restart procedure, may create a
local association that is no way reachable by user.  The conditions
to trigger this are as follows:
  1) Remote does not acknoledge some data causing data to remain
     outstanding.
  2) Local application calls close() on the socket.  Since data
     is still outstanding, the association is placed in SHUTDOWN_PENDING
     state.  However, the socket is closed.
  3) The remote tries to create a new association, triggering a restart
     on the local system.  The association moves from SHUTDOWN_PENDING
     to ESTABLISHED.  At this point, it is no longer reachable by
     any socket on the local system.

This patch addresses the above situation by moving the newly ESTABLISHED
association into SHUTDOWN-SENT state and bundling a SHUTDOWN after
the COOKIE-ACK chunk.  This way, the restarted associate immidiately
enters the shutdown procedure and forces the termination of the
unreachable association.

Reported-by: David Laight <David.Laight@aculab.com>
Signed-off-by: Vlad Yasevich <vyasevich@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/command.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/sctp/command.h b/include/net/sctp/command.h
index f22538e..d4a20d0 100644
--- a/include/net/sctp/command.h
+++ b/include/net/sctp/command.h
@@ -115,7 +115,7 @@ typedef enum {
  * analysis of the state functions, but in reality just taken from
  * thin air in the hopes othat we don't trigger a kernel panic.
  */
-#define SCTP_MAX_NUM_COMMANDS 14
+#define SCTP_MAX_NUM_COMMANDS 20
 
 typedef union {
 	void *zero_all;	/* Set to NULL to clear the entire union */
-- 
cgit v1.1


From 0b5e8b8eeae40bae6ad7c7e91c97c3c0d0e57882 Mon Sep 17 00:00:00 2001
From: Andy Zhou <azhou@nicira.com>
Date: Fri, 3 Oct 2014 15:35:28 -0700
Subject: net: Add Geneve tunneling protocol driver

This adds a device level support for Geneve -- Generic Network
Virtualization Encapsulation. The protocol is documented at
http://tools.ietf.org/html/draft-gross-geneve-01

Only protocol layer Geneve support is provided by this driver.
Openvswitch can be used for configuring, set up and tear down
functional Geneve tunnels.

Signed-off-by: Jesse Gross <jesse@nicira.com>
Signed-off-by: Andy Zhou <azhou@nicira.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/geneve.h     | 91 ++++++++++++++++++++++++++++++++++++++++++++++++
 include/net/ip_tunnels.h |  2 ++
 2 files changed, 93 insertions(+)
 create mode 100644 include/net/geneve.h

(limited to 'include')

diff --git a/include/net/geneve.h b/include/net/geneve.h
new file mode 100644
index 0000000..ce98865
--- /dev/null
+++ b/include/net/geneve.h
@@ -0,0 +1,91 @@
+#ifndef __NET_GENEVE_H
+#define __NET_GENEVE_H  1
+
+#include <net/udp_tunnel.h>
+
+struct geneve_sock;
+
+typedef void (geneve_rcv_t)(struct geneve_sock *gs, struct sk_buff *skb);
+
+struct geneve_sock {
+	struct hlist_node	hlist;
+	geneve_rcv_t		*rcv;
+	void			*rcv_data;
+	struct work_struct	del_work;
+	struct socket		*sock;
+	struct rcu_head		rcu;
+	atomic_t		refcnt;
+	struct udp_offload	udp_offloads;
+};
+
+/* Geneve Header:
+ *  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *  |Ver|  Opt Len  |O|C|    Rsvd.  |          Protocol Type        |
+ *  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *  |        Virtual Network Identifier (VNI)       |    Reserved   |
+ *  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *  |                    Variable Length Options                    |
+ *  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * Option Header:
+ *  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *  |          Option Class         |      Type     |R|R|R| Length  |
+ *  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *  |                      Variable Option Data                     |
+ *  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ */
+
+struct geneve_opt {
+	__be16	opt_class;
+	u8	type;
+#ifdef __LITTLE_ENDIAN_BITFIELD
+	u8	length:5;
+	u8	r3:1;
+	u8	r2:1;
+	u8	r1:1;
+#else
+	u8	r1:1;
+	u8	r2:1;
+	u8	r3:1;
+	u8	length:5;
+#endif
+	u8	opt_data[];
+};
+
+#define GENEVE_CRIT_OPT_TYPE (1 << 7)
+
+struct genevehdr {
+#ifdef __LITTLE_ENDIAN_BITFIELD
+	u8 opt_len:6;
+	u8 ver:2;
+	u8 rsvd1:6;
+	u8 critical:1;
+	u8 oam:1;
+#else
+	u8 ver:2;
+	u8 opt_len:6;
+	u8 oam:1;
+	u8 critical:1;
+	u8 rsvd1:6;
+#endif
+	__be16 proto_type;
+	u8 vni[3];
+	u8 rsvd2;
+	struct geneve_opt options[];
+};
+
+#define GENEVE_VER 0
+#define GENEVE_BASE_HLEN (sizeof(struct udphdr) + sizeof(struct genevehdr))
+
+struct geneve_sock *geneve_sock_add(struct net *net, __be16 port,
+				    geneve_rcv_t *rcv, void *data,
+				    bool no_share, bool ipv6);
+
+void geneve_sock_release(struct geneve_sock *vs);
+
+int geneve_xmit_skb(struct geneve_sock *gs, struct rtable *rt,
+		    struct sk_buff *skb, __be32 src, __be32 dst, __u8 tos,
+		    __u8 ttl, __be16 df, __be16 src_port, __be16 dst_port,
+		    __be16 tun_flags, u8 vni[3], u8 opt_len, u8 *opt,
+		    bool xnet);
+#endif
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index 7f538ba..a9ce155 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -95,6 +95,8 @@ struct ip_tunnel {
 #define TUNNEL_VERSION	__cpu_to_be16(0x40)
 #define TUNNEL_NO_KEY	__cpu_to_be16(0x80)
 #define TUNNEL_DONT_FRAGMENT    __cpu_to_be16(0x0100)
+#define TUNNEL_OAM	__cpu_to_be16(0x0200)
+#define TUNNEL_CRIT_OPT	__cpu_to_be16(0x0400)
 
 struct tnl_ptk_info {
 	__be16 flags;
-- 
cgit v1.1


From 67fa034194bf82a3d5ca841759d921297daa63ca Mon Sep 17 00:00:00 2001
From: Jesse Gross <jesse@nicira.com>
Date: Fri, 3 Oct 2014 15:35:30 -0700
Subject: openvswitch: Add support for matching on OAM packets.

Some tunnel formats have mechanisms for indicating that packets are
OAM frames that should be handled specially (either as high priority or
not forwarded beyond an endpoint). This provides support for allowing
those types of packets to be matched.

Signed-off-by: Jesse Gross <jesse@nicira.com>
Signed-off-by: Andy Zhou <azhou@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/openvswitch.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
index f7fc507..7c06106 100644
--- a/include/uapi/linux/openvswitch.h
+++ b/include/uapi/linux/openvswitch.h
@@ -309,6 +309,7 @@ enum ovs_tunnel_key_attr {
 	OVS_TUNNEL_KEY_ATTR_TTL,                /* u8 Tunnel IP TTL. */
 	OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT,      /* No argument, set DF. */
 	OVS_TUNNEL_KEY_ATTR_CSUM,               /* No argument. CSUM packet. */
+	OVS_TUNNEL_KEY_ATTR_OAM,                /* No argument. OAM frame.  */
 	__OVS_TUNNEL_KEY_ATTR_MAX
 };
 
-- 
cgit v1.1


From f0b128c1e2cc33ad104daf0f51a51e34f7763c5f Mon Sep 17 00:00:00 2001
From: Jesse Gross <jesse@nicira.com>
Date: Fri, 3 Oct 2014 15:35:31 -0700
Subject: openvswitch: Wrap struct ovs_key_ipv4_tunnel in a new structure.

Currently, the flow information that is matched for tunnels and
the tunnel data passed around with packets is the same. However,
as additional information is added this is not necessarily desirable,
as in the case of pointers.

This adds a new structure for tunnel metadata which currently contains
only the existing struct. This change is purely internal to the kernel
since the current OVS_KEY_ATTR_IPV4_TUNNEL is simply a compressed version
of OVS_KEY_ATTR_TUNNEL that is translated at flow setup.

Signed-off-by: Jesse Gross <jesse@nicira.com>
Signed-off-by: Andy Zhou <azhou@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/openvswitch.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
index 7c06106..6753032 100644
--- a/include/uapi/linux/openvswitch.h
+++ b/include/uapi/linux/openvswitch.h
@@ -294,7 +294,7 @@ enum ovs_key_attr {
 	OVS_KEY_ATTR_RECIRC_ID, /* u32 recirc id */
 
 #ifdef __KERNEL__
-	OVS_KEY_ATTR_IPV4_TUNNEL,  /* struct ovs_key_ipv4_tunnel */
+	OVS_KEY_ATTR_TUNNEL_INFO,  /* struct ovs_tunnel_info */
 #endif
 	__OVS_KEY_ATTR_MAX
 };
-- 
cgit v1.1


From f5796684069e0c71c65bce6a6d4766114aec1396 Mon Sep 17 00:00:00 2001
From: Jesse Gross <jesse@nicira.com>
Date: Fri, 3 Oct 2014 15:35:33 -0700
Subject: openvswitch: Add support for Geneve tunneling.

The Openvswitch implementation is completely agnostic to the options
that are in use and can handle newly defined options without
further work. It does this by simply matching on a byte array
of options and allowing userspace to setup flows on this array.

Signed-off-by: Jesse Gross <jesse@nicira.com>
Singed-off-by: Ansis Atteka <aatteka@nicira.com>
Signed-off-by: Andy Zhou <azhou@nicira.com>
Acked-by: Thomas Graf <tgraf@noironetworks.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip_tunnels.h         | 21 +++++++++++----------
 include/uapi/linux/openvswitch.h |  2 ++
 2 files changed, 13 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index a9ce155..5bc6ede 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -86,17 +86,18 @@ struct ip_tunnel {
 	struct gro_cells	gro_cells;
 };
 
-#define TUNNEL_CSUM	__cpu_to_be16(0x01)
-#define TUNNEL_ROUTING	__cpu_to_be16(0x02)
-#define TUNNEL_KEY	__cpu_to_be16(0x04)
-#define TUNNEL_SEQ	__cpu_to_be16(0x08)
-#define TUNNEL_STRICT	__cpu_to_be16(0x10)
-#define TUNNEL_REC	__cpu_to_be16(0x20)
-#define TUNNEL_VERSION	__cpu_to_be16(0x40)
-#define TUNNEL_NO_KEY	__cpu_to_be16(0x80)
+#define TUNNEL_CSUM		__cpu_to_be16(0x01)
+#define TUNNEL_ROUTING		__cpu_to_be16(0x02)
+#define TUNNEL_KEY		__cpu_to_be16(0x04)
+#define TUNNEL_SEQ		__cpu_to_be16(0x08)
+#define TUNNEL_STRICT		__cpu_to_be16(0x10)
+#define TUNNEL_REC		__cpu_to_be16(0x20)
+#define TUNNEL_VERSION		__cpu_to_be16(0x40)
+#define TUNNEL_NO_KEY		__cpu_to_be16(0x80)
 #define TUNNEL_DONT_FRAGMENT    __cpu_to_be16(0x0100)
-#define TUNNEL_OAM	__cpu_to_be16(0x0200)
-#define TUNNEL_CRIT_OPT	__cpu_to_be16(0x0400)
+#define TUNNEL_OAM		__cpu_to_be16(0x0200)
+#define TUNNEL_CRIT_OPT		__cpu_to_be16(0x0400)
+#define TUNNEL_OPTIONS_PRESENT	__cpu_to_be16(0x0800)
 
 struct tnl_ptk_info {
 	__be16 flags;
diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
index 6753032..435eabc 100644
--- a/include/uapi/linux/openvswitch.h
+++ b/include/uapi/linux/openvswitch.h
@@ -192,6 +192,7 @@ enum ovs_vport_type {
 	OVS_VPORT_TYPE_INTERNAL, /* network device implemented by datapath */
 	OVS_VPORT_TYPE_GRE,      /* GRE tunnel. */
 	OVS_VPORT_TYPE_VXLAN,	 /* VXLAN tunnel. */
+	OVS_VPORT_TYPE_GENEVE,	 /* Geneve tunnel. */
 	__OVS_VPORT_TYPE_MAX
 };
 
@@ -310,6 +311,7 @@ enum ovs_tunnel_key_attr {
 	OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT,      /* No argument, set DF. */
 	OVS_TUNNEL_KEY_ATTR_CSUM,               /* No argument. CSUM packet. */
 	OVS_TUNNEL_KEY_ATTR_OAM,                /* No argument. OAM frame.  */
+	OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS,        /* Array of Geneve options. */
 	__OVS_TUNNEL_KEY_ATTR_MAX
 };
 
-- 
cgit v1.1


From f2600cf02b5b59aaee082c3485b7f01fc7f7b70c Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sat, 4 Oct 2014 10:11:31 -0700
Subject: net: sched: avoid costly atomic operation in fq_dequeue()

Standard qdisc API to setup a timer implies an atomic operation on every
packet dequeue : qdisc_unthrottled()

It turns out this is not really needed for FQ, as FQ has no concept of
global qdisc throttling, being a qdisc handling many different flows,
some of them can be throttled, while others are not.

Fix is straightforward : add a 'bool throttle' to
qdisc_watchdog_schedule_ns(), and remove calls to qdisc_unthrottled()
in sch_fq.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/pkt_sched.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index e4b3c82..27a3383 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -65,12 +65,12 @@ struct qdisc_watchdog {
 };
 
 void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc);
-void qdisc_watchdog_schedule_ns(struct qdisc_watchdog *wd, u64 expires);
+void qdisc_watchdog_schedule_ns(struct qdisc_watchdog *wd, u64 expires, bool throttle);
 
 static inline void qdisc_watchdog_schedule(struct qdisc_watchdog *wd,
 					   psched_time_t expires)
 {
-	qdisc_watchdog_schedule_ns(wd, PSCHED_TICKS2NS(expires));
+	qdisc_watchdog_schedule_ns(wd, PSCHED_TICKS2NS(expires), true);
 }
 
 void qdisc_watchdog_cancel(struct qdisc_watchdog *wd);
-- 
cgit v1.1


From 7dfa4b414d4eec8da56e44fb2b4aea3e549b092f Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sun, 5 Oct 2014 12:35:09 +0300
Subject: net/mlx4_en: Code cleanups in tx path

- Remove unused variable ring->poll_cnt
- No need to set some fields if using blueflame
- Add missing const's
- Use unlikely
- Remove unneeded new line
- Make some comments more precise
- struct mlx4_bf @offset field reduced to unsigned int to save space

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Amir Vadai <amirv@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mlx4/device.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index b2f8ab9..37e4404 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -583,7 +583,7 @@ struct mlx4_uar {
 };
 
 struct mlx4_bf {
-	unsigned long		offset;
+	unsigned int		offset;
 	int			buf_size;
 	struct mlx4_uar	       *uar;
 	void __iomem	       *reg;
-- 
cgit v1.1


From 1255a5055449781a92076fc5429952f2b33cf309 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sun, 5 Oct 2014 12:35:21 +0300
Subject: ethtool: Ethtool parameter to dynamically change tx_copybreak

Use new ethtool [sg]et_tunable() to set tx_copybread (inline threshold)

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Amir Vadai <amirv@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/ethtool.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index 7a364f2..99b4305 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -212,6 +212,7 @@ struct ethtool_value {
 enum tunable_id {
 	ETHTOOL_ID_UNSPEC,
 	ETHTOOL_RX_COPYBREAK,
+	ETHTOOL_TX_COPYBREAK,
 };
 
 enum tunable_type_id {
-- 
cgit v1.1


From fcbeb976d7ce783fd58e63e61c196d9a8912b3be Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sun, 5 Oct 2014 10:11:27 -0700
Subject: net: introduce netdevice gso_min_segs attribute

Some TSO engines might have a too heavy setup cost, that impacts
performance on hosts sending small bursts (2 MSS per packet).

This patch adds a device gso_min_segs, allowing drivers to set
a minimum segment size for TSO packets, according to the NIC
performance.

Tested on a mlx4 NIC, this allows to get a ~110% increase of
throughput when sending 2 MSS per packet.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 22d54b9..2df86f5 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1416,6 +1416,8 @@ enum netdev_priv_flags {
  *	@gso_max_size:	Maximum size of generic segmentation offload
  *	@gso_max_segs:	Maximum number of segments that can be passed to the
  *			NIC for GSO
+ *	@gso_min_segs:	Minimum number of segments that can be passed to the
+ *			NIC for GSO
  *
  *	@dcbnl_ops:	Data Center Bridging netlink ops
  *	@num_tc:	Number of traffic classes in the net device
@@ -1666,7 +1668,7 @@ struct net_device {
 	unsigned int		gso_max_size;
 #define GSO_MAX_SEGS		65535
 	u16			gso_max_segs;
-
+	u16			gso_min_segs;
 #ifdef CONFIG_DCB
 	const struct dcbnl_rtnl_ops *dcbnl_ops;
 #endif
-- 
cgit v1.1


From 82a470f1119eb7d2e4941b915bf9cd6fd8d54494 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Sun, 5 Oct 2014 21:27:53 -0700
Subject: net: sched: remove tcf_proto from ematch calls

This removes the tcf_proto argument from the ematch code paths that
only need it to reference the net namespace. This allows simplifying
qdisc code paths especially when we need to tear down the ematch
from an RCU callback. In this case we can not guarentee that the
tcf_proto structure is still valid.

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
Acked-by: Cong Wang <cwang@twopensource.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/pkt_cls.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index ef44ad9..bc49967 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -166,6 +166,7 @@ struct tcf_ematch {
 	unsigned int		datalen;
 	u16			matchid;
 	u16			flags;
+	struct net		*net;
 };
 
 static inline int tcf_em_is_container(struct tcf_ematch *em)
@@ -229,12 +230,11 @@ struct tcf_ematch_tree {
 struct tcf_ematch_ops {
 	int			kind;
 	int			datalen;
-	int			(*change)(struct tcf_proto *, void *,
+	int			(*change)(struct net *net, void *,
 					  int, struct tcf_ematch *);
 	int			(*match)(struct sk_buff *, struct tcf_ematch *,
 					 struct tcf_pkt_info *);
-	void			(*destroy)(struct tcf_proto *,
-					   struct tcf_ematch *);
+	void			(*destroy)(struct tcf_ematch *);
 	int			(*dump)(struct sk_buff *, struct tcf_ematch *);
 	struct module		*owner;
 	struct list_head	link;
@@ -244,7 +244,7 @@ int tcf_em_register(struct tcf_ematch_ops *);
 void tcf_em_unregister(struct tcf_ematch_ops *);
 int tcf_em_tree_validate(struct tcf_proto *, struct nlattr *,
 			 struct tcf_ematch_tree *);
-void tcf_em_tree_destroy(struct tcf_proto *, struct tcf_ematch_tree *);
+void tcf_em_tree_destroy(struct tcf_ematch_tree *);
 int tcf_em_tree_dump(struct sk_buff *, struct tcf_ematch_tree *, int);
 int __tcf_em_tree_match(struct sk_buff *, struct tcf_ematch_tree *,
 			struct tcf_pkt_info *);
@@ -301,7 +301,7 @@ struct tcf_ematch_tree {
 };
 
 #define tcf_em_tree_validate(tp, tb, t) ((void)(t), 0)
-#define tcf_em_tree_destroy(tp, t) do { (void)(t); } while(0)
+#define tcf_em_tree_destroy(t) do { (void)(t); } while(0)
 #define tcf_em_tree_dump(skb, t, tlv) (0)
 #define tcf_em_tree_change(tp, dst, src) do { } while(0)
 #define tcf_em_tree_match(skb, t, info) ((void)(info), 1)
-- 
cgit v1.1


From 94b2cfe02bfe3f1918d91bd6f498e308c5605cbc Mon Sep 17 00:00:00 2001
From: Hannes Frederic Sowa <hannes@stressinduktion.org>
Date: Mon, 6 Oct 2014 19:58:34 +0200
Subject: ipv6: minor fib6 cleanups like type safety, bool conversion, inline
 removal

Also renamed struct fib6_walker_t to fib6_walker and enum fib_walk_state_t
to fib6_walk_state as recommended by Cong Wang.

Cc: Cong Wang <cwang@twopensource.com>
Cc: YOSHIFUJI Hideaki <hideaki@yoshifuji.org>
Cc: Martin Lau <kafai@fb.com>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip6_fib.h | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index cf485f9..9221bf4 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -202,15 +202,25 @@ static inline void ip6_rt_put(struct rt6_info *rt)
 	dst_release(&rt->dst);
 }
 
-struct fib6_walker_t {
+enum fib6_walk_state {
+#ifdef CONFIG_IPV6_SUBTREES
+	FWS_S,
+#endif
+	FWS_L,
+	FWS_R,
+	FWS_C,
+	FWS_U
+};
+
+struct fib6_walker {
 	struct list_head lh;
 	struct fib6_node *root, *node;
 	struct rt6_info *leaf;
-	unsigned char state;
-	unsigned char prune;
+	enum fib6_walk_state state;
+	bool prune;
 	unsigned int skip;
 	unsigned int count;
-	int (*func)(struct fib6_walker_t *);
+	int (*func)(struct fib6_walker *);
 	void *args;
 };
 
-- 
cgit v1.1


From 42b18706469a02c1f84375ac0ee2f30f28d85d4c Mon Sep 17 00:00:00 2001
From: Hannes Frederic Sowa <hannes@stressinduktion.org>
Date: Mon, 6 Oct 2014 19:58:35 +0200
Subject: ipv6: make rt_sernum atomic and serial number fields ordinary ints

Cc: YOSHIFUJI Hideaki <hideaki@yoshifuji.org>
Cc: Martin Lau <kafai@fb.com>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip6_fib.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 9221bf4..8eea35d 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -64,7 +64,7 @@ struct fib6_node {
 
 	__u16			fn_bit;		/* bit key */
 	__u16			fn_flags;
-	__u32			fn_sernum;
+	int			fn_sernum;
 	struct rt6_info		*rr_ptr;
 };
 
-- 
cgit v1.1


From 812918c464eca0e8c145f975932ca5020e9c05cb Mon Sep 17 00:00:00 2001
From: Hannes Frederic Sowa <hannes@stressinduktion.org>
Date: Mon, 6 Oct 2014 19:58:37 +0200
Subject: ipv6: make fib6 serial number per namespace

Try to reduce number of possible fn_sernum mutation by constraining them
to their namespace.

Also remove rt_genid which I forgot to remove in 705f1c869d577c ("ipv6:
remove rt6i_genid").

Cc: YOSHIFUJI Hideaki <hideaki@yoshifuji.org>
Cc: Martin Lau <kafai@fb.com>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netns/ipv6.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index eade27a..69ae41f 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -76,7 +76,7 @@ struct netns_ipv6 {
 #endif
 #endif
 	atomic_t		dev_addr_genid;
-	atomic_t		rt_genid;
+	atomic_t		fib6_sernum;
 };
 
 #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
-- 
cgit v1.1


From 7c5df8fa1921450d2210db9928f43cf4f414982c Mon Sep 17 00:00:00 2001
From: Andy Zhou <azhou@nicira.com>
Date: Mon, 6 Oct 2014 15:15:14 -0700
Subject: openvswitch: fix a compilation error when CONFIG_INET is not setW!

Fix a openvswitch compilation error when CONFIG_INET is not set:

=====================================================
   In file included from include/net/geneve.h:4:0,
                       from net/openvswitch/flow_netlink.c:45:
		          include/net/udp_tunnel.h: In function 'udp_tunnel_handle_offloads':
			  >> include/net/udp_tunnel.h:100:2: error: implicit declaration of function 'iptunnel_handle_offloads' [-Werror=implicit-function-declaration]
			  >>      return iptunnel_handle_offloads(skb, udp_csum, type);
			  >>           ^
			  >>           >> include/net/udp_tunnel.h:100:2: warning: return makes pointer from integer without a cast
			  >>           >>    cc1: some warnings being treated as errors

=====================================================

Reported-by: kbuild test robot <fengguang.wu@intel.com>
Signed-off-by: Andy Zhou <azhou@nicira.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/geneve.h | 36 +++++++++++++++++++++---------------
 1 file changed, 21 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/include/net/geneve.h b/include/net/geneve.h
index ce98865..112132c 100644
--- a/include/net/geneve.h
+++ b/include/net/geneve.h
@@ -1,22 +1,10 @@
 #ifndef __NET_GENEVE_H
 #define __NET_GENEVE_H  1
 
+#ifdef CONFIG_INET
 #include <net/udp_tunnel.h>
+#endif
 
-struct geneve_sock;
-
-typedef void (geneve_rcv_t)(struct geneve_sock *gs, struct sk_buff *skb);
-
-struct geneve_sock {
-	struct hlist_node	hlist;
-	geneve_rcv_t		*rcv;
-	void			*rcv_data;
-	struct work_struct	del_work;
-	struct socket		*sock;
-	struct rcu_head		rcu;
-	atomic_t		refcnt;
-	struct udp_offload	udp_offloads;
-};
 
 /* Geneve Header:
  *  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
@@ -74,6 +62,22 @@ struct genevehdr {
 	struct geneve_opt options[];
 };
 
+#ifdef CONFIG_INET
+struct geneve_sock;
+
+typedef void (geneve_rcv_t)(struct geneve_sock *gs, struct sk_buff *skb);
+
+struct geneve_sock {
+	struct hlist_node	hlist;
+	geneve_rcv_t		*rcv;
+	void			*rcv_data;
+	struct work_struct	del_work;
+	struct socket		*sock;
+	struct rcu_head		rcu;
+	atomic_t		refcnt;
+	struct udp_offload	udp_offloads;
+};
+
 #define GENEVE_VER 0
 #define GENEVE_BASE_HLEN (sizeof(struct udphdr) + sizeof(struct genevehdr))
 
@@ -88,4 +92,6 @@ int geneve_xmit_skb(struct geneve_sock *gs, struct rtable *rt,
 		    __u8 ttl, __be16 df, __be16 src_port, __be16 dst_port,
 		    __be16 tun_flags, u8 vni[3], u8 opt_len, u8 *opt,
 		    bool xnet);
-#endif
+#endif /*ifdef CONFIG_INET */
+
+#endif /*ifdef__NET_GENEVE_H */
-- 
cgit v1.1


From fd2ef0ba3071c92ac6272ab22ea3f2b16d88a4eb Mon Sep 17 00:00:00 2001
From: Petri Gynther <pgynther@google.com>
Date: Mon, 6 Oct 2014 11:38:30 -0700
Subject: net: phy: adjust fixed_phy_register() return value

Adjust fixed_phy_register() to return struct phy_device *, so that
it becomes easy to use fixed PHYs without device tree support:

  phydev = fixed_phy_register(PHY_POLL, &fixed_phy_status, NULL);
  fixed_phy_set_link_update(phydev, fixed_phy_link_update);
  phy_connect_direct(netdev, phydev, handler_fn, phy_interface);

This change is a prerequisite for modifying bcmgenet driver to work
without a device tree on Broadcom's MIPS-based 7xxx platforms.

Signed-off-by: Petri Gynther <pgynther@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/phy_fixed.h | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/phy_fixed.h b/include/linux/phy_fixed.h
index 9411386..f2ca1b4 100644
--- a/include/linux/phy_fixed.h
+++ b/include/linux/phy_fixed.h
@@ -14,9 +14,9 @@ struct device_node;
 #ifdef CONFIG_FIXED_PHY
 extern int fixed_phy_add(unsigned int irq, int phy_id,
 			 struct fixed_phy_status *status);
-extern int fixed_phy_register(unsigned int irq,
-			      struct fixed_phy_status *status,
-			      struct device_node *np);
+extern struct phy_device *fixed_phy_register(unsigned int irq,
+					     struct fixed_phy_status *status,
+					     struct device_node *np);
 extern void fixed_phy_del(int phy_addr);
 extern int fixed_phy_set_link_update(struct phy_device *phydev,
 			int (*link_update)(struct net_device *,
@@ -27,11 +27,11 @@ static inline int fixed_phy_add(unsigned int irq, int phy_id,
 {
 	return -ENODEV;
 }
-static inline int fixed_phy_register(unsigned int irq,
-				     struct fixed_phy_status *status,
-				     struct device_node *np)
+static inline struct phy_device *fixed_phy_register(unsigned int irq,
+						struct fixed_phy_status *status,
+						struct device_node *np)
 {
-	return -ENODEV;
+	return ERR_PTR(-ENODEV);
 }
 static inline int fixed_phy_del(int phy_addr)
 {
-- 
cgit v1.1


From 0287587884b15041203b3a362d485e1ab1f24445 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sun, 5 Oct 2014 18:38:35 -0700
Subject: net: better IFF_XMIT_DST_RELEASE support

Testing xmit_more support with netperf and connected UDP sockets,
I found strange dst refcount false sharing.

Current handling of IFF_XMIT_DST_RELEASE is not optimal.

Dropping dst in validate_xmit_skb() is certainly too late in case
packet was queued by cpu X but dequeued by cpu Y

The logical point to take care of drop/force is in __dev_queue_xmit()
before even taking qdisc lock.

As Julian Anastasov pointed out, need for skb_dst() might come from some
packet schedulers or classifiers.

This patch adds new helper to cleanly express needs of various drivers
or qdiscs/classifiers.

Drivers that need skb_dst() in their ndo_start_xmit() should call
following helper in their setup instead of the prior :

	dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
->
	netif_keep_dst(dev);

Instead of using a single bit, we use two bits, one being
eventually rebuilt in bonding/team drivers.

The other one, is permanent and blocks IFF_XMIT_DST_RELEASE being
rebuilt in bonding/team. Eventually, we could add something
smarter later.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Julian Anastasov <ja@ssi.bg>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 2df86f5..3a4315b 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1206,6 +1206,7 @@ enum netdev_priv_flags {
 	IFF_SUPP_NOFCS			= 1<<19,
 	IFF_LIVE_ADDR_CHANGE		= 1<<20,
 	IFF_MACVLAN			= 1<<21,
+	IFF_XMIT_DST_RELEASE_PERM	= 1<<22,
 };
 
 #define IFF_802_1Q_VLAN			IFF_802_1Q_VLAN
@@ -1230,6 +1231,7 @@ enum netdev_priv_flags {
 #define IFF_SUPP_NOFCS			IFF_SUPP_NOFCS
 #define IFF_LIVE_ADDR_CHANGE		IFF_LIVE_ADDR_CHANGE
 #define IFF_MACVLAN			IFF_MACVLAN
+#define IFF_XMIT_DST_RELEASE_PERM	IFF_XMIT_DST_RELEASE_PERM
 
 /**
  *	struct net_device - The DEVICE structure.
@@ -3588,6 +3590,12 @@ static inline bool netif_supports_nofcs(struct net_device *dev)
 	return dev->priv_flags & IFF_SUPP_NOFCS;
 }
 
+/* This device needs to keep skb dst for qdisc enqueue or ndo_start_xmit() */
+static inline void netif_keep_dst(struct net_device *dev)
+{
+	dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM);
+}
+
 extern struct pernet_operations __net_initdata loopback_net_ops;
 
 /* Logging, debugging and troubleshooting/diagnostic helpers. */
-- 
cgit v1.1


From 583d4a6885cfa75a3d189f6bb69b5c545e961c75 Mon Sep 17 00:00:00 2001
From: LEROY Christophe <christophe.leroy@c-s.fr>
Date: Tue, 7 Oct 2014 15:04:57 +0200
Subject: net: fs_enet: Remove non NAPI RX

In the probe function, use_napi is inconditionnaly set to 1. This patch removes
all the code which is conditional to !use_napi, and removes use_napi which has
then become useless.

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/fs_enet_pd.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/fs_enet_pd.h b/include/linux/fs_enet_pd.h
index efb0596..77d783f 100644
--- a/include/linux/fs_enet_pd.h
+++ b/include/linux/fs_enet_pd.h
@@ -139,7 +139,6 @@ struct fs_platform_info {
 	int rx_ring, tx_ring;	/* number of buffers on rx     */
 	__u8 macaddr[ETH_ALEN];	/* mac address                 */
 	int rx_copybreak;	/* limit we copy small frames  */
-	int use_napi;		/* use NAPI                    */
 	int napi_weight;	/* NAPI weight                 */
 
 	int use_rmii;		/* use RMII mode 	       */
-- 
cgit v1.1


From 709c48b39ecf11a81f3820c13a828c330fd832b9 Mon Sep 17 00:00:00 2001
From: Masanari Iida <standby24x7@gmail.com>
Date: Wed, 8 Oct 2014 23:53:39 +0900
Subject: net: description of dma_cookie cause make xmldocs warning

In commit 7bced397510ab569d31de4c70b39e13355046387,
dma_cookie was removed from struct skbuff.
But the description of dma_cookie still exist.
So the "make xmldocs" output following warning.

Warning(.//include/linux/skbuff.h:609): Excess struct/union
/enum/typedef member 'dma_cookie' description in 'sk_buff'

Remove description of dma_cookie fix the symptom.

Signed-off-by: Masanari Iida <standby24x7@gmail.com>
Acked-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 3a5ec76..776104b 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -483,8 +483,6 @@ static inline u32 skb_mstamp_us_delta(const struct skb_mstamp *t1,
  *	@wifi_acked_valid: wifi_acked was set
  *	@wifi_acked: whether frame was acked on wifi or not
  *	@no_fcs:  Request NIC to treat last 4 bytes as Ethernet FCS
- *	@dma_cookie: a cookie to one of several possible DMA operations
- *		done by skb DMA functions
   *	@napi_id: id of the NAPI struct this skb came from
  *	@secmark: security marking
  *	@mark: Generic packet mark
-- 
cgit v1.1


From 535114539bb2c081b6680cb5a34be17e7b45df37 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 8 Oct 2014 08:19:27 -0700
Subject: net: add netdev_txq_bql_{enqueue, complete}_prefetchw() helpers

Add two helpers so that drivers do not have to care of BQL being
available or not.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Jim Davis <jim.epost@gmail.com>
Fixes: 29d40c903247 ("net/mlx4_en: Use prefetch in tx path")
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 3a4315b..838407a 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -30,6 +30,7 @@
 #include <linux/bug.h>
 #include <linux/delay.h>
 #include <linux/atomic.h>
+#include <linux/prefetch.h>
 #include <asm/cache.h>
 #include <asm/byteorder.h>
 
@@ -2480,6 +2481,34 @@ netif_xmit_frozen_or_drv_stopped(const struct netdev_queue *dev_queue)
 	return dev_queue->state & QUEUE_STATE_DRV_XOFF_OR_FROZEN;
 }
 
+/**
+ *	netdev_txq_bql_enqueue_prefetchw - prefetch bql data for write
+ *	@dev_queue: pointer to transmit queue
+ *
+ * BQL enabled drivers might use this helper in their ndo_start_xmit(),
+ * to give appropriate hint to the cpu.
+ */
+static inline void netdev_txq_bql_enqueue_prefetchw(struct netdev_queue *dev_queue)
+{
+#ifdef CONFIG_BQL
+	prefetchw(&dev_queue->dql.num_queued);
+#endif
+}
+
+/**
+ *	netdev_txq_bql_complete_prefetchw - prefetch bql data for write
+ *	@dev_queue: pointer to transmit queue
+ *
+ * BQL enabled drivers might use this helper in their TX completion path,
+ * to give appropriate hint to the cpu.
+ */
+static inline void netdev_txq_bql_complete_prefetchw(struct netdev_queue *dev_queue)
+{
+#ifdef CONFIG_BQL
+	prefetchw(&dev_queue->dql.limit);
+#endif
+}
+
 static inline void netdev_tx_sent_queue(struct netdev_queue *dev_queue,
 					unsigned int bytes)
 {
-- 
cgit v1.1