summaryrefslogtreecommitdiffstats
path: root/net/core
diff options
context:
space:
mode:
Diffstat (limited to 'net/core')
-rw-r--r--net/core/Makefile2
-rw-r--r--net/core/datagram.c14
-rw-r--r--net/core/dev.c98
-rw-r--r--net/core/dev_addr_lists.c85
-rw-r--r--net/core/ethtool.c215
-rw-r--r--net/core/filter.c1292
-rw-r--r--net/core/net_namespace.c2
-rw-r--r--net/core/pktgen.c50
-rw-r--r--net/core/ptp_classifier.c4
-rw-r--r--net/core/rtnetlink.c73
-rw-r--r--net/core/secure_seq.c25
-rw-r--r--net/core/skbuff.c28
-rw-r--r--net/core/sock.c4
-rw-r--r--net/core/tso.c77
14 files changed, 1117 insertions, 852 deletions
diff --git a/net/core/Makefile b/net/core/Makefile
index 826b925..71093d9 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -9,7 +9,7 @@ obj-$(CONFIG_SYSCTL) += sysctl_net_core.o
obj-y += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \
neighbour.o rtnetlink.o utils.o link_watch.o filter.o \
- sock_diag.o dev_ioctl.o
+ sock_diag.o dev_ioctl.o tso.o
obj-$(CONFIG_XFRM) += flow.o
obj-y += net-sysfs.o
diff --git a/net/core/datagram.c b/net/core/datagram.c
index a16ed7b..6b1c04c 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -739,11 +739,15 @@ __sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len)
__sum16 sum;
sum = csum_fold(skb_checksum(skb, 0, len, skb->csum));
- if (likely(!sum)) {
- if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE))
- netdev_rx_csum_fault(skb->dev);
- skb->ip_summed = CHECKSUM_UNNECESSARY;
- }
+ if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) && !sum &&
+ !skb->csum_complete_sw)
+ netdev_rx_csum_fault(skb->dev);
+
+ /* Save checksum complete for later use */
+ skb->csum = sum;
+ skb->ip_summed = CHECKSUM_COMPLETE;
+ skb->csum_complete_sw = 1;
+
return sum;
}
EXPORT_SYMBOL(__skb_checksum_complete_head);
diff --git a/net/core/dev.c b/net/core/dev.c
index 8908a68..30eedf6 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1661,6 +1661,29 @@ bool is_skb_forwardable(struct net_device *dev, struct sk_buff *skb)
}
EXPORT_SYMBOL_GPL(is_skb_forwardable);
+int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
+{
+ if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
+ if (skb_copy_ubufs(skb, GFP_ATOMIC)) {
+ atomic_long_inc(&dev->rx_dropped);
+ kfree_skb(skb);
+ return NET_RX_DROP;
+ }
+ }
+
+ if (unlikely(!is_skb_forwardable(dev, skb))) {
+ atomic_long_inc(&dev->rx_dropped);
+ kfree_skb(skb);
+ return NET_RX_DROP;
+ }
+
+ skb_scrub_packet(skb, true);
+ skb->protocol = eth_type_trans(skb, dev);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(__dev_forward_skb);
+
/**
* dev_forward_skb - loopback an skb to another netif
*
@@ -1681,24 +1704,7 @@ EXPORT_SYMBOL_GPL(is_skb_forwardable);
*/
int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
{
- if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
- if (skb_copy_ubufs(skb, GFP_ATOMIC)) {
- atomic_long_inc(&dev->rx_dropped);
- kfree_skb(skb);
- return NET_RX_DROP;
- }
- }
-
- if (unlikely(!is_skb_forwardable(dev, skb))) {
- atomic_long_inc(&dev->rx_dropped);
- kfree_skb(skb);
- return NET_RX_DROP;
- }
-
- skb_scrub_packet(skb, true);
- skb->protocol = eth_type_trans(skb, dev);
-
- return netif_rx_internal(skb);
+ return __dev_forward_skb(dev, skb) ?: netif_rx_internal(skb);
}
EXPORT_SYMBOL_GPL(dev_forward_skb);
@@ -2507,13 +2513,39 @@ static int dev_gso_segment(struct sk_buff *skb, netdev_features_t features)
return 0;
}
+/* If MPLS offload request, verify we are testing hardware MPLS features
+ * instead of standard features for the netdev.
+ */
+#ifdef CONFIG_NET_MPLS_GSO
+static netdev_features_t net_mpls_features(struct sk_buff *skb,
+ netdev_features_t features,
+ __be16 type)
+{
+ if (type == htons(ETH_P_MPLS_UC) || type == htons(ETH_P_MPLS_MC))
+ features &= skb->dev->mpls_features;
+
+ return features;
+}
+#else
+static netdev_features_t net_mpls_features(struct sk_buff *skb,
+ netdev_features_t features,
+ __be16 type)
+{
+ return features;
+}
+#endif
+
static netdev_features_t harmonize_features(struct sk_buff *skb,
netdev_features_t features)
{
int tmp;
+ __be16 type;
+
+ type = skb_network_protocol(skb, &tmp);
+ features = net_mpls_features(skb, features, type);
if (skb->ip_summed != CHECKSUM_NONE &&
- !can_checksum_protocol(features, skb_network_protocol(skb, &tmp))) {
+ !can_checksum_protocol(features, type)) {
features &= ~NETIF_F_ALL_CSUM;
} else if (illegal_highdma(skb->dev, skb)) {
features &= ~NETIF_F_SG;
@@ -5689,10 +5721,6 @@ static void rollback_registered_many(struct list_head *head)
*/
call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
- if (!dev->rtnl_link_ops ||
- dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
- rtmsg_ifinfo(RTM_DELLINK, dev, ~0U, GFP_KERNEL);
-
/*
* Flush the unicast and multicast chains
*/
@@ -5702,6 +5730,10 @@ static void rollback_registered_many(struct list_head *head)
if (dev->netdev_ops->ndo_uninit)
dev->netdev_ops->ndo_uninit(dev);
+ if (!dev->rtnl_link_ops ||
+ dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
+ rtmsg_ifinfo(RTM_DELLINK, dev, ~0U, GFP_KERNEL);
+
/* Notifier chain MUST detach us all upper devices. */
WARN_ON(netdev_has_any_upper_dev(dev));
@@ -5927,10 +5959,7 @@ static void netdev_init_one_queue(struct net_device *dev,
static void netif_free_tx_queues(struct net_device *dev)
{
- if (is_vmalloc_addr(dev->_tx))
- vfree(dev->_tx);
- else
- kfree(dev->_tx);
+ kvfree(dev->_tx);
}
static int netif_alloc_netdev_queues(struct net_device *dev)
@@ -6404,10 +6433,7 @@ void netdev_freemem(struct net_device *dev)
{
char *addr = (char *)dev - dev->padded;
- if (is_vmalloc_addr(addr))
- vfree(addr);
- else
- kfree(addr);
+ kvfree(addr);
}
/**
@@ -6512,11 +6538,6 @@ free_all:
free_pcpu:
free_percpu(dev->pcpu_refcnt);
- netif_free_tx_queues(dev);
-#ifdef CONFIG_SYSFS
- kfree(dev->_rx);
-#endif
-
free_dev:
netdev_freemem(dev);
return NULL;
@@ -6613,6 +6634,9 @@ EXPORT_SYMBOL(unregister_netdevice_queue);
/**
* unregister_netdevice_many - unregister many devices
* @head: list of devices
+ *
+ * Note: As most callers use a stack allocated list_head,
+ * we force a list_del() to make sure stack wont be corrupted later.
*/
void unregister_netdevice_many(struct list_head *head)
{
@@ -6622,6 +6646,7 @@ void unregister_netdevice_many(struct list_head *head)
rollback_registered_many(head);
list_for_each_entry(dev, head, unreg_list)
net_set_todo(dev);
+ list_del(head);
}
}
EXPORT_SYMBOL(unregister_netdevice_many);
@@ -7077,7 +7102,6 @@ static void __net_exit default_device_exit_batch(struct list_head *net_list)
}
}
unregister_netdevice_many(&dev_kill_list);
- list_del(&dev_kill_list);
rtnl_unlock();
}
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
index 329d579..b6b2306 100644
--- a/net/core/dev_addr_lists.c
+++ b/net/core/dev_addr_lists.c
@@ -225,6 +225,91 @@ void __hw_addr_unsync(struct netdev_hw_addr_list *to_list,
}
EXPORT_SYMBOL(__hw_addr_unsync);
+/**
+ * __hw_addr_sync_dev - Synchonize device's multicast list
+ * @list: address list to syncronize
+ * @dev: device to sync
+ * @sync: function to call if address should be added
+ * @unsync: function to call if address should be removed
+ *
+ * This funciton is intended to be called from the ndo_set_rx_mode
+ * function of devices that require explicit address add/remove
+ * notifications. The unsync function may be NULL in which case
+ * the addresses requiring removal will simply be removed without
+ * any notification to the device.
+ **/
+int __hw_addr_sync_dev(struct netdev_hw_addr_list *list,
+ struct net_device *dev,
+ int (*sync)(struct net_device *, const unsigned char *),
+ int (*unsync)(struct net_device *,
+ const unsigned char *))
+{
+ struct netdev_hw_addr *ha, *tmp;
+ int err;
+
+ /* first go through and flush out any stale entries */
+ list_for_each_entry_safe(ha, tmp, &list->list, list) {
+ if (!ha->sync_cnt || ha->refcount != 1)
+ continue;
+
+ /* if unsync is defined and fails defer unsyncing address */
+ if (unsync && unsync(dev, ha->addr))
+ continue;
+
+ ha->sync_cnt--;
+ __hw_addr_del_entry(list, ha, false, false);
+ }
+
+ /* go through and sync new entries to the list */
+ list_for_each_entry_safe(ha, tmp, &list->list, list) {
+ if (ha->sync_cnt)
+ continue;
+
+ err = sync(dev, ha->addr);
+ if (err)
+ return err;
+
+ ha->sync_cnt++;
+ ha->refcount++;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(__hw_addr_sync_dev);
+
+/**
+ * __hw_addr_unsync_dev - Remove synchonized addresses from device
+ * @list: address list to remove syncronized addresses from
+ * @dev: device to sync
+ * @unsync: function to call if address should be removed
+ *
+ * Remove all addresses that were added to the device by __hw_addr_sync_dev().
+ * This function is intended to be called from the ndo_stop or ndo_open
+ * functions on devices that require explicit address add/remove
+ * notifications. If the unsync function pointer is NULL then this function
+ * can be used to just reset the sync_cnt for the addresses in the list.
+ **/
+void __hw_addr_unsync_dev(struct netdev_hw_addr_list *list,
+ struct net_device *dev,
+ int (*unsync)(struct net_device *,
+ const unsigned char *))
+{
+ struct netdev_hw_addr *ha, *tmp;
+
+ list_for_each_entry_safe(ha, tmp, &list->list, list) {
+ if (!ha->sync_cnt)
+ continue;
+
+ /* if unsync is defined and fails defer unsyncing address */
+ if (unsync && unsync(dev, ha->addr))
+ continue;
+
+ ha->sync_cnt--;
+ __hw_addr_del_entry(list, ha, false, false);
+ }
+}
+EXPORT_SYMBOL(__hw_addr_unsync_dev);
+
static void __hw_addr_flush(struct netdev_hw_addr_list *list)
{
struct netdev_hw_addr *ha, *tmp;
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 640ba0e..17cb912 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -557,6 +557,23 @@ err_out:
return ret;
}
+static int ethtool_copy_validate_indir(u32 *indir, void __user *useraddr,
+ struct ethtool_rxnfc *rx_rings,
+ u32 size)
+{
+ int i;
+
+ if (copy_from_user(indir, useraddr, size * sizeof(indir[0])))
+ return -EFAULT;
+
+ /* Validate ring indices */
+ for (i = 0; i < size; i++)
+ if (indir[i] >= rx_rings->data)
+ return -EINVAL;
+
+ return 0;
+}
+
static noinline_for_stack int ethtool_get_rxfh_indir(struct net_device *dev,
void __user *useraddr)
{
@@ -565,7 +582,7 @@ static noinline_for_stack int ethtool_get_rxfh_indir(struct net_device *dev,
int ret;
if (!dev->ethtool_ops->get_rxfh_indir_size ||
- !dev->ethtool_ops->get_rxfh_indir)
+ !dev->ethtool_ops->get_rxfh)
return -EOPNOTSUPP;
dev_size = dev->ethtool_ops->get_rxfh_indir_size(dev);
if (dev_size == 0)
@@ -591,7 +608,7 @@ static noinline_for_stack int ethtool_get_rxfh_indir(struct net_device *dev,
if (!indir)
return -ENOMEM;
- ret = dev->ethtool_ops->get_rxfh_indir(dev, indir);
+ ret = dev->ethtool_ops->get_rxfh(dev, indir, NULL);
if (ret)
goto out;
@@ -613,8 +630,9 @@ static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev,
u32 *indir;
const struct ethtool_ops *ops = dev->ethtool_ops;
int ret;
+ u32 ringidx_offset = offsetof(struct ethtool_rxfh_indir, ring_index[0]);
- if (!ops->get_rxfh_indir_size || !ops->set_rxfh_indir ||
+ if (!ops->get_rxfh_indir_size || !ops->set_rxfh ||
!ops->get_rxnfc)
return -EOPNOTSUPP;
@@ -643,28 +661,184 @@ static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev,
for (i = 0; i < dev_size; i++)
indir[i] = ethtool_rxfh_indir_default(i, rx_rings.data);
} else {
- if (copy_from_user(indir,
- useraddr +
- offsetof(struct ethtool_rxfh_indir,
- ring_index[0]),
- dev_size * sizeof(indir[0]))) {
+ ret = ethtool_copy_validate_indir(indir,
+ useraddr + ringidx_offset,
+ &rx_rings,
+ dev_size);
+ if (ret)
+ goto out;
+ }
+
+ ret = ops->set_rxfh(dev, indir, NULL);
+
+out:
+ kfree(indir);
+ return ret;
+}
+
+static noinline_for_stack int ethtool_get_rxfh(struct net_device *dev,
+ void __user *useraddr)
+{
+ int ret;
+ const struct ethtool_ops *ops = dev->ethtool_ops;
+ u32 user_indir_size, user_key_size;
+ u32 dev_indir_size = 0, dev_key_size = 0;
+ struct ethtool_rxfh rxfh;
+ u32 total_size;
+ u32 indir_bytes;
+ u32 *indir = NULL;
+ u8 *hkey = NULL;
+ u8 *rss_config;
+
+ if (!(dev->ethtool_ops->get_rxfh_indir_size ||
+ dev->ethtool_ops->get_rxfh_key_size) ||
+ !dev->ethtool_ops->get_rxfh)
+ return -EOPNOTSUPP;
+
+ if (ops->get_rxfh_indir_size)
+ dev_indir_size = ops->get_rxfh_indir_size(dev);
+ if (ops->get_rxfh_key_size)
+ dev_key_size = ops->get_rxfh_key_size(dev);
+
+ if ((dev_key_size + dev_indir_size) == 0)
+ return -EOPNOTSUPP;
+
+ if (copy_from_user(&rxfh, useraddr, sizeof(rxfh)))
+ return -EFAULT;
+ user_indir_size = rxfh.indir_size;
+ user_key_size = rxfh.key_size;
+
+ /* Check that reserved fields are 0 for now */
+ if (rxfh.rss_context || rxfh.rsvd[0] || rxfh.rsvd[1])
+ return -EINVAL;
+
+ rxfh.indir_size = dev_indir_size;
+ rxfh.key_size = dev_key_size;
+ if (copy_to_user(useraddr, &rxfh, sizeof(rxfh)))
+ return -EFAULT;
+
+ /* If the user buffer size is 0, this is just a query for the
+ * device table size and key size. Otherwise, if the User size is
+ * not equal to device table size or key size it's an error.
+ */
+ if (!user_indir_size && !user_key_size)
+ return 0;
+
+ if ((user_indir_size && (user_indir_size != dev_indir_size)) ||
+ (user_key_size && (user_key_size != dev_key_size)))
+ return -EINVAL;
+
+ indir_bytes = user_indir_size * sizeof(indir[0]);
+ total_size = indir_bytes + user_key_size;
+ rss_config = kzalloc(total_size, GFP_USER);
+ if (!rss_config)
+ return -ENOMEM;
+
+ if (user_indir_size)
+ indir = (u32 *)rss_config;
+
+ if (user_key_size)
+ hkey = rss_config + indir_bytes;
+
+ ret = dev->ethtool_ops->get_rxfh(dev, indir, hkey);
+ if (!ret) {
+ if (copy_to_user(useraddr +
+ offsetof(struct ethtool_rxfh, rss_config[0]),
+ rss_config, total_size))
ret = -EFAULT;
+ }
+
+ kfree(rss_config);
+
+ return ret;
+}
+
+static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
+ void __user *useraddr)
+{
+ int ret;
+ const struct ethtool_ops *ops = dev->ethtool_ops;
+ struct ethtool_rxnfc rx_rings;
+ struct ethtool_rxfh rxfh;
+ u32 dev_indir_size = 0, dev_key_size = 0, i;
+ u32 *indir = NULL, indir_bytes = 0;
+ u8 *hkey = NULL;
+ u8 *rss_config;
+ u32 rss_cfg_offset = offsetof(struct ethtool_rxfh, rss_config[0]);
+
+ if (!(ops->get_rxfh_indir_size || ops->get_rxfh_key_size) ||
+ !ops->get_rxnfc || !ops->set_rxfh)
+ return -EOPNOTSUPP;
+
+ if (ops->get_rxfh_indir_size)
+ dev_indir_size = ops->get_rxfh_indir_size(dev);
+ if (ops->get_rxfh_key_size)
+ dev_key_size = dev->ethtool_ops->get_rxfh_key_size(dev);
+ if ((dev_key_size + dev_indir_size) == 0)
+ return -EOPNOTSUPP;
+
+ if (copy_from_user(&rxfh, useraddr, sizeof(rxfh)))
+ return -EFAULT;
+
+ /* Check that reserved fields are 0 for now */
+ if (rxfh.rss_context || rxfh.rsvd[0] || rxfh.rsvd[1])
+ return -EINVAL;
+
+ /* If either indir or hash key is valid, proceed further.
+ * It is not valid to request that both be unchanged.
+ */
+ if ((rxfh.indir_size &&
+ rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE &&
+ rxfh.indir_size != dev_indir_size) ||
+ (rxfh.key_size && (rxfh.key_size != dev_key_size)) ||
+ (rxfh.indir_size == ETH_RXFH_INDIR_NO_CHANGE &&
+ rxfh.key_size == 0))
+ return -EINVAL;
+
+ if (rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE)
+ indir_bytes = dev_indir_size * sizeof(indir[0]);
+
+ rss_config = kzalloc(indir_bytes + rxfh.key_size, GFP_USER);
+ if (!rss_config)
+ return -ENOMEM;
+
+ rx_rings.cmd = ETHTOOL_GRXRINGS;
+ ret = ops->get_rxnfc(dev, &rx_rings, NULL);
+ if (ret)
+ goto out;
+
+ /* rxfh.indir_size == 0 means reset the indir table to default.
+ * rxfh.indir_size == ETH_RXFH_INDIR_NO_CHANGE means leave it unchanged.
+ */
+ if (rxfh.indir_size &&
+ rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE) {
+ indir = (u32 *)rss_config;
+ ret = ethtool_copy_validate_indir(indir,
+ useraddr + rss_cfg_offset,
+ &rx_rings,
+ rxfh.indir_size);
+ if (ret)
goto out;
- }
+ } else if (rxfh.indir_size == 0) {
+ indir = (u32 *)rss_config;
+ for (i = 0; i < dev_indir_size; i++)
+ indir[i] = ethtool_rxfh_indir_default(i, rx_rings.data);
+ }
- /* Validate ring indices */
- for (i = 0; i < dev_size; i++) {
- if (indir[i] >= rx_rings.data) {
- ret = -EINVAL;
- goto out;
- }
+ if (rxfh.key_size) {
+ hkey = rss_config + indir_bytes;
+ if (copy_from_user(hkey,
+ useraddr + rss_cfg_offset + indir_bytes,
+ rxfh.key_size)) {
+ ret = -EFAULT;
+ goto out;
}
}
- ret = ops->set_rxfh_indir(dev, indir);
+ ret = ops->set_rxfh(dev, indir, hkey);
out:
- kfree(indir);
+ kfree(rss_config);
return ret;
}
@@ -1491,6 +1665,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
case ETHTOOL_GRXCLSRULE:
case ETHTOOL_GRXCLSRLALL:
case ETHTOOL_GRXFHINDIR:
+ case ETHTOOL_GRSSH:
case ETHTOOL_GFEATURES:
case ETHTOOL_GCHANNELS:
case ETHTOOL_GET_TS_INFO:
@@ -1628,6 +1803,12 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
case ETHTOOL_SRXFHINDIR:
rc = ethtool_set_rxfh_indir(dev, useraddr);
break;
+ case ETHTOOL_GRSSH:
+ rc = ethtool_get_rxfh(dev, useraddr);
+ break;
+ case ETHTOOL_SRSSH:
+ rc = ethtool_set_rxfh(dev, useraddr);
+ break;
case ETHTOOL_GFEATURES:
rc = ethtool_get_features(dev, useraddr);
break;
diff --git a/net/core/filter.c b/net/core/filter.c
index 4aec7b9..735fad8 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -45,6 +45,27 @@
#include <linux/seccomp.h>
#include <linux/if_vlan.h>
+/* Registers */
+#define BPF_R0 regs[BPF_REG_0]
+#define BPF_R1 regs[BPF_REG_1]
+#define BPF_R2 regs[BPF_REG_2]
+#define BPF_R3 regs[BPF_REG_3]
+#define BPF_R4 regs[BPF_REG_4]
+#define BPF_R5 regs[BPF_REG_5]
+#define BPF_R6 regs[BPF_REG_6]
+#define BPF_R7 regs[BPF_REG_7]
+#define BPF_R8 regs[BPF_REG_8]
+#define BPF_R9 regs[BPF_REG_9]
+#define BPF_R10 regs[BPF_REG_10]
+
+/* Named registers */
+#define DST regs[insn->dst_reg]
+#define SRC regs[insn->src_reg]
+#define FP regs[BPF_REG_FP]
+#define ARG1 regs[BPF_REG_ARG1]
+#define CTX regs[BPF_REG_CTX]
+#define IMM insn->imm
+
/* No hurry in this branch
*
* Exported for the bpf jit load helper.
@@ -57,9 +78,9 @@ void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, uns
ptr = skb_network_header(skb) + k - SKF_NET_OFF;
else if (k >= SKF_LL_OFF)
ptr = skb_mac_header(skb) + k - SKF_LL_OFF;
-
if (ptr >= skb->head && ptr + size <= skb_tail_pointer(skb))
return ptr;
+
return NULL;
}
@@ -68,6 +89,7 @@ static inline void *load_pointer(const struct sk_buff *skb, int k,
{
if (k >= 0)
return skb_header_pointer(skb, k, size, buffer);
+
return bpf_internal_load_pointer_neg_helper(skb, k, size);
}
@@ -122,13 +144,6 @@ noinline u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
return 0;
}
-/* Register mappings for user programs. */
-#define A_REG 0
-#define X_REG 7
-#define TMP_REG 8
-#define ARG2_REG 2
-#define ARG3_REG 3
-
/**
* __sk_run_filter - run a filter on a given context
* @ctx: buffer to run the filter on
@@ -138,447 +153,442 @@ noinline u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
* keep, 0 for none. @ctx is the data we are operating on, @insn is the
* array of filter instructions.
*/
-unsigned int __sk_run_filter(void *ctx, const struct sock_filter_int *insn)
+static unsigned int __sk_run_filter(void *ctx, const struct sock_filter_int *insn)
{
u64 stack[MAX_BPF_STACK / sizeof(u64)];
u64 regs[MAX_BPF_REG], tmp;
- void *ptr;
- int off;
-
-#define K insn->imm
-#define A regs[insn->a_reg]
-#define X regs[insn->x_reg]
-#define R0 regs[0]
-
-#define CONT ({insn++; goto select_insn; })
-#define CONT_JMP ({insn++; goto select_insn; })
-
static const void *jumptable[256] = {
[0 ... 255] = &&default_label,
/* Now overwrite non-defaults ... */
-#define DL(A, B, C) [A|B|C] = &&A##_##B##_##C
- DL(BPF_ALU, BPF_ADD, BPF_X),
- DL(BPF_ALU, BPF_ADD, BPF_K),
- DL(BPF_ALU, BPF_SUB, BPF_X),
- DL(BPF_ALU, BPF_SUB, BPF_K),
- DL(BPF_ALU, BPF_AND, BPF_X),
- DL(BPF_ALU, BPF_AND, BPF_K),
- DL(BPF_ALU, BPF_OR, BPF_X),
- DL(BPF_ALU, BPF_OR, BPF_K),
- DL(BPF_ALU, BPF_LSH, BPF_X),
- DL(BPF_ALU, BPF_LSH, BPF_K),
- DL(BPF_ALU, BPF_RSH, BPF_X),
- DL(BPF_ALU, BPF_RSH, BPF_K),
- DL(BPF_ALU, BPF_XOR, BPF_X),
- DL(BPF_ALU, BPF_XOR, BPF_K),
- DL(BPF_ALU, BPF_MUL, BPF_X),
- DL(BPF_ALU, BPF_MUL, BPF_K),
- DL(BPF_ALU, BPF_MOV, BPF_X),
- DL(BPF_ALU, BPF_MOV, BPF_K),
- DL(BPF_ALU, BPF_DIV, BPF_X),
- DL(BPF_ALU, BPF_DIV, BPF_K),
- DL(BPF_ALU, BPF_MOD, BPF_X),
- DL(BPF_ALU, BPF_MOD, BPF_K),
- DL(BPF_ALU, BPF_NEG, 0),
- DL(BPF_ALU, BPF_END, BPF_TO_BE),
- DL(BPF_ALU, BPF_END, BPF_TO_LE),
- DL(BPF_ALU64, BPF_ADD, BPF_X),
- DL(BPF_ALU64, BPF_ADD, BPF_K),
- DL(BPF_ALU64, BPF_SUB, BPF_X),
- DL(BPF_ALU64, BPF_SUB, BPF_K),
- DL(BPF_ALU64, BPF_AND, BPF_X),
- DL(BPF_ALU64, BPF_AND, BPF_K),
- DL(BPF_ALU64, BPF_OR, BPF_X),
- DL(BPF_ALU64, BPF_OR, BPF_K),
- DL(BPF_ALU64, BPF_LSH, BPF_X),
- DL(BPF_ALU64, BPF_LSH, BPF_K),
- DL(BPF_ALU64, BPF_RSH, BPF_X),
- DL(BPF_ALU64, BPF_RSH, BPF_K),
- DL(BPF_ALU64, BPF_XOR, BPF_X),
- DL(BPF_ALU64, BPF_XOR, BPF_K),
- DL(BPF_ALU64, BPF_MUL, BPF_X),
- DL(BPF_ALU64, BPF_MUL, BPF_K),
- DL(BPF_ALU64, BPF_MOV, BPF_X),
- DL(BPF_ALU64, BPF_MOV, BPF_K),
- DL(BPF_ALU64, BPF_ARSH, BPF_X),
- DL(BPF_ALU64, BPF_ARSH, BPF_K),
- DL(BPF_ALU64, BPF_DIV, BPF_X),
- DL(BPF_ALU64, BPF_DIV, BPF_K),
- DL(BPF_ALU64, BPF_MOD, BPF_X),
- DL(BPF_ALU64, BPF_MOD, BPF_K),
- DL(BPF_ALU64, BPF_NEG, 0),
- DL(BPF_JMP, BPF_CALL, 0),
- DL(BPF_JMP, BPF_JA, 0),
- DL(BPF_JMP, BPF_JEQ, BPF_X),
- DL(BPF_JMP, BPF_JEQ, BPF_K),
- DL(BPF_JMP, BPF_JNE, BPF_X),
- DL(BPF_JMP, BPF_JNE, BPF_K),
- DL(BPF_JMP, BPF_JGT, BPF_X),
- DL(BPF_JMP, BPF_JGT, BPF_K),
- DL(BPF_JMP, BPF_JGE, BPF_X),
- DL(BPF_JMP, BPF_JGE, BPF_K),
- DL(BPF_JMP, BPF_JSGT, BPF_X),
- DL(BPF_JMP, BPF_JSGT, BPF_K),
- DL(BPF_JMP, BPF_JSGE, BPF_X),
- DL(BPF_JMP, BPF_JSGE, BPF_K),
- DL(BPF_JMP, BPF_JSET, BPF_X),
- DL(BPF_JMP, BPF_JSET, BPF_K),
- DL(BPF_JMP, BPF_EXIT, 0),
- DL(BPF_STX, BPF_MEM, BPF_B),
- DL(BPF_STX, BPF_MEM, BPF_H),
- DL(BPF_STX, BPF_MEM, BPF_W),
- DL(BPF_STX, BPF_MEM, BPF_DW),
- DL(BPF_STX, BPF_XADD, BPF_W),
- DL(BPF_STX, BPF_XADD, BPF_DW),
- DL(BPF_ST, BPF_MEM, BPF_B),
- DL(BPF_ST, BPF_MEM, BPF_H),
- DL(BPF_ST, BPF_MEM, BPF_W),
- DL(BPF_ST, BPF_MEM, BPF_DW),
- DL(BPF_LDX, BPF_MEM, BPF_B),
- DL(BPF_LDX, BPF_MEM, BPF_H),
- DL(BPF_LDX, BPF_MEM, BPF_W),
- DL(BPF_LDX, BPF_MEM, BPF_DW),
- DL(BPF_LD, BPF_ABS, BPF_W),
- DL(BPF_LD, BPF_ABS, BPF_H),
- DL(BPF_LD, BPF_ABS, BPF_B),
- DL(BPF_LD, BPF_IND, BPF_W),
- DL(BPF_LD, BPF_IND, BPF_H),
- DL(BPF_LD, BPF_IND, BPF_B),
-#undef DL
+ /* 32 bit ALU operations */
+ [BPF_ALU | BPF_ADD | BPF_X] = &&ALU_ADD_X,
+ [BPF_ALU | BPF_ADD | BPF_K] = &&ALU_ADD_K,
+ [BPF_ALU | BPF_SUB | BPF_X] = &&ALU_SUB_X,
+ [BPF_ALU | BPF_SUB | BPF_K] = &&ALU_SUB_K,
+ [BPF_ALU | BPF_AND | BPF_X] = &&ALU_AND_X,
+ [BPF_ALU | BPF_AND | BPF_K] = &&ALU_AND_K,
+ [BPF_ALU | BPF_OR | BPF_X] = &&ALU_OR_X,
+ [BPF_ALU | BPF_OR | BPF_K] = &&ALU_OR_K,
+ [BPF_ALU | BPF_LSH | BPF_X] = &&ALU_LSH_X,
+ [BPF_ALU | BPF_LSH | BPF_K] = &&ALU_LSH_K,
+ [BPF_ALU | BPF_RSH | BPF_X] = &&ALU_RSH_X,
+ [BPF_ALU | BPF_RSH | BPF_K] = &&ALU_RSH_K,
+ [BPF_ALU | BPF_XOR | BPF_X] = &&ALU_XOR_X,
+ [BPF_ALU | BPF_XOR | BPF_K] = &&ALU_XOR_K,
+ [BPF_ALU | BPF_MUL | BPF_X] = &&ALU_MUL_X,
+ [BPF_ALU | BPF_MUL | BPF_K] = &&ALU_MUL_K,
+ [BPF_ALU | BPF_MOV | BPF_X] = &&ALU_MOV_X,
+ [BPF_ALU | BPF_MOV | BPF_K] = &&ALU_MOV_K,
+ [BPF_ALU | BPF_DIV | BPF_X] = &&ALU_DIV_X,
+ [BPF_ALU | BPF_DIV | BPF_K] = &&ALU_DIV_K,
+ [BPF_ALU | BPF_MOD | BPF_X] = &&ALU_MOD_X,
+ [BPF_ALU | BPF_MOD | BPF_K] = &&ALU_MOD_K,
+ [BPF_ALU | BPF_NEG] = &&ALU_NEG,
+ [BPF_ALU | BPF_END | BPF_TO_BE] = &&ALU_END_TO_BE,
+ [BPF_ALU | BPF_END | BPF_TO_LE] = &&ALU_END_TO_LE,
+ /* 64 bit ALU operations */
+ [BPF_ALU64 | BPF_ADD | BPF_X] = &&ALU64_ADD_X,
+ [BPF_ALU64 | BPF_ADD | BPF_K] = &&ALU64_ADD_K,
+ [BPF_ALU64 | BPF_SUB | BPF_X] = &&ALU64_SUB_X,
+ [BPF_ALU64 | BPF_SUB | BPF_K] = &&ALU64_SUB_K,
+ [BPF_ALU64 | BPF_AND | BPF_X] = &&ALU64_AND_X,
+ [BPF_ALU64 | BPF_AND | BPF_K] = &&ALU64_AND_K,
+ [BPF_ALU64 | BPF_OR | BPF_X] = &&ALU64_OR_X,
+ [BPF_ALU64 | BPF_OR | BPF_K] = &&ALU64_OR_K,
+ [BPF_ALU64 | BPF_LSH | BPF_X] = &&ALU64_LSH_X,
+ [BPF_ALU64 | BPF_LSH | BPF_K] = &&ALU64_LSH_K,
+ [BPF_ALU64 | BPF_RSH | BPF_X] = &&ALU64_RSH_X,
+ [BPF_ALU64 | BPF_RSH | BPF_K] = &&ALU64_RSH_K,
+ [BPF_ALU64 | BPF_XOR | BPF_X] = &&ALU64_XOR_X,
+ [BPF_ALU64 | BPF_XOR | BPF_K] = &&ALU64_XOR_K,
+ [BPF_ALU64 | BPF_MUL | BPF_X] = &&ALU64_MUL_X,
+ [BPF_ALU64 | BPF_MUL | BPF_K] = &&ALU64_MUL_K,
+ [BPF_ALU64 | BPF_MOV | BPF_X] = &&ALU64_MOV_X,
+ [BPF_ALU64 | BPF_MOV | BPF_K] = &&ALU64_MOV_K,
+ [BPF_ALU64 | BPF_ARSH | BPF_X] = &&ALU64_ARSH_X,
+ [BPF_ALU64 | BPF_ARSH | BPF_K] = &&ALU64_ARSH_K,
+ [BPF_ALU64 | BPF_DIV | BPF_X] = &&ALU64_DIV_X,
+ [BPF_ALU64 | BPF_DIV | BPF_K] = &&ALU64_DIV_K,
+ [BPF_ALU64 | BPF_MOD | BPF_X] = &&ALU64_MOD_X,
+ [BPF_ALU64 | BPF_MOD | BPF_K] = &&ALU64_MOD_K,
+ [BPF_ALU64 | BPF_NEG] = &&ALU64_NEG,
+ /* Call instruction */
+ [BPF_JMP | BPF_CALL] = &&JMP_CALL,
+ /* Jumps */
+ [BPF_JMP | BPF_JA] = &&JMP_JA,
+ [BPF_JMP | BPF_JEQ | BPF_X] = &&JMP_JEQ_X,
+ [BPF_JMP | BPF_JEQ | BPF_K] = &&JMP_JEQ_K,
+ [BPF_JMP | BPF_JNE | BPF_X] = &&JMP_JNE_X,
+ [BPF_JMP | BPF_JNE | BPF_K] = &&JMP_JNE_K,
+ [BPF_JMP | BPF_JGT | BPF_X] = &&JMP_JGT_X,
+ [BPF_JMP | BPF_JGT | BPF_K] = &&JMP_JGT_K,
+ [BPF_JMP | BPF_JGE | BPF_X] = &&JMP_JGE_X,
+ [BPF_JMP | BPF_JGE | BPF_K] = &&JMP_JGE_K,
+ [BPF_JMP | BPF_JSGT | BPF_X] = &&JMP_JSGT_X,
+ [BPF_JMP | BPF_JSGT | BPF_K] = &&JMP_JSGT_K,
+ [BPF_JMP | BPF_JSGE | BPF_X] = &&JMP_JSGE_X,
+ [BPF_JMP | BPF_JSGE | BPF_K] = &&JMP_JSGE_K,
+ [BPF_JMP | BPF_JSET | BPF_X] = &&JMP_JSET_X,
+ [BPF_JMP | BPF_JSET | BPF_K] = &&JMP_JSET_K,
+ /* Program return */
+ [BPF_JMP | BPF_EXIT] = &&JMP_EXIT,
+ /* Store instructions */
+ [BPF_STX | BPF_MEM | BPF_B] = &&STX_MEM_B,
+ [BPF_STX | BPF_MEM | BPF_H] = &&STX_MEM_H,
+ [BPF_STX | BPF_MEM | BPF_W] = &&STX_MEM_W,
+ [BPF_STX | BPF_MEM | BPF_DW] = &&STX_MEM_DW,
+ [BPF_STX | BPF_XADD | BPF_W] = &&STX_XADD_W,
+ [BPF_STX | BPF_XADD | BPF_DW] = &&STX_XADD_DW,
+ [BPF_ST | BPF_MEM | BPF_B] = &&ST_MEM_B,
+ [BPF_ST | BPF_MEM | BPF_H] = &&ST_MEM_H,
+ [BPF_ST | BPF_MEM | BPF_W] = &&ST_MEM_W,
+ [BPF_ST | BPF_MEM | BPF_DW] = &&ST_MEM_DW,
+ /* Load instructions */
+ [BPF_LDX | BPF_MEM | BPF_B] = &&LDX_MEM_B,
+ [BPF_LDX | BPF_MEM | BPF_H] = &&LDX_MEM_H,
+ [BPF_LDX | BPF_MEM | BPF_W] = &&LDX_MEM_W,
+ [BPF_LDX | BPF_MEM | BPF_DW] = &&LDX_MEM_DW,
+ [BPF_LD | BPF_ABS | BPF_W] = &&LD_ABS_W,
+ [BPF_LD | BPF_ABS | BPF_H] = &&LD_ABS_H,
+ [BPF_LD | BPF_ABS | BPF_B] = &&LD_ABS_B,
+ [BPF_LD | BPF_IND | BPF_W] = &&LD_IND_W,
+ [BPF_LD | BPF_IND | BPF_H] = &&LD_IND_H,
+ [BPF_LD | BPF_IND | BPF_B] = &&LD_IND_B,
};
+ void *ptr;
+ int off;
+
+#define CONT ({ insn++; goto select_insn; })
+#define CONT_JMP ({ insn++; goto select_insn; })
- regs[FP_REG] = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)];
- regs[ARG1_REG] = (u64) (unsigned long) ctx;
- regs[A_REG] = 0;
- regs[X_REG] = 0;
+ FP = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)];
+ ARG1 = (u64) (unsigned long) ctx;
+
+ /* Registers used in classic BPF programs need to be reset first. */
+ regs[BPF_REG_A] = 0;
+ regs[BPF_REG_X] = 0;
select_insn:
goto *jumptable[insn->code];
/* ALU */
#define ALU(OPCODE, OP) \
- BPF_ALU64_##OPCODE##_BPF_X: \
- A = A OP X; \
+ ALU64_##OPCODE##_X: \
+ DST = DST OP SRC; \
CONT; \
- BPF_ALU_##OPCODE##_BPF_X: \
- A = (u32) A OP (u32) X; \
+ ALU_##OPCODE##_X: \
+ DST = (u32) DST OP (u32) SRC; \
CONT; \
- BPF_ALU64_##OPCODE##_BPF_K: \
- A = A OP K; \
+ ALU64_##OPCODE##_K: \
+ DST = DST OP IMM; \
CONT; \
- BPF_ALU_##OPCODE##_BPF_K: \
- A = (u32) A OP (u32) K; \
+ ALU_##OPCODE##_K: \
+ DST = (u32) DST OP (u32) IMM; \
CONT;
- ALU(BPF_ADD, +)
- ALU(BPF_SUB, -)
- ALU(BPF_AND, &)
- ALU(BPF_OR, |)
- ALU(BPF_LSH, <<)
- ALU(BPF_RSH, >>)
- ALU(BPF_XOR, ^)
- ALU(BPF_MUL, *)
+ ALU(ADD, +)
+ ALU(SUB, -)
+ ALU(AND, &)
+ ALU(OR, |)
+ ALU(LSH, <<)
+ ALU(RSH, >>)
+ ALU(XOR, ^)
+ ALU(MUL, *)
#undef ALU
- BPF_ALU_BPF_NEG_0:
- A = (u32) -A;
+ ALU_NEG:
+ DST = (u32) -DST;
CONT;
- BPF_ALU64_BPF_NEG_0:
- A = -A;
+ ALU64_NEG:
+ DST = -DST;
CONT;
- BPF_ALU_BPF_MOV_BPF_X:
- A = (u32) X;
+ ALU_MOV_X:
+ DST = (u32) SRC;
CONT;
- BPF_ALU_BPF_MOV_BPF_K:
- A = (u32) K;
+ ALU_MOV_K:
+ DST = (u32) IMM;
CONT;
- BPF_ALU64_BPF_MOV_BPF_X:
- A = X;
+ ALU64_MOV_X:
+ DST = SRC;
CONT;
- BPF_ALU64_BPF_MOV_BPF_K:
- A = K;
+ ALU64_MOV_K:
+ DST = IMM;
CONT;
- BPF_ALU64_BPF_ARSH_BPF_X:
- (*(s64 *) &A) >>= X;
+ ALU64_ARSH_X:
+ (*(s64 *) &DST) >>= SRC;
CONT;
- BPF_ALU64_BPF_ARSH_BPF_K:
- (*(s64 *) &A) >>= K;
+ ALU64_ARSH_K:
+ (*(s64 *) &DST) >>= IMM;
CONT;
- BPF_ALU64_BPF_MOD_BPF_X:
- if (unlikely(X == 0))
+ ALU64_MOD_X:
+ if (unlikely(SRC == 0))
return 0;
- tmp = A;
- A = do_div(tmp, X);
+ tmp = DST;
+ DST = do_div(tmp, SRC);
CONT;
- BPF_ALU_BPF_MOD_BPF_X:
- if (unlikely(X == 0))
+ ALU_MOD_X:
+ if (unlikely(SRC == 0))
return 0;
- tmp = (u32) A;
- A = do_div(tmp, (u32) X);
+ tmp = (u32) DST;
+ DST = do_div(tmp, (u32) SRC);
CONT;
- BPF_ALU64_BPF_MOD_BPF_K:
- tmp = A;
- A = do_div(tmp, K);
+ ALU64_MOD_K:
+ tmp = DST;
+ DST = do_div(tmp, IMM);
CONT;
- BPF_ALU_BPF_MOD_BPF_K:
- tmp = (u32) A;
- A = do_div(tmp, (u32) K);
+ ALU_MOD_K:
+ tmp = (u32) DST;
+ DST = do_div(tmp, (u32) IMM);
CONT;
- BPF_ALU64_BPF_DIV_BPF_X:
- if (unlikely(X == 0))
+ ALU64_DIV_X:
+ if (unlikely(SRC == 0))
return 0;
- do_div(A, X);
+ do_div(DST, SRC);
CONT;
- BPF_ALU_BPF_DIV_BPF_X:
- if (unlikely(X == 0))
+ ALU_DIV_X:
+ if (unlikely(SRC == 0))
return 0;
- tmp = (u32) A;
- do_div(tmp, (u32) X);
- A = (u32) tmp;
+ tmp = (u32) DST;
+ do_div(tmp, (u32) SRC);
+ DST = (u32) tmp;
CONT;
- BPF_ALU64_BPF_DIV_BPF_K:
- do_div(A, K);
+ ALU64_DIV_K:
+ do_div(DST, IMM);
CONT;
- BPF_ALU_BPF_DIV_BPF_K:
- tmp = (u32) A;
- do_div(tmp, (u32) K);
- A = (u32) tmp;
+ ALU_DIV_K:
+ tmp = (u32) DST;
+ do_div(tmp, (u32) IMM);
+ DST = (u32) tmp;
CONT;
- BPF_ALU_BPF_END_BPF_TO_BE:
- switch (K) {
+ ALU_END_TO_BE:
+ switch (IMM) {
case 16:
- A = (__force u16) cpu_to_be16(A);
+ DST = (__force u16) cpu_to_be16(DST);
break;
case 32:
- A = (__force u32) cpu_to_be32(A);
+ DST = (__force u32) cpu_to_be32(DST);
break;
case 64:
- A = (__force u64) cpu_to_be64(A);
+ DST = (__force u64) cpu_to_be64(DST);
break;
}
CONT;
- BPF_ALU_BPF_END_BPF_TO_LE:
- switch (K) {
+ ALU_END_TO_LE:
+ switch (IMM) {
case 16:
- A = (__force u16) cpu_to_le16(A);
+ DST = (__force u16) cpu_to_le16(DST);
break;
case 32:
- A = (__force u32) cpu_to_le32(A);
+ DST = (__force u32) cpu_to_le32(DST);
break;
case 64:
- A = (__force u64) cpu_to_le64(A);
+ DST = (__force u64) cpu_to_le64(DST);
break;
}
CONT;
/* CALL */
- BPF_JMP_BPF_CALL_0:
- /* Function call scratches R1-R5 registers, preserves R6-R9,
- * and stores return value into R0.
+ JMP_CALL:
+ /* Function call scratches BPF_R1-BPF_R5 registers,
+ * preserves BPF_R6-BPF_R9, and stores return value
+ * into BPF_R0.
*/
- R0 = (__bpf_call_base + insn->imm)(regs[1], regs[2], regs[3],
- regs[4], regs[5]);
+ BPF_R0 = (__bpf_call_base + insn->imm)(BPF_R1, BPF_R2, BPF_R3,
+ BPF_R4, BPF_R5);
CONT;
/* JMP */
- BPF_JMP_BPF_JA_0:
+ JMP_JA:
insn += insn->off;
CONT;
- BPF_JMP_BPF_JEQ_BPF_X:
- if (A == X) {
+ JMP_JEQ_X:
+ if (DST == SRC) {
insn += insn->off;
CONT_JMP;
}
CONT;
- BPF_JMP_BPF_JEQ_BPF_K:
- if (A == K) {
+ JMP_JEQ_K:
+ if (DST == IMM) {
insn += insn->off;
CONT_JMP;
}
CONT;
- BPF_JMP_BPF_JNE_BPF_X:
- if (A != X) {
+ JMP_JNE_X:
+ if (DST != SRC) {
insn += insn->off;
CONT_JMP;
}
CONT;
- BPF_JMP_BPF_JNE_BPF_K:
- if (A != K) {
+ JMP_JNE_K:
+ if (DST != IMM) {
insn += insn->off;
CONT_JMP;
}
CONT;
- BPF_JMP_BPF_JGT_BPF_X:
- if (A > X) {
+ JMP_JGT_X:
+ if (DST > SRC) {
insn += insn->off;
CONT_JMP;
}
CONT;
- BPF_JMP_BPF_JGT_BPF_K:
- if (A > K) {
+ JMP_JGT_K:
+ if (DST > IMM) {
insn += insn->off;
CONT_JMP;
}
CONT;
- BPF_JMP_BPF_JGE_BPF_X:
- if (A >= X) {
+ JMP_JGE_X:
+ if (DST >= SRC) {
insn += insn->off;
CONT_JMP;
}
CONT;
- BPF_JMP_BPF_JGE_BPF_K:
- if (A >= K) {
+ JMP_JGE_K:
+ if (DST >= IMM) {
insn += insn->off;
CONT_JMP;
}
CONT;
- BPF_JMP_BPF_JSGT_BPF_X:
- if (((s64)A) > ((s64)X)) {
+ JMP_JSGT_X:
+ if (((s64) DST) > ((s64) SRC)) {
insn += insn->off;
CONT_JMP;
}
CONT;
- BPF_JMP_BPF_JSGT_BPF_K:
- if (((s64)A) > ((s64)K)) {
+ JMP_JSGT_K:
+ if (((s64) DST) > ((s64) IMM)) {
insn += insn->off;
CONT_JMP;
}
CONT;
- BPF_JMP_BPF_JSGE_BPF_X:
- if (((s64)A) >= ((s64)X)) {
+ JMP_JSGE_X:
+ if (((s64) DST) >= ((s64) SRC)) {
insn += insn->off;
CONT_JMP;
}
CONT;
- BPF_JMP_BPF_JSGE_BPF_K:
- if (((s64)A) >= ((s64)K)) {
+ JMP_JSGE_K:
+ if (((s64) DST) >= ((s64) IMM)) {
insn += insn->off;
CONT_JMP;
}
CONT;
- BPF_JMP_BPF_JSET_BPF_X:
- if (A & X) {
+ JMP_JSET_X:
+ if (DST & SRC) {
insn += insn->off;
CONT_JMP;
}
CONT;
- BPF_JMP_BPF_JSET_BPF_K:
- if (A & K) {
+ JMP_JSET_K:
+ if (DST & IMM) {
insn += insn->off;
CONT_JMP;
}
CONT;
- BPF_JMP_BPF_EXIT_0:
- return R0;
+ JMP_EXIT:
+ return BPF_R0;
/* STX and ST and LDX*/
-#define LDST(SIZEOP, SIZE) \
- BPF_STX_BPF_MEM_##SIZEOP: \
- *(SIZE *)(unsigned long) (A + insn->off) = X; \
- CONT; \
- BPF_ST_BPF_MEM_##SIZEOP: \
- *(SIZE *)(unsigned long) (A + insn->off) = K; \
- CONT; \
- BPF_LDX_BPF_MEM_##SIZEOP: \
- A = *(SIZE *)(unsigned long) (X + insn->off); \
+#define LDST(SIZEOP, SIZE) \
+ STX_MEM_##SIZEOP: \
+ *(SIZE *)(unsigned long) (DST + insn->off) = SRC; \
+ CONT; \
+ ST_MEM_##SIZEOP: \
+ *(SIZE *)(unsigned long) (DST + insn->off) = IMM; \
+ CONT; \
+ LDX_MEM_##SIZEOP: \
+ DST = *(SIZE *)(unsigned long) (SRC + insn->off); \
CONT;
- LDST(BPF_B, u8)
- LDST(BPF_H, u16)
- LDST(BPF_W, u32)
- LDST(BPF_DW, u64)
+ LDST(B, u8)
+ LDST(H, u16)
+ LDST(W, u32)
+ LDST(DW, u64)
#undef LDST
- BPF_STX_BPF_XADD_BPF_W: /* lock xadd *(u32 *)(A + insn->off) += X */
- atomic_add((u32) X, (atomic_t *)(unsigned long)
- (A + insn->off));
+ STX_XADD_W: /* lock xadd *(u32 *)(dst_reg + off16) += src_reg */
+ atomic_add((u32) SRC, (atomic_t *)(unsigned long)
+ (DST + insn->off));
CONT;
- BPF_STX_BPF_XADD_BPF_DW: /* lock xadd *(u64 *)(A + insn->off) += X */
- atomic64_add((u64) X, (atomic64_t *)(unsigned long)
- (A + insn->off));
+ STX_XADD_DW: /* lock xadd *(u64 *)(dst_reg + off16) += src_reg */
+ atomic64_add((u64) SRC, (atomic64_t *)(unsigned long)
+ (DST + insn->off));
CONT;
- BPF_LD_BPF_ABS_BPF_W: /* R0 = ntohl(*(u32 *) (skb->data + K)) */
- off = K;
+ LD_ABS_W: /* BPF_R0 = ntohl(*(u32 *) (skb->data + imm32)) */
+ off = IMM;
load_word:
- /* BPF_LD + BPD_ABS and BPF_LD + BPF_IND insns are only
- * appearing in the programs where ctx == skb. All programs
- * keep 'ctx' in regs[CTX_REG] == R6, sk_convert_filter()
- * saves it in R6, internal BPF verifier will check that
- * R6 == ctx.
+ /* BPF_LD + BPD_ABS and BPF_LD + BPF_IND insns are
+ * only appearing in the programs where ctx ==
+ * skb. All programs keep 'ctx' in regs[BPF_REG_CTX]
+ * == BPF_R6, sk_convert_filter() saves it in BPF_R6,
+ * internal BPF verifier will check that BPF_R6 ==
+ * ctx.
*
- * BPF_ABS and BPF_IND are wrappers of function calls, so
- * they scratch R1-R5 registers, preserve R6-R9, and store
- * return value into R0.
+ * BPF_ABS and BPF_IND are wrappers of function calls,
+ * so they scratch BPF_R1-BPF_R5 registers, preserve
+ * BPF_R6-BPF_R9, and store return value into BPF_R0.
*
* Implicit input:
- * ctx
+ * ctx == skb == BPF_R6 == CTX
*
* Explicit input:
- * X == any register
- * K == 32-bit immediate
+ * SRC == any register
+ * IMM == 32-bit immediate
*
* Output:
- * R0 - 8/16/32-bit skb data converted to cpu endianness
+ * BPF_R0 - 8/16/32-bit skb data converted to cpu endianness
*/
- ptr = load_pointer((struct sk_buff *) ctx, off, 4, &tmp);
+
+ ptr = load_pointer((struct sk_buff *) (unsigned long) CTX, off, 4, &tmp);
if (likely(ptr != NULL)) {
- R0 = get_unaligned_be32(ptr);
+ BPF_R0 = get_unaligned_be32(ptr);
CONT;
}
+
return 0;
- BPF_LD_BPF_ABS_BPF_H: /* R0 = ntohs(*(u16 *) (skb->data + K)) */
- off = K;
+ LD_ABS_H: /* BPF_R0 = ntohs(*(u16 *) (skb->data + imm32)) */
+ off = IMM;
load_half:
- ptr = load_pointer((struct sk_buff *) ctx, off, 2, &tmp);
+ ptr = load_pointer((struct sk_buff *) (unsigned long) CTX, off, 2, &tmp);
if (likely(ptr != NULL)) {
- R0 = get_unaligned_be16(ptr);
+ BPF_R0 = get_unaligned_be16(ptr);
CONT;
}
+
return 0;
- BPF_LD_BPF_ABS_BPF_B: /* R0 = *(u8 *) (ctx + K) */
- off = K;
+ LD_ABS_B: /* BPF_R0 = *(u8 *) (skb->data + imm32) */
+ off = IMM;
load_byte:
- ptr = load_pointer((struct sk_buff *) ctx, off, 1, &tmp);
+ ptr = load_pointer((struct sk_buff *) (unsigned long) CTX, off, 1, &tmp);
if (likely(ptr != NULL)) {
- R0 = *(u8 *)ptr;
+ BPF_R0 = *(u8 *)ptr;
CONT;
}
+
return 0;
- BPF_LD_BPF_IND_BPF_W: /* R0 = ntohl(*(u32 *) (skb->data + X + K)) */
- off = K + X;
+ LD_IND_W: /* BPF_R0 = ntohl(*(u32 *) (skb->data + src_reg + imm32)) */
+ off = IMM + SRC;
goto load_word;
- BPF_LD_BPF_IND_BPF_H: /* R0 = ntohs(*(u16 *) (skb->data + X + K)) */
- off = K + X;
+ LD_IND_H: /* BPF_R0 = ntohs(*(u16 *) (skb->data + src_reg + imm32)) */
+ off = IMM + SRC;
goto load_half;
- BPF_LD_BPF_IND_BPF_B: /* R0 = *(u8 *) (skb->data + X + K) */
- off = K + X;
+ LD_IND_B: /* BPF_R0 = *(u8 *) (skb->data + src_reg + imm32) */
+ off = IMM + SRC;
goto load_byte;
default_label:
/* If we ever reach this, we have a bug somewhere. */
WARN_RATELIMIT(1, "unknown opcode %02x\n", insn->code);
return 0;
-#undef CONT_JMP
-#undef CONT
-
-#undef R0
-#undef X
-#undef A
-#undef K
}
-u32 sk_run_filter_int_seccomp(const struct seccomp_data *ctx,
- const struct sock_filter_int *insni)
- __attribute__ ((alias ("__sk_run_filter")));
-
-u32 sk_run_filter_int_skb(const struct sk_buff *ctx,
- const struct sock_filter_int *insni)
- __attribute__ ((alias ("__sk_run_filter")));
-EXPORT_SYMBOL_GPL(sk_run_filter_int_skb);
-
/* Helper to find the offset of pkt_type in sk_buff structure. We want
* to make sure its still a 3bit field starting at a byte boundary;
* taken from arch/x86/net/bpf_jit_comp.c.
*/
+#ifdef __BIG_ENDIAN_BITFIELD
+#define PKT_TYPE_MAX (7 << 5)
+#else
#define PKT_TYPE_MAX 7
+#endif
static unsigned int pkt_type_offset(void)
{
struct sk_buff skb_probe = { .pkt_type = ~0, };
@@ -594,16 +604,14 @@ static unsigned int pkt_type_offset(void)
return -1;
}
-static u64 __skb_get_pay_offset(u64 ctx, u64 A, u64 X, u64 r4, u64 r5)
+static u64 __skb_get_pay_offset(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
{
- struct sk_buff *skb = (struct sk_buff *)(long) ctx;
-
- return __skb_get_poff(skb);
+ return __skb_get_poff((struct sk_buff *)(unsigned long) ctx);
}
-static u64 __skb_get_nlattr(u64 ctx, u64 A, u64 X, u64 r4, u64 r5)
+static u64 __skb_get_nlattr(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
{
- struct sk_buff *skb = (struct sk_buff *)(long) ctx;
+ struct sk_buff *skb = (struct sk_buff *)(unsigned long) ctx;
struct nlattr *nla;
if (skb_is_nonlinear(skb))
@@ -612,19 +620,19 @@ static u64 __skb_get_nlattr(u64 ctx, u64 A, u64 X, u64 r4, u64 r5)
if (skb->len < sizeof(struct nlattr))
return 0;
- if (A > skb->len - sizeof(struct nlattr))
+ if (a > skb->len - sizeof(struct nlattr))
return 0;
- nla = nla_find((struct nlattr *) &skb->data[A], skb->len - A, X);
+ nla = nla_find((struct nlattr *) &skb->data[a], skb->len - a, x);
if (nla)
return (void *) nla - (void *) skb->data;
return 0;
}
-static u64 __skb_get_nlattr_nest(u64 ctx, u64 A, u64 X, u64 r4, u64 r5)
+static u64 __skb_get_nlattr_nest(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
{
- struct sk_buff *skb = (struct sk_buff *)(long) ctx;
+ struct sk_buff *skb = (struct sk_buff *)(unsigned long) ctx;
struct nlattr *nla;
if (skb_is_nonlinear(skb))
@@ -633,25 +641,31 @@ static u64 __skb_get_nlattr_nest(u64 ctx, u64 A, u64 X, u64 r4, u64 r5)
if (skb->len < sizeof(struct nlattr))
return 0;
- if (A > skb->len - sizeof(struct nlattr))
+ if (a > skb->len - sizeof(struct nlattr))
return 0;
- nla = (struct nlattr *) &skb->data[A];
- if (nla->nla_len > skb->len - A)
+ nla = (struct nlattr *) &skb->data[a];
+ if (nla->nla_len > skb->len - a)
return 0;
- nla = nla_find_nested(nla, X);
+ nla = nla_find_nested(nla, x);
if (nla)
return (void *) nla - (void *) skb->data;
return 0;
}
-static u64 __get_raw_cpu_id(u64 ctx, u64 A, u64 X, u64 r4, u64 r5)
+static u64 __get_raw_cpu_id(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
{
return raw_smp_processor_id();
}
+/* note that this only generates 32-bit random numbers */
+static u64 __get_random_u32(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
+{
+ return prandom_u32();
+}
+
static bool convert_bpf_extensions(struct sock_filter *fp,
struct sock_filter_int **insnp)
{
@@ -661,119 +675,83 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
case SKF_AD_OFF + SKF_AD_PROTOCOL:
BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2);
- insn->code = BPF_LDX | BPF_MEM | BPF_H;
- insn->a_reg = A_REG;
- insn->x_reg = CTX_REG;
- insn->off = offsetof(struct sk_buff, protocol);
- insn++;
-
+ /* A = *(u16 *) (CTX + offsetof(protocol)) */
+ *insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
+ offsetof(struct sk_buff, protocol));
/* A = ntohs(A) [emitting a nop or swap16] */
- insn->code = BPF_ALU | BPF_END | BPF_FROM_BE;
- insn->a_reg = A_REG;
- insn->imm = 16;
+ *insn = BPF_ENDIAN(BPF_FROM_BE, BPF_REG_A, 16);
break;
case SKF_AD_OFF + SKF_AD_PKTTYPE:
- insn->code = BPF_LDX | BPF_MEM | BPF_B;
- insn->a_reg = A_REG;
- insn->x_reg = CTX_REG;
- insn->off = pkt_type_offset();
+ *insn = BPF_LDX_MEM(BPF_B, BPF_REG_A, BPF_REG_CTX,
+ pkt_type_offset());
if (insn->off < 0)
return false;
insn++;
-
- insn->code = BPF_ALU | BPF_AND | BPF_K;
- insn->a_reg = A_REG;
- insn->imm = PKT_TYPE_MAX;
+ *insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, PKT_TYPE_MAX);
+#ifdef __BIG_ENDIAN_BITFIELD
+ insn++;
+ *insn = BPF_ALU32_IMM(BPF_RSH, BPF_REG_A, 5);
+#endif
break;
case SKF_AD_OFF + SKF_AD_IFINDEX:
case SKF_AD_OFF + SKF_AD_HATYPE:
- if (FIELD_SIZEOF(struct sk_buff, dev) == 8)
- insn->code = BPF_LDX | BPF_MEM | BPF_DW;
- else
- insn->code = BPF_LDX | BPF_MEM | BPF_W;
- insn->a_reg = TMP_REG;
- insn->x_reg = CTX_REG;
- insn->off = offsetof(struct sk_buff, dev);
- insn++;
-
- insn->code = BPF_JMP | BPF_JNE | BPF_K;
- insn->a_reg = TMP_REG;
- insn->imm = 0;
- insn->off = 1;
- insn++;
-
- insn->code = BPF_JMP | BPF_EXIT;
- insn++;
-
BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4);
BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, type) != 2);
-
- insn->a_reg = A_REG;
- insn->x_reg = TMP_REG;
-
- if (fp->k == SKF_AD_OFF + SKF_AD_IFINDEX) {
- insn->code = BPF_LDX | BPF_MEM | BPF_W;
- insn->off = offsetof(struct net_device, ifindex);
- } else {
- insn->code = BPF_LDX | BPF_MEM | BPF_H;
- insn->off = offsetof(struct net_device, type);
- }
+ BUILD_BUG_ON(bytes_to_bpf_size(FIELD_SIZEOF(struct sk_buff, dev)) < 0);
+
+ *insn++ = BPF_LDX_MEM(bytes_to_bpf_size(FIELD_SIZEOF(struct sk_buff, dev)),
+ BPF_REG_TMP, BPF_REG_CTX,
+ offsetof(struct sk_buff, dev));
+ /* if (tmp != 0) goto pc + 1 */
+ *insn++ = BPF_JMP_IMM(BPF_JNE, BPF_REG_TMP, 0, 1);
+ *insn++ = BPF_EXIT_INSN();
+ if (fp->k == SKF_AD_OFF + SKF_AD_IFINDEX)
+ *insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_TMP,
+ offsetof(struct net_device, ifindex));
+ else
+ *insn = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_TMP,
+ offsetof(struct net_device, type));
break;
case SKF_AD_OFF + SKF_AD_MARK:
BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);
- insn->code = BPF_LDX | BPF_MEM | BPF_W;
- insn->a_reg = A_REG;
- insn->x_reg = CTX_REG;
- insn->off = offsetof(struct sk_buff, mark);
+ *insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX,
+ offsetof(struct sk_buff, mark));
break;
case SKF_AD_OFF + SKF_AD_RXHASH:
BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4);
- insn->code = BPF_LDX | BPF_MEM | BPF_W;
- insn->a_reg = A_REG;
- insn->x_reg = CTX_REG;
- insn->off = offsetof(struct sk_buff, hash);
+ *insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX,
+ offsetof(struct sk_buff, hash));
break;
case SKF_AD_OFF + SKF_AD_QUEUE:
BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2);
- insn->code = BPF_LDX | BPF_MEM | BPF_H;
- insn->a_reg = A_REG;
- insn->x_reg = CTX_REG;
- insn->off = offsetof(struct sk_buff, queue_mapping);
+ *insn = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
+ offsetof(struct sk_buff, queue_mapping));
break;
case SKF_AD_OFF + SKF_AD_VLAN_TAG:
case SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT:
BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2);
-
- insn->code = BPF_LDX | BPF_MEM | BPF_H;
- insn->a_reg = A_REG;
- insn->x_reg = CTX_REG;
- insn->off = offsetof(struct sk_buff, vlan_tci);
- insn++;
-
BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000);
+ /* A = *(u16 *) (CTX + offsetof(vlan_tci)) */
+ *insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
+ offsetof(struct sk_buff, vlan_tci));
if (fp->k == SKF_AD_OFF + SKF_AD_VLAN_TAG) {
- insn->code = BPF_ALU | BPF_AND | BPF_K;
- insn->a_reg = A_REG;
- insn->imm = ~VLAN_TAG_PRESENT;
+ *insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A,
+ ~VLAN_TAG_PRESENT);
} else {
- insn->code = BPF_ALU | BPF_RSH | BPF_K;
- insn->a_reg = A_REG;
- insn->imm = 12;
- insn++;
-
- insn->code = BPF_ALU | BPF_AND | BPF_K;
- insn->a_reg = A_REG;
- insn->imm = 1;
+ /* A >>= 12 */
+ *insn++ = BPF_ALU32_IMM(BPF_RSH, BPF_REG_A, 12);
+ /* A &= 1 */
+ *insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, 1);
}
break;
@@ -781,46 +759,36 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
case SKF_AD_OFF + SKF_AD_NLATTR:
case SKF_AD_OFF + SKF_AD_NLATTR_NEST:
case SKF_AD_OFF + SKF_AD_CPU:
- /* arg1 = ctx */
- insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
- insn->a_reg = ARG1_REG;
- insn->x_reg = CTX_REG;
- insn++;
-
+ case SKF_AD_OFF + SKF_AD_RANDOM:
+ /* arg1 = CTX */
+ *insn++ = BPF_MOV64_REG(BPF_REG_ARG1, BPF_REG_CTX);
/* arg2 = A */
- insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
- insn->a_reg = ARG2_REG;
- insn->x_reg = A_REG;
- insn++;
-
+ *insn++ = BPF_MOV64_REG(BPF_REG_ARG2, BPF_REG_A);
/* arg3 = X */
- insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
- insn->a_reg = ARG3_REG;
- insn->x_reg = X_REG;
- insn++;
-
- /* Emit call(ctx, arg2=A, arg3=X) */
- insn->code = BPF_JMP | BPF_CALL;
+ *insn++ = BPF_MOV64_REG(BPF_REG_ARG3, BPF_REG_X);
+ /* Emit call(arg1=CTX, arg2=A, arg3=X) */
switch (fp->k) {
case SKF_AD_OFF + SKF_AD_PAY_OFFSET:
- insn->imm = __skb_get_pay_offset - __bpf_call_base;
+ *insn = BPF_EMIT_CALL(__skb_get_pay_offset);
break;
case SKF_AD_OFF + SKF_AD_NLATTR:
- insn->imm = __skb_get_nlattr - __bpf_call_base;
+ *insn = BPF_EMIT_CALL(__skb_get_nlattr);
break;
case SKF_AD_OFF + SKF_AD_NLATTR_NEST:
- insn->imm = __skb_get_nlattr_nest - __bpf_call_base;
+ *insn = BPF_EMIT_CALL(__skb_get_nlattr_nest);
break;
case SKF_AD_OFF + SKF_AD_CPU:
- insn->imm = __get_raw_cpu_id - __bpf_call_base;
+ *insn = BPF_EMIT_CALL(__get_raw_cpu_id);
+ break;
+ case SKF_AD_OFF + SKF_AD_RANDOM:
+ *insn = BPF_EMIT_CALL(__get_random_u32);
break;
}
break;
case SKF_AD_OFF + SKF_AD_ALU_XOR_X:
- insn->code = BPF_ALU | BPF_XOR | BPF_X;
- insn->a_reg = A_REG;
- insn->x_reg = X_REG;
+ /* A ^= X */
+ *insn = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_X);
break;
default:
@@ -870,7 +838,7 @@ int sk_convert_filter(struct sock_filter *prog, int len,
u8 bpf_src;
BUILD_BUG_ON(BPF_MEMWORDS * sizeof(u32) > MAX_BPF_STACK);
- BUILD_BUG_ON(FP_REG + 1 != MAX_BPF_REG);
+ BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG);
if (len <= 0 || len >= BPF_MAXINSNS)
return -EINVAL;
@@ -885,11 +853,8 @@ do_pass:
new_insn = new_prog;
fp = prog;
- if (new_insn) {
- new_insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
- new_insn->a_reg = CTX_REG;
- new_insn->x_reg = ARG1_REG;
- }
+ if (new_insn)
+ *new_insn = BPF_MOV64_REG(BPF_REG_CTX, BPF_REG_ARG1);
new_insn++;
for (i = 0; i < len; fp++, i++) {
@@ -937,17 +902,16 @@ do_pass:
convert_bpf_extensions(fp, &insn))
break;
- insn->code = fp->code;
- insn->a_reg = A_REG;
- insn->x_reg = X_REG;
- insn->imm = fp->k;
+ *insn = BPF_RAW_INSN(fp->code, BPF_REG_A, BPF_REG_X, 0, fp->k);
break;
- /* Jump opcodes map as-is, but offsets need adjustment. */
- case BPF_JMP | BPF_JA:
- target = i + fp->k + 1;
- insn->code = fp->code;
-#define EMIT_JMP \
+ /* Jump transformation cannot use BPF block macros
+ * everywhere as offset calculation and target updates
+ * require a bit more work than the rest, i.e. jump
+ * opcodes map as-is, but offsets need adjustment.
+ */
+
+#define BPF_EMIT_JMP \
do { \
if (target >= len || target < 0) \
goto err; \
@@ -956,7 +920,10 @@ do_pass:
insn->off -= insn - tmp_insns; \
} while (0)
- EMIT_JMP;
+ case BPF_JMP | BPF_JA:
+ target = i + fp->k + 1;
+ insn->code = fp->code;
+ BPF_EMIT_JMP;
break;
case BPF_JMP | BPF_JEQ | BPF_K:
@@ -972,17 +939,14 @@ do_pass:
* immediate into tmp register and use it
* in compare insn.
*/
- insn->code = BPF_ALU | BPF_MOV | BPF_K;
- insn->a_reg = TMP_REG;
- insn->imm = fp->k;
- insn++;
+ *insn++ = BPF_MOV32_IMM(BPF_REG_TMP, fp->k);
- insn->a_reg = A_REG;
- insn->x_reg = TMP_REG;
+ insn->dst_reg = BPF_REG_A;
+ insn->src_reg = BPF_REG_TMP;
bpf_src = BPF_X;
} else {
- insn->a_reg = A_REG;
- insn->x_reg = X_REG;
+ insn->dst_reg = BPF_REG_A;
+ insn->src_reg = BPF_REG_X;
insn->imm = fp->k;
bpf_src = BPF_SRC(fp->code);
}
@@ -991,7 +955,7 @@ do_pass:
if (fp->jf == 0) {
insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src;
target = i + fp->jt + 1;
- EMIT_JMP;
+ BPF_EMIT_JMP;
break;
}
@@ -999,127 +963,94 @@ do_pass:
if (fp->jt == 0 && BPF_OP(fp->code) == BPF_JEQ) {
insn->code = BPF_JMP | BPF_JNE | bpf_src;
target = i + fp->jf + 1;
- EMIT_JMP;
+ BPF_EMIT_JMP;
break;
}
/* Other jumps are mapped into two insns: Jxx and JA. */
target = i + fp->jt + 1;
insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src;
- EMIT_JMP;
+ BPF_EMIT_JMP;
insn++;
insn->code = BPF_JMP | BPF_JA;
target = i + fp->jf + 1;
- EMIT_JMP;
+ BPF_EMIT_JMP;
break;
/* ldxb 4 * ([14] & 0xf) is remaped into 6 insns. */
case BPF_LDX | BPF_MSH | BPF_B:
- insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
- insn->a_reg = TMP_REG;
- insn->x_reg = A_REG;
- insn++;
-
- insn->code = BPF_LD | BPF_ABS | BPF_B;
- insn->a_reg = A_REG;
- insn->imm = fp->k;
- insn++;
-
- insn->code = BPF_ALU | BPF_AND | BPF_K;
- insn->a_reg = A_REG;
- insn->imm = 0xf;
- insn++;
-
- insn->code = BPF_ALU | BPF_LSH | BPF_K;
- insn->a_reg = A_REG;
- insn->imm = 2;
- insn++;
-
- insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
- insn->a_reg = X_REG;
- insn->x_reg = A_REG;
- insn++;
-
- insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
- insn->a_reg = A_REG;
- insn->x_reg = TMP_REG;
+ /* tmp = A */
+ *insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_A);
+ /* A = BPF_R0 = *(u8 *) (skb->data + K) */
+ *insn++ = BPF_LD_ABS(BPF_B, fp->k);
+ /* A &= 0xf */
+ *insn++ = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, 0xf);
+ /* A <<= 2 */
+ *insn++ = BPF_ALU32_IMM(BPF_LSH, BPF_REG_A, 2);
+ /* X = A */
+ *insn++ = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A);
+ /* A = tmp */
+ *insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_TMP);
break;
/* RET_K, RET_A are remaped into 2 insns. */
case BPF_RET | BPF_A:
case BPF_RET | BPF_K:
- insn->code = BPF_ALU | BPF_MOV |
- (BPF_RVAL(fp->code) == BPF_K ?
- BPF_K : BPF_X);
- insn->a_reg = 0;
- insn->x_reg = A_REG;
- insn->imm = fp->k;
- insn++;
-
- insn->code = BPF_JMP | BPF_EXIT;
+ *insn++ = BPF_MOV32_RAW(BPF_RVAL(fp->code) == BPF_K ?
+ BPF_K : BPF_X, BPF_REG_0,
+ BPF_REG_A, fp->k);
+ *insn = BPF_EXIT_INSN();
break;
/* Store to stack. */
case BPF_ST:
case BPF_STX:
- insn->code = BPF_STX | BPF_MEM | BPF_W;
- insn->a_reg = FP_REG;
- insn->x_reg = fp->code == BPF_ST ? A_REG : X_REG;
- insn->off = -(BPF_MEMWORDS - fp->k) * 4;
+ *insn = BPF_STX_MEM(BPF_W, BPF_REG_FP, BPF_CLASS(fp->code) ==
+ BPF_ST ? BPF_REG_A : BPF_REG_X,
+ -(BPF_MEMWORDS - fp->k) * 4);
break;
/* Load from stack. */
case BPF_LD | BPF_MEM:
case BPF_LDX | BPF_MEM:
- insn->code = BPF_LDX | BPF_MEM | BPF_W;
- insn->a_reg = BPF_CLASS(fp->code) == BPF_LD ?
- A_REG : X_REG;
- insn->x_reg = FP_REG;
- insn->off = -(BPF_MEMWORDS - fp->k) * 4;
+ *insn = BPF_LDX_MEM(BPF_W, BPF_CLASS(fp->code) == BPF_LD ?
+ BPF_REG_A : BPF_REG_X, BPF_REG_FP,
+ -(BPF_MEMWORDS - fp->k) * 4);
break;
/* A = K or X = K */
case BPF_LD | BPF_IMM:
case BPF_LDX | BPF_IMM:
- insn->code = BPF_ALU | BPF_MOV | BPF_K;
- insn->a_reg = BPF_CLASS(fp->code) == BPF_LD ?
- A_REG : X_REG;
- insn->imm = fp->k;
+ *insn = BPF_MOV32_IMM(BPF_CLASS(fp->code) == BPF_LD ?
+ BPF_REG_A : BPF_REG_X, fp->k);
break;
/* X = A */
case BPF_MISC | BPF_TAX:
- insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
- insn->a_reg = X_REG;
- insn->x_reg = A_REG;
+ *insn = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A);
break;
/* A = X */
case BPF_MISC | BPF_TXA:
- insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
- insn->a_reg = A_REG;
- insn->x_reg = X_REG;
+ *insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_X);
break;
/* A = skb->len or X = skb->len */
case BPF_LD | BPF_W | BPF_LEN:
case BPF_LDX | BPF_W | BPF_LEN:
- insn->code = BPF_LDX | BPF_MEM | BPF_W;
- insn->a_reg = BPF_CLASS(fp->code) == BPF_LD ?
- A_REG : X_REG;
- insn->x_reg = CTX_REG;
- insn->off = offsetof(struct sk_buff, len);
+ *insn = BPF_LDX_MEM(BPF_W, BPF_CLASS(fp->code) == BPF_LD ?
+ BPF_REG_A : BPF_REG_X, BPF_REG_CTX,
+ offsetof(struct sk_buff, len));
break;
- /* access seccomp_data fields */
+ /* Access seccomp_data fields. */
case BPF_LDX | BPF_ABS | BPF_W:
- insn->code = BPF_LDX | BPF_MEM | BPF_W;
- insn->a_reg = A_REG;
- insn->x_reg = CTX_REG;
- insn->off = fp->k;
+ /* A = *(u32 *) (ctx + K) */
+ *insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX, fp->k);
break;
+ /* Unkown instruction. */
default:
goto err;
}
@@ -1128,7 +1059,6 @@ do_pass:
if (new_prog)
memcpy(new_insn, tmp_insns,
sizeof(*insn) * (insn - tmp_insns));
-
new_insn += insn - tmp_insns;
}
@@ -1143,7 +1073,6 @@ do_pass:
new_flen = new_insn - new_prog;
if (pass > 2)
goto err;
-
goto do_pass;
}
@@ -1167,44 +1096,46 @@ err:
*/
static int check_load_and_stores(struct sock_filter *filter, int flen)
{
- u16 *masks, memvalid = 0; /* one bit per cell, 16 cells */
+ u16 *masks, memvalid = 0; /* One bit per cell, 16 cells */
int pc, ret = 0;
BUILD_BUG_ON(BPF_MEMWORDS > 16);
+
masks = kmalloc(flen * sizeof(*masks), GFP_KERNEL);
if (!masks)
return -ENOMEM;
+
memset(masks, 0xff, flen * sizeof(*masks));
for (pc = 0; pc < flen; pc++) {
memvalid &= masks[pc];
switch (filter[pc].code) {
- case BPF_S_ST:
- case BPF_S_STX:
+ case BPF_ST:
+ case BPF_STX:
memvalid |= (1 << filter[pc].k);
break;
- case BPF_S_LD_MEM:
- case BPF_S_LDX_MEM:
+ case BPF_LD | BPF_MEM:
+ case BPF_LDX | BPF_MEM:
if (!(memvalid & (1 << filter[pc].k))) {
ret = -EINVAL;
goto error;
}
break;
- case BPF_S_JMP_JA:
- /* a jump must set masks on target */
+ case BPF_JMP | BPF_JA:
+ /* A jump must set masks on target */
masks[pc + 1 + filter[pc].k] &= memvalid;
memvalid = ~0;
break;
- case BPF_S_JMP_JEQ_K:
- case BPF_S_JMP_JEQ_X:
- case BPF_S_JMP_JGE_K:
- case BPF_S_JMP_JGE_X:
- case BPF_S_JMP_JGT_K:
- case BPF_S_JMP_JGT_X:
- case BPF_S_JMP_JSET_X:
- case BPF_S_JMP_JSET_K:
- /* a jump must set masks on targets */
+ case BPF_JMP | BPF_JEQ | BPF_K:
+ case BPF_JMP | BPF_JEQ | BPF_X:
+ case BPF_JMP | BPF_JGE | BPF_K:
+ case BPF_JMP | BPF_JGE | BPF_X:
+ case BPF_JMP | BPF_JGT | BPF_K:
+ case BPF_JMP | BPF_JGT | BPF_X:
+ case BPF_JMP | BPF_JSET | BPF_K:
+ case BPF_JMP | BPF_JSET | BPF_X:
+ /* A jump must set masks on targets */
masks[pc + 1 + filter[pc].jt] &= memvalid;
masks[pc + 1 + filter[pc].jf] &= memvalid;
memvalid = ~0;
@@ -1216,6 +1147,72 @@ error:
return ret;
}
+static bool chk_code_allowed(u16 code_to_probe)
+{
+ static const bool codes[] = {
+ /* 32 bit ALU operations */
+ [BPF_ALU | BPF_ADD | BPF_K] = true,
+ [BPF_ALU | BPF_ADD | BPF_X] = true,
+ [BPF_ALU | BPF_SUB | BPF_K] = true,
+ [BPF_ALU | BPF_SUB | BPF_X] = true,
+ [BPF_ALU | BPF_MUL | BPF_K] = true,
+ [BPF_ALU | BPF_MUL | BPF_X] = true,
+ [BPF_ALU | BPF_DIV | BPF_K] = true,
+ [BPF_ALU | BPF_DIV | BPF_X] = true,
+ [BPF_ALU | BPF_MOD | BPF_K] = true,
+ [BPF_ALU | BPF_MOD | BPF_X] = true,
+ [BPF_ALU | BPF_AND | BPF_K] = true,
+ [BPF_ALU | BPF_AND | BPF_X] = true,
+ [BPF_ALU | BPF_OR | BPF_K] = true,
+ [BPF_ALU | BPF_OR | BPF_X] = true,
+ [BPF_ALU | BPF_XOR | BPF_K] = true,
+ [BPF_ALU | BPF_XOR | BPF_X] = true,
+ [BPF_ALU | BPF_LSH | BPF_K] = true,
+ [BPF_ALU | BPF_LSH | BPF_X] = true,
+ [BPF_ALU | BPF_RSH | BPF_K] = true,
+ [BPF_ALU | BPF_RSH | BPF_X] = true,
+ [BPF_ALU | BPF_NEG] = true,
+ /* Load instructions */
+ [BPF_LD | BPF_W | BPF_ABS] = true,
+ [BPF_LD | BPF_H | BPF_ABS] = true,
+ [BPF_LD | BPF_B | BPF_ABS] = true,
+ [BPF_LD | BPF_W | BPF_LEN] = true,
+ [BPF_LD | BPF_W | BPF_IND] = true,
+ [BPF_LD | BPF_H | BPF_IND] = true,
+ [BPF_LD | BPF_B | BPF_IND] = true,
+ [BPF_LD | BPF_IMM] = true,
+ [BPF_LD | BPF_MEM] = true,
+ [BPF_LDX | BPF_W | BPF_LEN] = true,
+ [BPF_LDX | BPF_B | BPF_MSH] = true,
+ [BPF_LDX | BPF_IMM] = true,
+ [BPF_LDX | BPF_MEM] = true,
+ /* Store instructions */
+ [BPF_ST] = true,
+ [BPF_STX] = true,
+ /* Misc instructions */
+ [BPF_MISC | BPF_TAX] = true,
+ [BPF_MISC | BPF_TXA] = true,
+ /* Return instructions */
+ [BPF_RET | BPF_K] = true,
+ [BPF_RET | BPF_A] = true,
+ /* Jump instructions */
+ [BPF_JMP | BPF_JA] = true,
+ [BPF_JMP | BPF_JEQ | BPF_K] = true,
+ [BPF_JMP | BPF_JEQ | BPF_X] = true,
+ [BPF_JMP | BPF_JGE | BPF_K] = true,
+ [BPF_JMP | BPF_JGE | BPF_X] = true,
+ [BPF_JMP | BPF_JGT | BPF_K] = true,
+ [BPF_JMP | BPF_JGT | BPF_X] = true,
+ [BPF_JMP | BPF_JSET | BPF_K] = true,
+ [BPF_JMP | BPF_JSET | BPF_X] = true,
+ };
+
+ if (code_to_probe >= ARRAY_SIZE(codes))
+ return false;
+
+ return codes[code_to_probe];
+}
+
/**
* sk_chk_filter - verify socket filter code
* @filter: filter to verify
@@ -1232,153 +1229,76 @@ error:
*/
int sk_chk_filter(struct sock_filter *filter, unsigned int flen)
{
- /*
- * Valid instructions are initialized to non-0.
- * Invalid instructions are initialized to 0.
- */
- static const u8 codes[] = {
- [BPF_ALU|BPF_ADD|BPF_K] = BPF_S_ALU_ADD_K,
- [BPF_ALU|BPF_ADD|BPF_X] = BPF_S_ALU_ADD_X,
- [BPF_ALU|BPF_SUB|BPF_K] = BPF_S_ALU_SUB_K,
- [BPF_ALU|BPF_SUB|BPF_X] = BPF_S_ALU_SUB_X,
- [BPF_ALU|BPF_MUL|BPF_K] = BPF_S_ALU_MUL_K,
- [BPF_ALU|BPF_MUL|BPF_X] = BPF_S_ALU_MUL_X,
- [BPF_ALU|BPF_DIV|BPF_X] = BPF_S_ALU_DIV_X,
- [BPF_ALU|BPF_MOD|BPF_K] = BPF_S_ALU_MOD_K,
- [BPF_ALU|BPF_MOD|BPF_X] = BPF_S_ALU_MOD_X,
- [BPF_ALU|BPF_AND|BPF_K] = BPF_S_ALU_AND_K,
- [BPF_ALU|BPF_AND|BPF_X] = BPF_S_ALU_AND_X,
- [BPF_ALU|BPF_OR|BPF_K] = BPF_S_ALU_OR_K,
- [BPF_ALU|BPF_OR|BPF_X] = BPF_S_ALU_OR_X,
- [BPF_ALU|BPF_XOR|BPF_K] = BPF_S_ALU_XOR_K,
- [BPF_ALU|BPF_XOR|BPF_X] = BPF_S_ALU_XOR_X,
- [BPF_ALU|BPF_LSH|BPF_K] = BPF_S_ALU_LSH_K,
- [BPF_ALU|BPF_LSH|BPF_X] = BPF_S_ALU_LSH_X,
- [BPF_ALU|BPF_RSH|BPF_K] = BPF_S_ALU_RSH_K,
- [BPF_ALU|BPF_RSH|BPF_X] = BPF_S_ALU_RSH_X,
- [BPF_ALU|BPF_NEG] = BPF_S_ALU_NEG,
- [BPF_LD|BPF_W|BPF_ABS] = BPF_S_LD_W_ABS,
- [BPF_LD|BPF_H|BPF_ABS] = BPF_S_LD_H_ABS,
- [BPF_LD|BPF_B|BPF_ABS] = BPF_S_LD_B_ABS,
- [BPF_LD|BPF_W|BPF_LEN] = BPF_S_LD_W_LEN,
- [BPF_LD|BPF_W|BPF_IND] = BPF_S_LD_W_IND,
- [BPF_LD|BPF_H|BPF_IND] = BPF_S_LD_H_IND,
- [BPF_LD|BPF_B|BPF_IND] = BPF_S_LD_B_IND,
- [BPF_LD|BPF_IMM] = BPF_S_LD_IMM,
- [BPF_LDX|BPF_W|BPF_LEN] = BPF_S_LDX_W_LEN,
- [BPF_LDX|BPF_B|BPF_MSH] = BPF_S_LDX_B_MSH,
- [BPF_LDX|BPF_IMM] = BPF_S_LDX_IMM,
- [BPF_MISC|BPF_TAX] = BPF_S_MISC_TAX,
- [BPF_MISC|BPF_TXA] = BPF_S_MISC_TXA,
- [BPF_RET|BPF_K] = BPF_S_RET_K,
- [BPF_RET|BPF_A] = BPF_S_RET_A,
- [BPF_ALU|BPF_DIV|BPF_K] = BPF_S_ALU_DIV_K,
- [BPF_LD|BPF_MEM] = BPF_S_LD_MEM,
- [BPF_LDX|BPF_MEM] = BPF_S_LDX_MEM,
- [BPF_ST] = BPF_S_ST,
- [BPF_STX] = BPF_S_STX,
- [BPF_JMP|BPF_JA] = BPF_S_JMP_JA,
- [BPF_JMP|BPF_JEQ|BPF_K] = BPF_S_JMP_JEQ_K,
- [BPF_JMP|BPF_JEQ|BPF_X] = BPF_S_JMP_JEQ_X,
- [BPF_JMP|BPF_JGE|BPF_K] = BPF_S_JMP_JGE_K,
- [BPF_JMP|BPF_JGE|BPF_X] = BPF_S_JMP_JGE_X,
- [BPF_JMP|BPF_JGT|BPF_K] = BPF_S_JMP_JGT_K,
- [BPF_JMP|BPF_JGT|BPF_X] = BPF_S_JMP_JGT_X,
- [BPF_JMP|BPF_JSET|BPF_K] = BPF_S_JMP_JSET_K,
- [BPF_JMP|BPF_JSET|BPF_X] = BPF_S_JMP_JSET_X,
- };
- int pc;
bool anc_found;
+ int pc;
if (flen == 0 || flen > BPF_MAXINSNS)
return -EINVAL;
- /* check the filter code now */
+ /* Check the filter code now */
for (pc = 0; pc < flen; pc++) {
struct sock_filter *ftest = &filter[pc];
- u16 code = ftest->code;
- if (code >= ARRAY_SIZE(codes))
- return -EINVAL;
- code = codes[code];
- if (!code)
+ /* May we actually operate on this code? */
+ if (!chk_code_allowed(ftest->code))
return -EINVAL;
+
/* Some instructions need special checks */
- switch (code) {
- case BPF_S_ALU_DIV_K:
- case BPF_S_ALU_MOD_K:
- /* check for division by zero */
+ switch (ftest->code) {
+ case BPF_ALU | BPF_DIV | BPF_K:
+ case BPF_ALU | BPF_MOD | BPF_K:
+ /* Check for division by zero */
if (ftest->k == 0)
return -EINVAL;
break;
- case BPF_S_LD_MEM:
- case BPF_S_LDX_MEM:
- case BPF_S_ST:
- case BPF_S_STX:
- /* check for invalid memory addresses */
+ case BPF_LD | BPF_MEM:
+ case BPF_LDX | BPF_MEM:
+ case BPF_ST:
+ case BPF_STX:
+ /* Check for invalid memory addresses */
if (ftest->k >= BPF_MEMWORDS)
return -EINVAL;
break;
- case BPF_S_JMP_JA:
- /*
- * Note, the large ftest->k might cause loops.
+ case BPF_JMP | BPF_JA:
+ /* Note, the large ftest->k might cause loops.
* Compare this with conditional jumps below,
* where offsets are limited. --ANK (981016)
*/
- if (ftest->k >= (unsigned int)(flen-pc-1))
+ if (ftest->k >= (unsigned int)(flen - pc - 1))
return -EINVAL;
break;
- case BPF_S_JMP_JEQ_K:
- case BPF_S_JMP_JEQ_X:
- case BPF_S_JMP_JGE_K:
- case BPF_S_JMP_JGE_X:
- case BPF_S_JMP_JGT_K:
- case BPF_S_JMP_JGT_X:
- case BPF_S_JMP_JSET_X:
- case BPF_S_JMP_JSET_K:
- /* for conditionals both must be safe */
+ case BPF_JMP | BPF_JEQ | BPF_K:
+ case BPF_JMP | BPF_JEQ | BPF_X:
+ case BPF_JMP | BPF_JGE | BPF_K:
+ case BPF_JMP | BPF_JGE | BPF_X:
+ case BPF_JMP | BPF_JGT | BPF_K:
+ case BPF_JMP | BPF_JGT | BPF_X:
+ case BPF_JMP | BPF_JSET | BPF_K:
+ case BPF_JMP | BPF_JSET | BPF_X:
+ /* Both conditionals must be safe */
if (pc + ftest->jt + 1 >= flen ||
pc + ftest->jf + 1 >= flen)
return -EINVAL;
break;
- case BPF_S_LD_W_ABS:
- case BPF_S_LD_H_ABS:
- case BPF_S_LD_B_ABS:
+ case BPF_LD | BPF_W | BPF_ABS:
+ case BPF_LD | BPF_H | BPF_ABS:
+ case BPF_LD | BPF_B | BPF_ABS:
anc_found = false;
-#define ANCILLARY(CODE) case SKF_AD_OFF + SKF_AD_##CODE: \
- code = BPF_S_ANC_##CODE; \
- anc_found = true; \
- break
- switch (ftest->k) {
- ANCILLARY(PROTOCOL);
- ANCILLARY(PKTTYPE);
- ANCILLARY(IFINDEX);
- ANCILLARY(NLATTR);
- ANCILLARY(NLATTR_NEST);
- ANCILLARY(MARK);
- ANCILLARY(QUEUE);
- ANCILLARY(HATYPE);
- ANCILLARY(RXHASH);
- ANCILLARY(CPU);
- ANCILLARY(ALU_XOR_X);
- ANCILLARY(VLAN_TAG);
- ANCILLARY(VLAN_TAG_PRESENT);
- ANCILLARY(PAY_OFFSET);
- }
-
- /* ancillary operation unknown or unsupported */
+ if (bpf_anc_helper(ftest) & BPF_ANC)
+ anc_found = true;
+ /* Ancillary operation unknown or unsupported */
if (anc_found == false && ftest->k >= SKF_AD_OFF)
return -EINVAL;
}
- ftest->code = code;
}
- /* last instruction must be a RET code */
+ /* Last instruction must be a RET code */
switch (filter[flen - 1].code) {
- case BPF_S_RET_K:
- case BPF_S_RET_A:
+ case BPF_RET | BPF_K:
+ case BPF_RET | BPF_A:
return check_load_and_stores(filter, flen);
}
+
return -EINVAL;
}
EXPORT_SYMBOL(sk_chk_filter);
@@ -1423,7 +1343,7 @@ static void sk_filter_release_rcu(struct rcu_head *rcu)
struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu);
sk_release_orig_filter(fp);
- bpf_jit_free(fp);
+ sk_filter_free(fp);
}
/**
@@ -1461,7 +1381,7 @@ static struct sk_filter *__sk_migrate_realloc(struct sk_filter *fp,
fp_new = sock_kmalloc(sk, len, GFP_KERNEL);
if (fp_new) {
- memcpy(fp_new, fp, sizeof(struct sk_filter));
+ *fp_new = *fp;
/* As we're kepping orig_prog in fp_new along,
* we need to make sure we're not evicting it
* from the old fp.
@@ -1478,7 +1398,7 @@ static struct sk_filter *__sk_migrate_filter(struct sk_filter *fp,
{
struct sock_filter *old_prog;
struct sk_filter *old_fp;
- int i, err, new_len, old_len = fp->len;
+ int err, new_len, old_len = fp->len;
/* We are free to overwrite insns et al right here as it
* won't be used at this point in time anymore internally
@@ -1488,13 +1408,6 @@ static struct sk_filter *__sk_migrate_filter(struct sk_filter *fp,
BUILD_BUG_ON(sizeof(struct sock_filter) !=
sizeof(struct sock_filter_int));
- /* For now, we need to unfiddle BPF_S_* identifiers in place.
- * This can sooner or later on be subject to removal, e.g. when
- * JITs have been converted.
- */
- for (i = 0; i < fp->len; i++)
- sk_decode_filter(&fp->insns[i], &fp->insns[i]);
-
/* Conversion cannot happen on overlapping memory areas,
* so we need to keep the user BPF around until the 2nd
* pass. At this time, the user BPF is stored in fp->insns.
@@ -1523,7 +1436,6 @@ static struct sk_filter *__sk_migrate_filter(struct sk_filter *fp,
goto out_err_free;
}
- fp->bpf_func = sk_run_filter_int_skb;
fp->len = new_len;
/* 2nd pass: remap sock_filter insns into sock_filter_int insns. */
@@ -1536,6 +1448,8 @@ static struct sk_filter *__sk_migrate_filter(struct sk_filter *fp,
*/
goto out_err_free;
+ sk_filter_select_runtime(fp);
+
kfree(old_prog);
return fp;
@@ -1550,6 +1464,33 @@ out_err:
return ERR_PTR(err);
}
+void __weak bpf_int_jit_compile(struct sk_filter *prog)
+{
+}
+
+/**
+ * sk_filter_select_runtime - select execution runtime for BPF program
+ * @fp: sk_filter populated with internal BPF program
+ *
+ * try to JIT internal BPF program, if JIT is not available select interpreter
+ * BPF program will be executed via SK_RUN_FILTER() macro
+ */
+void sk_filter_select_runtime(struct sk_filter *fp)
+{
+ fp->bpf_func = (void *) __sk_run_filter;
+
+ /* Probe if internal BPF can be JITed */
+ bpf_int_jit_compile(fp);
+}
+EXPORT_SYMBOL_GPL(sk_filter_select_runtime);
+
+/* free internal BPF program */
+void sk_filter_free(struct sk_filter *fp)
+{
+ bpf_jit_free(fp);
+}
+EXPORT_SYMBOL_GPL(sk_filter_free);
+
static struct sk_filter *__sk_prepare_filter(struct sk_filter *fp,
struct sock *sk)
{
@@ -1592,7 +1533,7 @@ static struct sk_filter *__sk_prepare_filter(struct sk_filter *fp,
* a negative errno code is returned. On success the return is zero.
*/
int sk_unattached_filter_create(struct sk_filter **pfp,
- struct sock_fprog *fprog)
+ struct sock_fprog_kern *fprog)
{
unsigned int fsize = sk_filter_proglen(fprog);
struct sk_filter *fp;
@@ -1713,83 +1654,6 @@ int sk_detach_filter(struct sock *sk)
}
EXPORT_SYMBOL_GPL(sk_detach_filter);
-void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to)
-{
- static const u16 decodes[] = {
- [BPF_S_ALU_ADD_K] = BPF_ALU|BPF_ADD|BPF_K,
- [BPF_S_ALU_ADD_X] = BPF_ALU|BPF_ADD|BPF_X,
- [BPF_S_ALU_SUB_K] = BPF_ALU|BPF_SUB|BPF_K,
- [BPF_S_ALU_SUB_X] = BPF_ALU|BPF_SUB|BPF_X,
- [BPF_S_ALU_MUL_K] = BPF_ALU|BPF_MUL|BPF_K,
- [BPF_S_ALU_MUL_X] = BPF_ALU|BPF_MUL|BPF_X,
- [BPF_S_ALU_DIV_X] = BPF_ALU|BPF_DIV|BPF_X,
- [BPF_S_ALU_MOD_K] = BPF_ALU|BPF_MOD|BPF_K,
- [BPF_S_ALU_MOD_X] = BPF_ALU|BPF_MOD|BPF_X,
- [BPF_S_ALU_AND_K] = BPF_ALU|BPF_AND|BPF_K,
- [BPF_S_ALU_AND_X] = BPF_ALU|BPF_AND|BPF_X,
- [BPF_S_ALU_OR_K] = BPF_ALU|BPF_OR|BPF_K,
- [BPF_S_ALU_OR_X] = BPF_ALU|BPF_OR|BPF_X,
- [BPF_S_ALU_XOR_K] = BPF_ALU|BPF_XOR|BPF_K,
- [BPF_S_ALU_XOR_X] = BPF_ALU|BPF_XOR|BPF_X,
- [BPF_S_ALU_LSH_K] = BPF_ALU|BPF_LSH|BPF_K,
- [BPF_S_ALU_LSH_X] = BPF_ALU|BPF_LSH|BPF_X,
- [BPF_S_ALU_RSH_K] = BPF_ALU|BPF_RSH|BPF_K,
- [BPF_S_ALU_RSH_X] = BPF_ALU|BPF_RSH|BPF_X,
- [BPF_S_ALU_NEG] = BPF_ALU|BPF_NEG,
- [BPF_S_LD_W_ABS] = BPF_LD|BPF_W|BPF_ABS,
- [BPF_S_LD_H_ABS] = BPF_LD|BPF_H|BPF_ABS,
- [BPF_S_LD_B_ABS] = BPF_LD|BPF_B|BPF_ABS,
- [BPF_S_ANC_PROTOCOL] = BPF_LD|BPF_B|BPF_ABS,
- [BPF_S_ANC_PKTTYPE] = BPF_LD|BPF_B|BPF_ABS,
- [BPF_S_ANC_IFINDEX] = BPF_LD|BPF_B|BPF_ABS,
- [BPF_S_ANC_NLATTR] = BPF_LD|BPF_B|BPF_ABS,
- [BPF_S_ANC_NLATTR_NEST] = BPF_LD|BPF_B|BPF_ABS,
- [BPF_S_ANC_MARK] = BPF_LD|BPF_B|BPF_ABS,
- [BPF_S_ANC_QUEUE] = BPF_LD|BPF_B|BPF_ABS,
- [BPF_S_ANC_HATYPE] = BPF_LD|BPF_B|BPF_ABS,
- [BPF_S_ANC_RXHASH] = BPF_LD|BPF_B|BPF_ABS,
- [BPF_S_ANC_CPU] = BPF_LD|BPF_B|BPF_ABS,
- [BPF_S_ANC_ALU_XOR_X] = BPF_LD|BPF_B|BPF_ABS,
- [BPF_S_ANC_VLAN_TAG] = BPF_LD|BPF_B|BPF_ABS,
- [BPF_S_ANC_VLAN_TAG_PRESENT] = BPF_LD|BPF_B|BPF_ABS,
- [BPF_S_ANC_PAY_OFFSET] = BPF_LD|BPF_B|BPF_ABS,
- [BPF_S_LD_W_LEN] = BPF_LD|BPF_W|BPF_LEN,
- [BPF_S_LD_W_IND] = BPF_LD|BPF_W|BPF_IND,
- [BPF_S_LD_H_IND] = BPF_LD|BPF_H|BPF_IND,
- [BPF_S_LD_B_IND] = BPF_LD|BPF_B|BPF_IND,
- [BPF_S_LD_IMM] = BPF_LD|BPF_IMM,
- [BPF_S_LDX_W_LEN] = BPF_LDX|BPF_W|BPF_LEN,
- [BPF_S_LDX_B_MSH] = BPF_LDX|BPF_B|BPF_MSH,
- [BPF_S_LDX_IMM] = BPF_LDX|BPF_IMM,
- [BPF_S_MISC_TAX] = BPF_MISC|BPF_TAX,
- [BPF_S_MISC_TXA] = BPF_MISC|BPF_TXA,
- [BPF_S_RET_K] = BPF_RET|BPF_K,
- [BPF_S_RET_A] = BPF_RET|BPF_A,
- [BPF_S_ALU_DIV_K] = BPF_ALU|BPF_DIV|BPF_K,
- [BPF_S_LD_MEM] = BPF_LD|BPF_MEM,
- [BPF_S_LDX_MEM] = BPF_LDX|BPF_MEM,
- [BPF_S_ST] = BPF_ST,
- [BPF_S_STX] = BPF_STX,
- [BPF_S_JMP_JA] = BPF_JMP|BPF_JA,
- [BPF_S_JMP_JEQ_K] = BPF_JMP|BPF_JEQ|BPF_K,
- [BPF_S_JMP_JEQ_X] = BPF_JMP|BPF_JEQ|BPF_X,
- [BPF_S_JMP_JGE_K] = BPF_JMP|BPF_JGE|BPF_K,
- [BPF_S_JMP_JGE_X] = BPF_JMP|BPF_JGE|BPF_X,
- [BPF_S_JMP_JGT_K] = BPF_JMP|BPF_JGT|BPF_K,
- [BPF_S_JMP_JGT_X] = BPF_JMP|BPF_JGT|BPF_X,
- [BPF_S_JMP_JSET_K] = BPF_JMP|BPF_JSET|BPF_K,
- [BPF_S_JMP_JSET_X] = BPF_JMP|BPF_JSET|BPF_X,
- };
- u16 code;
-
- code = filt->code;
-
- to->code = decodes[code];
- to->jt = filt->jt;
- to->jf = filt->jf;
- to->k = filt->k;
-}
-
int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf,
unsigned int len)
{
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 7c8ffd9..85b6269 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -273,7 +273,7 @@ static void cleanup_net(struct work_struct *work)
{
const struct pernet_operations *ops;
struct net *net, *tmp;
- LIST_HEAD(net_kill_list);
+ struct list_head net_kill_list;
LIST_HEAD(net_exit_list);
/* Atomically snapshot the list of namespaces to cleanup */
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 0304f98..fc17a9d 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -573,7 +573,7 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
is_zero_ether_addr(pkt_dev->src_mac) ?
pkt_dev->odev->dev_addr : pkt_dev->src_mac);
- seq_printf(seq, "dst_mac: ");
+ seq_puts(seq, "dst_mac: ");
seq_printf(seq, "%pM\n", pkt_dev->dst_mac);
seq_printf(seq,
@@ -588,7 +588,7 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
if (pkt_dev->nr_labels) {
unsigned int i;
- seq_printf(seq, " mpls: ");
+ seq_puts(seq, " mpls: ");
for (i = 0; i < pkt_dev->nr_labels; i++)
seq_printf(seq, "%08x%s", ntohl(pkt_dev->labels[i]),
i == pkt_dev->nr_labels-1 ? "\n" : ", ");
@@ -613,67 +613,67 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
if (pkt_dev->node >= 0)
seq_printf(seq, " node: %d\n", pkt_dev->node);
- seq_printf(seq, " Flags: ");
+ seq_puts(seq, " Flags: ");
if (pkt_dev->flags & F_IPV6)
- seq_printf(seq, "IPV6 ");
+ seq_puts(seq, "IPV6 ");
if (pkt_dev->flags & F_IPSRC_RND)
- seq_printf(seq, "IPSRC_RND ");
+ seq_puts(seq, "IPSRC_RND ");
if (pkt_dev->flags & F_IPDST_RND)
- seq_printf(seq, "IPDST_RND ");
+ seq_puts(seq, "IPDST_RND ");
if (pkt_dev->flags & F_TXSIZE_RND)
- seq_printf(seq, "TXSIZE_RND ");
+ seq_puts(seq, "TXSIZE_RND ");
if (pkt_dev->flags & F_UDPSRC_RND)
- seq_printf(seq, "UDPSRC_RND ");
+ seq_puts(seq, "UDPSRC_RND ");
if (pkt_dev->flags & F_UDPDST_RND)
- seq_printf(seq, "UDPDST_RND ");
+ seq_puts(seq, "UDPDST_RND ");
if (pkt_dev->flags & F_UDPCSUM)
- seq_printf(seq, "UDPCSUM ");
+ seq_puts(seq, "UDPCSUM ");
if (pkt_dev->flags & F_MPLS_RND)
- seq_printf(seq, "MPLS_RND ");
+ seq_puts(seq, "MPLS_RND ");
if (pkt_dev->flags & F_QUEUE_MAP_RND)
- seq_printf(seq, "QUEUE_MAP_RND ");
+ seq_puts(seq, "QUEUE_MAP_RND ");
if (pkt_dev->flags & F_QUEUE_MAP_CPU)
- seq_printf(seq, "QUEUE_MAP_CPU ");
+ seq_puts(seq, "QUEUE_MAP_CPU ");
if (pkt_dev->cflows) {
if (pkt_dev->flags & F_FLOW_SEQ)
- seq_printf(seq, "FLOW_SEQ "); /*in sequence flows*/
+ seq_puts(seq, "FLOW_SEQ "); /*in sequence flows*/
else
- seq_printf(seq, "FLOW_RND ");
+ seq_puts(seq, "FLOW_RND ");
}
#ifdef CONFIG_XFRM
if (pkt_dev->flags & F_IPSEC_ON) {
- seq_printf(seq, "IPSEC ");
+ seq_puts(seq, "IPSEC ");
if (pkt_dev->spi)
seq_printf(seq, "spi:%u", pkt_dev->spi);
}
#endif
if (pkt_dev->flags & F_MACSRC_RND)
- seq_printf(seq, "MACSRC_RND ");
+ seq_puts(seq, "MACSRC_RND ");
if (pkt_dev->flags & F_MACDST_RND)
- seq_printf(seq, "MACDST_RND ");
+ seq_puts(seq, "MACDST_RND ");
if (pkt_dev->flags & F_VID_RND)
- seq_printf(seq, "VID_RND ");
+ seq_puts(seq, "VID_RND ");
if (pkt_dev->flags & F_SVID_RND)
- seq_printf(seq, "SVID_RND ");
+ seq_puts(seq, "SVID_RND ");
if (pkt_dev->flags & F_NODE)
- seq_printf(seq, "NODE_ALLOC ");
+ seq_puts(seq, "NODE_ALLOC ");
seq_puts(seq, "\n");
@@ -716,7 +716,7 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
if (pkt_dev->result[0])
seq_printf(seq, "Result: %s\n", pkt_dev->result);
else
- seq_printf(seq, "Result: Idle\n");
+ seq_puts(seq, "Result: Idle\n");
return 0;
}
@@ -1735,14 +1735,14 @@ static int pktgen_thread_show(struct seq_file *seq, void *v)
BUG_ON(!t);
- seq_printf(seq, "Running: ");
+ seq_puts(seq, "Running: ");
if_lock(t);
list_for_each_entry(pkt_dev, &t->if_list, list)
if (pkt_dev->running)
seq_printf(seq, "%s ", pkt_dev->odevname);
- seq_printf(seq, "\nStopped: ");
+ seq_puts(seq, "\nStopped: ");
list_for_each_entry(pkt_dev, &t->if_list, list)
if (!pkt_dev->running)
@@ -1751,7 +1751,7 @@ static int pktgen_thread_show(struct seq_file *seq, void *v)
if (t->result[0])
seq_printf(seq, "\nResult: %s\n", t->result);
else
- seq_printf(seq, "\nResult: NA\n");
+ seq_puts(seq, "\nResult: NA\n");
if_unlock(t);
diff --git a/net/core/ptp_classifier.c b/net/core/ptp_classifier.c
index eaba0f6..d3027a7 100644
--- a/net/core/ptp_classifier.c
+++ b/net/core/ptp_classifier.c
@@ -88,7 +88,7 @@ EXPORT_SYMBOL_GPL(ptp_classify_raw);
void __init ptp_classifier_init(void)
{
- static struct sock_filter ptp_filter[] = {
+ static struct sock_filter ptp_filter[] __initdata = {
{ 0x28, 0, 0, 0x0000000c },
{ 0x15, 0, 12, 0x00000800 },
{ 0x30, 0, 0, 0x00000017 },
@@ -133,7 +133,7 @@ void __init ptp_classifier_init(void)
{ 0x16, 0, 0, 0x00000000 },
{ 0x06, 0, 0, 0x00000000 },
};
- struct sock_fprog ptp_prog = {
+ struct sock_fprog_kern ptp_prog = {
.len = ARRAY_SIZE(ptp_filter), .filter = ptp_filter,
};
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 2d8d8fc..1063996 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -798,8 +798,8 @@ static inline int rtnl_vfinfo_size(const struct net_device *dev,
size += num_vfs *
(nla_total_size(sizeof(struct ifla_vf_mac)) +
nla_total_size(sizeof(struct ifla_vf_vlan)) +
- nla_total_size(sizeof(struct ifla_vf_tx_rate)) +
- nla_total_size(sizeof(struct ifla_vf_spoofchk)));
+ nla_total_size(sizeof(struct ifla_vf_spoofchk)) +
+ nla_total_size(sizeof(struct ifla_vf_rate)));
return size;
} else
return 0;
@@ -1065,6 +1065,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
struct ifla_vf_info ivi;
struct ifla_vf_mac vf_mac;
struct ifla_vf_vlan vf_vlan;
+ struct ifla_vf_rate vf_rate;
struct ifla_vf_tx_rate vf_tx_rate;
struct ifla_vf_spoofchk vf_spoofchk;
struct ifla_vf_link_state vf_linkstate;
@@ -1085,6 +1086,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
break;
vf_mac.vf =
vf_vlan.vf =
+ vf_rate.vf =
vf_tx_rate.vf =
vf_spoofchk.vf =
vf_linkstate.vf = ivi.vf;
@@ -1092,7 +1094,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac));
vf_vlan.vlan = ivi.vlan;
vf_vlan.qos = ivi.qos;
- vf_tx_rate.rate = ivi.tx_rate;
+ vf_tx_rate.rate = ivi.max_tx_rate;
+ vf_rate.min_tx_rate = ivi.min_tx_rate;
+ vf_rate.max_tx_rate = ivi.max_tx_rate;
vf_spoofchk.setting = ivi.spoofchk;
vf_linkstate.link_state = ivi.linkstate;
vf = nla_nest_start(skb, IFLA_VF_INFO);
@@ -1102,6 +1106,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
}
if (nla_put(skb, IFLA_VF_MAC, sizeof(vf_mac), &vf_mac) ||
nla_put(skb, IFLA_VF_VLAN, sizeof(vf_vlan), &vf_vlan) ||
+ nla_put(skb, IFLA_VF_RATE, sizeof(vf_rate),
+ &vf_rate) ||
nla_put(skb, IFLA_VF_TX_RATE, sizeof(vf_tx_rate),
&vf_tx_rate) ||
nla_put(skb, IFLA_VF_SPOOFCHK, sizeof(vf_spoofchk),
@@ -1208,6 +1214,10 @@ static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = {
.len = sizeof(struct ifla_vf_tx_rate) },
[IFLA_VF_SPOOFCHK] = { .type = NLA_BINARY,
.len = sizeof(struct ifla_vf_spoofchk) },
+ [IFLA_VF_RATE] = { .type = NLA_BINARY,
+ .len = sizeof(struct ifla_vf_rate) },
+ [IFLA_VF_LINK_STATE] = { .type = NLA_BINARY,
+ .len = sizeof(struct ifla_vf_link_state) },
};
static const struct nla_policy ifla_port_policy[IFLA_PORT_MAX+1] = {
@@ -1234,6 +1244,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
struct nlattr *tb[IFLA_MAX+1];
u32 ext_filter_mask = 0;
int err;
+ int hdrlen;
s_h = cb->args[0];
s_idx = cb->args[1];
@@ -1241,8 +1252,17 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
rcu_read_lock();
cb->seq = net->dev_base_seq;
- if (nlmsg_parse(cb->nlh, sizeof(struct ifinfomsg), tb, IFLA_MAX,
- ifla_policy) >= 0) {
+ /* A hack to preserve kernel<->userspace interface.
+ * The correct header is ifinfomsg. It is consistent with rtnl_getlink.
+ * However, before Linux v3.9 the code here assumed rtgenmsg and that's
+ * what iproute2 < v3.9.0 used.
+ * We can detect the old iproute2. Even including the IFLA_EXT_MASK
+ * attribute, its netlink message is shorter than struct ifinfomsg.
+ */
+ hdrlen = nlmsg_len(cb->nlh) < sizeof(struct ifinfomsg) ?
+ sizeof(struct rtgenmsg) : sizeof(struct ifinfomsg);
+
+ if (nlmsg_parse(cb->nlh, hdrlen, tb, IFLA_MAX, ifla_policy) >= 0) {
if (tb[IFLA_EXT_MASK])
ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]);
@@ -1367,11 +1387,29 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr *attr)
}
case IFLA_VF_TX_RATE: {
struct ifla_vf_tx_rate *ivt;
+ struct ifla_vf_info ivf;
ivt = nla_data(vf);
err = -EOPNOTSUPP;
- if (ops->ndo_set_vf_tx_rate)
- err = ops->ndo_set_vf_tx_rate(dev, ivt->vf,
- ivt->rate);
+ if (ops->ndo_get_vf_config)
+ err = ops->ndo_get_vf_config(dev, ivt->vf,
+ &ivf);
+ if (err)
+ break;
+ err = -EOPNOTSUPP;
+ if (ops->ndo_set_vf_rate)
+ err = ops->ndo_set_vf_rate(dev, ivt->vf,
+ ivf.min_tx_rate,
+ ivt->rate);
+ break;
+ }
+ case IFLA_VF_RATE: {
+ struct ifla_vf_rate *ivt;
+ ivt = nla_data(vf);
+ err = -EOPNOTSUPP;
+ if (ops->ndo_set_vf_rate)
+ err = ops->ndo_set_vf_rate(dev, ivt->vf,
+ ivt->min_tx_rate,
+ ivt->max_tx_rate);
break;
}
case IFLA_VF_SPOOFCHK: {
@@ -1744,7 +1782,6 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh)
ops->dellink(dev, &list_kill);
unregister_netdevice_many(&list_kill);
- list_del(&list_kill);
return 0;
}
@@ -2019,11 +2056,15 @@ replay:
if (ops->newlink) {
err = ops->newlink(net, dev, tb, data);
/* Drivers should call free_netdev() in ->destructor
- * and unregister it on failure so that device could be
- * finally freed in rtnl_unlock.
+ * and unregister it on failure after registration
+ * so that device could be finally freed in rtnl_unlock.
*/
- if (err < 0)
+ if (err < 0) {
+ /* If device is not registered at all, free it now */
+ if (dev->reg_state == NETREG_UNINITIALIZED)
+ free_netdev(dev);
goto out;
+ }
} else {
err = register_netdevice(dev);
if (err < 0) {
@@ -2095,9 +2136,13 @@ static u16 rtnl_calcit(struct sk_buff *skb, struct nlmsghdr *nlh)
struct nlattr *tb[IFLA_MAX+1];
u32 ext_filter_mask = 0;
u16 min_ifinfo_dump_size = 0;
+ int hdrlen;
+
+ /* Same kernel<->userspace interface hack as in rtnl_dump_ifinfo. */
+ hdrlen = nlmsg_len(nlh) < sizeof(struct ifinfomsg) ?
+ sizeof(struct rtgenmsg) : sizeof(struct ifinfomsg);
- if (nlmsg_parse(nlh, sizeof(struct ifinfomsg), tb, IFLA_MAX,
- ifla_policy) >= 0) {
+ if (nlmsg_parse(nlh, hdrlen, tb, IFLA_MAX, ifla_policy) >= 0) {
if (tb[IFLA_EXT_MASK])
ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]);
}
diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c
index 897da56..ba71212 100644
--- a/net/core/secure_seq.c
+++ b/net/core/secure_seq.c
@@ -85,31 +85,6 @@ EXPORT_SYMBOL(secure_ipv6_port_ephemeral);
#endif
#ifdef CONFIG_INET
-__u32 secure_ip_id(__be32 daddr)
-{
- u32 hash[MD5_DIGEST_WORDS];
-
- net_secret_init();
- hash[0] = (__force __u32) daddr;
- hash[1] = net_secret[13];
- hash[2] = net_secret[14];
- hash[3] = net_secret[15];
-
- md5_transform(hash, net_secret);
-
- return hash[0];
-}
-
-__u32 secure_ipv6_id(const __be32 daddr[4])
-{
- __u32 hash[4];
-
- net_secret_init();
- memcpy(hash, daddr, 16);
- md5_transform(hash, net_secret);
-
- return hash[0];
-}
__u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr,
__be16 sport, __be16 dport)
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 8383b2b..bf92824 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -694,7 +694,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
#endif
memcpy(new->cb, old->cb, sizeof(old->cb));
new->csum = old->csum;
- new->local_df = old->local_df;
+ new->ignore_df = old->ignore_df;
new->pkt_type = old->pkt_type;
new->ip_summed = old->ip_summed;
skb_copy_queue_mapping(new, old);
@@ -951,10 +951,13 @@ struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask)
EXPORT_SYMBOL(skb_copy);
/**
- * __pskb_copy - create copy of an sk_buff with private head.
+ * __pskb_copy_fclone - create copy of an sk_buff with private head.
* @skb: buffer to copy
* @headroom: headroom of new skb
* @gfp_mask: allocation priority
+ * @fclone: if true allocate the copy of the skb from the fclone
+ * cache instead of the head cache; it is recommended to set this
+ * to true for the cases where the copy will likely be cloned
*
* Make a copy of both an &sk_buff and part of its data, located
* in header. Fragmented data remain shared. This is used when
@@ -964,11 +967,12 @@ EXPORT_SYMBOL(skb_copy);
* The returned buffer has a reference count of 1.
*/
-struct sk_buff *__pskb_copy(struct sk_buff *skb, int headroom, gfp_t gfp_mask)
+struct sk_buff *__pskb_copy_fclone(struct sk_buff *skb, int headroom,
+ gfp_t gfp_mask, bool fclone)
{
unsigned int size = skb_headlen(skb) + headroom;
- struct sk_buff *n = __alloc_skb(size, gfp_mask,
- skb_alloc_rx_flag(skb), NUMA_NO_NODE);
+ int flags = skb_alloc_rx_flag(skb) | (fclone ? SKB_ALLOC_FCLONE : 0);
+ struct sk_buff *n = __alloc_skb(size, gfp_mask, flags, NUMA_NO_NODE);
if (!n)
goto out;
@@ -1008,7 +1012,7 @@ struct sk_buff *__pskb_copy(struct sk_buff *skb, int headroom, gfp_t gfp_mask)
out:
return n;
}
-EXPORT_SYMBOL(__pskb_copy);
+EXPORT_SYMBOL(__pskb_copy_fclone);
/**
* pskb_expand_head - reallocate header of &sk_buff
@@ -2881,12 +2885,14 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
int pos;
int dummy;
+ __skb_push(head_skb, doffset);
proto = skb_network_protocol(head_skb, &dummy);
if (unlikely(!proto))
return ERR_PTR(-EINVAL);
- csum = !!can_checksum_protocol(features, proto);
- __skb_push(head_skb, doffset);
+ csum = !head_skb->encap_hdr_csum &&
+ !!can_checksum_protocol(features, proto);
+
headroom = skb_headroom(head_skb);
pos = skb_headlen(head_skb);
@@ -2983,6 +2989,8 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
nskb->csum = skb_copy_and_csum_bits(head_skb, offset,
skb_put(nskb, len),
len, 0);
+ SKB_GSO_CB(nskb)->csum_start =
+ skb_headroom(nskb) + offset;
continue;
}
@@ -3052,6 +3060,8 @@ perform_csum_check:
nskb->csum = skb_checksum(nskb, doffset,
nskb->len - doffset, 0);
nskb->ip_summed = CHECKSUM_NONE;
+ SKB_GSO_CB(nskb)->csum_start =
+ skb_headroom(nskb) + doffset;
}
} while ((offset += len) < head_skb->len);
@@ -3913,7 +3923,7 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet)
skb->tstamp.tv64 = 0;
skb->pkt_type = PACKET_HOST;
skb->skb_iif = 0;
- skb->local_df = 0;
+ skb->ignore_df = 0;
skb_dst_drop(skb);
skb->mark = 0;
secpath_reset(skb);
diff --git a/net/core/sock.c b/net/core/sock.c
index 664ee42..026e01f 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -784,7 +784,7 @@ set_rcvbuf:
break;
case SO_NO_CHECK:
- sk->sk_no_check = valbool;
+ sk->sk_no_check_tx = valbool;
break;
case SO_PRIORITY:
@@ -1064,7 +1064,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
break;
case SO_NO_CHECK:
- v.val = sk->sk_no_check;
+ v.val = sk->sk_no_check_tx;
break;
case SO_PRIORITY:
diff --git a/net/core/tso.c b/net/core/tso.c
new file mode 100644
index 0000000..8c3203c
--- /dev/null
+++ b/net/core/tso.c
@@ -0,0 +1,77 @@
+#include <linux/export.h>
+#include <net/ip.h>
+#include <net/tso.h>
+
+/* Calculate expected number of TX descriptors */
+int tso_count_descs(struct sk_buff *skb)
+{
+ /* The Marvell Way */
+ return skb_shinfo(skb)->gso_segs * 2 + skb_shinfo(skb)->nr_frags;
+}
+EXPORT_SYMBOL(tso_count_descs);
+
+void tso_build_hdr(struct sk_buff *skb, char *hdr, struct tso_t *tso,
+ int size, bool is_last)
+{
+ struct iphdr *iph;
+ struct tcphdr *tcph;
+ int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
+ int mac_hdr_len = skb_network_offset(skb);
+
+ memcpy(hdr, skb->data, hdr_len);
+ iph = (struct iphdr *)(hdr + mac_hdr_len);
+ iph->id = htons(tso->ip_id);
+ iph->tot_len = htons(size + hdr_len - mac_hdr_len);
+ tcph = (struct tcphdr *)(hdr + skb_transport_offset(skb));
+ tcph->seq = htonl(tso->tcp_seq);
+ tso->ip_id++;
+
+ if (!is_last) {
+ /* Clear all special flags for not last packet */
+ tcph->psh = 0;
+ tcph->fin = 0;
+ tcph->rst = 0;
+ }
+}
+EXPORT_SYMBOL(tso_build_hdr);
+
+void tso_build_data(struct sk_buff *skb, struct tso_t *tso, int size)
+{
+ tso->tcp_seq += size;
+ tso->size -= size;
+ tso->data += size;
+
+ if ((tso->size == 0) &&
+ (tso->next_frag_idx < skb_shinfo(skb)->nr_frags)) {
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[tso->next_frag_idx];
+
+ /* Move to next segment */
+ tso->size = frag->size;
+ tso->data = page_address(frag->page.p) + frag->page_offset;
+ tso->next_frag_idx++;
+ }
+}
+EXPORT_SYMBOL(tso_build_data);
+
+void tso_start(struct sk_buff *skb, struct tso_t *tso)
+{
+ int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
+
+ tso->ip_id = ntohs(ip_hdr(skb)->id);
+ tso->tcp_seq = ntohl(tcp_hdr(skb)->seq);
+ tso->next_frag_idx = 0;
+
+ /* Build first data */
+ tso->size = skb_headlen(skb) - hdr_len;
+ tso->data = skb->data + hdr_len;
+ if ((tso->size == 0) &&
+ (tso->next_frag_idx < skb_shinfo(skb)->nr_frags)) {
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[tso->next_frag_idx];
+
+ /* Move to next segment */
+ tso->size = frag->size;
+ tso->data = page_address(frag->page.p) + frag->page_offset;
+ tso->next_frag_idx++;
+ }
+}
+EXPORT_SYMBOL(tso_start);
OpenPOWER on IntegriCloud