summaryrefslogtreecommitdiffstats
path: root/net/openvswitch
diff options
context:
space:
mode:
Diffstat (limited to 'net/openvswitch')
-rw-r--r--net/openvswitch/Kconfig23
-rw-r--r--net/openvswitch/Makefile14
-rw-r--r--net/openvswitch/actions.c355
-rw-r--r--net/openvswitch/datapath.c346
-rw-r--r--net/openvswitch/datapath.h24
-rw-r--r--net/openvswitch/flow.c38
-rw-r--r--net/openvswitch/flow.h88
-rw-r--r--net/openvswitch/flow_netlink.c629
-rw-r--r--net/openvswitch/flow_netlink.h18
-rw-r--r--net/openvswitch/flow_table.c27
-rw-r--r--net/openvswitch/flow_table.h10
-rw-r--r--net/openvswitch/vport-geneve.c46
-rw-r--r--net/openvswitch/vport-gre.c47
-rw-r--r--net/openvswitch/vport-internal_dev.c22
-rw-r--r--net/openvswitch/vport-netdev.c16
-rw-r--r--net/openvswitch/vport-netdev.h3
-rw-r--r--net/openvswitch/vport-vxlan.c47
-rw-r--r--net/openvswitch/vport.c171
-rw-r--r--net/openvswitch/vport.h34
19 files changed, 1316 insertions, 642 deletions
diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig
index ba3bb82..b7d818c 100644
--- a/net/openvswitch/Kconfig
+++ b/net/openvswitch/Kconfig
@@ -4,7 +4,9 @@
config OPENVSWITCH
tristate "Open vSwitch"
+ depends on INET
select LIBCRC32C
+ select NET_MPLS_GSO
---help---
Open vSwitch is a multilayer Ethernet switch targeted at virtualized
environments. In addition to supporting a variety of features
@@ -29,11 +31,10 @@ config OPENVSWITCH
If unsure, say N.
config OPENVSWITCH_GRE
- bool "Open vSwitch GRE tunneling support"
- depends on INET
+ tristate "Open vSwitch GRE tunneling support"
depends on OPENVSWITCH
- depends on NET_IPGRE_DEMUX && !(OPENVSWITCH=y && NET_IPGRE_DEMUX=m)
- default y
+ depends on NET_IPGRE_DEMUX
+ default OPENVSWITCH
---help---
If you say Y here, then the Open vSwitch will be able create GRE
vport.
@@ -43,11 +44,10 @@ config OPENVSWITCH_GRE
If unsure, say Y.
config OPENVSWITCH_VXLAN
- bool "Open vSwitch VXLAN tunneling support"
- depends on INET
+ tristate "Open vSwitch VXLAN tunneling support"
depends on OPENVSWITCH
- depends on VXLAN && !(OPENVSWITCH=y && VXLAN=m)
- default y
+ depends on VXLAN
+ default OPENVSWITCH
---help---
If you say Y here, then the Open vSwitch will be able create vxlan vport.
@@ -56,11 +56,10 @@ config OPENVSWITCH_VXLAN
If unsure, say Y.
config OPENVSWITCH_GENEVE
- bool "Open vSwitch Geneve tunneling support"
- depends on INET
+ tristate "Open vSwitch Geneve tunneling support"
depends on OPENVSWITCH
- depends on GENEVE && !(OPENVSWITCH=y && GENEVE=m)
- default y
+ depends on GENEVE
+ default OPENVSWITCH
---help---
If you say Y here, then the Open vSwitch will be able create geneve vport.
diff --git a/net/openvswitch/Makefile b/net/openvswitch/Makefile
index 9a33a27..91b9478 100644
--- a/net/openvswitch/Makefile
+++ b/net/openvswitch/Makefile
@@ -15,14 +15,6 @@ openvswitch-y := \
vport-internal_dev.o \
vport-netdev.o
-ifneq ($(CONFIG_OPENVSWITCH_GENEVE),)
-openvswitch-y += vport-geneve.o
-endif
-
-ifneq ($(CONFIG_OPENVSWITCH_VXLAN),)
-openvswitch-y += vport-vxlan.o
-endif
-
-ifneq ($(CONFIG_OPENVSWITCH_GRE),)
-openvswitch-y += vport-gre.o
-endif
+obj-$(CONFIG_OPENVSWITCH_GENEVE)+= vport-geneve.o
+obj-$(CONFIG_OPENVSWITCH_VXLAN) += vport-vxlan.o
+obj-$(CONFIG_OPENVSWITCH_GRE) += vport-gre.o
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 8c4229b..764fdc3 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -28,10 +28,12 @@
#include <linux/in6.h>
#include <linux/if_arp.h>
#include <linux/if_vlan.h>
+
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/checksum.h>
#include <net/dsfield.h>
+#include <net/mpls.h>
#include <net/sctp/checksum.h>
#include "datapath.h"
@@ -67,7 +69,7 @@ static void action_fifo_init(struct action_fifo *fifo)
fifo->tail = 0;
}
-static bool action_fifo_is_empty(struct action_fifo *fifo)
+static bool action_fifo_is_empty(const struct action_fifo *fifo)
{
return (fifo->head == fifo->tail);
}
@@ -90,7 +92,7 @@ static struct deferred_action *action_fifo_put(struct action_fifo *fifo)
/* Return true if fifo is not full */
static struct deferred_action *add_deferred_actions(struct sk_buff *skb,
- struct sw_flow_key *key,
+ const struct sw_flow_key *key,
const struct nlattr *attr)
{
struct action_fifo *fifo;
@@ -107,100 +109,131 @@ static struct deferred_action *add_deferred_actions(struct sk_buff *skb,
return da;
}
-static int make_writable(struct sk_buff *skb, int write_len)
+static void invalidate_flow_key(struct sw_flow_key *key)
+{
+ key->eth.type = htons(0);
+}
+
+static bool is_flow_key_valid(const struct sw_flow_key *key)
{
- if (!pskb_may_pull(skb, write_len))
+ return !!key->eth.type;
+}
+
+static int push_mpls(struct sk_buff *skb, struct sw_flow_key *key,
+ const struct ovs_action_push_mpls *mpls)
+{
+ __be32 *new_mpls_lse;
+ struct ethhdr *hdr;
+
+ /* Networking stack do not allow simultaneous Tunnel and MPLS GSO. */
+ if (skb->encapsulation)
+ return -ENOTSUPP;
+
+ if (skb_cow_head(skb, MPLS_HLEN) < 0)
return -ENOMEM;
- if (!skb_cloned(skb) || skb_clone_writable(skb, write_len))
- return 0;
+ skb_push(skb, MPLS_HLEN);
+ memmove(skb_mac_header(skb) - MPLS_HLEN, skb_mac_header(skb),
+ skb->mac_len);
+ skb_reset_mac_header(skb);
+
+ new_mpls_lse = (__be32 *)skb_mpls_header(skb);
+ *new_mpls_lse = mpls->mpls_lse;
- return pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
+ if (skb->ip_summed == CHECKSUM_COMPLETE)
+ skb->csum = csum_add(skb->csum, csum_partial(new_mpls_lse,
+ MPLS_HLEN, 0));
+
+ hdr = eth_hdr(skb);
+ hdr->h_proto = mpls->mpls_ethertype;
+
+ skb_set_inner_protocol(skb, skb->protocol);
+ skb->protocol = mpls->mpls_ethertype;
+
+ invalidate_flow_key(key);
+ return 0;
}
-/* remove VLAN header from packet and update csum accordingly. */
-static int __pop_vlan_tci(struct sk_buff *skb, __be16 *current_tci)
+static int pop_mpls(struct sk_buff *skb, struct sw_flow_key *key,
+ const __be16 ethertype)
{
- struct vlan_hdr *vhdr;
+ struct ethhdr *hdr;
int err;
- err = make_writable(skb, VLAN_ETH_HLEN);
+ err = skb_ensure_writable(skb, skb->mac_len + MPLS_HLEN);
if (unlikely(err))
return err;
- if (skb->ip_summed == CHECKSUM_COMPLETE)
- skb->csum = csum_sub(skb->csum, csum_partial(skb->data
- + (2 * ETH_ALEN), VLAN_HLEN, 0));
+ skb_postpull_rcsum(skb, skb_mpls_header(skb), MPLS_HLEN);
- vhdr = (struct vlan_hdr *)(skb->data + ETH_HLEN);
- *current_tci = vhdr->h_vlan_TCI;
+ memmove(skb_mac_header(skb) + MPLS_HLEN, skb_mac_header(skb),
+ skb->mac_len);
- memmove(skb->data + VLAN_HLEN, skb->data, 2 * ETH_ALEN);
- __skb_pull(skb, VLAN_HLEN);
+ __skb_pull(skb, MPLS_HLEN);
+ skb_reset_mac_header(skb);
- vlan_set_encap_proto(skb, vhdr);
- skb->mac_header += VLAN_HLEN;
- if (skb_network_offset(skb) < ETH_HLEN)
- skb_set_network_header(skb, ETH_HLEN);
- skb_reset_mac_len(skb);
+ /* skb_mpls_header() is used to locate the ethertype
+ * field correctly in the presence of VLAN tags.
+ */
+ hdr = (struct ethhdr *)(skb_mpls_header(skb) - ETH_HLEN);
+ hdr->h_proto = ethertype;
+ if (eth_p_mpls(skb->protocol))
+ skb->protocol = ethertype;
+ invalidate_flow_key(key);
return 0;
}
-static int pop_vlan(struct sk_buff *skb)
+static int set_mpls(struct sk_buff *skb, struct sw_flow_key *key,
+ const __be32 *mpls_lse)
{
- __be16 tci;
+ __be32 *stack;
int err;
- if (likely(vlan_tx_tag_present(skb))) {
- skb->vlan_tci = 0;
- } else {
- if (unlikely(skb->protocol != htons(ETH_P_8021Q) ||
- skb->len < VLAN_ETH_HLEN))
- return 0;
-
- err = __pop_vlan_tci(skb, &tci);
- if (err)
- return err;
- }
- /* move next vlan tag to hw accel tag */
- if (likely(skb->protocol != htons(ETH_P_8021Q) ||
- skb->len < VLAN_ETH_HLEN))
- return 0;
-
- err = __pop_vlan_tci(skb, &tci);
+ err = skb_ensure_writable(skb, skb->mac_len + MPLS_HLEN);
if (unlikely(err))
return err;
- __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), ntohs(tci));
+ stack = (__be32 *)skb_mpls_header(skb);
+ if (skb->ip_summed == CHECKSUM_COMPLETE) {
+ __be32 diff[] = { ~(*stack), *mpls_lse };
+ skb->csum = ~csum_partial((char *)diff, sizeof(diff),
+ ~skb->csum);
+ }
+
+ *stack = *mpls_lse;
+ key->mpls.top_lse = *mpls_lse;
return 0;
}
-static int push_vlan(struct sk_buff *skb, const struct ovs_action_push_vlan *vlan)
+static int pop_vlan(struct sk_buff *skb, struct sw_flow_key *key)
{
- if (unlikely(vlan_tx_tag_present(skb))) {
- u16 current_tag;
-
- /* push down current VLAN tag */
- current_tag = vlan_tx_tag_get(skb);
-
- if (!__vlan_put_tag(skb, skb->vlan_proto, current_tag))
- return -ENOMEM;
+ int err;
- if (skb->ip_summed == CHECKSUM_COMPLETE)
- skb->csum = csum_add(skb->csum, csum_partial(skb->data
- + (2 * ETH_ALEN), VLAN_HLEN, 0));
+ err = skb_vlan_pop(skb);
+ if (vlan_tx_tag_present(skb))
+ invalidate_flow_key(key);
+ else
+ key->eth.tci = 0;
+ return err;
+}
- }
- __vlan_hwaccel_put_tag(skb, vlan->vlan_tpid, ntohs(vlan->vlan_tci) & ~VLAN_TAG_PRESENT);
- return 0;
+static int push_vlan(struct sk_buff *skb, struct sw_flow_key *key,
+ const struct ovs_action_push_vlan *vlan)
+{
+ if (vlan_tx_tag_present(skb))
+ invalidate_flow_key(key);
+ else
+ key->eth.tci = vlan->vlan_tci;
+ return skb_vlan_push(skb, vlan->vlan_tpid,
+ ntohs(vlan->vlan_tci) & ~VLAN_TAG_PRESENT);
}
-static int set_eth_addr(struct sk_buff *skb,
+static int set_eth_addr(struct sk_buff *skb, struct sw_flow_key *key,
const struct ovs_key_ethernet *eth_key)
{
int err;
- err = make_writable(skb, ETH_HLEN);
+ err = skb_ensure_writable(skb, ETH_HLEN);
if (unlikely(err))
return err;
@@ -211,11 +244,13 @@ static int set_eth_addr(struct sk_buff *skb,
ovs_skb_postpush_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2);
+ ether_addr_copy(key->eth.src, eth_key->eth_src);
+ ether_addr_copy(key->eth.dst, eth_key->eth_dst);
return 0;
}
static void set_ip_addr(struct sk_buff *skb, struct iphdr *nh,
- __be32 *addr, __be32 new_addr)
+ __be32 *addr, __be32 new_addr)
{
int transport_len = skb->len - skb_transport_offset(skb);
@@ -298,42 +333,52 @@ static void set_ip_ttl(struct sk_buff *skb, struct iphdr *nh, u8 new_ttl)
nh->ttl = new_ttl;
}
-static int set_ipv4(struct sk_buff *skb, const struct ovs_key_ipv4 *ipv4_key)
+static int set_ipv4(struct sk_buff *skb, struct sw_flow_key *key,
+ const struct ovs_key_ipv4 *ipv4_key)
{
struct iphdr *nh;
int err;
- err = make_writable(skb, skb_network_offset(skb) +
- sizeof(struct iphdr));
+ err = skb_ensure_writable(skb, skb_network_offset(skb) +
+ sizeof(struct iphdr));
if (unlikely(err))
return err;
nh = ip_hdr(skb);
- if (ipv4_key->ipv4_src != nh->saddr)
+ if (ipv4_key->ipv4_src != nh->saddr) {
set_ip_addr(skb, nh, &nh->saddr, ipv4_key->ipv4_src);
+ key->ipv4.addr.src = ipv4_key->ipv4_src;
+ }
- if (ipv4_key->ipv4_dst != nh->daddr)
+ if (ipv4_key->ipv4_dst != nh->daddr) {
set_ip_addr(skb, nh, &nh->daddr, ipv4_key->ipv4_dst);
+ key->ipv4.addr.dst = ipv4_key->ipv4_dst;
+ }
- if (ipv4_key->ipv4_tos != nh->tos)
+ if (ipv4_key->ipv4_tos != nh->tos) {
ipv4_change_dsfield(nh, 0, ipv4_key->ipv4_tos);
+ key->ip.tos = nh->tos;
+ }
- if (ipv4_key->ipv4_ttl != nh->ttl)
+ if (ipv4_key->ipv4_ttl != nh->ttl) {
set_ip_ttl(skb, nh, ipv4_key->ipv4_ttl);
+ key->ip.ttl = ipv4_key->ipv4_ttl;
+ }
return 0;
}
-static int set_ipv6(struct sk_buff *skb, const struct ovs_key_ipv6 *ipv6_key)
+static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *key,
+ const struct ovs_key_ipv6 *ipv6_key)
{
struct ipv6hdr *nh;
int err;
__be32 *saddr;
__be32 *daddr;
- err = make_writable(skb, skb_network_offset(skb) +
- sizeof(struct ipv6hdr));
+ err = skb_ensure_writable(skb, skb_network_offset(skb) +
+ sizeof(struct ipv6hdr));
if (unlikely(err))
return err;
@@ -341,9 +386,12 @@ static int set_ipv6(struct sk_buff *skb, const struct ovs_key_ipv6 *ipv6_key)
saddr = (__be32 *)&nh->saddr;
daddr = (__be32 *)&nh->daddr;
- if (memcmp(ipv6_key->ipv6_src, saddr, sizeof(ipv6_key->ipv6_src)))
+ if (memcmp(ipv6_key->ipv6_src, saddr, sizeof(ipv6_key->ipv6_src))) {
set_ipv6_addr(skb, ipv6_key->ipv6_proto, saddr,
ipv6_key->ipv6_src, true);
+ memcpy(&key->ipv6.addr.src, ipv6_key->ipv6_src,
+ sizeof(ipv6_key->ipv6_src));
+ }
if (memcmp(ipv6_key->ipv6_dst, daddr, sizeof(ipv6_key->ipv6_dst))) {
unsigned int offset = 0;
@@ -357,16 +405,22 @@ static int set_ipv6(struct sk_buff *skb, const struct ovs_key_ipv6 *ipv6_key)
set_ipv6_addr(skb, ipv6_key->ipv6_proto, daddr,
ipv6_key->ipv6_dst, recalc_csum);
+ memcpy(&key->ipv6.addr.dst, ipv6_key->ipv6_dst,
+ sizeof(ipv6_key->ipv6_dst));
}
set_ipv6_tc(nh, ipv6_key->ipv6_tclass);
+ key->ip.tos = ipv6_get_dsfield(nh);
+
set_ipv6_fl(nh, ntohl(ipv6_key->ipv6_label));
- nh->hop_limit = ipv6_key->ipv6_hlimit;
+ key->ipv6.label = *(__be32 *)nh & htonl(IPV6_FLOWINFO_FLOWLABEL);
+ nh->hop_limit = ipv6_key->ipv6_hlimit;
+ key->ip.ttl = ipv6_key->ipv6_hlimit;
return 0;
}
-/* Must follow make_writable() since that can move the skb data. */
+/* Must follow skb_ensure_writable() since that can move the skb data. */
static void set_tp_port(struct sk_buff *skb, __be16 *port,
__be16 new_port, __sum16 *check)
{
@@ -390,54 +444,64 @@ static void set_udp_port(struct sk_buff *skb, __be16 *port, __be16 new_port)
}
}
-static int set_udp(struct sk_buff *skb, const struct ovs_key_udp *udp_port_key)
+static int set_udp(struct sk_buff *skb, struct sw_flow_key *key,
+ const struct ovs_key_udp *udp_port_key)
{
struct udphdr *uh;
int err;
- err = make_writable(skb, skb_transport_offset(skb) +
- sizeof(struct udphdr));
+ err = skb_ensure_writable(skb, skb_transport_offset(skb) +
+ sizeof(struct udphdr));
if (unlikely(err))
return err;
uh = udp_hdr(skb);
- if (udp_port_key->udp_src != uh->source)
+ if (udp_port_key->udp_src != uh->source) {
set_udp_port(skb, &uh->source, udp_port_key->udp_src);
+ key->tp.src = udp_port_key->udp_src;
+ }
- if (udp_port_key->udp_dst != uh->dest)
+ if (udp_port_key->udp_dst != uh->dest) {
set_udp_port(skb, &uh->dest, udp_port_key->udp_dst);
+ key->tp.dst = udp_port_key->udp_dst;
+ }
return 0;
}
-static int set_tcp(struct sk_buff *skb, const struct ovs_key_tcp *tcp_port_key)
+static int set_tcp(struct sk_buff *skb, struct sw_flow_key *key,
+ const struct ovs_key_tcp *tcp_port_key)
{
struct tcphdr *th;
int err;
- err = make_writable(skb, skb_transport_offset(skb) +
- sizeof(struct tcphdr));
+ err = skb_ensure_writable(skb, skb_transport_offset(skb) +
+ sizeof(struct tcphdr));
if (unlikely(err))
return err;
th = tcp_hdr(skb);
- if (tcp_port_key->tcp_src != th->source)
+ if (tcp_port_key->tcp_src != th->source) {
set_tp_port(skb, &th->source, tcp_port_key->tcp_src, &th->check);
+ key->tp.src = tcp_port_key->tcp_src;
+ }
- if (tcp_port_key->tcp_dst != th->dest)
+ if (tcp_port_key->tcp_dst != th->dest) {
set_tp_port(skb, &th->dest, tcp_port_key->tcp_dst, &th->check);
+ key->tp.dst = tcp_port_key->tcp_dst;
+ }
return 0;
}
-static int set_sctp(struct sk_buff *skb,
- const struct ovs_key_sctp *sctp_port_key)
+static int set_sctp(struct sk_buff *skb, struct sw_flow_key *key,
+ const struct ovs_key_sctp *sctp_port_key)
{
struct sctphdr *sh;
int err;
unsigned int sctphoff = skb_transport_offset(skb);
- err = make_writable(skb, sctphoff + sizeof(struct sctphdr));
+ err = skb_ensure_writable(skb, sctphoff + sizeof(struct sctphdr));
if (unlikely(err))
return err;
@@ -458,39 +522,35 @@ static int set_sctp(struct sk_buff *skb,
sh->checksum = old_csum ^ old_correct_csum ^ new_csum;
skb_clear_hash(skb);
+ key->tp.src = sctp_port_key->sctp_src;
+ key->tp.dst = sctp_port_key->sctp_dst;
}
return 0;
}
-static int do_output(struct datapath *dp, struct sk_buff *skb, int out_port)
+static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port)
{
- struct vport *vport;
-
- if (unlikely(!skb))
- return -ENOMEM;
+ struct vport *vport = ovs_vport_rcu(dp, out_port);
- vport = ovs_vport_rcu(dp, out_port);
- if (unlikely(!vport)) {
+ if (likely(vport))
+ ovs_vport_send(vport, skb);
+ else
kfree_skb(skb);
- return -ENODEV;
- }
-
- ovs_vport_send(vport, skb);
- return 0;
}
static int output_userspace(struct datapath *dp, struct sk_buff *skb,
struct sw_flow_key *key, const struct nlattr *attr)
{
+ struct ovs_tunnel_info info;
struct dp_upcall_info upcall;
const struct nlattr *a;
int rem;
upcall.cmd = OVS_PACKET_CMD_ACTION;
- upcall.key = key;
upcall.userdata = NULL;
upcall.portid = 0;
+ upcall.egress_tun_info = NULL;
for (a = nla_data(attr), rem = nla_len(attr); rem > 0;
a = nla_next(a, &rem)) {
@@ -502,15 +562,27 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb,
case OVS_USERSPACE_ATTR_PID:
upcall.portid = nla_get_u32(a);
break;
+
+ case OVS_USERSPACE_ATTR_EGRESS_TUN_PORT: {
+ /* Get out tunnel info. */
+ struct vport *vport;
+
+ vport = ovs_vport_rcu(dp, nla_get_u32(a));
+ if (vport) {
+ int err;
+
+ err = ovs_vport_get_egress_tun_info(vport, skb,
+ &info);
+ if (!err)
+ upcall.egress_tun_info = &info;
+ }
+ break;
}
- }
- return ovs_dp_upcall(dp, skb, &upcall);
-}
+ } /* End of switch. */
+ }
-static bool last_action(const struct nlattr *a, int rem)
-{
- return a->nla_len == rem;
+ return ovs_dp_upcall(dp, skb, key, &upcall);
}
static int sample(struct datapath *dp, struct sk_buff *skb,
@@ -547,7 +619,7 @@ static int sample(struct datapath *dp, struct sk_buff *skb,
* user space. This skb will be consumed by its caller.
*/
if (likely(nla_type(a) == OVS_ACTION_ATTR_USERSPACE &&
- last_action(a, rem)))
+ nla_is_last(a, rem)))
return output_userspace(dp, skb, key, a);
skb = skb_clone(skb, GFP_ATOMIC);
@@ -580,18 +652,20 @@ static void execute_hash(struct sk_buff *skb, struct sw_flow_key *key,
key->ovs_flow_hash = hash;
}
-static int execute_set_action(struct sk_buff *skb,
- const struct nlattr *nested_attr)
+static int execute_set_action(struct sk_buff *skb, struct sw_flow_key *key,
+ const struct nlattr *nested_attr)
{
int err = 0;
switch (nla_type(nested_attr)) {
case OVS_KEY_ATTR_PRIORITY:
skb->priority = nla_get_u32(nested_attr);
+ key->phy.priority = skb->priority;
break;
case OVS_KEY_ATTR_SKB_MARK:
skb->mark = nla_get_u32(nested_attr);
+ key->phy.skb_mark = skb->mark;
break;
case OVS_KEY_ATTR_TUNNEL_INFO:
@@ -599,27 +673,31 @@ static int execute_set_action(struct sk_buff *skb,
break;
case OVS_KEY_ATTR_ETHERNET:
- err = set_eth_addr(skb, nla_data(nested_attr));
+ err = set_eth_addr(skb, key, nla_data(nested_attr));
break;
case OVS_KEY_ATTR_IPV4:
- err = set_ipv4(skb, nla_data(nested_attr));
+ err = set_ipv4(skb, key, nla_data(nested_attr));
break;
case OVS_KEY_ATTR_IPV6:
- err = set_ipv6(skb, nla_data(nested_attr));
+ err = set_ipv6(skb, key, nla_data(nested_attr));
break;
case OVS_KEY_ATTR_TCP:
- err = set_tcp(skb, nla_data(nested_attr));
+ err = set_tcp(skb, key, nla_data(nested_attr));
break;
case OVS_KEY_ATTR_UDP:
- err = set_udp(skb, nla_data(nested_attr));
+ err = set_udp(skb, key, nla_data(nested_attr));
break;
case OVS_KEY_ATTR_SCTP:
- err = set_sctp(skb, nla_data(nested_attr));
+ err = set_sctp(skb, key, nla_data(nested_attr));
+ break;
+
+ case OVS_KEY_ATTR_MPLS:
+ err = set_mpls(skb, key, nla_data(nested_attr));
break;
}
@@ -631,13 +709,17 @@ static int execute_recirc(struct datapath *dp, struct sk_buff *skb,
const struct nlattr *a, int rem)
{
struct deferred_action *da;
- int err;
- err = ovs_flow_key_update(skb, key);
- if (err)
- return err;
+ if (!is_flow_key_valid(key)) {
+ int err;
+
+ err = ovs_flow_key_update(skb, key);
+ if (err)
+ return err;
+ }
+ BUG_ON(!is_flow_key_valid(key));
- if (!last_action(a, rem)) {
+ if (!nla_is_last(a, rem)) {
/* Recirc action is the not the last action
* of the action list, need to clone the skb.
*/
@@ -672,7 +754,8 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
/* Every output action needs a separate clone of 'skb', but the common
* case is just a single output action, so that doing a clone and
* then freeing the original skbuff is wasteful. So the following code
- * is slightly obscure just to avoid that. */
+ * is slightly obscure just to avoid that.
+ */
int prev_port = -1;
const struct nlattr *a;
int rem;
@@ -681,8 +764,12 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
a = nla_next(a, &rem)) {
int err = 0;
- if (prev_port != -1) {
- do_output(dp, skb_clone(skb, GFP_ATOMIC), prev_port);
+ if (unlikely(prev_port != -1)) {
+ struct sk_buff *out_skb = skb_clone(skb, GFP_ATOMIC);
+
+ if (out_skb)
+ do_output(dp, out_skb, prev_port);
+
prev_port = -1;
}
@@ -699,19 +786,25 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
execute_hash(skb, key, a);
break;
+ case OVS_ACTION_ATTR_PUSH_MPLS:
+ err = push_mpls(skb, key, nla_data(a));
+ break;
+
+ case OVS_ACTION_ATTR_POP_MPLS:
+ err = pop_mpls(skb, key, nla_get_be16(a));
+ break;
+
case OVS_ACTION_ATTR_PUSH_VLAN:
- err = push_vlan(skb, nla_data(a));
- if (unlikely(err)) /* skb already freed. */
- return err;
+ err = push_vlan(skb, key, nla_data(a));
break;
case OVS_ACTION_ATTR_POP_VLAN:
- err = pop_vlan(skb);
+ err = pop_vlan(skb, key);
break;
case OVS_ACTION_ATTR_RECIRC:
err = execute_recirc(dp, skb, key, a, rem);
- if (last_action(a, rem)) {
+ if (nla_is_last(a, rem)) {
/* If this is the last action, the skb has
* been consumed or freed.
* Return immediately.
@@ -721,7 +814,7 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
break;
case OVS_ACTION_ATTR_SET:
- err = execute_set_action(skb, nla_data(a));
+ err = execute_set_action(skb, key, nla_data(a));
break;
case OVS_ACTION_ATTR_SAMPLE:
@@ -771,14 +864,12 @@ static void process_deferred_actions(struct datapath *dp)
/* Execute a list of actions against 'skb'. */
int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb,
+ const struct sw_flow_actions *acts,
struct sw_flow_key *key)
{
int level = this_cpu_read(exec_actions_level);
- struct sw_flow_actions *acts;
int err;
- acts = rcu_dereference(OVS_CB(skb)->flow->sf_acts);
-
this_cpu_inc(exec_actions_level);
OVS_CB(skb)->egress_tun_info = NULL;
err = do_execute_actions(dp, skb, key,
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index f9e556b..332b5a0 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -59,6 +59,7 @@
#include "vport-netdev.h"
int ovs_net_id __read_mostly;
+EXPORT_SYMBOL_GPL(ovs_net_id);
static struct genl_family dp_packet_genl_family;
static struct genl_family dp_flow_genl_family;
@@ -130,27 +131,41 @@ int lockdep_ovsl_is_held(void)
else
return 1;
}
+EXPORT_SYMBOL_GPL(lockdep_ovsl_is_held);
#endif
static struct vport *new_vport(const struct vport_parms *);
static int queue_gso_packets(struct datapath *dp, struct sk_buff *,
+ const struct sw_flow_key *,
const struct dp_upcall_info *);
static int queue_userspace_packet(struct datapath *dp, struct sk_buff *,
+ const struct sw_flow_key *,
const struct dp_upcall_info *);
-/* Must be called with rcu_read_lock or ovs_mutex. */
-static struct datapath *get_dp(struct net *net, int dp_ifindex)
+/* Must be called with rcu_read_lock. */
+static struct datapath *get_dp_rcu(struct net *net, int dp_ifindex)
{
- struct datapath *dp = NULL;
- struct net_device *dev;
+ struct net_device *dev = dev_get_by_index_rcu(net, dp_ifindex);
- rcu_read_lock();
- dev = dev_get_by_index_rcu(net, dp_ifindex);
if (dev) {
struct vport *vport = ovs_internal_dev_get_vport(dev);
if (vport)
- dp = vport->dp;
+ return vport->dp;
}
+
+ return NULL;
+}
+
+/* The caller must hold either ovs_mutex or rcu_read_lock to keep the
+ * returned dp pointer valid.
+ */
+static inline struct datapath *get_dp(struct net *net, int dp_ifindex)
+{
+ struct datapath *dp;
+
+ WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_ovsl_is_held());
+ rcu_read_lock();
+ dp = get_dp_rcu(net, dp_ifindex);
rcu_read_unlock();
return dp;
@@ -163,7 +178,7 @@ const char *ovs_dp_name(const struct datapath *dp)
return vport->ops->get_name(vport);
}
-static int get_dpifindex(struct datapath *dp)
+static int get_dpifindex(const struct datapath *dp)
{
struct vport *local;
int ifindex;
@@ -185,6 +200,7 @@ static void destroy_dp_rcu(struct rcu_head *rcu)
{
struct datapath *dp = container_of(rcu, struct datapath, rcu);
+ ovs_flow_tbl_destroy(&dp->table);
free_percpu(dp->stats_percpu);
release_net(ovs_dp_get_net(dp));
kfree(dp->ports);
@@ -243,6 +259,7 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
const struct vport *p = OVS_CB(skb)->input_vport;
struct datapath *dp = p->dp;
struct sw_flow *flow;
+ struct sw_flow_actions *sf_acts;
struct dp_stats_percpu *stats;
u64 *stats_counter;
u32 n_mask_hit;
@@ -256,10 +273,10 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
int error;
upcall.cmd = OVS_PACKET_CMD_MISS;
- upcall.key = key;
upcall.userdata = NULL;
upcall.portid = ovs_vport_find_upcall_portid(p, skb);
- error = ovs_dp_upcall(dp, skb, &upcall);
+ upcall.egress_tun_info = NULL;
+ error = ovs_dp_upcall(dp, skb, key, &upcall);
if (unlikely(error))
kfree_skb(skb);
else
@@ -268,10 +285,10 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
goto out;
}
- OVS_CB(skb)->flow = flow;
+ ovs_flow_stats_update(flow, key->tp.flags, skb);
+ sf_acts = rcu_dereference(flow->sf_acts);
+ ovs_execute_actions(dp, skb, sf_acts, key);
- ovs_flow_stats_update(OVS_CB(skb)->flow, key->tp.flags, skb);
- ovs_execute_actions(dp, skb, key);
stats_counter = &stats->n_hit;
out:
@@ -283,6 +300,7 @@ out:
}
int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
+ const struct sw_flow_key *key,
const struct dp_upcall_info *upcall_info)
{
struct dp_stats_percpu *stats;
@@ -294,9 +312,9 @@ int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
}
if (!skb_is_gso(skb))
- err = queue_userspace_packet(dp, skb, upcall_info);
+ err = queue_userspace_packet(dp, skb, key, upcall_info);
else
- err = queue_gso_packets(dp, skb, upcall_info);
+ err = queue_gso_packets(dp, skb, key, upcall_info);
if (err)
goto err;
@@ -313,39 +331,43 @@ err:
}
static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb,
+ const struct sw_flow_key *key,
const struct dp_upcall_info *upcall_info)
{
unsigned short gso_type = skb_shinfo(skb)->gso_type;
- struct dp_upcall_info later_info;
struct sw_flow_key later_key;
struct sk_buff *segs, *nskb;
+ struct ovs_skb_cb ovs_cb;
int err;
+ ovs_cb = *OVS_CB(skb);
segs = __skb_gso_segment(skb, NETIF_F_SG, false);
+ *OVS_CB(skb) = ovs_cb;
if (IS_ERR(segs))
return PTR_ERR(segs);
if (segs == NULL)
return -EINVAL;
+ if (gso_type & SKB_GSO_UDP) {
+ /* The initial flow key extracted by ovs_flow_key_extract()
+ * in this case is for a first fragment, so we need to
+ * properly mark later fragments.
+ */
+ later_key = *key;
+ later_key.ip.frag = OVS_FRAG_TYPE_LATER;
+ }
+
/* Queue all of the segments. */
skb = segs;
do {
- err = queue_userspace_packet(dp, skb, upcall_info);
+ *OVS_CB(skb) = ovs_cb;
+ if (gso_type & SKB_GSO_UDP && skb != segs)
+ key = &later_key;
+
+ err = queue_userspace_packet(dp, skb, key, upcall_info);
if (err)
break;
- if (skb == segs && gso_type & SKB_GSO_UDP) {
- /* The initial flow key extracted by ovs_flow_extract()
- * in this case is for a first fragment, so we need to
- * properly mark later fragments.
- */
- later_key = *upcall_info->key;
- later_key.ip.frag = OVS_FRAG_TYPE_LATER;
-
- later_info = *upcall_info;
- later_info.key = &later_key;
- upcall_info = &later_info;
- }
} while ((skb = skb->next));
/* Free all of the segments. */
@@ -360,46 +382,26 @@ static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb,
return err;
}
-static size_t key_attr_size(void)
-{
- return nla_total_size(4) /* OVS_KEY_ATTR_PRIORITY */
- + nla_total_size(0) /* OVS_KEY_ATTR_TUNNEL */
- + nla_total_size(8) /* OVS_TUNNEL_KEY_ATTR_ID */
- + nla_total_size(4) /* OVS_TUNNEL_KEY_ATTR_IPV4_SRC */
- + nla_total_size(4) /* OVS_TUNNEL_KEY_ATTR_IPV4_DST */
- + nla_total_size(1) /* OVS_TUNNEL_KEY_ATTR_TOS */
- + nla_total_size(1) /* OVS_TUNNEL_KEY_ATTR_TTL */
- + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT */
- + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_CSUM */
- + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_OAM */
- + nla_total_size(256) /* OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS */
- + nla_total_size(4) /* OVS_KEY_ATTR_IN_PORT */
- + nla_total_size(4) /* OVS_KEY_ATTR_SKB_MARK */
- + nla_total_size(12) /* OVS_KEY_ATTR_ETHERNET */
- + nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */
- + nla_total_size(4) /* OVS_KEY_ATTR_8021Q */
- + nla_total_size(0) /* OVS_KEY_ATTR_ENCAP */
- + nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */
- + nla_total_size(40) /* OVS_KEY_ATTR_IPV6 */
- + nla_total_size(2) /* OVS_KEY_ATTR_ICMPV6 */
- + nla_total_size(28); /* OVS_KEY_ATTR_ND */
-}
-
-static size_t upcall_msg_size(const struct nlattr *userdata,
+static size_t upcall_msg_size(const struct dp_upcall_info *upcall_info,
unsigned int hdrlen)
{
size_t size = NLMSG_ALIGN(sizeof(struct ovs_header))
+ nla_total_size(hdrlen) /* OVS_PACKET_ATTR_PACKET */
- + nla_total_size(key_attr_size()); /* OVS_PACKET_ATTR_KEY */
+ + nla_total_size(ovs_key_attr_size()); /* OVS_PACKET_ATTR_KEY */
/* OVS_PACKET_ATTR_USERDATA */
- if (userdata)
- size += NLA_ALIGN(userdata->nla_len);
+ if (upcall_info->userdata)
+ size += NLA_ALIGN(upcall_info->userdata->nla_len);
+
+ /* OVS_PACKET_ATTR_EGRESS_TUN_KEY */
+ if (upcall_info->egress_tun_info)
+ size += nla_total_size(ovs_tun_key_attr_size());
return size;
}
static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
+ const struct sw_flow_key *key,
const struct dp_upcall_info *upcall_info)
{
struct ovs_header *upcall;
@@ -423,11 +425,10 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
if (!nskb)
return -ENOMEM;
- nskb = __vlan_put_tag(nskb, nskb->vlan_proto, vlan_tx_tag_get(nskb));
+ nskb = __vlan_hwaccel_push_inside(nskb);
if (!nskb)
return -ENOMEM;
- nskb->vlan_tci = 0;
skb = nskb;
}
@@ -450,7 +451,7 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
else
hlen = skb->len;
- len = upcall_msg_size(upcall_info->userdata, hlen);
+ len = upcall_msg_size(upcall_info, hlen);
user_skb = genlmsg_new_unicast(len, &info, GFP_ATOMIC);
if (!user_skb) {
err = -ENOMEM;
@@ -462,7 +463,7 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
upcall->dp_ifindex = dp_ifindex;
nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_KEY);
- err = ovs_nla_put_flow(upcall_info->key, upcall_info->key, user_skb);
+ err = ovs_nla_put_flow(key, key, user_skb);
BUG_ON(err);
nla_nest_end(user_skb, nla);
@@ -471,6 +472,14 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
nla_len(upcall_info->userdata),
nla_data(upcall_info->userdata));
+ if (upcall_info->egress_tun_info) {
+ nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_EGRESS_TUN_KEY);
+ err = ovs_nla_put_egress_tunnel_key(user_skb,
+ upcall_info->egress_tun_info);
+ BUG_ON(err);
+ nla_nest_end(user_skb, nla);
+ }
+
/* Only reserve room for attribute header, packet data is added
* in skb_zerocopy() */
if (!(nla = nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, 0))) {
@@ -510,11 +519,13 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
struct sw_flow_actions *acts;
struct sk_buff *packet;
struct sw_flow *flow;
+ struct sw_flow_actions *sf_acts;
struct datapath *dp;
struct ethhdr *eth;
struct vport *input_vport;
int len;
int err;
+ bool log = !a[OVS_FLOW_ATTR_PROBE];
err = -EINVAL;
if (!a[OVS_PACKET_ATTR_PACKET] || !a[OVS_PACKET_ATTR_KEY] ||
@@ -548,29 +559,22 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
goto err_kfree_skb;
err = ovs_flow_key_extract_userspace(a[OVS_PACKET_ATTR_KEY], packet,
- &flow->key);
+ &flow->key, log);
if (err)
goto err_flow_free;
- acts = ovs_nla_alloc_flow_actions(nla_len(a[OVS_PACKET_ATTR_ACTIONS]));
- err = PTR_ERR(acts);
- if (IS_ERR(acts))
- goto err_flow_free;
-
err = ovs_nla_copy_actions(a[OVS_PACKET_ATTR_ACTIONS],
- &flow->key, 0, &acts);
+ &flow->key, &acts, log);
if (err)
goto err_flow_free;
rcu_assign_pointer(flow->sf_acts, acts);
-
OVS_CB(packet)->egress_tun_info = NULL;
- OVS_CB(packet)->flow = flow;
packet->priority = flow->key.phy.priority;
packet->mark = flow->key.phy.skb_mark;
rcu_read_lock();
- dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
+ dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex);
err = -ENODEV;
if (!dp)
goto err_unlock;
@@ -583,9 +587,10 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
goto err_unlock;
OVS_CB(packet)->input_vport = input_vport;
+ sf_acts = rcu_dereference(flow->sf_acts);
local_bh_disable();
- err = ovs_execute_actions(dp, packet, &flow->key);
+ err = ovs_execute_actions(dp, packet, sf_acts, &flow->key);
local_bh_enable();
rcu_read_unlock();
@@ -628,7 +633,7 @@ static struct genl_family dp_packet_genl_family = {
.n_ops = ARRAY_SIZE(dp_packet_genl_ops),
};
-static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats,
+static void get_dp_stats(const struct datapath *dp, struct ovs_dp_stats *stats,
struct ovs_dp_megaflow_stats *mega_stats)
{
int i;
@@ -662,8 +667,8 @@ static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats,
static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts)
{
return NLMSG_ALIGN(sizeof(struct ovs_header))
- + nla_total_size(key_attr_size()) /* OVS_FLOW_ATTR_KEY */
- + nla_total_size(key_attr_size()) /* OVS_FLOW_ATTR_MASK */
+ + nla_total_size(ovs_key_attr_size()) /* OVS_FLOW_ATTR_KEY */
+ + nla_total_size(ovs_key_attr_size()) /* OVS_FLOW_ATTR_MASK */
+ nla_total_size(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */
+ nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */
+ nla_total_size(8) /* OVS_FLOW_ATTR_USED */
@@ -671,58 +676,67 @@ static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts)
}
/* Called with ovs_mutex or RCU read lock. */
-static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex,
- struct sk_buff *skb, u32 portid,
- u32 seq, u32 flags, u8 cmd)
+static int ovs_flow_cmd_fill_match(const struct sw_flow *flow,
+ struct sk_buff *skb)
{
- const int skb_orig_len = skb->len;
- struct nlattr *start;
- struct ovs_flow_stats stats;
- __be16 tcp_flags;
- unsigned long used;
- struct ovs_header *ovs_header;
struct nlattr *nla;
int err;
- ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family, flags, cmd);
- if (!ovs_header)
- return -EMSGSIZE;
-
- ovs_header->dp_ifindex = dp_ifindex;
-
/* Fill flow key. */
nla = nla_nest_start(skb, OVS_FLOW_ATTR_KEY);
if (!nla)
- goto nla_put_failure;
+ return -EMSGSIZE;
err = ovs_nla_put_flow(&flow->unmasked_key, &flow->unmasked_key, skb);
if (err)
- goto error;
+ return err;
+
nla_nest_end(skb, nla);
+ /* Fill flow mask. */
nla = nla_nest_start(skb, OVS_FLOW_ATTR_MASK);
if (!nla)
- goto nla_put_failure;
+ return -EMSGSIZE;
err = ovs_nla_put_flow(&flow->key, &flow->mask->key, skb);
if (err)
- goto error;
+ return err;
nla_nest_end(skb, nla);
+ return 0;
+}
+
+/* Called with ovs_mutex or RCU read lock. */
+static int ovs_flow_cmd_fill_stats(const struct sw_flow *flow,
+ struct sk_buff *skb)
+{
+ struct ovs_flow_stats stats;
+ __be16 tcp_flags;
+ unsigned long used;
ovs_flow_stats_get(flow, &stats, &used, &tcp_flags);
if (used &&
nla_put_u64(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(used)))
- goto nla_put_failure;
+ return -EMSGSIZE;
if (stats.n_packets &&
nla_put(skb, OVS_FLOW_ATTR_STATS, sizeof(struct ovs_flow_stats), &stats))
- goto nla_put_failure;
+ return -EMSGSIZE;
if ((u8)ntohs(tcp_flags) &&
nla_put_u8(skb, OVS_FLOW_ATTR_TCP_FLAGS, (u8)ntohs(tcp_flags)))
- goto nla_put_failure;
+ return -EMSGSIZE;
+
+ return 0;
+}
+
+/* Called with ovs_mutex or RCU read lock. */
+static int ovs_flow_cmd_fill_actions(const struct sw_flow *flow,
+ struct sk_buff *skb, int skb_orig_len)
+{
+ struct nlattr *start;
+ int err;
/* If OVS_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions if
* this is the first flow to be dumped into 'skb'. This is unusual for
@@ -746,17 +760,47 @@ static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex,
nla_nest_end(skb, start);
else {
if (skb_orig_len)
- goto error;
+ return err;
nla_nest_cancel(skb, start);
}
- } else if (skb_orig_len)
- goto nla_put_failure;
+ } else if (skb_orig_len) {
+ return -EMSGSIZE;
+ }
+
+ return 0;
+}
+
+/* Called with ovs_mutex or RCU read lock. */
+static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex,
+ struct sk_buff *skb, u32 portid,
+ u32 seq, u32 flags, u8 cmd)
+{
+ const int skb_orig_len = skb->len;
+ struct ovs_header *ovs_header;
+ int err;
+
+ ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family,
+ flags, cmd);
+ if (!ovs_header)
+ return -EMSGSIZE;
+
+ ovs_header->dp_ifindex = dp_ifindex;
+
+ err = ovs_flow_cmd_fill_match(flow, skb);
+ if (err)
+ goto error;
+
+ err = ovs_flow_cmd_fill_stats(flow, skb);
+ if (err)
+ goto error;
+
+ err = ovs_flow_cmd_fill_actions(flow, skb, skb_orig_len);
+ if (err)
+ goto error;
return genlmsg_end(skb, ovs_header);
-nla_put_failure:
- err = -EMSGSIZE;
error:
genlmsg_cancel(skb, ovs_header);
return err;
@@ -811,13 +855,18 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
struct sw_flow_actions *acts;
struct sw_flow_match match;
int error;
+ bool log = !a[OVS_FLOW_ATTR_PROBE];
/* Must have key and actions. */
error = -EINVAL;
- if (!a[OVS_FLOW_ATTR_KEY])
+ if (!a[OVS_FLOW_ATTR_KEY]) {
+ OVS_NLERR(log, "Flow key attr not present in new flow.");
goto error;
- if (!a[OVS_FLOW_ATTR_ACTIONS])
+ }
+ if (!a[OVS_FLOW_ATTR_ACTIONS]) {
+ OVS_NLERR(log, "Flow actions attr not present in new flow.");
goto error;
+ }
/* Most of the time we need to allocate a new flow, do it before
* locking.
@@ -830,24 +879,19 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
/* Extract key. */
ovs_match_init(&match, &new_flow->unmasked_key, &mask);
- error = ovs_nla_get_match(&match,
- a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK]);
+ error = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY],
+ a[OVS_FLOW_ATTR_MASK], log);
if (error)
goto err_kfree_flow;
ovs_flow_mask_key(&new_flow->key, &new_flow->unmasked_key, &mask);
/* Validate actions. */
- acts = ovs_nla_alloc_flow_actions(nla_len(a[OVS_FLOW_ATTR_ACTIONS]));
- error = PTR_ERR(acts);
- if (IS_ERR(acts))
- goto err_kfree_flow;
-
error = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], &new_flow->key,
- 0, &acts);
+ &acts, log);
if (error) {
- OVS_NLERR("Flow actions may not be safe on all matching packets.\n");
- goto err_kfree_acts;
+ OVS_NLERR(log, "Flow actions may not be safe on all matching packets.");
+ goto err_kfree_flow;
}
reply = ovs_flow_cmd_alloc_info(acts, info, false);
@@ -899,6 +943,7 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
}
/* The unmasked key has to be the same for flow updates. */
if (unlikely(!ovs_flow_cmp_unmasked_key(flow, &match))) {
+ /* Look for any overlapping flow. */
flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
if (!flow) {
error = -ENOENT;
@@ -938,23 +983,21 @@ error:
return error;
}
+/* Factor out action copy to avoid "Wframe-larger-than=1024" warning. */
static struct sw_flow_actions *get_flow_actions(const struct nlattr *a,
const struct sw_flow_key *key,
- const struct sw_flow_mask *mask)
+ const struct sw_flow_mask *mask,
+ bool log)
{
struct sw_flow_actions *acts;
struct sw_flow_key masked_key;
int error;
- acts = ovs_nla_alloc_flow_actions(nla_len(a));
- if (IS_ERR(acts))
- return acts;
-
ovs_flow_mask_key(&masked_key, key, mask);
- error = ovs_nla_copy_actions(a, &masked_key, 0, &acts);
+ error = ovs_nla_copy_actions(a, &masked_key, &acts, log);
if (error) {
- OVS_NLERR("Flow actions may not be safe on all matching packets.\n");
- kfree(acts);
+ OVS_NLERR(log,
+ "Actions may not be safe on all matching packets");
return ERR_PTR(error);
}
@@ -973,29 +1016,31 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
struct sw_flow_actions *old_acts = NULL, *acts = NULL;
struct sw_flow_match match;
int error;
+ bool log = !a[OVS_FLOW_ATTR_PROBE];
/* Extract key. */
error = -EINVAL;
- if (!a[OVS_FLOW_ATTR_KEY])
+ if (!a[OVS_FLOW_ATTR_KEY]) {
+ OVS_NLERR(log, "Flow key attribute not present in set flow.");
goto error;
+ }
ovs_match_init(&match, &key, &mask);
- error = ovs_nla_get_match(&match,
- a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK]);
+ error = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY],
+ a[OVS_FLOW_ATTR_MASK], log);
if (error)
goto error;
/* Validate actions. */
if (a[OVS_FLOW_ATTR_ACTIONS]) {
- acts = get_flow_actions(a[OVS_FLOW_ATTR_ACTIONS], &key, &mask);
+ acts = get_flow_actions(a[OVS_FLOW_ATTR_ACTIONS], &key, &mask,
+ log);
if (IS_ERR(acts)) {
error = PTR_ERR(acts);
goto error;
}
- }
- /* Can allocate before locking if have acts. */
- if (acts) {
+ /* Can allocate before locking if have acts. */
reply = ovs_flow_cmd_alloc_info(acts, info, false);
if (IS_ERR(reply)) {
error = PTR_ERR(reply);
@@ -1070,14 +1115,16 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
struct datapath *dp;
struct sw_flow_match match;
int err;
+ bool log = !a[OVS_FLOW_ATTR_PROBE];
if (!a[OVS_FLOW_ATTR_KEY]) {
- OVS_NLERR("Flow get message rejected, Key attribute missing.\n");
+ OVS_NLERR(log,
+ "Flow get message rejected, Key attribute missing.");
return -EINVAL;
}
ovs_match_init(&match, &key, NULL);
- err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL);
+ err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL, log);
if (err)
return err;
@@ -1118,10 +1165,12 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
struct datapath *dp;
struct sw_flow_match match;
int err;
+ bool log = !a[OVS_FLOW_ATTR_PROBE];
if (likely(a[OVS_FLOW_ATTR_KEY])) {
ovs_match_init(&match, &key, NULL);
- err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL);
+ err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL,
+ log);
if (unlikely(err))
return err;
}
@@ -1179,7 +1228,7 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
struct datapath *dp;
rcu_read_lock();
- dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
+ dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex);
if (!dp) {
rcu_read_unlock();
return -ENODEV;
@@ -1211,8 +1260,10 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = {
[OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED },
+ [OVS_FLOW_ATTR_MASK] = { .type = NLA_NESTED },
[OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED },
[OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG },
+ [OVS_FLOW_ATTR_PROBE] = { .type = NLA_FLAG },
};
static const struct genl_ops dp_flow_genl_ops[] = {
@@ -1313,7 +1364,7 @@ static struct sk_buff *ovs_dp_cmd_alloc_info(struct genl_info *info)
/* Called with rcu_read_lock or ovs_mutex. */
static struct datapath *lookup_datapath(struct net *net,
- struct ovs_header *ovs_header,
+ const struct ovs_header *ovs_header,
struct nlattr *a[OVS_DP_ATTR_MAX + 1])
{
struct datapath *dp;
@@ -1341,7 +1392,7 @@ static void ovs_dp_reset_user_features(struct sk_buff *skb, struct genl_info *in
dp->user_features = 0;
}
-static void ovs_dp_change(struct datapath *dp, struct nlattr **a)
+static void ovs_dp_change(struct datapath *dp, struct nlattr *a[])
{
if (a[OVS_DP_ATTR_USER_FEATURES])
dp->user_features = nla_get_u32(a[OVS_DP_ATTR_USER_FEATURES]);
@@ -1442,7 +1493,7 @@ err_destroy_ports_array:
err_destroy_percpu:
free_percpu(dp->stats_percpu);
err_destroy_table:
- ovs_flow_tbl_destroy(&dp->table, false);
+ ovs_flow_tbl_destroy(&dp->table);
err_free_dp:
release_net(ovs_dp_get_net(dp));
kfree(dp);
@@ -1474,8 +1525,6 @@ static void __dp_destroy(struct datapath *dp)
ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL));
/* RCU destroy the flow table */
- ovs_flow_tbl_destroy(&dp->table, true);
-
call_rcu(&dp->rcu, destroy_dp_rcu);
}
@@ -1707,7 +1756,7 @@ struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid,
/* Called with ovs_mutex or RCU read lock. */
static struct vport *lookup_vport(struct net *net,
- struct ovs_header *ovs_header,
+ const struct ovs_header *ovs_header,
struct nlattr *a[OVS_VPORT_ATTR_MAX + 1])
{
struct datapath *dp;
@@ -1764,6 +1813,7 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
return -ENOMEM;
ovs_lock();
+restart:
dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
err = -ENODEV;
if (!dp)
@@ -1795,8 +1845,11 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
vport = new_vport(&parms);
err = PTR_ERR(vport);
- if (IS_ERR(vport))
+ if (IS_ERR(vport)) {
+ if (err == -EAGAIN)
+ goto restart;
goto exit_unlock_free;
+ }
err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
info->snd_seq, 0, OVS_VPORT_CMD_NEW);
@@ -1939,7 +1992,7 @@ static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
int i, j = 0;
rcu_read_lock();
- dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
+ dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex);
if (!dp) {
rcu_read_unlock();
return -ENODEV;
@@ -2112,12 +2165,18 @@ static int __init dp_init(void)
if (err)
goto error_netns_exit;
+ err = ovs_netdev_init();
+ if (err)
+ goto error_unreg_notifier;
+
err = dp_register_genl();
if (err < 0)
- goto error_unreg_notifier;
+ goto error_unreg_netdev;
return 0;
+error_unreg_netdev:
+ ovs_netdev_exit();
error_unreg_notifier:
unregister_netdevice_notifier(&ovs_dp_device_notifier);
error_netns_exit:
@@ -2137,6 +2196,7 @@ error:
static void dp_cleanup(void)
{
dp_unregister_genl(ARRAY_SIZE(dp_genl_families));
+ ovs_netdev_exit();
unregister_netdevice_notifier(&ovs_dp_device_notifier);
unregister_pernet_device(&ovs_net_ops);
rcu_barrier();
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index 9741354..3ece945 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -94,14 +94,12 @@ struct datapath {
/**
* struct ovs_skb_cb - OVS data in skb CB
- * @flow: The flow associated with this packet. May be %NULL if no flow.
* @egress_tun_key: Tunnel information about this packet on egress path.
* NULL if the packet is not being tunneled.
* @input_vport: The original vport packet came in on. This value is cached
* when a packet is received by OVS.
*/
struct ovs_skb_cb {
- struct sw_flow *flow;
struct ovs_tunnel_info *egress_tun_info;
struct vport *input_vport;
};
@@ -110,18 +108,18 @@ struct ovs_skb_cb {
/**
* struct dp_upcall - metadata to include with a packet to send to userspace
* @cmd: One of %OVS_PACKET_CMD_*.
- * @key: Becomes %OVS_PACKET_ATTR_KEY. Must be nonnull.
* @userdata: If nonnull, its variable-length value is passed to userspace as
* %OVS_PACKET_ATTR_USERDATA.
- * @pid: Netlink PID to which packet should be sent. If @pid is 0 then no
- * packet is sent and the packet is accounted in the datapath's @n_lost
+ * @portid: Netlink portid to which packet should be sent. If @portid is 0
+ * then no packet is sent and the packet is accounted in the datapath's @n_lost
* counter.
+ * @egress_tun_info: If nonnull, becomes %OVS_PACKET_ATTR_EGRESS_TUN_KEY.
*/
struct dp_upcall_info {
- u8 cmd;
- const struct sw_flow_key *key;
+ const struct ovs_tunnel_info *egress_tun_info;
const struct nlattr *userdata;
u32 portid;
+ u8 cmd;
};
/**
@@ -151,7 +149,7 @@ int lockdep_ovsl_is_held(void);
#define rcu_dereference_ovsl(p) \
rcu_dereference_check(p, lockdep_ovsl_is_held())
-static inline struct net *ovs_dp_get_net(struct datapath *dp)
+static inline struct net *ovs_dp_get_net(const struct datapath *dp)
{
return read_pnet(&dp->net);
}
@@ -187,23 +185,23 @@ extern struct genl_family dp_vport_genl_family;
void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key);
void ovs_dp_detach_port(struct vport *);
int ovs_dp_upcall(struct datapath *, struct sk_buff *,
- const struct dp_upcall_info *);
+ const struct sw_flow_key *, const struct dp_upcall_info *);
const char *ovs_dp_name(const struct datapath *dp);
struct sk_buff *ovs_vport_cmd_build_info(struct vport *, u32 pid, u32 seq,
u8 cmd);
int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb,
- struct sw_flow_key *);
+ const struct sw_flow_actions *, struct sw_flow_key *);
void ovs_dp_notify_wq(struct work_struct *work);
int action_fifos_init(void);
void action_fifos_exit(void);
-#define OVS_NLERR(fmt, ...) \
+#define OVS_NLERR(logging_allowed, fmt, ...) \
do { \
- if (net_ratelimit()) \
- pr_info("netlink: " fmt, ##__VA_ARGS__); \
+ if (logging_allowed && net_ratelimit()) \
+ pr_info("netlink: " fmt "\n", ##__VA_ARGS__); \
} while (0)
#endif /* datapath.h */
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index 2b78789..70bef2a 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -32,6 +32,7 @@
#include <linux/if_arp.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
+#include <linux/mpls.h>
#include <linux/sctp.h>
#include <linux/smp.h>
#include <linux/tcp.h>
@@ -42,6 +43,7 @@
#include <net/ip.h>
#include <net/ip_tunnels.h>
#include <net/ipv6.h>
+#include <net/mpls.h>
#include <net/ndisc.h>
#include "datapath.h"
@@ -64,7 +66,7 @@ u64 ovs_flow_used_time(unsigned long flow_jiffies)
#define TCP_FLAGS_BE16(tp) (*(__be16 *)&tcp_flag_word(tp) & htons(0x0FFF))
void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags,
- struct sk_buff *skb)
+ const struct sk_buff *skb)
{
struct flow_stats *stats;
int node = numa_node_id();
@@ -480,6 +482,7 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
return -ENOMEM;
skb_reset_network_header(skb);
+ skb_reset_mac_len(skb);
__skb_push(skb, skb->data - skb_mac_header(skb));
/* Network layer. */
@@ -584,6 +587,33 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
memset(&key->ip, 0, sizeof(key->ip));
memset(&key->ipv4, 0, sizeof(key->ipv4));
}
+ } else if (eth_p_mpls(key->eth.type)) {
+ size_t stack_len = MPLS_HLEN;
+
+ /* In the presence of an MPLS label stack the end of the L2
+ * header and the beginning of the L3 header differ.
+ *
+ * Advance network_header to the beginning of the L3
+ * header. mac_len corresponds to the end of the L2 header.
+ */
+ while (1) {
+ __be32 lse;
+
+ error = check_header(skb, skb->mac_len + stack_len);
+ if (unlikely(error))
+ return 0;
+
+ memcpy(&lse, skb_network_header(skb), MPLS_HLEN);
+
+ if (stack_len == MPLS_HLEN)
+ memcpy(&key->mpls.top_lse, &lse, MPLS_HLEN);
+
+ skb_set_network_header(skb, skb->mac_len + stack_len);
+ if (lse & htonl(MPLS_LS_S_MASK))
+ break;
+
+ stack_len += MPLS_HLEN;
+ }
} else if (key->eth.type == htons(ETH_P_IPV6)) {
int nh_len; /* IPv6 Header + Extensions */
@@ -649,7 +679,7 @@ int ovs_flow_key_update(struct sk_buff *skb, struct sw_flow_key *key)
return key_extract(skb, key);
}
-int ovs_flow_key_extract(struct ovs_tunnel_info *tun_info,
+int ovs_flow_key_extract(const struct ovs_tunnel_info *tun_info,
struct sk_buff *skb, struct sw_flow_key *key)
{
/* Extract metadata from packet. */
@@ -682,12 +712,12 @@ int ovs_flow_key_extract(struct ovs_tunnel_info *tun_info,
int ovs_flow_key_extract_userspace(const struct nlattr *attr,
struct sk_buff *skb,
- struct sw_flow_key *key)
+ struct sw_flow_key *key, bool log)
{
int err;
/* Extract metadata from netlink attributes. */
- err = ovs_nla_get_flow_metadata(attr, key);
+ err = ovs_nla_get_flow_metadata(attr, key, log);
if (err)
return err;
diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
index 7181331..a8b30f3 100644
--- a/net/openvswitch/flow.h
+++ b/net/openvswitch/flow.h
@@ -37,8 +37,8 @@ struct sk_buff;
/* Used to memset ovs_key_ipv4_tunnel padding. */
#define OVS_TUNNEL_KEY_SIZE \
- (offsetof(struct ovs_key_ipv4_tunnel, ipv4_ttl) + \
- FIELD_SIZEOF(struct ovs_key_ipv4_tunnel, ipv4_ttl))
+ (offsetof(struct ovs_key_ipv4_tunnel, tp_dst) + \
+ FIELD_SIZEOF(struct ovs_key_ipv4_tunnel, tp_dst))
struct ovs_key_ipv4_tunnel {
__be64 tun_id;
@@ -47,11 +47,13 @@ struct ovs_key_ipv4_tunnel {
__be16 tun_flags;
u8 ipv4_tos;
u8 ipv4_ttl;
+ __be16 tp_src;
+ __be16 tp_dst;
} __packed __aligned(4); /* Minimize padding. */
struct ovs_tunnel_info {
struct ovs_key_ipv4_tunnel tunnel;
- struct geneve_opt *options;
+ const struct geneve_opt *options;
u8 options_len;
};
@@ -64,27 +66,59 @@ struct ovs_tunnel_info {
FIELD_SIZEOF(struct sw_flow_key, tun_opts) - \
opt_len))
-static inline void ovs_flow_tun_info_init(struct ovs_tunnel_info *tun_info,
- const struct iphdr *iph,
- __be64 tun_id, __be16 tun_flags,
- struct geneve_opt *opts,
- u8 opts_len)
+static inline void __ovs_flow_tun_info_init(struct ovs_tunnel_info *tun_info,
+ __be32 saddr, __be32 daddr,
+ u8 tos, u8 ttl,
+ __be16 tp_src,
+ __be16 tp_dst,
+ __be64 tun_id,
+ __be16 tun_flags,
+ const struct geneve_opt *opts,
+ u8 opts_len)
{
tun_info->tunnel.tun_id = tun_id;
- tun_info->tunnel.ipv4_src = iph->saddr;
- tun_info->tunnel.ipv4_dst = iph->daddr;
- tun_info->tunnel.ipv4_tos = iph->tos;
- tun_info->tunnel.ipv4_ttl = iph->ttl;
+ tun_info->tunnel.ipv4_src = saddr;
+ tun_info->tunnel.ipv4_dst = daddr;
+ tun_info->tunnel.ipv4_tos = tos;
+ tun_info->tunnel.ipv4_ttl = ttl;
tun_info->tunnel.tun_flags = tun_flags;
- /* clear struct padding. */
- memset((unsigned char *)&tun_info->tunnel + OVS_TUNNEL_KEY_SIZE, 0,
- sizeof(tun_info->tunnel) - OVS_TUNNEL_KEY_SIZE);
+ /* For the tunnel types on the top of IPsec, the tp_src and tp_dst of
+ * the upper tunnel are used.
+ * E.g: GRE over IPSEC, the tp_src and tp_port are zero.
+ */
+ tun_info->tunnel.tp_src = tp_src;
+ tun_info->tunnel.tp_dst = tp_dst;
+
+ /* Clear struct padding. */
+ if (sizeof(tun_info->tunnel) != OVS_TUNNEL_KEY_SIZE)
+ memset((unsigned char *)&tun_info->tunnel + OVS_TUNNEL_KEY_SIZE,
+ 0, sizeof(tun_info->tunnel) - OVS_TUNNEL_KEY_SIZE);
tun_info->options = opts;
tun_info->options_len = opts_len;
}
+static inline void ovs_flow_tun_info_init(struct ovs_tunnel_info *tun_info,
+ const struct iphdr *iph,
+ __be16 tp_src,
+ __be16 tp_dst,
+ __be64 tun_id,
+ __be16 tun_flags,
+ const struct geneve_opt *opts,
+ u8 opts_len)
+{
+ __ovs_flow_tun_info_init(tun_info, iph->saddr, iph->daddr,
+ iph->tos, iph->ttl,
+ tp_src, tp_dst,
+ tun_id, tun_flags,
+ opts, opts_len);
+}
+
+#define OVS_SW_FLOW_KEY_METADATA_SIZE \
+ (offsetof(struct sw_flow_key, recirc_id) + \
+ FIELD_SIZEOF(struct sw_flow_key, recirc_id))
+
struct sw_flow_key {
u8 tun_opts[255];
u8 tun_opts_len;
@@ -102,12 +136,17 @@ struct sw_flow_key {
__be16 tci; /* 0 if no VLAN, VLAN_TAG_PRESENT set otherwise. */
__be16 type; /* Ethernet frame type. */
} eth;
- struct {
- u8 proto; /* IP protocol or lower 8 bits of ARP opcode. */
- u8 tos; /* IP ToS. */
- u8 ttl; /* IP TTL/hop limit. */
- u8 frag; /* One of OVS_FRAG_TYPE_*. */
- } ip;
+ union {
+ struct {
+ __be32 top_lse; /* top label stack entry */
+ } mpls;
+ struct {
+ u8 proto; /* IP protocol or lower 8 bits of ARP opcode. */
+ u8 tos; /* IP ToS. */
+ u8 ttl; /* IP TTL/hop limit. */
+ u8 frag; /* One of OVS_FRAG_TYPE_*. */
+ } ip;
+ };
struct {
__be16 src; /* TCP/UDP/SCTP source port. */
__be16 dst; /* TCP/UDP/SCTP destination port. */
@@ -205,18 +244,19 @@ struct arp_eth_header {
} __packed;
void ovs_flow_stats_update(struct sw_flow *, __be16 tcp_flags,
- struct sk_buff *);
+ const struct sk_buff *);
void ovs_flow_stats_get(const struct sw_flow *, struct ovs_flow_stats *,
unsigned long *used, __be16 *tcp_flags);
void ovs_flow_stats_clear(struct sw_flow *);
u64 ovs_flow_used_time(unsigned long flow_jiffies);
int ovs_flow_key_update(struct sk_buff *skb, struct sw_flow_key *key);
-int ovs_flow_key_extract(struct ovs_tunnel_info *tun_info, struct sk_buff *skb,
+int ovs_flow_key_extract(const struct ovs_tunnel_info *tun_info,
+ struct sk_buff *skb,
struct sw_flow_key *key);
/* Extract key from packet coming from userspace. */
int ovs_flow_key_extract_userspace(const struct nlattr *attr,
struct sk_buff *skb,
- struct sw_flow_key *key);
+ struct sw_flow_key *key, bool log);
#endif /* flow.h */
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index 918e966..9645a21 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -46,24 +46,22 @@
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/ndisc.h>
+#include <net/mpls.h>
#include "flow_netlink.h"
-static void update_range__(struct sw_flow_match *match,
- size_t offset, size_t size, bool is_mask)
+static void update_range(struct sw_flow_match *match,
+ size_t offset, size_t size, bool is_mask)
{
- struct sw_flow_key_range *range = NULL;
+ struct sw_flow_key_range *range;
size_t start = rounddown(offset, sizeof(long));
size_t end = roundup(offset + size, sizeof(long));
if (!is_mask)
range = &match->range;
- else if (match->mask)
+ else
range = &match->mask->range;
- if (!range)
- return;
-
if (range->start == range->end) {
range->start = start;
range->end = end;
@@ -79,22 +77,20 @@ static void update_range__(struct sw_flow_match *match,
#define SW_FLOW_KEY_PUT(match, field, value, is_mask) \
do { \
- update_range__(match, offsetof(struct sw_flow_key, field), \
- sizeof((match)->key->field), is_mask); \
- if (is_mask) { \
- if ((match)->mask) \
- (match)->mask->key.field = value; \
- } else { \
+ update_range(match, offsetof(struct sw_flow_key, field), \
+ sizeof((match)->key->field), is_mask); \
+ if (is_mask) \
+ (match)->mask->key.field = value; \
+ else \
(match)->key->field = value; \
- } \
} while (0)
#define SW_FLOW_KEY_MEMCPY_OFFSET(match, offset, value_p, len, is_mask) \
do { \
- update_range__(match, offset, len, is_mask); \
+ update_range(match, offset, len, is_mask); \
if (is_mask) \
memcpy((u8 *)&(match)->mask->key + offset, value_p, \
- len); \
+ len); \
else \
memcpy((u8 *)(match)->key + offset, value_p, len); \
} while (0)
@@ -103,22 +99,20 @@ static void update_range__(struct sw_flow_match *match,
SW_FLOW_KEY_MEMCPY_OFFSET(match, offsetof(struct sw_flow_key, field), \
value_p, len, is_mask)
-#define SW_FLOW_KEY_MEMSET_FIELD(match, field, value, is_mask) \
- do { \
- update_range__(match, offsetof(struct sw_flow_key, field), \
- sizeof((match)->key->field), is_mask); \
- if (is_mask) { \
- if ((match)->mask) \
- memset((u8 *)&(match)->mask->key.field, value,\
- sizeof((match)->mask->key.field)); \
- } else { \
+#define SW_FLOW_KEY_MEMSET_FIELD(match, field, value, is_mask) \
+ do { \
+ update_range(match, offsetof(struct sw_flow_key, field), \
+ sizeof((match)->key->field), is_mask); \
+ if (is_mask) \
+ memset((u8 *)&(match)->mask->key.field, value, \
+ sizeof((match)->mask->key.field)); \
+ else \
memset((u8 *)&(match)->key->field, value, \
sizeof((match)->key->field)); \
- } \
} while (0)
static bool match_validate(const struct sw_flow_match *match,
- u64 key_attrs, u64 mask_attrs)
+ u64 key_attrs, u64 mask_attrs, bool log)
{
u64 key_expected = 1 << OVS_KEY_ATTR_ETHERNET;
u64 mask_allowed = key_attrs; /* At most allow all key attributes */
@@ -134,7 +128,8 @@ static bool match_validate(const struct sw_flow_match *match,
| (1 << OVS_KEY_ATTR_ICMP)
| (1 << OVS_KEY_ATTR_ICMPV6)
| (1 << OVS_KEY_ATTR_ARP)
- | (1 << OVS_KEY_ATTR_ND));
+ | (1 << OVS_KEY_ATTR_ND)
+ | (1 << OVS_KEY_ATTR_MPLS));
/* Always allowed mask fields. */
mask_allowed |= ((1 << OVS_KEY_ATTR_TUNNEL)
@@ -149,6 +144,12 @@ static bool match_validate(const struct sw_flow_match *match,
mask_allowed |= 1 << OVS_KEY_ATTR_ARP;
}
+ if (eth_p_mpls(match->key->eth.type)) {
+ key_expected |= 1 << OVS_KEY_ATTR_MPLS;
+ if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
+ mask_allowed |= 1 << OVS_KEY_ATTR_MPLS;
+ }
+
if (match->key->eth.type == htons(ETH_P_IP)) {
key_expected |= 1 << OVS_KEY_ATTR_IPV4;
if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
@@ -229,21 +230,65 @@ static bool match_validate(const struct sw_flow_match *match,
if ((key_attrs & key_expected) != key_expected) {
/* Key attributes check failed. */
- OVS_NLERR("Missing expected key attributes (key_attrs=%llx, expected=%llx).\n",
- (unsigned long long)key_attrs, (unsigned long long)key_expected);
+ OVS_NLERR(log, "Missing key (keys=%llx, expected=%llx)",
+ (unsigned long long)key_attrs,
+ (unsigned long long)key_expected);
return false;
}
if ((mask_attrs & mask_allowed) != mask_attrs) {
/* Mask attributes check failed. */
- OVS_NLERR("Contain more than allowed mask fields (mask_attrs=%llx, mask_allowed=%llx).\n",
- (unsigned long long)mask_attrs, (unsigned long long)mask_allowed);
+ OVS_NLERR(log, "Unexpected mask (mask=%llx, allowed=%llx)",
+ (unsigned long long)mask_attrs,
+ (unsigned long long)mask_allowed);
return false;
}
return true;
}
+size_t ovs_tun_key_attr_size(void)
+{
+ /* Whenever adding new OVS_TUNNEL_KEY_ FIELDS, we should consider
+ * updating this function.
+ */
+ return nla_total_size(8) /* OVS_TUNNEL_KEY_ATTR_ID */
+ + nla_total_size(4) /* OVS_TUNNEL_KEY_ATTR_IPV4_SRC */
+ + nla_total_size(4) /* OVS_TUNNEL_KEY_ATTR_IPV4_DST */
+ + nla_total_size(1) /* OVS_TUNNEL_KEY_ATTR_TOS */
+ + nla_total_size(1) /* OVS_TUNNEL_KEY_ATTR_TTL */
+ + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT */
+ + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_CSUM */
+ + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_OAM */
+ + nla_total_size(256) /* OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS */
+ + nla_total_size(2) /* OVS_TUNNEL_KEY_ATTR_TP_SRC */
+ + nla_total_size(2); /* OVS_TUNNEL_KEY_ATTR_TP_DST */
+}
+
+size_t ovs_key_attr_size(void)
+{
+ /* Whenever adding new OVS_KEY_ FIELDS, we should consider
+ * updating this function.
+ */
+ BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 22);
+
+ return nla_total_size(4) /* OVS_KEY_ATTR_PRIORITY */
+ + nla_total_size(0) /* OVS_KEY_ATTR_TUNNEL */
+ + ovs_tun_key_attr_size()
+ + nla_total_size(4) /* OVS_KEY_ATTR_IN_PORT */
+ + nla_total_size(4) /* OVS_KEY_ATTR_SKB_MARK */
+ + nla_total_size(4) /* OVS_KEY_ATTR_DP_HASH */
+ + nla_total_size(4) /* OVS_KEY_ATTR_RECIRC_ID */
+ + nla_total_size(12) /* OVS_KEY_ATTR_ETHERNET */
+ + nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */
+ + nla_total_size(4) /* OVS_KEY_ATTR_VLAN */
+ + nla_total_size(0) /* OVS_KEY_ATTR_ENCAP */
+ + nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */
+ + nla_total_size(40) /* OVS_KEY_ATTR_IPV6 */
+ + nla_total_size(2) /* OVS_KEY_ATTR_ICMPV6 */
+ + nla_total_size(28); /* OVS_KEY_ATTR_ND */
+}
+
/* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute. */
static const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
[OVS_KEY_ATTR_ENCAP] = -1,
@@ -266,6 +311,7 @@ static const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
[OVS_KEY_ATTR_RECIRC_ID] = sizeof(u32),
[OVS_KEY_ATTR_DP_HASH] = sizeof(u32),
[OVS_KEY_ATTR_TUNNEL] = -1,
+ [OVS_KEY_ATTR_MPLS] = sizeof(struct ovs_key_mpls),
};
static bool is_all_zero(const u8 *fp, size_t size)
@@ -284,7 +330,7 @@ static bool is_all_zero(const u8 *fp, size_t size)
static int __parse_flow_nlattrs(const struct nlattr *attr,
const struct nlattr *a[],
- u64 *attrsp, bool nz)
+ u64 *attrsp, bool log, bool nz)
{
const struct nlattr *nla;
u64 attrs;
@@ -296,21 +342,20 @@ static int __parse_flow_nlattrs(const struct nlattr *attr,
int expected_len;
if (type > OVS_KEY_ATTR_MAX) {
- OVS_NLERR("Unknown key attribute (type=%d, max=%d).\n",
+ OVS_NLERR(log, "Key type %d is out of range max %d",
type, OVS_KEY_ATTR_MAX);
return -EINVAL;
}
if (attrs & (1 << type)) {
- OVS_NLERR("Duplicate key attribute (type %d).\n", type);
+ OVS_NLERR(log, "Duplicate key (type %d).", type);
return -EINVAL;
}
expected_len = ovs_key_lens[type];
if (nla_len(nla) != expected_len && expected_len != -1) {
- OVS_NLERR("Key attribute has unexpected length (type=%d"
- ", length=%d, expected=%d).\n", type,
- nla_len(nla), expected_len);
+ OVS_NLERR(log, "Key %d has unexpected len %d expected %d",
+ type, nla_len(nla), expected_len);
return -EINVAL;
}
@@ -320,7 +365,7 @@ static int __parse_flow_nlattrs(const struct nlattr *attr,
}
}
if (rem) {
- OVS_NLERR("Message has %d unknown bytes.\n", rem);
+ OVS_NLERR(log, "Message has %d unknown bytes.", rem);
return -EINVAL;
}
@@ -329,28 +374,84 @@ static int __parse_flow_nlattrs(const struct nlattr *attr,
}
static int parse_flow_mask_nlattrs(const struct nlattr *attr,
- const struct nlattr *a[], u64 *attrsp)
+ const struct nlattr *a[], u64 *attrsp,
+ bool log)
{
- return __parse_flow_nlattrs(attr, a, attrsp, true);
+ return __parse_flow_nlattrs(attr, a, attrsp, log, true);
}
static int parse_flow_nlattrs(const struct nlattr *attr,
- const struct nlattr *a[], u64 *attrsp)
+ const struct nlattr *a[], u64 *attrsp,
+ bool log)
{
- return __parse_flow_nlattrs(attr, a, attrsp, false);
+ return __parse_flow_nlattrs(attr, a, attrsp, log, false);
+}
+
+static int genev_tun_opt_from_nlattr(const struct nlattr *a,
+ struct sw_flow_match *match, bool is_mask,
+ bool log)
+{
+ unsigned long opt_key_offset;
+
+ if (nla_len(a) > sizeof(match->key->tun_opts)) {
+ OVS_NLERR(log, "Geneve option length err (len %d, max %zu).",
+ nla_len(a), sizeof(match->key->tun_opts));
+ return -EINVAL;
+ }
+
+ if (nla_len(a) % 4 != 0) {
+ OVS_NLERR(log, "Geneve opt len %d is not a multiple of 4.",
+ nla_len(a));
+ return -EINVAL;
+ }
+
+ /* We need to record the length of the options passed
+ * down, otherwise packets with the same format but
+ * additional options will be silently matched.
+ */
+ if (!is_mask) {
+ SW_FLOW_KEY_PUT(match, tun_opts_len, nla_len(a),
+ false);
+ } else {
+ /* This is somewhat unusual because it looks at
+ * both the key and mask while parsing the
+ * attributes (and by extension assumes the key
+ * is parsed first). Normally, we would verify
+ * that each is the correct length and that the
+ * attributes line up in the validate function.
+ * However, that is difficult because this is
+ * variable length and we won't have the
+ * information later.
+ */
+ if (match->key->tun_opts_len != nla_len(a)) {
+ OVS_NLERR(log, "Geneve option len %d != mask len %d",
+ match->key->tun_opts_len, nla_len(a));
+ return -EINVAL;
+ }
+
+ SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff, true);
+ }
+
+ opt_key_offset = (unsigned long)GENEVE_OPTS((struct sw_flow_key *)0,
+ nla_len(a));
+ SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, nla_data(a),
+ nla_len(a), is_mask);
+ return 0;
}
static int ipv4_tun_from_nlattr(const struct nlattr *attr,
- struct sw_flow_match *match, bool is_mask)
+ struct sw_flow_match *match, bool is_mask,
+ bool log)
{
struct nlattr *a;
int rem;
bool ttl = false;
__be16 tun_flags = 0;
- unsigned long opt_key_offset;
nla_for_each_nested(a, attr, rem) {
int type = nla_type(a);
+ int err;
+
static const u32 ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] = {
[OVS_TUNNEL_KEY_ATTR_ID] = sizeof(u64),
[OVS_TUNNEL_KEY_ATTR_IPV4_SRC] = sizeof(u32),
@@ -359,20 +460,21 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr,
[OVS_TUNNEL_KEY_ATTR_TTL] = 1,
[OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = 0,
[OVS_TUNNEL_KEY_ATTR_CSUM] = 0,
+ [OVS_TUNNEL_KEY_ATTR_TP_SRC] = sizeof(u16),
+ [OVS_TUNNEL_KEY_ATTR_TP_DST] = sizeof(u16),
[OVS_TUNNEL_KEY_ATTR_OAM] = 0,
[OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS] = -1,
};
if (type > OVS_TUNNEL_KEY_ATTR_MAX) {
- OVS_NLERR("Unknown IPv4 tunnel attribute (type=%d, max=%d).\n",
- type, OVS_TUNNEL_KEY_ATTR_MAX);
+ OVS_NLERR(log, "Tunnel attr %d out of range max %d",
+ type, OVS_TUNNEL_KEY_ATTR_MAX);
return -EINVAL;
}
if (ovs_tunnel_key_lens[type] != nla_len(a) &&
ovs_tunnel_key_lens[type] != -1) {
- OVS_NLERR("IPv4 tunnel attribute type has unexpected "
- " length (type=%d, length=%d, expected=%d).\n",
+ OVS_NLERR(log, "Tunnel attr %d has unexpected len %d expected %d",
type, nla_len(a), ovs_tunnel_key_lens[type]);
return -EINVAL;
}
@@ -406,62 +508,26 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr,
case OVS_TUNNEL_KEY_ATTR_CSUM:
tun_flags |= TUNNEL_CSUM;
break;
+ case OVS_TUNNEL_KEY_ATTR_TP_SRC:
+ SW_FLOW_KEY_PUT(match, tun_key.tp_src,
+ nla_get_be16(a), is_mask);
+ break;
+ case OVS_TUNNEL_KEY_ATTR_TP_DST:
+ SW_FLOW_KEY_PUT(match, tun_key.tp_dst,
+ nla_get_be16(a), is_mask);
+ break;
case OVS_TUNNEL_KEY_ATTR_OAM:
tun_flags |= TUNNEL_OAM;
break;
case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS:
- tun_flags |= TUNNEL_OPTIONS_PRESENT;
- if (nla_len(a) > sizeof(match->key->tun_opts)) {
- OVS_NLERR("Geneve option length exceeds maximum size (len %d, max %zu).\n",
- nla_len(a),
- sizeof(match->key->tun_opts));
- return -EINVAL;
- }
-
- if (nla_len(a) % 4 != 0) {
- OVS_NLERR("Geneve option length is not a multiple of 4 (len %d).\n",
- nla_len(a));
- return -EINVAL;
- }
-
- /* We need to record the length of the options passed
- * down, otherwise packets with the same format but
- * additional options will be silently matched.
- */
- if (!is_mask) {
- SW_FLOW_KEY_PUT(match, tun_opts_len, nla_len(a),
- false);
- } else {
- /* This is somewhat unusual because it looks at
- * both the key and mask while parsing the
- * attributes (and by extension assumes the key
- * is parsed first). Normally, we would verify
- * that each is the correct length and that the
- * attributes line up in the validate function.
- * However, that is difficult because this is
- * variable length and we won't have the
- * information later.
- */
- if (match->key->tun_opts_len != nla_len(a)) {
- OVS_NLERR("Geneve option key length (%d) is different from mask length (%d).",
- match->key->tun_opts_len,
- nla_len(a));
- return -EINVAL;
- }
-
- SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff,
- true);
- }
+ err = genev_tun_opt_from_nlattr(a, match, is_mask, log);
+ if (err)
+ return err;
- opt_key_offset = (unsigned long)GENEVE_OPTS(
- (struct sw_flow_key *)0,
- nla_len(a));
- SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset,
- nla_data(a), nla_len(a),
- is_mask);
+ tun_flags |= TUNNEL_OPTIONS_PRESENT;
break;
default:
- OVS_NLERR("Unknown IPv4 tunnel attribute (%d).\n",
+ OVS_NLERR(log, "Unknown IPv4 tunnel attribute %d",
type);
return -EINVAL;
}
@@ -470,18 +536,19 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr,
SW_FLOW_KEY_PUT(match, tun_key.tun_flags, tun_flags, is_mask);
if (rem > 0) {
- OVS_NLERR("IPv4 tunnel attribute has %d unknown bytes.\n", rem);
+ OVS_NLERR(log, "IPv4 tunnel attribute has %d unknown bytes.",
+ rem);
return -EINVAL;
}
if (!is_mask) {
if (!match->key->tun_key.ipv4_dst) {
- OVS_NLERR("IPv4 tunnel destination address is zero.\n");
+ OVS_NLERR(log, "IPv4 tunnel dst address is zero");
return -EINVAL;
}
if (!ttl) {
- OVS_NLERR("IPv4 tunnel TTL not specified.\n");
+ OVS_NLERR(log, "IPv4 tunnel TTL not specified.");
return -EINVAL;
}
}
@@ -514,6 +581,12 @@ static int __ipv4_tun_to_nlattr(struct sk_buff *skb,
if ((output->tun_flags & TUNNEL_CSUM) &&
nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_CSUM))
return -EMSGSIZE;
+ if (output->tp_src &&
+ nla_put_be16(skb, OVS_TUNNEL_KEY_ATTR_TP_SRC, output->tp_src))
+ return -EMSGSIZE;
+ if (output->tp_dst &&
+ nla_put_be16(skb, OVS_TUNNEL_KEY_ATTR_TP_DST, output->tp_dst))
+ return -EMSGSIZE;
if ((output->tun_flags & TUNNEL_OAM) &&
nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_OAM))
return -EMSGSIZE;
@@ -525,7 +598,6 @@ static int __ipv4_tun_to_nlattr(struct sk_buff *skb,
return 0;
}
-
static int ipv4_tun_to_nlattr(struct sk_buff *skb,
const struct ovs_key_ipv4_tunnel *output,
const struct geneve_opt *tun_opts,
@@ -546,8 +618,17 @@ static int ipv4_tun_to_nlattr(struct sk_buff *skb,
return 0;
}
+int ovs_nla_put_egress_tunnel_key(struct sk_buff *skb,
+ const struct ovs_tunnel_info *egress_tun_info)
+{
+ return __ipv4_tun_to_nlattr(skb, &egress_tun_info->tunnel,
+ egress_tun_info->options,
+ egress_tun_info->options_len);
+}
+
static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs,
- const struct nlattr **a, bool is_mask)
+ const struct nlattr **a, bool is_mask,
+ bool log)
{
if (*attrs & (1 << OVS_KEY_ATTR_DP_HASH)) {
u32 hash_val = nla_get_u32(a[OVS_KEY_ATTR_DP_HASH]);
@@ -572,10 +653,13 @@ static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs,
if (*attrs & (1 << OVS_KEY_ATTR_IN_PORT)) {
u32 in_port = nla_get_u32(a[OVS_KEY_ATTR_IN_PORT]);
- if (is_mask)
+ if (is_mask) {
in_port = 0xffffffff; /* Always exact match in_port. */
- else if (in_port >= DP_MAX_PORTS)
+ } else if (in_port >= DP_MAX_PORTS) {
+ OVS_NLERR(log, "Port %d exceeds max allowable %d",
+ in_port, DP_MAX_PORTS);
return -EINVAL;
+ }
SW_FLOW_KEY_PUT(match, phy.in_port, in_port, is_mask);
*attrs &= ~(1 << OVS_KEY_ATTR_IN_PORT);
@@ -591,7 +675,7 @@ static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs,
}
if (*attrs & (1 << OVS_KEY_ATTR_TUNNEL)) {
if (ipv4_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], match,
- is_mask))
+ is_mask, log))
return -EINVAL;
*attrs &= ~(1 << OVS_KEY_ATTR_TUNNEL);
}
@@ -599,12 +683,12 @@ static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs,
}
static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs,
- const struct nlattr **a, bool is_mask)
+ const struct nlattr **a, bool is_mask,
+ bool log)
{
int err;
- u64 orig_attrs = attrs;
- err = metadata_from_nlattrs(match, &attrs, a, is_mask);
+ err = metadata_from_nlattrs(match, &attrs, a, is_mask, log);
if (err)
return err;
@@ -625,17 +709,16 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs,
tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
if (!(tci & htons(VLAN_TAG_PRESENT))) {
if (is_mask)
- OVS_NLERR("VLAN TCI mask does not have exact match for VLAN_TAG_PRESENT bit.\n");
+ OVS_NLERR(log, "VLAN TCI mask does not have exact match for VLAN_TAG_PRESENT bit.");
else
- OVS_NLERR("VLAN TCI does not have VLAN_TAG_PRESENT bit set.\n");
+ OVS_NLERR(log, "VLAN TCI does not have VLAN_TAG_PRESENT bit set.");
return -EINVAL;
}
SW_FLOW_KEY_PUT(match, eth.tci, tci, is_mask);
attrs &= ~(1 << OVS_KEY_ATTR_VLAN);
- } else if (!is_mask)
- SW_FLOW_KEY_PUT(match, eth.tci, htons(0xffff), true);
+ }
if (attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) {
__be16 eth_type;
@@ -645,8 +728,8 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs,
/* Always exact match EtherType. */
eth_type = htons(0xffff);
} else if (ntohs(eth_type) < ETH_P_802_3_MIN) {
- OVS_NLERR("EtherType is less than minimum (type=%x, min=%x).\n",
- ntohs(eth_type), ETH_P_802_3_MIN);
+ OVS_NLERR(log, "EtherType %x is less than min %x",
+ ntohs(eth_type), ETH_P_802_3_MIN);
return -EINVAL;
}
@@ -661,8 +744,8 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs,
ipv4_key = nla_data(a[OVS_KEY_ATTR_IPV4]);
if (!is_mask && ipv4_key->ipv4_frag > OVS_FRAG_TYPE_MAX) {
- OVS_NLERR("Unknown IPv4 fragment type (value=%d, max=%d).\n",
- ipv4_key->ipv4_frag, OVS_FRAG_TYPE_MAX);
+ OVS_NLERR(log, "IPv4 frag type %d is out of range max %d",
+ ipv4_key->ipv4_frag, OVS_FRAG_TYPE_MAX);
return -EINVAL;
}
SW_FLOW_KEY_PUT(match, ip.proto,
@@ -685,13 +768,13 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs,
ipv6_key = nla_data(a[OVS_KEY_ATTR_IPV6]);
if (!is_mask && ipv6_key->ipv6_frag > OVS_FRAG_TYPE_MAX) {
- OVS_NLERR("Unknown IPv6 fragment type (value=%d, max=%d).\n",
- ipv6_key->ipv6_frag, OVS_FRAG_TYPE_MAX);
+ OVS_NLERR(log, "IPv6 frag type %d is out of range max %d",
+ ipv6_key->ipv6_frag, OVS_FRAG_TYPE_MAX);
return -EINVAL;
}
if (!is_mask && ipv6_key->ipv6_label & htonl(0xFFF00000)) {
- OVS_NLERR("IPv6 flow label %x is out of range (max=%x).\n",
+ OVS_NLERR(log, "IPv6 flow label %x is out of range (max=%x).\n",
ntohl(ipv6_key->ipv6_label), (1 << 20) - 1);
return -EINVAL;
}
@@ -723,7 +806,7 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs,
arp_key = nla_data(a[OVS_KEY_ATTR_ARP]);
if (!is_mask && (arp_key->arp_op & htons(0xff00))) {
- OVS_NLERR("Unknown ARP opcode (opcode=%d).\n",
+ OVS_NLERR(log, "Unknown ARP opcode (opcode=%d).",
arp_key->arp_op);
return -EINVAL;
}
@@ -742,6 +825,16 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs,
attrs &= ~(1 << OVS_KEY_ATTR_ARP);
}
+ if (attrs & (1 << OVS_KEY_ATTR_MPLS)) {
+ const struct ovs_key_mpls *mpls_key;
+
+ mpls_key = nla_data(a[OVS_KEY_ATTR_MPLS]);
+ SW_FLOW_KEY_PUT(match, mpls.top_lse,
+ mpls_key->mpls_lse, is_mask);
+
+ attrs &= ~(1 << OVS_KEY_ATTR_MPLS);
+ }
+
if (attrs & (1 << OVS_KEY_ATTR_TCP)) {
const struct ovs_key_tcp *tcp_key;
@@ -752,15 +845,9 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs,
}
if (attrs & (1 << OVS_KEY_ATTR_TCP_FLAGS)) {
- if (orig_attrs & (1 << OVS_KEY_ATTR_IPV4)) {
- SW_FLOW_KEY_PUT(match, tp.flags,
- nla_get_be16(a[OVS_KEY_ATTR_TCP_FLAGS]),
- is_mask);
- } else {
- SW_FLOW_KEY_PUT(match, tp.flags,
- nla_get_be16(a[OVS_KEY_ATTR_TCP_FLAGS]),
- is_mask);
- }
+ SW_FLOW_KEY_PUT(match, tp.flags,
+ nla_get_be16(a[OVS_KEY_ATTR_TCP_FLAGS]),
+ is_mask);
attrs &= ~(1 << OVS_KEY_ATTR_TCP_FLAGS);
}
@@ -819,8 +906,11 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs,
attrs &= ~(1 << OVS_KEY_ATTR_ND);
}
- if (attrs != 0)
+ if (attrs != 0) {
+ OVS_NLERR(log, "Unknown key attributes %llx",
+ (unsigned long long)attrs);
return -EINVAL;
+ }
return 0;
}
@@ -858,10 +948,14 @@ static void mask_set_nlattr(struct nlattr *attr, u8 val)
* of this flow.
* @mask: Optional. Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink
* attribute specifies the mask field of the wildcarded flow.
+ * @log: Boolean to allow kernel error logging. Normally true, but when
+ * probing for feature compatibility this should be passed in as false to
+ * suppress unnecessary error logging.
*/
int ovs_nla_get_match(struct sw_flow_match *match,
- const struct nlattr *key,
- const struct nlattr *mask)
+ const struct nlattr *nla_key,
+ const struct nlattr *nla_mask,
+ bool log)
{
const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
const struct nlattr *encap;
@@ -871,7 +965,7 @@ int ovs_nla_get_match(struct sw_flow_match *match,
bool encap_valid = false;
int err;
- err = parse_flow_nlattrs(key, a, &key_attrs);
+ err = parse_flow_nlattrs(nla_key, a, &key_attrs, log);
if (err)
return err;
@@ -882,7 +976,7 @@ int ovs_nla_get_match(struct sw_flow_match *match,
if (!((key_attrs & (1 << OVS_KEY_ATTR_VLAN)) &&
(key_attrs & (1 << OVS_KEY_ATTR_ENCAP)))) {
- OVS_NLERR("Invalid Vlan frame.\n");
+ OVS_NLERR(log, "Invalid Vlan frame.");
return -EINVAL;
}
@@ -893,61 +987,68 @@ int ovs_nla_get_match(struct sw_flow_match *match,
encap_valid = true;
if (tci & htons(VLAN_TAG_PRESENT)) {
- err = parse_flow_nlattrs(encap, a, &key_attrs);
+ err = parse_flow_nlattrs(encap, a, &key_attrs, log);
if (err)
return err;
} else if (!tci) {
/* Corner case for truncated 802.1Q header. */
if (nla_len(encap)) {
- OVS_NLERR("Truncated 802.1Q header has non-zero encap attribute.\n");
+ OVS_NLERR(log, "Truncated 802.1Q header has non-zero encap attribute.");
return -EINVAL;
}
} else {
- OVS_NLERR("Encap attribute is set for a non-VLAN frame.\n");
+ OVS_NLERR(log, "Encap attr is set for non-VLAN frame");
return -EINVAL;
}
}
- err = ovs_key_from_nlattrs(match, key_attrs, a, false);
+ err = ovs_key_from_nlattrs(match, key_attrs, a, false, log);
if (err)
return err;
- if (match->mask && !mask) {
- /* Create an exact match mask. We need to set to 0xff all the
- * 'match->mask' fields that have been touched in 'match->key'.
- * We cannot simply memset 'match->mask', because padding bytes
- * and fields not specified in 'match->key' should be left to 0.
- * Instead, we use a stream of netlink attributes, copied from
- * 'key' and set to 0xff: ovs_key_from_nlattrs() will take care
- * of filling 'match->mask' appropriately.
- */
- newmask = kmemdup(key, nla_total_size(nla_len(key)),
- GFP_KERNEL);
- if (!newmask)
- return -ENOMEM;
+ if (match->mask) {
+ if (!nla_mask) {
+ /* Create an exact match mask. We need to set to 0xff
+ * all the 'match->mask' fields that have been touched
+ * in 'match->key'. We cannot simply memset
+ * 'match->mask', because padding bytes and fields not
+ * specified in 'match->key' should be left to 0.
+ * Instead, we use a stream of netlink attributes,
+ * copied from 'key' and set to 0xff.
+ * ovs_key_from_nlattrs() will take care of filling
+ * 'match->mask' appropriately.
+ */
+ newmask = kmemdup(nla_key,
+ nla_total_size(nla_len(nla_key)),
+ GFP_KERNEL);
+ if (!newmask)
+ return -ENOMEM;
- mask_set_nlattr(newmask, 0xff);
+ mask_set_nlattr(newmask, 0xff);
- /* The userspace does not send tunnel attributes that are 0,
- * but we should not wildcard them nonetheless.
- */
- if (match->key->tun_key.ipv4_dst)
- SW_FLOW_KEY_MEMSET_FIELD(match, tun_key, 0xff, true);
+ /* The userspace does not send tunnel attributes that
+ * are 0, but we should not wildcard them nonetheless.
+ */
+ if (match->key->tun_key.ipv4_dst)
+ SW_FLOW_KEY_MEMSET_FIELD(match, tun_key,
+ 0xff, true);
- mask = newmask;
- }
+ nla_mask = newmask;
+ }
- if (mask) {
- err = parse_flow_mask_nlattrs(mask, a, &mask_attrs);
+ err = parse_flow_mask_nlattrs(nla_mask, a, &mask_attrs, log);
if (err)
goto free_newmask;
+ /* Always match on tci. */
+ SW_FLOW_KEY_PUT(match, eth.tci, htons(0xffff), true);
+
if (mask_attrs & 1 << OVS_KEY_ATTR_ENCAP) {
__be16 eth_type = 0;
__be16 tci = 0;
if (!encap_valid) {
- OVS_NLERR("Encap mask attribute is set for non-VLAN frame.\n");
+ OVS_NLERR(log, "Encap mask attribute is set for non-VLAN frame.");
err = -EINVAL;
goto free_newmask;
}
@@ -959,12 +1060,13 @@ int ovs_nla_get_match(struct sw_flow_match *match,
if (eth_type == htons(0xffff)) {
mask_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
encap = a[OVS_KEY_ATTR_ENCAP];
- err = parse_flow_mask_nlattrs(encap, a, &mask_attrs);
+ err = parse_flow_mask_nlattrs(encap, a,
+ &mask_attrs, log);
if (err)
goto free_newmask;
} else {
- OVS_NLERR("VLAN frames must have an exact match on the TPID (mask=%x).\n",
- ntohs(eth_type));
+ OVS_NLERR(log, "VLAN frames must have an exact match on the TPID (mask=%x).",
+ ntohs(eth_type));
err = -EINVAL;
goto free_newmask;
}
@@ -973,18 +1075,19 @@ int ovs_nla_get_match(struct sw_flow_match *match,
tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
if (!(tci & htons(VLAN_TAG_PRESENT))) {
- OVS_NLERR("VLAN tag present bit must have an exact match (tci_mask=%x).\n", ntohs(tci));
+ OVS_NLERR(log, "VLAN tag present bit must have an exact match (tci_mask=%x).",
+ ntohs(tci));
err = -EINVAL;
goto free_newmask;
}
}
- err = ovs_key_from_nlattrs(match, mask_attrs, a, true);
+ err = ovs_key_from_nlattrs(match, mask_attrs, a, true, log);
if (err)
goto free_newmask;
}
- if (!match_validate(match, key_attrs, mask_attrs))
+ if (!match_validate(match, key_attrs, mask_attrs, log))
err = -EINVAL;
free_newmask:
@@ -997,6 +1100,9 @@ free_newmask:
* @key: Receives extracted in_port, priority, tun_key and skb_mark.
* @attr: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute
* sequence.
+ * @log: Boolean to allow kernel error logging. Normally true, but when
+ * probing for feature compatibility this should be passed in as false to
+ * suppress unnecessary error logging.
*
* This parses a series of Netlink attributes that form a flow key, which must
* take the same form accepted by flow_from_nlattrs(), but only enough of it to
@@ -1005,14 +1111,15 @@ free_newmask:
*/
int ovs_nla_get_flow_metadata(const struct nlattr *attr,
- struct sw_flow_key *key)
+ struct sw_flow_key *key,
+ bool log)
{
const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
struct sw_flow_match match;
u64 attrs = 0;
int err;
- err = parse_flow_nlattrs(attr, a, &attrs);
+ err = parse_flow_nlattrs(attr, a, &attrs, log);
if (err)
return -EINVAL;
@@ -1021,7 +1128,7 @@ int ovs_nla_get_flow_metadata(const struct nlattr *attr,
key->phy.in_port = DP_MAX_PORTS;
- return metadata_from_nlattrs(&match, &attrs, a, false);
+ return metadata_from_nlattrs(&match, &attrs, a, false, log);
}
int ovs_nla_put_flow(const struct sw_flow_key *swkey,
@@ -1147,6 +1254,14 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey,
arp_key->arp_op = htons(output->ip.proto);
ether_addr_copy(arp_key->arp_sha, output->ipv4.arp.sha);
ether_addr_copy(arp_key->arp_tha, output->ipv4.arp.tha);
+ } else if (eth_p_mpls(swkey->eth.type)) {
+ struct ovs_key_mpls *mpls_key;
+
+ nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS, sizeof(*mpls_key));
+ if (!nla)
+ goto nla_put_failure;
+ mpls_key = nla_data(nla);
+ mpls_key->mpls_lse = output->mpls.top_lse;
}
if ((swkey->eth.type == htons(ETH_P_IP) ||
@@ -1233,12 +1348,14 @@ nla_put_failure:
#define MAX_ACTIONS_BUFSIZE (32 * 1024)
-struct sw_flow_actions *ovs_nla_alloc_flow_actions(int size)
+static struct sw_flow_actions *nla_alloc_flow_actions(int size, bool log)
{
struct sw_flow_actions *sfa;
- if (size > MAX_ACTIONS_BUFSIZE)
+ if (size > MAX_ACTIONS_BUFSIZE) {
+ OVS_NLERR(log, "Flow action size %u bytes exceeds max", size);
return ERR_PTR(-EINVAL);
+ }
sfa = kmalloc(sizeof(*sfa) + size, GFP_KERNEL);
if (!sfa)
@@ -1256,7 +1373,7 @@ void ovs_nla_free_flow_actions(struct sw_flow_actions *sf_acts)
}
static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa,
- int attr_len)
+ int attr_len, bool log)
{
struct sw_flow_actions *acts;
@@ -1276,7 +1393,7 @@ static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa,
new_acts_size = MAX_ACTIONS_BUFSIZE;
}
- acts = ovs_nla_alloc_flow_actions(new_acts_size);
+ acts = nla_alloc_flow_actions(new_acts_size, log);
if (IS_ERR(acts))
return (void *)acts;
@@ -1291,11 +1408,11 @@ out:
}
static struct nlattr *__add_action(struct sw_flow_actions **sfa,
- int attrtype, void *data, int len)
+ int attrtype, void *data, int len, bool log)
{
struct nlattr *a;
- a = reserve_sfa_size(sfa, nla_attr_size(len));
+ a = reserve_sfa_size(sfa, nla_attr_size(len), log);
if (IS_ERR(a))
return a;
@@ -1310,24 +1427,22 @@ static struct nlattr *__add_action(struct sw_flow_actions **sfa,
}
static int add_action(struct sw_flow_actions **sfa, int attrtype,
- void *data, int len)
+ void *data, int len, bool log)
{
struct nlattr *a;
- a = __add_action(sfa, attrtype, data, len);
- if (IS_ERR(a))
- return PTR_ERR(a);
+ a = __add_action(sfa, attrtype, data, len, log);
- return 0;
+ return PTR_ERR_OR_ZERO(a);
}
static inline int add_nested_action_start(struct sw_flow_actions **sfa,
- int attrtype)
+ int attrtype, bool log)
{
int used = (*sfa)->actions_len;
int err;
- err = add_action(sfa, attrtype, NULL, 0);
+ err = add_action(sfa, attrtype, NULL, 0, log);
if (err)
return err;
@@ -1343,9 +1458,15 @@ static inline void add_nested_action_end(struct sw_flow_actions *sfa,
a->nla_len = sfa->actions_len - st_offset;
}
+static int __ovs_nla_copy_actions(const struct nlattr *attr,
+ const struct sw_flow_key *key,
+ int depth, struct sw_flow_actions **sfa,
+ __be16 eth_type, __be16 vlan_tci, bool log);
+
static int validate_and_copy_sample(const struct nlattr *attr,
const struct sw_flow_key *key, int depth,
- struct sw_flow_actions **sfa)
+ struct sw_flow_actions **sfa,
+ __be16 eth_type, __be16 vlan_tci, bool log)
{
const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1];
const struct nlattr *probability, *actions;
@@ -1371,18 +1492,19 @@ static int validate_and_copy_sample(const struct nlattr *attr,
return -EINVAL;
/* validation done, copy sample action. */
- start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SAMPLE);
+ start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SAMPLE, log);
if (start < 0)
return start;
err = add_action(sfa, OVS_SAMPLE_ATTR_PROBABILITY,
- nla_data(probability), sizeof(u32));
+ nla_data(probability), sizeof(u32), log);
if (err)
return err;
- st_acts = add_nested_action_start(sfa, OVS_SAMPLE_ATTR_ACTIONS);
+ st_acts = add_nested_action_start(sfa, OVS_SAMPLE_ATTR_ACTIONS, log);
if (st_acts < 0)
return st_acts;
- err = ovs_nla_copy_actions(actions, key, depth + 1, sfa);
+ err = __ovs_nla_copy_actions(actions, key, depth + 1, sfa,
+ eth_type, vlan_tci, log);
if (err)
return err;
@@ -1392,10 +1514,10 @@ static int validate_and_copy_sample(const struct nlattr *attr,
return 0;
}
-static int validate_tp_port(const struct sw_flow_key *flow_key)
+static int validate_tp_port(const struct sw_flow_key *flow_key,
+ __be16 eth_type)
{
- if ((flow_key->eth.type == htons(ETH_P_IP) ||
- flow_key->eth.type == htons(ETH_P_IPV6)) &&
+ if ((eth_type == htons(ETH_P_IP) || eth_type == htons(ETH_P_IPV6)) &&
(flow_key->tp.src || flow_key->tp.dst))
return 0;
@@ -1419,7 +1541,7 @@ void ovs_match_init(struct sw_flow_match *match,
}
static int validate_and_copy_set_tun(const struct nlattr *attr,
- struct sw_flow_actions **sfa)
+ struct sw_flow_actions **sfa, bool log)
{
struct sw_flow_match match;
struct sw_flow_key key;
@@ -1428,7 +1550,7 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
int err, start;
ovs_match_init(&match, &key, NULL);
- err = ipv4_tun_from_nlattr(nla_data(attr), &match, false);
+ err = ipv4_tun_from_nlattr(nla_data(attr), &match, false, log);
if (err)
return err;
@@ -1457,12 +1579,12 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
key.tun_key.tun_flags |= crit_opt ? TUNNEL_CRIT_OPT : 0;
};
- start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET);
+ start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET, log);
if (start < 0)
return start;
a = __add_action(sfa, OVS_KEY_ATTR_TUNNEL_INFO, NULL,
- sizeof(*tun_info) + key.tun_opts_len);
+ sizeof(*tun_info) + key.tun_opts_len, log);
if (IS_ERR(a))
return PTR_ERR(a);
@@ -1490,7 +1612,7 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
static int validate_set(const struct nlattr *a,
const struct sw_flow_key *flow_key,
struct sw_flow_actions **sfa,
- bool *set_tun)
+ bool *set_tun, __be16 eth_type, bool log)
{
const struct nlattr *ovs_key = nla_data(a);
int key_type = nla_type(ovs_key);
@@ -1515,14 +1637,17 @@ static int validate_set(const struct nlattr *a,
break;
case OVS_KEY_ATTR_TUNNEL:
+ if (eth_p_mpls(eth_type))
+ return -EINVAL;
+
*set_tun = true;
- err = validate_and_copy_set_tun(a, sfa);
+ err = validate_and_copy_set_tun(a, sfa, log);
if (err)
return err;
break;
case OVS_KEY_ATTR_IPV4:
- if (flow_key->eth.type != htons(ETH_P_IP))
+ if (eth_type != htons(ETH_P_IP))
return -EINVAL;
if (!flow_key->ip.proto)
@@ -1538,7 +1663,7 @@ static int validate_set(const struct nlattr *a,
break;
case OVS_KEY_ATTR_IPV6:
- if (flow_key->eth.type != htons(ETH_P_IPV6))
+ if (eth_type != htons(ETH_P_IPV6))
return -EINVAL;
if (!flow_key->ip.proto)
@@ -1560,19 +1685,24 @@ static int validate_set(const struct nlattr *a,
if (flow_key->ip.proto != IPPROTO_TCP)
return -EINVAL;
- return validate_tp_port(flow_key);
+ return validate_tp_port(flow_key, eth_type);
case OVS_KEY_ATTR_UDP:
if (flow_key->ip.proto != IPPROTO_UDP)
return -EINVAL;
- return validate_tp_port(flow_key);
+ return validate_tp_port(flow_key, eth_type);
+
+ case OVS_KEY_ATTR_MPLS:
+ if (!eth_p_mpls(eth_type))
+ return -EINVAL;
+ break;
case OVS_KEY_ATTR_SCTP:
if (flow_key->ip.proto != IPPROTO_SCTP)
return -EINVAL;
- return validate_tp_port(flow_key);
+ return validate_tp_port(flow_key, eth_type);
default:
return -EINVAL;
@@ -1586,6 +1716,7 @@ static int validate_userspace(const struct nlattr *attr)
static const struct nla_policy userspace_policy[OVS_USERSPACE_ATTR_MAX + 1] = {
[OVS_USERSPACE_ATTR_PID] = {.type = NLA_U32 },
[OVS_USERSPACE_ATTR_USERDATA] = {.type = NLA_UNSPEC },
+ [OVS_USERSPACE_ATTR_EGRESS_TUN_PORT] = {.type = NLA_U32 },
};
struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1];
int error;
@@ -1603,12 +1734,12 @@ static int validate_userspace(const struct nlattr *attr)
}
static int copy_action(const struct nlattr *from,
- struct sw_flow_actions **sfa)
+ struct sw_flow_actions **sfa, bool log)
{
int totlen = NLA_ALIGN(from->nla_len);
struct nlattr *to;
- to = reserve_sfa_size(sfa, from->nla_len);
+ to = reserve_sfa_size(sfa, from->nla_len, log);
if (IS_ERR(to))
return PTR_ERR(to);
@@ -1616,12 +1747,13 @@ static int copy_action(const struct nlattr *from,
return 0;
}
-int ovs_nla_copy_actions(const struct nlattr *attr,
- const struct sw_flow_key *key,
- int depth,
- struct sw_flow_actions **sfa)
+static int __ovs_nla_copy_actions(const struct nlattr *attr,
+ const struct sw_flow_key *key,
+ int depth, struct sw_flow_actions **sfa,
+ __be16 eth_type, __be16 vlan_tci, bool log)
{
const struct nlattr *a;
+ bool out_tnl_port = false;
int rem, err;
if (depth >= SAMPLE_ACTION_DEPTH)
@@ -1633,6 +1765,8 @@ int ovs_nla_copy_actions(const struct nlattr *attr,
[OVS_ACTION_ATTR_OUTPUT] = sizeof(u32),
[OVS_ACTION_ATTR_RECIRC] = sizeof(u32),
[OVS_ACTION_ATTR_USERSPACE] = (u32)-1,
+ [OVS_ACTION_ATTR_PUSH_MPLS] = sizeof(struct ovs_action_push_mpls),
+ [OVS_ACTION_ATTR_POP_MPLS] = sizeof(__be16),
[OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan),
[OVS_ACTION_ATTR_POP_VLAN] = 0,
[OVS_ACTION_ATTR_SET] = (u32)-1,
@@ -1662,6 +1796,8 @@ int ovs_nla_copy_actions(const struct nlattr *attr,
case OVS_ACTION_ATTR_OUTPUT:
if (nla_get_u32(a) >= DP_MAX_PORTS)
return -EINVAL;
+ out_tnl_port = false;
+
break;
case OVS_ACTION_ATTR_HASH: {
@@ -1678,6 +1814,7 @@ int ovs_nla_copy_actions(const struct nlattr *attr,
}
case OVS_ACTION_ATTR_POP_VLAN:
+ vlan_tci = htons(0);
break;
case OVS_ACTION_ATTR_PUSH_VLAN:
@@ -1686,29 +1823,77 @@ int ovs_nla_copy_actions(const struct nlattr *attr,
return -EINVAL;
if (!(vlan->vlan_tci & htons(VLAN_TAG_PRESENT)))
return -EINVAL;
+ vlan_tci = vlan->vlan_tci;
break;
case OVS_ACTION_ATTR_RECIRC:
break;
+ case OVS_ACTION_ATTR_PUSH_MPLS: {
+ const struct ovs_action_push_mpls *mpls = nla_data(a);
+
+ /* Networking stack do not allow simultaneous Tunnel
+ * and MPLS GSO.
+ */
+ if (out_tnl_port)
+ return -EINVAL;
+
+ if (!eth_p_mpls(mpls->mpls_ethertype))
+ return -EINVAL;
+ /* Prohibit push MPLS other than to a white list
+ * for packets that have a known tag order.
+ */
+ if (vlan_tci & htons(VLAN_TAG_PRESENT) ||
+ (eth_type != htons(ETH_P_IP) &&
+ eth_type != htons(ETH_P_IPV6) &&
+ eth_type != htons(ETH_P_ARP) &&
+ eth_type != htons(ETH_P_RARP) &&
+ !eth_p_mpls(eth_type)))
+ return -EINVAL;
+ eth_type = mpls->mpls_ethertype;
+ break;
+ }
+
+ case OVS_ACTION_ATTR_POP_MPLS:
+ if (vlan_tci & htons(VLAN_TAG_PRESENT) ||
+ !eth_p_mpls(eth_type))
+ return -EINVAL;
+
+ /* Disallow subsequent L2.5+ set and mpls_pop actions
+ * as there is no check here to ensure that the new
+ * eth_type is valid and thus set actions could
+ * write off the end of the packet or otherwise
+ * corrupt it.
+ *
+ * Support for these actions is planned using packet
+ * recirculation.
+ */
+ eth_type = htons(0);
+ break;
+
case OVS_ACTION_ATTR_SET:
- err = validate_set(a, key, sfa, &skip_copy);
+ err = validate_set(a, key, sfa,
+ &out_tnl_port, eth_type, log);
if (err)
return err;
+
+ skip_copy = out_tnl_port;
break;
case OVS_ACTION_ATTR_SAMPLE:
- err = validate_and_copy_sample(a, key, depth, sfa);
+ err = validate_and_copy_sample(a, key, depth, sfa,
+ eth_type, vlan_tci, log);
if (err)
return err;
skip_copy = true;
break;
default:
+ OVS_NLERR(log, "Unknown Action type %d", type);
return -EINVAL;
}
if (!skip_copy) {
- err = copy_action(a, sfa);
+ err = copy_action(a, sfa, log);
if (err)
return err;
}
@@ -1720,6 +1905,24 @@ int ovs_nla_copy_actions(const struct nlattr *attr,
return 0;
}
+int ovs_nla_copy_actions(const struct nlattr *attr,
+ const struct sw_flow_key *key,
+ struct sw_flow_actions **sfa, bool log)
+{
+ int err;
+
+ *sfa = nla_alloc_flow_actions(nla_len(attr), log);
+ if (IS_ERR(*sfa))
+ return PTR_ERR(*sfa);
+
+ err = __ovs_nla_copy_actions(attr, key, 0, sfa, key->eth.type,
+ key->eth.tci, log);
+ if (err)
+ kfree(*sfa);
+
+ return err;
+}
+
static int sample_action_to_attr(const struct nlattr *attr, struct sk_buff *skb)
{
const struct nlattr *a;
diff --git a/net/openvswitch/flow_netlink.h b/net/openvswitch/flow_netlink.h
index 206e45a..577f12b 100644
--- a/net/openvswitch/flow_netlink.h
+++ b/net/openvswitch/flow_netlink.h
@@ -37,24 +37,28 @@
#include "flow.h"
+size_t ovs_tun_key_attr_size(void);
+size_t ovs_key_attr_size(void);
+
void ovs_match_init(struct sw_flow_match *match,
struct sw_flow_key *key, struct sw_flow_mask *mask);
int ovs_nla_put_flow(const struct sw_flow_key *,
const struct sw_flow_key *, struct sk_buff *);
-int ovs_nla_get_flow_metadata(const struct nlattr *, struct sw_flow_key *);
+int ovs_nla_get_flow_metadata(const struct nlattr *, struct sw_flow_key *,
+ bool log);
-int ovs_nla_get_match(struct sw_flow_match *match,
- const struct nlattr *,
- const struct nlattr *);
+int ovs_nla_get_match(struct sw_flow_match *, const struct nlattr *key,
+ const struct nlattr *mask, bool log);
+int ovs_nla_put_egress_tunnel_key(struct sk_buff *,
+ const struct ovs_tunnel_info *);
int ovs_nla_copy_actions(const struct nlattr *attr,
- const struct sw_flow_key *key, int depth,
- struct sw_flow_actions **sfa);
+ const struct sw_flow_key *key,
+ struct sw_flow_actions **sfa, bool log);
int ovs_nla_put_actions(const struct nlattr *attr,
int len, struct sk_buff *skb);
-struct sw_flow_actions *ovs_nla_alloc_flow_actions(int actions_len);
void ovs_nla_free_flow_actions(struct sw_flow_actions *);
#endif /* flow_netlink.h */
diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
index cf2d853..5899bf1 100644
--- a/net/openvswitch/flow_table.c
+++ b/net/openvswitch/flow_table.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2007-2013 Nicira, Inc.
+ * Copyright (c) 2007-2014 Nicira, Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
@@ -25,7 +25,7 @@
#include <linux/if_vlan.h>
#include <net/llc_pdu.h>
#include <linux/kernel.h>
-#include <linux/hash.h>
+#include <linux/jhash.h>
#include <linux/jiffies.h>
#include <linux/llc.h>
#include <linux/module.h>
@@ -107,7 +107,7 @@ err:
return ERR_PTR(-ENOMEM);
}
-int ovs_flow_tbl_count(struct flow_table *table)
+int ovs_flow_tbl_count(const struct flow_table *table)
{
return table->count;
}
@@ -250,11 +250,14 @@ skip_flows:
__table_instance_destroy(ti);
}
-void ovs_flow_tbl_destroy(struct flow_table *table, bool deferred)
+/* No need for locking this function is called from RCU callback or
+ * error path.
+ */
+void ovs_flow_tbl_destroy(struct flow_table *table)
{
- struct table_instance *ti = ovsl_dereference(table->ti);
+ struct table_instance *ti = rcu_dereference_raw(table->ti);
- table_instance_destroy(ti, deferred);
+ table_instance_destroy(ti, false);
}
struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *ti,
@@ -363,7 +366,7 @@ static u32 flow_hash(const struct sw_flow_key *key, int key_start,
/* Make sure number of hash bytes are multiple of u32. */
BUILD_BUG_ON(sizeof(long) % sizeof(u32));
- return arch_fast_hash2(hash_key, hash_u32s, 0);
+ return jhash2(hash_key, hash_u32s, 0);
}
static int flow_key_start(const struct sw_flow_key *key)
@@ -398,7 +401,7 @@ static bool flow_cmp_masked_key(const struct sw_flow *flow,
}
bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow,
- struct sw_flow_match *match)
+ const struct sw_flow_match *match)
{
struct sw_flow_key *key = match->key;
int key_start = flow_key_start(key);
@@ -409,7 +412,7 @@ bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow,
static struct sw_flow *masked_flow_lookup(struct table_instance *ti,
const struct sw_flow_key *unmasked,
- struct sw_flow_mask *mask)
+ const struct sw_flow_mask *mask)
{
struct sw_flow *flow;
struct hlist_head *head;
@@ -457,7 +460,7 @@ struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *tbl,
}
struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl,
- struct sw_flow_match *match)
+ const struct sw_flow_match *match)
{
struct table_instance *ti = rcu_dereference_ovsl(tbl->ti);
struct sw_flow_mask *mask;
@@ -560,7 +563,7 @@ static struct sw_flow_mask *flow_mask_find(const struct flow_table *tbl,
/* Add 'mask' into the mask list, if it is not already there. */
static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow,
- struct sw_flow_mask *new)
+ const struct sw_flow_mask *new)
{
struct sw_flow_mask *mask;
mask = flow_mask_find(tbl, new);
@@ -583,7 +586,7 @@ static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow,
/* Must be called with OVS mutex held. */
int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow,
- struct sw_flow_mask *mask)
+ const struct sw_flow_mask *mask)
{
struct table_instance *new_ti = NULL;
struct table_instance *ti;
diff --git a/net/openvswitch/flow_table.h b/net/openvswitch/flow_table.h
index 5918bff..309fa64 100644
--- a/net/openvswitch/flow_table.h
+++ b/net/openvswitch/flow_table.h
@@ -61,12 +61,12 @@ struct sw_flow *ovs_flow_alloc(void);
void ovs_flow_free(struct sw_flow *, bool deferred);
int ovs_flow_tbl_init(struct flow_table *);
-int ovs_flow_tbl_count(struct flow_table *table);
-void ovs_flow_tbl_destroy(struct flow_table *table, bool deferred);
+int ovs_flow_tbl_count(const struct flow_table *table);
+void ovs_flow_tbl_destroy(struct flow_table *table);
int ovs_flow_tbl_flush(struct flow_table *flow_table);
int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow,
- struct sw_flow_mask *mask);
+ const struct sw_flow_mask *mask);
void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow);
int ovs_flow_tbl_num_masks(const struct flow_table *table);
struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *table,
@@ -77,9 +77,9 @@ struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *,
struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *,
const struct sw_flow_key *);
struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl,
- struct sw_flow_match *match);
+ const struct sw_flow_match *match);
bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow,
- struct sw_flow_match *match);
+ const struct sw_flow_match *match);
void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src,
const struct sw_flow_mask *mask);
diff --git a/net/openvswitch/vport-geneve.c b/net/openvswitch/vport-geneve.c
index 106a9d8..347fa23 100644
--- a/net/openvswitch/vport-geneve.c
+++ b/net/openvswitch/vport-geneve.c
@@ -17,6 +17,7 @@
#include <linux/rculist.h>
#include <linux/udp.h>
#include <linux/if_vlan.h>
+#include <linux/module.h>
#include <net/geneve.h>
#include <net/icmp.h>
@@ -28,6 +29,8 @@
#include "datapath.h"
#include "vport.h"
+static struct vport_ops ovs_geneve_vport_ops;
+
/**
* struct geneve_port - Keeps track of open UDP ports
* @gs: The socket created for this port number.
@@ -65,7 +68,7 @@ static void tunnel_id_to_vni(__be64 tun_id, __u8 *vni)
}
/* Convert 24 bit VNI to 64 bit tunnel ID. */
-static __be64 vni_to_tunnel_id(__u8 *vni)
+static __be64 vni_to_tunnel_id(const __u8 *vni)
{
#ifdef __BIG_ENDIAN
return (vni[0] << 16) | (vni[1] << 8) | vni[2];
@@ -94,7 +97,9 @@ static void geneve_rcv(struct geneve_sock *gs, struct sk_buff *skb)
key = vni_to_tunnel_id(geneveh->vni);
- ovs_flow_tun_info_init(&tun_info, ip_hdr(skb), key, flags,
+ ovs_flow_tun_info_init(&tun_info, ip_hdr(skb),
+ udp_hdr(skb)->source, udp_hdr(skb)->dest,
+ key, flags,
geneveh->options, opts_len);
ovs_vport_receive(vport, skb, &tun_info);
@@ -225,11 +230,46 @@ static const char *geneve_get_name(const struct vport *vport)
return geneve_port->name;
}
-const struct vport_ops ovs_geneve_vport_ops = {
+static int geneve_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
+ struct ovs_tunnel_info *egress_tun_info)
+{
+ struct geneve_port *geneve_port = geneve_vport(vport);
+ struct net *net = ovs_dp_get_net(vport->dp);
+ __be16 dport = inet_sk(geneve_port->gs->sock->sk)->inet_sport;
+ __be16 sport = udp_flow_src_port(net, skb, 1, USHRT_MAX, true);
+
+ /* Get tp_src and tp_dst, refert to geneve_build_header().
+ */
+ return ovs_tunnel_get_egress_info(egress_tun_info,
+ ovs_dp_get_net(vport->dp),
+ OVS_CB(skb)->egress_tun_info,
+ IPPROTO_UDP, skb->mark, sport, dport);
+}
+
+static struct vport_ops ovs_geneve_vport_ops = {
.type = OVS_VPORT_TYPE_GENEVE,
.create = geneve_tnl_create,
.destroy = geneve_tnl_destroy,
.get_name = geneve_get_name,
.get_options = geneve_get_options,
.send = geneve_tnl_send,
+ .owner = THIS_MODULE,
+ .get_egress_tun_info = geneve_get_egress_tun_info,
};
+
+static int __init ovs_geneve_tnl_init(void)
+{
+ return ovs_vport_ops_register(&ovs_geneve_vport_ops);
+}
+
+static void __exit ovs_geneve_tnl_exit(void)
+{
+ ovs_vport_ops_unregister(&ovs_geneve_vport_ops);
+}
+
+module_init(ovs_geneve_tnl_init);
+module_exit(ovs_geneve_tnl_exit);
+
+MODULE_DESCRIPTION("OVS: Geneve swiching port");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("vport-type-5");
diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c
index 108b82d..6b69df5 100644
--- a/net/openvswitch/vport-gre.c
+++ b/net/openvswitch/vport-gre.c
@@ -29,6 +29,7 @@
#include <linux/jhash.h>
#include <linux/list.h>
#include <linux/kernel.h>
+#include <linux/module.h>
#include <linux/workqueue.h>
#include <linux/rculist.h>
#include <net/route.h>
@@ -45,6 +46,8 @@
#include "datapath.h"
#include "vport.h"
+static struct vport_ops ovs_gre_vport_ops;
+
/* Returns the least-significant 32 bits of a __be64. */
static __be32 be64_get_low32(__be64 x)
{
@@ -105,7 +108,7 @@ static int gre_rcv(struct sk_buff *skb,
return PACKET_REJECT;
key = key_to_tunnel_id(tpi->key, tpi->seq);
- ovs_flow_tun_info_init(&tun_info, ip_hdr(skb), key,
+ ovs_flow_tun_info_init(&tun_info, ip_hdr(skb), 0, 0, key,
filter_tnl_flags(tpi->flags), NULL, 0);
ovs_vport_receive(vport, skb, &tun_info);
@@ -172,14 +175,10 @@ static int gre_tnl_send(struct vport *vport, struct sk_buff *skb)
goto err_free_rt;
}
- if (vlan_tx_tag_present(skb)) {
- if (unlikely(!__vlan_put_tag(skb,
- skb->vlan_proto,
- vlan_tx_tag_get(skb)))) {
- err = -ENOMEM;
- goto err_free_rt;
- }
- skb->vlan_tci = 0;
+ skb = vlan_hwaccel_push_inside(skb);
+ if (unlikely(!skb)) {
+ err = -ENOMEM;
+ goto err_free_rt;
}
/* Push Tunnel header. */
@@ -281,10 +280,38 @@ static void gre_tnl_destroy(struct vport *vport)
gre_exit();
}
-const struct vport_ops ovs_gre_vport_ops = {
+static int gre_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
+ struct ovs_tunnel_info *egress_tun_info)
+{
+ return ovs_tunnel_get_egress_info(egress_tun_info,
+ ovs_dp_get_net(vport->dp),
+ OVS_CB(skb)->egress_tun_info,
+ IPPROTO_GRE, skb->mark, 0, 0);
+}
+
+static struct vport_ops ovs_gre_vport_ops = {
.type = OVS_VPORT_TYPE_GRE,
.create = gre_create,
.destroy = gre_tnl_destroy,
.get_name = gre_get_name,
.send = gre_tnl_send,
+ .get_egress_tun_info = gre_get_egress_tun_info,
+ .owner = THIS_MODULE,
};
+
+static int __init ovs_gre_tnl_init(void)
+{
+ return ovs_vport_ops_register(&ovs_gre_vport_ops);
+}
+
+static void __exit ovs_gre_tnl_exit(void)
+{
+ ovs_vport_ops_unregister(&ovs_gre_vport_ops);
+}
+
+module_init(ovs_gre_tnl_init);
+module_exit(ovs_gre_tnl_exit);
+
+MODULE_DESCRIPTION("OVS: GRE switching port");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("vport-type-3");
diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c
index 8451612..6a55f71 100644
--- a/net/openvswitch/vport-internal_dev.c
+++ b/net/openvswitch/vport-internal_dev.c
@@ -36,6 +36,8 @@ struct internal_dev {
struct vport *vport;
};
+static struct vport_ops ovs_internal_vport_ops;
+
static struct internal_dev *internal_dev_priv(struct net_device *netdev)
{
return netdev_priv(netdev);
@@ -222,6 +224,11 @@ static int internal_dev_recv(struct vport *vport, struct sk_buff *skb)
struct net_device *netdev = netdev_vport_priv(vport)->dev;
int len;
+ if (unlikely(!(netdev->flags & IFF_UP))) {
+ kfree_skb(skb);
+ return 0;
+ }
+
len = skb->len;
skb_dst_drop(skb);
@@ -238,7 +245,7 @@ static int internal_dev_recv(struct vport *vport, struct sk_buff *skb)
return len;
}
-const struct vport_ops ovs_internal_vport_ops = {
+static struct vport_ops ovs_internal_vport_ops = {
.type = OVS_VPORT_TYPE_INTERNAL,
.create = internal_dev_create,
.destroy = internal_dev_destroy,
@@ -261,10 +268,21 @@ struct vport *ovs_internal_dev_get_vport(struct net_device *netdev)
int ovs_internal_dev_rtnl_link_register(void)
{
- return rtnl_link_register(&internal_dev_link_ops);
+ int err;
+
+ err = rtnl_link_register(&internal_dev_link_ops);
+ if (err < 0)
+ return err;
+
+ err = ovs_vport_ops_register(&ovs_internal_vport_ops);
+ if (err < 0)
+ rtnl_link_unregister(&internal_dev_link_ops);
+
+ return err;
}
void ovs_internal_dev_rtnl_link_unregister(void)
{
+ ovs_vport_ops_unregister(&ovs_internal_vport_ops);
rtnl_link_unregister(&internal_dev_link_ops);
}
diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c
index d21f77d..4776282 100644
--- a/net/openvswitch/vport-netdev.c
+++ b/net/openvswitch/vport-netdev.c
@@ -33,6 +33,8 @@
#include "vport-internal_dev.h"
#include "vport-netdev.h"
+static struct vport_ops ovs_netdev_vport_ops;
+
/* Must be called with rcu_read_lock. */
static void netdev_port_receive(struct vport *vport, struct sk_buff *skb)
{
@@ -75,7 +77,7 @@ static rx_handler_result_t netdev_frame_hook(struct sk_buff **pskb)
return RX_HANDLER_CONSUMED;
}
-static struct net_device *get_dpdev(struct datapath *dp)
+static struct net_device *get_dpdev(const struct datapath *dp)
{
struct vport *local;
@@ -224,10 +226,20 @@ struct vport *ovs_netdev_get_vport(struct net_device *dev)
return NULL;
}
-const struct vport_ops ovs_netdev_vport_ops = {
+static struct vport_ops ovs_netdev_vport_ops = {
.type = OVS_VPORT_TYPE_NETDEV,
.create = netdev_create,
.destroy = netdev_destroy,
.get_name = ovs_netdev_get_name,
.send = netdev_send,
};
+
+int __init ovs_netdev_init(void)
+{
+ return ovs_vport_ops_register(&ovs_netdev_vport_ops);
+}
+
+void ovs_netdev_exit(void)
+{
+ ovs_vport_ops_unregister(&ovs_netdev_vport_ops);
+}
diff --git a/net/openvswitch/vport-netdev.h b/net/openvswitch/vport-netdev.h
index 8df01c11..6f7038e 100644
--- a/net/openvswitch/vport-netdev.h
+++ b/net/openvswitch/vport-netdev.h
@@ -41,4 +41,7 @@ netdev_vport_priv(const struct vport *vport)
const char *ovs_netdev_get_name(const struct vport *);
void ovs_netdev_detach_dev(struct vport *);
+int __init ovs_netdev_init(void);
+void ovs_netdev_exit(void);
+
#endif /* vport_netdev.h */
diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c
index 2735e01..38f95a5 100644
--- a/net/openvswitch/vport-vxlan.c
+++ b/net/openvswitch/vport-vxlan.c
@@ -24,6 +24,7 @@
#include <linux/net.h>
#include <linux/rculist.h>
#include <linux/udp.h>
+#include <linux/module.h>
#include <net/icmp.h>
#include <net/ip.h>
@@ -50,6 +51,8 @@ struct vxlan_port {
char name[IFNAMSIZ];
};
+static struct vport_ops ovs_vxlan_vport_ops;
+
static inline struct vxlan_port *vxlan_vport(const struct vport *vport)
{
return vport_priv(vport);
@@ -66,7 +69,9 @@ static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, __be32 vx_vni)
/* Save outer tunnel values */
iph = ip_hdr(skb);
key = cpu_to_be64(ntohl(vx_vni) >> 8);
- ovs_flow_tun_info_init(&tun_info, iph, key, TUNNEL_KEY, NULL, 0);
+ ovs_flow_tun_info_init(&tun_info, iph,
+ udp_hdr(skb)->source, udp_hdr(skb)->dest,
+ key, TUNNEL_KEY, NULL, 0);
ovs_vport_receive(vport, skb, &tun_info);
}
@@ -186,17 +191,55 @@ error:
return err;
}
+static int vxlan_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
+ struct ovs_tunnel_info *egress_tun_info)
+{
+ struct net *net = ovs_dp_get_net(vport->dp);
+ struct vxlan_port *vxlan_port = vxlan_vport(vport);
+ __be16 dst_port = inet_sk(vxlan_port->vs->sock->sk)->inet_sport;
+ __be16 src_port;
+ int port_min;
+ int port_max;
+
+ inet_get_local_port_range(net, &port_min, &port_max);
+ src_port = udp_flow_src_port(net, skb, 0, 0, true);
+
+ return ovs_tunnel_get_egress_info(egress_tun_info, net,
+ OVS_CB(skb)->egress_tun_info,
+ IPPROTO_UDP, skb->mark,
+ src_port, dst_port);
+}
+
static const char *vxlan_get_name(const struct vport *vport)
{
struct vxlan_port *vxlan_port = vxlan_vport(vport);
return vxlan_port->name;
}
-const struct vport_ops ovs_vxlan_vport_ops = {
+static struct vport_ops ovs_vxlan_vport_ops = {
.type = OVS_VPORT_TYPE_VXLAN,
.create = vxlan_tnl_create,
.destroy = vxlan_tnl_destroy,
.get_name = vxlan_get_name,
.get_options = vxlan_get_options,
.send = vxlan_tnl_send,
+ .get_egress_tun_info = vxlan_get_egress_tun_info,
+ .owner = THIS_MODULE,
};
+
+static int __init ovs_vxlan_tnl_init(void)
+{
+ return ovs_vport_ops_register(&ovs_vxlan_vport_ops);
+}
+
+static void __exit ovs_vxlan_tnl_exit(void)
+{
+ ovs_vport_ops_unregister(&ovs_vxlan_vport_ops);
+}
+
+module_init(ovs_vxlan_tnl_init);
+module_exit(ovs_vxlan_tnl_exit);
+
+MODULE_DESCRIPTION("OVS: VXLAN switching port");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("vport-type-4");
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index 6015802..9584526 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -28,6 +28,7 @@
#include <linux/rtnetlink.h>
#include <linux/compat.h>
#include <net/net_namespace.h>
+#include <linux/module.h>
#include "datapath.h"
#include "vport.h"
@@ -36,22 +37,7 @@
static void ovs_vport_record_error(struct vport *,
enum vport_err_type err_type);
-/* List of statically compiled vport implementations. Don't forget to also
- * add yours to the list at the bottom of vport.h. */
-static const struct vport_ops *vport_ops_list[] = {
- &ovs_netdev_vport_ops,
- &ovs_internal_vport_ops,
-
-#ifdef CONFIG_OPENVSWITCH_GRE
- &ovs_gre_vport_ops,
-#endif
-#ifdef CONFIG_OPENVSWITCH_VXLAN
- &ovs_vxlan_vport_ops,
-#endif
-#ifdef CONFIG_OPENVSWITCH_GENEVE
- &ovs_geneve_vport_ops,
-#endif
-};
+static LIST_HEAD(vport_ops_list);
/* Protected by RCU read lock for reading, ovs_mutex for writing. */
static struct hlist_head *dev_table;
@@ -82,12 +68,38 @@ void ovs_vport_exit(void)
kfree(dev_table);
}
-static struct hlist_head *hash_bucket(struct net *net, const char *name)
+static struct hlist_head *hash_bucket(const struct net *net, const char *name)
{
unsigned int hash = jhash(name, strlen(name), (unsigned long) net);
return &dev_table[hash & (VPORT_HASH_BUCKETS - 1)];
}
+int ovs_vport_ops_register(struct vport_ops *ops)
+{
+ int err = -EEXIST;
+ struct vport_ops *o;
+
+ ovs_lock();
+ list_for_each_entry(o, &vport_ops_list, list)
+ if (ops->type == o->type)
+ goto errout;
+
+ list_add_tail(&ops->list, &vport_ops_list);
+ err = 0;
+errout:
+ ovs_unlock();
+ return err;
+}
+EXPORT_SYMBOL_GPL(ovs_vport_ops_register);
+
+void ovs_vport_ops_unregister(struct vport_ops *ops)
+{
+ ovs_lock();
+ list_del(&ops->list);
+ ovs_unlock();
+}
+EXPORT_SYMBOL_GPL(ovs_vport_ops_unregister);
+
/**
* ovs_vport_locate - find a port that has already been created
*
@@ -95,7 +107,7 @@ static struct hlist_head *hash_bucket(struct net *net, const char *name)
*
* Must be called with ovs or RCU read lock.
*/
-struct vport *ovs_vport_locate(struct net *net, const char *name)
+struct vport *ovs_vport_locate(const struct net *net, const char *name)
{
struct hlist_head *bucket = hash_bucket(net, name);
struct vport *vport;
@@ -153,6 +165,7 @@ struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops,
return vport;
}
+EXPORT_SYMBOL_GPL(ovs_vport_alloc);
/**
* ovs_vport_free - uninitialize and free vport
@@ -173,6 +186,18 @@ void ovs_vport_free(struct vport *vport)
free_percpu(vport->percpu_stats);
kfree(vport);
}
+EXPORT_SYMBOL_GPL(ovs_vport_free);
+
+static struct vport_ops *ovs_vport_lookup(const struct vport_parms *parms)
+{
+ struct vport_ops *ops;
+
+ list_for_each_entry(ops, &vport_ops_list, list)
+ if (ops->type == parms->type)
+ return ops;
+
+ return NULL;
+}
/**
* ovs_vport_add - add vport device (for kernel callers)
@@ -184,31 +209,40 @@ void ovs_vport_free(struct vport *vport)
*/
struct vport *ovs_vport_add(const struct vport_parms *parms)
{
+ struct vport_ops *ops;
struct vport *vport;
- int err = 0;
- int i;
- for (i = 0; i < ARRAY_SIZE(vport_ops_list); i++) {
- if (vport_ops_list[i]->type == parms->type) {
- struct hlist_head *bucket;
+ ops = ovs_vport_lookup(parms);
+ if (ops) {
+ struct hlist_head *bucket;
- vport = vport_ops_list[i]->create(parms);
- if (IS_ERR(vport)) {
- err = PTR_ERR(vport);
- goto out;
- }
+ if (!try_module_get(ops->owner))
+ return ERR_PTR(-EAFNOSUPPORT);
- bucket = hash_bucket(ovs_dp_get_net(vport->dp),
- vport->ops->get_name(vport));
- hlist_add_head_rcu(&vport->hash_node, bucket);
+ vport = ops->create(parms);
+ if (IS_ERR(vport)) {
+ module_put(ops->owner);
return vport;
}
+
+ bucket = hash_bucket(ovs_dp_get_net(vport->dp),
+ vport->ops->get_name(vport));
+ hlist_add_head_rcu(&vport->hash_node, bucket);
+ return vport;
}
- err = -EAFNOSUPPORT;
+ /* Unlock to attempt module load and return -EAGAIN if load
+ * was successful as we need to restart the port addition
+ * workflow.
+ */
+ ovs_unlock();
+ request_module("vport-type-%d", parms->type);
+ ovs_lock();
-out:
- return ERR_PTR(err);
+ if (!ovs_vport_lookup(parms))
+ return ERR_PTR(-EAFNOSUPPORT);
+ else
+ return ERR_PTR(-EAGAIN);
}
/**
@@ -242,6 +276,8 @@ void ovs_vport_del(struct vport *vport)
hlist_del_rcu(&vport->hash_node);
vport->ops->destroy(vport);
+
+ module_put(vport->ops->owner);
}
/**
@@ -344,7 +380,7 @@ int ovs_vport_get_options(const struct vport *vport, struct sk_buff *skb)
*
* Must be called with ovs_mutex.
*/
-int ovs_vport_set_upcall_portids(struct vport *vport, struct nlattr *ids)
+int ovs_vport_set_upcall_portids(struct vport *vport, const struct nlattr *ids)
{
struct vport_portids *old, *vport_portids;
@@ -435,7 +471,7 @@ u32 ovs_vport_find_upcall_portid(const struct vport *vport, struct sk_buff *skb)
* skb->data should point to the Ethernet header.
*/
void ovs_vport_receive(struct vport *vport, struct sk_buff *skb,
- struct ovs_tunnel_info *tun_info)
+ const struct ovs_tunnel_info *tun_info)
{
struct pcpu_sw_netstats *stats;
struct sw_flow_key key;
@@ -457,6 +493,7 @@ void ovs_vport_receive(struct vport *vport, struct sk_buff *skb,
}
ovs_dp_process_packet(skb, &key);
}
+EXPORT_SYMBOL_GPL(ovs_vport_receive);
/**
* ovs_vport_send - send a packet on a device
@@ -535,3 +572,65 @@ void ovs_vport_deferred_free(struct vport *vport)
call_rcu(&vport->rcu, free_vport_rcu);
}
+EXPORT_SYMBOL_GPL(ovs_vport_deferred_free);
+
+int ovs_tunnel_get_egress_info(struct ovs_tunnel_info *egress_tun_info,
+ struct net *net,
+ const struct ovs_tunnel_info *tun_info,
+ u8 ipproto,
+ u32 skb_mark,
+ __be16 tp_src,
+ __be16 tp_dst)
+{
+ const struct ovs_key_ipv4_tunnel *tun_key;
+ struct rtable *rt;
+ struct flowi4 fl;
+
+ if (unlikely(!tun_info))
+ return -EINVAL;
+
+ tun_key = &tun_info->tunnel;
+
+ /* Route lookup to get srouce IP address.
+ * The process may need to be changed if the corresponding process
+ * in vports ops changed.
+ */
+ memset(&fl, 0, sizeof(fl));
+ fl.daddr = tun_key->ipv4_dst;
+ fl.saddr = tun_key->ipv4_src;
+ fl.flowi4_tos = RT_TOS(tun_key->ipv4_tos);
+ fl.flowi4_mark = skb_mark;
+ fl.flowi4_proto = ipproto;
+
+ rt = ip_route_output_key(net, &fl);
+ if (IS_ERR(rt))
+ return PTR_ERR(rt);
+
+ ip_rt_put(rt);
+
+ /* Generate egress_tun_info based on tun_info,
+ * saddr, tp_src and tp_dst
+ */
+ __ovs_flow_tun_info_init(egress_tun_info,
+ fl.saddr, tun_key->ipv4_dst,
+ tun_key->ipv4_tos,
+ tun_key->ipv4_ttl,
+ tp_src, tp_dst,
+ tun_key->tun_id,
+ tun_key->tun_flags,
+ tun_info->options,
+ tun_info->options_len);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(ovs_tunnel_get_egress_info);
+
+int ovs_vport_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
+ struct ovs_tunnel_info *info)
+{
+ /* get_egress_tun_info() is only implemented on tunnel ports. */
+ if (unlikely(!vport->ops->get_egress_tun_info))
+ return -EINVAL;
+
+ return vport->ops->get_egress_tun_info(vport, skb, info);
+}
diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h
index 8942125..99c8e71 100644
--- a/net/openvswitch/vport.h
+++ b/net/openvswitch/vport.h
@@ -45,19 +45,29 @@ void ovs_vport_exit(void);
struct vport *ovs_vport_add(const struct vport_parms *);
void ovs_vport_del(struct vport *);
-struct vport *ovs_vport_locate(struct net *net, const char *name);
+struct vport *ovs_vport_locate(const struct net *net, const char *name);
void ovs_vport_get_stats(struct vport *, struct ovs_vport_stats *);
int ovs_vport_set_options(struct vport *, struct nlattr *options);
int ovs_vport_get_options(const struct vport *, struct sk_buff *);
-int ovs_vport_set_upcall_portids(struct vport *, struct nlattr *pids);
+int ovs_vport_set_upcall_portids(struct vport *, const struct nlattr *pids);
int ovs_vport_get_upcall_portids(const struct vport *, struct sk_buff *);
u32 ovs_vport_find_upcall_portid(const struct vport *, struct sk_buff *);
int ovs_vport_send(struct vport *, struct sk_buff *);
+int ovs_tunnel_get_egress_info(struct ovs_tunnel_info *egress_tun_info,
+ struct net *net,
+ const struct ovs_tunnel_info *tun_info,
+ u8 ipproto,
+ u32 skb_mark,
+ __be16 tp_src,
+ __be16 tp_dst);
+int ovs_vport_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
+ struct ovs_tunnel_info *info);
+
/* The following definitions are for implementers of vport devices: */
struct vport_err_stats {
@@ -146,6 +156,8 @@ struct vport_parms {
* @get_name: Get the device's name.
* @send: Send a packet on the device. Returns the length of the packet sent,
* zero for dropped packets or negative for error.
+ * @get_egress_tun_info: Get the egress tunnel 5-tuple and other info for
+ * a packet.
*/
struct vport_ops {
enum ovs_vport_type type;
@@ -161,6 +173,11 @@ struct vport_ops {
const char *(*get_name)(const struct vport *);
int (*send)(struct vport *, struct sk_buff *);
+ int (*get_egress_tun_info)(struct vport *, struct sk_buff *,
+ struct ovs_tunnel_info *);
+
+ struct module *owner;
+ struct list_head list;
};
enum vport_err_type {
@@ -207,15 +224,7 @@ static inline struct vport *vport_from_priv(void *priv)
}
void ovs_vport_receive(struct vport *, struct sk_buff *,
- struct ovs_tunnel_info *);
-
-/* List of statically compiled vport implementations. Don't forget to also
- * add yours to the list at the top of vport.c. */
-extern const struct vport_ops ovs_netdev_vport_ops;
-extern const struct vport_ops ovs_internal_vport_ops;
-extern const struct vport_ops ovs_gre_vport_ops;
-extern const struct vport_ops ovs_vxlan_vport_ops;
-extern const struct vport_ops ovs_geneve_vport_ops;
+ const struct ovs_tunnel_info *);
static inline void ovs_skb_postpush_rcsum(struct sk_buff *skb,
const void *start, unsigned int len)
@@ -224,4 +233,7 @@ static inline void ovs_skb_postpush_rcsum(struct sk_buff *skb,
skb->csum = csum_add(skb->csum, csum_partial(start, len, 0));
}
+int ovs_vport_ops_register(struct vport_ops *ops);
+void ovs_vport_ops_unregister(struct vport_ops *ops);
+
#endif /* vport.h */
OpenPOWER on IntegriCloud