From 22061d8014455b01eb018bd6c35a1b3040ccc230 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Thu, 15 Nov 2012 08:49:11 +0000 Subject: net: Switch to using the new packet offload infrustructure Convert to using the new GSO/GRO registration mechanism and new packet offload structure. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/ipv4/af_inet.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'net/ipv4/af_inet.c') diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 766c596..4c99c5f 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1662,6 +1662,10 @@ static int ipv4_proc_init(void); static struct packet_type ip_packet_type __read_mostly = { .type = cpu_to_be16(ETH_P_IP), .func = ip_rcv, +}; + +static struct packet_offload ip_packet_offload __read_mostly = { + .type = cpu_to_be16(ETH_P_IP), .gso_send_check = inet_gso_send_check, .gso_segment = inet_gso_segment, .gro_receive = inet_gro_receive, @@ -1781,6 +1785,7 @@ static int __init inet_init(void) ipfrag_init(); + dev_add_offload(&ip_packet_offload); dev_add_pack(&ip_packet_type); rc = 0; -- cgit v1.1 From de27d001d187721de775ed9e0c485aa6f517c745 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Thu, 15 Nov 2012 08:49:12 +0000 Subject: net: Add net protocol offload registration infrustructure Create a new data structure for IPv4 protocols that holds GRO/GSO callbacks and a new array to track the protocols that register GRO/GSO. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/ipv4/af_inet.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'net/ipv4/af_inet.c') diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 4c99c5f..3918d86 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1566,6 +1566,13 @@ static const struct net_protocol tcp_protocol = { .netns_ok = 1, }; +static const struct net_offload tcp_offload = { + .gso_send_check = tcp_v4_gso_send_check, + .gso_segment = tcp_tso_segment, + .gro_receive = tcp4_gro_receive, + .gro_complete = tcp4_gro_complete, +}; + static const struct net_protocol udp_protocol = { .handler = udp_rcv, .err_handler = udp_err, @@ -1575,6 +1582,11 @@ static const struct net_protocol udp_protocol = { .netns_ok = 1, }; +static const struct net_offload udp_offload = { + .gso_send_check = udp4_ufo_send_check, + .gso_segment = udp4_ufo_fragment, +}; + static const struct net_protocol icmp_protocol = { .handler = icmp_rcv, .err_handler = ping_err, -- cgit v1.1 From bca49f843eac59cbb1ddd1f4a5d65fcc23b62efd Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Thu, 15 Nov 2012 08:49:14 +0000 Subject: ipv4: Switch to using the new offload infrastructure. Switch IPv4 code base to using the new GRO/GSO calls and data. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/ipv4/af_inet.c | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) (limited to 'net/ipv4/af_inet.c') diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 3918d86..66f63ce 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1251,7 +1251,7 @@ EXPORT_SYMBOL(inet_sk_rebuild_header); static int inet_gso_send_check(struct sk_buff *skb) { - const struct net_protocol *ops; + const struct net_offload *ops; const struct iphdr *iph; int proto; int ihl; @@ -1275,7 +1275,7 @@ static int inet_gso_send_check(struct sk_buff *skb) err = -EPROTONOSUPPORT; rcu_read_lock(); - ops = rcu_dereference(inet_protos[proto]); + ops = rcu_dereference(inet_offloads[proto]); if (likely(ops && ops->gso_send_check)) err = ops->gso_send_check(skb); rcu_read_unlock(); @@ -1288,7 +1288,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, netdev_features_t features) { struct sk_buff *segs = ERR_PTR(-EINVAL); - const struct net_protocol *ops; + const struct net_offload *ops; struct iphdr *iph; int proto; int ihl; @@ -1325,7 +1325,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, segs = ERR_PTR(-EPROTONOSUPPORT); rcu_read_lock(); - ops = rcu_dereference(inet_protos[proto]); + ops = rcu_dereference(inet_offloads[proto]); if (likely(ops && ops->gso_segment)) segs = ops->gso_segment(skb, features); rcu_read_unlock(); @@ -1356,7 +1356,7 @@ out: static struct sk_buff **inet_gro_receive(struct sk_buff **head, struct sk_buff *skb) { - const struct net_protocol *ops; + const struct net_offload *ops; struct sk_buff **pp = NULL; struct sk_buff *p; const struct iphdr *iph; @@ -1378,7 +1378,7 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head, proto = iph->protocol; rcu_read_lock(); - ops = rcu_dereference(inet_protos[proto]); + ops = rcu_dereference(inet_offloads[proto]); if (!ops || !ops->gro_receive) goto out_unlock; @@ -1435,7 +1435,7 @@ static int inet_gro_complete(struct sk_buff *skb) { __be16 newlen = htons(skb->len - skb_network_offset(skb)); struct iphdr *iph = ip_hdr(skb); - const struct net_protocol *ops; + const struct net_offload *ops; int proto = iph->protocol; int err = -ENOSYS; @@ -1443,7 +1443,7 @@ static int inet_gro_complete(struct sk_buff *skb) iph->tot_len = newlen; rcu_read_lock(); - ops = rcu_dereference(inet_protos[proto]); + ops = rcu_dereference(inet_offloads[proto]); if (WARN_ON(!ops || !ops->gro_complete)) goto out_unlock; @@ -1558,10 +1558,6 @@ static const struct net_protocol tcp_protocol = { .early_demux = tcp_v4_early_demux, .handler = tcp_v4_rcv, .err_handler = tcp_v4_err, - .gso_send_check = tcp_v4_gso_send_check, - .gso_segment = tcp_tso_segment, - .gro_receive = tcp4_gro_receive, - .gro_complete = tcp4_gro_complete, .no_policy = 1, .netns_ok = 1, }; @@ -1576,8 +1572,6 @@ static const struct net_offload tcp_offload = { static const struct net_protocol udp_protocol = { .handler = udp_rcv, .err_handler = udp_err, - .gso_send_check = udp4_ufo_send_check, - .gso_segment = udp4_ufo_fragment, .no_policy = 1, .netns_ok = 1, }; @@ -1726,6 +1720,14 @@ static int __init inet_init(void) tcp_prot.sysctl_mem = init_net.ipv4.sysctl_tcp_mem; /* + * Add offloads + */ + if (inet_add_offload(&udp_offload, IPPROTO_UDP) < 0) + pr_crit("%s: Cannot add UDP protocol offload\n", __func__); + if (inet_add_offload(&tcp_offload, IPPROTO_TCP) < 0) + pr_crit("%s: Cannot add TCP protocol offlaod\n", __func__); + + /* * Add all the base protocols. */ -- cgit v1.1 From 808a8f884554f93315f663b2694addb4a177c578 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Thu, 15 Nov 2012 08:49:21 +0000 Subject: ipv4: Pull GSO registration out of inet_init() Since GSO/GRO support is now separated, make IPv4 GSO a stand-alone init call and not part of inet_init(). Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/ipv4/af_inet.c | 35 +++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 14 deletions(-) (limited to 'net/ipv4/af_inet.c') diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 66f63ce..9f2e7fd 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1665,11 +1665,6 @@ static int ipv4_proc_init(void); * IP protocol layer initialiser */ -static struct packet_type ip_packet_type __read_mostly = { - .type = cpu_to_be16(ETH_P_IP), - .func = ip_rcv, -}; - static struct packet_offload ip_packet_offload __read_mostly = { .type = cpu_to_be16(ETH_P_IP), .gso_send_check = inet_gso_send_check, @@ -1678,6 +1673,27 @@ static struct packet_offload ip_packet_offload __read_mostly = { .gro_complete = inet_gro_complete, }; +static int __init ipv4_offload_init(void) +{ + /* + * Add offloads + */ + if (inet_add_offload(&udp_offload, IPPROTO_UDP) < 0) + pr_crit("%s: Cannot add UDP protocol offload\n", __func__); + if (inet_add_offload(&tcp_offload, IPPROTO_TCP) < 0) + pr_crit("%s: Cannot add TCP protocol offlaod\n", __func__); + + dev_add_offload(&ip_packet_offload); + return 0; +} + +fs_initcall(ipv4_offload_init); + +static struct packet_type ip_packet_type __read_mostly = { + .type = cpu_to_be16(ETH_P_IP), + .func = ip_rcv, +}; + static int __init inet_init(void) { struct sk_buff *dummy_skb; @@ -1720,14 +1736,6 @@ static int __init inet_init(void) tcp_prot.sysctl_mem = init_net.ipv4.sysctl_tcp_mem; /* - * Add offloads - */ - if (inet_add_offload(&udp_offload, IPPROTO_UDP) < 0) - pr_crit("%s: Cannot add UDP protocol offload\n", __func__); - if (inet_add_offload(&tcp_offload, IPPROTO_TCP) < 0) - pr_crit("%s: Cannot add TCP protocol offlaod\n", __func__); - - /* * Add all the base protocols. */ @@ -1799,7 +1807,6 @@ static int __init inet_init(void) ipfrag_init(); - dev_add_offload(&ip_packet_offload); dev_add_pack(&ip_packet_type); rc = 0; -- cgit v1.1 From f191a1d17f227032c159e5499809f545402b6dc6 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Thu, 15 Nov 2012 08:49:23 +0000 Subject: net: Remove code duplication between offload structures Move the offload callbacks into its own structure. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/ipv4/af_inet.c | 42 ++++++++++++++++++++++++------------------ 1 file changed, 24 insertions(+), 18 deletions(-) (limited to 'net/ipv4/af_inet.c') diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 9f2e7fd..d5e5a05 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1276,8 +1276,8 @@ static int inet_gso_send_check(struct sk_buff *skb) rcu_read_lock(); ops = rcu_dereference(inet_offloads[proto]); - if (likely(ops && ops->gso_send_check)) - err = ops->gso_send_check(skb); + if (likely(ops && ops->callbacks.gso_send_check)) + err = ops->callbacks.gso_send_check(skb); rcu_read_unlock(); out: @@ -1326,8 +1326,8 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, rcu_read_lock(); ops = rcu_dereference(inet_offloads[proto]); - if (likely(ops && ops->gso_segment)) - segs = ops->gso_segment(skb, features); + if (likely(ops && ops->callbacks.gso_segment)) + segs = ops->callbacks.gso_segment(skb, features); rcu_read_unlock(); if (!segs || IS_ERR(segs)) @@ -1379,7 +1379,7 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head, rcu_read_lock(); ops = rcu_dereference(inet_offloads[proto]); - if (!ops || !ops->gro_receive) + if (!ops || !ops->callbacks.gro_receive) goto out_unlock; if (*(u8 *)iph != 0x45) @@ -1420,7 +1420,7 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head, skb_gro_pull(skb, sizeof(*iph)); skb_set_transport_header(skb, skb_gro_offset(skb)); - pp = ops->gro_receive(head, skb); + pp = ops->callbacks.gro_receive(head, skb); out_unlock: rcu_read_unlock(); @@ -1444,10 +1444,10 @@ static int inet_gro_complete(struct sk_buff *skb) rcu_read_lock(); ops = rcu_dereference(inet_offloads[proto]); - if (WARN_ON(!ops || !ops->gro_complete)) + if (WARN_ON(!ops || !ops->callbacks.gro_complete)) goto out_unlock; - err = ops->gro_complete(skb); + err = ops->callbacks.gro_complete(skb); out_unlock: rcu_read_unlock(); @@ -1563,10 +1563,12 @@ static const struct net_protocol tcp_protocol = { }; static const struct net_offload tcp_offload = { - .gso_send_check = tcp_v4_gso_send_check, - .gso_segment = tcp_tso_segment, - .gro_receive = tcp4_gro_receive, - .gro_complete = tcp4_gro_complete, + .callbacks = { + .gso_send_check = tcp_v4_gso_send_check, + .gso_segment = tcp_tso_segment, + .gro_receive = tcp4_gro_receive, + .gro_complete = tcp4_gro_complete, + }, }; static const struct net_protocol udp_protocol = { @@ -1577,8 +1579,10 @@ static const struct net_protocol udp_protocol = { }; static const struct net_offload udp_offload = { - .gso_send_check = udp4_ufo_send_check, - .gso_segment = udp4_ufo_fragment, + .callbacks = { + .gso_send_check = udp4_ufo_send_check, + .gso_segment = udp4_ufo_fragment, + }, }; static const struct net_protocol icmp_protocol = { @@ -1667,10 +1671,12 @@ static int ipv4_proc_init(void); static struct packet_offload ip_packet_offload __read_mostly = { .type = cpu_to_be16(ETH_P_IP), - .gso_send_check = inet_gso_send_check, - .gso_segment = inet_gso_segment, - .gro_receive = inet_gro_receive, - .gro_complete = inet_gro_complete, + .callbacks = { + .gso_send_check = inet_gso_send_check, + .gso_segment = inet_gso_segment, + .gro_receive = inet_gro_receive, + .gro_complete = inet_gro_complete, + }, }; static int __init ipv4_offload_init(void) -- cgit v1.1 From 52e804c6dfaa5df1e4b0e290357b82ad4e4cda2c Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 16 Nov 2012 03:03:05 +0000 Subject: net: Allow userns root to control ipv4 Allow an unpriviled user who has created a user namespace, and then created a network namespace to effectively use the new network namespace, by reducing capable(CAP_NET_ADMIN) and capable(CAP_NET_RAW) calls to be ns_capable(net->user_ns, CAP_NET_ADMIN), or capable(net->user_ns, CAP_NET_RAW) calls. Settings that merely control a single network device are allowed. Either the network device is a logical network device where restrictions make no difference or the network device is hardware NIC that has been explicity moved from the initial network namespace. In general policy and network stack state changes are allowed while resource control is left unchanged. Allow creating raw sockets. Allow the SIOCSARP ioctl to control the arp cache. Allow the SIOCSIFFLAG ioctl to allow setting network device flags. Allow the SIOCSIFADDR ioctl to allow setting a netdevice ipv4 address. Allow the SIOCSIFBRDADDR ioctl to allow setting a netdevice ipv4 broadcast address. Allow the SIOCSIFDSTADDR ioctl to allow setting a netdevice ipv4 destination address. Allow the SIOCSIFNETMASK ioctl to allow setting a netdevice ipv4 netmask. Allow the SIOCADDRT and SIOCDELRT ioctls to allow adding and deleting ipv4 routes. Allow the SIOCADDTUNNEL, SIOCCHGTUNNEL and SIOCDELTUNNEL ioctls for adding, changing and deleting gre tunnels. Allow the SIOCADDTUNNEL, SIOCCHGTUNNEL and SIOCDELTUNNEL ioctls for adding, changing and deleting ipip tunnels. Allow the SIOCADDTUNNEL, SIOCCHGTUNNEL and SIOCDELTUNNEL ioctls for adding, changing and deleting ipsec virtual tunnel interfaces. Allow setting the MRT_INIT, MRT_DONE, MRT_ADD_VIF, MRT_DEL_VIF, MRT_ADD_MFC, MRT_DEL_MFC, MRT_ASSERT, MRT_PIM, MRT_TABLE socket options on multicast routing sockets. Allow setting and receiving IPOPT_CIPSO, IP_OPT_SEC, IP_OPT_SID and arbitrary ip options. Allow setting IP_SEC_POLICY/IP_XFRM_POLICY ipv4 socket option. Allow setting the IP_TRANSPARENT ipv4 socket option. Allow setting the TCP_REPAIR socket option. Allow setting the TCP_CONGESTION socket option. Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- net/ipv4/af_inet.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net/ipv4/af_inet.c') diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index d5e5a05..4f5f220 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -346,7 +346,8 @@ lookup_protocol: } err = -EPERM; - if (sock->type == SOCK_RAW && !kern && !capable(CAP_NET_RAW)) + if (sock->type == SOCK_RAW && !kern && + !ns_capable(net->user_ns, CAP_NET_RAW)) goto out_rcu_unlock; err = -EAFNOSUPPORT; -- cgit v1.1 From 3594698a1fb8e5ae60a92c72ce9ca280256939a7 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 16 Nov 2012 03:03:12 +0000 Subject: net: Make CAP_NET_BIND_SERVICE per user namespace Allow privileged users in any user namespace to bind to privileged sockets in network namespaces they control. Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- net/ipv4/af_inet.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net/ipv4/af_inet.c') diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 4f5f220..24b384b 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -474,6 +474,7 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) struct sockaddr_in *addr = (struct sockaddr_in *)uaddr; struct sock *sk = sock->sk; struct inet_sock *inet = inet_sk(sk); + struct net *net = sock_net(sk); unsigned short snum; int chk_addr_ret; int err; @@ -497,7 +498,7 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) goto out; } - chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr); + chk_addr_ret = inet_addr_type(net, addr->sin_addr.s_addr); /* Not specified by any standard per-se, however it breaks too * many applications when removed. It is unfortunate since @@ -517,7 +518,8 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) snum = ntohs(addr->sin_port); err = -EACCES; - if (snum && snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE)) + if (snum && snum < PROT_SOCK && + !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE)) goto out; /* We keep a pair of addresses. rcv_saddr is the one -- cgit v1.1