summaryrefslogtreecommitdiffstats
path: root/net/ipv6
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv6')
-rw-r--r--net/ipv6/Makefile4
-rw-r--r--net/ipv6/addrconf.c42
-rw-r--r--net/ipv6/af_inet6.c6
-rw-r--r--net/ipv6/anycast.c133
-rw-r--r--net/ipv6/datagram.c15
-rw-r--r--net/ipv6/icmp.c20
-rw-r--r--net/ipv6/ip6_offload.c32
-rw-r--r--net/ipv6/ip6_output.c10
-rw-r--r--net/ipv6/ip6_udp_tunnel.c107
-rw-r--r--net/ipv6/mcast.c218
-rw-r--r--net/ipv6/netfilter/Kconfig41
-rw-r--r--net/ipv6/netfilter/Makefile4
-rw-r--r--net/ipv6/netfilter/ip6t_MASQUERADE.c76
-rw-r--r--net/ipv6/netfilter/ip6table_nat.c233
-rw-r--r--net/ipv6/netfilter/nf_nat_l3proto_ipv6.c199
-rw-r--r--net/ipv6/netfilter/nf_nat_masquerade_ipv6.c120
-rw-r--r--net/ipv6/netfilter/nft_chain_nat_ipv6.c165
-rw-r--r--net/ipv6/netfilter/nft_masq_ipv6.c89
-rw-r--r--net/ipv6/protocol.c1
-rw-r--r--net/ipv6/sit.c107
-rw-r--r--net/ipv6/syncookies.c4
-rw-r--r--net/ipv6/sysctl_net_ipv6.c10
-rw-r--r--net/ipv6/tcp_ipv6.c32
-rw-r--r--net/ipv6/tcpv6_offload.c43
-rw-r--r--net/ipv6/udp.c4
-rw-r--r--net/ipv6/udp_offload.c72
26 files changed, 1070 insertions, 717 deletions
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index 2fe6836..2e8c061 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -45,3 +45,7 @@ obj-y += addrconf_core.o exthdrs_core.o ip6_checksum.o ip6_icmp.o
obj-$(CONFIG_INET) += output_core.o protocol.o $(ipv6-offload)
obj-$(subst m,y,$(CONFIG_IPV6)) += inet6_hashtables.o
+
+ifneq ($(CONFIG_IPV6),)
+obj-$(CONFIG_NET_UDP_TUNNEL) += ip6_udp_tunnel.o
+endif
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 267ce3c..e189480 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1690,14 +1690,12 @@ void addrconf_dad_failure(struct inet6_ifaddr *ifp)
addrconf_mod_dad_work(ifp, 0);
}
-/* Join to solicited addr multicast group. */
-
+/* Join to solicited addr multicast group.
+ * caller must hold RTNL */
void addrconf_join_solict(struct net_device *dev, const struct in6_addr *addr)
{
struct in6_addr maddr;
- ASSERT_RTNL();
-
if (dev->flags&(IFF_LOOPBACK|IFF_NOARP))
return;
@@ -1705,12 +1703,11 @@ void addrconf_join_solict(struct net_device *dev, const struct in6_addr *addr)
ipv6_dev_mc_inc(dev, &maddr);
}
+/* caller must hold RTNL */
void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr)
{
struct in6_addr maddr;
- ASSERT_RTNL();
-
if (idev->dev->flags&(IFF_LOOPBACK|IFF_NOARP))
return;
@@ -1718,26 +1715,24 @@ void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr)
__ipv6_dev_mc_dec(idev, &maddr);
}
+/* caller must hold RTNL */
static void addrconf_join_anycast(struct inet6_ifaddr *ifp)
{
struct in6_addr addr;
- ASSERT_RTNL();
-
if (ifp->prefix_len >= 127) /* RFC 6164 */
return;
ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len);
if (ipv6_addr_any(&addr))
return;
- ipv6_dev_ac_inc(ifp->idev->dev, &addr);
+ __ipv6_dev_ac_inc(ifp->idev, &addr);
}
+/* caller must hold RTNL */
static void addrconf_leave_anycast(struct inet6_ifaddr *ifp)
{
struct in6_addr addr;
- ASSERT_RTNL();
-
if (ifp->prefix_len >= 127) /* RFC 6164 */
return;
ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len);
@@ -2849,6 +2844,9 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
if (dev->flags & IFF_SLAVE)
break;
+ if (idev && idev->cnf.disable_ipv6)
+ break;
+
if (event == NETDEV_UP) {
if (!addrconf_qdisc_ok(dev)) {
/* device is not ready yet. */
@@ -3099,11 +3097,13 @@ restart:
write_unlock_bh(&idev->lock);
- /* Step 5: Discard multicast list */
- if (how)
+ /* Step 5: Discard anycast and multicast list */
+ if (how) {
+ ipv6_ac_destroy_dev(idev);
ipv6_mc_destroy_dev(idev);
- else
+ } else {
ipv6_mc_down(idev);
+ }
idev->tstamp = jiffies;
@@ -4773,15 +4773,11 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
addrconf_leave_solict(ifp->idev, &ifp->addr);
if (!ipv6_addr_any(&ifp->peer_addr)) {
struct rt6_info *rt;
- struct net_device *dev = ifp->idev->dev;
-
- rt = rt6_lookup(dev_net(dev), &ifp->peer_addr, NULL,
- dev->ifindex, 1);
- if (rt) {
- dst_hold(&rt->dst);
- if (ip6_del_rt(rt))
- dst_free(&rt->dst);
- }
+
+ rt = addrconf_get_prefix_route(&ifp->peer_addr, 128,
+ ifp->idev->dev, 0, 0);
+ if (rt && ip6_del_rt(rt))
+ dst_free(&rt->dst);
}
dst_hold(&ifp->rt->dst);
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index b9393e6..34f726f 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -302,7 +302,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
/* Reproduce AF_INET checks to make the bindings consistent */
v4addr = addr->sin6_addr.s6_addr32[3];
chk_addr_ret = inet_addr_type(net, v4addr);
- if (!sysctl_ip_nonlocal_bind &&
+ if (!net->ipv4.sysctl_ip_nonlocal_bind &&
!(inet->freebind || inet->transparent) &&
v4addr != htonl(INADDR_ANY) &&
chk_addr_ret != RTN_LOCAL &&
@@ -672,10 +672,10 @@ int inet6_sk_rebuild_header(struct sock *sk)
}
EXPORT_SYMBOL_GPL(inet6_sk_rebuild_header);
-bool ipv6_opt_accepted(const struct sock *sk, const struct sk_buff *skb)
+bool ipv6_opt_accepted(const struct sock *sk, const struct sk_buff *skb,
+ const struct inet6_skb_parm *opt)
{
const struct ipv6_pinfo *np = inet6_sk(sk);
- const struct inet6_skb_parm *opt = IP6CB(skb);
if (np->rxopt.all) {
if ((opt->hop && (np->rxopt.bits.hopopts ||
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index 2101832..f5e319a 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -46,10 +46,6 @@
static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr);
-/* Big ac list lock for all the sockets */
-static DEFINE_SPINLOCK(ipv6_sk_ac_lock);
-
-
/*
* socket join an anycast group
*/
@@ -77,7 +73,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
pac->acl_next = NULL;
pac->acl_addr = *addr;
- rcu_read_lock();
+ rtnl_lock();
if (ifindex == 0) {
struct rt6_info *rt;
@@ -90,11 +86,11 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
goto error;
} else {
/* router, no matching interface: just pick one */
- dev = dev_get_by_flags_rcu(net, IFF_UP,
- IFF_UP | IFF_LOOPBACK);
+ dev = __dev_get_by_flags(net, IFF_UP,
+ IFF_UP | IFF_LOOPBACK);
}
} else
- dev = dev_get_by_index_rcu(net, ifindex);
+ dev = __dev_get_by_index(net, ifindex);
if (dev == NULL) {
err = -ENODEV;
@@ -126,17 +122,15 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
goto error;
}
- err = ipv6_dev_ac_inc(dev, addr);
+ err = __ipv6_dev_ac_inc(idev, addr);
if (!err) {
- spin_lock_bh(&ipv6_sk_ac_lock);
pac->acl_next = np->ipv6_ac_list;
np->ipv6_ac_list = pac;
- spin_unlock_bh(&ipv6_sk_ac_lock);
pac = NULL;
}
error:
- rcu_read_unlock();
+ rtnl_unlock();
if (pac)
sock_kfree_s(sk, pac, sizeof(*pac));
return err;
@@ -152,7 +146,7 @@ int ipv6_sock_ac_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
struct ipv6_ac_socklist *pac, *prev_pac;
struct net *net = sock_net(sk);
- spin_lock_bh(&ipv6_sk_ac_lock);
+ rtnl_lock();
prev_pac = NULL;
for (pac = np->ipv6_ac_list; pac; pac = pac->acl_next) {
if ((ifindex == 0 || pac->acl_ifindex == ifindex) &&
@@ -161,7 +155,7 @@ int ipv6_sock_ac_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
prev_pac = pac;
}
if (!pac) {
- spin_unlock_bh(&ipv6_sk_ac_lock);
+ rtnl_unlock();
return -ENOENT;
}
if (prev_pac)
@@ -169,13 +163,10 @@ int ipv6_sock_ac_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
else
np->ipv6_ac_list = pac->acl_next;
- spin_unlock_bh(&ipv6_sk_ac_lock);
-
- rcu_read_lock();
- dev = dev_get_by_index_rcu(net, pac->acl_ifindex);
+ dev = __dev_get_by_index(net, pac->acl_ifindex);
if (dev)
ipv6_dev_ac_dec(dev, &pac->acl_addr);
- rcu_read_unlock();
+ rtnl_unlock();
sock_kfree_s(sk, pac, sizeof(*pac));
return 0;
@@ -192,18 +183,16 @@ void ipv6_sock_ac_close(struct sock *sk)
if (!np->ipv6_ac_list)
return;
- spin_lock_bh(&ipv6_sk_ac_lock);
+ rtnl_lock();
pac = np->ipv6_ac_list;
np->ipv6_ac_list = NULL;
- spin_unlock_bh(&ipv6_sk_ac_lock);
prev_index = 0;
- rcu_read_lock();
while (pac) {
struct ipv6_ac_socklist *next = pac->acl_next;
if (pac->acl_ifindex != prev_index) {
- dev = dev_get_by_index_rcu(net, pac->acl_ifindex);
+ dev = __dev_get_by_index(net, pac->acl_ifindex);
prev_index = pac->acl_ifindex;
}
if (dev)
@@ -211,7 +200,12 @@ void ipv6_sock_ac_close(struct sock *sk)
sock_kfree_s(sk, pac, sizeof(*pac));
pac = next;
}
- rcu_read_unlock();
+ rtnl_unlock();
+}
+
+static void aca_get(struct ifacaddr6 *aca)
+{
+ atomic_inc(&aca->aca_refcnt);
}
static void aca_put(struct ifacaddr6 *ac)
@@ -223,20 +217,39 @@ static void aca_put(struct ifacaddr6 *ac)
}
}
+static struct ifacaddr6 *aca_alloc(struct rt6_info *rt,
+ const struct in6_addr *addr)
+{
+ struct inet6_dev *idev = rt->rt6i_idev;
+ struct ifacaddr6 *aca;
+
+ aca = kzalloc(sizeof(*aca), GFP_ATOMIC);
+ if (aca == NULL)
+ return NULL;
+
+ aca->aca_addr = *addr;
+ in6_dev_hold(idev);
+ aca->aca_idev = idev;
+ aca->aca_rt = rt;
+ aca->aca_users = 1;
+ /* aca_tstamp should be updated upon changes */
+ aca->aca_cstamp = aca->aca_tstamp = jiffies;
+ atomic_set(&aca->aca_refcnt, 1);
+ spin_lock_init(&aca->aca_lock);
+
+ return aca;
+}
+
/*
* device anycast group inc (add if not found)
*/
-int ipv6_dev_ac_inc(struct net_device *dev, const struct in6_addr *addr)
+int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr)
{
struct ifacaddr6 *aca;
- struct inet6_dev *idev;
struct rt6_info *rt;
int err;
- idev = in6_dev_get(dev);
-
- if (idev == NULL)
- return -EINVAL;
+ ASSERT_RTNL();
write_lock_bh(&idev->lock);
if (idev->dead) {
@@ -252,46 +265,35 @@ int ipv6_dev_ac_inc(struct net_device *dev, const struct in6_addr *addr)
}
}
- /*
- * not found: create a new one.
- */
-
- aca = kzalloc(sizeof(struct ifacaddr6), GFP_ATOMIC);
-
- if (aca == NULL) {
- err = -ENOMEM;
- goto out;
- }
-
rt = addrconf_dst_alloc(idev, addr, true);
if (IS_ERR(rt)) {
- kfree(aca);
err = PTR_ERR(rt);
goto out;
}
-
- aca->aca_addr = *addr;
- aca->aca_idev = idev;
- aca->aca_rt = rt;
- aca->aca_users = 1;
- /* aca_tstamp should be updated upon changes */
- aca->aca_cstamp = aca->aca_tstamp = jiffies;
- atomic_set(&aca->aca_refcnt, 2);
- spin_lock_init(&aca->aca_lock);
+ aca = aca_alloc(rt, addr);
+ if (aca == NULL) {
+ ip6_rt_put(rt);
+ err = -ENOMEM;
+ goto out;
+ }
aca->aca_next = idev->ac_list;
idev->ac_list = aca;
+
+ /* Hold this for addrconf_join_solict() below before we unlock,
+ * it is already exposed via idev->ac_list.
+ */
+ aca_get(aca);
write_unlock_bh(&idev->lock);
ip6_ins_rt(rt);
- addrconf_join_solict(dev, &aca->aca_addr);
+ addrconf_join_solict(idev->dev, &aca->aca_addr);
aca_put(aca);
return 0;
out:
write_unlock_bh(&idev->lock);
- in6_dev_put(idev);
return err;
}
@@ -302,6 +304,8 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr)
{
struct ifacaddr6 *aca, *prev_aca;
+ ASSERT_RTNL();
+
write_lock_bh(&idev->lock);
prev_aca = NULL;
for (aca = idev->ac_list; aca; aca = aca->aca_next) {
@@ -331,7 +335,7 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr)
return 0;
}
-/* called with rcu_read_lock() */
+/* called with rtnl_lock() */
static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr)
{
struct inet6_dev *idev = __in6_dev_get(dev);
@@ -341,6 +345,27 @@ static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr)
return __ipv6_dev_ac_dec(idev, addr);
}
+void ipv6_ac_destroy_dev(struct inet6_dev *idev)
+{
+ struct ifacaddr6 *aca;
+
+ write_lock_bh(&idev->lock);
+ while ((aca = idev->ac_list) != NULL) {
+ idev->ac_list = aca->aca_next;
+ write_unlock_bh(&idev->lock);
+
+ addrconf_leave_solict(idev, &aca->aca_addr);
+
+ dst_hold(&aca->aca_rt->dst);
+ ip6_del_rt(aca->aca_rt);
+
+ aca_put(aca);
+
+ write_lock_bh(&idev->lock);
+ }
+ write_unlock_bh(&idev->lock);
+}
+
/*
* check if the interface has this anycast address
* called with rcu_read_lock()
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 1844e87..2cdc383 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -332,7 +332,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
{
struct ipv6_pinfo *np = inet6_sk(sk);
struct sock_exterr_skb *serr;
- struct sk_buff *skb, *skb2;
+ struct sk_buff *skb;
DECLARE_SOCKADDR(struct sockaddr_in6 *, sin, msg->msg_name);
struct {
struct sock_extended_err ee;
@@ -342,7 +342,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
int copied;
err = -EAGAIN;
- skb = skb_dequeue(&sk->sk_error_queue);
+ skb = sock_dequeue_err_skb(sk);
if (skb == NULL)
goto out;
@@ -415,17 +415,6 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
msg->msg_flags |= MSG_ERRQUEUE;
err = copied;
- /* Reset and regenerate socket error */
- spin_lock_bh(&sk->sk_error_queue.lock);
- sk->sk_err = 0;
- if ((skb2 = skb_peek(&sk->sk_error_queue)) != NULL) {
- sk->sk_err = SKB_EXT_ERR(skb2)->ee.ee_errno;
- spin_unlock_bh(&sk->sk_error_queue.lock);
- sk->sk_error_report(sk);
- } else {
- spin_unlock_bh(&sk->sk_error_queue.lock);
- }
-
out_free_skb:
kfree_skb(skb);
out:
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 394bb82..141e1f3 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -170,11 +170,11 @@ static bool is_ineligible(const struct sk_buff *skb)
/*
* Check the ICMP output rate limit
*/
-static inline bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
- struct flowi6 *fl6)
+static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
+ struct flowi6 *fl6)
{
- struct dst_entry *dst;
struct net *net = sock_net(sk);
+ struct dst_entry *dst;
bool res = false;
/* Informational messages are not limited. */
@@ -199,16 +199,20 @@ static inline bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
} else {
struct rt6_info *rt = (struct rt6_info *)dst;
int tmo = net->ipv6.sysctl.icmpv6_time;
- struct inet_peer *peer;
/* Give more bandwidth to wider prefixes. */
if (rt->rt6i_dst.plen < 128)
tmo >>= ((128 - rt->rt6i_dst.plen)>>5);
- peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1);
- res = inet_peer_xrlim_allow(peer, tmo);
- if (peer)
- inet_putpeer(peer);
+ if (icmp_global_allow()) {
+ struct inet_peer *peer;
+
+ peer = inet_getpeer_v6(net->ipv6.peers,
+ &rt->rt6i_dst.addr, 1);
+ res = inet_peer_xrlim_allow(peer, tmo);
+ if (peer)
+ inet_putpeer(peer);
+ }
}
dst_release(dst);
return res;
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index 5bcda33..9034f76 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -53,31 +53,6 @@ static int ipv6_gso_pull_exthdrs(struct sk_buff *skb, int proto)
return proto;
}
-static int ipv6_gso_send_check(struct sk_buff *skb)
-{
- const struct ipv6hdr *ipv6h;
- const struct net_offload *ops;
- int err = -EINVAL;
-
- if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h))))
- goto out;
-
- ipv6h = ipv6_hdr(skb);
- __skb_pull(skb, sizeof(*ipv6h));
- err = -EPROTONOSUPPORT;
-
- ops = rcu_dereference(inet6_offloads[
- ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr)]);
-
- if (likely(ops && ops->callbacks.gso_send_check)) {
- skb_reset_transport_header(skb);
- err = ops->callbacks.gso_send_check(skb);
- }
-
-out:
- return err;
-}
-
static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
netdev_features_t features)
{
@@ -261,6 +236,9 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head,
/* flush if Traffic Class fields are different */
NAPI_GRO_CB(p)->flush |= !!(first_word & htonl(0x0FF00000));
NAPI_GRO_CB(p)->flush |= flush;
+
+ /* Clear flush_id, there's really no concept of ID in IPv6. */
+ NAPI_GRO_CB(p)->flush_id = 0;
}
NAPI_GRO_CB(skb)->flush |= flush;
@@ -303,7 +281,6 @@ out_unlock:
static struct packet_offload ipv6_packet_offload __read_mostly = {
.type = cpu_to_be16(ETH_P_IPV6),
.callbacks = {
- .gso_send_check = ipv6_gso_send_check,
.gso_segment = ipv6_gso_segment,
.gro_receive = ipv6_gro_receive,
.gro_complete = ipv6_gro_complete,
@@ -312,8 +289,9 @@ static struct packet_offload ipv6_packet_offload __read_mostly = {
static const struct net_offload sit_offload = {
.callbacks = {
- .gso_send_check = ipv6_gso_send_check,
.gso_segment = ipv6_gso_segment,
+ .gro_receive = ipv6_gro_receive,
+ .gro_complete = ipv6_gro_complete,
},
};
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index b7a3e7b..8e950c2 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -701,11 +701,7 @@ int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
return 0;
}
- while (frag) {
- skb = frag->next;
- kfree_skb(frag);
- frag = skb;
- }
+ kfree_skb_list(frag);
IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
IPSTATS_MIB_FRAGFAILS);
@@ -1008,7 +1004,7 @@ struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
if (final_dst)
fl6->daddr = *final_dst;
- return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
+ return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
@@ -1040,7 +1036,7 @@ struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
if (final_dst)
fl6->daddr = *final_dst;
- return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
+ return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
diff --git a/net/ipv6/ip6_udp_tunnel.c b/net/ipv6/ip6_udp_tunnel.c
new file mode 100644
index 0000000..b04ed72
--- /dev/null
+++ b/net/ipv6/ip6_udp_tunnel.c
@@ -0,0 +1,107 @@
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/socket.h>
+#include <linux/udp.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/in6.h>
+#include <net/udp.h>
+#include <net/udp_tunnel.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
+#include <net/ip6_tunnel.h>
+#include <net/ip6_checksum.h>
+
+int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg,
+ struct socket **sockp)
+{
+ struct sockaddr_in6 udp6_addr;
+ int err;
+ struct socket *sock = NULL;
+
+ err = sock_create_kern(AF_INET6, SOCK_DGRAM, 0, &sock);
+ if (err < 0)
+ goto error;
+
+ sk_change_net(sock->sk, net);
+
+ udp6_addr.sin6_family = AF_INET6;
+ memcpy(&udp6_addr.sin6_addr, &cfg->local_ip6,
+ sizeof(udp6_addr.sin6_addr));
+ udp6_addr.sin6_port = cfg->local_udp_port;
+ err = kernel_bind(sock, (struct sockaddr *)&udp6_addr,
+ sizeof(udp6_addr));
+ if (err < 0)
+ goto error;
+
+ if (cfg->peer_udp_port) {
+ udp6_addr.sin6_family = AF_INET6;
+ memcpy(&udp6_addr.sin6_addr, &cfg->peer_ip6,
+ sizeof(udp6_addr.sin6_addr));
+ udp6_addr.sin6_port = cfg->peer_udp_port;
+ err = kernel_connect(sock,
+ (struct sockaddr *)&udp6_addr,
+ sizeof(udp6_addr), 0);
+ }
+ if (err < 0)
+ goto error;
+
+ udp_set_no_check6_tx(sock->sk, !cfg->use_udp6_tx_checksums);
+ udp_set_no_check6_rx(sock->sk, !cfg->use_udp6_rx_checksums);
+
+ *sockp = sock;
+ return 0;
+
+error:
+ if (sock) {
+ kernel_sock_shutdown(sock, SHUT_RDWR);
+ sk_release_kernel(sock->sk);
+ }
+ *sockp = NULL;
+ return err;
+}
+EXPORT_SYMBOL_GPL(udp_sock_create6);
+
+int udp_tunnel6_xmit_skb(struct socket *sock, struct dst_entry *dst,
+ struct sk_buff *skb, struct net_device *dev,
+ struct in6_addr *saddr, struct in6_addr *daddr,
+ __u8 prio, __u8 ttl, __be16 src_port, __be16 dst_port)
+{
+ struct udphdr *uh;
+ struct ipv6hdr *ip6h;
+ struct sock *sk = sock->sk;
+
+ __skb_push(skb, sizeof(*uh));
+ skb_reset_transport_header(skb);
+ uh = udp_hdr(skb);
+
+ uh->dest = dst_port;
+ uh->source = src_port;
+
+ uh->len = htons(skb->len);
+ uh->check = 0;
+
+ memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
+ IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED
+ | IPSKB_REROUTED);
+ skb_dst_set(skb, dst);
+
+ udp6_set_csum(udp_get_no_check6_tx(sk), skb, &inet6_sk(sk)->saddr,
+ &sk->sk_v6_daddr, skb->len);
+
+ __skb_push(skb, sizeof(*ip6h));
+ skb_reset_network_header(skb);
+ ip6h = ipv6_hdr(skb);
+ ip6_flow_hdr(ip6h, prio, htonl(0));
+ ip6h->payload_len = htons(skb->len);
+ ip6h->nexthdr = IPPROTO_UDP;
+ ip6h->hop_limit = ttl;
+ ip6h->daddr = *daddr;
+ ip6h->saddr = *saddr;
+
+ ip6tunnel_xmit(skb, dev);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(udp_tunnel6_xmit_skb);
+
+MODULE_LICENSE("GPL");
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 7088179..9648de2 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -64,15 +64,6 @@
#include <net/ip6_checksum.h>
-/* Set to 3 to get tracing... */
-#define MCAST_DEBUG 2
-
-#if MCAST_DEBUG >= 3
-#define MDBG(x) printk x
-#else
-#define MDBG(x)
-#endif
-
/* Ensure that we have struct in6_addr aligned on 32bit word. */
static void *__mld2_query_bugs[] __attribute__((__unused__)) = {
BUILD_BUG_ON_NULL(offsetof(struct mld2_query, mld2q_srcs) % 4),
@@ -82,9 +73,6 @@ static void *__mld2_query_bugs[] __attribute__((__unused__)) = {
static struct in6_addr mld2_all_mcr = MLD2_ALL_MCR_INIT;
-/* Big mc list lock for all the sockets */
-static DEFINE_SPINLOCK(ipv6_sk_mc_lock);
-
static void igmp6_join_group(struct ifmcaddr6 *ma);
static void igmp6_leave_group(struct ifmcaddr6 *ma);
static void igmp6_timer_handler(unsigned long data);
@@ -121,6 +109,7 @@ static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml,
#define IPV6_MLD_MAX_MSF 64
int sysctl_mld_max_msf __read_mostly = IPV6_MLD_MAX_MSF;
+int sysctl_mld_qrv __read_mostly = MLD_QRV_DEFAULT;
/*
* socket join on multicast group
@@ -172,7 +161,7 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
mc_lst->next = NULL;
mc_lst->addr = *addr;
- rcu_read_lock();
+ rtnl_lock();
if (ifindex == 0) {
struct rt6_info *rt;
rt = rt6_lookup(net, addr, NULL, 0, 0);
@@ -181,10 +170,10 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
ip6_rt_put(rt);
}
} else
- dev = dev_get_by_index_rcu(net, ifindex);
+ dev = __dev_get_by_index(net, ifindex);
if (dev == NULL) {
- rcu_read_unlock();
+ rtnl_unlock();
sock_kfree_s(sk, mc_lst, sizeof(*mc_lst));
return -ENODEV;
}
@@ -201,17 +190,15 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
err = ipv6_dev_mc_inc(dev, addr);
if (err) {
- rcu_read_unlock();
+ rtnl_unlock();
sock_kfree_s(sk, mc_lst, sizeof(*mc_lst));
return err;
}
- spin_lock(&ipv6_sk_mc_lock);
mc_lst->next = np->ipv6_mc_list;
rcu_assign_pointer(np->ipv6_mc_list, mc_lst);
- spin_unlock(&ipv6_sk_mc_lock);
- rcu_read_unlock();
+ rtnl_unlock();
return 0;
}
@@ -229,20 +216,17 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
if (!ipv6_addr_is_multicast(addr))
return -EINVAL;
- spin_lock(&ipv6_sk_mc_lock);
+ rtnl_lock();
for (lnk = &np->ipv6_mc_list;
- (mc_lst = rcu_dereference_protected(*lnk,
- lockdep_is_held(&ipv6_sk_mc_lock))) != NULL;
+ (mc_lst = rtnl_dereference(*lnk)) != NULL;
lnk = &mc_lst->next) {
if ((ifindex == 0 || mc_lst->ifindex == ifindex) &&
ipv6_addr_equal(&mc_lst->addr, addr)) {
struct net_device *dev;
*lnk = mc_lst->next;
- spin_unlock(&ipv6_sk_mc_lock);
- rcu_read_lock();
- dev = dev_get_by_index_rcu(net, mc_lst->ifindex);
+ dev = __dev_get_by_index(net, mc_lst->ifindex);
if (dev != NULL) {
struct inet6_dev *idev = __in6_dev_get(dev);
@@ -251,13 +235,14 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
__ipv6_dev_mc_dec(idev, &mc_lst->addr);
} else
(void) ip6_mc_leave_src(sk, mc_lst, NULL);
- rcu_read_unlock();
+ rtnl_unlock();
+
atomic_sub(sizeof(*mc_lst), &sk->sk_omem_alloc);
kfree_rcu(mc_lst, rcu);
return 0;
}
}
- spin_unlock(&ipv6_sk_mc_lock);
+ rtnl_unlock();
return -EADDRNOTAVAIL;
}
@@ -302,16 +287,13 @@ void ipv6_sock_mc_close(struct sock *sk)
if (!rcu_access_pointer(np->ipv6_mc_list))
return;
- spin_lock(&ipv6_sk_mc_lock);
- while ((mc_lst = rcu_dereference_protected(np->ipv6_mc_list,
- lockdep_is_held(&ipv6_sk_mc_lock))) != NULL) {
+ rtnl_lock();
+ while ((mc_lst = rtnl_dereference(np->ipv6_mc_list)) != NULL) {
struct net_device *dev;
np->ipv6_mc_list = mc_lst->next;
- spin_unlock(&ipv6_sk_mc_lock);
- rcu_read_lock();
- dev = dev_get_by_index_rcu(net, mc_lst->ifindex);
+ dev = __dev_get_by_index(net, mc_lst->ifindex);
if (dev) {
struct inet6_dev *idev = __in6_dev_get(dev);
@@ -320,14 +302,12 @@ void ipv6_sock_mc_close(struct sock *sk)
__ipv6_dev_mc_dec(idev, &mc_lst->addr);
} else
(void) ip6_mc_leave_src(sk, mc_lst, NULL);
- rcu_read_unlock();
atomic_sub(sizeof(*mc_lst), &sk->sk_omem_alloc);
kfree_rcu(mc_lst, rcu);
- spin_lock(&ipv6_sk_mc_lock);
}
- spin_unlock(&ipv6_sk_mc_lock);
+ rtnl_unlock();
}
int ip6_mc_source(int add, int omode, struct sock *sk,
@@ -576,9 +556,8 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf,
}
err = -EADDRNOTAVAIL;
- /*
- * changes to the ipv6_mc_list require the socket lock and
- * a read lock on ip6_sk_mc_lock. We have the socket lock,
+ /* changes to the ipv6_mc_list require the socket lock and
+ * rtnl lock. We have the socket lock and rcu read lock,
* so reading the list is safe.
*/
@@ -602,9 +581,8 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf,
copy_to_user(optval, gsf, GROUP_FILTER_SIZE(0))) {
return -EFAULT;
}
- /* changes to psl require the socket lock, a read lock on
- * on ipv6_sk_mc_lock and a write lock on pmc->sflock. We
- * have the socket lock, so reading here is safe.
+ /* changes to psl require the socket lock, and a write lock
+ * on pmc->sflock. We have the socket lock so reading here is safe.
*/
for (i = 0; i < copycount; i++) {
struct sockaddr_in6 *psin6;
@@ -663,14 +641,6 @@ bool inet6_mc_check(struct sock *sk, const struct in6_addr *mc_addr,
return rv;
}
-static void ma_put(struct ifmcaddr6 *mc)
-{
- if (atomic_dec_and_test(&mc->mca_refcnt)) {
- in6_dev_put(mc->idev);
- kfree(mc);
- }
-}
-
static void igmp6_group_added(struct ifmcaddr6 *mc)
{
struct net_device *dev = mc->idev->dev;
@@ -836,6 +806,48 @@ static void mld_clear_delrec(struct inet6_dev *idev)
read_unlock_bh(&idev->lock);
}
+static void mca_get(struct ifmcaddr6 *mc)
+{
+ atomic_inc(&mc->mca_refcnt);
+}
+
+static void ma_put(struct ifmcaddr6 *mc)
+{
+ if (atomic_dec_and_test(&mc->mca_refcnt)) {
+ in6_dev_put(mc->idev);
+ kfree(mc);
+ }
+}
+
+static struct ifmcaddr6 *mca_alloc(struct inet6_dev *idev,
+ const struct in6_addr *addr)
+{
+ struct ifmcaddr6 *mc;
+
+ mc = kzalloc(sizeof(*mc), GFP_ATOMIC);
+ if (mc == NULL)
+ return NULL;
+
+ setup_timer(&mc->mca_timer, igmp6_timer_handler, (unsigned long)mc);
+
+ mc->mca_addr = *addr;
+ mc->idev = idev; /* reference taken by caller */
+ mc->mca_users = 1;
+ /* mca_stamp should be updated upon changes */
+ mc->mca_cstamp = mc->mca_tstamp = jiffies;
+ atomic_set(&mc->mca_refcnt, 1);
+ spin_lock_init(&mc->mca_lock);
+
+ /* initial mode is (EX, empty) */
+ mc->mca_sfmode = MCAST_EXCLUDE;
+ mc->mca_sfcount[MCAST_EXCLUDE] = 1;
+
+ if (ipv6_addr_is_ll_all_nodes(&mc->mca_addr) ||
+ IPV6_ADDR_MC_SCOPE(&mc->mca_addr) < IPV6_ADDR_SCOPE_LINKLOCAL)
+ mc->mca_flags |= MAF_NOREPORT;
+
+ return mc;
+}
/*
* device multicast group inc (add if not found)
@@ -845,6 +857,8 @@ int ipv6_dev_mc_inc(struct net_device *dev, const struct in6_addr *addr)
struct ifmcaddr6 *mc;
struct inet6_dev *idev;
+ ASSERT_RTNL();
+
/* we need to take a reference on idev */
idev = in6_dev_get(dev);
@@ -869,38 +883,20 @@ int ipv6_dev_mc_inc(struct net_device *dev, const struct in6_addr *addr)
}
}
- /*
- * not found: create a new one.
- */
-
- mc = kzalloc(sizeof(struct ifmcaddr6), GFP_ATOMIC);
-
- if (mc == NULL) {
+ mc = mca_alloc(idev, addr);
+ if (!mc) {
write_unlock_bh(&idev->lock);
in6_dev_put(idev);
return -ENOMEM;
}
- setup_timer(&mc->mca_timer, igmp6_timer_handler, (unsigned long)mc);
-
- mc->mca_addr = *addr;
- mc->idev = idev; /* (reference taken) */
- mc->mca_users = 1;
- /* mca_stamp should be updated upon changes */
- mc->mca_cstamp = mc->mca_tstamp = jiffies;
- atomic_set(&mc->mca_refcnt, 2);
- spin_lock_init(&mc->mca_lock);
-
- /* initial mode is (EX, empty) */
- mc->mca_sfmode = MCAST_EXCLUDE;
- mc->mca_sfcount[MCAST_EXCLUDE] = 1;
-
- if (ipv6_addr_is_ll_all_nodes(&mc->mca_addr) ||
- IPV6_ADDR_MC_SCOPE(&mc->mca_addr) < IPV6_ADDR_SCOPE_LINKLOCAL)
- mc->mca_flags |= MAF_NOREPORT;
-
mc->next = idev->mc_list;
idev->mc_list = mc;
+
+ /* Hold this for the code below before we unlock,
+ * it is already exposed via idev->mc_list.
+ */
+ mca_get(mc);
write_unlock_bh(&idev->lock);
mld_del_delrec(idev, &mc->mca_addr);
@@ -916,6 +912,8 @@ int __ipv6_dev_mc_dec(struct inet6_dev *idev, const struct in6_addr *addr)
{
struct ifmcaddr6 *ma, **map;
+ ASSERT_RTNL();
+
write_lock_bh(&idev->lock);
for (map = &idev->mc_list; (ma = *map) != NULL; map = &ma->next) {
if (ipv6_addr_equal(&ma->mca_addr, addr)) {
@@ -942,7 +940,7 @@ int ipv6_dev_mc_dec(struct net_device *dev, const struct in6_addr *addr)
struct inet6_dev *idev;
int err;
- rcu_read_lock();
+ ASSERT_RTNL();
idev = __in6_dev_get(dev);
if (!idev)
@@ -950,7 +948,6 @@ int ipv6_dev_mc_dec(struct net_device *dev, const struct in6_addr *addr)
else
err = __ipv6_dev_mc_dec(idev, addr);
- rcu_read_unlock();
return err;
}
@@ -1191,15 +1188,16 @@ static void mld_update_qrv(struct inet6_dev *idev,
* and SHOULD NOT be one. Catch this here if we ever run
* into such a case in future.
*/
+ const int min_qrv = min(MLD_QRV_DEFAULT, sysctl_mld_qrv);
WARN_ON(idev->mc_qrv == 0);
if (mlh2->mld2q_qrv > 0)
idev->mc_qrv = mlh2->mld2q_qrv;
- if (unlikely(idev->mc_qrv < 2)) {
+ if (unlikely(idev->mc_qrv < min_qrv)) {
net_warn_ratelimited("IPv6: MLD: clamping QRV from %u to %u!\n",
- idev->mc_qrv, MLD_QRV_DEFAULT);
- idev->mc_qrv = MLD_QRV_DEFAULT;
+ idev->mc_qrv, min_qrv);
+ idev->mc_qrv = min_qrv;
}
}
@@ -1239,7 +1237,7 @@ static void mld_update_qri(struct inet6_dev *idev,
}
static int mld_process_v1(struct inet6_dev *idev, struct mld_msg *mld,
- unsigned long *max_delay)
+ unsigned long *max_delay, bool v1_query)
{
unsigned long mldv1_md;
@@ -1247,11 +1245,32 @@ static int mld_process_v1(struct inet6_dev *idev, struct mld_msg *mld,
if (mld_in_v2_mode_only(idev))
return -EINVAL;
- /* MLDv1 router present */
mldv1_md = ntohs(mld->mld_maxdelay);
+
+ /* When in MLDv1 fallback and a MLDv2 router start-up being
+ * unaware of current MLDv1 operation, the MRC == MRD mapping
+ * only works when the exponential algorithm is not being
+ * used (as MLDv1 is unaware of such things).
+ *
+ * According to the RFC author, the MLDv2 implementations
+ * he's aware of all use a MRC < 32768 on start up queries.
+ *
+ * Thus, should we *ever* encounter something else larger
+ * than that, just assume the maximum possible within our
+ * reach.
+ */
+ if (!v1_query)
+ mldv1_md = min(mldv1_md, MLDV1_MRD_MAX_COMPAT);
+
*max_delay = max(msecs_to_jiffies(mldv1_md), 1UL);
- mld_set_v1_mode(idev);
+ /* MLDv1 router present: we need to go into v1 mode *only*
+ * when an MLDv1 query is received as per section 9.12. of
+ * RFC3810! And we know from RFC2710 section 3.7 that MLDv1
+ * queries MUST be of exactly 24 octets.
+ */
+ if (v1_query)
+ mld_set_v1_mode(idev);
/* cancel MLDv2 report timer */
mld_gq_stop_timer(idev);
@@ -1266,10 +1285,6 @@ static int mld_process_v1(struct inet6_dev *idev, struct mld_msg *mld,
static int mld_process_v2(struct inet6_dev *idev, struct mld2_query *mld,
unsigned long *max_delay)
{
- /* hosts need to stay in MLDv1 mode, discard MLDv2 queries */
- if (mld_in_v1_mode(idev))
- return -EINVAL;
-
*max_delay = max(msecs_to_jiffies(mldv2_mrc(mld)), 1UL);
mld_update_qrv(idev, mld);
@@ -1326,8 +1341,11 @@ int igmp6_event_query(struct sk_buff *skb)
!(group_type&IPV6_ADDR_MULTICAST))
return -EINVAL;
- if (len == MLD_V1_QUERY_LEN) {
- err = mld_process_v1(idev, mld, &max_delay);
+ if (len < MLD_V1_QUERY_LEN) {
+ return -EINVAL;
+ } else if (len == MLD_V1_QUERY_LEN || mld_in_v1_mode(idev)) {
+ err = mld_process_v1(idev, mld, &max_delay,
+ len == MLD_V1_QUERY_LEN);
if (err < 0)
return err;
} else if (len >= MLD_V2_QUERY_LEN_MIN) {
@@ -1359,8 +1377,9 @@ int igmp6_event_query(struct sk_buff *skb)
mlh2 = (struct mld2_query *)skb_transport_header(skb);
mark = 1;
}
- } else
+ } else {
return -EINVAL;
+ }
read_lock_bh(&idev->lock);
if (group_type == IPV6_ADDR_ANY) {
@@ -2366,7 +2385,7 @@ static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml,
{
int err;
- /* callers have the socket lock and a write lock on ipv6_sk_mc_lock,
+ /* callers have the socket lock and rtnl lock
* so no other readers or writers of iml or its sflist
*/
if (!iml->sflist) {
@@ -2478,6 +2497,14 @@ void ipv6_mc_down(struct inet6_dev *idev)
mld_clear_delrec(idev);
}
+static void ipv6_mc_reset(struct inet6_dev *idev)
+{
+ idev->mc_qrv = sysctl_mld_qrv;
+ idev->mc_qi = MLD_QI_DEFAULT;
+ idev->mc_qri = MLD_QRI_DEFAULT;
+ idev->mc_v1_seen = 0;
+ idev->mc_maxdelay = unsolicited_report_interval(idev);
+}
/* Device going up */
@@ -2488,6 +2515,7 @@ void ipv6_mc_up(struct inet6_dev *idev)
/* Install multicast list, except for all-nodes (already installed) */
read_lock_bh(&idev->lock);
+ ipv6_mc_reset(idev);
for (i = idev->mc_list; i; i = i->next)
igmp6_group_added(i);
read_unlock_bh(&idev->lock);
@@ -2508,13 +2536,7 @@ void ipv6_mc_init_dev(struct inet6_dev *idev)
(unsigned long)idev);
setup_timer(&idev->mc_dad_timer, mld_dad_timer_expire,
(unsigned long)idev);
-
- idev->mc_qrv = MLD_QRV_DEFAULT;
- idev->mc_qi = MLD_QI_DEFAULT;
- idev->mc_qri = MLD_QRI_DEFAULT;
-
- idev->mc_maxdelay = unsolicited_report_interval(idev);
- idev->mc_v1_seen = 0;
+ ipv6_mc_reset(idev);
write_unlock_bh(&idev->lock);
}
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index ac93df1..a8f2530 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -57,9 +57,19 @@ config NFT_REJECT_IPV6
config NF_LOG_IPV6
tristate "IPv6 packet logging"
- depends on NETFILTER_ADVANCED
+ default m if NETFILTER_ADVANCED=n
select NF_LOG_COMMON
+config NF_NAT_IPV6
+ tristate "IPv6 NAT"
+ depends on NF_CONNTRACK_IPV6
+ depends on NETFILTER_ADVANCED
+ select NF_NAT
+ help
+ The IPv6 NAT option allows masquerading, port forwarding and other
+ forms of full Network Address Port Translation. This can be
+ controlled by iptables or nft.
+
config IP6_NF_IPTABLES
tristate "IP6 tables support (required for filtering)"
depends on INET && IPV6
@@ -232,22 +242,37 @@ config IP6_NF_SECURITY
If unsure, say N.
-config NF_NAT_IPV6
- tristate "IPv6 NAT"
+config IP6_NF_NAT
+ tristate "ip6tables NAT support"
depends on NF_CONNTRACK_IPV6
depends on NETFILTER_ADVANCED
select NF_NAT
+ select NF_NAT_IPV6
+ select NETFILTER_XT_NAT
help
- The IPv6 NAT option allows masquerading, port forwarding and other
- forms of full Network Address Port Translation. It is controlled by
- the `nat' table in ip6tables, see the man page for ip6tables(8).
+ This enables the `nat' table in ip6tables. This allows masquerading,
+ port forwarding and other forms of full Network Address Port
+ Translation.
To compile it as a module, choose M here. If unsure, say N.
-if NF_NAT_IPV6
+if IP6_NF_NAT
+
+config NF_NAT_MASQUERADE_IPV6
+ tristate "IPv6 masquerade support"
+ help
+ This is the kernel functionality to provide NAT in the masquerade
+ flavour (automatic source address selection) for IPv6.
+
+config NFT_MASQ_IPV6
+ tristate "IPv6 masquerade support for nf_tables"
+ depends on NF_TABLES_IPV6
+ depends on NFT_MASQ
+ select NF_NAT_MASQUERADE_IPV6
config IP6_NF_TARGET_MASQUERADE
tristate "MASQUERADE target support"
+ select NF_NAT_MASQUERADE_IPV6
help
Masquerading is a special case of NAT: all outgoing connections are
changed to seem to come from a particular interface's address, and
@@ -265,7 +290,7 @@ config IP6_NF_TARGET_NPT
To compile it as a module, choose M here. If unsure, say N.
-endif # NF_NAT_IPV6
+endif # IP6_NF_NAT
endif # IP6_NF_IPTABLES
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index c0b2631..0f7e5b3 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -8,7 +8,7 @@ obj-$(CONFIG_IP6_NF_FILTER) += ip6table_filter.o
obj-$(CONFIG_IP6_NF_MANGLE) += ip6table_mangle.o
obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o
obj-$(CONFIG_IP6_NF_SECURITY) += ip6table_security.o
-obj-$(CONFIG_NF_NAT_IPV6) += ip6table_nat.o
+obj-$(CONFIG_IP6_NF_NAT) += ip6table_nat.o
# objects for l3 independent conntrack
nf_conntrack_ipv6-y := nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o
@@ -18,6 +18,7 @@ obj-$(CONFIG_NF_CONNTRACK_IPV6) += nf_conntrack_ipv6.o
nf_nat_ipv6-y := nf_nat_l3proto_ipv6.o nf_nat_proto_icmpv6.o
obj-$(CONFIG_NF_NAT_IPV6) += nf_nat_ipv6.o
+obj-$(CONFIG_NF_NAT_MASQUERADE_IPV6) += nf_nat_masquerade_ipv6.o
# defrag
nf_defrag_ipv6-y := nf_defrag_ipv6_hooks.o nf_conntrack_reasm.o
@@ -31,6 +32,7 @@ obj-$(CONFIG_NF_TABLES_IPV6) += nf_tables_ipv6.o
obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV6) += nft_chain_route_ipv6.o
obj-$(CONFIG_NFT_CHAIN_NAT_IPV6) += nft_chain_nat_ipv6.o
obj-$(CONFIG_NFT_REJECT_IPV6) += nft_reject_ipv6.o
+obj-$(CONFIG_NFT_MASQ_IPV6) += nft_masq_ipv6.o
# matches
obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o
diff --git a/net/ipv6/netfilter/ip6t_MASQUERADE.c b/net/ipv6/netfilter/ip6t_MASQUERADE.c
index 3e4e92d..7f9f45d 100644
--- a/net/ipv6/netfilter/ip6t_MASQUERADE.c
+++ b/net/ipv6/netfilter/ip6t_MASQUERADE.c
@@ -19,33 +19,12 @@
#include <net/netfilter/nf_nat.h>
#include <net/addrconf.h>
#include <net/ipv6.h>
+#include <net/netfilter/ipv6/nf_nat_masquerade.h>
static unsigned int
masquerade_tg6(struct sk_buff *skb, const struct xt_action_param *par)
{
- const struct nf_nat_range *range = par->targinfo;
- enum ip_conntrack_info ctinfo;
- struct in6_addr src;
- struct nf_conn *ct;
- struct nf_nat_range newrange;
-
- ct = nf_ct_get(skb, &ctinfo);
- NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
- ctinfo == IP_CT_RELATED_REPLY));
-
- if (ipv6_dev_get_saddr(dev_net(par->out), par->out,
- &ipv6_hdr(skb)->daddr, 0, &src) < 0)
- return NF_DROP;
-
- nfct_nat(ct)->masq_index = par->out->ifindex;
-
- newrange.flags = range->flags | NF_NAT_RANGE_MAP_IPS;
- newrange.min_addr.in6 = src;
- newrange.max_addr.in6 = src;
- newrange.min_proto = range->min_proto;
- newrange.max_proto = range->max_proto;
-
- return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC);
+ return nf_nat_masquerade_ipv6(skb, par->targinfo, par->out);
}
static int masquerade_tg6_checkentry(const struct xt_tgchk_param *par)
@@ -57,48 +36,6 @@ static int masquerade_tg6_checkentry(const struct xt_tgchk_param *par)
return 0;
}
-static int device_cmp(struct nf_conn *ct, void *ifindex)
-{
- const struct nf_conn_nat *nat = nfct_nat(ct);
-
- if (!nat)
- return 0;
- if (nf_ct_l3num(ct) != NFPROTO_IPV6)
- return 0;
- return nat->masq_index == (int)(long)ifindex;
-}
-
-static int masq_device_event(struct notifier_block *this,
- unsigned long event, void *ptr)
-{
- const struct net_device *dev = netdev_notifier_info_to_dev(ptr);
- struct net *net = dev_net(dev);
-
- if (event == NETDEV_DOWN)
- nf_ct_iterate_cleanup(net, device_cmp,
- (void *)(long)dev->ifindex, 0, 0);
-
- return NOTIFY_DONE;
-}
-
-static struct notifier_block masq_dev_notifier = {
- .notifier_call = masq_device_event,
-};
-
-static int masq_inet_event(struct notifier_block *this,
- unsigned long event, void *ptr)
-{
- struct inet6_ifaddr *ifa = ptr;
- struct netdev_notifier_info info;
-
- netdev_notifier_info_init(&info, ifa->idev->dev);
- return masq_device_event(this, event, &info);
-}
-
-static struct notifier_block masq_inet_notifier = {
- .notifier_call = masq_inet_event,
-};
-
static struct xt_target masquerade_tg6_reg __read_mostly = {
.name = "MASQUERADE",
.family = NFPROTO_IPV6,
@@ -115,17 +52,14 @@ static int __init masquerade_tg6_init(void)
int err;
err = xt_register_target(&masquerade_tg6_reg);
- if (err == 0) {
- register_netdevice_notifier(&masq_dev_notifier);
- register_inet6addr_notifier(&masq_inet_notifier);
- }
+ if (err == 0)
+ nf_nat_masquerade_ipv6_register_notifier();
return err;
}
static void __exit masquerade_tg6_exit(void)
{
- unregister_inet6addr_notifier(&masq_inet_notifier);
- unregister_netdevice_notifier(&masq_dev_notifier);
+ nf_nat_masquerade_ipv6_unregister_notifier();
xt_unregister_target(&masquerade_tg6_reg);
}
diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c
index 387d8b8..b0634ac 100644
--- a/net/ipv6/netfilter/ip6table_nat.c
+++ b/net/ipv6/netfilter/ip6table_nat.c
@@ -30,222 +30,57 @@ static const struct xt_table nf_nat_ipv6_table = {
.af = NFPROTO_IPV6,
};
-static unsigned int alloc_null_binding(struct nf_conn *ct, unsigned int hooknum)
-{
- /* Force range to this IP; let proto decide mapping for
- * per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED).
- */
- struct nf_nat_range range;
-
- range.flags = 0;
- pr_debug("Allocating NULL binding for %p (%pI6)\n", ct,
- HOOK2MANIP(hooknum) == NF_NAT_MANIP_SRC ?
- &ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip6 :
- &ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip6);
-
- return nf_nat_setup_info(ct, &range, HOOK2MANIP(hooknum));
-}
-
-static unsigned int nf_nat_rule_find(struct sk_buff *skb, unsigned int hooknum,
- const struct net_device *in,
- const struct net_device *out,
- struct nf_conn *ct)
+static unsigned int ip6table_nat_do_chain(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ struct nf_conn *ct)
{
struct net *net = nf_ct_net(ct);
- unsigned int ret;
- ret = ip6t_do_table(skb, hooknum, in, out, net->ipv6.ip6table_nat);
- if (ret == NF_ACCEPT) {
- if (!nf_nat_initialized(ct, HOOK2MANIP(hooknum)))
- ret = alloc_null_binding(ct, hooknum);
- }
- return ret;
+ return ip6t_do_table(skb, ops->hooknum, in, out, net->ipv6.ip6table_nat);
}
-static unsigned int
-nf_nat_ipv6_fn(const struct nf_hook_ops *ops,
- struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
+static unsigned int ip6table_nat_fn(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
{
- struct nf_conn *ct;
- enum ip_conntrack_info ctinfo;
- struct nf_conn_nat *nat;
- enum nf_nat_manip_type maniptype = HOOK2MANIP(ops->hooknum);
- __be16 frag_off;
- int hdrlen;
- u8 nexthdr;
-
- ct = nf_ct_get(skb, &ctinfo);
- /* Can't track? It's not due to stress, or conntrack would
- * have dropped it. Hence it's the user's responsibilty to
- * packet filter it out, or implement conntrack/NAT for that
- * protocol. 8) --RR
- */
- if (!ct)
- return NF_ACCEPT;
-
- /* Don't try to NAT if this packet is not conntracked */
- if (nf_ct_is_untracked(ct))
- return NF_ACCEPT;
-
- nat = nf_ct_nat_ext_add(ct);
- if (nat == NULL)
- return NF_ACCEPT;
-
- switch (ctinfo) {
- case IP_CT_RELATED:
- case IP_CT_RELATED_REPLY:
- nexthdr = ipv6_hdr(skb)->nexthdr;
- hdrlen = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
- &nexthdr, &frag_off);
-
- if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) {
- if (!nf_nat_icmpv6_reply_translation(skb, ct, ctinfo,
- ops->hooknum,
- hdrlen))
- return NF_DROP;
- else
- return NF_ACCEPT;
- }
- /* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */
- case IP_CT_NEW:
- /* Seen it before? This can happen for loopback, retrans,
- * or local packets.
- */
- if (!nf_nat_initialized(ct, maniptype)) {
- unsigned int ret;
-
- ret = nf_nat_rule_find(skb, ops->hooknum, in, out, ct);
- if (ret != NF_ACCEPT)
- return ret;
- } else {
- pr_debug("Already setup manip %s for ct %p\n",
- maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST",
- ct);
- if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, out))
- goto oif_changed;
- }
- break;
-
- default:
- /* ESTABLISHED */
- NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED ||
- ctinfo == IP_CT_ESTABLISHED_REPLY);
- if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, out))
- goto oif_changed;
- }
-
- return nf_nat_packet(ct, ctinfo, ops->hooknum, skb);
-
-oif_changed:
- nf_ct_kill_acct(ct, ctinfo, skb);
- return NF_DROP;
+ return nf_nat_ipv6_fn(ops, skb, in, out, ip6table_nat_do_chain);
}
-static unsigned int
-nf_nat_ipv6_in(const struct nf_hook_ops *ops,
- struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
+static unsigned int ip6table_nat_in(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
{
- unsigned int ret;
- struct in6_addr daddr = ipv6_hdr(skb)->daddr;
-
- ret = nf_nat_ipv6_fn(ops, skb, in, out, okfn);
- if (ret != NF_DROP && ret != NF_STOLEN &&
- ipv6_addr_cmp(&daddr, &ipv6_hdr(skb)->daddr))
- skb_dst_drop(skb);
-
- return ret;
+ return nf_nat_ipv6_in(ops, skb, in, out, ip6table_nat_do_chain);
}
-static unsigned int
-nf_nat_ipv6_out(const struct nf_hook_ops *ops,
- struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
+static unsigned int ip6table_nat_out(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
{
-#ifdef CONFIG_XFRM
- const struct nf_conn *ct;
- enum ip_conntrack_info ctinfo;
- int err;
-#endif
- unsigned int ret;
-
- /* root is playing with raw sockets. */
- if (skb->len < sizeof(struct ipv6hdr))
- return NF_ACCEPT;
-
- ret = nf_nat_ipv6_fn(ops, skb, in, out, okfn);
-#ifdef CONFIG_XFRM
- if (ret != NF_DROP && ret != NF_STOLEN &&
- !(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
- (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
- enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
-
- if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.src.u3,
- &ct->tuplehash[!dir].tuple.dst.u3) ||
- (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMPV6 &&
- ct->tuplehash[dir].tuple.src.u.all !=
- ct->tuplehash[!dir].tuple.dst.u.all)) {
- err = nf_xfrm_me_harder(skb, AF_INET6);
- if (err < 0)
- ret = NF_DROP_ERR(err);
- }
- }
-#endif
- return ret;
+ return nf_nat_ipv6_out(ops, skb, in, out, ip6table_nat_do_chain);
}
-static unsigned int
-nf_nat_ipv6_local_fn(const struct nf_hook_ops *ops,
- struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
+static unsigned int ip6table_nat_local_fn(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
{
- const struct nf_conn *ct;
- enum ip_conntrack_info ctinfo;
- unsigned int ret;
- int err;
-
- /* root is playing with raw sockets. */
- if (skb->len < sizeof(struct ipv6hdr))
- return NF_ACCEPT;
-
- ret = nf_nat_ipv6_fn(ops, skb, in, out, okfn);
- if (ret != NF_DROP && ret != NF_STOLEN &&
- (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
- enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
-
- if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3,
- &ct->tuplehash[!dir].tuple.src.u3)) {
- err = ip6_route_me_harder(skb);
- if (err < 0)
- ret = NF_DROP_ERR(err);
- }
-#ifdef CONFIG_XFRM
- else if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
- ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMPV6 &&
- ct->tuplehash[dir].tuple.dst.u.all !=
- ct->tuplehash[!dir].tuple.src.u.all) {
- err = nf_xfrm_me_harder(skb, AF_INET6);
- if (err < 0)
- ret = NF_DROP_ERR(err);
- }
-#endif
- }
- return ret;
+ return nf_nat_ipv6_local_fn(ops, skb, in, out, ip6table_nat_do_chain);
}
static struct nf_hook_ops nf_nat_ipv6_ops[] __read_mostly = {
/* Before packet filtering, change destination */
{
- .hook = nf_nat_ipv6_in,
+ .hook = ip6table_nat_in,
.owner = THIS_MODULE,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_PRE_ROUTING,
@@ -253,7 +88,7 @@ static struct nf_hook_ops nf_nat_ipv6_ops[] __read_mostly = {
},
/* After packet filtering, change source */
{
- .hook = nf_nat_ipv6_out,
+ .hook = ip6table_nat_out,
.owner = THIS_MODULE,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_POST_ROUTING,
@@ -261,7 +96,7 @@ static struct nf_hook_ops nf_nat_ipv6_ops[] __read_mostly = {
},
/* Before packet filtering, change destination */
{
- .hook = nf_nat_ipv6_local_fn,
+ .hook = ip6table_nat_local_fn,
.owner = THIS_MODULE,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_LOCAL_OUT,
@@ -269,7 +104,7 @@ static struct nf_hook_ops nf_nat_ipv6_ops[] __read_mostly = {
},
/* After packet filtering, change source */
{
- .hook = nf_nat_ipv6_fn,
+ .hook = ip6table_nat_fn,
.owner = THIS_MODULE,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_LOCAL_IN,
diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
index fc8e49b..c5812e1 100644
--- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
@@ -261,6 +261,205 @@ int nf_nat_icmpv6_reply_translation(struct sk_buff *skb,
}
EXPORT_SYMBOL_GPL(nf_nat_icmpv6_reply_translation);
+unsigned int
+nf_nat_ipv6_fn(const struct nf_hook_ops *ops, struct sk_buff *skb,
+ const struct net_device *in, const struct net_device *out,
+ unsigned int (*do_chain)(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ struct nf_conn *ct))
+{
+ struct nf_conn *ct;
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn_nat *nat;
+ enum nf_nat_manip_type maniptype = HOOK2MANIP(ops->hooknum);
+ __be16 frag_off;
+ int hdrlen;
+ u8 nexthdr;
+
+ ct = nf_ct_get(skb, &ctinfo);
+ /* Can't track? It's not due to stress, or conntrack would
+ * have dropped it. Hence it's the user's responsibilty to
+ * packet filter it out, or implement conntrack/NAT for that
+ * protocol. 8) --RR
+ */
+ if (!ct)
+ return NF_ACCEPT;
+
+ /* Don't try to NAT if this packet is not conntracked */
+ if (nf_ct_is_untracked(ct))
+ return NF_ACCEPT;
+
+ nat = nf_ct_nat_ext_add(ct);
+ if (nat == NULL)
+ return NF_ACCEPT;
+
+ switch (ctinfo) {
+ case IP_CT_RELATED:
+ case IP_CT_RELATED_REPLY:
+ nexthdr = ipv6_hdr(skb)->nexthdr;
+ hdrlen = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
+ &nexthdr, &frag_off);
+
+ if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) {
+ if (!nf_nat_icmpv6_reply_translation(skb, ct, ctinfo,
+ ops->hooknum,
+ hdrlen))
+ return NF_DROP;
+ else
+ return NF_ACCEPT;
+ }
+ /* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */
+ case IP_CT_NEW:
+ /* Seen it before? This can happen for loopback, retrans,
+ * or local packets.
+ */
+ if (!nf_nat_initialized(ct, maniptype)) {
+ unsigned int ret;
+
+ ret = do_chain(ops, skb, in, out, ct);
+ if (ret != NF_ACCEPT)
+ return ret;
+
+ if (nf_nat_initialized(ct, HOOK2MANIP(ops->hooknum)))
+ break;
+
+ ret = nf_nat_alloc_null_binding(ct, ops->hooknum);
+ if (ret != NF_ACCEPT)
+ return ret;
+ } else {
+ pr_debug("Already setup manip %s for ct %p\n",
+ maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST",
+ ct);
+ if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, out))
+ goto oif_changed;
+ }
+ break;
+
+ default:
+ /* ESTABLISHED */
+ NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED ||
+ ctinfo == IP_CT_ESTABLISHED_REPLY);
+ if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, out))
+ goto oif_changed;
+ }
+
+ return nf_nat_packet(ct, ctinfo, ops->hooknum, skb);
+
+oif_changed:
+ nf_ct_kill_acct(ct, ctinfo, skb);
+ return NF_DROP;
+}
+EXPORT_SYMBOL_GPL(nf_nat_ipv6_fn);
+
+unsigned int
+nf_nat_ipv6_in(const struct nf_hook_ops *ops, struct sk_buff *skb,
+ const struct net_device *in, const struct net_device *out,
+ unsigned int (*do_chain)(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ struct nf_conn *ct))
+{
+ unsigned int ret;
+ struct in6_addr daddr = ipv6_hdr(skb)->daddr;
+
+ ret = nf_nat_ipv6_fn(ops, skb, in, out, do_chain);
+ if (ret != NF_DROP && ret != NF_STOLEN &&
+ ipv6_addr_cmp(&daddr, &ipv6_hdr(skb)->daddr))
+ skb_dst_drop(skb);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(nf_nat_ipv6_in);
+
+unsigned int
+nf_nat_ipv6_out(const struct nf_hook_ops *ops, struct sk_buff *skb,
+ const struct net_device *in, const struct net_device *out,
+ unsigned int (*do_chain)(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ struct nf_conn *ct))
+{
+#ifdef CONFIG_XFRM
+ const struct nf_conn *ct;
+ enum ip_conntrack_info ctinfo;
+ int err;
+#endif
+ unsigned int ret;
+
+ /* root is playing with raw sockets. */
+ if (skb->len < sizeof(struct ipv6hdr))
+ return NF_ACCEPT;
+
+ ret = nf_nat_ipv6_fn(ops, skb, in, out, do_chain);
+#ifdef CONFIG_XFRM
+ if (ret != NF_DROP && ret != NF_STOLEN &&
+ !(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
+ (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
+ enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+
+ if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.src.u3,
+ &ct->tuplehash[!dir].tuple.dst.u3) ||
+ (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMPV6 &&
+ ct->tuplehash[dir].tuple.src.u.all !=
+ ct->tuplehash[!dir].tuple.dst.u.all)) {
+ err = nf_xfrm_me_harder(skb, AF_INET6);
+ if (err < 0)
+ ret = NF_DROP_ERR(err);
+ }
+ }
+#endif
+ return ret;
+}
+EXPORT_SYMBOL_GPL(nf_nat_ipv6_out);
+
+unsigned int
+nf_nat_ipv6_local_fn(const struct nf_hook_ops *ops, struct sk_buff *skb,
+ const struct net_device *in, const struct net_device *out,
+ unsigned int (*do_chain)(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ struct nf_conn *ct))
+{
+ const struct nf_conn *ct;
+ enum ip_conntrack_info ctinfo;
+ unsigned int ret;
+ int err;
+
+ /* root is playing with raw sockets. */
+ if (skb->len < sizeof(struct ipv6hdr))
+ return NF_ACCEPT;
+
+ ret = nf_nat_ipv6_fn(ops, skb, in, out, do_chain);
+ if (ret != NF_DROP && ret != NF_STOLEN &&
+ (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
+ enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+
+ if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3,
+ &ct->tuplehash[!dir].tuple.src.u3)) {
+ err = ip6_route_me_harder(skb);
+ if (err < 0)
+ ret = NF_DROP_ERR(err);
+ }
+#ifdef CONFIG_XFRM
+ else if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
+ ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMPV6 &&
+ ct->tuplehash[dir].tuple.dst.u.all !=
+ ct->tuplehash[!dir].tuple.src.u.all) {
+ err = nf_xfrm_me_harder(skb, AF_INET6);
+ if (err < 0)
+ ret = NF_DROP_ERR(err);
+ }
+#endif
+ }
+ return ret;
+}
+EXPORT_SYMBOL_GPL(nf_nat_ipv6_local_fn);
+
static int __init nf_nat_l3proto_ipv6_init(void)
{
int err;
diff --git a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
new file mode 100644
index 0000000..7745609
--- /dev/null
+++ b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
@@ -0,0 +1,120 @@
+/*
+ * Copyright (c) 2011 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Based on Rusty Russell's IPv6 MASQUERADE target. Development of IPv6
+ * NAT funded by Astaro.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/atomic.h>
+#include <linux/netdevice.h>
+#include <linux/ipv6.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv6.h>
+#include <net/netfilter/nf_nat.h>
+#include <net/addrconf.h>
+#include <net/ipv6.h>
+#include <net/netfilter/ipv6/nf_nat_masquerade.h>
+
+unsigned int
+nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range *range,
+ const struct net_device *out)
+{
+ enum ip_conntrack_info ctinfo;
+ struct in6_addr src;
+ struct nf_conn *ct;
+ struct nf_nat_range newrange;
+
+ ct = nf_ct_get(skb, &ctinfo);
+ NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
+ ctinfo == IP_CT_RELATED_REPLY));
+
+ if (ipv6_dev_get_saddr(dev_net(out), out,
+ &ipv6_hdr(skb)->daddr, 0, &src) < 0)
+ return NF_DROP;
+
+ nfct_nat(ct)->masq_index = out->ifindex;
+
+ newrange.flags = range->flags | NF_NAT_RANGE_MAP_IPS;
+ newrange.min_addr.in6 = src;
+ newrange.max_addr.in6 = src;
+ newrange.min_proto = range->min_proto;
+ newrange.max_proto = range->max_proto;
+
+ return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC);
+}
+EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6);
+
+static int device_cmp(struct nf_conn *ct, void *ifindex)
+{
+ const struct nf_conn_nat *nat = nfct_nat(ct);
+
+ if (!nat)
+ return 0;
+ if (nf_ct_l3num(ct) != NFPROTO_IPV6)
+ return 0;
+ return nat->masq_index == (int)(long)ifindex;
+}
+
+static int masq_device_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ const struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ struct net *net = dev_net(dev);
+
+ if (event == NETDEV_DOWN)
+ nf_ct_iterate_cleanup(net, device_cmp,
+ (void *)(long)dev->ifindex, 0, 0);
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block masq_dev_notifier = {
+ .notifier_call = masq_device_event,
+};
+
+static int masq_inet_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ struct inet6_ifaddr *ifa = ptr;
+ struct netdev_notifier_info info;
+
+ netdev_notifier_info_init(&info, ifa->idev->dev);
+ return masq_device_event(this, event, &info);
+}
+
+static struct notifier_block masq_inet_notifier = {
+ .notifier_call = masq_inet_event,
+};
+
+static atomic_t masquerade_notifier_refcount = ATOMIC_INIT(0);
+
+void nf_nat_masquerade_ipv6_register_notifier(void)
+{
+ /* check if the notifier is already set */
+ if (atomic_inc_return(&masquerade_notifier_refcount) > 1)
+ return;
+
+ register_netdevice_notifier(&masq_dev_notifier);
+ register_inet6addr_notifier(&masq_inet_notifier);
+}
+EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6_register_notifier);
+
+void nf_nat_masquerade_ipv6_unregister_notifier(void)
+{
+ /* check if the notifier still has clients */
+ if (atomic_dec_return(&masquerade_notifier_refcount) > 0)
+ return;
+
+ unregister_inet6addr_notifier(&masq_inet_notifier);
+ unregister_netdevice_notifier(&masq_dev_notifier);
+}
+EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6_unregister_notifier);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
diff --git a/net/ipv6/netfilter/nft_chain_nat_ipv6.c b/net/ipv6/netfilter/nft_chain_nat_ipv6.c
index d189fcb..1c4b75d 100644
--- a/net/ipv6/netfilter/nft_chain_nat_ipv6.c
+++ b/net/ipv6/netfilter/nft_chain_nat_ipv6.c
@@ -24,144 +24,53 @@
#include <net/netfilter/nf_nat_l3proto.h>
#include <net/ipv6.h>
-/*
- * IPv6 NAT chains
- */
-
-static unsigned int nf_nat_ipv6_fn(const struct nf_hook_ops *ops,
- struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
+static unsigned int nft_nat_do_chain(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ struct nf_conn *ct)
{
- enum ip_conntrack_info ctinfo;
- struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
- struct nf_conn_nat *nat;
- enum nf_nat_manip_type maniptype = HOOK2MANIP(ops->hooknum);
- __be16 frag_off;
- int hdrlen;
- u8 nexthdr;
struct nft_pktinfo pkt;
- unsigned int ret;
-
- if (ct == NULL || nf_ct_is_untracked(ct))
- return NF_ACCEPT;
-
- nat = nf_ct_nat_ext_add(ct);
- if (nat == NULL)
- return NF_ACCEPT;
-
- switch (ctinfo) {
- case IP_CT_RELATED:
- case IP_CT_RELATED + IP_CT_IS_REPLY:
- nexthdr = ipv6_hdr(skb)->nexthdr;
- hdrlen = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
- &nexthdr, &frag_off);
-
- if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) {
- if (!nf_nat_icmpv6_reply_translation(skb, ct, ctinfo,
- ops->hooknum,
- hdrlen))
- return NF_DROP;
- else
- return NF_ACCEPT;
- }
- /* Fall through */
- case IP_CT_NEW:
- if (nf_nat_initialized(ct, maniptype))
- break;
-
- nft_set_pktinfo_ipv6(&pkt, ops, skb, in, out);
- ret = nft_do_chain(&pkt, ops);
- if (ret != NF_ACCEPT)
- return ret;
- if (!nf_nat_initialized(ct, maniptype)) {
- ret = nf_nat_alloc_null_binding(ct, ops->hooknum);
- if (ret != NF_ACCEPT)
- return ret;
- }
- default:
- break;
- }
+ nft_set_pktinfo_ipv6(&pkt, ops, skb, in, out);
- return nf_nat_packet(ct, ctinfo, ops->hooknum, skb);
+ return nft_do_chain(&pkt, ops);
}
-static unsigned int nf_nat_ipv6_prerouting(const struct nf_hook_ops *ops,
- struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
+static unsigned int nft_nat_ipv6_fn(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
{
- struct in6_addr daddr = ipv6_hdr(skb)->daddr;
- unsigned int ret;
-
- ret = nf_nat_ipv6_fn(ops, skb, in, out, okfn);
- if (ret != NF_DROP && ret != NF_STOLEN &&
- ipv6_addr_cmp(&daddr, &ipv6_hdr(skb)->daddr))
- skb_dst_drop(skb);
-
- return ret;
+ return nf_nat_ipv6_fn(ops, skb, in, out, nft_nat_do_chain);
}
-static unsigned int nf_nat_ipv6_postrouting(const struct nf_hook_ops *ops,
- struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
+static unsigned int nft_nat_ipv6_in(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
{
- enum ip_conntrack_info ctinfo __maybe_unused;
- const struct nf_conn *ct __maybe_unused;
- unsigned int ret;
-
- ret = nf_nat_ipv6_fn(ops, skb, in, out, okfn);
-#ifdef CONFIG_XFRM
- if (ret != NF_DROP && ret != NF_STOLEN &&
- !(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
- (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
- enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
-
- if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.src.u3,
- &ct->tuplehash[!dir].tuple.dst.u3) ||
- (ct->tuplehash[dir].tuple.src.u.all !=
- ct->tuplehash[!dir].tuple.dst.u.all))
- if (nf_xfrm_me_harder(skb, AF_INET6) < 0)
- ret = NF_DROP;
- }
-#endif
- return ret;
+ return nf_nat_ipv6_in(ops, skb, in, out, nft_nat_do_chain);
}
-static unsigned int nf_nat_ipv6_output(const struct nf_hook_ops *ops,
- struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
+static unsigned int nft_nat_ipv6_out(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
{
- enum ip_conntrack_info ctinfo;
- const struct nf_conn *ct;
- unsigned int ret;
-
- ret = nf_nat_ipv6_fn(ops, skb, in, out, okfn);
- if (ret != NF_DROP && ret != NF_STOLEN &&
- (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
- enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+ return nf_nat_ipv6_out(ops, skb, in, out, nft_nat_do_chain);
+}
- if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3,
- &ct->tuplehash[!dir].tuple.src.u3)) {
- if (ip6_route_me_harder(skb))
- ret = NF_DROP;
- }
-#ifdef CONFIG_XFRM
- else if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
- ct->tuplehash[dir].tuple.dst.u.all !=
- ct->tuplehash[!dir].tuple.src.u.all)
- if (nf_xfrm_me_harder(skb, AF_INET6))
- ret = NF_DROP;
-#endif
- }
- return ret;
+static unsigned int nft_nat_ipv6_local_fn(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ return nf_nat_ipv6_local_fn(ops, skb, in, out, nft_nat_do_chain);
}
static const struct nf_chain_type nft_chain_nat_ipv6 = {
@@ -174,10 +83,10 @@ static const struct nf_chain_type nft_chain_nat_ipv6 = {
(1 << NF_INET_LOCAL_OUT) |
(1 << NF_INET_LOCAL_IN),
.hooks = {
- [NF_INET_PRE_ROUTING] = nf_nat_ipv6_prerouting,
- [NF_INET_POST_ROUTING] = nf_nat_ipv6_postrouting,
- [NF_INET_LOCAL_OUT] = nf_nat_ipv6_output,
- [NF_INET_LOCAL_IN] = nf_nat_ipv6_fn,
+ [NF_INET_PRE_ROUTING] = nft_nat_ipv6_in,
+ [NF_INET_POST_ROUTING] = nft_nat_ipv6_out,
+ [NF_INET_LOCAL_OUT] = nft_nat_ipv6_local_fn,
+ [NF_INET_LOCAL_IN] = nft_nat_ipv6_fn,
},
};
diff --git a/net/ipv6/netfilter/nft_masq_ipv6.c b/net/ipv6/netfilter/nft_masq_ipv6.c
new file mode 100644
index 0000000..4e51334
--- /dev/null
+++ b/net/ipv6/netfilter/nft_masq_ipv6.c
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nft_masq.h>
+#include <net/netfilter/ipv6/nf_nat_masquerade.h>
+
+static void nft_masq_ipv6_eval(const struct nft_expr *expr,
+ struct nft_data data[NFT_REG_MAX + 1],
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_masq *priv = nft_expr_priv(expr);
+ struct nf_nat_range range;
+ unsigned int verdict;
+
+ range.flags = priv->flags;
+
+ verdict = nf_nat_masquerade_ipv6(pkt->skb, &range, pkt->out);
+
+ data[NFT_REG_VERDICT].verdict = verdict;
+}
+
+static int nft_masq_ipv6_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ int err;
+
+ err = nft_masq_init(ctx, expr, tb);
+ if (err < 0)
+ return err;
+
+ nf_nat_masquerade_ipv6_register_notifier();
+ return 0;
+}
+
+static void nft_masq_ipv6_destroy(const struct nft_ctx *ctx,
+ const struct nft_expr *expr)
+{
+ nf_nat_masquerade_ipv6_unregister_notifier();
+}
+
+static struct nft_expr_type nft_masq_ipv6_type;
+static const struct nft_expr_ops nft_masq_ipv6_ops = {
+ .type = &nft_masq_ipv6_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_masq)),
+ .eval = nft_masq_ipv6_eval,
+ .init = nft_masq_ipv6_init,
+ .destroy = nft_masq_ipv6_destroy,
+ .dump = nft_masq_dump,
+};
+
+static struct nft_expr_type nft_masq_ipv6_type __read_mostly = {
+ .family = NFPROTO_IPV6,
+ .name = "masq",
+ .ops = &nft_masq_ipv6_ops,
+ .policy = nft_masq_policy,
+ .maxattr = NFTA_MASQ_MAX,
+ .owner = THIS_MODULE,
+};
+
+static int __init nft_masq_ipv6_module_init(void)
+{
+ return nft_register_expr(&nft_masq_ipv6_type);
+}
+
+static void __exit nft_masq_ipv6_module_exit(void)
+{
+ nft_unregister_expr(&nft_masq_ipv6_type);
+}
+
+module_init(nft_masq_ipv6_module_init);
+module_exit(nft_masq_ipv6_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>");
+MODULE_ALIAS_NFT_AF_EXPR(AF_INET6, "masq");
diff --git a/net/ipv6/protocol.c b/net/ipv6/protocol.c
index e048cf1..e3770ab 100644
--- a/net/ipv6/protocol.c
+++ b/net/ipv6/protocol.c
@@ -51,6 +51,7 @@ EXPORT_SYMBOL(inet6_del_protocol);
#endif
const struct net_offload __rcu *inet6_offloads[MAX_INET_PROTOS] __read_mostly;
+EXPORT_SYMBOL(inet6_offloads);
int inet6_add_offload(const struct net_offload *prot, unsigned char protocol)
{
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 86e3fa8..db75809 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -822,6 +822,8 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
int addr_type;
u8 ttl;
int err;
+ u8 protocol = IPPROTO_IPV6;
+ int t_hlen = tunnel->hlen + sizeof(struct iphdr);
if (skb->protocol != htons(ETH_P_IPV6))
goto tx_error;
@@ -911,8 +913,14 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
goto tx_error;
}
+ skb = iptunnel_handle_offloads(skb, false, SKB_GSO_SIT);
+ if (IS_ERR(skb)) {
+ ip_rt_put(rt);
+ goto out;
+ }
+
if (df) {
- mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
+ mtu = dst_mtu(&rt->dst) - t_hlen;
if (mtu < 68) {
dev->stats.collisions++;
@@ -947,7 +955,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
/*
* Okay, now see if we can stuff it in the buffer as-is.
*/
- max_headroom = LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr);
+ max_headroom = LL_RESERVED_SPACE(tdev) + t_hlen;
if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
(skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
@@ -969,14 +977,13 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
ttl = iph6->hop_limit;
tos = INET_ECN_encapsulate(tos, ipv6_get_dsfield(iph6));
- skb = iptunnel_handle_offloads(skb, false, SKB_GSO_SIT);
- if (IS_ERR(skb)) {
+ if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0) {
ip_rt_put(rt);
- goto out;
+ goto tx_error;
}
err = iptunnel_xmit(skb->sk, rt, skb, fl4.saddr, fl4.daddr,
- IPPROTO_IPV6, tos, ttl, df,
+ protocol, tos, ttl, df,
!net_eq(tunnel->net, dev_net(dev)));
iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
return NETDEV_TX_OK;
@@ -1059,8 +1066,10 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev)
tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);
if (tdev) {
+ int t_hlen = tunnel->hlen + sizeof(struct iphdr);
+
dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
- dev->mtu = tdev->mtu - sizeof(struct iphdr);
+ dev->mtu = tdev->mtu - t_hlen;
if (dev->mtu < IPV6_MIN_MTU)
dev->mtu = IPV6_MIN_MTU;
}
@@ -1307,7 +1316,10 @@ done:
static int ipip6_tunnel_change_mtu(struct net_device *dev, int new_mtu)
{
- if (new_mtu < IPV6_MIN_MTU || new_mtu > 0xFFF8 - sizeof(struct iphdr))
+ struct ip_tunnel *tunnel = netdev_priv(dev);
+ int t_hlen = tunnel->hlen + sizeof(struct iphdr);
+
+ if (new_mtu < IPV6_MIN_MTU || new_mtu > 0xFFF8 - t_hlen)
return -EINVAL;
dev->mtu = new_mtu;
return 0;
@@ -1338,12 +1350,15 @@ static void ipip6_dev_free(struct net_device *dev)
static void ipip6_tunnel_setup(struct net_device *dev)
{
+ struct ip_tunnel *tunnel = netdev_priv(dev);
+ int t_hlen = tunnel->hlen + sizeof(struct iphdr);
+
dev->netdev_ops = &ipip6_netdev_ops;
dev->destructor = ipip6_dev_free;
dev->type = ARPHRD_SIT;
- dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr);
- dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr);
+ dev->hard_header_len = LL_MAX_HEADER + t_hlen;
+ dev->mtu = ETH_DATA_LEN - t_hlen;
dev->flags = IFF_NOARP;
dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
dev->iflink = 0;
@@ -1466,6 +1481,40 @@ static void ipip6_netlink_parms(struct nlattr *data[],
}
+/* This function returns true when ENCAP attributes are present in the nl msg */
+static bool ipip6_netlink_encap_parms(struct nlattr *data[],
+ struct ip_tunnel_encap *ipencap)
+{
+ bool ret = false;
+
+ memset(ipencap, 0, sizeof(*ipencap));
+
+ if (!data)
+ return ret;
+
+ if (data[IFLA_IPTUN_ENCAP_TYPE]) {
+ ret = true;
+ ipencap->type = nla_get_u16(data[IFLA_IPTUN_ENCAP_TYPE]);
+ }
+
+ if (data[IFLA_IPTUN_ENCAP_FLAGS]) {
+ ret = true;
+ ipencap->flags = nla_get_u16(data[IFLA_IPTUN_ENCAP_FLAGS]);
+ }
+
+ if (data[IFLA_IPTUN_ENCAP_SPORT]) {
+ ret = true;
+ ipencap->sport = nla_get_u16(data[IFLA_IPTUN_ENCAP_SPORT]);
+ }
+
+ if (data[IFLA_IPTUN_ENCAP_DPORT]) {
+ ret = true;
+ ipencap->dport = nla_get_u16(data[IFLA_IPTUN_ENCAP_DPORT]);
+ }
+
+ return ret;
+}
+
#ifdef CONFIG_IPV6_SIT_6RD
/* This function returns true when 6RD attributes are present in the nl msg */
static bool ipip6_netlink_6rd_parms(struct nlattr *data[],
@@ -1509,12 +1558,20 @@ static int ipip6_newlink(struct net *src_net, struct net_device *dev,
{
struct net *net = dev_net(dev);
struct ip_tunnel *nt;
+ struct ip_tunnel_encap ipencap;
#ifdef CONFIG_IPV6_SIT_6RD
struct ip_tunnel_6rd ip6rd;
#endif
int err;
nt = netdev_priv(dev);
+
+ if (ipip6_netlink_encap_parms(data, &ipencap)) {
+ err = ip_tunnel_encap_setup(nt, &ipencap);
+ if (err < 0)
+ return err;
+ }
+
ipip6_netlink_parms(data, &nt->parms);
if (ipip6_tunnel_locate(net, &nt->parms, 0))
@@ -1537,15 +1594,23 @@ static int ipip6_changelink(struct net_device *dev, struct nlattr *tb[],
{
struct ip_tunnel *t = netdev_priv(dev);
struct ip_tunnel_parm p;
+ struct ip_tunnel_encap ipencap;
struct net *net = t->net;
struct sit_net *sitn = net_generic(net, sit_net_id);
#ifdef CONFIG_IPV6_SIT_6RD
struct ip_tunnel_6rd ip6rd;
#endif
+ int err;
if (dev == sitn->fb_tunnel_dev)
return -EINVAL;
+ if (ipip6_netlink_encap_parms(data, &ipencap)) {
+ err = ip_tunnel_encap_setup(t, &ipencap);
+ if (err < 0)
+ return err;
+ }
+
ipip6_netlink_parms(data, &p);
if (((dev->flags & IFF_POINTOPOINT) && !p.iph.daddr) ||
@@ -1599,6 +1664,14 @@ static size_t ipip6_get_size(const struct net_device *dev)
/* IFLA_IPTUN_6RD_RELAY_PREFIXLEN */
nla_total_size(2) +
#endif
+ /* IFLA_IPTUN_ENCAP_TYPE */
+ nla_total_size(2) +
+ /* IFLA_IPTUN_ENCAP_FLAGS */
+ nla_total_size(2) +
+ /* IFLA_IPTUN_ENCAP_SPORT */
+ nla_total_size(2) +
+ /* IFLA_IPTUN_ENCAP_DPORT */
+ nla_total_size(2) +
0;
}
@@ -1630,6 +1703,16 @@ static int ipip6_fill_info(struct sk_buff *skb, const struct net_device *dev)
goto nla_put_failure;
#endif
+ if (nla_put_u16(skb, IFLA_IPTUN_ENCAP_TYPE,
+ tunnel->encap.type) ||
+ nla_put_u16(skb, IFLA_IPTUN_ENCAP_SPORT,
+ tunnel->encap.sport) ||
+ nla_put_u16(skb, IFLA_IPTUN_ENCAP_DPORT,
+ tunnel->encap.dport) ||
+ nla_put_u16(skb, IFLA_IPTUN_ENCAP_FLAGS,
+ tunnel->encap.dport))
+ goto nla_put_failure;
+
return 0;
nla_put_failure:
@@ -1651,6 +1734,10 @@ static const struct nla_policy ipip6_policy[IFLA_IPTUN_MAX + 1] = {
[IFLA_IPTUN_6RD_PREFIXLEN] = { .type = NLA_U16 },
[IFLA_IPTUN_6RD_RELAY_PREFIXLEN] = { .type = NLA_U16 },
#endif
+ [IFLA_IPTUN_ENCAP_TYPE] = { .type = NLA_U16 },
+ [IFLA_IPTUN_ENCAP_FLAGS] = { .type = NLA_U16 },
+ [IFLA_IPTUN_ENCAP_SPORT] = { .type = NLA_U16 },
+ [IFLA_IPTUN_ENCAP_DPORT] = { .type = NLA_U16 },
};
static void ipip6_dellink(struct net_device *dev, struct list_head *head)
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 83cea1d..9a2838e 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -24,7 +24,7 @@
#define COOKIEBITS 24 /* Upper bits store count */
#define COOKIEMASK (((__u32)1 << COOKIEBITS) - 1)
-static u32 syncookie6_secret[2][16-4+SHA_DIGEST_WORDS];
+static u32 syncookie6_secret[2][16-4+SHA_DIGEST_WORDS] __read_mostly;
/* RFC 2460, Section 8.3:
* [ipv6 tcp] MSS must be computed as the maximum packet size minus 60 [..]
@@ -203,7 +203,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
ireq->ir_num = ntohs(th->dest);
ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
- if (ipv6_opt_accepted(sk, skb) ||
+ if (ipv6_opt_accepted(sk, skb, &TCP_SKB_CB(skb)->header.h6) ||
np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
atomic_inc(&skb->users);
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index 0c56c93..c5c10fa 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -16,6 +16,8 @@
#include <net/addrconf.h>
#include <net/inet_frag.h>
+static int one = 1;
+
static struct ctl_table ipv6_table_template[] = {
{
.procname = "bindv6only",
@@ -63,6 +65,14 @@ static struct ctl_table ipv6_rotable[] = {
.mode = 0644,
.proc_handler = proc_dointvec
},
+ {
+ .procname = "mld_qrv",
+ .data = &sysctl_mld_qrv,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &one
+ },
{ }
};
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 29964c3..132bac1 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -93,13 +93,16 @@ static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk,
static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
- const struct rt6_info *rt = (const struct rt6_info *)dst;
- dst_hold(dst);
- sk->sk_rx_dst = dst;
- inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
- if (rt->rt6i_node)
- inet6_sk(sk)->rx_dst_cookie = rt->rt6i_node->fn_sernum;
+ if (dst) {
+ const struct rt6_info *rt = (const struct rt6_info *)dst;
+
+ dst_hold(dst);
+ sk->sk_rx_dst = dst;
+ inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
+ if (rt->rt6i_node)
+ inet6_sk(sk)->rx_dst_cookie = rt->rt6i_node->fn_sernum;
+ }
}
static void tcp_v6_hash(struct sock *sk)
@@ -738,8 +741,9 @@ static void tcp_v6_init_req(struct request_sock *req, struct sock *sk,
ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
ireq->ir_iif = inet6_iif(skb);
- if (!TCP_SKB_CB(skb)->when &&
- (ipv6_opt_accepted(sk, skb) || np->rxopt.bits.rxinfo ||
+ if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
+ (ipv6_opt_accepted(sk, skb, &TCP_SKB_CB(skb)->header.h6) ||
+ np->rxopt.bits.rxinfo ||
np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
np->rxopt.bits.rxohlim || np->repflow)) {
atomic_inc(&skb->users);
@@ -1364,7 +1368,7 @@ ipv6_pktoptions:
np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
if (np->repflow)
np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
- if (ipv6_opt_accepted(sk, opt_skb)) {
+ if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
skb_set_owner_r(opt_skb, sk);
opt_skb = xchg(&np->pktoptions, opt_skb);
} else {
@@ -1408,11 +1412,19 @@ static int tcp_v6_rcv(struct sk_buff *skb)
th = tcp_hdr(skb);
hdr = ipv6_hdr(skb);
+ /* This is tricky : We move IPCB at its correct location into TCP_SKB_CB()
+ * barrier() makes sure compiler wont play fool^Waliasing games.
+ */
+ memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
+ sizeof(struct inet6_skb_parm));
+ barrier();
+
TCP_SKB_CB(skb)->seq = ntohl(th->seq);
TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
skb->len - th->doff*4);
TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
- TCP_SKB_CB(skb)->when = 0;
+ TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
+ TCP_SKB_CB(skb)->tcp_tw_isn = 0;
TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
TCP_SKB_CB(skb)->sacked = 0;
diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c
index dbb3d92..c1ab771 100644
--- a/net/ipv6/tcpv6_offload.c
+++ b/net/ipv6/tcpv6_offload.c
@@ -15,23 +15,6 @@
#include <net/ip6_checksum.h>
#include "ip6_offload.h"
-static int tcp_v6_gso_send_check(struct sk_buff *skb)
-{
- const struct ipv6hdr *ipv6h;
- struct tcphdr *th;
-
- if (!pskb_may_pull(skb, sizeof(*th)))
- return -EINVAL;
-
- ipv6h = ipv6_hdr(skb);
- th = tcp_hdr(skb);
-
- th->check = 0;
- skb->ip_summed = CHECKSUM_PARTIAL;
- __tcp_v6_send_check(skb, &ipv6h->saddr, &ipv6h->daddr);
- return 0;
-}
-
static struct sk_buff **tcp6_gro_receive(struct sk_buff **head,
struct sk_buff *skb)
{
@@ -58,10 +41,32 @@ static int tcp6_gro_complete(struct sk_buff *skb, int thoff)
return tcp_gro_complete(skb);
}
+struct sk_buff *tcp6_gso_segment(struct sk_buff *skb,
+ netdev_features_t features)
+{
+ struct tcphdr *th;
+
+ if (!pskb_may_pull(skb, sizeof(*th)))
+ return ERR_PTR(-EINVAL);
+
+ if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
+ const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+ struct tcphdr *th = tcp_hdr(skb);
+
+ /* Set up pseudo header, usually expect stack to have done
+ * this.
+ */
+
+ th->check = 0;
+ skb->ip_summed = CHECKSUM_PARTIAL;
+ __tcp_v6_send_check(skb, &ipv6h->saddr, &ipv6h->daddr);
+ }
+
+ return tcp_gso_segment(skb, features);
+}
static const struct net_offload tcpv6_offload = {
.callbacks = {
- .gso_send_check = tcp_v6_gso_send_check,
- .gso_segment = tcp_gso_segment,
+ .gso_segment = tcp6_gso_segment,
.gro_receive = tcp6_gro_receive,
.gro_complete = tcp6_gro_complete,
},
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 12fcce8f..f6ba535 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -891,6 +891,10 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
goto csum_error;
}
+ if (udp_sk(sk)->convert_csum && uh->check && !IS_UDPLITE(sk))
+ skb_checksum_try_convert(skb, IPPROTO_UDP, uh->check,
+ ip6_compute_pseudo);
+
ret = udpv6_queue_rcv_skb(sk, skb);
sock_put(sk);
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index b13e377..212ebfc 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -17,28 +17,6 @@
#include <net/ip6_checksum.h>
#include "ip6_offload.h"
-static int udp6_ufo_send_check(struct sk_buff *skb)
-{
- const struct ipv6hdr *ipv6h;
- struct udphdr *uh;
-
- if (!pskb_may_pull(skb, sizeof(*uh)))
- return -EINVAL;
-
- if (likely(!skb->encapsulation)) {
- ipv6h = ipv6_hdr(skb);
- uh = udp_hdr(skb);
-
- uh->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, skb->len,
- IPPROTO_UDP, 0);
- skb->csum_start = skb_transport_header(skb) - skb->head;
- skb->csum_offset = offsetof(struct udphdr, check);
- skb->ip_summed = CHECKSUM_PARTIAL;
- }
-
- return 0;
-}
-
static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
netdev_features_t features)
{
@@ -49,7 +27,6 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
u8 *packet_start, *prevhdr;
u8 nexthdr;
u8 frag_hdr_sz = sizeof(struct frag_hdr);
- int offset;
__wsum csum;
int tnl_hlen;
@@ -83,13 +60,27 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
(SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM))
segs = skb_udp_tunnel_segment(skb, features);
else {
+ const struct ipv6hdr *ipv6h;
+ struct udphdr *uh;
+
+ if (!pskb_may_pull(skb, sizeof(struct udphdr)))
+ goto out;
+
/* Do software UFO. Complete and fill in the UDP checksum as HW cannot
* do checksum of UDP packets sent as multiple IP fragments.
*/
- offset = skb_checksum_start_offset(skb);
- csum = skb_checksum(skb, offset, skb->len - offset, 0);
- offset += skb->csum_offset;
- *(__sum16 *)(skb->data + offset) = csum_fold(csum);
+
+ uh = udp_hdr(skb);
+ ipv6h = ipv6_hdr(skb);
+
+ uh->check = 0;
+ csum = skb_checksum(skb, 0, skb->len, 0);
+ uh->check = udp_v6_check(skb->len, &ipv6h->saddr,
+ &ipv6h->daddr, csum);
+
+ if (uh->check == 0)
+ uh->check = CSUM_MANGLED_0;
+
skb->ip_summed = CHECKSUM_NONE;
/* Check if there is enough headroom to insert fragment header. */
@@ -134,19 +125,29 @@ static struct sk_buff **udp6_gro_receive(struct sk_buff **head,
{
struct udphdr *uh = udp_gro_udphdr(skb);
+ if (unlikely(!uh))
+ goto flush;
+
/* Don't bother verifying checksum if we're going to flush anyway. */
- if (unlikely(!uh) ||
- (!NAPI_GRO_CB(skb)->flush &&
- skb_gro_checksum_validate_zero_check(skb, IPPROTO_UDP, uh->check,
- ip6_gro_compute_pseudo))) {
- NAPI_GRO_CB(skb)->flush = 1;
- return NULL;
- }
+ if (NAPI_GRO_CB(skb)->flush)
+ goto skip;
+ if (skb_gro_checksum_validate_zero_check(skb, IPPROTO_UDP, uh->check,
+ ip6_gro_compute_pseudo))
+ goto flush;
+ else if (uh->check)
+ skb_gro_checksum_try_convert(skb, IPPROTO_UDP, uh->check,
+ ip6_gro_compute_pseudo);
+
+skip:
return udp_gro_receive(head, skb, uh);
+
+flush:
+ NAPI_GRO_CB(skb)->flush = 1;
+ return NULL;
}
-int udp6_gro_complete(struct sk_buff *skb, int nhoff)
+static int udp6_gro_complete(struct sk_buff *skb, int nhoff)
{
const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
struct udphdr *uh = (struct udphdr *)(skb->data + nhoff);
@@ -160,7 +161,6 @@ int udp6_gro_complete(struct sk_buff *skb, int nhoff)
static const struct net_offload udpv6_offload = {
.callbacks = {
- .gso_send_check = udp6_ufo_send_check,
.gso_segment = udp6_ufo_fragment,
.gro_receive = udp6_gro_receive,
.gro_complete = udp6_gro_complete,
OpenPOWER on IntegriCloud