Diffstat (limited to 'net/ipv4')
-rw-r--r--  net/ipv4/cipso_ipv4.c      2
-rw-r--r--  net/ipv4/fou.c           224
-rw-r--r--  net/ipv4/ip_fragment.c     4
-rw-r--r--  net/ipv4/ip_gre.c          2
-rw-r--r--  net/ipv4/ip_tunnel.c      26
-rw-r--r--  net/ipv4/ipip.c            2
-rw-r--r--  net/ipv4/route.c           2
-rw-r--r--  net/ipv4/tcp.c             4
-rw-r--r--  net/ipv4/tcp_output.c      5
-rw-r--r--  net/ipv4/tcp_timer.c      41
-rw-r--r--  net/ipv4/udp_offload.c    52
11 files changed, 286 insertions, 78 deletions
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index 05b708b..4715f25 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -246,7 +246,7 @@ static u32 cipso_v4_map_cache_hash(const unsigned char *key, u32 key_len)
* success, negative values on error.
*
*/
-static int cipso_v4_cache_init(void)
+static int __init cipso_v4_cache_init(void)
{
u32 iter;
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index dced89f..efa70ad 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -7,6 +7,7 @@
#include <linux/types.h>
#include <linux/kernel.h>
#include <net/genetlink.h>
+#include <net/gue.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <net/udp.h>
@@ -27,6 +28,7 @@ struct fou {
};
struct fou_cfg {
+ u16 type;
u8 protocol;
struct udp_port_cfg udp_config;
};
@@ -64,15 +66,51 @@ static int fou_udp_recv(struct sock *sk, struct sk_buff *skb)
sizeof(struct udphdr));
}
+static int gue_udp_recv(struct sock *sk, struct sk_buff *skb)
+{
+ struct fou *fou = fou_from_sock(sk);
+ size_t len;
+ struct guehdr *guehdr;
+ struct udphdr *uh;
+
+ if (!fou)
+ return 1;
+
+ len = sizeof(struct udphdr) + sizeof(struct guehdr);
+ if (!pskb_may_pull(skb, len))
+ goto drop;
+
+ uh = udp_hdr(skb);
+ guehdr = (struct guehdr *)&uh[1];
+
+ len += guehdr->hlen << 2;
+ if (!pskb_may_pull(skb, len))
+ goto drop;
+
+ if (guehdr->version != 0)
+ goto drop;
+
+ if (guehdr->flags) {
+ /* No support yet */
+ goto drop;
+ }
+
+ return fou_udp_encap_recv_deliver(skb, guehdr->next_hdr, len);
+drop:
+ kfree_skb(skb);
+ return 0;
+}
+
static struct sk_buff **fou_gro_receive(struct sk_buff **head,
- struct sk_buff *skb,
- const struct net_offload **offloads)
+ struct sk_buff *skb)
{
const struct net_offload *ops;
struct sk_buff **pp = NULL;
u8 proto = NAPI_GRO_CB(skb)->proto;
+ const struct net_offload **offloads;
rcu_read_lock();
+ offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
ops = rcu_dereference(offloads[proto]);
if (!ops || !ops->callbacks.gro_receive)
goto out_unlock;
@@ -85,14 +123,15 @@ out_unlock:
return pp;
}
-static int fou_gro_complete(struct sk_buff *skb, int nhoff,
- const struct net_offload **offloads)
+static int fou_gro_complete(struct sk_buff *skb, int nhoff)
{
const struct net_offload *ops;
u8 proto = NAPI_GRO_CB(skb)->proto;
int err = -ENOSYS;
+ const struct net_offload **offloads;
rcu_read_lock();
+ offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
ops = rcu_dereference(offloads[proto]);
if (WARN_ON(!ops || !ops->callbacks.gro_complete))
goto out_unlock;
@@ -105,26 +144,110 @@ out_unlock:
return err;
}
-static struct sk_buff **fou4_gro_receive(struct sk_buff **head,
- struct sk_buff *skb)
+static struct sk_buff **gue_gro_receive(struct sk_buff **head,
+ struct sk_buff *skb)
{
- return fou_gro_receive(head, skb, inet_offloads);
-}
+ const struct net_offload **offloads;
+ const struct net_offload *ops;
+ struct sk_buff **pp = NULL;
+ struct sk_buff *p;
+ u8 proto;
+ struct guehdr *guehdr;
+ unsigned int hlen, guehlen;
+ unsigned int off;
+ int flush = 1;
+
+ off = skb_gro_offset(skb);
+ hlen = off + sizeof(*guehdr);
+ guehdr = skb_gro_header_fast(skb, off);
+ if (skb_gro_header_hard(skb, hlen)) {
+ guehdr = skb_gro_header_slow(skb, hlen, off);
+ if (unlikely(!guehdr))
+ goto out;
+ }
-static int fou4_gro_complete(struct sk_buff *skb, int nhoff)
-{
- return fou_gro_complete(skb, nhoff, inet_offloads);
-}
+ proto = guehdr->next_hdr;
-static struct sk_buff **fou6_gro_receive(struct sk_buff **head,
- struct sk_buff *skb)
-{
- return fou_gro_receive(head, skb, inet6_offloads);
+ rcu_read_lock();
+ offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
+ ops = rcu_dereference(offloads[proto]);
+ if (WARN_ON(!ops || !ops->callbacks.gro_receive))
+ goto out_unlock;
+
+ guehlen = sizeof(*guehdr) + (guehdr->hlen << 2);
+
+ hlen = off + guehlen;
+ if (skb_gro_header_hard(skb, hlen)) {
+ guehdr = skb_gro_header_slow(skb, hlen, off);
+ if (unlikely(!guehdr))
+ goto out_unlock;
+ }
+
+ flush = 0;
+
+ for (p = *head; p; p = p->next) {
+ const struct guehdr *guehdr2;
+
+ if (!NAPI_GRO_CB(p)->same_flow)
+ continue;
+
+ guehdr2 = (struct guehdr *)(p->data + off);
+
+ /* Compare base GUE header to be equal (covers
+	 * hlen, version, next_hdr, and flags).
+ */
+ if (guehdr->word != guehdr2->word) {
+ NAPI_GRO_CB(p)->same_flow = 0;
+ continue;
+ }
+
+		/* Check that the optional fields are the same. */
+ if (guehdr->hlen && memcmp(&guehdr[1], &guehdr2[1],
+ guehdr->hlen << 2)) {
+ NAPI_GRO_CB(p)->same_flow = 0;
+ continue;
+ }
+ }
+
+ skb_gro_pull(skb, guehlen);
+
+	/* Adjust NAPI_GRO_CB(skb)->csum after skb_gro_pull(). */
+ skb_gro_postpull_rcsum(skb, guehdr, guehlen);
+
+ pp = ops->callbacks.gro_receive(head, skb);
+
+out_unlock:
+ rcu_read_unlock();
+out:
+ NAPI_GRO_CB(skb)->flush |= flush;
+
+ return pp;
}
-static int fou6_gro_complete(struct sk_buff *skb, int nhoff)
+static int gue_gro_complete(struct sk_buff *skb, int nhoff)
{
- return fou_gro_complete(skb, nhoff, inet6_offloads);
+ const struct net_offload **offloads;
+ struct guehdr *guehdr = (struct guehdr *)(skb->data + nhoff);
+ const struct net_offload *ops;
+ unsigned int guehlen;
+ u8 proto;
+ int err = -ENOENT;
+
+ proto = guehdr->next_hdr;
+
+ guehlen = sizeof(*guehdr) + (guehdr->hlen << 2);
+
+ rcu_read_lock();
+ offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
+ ops = rcu_dereference(offloads[proto]);
+ if (WARN_ON(!ops || !ops->callbacks.gro_complete))
+ goto out_unlock;
+
+ err = ops->callbacks.gro_complete(skb, nhoff + guehlen);
+
+out_unlock:
+ rcu_read_unlock();
+ return err;
}
static int fou_add_to_port_list(struct fou *fou)
@@ -162,6 +285,28 @@ static void fou_release(struct fou *fou)
kfree(fou);
}
+static int fou_encap_init(struct sock *sk, struct fou *fou, struct fou_cfg *cfg)
+{
+ udp_sk(sk)->encap_rcv = fou_udp_recv;
+ fou->protocol = cfg->protocol;
+ fou->udp_offloads.callbacks.gro_receive = fou_gro_receive;
+ fou->udp_offloads.callbacks.gro_complete = fou_gro_complete;
+ fou->udp_offloads.port = cfg->udp_config.local_udp_port;
+ fou->udp_offloads.ipproto = cfg->protocol;
+
+ return 0;
+}
+
+static int gue_encap_init(struct sock *sk, struct fou *fou, struct fou_cfg *cfg)
+{
+ udp_sk(sk)->encap_rcv = gue_udp_recv;
+ fou->udp_offloads.callbacks.gro_receive = gue_gro_receive;
+ fou->udp_offloads.callbacks.gro_complete = gue_gro_complete;
+ fou->udp_offloads.port = cfg->udp_config.local_udp_port;
+
+ return 0;
+}
+
static int fou_create(struct net *net, struct fou_cfg *cfg,
struct socket **sockp)
{
@@ -184,10 +329,24 @@ static int fou_create(struct net *net, struct fou_cfg *cfg,
sk = sock->sk;
- /* Mark socket as an encapsulation socket. See net/ipv4/udp.c */
- fou->protocol = cfg->protocol;
- fou->port = cfg->udp_config.local_udp_port;
- udp_sk(sk)->encap_rcv = fou_udp_recv;
+ fou->port = cfg->udp_config.local_udp_port;
+
+	/* Initialize according to the FOU encapsulation type */
+ switch (cfg->type) {
+ case FOU_ENCAP_DIRECT:
+ err = fou_encap_init(sk, fou, cfg);
+ if (err)
+ goto error;
+ break;
+ case FOU_ENCAP_GUE:
+ err = gue_encap_init(sk, fou, cfg);
+ if (err)
+ goto error;
+ break;
+ default:
+ err = -EINVAL;
+ goto error;
+ }
udp_sk(sk)->encap_type = 1;
udp_encap_enable();
@@ -199,23 +358,6 @@ static int fou_create(struct net *net, struct fou_cfg *cfg,
sk->sk_allocation = GFP_ATOMIC;
- switch (cfg->udp_config.family) {
- case AF_INET:
- fou->udp_offloads.callbacks.gro_receive = fou4_gro_receive;
- fou->udp_offloads.callbacks.gro_complete = fou4_gro_complete;
- break;
- case AF_INET6:
- fou->udp_offloads.callbacks.gro_receive = fou6_gro_receive;
- fou->udp_offloads.callbacks.gro_complete = fou6_gro_complete;
- break;
- default:
- err = -EPFNOSUPPORT;
- goto error;
- }
-
- fou->udp_offloads.port = cfg->udp_config.local_udp_port;
- fou->udp_offloads.ipproto = cfg->protocol;
-
if (cfg->udp_config.family == AF_INET) {
err = udp_add_offload(&fou->udp_offloads);
if (err)
@@ -272,6 +414,7 @@ static struct nla_policy fou_nl_policy[FOU_ATTR_MAX + 1] = {
[FOU_ATTR_PORT] = { .type = NLA_U16, },
[FOU_ATTR_AF] = { .type = NLA_U8, },
[FOU_ATTR_IPPROTO] = { .type = NLA_U8, },
+ [FOU_ATTR_TYPE] = { .type = NLA_U8, },
};
static int parse_nl_config(struct genl_info *info,
@@ -299,6 +442,9 @@ static int parse_nl_config(struct genl_info *info,
if (info->attrs[FOU_ATTR_IPPROTO])
cfg->protocol = nla_get_u8(info->attrs[FOU_ATTR_IPPROTO]);
+ if (info->attrs[FOU_ATTR_TYPE])
+ cfg->type = nla_get_u8(info->attrs[FOU_ATTR_TYPE]);
+
return 0;
}
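
For reference, a minimal sketch of the GUE header that gue_udp_recv() and gue_gro_receive() above parse. The layout is inferred from the version/hlen/flags/next_hdr accesses and the 32-bit word overlay used in the flow comparison; the authoritative definition is the new <net/gue.h> header this series adds:

struct guehdr {
	union {
		struct {
#if defined(__LITTLE_ENDIAN_BITFIELD)
			__u8	hlen:4,		/* extension words beyond base header */
				version:4;	/* only version 0 is accepted */
#else
			__u8	version:4,
				hlen:4;
#endif
			__u8	next_hdr;	/* IP protocol number of the payload */
			__u16	flags;		/* no flags supported yet: must be 0 */
		};
		__u32	word;			/* whole base header as one word */
	};
};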
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 15f0e2b..2811cc1 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -790,7 +790,7 @@ static void __net_exit ip4_frags_ns_ctl_unregister(struct net *net)
kfree(table);
}
-static void ip4_frags_ctl_register(void)
+static void __init ip4_frags_ctl_register(void)
{
register_net_sysctl(&init_net, "net/ipv4", ip4_frags_ctl_table);
}
@@ -804,7 +804,7 @@ static inline void ip4_frags_ns_ctl_unregister(struct net *net)
{
}
-static inline void ip4_frags_ctl_register(void)
+static inline void __init ip4_frags_ctl_register(void)
{
}
#endif
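
The __init annotations added above (and to cipso_v4_cache_init() and tcp_init_mem() elsewhere in this set) place boot-time-only functions in .init.text, which the kernel discards once initialization finishes. That is only safe because every caller is itself init code, as in this sketch:

/* Sketch: an __init function may only be reached from other init code. */
static int __init example_cache_init(void)
{
	return 0;			/* runs once during boot */
}

static int __init example_module_init(void)
{
	return example_cache_init();	/* ok: the caller's text is freed too */
}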
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 829aff8b..0485ef1 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -241,6 +241,8 @@ static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
/* Push GRE header. */
gre_build_header(skb, &tpi, tunnel->tun_hlen);
+ skb_set_inner_protocol(skb, tpi.proto);
+
ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
}
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index b75b47b..0bb8e14 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -56,6 +56,7 @@
#include <net/netns/generic.h>
#include <net/rtnetlink.h>
#include <net/udp.h>
+#include <net/gue.h>
#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h>
@@ -495,6 +496,8 @@ static int ip_encap_hlen(struct ip_tunnel_encap *e)
return 0;
case TUNNEL_ENCAP_FOU:
return sizeof(struct udphdr);
+ case TUNNEL_ENCAP_GUE:
+ return sizeof(struct udphdr) + sizeof(struct guehdr);
default:
return -EINVAL;
}
@@ -546,6 +549,15 @@ static int fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
skb_reset_transport_header(skb);
uh = udp_hdr(skb);
+ if (e->type == TUNNEL_ENCAP_GUE) {
+ struct guehdr *guehdr = (struct guehdr *)&uh[1];
+
+ guehdr->version = 0;
+ guehdr->hlen = 0;
+ guehdr->flags = 0;
+ guehdr->next_hdr = *protocol;
+ }
+
uh->dest = e->dport;
uh->source = sport;
uh->len = htons(skb->len);
@@ -565,6 +577,7 @@ int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t,
case TUNNEL_ENCAP_NONE:
return 0;
case TUNNEL_ENCAP_FOU:
+ case TUNNEL_ENCAP_GUE:
return fou_build_header(skb, &t->encap, t->encap_hlen,
protocol, fl4);
default:
@@ -759,7 +772,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
df |= (inner_iph->frag_off&htons(IP_DF));
max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
- + rt->dst.header_len;
+ + rt->dst.header_len + ip_encap_hlen(&tunnel->encap);
if (max_headroom > dev->needed_headroom)
dev->needed_headroom = max_headroom;
@@ -853,9 +866,14 @@ int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
- if (!t && (cmd == SIOCADDTUNNEL)) {
- t = ip_tunnel_create(net, itn, p);
- err = PTR_ERR_OR_ZERO(t);
+ if (cmd == SIOCADDTUNNEL) {
+ if (!t) {
+ t = ip_tunnel_create(net, itn, p);
+ err = PTR_ERR_OR_ZERO(t);
+ break;
+ }
+
+ err = -EEXIST;
break;
}
if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
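
The GUE branch of fou_build_header() above produces the following on-the-wire layout (a sketch; the sizes follow from the four zeroed guehdr fields and from ip_encap_hlen()):

/*
 *  outer IP | UDP (8 B) | GUE (4 B) | inner packet
 *                         version/hlen (1 B), next_hdr (1 B), flags (2 B)
 *
 * With hlen = 0 and flags = 0 the encapsulation overhead is exactly
 * sizeof(struct udphdr) + sizeof(struct guehdr), which the
 * max_headroom change above now also reserves on transmit.
 */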
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index bfec31d..ea88ab3 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -224,6 +224,8 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
if (IS_ERR(skb))
goto out;
+ skb_set_inner_ipproto(skb, IPPROTO_IPIP);
+
ip_tunnel_xmit(skb, dev, tiph, tiph->protocol);
return NETDEV_TX_OK;
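
This skb_set_inner_ipproto() call, together with the skb_set_inner_protocol() call added to ip_gre.c above, records what sits inside the tunnel so the UDP segmentation rework below can pick the right inner GSO handler. A sketch of what these skbuff.h helpers plausibly do, assuming the two values share storage tagged by inner_protocol_type as the udp_offload.c dispatch suggests:

static inline void skb_set_inner_protocol(struct sk_buff *skb,
					  __be16 protocol)
{
	skb->inner_protocol = protocol;		/* ethertype, e.g. tpi.proto */
	skb->inner_protocol_type = ENCAP_TYPE_ETHER;
}

static inline void skb_set_inner_ipproto(struct sk_buff *skb, __u8 ipproto)
{
	skb->inner_ipproto = ipproto;		/* e.g. IPPROTO_IPIP */
	skb->inner_protocol_type = ENCAP_TYPE_IPPROTO;
}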
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index d4bd68d..793c0bb 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -746,7 +746,7 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow
}
n = ipv4_neigh_lookup(&rt->dst, NULL, &new_gw);
- if (n) {
+ if (!IS_ERR(n)) {
if (!(n->nud_state & NUD_VALID)) {
neigh_event_send(n, NULL);
} else {
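
The one-line fix matters because ipv4_neigh_lookup() reports failure as an ERR_PTR()-encoded value, never NULL, so the old "if (n)" test accepted error pointers and then dereferenced them. The corrected pattern:

n = ipv4_neigh_lookup(&rt->dst, NULL, &new_gw);
if (!IS_ERR(n)) {
	/* n is a real neighbour entry; safe to inspect n->nud_state */
	neigh_release(n);
}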
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index cf5e508..26a6f11 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2693,7 +2693,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
break;
#endif
case TCP_USER_TIMEOUT:
- /* Cap the max timeout in ms TCP will retry/retrans
+ /* Cap the max time in ms TCP will retry or probe the window
* before giving up and aborting (ETIMEDOUT) a connection.
*/
if (val < 0)
@@ -3172,7 +3172,7 @@ static int __init set_thash_entries(char *str)
}
__setup("thash_entries=", set_thash_entries);
-static void tcp_init_mem(void)
+static void __init tcp_init_mem(void)
{
unsigned long limit = nr_free_buffer_pages() / 8;
limit = max(limit, 128UL);
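
The reworded TCP_USER_TIMEOUT comment above reflects that the option now bounds zero-window probing as well as retransmission (see the tcp_timer.c changes below). From userspace the option takes milliseconds as an unsigned int; a usage sketch, assuming fd is a connected TCP socket:

#include <stdio.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <sys/socket.h>

unsigned int timeout_ms = 30000;	/* give up after 30 s without progress */

if (setsockopt(fd, IPPROTO_TCP, TCP_USER_TIMEOUT,
	       &timeout_ms, sizeof(timeout_ms)) < 0)
	perror("setsockopt(TCP_USER_TIMEOUT)");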
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index ee567e9..8d4eac7 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2110,10 +2110,7 @@ bool tcp_schedule_loss_probe(struct sock *sk)
static bool skb_still_in_host_queue(const struct sock *sk,
const struct sk_buff *skb)
{
- const struct sk_buff *fclone = skb + 1;
-
- if (unlikely(skb->fclone == SKB_FCLONE_ORIG &&
- fclone->fclone == SKB_FCLONE_CLONE)) {
+ if (unlikely(skb_fclone_busy(skb))) {
NET_INC_STATS_BH(sock_net(sk),
LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES);
return true;
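
The open-coded fast-clone test deleted above is what the new skb_fclone_busy() helper encapsulates. Judging purely from the removed lines, it amounts to the following (a sketch; the real helper lives in <linux/skbuff.h>):

static inline bool skb_fclone_busy(const struct sk_buff *skb)
{
	/* fclone skbs are allocated in pairs; the companion follows skb */
	const struct sk_buff *fclone = skb + 1;

	return skb->fclone == SKB_FCLONE_ORIG &&
	       fclone->fclone == SKB_FCLONE_CLONE;
}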
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index b24360f..9b21ae8 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -52,7 +52,7 @@ static void tcp_write_err(struct sock *sk)
* limit.
* 2. If we have strong memory pressure.
*/
-static int tcp_out_of_resources(struct sock *sk, int do_reset)
+static int tcp_out_of_resources(struct sock *sk, bool do_reset)
{
struct tcp_sock *tp = tcp_sk(sk);
int shift = 0;
@@ -72,7 +72,7 @@ static int tcp_out_of_resources(struct sock *sk, int do_reset)
if ((s32)(tcp_time_stamp - tp->lsndtime) <= TCP_TIMEWAIT_LEN ||
/* 2. Window is closed. */
(!tp->snd_wnd && !tp->packets_out))
- do_reset = 1;
+ do_reset = true;
if (do_reset)
tcp_send_active_reset(sk, GFP_ATOMIC);
tcp_done(sk);
@@ -270,40 +270,41 @@ static void tcp_probe_timer(struct sock *sk)
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
int max_probes;
+ u32 start_ts;
if (tp->packets_out || !tcp_send_head(sk)) {
icsk->icsk_probes_out = 0;
return;
}
- /* *WARNING* RFC 1122 forbids this
- *
- * It doesn't AFAIK, because we kill the retransmit timer -AK
- *
- * FIXME: We ought not to do it, Solaris 2.5 actually has fixing
- * this behaviour in Solaris down as a bug fix. [AC]
- *
- * Let me to explain. icsk_probes_out is zeroed by incoming ACKs
- * even if they advertise zero window. Hence, connection is killed only
- * if we received no ACKs for normal connection timeout. It is not killed
- * only because window stays zero for some time, window may be zero
- * until armageddon and even later. We are in full accordance
- * with RFCs, only probe timer combines both retransmission timeout
- * and probe timeout in one bottle. --ANK
+ /* RFC 1122 4.2.2.17 requires the sender to stay open indefinitely as
+ * long as the receiver continues to respond probes. We support this by
+ * default and reset icsk_probes_out with incoming ACKs. But if the
+ * socket is orphaned or the user specifies TCP_USER_TIMEOUT, we
+ * kill the socket when the retry count and the time exceeds the
+ * corresponding system limit. We also implement similar policy when
+ * we use RTO to probe window in tcp_retransmit_timer().
*/
- max_probes = sysctl_tcp_retries2;
+ start_ts = tcp_skb_timestamp(tcp_send_head(sk));
+ if (!start_ts)
+ skb_mstamp_get(&tcp_send_head(sk)->skb_mstamp);
+ else if (icsk->icsk_user_timeout &&
+ (s32)(tcp_time_stamp - start_ts) > icsk->icsk_user_timeout)
+ goto abort;
+ max_probes = sysctl_tcp_retries2;
if (sock_flag(sk, SOCK_DEAD)) {
const int alive = inet_csk_rto_backoff(icsk, TCP_RTO_MAX) < TCP_RTO_MAX;
max_probes = tcp_orphan_retries(sk, alive);
-
- if (tcp_out_of_resources(sk, alive || icsk->icsk_probes_out <= max_probes))
+ if (!alive && icsk->icsk_backoff >= max_probes)
+ goto abort;
+ if (tcp_out_of_resources(sk, true))
return;
}
if (icsk->icsk_probes_out > max_probes) {
- tcp_write_err(sk);
+abort: tcp_write_err(sk);
} else {
/* Only send another probe if we didn't close things up. */
tcp_send_probe0(sk);
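
A worked example of the new abort rule, assuming TCP_USER_TIMEOUT was set to 10 s and the peer keeps advertising a zero window:

/*
 *  t = 0      head-of-queue skb queued; its timestamp becomes start_ts
 *  t < 10 s   window probes are sent and ACKed; each ACK clears
 *             icsk_probes_out, so the probe counter alone never trips
 *  t > 10 s   (s32)(tcp_time_stamp - start_ts) exceeds the user
 *             timeout -> goto abort -> tcp_write_err() -> ETIMEDOUT
 *
 * Before this change a responsive zero-window peer could pin the
 * socket (and its memory) open indefinitely.
 */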
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 19ebe6a..507310e 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -25,8 +25,11 @@ struct udp_offload_priv {
struct udp_offload_priv __rcu *next;
};
-struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
- netdev_features_t features)
+static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
+ netdev_features_t features,
+ struct sk_buff *(*gso_inner_segment)(struct sk_buff *skb,
+ netdev_features_t features),
+ __be16 new_protocol)
{
struct sk_buff *segs = ERR_PTR(-EINVAL);
u16 mac_offset = skb->mac_header;
@@ -48,7 +51,7 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
skb_reset_mac_header(skb);
skb_set_network_header(skb, skb_inner_network_offset(skb));
skb->mac_len = skb_inner_network_offset(skb);
- skb->protocol = htons(ETH_P_TEB);
+ skb->protocol = new_protocol;
need_csum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM);
if (need_csum)
@@ -56,7 +59,7 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
/* segment inner packet. */
enc_features = skb->dev->hw_enc_features & netif_skb_features(skb);
- segs = skb_mac_gso_segment(skb, enc_features);
+ segs = gso_inner_segment(skb, enc_features);
if (IS_ERR_OR_NULL(segs)) {
skb_gso_error_unwind(skb, protocol, tnl_hlen, mac_offset,
mac_len);
@@ -101,6 +104,44 @@ out:
return segs;
}
+struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
+ netdev_features_t features,
+ bool is_ipv6)
+{
+ __be16 protocol = skb->protocol;
+ const struct net_offload **offloads;
+ const struct net_offload *ops;
+ struct sk_buff *segs = ERR_PTR(-EINVAL);
+ struct sk_buff *(*gso_inner_segment)(struct sk_buff *skb,
+ netdev_features_t features);
+
+ rcu_read_lock();
+
+ switch (skb->inner_protocol_type) {
+ case ENCAP_TYPE_ETHER:
+ protocol = skb->inner_protocol;
+ gso_inner_segment = skb_mac_gso_segment;
+ break;
+ case ENCAP_TYPE_IPPROTO:
+ offloads = is_ipv6 ? inet6_offloads : inet_offloads;
+ ops = rcu_dereference(offloads[skb->inner_ipproto]);
+ if (!ops || !ops->callbacks.gso_segment)
+ goto out_unlock;
+ gso_inner_segment = ops->callbacks.gso_segment;
+ break;
+ default:
+ goto out_unlock;
+ }
+
+ segs = __skb_udp_tunnel_segment(skb, features, gso_inner_segment,
+ protocol);
+
+out_unlock:
+ rcu_read_unlock();
+
+ return segs;
+}
+
static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
netdev_features_t features)
{
@@ -113,7 +154,7 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
if (skb->encapsulation &&
(skb_shinfo(skb)->gso_type &
(SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM))) {
- segs = skb_udp_tunnel_segment(skb, features);
+ segs = skb_udp_tunnel_segment(skb, features, false);
goto out;
}
@@ -293,6 +334,7 @@ static struct sk_buff **udp4_gro_receive(struct sk_buff **head,
skb_gro_checksum_try_convert(skb, IPPROTO_UDP, uh->check,
inet_gro_compute_pseudo);
skip:
+ NAPI_GRO_CB(skb)->is_ipv6 = 0;
return udp_gro_receive(head, skb, uh);
flush: