From aaea4ed74d71dda1cf2cc19c206552d537201452 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Sat, 7 Jul 2012 20:32:12 +0300 Subject: ipvs: ip_vs_ftp depends on nf_conntrack_ftp helper The FTP application indirectly depends on the nf_conntrack_ftp helper for proper NAT support. If the module is not loaded, IPVS can resize the packets for the command connection, eg. PASV response but the SEQ adjustment logic in ipv4_confirm is not called without helper. Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/Kconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net/netfilter/ipvs') diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig index f987138..8b2cffd 100644 --- a/net/netfilter/ipvs/Kconfig +++ b/net/netfilter/ipvs/Kconfig @@ -250,7 +250,8 @@ comment 'IPVS application helper' config IP_VS_FTP tristate "FTP protocol helper" - depends on IP_VS_PROTO_TCP && NF_CONNTRACK && NF_NAT + depends on IP_VS_PROTO_TCP && NF_CONNTRACK && NF_NAT && \ + NF_CONNTRACK_FTP select IP_VS_NFCT ---help--- FTP is a protocol that transfers IP address and/or port number in -- cgit v1.1 From be97fdb5fbcc828240c51769cd28cba609158703 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Thu, 12 Jul 2012 23:06:20 +0300 Subject: ipvs: generalize app registration in netns Get rid of the ftp_app pointer and allow applications to be registered without adding fields in the netns_ipvs structure. v2: fix coding style as suggested by Pablo Neira Ayuso Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_app.c | 58 ++++++++++++++++++++++++++++++------------ net/netfilter/ipvs/ip_vs_ftp.c | 21 ++++----------- 2 files changed, 47 insertions(+), 32 deletions(-) (limited to 'net/netfilter/ipvs') diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c index 64f9e8f..9713e6e 100644 --- a/net/netfilter/ipvs/ip_vs_app.c +++ b/net/netfilter/ipvs/ip_vs_app.c @@ -180,22 +180,38 @@ register_ip_vs_app_inc(struct net *net, struct ip_vs_app *app, __u16 proto, } -/* - * ip_vs_app registration routine - */ -int register_ip_vs_app(struct net *net, struct ip_vs_app *app) +/* Register application for netns */ +struct ip_vs_app *register_ip_vs_app(struct net *net, struct ip_vs_app *app) { struct netns_ipvs *ipvs = net_ipvs(net); - /* increase the module use count */ - ip_vs_use_count_inc(); + struct ip_vs_app *a; + int err = 0; + + if (!ipvs) + return ERR_PTR(-ENOENT); mutex_lock(&__ip_vs_app_mutex); - list_add(&app->a_list, &ipvs->app_list); + list_for_each_entry(a, &ipvs->app_list, a_list) { + if (!strcmp(app->name, a->name)) { + err = -EEXIST; + goto out_unlock; + } + } + a = kmemdup(app, sizeof(*app), GFP_KERNEL); + if (!a) { + err = -ENOMEM; + goto out_unlock; + } + INIT_LIST_HEAD(&a->incs_list); + list_add(&a->a_list, &ipvs->app_list); + /* increase the module use count */ + ip_vs_use_count_inc(); +out_unlock: mutex_unlock(&__ip_vs_app_mutex); - return 0; + return err ? ERR_PTR(err) : a; } @@ -205,20 +221,29 @@ int register_ip_vs_app(struct net *net, struct ip_vs_app *app) */ void unregister_ip_vs_app(struct net *net, struct ip_vs_app *app) { - struct ip_vs_app *inc, *nxt; + struct netns_ipvs *ipvs = net_ipvs(net); + struct ip_vs_app *a, *anxt, *inc, *nxt; + + if (!ipvs) + return; mutex_lock(&__ip_vs_app_mutex); - list_for_each_entry_safe(inc, nxt, &app->incs_list, a_list) { - ip_vs_app_inc_release(net, inc); - } + list_for_each_entry_safe(a, anxt, &ipvs->app_list, a_list) { + if (app && strcmp(app->name, a->name)) + continue; + list_for_each_entry_safe(inc, nxt, &a->incs_list, a_list) { + ip_vs_app_inc_release(net, inc); + } - list_del(&app->a_list); + list_del(&a->a_list); + kfree(a); - mutex_unlock(&__ip_vs_app_mutex); + /* decrease the module use count */ + ip_vs_use_count_dec(); + } - /* decrease the module use count */ - ip_vs_use_count_dec(); + mutex_unlock(&__ip_vs_app_mutex); } @@ -586,5 +611,6 @@ int __net_init ip_vs_app_net_init(struct net *net) void __net_exit ip_vs_app_net_cleanup(struct net *net) { + unregister_ip_vs_app(net, NULL /* all */); proc_net_remove(net, "ip_vs_app"); } diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c index b20b29c..ad70b7e 100644 --- a/net/netfilter/ipvs/ip_vs_ftp.c +++ b/net/netfilter/ipvs/ip_vs_ftp.c @@ -441,16 +441,10 @@ static int __net_init __ip_vs_ftp_init(struct net *net) if (!ipvs) return -ENOENT; - app = kmemdup(&ip_vs_ftp, sizeof(struct ip_vs_app), GFP_KERNEL); - if (!app) - return -ENOMEM; - INIT_LIST_HEAD(&app->a_list); - INIT_LIST_HEAD(&app->incs_list); - ipvs->ftp_app = app; - ret = register_ip_vs_app(net, app); - if (ret) - goto err_exit; + app = register_ip_vs_app(net, &ip_vs_ftp); + if (IS_ERR(app)) + return PTR_ERR(app); for (i = 0; i < ports_count; i++) { if (!ports[i]) @@ -464,9 +458,7 @@ static int __net_init __ip_vs_ftp_init(struct net *net) return 0; err_unreg: - unregister_ip_vs_app(net, app); -err_exit: - kfree(ipvs->ftp_app); + unregister_ip_vs_app(net, &ip_vs_ftp); return ret; } /* @@ -474,10 +466,7 @@ err_exit: */ static void __ip_vs_ftp_exit(struct net *net) { - struct netns_ipvs *ipvs = net_ipvs(net); - - unregister_ip_vs_app(net, ipvs->ftp_app); - kfree(ipvs->ftp_app); + unregister_ip_vs_app(net, &ip_vs_ftp); } static struct pernet_operations ip_vs_ftp_ops = { -- cgit v1.1 From 2b2d280817bd576e97ccd243b9b3a344d11ddd11 Mon Sep 17 00:00:00 2001 From: Claudiu Ghioc Date: Wed, 18 Jul 2012 12:10:22 +0300 Subject: ipvs: fixed sparse warning Removed the following sparse warnings, wether CONFIG_SYSCTL is defined or not: * warning: symbol 'ip_vs_control_net_init_sysctl' was not declared. Should it be static? * warning: symbol 'ip_vs_control_net_cleanup_sysctl' was not declared. Should it be static? Signed-off-by: Claudiu Ghioc Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_ctl.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net/netfilter/ipvs') diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 84444dd..d6d5cca 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -3675,7 +3675,7 @@ static void ip_vs_genl_unregister(void) * per netns intit/exit func. */ #ifdef CONFIG_SYSCTL -int __net_init ip_vs_control_net_init_sysctl(struct net *net) +static int __net_init ip_vs_control_net_init_sysctl(struct net *net) { int idx; struct netns_ipvs *ipvs = net_ipvs(net); @@ -3743,7 +3743,7 @@ int __net_init ip_vs_control_net_init_sysctl(struct net *net) return 0; } -void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net) +static void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); @@ -3754,8 +3754,8 @@ void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net) #else -int __net_init ip_vs_control_net_init_sysctl(struct net *net) { return 0; } -void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net) { } +static int __net_init ip_vs_control_net_init_sysctl(struct net *net) { return 0; } +static void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net) { } #endif -- cgit v1.1 From f2edb9f7706dcb2c0d9a362b2ba849efe3a97f5e Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Fri, 20 Jul 2012 11:59:52 +0300 Subject: ipvs: implement passive PMTUD for IPIP packets IPVS is missing the logic to update PMTU in routing for its IPIP packets. We monitor the dst_mtu and can return FRAG_NEEDED messages but if the tunneled packets get ICMP error we can not rely on other traffic to save the lowest MTU. The following patch adds ICMP handling for IPIP packets in incoming direction, from some remote host to our local IP used as saddr in the outer header. By this way we can forward any related ICMP traffic if it is for IPVS TUN connection. For the special case of PMTUD we update the routing and if client requested DF we can forward the error. To properly update the routing we have to bind the cached route (dest->dst_cache) to the selected saddr because ipv4_update_pmtu uses saddr for dst lookup. Add IP_VS_RT_MODE_CONNECT flag to force such binding with second route. Update ip_vs_tunnel_xmit to provide IP_VS_RT_MODE_CONNECT and change the code to copy DF. For now we prefer not to force PMTU discovery (outer DF=1) because we don't have configuration option to enable or disable PMTUD. As we do not keep any packets to resend, we prefer not to play games with packets without DF bit because the sender is not informed when they are rejected. Also, change ops->update_pmtu to be called only for local clients because there is no point to update MTU for input routes, in our case skb->dst->dev is lo. It seems the code is copied from ipip.c where the skb dst points to tunnel device. Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_core.c | 76 +++++++++++++++++++++++++++++++++++++-- net/netfilter/ipvs/ip_vs_xmit.c | 79 ++++++++++++++++++++++++++++------------- 2 files changed, 128 insertions(+), 27 deletions(-) (limited to 'net/netfilter/ipvs') diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index b54ecce..58918e2 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1303,7 +1303,8 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) struct ip_vs_conn *cp; struct ip_vs_protocol *pp; struct ip_vs_proto_data *pd; - unsigned int offset, ihl, verdict; + unsigned int offset, offset2, ihl, verdict; + bool ipip; *related = 1; @@ -1345,6 +1346,21 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) net = skb_net(skb); + /* Special case for errors for IPIP packets */ + ipip = false; + if (cih->protocol == IPPROTO_IPIP) { + if (unlikely(cih->frag_off & htons(IP_OFFSET))) + return NF_ACCEPT; + /* Error for our IPIP must arrive at LOCAL_IN */ + if (!(skb_rtable(skb)->rt_flags & RTCF_LOCAL)) + return NF_ACCEPT; + offset += cih->ihl * 4; + cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph); + if (cih == NULL) + return NF_ACCEPT; /* The packet looks wrong, ignore */ + ipip = true; + } + pd = ip_vs_proto_data_get(net, cih->protocol); if (!pd) return NF_ACCEPT; @@ -1358,11 +1374,14 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) IP_VS_DBG_PKT(11, AF_INET, pp, skb, offset, "Checking incoming ICMP for"); + offset2 = offset; offset += cih->ihl * 4; ip_vs_fill_iphdr(AF_INET, cih, &ciph); - /* The embedded headers contain source and dest in reverse order */ - cp = pp->conn_in_get(AF_INET, skb, &ciph, offset, 1); + /* The embedded headers contain source and dest in reverse order. + * For IPIP this is error for request, not for reply. + */ + cp = pp->conn_in_get(AF_INET, skb, &ciph, offset, ipip ? 0 : 1); if (!cp) return NF_ACCEPT; @@ -1376,6 +1395,57 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) goto out; } + if (ipip) { + __be32 info = ic->un.gateway; + + /* Update the MTU */ + if (ic->type == ICMP_DEST_UNREACH && + ic->code == ICMP_FRAG_NEEDED) { + struct ip_vs_dest *dest = cp->dest; + u32 mtu = ntohs(ic->un.frag.mtu); + + /* Strip outer IP and ICMP, go to IPIP header */ + __skb_pull(skb, ihl + sizeof(_icmph)); + offset2 -= ihl + sizeof(_icmph); + skb_reset_network_header(skb); + IP_VS_DBG(12, "ICMP for IPIP %pI4->%pI4: mtu=%u\n", + &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr, mtu); + rcu_read_lock(); + ipv4_update_pmtu(skb, dev_net(skb->dev), + mtu, 0, 0, 0, 0); + rcu_read_unlock(); + /* Client uses PMTUD? */ + if (!(cih->frag_off & htons(IP_DF))) + goto ignore_ipip; + /* Prefer the resulting PMTU */ + if (dest) { + spin_lock(&dest->dst_lock); + if (dest->dst_cache) + mtu = dst_mtu(dest->dst_cache); + spin_unlock(&dest->dst_lock); + } + if (mtu > 68 + sizeof(struct iphdr)) + mtu -= sizeof(struct iphdr); + info = htonl(mtu); + } + /* Strip outer IP, ICMP and IPIP, go to IP header of + * original request. + */ + __skb_pull(skb, offset2); + skb_reset_network_header(skb); + IP_VS_DBG(12, "Sending ICMP for %pI4->%pI4: t=%u, c=%u, i=%u\n", + &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr, + ic->type, ic->code, ntohl(info)); + icmp_send(skb, ic->type, ic->code, info); + /* ICMP can be shorter but anyways, account it */ + ip_vs_out_stats(cp, skb); + +ignore_ipip: + consume_skb(skb); + verdict = NF_STOLEN; + goto out; + } + /* do the statistics and put it back */ ip_vs_in_stats(cp, skb); if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol) diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 65b616a..c2275ba 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -49,6 +49,7 @@ enum { IP_VS_RT_MODE_RDR = 4, /* Allow redirect from remote daddr to * local */ + IP_VS_RT_MODE_CONNECT = 8, /* Always bind route to saddr */ }; /* @@ -84,6 +85,42 @@ __ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos) return dst; } +/* Get route to daddr, update *saddr, optionally bind route to saddr */ +static struct rtable *do_output_route4(struct net *net, __be32 daddr, + u32 rtos, int rt_mode, __be32 *saddr) +{ + struct flowi4 fl4; + struct rtable *rt; + int loop = 0; + + memset(&fl4, 0, sizeof(fl4)); + fl4.daddr = daddr; + fl4.saddr = (rt_mode & IP_VS_RT_MODE_CONNECT) ? *saddr : 0; + fl4.flowi4_tos = rtos; + +retry: + rt = ip_route_output_key(net, &fl4); + if (IS_ERR(rt)) { + /* Invalid saddr ? */ + if (PTR_ERR(rt) == -EINVAL && *saddr && + rt_mode & IP_VS_RT_MODE_CONNECT && !loop) { + *saddr = 0; + flowi4_update_output(&fl4, 0, rtos, daddr, 0); + goto retry; + } + IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n", &daddr); + return NULL; + } else if (!*saddr && rt_mode & IP_VS_RT_MODE_CONNECT && fl4.saddr) { + ip_rt_put(rt); + *saddr = fl4.saddr; + flowi4_update_output(&fl4, 0, rtos, daddr, fl4.saddr); + loop++; + goto retry; + } + *saddr = fl4.saddr; + return rt; +} + /* Get route to destination or remote server */ static struct rtable * __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest, @@ -98,20 +135,13 @@ __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest, spin_lock(&dest->dst_lock); if (!(rt = (struct rtable *) __ip_vs_dst_check(dest, rtos))) { - struct flowi4 fl4; - - memset(&fl4, 0, sizeof(fl4)); - fl4.daddr = dest->addr.ip; - fl4.flowi4_tos = rtos; - rt = ip_route_output_key(net, &fl4); - if (IS_ERR(rt)) { + rt = do_output_route4(net, dest->addr.ip, rtos, + rt_mode, &dest->dst_saddr.ip); + if (!rt) { spin_unlock(&dest->dst_lock); - IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n", - &dest->addr.ip); return NULL; } __ip_vs_dst_set(dest, rtos, dst_clone(&rt->dst), 0); - dest->dst_saddr.ip = fl4.saddr; IP_VS_DBG(10, "new dst %pI4, src %pI4, refcnt=%d, " "rtos=%X\n", &dest->addr.ip, &dest->dst_saddr.ip, @@ -122,19 +152,17 @@ __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest, *ret_saddr = dest->dst_saddr.ip; spin_unlock(&dest->dst_lock); } else { - struct flowi4 fl4; + __be32 saddr = htonl(INADDR_ANY); - memset(&fl4, 0, sizeof(fl4)); - fl4.daddr = daddr; - fl4.flowi4_tos = rtos; - rt = ip_route_output_key(net, &fl4); - if (IS_ERR(rt)) { - IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n", - &daddr); + /* For such unconfigured boxes avoid many route lookups + * for performance reasons because we do not remember saddr + */ + rt_mode &= ~IP_VS_RT_MODE_CONNECT; + rt = do_output_route4(net, daddr, rtos, rt_mode, &saddr); + if (!rt) return NULL; - } if (ret_saddr) - *ret_saddr = fl4.saddr; + *ret_saddr = saddr; } local = rt->rt_flags & RTCF_LOCAL; @@ -331,6 +359,7 @@ ip_vs_dst_reset(struct ip_vs_dest *dest) old_dst = dest->dst_cache; dest->dst_cache = NULL; dst_release(old_dst); + dest->dst_saddr.ip = 0; } #define IP_VS_XMIT_TUNNEL(skb, cp) \ @@ -771,7 +800,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, struct net_device *tdev; /* Device to other host */ struct iphdr *old_iph = ip_hdr(skb); u8 tos = old_iph->tos; - __be16 df = old_iph->frag_off; + __be16 df; struct iphdr *iph; /* Our new IP header */ unsigned int max_headroom; /* The extra header space needed */ int mtu; @@ -781,7 +810,8 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, RT_TOS(tos), IP_VS_RT_MODE_LOCAL | - IP_VS_RT_MODE_NON_LOCAL, + IP_VS_RT_MODE_NON_LOCAL | + IP_VS_RT_MODE_CONNECT, &saddr))) goto tx_error_icmp; if (rt->rt_flags & RTCF_LOCAL) { @@ -796,10 +826,11 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__); goto tx_error_put; } - if (skb_dst(skb)) + if (rt_is_output_route(skb_rtable(skb))) skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); - df |= (old_iph->frag_off & htons(IP_DF)); + /* Copy DF, reset fragment offset and MF */ + df = old_iph->frag_off & htons(IP_DF); if ((old_iph->frag_off & htons(IP_DF) && mtu < ntohs(old_iph->tot_len) && !skb_is_gso(skb))) { -- cgit v1.1 From 3654e61137db891f5312e6dd813b961484b5fdf3 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Fri, 20 Jul 2012 11:59:53 +0300 Subject: ipvs: add pmtu_disc option to disable IP DF for TUN packets Disabling PMTU discovery can increase the output packet rate but some users have enough resources and prefer to fragment than to drop traffic. By default, we copy the DF bit but if pmtu_disc is disabled we do not send FRAG_NEEDED messages anymore. Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_ctl.c | 8 ++++++++ net/netfilter/ipvs/ip_vs_xmit.c | 6 +++--- 2 files changed, 11 insertions(+), 3 deletions(-) (limited to 'net/netfilter/ipvs') diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index d6d5cca..03d3fc6 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -1801,6 +1801,12 @@ static struct ctl_table vs_vars[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "pmtu_disc", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, #ifdef CONFIG_IP_VS_DEBUG { .procname = "debug_level", @@ -3726,6 +3732,8 @@ static int __net_init ip_vs_control_net_init_sysctl(struct net *net) ipvs->sysctl_sync_retries = clamp_t(int, DEFAULT_SYNC_RETRIES, 0, 3); tbl[idx++].data = &ipvs->sysctl_sync_retries; tbl[idx++].data = &ipvs->sysctl_nat_icmp_send; + ipvs->sysctl_pmtu_disc = 1; + tbl[idx++].data = &ipvs->sysctl_pmtu_disc; ipvs->sysctl_hdr = register_net_sysctl(net, "net/ipv4/vs", tbl); diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index c2275ba..543a554 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -795,6 +795,7 @@ int ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp) { + struct netns_ipvs *ipvs = net_ipvs(skb_net(skb)); struct rtable *rt; /* Route to the other host */ __be32 saddr; /* Source for tunnel */ struct net_device *tdev; /* Device to other host */ @@ -830,10 +831,9 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); /* Copy DF, reset fragment offset and MF */ - df = old_iph->frag_off & htons(IP_DF); + df = sysctl_pmtu_disc(ipvs) ? old_iph->frag_off & htons(IP_DF) : 0; - if ((old_iph->frag_off & htons(IP_DF) && - mtu < ntohs(old_iph->tot_len) && !skb_is_gso(skb))) { + if (df && mtu < ntohs(old_iph->tot_len) && !skb_is_gso(skb)) { icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); IP_VS_DBG_RL("%s(): frag needed\n", __func__); goto tx_error_put; -- cgit v1.1