summary | refs | log | tree | commit | diff | stats
path: root/net/netfilter/ipvs/ip_vs_core.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/netfilter/ipvs/ip_vs_core.c')
-rw-r--r-- net/netfilter/ipvs/ip_vs_core.c 586
1 files changed, 413 insertions, 173 deletions
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index e5fef7a..b4e51e9 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -48,6 +48,7 @@
#ifdef CONFIG_IP_VS_IPV6
#include <net/ipv6.h>
#include <linux/netfilter_ipv6.h>
+#include <net/ip6_route.h>
#endif
#include <net/ip_vs.h>
@@ -342,7 +343,8 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
* Protocols supported: TCP, UDP
*/
struct ip_vs_conn *
-ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb)
+ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
+ struct ip_vs_protocol *pp, int *ignored)
{
struct ip_vs_conn *cp = NULL;
struct ip_vs_iphdr iph;
@@ -350,16 +352,44 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb)
__be16 _ports[2], *pptr;
unsigned int flags;
+ *ignored = 1;
ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph);
pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports);
if (pptr == NULL)
return NULL;
/*
+ * FTPDATA needs this check when using local real server.
+ * Never schedule Active FTPDATA connections from real server.
+ * For LVS-NAT they must be already created. For other methods
+ * with persistence the connection is created on SYN+ACK.
+ */
+ if (pptr[0] == FTPDATA) {
+ IP_VS_DBG_PKT(12, svc->af, pp, skb, 0,
+ "Not scheduling FTPDATA");
+ return NULL;
+ }
+
+ /*
+ * Do not schedule replies from local real server. It is risky
+ * for fwmark services but mostly for persistent services.
+ */
+ if ((!skb->dev || skb->dev->flags & IFF_LOOPBACK) &&
+ (svc->flags & IP_VS_SVC_F_PERSISTENT || svc->fwmark) &&
+ (cp = pp->conn_in_get(svc->af, skb, pp, &iph, iph.len, 1))) {
+ IP_VS_DBG_PKT(12, svc->af, pp, skb, 0,
+ "Not scheduling reply for existing connection");
+ __ip_vs_conn_put(cp);
+ return NULL;
+ }
+
+ /*
* Persistent service
*/
- if (svc->flags & IP_VS_SVC_F_PERSISTENT)
+ if (svc->flags & IP_VS_SVC_F_PERSISTENT) {
+ *ignored = 0;
return ip_vs_sched_persist(svc, skb, pptr);
+ }
/*
* Non-persistent service
@@ -372,6 +402,8 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb)
return NULL;
}
+ *ignored = 0;
+
dest = svc->scheduler->schedule(svc, skb);
if (dest == NULL) {
IP_VS_DBG(1, "Schedule: no dest found.\n");
@@ -498,35 +530,32 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
* ICMP_PORT_UNREACH is sent here no matter it is TCP/UDP. --WZ
*/
#ifdef CONFIG_IP_VS_IPV6
- if (svc->af == AF_INET6)
+ if (svc->af == AF_INET6) {
+ if (!skb->dev) {
+ struct net *net = dev_net(skb_dst(skb)->dev);
+
+ skb->dev = net->loopback_dev;
+ }
icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
- else
+ } else
#endif
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
return NF_DROP;
}
-/*
- * It is hooked before NF_IP_PRI_NAT_SRC at the NF_INET_POST_ROUTING
- * chain and is used to avoid double NAT and confirmation when we do
- * not want to keep the conntrack structure
- */
-static unsigned int ip_vs_post_routing(unsigned int hooknum,
- struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
+__sum16 ip_vs_checksum_complete(struct sk_buff *skb, int offset)
{
- if (!skb->ipvs_property)
- return NF_ACCEPT;
- /* The packet was sent from IPVS, exit this chain */
- return NF_STOP;
+ return csum_fold(skb_checksum(skb, offset, skb->len - offset, 0));
}
-__sum16 ip_vs_checksum_complete(struct sk_buff *skb, int offset)
+static inline enum ip_defrag_users ip_vs_defrag_user(unsigned int hooknum)
{
- return csum_fold(skb_checksum(skb, offset, skb->len - offset, 0));
+ if (NF_INET_LOCAL_IN == hooknum)
+ return IP_DEFRAG_VS_IN;
+ if (NF_INET_FORWARD == hooknum)
+ return IP_DEFRAG_VS_FWD;
+ return IP_DEFRAG_VS_OUT;
}
static inline int ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user)
@@ -589,10 +618,10 @@ void ip_vs_nat_icmp(struct sk_buff *skb, struct ip_vs_protocol *pp,
skb->ip_summed = CHECKSUM_UNNECESSARY;
if (inout)
- IP_VS_DBG_PKT(11, pp, skb, (void *)ciph - (void *)iph,
+ IP_VS_DBG_PKT(11, AF_INET, pp, skb, (void *)ciph - (void *)iph,
"Forwarding altered outgoing ICMP");
else
- IP_VS_DBG_PKT(11, pp, skb, (void *)ciph - (void *)iph,
+ IP_VS_DBG_PKT(11, AF_INET, pp, skb, (void *)ciph - (void *)iph,
"Forwarding altered incoming ICMP");
}
@@ -634,11 +663,13 @@ void ip_vs_nat_icmp_v6(struct sk_buff *skb, struct ip_vs_protocol *pp,
skb->ip_summed = CHECKSUM_PARTIAL;
if (inout)
- IP_VS_DBG_PKT(11, pp, skb, (void *)ciph - (void *)iph,
- "Forwarding altered outgoing ICMPv6");
+ IP_VS_DBG_PKT(11, AF_INET6, pp, skb,
+ (void *)ciph - (void *)iph,
+ "Forwarding altered outgoing ICMPv6");
else
- IP_VS_DBG_PKT(11, pp, skb, (void *)ciph - (void *)iph,
- "Forwarding altered incoming ICMPv6");
+ IP_VS_DBG_PKT(11, AF_INET6, pp, skb,
+ (void *)ciph - (void *)iph,
+ "Forwarding altered incoming ICMPv6");
}
#endif
@@ -679,11 +710,23 @@ static int handle_response_icmp(int af, struct sk_buff *skb,
#endif
ip_vs_nat_icmp(skb, pp, cp, 1);
+#ifdef CONFIG_IP_VS_IPV6
+ if (af == AF_INET6) {
+ if (sysctl_ip_vs_snat_reroute && ip6_route_me_harder(skb) != 0)
+ goto out;
+ } else
+#endif
+ if ((sysctl_ip_vs_snat_reroute ||
+ skb_rtable(skb)->rt_flags & RTCF_LOCAL) &&
+ ip_route_me_harder(skb, RTN_LOCAL) != 0)
+ goto out;
+
/* do the statistics and put it back */
ip_vs_out_stats(cp, skb);
+ skb->ipvs_property = 1;
if (!(cp->flags & IP_VS_CONN_F_NFCT))
- skb->ipvs_property = 1;
+ ip_vs_notrack(skb);
else
ip_vs_update_conntrack(skb, cp, 0);
verdict = NF_ACCEPT;
@@ -699,7 +742,8 @@ out:
* Find any that might be relevant, check against existing connections.
* Currently handles error types - unreachable, quench, ttl exceeded.
*/
-static int ip_vs_out_icmp(struct sk_buff *skb, int *related)
+static int ip_vs_out_icmp(struct sk_buff *skb, int *related,
+ unsigned int hooknum)
{
struct iphdr *iph;
struct icmphdr _icmph, *ic;
@@ -714,7 +758,7 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related)
/* reassemble IP fragments */
if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
- if (ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT))
+ if (ip_vs_gather_frags(skb, ip_vs_defrag_user(hooknum)))
return NF_STOLEN;
}
@@ -757,7 +801,8 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related)
pp->dont_defrag))
return NF_ACCEPT;
- IP_VS_DBG_PKT(11, pp, skb, offset, "Checking outgoing ICMP for");
+ IP_VS_DBG_PKT(11, AF_INET, pp, skb, offset,
+ "Checking outgoing ICMP for");
offset += cih->ihl * 4;
@@ -773,7 +818,8 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related)
}
#ifdef CONFIG_IP_VS_IPV6
-static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related)
+static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related,
+ unsigned int hooknum)
{
struct ipv6hdr *iph;
struct icmp6hdr _icmph, *ic;
@@ -789,7 +835,7 @@ static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related)
/* reassemble IP fragments */
if (ipv6_hdr(skb)->nexthdr == IPPROTO_FRAGMENT) {
- if (ip_vs_gather_frags_v6(skb, IP_DEFRAG_VS_OUT))
+ if (ip_vs_gather_frags_v6(skb, ip_vs_defrag_user(hooknum)))
return NF_STOLEN;
}
@@ -832,7 +878,8 @@ static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related)
if (unlikely(cih->nexthdr == IPPROTO_FRAGMENT && pp->dont_defrag))
return NF_ACCEPT;
- IP_VS_DBG_PKT(11, pp, skb, offset, "Checking outgoing ICMPv6 for");
+ IP_VS_DBG_PKT(11, AF_INET6, pp, skb, offset,
+ "Checking outgoing ICMPv6 for");
offset += sizeof(struct ipv6hdr);
@@ -880,7 +927,7 @@ static unsigned int
handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
struct ip_vs_conn *cp, int ihl)
{
- IP_VS_DBG_PKT(11, pp, skb, 0, "Outgoing packet");
+ IP_VS_DBG_PKT(11, af, pp, skb, 0, "Outgoing packet");
if (!skb_make_writable(skb, ihl))
goto drop;
@@ -914,23 +961,24 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
* if it came from this machine itself. So re-compute
* the routing information.
*/
- if (sysctl_ip_vs_snat_reroute) {
#ifdef CONFIG_IP_VS_IPV6
- if (af == AF_INET6) {
- if (ip6_route_me_harder(skb) != 0)
- goto drop;
- } else
+ if (af == AF_INET6) {
+ if (sysctl_ip_vs_snat_reroute && ip6_route_me_harder(skb) != 0)
+ goto drop;
+ } else
#endif
- if (ip_route_me_harder(skb, RTN_LOCAL) != 0)
- goto drop;
- }
+ if ((sysctl_ip_vs_snat_reroute ||
+ skb_rtable(skb)->rt_flags & RTCF_LOCAL) &&
+ ip_route_me_harder(skb, RTN_LOCAL) != 0)
+ goto drop;
- IP_VS_DBG_PKT(10, pp, skb, 0, "After SNAT");
+ IP_VS_DBG_PKT(10, af, pp, skb, 0, "After SNAT");
ip_vs_out_stats(cp, skb);
ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pp);
+ skb->ipvs_property = 1;
if (!(cp->flags & IP_VS_CONN_F_NFCT))
- skb->ipvs_property = 1;
+ ip_vs_notrack(skb);
else
ip_vs_update_conntrack(skb, cp, 0);
ip_vs_conn_put(cp);
@@ -946,53 +994,54 @@ drop:
}
/*
- * It is hooked at the NF_INET_FORWARD chain, used only for VS/NAT.
* Check if outgoing packet belongs to the established ip_vs_conn.
*/
static unsigned int
-ip_vs_out(unsigned int hooknum, struct sk_buff *skb,
- const struct net_device *in, const struct net_device *out,
- int (*okfn)(struct sk_buff *))
+ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
{
struct ip_vs_iphdr iph;
struct ip_vs_protocol *pp;
struct ip_vs_conn *cp;
- int af;
EnterFunction(11);
- af = (skb->protocol == htons(ETH_P_IP)) ? AF_INET : AF_INET6;
-
+ /* Already marked as IPVS request or reply? */
if (skb->ipvs_property)
return NF_ACCEPT;
+ /* Bad... Do not break raw sockets */
+ if (unlikely(skb->sk != NULL && hooknum == NF_INET_LOCAL_OUT &&
+ af == AF_INET)) {
+ struct sock *sk = skb->sk;
+ struct inet_sock *inet = inet_sk(skb->sk);
+
+ if (inet && sk->sk_family == PF_INET && inet->nodefrag)
+ return NF_ACCEPT;
+ }
+
+ if (unlikely(!skb_dst(skb)))
+ return NF_ACCEPT;
+
ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
#ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6) {
if (unlikely(iph.protocol == IPPROTO_ICMPV6)) {
- int related, verdict = ip_vs_out_icmp_v6(skb, &related);
+ int related;
+ int verdict = ip_vs_out_icmp_v6(skb, &related,
+ hooknum);
- if (related) {
- if (sysctl_ip_vs_snat_reroute &&
- NF_ACCEPT == verdict &&
- ip6_route_me_harder(skb))
- verdict = NF_DROP;
+ if (related)
return verdict;
- }
ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
}
} else
#endif
if (unlikely(iph.protocol == IPPROTO_ICMP)) {
- int related, verdict = ip_vs_out_icmp(skb, &related);
+ int related;
+ int verdict = ip_vs_out_icmp(skb, &related, hooknum);
- if (related) {
- if (sysctl_ip_vs_snat_reroute &&
- NF_ACCEPT == verdict &&
- ip_route_me_harder(skb, RTN_LOCAL))
- verdict = NF_DROP;
+ if (related)
return verdict;
- }
ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
}
@@ -1003,19 +1052,19 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb,
/* reassemble IP fragments */
#ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6) {
- if (unlikely(iph.protocol == IPPROTO_ICMPV6)) {
- int related, verdict = ip_vs_out_icmp_v6(skb, &related);
-
- if (related)
- return verdict;
-
- ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
+ if (ipv6_hdr(skb)->nexthdr == IPPROTO_FRAGMENT) {
+ if (ip_vs_gather_frags_v6(skb,
+ ip_vs_defrag_user(hooknum)))
+ return NF_STOLEN;
}
+
+ ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
} else
#endif
if (unlikely(ip_hdr(skb)->frag_off & htons(IP_MF|IP_OFFSET) &&
!pp->dont_defrag)) {
- if (ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT))
+ if (ip_vs_gather_frags(skb,
+ ip_vs_defrag_user(hooknum)))
return NF_STOLEN;
ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
@@ -1026,55 +1075,123 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb,
*/
cp = pp->conn_out_get(af, skb, pp, &iph, iph.len, 0);
- if (unlikely(!cp)) {
- if (sysctl_ip_vs_nat_icmp_send &&
- (pp->protocol == IPPROTO_TCP ||
- pp->protocol == IPPROTO_UDP ||
- pp->protocol == IPPROTO_SCTP)) {
- __be16 _ports[2], *pptr;
-
- pptr = skb_header_pointer(skb, iph.len,
- sizeof(_ports), _ports);
- if (pptr == NULL)
- return NF_ACCEPT; /* Not for me */
- if (ip_vs_lookup_real_service(af, iph.protocol,
- &iph.saddr,
- pptr[0])) {
- /*
- * Notify the real server: there is no
- * existing entry if it is not RST
- * packet or not TCP packet.
- */
- if ((iph.protocol != IPPROTO_TCP &&
- iph.protocol != IPPROTO_SCTP)
- || ((iph.protocol == IPPROTO_TCP
- && !is_tcp_reset(skb, iph.len))
- || (iph.protocol == IPPROTO_SCTP
- && !is_sctp_abort(skb,
- iph.len)))) {
+ if (likely(cp))
+ return handle_response(af, skb, pp, cp, iph.len);
+ if (sysctl_ip_vs_nat_icmp_send &&
+ (pp->protocol == IPPROTO_TCP ||
+ pp->protocol == IPPROTO_UDP ||
+ pp->protocol == IPPROTO_SCTP)) {
+ __be16 _ports[2], *pptr;
+
+ pptr = skb_header_pointer(skb, iph.len,
+ sizeof(_ports), _ports);
+ if (pptr == NULL)
+ return NF_ACCEPT; /* Not for me */
+ if (ip_vs_lookup_real_service(af, iph.protocol,
+ &iph.saddr,
+ pptr[0])) {
+ /*
+ * Notify the real server: there is no
+ * existing entry if it is not RST
+ * packet or not TCP packet.
+ */
+ if ((iph.protocol != IPPROTO_TCP &&
+ iph.protocol != IPPROTO_SCTP)
+ || ((iph.protocol == IPPROTO_TCP
+ && !is_tcp_reset(skb, iph.len))
+ || (iph.protocol == IPPROTO_SCTP
+ && !is_sctp_abort(skb,
+ iph.len)))) {
#ifdef CONFIG_IP_VS_IPV6
- if (af == AF_INET6)
- icmpv6_send(skb,
- ICMPV6_DEST_UNREACH,
- ICMPV6_PORT_UNREACH,
- 0);
- else
+ if (af == AF_INET6) {
+ struct net *net =
+ dev_net(skb_dst(skb)->dev);
+
+ if (!skb->dev)
+ skb->dev = net->loopback_dev;
+ icmpv6_send(skb,
+ ICMPV6_DEST_UNREACH,
+ ICMPV6_PORT_UNREACH,
+ 0);
+ } else
#endif
- icmp_send(skb,
- ICMP_DEST_UNREACH,
- ICMP_PORT_UNREACH, 0);
- return NF_DROP;
- }
+ icmp_send(skb,
+ ICMP_DEST_UNREACH,
+ ICMP_PORT_UNREACH, 0);
+ return NF_DROP;
}
}
- IP_VS_DBG_PKT(12, pp, skb, 0,
- "packet continues traversal as normal");
- return NF_ACCEPT;
}
+ IP_VS_DBG_PKT(12, af, pp, skb, 0,
+ "ip_vs_out: packet continues traversal as normal");
+ return NF_ACCEPT;
+}
+
+/*
+ * It is hooked at the NF_INET_FORWARD and NF_INET_LOCAL_IN chains,
+ * used only for VS/NAT.
+ * Check if the packet is a reply for an established ip_vs_conn.
+ */
+static unsigned int
+ip_vs_reply4(unsigned int hooknum, struct sk_buff *skb,
+ const struct net_device *in, const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ return ip_vs_out(hooknum, skb, AF_INET);
+}
+
+/*
+ * It is hooked at the NF_INET_LOCAL_OUT chain, used only for VS/NAT.
+ * Check if packet is reply for established ip_vs_conn.
+ */
+static unsigned int
+ip_vs_local_reply4(unsigned int hooknum, struct sk_buff *skb,
+ const struct net_device *in, const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ unsigned int verdict;
- return handle_response(af, skb, pp, cp, iph.len);
+ /* Disable BH in LOCAL_OUT until all places are fixed */
+ local_bh_disable();
+ verdict = ip_vs_out(hooknum, skb, AF_INET);
+ local_bh_enable();
+ return verdict;
}
+#ifdef CONFIG_IP_VS_IPV6
+
+/*
+ * It is hooked at the NF_INET_FORWARD and NF_INET_LOCAL_IN chains,
+ * used only for VS/NAT.
+ * Check if the packet is a reply for an established ip_vs_conn.
+ */
+static unsigned int
+ip_vs_reply6(unsigned int hooknum, struct sk_buff *skb,
+ const struct net_device *in, const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ return ip_vs_out(hooknum, skb, AF_INET6);
+}
+
+/*
+ * It is hooked at the NF_INET_LOCAL_OUT chain, used only for VS/NAT.
+ * Check if packet is reply for established ip_vs_conn.
+ */
+static unsigned int
+ip_vs_local_reply6(unsigned int hooknum, struct sk_buff *skb,
+ const struct net_device *in, const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ unsigned int verdict;
+
+ /* Disable BH in LOCAL_OUT until all places are fixed */
+ local_bh_disable();
+ verdict = ip_vs_out(hooknum, skb, AF_INET6);
+ local_bh_enable();
+ return verdict;
+}
+
+#endif
/*
* Handle ICMP messages in the outside-to-inside direction (incoming).
@@ -1098,8 +1215,7 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
/* reassemble IP fragments */
if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
- if (ip_vs_gather_frags(skb, hooknum == NF_INET_LOCAL_IN ?
- IP_DEFRAG_VS_IN : IP_DEFRAG_VS_FWD))
+ if (ip_vs_gather_frags(skb, ip_vs_defrag_user(hooknum)))
return NF_STOLEN;
}
@@ -1142,7 +1258,8 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
pp->dont_defrag))
return NF_ACCEPT;
- IP_VS_DBG_PKT(11, pp, skb, offset, "Checking incoming ICMP for");
+ IP_VS_DBG_PKT(11, AF_INET, pp, skb, offset,
+ "Checking incoming ICMP for");
offset += cih->ihl * 4;
@@ -1176,7 +1293,14 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol)
offset += 2 * sizeof(__u16);
verdict = ip_vs_icmp_xmit(skb, cp, pp, offset);
- /* do not touch skb anymore */
+ /* LOCALNODE from FORWARD hook is not supported */
+ if (verdict == NF_ACCEPT && hooknum == NF_INET_FORWARD &&
+ skb_rtable(skb)->rt_flags & RTCF_LOCAL) {
+ IP_VS_DBG(1, "%s(): "
+ "local delivery to %pI4 but in FORWARD\n",
+ __func__, &skb_rtable(skb)->rt_dst);
+ verdict = NF_DROP;
+ }
out:
__ip_vs_conn_put(cp);
@@ -1197,14 +1321,13 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
struct ip_vs_protocol *pp;
unsigned int offset, verdict;
union nf_inet_addr snet;
+ struct rt6_info *rt;
*related = 1;
/* reassemble IP fragments */
if (ipv6_hdr(skb)->nexthdr == IPPROTO_FRAGMENT) {
- if (ip_vs_gather_frags_v6(skb, hooknum == NF_INET_LOCAL_IN ?
- IP_DEFRAG_VS_IN :
- IP_DEFRAG_VS_FWD))
+ if (ip_vs_gather_frags_v6(skb, ip_vs_defrag_user(hooknum)))
return NF_STOLEN;
}
@@ -1247,7 +1370,8 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
if (unlikely(cih->nexthdr == IPPROTO_FRAGMENT && pp->dont_defrag))
return NF_ACCEPT;
- IP_VS_DBG_PKT(11, pp, skb, offset, "Checking incoming ICMPv6 for");
+ IP_VS_DBG_PKT(11, AF_INET6, pp, skb, offset,
+ "Checking incoming ICMPv6 for");
offset += sizeof(struct ipv6hdr);
@@ -1275,7 +1399,15 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
IPPROTO_SCTP == cih->nexthdr)
offset += 2 * sizeof(__u16);
verdict = ip_vs_icmp_xmit_v6(skb, cp, pp, offset);
- /* do not touch skb anymore */
+ /* LOCALNODE from FORWARD hook is not supported */
+ if (verdict == NF_ACCEPT && hooknum == NF_INET_FORWARD &&
+ (rt = (struct rt6_info *) skb_dst(skb)) &&
+ rt->rt6i_dev && rt->rt6i_dev->flags & IFF_LOOPBACK) {
+ IP_VS_DBG(1, "%s(): "
+ "local delivery to %pI6 but in FORWARD\n",
+ __func__, &rt->rt6i_dst);
+ verdict = NF_DROP;
+ }
__ip_vs_conn_put(cp);
@@ -1289,35 +1421,49 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
* and send it on its way...
*/
static unsigned int
-ip_vs_in(unsigned int hooknum, struct sk_buff *skb,
- const struct net_device *in, const struct net_device *out,
- int (*okfn)(struct sk_buff *))
+ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
{
struct ip_vs_iphdr iph;
struct ip_vs_protocol *pp;
struct ip_vs_conn *cp;
- int ret, restart, af, pkts;
-
- af = (skb->protocol == htons(ETH_P_IP)) ? AF_INET : AF_INET6;
+ int ret, restart, pkts;
- ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
+ /* Already marked as IPVS request or reply? */
+ if (skb->ipvs_property)
+ return NF_ACCEPT;
/*
- * Big tappo: only PACKET_HOST, including loopback for local client
- * Don't handle local packets on IPv6 for now
+ * Big tappo:
+ * - remote client: only PACKET_HOST
+ * - route: used for struct net when skb->dev is unset
*/
- if (unlikely(skb->pkt_type != PACKET_HOST)) {
- IP_VS_DBG_BUF(12, "packet type=%d proto=%d daddr=%s ignored\n",
- skb->pkt_type,
- iph.protocol,
- IP_VS_DBG_ADDR(af, &iph.daddr));
+ if (unlikely((skb->pkt_type != PACKET_HOST &&
+ hooknum != NF_INET_LOCAL_OUT) ||
+ !skb_dst(skb))) {
+ ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
+ IP_VS_DBG_BUF(12, "packet type=%d proto=%d daddr=%s"
+ " ignored in hook %u\n",
+ skb->pkt_type, iph.protocol,
+ IP_VS_DBG_ADDR(af, &iph.daddr), hooknum);
return NF_ACCEPT;
}
+ ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
+
+ /* Bad... Do not break raw sockets */
+ if (unlikely(skb->sk != NULL && hooknum == NF_INET_LOCAL_OUT &&
+ af == AF_INET)) {
+ struct sock *sk = skb->sk;
+ struct inet_sock *inet = inet_sk(skb->sk);
+
+ if (inet && sk->sk_family == PF_INET && inet->nodefrag)
+ return NF_ACCEPT;
+ }
#ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6) {
if (unlikely(iph.protocol == IPPROTO_ICMPV6)) {
- int related, verdict = ip_vs_in_icmp_v6(skb, &related, hooknum);
+ int related;
+ int verdict = ip_vs_in_icmp_v6(skb, &related, hooknum);
if (related)
return verdict;
@@ -1326,7 +1472,8 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb,
} else
#endif
if (unlikely(iph.protocol == IPPROTO_ICMP)) {
- int related, verdict = ip_vs_in_icmp(skb, &related, hooknum);
+ int related;
+ int verdict = ip_vs_in_icmp(skb, &related, hooknum);
if (related)
return verdict;
@@ -1346,23 +1493,18 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb,
if (unlikely(!cp)) {
int v;
- /* For local client packets, it could be a response */
- cp = pp->conn_out_get(af, skb, pp, &iph, iph.len, 0);
- if (cp)
- return handle_response(af, skb, pp, cp, iph.len);
-
if (!pp->conn_schedule(af, skb, pp, &v, &cp))
return v;
}
if (unlikely(!cp)) {
/* sorry, all this trouble for a no-hit :) */
- IP_VS_DBG_PKT(12, pp, skb, 0,
- "packet continues traversal as normal");
+ IP_VS_DBG_PKT(12, af, pp, skb, 0,
+ "ip_vs_in: packet continues traversal as normal");
return NF_ACCEPT;
}
- IP_VS_DBG_PKT(11, pp, skb, 0, "Incoming packet");
+ IP_VS_DBG_PKT(11, af, pp, skb, 0, "Incoming packet");
/* Check the server status */
if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) {
@@ -1429,6 +1571,72 @@ out:
return ret;
}
+/*
+ * AF_INET handler in NF_INET_LOCAL_IN chain
+ * Schedule and forward packets from remote clients
+ */
+static unsigned int
+ip_vs_remote_request4(unsigned int hooknum, struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ return ip_vs_in(hooknum, skb, AF_INET);
+}
+
+/*
+ * AF_INET handler in NF_INET_LOCAL_OUT chain
+ * Schedule and forward packets from local clients
+ */
+static unsigned int
+ip_vs_local_request4(unsigned int hooknum, struct sk_buff *skb,
+ const struct net_device *in, const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ unsigned int verdict;
+
+ /* Disable BH in LOCAL_OUT until all places are fixed */
+ local_bh_disable();
+ verdict = ip_vs_in(hooknum, skb, AF_INET);
+ local_bh_enable();
+ return verdict;
+}
+
+#ifdef CONFIG_IP_VS_IPV6
+
+/*
+ * AF_INET6 handler in NF_INET_LOCAL_IN chain
+ * Schedule and forward packets from remote clients
+ */
+static unsigned int
+ip_vs_remote_request6(unsigned int hooknum, struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ return ip_vs_in(hooknum, skb, AF_INET6);
+}
+
+/*
+ * AF_INET6 handler in NF_INET_LOCAL_OUT chain
+ * Schedule and forward packets from local clients
+ */
+static unsigned int
+ip_vs_local_request6(unsigned int hooknum, struct sk_buff *skb,
+ const struct net_device *in, const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ unsigned int verdict;
+
+ /* Disable BH in LOCAL_OUT until all places are fixed */
+ local_bh_disable();
+ verdict = ip_vs_in(hooknum, skb, AF_INET6);
+ local_bh_enable();
+ return verdict;
+}
+
+#endif
+
/*
* It is hooked at the NF_INET_FORWARD chain, in order to catch ICMP
@@ -1469,23 +1677,39 @@ ip_vs_forward_icmp_v6(unsigned int hooknum, struct sk_buff *skb,
static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
+ /* After packet filtering, change source only for VS/NAT */
+ {
+ .hook = ip_vs_reply4,
+ .owner = THIS_MODULE,
+ .pf = PF_INET,
+ .hooknum = NF_INET_LOCAL_IN,
+ .priority = 99,
+ },
/* After packet filtering, forward packet through VS/DR, VS/TUN,
* or VS/NAT(change destination), so that filtering rules can be
* applied to IPVS. */
{
- .hook = ip_vs_in,
+ .hook = ip_vs_remote_request4,
.owner = THIS_MODULE,
.pf = PF_INET,
- .hooknum = NF_INET_LOCAL_IN,
- .priority = 100,
+ .hooknum = NF_INET_LOCAL_IN,
+ .priority = 101,
},
- /* After packet filtering, change source only for VS/NAT */
+ /* Before ip_vs_in, change source only for VS/NAT */
+ {
+ .hook = ip_vs_local_reply4,
+ .owner = THIS_MODULE,
+ .pf = PF_INET,
+ .hooknum = NF_INET_LOCAL_OUT,
+ .priority = -99,
+ },
+ /* After mangle, schedule and forward local requests */
{
- .hook = ip_vs_out,
+ .hook = ip_vs_local_request4,
.owner = THIS_MODULE,
.pf = PF_INET,
- .hooknum = NF_INET_FORWARD,
- .priority = 100,
+ .hooknum = NF_INET_LOCAL_OUT,
+ .priority = -98,
},
/* After packet filtering (but before ip_vs_out_icmp), catch icmp
* destined for 0.0.0.0/0, which is for incoming IPVS connections */
@@ -1493,35 +1717,51 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
.hook = ip_vs_forward_icmp,
.owner = THIS_MODULE,
.pf = PF_INET,
- .hooknum = NF_INET_FORWARD,
- .priority = 99,
+ .hooknum = NF_INET_FORWARD,
+ .priority = 99,
},
- /* Before the netfilter connection tracking, exit from POST_ROUTING */
+ /* After packet filtering, change source only for VS/NAT */
{
- .hook = ip_vs_post_routing,
+ .hook = ip_vs_reply4,
.owner = THIS_MODULE,
.pf = PF_INET,
- .hooknum = NF_INET_POST_ROUTING,
- .priority = NF_IP_PRI_NAT_SRC-1,
+ .hooknum = NF_INET_FORWARD,
+ .priority = 100,
},
#ifdef CONFIG_IP_VS_IPV6
+ /* After packet filtering, change source only for VS/NAT */
+ {
+ .hook = ip_vs_reply6,
+ .owner = THIS_MODULE,
+ .pf = PF_INET6,
+ .hooknum = NF_INET_LOCAL_IN,
+ .priority = 99,
+ },
/* After packet filtering, forward packet through VS/DR, VS/TUN,
* or VS/NAT(change destination), so that filtering rules can be
* applied to IPVS. */
{
- .hook = ip_vs_in,
+ .hook = ip_vs_remote_request6,
.owner = THIS_MODULE,
.pf = PF_INET6,
- .hooknum = NF_INET_LOCAL_IN,
- .priority = 100,
+ .hooknum = NF_INET_LOCAL_IN,
+ .priority = 101,
},
- /* After packet filtering, change source only for VS/NAT */
+ /* Before ip_vs_in, change source only for VS/NAT */
{
- .hook = ip_vs_out,
+ .hook = ip_vs_local_reply6,
+ .owner = THIS_MODULE,
+ .pf = PF_INET6, /* IPv6 handler must be registered in the IPv6 family */
+ .hooknum = NF_INET_LOCAL_OUT,
+ .priority = -99,
+ },
+ /* After mangle, schedule and forward local requests */
+ {
+ .hook = ip_vs_local_request6,
.owner = THIS_MODULE,
.pf = PF_INET6,
- .hooknum = NF_INET_FORWARD,
- .priority = 100,
+ .hooknum = NF_INET_LOCAL_OUT,
+ .priority = -98,
},
/* After packet filtering (but before ip_vs_out_icmp), catch icmp
* destined for 0.0.0.0/0, which is for incoming IPVS connections */
@@ -1529,16 +1769,16 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
.hook = ip_vs_forward_icmp_v6,
.owner = THIS_MODULE,
.pf = PF_INET6,
- .hooknum = NF_INET_FORWARD,
- .priority = 99,
+ .hooknum = NF_INET_FORWARD,
+ .priority = 99,
},
- /* Before the netfilter connection tracking, exit from POST_ROUTING */
+ /* After packet filtering, change source only for VS/NAT */
{
- .hook = ip_vs_post_routing,
+ .hook = ip_vs_reply6,
.owner = THIS_MODULE,
.pf = PF_INET6,
- .hooknum = NF_INET_POST_ROUTING,
- .priority = NF_IP6_PRI_NAT_SRC-1,
+ .hooknum = NF_INET_FORWARD,
+ .priority = 100,
},
#endif
};
OpenPOWER on IntegriCloud