diff options
Diffstat (limited to 'net/netfilter/ipvs')
-rw-r--r-- | net/netfilter/ipvs/Kconfig | 11 | ||||
-rw-r--r-- | net/netfilter/ipvs/ip_vs_app.c | 43 | ||||
-rw-r--r-- | net/netfilter/ipvs/ip_vs_conn.c | 55 | ||||
-rw-r--r-- | net/netfilter/ipvs/ip_vs_core.c | 57 | ||||
-rw-r--r-- | net/netfilter/ipvs/ip_vs_ctl.c | 10 | ||||
-rw-r--r-- | net/netfilter/ipvs/ip_vs_ftp.c | 176 | ||||
-rw-r--r-- | net/netfilter/ipvs/ip_vs_proto.c | 1 | ||||
-rw-r--r-- | net/netfilter/ipvs/ip_vs_proto_sctp.c | 55 | ||||
-rw-r--r-- | net/netfilter/ipvs/ip_vs_proto_tcp.c | 50 | ||||
-rw-r--r-- | net/netfilter/ipvs/ip_vs_proto_udp.c | 56 | ||||
-rw-r--r-- | net/netfilter/ipvs/ip_vs_xmit.c | 115 |
11 files changed, 322 insertions, 307 deletions
diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig index 712ccad..46a77d5 100644 --- a/net/netfilter/ipvs/Kconfig +++ b/net/netfilter/ipvs/Kconfig @@ -3,7 +3,7 @@ # menuconfig IP_VS tristate "IP virtual server support" - depends on NET && INET && NETFILTER + depends on NET && INET && NETFILTER && NF_CONNTRACK ---help--- IP Virtual Server support will let you build a high-performance virtual server based on cluster of two or more real servers. This @@ -26,7 +26,7 @@ if IP_VS config IP_VS_IPV6 bool "IPv6 support for IPVS" - depends on EXPERIMENTAL && (IPV6 = y || IP_VS = IPV6) + depends on IPV6 = y || IP_VS = IPV6 ---help--- Add IPv6 support to IPVS. This is incomplete and might be dangerous. @@ -87,19 +87,16 @@ config IP_VS_PROTO_UDP protocol. Say Y if unsure. config IP_VS_PROTO_AH_ESP - bool - depends on UNDEFINED + def_bool IP_VS_PROTO_ESP || IP_VS_PROTO_AH config IP_VS_PROTO_ESP bool "ESP load balancing support" - select IP_VS_PROTO_AH_ESP ---help--- This option enables support for load balancing ESP (Encapsulation Security Payload) transport protocol. Say Y if unsure. config IP_VS_PROTO_AH bool "AH load balancing support" - select IP_VS_PROTO_AH_ESP ---help--- This option enables support for load balancing AH (Authentication Header) transport protocol. Say Y if unsure. @@ -238,7 +235,7 @@ comment 'IPVS application helper' config IP_VS_FTP tristate "FTP protocol helper" - depends on IP_VS_PROTO_TCP + depends on IP_VS_PROTO_TCP && NF_NAT ---help--- FTP is a protocol that transfers IP address and/or port number in the payload. In the virtual server via Network Address Translation, diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c index 1cb0e83..e76f87f 100644 --- a/net/netfilter/ipvs/ip_vs_app.c +++ b/net/netfilter/ipvs/ip_vs_app.c @@ -569,49 +569,6 @@ static const struct file_operations ip_vs_app_fops = { }; #endif - -/* - * Replace a segment of data with a new segment - */ -int ip_vs_skb_replace(struct sk_buff *skb, gfp_t pri, - char *o_buf, int o_len, char *n_buf, int n_len) -{ - int diff; - int o_offset; - int o_left; - - EnterFunction(9); - - diff = n_len - o_len; - o_offset = o_buf - (char *)skb->data; - /* The length of left data after o_buf+o_len in the skb data */ - o_left = skb->len - (o_offset + o_len); - - if (diff <= 0) { - memmove(o_buf + n_len, o_buf + o_len, o_left); - memcpy(o_buf, n_buf, n_len); - skb_trim(skb, skb->len + diff); - } else if (diff <= skb_tailroom(skb)) { - skb_put(skb, diff); - memmove(o_buf + n_len, o_buf + o_len, o_left); - memcpy(o_buf, n_buf, n_len); - } else { - if (pskb_expand_head(skb, skb_headroom(skb), diff, pri)) - return -ENOMEM; - skb_put(skb, diff); - memmove(skb->data + o_offset + n_len, - skb->data + o_offset + o_len, o_left); - skb_copy_to_linear_data_offset(skb, o_offset, n_buf, n_len); - } - - /* must update the iph total length here */ - ip_hdr(skb)->tot_len = htons(skb->len); - - LeaveFunction(9); - return 0; -} - - int __init ip_vs_app_init(void) { /* we will replace it with proc_net_ipvs_create() soon */ diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index ff04e9e..b71c69a 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -158,6 +158,9 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp) unsigned hash; int ret; + if (cp->flags & IP_VS_CONN_F_ONE_PACKET) + return 0; + /* Hash by protocol, client address and port */ hash = ip_vs_conn_hashkey(cp->af, cp->protocol, &cp->caddr, cp->cport); @@ -268,6 +271,29 @@ struct ip_vs_conn *ip_vs_conn_in_get return cp; } +struct ip_vs_conn * +ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb, + struct ip_vs_protocol *pp, + const struct ip_vs_iphdr *iph, + unsigned int proto_off, int inverse) +{ + __be16 _ports[2], *pptr; + + pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); + if (pptr == NULL) + return NULL; + + if (likely(!inverse)) + return ip_vs_conn_in_get(af, iph->protocol, + &iph->saddr, pptr[0], + &iph->daddr, pptr[1]); + else + return ip_vs_conn_in_get(af, iph->protocol, + &iph->daddr, pptr[1], + &iph->saddr, pptr[0]); +} +EXPORT_SYMBOL_GPL(ip_vs_conn_in_get_proto); + /* Get reference to connection template */ struct ip_vs_conn *ip_vs_ct_in_get (int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port, @@ -353,14 +379,37 @@ struct ip_vs_conn *ip_vs_conn_out_get return ret; } +struct ip_vs_conn * +ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb, + struct ip_vs_protocol *pp, + const struct ip_vs_iphdr *iph, + unsigned int proto_off, int inverse) +{ + __be16 _ports[2], *pptr; + + pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); + if (pptr == NULL) + return NULL; + + if (likely(!inverse)) + return ip_vs_conn_out_get(af, iph->protocol, + &iph->saddr, pptr[0], + &iph->daddr, pptr[1]); + else + return ip_vs_conn_out_get(af, iph->protocol, + &iph->daddr, pptr[1], + &iph->saddr, pptr[0]); +} +EXPORT_SYMBOL_GPL(ip_vs_conn_out_get_proto); /* * Put back the conn and restart its timer with its timeout */ void ip_vs_conn_put(struct ip_vs_conn *cp) { - /* reset it expire in its timeout */ - mod_timer(&cp->timer, jiffies+cp->timeout); + unsigned long t = (cp->flags & IP_VS_CONN_F_ONE_PACKET) ? + 0 : cp->timeout; + mod_timer(&cp->timer, jiffies+t); __ip_vs_conn_put(cp); } @@ -653,7 +702,7 @@ static void ip_vs_conn_expire(unsigned long data) /* * unhash it if it is hashed in the conn table */ - if (!ip_vs_conn_unhash(cp)) + if (!ip_vs_conn_unhash(cp) && !(cp->flags & IP_VS_CONN_F_ONE_PACKET)) goto expire_later; /* diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 1cd6e3f..4f8ddba4 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -54,7 +54,6 @@ EXPORT_SYMBOL(register_ip_vs_scheduler); EXPORT_SYMBOL(unregister_ip_vs_scheduler); -EXPORT_SYMBOL(ip_vs_skb_replace); EXPORT_SYMBOL(ip_vs_proto_name); EXPORT_SYMBOL(ip_vs_conn_new); EXPORT_SYMBOL(ip_vs_conn_in_get); @@ -194,6 +193,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc, struct ip_vs_dest *dest; struct ip_vs_conn *ct; __be16 dport; /* destination port to forward */ + __be16 flags; union nf_inet_addr snet; /* source network of the client, after masking */ @@ -340,6 +340,10 @@ ip_vs_sched_persist(struct ip_vs_service *svc, dport = ports[1]; } + flags = (svc->flags & IP_VS_SVC_F_ONEPACKET + && iph.protocol == IPPROTO_UDP)? + IP_VS_CONN_F_ONE_PACKET : 0; + /* * Create a new connection according to the template */ @@ -347,7 +351,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc, &iph.saddr, ports[0], &iph.daddr, ports[1], &dest->addr, dport, - 0, + flags, dest); if (cp == NULL) { ip_vs_conn_put(ct); @@ -377,7 +381,7 @@ ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) struct ip_vs_conn *cp = NULL; struct ip_vs_iphdr iph; struct ip_vs_dest *dest; - __be16 _ports[2], *pptr; + __be16 _ports[2], *pptr, flags; ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph); pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports); @@ -407,6 +411,10 @@ ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) return NULL; } + flags = (svc->flags & IP_VS_SVC_F_ONEPACKET + && iph.protocol == IPPROTO_UDP)? + IP_VS_CONN_F_ONE_PACKET : 0; + /* * Create a connection entry. */ @@ -414,7 +422,7 @@ ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) &iph.saddr, pptr[0], &iph.daddr, pptr[1], &dest->addr, dest->port ? dest->port : pptr[1], - 0, + flags, dest); if (cp == NULL) return NULL; @@ -464,6 +472,9 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, if (sysctl_ip_vs_cache_bypass && svc->fwmark && unicast) { int ret, cs; struct ip_vs_conn *cp; + __u16 flags = (svc->flags & IP_VS_SVC_F_ONEPACKET && + iph.protocol == IPPROTO_UDP)? + IP_VS_CONN_F_ONE_PACKET : 0; union nf_inet_addr daddr = { .all = { 0, 0, 0, 0 } }; ip_vs_service_put(svc); @@ -474,7 +485,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, &iph.saddr, pptr[0], &iph.daddr, pptr[1], &daddr, 0, - IP_VS_CONN_F_BYPASS, + IP_VS_CONN_F_BYPASS | flags, NULL); if (cp == NULL) return NF_DROP; @@ -524,26 +535,6 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, return NF_DROP; } - -/* - * It is hooked before NF_IP_PRI_NAT_SRC at the NF_INET_POST_ROUTING - * chain, and is used for VS/NAT. - * It detects packets for VS/NAT connections and sends the packets - * immediately. This can avoid that iptable_nat mangles the packets - * for VS/NAT. - */ -static unsigned int ip_vs_post_routing(unsigned int hooknum, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ - if (!skb->ipvs_property) - return NF_ACCEPT; - /* The packet was sent from IPVS, exit this chain */ - return NF_STOP; -} - __sum16 ip_vs_checksum_complete(struct sk_buff *skb, int offset) { return csum_fold(skb_checksum(skb, offset, skb->len - offset, 0)); @@ -1487,14 +1478,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { .hooknum = NF_INET_FORWARD, .priority = 99, }, - /* Before the netfilter connection tracking, exit from POST_ROUTING */ - { - .hook = ip_vs_post_routing, - .owner = THIS_MODULE, - .pf = PF_INET, - .hooknum = NF_INET_POST_ROUTING, - .priority = NF_IP_PRI_NAT_SRC-1, - }, #ifdef CONFIG_IP_VS_IPV6 /* After packet filtering, forward packet through VS/DR, VS/TUN, * or VS/NAT(change destination), so that filtering rules can be @@ -1523,14 +1506,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { .hooknum = NF_INET_FORWARD, .priority = 99, }, - /* Before the netfilter connection tracking, exit from POST_ROUTING */ - { - .hook = ip_vs_post_routing, - .owner = THIS_MODULE, - .pf = PF_INET6, - .hooknum = NF_INET_POST_ROUTING, - .priority = NF_IP6_PRI_NAT_SRC-1, - }, #endif }; diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 36dc1d8..0f0c079 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -1864,14 +1864,16 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v) svc->scheduler->name); else #endif - seq_printf(seq, "%s %08X:%04X %s ", + seq_printf(seq, "%s %08X:%04X %s %s ", ip_vs_proto_name(svc->protocol), ntohl(svc->addr.ip), ntohs(svc->port), - svc->scheduler->name); + svc->scheduler->name, + (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":""); } else { - seq_printf(seq, "FWM %08X %s ", - svc->fwmark, svc->scheduler->name); + seq_printf(seq, "FWM %08X %s %s", + svc->fwmark, svc->scheduler->name, + (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":""); } if (svc->flags & IP_VS_SVC_F_PERSISTENT) diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c index 2ae747a..f228a17 100644 --- a/net/netfilter/ipvs/ip_vs_ftp.c +++ b/net/netfilter/ipvs/ip_vs_ftp.c @@ -20,6 +20,17 @@ * * Author: Wouter Gadeyne * + * + * Code for ip_vs_expect_related and ip_vs_expect_callback is taken from + * http://www.ssi.bg/~ja/nfct/: + * + * ip_vs_nfct.c: Netfilter connection tracking support for IPVS + * + * Portions Copyright (C) 2001-2002 + * Antefacto Ltd, 181 Parnell St, Dublin 1, Ireland. + * + * Portions Copyright (C) 2003-2008 + * Julian Anastasov */ #define KMSG_COMPONENT "IPVS" @@ -32,6 +43,9 @@ #include <linux/in.h> #include <linux/ip.h> #include <linux/netfilter.h> +#include <net/netfilter/nf_conntrack.h> +#include <net/netfilter/nf_conntrack_expect.h> +#include <net/netfilter/nf_nat_helper.h> #include <linux/gfp.h> #include <net/protocol.h> #include <net/tcp.h> @@ -43,6 +57,16 @@ #define SERVER_STRING "227 Entering Passive Mode (" #define CLIENT_STRING "PORT " +#define FMT_TUPLE "%pI4:%u->%pI4:%u/%u" +#define ARG_TUPLE(T) &(T)->src.u3.ip, ntohs((T)->src.u.all), \ + &(T)->dst.u3.ip, ntohs((T)->dst.u.all), \ + (T)->dst.protonum + +#define FMT_CONN "%pI4:%u->%pI4:%u->%pI4:%u/%u:%u" +#define ARG_CONN(C) &((C)->caddr.ip), ntohs((C)->cport), \ + &((C)->vaddr.ip), ntohs((C)->vport), \ + &((C)->daddr.ip), ntohs((C)->dport), \ + (C)->protocol, (C)->state /* * List of ports (up to IP_VS_APP_MAX_PORTS) to be handled by helper @@ -123,6 +147,119 @@ static int ip_vs_ftp_get_addrport(char *data, char *data_limit, return 1; } +/* + * Called from init_conntrack() as expectfn handler. + */ +static void +ip_vs_expect_callback(struct nf_conn *ct, + struct nf_conntrack_expect *exp) +{ + struct nf_conntrack_tuple *orig, new_reply; + struct ip_vs_conn *cp; + + if (exp->tuple.src.l3num != PF_INET) + return; + + /* + * We assume that no NF locks are held before this callback. + * ip_vs_conn_out_get and ip_vs_conn_in_get should match their + * expectations even if they use wildcard values, now we provide the + * actual values from the newly created original conntrack direction. + * The conntrack is confirmed when packet reaches IPVS hooks. + */ + + /* RS->CLIENT */ + orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; + cp = ip_vs_conn_out_get(exp->tuple.src.l3num, orig->dst.protonum, + &orig->src.u3, orig->src.u.tcp.port, + &orig->dst.u3, orig->dst.u.tcp.port); + if (cp) { + /* Change reply CLIENT->RS to CLIENT->VS */ + new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple; + IP_VS_DBG(7, "%s(): ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", " + FMT_TUPLE ", found inout cp=" FMT_CONN "\n", + __func__, ct, ct->status, + ARG_TUPLE(orig), ARG_TUPLE(&new_reply), + ARG_CONN(cp)); + new_reply.dst.u3 = cp->vaddr; + new_reply.dst.u.tcp.port = cp->vport; + IP_VS_DBG(7, "%s(): ct=%p, new tuples=" FMT_TUPLE ", " FMT_TUPLE + ", inout cp=" FMT_CONN "\n", + __func__, ct, + ARG_TUPLE(orig), ARG_TUPLE(&new_reply), + ARG_CONN(cp)); + goto alter; + } + + /* CLIENT->VS */ + cp = ip_vs_conn_in_get(exp->tuple.src.l3num, orig->dst.protonum, + &orig->src.u3, orig->src.u.tcp.port, + &orig->dst.u3, orig->dst.u.tcp.port); + if (cp) { + /* Change reply VS->CLIENT to RS->CLIENT */ + new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple; + IP_VS_DBG(7, "%s(): ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", " + FMT_TUPLE ", found outin cp=" FMT_CONN "\n", + __func__, ct, ct->status, + ARG_TUPLE(orig), ARG_TUPLE(&new_reply), + ARG_CONN(cp)); + new_reply.src.u3 = cp->daddr; + new_reply.src.u.tcp.port = cp->dport; + IP_VS_DBG(7, "%s(): ct=%p, new tuples=" FMT_TUPLE ", " + FMT_TUPLE ", outin cp=" FMT_CONN "\n", + __func__, ct, + ARG_TUPLE(orig), ARG_TUPLE(&new_reply), + ARG_CONN(cp)); + goto alter; + } + + IP_VS_DBG(7, "%s(): ct=%p, status=0x%lX, tuple=" FMT_TUPLE + " - unknown expect\n", + __func__, ct, ct->status, ARG_TUPLE(orig)); + return; + +alter: + /* Never alter conntrack for non-NAT conns */ + if (IP_VS_FWD_METHOD(cp) == IP_VS_CONN_F_MASQ) + nf_conntrack_alter_reply(ct, &new_reply); + ip_vs_conn_put(cp); + return; +} + +/* + * Create NF conntrack expectation with wildcard (optional) source port. + * Then the default callback function will alter the reply and will confirm + * the conntrack entry when the first packet comes. + */ +static void +ip_vs_expect_related(struct sk_buff *skb, struct nf_conn *ct, + struct ip_vs_conn *cp, u_int8_t proto, + const __be16 *port, int from_rs) +{ + struct nf_conntrack_expect *exp; + + BUG_ON(!ct || ct == &nf_conntrack_untracked); + + exp = nf_ct_expect_alloc(ct); + if (!exp) + return; + + if (from_rs) + nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, + nf_ct_l3num(ct), &cp->daddr, &cp->caddr, + proto, port, &cp->cport); + else + nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, + nf_ct_l3num(ct), &cp->caddr, &cp->vaddr, + proto, port, &cp->vport); + + exp->expectfn = ip_vs_expect_callback; + + IP_VS_DBG(7, "%s(): ct=%p, expect tuple=" FMT_TUPLE "\n", + __func__, ct, ARG_TUPLE(&exp->tuple)); + nf_ct_expect_related(exp); + nf_ct_expect_put(exp); +} /* * Look at outgoing ftp packets to catch the response to a PASV command @@ -149,7 +286,9 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, struct ip_vs_conn *n_cp; char buf[24]; /* xxx.xxx.xxx.xxx,ppp,ppp\000 */ unsigned buf_len; - int ret; + int ret = 0; + enum ip_conntrack_info ctinfo; + struct nf_conn *ct; #ifdef CONFIG_IP_VS_IPV6 /* This application helper doesn't work with IPv6 yet, @@ -219,19 +358,26 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, buf_len = strlen(buf); + ct = nf_ct_get(skb, &ctinfo); + if (ct && !nf_ct_is_untracked(ct)) { + /* If mangling fails this function will return 0 + * which will cause the packet to be dropped. + * Mangling can only fail under memory pressure, + * hopefully it will succeed on the retransmitted + * packet. + */ + ret = nf_nat_mangle_tcp_packet(skb, ct, ctinfo, + start-data, end-start, + buf, buf_len); + if (ret) + ip_vs_expect_related(skb, ct, n_cp, + IPPROTO_TCP, NULL, 0); + } + /* - * Calculate required delta-offset to keep TCP happy + * Not setting 'diff' is intentional, otherwise the sequence + * would be adjusted twice. */ - *diff = buf_len - (end-start); - - if (*diff == 0) { - /* simply replace it with new passive address */ - memcpy(start, buf, buf_len); - ret = 1; - } else { - ret = !ip_vs_skb_replace(skb, GFP_ATOMIC, start, - end-start, buf, buf_len); - } cp->app_data = NULL; ip_vs_tcp_conn_listen(n_cp); @@ -263,6 +409,7 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, union nf_inet_addr to; __be16 port; struct ip_vs_conn *n_cp; + struct nf_conn *ct; #ifdef CONFIG_IP_VS_IPV6 /* This application helper doesn't work with IPv6 yet, @@ -349,6 +496,11 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, ip_vs_control_add(n_cp, cp); } + ct = (struct nf_conn *)skb->nfct; + if (ct && ct != &nf_conntrack_untracked) + ip_vs_expect_related(skb, ct, n_cp, + IPPROTO_TCP, &n_cp->dport, 1); + /* * Move tunnel to listen state */ diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c index 2d3d5e4..027f654 100644 --- a/net/netfilter/ipvs/ip_vs_proto.c +++ b/net/netfilter/ipvs/ip_vs_proto.c @@ -98,6 +98,7 @@ struct ip_vs_protocol * ip_vs_proto_get(unsigned short proto) return NULL; } +EXPORT_SYMBOL(ip_vs_proto_get); /* diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index c9a3f7a..4c0855c 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -8,55 +8,6 @@ #include <net/sctp/checksum.h> #include <net/ip_vs.h> - -static struct ip_vs_conn * -sctp_conn_in_get(int af, - const struct sk_buff *skb, - struct ip_vs_protocol *pp, - const struct ip_vs_iphdr *iph, - unsigned int proto_off, - int inverse) -{ - __be16 _ports[2], *pptr; - - pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); - if (pptr == NULL) - return NULL; - - if (likely(!inverse)) - return ip_vs_conn_in_get(af, iph->protocol, - &iph->saddr, pptr[0], - &iph->daddr, pptr[1]); - else - return ip_vs_conn_in_get(af, iph->protocol, - &iph->daddr, pptr[1], - &iph->saddr, pptr[0]); -} - -static struct ip_vs_conn * -sctp_conn_out_get(int af, - const struct sk_buff *skb, - struct ip_vs_protocol *pp, - const struct ip_vs_iphdr *iph, - unsigned int proto_off, - int inverse) -{ - __be16 _ports[2], *pptr; - - pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); - if (pptr == NULL) - return NULL; - - if (likely(!inverse)) - return ip_vs_conn_out_get(af, iph->protocol, - &iph->saddr, pptr[0], - &iph->daddr, pptr[1]); - else - return ip_vs_conn_out_get(af, iph->protocol, - &iph->daddr, pptr[1], - &iph->saddr, pptr[0]); -} - static int sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, int *verdict, struct ip_vs_conn **cpp) @@ -173,7 +124,7 @@ sctp_dnat_handler(struct sk_buff *skb, return 0; /* Call application helper if needed */ - if (!ip_vs_app_pkt_out(cp, skb)) + if (!ip_vs_app_pkt_in(cp, skb)) return 0; } @@ -1169,8 +1120,8 @@ struct ip_vs_protocol ip_vs_protocol_sctp = { .register_app = sctp_register_app, .unregister_app = sctp_unregister_app, .conn_schedule = sctp_conn_schedule, - .conn_in_get = sctp_conn_in_get, - .conn_out_get = sctp_conn_out_get, + .conn_in_get = ip_vs_conn_in_get_proto, + .conn_out_get = ip_vs_conn_out_get_proto, .snat_handler = sctp_snat_handler, .dnat_handler = sctp_dnat_handler, .csum_check = sctp_csum_check, diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c index 91d28e0..282d24d 100644 --- a/net/netfilter/ipvs/ip_vs_proto_tcp.c +++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c @@ -27,52 +27,6 @@ #include <net/ip_vs.h> - -static struct ip_vs_conn * -tcp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp, - const struct ip_vs_iphdr *iph, unsigned int proto_off, - int inverse) -{ - __be16 _ports[2], *pptr; - - pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); - if (pptr == NULL) - return NULL; - - if (likely(!inverse)) { - return ip_vs_conn_in_get(af, iph->protocol, - &iph->saddr, pptr[0], - &iph->daddr, pptr[1]); - } else { - return ip_vs_conn_in_get(af, iph->protocol, - &iph->daddr, pptr[1], - &iph->saddr, pptr[0]); - } -} - -static struct ip_vs_conn * -tcp_conn_out_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp, - const struct ip_vs_iphdr *iph, unsigned int proto_off, - int inverse) -{ - __be16 _ports[2], *pptr; - - pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); - if (pptr == NULL) - return NULL; - - if (likely(!inverse)) { - return ip_vs_conn_out_get(af, iph->protocol, - &iph->saddr, pptr[0], - &iph->daddr, pptr[1]); - } else { - return ip_vs_conn_out_get(af, iph->protocol, - &iph->daddr, pptr[1], - &iph->saddr, pptr[0]); - } -} - - static int tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, int *verdict, struct ip_vs_conn **cpp) @@ -721,8 +675,8 @@ struct ip_vs_protocol ip_vs_protocol_tcp = { .register_app = tcp_register_app, .unregister_app = tcp_unregister_app, .conn_schedule = tcp_conn_schedule, - .conn_in_get = tcp_conn_in_get, - .conn_out_get = tcp_conn_out_get, + .conn_in_get = ip_vs_conn_in_get_proto, + .conn_out_get = ip_vs_conn_out_get_proto, .snat_handler = tcp_snat_handler, .dnat_handler = tcp_dnat_handler, .csum_check = tcp_csum_check, diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c index e7a6885..8553231 100644 --- a/net/netfilter/ipvs/ip_vs_proto_udp.c +++ b/net/netfilter/ipvs/ip_vs_proto_udp.c @@ -27,58 +27,6 @@ #include <net/ip.h> #include <net/ip6_checksum.h> -static struct ip_vs_conn * -udp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp, - const struct ip_vs_iphdr *iph, unsigned int proto_off, - int inverse) -{ - struct ip_vs_conn *cp; - __be16 _ports[2], *pptr; - - pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); - if (pptr == NULL) - return NULL; - - if (likely(!inverse)) { - cp = ip_vs_conn_in_get(af, iph->protocol, - &iph->saddr, pptr[0], - &iph->daddr, pptr[1]); - } else { - cp = ip_vs_conn_in_get(af, iph->protocol, - &iph->daddr, pptr[1], - &iph->saddr, pptr[0]); - } - - return cp; -} - - -static struct ip_vs_conn * -udp_conn_out_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp, - const struct ip_vs_iphdr *iph, unsigned int proto_off, - int inverse) -{ - struct ip_vs_conn *cp; - __be16 _ports[2], *pptr; - - pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); - if (pptr == NULL) - return NULL; - - if (likely(!inverse)) { - cp = ip_vs_conn_out_get(af, iph->protocol, - &iph->saddr, pptr[0], - &iph->daddr, pptr[1]); - } else { - cp = ip_vs_conn_out_get(af, iph->protocol, - &iph->daddr, pptr[1], - &iph->saddr, pptr[0]); - } - - return cp; -} - - static int udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, int *verdict, struct ip_vs_conn **cpp) @@ -520,8 +468,8 @@ struct ip_vs_protocol ip_vs_protocol_udp = { .init = udp_init, .exit = udp_exit, .conn_schedule = udp_conn_schedule, - .conn_in_get = udp_conn_in_get, - .conn_out_get = udp_conn_out_get, + .conn_in_get = ip_vs_conn_in_get_proto, + .conn_out_get = ip_vs_conn_out_get_proto, .snat_handler = udp_snat_handler, .dnat_handler = udp_dnat_handler, .csum_check = udp_csum_check, diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 93c15a1..21e1a5e 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -28,6 +28,7 @@ #include <net/ip6_route.h> #include <linux/icmpv6.h> #include <linux/netfilter.h> +#include <net/netfilter/nf_conntrack.h> #include <linux/netfilter_ipv4.h> #include <net/ip_vs.h> @@ -90,10 +91,10 @@ __ip_vs_get_out_rt(struct ip_vs_conn *cp, u32 rtos) &dest->addr.ip); return NULL; } - __ip_vs_dst_set(dest, rtos, dst_clone(&rt->u.dst)); + __ip_vs_dst_set(dest, rtos, dst_clone(&rt->dst)); IP_VS_DBG(10, "new dst %pI4, refcnt=%d, rtos=%X\n", &dest->addr.ip, - atomic_read(&rt->u.dst.__refcnt), rtos); + atomic_read(&rt->dst.__refcnt), rtos); } spin_unlock(&dest->dst_lock); } else { @@ -148,10 +149,10 @@ __ip_vs_get_out_rt_v6(struct ip_vs_conn *cp) &dest->addr.in6); return NULL; } - __ip_vs_dst_set(dest, 0, dst_clone(&rt->u.dst)); + __ip_vs_dst_set(dest, 0, dst_clone(&rt->dst)); IP_VS_DBG(10, "new dst %pI6, refcnt=%d\n", &dest->addr.in6, - atomic_read(&rt->u.dst.__refcnt)); + atomic_read(&rt->dst.__refcnt)); } spin_unlock(&dest->dst_lock); } else { @@ -198,7 +199,7 @@ do { \ (skb)->ipvs_property = 1; \ skb_forward_csum(skb); \ NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL, \ - (rt)->u.dst.dev, dst_output); \ + (rt)->dst.dev, dst_output); \ } while (0) @@ -245,7 +246,7 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, } /* MTU checking */ - mtu = dst_mtu(&rt->u.dst); + mtu = dst_mtu(&rt->dst); if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) { ip_rt_put(rt); icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); @@ -265,7 +266,7 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, /* drop old route */ skb_dst_drop(skb); - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; @@ -309,9 +310,9 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, } /* MTU checking */ - mtu = dst_mtu(&rt->u.dst); + mtu = dst_mtu(&rt->dst); if (skb->len > mtu) { - dst_release(&rt->u.dst); + dst_release(&rt->dst); icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); IP_VS_DBG_RL("%s(): frag needed\n", __func__); goto tx_error; @@ -323,13 +324,13 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, */ skb = skb_share_check(skb, GFP_ATOMIC); if (unlikely(skb == NULL)) { - dst_release(&rt->u.dst); + dst_release(&rt->dst); return NF_STOLEN; } /* drop old route */ skb_dst_drop(skb); - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; @@ -348,6 +349,30 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, } #endif +static void +ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp) +{ + struct nf_conn *ct = (struct nf_conn *)skb->nfct; + struct nf_conntrack_tuple new_tuple; + + if (ct == NULL || nf_ct_is_untracked(ct) || nf_ct_is_confirmed(ct)) + return; + + /* + * The connection is not yet in the hashtable, so we update it. + * CIP->VIP will remain the same, so leave the tuple in + * IP_CT_DIR_ORIGINAL untouched. When the reply comes back from the + * real-server we will see RIP->DIP. + */ + new_tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple; + new_tuple.src.u3 = cp->daddr; + /* + * This will also take care of UDP and other protocols. + */ + new_tuple.src.u.tcp.port = cp->dport; + nf_conntrack_alter_reply(ct, &new_tuple); +} + /* * NAT transmitter (only for outside-to-inside nat forwarding) * Not used for related ICMP @@ -376,7 +401,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, goto tx_error_icmp; /* MTU checking */ - mtu = dst_mtu(&rt->u.dst); + mtu = dst_mtu(&rt->dst); if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) { ip_rt_put(rt); icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); @@ -388,12 +413,12 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, if (!skb_make_writable(skb, sizeof(struct iphdr))) goto tx_error_put; - if (skb_cow(skb, rt->u.dst.dev->hard_header_len)) + if (skb_cow(skb, rt->dst.dev->hard_header_len)) goto tx_error_put; /* drop old route */ skb_dst_drop(skb); - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); /* mangle the packet */ if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp)) @@ -403,6 +428,8 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT"); + ip_vs_update_conntrack(skb, cp); + /* FIXME: when application helper enlarges the packet and the length is larger than the MTU of outgoing device, there will be still MTU problem. */ @@ -452,9 +479,9 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, goto tx_error_icmp; /* MTU checking */ - mtu = dst_mtu(&rt->u.dst); + mtu = dst_mtu(&rt->dst); if (skb->len > mtu) { - dst_release(&rt->u.dst); + dst_release(&rt->dst); icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); IP_VS_DBG_RL_PKT(0, pp, skb, 0, "ip_vs_nat_xmit_v6(): frag needed for"); @@ -465,12 +492,12 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, if (!skb_make_writable(skb, sizeof(struct ipv6hdr))) goto tx_error_put; - if (skb_cow(skb, rt->u.dst.dev->hard_header_len)) + if (skb_cow(skb, rt->dst.dev->hard_header_len)) goto tx_error_put; /* drop old route */ skb_dst_drop(skb); - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); /* mangle the packet */ if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp)) @@ -479,6 +506,8 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT"); + ip_vs_update_conntrack(skb, cp); + /* FIXME: when application helper enlarges the packet and the length is larger than the MTU of outgoing device, there will be still MTU problem. */ @@ -498,7 +527,7 @@ tx_error: kfree_skb(skb); return NF_STOLEN; tx_error_put: - dst_release(&rt->u.dst); + dst_release(&rt->dst); goto tx_error; } #endif @@ -549,9 +578,9 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(tos)))) goto tx_error_icmp; - tdev = rt->u.dst.dev; + tdev = rt->dst.dev; - mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr); + mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr); if (mtu < 68) { ip_rt_put(rt); IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__); @@ -601,7 +630,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, /* drop old route */ skb_dst_drop(skb); - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); /* * Push down and install the IPIP header. @@ -615,7 +644,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, iph->daddr = rt->rt_dst; iph->saddr = rt->rt_src; iph->ttl = old_iph->ttl; - ip_select_ident(iph, &rt->u.dst, NULL); + ip_select_ident(iph, &rt->dst, NULL); /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; @@ -660,12 +689,12 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, if (!rt) goto tx_error_icmp; - tdev = rt->u.dst.dev; + tdev = rt->dst.dev; - mtu = dst_mtu(&rt->u.dst) - sizeof(struct ipv6hdr); + mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr); /* TODO IPv6: do we need this check in IPv6? */ if (mtu < 1280) { - dst_release(&rt->u.dst); + dst_release(&rt->dst); IP_VS_DBG_RL("%s(): mtu less than 1280\n", __func__); goto tx_error; } @@ -674,7 +703,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr)) { icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); - dst_release(&rt->u.dst); + dst_release(&rt->dst); IP_VS_DBG_RL("%s(): frag needed\n", __func__); goto tx_error; } @@ -689,7 +718,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); if (!new_skb) { - dst_release(&rt->u.dst); + dst_release(&rt->dst); kfree_skb(skb); IP_VS_ERR_RL("%s(): no memory\n", __func__); return NF_STOLEN; @@ -707,7 +736,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, /* drop old route */ skb_dst_drop(skb); - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); /* * Push down and install the IPIP header. @@ -760,7 +789,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, goto tx_error_icmp; /* MTU checking */ - mtu = dst_mtu(&rt->u.dst); + mtu = dst_mtu(&rt->dst); if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu) { icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); ip_rt_put(rt); @@ -780,7 +809,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, /* drop old route */ skb_dst_drop(skb); - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; @@ -813,10 +842,10 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, goto tx_error_icmp; /* MTU checking */ - mtu = dst_mtu(&rt->u.dst); + mtu = dst_mtu(&rt->dst); if (skb->len > mtu) { icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); - dst_release(&rt->u.dst); + dst_release(&rt->dst); IP_VS_DBG_RL("%s(): frag needed\n", __func__); goto tx_error; } @@ -827,13 +856,13 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, */ skb = skb_share_check(skb, GFP_ATOMIC); if (unlikely(skb == NULL)) { - dst_release(&rt->u.dst); + dst_release(&rt->dst); return NF_STOLEN; } /* drop old route */ skb_dst_drop(skb); - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; @@ -888,7 +917,7 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, goto tx_error_icmp; /* MTU checking */ - mtu = dst_mtu(&rt->u.dst); + mtu = dst_mtu(&rt->dst); if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF))) { ip_rt_put(rt); icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); @@ -900,12 +929,12 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, if (!skb_make_writable(skb, offset)) goto tx_error_put; - if (skb_cow(skb, rt->u.dst.dev->hard_header_len)) + if (skb_cow(skb, rt->dst.dev->hard_header_len)) goto tx_error_put; /* drop the old route when skb is not shared */ skb_dst_drop(skb); - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); ip_vs_nat_icmp(skb, pp, cp, 0); @@ -963,9 +992,9 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, goto tx_error_icmp; /* MTU checking */ - mtu = dst_mtu(&rt->u.dst); + mtu = dst_mtu(&rt->dst); if (skb->len > mtu) { - dst_release(&rt->u.dst); + dst_release(&rt->dst); icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); IP_VS_DBG_RL("%s(): frag needed\n", __func__); goto tx_error; @@ -975,12 +1004,12 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, if (!skb_make_writable(skb, offset)) goto tx_error_put; - if (skb_cow(skb, rt->u.dst.dev->hard_header_len)) + if (skb_cow(skb, rt->dst.dev->hard_header_len)) goto tx_error_put; /* drop the old route when skb is not shared */ skb_dst_drop(skb); - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); ip_vs_nat_icmp_v6(skb, pp, cp, 0); @@ -1001,7 +1030,7 @@ out: LeaveFunction(10); return rc; tx_error_put: - dst_release(&rt->u.dst); + dst_release(&rt->dst); goto tx_error; } #endif |