summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author kp <kp@FreeBSD.org> 2015-10-21 15:32:21 +0000
committer kp <kp@FreeBSD.org> 2015-10-21 15:32:21 +0000
commit d621159ed6a7d1c98cf81f17e313dffc64bf7c4f (patch)
tree bf808b690f2a7aeabe78ae7b008b62f190733067
parent a7e84cb353a2230668d83d375dc3fae7f6419bab (diff)
download FreeBSD-src-d621159ed6a7d1c98cf81f17e313dffc64bf7c4f.zip
FreeBSD-src-d621159ed6a7d1c98cf81f17e313dffc64bf7c4f.tar.gz
MFC r289316:
pf: Fix TSO issues

In certain configurations (mostly but not exclusively as a VM on Xen) pf
produced packets with an invalid TCP checksum.

The problem was that pf could only handle packets with a full checksum. The
FreeBSD IP stack produces TCP packets with a pseudo-header checksum (only
addresses, length and protocol). Certain network interfaces expect to see the
pseudo-header checksum, so they end up producing packets with invalid
checksums.

To fix this, stop calculating the full checksum and teach pf to only update
TCP checksums if TSO is disabled or the change affects the pseudo-header
checksum.

PR: 154428, 193579, 198868
Relnotes: yes
Sponsored by: RootBSD
-rw-r--r-- sys/net/pfvar.h 5
-rw-r--r-- sys/netpfil/pf/pf.c 112
-rw-r--r-- sys/netpfil/pf/pf_ioctl.c 13
-rw-r--r-- sys/netpfil/pf/pf_norm.c 13
4 files changed, 93 insertions, 50 deletions
diff --git a/sys/net/pfvar.h b/sys/net/pfvar.h
index 4a5f2a0..ae5ecb9 100644
--- a/sys/net/pfvar.h
+++ b/sys/net/pfvar.h
@@ -1554,6 +1554,8 @@ extern void pf_print_state(struct pf_state *);
extern void pf_print_flags(u_int8_t);
extern u_int16_t pf_cksum_fixup(u_int16_t, u_int16_t, u_int16_t,
u_int8_t);
+extern u_int16_t pf_proto_cksum_fixup(struct mbuf *, u_int16_t,
+ u_int16_t, u_int16_t, u_int8_t);
VNET_DECLARE(struct ifnet *, sync_ifp);
#define V_sync_ifp VNET(sync_ifp);
@@ -1583,6 +1585,9 @@ u_int32_t pf_new_isn(struct pf_state *);
void *pf_pull_hdr(struct mbuf *, int, void *, int, u_short *, u_short *,
sa_family_t);
void pf_change_a(void *, u_int16_t *, u_int32_t, u_int8_t);
+void pf_change_proto_a(struct mbuf *, void *, u_int16_t *, u_int32_t,
+ u_int8_t);
+void pf_change_tcp_a(struct mbuf *, void *, u_int16_t *, u_int32_t);
void pf_send_deferred_syn(struct pf_state *);
int pf_match_addr(u_int8_t, struct pf_addr *, struct pf_addr *,
struct pf_addr *, sa_family_t);
diff --git a/sys/netpfil/pf/pf.c b/sys/netpfil/pf/pf.c
index b3268a2..8f3a5ed 100644
--- a/sys/netpfil/pf/pf.c
+++ b/sys/netpfil/pf/pf.c
@@ -203,7 +203,7 @@ static void pf_init_threshold(struct pf_threshold *, u_int32_t,
static void pf_add_threshold(struct pf_threshold *);
static int pf_check_threshold(struct pf_threshold *);
-static void pf_change_ap(struct pf_addr *, u_int16_t *,
+static void pf_change_ap(struct mbuf *, struct pf_addr *, u_int16_t *,
u_int16_t *, u_int16_t *, struct pf_addr *,
u_int16_t, u_int8_t, sa_family_t);
static int pf_modulate_sack(struct mbuf *, int, struct pf_pdesc *,
@@ -1989,6 +1989,22 @@ pf_addr_wrap_neq(struct pf_addr_wrap *aw1, struct pf_addr_wrap *aw2)
}
}
+/**
+ * Checksum updates are a little complicated because the checksum in the TCP/UDP
+ * header isn't always a full checksum. In some cases (i.e. output) it's a
+ * pseudo-header checksum, which is a partial checksum over src/dst IP
+ * addresses, protocol number and length.
+ *
+ * That means we have the following cases:
+ * * Input or forwarding: we don't have TSO, the checksum fields are full
+ * checksums, we need to update the checksum whenever we change anything.
+ * * Output (i.e. the checksum is a pseudo-header checksum):
+ * x The field being updated is src/dst address or affects the length of
+ * the packet. We need to update the pseudo-header checksum (note that this
+ * checksum is not ones' complement).
+ * x Some other field is being modified (e.g. src/dst port numbers): We
+ * don't have to update anything.
+ **/
u_int16_t
pf_cksum_fixup(u_int16_t cksum, u_int16_t old, u_int16_t new, u_int8_t udp)
{
@@ -2004,9 +2020,20 @@ pf_cksum_fixup(u_int16_t cksum, u_int16_t old, u_int16_t new, u_int8_t udp)
return (l);
}
+u_int16_t
+pf_proto_cksum_fixup(struct mbuf *m, u_int16_t cksum, u_int16_t old,
+ u_int16_t new, u_int8_t udp)
+{
+ if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6))
+ return (cksum);
+
+ return (pf_cksum_fixup(cksum, old, new, udp));
+}
+
static void
-pf_change_ap(struct pf_addr *a, u_int16_t *p, u_int16_t *ic, u_int16_t *pc,
- struct pf_addr *an, u_int16_t pn, u_int8_t u, sa_family_t af)
+pf_change_ap(struct mbuf *m, struct pf_addr *a, u_int16_t *p, u_int16_t *ic,
+ u_int16_t *pc, struct pf_addr *an, u_int16_t pn, u_int8_t u,
+ sa_family_t af)
{
struct pf_addr ao;
u_int16_t po = *p;
@@ -2014,6 +2041,9 @@ pf_change_ap(struct pf_addr *a, u_int16_t *p, u_int16_t *ic, u_int16_t *pc,
PF_ACPY(&ao, a, af);
PF_ACPY(a, an, af);
+ if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6))
+ *pc = ~*pc;
+
*p = pn;
switch (af) {
@@ -2023,17 +2053,19 @@ pf_change_ap(struct pf_addr *a, u_int16_t *p, u_int16_t *ic, u_int16_t *pc,
ao.addr16[0], an->addr16[0], 0),
ao.addr16[1], an->addr16[1], 0);
*p = pn;
- *pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc,
+
+ *pc = pf_cksum_fixup(pf_cksum_fixup(*pc,
ao.addr16[0], an->addr16[0], u),
- ao.addr16[1], an->addr16[1], u),
- po, pn, u);
+ ao.addr16[1], an->addr16[1], u);
+
+ *pc = pf_proto_cksum_fixup(m, *pc, po, pn, u);
break;
#endif /* INET */
#ifdef INET6
case AF_INET6:
*pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
- pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc,
+ pf_cksum_fixup(pf_cksum_fixup(*pc,
ao.addr16[0], an->addr16[0], u),
ao.addr16[1], an->addr16[1], u),
ao.addr16[2], an->addr16[2], u),
@@ -2041,13 +2073,20 @@ pf_change_ap(struct pf_addr *a, u_int16_t *p, u_int16_t *ic, u_int16_t *pc,
ao.addr16[4], an->addr16[4], u),
ao.addr16[5], an->addr16[5], u),
ao.addr16[6], an->addr16[6], u),
- ao.addr16[7], an->addr16[7], u),
- po, pn, u);
+ ao.addr16[7], an->addr16[7], u);
+
+ *pc = pf_proto_cksum_fixup(m, *pc, po, pn, u);
break;
#endif /* INET6 */
}
-}
+ if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA |
+ CSUM_DELAY_DATA_IPV6)) {
+ *pc = ~*pc;
+ if (! *pc)
+ *pc = 0xffff;
+ }
+}
/* Changes a u_int32_t. Uses a void * so there are no align restrictions */
void
@@ -2061,6 +2100,19 @@ pf_change_a(void *a, u_int16_t *c, u_int32_t an, u_int8_t u)
ao % 65536, an % 65536, u);
}
+void
+pf_change_proto_a(struct mbuf *m, void *a, u_int16_t *c, u_int32_t an, u_int8_t udp)
+{
+ u_int32_t ao;
+
+ memcpy(&ao, a, sizeof(ao));
+ memcpy(a, &an, sizeof(u_int32_t));
+
+ *c = pf_proto_cksum_fixup(m,
+ pf_proto_cksum_fixup(m, *c, ao / 65536, an / 65536, udp),
+ ao % 65536, an % 65536, udp);
+}
+
#ifdef INET6
static void
pf_change_a6(struct pf_addr *a, u_int16_t *c, struct pf_addr *an, u_int8_t u)
@@ -2206,12 +2258,10 @@ pf_modulate_sack(struct mbuf *m, int off, struct pf_pdesc *pd,
for (i = 2; i + TCPOLEN_SACK <= olen;
i += TCPOLEN_SACK) {
memcpy(&sack, &opt[i], sizeof(sack));
- pf_change_a(&sack.start, &th->th_sum,
- htonl(ntohl(sack.start) -
- dst->seqdiff), 0);
- pf_change_a(&sack.end, &th->th_sum,
- htonl(ntohl(sack.end) -
- dst->seqdiff), 0);
+ pf_change_proto_a(m, &sack.start, &th->th_sum,
+ htonl(ntohl(sack.start) - dst->seqdiff), 0);
+ pf_change_proto_a(m, &sack.end, &th->th_sum,
+ htonl(ntohl(sack.end) - dst->seqdiff), 0);
memcpy(&opt[i], &sack, sizeof(sack));
}
copyback = 1;
@@ -3115,7 +3165,7 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction,
if (PF_ANEQ(saddr, &nk->addr[pd->sidx], af) ||
nk->port[pd->sidx] != sport) {
- pf_change_ap(saddr, &th->th_sport, pd->ip_sum,
+ pf_change_ap(m, saddr, &th->th_sport, pd->ip_sum,
&th->th_sum, &nk->addr[pd->sidx],
nk->port[pd->sidx], 0, af);
pd->sport = &th->th_sport;
@@ -3124,7 +3174,7 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction,
if (PF_ANEQ(daddr, &nk->addr[pd->didx], af) ||
nk->port[pd->didx] != dport) {
- pf_change_ap(daddr, &th->th_dport, pd->ip_sum,
+ pf_change_ap(m, daddr, &th->th_dport, pd->ip_sum,
&th->th_sum, &nk->addr[pd->didx],
nk->port[pd->didx], 0, af);
dport = th->th_dport;
@@ -3138,7 +3188,7 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction,
if (PF_ANEQ(saddr, &nk->addr[pd->sidx], af) ||
nk->port[pd->sidx] != sport) {
- pf_change_ap(saddr, &pd->hdr.udp->uh_sport,
+ pf_change_ap(m, saddr, &pd->hdr.udp->uh_sport,
pd->ip_sum, &pd->hdr.udp->uh_sum,
&nk->addr[pd->sidx],
nk->port[pd->sidx], 1, af);
@@ -3148,7 +3198,7 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction,
if (PF_ANEQ(daddr, &nk->addr[pd->didx], af) ||
nk->port[pd->didx] != dport) {
- pf_change_ap(daddr, &pd->hdr.udp->uh_dport,
+ pf_change_ap(m, daddr, &pd->hdr.udp->uh_dport,
pd->ip_sum, &pd->hdr.udp->uh_sum,
&nk->addr[pd->didx],
nk->port[pd->didx], 1, af);
@@ -3500,7 +3550,7 @@ pf_create_state(struct pf_rule *r, struct pf_rule *nr, struct pf_rule *a,
if ((s->src.seqdiff = pf_tcp_iss(pd) - s->src.seqlo) ==
0)
s->src.seqdiff = 1;
- pf_change_a(&th->th_seq, &th->th_sum,
+ pf_change_proto_a(m, &th->th_seq, &th->th_sum,
htonl(s->src.seqlo + s->src.seqdiff), 0);
*rewrite = 1;
} else
@@ -3824,9 +3874,9 @@ pf_tcp_track_full(struct pf_state_peer *src, struct pf_state_peer *dst,
while ((src->seqdiff = arc4random() - seq) == 0)
;
ack = ntohl(th->th_ack) - dst->seqdiff;
- pf_change_a(&th->th_seq, &th->th_sum, htonl(seq +
+ pf_change_proto_a(m, &th->th_seq, &th->th_sum, htonl(seq +
src->seqdiff), 0);
- pf_change_a(&th->th_ack, &th->th_sum, htonl(ack), 0);
+ pf_change_proto_a(m, &th->th_ack, &th->th_sum, htonl(ack), 0);
*copyback = 1;
} else {
ack = ntohl(th->th_ack);
@@ -3876,9 +3926,9 @@ pf_tcp_track_full(struct pf_state_peer *src, struct pf_state_peer *dst,
ack = ntohl(th->th_ack) - dst->seqdiff;
if (src->seqdiff) {
/* Modulate sequence numbers */
- pf_change_a(&th->th_seq, &th->th_sum, htonl(seq +
+ pf_change_proto_a(m, &th->th_seq, &th->th_sum, htonl(seq +
src->seqdiff), 0);
- pf_change_a(&th->th_ack, &th->th_sum, htonl(ack), 0);
+ pf_change_proto_a(m, &th->th_ack, &th->th_sum, htonl(ack), 0);
*copyback = 1;
}
end = seq + pd->p_len;
@@ -4332,14 +4382,14 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif,
if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], pd->af) ||
nk->port[pd->sidx] != th->th_sport)
- pf_change_ap(pd->src, &th->th_sport, pd->ip_sum,
- &th->th_sum, &nk->addr[pd->sidx],
+ pf_change_ap(m, pd->src, &th->th_sport,
+ pd->ip_sum, &th->th_sum, &nk->addr[pd->sidx],
nk->port[pd->sidx], 0, pd->af);
if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], pd->af) ||
nk->port[pd->didx] != th->th_dport)
- pf_change_ap(pd->dst, &th->th_dport, pd->ip_sum,
- &th->th_sum, &nk->addr[pd->didx],
+ pf_change_ap(m, pd->dst, &th->th_dport,
+ pd->ip_sum, &th->th_sum, &nk->addr[pd->didx],
nk->port[pd->didx], 0, pd->af);
copyback = 1;
}
@@ -4403,13 +4453,13 @@ pf_test_state_udp(struct pf_state **state, int direction, struct pfi_kif *kif,
if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], pd->af) ||
nk->port[pd->sidx] != uh->uh_sport)
- pf_change_ap(pd->src, &uh->uh_sport, pd->ip_sum,
+ pf_change_ap(m, pd->src, &uh->uh_sport, pd->ip_sum,
&uh->uh_sum, &nk->addr[pd->sidx],
nk->port[pd->sidx], 1, pd->af);
if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], pd->af) ||
nk->port[pd->didx] != uh->uh_dport)
- pf_change_ap(pd->dst, &uh->uh_dport, pd->ip_sum,
+ pf_change_ap(m, pd->dst, &uh->uh_dport, pd->ip_sum,
&uh->uh_sum, &nk->addr[pd->didx],
nk->port[pd->didx], 1, pd->af);
m_copyback(m, off, sizeof(*uh), (caddr_t)uh);
diff --git a/sys/netpfil/pf/pf_ioctl.c b/sys/netpfil/pf/pf_ioctl.c
index a3528f0..d91c851 100644
--- a/sys/netpfil/pf/pf_ioctl.c
+++ b/sys/netpfil/pf/pf_ioctl.c
@@ -3560,12 +3560,6 @@ pf_check_out(void *arg, struct mbuf **m, struct ifnet *ifp, int dir,
{
int chk;
- /* We need a proper CSUM befor we start (s. OpenBSD ip_output) */
- if ((*m)->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
- in_delayed_cksum(*m);
- (*m)->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
- }
-
chk = pf_test(PF_OUT, ifp, m, inp);
if (chk && *m) {
m_freem(*m);
@@ -3604,13 +3598,6 @@ pf_check6_out(void *arg, struct mbuf **m, struct ifnet *ifp, int dir,
{
int chk;
- /* We need a proper CSUM before we start (s. OpenBSD ip_output) */
- if ((*m)->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) {
- in6_delayed_cksum(*m,
- (*m)->m_pkthdr.len - sizeof(struct ip6_hdr),
- sizeof(struct ip6_hdr));
- (*m)->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA_IPV6;
- }
CURVNET_SET(ifp->if_vnet);
chk = pf_test6(PF_OUT, ifp, m, inp);
CURVNET_RESTORE();
diff --git a/sys/netpfil/pf/pf_norm.c b/sys/netpfil/pf/pf_norm.c
index d56ad3c..4f9a499 100644
--- a/sys/netpfil/pf/pf_norm.c
+++ b/sys/netpfil/pf/pf_norm.c
@@ -1680,13 +1680,14 @@ pf_normalize_tcp(int dir, struct pfi_kif *kif, struct mbuf *m, int ipoff,
th->th_x2 = 0;
nv = *(u_int16_t *)(&th->th_ack + 1);
- th->th_sum = pf_cksum_fixup(th->th_sum, ov, nv, 0);
+ th->th_sum = pf_proto_cksum_fixup(m, th->th_sum, ov, nv, 0);
rewrite = 1;
}
/* Remove urgent pointer, if TH_URG is not set */
if (!(flags & TH_URG) && th->th_urp) {
- th->th_sum = pf_cksum_fixup(th->th_sum, th->th_urp, 0, 0);
+ th->th_sum = pf_proto_cksum_fixup(m, th->th_sum, th->th_urp,
+ 0, 0);
th->th_urp = 0;
rewrite = 1;
}
@@ -1887,7 +1888,7 @@ pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd,
(src->scrub->pfss_flags &
PFSS_TIMESTAMP)) {
tsval = ntohl(tsval);
- pf_change_a(&opt[2],
+ pf_change_proto_a(m, &opt[2],
&th->th_sum,
htonl(tsval +
src->scrub->pfss_ts_mod),
@@ -1903,7 +1904,7 @@ pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd,
PFSS_TIMESTAMP)) {
tsecr = ntohl(tsecr)
- dst->scrub->pfss_ts_mod;
- pf_change_a(&opt[6],
+ pf_change_proto_a(m, &opt[6],
&th->th_sum, htonl(tsecr),
0);
copyback = 1;
@@ -2230,8 +2231,8 @@ pf_normalize_tcpopt(struct pf_rule *r, struct mbuf *m, struct tcphdr *th,
case TCPOPT_MAXSEG:
mss = (u_int16_t *)(optp + 2);
if ((ntohs(*mss)) > r->max_mss) {
- th->th_sum = pf_cksum_fixup(th->th_sum,
- *mss, htons(r->max_mss), 0);
+ th->th_sum = pf_proto_cksum_fixup(m,
+ th->th_sum, *mss, htons(r->max_mss), 0);
*mss = htons(r->max_mss);
rewrite = 1;
}
OpenPOWER on IntegriCloud