diff options
-rw-r--r-- | sys/netinet/if_ether.c | 129 | ||||
-rw-r--r-- | sys/netinet/in.c | 6 | ||||
-rw-r--r-- | sys/netinet/in_var.h | 3 | ||||
-rw-r--r-- | usr.sbin/arp/arp.4 | 50 |
4 files changed, 171 insertions, 17 deletions
diff --git a/sys/netinet/if_ether.c b/sys/netinet/if_ether.c index 78d2f43..c99c465 100644 --- a/sys/netinet/if_ether.c +++ b/sys/netinet/if_ether.c @@ -125,6 +125,28 @@ SYSCTL_VNET_INT(_net_link_ether_inet, OID_AUTO, maxhold, CTLFLAG_RW, &VNET_NAME(arp_maxhold), 0, "Number of packets to hold per ARP entry"); +/* + * Due to the exponential backoff algorithm used for the interval between GARP + * retransmissions, the maximum number of retransmissions is limited for + * sanity. This limit corresponds to a maximum interval between retransmissions + * of 2^16 seconds ~= 18 hours. + * + * Making this limit more dynamic is more complicated than worthwhile, + * especially since sending out GARPs spaced days apart would be of little + * use. A maximum dynamic limit would look something like: + * + * const int max = fls(INT_MAX / hz) - 1; + */ +#define MAX_GARP_RETRANSMITS 16 +static int sysctl_garp_rexmit(SYSCTL_HANDLER_ARGS); +static int garp_rexmit_count = 0; /* GARP retransmission setting. */ + +SYSCTL_PROC(_net_link_ether_inet, OID_AUTO, garp_rexmit_count, + CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_MPSAFE, + &garp_rexmit_count, 0, sysctl_garp_rexmit, "I", + "Number of times to retransmit GARP packets;" + " 0 to disable, maximum of 16"); + static void arp_init(void); static void arpintr(struct mbuf *); static void arptimer(void *); @@ -928,6 +950,109 @@ drop: } #endif +/* + * Handle the garp_rexmit_count. Like sysctl_handle_int(), but limits the range + * of valid values. + */ +static int +sysctl_garp_rexmit(SYSCTL_HANDLER_ARGS) +{ + int error; + int rexmit_count = *(int *)arg1; + + error = sysctl_handle_int(oidp, &rexmit_count, 0, req); + + /* Enforce limits on any new value that may have been set. */ + if (!error && req->newptr) { + /* A new value was set. */ + if (rexmit_count < 0) { + rexmit_count = 0; + } else if (rexmit_count > MAX_GARP_RETRANSMITS) { + rexmit_count = MAX_GARP_RETRANSMITS; + } + *(int *)arg1 = rexmit_count; + } + + return (error); +} + +/* + * Retransmit a Gratuitous ARP (GARP) and, if necessary, schedule a callout to + * retransmit it again. A pending callout owns a reference to the ifa. + */ +static void +garp_rexmit(void *arg) +{ + struct in_ifaddr *ia = arg; + + if (callout_pending(&ia->ia_garp_timer) || + !callout_active(&ia->ia_garp_timer)) { + IFA_UNLOCK(&ia->ia_ifa); + ifa_free(&ia->ia_ifa); + return; + } + + /* + * Drop ifa lock while the ARP request is generated. + */ + IFA_UNLOCK(&ia->ia_ifa); + + arprequest(ia->ia_ifa.ifa_ifp, &IA_SIN(ia)->sin_addr, + &IA_SIN(ia)->sin_addr, IF_LLADDR(ia->ia_ifa.ifa_ifp)); + + /* + * Increment the count of retransmissions. If the count has reached the + * maximum value, stop sending the GARP packets. Otherwise, schedule + * the callout to retransmit another GARP packet. + */ + ++ia->ia_garp_count; + if (ia->ia_garp_count >= garp_rexmit_count) { + ifa_free(&ia->ia_ifa); + } else { + int rescheduled; + IFA_LOCK(&ia->ia_ifa); + rescheduled = callout_reset(&ia->ia_garp_timer, + (1 << ia->ia_garp_count) * hz, + garp_rexmit, ia); + IFA_UNLOCK(&ia->ia_ifa); + if (rescheduled) { + ifa_free(&ia->ia_ifa); + } + } +} + +/* + * Start the GARP retransmit timer. + * + * A single GARP is always transmitted when an IPv4 address is added + * to an interface and that is usually sufficient. However, in some + * circumstances, such as when a shared address is passed between + * cluster nodes, this single GARP may occasionally be dropped or + * lost. This can lead to neighbors on the network link working with a + * stale ARP cache and sending packets destined for that address to + * the node that previously owned the address, which may not respond. + * + * To avoid this situation, GARP retransmits can be enabled by setting + * the net.link.ether.inet.garp_rexmit_count sysctl to a value greater + * than zero. The setting represents the maximum number of + * retransmissions. The interval between retransmissions is calculated + * using an exponential backoff algorithm, doubling each time, so the + * retransmission intervals are: {1, 2, 4, 8, 16, ...} (seconds). + */ +static void +garp_timer_start(struct ifaddr *ifa) +{ + struct in_ifaddr *ia = (struct in_ifaddr *) ifa; + + IFA_LOCK(ifa); + ia->ia_garp_count = 0; + if (callout_reset(&ia->ia_garp_timer, (1 << ia->ia_garp_count) * hz, + garp_rexmit, ia) == 0) { + ifa_ref(ifa); + } + IFA_UNLOCK(ifa); +} + void arp_ifinit(struct ifnet *ifp, struct ifaddr *ifa) { @@ -939,6 +1064,10 @@ arp_ifinit(struct ifnet *ifp, struct ifaddr *ifa) if (ntohl(IA_SIN(ifa)->sin_addr.s_addr) != INADDR_ANY) { arprequest(ifp, &IA_SIN(ifa)->sin_addr, &IA_SIN(ifa)->sin_addr, IF_LLADDR(ifp)); + if (garp_rexmit_count > 0) { + garp_timer_start(ifa); + } + /* * interface address is considered static entry * because the output of the arp utility shows diff --git a/sys/netinet/in.c b/sys/netinet/in.c index b61b4b6..119ddd6 100644 --- a/sys/netinet/in.c +++ b/sys/netinet/in.c @@ -417,6 +417,8 @@ in_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp, ifa->ifa_addr = (struct sockaddr *)&ia->ia_addr; ifa->ifa_dstaddr = (struct sockaddr *)&ia->ia_dstaddr; ifa->ifa_netmask = (struct sockaddr *)&ia->ia_sockmask; + callout_init_mtx(&ia->ia_garp_timer, &ifa->ifa_mtx, + CALLOUT_RETURNUNLOCKED); ia->ia_sockmask.sin_len = 8; ia->ia_sockmask.sin_family = AF_INET; @@ -594,6 +596,10 @@ in_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp, } else ifa_free(&iap->ia_ifa); + IFA_LOCK(&ia->ia_ifa); + if (callout_stop(&ia->ia_garp_timer)) + ifa_free(&ia->ia_ifa); + IFA_UNLOCK(&ia->ia_ifa); ifa_free(&ia->ia_ifa); /* in_ifaddrhead */ out: if (ia != NULL) diff --git a/sys/netinet/in_var.h b/sys/netinet/in_var.h index 8657dbb..0cb43b0 100644 --- a/sys/netinet/in_var.h +++ b/sys/netinet/in_var.h @@ -33,6 +33,7 @@ #ifndef _NETINET_IN_VAR_H_ #define _NETINET_IN_VAR_H_ +#include <sys/callout.h> #include <sys/queue.h> #include <sys/fnv_hash.h> #include <sys/tree.h> @@ -69,6 +70,8 @@ struct in_ifaddr { struct sockaddr_in ia_dstaddr; /* reserve space for broadcast addr */ #define ia_broadaddr ia_dstaddr struct sockaddr_in ia_sockmask; /* reserve space for general netmask */ + struct callout ia_garp_timer; /* timer for retransmitting GARPs */ + int ia_garp_count; /* count of retransmitted GARPs */ }; struct in_aliasreq { diff --git a/usr.sbin/arp/arp.4 b/usr.sbin/arp/arp.4 index 93cfd07..37caf46 100644 --- a/usr.sbin/arp/arp.4 +++ b/usr.sbin/arp/arp.4 @@ -28,7 +28,7 @@ .\" @(#)arp4.4 6.5 (Berkeley) 4/18/94 .\" $FreeBSD$ .\" -.Dd May 11, 2013 +.Dd October 7, 2016 .Dt ARP 4 .Os .Sh NAME @@ -121,49 +121,65 @@ of the MIB. .Bl -tag -width "log_arp_permanent_modify" .It Va allow_multicast -Should the kernel install ARP entries with multicast bit set in -the hardware address. -Installing such entries is RFC 1812 violation, but some prorietary -load balancing techniques require routers on network to do so. +Install ARP entries with the multicast bit set in the hardware address. +Installing such entries is an RFC 1812 violation, but some proprietary load +balancing techniques require routers to do so. Turned off by default. +.It Va garp_rexmit_count +Retransmit gratuitous ARP (GARP) packets when an IPv4 address is added to an +interface. +A GARP is always transmitted when an IPv4 address is added to an interface. +A non-zero value causes the GARP packet to be retransmitted the stated number +of times. +The interval between retransmissions is doubled each time, so the +retransmission intervals are: {1, 2, 4, 8, 16, ...} (seconds). +The default value of zero means only the initial GARP is sent; no +additional GARP packets are retransmitted. +The maximum value is sixteen. +.Pp +The default behavior of a single GARP packet is usually sufficient. +However, a single GARP might be dropped or lost in some circumstances. +This is particularly harmful when a shared address is passed between cluster +nodes. +Neighbors on the network link might then work with a stale ARP cache and send +packets destined for that address to the node that previously owned the +address, which might not respond. .It Va log_arp_movements -Should the kernel log movements of IP addresses from one hardware -address to an other. +Log movements of IP addresses from one hardware address to another. See .Sx DIAGNOSTICS below. Turned on by default. .It Va log_arp_permanent_modify -Should the kernel log attempts of remote host on network to modify a -permanent ARP entry. +Log attempts by a remote host to modify a permanent ARP entry. See .Sx DIAGNOSTICS below. Turned on by default. .It Va log_arp_wrong_iface -Should the kernel log attempts to insert an ARP entry on an interface -when the IP network the address belongs to is connected to an other -interface. +Log attempts to insert an ARP entry on an interface when the IP network to +which the address belongs is connected to another interface. See .Sx DIAGNOSTICS below. Turned on by default. .It Va max_log_per_second -Limit number of remotely triggered logging events to a configured value -per second. +Limit the number of remotely triggered logging events to a configured value per +second. Default is 1 log message per second. .It Va max_age How long an ARP entry is held in the cache until it needs to be refreshed. Default is 1200 seconds. .It Va maxhold -How many packets hold in the per-entry output queue while the entry +How many packets to hold in the per-entry output queue while the entry is being resolved. Default is one packet. .It Va maxtries -Number of retransmits before host is considered down and error is returned. +Number of retransmits before a host is considered down and an error is +returned. Default is 5 tries. .It Va proxyall -Enables ARP proxying for all hosts on net. +Enables ARP proxying. Turned off by default. .It Va useloopback If an ARP entry is added for local address, force the traffic to go through |