summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- sys/netinet/if_ether.c 129
-rw-r--r-- sys/netinet/in.c 6
-rw-r--r-- sys/netinet/in_var.h 3
-rw-r--r-- usr.sbin/arp/arp.4 50
4 files changed, 171 insertions, 17 deletions
diff --git a/sys/netinet/if_ether.c b/sys/netinet/if_ether.c
index 78d2f43..c99c465 100644
--- a/sys/netinet/if_ether.c
+++ b/sys/netinet/if_ether.c
@@ -125,6 +125,28 @@ SYSCTL_VNET_INT(_net_link_ether_inet, OID_AUTO, maxhold, CTLFLAG_RW,
&VNET_NAME(arp_maxhold), 0,
"Number of packets to hold per ARP entry");
+/*
+ * Due to the exponential backoff algorithm used for the interval between GARP
+ * retransmissions, the maximum number of retransmissions is limited for
+ * sanity. With the limit of 16, the intervals run 2^0, 2^1, ..., 2^15
+ * seconds, so the longest interval between retransmissions is ~= 9 hours.
+ *
+ * Making this limit more dynamic is more complicated than worthwhile,
+ * especially since sending out GARPs spaced days apart would be of little
+ * use. A maximum dynamic limit would look something like:
+ *
+ * const int max = fls(INT_MAX / hz) - 1;
+ */
+#define MAX_GARP_RETRANSMITS 16 /* upper clamp applied by sysctl_garp_rexmit() */
+static int sysctl_garp_rexmit(SYSCTL_HANDLER_ARGS); /* range-limiting handler */
+static int garp_rexmit_count = 0; /* GARP retransmission setting. */
+
+SYSCTL_PROC(_net_link_ether_inet, OID_AUTO, garp_rexmit_count,
+ CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_MPSAFE,
+ &garp_rexmit_count, 0, sysctl_garp_rexmit, "I",
+ "Number of times to retransmit GARP packets;"
+ " 0 to disable, maximum of 16"); /* text repeats MAX_GARP_RETRANSMITS */
+
static void arp_init(void);
static void arpintr(struct mbuf *);
static void arptimer(void *);
@@ -928,6 +950,109 @@ drop:
}
#endif
+/*
+ * Handle the garp_rexmit_count. Like sysctl_handle_int(), but limits the range
+ * of valid values.
+ *
+ * arg1 points at the int backing the sysctl. Its current value is copied to a
+ * local, sysctl_handle_int() operates on that copy, and the copy is stored
+ * back through arg1 only when the handler succeeded and the request carried a
+ * new value (req->newptr is set).
+ *
+ * Out-of-range values are clamped to [0, MAX_GARP_RETRANSMITS] rather than
+ * rejected, so such a write still returns 0.
+ *
+ * Returns the error code produced by sysctl_handle_int().
+ */
+static int
+sysctl_garp_rexmit(SYSCTL_HANDLER_ARGS)
+{
+ int error;
+ int rexmit_count = *(int *)arg1; /* work on a copy of the setting */
+
+ error = sysctl_handle_int(oidp, &rexmit_count, 0, req);
+
+ /* Enforce limits on any new value that may have been set. */
+ if (!error && req->newptr) {
+ /* A new value was set. */
+ if (rexmit_count < 0) {
+ rexmit_count = 0;
+ } else if (rexmit_count > MAX_GARP_RETRANSMITS) {
+ rexmit_count = MAX_GARP_RETRANSMITS;
+ }
+ *(int *)arg1 = rexmit_count; /* store the clamped value */
+ }
+
+ return (error);
+}
+
+/*
+ * Retransmit a Gratuitous ARP (GARP) and, if necessary, schedule a callout to
+ * retransmit it again. A pending callout owns a reference to the ifa.
+ *
+ * arg is the struct in_ifaddr whose ia_garp_timer this handler serves.
+ *
+ * Locking: every path through this function issues IFA_UNLOCK() before
+ * returning, and no IFA_LOCK() is taken on entry.
+ * NOTE(review): this presumably relies on ia_garp_timer being initialised
+ * with callout_init_mtx(..., &ifa->ifa_mtx, CALLOUT_RETURNUNLOCKED) in
+ * in_control(), so the handler is entered locked and must return unlocked --
+ * confirm against callout(9).
+ *
+ * Reference handling:
+ *  - If the callout is pending again or is no longer active, no GARP is sent
+ *    and ifa_free() is called.
+ *  - If, after incrementing ia_garp_count, the count has reached
+ *    garp_rexmit_count, ifa_free() is called and nothing is rescheduled.
+ *  - Otherwise the callout is re-armed for (1 << ia_garp_count) * hz ticks
+ *    and ifa_free() is called only when callout_reset() returns non-zero.
+ *    NOTE(review): non-zero presumably means an already pending callout was
+ *    replaced, so its reference is kept and this one is dropped -- confirm
+ *    against callout(9).
+ */
+static void
+garp_rexmit(void *arg)
+{
+ struct in_ifaddr *ia = arg;
+
+ if (callout_pending(&ia->ia_garp_timer) ||
+ !callout_active(&ia->ia_garp_timer)) {
+ IFA_UNLOCK(&ia->ia_ifa);
+ ifa_free(&ia->ia_ifa); /* drop the reference held for this callout */
+ return;
+ }
+
+ /*
+ * Drop ifa lock while the ARP request is generated.
+ */
+ IFA_UNLOCK(&ia->ia_ifa);
+
+ arprequest(ia->ia_ifa.ifa_ifp, &IA_SIN(ia)->sin_addr,
+ &IA_SIN(ia)->sin_addr, IF_LLADDR(ia->ia_ifa.ifa_ifp));
+
+ /*
+ * Increment the count of retransmissions. If the count has reached the
+ * maximum value, stop sending the GARP packets. Otherwise, schedule
+ * the callout to retransmit another GARP packet.
+ */
+ ++ia->ia_garp_count;
+ if (ia->ia_garp_count >= garp_rexmit_count) {
+ ifa_free(&ia->ia_ifa); /* no further callout: release its reference */
+ } else {
+ int rescheduled;
+ IFA_LOCK(&ia->ia_ifa);
+ rescheduled = callout_reset(&ia->ia_garp_timer,
+ (1 << ia->ia_garp_count) * hz, /* interval doubles per retransmit */
+ garp_rexmit, ia);
+ IFA_UNLOCK(&ia->ia_ifa);
+ if (rescheduled) {
+ ifa_free(&ia->ia_ifa);
+ }
+ }
+}
+
+/*
+ * Start the GARP retransmit timer.
+ *
+ * A single GARP is always transmitted when an IPv4 address is added
+ * to an interface and that is usually sufficient. However, in some
+ * circumstances, such as when a shared address is passed between
+ * cluster nodes, this single GARP may occasionally be dropped or
+ * lost. This can lead to neighbors on the network link working with a
+ * stale ARP cache and sending packets destined for that address to
+ * the node that previously owned the address, which may not respond.
+ *
+ * To avoid this situation, GARP retransmits can be enabled by setting
+ * the net.link.ether.inet.garp_rexmit_count sysctl to a value greater
+ * than zero. The setting represents the maximum number of
+ * retransmissions. The interval between retransmissions is calculated
+ * using an exponential backoff algorithm, doubling each time, so the
+ * retransmission intervals are: {1, 2, 4, 8, 16, ...} (seconds).
+ *
+ * Implementation: with the ifa locked, ia_garp_count is reset to zero
+ * and ia_garp_timer is armed to run garp_rexmit() after (1 << 0) * hz
+ * ticks. ifa_ref() is called only when callout_reset() returns 0.
+ * NOTE(review): a zero return presumably means no pending callout was
+ * replaced, so the newly pending callout needs its own reference --
+ * confirm against callout(9).
+ *
+ * ifa is cast to struct in_ifaddr without a check.
+ * NOTE(review): assumes callers only pass IPv4 interface addresses.
+ */
+static void
+garp_timer_start(struct ifaddr *ifa)
+{
+ struct in_ifaddr *ia = (struct in_ifaddr *) ifa;
+
+ IFA_LOCK(ifa);
+ ia->ia_garp_count = 0; /* restart the backoff sequence */
+ if (callout_reset(&ia->ia_garp_timer, (1 << ia->ia_garp_count) * hz,
+ garp_rexmit, ia) == 0) {
+ ifa_ref(ifa);
+ }
+ IFA_UNLOCK(ifa);
+}
+
void
arp_ifinit(struct ifnet *ifp, struct ifaddr *ifa)
{
@@ -939,6 +1064,10 @@ arp_ifinit(struct ifnet *ifp, struct ifaddr *ifa)
if (ntohl(IA_SIN(ifa)->sin_addr.s_addr) != INADDR_ANY) {
arprequest(ifp, &IA_SIN(ifa)->sin_addr,
&IA_SIN(ifa)->sin_addr, IF_LLADDR(ifp));
+ if (garp_rexmit_count > 0) {
+ garp_timer_start(ifa);
+ }
+
/*
* interface address is considered static entry
* because the output of the arp utility shows
diff --git a/sys/netinet/in.c b/sys/netinet/in.c
index b61b4b6..119ddd6 100644
--- a/sys/netinet/in.c
+++ b/sys/netinet/in.c
@@ -417,6 +417,8 @@ in_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
ifa->ifa_addr = (struct sockaddr *)&ia->ia_addr;
ifa->ifa_dstaddr = (struct sockaddr *)&ia->ia_dstaddr;
ifa->ifa_netmask = (struct sockaddr *)&ia->ia_sockmask;
+ callout_init_mtx(&ia->ia_garp_timer, &ifa->ifa_mtx,
+ CALLOUT_RETURNUNLOCKED);
ia->ia_sockmask.sin_len = 8;
ia->ia_sockmask.sin_family = AF_INET;
@@ -594,6 +596,10 @@ in_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
} else
ifa_free(&iap->ia_ifa);
+ IFA_LOCK(&ia->ia_ifa);
+ if (callout_stop(&ia->ia_garp_timer))
+ ifa_free(&ia->ia_ifa);
+ IFA_UNLOCK(&ia->ia_ifa);
ifa_free(&ia->ia_ifa); /* in_ifaddrhead */
out:
if (ia != NULL)
diff --git a/sys/netinet/in_var.h b/sys/netinet/in_var.h
index 8657dbb..0cb43b0 100644
--- a/sys/netinet/in_var.h
+++ b/sys/netinet/in_var.h
@@ -33,6 +33,7 @@
#ifndef _NETINET_IN_VAR_H_
#define _NETINET_IN_VAR_H_
+#include <sys/callout.h>
#include <sys/queue.h>
#include <sys/fnv_hash.h>
#include <sys/tree.h>
@@ -69,6 +70,8 @@ struct in_ifaddr {
struct sockaddr_in ia_dstaddr; /* reserve space for broadcast addr */
#define ia_broadaddr ia_dstaddr
struct sockaddr_in ia_sockmask; /* reserve space for general netmask */
+ struct callout ia_garp_timer; /* timer for retransmitting GARPs */
+ int ia_garp_count; /* count of retransmitted GARPs */
};
struct in_aliasreq {
diff --git a/usr.sbin/arp/arp.4 b/usr.sbin/arp/arp.4
index 93cfd07..37caf46 100644
--- a/usr.sbin/arp/arp.4
+++ b/usr.sbin/arp/arp.4
@@ -28,7 +28,7 @@
.\" @(#)arp4.4 6.5 (Berkeley) 4/18/94
.\" $FreeBSD$
.\"
-.Dd May 11, 2013
+.Dd October 7, 2016
.Dt ARP 4
.Os
.Sh NAME
@@ -121,49 +121,65 @@ of the
MIB.
.Bl -tag -width "log_arp_permanent_modify"
.It Va allow_multicast
-Should the kernel install ARP entries with multicast bit set in
-the hardware address.
-Installing such entries is RFC 1812 violation, but some prorietary
-load balancing techniques require routers on network to do so.
+Install ARP entries with the multicast bit set in the hardware address.
+Installing such entries is an RFC 1812 violation, but some proprietary load
+balancing techniques require routers to do so.
Turned off by default.
+.It Va garp_rexmit_count
+Retransmit gratuitous ARP (GARP) packets when an IPv4 address is added to an
+interface.
+A GARP is always transmitted when an IPv4 address is added to an interface.
+A non-zero value causes the GARP packet to be retransmitted the stated number
+of times.
+The interval between retransmissions is doubled each time, so the
+retransmission intervals are: {1, 2, 4, 8, 16, ...} (seconds).
+The default value of zero means only the initial GARP is sent; no
+additional GARP packets are retransmitted.
+The maximum value is sixteen.
+.Pp
+The default behavior of a single GARP packet is usually sufficient.
+However, a single GARP might be dropped or lost in some circumstances.
+This is particularly harmful when a shared address is passed between cluster
+nodes.
+Neighbors on the network link might then work with a stale ARP cache and send
+packets destined for that address to the node that previously owned the
+address, which might not respond.
.It Va log_arp_movements
-Should the kernel log movements of IP addresses from one hardware
-address to an other.
+Log movements of IP addresses from one hardware address to another.
See
.Sx DIAGNOSTICS
below.
Turned on by default.
.It Va log_arp_permanent_modify
-Should the kernel log attempts of remote host on network to modify a
-permanent ARP entry.
+Log attempts by a remote host to modify a permanent ARP entry.
See
.Sx DIAGNOSTICS
below.
Turned on by default.
.It Va log_arp_wrong_iface
-Should the kernel log attempts to insert an ARP entry on an interface
-when the IP network the address belongs to is connected to an other
-interface.
+Log attempts to insert an ARP entry on an interface when the IP network to
+which the address belongs is connected to another interface.
See
.Sx DIAGNOSTICS
below.
Turned on by default.
.It Va max_log_per_second
-Limit number of remotely triggered logging events to a configured value
-per second.
+Limit the number of remotely triggered logging events to a configured value per
+second.
Default is 1 log message per second.
.It Va max_age
How long an ARP entry is held in the cache until it needs to be refreshed.
Default is 1200 seconds.
.It Va maxhold
-How many packets hold in the per-entry output queue while the entry
+How many packets to hold in the per-entry output queue while the entry
is being resolved.
Default is one packet.
.It Va maxtries
-Number of retransmits before host is considered down and error is returned.
+Number of retransmits before a host is considered down and an error is
+returned.
Default is 5 tries.
.It Va proxyall
-Enables ARP proxying for all hosts on net.
+Enables ARP proxying.
Turned off by default.
.It Va useloopback
If an ARP entry is added for local address, force the traffic to go through
OpenPOWER on IntegriCloud