summaryrefslogtreecommitdiffstats
path: root/sys
diff options
context:
space:
mode:
authorglebius <glebius@FreeBSD.org>2011-12-16 12:16:56 +0000
committerglebius <glebius@FreeBSD.org>2011-12-16 12:16:56 +0000
commit27a36f6ac8242750daa092abd7180b10d16f4508 (patch)
tree059f66a20d251fa947969ff34292206d47ffc45c /sys
parentb646d3f6998d3f539bcda6f94be4926461780cf7 (diff)
downloadFreeBSD-src-27a36f6ac8242750daa092abd7180b10d16f4508.zip
FreeBSD-src-27a36f6ac8242750daa092abd7180b10d16f4508.tar.gz
A major overhaul of the CARP implementation. The ip_carp.c was started
from scratch, copying needed functionality from the old implemenation on demand, with a thorough review of all code. The main change is that interface layer has been removed from the CARP. Now redundant addresses are configured exactly on the interfaces, they run on. The CARP configuration itself is, as before, configured and read via SIOCSVH/SIOCGVH ioctls. A new prefix created with SIOCAIFADDR or SIOCAIFADDR_IN6 may now be configured to a particular virtual host id, which makes the prefix redundant. ifconfig(8) semantics has been changed too: now one doesn't need to clone carpXX interface, he/she should directly configure a vhid on a Ethernet interface. To supply vhid data from the kernel to an application the getifaddrs(8) function had been changed to pass ifam_data with each address. [1] The new implementation definitely closes all PRs related to carp(4) being an interface, and may close several others. It also allows to run a single redundant IP per interface. Big thanks to Bjoern Zeeb for his help with inet6 part of patch, for idea on using ifam_data and for several rounds of reviewing! PR: kern/117000, kern/126945, kern/126714, kern/120130, kern/117448 Reviewed by: bz Submitted by: bz [1]
Diffstat (limited to 'sys')
-rw-r--r--sys/net/if.c22
-rw-r--r--sys/net/if.h5
-rw-r--r--sys/net/if_ethersubr.c2
-rw-r--r--sys/net/if_types.h1
-rw-r--r--sys/net/if_var.h2
-rw-r--r--sys/net/rtsock.c16
-rw-r--r--sys/netinet/if_ether.c32
-rw-r--r--sys/netinet/if_ether.h1
-rw-r--r--sys/netinet/in.c63
-rw-r--r--sys/netinet/in_var.h3
-rw-r--r--sys/netinet/ip_carp.c2380
-rw-r--r--sys/netinet/ip_carp.h37
-rw-r--r--sys/netinet6/in6.c24
-rw-r--r--sys/netinet6/in6_ifattach.c1
-rw-r--r--sys/netinet6/in6_var.h1
-rw-r--r--sys/netinet6/nd6.c3
-rw-r--r--sys/netinet6/nd6_nbr.c14
-rw-r--r--sys/sys/param.h2
18 files changed, 1140 insertions, 1469 deletions
diff --git a/sys/net/if.c b/sys/net/if.c
index 8c6c24c..4377343 100644
--- a/sys/net/if.c
+++ b/sys/net/if.c
@@ -130,17 +130,19 @@ void (*lagg_linkstate_p)(struct ifnet *ifp, int state);
/* These are external hooks for CARP. */
void (*carp_linkstate_p)(struct ifnet *ifp);
#if defined(INET) || defined(INET6)
-struct ifnet *(*carp_forus_p)(struct ifnet *ifp, u_char *dhost);
+int (*carp_forus_p)(struct ifnet *ifp, u_char *dhost);
int (*carp_output_p)(struct ifnet *ifp, struct mbuf *m,
- struct sockaddr *sa, struct rtentry *rt);
+ struct sockaddr *sa);
+int (*carp_ioctl_p)(struct ifreq *, u_long, struct thread *);
+int (*carp_attach_p)(struct ifaddr *, int);
+void (*carp_detach_p)(struct ifaddr *);
#endif
#ifdef INET
-int (*carp_iamatch_p)(struct ifnet *, struct in_ifaddr *, struct in_addr *,
- u_int8_t **);
+int (*carp_iamatch_p)(struct ifaddr *, uint8_t **);
#endif
#ifdef INET6
struct ifaddr *(*carp_iamatch6_p)(struct ifnet *ifp, struct in6_addr *taddr6);
-caddr_t (*carp_macmatch6_p)(struct ifnet *ifp, struct mbuf *m,
+caddr_t (*carp_macmatch6_p)(struct ifnet *ifp, struct mbuf *m,
const struct in6_addr *taddr);
#endif
@@ -2506,6 +2508,16 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data, struct thread *td)
error = if_getgroupmembers((struct ifgroupreq *)data);
CURVNET_RESTORE();
return (error);
+#if defined(INET) || defined(INET6)
+ case SIOCSVH:
+ case SIOCGVH:
+ if (carp_ioctl_p == NULL)
+ error = EPROTONOSUPPORT;
+ else
+ error = (*carp_ioctl_p)(ifr, cmd, td);
+ CURVNET_RESTORE();
+ return (error);
+#endif
}
ifp = ifunit_ref(ifr->ifr_name);
diff --git a/sys/net/if.h b/sys/net/if.h
index 4f2dc6f..08420f4 100644
--- a/sys/net/if.h
+++ b/sys/net/if.h
@@ -85,7 +85,7 @@ struct if_data {
u_char ifi_addrlen; /* media address length */
u_char ifi_hdrlen; /* media header length */
u_char ifi_link_state; /* current link state */
- u_char ifi_spare_char1; /* spare byte */
+ u_char ifi_vhid; /* carp vhid */
u_char ifi_spare_char2; /* spare byte */
u_char ifi_datalen; /* length of this data struct */
u_long ifi_mtu; /* maximum transmission unit */
@@ -267,6 +267,8 @@ struct ifa_msghdr {
int ifam_flags; /* value of ifa_flags */
u_short ifam_index; /* index for associated ifp */
int ifam_metric; /* value of ifa_metric */
+ struct if_data ifam_data;/* statistics and other data about if or
+ * address */
};
/*
@@ -357,6 +359,7 @@ struct ifaliasreq {
struct sockaddr ifra_addr;
struct sockaddr ifra_broadaddr;
struct sockaddr ifra_mask;
+ int ifra_vhid;
};
struct ifmediareq {
diff --git a/sys/net/if_ethersubr.c b/sys/net/if_ethersubr.c
index 9eae683..253371a 100644
--- a/sys/net/if_ethersubr.c
+++ b/sys/net/if_ethersubr.c
@@ -397,7 +397,7 @@ ether_output(struct ifnet *ifp, struct mbuf *m,
#if defined(INET) || defined(INET6)
if (ifp->if_carp &&
- (error = (*carp_output_p)(ifp, m, dst, NULL)))
+ (error = (*carp_output_p)(ifp, m, dst)))
goto bad;
#endif
diff --git a/sys/net/if_types.h b/sys/net/if_types.h
index c2effac..fe6ab5e 100644
--- a/sys/net/if_types.h
+++ b/sys/net/if_types.h
@@ -250,6 +250,5 @@
#define IFT_ENC 0xf4
#define IFT_PFLOG 0xf6
#define IFT_PFSYNC 0xf7
-#define IFT_CARP 0xf8 /* Common Address Redundancy Protocol */
#define IFT_IPXIP 0xf9 /* IPX over IP tunneling; no longer used. */
#endif /* !_NET_IF_TYPES_H_ */
diff --git a/sys/net/if_var.h b/sys/net/if_var.h
index 1cb8440..ea60944 100644
--- a/sys/net/if_var.h
+++ b/sys/net/if_var.h
@@ -69,6 +69,7 @@ struct rt_addrinfo;
struct socket;
struct ether_header;
struct carp_if;
+struct carp_softc;
struct ifvlantrunk;
struct route;
struct vnet;
@@ -729,6 +730,7 @@ struct ifaddr {
struct sockaddr *ifa_netmask; /* used to determine subnet */
struct if_data if_data; /* not all members are meaningful */
struct ifnet *ifa_ifp; /* back-pointer to interface */
+ struct carp_softc *ifa_carp; /* pointer to CARP data */
TAILQ_ENTRY(ifaddr) ifa_link; /* queue macro glue */
void (*ifa_rtrequest) /* check or clean routes (+ or -)'d */
(int, struct rtentry *, struct rt_addrinfo *);
diff --git a/sys/net/rtsock.c b/sys/net/rtsock.c
index b2313c0..3520824 100644
--- a/sys/net/rtsock.c
+++ b/sys/net/rtsock.c
@@ -63,6 +63,7 @@
#include <netinet/in.h>
#include <netinet/if_ether.h>
+#include <netinet/ip_carp.h>
#ifdef INET6
#include <netinet6/scope6_var.h>
#endif
@@ -83,7 +84,7 @@ struct if_data32 {
uint8_t ifi_addrlen;
uint8_t ifi_hdrlen;
uint8_t ifi_link_state;
- uint8_t ifi_spare_char1;
+ uint8_t ifi_vhid;
uint8_t ifi_spare_char2;
uint8_t ifi_datalen;
uint32_t ifi_mtu;
@@ -122,6 +123,9 @@ MALLOC_DEFINE(M_RTABLE, "routetbl", "routing tables");
static struct sockaddr route_src = { 2, PF_ROUTE, };
static struct sockaddr sa_zero = { sizeof(sa_zero), AF_INET, };
+/* These are external hooks for CARP. */
+int (*carp_get_vhid_p)(struct ifaddr *);
+
/*
* Used by rtsock/raw_input callback code to decide whether to filter the update
* notification to a socket bound to a particular FIB.
@@ -1508,6 +1512,7 @@ copy_ifdata32(struct if_data *src, struct if_data32 *dst)
CP(*src, *dst, ifi_addrlen);
CP(*src, *dst, ifi_hdrlen);
CP(*src, *dst, ifi_link_state);
+ CP(*src, *dst, ifi_vhid);
dst->ifi_datalen = sizeof(struct if_data32);
CP(*src, *dst, ifi_mtu);
CP(*src, *dst, ifi_metric);
@@ -1559,6 +1564,9 @@ sysctl_iflist(int af, struct walkarg *w)
ifm32->ifm_flags = ifp->if_flags |
ifp->if_drv_flags;
copy_ifdata32(&ifp->if_data, &ifm32->ifm_data);
+ if (carp_get_vhid_p != NULL)
+ ifm32->ifm_data.ifi_vhid =
+ (*carp_get_vhid_p)(ifa);
ifm32->ifm_addrs = info.rti_addrs;
error = SYSCTL_OUT(w->w_req, (caddr_t)ifm32,
len);
@@ -1569,6 +1577,9 @@ sysctl_iflist(int af, struct walkarg *w)
ifm->ifm_index = ifp->if_index;
ifm->ifm_flags = ifp->if_flags | ifp->if_drv_flags;
ifm->ifm_data = ifp->if_data;
+ if (carp_get_vhid_p != NULL)
+ ifm->ifm_data.ifi_vhid =
+ (*carp_get_vhid_p)(ifa);
ifm->ifm_addrs = info.rti_addrs;
error = SYSCTL_OUT(w->w_req, (caddr_t)ifm, len);
#ifdef COMPAT_FREEBSD32
@@ -1595,6 +1606,9 @@ sysctl_iflist(int af, struct walkarg *w)
ifam->ifam_flags = ifa->ifa_flags;
ifam->ifam_metric = ifa->ifa_metric;
ifam->ifam_addrs = info.rti_addrs;
+ if (carp_get_vhid_p != NULL)
+ ifam->ifam_data.ifi_vhid =
+ (*carp_get_vhid_p)(ifa);
error = SYSCTL_OUT(w->w_req, w->w_tmem, len);
if (error)
goto done;
diff --git a/sys/netinet/if_ether.c b/sys/netinet/if_ether.c
index 60fa944..546714f 100644
--- a/sys/netinet/if_ether.c
+++ b/sys/netinet/if_ether.c
@@ -139,8 +139,6 @@ static const struct netisr_handler arp_nh = {
};
#ifdef AF_INET
-void arp_ifscrub(struct ifnet *ifp, uint32_t addr);
-
/*
* called by in_ifscrub to remove entry from the table when
* the interface goes away
@@ -516,7 +514,7 @@ in_arpinput(struct mbuf *m)
int op, flags;
int req_len;
int bridged = 0, is_bridge = 0;
- int carp_match = 0;
+ int carped;
struct sockaddr_in sin;
sin.sin_len = sizeof(struct sockaddr_in);
sin.sin_family = AF_INET;
@@ -561,24 +559,14 @@ in_arpinput(struct mbuf *m)
* For a bridge, we want to check the address irrespective
* of the receive interface. (This will change slightly
* when we have clusters of interfaces).
- * If the interface does not match, but the recieving interface
- * is part of carp, we call carp_iamatch to see if this is a
- * request for the virtual host ip.
- * XXX: This is really ugly!
*/
IN_IFADDR_RLOCK();
LIST_FOREACH(ia, INADDR_HASH(itaddr.s_addr), ia_hash) {
if (((bridged && ia->ia_ifp->if_bridge == ifp->if_bridge) ||
ia->ia_ifp == ifp) &&
- itaddr.s_addr == ia->ia_addr.sin_addr.s_addr) {
- ifa_ref(&ia->ia_ifa);
- IN_IFADDR_RUNLOCK();
- goto match;
- }
- if (ifp->if_carp != NULL &&
- (*carp_iamatch_p)(ifp, ia, &isaddr, &enaddr) &&
- itaddr.s_addr == ia->ia_addr.sin_addr.s_addr) {
- carp_match = 1;
+ itaddr.s_addr == ia->ia_addr.sin_addr.s_addr &&
+ (ia->ia_ifa.ifa_carp == NULL ||
+ (*carp_iamatch_p)(&ia->ia_ifa, &enaddr))) {
ifa_ref(&ia->ia_ifa);
IN_IFADDR_RUNLOCK();
goto match;
@@ -643,6 +631,7 @@ in_arpinput(struct mbuf *m)
match:
if (!enaddr)
enaddr = (u_int8_t *)IF_LLADDR(ifp);
+ carped = (ia->ia_ifa.ifa_carp != NULL);
myaddr = ia->ia_addr.sin_addr;
ifa_free(&ia->ia_ifa);
if (!bcmp(ar_sha(ah), enaddr, ifp->if_addrlen))
@@ -659,9 +648,9 @@ match:
* case we suppress the warning to avoid false positive complaints of
* potential misconfiguration.
*/
- if (!bridged && isaddr.s_addr == myaddr.s_addr && myaddr.s_addr != 0) {
- log(LOG_ERR,
- "arp: %*D is using my IP address %s on %s!\n",
+ if (!bridged && !carped && isaddr.s_addr == myaddr.s_addr &&
+ myaddr.s_addr != 0) {
+ log(LOG_ERR, "arp: %*D is using my IP address %s on %s!\n",
ifp->if_addrlen, (u_char *)ar_sha(ah), ":",
inet_ntoa(isaddr), ifp->if_xname);
itaddr = myaddr;
@@ -682,7 +671,7 @@ match:
IF_AFDATA_UNLOCK(ifp);
if (la != NULL) {
/* the following is not an error when doing bridging */
- if (!bridged && la->lle_tbl->llt_ifp != ifp && !carp_match) {
+ if (!bridged && la->lle_tbl->llt_ifp != ifp) {
if (log_arp_wrong_iface)
log(LOG_WARNING, "arp: %s is on %s "
"but got reply from %*D on %s\n",
@@ -879,6 +868,9 @@ arp_ifinit(struct ifnet *ifp, struct ifaddr *ifa)
{
struct llentry *lle;
+ if (ifa->ifa_carp != NULL)
+ return;
+
if (ntohl(IA_SIN(ifa)->sin_addr.s_addr) != INADDR_ANY) {
arprequest(ifp, &IA_SIN(ifa)->sin_addr,
&IA_SIN(ifa)->sin_addr, IF_LLADDR(ifp));
diff --git a/sys/netinet/if_ether.h b/sys/netinet/if_ether.h
index 1b69436..4b3ebd7 100644
--- a/sys/netinet/if_ether.h
+++ b/sys/netinet/if_ether.h
@@ -117,6 +117,7 @@ int arpresolve(struct ifnet *ifp, struct rtentry *rt,
struct llentry **lle);
void arp_ifinit(struct ifnet *, struct ifaddr *);
void arp_ifinit2(struct ifnet *, struct ifaddr *, u_char *);
+void arp_ifscrub(struct ifnet *, uint32_t);
#include <sys/eventhandler.h>
typedef void (*llevent_arp_update_fn)(void *, struct llentry *);
diff --git a/sys/netinet/in.c b/sys/netinet/in.c
index 51bb470..7e9b1ed 100644
--- a/sys/netinet/in.c
+++ b/sys/netinet/in.c
@@ -56,10 +56,12 @@ __FBSDID("$FreeBSD$");
#include <net/route.h>
#include <net/vnet.h>
+#include <netinet/if_ether.h>
#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/in_pcb.h>
#include <netinet/ip_var.h>
+#include <netinet/ip_carp.h>
#include <netinet/igmp_var.h>
#include <netinet/udp.h>
#include <netinet/udp_var.h>
@@ -69,17 +71,15 @@ static void in_len2mask(struct in_addr *, int);
static int in_lifaddr_ioctl(struct socket *, u_long, caddr_t,
struct ifnet *, struct thread *);
-static int in_addprefix(struct in_ifaddr *, int);
-static int in_scrubprefix(struct in_ifaddr *, u_int);
static void in_socktrim(struct sockaddr_in *);
-static int in_ifinit(struct ifnet *,
- struct in_ifaddr *, struct sockaddr_in *, int, int);
+static int in_ifinit(struct ifnet *, struct in_ifaddr *,
+ struct sockaddr_in *, int, int, int);
static void in_purgemaddrs(struct ifnet *);
-static VNET_DEFINE(int, sameprefixcarponly);
-#define V_sameprefixcarponly VNET(sameprefixcarponly)
-SYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, same_prefix_carp_only, CTLFLAG_RW,
- &VNET_NAME(sameprefixcarponly), 0,
+static VNET_DEFINE(int, nosameprefix);
+#define V_nosameprefix VNET(nosameprefix)
+SYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, no_same_prefix, CTLFLAG_RW,
+ &VNET_NAME(nosameprefix), 0,
"Refuse to create same prefixes on different interfaces");
VNET_DECLARE(struct inpcbinfo, ripcbinfo);
@@ -517,7 +517,7 @@ in_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
case SIOCSIFADDR:
error = in_ifinit(ifp, ia,
- (struct sockaddr_in *) &ifr->ifr_addr, 1, 0);
+ (struct sockaddr_in *) &ifr->ifr_addr, 1, 0, 0);
if (error != 0 && iaIsNew)
break;
if (error == 0) {
@@ -570,7 +570,7 @@ in_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
}
if (hostIsNew || maskIsNew)
error = in_ifinit(ifp, ia, &ifra->ifra_addr, 0,
- maskIsNew);
+ maskIsNew, ifra->ifra_vhid);
if (error != 0 && iaIsNew)
break;
@@ -609,6 +609,9 @@ in_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
panic("in_control: unsupported ioctl");
}
+ if (ia->ia_ifa.ifa_carp)
+ (*carp_detach_p)(&ia->ia_ifa);
+
IF_ADDR_LOCK(ifp);
/* Re-check that ia is still part of the list. */
TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
@@ -843,7 +846,7 @@ in_ifscrub(struct ifnet *ifp, struct in_ifaddr *ia, u_int flags)
*/
static int
in_ifinit(struct ifnet *ifp, struct in_ifaddr *ia, struct sockaddr_in *sin,
- int scrub, int masksupplied)
+ int scrub, int masksupplied, int vhid)
{
register u_long i = ntohl(sin->sin_addr.s_addr);
int flags = RTF_UP, error = 0;
@@ -859,6 +862,15 @@ in_ifinit(struct ifnet *ifp, struct in_ifaddr *ia, struct sockaddr_in *sin,
ia, ia_hash);
IN_IFADDR_WUNLOCK();
+ if (vhid > 0) {
+ if (carp_attach_p != NULL)
+ error = (*carp_attach_p)(&ia->ia_ifa, vhid);
+ else
+ error = EPROTONOSUPPORT;
+ }
+ if (error)
+ return (error);
+
/*
* Give the interface a chance to initialize
* if this is its first address,
@@ -885,11 +897,6 @@ in_ifinit(struct ifnet *ifp, struct in_ifaddr *ia, struct sockaddr_in *sin,
ia->ia_subnet = i & ia->ia_subnetmask;
in_socktrim(&ia->ia_sockmask);
/*
- * XXX: carp(4) does not have interface route
- */
- if (ifp->if_type == IFT_CARP)
- return (0);
- /*
* Add route for the network.
*/
ia->ia_ifa.ifa_metric = ifp->if_metric;
@@ -907,7 +914,7 @@ in_ifinit(struct ifnet *ifp, struct in_ifaddr *ia, struct sockaddr_in *sin,
return (0);
flags |= RTF_HOST;
}
- if ((error = in_addprefix(ia, flags)) != 0)
+ if (!vhid && (error = in_addprefix(ia, flags)) != 0)
return (error);
if (ia->ia_addr.sin_addr.s_addr == INADDR_ANY)
@@ -920,7 +927,7 @@ in_ifinit(struct ifnet *ifp, struct in_ifaddr *ia, struct sockaddr_in *sin,
/*
* add a loopback route to self
*/
- if (V_useloopback && !(ifp->if_flags & IFF_LOOPBACK)) {
+ if (V_useloopback && !vhid && !(ifp->if_flags & IFF_LOOPBACK)) {
struct route ia_ro;
bzero(&ia_ro, sizeof(ia_ro));
@@ -992,7 +999,7 @@ static void in_addralias_rtmsg(int cmd, struct in_addr *prefix,
/*
* Check if we have a route for the given prefix already or add one accordingly.
*/
-static int
+int
in_addprefix(struct in_ifaddr *target, int flags)
{
struct in_ifaddr *ia;
@@ -1038,9 +1045,7 @@ in_addprefix(struct in_ifaddr *target, int flags)
} else
break;
#endif
- if (V_sameprefixcarponly &&
- target->ia_ifp->if_type != IFT_CARP &&
- ia->ia_ifp->if_type != IFT_CARP) {
+ if (V_nosameprefix) {
IN_IFADDR_RUNLOCK();
return (EEXIST);
} else {
@@ -1061,14 +1066,12 @@ in_addprefix(struct in_ifaddr *target, int flags)
return (error);
}
-extern void arp_ifscrub(struct ifnet *ifp, uint32_t addr);
-
/*
* If there is no other address in the system that can serve a route to the
* same prefix, remove the route. Hand over the route to the new address
* otherwise.
*/
-static int
+int
in_scrubprefix(struct in_ifaddr *target, u_int flags)
{
struct in_ifaddr *ia;
@@ -1156,13 +1159,8 @@ in_scrubprefix(struct in_ifaddr *target, u_int flags)
* If we got a matching prefix address, move IFA_ROUTE and
* the route itself to it. Make sure that routing daemons
* get a heads-up.
- *
- * XXX: a special case for carp(4) interface - this should
- * be more generally specified as an interface that
- * doesn't support such action.
*/
- if ((ia->ia_flags & IFA_ROUTE) == 0
- && (ia->ia_ifp->if_type != IFT_CARP)) {
+ if ((ia->ia_flags & IFA_ROUTE) == 0) {
ifa_ref(&ia->ia_ifa);
IN_IFADDR_RUNLOCK();
error = rtinit(&(target->ia_ifa), (int)RTM_DELETE,
@@ -1310,9 +1308,6 @@ in_purgemaddrs(struct ifnet *ifp)
IN_MULTI_UNLOCK();
}
-#include <net/if_dl.h>
-#include <netinet/if_ether.h>
-
struct in_llentry {
struct llentry base;
struct sockaddr_in l3_addr4;
diff --git a/sys/netinet/in_var.h b/sys/netinet/in_var.h
index 3a5e328..dea8393 100644
--- a/sys/netinet/in_var.h
+++ b/sys/netinet/in_var.h
@@ -77,6 +77,7 @@ struct in_aliasreq {
struct sockaddr_in ifra_broadaddr;
#define ifra_dstaddr ifra_broadaddr
struct sockaddr_in ifra_mask;
+ int ifra_vhid;
};
/*
* Given a pointer to an in_ifaddr (ifaddr),
@@ -442,6 +443,8 @@ int in_leavegroup_locked(struct in_multi *,
int in_control(struct socket *, u_long, caddr_t, struct ifnet *,
struct thread *);
void in_rtqdrain(void);
+int in_addprefix(struct in_ifaddr *, int);
+int in_scrubprefix(struct in_ifaddr *, u_int);
void ip_input(struct mbuf *);
int in_ifadown(struct ifaddr *ifa, int);
void in_ifscrub(struct ifnet *, struct in_ifaddr *, u_int);
diff --git a/sys/netinet/ip_carp.c b/sys/netinet/ip_carp.c
index 73024f7..2875537 100644
--- a/sys/netinet/ip_carp.c
+++ b/sys/netinet/ip_carp.c
@@ -1,6 +1,8 @@
-/*
- * Copyright (c) 2002 Michael Shalayeff. All rights reserved.
- * Copyright (c) 2003 Ryan McBride. All rights reserved.
+/*-
+ * Copyright (c) 2002 Michael Shalayeff.
+ * Copyright (c) 2003 Ryan McBride.
+ * Copyright (c) 2011 Gleb Smirnoff <glebius@FreeBSD.org>
+ * All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -31,38 +33,30 @@ __FBSDID("$FreeBSD$");
#include "opt_inet.h"
#include "opt_inet6.h"
-#include <sys/types.h>
#include <sys/param.h>
#include <sys/systm.h>
-#include <sys/conf.h>
+#include <sys/bus.h>
+#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
-#include <sys/time.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
-#include <sys/signalvar.h>
-#include <sys/filio.h>
-#include <sys/sockio.h>
-
-#include <sys/socket.h>
-#include <sys/vnode.h>
-
-#include <machine/stdarg.h>
-#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/fddi.h>
-#include <net/iso88025.h>
#include <net/if.h>
-#include <net/if_clone.h>
#include <net/if_dl.h>
+#include <net/if_llatbl.h>
#include <net/if_types.h>
+#include <net/iso88025.h>
#include <net/route.h>
#include <net/vnet.h>
@@ -71,12 +65,9 @@ __FBSDID("$FreeBSD$");
#include <netinet/in_var.h>
#include <netinet/ip_carp.h>
#include <netinet/ip.h>
-
#include <machine/in_cksum.h>
#endif
-
#ifdef INET
-#include <netinet/in_systm.h>
#include <netinet/ip_var.h>
#include <netinet/if_ether.h>
#endif
@@ -85,65 +76,117 @@ __FBSDID("$FreeBSD$");
#include <netinet/icmp6.h>
#include <netinet/ip6.h>
#include <netinet6/ip6protosw.h>
+#include <netinet6/in6_var.h>
#include <netinet6/ip6_var.h>
#include <netinet6/scope6_var.h>
-#include <netinet6/in6_var.h>
#include <netinet6/nd6.h>
#endif
#include <crypto/sha1.h>
-#define CARP_IFNAME "carp"
-static MALLOC_DEFINE(M_CARP, "CARP", "CARP interfaces");
-SYSCTL_DECL(_net_inet_carp);
+static MALLOC_DEFINE(M_CARP, "CARP", "CARP addresses");
struct carp_softc {
- struct ifnet *sc_ifp; /* Interface clue */
- struct ifnet *sc_carpdev; /* Pointer to parent interface */
- struct in_ifaddr *sc_ia; /* primary iface address */
+ struct ifnet *sc_carpdev; /* Pointer to parent ifnet. */
+ struct ifaddr **sc_ifas; /* Our ifaddrs. */
+ struct sockaddr_dl sc_addr; /* Our link level address. */
+ struct callout sc_ad_tmo; /* Advertising timeout. */
#ifdef INET
- struct ip_moptions sc_imo;
+ struct callout sc_md_tmo; /* Master down timeout. */
#endif
#ifdef INET6
- struct in6_ifaddr *sc_ia6; /* primary iface address v6 */
- struct ip6_moptions sc_im6o;
-#endif /* INET6 */
- TAILQ_ENTRY(carp_softc) sc_list;
-
- enum { INIT = 0, BACKUP, MASTER } sc_state;
+ struct callout sc_md6_tmo; /* XXX: Master down timeout. */
+#endif
+ struct mtx sc_mtx;
- int sc_flags_backup;
- int sc_suppress;
+ int sc_vhid;
+ int sc_advskew;
+ int sc_advbase;
- int sc_sendad_errors;
+ int sc_naddrs;
+ int sc_naddrs6;
+ int sc_ifasiz;
+ enum { INIT = 0, BACKUP, MASTER } sc_state;
+ int sc_suppress;
+ int sc_sendad_errors;
#define CARP_SENDAD_MAX_ERRORS 3
- int sc_sendad_success;
+ int sc_sendad_success;
#define CARP_SENDAD_MIN_SUCCESS 3
- int sc_vhid;
- int sc_advskew;
- int sc_naddrs;
- int sc_naddrs6;
- int sc_advbase; /* seconds */
- int sc_init_counter;
- u_int64_t sc_counter;
+ int sc_init_counter;
+ uint64_t sc_counter;
/* authentication */
-#define CARP_HMAC_PAD 64
+#define CARP_HMAC_PAD 64
unsigned char sc_key[CARP_KEY_LEN];
unsigned char sc_pad[CARP_HMAC_PAD];
SHA1_CTX sc_sha1;
- struct callout sc_ad_tmo; /* advertisement timeout */
- struct callout sc_md_tmo; /* master down timeout */
- struct callout sc_md6_tmo; /* master down timeout */
-
- LIST_ENTRY(carp_softc) sc_next; /* Interface clue */
+ TAILQ_ENTRY(carp_softc) sc_list; /* On the carp_if list. */
+ LIST_ENTRY(carp_softc) sc_next; /* On the global list. */
};
-#define SC2IFP(sc) ((sc)->sc_ifp)
+
+struct carp_if {
+#ifdef INET
+ int cif_naddrs;
+#endif
+#ifdef INET6
+ int cif_naddrs6;
+#endif
+ TAILQ_HEAD(, carp_softc) cif_vrs;
+#ifdef INET
+ struct ip_moptions cif_imo;
+#endif
+#ifdef INET6
+ struct ip6_moptions cif_im6o;
+#endif
+ struct ifnet *cif_ifp;
+ struct mtx cif_mtx;
+};
+
+#define CARP_INET 0
+#define CARP_INET6 1
+static int proto_reg[] = {-1, -1};
+
+/*
+ * Brief design of carp(4).
+ *
+ * Any carp-capable ifnet may have a list of carp softcs hanging off
+ * its ifp->if_carp pointer. Each softc represents one unique virtual
+ * host id, or vhid. The softc has a back pointer to the ifnet. All
+ * softcs are joined in a global list, which has quite limited use.
+ *
+ * Any interface address that takes part in CARP negotiation has a
+ * pointer to the softc of its vhid, ifa->ifa_carp. That could be either
+ * AF_INET or AF_INET6 address.
+ *
+ * Although, one can get the softc's backpointer to ifnet and traverse
+ * through its ifp->if_addrhead queue to find all interface addresses
+ * involved in CARP, we keep a growable array of ifaddr pointers. This
+ * allows us to avoid grabbing the IF_ADDR_LOCK() in many traversals that
+ * do calls into the network stack, thus avoiding LORs.
+ *
+ * Locking:
+ *
+ * Each softc has a lock sc_mtx. It is used to synchronise carp_input_c(),
+ * callout-driven events and ioctl()s.
+ *
+ * To traverse the list of softcs on an ifnet we use CIF_LOCK(), to
+ * traverse the global list we use the mutex carp_mtx.
+ *
+ * Known issues with locking:
+ *
+ * - There is no protection for races between two ioctl() requests,
+ * neither SIOCSVH, nor SIOCAIFADDR & SIOCAIFADDR_IN6. I think that all
+ * interface ioctl()s should be serialized right in net/if.c.
+ * - Sending ad, we put the pointer to the softc in an mtag, and no reference
+ * counting is done on the softc.
+ * - On module unload we may race (?) with packet processing thread
+ * dereferencing our function pointers.
+ */
int carp_suppress_preempt = 0;
-int carp_opts[CARPCTL_MAXID] = { 0, 1, 0, 1, 0, 0 }; /* XXX for now */
+int carp_opts[CARPCTL_MAXID] = { 0, 1, 0, 1, 0, };
SYSCTL_NODE(_net_inet, IPPROTO_CARP, carp, CTLFLAG_RW, 0, "CARP");
SYSCTL_INT(_net_inet_carp, CARPCTL_ALLOW, allow, CTLFLAG_RW,
&carp_opts[CARPCTL_ALLOW], 0, "Accept incoming CARP packets");
@@ -151,8 +194,6 @@ SYSCTL_INT(_net_inet_carp, CARPCTL_PREEMPT, preempt, CTLFLAG_RW,
&carp_opts[CARPCTL_PREEMPT], 0, "high-priority backup preemption mode");
SYSCTL_INT(_net_inet_carp, CARPCTL_LOG, log, CTLFLAG_RW,
&carp_opts[CARPCTL_LOG], 0, "log bad carp packets");
-SYSCTL_INT(_net_inet_carp, CARPCTL_ARPBALANCE, arpbalance, CTLFLAG_RW,
- &carp_opts[CARPCTL_ARPBALANCE], 0, "balance arp responses");
SYSCTL_INT(_net_inet_carp, OID_AUTO, suppress_preempt, CTLFLAG_RD,
&carp_suppress_preempt, 0, "Preemption is suppressed");
@@ -161,36 +202,22 @@ SYSCTL_STRUCT(_net_inet_carp, CARPCTL_STATS, stats, CTLFLAG_RW,
&carpstats, carpstats,
"CARP statistics (struct carpstats, netinet/ip_carp.h)");
-struct carp_if {
- TAILQ_HEAD(, carp_softc) vhif_vrs;
- int vhif_nvrs;
-
- struct ifnet *vhif_ifp;
- struct mtx vhif_mtx;
-};
-
-#define CARP_INET 0
-#define CARP_INET6 1
-static int proto_reg[] = {-1, -1};
-
-/* Get carp_if from softc. Valid after carp_set_addr{,6}. */
-#define SC2CIF(sc) ((struct carp_if *)(sc)->sc_carpdev->if_carp)
-
-/* lock per carp_if queue */
-#define CARP_LOCK_INIT(cif) mtx_init(&(cif)->vhif_mtx, "carp_if", \
+#define CARP_LOCK_INIT(sc) mtx_init(&(sc)->sc_mtx, "carp_softc", \
NULL, MTX_DEF)
-#define CARP_LOCK_DESTROY(cif) mtx_destroy(&(cif)->vhif_mtx)
-#define CARP_LOCK_ASSERT(cif) mtx_assert(&(cif)->vhif_mtx, MA_OWNED)
-#define CARP_LOCK(cif) mtx_lock(&(cif)->vhif_mtx)
-#define CARP_UNLOCK(cif) mtx_unlock(&(cif)->vhif_mtx)
-
-#define CARP_SCLOCK(sc) mtx_lock(&SC2CIF(sc)->vhif_mtx)
-#define CARP_SCUNLOCK(sc) mtx_unlock(&SC2CIF(sc)->vhif_mtx)
-#define CARP_SCLOCK_ASSERT(sc) mtx_assert(&SC2CIF(sc)->vhif_mtx, MA_OWNED)
+#define CARP_LOCK_DESTROY(sc) mtx_destroy(&(sc)->sc_mtx)
+#define CARP_LOCK_ASSERT(sc) mtx_assert(&(sc)->sc_mtx, MA_OWNED)
+#define CARP_LOCK(sc) mtx_lock(&(sc)->sc_mtx)
+#define CARP_UNLOCK(sc) mtx_unlock(&(sc)->sc_mtx)
+#define CIF_LOCK_INIT(cif) mtx_init(&(cif)->cif_mtx, "carp_if", \
+ NULL, MTX_DEF)
+#define CIF_LOCK_DESTROY(cif) mtx_destroy(&(cif)->cif_mtx)
+#define CIF_LOCK_ASSERT(cif) mtx_assert(&(cif)->cif_mtx, MA_OWNED)
+#define CIF_LOCK(cif) mtx_lock(&(cif)->cif_mtx)
+#define CIF_UNLOCK(cif) mtx_unlock(&(cif)->cif_mtx)
#define CARP_LOG(...) do { \
if (carp_opts[CARPCTL_LOG] > 0) \
- log(LOG_INFO, __VA_ARGS__); \
+ log(LOG_INFO, "carp: " __VA_ARGS__); \
} while (0)
#define CARP_DEBUG(...) do { \
@@ -198,58 +225,43 @@ static int proto_reg[] = {-1, -1};
log(LOG_DEBUG, __VA_ARGS__); \
} while (0)
-static void carp_hmac_prepare(struct carp_softc *);
-static void carp_hmac_generate(struct carp_softc *, u_int32_t *,
- unsigned char *);
-static int carp_hmac_verify(struct carp_softc *, u_int32_t *,
- unsigned char *);
-static void carp_setroute(struct carp_softc *, int);
+#define IFNET_FOREACH_IFA(ifp, ifa) \
+ IF_ADDR_LOCK_ASSERT(ifp); \
+ TAILQ_FOREACH((ifa), &(ifp)->if_addrhead, ifa_link) \
+ if ((ifa)->ifa_carp != NULL)
+
+#define CARP_FOREACH_IFA(sc, ifa) \
+ CARP_LOCK_ASSERT(sc); \
+ for (int _i = 0; \
+ _i < (sc)->sc_naddrs + (sc)->sc_naddrs6 && \
+ ((ifa) = sc->sc_ifas[_i]) != NULL; \
+ ++_i)
+
+#define IFNET_FOREACH_CARP(ifp, sc) \
+ CIF_LOCK_ASSERT(ifp->if_carp); \
+ TAILQ_FOREACH((sc), &(ifp)->if_carp->cif_vrs, sc_list)
+
static void carp_input_c(struct mbuf *, struct carp_header *, sa_family_t);
-static int carp_clone_create(struct if_clone *, int, caddr_t);
-static void carp_clone_destroy(struct ifnet *);
-static void carpdetach(struct carp_softc *, int);
-static int carp_prepare_ad(struct mbuf *, struct carp_softc *,
- struct carp_header *);
-static void carp_send_ad_all(void);
-static void carp_send_ad(void *);
-static void carp_send_ad_locked(struct carp_softc *);
-#ifdef INET
-static void carp_send_arp(struct carp_softc *);
-#endif
+static struct carp_softc
+ *carp_alloc(struct ifnet *);
+static void carp_destroy(struct carp_softc *);
+static struct carp_if
+ *carp_alloc_if(struct ifnet *);
+static void carp_free_if(struct carp_if *);
+static void carp_set_state(struct carp_softc *, int);
+static void carp_sc_state(struct carp_softc *);
+static void carp_setrun(struct carp_softc *, sa_family_t);
static void carp_master_down(void *);
static void carp_master_down_locked(struct carp_softc *);
-static int carp_ioctl(struct ifnet *, u_long, caddr_t);
-static int carp_looutput(struct ifnet *, struct mbuf *, struct sockaddr *,
- struct route *);
-static void carp_start(struct ifnet *);
-static void carp_setrun(struct carp_softc *, sa_family_t);
-static void carp_set_state(struct carp_softc *, int);
-#ifdef INET
-static int carp_addrcount(struct carp_if *, struct in_ifaddr *, int);
-#endif
-enum { CARP_COUNT_MASTER, CARP_COUNT_RUNNING };
-
-#ifdef INET
-static void carp_multicast_cleanup(struct carp_softc *, int dofree);
-static int carp_set_addr(struct carp_softc *, struct sockaddr_in *);
-static int carp_del_addr(struct carp_softc *, struct sockaddr_in *);
-#endif
-static void carp_carpdev_state_locked(struct carp_if *);
-static void carp_sc_state_locked(struct carp_softc *);
-#ifdef INET6
-static void carp_send_na(struct carp_softc *);
-static int carp_set_addr6(struct carp_softc *, struct sockaddr_in6 *);
-static int carp_del_addr6(struct carp_softc *, struct sockaddr_in6 *);
-static void carp_multicast6_cleanup(struct carp_softc *, int dofree);
-#endif
+static void carp_send_ad(void *);
+static void carp_send_ad_locked(struct carp_softc *);
+static void carp_addroute(struct carp_softc *);
+static void carp_delroute(struct carp_softc *);
-static LIST_HEAD(, carp_softc) carpif_list;
+static LIST_HEAD(, carp_softc) carp_list;
static struct mtx carp_mtx;
-IFC_SIMPLE_DECLARE(carp, 0);
-
-static eventhandler_tag if_detach_event_tag;
-static __inline u_int16_t
+static __inline uint16_t
carp_cksum(struct mbuf *m, int len)
{
return (in_cksum(m, len));
@@ -258,8 +270,8 @@ carp_cksum(struct mbuf *m, int len)
static void
carp_hmac_prepare(struct carp_softc *sc)
{
- u_int8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT;
- u_int8_t vhid = sc->sc_vhid & 0xff;
+ uint8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT;
+ uint8_t vhid = sc->sc_vhid & 0xff;
struct ifaddr *ifa;
int i, found;
#ifdef INET
@@ -269,18 +281,15 @@ carp_hmac_prepare(struct carp_softc *sc)
struct in6_addr last6, cur6, in6;
#endif
- if (sc->sc_carpdev)
- CARP_SCLOCK(sc);
-
- /* XXX: possible race here */
+ CARP_LOCK_ASSERT(sc);
- /* compute ipad from key */
+ /* Compute ipad from key. */
bzero(sc->sc_pad, sizeof(sc->sc_pad));
bcopy(sc->sc_key, sc->sc_pad, sizeof(sc->sc_key));
for (i = 0; i < sizeof(sc->sc_pad); i++)
sc->sc_pad[i] ^= 0x36;
- /* precompute first part of inner hash */
+ /* Precompute first part of inner hash. */
SHA1Init(&sc->sc_sha1);
SHA1Update(&sc->sc_sha1, sc->sc_pad, sizeof(sc->sc_pad));
SHA1Update(&sc->sc_sha1, (void *)&version, sizeof(version));
@@ -292,8 +301,7 @@ carp_hmac_prepare(struct carp_softc *sc)
found = 0;
last = cur;
cur.s_addr = 0xffffffff;
- IF_ADDR_LOCK(SC2IFP(sc));
- TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) {
+ CARP_FOREACH_IFA(sc, ifa) {
in.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr;
if (ifa->ifa_addr->sa_family == AF_INET &&
ntohl(in.s_addr) > ntohl(last.s_addr) &&
@@ -302,7 +310,6 @@ carp_hmac_prepare(struct carp_softc *sc)
found++;
}
}
- IF_ADDR_UNLOCK(SC2IFP(sc));
if (found)
SHA1Update(&sc->sc_sha1, (void *)&cur, sizeof(cur));
} while (found);
@@ -313,8 +320,7 @@ carp_hmac_prepare(struct carp_softc *sc)
found = 0;
last6 = cur6;
memset(&cur6, 0xff, sizeof(cur6));
- IF_ADDR_LOCK(SC2IFP(sc));
- TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) {
+ CARP_FOREACH_IFA(sc, ifa) {
in6 = ifatoia6(ifa)->ia_addr.sin6_addr;
if (IN6_IS_SCOPE_EMBED(&in6))
in6.s6_addr16[1] = 0;
@@ -325,7 +331,6 @@ carp_hmac_prepare(struct carp_softc *sc)
found++;
}
}
- IF_ADDR_UNLOCK(SC2IFP(sc));
if (found)
SHA1Update(&sc->sc_sha1, (void *)&cur6, sizeof(cur6));
} while (found);
@@ -334,17 +339,16 @@ carp_hmac_prepare(struct carp_softc *sc)
/* convert ipad to opad */
for (i = 0; i < sizeof(sc->sc_pad); i++)
sc->sc_pad[i] ^= 0x36 ^ 0x5c;
-
- if (sc->sc_carpdev)
- CARP_SCUNLOCK(sc);
}
static void
-carp_hmac_generate(struct carp_softc *sc, u_int32_t counter[2],
+carp_hmac_generate(struct carp_softc *sc, uint32_t counter[2],
unsigned char md[20])
{
SHA1_CTX sha1ctx;
+ CARP_LOCK_ASSERT(sc);
+
/* fetch first half of inner hash */
bcopy(&sc->sc_sha1, &sha1ctx, sizeof(sha1ctx));
@@ -359,203 +363,18 @@ carp_hmac_generate(struct carp_softc *sc, u_int32_t counter[2],
}
static int
-carp_hmac_verify(struct carp_softc *sc, u_int32_t counter[2],
+carp_hmac_verify(struct carp_softc *sc, uint32_t counter[2],
unsigned char md[20])
{
unsigned char md2[20];
- CARP_SCLOCK_ASSERT(sc);
+ CARP_LOCK_ASSERT(sc);
carp_hmac_generate(sc, counter, md2);
return (bcmp(md, md2, sizeof(md2)));
}
-static void
-carp_setroute(struct carp_softc *sc, int cmd)
-{
- struct ifaddr *ifa;
- int s;
-
- if (sc->sc_carpdev)
- CARP_SCLOCK_ASSERT(sc);
-
- s = splnet();
- TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) {
-#ifdef INET
- if (ifa->ifa_addr->sa_family == AF_INET &&
- sc->sc_carpdev != NULL) {
- int count = carp_addrcount(
- (struct carp_if *)sc->sc_carpdev->if_carp,
- ifatoia(ifa), CARP_COUNT_MASTER);
-
- if ((cmd == RTM_ADD && count == 1) ||
- (cmd == RTM_DELETE && count == 0))
- rtinit(ifa, cmd, RTF_UP | RTF_HOST);
- }
-#endif
- }
- splx(s);
-}
-
-static int
-carp_clone_create(struct if_clone *ifc, int unit, caddr_t params)
-{
-
- struct carp_softc *sc;
- struct ifnet *ifp;
-
- sc = malloc(sizeof(*sc), M_CARP, M_WAITOK|M_ZERO);
- ifp = SC2IFP(sc) = if_alloc(IFT_ETHER);
- if (ifp == NULL) {
- free(sc, M_CARP);
- return (ENOSPC);
- }
-
- sc->sc_flags_backup = 0;
- sc->sc_suppress = 0;
- sc->sc_advbase = CARP_DFLTINTV;
- sc->sc_vhid = -1; /* required setting */
- sc->sc_advskew = 0;
- sc->sc_init_counter = 1;
- sc->sc_naddrs = sc->sc_naddrs6 = 0; /* M_ZERO? */
-#ifdef INET
- sc->sc_imo.imo_membership = (struct in_multi **)malloc(
- (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_CARP,
- M_WAITOK);
- sc->sc_imo.imo_mfilters = NULL;
- sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS;
- sc->sc_imo.imo_multicast_vif = -1;
-#endif
-#ifdef INET6
- sc->sc_im6o.im6o_membership = (struct in6_multi **)malloc(
- (sizeof(struct in6_multi *) * IPV6_MIN_MEMBERSHIPS), M_CARP,
- M_WAITOK);
- sc->sc_im6o.im6o_mfilters = NULL;
- sc->sc_im6o.im6o_max_memberships = IPV6_MIN_MEMBERSHIPS;
- sc->sc_im6o.im6o_multicast_hlim = CARP_DFLTTL;
-#endif
-
- callout_init(&sc->sc_ad_tmo, CALLOUT_MPSAFE);
- callout_init(&sc->sc_md_tmo, CALLOUT_MPSAFE);
- callout_init(&sc->sc_md6_tmo, CALLOUT_MPSAFE);
-
- ifp->if_softc = sc;
- if_initname(ifp, CARP_IFNAME, unit);
- ifp->if_mtu = ETHERMTU;
- ifp->if_flags = IFF_LOOPBACK;
- ifp->if_ioctl = carp_ioctl;
- ifp->if_output = carp_looutput;
- ifp->if_start = carp_start;
- ifp->if_type = IFT_CARP;
- ifp->if_snd.ifq_maxlen = ifqmaxlen;
- ifp->if_hdrlen = 0;
- if_attach(ifp);
- bpfattach(SC2IFP(sc), DLT_NULL, sizeof(u_int32_t));
- mtx_lock(&carp_mtx);
- LIST_INSERT_HEAD(&carpif_list, sc, sc_next);
- mtx_unlock(&carp_mtx);
- return (0);
-}
-
-static void
-carp_clone_destroy(struct ifnet *ifp)
-{
- struct carp_softc *sc = ifp->if_softc;
-
- if (sc->sc_carpdev)
- CARP_SCLOCK(sc);
- carpdetach(sc, 1); /* Returns unlocked. */
-
- mtx_lock(&carp_mtx);
- LIST_REMOVE(sc, sc_next);
- mtx_unlock(&carp_mtx);
- bpfdetach(ifp);
- if_detach(ifp);
- if_free(ifp);
-#ifdef INET
- free(sc->sc_imo.imo_membership, M_CARP);
-#endif
-#ifdef INET6
- free(sc->sc_im6o.im6o_membership, M_CARP);
-#endif
- free(sc, M_CARP);
-}
-
-/*
- * This function can be called on CARP interface destroy path,
- * and in case of the removal of the underlying interface as
- * well. We differentiate these two cases: in case of destruction
- * of the underlying interface, we do not cleanup our multicast
- * memberships, since they are already freed. But we purge pointers
- * to multicast structures, since they are no longer valid, to
- * avoid panic in future calls to carpdetach(). Also, we do not
- * release the lock on return, because the function will be
- * called once more, for another CARP instance on the same
- * interface.
- */
-static void
-carpdetach(struct carp_softc *sc, int unlock)
-{
- struct carp_if *cif;
-
- callout_stop(&sc->sc_ad_tmo);
- callout_stop(&sc->sc_md_tmo);
- callout_stop(&sc->sc_md6_tmo);
-
- if (sc->sc_suppress)
- carp_suppress_preempt--;
- sc->sc_suppress = 0;
-
- if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS)
- carp_suppress_preempt--;
- sc->sc_sendad_errors = 0;
-
- carp_set_state(sc, INIT);
- SC2IFP(sc)->if_flags &= ~IFF_UP;
- carp_setrun(sc, 0);
-#ifdef INET
- carp_multicast_cleanup(sc, unlock);
-#endif
-#ifdef INET6
- carp_multicast6_cleanup(sc, unlock);
-#endif
-
- if (sc->sc_carpdev != NULL) {
- cif = (struct carp_if *)sc->sc_carpdev->if_carp;
- CARP_LOCK_ASSERT(cif);
- TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list);
- if (!--cif->vhif_nvrs) {
- ifpromisc(sc->sc_carpdev, 0);
- sc->sc_carpdev->if_carp = NULL;
- CARP_LOCK_DESTROY(cif);
- free(cif, M_CARP);
- } else if (unlock)
- CARP_UNLOCK(cif);
- sc->sc_carpdev = NULL;
- }
-}
-
-/* Detach an interface from the carp. */
-static void
-carp_ifdetach(void *arg __unused, struct ifnet *ifp)
-{
- struct carp_if *cif = (struct carp_if *)ifp->if_carp;
- struct carp_softc *sc, *nextsc;
-
- if (cif == NULL)
- return;
-
- /*
- * XXX: At the end of for() cycle the lock will be destroyed.
- */
- CARP_LOCK(cif);
- for (sc = TAILQ_FIRST(&cif->vhif_vrs); sc; sc = nextsc) {
- nextsc = TAILQ_NEXT(sc, sc_list);
- carpdetach(sc, 0);
- }
-}
-
/*
* process input packet.
* we have rearranged checks order compared to the rfc,
@@ -576,20 +395,10 @@ carp_input(struct mbuf *m, int hlen)
return;
}
- /* check if received on a valid carp interface */
- if (m->m_pkthdr.rcvif->if_carp == NULL) {
- CARPSTATS_INC(carps_badif);
- CARP_DEBUG("carp_input: packet received on non-carp "
- "interface: %s\n",
- m->m_pkthdr.rcvif->if_xname);
- m_freem(m);
- return;
- }
-
/* verify that the IP TTL is 255. */
if (ip->ip_ttl != CARP_DFLTTL) {
CARPSTATS_INC(carps_badttl);
- CARP_DEBUG("carp_input: received ttl %d != 255 on %s\n",
+ CARP_DEBUG("%s: received ttl %d != 255 on %s\n", __func__,
ip->ip_ttl,
m->m_pkthdr.rcvif->if_xname);
m_freem(m);
@@ -600,9 +409,8 @@ carp_input(struct mbuf *m, int hlen)
if (m->m_pkthdr.len < iplen + sizeof(*ch)) {
CARPSTATS_INC(carps_badlen);
- CARP_DEBUG("carp_input: received len %zd < "
- "sizeof(struct carp_header) on %s\n",
- m->m_len - sizeof(struct ip),
+ CARP_DEBUG("%s: received len %zd < sizeof(struct carp_header) "
+ "on %s\n", __func__, m->m_len - sizeof(struct ip),
m->m_pkthdr.rcvif->if_xname);
m_freem(m);
return;
@@ -611,7 +419,7 @@ carp_input(struct mbuf *m, int hlen)
if (iplen + sizeof(*ch) < m->m_len) {
if ((m = m_pullup(m, iplen + sizeof(*ch))) == NULL) {
CARPSTATS_INC(carps_hdrops);
- CARP_DEBUG("carp_input: pullup failed\n");
+ CARP_DEBUG("%s: pullup failed\n", __func__);
return;
}
ip = mtod(m, struct ip *);
@@ -625,7 +433,7 @@ carp_input(struct mbuf *m, int hlen)
len = iplen + sizeof(*ch);
if (len > m->m_pkthdr.len) {
CARPSTATS_INC(carps_badlen);
- CARP_DEBUG("carp_input: packet too short %d on %s\n",
+ CARP_DEBUG("%s: packet too short %d on %s\n", __func__,
m->m_pkthdr.len,
m->m_pkthdr.rcvif->if_xname);
m_freem(m);
@@ -643,7 +451,7 @@ carp_input(struct mbuf *m, int hlen)
m->m_data += iplen;
if (carp_cksum(m, len - iplen)) {
CARPSTATS_INC(carps_badsum);
- CARP_DEBUG("carp_input: checksum failed on %s\n",
+ CARP_DEBUG("%s: checksum failed on %s\n", __func__,
m->m_pkthdr.rcvif->if_xname);
m_freem(m);
return;
@@ -673,9 +481,8 @@ carp6_input(struct mbuf **mp, int *offp, int proto)
/* check if received on a valid carp interface */
if (m->m_pkthdr.rcvif->if_carp == NULL) {
CARPSTATS_INC(carps_badif);
- CARP_DEBUG("carp6_input: packet received on non-carp "
- "interface: %s\n",
- m->m_pkthdr.rcvif->if_xname);
+ CARP_DEBUG("%s: packet received on non-carp interface: %s\n",
+ __func__, m->m_pkthdr.rcvif->if_xname);
m_freem(m);
return (IPPROTO_DONE);
}
@@ -683,9 +490,8 @@ carp6_input(struct mbuf **mp, int *offp, int proto)
/* verify that the IP TTL is 255 */
if (ip6->ip6_hlim != CARP_DFLTTL) {
CARPSTATS_INC(carps_badttl);
- CARP_DEBUG("carp6_input: received ttl %d != 255 on %s\n",
- ip6->ip6_hlim,
- m->m_pkthdr.rcvif->if_xname);
+ CARP_DEBUG("%s: received ttl %d != 255 on %s\n", __func__,
+ ip6->ip6_hlim, m->m_pkthdr.rcvif->if_xname);
m_freem(m);
return (IPPROTO_DONE);
}
@@ -695,7 +501,7 @@ carp6_input(struct mbuf **mp, int *offp, int proto)
IP6_EXTHDR_GET(ch, struct carp_header *, m, *offp, sizeof(*ch));
if (ch == NULL) {
CARPSTATS_INC(carps_badlen);
- CARP_DEBUG("carp6_input: packet size %u too small\n", len);
+ CARP_DEBUG("%s: packet size %u too small\n", __func__, len);
return (IPPROTO_DONE);
}
@@ -704,7 +510,7 @@ carp6_input(struct mbuf **mp, int *offp, int proto)
m->m_data += *offp;
if (carp_cksum(m, sizeof(*ch))) {
CARPSTATS_INC(carps_badsum);
- CARP_DEBUG("carp6_input: checksum failed, on %s\n",
+ CARP_DEBUG("%s: checksum failed, on %s\n", __func__,
m->m_pkthdr.rcvif->if_xname);
m_freem(m);
return (IPPROTO_DONE);
@@ -720,62 +526,46 @@ static void
carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af)
{
struct ifnet *ifp = m->m_pkthdr.rcvif;
+ struct ifaddr *ifa;
struct carp_softc *sc;
- u_int64_t tmp_counter;
+ uint64_t tmp_counter;
struct timeval sc_tv, ch_tv;
/* verify that the VHID is valid on the receiving interface */
- CARP_LOCK(ifp->if_carp);
- TAILQ_FOREACH(sc, &((struct carp_if *)ifp->if_carp)->vhif_vrs, sc_list)
- if (sc->sc_vhid == ch->carp_vhid)
+ IF_ADDR_LOCK(ifp);
+ IFNET_FOREACH_IFA(ifp, ifa)
+ if (ifa->ifa_addr->sa_family == af &&
+ ifa->ifa_carp->sc_vhid == ch->carp_vhid) {
+ ifa_ref(ifa);
break;
+ }
+ IF_ADDR_UNLOCK(ifp);
- if (!sc || !((SC2IFP(sc)->if_flags & IFF_UP) &&
- (SC2IFP(sc)->if_drv_flags & IFF_DRV_RUNNING))) {
+ if (ifa == NULL) {
CARPSTATS_INC(carps_badvhid);
- CARP_UNLOCK(ifp->if_carp);
m_freem(m);
return;
}
- getmicrotime(&SC2IFP(sc)->if_lastchange);
- SC2IFP(sc)->if_ipackets++;
- SC2IFP(sc)->if_ibytes += m->m_pkthdr.len;
-
- if (bpf_peers_present(SC2IFP(sc)->if_bpf)) {
- uint32_t af1 = af;
-#ifdef INET
- struct ip *ip = mtod(m, struct ip *);
-
- /* BPF wants net byte order */
- if (af == AF_INET) {
- ip->ip_len = htons(ip->ip_len + (ip->ip_hl << 2));
- ip->ip_off = htons(ip->ip_off);
- }
-#endif
- bpf_mtap2(SC2IFP(sc)->if_bpf, &af1, sizeof(af1), m);
- }
-
/* verify the CARP version. */
if (ch->carp_version != CARP_VERSION) {
CARPSTATS_INC(carps_badver);
- SC2IFP(sc)->if_ierrors++;
- CARP_UNLOCK(ifp->if_carp);
- CARP_DEBUG("%s; invalid version %d\n",
- SC2IFP(sc)->if_xname,
+ CARP_DEBUG("%s: invalid version %d\n", ifp->if_xname,
ch->carp_version);
+ ifa_free(ifa);
m_freem(m);
return;
}
- /* verify the hash */
+ sc = ifa->ifa_carp;
+ CARP_LOCK(sc);
+ ifa_free(ifa);
+
if (carp_hmac_verify(sc, ch->carp_counter, ch->carp_md)) {
CARPSTATS_INC(carps_badauth);
- SC2IFP(sc)->if_ierrors++;
- CARP_UNLOCK(ifp->if_carp);
- CARP_DEBUG("%s: incorrect hash\n", SC2IFP(sc)->if_xname);
- m_freem(m);
- return;
+ CARP_DEBUG("%s: incorrect hash for VHID %u@%s\n", __func__,
+ sc->sc_vhid, ifp->if_xname);
+ goto out;
}
tmp_counter = ntohl(ch->carp_counter[0]);
@@ -806,12 +596,13 @@ carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af)
if (timevalcmp(&sc_tv, &ch_tv, >) ||
timevalcmp(&sc_tv, &ch_tv, ==)) {
callout_stop(&sc->sc_ad_tmo);
- CARP_LOG("%s: MASTER -> BACKUP "
- "(more frequent advertisement received)\n",
- SC2IFP(sc)->if_xname);
+ CARP_LOG("VHID %u@%s: MASTER -> BACKUP "
+ "(more frequent advertisement received)\n",
+ sc->sc_vhid,
+ sc->sc_carpdev->if_xname);
carp_set_state(sc, BACKUP);
carp_setrun(sc, 0);
- carp_setroute(sc, RTM_DELETE);
+ carp_delroute(sc);
}
break;
case BACKUP:
@@ -821,9 +612,10 @@ carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af)
*/
if (carp_opts[CARPCTL_PREEMPT] &&
timevalcmp(&sc_tv, &ch_tv, <)) {
- CARP_LOG("%s: BACKUP -> MASTER "
+ CARP_LOG("VHID %u@%s: BACKUP -> MASTER "
"(preempting a slower master)\n",
- SC2IFP(sc)->if_xname);
+ sc->sc_vhid,
+ sc->sc_carpdev->if_xname);
carp_master_down_locked(sc);
break;
}
@@ -835,9 +627,10 @@ carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af)
*/
sc_tv.tv_sec = sc->sc_advbase * 3;
if (timevalcmp(&sc_tv, &ch_tv, <)) {
- CARP_LOG("%s: BACKUP -> MASTER "
+ CARP_LOG("VHID %u@%s: BACKUP -> MASTER "
"(master timed out)\n",
- SC2IFP(sc)->if_xname);
+ sc->sc_vhid,
+ sc->sc_carpdev->if_xname);
carp_master_down_locked(sc);
break;
}
@@ -850,17 +643,15 @@ carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af)
break;
}
- CARP_UNLOCK(ifp->if_carp);
-
+out:
+ CARP_UNLOCK(sc);
m_freem(m);
- return;
}
static int
carp_prepare_ad(struct mbuf *m, struct carp_softc *sc, struct carp_header *ch)
{
struct m_tag *mtag;
- struct ifnet *ifp = SC2IFP(sc);
if (sc->sc_init_counter) {
/* this could also be seconds since unix epoch */
@@ -876,35 +667,38 @@ carp_prepare_ad(struct mbuf *m, struct carp_softc *sc, struct carp_header *ch)
carp_hmac_generate(sc, ch->carp_counter, ch->carp_md);
/* Tag packet for carp_output */
- mtag = m_tag_get(PACKET_TAG_CARP, sizeof(struct ifnet *), M_NOWAIT);
- if (mtag == NULL) {
+ if ((mtag = m_tag_get(PACKET_TAG_CARP, sizeof(struct carp_softc *),
+ M_NOWAIT)) == NULL) {
m_freem(m);
- SC2IFP(sc)->if_oerrors++;
+ CARPSTATS_INC(carps_onomem);
return (ENOMEM);
}
- bcopy(&ifp, (caddr_t)(mtag + 1), sizeof(struct ifnet *));
+ bcopy(&sc, (caddr_t)(mtag + 1), sizeof(struct carp_softc *));
m_tag_prepend(m, mtag);
return (0);
}
static void
-carp_send_ad_all(void)
+carp_send_ad_all(struct carp_softc *badsc)
{
struct carp_softc *sc;
+ /*
+ * Avoid LOR and recursive call to carp_send_ad_locked().
+ */
+ CARP_UNLOCK(badsc);
+
mtx_lock(&carp_mtx);
- LIST_FOREACH(sc, &carpif_list, sc_next) {
- if (sc->sc_carpdev == NULL)
- continue;
- CARP_SCLOCK(sc);
- if ((SC2IFP(sc)->if_flags & IFF_UP) &&
- (SC2IFP(sc)->if_drv_flags & IFF_DRV_RUNNING) &&
- sc->sc_state == MASTER)
+ LIST_FOREACH(sc, &carp_list, sc_next)
+ if (sc != badsc && sc->sc_state == MASTER) {
+ CARP_LOCK(sc);
carp_send_ad_locked(sc);
- CARP_SCUNLOCK(sc);
- }
+ CARP_UNLOCK(sc);
+ }
mtx_unlock(&carp_mtx);
+
+ CARP_LOCK(badsc);
}
static void
@@ -912,9 +706,9 @@ carp_send_ad(void *v)
{
struct carp_softc *sc = v;
- CARP_SCLOCK(sc);
+ CARP_LOCK_ASSERT(sc);
carp_send_ad_locked(sc);
- CARP_SCUNLOCK(sc);
+ CARP_UNLOCK(sc);
}
static void
@@ -922,48 +716,42 @@ carp_send_ad_locked(struct carp_softc *sc)
{
struct carp_header ch;
struct timeval tv;
+ struct sockaddr sa;
+ struct ifaddr *ifa;
struct carp_header *ch_ptr;
struct mbuf *m;
- int len, advbase, advskew;
+ int len, advskew;
- CARP_SCLOCK_ASSERT(sc);
+ CARP_LOCK_ASSERT(sc);
- /* bow out if we've lost our UPness or RUNNINGuiness */
- if (!((SC2IFP(sc)->if_flags & IFF_UP) &&
- (SC2IFP(sc)->if_drv_flags & IFF_DRV_RUNNING))) {
- advbase = 255;
- advskew = 255;
- } else {
- advbase = sc->sc_advbase;
- if (!carp_suppress_preempt || sc->sc_advskew > 240)
- advskew = sc->sc_advskew;
- else
- advskew = 240;
- tv.tv_sec = advbase;
- tv.tv_usec = advskew * 1000000 / 256;
- }
+ if (!carp_suppress_preempt || sc->sc_advskew > 240)
+ advskew = sc->sc_advskew;
+ else
+ advskew = 240;
+ tv.tv_sec = sc->sc_advbase;
+ tv.tv_usec = advskew * 1000000 / 256;
ch.carp_version = CARP_VERSION;
ch.carp_type = CARP_ADVERTISEMENT;
ch.carp_vhid = sc->sc_vhid;
- ch.carp_advbase = advbase;
+ ch.carp_advbase = sc->sc_advbase;
ch.carp_advskew = advskew;
ch.carp_authlen = 7; /* XXX DEFINE */
ch.carp_pad1 = 0; /* must be zero */
ch.carp_cksum = 0;
+ /* XXXGL: OpenBSD picks first ifaddr with needed family. */
+
#ifdef INET
- if (sc->sc_ia) {
+ if (sc->sc_naddrs) {
struct ip *ip;
- MGETHDR(m, M_DONTWAIT, MT_HEADER);
+ MGETHDR(m, M_NOWAIT, MT_HEADER);
if (m == NULL) {
- SC2IFP(sc)->if_oerrors++;
CARPSTATS_INC(carps_onomem);
/* XXX maybe less ? */
- if (advbase != 255 || advskew != 255)
- callout_reset(&sc->sc_ad_tmo, tvtohz(&tv),
- carp_send_ad, sc);
+ callout_reset(&sc->sc_ad_tmo, tvtohz(&tv),
+ carp_send_ad, sc);
return;
}
len = sizeof(*ip) + sizeof(ch);
@@ -982,7 +770,16 @@ carp_send_ad_locked(struct carp_softc *sc)
ip->ip_ttl = CARP_DFLTTL;
ip->ip_p = IPPROTO_CARP;
ip->ip_sum = 0;
- ip->ip_src.s_addr = sc->sc_ia->ia_addr.sin_addr.s_addr;
+
+ bzero(&sa, sizeof(sa));
+ sa.sa_family = AF_INET;
+ ifa = ifaof_ifpforaddr(&sa, sc->sc_carpdev);
+ if (ifa != NULL) {
+ ip->ip_src.s_addr =
+ ifatoia(ifa)->ia_addr.sin_addr.s_addr;
+ ifa_free(ifa);
+ } else
+ ip->ip_src.s_addr = 0;
ip->ip_dst.s_addr = htonl(INADDR_CARP_GROUP);
ch_ptr = (struct carp_header *)(&ip[1]);
@@ -994,22 +791,16 @@ carp_send_ad_locked(struct carp_softc *sc)
ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip));
m->m_data -= sizeof(*ip);
- getmicrotime(&SC2IFP(sc)->if_lastchange);
- SC2IFP(sc)->if_opackets++;
- SC2IFP(sc)->if_obytes += len;
CARPSTATS_INC(carps_opackets);
- if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL)) {
- SC2IFP(sc)->if_oerrors++;
+ if (ip_output(m, NULL, NULL, IP_RAWOUTPUT,
+ &sc->sc_carpdev->if_carp->cif_imo, NULL)) {
if (sc->sc_sendad_errors < INT_MAX)
sc->sc_sendad_errors++;
if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) {
carp_suppress_preempt++;
- if (carp_suppress_preempt == 1) {
- CARP_SCUNLOCK(sc);
- carp_send_ad_all();
- CARP_SCLOCK(sc);
- }
+ if (carp_suppress_preempt == 1)
+ carp_send_ad_all(sc);
}
sc->sc_sendad_success = 0;
} else {
@@ -1025,17 +816,15 @@ carp_send_ad_locked(struct carp_softc *sc)
}
#endif /* INET */
#ifdef INET6
- if (sc->sc_ia6) {
+ if (sc->sc_naddrs6) {
struct ip6_hdr *ip6;
- MGETHDR(m, M_DONTWAIT, MT_HEADER);
+ MGETHDR(m, M_NOWAIT, MT_HEADER);
if (m == NULL) {
- SC2IFP(sc)->if_oerrors++;
CARPSTATS_INC(carps_onomem);
/* XXX maybe less ? */
- if (advbase != 255 || advskew != 255)
- callout_reset(&sc->sc_ad_tmo, tvtohz(&tv),
- carp_send_ad, sc);
+ callout_reset(&sc->sc_ad_tmo, tvtohz(&tv),
+ carp_send_ad, sc);
return;
}
len = sizeof(*ip6) + sizeof(ch);
@@ -1049,14 +838,23 @@ carp_send_ad_locked(struct carp_softc *sc)
ip6->ip6_vfc |= IPV6_VERSION;
ip6->ip6_hlim = CARP_DFLTTL;
ip6->ip6_nxt = IPPROTO_CARP;
- bcopy(&sc->sc_ia6->ia_addr.sin6_addr, &ip6->ip6_src,
- sizeof(struct in6_addr));
- /* set the multicast destination */
+ bzero(&sa, sizeof(sa));
+
+ /* set the source address */
+ sa.sa_family = AF_INET6;
+ ifa = ifaof_ifpforaddr(&sa, sc->sc_carpdev);
+ if (ifa != NULL) {
+ bcopy(IFA_IN6(ifa), &ip6->ip6_src,
+ sizeof(struct in6_addr));
+ ifa_free(ifa);
+ } else
+ /* This should never happen with IPv6. */
+ bzero(&ip6->ip6_src, sizeof(struct in6_addr));
+ /* Set the multicast destination. */
ip6->ip6_dst.s6_addr16[0] = htons(0xff02);
ip6->ip6_dst.s6_addr8[15] = 0x12;
if (in6_setscope(&ip6->ip6_dst, sc->sc_carpdev, NULL) != 0) {
- SC2IFP(sc)->if_oerrors++;
m_freem(m);
CARP_DEBUG("%s: in6_setscope failed\n", __func__);
return;
@@ -1071,22 +869,16 @@ carp_send_ad_locked(struct carp_softc *sc)
ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip6));
m->m_data -= sizeof(*ip6);
- getmicrotime(&SC2IFP(sc)->if_lastchange);
- SC2IFP(sc)->if_opackets++;
- SC2IFP(sc)->if_obytes += len;
CARPSTATS_INC(carps_opackets6);
- if (ip6_output(m, NULL, NULL, 0, &sc->sc_im6o, NULL, NULL)) {
- SC2IFP(sc)->if_oerrors++;
+ if (ip6_output(m, NULL, NULL, 0,
+ &sc->sc_carpdev->if_carp->cif_im6o, NULL, NULL)) {
if (sc->sc_sendad_errors < INT_MAX)
sc->sc_sendad_errors++;
if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) {
carp_suppress_preempt++;
- if (carp_suppress_preempt == 1) {
- CARP_SCUNLOCK(sc);
- carp_send_ad_all();
- CARP_SCLOCK(sc);
- }
+ if (carp_suppress_preempt == 1)
+ carp_send_ad_all(sc);
}
sc->sc_sendad_success = 0;
} else {
@@ -1102,10 +894,55 @@ carp_send_ad_locked(struct carp_softc *sc)
}
#endif /* INET6 */
- if (advbase != 255 || advskew != 255)
- callout_reset(&sc->sc_ad_tmo, tvtohz(&tv),
- carp_send_ad, sc);
+ callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), carp_send_ad, sc);
+}
+
+static void
+carp_addroute(struct carp_softc *sc)
+{
+ struct ifaddr *ifa;
+
+ CARP_FOREACH_IFA(sc, ifa)
+ switch (ifa->ifa_addr->sa_family) {
+#ifdef INET
+ case AF_INET:
+ in_addprefix(ifatoia(ifa), RTF_UP);
+ ifa_add_loopback_route(ifa,
+ (struct sockaddr *)&ifatoia(ifa)->ia_addr);
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ ifa_add_loopback_route(ifa,
+ (struct sockaddr *)&ifatoia6(ifa)->ia_addr);
+ in6_ifaddloop(ifa);
+ break;
+#endif
+ }
+}
+
+static void
+carp_delroute(struct carp_softc *sc)
+{
+ struct ifaddr *ifa;
+ CARP_FOREACH_IFA(sc, ifa)
+ switch (ifa->ifa_addr->sa_family) {
+#ifdef INET
+ case AF_INET:
+ ifa_del_loopback_route(ifa,
+ (struct sockaddr *)&ifatoia(ifa)->ia_addr);
+ in_scrubprefix(ifatoia(ifa), LLE_STATIC);
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ ifa_del_loopback_route(ifa,
+ (struct sockaddr *)&ifatoia6(ifa)->ia_addr);
+ in6_ifremloop(ifa);
+ break;
+#endif
+ }
}
#ifdef INET
@@ -1119,16 +956,22 @@ carp_send_arp(struct carp_softc *sc)
{
struct ifaddr *ifa;
- TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) {
-
- if (ifa->ifa_addr->sa_family != AF_INET)
- continue;
+ CARP_FOREACH_IFA(sc, ifa)
+ if (ifa->ifa_addr->sa_family == AF_INET)
+ arp_ifinit2(sc->sc_carpdev, ifa, LLADDR(&sc->sc_addr));
+}
-/* arprequest(sc->sc_carpdev, &in, &in, IF_LLADDR(sc->sc_ifp)); */
- arp_ifinit2(sc->sc_carpdev, ifa, IF_LLADDR(sc->sc_ifp));
+int
+carp_iamatch(struct ifaddr *ifa, uint8_t **enaddr)
+{
+ struct carp_softc *sc = ifa->ifa_carp;
- DELAY(1000); /* XXX */
+ if (sc->sc_state == MASTER) {
+ *enaddr = LLADDR(&sc->sc_addr);
+ return (1);
}
+
+ return (0);
}
#endif
@@ -1136,226 +979,95 @@ carp_send_arp(struct carp_softc *sc)
static void
carp_send_na(struct carp_softc *sc)
{
+ static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
struct ifaddr *ifa;
struct in6_addr *in6;
- static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
-
- TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) {
+ CARP_FOREACH_IFA(sc, ifa) {
if (ifa->ifa_addr->sa_family != AF_INET6)
continue;
- in6 = &ifatoia6(ifa)->ia_addr.sin6_addr;
+ in6 = IFA_IN6(ifa);
nd6_na_output(sc->sc_carpdev, &mcast, in6,
ND_NA_FLAG_OVERRIDE, 1, NULL);
DELAY(1000); /* XXX */
}
}
-#endif /* INET6 */
-
-#ifdef INET
-static int
-carp_addrcount(struct carp_if *cif, struct in_ifaddr *ia, int type)
-{
- struct carp_softc *vh;
- struct ifaddr *ifa;
- int count = 0;
-
- CARP_LOCK_ASSERT(cif);
-
- TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
- if ((type == CARP_COUNT_RUNNING &&
- (SC2IFP(vh)->if_flags & IFF_UP) &&
- (SC2IFP(vh)->if_drv_flags & IFF_DRV_RUNNING)) ||
- (type == CARP_COUNT_MASTER && vh->sc_state == MASTER)) {
- IF_ADDR_LOCK(SC2IFP(vh));
- TAILQ_FOREACH(ifa, &SC2IFP(vh)->if_addrlist,
- ifa_list) {
- if (ifa->ifa_addr->sa_family == AF_INET &&
- ia->ia_addr.sin_addr.s_addr ==
- ifatoia(ifa)->ia_addr.sin_addr.s_addr)
- count++;
- }
- IF_ADDR_UNLOCK(SC2IFP(vh));
- }
- }
- return (count);
-}
-
-int
-carp_iamatch(struct ifnet *ifp, struct in_ifaddr *ia,
- struct in_addr *isaddr, u_int8_t **enaddr)
-{
- struct carp_if *cif;
- struct carp_softc *vh;
- int index, count = 0;
- struct ifaddr *ifa;
-
- cif = ifp->if_carp;
- CARP_LOCK(cif);
-
- if (carp_opts[CARPCTL_ARPBALANCE]) {
- /*
- * XXX proof of concept implementation.
- * We use the source ip to decide which virtual host should
- * handle the request. If we're master of that virtual host,
- * then we respond, otherwise, just drop the arp packet on
- * the floor.
- */
- count = carp_addrcount(cif, ia, CARP_COUNT_RUNNING);
- if (count == 0) {
- /* should never reach this */
- CARP_UNLOCK(cif);
- return (0);
- }
-
- /* this should be a hash, like pf_hash() */
- index = ntohl(isaddr->s_addr) % count;
- count = 0;
-
- TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
- if ((SC2IFP(vh)->if_flags & IFF_UP) &&
- (SC2IFP(vh)->if_drv_flags & IFF_DRV_RUNNING)) {
- IF_ADDR_LOCK(SC2IFP(vh));
- TAILQ_FOREACH(ifa, &SC2IFP(vh)->if_addrlist,
- ifa_list) {
- if (ifa->ifa_addr->sa_family ==
- AF_INET &&
- ia->ia_addr.sin_addr.s_addr ==
- ifatoia(ifa)->ia_addr.sin_addr.s_addr) {
- if (count == index) {
- if (vh->sc_state ==
- MASTER) {
- *enaddr = IF_LLADDR(vh->sc_ifp);
- IF_ADDR_UNLOCK(SC2IFP(vh));
- CARP_UNLOCK(cif);
- return (1);
- } else {
- IF_ADDR_UNLOCK(SC2IFP(vh));
- CARP_UNLOCK(cif);
- return (0);
- }
- }
- count++;
- }
- }
- IF_ADDR_UNLOCK(SC2IFP(vh));
- }
- }
- } else {
- TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
- if ((SC2IFP(vh)->if_flags & IFF_UP) &&
- (SC2IFP(vh)->if_drv_flags & IFF_DRV_RUNNING) &&
- ia->ia_ifp == SC2IFP(vh) &&
- vh->sc_state == MASTER) {
- *enaddr = IF_LLADDR(vh->sc_ifp);
- CARP_UNLOCK(cif);
- return (1);
- }
- }
- }
- CARP_UNLOCK(cif);
- return (0);
-}
-#endif
-#ifdef INET6
struct ifaddr *
carp_iamatch6(struct ifnet *ifp, struct in6_addr *taddr)
{
- struct carp_if *cif;
- struct carp_softc *vh;
struct ifaddr *ifa;
- cif = ifp->if_carp;
- CARP_LOCK(cif);
- TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
- IF_ADDR_LOCK(SC2IFP(vh));
- TAILQ_FOREACH(ifa, &SC2IFP(vh)->if_addrlist, ifa_list) {
- if (IN6_ARE_ADDR_EQUAL(taddr,
- &ifatoia6(ifa)->ia_addr.sin6_addr) &&
- (SC2IFP(vh)->if_flags & IFF_UP) &&
- (SC2IFP(vh)->if_drv_flags & IFF_DRV_RUNNING) &&
- vh->sc_state == MASTER) {
- ifa_ref(ifa);
- IF_ADDR_UNLOCK(SC2IFP(vh));
- CARP_UNLOCK(cif);
- return (ifa);
- }
+ IF_ADDR_LOCK(ifp);
+ IFNET_FOREACH_IFA(ifp, ifa)
+ if (ifa->ifa_addr->sa_family == AF_INET6 &&
+ ifa->ifa_carp->sc_state == MASTER &&
+ IN6_ARE_ADDR_EQUAL(taddr, IFA_IN6(ifa))) {
+ ifa_ref(ifa);
+ IF_ADDR_UNLOCK(ifp);
+ return (ifa);
}
- IF_ADDR_UNLOCK(SC2IFP(vh));
- }
- CARP_UNLOCK(cif);
-
+ IF_ADDR_UNLOCK(ifp);
+
return (NULL);
}
caddr_t
carp_macmatch6(struct ifnet *ifp, struct mbuf *m, const struct in6_addr *taddr)
{
- struct m_tag *mtag;
- struct carp_if *cif;
- struct carp_softc *sc;
struct ifaddr *ifa;
- cif = ifp->if_carp;
- CARP_LOCK(cif);
- TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) {
- IF_ADDR_LOCK(SC2IFP(sc));
- TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) {
- if (IN6_ARE_ADDR_EQUAL(taddr,
- &ifatoia6(ifa)->ia_addr.sin6_addr) &&
- (SC2IFP(sc)->if_flags & IFF_UP) &&
- (SC2IFP(sc)->if_drv_flags & IFF_DRV_RUNNING)) {
- struct ifnet *ifp = SC2IFP(sc);
- mtag = m_tag_get(PACKET_TAG_CARP,
- sizeof(struct ifnet *), M_NOWAIT);
- if (mtag == NULL) {
- /* better a bit than nothing */
- IF_ADDR_UNLOCK(SC2IFP(sc));
- CARP_UNLOCK(cif);
- return (IF_LLADDR(sc->sc_ifp));
- }
- bcopy(&ifp, (caddr_t)(mtag + 1),
- sizeof(struct ifnet *));
- m_tag_prepend(m, mtag);
+ IF_ADDR_LOCK(ifp);
+ IFNET_FOREACH_IFA(ifp, ifa)
+ if (ifa->ifa_addr->sa_family == AF_INET6 &&
+ IN6_ARE_ADDR_EQUAL(taddr, IFA_IN6(ifa))) {
+ struct carp_softc *sc = ifa->ifa_carp;
+ struct m_tag *mtag;
- IF_ADDR_UNLOCK(SC2IFP(sc));
- CARP_UNLOCK(cif);
- return (IF_LLADDR(sc->sc_ifp));
- }
+ IF_ADDR_UNLOCK(ifp);
+
+ mtag = m_tag_get(PACKET_TAG_CARP,
+ sizeof(struct ifnet *), M_NOWAIT);
+ if (mtag == NULL)
+ /* Better a bit than nothing. */
+ return (LLADDR(&sc->sc_addr));
+
+ bcopy(&ifp, (caddr_t)(mtag + 1),
+ sizeof(struct ifnet *));
+ m_tag_prepend(m, mtag);
+
+ return (LLADDR(&sc->sc_addr));
}
- IF_ADDR_UNLOCK(SC2IFP(sc));
- }
- CARP_UNLOCK(cif);
+ IF_ADDR_UNLOCK(ifp);
return (NULL);
}
-#endif
+#endif /* INET6 */
-struct ifnet *
+int
carp_forus(struct ifnet *ifp, u_char *dhost)
{
- struct carp_if *cif;
- struct carp_softc *vh;
- u_int8_t *ena = dhost;
+ struct carp_softc *sc;
+ uint8_t *ena = dhost;
if (ena[0] || ena[1] || ena[2] != 0x5e || ena[3] || ena[4] != 1)
- return (NULL);
-
- cif = ifp->if_carp;
- CARP_LOCK(cif);
- TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list)
- if ((SC2IFP(vh)->if_flags & IFF_UP) &&
- (SC2IFP(vh)->if_drv_flags & IFF_DRV_RUNNING) &&
- vh->sc_state == MASTER &&
- !bcmp(dhost, IF_LLADDR(vh->sc_ifp), ETHER_ADDR_LEN)) {
- CARP_UNLOCK(cif);
- return (SC2IFP(vh));
+ return (0);
+
+ CIF_LOCK(ifp->if_carp);
+ IFNET_FOREACH_CARP(ifp, sc) {
+ CARP_LOCK(sc);
+ if (sc->sc_state == MASTER && !bcmp(dhost, LLADDR(&sc->sc_addr),
+ ETHER_ADDR_LEN)) {
+ CARP_UNLOCK(sc);
+ CIF_UNLOCK(ifp->if_carp);
+ return (1);
}
+ CARP_UNLOCK(sc);
+ }
+ CIF_UNLOCK(ifp->if_carp);
- CARP_UNLOCK(cif);
- return (NULL);
+ return (0);
}
static void
@@ -1363,24 +1075,25 @@ carp_master_down(void *v)
{
struct carp_softc *sc = v;
- CARP_SCLOCK(sc);
- carp_master_down_locked(sc);
- CARP_SCUNLOCK(sc);
+ CARP_LOCK_ASSERT(sc);
+
+ if (sc->sc_state == BACKUP) {
+ CARP_LOG("VHID %u@%s: BACKUP -> MASTER (preempting)\n",
+ sc->sc_vhid,
+ sc->sc_carpdev->if_xname);
+ carp_master_down_locked(sc);
+ }
+
+ CARP_UNLOCK(sc);
}
static void
carp_master_down_locked(struct carp_softc *sc)
{
- if (sc->sc_carpdev)
- CARP_SCLOCK_ASSERT(sc);
+
+ CARP_LOCK_ASSERT(sc);
switch (sc->sc_state) {
- case INIT:
- printf("%s: master_down event in INIT state\n",
- SC2IFP(sc)->if_xname);
- break;
- case MASTER:
- break;
case BACKUP:
carp_set_state(sc, MASTER);
carp_send_ad_locked(sc);
@@ -1389,9 +1102,18 @@ carp_master_down_locked(struct carp_softc *sc)
#endif
#ifdef INET6
carp_send_na(sc);
-#endif /* INET6 */
+#endif
carp_setrun(sc, 0);
- carp_setroute(sc, RTM_ADD);
+ carp_addroute(sc);
+ break;
+ case INIT:
+ case MASTER:
+#ifdef INVARIANTS
+ panic("carp: VHID %u@%s: master_down event in %s state\n",
+ sc->sc_vhid,
+ sc->sc_carpdev->if_xname,
+ sc->sc_state ? "MASTER" : "INIT");
+#endif
break;
}
}
@@ -1405,28 +1127,19 @@ carp_setrun(struct carp_softc *sc, sa_family_t af)
{
struct timeval tv;
- if (sc->sc_carpdev == NULL) {
- SC2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING;
- carp_set_state(sc, INIT);
- return;
- } else
- CARP_SCLOCK_ASSERT(sc);
-
- if (SC2IFP(sc)->if_flags & IFF_UP &&
- sc->sc_vhid > 0 && (sc->sc_naddrs || sc->sc_naddrs6) &&
- sc->sc_carpdev->if_link_state == LINK_STATE_UP)
- SC2IFP(sc)->if_drv_flags |= IFF_DRV_RUNNING;
- else {
- SC2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING;
- carp_setroute(sc, RTM_DELETE);
+ CARP_LOCK_ASSERT(sc);
+
+ if ((sc->sc_carpdev->if_flags & IFF_UP) == 0 ||
+ sc->sc_carpdev->if_link_state != LINK_STATE_UP ||
+ (sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0))
return;
- }
switch (sc->sc_state) {
case INIT:
- CARP_LOG("%s: INIT -> BACKUP\n", SC2IFP(sc)->if_xname);
+ CARP_LOG("VHID %u@%s: INIT -> BACKUP\n",
+ sc->sc_vhid,
+ sc->sc_carpdev->if_xname);
carp_set_state(sc, BACKUP);
- carp_setroute(sc, RTM_DELETE);
carp_setrun(sc, 0);
break;
case BACKUP:
@@ -1439,20 +1152,24 @@ carp_setrun(struct carp_softc *sc, sa_family_t af)
callout_reset(&sc->sc_md_tmo, tvtohz(&tv),
carp_master_down, sc);
break;
-#endif /* INET */
+#endif
#ifdef INET6
case AF_INET6:
callout_reset(&sc->sc_md6_tmo, tvtohz(&tv),
carp_master_down, sc);
break;
-#endif /* INET6 */
+#endif
default:
+#ifdef INET
if (sc->sc_naddrs)
callout_reset(&sc->sc_md_tmo, tvtohz(&tv),
carp_master_down, sc);
+#endif
+#ifdef INET6
if (sc->sc_naddrs6)
callout_reset(&sc->sc_md6_tmo, tvtohz(&tv),
carp_master_down, sc);
+#endif
break;
}
break;
@@ -1465,546 +1182,473 @@ carp_setrun(struct carp_softc *sc, sa_family_t af)
}
}
-#ifdef INET
-static void
-carp_multicast_cleanup(struct carp_softc *sc, int dofree)
+/*
+ * Setup multicast structures.
+ */
+static int
+carp_multicast_setup(struct carp_softc *sc, sa_family_t sa)
{
- struct ip_moptions *imo = &sc->sc_imo;
- u_int16_t n = imo->imo_num_memberships;
-
- /* Clean up our own multicast memberships */
- while (n-- > 0) {
- if (imo->imo_membership[n] != NULL) {
- if (dofree)
- in_delmulti(imo->imo_membership[n]);
- imo->imo_membership[n] = NULL;
+ struct ifnet *ifp = sc->sc_carpdev;
+ struct carp_if *cif = ifp->if_carp;
+ int error = 0;
+
+ switch (sa) {
+#ifdef INET
+ case AF_INET:
+ {
+ struct ip_moptions *imo = &cif->cif_imo;
+ struct in_addr addr;
+
+ if (imo->imo_membership)
+ return (0);
+
+ imo->imo_membership = (struct in_multi **)malloc(
+ (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_CARP,
+ M_WAITOK);
+ imo->imo_mfilters = NULL;
+ imo->imo_max_memberships = IP_MIN_MEMBERSHIPS;
+ imo->imo_multicast_vif = -1;
+
+ addr.s_addr = htonl(INADDR_CARP_GROUP);
+ if ((error = in_joingroup(ifp, &addr, NULL,
+ &imo->imo_membership[0])) != 0) {
+ free(imo->imo_membership, M_CARP);
+ break;
+ }
+ imo->imo_num_memberships++;
+ imo->imo_multicast_ifp = ifp;
+ imo->imo_multicast_ttl = CARP_DFLTTL;
+ imo->imo_multicast_loop = 0;
+ break;
+ }
+#endif
+#ifdef INET6
+ case AF_INET6:
+ {
+ struct ip6_moptions *im6o = &cif->cif_im6o;
+ struct in6_addr in6;
+ struct in6_multi *in6m;
+
+ if (im6o->im6o_membership)
+ return (0);
+
+ im6o->im6o_membership = (struct in6_multi **)malloc(
+ (sizeof(struct in6_multi *) * IPV6_MIN_MEMBERSHIPS), M_CARP,
+ M_ZERO|M_WAITOK);
+ im6o->im6o_mfilters = NULL;
+ im6o->im6o_max_memberships = IPV6_MIN_MEMBERSHIPS;
+ im6o->im6o_multicast_hlim = CARP_DFLTTL;
+ im6o->im6o_multicast_ifp = ifp;
+
+ /* Join IPv6 CARP multicast group. */
+ bzero(&in6, sizeof(in6));
+ in6.s6_addr16[0] = htons(0xff02);
+ in6.s6_addr8[15] = 0x12;
+ if ((error = in6_setscope(&in6, ifp, NULL)) != 0) {
+ free(im6o->im6o_membership, M_CARP);
+ break;
+ }
+ in6m = NULL;
+ if ((error = in6_mc_join(ifp, &in6, NULL, &in6m, 0)) != 0) {
+ free(im6o->im6o_membership, M_CARP);
+ break;
}
+ im6o->im6o_membership[0] = in6m;
+ im6o->im6o_num_memberships++;
+
+ /* Join solicited multicast address. */
+ bzero(&in6, sizeof(in6));
+ in6.s6_addr16[0] = htons(0xff02);
+ in6.s6_addr32[1] = 0;
+ in6.s6_addr32[2] = htonl(1);
+ in6.s6_addr32[3] = 0;
+ in6.s6_addr8[12] = 0xff;
+ if ((error = in6_setscope(&in6, ifp, NULL)) != 0) {
+ in6_mc_leave(im6o->im6o_membership[0], NULL);
+ free(im6o->im6o_membership, M_CARP);
+ break;
+ }
+ in6m = NULL;
+ if ((error = in6_mc_join(ifp, &in6, NULL, &in6m, 0)) != 0) {
+ in6_mc_leave(im6o->im6o_membership[0], NULL);
+ free(im6o->im6o_membership, M_CARP);
+ break;
+ }
+ im6o->im6o_membership[1] = in6m;
+ im6o->im6o_num_memberships++;
+ break;
+ }
+#endif
}
- KASSERT(imo->imo_mfilters == NULL,
- ("%s: imo_mfilters != NULL", __func__));
- imo->imo_num_memberships = 0;
- imo->imo_multicast_ifp = NULL;
+
+ return (error);
}
-#endif
-#ifdef INET6
+/*
+ * Free multicast structures.
+ */
static void
-carp_multicast6_cleanup(struct carp_softc *sc, int dofree)
+carp_multicast_cleanup(struct carp_softc *sc, sa_family_t sa)
{
- struct ip6_moptions *im6o = &sc->sc_im6o;
- u_int16_t n = im6o->im6o_num_memberships;
-
- while (n-- > 0) {
- if (im6o->im6o_membership[n] != NULL) {
- if (dofree)
- in6_mc_leave(im6o->im6o_membership[n], NULL);
- im6o->im6o_membership[n] = NULL;
+ struct ifnet *ifp = sc->sc_carpdev;
+ struct carp_if *cif = ifp->if_carp;
+
+ switch (sa) {
+#ifdef INET
+ case AF_INET:
+ if (sc->sc_naddrs == 0) {
+ struct ip_moptions *imo = &cif->cif_imo;
+
+ in_leavegroup(imo->imo_membership[0], NULL);
+ KASSERT(imo->imo_mfilters == NULL,
+ ("%s: imo_mfilters != NULL", __func__));
+ free(imo->imo_membership, M_CARP);
+ imo->imo_membership = NULL;
+
+ }
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ if (sc->sc_naddrs6 == 0) {
+ struct ip6_moptions *im6o = &cif->cif_im6o;
+
+ in6_mc_leave(im6o->im6o_membership[0], NULL);
+ in6_mc_leave(im6o->im6o_membership[1], NULL);
+ KASSERT(im6o->im6o_mfilters == NULL,
+ ("%s: im6o_mfilters != NULL", __func__));
+ free(im6o->im6o_membership, M_CARP);
+ im6o->im6o_membership = NULL;
}
+ break;
+#endif
}
- KASSERT(im6o->im6o_mfilters == NULL,
- ("%s: im6o_mfilters != NULL", __func__));
- im6o->im6o_num_memberships = 0;
- im6o->im6o_multicast_ifp = NULL;
}
-#endif
-#ifdef INET
-static int
-carp_set_addr(struct carp_softc *sc, struct sockaddr_in *sin)
+int
+carp_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa)
{
- struct ifnet *ifp;
- struct carp_if *cif;
- struct in_ifaddr *ia, *ia_if;
- struct ip_moptions *imo = &sc->sc_imo;
- struct in_addr addr;
- u_long iaddr = htonl(sin->sin_addr.s_addr);
- int own, error;
-
- if (sin->sin_addr.s_addr == 0) {
- if (!(SC2IFP(sc)->if_flags & IFF_UP))
- carp_set_state(sc, INIT);
- if (sc->sc_naddrs)
- SC2IFP(sc)->if_flags |= IFF_UP;
- if (sc->sc_carpdev)
- CARP_SCLOCK(sc);
- carp_setrun(sc, 0);
- if (sc->sc_carpdev)
- CARP_SCUNLOCK(sc);
- return (0);
- }
+ struct m_tag *mtag;
+ struct carp_softc *sc;
- /* we have to do it by hands to check we won't match on us */
- ia_if = NULL; own = 0;
- IN_IFADDR_RLOCK();
- TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
- /* and, yeah, we need a multicast-capable iface too */
- if (ia->ia_ifp != SC2IFP(sc) &&
- (ia->ia_ifp->if_flags & IFF_MULTICAST) &&
- (iaddr & ia->ia_subnetmask) == ia->ia_subnet) {
- if (!ia_if)
- ia_if = ia;
- if (sin->sin_addr.s_addr ==
- ia->ia_addr.sin_addr.s_addr)
- own++;
- }
- }
+ if (!sa)
+ return (0);
- if (!ia_if) {
- IN_IFADDR_RUNLOCK();
- return (EADDRNOTAVAIL);
+ switch (sa->sa_family) {
+#ifdef INET
+ case AF_INET:
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ break;
+#endif
+ default:
+ return (0);
}
- ia = ia_if;
- ifa_ref(&ia->ia_ifa);
- IN_IFADDR_RUNLOCK();
+ mtag = m_tag_find(m, PACKET_TAG_CARP, NULL);
+ if (mtag == NULL)
+ return (0);
- ifp = ia->ia_ifp;
+ bcopy(mtag + 1, &sc, sizeof(struct carp_softc *));
- if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0 ||
- (imo->imo_multicast_ifp && imo->imo_multicast_ifp != ifp)) {
- ifa_free(&ia->ia_ifa);
- return (EADDRNOTAVAIL);
- }
+ /* Set the source MAC address to the Virtual Router MAC Address. */
+ switch (ifp->if_type) {
+ case IFT_ETHER:
+ case IFT_L2VLAN: {
+ struct ether_header *eh;
- if (imo->imo_num_memberships == 0) {
- addr.s_addr = htonl(INADDR_CARP_GROUP);
- if ((imo->imo_membership[0] = in_addmulti(&addr, ifp)) ==
- NULL) {
- ifa_free(&ia->ia_ifa);
- return (ENOBUFS);
+ eh = mtod(m, struct ether_header *);
+ eh->ether_shost[0] = 0;
+ eh->ether_shost[1] = 0;
+ eh->ether_shost[2] = 0x5e;
+ eh->ether_shost[3] = 0;
+ eh->ether_shost[4] = 1;
+ eh->ether_shost[5] = sc->sc_vhid;
}
- imo->imo_num_memberships++;
- imo->imo_multicast_ifp = ifp;
- imo->imo_multicast_ttl = CARP_DFLTTL;
- imo->imo_multicast_loop = 0;
- }
-
- if (!ifp->if_carp) {
+ break;
+ case IFT_FDDI: {
+ struct fddi_header *fh;
- cif = malloc(sizeof(*cif), M_CARP,
- M_WAITOK|M_ZERO);
- if (!cif) {
- error = ENOBUFS;
- goto cleanup;
+ fh = mtod(m, struct fddi_header *);
+ fh->fddi_shost[0] = 0;
+ fh->fddi_shost[1] = 0;
+ fh->fddi_shost[2] = 0x5e;
+ fh->fddi_shost[3] = 0;
+ fh->fddi_shost[4] = 1;
+ fh->fddi_shost[5] = sc->sc_vhid;
}
- if ((error = ifpromisc(ifp, 1))) {
- free(cif, M_CARP);
- goto cleanup;
+ break;
+ case IFT_ISO88025: {
+ struct iso88025_header *th;
+ th = mtod(m, struct iso88025_header *);
+ th->iso88025_shost[0] = 3;
+ th->iso88025_shost[1] = 0;
+ th->iso88025_shost[2] = 0x40 >> (sc->sc_vhid - 1);
+ th->iso88025_shost[3] = 0x40000 >> (sc->sc_vhid - 1);
+ th->iso88025_shost[4] = 0;
+ th->iso88025_shost[5] = 0;
}
-
- CARP_LOCK_INIT(cif);
- CARP_LOCK(cif);
- cif->vhif_ifp = ifp;
- TAILQ_INIT(&cif->vhif_vrs);
- ifp->if_carp = cif;
-
- } else {
- struct carp_softc *vr;
-
- cif = (struct carp_if *)ifp->if_carp;
- CARP_LOCK(cif);
- TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list)
- if (vr != sc && vr->sc_vhid == sc->sc_vhid) {
- CARP_UNLOCK(cif);
- error = EEXIST;
- goto cleanup;
- }
+ break;
+ default:
+ printf("%s: carp is not supported for the %d interface type\n",
+ ifp->if_xname, ifp->if_type);
+ return (EOPNOTSUPP);
}
- sc->sc_ia = ia;
- sc->sc_carpdev = ifp;
- { /* XXX prevent endless loop if already in queue */
- struct carp_softc *vr, *after = NULL;
- int myself = 0;
- cif = (struct carp_if *)ifp->if_carp;
+ return (0);
+}
- /* XXX: cif should not change, right? So we still hold the lock */
- CARP_LOCK_ASSERT(cif);
+static struct carp_softc*
+carp_alloc(struct ifnet *ifp)
+{
+ struct carp_softc *sc;
+ struct carp_if *cif;
- TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) {
- if (vr == sc)
- myself = 1;
- if (vr->sc_vhid < sc->sc_vhid)
- after = vr;
+ if ((cif = ifp->if_carp) == NULL) {
+ cif = carp_alloc_if(ifp);
+ if (cif == NULL)
+ return (NULL);
}
- if (!myself) {
- /* We're trying to keep things in order */
- if (after == NULL) {
- TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list);
- } else {
- TAILQ_INSERT_AFTER(&cif->vhif_vrs, after, sc, sc_list);
- }
- cif->vhif_nvrs++;
- }
- }
+ sc = malloc(sizeof(*sc), M_CARP, M_WAITOK|M_ZERO);
- sc->sc_naddrs++;
- SC2IFP(sc)->if_flags |= IFF_UP;
- if (own)
- sc->sc_advskew = 0;
- carp_sc_state_locked(sc);
- carp_setrun(sc, 0);
+ sc->sc_advbase = CARP_DFLTINTV;
+ sc->sc_vhid = -1; /* required setting */
+ sc->sc_init_counter = 1;
+ sc->sc_state = INIT;
- CARP_UNLOCK(cif);
- ifa_free(&ia->ia_ifa); /* XXXRW: should hold reference for softc. */
+ sc->sc_ifasiz = sizeof(struct ifaddr *);
+ sc->sc_ifas = malloc(sc->sc_ifasiz, M_CARP, M_WAITOK|M_ZERO);
+ sc->sc_carpdev = ifp;
- return (0);
+ CARP_LOCK_INIT(sc);
+#ifdef INET
+ callout_init_mtx(&sc->sc_md_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED);
+#endif
+#ifdef INET6
+ callout_init_mtx(&sc->sc_md6_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED);
+#endif
+ callout_init_mtx(&sc->sc_ad_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED);
-cleanup:
- in_delmulti(imo->imo_membership[--imo->imo_num_memberships]);
- ifa_free(&ia->ia_ifa);
- return (error);
+ CIF_LOCK(cif);
+ TAILQ_INSERT_TAIL(&cif->cif_vrs, sc, sc_list);
+ CIF_UNLOCK(cif);
+
+ mtx_lock(&carp_mtx);
+ LIST_INSERT_HEAD(&carp_list, sc, sc_next);
+ mtx_unlock(&carp_mtx);
+
+ return (sc);
}
static int
-carp_del_addr(struct carp_softc *sc, struct sockaddr_in *sin)
+carp_grow_ifas(struct carp_softc *sc)
{
- int error = 0;
+ struct ifaddr **new;
- if (!--sc->sc_naddrs) {
- struct carp_if *cif = (struct carp_if *)sc->sc_carpdev->if_carp;
- struct ip_moptions *imo = &sc->sc_imo;
+ CARP_LOCK_ASSERT(sc);
- CARP_LOCK(cif);
- callout_stop(&sc->sc_ad_tmo);
- SC2IFP(sc)->if_flags &= ~IFF_UP;
- SC2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING;
- sc->sc_vhid = -1;
- in_delmulti(imo->imo_membership[--imo->imo_num_memberships]);
- imo->imo_multicast_ifp = NULL;
- TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list);
- if (!--cif->vhif_nvrs) {
- sc->sc_carpdev->if_carp = NULL;
- CARP_LOCK_DESTROY(cif);
- free(cif, M_CARP);
- } else {
- CARP_UNLOCK(cif);
- }
- }
+ new = malloc(sc->sc_ifasiz * 2, M_CARP, M_NOWAIT|M_ZERO);
+ if (new == NULL)
+ return (ENOMEM);
+ bcopy(sc->sc_ifas, new, sc->sc_ifasiz);
+ free(sc->sc_ifas, M_CARP);
+ sc->sc_ifas = new;
+ sc->sc_ifasiz *= 2;
- return (error);
+ return (0);
}
-#endif
-#ifdef INET6
-static int
-carp_set_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6)
+static void
+carp_destroy(struct carp_softc *sc)
{
- struct ifnet *ifp;
- struct carp_if *cif;
- struct in6_ifaddr *ia, *ia_if;
- struct ip6_moptions *im6o = &sc->sc_im6o;
- struct in6_addr in6;
- int own, error;
-
- error = 0;
-
- if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
- if (!(SC2IFP(sc)->if_flags & IFF_UP))
- carp_set_state(sc, INIT);
- if (sc->sc_naddrs6)
- SC2IFP(sc)->if_flags |= IFF_UP;
- if (sc->sc_carpdev)
- CARP_SCLOCK(sc);
- carp_setrun(sc, 0);
- if (sc->sc_carpdev)
- CARP_SCUNLOCK(sc);
- return (0);
- }
+ struct ifnet *ifp = sc->sc_carpdev;
+ struct carp_if *cif = ifp->if_carp;
- /* we have to do it by hands to check we won't match on us */
- ia_if = NULL; own = 0;
- IN6_IFADDR_RLOCK();
- TAILQ_FOREACH(ia, &V_in6_ifaddrhead, ia_link) {
- int i;
-
- for (i = 0; i < 4; i++) {
- if ((sin6->sin6_addr.s6_addr32[i] &
- ia->ia_prefixmask.sin6_addr.s6_addr32[i]) !=
- (ia->ia_addr.sin6_addr.s6_addr32[i] &
- ia->ia_prefixmask.sin6_addr.s6_addr32[i]))
- break;
- }
- /* and, yeah, we need a multicast-capable iface too */
- if (ia->ia_ifp != SC2IFP(sc) &&
- (ia->ia_ifp->if_flags & IFF_MULTICAST) &&
- (i == 4)) {
- if (!ia_if)
- ia_if = ia;
- if (IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr,
- &ia->ia_addr.sin6_addr))
- own++;
- }
- }
+ CIF_LOCK(cif);
+ TAILQ_REMOVE(&cif->cif_vrs, sc, sc_list);
+ if (TAILQ_EMPTY(&cif->cif_vrs))
+ carp_free_if(cif);
+ else
+ CIF_UNLOCK(cif);
- if (!ia_if) {
- IN6_IFADDR_RUNLOCK();
- return (EADDRNOTAVAIL);
- }
- ia = ia_if;
- ifa_ref(&ia->ia_ifa);
- IN6_IFADDR_RUNLOCK();
- ifp = ia->ia_ifp;
-
- if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0 ||
- (im6o->im6o_multicast_ifp && im6o->im6o_multicast_ifp != ifp)) {
- ifa_free(&ia->ia_ifa);
- return (EADDRNOTAVAIL);
- }
+ mtx_lock(&carp_mtx);
+ LIST_REMOVE(sc, sc_next);
+ mtx_unlock(&carp_mtx);
- if (!sc->sc_naddrs6) {
- struct in6_multi *in6m;
+ CARP_LOCK(sc);
+ callout_drain(&sc->sc_ad_tmo);
+#ifdef INET
+ callout_drain(&sc->sc_md_tmo);
+#endif
+#ifdef INET6
+ callout_drain(&sc->sc_md6_tmo);
+#endif
+ CARP_LOCK_DESTROY(sc);
- im6o->im6o_multicast_ifp = ifp;
+ free(sc->sc_ifas, M_CARP);
+ free(sc, M_CARP);
+}
- /* join CARP multicast address */
- bzero(&in6, sizeof(in6));
- in6.s6_addr16[0] = htons(0xff02);
- in6.s6_addr8[15] = 0x12;
- if (in6_setscope(&in6, ifp, NULL) != 0)
- goto cleanup;
- in6m = NULL;
- error = in6_mc_join(ifp, &in6, NULL, &in6m, 0);
- if (error)
- goto cleanup;
- im6o->im6o_membership[0] = in6m;
- im6o->im6o_num_memberships++;
+static struct carp_if*
+carp_alloc_if(struct ifnet *ifp)
+{
+ struct carp_if *cif;
- /* join solicited multicast address */
- bzero(&in6, sizeof(in6));
- in6.s6_addr16[0] = htons(0xff02);
- in6.s6_addr32[1] = 0;
- in6.s6_addr32[2] = htonl(1);
- in6.s6_addr32[3] = sin6->sin6_addr.s6_addr32[3];
- in6.s6_addr8[12] = 0xff;
- if (in6_setscope(&in6, ifp, NULL) != 0)
- goto cleanup;
- in6m = NULL;
- error = in6_mc_join(ifp, &in6, NULL, &in6m, 0);
- if (error)
- goto cleanup;
- im6o->im6o_membership[1] = in6m;
- im6o->im6o_num_memberships++;
- }
+ cif = malloc(sizeof(*cif), M_CARP, M_WAITOK|M_ZERO);
- if (!ifp->if_carp) {
- cif = malloc(sizeof(*cif), M_CARP,
- M_WAITOK|M_ZERO);
- if (!cif) {
- error = ENOBUFS;
- goto cleanup;
- }
- if ((error = ifpromisc(ifp, 1))) {
- free(cif, M_CARP);
- goto cleanup;
- }
+ if (ifpromisc(ifp, 1) != 0)
+ goto cleanup;
- CARP_LOCK_INIT(cif);
- CARP_LOCK(cif);
- cif->vhif_ifp = ifp;
- TAILQ_INIT(&cif->vhif_vrs);
- ifp->if_carp = cif;
+ CIF_LOCK_INIT(cif);
+ cif->cif_ifp = ifp;
+ TAILQ_INIT(&cif->cif_vrs);
- } else {
- struct carp_softc *vr;
+ IF_ADDR_LOCK(ifp);
+ ifp->if_carp = cif;
+ if_ref(ifp);
+ IF_ADDR_UNLOCK(ifp);
- cif = (struct carp_if *)ifp->if_carp;
- CARP_LOCK(cif);
- TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list)
- if (vr != sc && vr->sc_vhid == sc->sc_vhid) {
- CARP_UNLOCK(cif);
- error = EINVAL;
- goto cleanup;
- }
- }
- sc->sc_ia6 = ia;
- sc->sc_carpdev = ifp;
+ return (cif);
- { /* XXX prevent endless loop if already in queue */
- struct carp_softc *vr, *after = NULL;
- int myself = 0;
- cif = (struct carp_if *)ifp->if_carp;
- CARP_LOCK_ASSERT(cif);
-
- TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) {
- if (vr == sc)
- myself = 1;
- if (vr->sc_vhid < sc->sc_vhid)
- after = vr;
- }
+cleanup:
+ free(cif, M_CARP);
- if (!myself) {
- /* We're trying to keep things in order */
- if (after == NULL) {
- TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list);
- } else {
- TAILQ_INSERT_AFTER(&cif->vhif_vrs, after, sc, sc_list);
- }
- cif->vhif_nvrs++;
- }
- }
+ return (NULL);
+}
+
+static void
+carp_free_if(struct carp_if *cif)
+{
+ struct ifnet *ifp = cif->cif_ifp;
- sc->sc_naddrs6++;
- SC2IFP(sc)->if_flags |= IFF_UP;
- if (own)
- sc->sc_advskew = 0;
- carp_sc_state_locked(sc);
- carp_setrun(sc, 0);
+ CIF_LOCK_ASSERT(cif);
+ KASSERT(TAILQ_EMPTY(&cif->cif_vrs), ("%s: softc list not empty",
+ __func__));
- CARP_UNLOCK(cif);
- ifa_free(&ia->ia_ifa); /* XXXRW: should hold reference for softc. */
+ IF_ADDR_LOCK(ifp);
+ ifp->if_carp = NULL;
+ if_rele(ifp);
+ IF_ADDR_UNLOCK(ifp);
- return (0);
+ CIF_LOCK_DESTROY(cif);
-cleanup:
- if (!sc->sc_naddrs6)
- carp_multicast6_cleanup(sc, 1);
- ifa_free(&ia->ia_ifa);
- return (error);
+ ifpromisc(ifp, 0);
+
+ free(cif, M_CARP);
}
-static int
-carp_del_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6)
+static void
+carp_carprcp(struct carpreq *carpr, struct carp_softc *sc, int priv)
{
- int error = 0;
-
- if (!--sc->sc_naddrs6) {
- struct carp_if *cif = (struct carp_if *)sc->sc_carpdev->if_carp;
-
- CARP_LOCK(cif);
- callout_stop(&sc->sc_ad_tmo);
- SC2IFP(sc)->if_flags &= ~IFF_UP;
- SC2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING;
- sc->sc_vhid = -1;
- carp_multicast6_cleanup(sc, 1);
- TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list);
- if (!--cif->vhif_nvrs) {
- CARP_LOCK_DESTROY(cif);
- sc->sc_carpdev->if_carp = NULL;
- free(cif, M_CARP);
- } else
- CARP_UNLOCK(cif);
- }
- return (error);
+ CARP_LOCK(sc);
+ carpr->carpr_state = sc->sc_state;
+ carpr->carpr_vhid = sc->sc_vhid;
+ carpr->carpr_advbase = sc->sc_advbase;
+ carpr->carpr_advskew = sc->sc_advskew;
+ if (priv)
+ bcopy(sc->sc_key, carpr->carpr_key, sizeof(carpr->carpr_key));
+ else
+ bzero(carpr->carpr_key, sizeof(carpr->carpr_key));
+ CARP_UNLOCK(sc);
}
-#endif /* INET6 */
-static int
-carp_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr)
+int
+carp_ioctl(struct ifreq *ifr, u_long cmd, struct thread *td)
{
- struct carp_softc *sc = ifp->if_softc, *vr;
struct carpreq carpr;
- struct ifaddr *ifa;
- struct ifreq *ifr;
- struct ifaliasreq *ifra;
- int locked = 0, error = 0;
+ struct ifnet *ifp;
+ struct carp_softc *sc = NULL;
+ int error = 0, locked = 0;
- ifa = (struct ifaddr *)addr;
- ifra = (struct ifaliasreq *)addr;
- ifr = (struct ifreq *)addr;
+ if ((error = copyin(ifr->ifr_data, &carpr, sizeof carpr)))
+ return (error);
- switch (cmd) {
- case SIOCSIFADDR:
- switch (ifa->ifa_addr->sa_family) {
-#ifdef INET
- case AF_INET:
- SC2IFP(sc)->if_flags |= IFF_UP;
- bcopy(ifa->ifa_addr, ifa->ifa_dstaddr,
- sizeof(struct sockaddr));
- error = carp_set_addr(sc, satosin(ifa->ifa_addr));
- break;
-#endif /* INET */
-#ifdef INET6
- case AF_INET6:
- SC2IFP(sc)->if_flags |= IFF_UP;
- error = carp_set_addr6(sc, satosin6(ifa->ifa_addr));
- break;
-#endif /* INET6 */
- default:
- error = EAFNOSUPPORT;
- break;
- }
+ ifp = ifunit_ref(ifr->ifr_name);
+ if (ifp == NULL)
+ return (ENXIO);
+
+ switch (ifp->if_type) {
+ case IFT_ETHER:
+ case IFT_L2VLAN:
+ case IFT_FDDI:
+ case IFT_ISO88025:
break;
+ default:
+ error = EOPNOTSUPP;
+ goto out;
+ }
- case SIOCAIFADDR:
- switch (ifa->ifa_addr->sa_family) {
-#ifdef INET
- case AF_INET:
- SC2IFP(sc)->if_flags |= IFF_UP;
- bcopy(ifa->ifa_addr, ifa->ifa_dstaddr,
- sizeof(struct sockaddr));
- error = carp_set_addr(sc, satosin(&ifra->ifra_addr));
- break;
-#endif /* INET */
-#ifdef INET6
- case AF_INET6:
- SC2IFP(sc)->if_flags |= IFF_UP;
- error = carp_set_addr6(sc, satosin6(&ifra->ifra_addr));
+ if ((ifp->if_flags & IFF_MULTICAST) == 0) {
+ error = EADDRNOTAVAIL;
+ goto out;
+ }
+
+ switch (cmd) {
+ case SIOCSVH:
+ if ((error = priv_check(td, PRIV_NETINET_CARP)))
break;
-#endif /* INET6 */
- default:
- error = EAFNOSUPPORT;
+ if (carpr.carpr_vhid <= 0 || carpr.carpr_vhid > CARP_MAXVHID ||
+ carpr.carpr_advbase < 0 || carpr.carpr_advskew < 0) {
+ error = EINVAL;
break;
}
- break;
- case SIOCDIFADDR:
- switch (ifa->ifa_addr->sa_family) {
-#ifdef INET
- case AF_INET:
- error = carp_del_addr(sc, satosin(&ifra->ifra_addr));
- break;
-#endif /* INET */
-#ifdef INET6
- case AF_INET6:
- error = carp_del_addr6(sc, satosin6(&ifra->ifra_addr));
- break;
-#endif /* INET6 */
- default:
- error = EAFNOSUPPORT;
- break;
+ if (ifp->if_carp) {
+ CIF_LOCK(ifp->if_carp);
+ IFNET_FOREACH_CARP(ifp, sc)
+ if (sc->sc_vhid == carpr.carpr_vhid)
+ break;
+ CIF_UNLOCK(ifp->if_carp);
}
- break;
+ if (sc == NULL) {
+ sc = carp_alloc(ifp);
+ if (sc == NULL) {
+ error = EINVAL; /* XXX: ifpromisc failed */
+ break;
+ }
- case SIOCSIFFLAGS:
- if (sc->sc_carpdev) {
- locked = 1;
- CARP_SCLOCK(sc);
+ CARP_LOCK(sc);
+ sc->sc_vhid = carpr.carpr_vhid;
+ LLADDR(&sc->sc_addr)[0] = 0;
+ LLADDR(&sc->sc_addr)[1] = 0;
+ LLADDR(&sc->sc_addr)[2] = 0x5e;
+ LLADDR(&sc->sc_addr)[3] = 0;
+ LLADDR(&sc->sc_addr)[4] = 1;
+ LLADDR(&sc->sc_addr)[5] = sc->sc_vhid;
+ } else
+ CARP_LOCK(sc);
+ locked = 1;
+ if (carpr.carpr_advbase > 0) {
+ if (carpr.carpr_advbase > 255 ||
+ carpr.carpr_advbase < CARP_DFLTINTV) {
+ error = EINVAL;
+ break;
+ }
+ sc->sc_advbase = carpr.carpr_advbase;
}
- if (sc->sc_state != INIT && !(ifr->ifr_flags & IFF_UP)) {
- callout_stop(&sc->sc_ad_tmo);
- callout_stop(&sc->sc_md_tmo);
- callout_stop(&sc->sc_md6_tmo);
- if (sc->sc_state == MASTER)
- carp_send_ad_locked(sc);
- carp_set_state(sc, INIT);
- carp_setrun(sc, 0);
- } else if (sc->sc_state == INIT && (ifr->ifr_flags & IFF_UP)) {
- SC2IFP(sc)->if_flags |= IFF_UP;
- carp_setrun(sc, 0);
+ if (carpr.carpr_advskew > 0) {
+ if (carpr.carpr_advskew >= 255) {
+ error = EINVAL;
+ break;
+ }
+ sc->sc_advskew = carpr.carpr_advskew;
}
- break;
-
- case SIOCSVH:
- error = priv_check(curthread, PRIV_NETINET_CARP);
- if (error)
- break;
- if ((error = copyin(ifr->ifr_data, &carpr, sizeof carpr)))
- break;
- error = 1;
- if (sc->sc_carpdev) {
- locked = 1;
- CARP_SCLOCK(sc);
+ if (carpr.carpr_key[0] != '\0') {
+ bcopy(carpr.carpr_key, sc->sc_key, sizeof(sc->sc_key));
+ carp_hmac_prepare(sc);
}
- if (sc->sc_state != INIT && carpr.carpr_state != sc->sc_state) {
+ if (sc->sc_state != INIT &&
+ carpr.carpr_state != sc->sc_state) {
switch (carpr.carpr_state) {
case BACKUP:
callout_stop(&sc->sc_ad_tmo);
carp_set_state(sc, BACKUP);
carp_setrun(sc, 0);
- carp_setroute(sc, RTM_DELETE);
+ carp_delroute(sc);
break;
case MASTER:
carp_master_down_locked(sc);
@@ -2013,296 +1657,279 @@ carp_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr)
break;
}
}
- if (carpr.carpr_vhid > 0) {
- if (carpr.carpr_vhid > 255) {
- error = EINVAL;
- break;
- }
- if (sc->sc_carpdev) {
- struct carp_if *cif;
- cif = (struct carp_if *)sc->sc_carpdev->if_carp;
- TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list)
- if (vr != sc &&
- vr->sc_vhid == carpr.carpr_vhid) {
- error = EEXIST;
- break;
- }
- if (error == EEXIST)
- break;
- }
- sc->sc_vhid = carpr.carpr_vhid;
- IF_LLADDR(sc->sc_ifp)[0] = 0;
- IF_LLADDR(sc->sc_ifp)[1] = 0;
- IF_LLADDR(sc->sc_ifp)[2] = 0x5e;
- IF_LLADDR(sc->sc_ifp)[3] = 0;
- IF_LLADDR(sc->sc_ifp)[4] = 1;
- IF_LLADDR(sc->sc_ifp)[5] = sc->sc_vhid;
- error--;
+ break;
+
+ case SIOCGVH:
+ {
+ int priveleged;
+
+ if (carpr.carpr_vhid < 0 || carpr.carpr_vhid > CARP_MAXVHID) {
+ error = EINVAL;
+ break;
}
- if (carpr.carpr_advbase > 0 || carpr.carpr_advskew > 0) {
- if (carpr.carpr_advskew >= 255) {
- error = EINVAL;
+ if (carpr.carpr_count < 1) {
+ error = EMSGSIZE;
+ break;
+ }
+ if (ifp->if_carp == NULL) {
+ error = ENOENT;
+ break;
+ }
+
+ priveleged = (priv_check(td, PRIV_NETINET_CARP) == 0);
+ if (carpr.carpr_vhid != 0) {
+ CIF_LOCK(ifp->if_carp);
+ IFNET_FOREACH_CARP(ifp, sc)
+ if (sc->sc_vhid == carpr.carpr_vhid)
+ break;
+ CIF_UNLOCK(ifp->if_carp);
+ if (sc == NULL) {
+ error = ENOENT;
break;
}
- if (carpr.carpr_advbase > 255) {
- error = EINVAL;
+ carp_carprcp(&carpr, sc, priveleged);
+ error = copyout(&carpr, ifr->ifr_data, sizeof(carpr));
+ } else {
+ int i, count;
+
+ count = 0;
+ CIF_LOCK(ifp->if_carp);
+ IFNET_FOREACH_CARP(ifp, sc)
+ count++;
+
+ if (count > carpr.carpr_count) {
+ CIF_UNLOCK(ifp->if_carp);
+ error = EMSGSIZE;
break;
}
- sc->sc_advbase = carpr.carpr_advbase;
- sc->sc_advskew = carpr.carpr_advskew;
- error--;
- }
- bcopy(carpr.carpr_key, sc->sc_key, sizeof(sc->sc_key));
- if (error > 0)
- error = EINVAL;
- else {
- error = 0;
- carp_setrun(sc, 0);
- }
- break;
- case SIOCGVH:
- /* XXX: lockless read */
- bzero(&carpr, sizeof(carpr));
- carpr.carpr_state = sc->sc_state;
- carpr.carpr_vhid = sc->sc_vhid;
- carpr.carpr_advbase = sc->sc_advbase;
- carpr.carpr_advskew = sc->sc_advskew;
- error = priv_check(curthread, PRIV_NETINET_CARP);
- if (error == 0)
- bcopy(sc->sc_key, carpr.carpr_key,
- sizeof(carpr.carpr_key));
- error = copyout(&carpr, ifr->ifr_data, sizeof(carpr));
+ i = 0;
+ IFNET_FOREACH_CARP(ifp, sc) {
+ carp_carprcp(&carpr, sc, priveleged);
+ carpr.carpr_count = count;
+ error = copyout(&carpr, ifr->ifr_data +
+ (i * sizeof(carpr)), sizeof(carpr));
+ if (error) {
+ CIF_UNLOCK(ifp->if_carp);
+ break;
+ }
+ i++;
+ }
+ CIF_UNLOCK(ifp->if_carp);
+ }
break;
-
+ }
default:
error = EINVAL;
}
+out:
if (locked)
- CARP_SCUNLOCK(sc);
-
- carp_hmac_prepare(sc);
+ CARP_UNLOCK(sc);
+ if_rele(ifp);
return (error);
}
-/*
- * XXX: this is looutput. We should eventually use it from there.
- */
static int
-carp_looutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
- struct route *ro)
+carp_get_vhid(struct ifaddr *ifa)
{
- u_int32_t af;
- struct rtentry *rt = NULL;
- M_ASSERTPKTHDR(m); /* check if we have the packet header */
-
- if (ro != NULL)
- rt = ro->ro_rt;
- if (rt && rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
- m_freem(m);
- return (rt->rt_flags & RTF_BLACKHOLE ? 0 :
- rt->rt_flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH);
- }
-
- ifp->if_opackets++;
- ifp->if_obytes += m->m_pkthdr.len;
-
- /* BPF writes need to be handled specially. */
- if (dst->sa_family == AF_UNSPEC) {
- bcopy(dst->sa_data, &af, sizeof(af));
- dst->sa_family = af;
- }
+ if (ifa == NULL || ifa->ifa_carp == NULL)
+ return (0);
-#if 1 /* XXX */
- switch (dst->sa_family) {
- case AF_INET:
- case AF_INET6:
- case AF_IPX:
- case AF_APPLETALK:
- break;
- default:
- printf("carp_looutput: af=%d unexpected\n", dst->sa_family);
- m_freem(m);
- return (EAFNOSUPPORT);
- }
-#endif
- return(if_simloop(ifp, m, dst->sa_family, 0));
-}
-
-/*
- * Start output on carp interface. This function should never be called.
- */
-static void
-carp_start(struct ifnet *ifp)
-{
-#ifdef DEBUG
- printf("%s: start called\n", ifp->if_xname);
-#endif
+ return (ifa->ifa_carp->sc_vhid);
}
int
-carp_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa,
- struct rtentry *rt)
+carp_attach(struct ifaddr *ifa, int vhid)
{
- struct m_tag *mtag;
+ struct ifnet *ifp = ifa->ifa_ifp;
struct carp_softc *sc;
- struct ifnet *carp_ifp;
+ int index, error;
- if (!sa)
- return (0);
+ if (ifp->if_carp == NULL)
+ return (ENOPROTOOPT);
- switch (sa->sa_family) {
+ switch (ifa->ifa_addr->sa_family) {
#ifdef INET
case AF_INET:
- break;
-#endif /* INET */
+#endif
#ifdef INET6
case AF_INET6:
+#endif
break;
-#endif /* INET6 */
default:
- return (0);
+ return (EPROTOTYPE);
}
- mtag = m_tag_find(m, PACKET_TAG_CARP, NULL);
- if (mtag == NULL)
- return (0);
+ CIF_LOCK(ifp->if_carp);
+ IFNET_FOREACH_CARP(ifp, sc)
+ if (sc->sc_vhid == vhid)
+ break;
+ CIF_UNLOCK(ifp->if_carp);
+ if (sc == NULL)
+ return (ENOENT);
- bcopy(mtag + 1, &carp_ifp, sizeof(struct ifnet *));
- sc = carp_ifp->if_softc;
+ if (ifa->ifa_carp) {
+ if (ifa->ifa_carp->sc_vhid != vhid)
+ carp_detach(ifa);
+ else
+ return (0);
+ }
- /* Set the source MAC address to Virtual Router MAC Address */
- switch (ifp->if_type) {
- case IFT_ETHER:
- case IFT_L2VLAN: {
- struct ether_header *eh;
+ error = carp_multicast_setup(sc, ifa->ifa_addr->sa_family);
+ if (error)
+ return (error);
- eh = mtod(m, struct ether_header *);
- eh->ether_shost[0] = 0;
- eh->ether_shost[1] = 0;
- eh->ether_shost[2] = 0x5e;
- eh->ether_shost[3] = 0;
- eh->ether_shost[4] = 1;
- eh->ether_shost[5] = sc->sc_vhid;
+ CARP_LOCK(sc);
+ index = sc->sc_naddrs + sc->sc_naddrs6 + 1;
+ if (index > sc->sc_ifasiz / sizeof(struct ifaddr *))
+ if ((error = carp_grow_ifas(sc)) != 0) {
+ carp_multicast_cleanup(sc,
+ ifa->ifa_addr->sa_family);
+ CARP_UNLOCK(sc);
+ return (error);
}
- break;
- case IFT_FDDI: {
- struct fddi_header *fh;
- fh = mtod(m, struct fddi_header *);
- fh->fddi_shost[0] = 0;
- fh->fddi_shost[1] = 0;
- fh->fddi_shost[2] = 0x5e;
- fh->fddi_shost[3] = 0;
- fh->fddi_shost[4] = 1;
- fh->fddi_shost[5] = sc->sc_vhid;
- }
+ switch (ifa->ifa_addr->sa_family) {
+#ifdef INET
+ case AF_INET:
+ sc->sc_naddrs++;
break;
- case IFT_ISO88025: {
- struct iso88025_header *th;
- th = mtod(m, struct iso88025_header *);
- th->iso88025_shost[0] = 3;
- th->iso88025_shost[1] = 0;
- th->iso88025_shost[2] = 0x40 >> (sc->sc_vhid - 1);
- th->iso88025_shost[3] = 0x40000 >> (sc->sc_vhid - 1);
- th->iso88025_shost[4] = 0;
- th->iso88025_shost[5] = 0;
- }
+#endif
+#ifdef INET6
+ case AF_INET6:
+ sc->sc_naddrs6++;
break;
- default:
- printf("%s: carp is not supported for this interface type\n",
- ifp->if_xname);
- return (EOPNOTSUPP);
+#endif
}
+ ifa_ref(ifa);
+ sc->sc_ifas[index - 1] = ifa;
+ ifa->ifa_carp = sc;
+
+ carp_hmac_prepare(sc);
+ carp_sc_state(sc);
+
+ CARP_UNLOCK(sc);
+
return (0);
}
-static void
-carp_set_state(struct carp_softc *sc, int state)
+void
+carp_detach(struct ifaddr *ifa)
{
- int link_state;
+ struct carp_softc *sc = ifa->ifa_carp;
+ int i, index;
- if (sc->sc_carpdev)
- CARP_SCLOCK_ASSERT(sc);
+ KASSERT(sc != NULL, ("%s: %p not attached", __func__, ifa));
- if (sc->sc_state == state)
- return;
+ CARP_LOCK(sc);
- sc->sc_state = state;
- switch (state) {
- case BACKUP:
- link_state = LINK_STATE_DOWN;
- break;
- case MASTER:
- link_state = LINK_STATE_UP;
+ /* Shift array. */
+ index = sc->sc_naddrs + sc->sc_naddrs6;
+ for (i = 0; i < index; i++)
+ if (sc->sc_ifas[i] == ifa)
+ break;
+ KASSERT(i < index, ("%s: %p no backref", __func__, ifa));
+ for (; i < index - 1; i++)
+ sc->sc_ifas[i] = sc->sc_ifas[i+1];
+ sc->sc_ifas[index - 1] = NULL;
+
+ switch (ifa->ifa_addr->sa_family) {
+#ifdef INET
+ case AF_INET:
+ sc->sc_naddrs--;
break;
- default:
- link_state = LINK_STATE_UNKNOWN;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ sc->sc_naddrs6--;
break;
+#endif
}
- if_link_state_change(SC2IFP(sc), link_state);
+
+ carp_multicast_cleanup(sc, ifa->ifa_addr->sa_family);
+
+ ifa->ifa_carp = NULL;
+ ifa_free(ifa);
+
+ carp_hmac_prepare(sc);
+ carp_sc_state(sc);
+
+ if (sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0) {
+ CARP_UNLOCK(sc);
+ carp_destroy(sc);
+ } else
+ CARP_UNLOCK(sc);
}
-void
-carp_carpdev_state(struct ifnet *ifp)
+static void
+carp_set_state(struct carp_softc *sc, int state)
{
- struct carp_if *cif;
- cif = ifp->if_carp;
- CARP_LOCK(cif);
- carp_carpdev_state_locked(cif);
- CARP_UNLOCK(cif);
+ CARP_LOCK_ASSERT(sc);
+
+ if (sc->sc_state != state) {
+ const char *carp_states[] = { CARP_STATES };
+ char subsys[IFNAMSIZ+5];
+
+ sc->sc_state = state;
+
+ snprintf(subsys, IFNAMSIZ+5, "%u@%s", sc->sc_vhid,
+ sc->sc_carpdev->if_xname);
+ devctl_notify("CARP", subsys, carp_states[state], NULL);
+ }
}
static void
-carp_carpdev_state_locked(struct carp_if *cif)
+carp_linkstate(struct ifnet *ifp)
{
struct carp_softc *sc;
- TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list)
- carp_sc_state_locked(sc);
+ CIF_LOCK(ifp->if_carp);
+ IFNET_FOREACH_CARP(ifp, sc) {
+ CARP_LOCK(sc);
+ carp_sc_state(sc);
+ CARP_UNLOCK(sc);
+ }
+ CIF_UNLOCK(ifp->if_carp);
}
static void
-carp_sc_state_locked(struct carp_softc *sc)
+carp_sc_state(struct carp_softc *sc)
{
- CARP_SCLOCK_ASSERT(sc);
+
+ CARP_LOCK_ASSERT(sc);
if (sc->sc_carpdev->if_link_state != LINK_STATE_UP ||
!(sc->sc_carpdev->if_flags & IFF_UP)) {
- sc->sc_flags_backup = SC2IFP(sc)->if_flags;
- SC2IFP(sc)->if_flags &= ~IFF_UP;
- SC2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING;
callout_stop(&sc->sc_ad_tmo);
+#ifdef INET
callout_stop(&sc->sc_md_tmo);
+#endif
+#ifdef INET6
callout_stop(&sc->sc_md6_tmo);
+#endif
carp_set_state(sc, INIT);
carp_setrun(sc, 0);
if (!sc->sc_suppress) {
carp_suppress_preempt++;
- if (carp_suppress_preempt == 1) {
- CARP_SCUNLOCK(sc);
- carp_send_ad_all();
- CARP_SCLOCK(sc);
- }
+ if (carp_suppress_preempt == 1)
+ carp_send_ad_all(sc);
}
sc->sc_suppress = 1;
} else {
- SC2IFP(sc)->if_flags |= sc->sc_flags_backup;
carp_set_state(sc, INIT);
carp_setrun(sc, 0);
if (sc->sc_suppress)
carp_suppress_preempt--;
sc->sc_suppress = 0;
}
-
- return;
}
+
#ifdef INET
extern struct domain inetdomain;
static struct protosw in_carp_protosw = {
@@ -2335,10 +1962,6 @@ static void
carp_mod_cleanup(void)
{
- if (if_detach_event_tag == NULL)
- return;
- EVENTHANDLER_DEREGISTER(ifnet_departure_event, if_detach_event_tag);
- if_clone_detach(&carp_cloner);
#ifdef INET
if (proto_reg[CARP_INET] == 0) {
(void)ipproto_unregister(IPPROTO_CARP);
@@ -2356,6 +1979,10 @@ carp_mod_cleanup(void)
carp_iamatch6_p = NULL;
carp_macmatch6_p = NULL;
#endif
+ carp_ioctl_p = NULL;
+ carp_attach_p = NULL;
+ carp_detach_p = NULL;
+ carp_get_vhid_p = NULL;
carp_linkstate_p = NULL;
carp_forus_p = NULL;
carp_output_p = NULL;
@@ -2367,22 +1994,21 @@ carp_mod_load(void)
{
int err;
- if_detach_event_tag = EVENTHANDLER_REGISTER(ifnet_departure_event,
- carp_ifdetach, NULL, EVENTHANDLER_PRI_ANY);
- if (if_detach_event_tag == NULL)
- return (ENOMEM);
mtx_init(&carp_mtx, "carp_mtx", NULL, MTX_DEF);
- LIST_INIT(&carpif_list);
- if_clone_attach(&carp_cloner);
- carp_linkstate_p = carp_carpdev_state;
+ LIST_INIT(&carp_list);
+ carp_get_vhid_p = carp_get_vhid;
carp_forus_p = carp_forus;
carp_output_p = carp_output;
+ carp_linkstate_p = carp_linkstate;
+ carp_ioctl_p = carp_ioctl;
+ carp_attach_p = carp_attach;
+ carp_detach_p = carp_detach;
#ifdef INET6
carp_iamatch6_p = carp_iamatch6;
carp_macmatch6_p = carp_macmatch6;
proto_reg[CARP_INET6] = pf_proto_register(PF_INET6,
(struct protosw *)&in6_carp_protosw);
- if (proto_reg[CARP_INET6] != 0) {
+ if (proto_reg[CARP_INET6]) {
printf("carp: error %d attaching to PF_INET6\n",
proto_reg[CARP_INET6]);
carp_mod_cleanup();
@@ -2398,7 +2024,7 @@ carp_mod_load(void)
#ifdef INET
carp_iamatch_p = carp_iamatch;
proto_reg[CARP_INET] = pf_proto_register(PF_INET, &in_carp_protosw);
- if (proto_reg[CARP_INET] != 0) {
+ if (proto_reg[CARP_INET]) {
printf("carp: error %d attaching to PF_INET\n",
proto_reg[CARP_INET]);
carp_mod_cleanup();
@@ -2411,7 +2037,7 @@ carp_mod_load(void)
return (err);
}
#endif
- return 0;
+ return (0);
}
static int
@@ -2422,17 +2048,13 @@ carp_modevent(module_t mod, int type, void *data)
return carp_mod_load();
/* NOTREACHED */
case MOD_UNLOAD:
- /*
- * XXX: For now, disallow module unloading by default due to
- * a race condition where a thread may dereference one of the
- * function pointer hooks after the module has been
- * unloaded, during processing of a packet, causing a panic.
- */
-#ifdef CARPMOD_CAN_UNLOAD
- carp_mod_cleanup();
-#else
- return (EBUSY);
-#endif
+ mtx_lock(&carp_mtx);
+ if (LIST_EMPTY(&carp_list))
+ carp_mod_cleanup();
+ else {
+ mtx_unlock(&carp_mtx);
+ return (EBUSY);
+ }
break;
default:
diff --git a/sys/netinet/ip_carp.h b/sys/netinet/ip_carp.h
index 2f2b4f2..d8b82a8 100644
--- a/sys/netinet/ip_carp.h
+++ b/sys/netinet/ip_carp.h
@@ -126,10 +126,12 @@ struct carpstats {
* Configuration structure for SIOCSVH SIOCGVH
*/
struct carpreq {
+ int carpr_count;
+ int carpr_vhid;
+#define CARP_MAXVHID 255
int carpr_state;
#define CARP_STATES "INIT", "BACKUP", "MASTER"
#define CARP_MAXSTATE 2
- int carpr_vhid;
int carpr_advskew;
int carpr_advbase;
unsigned char carpr_key[CARP_KEY_LEN];
@@ -144,8 +146,7 @@ struct carpreq {
#define CARPCTL_PREEMPT 2 /* high-pri backup preemption mode */
#define CARPCTL_LOG 3 /* log bad packets */
#define CARPCTL_STATS 4 /* statistics (read-only) */
-#define CARPCTL_ARPBALANCE 5 /* balance arp responses */
-#define CARPCTL_MAXID 6
+#define CARPCTL_MAXID 5
#define CARPCTL_NAMES { \
{ 0, 0 }, \
@@ -153,33 +154,37 @@ struct carpreq {
{ "preempt", CTLTYPE_INT }, \
{ "log", CTLTYPE_INT }, \
{ "stats", CTLTYPE_STRUCT }, \
- { "arpbalance", CTLTYPE_INT }, \
}
#ifdef _KERNEL
-void carp_carpdev_state(struct ifnet *);
-void carp_input (struct mbuf *, int);
-int carp6_input (struct mbuf **, int *, int);
-int carp_output (struct ifnet *, struct mbuf *, struct sockaddr *,
- struct rtentry *);
-int carp_iamatch (struct ifnet *, struct in_ifaddr *, struct in_addr *,
- u_int8_t **);
+int carp_ioctl(struct ifreq *, u_long, struct thread *);
+int carp_attach(struct ifaddr *, int);
+void carp_detach(struct ifaddr *);
+void carp_carpdev_state(struct ifnet *);
+void carp_input (struct mbuf *, int);
+int carp6_input (struct mbuf **, int *, int);
+int carp_output (struct ifnet *, struct mbuf *, struct sockaddr *);
+int carp_iamatch(struct ifaddr *, uint8_t **);
struct ifaddr *carp_iamatch6(struct ifnet *, struct in6_addr *);
caddr_t carp_macmatch6(struct ifnet *, struct mbuf *, const struct in6_addr *);
-struct ifnet *carp_forus (struct ifnet *, u_char *);
+int carp_forus(struct ifnet *, u_char *);
/* These are external networking stack hooks for CARP */
/* net/if.c */
+extern int (*carp_ioctl_p)(struct ifreq *, u_long, struct thread *);
+extern int (*carp_attach_p)(struct ifaddr *, int);
+extern void (*carp_detach_p)(struct ifaddr *);
extern void (*carp_linkstate_p)(struct ifnet *);
/* net/if_bridge.c net/if_ethersubr.c */
-extern struct ifnet *(*carp_forus_p)(struct ifnet *, u_char *);
+extern int (*carp_forus_p)(struct ifnet *, u_char *);
/* net/if_ethersubr.c */
extern int (*carp_output_p)(struct ifnet *, struct mbuf *,
- struct sockaddr *, struct rtentry *);
+ struct sockaddr *);
+/* net/rtsock.c */
+extern int (*carp_get_vhid_p)(struct ifaddr *);
#ifdef INET
/* netinet/if_ether.c */
-extern int (*carp_iamatch_p)(struct ifnet *, struct in_ifaddr *,
- struct in_addr *, u_int8_t **);
+extern int (*carp_iamatch_p)(struct ifaddr *, uint8_t **);
#endif
#ifdef INET6
/* netinet6/nd6_nbr.c */
diff --git a/sys/netinet6/in6.c b/sys/netinet6/in6.c
index 8514f73..ef22f03 100644
--- a/sys/netinet6/in6.c
+++ b/sys/netinet6/in6.c
@@ -95,6 +95,7 @@ __FBSDID("$FreeBSD$");
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <netinet/in_pcb.h>
+#include <netinet/ip_carp.h>
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
@@ -272,6 +273,7 @@ in6_control(struct socket *so, u_long cmd, caddr_t data,
struct in6_ifaddr *ia = NULL;
struct in6_aliasreq *ifra = (struct in6_aliasreq *)data;
struct sockaddr_in6 *sa6;
+ int carp_attached = 0;
int error;
switch (cmd) {
@@ -652,6 +654,18 @@ in6_control(struct socket *so, u_long cmd, caddr_t data,
break;
}
+ if (ifra->ifra_vhid > 0) {
+ if (carp_attach_p != NULL)
+ error = (*carp_attach_p)(&ia->ia_ifa,
+ ifra->ifra_vhid);
+ else
+ error = EPROTONOSUPPORT;
+ if (error)
+ goto out;
+ else
+ carp_attached = 1;
+ }
+
/*
* then, make the prefix on-link on the interface.
* XXX: we'd rather create the prefix before the address, but
@@ -695,9 +709,14 @@ in6_control(struct socket *so, u_long cmd, caddr_t data,
* nd6_prelist_add will install the corresponding
* interface route.
*/
- if ((error = nd6_prelist_add(&pr0, NULL, &pr)) != 0)
+ if ((error = nd6_prelist_add(&pr0, NULL, &pr)) != 0) {
+ if (carp_attached)
+ (*carp_detach_p)(&ia->ia_ifa);
goto out;
+ }
if (pr == NULL) {
+ if (carp_attached)
+ (*carp_detach_p)(&ia->ia_ifa);
log(LOG_ERR, "nd6_prelist_add succeeded but "
"no prefix\n");
error = EINVAL;
@@ -1301,6 +1320,9 @@ in6_purgeaddr(struct ifaddr *ifa)
struct rtentry *rt;
struct ifaddr *ifa0, *nifa;
+ if (ifa->ifa_carp)
+ (*carp_detach_p)(ifa);
+
/*
* find another IPv6 address as the gateway for the
* link-local and node-local all-nodes multicast
diff --git a/sys/netinet6/in6_ifattach.c b/sys/netinet6/in6_ifattach.c
index 2ff7455..1dae2c0 100644
--- a/sys/netinet6/in6_ifattach.c
+++ b/sys/netinet6/in6_ifattach.c
@@ -705,7 +705,6 @@ in6_ifattach(struct ifnet *ifp, struct ifnet *altifp)
switch (ifp->if_type) {
case IFT_PFLOG:
case IFT_PFSYNC:
- case IFT_CARP:
return;
}
diff --git a/sys/netinet6/in6_var.h b/sys/netinet6/in6_var.h
index 00342fd..3e93e71 100644
--- a/sys/netinet6/in6_var.h
+++ b/sys/netinet6/in6_var.h
@@ -287,6 +287,7 @@ struct in6_aliasreq {
struct sockaddr_in6 ifra_prefixmask;
int ifra_flags;
struct in6_addrlifetime ifra_lifetime;
+ int ifra_vhid;
};
/* prefix type macro */
diff --git a/sys/netinet6/nd6.c b/sys/netinet6/nd6.c
index 875daa0..5d6f8c0 100644
--- a/sys/netinet6/nd6.c
+++ b/sys/netinet6/nd6.c
@@ -2173,9 +2173,6 @@ nd6_need_cache(struct ifnet *ifp)
#ifdef IFT_IEEE80211
case IFT_IEEE80211:
#endif
-#ifdef IFT_CARP
- case IFT_CARP:
-#endif
case IFT_INFINIBAND:
case IFT_GIF: /* XXX need more cases? */
case IFT_PPP:
diff --git a/sys/netinet6/nd6_nbr.c b/sys/netinet6/nd6_nbr.c
index 0221c72..a2aaeea 100644
--- a/sys/netinet6/nd6_nbr.c
+++ b/sys/netinet6/nd6_nbr.c
@@ -225,7 +225,7 @@ nd6_ns_input(struct mbuf *m, int off, int icmp6len)
/* (1) and (3) check. */
if (ifp->if_carp)
ifa = (*carp_iamatch6_p)(ifp, &taddr6);
- if (ifa == NULL)
+ else
ifa = (struct ifaddr *)in6ifa_ifpwithaddr(ifp, &taddr6);
/* (2) check. */
@@ -688,7 +688,14 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len)
lladdrlen = ndopts.nd_opts_tgt_lladdr->nd_opt_len << 3;
}
- ifa = (struct ifaddr *)in6ifa_ifpwithaddr(ifp, &taddr6);
+ /*
+ * This effectively disables the DAD check on a non-master CARP
+ * address.
+ */
+ if (ifp->if_carp)
+ ifa = (*carp_iamatch6_p)(ifp, &taddr6);
+ else
+ ifa = (struct ifaddr *)in6ifa_ifpwithaddr(ifp, &taddr6);
/*
* Target address matches one of my interface address.
@@ -1133,9 +1140,6 @@ nd6_ifptomac(struct ifnet *ifp)
#ifdef IFT_IEEE80211
case IFT_IEEE80211:
#endif
-#ifdef IFT_CARP
- case IFT_CARP:
-#endif
case IFT_INFINIBAND:
case IFT_BRIDGE:
case IFT_ISO88025:
diff --git a/sys/sys/param.h b/sys/sys/param.h
index 1ce8e35..5e9b4e5 100644
--- a/sys/sys/param.h
+++ b/sys/sys/param.h
@@ -58,7 +58,7 @@
* in the range 5 to 9.
*/
#undef __FreeBSD_version
-#define __FreeBSD_version 1000002 /* Master, propagated to newvers */
+#define __FreeBSD_version 1000003 /* Master, propagated to newvers */
/*
* __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD,
OpenPOWER on IntegriCloud