summaryrefslogtreecommitdiffstats
path: root/sys
diff options
context:
space:
mode:
Diffstat (limited to 'sys')
-rw-r--r--sys/conf/files2
-rw-r--r--sys/conf/options1
-rw-r--r--sys/net/if.c18
-rw-r--r--sys/net/if_ethersubr.c26
-rw-r--r--sys/net/if_media.h6
-rw-r--r--sys/net/if_types.h1
-rw-r--r--sys/net/if_var.h3
-rw-r--r--sys/netinet/if_ether.c39
-rw-r--r--sys/netinet/if_ether.h1
-rw-r--r--sys/netinet/in.h2
-rw-r--r--sys/netinet/in_proto.c16
-rw-r--r--sys/netinet/ip_carp.c2032
-rw-r--r--sys/netinet/ip_carp.h163
-rw-r--r--sys/netinet/ip_input.c11
-rw-r--r--sys/netinet6/in6.c37
-rw-r--r--sys/netinet6/in6_ifattach.c1
-rw-r--r--sys/netinet6/in6_proto.c13
-rw-r--r--sys/netinet6/in6_var.h4
-rw-r--r--sys/netinet6/nd6.c3
-rw-r--r--sys/netinet6/nd6_nbr.c29
-rw-r--r--sys/sys/mbuf.h1
21 files changed, 2398 insertions, 11 deletions
diff --git a/sys/conf/files b/sys/conf/files
index daa8a9f..1a9c674 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -279,6 +279,7 @@ crypto/rijndael/rijndael-api-fst.c optional geom_bde
crypto/rijndael/rijndael-api-fst.c optional random
crypto/rijndael/rijndael-api.c optional ipsec
crypto/rijndael/rijndael-api.c optional wlan_ccmp
+crypto/sha1.c optional carp
crypto/sha1.c optional netgraph_mppc_encryption
crypto/sha1.c optional crypto
crypto/sha1.c optional ipsec
@@ -1483,6 +1484,7 @@ netinet/if_atm.c optional atm
netinet/if_ether.c optional ether
netinet/igmp.c optional inet
netinet/in.c optional inet
+netinet/ip_carp.c optional carp
netinet/in_gif.c optional gif inet
netinet/ip_gre.c optional gre inet
netinet/ip_id.c optional inet
diff --git a/sys/conf/options b/sys/conf/options
index f104f94..008c932 100644
--- a/sys/conf/options
+++ b/sys/conf/options
@@ -610,6 +610,7 @@ NDEVFSOVERFLOW opt_devfs.h
DEV_BPF opt_bpf.h
DEV_ISA opt_isa.h
DEV_MCA opt_mca.h
+DEV_CARP opt_carp.h
DEV_SPLASH opt_splash.h
EISA_SLOTS opt_eisa.h
diff --git a/sys/net/if.c b/sys/net/if.c
index b6505df..3c25986 100644
--- a/sys/net/if.c
+++ b/sys/net/if.c
@@ -34,6 +34,7 @@
#include "opt_inet6.h"
#include "opt_inet.h"
#include "opt_mac.h"
+#include "opt_carp.h"
#include <sys/param.h>
#include <sys/types.h>
@@ -78,6 +79,9 @@
#ifdef INET
#include <netinet/if_ether.h>
#endif
+#ifdef DEV_CARP
+#include <netinet/ip_carp.h>
+#endif
void (*ng_ether_link_state_p)(struct ifnet *ifp, int state);
@@ -529,6 +533,12 @@ if_detach(struct ifnet *ifp)
int found;
EVENTHANDLER_INVOKE(ifnet_departure_event, ifp);
+#ifdef DEV_CARP
+ /* Maybe hook to the generalized departure handler above?!? */
+ if (ifp->if_carp)
+ carp_ifdetach(ifp);
+#endif
+
/*
* Remove routes and flush queues.
*/
@@ -933,6 +943,10 @@ if_unroute(struct ifnet *ifp, int flag, int fam)
if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
pfctlinput(PRC_IFDOWN, ifa->ifa_addr);
if_qflush(&ifp->if_snd);
+#ifdef DEV_CARP
+ if (ifp->if_carp)
+ carp_carpdev_state(ifp->if_carp);
+#endif
rt_ifmsg(ifp);
}
@@ -951,6 +965,10 @@ if_route(struct ifnet *ifp, int flag, int fam)
TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
pfctlinput(PRC_IFUP, ifa->ifa_addr);
+#ifdef DEV_CARP
+ if (ifp->if_carp)
+ carp_carpdev_state(ifp->if_carp);
+#endif
rt_ifmsg(ifp);
#ifdef INET6
in6_if_up(ifp);
diff --git a/sys/net/if_ethersubr.c b/sys/net/if_ethersubr.c
index 1b973fe..2c4ff99 100644
--- a/sys/net/if_ethersubr.c
+++ b/sys/net/if_ethersubr.c
@@ -37,6 +37,7 @@
#include "opt_bdg.h"
#include "opt_mac.h"
#include "opt_netgraph.h"
+#include "opt_carp.h"
#include <sys/param.h>
#include <sys/systm.h>
@@ -73,6 +74,10 @@
#include <netinet6/nd6.h>
#endif
+#ifdef DEV_CARP
+#include <netinet/ip_carp.h>
+#endif
+
#ifdef IPX
#include <netipx/ipx.h>
#include <netipx/ipx_if.h>
@@ -315,6 +320,12 @@ ether_output(struct ifnet *ifp, struct mbuf *m,
}
}
+#ifdef DEV_CARP
+ if (ifp->if_carp &&
+ (error = carp_output(ifp, m, dst, NULL)))
+ goto bad;
+#endif
+
/* Handle ng_ether(4) processing, if any */
if (IFP2AC(ifp)->ac_netgraph != NULL) {
if ((error = (*ng_ether_output_p)(ifp, &m)) != 0) {
@@ -606,6 +617,18 @@ ether_demux(struct ifnet *ifp, struct mbuf *m)
if (!(BDG_ACTIVE(ifp)) &&
!((ether_type == ETHERTYPE_VLAN || m->m_flags & M_VLANTAG) &&
ifp->if_nvlans > 0)) {
+#ifdef DEV_CARP
+ /*
+ * XXX: Okay, we need to call carp_forus() and - if it is for us
+ * jump over code that does the normal check
+ * "ac_enaddr == ether_dhost". The check sequence is a bit
+ * different from OpenBSD, so we jump over as little code as possible,
+ * to catch _all_ sanity checks. This needs evaluation, to see if
+ * the carp ether_dhost values break any of these checks!
+ */
+ if (ifp->if_carp && carp_forus(ifp->if_carp, eh->ether_dhost))
+ goto pre_stats;
+#endif
/*
* Discard packet if upper layers shouldn't see it because it
* was unicast to a different Ethernet address. If the driver
@@ -628,6 +651,9 @@ ether_demux(struct ifnet *ifp, struct mbuf *m)
}
}
+#ifdef DEV_CARP
+pre_stats:
+#endif
/* Discard packet if interface is not up */
if ((ifp->if_flags & IFF_UP) == 0) {
m_freem(m);
diff --git a/sys/net/if_media.h b/sys/net/if_media.h
index 68a785d..4f7389b 100644
--- a/sys/net/if_media.h
+++ b/sys/net/if_media.h
@@ -227,6 +227,11 @@ int ifmedia_ioctl(struct ifnet *ifp, struct ifreq *ifr,
#define IFM_ATM_UNASSIGNED 0x00000400 /* unassigned cells */
/*
+ * CARP Common Address Redundancy Protocol
+ */
+#define IFM_CARP 0x000000c0
+
+/*
* Shared media sub-types
*/
#define IFM_AUTO 0 /* Autoselect best media */
@@ -299,6 +304,7 @@ struct ifmedia_description {
{ IFM_FDDI, "FDDI" }, \
{ IFM_IEEE80211, "IEEE 802.11 Wireless Ethernet" }, \
{ IFM_ATM, "ATM" }, \
+ { IFM_CARP, "Common Address Redundancy Protocol" }, \
{ 0, NULL }, \
}
diff --git a/sys/net/if_types.h b/sys/net/if_types.h
index 7af5bec..56dca45 100644
--- a/sys/net/if_types.h
+++ b/sys/net/if_types.h
@@ -247,4 +247,5 @@
#define IFT_FAITH 0xf2
#define IFT_PFLOG 0xf6
#define IFT_PFSYNC 0xf7
+#define IFT_CARP 0xf8 /* Common Address Redundancy Protocol */
#endif /* !_NET_IF_TYPES_H_ */
diff --git a/sys/net/if_var.h b/sys/net/if_var.h
index d1d9fa2..78eea2c 100644
--- a/sys/net/if_var.h
+++ b/sys/net/if_var.h
@@ -68,6 +68,7 @@ struct rtentry;
struct rt_addrinfo;
struct socket;
struct ether_header;
+struct carp_if;
#endif
#include <sys/queue.h> /* get TAILQ macros */
@@ -146,7 +147,7 @@ struct ifnet {
*/
struct knlist if_klist; /* events attached to this if */
int if_pcount; /* number of promiscuous listeners */
- void *if_carp; /* carp (tbd) interface pointer */
+ struct carp_if *if_carp; /* carp interface structure */
struct bpf_if *if_bpf; /* packet filter structure */
u_short if_index; /* numeric abbreviation for this if */
short if_timer; /* time 'til if_watchdog called */
diff --git a/sys/netinet/if_ether.c b/sys/netinet/if_ether.c
index 2f1c037..1bc1d06 100644
--- a/sys/netinet/if_ether.c
+++ b/sys/netinet/if_ether.c
@@ -39,6 +39,7 @@
#include "opt_inet.h"
#include "opt_bdg.h"
#include "opt_mac.h"
+#include "opt_carp.h"
#include <sys/param.h>
#include <sys/kernel.h>
@@ -67,6 +68,10 @@
#include <net/if_arc.h>
#include <net/iso88025.h>
+#ifdef DEV_CARP
+#include <netinet/ip_carp.h>
+#endif
+
#define SIN(s) ((struct sockaddr_in *)s)
#define SDL(s) ((struct sockaddr_dl *)s)
@@ -545,6 +550,7 @@ in_arpinput(m)
struct sockaddr_dl *sdl;
struct sockaddr sa;
struct in_addr isaddr, itaddr, myaddr;
+ u_int8_t *enaddr = NULL;
int op, rif_len;
int req_len;
@@ -563,10 +569,18 @@ in_arpinput(m)
* For a bridge, we want to check the address irrespective
* of the receive interface. (This will change slightly
* when we have clusters of interfaces).
+ * If the interface does not match, but the receiving interface
+ * is part of carp, we call carp_iamatch to see if this is a
+ * request for the virtual host ip.
+ * XXX: This is really ugly!
*/
LIST_FOREACH(ia, INADDR_HASH(itaddr.s_addr), ia_hash)
- if ((do_bridge || (ia->ia_ifp == ifp)) &&
- itaddr.s_addr == ia->ia_addr.sin_addr.s_addr)
+ if ((do_bridge || (ia->ia_ifp == ifp)
+#ifdef DEV_CARP
+ || (ifp->if_carp
+ && carp_iamatch(ifp->if_carp, ia, &isaddr, &enaddr))
+#endif
+ ) && itaddr.s_addr == ia->ia_addr.sin_addr.s_addr)
goto match;
LIST_FOREACH(ia, INADDR_HASH(isaddr.s_addr), ia_hash)
if ((do_bridge || (ia->ia_ifp == ifp)) &&
@@ -587,8 +601,10 @@ in_arpinput(m)
if (!do_bridge || (ia = TAILQ_FIRST(&in_ifaddrhead)) == NULL)
goto drop;
match:
+ if (!enaddr)
+ enaddr = (u_int8_t *)IF_LLADDR(ifp);
myaddr = ia->ia_addr.sin_addr;
- if (!bcmp(ar_sha(ah), IF_LLADDR(ifp), ifp->if_addrlen))
+ if (!bcmp(ar_sha(ah), enaddr, ifp->if_addrlen))
goto drop; /* it's from me, ignore it. */
if (!bcmp(ar_sha(ah), ifp->if_broadcastaddr, ifp->if_addrlen)) {
log(LOG_ERR,
@@ -711,7 +727,7 @@ reply:
if (itaddr.s_addr == myaddr.s_addr) {
/* I am the target */
(void)memcpy(ar_tha(ah), ar_sha(ah), ah->ar_hln);
- (void)memcpy(ar_sha(ah), IF_LLADDR(ifp), ah->ar_hln);
+ (void)memcpy(ar_sha(ah), enaddr, ah->ar_hln);
} else {
la = arplookup(itaddr.s_addr, 0, SIN_PROXY);
if (la == NULL) {
@@ -738,7 +754,7 @@ reply:
goto drop;
}
(void)memcpy(ar_tha(ah), ar_sha(ah), ah->ar_hln);
- (void)memcpy(ar_sha(ah), IF_LLADDR(ifp), ah->ar_hln);
+ (void)memcpy(ar_sha(ah), enaddr, ah->ar_hln);
rtfree(rt);
/*
@@ -880,6 +896,19 @@ arp_ifinit(ifp, ifa)
ifa->ifa_flags |= RTF_CLONING;
}
+/*
+ * Like the tail of arp_ifinit(), but with a caller-supplied link-layer
+ * address: unless the interface address is INADDR_ANY, call arprequest()
+ * with that address as both IP arguments and enaddr as the hardware
+ * address, then install arp_rtrequest and mark the address RTF_CLONING.
+ */
+void
+arp_ifinit2(ifp, ifa, enaddr)
+	struct ifnet *ifp;
+	struct ifaddr *ifa;
+	u_char *enaddr;
+{
+	struct in_addr *self = &IA_SIN(ifa)->sin_addr;
+
+	if (ntohl(self->s_addr) != INADDR_ANY)
+		arprequest(ifp, self, self, enaddr);
+
+	ifa->ifa_flags |= RTF_CLONING;
+	ifa->ifa_rtrequest = arp_rtrequest;
+}
+
+
static void
arp_init(void)
{
diff --git a/sys/netinet/if_ether.h b/sys/netinet/if_ether.h
index 9b60492..14df15f 100644
--- a/sys/netinet/if_ether.h
+++ b/sys/netinet/if_ether.h
@@ -112,6 +112,7 @@ extern u_char ether_ipmulticast_max[ETHER_ADDR_LEN];
int arpresolve(struct ifnet *ifp, struct rtentry *rt,
struct mbuf *m, struct sockaddr *dst, u_char *desten);
void arp_ifinit(struct ifnet *, struct ifaddr *);
+void arp_ifinit2(struct ifnet *, struct ifaddr *, u_char *);
#endif
#endif
diff --git a/sys/netinet/in.h b/sys/netinet/in.h
index 20995bd..45c269c 100644
--- a/sys/netinet/in.h
+++ b/sys/netinet/in.h
@@ -230,6 +230,7 @@ __END_DECLS
#define IPPROTO_IPCOMP 108 /* payload compression (IPComp) */
/* 101-254: Partly Unassigned */
#define IPPROTO_PIM 103 /* Protocol Independent Mcast */
+#define IPPROTO_CARP 112 /* CARP */
#define IPPROTO_PGM 113 /* PGM */
#define IPPROTO_PFSYNC 240 /* PFSYNC */
/* 255: Reserved */
@@ -357,6 +358,7 @@ __END_DECLS
#define INADDR_UNSPEC_GROUP (u_int32_t)0xe0000000 /* 224.0.0.0 */
#define INADDR_ALLHOSTS_GROUP (u_int32_t)0xe0000001 /* 224.0.0.1 */
#define INADDR_ALLRTRS_GROUP (u_int32_t)0xe0000002 /* 224.0.0.2 */
+#define INADDR_CARP_GROUP (u_int32_t)0xe0000012 /* 224.0.0.18 */
#define INADDR_PFSYNC_GROUP (u_int32_t)0xe00000f0 /* 224.0.0.240 */
#define INADDR_ALLMDNS_GROUP (u_int32_t)0xe00000fb /* 224.0.0.251 */
#define INADDR_MAX_LOCAL_GROUP (u_int32_t)0xe00000ff /* 224.0.0.255 */
diff --git a/sys/netinet/in_proto.c b/sys/netinet/in_proto.c
index 9ae32c4..103682f 100644
--- a/sys/netinet/in_proto.c
+++ b/sys/netinet/in_proto.c
@@ -35,6 +35,7 @@
#include "opt_ipsec.h"
#include "opt_inet6.h"
#include "opt_pf.h"
+#include "opt_carp.h"
#include <sys/param.h>
#include <sys/systm.h>
@@ -90,6 +91,10 @@
#include <net/if_pfsync.h>
#endif
+#ifdef DEV_CARP
+#include <netinet/ip_carp.h>
+#endif
+
extern struct domain inetdomain;
/* Spacer for loadable protocols. */
@@ -237,6 +242,14 @@ struct protosw inetsw[] = {
&rip_usrreqs
},
#endif /* DEV_PFSYNC */
+#ifdef DEV_CARP
+{ SOCK_RAW, &inetdomain, IPPROTO_CARP, PR_ATOMIC|PR_ADDR,
+ carp_input, (pr_output_t*)rip_output, 0, rip_ctloutput,
+ 0,
+ 0, 0, 0, 0,
+ &rip_usrreqs
+},
+#endif /* DEV_CARP */
/* Spacer n-times for loadable protocols. */
IPPROTOSPACER,
IPPROTOSPACER,
@@ -290,3 +303,6 @@ SYSCTL_NODE(_net_inet, IPPROTO_RAW, raw, CTLFLAG_RW, 0, "RAW");
#ifdef PIM
SYSCTL_NODE(_net_inet, IPPROTO_PIM, pim, CTLFLAG_RW, 0, "PIM");
#endif
+#ifdef DEV_CARP
+SYSCTL_NODE(_net_inet, IPPROTO_CARP, carp, CTLFLAG_RW, 0, "CARP");
+#endif
diff --git a/sys/netinet/ip_carp.c b/sys/netinet/ip_carp.c
new file mode 100644
index 0000000..ec54ef7
--- /dev/null
+++ b/sys/netinet/ip_carp.c
@@ -0,0 +1,2032 @@
+/* $FreeBSD$ */
+
+/*
+ * Copyright (c) 2002 Michael Shalayeff. All rights reserved.
+ * Copyright (c) 2003 Ryan McBride. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "opt_carp.h"
+#include "opt_bpf.h"
+#include "opt_inet.h"
+#include "opt_inet6.h"
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/conf.h>
+#include <sys/kernel.h>
+#include <sys/limits.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/module.h>
+#include <sys/time.h>
+#include <sys/proc.h>
+#include <sys/sysctl.h>
+#include <sys/syslog.h>
+#include <sys/signalvar.h>
+#include <sys/filio.h>
+#include <sys/sockio.h>
+
+#include <sys/socket.h>
+#include <sys/vnode.h>
+
+#include <machine/stdarg.h>
+
+#include <net/bpf.h>
+#include <net/ethernet.h>
+#include <net/fddi.h>
+#include <net/iso88025.h>
+#include <net/if.h>
+#include <net/if_clone.h>
+#include <net/if_types.h>
+#include <net/route.h>
+
+#ifdef INET
+#include <netinet/in.h>
+#include <netinet/in_var.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/ip_var.h>
+#include <netinet/if_ether.h>
+#include <machine/in_cksum.h>
+#endif
+
+#ifdef INET6
+#include <netinet/icmp6.h>
+#include <netinet/ip6.h>
+#include <netinet6/ip6_var.h>
+#include <netinet6/nd6.h>
+#include <net/if_dl.h>
+#endif
+
+#include <crypto/sha1.h>
+#include <netinet/ip_carp.h>
+
+#define CARP_IFNAME "carp"
+static MALLOC_DEFINE(M_CARP, "CARP", "CARP interfaces");
+SYSCTL_DECL(_net_inet_carp);
+
+struct carp_softc {
+ struct arpcom sc_ac; /* Interface clue */
+ int if_flags; /* UP/DOWN */
+ struct ifnet *sc_ifp; /* Parent */
+ struct in_ifaddr *sc_ia; /* primary iface address */
+ struct ip_moptions sc_imo; /* IPv4 multicast state; membership dropped in carp_clone_destroy */
+#ifdef INET6
+ struct in6_ifaddr *sc_ia6; /* primary iface address v6 */
+ struct ip6_moptions sc_im6o; /* IPv6 multicast state; hop limit preset to CARP_DFLTTL */
+#endif /* INET6 */
+ TAILQ_ENTRY(carp_softc) sc_list; /* linkage on the parent's carp_if vhif_vrs queue */
+
+ enum { INIT = 0, BACKUP, MASTER } sc_state; /* protocol state, switched on in carp_input_c */
+
+ int sc_flags_backup;
+ int sc_suppress;
+
+ int sc_sendad_errors;
+#define CARP_SENDAD_MAX_ERRORS 3
+ int sc_sendad_success;
+#define CARP_SENDAD_MIN_SUCCESS 3
+
+ int sc_vhid; /* virtual host id, matched against carp_vhid on input; -1 until set */
+ int sc_advskew; /* advertisement skew, in 1/256 second units */
+ int sc_naddrs; /* presumably the number of IPv4 addresses - TODO confirm */
+ int sc_naddrs6; /* presumably the number of IPv6 addresses - TODO confirm */
+ int sc_advbase; /* seconds */
+ int sc_init_counter; /* nonzero: sc_counter still has to be seeded randomly */
+ u_int64_t sc_counter; /* counter carried in carp_counter[] of advertisements */
+
+ /* authentication */
+#define CARP_HMAC_PAD 64
+ unsigned char sc_key[CARP_KEY_LEN]; /* HMAC key */
+ unsigned char sc_pad[CARP_HMAC_PAD]; /* holds the HMAC opad after carp_hmac_prepare */
+ SHA1_CTX sc_sha1; /* inner hash state precomputed by carp_hmac_prepare */
+
+ struct callout sc_ad_tmo; /* advertisement timeout */
+ struct callout sc_md_tmo; /* master down timeout */
+ struct callout sc_md6_tmo; /* master down timeout (presumably the IPv6 one - TODO confirm) */
+
+ LIST_ENTRY(carp_softc) sc_next; /* linkage on the global carpif_list */
+};
+#define sc_if sc_ac.ac_if
+
+int carp_suppress_preempt = 0; /* nonzero: an advskew of at least 240 is used when comparing and advertising */
+int carp_opts[CARPCTL_MAXID] = { 0, 1, 0, 1, 0, 0 }; /* XXX for now */
+SYSCTL_INT(_net_inet_carp, CARPCTL_ALLOW, allow, CTLFLAG_RW,
+ &carp_opts[CARPCTL_ALLOW], 0, "Accept incoming CARP packets");
+SYSCTL_INT(_net_inet_carp, CARPCTL_PREEMPT, preempt, CTLFLAG_RW,
+ &carp_opts[CARPCTL_PREEMPT], 0, "high-priority backup preemption mode");
+SYSCTL_INT(_net_inet_carp, CARPCTL_LOG, log, CTLFLAG_RW,
+ &carp_opts[CARPCTL_LOG], 0, "log bad carp packets");
+SYSCTL_INT(_net_inet_carp, CARPCTL_ARPBALANCE, arpbalance, CTLFLAG_RW,
+ &carp_opts[CARPCTL_ARPBALANCE], 0, "balance arp responses");
+
+struct carpstats carpstats; /* global packet counters, exported by the stats sysctl below */
+SYSCTL_STRUCT(_net_inet_carp, CARPCTL_STATS, stats, CTLFLAG_RW,
+ &carpstats, carpstats,
+ "CARP statistics (struct carpstats, netinet/ip_carp.h)");
+
+struct carp_if {
+ TAILQ_HEAD(, carp_softc) vhif_vrs; /* carp_softc entries hung off an ifnet's if_carp, linked via sc_list */
+ int vhif_nvrs; /* entry count; carp_clone_destroy frees the carp_if when it reaches 0 */
+
+ struct ifnet *vhif_ifp; /* presumably the ifnet whose if_carp points here - TODO confirm */
+ struct mtx vhif_mtx; /* mutex taken by CARP_LOCK()/CARP_UNLOCK() */
+};
+/*
+ * lock per carp_if queue
+ *
+ * Every macro takes a pointer to a struct carp_if; the argument is
+ * parenthesized in each expansion so arbitrary pointer expressions work.
+ */
+#define	CARP_LOCK_INIT(cif)	mtx_init(&(cif)->vhif_mtx, "carp",	\
+	    NULL, MTX_DEF)
+#define	CARP_LOCK_DESTROY(cif)	mtx_destroy(&(cif)->vhif_mtx)
+#define	CARP_LOCK_ASSERT(cif)	mtx_assert(&(cif)->vhif_mtx, MA_OWNED)
+#define	CARP_LOCK(cif)		mtx_lock(&(cif)->vhif_mtx)
+#define	CARP_UNLOCK(cif)	mtx_unlock(&(cif)->vhif_mtx)
+
+/*
+ * Emit a log line when the CARPCTL_LOG option is enabled.
+ *
+ * sc is a carp_softc pointer or NULL; it selects the prefix ("<ifname>: "
+ * or "carp: ").  s is a complete, parenthesized printf() argument list,
+ * e.g. CARP_LOG(sc, ("bad ttl %d", ttl));
+ *
+ * The body is wrapped in do { } while (0) so the macro behaves as a single
+ * statement (safe in unbraced if/else), and sc is parenthesized.
+ */
+#define	CARP_LOG(sc, s) do {						\
+	if (carp_opts[CARPCTL_LOG]) {					\
+		if ((sc) != NULL)					\
+			log(LOG_INFO, "%s: ", (sc)->sc_if.if_xname);	\
+		else							\
+			log(LOG_INFO, "carp: ");			\
+		printf s;						\
+	}								\
+} while (0)
+
+void carp_hmac_prepare(struct carp_softc *);
+void carp_hmac_generate(struct carp_softc *, u_int32_t *,
+ unsigned char *);
+int carp_hmac_verify(struct carp_softc *, u_int32_t *,
+ unsigned char *);
+void carp_setroute(struct carp_softc *, int);
+void carp_input_c(struct mbuf *, struct carp_softc *,
+ struct carp_header *, sa_family_t);
+int carp_clone_create(struct if_clone *, int);
+void carp_clone_destroy(struct ifnet *);
+void carpdetach(struct carp_softc *);
+int carp_prepare_ad(struct mbuf *, struct carp_softc *,
+ struct carp_header *);
+void carp_send_ad_all(void);
+void carp_send_ad(void *);
+void carp_send_arp(struct carp_softc *);
+void carp_master_down(void *);
+int carp_ioctl(struct ifnet *, u_long, caddr_t);
+static int carp_looutput(struct ifnet *, struct mbuf *, struct sockaddr *,
+ struct rtentry *);
+void carp_start(struct ifnet *);
+void carp_setrun(struct carp_softc *, sa_family_t);
+void carp_set_state(struct carp_softc *, int);
+int carp_addrcount(struct carp_if *, struct in_ifaddr *, int);
+enum { CARP_COUNT_MASTER, CARP_COUNT_RUNNING };
+
+int carp_set_addr(struct carp_softc *, struct sockaddr_in *);
+int carp_del_addr(struct carp_softc *, struct sockaddr_in *);
+#ifdef INET6
+void carp_send_na(struct carp_softc *);
+int carp_set_addr6(struct carp_softc *, struct sockaddr_in6 *);
+int carp_del_addr6(struct carp_softc *, struct sockaddr_in6 *);
+#endif
+
+static LIST_HEAD(, carp_softc) carpif_list;
+IFC_SIMPLE_DECLARE(carp, 0);
+
+/* Thin wrapper: returns in_cksum(m, len). */
+static __inline u_int16_t
+carp_cksum(struct mbuf *m, int len)
+{
+	u_int16_t sum;
+
+	sum = in_cksum(m, len);
+	return (sum);
+}
+
+/*
+ * Precompute the per-softc part of the HMAC-SHA1.
+ *
+ * sc_sha1 is left holding the inner hash state after absorbing
+ * (key ^ ipad), the CARP version, the advertisement type, the low byte of
+ * the vhid, then every AF_INET address and every AF_INET6 address of the
+ * carp interface (link-local IPv6 addresses with s6_addr16[1] cleared).
+ * On return sc_pad holds (key ^ opad), ready for the outer hash in
+ * carp_hmac_generate().
+ */
+void
+carp_hmac_prepare(struct carp_softc *sc)
+{
+	u_int8_t hdr[3];
+	struct ifaddr *ifa;
+	size_t n;
+#ifdef INET6
+	struct in6_addr in6;
+#endif
+
+	hdr[0] = CARP_VERSION;
+	hdr[1] = CARP_ADVERTISEMENT;
+	hdr[2] = sc->sc_vhid & 0xff;
+
+	/* sc_pad = zero-padded key XOR 0x36 (ipad) */
+	bzero(sc->sc_pad, sizeof(sc->sc_pad));
+	bcopy(sc->sc_key, sc->sc_pad, sizeof(sc->sc_key));
+	for (n = 0; n < sizeof(sc->sc_pad); n++)
+		sc->sc_pad[n] ^= 0x36;
+
+	/* start the inner hash: ipad, then version/type/vhid bytes */
+	SHA1Init(&sc->sc_sha1);
+	SHA1Update(&sc->sc_sha1, sc->sc_pad, sizeof(sc->sc_pad));
+	SHA1Update(&sc->sc_sha1, (void *)hdr, sizeof(hdr));
+#ifdef INET
+	/* all IPv4 addresses, in list order */
+	TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) {
+		if (ifa->ifa_addr->sa_family != AF_INET)
+			continue;
+		SHA1Update(&sc->sc_sha1,
+		    (void *)&ifatoia(ifa)->ia_addr.sin_addr.s_addr,
+		    sizeof(struct in_addr));
+	}
+#endif /* INET */
+#ifdef INET6
+	/* then all IPv6 addresses, in list order */
+	TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) {
+		if (ifa->ifa_addr->sa_family != AF_INET6)
+			continue;
+		in6 = ifatoia6(ifa)->ia_addr.sin6_addr;
+		if (IN6_IS_ADDR_LINKLOCAL(&in6))
+			in6.s6_addr16[1] = 0;
+		SHA1Update(&sc->sc_sha1, (void *)&in6, sizeof(in6));
+	}
+#endif /* INET6 */
+
+	/* flip sc_pad from ipad to opad in place */
+	for (n = 0; n < sizeof(sc->sc_pad); n++)
+		sc->sc_pad[n] ^= 0x36 ^ 0x5c;
+}
+
+/*
+ * Compute the HMAC-SHA1 of an advertisement counter.
+ *
+ * The inner hash resumes from the state stored in sc->sc_sha1 and absorbs
+ * the two 32-bit counter words exactly as passed in; the outer hash covers
+ * sc->sc_pad followed by the 20-byte inner digest.  The result is written
+ * to md (20 bytes).
+ */
+void
+carp_hmac_generate(struct carp_softc *sc, u_int32_t counter[2],
+    unsigned char md[20])
+{
+	SHA1_CTX ctx;
+
+	/* inner hash: continue from the precomputed state */
+	ctx = sc->sc_sha1;
+	SHA1Update(&ctx, (void *)counter, 2 * sizeof(counter[0]));
+	SHA1Final(md, &ctx);
+
+	/* outer hash: opad || inner digest */
+	SHA1Init(&ctx);
+	SHA1Update(&ctx, sc->sc_pad, sizeof(sc->sc_pad));
+	SHA1Update(&ctx, md, 20);
+	SHA1Final(md, &ctx);
+}
+
+/*
+ * Check a received digest against the one we compute for counter.
+ *
+ * Returns 0 when md matches, non-zero otherwise (same contract as the
+ * bcmp() result callers already test).  md comes straight from the
+ * network, so the comparison always walks all 20 bytes instead of
+ * stopping at the first mismatch; that way the time taken does not reveal
+ * how many leading bytes of a forged digest were correct.
+ */
+int
+carp_hmac_verify(struct carp_softc *sc, u_int32_t counter[2],
+    unsigned char md[20])
+{
+	unsigned char md2[20];
+	unsigned char diff = 0;
+	size_t i;
+
+	carp_hmac_generate(sc, counter, md2);
+
+	for (i = 0; i < sizeof(md2); i++)
+		diff |= md[i] ^ md2[i];
+
+	return (diff);
+}
+
+/*
+ * Apply cmd (RTM_ADD or RTM_DELETE) to the addresses of a carp interface.
+ *
+ * IPv4: only when a parent interface is set; the host route is added when
+ * carp_addrcount(..., CARP_COUNT_MASTER) reports exactly 1 and deleted
+ * when it reports 0.
+ * IPv6: in6_ifaddloop() for RTM_ADD, in6_ifremloop() for any other cmd.
+ * Runs at splnet.
+ */
+void
+carp_setroute(struct carp_softc *sc, int cmd)
+{
+	struct ifaddr *ifa;
+	int nmaster, s;
+
+	s = splnet();
+	TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) {
+		switch (ifa->ifa_addr->sa_family) {
+		case AF_INET:
+			if (sc->sc_ifp == NULL)
+				break;
+			nmaster = carp_addrcount(
+			    (struct carp_if *)sc->sc_ifp->if_carp,
+			    ifatoia(ifa), CARP_COUNT_MASTER);
+			if ((cmd == RTM_ADD && nmaster == 1) ||
+			    (cmd == RTM_DELETE && nmaster == 0))
+				rtinit(ifa, cmd, RTF_UP | RTF_HOST);
+			break;
+#ifdef INET6
+		case AF_INET6:
+			if (cmd == RTM_ADD)
+				in6_ifaddloop(ifa);
+			else
+				in6_ifremloop(ifa);
+			break;
+#endif /* INET6 */
+		default:
+			break;
+		}
+	}
+	splx(s);
+}
+
+/*
+ * if_clone create hook: allocate a zeroed softc, fill in the CARP
+ * defaults, initialize the three callouts, then attach the carp<unit>
+ * interface, link the softc onto carpif_list and attach bpf.
+ * Returns 0.
+ */
+int
+carp_clone_create(struct if_clone *ifc, int unit)
+{
+	struct carp_softc *sc;
+	struct ifnet *ifp;
+
+	MALLOC(sc, struct carp_softc *, sizeof(*sc), M_CARP, M_WAITOK|M_ZERO);
+	ifp = &sc->sc_if;
+
+	/* protocol defaults */
+	sc->sc_vhid = -1;	/* required setting */
+	sc->sc_advbase = CARP_DFLTINTV;
+	sc->sc_advskew = 0;
+	sc->sc_init_counter = 1;
+	sc->sc_suppress = 0;
+	sc->sc_flags_backup = 0;
+	sc->sc_naddrs = 0;	/* M_ZERO? */
+	sc->sc_naddrs6 = 0;
+#ifdef INET6
+	sc->sc_im6o.im6o_multicast_hlim = CARP_DFLTTL;
+#endif
+
+	/* timers */
+	callout_init(&sc->sc_ad_tmo, 0);
+	callout_init(&sc->sc_md_tmo, 0);
+	callout_init(&sc->sc_md6_tmo, 0);
+
+	/* interface setup */
+	if_initname(ifp, CARP_IFNAME, unit);
+	ifp->if_softc = sc;
+	ifp->if_type = IFT_CARP;
+	ifp->if_flags = 0;
+	ifp->if_mtu = ETHERMTU;
+	ifp->if_hdrlen = 0;
+	ifp->if_snd.ifq_maxlen = ifqmaxlen;
+	ifp->if_ioctl = carp_ioctl;
+	ifp->if_output = carp_looutput;
+	ifp->if_start = carp_start;
+
+	if_attach(ifp);
+	LIST_INSERT_HEAD(&carpif_list, sc, sc_next);
+	bpfattach(ifp, DLT_LOOP, sizeof(u_int32_t));
+
+	return (0);
+}
+
+/*
+ * if_clone destroy hook: stop the callouts, drop multicast memberships,
+ * unlink the softc from its parent's carp_if (freeing the carp_if when
+ * this was the last entry), detach the interface and free the softc.
+ * Note that carpdetach() is not called here.
+ */
+void
+carp_clone_destroy(struct ifnet *ifp)
+{
+	struct carp_softc *sc = ifp->if_softc;
+	struct ip_moptions *imo = &sc->sc_imo;
+	struct carp_if *cif;
+#ifdef INET6
+	struct ip6_moptions *im6o = &sc->sc_im6o;
+	struct in6_multi_mship *imm;
+#endif
+
+	callout_stop(&sc->sc_ad_tmo);
+	callout_stop(&sc->sc_md_tmo);
+	callout_stop(&sc->sc_md6_tmo);
+
+	/* IPv4: leave the most recently joined group, if any */
+	if (imo->imo_num_memberships != 0) {
+		imo->imo_num_memberships--;
+		in_delmulti(imo->imo_membership[imo->imo_num_memberships]);
+		imo->imo_multicast_ifp = NULL;
+	}
+#ifdef INET6
+	/* IPv6: leave every group on the membership list */
+	while ((imm = LIST_FIRST(&im6o->im6o_memberships)) != NULL) {
+		LIST_REMOVE(imm, i6mm_chain);
+		in6_leavegroup(imm);
+	}
+	im6o->im6o_multicast_ifp = NULL;
+#endif
+
+	/* Remove ourself from parents if_carp queue */
+	cif = (sc->sc_ifp != NULL) ? sc->sc_ifp->if_carp : NULL;
+	if (cif != NULL) {
+		CARP_LOCK(cif);
+		TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list);
+		cif->vhif_nvrs--;
+		if (cif->vhif_nvrs == 0) {
+			/* last entry: the carp_if goes away with its lock */
+			sc->sc_ifp->if_carp = NULL;
+			CARP_LOCK_DESTROY(cif);
+			FREE(cif, M_CARP);
+		} else {
+			CARP_UNLOCK(cif);
+		}
+	}
+
+	bpfdetach(ifp);
+	if_detach(ifp);
+	LIST_REMOVE(sc, sc_next);
+	free(sc, M_CARP);
+}
+
+/*
+ * process input packet.
+ * we have rearranged checks order compared to the rfc,
+ * but it seems more efficient this way or not possible otherwise.
+ *
+ * IPv4 entry point.  The packet is dropped (and the matching carpstats
+ * counter bumped) when CARP input is disabled, the receive interface has
+ * no carp_if, the TTL is not CARP_DFLTTL, the packet is shorter than
+ * IP header + CARP header, the header cannot be pulled up, or the CARP
+ * checksum fails.  Otherwise the mbuf is handed to carp_input_c().
+ * The hlen argument is not used; the header length is taken from ip_hl.
+ */
+void
+carp_input(struct mbuf *m, int hlen)
+{
+	struct carp_softc *sc = NULL;	/* no softc known yet; used for logging */
+	struct ip *ip = mtod(m, struct ip *);
+	struct carp_header *ch;
+	int iplen, len;
+	u_int16_t sum;
+
+	carpstats.carps_ipackets++;
+
+	if (!carp_opts[CARPCTL_ALLOW]) {
+		m_freem(m);
+		return;
+	}
+
+	/* check if received on a valid carp interface */
+	if (m->m_pkthdr.rcvif->if_carp == NULL) {
+		carpstats.carps_badif++;
+		CARP_LOG(sc, ("packet received on non-carp interface: %s",
+		    m->m_pkthdr.rcvif->if_xname));
+		m_freem(m);
+		return;
+	}
+
+	/* verify that the IP TTL is 255. */
+	if (ip->ip_ttl != CARP_DFLTTL) {
+		carpstats.carps_badttl++;
+		CARP_LOG(sc, ("received ttl %d != 255 on %s", ip->ip_ttl,
+		    m->m_pkthdr.rcvif->if_xname));
+		m_freem(m);
+		return;
+	}
+
+	iplen = ip->ip_hl << 2;
+	len = iplen + sizeof(*ch);
+
+	/* the whole packet must hold at least IP header + CARP header */
+	if (m->m_pkthdr.len < len) {
+		carpstats.carps_badlen++;
+		CARP_LOG(sc, ("received len %d < sizeof(struct carp_header)",
+		    m->m_pkthdr.len - iplen));
+		m_freem(m);
+		return;
+	}
+
+	/*
+	 * Make both headers contiguous in the first mbuf.  A single
+	 * m_pullup() is enough; on failure the chain is already gone.
+	 */
+	if ((m = m_pullup(m, len)) == NULL) {
+		carpstats.carps_hdrops++;
+		return;
+	}
+	ip = mtod(m, struct ip *);
+	ch = (struct carp_header *)((char *)ip + iplen);
+
+	/*
+	 * verify the CARP checksum: temporarily skip the IP header and
+	 * restore m_data before acting on the result.
+	 */
+	m->m_data += iplen;
+	sum = carp_cksum(m, len - iplen);
+	m->m_data -= iplen;
+	if (sum) {
+		carpstats.carps_badsum++;
+		CARP_LOG(sc, ("checksum failed on %s",
+		    m->m_pkthdr.rcvif->if_xname));
+		m_freem(m);
+		return;
+	}
+
+	carp_input_c(m, sc, ch, AF_INET);
+}
+
+#ifdef INET6
+/*
+ * IPv6 entry point.  Performs the same admission checks as carp_input()
+ * (input enabled, carp-enabled receive interface, hop limit CARP_DFLTTL,
+ * complete CARP header, valid checksum) and then passes the packet to
+ * carp_input_c().  Always returns IPPROTO_DONE.
+ *
+ * The receive interface is cached up front: when IP6_EXTHDR_GET() fails,
+ * this function treats m as already consumed (it returns without
+ * m_freem()), so m must not be dereferenced for the log message.
+ */
+int
+carp6_input(struct mbuf **mp, int *offp, int proto)
+{
+	struct mbuf *m = *mp;
+	struct ifnet *ifp = m->m_pkthdr.rcvif;
+	struct carp_softc *sc = NULL;	/* no softc known yet; used for logging */
+	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
+	struct carp_header *ch;
+	u_int16_t sum;
+	u_int len;
+
+	carpstats.carps_ipackets6++;
+
+	if (!carp_opts[CARPCTL_ALLOW]) {
+		m_freem(m);
+		return (IPPROTO_DONE);
+	}
+
+	/* check if received on a valid carp interface */
+	if (ifp->if_carp == NULL) {
+		carpstats.carps_badif++;
+		CARP_LOG(sc, ("packet received on non-carp interface: %s",
+		    ifp->if_xname));
+		m_freem(m);
+		return (IPPROTO_DONE);
+	}
+
+	/* verify that the IP TTL is 255 */
+	if (ip6->ip6_hlim != CARP_DFLTTL) {
+		carpstats.carps_badttl++;
+		CARP_LOG(sc, ("received ttl %d != 255 on %s", ip6->ip6_hlim,
+		    ifp->if_xname));
+		m_freem(m);
+		return (IPPROTO_DONE);
+	}
+
+	/* verify that we have a complete carp packet */
+	len = m->m_len;
+	IP6_EXTHDR_GET(ch, struct carp_header *, m, *offp, sizeof(*ch));
+	if (ch == NULL) {
+		/* m is not touched here; see the function comment */
+		carpstats.carps_badlen++;
+		CARP_LOG(sc, ("packet size %u too small on %s", len,
+		    ifp->if_xname));
+		return (IPPROTO_DONE);
+	}
+
+	/*
+	 * verify the CARP checksum: temporarily skip to the CARP header
+	 * and restore m_data before acting on the result.
+	 */
+	m->m_data += *offp;
+	sum = carp_cksum(m, sizeof(*ch));
+	m->m_data -= *offp;
+	if (sum) {
+		carpstats.carps_badsum++;
+		CARP_LOG(sc, ("checksum failed, on %s", ifp->if_xname));
+		m_freem(m);
+		return (IPPROTO_DONE);
+	}
+
+	carp_input_c(m, sc, ch, AF_INET6);
+	return (IPPROTO_DONE);
+}
+#endif /* INET6 */
+
+/*
+ * Address-family independent part of CARP input.
+ *
+ * m  - the received packet; always consumed (freed) here.
+ * sc - ignored on entry; the softc is looked up by VHID on the receive
+ *      interface's carp_if.
+ * ch - CARP header inside m.
+ * af - AF_INET or AF_INET6, as passed by the protocol entry point.
+ *
+ * After the VHID lookup the packet is counted, optionally tapped to bpf,
+ * and its version and HMAC are verified.  The advertised interval is then
+ * compared with our own to drive the INIT/MASTER/BACKUP state machine.
+ */
+void
+carp_input_c(struct mbuf *m, struct carp_softc *sc,
+    struct carp_header *ch, sa_family_t af)
+{
+	struct ifnet *ifp = m->m_pkthdr.rcvif;
+	u_int64_t tmp_counter;
+	struct timeval sc_tv, ch_tv;
+
+	/* verify that the VHID is valid on the receiving interface */
+	CARP_LOCK(ifp->if_carp);
+	TAILQ_FOREACH(sc, &((struct carp_if *)ifp->if_carp)->vhif_vrs, sc_list)
+		if (sc->sc_vhid == ch->carp_vhid)
+			break;
+	CARP_UNLOCK(ifp->if_carp);
+	if (!sc || (sc->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) !=
+	    (IFF_UP|IFF_RUNNING)) {
+		carpstats.carps_badvhid++;
+		m_freem(m);
+		return;
+	}
+
+	getmicrotime(&sc->sc_if.if_lastchange);
+	sc->sc_if.if_ipackets++;
+	sc->sc_if.if_ibytes += m->m_pkthdr.len;
+
+	if (sc->sc_if.if_bpf) {
+		/*
+		 * We need to prepend the address family as
+		 * a four byte field. Cons up a dummy header
+		 * to pacify bpf. This is safe because bpf
+		 * will only read from the mbuf (i.e., it won't
+		 * try to free it or keep a pointer to it).
+		 */
+		struct mbuf m0;
+		u_int32_t maf = htonl(af);
+
+		m0.m_next = m;
+		m0.m_len = sizeof(maf);
+		m0.m_data = (char *)&maf;
+		if (af == AF_INET) {
+			/*
+			 * BPF wants net byte order.  ip_len and ip_off
+			 * are 16-bit fields, so htons() is the right
+			 * conversion, and only an IPv4 packet has an
+			 * IPv4 header to fix up.
+			 */
+			struct ip *ip = mtod(m, struct ip *);
+
+			ip->ip_len = htons(ip->ip_len);
+			ip->ip_off = htons(ip->ip_off);
+		}
+		BPF_MTAP(&sc->sc_if, &m0);
+	}
+
+	/* verify the CARP version. */
+	if (ch->carp_version != CARP_VERSION) {
+		carpstats.carps_badver++;
+		sc->sc_if.if_ierrors++;
+		CARP_LOG(sc, ("invalid version %d", ch->carp_version));
+		m_freem(m);
+		return;
+	}
+
+	/* verify the hash */
+	if (carp_hmac_verify(sc, ch->carp_counter, ch->carp_md)) {
+		carpstats.carps_badauth++;
+		sc->sc_if.if_ierrors++;
+		CARP_LOG(sc, ("incorrect hash"));
+		m_freem(m);
+		return;
+	}
+
+	/* reassemble the 64-bit counter from its two network-order words */
+	tmp_counter = ntohl(ch->carp_counter[0]);
+	tmp_counter = tmp_counter<<32;
+	tmp_counter += ntohl(ch->carp_counter[1]);
+
+	/* XXX Replay protection goes here */
+
+	sc->sc_init_counter = 0;
+	sc->sc_counter = tmp_counter;
+
+	/* our interval vs. the advertised one; skew is in 1/256 s units */
+	sc_tv.tv_sec = sc->sc_advbase;
+	if (carp_suppress_preempt && sc->sc_advskew < 240)
+		sc_tv.tv_usec = 240 * 1000000 / 256;
+	else
+		sc_tv.tv_usec = sc->sc_advskew * 1000000 / 256;
+	ch_tv.tv_sec = ch->carp_advbase;
+	ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256;
+
+	switch (sc->sc_state) {
+	case INIT:
+		break;
+	case MASTER:
+		/*
+		 * If we receive an advertisement from a master who's going to
+		 * be more frequent than us, go into BACKUP state.
+		 */
+		if (timevalcmp(&sc_tv, &ch_tv, >) ||
+		    timevalcmp(&sc_tv, &ch_tv, ==)) {
+			callout_stop(&sc->sc_ad_tmo);
+			carp_set_state(sc, BACKUP);
+			carp_setrun(sc, 0);
+			carp_setroute(sc, RTM_DELETE);
+		}
+		break;
+	case BACKUP:
+		/*
+		 * If we're pre-empting masters who advertise slower than us,
+		 * and this one claims to be slower, treat him as down.
+		 */
+		if (carp_opts[CARPCTL_PREEMPT] &&
+		    timevalcmp(&sc_tv, &ch_tv, <)) {
+			carp_master_down(sc);
+			break;
+		}
+
+		/*
+		 * If the master is going to advertise at such a low frequency
+		 * that he's guaranteed to time out, we might as well just
+		 * treat him as timed out now.
+		 */
+		sc_tv.tv_sec = sc->sc_advbase * 3;
+		if (timevalcmp(&sc_tv, &ch_tv, <)) {
+			carp_master_down(sc);
+			break;
+		}
+
+		/*
+		 * Otherwise, we reset the counter and wait for the next
+		 * advertisement.
+		 */
+		carp_setrun(sc, af);
+		break;
+	}
+
+	m_freem(m);
+	return;
+}
+
+/*
+ * Stop all timers of a carp interface and remove every IPv4 address
+ * configured on it.
+ *
+ * The address list is walked with a saved next pointer and non-AF_INET
+ * entries are skipped.  Re-testing the list head on every pass would
+ * never terminate once the head is an entry this function does not
+ * remove (any address that is not AF_INET).
+ * NOTE(review): assumes carp_del_addr() and in_ifscrub() do not unlink
+ * other entries from this list - confirm.
+ */
+void
+carpdetach(struct carp_softc *sc)
+{
+	struct ifaddr *ifa, *next;
+	struct in_ifaddr *ia;
+
+	callout_stop(&sc->sc_ad_tmo);
+	callout_stop(&sc->sc_md_tmo);
+	callout_stop(&sc->sc_md6_tmo);
+
+	for (ifa = TAILQ_FIRST(&sc->sc_if.if_addrlist); ifa != NULL;
+	    ifa = next) {
+		/* fetch the successor first: ifa may be unlinked below */
+		next = TAILQ_NEXT(ifa, ifa_link);
+		if (ifa->ifa_addr->sa_family != AF_INET)
+			continue;
+
+		ia = ifatoia(ifa);
+		carp_del_addr(sc, &ia->ia_addr);
+
+		/* ripped screaming from in_control(SIOCDIFADDR) */
+		in_ifscrub(&sc->sc_if, ia);
+		TAILQ_REMOVE(&sc->sc_if.if_addrlist, ifa, ifa_link);
+		TAILQ_REMOVE(&in_ifaddrhead, ia, ia_link);
+		IFAFREE((&ia->ia_ifa));
+	}
+}
+
+/*
+ * Detach an interface from the carp: with the carp_if lock held, run
+ * carpdetach() on every softc queued on ifp's carp_if.  The caller must
+ * ensure ifp->if_carp is not NULL.
+ */
+void
+carp_ifdetach(struct ifnet *ifp)
+{
+	struct carp_if *cif = ifp->if_carp;
+	struct carp_softc *vh;
+
+	CARP_LOCK(cif);
+	for (vh = TAILQ_FIRST(&cif->vhif_vrs); vh != NULL;
+	    vh = TAILQ_NEXT(vh, sc_list))
+		carpdetach(vh);
+	CARP_UNLOCK(cif);
+}
+
+/*
+ * Finish an outgoing advertisement: advance (or randomly seed) the
+ * counter, store it in ch in network byte order, compute the HMAC into
+ * ch->carp_md and tag the mbuf with PACKET_TAG_CARP carrying a pointer
+ * to the carp interface.
+ *
+ * Returns 0 on success.  If the tag cannot be allocated, m is freed,
+ * if_oerrors is bumped and ENOMEM is returned.
+ */
+int
+carp_prepare_ad(struct mbuf *m, struct carp_softc *sc, struct carp_header *ch)
+{
+	struct ifnet *ifp = &sc->sc_if;
+	struct m_tag *mtag;
+	u_int64_t hi;
+
+	if (!sc->sc_init_counter) {
+		sc->sc_counter++;
+	} else {
+		/* this could also be seconds since unix epoch */
+		hi = arc4random();
+		sc->sc_counter = (hi << 32) + arc4random();
+	}
+
+	ch->carp_counter[0] = htonl((sc->sc_counter >> 32) & 0xffffffff);
+	ch->carp_counter[1] = htonl(sc->sc_counter & 0xffffffff);
+
+	carp_hmac_generate(sc, ch->carp_counter, ch->carp_md);
+
+	/* Tag packet for carp_output */
+	mtag = m_tag_get(PACKET_TAG_CARP, sizeof(struct ifnet *), M_NOWAIT);
+	if (mtag == NULL) {
+		m_freem(m);
+		ifp->if_oerrors++;
+		return (ENOMEM);
+	}
+	/* the tag payload is the ifnet pointer itself */
+	bcopy(&ifp, (caddr_t)(mtag + 1), sizeof(struct ifnet *));
+	m_tag_prepend(m, mtag);
+
+	return (0);
+}
+
+/*
+ * Walk every interface that has carp state attached and send an
+ * advertisement for each virtual host that is currently MASTER and has at
+ * least one of IFF_UP / IFF_RUNNING set.
+ */
+void
+carp_send_ad_all(void)
+{
+	struct ifnet *parent;
+	struct carp_if *cif;
+	struct carp_softc *vh;
+
+	TAILQ_FOREACH(parent, &ifnet, if_list) {
+		if (parent->if_carp == NULL)
+			continue;
+
+		cif = (struct carp_if *)parent->if_carp;
+		CARP_LOCK(cif);
+		TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
+			if (!(vh->sc_ac.ac_if.if_flags & (IFF_UP|IFF_RUNNING)))
+				continue;
+			if (vh->sc_state != MASTER)
+				continue;
+			carp_send_ad(vh);
+		}
+		CARP_UNLOCK(cif);
+	}
+}
+
+/*
+ * Send one advertisement for the virtual host `v' (a struct carp_softc *)
+ * over IPv4 and/or IPv6, then re-arm the advertisement timer.  The function
+ * is also installed as the sc_ad_tmo callout handler.
+ *
+ * If the carp interface is not both IFF_UP and IFF_RUNNING the packet
+ * advertises advbase = advskew = 255 and the timer is not re-armed.
+ * While carp_suppress_preempt is non-zero, an advskew of 240 is advertised
+ * unless the configured skew is already larger than that.
+ *
+ * Per-softc send errors are tracked: when sc_sendad_errors reaches
+ * CARP_SENDAD_MAX_ERRORS, carp_suppress_preempt is raised, and it is
+ * lowered again after CARP_SENDAD_MIN_SUCCESS successful sends.
+ *
+ * NOTE(review): when carp_prepare_ad() fails this function returns without
+ * re-arming sc_ad_tmo -- confirm that is intended.
+ */
+void
+carp_send_ad(void *v)
+{
+ struct carp_header ch;
+ struct timeval tv;
+ struct carp_softc *sc = v;
+ struct carp_header *ch_ptr;
+ struct mbuf *m;
+ int len, advbase, advskew;
+
+ /* bow out if we've lost our UPness or RUNNINGuiness */
+ if ((sc->sc_if.if_flags &
+ (IFF_UP|IFF_RUNNING)) != (IFF_UP|IFF_RUNNING)) {
+ advbase = 255;
+ advskew = 255;
+ } else {
+ advbase = sc->sc_advbase;
+ if (!carp_suppress_preempt || sc->sc_advskew > 240)
+ advskew = sc->sc_advskew;
+ else
+ advskew = 240;
+ /* tv is only set on this path; every later use is guarded by the 255 check */
+ tv.tv_sec = advbase;
+ tv.tv_usec = advskew * 1000000 / 256;
+ }
+
+ /* Template header; copied into each outgoing packet below. */
+ ch.carp_version = CARP_VERSION;
+ ch.carp_type = CARP_ADVERTISEMENT;
+ ch.carp_vhid = sc->sc_vhid;
+ ch.carp_advbase = advbase;
+ ch.carp_advskew = advskew;
+ ch.carp_authlen = 7; /* XXX DEFINE */
+ ch.carp_pad1 = 0; /* must be zero */
+ ch.carp_cksum = 0;
+
+#ifdef INET
+ if (sc->sc_ia) {
+ struct ip *ip;
+
+ MGETHDR(m, M_DONTWAIT, MT_HEADER);
+ if (m == NULL) {
+ sc->sc_ac.ac_if.if_oerrors++;
+ carpstats.carps_onomem++;
+ /* XXX maybe less ? */
+ if (advbase != 255 || advskew != 255)
+ callout_reset(&sc->sc_ad_tmo, tvtohz(&tv),
+ carp_send_ad, sc);
+ return;
+ }
+ len = sizeof(*ip) + sizeof(ch);
+ m->m_pkthdr.len = len;
+ m->m_pkthdr.rcvif = NULL;
+ m->m_len = len;
+ MH_ALIGN(m, m->m_len);
+ m->m_flags |= M_MCAST;
+ ip = mtod(m, struct ip *);
+ ip->ip_v = IPVERSION;
+ ip->ip_hl = sizeof(*ip) >> 2;
+ ip->ip_tos = IPTOS_LOWDELAY;
+ ip->ip_len = len;
+ ip->ip_id = ip_newid();
+ ip->ip_off = IP_DF;
+ ip->ip_ttl = CARP_DFLTTL;
+ ip->ip_p = IPPROTO_CARP;
+ ip->ip_sum = 0;
+ ip->ip_src.s_addr = sc->sc_ia->ia_addr.sin_addr.s_addr;
+ ip->ip_dst.s_addr = htonl(INADDR_CARP_GROUP);
+
+ ch_ptr = (struct carp_header *)(&ip[1]);
+ bcopy(&ch, ch_ptr, sizeof(ch));
+ /* carp_prepare_ad() frees m itself on failure */
+ if (carp_prepare_ad(m, sc, ch_ptr))
+ return;
+
+ /* Temporarily skip the IP header so the checksum covers only the CARP header. */
+ m->m_data += sizeof(*ip);
+ ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip));
+ m->m_data -= sizeof(*ip);
+
+ getmicrotime(&sc->sc_if.if_lastchange);
+ sc->sc_ac.ac_if.if_opackets++;
+ sc->sc_ac.ac_if.if_obytes += len;
+ carpstats.carps_opackets++;
+
+ if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL)) {
+ sc->sc_if.if_oerrors++;
+ if (sc->sc_sendad_errors < INT_MAX)
+ sc->sc_sendad_errors++;
+ if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) {
+ carp_suppress_preempt++;
+ if (carp_suppress_preempt == 1)
+ carp_send_ad_all();
+ }
+ sc->sc_sendad_success = 0;
+ } else {
+ if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) {
+ if (++sc->sc_sendad_success >=
+ CARP_SENDAD_MIN_SUCCESS) {
+ carp_suppress_preempt--;
+ sc->sc_sendad_errors = 0;
+ }
+ } else
+ sc->sc_sendad_errors = 0;
+ }
+ }
+#endif /* INET */
+#ifdef INET6
+ if (sc->sc_ia6) {
+ struct ip6_hdr *ip6;
+
+ MGETHDR(m, M_DONTWAIT, MT_HEADER);
+ if (m == NULL) {
+ sc->sc_ac.ac_if.if_oerrors++;
+ carpstats.carps_onomem++;
+ /* XXX maybe less ? */
+ if (advbase != 255 || advskew != 255)
+ callout_reset(&sc->sc_ad_tmo, tvtohz(&tv),
+ carp_send_ad, sc);
+ return;
+ }
+ len = sizeof(*ip6) + sizeof(ch);
+ m->m_pkthdr.len = len;
+ m->m_pkthdr.rcvif = NULL;
+ m->m_len = len;
+ MH_ALIGN(m, m->m_len);
+ m->m_flags |= M_MCAST;
+ ip6 = mtod(m, struct ip6_hdr *);
+ bzero(ip6, sizeof(*ip6));
+ ip6->ip6_vfc |= IPV6_VERSION;
+ ip6->ip6_hlim = CARP_DFLTTL;
+ ip6->ip6_nxt = IPPROTO_CARP;
+ bcopy(&sc->sc_ia6->ia_addr.sin6_addr, &ip6->ip6_src,
+ sizeof(struct in6_addr));
+ /* set the multicast destination */
+
+ /* header was zeroed above, so this yields ff02::12 */
+ ip6->ip6_dst.s6_addr8[0] = 0xff;
+ ip6->ip6_dst.s6_addr8[1] = 0x02;
+ ip6->ip6_dst.s6_addr8[15] = 0x12;
+
+ ch_ptr = (struct carp_header *)(&ip6[1]);
+ bcopy(&ch, ch_ptr, sizeof(ch));
+ /* carp_prepare_ad() frees m itself on failure */
+ if (carp_prepare_ad(m, sc, ch_ptr))
+ return;
+
+ /* Temporarily skip the IPv6 header so the checksum covers only the CARP header. */
+ m->m_data += sizeof(*ip6);
+ ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip6));
+ m->m_data -= sizeof(*ip6);
+
+ getmicrotime(&sc->sc_if.if_lastchange);
+ sc->sc_if.if_opackets++;
+ sc->sc_if.if_obytes += len;
+ carpstats.carps_opackets6++;
+
+ if (ip6_output(m, NULL, NULL, 0, &sc->sc_im6o, NULL, NULL)) {
+ sc->sc_if.if_oerrors++;
+ if (sc->sc_sendad_errors < INT_MAX)
+ sc->sc_sendad_errors++;
+ if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) {
+ carp_suppress_preempt++;
+ if (carp_suppress_preempt == 1)
+ carp_send_ad_all();
+ }
+ sc->sc_sendad_success = 0;
+ } else {
+ if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) {
+ if (++sc->sc_sendad_success >=
+ CARP_SENDAD_MIN_SUCCESS) {
+ carp_suppress_preempt--;
+ sc->sc_sendad_errors = 0;
+ }
+ } else
+ sc->sc_sendad_errors = 0;
+ }
+ }
+#endif /* INET6 */
+
+ /* Re-arm the advertisement timer unless we just sent the 255/255 "bow out" packet. */
+ if (advbase != 255 || advskew != 255)
+ callout_reset(&sc->sc_ad_tmo, tvtohz(&tv),
+ carp_send_ad, sc);
+
+}
+
+/*
+ * Announce the virtual router MAC address with a gratuitous ARP for
+ * every IPv4 address configured on the carp interface.  Addresses of
+ * other families are ignored.
+ */
+void
+carp_send_arp(struct carp_softc *sc)
+{
+	struct ifaddr *cur;
+
+	TAILQ_FOREACH(cur, &sc->sc_if.if_addrlist, ifa_list) {
+		if (cur->ifa_addr->sa_family == AF_INET) {
+			arp_ifinit2(sc->sc_ifp, cur, sc->sc_ac.ac_enaddr);
+
+			DELAY(1000);	/* XXX */
+		}
+	}
+}
+
+#ifdef INET6
+/*
+ * Send a neighbor advertisement with the override flag set to the
+ * link-local all-nodes group for every IPv6 address configured on the
+ * carp interface.  Addresses of other families are ignored.
+ */
+void
+carp_send_na(struct carp_softc *sc)
+{
+	static struct in6_addr allnodes = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
+	struct ifaddr *cur;
+	struct in6_addr *target;
+
+	TAILQ_FOREACH(cur, &sc->sc_if.if_addrlist, ifa_list) {
+		if (cur->ifa_addr->sa_family == AF_INET6) {
+			target = &ifatoia6(cur)->ia_addr.sin6_addr;
+			nd6_na_output(sc->sc_ifp, &allnodes, target,
+			    ND_NA_FLAG_OVERRIDE, 1, NULL);
+			DELAY(1000);	/* XXX */
+		}
+	}
+}
+#endif /* INET6 */
+
+/*
+ * Count how many IPv4 addresses equal to ia's address are configured on
+ * the virtual hosts of cif that qualify for `type':
+ *   CARP_COUNT_RUNNING - hosts whose interface is both IFF_UP and IFF_RUNNING
+ *   CARP_COUNT_MASTER  - hosts in MASTER state
+ * Any other `type' yields 0.
+ */
+int
+carp_addrcount(struct carp_if *cif, struct in_ifaddr *ia, int type)
+{
+	struct carp_softc *vh;
+	struct ifaddr *cur;
+	int total = 0;
+	int running;
+
+	TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
+		running = (vh->sc_ac.ac_if.if_flags & (IFF_UP|IFF_RUNNING)) ==
+		    (IFF_UP|IFF_RUNNING);
+
+		if (!((type == CARP_COUNT_RUNNING && running) ||
+		    (type == CARP_COUNT_MASTER && vh->sc_state == MASTER)))
+			continue;
+
+		TAILQ_FOREACH(cur, &vh->sc_ac.ac_if.if_addrlist, ifa_list) {
+			if (cur->ifa_addr->sa_family != AF_INET)
+				continue;
+			if (ia->ia_addr.sin_addr.s_addr ==
+			    ifatoia(cur)->ia_addr.sin_addr.s_addr)
+				total++;
+		}
+	}
+
+	return (total);
+}
+
+/*
+ * Decide whether an ARP request for ia's address should be answered with
+ * a virtual MAC address.  `v' is the carp_if of the receiving interface,
+ * `isaddr' the requester's IPv4 address.
+ *
+ * Returns 1 and stores the virtual MAC in *enaddr when we should answer,
+ * 0 otherwise.
+ *
+ * Without ARP balancing, the first running virtual host whose interface
+ * owns ia answers.  With ARP balancing, the requester's address selects
+ * one of the running hosts carrying the address, and we answer only if
+ * that host is MASTER.
+ */
+int
+carp_iamatch(void *v, struct in_ifaddr *ia,
+    struct in_addr *isaddr, u_int8_t **enaddr)
+{
+	struct carp_if *cif = v;
+	struct carp_softc *vh;
+	struct ifaddr *cur;
+	int slot, seen = 0;
+	int answer = 0;
+
+	CARP_LOCK(cif);
+
+	if (!carp_opts[CARPCTL_ARPBALANCE]) {
+		TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
+			if ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) !=
+			    (IFF_UP|IFF_RUNNING) || ia->ia_ifp != &vh->sc_if)
+				continue;
+			*enaddr = vh->sc_ac.ac_enaddr;
+			answer = 1;
+			break;
+		}
+		goto done;
+	}
+
+	/*
+	 * XXX proof of concept implementation.
+	 * The source ip picks the virtual host that has to handle the
+	 * request.  If we are master of that host we respond, otherwise
+	 * the arp packet is dropped on the floor.
+	 */
+	seen = carp_addrcount(cif, ia, CARP_COUNT_RUNNING);
+	if (seen == 0) {
+		/* should never reach this */
+		goto done;
+	}
+
+	/* this should be a hash, like pf_hash() */
+	slot = isaddr->s_addr % seen;
+	seen = 0;
+
+	TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
+		if ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) !=
+		    (IFF_UP|IFF_RUNNING))
+			continue;
+
+		TAILQ_FOREACH(cur, &vh->sc_if.if_addrlist, ifa_list) {
+			if (cur->ifa_addr->sa_family != AF_INET ||
+			    ia->ia_addr.sin_addr.s_addr !=
+			    ifatoia(cur)->ia_addr.sin_addr.s_addr)
+				continue;
+
+			if (seen != slot) {
+				seen++;
+				continue;
+			}
+
+			/* This is the selected host: answer only as master. */
+			if (vh->sc_state == MASTER) {
+				*enaddr = vh->sc_ac.ac_enaddr;
+				answer = 1;
+			}
+			goto done;
+		}
+	}
+
+done:
+	CARP_UNLOCK(cif);
+	return (answer);
+}
+
+#ifdef INET6
+/*
+ * Look up the IPv6 address `taddr' among the addresses of all running
+ * (IFF_UP and IFF_RUNNING) virtual hosts attached to the carp_if `v'.
+ *
+ * Returns the matching ifaddr, or NULL when no running virtual host owns
+ * the address.
+ *
+ * Only AF_INET6 entries are examined: the address list of a carp interface
+ * can also hold entries of other families (carp_send_arp() and
+ * carp_send_na() filter on sa_family for that reason), and treating those
+ * as struct in6_ifaddr would compare against unrelated memory.
+ */
+struct ifaddr *
+carp_iamatch6(void *v, struct in6_addr *taddr)
+{
+	struct carp_if *cif = v;
+	struct carp_softc *vh;
+	struct ifaddr *ifa;
+
+	CARP_LOCK(cif);
+	TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
+		/* A host that is not up and running can never match. */
+		if ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) !=
+		    (IFF_UP|IFF_RUNNING))
+			continue;
+
+		TAILQ_FOREACH(ifa, &vh->sc_if.if_addrlist, ifa_list) {
+			if (ifa->ifa_addr->sa_family != AF_INET6)
+				continue;
+			if (IN6_ARE_ADDR_EQUAL(taddr,
+			    &ifatoia6(ifa)->ia_addr.sin6_addr)) {
+				CARP_UNLOCK(cif);
+				return (ifa);
+			}
+		}
+	}
+	CARP_UNLOCK(cif);
+
+	return (NULL);
+}
+
+/*
+ * Find the running virtual host on carp_if `v' that owns the IPv6 address
+ * `taddr' and return its virtual MAC address, or NULL if there is none.
+ *
+ * On a match the mbuf `m' is additionally tagged with the carp interface
+ * (PACKET_TAG_CARP) for carp_output().  If the tag cannot be allocated
+ * the MAC address is still returned, just without the tag.
+ *
+ * Only AF_INET6 entries of the address list are examined; entries of other
+ * families must not be reinterpreted as struct in6_ifaddr.
+ */
+void *
+carp_macmatch6(void *v, struct mbuf *m, const struct in6_addr *taddr)
+{
+	struct m_tag *mtag;
+	struct carp_if *cif = v;
+	struct carp_softc *sc;
+	struct ifaddr *ifa;
+
+	CARP_LOCK(cif);
+	TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) {
+		/* A host that is not up and running can never match. */
+		if ((sc->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) !=
+		    (IFF_UP|IFF_RUNNING))
+			continue;
+
+		TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) {
+			struct ifnet *ifp = &sc->sc_if;
+
+			if (ifa->ifa_addr->sa_family != AF_INET6)
+				continue;
+			if (!IN6_ARE_ADDR_EQUAL(taddr,
+			    &ifatoia6(ifa)->ia_addr.sin6_addr))
+				continue;
+
+			mtag = m_tag_get(PACKET_TAG_CARP,
+			    sizeof(struct ifnet *), M_NOWAIT);
+			if (mtag != NULL) {
+				bcopy(&ifp, (caddr_t)(mtag + 1),
+				    sizeof(struct ifnet *));
+				m_tag_prepend(m, mtag);
+			}
+			/* else: better a bit than nothing */
+
+			CARP_UNLOCK(cif);
+			return (sc->sc_ac.ac_enaddr);
+		}
+	}
+	CARP_UNLOCK(cif);
+
+	return (NULL);
+}
+#endif
+
+/*
+ * Check whether the destination MAC address `dhost' belongs to one of the
+ * virtual hosts on carp_if `v'.
+ *
+ * Returns the carp interface of the first virtual host that is up,
+ * running, in MASTER state and whose virtual MAC equals dhost; NULL
+ * otherwise.  Addresses that do not start with 00:00:5e:00:01 are
+ * rejected without taking the lock.
+ */
+struct ifnet *
+carp_forus(void *v, void *dhost)
+{
+	struct carp_if *cif = v;
+	struct carp_softc *vh;
+	struct ifnet *match = NULL;
+	u_int8_t *ena = dhost;
+
+	if (ena[0] || ena[1] || ena[2] != 0x5e || ena[3] || ena[4] != 1)
+		return (NULL);
+
+	CARP_LOCK(cif);
+	TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
+		if ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) !=
+		    (IFF_UP|IFF_RUNNING))
+			continue;
+		if (vh->sc_state != MASTER)
+			continue;
+		if (bcmp(dhost, vh->sc_ac.ac_enaddr, ETHER_ADDR_LEN) != 0)
+			continue;
+		match = &vh->sc_if;
+		break;
+	}
+	CARP_UNLOCK(cif);
+
+	return (match);
+}
+
+/*
+ * Master-down event for the virtual host `v' (a struct carp_softc *);
+ * also used as the handler of the master-down callouts.
+ *
+ * A BACKUP host takes over: it becomes MASTER, advertises itself,
+ * announces its addresses and installs its routes.  A MASTER host ignores
+ * the event; in INIT state the event is only logged.
+ */
+void
+carp_master_down(void *v)
+{
+	struct carp_softc *sc = v;
+
+	if (sc->sc_state == INIT) {
+		printf("%s: master_down event in INIT state\n",
+		    sc->sc_if.if_xname);
+		return;
+	}
+
+	/* Only a backup has anything to take over. */
+	if (sc->sc_state != BACKUP)
+		return;
+
+	carp_set_state(sc, MASTER);
+	carp_send_ad(sc);
+	carp_send_arp(sc);
+#ifdef INET6
+	carp_send_na(sc);
+#endif /* INET6 */
+	carp_setrun(sc, 0);
+	carp_setroute(sc, RTM_ADD);
+}
+
+/*
+ * Recompute IFF_RUNNING for the carp interface and (re)start the timers
+ * that belong to its current state.
+ *
+ * The interface runs only while it is IFF_UP, has a vhid greater than
+ * zero and at least one IPv4 or IPv6 address; otherwise IFF_RUNNING is
+ * cleared, the routes are removed and nothing else happens.
+ *
+ * When in backup state, af indicates whether to reset the master down
+ * timer for v4 or v6.  If it's set to zero, the timers of every address
+ * family that has addresses configured are reset.
+ */
+void
+carp_setrun(struct carp_softc *sc, sa_family_t af)
+{
+	struct timeval tv;
+	int arm4, arm6;
+
+	if (!((sc->sc_if.if_flags & IFF_UP) &&
+	    sc->sc_vhid > 0 && (sc->sc_naddrs || sc->sc_naddrs6))) {
+		sc->sc_if.if_flags &= ~IFF_RUNNING;
+		carp_setroute(sc, RTM_DELETE);
+		return;
+	}
+	sc->sc_if.if_flags |= IFF_RUNNING;
+
+	switch (sc->sc_state) {
+	case INIT:
+		if (!carp_opts[CARPCTL_PREEMPT] || carp_suppress_preempt) {
+			/* Start out as backup and arm the master-down timers. */
+			carp_set_state(sc, BACKUP);
+			carp_setroute(sc, RTM_DELETE);
+			carp_setrun(sc, 0);
+		} else {
+			/* Preempting: claim mastership right away. */
+			carp_send_ad(sc);
+			carp_send_arp(sc);
+#ifdef INET6
+			carp_send_na(sc);
+#endif /* INET6 */
+			carp_set_state(sc, MASTER);
+			carp_setroute(sc, RTM_ADD);
+		}
+		break;
+	case BACKUP:
+		callout_stop(&sc->sc_ad_tmo);
+		tv.tv_sec = 3 * sc->sc_advbase;
+		tv.tv_usec = sc->sc_advskew * 1000000 / 256;
+
+		/* Work out which master-down timers have to be reset. */
+		arm4 = arm6 = 0;
+		switch (af) {
+#ifdef INET
+		case AF_INET:
+			arm4 = 1;
+			break;
+#endif /* INET */
+#ifdef INET6
+		case AF_INET6:
+			arm6 = 1;
+			break;
+#endif /* INET6 */
+		default:
+			arm4 = (sc->sc_naddrs != 0);
+			arm6 = (sc->sc_naddrs6 != 0);
+			break;
+		}
+		if (arm4)
+			callout_reset(&sc->sc_md_tmo, tvtohz(&tv),
+			    carp_master_down, sc);
+		if (arm6)
+			callout_reset(&sc->sc_md6_tmo, tvtohz(&tv),
+			    carp_master_down, sc);
+		break;
+	case MASTER:
+		tv.tv_sec = sc->sc_advbase;
+		tv.tv_usec = sc->sc_advskew * 1000000 / 256;
+		callout_reset(&sc->sc_ad_tmo, tvtohz(&tv),
+		    carp_send_ad, sc);
+		break;
+	}
+}
+
+/*
+ * Configure an IPv4 virtual address on the carp softc.
+ *
+ * An all-zero address only re-evaluates the running state.  Otherwise the
+ * function picks the first multicast-capable interface (other than the
+ * carp interface itself) whose subnet contains the address, joins the CARP
+ * multicast group on it if the softc has no membership yet, creates the
+ * per-interface carp_if on first use (putting the interface into
+ * promiscuous mode), links the softc into the carp_if's list and restarts
+ * the state machine from INIT.
+ *
+ * Returns 0 on success, EADDRNOTAVAIL when no suitable interface exists or
+ * the softc is already bound to a different one, ENOBUFS when the group
+ * cannot be joined, EINVAL when another softc on the interface already
+ * uses the same vhid, or the error returned by ifpromisc().
+ *
+ * On failure only a multicast membership joined by *this* call is dropped.
+ * The previous code dropped a membership unconditionally, which removed
+ * the one still needed by addresses configured earlier.
+ */
+int
+carp_set_addr(struct carp_softc *sc, struct sockaddr_in *sin)
+{
+	struct ifnet *ifp;
+	struct carp_if *cif;
+	struct in_ifaddr *ia, *ia_if;
+	struct ip_moptions *imo = &sc->sc_imo;
+	struct in_addr addr;
+	/*
+	 * Network-to-host conversion (ntohl, not htonl); presumably this
+	 * matches the byte order of ia_subnet/ia_subnetmask compared below.
+	 */
+	u_long iaddr = ntohl(sin->sin_addr.s_addr);
+	int own, error;
+	int joined = 0;		/* set once this call joined the group */
+
+	if (sin->sin_addr.s_addr == 0) {
+		if (!(sc->sc_if.if_flags & IFF_UP))
+			carp_set_state(sc, INIT);
+		if (sc->sc_naddrs)
+			sc->sc_if.if_flags |= IFF_UP;
+		carp_setrun(sc, 0);
+		return (0);
+	}
+
+	/* we have to do it by hands to check we won't match on us */
+	ia_if = NULL; own = 0;
+	TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) {
+		/* and, yeah, we need a multicast-capable iface too */
+		if (ia->ia_ifp != &sc->sc_if &&
+		    (ia->ia_ifp->if_flags & IFF_MULTICAST) &&
+		    (iaddr & ia->ia_subnetmask) == ia->ia_subnet) {
+			if (!ia_if)
+				ia_if = ia;
+			/* The virtual address is also a real address of ours. */
+			if (sin->sin_addr.s_addr ==
+			    ia->ia_addr.sin_addr.s_addr)
+				own++;
+		}
+	}
+
+	if (!ia_if)
+		return (EADDRNOTAVAIL);
+
+	ia = ia_if;
+	ifp = ia->ia_ifp;
+
+	if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0 ||
+	    (imo->imo_multicast_ifp && imo->imo_multicast_ifp != ifp))
+		return (EADDRNOTAVAIL);
+
+	if (imo->imo_num_memberships == 0) {
+		addr.s_addr = htonl(INADDR_CARP_GROUP);
+		if ((imo->imo_membership[0] = in_addmulti(&addr, ifp)) == NULL)
+			return (ENOBUFS);
+		imo->imo_num_memberships++;
+		imo->imo_multicast_ifp = ifp;
+		imo->imo_multicast_ttl = CARP_DFLTTL;
+		imo->imo_multicast_loop = 0;
+		joined = 1;
+	}
+
+	if (!ifp->if_carp) {
+
+		MALLOC(cif, struct carp_if *, sizeof(*cif), M_CARP,
+		    M_WAITOK|M_ZERO);
+		if (!cif) {
+			error = ENOBUFS;
+			goto cleanup;
+		}
+		if ((error = ifpromisc(ifp, 1))) {
+			FREE(cif, M_CARP);
+			goto cleanup;
+		}
+
+		CARP_LOCK_INIT(cif);
+		CARP_LOCK(cif);
+		cif->vhif_ifp = ifp;
+		TAILQ_INIT(&cif->vhif_vrs);
+		ifp->if_carp = cif;
+
+	} else {
+		struct carp_softc *vr;
+
+		/* A vhid may be used by only one softc per interface. */
+		cif = (struct carp_if *)ifp->if_carp;
+		CARP_LOCK(cif);
+		TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list)
+			if (vr != sc && vr->sc_vhid == sc->sc_vhid) {
+				CARP_UNLOCK(cif);
+				error = EINVAL;
+				goto cleanup;
+			}
+	}
+	sc->sc_ia = ia;
+	sc->sc_ifp = ifp;
+
+	{ /* XXX prevent endless loop if already in queue */
+		struct carp_softc *vr, *after = NULL;
+		int myself = 0;
+		cif = (struct carp_if *)ifp->if_carp;
+
+		/* XXX: cif should not change, right? So we still hold the lock */
+		CARP_LOCK_ASSERT(cif);
+
+		TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) {
+			if (vr == sc)
+				myself = 1;
+			if (vr->sc_vhid < sc->sc_vhid)
+				after = vr;
+		}
+
+		if (!myself) {
+			/* We're trying to keep things in order */
+			if (after == NULL) {
+				TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list);
+			} else {
+				TAILQ_INSERT_AFTER(&cif->vhif_vrs, after, sc, sc_list);
+			}
+			cif->vhif_nvrs++;
+		}
+	}
+
+	CARP_UNLOCK(cif);
+
+	sc->sc_naddrs++;
+	sc->sc_if.if_flags |= IFF_UP;
+	if (own)
+		sc->sc_advskew = 0;
+	carp_set_state(sc, INIT);
+	carp_setrun(sc, 0);
+
+	return (0);
+
+cleanup:
+	/* Undo only what this call set up; keep an older membership. */
+	if (joined) {
+		in_delmulti(imo->imo_membership[--imo->imo_num_memberships]);
+		imo->imo_multicast_ifp = NULL;
+	}
+	return (error);
+}
+
+/*
+ * Account for the removal of one IPv4 address from the carp softc.
+ *
+ * When the last IPv4 address goes away the advertisement timer is
+ * stopped, the interface is marked down, the vhid is invalidated, the
+ * CARP multicast membership is dropped and the softc is unlinked from its
+ * carp_if.  The carp_if itself is destroyed once no softc references it.
+ *
+ * The carp_if is released with M_CARP, the malloc type it is allocated
+ * with in carp_set_addr(); it used to be freed with M_IFADDR.
+ *
+ * `sin' is not used.  Always returns 0.
+ *
+ * NOTE(review): carp_del_addr6() performs the same unlink and vhif_nvrs
+ * decrement when the last IPv6 address is removed -- confirm the behaviour
+ * for a softc that carries both IPv4 and IPv6 addresses.
+ */
+int
+carp_del_addr(struct carp_softc *sc, struct sockaddr_in *sin)
+{
+	int error = 0;
+
+	if (!--sc->sc_naddrs) {
+		struct carp_if *cif = (struct carp_if *)sc->sc_ifp->if_carp;
+		struct ip_moptions *imo = &sc->sc_imo;
+
+		callout_stop(&sc->sc_ad_tmo);
+		sc->sc_if.if_flags &= ~(IFF_UP|IFF_RUNNING);
+		sc->sc_vhid = -1;
+		in_delmulti(imo->imo_membership[--imo->imo_num_memberships]);
+		imo->imo_multicast_ifp = NULL;
+		CARP_LOCK(cif);
+		TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list);
+		if (!--cif->vhif_nvrs) {
+			/* Last virtual host on this interface: drop the carp_if. */
+			sc->sc_ifp->if_carp = NULL;
+			CARP_LOCK_DESTROY(cif);
+			FREE(cif, M_CARP);
+		} else {
+			CARP_UNLOCK(cif);
+		}
+	}
+
+	return (error);
+}
+
+#ifdef INET6
+/*
+ * Configure an IPv6 virtual address on the carp softc.
+ *
+ * An unspecified address only re-evaluates the running state.  Otherwise
+ * the function picks the first multicast-capable interface (other than the
+ * carp interface itself) with an address whose prefix contains sin6's
+ * address.  For the first IPv6 address of the softc it joins the CARP
+ * multicast group and the solicited-node group of the address on that
+ * interface.  It then creates the per-interface carp_if on first use
+ * (putting the interface into promiscuous mode), links the softc into the
+ * carp_if's list and restarts the state machine from INIT.
+ *
+ * Returns 0 on success, EADDRNOTAVAIL when no suitable interface exists or
+ * the softc is already bound to a different one, EINVAL when another softc
+ * on the interface already uses the same vhid, or the error reported by
+ * in6_joingroup() / ifpromisc().  On failure the multicast memberships are
+ * released only if the softc has no IPv6 address yet.
+ */
+int
+carp_set_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6)
+{
+ struct ifnet *ifp;
+ struct carp_if *cif;
+ struct in6_ifaddr *ia, *ia_if;
+ struct ip6_moptions *im6o = &sc->sc_im6o;
+ struct in6_multi_mship *imm;
+ struct sockaddr_in6 addr;
+ int own, error;
+
+ if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
+ if (!(sc->sc_if.if_flags & IFF_UP))
+ carp_set_state(sc, INIT);
+ if (sc->sc_naddrs6)
+ sc->sc_if.if_flags |= IFF_UP;
+ carp_setrun(sc, 0);
+ return (0);
+ }
+
+ /* we have to do it by hands to check we won't match on us */
+ ia_if = NULL; own = 0;
+ for (ia = in6_ifaddr; ia; ia = ia->ia_next) {
+ int i;
+
+ /* compare the masked addresses word by word; i == 4 means all four matched */
+ for (i = 0; i < 4; i++) {
+ if ((sin6->sin6_addr.s6_addr32[i] &
+ ia->ia_prefixmask.sin6_addr.s6_addr32[i]) !=
+ (ia->ia_addr.sin6_addr.s6_addr32[i] &
+ ia->ia_prefixmask.sin6_addr.s6_addr32[i]))
+ break;
+ }
+ /* and, yeah, we need a multicast-capable iface too */
+ if (ia->ia_ifp != &sc->sc_ac.ac_if &&
+ (ia->ia_ifp->if_flags & IFF_MULTICAST) &&
+ (i == 4)) {
+ if (!ia_if)
+ ia_if = ia;
+ if (IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr,
+ &ia->ia_addr.sin6_addr))
+ own++;
+ }
+ }
+
+ if (!ia_if)
+ return (EADDRNOTAVAIL);
+ ia = ia_if;
+ ifp = ia->ia_ifp;
+
+ if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0 ||
+ (im6o->im6o_multicast_ifp && im6o->im6o_multicast_ifp != ifp))
+ return (EADDRNOTAVAIL);
+
+ /* multicast groups are joined only for the first IPv6 address */
+ if (!sc->sc_naddrs6) {
+ im6o->im6o_multicast_ifp = ifp;
+
+ /* join CARP multicast address */
+ /* NOTE(review): the interface index stored in the second 16-bit word is presumably an embedded scope id -- confirm */
+ bzero(&addr, sizeof(addr));
+ addr.sin6_family = AF_INET6;
+ addr.sin6_len = sizeof(addr);
+ addr.sin6_addr.s6_addr16[0] = htons(0xff02);
+ addr.sin6_addr.s6_addr16[1] = htons(ifp->if_index);
+ addr.sin6_addr.s6_addr8[15] = 0x12;
+ if ((imm = in6_joingroup(ifp, &addr.sin6_addr, &error)) == NULL)
+ goto cleanup;
+ LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain);
+
+ /* join solicited multicast address */
+ bzero(&addr.sin6_addr, sizeof(addr.sin6_addr));
+ addr.sin6_addr.s6_addr16[0] = htons(0xff02);
+ addr.sin6_addr.s6_addr16[1] = htons(ifp->if_index);
+ addr.sin6_addr.s6_addr32[1] = 0;
+ addr.sin6_addr.s6_addr32[2] = htonl(1);
+ addr.sin6_addr.s6_addr32[3] = sin6->sin6_addr.s6_addr32[3];
+ addr.sin6_addr.s6_addr8[12] = 0xff;
+ if ((imm = in6_joingroup(ifp, &addr.sin6_addr, &error)) == NULL)
+ goto cleanup;
+ LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain);
+ }
+
+ if (!ifp->if_carp) {
+ MALLOC(cif, struct carp_if *, sizeof(*cif), M_CARP,
+ M_WAITOK|M_ZERO);
+ if (!cif) {
+ error = ENOBUFS;
+ goto cleanup;
+ }
+ if ((error = ifpromisc(ifp, 1))) {
+ FREE(cif, M_CARP);
+ goto cleanup;
+ }
+
+ CARP_LOCK_INIT(cif);
+ CARP_LOCK(cif);
+ cif->vhif_ifp = ifp;
+ TAILQ_INIT(&cif->vhif_vrs);
+ ifp->if_carp = cif;
+
+ } else {
+ struct carp_softc *vr;
+
+ /* a vhid may be used by only one softc per interface */
+ cif = (struct carp_if *)ifp->if_carp;
+ CARP_LOCK(cif);
+ TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list)
+ if (vr != sc && vr->sc_vhid == sc->sc_vhid) {
+ CARP_UNLOCK(cif);
+ error = EINVAL;
+ goto cleanup;
+ }
+ }
+ sc->sc_ia6 = ia;
+ sc->sc_ifp = ifp;
+
+ { /* XXX prevent endless loop if already in queue */
+ struct carp_softc *vr, *after = NULL;
+ int myself = 0;
+ cif = (struct carp_if *)ifp->if_carp;
+ CARP_LOCK_ASSERT(cif);
+
+ TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) {
+ if (vr == sc)
+ myself = 1;
+ if (vr->sc_vhid < sc->sc_vhid)
+ after = vr;
+ }
+
+ if (!myself) {
+ /* We're trying to keep things in order */
+ if (after == NULL) {
+ TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list);
+ } else {
+ TAILQ_INSERT_AFTER(&cif->vhif_vrs, after, sc, sc_list);
+ }
+ cif->vhif_nvrs++;
+ }
+ }
+
+ CARP_UNLOCK(cif);
+ sc->sc_naddrs6++;
+ sc->sc_ac.ac_if.if_flags |= IFF_UP;
+ /* the virtual address is also one of our real addresses: advertise with skew 0 */
+ if (own)
+ sc->sc_advskew = 0;
+ carp_set_state(sc, INIT);
+ carp_setrun(sc, 0);
+
+ return (0);
+
+cleanup:
+ /* clean up multicast memberships */
+ if (!sc->sc_naddrs6) {
+ while (!LIST_EMPTY(&im6o->im6o_memberships)) {
+ imm = LIST_FIRST(&im6o->im6o_memberships);
+ LIST_REMOVE(imm, i6mm_chain);
+ in6_leavegroup(imm);
+ }
+ }
+ return (error);
+}
+
+/*
+ * Account for the removal of one IPv6 address from the carp softc.
+ *
+ * When the last IPv6 address goes away the advertisement timer is
+ * stopped, the interface is marked down, the vhid is invalidated, all
+ * IPv6 multicast memberships are left and the softc is unlinked from its
+ * carp_if.  The carp_if itself is destroyed once no softc references it.
+ *
+ * The carp_if is released with M_CARP, the malloc type it is allocated
+ * with in carp_set_addr6(); it used to be freed with M_IFADDR.
+ *
+ * `sin6' is not used.  Always returns 0.
+ *
+ * NOTE(review): carp_del_addr() performs the same unlink and vhif_nvrs
+ * decrement when the last IPv4 address is removed -- confirm the behaviour
+ * for a softc that carries both IPv4 and IPv6 addresses.
+ */
+int
+carp_del_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6)
+{
+	int error = 0;
+
+	if (!--sc->sc_naddrs6) {
+		struct carp_if *cif = (struct carp_if *)sc->sc_ifp->if_carp;
+		struct ip6_moptions *im6o = &sc->sc_im6o;
+
+		callout_stop(&sc->sc_ad_tmo);
+		sc->sc_ac.ac_if.if_flags &= ~(IFF_UP|IFF_RUNNING);
+		sc->sc_vhid = -1;
+		CARP_LOCK(cif);
+		while (!LIST_EMPTY(&im6o->im6o_memberships)) {
+			struct in6_multi_mship *imm =
+			    LIST_FIRST(&im6o->im6o_memberships);
+
+			LIST_REMOVE(imm, i6mm_chain);
+			in6_leavegroup(imm);
+		}
+		im6o->im6o_multicast_ifp = NULL;
+		TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list);
+		if (!--cif->vhif_nvrs) {
+			/* Last virtual host on this interface: drop the carp_if. */
+			CARP_LOCK_DESTROY(cif);
+			sc->sc_ifp->if_carp = NULL;
+			FREE(cif, M_CARP);
+		} else
+			CARP_UNLOCK(cif);
+	}
+
+	return (error);
+}
+#endif /* INET6 */
+
+/*
+ * ioctl handler of the carp interface.
+ *
+ *   SIOCSIFADDR, SIOCAIFADDR  mark the interface up and hand the address to
+ *                             carp_set_addr() / carp_set_addr6()
+ *   SIOCDIFADDR               clear IFF_UP and hand the address to
+ *                             carp_del_addr() / carp_del_addr6()
+ *   SIOCSIFFLAGS              bring the virtual host down (to INIT) or up
+ *   SIOCSVH                   superuser only: set state, vhid, advbase,
+ *                             advskew and key from a struct carpreq
+ *   SIOCGVH                   report state, vhid, advbase and advskew; the
+ *                             key is included only for the superuser
+ *
+ * Unknown commands and unsupported address families yield EINVAL and
+ * EAFNOSUPPORT respectively.  carp_hmac_prepare() runs on every path that
+ * reaches the end of the function, including error paths.
+ *
+ * NOTE(review): the same `addr' argument is viewed as struct ifaddr,
+ * struct ifaliasreq and struct ifreq; which view is valid for which
+ * command depends on the callers -- confirm, in particular for the
+ * ifa->ifa_addr accesses under SIOCAIFADDR and SIOCDIFADDR.
+ * NOTE(review): both sc->sc_if.if_flags and sc->if_flags are modified
+ * here; confirm each refers to the intended flag word.
+ */
+int
+carp_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr)
+{
+ struct carp_softc *sc = ifp->if_softc, *vr;
+ struct carpreq carpr;
+ struct ifaddr *ifa;
+ struct ifreq *ifr;
+ struct ifaliasreq *ifra;
+ int error = 0;
+
+ ifa = (struct ifaddr *)addr;
+ ifra = (struct ifaliasreq *)addr;
+ ifr = (struct ifreq *)addr;
+
+ switch (cmd) {
+ case SIOCSIFADDR:
+ switch (ifa->ifa_addr->sa_family) {
+#ifdef INET
+ case AF_INET:
+ sc->sc_if.if_flags |= IFF_UP;
+ bcopy(ifa->ifa_addr, ifa->ifa_dstaddr,
+ sizeof(struct sockaddr));
+ error = carp_set_addr(sc, satosin(ifa->ifa_addr));
+ break;
+#endif /* INET */
+#ifdef INET6
+ case AF_INET6:
+ sc->sc_if.if_flags |= IFF_UP;
+ error = carp_set_addr6(sc, satosin6(ifa->ifa_addr));
+ break;
+#endif /* INET6 */
+ default:
+ error = EAFNOSUPPORT;
+ break;
+ }
+ break;
+
+ case SIOCAIFADDR:
+ switch (ifa->ifa_addr->sa_family) {
+#ifdef INET
+ case AF_INET:
+ sc->sc_if.if_flags |= IFF_UP;
+ bcopy(ifa->ifa_addr, ifa->ifa_dstaddr,
+ sizeof(struct sockaddr));
+ error = carp_set_addr(sc, satosin(&ifra->ifra_addr));
+ break;
+#endif /* INET */
+#ifdef INET6
+ case AF_INET6:
+ sc->sc_if.if_flags |= IFF_UP;
+ error = carp_set_addr6(sc, satosin6(&ifra->ifra_addr));
+ break;
+#endif /* INET6 */
+ default:
+ error = EAFNOSUPPORT;
+ break;
+ }
+ break;
+
+ case SIOCDIFADDR:
+ sc->if_flags &= ~IFF_UP;
+ switch (ifa->ifa_addr->sa_family) {
+#ifdef INET
+ case AF_INET:
+ error = carp_del_addr(sc, satosin(&ifra->ifra_addr));
+ break;
+#endif /* INET */
+#ifdef INET6
+ case AF_INET6:
+ error = carp_del_addr6(sc, satosin6(&ifra->ifra_addr));
+ break;
+#endif /* INET6 */
+ default:
+ error = EAFNOSUPPORT;
+ break;
+ }
+ break;
+
+ case SIOCSIFFLAGS:
+ /* going down: stop all timers, send a last ad if we were master, fall back to INIT */
+ if (sc->sc_state != INIT && !(ifr->ifr_flags & IFF_UP)) {
+ sc->if_flags &= ~IFF_UP;
+ callout_stop(&sc->sc_ad_tmo);
+ callout_stop(&sc->sc_md_tmo);
+ callout_stop(&sc->sc_md6_tmo);
+ if (sc->sc_state == MASTER)
+ carp_send_ad(sc);
+ carp_set_state(sc, INIT);
+ carp_setrun(sc, 0);
+ } else if (sc->sc_state == INIT && (ifr->ifr_flags & IFF_UP)) {
+ sc->sc_if.if_flags |= IFF_UP;
+ carp_setrun(sc, 0);
+ }
+ break;
+
+ case SIOCSVH:
+ if ((error = suser(curthread)) != 0)
+ break;
+ if ((error = copyin(ifr->ifr_data, &carpr, sizeof carpr)))
+ break;
+ /*
+  * error starts at 1 and is decremented for every parameter group
+  * (vhid, advbase/advskew) that is accepted; if it is still positive
+  * at the end, nothing valid was supplied and EINVAL is returned.
+  */
+ error = 1;
+ /* a requested state change is applied before the parameters are validated */
+ if (sc->sc_state != INIT && carpr.carpr_state != sc->sc_state) {
+ switch (carpr.carpr_state) {
+ case BACKUP:
+ callout_stop(&sc->sc_ad_tmo);
+ carp_set_state(sc, BACKUP);
+ carp_setrun(sc, 0);
+ carp_setroute(sc, RTM_DELETE);
+ break;
+ case MASTER:
+ carp_master_down(sc);
+ break;
+ default:
+ break;
+ }
+ }
+ if (carpr.carpr_vhid > 0) {
+ if (carpr.carpr_vhid > 255) {
+ error = EINVAL;
+ break;
+ }
+ if (sc->sc_ifp) {
+ struct carp_if *cif;
+ cif = (struct carp_if *)sc->sc_ifp->if_carp;
+ CARP_LOCK(cif);
+ /* NOTE(review): this early return skips carp_hmac_prepare() below */
+ TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list)
+ if (vr != sc &&
+ vr->sc_vhid == carpr.carpr_vhid) {
+ CARP_UNLOCK(cif);
+ return EINVAL;
+ }
+ CARP_UNLOCK(cif);
+ }
+ /* virtual MAC address is 00:00:5e:00:01:<vhid> */
+ sc->sc_vhid = carpr.carpr_vhid;
+ sc->sc_ac.ac_enaddr[0] = 0;
+ sc->sc_ac.ac_enaddr[1] = 0;
+ sc->sc_ac.ac_enaddr[2] = 0x5e;
+ sc->sc_ac.ac_enaddr[3] = 0;
+ sc->sc_ac.ac_enaddr[4] = 1;
+ sc->sc_ac.ac_enaddr[5] = sc->sc_vhid;
+ error--;
+ }
+ if (carpr.carpr_advbase > 0 || carpr.carpr_advskew > 0) {
+ if (carpr.carpr_advskew >= 255) {
+ error = EINVAL;
+ break;
+ }
+ if (carpr.carpr_advbase > 255) {
+ error = EINVAL;
+ break;
+ }
+ sc->sc_advbase = carpr.carpr_advbase;
+ sc->sc_advskew = carpr.carpr_advskew;
+ error--;
+ }
+ /* the key is copied even when neither parameter group was accepted */
+ bcopy(carpr.carpr_key, sc->sc_key, sizeof(sc->sc_key));
+ if (error > 0)
+ error = EINVAL;
+ else {
+ error = 0;
+ carp_setrun(sc, 0);
+ }
+ break;
+
+ case SIOCGVH:
+ bzero(&carpr, sizeof(carpr));
+ carpr.carpr_state = sc->sc_state;
+ carpr.carpr_vhid = sc->sc_vhid;
+ carpr.carpr_advbase = sc->sc_advbase;
+ carpr.carpr_advskew = sc->sc_advskew;
+ /* only the superuser gets to see the key; others receive zeroes */
+ if (suser(curthread) == 0)
+ bcopy(sc->sc_key, carpr.carpr_key,
+ sizeof(carpr.carpr_key));
+ error = copyout(&carpr, ifr->ifr_data, sizeof(carpr));
+ break;
+
+ default:
+ error = EINVAL;
+ }
+
+ carp_hmac_prepare(sc);
+ return (error);
+}
+
+/*
+ * XXX: this is looutput. We should eventually use it from there.
+ *
+ * Loop a packet back through if_simloop().  Packets for reject or
+ * blackhole routes are dropped: blackhole routes report success, reject
+ * routes report EHOSTUNREACH for host routes and ENETUNREACH otherwise.
+ * Destination families other than INET, INET6, IPX and AppleTalk are
+ * refused with EAFNOSUPPORT.
+ */
+static int
+carp_looutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
+    struct rtentry *rt)
+{
+	M_ASSERTPKTHDR(m); /* check if we have the packet header */
+
+	if (rt && rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
+		m_freem(m);
+		if (rt->rt_flags & RTF_BLACKHOLE)
+			return (0);
+		if (rt->rt_flags & RTF_HOST)
+			return (EHOSTUNREACH);
+		return (ENETUNREACH);
+	}
+
+	ifp->if_opackets++;
+	ifp->if_obytes += m->m_pkthdr.len;
+#if 1	/* XXX */
+	if (dst->sa_family != AF_INET && dst->sa_family != AF_INET6 &&
+	    dst->sa_family != AF_IPX && dst->sa_family != AF_APPLETALK) {
+		printf("carp_looutput: af=%d unexpected\n", dst->sa_family);
+		m_freem(m);
+		return (EAFNOSUPPORT);
+	}
+#endif
+	return (if_simloop(ifp, m, dst->sa_family, 0));
+}
+
+/*
+ * Start output on carp interface. This function should never be called.
+ *
+ * It has no effect; when the kernel is built with DEBUG it logs the call
+ * together with the interface name.
+ */
+void
+carp_start(struct ifnet *ifp)
+{
+#ifdef DEBUG
+ printf("%s: start called\n", ifp->if_xname);
+#endif
+}
+
+/*
+ * Rewrite the link-layer source address of an outgoing packet to the
+ * virtual router MAC address.
+ *
+ * Only IPv4/IPv6 packets that carry a PACKET_TAG_CARP tag are touched;
+ * everything else is passed through with a return value of 0.  The tag
+ * holds the carp interface, whose softc supplies the vhid.  Returns
+ * EOPNOTSUPP when the physical interface `ifp' is neither Ethernet, FDDI
+ * nor ISO 88025.  `rt' is not used.
+ *
+ * NOTE(review): the ISO 88025 case shifts by (vhid - 1); confirm which
+ * vhid values are valid there, since large shift counts are undefined.
+ */
+int
+carp_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa,
+    struct rtentry *rt)
+{
+	struct m_tag *tag;
+	struct carp_softc *sc;
+	struct ifnet *carp_ifp;
+	u_int8_t *shost;
+
+	if (sa == NULL)
+		return (0);
+
+	switch (sa->sa_family) {
+#ifdef INET
+	case AF_INET:
+		break;
+#endif /* INET */
+#ifdef INET6
+	case AF_INET6:
+		break;
+#endif /* INET6 */
+	default:
+		return (0);
+	}
+
+	tag = m_tag_find(m, PACKET_TAG_CARP, NULL);
+	if (tag == NULL)
+		return (0);
+
+	/* The tag payload is a pointer to the carp interface. */
+	bcopy(tag + 1, &carp_ifp, sizeof(struct ifnet *));
+	sc = carp_ifp->if_softc;
+
+	/* Set the source MAC address to Virtual Router MAC Address */
+	switch (ifp->if_type) {
+	case IFT_ETHER:
+		shost = mtod(m, struct ether_header *)->ether_shost;
+		break;
+	case IFT_FDDI:
+		shost = mtod(m, struct fddi_header *)->fddi_shost;
+		break;
+	case IFT_ISO88025: {
+		struct iso88025_header *th;
+
+		th = mtod(m, struct iso88025_header *);
+		th->iso88025_shost[0] = 3;
+		th->iso88025_shost[1] = 0;
+		th->iso88025_shost[2] = 0x40 >> (sc->sc_vhid - 1);
+		th->iso88025_shost[3] = 0x40000 >> (sc->sc_vhid - 1);
+		th->iso88025_shost[4] = 0;
+		th->iso88025_shost[5] = 0;
+		return (0);
+	}
+	default:
+		printf("%s: carp is not supported for this interface type\n",
+		    ifp->if_xname);
+		return (EOPNOTSUPP);
+	}
+
+	/* Ethernet and FDDI share the 00:00:5e:00:01:<vhid> layout. */
+	shost[0] = 0;
+	shost[1] = 0;
+	shost[2] = 0x5e;
+	shost[3] = 0;
+	shost[4] = 1;
+	shost[5] = sc->sc_vhid;
+
+	return (0);
+}
+
+/*
+ * Switch the virtual host to `state' and publish the change.
+ *
+ * Nothing happens if the state is unchanged.  Otherwise the link state of
+ * the carp interface follows the new state (BACKUP: down, MASTER: up,
+ * anything else: unknown) and a routing-socket interface message is sent.
+ */
+void
+carp_set_state(struct carp_softc *sc, int state)
+{
+	int link;
+
+	if (sc->sc_state == state)
+		return;
+
+	sc->sc_state = state;
+
+	if (state == BACKUP)
+		link = LINK_STATE_DOWN;
+	else if (state == MASTER)
+		link = LINK_STATE_UP;
+	else
+		link = LINK_STATE_UNKNOWN;
+
+	sc->sc_ac.ac_if.if_link_state = link;
+	rt_ifmsg(&sc->sc_ac.ac_if);
+}
+
+/*
+ * React to a state change of the physical interface behind carp_if `v'.
+ *
+ * For every virtual host: if the parent's link is down or the parent is
+ * not IFF_UP, the host's flags are saved, the host is taken down, its
+ * timers are stopped and preemption is suppressed (once per host).
+ * Otherwise the saved flags are restored and the suppression is lifted.
+ * In both cases the host restarts from INIT.
+ */
+void
+carp_carpdev_state(void *v)
+{
+	struct carp_if *cif = v;
+	struct carp_softc *sc;
+	int parent_down;
+
+	CARP_LOCK(cif);
+	TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) {
+		parent_down = (sc->sc_ifp->if_link_state == LINK_STATE_DOWN ||
+		    !(sc->sc_ifp->if_flags & IFF_UP));
+
+		if (!parent_down) {
+			sc->sc_if.if_flags |= sc->sc_flags_backup;
+			carp_set_state(sc, INIT);
+			carp_setrun(sc, 0);
+			if (sc->sc_suppress)
+				carp_suppress_preempt--;
+			sc->sc_suppress = 0;
+			continue;
+		}
+
+		/* Remember the flags so they can be restored later. */
+		sc->sc_flags_backup = sc->sc_if.if_flags;
+		sc->sc_if.if_flags &= ~(IFF_UP|IFF_RUNNING);
+		callout_stop(&sc->sc_ad_tmo);
+		callout_stop(&sc->sc_md_tmo);
+		callout_stop(&sc->sc_md6_tmo);
+		carp_set_state(sc, INIT);
+		carp_setrun(sc, 0);
+		if (!sc->sc_suppress) {
+			carp_suppress_preempt++;
+			if (carp_suppress_preempt == 1)
+				carp_send_ad_all();
+		}
+		sc->sc_suppress = 1;
+	}
+	CARP_UNLOCK(cif);
+}
+
+/*
+ * Module event handler.  On load the softc list is initialised and the
+ * interface cloner is attached; on unload the cloner is detached and all
+ * remaining carp interfaces are destroyed.  Any other event type is
+ * rejected with EINVAL.
+ */
+static int
+carp_modevent(module_t mod, int type, void *data)
+{
+	if (type == MOD_LOAD) {
+		LIST_INIT(&carpif_list);
+		if_clone_attach(&carp_cloner);
+		printf("carp: attached\n");
+		return 0;
+	}
+
+	if (type == MOD_UNLOAD) {
+		if_clone_detach(&carp_cloner);
+		while (!LIST_EMPTY(&carpif_list))
+			carp_clone_destroy(
+			    &LIST_FIRST(&carpif_list)->sc_if);
+		return 0;
+	}
+
+	return EINVAL;
+}
+
+/*
+ * Module description: name "carp", event handler carp_modevent(), no
+ * extra argument.  Registered at SI_SUB_PSEUDO with SI_ORDER_ANY.
+ */
+static moduledata_t carp_mod = {
+ "carp",
+ carp_modevent,
+ 0
+};
+
+DECLARE_MODULE(carp, carp_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
diff --git a/sys/netinet/ip_carp.h b/sys/netinet/ip_carp.h
new file mode 100644
index 0000000..a050a88
--- /dev/null
+++ b/sys/netinet/ip_carp.h
@@ -0,0 +1,163 @@
+/* $FreeBSD$ */
+/* $OpenBSD: ip_carp.h,v 1.8 2004/07/29 22:12:15 mcbride Exp $ */
+
+/*
+ * Copyright (c) 2002 Michael Shalayeff. All rights reserved.
+ * Copyright (c) 2003 Ryan McBride. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _IP_CARP_H
+#define _IP_CARP_H
+
+/*
+ * The CARP header layout is as follows:
+ *
+ * 0 1 2 3
+ * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |Version| Type | VirtualHostID | AdvSkew | Auth Len |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Reserved | AdvBase | Checksum |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Counter (1) |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Counter (2) |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | SHA-1 HMAC (1) |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | SHA-1 HMAC (2) |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | SHA-1 HMAC (3) |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | SHA-1 HMAC (4) |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | SHA-1 HMAC (5) |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ */
+
+struct carp_header { /* on-wire CARP header; field order follows the layout diagram above */
+#if BYTE_ORDER == LITTLE_ENDIAN /* version and type share the first octet; declaration order depends on host byte order */
+ u_int8_t carp_type:4, /* packet type, e.g. CARP_ADVERTISEMENT */
+ carp_version:4; /* protocol version, see CARP_VERSION */
+#endif
+#if BYTE_ORDER == BIG_ENDIAN
+ u_int8_t carp_version:4, /* protocol version, see CARP_VERSION */
+ carp_type:4; /* packet type, e.g. CARP_ADVERTISEMENT */
+#endif
+ u_int8_t carp_vhid; /* virtual host id */
+ u_int8_t carp_advskew; /* advertisement skew */
+ u_int8_t carp_authlen; /* size of counter+md, 32bit chunks */
+ u_int8_t carp_pad1; /* reserved */
+ u_int8_t carp_advbase; /* advertisement interval */
+ u_int16_t carp_cksum; /* "Checksum" field of the layout diagram */
+ u_int32_t carp_counter[2]; /* "Counter (1)" and "Counter (2)" words */
+ unsigned char carp_md[20]; /* SHA1 HMAC */
+} __packed; /* packed so the struct matches the documented layout without padding */
+
+#define CARP_DFLTTL 255
+
+/* carp_version */
+#define CARP_VERSION 2
+
+/* carp_type */
+#define CARP_ADVERTISEMENT 0x01
+
+#define CARP_KEY_LEN 20 /* a sha1 hash of a passphrase */
+
+/* carp_advbase */
+#define CARP_DFLTINTV 1
+
+/*
+ * Statistics: CARP event counters.  The fields are grouped as input-side
+ * totals and drop reasons, output-side totals, and preemption events.
+ * NOTE(review): presumably the structure behind the read-only
+ * CARPCTL_STATS sysctl -- confirm.
+ */
+struct carpstats {
+ uint64_t carps_ipackets; /* total input packets, IPv4 */
+ uint64_t carps_ipackets6; /* total input packets, IPv6 */
+ uint64_t carps_badif; /* wrong interface */
+ uint64_t carps_badttl; /* TTL is not CARP_DFLTTL */
+ uint64_t carps_hdrops; /* packets shorter than hdr */
+ uint64_t carps_badsum; /* bad checksum */
+ uint64_t carps_badver; /* bad (incl unsupp) version */
+ uint64_t carps_badlen; /* data length does not match */
+ uint64_t carps_badauth; /* bad authentication */
+ uint64_t carps_badvhid; /* bad VHID */
+ uint64_t carps_badaddrs; /* bad address list */
+
+ uint64_t carps_opackets; /* total output packets, IPv4 */
+ uint64_t carps_opackets6; /* total output packets, IPv6 */
+ uint64_t carps_onomem; /* no memory for an mbuf */
+ uint64_t carps_ostates; /* total state updates sent */
+
+ uint64_t carps_preempt; /* if enabled, preemptions */
+};
+
+/*
+ * Configuration structure for the SIOCSVH and SIOCGVH ioctls
+ * (presumably "set" and "get" of the virtual host parameters -- confirm).
+ */
+struct carpreq {
+ int carpr_state; /* presumably an index into CARP_STATES, 0..CARP_MAXSTATE -- confirm */
+#define CARP_STATES "INIT", "BACKUP", "MASTER" /* state names as an initializer list */
+#define CARP_MAXSTATE 2 /* index of the last name in CARP_STATES */
+ int carpr_vhid; /* cf. carp_header.carp_vhid: virtual host id */
+ int carpr_advskew; /* cf. carp_header.carp_advskew: advertisement skew */
+ int carpr_advbase; /* cf. carp_header.carp_advbase: advertisement interval */
+ unsigned char carpr_key[CARP_KEY_LEN]; /* CARP_KEY_LEN bytes: a sha1 hash of a passphrase */
+};
+#define SIOCSVH _IOWR('i', 245, struct ifreq) /* note: encoded with struct ifreq, not struct carpreq */
+#define SIOCGVH _IOWR('i', 246, struct ifreq) /* note: encoded with struct ifreq, not struct carpreq */
+
+/*
+ * Names for CARP sysctl objects.
+ * The entries of CARPCTL_NAMES are listed in CARPCTL_* id order (slot 0 is
+ * an empty placeholder), so the two lists must be kept in step.
+ */
+#define CARPCTL_ALLOW 1 /* accept incoming CARP packets */
+#define CARPCTL_PREEMPT 2 /* high-pri backup preemption mode */
+#define CARPCTL_LOG 3 /* log bad packets */
+#define CARPCTL_STATS 4 /* statistics (read-only) */
+#define CARPCTL_ARPBALANCE 5 /* balance arp responses */
+#define CARPCTL_MAXID 6 /* one past the highest CARPCTL_* id */
+
+#define CARPCTL_NAMES { \
+ { 0, 0 }, \
+ { "allow", CTLTYPE_INT }, \
+ { "preempt", CTLTYPE_INT }, \
+ { "log", CTLTYPE_INT }, \
+ { "stats", CTLTYPE_STRUCT }, \
+ { "arpbalance", CTLTYPE_INT }, \
+}
+
+#ifdef _KERNEL
+void carp_ifdetach (struct ifnet *);
+void carp_carpdev_state(void *);
+void carp_input (struct mbuf *, int);
+int carp6_input (struct mbuf **, int *, int);
+int carp_output (struct ifnet *, struct mbuf *, struct sockaddr *,
+ struct rtentry *);
+int carp_iamatch (void *, struct in_ifaddr *, struct in_addr *,
+ u_int8_t **);
+struct ifaddr *carp_iamatch6(void *, struct in6_addr *);
+void *carp_macmatch6(void *, struct mbuf *, const struct in6_addr *);
+struct ifnet *carp_forus (void *, void *);
+#endif
+#endif /* _IP_CARP_H */
diff --git a/sys/netinet/ip_input.c b/sys/netinet/ip_input.c
index 3021285..ecf79ae 100644
--- a/sys/netinet/ip_input.c
+++ b/sys/netinet/ip_input.c
@@ -35,6 +35,7 @@
#include "opt_ipstealth.h"
#include "opt_ipsec.h"
#include "opt_mac.h"
+#include "opt_carp.h"
#include <sys/param.h>
#include <sys/systm.h>
@@ -66,6 +67,9 @@
#include <netinet/ip_var.h>
#include <netinet/ip_icmp.h>
#include <machine/in_cksum.h>
+#ifdef DEV_CARP
+#include <netinet/ip_carp.h>
+#endif
#include <sys/socketvar.h>
@@ -509,10 +513,17 @@ passin:
* XXX - Checking is incompatible with IP aliases added
* to the loopback interface instead of the interface where
* the packets are received.
+ *
+ * XXX - This is the case for carp vhost IPs as well so we
+ * insert a workaround. If the packet got here, we already
+ * checked with carp_iamatch() and carp_forus().
*/
checkif = ip_checkinterface && (ipforwarding == 0) &&
m->m_pkthdr.rcvif != NULL &&
((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) == 0) &&
+#ifdef DEV_CARP
+ !m->m_pkthdr.rcvif->if_carp &&
+#endif
(dchg == 0);
/*
diff --git a/sys/netinet6/in6.c b/sys/netinet6/in6.c
index 163dbfe..7bd5e64 100644
--- a/sys/netinet6/in6.c
+++ b/sys/netinet6/in6.c
@@ -206,7 +206,7 @@ in6_ifloop_request(int cmd, struct ifaddr *ifa)
* rely on the cloning mechanism from the corresponding interface route
* any more.
*/
-static void
+void
in6_ifaddloop(struct ifaddr *ifa)
{
struct rtentry *rt;
@@ -226,7 +226,7 @@ in6_ifaddloop(struct ifaddr *ifa)
* Remove loopback rtentry of ownaddr generated by in6_ifaddloop(),
* if it exists.
*/
-static void
+void
in6_ifremloop(struct ifaddr *ifa)
{
struct in6_ifaddr *ia;
@@ -1551,6 +1551,39 @@ in6_ifinit(ifp, ia, sin6, newhost)
return (error);
}
+/*
+ * Join the IPv6 multicast group addr on ifp and wrap the resulting
+ * in6_multi in a freshly allocated membership record.
+ *
+ * Returns the record on success.  On failure returns NULL with *errorp
+ * set: ENOBUFS if the record cannot be allocated, otherwise whatever
+ * in6_addmulti() reported.
+ */
+struct in6_multi_mship *
+in6_joingroup(ifp, addr, errorp)
+	struct ifnet *ifp;
+	struct in6_addr *addr;
+	int *errorp;
+{
+	struct in6_multi_mship *mship;
+
+	mship = malloc(sizeof(*mship), M_IPMADDR, M_NOWAIT);
+	if (mship == NULL) {
+		*errorp = ENOBUFS;
+		return (NULL);
+	}
+
+	mship->i6mm_maddr = in6_addmulti(addr, ifp, errorp);
+	if (mship->i6mm_maddr != NULL)
+		return (mship);
+
+	/* *errorp is already set by in6_addmulti(); just drop the record. */
+	free(mship, M_IPMADDR);
+	return (NULL);
+}
+
+/*
+ * Undo in6_joingroup(): leave the multicast group held by the membership
+ * record (if any) and free the record itself.  Always returns 0.
+ */
+int
+in6_leavegroup(imm)
+	struct in6_multi_mship *imm;
+{
+	struct in6_multi *maddr = imm->i6mm_maddr;
+
+	if (maddr != NULL)
+		in6_delmulti(maddr);
+	free(imm, M_IPMADDR);
+	return (0);
+}
+
/*
* Find an IPv6 interface link-local address specific to an interface.
*/
diff --git a/sys/netinet6/in6_ifattach.c b/sys/netinet6/in6_ifattach.c
index fe12e4b..8837a91 100644
--- a/sys/netinet6/in6_ifattach.c
+++ b/sys/netinet6/in6_ifattach.c
@@ -671,6 +671,7 @@ in6_ifattach(ifp, altifp)
#endif
case IFT_PFLOG:
case IFT_PFSYNC:
+ case IFT_CARP:
return;
}
diff --git a/sys/netinet6/in6_proto.c b/sys/netinet6/in6_proto.c
index 129c5f3..7900303 100644
--- a/sys/netinet6/in6_proto.c
+++ b/sys/netinet6/in6_proto.c
@@ -64,6 +64,7 @@
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_ipsec.h"
+#include "opt_carp.h"
#include <sys/param.h>
#include <sys/socket.h>
@@ -121,6 +122,10 @@
#endif
#endif /* IPSEC */
+#ifdef DEV_CARP
+#include <netinet/ip_carp.h>
+#endif
+
#ifdef FAST_IPSEC
#include <netipsec/ipsec6.h>
#define IPSEC
@@ -241,6 +246,14 @@ struct ip6protosw inet6sw[] = {
0, 0, 0, 0,
&rip6_usrreqs
},
+#ifdef DEV_CARP
+{ SOCK_RAW, &inet6domain, IPPROTO_CARP, PR_ATOMIC|PR_ADDR,
+ carp6_input, rip6_output, 0, rip6_ctloutput,
+ 0,
+ 0, 0, 0, 0,
+ &rip6_usrreqs
+},
+#endif /* DEV_CARP */
/* raw wildcard */
{ SOCK_RAW, &inet6domain, 0, PR_ATOMIC|PR_ADDR,
rip6_input, rip6_output, 0, rip6_ctloutput,
diff --git a/sys/netinet6/in6_var.h b/sys/netinet6/in6_var.h
index c89a9b5..05ec3d5 100644
--- a/sys/netinet6/in6_var.h
+++ b/sys/netinet6/in6_var.h
@@ -578,6 +578,8 @@ do { \
struct in6_multi *in6_addmulti __P((struct in6_addr *, struct ifnet *,
int *));
void in6_delmulti __P((struct in6_multi *));
+struct in6_multi_mship *in6_joingroup(struct ifnet *, struct in6_addr *, int *);
+int in6_leavegroup(struct in6_multi_mship *);
int in6_mask2len __P((struct in6_addr *, u_char *));
int in6_control __P((struct socket *, u_long, caddr_t, struct ifnet *,
struct thread *));
@@ -604,6 +606,8 @@ int in6_prefix_ioctl __P((struct socket *, u_long, caddr_t,
int in6_prefix_add_ifid __P((int, struct in6_ifaddr *));
void in6_prefix_remove_ifid __P((int, struct in6_ifaddr *));
void in6_purgeprefix __P((struct ifnet *));
+void in6_ifremloop(struct ifaddr *);
+void in6_ifaddloop(struct ifaddr *);
int in6_is_addr_deprecated __P((struct sockaddr_in6 *));
struct inpcb;
diff --git a/sys/netinet6/nd6.c b/sys/netinet6/nd6.c
index 2c639b0..e274966 100644
--- a/sys/netinet6/nd6.c
+++ b/sys/netinet6/nd6.c
@@ -2025,6 +2025,9 @@ nd6_need_cache(ifp)
#ifdef IFT_IEEE80211
case IFT_IEEE80211:
#endif
+#ifdef IFT_CARP
+ case IFT_CARP:
+#endif
case IFT_GIF: /* XXX need more cases? */
return (1);
default:
diff --git a/sys/netinet6/nd6_nbr.c b/sys/netinet6/nd6_nbr.c
index 3abcf36..e428bd4 100644
--- a/sys/netinet6/nd6_nbr.c
+++ b/sys/netinet6/nd6_nbr.c
@@ -32,6 +32,8 @@
#include "opt_inet.h"
#include "opt_inet6.h"
+#include "opt_ipsec.h"
+#include "opt_carp.h"
#include <sys/param.h>
#include <sys/systm.h>
@@ -59,6 +61,10 @@
#include <netinet6/nd6.h>
#include <netinet/icmp6.h>
+#ifdef DEV_CARP
+#include <netinet/ip_carp.h>
+#endif
+
#include <net/net_osdep.h>
#define SDL(s) ((struct sockaddr_dl *)s)
@@ -94,7 +100,7 @@ nd6_ns_input(m, off, icmp6len)
struct in6_addr taddr6;
struct in6_addr myaddr6;
char *lladdr = NULL;
- struct ifaddr *ifa;
+ struct ifaddr *ifa = NULL;
int lladdrlen = 0;
int anycast = 0, proxy = 0, tentative = 0;
int tlladdr;
@@ -193,7 +199,14 @@ nd6_ns_input(m, off, icmp6len)
* (3) "tentative" address on which DAD is being performed.
*/
/* (1) and (3) check. */
+#ifdef DEV_CARP
+ if (ifp->if_carp)
+ ifa = carp_iamatch6(ifp->if_carp, &taddr6);
+ if (!ifa)
+ ifa = (struct ifaddr *)in6ifa_ifpwithaddr(ifp, &taddr6);
+#else
ifa = (struct ifaddr *)in6ifa_ifpwithaddr(ifp, &taddr6);
+#endif
/* (2) check. */
if (!ifa) {
@@ -888,9 +901,16 @@ nd6_na_output(ifp, daddr6, taddr6, flags, tlladdr, sdl0)
* lladdr in sdl0. If we are not proxying (sending NA for
* my address) use lladdr configured for the interface.
*/
- if (sdl0 == NULL)
+ if (sdl0 == NULL) {
+#ifdef DEV_CARP
+ if (ifp->if_carp)
+ mac = carp_macmatch6(ifp->if_carp, m, taddr6);
+ if (mac == NULL)
+ mac = nd6_ifptomac(ifp);
+#else
mac = nd6_ifptomac(ifp);
- else if (sdl0->sa_family == AF_LINK) {
+#endif
+ } else if (sdl0->sa_family == AF_LINK) {
struct sockaddr_dl *sdl;
sdl = (struct sockaddr_dl *)sdl0;
if (sdl->sdl_alen == ifp->if_addrlen)
@@ -943,6 +963,9 @@ nd6_ifptomac(ifp)
#ifdef IFT_IEEE80211
case IFT_IEEE80211:
#endif
+#ifdef IFT_CARP
+ case IFT_CARP:
+#endif
case IFT_ISO88025:
return ((caddr_t)(ifp + 1));
default:
diff --git a/sys/sys/mbuf.h b/sys/sys/mbuf.h
index c954f92..1621fa7 100644
--- a/sys/sys/mbuf.h
+++ b/sys/sys/mbuf.h
@@ -664,6 +664,7 @@ struct mbuf *m_uiotombuf(struct uio *, int, int);
#define PACKET_TAG_RTSOCKFAM 25 /* rtsock sa family */
#define PACKET_TAG_PF_TRANSLATE_LOCALHOST 26 /* PF translate localhost */
#define PACKET_TAG_IPOPTIONS 27 /* Saved IP options */
+#define PACKET_TAG_CARP 28 /* CARP info */
/* Packet tag routines. */
struct m_tag *m_tag_alloc(u_int32_t, int, int, int);
OpenPOWER on IntegriCloud