diff options
31 files changed, 4793 insertions, 4030 deletions
diff --git a/lib/libc/net/sctp_sys_calls.c b/lib/libc/net/sctp_sys_calls.c index 5c0f025..9a2c8bf 100644 --- a/lib/libc/net/sctp_sys_calls.c +++ b/lib/libc/net/sctp_sys_calls.c @@ -164,13 +164,14 @@ sctp_getaddrlen(sa_family_t family) } int -sctp_connectx(int sd, const struct sockaddr *addrs, int addrcnt) +sctp_connectx(int sd, const struct sockaddr *addrs, int addrcnt, sctp_assoc_t * id) { char buf[SCTP_STACK_BUF_SIZE]; int i, ret, cnt, *aa; char *cpto; const struct sockaddr *at; size_t len = sizeof(int); + sctp_assoc_t *p_id; at = addrs; cnt = 0; @@ -211,7 +212,11 @@ sctp_connectx(int sd, const struct sockaddr *addrs, int addrcnt) aa = (int *)buf; *aa = cnt; ret = setsockopt(sd, IPPROTO_SCTP, SCTP_CONNECT_X, (void *)buf, - (socklen_t) len); + (socklen_t)len); + if ((ret == 0) && id) { + p_id = (sctp_assoc_t *) buf; + *id = *p_id; + } return (ret); } @@ -300,7 +305,7 @@ sctp_getpaddrs(int sd, sctp_assoc_t id, struct sockaddr **raddrs) asoc = id; siz = sizeof(sctp_assoc_t); if (getsockopt(sd, IPPROTO_SCTP, SCTP_GET_REMOTE_ADDR_SIZE, - &asoc, &siz) != 0) { + &asoc, &siz) != 0) { errno = ENOMEM; return (-1); } @@ -316,7 +321,7 @@ sctp_getpaddrs(int sd, sctp_assoc_t id, struct sockaddr **raddrs) addrs->sget_assoc_id = id; /* Now lets get the array of addresses */ if (getsockopt(sd, IPPROTO_SCTP, SCTP_GET_PEER_ADDRESSES, - addrs, &siz) != 0) { + addrs, &siz) != 0) { free(addrs); errno = ENOMEM; return (-1); @@ -333,7 +338,7 @@ sctp_getpaddrs(int sd, sctp_assoc_t id, struct sockaddr **raddrs) return (cnt); } -void +void sctp_freepaddrs(struct sockaddr *addrs) { /* Take away the hidden association id */ @@ -398,7 +403,7 @@ sctp_getladdrs(int sd, sctp_assoc_t id, struct sockaddr **raddrs) return (cnt); } -void +void sctp_freeladdrs(struct sockaddr *addrs) { /* Take away the hidden association id */ diff --git a/sys/conf/files b/sys/conf/files index fb3dba8..39b64da 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -1786,6 +1786,7 @@ netinet/sctp_asconf.c optional inet 
inet6 sctp netinet/sctp_peeloff.c optional inet inet6 sctp netinet/sctp_crc32.c optional inet inet6 sctp netinet/sctp_auth.c optional inet inet6 sctp +netinet/sctp_sysctl.c optional inet inet6 sctp netinet/tcp_debug.c optional tcpdebug netinet/tcp_hostcache.c optional inet netinet/tcp_input.c optional inet diff --git a/sys/netinet/sctp.h b/sys/netinet/sctp.h index 6aa7dec..ab29820 100644 --- a/sys/netinet/sctp.h +++ b/sys/netinet/sctp.h @@ -157,7 +157,44 @@ struct sctp_paramhdr { #define SCTP_PCB_STATUS 0x00001105 #define SCTP_GET_NONCE_VALUES 0x00001106 +/* Special hook for dynamically setting primary for all assoc's, + * this is a write only option that requires root privilege. + */ +#define SCTP_SET_DYNAMIC_PRIMARY 0x00002001 + +/* VRF (virtual router feature) and multi-VRF support + * options. VRF's provide splits within a router + * that give the views of multiple routers. A + * standard host, without VRF support, is just + * a single VRF. If VRF's are supported then + * the transport must be VRF aware. This means + * that every socket call coming in must be directed + * within the endpoint to one of the VRF's it belongs + * to. The endpoint, before binding, may select + * the "default" VRF it is in by using a set socket + * option with SCTP_VRF_ID. This will also + * get propagated to the default VRF. Once the + * endpoint binds an address then it CANNOT add + * additional VRF's to become a Multi-VRF endpoint. + * + * Before BINDING additional VRF's can be added with + * the SCTP_ADD_VRF_ID call or deleted with + * SCTP_DEL_VRF_ID. + * + * Associations are ALWAYS contained inside a single + * VRF. They cannot reside in two (or more) VRF's. Incoming + * packets, assuming the router is VRF aware, can always + * tell us what VRF they arrived on. A host not supporting + * any VRF's will find that the packets always arrived on the + * single VRF that the host has. 
+ * + */ +#define SCTP_VRF_ID 0x00003001 +#define SCTP_ADD_VRF_ID 0x00003002 +#define SCTP_GET_VRF_IDS 0x00003003 +#define SCTP_GET_ASOC_VRF 0x00003004 +#define SCTP_DEL_VRF_ID 0x00003005 /* * hidden implementation specific options these are NOT user visible (should * move out of sctp.h) diff --git a/sys/netinet/sctp_asconf.c b/sys/netinet/sctp_asconf.c index 8abab9d..d4f088b 100644 --- a/sys/netinet/sctp_asconf.c +++ b/sys/netinet/sctp_asconf.c @@ -34,6 +34,7 @@ __FBSDID("$FreeBSD$"); #include <netinet/sctp_os.h> #include <netinet/sctp_var.h> +#include <netinet/sctp_sysctl.h> #include <netinet/sctp_pcb.h> #include <netinet/sctp_header.h> #include <netinet/sctputil.h> @@ -47,8 +48,6 @@ __FBSDID("$FreeBSD$"); * SCTP_DEBUG_ASCONF2: detailed info */ #ifdef SCTP_DEBUG -extern uint32_t sctp_debug_on; - #endif /* SCTP_DEBUG */ @@ -316,7 +315,6 @@ sctp_process_asconf_add_ip(struct mbuf *m, struct sctp_asconf_paramhdr *aph, } sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, NULL, SCTP_FROM_SCTP_ASCONF + SCTP_LOC_1); sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, NULL); - } return m_reply; @@ -990,12 +988,12 @@ sctp_asconf_cleanup(struct sctp_tcb *stcb, struct sctp_nets *net) /* * process an ADD/DELETE IP ack from peer. - * addr corresponding ifaddr to the address being added/deleted. + * addr corresponding sctp_ifa to the address being added/deleted. * type: SCTP_ADD_IP_ADDRESS or SCTP_DEL_IP_ADDRESS. * flag: 1=success, 0=failure. */ static void -sctp_asconf_addr_mgmt_ack(struct sctp_tcb *stcb, struct ifaddr *addr, +sctp_asconf_addr_mgmt_ack(struct sctp_tcb *stcb, struct sctp_ifa *addr, uint16_t type, uint32_t flag) { /* @@ -1023,7 +1021,7 @@ sctp_asconf_addr_mgmt_ack(struct sctp_tcb *stcb, struct ifaddr *addr, * for add. If a duplicate operation is found, ignore the new one. 
*/ static uint32_t -sctp_asconf_queue_add(struct sctp_tcb *stcb, struct ifaddr *ifa, uint16_t type) +sctp_asconf_queue_add(struct sctp_tcb *stcb, struct sctp_ifa *ifa, uint16_t type) { struct sctp_asconf_addr *aa, *aa_next; struct sockaddr *sa; @@ -1037,7 +1035,7 @@ sctp_asconf_queue_add(struct sctp_tcb *stcb, struct ifaddr *ifa, uint16_t type) aa = aa_next) { aa_next = TAILQ_NEXT(aa, next); /* address match? */ - if (sctp_asconf_addr_match(aa, ifa->ifa_addr) == 0) + if (sctp_asconf_addr_match(aa, &ifa->address.sa) == 0) continue; /* is the request already in queue (sent or not) */ if (aa->ap.aph.ph.param_type == type) { @@ -1077,11 +1075,11 @@ sctp_asconf_queue_add(struct sctp_tcb *stcb, struct ifaddr *ifa, uint16_t type) aa->ap.aph.ph.param_type = type; aa->ifa = ifa; /* correlation_id filled in during send routine later... */ - if (ifa->ifa_addr->sa_family == AF_INET6) { + if (ifa->address.sa.sa_family == AF_INET6) { /* IPv6 address */ struct sockaddr_in6 *sin6; - sin6 = (struct sockaddr_in6 *)ifa->ifa_addr; + sin6 = (struct sockaddr_in6 *)&ifa->address.sa; sa = (struct sockaddr *)sin6; aa->ap.addrp.ph.param_type = SCTP_IPV6_ADDRESS; aa->ap.addrp.ph.param_length = (sizeof(struct sctp_ipv6addr_param)); @@ -1090,9 +1088,9 @@ sctp_asconf_queue_add(struct sctp_tcb *stcb, struct ifaddr *ifa, uint16_t type) sizeof(struct sctp_ipv6addr_param); memcpy(&aa->ap.addrp.addr, &sin6->sin6_addr, sizeof(struct in6_addr)); - } else if (ifa->ifa_addr->sa_family == AF_INET) { + } else if (ifa->address.sa.sa_family == AF_INET) { /* IPv4 address */ - struct sockaddr_in *sin = (struct sockaddr_in *)ifa->ifa_addr; + struct sockaddr_in *sin = (struct sockaddr_in *)&ifa->address.sa; sa = (struct sockaddr *)sin; aa->ap.addrp.ph.param_type = SCTP_IPV4_ADDRESS; @@ -1153,6 +1151,7 @@ sctp_asconf_queue_add_sa(struct sctp_tcb *stcb, struct sockaddr *sa, uint16_t type) { struct sctp_asconf_addr *aa, *aa_next; + uint32_t vrf_id; /* see if peer supports ASCONF */ if 
(stcb->asoc.peer_supports_asconf == 0) { @@ -1208,8 +1207,9 @@ sctp_asconf_queue_add_sa(struct sctp_tcb *stcb, struct sockaddr *sa, } /* fill in asconf address parameter fields */ /* top level elements are "networked" during send */ + vrf_id = SCTP_DEFAULT_VRFID; aa->ap.aph.ph.param_type = type; - aa->ifa = sctp_find_ifa_by_addr(sa); + aa->ifa = sctp_find_ifa_by_addr(sa, vrf_id, 0); /* correlation_id filled in during send routine later... */ if (sa->sa_family == AF_INET6) { /* IPv6 address */ @@ -1590,35 +1590,6 @@ sctp_handle_asconf_ack(struct mbuf *m, int offset, } } -/* is this an interface that we care about at all? */ -static uint32_t -sctp_is_desired_interface_type(struct ifaddr *ifa) -{ - int result; - - /* check the interface type to see if it's one we care about */ - switch (ifa->ifa_ifp->if_type) { - case IFT_ETHER: - case IFT_ISO88023: - case IFT_ISO88025: - case IFT_STARLAN: - case IFT_P10: - case IFT_P80: - case IFT_HY: - case IFT_FDDI: - case IFT_PPP: - case IFT_XETHER: - case IFT_SLIP: - case IFT_GIF: - result = 1; - break; - default: - result = 0; - } - - return (result); -} - static uint32_t sctp_is_scopeid_in_nets(struct sctp_tcb *stcb, struct sockaddr *sa) { @@ -1656,7 +1627,7 @@ sctp_is_scopeid_in_nets(struct sctp_tcb *stcb, struct sockaddr *sa) */ static void sctp_addr_mgmt_assoc(struct sctp_inpcb *inp, struct sctp_tcb *stcb, - struct ifaddr *ifa, uint16_t type) + struct sctp_ifa *ifa, uint16_t type) { int status; @@ -1672,23 +1643,17 @@ sctp_addr_mgmt_assoc(struct sctp_inpcb *inp, struct sctp_tcb *stcb, */ /* first, make sure it's a good address family */ - if (ifa->ifa_addr->sa_family != AF_INET6 && - ifa->ifa_addr->sa_family != AF_INET) { + if (ifa->address.sa.sa_family != AF_INET6 && + ifa->address.sa.sa_family != AF_INET) { return; } /* make sure we're "allowed" to add this type of addr */ - if (ifa->ifa_addr->sa_family == AF_INET6) { - struct in6_ifaddr *ifa6; - + if (ifa->address.sa.sa_family == AF_INET6) { /* invalid if we're not a v6 
endpoint */ if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) == 0) return; /* is the v6 addr really valid ? */ - ifa6 = (struct in6_ifaddr *)ifa; - if (IFA6_IS_DEPRECATED(ifa6) || - (ifa6->ia6_flags & - (IN6_IFF_DETACHED | IN6_IFF_ANYCAST | IN6_IFF_NOTREADY))) { - /* can't use an invalid address */ + if (ifa->localifa_flags & SCTP_ADDR_IFA_UNUSEABLE) { return; } } @@ -1700,27 +1665,16 @@ sctp_addr_mgmt_assoc(struct sctp_inpcb *inp, struct sctp_tcb *stcb, * Recall that this routine is only called for the subset bound * w/ASCONF allowed case. */ - - /* - * do a scope_id check against any link local addresses in the - * destination nets list to see if we should put this local address - * on the pending list or not eg. don't put on the list if we have a - * link local destination with the same scope_id - */ - if (type == SCTP_ADD_IP_ADDRESS) { - if (sctp_is_scopeid_in_nets(stcb, ifa->ifa_addr) == 0) { - sctp_add_local_addr_assoc(stcb, ifa); - } - } + sctp_add_local_addr_assoc(stcb, ifa, 1); /* * check address scope if address is out of scope, don't queue * anything... note: this would leave the address on both inp and * asoc lists */ - if (ifa->ifa_addr->sa_family == AF_INET6) { + if (ifa->address.sa.sa_family == AF_INET6) { struct sockaddr_in6 *sin6; - sin6 = (struct sockaddr_in6 *)ifa->ifa_addr; + sin6 = (struct sockaddr_in6 *)&ifa->address.sin6; if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { /* we skip unspecifed addresses */ return; @@ -1730,7 +1684,7 @@ sctp_addr_mgmt_assoc(struct sctp_inpcb *inp, struct sctp_tcb *stcb, return; } /* is it the right link local scope? 
*/ - if (sctp_is_scopeid_in_nets(stcb, ifa->ifa_addr) == 0) { + if (sctp_is_scopeid_in_nets(stcb, &ifa->address.sa) == 0) { return; } } @@ -1738,7 +1692,7 @@ sctp_addr_mgmt_assoc(struct sctp_inpcb *inp, struct sctp_tcb *stcb, IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr)) { return; } - } else if (ifa->ifa_addr->sa_family == AF_INET) { + } else if (ifa->address.sa.sa_family == AF_INET) { struct sockaddr_in *sin; struct in6pcb *inp6; @@ -1748,7 +1702,7 @@ sctp_addr_mgmt_assoc(struct sctp_inpcb *inp, struct sctp_tcb *stcb, SCTP_IPV6_V6ONLY(inp6)) return; - sin = (struct sockaddr_in *)ifa->ifa_addr; + sin = (struct sockaddr_in *)&ifa->address.sa; if (sin->sin_addr.s_addr == 0) { /* we skip unspecifed addresses */ return; @@ -1759,16 +1713,10 @@ sctp_addr_mgmt_assoc(struct sctp_inpcb *inp, struct sctp_tcb *stcb, } } else { /* else, not AF_INET or AF_INET6, so skip */ -#ifdef SCTP_DEBUG - if (sctp_debug_on & SCTP_DEBUG_ASCONF1) { - printf("addr_mgmt_assoc: not AF_INET or AF_INET6\n"); - } -#endif /* SCTP_DEBUG */ return; } /* queue an asconf for this address add/delete */ - if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_DO_ASCONF)) { /* does the peer do asconf? */ if (stcb->asoc.peer_supports_asconf) { @@ -1786,247 +1734,266 @@ sctp_addr_mgmt_assoc(struct sctp_inpcb *inp, struct sctp_tcb *stcb, stcb, stcb->asoc.primary_destination); } } - } else { - /* this is the boundall, no ASCONF case */ -#if 0 - /* Peter: Fixe me? why the if 0? 
*/ - /* - * assume kernel will delete this very shortly; add done - * above - */ - if (type == SCTP_DEL_IP_ADDRESS) { - /* if deleting, add this addr to the do not use list */ - sctp_add_local_addr_assoc(stcb, ifa); - } -#endif } } -static void -sctp_addr_mgmt_ep(struct sctp_inpcb *inp, struct ifaddr *ifa, uint16_t type) -{ - struct sctp_tcb *stcb; - - - SCTP_INP_WLOCK(inp); - /* make sure we're "allowed" to add this type of addr */ - if (ifa->ifa_addr->sa_family == AF_INET6) { - struct in6_ifaddr *ifa6; - /* invalid if we're not a v6 endpoint */ - if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) == 0) { - SCTP_INP_WUNLOCK(inp); - return; - } - /* is the v6 addr really valid ? */ - ifa6 = (struct in6_ifaddr *)ifa; - if (IFA6_IS_DEPRECATED(ifa6) || - (ifa6->ia6_flags & - (IN6_IFF_DETACHED | IN6_IFF_ANYCAST | IN6_IFF_NOTREADY))) { - /* can't use an invalid address */ - SCTP_INP_WUNLOCK(inp); - return; - } - } else if (ifa->ifa_addr->sa_family == AF_INET) { - /* invalid if we are a v6 only endpoint */ - struct in6pcb *inp6; - - inp6 = (struct in6pcb *)&inp->ip_inp.inp; - - if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) && - SCTP_IPV6_V6ONLY(inp6)) { - SCTP_INP_WUNLOCK(inp); - return; - } - } else { - /* invalid address family */ - SCTP_INP_WUNLOCK(inp); - return; - } - /* is this endpoint subset bound ? */ - if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) == 0) { - /* subset bound endpoint */ - if (sctp_is_feature_off(inp, SCTP_PCB_FLAGS_DO_ASCONF)) { - /* - * subset bound, but ASCONFs not allowed... 
if - * adding, nothing to do, since not allowed if - * deleting, remove address from endpoint peer will - * have to "timeout" this addr - */ - if (type == SCTP_DEL_IP_ADDRESS) { - sctp_del_local_addr_ep(inp, ifa); +int +sctp_iterator_ep(struct sctp_inpcb *inp, void *ptr, uint32_t val) +{ + struct sctp_asconf_iterator *asc; + struct sctp_ifa *ifa; + struct sctp_laddr *l; + int type; + int cnt_invalid = 0; + + asc = (struct sctp_asconf_iterator *)ptr; + LIST_FOREACH(l, &asc->list_of_work, sctp_nxt_addr) { + ifa = l->ifa; + type = l->action; + if (ifa->address.sa.sa_family == AF_INET6) { + /* invalid if we're not a v6 endpoint */ + if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) == 0) { + cnt_invalid++; + if (asc->cnt == cnt_invalid) + return (1); + else + continue; } - /* no asconfs to queue for this inp... */ - SCTP_INP_WUNLOCK(inp); - return; - } else { - /* - * subset bound, ASCONFs allowed... if adding, add - * address to endpoint list if deleting, remove - * address from endpoint - */ - if (type == SCTP_ADD_IP_ADDRESS) { - sctp_add_local_addr_ep(inp, ifa); - } else { - sctp_del_local_addr_ep(inp, ifa); + } else if (ifa->address.sa.sa_family == AF_INET) { + /* invalid if we are a v6 only endpoint */ + struct in6pcb *inp6; + + inp6 = (struct in6pcb *)&inp->ip_inp.inp; + if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) && + SCTP_IPV6_V6ONLY(inp6)) { + cnt_invalid++; + if (asc->cnt == cnt_invalid) + return (1); + else + continue; } - /* drop through and notify all asocs */ + } else { + /* invalid address family */ + cnt_invalid++; + if (asc->cnt == cnt_invalid) + return (1); + else + continue; } } - /* process for all associations for this endpoint */ - LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) { - SCTP_TCB_LOCK(stcb); - sctp_addr_mgmt_assoc(inp, stcb, ifa, type); - SCTP_TCB_UNLOCK(stcb); - } - SCTP_INP_WUNLOCK(inp); + return (0); } -/* - * restrict the use of this address - */ -static void -sctp_addr_mgmt_restrict_ep(struct sctp_inpcb *inp, struct 
ifaddr *ifa) +int +sctp_iterator_ep_end(struct sctp_inpcb *inp, void *ptr, uint32_t val) { - struct sctp_tcb *stcb; - - /* is this endpoint bound to all? */ - if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) == 0) { - /* - * Nothing to do for subset bound case. Allow sctp_bindx() - * to manage the address lists - */ - return; - } - SCTP_INP_RLOCK(inp); - /* process for all associations for this endpoint */ - LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) { - /* put this address on the "pending/do not use yet" list */ - SCTP_TCB_LOCK(stcb); - sctp_add_local_addr_assoc(stcb, ifa); - SCTP_TCB_UNLOCK(stcb); + struct sctp_ifa *ifa; + struct sctp_asconf_iterator *asc; + struct sctp_laddr *laddr, *nladdr, *l; + + /* Only for specific case not bound all */ + asc = (struct sctp_asconf_iterator *)ptr; + LIST_FOREACH(l, &asc->list_of_work, sctp_nxt_addr) { + ifa = l->ifa; + if (l->action == SCTP_ADD_IP_ADDRESS) { + LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) { + if (laddr->ifa == ifa) { + laddr->action = 0; + break; + } + } + } else if (l->action == SCTP_DEL_IP_ADDRESS) { + laddr = LIST_FIRST(&inp->sctp_addr_list); + while (laddr) { + nladdr = LIST_NEXT(laddr, sctp_nxt_addr); + /* remove only after all guys are done */ + if (laddr->ifa == ifa) { + sctp_del_local_addr_ep(inp, ifa); + } + laddr = nladdr; + } + } } - SCTP_INP_RUNLOCK(inp); + return (0); } -/* - * this is only called for kernel initiated address changes eg. it will check - * the PCB_FLAGS_AUTO_ASCONF flag - */ -static void -sctp_addr_mgmt(struct ifaddr *ifa, uint16_t type) +void +sctp_iterator_stcb(struct sctp_inpcb *inp, struct sctp_tcb *stcb, void *ptr, + uint32_t val) { - struct sockaddr *sa; - struct sctp_inpcb *inp; - - /* make sure we care about this interface... 
*/ - if (!sctp_is_desired_interface_type(ifa)) { - return; - } - sa = ifa->ifa_addr; - if (sa->sa_family != AF_INET && sa->sa_family != AF_INET6) - return; - -#ifdef SCTP_DEBUG - if (sctp_debug_on & SCTP_DEBUG_ASCONF1) { - if (type == SCTP_ADD_IP_ADDRESS) - printf("sctp_addr_mgmt: kernel adds "); - else - printf("sctp_addr_mgmt: kernel deletes "); - sctp_print_address(sa); - } -#endif /* SCTP_DEBUG */ + struct sctp_asconf_iterator *asc; + struct sctp_ifa *ifa; + struct sctp_laddr *l; + int cnt_invalid = 0; + int type, status; + + asc = (struct sctp_asconf_iterator *)ptr; + LIST_FOREACH(l, &asc->list_of_work, sctp_nxt_addr) { + ifa = l->ifa; + type = l->action; + /* Same checks again for assoc */ + if (ifa->address.sa.sa_family == AF_INET6) { + /* invalid if we're not a v6 endpoint */ + struct sockaddr_in6 *sin6; + + if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) == 0) { + cnt_invalid++; + if (asc->cnt == cnt_invalid) + return; + else + continue; + } + sin6 = (struct sockaddr_in6 *)&ifa->address.sin6; + if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { + /* we skip unspecifed addresses */ + continue; + } + if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) { + if (stcb->asoc.local_scope == 0) { + continue; + } + /* is it the right link local scope? 
*/ + if (sctp_is_scopeid_in_nets(stcb, &ifa->address.sa) == 0) { + continue; + } + } + } else if (ifa->address.sa.sa_family == AF_INET) { + /* invalid if we are a v6 only endpoint */ + struct in6pcb *inp6; + struct sockaddr_in *sin; + + inp6 = (struct in6pcb *)&inp->ip_inp.inp; + /* invalid if we are a v6 only endpoint */ + if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) && + SCTP_IPV6_V6ONLY(inp6)) + continue; - /* go through all our PCB's */ - LIST_FOREACH(inp, &sctppcbinfo.listhead, sctp_list) { - if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_AUTO_ASCONF)) { - sctp_addr_mgmt_ep(inp, ifa, type); + sin = (struct sockaddr_in *)&ifa->address.sa; + if (sin->sin_addr.s_addr == 0) { + /* we skip unspecifed addresses */ + continue; + } + if (stcb->asoc.ipv4_local_scope == 0 && + IN4_ISPRIVATE_ADDRESS(&sin->sin_addr)) { + continue;; + } + if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) && + SCTP_IPV6_V6ONLY(inp6)) { + cnt_invalid++; + if (asc->cnt == cnt_invalid) + return; + else + continue; + } } else { - /* this address is going away anyways... 
*/ - if (type == SCTP_DEL_IP_ADDRESS) + /* invalid address family */ + cnt_invalid++; + if (asc->cnt == cnt_invalid) return; - /* (temporarily) restrict this address */ - sctp_addr_mgmt_restrict_ep(inp, ifa); + else + continue; } - /* else, not allowing automatic asconf's, so ignore */ - } -} - -/* - * add/delete IP address requests from kernel (via routing change) assumed - * that the address is non-broadcast, non-multicast all addresses are passed - * from any type of interface-- need to filter duplicate addresses may get - * requested - */ - -void -sctp_add_ip_address(struct ifaddr *ifa) -{ - sctp_addr_mgmt(ifa, SCTP_ADD_IP_ADDRESS); -} - -void -sctp_delete_ip_address(struct ifaddr *ifa) -{ - struct sctp_inpcb *inp; - - /* process the delete */ - sctp_addr_mgmt(ifa, SCTP_DEL_IP_ADDRESS); - /* - * need to remove this ifaddr from any cached routes and also any - * from any assoc "restricted/pending" lists - */ - /* make sure we care about this interface... */ - if (!sctp_is_desired_interface_type(ifa)) { - return; - } - /* go through all our PCB's */ - SCTP_INP_INFO_RLOCK(); - LIST_FOREACH(inp, &sctppcbinfo.listhead, sctp_list) { - struct sctp_tcb *stcb; - struct sctp_laddr *laddr, *laddr_next; - - /* process for all associations for this endpoint */ - SCTP_INP_RLOCK(inp); - LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) { + /* put this address on the "pending/do not use yet" list */ + if (type == SCTP_ADD_IP_ADDRESS) { + sctp_add_local_addr_assoc(stcb, ifa, 1); + } else if (type == SCTP_DEL_IP_ADDRESS) { struct sctp_nets *net; - /* process through the nets list */ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) { struct rtentry *rt; /* delete this address if cached */ - rt = net->ro.ro_rt; - if (rt != NULL && rt->rt_ifa == ifa) { - /* RTFREE(rt); */ - net->ro.ro_rt = NULL; + if (net->ro._s_addr && + (net->ro._s_addr->ifa == ifa)) { + sctp_free_ifa(net->ro._s_addr); + net->ro._s_addr = NULL; + net->src_addr_selected = 0; + rt = net->ro.ro_rt; + if (rt) { + 
RTFREE(rt); + net->ro.ro_rt = NULL; + } + /* + * Now we deleted our src address, + * should we not also now reset the + * cwnd/rto to start as if its a new + * address? + */ + sctp_set_initial_cc_param(stcb, net); + net->RTO = stcb->asoc.initial_rto; + } - } /* for each net */ - /* process through the asoc "pending" list */ - laddr = LIST_FIRST(&stcb->asoc.sctp_local_addr_list); - while (laddr != NULL) { - laddr_next = LIST_NEXT(laddr, sctp_nxt_addr); - /* remove if in use */ - if (laddr->ifa == ifa) { - sctp_remove_laddr(laddr); + } + } else if (type == SCTP_SET_PRIM_ADDR) { + if ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) == 0) { + /* + * must validate the ifa in question is in + * the ep + */ + if (sctp_is_addr_in_ep(stcb->sctp_ep, ifa) == 0) { + continue; + } + } else { + /* Need to check scopes for this guy */ + if (sctp_is_address_in_scope(ifa, + stcb->asoc.ipv4_addr_legal, + stcb->asoc.ipv6_addr_legal, + stcb->asoc.loopback_scope, + stcb->asoc.ipv4_local_scope, + stcb->asoc.local_scope, + stcb->asoc.site_scope, 0) == 0) { + continue; + } + } + + } + /* queue an asconf for this address add/delete */ + if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_DO_ASCONF)) { + /* does the peer do asconf? 
*/ + if (stcb->asoc.peer_supports_asconf) { + /* queue an asconf for this addr */ + + status = sctp_asconf_queue_add(stcb, ifa, type); + /* + * if queued ok, and in correct state, set + * the ASCONF timer if in non-open state, we + * will set this timer when the state does + * go open and do all the asconf's + */ + if (status == 0 && + SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_OPEN) { + sctp_timer_start(SCTP_TIMER_TYPE_ASCONF, inp, + stcb, stcb->asoc.primary_destination); } - laddr = laddr_next; - } /* while */ - } /* for each stcb */ - /* process through the inp bound addr list */ - laddr = LIST_FIRST(&inp->sctp_addr_list); - while (laddr != NULL) { - laddr_next = LIST_NEXT(laddr, sctp_nxt_addr); - /* remove if in use */ - if (laddr->ifa == ifa) { - sctp_remove_laddr(laddr); } - laddr = laddr_next; } - SCTP_INP_RUNLOCK(inp); } - SCTP_INP_INFO_RUNLOCK(); +} + +void +sctp_iterator_end(void *ptr, uint32_t val) +{ + struct sctp_asconf_iterator *asc; + struct sctp_ifa *ifa; + struct sctp_laddr *l, *l_next; + + asc = (struct sctp_asconf_iterator *)ptr; + l = LIST_FIRST(&asc->list_of_work); + while (l != NULL) { + l_next = LIST_NEXT(l, sctp_nxt_addr); + ifa = l->ifa; + if (l->action == SCTP_ADD_IP_ADDRESS) { + /* Clear the defer use flag */ + ifa->localifa_flags &= ~SCTP_ADDR_DEFER_USE; + } + sctp_free_ifa(ifa); + SCTP_ZONE_FREE(sctppcbinfo.ipi_zone_laddr, l); + SCTP_DECR_LADDR_COUNT(); + l = l_next; + } + SCTP_FREE(asc); } /* @@ -2067,14 +2034,10 @@ sctp_set_primary_ip_address_sa(struct sctp_tcb *stcb, struct sockaddr *sa) } void -sctp_set_primary_ip_address(struct ifaddr *ifa) +sctp_set_primary_ip_address(struct sctp_ifa *ifa) { struct sctp_inpcb *inp; - /* make sure we care about this interface... 
*/ - if (!sctp_is_desired_interface_type(ifa)) { - return; - } /* go through all our PCB's */ LIST_FOREACH(inp, &sctppcbinfo.listhead, sctp_list) { struct sctp_tcb *stcb; @@ -2095,7 +2058,7 @@ sctp_set_primary_ip_address(struct ifaddr *ifa) if (sctp_debug_on & SCTP_DEBUG_ASCONF1) { printf("set_primary_ip_address: queued on stcb=%p, ", stcb); - sctp_print_address(ifa->ifa_addr); + sctp_print_address(&ifa->address.sa); } #endif /* SCTP_DEBUG */ } @@ -2106,21 +2069,23 @@ sctp_set_primary_ip_address(struct ifaddr *ifa) static struct sockaddr * sctp_find_valid_localaddr(struct sctp_tcb *stcb) { - struct ifnet *ifn; - struct ifaddr *ifa; - - - TAILQ_FOREACH(ifn, &ifnet, if_list) { - if (stcb->asoc.loopback_scope == 0 && ifn->if_type == IFT_LOOP) { + struct sctp_vrf *vrf = NULL; + struct sctp_ifn *sctp_ifn; + struct sctp_ifa *sctp_ifa; + + vrf = sctp_find_vrf(stcb->asoc.vrf_id); + LIST_FOREACH(sctp_ifn, &vrf->ifnlist, next_ifn) { + if (stcb->asoc.loopback_scope == 0 && + SCTP_IFN_IS_IFT_LOOP(sctp_ifn)) { /* Skip if loopback_scope not set */ continue; } - TAILQ_FOREACH(ifa, &ifn->if_addrlist, ifa_list) { - if (ifa->ifa_addr->sa_family == AF_INET && + LIST_FOREACH(sctp_ifa, &sctp_ifn->ifalist, next_ifa) { + if (sctp_ifa->address.sa.sa_family == AF_INET && stcb->asoc.ipv4_addr_legal) { struct sockaddr_in *sin; - sin = (struct sockaddr_in *)ifa->ifa_addr; + sin = (struct sockaddr_in *)&sctp_ifa->address.sa; if (sin->sin_addr.s_addr == 0) { /* skip unspecifed addresses */ continue; @@ -2129,23 +2094,18 @@ sctp_find_valid_localaddr(struct sctp_tcb *stcb) IN4_ISPRIVATE_ADDRESS(&sin->sin_addr)) continue; - if (sctp_is_addr_restricted(stcb, - ifa->ifa_addr)) + if (sctp_is_addr_restricted(stcb, sctp_ifa)) continue; /* found a valid local v4 address to use */ - return (ifa->ifa_addr); - } else if (ifa->ifa_addr->sa_family == AF_INET6 && + return (&sctp_ifa->address.sa); + } else if (sctp_ifa->address.sa.sa_family == AF_INET6 && stcb->asoc.ipv6_addr_legal) { struct sockaddr_in6 
*sin6; - struct in6_ifaddr *ifa6; - ifa6 = (struct in6_ifaddr *)ifa; - if (IFA6_IS_DEPRECATED(ifa6) || - (ifa6->ia6_flags & (IN6_IFF_DETACHED | - IN6_IFF_ANYCAST | IN6_IFF_NOTREADY))) + if (sctp_ifa->localifa_flags & SCTP_ADDR_IFA_UNUSEABLE) { continue; - - sin6 = (struct sockaddr_in6 *)ifa->ifa_addr; + } + sin6 = (struct sockaddr_in6 *)&sctp_ifa->address.sa; if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { /* we skip unspecifed addresses */ continue; @@ -2158,7 +2118,7 @@ sctp_find_valid_localaddr(struct sctp_tcb *stcb) continue; /* found a valid local v6 address to use */ - return (ifa->ifa_addr); + return (&sctp_ifa->address.sa); } } } @@ -2175,15 +2135,15 @@ sctp_find_valid_localaddr_ep(struct sctp_tcb *stcb) if (laddr->ifa == NULL) { continue; } - if (laddr->ifa->ifa_addr == NULL) { + if (laddr->ifa == NULL) { continue; } /* is the address restricted ? */ - if (sctp_is_addr_restricted(stcb, laddr->ifa->ifa_addr)) + if (sctp_is_addr_restricted(stcb, laddr->ifa)) continue; /* found a valid local address to use */ - return (laddr->ifa->ifa_addr); + return (&laddr->ifa->address.sa); } /* no valid addresses found */ return (NULL); @@ -2384,11 +2344,12 @@ sctp_process_initack_addresses(struct sctp_tcb *stcb, struct mbuf *m, { struct sctp_paramhdr tmp_param, *ph; uint16_t plen, ptype; + struct sctp_ifa *sctp_ifa; struct sctp_ipv6addr_param addr_store; struct sockaddr_in6 sin6; struct sockaddr_in sin; struct sockaddr *sa; - struct ifaddr *ifa; + uint32_t vrf_id; #ifdef SCTP_DEBUG if (sctp_debug_on & SCTP_DEBUG_ASCONF2) { @@ -2438,7 +2399,9 @@ sctp_process_initack_addresses(struct sctp_tcb *stcb, struct mbuf *m, struct sctp_ipv4addr_param *a4p; /* get the entire IPv4 address param */ - a4p = (struct sctp_ipv4addr_param *)sctp_m_getptr(m, offset, sizeof(struct sctp_ipv4addr_param), (uint8_t *) & addr_store); + a4p = (struct sctp_ipv4addr_param *)sctp_m_getptr(m, offset, + sizeof(struct sctp_ipv4addr_param), + (uint8_t *) & addr_store); if (plen != sizeof(struct 
sctp_ipv4addr_param) || a4p == NULL) { return; @@ -2450,8 +2413,9 @@ sctp_process_initack_addresses(struct sctp_tcb *stcb, struct mbuf *m, } /* see if this address really (still) exists */ - ifa = sctp_find_ifa_by_addr(sa); - if (ifa == NULL) { + vrf_id = SCTP_DEFAULT_VRFID; + sctp_ifa = sctp_find_ifa_by_addr(sa, vrf_id, 0); + if (sctp_ifa == NULL) { /* address doesn't exist anymore */ int status; @@ -2474,25 +2438,7 @@ sctp_process_initack_addresses(struct sctp_tcb *stcb, struct mbuf *m, stcb->asoc.primary_destination); } } - } else { - /* address still exists */ - /* - * if subset bound, ep addr's managed by default if - * not doing ASCONF, add the address to the assoc - */ - if ((stcb->sctp_ep->sctp_flags & - SCTP_PCB_FLAGS_BOUNDALL) == 0 && - (sctp_is_feature_off(stcb->sctp_ep, - SCTP_PCB_FLAGS_DO_ASCONF))) { -#ifdef SCTP_DEBUG - if (sctp_debug_on & SCTP_DEBUG_ASCONF2) { - printf("process_initack_addrs: adding local addr to asoc\n"); - } -#endif /* SCTP_DEBUG */ - sctp_add_local_addr_assoc(stcb, ifa); - } } - next_addr: /* * Sanity check: Make sure the length isn't 0, otherwise @@ -2530,8 +2476,9 @@ sctp_addr_in_initack(struct sctp_tcb *stcb, struct mbuf *m, uint32_t offset, struct sctp_ipv4addr_param *a4p; #ifdef INET6 - struct sockaddr_in6 *sin6, sin6_tmp; + struct sockaddr_in6 *sin6; struct sctp_ipv6addr_param *a6p; + struct sockaddr_in6 sin6_tmp; #endif /* INET6 */ @@ -2642,7 +2589,7 @@ sctp_check_address_list_ep(struct sctp_tcb *stcb, struct mbuf *m, int offset, #endif /* SCTP_DEBUG */ continue; } - if (laddr->ifa->ifa_addr == NULL) { + if (laddr->ifa == NULL) { #ifdef SCTP_DEBUG if (sctp_debug_on & SCTP_DEBUG_ASCONF1) { printf("check_addr_list_ep: laddr->ifa->ifa_addr is NULL"); @@ -2651,12 +2598,12 @@ sctp_check_address_list_ep(struct sctp_tcb *stcb, struct mbuf *m, int offset, continue; } /* do i have it implicitly? 
*/ - if (sctp_cmpaddr(laddr->ifa->ifa_addr, init_addr)) { + if (sctp_cmpaddr(&laddr->ifa->address.sa, init_addr)) { continue; } /* check to see if in the init-ack */ if (!sctp_addr_in_initack(stcb, m, offset, length, - laddr->ifa->ifa_addr)) { + &laddr->ifa->address.sa)) { /* try to add it */ sctp_addr_mgmt_assoc(stcb->sctp_ep, stcb, laddr->ifa, SCTP_ADD_IP_ADDRESS); @@ -2674,27 +2621,34 @@ sctp_check_address_list_all(struct sctp_tcb *stcb, struct mbuf *m, int offset, uint16_t local_scope, uint16_t site_scope, uint16_t ipv4_scope, uint16_t loopback_scope) { - struct ifnet *ifn; - struct ifaddr *ifa; - + struct sctp_vrf *vrf = NULL; + struct sctp_ifn *sctp_ifn; + struct sctp_ifa *sctp_ifa; + uint32_t vrf_id; + + vrf_id = SCTP_DEFAULT_VRFID; + vrf = sctp_find_vrf(vrf_id); + if (vrf == NULL) { + return; + } /* go through all our known interfaces */ - TAILQ_FOREACH(ifn, &ifnet, if_list) { - if (loopback_scope == 0 && ifn->if_type == IFT_LOOP) { + LIST_FOREACH(sctp_ifn, &vrf->ifnlist, next_ifn) { + if (loopback_scope == 0 && SCTP_IFN_IS_IFT_LOOP(sctp_ifn)) { /* skip loopback interface */ continue; } /* go through each interface address */ - TAILQ_FOREACH(ifa, &ifn->if_addrlist, ifa_list) { + LIST_FOREACH(sctp_ifa, &sctp_ifn->ifalist, next_ifa) { /* do i have it implicitly? 
*/ - if (sctp_cmpaddr(ifa->ifa_addr, init_addr)) { + if (sctp_cmpaddr(&sctp_ifa->address.sa, init_addr)) { continue; } /* check to see if in the init-ack */ if (!sctp_addr_in_initack(stcb, m, offset, length, - ifa->ifa_addr)) { + &sctp_ifa->address.sa)) { /* try to add it */ sctp_addr_mgmt_assoc(stcb->sctp_ep, stcb, - ifa, SCTP_ADD_IP_ADDRESS); + sctp_ifa, SCTP_ADD_IP_ADDRESS); } } /* end foreach ifa */ } /* end foreach ifn */ @@ -2737,71 +2691,65 @@ sctp_check_address_list(struct sctp_tcb *stcb, struct mbuf *m, int offset, * sctp_bindx() support */ uint32_t -sctp_addr_mgmt_ep_sa(struct sctp_inpcb *inp, struct sockaddr *sa, uint16_t type) +sctp_addr_mgmt_ep_sa(struct sctp_inpcb *inp, struct sockaddr *sa, uint32_t type, uint32_t vrf_id) { - struct ifaddr *ifa; + struct sctp_ifa *ifa; - - if (sa->sa_len == 0) + if (sa->sa_len == 0) { return (EINVAL); - - ifa = sctp_find_ifa_by_addr(sa); + } + if (type == SCTP_ADD_IP_ADDRESS) { + /* For an add the address MUST be on the system */ + ifa = sctp_find_ifa_by_addr(sa, vrf_id, 0); + } else if (type == SCTP_DEL_IP_ADDRESS) { + /* For a delete we need to find it in the inp */ + ifa = sctp_find_ifa_in_ep(inp, sa, 0); + } else { + ifa = NULL; + } if (ifa != NULL) { -#ifdef INET6 - if (ifa->ifa_addr->sa_family == AF_INET6) { - struct in6_ifaddr *ifa6; - - ifa6 = (struct in6_ifaddr *)ifa; - if (IFA6_IS_DEPRECATED(ifa6) || - (ifa6->ia6_flags & (IN6_IFF_DETACHED | - IN6_IFF_ANYCAST | IN6_IFF_NOTREADY))) { - /* Can't bind a non-existent addr. 
*/ - return (EINVAL); + /* add this address */ + struct sctp_asconf_iterator *asc; + struct sctp_laddr *wi; + + SCTP_MALLOC(asc, struct sctp_asconf_iterator *, + sizeof(struct sctp_asconf_iterator), "SCTP_ASCONF_ITERATOR"); + if (asc == NULL) { + return (ENOMEM); + } + wi = SCTP_ZONE_GET(sctppcbinfo.ipi_zone_laddr, struct sctp_laddr); + if (wi == NULL) { + SCTP_FREE(asc); + return (ENOMEM); + } + if (type == SCTP_ADD_IP_ADDRESS) { + sctp_add_local_addr_ep(inp, ifa, type); + } else if (type == SCTP_DEL_IP_ADDRESS) { + struct sctp_laddr *laddr; + + LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) { + if (ifa == laddr->ifa) { + /* Mark in the delete */ + laddr->action = type; + } } } -#endif /* INET6 */ - /* add this address */ - sctp_addr_mgmt_ep(inp, ifa, type); + LIST_INIT(&asc->list_of_work); + asc->cnt = 1; + SCTP_INCR_LADDR_COUNT(); + wi->ifa = ifa; + wi->action = type; + atomic_add_int(&ifa->refcount, 1); + LIST_INSERT_HEAD(&asc->list_of_work, wi, sctp_nxt_addr); + sctp_initiate_iterator(sctp_iterator_ep, + sctp_iterator_stcb, + sctp_iterator_ep_end, + SCTP_PCB_ANY_FLAGS, + SCTP_PCB_ANY_FEATURES, SCTP_ASOC_ANY_STATE, (void *)asc, 0, + sctp_iterator_end, inp, 0); } else { /* invalid address! */ return (EADDRNOTAVAIL); } return (0); } - -void -sctp_addr_change(struct ifaddr *ifa, int cmd) -{ - struct sctp_laddr *wi; - - wi = SCTP_ZONE_GET(sctppcbinfo.ipi_zone_laddr, struct sctp_laddr); - if (wi == NULL) { - /* - * Gak, what can we do? We have lost an address change can - * you say HOSED? - */ -#ifdef SCTP_DEBUG - if (sctp_debug_on & SCTP_DEBUG_PCB1) { - printf("Lost and address change ???\n"); - } -#endif /* SCTP_DEBUG */ - return; - } - SCTP_INCR_LADDR_COUNT(); - bzero(wi, sizeof(*wi)); - wi->ifa = ifa; - IFAREF(ifa); - - wi->action = cmd; - SCTP_IPI_ADDR_LOCK(); - /* - * Should this really be a tailq? 
As it is we will process the - * newest first :-0 - */ - LIST_INSERT_HEAD(&sctppcbinfo.addr_wq, wi, sctp_nxt_addr); - sctp_timer_start(SCTP_TIMER_TYPE_ADDR_WQ, - (struct sctp_inpcb *)NULL, - (struct sctp_tcb *)NULL, - (struct sctp_nets *)NULL); - SCTP_IPI_ADDR_UNLOCK(); -} diff --git a/sys/netinet/sctp_asconf.h b/sys/netinet/sctp_asconf.h index 2bece63..22df5cf 100644 --- a/sys/netinet/sctp_asconf.h +++ b/sys/netinet/sctp_asconf.h @@ -55,19 +55,21 @@ sctp_handle_asconf_ack(struct mbuf *, int, extern uint32_t sctp_addr_mgmt_ep_sa(struct sctp_inpcb *, struct sockaddr *, - uint16_t); + uint32_t, uint32_t); -extern void sctp_add_ip_address(struct ifaddr *); -extern void sctp_delete_ip_address(struct ifaddr *); +int sctp_iterator_ep(struct sctp_inpcb *inp, void *ptr, uint32_t val); +void sctp_iterator_stcb(struct sctp_inpcb *inp, struct sctp_tcb *stcb, void *ptr, uint32_t type); +int sctp_iterator_ep_end(struct sctp_inpcb *inp, void *ptr, uint32_t val); +void sctp_iterator_end(void *ptr, uint32_t val); -extern void sctp_addr_change(struct ifaddr *ifa, int cmd); extern int32_t sctp_set_primary_ip_address_sa(struct sctp_tcb *, struct sockaddr *); -extern void sctp_set_primary_ip_address(struct ifaddr *); +extern void + sctp_set_primary_ip_address(struct sctp_ifa *ifa); extern void sctp_check_address_list(struct sctp_tcb *, struct mbuf *, int, int, diff --git a/sys/netinet/sctp_auth.c b/sys/netinet/sctp_auth.c index 86df162..81973b7 100644 --- a/sys/netinet/sctp_auth.c +++ b/sys/netinet/sctp_auth.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2001-2006, Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2001-2007, Cisco Systems, Inc. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -36,14 +36,13 @@ __FBSDID("$FreeBSD$"); #include <netinet/sctp_header.h> #include <netinet/sctp_pcb.h> #include <netinet/sctp_var.h> +#include <netinet/sctp_sysctl.h> #include <netinet/sctputil.h> #include <netinet/sctp_indata.h> #include <netinet/sctp_output.h> #include <netinet/sctp_auth.h> #ifdef SCTP_DEBUG -extern uint32_t sctp_debug_on; - #define SCTP_AUTH_DEBUG (sctp_debug_on & SCTP_DEBUG_AUTH1) #define SCTP_AUTH_DEBUG2 (sctp_debug_on & SCTP_DEBUG_AUTH2) #endif /* SCTP_DEBUG */ @@ -1935,7 +1934,7 @@ sctp_initialize_auth_params(struct sctp_inpcb *inp, struct sctp_tcb *stcb) { uint16_t chunks_len = 0; uint16_t hmacs_len = 0; - uint16_t random_len = sctp_auth_random_len; + uint16_t random_len = SCTP_AUTH_RANDOM_SIZE_DEFAULT; sctp_key_t *new_key; uint16_t keylen; diff --git a/sys/netinet/sctp_auth.h b/sys/netinet/sctp_auth.h index 5d0f4da..f9a5488 100644 --- a/sys/netinet/sctp_auth.h +++ b/sys/netinet/sctp_auth.h @@ -98,17 +98,10 @@ typedef struct sctp_authinfo { } sctp_authinfo_t; -/* - * global variables - */ -extern uint32_t sctp_asconf_auth_nochk; /* sysctl to disable ASCONF auth chk */ -extern uint32_t sctp_auth_disable; /* sysctl for temp feature interop */ -extern uint32_t sctp_auth_random_len; /* sysctl */ /* * Macros */ - #define sctp_auth_is_required_chunk(chunk, list) ((list == NULL) ? (0) : (list->chunks[chunk] != 0)) /* diff --git a/sys/netinet/sctp_bsd_addr.c b/sys/netinet/sctp_bsd_addr.c index fb33fa8..4f83668 100644 --- a/sys/netinet/sctp_bsd_addr.c +++ b/sys/netinet/sctp_bsd_addr.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2001-2006, Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2001-2007, Cisco Systems, Inc. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -45,1925 +45,306 @@ __FBSDID("$FreeBSD$"); #include <netinet/sctp_timer.h> #include <netinet/sctp_asconf.h> #include <netinet/sctp_indata.h> - - -/* XXX - * This module needs to be rewritten with an eye towards getting - * rid of the user of ifa.. and use another list method George - * as told me of. - */ +#include <sys/unistd.h> #ifdef SCTP_DEBUG extern uint32_t sctp_debug_on; #endif -static struct sockaddr_in * -sctp_is_v4_ifa_addr_prefered(struct ifaddr *ifa, uint8_t loopscope, uint8_t ipv4_scope, uint8_t * sin_loop, uint8_t * sin_local) -{ - struct sockaddr_in *sin; - /* - * Here we determine if its a prefered address. A prefered address - * means it is the same scope or higher scope then the destination. - * L = loopback, P = private, G = global - * ----------------------------------------- src | dest | - * result ----------------------------------------- L | L | - * yes ----------------------------------------- P | L | yes - * ----------------------------------------- G | L | yes - * ----------------------------------------- L | P | no - * ----------------------------------------- P | P | yes - * ----------------------------------------- G | P | no - * ----------------------------------------- L | G | no - * ----------------------------------------- P | G | no - * ----------------------------------------- G | G | yes - * ----------------------------------------- - */ - - if (ifa->ifa_addr->sa_family != AF_INET) { - /* forget non-v4 */ - return (NULL); - } - /* Ok the address may be ok */ - sin = (struct sockaddr_in *)ifa->ifa_addr; - if (sin->sin_addr.s_addr == 0) { - return (NULL); - } - *sin_local = *sin_loop = 0; - if ((ifa->ifa_ifp->if_type == IFT_LOOP) || - (IN4_ISLOOPBACK_ADDRESS(&sin->sin_addr))) { - *sin_loop = 1; - *sin_local = 1; - } - if ((IN4_ISPRIVATE_ADDRESS(&sin->sin_addr))) { - *sin_local = 1; 
- } - if (!loopscope && *sin_loop) { - /* Its a loopback address and we don't have loop scope */ - return (NULL); - } - if (!ipv4_scope && *sin_local) { - /* - * Its a private address, and we don't have private address - * scope - */ - return (NULL); - } - if (((ipv4_scope == 0) && (loopscope == 0)) && (*sin_local)) { - /* its a global src and a private dest */ - return (NULL); - } - /* its a prefered address */ - return (sin); -} - -static struct sockaddr_in * -sctp_is_v4_ifa_addr_acceptable(struct ifaddr *ifa, uint8_t loopscope, uint8_t ipv4_scope, uint8_t * sin_loop, uint8_t * sin_local) +#if defined(SCTP_USE_THREAD_BASED_ITERATOR) +void +sctp_wakeup_iterator(void) { - struct sockaddr_in *sin; - - /* - * Here we determine if its a acceptable address. A acceptable - * address means it is the same scope or higher scope but we can - * allow for NAT which means its ok to have a global dest and a - * private src. - * - * L = loopback, P = private, G = global - * ----------------------------------------- src | dest | - * result ----------------------------------------- L | L | - * yes ----------------------------------------- P | L | yes - * ----------------------------------------- G | L | yes - * ----------------------------------------- L | P | no - * ----------------------------------------- P | P | yes - * ----------------------------------------- G | P | yes - - * probably this won't work. 
- * ----------------------------------------- L | G | - * no ----------------------------------------- P | G | - * yes ----------------------------------------- G | G | - * yes ----------------------------------------- - */ - - if (ifa->ifa_addr->sa_family != AF_INET) { - /* forget non-v4 */ - return (NULL); - } - /* Ok the address may be ok */ - sin = (struct sockaddr_in *)ifa->ifa_addr; - if (sin->sin_addr.s_addr == 0) { - return (NULL); - } - *sin_local = *sin_loop = 0; - if ((ifa->ifa_ifp->if_type == IFT_LOOP) || - (IN4_ISLOOPBACK_ADDRESS(&sin->sin_addr))) { - *sin_loop = 1; - *sin_local = 1; - } - if ((IN4_ISPRIVATE_ADDRESS(&sin->sin_addr))) { - *sin_local = 1; - } - if (!loopscope && *sin_loop) { - /* Its a loopback address and we don't have loop scope */ - return (NULL); - } - /* its an acceptable address */ - return (sin); + wakeup(&sctppcbinfo.iterator_running); } -/* - * This treats the address list on the ep as a restricted list (negative - * list). If a the passed address is listed, then the address is NOT allowed - * on the association. 
- */ -int -sctp_is_addr_restricted(struct sctp_tcb *stcb, struct sockaddr *addr) +static void +sctp_iterator_thread(void *v) { - struct sctp_laddr *laddr; - -#ifdef SCTP_DEBUG - int cnt = 0; - -#endif - if (stcb == NULL) { - /* There are no restrictions, no TCB :-) */ - return (0); - } -#ifdef SCTP_DEBUG - LIST_FOREACH(laddr, &stcb->asoc.sctp_local_addr_list, sctp_nxt_addr) { - cnt++; - } - if (sctp_debug_on & SCTP_DEBUG_OUTPUT4) { - printf("There are %d addresses on the restricted list\n", cnt); + SCTP_IPI_ITERATOR_WQ_LOCK(); + sctppcbinfo.iterator_running = 0; + while (1) { + msleep(&sctppcbinfo.iterator_running, + &sctppcbinfo.ipi_iterator_wq_mtx, + 0, "waiting_for_work", 0); + sctp_iterator_worker(); } - cnt = 0; -#endif - LIST_FOREACH(laddr, &stcb->asoc.sctp_local_addr_list, sctp_nxt_addr) { - if (laddr->ifa == NULL) { -#ifdef SCTP_DEBUG - if (sctp_debug_on & SCTP_DEBUG_OUTPUT1) { - printf("Help I have fallen and I can't get up!\n"); - } -#endif - continue; - } -#ifdef SCTP_DEBUG - if (sctp_debug_on & SCTP_DEBUG_OUTPUT4) { - cnt++; - printf("Restricted address[%d]:", cnt); - sctp_print_address(laddr->ifa->ifa_addr); - } -#endif - if (sctp_cmpaddr(addr, laddr->ifa->ifa_addr) == 1) { - /* Yes it is on the list */ - return (1); - } - } - return (0); } -static int -sctp_is_addr_in_ep(struct sctp_inpcb *inp, struct ifaddr *ifa) +void +sctp_startup_iterator(void) { - struct sctp_laddr *laddr; - - if (ifa == NULL) - return (0); - LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) { - if (laddr->ifa == NULL) { -#ifdef SCTP_DEBUG - if (sctp_debug_on & SCTP_DEBUG_OUTPUT1) { - printf("Help I have fallen and I can't get up!\n"); - } -#endif - continue; - } - if (laddr->ifa->ifa_addr == NULL) - continue; - if (laddr->ifa == ifa) - /* same pointer */ - return (1); - if (laddr->ifa->ifa_addr->sa_family != ifa->ifa_addr->sa_family) { - /* skip non compatible address comparison */ - continue; - } - if (sctp_cmpaddr(ifa->ifa_addr, laddr->ifa->ifa_addr) == 1) { - /* Yes it 
is restricted */ - return (1); - } - } - return (0); + int ret; + + ret = kthread_create(sctp_iterator_thread, + (void *)NULL, + &sctppcbinfo.thread_proc, + RFPROC, + SCTP_KTHREAD_PAGES, + SCTP_KTRHEAD_NAME); } - - -static struct in_addr -sctp_choose_v4_boundspecific_inp(struct sctp_inpcb *inp, - struct route *ro, - uint8_t ipv4_scope, - uint8_t loopscope) -{ - struct in_addr ans; - struct sctp_laddr *laddr; - struct sockaddr_in *sin; - struct ifnet *ifn; - struct ifaddr *ifa; - uint8_t sin_loop, sin_local; - struct rtentry *rt; - - /* - * first question, is the ifn we will emit on in our list, if so, we - * want that one. - */ - rt = ro->ro_rt; - ifn = rt->rt_ifp; - if (ifn) { - /* is a prefered one on the interface we route out? */ - TAILQ_FOREACH(ifa, &ifn->if_addrlist, ifa_list) { - sin = sctp_is_v4_ifa_addr_prefered(ifa, loopscope, ipv4_scope, &sin_loop, &sin_local); - if (sin == NULL) - continue; - if (sctp_is_addr_in_ep(inp, ifa)) { - return (sin->sin_addr); - } - } - /* is an acceptable one on the interface we route out? 
*/ - TAILQ_FOREACH(ifa, &ifn->if_addrlist, ifa_list) { - sin = sctp_is_v4_ifa_addr_acceptable(ifa, loopscope, ipv4_scope, &sin_loop, &sin_local); - if (sin == NULL) - continue; - if (sctp_is_addr_in_ep(inp, ifa)) { - return (sin->sin_addr); - } - } - } - /* ok, what about a prefered address in the inp */ - for (laddr = LIST_FIRST(&inp->sctp_addr_list); - laddr && (laddr != inp->next_addr_touse); - laddr = LIST_NEXT(laddr, sctp_nxt_addr)) { - if (laddr->ifa == NULL) { - /* address has been removed */ - continue; - } - sin = sctp_is_v4_ifa_addr_prefered(laddr->ifa, loopscope, ipv4_scope, &sin_loop, &sin_local); - if (sin == NULL) - continue; - return (sin->sin_addr); - - } - /* ok, what about an acceptable address in the inp */ - for (laddr = LIST_FIRST(&inp->sctp_addr_list); - laddr && (laddr != inp->next_addr_touse); - laddr = LIST_NEXT(laddr, sctp_nxt_addr)) { - if (laddr->ifa == NULL) { - /* address has been removed */ - continue; - } - sin = sctp_is_v4_ifa_addr_acceptable(laddr->ifa, loopscope, ipv4_scope, &sin_loop, &sin_local); - if (sin == NULL) - continue; - return (sin->sin_addr); - - } - - /* - * no address bound can be a source for the destination we are in - * trouble - */ -#ifdef SCTP_DEBUG - if (sctp_debug_on & SCTP_DEBUG_OUTPUT1) { - printf("Src address selection for EP, no acceptable src address found for address\n"); - } #endif - RTFREE(ro->ro_rt); - ro->ro_rt = NULL; - memset(&ans, 0, sizeof(ans)); - return (ans); -} - -static struct in_addr -sctp_choose_v4_boundspecific_stcb(struct sctp_inpcb *inp, - struct sctp_tcb *stcb, - struct sctp_nets *net, - struct route *ro, - uint8_t ipv4_scope, - uint8_t loopscope, - int non_asoc_addr_ok) +void +sctp_gather_internal_ifa_flags(struct sctp_ifa *ifa) { - /* - * Here we have two cases, bound all asconf allowed. bound all - * asconf not allowed. 
- * - */ - struct sctp_laddr *laddr, *starting_point; - struct in_addr ans; - struct ifnet *ifn; - struct ifaddr *ifa; - uint8_t sin_loop, sin_local, start_at_beginning = 0; - struct sockaddr_in *sin; - struct rtentry *rt; - - /* - * first question, is the ifn we will emit on in our list, if so, we - * want that one. - */ - rt = ro->ro_rt; - ifn = rt->rt_ifp; - - if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_DO_ASCONF)) { - /* - * Here we use the list of addresses on the endpoint. Then - * the addresses listed on the "restricted" list is just - * that, address that have not been added and can't be used - * (unless the non_asoc_addr_ok is set). - */ -#ifdef SCTP_DEBUG - if (sctp_debug_on & SCTP_DEBUG_OUTPUT1) { - printf("Have a STCB - asconf allowed, not bound all have a netgative list\n"); - } -#endif - /* - * first question, is the ifn we will emit on in our list, - * if so, we want that one. - */ - if (ifn) { - /* first try for an prefered address on the ep */ - TAILQ_FOREACH(ifa, &ifn->if_addrlist, ifa_list) { - if (sctp_is_addr_in_ep(inp, ifa)) { - sin = sctp_is_v4_ifa_addr_prefered(ifa, loopscope, ipv4_scope, &sin_loop, &sin_local); - if (sin == NULL) - continue; - if ((non_asoc_addr_ok == 0) && - (sctp_is_addr_restricted(stcb, (struct sockaddr *)sin))) { - /* on the no-no list */ - continue; - } - return (sin->sin_addr); - } - } - /* next try for an acceptable address on the ep */ - TAILQ_FOREACH(ifa, &ifn->if_addrlist, ifa_list) { - if (sctp_is_addr_in_ep(inp, ifa)) { - sin = sctp_is_v4_ifa_addr_acceptable(ifa, loopscope, ipv4_scope, &sin_loop, &sin_local); - if (sin == NULL) - continue; - if ((non_asoc_addr_ok == 0) && - (sctp_is_addr_restricted(stcb, (struct sockaddr *)sin))) { - /* on the no-no list */ - continue; - } - return (sin->sin_addr); - } - } - - } - /* - * if we can't find one like that then we must look at all - * addresses bound to pick one at first prefereable then - * secondly acceptable. 
- */ - starting_point = stcb->asoc.last_used_address; -sctpv4_from_the_top: - if (stcb->asoc.last_used_address == NULL) { - start_at_beginning = 1; - stcb->asoc.last_used_address = LIST_FIRST(&inp->sctp_addr_list); - } - /* search beginning with the last used address */ - for (laddr = stcb->asoc.last_used_address; laddr; - laddr = LIST_NEXT(laddr, sctp_nxt_addr)) { - if (laddr->ifa == NULL) { - /* address has been removed */ - continue; - } - sin = sctp_is_v4_ifa_addr_prefered(laddr->ifa, loopscope, ipv4_scope, &sin_loop, &sin_local); - if (sin == NULL) - continue; - if ((non_asoc_addr_ok == 0) && - (sctp_is_addr_restricted(stcb, (struct sockaddr *)sin))) { - /* on the no-no list */ - continue; - } - return (sin->sin_addr); + struct in6_ifaddr *ifa6; - } - if (start_at_beginning == 0) { - stcb->asoc.last_used_address = NULL; - goto sctpv4_from_the_top; - } - /* now try for any higher scope than the destination */ - stcb->asoc.last_used_address = starting_point; - start_at_beginning = 0; -sctpv4_from_the_top2: - if (stcb->asoc.last_used_address == NULL) { - start_at_beginning = 1; - stcb->asoc.last_used_address = LIST_FIRST(&inp->sctp_addr_list); - } - /* search beginning with the last used address */ - for (laddr = stcb->asoc.last_used_address; laddr; - laddr = LIST_NEXT(laddr, sctp_nxt_addr)) { - if (laddr->ifa == NULL) { - /* address has been removed */ - continue; - } - sin = sctp_is_v4_ifa_addr_acceptable(laddr->ifa, loopscope, ipv4_scope, &sin_loop, &sin_local); - if (sin == NULL) - continue; - if ((non_asoc_addr_ok == 0) && - (sctp_is_addr_restricted(stcb, (struct sockaddr *)sin))) { - /* on the no-no list */ - continue; - } - return (sin->sin_addr); - } - if (start_at_beginning == 0) { - stcb->asoc.last_used_address = NULL; - goto sctpv4_from_the_top2; + ifa6 = (struct in6_ifaddr *)ifa->ifa; + ifa->flags = ifa6->ia6_flags; + if (!ip6_use_deprecated) { + if (ifa->flags & + IN6_IFF_DEPRECATED) { + ifa->localifa_flags |= SCTP_ADDR_IFA_UNUSEABLE; + } else { + 
ifa->localifa_flags &= ~SCTP_ADDR_IFA_UNUSEABLE; } } else { - /* - * Here we have an address list on the association, thats - * the only valid source addresses that we can use. - */ -#ifdef SCTP_DEBUG - if (sctp_debug_on & SCTP_DEBUG_OUTPUT1) { - printf("Have a STCB - no asconf allowed, not bound all have a postive list\n"); - } -#endif - /* - * First look at all addresses for one that is on the - * interface we route out - */ - LIST_FOREACH(laddr, &stcb->asoc.sctp_local_addr_list, - sctp_nxt_addr) { - if (laddr->ifa == NULL) { - /* address has been removed */ - continue; - } - sin = sctp_is_v4_ifa_addr_prefered(laddr->ifa, loopscope, ipv4_scope, &sin_loop, &sin_local); - if (sin == NULL) - continue; - /* - * first question, is laddr->ifa an address - * associated with the emit interface - */ - if (ifn) { - TAILQ_FOREACH(ifa, &ifn->if_addrlist, ifa_list) { - if (laddr->ifa == ifa) { - sin = (struct sockaddr_in *)laddr->ifa->ifa_addr; - return (sin->sin_addr); - } - if (sctp_cmpaddr(ifa->ifa_addr, laddr->ifa->ifa_addr) == 1) { - sin = (struct sockaddr_in *)laddr->ifa->ifa_addr; - return (sin->sin_addr); - } - } - } - } - /* what about an acceptable one on the interface? 
*/ - LIST_FOREACH(laddr, &stcb->asoc.sctp_local_addr_list, - sctp_nxt_addr) { - if (laddr->ifa == NULL) { - /* address has been removed */ - continue; - } - sin = sctp_is_v4_ifa_addr_acceptable(laddr->ifa, loopscope, ipv4_scope, &sin_loop, &sin_local); - if (sin == NULL) - continue; - /* - * first question, is laddr->ifa an address - * associated with the emit interface - */ - if (ifn) { - TAILQ_FOREACH(ifa, &ifn->if_addrlist, ifa_list) { - if (laddr->ifa == ifa) { - sin = (struct sockaddr_in *)laddr->ifa->ifa_addr; - return (sin->sin_addr); - } - if (sctp_cmpaddr(ifa->ifa_addr, laddr->ifa->ifa_addr) == 1) { - sin = (struct sockaddr_in *)laddr->ifa->ifa_addr; - return (sin->sin_addr); - } - } - } - } - /* ok, next one that is preferable in general */ - LIST_FOREACH(laddr, &stcb->asoc.sctp_local_addr_list, - sctp_nxt_addr) { - if (laddr->ifa == NULL) { - /* address has been removed */ - continue; - } - sin = sctp_is_v4_ifa_addr_prefered(laddr->ifa, loopscope, ipv4_scope, &sin_loop, &sin_local); - if (sin == NULL) - continue; - return (sin->sin_addr); - } - - /* last, what about one that is acceptable */ - LIST_FOREACH(laddr, &stcb->asoc.sctp_local_addr_list, - sctp_nxt_addr) { - if (laddr->ifa == NULL) { - /* address has been removed */ - continue; - } - sin = sctp_is_v4_ifa_addr_acceptable(laddr->ifa, loopscope, ipv4_scope, &sin_loop, &sin_local); - if (sin == NULL) - continue; - return (sin->sin_addr); - } - } - RTFREE(ro->ro_rt); - ro->ro_rt = NULL; - memset(&ans, 0, sizeof(ans)); - return (ans); -} - -static struct sockaddr_in * -sctp_select_v4_nth_prefered_addr_from_ifn_boundall(struct ifnet *ifn, struct sctp_tcb *stcb, int non_asoc_addr_ok, - uint8_t loopscope, uint8_t ipv4_scope, int cur_addr_num) -{ - struct ifaddr *ifa; - struct sockaddr_in *sin; - uint8_t sin_loop, sin_local; - int num_eligible_addr = 0; - - TAILQ_FOREACH(ifa, &ifn->if_addrlist, ifa_list) { - sin = sctp_is_v4_ifa_addr_prefered(ifa, loopscope, ipv4_scope, &sin_loop, &sin_local); - if (sin 
== NULL) - continue; - if (stcb) { - if ((non_asoc_addr_ok == 0) && sctp_is_addr_restricted(stcb, (struct sockaddr *)sin)) { - /* - * It is restricted for some reason.. - * probably not yet added. - */ - continue; - } - } - if (cur_addr_num == num_eligible_addr) { - return (sin); - } - } - return (NULL); -} - - -static int -sctp_count_v4_num_prefered_boundall(struct ifnet *ifn, struct sctp_tcb *stcb, int non_asoc_addr_ok, - uint8_t loopscope, uint8_t ipv4_scope, uint8_t * sin_loop, uint8_t * sin_local) -{ - struct ifaddr *ifa; - struct sockaddr_in *sin; - int num_eligible_addr = 0; - - TAILQ_FOREACH(ifa, &ifn->if_addrlist, ifa_list) { - sin = sctp_is_v4_ifa_addr_prefered(ifa, loopscope, ipv4_scope, sin_loop, sin_local); - if (sin == NULL) - continue; - if (stcb) { - if ((non_asoc_addr_ok == 0) && sctp_is_addr_restricted(stcb, (struct sockaddr *)sin)) { - /* - * It is restricted for some reason.. - * probably not yet added. - */ - continue; - } - } - num_eligible_addr++; - } - return (num_eligible_addr); - -} - -static struct in_addr -sctp_choose_v4_boundall(struct sctp_inpcb *inp, - struct sctp_tcb *stcb, - struct sctp_nets *net, - struct route *ro, - uint8_t ipv4_scope, - uint8_t loopscope, - int non_asoc_addr_ok) -{ - int cur_addr_num = 0, num_prefered = 0; - uint8_t sin_loop, sin_local; - struct ifnet *ifn; - struct sockaddr_in *sin; - struct in_addr ans; - struct ifaddr *ifa; - struct rtentry *rt; - - /* - * For v4 we can use (in boundall) any address in the association. - * If non_asoc_addr_ok is set we can use any address (at least in - * theory). So we look for prefered addresses first. If we find one, - * we use it. Otherwise we next try to get an address on the - * interface, which we should be able to do (unless non_asoc_addr_ok - * is false and we are routed out that way). In these cases where we - * can't use the address of the interface we go through all the - * ifn's looking for an address we can use and fill that in. 
Punting - * means we send back address 0, which will probably cause problems - * actually since then IP will fill in the address of the route ifn, - * which means we probably already rejected it.. i.e. here comes an - * abort :-<. - */ - rt = ro->ro_rt; - ifn = rt->rt_ifp; - if (net) { - cur_addr_num = net->indx_of_eligible_next_to_use; + ifa->localifa_flags &= ~SCTP_ADDR_IFA_UNUSEABLE; } - if (ifn == NULL) { - goto bound_all_v4_plan_c; - } - num_prefered = sctp_count_v4_num_prefered_boundall(ifn, stcb, non_asoc_addr_ok, loopscope, ipv4_scope, &sin_loop, &sin_local); -#ifdef SCTP_DEBUG - if (sctp_debug_on & SCTP_DEBUG_OUTPUT1) { - printf("Found %d prefered source addresses\n", num_prefered); - } -#endif - if (num_prefered == 0) { - /* - * no eligible addresses, we must use some other interface - * address if we can find one. - */ - goto bound_all_v4_plan_b; - } - /* - * Ok we have num_eligible_addr set with how many we can use, this - * may vary from call to call due to addresses being deprecated - * etc.. - */ - if (cur_addr_num >= num_prefered) { - cur_addr_num = 0; - } - /* - * select the nth address from the list (where cur_addr_num is the - * nth) and 0 is the first one, 1 is the second one etc... - */ -#ifdef SCTP_DEBUG - if (sctp_debug_on & SCTP_DEBUG_OUTPUT1) { - printf("cur_addr_num:%d\n", cur_addr_num); - } -#endif - sin = sctp_select_v4_nth_prefered_addr_from_ifn_boundall(ifn, stcb, non_asoc_addr_ok, loopscope, - ipv4_scope, cur_addr_num); - - /* if sin is NULL something changed??, plan_a now */ - if (sin) { - return (sin->sin_addr); - } - /* - * plan_b: Look at the interface that we emit on and see if we can - * find an acceptable address. 
- */ -bound_all_v4_plan_b: - TAILQ_FOREACH(ifa, &ifn->if_addrlist, ifa_list) { - sin = sctp_is_v4_ifa_addr_acceptable(ifa, loopscope, ipv4_scope, &sin_loop, &sin_local); - if (sin == NULL) - continue; - if (stcb) { - if ((non_asoc_addr_ok == 0) && sctp_is_addr_restricted(stcb, (struct sockaddr *)sin)) { - /* - * It is restricted for some reason.. - * probably not yet added. - */ - continue; - } - } - return (sin->sin_addr); - } - /* - * plan_c: Look at all interfaces and find a prefered address. If we - * reache here we are in trouble I think. - */ -bound_all_v4_plan_c: - for (ifn = TAILQ_FIRST(&ifnet); - ifn && (ifn != inp->next_ifn_touse); - ifn = TAILQ_NEXT(ifn, if_list)) { - if (loopscope == 0 && ifn->if_type == IFT_LOOP) { - /* wrong base scope */ - continue; - } - if (ifn == rt->rt_ifp) - /* already looked at this guy */ - continue; - num_prefered = sctp_count_v4_num_prefered_boundall(ifn, stcb, non_asoc_addr_ok, - loopscope, ipv4_scope, &sin_loop, &sin_local); -#ifdef SCTP_DEBUG - if (sctp_debug_on & SCTP_DEBUG_OUTPUT1) { - printf("Found ifn:%p %d prefered source addresses\n", ifn, num_prefered); - } -#endif - if (num_prefered == 0) { - /* - * None on this interface. - */ - continue; - } - /* - * Ok we have num_eligible_addr set with how many we can - * use, this may vary from call to call due to addresses - * being deprecated etc.. - */ - if (cur_addr_num >= num_prefered) { - cur_addr_num = 0; - } - sin = sctp_select_v4_nth_prefered_addr_from_ifn_boundall(ifn, stcb, non_asoc_addr_ok, loopscope, - ipv4_scope, cur_addr_num); - if (sin == NULL) - continue; - return (sin->sin_addr); - - } - - /* - * plan_d: We are in deep trouble. No prefered address on any - * interface. And the emit interface does not even have an - * acceptable address. Take anything we can get! If this does not - * work we are probably going to emit a packet that will illicit an - * ABORT, falling through. 
- */ - - for (ifn = TAILQ_FIRST(&ifnet); - ifn && (ifn != inp->next_ifn_touse); - ifn = TAILQ_NEXT(ifn, if_list)) { - if (loopscope == 0 && ifn->if_type == IFT_LOOP) { - /* wrong base scope */ - continue; - } - if (ifn == rt->rt_ifp) - /* already looked at this guy */ - continue; - - TAILQ_FOREACH(ifa, &ifn->if_addrlist, ifa_list) { - sin = sctp_is_v4_ifa_addr_acceptable(ifa, loopscope, ipv4_scope, &sin_loop, &sin_local); - if (sin == NULL) - continue; - if (stcb) { - if ((non_asoc_addr_ok == 0) && sctp_is_addr_restricted(stcb, (struct sockaddr *)sin)) { - /* - * It is restricted for some - * reason.. probably not yet added. - */ - continue; - } - } - return (sin->sin_addr); - } - } - /* - * Ok we can find NO address to source from that is not on our - * negative list. It is either the special ASCONF case where we are - * sourceing from a intf that has been ifconfig'd to a different - * address (i.e. it holds a ADD/DEL/SET-PRIM and the proper lookup - * address. OR we are hosed, and this baby is going to abort the - * association. - */ - if (non_asoc_addr_ok) { - return (((struct sockaddr_in *)(rt->rt_ifa->ifa_addr))->sin_addr); + if (ifa->flags & + (IN6_IFF_DETACHED | + IN6_IFF_ANYCAST | + IN6_IFF_NOTREADY)) { + ifa->localifa_flags |= SCTP_ADDR_IFA_UNUSEABLE; } else { - RTFREE(ro->ro_rt); - ro->ro_rt = NULL; - memset(&ans, 0, sizeof(ans)); - return (ans); + ifa->localifa_flags &= ~SCTP_ADDR_IFA_UNUSEABLE; } } -/* tcb may be NULL */ -struct in_addr -sctp_ipv4_source_address_selection(struct sctp_inpcb *inp, - struct sctp_tcb *stcb, struct route *ro, struct sctp_nets *net, - int non_asoc_addr_ok) +static uint32_t +sctp_is_desired_interface_type(struct ifaddr *ifa) { - struct in_addr ans; - struct sockaddr_in *to = (struct sockaddr_in *)&ro->ro_dst; - uint8_t ipv4_scope, loopscope; - - /* - * Rules: - Find the route if needed, cache if I can. - Look at - * interface address in route, Is it in the bound list. If so we - * have the best source. 
- If not we must rotate amongst the - * addresses. - * - * Cavets and issues - * - * Do we need to pay attention to scope. We can have a private address - * or a global address we are sourcing or sending to. So if we draw - * it out source * dest * result - * ------------------------------------------ a Private * - * Global * NAT? ------------------------------------------ b - * Private * Private * No problem - * ------------------------------------------ c Global * - * Private * Huh, How will this work? - * ------------------------------------------ d Global * - * Global * No Problem ------------------------------------------ - * - * And then we add to that what happens if there are multiple addresses - * assigned to an interface. Remember the ifa on a ifn is a linked - * list of addresses. So one interface can have more than one IPv4 - * address. What happens if we have both a private and a global - * address? Do we then use context of destination to sort out which - * one is best? And what about NAT's sending P->G may get you a NAT - * translation, or should you select the G thats on the interface in - * preference. - * - * Decisions: - * - * - count the number of addresses on the interface. - if its one, no - * problem except case <c>. For <a> we will assume a NAT out there. - * - if there are more than one, then we need to worry about scope P - * or G. We should prefer G -> G and P -> P if possible. Then as a - * secondary fall back to mixed types G->P being a last ditch one. - - * The above all works for bound all, but bound specific we need to - * use the same concept but instead only consider the bound - * addresses. If the bound set is NOT assigned to the interface then - * we must use rotation amongst them. - * - * Notes: For v4, we can always punt and let ip_output decide by - * sending back a source of 0.0.0.0 - */ - - if (ro->ro_rt == NULL) { - /* - * Need a route to cache. 
- * - */ - rtalloc_ign(ro, 0UL); - } - if (ro->ro_rt == NULL) { - /* No route to host .. punt */ - memset(&ans, 0, sizeof(ans)); - return (ans); - } - /* Setup our scopes */ - if (stcb) { - ipv4_scope = stcb->asoc.ipv4_local_scope; - loopscope = stcb->asoc.loopback_scope; - } else { - /* Scope based on outbound address */ - if ((IN4_ISPRIVATE_ADDRESS(&to->sin_addr))) { - ipv4_scope = 1; - loopscope = 0; - } else if (IN4_ISLOOPBACK_ADDRESS(&to->sin_addr)) { - ipv4_scope = 1; - loopscope = 1; - } else { - ipv4_scope = 0; - loopscope = 0; - } - } - if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) { - /* - * When bound to all if the address list is set it is a - * negative list. Addresses being added by asconf. - */ - return (sctp_choose_v4_boundall(inp, stcb, net, ro, - ipv4_scope, loopscope, non_asoc_addr_ok)); - } - /* - * Three possiblities here: - * - * a) stcb is NULL, which means we operate only from the list of - * addresses (ifa's) bound to the assoc and we care not about the - * list. b) stcb is NOT-NULL, which means we have an assoc structure - * and auto-asconf is on. This means that the list of addresses is a - * NOT list. We use the list from the inp, but any listed address in - * our list is NOT yet added. However if the non_asoc_addr_ok is set - * we CAN use an address NOT available (i.e. being added). Its a - * negative list. c) stcb is NOT-NULL, which means we have an assoc - * structure and auto-asconf is off. This means that the list of - * addresses is the ONLY addresses I can use.. its positive. - * - * Note we collapse b & c into the same function just like in the v6 - * address selection. 
- */ - if (stcb) { - return (sctp_choose_v4_boundspecific_stcb(inp, stcb, net, - ro, ipv4_scope, loopscope, non_asoc_addr_ok)); - } else { - return (sctp_choose_v4_boundspecific_inp(inp, ro, - ipv4_scope, loopscope)); - } - /* this should not be reached */ - memset(&ans, 0, sizeof(ans)); - return (ans); + int result; + + /* check the interface type to see if it's one we care about */ + switch (ifa->ifa_ifp->if_type) { + case IFT_ETHER: + case IFT_ISO88023: + case IFT_ISO88024: + case IFT_ISO88025: + case IFT_ISO88026: + case IFT_STARLAN: + case IFT_P10: + case IFT_P80: + case IFT_HY: + case IFT_FDDI: + case IFT_XETHER: + case IFT_ISDNBASIC: + case IFT_ISDNPRIMARY: + case IFT_PTPSERIAL: + case IFT_PPP: + case IFT_LOOP: + case IFT_SLIP: + case IFT_IP: + case IFT_IPOVERCDLC: + case IFT_IPOVERCLAW: + case IFT_VIRTUALIPADDRESS: + result = 1; + break; + default: + result = 0; + } + + return (result); } - - -static struct sockaddr_in6 * -sctp_is_v6_ifa_addr_acceptable(struct ifaddr *ifa, int loopscope, int loc_scope, int *sin_loop, int *sin_local) -{ - struct in6_ifaddr *ifa6; - struct sockaddr_in6 *sin6; - - - if (ifa->ifa_addr->sa_family != AF_INET6) { - /* forget non-v6 */ - return (NULL); - } - ifa6 = (struct in6_ifaddr *)ifa; - /* ok to use deprecated addresses? */ - if (!ip6_use_deprecated) { - if (IFA6_IS_DEPRECATED(ifa6)) { - /* can't use this type */ - return (NULL); - } - } - /* are we ok, with the current state of this address? 
*/ - if (ifa6->ia6_flags & - (IN6_IFF_DETACHED | IN6_IFF_NOTREADY | IN6_IFF_ANYCAST)) { - /* Can't use these types */ - return (NULL); - } - /* Ok the address may be ok */ - sin6 = (struct sockaddr_in6 *)ifa->ifa_addr; - *sin_local = *sin_loop = 0; - if ((ifa->ifa_ifp->if_type == IFT_LOOP) || - (IN6_IS_ADDR_LOOPBACK(&sin6->sin6_addr))) { - *sin_loop = 1; - } - if (!loopscope && *sin_loop) { - /* Its a loopback address and we don't have loop scope */ - return (NULL); - } - if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { - /* we skip unspecifed addresses */ - return (NULL); - } - if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) { - *sin_local = 1; - } - if (!loc_scope && *sin_local) { - /* - * Its a link local address, and we don't have link local - * scope - */ - return (NULL); - } - return (sin6); -} - - -static struct sockaddr_in6 * -sctp_choose_v6_boundspecific_stcb(struct sctp_inpcb *inp, - struct sctp_tcb *stcb, - struct sctp_nets *net, - struct route *ro, - uint8_t loc_scope, - uint8_t loopscope, - int non_asoc_addr_ok) +static void +sctp_init_ifns_for_vrf(int vrfid) { /* - * Each endpoint has a list of local addresses associated with it. - * The address list is either a "negative list" i.e. those addresses - * that are NOT allowed to be used as a source OR a "postive list" - * i.e. those addresses that CAN be used. - * - * Its a negative list if asconf is allowed. What we do in this case is - * use the ep address list BUT we have to cross check it against the - * negative list. - * - * In the case where NO asconf is allowed, we have just a straight - * association level list that we must use to find a source address. 
+ * Here we must apply ANY locks needed by the IFN we access and also + * make sure we lock any IFA that exists as we float through the + * list of IFA's */ - struct sctp_laddr *laddr, *starting_point; - struct sockaddr_in6 *sin6; - int sin_loop, sin_local; - int start_at_beginning = 0; struct ifnet *ifn; struct ifaddr *ifa; - struct rtentry *rt; - - rt = ro->ro_rt; - ifn = rt->rt_ifp; - if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_DO_ASCONF)) { -#ifdef SCTP_DEBUG - if (sctp_debug_on & SCTP_DEBUG_OUTPUT1) { - printf("Have a STCB - asconf allowed, not bound all have a netgative list\n"); - } -#endif - /* - * first question, is the ifn we will emit on in our list, - * if so, we want that one. - */ - if (ifn) { - TAILQ_FOREACH(ifa, &ifn->if_addrlist, ifa_list) { - if (sctp_is_addr_in_ep(inp, ifa)) { - sin6 = sctp_is_v6_ifa_addr_acceptable(ifa, loopscope, loc_scope, &sin_loop, &sin_local); - if (sin6 == NULL) - continue; - if ((non_asoc_addr_ok == 0) && - (sctp_is_addr_restricted(stcb, (struct sockaddr *)sin6))) { - /* on the no-no list */ - continue; - } - return (sin6); - } - } - } - starting_point = stcb->asoc.last_used_address; - /* First try for matching scope */ -sctp_from_the_top: - if (stcb->asoc.last_used_address == NULL) { - start_at_beginning = 1; - stcb->asoc.last_used_address = LIST_FIRST(&inp->sctp_addr_list); - } - /* search beginning with the last used address */ - for (laddr = stcb->asoc.last_used_address; laddr; - laddr = LIST_NEXT(laddr, sctp_nxt_addr)) { - if (laddr->ifa == NULL) { - /* address has been removed */ - continue; - } - sin6 = sctp_is_v6_ifa_addr_acceptable(laddr->ifa, loopscope, loc_scope, &sin_loop, &sin_local); - if (sin6 == NULL) - continue; - if ((non_asoc_addr_ok == 0) && (sctp_is_addr_restricted(stcb, (struct sockaddr *)sin6))) { - /* on the no-no list */ - continue; - } - /* is it of matching scope ? 
*/ - if ((loopscope == 0) && - (loc_scope == 0) && - (sin_loop == 0) && - (sin_local == 0)) { - /* all of global scope we are ok with it */ - return (sin6); - } - if (loopscope && sin_loop) - /* both on the loopback, thats ok */ - return (sin6); - if (loc_scope && sin_local) - /* both local scope */ - return (sin6); + struct in6_ifaddr *ifa6; + struct sctp_ifa *sctp_ifa; + uint32_t ifa_flags; - } - if (start_at_beginning == 0) { - stcb->asoc.last_used_address = NULL; - goto sctp_from_the_top; - } - /* now try for any higher scope than the destination */ - stcb->asoc.last_used_address = starting_point; - start_at_beginning = 0; -sctp_from_the_top2: - if (stcb->asoc.last_used_address == NULL) { - start_at_beginning = 1; - stcb->asoc.last_used_address = LIST_FIRST(&inp->sctp_addr_list); - } - /* search beginning with the last used address */ - for (laddr = stcb->asoc.last_used_address; laddr; - laddr = LIST_NEXT(laddr, sctp_nxt_addr)) { - if (laddr->ifa == NULL) { - /* address has been removed */ - continue; - } - sin6 = sctp_is_v6_ifa_addr_acceptable(laddr->ifa, loopscope, loc_scope, &sin_loop, &sin_local); - if (sin6 == NULL) - continue; - if ((non_asoc_addr_ok == 0) && (sctp_is_addr_restricted(stcb, (struct sockaddr *)sin6))) { - /* on the no-no list */ + TAILQ_FOREACH(ifn, &ifnet, if_list) { + TAILQ_FOREACH(ifa, &ifn->if_addrlist, ifa_list) { + if (ifa->ifa_addr == NULL) { continue; } - return (sin6); - } - if (start_at_beginning == 0) { - stcb->asoc.last_used_address = NULL; - goto sctp_from_the_top2; - } - } else { -#ifdef SCTP_DEBUG - if (sctp_debug_on & SCTP_DEBUG_OUTPUT1) { - printf("Have a STCB - no asconf allowed, not bound all have a postive list\n"); - } -#endif - /* First try for interface output match */ - LIST_FOREACH(laddr, &stcb->asoc.sctp_local_addr_list, - sctp_nxt_addr) { - if (laddr->ifa == NULL) { - /* address has been removed */ + if ((ifa->ifa_addr->sa_family != AF_INET) && + (ifa->ifa_addr->sa_family != AF_INET6) + ) { + /* non inet/inet6 
skip */ continue; } - sin6 = sctp_is_v6_ifa_addr_acceptable(laddr->ifa, loopscope, loc_scope, &sin_loop, &sin_local); - if (sin6 == NULL) - continue; - /* - * first question, is laddr->ifa an address - * associated with the emit interface - */ - if (ifn) { - TAILQ_FOREACH(ifa, &ifn->if_addrlist, ifa_list) { - if (laddr->ifa == ifa) { - sin6 = (struct sockaddr_in6 *)laddr->ifa->ifa_addr; - return (sin6); - } - if (sctp_cmpaddr(ifa->ifa_addr, laddr->ifa->ifa_addr) == 1) { - sin6 = (struct sockaddr_in6 *)laddr->ifa->ifa_addr; - return (sin6); - } + if (ifa->ifa_addr->sa_family == AF_INET6) { + ifa6 = (struct in6_ifaddr *)ifa; + ifa_flags = ifa6->ia6_flags; + if (IN6_IS_ADDR_UNSPECIFIED(&((struct sockaddr_in6 *)ifa->ifa_addr)->sin6_addr)) { + /* skip unspecifed addresses */ + continue; + } + } else if (ifa->ifa_addr->sa_family == AF_INET) { + if (((struct sockaddr_in *)ifa->ifa_addr)->sin_addr.s_addr == 0) { + continue; } } - } - /* Next try for matching scope */ - LIST_FOREACH(laddr, &stcb->asoc.sctp_local_addr_list, - sctp_nxt_addr) { - if (laddr->ifa == NULL) { - /* address has been removed */ - continue; - } - sin6 = sctp_is_v6_ifa_addr_acceptable(laddr->ifa, loopscope, loc_scope, &sin_loop, &sin_local); - if (sin6 == NULL) + if (sctp_is_desired_interface_type(ifa) == 0) { + /* non desired type */ continue; - - if ((loopscope == 0) && - (loc_scope == 0) && - (sin_loop == 0) && - (sin_local == 0)) { - /* all of global scope we are ok with it */ - return (sin6); } - if (loopscope && sin_loop) - /* both on the loopback, thats ok */ - return (sin6); - if (loc_scope && sin_local) - /* both local scope */ - return (sin6); - } - /* ok, now try for a higher scope in the source address */ - /* First try for matching scope */ - LIST_FOREACH(laddr, &stcb->asoc.sctp_local_addr_list, - sctp_nxt_addr) { - if (laddr->ifa == NULL) { - /* address has been removed */ - continue; + if ((ifa->ifa_addr->sa_family == AF_INET6) || + (ifa->ifa_addr->sa_family == AF_INET)) { + if 
(ifa->ifa_addr->sa_family == AF_INET6) { + ifa6 = (struct in6_ifaddr *)ifa; + ifa_flags = ifa6->ia6_flags; + } else { + ifa_flags = 0; + } + sctp_ifa = sctp_add_addr_to_vrf(vrfid, + (void *)ifn, + ifn->if_index, + ifn->if_type, + ifn->if_xname, + (void *)ifa, + ifa->ifa_addr, + ifa_flags + ); + if (sctp_ifa) { + sctp_ifa->localifa_flags &= ~SCTP_ADDR_DEFER_USE; + } } - sin6 = sctp_is_v6_ifa_addr_acceptable(laddr->ifa, loopscope, loc_scope, &sin_loop, &sin_local); - if (sin6 == NULL) - continue; - return (sin6); } } - RTFREE(ro->ro_rt); - ro->ro_rt = NULL; - return (NULL); } -static struct sockaddr_in6 * -sctp_choose_v6_boundspecific_inp(struct sctp_inpcb *inp, - struct route *ro, - uint8_t loc_scope, - uint8_t loopscope) -{ - /* - * Here we are bound specific and have only an inp. We must find an - * address that is bound that we can give out as a src address. We - * prefer two addresses of same scope if we can find them that way. - */ - struct sctp_laddr *laddr; - struct sockaddr_in6 *sin6; - struct ifnet *ifn; - struct ifaddr *ifa; - int sin_loop, sin_local; - struct rtentry *rt; - - /* - * first question, is the ifn we will emit on in our list, if so, we - * want that one. 
- */ - rt = ro->ro_rt; - ifn = rt->rt_ifp; - if (ifn) { - TAILQ_FOREACH(ifa, &ifn->if_addrlist, ifa_list) { - sin6 = sctp_is_v6_ifa_addr_acceptable(ifa, loopscope, loc_scope, &sin_loop, &sin_local); - if (sin6 == NULL) - continue; - if (sctp_is_addr_in_ep(inp, ifa)) { - return (sin6); - } - } - } - for (laddr = LIST_FIRST(&inp->sctp_addr_list); - laddr && (laddr != inp->next_addr_touse); - laddr = LIST_NEXT(laddr, sctp_nxt_addr)) { - if (laddr->ifa == NULL) { - /* address has been removed */ - continue; - } - sin6 = sctp_is_v6_ifa_addr_acceptable(laddr->ifa, loopscope, loc_scope, &sin_loop, &sin_local); - if (sin6 == NULL) - continue; +void +sctp_init_vrf_list(int vrfid) +{ + if (vrfid > SCTP_MAX_VRF_ID) + /* can't do that */ + return; - if ((loopscope == 0) && - (loc_scope == 0) && - (sin_loop == 0) && - (sin_local == 0)) { - /* all of global scope we are ok with it */ - return (sin6); - } - if (loopscope && sin_loop) - /* both on the loopback, thats ok */ - return (sin6); - if (loc_scope && sin_local) - /* both local scope */ - return (sin6); + /* Don't care about return here */ + (void)sctp_allocate_vrf(vrfid); - } /* - * if we reach here, we could not find two addresses of the same - * scope to give out. Lets look for any higher level scope for a - * source address. 
+ * Now we need to build all the ifn's for this vrf and there + * addresses */ - for (laddr = LIST_FIRST(&inp->sctp_addr_list); - laddr && (laddr != inp->next_addr_touse); - laddr = LIST_NEXT(laddr, sctp_nxt_addr)) { - if (laddr->ifa == NULL) { - /* address has been removed */ - continue; - } - sin6 = sctp_is_v6_ifa_addr_acceptable(laddr->ifa, loopscope, loc_scope, &sin_loop, &sin_local); - if (sin6 == NULL) - continue; - return (sin6); - } - /* no address bound can be a source for the destination */ -#ifdef SCTP_DEBUG - if (sctp_debug_on & SCTP_DEBUG_OUTPUT1) { - printf("Src address selection for EP, no acceptable src address found for address\n"); - } -#endif - RTFREE(ro->ro_rt); - ro->ro_rt = NULL; - return (NULL); + sctp_init_ifns_for_vrf(vrfid); } +static uint8_t first_time = 0; -static struct sockaddr_in6 * -sctp_select_v6_nth_addr_from_ifn_boundall(struct ifnet *ifn, struct sctp_tcb *stcb, int non_asoc_addr_ok, uint8_t loopscope, - uint8_t loc_scope, int cur_addr_num, int match_scope) -{ - struct ifaddr *ifa; - struct sockaddr_in6 *sin6; - int sin_loop, sin_local; - int num_eligible_addr = 0; - TAILQ_FOREACH(ifa, &ifn->if_addrlist, ifa_list) { - sin6 = sctp_is_v6_ifa_addr_acceptable(ifa, loopscope, loc_scope, &sin_loop, &sin_local); - if (sin6 == NULL) - continue; - if (stcb) { - if ((non_asoc_addr_ok == 0) && sctp_is_addr_restricted(stcb, (struct sockaddr *)sin6)) { - /* - * It is restricted for some reason.. - * probably not yet added. 
- */ - continue; - } - } - if (match_scope) { - /* Here we are asked to match scope if possible */ - if (loopscope && sin_loop) - /* src and destination are loopback scope */ - return (sin6); - if (loc_scope && sin_local) - /* src and destination are local scope */ - return (sin6); - if ((loopscope == 0) && - (loc_scope == 0) && - (sin_loop == 0) && - (sin_local == 0)) { - /* src and destination are global scope */ - return (sin6); - } - continue; - } - if (num_eligible_addr == cur_addr_num) { - /* this is it */ - return (sin6); - } - num_eligible_addr++; - } - return (NULL); -} - - -static int -sctp_count_v6_num_eligible_boundall(struct ifnet *ifn, struct sctp_tcb *stcb, - int non_asoc_addr_ok, uint8_t loopscope, uint8_t loc_scope) -{ - struct ifaddr *ifa; - struct sockaddr_in6 *sin6; - int num_eligible_addr = 0; - int sin_loop, sin_local; - - TAILQ_FOREACH(ifa, &ifn->if_addrlist, ifa_list) { - sin6 = sctp_is_v6_ifa_addr_acceptable(ifa, loopscope, loc_scope, &sin_loop, &sin_local); - if (sin6 == NULL) - continue; - if (stcb) { - if ((non_asoc_addr_ok == 0) && sctp_is_addr_restricted(stcb, (struct sockaddr *)sin6)) { - /* - * It is restricted for some reason.. - * probably not yet added. - */ - continue; - } - } - num_eligible_addr++; - } - return (num_eligible_addr); -} - - -static struct sockaddr_in6 * -sctp_choose_v6_boundall(struct sctp_inpcb *inp, - struct sctp_tcb *stcb, - struct sctp_nets *net, - struct route *ro, - uint8_t loc_scope, - uint8_t loopscope, - int non_asoc_addr_ok) +void +sctp_addr_change(struct ifaddr *ifa, int cmd) { - /* - * Ok, we are bound all SO any address is ok to use as long as it is - * NOT in the negative list. - */ - int num_eligible_addr; - int cur_addr_num = 0; - int started_at_beginning = 0; - int match_scope_prefered; - - /* - * first question is, how many eligible addresses are there for the - * destination ifn that we are using that are within the proper - * scope? 
- */ - struct ifnet *ifn; - struct sockaddr_in6 *sin6; - struct rtentry *rt; + struct sctp_laddr *wi; + struct sctp_ifa *ifap = NULL; + uint32_t ifa_flags = 0; + struct in6_ifaddr *ifa6; - rt = ro->ro_rt; - ifn = rt->rt_ifp; - if (net) { - cur_addr_num = net->indx_of_eligible_next_to_use; - } - if (cur_addr_num == 0) { - match_scope_prefered = 1; - } else { - match_scope_prefered = 0; - } - num_eligible_addr = sctp_count_v6_num_eligible_boundall(ifn, stcb, non_asoc_addr_ok, loopscope, loc_scope); -#ifdef SCTP_DEBUG - if (sctp_debug_on & SCTP_DEBUG_OUTPUT1) { - printf("Found %d eligible source addresses\n", num_eligible_addr); - } -#endif - if (num_eligible_addr == 0) { - /* - * no eligible addresses, we must use some other interface - * address if we can find one. - */ - goto bound_all_v6_plan_b; - } - /* - * Ok we have num_eligible_addr set with how many we can use, this - * may vary from call to call due to addresses being deprecated - * etc.. - */ - if (cur_addr_num >= num_eligible_addr) { - cur_addr_num = 0; - } /* - * select the nth address from the list (where cur_addr_num is the - * nth) and 0 is the first one, 1 is the second one etc... + * BSD only has one VRF, if this changes we will need to hook in the + * right things here to get the id to pass to the address managment + * routine. 
*/ -#ifdef SCTP_DEBUG - if (sctp_debug_on & SCTP_DEBUG_OUTPUT1) { - printf("cur_addr_num:%d match_scope_prefered:%d select it\n", - cur_addr_num, match_scope_prefered); + if (first_time == 0) { + /* Special test to see if my ::1 will showup with this */ + first_time = 1; + sctp_init_ifns_for_vrf(SCTP_DEFAULT_VRFID); } -#endif - sin6 = sctp_select_v6_nth_addr_from_ifn_boundall(ifn, stcb, non_asoc_addr_ok, loopscope, - loc_scope, cur_addr_num, match_scope_prefered); - if (match_scope_prefered && (sin6 == NULL)) { - /* retry without the preference for matching scope */ -#ifdef SCTP_DEBUG - if (sctp_debug_on & SCTP_DEBUG_OUTPUT1) { - printf("retry with no match_scope_prefered\n"); - } -#endif - sin6 = sctp_select_v6_nth_addr_from_ifn_boundall(ifn, stcb, non_asoc_addr_ok, loopscope, - loc_scope, cur_addr_num, 0); + if ((cmd != RTM_ADD) && (cmd != RTM_DELETE)) { + /* don't know what to do with this */ + return; } - if (sin6) { -#ifdef SCTP_DEBUG - if (sctp_debug_on & SCTP_DEBUG_OUTPUT1) { - printf("Selected address %d ifn:%p for the route\n", cur_addr_num, ifn); - } -#endif - if (net) { - /* store so we get the next one */ - if (cur_addr_num < 255) - net->indx_of_eligible_next_to_use = cur_addr_num + 1; - else - net->indx_of_eligible_next_to_use = 0; - } - return (sin6); + if (ifa->ifa_addr == NULL) { + return; } - num_eligible_addr = 0; -bound_all_v6_plan_b: - /* - * ok, if we reach here we either fell through due to something - * changing during an interupt (unlikely) or we have NO eligible - * source addresses for the ifn of the route (most likely). We must - * look at all the other interfaces EXCEPT rt->rt_ifp and do the - * same game. 
- */ - if (inp->next_ifn_touse == NULL) { - started_at_beginning = 1; - inp->next_ifn_touse = TAILQ_FIRST(&ifnet); -#ifdef SCTP_DEBUG - if (sctp_debug_on & SCTP_DEBUG_OUTPUT1) { - printf("Start at first IFN:%p\n", inp->next_ifn_touse); - } -#endif - } else { - inp->next_ifn_touse = TAILQ_NEXT(inp->next_ifn_touse, if_list); -#ifdef SCTP_DEBUG - if (sctp_debug_on & SCTP_DEBUG_OUTPUT1) { - printf("Resume at IFN:%p\n", inp->next_ifn_touse); - } -#endif - if (inp->next_ifn_touse == NULL) { -#ifdef SCTP_DEBUG - if (sctp_debug_on & SCTP_DEBUG_OUTPUT1) { - printf("IFN Resets\n"); - } -#endif - started_at_beginning = 1; - inp->next_ifn_touse = TAILQ_FIRST(&ifnet); - } + if ((ifa->ifa_addr->sa_family != AF_INET) && + (ifa->ifa_addr->sa_family != AF_INET6) + ) { + /* non inet/inet6 skip */ + return; } - for (ifn = inp->next_ifn_touse; ifn; - ifn = TAILQ_NEXT(ifn, if_list)) { - if (loopscope == 0 && ifn->if_type == IFT_LOOP) { - /* wrong base scope */ - continue; - } - if (loc_scope && (ifn->if_index != loc_scope)) { - /* - * by definition the scope (from to->sin6_scopeid) - * must match that of the interface. If not then we - * could pick a wrong scope for the address. - * Ususally we don't hit plan-b since the route - * handles this. However we can hit plan-b when we - * send to local-host so the route is the loopback - * interface, but the destination is a link local. - */ - continue; - } - if (ifn == rt->rt_ifp) { - /* already looked at this guy */ - continue; - } - /* - * Address rotation will only work when we are not rotating - * sourced interfaces and are using the interface of the - * route. We would need to have a per interface index in - * order to do proper rotation. 
- */ - num_eligible_addr = sctp_count_v6_num_eligible_boundall(ifn, stcb, non_asoc_addr_ok, loopscope, loc_scope); -#ifdef SCTP_DEBUG - if (sctp_debug_on & SCTP_DEBUG_OUTPUT1) { - printf("IFN:%p has %d eligible\n", ifn, num_eligible_addr); - } -#endif - if (num_eligible_addr == 0) { - /* none we can use */ - continue; - } - /* - * Ok we have num_eligible_addr set with how many we can - * use, this may vary from call to call due to addresses - * being deprecated etc.. - */ - inp->next_ifn_touse = ifn; - - /* - * select the first one we can find with perference for - * matching scope. - */ - sin6 = sctp_select_v6_nth_addr_from_ifn_boundall(ifn, stcb, non_asoc_addr_ok, loopscope, loc_scope, 0, 1); - if (sin6 == NULL) { - /* - * can't find one with matching scope how about a - * source with higher scope - */ - sin6 = sctp_select_v6_nth_addr_from_ifn_boundall(ifn, stcb, non_asoc_addr_ok, loopscope, loc_scope, 0, 0); - if (sin6 == NULL) - /* Hmm, can't find one in the interface now */ - continue; + if (ifa->ifa_addr->sa_family == AF_INET6) { + ifa6 = (struct in6_ifaddr *)ifa; + ifa_flags = ifa6->ia6_flags; + if (IN6_IS_ADDR_UNSPECIFIED(&((struct sockaddr_in6 *)ifa->ifa_addr)->sin6_addr)) { + /* skip unspecifed addresses */ + return; } -#ifdef SCTP_DEBUG - if (sctp_debug_on & SCTP_DEBUG_OUTPUT1) { - printf("Selected the %d'th address of ifn:%p\n", - cur_addr_num, - ifn); + } else if (ifa->ifa_addr->sa_family == AF_INET) { + if (((struct sockaddr_in *)ifa->ifa_addr)->sin_addr.s_addr == 0) { + return; } -#endif - return (sin6); } - if (started_at_beginning == 0) { - /* - * we have not been through all of them yet, force us to go - * through them all. 
- */ -#ifdef SCTP_DEBUG - if (sctp_debug_on & SCTP_DEBUG_OUTPUT1) { - printf("Force a recycle\n"); - } -#endif - inp->next_ifn_touse = NULL; - goto bound_all_v6_plan_b; + if (sctp_is_desired_interface_type(ifa) == 0) { + /* non desired type */ + return; } - RTFREE(ro->ro_rt); - ro->ro_rt = NULL; - return (NULL); - -} - -/* stcb and net may be NULL */ -struct in6_addr -sctp_ipv6_source_address_selection(struct sctp_inpcb *inp, - struct sctp_tcb *stcb, struct route *ro, struct sctp_nets *net, - int non_asoc_addr_ok) -{ - struct in6_addr ans; - struct sockaddr_in6 *rt_addr; - uint8_t loc_scope, loopscope; - struct sockaddr_in6 *to = (struct sockaddr_in6 *)&ro->ro_dst; - - /* - * This routine is tricky standard v6 src address selection cannot - * take into account what we have bound etc, so we can't use it. - * - * Instead here is what we must do: 1) Make sure we have a route, if we - * don't have a route we can never reach the peer. 2) Once we have a - * route, determine the scope of the route. Link local, loopback or - * global. 3) Next we divide into three types. Either we are bound - * all.. which means we want to use one of the addresses of the - * interface we are going out. <or> 4a) We have not stcb, which - * means we are using the specific addresses bound on an inp, in - * this case we are similar to the stcb case (4b below) accept the - * list is always a positive list.<or> 4b) We are bound specific - * with a stcb, which means we have a list of bound addresses and we - * must see if the ifn of the route is actually one of the bound - * addresses. If not, then we must rotate addresses amongst properly - * scoped bound addresses, if so we use the address of the - * interface. 5) Always, no matter which path we take through the - * above we must be sure the source address we use is allowed to be - * used. I.e. IN6_IFF_DETACHED, IN6_IFF_NOTREADY, and - * IN6_IFF_ANYCAST addresses cannot be used. 
6) Addresses that are - * deprecated MAY be used if (!ip6_use_deprecated) { if - * (IFA6_IS_DEPRECATED(ifa6)) { skip the address } } - */ - - /*** 1> determine route, if not already done */ - if (ro->ro_rt == NULL) { + if (cmd == RTM_ADD) { + ifap = sctp_add_addr_to_vrf(SCTP_DEFAULT_VRFID, (void *)ifa->ifa_ifp, + ifa->ifa_ifp->if_index, ifa->ifa_ifp->if_type, + ifa->ifa_ifp->if_xname, + (void *)ifa, ifa->ifa_addr, ifa_flags); /* - * Need a route to cache. + * Bump up the refcount so that when the timer completes it + * will drop back down. */ - int scope_save; + if (ifap) + atomic_add_int(&ifap->refcount, 1); - scope_save = to->sin6_scope_id; - to->sin6_scope_id = 0; + } else if (cmd == RTM_DELETE) { - rtalloc_ign(ro, 0UL); - to->sin6_scope_id = scope_save; - } - if (ro->ro_rt == NULL) { + ifap = sctp_del_addr_from_vrf(SCTP_DEFAULT_VRFID, ifa->ifa_addr, ifa->ifa_ifp->if_index); /* - * no route to host. this packet is going no-where. We - * probably should make sure we arrange to send back an - * error. + * We don't bump refcount here so when it completes the + * final delete will happen. */ -#ifdef SCTP_DEBUG - if (sctp_debug_on & SCTP_DEBUG_OUTPUT1) { - printf("No route to host, this packet cannot be sent!\n"); - } -#endif - memset(&ans, 0, sizeof(ans)); - return (ans); } - /*** 2a> determine scope for outbound address/route */ - loc_scope = loopscope = 0; - /* - * We base our scope on the outbound packet scope and route, NOT the - * TCB (if there is one). This way in local scope we will only use a - * local scope src address when we send to a local address. - */ + if (ifap == NULL) + return; - if (IN6_IS_ADDR_LOOPBACK(&to->sin6_addr)) { + wi = SCTP_ZONE_GET(sctppcbinfo.ipi_zone_laddr, struct sctp_laddr); + if (wi == NULL) { /* - * If the route goes to the loopback address OR the address - * is a loopback address, we are loopback scope. + * Gak, what can we do? We have lost an address change can + * you say HOSED? 
*/ - loc_scope = 0; - loopscope = 1; - if (net != NULL) { - /* mark it as local */ - net->addr_is_local = 1; - } - } else if (IN6_IS_ADDR_LINKLOCAL(&to->sin6_addr)) { - if (to->sin6_scope_id) - loc_scope = to->sin6_scope_id; - else { - loc_scope = 1; - } - loopscope = 0; - } - /* - * now, depending on which way we are bound we call the appropriate - * routine to do steps 3-6 - */ #ifdef SCTP_DEBUG - if (sctp_debug_on & SCTP_DEBUG_OUTPUT1) { - printf("Destination address:"); - sctp_print_address((struct sockaddr *)to); - } -#endif - - if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) { - rt_addr = sctp_choose_v6_boundall(inp, stcb, net, ro, loc_scope, loopscope, non_asoc_addr_ok); - } else { - if (stcb) - rt_addr = sctp_choose_v6_boundspecific_stcb(inp, stcb, net, ro, loc_scope, loopscope, non_asoc_addr_ok); - else - /* - * we can't have a non-asoc address since we have no - * association - */ - rt_addr = sctp_choose_v6_boundspecific_inp(inp, ro, loc_scope, loopscope); - } - if (rt_addr == NULL) { - /* no suitable address? 
*/ - struct in6_addr in6; - -#ifdef SCTP_DEBUG - if (sctp_debug_on & SCTP_DEBUG_OUTPUT1) { - printf("V6 packet will reach dead-end no suitable src address\n"); + if (sctp_debug_on & SCTP_DEBUG_PCB1) { + printf("Lost and address change ???\n"); } -#endif - memset(&in6, 0, sizeof(in6)); - return (in6); - } -#ifdef SCTP_DEBUG - if (sctp_debug_on & SCTP_DEBUG_OUTPUT1) { - printf("Source address selected is:"); - sctp_print_address((struct sockaddr *)rt_addr); - } -#endif - return (rt_addr->sin6_addr); -} +#endif /* SCTP_DEBUG */ - -static -int -sctp_is_address_in_scope(struct ifaddr *ifa, - int ipv4_addr_legal, - int ipv6_addr_legal, - int loopback_scope, - int ipv4_local_scope, - int local_scope, - int site_scope) -{ - if ((loopback_scope == 0) && - (ifa->ifa_ifp) && - (ifa->ifa_ifp->if_type == IFT_LOOP)) { - /* - * skip loopback if not in scope * - */ - return (0); + /* Opps, must decrement the count */ + sctp_free_ifa(ifap); + return; } - if ((ifa->ifa_addr->sa_family == AF_INET) && ipv4_addr_legal) { - struct sockaddr_in *sin; - - sin = (struct sockaddr_in *)ifa->ifa_addr; - if (sin->sin_addr.s_addr == 0) { - /* not in scope , unspecified */ - return (0); - } - if ((ipv4_local_scope == 0) && - (IN4_ISPRIVATE_ADDRESS(&sin->sin_addr))) { - /* private address not in scope */ - return (0); - } - } else if ((ifa->ifa_addr->sa_family == AF_INET6) && ipv6_addr_legal) { - struct sockaddr_in6 *sin6; - struct in6_ifaddr *ifa6; - - ifa6 = (struct in6_ifaddr *)ifa; - /* ok to use deprecated addresses? 
*/ - if (!ip6_use_deprecated) { - if (ifa6->ia6_flags & - IN6_IFF_DEPRECATED) { - return (0); - } - } - if (ifa6->ia6_flags & - (IN6_IFF_DETACHED | - IN6_IFF_ANYCAST | - IN6_IFF_NOTREADY)) { - return (0); - } - sin6 = (struct sockaddr_in6 *)ifa->ifa_addr; - if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { - /* skip unspecifed addresses */ - return (0); - } - if ( /* (local_scope == 0) && */ - (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr))) { - return (0); - } - if ((site_scope == 0) && - (IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr))) { - return (0); - } - } else { - return (0); + SCTP_INCR_LADDR_COUNT(); + bzero(wi, sizeof(*wi)); + wi->ifa = ifap; + if (cmd == RTM_ADD) { + wi->action = SCTP_ADD_IP_ADDRESS; + } else if (cmd == RTM_DELETE) { + wi->action = SCTP_DEL_IP_ADDRESS; } - return (1); -} - -static struct mbuf * -sctp_add_addr_to_mbuf(struct mbuf *m, struct ifaddr *ifa) -{ - struct sctp_paramhdr *parmh; - struct mbuf *mret; - int len; - - if (ifa->ifa_addr->sa_family == AF_INET) { - len = sizeof(struct sctp_ipv4addr_param); - } else if (ifa->ifa_addr->sa_family == AF_INET6) { - len = sizeof(struct sctp_ipv6addr_param); - } else { - /* unknown type */ - return (m); - } - if (M_TRAILINGSPACE(m) >= len) { - /* easy side we just drop it on the end */ - parmh = (struct sctp_paramhdr *)(SCTP_BUF_AT(m, SCTP_BUF_LEN(m))); - mret = m; - } else { - /* Need more space */ - mret = m; - while (SCTP_BUF_NEXT(mret) != NULL) { - mret = SCTP_BUF_NEXT(mret); - } - SCTP_BUF_NEXT(mret) = sctp_get_mbuf_for_msg(len, 0, M_DONTWAIT, 1, MT_DATA); - if (SCTP_BUF_NEXT(mret) == NULL) { - /* We are hosed, can't add more addresses */ - return (m); - } - mret = SCTP_BUF_NEXT(mret); - parmh = mtod(mret, struct sctp_paramhdr *); - } - /* now add the parameter */ - if (ifa->ifa_addr->sa_family == AF_INET) { - struct sctp_ipv4addr_param *ipv4p; - struct sockaddr_in *sin; - - sin = (struct sockaddr_in *)ifa->ifa_addr; - ipv4p = (struct sctp_ipv4addr_param *)parmh; - parmh->param_type = 
htons(SCTP_IPV4_ADDRESS); - parmh->param_length = htons(len); - ipv4p->addr = sin->sin_addr.s_addr; - SCTP_BUF_LEN(mret) += len; - } else if (ifa->ifa_addr->sa_family == AF_INET6) { - struct sctp_ipv6addr_param *ipv6p; - struct sockaddr_in6 *sin6; - - sin6 = (struct sockaddr_in6 *)ifa->ifa_addr; - ipv6p = (struct sctp_ipv6addr_param *)parmh; - parmh->param_type = htons(SCTP_IPV6_ADDRESS); - parmh->param_length = htons(len); - memcpy(ipv6p->addr, &sin6->sin6_addr, - sizeof(ipv6p->addr)); - /* clear embedded scope in the address */ - in6_clearscope((struct in6_addr *)ipv6p->addr); - SCTP_BUF_LEN(mret) += len; - } else { - return (m); - } - return (mret); -} - - -struct mbuf * -sctp_add_addresses_to_i_ia(struct sctp_inpcb *inp, struct sctp_scoping *scope, struct mbuf *m_at, int cnt_inits_to) -{ - int cnt; - - if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) { - struct ifnet *ifn; - struct ifaddr *ifa; - - cnt = cnt_inits_to; - TAILQ_FOREACH(ifn, &ifnet, if_list) { - if ((scope->loopback_scope == 0) && - (ifn->if_type == IFT_LOOP)) { - /* - * Skip loopback devices if loopback_scope - * not set - */ - continue; - } - TAILQ_FOREACH(ifa, &ifn->if_addrlist, ifa_list) { - if (sctp_is_address_in_scope(ifa, - scope->ipv4_addr_legal, - scope->ipv6_addr_legal, - scope->loopback_scope, - scope->ipv4_local_scope, - scope->local_scope, - scope->site_scope) == 0) { - continue; - } - cnt++; - } - } - if (cnt > 1) { - TAILQ_FOREACH(ifn, &ifnet, if_list) { - if ((scope->loopback_scope == 0) && - (ifn->if_type == IFT_LOOP)) { - /* - * Skip loopback devices if - * loopback_scope not set - */ - continue; - } - TAILQ_FOREACH(ifa, &ifn->if_addrlist, ifa_list) { - if (sctp_is_address_in_scope(ifa, - scope->ipv4_addr_legal, - scope->ipv6_addr_legal, - scope->loopback_scope, - scope->ipv4_local_scope, - scope->local_scope, - scope->site_scope) == 0) { - continue; - } - m_at = sctp_add_addr_to_mbuf(m_at, ifa); - } - } - } - } else { - struct sctp_laddr *laddr; - int cnt; - - cnt = cnt_inits_to; 
- /* First, how many ? */ - LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) { - if (laddr->ifa == NULL) { - continue; - } - if (laddr->ifa->ifa_addr == NULL) - continue; - if (sctp_is_address_in_scope(laddr->ifa, - scope->ipv4_addr_legal, - scope->ipv6_addr_legal, - scope->loopback_scope, - scope->ipv4_local_scope, - scope->local_scope, - scope->site_scope) == 0) { - continue; - } - cnt++; - } - /* - * To get through a NAT we only list addresses if we have - * more than one. That way if you just bind a single address - * we let the source of the init dictate our address. - */ - if (cnt > 1) { - LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) { - if (laddr->ifa == NULL) { - continue; - } - if (laddr->ifa->ifa_addr == NULL) { - continue; - } - if (sctp_is_address_in_scope(laddr->ifa, - scope->ipv4_addr_legal, - scope->ipv6_addr_legal, - scope->loopback_scope, - scope->ipv4_local_scope, - scope->local_scope, - scope->site_scope) == 0) { - continue; - } - m_at = sctp_add_addr_to_mbuf(m_at, laddr->ifa); - } - } - } - return (m_at); + SCTP_IPI_ITERATOR_WQ_LOCK(); + /* + * Should this really be a tailq? 
As it is we will process the + * newest first :-0 + */ + LIST_INSERT_HEAD(&sctppcbinfo.addr_wq, wi, sctp_nxt_addr); + sctp_timer_start(SCTP_TIMER_TYPE_ADDR_WQ, + (struct sctp_inpcb *)NULL, + (struct sctp_tcb *)NULL, + (struct sctp_nets *)NULL); + SCTP_IPI_ITERATOR_WQ_UNLOCK(); } diff --git a/sys/netinet/sctp_bsd_addr.h b/sys/netinet/sctp_bsd_addr.h index ad7b5b8..0752ea3 100644 --- a/sys/netinet/sctp_bsd_addr.h +++ b/sys/netinet/sctp_bsd_addr.h @@ -38,27 +38,18 @@ __FBSDID("$FreeBSD$"); #if defined(_KERNEL) -int sctp_is_addr_restricted(struct sctp_tcb *, struct sockaddr *); +#if defined(SCTP_USE_THREAD_BASED_ITERATOR) +void sctp_wakeup_iterator(void); +void sctp_startup_iterator(void); -struct in_addr -sctp_ipv4_source_address_selection(struct sctp_inpcb *inp, - struct sctp_tcb *stcb, - struct route *ro, struct sctp_nets *net, - int non_asoc_addr_ok); - -struct in6_addr -sctp_ipv6_source_address_selection(struct sctp_inpcb *, - struct sctp_tcb *, struct route *, - struct sctp_nets *, int); +#endif +void + sctp_gather_internal_ifa_flags(struct sctp_ifa *ifa); -struct mbuf * -sctp_add_addresses_to_i_ia(struct sctp_inpcb *inp, - struct sctp_scoping *scope, - struct mbuf *m_at, - int cnt_inits_to); +extern void sctp_addr_change(struct ifaddr *ifa, int cmd); #endif #endif diff --git a/sys/netinet/sctp_constants.h b/sys/netinet/sctp_constants.h index dcb05af..f892f9c 100644 --- a/sys/netinet/sctp_constants.h +++ b/sys/netinet/sctp_constants.h @@ -36,12 +36,55 @@ __FBSDID("$FreeBSD$"); #ifndef __sctp_constants_h__ #define __sctp_constants_h__ +/* Number of packets to get before sack sent by default */ +#define SCTP_DEFAULT_SACK_FREQ 2 + +/* Address limit - This variable is calculated + * based on an 1500 byte mtu. We take out 100 bytes + * for the cookie, 40 bytes for a v6 header and 32 + * bytes for the init structure. A second init structure + * for the init-ack and then finally a third one for the + * imbedded init. This yeilds 100+40+(3 * 32) = 236 bytes. 
+ * This leaves 1264 bytes for addresses. Now whatever we + * send in the INIT() we need to allow to get back in the + * INIT-ACK plus all the values from INIT and INIT-ACK + * listed in the cookie. Plus we need some overhead for + * maybe copied parameters in the COOKIE. If we + * allow 20 addresses, and each side has 20 V6 addresses + * that will be 400 bytes. In the INIT-ACK we will + * see the INIT-ACK 400 + 800 in the cookie. This leaves + * 64 bytes slack for misc things in the cookie. Otherwise + * we need to allow IP fragmentation.. which I believe + * the INIT-ACK and COOKIE do, I don't think we do that + * to the INIT though. So the max you could make this + * value is 60 addresses. + */ +#define SCTP_ADDRESS_LIMIT 20 + +/* Number of addresses where we just skip the counting */ +#define SCTP_COUNT_LIMIT 40 + +/* Number of ticks to delay before running + * iterator on an address change. + */ +#define SCTP_ADDRESS_TICK_DELAY 2 #define SCTP_VERSION_STRING "KAME-BSD 1.1" /* #define SCTP_AUDITING_ENABLED 1 used for debug/auditing */ #define SCTP_AUDIT_SIZE 256 #define SCTP_STAT_LOG_SIZE 80000 +#define SCTP_USE_THREAD_BASED_ITERATOR 1 + +#define SCTP_KTRHEAD_NAME "sctp_iterator" +#define SCTP_KTHREAD_PAGES 2 + + +/* If you support Multi-VRF how big to + * make the initial array of VRF's to. 
+ */ +#define SCTP_DEFAULT_VRF_SIZE 4 + /* Places that CWND log can happen from */ #define SCTP_CWND_LOG_FROM_FR 1 #define SCTP_CWND_LOG_FROM_RTX 2 @@ -188,7 +231,11 @@ __FBSDID("$FreeBSD$"); #define SCTP_SCALE_FOR_ADDR 2 /* default AUTO_ASCONF mode enable(1)/disable(0) value (sysctl) */ -#define SCTP_DEFAULT_AUTO_ASCONF 0 +#if defined (__APPLE__) && !defined(SCTP_APPLE_AUTO_ASCONF) +#define SCTP_DEFAULT_AUTO_ASCONF 0 +#else +#define SCTP_DEFAULT_AUTO_ASCONF 1 +#endif /* * Theshold for rwnd updates, we have to read (sb_hiwat >> @@ -305,6 +352,7 @@ __FBSDID("$FreeBSD$"); #define SCTP_OUTPUT_FROM_STRRST_REQ 12 #define SCTP_OUTPUT_FROM_USR_RCVD 13 #define SCTP_OUTPUT_FROM_COOKIE_ACK 14 +#define SCTP_OUTPUT_FROM_DRAIN 15 /* SCTP chunk types are moved sctp.h for application (NAT, FW) use */ /* align to 32-bit sizes */ @@ -425,7 +473,7 @@ __FBSDID("$FreeBSD$"); #define SCTP_ADDR_OUT_OF_SCOPE 0x080 #define SCTP_ADDR_DOUBLE_SWITCH 0x100 #define SCTP_ADDR_UNCONFIRMED 0x200 - +#define SCTP_ADDR_REQ_PRIMARY 0x400 #define SCTP_REACHABLE_MASK 0x203 /* bound address types (e.g. valid address types to allow) */ @@ -551,11 +599,15 @@ __FBSDID("$FreeBSD$"); */ #define SCTP_ASOC_MAX_CHUNKS_ON_QUEUE 512 -#define MSEC_TO_TICKS(x) ((hz == 1000) ? x : (((x) * hz) / 1000)) -#define TICKS_TO_MSEC(x) ((hz == 1000) ? x : (((x) * 1000) / hz)) +/* The conversion from time to ticks and vice versa is done by rounding + * upwards. This way we can test in the code the time to be positive and + * know that this corresponds to a positive number of ticks. + */ +#define MSEC_TO_TICKS(x) ((hz == 1000) ? x : ((((x) * hz) + 999) / 1000)) +#define TICKS_TO_MSEC(x) ((hz == 1000) ? x : ((((x) * 1000) + (hz - 1)) / hz)) #define SEC_TO_TICKS(x) ((x) * hz) -#define TICKS_TO_SEC(x) ((x) / hz) +#define TICKS_TO_SEC(x) (((x) + (hz - 1)) / hz) /* * Basically the minimum amount of time before I do a early FR. 
Making this diff --git a/sys/netinet/sctp_header.h b/sys/netinet/sctp_header.h index e5d04af..4b2a758 100644 --- a/sys/netinet/sctp_header.h +++ b/sys/netinet/sctp_header.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2001-2006, Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2001-2007, Cisco Systems, Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -49,9 +49,12 @@ struct sctp_ipv4addr_param { uint32_t addr; /* IPV4 address */ }; +#define SCTP_V6_ADDR_BYTES 16 + + struct sctp_ipv6addr_param { struct sctp_paramhdr ph;/* type=SCTP_IPV6_PARAM_TYPE, len=20 */ - uint8_t addr[16]; /* IPV6 address */ + uint8_t addr[SCTP_V6_ADDR_BYTES]; /* IPV6 address */ }; /* Cookie Preservative */ @@ -60,16 +63,19 @@ struct sctp_cookie_perserve_param { uint32_t time; /* time in ms to extend cookie */ }; +#define SCTP_ARRAY_MIN_LEN 1 + /* Host Name Address */ struct sctp_host_name_param { struct sctp_paramhdr ph;/* type=SCTP_HOSTNAME_ADDRESS */ - char name[1]; /* host name */ + char name[SCTP_ARRAY_MIN_LEN]; /* host name */ }; /* supported address type */ struct sctp_supported_addr_param { struct sctp_paramhdr ph;/* type=SCTP_SUPPORTED_ADDRTYPE */ - uint16_t addr_type[1]; /* array of supported address types */ + uint16_t addr_type[SCTP_ARRAY_MIN_LEN]; /* array of supported address + * types */ }; /* ECN parameter */ @@ -157,18 +163,20 @@ struct sctp_init { /* optional param's follow */ }; +#define SCTP_IDENTIFICATION_SIZE 16 +#define SCTP_ADDRESS_SIZE 4 /* state cookie header */ struct sctp_state_cookie { /* this is our definition... 
*/ - uint8_t identification[16]; /* id of who we are */ + uint8_t identification[SCTP_IDENTIFICATION_SIZE]; /* id of who we are */ uint32_t cookie_life; /* life I will award this cookie */ uint32_t tie_tag_my_vtag; /* my tag in old association */ uint32_t tie_tag_peer_vtag; /* peers tag in old association */ uint32_t peers_vtag; /* peers tag in INIT (for quick ref) */ uint32_t my_vtag; /* my tag in INIT-ACK (for quick ref) */ struct timeval time_entered; /* the time I built cookie */ - uint32_t address[4]; /* 4 ints/128 bits */ + uint32_t address[SCTP_ADDRESS_SIZE]; /* 4 ints/128 bits */ uint32_t addr_type; /* address type */ - uint32_t laddress[4]; /* my local from address */ + uint32_t laddress[SCTP_ADDRESS_SIZE]; /* my local from address */ uint32_t laddr_type; /* my local from address type */ uint32_t scope_id; /* v6 scope id for link-locals */ uint16_t peerport; /* port address of the peer in the INIT */ @@ -519,7 +527,7 @@ struct sctp_auth_invalid_hmac { * feel is worth it for now. */ #ifndef SCTP_MAX_OVERHEAD -#ifdef AF_INET6 +#ifdef INET6 #define SCTP_MAX_OVERHEAD (sizeof(struct sctp_data_chunk) + \ sizeof(struct sctphdr) + \ sizeof(struct sctp_ecne_chunk) + \ @@ -549,7 +557,7 @@ struct sctp_auth_invalid_hmac { #define SCTP_MIN_OVERHEAD (sizeof(struct ip) + \ sizeof(struct sctphdr)) -#endif /* AF_INET6 */ +#endif /* INET6 */ #endif /* !SCTP_MAX_OVERHEAD */ #define SCTP_MED_V4_OVERHEAD (sizeof(struct sctp_data_chunk) + \ diff --git a/sys/netinet/sctp_indata.c b/sys/netinet/sctp_indata.c index 86945f7..517f1d1 100644 --- a/sys/netinet/sctp_indata.c +++ b/sys/netinet/sctp_indata.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2001-2006, Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2001-2007, Cisco Systems, Inc. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -35,6 +35,7 @@ __FBSDID("$FreeBSD$"); #include <netinet/sctp_os.h> #include <netinet/sctp_var.h> +#include <netinet/sctp_sysctl.h> #include <netinet/sctp_pcb.h> #include <netinet/sctp_header.h> #include <netinet/sctputil.h> @@ -45,11 +46,6 @@ __FBSDID("$FreeBSD$"); #include <netinet/sctp_timer.h> -#ifdef SCTP_DEBUG -extern uint32_t sctp_debug_on; - -#endif - /* * NOTES: On the outbound side of things I need to check the sack timer to * see if I should generate a sack into the chunk queue (if I have data to @@ -60,8 +56,6 @@ extern uint32_t sctp_debug_on; * the list. */ -extern int sctp_strict_sacks; - __inline void sctp_set_rwnd(struct sctp_tcb *stcb, struct sctp_association *asoc) { @@ -1438,7 +1432,6 @@ sctp_does_tsn_belong_to_reasm(struct sctp_association *asoc, } -extern unsigned int sctp_max_chunks_on_queue; static int sctp_process_a_data_chunk(struct sctp_tcb *stcb, struct sctp_association *asoc, struct mbuf **m, int offset, struct sctp_data_chunk *ch, int chk_length, @@ -1505,15 +1498,7 @@ sctp_process_a_data_chunk(struct sctp_tcb *stcb, struct sctp_association *asoc, asoc->dup_tsns[asoc->numduptsns] = tsn; asoc->numduptsns++; } - if (!SCTP_OS_TIMER_PENDING(&asoc->dack_timer.timer)) { - /* - * By starting the timer we assure that we WILL sack - * at the end of the packet when sctp_sack_check - * gets called. 
- */ - sctp_timer_start(SCTP_TIMER_TYPE_RECV, stcb->sctp_ep, - stcb, NULL); - } + asoc->send_sack = 1; return (0); } /* @@ -2382,19 +2367,18 @@ sctp_sack_check(struct sctp_tcb *stcb, int ok_to_sack, int was_a_gap, int *abort */ stcb->asoc.cmt_dac_pkts_rcvd++; - if ((stcb->asoc.first_ack_sent == 0) || /* First time we send a - * sack */ + if ((stcb->asoc.send_sack == 1) || /* We need to send a + * SACK */ ((was_a_gap) && (is_a_gap == 0)) || /* was a gap, but no * longer is one */ (stcb->asoc.numduptsns) || /* we have dup's */ (is_a_gap) || /* is still a gap */ - (stcb->asoc.delayed_ack == 0) || - (SCTP_OS_TIMER_PENDING(&stcb->asoc.dack_timer.timer)) /* timer was up . second - * packet */ + (stcb->asoc.delayed_ack == 0) || /* Delayed sack disabled */ + (stcb->asoc.data_pkts_seen >= stcb->asoc.sack_freq) /* hit limit of pkts */ ) { if ((sctp_cmt_on_off) && (sctp_cmt_use_dac) && - (stcb->asoc.first_ack_sent == 1) && + (stcb->asoc.send_sack == 0) && (stcb->asoc.numduptsns == 0) && (stcb->asoc.delayed_ack) && (!SCTP_OS_TIMER_PENDING(&stcb->asoc.dack_timer.timer))) { @@ -2420,13 +2404,14 @@ sctp_sack_check(struct sctp_tcb *stcb, int ok_to_sack, int was_a_gap, int *abort * first packet OR there are gaps or * duplicates. 
*/ - stcb->asoc.first_ack_sent = 1; SCTP_OS_TIMER_STOP(&stcb->asoc.dack_timer.timer); sctp_send_sack(stcb); } } else { - sctp_timer_start(SCTP_TIMER_TYPE_RECV, - stcb->sctp_ep, stcb, NULL); + if (!SCTP_OS_TIMER_PENDING(&stcb->asoc.dack_timer.timer)) { + sctp_timer_start(SCTP_TIMER_TYPE_RECV, + stcb->sctp_ep, stcb, NULL); + } } } } @@ -2488,8 +2473,6 @@ doit_again: } } -extern int sctp_strict_data_order; - int sctp_process_data(struct mbuf **mm, int iphlen, int *offset, int length, struct sctphdr *sh, struct sctp_inpcb *inp, struct sctp_tcb *stcb, @@ -2573,6 +2556,7 @@ sctp_process_data(struct mbuf **mm, int iphlen, int *offset, int length, */ *high_tsn = asoc->cumulative_tsn; break_flag = 0; + asoc->data_pkts_seen++; while (stop_proc == 0) { /* validate chunk length */ chk_length = ntohs(ch->ch.chunk_length); @@ -2753,26 +2737,21 @@ sctp_process_data(struct mbuf **mm, int iphlen, int *offset, int length, sctp_service_queues(stcb, asoc); if (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_SENT) { - /* - * Assure that we ack right away by making sure that a d-ack - * timer is running. So the sack_check will send a sack. 
- */ - sctp_timer_start(SCTP_TIMER_TYPE_RECV, stcb->sctp_ep, stcb, - net); + /* Assure that we ack right away */ + stcb->asoc.send_sack = 1; } /* Start a sack timer or QUEUE a SACK for sending */ if ((stcb->asoc.cumulative_tsn == stcb->asoc.highest_tsn_inside_map) && - (stcb->asoc.first_ack_sent)) { - /* Everything is in order */ - if (stcb->asoc.mapping_array[0] == 0xff) { - /* need to do the slide */ - sctp_sack_check(stcb, 1, was_a_gap, &abort_flag); - } else { + (stcb->asoc.mapping_array[0] != 0xff)) { + if ((stcb->asoc.data_pkts_seen >= stcb->asoc.sack_freq) || + (stcb->asoc.delayed_ack == 0) || + (stcb->asoc.send_sack == 1)) { if (SCTP_OS_TIMER_PENDING(&stcb->asoc.dack_timer.timer)) { - stcb->asoc.first_ack_sent = 1; SCTP_OS_TIMER_STOP(&stcb->asoc.dack_timer.timer); - sctp_send_sack(stcb); - } else { + } + sctp_send_sack(stcb); + } else { + if (!SCTP_OS_TIMER_PENDING(&stcb->asoc.dack_timer.timer)) { sctp_timer_start(SCTP_TIMER_TYPE_RECV, stcb->sctp_ep, stcb, NULL); } @@ -3016,6 +2995,7 @@ sctp_handle_segments(struct sctp_tcb *stcb, struct sctp_association *asoc, tp1->whoTo->flight_size -= tp1->book_size; else tp1->whoTo->flight_size = 0; + if (asoc->total_flight >= tp1->book_size) { asoc->total_flight -= tp1->book_size; if (asoc->total_flight_count > 0) @@ -3069,6 +3049,11 @@ sctp_handle_segments(struct sctp_tcb *stcb, struct sctp_association *asoc, (*ecn_seg_sums) &= SCTP_SACK_NONCE_SUM; tp1->sent = SCTP_DATAGRAM_MARKED; + if (tp1->rec.data.chunk_was_revoked) { + /* deflate the cwnd */ + tp1->whoTo->cwnd -= tp1->book_size; + tp1->rec.data.chunk_was_revoked = 0; + } } break; } /* if (tp1->TSN_seq == j) */ @@ -3108,43 +3093,30 @@ sctp_check_for_revoked(struct sctp_association *asoc, uint32_t cumack, */ if (tp1->sent == SCTP_DATAGRAM_ACKED) { /* it has been revoked */ + tp1->sent = SCTP_DATAGRAM_SENT; + tp1->rec.data.chunk_was_revoked = 1; + /* + * We must add this stuff back in to assure + * timers and such get started. 
+ */ + tp1->whoTo->flight_size += tp1->book_size; + /* + * We inflate the cwnd to compensate for our + * artificial inflation of the flight_size. + */ + tp1->whoTo->cwnd += tp1->book_size; + asoc->total_flight_count++; + asoc->total_flight += tp1->book_size; - if (sctp_cmt_on_off) { - /* - * If CMT is ON, leave "sent" at - * ACKED. CMT causes reordering of - * data and acks (received on - * different interfaces) can be - * persistently reordered. Acking - * followed by apparent revoking and - * re-acking causes unexpected weird - * behavior. So, at this time, CMT - * does not respect renegs. Renegs - * cannot be recovered. I will fix - * this once I am sure that things - * are working right again with CMT. - */ - } else { - tp1->sent = SCTP_DATAGRAM_SENT; - tp1->rec.data.chunk_was_revoked = 1; - /* - * We must add this stuff back in to - * assure timers and such get - * started. - */ - tp1->whoTo->flight_size += tp1->book_size; - asoc->total_flight_count++; - asoc->total_flight += tp1->book_size; - tot_revoked++; + tot_revoked++; #ifdef SCTP_SACK_LOGGING - sctp_log_sack(asoc->last_acked_seq, - cumack, - tp1->rec.data.TSN_seq, - 0, - 0, - SCTP_LOG_TSN_REVOKED); + sctp_log_sack(asoc->last_acked_seq, + cumack, + tp1->rec.data.TSN_seq, + 0, + 0, + SCTP_LOG_TSN_REVOKED); #endif - } } else if (tp1->sent == SCTP_DATAGRAM_MARKED) { /* it has been re-acked in this SACK */ tp1->sent = SCTP_DATAGRAM_ACKED; @@ -3173,8 +3145,6 @@ sctp_check_for_revoked(struct sctp_association *asoc, uint32_t cumack, } } -extern int sctp_peer_chunk_oh; - static void sctp_strike_gap_ack_chunks(struct sctp_tcb *stcb, struct sctp_association *asoc, u_long biggest_tsn_acked, u_long biggest_tsn_newly_acked, u_long this_sack_lowest_newack, int accum_moved) @@ -3301,9 +3271,11 @@ sctp_strike_gap_ack_chunks(struct sctp_tcb *stcb, struct sctp_association *asoc, * the tsn... otherwise we CAN strike the TSN. 
*/ /* - * @@@ JRI: Check for CMT + * @@@ JRI: Check for CMT if (accum_moved && + * asoc->fast_retran_loss_recovery && (sctp_cmt_on_off == + * 0)) { */ - if (accum_moved && asoc->fast_retran_loss_recovery && (sctp_cmt_on_off == 0)) { + if (accum_moved && asoc->fast_retran_loss_recovery) { /* * Strike the TSN if in fast-recovery and cum-ack * moved. @@ -3400,8 +3372,8 @@ sctp_strike_gap_ack_chunks(struct sctp_tcb *stcb, struct sctp_association *asoc, } } /* - * @@@ JRI: TODO: remove code for HTNA algo. CMT's - * SFR algo covers HTNA. + * JRI: TODO: remove code for HTNA algo. CMT's SFR + * algo covers HTNA. */ } else if (compare_with_wrap(tp1->rec.data.TSN_seq, biggest_tsn_newly_acked, MAX_TSN)) { @@ -3462,7 +3434,6 @@ sctp_strike_gap_ack_chunks(struct sctp_tcb *stcb, struct sctp_association *asoc, * CMT: Using RTX_SSTHRESH policy for CMT. * If CMT is being used, then pick dest with * largest ssthresh for any retransmission. - * (iyengar@cis.udel.edu, 2005/08/12) */ tp1->no_fr_allowed = 1; alt = tp1->whoTo; @@ -3863,9 +3834,6 @@ sctp_hs_cwnd_decrease(struct sctp_tcb *stcb, struct sctp_nets *net) #endif -extern int sctp_early_fr; -extern int sctp_L2_abc_variable; - static __inline void sctp_cwnd_update(struct sctp_tcb *stcb, @@ -3878,7 +3846,8 @@ sctp_cwnd_update(struct sctp_tcb *stcb, /* update cwnd and Early FR */ /******************************/ TAILQ_FOREACH(net, &asoc->nets, sctp_next) { -#ifdef JANA_CODE_WHY_THIS + +#ifdef JANA_CMT_FAST_RECOVERY /* * CMT fast recovery code. Need to debug. */ @@ -3952,22 +3921,26 @@ sctp_cwnd_update(struct sctp_tcb *stcb, } } } -#ifdef JANA_CODE_WHY_THIS +#ifdef JANA_CMT_FAST_RECOVERY /* - * Cannot skip for CMT. Need to come back and check these - * variables for CMT. CMT fast recovery code. Need to debug. 
+ * CMT fast recovery code + */ + /* + * if (sctp_cmt_on_off == 1 && + * net->fast_retran_loss_recovery && + * net->will_exit_fast_recovery == 0) { // @@@ Do something + * } else if (sctp_cmt_on_off == 0 && + * asoc->fast_retran_loss_recovery && will_exit == 0) { */ - if (sctp_cmt_on_off == 1 && - net->fast_retran_loss_recovery && - net->will_exit_fast_recovery == 0) #endif - if (sctp_cmt_on_off == 0 && asoc->fast_retran_loss_recovery && will_exit == 0) { - /* - * If we are in loss recovery we skip any - * cwnd update - */ - goto skip_cwnd_update; - } + + if (asoc->fast_retran_loss_recovery && will_exit == 0) { + /* + * If we are in loss recovery we skip any cwnd + * update + */ + goto skip_cwnd_update; + } /* * CMT: CUC algorithm. Update cwnd if pseudo-cumack has * moved. @@ -4172,6 +4145,7 @@ sctp_express_handle_sack(struct sctp_tcb *stcb, uint32_t cumack, } else { tp1->whoTo->flight_size = 0; } + if (asoc->total_flight >= tp1->book_size) { asoc->total_flight -= tp1->book_size; if (asoc->total_flight_count > 0) @@ -4206,6 +4180,11 @@ sctp_express_handle_sack(struct sctp_tcb *stcb, uint32_t cumack, if (tp1->sent == SCTP_DATAGRAM_RESEND) { sctp_ucount_decr(asoc->sent_queue_retran_cnt); } + if (tp1->rec.data.chunk_was_revoked) { + /* deflate the cwnd */ + tp1->whoTo->cwnd -= tp1->book_size; + tp1->rec.data.chunk_was_revoked = 0; + } tp1->sent = SCTP_DATAGRAM_ACKED; } } else { @@ -4657,7 +4636,8 @@ sctp_handle_sack(struct sctp_sack_chunk *ch, struct sctp_tcb *stcb, net->net_ack2 = 0; /* - * CMT: Reset CUC algo variable before SACK processing + * CMT: Reset CUC and Fast recovery algo variables before + * SACK processing */ net->new_pseudo_cumack = 0; net->will_exit_fast_recovery = 0; @@ -4775,6 +4755,11 @@ sctp_handle_sack(struct sctp_sack_chunk *ch, struct sctp_tcb *stcb, (asoc->sent_queue_retran_cnt & 0x000000ff)); #endif } + if (tp1->rec.data.chunk_was_revoked) { + /* deflate the cwnd */ + tp1->whoTo->cwnd -= tp1->book_size; + tp1->rec.data.chunk_was_revoked = 0; + 
} tp1->sent = SCTP_DATAGRAM_ACKED; } } else { @@ -4919,7 +4904,7 @@ done_with_it: #endif } - if ((sctp_cmt_on_off == 0) && asoc->fast_retran_loss_recovery && accum_moved) { + if (asoc->fast_retran_loss_recovery && accum_moved) { if (compare_with_wrap(asoc->last_acked_seq, asoc->fast_recovery_tsn, MAX_TSN) || asoc->last_acked_seq == asoc->fast_recovery_tsn) { @@ -4951,6 +4936,14 @@ done_with_it: tp1->sent = SCTP_DATAGRAM_SENT; tp1->rec.data.chunk_was_revoked = 1; tp1->whoTo->flight_size += tp1->book_size; + /* + * To ensure that this increase in + * flightsize, which is artificial, + * does not throttle the sender, we + * also increase the cwnd + * artificially. + */ + tp1->whoTo->cwnd += tp1->book_size; asoc->total_flight_count++; asoc->total_flight += tp1->book_size; cnt_revoked++; @@ -5139,10 +5132,12 @@ done_with_it: } /* * CMT fast recovery code. Need to debug. ((sctp_cmt_on_off == 1) && - * (net->fast_retran_loss_recovery == 0))) + * (net->fast_retran_loss_recovery == 0))) if + * ((asoc->fast_retran_loss_recovery == 0) || (sctp_cmt_on_off == + * 1)) { */ TAILQ_FOREACH(net, &asoc->nets, sctp_next) { - if ((asoc->fast_retran_loss_recovery == 0) || (sctp_cmt_on_off == 1)) { + if (asoc->fast_retran_loss_recovery == 0) { /* out of a RFC2582 Fast recovery window? 
*/ if (net->net_ack > 0) { /* @@ -5287,6 +5282,9 @@ done_with_it: /* end satellite t3 loss recovery */ asoc->sat_t3_loss_recovery = 0; } + /* + * CMT Fast recovery + */ TAILQ_FOREACH(net, &asoc->nets, sctp_next) { if (net->will_exit_fast_recovery) { /* Ok, we must exit fast recovery */ diff --git a/sys/netinet/sctp_input.c b/sys/netinet/sctp_input.c index f227c8e..86634a9 100644 --- a/sys/netinet/sctp_input.c +++ b/sys/netinet/sctp_input.c @@ -35,6 +35,7 @@ __FBSDID("$FreeBSD$"); #include <netinet/sctp_os.h> #include <netinet/sctp_var.h> +#include <netinet/sctp_sysctl.h> #include <netinet/sctp_pcb.h> #include <netinet/sctp_header.h> #include <netinet/sctputil.h> @@ -45,15 +46,6 @@ __FBSDID("$FreeBSD$"); #include <netinet/sctp_asconf.h> -#ifdef SCTP_DEBUG -extern uint32_t sctp_debug_on; - -#endif - - - -struct sctp_foo_stuff sctp_logoff[30000]; -int sctp_logoff_stuff = 0; static void @@ -476,6 +468,22 @@ sctp_handle_heartbeat_ack(struct sctp_heartbeat_chunk *cp, * confirm the destination. */ r_net->dest_state &= ~SCTP_ADDR_UNCONFIRMED; + if (r_net->dest_state & SCTP_ADDR_REQ_PRIMARY) { + stcb->asoc.primary_destination = r_net; + r_net->dest_state &= ~SCTP_ADDR_WAS_PRIMARY; + r_net->dest_state &= ~SCTP_ADDR_REQ_PRIMARY; + r_net = TAILQ_FIRST(&stcb->asoc.nets); + if (r_net != stcb->asoc.primary_destination) { + /* + * first one on the list is NOT the primary + * sctp_cmpaddr() is much more efficent if + * the primary is the first on the list, + * make it so. 
+ */ + TAILQ_REMOVE(&stcb->asoc.nets, stcb->asoc.primary_destination, sctp_next); + TAILQ_INSERT_HEAD(&stcb->asoc.nets, stcb->asoc.primary_destination, sctp_next); + } + } sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_CONFIRMED, stcb, 0, (void *)r_net); } @@ -1528,6 +1536,7 @@ sctp_process_cookie_new(struct mbuf *m, int iphlen, int offset, struct sockaddr_in *sin; struct sockaddr_in6 *sin6; struct sctp_association *asoc; + uint32_t vrf; int chk_length; int init_offset, initack_offset, initack_limit; int retval; @@ -1535,6 +1544,8 @@ sctp_process_cookie_new(struct mbuf *m, int iphlen, int offset, uint32_t old_tag; uint8_t auth_chunk_buf[SCTP_PARAM_BUFFER_SIZE]; + vrf = SCTP_DEFAULT_VRFID; + /* * find and validate the INIT chunk in the cookie (peer's info) the * INIT should start after the cookie-echo header struct (chunk @@ -1596,7 +1607,7 @@ sctp_process_cookie_new(struct mbuf *m, int iphlen, int offset, * and popluate */ stcb = sctp_aloc_assoc(inp, init_src, 0, &error, - ntohl(initack_cp->init.initiate_tag)); + ntohl(initack_cp->init.initiate_tag), vrf); if (stcb == NULL) { struct mbuf *op_err; @@ -3498,9 +3509,6 @@ sctp_handle_packet_dropped(struct sctp_pktdrop_chunk *cp, } } -extern int sctp_strict_init; -extern int sctp_abort_if_one_2_one_hits_limit; - /* * handles all control chunks in a packet inputs: - m: mbuf chain, assumed to * still contain IP/SCTP header - stcb: is the tcb found for this packet - @@ -4662,11 +4670,6 @@ trigger_send: return (0); } -extern int sctp_no_csum_on_loopback; - - -int sctp_buf_index = 0; -uint8_t sctp_list_of_chunks[30000]; void @@ -4799,16 +4802,6 @@ sctp_skip_csum_4: if (mlen < (ip->ip_len - iphlen)) { SCTP_STAT_INCR(sctps_hdrops); goto bad; - } { - /* TEMP log the first chunk */ - int x; - - x = atomic_fetchadd_int(&sctp_buf_index, 1); - if (x >= 30000) { - sctp_buf_index = 1; - x = 0;; - } - sctp_list_of_chunks[x] = ch->chunk_type; } /* * Locate pcb and tcb for datagram sctp_findassociation_addr() wants diff --git 
a/sys/netinet/sctp_lock_bsd.h b/sys/netinet/sctp_lock_bsd.h index a60d31b..07d7823 100644 --- a/sys/netinet/sctp_lock_bsd.h +++ b/sys/netinet/sctp_lock_bsd.h @@ -106,9 +106,8 @@ extern int sctp_logoff_stuff; } while (0) - #define SCTP_IPI_ADDR_INIT() \ - mtx_init(&sctppcbinfo.ipi_addr_mtx, "sctp-addr-wq", "sctp_addr_wq", MTX_DEF) + mtx_init(&sctppcbinfo.ipi_addr_mtx, "sctp-addr", "sctp_addr", MTX_DEF) #define SCTP_IPI_ADDR_DESTROY() \ mtx_destroy(&sctppcbinfo.ipi_addr_mtx) @@ -119,6 +118,24 @@ extern int sctp_logoff_stuff; #define SCTP_IPI_ADDR_UNLOCK() mtx_unlock(&sctppcbinfo.ipi_addr_mtx) + + +#define SCTP_IPI_ITERATOR_WQ_INIT() \ + mtx_init(&sctppcbinfo.ipi_iterator_wq_mtx, "sctp-it-wq", "sctp_it_wq", MTX_DEF) + +#define SCTP_IPI_ITERATOR_WQ_DESTROY() \ + mtx_destroy(&sctppcbinfo.ipi_iterator_wq_mtx) + +#define SCTP_IPI_ITERATOR_WQ_LOCK() do { \ + mtx_lock(&sctppcbinfo.ipi_iterator_wq_mtx); \ +} while (0) + +#define SCTP_IPI_ITERATOR_WQ_UNLOCK() mtx_unlock(&sctppcbinfo.ipi_iterator_wq_mtx) + + + + + #define SCTP_INP_INFO_RUNLOCK() mtx_unlock(&sctppcbinfo.ipi_ep_mtx) #define SCTP_INP_INFO_WUNLOCK() mtx_unlock(&sctppcbinfo.ipi_ep_mtx) @@ -190,36 +207,9 @@ extern int sctp_logoff_stuff; #define SCTP_TCB_SEND_UNLOCK(_tcb) mtx_unlock(&(_tcb)->tcb_send_mtx) -#ifdef INVARIANTS - -#define SCTP_INP_INCR_REF(_inp) { int x; \ - atomic_add_int(&((_inp)->refcount), 1); \ - x = atomic_fetchadd_int(&sctp_logoff_stuff, 1); \ - if(x == 30000) \ - sctp_logoff_stuff = x = 0; \ - sctp_logoff[x].inp = _inp; \ - sctp_logoff[x].ticks = ticks; \ - sctp_logoff[x].lineno = __LINE__; \ - sctp_logoff[x].updown = 1; \ -} - -#define SCTP_INP_DECR_REF(_inp) { int x; \ - if (atomic_fetchadd_int(&((_inp)->refcount), -1) == 0 ) panic("refcount goes negative"); \ - x = atomic_fetchadd_int(&sctp_logoff_stuff, 1); \ - if(x == 30000) \ - sctp_logoff_stuff = x = 0; \ - sctp_logoff[x].inp = _inp; \ - sctp_logoff[x].ticks = ticks; \ - sctp_logoff[x].lineno = __LINE__; \ - sctp_logoff[x].updown = 0; \ -} 
- -#else - #define SCTP_INP_INCR_REF(_inp) atomic_add_int(&((_inp)->refcount), 1) #define SCTP_INP_DECR_REF(_inp) atomic_add_int(&((_inp)->refcount), -1) -#endif #ifdef SCTP_LOCK_LOGGING #define SCTP_ASOC_CREATE_LOCK(_inp) \ diff --git a/sys/netinet/sctp_os.h b/sys/netinet/sctp_os.h index 15d256e..4fc7922 100644 --- a/sys/netinet/sctp_os.h +++ b/sys/netinet/sctp_os.h @@ -62,4 +62,11 @@ __FBSDID("$FreeBSD$"); +/* All os's must implement this address gatherer. If + * no VRF's exist, then vrf 0 is the only one and all + * addresses and ifn's live here. + */ +#define SCTP_DEFAULT_VRF 0 +void sctp_init_vrf_list(int vrfid); + #endif diff --git a/sys/netinet/sctp_os_bsd.h b/sys/netinet/sctp_os_bsd.h index c95b698..945161b 100644 --- a/sys/netinet/sctp_os_bsd.h +++ b/sys/netinet/sctp_os_bsd.h @@ -51,6 +51,7 @@ __FBSDID("$FreeBSD$"); #include <sys/sysctl.h> #include <sys/resourcevar.h> #include <sys/uio.h> +#include <sys/kthread.h> #include <sys/priv.h> #include <sys/random.h> #include <sys/limits.h> @@ -109,6 +110,23 @@ __FBSDID("$FreeBSD$"); #define SCTP_LIST_EMPTY(list) LIST_EMPTY(list) /* + * Local address and interface list handling + */ +#define SCTP_MAX_VRF_ID 0 +#define SCTP_SIZE_OF_VRF_HASH 3 +#define SCTP_IFNAMSIZ IFNAMSIZ +#define SCTP_DEFAULT_VRFID 0 + +#define SCTP_IFN_IS_IFT_LOOP(ifn) ((ifn)->ifn_type == IFT_LOOP) + +/* + * Access to IFN's to help with src-addr-selection + */ +/* This could return VOID if the index works but for BSD we provide both. 
*/ +#define SCTP_GET_IFN_VOID_FROM_ROUTE(ro) (void *)ro->ro_rt->rt_ifp +#define SCTP_GET_IF_INDEX_FROM_ROUTE(ro) ro->ro_rt->rt_ifp->if_index + +/* * general memory allocation */ #define SCTP_MALLOC(var, type, size, name) \ @@ -125,6 +143,8 @@ __FBSDID("$FreeBSD$"); #define SCTP_FREE_SONAME(var) FREE(var, M_SONAME) +#define SCTP_PROCESS_STRUCT struct proc * + /* * zone allocation functions */ @@ -168,7 +188,6 @@ typedef struct callout sctp_os_timer_t; /* * Functions */ - /* Mbuf manipulation and access macros */ #define SCTP_BUF_LEN(m) (m->m_len) #define SCTP_BUF_NEXT(m) (m->m_next) @@ -224,7 +243,12 @@ typedef struct callout sctp_os_timer_t; /* is the endpoint v6only? */ #define SCTP_IPV6_V6ONLY(inp) (((struct inpcb *)inp)->inp_flags & IN6P_IPV6_V6ONLY) - +/* is the socket non-blocking? */ +#define SCTP_SO_IS_NBIO(so) ((so)->so_state & SS_NBIO) +#define SCTP_SET_SO_NBIO(so) ((so)->so_state |= SS_NBIO) +#define SCTP_CLEAR_SO_NBIO(so) ((so)->so_state &= ~SS_NBIO) +/* get the socket type */ +#define SCTP_SO_TYPE(so) ((so)->so_type) /* * SCTP AUTH diff --git a/sys/netinet/sctp_output.c b/sys/netinet/sctp_output.c index 7572130..883f272 100644 --- a/sys/netinet/sctp_output.c +++ b/sys/netinet/sctp_output.c @@ -36,6 +36,7 @@ __FBSDID("$FreeBSD$"); #include <netinet/sctp_os.h> #include <sys/proc.h> #include <netinet/sctp_var.h> +#include <netinet/sctp_sysctl.h> #include <netinet/sctp_header.h> #include <netinet/sctp_pcb.h> #include <netinet/sctputil.h> @@ -48,11 +49,6 @@ __FBSDID("$FreeBSD$"); #include <netinet/sctp_indata.h> #include <netinet/sctp_bsd_addr.h> -#ifdef SCTP_DEBUG -extern uint32_t sctp_debug_on; - -#endif - #define SCTP_MAX_GAPS_INARRAY 4 @@ -1860,9 +1856,1188 @@ struct sack_track sack_array[256] = { }; +int +sctp_is_address_in_scope(struct sctp_ifa *ifa, + int ipv4_addr_legal, + int ipv6_addr_legal, + int loopback_scope, + int ipv4_local_scope, + int local_scope, + int site_scope, + int do_update) +{ + if ((loopback_scope == 0) && + (ifa->ifn_p) && 
SCTP_IFN_IS_IFT_LOOP(ifa->ifn_p)) { + /* + * skip loopback if not in scope * + */ + return (0); + } + if ((ifa->address.sa.sa_family == AF_INET) && ipv4_addr_legal) { + struct sockaddr_in *sin; + + sin = (struct sockaddr_in *)&ifa->address.sin; + if (sin->sin_addr.s_addr == 0) { + /* not in scope , unspecified */ + return (0); + } + if ((ipv4_local_scope == 0) && + (IN4_ISPRIVATE_ADDRESS(&sin->sin_addr))) { + /* private address not in scope */ + return (0); + } + } else if ((ifa->address.sa.sa_family == AF_INET6) && ipv6_addr_legal) { + struct sockaddr_in6 *sin6; + + /* + * Must update the flags, bummer, which means any IFA locks + * must now be applied HERE <-> + */ + if (do_update) { + sctp_gather_internal_ifa_flags(ifa); + } + if (ifa->localifa_flags & SCTP_ADDR_IFA_UNUSEABLE) { + return (0); + } + /* ok to use deprecated addresses? */ + sin6 = (struct sockaddr_in6 *)&ifa->address.sin6; + if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { + /* skip unspecifed addresses */ + return (0); + } + if ( /* (local_scope == 0) && */ + (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr))) { + return (0); + } + if ((site_scope == 0) && + (IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr))) { + return (0); + } + } else { + return (0); + } + return (1); +} + +static struct mbuf * +sctp_add_addr_to_mbuf(struct mbuf *m, struct sctp_ifa *ifa) +{ + struct sctp_paramhdr *parmh; + struct mbuf *mret; + int len; + + if (ifa->address.sa.sa_family == AF_INET) { + len = sizeof(struct sctp_ipv4addr_param); + } else if (ifa->address.sa.sa_family == AF_INET6) { + len = sizeof(struct sctp_ipv6addr_param); + } else { + /* unknown type */ + return (m); + } + if (M_TRAILINGSPACE(m) >= len) { + /* easy side we just drop it on the end */ + parmh = (struct sctp_paramhdr *)(SCTP_BUF_AT(m, SCTP_BUF_LEN(m))); + mret = m; + } else { + /* Need more space */ + mret = m; + while (SCTP_BUF_NEXT(mret) != NULL) { + mret = SCTP_BUF_NEXT(mret); + } + SCTP_BUF_NEXT(mret) = sctp_get_mbuf_for_msg(len, 0, M_DONTWAIT, 1, MT_DATA); + 
if (SCTP_BUF_NEXT(mret) == NULL) { + /* We are hosed, can't add more addresses */ + return (m); + } + mret = SCTP_BUF_NEXT(mret); + parmh = mtod(mret, struct sctp_paramhdr *); + } + /* now add the parameter */ + if (ifa->address.sa.sa_family == AF_INET) { + struct sctp_ipv4addr_param *ipv4p; + struct sockaddr_in *sin; + + sin = (struct sockaddr_in *)&ifa->address.sin; + ipv4p = (struct sctp_ipv4addr_param *)parmh; + parmh->param_type = htons(SCTP_IPV4_ADDRESS); + parmh->param_length = htons(len); + ipv4p->addr = sin->sin_addr.s_addr; + SCTP_BUF_LEN(mret) += len; + } else if (ifa->address.sa.sa_family == AF_INET6) { + struct sctp_ipv6addr_param *ipv6p; + struct sockaddr_in6 *sin6; + + sin6 = (struct sockaddr_in6 *)&ifa->address.sin6; + ipv6p = (struct sctp_ipv6addr_param *)parmh; + parmh->param_type = htons(SCTP_IPV6_ADDRESS); + parmh->param_length = htons(len); + memcpy(ipv6p->addr, &sin6->sin6_addr, + sizeof(ipv6p->addr)); + /* clear embedded scope in the address */ + in6_clearscope((struct in6_addr *)ipv6p->addr); + SCTP_BUF_LEN(mret) += len; + } else { + return (m); + } + return (mret); +} + + +struct mbuf * +sctp_add_addresses_to_i_ia(struct sctp_inpcb *inp, struct sctp_scoping *scope, + struct mbuf *m_at, int cnt_inits_to) +{ + struct sctp_vrf *vrf = NULL; + int cnt, limit_out = 0, total_count; + uint32_t vrf_id; + + vrf_id = SCTP_DEFAULT_VRFID; + SCTP_IPI_ADDR_LOCK(); + vrf = sctp_find_vrf(vrf_id); + if (vrf == NULL) { + SCTP_IPI_ADDR_UNLOCK(); + return (m_at); + } + if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) { + struct sctp_ifa *sctp_ifap; + struct sctp_ifn *sctp_ifnp; + + cnt = cnt_inits_to; + if (vrf->total_ifa_count > SCTP_COUNT_LIMIT) { + limit_out = 1; + cnt = SCTP_ADDRESS_LIMIT; + goto skip_count; + } + LIST_FOREACH(sctp_ifnp, &vrf->ifnlist, next_ifn) { + if ((scope->loopback_scope == 0) && + SCTP_IFN_IS_IFT_LOOP(sctp_ifnp)) { + /* + * Skip loopback devices if loopback_scope + * not set + */ + continue; + } + LIST_FOREACH(sctp_ifap, 
&sctp_ifnp->ifalist, next_ifa) { + if (sctp_is_address_in_scope(sctp_ifap, + scope->ipv4_addr_legal, + scope->ipv6_addr_legal, + scope->loopback_scope, + scope->ipv4_local_scope, + scope->local_scope, + scope->site_scope, 1) == 0) { + continue; + } + cnt++; + if (cnt > SCTP_ADDRESS_LIMIT) { + break; + } + } + if (cnt > SCTP_ADDRESS_LIMIT) { + break; + } + } +skip_count: + if (cnt > 1) { + total_count = 0; + LIST_FOREACH(sctp_ifnp, &vrf->ifnlist, next_ifn) { + cnt = 0; + if ((scope->loopback_scope == 0) && + SCTP_IFN_IS_IFT_LOOP(sctp_ifnp)) { + /* + * Skip loopback devices if + * loopback_scope not set + */ + continue; + } + LIST_FOREACH(sctp_ifap, &sctp_ifnp->ifalist, next_ifa) { + if (sctp_is_address_in_scope(sctp_ifap, + scope->ipv4_addr_legal, + scope->ipv6_addr_legal, + scope->loopback_scope, + scope->ipv4_local_scope, + scope->local_scope, + scope->site_scope, 0) == 0) { + continue; + } + m_at = sctp_add_addr_to_mbuf(m_at, sctp_ifap); + if (limit_out) { + cnt++; + total_count++; + if (cnt >= 2) { + /* + * two from each + * address + */ + break; + } + if (total_count > SCTP_ADDRESS_LIMIT) { + /* No more addresses */ + break; + } + } + } + } + } + } else { + struct sctp_laddr *laddr; + + cnt = cnt_inits_to; + /* First, how many ? */ + LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) { + if (laddr->ifa == NULL) { + continue; + } + if (laddr->ifa->localifa_flags & SCTP_BEING_DELETED) + /* + * Address being deleted by the system, dont + * list. + */ + continue; + if (laddr->action == SCTP_DEL_IP_ADDRESS) { + /* + * Address being deleted on this ep don't + * list. + */ + continue; + } + if (sctp_is_address_in_scope(laddr->ifa, + scope->ipv4_addr_legal, + scope->ipv6_addr_legal, + scope->loopback_scope, + scope->ipv4_local_scope, + scope->local_scope, + scope->site_scope, 1) == 0) { + continue; + } + cnt++; + } + if (cnt > SCTP_ADDRESS_LIMIT) { + limit_out = 1; + } + /* + * To get through a NAT we only list addresses if we have + * more than one. 
That way if you just bind a single address + * we let the source of the init dictate our address. + */ + if (cnt > 1) { + LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) { + cnt = 0; + if (laddr->ifa == NULL) { + continue; + } + if (laddr->ifa->localifa_flags & SCTP_BEING_DELETED) + continue; + + if (sctp_is_address_in_scope(laddr->ifa, + scope->ipv4_addr_legal, + scope->ipv6_addr_legal, + scope->loopback_scope, + scope->ipv4_local_scope, + scope->local_scope, + scope->site_scope, 0) == 0) { + continue; + } + m_at = sctp_add_addr_to_mbuf(m_at, laddr->ifa); + cnt++; + if (cnt >= SCTP_ADDRESS_LIMIT) { + break; + } + } + } + } + SCTP_IPI_ADDR_UNLOCK(); + return (m_at); +} + +static struct sctp_ifa * +sctp_is_ifa_addr_prefered(struct sctp_ifa *ifa, + uint8_t dest_is_loop, + uint8_t dest_is_priv, + sa_family_t fam) +{ + uint8_t dest_is_global = 0; + + /* + * is_scope -> dest_is_priv is true if destination is a private + * address + */ + /* dest_is_loop is true if destination is a loopback addresses */ + + /* + * Here we determine if its a prefered address. A prefered address + * means it is the same scope or higher scope then the destination. 
+ * L = loopback, P = private, G = global + * ----------------------------------------- src | dest | result + * ---------------------------------------- L | L | yes + * ----------------------------------------- P | L | + * yes-v4 no-v6 ----------------------------------------- G | + * L | yes-v4 no-v6 ----------------------------------------- L + * | P | no ----------------------------------------- P | + * P | yes ----------------------------------------- G | + * P | no ----------------------------------------- L | G + * | no ----------------------------------------- P | G | + * no ----------------------------------------- G | G | + * yes ----------------------------------------- + */ + + if (ifa->address.sa.sa_family != fam) { + /* forget mis-matched family */ + return (NULL); + } + if ((dest_is_priv == 0) && (dest_is_loop == 0)) { + dest_is_global = 1; + } + /* Ok the address may be ok */ + if (fam == AF_INET6) { + /* ok to use deprecated addresses? */ + if (ifa->localifa_flags & SCTP_ADDR_IFA_UNUSEABLE) { + return (NULL); + } + if (ifa->src_is_priv) { + if (dest_is_loop) { + return (NULL); + } + } + if (ifa->src_is_glob) { + + if (dest_is_loop) { + return (NULL); + } + } + } + /* + * Now that we know what is what, implement or table this could in + * theory be done slicker (it used to be), but this is + * straightforward and easier to validate :-) + */ + if ((ifa->src_is_loop) && (dest_is_priv)) { + return (NULL); + } + if ((ifa->src_is_glob) && (dest_is_priv)) { + return (NULL); + } + if ((ifa->src_is_loop) && (dest_is_global)) { + return (NULL); + } + if ((ifa->src_is_priv) && (dest_is_global)) { + return (NULL); + } + /* its a prefered address */ + return (ifa); +} + +static struct sctp_ifa * +sctp_is_ifa_addr_acceptable(struct sctp_ifa *ifa, + uint8_t dest_is_loop, + uint8_t dest_is_priv, + sa_family_t fam) +{ + uint8_t dest_is_global = 0; + + + /* + * Here we determine if its a acceptable address. 
A acceptable + * address means it is the same scope or higher scope but we can + * allow for NAT which means its ok to have a global dest and a + * private src. + * + * L = loopback, P = private, G = global + * ----------------------------------------- src | dest | result + * ----------------------------------------- L | L | yes + * ----------------------------------------- P | L | + * yes-v4 no-v6 ----------------------------------------- G | + * L | yes ----------------------------------------- L | + * P | no ----------------------------------------- P | P + * | yes ----------------------------------------- G | P + * | yes - May not work ----------------------------------------- + * L | G | no ----------------------------------------- P + * | G | yes - May not work + * ----------------------------------------- G | G | yes + * ----------------------------------------- + */ + + if (ifa->address.sa.sa_family != fam) { + /* forget non matching family */ + return (NULL); + } + /* Ok the address may be ok */ + if ((dest_is_loop == 0) && (dest_is_priv == 0)) { + dest_is_global = 1; + } + if (fam == AF_INET6) { + /* ok to use deprecated addresses? 
*/ + if (ifa->localifa_flags & SCTP_ADDR_IFA_UNUSEABLE) { + return (NULL); + } + if (ifa->src_is_priv) { + /* Special case, linklocal to loop */ + if (dest_is_loop) + return (NULL); + } + } + /* + * Now that we know what is what, implement or table this could in + * theory be done slicker (it used to be), but this is + * straightforward and easier to validate :-) + */ + + if ((ifa->src_is_loop == 0) && (dest_is_priv)) { + return (NULL); + } + if ((ifa->src_is_loop == 0) && (dest_is_global)) { + return (NULL); + } + /* its an acceptable address */ + return (ifa); +} + +int +sctp_is_addr_restricted(struct sctp_tcb *stcb, struct sctp_ifa *ifa) +{ + struct sctp_laddr *laddr; + + if (stcb == NULL) { + /* There are no restrictions, no TCB :-) */ + return (0); + } + LIST_FOREACH(laddr, &stcb->asoc.sctp_restricted_addrs, sctp_nxt_addr) { + if (laddr->ifa == NULL) { +#ifdef SCTP_DEBUG + if (sctp_debug_on & SCTP_DEBUG_OUTPUT1) { + printf("Help I have fallen and I can't get up!\n"); + } +#endif + continue; + } + if (laddr->ifa == ifa) { + /* Yes it is on the list */ + return (1); + } + } + return (0); +} + + +int +sctp_is_addr_in_ep(struct sctp_inpcb *inp, struct sctp_ifa *ifa) +{ + struct sctp_laddr *laddr; + + if (ifa == NULL) + return (0); + LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) { + if (laddr->ifa == NULL) { +#ifdef SCTP_DEBUG + if (sctp_debug_on & SCTP_DEBUG_OUTPUT1) { + printf("Help I have fallen and I can't get up!\n"); + } +#endif + continue; + } + if ((laddr->ifa == ifa) && laddr->action == 0) + /* same pointer */ + return (1); + } + return (0); +} + + + +static struct sctp_ifa * +sctp_choose_boundspecific_inp(struct sctp_inpcb *inp, + struct route *ro, + uint32_t vrf_id, + int non_asoc_addr_ok, + uint8_t dest_is_priv, + uint8_t dest_is_loop, + sa_family_t fam) +{ + struct sctp_laddr *laddr, *starting_point; + void *ifn; + int resettotop = 0; + struct sctp_ifn *sctp_ifn; + struct sctp_ifa *sctp_ifa, *pass; + struct sctp_vrf *vrf; + uint32_t 
ifn_index; + + vrf = sctp_find_vrf(vrf_id); + if (vrf == NULL) + return (NULL); + + ifn = SCTP_GET_IFN_VOID_FROM_ROUTE(ro); + ifn_index = SCTP_GET_IF_INDEX_FROM_ROUTE(ro); + sctp_ifn = sctp_find_ifn(vrf, ifn, ifn_index); + /* + * first question, is the ifn we will emit on in our list, if so, we + * want such an address. Note that we first looked for a prefered + * address. + */ + if (sctp_ifn) { + /* is a prefered one on the interface we route out? */ + LIST_FOREACH(sctp_ifa, &sctp_ifn->ifalist, next_ifa) { + if ((sctp_ifa->localifa_flags & SCTP_ADDR_DEFER_USE) && (non_asoc_addr_ok == 0)) + continue; + pass = sctp_is_ifa_addr_prefered(sctp_ifa, dest_is_loop, dest_is_priv, fam); + if (pass == NULL) + continue; + if (sctp_is_addr_in_ep(inp, pass)) { + atomic_add_int(&pass->refcount, 1); + return (pass); + } + } + } + /* + * ok, now we now need to find one on the list of the addresses. We + * can't get one on the emitting interface so lets find first a + * prefered one. If not that a acceptable one otherwise... we return + * NULL. 
+ */ + starting_point = inp->next_addr_touse; +once_again: + if (inp->next_addr_touse == NULL) { + inp->next_addr_touse = LIST_FIRST(&inp->sctp_addr_list); + resettotop = 1; + } + for (laddr = inp->next_addr_touse; laddr; laddr = LIST_NEXT(laddr, sctp_nxt_addr)) { + if (laddr->ifa == NULL) { + /* address has been removed */ + continue; + } + pass = sctp_is_ifa_addr_prefered(laddr->ifa, dest_is_loop, dest_is_priv, fam); + if (pass == NULL) + continue; + atomic_add_int(&pass->refcount, 1); + return (pass); + } + if (resettotop == 0) { + inp->next_addr_touse = NULL; + goto once_again; + } + inp->next_addr_touse = starting_point; + resettotop = 0; +once_again_too: + if (inp->next_addr_touse == NULL) { + inp->next_addr_touse = LIST_FIRST(&inp->sctp_addr_list); + resettotop = 1; + } + /* ok, what about an acceptable address in the inp */ + for (laddr = inp->next_addr_touse; laddr; laddr = LIST_NEXT(laddr, sctp_nxt_addr)) { + if (laddr->ifa == NULL) { + /* address has been removed */ + continue; + } + pass = sctp_is_ifa_addr_acceptable(laddr->ifa, dest_is_loop, dest_is_priv, fam); + if (pass == NULL) + continue; + atomic_add_int(&pass->refcount, 1); + return (pass); + } + if (resettotop == 0) { + inp->next_addr_touse = NULL; + goto once_again_too; + } + /* + * no address bound can be a source for the destination we are in + * trouble + */ + return (NULL); +} + + + +static struct sctp_ifa * +sctp_choose_boundspecific_stcb(struct sctp_inpcb *inp, + struct sctp_tcb *stcb, + struct sctp_nets *net, + struct route *ro, + uint32_t vrf_id, + uint8_t dest_is_priv, + uint8_t dest_is_loop, + int non_asoc_addr_ok, + sa_family_t fam) +{ + struct sctp_laddr *laddr, *starting_point; + void *ifn; + struct sctp_ifn *sctp_ifn; + struct sctp_ifa *sctp_ifa, *pass; + uint8_t start_at_beginning = 0; + struct sctp_vrf *vrf; + uint32_t ifn_index; + + /* + * first question, is the ifn we will emit on in our list, if so, we + * want that one. 
+ */ + vrf = sctp_find_vrf(vrf_id); + if (vrf == NULL) + return (NULL); + + ifn = SCTP_GET_IFN_VOID_FROM_ROUTE(ro); + ifn_index = SCTP_GET_IF_INDEX_FROM_ROUTE(ro); + sctp_ifn = sctp_find_ifn(vrf, ifn, ifn_index); + /* + * first question, is the ifn we will emit on in our list, if so, we + * want that one.. First we look for a prefered. Second we go for an + * acceptable. + */ + if (sctp_ifn) { + /* first try for an prefered address on the ep */ + LIST_FOREACH(sctp_ifa, &sctp_ifn->ifalist, next_ifa) { + if ((sctp_ifa->localifa_flags & SCTP_ADDR_DEFER_USE) && (non_asoc_addr_ok == 0)) + continue; + if (sctp_is_addr_in_ep(inp, sctp_ifa)) { + pass = sctp_is_ifa_addr_prefered(sctp_ifa, dest_is_loop, dest_is_priv, fam); + if (pass == NULL) + continue; + if ((non_asoc_addr_ok == 0) && + (sctp_is_addr_restricted(stcb, pass))) { + /* on the no-no list */ + continue; + } + atomic_add_int(&pass->refcount, 1); + return (pass); + } + } + /* next try for an acceptable address on the ep */ + LIST_FOREACH(sctp_ifa, &sctp_ifn->ifalist, next_ifa) { + if ((sctp_ifa->localifa_flags & SCTP_ADDR_DEFER_USE) && (non_asoc_addr_ok == 0)) + continue; + if (sctp_is_addr_in_ep(inp, sctp_ifa)) { + pass = sctp_is_ifa_addr_acceptable(sctp_ifa, dest_is_loop, dest_is_priv, fam); + if (pass == NULL) + continue; + if ((non_asoc_addr_ok == 0) && + (sctp_is_addr_restricted(stcb, pass))) { + /* on the no-no list */ + continue; + } + atomic_add_int(&pass->refcount, 1); + return (pass); + } + } + + } + /* + * if we can't find one like that then we must look at all addresses + * bound to pick one at first prefereable then secondly acceptable. 
+ */ + starting_point = stcb->asoc.last_used_address; +sctp_from_the_top: + if (stcb->asoc.last_used_address == NULL) { + start_at_beginning = 1; + stcb->asoc.last_used_address = LIST_FIRST(&inp->sctp_addr_list); + } + /* search beginning with the last used address */ + for (laddr = stcb->asoc.last_used_address; laddr; + laddr = LIST_NEXT(laddr, sctp_nxt_addr)) { + if (laddr->ifa == NULL) { + /* address has been removed */ + continue; + } + pass = sctp_is_ifa_addr_prefered(laddr->ifa, dest_is_loop, dest_is_priv, fam); + if (pass == NULL) + continue; + if ((non_asoc_addr_ok == 0) && + (sctp_is_addr_restricted(stcb, pass))) { + /* on the no-no list */ + continue; + } + stcb->asoc.last_used_address = laddr; + atomic_add_int(&pass->refcount, 1); + return (pass); + + } + if (start_at_beginning == 0) { + stcb->asoc.last_used_address = NULL; + goto sctp_from_the_top; + } + /* now try for any higher scope than the destination */ + stcb->asoc.last_used_address = starting_point; + start_at_beginning = 0; +sctp_from_the_top2: + if (stcb->asoc.last_used_address == NULL) { + start_at_beginning = 1; + stcb->asoc.last_used_address = LIST_FIRST(&inp->sctp_addr_list); + } + /* search beginning with the last used address */ + for (laddr = stcb->asoc.last_used_address; laddr; + laddr = LIST_NEXT(laddr, sctp_nxt_addr)) { + if (laddr->ifa == NULL) { + /* address has been removed */ + continue; + } + pass = sctp_is_ifa_addr_acceptable(laddr->ifa, dest_is_loop, dest_is_priv, fam); + if (pass == NULL) + continue; + if ((non_asoc_addr_ok == 0) && + (sctp_is_addr_restricted(stcb, pass))) { + /* on the no-no list */ + continue; + } + stcb->asoc.last_used_address = laddr; + atomic_add_int(&pass->refcount, 1); + return (pass); + } + if (start_at_beginning == 0) { + stcb->asoc.last_used_address = NULL; + goto sctp_from_the_top2; + } + return (NULL); +} + +static struct sctp_ifa * +sctp_select_nth_prefered_addr_from_ifn_boundall(struct sctp_ifn *ifn, + struct sctp_tcb *stcb, + int 
non_asoc_addr_ok, + uint8_t dest_is_loop, + uint8_t dest_is_priv, + int addr_wanted, + sa_family_t fam) +{ + struct sctp_ifa *ifa, *pass; + int num_eligible_addr = 0; + + LIST_FOREACH(ifa, &ifn->ifalist, next_ifa) { + if ((ifa->localifa_flags & SCTP_ADDR_DEFER_USE) && (non_asoc_addr_ok == 0)) + continue; + pass = sctp_is_ifa_addr_prefered(ifa, dest_is_loop, dest_is_priv, fam); + if (pass == NULL) + continue; + if (stcb) { + if ((non_asoc_addr_ok == 0) && sctp_is_addr_restricted(stcb, pass)) { + /* + * It is restricted for some reason.. + * probably not yet added. + */ + continue; + } + } + if (num_eligible_addr >= addr_wanted) { + return (pass); + } + num_eligible_addr++; + } + return (NULL); +} -extern int sctp_peer_chunk_oh; + +static int +sctp_count_num_prefered_boundall(struct sctp_ifn *ifn, + struct sctp_tcb *stcb, + int non_asoc_addr_ok, + uint8_t dest_is_loop, + uint8_t dest_is_priv, + sa_family_t fam) +{ + struct sctp_ifa *ifa, *pass; + int num_eligible_addr = 0; + + LIST_FOREACH(ifa, &ifn->ifalist, next_ifa) { + if ((ifa->localifa_flags & SCTP_ADDR_DEFER_USE) && (non_asoc_addr_ok == 0)) { + continue; + } + pass = sctp_is_ifa_addr_prefered(ifa, dest_is_loop, dest_is_priv, fam); + if (pass == NULL) { + continue; + } + if (stcb) { + if ((non_asoc_addr_ok == 0) && sctp_is_addr_restricted(stcb, pass)) { + /* + * It is restricted for some reason.. + * probably not yet added. 
+ */ + continue; + } + } + num_eligible_addr++; + } + return (num_eligible_addr); +} + +static struct sctp_ifa * +sctp_choose_boundall(struct sctp_inpcb *inp, + struct sctp_tcb *stcb, + struct sctp_nets *net, + struct route *ro, + uint32_t vrf_id, + uint8_t dest_is_priv, + uint8_t dest_is_loop, + int non_asoc_addr_ok, + sa_family_t fam) +{ + int cur_addr_num = 0, num_prefered = 0; + void *ifn; + struct sctp_ifn *sctp_ifn, *looked_at = NULL, *emit_ifn; + struct sctp_ifa *sctp_ifa, *pass; + uint32_t ifn_index; + struct sctp_vrf *vrf; + + /* + * For boundall we can use any address in the association. If + * non_asoc_addr_ok is set we can use any address (at least in + * theory). So we look for prefered addresses first. If we find one, + * we use it. Otherwise we next try to get an address on the + * interface, which we should be able to do (unless non_asoc_addr_ok + * is false and we are routed out that way). In these cases where we + * can't use the address of the interface we go through all the + * ifn's looking for an address we can use and fill that in. Punting + * means we send back address 0, which will probably cause problems + * actually since then IP will fill in the address of the route ifn, + * which means we probably already rejected it.. i.e. here comes an + * abort :-<. + */ + vrf = sctp_find_vrf(vrf_id); + if (vrf == NULL) + return (NULL); + + ifn = SCTP_GET_IFN_VOID_FROM_ROUTE(ro); + ifn_index = SCTP_GET_IF_INDEX_FROM_ROUTE(ro); + + emit_ifn = looked_at = sctp_ifn = sctp_find_ifn(vrf, ifn, ifn_index); + if (sctp_ifn == NULL) { + /* ?? We don't have this guy ?? 
*/ + goto bound_all_plan_b; + } + if (net) { + cur_addr_num = net->indx_of_eligible_next_to_use; + } + num_prefered = sctp_count_num_prefered_boundall(sctp_ifn, + stcb, + non_asoc_addr_ok, + dest_is_loop, + dest_is_priv, fam); +#ifdef SCTP_DEBUG + if (sctp_debug_on & SCTP_DEBUG_OUTPUT1) { + printf("Found %d prefered source addresses\n", num_prefered); + } +#endif + if (num_prefered == 0) { + /* + * no eligible addresses, we must use some other interface + * address if we can find one. + */ + goto bound_all_plan_b; + } + /* + * Ok we have num_eligible_addr set with how many we can use, this + * may vary from call to call due to addresses being deprecated + * etc.. + */ + if (cur_addr_num >= num_prefered) { + cur_addr_num = 0; + } + /* + * select the nth address from the list (where cur_addr_num is the + * nth) and 0 is the first one, 1 is the second one etc... + */ +#ifdef SCTP_DEBUG + if (sctp_debug_on & SCTP_DEBUG_OUTPUT1) { + printf("cur_addr_num:%d\n", cur_addr_num); + } +#endif + sctp_ifa = sctp_select_nth_prefered_addr_from_ifn_boundall(sctp_ifn, stcb, non_asoc_addr_ok, dest_is_loop, + dest_is_priv, cur_addr_num, fam); + + /* if sctp_ifa is NULL something changed??, fall to plan b. */ + if (sctp_ifa) { + atomic_add_int(&sctp_ifa->refcount, 1); + if (net) { + /* save off where the next one we will want */ + net->indx_of_eligible_next_to_use = cur_addr_num + 1; + } + return (sctp_ifa); + } + /* + * plan_b: Look at all interfaces and find a prefered address. If no + * prefered fall through to plan_c. 
+ */ +bound_all_plan_b: + LIST_FOREACH(sctp_ifn, &vrf->ifnlist, next_ifn) { + if (dest_is_loop == 0 && SCTP_IFN_IS_IFT_LOOP(sctp_ifn)) { + /* wrong base scope */ + continue; + } + if ((sctp_ifn == looked_at) && looked_at) + /* already looked at this guy */ + continue; + num_prefered = sctp_count_num_prefered_boundall(sctp_ifn, stcb, non_asoc_addr_ok, + dest_is_loop, dest_is_priv, fam); +#ifdef SCTP_DEBUG + if (sctp_debug_on & SCTP_DEBUG_OUTPUT1) { + printf("Found ifn:%p %d prefered source addresses\n", ifn, num_prefered); + } +#endif + if (num_prefered == 0) { + /* + * None on this interface. + */ + continue; + } +#ifdef SCTP_DEBUG + if (sctp_debug_on & SCTP_DEBUG_OUTPUT2) { + printf("num prefered:%d on interface:%p cur_addr_num:%d\n", + num_prefered, + sctp_ifn, + cur_addr_num); + } +#endif + + /* + * Ok we have num_eligible_addr set with how many we can + * use, this may vary from call to call due to addresses + * being deprecated etc.. + */ + if (cur_addr_num >= num_prefered) { + cur_addr_num = 0; + } + pass = sctp_select_nth_prefered_addr_from_ifn_boundall(sctp_ifn, stcb, non_asoc_addr_ok, dest_is_loop, + dest_is_priv, cur_addr_num, fam); + if (pass == NULL) + continue; + if (net) { + net->indx_of_eligible_next_to_use = cur_addr_num + 1; +#ifdef SCTP_DEBUG + if (sctp_debug_on & SCTP_DEBUG_OUTPUT2) { + printf("we selected %d\n", cur_addr_num); + printf("Source:"); + sctp_print_address(&pass->address.sa); + printf("Dest:"); + sctp_print_address(&net->ro._l_addr.sa); + } +#endif + } + atomic_add_int(&pass->refcount, 1); + return (pass); + + } + + /* + * plan_c: See if we have an acceptable address on the emit + * interface + */ +#ifdef SCTP_DEBUG + if (sctp_debug_on & SCTP_DEBUG_OUTPUT2) { + if (net) { + printf("Plan C no prefered for Dest:"); + sctp_print_address(&net->ro._l_addr.sa); + } + } +#endif + + LIST_FOREACH(sctp_ifa, &emit_ifn->ifalist, next_ifa) { + if ((sctp_ifa->localifa_flags & SCTP_ADDR_DEFER_USE) && (non_asoc_addr_ok == 0)) + continue; + pass = 
sctp_is_ifa_addr_acceptable(sctp_ifa, dest_is_loop, dest_is_priv, fam); + if (pass == NULL) + continue; + if (stcb) { + if ((non_asoc_addr_ok == 0) && sctp_is_addr_restricted(stcb, pass)) { + /* + * It is restricted for some reason.. + * probably not yet added. + */ + continue; + } + } + atomic_add_int(&pass->refcount, 1); + return (pass); + } + + /* + * plan_d: We are in trouble. No prefered address on the emit + * interface. And not even a perfered address on all interfaces. Go + * out and see if we can find an acceptable address somewhere + * amongst all interfaces. + */ + LIST_FOREACH(sctp_ifn, &vrf->ifnlist, next_ifn) { + if (dest_is_loop == 0 && SCTP_IFN_IS_IFT_LOOP(sctp_ifn)) { + /* wrong base scope */ + continue; + } + if ((sctp_ifn == looked_at) && looked_at) + /* already looked at this guy */ + continue; + + LIST_FOREACH(sctp_ifa, &sctp_ifn->ifalist, next_ifa) { + if ((sctp_ifa->localifa_flags & SCTP_ADDR_DEFER_USE) && (non_asoc_addr_ok == 0)) + continue; + pass = sctp_is_ifa_addr_acceptable(sctp_ifa, dest_is_loop, dest_is_priv, fam); + if (pass == NULL) + continue; + if (stcb) { + if ((non_asoc_addr_ok == 0) && sctp_is_addr_restricted(stcb, pass)) { + /* + * It is restricted for some + * reason.. probably not yet added. + */ + continue; + } + } + atomic_add_int(&pass->refcount, 1); + return (pass); + } + } + /* + * Ok we can find NO address to source from that is not on our + * negative list and non_asoc_address is NOT ok, or its on our + * negative list. 
We cant source to it :-( + */ + return (NULL); +} + + + +/* tcb may be NULL */ +struct sctp_ifa * +sctp_source_address_selection(struct sctp_inpcb *inp, + struct sctp_tcb *stcb, + struct route *ro, + struct sctp_nets *net, + int non_asoc_addr_ok, uint32_t vrf_id) +{ + + struct sockaddr_in *to = (struct sockaddr_in *)&ro->ro_dst; + struct sockaddr_in6 *to6 = (struct sockaddr_in6 *)&ro->ro_dst; + struct sctp_ifa *answer; + uint8_t dest_is_priv, dest_is_loop; + int did_rtalloc = 0; + sa_family_t fam; + + /* + * Rules: - Find the route if needed, cache if I can. - Look at + * interface address in route, Is it in the bound list. If so we + * have the best source. - If not we must rotate amongst the + * addresses. + * + * Cavets and issues + * + * Do we need to pay attention to scope. We can have a private address + * or a global address we are sourcing or sending to. So if we draw + * it out zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz + * For V4 ------------------------------------------ source * + * dest * result ----------------------------------------- <a> + * Private * Global * NAT + * ----------------------------------------- <b> Private * + * Private * No problem ----------------------------------------- + * <c> Global * Private * Huh, How will this work? + * ----------------------------------------- <d> Global * + * Global * No Problem ------------------------------------------ + * zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz For V6 + * ------------------------------------------ source * dest * + * result ----------------------------------------- <a> Linklocal * + * Global * ----------------------------------------- <b> + * Linklocal * Linklocal * No problem + * ----------------------------------------- <c> Global * + * Linklocal * Huh, How will this work? 
+ * ----------------------------------------- <d> Global * + * Global * No Problem ------------------------------------------ + * zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz + * + * And then we add to that what happens if there are multiple addresses + * assigned to an interface. Remember the ifa on a ifn is a linked + * list of addresses. So one interface can have more than one IP + * address. What happens if we have both a private and a global + * address? Do we then use context of destination to sort out which + * one is best? And what about NAT's sending P->G may get you a NAT + * translation, or should you select the G thats on the interface in + * preference. + * + * Decisions: + * + * - count the number of addresses on the interface. - if its one, no + * problem except case <c>. For <a> we will assume a NAT out there. + * - if there are more than one, then we need to worry about scope P + * or G. We should prefer G -> G and P -> P if possible. Then as a + * secondary fall back to mixed types G->P being a last ditch one. - + * The above all works for bound all, but bound specific we need to + * use the same concept but instead only consider the bound + * addresses. If the bound set is NOT assigned to the interface then + * we must use rotation amongst the bound addresses.. + * + */ + if (ro->ro_rt == NULL) { + /* + * Need a route to cache. 
+ * + */ + rtalloc_ign(ro, 0UL); + did_rtalloc = 1; + } + if (ro->ro_rt == NULL) { + return (NULL); + } + fam = to->sin_family; + dest_is_priv = dest_is_loop = 0; + /* Setup our scopes for the destination */ + if (fam == AF_INET) { + /* Scope based on outbound address */ + if ((IN4_ISPRIVATE_ADDRESS(&to->sin_addr))) { + dest_is_priv = 1; + } else if (IN4_ISLOOPBACK_ADDRESS(&to->sin_addr)) { + dest_is_loop = 1; + if (net != NULL) { + /* mark it as local */ + net->addr_is_local = 1; + } + } + } else if (fam == AF_INET6) { + /* Scope based on outbound address */ + if (IN6_IS_ADDR_LOOPBACK(&to6->sin6_addr)) { + /* + * If the route goes to the loopback address OR the + * address is a loopback address, we are loopback + * scope. But we don't use dest_is_priv (link local + * addresses). + */ + dest_is_loop = 1; + if (net != NULL) { + /* mark it as local */ + net->addr_is_local = 1; + } + } else if (IN6_IS_ADDR_LINKLOCAL(&to6->sin6_addr)) { + dest_is_priv = 1; + } + } + if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) { + /* + * When bound to all if the address list is set it is a + * negative list. Addresses being added by asconf. + */ + answer = sctp_choose_boundall(inp, stcb, net, ro, vrf_id, + dest_is_priv, + dest_is_loop, + non_asoc_addr_ok, + fam); + return (answer); + } + /* + * Three possiblities here: + * + * a) stcb is NULL, which means we operate only from the list of + * addresses (ifa's) bound to the endpoint and we care not about the + * list. b) stcb is NOT-NULL, which means we have an assoc structure + * and auto-asconf is on. This means that the list of addresses is a + * NOT list. We use the list from the inp, but any listed address in + * our list is NOT yet added. However if the non_asoc_addr_ok is set + * we CAN use an address NOT available (i.e. being added). Its a + * negative list. c) stcb is NOT-NULL, which means we have an assoc + * structure and auto-asconf is off. This means that the list of + * addresses is the ONLY addresses I can use.. 
its positive. + * + * Note we collapse b & c into the same function just like in the v6 + * address selection. + */ + if (stcb) { + answer = sctp_choose_boundspecific_stcb(inp, stcb, net, ro, vrf_id, + dest_is_priv, dest_is_loop, non_asoc_addr_ok, fam); + + } else { + answer = sctp_choose_boundspecific_inp(inp, ro, vrf_id, non_asoc_addr_ok, dest_is_priv, dest_is_loop, fam); + + } + return (answer); +} static int sctp_find_cmsg(int c_type, void *data, struct mbuf *control, int cpsize) @@ -1914,10 +3089,7 @@ sctp_find_cmsg(int c_type, void *data, struct mbuf *control, int cpsize) } -extern int sctp_mbuf_threshold_count; - - -__inline struct mbuf * +struct mbuf * sctp_get_mbuf_for_msg(unsigned int space_needed, int want_header, int how, int allonebuf, int type) { @@ -2121,8 +3293,6 @@ sctp_get_ect(struct sctp_tcb *stcb, } } -extern int sctp_no_csum_on_loopback; - static int sctp_lowlevel_chunk_output(struct sctp_inpcb *inp, struct sctp_tcb *stcb, /* may be NULL */ @@ -2156,12 +3326,20 @@ sctp_lowlevel_chunk_output(struct sctp_inpcb *inp, uint32_t csum; int ret; unsigned int have_mtu; + uint32_t vrf_id; struct route *ro; + if ((net) && (net->dest_state & SCTP_ADDR_OUT_OF_SCOPE)) { sctp_m_freem(m); return (EFAULT); } + if (stcb == NULL) { + vrf_id = SCTP_DEFAULT_VRFID; + } else { + vrf_id = stcb->asoc.vrf_id; + } + /* fill in the HMAC digest for any AUTH chunk in the packet */ if ((auth != NULL) && (stcb != NULL)) { sctp_fill_hmac_digest_m(m, auth_offset, auth, stcb); @@ -2186,7 +3364,7 @@ sctp_lowlevel_chunk_output(struct sctp_inpcb *inp, } if (to->sa_family == AF_INET) { - struct ip *ip; + struct ip *ip = NULL; struct route iproute; uint8_t tos_value; @@ -2250,16 +3428,25 @@ sctp_lowlevel_chunk_output(struct sctp_inpcb *inp, if (net) { if (net->src_addr_selected == 0) { /* Cache the source address */ - ((struct sockaddr_in *)&net->ro._s_addr)->sin_addr = sctp_ipv4_source_address_selection(inp, - stcb, - ro, net, out_of_asoc_ok); - if (ro->ro_rt) - 
net->src_addr_selected = 1; + net->ro._s_addr = sctp_source_address_selection(inp, stcb, + ro, net, out_of_asoc_ok, vrf_id); + if (net->ro._s_addr == NULL) { + /* No route to host */ + goto no_route; + } + net->src_addr_selected = 1; } - ip->ip_src = ((struct sockaddr_in *)&net->ro._s_addr)->sin_addr; + ip->ip_src = net->ro._s_addr->address.sin.sin_addr; } else { - ip->ip_src = sctp_ipv4_source_address_selection(inp, - stcb, ro, net, out_of_asoc_ok); + struct sctp_ifa *_lsrc; + + _lsrc = sctp_source_address_selection(inp, + stcb, ro, net, out_of_asoc_ok, vrf_id); + if (_lsrc == NULL) { + goto no_route; + } + ip->ip_src = _lsrc->address.sin.sin_addr; + sctp_free_ifa(_lsrc); } /* @@ -2273,22 +3460,30 @@ sctp_lowlevel_chunk_output(struct sctp_inpcb *inp, /* * src addr selection failed to find a route (or * valid source addr), so we can't get there from - * here! + * here (yet)! */ + no_route: #ifdef SCTP_DEBUG if (sctp_debug_on & SCTP_DEBUG_OUTPUT1) { - printf("low_level_output: dropped v4 packet- no valid source addr\n"); - printf("Destination was %x\n", (uint32_t) (ntohl(ip->ip_dst.s_addr))); + printf("low_level_output: dropped packet - no valid source addr\n"); + if (net) { + printf("Destination was "); + sctp_print_address(&net->ro._l_addr.sa); + } } #endif /* SCTP_DEBUG */ if (net) { - if ((net->dest_state & SCTP_ADDR_REACHABLE) && stcb) - sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_DOWN, - stcb, - SCTP_FAILED_THRESHOLD, - (void *)net); - net->dest_state &= ~SCTP_ADDR_REACHABLE; - net->dest_state |= SCTP_ADDR_NOT_REACHABLE; + if (net->dest_state & SCTP_ADDR_CONFIRMED) { + if ((net->dest_state & SCTP_ADDR_REACHABLE) && stcb) { + printf("no route takes interface %p down\n", net); + sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_DOWN, + stcb, + SCTP_FAILED_THRESHOLD, + (void *)net); + net->dest_state &= ~SCTP_ADDR_REACHABLE; + net->dest_state |= SCTP_ADDR_NOT_REACHABLE; + } + } if (stcb) { if (net == stcb->asoc.primary_destination) { /* need a new primary */ @@ -2300,6 +3495,10 @@ 
sctp_lowlevel_chunk_output(struct sctp_inpcb *inp, (struct sockaddr *)NULL, alt) == 0) { net->dest_state |= SCTP_ADDR_WAS_PRIMARY; + if (net->ro._s_addr) { + sctp_free_ifa(net->ro._s_addr); + net->ro._s_addr = NULL; + } net->src_addr_selected = 0; } } @@ -2349,8 +3548,10 @@ sctp_lowlevel_chunk_output(struct sctp_inpcb *inp, #endif if (net == NULL) { /* free tempy routes */ - if (ro->ro_rt) + if (ro->ro_rt) { RTFREE(ro->ro_rt); + ro->ro_rt = NULL; + } } else { /* PMTU check versus smallest asoc MTU goes here */ if (ro->ro_rt != NULL) { @@ -2361,6 +3562,11 @@ sctp_lowlevel_chunk_output(struct sctp_inpcb *inp, } } else { /* route was freed */ + if (net->ro._s_addr && + net->src_addr_selected) { + sctp_free_ifa(net->ro._s_addr); + net->ro._s_addr = NULL; + } net->src_addr_selected = 0; } } @@ -2448,16 +3654,30 @@ sctp_lowlevel_chunk_output(struct sctp_inpcb *inp, if (net) { if (net->src_addr_selected == 0) { /* Cache the source address */ - ((struct sockaddr_in6 *)&net->ro._s_addr)->sin6_addr = sctp_ipv6_source_address_selection(inp, - stcb, ro, net, out_of_asoc_ok); - - if (ro->ro_rt) - net->src_addr_selected = 1; + net->ro._s_addr = sctp_source_address_selection(inp, + stcb, + ro, + net, + out_of_asoc_ok, + vrf_id); + if (net->ro._s_addr == NULL) { +#ifdef SCTP_DEBUG + printf("V6:No route to host\n"); +#endif + goto no_route; + } + net->src_addr_selected = 1; } - lsa6->sin6_addr = ((struct sockaddr_in6 *)&net->ro._s_addr)->sin6_addr; + lsa6->sin6_addr = net->ro._s_addr->address.sin6.sin6_addr; } else { - lsa6->sin6_addr = sctp_ipv6_source_address_selection( - inp, stcb, ro, net, out_of_asoc_ok); + struct sctp_ifa *_lsrc; + + _lsrc = sctp_source_address_selection(inp, stcb, ro, net, out_of_asoc_ok, vrf_id); + if (_lsrc == NULL) { + goto no_route; + } + lsa6->sin6_addr = _lsrc->address.sin6.sin6_addr; + sctp_free_ifa(_lsrc); } lsa6->sin6_port = inp->sctp_lport; @@ -2467,38 +3687,7 @@ sctp_lowlevel_chunk_output(struct sctp_inpcb *inp, * valid source addr), so we can't 
get there from * here! */ -#ifdef SCTP_DEBUG - if (sctp_debug_on & SCTP_DEBUG_OUTPUT1) { - printf("low_level_output: dropped v6 pkt- no valid source addr\n"); - } -#endif - sctp_m_freem(o_pak); - if (net) { - if ((net->dest_state & SCTP_ADDR_REACHABLE) && stcb) - sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_DOWN, - stcb, - SCTP_FAILED_THRESHOLD, - (void *)net); - net->dest_state &= ~SCTP_ADDR_REACHABLE; - net->dest_state |= SCTP_ADDR_NOT_REACHABLE; - if (stcb) { - if (net == stcb->asoc.primary_destination) { - /* need a new primary */ - struct sctp_nets *alt; - - alt = sctp_find_alternate_net(stcb, net, 0); - if (alt != net) { - if (sctp_set_primary_addr(stcb, - (struct sockaddr *)NULL, - alt) == 0) { - net->dest_state |= SCTP_ADDR_WAS_PRIMARY; - net->src_addr_selected = 0; - } - } - } - } - } - return (EHOSTUNREACH); + goto no_route; } /* * XXX: sa6 may not have a valid sin6_scope_id in the @@ -2565,8 +3754,9 @@ sctp_lowlevel_chunk_output(struct sctp_inpcb *inp, #endif /* SCTP_DEBUG_OUTPUT */ SCTP_STAT_INCR(sctps_sendpackets); SCTP_STAT_INCR_COUNTER64(sctps_outpackets); - if (ret) + if (ret) { SCTP_STAT_INCR(sctps_senderrors); + } if (net == NULL) { /* Now if we had a temp route free it */ if (ro->ro_rt) { @@ -2576,6 +3766,12 @@ sctp_lowlevel_chunk_output(struct sctp_inpcb *inp, /* PMTU check versus smallest asoc MTU goes here */ if (ro->ro_rt == NULL) { /* Route was freed */ + + if (net->ro._s_addr && + net->src_addr_selected) { + sctp_free_ifa(net->ro._s_addr); + net->ro._s_addr = NULL; + } net->src_addr_selected = 0; } if (ro->ro_rt != NULL) { @@ -3264,7 +4460,9 @@ sctp_send_initiate_ack(struct sctp_inpcb *inp, struct sctp_tcb *stcb, int abort_flag, padval, sz_of; int num_ext; int p_len; + uint32_t vrf_id; + vrf_id = SCTP_DEFAULT_VRFID; if (stcb) { asoc = &stcb->asoc; } else { @@ -3358,7 +4556,7 @@ sctp_send_initiate_ack(struct sctp_inpcb *inp, struct sctp_tcb *stcb, to = (struct sockaddr *)&store; iph = mtod(init_pkt, struct ip *); if (iph->ip_v == IPVERSION) { - 
struct in_addr addr; + struct sctp_ifa *addr; struct route iproute; sin->sin_family = AF_INET; @@ -3375,12 +4573,16 @@ sctp_send_initiate_ack(struct sctp_inpcb *inp, struct sctp_tcb *stcb, memset(&iproute, 0, sizeof(iproute)); ro = &iproute; memcpy(&ro->ro_dst, sin, sizeof(*sin)); - addr = sctp_ipv4_source_address_selection(inp, NULL, - ro, NULL, 0); + addr = sctp_source_address_selection(inp, NULL, + ro, NULL, 0, vrf_id); + if (addr == NULL) + return; + if (ro->ro_rt) { RTFREE(ro->ro_rt); + ro->ro_rt = NULL; } - stc.laddress[0] = addr.s_addr; + stc.laddress[0] = addr->address.sin.sin_addr.s_addr; stc.laddress[1] = 0; stc.laddress[2] = 0; stc.laddress[3] = 0; @@ -3395,14 +4597,14 @@ sctp_send_initiate_ack(struct sctp_inpcb *inp, struct sctp_tcb *stcb, stc.ipv4_scope = 1; #endif /* SCTP_DONT_DO_PRIVADDR_SCOPE */ /* Must use the address in this case */ - if (sctp_is_address_on_local_host((struct sockaddr *)sin)) { + if (sctp_is_address_on_local_host((struct sockaddr *)sin, vrf_id)) { stc.loopback_scope = 1; stc.ipv4_scope = 1; stc.site_scope = 1; - stc.local_scope = 1; + stc.local_scope = 0; } } else if (iph->ip_v == (IPV6_VERSION >> 4)) { - struct in6_addr addr; + struct sctp_ifa *addr; struct route_in6 iproute6; @@ -3417,9 +4619,9 @@ sctp_send_initiate_ack(struct sctp_inpcb *inp, struct sctp_tcb *stcb, sin6->sin6_scope_id = 0; stc.addr_type = SCTP_IPV6_ADDRESS; stc.scope_id = 0; - if (sctp_is_address_on_local_host((struct sockaddr *)sin6)) { + if (sctp_is_address_on_local_host((struct sockaddr *)sin6, vrf_id)) { stc.loopback_scope = 1; - stc.local_scope = 1; + stc.local_scope = 0; stc.site_scope = 1; stc.ipv4_scope = 1; } else if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) { @@ -3459,12 +4661,16 @@ sctp_send_initiate_ack(struct sctp_inpcb *inp, struct sctp_tcb *stcb, memset(&iproute6, 0, sizeof(iproute6)); ro = (struct route *)&iproute6; memcpy(&ro->ro_dst, sin6, sizeof(*sin6)); - addr = sctp_ipv6_source_address_selection(inp, NULL, - ro, NULL, 0); + addr = 
sctp_source_address_selection(inp, NULL, + ro, NULL, 0, vrf_id); + if (addr == NULL) + return; + if (ro->ro_rt) { RTFREE(ro->ro_rt); + ro->ro_rt = NULL; } - memcpy(&stc.laddress, &addr, sizeof(struct in6_addr)); + memcpy(&stc.laddress, &addr->address.sin6.sin6_addr, sizeof(struct in6_addr)); stc.laddr_type = SCTP_IPV6_ADDRESS; } } else { @@ -3501,13 +4707,16 @@ sctp_send_initiate_ack(struct sctp_inpcb *inp, struct sctp_tcb *stcb, * strange case here, the INIT should have * did the selection. */ - net->ro._s_addr.sin.sin_addr = - sctp_ipv4_source_address_selection(inp, - stcb, (struct route *)&net->ro, net, 0); + net->ro._s_addr = sctp_source_address_selection(inp, + stcb, (struct route *)&net->ro, + net, 0, vrf_id); + if (net->ro._s_addr == NULL) + return; + net->src_addr_selected = 1; } - stc.laddress[0] = net->ro._s_addr.sin.sin_addr.s_addr; + stc.laddress[0] = net->ro._s_addr->address.sin.sin_addr.s_addr; stc.laddress[1] = 0; stc.laddress[2] = 0; stc.laddress[3] = 0; @@ -3522,12 +4731,15 @@ sctp_send_initiate_ack(struct sctp_inpcb *inp, struct sctp_tcb *stcb, * strange case here, the INIT should have * did the selection. 
*/ - net->ro._s_addr.sin6.sin6_addr = - sctp_ipv6_source_address_selection(inp, - stcb, (struct route *)&net->ro, net, 0); + net->ro._s_addr = sctp_source_address_selection(inp, + stcb, (struct route *)&net->ro, + net, 0, vrf_id); + if (net->ro._s_addr == NULL) + return; + net->src_addr_selected = 1; } - memcpy(&stc.laddress, &net->ro._l_addr.sin6.sin6_addr, + memcpy(&stc.laddress, &net->ro._s_addr->address.sin6.sin6_addr, sizeof(struct in6_addr)); stc.laddr_type = SCTP_IPV6_ADDRESS; } @@ -3670,7 +4882,7 @@ sctp_send_initiate_ack(struct sctp_inpcb *inp, struct sctp_tcb *stcb, uint16_t random_len; /* generate and add RANDOM parameter */ - random_len = sctp_auth_random_len; + random_len = SCTP_AUTH_RANDOM_SIZE_DEFAULT; random = (struct sctp_auth_random *)(mtod(m, caddr_t)+SCTP_BUF_LEN(m)); random->ph.param_type = htons(SCTP_RANDOM); p_len = sizeof(*random) + random_len; @@ -4011,7 +5223,6 @@ sctp_get_frag_point(struct sctp_tcb *stcb, } return (siz); } -extern unsigned int sctp_max_chunks_on_queue; static void sctp_set_prsctp_policy(struct sctp_tcb *stcb, @@ -4612,7 +5823,7 @@ sctp_sendall(struct sctp_inpcb *inp, struct uio *uio, struct mbuf *m, } ca->m = m; } - ret = sctp_initiate_iterator(NULL, sctp_sendall_iterator, + ret = sctp_initiate_iterator(NULL, sctp_sendall_iterator, NULL, SCTP_PCB_ANY_FLAGS, SCTP_PCB_ANY_FEATURES, SCTP_ASOC_ANY_STATE, (void *)ca, 0, sctp_sendall_completes, inp, 1); @@ -4807,7 +6018,6 @@ sctp_clean_up_ctl(struct sctp_tcb *stcb, struct sctp_association *asoc) } } -extern int sctp_min_split_point; static __inline int sctp_can_we_split_this(struct sctp_tcb *stcb, @@ -5194,14 +6404,14 @@ sctp_fill_outqueue(struct sctp_tcb *stcb, struct sctp_nets *net, int frag_point, int eeor_mode) { struct sctp_association *asoc; - struct sctp_stream_out *strq, *strqn; + struct sctp_stream_out *strq, *strqn, *strqt; int goal_mtu, moved_how_much, total_moved = 0; int locked, giveup; struct sctp_stream_queue_pending *sp; SCTP_TCB_LOCK_ASSERT(stcb); asoc = 
&stcb->asoc; -#ifdef AF_INET6 +#ifdef INET6 if (net->ro._l_addr.sin6.sin6_family == AF_INET6) { goal_mtu = net->mtu - SCTP_MIN_OVERHEAD; } else { @@ -5268,13 +6478,14 @@ sctp_fill_outqueue(struct sctp_tcb *stcb, break; } else { asoc->locked_on_sending = NULL; + strqt = sctp_select_a_stream(stcb, asoc); if (TAILQ_FIRST(&strq->outqueue) == NULL) { sctp_remove_from_wheel(stcb, asoc, strq); } if (giveup) { break; } - strq = sctp_select_a_stream(stcb, asoc); + strq = strqt; if (strq == NULL) { break; } @@ -5336,8 +6547,6 @@ sctp_move_to_an_alt(struct sctp_tcb *stcb, } } -extern int sctp_early_fr; - int sctp_med_chunk_output(struct sctp_inpcb *inp, struct sctp_tcb *stcb, @@ -5364,7 +6573,7 @@ sctp_med_chunk_output(struct sctp_inpcb *inp, /* temp arrays for unlinking */ struct sctp_tmit_chunk *data_list[SCTP_MAX_DATA_BUNDLING]; int no_fragmentflg, error; - int one_chunk, hbflag; + int one_chunk, hbflag, skip_data_for_this_net; int asconf, cookie, no_out_cnt; int bundle_at, ctl_cnt, no_data_chunks, cwnd_full_ind, eeor_mode; unsigned int mtu, r_mtu, omtu, mx_mtu, to_out; @@ -5515,7 +6724,11 @@ again_one_more_time: endoutchain = outchain = NULL; no_fragmentflg = 1; one_chunk = 0; - + if (net->dest_state & SCTP_ADDR_UNCONFIRMED) { + skip_data_for_this_net = 1; + } else { + skip_data_for_this_net = 0; + } if ((net->ro.ro_rt) && (net->ro.ro_rt->rt_ifp)) { /* * if we have a route and an ifp check to see if we @@ -5741,9 +6954,8 @@ again_one_more_time: */ sctp_move_to_an_alt(stcb, asoc, net); } - sctp_clean_up_ctl(stcb, asoc); *reason_code = 7; - return (error); + continue; } else asoc->ifp_had_enobuf = 0; /* Only HB or ASCONF advances time */ @@ -5804,7 +7016,7 @@ again_one_more_time: else omtu = 0; } - if (((asoc->state & SCTP_STATE_OPEN) == SCTP_STATE_OPEN) || + if ((((asoc->state & SCTP_STATE_OPEN) == SCTP_STATE_OPEN) && (skip_data_for_this_net == 0)) || (cookie)) { for (chk = TAILQ_FIRST(&asoc->send_queue); chk; chk = nchk) { if (no_data_chunks) { @@ -6009,9 +7221,8 @@ 
again_one_more_time: */ sctp_move_to_an_alt(stcb, asoc, net); } - sctp_clean_up_ctl(stcb, asoc); *reason_code = 6; - return (error); + continue; } else { asoc->ifp_had_enobuf = 0; } @@ -6907,7 +8118,11 @@ one_chunk_around: * flag since this flag dictates if we * subtracted from the fs */ - data_list[i]->rec.data.chunk_was_revoked = 0; + if (data_list[i]->rec.data.chunk_was_revoked) { + /* Deflate the cwnd */ + data_list[i]->whoTo->cwnd -= data_list[i]->book_size; + data_list[i]->rec.data.chunk_was_revoked = 0; + } data_list[i]->snd_count++; sctp_ucount_decr(asoc->sent_queue_retran_cnt); /* record the time */ @@ -7526,10 +8741,14 @@ sctp_send_sack(struct sctp_tcb *stcb) sctp_alloc_a_chunk(stcb, a_chk); if (a_chk == NULL) { /* No memory so we drop the idea, and set a timer */ - sctp_timer_stop(SCTP_TIMER_TYPE_RECV, - stcb->sctp_ep, stcb, NULL, SCTP_FROM_SCTP_OUTPUT + SCTP_LOC_5); - sctp_timer_start(SCTP_TIMER_TYPE_RECV, - stcb->sctp_ep, stcb, NULL); + if (stcb->asoc.delayed_ack) { + sctp_timer_stop(SCTP_TIMER_TYPE_RECV, + stcb->sctp_ep, stcb, NULL, SCTP_FROM_SCTP_OUTPUT + SCTP_LOC_5); + sctp_timer_start(SCTP_TIMER_TYPE_RECV, + stcb->sctp_ep, stcb, NULL); + } else { + stcb->asoc.send_sack = 1; + } return; } a_chk->copy_by_ref = 0; @@ -7537,6 +8756,9 @@ sctp_send_sack(struct sctp_tcb *stcb) a_chk->rec.chunk_id.id = SCTP_SELECTIVE_ACK; a_chk->rec.chunk_id.can_take_data = 1; } + /* Clear our pkt counts */ + asoc->data_pkts_seen = 0; + a_chk->asoc = asoc; a_chk->snd_count = 0; a_chk->send_size = 0; /* fill in later */ @@ -7595,10 +8817,14 @@ sctp_send_sack(struct sctp_tcb *stcb) if (a_chk->whoTo) atomic_subtract_int(&a_chk->whoTo->ref_count, 1); sctp_free_a_chunk(stcb, a_chk); - sctp_timer_stop(SCTP_TIMER_TYPE_RECV, - stcb->sctp_ep, stcb, NULL, SCTP_FROM_SCTP_OUTPUT + SCTP_LOC_6); - sctp_timer_start(SCTP_TIMER_TYPE_RECV, - stcb->sctp_ep, stcb, NULL); + if (stcb->asoc.delayed_ack) { + sctp_timer_stop(SCTP_TIMER_TYPE_RECV, + stcb->sctp_ep, stcb, NULL, SCTP_FROM_SCTP_OUTPUT 
+ SCTP_LOC_6); + sctp_timer_start(SCTP_TIMER_TYPE_RECV, + stcb->sctp_ep, stcb, NULL); + } else { + stcb->asoc.send_sack = 1; + } return; } /* ok, lets go through and fill it in */ @@ -7737,6 +8963,7 @@ sctp_send_sack(struct sctp_tcb *stcb) sack->ch.chunk_length = htons(a_chk->send_size); TAILQ_INSERT_TAIL(&asoc->control_send_queue, a_chk, sctp_next); asoc->ctrl_queue_cnt++; + asoc->send_sack = 0; SCTP_STAT_INCR(sctps_sendsacks); return; } @@ -9214,7 +10441,6 @@ sctp_sosend(struct socket *so, } -extern unsigned int sctp_add_more_threshold; int sctp_lower_sosend(struct socket *so, struct sockaddr *addr, @@ -9381,6 +10607,8 @@ sctp_lower_sosend(struct socket *so, * UDP style, we must go ahead and start the INIT * process */ + uint32_t vrf; + if ((use_rcvinfo) && (srcv) && ((srcv->sinfo_flags & SCTP_ABORT) || ((srcv->sinfo_flags & SCTP_EOF) && @@ -9393,7 +10621,8 @@ sctp_lower_sosend(struct socket *so, goto out_unlocked; } /* get an asoc/stcb struct */ - stcb = sctp_aloc_assoc(inp, addr, 1, &error, 0); + vrf = SCTP_DEFAULT_VRFID; + stcb = sctp_aloc_assoc(inp, addr, 1, &error, 0, vrf); if (stcb == NULL) { /* Error is setup for us in the call */ goto out_unlocked; @@ -9522,7 +10751,7 @@ sctp_lower_sosend(struct socket *so, asoc = &stcb->asoc; } } - if (((so->so_state & SS_NBIO) + if ((SCTP_SO_IS_NBIO(so) || (flags & MSG_NBIO) )) { non_blocking = 1; diff --git a/sys/netinet/sctp_output.h b/sys/netinet/sctp_output.h index e6dca42..5daa406 100644 --- a/sys/netinet/sctp_output.h +++ b/sys/netinet/sctp_output.h @@ -39,6 +39,43 @@ __FBSDID("$FreeBSD$"); #include <netinet/sctp_header.h> #if defined(_KERNEL) + +struct mbuf * +sctp_get_mbuf_for_msg(unsigned int space_needed, + int want_header, int how, int allonebuf, int type); + + + +struct mbuf * +sctp_add_addresses_to_i_ia(struct sctp_inpcb *inp, + struct sctp_scoping *scope, + struct mbuf *m_at, + int cnt_inits_to); + + +int sctp_is_addr_restricted(struct sctp_tcb *, struct sctp_ifa *); + + +int 
+sctp_is_address_in_scope(struct sctp_ifa *ifa, + int ipv4_addr_legal, + int ipv6_addr_legal, + int loopback_scope, + int ipv4_local_scope, + int local_scope, + int site_scope, + int do_update); +int + sctp_is_addr_in_ep(struct sctp_inpcb *inp, struct sctp_ifa *ifa); + +struct sctp_ifa * +sctp_source_address_selection(struct sctp_inpcb *inp, + struct sctp_tcb *stcb, + struct route *ro, struct sctp_nets *net, + int non_asoc_addr_ok, uint32_t vrf_id); + + + void sctp_send_initiate(struct sctp_inpcb *, struct sctp_tcb *); void @@ -111,10 +148,6 @@ sctp_send_packet_dropped(struct sctp_tcb *, struct sctp_nets *, struct mbuf *, void sctp_send_cwr(struct sctp_tcb *, struct sctp_nets *, uint32_t); -struct mbuf * -sctp_get_mbuf_for_msg(unsigned int space_needed, - int want_header, int how, int allonebuf, int type); - void sctp_add_stream_reset_out(struct sctp_tmit_chunk *chk, int number_entries, uint16_t * list, diff --git a/sys/netinet/sctp_pcb.c b/sys/netinet/sctp_pcb.c index e68c2b8..82e6e24 100644 --- a/sys/netinet/sctp_pcb.c +++ b/sys/netinet/sctp_pcb.c @@ -36,6 +36,7 @@ __FBSDID("$FreeBSD$"); #include <netinet/sctp_os.h> #include <sys/proc.h> #include <netinet/sctp_var.h> +#include <netinet/sctp_sysctl.h> #include <netinet/sctp_pcb.h> #include <netinet/sctputil.h> #include <netinet/sctp.h> @@ -43,18 +44,9 @@ __FBSDID("$FreeBSD$"); #include <netinet/sctp_asconf.h> #include <netinet/sctp_output.h> #include <netinet/sctp_timer.h> +#include <netinet/sctp_bsd_addr.h> -#ifdef SCTP_DEBUG -uint32_t sctp_debug_on = 0; - -#endif /* SCTP_DEBUG */ - - -extern int sctp_pcbtblsize; -extern int sctp_hashtblsize; -extern int sctp_chunkscale; - struct sctp_epinfo sctppcbinfo; /* FIX: we don't handle multiple link local scopes */ @@ -72,7 +64,6 @@ SCTP6_ARE_ADDR_EQUAL(struct in6_addr *a, struct in6_addr *b) return (IN6_ARE_ADDR_EQUAL(&tmp_a, &tmp_b)); } - void sctp_fill_pcbinfo(struct sctp_pcbinfo *spcb) { @@ -93,6 +84,298 @@ sctp_fill_pcbinfo(struct sctp_pcbinfo *spcb) 
SCTP_INP_INFO_RUNLOCK(); } +/* + * Addresses are added to VRF's (Virtual Router's). For BSD we + * have only the default VRF 0. We maintain a hash list of + * VRF's. Each VRF has its own list of sctp_ifn's. Each of + * these has a list of addresses. When we add a new address + * to a VRF we lookup the ifn/ifn_index, if the ifn does + * not exist we create it and add it to the list of IFN's + * within the VRF. Once we have the sctp_ifn, we add the + * address to the list. So we look something like: + * + * hash-vrf-table + * vrf-> ifn-> ifn -> ifn + * vrf | + * ... +--ifa-> ifa -> ifa + * vrf + * + * We keep these separate lists since the SCTP subsystem will + * point to these from its source address selection nets structure. + * When an address is deleted it does not happen right away on + * the SCTP side, it gets scheduled. What we do when a + * delete happens is immediately remove the address from + * the master list and decrement the refcount. As our + * addip iterator works through and frees the src address + * selection pointing to the sctp_ifa, eventually the refcount + * will reach 0 and we will delete it. Note that it is assumed + * that any locking on system level ifn/ifa is done at the + * caller of these functions and these routines will only + * lock the SCTP structures as they add or delete things. + * + * Other notes on VRF concepts. + * - An endpoint can be in multiple VRF's + * - An association lives within a VRF and only one VRF. + * - Any incoming packet we can deduce the VRF for by + * looking at the mbuf/pak inbound (for BSD its VRF=0 :D) + * - Any downward send call or connect call must supply the + * VRF via ancillary data or via some sort of set default + * VRF socket option call (again for BSD no brainer since + * the VRF is always 0). + * - An endpoint may add multiple VRF's to it.
+ * - Listening sockets can accept associations in any
+ *   of the VRF's they are in but the assoc will end up
+ *   in only one VRF (gotten from the packet or connect/send).
+ *
+ */
+
+/*
+ * Return the VRF whose id is vrfid, creating it when it does not exist yet.
+ * A new VRF is zeroed, given an empty ifn list and inserted at the head of
+ * its hash bucket.  Returns NULL when the allocation fails (panics instead
+ * under INVARIANTS).  No lock is taken here; the caller in this file,
+ * sctp_add_addr_to_vrf(), invokes it with SCTP_IPI_ADDR_LOCK held.
+ */
+struct sctp_vrf *
+sctp_allocate_vrf(int vrfid)
+{
+	struct sctp_vrf *vrf = NULL;
+	struct sctp_vrflist *bucket;
+
+	/* First allocate the VRF structure */
+	vrf = sctp_find_vrf(vrfid);
+	if (vrf) {
+		/* Already allocated */
+		return (vrf);
+	}
+	SCTP_MALLOC(vrf, struct sctp_vrf *, sizeof(struct sctp_vrf),
+	    "SCTP_VRF");
+	if (vrf == NULL) {
+		/* No memory */
+#ifdef INVARIANTS
+		panic("No memory for VRF:%d", vrfid);
+#endif
+		return (NULL);
+	}
+	/* setup the VRF */
+	memset(vrf, 0, sizeof(struct sctp_vrf));
+	vrf->vrf_id = vrfid;
+	LIST_INIT(&vrf->ifnlist);
+	vrf->total_ifa_count = 0;
+	/* Add it to the hash table */
+	bucket = &sctppcbinfo.sctp_vrfhash[(vrfid & sctppcbinfo.hashvrfmark)];
+	LIST_INSERT_HEAD(bucket, vrf, next_vrf);
+	return (vrf);
+}
+
+
+/*
+ * Search the ifn list of vrf for an entry that either has the given
+ * ifn_index or whose ifn_p equals the (non-NULL) ifn pointer.  Returns the
+ * first match, or NULL when the list holds no such entry.
+ */
+struct sctp_ifn *
+sctp_find_ifn(struct sctp_vrf *vrf, void *ifn, uint32_t ifn_index)
+{
+	struct sctp_ifn *sctp_ifnp;
+
+	/*
+	 * We assume the lock is held for the addresses if thats wrong
+	 * problems could occur :-)
+	 */
+	LIST_FOREACH(sctp_ifnp, &vrf->ifnlist, next_ifn) {
+		if (sctp_ifnp->ifn_index == ifn_index) {
+			return (sctp_ifnp);
+		}
+		if (sctp_ifnp->ifn_p && ifn && (sctp_ifnp->ifn_p == ifn)) {
+			return (sctp_ifnp);
+		}
+	}
+	return (NULL);
+}
+
+/*
+ * Look up a VRF by id in the VRF hash table.  Returns NULL when no VRF with
+ * that id has been allocated.
+ */
+struct sctp_vrf *
+sctp_find_vrf(uint32_t vrfid)
+{
+	struct sctp_vrflist *bucket;
+	struct sctp_vrf *liste;
+
+	bucket = &sctppcbinfo.sctp_vrfhash[(vrfid & sctppcbinfo.hashvrfmark)];
+	LIST_FOREACH(liste, bucket, next_vrf) {
+		if (vrfid == liste->vrf_id) {
+			return (liste);
+		}
+	}
+	return (NULL);
+}
+
+/*
+ * Drop one reference on sctp_ifap and free it when that was the last one.
+ */
+void
+sctp_free_ifa(struct sctp_ifa *sctp_ifap)
+{
+	int ret;
+
+	/* atomic_fetchadd_int() hands back the value before the decrement */
+	ret = atomic_fetchadd_int(&sctp_ifap->refcount, -1);
+	if (ret == 1) {
+		/* We zero'd the count */
+		SCTP_FREE(sctp_ifap);
+	}
+}
+
+/*
+ * Record the address addr, living on interface ifn/ifn_index, in VRF vrfid.
+ * The VRF and the sctp_ifn are created on demand.  When the address is
+ * already known the existing sctp_ifa is returned (and switched back to
+ * valid if it was marked as being deleted on the same interface); otherwise
+ * a new sctp_ifa is allocated, classified as loopback/private/global and
+ * linked onto the ifn with a reference count of 1.
+ *
+ * Returns the sctp_ifa, or NULL when memory could not be obtained.
+ *
+ * NOTE(review): SCTP_IPI_ADDR_LOCK is released around both allocations and
+ * the lists are not re-checked after it is re-acquired - confirm that
+ * callers serialise address additions.
+ */
+struct sctp_ifa *
+sctp_add_addr_to_vrf(uint32_t vrfid, void *ifn, uint32_t ifn_index,
+    uint32_t ifn_type, const char *if_name,
+    void *ifa, struct sockaddr *addr, uint32_t ifa_flags)
+{
+	struct sctp_vrf *vrf;
+	struct sctp_ifn *sctp_ifnp = NULL;
+	struct sctp_ifa *sctp_ifap = NULL;
+
+	/* How granular do we need the locks to be here? */
+	SCTP_IPI_ADDR_LOCK();
+	vrf = sctp_find_vrf(vrfid);
+	if (vrf == NULL) {
+		vrf = sctp_allocate_vrf(vrfid);
+		if (vrf == NULL) {
+			SCTP_IPI_ADDR_UNLOCK();
+			return (NULL);
+		}
+	}
+	sctp_ifnp = sctp_find_ifn(vrf, ifn, ifn_index);
+	if (sctp_ifnp == NULL) {
+		/*
+		 * build one and add it, can't hold lock until after malloc
+		 * done though.
+		 */
+		SCTP_IPI_ADDR_UNLOCK();
+		SCTP_MALLOC(sctp_ifnp, struct sctp_ifn *, sizeof(struct sctp_ifn), "SCTP_IFN");
+		if (sctp_ifnp == NULL) {
+#ifdef INVARIANTS
+			/*
+			 * sctp_ifnp is NULL on this path, so report the
+			 * index we were asked to create rather than
+			 * dereferencing it.
+			 */
+			panic("No memory for IFN:%u", ifn_index);
+#endif
+			return (NULL);
+		}
+		sctp_ifnp->ifn_index = ifn_index;
+		sctp_ifnp->ifn_p = ifn;
+		sctp_ifnp->ifn_type = ifn_type;
+		sctp_ifnp->ifa_count = 0;
+		sctp_ifnp->refcount = 0;
+		sctp_ifnp->vrf = vrf;
+		memcpy(sctp_ifnp->ifn_name, if_name, SCTP_IFNAMSIZ);
+		LIST_INIT(&sctp_ifnp->ifalist);
+		SCTP_IPI_ADDR_LOCK();
+		LIST_INSERT_HEAD(&vrf->ifnlist, sctp_ifnp, next_ifn);
+	}
+	sctp_ifap = sctp_find_ifa_by_addr(addr, vrf->vrf_id, 1);
+	if (sctp_ifap) {
+		/* Hmm, it already exists? */
+		if ((sctp_ifap->ifn_p) &&
+		    (sctp_ifap->ifn_p->ifn_index == ifn_index)) {
+			if (sctp_ifap->localifa_flags & SCTP_BEING_DELETED) {
+				/* easy to solve, just switch back to active */
+				sctp_ifap->localifa_flags = SCTP_ADDR_VALID;
+				sctp_ifap->ifn_p = sctp_ifnp;
+		exit_stage_left:
+				SCTP_IPI_ADDR_UNLOCK();
+				return (sctp_ifap);
+			} else {
+				goto exit_stage_left;
+			}
+		} else {
+			if (sctp_ifap->ifn_p) {
+				/*
+				 * The first IFN gets the address,
+				 * duplicates are ignored.
+				 */
+				goto exit_stage_left;
+			} else {
+				/* repair ifnp which was NULL ? */
+				sctp_ifap->localifa_flags = SCTP_ADDR_VALID;
+				sctp_ifap->ifn_p = sctp_ifnp;
+				atomic_add_int(&sctp_ifnp->refcount, 1);
+			}
+			goto exit_stage_left;
+		}
+	}
+	SCTP_IPI_ADDR_UNLOCK();
+	SCTP_MALLOC(sctp_ifap, struct sctp_ifa *, sizeof(struct sctp_ifa), "SCTP_IFA");
+	if (sctp_ifap == NULL) {
+#ifdef INVARIANTS
+		panic("No memory for IFA");
+#endif
+		return (NULL);
+	}
+	/*
+	 * Clear the whole structure, not just pointer-size bytes of it: the
+	 * src_is_* fields below are only ever set to 1 and rely on starting
+	 * out as 0.
+	 */
+	memset(sctp_ifap, 0, sizeof(struct sctp_ifa));
+	sctp_ifap->ifn_p = sctp_ifnp;
+	atomic_add_int(&sctp_ifnp->refcount, 1);
+
+	sctp_ifap->ifa = ifa;
+	memcpy(&sctp_ifap->address, addr, addr->sa_len);
+	sctp_ifap->localifa_flags = SCTP_ADDR_VALID | SCTP_ADDR_DEFER_USE;
+	sctp_ifap->flags = ifa_flags;
+	/* Set scope */
+	if (sctp_ifap->address.sa.sa_family == AF_INET) {
+		struct sockaddr_in *sin;
+
+		sin = (struct sockaddr_in *)&sctp_ifap->address.sin;
+		if (SCTP_IFN_IS_IFT_LOOP(sctp_ifap->ifn_p) ||
+		    (IN4_ISLOOPBACK_ADDRESS(&sin->sin_addr))) {
+			sctp_ifap->src_is_loop = 1;
+		}
+		if ((IN4_ISPRIVATE_ADDRESS(&sin->sin_addr))) {
+			sctp_ifap->src_is_priv = 1;
+		}
+	} else if (sctp_ifap->address.sa.sa_family == AF_INET6) {
+		/* ok to use deprecated addresses? */
+		struct sockaddr_in6 *sin6;
+
+		sin6 = (struct sockaddr_in6 *)&sctp_ifap->address.sin6;
+		if (SCTP_IFN_IS_IFT_LOOP(sctp_ifap->ifn_p) ||
+		    (IN6_IS_ADDR_LOOPBACK(&sin6->sin6_addr))) {
+			sctp_ifap->src_is_loop = 1;
+		}
+		if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) {
+			sctp_ifap->src_is_priv = 1;
+		}
+	}
+	if ((sctp_ifap->src_is_priv == 0) &&
+	    (sctp_ifap->src_is_loop == 0)) {
+		sctp_ifap->src_is_glob = 1;
+	}
+	SCTP_IPI_ADDR_LOCK();
+	sctp_ifap->refcount = 1;
+	LIST_INSERT_HEAD(&sctp_ifnp->ifalist, sctp_ifap, next_ifa);
+	sctp_ifnp->ifa_count++;
+	vrf->total_ifa_count++;
+	SCTP_IPI_ADDR_UNLOCK();
+	return (sctp_ifap);
+}
+
+/*
+ * Unlink the address addr from VRF vrfid.  The interface is looked up by
+ * ifn_index; when no such interface is known the address is searched for
+ * across the VRF instead.  A matching sctp_ifa is flagged
+ * SCTP_BEING_DELETED, taken off its list and the counters of the VRF and of
+ * the owning interface are adjusted.  The sctp_ifa itself is not freed
+ * here; it is returned to the caller.  Returns NULL when the VRF or the
+ * address cannot be found.
+ */
+struct sctp_ifa *
+sctp_del_addr_from_vrf(uint32_t vrfid, struct sockaddr *addr,
+    uint32_t ifn_index)
+{
+	struct sctp_vrf *vrf;
+	struct sctp_ifa *sctp_ifap = NULL;
+	struct sctp_ifn *sctp_ifnp = NULL;
+
+	SCTP_IPI_ADDR_LOCK();
+
+	vrf = sctp_find_vrf(vrfid);
+	if (vrf == NULL) {
+		printf("Can't find vrfid:%d\n", vrfid);
+		goto out_now;
+	}
+	sctp_ifnp = sctp_find_ifn(vrf, (void *)NULL, ifn_index);
+	if (sctp_ifnp == NULL) {
+		sctp_ifap = sctp_find_ifa_by_addr(addr, vrf->vrf_id, 1);
+	} else {
+		sctp_ifap = sctp_find_ifa_in_ifn(sctp_ifnp, addr, 1);
+	}
+
+	if (sctp_ifap) {
+		sctp_ifap->localifa_flags &= SCTP_ADDR_VALID;
+		sctp_ifap->localifa_flags |= SCTP_BEING_DELETED;
+		if (sctp_ifnp == NULL) {
+			/*
+			 * The address was found by the VRF-wide search, so
+			 * the interface lookup came back empty; account
+			 * against the interface the address hangs off.
+			 */
+			sctp_ifnp = sctp_ifap->ifn_p;
+		}
+		if (sctp_ifnp) {
+			sctp_ifnp->ifa_count--;
+		}
+		vrf->total_ifa_count--;
+		LIST_REMOVE(sctp_ifap, next_ifa);
+		if (sctp_ifnp) {
+			atomic_add_int(&sctp_ifnp->refcount, -1);
+		}
+	} else {
+		printf("Del Addr-ifn:%d Could not find address:",
+		    ifn_index);
+		sctp_print_address(addr);
+	}
+out_now:
+	SCTP_IPI_ADDR_UNLOCK();
+	return (sctp_ifap);
+}
 
 /*
  * Notes on locks for FreeBSD 5 and up.
All association lookups that have a @@ -177,21 +460,20 @@ sctp_tcb_special_locate(struct sctp_inpcb **inp_p, struct sockaddr *from, #endif continue; } - if (laddr->ifa->ifa_addr == NULL) { + if (laddr->ifa->localifa_flags & SCTP_BEING_DELETED) { #ifdef SCTP_DEBUG if (sctp_debug_on & SCTP_DEBUG_PCB1) { - printf("ifa with a NULL address\n"); + printf("ifa being deleted\n"); } #endif continue; } - if (laddr->ifa->ifa_addr->sa_family == + if (laddr->ifa->address.sa.sa_family == to->sa_family) { /* see if it matches */ struct sockaddr_in *intf_addr, *sin; - intf_addr = (struct sockaddr_in *) - laddr->ifa->ifa_addr; + intf_addr = &laddr->ifa->address.sin; sin = (struct sockaddr_in *)to; if (from->sa_family == AF_INET) { if (sin->sin_addr.s_addr == @@ -205,8 +487,7 @@ sctp_tcb_special_locate(struct sctp_inpcb **inp_p, struct sockaddr *from, sin6 = (struct sockaddr_in6 *) to; - intf_addr6 = (struct sockaddr_in6 *) - laddr->ifa->ifa_addr; + intf_addr6 = &laddr->ifa->address.sin6; if (SCTP6_ARE_ADDR_EQUAL(&sin6->sin6_addr, &intf_addr6->sin6_addr)) { @@ -595,12 +876,13 @@ sctp_findassociation_ep_asocid(struct sctp_inpcb *inp, sctp_assoc_t asoc_id, int static struct sctp_inpcb * sctp_endpoint_probe(struct sockaddr *nam, struct sctppcbhead *head, - uint16_t lport) + uint16_t lport, uint32_t vrf_id) { struct sctp_inpcb *inp; struct sockaddr_in *sin; struct sockaddr_in6 *sin6; struct sctp_laddr *laddr; + int fnd; /* * Endpoing probe expects that the INP_INFO is locked. @@ -639,7 +921,14 @@ sctp_endpoint_probe(struct sockaddr *nam, struct sctppcbhead *head, SCTP_INP_RUNLOCK(inp); continue; } + /* does a VRF id match? */ + fnd = 0; + if (inp->def_vrf_id == vrf_id) + fnd = 1; + SCTP_INP_RUNLOCK(inp); + if (!fnd) + continue; return (inp); } SCTP_INP_RUNLOCK(inp); @@ -676,6 +965,15 @@ sctp_endpoint_probe(struct sockaddr *nam, struct sctppcbhead *head, SCTP_INP_RUNLOCK(inp); continue; } + /* does a VRF id match? 
*/ + fnd = 0; + if (inp->def_vrf_id == vrf_id) + fnd = 1; + + if (!fnd) { + SCTP_INP_RUNLOCK(inp); + continue; + } LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) { if (laddr->ifa == NULL) { #ifdef SCTP_DEBUG @@ -691,20 +989,19 @@ sctp_endpoint_probe(struct sockaddr *nam, struct sctppcbhead *head, laddr->ifa); } #endif - if (laddr->ifa->ifa_addr == NULL) { + if (laddr->ifa->localifa_flags & SCTP_BEING_DELETED) { #ifdef SCTP_DEBUG if (sctp_debug_on & SCTP_DEBUG_PCB1) { - printf("Huh IFA as an ifa_addr=NULL, "); + printf("Huh IFA being deleted\n"); } #endif continue; } - if (laddr->ifa->ifa_addr->sa_family == nam->sa_family) { + if (laddr->ifa->address.sa.sa_family == nam->sa_family) { /* possible, see if it matches */ struct sockaddr_in *intf_addr; - intf_addr = (struct sockaddr_in *) - laddr->ifa->ifa_addr; + intf_addr = &laddr->ifa->address.sin; if (nam->sa_family == AF_INET) { if (sin->sin_addr.s_addr == intf_addr->sin_addr.s_addr) { @@ -714,8 +1011,7 @@ sctp_endpoint_probe(struct sockaddr *nam, struct sctppcbhead *head, } else if (nam->sa_family == AF_INET6) { struct sockaddr_in6 *intf_addr6; - intf_addr6 = (struct sockaddr_in6 *) - laddr->ifa->ifa_addr; + intf_addr6 = &laddr->ifa->address.sin6; if (SCTP6_ARE_ADDR_EQUAL(&sin6->sin6_addr, &intf_addr6->sin6_addr)) { SCTP_INP_RUNLOCK(inp); @@ -731,7 +1027,7 @@ sctp_endpoint_probe(struct sockaddr *nam, struct sctppcbhead *head, struct sctp_inpcb * -sctp_pcb_findep(struct sockaddr *nam, int find_tcp_pool, int have_lock) +sctp_pcb_findep(struct sockaddr *nam, int find_tcp_pool, int have_lock, uint32_t vrf_id) { /* * First we check the hash table to see if someone has this port @@ -765,7 +1061,7 @@ sctp_pcb_findep(struct sockaddr *nam, int find_tcp_pool, int have_lock) } head = &sctppcbinfo.sctp_ephash[SCTP_PCBHASH_ALLADDR(lport, sctppcbinfo.hashmark)]; - inp = sctp_endpoint_probe(nam, head, lport); + inp = sctp_endpoint_probe(nam, head, lport, vrf_id); /* * If the TCP model exists it could be that the main 
listening @@ -786,7 +1082,7 @@ sctp_pcb_findep(struct sockaddr *nam, int find_tcp_pool, int have_lock) */ head = &sctppcbinfo.sctp_tcpephash[i]; if (LIST_FIRST(head)) { - inp = sctp_endpoint_probe(nam, head, lport); + inp = sctp_endpoint_probe(nam, head, lport, vrf_id); if (inp) { /* Found one */ break; @@ -810,7 +1106,7 @@ sctp_pcb_findep(struct sockaddr *nam, int find_tcp_pool, int have_lock) */ struct sctp_tcb * sctp_findassociation_addr_sa(struct sockaddr *to, struct sockaddr *from, - struct sctp_inpcb **inp_p, struct sctp_nets **netp, int find_tcp_pool) + struct sctp_inpcb **inp_p, struct sctp_nets **netp, int find_tcp_pool, uint32_t vrf_id) { struct sctp_inpcb *inp; struct sctp_tcb *retval; @@ -827,7 +1123,7 @@ sctp_findassociation_addr_sa(struct sockaddr *to, struct sockaddr *from, return (retval); } } - inp = sctp_pcb_findep(to, 0, 1); + inp = sctp_pcb_findep(to, 0, 1, vrf_id); if (inp_p != NULL) { *inp_p = inp; } @@ -1027,8 +1323,9 @@ sctp_findassociation_addr(struct mbuf *m, int iphlen, int offset, struct sockaddr *to = (struct sockaddr *)&to_store; struct sockaddr *from = (struct sockaddr *)&from_store; struct sctp_inpcb *inp; + uint32_t vrf_id; - + vrf_id = SCTP_DEFAULT_VRFID; iph = mtod(m, struct ip *); if (iph->ip_v == IPVERSION) { /* its IPv4 */ @@ -1111,11 +1408,11 @@ sctp_findassociation_addr(struct mbuf *m, int iphlen, int offset, } if (inp_p) { retval = sctp_findassociation_addr_sa(to, from, inp_p, netp, - find_tcp_pool); + find_tcp_pool, vrf_id); inp = *inp_p; } else { retval = sctp_findassociation_addr_sa(to, from, &inp, netp, - find_tcp_pool); + find_tcp_pool, vrf_id); } #ifdef SCTP_DEBUG if (sctp_debug_on & SCTP_DEBUG_PCB1) { @@ -1282,24 +1579,6 @@ sctp_findassociation_ep_asconf(struct mbuf *m, int iphlen, int offset, } -extern int sctp_max_burst_default; - -extern unsigned int sctp_delayed_sack_time_default; -extern unsigned int sctp_heartbeat_interval_default; -extern unsigned int sctp_pmtu_raise_time_default; -extern unsigned int 
sctp_shutdown_guard_time_default; -extern unsigned int sctp_secret_lifetime_default; - -extern unsigned int sctp_rto_max_default; -extern unsigned int sctp_rto_min_default; -extern unsigned int sctp_rto_initial_default; -extern unsigned int sctp_init_rto_max_default; -extern unsigned int sctp_valid_cookie_life_default; -extern unsigned int sctp_init_rtx_max_default; -extern unsigned int sctp_assoc_rtx_max_default; -extern unsigned int sctp_path_rtx_max_default; -extern unsigned int sctp_nr_outgoing_streams_default; - /* * allocate a sctp_inpcb and setup a temporary binding to a port/all * addresses. This way if we don't get a bind we by default pick a ephemeral @@ -1364,21 +1643,21 @@ sctp_inpcb_alloc(struct socket *so) so->so_pcb = (caddr_t)inp; - if ((so->so_type == SOCK_DGRAM) || - (so->so_type == SOCK_SEQPACKET)) { + if ((SCTP_SO_TYPE(so) == SOCK_DGRAM) || + (SCTP_SO_TYPE(so) == SOCK_SEQPACKET)) { /* UDP style socket */ inp->sctp_flags = (SCTP_PCB_FLAGS_UDPTYPE | SCTP_PCB_FLAGS_UNBOUND); sctp_feature_on(inp, SCTP_PCB_FLAGS_RECVDATAIOEVNT); /* Be sure it is NON-BLOCKING IO for UDP */ - /* so->so_state |= SS_NBIO; */ - } else if (so->so_type == SOCK_STREAM) { + /* SCTP_SET_SO_NBIO(so); */ + } else if (SCTP_SO_TYPE(so) == SOCK_STREAM) { /* TCP style socket */ inp->sctp_flags = (SCTP_PCB_FLAGS_TCPTYPE | SCTP_PCB_FLAGS_UNBOUND); sctp_feature_on(inp, SCTP_PCB_FLAGS_RECVDATAIOEVNT); /* Be sure we have blocking IO by default */ - so->so_state &= ~SS_NBIO; + SCTP_CLEAR_SO_NBIO(so); } else { /* * unsupported socket type (RAW, etc)- in case we missed it @@ -1394,6 +1673,8 @@ sctp_inpcb_alloc(struct socket *so) SCTP_ZONE_FREE(sctppcbinfo.ipi_zone_ep, inp); return (ENOBUFS); } + inp->def_vrf_id = SCTP_DEFAULT_VRFID; + SCTP_INP_INFO_WLOCK(); SCTP_INP_LOCK_INIT(inp); SCTP_INP_READ_INIT(inp); @@ -1407,6 +1688,7 @@ sctp_inpcb_alloc(struct socket *so) TAILQ_INIT(&inp->read_queue); LIST_INIT(&inp->sctp_addr_list); + LIST_INIT(&inp->sctp_asoc_list); #ifdef SCTP_TRACK_FREED_ASOCS 
@@ -1433,6 +1715,7 @@ sctp_inpcb_alloc(struct socket *so) m->sctp_minrto = sctp_rto_min_default; m->initial_rto = sctp_rto_initial_default; m->initial_init_rto_max = sctp_init_rto_max_default; + m->sctp_sack_freq = sctp_sack_freq_default; m->max_open_streams_intome = MAX_SCTP_STREAMS; @@ -1470,7 +1753,6 @@ sctp_inpcb_alloc(struct socket *so) /* How long is a cookie good for ? */ m->def_cookie_life = sctp_valid_cookie_life_default; - /* * Initialize authentication parameters */ @@ -1601,10 +1883,11 @@ sctp_move_pcb_and_assoc(struct sctp_inpcb *old_inp, struct sctp_inpcb *new_inp, } static int -sctp_isport_inuse(struct sctp_inpcb *inp, uint16_t lport) +sctp_isport_inuse(struct sctp_inpcb *inp, uint16_t lport, uint32_t vrf_id) { struct sctppcbhead *head; struct sctp_inpcb *t_inp; + int fnd; head = &sctppcbinfo.sctp_ephash[SCTP_PCBHASH_ALLADDR(lport, sctppcbinfo.hashmark)]; @@ -1613,6 +1896,13 @@ sctp_isport_inuse(struct sctp_inpcb *inp, uint16_t lport) if (t_inp->sctp_lport != lport) { continue; } + /* is it in the VRF in question */ + fnd = 0; + if (t_inp->def_vrf_id == vrf_id) + fnd = 1; + if (!fnd) + continue; + /* This one is in use. */ /* check the v6/v4 binding issue */ if ((t_inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) && @@ -1653,6 +1943,7 @@ sctp_inpcb_bind(struct socket *so, struct sockaddr *addr, struct thread *p) int bindall; uint16_t lport; int error; + uint32_t vrf_id; lport = 0; error = 0; @@ -1712,6 +2003,11 @@ sctp_inpcb_bind(struct socket *so, struct sockaddr *addr, struct thread *p) return (EAFNOSUPPORT); } } + /* + * Setup a vrf_id to be the default for the non-bind-all case. 
+ */ + vrf_id = inp->def_vrf_id; + SCTP_INP_INFO_WLOCK(); SCTP_INP_WLOCK(inp); /* increase our count due to the unlock we do */ @@ -1724,8 +2020,10 @@ sctp_inpcb_bind(struct socket *so, struct sockaddr *addr, struct thread *p) /* got to be root to get at low ports */ if (ntohs(lport) < IPPORT_RESERVED) { if (p && (error = - priv_check(p, - PRIV_NETINET_RESERVEDPORT) + priv_check_cred(p->td_ucred, + PRIV_NETINET_RESERVEDPORT, + SUSER_ALLOWJAIL + ) )) { SCTP_INP_DECR_REF(inp); SCTP_INP_WUNLOCK(inp); @@ -1740,24 +2038,44 @@ sctp_inpcb_bind(struct socket *so, struct sockaddr *addr, struct thread *p) return (error); } SCTP_INP_WUNLOCK(inp); - inp_tmp = sctp_pcb_findep(addr, 0, 1); - if (inp_tmp != NULL) { - /* - * lock guy returned and lower count note that we - * are not bound so inp_tmp should NEVER be inp. And - * it is this inp (inp_tmp) that gets the reference - * bump, so we must lower it. - */ - SCTP_INP_DECR_REF(inp_tmp); - SCTP_INP_DECR_REF(inp); - /* unlock info */ - SCTP_INP_INFO_WUNLOCK(); - return (EADDRNOTAVAIL); + if (bindall) { + vrf_id = inp->def_vrf_id; + inp_tmp = sctp_pcb_findep(addr, 0, 1, vrf_id); + if (inp_tmp != NULL) { + /* + * lock guy returned and lower count note + * that we are not bound so inp_tmp should + * NEVER be inp. And it is this inp + * (inp_tmp) that gets the reference bump, + * so we must lower it. + */ + SCTP_INP_DECR_REF(inp_tmp); + SCTP_INP_DECR_REF(inp); + /* unlock info */ + SCTP_INP_INFO_WUNLOCK(); + return (EADDRNOTAVAIL); + } + } else { + inp_tmp = sctp_pcb_findep(addr, 0, 1, vrf_id); + if (inp_tmp != NULL) { + /* + * lock guy returned and lower count note + * that we are not bound so inp_tmp should + * NEVER be inp. And it is this inp + * (inp_tmp) that gets the reference bump, + * so we must lower it. 
+ */ + SCTP_INP_DECR_REF(inp_tmp); + SCTP_INP_DECR_REF(inp); + /* unlock info */ + SCTP_INP_INFO_WUNLOCK(); + return (EADDRNOTAVAIL); + } } SCTP_INP_WLOCK(inp); if (bindall) { /* verify that no lport is not used by a singleton */ - if (sctp_isport_inuse(inp, lport)) { + if (sctp_isport_inuse(inp, lport, vrf_id)) { /* Sorry someone already has this one bound */ SCTP_INP_DECR_REF(inp); SCTP_INP_WUNLOCK(inp); @@ -1778,6 +2096,7 @@ sctp_inpcb_bind(struct socket *so, struct sockaddr *addr, struct thread *p) uint32_t port_guess; uint16_t port_attempt; int not_done = 1; + int not_found = 1; while (not_done) { port_guess = sctp_select_initial_TSN(&inp->sctp_ep); @@ -1788,8 +2107,14 @@ sctp_inpcb_bind(struct socket *so, struct sockaddr *addr, struct thread *p) if (port_attempt < IPPORT_RESERVED) { port_attempt += IPPORT_RESERVED; } - if (sctp_isport_inuse(inp, htons(port_attempt)) == 0) { + vrf_id = inp->def_vrf_id; + if (sctp_isport_inuse(inp, htons(port_attempt), vrf_id) == 1) { /* got a port we can use */ + not_found = 0; + break; + } + if (not_found == 1) { + /* We can use this port */ not_done = 0; continue; } @@ -1802,8 +2127,14 @@ sctp_inpcb_bind(struct socket *so, struct sockaddr *addr, struct thread *p) if (port_attempt < IPPORT_RESERVED) { port_attempt += IPPORT_RESERVED; } - if (sctp_isport_inuse(inp, htons(port_attempt)) == 0) { + vrf_id = inp->def_vrf_id; + if (sctp_isport_inuse(inp, htons(port_attempt), vrf_id) == 1) { /* got a port we can use */ + not_found = 0; + break; + } + if (not_found == 1) { + /* We can use this port */ not_done = 0; continue; } @@ -1818,8 +2149,14 @@ sctp_inpcb_bind(struct socket *so, struct sockaddr *addr, struct thread *p) if (port_attempt < IPPORT_RESERVED) { port_attempt += IPPORT_RESERVED; } - if (sctp_isport_inuse(inp, htons(port_attempt)) == 0) { + vrf_id = inp->def_vrf_id; + if (sctp_isport_inuse(inp, htons(port_attempt), vrf_id) == 1) { /* got a port we can use */ + not_found = 0; + break; + } + if (not_found == 1) { + /* We 
can use this port */ not_done = 0; continue; } @@ -1860,7 +2197,7 @@ sctp_inpcb_bind(struct socket *so, struct sockaddr *addr, struct thread *p) * well. It will also have to do the embed scope kame hack * too (before adding). */ - struct ifaddr *ifa; + struct sctp_ifa *ifa; struct sockaddr_storage store_sa; memset(&store_sa, 0, sizeof(store_sa)); @@ -1882,7 +2219,7 @@ sctp_inpcb_bind(struct socket *so, struct sockaddr *addr, struct thread *p) * zero out the port to find the address! yuck! can't do * this earlier since need port for sctp_pcb_findep() */ - ifa = sctp_find_ifa_by_addr((struct sockaddr *)&store_sa); + ifa = sctp_find_ifa_by_addr((struct sockaddr *)&store_sa, vrf_id, 0); if (ifa == NULL) { /* Can't find an interface with that address */ SCTP_INP_WUNLOCK(inp); @@ -1890,16 +2227,8 @@ sctp_inpcb_bind(struct socket *so, struct sockaddr *addr, struct thread *p) return (EADDRNOTAVAIL); } if (addr->sa_family == AF_INET6) { - struct in6_ifaddr *ifa6; - - ifa6 = (struct in6_ifaddr *)ifa; - /* - * allow binding of deprecated addresses as per RFC - * 2462 and ipng discussion - */ - if (ifa6->ia6_flags & (IN6_IFF_DETACHED | - IN6_IFF_ANYCAST | - IN6_IFF_NOTREADY)) { + /* GAK, more FIXME IFA lock? */ + if (ifa->localifa_flags & SCTP_ADDR_IFA_UNUSEABLE) { /* Can't bind a non-existent addr. 
*/ SCTP_INP_WUNLOCK(inp); SCTP_INP_INFO_WUNLOCK(); @@ -1909,15 +2238,19 @@ sctp_inpcb_bind(struct socket *so, struct sockaddr *addr, struct thread *p) /* we're not bound all */ inp->sctp_flags &= ~SCTP_PCB_FLAGS_BOUNDALL; /* set the automatic addr changes from kernel flag */ + sctp_feature_on(inp, SCTP_PCB_FLAGS_DO_ASCONF); if (sctp_auto_asconf == 0) { sctp_feature_off(inp, SCTP_PCB_FLAGS_AUTO_ASCONF); } else { + /* + * allow bindx() to send ASCONF's for binding + * changes + */ sctp_feature_on(inp, SCTP_PCB_FLAGS_AUTO_ASCONF); } - /* allow bindx() to send ASCONF's for binding changes */ - sctp_feature_on(inp, SCTP_PCB_FLAGS_DO_ASCONF); + /* add this address to the endpoint list */ - error = sctp_insert_laddr(&inp->sctp_addr_list, ifa); + error = sctp_insert_laddr(&inp->sctp_addr_list, ifa, 0); if (error != 0) { SCTP_INP_WUNLOCK(inp); SCTP_INP_INFO_WUNLOCK(); @@ -1964,7 +2297,7 @@ sctp_iterator_inp_being_freed(struct sctp_inpcb *inp, struct sctp_inpcb *inp_nex * those guys. The list of iterators should never be very big * though. */ - LIST_FOREACH(it, &sctppcbinfo.iteratorhead, sctp_nxt_itr) { + TAILQ_FOREACH(it, &sctppcbinfo.iteratorhead, sctp_nxt_itr) { if (it == inp->inp_starting_point_for_iterator) /* skip this guy, he's special */ continue; @@ -2393,9 +2726,7 @@ sctp_inpcb_free(struct sctp_inpcb *inp, int immediate, int from) for ((laddr = LIST_FIRST(&inp->sctp_addr_list)); laddr != NULL; laddr = nladdr) { nladdr = LIST_NEXT(laddr, sctp_nxt_addr); - LIST_REMOVE(laddr, sctp_nxt_addr); - SCTP_ZONE_FREE(sctppcbinfo.ipi_zone_laddr, laddr); - SCTP_DECR_LADDR_COUNT(); + sctp_remove_laddr(laddr); } #ifdef SCTP_TRACK_FREED_ASOCS @@ -2448,48 +2779,27 @@ sctp_findnet(struct sctp_tcb *stcb, struct sockaddr *addr) * stats of stuff. 
*/ int -sctp_is_address_on_local_host(struct sockaddr *addr) +sctp_is_address_on_local_host(struct sockaddr *addr, uint32_t vrf_id) { - struct ifnet *ifn; - struct ifaddr *ifa; - - TAILQ_FOREACH(ifn, &ifnet, if_list) { - TAILQ_FOREACH(ifa, &ifn->if_addrlist, ifa_list) { - if (addr->sa_family == ifa->ifa_addr->sa_family) { - /* same family */ - if (addr->sa_family == AF_INET) { - struct sockaddr_in *sin, *sin_c; - - sin = (struct sockaddr_in *)addr; - sin_c = (struct sockaddr_in *) - ifa->ifa_addr; - if (sin->sin_addr.s_addr == - sin_c->sin_addr.s_addr) { - /* - * we are on the same - * machine - */ - return (1); - } - } else if (addr->sa_family == AF_INET6) { - struct sockaddr_in6 *sin6, *sin_c6; + struct sctp_ifa *sctp_ifa; - sin6 = (struct sockaddr_in6 *)addr; - sin_c6 = (struct sockaddr_in6 *) - ifa->ifa_addr; - if (SCTP6_ARE_ADDR_EQUAL(&sin6->sin6_addr, - &sin_c6->sin6_addr)) { - /* - * we are on the same - * machine - */ - return (1); - } - } - } - } + sctp_ifa = sctp_find_ifa_by_addr(addr, vrf_id, 0); + if (sctp_ifa) { + return (1); + } else { + return (0); } - return (0); +} + +void +sctp_set_initial_cc_param(struct sctp_tcb *stcb, struct sctp_nets *net) +{ + net->cwnd = min((net->mtu * 4), max((2 * net->mtu), SCTP_INITIAL_CWND)); + /* we always get at LEAST 2 MTU's */ + if (net->cwnd < (2 * net->mtu)) { + net->cwnd = 2 * net->mtu; + } + net->ssthresh = stcb->asoc.peers_rwnd; } int @@ -2554,23 +2864,7 @@ sctp_add_remote_addr(struct sctp_tcb *stcb, struct sockaddr *newaddr, stcb->asoc.ipv4_local_scope = 1; } #endif /* SCTP_DONT_DO_PRIVADDR_SCOPE */ - - if (sctp_is_address_on_local_host(newaddr)) { - stcb->asoc.loopback_scope = 1; - stcb->asoc.ipv4_local_scope = 1; - stcb->asoc.local_scope = 1; - stcb->asoc.site_scope = 1; - } } else { - if (from == SCTP_ADDR_IS_CONFIRMED) { - /* From connectx */ - if (sctp_is_address_on_local_host(newaddr)) { - stcb->asoc.loopback_scope = 1; - stcb->asoc.ipv4_local_scope = 1; - stcb->asoc.local_scope = 1; - 
stcb->asoc.site_scope = 1; - } - } /* Validate the address is in scope */ if ((IN4_ISPRIVATE_ADDRESS(&sin->sin_addr)) && (stcb->asoc.ipv4_local_scope == 0)) { @@ -2588,9 +2882,9 @@ sctp_add_remote_addr(struct sctp_tcb *stcb, struct sockaddr *newaddr, /* assure len is set */ sin6->sin6_len = sizeof(struct sockaddr_in6); if (set_scope) { - if (sctp_is_address_on_local_host(newaddr)) { + if (sctp_is_address_on_local_host(newaddr, stcb->asoc.vrf_id)) { stcb->asoc.loopback_scope = 1; - stcb->asoc.local_scope = 1; + stcb->asoc.local_scope = 0; stcb->asoc.ipv4_local_scope = 1; stcb->asoc.site_scope = 1; } else if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) { @@ -2612,15 +2906,6 @@ sctp_add_remote_addr(struct sctp_tcb *stcb, struct sockaddr *newaddr, stcb->asoc.site_scope = 1; } } else { - if (from == SCTP_ADDR_IS_CONFIRMED) { - /* From connectx so we check for localhost. */ - if (sctp_is_address_on_local_host(newaddr)) { - stcb->asoc.loopback_scope = 1; - stcb->asoc.ipv4_local_scope = 1; - stcb->asoc.local_scope = 1; - stcb->asoc.site_scope = 1; - } - } /* Validate the address is in scope */ if (IN6_IS_ADDR_LOOPBACK(&sin6->sin6_addr) && (stcb->asoc.loopback_scope == 0)) { @@ -2650,7 +2935,14 @@ sctp_add_remote_addr(struct sctp_tcb *stcb, struct sockaddr *newaddr, } else if (newaddr->sa_family == AF_INET6) { ((struct sockaddr_in6 *)&net->ro._l_addr)->sin6_port = stcb->rport; } - net->addr_is_local = sctp_is_address_on_local_host(newaddr); + net->addr_is_local = sctp_is_address_on_local_host(newaddr, stcb->asoc.vrf_id); + if (net->addr_is_local && ((set_scope || (from == SCTP_ADDR_IS_CONFIRMED)))) { + stcb->asoc.loopback_scope = 1; + stcb->asoc.ipv4_local_scope = 1; + stcb->asoc.local_scope = 0; + stcb->asoc.site_scope = 1; + addr_inscope = 1; + } net->failure_threshold = stcb->asoc.def_net_failure; if (addr_inscope == 0) { net->dest_state = (SCTP_ADDR_REACHABLE | @@ -2667,11 +2959,11 @@ sctp_add_remote_addr(struct sctp_tcb *stcb, struct sockaddr *newaddr, 
stcb->asoc.numnets++; *(&net->ref_count) = 1; net->tos_flowlabel = 0; -#ifdef AF_INET +#ifdef INET if (newaddr->sa_family == AF_INET) net->tos_flowlabel = stcb->asoc.default_tos; #endif -#ifdef AF_INET6 +#ifdef INET6 if (newaddr->sa_family == AF_INET6) net->tos_flowlabel = stcb->asoc.default_flowlabel; #endif @@ -2715,13 +3007,8 @@ sctp_add_remote_addr(struct sctp_tcb *stcb, struct sockaddr *newaddr, * We take the max of the burst limit times a MTU or the * INITIAL_CWND. We then limit this to 4 MTU's of sending. */ - net->cwnd = min((net->mtu * 4), max((2 * net->mtu), SCTP_INITIAL_CWND)); + sctp_set_initial_cc_param(stcb, net); - /* we always get at LEAST 2 MTU's */ - if (net->cwnd < (2 * net->mtu)) { - net->cwnd = 2 * net->mtu; - } - net->ssthresh = stcb->asoc.peers_rwnd; #if defined(SCTP_CWND_MONITOR) || defined(SCTP_CWND_LOGGING) sctp_log_cwnd(stcb, net, 0, SCTP_CWND_INITIALIZATION); @@ -2820,7 +3107,7 @@ sctp_add_remote_addr(struct sctp_tcb *stcb, struct sockaddr *newaddr, */ struct sctp_tcb * sctp_aloc_assoc(struct sctp_inpcb *inp, struct sockaddr *firstaddr, - int for_a_init, int *error, uint32_t override_tag) + int for_a_init, int *error, uint32_t override_tag, uint32_t vrf) { struct sctp_tcb *stcb; struct sctp_association *asoc; @@ -2920,7 +3207,7 @@ sctp_aloc_assoc(struct sctp_inpcb *inp, struct sockaddr *firstaddr, /* setup back pointer's */ stcb->sctp_ep = inp; stcb->sctp_socket = inp->sctp_socket; - if ((err = sctp_init_asoc(inp, asoc, for_a_init, override_tag))) { + if ((err = sctp_init_asoc(inp, asoc, for_a_init, override_tag, vrf))) { /* failed */ SCTP_TCB_LOCK_DESTROY(stcb); SCTP_TCB_SEND_LOCK_DESTROY(stcb); @@ -3001,15 +3288,13 @@ sctp_remove_net(struct sctp_tcb *stcb, struct sctp_nets *net) asoc = &stcb->asoc; asoc->numnets--; TAILQ_REMOVE(&asoc->nets, net, sctp_next); - sctp_free_remote_addr(net); if (net == asoc->primary_destination) { /* Reset primary */ struct sctp_nets *lnet; lnet = TAILQ_FIRST(&asoc->nets); /* Try to find a confirmed primary 
*/ - asoc->primary_destination = sctp_find_alternate_net(stcb, lnet, - 0); + asoc->primary_destination = sctp_find_alternate_net(stcb, lnet, 0); } if (net == asoc->last_data_chunk_from) { /* Reset primary */ @@ -3019,10 +3304,7 @@ sctp_remove_net(struct sctp_tcb *stcb, struct sctp_nets *net) /* Clear net */ asoc->last_control_chunk_from = NULL; } -/* if (net == asoc->asconf_last_sent_to) {*/ - /* Reset primary */ -/* asoc->asconf_last_sent_to = TAILQ_FIRST(&asoc->nets);*/ -/* }*/ + sctp_free_remote_addr(net); } /* @@ -3631,13 +3913,11 @@ sctp_free_assoc(struct sctp_inpcb *inp, struct sctp_tcb *stcb, int from_inpcbfre sctp_free_remote_addr(net); } - /* local addresses, if any */ - while (!SCTP_LIST_EMPTY(&asoc->sctp_local_addr_list)) { - laddr = LIST_FIRST(&asoc->sctp_local_addr_list); - LIST_REMOVE(laddr, sctp_nxt_addr); - SCTP_ZONE_FREE(sctppcbinfo.ipi_zone_laddr, laddr); - SCTP_DECR_LADDR_COUNT(); + while (!SCTP_LIST_EMPTY(&asoc->sctp_restricted_addrs)) { + laddr = LIST_FIRST(&asoc->sctp_restricted_addrs); + sctp_remove_laddr(laddr); } + /* pending asconf (address) parameters */ while (!TAILQ_EMPTY(&asoc->asconf_queue)) { aparam = TAILQ_FIRST(&asoc->asconf_queue); @@ -3791,12 +4071,12 @@ sctp_update_ep_vflag(struct sctp_inpcb *inp) #endif /* SCTP_DEBUG */ continue; } - if (laddr->ifa->ifa_addr) { + if (laddr->ifa->localifa_flags & SCTP_BEING_DELETED) { continue; } - if (laddr->ifa->ifa_addr->sa_family == AF_INET6) { + if (laddr->ifa->address.sa.sa_family == AF_INET6) { inp->ip_inp.inp.inp_vflag |= INP_IPV6; - } else if (laddr->ifa->ifa_addr->sa_family == AF_INET) { + } else if (laddr->ifa->address.sa.sa_family == AF_INET) { inp->ip_inp.inp.inp_vflag |= INP_IPV4; } } @@ -3807,7 +4087,7 @@ sctp_update_ep_vflag(struct sctp_inpcb *inp) * done if we are bound to all addresses */ int -sctp_add_local_addr_ep(struct sctp_inpcb *inp, struct ifaddr *ifa) +sctp_add_local_addr_ep(struct sctp_inpcb *inp, struct sctp_ifa *ifa, uint32_t action) { struct sctp_laddr *laddr; int 
fnd, error; @@ -3818,14 +4098,11 @@ sctp_add_local_addr_ep(struct sctp_inpcb *inp, struct ifaddr *ifa) /* You are already bound to all. You have it already */ return (0); } - if (ifa->ifa_addr->sa_family == AF_INET6) { - struct in6_ifaddr *ifa6; - - ifa6 = (struct in6_ifaddr *)ifa; - if (ifa6->ia6_flags & (IN6_IFF_DETACHED | - IN6_IFF_DEPRECATED | IN6_IFF_ANYCAST | IN6_IFF_NOTREADY)) - /* Can't bind a non-existent addr. */ + if (ifa->address.sa.sa_family == AF_INET6) { + if (ifa->localifa_flags & SCTP_ADDR_IFA_UNUSEABLE) { + /* Can't bind a non-useable addr. */ return (-1); + } } /* first, is it already present? */ LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) { @@ -3835,16 +4112,16 @@ sctp_add_local_addr_ep(struct sctp_inpcb *inp, struct ifaddr *ifa) } } - if (((inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) == 0) && (fnd == 0)) { - /* Not bound to all */ - error = sctp_insert_laddr(&inp->sctp_addr_list, ifa); + if (fnd == 0) { + /* Not in the ep list */ + error = sctp_insert_laddr(&inp->sctp_addr_list, ifa, action); if (error != 0) return (error); inp->laddr_count++; /* update inp_vflag flags */ - if (ifa->ifa_addr->sa_family == AF_INET6) { + if (ifa->address.sa.sa_family == AF_INET6) { inp->ip_inp.inp.inp_vflag |= INP_IPV6; - } else if (ifa->ifa_addr->sa_family == AF_INET) { + } else if (ifa->address.sa.sa_family == AF_INET) { inp->ip_inp.inp.inp_vflag |= INP_IPV4; } } @@ -3881,7 +4158,7 @@ sctp_select_primary_destination(struct sctp_tcb *stcb) * to be done if we are bound to all addresses */ int -sctp_del_local_addr_ep(struct sctp_inpcb *inp, struct ifaddr *ifa) +sctp_del_local_addr_ep(struct sctp_inpcb *inp, struct sctp_ifa *ifa) { struct sctp_laddr *laddr; int fnd; @@ -3901,7 +4178,7 @@ sctp_del_local_addr_ep(struct sctp_inpcb *inp, struct ifaddr *ifa) /* can't delete unless there are at LEAST 2 addresses */ return (-1); } - if (((inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) == 0) && (fnd)) { + if (fnd) { /* * clean up any use of this address go through 
our * associations and clear any last_used_address that match @@ -3917,29 +4194,40 @@ sctp_del_local_addr_ep(struct sctp_inpcb *inp, struct ifaddr *ifa) /* clean up "last_used_address" */ LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) { + struct sctp_nets *net; + + SCTP_TCB_LOCK(stcb); if (stcb->asoc.last_used_address == laddr) /* delete this address */ stcb->asoc.last_used_address = NULL; + /* + * Now spin through all the nets and purge any ref + * to laddr + */ + TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) { + if (net->ro._s_addr && + (net->ro._s_addr->ifa == laddr->ifa)) { + /* Yep, purge src address selected */ + struct rtentry *rt; + + /* delete this address if cached */ + rt = net->ro.ro_rt; + if (rt != NULL) { + RTFREE(rt); + net->ro.ro_rt = NULL; + } + sctp_free_ifa(net->ro._s_addr); + net->ro._s_addr = NULL; + net->src_addr_selected = 0; + } + } + SCTP_TCB_UNLOCK(stcb); } /* for each tcb */ - /* remove it from the ep list */ sctp_remove_laddr(laddr); inp->laddr_count--; /* update inp_vflag flags */ sctp_update_ep_vflag(inp); - /* select a new primary destination if needed */ - LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) { - /* - * presume caller (sctp_asconf.c) already owns INP - * lock - */ - SCTP_TCB_LOCK(stcb); - if (sctp_destination_is_reachable(stcb, - (struct sockaddr *)&stcb->asoc.primary_destination->ro._l_addr) == 0) { - sctp_select_primary_destination(stcb); - } - SCTP_TCB_UNLOCK(stcb); - } /* for each tcb */ } return (0); } @@ -3951,37 +4239,35 @@ sctp_del_local_addr_ep(struct sctp_inpcb *inp, struct ifaddr *ifa) * "valid" address list */ int -sctp_add_local_addr_assoc(struct sctp_tcb *stcb, struct ifaddr *ifa) +sctp_add_local_addr_assoc(struct sctp_tcb *stcb, struct sctp_ifa *ifa, int restricted_list) { struct sctp_inpcb *inp; struct sctp_laddr *laddr; + struct sctpladdr *list; int error; /* - * Assumes TCP is locked.. and possiblye the INP. May need to + * Assumes TCB is locked.. and possibly the INP. 
May need to * confirm/fix that if we need it and is not the case. */ + list = &stcb->asoc.sctp_restricted_addrs; + inp = stcb->sctp_ep; - if (ifa->ifa_addr->sa_family == AF_INET6) { - struct in6_ifaddr *ifa6; - - ifa6 = (struct in6_ifaddr *)ifa; - if (ifa6->ia6_flags & (IN6_IFF_DETACHED | - /* IN6_IFF_DEPRECATED | */ - IN6_IFF_ANYCAST | - IN6_IFF_NOTREADY)) + if (ifa->address.sa.sa_family == AF_INET6) { + if (ifa->localifa_flags & SCTP_ADDR_IFA_UNUSEABLE) { /* Can't bind a non-existent addr. */ return (-1); + } } /* does the address already exist? */ - LIST_FOREACH(laddr, &stcb->asoc.sctp_local_addr_list, sctp_nxt_addr) { + LIST_FOREACH(laddr, list, sctp_nxt_addr) { if (laddr->ifa == ifa) { return (-1); } } /* add to the list */ - error = sctp_insert_laddr(&stcb->asoc.sctp_local_addr_list, ifa); + error = sctp_insert_laddr(list, ifa, 0); if (error != 0) return (error); return (0); @@ -3991,7 +4277,7 @@ sctp_add_local_addr_assoc(struct sctp_tcb *stcb, struct ifaddr *ifa) * insert an laddr entry with the given ifa for the desired list */ int -sctp_insert_laddr(struct sctpladdr *list, struct ifaddr *ifa) +sctp_insert_laddr(struct sctpladdr *list, struct sctp_ifa *ifa, uint32_t act) { struct sctp_laddr *laddr; @@ -4003,6 +4289,8 @@ sctp_insert_laddr(struct sctpladdr *list, struct ifaddr *ifa) SCTP_INCR_LADDR_COUNT(); bzero(laddr, sizeof(*laddr)); laddr->ifa = ifa; + laddr->action = act; + atomic_add_int(&ifa->refcount, 1); /* insert it */ LIST_INSERT_HEAD(list, laddr, sctp_nxt_addr); @@ -4018,6 +4306,7 @@ sctp_remove_laddr(struct sctp_laddr *laddr) /* remove from the list */ LIST_REMOVE(laddr, sctp_nxt_addr); + sctp_free_ifa(laddr->ifa); SCTP_ZONE_FREE(sctppcbinfo.ipi_zone_laddr, laddr); SCTP_DECR_LADDR_COUNT(); } @@ -4026,7 +4315,7 @@ sctp_remove_laddr(struct sctp_laddr *laddr) * Remove an address from the TCB local address list */ int -sctp_del_local_addr_assoc(struct sctp_tcb *stcb, struct ifaddr *ifa) +sctp_del_local_addr_assoc(struct sctp_tcb *stcb, struct 
sctp_ifa *ifa) { struct sctp_inpcb *inp; struct sctp_laddr *laddr; @@ -4050,7 +4339,7 @@ sctp_del_local_addr_assoc(struct sctp_tcb *stcb, struct ifaddr *ifa) return (-1); } } - LIST_FOREACH(laddr, &stcb->asoc.sctp_local_addr_list, sctp_nxt_addr) { + LIST_FOREACH(laddr, &stcb->asoc.sctp_restricted_addrs, sctp_nxt_addr) { /* remove the address if it exists */ if (laddr->ifa == NULL) continue; @@ -4064,71 +4353,6 @@ sctp_del_local_addr_assoc(struct sctp_tcb *stcb, struct ifaddr *ifa) return (-1); } -/* - * Remove an address from the TCB local address list lookup using a sockaddr - * addr - */ -int -sctp_del_local_addr_assoc_sa(struct sctp_tcb *stcb, struct sockaddr *sa) -{ - struct sctp_inpcb *inp; - struct sctp_laddr *laddr; - struct sockaddr *l_sa; - - /* - * This function I find does not seem to have a caller. As such we - * NEED TO DELETE this code. If we do find a caller, the caller MUST - * have locked the TCB at the least and probably the INP as well. - */ - inp = stcb->sctp_ep; - /* if subset bound and don't allow ASCONF's, can't delete last */ - if (((inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) == 0) && - (sctp_is_feature_off(inp, SCTP_PCB_FLAGS_DO_ASCONF) == 0)) { - if (stcb->asoc.numnets < 2) { - /* can't delete last address */ - return (-1); - } - } - LIST_FOREACH(laddr, &stcb->asoc.sctp_local_addr_list, sctp_nxt_addr) { - /* make sure the address exists */ - if (laddr->ifa == NULL) - continue; - if (laddr->ifa->ifa_addr == NULL) - continue; - - l_sa = laddr->ifa->ifa_addr; - if (l_sa->sa_family == AF_INET6) { - /* IPv6 address */ - struct sockaddr_in6 *sin1, *sin2; - - sin1 = (struct sockaddr_in6 *)l_sa; - sin2 = (struct sockaddr_in6 *)sa; - if (memcmp(&sin1->sin6_addr, &sin2->sin6_addr, - sizeof(struct in6_addr)) == 0) { - /* matched */ - sctp_remove_laddr(laddr); - return (0); - } - } else if (l_sa->sa_family == AF_INET) { - /* IPv4 address */ - struct sockaddr_in *sin1, *sin2; - - sin1 = (struct sockaddr_in *)l_sa; - sin2 = (struct sockaddr_in *)sa; - 
if (sin1->sin_addr.s_addr == sin2->sin_addr.s_addr) { - /* matched */ - sctp_remove_laddr(laddr); - return (0); - } - } else { - /* invalid family */ - return (-1); - } - } /* end foreach */ - /* address not found! */ - return (-1); -} - static char sctp_pcb_initialized = 0; /* @@ -4159,7 +4383,7 @@ sctp_pcb_init() LIST_INIT(&sctppcbinfo.listhead); /* init the iterator head */ - LIST_INIT(&sctppcbinfo.iteratorhead); + TAILQ_INIT(&sctppcbinfo.iteratorhead); /* init the hash table of endpoints */ TUNABLE_INT_FETCH("net.inet.sctp.tcbhashsize", &sctp_hashtblsize); @@ -4177,6 +4401,10 @@ sctp_pcb_init() sctppcbinfo.sctp_restarthash = SCTP_HASH_INIT(SCTP_STACK_VTAG_HASH_SIZE, &sctppcbinfo.hashrestartmark); + + sctppcbinfo.sctp_vrfhash = SCTP_HASH_INIT(SCTP_SIZE_OF_VRF_HASH, + &sctppcbinfo.hashvrfmark); + /* init the zones */ /* * FIX ME: Should check for NULL returns, but if it does fail we are @@ -4215,6 +4443,8 @@ sctp_pcb_init() SCTP_IPI_COUNT_INIT(); SCTP_IPI_ADDR_INIT(); + SCTP_IPI_ITERATOR_WQ_INIT(); + LIST_INIT(&sctppcbinfo.addr_wq); /* not sure if we need all the counts */ @@ -4244,6 +4474,18 @@ sctp_pcb_init() LIST_INIT(&sctppcbinfo.vtag_timewait[i]); } +#if defined(SCTP_USE_THREAD_BASED_ITERATOR) + sctppcbinfo.iterator_running = 0; + sctp_startup_iterator(); +#endif + + /* + * INIT the default VRF which for BSD is the only one, other O/S's + * may have more. But initially they must start with one and then + * add the VRF's as addresses are added. 
+ */ + sctp_init_vrf_list(SCTP_DEFAULT_VRF); + } @@ -4814,8 +5056,9 @@ sctp_set_primary_addr(struct sctp_tcb *stcb, struct sockaddr *sa, } else { /* set the primary address */ if (net->dest_state & SCTP_ADDR_UNCONFIRMED) { - /* Must be confirmed */ - return (-1); + /* Must be confirmed, so queue to set */ + net->dest_state |= SCTP_ADDR_REQ_PRIMARY; + return (0); } stcb->asoc.primary_destination = net; net->dest_state &= ~SCTP_ADDR_WAS_PRIMARY; @@ -4922,89 +5165,9 @@ check_time_wait: } -/* - * Delete the address from the endpoint local address list Lookup using a - * sockaddr address (ie. not an ifaddr) - */ -int -sctp_del_local_addr_ep_sa(struct sctp_inpcb *inp, struct sockaddr *sa) -{ - struct sctp_laddr *laddr; - struct sockaddr *l_sa; - int found = 0; - - /* - * Here is another function I cannot find a caller for. As such we - * SHOULD delete it if we have no users. If we find a user that user - * MUST have the INP locked. - * - */ - - if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) { - /* You are already bound to all. 
You have it already */ - return (EINVAL); - } - LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) { - /* make sure the address exists */ - if (laddr->ifa == NULL) - continue; - if (laddr->ifa->ifa_addr == NULL) - continue; - - l_sa = laddr->ifa->ifa_addr; - if (l_sa->sa_family == AF_INET6) { - /* IPv6 address */ - struct sockaddr_in6 *sin1, *sin2; - - sin1 = (struct sockaddr_in6 *)l_sa; - sin2 = (struct sockaddr_in6 *)sa; - if (memcmp(&sin1->sin6_addr, &sin2->sin6_addr, - sizeof(struct in6_addr)) == 0) { - /* matched */ - found = 1; - break; - } - } else if (l_sa->sa_family == AF_INET) { - /* IPv4 address */ - struct sockaddr_in *sin1, *sin2; - - sin1 = (struct sockaddr_in *)l_sa; - sin2 = (struct sockaddr_in *)sa; - if (sin1->sin_addr.s_addr == sin2->sin_addr.s_addr) { - /* matched */ - found = 1; - break; - } - } else { - /* invalid family */ - return (-1); - } - } - - if (found && inp->laddr_count < 2) { - /* can't delete unless there are at LEAST 2 addresses */ - return (-1); - } - if (found && (inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) == 0) { - /* - * remove it from the ep list, this should NOT be done until - * its really gone from the interface list and we won't be - * receiving more of these. Probably right away. If we do - * allow a removal of an address from an association - * (sub-set bind) than this should NOT be called until the - * all ASCONF come back from this association. 
- */ - sctp_remove_laddr(laddr); - return (0); - } else { - return (-1); - } -} - static sctp_assoc_t reneged_asoc_ids[256]; static uint8_t reneged_at = 0; -extern int sctp_do_drain; static void sctp_drain_mbufs(struct sctp_inpcb *inp, struct sctp_tcb *stcb) @@ -5160,6 +5323,7 @@ sctp_drain_mbufs(struct sctp_inpcb *inp, struct sctp_tcb *stcb) asoc->last_revoke_count = cnt; SCTP_OS_TIMER_STOP(&stcb->asoc.dack_timer.timer); sctp_send_sack(stcb); + sctp_chunk_output(stcb->sctp_ep, stcb, SCTP_OUTPUT_FROM_DRAIN); reneged_asoc_ids[reneged_at] = sctp_get_associd(stcb); reneged_at++; } @@ -5209,9 +5373,17 @@ sctp_drain() * iterated through. */ int -sctp_initiate_iterator(inp_func inpf, asoc_func af, uint32_t pcb_state, - uint32_t pcb_features, uint32_t asoc_state, void *argp, uint32_t argi, - end_func ef, struct sctp_inpcb *s_inp, uint8_t chunk_output_off) +sctp_initiate_iterator(inp_func inpf, + asoc_func af, + inp_func inpe, + uint32_t pcb_state, + uint32_t pcb_features, + uint32_t asoc_state, + void *argp, + uint32_t argi, + end_func ef, + struct sctp_inpcb *s_inp, + uint8_t chunk_output_off) { struct sctp_iterator *it = NULL; @@ -5226,12 +5398,17 @@ sctp_initiate_iterator(inp_func inpf, asoc_func af, uint32_t pcb_state, memset(it, 0, sizeof(*it)); it->function_assoc = af; it->function_inp = inpf; + if (inpf) + it->done_current_ep = 0; + else + it->done_current_ep = 1; it->function_atend = ef; it->pointer = argp; it->val = argi; it->pcb_flags = pcb_state; it->pcb_features = pcb_features; it->asoc_state = asoc_state; + it->function_inp_end = inpe; it->no_chunk_output = chunk_output_off; if (s_inp) { it->inp = s_inp; @@ -5239,17 +5416,29 @@ sctp_initiate_iterator(inp_func inpf, asoc_func af, uint32_t pcb_state, } else { SCTP_INP_INFO_RLOCK(); it->inp = LIST_FIRST(&sctppcbinfo.listhead); + SCTP_INP_INFO_RUNLOCK(); it->iterator_flags = SCTP_ITERATOR_DO_ALL_INP; } + SCTP_IPI_ITERATOR_WQ_LOCK(); + if (it->inp) + SCTP_INP_INCR_REF(it->inp); + 
TAILQ_INSERT_TAIL(&sctppcbinfo.iteratorhead, it, sctp_nxt_itr); +#if defined(SCTP_USE_THREAD_BASED_ITERATOR) + if (sctppcbinfo.iterator_running == 0) { + sctp_wakeup_iterator(); + } + SCTP_IPI_ITERATOR_WQ_UNLOCK(); +#else + if (it->inp) + SCTP_INP_DECR_REF(it->inp); + SCTP_IPI_ITERATOR_WQ_UNLOCK(); /* Init the timer */ SCTP_OS_TIMER_INIT(&it->tmr.timer); /* add to the list of all iterators */ - SCTP_INP_INFO_WLOCK(); - LIST_INSERT_HEAD(&sctppcbinfo.iteratorhead, it, sctp_nxt_itr); - SCTP_INP_INFO_WUNLOCK(); sctp_timer_start(SCTP_TIMER_TYPE_ITERATOR, (struct sctp_inpcb *)it, NULL, NULL); +#endif return (0); } diff --git a/sys/netinet/sctp_pcb.h b/sys/netinet/sctp_pcb.h index e822d41..8c0be0c 100644 --- a/sys/netinet/sctp_pcb.h +++ b/sys/netinet/sctp_pcb.h @@ -44,12 +44,16 @@ LIST_HEAD(sctppcbhead, sctp_inpcb); LIST_HEAD(sctpasochead, sctp_tcb); LIST_HEAD(sctpladdr, sctp_laddr); LIST_HEAD(sctpvtaghead, sctp_tagblock); +LIST_HEAD(sctp_vrflist, sctp_vrf); +LIST_HEAD(sctp_ifnlist, sctp_ifn); +LIST_HEAD(sctp_ifalist, sctp_ifa); TAILQ_HEAD(sctp_readhead, sctp_queued_to_read); TAILQ_HEAD(sctp_streamhead, sctp_stream_queue_pending); #include <netinet/sctp_structs.h> #include <netinet/sctp_uio.h> #include <netinet/sctp_auth.h> +#include <netinet/sctp_bsd_addr.h> /* * PCB flags (in sctp_flags bitmask) @@ -106,10 +110,60 @@ TAILQ_HEAD(sctp_streamhead, sctp_stream_queue_pending); #define SCTP_PCBHASH_ALLADDR(port, mask) (port & mask) #define SCTP_PCBHASH_ASOC(tag, mask) (tag & mask) +struct sctp_vrf { + LIST_ENTRY(sctp_vrf) next_vrf; + struct sctp_ifnlist ifnlist; + uint32_t vrf_id; + uint32_t total_ifa_count; +}; + +struct sctp_ifn { + struct sctp_ifalist ifalist; + struct sctp_vrf *vrf; + LIST_ENTRY(sctp_ifn) next_ifn; + void *ifn_p; /* never access without appropriate lock */ + uint32_t ifn_type; + uint32_t ifn_index; /* shorthand way to look at ifn for reference */ + uint32_t refcount; /* number of reference held should be >= + * ifa_count */ + uint32_t ifa_count; /* IFA's 
we hold (in our list - ifalist) */ + char ifn_name[SCTP_IFNAMSIZ]; +}; + +/* SCTP local IFA flags */ +#define SCTP_ADDR_VALID 0x00000001 /* its up and active */ +#define SCTP_BEING_DELETED 0x00000002 /* being deleted, when + * refcount = 0. Note that it + * is pulled from the ifn list + * and ifa_p is nulled right + * away but it cannot be freed + * until the last *net + * pointing to it is deleted. */ +#define SCTP_ADDR_DEFER_USE 0x00000004 /* Hold off using this one */ +#define SCTP_ADDR_IFA_UNUSEABLE 0x00000008 + +struct sctp_ifa { + LIST_ENTRY(sctp_ifa) next_ifa; + struct sctp_ifn *ifn_p; /* back pointer to parent ifn */ + void *ifa; /* pointer to ifa, needed for flag update for + * that we MUST lock appropriate locks. This + * is for V6. */ + union sctp_sockstore address; + uint32_t refcount; /* number of folks refering to this */ + uint32_t flags; + uint32_t localifa_flags; + uint8_t src_is_loop; + uint8_t src_is_priv; + uint8_t src_is_glob; + uint8_t resv; +}; + struct sctp_laddr { LIST_ENTRY(sctp_laddr) sctp_nxt_addr; /* next in list */ - struct ifaddr *ifa; - int action; /* Only used in delayed asconf stuff */ + struct sctp_ifa *ifa; + uint32_t action; /* Used during asconf and adding if no-zero + * src-addr selection will not consider this + * address. 
*/ }; struct sctp_block_entry { @@ -126,7 +180,6 @@ struct sctp_tagblock { struct sctp_timewait vtag_block[SCTP_NUMBER_IN_VTAG_BLOCK]; }; - struct sctp_epinfo { struct sctpasochead *sctp_asochash; u_long hashasocmark; @@ -153,6 +206,9 @@ struct sctp_epinfo { u_long hashtcpmark; uint32_t hashtblsize; + struct sctp_vrflist *sctp_vrfhash; + u_long hashvrfmark; + struct sctppcbhead listhead; struct sctpladdr addr_wq; @@ -169,8 +225,8 @@ struct sctp_epinfo { struct mtx ipi_ep_mtx; struct mtx it_mtx; + struct mtx ipi_iterator_wq_mtx; struct mtx ipi_addr_mtx; - struct mtx timer_mtx; uint32_t ipi_count_ep; /* assoc/tcb zone info */ @@ -197,12 +253,15 @@ struct sctp_epinfo { struct sctpvtaghead vtag_timewait[SCTP_STACK_VTAG_HASH_SIZE]; + /* address work queue handling */ +#if defined(SCTP_USE_THREAD_BASED_ITERATOR) + uint32_t iterator_running; + SCTP_PROCESS_STRUCT thread_proc; +#endif struct sctp_timer addr_wq_timer; }; -extern struct sctpstat sctpstat; - /* * Here we have all the relevant information for each SCTP entity created. We * will need to modify this as approprate. 
We also need to figure out how to @@ -218,9 +277,9 @@ struct sctp_pcb { unsigned int sctp_minrto; unsigned int sctp_maxrto; unsigned int initial_rto; - int initial_init_rto_max; + unsigned int sctp_sack_freq; uint32_t sctp_sws_sender; uint32_t sctp_sws_receiver; @@ -294,11 +353,15 @@ struct sctp_inpcb { LIST_ENTRY(sctp_inpcb) sctp_hash; /* count of local addresses bound, 0 if bound all */ int laddr_count; - /* list of addrs in use by the EP */ + + /* list of addrs in use by the EP, NULL if bound-all */ struct sctpladdr sctp_addr_list; - /* used for source address selection rotation */ + /* + * used for source address selection rotation when we are subset + * bound + */ struct sctp_laddr *next_addr_touse; - struct ifnet *next_ifn_touse; + /* back pointer to our socket */ struct socket *sctp_socket; uint32_t sctp_flags; /* INP state flag set */ @@ -329,6 +392,7 @@ struct sctp_inpcb { struct mtx inp_create_mtx; struct mtx inp_rdata_mtx; int32_t refcount; + uint32_t def_vrf_id; uint32_t total_sends; uint32_t total_recvs; uint32_t last_abort_code; @@ -371,15 +435,36 @@ struct sctp_tcb { #if defined(_KERNEL) extern struct sctp_epinfo sctppcbinfo; -extern int sctp_auto_asconf; int SCTP6_ARE_ADDR_EQUAL(struct in6_addr *a, struct in6_addr *b); void sctp_fill_pcbinfo(struct sctp_pcbinfo *); +struct sctp_ifn * + sctp_find_ifn(struct sctp_vrf *vrf, void *ifn, uint32_t ifn_index); + +struct sctp_vrf *sctp_allocate_vrf(int vrfid); + +struct sctp_vrf *sctp_find_vrf(uint32_t vrfid); + +struct sctp_ifa * +sctp_add_addr_to_vrf(uint32_t vrfid, + void *ifn, uint32_t ifn_index, uint32_t ifn_type, + const char *if_name, + void *ifa, struct sockaddr *addr, uint32_t ifa_flags); + +void sctp_free_ifa(struct sctp_ifa *sctp_ifap); + +struct sctp_ifa * +sctp_del_addr_from_vrf(uint32_t vrfid, struct sockaddr *addr, + uint32_t ifn_index); + + + + struct sctp_nets *sctp_findnet(struct sctp_tcb *, struct sockaddr *); -struct sctp_inpcb *sctp_pcb_findep(struct sockaddr *, int, int); +struct 
sctp_inpcb *sctp_pcb_findep(struct sockaddr *, int, int, uint32_t); int sctp_inpcb_bind(struct socket *, struct sockaddr *, struct thread *); @@ -391,7 +476,7 @@ sctp_findassociation_addr(struct mbuf *, int, int, struct sctp_tcb * sctp_findassociation_addr_sa(struct sockaddr *, - struct sockaddr *, struct sctp_inpcb **, struct sctp_nets **, int); + struct sockaddr *, struct sctp_inpcb **, struct sctp_nets **, int, uint32_t); void sctp_move_pcb_and_assoc(struct sctp_inpcb *, struct sctp_inpcb *, @@ -418,28 +503,29 @@ sctp_findassociation_ep_asconf(struct mbuf *, int, int, int sctp_inpcb_alloc(struct socket *); -int sctp_is_address_on_local_host(struct sockaddr *addr); +int sctp_is_address_on_local_host(struct sockaddr *addr, uint32_t vrf_id); void sctp_inpcb_free(struct sctp_inpcb *, int, int); struct sctp_tcb * sctp_aloc_assoc(struct sctp_inpcb *, struct sockaddr *, - int, int *, uint32_t); + int, int *, uint32_t, uint32_t); int sctp_free_assoc(struct sctp_inpcb *, struct sctp_tcb *, int, int); void sctp_add_vtag_to_timewait(struct sctp_inpcb *, uint32_t, uint32_t); -int sctp_add_local_addr_ep(struct sctp_inpcb *, struct ifaddr *); +int sctp_add_local_addr_ep(struct sctp_inpcb *, struct sctp_ifa *, uint32_t); -int sctp_insert_laddr(struct sctpladdr *, struct ifaddr *); +int sctp_insert_laddr(struct sctpladdr *, struct sctp_ifa *, uint32_t); void sctp_remove_laddr(struct sctp_laddr *); -int sctp_del_local_addr_ep(struct sctp_inpcb *, struct ifaddr *); +int sctp_del_local_addr_ep(struct sctp_inpcb *, struct sctp_ifa *); + +void sctp_set_initial_cc_param(struct sctp_tcb *, struct sctp_nets *net); -int sctp_del_local_addr_ep_sa(struct sctp_inpcb *, struct sockaddr *); int sctp_add_remote_addr(struct sctp_tcb *, struct sockaddr *, int, int); @@ -449,11 +535,9 @@ int sctp_del_remote_addr(struct sctp_tcb *, struct sockaddr *); void sctp_pcb_init(void); -int sctp_add_local_addr_assoc(struct sctp_tcb *, struct ifaddr *); - -int sctp_del_local_addr_assoc(struct sctp_tcb *, 
struct ifaddr *); +int sctp_add_local_addr_assoc(struct sctp_tcb *, struct sctp_ifa *, int); -int sctp_del_local_addr_assoc_sa(struct sctp_tcb *, struct sockaddr *); +int sctp_del_local_addr_assoc(struct sctp_tcb *, struct sctp_ifa *); int sctp_load_addresses_from_init(struct sctp_tcb *, struct mbuf *, int, int, @@ -474,8 +558,15 @@ int sctp_destination_is_reachable(struct sctp_tcb *, struct sockaddr *); * indicates run on ONLY assoc's of the specified endpoint. */ int -sctp_initiate_iterator(inp_func inpf, asoc_func af, uint32_t, uint32_t, - uint32_t, void *, uint32_t, end_func ef, struct sctp_inpcb *, uint8_t co_off); +sctp_initiate_iterator(inp_func inpf, + asoc_func af, + inp_func inpe, + uint32_t, uint32_t, + uint32_t, void *, + uint32_t, + end_func ef, + struct sctp_inpcb *, + uint8_t co_off); #endif /* _KERNEL */ diff --git a/sys/netinet/sctp_peeloff.c b/sys/netinet/sctp_peeloff.c index 88da761..5950bbb 100644 --- a/sys/netinet/sctp_peeloff.c +++ b/sys/netinet/sctp_peeloff.c @@ -174,7 +174,7 @@ sctp_get_peeloff(struct socket *head, sctp_assoc_t assoc_id, int *error) SCTP_FROM_SCTP_PEELOFF + SCTP_LOC_1); } /* Turn off any non-blocking semantic. 
*/ - newso->so_state &= ~SS_NBIO; + SCTP_CLEAR_SO_NBIO(newso); newso->so_state |= SS_ISCONNECTED; /* We remove it right away */ #ifdef SCTP_LOCK_LOGGING diff --git a/sys/netinet/sctp_structs.h b/sys/netinet/sctp_structs.h index 412ee02..6830b13 100644 --- a/sys/netinet/sctp_structs.h +++ b/sys/netinet/sctp_structs.h @@ -96,7 +96,7 @@ TAILQ_HEAD(sctp_resethead, sctp_stream_reset_list); /* * Users of the iterator need to malloc a iterator with a call to - * sctp_initiate_iterator(inp_func, assoc_func, pcb_flags, pcb_features, + * sctp_initiate_iterator(inp_func, assoc_func, inp_func, pcb_flags, pcb_features, * asoc_state, void-ptr-arg, uint32-arg, end_func, inp); * * Use the following two defines if you don't care what pcb flags are on the EP @@ -114,16 +114,17 @@ TAILQ_HEAD(sctp_resethead, sctp_stream_reset_list); typedef void (*asoc_func) (struct sctp_inpcb *, struct sctp_tcb *, void *ptr, uint32_t val); -typedef void (*inp_func) (struct sctp_inpcb *, void *ptr, uint32_t val); +typedef int (*inp_func) (struct sctp_inpcb *, void *ptr, uint32_t val); typedef void (*end_func) (void *ptr, uint32_t val); struct sctp_iterator { - LIST_ENTRY(sctp_iterator) sctp_nxt_itr; + TAILQ_ENTRY(sctp_iterator) sctp_nxt_itr; struct sctp_timer tmr; struct sctp_inpcb *inp; /* current endpoint */ struct sctp_tcb *stcb; /* current* assoc */ asoc_func function_assoc; /* per assoc function */ inp_func function_inp; /* per endpoint function */ + inp_func function_inp_end; /* end INP function */ end_func function_atend;/* iterator completion function */ void *pointer; /* pointer for apply func to use */ uint32_t val; /* value for apply func to use */ @@ -132,13 +133,14 @@ struct sctp_iterator { uint32_t asoc_state; /* assoc state being checked */ uint32_t iterator_flags; uint8_t no_chunk_output; + uint8_t done_current_ep; }; /* iterator_flags values */ #define SCTP_ITERATOR_DO_ALL_INP 0x00000001 #define SCTP_ITERATOR_DO_SINGLE_INP 0x00000002 -LIST_HEAD(sctpiterators, sctp_iterator); 
+TAILQ_HEAD(sctpiterators, sctp_iterator); struct sctp_copy_all { struct sctp_inpcb *inp; /* ep */ @@ -149,6 +151,12 @@ struct sctp_copy_all { int cnt_failed; }; +struct sctp_asconf_iterator { + struct sctpladdr list_of_work; + int cnt; +}; + + struct sctp_nets { TAILQ_ENTRY(sctp_nets) sctp_next; /* next link */ @@ -165,7 +173,7 @@ struct sctp_nets { struct sctp_route { struct rtentry *ro_rt; union sctp_sockstore _l_addr; /* remote peer addr */ - union sctp_sockstore _s_addr; /* our selected src addr */ + struct sctp_ifa *_s_addr; /* our selected src addr */ } ro; /* mtu discovered so far */ uint32_t mtu; @@ -435,7 +443,7 @@ TAILQ_HEAD(sctp_asconf_addrhead, sctp_asconf_addr); struct sctp_asconf_addr { TAILQ_ENTRY(sctp_asconf_addr) next; struct sctp_asconf_addr_param ap; - struct ifaddr *ifa; /* save the ifa for add/del ip */ + struct sctp_ifa *ifa; /* save the ifa for add/del ip */ uint8_t sent; /* has this been sent yet? */ }; @@ -483,7 +491,8 @@ struct sctp_association { struct sctp_timer delayed_event_timer; /* timer for delayed events */ /* list of local addresses when add/del in progress */ - struct sctpladdr sctp_local_addr_list; + struct sctpladdr sctp_restricted_addrs; + struct sctpnetlisthead nets; /* Free chunk list */ @@ -573,6 +582,8 @@ struct sctp_association { /* queue of chunks waiting to be sent into the local stack */ struct sctp_readhead pending_reply_queue; + uint32_t vrf_id; + uint32_t cookie_preserve_req; /* ASCONF next seq I am sending out, inits at init-tsn */ uint32_t asconf_seq_out; @@ -739,6 +750,9 @@ struct sctp_association { unsigned int cookie_life; /* time to delay acks for */ unsigned int delayed_ack; + unsigned int old_delayed_ack; + unsigned int sack_freq; + unsigned int data_pkts_seen; unsigned int numduptsns; int dup_tsns[SCTP_MAX_DUP_TSNS]; @@ -813,10 +827,10 @@ struct sctp_association { uint8_t stream_locked; uint8_t authenticated; /* packet authenticated ok */ /* - * This flag indicates that we need to send the first SACK. 
If in - * place it says we have NOT yet sent a SACK and need to. + * This flag indicates that a SACK need to be sent. Initially this + * is 1 to send the first sACK immediately. */ - uint8_t first_ack_sent; + uint8_t send_sack; /* max burst after fast retransmit completes */ uint8_t max_burst; diff --git a/sys/netinet/sctp_sysctl.c b/sys/netinet/sctp_sysctl.c new file mode 100644 index 0000000..37c572f --- /dev/null +++ b/sys/netinet/sctp_sysctl.c @@ -0,0 +1,500 @@ +/*- + * Copyright (c) 2007, Cisco Systems, Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * a) Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * b) Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the distribution. + * + * c) Neither the name of Cisco Systems, Inc. nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <netinet/sctp_os.h> +#include <netinet/sctp_constants.h> +#include <netinet/sctp_sysctl.h> +#include <netinet/sctp_pcb.h> +#include <netinet/sctputil.h> + +/* + * sysctl tunable variables + */ +uint32_t sctp_sendspace = (128 * 1024); +uint32_t sctp_recvspace = 128 * (1024 + +#ifdef INET6 + sizeof(struct sockaddr_in6) +#else + sizeof(struct sockaddr_in) +#endif +); +uint32_t sctp_mbuf_threshold_count = SCTP_DEFAULT_MBUFS_IN_CHAIN; +uint32_t sctp_auto_asconf = SCTP_DEFAULT_AUTO_ASCONF; +uint32_t sctp_ecn_enable = 1; +uint32_t sctp_ecn_nonce = 0; +uint32_t sctp_strict_sacks = 0; +uint32_t sctp_no_csum_on_loopback = 1; +uint32_t sctp_strict_init = 1; +uint32_t sctp_abort_if_one_2_one_hits_limit = 0; +uint32_t sctp_strict_data_order = 0; + +uint32_t sctp_peer_chunk_oh = sizeof(struct mbuf); +uint32_t sctp_max_burst_default = SCTP_DEF_MAX_BURST; +uint32_t sctp_use_cwnd_based_maxburst = 1; +uint32_t sctp_do_drain = 1; +uint32_t sctp_hb_maxburst = SCTP_DEF_MAX_BURST; + +uint32_t sctp_max_chunks_on_queue = SCTP_ASOC_MAX_CHUNKS_ON_QUEUE; +uint32_t sctp_delayed_sack_time_default = SCTP_RECV_MSEC; +uint32_t sctp_sack_freq_default = SCTP_DEFAULT_SACK_FREQ; +uint32_t sctp_heartbeat_interval_default = SCTP_HB_DEFAULT_MSEC; +uint32_t sctp_pmtu_raise_time_default = SCTP_DEF_PMTU_RAISE_SEC; +uint32_t sctp_shutdown_guard_time_default = SCTP_DEF_MAX_SHUTDOWN_SEC; +uint32_t 
sctp_secret_lifetime_default = SCTP_DEFAULT_SECRET_LIFE_SEC; +uint32_t sctp_rto_max_default = SCTP_RTO_UPPER_BOUND; +uint32_t sctp_rto_min_default = SCTP_RTO_LOWER_BOUND; +uint32_t sctp_rto_initial_default = SCTP_RTO_INITIAL; +uint32_t sctp_init_rto_max_default = SCTP_RTO_UPPER_BOUND; +uint32_t sctp_valid_cookie_life_default = SCTP_DEFAULT_COOKIE_LIFE; +uint32_t sctp_init_rtx_max_default = SCTP_DEF_MAX_INIT; +uint32_t sctp_assoc_rtx_max_default = SCTP_DEF_MAX_SEND; +uint32_t sctp_path_rtx_max_default = SCTP_DEF_MAX_PATH_RTX; +uint32_t sctp_nr_outgoing_streams_default = SCTP_OSTREAM_INITIAL; +uint32_t sctp_add_more_threshold = SCTP_DEFAULT_ADD_MORE; +uint32_t sctp_asoc_free_resc_limit = SCTP_DEF_ASOC_RESC_LIMIT; +uint32_t sctp_system_free_resc_limit = SCTP_DEF_SYSTEM_RESC_LIMIT; + +uint32_t sctp_min_split_point = SCTP_DEFAULT_SPLIT_POINT_MIN; +uint32_t sctp_pcbtblsize = SCTP_PCBHASHSIZE; +uint32_t sctp_hashtblsize = SCTP_TCBHASHSIZE; +uint32_t sctp_chunkscale = SCTP_CHUNKQUEUE_SCALE; + +uint32_t sctp_cmt_on_off = 0; +uint32_t sctp_cmt_use_dac = 0; + +uint32_t sctp_L2_abc_variable = 1; +uint32_t sctp_early_fr = 0; +uint32_t sctp_early_fr_msec = SCTP_MINFR_MSEC_TIMER; +uint32_t sctp_use_rttvar_cc = 0; +uint32_t sctp_says_check_for_deadlock = 0; +uint32_t sctp_asconf_auth_nochk = 0; +uint32_t sctp_auth_disable = 0; +uint32_t sctp_nat_friendly = 1; +struct sctpstat sctpstat; + +#ifdef SCTP_DEBUG +uint32_t sctp_debug_on = 0; + +#endif + + +/* + * sysctl functions + */ +static int +sctp_assoclist(SYSCTL_HANDLER_ARGS) +{ + unsigned int number_of_endpoints; + unsigned int number_of_local_addresses; + unsigned int number_of_associations; + unsigned int number_of_remote_addresses; + unsigned int n; + int error; + struct sctp_inpcb *inp; + struct sctp_tcb *stcb; + struct sctp_nets *net; + struct sctp_laddr *laddr; + struct xsctp_inpcb xinpcb; + struct xsctp_tcb xstcb; + +/* struct xsctp_laddr xladdr; */ + struct xsctp_raddr xraddr; + + number_of_endpoints = 0; + 
number_of_local_addresses = 0; + number_of_associations = 0; + number_of_remote_addresses = 0; + + SCTP_INP_INFO_RLOCK(); + if (req->oldptr == USER_ADDR_NULL) { + LIST_FOREACH(inp, &sctppcbinfo.listhead, sctp_list) { + SCTP_INP_RLOCK(inp); + number_of_endpoints++; + /* FIXME MT */ + LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) { + number_of_local_addresses++; + } + LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) { + number_of_associations++; + TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) { + number_of_remote_addresses++; + } + } + SCTP_INP_RUNLOCK(inp); + } + SCTP_INP_INFO_RUNLOCK(); + n = (number_of_endpoints + 1) * sizeof(struct xsctp_inpcb) + + number_of_local_addresses * sizeof(struct xsctp_laddr) + + number_of_associations * sizeof(struct xsctp_tcb) + + number_of_remote_addresses * sizeof(struct xsctp_raddr); +#ifdef SCTP_DEBUG + printf("inps = %u, stcbs = %u, laddrs = %u, raddrs = %u\n", + number_of_endpoints, number_of_associations, + number_of_local_addresses, number_of_remote_addresses); +#endif + /* request some more memory than needed */ + req->oldidx = (n + n / 8); + return 0; + } + if (req->newptr != USER_ADDR_NULL) { + SCTP_INP_INFO_RUNLOCK(); + return EPERM; + } + LIST_FOREACH(inp, &sctppcbinfo.listhead, sctp_list) { + SCTP_INP_RLOCK(inp); + number_of_local_addresses = 0; + number_of_associations = 0; + /* + * LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) + * { number_of_local_addresses++; } + */ + LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) { + number_of_associations++; + } + xinpcb.last = 0; + xinpcb.local_port = ntohs(inp->sctp_lport); + xinpcb.number_local_addresses = number_of_local_addresses; + xinpcb.number_associations = number_of_associations; + xinpcb.flags = inp->sctp_flags; + xinpcb.features = inp->sctp_features; + xinpcb.total_sends = inp->total_sends; + xinpcb.total_recvs = inp->total_recvs; + xinpcb.total_nospaces = inp->total_nospaces; + SCTP_INP_INCR_REF(inp); + SCTP_INP_RUNLOCK(inp); + 
SCTP_INP_INFO_RUNLOCK(); + error = SYSCTL_OUT(req, &xinpcb, sizeof(struct xsctp_inpcb)); + if (error) { + return error; + } + SCTP_INP_INFO_RLOCK(); + SCTP_INP_RLOCK(inp); + /* FIXME MT */ + /* + * LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) + * { error = SYSCTL_OUT(req, &xladdr, sizeof(struct + * xsctp_laddr)); if (error) { #if + * defined(SCTP_PER_SOCKET_LOCKING) + * SCTP_SOCKET_UNLOCK(SCTP_INP_SO(inp), 1); + * SCTP_UNLOCK_SHARED(sctppcbinfo.ipi_ep_mtx); #endif + * SCTP_INP_RUNLOCK(inp); SCTP_INP_INFO_RUNLOCK(); return + * error; } } + */ + LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) { + SCTP_TCB_LOCK(stcb); + atomic_add_int(&stcb->asoc.refcnt, 1); + SCTP_TCB_UNLOCK(stcb); + number_of_local_addresses = 0; + number_of_remote_addresses = 0; + TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) { + number_of_remote_addresses++; + } + xstcb.LocalPort = ntohs(inp->sctp_lport); + xstcb.RemPort = ntohs(stcb->rport); + if (stcb->asoc.primary_destination != NULL) + xstcb.RemPrimAddr = stcb->asoc.primary_destination->ro._l_addr; + xstcb.HeartBeatInterval = stcb->asoc.heart_beat_delay; + xstcb.State = SCTP_GET_STATE(&stcb->asoc); /* FIXME */ + xstcb.InStreams = stcb->asoc.streamincnt; + xstcb.OutStreams = stcb->asoc.streamoutcnt; + xstcb.MaxRetr = stcb->asoc.overall_error_count; + xstcb.PrimProcess = 0; /* not really supported yet */ + xstcb.T1expireds = stcb->asoc.timoinit + stcb->asoc.timocookie; + xstcb.T2expireds = stcb->asoc.timoshutdown + stcb->asoc.timoshutdownack; + xstcb.RtxChunks = stcb->asoc.marked_retrans; + xstcb.StartTime = stcb->asoc.start_time; + xstcb.DiscontinuityTime = stcb->asoc.discontinuity_time; + + xstcb.number_local_addresses = number_of_local_addresses; + xstcb.number_remote_addresses = number_of_remote_addresses; + xstcb.total_sends = stcb->total_sends; + xstcb.total_recvs = stcb->total_recvs; + xstcb.local_tag = stcb->asoc.my_vtag; + xstcb.remote_tag = stcb->asoc.peer_vtag; + xstcb.initial_tsn = stcb->asoc.init_seq_number; + 
xstcb.highest_tsn = stcb->asoc.sending_seq - 1; + xstcb.cumulative_tsn = stcb->asoc.last_acked_seq; + xstcb.cumulative_tsn_ack = stcb->asoc.cumulative_tsn; + SCTP_INP_RUNLOCK(inp); + SCTP_INP_INFO_RUNLOCK(); + error = SYSCTL_OUT(req, &xstcb, sizeof(struct xsctp_tcb)); + if (error) { + atomic_add_int(&stcb->asoc.refcnt, -1); + return error; + } + TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) { + xraddr.RemAddr = net->ro._l_addr; + xraddr.RemAddrActive = ((net->dest_state & SCTP_ADDR_REACHABLE) == SCTP_ADDR_REACHABLE); + xraddr.RemAddrConfirmed = ((net->dest_state & SCTP_ADDR_UNCONFIRMED) == 0); + xraddr.RemAddrHBActive = ((net->dest_state & SCTP_ADDR_NOHB) == 0); + xraddr.RemAddrRTO = net->RTO; + xraddr.RemAddrMaxPathRtx = net->failure_threshold; + xraddr.RemAddrRtx = net->marked_retrans; + xraddr.RemAddrErrorCounter = net->error_count; + xraddr.RemAddrCwnd = net->cwnd; + xraddr.RemAddrFlightSize = net->flight_size; + xraddr.RemAddrStartTime = net->start_time; + error = SYSCTL_OUT(req, &xraddr, sizeof(struct xsctp_raddr)); + if (error) { + atomic_add_int(&stcb->asoc.refcnt, -1); + return error; + } + } + atomic_add_int(&stcb->asoc.refcnt, -1); + SCTP_INP_INFO_RLOCK(); + SCTP_INP_RLOCK(inp); + } + SCTP_INP_DECR_REF(inp); + SCTP_INP_RUNLOCK(inp); + } + SCTP_INP_INFO_RUNLOCK(); + + xinpcb.last = 1; + xinpcb.local_port = 0; + xinpcb.number_local_addresses = 0; + xinpcb.number_associations = 0; + xinpcb.flags = 0; + xinpcb.features = 0; + error = SYSCTL_OUT(req, &xinpcb, sizeof(struct xsctp_inpcb)); + return error; +} + + +/* + * sysctl definitions + */ + +SYSCTL_INT(_net_inet_sctp, OID_AUTO, sendspace, CTLFLAG_RW, + &sctp_sendspace, 0, "Maximum outgoing SCTP buffer size"); + +SYSCTL_INT(_net_inet_sctp, OID_AUTO, recvspace, CTLFLAG_RW, + &sctp_recvspace, 0, "Maximum incoming SCTP buffer size"); + +#if defined(__FreeBSD__) || defined(SCTP_APPLE_AUTO_ASCONF) +SYSCTL_INT(_net_inet_sctp, OID_AUTO, auto_asconf, CTLFLAG_RW, + &sctp_auto_asconf, 0, "Enable SCTP Auto-ASCONF"); 
+#endif + +SYSCTL_INT(_net_inet_sctp, OID_AUTO, ecn_enable, CTLFLAG_RW, + &sctp_ecn_enable, 0, "Enable SCTP ECN"); + +SYSCTL_INT(_net_inet_sctp, OID_AUTO, ecn_nonce, CTLFLAG_RW, + &sctp_ecn_nonce, 0, "Enable SCTP ECN Nonce"); + +SYSCTL_INT(_net_inet_sctp, OID_AUTO, strict_sacks, CTLFLAG_RW, + &sctp_strict_sacks, 0, "Enable SCTP Strict SACK checking"); + +SYSCTL_INT(_net_inet_sctp, OID_AUTO, loopback_nocsum, CTLFLAG_RW, + &sctp_no_csum_on_loopback, 0, + "Enable NO Csum on packets sent on loopback"); + +SYSCTL_INT(_net_inet_sctp, OID_AUTO, strict_init, CTLFLAG_RW, + &sctp_strict_init, 0, + "Enable strict INIT/INIT-ACK singleton enforcement"); + +SYSCTL_INT(_net_inet_sctp, OID_AUTO, peer_chkoh, CTLFLAG_RW, + &sctp_peer_chunk_oh, 0, + "Amount to debit peers rwnd per chunk sent"); + +SYSCTL_INT(_net_inet_sctp, OID_AUTO, maxburst, CTLFLAG_RW, + &sctp_max_burst_default, 0, + "Default max burst for sctp endpoints"); + +SYSCTL_INT(_net_inet_sctp, OID_AUTO, maxchunks, CTLFLAG_RW, + &sctp_max_chunks_on_queue, 0, + "Default max chunks on queue per asoc"); + +SYSCTL_INT(_net_inet_sctp, OID_AUTO, tcbhashsize, CTLFLAG_RW, + &sctp_hashtblsize, 0, + "Tuneable for Hash table sizes"); + +SYSCTL_INT(_net_inet_sctp, OID_AUTO, min_split_point, CTLFLAG_RW, + &sctp_min_split_point, 0, + "Minimum size when splitting a chunk"); + +SYSCTL_INT(_net_inet_sctp, OID_AUTO, pcbhashsize, CTLFLAG_RW, + &sctp_pcbtblsize, 0, + "Tuneable for PCB Hash table sizes"); + +SYSCTL_INT(_net_inet_sctp, OID_AUTO, sys_resource, CTLFLAG_RW, + &sctp_system_free_resc_limit, 0, + "Max number of cached resources in the system"); + +SYSCTL_INT(_net_inet_sctp, OID_AUTO, asoc_resource, CTLFLAG_RW, + &sctp_asoc_free_resc_limit, 0, + "Max number of cached resources in an asoc"); + +SYSCTL_INT(_net_inet_sctp, OID_AUTO, chunkscale, CTLFLAG_RW, + &sctp_chunkscale, 0, + "Tuneable for Scaling of number of chunks and messages"); + +SYSCTL_UINT(_net_inet_sctp, OID_AUTO, delayed_sack_time, CTLFLAG_RW, + 
&sctp_delayed_sack_time_default, 0, + "Default delayed SACK timer in msec"); + +SYSCTL_UINT(_net_inet_sctp, OID_AUTO, sack_freq, CTLFLAG_RW, + &sctp_sack_freq_default, 0, + "Default SACK frequency"); + +SYSCTL_UINT(_net_inet_sctp, OID_AUTO, heartbeat_interval, CTLFLAG_RW, + &sctp_heartbeat_interval_default, 0, + "Default heartbeat interval in msec"); + +SYSCTL_UINT(_net_inet_sctp, OID_AUTO, pmtu_raise_time, CTLFLAG_RW, + &sctp_pmtu_raise_time_default, 0, + "Default PMTU raise timer in sec"); + +SYSCTL_UINT(_net_inet_sctp, OID_AUTO, shutdown_guard_time, CTLFLAG_RW, + &sctp_shutdown_guard_time_default, 0, + "Default shutdown guard timer in sec"); + +SYSCTL_UINT(_net_inet_sctp, OID_AUTO, secret_lifetime, CTLFLAG_RW, + &sctp_secret_lifetime_default, 0, + "Default secret lifetime in sec"); + +SYSCTL_UINT(_net_inet_sctp, OID_AUTO, rto_max, CTLFLAG_RW, + &sctp_rto_max_default, 0, + "Default maximum retransmission timeout in msec"); + +SYSCTL_UINT(_net_inet_sctp, OID_AUTO, rto_min, CTLFLAG_RW, + &sctp_rto_min_default, 0, + "Default minimum retransmission timeout in msec"); + +SYSCTL_UINT(_net_inet_sctp, OID_AUTO, rto_initial, CTLFLAG_RW, + &sctp_rto_initial_default, 0, + "Default initial retransmission timeout in msec"); + +SYSCTL_UINT(_net_inet_sctp, OID_AUTO, init_rto_max, CTLFLAG_RW, + &sctp_init_rto_max_default, 0, + "Default maximum retransmission timeout during association setup in msec"); + +SYSCTL_UINT(_net_inet_sctp, OID_AUTO, valid_cookie_life, CTLFLAG_RW, + &sctp_valid_cookie_life_default, 0, + "Default cookie lifetime in sec"); + +SYSCTL_UINT(_net_inet_sctp, OID_AUTO, init_rtx_max, CTLFLAG_RW, + &sctp_init_rtx_max_default, 0, + "Default maximum number of retransmission for INIT chunks"); + +SYSCTL_UINT(_net_inet_sctp, OID_AUTO, assoc_rtx_max, CTLFLAG_RW, + &sctp_assoc_rtx_max_default, 0, + "Default maximum number of retransmissions per association"); + +SYSCTL_UINT(_net_inet_sctp, OID_AUTO, path_rtx_max, CTLFLAG_RW, + &sctp_path_rtx_max_default, 0, + "Default 
maximum of retransmissions per path"); + +SYSCTL_UINT(_net_inet_sctp, OID_AUTO, add_more_on_output, CTLFLAG_RW, + &sctp_add_more_threshold, 0, + "When space wise is it worthwhile to try to add more to a socket send buffer"); + +SYSCTL_UINT(_net_inet_sctp, OID_AUTO, outgoing_streams, CTLFLAG_RW, + &sctp_nr_outgoing_streams_default, 0, + "Default number of outgoing streams"); + +SYSCTL_UINT(_net_inet_sctp, OID_AUTO, cmt_on_off, CTLFLAG_RW, + &sctp_cmt_on_off, 0, + "CMT ON/OFF flag"); + +SYSCTL_UINT(_net_inet_sctp, OID_AUTO, cwnd_maxburst, CTLFLAG_RW, + &sctp_use_cwnd_based_maxburst, 0, + "Use a CWND adjusting maxburst"); + +SYSCTL_UINT(_net_inet_sctp, OID_AUTO, early_fast_retran, CTLFLAG_RW, + &sctp_early_fr, 0, + "Early Fast Retransmit with timer"); + +SYSCTL_UINT(_net_inet_sctp, OID_AUTO, use_rttvar_congctrl, CTLFLAG_RW, + &sctp_use_rttvar_cc, 0, + "Use congestion control via rtt variation"); + +SYSCTL_UINT(_net_inet_sctp, OID_AUTO, deadlock_detect, CTLFLAG_RW, + &sctp_says_check_for_deadlock, 0, + "SMP Deadlock detection on/off"); + +SYSCTL_UINT(_net_inet_sctp, OID_AUTO, early_fast_retran_msec, CTLFLAG_RW, + &sctp_early_fr_msec, 0, + "Early Fast Retransmit minimum timer value"); + +SYSCTL_UINT(_net_inet_sctp, OID_AUTO, asconf_auth_nochk, CTLFLAG_RW, + &sctp_asconf_auth_nochk, 0, + "Disable SCTP ASCONF AUTH requirement"); + +SYSCTL_UINT(_net_inet_sctp, OID_AUTO, auth_disable, CTLFLAG_RW, + &sctp_auth_disable, 0, + "Disable SCTP AUTH function"); + +SYSCTL_UINT(_net_inet_sctp, OID_AUTO, nat_friendly, CTLFLAG_RW, + &sctp_nat_friendly, 0, + "SCTP NAT friendly operation"); + +SYSCTL_INT(_net_inet_sctp, OID_AUTO, abc_l_var, CTLFLAG_RW, + &sctp_L2_abc_variable, 0, + "SCTP ABC max increase per SACK (L)"); + +SYSCTL_INT(_net_inet_sctp, OID_AUTO, max_chained_mbufs, CTLFLAG_RW, + &sctp_mbuf_threshold_count, 0, + "Default max number of small mbufs on a chain"); + +SYSCTL_UINT(_net_inet_sctp, OID_AUTO, cmt_use_dac, CTLFLAG_RW, + &sctp_cmt_use_dac, 0, + "CMT DAC ON/OFF flag"); + 
+SYSCTL_INT(_net_inet_sctp, OID_AUTO, do_sctp_drain, CTLFLAG_RW, + &sctp_do_drain, 0, + "Should SCTP respond to the drain calls"); + +SYSCTL_INT(_net_inet_sctp, OID_AUTO, hb_max_burst, CTLFLAG_RW, + &sctp_hb_maxburst, 0, + "Confirmation Heartbeat max burst?"); + +SYSCTL_INT(_net_inet_sctp, OID_AUTO, abort_at_limit, CTLFLAG_RW, + &sctp_abort_if_one_2_one_hits_limit, 0, + "When one-2-one hits qlimit abort"); + +SYSCTL_INT(_net_inet_sctp, OID_AUTO, strict_data_order, CTLFLAG_RW, + &sctp_strict_data_order, 0, + "Enforce strict data ordering, abort if control inside data"); + +SYSCTL_STRUCT(_net_inet_sctp, OID_AUTO, stats, CTLFLAG_RW, + &sctpstat, sctpstat, + "SCTP statistics (struct sctps_stat, netinet/sctp.h"); + +SYSCTL_PROC(_net_inet_sctp, OID_AUTO, assoclist, CTLFLAG_RD, + 0, 0, sctp_assoclist, + "S,xassoc", "List of active SCTP associations"); + +#ifdef SCTP_DEBUG +SYSCTL_INT(_net_inet_sctp, OID_AUTO, debug, CTLFLAG_RW, + &sctp_debug_on, 0, "Configure debug output"); +#endif /* SCTP_DEBUG */ diff --git a/sys/netinet/sctp_sysctl.h b/sys/netinet/sctp_sysctl.h new file mode 100644 index 0000000..fcd03fe --- /dev/null +++ b/sys/netinet/sctp_sysctl.h @@ -0,0 +1,581 @@ +/*- + * Copyright (c) 2007, Cisco Systems, Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * a) Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * b) Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the distribution. + * + * c) Neither the name of Cisco Systems, Inc. nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#ifndef __sctp_sysctl_h__ +#define __sctp_sysctl_h__ + +#include <netinet/sctp_os.h> +#include <netinet/sctp_constants.h> + +/* + * limits for the sysctl variables + */ +/* maxdgram: Maximum outgoing SCTP buffer size */ +#define SCTPCTL_MAXDGRAM 1 +#define SCTPCTL_MAXDGRAM_DESC "Maximum outgoing SCTP buffer size" +#define SCTPCTL_MAXDGRAM_MIN 0 +#define SCTPCTL_MAXDGRAM_MAX 0xFFFFFFFF +#define SCTPCTL_MAXDGRAM_DEFAULT 262144 /* 256k */ + +/* recvspace: Maximum incoming SCTP buffer size */ +#define SCTPCTL_RECVSPACE 2 +#define SCTPCTL_RECVSPACE_DESC "Maximum incoming SCTP buffer size" +#define SCTPCTL_RECVSPACE_MIN 0 +#define SCTPCTL_RECVSPACE_MAX 0xFFFFFFFF +#define SCTPCTL_RECVSPACE_DEFAULT 262144 /* 256k */ + +/* autoasconf: Enable SCTP Auto-ASCONF */ +#define SCTPCTL_AUTOASCONF 3 +#define SCTPCTL_AUTOASCONF_DESC "Enable SCTP Auto-ASCONF" +#define SCTPCTL_AUTOASCONF_MIN 0 +#define SCTPCTL_AUTOASCONF_MAX 1 +#define SCTPCTL_AUTOASCONF_DEFAULT SCTP_DEFAULT_AUTO_ASCONF + +/* ecn_enable: Enable SCTP ECN */ +#define SCTPCTL_ECN_ENABLE 4 +#define SCTPCTL_ECN_ENABLE_DESC "Enable SCTP ECN" +#define SCTPCTL_ECN_ENABLE_MIN 0 
+#define SCTPCTL_ECN_ENABLE_MAX 1 +#define SCTPCTL_ECN_ENABLE_DEFAULT 1 + +/* ecn_nonce: Enable SCTP ECN Nonce */ +#define SCTPCTL_ECN_NONCE 5 +#define SCTPCTL_ECN_NONCE_DESC "Enable SCTP ECN Nonce" +#define SCTPCTL_ECN_NONCE_MIN 0 +#define SCTPCTL_ECN_NONCE_MAX 1 +#define SCTPCTL_ECN_NONCE_DEFAULT 0 + +/* strict_sacks: Enable SCTP Strict SACK checking */ +#define SCTPCTL_STRICT_SACKS 6 +#define SCTPCTL_STRICT_SACKS_DESC "Enable SCTP Strict SACK checking" +#define SCTPCTL_STRICT_SACKS_MIN 0 +#define SCTPCTL_STRICT_SACKS_MAX 1 +#define SCTPCTL_STRICT_SACKS_DEFAULT 0 + +/* loopback_nocsum: Enable NO Csum on packets sent on loopback */ +#define SCTPCTL_LOOPBACK_NOCSUM 7 +#define SCTPCTL_LOOPBACK_NOCSUM_DESC "Enable NO Csum on packets sent on loopback" +#define SCTPCTL_LOOPBACK_NOCSUM_MIN 0 +#define SCTPCTL_LOOPBACK_NOCSUM_MAX 1 +#define SCTPCTL_LOOPBACK_NOCSUM_DEFAULT 1 + +/* strict_init: Enable strict INIT/INIT-ACK singleton enforcement */ +#define SCTPCTL_STRICT_INIT 8 +#define SCTPCTL_STRICT_INIT_DESC "Enable strict INIT/INIT-ACK singleton enforcement" +#define SCTPCTL_STRICT_INIT_MIN 0 +#define SCTPCTL_STRICT_INIT_MAX 1 +#define SCTPCTL_STRICT_INIT_DEFAULT 1 + +/* peer_chkoh: Amount to debit peers rwnd per chunk sent */ +#define SCTPCTL_PEER_CHKOH 9 +#define SCTPCTL_PEER_CHKOH_DESC "Amount to debit peers rwnd per chunk sent" +#define SCTPCTL_PEER_CHKOH_MIN 0 +#define SCTPCTL_PEER_CHKOH_MAX 0xFFFFFFFF +#define SCTPCTL_PEER_CHKOH_DEFAULT 0 /* sizeof struct mbuf */ + +/* maxburst: Default max burst for sctp endpoints */ +#define SCTPCTL_MAXBURST 10 +#define SCTPCTL_MAXBURST_DESC "Default max burst for sctp endpoints" +#define SCTPCTL_MAXBURST_MIN 1 +#define SCTPCTL_MAXBURST_MAX 0xFFFFFFFF +#define SCTPCTL_MAXBURST_DEFAULT SCTP_DEF_MAX_BURST + +/* maxchunks: Default max chunks on queue per asoc */ +#define SCTPCTL_MAXCHUNKS 11 +#define SCTPCTL_MAXCHUNKS_DESC "Default max chunks on queue per asoc" +#define SCTPCTL_MAXCHUNKS_MIN 0 +#define SCTPCTL_MAXCHUNKS_MAX 
0xFFFFFFFF +#define SCTPCTL_MAXCHUNKS_DEFAULT SCTP_ASOC_MAX_CHUNKS_ON_QUEUE + +/* tcbhashsize: Tuneable for Hash table sizes */ +#define SCTPCTL_TCBHASHSIZE 12 +#define SCTPCTL_TCBHASHSIZE_DESC "Tunable for TCB hash table sizes" +#define SCTPCTL_TCBHASHSIZE_MIN 1 +#define SCTPCTL_TCBHASHSIZE_MAX 0xFFFFFFFF +#define SCTPCTL_TCBHASHSIZE_DEFAULT SCTP_TCBHASHSIZE + +/* pcbhashsize: Tuneable for PCB Hash table sizes */ +#define SCTPCTL_PCBHASHSIZE 13 +#define SCTPCTL_PCBHASHSIZE_DESC "Tunable for PCB hash table sizes" +#define SCTPCTL_PCBHASHSIZE_MIN 1 +#define SCTPCTL_PCBHASHSIZE_MAX 0xFFFFFFFF +#define SCTPCTL_PCBHASHSIZE_DEFAULT SCTP_PCBHASHSIZE + +/* min_split_point: Minimum size when splitting a chunk */ +#define SCTPCTL_MIN_SPLIT_POINT 14 +#define SCTPCTL_MIN_SPLIT_POINT_DESC "Minimum size when splitting a chunk" +#define SCTPCTL_MIN_SPLIT_POINT_MIN 0 +#define SCTPCTL_MIN_SPLIT_POINT_MAX 0xFFFFFFFF +#define SCTPCTL_MIN_SPLIT_POINT_DEFAULT SCTP_DEFAULT_SPLIT_POINT_MIN + +/* chunkscale: Tuneable for Scaling of number of chunks and messages */ +#define SCTPCTL_CHUNKSCALE 15 +#define SCTPCTL_CHUNKSCALE_DESC "Tuneable for Scaling of number of chunks and messages" +#define SCTPCTL_CHUNKSCALE_MIN 1 +#define SCTPCTL_CHUNKSCALE_MAX 0xFFFFFFFF +#define SCTPCTL_CHUNKSCALE_DEFAULT SCTP_CHUNKQUEUE_SCALE + +/* delayed_sack_time: Default delayed SACK timer in msec */ +#define SCTPCTL_DELAYED_SACK_TIME 16 +#define SCTPCTL_DELAYED_SACK_TIME_DESC "Default delayed SACK timer in msec" +#define SCTPCTL_DELAYED_SACK_TIME_MIN 0 +#define SCTPCTL_DELAYED_SACK_TIME_MAX 0xFFFFFFFF +#define SCTPCTL_DELAYED_SACK_TIME_DEFAULT SCTP_RECV_MSEC + +/* sack_freq: Default SACK frequency */ +#define SCTPCTL_SACK_FREQ 17 +#define SCTPCTL_SACK_FREQ_DESC "Default SACK frequency" +#define SCTPCTL_SACK_FREQ_MIN 0 +#define SCTPCTL_SACK_FREQ_MAX 0xFFFFFFFF +#define SCTPCTL_SACK_FREQ_DEFAULT SCTP_DEFAULT_SACK_FREQ + +/* sys_resource: Max number of cached resources in the system */ +#define 
SCTPCTL_SYS_RESOURCE 18 +#define SCTPCTL_SYS_RESOURCE_DESC "Max number of cached resources in the system" +#define SCTPCTL_SYS_RESOURCE_MIN 0 +#define SCTPCTL_SYS_RESOURCE_MAX 0xFFFFFFFF +#define SCTPCTL_SYS_RESOURCE_DEFAULT SCTP_DEF_SYSTEM_RESC_LIMIT + +/* asoc_resource: Max number of cached resources in an asoc */ +#define SCTPCTL_ASOC_RESOURCE 19 +#define SCTPCTL_ASOC_RESOURCE_DESC "Max number of cached resources in an asoc" +#define SCTPCTL_ASOC_RESOURCE_MIN 0 +#define SCTPCTL_ASOC_RESOURCE_MAX 0xFFFFFFFF +#define SCTPCTL_ASOC_RESOURCE_DEFAULT SCTP_DEF_ASOC_RESC_LIMIT + +/* heartbeat_interval: Default heartbeat interval in msec */ +#define SCTPCTL_HEARTBEAT_INTERVAL 20 +#define SCTPCTL_HEARTBEAT_INTERVAL_DESC "Default heartbeat interval in msec" +#define SCTPCTL_HEARTBEAT_INTERVAL_MIN 0 +#define SCTPCTL_HEARTBEAT_INTERVAL_MAX 0xFFFFFFFF +#define SCTPCTL_HEARTBEAT_INTERVAL_DEFAULT SCTP_HB_DEFAULT_MSEC + +/* pmtu_raise_time: Default PMTU raise timer in sec */ +#define SCTPCTL_PMTU_RAISE_TIME 21 +#define SCTPCTL_PMTU_RAISE_TIME_DESC "Default PMTU raise timer in sec" +#define SCTPCTL_PMTU_RAISE_TIME_MIN 0 +#define SCTPCTL_PMTU_RAISE_TIME_MAX 0xFFFFFFFF +#define SCTPCTL_PMTU_RAISE_TIME_DEFAULT SCTP_DEF_PMTU_RAISE_SEC + +/* shutdown_guard_time: Default shutdown guard timer in sec */ +#define SCTPCTL_SHUTDOWN_GUARD_TIME 22 +#define SCTPCTL_SHUTDOWN_GUARD_TIME_DESC "Default shutdown guard timer in sec" +#define SCTPCTL_SHUTDOWN_GUARD_TIME_MIN 0 +#define SCTPCTL_SHUTDOWN_GUARD_TIME_MAX 0xFFFFFFFF +#define SCTPCTL_SHUTDOWN_GUARD_TIME_DEFAULT SCTP_DEF_MAX_SHUTDOWN_SEC + +/* secret_lifetime: Default secret lifetime in sec */ +#define SCTPCTL_SECRET_LIFETIME 23 +#define SCTPCTL_SECRET_LIFETIME_DESC "Default secret lifetime in sec" +#define SCTPCTL_SECRET_LIFETIME_MIN 0 +#define SCTPCTL_SECRET_LIFETIME_MAX 0xFFFFFFFF +#define SCTPCTL_SECRET_LIFETIME_DEFAULT SCTP_DEFAULT_SECRET_LIFE_SEC + +/* rto_max: Default maximum retransmission timeout in msec */ +#define SCTPCTL_RTO_MAX 
24 +#define SCTPCTL_RTO_MAX_DESC "Default maximum retransmission timeout in msec" +#define SCTPCTL_RTO_MAX_MIN 0 +#define SCTPCTL_RTO_MAX_MAX 0xFFFFFFFF +#define SCTPCTL_RTO_MAX_DEFAULT SCTP_RTO_UPPER_BOUND + +/* rto_min: Default minimum retransmission timeout in msec */ +#define SCTPCTL_RTO_MIN 25 +#define SCTPCTL_RTO_MIN_DESC "Default minimum retransmission timeout in msec" +#define SCTPCTL_RTO_MIN_MIN 0 +#define SCTPCTL_RTO_MIN_MAX 0xFFFFFFFF +#define SCTPCTL_RTO_MIN_DEFAULT SCTP_RTO_LOWER_BOUND + +/* rto_initial: Default initial retransmission timeout in msec */ +#define SCTPCTL_RTO_INITIAL 26 +#define SCTPCTL_RTO_INITIAL_DESC "Default initial retransmission timeout in msec" +#define SCTPCTL_RTO_INITIAL_MIN 0 +#define SCTPCTL_RTO_INITIAL_MAX 0xFFFFFFFF +#define SCTPCTL_RTO_INITIAL_DEFAULT SCTP_RTO_INITIAL + +/* init_rto_max: Default maximum retransmission timeout during association setup in msec */ +#define SCTPCTL_INIT_RTO_MAX 27 +#define SCTPCTL_INIT_RTO_MAX_DESC "Default maximum retransmission timeout during association setup in msec" +#define SCTPCTL_INIT_RTO_MAX_MIN 0 +#define SCTPCTL_INIT_RTO_MAX_MAX 0xFFFFFFFF +#define SCTPCTL_INIT_RTO_MAX_DEFAULT SCTP_RTO_UPPER_BOUND + +/* valid_cookie_life: Default cookie lifetime in sec */ +#define SCTPCTL_VALID_COOKIE_LIFE 28 +#define SCTPCTL_VALID_COOKIE_LIFE_DESC "Default cookie lifetime in sec" +#define SCTPCTL_VALID_COOKIE_LIFE_MIN 0 +#define SCTPCTL_VALID_COOKIE_LIFE_MAX 0xFFFFFFFF +#define SCTPCTL_VALID_COOKIE_LIFE_DEFAULT SCTP_DEFAULT_COOKIE_LIFE + +/* init_rtx_max: Default maximum number of retransmission for INIT chunks */ +#define SCTPCTL_INIT_RTX_MAX 29 +#define SCTPCTL_INIT_RTX_MAX_DESC "Default maximum number of retransmission for INIT chunks" +#define SCTPCTL_INIT_RTX_MAX_MIN 0 +#define SCTPCTL_INIT_RTX_MAX_MAX 0xFFFFFFFF +#define SCTPCTL_INIT_RTX_MAX_DEFAULT SCTP_DEF_MAX_INIT + +/* assoc_rtx_max: Default maximum number of retransmissions per association */ +#define SCTPCTL_ASSOC_RTX_MAX 30 +#define 
SCTPCTL_ASSOC_RTX_MAX_DESC "Default maximum number of retransmissions per association" +#define SCTPCTL_ASSOC_RTX_MAX_MIN 0 +#define SCTPCTL_ASSOC_RTX_MAX_MAX 0xFFFFFFFF +#define SCTPCTL_ASSOC_RTX_MAX_DEFAULT SCTP_DEF_MAX_SEND + +/* path_rtx_max: Default maximum of retransmissions per path */ +#define SCTPCTL_PATH_RTX_MAX 31 +#define SCTPCTL_PATH_RTX_MAX_DESC "Default maximum of retransmissions per path" +#define SCTPCTL_PATH_RTX_MAX_MIN 0 +#define SCTPCTL_PATH_RTX_MAX_MAX 0xFFFFFFFF +#define SCTPCTL_PATH_RTX_MAX_DEFAULT SCTP_DEF_MAX_PATH_RTX + +/* add_more_on_output: When space wise is it worthwhile to try to add more to a socket send buffer */ +#define SCTPCTL_ADD_MORE_ON_OUTPUT 32 +#define SCTPCTL_ADD_MORE_ON_OUTPUT_DESC "When space wise is it worthwhile to try to add more to a socket send buffer" +#define SCTPCTL_ADD_MORE_ON_OUTPUT_MIN 0 +#define SCTPCTL_ADD_MORE_ON_OUTPUT_MAX 0xFFFFFFFF +#define SCTPCTL_ADD_MORE_ON_OUTPUT_DEFAULT SCTP_DEFAULT_ADD_MORE + +/* outgoing_streams: Default number of outgoing streams */ +#define SCTPCTL_OUTGOING_STREAMS 33 +#define SCTPCTL_OUTGOING_STREAMS_DESC "Default number of outgoing streams" +#define SCTPCTL_OUTGOING_STREAMS_MIN 1 +#define SCTPCTL_OUTGOING_STREAMS_MAX 65535 +#define SCTPCTL_OUTGOING_STREAMS_DEFAULT SCTP_OSTREAM_INITIAL + +/* cmt_on_off: CMT on/off flag */ +#define SCTPCTL_CMT_ON_OFF 34 +#define SCTPCTL_CMT_ON_OFF_DESC "CMT on/off flag" +#define SCTPCTL_CMT_ON_OFF_MIN 0 +#define SCTPCTL_CMT_ON_OFF_MAX 1 +#define SCTPCTL_CMT_ON_OFF_DEFAULT 0 + +/* cwnd_maxburst: Use a CWND adjusting maxburst */ +#define SCTPCTL_CWND_MAXBURST 35 +#define SCTPCTL_CWND_MAXBURST_DESC "Use a CWND adjusting maxburst" +#define SCTPCTL_CWND_MAXBURST_MIN 0 +#define SCTPCTL_CWND_MAXBURST_MAX 1 +#define SCTPCTL_CWND_MAXBURST_DEFAULT 1 + +/* early_fast_retran: Early Fast Retransmit with timer */ +#define SCTPCTL_EARLY_FAST_RETRAN 36 +#define SCTPCTL_EARLY_FAST_RETRAN_DESC "Early Fast Retransmit with timer" +#define 
SCTPCTL_EARLY_FAST_RETRAN_MIN 0 +#define SCTPCTL_EARLY_FAST_RETRAN_MAX 0xFFFFFFFF +#define SCTPCTL_EARLY_FAST_RETRAN_DEFAULT 0 + +/* use_rttvar_congctrl: Use Congestion Control via rtt variation */ +#define SCTPCTL_USE_RTTVAR_CONGCTRL 37 +#define SCTPCTL_USE_RTTVAR_CONGCTRL_DESC "Use Congestion Control via rtt variation" +#define SCTPCTL_USE_RTTVAR_CONGCTRL_MIN 0 +#define SCTPCTL_USE_RTTVAR_CONGCTRL_MAX 1 +#define SCTPCTL_USE_RTTVAR_CONGCTRL_DEFAULT 0 /* UNUSED?? */ + +/* deadlock_detect: SMP Deadlock detection on/off */ +#define SCTPCTL_DEADLOCK_DETECT 38 +#define SCTPCTL_DEADLOCK_DETECT_DESC "SMP Deadlock detection on/off" +#define SCTPCTL_DEADLOCK_DETECT_MIN 0 +#define SCTPCTL_DEADLOCK_DETECT_MAX 1 +#define SCTPCTL_DEADLOCK_DETECT_DEFAULT 0 + +/* early_fast_retran_msec: Early Fast Retransmit minimum timer value */ +#define SCTPCTL_EARLY_FAST_RETRAN_MSEC 39 +#define SCTPCTL_EARLY_FAST_RETRAN_MSEC_DESC "Early Fast Retransmit minimum timer value" +#define SCTPCTL_EARLY_FAST_RETRAN_MSEC_MIN 0 +#define SCTPCTL_EARLY_FAST_RETRAN_MSEC_MAX 0xFFFFFFFF +#define SCTPCTL_EARLY_FAST_RETRAN_MSEC_DEFAULT SCTP_MINFR_MSEC_TIMER + +/* asconf_auth_nochk: Disable SCTP ASCONF AUTH requirement */ +#define SCTPCTL_ASCONF_AUTH_NOCHK 40 +#define SCTPCTL_ASCONF_AUTH_NOCHK_DESC "Disable SCTP ASCONF AUTH requirement" +#define SCTPCTL_ASCONF_AUTH_NOCHK_MIN 0 +#define SCTPCTL_ASCONF_AUTH_NOCHK_MAX 1 +#define SCTPCTL_ASCONF_AUTH_NOCHK_DEFAULT 0 + +/* auth_disable: Disable SCTP AUTH function */ +#define SCTPCTL_AUTH_DISABLE 41 +#define SCTPCTL_AUTH_DISABLE_DESC "Disable SCTP AUTH function" +#define SCTPCTL_AUTH_DISABLE_MIN 0 +#define SCTPCTL_AUTH_DISABLE_MAX 1 +#define SCTPCTL_AUTH_DISABLE_DEFAULT 0 + +/* nat_friendly: SCTP NAT friendly operation */ +#define SCTPCTL_NAT_FRIENDLY 42 +#define SCTPCTL_NAT_FRIENDLY_DESC "SCTP NAT friendly operation" +#define SCTPCTL_NAT_FRIENDLY_MIN 0 +#define SCTPCTL_NAT_FRIENDLY_MAX 1 +#define SCTPCTL_NAT_FRIENDLY_DEFAULT 1 + +/* abc_l_var: SCTP ABC max increase 
per SACK (L) */ +#define SCTPCTL_ABC_L_VAR 43 +#define SCTPCTL_ABC_L_VAR_DESC "SCTP ABC max increase per SACK (L)" +#define SCTPCTL_ABC_L_VAR_MIN 0 +#define SCTPCTL_ABC_L_VAR_MAX 0xFFFFFFFF +#define SCTPCTL_ABC_L_VAR_DEFAULT 1 + +/* max_chained_mbufs: Default max number of small mbufs on a chain */ +#define SCTPCTL_MAX_CHAINED_MBUFS 44 +#define SCTPCTL_MAX_CHAINED_MBUFS_DESC "Default max number of small mbufs on a chain" +#define SCTPCTL_MAX_CHAINED_MBUFS_MIN 0 +#define SCTPCTL_MAX_CHAINED_MBUFS_MAX 0xFFFFFFFF +#define SCTPCTL_MAX_CHAINED_MBUFS_DEFAULT SCTP_DEFAULT_MBUFS_IN_CHAIN + +/* cmt_use_dac: CMT DAC on/off flag */ +#define SCTPCTL_CMT_USE_DAC 45 +#define SCTPCTL_CMT_USE_DAC_DESC "CMT DAC on/off flag" +#define SCTPCTL_CMT_USE_DAC_MIN 0 +#define SCTPCTL_CMT_USE_DAC_MAX 1 +#define SCTPCTL_CMT_USE_DAC_DEFAULT 0 + +/* do_sctp_drain: Should SCTP respond to the drain calls */ +#define SCTPCTL_DO_SCTP_DRAIN 46 +#define SCTPCTL_DO_SCTP_DRAIN_DESC "Should SCTP respond to the drain calls" +#define SCTPCTL_DO_SCTP_DRAIN_MIN 0 +#define SCTPCTL_DO_SCTP_DRAIN_MAX 1 +#define SCTPCTL_DO_SCTP_DRAIN_DEFAULT 1 + +/* hb_max_burst: Confirmation Heartbeat max burst? */ +#define SCTPCTL_HB_MAX_BURST 47 +#define SCTPCTL_HB_MAX_BURST_DESC "Confirmation Heartbeat max burst?" 
+#define SCTPCTL_HB_MAX_BURST_MIN	1
+#define SCTPCTL_HB_MAX_BURST_MAX	0xFFFFFFFF
+#define SCTPCTL_HB_MAX_BURST_DEFAULT	SCTP_DEF_MAX_BURST
+
+/* abort_at_limit: When one-2-one hits qlimit abort */
+#define SCTPCTL_ABORT_AT_LIMIT		48
+#define SCTPCTL_ABORT_AT_LIMIT_DESC	"When one-2-one hits qlimit abort"
+#define SCTPCTL_ABORT_AT_LIMIT_MIN	0
+#define SCTPCTL_ABORT_AT_LIMIT_MAX	1
+#define SCTPCTL_ABORT_AT_LIMIT_DEFAULT	0
+
+/* strict_data_order: Enforce strict data ordering, abort if control inside data */
+#define SCTPCTL_STRICT_DATA_ORDER	49
+#define SCTPCTL_STRICT_DATA_ORDER_DESC	"Enforce strict data ordering, abort if control inside data"
+#define SCTPCTL_STRICT_DATA_ORDER_MIN	0
+#define SCTPCTL_STRICT_DATA_ORDER_MAX	1
+#define SCTPCTL_STRICT_DATA_ORDER_DEFAULT	0
+
+/* debug: Configure debug output */
+#define SCTPCTL_DEBUG			50
+#define SCTPCTL_DEBUG_DESC		"Configure debug output"
+#define SCTPCTL_DEBUG_MIN		0
+#define SCTPCTL_DEBUG_MAX		0xFFFFFFFF
+#define SCTPCTL_DEBUG_DEFAULT		0
+
+#ifdef SCTP_DEBUG
+#define SCTPCTL_MAXID		50
+#else
+#define SCTPCTL_MAXID		49
+#endif
+
+/*
+ * Names for SCTP sysctl variables, indexed by OID: entry N must name the
+ * variable whose SCTPCTL_* OID above is N (entry 0 is unused, "debug" only with SCTP_DEBUG).
+ */
+#ifdef SCTP_DEBUG
+#define SCTPCTL_NAMES { \
+	{ 0, 0 },					/*  0: unused */ \
+	{ "sendspace", CTLTYPE_INT },			/*  1: NOTE(review): OID comment says "maxdgram"; confirm leaf name */ \
+	{ "recvspace", CTLTYPE_INT },			/*  2 */ \
+	{ "autoasconf", CTLTYPE_INT },			/*  3 */ \
+	{ "ecn_enable", CTLTYPE_INT },			/*  4 */ \
+	{ "ecn_nonce", CTLTYPE_INT },			/*  5 */ \
+	{ "strict_sacks", CTLTYPE_INT },		/*  6 */ \
+	{ "loopback_nocsum", CTLTYPE_INT },		/*  7 */ \
+	{ "strict_init", CTLTYPE_INT },			/*  8 */ \
+	{ "peer_chkoh", CTLTYPE_INT },			/*  9 */ \
+	{ "maxburst", CTLTYPE_INT },			/* 10 */ \
+	{ "maxchunks", CTLTYPE_INT },			/* 11 */ \
+	{ "tcbhashsize", CTLTYPE_INT },			/* 12 */ \
+	{ "pcbhashsize", CTLTYPE_INT },			/* 13 */ \
+	{ "min_split_point", CTLTYPE_INT },		/* 14 */ \
+	{ "chunkscale", CTLTYPE_INT },			/* 15 */ \
+	{ "delayed_sack_time", CTLTYPE_INT },		/* 16 */ \
+	{ "sack_freq", CTLTYPE_INT },			/* 17 */ \
+	{ "sys_resource", CTLTYPE_INT },		/* 18 */ \
+	{ "asoc_resource", CTLTYPE_INT },		/* 19 */ \
+	{ "heartbeat_interval", CTLTYPE_INT },		/* 20 */ \
+	{ "pmtu_raise_time", CTLTYPE_INT },		/* 21 */ \
+	{ "shutdown_guard_time", CTLTYPE_INT },		/* 22 */ \
+	{ "secret_lifetime", CTLTYPE_INT },		/* 23 */ \
+	{ "rto_max", CTLTYPE_INT },			/* 24 */ \
+	{ "rto_min", CTLTYPE_INT },			/* 25 */ \
+	{ "rto_initial", CTLTYPE_INT },			/* 26 */ \
+	{ "init_rto_max", CTLTYPE_INT },		/* 27 */ \
+	{ "valid_cookie_life", CTLTYPE_INT },		/* 28 */ \
+	{ "init_rtx_max", CTLTYPE_INT },		/* 29 */ \
+	{ "assoc_rtx_max", CTLTYPE_INT },		/* 30 */ \
+	{ "path_rtx_max", CTLTYPE_INT },		/* 31 */ \
+	{ "add_more_on_output", CTLTYPE_INT },		/* 32 */ \
+	{ "outgoing_streams", CTLTYPE_INT },		/* 33 */ \
+	{ "cmt_on_off", CTLTYPE_INT },			/* 34 */ \
+	{ "cwnd_maxburst", CTLTYPE_INT },		/* 35 */ \
+	{ "early_fast_retran", CTLTYPE_INT },		/* 36 */ \
+	{ "use_rttvar_congctrl", CTLTYPE_INT },		/* 37 */ \
+	{ "deadlock_detect", CTLTYPE_INT },		/* 38 */ \
+	{ "early_fast_retran_msec", CTLTYPE_INT },	/* 39 */ \
+	{ "asconf_auth_nochk", CTLTYPE_INT },		/* 40 */ \
+	{ "auth_disable", CTLTYPE_INT },		/* 41 */ \
+	{ "nat_friendly", CTLTYPE_INT },		/* 42 */ \
+	{ "abc_l_var", CTLTYPE_INT },			/* 43 */ \
+	{ "max_chained_mbufs", CTLTYPE_INT },		/* 44 */ \
+	{ "cmt_use_dac", CTLTYPE_INT },			/* 45 */ \
+	{ "do_sctp_drain", CTLTYPE_INT },		/* 46 */ \
+	{ "hb_max_burst", CTLTYPE_INT },		/* 47 */ \
+	{ "abort_at_limit", CTLTYPE_INT },		/* 48 */ \
+	{ "strict_data_order", CTLTYPE_INT },		/* 49 */ \
+	{ "debug", CTLTYPE_INT },			/* 50 */ \
+}
+#else
+#define SCTPCTL_NAMES { \
+	{ 0, 0 },					/*  0: unused */ \
+	{ "sendspace", CTLTYPE_INT },			/*  1: NOTE(review): OID comment says "maxdgram"; confirm leaf name */ \
+	{ "recvspace", CTLTYPE_INT },			/*  2 */ \
+	{ "autoasconf", CTLTYPE_INT },			/*  3 */ \
+	{ "ecn_enable", CTLTYPE_INT },			/*  4 */ \
+	{ "ecn_nonce", CTLTYPE_INT },			/*  5 */ \
+	{ "strict_sacks", CTLTYPE_INT },		/*  6 */ \
+	{ "loopback_nocsum", CTLTYPE_INT },		/*  7 */ \
+	{ "strict_init", CTLTYPE_INT },			/*  8 */ \
+	{ "peer_chkoh", CTLTYPE_INT },			/*  9 */ \
+	{ "maxburst", CTLTYPE_INT },			/* 10 */ \
+	{ "maxchunks", CTLTYPE_INT },			/* 11 */ \
+	{ "tcbhashsize", CTLTYPE_INT },			/* 12 */ \
+	{ "pcbhashsize", CTLTYPE_INT },			/* 13 */ \
+	{ "min_split_point", CTLTYPE_INT },		/* 14 */ \
+	{ "chunkscale", CTLTYPE_INT },			/* 15 */ \
+	{ "delayed_sack_time", CTLTYPE_INT },		/* 16 */ \
+	{ "sack_freq", CTLTYPE_INT },			/* 17 */ \
+	{ "sys_resource", CTLTYPE_INT },		/* 18 */ \
+	{ "asoc_resource", CTLTYPE_INT },		/* 19 */ \
+	{ "heartbeat_interval", CTLTYPE_INT },		/* 20 */ \
+	{ "pmtu_raise_time", CTLTYPE_INT },		/* 21 */ \
+	{ "shutdown_guard_time", CTLTYPE_INT },		/* 22 */ \
+	{ "secret_lifetime", CTLTYPE_INT },		/* 23 */ \
+	{ "rto_max", CTLTYPE_INT },			/* 24 */ \
+	{ "rto_min", CTLTYPE_INT },			/* 25 */ \
+	{ "rto_initial", CTLTYPE_INT },			/* 26 */ \
+	{ "init_rto_max", CTLTYPE_INT },		/* 27 */ \
+	{ "valid_cookie_life", CTLTYPE_INT },		/* 28 */ \
+	{ "init_rtx_max", CTLTYPE_INT },		/* 29 */ \
+	{ "assoc_rtx_max", CTLTYPE_INT },		/* 30 */ \
+	{ "path_rtx_max", CTLTYPE_INT },		/* 31 */ \
+	{ "add_more_on_output", CTLTYPE_INT },		/* 32 */ \
+	{ "outgoing_streams", CTLTYPE_INT },		/* 33 */ \
+	{ "cmt_on_off", CTLTYPE_INT },			/* 34 */ \
+	{ "cwnd_maxburst", CTLTYPE_INT },		/* 35 */ \
+	{ "early_fast_retran", CTLTYPE_INT },		/* 36 */ \
+	{ "use_rttvar_congctrl", CTLTYPE_INT },		/* 37 */ \
+	{ "deadlock_detect", CTLTYPE_INT },		/* 38 */ \
+	{ "early_fast_retran_msec", CTLTYPE_INT },	/* 39 */ \
+	{ "asconf_auth_nochk", CTLTYPE_INT },		/* 40 */ \
+	{ "auth_disable", CTLTYPE_INT },		/* 41 */ \
+	{ "nat_friendly", CTLTYPE_INT },		/* 42 */ \
+	{ "abc_l_var", CTLTYPE_INT },			/* 43 */ \
+	{ "max_chained_mbufs", CTLTYPE_INT },		/* 44 */ \
+	{ "cmt_use_dac", CTLTYPE_INT },			/* 45 */ \
+	{ "do_sctp_drain", CTLTYPE_INT },		/* 46 */ \
+	{ "hb_max_burst", CTLTYPE_INT },		/* 47 */ \
+	{ "abort_at_limit", CTLTYPE_INT },		/* 48 */ \
+	{ "strict_data_order", CTLTYPE_INT },		/* 49 */ \
+}
+#endif
+
+
+#if defined(_KERNEL)
+
+/*
+ * variable definitions
+ */
+extern uint32_t sctp_sendspace;
+extern uint32_t sctp_recvspace;
+extern uint32_t sctp_auto_asconf; +extern uint32_t sctp_ecn_enable; +extern uint32_t sctp_ecn_nonce; +extern uint32_t sctp_strict_sacks; +extern uint32_t sctp_no_csum_on_loopback; +extern uint32_t sctp_strict_init; +extern uint32_t sctp_peer_chunk_oh; +extern uint32_t sctp_max_burst_default; +extern uint32_t sctp_max_chunks_on_queue; +extern uint32_t sctp_hashtblsize; +extern uint32_t sctp_pcbtblsize; +extern uint32_t sctp_min_split_point; +extern uint32_t sctp_chunkscale; +extern uint32_t sctp_delayed_sack_time_default; +extern uint32_t sctp_sack_freq_default; +extern uint32_t sctp_system_free_resc_limit; +extern uint32_t sctp_asoc_free_resc_limit; +extern uint32_t sctp_heartbeat_interval_default; +extern uint32_t sctp_pmtu_raise_time_default; +extern uint32_t sctp_shutdown_guard_time_default; +extern uint32_t sctp_secret_lifetime_default; +extern uint32_t sctp_rto_max_default; +extern uint32_t sctp_rto_min_default; +extern uint32_t sctp_rto_initial_default; +extern uint32_t sctp_init_rto_max_default; +extern uint32_t sctp_valid_cookie_life_default; +extern uint32_t sctp_init_rtx_max_default; +extern uint32_t sctp_assoc_rtx_max_default; +extern uint32_t sctp_path_rtx_max_default; +extern uint32_t sctp_add_more_threshold; +extern uint32_t sctp_nr_outgoing_streams_default; +extern uint32_t sctp_cmt_on_off; +extern uint32_t sctp_use_cwnd_based_maxburst; +extern uint32_t sctp_early_fr; +extern uint32_t sctp_use_rttvar_cc; +extern uint32_t sctp_says_check_for_deadlock; +extern uint32_t sctp_early_fr_msec; +extern uint32_t sctp_asconf_auth_nochk; +extern uint32_t sctp_auth_disable; +extern uint32_t sctp_nat_friendly; +extern uint32_t sctp_L2_abc_variable; +extern uint32_t sctp_mbuf_threshold_count; +extern uint32_t sctp_cmt_use_dac; +extern uint32_t sctp_do_drain; +extern uint32_t sctp_hb_maxburst; +extern uint32_t sctp_abort_if_one_2_one_hits_limit; +extern uint32_t sctp_strict_data_order; + +#if defined(SCTP_DEBUG) +extern uint32_t sctp_debug_on; + +#endif + +extern 
struct sctpstat sctpstat; + + +#ifdef SYSCTL_DECL +SYSCTL_DECL(_net_inet_sctp); +#endif + +#endif /* _KERNEL */ +#endif /* __sctp_sysctl_h__ */ diff --git a/sys/netinet/sctp_timer.c b/sys/netinet/sctp_timer.c index 62de925..1d6902e 100644 --- a/sys/netinet/sctp_timer.c +++ b/sys/netinet/sctp_timer.c @@ -40,6 +40,7 @@ __FBSDID("$FreeBSD$"); #include <netinet6/sctp6_var.h> #endif #include <netinet/sctp_var.h> +#include <netinet/sctp_sysctl.h> #include <netinet/sctp_timer.h> #include <netinet/sctputil.h> #include <netinet/sctp_output.h> @@ -51,13 +52,6 @@ __FBSDID("$FreeBSD$"); #include <netinet/sctp_uio.h> -#ifdef SCTP_DEBUG -extern uint32_t sctp_debug_on; - -#endif /* SCTP_DEBUG */ - - -extern unsigned int sctp_early_fr_msec; void sctp_early_fr_timer(struct sctp_inpcb *inp, @@ -224,6 +218,7 @@ sctp_threshold_management(struct sctp_inpcb *inp, struct sctp_tcb *stcb, if (net->dest_state & SCTP_ADDR_REACHABLE) { net->dest_state &= ~SCTP_ADDR_REACHABLE; net->dest_state |= SCTP_ADDR_NOT_REACHABLE; + net->dest_state &= ~SCTP_ADDR_REQ_PRIMARY; if (net == stcb->asoc.primary_destination) { net->dest_state |= SCTP_ADDR_WAS_PRIMARY; } @@ -368,6 +363,10 @@ sctp_find_alternate_net(struct sctp_tcb *stcb, if (sin6->sin6_family == AF_INET6) { (void)sa6_recoverscope(sin6); } + if (alt->ro._s_addr) { + sctp_free_ifa(alt->ro._s_addr); + alt->ro._s_addr = NULL; + } alt->src_addr_selected = 0; } if ( @@ -441,8 +440,6 @@ sctp_backoff_on_timeout(struct sctp_tcb *stcb, } } -extern int sctp_peer_chunk_oh; - static int sctp_mark_all_for_resend(struct sctp_tcb *stcb, struct sctp_nets *net, @@ -635,6 +632,11 @@ sctp_mark_all_for_resend(struct sctp_tcb *stcb, } if (stcb->asoc.total_flight_count > 0) stcb->asoc.total_flight_count--; + if (chk->rec.data.chunk_was_revoked) { + /* deflate the cwnd */ + chk->whoTo->cwnd -= chk->book_size; + chk->rec.data.chunk_was_revoked = 0; + } chk->sent = SCTP_DATAGRAM_RESEND; SCTP_STAT_INCR(sctps_markedretrans); net->marked_retrans++; @@ -934,6 +936,10 @@ 
sctp_t3rxt_timer(struct sctp_inpcb *inp, (struct sockaddr *)NULL, alt) == 0) { net->dest_state |= SCTP_ADDR_WAS_PRIMARY; + if (net->ro._s_addr) { + sctp_free_ifa(net->ro._s_addr); + net->ro._s_addr = NULL; + } net->src_addr_selected = 0; } } @@ -1387,6 +1393,15 @@ sctp_heartbeat_timer(struct sctp_inpcb *inp, struct sctp_tcb *stcb, { if (net) { if (net->hb_responded == 0) { + if (net->ro._s_addr) { + /* + * Invalidate the src address if we did not + * get a response last time. + */ + sctp_free_ifa(net->ro._s_addr); + net->ro._s_addr = NULL; + net->src_addr_selected = 0; + } sctp_backoff_on_timeout(stcb, net, 1, 0); } /* Zero PBA, if it needs it */ @@ -1415,10 +1430,18 @@ sctp_heartbeat_timer(struct sctp_inpcb *inp, struct sctp_tcb *stcb, if ((net->dest_state & SCTP_ADDR_UNCONFIRMED) && (net->dest_state & SCTP_ADDR_REACHABLE)) { cnt_sent++; + if (net->hb_responded == 0) { + /* Did we respond last time? */ + if (net->ro._s_addr) { + sctp_free_ifa(net->ro._s_addr); + net->ro._s_addr = NULL; + net->src_addr_selected = 0; + } + } if (sctp_send_hb(stcb, 1, net) == 0) { break; } - if (cnt_sent >= stcb->asoc.max_burst) + if (cnt_sent >= sctp_hb_maxburst) break; } } @@ -1598,6 +1621,7 @@ void sctp_iterator_timer(struct sctp_iterator *it) { int iteration_count = 0; + int inp_skip = 0; /* * only one iterator can run at a time. 
This is the only way we can @@ -1610,7 +1634,7 @@ sctp_iterator_timer(struct sctp_iterator *it) done_with_iterator: SCTP_ITERATOR_UNLOCK(); SCTP_INP_INFO_WLOCK(); - LIST_REMOVE(it, sctp_nxt_itr); + TAILQ_REMOVE(&sctppcbinfo.iteratorhead, it, sctp_nxt_itr); /* stopping the callout is not needed, in theory */ SCTP_INP_INFO_WUNLOCK(); SCTP_OS_TIMER_STOP(&it->tmr.timer); @@ -1650,14 +1674,24 @@ select_a_new_ep: SCTP_INP_WUNLOCK(it->inp); SCTP_INP_RLOCK(it->inp); /* now go through each assoc which is in the desired state */ + if (it->done_current_ep == 0) { + if (it->function_inp != NULL) + inp_skip = (*it->function_inp) (it->inp, it->pointer, it->val); + it->done_current_ep = 1; + } if (it->stcb == NULL) { /* run the per instance function */ - if (it->function_inp != NULL) - (*it->function_inp) (it->inp, it->pointer, it->val); - it->stcb = LIST_FIRST(&it->inp->sctp_asoc_list); } SCTP_INP_RUNLOCK(it->inp); + if ((inp_skip) || it->stcb == NULL) { + if (it->function_inp_end != NULL) { + inp_skip = (*it->function_inp_end) (it->inp, + it->pointer, + it->val); + } + goto no_stcb; + } if ((it->stcb) && (it->stcb->asoc.stcb_starting_point_for_iterator == it)) { it->stcb->asoc.stcb_starting_point_for_iterator = NULL; @@ -1695,8 +1729,17 @@ select_a_new_ep: SCTP_TCB_UNLOCK(it->stcb); next_assoc: it->stcb = LIST_NEXT(it->stcb, sctp_tcblist); + if (it->stcb == NULL) { + if (it->function_inp_end != NULL) { + inp_skip = (*it->function_inp_end) (it->inp, + it->pointer, + it->val); + } + } } +no_stcb: /* done with all assocs on this endpoint, move on to next endpoint */ + it->done_current_ep = 0; SCTP_INP_WLOCK(it->inp); it->inp->inp_starting_point_for_iterator = NULL; SCTP_INP_WUNLOCK(it->inp); diff --git a/sys/netinet/sctp_uio.h b/sys/netinet/sctp_uio.h index 89a81cc..1d60215 100644 --- a/sys/netinet/sctp_uio.h +++ b/sys/netinet/sctp_uio.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2001-2006, Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2001-2007, Cisco Systems, Inc. 
All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -36,8 +36,6 @@ __FBSDID("$FreeBSD$"); #define __sctp_uio_h__ - - #if ! defined(_KERNEL) #include <stdint.h> #endif @@ -409,7 +407,6 @@ struct sctp_paddrparams { uint32_t spp_hbinterval; uint16_t spp_pathmaxrxt; uint32_t spp_pathmtu; - uint32_t spp_sackdelay; uint32_t spp_flags; uint32_t spp_ipv6_flowlabel; uint8_t spp_ipv4_tos; @@ -451,6 +448,8 @@ struct sctp_assocparams { uint32_t sasoc_peer_rwnd; uint32_t sasoc_local_rwnd; uint32_t sasoc_cookie_life; + uint32_t sasoc_sack_delay; + uint32_t sasoc_sack_freq; }; struct sctp_setprim { @@ -890,10 +889,10 @@ struct sctpstat { #define SCTP_STAT_DECR_GAUGE32(_x) SCTP_STAT_DECR(_x) union sctp_sockstore { -#ifdef AF_INET +#if defined(INET) || !defined(_KERNEL) struct sockaddr_in sin; #endif -#ifdef AF_INET6 +#if defined(INET6) || !defined(_KERNEL) struct sockaddr_in6 sin6; #endif struct sockaddr sa; @@ -993,13 +992,12 @@ sctp_sorecvmsg(struct socket *so, /* * API system calls */ - #if !(defined(_KERNEL)) __BEGIN_DECLS int sctp_peeloff __P((int, sctp_assoc_t)); int sctp_bindx __P((int, struct sockaddr *, int, int)); -int sctp_connectx __P((int, const struct sockaddr *, int)); +int sctp_connectx __P((int, const struct sockaddr *, int, sctp_assoc_t *)); int sctp_getaddrlen __P((sa_family_t)); int sctp_getpaddrs __P((int, sctp_assoc_t, struct sockaddr **)); void sctp_freepaddrs __P((struct sockaddr *)); diff --git a/sys/netinet/sctp_usrreq.c b/sys/netinet/sctp_usrreq.c index 1e0d5b3..550a8b3 100644 --- a/sys/netinet/sctp_usrreq.c +++ b/sys/netinet/sctp_usrreq.c @@ -37,92 +37,18 @@ __FBSDID("$FreeBSD$"); #include <netinet/sctp_pcb.h> #include <netinet/sctp_header.h> #include <netinet/sctp_var.h> +#include <netinet/sctp_sysctl.h> #include <netinet/sctp_output.h> #include <netinet/sctp_bsd_addr.h> #include <netinet/sctp_uio.h> #include <netinet/sctp_asconf.h> 
#include <netinet/sctputil.h> #include <netinet/sctp_indata.h> -#include <netinet/sctp_asconf.h> #include <netinet/sctp_timer.h> #include <netinet/sctp_auth.h> -/* - * sysctl tunable variables - */ -int sctp_sendspace = (128 * 1024); -int sctp_recvspace = 128 * (1024 + -#ifdef INET6 - sizeof(struct sockaddr_in6) -#else - sizeof(struct sockaddr_in) -#endif -); -int sctp_mbuf_threshold_count = SCTP_DEFAULT_MBUFS_IN_CHAIN; -int sctp_auto_asconf = SCTP_DEFAULT_AUTO_ASCONF; -int sctp_ecn_enable = 1; -int sctp_ecn_nonce = 0; -int sctp_strict_sacks = 0; -int sctp_no_csum_on_loopback = 1; -int sctp_strict_init = 1; -int sctp_abort_if_one_2_one_hits_limit = 0; -int sctp_strict_data_order = 0; - -int sctp_peer_chunk_oh = sizeof(struct mbuf); -int sctp_max_burst_default = SCTP_DEF_MAX_BURST; -int sctp_use_cwnd_based_maxburst = 1; -int sctp_do_drain = 1; -int sctp_warm_the_crc32_table = 0; - -unsigned int sctp_max_chunks_on_queue = SCTP_ASOC_MAX_CHUNKS_ON_QUEUE; -unsigned int sctp_delayed_sack_time_default = SCTP_RECV_MSEC; -unsigned int sctp_heartbeat_interval_default = SCTP_HB_DEFAULT_MSEC; -unsigned int sctp_pmtu_raise_time_default = SCTP_DEF_PMTU_RAISE_SEC; -unsigned int sctp_shutdown_guard_time_default = SCTP_DEF_MAX_SHUTDOWN_SEC; -unsigned int sctp_secret_lifetime_default = SCTP_DEFAULT_SECRET_LIFE_SEC; -unsigned int sctp_rto_max_default = SCTP_RTO_UPPER_BOUND; -unsigned int sctp_rto_min_default = SCTP_RTO_LOWER_BOUND; -unsigned int sctp_rto_initial_default = SCTP_RTO_INITIAL; -unsigned int sctp_init_rto_max_default = SCTP_RTO_UPPER_BOUND; -unsigned int sctp_valid_cookie_life_default = SCTP_DEFAULT_COOKIE_LIFE; -unsigned int sctp_init_rtx_max_default = SCTP_DEF_MAX_INIT; -unsigned int sctp_assoc_rtx_max_default = SCTP_DEF_MAX_SEND; -unsigned int sctp_path_rtx_max_default = SCTP_DEF_MAX_PATH_RTX; -unsigned int sctp_nr_outgoing_streams_default = SCTP_OSTREAM_INITIAL; -unsigned int sctp_add_more_threshold = SCTP_DEFAULT_ADD_MORE; - -uint32_t sctp_asoc_free_resc_limit = 
SCTP_DEF_ASOC_RESC_LIMIT; -uint32_t sctp_system_free_resc_limit = SCTP_DEF_SYSTEM_RESC_LIMIT; - -int sctp_min_split_point = SCTP_DEFAULT_SPLIT_POINT_MIN; -int sctp_pcbtblsize = SCTP_PCBHASHSIZE; -int sctp_hashtblsize = SCTP_TCBHASHSIZE; -int sctp_chunkscale = SCTP_CHUNKQUEUE_SCALE; - -unsigned int sctp_cmt_on_off = 0; -unsigned int sctp_cmt_sockopt_on_off = 0; -unsigned int sctp_cmt_use_dac = 0; - -int sctp_L2_abc_variable = 1; -unsigned int sctp_early_fr = 0; -unsigned int sctp_early_fr_msec = SCTP_MINFR_MSEC_TIMER; -unsigned int sctp_use_rttvar_cc = 0; -int sctp_says_check_for_deadlock = 0; -unsigned int sctp_asconf_auth_nochk = 0; -unsigned int sctp_nat_friendly = 1; -unsigned int sctp_auth_disable = 0; -unsigned int sctp_auth_random_len = SCTP_AUTH_RANDOM_SIZE_DEFAULT; -unsigned int sctp_auth_hmac_id_default = SCTP_AUTH_HMAC_ID_SHA1; -struct sctpstat sctpstat; - -#ifdef SCTP_DEBUG -extern uint32_t sctp_debug_on; - -#endif /* SCTP_DEBUG */ - - void sctp_init(void) { @@ -336,6 +262,11 @@ sctp_notify(struct sctp_inpcb *inp, if ((errno == EHOSTUNREACH) || (errno == EHOSTDOWN)) { if (net->dest_state & SCTP_ADDR_REACHABLE) { /* Ok that destination is NOT reachable */ + printf("ICMP (thresh %d/%d) takes interface %p down\n", + net->error_count, + net->failure_threshold, + net); + net->dest_state &= ~SCTP_ADDR_REACHABLE; net->dest_state |= SCTP_ADDR_NOT_REACHABLE; net->error_count = net->failure_threshold + 1; @@ -384,7 +315,9 @@ sctp_ctlinput(cmd, sa, vip) { struct ip *ip = vip; struct sctphdr *sh; + uint32_t vrf_id; + vrf_id = SCTP_DEFAULT_VRFID; if (sa->sa_family != AF_INET || ((struct sockaddr_in *)sa)->sin_addr.s_addr == INADDR_ANY) { return; @@ -417,7 +350,7 @@ sctp_ctlinput(cmd, sa, vip) */ stcb = sctp_findassociation_addr_sa((struct sockaddr *)&from, (struct sockaddr *)&to, - &inp, &net, 1); + &inp, &net, 1, vrf_id); if (stcb != NULL && inp && (inp->sctp_socket != NULL)) { if (cmd != PRC_MSGSIZE) { int cm; @@ -455,13 +388,16 @@ sctp_getcred(SYSCTL_HANDLER_ARGS) 
struct sctp_nets *net; struct sctp_tcb *stcb; int error; + uint32_t vrf_id; + vrf_id = SCTP_DEFAULT_VRFID; /* * XXXRW: Other instances of getcred use SUSER_ALLOWJAIL, as socket * visibility is scoped using cr_canseesocket(), which it is not * here. */ - error = priv_check_cred(req->td->td_ucred, PRIV_NETINET_GETCRED, 0); + error = priv_check_cred(req->td->td_ucred, PRIV_NETINET_GETCRED, + SUSER_ALLOWJAIL); if (error) return (error); @@ -471,7 +407,7 @@ sctp_getcred(SYSCTL_HANDLER_ARGS) stcb = sctp_findassociation_addr_sa(sintosa(&addrs[0]), sintosa(&addrs[1]), - &inp, &net, 1); + &inp, &net, 1, vrf_id); if (stcb == NULL || inp == NULL || inp->sctp_socket == NULL) { if ((inp != NULL) && (stcb == NULL)) { /* reduce ref-count */ @@ -506,416 +442,6 @@ out: SYSCTL_PROC(_net_inet_sctp, OID_AUTO, getcred, CTLTYPE_OPAQUE | CTLFLAG_RW, 0, 0, sctp_getcred, "S,ucred", "Get the ucred of a SCTP connection"); -static int -sctp_assoclist(SYSCTL_HANDLER_ARGS) -{ - unsigned int number_of_endpoints; - unsigned int number_of_local_addresses; - unsigned int number_of_associations; - unsigned int number_of_remote_addresses; - unsigned int n; - int error; - struct sctp_inpcb *inp; - struct sctp_tcb *stcb; - struct sctp_nets *net; - struct sctp_laddr *laddr; - struct xsctp_inpcb xinpcb; - struct xsctp_tcb xstcb; - -/* struct xsctp_laddr xladdr; */ - struct xsctp_raddr xraddr; - - number_of_endpoints = 0; - number_of_local_addresses = 0; - number_of_associations = 0; - number_of_remote_addresses = 0; - - SCTP_INP_INFO_RLOCK(); - if (req->oldptr == USER_ADDR_NULL) { - LIST_FOREACH(inp, &sctppcbinfo.listhead, sctp_list) { - SCTP_INP_RLOCK(inp); - number_of_endpoints++; - /* FIXME MT */ - LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) { - number_of_local_addresses++; - } - LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) { - number_of_associations++; - /* FIXME MT */ - LIST_FOREACH(laddr, &stcb->asoc.sctp_local_addr_list, sctp_nxt_addr) { - number_of_local_addresses++; - } - 
TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) { - number_of_remote_addresses++; - } - } - SCTP_INP_RUNLOCK(inp); - } - SCTP_INP_INFO_RUNLOCK(); - n = (number_of_endpoints + 1) * sizeof(struct xsctp_inpcb) + - number_of_local_addresses * sizeof(struct xsctp_laddr) + - number_of_associations * sizeof(struct xsctp_tcb) + - number_of_remote_addresses * sizeof(struct xsctp_raddr); -#ifdef SCTP_DEBUG - printf("inps = %u, stcbs = %u, laddrs = %u, raddrs = %u\n", - number_of_endpoints, number_of_associations, - number_of_local_addresses, number_of_remote_addresses); -#endif - /* request some more memory than needed */ - req->oldidx = (n + n / 8); - return 0; - } - if (req->newptr != USER_ADDR_NULL) { - SCTP_INP_INFO_RUNLOCK(); - return EPERM; - } - LIST_FOREACH(inp, &sctppcbinfo.listhead, sctp_list) { - SCTP_INP_RLOCK(inp); - number_of_local_addresses = 0; - number_of_associations = 0; - /* - * LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) - * { number_of_local_addresses++; } - */ - LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) { - number_of_associations++; - } - xinpcb.last = 0; - xinpcb.local_port = ntohs(inp->sctp_lport); - xinpcb.number_local_addresses = number_of_local_addresses; - xinpcb.number_associations = number_of_associations; - xinpcb.flags = inp->sctp_flags; - xinpcb.features = inp->sctp_features; - xinpcb.total_sends = inp->total_sends; - xinpcb.total_recvs = inp->total_recvs; - xinpcb.total_nospaces = inp->total_nospaces; - SCTP_INP_INCR_REF(inp); - SCTP_INP_RUNLOCK(inp); - SCTP_INP_INFO_RUNLOCK(); - error = SYSCTL_OUT(req, &xinpcb, sizeof(struct xsctp_inpcb)); - if (error) { - return error; - } - SCTP_INP_INFO_RLOCK(); - SCTP_INP_RLOCK(inp); - /* FIXME MT */ - /* - * LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) - * { error = SYSCTL_OUT(req, &xladdr, sizeof(struct - * xsctp_laddr)); if (error) { #if - * defined(SCTP_PER_SOCKET_LOCKING) - * SCTP_SOCKET_UNLOCK(SCTP_INP_SO(inp), 1); - * 
SCTP_UNLOCK_SHARED(sctppcbinfo.ipi_ep_mtx); #endif - * SCTP_INP_RUNLOCK(inp); SCTP_INP_INFO_RUNLOCK(); return - * error; } } - */ - LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) { - SCTP_TCB_LOCK(stcb); - atomic_add_int(&stcb->asoc.refcnt, 1); - SCTP_TCB_UNLOCK(stcb); - number_of_local_addresses = 0; - number_of_remote_addresses = 0; - /* FIXME MT */ - /* - * LIST_FOREACH(laddr, - * &stcb->asoc.sctp_local_addr_list, sctp_nxt_addr) - * { number_of_local_addresses++; } - */ - TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) { - number_of_remote_addresses++; - } - xstcb.LocalPort = ntohs(inp->sctp_lport); - xstcb.RemPort = ntohs(stcb->rport); - if (stcb->asoc.primary_destination != NULL) - xstcb.RemPrimAddr = stcb->asoc.primary_destination->ro._l_addr; - xstcb.HeartBeatInterval = stcb->asoc.heart_beat_delay; - xstcb.State = SCTP_GET_STATE(&stcb->asoc); /* FIXME */ - xstcb.InStreams = stcb->asoc.streamincnt; - xstcb.OutStreams = stcb->asoc.streamoutcnt; - xstcb.MaxRetr = stcb->asoc.overall_error_count; - xstcb.PrimProcess = 0; /* not really supported yet */ - xstcb.T1expireds = stcb->asoc.timoinit + stcb->asoc.timocookie; - xstcb.T2expireds = stcb->asoc.timoshutdown + stcb->asoc.timoshutdownack; - xstcb.RtxChunks = stcb->asoc.marked_retrans; - xstcb.StartTime = stcb->asoc.start_time; - xstcb.DiscontinuityTime = stcb->asoc.discontinuity_time; - - xstcb.number_local_addresses = number_of_local_addresses; - xstcb.number_remote_addresses = number_of_remote_addresses; - xstcb.total_sends = stcb->total_sends; - xstcb.total_recvs = stcb->total_recvs; - xstcb.local_tag = stcb->asoc.my_vtag; - xstcb.remote_tag = stcb->asoc.peer_vtag; - xstcb.initial_tsn = stcb->asoc.init_seq_number; - xstcb.highest_tsn = stcb->asoc.sending_seq - 1; - xstcb.cumulative_tsn = stcb->asoc.last_acked_seq; - xstcb.cumulative_tsn_ack = stcb->asoc.cumulative_tsn; - SCTP_INP_RUNLOCK(inp); - SCTP_INP_INFO_RUNLOCK(); - error = SYSCTL_OUT(req, &xstcb, sizeof(struct xsctp_tcb)); - if (error) { - 
atomic_add_int(&stcb->asoc.refcnt, -1); - return error; - } - /* FIXME MT */ - /* - * LIST_FOREACH(laddr, - * &stcb->asoc.sctp_local_addr_list, sctp_nxt_addr) - * { error = SYSCTL_OUT(req, &xladdr, sizeof(struct - * xsctp_laddr)); if (error) { #if - * defined(SCTP_PER_SOCKET_LOCKING) - * SCTP_SOCKET_UNLOCK(SCTP_INP_SO(inp), 1); - * SCTP_UNLOCK_SHARED(sctppcbinfo.ipi_ep_mtx); - * #endif SCTP_INP_RUNLOCK(inp); - * SCTP_INP_INFO_RUNLOCK(); return error; } - * */ - TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) { - xraddr.RemAddr = net->ro._l_addr; - xraddr.RemAddrActive = ((net->dest_state & SCTP_ADDR_REACHABLE) == SCTP_ADDR_REACHABLE); - xraddr.RemAddrConfirmed = ((net->dest_state & SCTP_ADDR_UNCONFIRMED) == 0); - xraddr.RemAddrHBActive = ((net->dest_state & SCTP_ADDR_NOHB) == 0); - xraddr.RemAddrRTO = net->RTO; - xraddr.RemAddrMaxPathRtx = net->failure_threshold; - xraddr.RemAddrRtx = net->marked_retrans; - xraddr.RemAddrErrorCounter = net->error_count; - xraddr.RemAddrCwnd = net->cwnd; - xraddr.RemAddrFlightSize = net->flight_size; - xraddr.RemAddrStartTime = net->start_time; - error = SYSCTL_OUT(req, &xraddr, sizeof(struct xsctp_raddr)); - if (error) { - atomic_add_int(&stcb->asoc.refcnt, -1); - return error; - } - } - atomic_add_int(&stcb->asoc.refcnt, -1); - SCTP_INP_INFO_RLOCK(); - SCTP_INP_RLOCK(inp); - } - SCTP_INP_DECR_REF(inp); - SCTP_INP_RUNLOCK(inp); - } - SCTP_INP_INFO_RUNLOCK(); - - xinpcb.last = 1; - xinpcb.local_port = 0; - xinpcb.number_local_addresses = 0; - xinpcb.number_associations = 0; - xinpcb.flags = 0; - xinpcb.features = 0; - error = SYSCTL_OUT(req, &xinpcb, sizeof(struct xsctp_inpcb)); - return error; -} - -/* - * sysctl definitions - */ - -SYSCTL_INT(_net_inet_sctp, OID_AUTO, sendspace, CTLFLAG_RW, - &sctp_sendspace, 0, "Maximum outgoing SCTP buffer size"); - -SYSCTL_INT(_net_inet_sctp, OID_AUTO, recvspace, CTLFLAG_RW, - &sctp_recvspace, 0, "Maximum incoming SCTP buffer size"); - -SYSCTL_INT(_net_inet_sctp, OID_AUTO, auto_asconf, 
CTLFLAG_RW, - &sctp_auto_asconf, 0, "Enable SCTP Auto-ASCONF"); - -SYSCTL_INT(_net_inet_sctp, OID_AUTO, ecn_enable, CTLFLAG_RW, - &sctp_ecn_enable, 0, "Enable SCTP ECN"); - -SYSCTL_INT(_net_inet_sctp, OID_AUTO, ecn_nonce, CTLFLAG_RW, - &sctp_ecn_nonce, 0, "Enable SCTP ECN Nonce"); - -SYSCTL_INT(_net_inet_sctp, OID_AUTO, strict_sacks, CTLFLAG_RW, - &sctp_strict_sacks, 0, "Enable SCTP Strict SACK checking"); - -SYSCTL_INT(_net_inet_sctp, OID_AUTO, loopback_nocsum, CTLFLAG_RW, - &sctp_no_csum_on_loopback, 0, - "Enable NO Csum on packets sent on loopback"); - -SYSCTL_INT(_net_inet_sctp, OID_AUTO, strict_init, CTLFLAG_RW, - &sctp_strict_init, 0, - "Enable strict INIT/INIT-ACK singleton enforcement"); - -SYSCTL_INT(_net_inet_sctp, OID_AUTO, peer_chkoh, CTLFLAG_RW, - &sctp_peer_chunk_oh, 0, - "Amount to debit peers rwnd per chunk sent"); - -SYSCTL_INT(_net_inet_sctp, OID_AUTO, maxburst, CTLFLAG_RW, - &sctp_max_burst_default, 0, - "Default max burst for sctp endpoints"); - -SYSCTL_INT(_net_inet_sctp, OID_AUTO, maxchunks, CTLFLAG_RW, - &sctp_max_chunks_on_queue, 0, - "Default max chunks on queue per asoc"); - -SYSCTL_INT(_net_inet_sctp, OID_AUTO, tcbhashsize, CTLFLAG_RW, - &sctp_hashtblsize, 0, - "Tuneable for Hash table sizes"); - -SYSCTL_INT(_net_inet_sctp, OID_AUTO, min_split_point, CTLFLAG_RW, - &sctp_min_split_point, 0, - "Minimum size when splitting a chunk"); - -SYSCTL_INT(_net_inet_sctp, OID_AUTO, pcbhashsize, CTLFLAG_RW, - &sctp_pcbtblsize, 0, - "Tuneable for PCB Hash table sizes"); - -SYSCTL_INT(_net_inet_sctp, OID_AUTO, sys_resource, CTLFLAG_RW, - &sctp_system_free_resc_limit, 0, - "Max number of cached resources in the system"); - -SYSCTL_INT(_net_inet_sctp, OID_AUTO, asoc_resource, CTLFLAG_RW, - &sctp_asoc_free_resc_limit, 0, - "Max number of cached resources in an asoc"); - -SYSCTL_INT(_net_inet_sctp, OID_AUTO, chunkscale, CTLFLAG_RW, - &sctp_chunkscale, 0, - "Tuneable for Scaling of number of chunks and messages"); - -SYSCTL_UINT(_net_inet_sctp, OID_AUTO, 
delayed_sack_time, CTLFLAG_RW, - &sctp_delayed_sack_time_default, 0, - "Default delayed SACK timer in msec"); - -SYSCTL_UINT(_net_inet_sctp, OID_AUTO, heartbeat_interval, CTLFLAG_RW, - &sctp_heartbeat_interval_default, 0, - "Default heartbeat interval in msec"); - -SYSCTL_UINT(_net_inet_sctp, OID_AUTO, pmtu_raise_time, CTLFLAG_RW, - &sctp_pmtu_raise_time_default, 0, - "Default PMTU raise timer in sec"); - -SYSCTL_UINT(_net_inet_sctp, OID_AUTO, shutdown_guard_time, CTLFLAG_RW, - &sctp_shutdown_guard_time_default, 0, - "Default shutdown guard timer in sec"); - -SYSCTL_UINT(_net_inet_sctp, OID_AUTO, secret_lifetime, CTLFLAG_RW, - &sctp_secret_lifetime_default, 0, - "Default secret lifetime in sec"); - -SYSCTL_UINT(_net_inet_sctp, OID_AUTO, rto_max, CTLFLAG_RW, - &sctp_rto_max_default, 0, - "Default maximum retransmission timeout in msec"); - -SYSCTL_UINT(_net_inet_sctp, OID_AUTO, rto_min, CTLFLAG_RW, - &sctp_rto_min_default, 0, - "Default minimum retransmission timeout in msec"); - -SYSCTL_UINT(_net_inet_sctp, OID_AUTO, rto_initial, CTLFLAG_RW, - &sctp_rto_initial_default, 0, - "Default initial retransmission timeout in msec"); - -SYSCTL_UINT(_net_inet_sctp, OID_AUTO, init_rto_max, CTLFLAG_RW, - &sctp_init_rto_max_default, 0, - "Default maximum retransmission timeout during association setup in msec"); - -SYSCTL_UINT(_net_inet_sctp, OID_AUTO, valid_cookie_life, CTLFLAG_RW, - &sctp_valid_cookie_life_default, 0, - "Default cookie lifetime in sec"); - -SYSCTL_UINT(_net_inet_sctp, OID_AUTO, init_rtx_max, CTLFLAG_RW, - &sctp_init_rtx_max_default, 0, - "Default maximum number of retransmission for INIT chunks"); - -SYSCTL_UINT(_net_inet_sctp, OID_AUTO, assoc_rtx_max, CTLFLAG_RW, - &sctp_assoc_rtx_max_default, 0, - "Default maximum number of retransmissions per association"); - -SYSCTL_UINT(_net_inet_sctp, OID_AUTO, path_rtx_max, CTLFLAG_RW, - &sctp_path_rtx_max_default, 0, - "Default maximum of retransmissions per path"); - -SYSCTL_UINT(_net_inet_sctp, OID_AUTO, 
add_more_on_output, CTLFLAG_RW, - &sctp_add_more_threshold, 0, - "When space wise is it worthwhile to try to add more to a socket send buffer"); - -SYSCTL_UINT(_net_inet_sctp, OID_AUTO, nr_outgoing_streams, CTLFLAG_RW, - &sctp_nr_outgoing_streams_default, 0, - "Default number of outgoing streams"); - -SYSCTL_UINT(_net_inet_sctp, OID_AUTO, cmt_on_off, CTLFLAG_RW, - &sctp_cmt_on_off, 0, - "CMT ON/OFF flag"); - -SYSCTL_UINT(_net_inet_sctp, OID_AUTO, cwnd_maxburst, CTLFLAG_RW, - &sctp_use_cwnd_based_maxburst, 0, - "Use a CWND adjusting maxburst"); - -SYSCTL_UINT(_net_inet_sctp, OID_AUTO, early_fast_retran, CTLFLAG_RW, - &sctp_early_fr, 0, - "Early Fast Retransmit with Timer"); - -SYSCTL_UINT(_net_inet_sctp, OID_AUTO, use_rttvar_congctrl, CTLFLAG_RW, - &sctp_use_rttvar_cc, 0, - "Use congestion control via rtt variation"); - -SYSCTL_UINT(_net_inet_sctp, OID_AUTO, deadlock_detect, CTLFLAG_RW, - &sctp_says_check_for_deadlock, 0, - "SMP Deadlock detection on/off"); - -SYSCTL_UINT(_net_inet_sctp, OID_AUTO, early_fast_retran_msec, CTLFLAG_RW, - &sctp_early_fr_msec, 0, - "Early Fast Retransmit minimum timer value"); - -SYSCTL_UINT(_net_inet_sctp, OID_AUTO, asconf_auth_nochk, CTLFLAG_RW, - &sctp_asconf_auth_nochk, 0, - "Disable SCTP ASCONF AUTH requirement"); - -SYSCTL_UINT(_net_inet_sctp, OID_AUTO, auth_disable, CTLFLAG_RW, - &sctp_auth_disable, 0, - "Disable SCTP AUTH chunk requirement/function"); - -SYSCTL_UINT(_net_inet_sctp, OID_AUTO, auth_random_len, CTLFLAG_RW, - &sctp_auth_random_len, 0, - "Length of AUTH RANDOMs"); - -SYSCTL_UINT(_net_inet_sctp, OID_AUTO, auth_hmac_id, CTLFLAG_RW, - &sctp_auth_hmac_id_default, 0, - "Default HMAC Id for SCTP AUTHenthication"); - -SYSCTL_INT(_net_inet_sctp, OID_AUTO, abc_l_var, CTLFLAG_RW, - &sctp_L2_abc_variable, 0, - "SCTP ABC max increase per SACK (L)"); - -SYSCTL_INT(_net_inet_sctp, OID_AUTO, max_chained_mbufs, CTLFLAG_RW, - &sctp_mbuf_threshold_count, 0, - "Default max number of small mbufs on a chain"); - 
-SYSCTL_UINT(_net_inet_sctp, OID_AUTO, cmt_use_dac, CTLFLAG_RW, - &sctp_cmt_use_dac, 0, - "CMT DAC ON/OFF flag"); - -SYSCTL_INT(_net_inet_sctp, OID_AUTO, do_sctp_drain, CTLFLAG_RW, - &sctp_do_drain, 0, - "Should SCTP respond to the drain calls"); - -SYSCTL_INT(_net_inet_sctp, OID_AUTO, warm_crc_table, CTLFLAG_RW, - &sctp_warm_the_crc32_table, 0, - "Should the CRC32c tables be warmed before checksum?"); - -SYSCTL_INT(_net_inet_sctp, OID_AUTO, abort_at_limit, CTLFLAG_RW, - &sctp_abort_if_one_2_one_hits_limit, 0, - "When one-2-one hits qlimit abort"); - -SYSCTL_INT(_net_inet_sctp, OID_AUTO, strict_data_order, CTLFLAG_RW, - &sctp_strict_data_order, 0, - "Enforce strict data ordering, abort if control inside data"); - -SYSCTL_STRUCT(_net_inet_sctp, OID_AUTO, stats, CTLFLAG_RW, - &sctpstat, sctpstat, - "SCTP statistics (struct sctps_stat, netinet/sctp.h"); - -SYSCTL_PROC(_net_inet_sctp, OID_AUTO, assoclist, CTLFLAG_RD, - 0, 0, sctp_assoclist, - "S,xassoc", "List of active SCTP associations"); - -SYSCTL_UINT(_net_inet_sctp, OID_AUTO, nat_friendly, CTLFLAG_RW, - &sctp_nat_friendly, 0, - "SCTP NAT friendly operation"); - -#ifdef SCTP_DEBUG -SYSCTL_INT(_net_inet_sctp, OID_AUTO, debug, CTLFLAG_RW, - &sctp_debug_on, 0, "Configure debug output"); -#endif /* SCTP_DEBUG */ static void sctp_abort(struct socket *so) @@ -1501,13 +1027,15 @@ static size_t sctp_fill_up_addresses(struct sctp_inpcb *inp, struct sctp_tcb *stcb, size_t limit, - struct sockaddr_storage *sas) + struct sockaddr_storage *sas, + uint32_t vrf_id) { - struct ifnet *ifn; - struct ifaddr *ifa; + struct sctp_ifn *sctp_ifn; + struct sctp_ifa *sctp_ifa; int loopback_scope, ipv4_local_scope, local_scope, site_scope; size_t actual; int ipv4_addr_legal, ipv6_addr_legal; + struct sctp_vrf *vrf; actual = 0; if (limit <= 0) @@ -1533,15 +1061,18 @@ sctp_fill_up_addresses(struct sctp_inpcb *inp, } else { ipv4_addr_legal = 1; } - + vrf = sctp_find_vrf(vrf_id); + if (vrf == NULL) { + return (0); + } if (inp->sctp_flags & 
SCTP_PCB_FLAGS_BOUNDALL) { - TAILQ_FOREACH(ifn, &ifnet, if_list) { + LIST_FOREACH(sctp_ifn, &vrf->ifnlist, next_ifn) { if ((loopback_scope == 0) && - (ifn->if_type == IFT_LOOP)) { + SCTP_IFN_IS_IFT_LOOP(sctp_ifn)) { /* Skip loopback if loopback_scope not set */ continue; } - TAILQ_FOREACH(ifa, &ifn->if_addrlist, ifa_list) { + LIST_FOREACH(sctp_ifa, &sctp_ifn->ifalist, next_ifa) { if (stcb) { /* * For the BOUND-ALL case, the list @@ -1552,15 +1083,15 @@ sctp_fill_up_addresses(struct sctp_inpcb *inp, * is one of those we must skip it. */ if (sctp_is_addr_restricted(stcb, - ifa->ifa_addr)) { + sctp_ifa)) { continue; } } - if ((ifa->ifa_addr->sa_family == AF_INET) && + if ((sctp_ifa->address.sa.sa_family == AF_INET) && (ipv4_addr_legal)) { struct sockaddr_in *sin; - sin = (struct sockaddr_in *)ifa->ifa_addr; + sin = (struct sockaddr_in *)&sctp_ifa->address.sa; if (sin->sin_addr.s_addr == 0) { /* * we skip unspecifed @@ -1586,11 +1117,11 @@ sctp_fill_up_addresses(struct sctp_inpcb *inp, if (actual >= limit) { return (actual); } - } else if ((ifa->ifa_addr->sa_family == AF_INET6) && + } else if ((sctp_ifa->address.sa.sa_family == AF_INET6) && (ipv6_addr_legal)) { struct sockaddr_in6 *sin6; - sin6 = (struct sockaddr_in6 *)ifa->ifa_addr; + sin6 = (struct sockaddr_in6 *)&sctp_ifa->address.sa; if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { /* * we skip unspecifed @@ -1628,67 +1159,22 @@ sctp_fill_up_addresses(struct sctp_inpcb *inp, } else { struct sctp_laddr *laddr; - /* - * If we have a TCB and we do NOT support ASCONF (it's - * turned off or otherwise) then the list is always the true - * list of addresses (the else case below). Otherwise the - * list on the association is a list of addresses that are - * NOT part of the association. 
- */ - if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_DO_ASCONF)) { - /* The list is a NEGATIVE list */ - LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) { - if (stcb) { - if (sctp_is_addr_restricted(stcb, laddr->ifa->ifa_addr)) { - continue; - } - } - if (sctp_fill_user_address(sas, laddr->ifa->ifa_addr)) + /* The list is a NEGATIVE list */ + LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) { + if (stcb) { + if (sctp_is_addr_restricted(stcb, laddr->ifa)) { continue; - - ((struct sockaddr_in6 *)sas)->sin6_port = inp->sctp_lport; - sas = (struct sockaddr_storage *)((caddr_t)sas + - laddr->ifa->ifa_addr->sa_len); - actual += laddr->ifa->ifa_addr->sa_len; - if (actual >= limit) { - return (actual); } } - } else { - /* The list is a positive list if present */ - if (stcb) { - /* Must use the specific association list */ - LIST_FOREACH(laddr, &stcb->asoc.sctp_local_addr_list, - sctp_nxt_addr) { - if (sctp_fill_user_address(sas, - laddr->ifa->ifa_addr)) - continue; - ((struct sockaddr_in6 *)sas)->sin6_port = inp->sctp_lport; - sas = (struct sockaddr_storage *)((caddr_t)sas + - laddr->ifa->ifa_addr->sa_len); - actual += laddr->ifa->ifa_addr->sa_len; - if (actual >= limit) { - return (actual); - } - } - } else { - /* - * No endpoint so use the endpoints - * individual list - */ - LIST_FOREACH(laddr, &inp->sctp_addr_list, - sctp_nxt_addr) { - if (sctp_fill_user_address(sas, - laddr->ifa->ifa_addr)) - continue; - ((struct sockaddr_in6 *)sas)->sin6_port = inp->sctp_lport; - sas = (struct sockaddr_storage *)((caddr_t)sas + - laddr->ifa->ifa_addr->sa_len); - actual += laddr->ifa->ifa_addr->sa_len; - if (actual >= limit) { - return (actual); - } - } + if (sctp_fill_user_address(sas, &laddr->ifa->address.sa)) + continue; + + ((struct sockaddr_in6 *)sas)->sin6_port = inp->sctp_lport; + sas = (struct sockaddr_storage *)((caddr_t)sas + + laddr->ifa->address.sa.sa_len); + actual += laddr->ifa->address.sa.sa_len; + if (actual >= limit) { + return (actual); } } } @@ 
-1696,9 +1182,10 @@ sctp_fill_up_addresses(struct sctp_inpcb *inp, } static int -sctp_count_max_addresses(struct sctp_inpcb *inp) +sctp_count_max_addresses(struct sctp_inpcb *inp, uint32_t vrf_id) { int cnt = 0; + struct sctp_vrf *vrf = NULL; /* * In both sub-set bound an bound_all cases we return the MAXIMUM @@ -1707,20 +1194,24 @@ sctp_count_max_addresses(struct sctp_inpcb *inp) * bound-all case a TCB may NOT include the loopback or other * addresses as well. */ + vrf = sctp_find_vrf(vrf_id); + if (vrf == NULL) { + return (0); + } if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) { - struct ifnet *ifn; - struct ifaddr *ifa; + struct sctp_ifn *sctp_ifn; + struct sctp_ifa *sctp_ifa; - TAILQ_FOREACH(ifn, &ifnet, if_list) { - TAILQ_FOREACH(ifa, &ifn->if_addrlist, ifa_list) { + LIST_FOREACH(sctp_ifn, &vrf->ifnlist, next_ifn) { + LIST_FOREACH(sctp_ifa, &sctp_ifn->ifalist, next_ifa) { /* Count them if they are the right type */ - if (ifa->ifa_addr->sa_family == AF_INET) { + if (sctp_ifa->address.sa.sa_family == AF_INET) { if (inp->sctp_flags & SCTP_PCB_FLAGS_NEEDS_MAPPED_V4) cnt += sizeof(struct sockaddr_in6); else cnt += sizeof(struct sockaddr_in); - } else if (ifa->ifa_addr->sa_family == AF_INET6) + } else if (sctp_ifa->address.sa.sa_family == AF_INET6) cnt += sizeof(struct sockaddr_in6); } } @@ -1728,13 +1219,13 @@ sctp_count_max_addresses(struct sctp_inpcb *inp) struct sctp_laddr *laddr; LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) { - if (laddr->ifa->ifa_addr->sa_family == AF_INET) { + if (laddr->ifa->address.sa.sa_family == AF_INET) { if (inp->sctp_flags & SCTP_PCB_FLAGS_NEEDS_MAPPED_V4) cnt += sizeof(struct sockaddr_in6); else cnt += sizeof(struct sockaddr_in); - } else if (laddr->ifa->ifa_addr->sa_family == AF_INET6) + } else if (laddr->ifa->address.sa.sa_family == AF_INET6) cnt += sizeof(struct sockaddr_in6); } } @@ -1752,6 +1243,8 @@ sctp_do_connect_x(struct socket *so, struct sctp_inpcb *inp, void *optval, struct sockaddr *sa; int num_v6 = 0, num_v4 
= 0, *totaddrp, totaddr, i; size_t incr, at; + uint32_t vrf_id; + sctp_assoc_t *a_id; #ifdef SCTP_DEBUG if (sctp_debug_on & SCTP_DEBUG_PCB1) { @@ -1856,8 +1349,9 @@ sctp_do_connect_x(struct socket *so, struct sctp_inpcb *inp, void *optval, SCTP_INP_WUNLOCK(inp); } + vrf_id = SCTP_DEFAULT_VRFID; /* We are GOOD to go */ - stcb = sctp_aloc_assoc(inp, sa, 1, &error, 0); + stcb = sctp_aloc_assoc(inp, sa, 1, &error, 0, vrf_id); if (stcb == NULL) { /* Gak! no memory */ goto out_now; @@ -1889,6 +1383,9 @@ sctp_do_connect_x(struct socket *so, struct sctp_inpcb *inp, void *optval, sa = (struct sockaddr *)((caddr_t)sa + incr); } stcb->asoc.state = SCTP_STATE_COOKIE_WAIT; + /* Fill in the return id */ + a_id = (sctp_assoc_t *) optval; + *a_id = sctp_get_associd(stcb); /* initialize authentication parameters for the assoc */ sctp_initialize_auth_params(inp, stcb); @@ -1939,13 +1436,13 @@ out_now: destp = (type *)srcp; \ } - static int sctp_getopt(struct socket *so, int optname, void *optval, size_t *optsize, void *p) { struct sctp_inpcb *inp; int error, val = 0; + uint32_t vrf_id; struct sctp_tcb *stcb = NULL; if (optval == NULL) { @@ -1954,6 +1451,8 @@ sctp_getopt(struct socket *so, int optname, void *optval, size_t *optsize, inp = (struct sctp_inpcb *)so->so_pcb; if (inp == 0) return EINVAL; + vrf_id = SCTP_DEFAULT_VRFID; + error = 0; switch (optname) { @@ -2008,6 +1507,7 @@ sctp_getopt(struct socket *so, int optname, void *optval, size_t *optsize, *optsize = sizeof(val); } break; + case SCTP_PARTIAL_DELIVERY_POINT: { uint32_t *value; @@ -2052,13 +1552,13 @@ sctp_getopt(struct socket *so, int optname, void *optval, size_t *optsize, SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, *optsize); error = EINVAL; -#ifdef AF_INET +#ifdef INET if (av->assoc_value == AF_INET) { av->assoc_value = sizeof(struct sockaddr_in); error = 0; } #endif -#ifdef AF_INET6 +#ifdef INET6 if (av->assoc_value == AF_INET6) { av->assoc_value = sizeof(struct sockaddr_in6); error = 0; @@ -2130,6 
+1630,32 @@ sctp_getopt(struct socket *so, int optname, void *optval, size_t *optsize, *optsize = sizeof(*av); } break; + case SCTP_VRF_ID: + { + uint32_t *vrf_id; + + SCTP_CHECK_AND_CAST(vrf_id, optval, uint32_t, *optsize); + *vrf_id = inp->def_vrf_id; + break; + } + case SCTP_GET_ASOC_VRF: + { + struct sctp_assoc_value *id; + + SCTP_CHECK_AND_CAST(id, optval, struct sctp_assoc_value, *optsize); + SCTP_FIND_STCB(inp, stcb, id->assoc_id); + if (stcb == NULL) { + error = EINVAL; + break; + } + id->assoc_value = stcb->asoc.vrf_id; + break; + } + case SCTP_GET_VRF_IDS: + { + error = EOPNOTSUPP; + break; + } case SCTP_GET_NONCE_VALUES: { struct sctp_get_nonce_values *gnv; @@ -2196,38 +1722,37 @@ sctp_getopt(struct socket *so, int optname, void *optval, size_t *optsize, *optsize = sizeof(uint8_t); } break; - /* - * FIXME MT: Should this be done as the association level by - * using sctp_get_frag_point? - */ case SCTP_MAXSEG: { - uint32_t *segsize; + struct sctp_assoc_value *av; int ovh; - SCTP_CHECK_AND_CAST(segsize, optval, uint32_t, *optsize); + SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, *optsize); + SCTP_FIND_STCB(inp, stcb, av->assoc_id); - SCTP_INP_RLOCK(inp); - if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) { - ovh = SCTP_MED_OVERHEAD; + if (stcb) { + av->assoc_value = sctp_get_frag_point(stcb, &stcb->asoc); + SCTP_TCB_UNLOCK(stcb); } else { - ovh = SCTP_MED_V4_OVERHEAD; + SCTP_INP_RLOCK(inp); + if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) { + ovh = SCTP_MED_OVERHEAD; + } else { + ovh = SCTP_MED_V4_OVERHEAD; + } + av->assoc_value = inp->sctp_frag_point - ovh; + SCTP_INP_RUNLOCK(inp); } - *segsize = inp->sctp_frag_point - ovh; - SCTP_INP_RUNLOCK(inp); - *optsize = sizeof(uint32_t); + *optsize = sizeof(struct sctp_assoc_value); } break; -#if 0 - /* FIXME MT: How does this work? 
*/ case SCTP_GET_STAT_LOG: #ifdef SCTP_STAT_LOGGING - error = sctp_fill_stat_log(m); -#else /* SCTP_DEBUG */ + error = sctp_fill_stat_log(optval, optsize); +#else error = EOPNOTSUPP; #endif break; -#endif case SCTP_EVENTS: { struct sctp_event_subscribe *events; @@ -2298,7 +1823,7 @@ sctp_getopt(struct socket *so, int optname, void *optval, size_t *optsize, SCTP_CHECK_AND_CAST(value, optval, uint32_t, *optsize); SCTP_INP_RLOCK(inp); - *value = sctp_count_max_addresses(inp); + *value = sctp_count_max_addresses(inp, vrf_id); SCTP_INP_RUNLOCK(inp); *optsize = sizeof(uint32_t); } @@ -2399,7 +1924,7 @@ sctp_getopt(struct socket *so, int optname, void *optval, size_t *optsize, sas = (struct sockaddr_storage *)&saddr->addr[0]; limit = *optsize - sizeof(sctp_assoc_t); - actual = sctp_fill_up_addresses(inp, stcb, limit, sas); + actual = sctp_fill_up_addresses(inp, stcb, limit, sas, vrf_id); if (stcb) SCTP_TCB_UNLOCK(stcb); *optsize = sizeof(struct sockaddr_storage) + actual; @@ -2448,13 +1973,13 @@ sctp_getopt(struct socket *so, int optname, void *optval, size_t *optsize, } else { paddrp->spp_flags |= SPP_PMTUD_DISABLE; } -#ifdef AF_INET +#ifdef INET if (net->ro._l_addr.sin.sin_family == AF_INET) { paddrp->spp_ipv4_tos = net->tos_flowlabel & 0x000000fc; paddrp->spp_flags |= SPP_IPV4_TOS; } #endif -#ifdef AF_INET6 +#ifdef INET6 if (net->ro._l_addr.sin6.sin6_family == AF_INET6) { paddrp->spp_ipv6_flowlabel = net->tos_flowlabel; paddrp->spp_flags |= SPP_IPV6_FLOWLABEL; @@ -2467,11 +1992,11 @@ sctp_getopt(struct socket *so, int optname, void *optval, size_t *optsize, */ paddrp->spp_pathmaxrxt = stcb->asoc.def_net_failure; paddrp->spp_pathmtu = sctp_get_frag_point(stcb, &stcb->asoc); -#ifdef AF_INET +#ifdef INET paddrp->spp_ipv4_tos = stcb->asoc.default_tos & 0x000000fc; paddrp->spp_flags |= SPP_IPV4_TOS; #endif -#ifdef AF_INET6 +#ifdef INET6 paddrp->spp_ipv6_flowlabel = stcb->asoc.default_flowlabel; paddrp->spp_flags |= SPP_IPV6_FLOWLABEL; #endif @@ -2481,12 +2006,6 @@ 
sctp_getopt(struct socket *so, int optname, void *optval, size_t *optsize, } } paddrp->spp_hbinterval = stcb->asoc.heart_beat_delay; - paddrp->spp_sackdelay = stcb->asoc.delayed_ack; - /* - * Currently we don't support no sack delay - * aka SPP_SACKDELAY_DISABLE. - */ - paddrp->spp_flags |= SPP_SACKDELAY_ENABLE; paddrp->spp_assoc_id = sctp_get_associd(stcb); SCTP_TCB_UNLOCK(stcb); } else { @@ -2494,14 +2013,13 @@ sctp_getopt(struct socket *so, int optname, void *optval, size_t *optsize, SCTP_INP_RLOCK(inp); paddrp->spp_pathmaxrxt = inp->sctp_ep.def_net_failure; paddrp->spp_hbinterval = TICKS_TO_MSEC(inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_HEARTBEAT]); - paddrp->spp_sackdelay = TICKS_TO_MSEC(inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_RECV]); paddrp->spp_assoc_id = (sctp_assoc_t) 0; /* get inp's default */ -#ifdef AF_INET +#ifdef INET paddrp->spp_ipv4_tos = inp->ip_inp.inp.inp_ip_tos; paddrp->spp_flags |= SPP_IPV4_TOS; #endif -#ifdef AF_INET6 +#ifdef INET6 if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) { paddrp->spp_ipv6_flowlabel = ((struct in6pcb *)inp)->in6p_flowinfo; paddrp->spp_flags |= SPP_IPV6_FLOWLABEL; @@ -2511,7 +2029,7 @@ sctp_getopt(struct socket *so, int optname, void *optval, size_t *optsize, paddrp->spp_pathmaxrxt = 0; paddrp->spp_pathmtu = 0; /* default behavior, no stcb */ - paddrp->spp_flags = SPP_HB_ENABLE | SPP_SACKDELAY_ENABLE | SPP_PMTUD_ENABLE; + paddrp->spp_flags = SPP_HB_ENABLE | SPP_PMTUD_ENABLE; SCTP_INP_RUNLOCK(inp); } @@ -2569,6 +2087,7 @@ sctp_getopt(struct socket *so, int optname, void *optval, size_t *optsize, *optsize = sizeof(struct sctp_pcbinfo); } break; + case SCTP_STATUS: { struct sctp_nets *net; @@ -2654,6 +2173,8 @@ sctp_getopt(struct socket *so, int optname, void *optval, size_t *optsize, sasoc->sasoc_peer_rwnd = stcb->asoc.peers_rwnd; sasoc->sasoc_local_rwnd = stcb->asoc.my_rwnd; sasoc->sasoc_cookie_life = stcb->asoc.cookie_life; + sasoc->sasoc_sack_delay = stcb->asoc.delayed_ack; + sasoc->sasoc_sack_freq = 
stcb->asoc.sack_freq; SCTP_TCB_UNLOCK(stcb); } else { SCTP_INP_RLOCK(inp); @@ -2662,6 +2183,8 @@ sctp_getopt(struct socket *so, int optname, void *optval, size_t *optsize, sasoc->sasoc_peer_rwnd = 0; sasoc->sasoc_local_rwnd = sbspace(&inp->sctp_socket->so_rcv); sasoc->sasoc_cookie_life = inp->sctp_ep.def_cookie_life; + sasoc->sasoc_sack_delay = TICKS_TO_MSEC(inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_RECV]); + sasoc->sasoc_sack_freq = inp->sctp_ep.sctp_sack_freq; SCTP_INP_RUNLOCK(inp); } *optsize = sizeof(*sasoc); @@ -2845,7 +2368,6 @@ sctp_getopt(struct socket *so, int optname, void *optval, size_t *optsize, return (error); } - static int sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, void *p) @@ -2854,13 +2376,18 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, uint32_t *mopt; struct sctp_tcb *stcb = NULL; struct sctp_inpcb *inp; + uint32_t vrf_id; if (optval == NULL) { + printf("optval is NULL\n"); return (EINVAL); } inp = (struct sctp_inpcb *)so->so_pcb; - if (inp == 0) + if (inp == 0) { + printf("inp is NULL?\n"); return EINVAL; + } + vrf_id = SCTP_DEFAULT_VRFID; error = 0; switch (optname) { @@ -2979,6 +2506,29 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, } } break; + case SCTP_VRF_ID: + { + uint32_t *vrf_id; + + SCTP_CHECK_AND_CAST(vrf_id, optval, uint32_t, optsize); + if (*vrf_id > SCTP_MAX_VRF_ID) { + error = EINVAL; + break; + } + inp->def_vrf_id = *vrf_id; + break; + } + case SCTP_DEL_VRF_ID: + { + error = EOPNOTSUPP; + break; + } + case SCTP_ADD_VRF_ID: + { + error = EOPNOTSUPP; + break; + } + case SCTP_DELAYED_ACK_TIME: { struct sctp_assoc_value *tm; @@ -3313,24 +2863,33 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, break; case SCTP_MAXSEG: { - uint32_t *segsize; + struct sctp_assoc_value *av; int ovh; - SCTP_CHECK_AND_CAST(segsize, optval, uint32_t, optsize); + SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, optsize); + 
SCTP_FIND_STCB(inp, stcb, av->assoc_id); - if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) { - ovh = SCTP_MED_OVERHEAD; - } else { - ovh = SCTP_MED_V4_OVERHEAD; - } - SCTP_INP_WLOCK(inp); - /* FIXME MT: Why is this not allowed? */ - if (*segsize) { - inp->sctp_frag_point = (*segsize + ovh); - } else { + if (stcb) { error = EINVAL; + SCTP_TCB_UNLOCK(stcb); + } else { + SCTP_INP_WLOCK(inp); + if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) { + ovh = SCTP_MED_OVERHEAD; + } else { + ovh = SCTP_MED_V4_OVERHEAD; + } + /* + * FIXME MT: I think this is not in tune + * with the API ID + */ + if (av->assoc_value) { + inp->sctp_frag_point = (av->assoc_value + ovh); + } else { + error = EINVAL; + } + SCTP_INP_WUNLOCK(inp); } - SCTP_INP_WUNLOCK(inp); } break; case SCTP_EVENTS: @@ -3477,20 +3036,6 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, if (stcb) { /************************TCB SPECIFIC SET ******************/ - /* sack delay first */ - if (paddrp->spp_flags & SPP_SACKDELAY_ENABLE) { - /* - * we do NOT support turning it off - * (yet). only setting the delay. - */ - if (paddrp->spp_sackdelay >= SCTP_CLOCK_GRANULARITY) - stcb->asoc.delayed_ack = paddrp->spp_sackdelay; - else - stcb->asoc.delayed_ack = SCTP_CLOCK_GRANULARITY; - - } else if (paddrp->spp_flags & SPP_SACKDELAY_DISABLE) { - stcb->asoc.delayed_ack = 0; - } /* * do we change the timer for HB, we run * only one? 
@@ -3531,14 +3076,14 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, } if (paddrp->spp_pathmaxrxt) net->failure_threshold = paddrp->spp_pathmaxrxt; -#ifdef AF_INET +#ifdef INET if (paddrp->spp_flags & SPP_IPV4_TOS) { if (net->ro._l_addr.sin.sin_family == AF_INET) { net->tos_flowlabel = paddrp->spp_ipv4_tos & 0x000000fc; } } #endif -#ifdef AF_INET6 +#ifdef INET6 if (paddrp->spp_flags & SPP_IPV6_FLOWLABEL) { if (net->ro._l_addr.sin6.sin6_family == AF_INET6) { net->tos_flowlabel = paddrp->spp_ipv6_flowlabel; @@ -3578,11 +3123,11 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, /* start up the timer. */ sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, inp, stcb, net); } -#ifdef AF_INET +#ifdef INET if (paddrp->spp_flags & SPP_IPV4_TOS) stcb->asoc.default_tos = paddrp->spp_ipv4_tos & 0x000000fc; #endif -#ifdef AF_INET6 +#ifdef INET6 if (paddrp->spp_flags & SPP_IPV6_FLOWLABEL) stcb->asoc.default_flowlabel = paddrp->spp_ipv6_flowlabel; #endif @@ -3605,15 +3150,6 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, } else if (paddrp->spp_flags & SPP_HB_DISABLE) { sctp_feature_on(inp, SCTP_PCB_FLAGS_DONOT_HEARTBEAT); } - if (paddrp->spp_flags & SPP_SACKDELAY_ENABLE) { - if (paddrp->spp_sackdelay > SCTP_CLOCK_GRANULARITY) - inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_RECV] = MSEC_TO_TICKS(paddrp->spp_sackdelay); - else - inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_RECV] = MSEC_TO_TICKS(SCTP_CLOCK_GRANULARITY); - - } else if (paddrp->spp_flags & SPP_SACKDELAY_DISABLE) { - inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_RECV] = 0; - } SCTP_INP_WUNLOCK(inp); } } @@ -3627,11 +3163,11 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, if (stcb) { /* Set in ms we hope :-) */ - if (srto->srto_initial > 10) + if (srto->srto_initial) stcb->asoc.initial_rto = srto->srto_initial; - if (srto->srto_max > 10) + if (srto->srto_max) stcb->asoc.maxrto = srto->srto_max; - if (srto->srto_min > 10) + if 
(srto->srto_min) stcb->asoc.minrto = srto->srto_min; SCTP_TCB_UNLOCK(stcb); } else { @@ -3640,11 +3176,11 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, * If we have a null asoc, its default for * the endpoint */ - if (srto->srto_initial > 10) + if (srto->srto_initial) inp->sctp_ep.initial_rto = srto->srto_initial; - if (srto->srto_max > 10) + if (srto->srto_max) inp->sctp_ep.sctp_maxrto = srto->srto_max; - if (srto->srto_min > 10) + if (srto->srto_min) inp->sctp_ep.sctp_minrto = srto->srto_min; SCTP_INP_WUNLOCK(inp); } @@ -3665,6 +3201,10 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, sasoc->sasoc_local_rwnd = 0; if (stcb->asoc.cookie_life) stcb->asoc.cookie_life = sasoc->sasoc_cookie_life; + stcb->asoc.delayed_ack = sasoc->sasoc_sack_delay; + if (sasoc->sasoc_sack_freq) { + stcb->asoc.sack_freq = sasoc->sasoc_sack_freq; + } SCTP_TCB_UNLOCK(stcb); } else { SCTP_INP_WLOCK(inp); @@ -3675,6 +3215,10 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, sasoc->sasoc_local_rwnd = 0; if (sasoc->sasoc_cookie_life) inp->sctp_ep.def_cookie_life = sasoc->sasoc_cookie_life; + inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_RECV] = MSEC_TO_TICKS(sasoc->sasoc_sack_delay); + if (sasoc->sasoc_sack_freq) { + inp->sctp_ep.sctp_sack_freq = sasoc->sasoc_sack_freq; + } SCTP_INP_WUNLOCK(inp); } } @@ -3694,12 +3238,7 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, if (sinit->sinit_max_attempts) inp->sctp_ep.max_init_times = sinit->sinit_max_attempts; - if (sinit->sinit_max_init_timeo > 10) - /* - * We must be at least a 100ms (we set in - * ticks) - */ - /* FIXME MT: What is this? 
*/ + if (sinit->sinit_max_init_timeo) inp->sctp_ep.initial_init_rto_max = sinit->sinit_max_init_timeo; SCTP_INP_WUNLOCK(inp); } @@ -3752,7 +3291,21 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, } } break; + case SCTP_SET_DYNAMIC_PRIMARY: + { + union sctp_sockstore *ss; + + error = priv_check_cred(curthread->td_ucred, + PRIV_NETINET_RESERVEDPORT, + SUSER_ALLOWJAIL); + if (error) + break; + SCTP_CHECK_AND_CAST(ss, optval, union sctp_sockstore, optsize); + /* SUPER USER CHECK? */ + error = sctp_dynamic_set_primary(&ss->sa, vrf_id); + } + break; case SCTP_SET_PEER_PRIMARY_ADDR: { struct sctp_setpeerprim *sspp; @@ -3783,6 +3336,7 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, error = EINVAL; break; } + /* Is the VRF one we have */ addr_touse = addrs->addr; if (addrs->addr->sa_family == AF_INET6) { struct sockaddr_in6 *sin6; @@ -3812,7 +3366,7 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, struct sctp_inpcb *lep; ((struct sockaddr_in *)addr_touse)->sin_port = inp->sctp_lport; - lep = sctp_pcb_findep(addr_touse, 1, 0); + lep = sctp_pcb_findep(addr_touse, 1, 0, vrf_id); if (lep != NULL) { /* * We must decrement the refcount @@ -3828,7 +3382,7 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, } else if (lep == NULL) { ((struct sockaddr_in *)addr_touse)->sin_port = 0; error = sctp_addr_mgmt_ep_sa(inp, addr_touse, - SCTP_ADD_IP_ADDRESS); + SCTP_ADD_IP_ADDRESS, vrf_id); } else { error = EADDRNOTAVAIL; } @@ -3873,7 +3427,7 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, if (addrs->sget_assoc_id == 0) { /* delete the address */ sctp_addr_mgmt_ep_sa(inp, addr_touse, - SCTP_DEL_IP_ADDRESS); + SCTP_DEL_IP_ADDRESS, vrf_id); } else { /* * FIX: decide whether we allow assoc based @@ -3890,9 +3444,6 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, } - -extern int sctp_chatty_mbuf; - int sctp_ctloutput(struct 
socket *so, struct sockopt *sopt) { @@ -3953,6 +3504,7 @@ sctp_connect(struct socket *so, struct sockaddr *addr, struct thread *p) { int error = 0; int create_lock_on = 0; + uint32_t vrf_id; struct sctp_inpcb *inp; struct sctp_tcb *stcb = NULL; @@ -4020,11 +3572,12 @@ sctp_connect(struct socket *so, struct sockaddr *addr, struct thread *p) error = EALREADY; goto out_now; } + vrf_id = SCTP_DEFAULT_VRFID; /* We are GOOD to go */ - stcb = sctp_aloc_assoc(inp, addr, 1, &error, 0); + stcb = sctp_aloc_assoc(inp, addr, 1, &error, 0, vrf_id); if (stcb == NULL) { /* Gak! no memory */ - return (error); + goto out_now; } if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) { stcb->sctp_ep->sctp_flags |= SCTP_PCB_FLAGS_CONNECTED; @@ -4205,7 +3758,7 @@ int sctp_ingetaddr(struct socket *so, struct sockaddr **addr) { struct sockaddr_in *sin; - + uint32_t vrf_id; struct sctp_inpcb *inp; /* @@ -4220,6 +3773,8 @@ sctp_ingetaddr(struct socket *so, struct sockaddr **addr) return ECONNRESET; } SCTP_INP_RLOCK(inp); + struct sctp_ifa *sctp_ifa; + sin->sin_port = inp->sctp_lport; if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) { if (inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED) { @@ -4251,8 +3806,16 @@ sctp_ingetaddr(struct socket *so, struct sockaddr **addr) SCTP_TCB_UNLOCK(stcb); goto notConn; } - sin->sin_addr = sctp_ipv4_source_address_selection(inp, - stcb, (struct route *)&net->ro, net, 0); + vrf_id = SCTP_DEFAULT_VRFID; + + sctp_ifa = sctp_source_address_selection(inp, + stcb, + (struct route *)&net->ro, + net, 0, vrf_id); + if (sctp_ifa) { + sin->sin_addr = sctp_ifa->address.sin.sin_addr; + sctp_free_ifa(sctp_ifa); + } SCTP_TCB_UNLOCK(stcb); } else { /* For the bound all case you get back 0 */ @@ -4266,10 +3829,10 @@ sctp_ingetaddr(struct socket *so, struct sockaddr **addr) int fnd = 0; LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) { - if (laddr->ifa->ifa_addr->sa_family == AF_INET) { + if (laddr->ifa->address.sa.sa_family == AF_INET) { struct sockaddr_in *sin_a; - 
sin_a = (struct sockaddr_in *)laddr->ifa->ifa_addr; + sin_a = (struct sockaddr_in *)&laddr->ifa->address.sa; sin->sin_addr = sin_a->sin_addr; fnd = 1; break; @@ -4290,14 +3853,12 @@ int sctp_peeraddr(struct socket *so, struct sockaddr **addr) { struct sockaddr_in *sin = (struct sockaddr_in *)*addr; - int fnd; struct sockaddr_in *sin_a; struct sctp_inpcb *inp; struct sctp_tcb *stcb; struct sctp_nets *net; - /* Do the malloc first in case it blocks. */ inp = (struct sctp_inpcb *)so->so_pcb; if ((inp == NULL) || diff --git a/sys/netinet/sctp_var.h b/sys/netinet/sctp_var.h index 5463673..4149ad9 100644 --- a/sys/netinet/sctp_var.h +++ b/sys/netinet/sctp_var.h @@ -36,191 +36,10 @@ __FBSDID("$FreeBSD$"); #ifndef _NETINET_SCTP_VAR_H_ #define _NETINET_SCTP_VAR_H_ - #include <netinet/sctp_uio.h> -/* SCTP Kernel structures */ - -/* - * Names for SCTP sysctl objects - */ -#define SCTPCTL_MAXDGRAM 1 /* max datagram size */ -#define SCTPCTL_RECVSPACE 2 /* default receive buffer space */ -#define SCTPCTL_AUTOASCONF 3 /* auto asconf enable/disable flag */ -#define SCTPCTL_ECN_ENABLE 4 /* Is ecn allowed */ -#define SCTPCTL_ECN_NONCE 5 /* Is ecn nonce allowed */ -#define SCTPCTL_STRICT_SACK 6 /* strictly require sack'd TSN's to be - * smaller than sndnxt. */ -#define SCTPCTL_NOCSUM_LO 7 /* Require that the Loopback NOT have - * the crc32 checksum on packets - * routed over it. 
*/ -#define SCTPCTL_STRICT_INIT 8 -#define SCTPCTL_PEER_CHK_OH 9 -#define SCTPCTL_MAXBURST 10 -#define SCTPCTL_MAXCHUNKONQ 11 -#define SCTPCTL_DELAYED_SACK 12 -#define SCTPCTL_HB_INTERVAL 13 -#define SCTPCTL_PMTU_RAISE 14 -#define SCTPCTL_SHUTDOWN_GUARD 15 -#define SCTPCTL_SECRET_LIFETIME 16 -#define SCTPCTL_RTO_MAX 17 -#define SCTPCTL_RTO_MIN 18 -#define SCTPCTL_RTO_INITIAL 19 -#define SCTPCTL_INIT_RTO_MAX 20 -#define SCTPCTL_COOKIE_LIFE 21 -#define SCTPCTL_INIT_RTX_MAX 22 -#define SCTPCTL_ASSOC_RTX_MAX 23 -#define SCTPCTL_PATH_RTX_MAX 24 -#define SCTPCTL_NR_OUTGOING_STREAMS 25 -#define SCTPCTL_CMT_ON_OFF 26 -#define SCTPCTL_CWND_MAXBURST 27 -#define SCTPCTL_EARLY_FR 28 -#define SCTPCTL_RTTVAR_CC 29 -#define SCTPCTL_DEADLOCK_DET 30 -#define SCTPCTL_EARLY_FR_MSEC 31 -#define SCTPCTL_ASCONF_AUTH_NOCHK 32 -#define SCTPCTL_AUTH_DISABLE 33 -#define SCTPCTL_AUTH_RANDOM_LEN 34 -#define SCTPCTL_AUTH_HMAC_ID 35 -#define SCTPCTL_ABC_L_VAR 36 -#define SCTPCTL_MAX_MBUF_CHAIN 37 -#define SCTPCTL_CMT_USE_DAC 38 -#define SCTPCTL_DO_DRAIN 39 -#define SCTPCTL_WARM_CRC32 40 -#define SCTPCTL_QLIMIT_ABORT 41 -#define SCTPCTL_STRICT_ORDER 42 -#define SCTPCTL_TCBHASHSIZE 43 -#define SCTPCTL_PCBHASHSIZE 44 -#define SCTPCTL_CHUNKSCALE 45 -#define SCTPCTL_MINSPLIT 46 -#define SCTPCTL_ADD_MORE 47 -#define SCTPCTL_SYS_RESC 48 -#define SCTPCTL_ASOC_RESC 49 -#define SCTPCTL_NAT_FRIENDLY 50 -#ifdef SCTP_DEBUG -#define SCTPCTL_DEBUG 51 -#define SCTPCTL_MAXID 51 -#else -#define SCTPCTL_MAXID 50 -#endif - -#ifdef SCTP_DEBUG -#define SCTPCTL_NAMES { \ - { 0, 0 }, \ - { "sendspace", CTLTYPE_INT }, \ - { "recvspace", CTLTYPE_INT }, \ - { "autoasconf", CTLTYPE_INT }, \ - { "ecn_enable", CTLTYPE_INT }, \ - { "ecn_nonce", CTLTYPE_INT }, \ - { "strict_sack", CTLTYPE_INT }, \ - { "looback_nocsum", CTLTYPE_INT }, \ - { "strict_init", CTLTYPE_INT }, \ - { "peer_chkoh", CTLTYPE_INT }, \ - { "maxburst", CTLTYPE_INT }, \ - { "maxchunks", CTLTYPE_INT }, \ - { "delayed_sack_time", CTLTYPE_INT }, \ - { 
"heartbeat_interval", CTLTYPE_INT }, \ - { "pmtu_raise_time", CTLTYPE_INT }, \ - { "shutdown_guard_time", CTLTYPE_INT }, \ - { "secret_lifetime", CTLTYPE_INT }, \ - { "rto_max", CTLTYPE_INT }, \ - { "rto_min", CTLTYPE_INT }, \ - { "rto_initial", CTLTYPE_INT }, \ - { "init_rto_max", CTLTYPE_INT }, \ - { "valid_cookie_life", CTLTYPE_INT }, \ - { "init_rtx_max", CTLTYPE_INT }, \ - { "assoc_rtx_max", CTLTYPE_INT }, \ - { "path_rtx_max", CTLTYPE_INT }, \ - { "nr_outgoing_streams", CTLTYPE_INT }, \ - { "cmt_on_off", CTLTYPE_INT }, \ - { "cwnd_maxburst", CTLTYPE_INT }, \ - { "early_fast_retran", CTLTYPE_INT }, \ - { "use_rttvar_congctrl", CTLTYPE_INT }, \ - { "deadlock_detect", CTLTYPE_INT }, \ - { "early_fast_retran_msec", CTLTYPE_INT }, \ - { "asconf_auth_nochk", CTLTYPE_INT }, \ - { "auth_disable", CTLTYPE_INT }, \ - { "auth_random_len", CTLTYPE_INT }, \ - { "auth_hmac_id", CTLTYPE_INT }, \ - { "abc_l_var", CTLTYPE_INT }, \ - { "max_mbuf_chain", CTLTYPE_INT }, \ - { "cmt_use_dac", CTLTYPE_INT }, \ - { "do_sctp_drain", CTLTYPE_INT }, \ - { "warm_crc_table", CTLTYPE_INT }, \ - { "abort_at_limit", CTLTYPE_INT }, \ - { "strict_data_order", CTLTYPE_INT }, \ - { "tcbhashsize", CTLTYPE_INT }, \ - { "pcbhashsize", CTLTYPE_INT }, \ - { "chunkscale", CTLTYPE_INT }, \ - { "min_split_point", CTLTYPE_INT }, \ - { "add_more_on_output", CTLTYPE_INT }, \ - { "sys_resource", CTLTYPE_INT }, \ - { "asoc_resource", CTLTYPE_INT }, \ - { "nat_friendly", CTLTYPE_INT }, \ - { "debug", CTLTYPE_INT }, \ -} -#else -#define SCTPCTL_NAMES { \ - { 0, 0 }, \ - { "sendspace", CTLTYPE_INT }, \ - { "recvspace", CTLTYPE_INT }, \ - { "autoasconf", CTLTYPE_INT }, \ - { "ecn_enable", CTLTYPE_INT }, \ - { "ecn_nonce", CTLTYPE_INT }, \ - { "strict_sack", CTLTYPE_INT }, \ - { "looback_nocsum", CTLTYPE_INT }, \ - { "strict_init", CTLTYPE_INT }, \ - { "peer_chkoh", CTLTYPE_INT }, \ - { "maxburst", CTLTYPE_INT }, \ - { "maxchunks", CTLTYPE_INT }, \ - { "delayed_sack_time", CTLTYPE_INT }, \ - { 
"heartbeat_interval", CTLTYPE_INT }, \ - { "pmtu_raise_time", CTLTYPE_INT }, \ - { "shutdown_guard_time", CTLTYPE_INT }, \ - { "secret_lifetime", CTLTYPE_INT }, \ - { "rto_max", CTLTYPE_INT }, \ - { "rto_min", CTLTYPE_INT }, \ - { "rto_initial", CTLTYPE_INT }, \ - { "init_rto_max", CTLTYPE_INT }, \ - { "valid_cookie_life", CTLTYPE_INT }, \ - { "init_rtx_max", CTLTYPE_INT }, \ - { "assoc_rtx_max", CTLTYPE_INT }, \ - { "path_rtx_max", CTLTYPE_INT }, \ - { "nr_outgoing_streams", CTLTYPE_INT }, \ - { "cmt_on_off", CTLTYPE_INT }, \ - { "cwnd_maxburst", CTLTYPE_INT }, \ - { "early_fast_retran", CTLTYPE_INT }, \ - { "use_rttvar_congctrl", CTLTYPE_INT }, \ - { "deadlock_detect", CTLTYPE_INT }, \ - { "early_fast_retran_msec", CTLTYPE_INT }, \ - { "asconf_auth_nochk", CTLTYPE_INT }, \ - { "auth_disable", CTLTYPE_INT }, \ - { "auth_random_len", CTLTYPE_INT }, \ - { "auth_hmac_id", CTLTYPE_INT }, \ - { "abc_l_var", CTLTYPE_INT }, \ - { "max_mbuf_chain", CTLTYPE_INT }, \ - { "cmt_use_dac", CTLTYPE_INT }, \ - { "do_sctp_drain", CTLTYPE_INT }, \ - { "warm_crc_table", CTLTYPE_INT }, \ - { "abort_at_limit", CTLTYPE_INT }, \ - { "strict_data_order", CTLTYPE_INT }, \ - { "tcbhashsize", CTLTYPE_INT }, \ - { "pcbhashsize", CTLTYPE_INT }, \ - { "chunkscale", CTLTYPE_INT }, \ - { "min_split_point", CTLTYPE_INT }, \ - { "add_more_on_output", CTLTYPE_INT }, \ - { "sys_resource", CTLTYPE_INT }, \ - { "asoc_resource", CTLTYPE_INT }, \ - { "nat_friendly", CTLTYPE_INT }, \ -} -#endif - - #if defined(_KERNEL) -#ifdef SYSCTL_DECL -SYSCTL_DECL(_net_inet_sctp); -#endif extern struct pr_usrreqs sctp_usrreqs; @@ -235,18 +54,14 @@ extern struct pr_usrreqs sctp_usrreqs; #define sctp_sbspace_sub(a,b) ((a > b) ? (a - b) : 0) -extern uint32_t sctp_asoc_free_resc_limit; -extern uint32_t sctp_system_free_resc_limit; - -/* I tried to cache the readq entries at one - * point. But the reality is that it did not - * add any performance since this meant - * we had to lock the STCB on read. 
And at that point - * once you have to do an extra lock, it really does - * not matter if the lock is in the ZONE stuff or - * in our code. Note that this same problem would - * occur with an mbuf cache as well so it is - * not really worth doing, at least right now :-D +/* + * I tried to cache the readq entries at one point. But the reality + * is that it did not add any performance since this meant we had to + * lock the STCB on read. And at that point once you have to do an + * extra lock, it really does not matter if the lock is in the ZONE + * stuff or in our code. Note that this same problem would occur with + * an mbuf cache as well so it is not really worth doing, at least + * right now :-D */ #define sctp_free_a_readq(_stcb, _readq) { \ @@ -317,11 +132,20 @@ extern uint32_t sctp_system_free_resc_limit; #define sctp_free_remote_addr(__net) { \ - if ((__net)) { \ + if ((__net)) { \ if (atomic_fetchadd_int(&(__net)->ref_count, -1) == 1) { \ SCTP_OS_TIMER_STOP(&(__net)->rxt_timer.timer); \ SCTP_OS_TIMER_STOP(&(__net)->pmtu_timer.timer); \ SCTP_OS_TIMER_STOP(&(__net)->fr_timer.timer); \ + if ((__net)->ro.ro_rt) { \ + RTFREE((__net)->ro.ro_rt); \ + (__net)->ro.ro_rt = NULL; \ + } \ + if ((__net)->src_addr_selected) { \ + sctp_free_ifa((__net)->ro._s_addr); \ + (__net)->ro._s_addr = NULL; \ + } \ + (__net)->src_addr_selected = 0; \ (__net)->dest_state = SCTP_ADDR_NOT_REACHABLE; \ SCTP_ZONE_FREE(sctppcbinfo.ipi_zone_net, (__net)); \ SCTP_DECR_RADDR_COUNT(); \ @@ -408,19 +232,6 @@ extern uint32_t sctp_system_free_resc_limit; } while (0) -/* - * some sysctls - */ -extern int sctp_sendspace; -extern int sctp_recvspace; -extern int sctp_ecn_enable; -extern int sctp_ecn_nonce; -extern int sctp_use_cwnd_based_maxburst; -extern unsigned int sctp_cmt_on_off; -extern unsigned int sctp_cmt_use_dac; -extern unsigned int sctp_cmt_sockopt_on_off; -extern uint32_t sctp_nat_friendly; - struct sctp_nets; struct sctp_inpcb; struct sctp_tcb; @@ -429,7 +240,6 @@ struct sctphdr; void 
sctp_ctlinput __P((int, struct sockaddr *, void *)); int sctp_ctloutput __P((struct socket *, struct sockopt *)); void sctp_input __P((struct mbuf *, int)); - void sctp_drain __P((void)); void sctp_init __P((void)); @@ -468,7 +278,6 @@ __P((struct sctp_inpcb *, int, struct sctphdr *, int sctp_accept(struct socket *, struct sockaddr **); - #endif /* _KERNEL */ #endif /* !_NETINET_SCTP_VAR_H_ */ diff --git a/sys/netinet/sctputil.c b/sys/netinet/sctputil.c index 8372ba6..e3c240c 100644 --- a/sys/netinet/sctputil.c +++ b/sys/netinet/sctputil.c @@ -37,6 +37,7 @@ __FBSDID("$FreeBSD$"); #include <netinet/sctp_pcb.h> #include <netinet/sctputil.h> #include <netinet/sctp_var.h> +#include <netinet/sctp_sysctl.h> #ifdef INET6 #include <netinet6/sctp6_var.h> #endif @@ -48,16 +49,10 @@ __FBSDID("$FreeBSD$"); #include <netinet/sctp_indata.h>/* for sctp_deliver_data() */ #include <netinet/sctp_auth.h> #include <netinet/sctp_asconf.h> - -extern int sctp_warm_the_crc32_table; +#include <netinet/sctp_bsd_addr.h> #define NUMBER_OF_MTU_SIZES 18 -#ifdef SCTP_DEBUG -extern uint32_t sctp_debug_on; - -#endif - #ifdef SCTP_STAT_LOGGING int global_sctp_cwnd_log_at = 0; @@ -489,22 +484,22 @@ sctp_log_block(uint8_t from, struct socket *so, struct sctp_association *asoc, i } int -sctp_fill_stat_log(struct mbuf *m) +sctp_fill_stat_log(void *optval, size_t *optsize) { int sctp_cwnd_log_at; struct sctp_cwnd_log_req *req; size_t size_limit; int num, i, at, cnt_out = 0; - if (m == NULL) + if (*optsize < sizeof(struct sctp_cwnd_log_req)) { return (EINVAL); - - size_limit = (SCTP_BUF_LEN(m) - sizeof(struct sctp_cwnd_log_req)); + } + size_limit = (*optsize - sizeof(struct sctp_cwnd_log_req)); if (size_limit < sizeof(struct sctp_cwnd_log)) { return (EINVAL); } sctp_cwnd_log_at = global_sctp_cwnd_log_at; - req = mtod(m, struct sctp_cwnd_log_req *); + req = (struct sctp_cwnd_log_req *)optval; num = size_limit / sizeof(struct sctp_cwnd_log); if (global_sctp_cwnd_log_rolled) { req->num_in_log = 
SCTP_STAT_LOG_SIZE; @@ -569,7 +564,7 @@ sctp_fill_stat_log(struct mbuf *m) if (at >= SCTP_STAT_LOG_SIZE) at = 0; } - SCTP_BUF_LEN(m) = (cnt_out * sizeof(struct sctp_cwnd_log)) + sizeof(struct sctp_cwnd_log_req); + *optsize = (cnt_out * sizeof(struct sctp_cwnd_log)) + sizeof(struct sctp_cwnd_log_req); return (0); } @@ -909,7 +904,7 @@ sctp_select_a_tag(struct sctp_inpcb *m) int sctp_init_asoc(struct sctp_inpcb *m, struct sctp_association *asoc, - int for_a_init, uint32_t override_tag) + int for_a_init, uint32_t override_tag, uint32_t vrf_id) { /* * Anything set to zero is taken care of by the allocation routine's @@ -930,13 +925,13 @@ sctp_init_asoc(struct sctp_inpcb *m, struct sctp_association *asoc, asoc->heart_beat_delay = TICKS_TO_MSEC(m->sctp_ep.sctp_timeoutticks[SCTP_TIMER_HEARTBEAT]); asoc->cookie_life = m->sctp_ep.def_cookie_life; asoc->sctp_cmt_on_off = (uint8_t) sctp_cmt_on_off; -#ifdef AF_INET +#ifdef INET asoc->default_tos = m->ip_inp.inp.inp_ip_tos; #else asoc->default_tos = 0; #endif -#ifdef AF_INET6 +#ifdef INET6 asoc->default_flowlabel = ((struct in6pcb *)m)->in6p_flowinfo; #else asoc->default_flowlabel = 0; @@ -957,6 +952,7 @@ sctp_init_asoc(struct sctp_inpcb *m, struct sctp_association *asoc, /* Get the nonce tags */ asoc->my_vtag_nonce = sctp_select_a_tag(m); asoc->peer_vtag_nonce = sctp_select_a_tag(m); + asoc->vrf_id = vrf_id; if (sctp_is_feature_on(m, SCTP_PCB_FLAGS_DONOT_HEARTBEAT)) asoc->hb_is_disabled = 1; @@ -1005,6 +1001,7 @@ sctp_init_asoc(struct sctp_inpcb *m, struct sctp_association *asoc, asoc->nonce_wait_for_ecne = 0; asoc->nonce_wait_tsn = 0; asoc->delayed_ack = TICKS_TO_MSEC(m->sctp_ep.sctp_timeoutticks[SCTP_TIMER_RECV]); + asoc->sack_freq = m->sctp_ep.sctp_sack_freq; asoc->pr_sctp_cnt = 0; asoc->total_output_queue_size = 0; @@ -1039,7 +1036,10 @@ sctp_init_asoc(struct sctp_inpcb *m, struct sctp_association *asoc, asoc->ecn_echo_cnt_onq = 0; asoc->stream_locked = 0; - LIST_INIT(&asoc->sctp_local_addr_list); + asoc->send_sack = 1; + 
+ LIST_INIT(&asoc->sctp_restricted_addrs); + TAILQ_INIT(&asoc->nets); TAILQ_INIT(&asoc->pending_reply_queue); asoc->last_asconf_ack_sent = NULL; @@ -1141,36 +1141,207 @@ sctp_expand_mapping_array(struct sctp_association *asoc) return (0); } -extern unsigned int sctp_early_fr_msec; +#if defined(SCTP_USE_THREAD_BASED_ITERATOR) +static void +sctp_iterator_work(struct sctp_iterator *it) +{ + int iteration_count = 0; + int inp_skip = 0; + + SCTP_ITERATOR_LOCK(); + if (it->inp) + SCTP_INP_DECR_REF(it->inp); + + if (it->inp == NULL) { + /* iterator is complete */ +done_with_iterator: + SCTP_ITERATOR_UNLOCK(); + if (it->function_atend != NULL) { + (*it->function_atend) (it->pointer, it->val); + } + SCTP_FREE(it); + return; + } +select_a_new_ep: + SCTP_INP_WLOCK(it->inp); + while (((it->pcb_flags) && + ((it->inp->sctp_flags & it->pcb_flags) != it->pcb_flags)) || + ((it->pcb_features) && + ((it->inp->sctp_features & it->pcb_features) != it->pcb_features))) { + /* endpoint flags or features don't match, so keep looking */ + if (it->iterator_flags & SCTP_ITERATOR_DO_SINGLE_INP) { + SCTP_INP_WUNLOCK(it->inp); + goto done_with_iterator; + } + SCTP_INP_WUNLOCK(it->inp); + it->inp = LIST_NEXT(it->inp, sctp_list); + if (it->inp == NULL) { + goto done_with_iterator; + } + SCTP_INP_WLOCK(it->inp); + } + + /* mark the current iterator on the endpoint */ + it->inp->inp_starting_point_for_iterator = it; + SCTP_INP_WUNLOCK(it->inp); + SCTP_INP_RLOCK(it->inp); + + /* now go through each assoc which is in the desired state */ + if (it->done_current_ep == 0) { + if (it->function_inp != NULL) + inp_skip = (*it->function_inp) (it->inp, it->pointer, it->val); + it->done_current_ep = 1; + } + if (it->stcb == NULL) { + /* run the per instance function */ + it->stcb = LIST_FIRST(&it->inp->sctp_asoc_list); + } + if ((inp_skip) || it->stcb == NULL) { + if (it->function_inp_end != NULL) { + inp_skip = (*it->function_inp_end) (it->inp, + it->pointer, + it->val); + } + SCTP_INP_RUNLOCK(it->inp); + 
goto no_stcb; + } + if ((it->stcb) && + (it->stcb->asoc.stcb_starting_point_for_iterator == it)) { + it->stcb->asoc.stcb_starting_point_for_iterator = NULL; + } + while (it->stcb) { + SCTP_TCB_LOCK(it->stcb); + if (it->asoc_state && ((it->stcb->asoc.state & it->asoc_state) != it->asoc_state)) { + /* not in the right state... keep looking */ + SCTP_TCB_UNLOCK(it->stcb); + goto next_assoc; + } + /* mark the current iterator on the assoc */ + it->stcb->asoc.stcb_starting_point_for_iterator = it; + /* see if we have limited out the iterator loop */ + iteration_count++; + if (iteration_count > SCTP_ITERATOR_MAX_AT_ONCE) { + /* Pause to let others grab the lock */ + atomic_add_int(&it->stcb->asoc.refcnt, 1); + SCTP_TCB_UNLOCK(it->stcb); + SCTP_INP_RUNLOCK(it->inp); + SCTP_ITERATOR_UNLOCK(); + SCTP_ITERATOR_LOCK(); + SCTP_INP_RLOCK(it->inp); + SCTP_TCB_LOCK(it->stcb); + atomic_add_int(&it->stcb->asoc.refcnt, -1); + iteration_count = 0; + } + /* run function on this one */ + (*it->function_assoc) (it->inp, it->stcb, it->pointer, it->val); + + /* + * we lie here, it really needs to have its own type but + * first I must verify that this won't effect things :-0 + */ + if (it->no_chunk_output == 0) + sctp_chunk_output(it->inp, it->stcb, SCTP_OUTPUT_FROM_T3); + + SCTP_TCB_UNLOCK(it->stcb); +next_assoc: + it->stcb = LIST_NEXT(it->stcb, sctp_tcblist); + if (it->stcb == NULL) { + /* Run last function */ + if (it->function_inp_end != NULL) { + inp_skip = (*it->function_inp_end) (it->inp, + it->pointer, + it->val); + } + } + } + SCTP_INP_RUNLOCK(it->inp); +no_stcb: + /* done with all assocs on this endpoint, move on to next endpoint */ + it->done_current_ep = 0; + SCTP_INP_WLOCK(it->inp); + it->inp->inp_starting_point_for_iterator = NULL; + SCTP_INP_WUNLOCK(it->inp); + if (it->iterator_flags & SCTP_ITERATOR_DO_SINGLE_INP) { + it->inp = NULL; + } else { + SCTP_INP_INFO_RLOCK(); + it->inp = LIST_NEXT(it->inp, sctp_list); + SCTP_INP_INFO_RUNLOCK(); + } + if (it->inp == NULL) { + goto 
done_with_iterator; + } + goto select_a_new_ep; +} + +void +sctp_iterator_worker(void) +{ + struct sctp_iterator *it = NULL; + + /* This function is called with the WQ lock in place */ + + sctppcbinfo.iterator_running = 1; +again: + it = TAILQ_FIRST(&sctppcbinfo.iteratorhead); + while (it) { + /* now lets work on this one */ + TAILQ_REMOVE(&sctppcbinfo.iteratorhead, it, sctp_nxt_itr); + SCTP_IPI_ITERATOR_WQ_UNLOCK(); + sctp_iterator_work(it); + SCTP_IPI_ITERATOR_WQ_LOCK(); + it = TAILQ_FIRST(&sctppcbinfo.iteratorhead); + } + if (TAILQ_FIRST(&sctppcbinfo.iteratorhead)) { + goto again; + } + sctppcbinfo.iterator_running = 0; + return; +} + +#endif + static void sctp_handle_addr_wq(void) { /* deal with the ADDR wq from the rtsock calls */ struct sctp_laddr *wi; + struct sctp_asconf_iterator *asc; - SCTP_IPI_ADDR_LOCK(); - wi = LIST_FIRST(&sctppcbinfo.addr_wq); - if (wi == NULL) { - SCTP_IPI_ADDR_UNLOCK(); - return; - } - LIST_REMOVE(wi, sctp_nxt_addr); - if (!SCTP_LIST_EMPTY(&sctppcbinfo.addr_wq)) { + SCTP_MALLOC(asc, struct sctp_asconf_iterator *, + sizeof(struct sctp_asconf_iterator), "SCTP_ASCONF_ITERATOR"); + if (asc == NULL) { + /* Try later, no memory */ sctp_timer_start(SCTP_TIMER_TYPE_ADDR_WQ, (struct sctp_inpcb *)NULL, (struct sctp_tcb *)NULL, (struct sctp_nets *)NULL); + return; } - SCTP_IPI_ADDR_UNLOCK(); - if (wi->action == RTM_ADD) { - sctp_add_ip_address(wi->ifa); - } else if (wi->action == RTM_DELETE) { - sctp_delete_ip_address(wi->ifa); + LIST_INIT(&asc->list_of_work); + asc->cnt = 0; + SCTP_IPI_ITERATOR_WQ_LOCK(); + wi = LIST_FIRST(&sctppcbinfo.addr_wq); + while (wi != NULL) { + LIST_REMOVE(wi, sctp_nxt_addr); + LIST_INSERT_HEAD(&asc->list_of_work, wi, sctp_nxt_addr); + asc->cnt++; + wi = LIST_FIRST(&sctppcbinfo.addr_wq); + } + SCTP_IPI_ITERATOR_WQ_UNLOCK(); + if (asc->cnt == 0) { + SCTP_FREE(asc); + } else { + sctp_initiate_iterator(sctp_iterator_ep, + sctp_iterator_stcb, + NULL, /* No ep end for boundall */ + SCTP_PCB_FLAGS_BOUNDALL, + 
SCTP_PCB_ANY_FEATURES, + SCTP_ASOC_ANY_STATE, (void *)asc, 0, + sctp_iterator_end, NULL, 0); } - IFAFREE(wi->ifa); - SCTP_ZONE_FREE(sctppcbinfo.ipi_zone_laddr, wi); - SCTP_DECR_LADDR_COUNT(); + } void @@ -1564,7 +1735,7 @@ sctp_timer_start(int t_type, struct sctp_inpcb *inp, struct sctp_tcb *stcb, case SCTP_TIMER_TYPE_ADDR_WQ: /* Only 1 tick away :-) */ tmr = &sctppcbinfo.addr_wq_timer; - to_ticks = 1; + to_ticks = SCTP_ADDRESS_TICK_DELAY; break; case SCTP_TIMER_TYPE_ITERATOR: { @@ -1658,14 +1829,11 @@ sctp_timer_start(int t_type, struct sctp_inpcb *inp, struct sctp_tcb *stcb, rndval = sctp_select_initial_TSN(&inp->sctp_ep); memcpy(stcb->asoc.hb_random_values, &rndval, sizeof(stcb->asoc.hb_random_values)); - this_random = stcb->asoc.hb_random_values[0]; stcb->asoc.hb_random_idx = 0; - stcb->asoc.hb_ect_randombit = 0; - } else { - this_random = stcb->asoc.hb_random_values[stcb->asoc.hb_random_idx]; - stcb->asoc.hb_random_idx++; - stcb->asoc.hb_ect_randombit = 0; } + this_random = stcb->asoc.hb_random_values[stcb->asoc.hb_random_idx]; + stcb->asoc.hb_random_idx++; + stcb->asoc.hb_ect_randombit = 0; /* * this_random will be 0 - 256 ms RTO is in ms. 
*/ @@ -2631,10 +2799,15 @@ sctp_notify_peer_addr_change(struct sctp_tcb *stcb, uint32_t state, memcpy(&spc->spc_aaddr, sa, sizeof(struct sockaddr_in6)); - /* recover scope_id for user */ sin6 = (struct sockaddr_in6 *)&spc->spc_aaddr; if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr)) { - (void)sa6_recoverscope(sin6); + if (sin6->sin6_scope_id == 0) { + /* recover scope_id for user */ + (void)sa6_recoverscope(sin6); + } else { + /* clear embedded scope_id for user */ + in6_clearscope(&sin6->sin6_addr); + } } } spc->spc_state = state; @@ -3719,8 +3892,12 @@ sctp_add_to_readq(struct sctp_inpcb *inp, return; } SCTP_INP_READ_LOCK(inp); - atomic_add_int(&inp->total_recvs, 1); - atomic_add_int(&stcb->total_recvs, 1); + if (!(control->spec_flags & M_NOTIFICATION)) { + atomic_add_int(&inp->total_recvs, 1); + if (!control->do_not_ref_stcb) { + atomic_add_int(&stcb->total_recvs, 1); + } + } m = control->data; control->held_length = 0; control->length = 0; @@ -4019,57 +4196,105 @@ sctp_release_pr_sctp_chunk(struct sctp_tcb *stcb, struct sctp_tmit_chunk *tp1, * and doesn't handle multiple addresses with different zone/scope id's note: * ifa_ifwithaddr() compares the entire sockaddr struct */ -struct ifaddr * -sctp_find_ifa_by_addr(struct sockaddr *sa) +struct sctp_ifa * +sctp_find_ifa_in_ep(struct sctp_inpcb *inp, struct sockaddr *addr, int holds_lock) { - struct ifnet *ifn; - struct ifaddr *ifa; + struct sctp_laddr *laddr; - /* go through all our known interfaces */ - TAILQ_FOREACH(ifn, &ifnet, if_list) { - /* go through each interface addresses */ - TAILQ_FOREACH(ifa, &ifn->if_addrlist, ifa_list) { - /* correct family? 
*/ - if (ifa->ifa_addr->sa_family != sa->sa_family) - continue; + if (holds_lock == 0) + SCTP_INP_RLOCK(inp); -#ifdef INET6 - if (ifa->ifa_addr->sa_family == AF_INET6) { - /* IPv6 address */ - struct sockaddr_in6 *sin1, *sin2, sin6_tmp; - - sin1 = (struct sockaddr_in6 *)ifa->ifa_addr; - if (IN6_IS_SCOPE_LINKLOCAL(&sin1->sin6_addr)) { - /* create a copy and clear scope */ - memcpy(&sin6_tmp, sin1, - sizeof(struct sockaddr_in6)); - sin1 = &sin6_tmp; - in6_clearscope(&sin1->sin6_addr); - } - sin2 = (struct sockaddr_in6 *)sa; - if (memcmp(&sin1->sin6_addr, &sin2->sin6_addr, - sizeof(struct in6_addr)) == 0) { - /* found it */ - return (ifa); - } - } else -#endif - if (ifa->ifa_addr->sa_family == AF_INET) { - /* IPv4 address */ - struct sockaddr_in *sin1, *sin2; - - sin1 = (struct sockaddr_in *)ifa->ifa_addr; - sin2 = (struct sockaddr_in *)sa; - if (sin1->sin_addr.s_addr == - sin2->sin_addr.s_addr) { - /* found it */ - return (ifa); - } + LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) { + if (laddr->ifa == NULL) + continue; + if (addr->sa_family != laddr->ifa->address.sa.sa_family) + continue; + if (addr->sa_family == AF_INET) { + if (((struct sockaddr_in *)addr)->sin_addr.s_addr == + laddr->ifa->address.sin.sin_addr.s_addr) { + /* found him. */ + if (holds_lock == 0) + SCTP_INP_RUNLOCK(inp); + return (laddr->ifa); + break; + } + } else if (addr->sa_family == AF_INET6) { + if (SCTP6_ARE_ADDR_EQUAL(&((struct sockaddr_in6 *)addr)->sin6_addr, + &laddr->ifa->address.sin6.sin6_addr)) { + /* found him. 
*/ + if (holds_lock == 0) + SCTP_INP_RUNLOCK(inp); + return (laddr->ifa); + break; + } + } + } + if (holds_lock == 0) + SCTP_INP_RUNLOCK(inp); + return (NULL); +} + +struct sctp_ifa * +sctp_find_ifa_in_ifn(struct sctp_ifn *sctp_ifnp, struct sockaddr *addr, + int holds_lock) +{ + struct sctp_ifa *sctp_ifap; + + if (holds_lock == 0) + SCTP_IPI_ADDR_LOCK(); + + LIST_FOREACH(sctp_ifap, &sctp_ifnp->ifalist, next_ifa) { + if (addr->sa_family != sctp_ifap->address.sa.sa_family) + continue; + if (addr->sa_family == AF_INET) { + if (((struct sockaddr_in *)addr)->sin_addr.s_addr == + sctp_ifap->address.sin.sin_addr.s_addr) { + /* found him. */ + if (holds_lock == 0) + SCTP_IPI_ADDR_UNLOCK(); + return (sctp_ifap); + break; + } + } else if (addr->sa_family == AF_INET6) { + if (SCTP6_ARE_ADDR_EQUAL(&((struct sockaddr_in6 *)addr)->sin6_addr, + &sctp_ifap->address.sin6.sin6_addr)) { + /* found him. */ + if (holds_lock == 0) + SCTP_IPI_ADDR_UNLOCK(); + return (sctp_ifap); + break; } - /* else, not AF_INET or AF_INET6, so skip */ - } /* end foreach ifa */ - } /* end foreach ifn */ - /* not found! 
*/ + } + } + if (holds_lock == 0) + SCTP_IPI_ADDR_UNLOCK(); + return (NULL); +} + +struct sctp_ifa * +sctp_find_ifa_by_addr(struct sockaddr *addr, uint32_t vrf_id, int holds_lock) +{ + struct sctp_ifa *sctp_ifap; + struct sctp_ifn *sctp_ifnp = NULL; + struct sctp_vrf *vrf; + + vrf = sctp_find_vrf(vrf_id); + if (vrf == NULL) + return (NULL); + + if (holds_lock == 0) + SCTP_IPI_ADDR_LOCK(); + + LIST_FOREACH(sctp_ifnp, &vrf->ifnlist, next_ifn) { + sctp_ifap = sctp_find_ifa_in_ifn(sctp_ifnp, addr, 1); + if (sctp_ifap) { + if (holds_lock == 0) + SCTP_IPI_ADDR_UNLOCK(); + return (sctp_ifap); + } + } + if (holds_lock == 0) + SCTP_IPI_ADDR_UNLOCK(); return (NULL); } @@ -4208,7 +4433,8 @@ sctp_sorecvmsg(struct socket *so, uint32_t rwnd_req = 0; int hold_sblock = 0; int hold_rlock = 0; - int alen = 0, slen = 0; + int alen = 0; + int slen = 0; int held_length = 0; if (msg_flags) { @@ -4228,7 +4454,7 @@ sctp_sorecvmsg(struct socket *so, if ((in_flags & (MSG_DONTWAIT | MSG_NBIO )) || - (so->so_state & SS_NBIO)) { + SCTP_SO_IS_NBIO(so)) { block_allowed = 0; } /* setup the endpoint */ @@ -4573,7 +4799,7 @@ found_one: if (fromlen && from) { struct sockaddr *to; -#ifdef AF_INET +#ifdef INET cp_len = min(fromlen, control->whoFrom->ro._l_addr.sin.sin_len); memcpy(from, &control->whoFrom->ro._l_addr, cp_len); ((struct sockaddr_in *)from)->sin_port = control->port_from; @@ -4585,7 +4811,7 @@ found_one: #endif to = from; -#if defined(AF_INET) && defined(AF_INET6) +#if defined(INET) && defined(INET6) if ((inp->sctp_flags & SCTP_PCB_FLAGS_NEEDS_MAPPED_V4) && (to->sa_family == AF_INET) && ((size_t)fromlen >= sizeof(struct sockaddr_in6))) { @@ -4604,7 +4830,7 @@ found_one: memcpy(from, (caddr_t)&sin6, sizeof(sin6)); } #endif -#if defined(AF_INET6) +#if defined(INET6) { struct sockaddr_in6 lsa6, *to6; @@ -5253,6 +5479,52 @@ sctp_m_freem(struct mbuf *mb) #endif +int +sctp_dynamic_set_primary(struct sockaddr *sa, uint32_t vrf_id) +{ + /* + * Given a local address. 
For all associations that holds the + * address, request a peer-set-primary. + */ + struct sctp_ifa *ifa; + struct sctp_laddr *wi; + + ifa = sctp_find_ifa_by_addr(sa, vrf_id, 0); + if (ifa == NULL) { + return (EADDRNOTAVAIL); + } + /* + * Now that we have the ifa we must awaken the iterator with this + * message. + */ + wi = SCTP_ZONE_GET(sctppcbinfo.ipi_zone_laddr, struct sctp_laddr); + if (wi == NULL) { + return (ENOMEM); + } + /* Now incr the count and int wi structure */ + SCTP_INCR_LADDR_COUNT(); + bzero(wi, sizeof(*wi)); + wi->ifa = ifa; + wi->action = SCTP_SET_PRIM_ADDR; + atomic_add_int(&ifa->refcount, 1); + + /* Now add it to the work queue */ + SCTP_IPI_ITERATOR_WQ_LOCK(); + /* + * Should this really be a tailq? As it is we will process the + * newest first :-0 + */ + LIST_INSERT_HEAD(&sctppcbinfo.addr_wq, wi, sctp_nxt_addr); + sctp_timer_start(SCTP_TIMER_TYPE_ADDR_WQ, + (struct sctp_inpcb *)NULL, + (struct sctp_tcb *)NULL, + (struct sctp_nets *)NULL); + SCTP_IPI_ITERATOR_WQ_UNLOCK(); + return (0); +} + + + int sctp_soreceive(so, psa, uio, mp0, controlp, flagsp) diff --git a/sys/netinet/sctputil.h b/sys/netinet/sctputil.h index 82cd432..5e1ef0d 100644 --- a/sys/netinet/sctputil.h +++ b/sys/netinet/sctputil.h @@ -56,13 +56,20 @@ void sctp_m_freem(struct mbuf *m); /* * Function prototypes */ -struct ifaddr *sctp_find_ifa_by_addr(struct sockaddr *sa); +struct sctp_ifa * + sctp_find_ifa_in_ep(struct sctp_inpcb *inp, struct sockaddr *addr, int hold_lock); +struct sctp_ifa * +sctp_find_ifa_in_ifn(struct sctp_ifn *sctp_ifnp, struct sockaddr *addr, + int holds_lock); + +struct sctp_ifa * + sctp_find_ifa_by_addr(struct sockaddr *addr, uint32_t vrf_id, int holds_lock); uint32_t sctp_select_initial_TSN(struct sctp_pcb *); uint32_t sctp_select_a_tag(struct sctp_inpcb *); -int sctp_init_asoc(struct sctp_inpcb *, struct sctp_association *, int, uint32_t); +int sctp_init_asoc(struct sctp_inpcb *, struct sctp_association *, int, uint32_t, uint32_t); void 
sctp_fill_random_store(struct sctp_pcb *); @@ -74,6 +81,9 @@ int sctp_timer_stop(int, struct sctp_inpcb *, struct sctp_tcb *, struct sctp_nets *, uint32_t); +int + sctp_dynamic_set_primary(struct sockaddr *sa, uint32_t vrf_id); + uint32_t sctp_calculate_sum(struct mbuf *, int32_t *, uint32_t); void @@ -96,6 +106,8 @@ sctp_append_to_readq(struct sctp_inpcb *inp, struct sockbuf *sb); +void sctp_iterator_worker(void); + int find_next_best_mtu(int); void @@ -288,7 +300,7 @@ void sctp_log_block(uint8_t, struct socket *, struct sctp_association *, int); void sctp_log_rwnd(uint8_t, uint32_t, uint32_t, uint32_t); void sctp_log_mbcnt(uint8_t, uint32_t, uint32_t, uint32_t, uint32_t); void sctp_log_rwnd_set(uint8_t, uint32_t, uint32_t, uint32_t, uint32_t); -int sctp_fill_stat_log(struct mbuf *); +int sctp_fill_stat_log(void *, size_t *); void sctp_log_fr(uint32_t, uint32_t, uint32_t, int); void sctp_log_sack(uint32_t, uint32_t, uint32_t, uint16_t, uint16_t, int); void sctp_log_map(uint32_t, uint32_t, uint32_t, int); diff --git a/sys/netinet6/sctp6_usrreq.c b/sys/netinet6/sctp6_usrreq.c index f08bf2e..cb1c006 100644 --- a/sys/netinet6/sctp6_usrreq.c +++ b/sys/netinet6/sctp6_usrreq.c @@ -36,6 +36,7 @@ __FBSDID("$FreeBSD$"); #include <netinet/sctp_pcb.h> #include <netinet/sctp_header.h> #include <netinet/sctp_var.h> +#include <netinet/sctp_sysctl.h> #include <netinet/sctp_output.h> #include <netinet/sctp_input.h> #include <netinet/sctp_bsd_addr.h> @@ -50,17 +51,10 @@ __FBSDID("$FreeBSD$"); -#ifdef SCTP_DEBUG -extern uint32_t sctp_debug_on; - -#endif /* SCTP_DEBUG */ - extern struct protosw inetsw[]; -extern int sctp_no_csum_on_loopback; - int sctp6_input(mp, offp, proto) struct mbuf **mp; @@ -339,8 +333,11 @@ sctp6_ctlinput(cmd, pktdst, d) { struct sctphdr sh; struct ip6ctlparam *ip6cp = NULL; + uint32_t vrf_id; int cm; + vrf_id = SCTP_DEFAULT_VRFID; + if (pktdst->sa_family != AF_INET6 || pktdst->sa_len != sizeof(struct sockaddr_in6)) return; @@ -386,7 +383,7 @@ 
sctp6_ctlinput(cmd, pktdst, d) final.sin6_port = sh.dest_port; stcb = sctp_findassociation_addr_sa((struct sockaddr *)ip6cp->ip6c_src, (struct sockaddr *)&final, - &inp, &net, 1); + &inp, &net, 1, vrf_id); /* inp's ref-count increased && stcb locked */ if (stcb != NULL && inp && (inp->sctp_socket != NULL)) { if (cmd == PRC_MSGSIZE) { @@ -437,6 +434,9 @@ sctp6_getcred(SYSCTL_HANDLER_ARGS) struct sctp_nets *net; struct sctp_tcb *stcb; int error; + uint32_t vrf_id; + + vrf_id = SCTP_DEFAULT_VRFID; /* * XXXRW: Other instances of getcred use SUSER_ALLOWJAIL, as socket @@ -458,7 +458,7 @@ sctp6_getcred(SYSCTL_HANDLER_ARGS) stcb = sctp_findassociation_addr_sa(sin6tosa(&addrs[0]), sin6tosa(&addrs[1]), - &inp, &net, 1); + &inp, &net, 1, vrf_id); if (stcb == NULL || inp == NULL || inp->sctp_socket == NULL) { if ((inp != NULL) && (stcb == NULL)) { /* reduce ref-count */ @@ -703,6 +703,7 @@ sctp_must_try_again: } +/* This could be made common with sctp_detach() since they are identical */ static int sctp6_disconnect(struct socket *so) @@ -942,6 +943,7 @@ connected_type: static int sctp6_connect(struct socket *so, struct sockaddr *addr, struct thread *p) { + uint32_t vrf_id; int error = 0; struct sctp_inpcb *inp; struct in6pcb *inp6; @@ -959,6 +961,7 @@ sctp6_connect(struct socket *so, struct sockaddr *addr, struct thread *p) return (ECONNRESET); /* I made the same as TCP since we are * not setup? */ } + vrf_id = SCTP_DEFAULT_VRFID; SCTP_ASOC_CREATE_LOCK(inp); SCTP_INP_RLOCK(inp); if ((inp->sctp_flags & SCTP_PCB_FLAGS_UNBOUND) == @@ -1039,7 +1042,7 @@ sctp6_connect(struct socket *so, struct sockaddr *addr, struct thread *p) return (EALREADY); } /* We are GOOD to go */ - stcb = sctp_aloc_assoc(inp, addr, 1, &error, 0); + stcb = sctp_aloc_assoc(inp, addr, 1, &error, 0, vrf_id); SCTP_ASOC_CREATE_UNLOCK(inp); if (stcb == NULL) { /* Gak! 
no memory */ @@ -1065,12 +1068,12 @@ static int sctp6_getaddr(struct socket *so, struct sockaddr **addr) { struct sockaddr_in6 *sin6; - struct sctp_inpcb *inp; + uint32_t vrf_id; + struct sctp_ifa *sctp_ifa; int error; - /* * Do the malloc first in case it blocks. */ @@ -1114,9 +1117,12 @@ sctp6_getaddr(struct socket *so, struct sockaddr **addr) /* punt */ goto notConn6; } - sin6->sin6_addr = sctp_ipv6_source_address_selection( - inp, stcb, (struct route *)&net->ro, net, 0); + vrf_id = SCTP_DEFAULT_VRFID; + sctp_ifa = sctp_source_address_selection(inp, stcb, (struct route *)&net->ro, net, 0, vrf_id); + if (sctp_ifa) { + sin6->sin6_addr = sctp_ifa->address.sin6.sin6_addr; + } } else { /* For the bound all case you get back 0 */ notConn6: @@ -1128,10 +1134,10 @@ sctp6_getaddr(struct socket *so, struct sockaddr **addr) int fnd = 0; LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) { - if (laddr->ifa->ifa_addr->sa_family == AF_INET6) { + if (laddr->ifa->address.sa.sa_family == AF_INET6) { struct sockaddr_in6 *sin_a; - sin_a = (struct sockaddr_in6 *)laddr->ifa->ifa_addr; + sin_a = (struct sockaddr_in6 *)&laddr->ifa->address.sin6; sin6->sin6_addr = sin_a->sin6_addr; fnd = 1; break; @@ -1157,7 +1163,6 @@ static int sctp6_peeraddr(struct socket *so, struct sockaddr **addr) { struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)*addr; - int fnd; struct sockaddr_in6 *sin_a6; struct sctp_inpcb *inp; @@ -1166,7 +1171,6 @@ sctp6_peeraddr(struct socket *so, struct sockaddr **addr) int error; - /* * Do the malloc first in case it blocks. */ @@ -1220,7 +1224,6 @@ static int sctp6_in6getaddr(struct socket *so, struct sockaddr **nam) { struct sockaddr *addr; - struct in6pcb *inp6 = sotoin6pcb(so); int error; @@ -1252,7 +1255,6 @@ static int sctp6_getpeeraddr(struct socket *so, struct sockaddr **nam) { struct sockaddr *addr = *nam; - struct in6pcb *inp6 = sotoin6pcb(so); int error; |