From 03fdc2934eb61c44c049a02b02aa974cfdd8a0eb Mon Sep 17 00:00:00 2001 From: glebius Date: Fri, 21 Mar 2014 15:15:30 +0000 Subject: Merge r262763, r262767, r262771, r262806 from head: - Remove rt_metrics_lite and simply put its members into rtentry. - Use counter(9) for rt_pksent (former rt_rmx.rmx_pksent). This removes another cache trashing ++ from packet forwarding path. - Create zini/fini methods for the rtentry UMA zone. Via initialize mutex and counter in them. - Fix reporting of rmx_pksent to routing socket. - Fix netstat(1) to report "Use" both in kvm(3) and sysctl(3) mode. --- sys/net/if_disc.c | 2 +- sys/net/if_faith.c | 2 +- sys/net/if_loop.c | 2 +- sys/net/if_stf.c | 2 +- sys/net/radix_mpath.c | 6 +++--- sys/net/route.c | 56 +++++++++++++++++++++++++++++++++++++++++-------- sys/net/route.h | 39 ++++++++++++++++------------------ sys/net/rtsock.c | 58 ++++++++++++++++++++------------------------------- 8 files changed, 95 insertions(+), 72 deletions(-) (limited to 'sys/net') diff --git a/sys/net/if_disc.c b/sys/net/if_disc.c index 16a1e7b..d072e4d 100644 --- a/sys/net/if_disc.c +++ b/sys/net/if_disc.c @@ -185,7 +185,7 @@ static void discrtrequest(int cmd, struct rtentry *rt, struct rt_addrinfo *info) { RT_LOCK_ASSERT(rt); - rt->rt_rmx.rmx_mtu = DSMTU; + rt->rt_mtu = DSMTU; } /* diff --git a/sys/net/if_faith.c b/sys/net/if_faith.c index 37a53bb..fb98f41 100644 --- a/sys/net/if_faith.c +++ b/sys/net/if_faith.c @@ -245,7 +245,7 @@ faithrtrequest(cmd, rt, info) struct rt_addrinfo *info; { RT_LOCK_ASSERT(rt); - rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu; + rt->rt_mtu = rt->rt_ifp->if_mtu; } /* diff --git a/sys/net/if_loop.c b/sys/net/if_loop.c index 499cd2c..e17fa11 100644 --- a/sys/net/if_loop.c +++ b/sys/net/if_loop.c @@ -394,7 +394,7 @@ lortrequest(int cmd, struct rtentry *rt, struct rt_addrinfo *info) { RT_LOCK_ASSERT(rt); - rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu; + rt->rt_mtu = rt->rt_ifp->if_mtu; } /* diff --git a/sys/net/if_stf.c b/sys/net/if_stf.c index 2c34739..20251dc 100644 --- a/sys/net/if_stf.c +++ b/sys/net/if_stf.c @@ -784,7 +784,7 @@ stf_rtrequest(cmd, rt, info) struct rt_addrinfo *info; { RT_LOCK_ASSERT(rt); - rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu; + rt->rt_mtu = rt->rt_ifp->if_mtu; } static int diff --git a/sys/net/radix_mpath.c b/sys/net/radix_mpath.c index ee7826f..c87f13c 100644 --- a/sys/net/radix_mpath.c +++ b/sys/net/radix_mpath.c @@ -85,7 +85,7 @@ rn_mpath_count(struct radix_node *rn) while (rn != NULL) { rt = (struct rtentry *)rn; - i += rt->rt_rmx.rmx_weight; + i += rt->rt_weight; rn = rn_mpath_next(rn); } return (i); @@ -288,8 +288,8 @@ rtalloc_mpath_fib(struct route *ro, uint32_t hash, u_int fibnum) hash += hashjitter; hash %= n; for (weight = abs((int32_t)hash), rt = ro->ro_rt; - weight >= rt->rt_rmx.rmx_weight && rn; - weight -= rt->rt_rmx.rmx_weight) { + weight >= rt->rt_weight && rn; + weight -= rt->rt_weight) { /* stay within the multipath routes */ if (rn->rn_dupedkey && rn->rn_mask != rn->rn_dupedkey->rn_mask) diff --git a/sys/net/route.c b/sys/net/route.c index bb99496..e546e20 100644 --- a/sys/net/route.c +++ b/sys/net/route.c @@ -200,6 +200,48 @@ route_init(void) } SYSINIT(route_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, route_init, 0); +static int +rtentry_zinit(void *mem, int size, int how) +{ + struct rtentry *rt = mem; + + rt->rt_pksent = counter_u64_alloc(how); + if (rt->rt_pksent == NULL) + return (ENOMEM); + + RT_LOCK_INIT(rt); + + return (0); +} + +static void +rtentry_zfini(void *mem, int size) +{ + struct rtentry *rt = mem; + + RT_LOCK_DESTROY(rt); + counter_u64_free(rt->rt_pksent); +} + +static int +rtentry_ctor(void *mem, int size, void *arg, int how) +{ + struct rtentry *rt = mem; + + bzero(rt, offsetof(struct rtentry, rt_endzero)); + counter_u64_zero(rt->rt_pksent); + + return (0); +} + +static void +rtentry_dtor(void *mem, int size, void *arg) +{ + struct rtentry *rt = mem; + + RT_UNLOCK_COND(rt); +} + static void vnet_route_init(const void *unused __unused) { @@ -211,8 +253,9 @@ vnet_route_init(const void *unused __unused) V_rt_tables = malloc(rt_numfibs * (AF_MAX+1) * sizeof(struct radix_node_head *), M_RTABLE, M_WAITOK|M_ZERO); - V_rtzone = uma_zcreate("rtentry", sizeof(struct rtentry), NULL, NULL, - NULL, NULL, UMA_ALIGN_PTR, 0); + V_rtzone = uma_zcreate("rtentry", sizeof(struct rtentry), + rtentry_ctor, rtentry_dtor, + rtentry_zinit, rtentry_zfini, UMA_ALIGN_PTR, 0); for (dom = domains; dom; dom = dom->dom_next) { if (dom->dom_rtattach == NULL) continue; @@ -489,7 +532,6 @@ rtfree(struct rtentry *rt) /* * and the rtentry itself of course */ - RT_LOCK_DESTROY(rt); uma_zfree(V_rtzone, rt); return; } @@ -1173,12 +1215,11 @@ rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt, } else ifa_ref(info->rti_ifa); ifa = info->rti_ifa; - rt = uma_zalloc(V_rtzone, M_NOWAIT | M_ZERO); + rt = uma_zalloc(V_rtzone, M_NOWAIT); if (rt == NULL) { ifa_free(ifa); senderr(ENOBUFS); } - RT_LOCK_INIT(rt); rt->rt_flags = RTF_UP | flags; rt->rt_fibnum = fibnum; /* @@ -1186,7 +1227,6 @@ rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt, */ RT_LOCK(rt); if ((error = rt_setgate(rt, dst, gateway)) != 0) { - RT_LOCK_DESTROY(rt); ifa_free(ifa); uma_zfree(V_rtzone, rt); senderr(error); @@ -1212,7 +1252,7 @@ rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt, */ rt->rt_ifa = ifa; rt->rt_ifp = ifa->ifa_ifp; - rt->rt_rmx.rmx_weight = 1; + rt->rt_weight = 1; #ifdef RADIX_MPATH /* do not permit exactly the same dst/mask/gw pair */ @@ -1220,7 +1260,6 @@ rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt, rt_mpath_conflict(rnh, rt, netmask)) { ifa_free(rt->rt_ifa); Free(rt_key(rt)); - RT_LOCK_DESTROY(rt); uma_zfree(V_rtzone, rt); senderr(EEXIST); } @@ -1288,7 +1327,6 @@ rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt, if (rn == NULL) { ifa_free(rt->rt_ifa); Free(rt_key(rt)); - RT_LOCK_DESTROY(rt); uma_zfree(V_rtzone, rt); #ifdef FLOWTABLE if (rt0 != NULL) diff --git a/sys/net/route.h b/sys/net/route.h index 02d0ae9..a9fcbd0 100644 --- a/sys/net/route.h +++ b/sys/net/route.h @@ -33,6 +33,8 @@ #ifndef _NET_ROUTE_H_ #define _NET_ROUTE_H_ +#include + /* * Kernel resident routing tables. * @@ -57,17 +59,6 @@ struct route { #define RT_CACHING_CONTEXT 0x1 /* XXX: not used anywhere */ #define RT_NORTREF 0x2 /* doesn't hold reference on ro_rt */ -/* - * These numbers are used by reliable protocols for determining - * retransmission behavior and are included in the routing structure. - */ -struct rt_metrics_lite { - u_long rmx_mtu; /* MTU for this path */ - u_long rmx_expire; /* lifetime for route, e.g. redirect */ - u_long rmx_pksent; /* packets sent using this route */ - u_long rmx_weight; /* absolute weight */ -}; - struct rt_metrics { u_long rmx_locks; /* Kernel must leave these values alone */ u_long rmx_mtu; /* MTU for this path */ @@ -112,6 +103,8 @@ struct mbuf; #include #endif #endif + +#if defined(_KERNEL) || defined(_WANT_RTENTRY) struct rtentry { struct radix_node rt_nodes[2]; /* tree glue, and other values */ /* @@ -122,17 +115,19 @@ struct rtentry { #define rt_key(r) (*((struct sockaddr **)(&(r)->rt_nodes->rn_key))) #define rt_mask(r) (*((struct sockaddr **)(&(r)->rt_nodes->rn_mask))) struct sockaddr *rt_gateway; /* value */ - int rt_flags; /* up/down?, host/net */ - int rt_refcnt; /* # held references */ struct ifnet *rt_ifp; /* the answer: interface to use */ struct ifaddr *rt_ifa; /* the answer: interface address to use */ - struct rt_metrics_lite rt_rmx; /* metrics used by rx'ing protocols */ - u_int rt_fibnum; /* which FIB */ -#ifdef _KERNEL - /* XXX ugly, user apps use this definition but don't have a mtx def */ - struct mtx rt_mtx; /* mutex for routing entry */ -#endif + int rt_flags; /* up/down?, host/net */ + int rt_refcnt; /* # held references */ + u_int rt_fibnum; /* which FIB */ + u_long rt_mtu; /* MTU for this path */ + u_long rt_weight; /* absolute weight */ + u_long rt_expire; /* lifetime for route, e.g. redirect */ +#define rt_endzero rt_pksent + counter_u64_t rt_pksent; /* packets sent using this route */ + struct mtx rt_mtx; /* mutex for routing entry */ }; +#endif /* _KERNEL || _WANT_RTENTRY */ /* * Following structure necessary for 4.3 compatibility; @@ -148,8 +143,6 @@ struct ortentry { struct ifnet *rt_ifp; /* the answer: interface to use */ }; -#define rt_use rt_rmx.rmx_pksent - #define RTF_UP 0x1 /* route usable */ #define RTF_GATEWAY 0x2 /* destination is a gateway */ #define RTF_HOST 0x4 /* host entry (net otherwise) */ @@ -314,6 +307,10 @@ struct rt_addrinfo { #define RT_UNLOCK(_rt) mtx_unlock(&(_rt)->rt_mtx) #define RT_LOCK_DESTROY(_rt) mtx_destroy(&(_rt)->rt_mtx) #define RT_LOCK_ASSERT(_rt) mtx_assert(&(_rt)->rt_mtx, MA_OWNED) +#define RT_UNLOCK_COND(_rt) do { \ + if (mtx_owned(&(_rt)->rt_mtx)) \ + mtx_unlock(&(_rt)->rt_mtx); \ +} while (0) #define RT_ADDREF(_rt) do { \ RT_LOCK_ASSERT(_rt); \ diff --git a/sys/net/rtsock.c b/sys/net/rtsock.c index 0b5d1c5..b65b5eb 100644 --- a/sys/net/rtsock.c +++ b/sys/net/rtsock.c @@ -194,10 +194,8 @@ static int sysctl_dumpentry(struct radix_node *rn, void *vw); static int sysctl_iflist(int af, struct walkarg *w); static int sysctl_ifmalist(int af, struct walkarg *w); static int route_output(struct mbuf *m, struct socket *so); -static void rt_setmetrics(u_long which, const struct rt_metrics *in, - struct rt_metrics_lite *out); -static void rt_getmetrics(const struct rt_metrics_lite *in, - struct rt_metrics *out); +static void rt_setmetrics(const struct rt_msghdr *rtm, struct rtentry *rt); +static void rt_getmetrics(const struct rtentry *rt, struct rt_metrics *out); static void rt_dispatch(struct mbuf *, sa_family_t); static struct netisr_handler rtsock_nh = { @@ -685,8 +683,7 @@ route_output(struct mbuf *m, struct socket *so) rti_need_deembed = (V_deembed_scopeid) ? 1 : 0; #endif RT_LOCK(saved_nrt); - rt_setmetrics(rtm->rtm_inits, - &rtm->rtm_rmx, &saved_nrt->rt_rmx); + rt_setmetrics(rtm, saved_nrt); rtm->rtm_index = saved_nrt->rt_ifp->if_index; RT_REMREF(saved_nrt); RT_UNLOCK(saved_nrt); @@ -860,7 +857,7 @@ route_output(struct mbuf *m, struct socket *so) (rt->rt_flags & ~RTF_GWFLAG_COMPAT); else rtm->rtm_flags = rt->rt_flags; - rt_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx); + rt_getmetrics(rt, &rtm->rtm_rmx); rtm->rtm_addrs = info.rti_addrs; break; @@ -923,8 +920,7 @@ route_output(struct mbuf *m, struct socket *so) /* Allow some flags to be toggled on change. */ rt->rt_flags = (rt->rt_flags & ~RTF_FMASK) | (rtm->rtm_flags & RTF_FMASK); - rt_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx, - &rt->rt_rmx); + rt_setmetrics(rtm, rt); rtm->rtm_index = rt->rt_ifp->if_index; if (rt->rt_ifa && rt->rt_ifa->ifa_rtrequest) rt->rt_ifa->ifa_rtrequest(RTM_ADD, rt, &info); @@ -1012,34 +1008,30 @@ flush: } static void -rt_setmetrics(u_long which, const struct rt_metrics *in, - struct rt_metrics_lite *out) +rt_setmetrics(const struct rt_msghdr *rtm, struct rtentry *rt) { -#define metric(f, e) if (which & (f)) out->e = in->e; - /* - * Only these are stored in the routing entry since introduction - * of tcp hostcache. The rest is ignored. - */ - metric(RTV_MTU, rmx_mtu); - metric(RTV_WEIGHT, rmx_weight); - /* Userland -> kernel timebase conversion. */ - if (which & RTV_EXPIRE) - out->rmx_expire = in->rmx_expire ? - in->rmx_expire - time_second + time_uptime : 0; -#undef metric + + if (rtm->rtm_inits & RTV_MTU) + rt->rt_mtu = rtm->rtm_rmx.rmx_mtu; + if (rtm->rtm_inits & RTV_WEIGHT) + rt->rt_weight = rtm->rtm_rmx.rmx_weight; + /* Kernel -> userland timebase conversion. */ + if (rtm->rtm_inits & RTV_EXPIRE) + rt->rt_expire = rtm->rtm_rmx.rmx_expire ? + rtm->rtm_rmx.rmx_expire - time_second + time_uptime : 0; } static void -rt_getmetrics(const struct rt_metrics_lite *in, struct rt_metrics *out) +rt_getmetrics(const struct rtentry *rt, struct rt_metrics *out) { -#define metric(e) out->e = in->e; + bzero(out, sizeof(*out)); - metric(rmx_mtu); - metric(rmx_weight); + out->rmx_mtu = rt->rt_mtu; + out->rmx_weight = rt->rt_weight; + out->rmx_pksent = counter_u64_fetch(rt->rt_pksent); /* Kernel -> userland timebase conversion. */ - out->rmx_expire = in->rmx_expire ? - in->rmx_expire - time_uptime + time_second : 0; -#undef metric + out->rmx_expire = rt->rt_expire ? + rt->rt_expire - time_uptime + time_second : 0; } /* @@ -1603,11 +1595,7 @@ sysctl_dumpentry(struct radix_node *rn, void *vw) (rt->rt_flags & ~RTF_GWFLAG_COMPAT); else rtm->rtm_flags = rt->rt_flags; - /* - * let's be honest about this being a retarded hack - */ - rtm->rtm_fmask = rt->rt_rmx.rmx_pksent; - rt_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx); + rt_getmetrics(rt, &rtm->rtm_rmx); rtm->rtm_index = rt->rt_ifp->if_index; rtm->rtm_errno = rtm->rtm_pid = rtm->rtm_seq = 0; rtm->rtm_addrs = info.rti_addrs; -- cgit v1.1