diff options
author | delphij <delphij@FreeBSD.org> | 2015-07-15 19:21:26 +0000 |
---|---|---|
committer | delphij <delphij@FreeBSD.org> | 2015-07-15 19:21:26 +0000 |
commit | 2a25cee78ab1d37e7d2bc40ae675646974d99f56 (patch) | |
tree | b0302ac4be59e104f4e1e54014561a1389397192 /contrib/ntp/ntpd/ntp_monitor.c | |
parent | a0741a75537b2e0514472ac3b28afc55a7846c30 (diff) | |
download | FreeBSD-src-2a25cee78ab1d37e7d2bc40ae675646974d99f56.zip FreeBSD-src-2a25cee78ab1d37e7d2bc40ae675646974d99f56.tar.gz |
MFC r280849,280915-280916,281015-281016,282097,282408,282415,283542,
284864,285169-285170,285435:
ntp 4.2.8p3.
Relnotes: yes
Approved by: re (?)
Diffstat (limited to 'contrib/ntp/ntpd/ntp_monitor.c')
-rw-r--r-- | contrib/ntp/ntpd/ntp_monitor.c | 572 |
1 file changed, 355 insertions, 217 deletions
diff --git a/contrib/ntp/ntpd/ntp_monitor.c b/contrib/ntp/ntpd/ntp_monitor.c index 753fa76..02fd757 100644 --- a/contrib/ntp/ntpd/ntp_monitor.c +++ b/contrib/ntp/ntpd/ntp_monitor.c @@ -8,6 +8,7 @@ #include "ntpd.h" #include "ntp_io.h" #include "ntp_if.h" +#include "ntp_lists.h" #include "ntp_stdlib.h" #include <ntp_random.h> @@ -18,17 +19,14 @@ #endif /* - * I'm still not sure I like what I've done here. It certainly consumes - * memory like it is going out of style, and also may not be as low - * overhead as I'd imagined. + * Record statistics based on source address, mode and version. The + * receive procedure calls us with the incoming rbufp before it does + * anything else. While at it, implement rate controls for inbound + * traffic. * - * Anyway, we record statistics based on source address, mode and - * version (for now, anyway. Check the code). The receive procedure - * calls us with the incoming rbufp before it does anything else. - * - * Each entry is doubly linked into two lists, a hash table and a - * most-recently-used list. When a packet arrives it is looked up in - * the hash table. If found, the statistics are updated and the entry + * Each entry is doubly linked into two lists, a hash table and a most- + * recently-used (MRU) list. When a packet arrives it is looked up in + * the hash table. If found, the statistics are updated and the entry * relinked at the head of the MRU list. If not found, a new entry is * allocated, initialized and linked into both the hash table and at the * head of the MRU list. @@ -39,55 +37,70 @@ * tail for the MRU list, unlinking from the hash table, and * reinitializing. * - * trimmed back memory consumption ... jdg 8/94 - */ -/* - * Limits on the number of structures allocated. This limit is picked - * with the illicit knowlege that we can only return somewhat less - * than 8K bytes in a mode 7 response packet, and that each structure - * will require about 20 bytes of space in the response. - * - * ... 
I don't believe the above is true anymore ... jdg + * INC_MONLIST is the default allocation granularity in entries. + * INIT_MONLIST is the default initial allocation in entries. */ -#ifndef MAXMONMEM -#define MAXMONMEM 600 /* we allocate up to 600 structures */ +#ifdef MONMEMINC /* old name */ +# define INC_MONLIST MONMEMINC +#elif !defined(INC_MONLIST) +# define INC_MONLIST (4 * 1024 / sizeof(mon_entry)) #endif -#ifndef MONMEMINC -#define MONMEMINC 40 /* allocate them 40 at a time */ +#ifndef INIT_MONLIST +# define INIT_MONLIST (4 * 1024 / sizeof(mon_entry)) +#endif +#ifndef MRU_MAXDEPTH_DEF +# define MRU_MAXDEPTH_DEF (1024 * 1024 / sizeof(mon_entry)) #endif /* * Hashing stuff */ -#define MON_HASH_SIZE 128 -#define MON_HASH_MASK (MON_HASH_SIZE-1) -#define MON_HASH(addr) sock_hash(addr) +u_char mon_hash_bits; /* - * Pointers to the hash table, the MRU list and the count table. Memory - * for the hash and count tables is only allocated if monitoring is - * turned on. + * Pointers to the hash table and the MRU list. Memory for the hash + * table is allocated only if monitoring is enabled. */ -static struct mon_data *mon_hash[MON_HASH_SIZE]; /* list ptrs */ -struct mon_data mon_mru_list; +mon_entry ** mon_hash; /* MRU hash table */ +mon_entry mon_mru_list; /* mru listhead */ /* - * List of free structures structures, and counters of free and total - * structures. The free structures are linked with the hash_next field. + * List of free structures structures, and counters of in-use and total + * structures. The free structures are linked with the hash_next field. 
*/ -static struct mon_data *mon_free; /* free list or null if none */ -static int mon_total_mem; /* total structures allocated */ -static int mon_mem_increments; /* times called malloc() */ +static mon_entry *mon_free; /* free list or null if none */ + u_int mru_alloc; /* mru list + free list count */ + u_int mru_entries; /* mru list count */ + u_int mru_peakentries; /* highest mru_entries seen */ + u_int mru_initalloc = INIT_MONLIST;/* entries to preallocate */ + u_int mru_incalloc = INC_MONLIST;/* allocation batch factor */ +static u_int mon_mem_increments; /* times called malloc() */ + +/* + * Parameters of the RES_LIMITED restriction option. We define headway + * as the idle time between packets. A packet is discarded if the + * headway is less than the minimum, as well as if the average headway + * is less than eight times the increment. + */ +int ntp_minpkt = NTP_MINPKT; /* minimum (log 2 s) */ +u_char ntp_minpoll = NTP_MINPOLL; /* increment (log 2 s) */ /* * Initialization state. We may be monitoring, we may not. If * we aren't, we may not even have allocated any memory yet. */ -int mon_enabled; /* enable switch */ -u_long mon_age = 3000; /* preemption limit */ -static int mon_have_memory; -static void mon_getmoremem P((void)); -static void remove_from_hash P((struct mon_data *)); + u_int mon_enabled; /* enable switch */ + u_int mru_mindepth = 600; /* preempt above this */ + int mru_maxage = 64; /* for entries older than */ + u_int mru_maxdepth = /* MRU count hard limit */ + MRU_MAXDEPTH_DEF; + int mon_age = 3000; /* preemption limit */ + +static void mon_getmoremem(void); +static void remove_from_hash(mon_entry *); +static inline void mon_free_entry(mon_entry *); +static inline void mon_reclaim_entry(mon_entry *); + /* * init_mon - initialize monitoring global data @@ -97,16 +110,86 @@ init_mon(void) { /* * Don't do much of anything here. We don't allocate memory - * until someone explicitly starts us. + * until mon_start(). 
*/ mon_enabled = MON_OFF; - mon_have_memory = 0; + INIT_DLIST(mon_mru_list, mru); +} + + +/* + * remove_from_hash - removes an entry from the address hash table and + * decrements mru_entries. + */ +static void +remove_from_hash( + mon_entry *mon + ) +{ + u_int hash; + mon_entry *punlinked; + + mru_entries--; + hash = MON_HASH(&mon->rmtadr); + UNLINK_SLIST(punlinked, mon_hash[hash], mon, hash_next, + mon_entry); + NTP_ENSURE(punlinked == mon); +} - mon_total_mem = 0; - mon_mem_increments = 0; - mon_free = NULL; - memset(&mon_hash[0], 0, sizeof mon_hash); - memset(&mon_mru_list, 0, sizeof mon_mru_list); + +static inline void +mon_free_entry( + mon_entry *m + ) +{ + ZERO(*m); + LINK_SLIST(mon_free, m, hash_next); +} + + +/* + * mon_reclaim_entry - Remove an entry from the MRU list and from the + * hash array, then zero-initialize it. Indirectly + * decrements mru_entries. + + * The entry is prepared to be reused. Before return, in + * remove_from_hash(), mru_entries is decremented. It is the caller's + * responsibility to increment it again. + */ +static inline void +mon_reclaim_entry( + mon_entry *m + ) +{ + DEBUG_INSIST(NULL != m); + + UNLINK_DLIST(m, mru); + remove_from_hash(m); + ZERO(*m); +} + + +/* + * mon_getmoremem - get more memory and put it on the free list + */ +static void +mon_getmoremem(void) +{ + mon_entry *chunk; + u_int entries; + + entries = (0 == mon_mem_increments) + ? 
mru_initalloc + : mru_incalloc; + + if (entries) { + chunk = eallocarray(entries, sizeof(*chunk)); + mru_alloc += entries; + for (chunk += entries; entries; entries--) + mon_free_entry(--chunk); + + mon_mem_increments++; + } } @@ -118,24 +201,31 @@ mon_start( int mode ) { + size_t octets; + u_int min_hash_slots; - if (mon_enabled != MON_OFF) { + if (MON_OFF == mode) /* MON_OFF is 0 */ + return; + if (mon_enabled) { mon_enabled |= mode; return; } - if (mode == MON_OFF) - return; - - if (!mon_have_memory) { - mon_total_mem = 0; - mon_mem_increments = 0; - mon_free = NULL; + if (0 == mon_mem_increments) mon_getmoremem(); - mon_have_memory = 1; - } + /* + * Select the MRU hash table size to limit the average count + * per bucket at capacity (mru_maxdepth) to 8, if possible + * given our hash is limited to 16 bits. + */ + min_hash_slots = (mru_maxdepth / 8) + 1; + mon_hash_bits = 0; + while (min_hash_slots >>= 1) + mon_hash_bits++; + mon_hash_bits = max(4, mon_hash_bits); + mon_hash_bits = min(16, mon_hash_bits); + octets = sizeof(*mon_hash) * MON_HASH_SIZE; + mon_hash = erealloc_zero(mon_hash, octets, 0); - mon_mru_list.mru_next = &mon_mru_list; - mon_mru_list.mru_prev = &mon_mru_list; mon_enabled = mode; } @@ -148,211 +238,259 @@ mon_stop( int mode ) { - register struct mon_data *md, *md_next; - register int i; + mon_entry *mon; - if (mon_enabled == MON_OFF) - return; + if (MON_OFF == mon_enabled) + return; if ((mon_enabled & mode) == 0 || mode == MON_OFF) - return; + return; mon_enabled &= ~mode; if (mon_enabled != MON_OFF) - return; + return; /* - * Put everything back on the free list + * Move everything on the MRU list to the free list quickly, + * without bothering to remove each from either the MRU list or + * the hash table. 
*/ - for (i = 0; i < MON_HASH_SIZE; i++) { - md = mon_hash[i]; /* get next list */ - mon_hash[i] = NULL; /* zero the list head */ - while (md != NULL) { - md_next = md->hash_next; - md->hash_next = mon_free; - mon_free = md; - md = md_next; - } - } - - mon_mru_list.mru_next = &mon_mru_list; - mon_mru_list.mru_prev = &mon_mru_list; + ITER_DLIST_BEGIN(mon_mru_list, mon, mru, mon_entry) + mon_free_entry(mon); + ITER_DLIST_END() + + /* empty the MRU list and hash table. */ + mru_entries = 0; + INIT_DLIST(mon_mru_list, mru); + zero_mem(mon_hash, sizeof(*mon_hash) * MON_HASH_SIZE); } + +/* + * mon_clearinterface -- remove mru entries referring to a local address + * which is going away. + */ void -ntp_monclearinterface(struct interface *interface) +mon_clearinterface( + endpt *lcladr + ) { - struct mon_data *md; - - for (md = mon_mru_list.mru_next; md != &mon_mru_list; - md = md->mru_next) { - if (md->interface == interface) - { - /* dequeue from mru list and put to free list */ - md->mru_prev->mru_next = md->mru_next; - md->mru_next->mru_prev = md->mru_prev; - remove_from_hash(md); - md->hash_next = mon_free; - mon_free = md; - } - } + mon_entry *mon; + + /* iterate mon over mon_mru_list */ + ITER_DLIST_BEGIN(mon_mru_list, mon, mru, mon_entry) + if (mon->lcladr == lcladr) { + /* remove from mru list */ + UNLINK_DLIST(mon, mru); + /* remove from hash list, adjust mru_entries */ + remove_from_hash(mon); + /* put on free list */ + mon_free_entry(mon); + } + ITER_DLIST_END() } + /* * ntp_monitor - record stats about this packet * - * Returns 1 if the packet is at the head of the list, 0 otherwise. + * Returns supplied restriction flags, with RES_LIMITED and RES_KOD + * cleared unless the packet should not be responded to normally + * (RES_LIMITED) and possibly should trigger a KoD response (RES_KOD). 
+ * The returned flags are saved in the MRU entry, so that it reflects + * whether the last packet from that source triggered rate limiting, + * and if so, possible KoD response. This implies you can not tell + * whether a given address is eligible for rate limiting/KoD from the + * monlist restrict bits, only whether or not the last packet triggered + * such responses. ntpdc -c reslist lets you see whether RES_LIMITED + * or RES_KOD is lit for a particular address before ntp_monitor()'s + * typical dousing. */ -int +u_short ntp_monitor( - struct recvbuf *rbufp + struct recvbuf *rbufp, + u_short flags ) { - register struct pkt *pkt; - register struct mon_data *md; - struct sockaddr_storage addr; - register int hash; - register int mode; + l_fp interval_fp; + struct pkt * pkt; + mon_entry * mon; + mon_entry * oldest; + int oldest_age; + u_int hash; + u_short restrict_mask; + u_char mode; + u_char version; + int interval; + int head; /* headway increment */ + int leak; /* new headway */ + int limit; /* average threshold */ if (mon_enabled == MON_OFF) - return 0; + return ~(RES_LIMITED | RES_KOD) & flags; pkt = &rbufp->recv_pkt; - memset(&addr, 0, sizeof(addr)); - memcpy(&addr, &(rbufp->recv_srcadr), sizeof(addr)); - hash = MON_HASH(&addr); + hash = MON_HASH(&rbufp->recv_srcadr); mode = PKT_MODE(pkt->li_vn_mode); - md = mon_hash[hash]; - while (md != NULL) { + version = PKT_VERSION(pkt->li_vn_mode); + mon = mon_hash[hash]; + + /* + * We keep track of all traffic for a given IP in one entry, + * otherwise cron'ed ntpdate or similar evades RES_LIMITED. 
+ */ + + for (; mon != NULL; mon = mon->hash_next) + if (SOCK_EQ(&mon->rmtadr, &rbufp->recv_srcadr)) + break; + + if (mon != NULL) { + interval_fp = rbufp->recv_time; + L_SUB(&interval_fp, &mon->last); + /* add one-half second to round up */ + L_ADDUF(&interval_fp, 0x80000000); + interval = interval_fp.l_i; + mon->last = rbufp->recv_time; + NSRCPORT(&mon->rmtadr) = NSRCPORT(&rbufp->recv_srcadr); + mon->count++; + restrict_mask = flags; + mon->vn_mode = VN_MODE(version, mode); + + /* Shuffle to the head of the MRU list. */ + UNLINK_DLIST(mon, mru); + LINK_DLIST(mon_mru_list, mon, mru); /* - * Match address only to conserve MRU size. + * At this point the most recent arrival is first in the + * MRU list. Decrease the counter by the headway, but + * not less than zero. */ - if (SOCKCMP(&md->rmtadr, &addr)) { - md->drop_count = current_time - md->lasttime; - md->lasttime = current_time; - md->count++; - md->rmtport = NSRCPORT(&rbufp->recv_srcadr); - md->mode = (u_char) mode; - md->version = PKT_VERSION(pkt->li_vn_mode); - - /* - * Shuffle to the head of the MRU list. - */ - md->mru_next->mru_prev = md->mru_prev; - md->mru_prev->mru_next = md->mru_next; - md->mru_next = mon_mru_list.mru_next; - md->mru_prev = &mon_mru_list; - mon_mru_list.mru_next->mru_prev = md; - mon_mru_list.mru_next = md; - return 1; - } - md = md->hash_next; + mon->leak -= interval; + mon->leak = max(0, mon->leak); + head = 1 << ntp_minpoll; + leak = mon->leak + head; + limit = NTP_SHIFT * head; + + DPRINTF(2, ("MRU: interval %d headway %d limit %d\n", + interval, leak, limit)); + + /* + * If the minimum and average thresholds are not + * exceeded, douse the RES_LIMITED and RES_KOD bits and + * increase the counter by the headway increment. Note + * that we give a 1-s grace for the minimum threshold + * and a 2-s grace for the headway increment. 
If one or + * both thresholds are exceeded and the old counter is + * less than the average threshold, set the counter to + * the average threshold plus the increment and leave + * the RES_LIMITED and RES_KOD bits lit. Otherwise, + * leave the counter alone and douse the RES_KOD bit. + * This rate-limits the KoDs to no less than the average + * headway. + */ + if (interval + 1 >= ntp_minpkt && leak < limit) { + mon->leak = leak - 2; + restrict_mask &= ~(RES_LIMITED | RES_KOD); + } else if (mon->leak < limit) + mon->leak = limit + head; + else + restrict_mask &= ~RES_KOD; + + mon->flags = restrict_mask; + + return mon->flags; } /* * If we got here, this is the first we've heard of this * guy. Get him some memory, either from the free list * or from the tail of the MRU list. + * + * The following ntp.conf "mru" knobs come into play determining + * the depth (or count) of the MRU list: + * - mru_mindepth ("mru mindepth") is a floor beneath which + * entries are kept without regard to their age. The + * default is 600 which matches the longtime implementation + * limit on the total number of entries. + * - mru_maxage ("mru maxage") is a ceiling on the age in + * seconds of entries. Entries older than this are + * reclaimed once mon_mindepth is exceeded. 64s default. + * Note that entries older than this can easily survive + * as they are reclaimed only as needed. + * - mru_maxdepth ("mru maxdepth") is a hard limit on the + * number of entries. + * - "mru maxmem" sets mru_maxdepth to the number of entries + * which fit in the given number of kilobytes. The default is + * 1024, or 1 megabyte. + * - mru_initalloc ("mru initalloc" sets the count of the + * initial allocation of MRU entries. + * - "mru initmem" sets mru_initalloc in units of kilobytes. + * The default is 4. + * - mru_incalloc ("mru incalloc" sets the number of entries to + * allocate on-demand each time the free list is empty. + * - "mru incmem" sets mru_incalloc in units of kilobytes. + * The default is 4. 
+ * Whichever of "mru maxmem" or "mru maxdepth" occurs last in + * ntp.conf controls. Similarly for "mru initalloc" and "mru + * initmem", and for "mru incalloc" and "mru incmem". */ - if (mon_free == NULL && mon_total_mem >= MAXMONMEM) { - - /* - * Preempt from the MRU list if old enough. - */ - md = mon_mru_list.mru_prev; - /* We get 31 bits from ntp_random() */ - if (((u_long)ntp_random()) / FRAC > - (double)(current_time - md->lasttime) / mon_age) - return 0; - - md->mru_prev->mru_next = &mon_mru_list; - mon_mru_list.mru_prev = md->mru_prev; - remove_from_hash(md); - } else { - if (mon_free == NULL) + if (mru_entries < mru_mindepth) { + if (NULL == mon_free) mon_getmoremem(); - md = mon_free; - mon_free = md->hash_next; + UNLINK_HEAD_SLIST(mon, mon_free, hash_next); + } else { + oldest = TAIL_DLIST(mon_mru_list, mru); + oldest_age = 0; /* silence uninit warning */ + if (oldest != NULL) { + interval_fp = rbufp->recv_time; + L_SUB(&interval_fp, &oldest->last); + /* add one-half second to round up */ + L_ADDUF(&interval_fp, 0x80000000); + oldest_age = interval_fp.l_i; + } + /* note -1 is legal for mru_maxage (disables) */ + if (oldest != NULL && mru_maxage < oldest_age) { + mon_reclaim_entry(oldest); + mon = oldest; + } else if (mon_free != NULL || mru_alloc < + mru_maxdepth) { + if (NULL == mon_free) + mon_getmoremem(); + UNLINK_HEAD_SLIST(mon, mon_free, hash_next); + /* Preempt from the MRU list if old enough. */ + } else if (ntp_random() / (2. 
* FRAC) > + (double)oldest_age / mon_age) { + return ~(RES_LIMITED | RES_KOD) & flags; + } else { + mon_reclaim_entry(oldest); + mon = oldest; + } } /* * Got one, initialize it */ - md->avg_interval = 0; - md->lasttime = current_time; - md->count = 1; - md->drop_count = 0; - memset(&md->rmtadr, 0, sizeof(md->rmtadr)); - memcpy(&md->rmtadr, &addr, sizeof(addr)); - md->rmtport = NSRCPORT(&rbufp->recv_srcadr); - md->mode = (u_char) mode; - md->version = PKT_VERSION(pkt->li_vn_mode); - md->interface = rbufp->dstadr; - md->cast_flags = (u_char)(((rbufp->dstadr->flags & INT_MCASTOPEN) && - rbufp->fd == md->interface->fd) ? MDF_MCAST: rbufp->fd == - md->interface->bfd ? MDF_BCAST : MDF_UCAST); + mru_entries++; + mru_peakentries = max(mru_peakentries, mru_entries); + mon->last = rbufp->recv_time; + mon->first = mon->last; + mon->count = 1; + mon->flags = ~(RES_LIMITED | RES_KOD) & flags; + mon->leak = 0; + memcpy(&mon->rmtadr, &rbufp->recv_srcadr, sizeof(mon->rmtadr)); + mon->vn_mode = VN_MODE(version, mode); + mon->lcladr = rbufp->dstadr; + mon->cast_flags = (u_char)(((rbufp->dstadr->flags & + INT_MCASTOPEN) && rbufp->fd == mon->lcladr->fd) ? MDF_MCAST + : rbufp->fd == mon->lcladr->bfd ? MDF_BCAST : MDF_UCAST); /* * Drop him into front of the hash table. Also put him on top of * the MRU list. 
*/ - md->hash_next = mon_hash[hash]; - mon_hash[hash] = md; - md->mru_next = mon_mru_list.mru_next; - md->mru_prev = &mon_mru_list; - mon_mru_list.mru_next->mru_prev = md; - mon_mru_list.mru_next = md; - return 1; -} - + LINK_SLIST(mon_hash[hash], mon, hash_next); + LINK_DLIST(mon_mru_list, mon, mru); -/* - * mon_getmoremem - get more memory and put it on the free list - */ -static void -mon_getmoremem(void) -{ - register struct mon_data *md; - register int i; - struct mon_data *freedata; /* 'old' free list (null) */ - - md = (struct mon_data *)emalloc(MONMEMINC * - sizeof(struct mon_data)); - freedata = mon_free; - mon_free = md; - for (i = 0; i < (MONMEMINC-1); i++) { - md->hash_next = (md + 1); - md++; - } - - /* - * md now points at the last. Link in the rest of the chain. - */ - md->hash_next = freedata; - mon_total_mem += MONMEMINC; - mon_mem_increments++; + return mon->flags; } -static void -remove_from_hash( - struct mon_data *md - ) -{ - register int hash; - register struct mon_data *md_prev; - hash = MON_HASH(&md->rmtadr); - if (mon_hash[hash] == md) { - mon_hash[hash] = md->hash_next; - } else { - md_prev = mon_hash[hash]; - while (md_prev->hash_next != md) { - md_prev = md_prev->hash_next; - if (md_prev == NULL) { - /* logic error */ - return; - } - } - md_prev->hash_next = md->hash_next; - } -} |