diff options
Diffstat (limited to 'lib/dns/dispatch.c')
-rw-r--r-- | lib/dns/dispatch.c | 190 |
1 files changed, 178 insertions, 12 deletions
diff --git a/lib/dns/dispatch.c b/lib/dns/dispatch.c index 794cdb5..9b4e968 100644 --- a/lib/dns/dispatch.c +++ b/lib/dns/dispatch.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2004-2008 Internet Systems Consortium, Inc. ("ISC") + * Copyright (C) 2004-2009 Internet Systems Consortium, Inc. ("ISC") * Copyright (C) 1999-2003 Internet Software Consortium. * * Permission to use, copy, modify, and/or distribute this software for any @@ -15,7 +15,7 @@ * PERFORMANCE OF THIS SOFTWARE. */ -/* $Id: dispatch.c,v 1.116.18.37 2008/09/04 00:24:41 jinmei Exp $ */ +/* $Id: dispatch.c,v 1.155.12.7 2009/04/28 21:39:45 jinmei Exp $ */ /*! \file */ @@ -32,6 +32,7 @@ #include <isc/portset.h> #include <isc/print.h> #include <isc/random.h> +#include <isc/stats.h> #include <isc/string.h> #include <isc/task.h> #include <isc/time.h> @@ -43,14 +44,18 @@ #include <dns/log.h> #include <dns/message.h> #include <dns/portlist.h> +#include <dns/stats.h> #include <dns/tcpmsg.h> #include <dns/types.h> typedef ISC_LIST(dns_dispentry_t) dns_displist_t; -typedef struct dispsocket dispsocket_t; +typedef struct dispsocket dispsocket_t; typedef ISC_LIST(dispsocket_t) dispsocketlist_t; +typedef struct dispportentry dispportentry_t; +typedef ISC_LIST(dispportentry_t) dispportlist_t; + /* ARC4 Random generator state */ typedef struct arc4ctx { isc_uint8_t i; @@ -76,6 +81,7 @@ struct dns_dispatchmgr { isc_mem_t *mctx; dns_acl_t *blackhole; dns_portlist_t *portlist; + isc_stats_t *stats; isc_entropy_t *entropy; /*%< entropy source */ /* Locked by "lock". */ @@ -170,7 +176,8 @@ struct dispsocket { isc_socket_t *socket; dns_dispatch_t *disp; isc_sockaddr_t host; - in_port_t localport; + in_port_t localport; /* XXX: should be removed later */ + dispportentry_t *portentry; dns_dispentry_t *resp; isc_task_t *task; ISC_LINK(dispsocket_t) link; @@ -178,6 +185,21 @@ struct dispsocket { ISC_LINK(dispsocket_t) blink; }; +/*% + * A port table entry. We remember every port we first open in a table with a + * reference counter so that we can 'reuse' the same port (with different + * destination addresses) using the SO_REUSEADDR socket option. + */ +struct dispportentry { + in_port_t port; + unsigned int refs; + ISC_LINK(struct dispportentry) link; +}; + +#ifndef DNS_DISPATCH_PORTTABLESIZE +#define DNS_DISPATCH_PORTTABLESIZE 1024 +#endif + #define INVALID_BUCKET (0xffffdead) /*% @@ -227,6 +249,8 @@ struct dns_dispatch { dns_tcpmsg_t tcpmsg; /*%< for tcp streams */ dns_qid_t *qid; arc4ctx_t arc4ctx; /*%< for QID/UDP port num */ + dispportlist_t *port_table; /*%< hold ports 'owned' by us */ + isc_mempool_t *portpool; /*%< port table entries */ }; #define QID_MAGIC ISC_MAGIC('Q', 'i', 'd', ' ') @@ -330,6 +354,12 @@ mgr_log(dns_dispatchmgr_t *mgr, int level, const char *fmt, ...) { level, "dispatchmgr %p: %s", mgr, msgbuf); } +static inline void +inc_stats(dns_dispatchmgr_t *mgr, isc_statscounter_t counter) { + if (mgr->stats != NULL) + isc_stats_increment(mgr->stats, counter); +} + static void dispatch_log(dns_dispatch_t *disp, int level, const char *fmt, ...) ISC_FORMAT_PRINTF(3, 4); @@ -677,6 +707,64 @@ destroy_disp(isc_task_t *task, isc_event_t *event) { } /*% + * Manipulate port table per dispatch: find an entry for a given port number, + * create a new entry, and decrement a given entry with possible clean-up. + */ +static dispportentry_t * +port_search(dns_dispatch_t *disp, in_port_t port) { + dispportentry_t *portentry; + + REQUIRE(disp->port_table != NULL); + + portentry = ISC_LIST_HEAD(disp->port_table[port % + DNS_DISPATCH_PORTTABLESIZE]); + while (portentry != NULL) { + if (portentry->port == port) + return (portentry); + portentry = ISC_LIST_NEXT(portentry, link); + } + + return (NULL); +} + +static dispportentry_t * +new_portentry(dns_dispatch_t *disp, in_port_t port) { + dispportentry_t *portentry; + + REQUIRE(disp->port_table != NULL); + + portentry = isc_mempool_get(disp->portpool); + if (portentry == NULL) + return (portentry); + + portentry->port = port; + portentry->refs = 0; + ISC_LINK_INIT(portentry, link); + ISC_LIST_APPEND(disp->port_table[port % DNS_DISPATCH_PORTTABLESIZE], + portentry, link); + + return (portentry); +} + +static void +deref_portentry(dns_dispatch_t *disp, dispportentry_t **portentryp) { + dispportentry_t *portentry = *portentryp; + + REQUIRE(disp->port_table != NULL); + REQUIRE(portentry != NULL && portentry->refs > 0); + + portentry->refs--; + if (portentry->refs == 0) { + ISC_LIST_UNLINK(disp->port_table[portentry->port % + DNS_DISPATCH_PORTTABLESIZE], + portentry, link); + isc_mempool_put(disp->portpool, portentry); + } + + *portentryp = NULL; +} + +/*% * Find a dispsocket for socket address 'dest', and port number 'port'. * Return NULL if no such entry exists. */ @@ -692,7 +780,7 @@ socket_search(dns_qid_t *qid, isc_sockaddr_t *dest, in_port_t port, while (dispsock != NULL) { if (isc_sockaddr_equal(dest, &dispsock->host) && - dispsock->localport == port) + dispsock->portentry->port == port) return (dispsock); dispsock = ISC_LIST_NEXT(dispsock, blink); } @@ -720,6 +808,8 @@ get_dispsocket(dns_dispatch_t *disp, isc_sockaddr_t *dest, dispsocket_t *dispsock; unsigned int nports; in_port_t *ports; + unsigned int bindoptions; + dispportentry_t *portentry = NULL; if (isc_sockaddr_pf(&disp->local) == AF_INET) { nports = disp->mgr->nv4ports; @@ -745,6 +835,7 @@ get_dispsocket(dns_dispatch_t *disp, isc_sockaddr_t *dest, dispsock->socket = NULL; dispsock->disp = disp; dispsock->resp = NULL; + dispsock->portentry = NULL; isc_random_get(&r); dispsock->task = NULL; isc_task_attach(disp->task[r % disp->ntasks], &dispsock->task); @@ -767,16 +858,29 @@ get_dispsocket(dns_dispatch_t *disp, isc_sockaddr_t *dest, bucket = dns_hash(qid, dest, 0, port); if (socket_search(qid, dest, port, bucket) != NULL) continue; - - result = open_socket(sockmgr, &localaddr, 0, &sock); - if (result == ISC_R_SUCCESS || result != ISC_R_ADDRINUSE) + bindoptions = 0; + portentry = port_search(disp, port); + if (portentry != NULL) + bindoptions |= ISC_SOCKET_REUSEADDRESS; + result = open_socket(sockmgr, &localaddr, bindoptions, &sock); + if (result == ISC_R_SUCCESS) { + if (portentry == NULL) { + portentry = new_portentry(disp, port); + if (portentry == NULL) { + result = ISC_R_NOMEMORY; + break; + } + } + portentry->refs++; + break; + } else if (result != ISC_R_ADDRINUSE) break; } if (result == ISC_R_SUCCESS) { dispsock->socket = sock; dispsock->host = *dest; - dispsock->localport = port; + dispsock->portentry = portentry; dispsock->bucket = bucket; ISC_LIST_APPEND(qid->sock_table[bucket], dispsock, blink); *dispsockp = dispsock; @@ -813,6 +917,8 @@ destroy_dispsocket(dns_dispatch_t *disp, dispsocket_t **dispsockp) { disp->nsockets--; dispsock->magic = 0; + if (dispsock->portentry != NULL) + deref_portentry(disp, &dispsock->portentry); if (dispsock->socket != NULL) isc_socket_detach(&dispsock->socket); if (ISC_LINK_LINKED(dispsock, blink)) { @@ -847,6 +953,9 @@ deactivate_dispsocket(dns_dispatch_t *disp, dispsocket_t *dispsock) { dispsock->resp->dispsocket = NULL; } + INSIST(dispsock->portentry != NULL); + deref_portentry(disp, &dispsock->portentry); + if (disp->nsockets > DNS_DISPATCH_POOLSOCKS) destroy_dispsocket(disp, &dispsock); else { @@ -1161,6 +1270,7 @@ udp_recv(isc_event_t *ev_in, dns_dispatch_t *disp, dispsocket_t *dispsock) { bucket, (resp == NULL ? "not found" : "found")); if (resp == NULL) { + inc_stats(mgr, dns_resstatscounter_mismatch); free_buffer(disp, ev->region.base, ev->region.length); goto unlock; } @@ -1168,6 +1278,7 @@ udp_recv(isc_event_t *ev_in, dns_dispatch_t *disp, dispsocket_t *dispsock) { &resp->host)) { dispatch_log(disp, LVL(90), "response to an exclusive socket doesn't match"); + inc_stats(mgr, dns_resstatscounter_mismatch); free_buffer(disp, ev->region.base, ev->region.length); goto unlock; } @@ -1603,6 +1714,9 @@ destroy_mgr(dns_dispatchmgr_t **mgrp) { if (mgr->blackhole != NULL) dns_acl_detach(&mgr->blackhole); + if (mgr->stats != NULL) + isc_stats_detach(&mgr->stats); + if (mgr->v4ports != NULL) { isc_mem_put(mctx, mgr->v4ports, mgr->nv4ports * sizeof(in_port_t)); @@ -1628,6 +1742,7 @@ open_socket(isc_socketmgr_t *mgr, isc_sockaddr_t *local, isc_sockettype_udp, &sock); if (result != ISC_R_SUCCESS) return (result); + isc_socket_setname(sock, "dispatcher", NULL); } else { result = isc_socket_open(sock); if (result != ISC_R_SUCCESS) @@ -1692,6 +1807,7 @@ dns_dispatchmgr_create(isc_mem_t *mctx, isc_entropy_t *entropy, isc_mem_attach(mctx, &mgr->mctx); mgr->blackhole = NULL; + mgr->stats = NULL; result = isc_mutex_init(&mgr->lock); if (result != ISC_R_SUCCESS) @@ -2001,6 +2117,15 @@ dns_dispatchmgr_destroy(dns_dispatchmgr_t **mgrp) { destroy_mgr(&mgr); } +void +dns_dispatchmgr_setstats(dns_dispatchmgr_t *mgr, isc_stats_t *stats) { + REQUIRE(VALID_DISPATCHMGR(mgr)); + REQUIRE(ISC_LIST_EMPTY(mgr->list)); + REQUIRE(mgr->stats == NULL); + + isc_stats_attach(stats, &mgr->stats); +} + static int port_cmp(const void *key, const void *ent) { in_port_t p1 = *(const in_port_t *)key; @@ -2269,6 +2394,8 @@ dispatch_allocate(dns_dispatchmgr_t *mgr, unsigned int maxrequests, ISC_LIST_INIT(disp->inactivesockets); disp->nsockets = 0; dispatch_arc4init(&disp->arc4ctx, mgr->entropy, NULL); + disp->port_table = NULL; + disp->portpool = NULL; result = isc_mutex_init(&disp->lock); if (result != ISC_R_SUCCESS) @@ -2298,13 +2425,14 @@ dispatch_allocate(dns_dispatchmgr_t *mgr, unsigned int maxrequests, /* - * MUST be unlocked, and not used by anthing. + * MUST be unlocked, and not used by anything. */ static void dispatch_free(dns_dispatch_t **dispp) { dns_dispatch_t *disp; dns_dispatchmgr_t *mgr; + int i; REQUIRE(VALID_DISPATCH(*dispp)); disp = *dispp; @@ -2329,6 +2457,18 @@ dispatch_free(dns_dispatch_t **dispp) if (disp->qid != NULL) qid_destroy(mgr->mctx, &disp->qid); + + if (disp->port_table != NULL) { + for (i = 0; i < DNS_DISPATCH_PORTTABLESIZE; i++) + INSIST(ISC_LIST_EMPTY(disp->port_table[i])); + isc_mem_put(mgr->mctx, disp->port_table, + sizeof(disp->port_table[0]) * + DNS_DISPATCH_PORTTABLESIZE); + } + + if (disp->portpool != NULL) + isc_mempool_destroy(&disp->portpool); + disp->mgr = NULL; DESTROYLOCK(&disp->lock); disp->magic = 0; @@ -2462,9 +2602,8 @@ dns_dispatch_getudp(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr, } /* - * First, see if we have a dispatcher that matches. + * See if we have a dispatcher that matches. */ - disp = NULL; result = dispatch_find(mgr, localaddr, attributes, mask, &disp); if (result == ISC_R_SUCCESS) { disp->refcount++; @@ -2569,6 +2708,15 @@ get_udpsocket(dns_dispatchmgr_t *mgr, dns_dispatch_t *disp, * If this fails 1024 times, we then ask the kernel for * choosing one. */ + } else { + /* Allow to reuse address for non-random ports. */ + result = open_socket(sockmgr, localaddr, + ISC_SOCKET_REUSEADDRESS, &sock); + + if (result == ISC_R_SUCCESS) + *sockp = sock; + + return (result); } memset(held, 0, sizeof(held)); @@ -2650,6 +2798,21 @@ dispatch_createudp(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr, if (result != ISC_R_SUCCESS) goto deallocate_dispatch; } + + disp->port_table = isc_mem_get(mgr->mctx, + sizeof(disp->port_table[0]) * + DNS_DISPATCH_PORTTABLESIZE); + if (disp->port_table == NULL) + goto deallocate_dispatch; + for (i = 0; i < DNS_DISPATCH_PORTTABLESIZE; i++) + ISC_LIST_INIT(disp->port_table[i]); + + result = isc_mempool_create(mgr->mctx, sizeof(dispportentry_t), + &disp->portpool); + if (result != ISC_R_SUCCESS) + goto deallocate_dispatch; + isc_mempool_setname(disp->portpool, "disp_portpool"); + isc_mempool_setfreemax(disp->portpool, 128); } disp->socktype = isc_sockettype_udp; disp->socket = sock; @@ -2829,6 +2992,8 @@ dns_dispatch_addresponse2(dns_dispatch_t *disp, isc_sockaddr_t *dest, oldestresp->item_out = ISC_TRUE; isc_task_send(oldestresp->task, ISC_EVENT_PTR(&rev)); + inc_stats(disp->mgr, + dns_resstatscounter_dispabort); } } @@ -2852,6 +3017,7 @@ dns_dispatch_addresponse2(dns_dispatch_t *disp, isc_sockaddr_t *dest, if (result != ISC_R_SUCCESS) { UNLOCK(&qid->lock); UNLOCK(&disp->lock); + inc_stats(disp->mgr, dns_resstatscounter_dispsockfail); return (result); } } else { |