summary | refs | log | tree | commit | diff | stats
path: root/contrib/bind9/lib/isc/unix/socket.c
diff options
context:
space:
mode:
author: dougb <dougb@FreeBSD.org> 2008-09-01 22:54:49 +0000
committer: dougb <dougb@FreeBSD.org> 2008-09-01 22:54:49 +0000
commit: 1f3acabb1896f89a0f69765b488a0e125ddb728c (patch)
tree: 76dd11ff82e0b16f834088d4e670d19e3c291dfe /contrib/bind9/lib/isc/unix/socket.c
parent: 188674d6caab2e4ab108c0bf07a4cc26aae1f038 (diff)
download: FreeBSD-src-1f3acabb1896f89a0f69765b488a0e125ddb728c.zip
FreeBSD-src-1f3acabb1896f89a0f69765b488a0e125ddb728c.tar.gz
Merge from vendor/bind9/dist as of the 9.4.2-P2 import
Diffstat (limited to 'contrib/bind9/lib/isc/unix/socket.c')
-rw-r--r-- contrib/bind9/lib/isc/unix/socket.c 243
1 files changed, 200 insertions, 43 deletions
diff --git a/contrib/bind9/lib/isc/unix/socket.c b/contrib/bind9/lib/isc/unix/socket.c
index e0b9021..1b4da78 100644
--- a/contrib/bind9/lib/isc/unix/socket.c
+++ b/contrib/bind9/lib/isc/unix/socket.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2004-2007 Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (C) 2004-2008 Internet Systems Consortium, Inc. ("ISC")
* Copyright (C) 1998-2003 Internet Software Consortium.
*
* Permission to use, copy, modify, and/or distribute this software for any
@@ -15,7 +15,7 @@
* PERFORMANCE OF THIS SOFTWARE.
*/
-/* $Id: socket.c,v 1.237.18.29 2007/08/28 07:20:06 tbox Exp $ */
+/* $Id: socket.c,v 1.237.18.29.10.6 2008/07/29 04:47:31 each Exp $ */
/*! \file */
@@ -65,6 +65,34 @@
#endif /* ISC_PLATFORM_USETHREADS */
/*%
+ * Max number of open sockets. In the vast majority of cases the default size
+ * of FD_SETSIZE should be fine, and this constant should be increased only
+ * when absolutely necessary and possible, i.e., the server is exhausting all
+ * available file descriptors (up to FD_SETSIZE) and the select() function
+ * and FD_xxx macros support larger values than FD_SETSIZE (which may not
+ * always be true, but we keep using some of them to ensure as much
+ * portability as possible). Note also that overall server performance
+ * may be rather worsened with a larger value of this constant due to
+ * inherent scalability problems of select().
+ *
+ * As a special note, this value shouldn't have to be touched if
+ * this is a build for an authoritative only DNS server.
+ */
+
+#ifndef ISC_SOCKET_FDSETSIZE
+#define ISC_SOCKET_FDSETSIZE FD_SETSIZE
+#endif
+
+/*%
+ * Mac OS X needs a special definition to support larger values in select()
+ */
+#if ISC_SOCKET_FDSETSIZE > FD_SETSIZE
+#ifdef __APPLE__
+#define _DARWIN_UNLIMITED_SELECT
+#endif /* __APPLE__ */
+#endif
+
+/*%
* Some systems define the socket length argument as an int, some as size_t,
* some as socklen_t. This is here so it can be easily changed if needed.
*/
@@ -202,13 +230,18 @@ struct isc_socketmgr {
unsigned int magic;
isc_mem_t *mctx;
isc_mutex_t lock;
+ int fd_bufsize;
+ int fdsize;
/* Locked by manager lock. */
ISC_LIST(isc_socket_t) socklist;
- fd_set read_fds;
- fd_set write_fds;
- isc_socket_t *fds[FD_SETSIZE];
- int fdstate[FD_SETSIZE];
+ fd_set *read_fds;
+ fd_set *read_fds_copy;
+ fd_set *write_fds;
+ fd_set *write_fds_copy;
+ isc_socket_t **fds;
+ int *fdstate;
int maxfd;
+ int reserved; /* unlocked */
#ifdef ISC_PLATFORM_USETHREADS
isc_thread_t watcher;
isc_condition_t shutdown_ok;
@@ -251,6 +284,8 @@ static void build_msghdr_send(isc_socket_t *, isc_socketevent_t *,
struct msghdr *, struct iovec *, size_t *);
static void build_msghdr_recv(isc_socket_t *, isc_socketevent_t *,
struct msghdr *, struct iovec *, size_t *);
+static void cleanup_fdsets(isc_socketmgr_t *, isc_mem_t *);
+static isc_result_t create_fdsets(isc_socketmgr_t *, isc_mem_t *);
#define SELECT_POKE_SHUTDOWN (-1)
#define SELECT_POKE_NOTHING (-2)
@@ -329,12 +364,12 @@ wakeup_socket(isc_socketmgr_t *manager, int fd, int msg) {
* or writes.
*/
- INSIST(fd >= 0 && fd < (int)FD_SETSIZE);
+ INSIST(fd >= 0 && fd < manager->fdsize);
if (manager->fdstate[fd] == CLOSE_PENDING) {
manager->fdstate[fd] = CLOSED;
- FD_CLR(fd, &manager->read_fds);
- FD_CLR(fd, &manager->write_fds);
+ FD_CLR(fd, manager->read_fds);
+ FD_CLR(fd, manager->write_fds);
(void)close(fd);
return;
}
@@ -347,9 +382,9 @@ wakeup_socket(isc_socketmgr_t *manager, int fd, int msg) {
* Set requested bit.
*/
if (msg == SELECT_POKE_READ)
- FD_SET(sock->fd, &manager->read_fds);
+ FD_SET(sock->fd, manager->read_fds);
if (msg == SELECT_POKE_WRITE)
- FD_SET(sock->fd, &manager->write_fds);
+ FD_SET(sock->fd, manager->write_fds);
}
#ifdef ISC_PLATFORM_USETHREADS
@@ -1221,7 +1256,7 @@ destroy(isc_socket_t **sockp) {
INSIST(ISC_LIST_EMPTY(sock->recv_list));
INSIST(ISC_LIST_EMPTY(sock->send_list));
INSIST(sock->connect_ev == NULL);
- REQUIRE(sock->fd >= 0 && sock->fd < (int)FD_SETSIZE);
+ REQUIRE(sock->fd >= 0 && sock->fd < (int)manager->fdsize);
LOCK(&manager->lock);
@@ -1478,9 +1513,18 @@ isc_socket_create(isc_socketmgr_t *manager, int pf, isc_sockettype_t type,
#ifdef F_DUPFD
/*
- * Leave a space for stdio to work in.
+ * Leave a space for stdio and TCP to work in.
*/
- if (sock->fd >= 0 && sock->fd < 20) {
+ if (manager->reserved != 0 && type == isc_sockettype_udp &&
+ sock->fd >= 0 && sock->fd < manager->reserved) {
+ int new, tmp;
+ new = fcntl(sock->fd, F_DUPFD, manager->reserved);
+ tmp = errno;
+ (void)close(sock->fd);
+ errno = tmp;
+ sock->fd = new;
+ err = "isc_socket_create: fcntl/reserved";
+ } else if (sock->fd >= 0 && sock->fd < 20) {
int new, tmp;
new = fcntl(sock->fd, F_DUPFD, 20);
tmp = errno;
@@ -1491,7 +1535,7 @@ isc_socket_create(isc_socketmgr_t *manager, int pf, isc_sockettype_t type,
}
#endif
- if (sock->fd >= (int)FD_SETSIZE) {
+ if (sock->fd >= (int)manager->fdsize) {
(void)close(sock->fd);
isc_log_iwrite(isc_lctx, ISC_LOGCATEGORY_GENERAL,
ISC_LOGMODULE_SOCKET, ISC_LOG_ERROR,
@@ -1964,7 +2008,7 @@ internal_accept(isc_task_t *me, isc_event_t *ev) {
(void)close(fd);
errno = tmp;
fd = new;
- err = "fcntl";
+ err = "accept/fcntl";
}
#endif
@@ -1972,8 +2016,17 @@ internal_accept(isc_task_t *me, isc_event_t *ev) {
if (SOFT_ERROR(errno))
goto soft_error;
switch (errno) {
- case ENOBUFS:
case ENFILE:
+ case EMFILE:
+ isc_log_iwrite(isc_lctx, ISC_LOGCATEGORY_GENERAL,
+ ISC_LOGMODULE_SOCKET, ISC_LOG_ERROR,
+ isc_msgcat, ISC_MSGSET_SOCKET,
+ ISC_MSG_TOOMANYFDS,
+ "%s: too many open file descriptors",
+ err);
+ goto soft_error;
+
+ case ENOBUFS:
case ENOMEM:
case ECONNRESET:
case ECONNABORTED:
@@ -2023,7 +2076,7 @@ internal_accept(isc_task_t *me, isc_event_t *ev) {
sock->pf);
(void)close(fd);
goto soft_error;
- } else if (fd >= (int)FD_SETSIZE) {
+ } else if (fd >= (int)manager->fdsize) {
isc_log_iwrite(isc_lctx, ISC_LOGCATEGORY_GENERAL,
ISC_LOGMODULE_SOCKET, ISC_LOG_ERROR,
isc_msgcat, ISC_MSGSET_SOCKET,
@@ -2235,7 +2288,7 @@ process_fds(isc_socketmgr_t *manager, int maxfd,
isc_socket_t *sock;
isc_boolean_t unlock_sock;
- REQUIRE(maxfd <= (int)FD_SETSIZE);
+ REQUIRE(maxfd <= (int)manager->fdsize);
/*
* Process read/writes on other fds here. Avoid locking
@@ -2249,8 +2302,8 @@ process_fds(isc_socketmgr_t *manager, int maxfd,
if (manager->fdstate[i] == CLOSE_PENDING) {
manager->fdstate[i] = CLOSED;
- FD_CLR(i, &manager->read_fds);
- FD_CLR(i, &manager->write_fds);
+ FD_CLR(i, manager->read_fds);
+ FD_CLR(i, manager->write_fds);
(void)close(i);
@@ -2261,7 +2314,7 @@ process_fds(isc_socketmgr_t *manager, int maxfd,
unlock_sock = ISC_FALSE;
if (FD_ISSET(i, readfds)) {
if (sock == NULL) {
- FD_CLR(i, &manager->read_fds);
+ FD_CLR(i, manager->read_fds);
goto check_write;
}
unlock_sock = ISC_TRUE;
@@ -2272,12 +2325,12 @@ process_fds(isc_socketmgr_t *manager, int maxfd,
else
dispatch_recv(sock);
}
- FD_CLR(i, &manager->read_fds);
+ FD_CLR(i, manager->read_fds);
}
check_write:
if (FD_ISSET(i, writefds)) {
if (sock == NULL) {
- FD_CLR(i, &manager->write_fds);
+ FD_CLR(i, manager->write_fds);
continue;
}
if (!unlock_sock) {
@@ -2290,7 +2343,7 @@ process_fds(isc_socketmgr_t *manager, int maxfd,
else
dispatch_send(sock);
}
- FD_CLR(i, &manager->write_fds);
+ FD_CLR(i, manager->write_fds);
}
if (unlock_sock)
UNLOCK(&sock->lock);
@@ -2311,8 +2364,6 @@ watcher(void *uap) {
isc_boolean_t done;
int ctlfd;
int cc;
- fd_set readfds;
- fd_set writefds;
int msg, fd;
int maxfd;
char strbuf[ISC_STRERRORSIZE];
@@ -2326,13 +2377,16 @@ watcher(void *uap) {
done = ISC_FALSE;
while (!done) {
do {
- readfds = manager->read_fds;
- writefds = manager->write_fds;
+ memcpy(manager->read_fds_copy, manager->read_fds,
+ manager->fd_bufsize);
+ memcpy(manager->write_fds_copy, manager->write_fds,
+ manager->fd_bufsize);
maxfd = manager->maxfd + 1;
UNLOCK(&manager->lock);
- cc = select(maxfd, &readfds, &writefds, NULL, NULL);
+ cc = select(maxfd, manager->read_fds_copy,
+ manager->write_fds_copy, NULL, NULL);
if (cc < 0) {
if (!SOFT_ERROR(errno)) {
isc__strerror(errno, strbuf,
@@ -2354,7 +2408,7 @@ watcher(void *uap) {
/*
* Process reads on internal, control fd.
*/
- if (FD_ISSET(ctlfd, &readfds)) {
+ if (FD_ISSET(ctlfd, manager->read_fds_copy)) {
for (;;) {
select_readmsg(manager, &fd, &msg);
@@ -2393,7 +2447,8 @@ watcher(void *uap) {
}
}
- process_fds(manager, maxfd, &readfds, &writefds);
+ process_fds(manager, maxfd, manager->read_fds_copy,
+ manager->write_fds_copy);
}
manager_log(manager, TRACE,
@@ -2405,6 +2460,88 @@ watcher(void *uap) {
}
#endif /* ISC_PLATFORM_USETHREADS */
+void
+isc__socketmgr_setreserved(isc_socketmgr_t *manager, isc_uint32_t reserved) {
+ /*
+ * Record 'reserved' in the manager. isc_socket_create() reads this
+ * value: when it is non-zero and a new UDP socket's descriptor is
+ * below it, the descriptor is duplicated with
+ * fcntl(fd, F_DUPFD, reserved) and the original is closed.
+ * The field is written here without taking manager->lock.
+ *
+ * NOTE(review): 'reserved' is isc_uint32_t but manager->reserved is
+ * an int, so the assignment converts implicitly -- confirm callers
+ * never pass values above INT_MAX.
+ */
+ REQUIRE(VALID_MANAGER(manager));
+
+ manager->reserved = reserved;
+}
+
+/*
+ * Initialize fdsets in socketmgr structure.
+ *
+ * Sets manager->fdsize (number of descriptors tracked) and
+ * manager->fd_bufsize (bytes per fd_set buffer), then allocates from 'mctx':
+ *   - fds and fdstate: arrays of manager->fdsize entries each;
+ *   - read_fds, read_fds_copy, write_fds, write_fds_copy: buffers of
+ *     manager->fd_bufsize bytes each.
+ *
+ * Nothing is cleared here; isc_socketmgr_create() zeroes fds, fdstate,
+ * read_fds and write_fds after this function returns.
+ *
+ * Returns ISC_R_SUCCESS when every allocation succeeded. On any allocation
+ * failure cleanup_fdsets() is called and ISC_R_NOMEMORY is returned.
+ */
+static isc_result_t
+create_fdsets(isc_socketmgr_t *manager, isc_mem_t *mctx) {
+#if ISC_SOCKET_FDSETSIZE > FD_SETSIZE
+ manager->fdsize = ISC_SOCKET_FDSETSIZE;
+ manager->fd_bufsize = howmany(ISC_SOCKET_FDSETSIZE, NFDBITS) *
+ sizeof(fd_mask);
+#else /* ISC_SOCKET_FDSETSIZE <= FD_SETSIZE: a plain fd_set is big enough */
+ manager->fdsize = FD_SETSIZE;
+ manager->fd_bufsize = sizeof(fd_set);
+#endif
+
+ manager->fds = NULL; /* NULL first: cleanup_fdsets() skips NULL pointers */
+ manager->fdstate = NULL;
+ manager->read_fds = NULL;
+ manager->read_fds_copy = NULL;
+ manager->write_fds = NULL;
+ manager->write_fds_copy = NULL;
+
+ manager->fds = isc_mem_get(mctx,
+ manager->fdsize * sizeof(manager->fds[0]));
+ if (manager->fds == NULL)
+ goto fail;
+
+ manager->fdstate = isc_mem_get(mctx, manager->fdsize *
+ sizeof(manager->fdstate[0]));
+ if (manager->fdstate == NULL)
+ goto fail;
+
+ manager->read_fds = isc_mem_get(mctx, manager->fd_bufsize);
+ if (manager->read_fds == NULL)
+ goto fail;
+ manager->read_fds_copy = isc_mem_get(mctx, manager->fd_bufsize);
+ if (manager->read_fds_copy == NULL)
+ goto fail;
+ manager->write_fds = isc_mem_get(mctx, manager->fd_bufsize);
+ if (manager->write_fds == NULL)
+ goto fail;
+ manager->write_fds_copy = isc_mem_get(mctx, manager->fd_bufsize);
+ if (manager->write_fds_copy == NULL)
+ goto fail;
+
+ return (ISC_R_SUCCESS);
+
+ fail: /* release whatever was allocated before the failing request */
+ cleanup_fdsets(manager, mctx);
+ return (ISC_R_NOMEMORY);
+}
+
+/*
+ * Clean up fdsets in socketmgr structure.
+ *
+ * Returns to 'mctx' every buffer whose pointer in 'manager' is non-NULL:
+ * fds and fdstate (sized from manager->fdsize) and the four fd_set buffers
+ * (manager->fd_bufsize bytes each). manager->fdsize and manager->fd_bufsize
+ * must therefore still hold the values used when the buffers were allocated.
+ *
+ * NOTE(review): the pointers are not explicitly reset to NULL here, so
+ * whether a second call is harmless depends on whether isc_mem_put() clears
+ * its pointer argument -- confirm. isc_socketmgr_create() does call this
+ * again after create_fdsets() has already cleaned up on failure.
+ */
+static void
+cleanup_fdsets(isc_socketmgr_t *manager, isc_mem_t *mctx) {
+ if (manager->fds != NULL) {
+ isc_mem_put(mctx, manager->fds,
+ manager->fdsize * sizeof(manager->fds[0]));
+ }
+ if (manager->fdstate != NULL) {
+ isc_mem_put(mctx, manager->fdstate,
+ manager->fdsize * sizeof(manager->fdstate[0]));
+ }
+ if (manager->read_fds != NULL)
+ isc_mem_put(mctx, manager->read_fds, manager->fd_bufsize);
+ if (manager->read_fds_copy != NULL)
+ isc_mem_put(mctx, manager->read_fds_copy, manager->fd_bufsize);
+ if (manager->write_fds != NULL)
+ isc_mem_put(mctx, manager->write_fds, manager->fd_bufsize);
+ if (manager->write_fds_copy != NULL)
+ isc_mem_put(mctx, manager->write_fds_copy, manager->fd_bufsize);
+}
+
/*
* Create a new socket manager.
*/
@@ -2430,17 +2567,26 @@ isc_socketmgr_create(isc_mem_t *mctx, isc_socketmgr_t **managerp) {
if (manager == NULL)
return (ISC_R_NOMEMORY);
+ result = create_fdsets(manager, mctx);
+ if (result != ISC_R_SUCCESS) {
+ cleanup_fdsets(manager, mctx);
+ isc_mem_put(mctx, manager, sizeof(*manager));
+ return (result);
+ }
+
manager->magic = SOCKET_MANAGER_MAGIC;
manager->mctx = NULL;
- memset(manager->fds, 0, sizeof(manager->fds));
+ memset(manager->fds, 0, sizeof(manager->fds[0]) * manager->fdsize);
ISC_LIST_INIT(manager->socklist);
result = isc_mutex_init(&manager->lock);
if (result != ISC_R_SUCCESS) {
+ cleanup_fdsets(manager, mctx);
isc_mem_put(mctx, manager, sizeof(*manager));
return (result);
}
#ifdef ISC_PLATFORM_USETHREADS
if (isc_condition_init(&manager->shutdown_ok) != ISC_R_SUCCESS) {
+ cleanup_fdsets(manager, mctx);
DESTROYLOCK(&manager->lock);
isc_mem_put(mctx, manager, sizeof(*manager));
UNEXPECTED_ERROR(__FILE__, __LINE__,
@@ -2455,6 +2601,7 @@ isc_socketmgr_create(isc_mem_t *mctx, isc_socketmgr_t **managerp) {
* select/poll loop when something internal needs to be done.
*/
if (pipe(manager->pipe_fds) != 0) {
+ cleanup_fdsets(manager, mctx);
DESTROYLOCK(&manager->lock);
isc_mem_put(mctx, manager, sizeof(*manager));
isc__strerror(errno, strbuf, sizeof(strbuf));
@@ -2478,15 +2625,17 @@ isc_socketmgr_create(isc_mem_t *mctx, isc_socketmgr_t **managerp) {
/*
* Set up initial state for the select loop
*/
- FD_ZERO(&manager->read_fds);
- FD_ZERO(&manager->write_fds);
+ memset(manager->read_fds, 0, manager->fd_bufsize);
+ memset(manager->write_fds, 0, manager->fd_bufsize);
#ifdef ISC_PLATFORM_USETHREADS
- FD_SET(manager->pipe_fds[0], &manager->read_fds);
+ FD_SET(manager->pipe_fds[0], manager->read_fds);
manager->maxfd = manager->pipe_fds[0];
#else /* ISC_PLATFORM_USETHREADS */
manager->maxfd = 0;
#endif /* ISC_PLATFORM_USETHREADS */
- memset(manager->fdstate, 0, sizeof(manager->fdstate));
+ manager->reserved = 0;
+ memset(manager->fdstate, 0,
+ manager->fdsize * sizeof(manager->fdstate[0]));
#ifdef ISC_PLATFORM_USETHREADS
/*
@@ -2592,11 +2741,12 @@ isc_socketmgr_destroy(isc_socketmgr_t **managerp) {
(void)isc_condition_destroy(&manager->shutdown_ok);
#endif /* ISC_PLATFORM_USETHREADS */
- for (i = 0; i < (int)FD_SETSIZE; i++)
+ for (i = 0; i < (int)manager->fdsize; i++)
if (manager->fdstate[i] == CLOSE_PENDING)
(void)close(i);
DESTROYLOCK(&manager->lock);
+ cleanup_fdsets(manager, manager->mctx);
manager->magic = 0;
mctx= manager->mctx;
isc_mem_put(mctx, manager, sizeof(*manager));
@@ -3165,7 +3315,8 @@ isc_socket_permunix(isc_sockaddr_t *sockaddr, isc_uint32_t perm,
}
isc_result_t
-isc_socket_bind(isc_socket_t *sock, isc_sockaddr_t *sockaddr) {
+isc_socket_bind(isc_socket_t *sock, isc_sockaddr_t *sockaddr,
+ unsigned int options) {
char strbuf[ISC_STRERRORSIZE];
int on = 1;
@@ -3184,7 +3335,8 @@ isc_socket_bind(isc_socket_t *sock, isc_sockaddr_t *sockaddr) {
if (sock->pf == AF_UNIX)
goto bind_socket;
#endif
- if (isc_sockaddr_getport(sockaddr) != (in_port_t)0 &&
+ if ((options & ISC_SOCKET_REUSEADDRESS) != 0 &&
+ isc_sockaddr_getport(sockaddr) != (in_port_t)0 &&
setsockopt(sock->fd, SOL_SOCKET, SO_REUSEADDR, (void *)&on,
sizeof(on)) < 0) {
UNEXPECTED_ERROR(__FILE__, __LINE__,
@@ -3813,12 +3965,17 @@ isc_socket_ipv6only(isc_socket_t *sock, isc_boolean_t yes) {
#ifndef ISC_PLATFORM_USETHREADS
void
-isc__socketmgr_getfdsets(fd_set *readset, fd_set *writeset, int *maxfd) {
+isc__socketmgr_getfdsets(fd_set **readset, fd_set **writeset, int *maxfd) {
if (socketmgr == NULL)
*maxfd = 0;
else {
- *readset = socketmgr->read_fds;
- *writeset = socketmgr->write_fds;
+ /* Prepare duplicates of fd_sets, as select() will modify */
+ memcpy(socketmgr->read_fds_copy, socketmgr->read_fds,
+ socketmgr->fd_bufsize);
+ memcpy(socketmgr->write_fds_copy, socketmgr->write_fds,
+ socketmgr->fd_bufsize);
+ *readset = socketmgr->read_fds_copy;
+ *writeset = socketmgr->write_fds_copy;
*maxfd = socketmgr->maxfd + 1;
}
}
OpenPOWER on IntegriCloud