diff options
author | dougb <dougb@FreeBSD.org> | 2008-12-23 18:35:21 +0000 |
---|---|---|
committer | dougb <dougb@FreeBSD.org> | 2008-12-23 18:35:21 +0000 |
commit | 6c8226d7d6bb16d3d0bc2ab68a30b0700011c64f (patch) | |
tree | d25d756be8550df073eb3ed4e5b39831380291b5 /lib/isc | |
parent | e2c9b86ef6b41282513a874faaf6d208422cfc7b (diff) | |
download | FreeBSD-src-6c8226d7d6bb16d3d0bc2ab68a30b0700011c64f.zip FreeBSD-src-6c8226d7d6bb16d3d0bc2ab68a30b0700011c64f.tar.gz |
Vendor import of BIND 9.4.3
Diffstat (limited to 'lib/isc')
-rw-r--r-- | lib/isc/Makefile.in | 8 | ||||
-rw-r--r-- | lib/isc/api | 4 | ||||
-rw-r--r-- | lib/isc/assertions.c | 7 | ||||
-rw-r--r-- | lib/isc/include/isc/assertions.h | 11 | ||||
-rw-r--r-- | lib/isc/include/isc/lex.h | 8 | ||||
-rw-r--r-- | lib/isc/include/isc/mem.h | 86 | ||||
-rw-r--r-- | lib/isc/include/isc/msgs.h | 13 | ||||
-rw-r--r-- | lib/isc/include/isc/platform.h.in | 25 | ||||
-rw-r--r-- | lib/isc/include/isc/portset.h | 141 | ||||
-rw-r--r-- | lib/isc/include/isc/resource.h | 15 | ||||
-rw-r--r-- | lib/isc/include/isc/socket.h | 77 | ||||
-rw-r--r-- | lib/isc/include/isc/timer.h | 2 | ||||
-rw-r--r-- | lib/isc/include/isc/types.h | 7 | ||||
-rw-r--r-- | lib/isc/mem.c | 46 | ||||
-rw-r--r-- | lib/isc/portset.c | 143 | ||||
-rw-r--r-- | lib/isc/print.c | 95 | ||||
-rw-r--r-- | lib/isc/pthreads/mutex.c | 78 | ||||
-rw-r--r-- | lib/isc/timer.c | 4 | ||||
-rw-r--r-- | lib/isc/unix/app.c | 37 | ||||
-rw-r--r-- | lib/isc/unix/include/isc/net.h | 27 | ||||
-rw-r--r-- | lib/isc/unix/net.c | 161 | ||||
-rw-r--r-- | lib/isc/unix/resource.c | 61 | ||||
-rw-r--r-- | lib/isc/unix/socket.c | 1674 | ||||
-rw-r--r-- | lib/isc/unix/socket_p.h | 15 | ||||
-rw-r--r-- | lib/isc/unix/time.c | 10 |
25 files changed, 2182 insertions, 573 deletions
diff --git a/lib/isc/Makefile.in b/lib/isc/Makefile.in index 71e4252..82afe5f 100644 --- a/lib/isc/Makefile.in +++ b/lib/isc/Makefile.in @@ -1,4 +1,4 @@ -# Copyright (C) 2004-2007 Internet Systems Consortium, Inc. ("ISC") +# Copyright (C) 2004-2008 Internet Systems Consortium, Inc. ("ISC") # Copyright (C) 1998-2003 Internet Software Consortium. # # Permission to use, copy, modify, and/or distribute this software for any @@ -13,7 +13,7 @@ # OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR # PERFORMANCE OF THIS SOFTWARE. -# $Id: Makefile.in,v 1.81.18.8 2007/09/14 23:46:18 tbox Exp $ +# $Id: Makefile.in,v 1.81.18.10 2008/06/24 23:45:55 tbox Exp $ srcdir = @srcdir@ VPATH = @srcdir@ @@ -56,7 +56,7 @@ OBJS = @ISC_EXTRA_OBJS@ \ hash.@O@ heap.@O@ hex.@O@ hmacmd5.@O@ hmacsha.@O@\ lex.@O@ lfsr.@O@ lib.@O@ log.@O@ md5.@O@ \ mem.@O@ mutexblock.@O@ netaddr.@O@ netscope.@O@ ondestroy.@O@ \ - parseint.@O@ quota.@O@ random.@O@ \ + parseint.@O@ portset.@O@ quota.@O@ random.@O@ \ ratelimiter.@O@ refcount.@O@ region.@O@ result.@O@ rwlock.@O@ \ serial.@O@ sha1.@O@ sha2.@O@ sockaddr.@O@ string.@O@ \ strtoul.@O@ symtab.@O@ task.@O@ taskpool.@O@ timer.@O@ \ @@ -69,7 +69,7 @@ SRCS = @ISC_EXTRA_SRCS@ \ heap.c hex.c hmacmd5.c hmacsha.c \ lex.c lfsr.c lib.c log.c \ md5.c mem.c mutexblock.c netaddr.c netscope.c ondestroy.c \ - parseint.c quota.c random.c \ + parseint.c portset.c quota.c random.c \ ratelimiter.c refcount.c region.c result.c rwlock.c \ serial.c sha1.c sha2.c sockaddr.c string.c strtoul.c symtab.c \ task.c taskpool.c timer.c version.c diff --git a/lib/isc/api b/lib/isc/api index f40dfe0..0b8a3bc 100644 --- a/lib/isc/api +++ b/lib/isc/api @@ -1,3 +1,3 @@ -LIBINTERFACE = 35 -LIBREVISION = 0 +LIBINTERFACE = 36 +LIBREVISION = 2 LIBAGE = 0 diff --git a/lib/isc/assertions.c b/lib/isc/assertions.c index b3fcf4a..3eb27e0 100644 --- a/lib/isc/assertions.c +++ b/lib/isc/assertions.c @@ -1,8 +1,8 @@ /* - * Copyright (C) 2004, 2005 Internet Systems Consortium, Inc. ("ISC") + * Copyright (C) 2004, 2005, 2008 Internet Systems Consortium, Inc. ("ISC") * Copyright (C) 1997-2001 Internet Software Consortium. * - * Permission to use, copy, modify, and distribute this software for any + * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * @@ -15,7 +15,7 @@ * PERFORMANCE OF THIS SOFTWARE. */ -/* $Id: assertions.c,v 1.17.18.2 2005/04/29 00:16:44 marka Exp $ */ +/* $Id: assertions.c,v 1.17.18.4 2008/10/15 23:46:06 tbox Exp $ */ /*! \file */ @@ -30,6 +30,7 @@ /*% * Forward. */ +/* coverity[+kill] */ static void default_callback(const char *, int, isc_assertiontype_t, const char *); diff --git a/lib/isc/include/isc/assertions.h b/lib/isc/include/isc/assertions.h index c1e68a1..fcf0eb8 100644 --- a/lib/isc/include/isc/assertions.h +++ b/lib/isc/include/isc/assertions.h @@ -1,8 +1,8 @@ /* - * Copyright (C) 2004, 2005 Internet Systems Consortium, Inc. ("ISC") + * Copyright (C) 2004, 2005, 2008 Internet Systems Consortium, Inc. ("ISC") * Copyright (C) 1997-2001 Internet Software Consortium. * - * Permission to use, copy, modify, and distribute this software for any + * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * @@ -16,7 +16,7 @@ */ /* - * $Id: assertions.h,v 1.18.18.2 2005/04/29 00:16:52 marka Exp $ + * $Id: assertions.h,v 1.18.18.4 2008/10/15 23:46:06 tbox Exp $ */ /*! \file assertions.h */ @@ -40,6 +40,7 @@ typedef enum { typedef void (*isc_assertioncallback_t)(const char *, int, isc_assertiontype_t, const char *); +/* coverity[+kill] */ LIBISC_EXTERNAL_DATA extern isc_assertioncallback_t isc_assertion_failed; void @@ -48,14 +49,14 @@ isc_assertion_setcallback(isc_assertioncallback_t); const char * isc_assertion_typetotext(isc_assertiontype_t type); -#ifdef ISC_CHECK_ALL +#if defined(ISC_CHECK_ALL) || defined(__COVERITY__) #define ISC_CHECK_REQUIRE 1 #define ISC_CHECK_ENSURE 1 #define ISC_CHECK_INSIST 1 #define ISC_CHECK_INVARIANT 1 #endif -#ifdef ISC_CHECK_NONE +#if defined(ISC_CHECK_NONE) && !defined(__COVERITY__) #define ISC_CHECK_REQUIRE 0 #define ISC_CHECK_ENSURE 0 #define ISC_CHECK_INSIST 0 diff --git a/lib/isc/include/isc/lex.h b/lib/isc/include/isc/lex.h index 8c6624a..cb9cc18 100644 --- a/lib/isc/include/isc/lex.h +++ b/lib/isc/include/isc/lex.h @@ -1,8 +1,8 @@ /* - * Copyright (C) 2004, 2005 Internet Systems Consortium, Inc. ("ISC") + * Copyright (C) 2004, 2005, 2008 Internet Systems Consortium, Inc. ("ISC") * Copyright (C) 1998-2002 Internet Software Consortium. * - * Permission to use, copy, modify, and distribute this software for any + * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * @@ -15,7 +15,7 @@ * PERFORMANCE OF THIS SOFTWARE. */ -/* $Id: lex.h,v 1.30.18.3 2005/06/04 00:39:05 marka Exp $ */ +/* $Id: lex.h,v 1.30.18.5 2008/05/30 23:46:01 tbox Exp $ */ #ifndef ISC_LEX_H #define ISC_LEX_H 1 @@ -86,7 +86,7 @@ ISC_LANG_BEGINDECLS #define ISC_LEXOPT_DNSMULTILINE 0x20 /*%< Handle '(' and ')'. */ #define ISC_LEXOPT_NOMORE 0x40 /*%< Want "no more" token. */ -#define ISC_LEXOPT_CNUMBER 0x80 /*%< Regognize octal and hex. */ +#define ISC_LEXOPT_CNUMBER 0x80 /*%< Recognize octal and hex. */ #define ISC_LEXOPT_ESCAPE 0x100 /*%< Recognize escapes. */ #define ISC_LEXOPT_QSTRINGMULTILINE 0x200 /*%< Allow multiline "" strings */ #define ISC_LEXOPT_OCTAL 0x400 /*%< Expect a octal number. */ diff --git a/lib/isc/include/isc/mem.h b/lib/isc/include/isc/mem.h index dc68bcb..2c3c54e 100644 --- a/lib/isc/include/isc/mem.h +++ b/lib/isc/include/isc/mem.h @@ -1,8 +1,8 @@ /* - * Copyright (C) 2004-2006 Internet Systems Consortium, Inc. ("ISC") + * Copyright (C) 2004-2006, 2008 Internet Systems Consortium, Inc. ("ISC") * Copyright (C) 1997-2001 Internet Software Consortium. * - * Permission to use, copy, modify, and distribute this software for any + * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * @@ -15,7 +15,7 @@ * PERFORMANCE OF THIS SOFTWARE. */ -/* $Id: mem.h,v 1.59.18.9 2006/01/04 23:50:23 marka Exp $ */ +/* $Id: mem.h,v 1.59.18.11 2008/02/07 23:45:56 tbox Exp $ */ #ifndef ISC_MEM_H #define ISC_MEM_H 1 @@ -156,7 +156,7 @@ LIBISC_EXTERNAL_DATA extern unsigned int isc_mem_debugging; #define isc_mem_strdup(c, p) isc__mem_strdup((c), (p) _ISC_MEM_FILELINE) #define isc_mempool_get(c) isc__mempool_get((c) _ISC_MEM_FILELINE) -/*% +/*% * isc_mem_putanddetach() is a convienence function for use where you * have a structure with an attached memory context. * @@ -215,7 +215,7 @@ LIBISC_EXTERNAL_DATA extern unsigned int isc_mem_debugging; #endif /*@{*/ -isc_result_t +isc_result_t isc_mem_create(size_t max_size, size_t target_size, isc_mem_t **mctxp); @@ -223,12 +223,12 @@ isc_result_t isc_mem_create2(size_t max_size, size_t target_size, isc_mem_t **mctxp, unsigned int flags); -isc_result_t +isc_result_t isc_mem_createx(size_t max_size, size_t target_size, isc_memalloc_t memalloc, isc_memfree_t memfree, void *arg, isc_mem_t **mctxp); -isc_result_t +isc_result_t isc_mem_createx2(size_t max_size, size_t target_size, isc_memalloc_t memalloc, isc_memfree_t memfree, void *arg, isc_mem_t **mctxp, unsigned int flags); @@ -265,9 +265,9 @@ isc_mem_createx2(size_t max_size, size_t target_size, /*@}*/ /*@{*/ -void +void isc_mem_attach(isc_mem_t *, isc_mem_t **); -void +void isc_mem_detach(isc_mem_t **); /*!< * \brief Attach to / detach from a memory context. @@ -275,7 +275,7 @@ isc_mem_detach(isc_mem_t **); * This is intended for applications that use multiple memory contexts * in such a way that it is not obvious when the last allocations from * a given context has been freed and destroying the context is safe. - * + * * Most applications do not need to call these functions as they can * simply create a single memory context at the beginning of main() * and destroy it at the end of main(), thereby guaranteeing that it @@ -283,13 +283,13 @@ isc_mem_detach(isc_mem_t **); */ /*@}*/ -void +void isc_mem_destroy(isc_mem_t **); /*%< * Destroy a memory context. */ -isc_result_t +isc_result_t isc_mem_ondestroy(isc_mem_t *ctx, isc_task_t *task, isc_event_t **event); @@ -298,13 +298,13 @@ isc_mem_ondestroy(isc_mem_t *ctx, * been successfully destroyed. */ -void +void isc_mem_stats(isc_mem_t *mctx, FILE *out); /*%< * Print memory usage statistics for 'mctx' on the stream 'out'. */ -void +void isc_mem_setdestroycheck(isc_mem_t *mctx, isc_boolean_t on); /*%< @@ -313,9 +313,9 @@ isc_mem_setdestroycheck(isc_mem_t *mctx, */ /*@{*/ -void +void isc_mem_setquota(isc_mem_t *, size_t); -size_t +size_t isc_mem_getquota(isc_mem_t *); /*%< * Set/get the memory quota of 'mctx'. This is a hard limit @@ -324,7 +324,7 @@ isc_mem_getquota(isc_mem_t *); */ /*@}*/ -size_t +size_t isc_mem_inuse(isc_mem_t *mctx); /*%< * Get an estimate of the number of memory in use in 'mctx', in bytes. @@ -336,13 +336,29 @@ void isc_mem_setwater(isc_mem_t *mctx, isc_mem_water_t water, void *water_arg, size_t hiwater, size_t lowater); /*%< - * Set high and low water marks for this memory context. - * - * When the memory - * usage of 'mctx' exceeds 'hiwater', '(water)(water_arg, #ISC_MEM_HIWATER)' - * will be called. When the usage drops below 'lowater', 'water' will - * again be called, this time with #ISC_MEM_LOWATER. - * + * Set high and low water marks for this memory context. + * + * When the memory usage of 'mctx' exceeds 'hiwater', + * '(water)(water_arg, #ISC_MEM_HIWATER)' will be called. 'water' needs to + * call isc_mem_waterack() with #ISC_MEM_HIWATER to acknowlege the state + * change. 'water' may be called multiple times. + * + * When the usage drops below 'lowater', 'water' will again be called, this + * time with #ISC_MEM_LOWATER. 'water' need to calls isc_mem_waterack() with + * #ISC_MEM_LOWATER to acknowlege the change. + * + * static void + * water(void *arg, int mark) { + * struct foo *foo = arg; + * + * LOCK(&foo->marklock); + * if (foo->mark != mark) { + * foo->mark = mark; + * .... + * isc_mem_waterack(foo->mctx, mark); + * } + * UNLOCK(&foo->marklock); + * } * If 'water' is NULL then 'water_arg', 'hi_water' and 'lo_water' are * ignored and the state is reset. * @@ -353,6 +369,12 @@ isc_mem_setwater(isc_mem_t *mctx, isc_mem_water_t water, void *water_arg, */ void +isc_mem_waterack(isc_mem_t *ctx, int mark); +/*%< + * Called to acknowledge changes in signalled by calls to 'water'. + */ + +void isc_mem_printactive(isc_mem_t *mctx, FILE *file); /*%< * Print to 'file' all active memory in 'mctx'. @@ -520,22 +542,22 @@ isc_mempool_setfillcount(isc_mempool_t *mpctx, unsigned int limit); /* * Pseudo-private functions for use via macros. Do not call directly. */ -void * +void * isc__mem_get(isc_mem_t *, size_t _ISC_MEM_FLARG); -void +void isc__mem_putanddetach(isc_mem_t **, void *, size_t _ISC_MEM_FLARG); -void +void isc__mem_put(isc_mem_t *, void *, size_t _ISC_MEM_FLARG); -void * +void * isc__mem_allocate(isc_mem_t *, size_t _ISC_MEM_FLARG); -void +void isc__mem_free(isc_mem_t *, void * _ISC_MEM_FLARG); -char * +char * isc__mem_strdup(isc_mem_t *, const char *_ISC_MEM_FLARG); -void * +void * isc__mempool_get(isc_mempool_t * _ISC_MEM_FLARG); -void +void isc__mempool_put(isc_mempool_t *, void * _ISC_MEM_FLARG); ISC_LANG_ENDDECLS diff --git a/lib/isc/include/isc/msgs.h b/lib/isc/include/isc/msgs.h index 97b2108..0970647 100644 --- a/lib/isc/include/isc/msgs.h +++ b/lib/isc/include/isc/msgs.h @@ -1,8 +1,8 @@ /* - * Copyright (C) 2004, 2005 Internet Systems Consortium, Inc. ("ISC") + * Copyright (C) 2004, 2005, 2008 Internet Systems Consortium, Inc. ("ISC") * Copyright (C) 2000-2003 Internet Software Consortium. * - * Permission to use, copy, modify, and distribute this software for any + * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * @@ -15,7 +15,7 @@ * PERFORMANCE OF THIS SOFTWARE. */ -/* $Id: msgs.h,v 1.9.18.2 2005/04/29 00:16:59 marka Exp $ */ +/* $Id: msgs.h,v 1.9.18.4 2008/08/08 06:27:56 tbox Exp $ */ #ifndef ISC_MSGS_H #define ISC_MSGS_H 1 @@ -57,7 +57,7 @@ /*@{*/ /*! - * Message numbers + * Message numbers * are only required to be unique per message set, * but are unique throughout the entire catalog to not be as confusing when * debugging. @@ -153,7 +153,10 @@ #define ISC_MSG_ACCEPTRETURNED 1418 /*%< accept() returned %d/%s */ #define ISC_MSG_TOOMANYFDS 1419 /*%< %s: too many open file descriptors */ #define ISC_MSG_ZEROPORT 1420 /*%< dropping source port zero packet */ -#define ISC_MSG_FILTER 1420 /*%< setsockopt(SO_ACCEPTFILTER): %s */ +#define ISC_MSG_FILTER 1421 /*%< setsockopt(SO_ACCEPTFILTER): %s */ + +#define ISC_MSG_TOOMANYHANDLES 1422 /*%< %s: too many open WSA event handles: %s */ + #define ISC_MSG_AWAKE 1502 /*%< "awake" */ #define ISC_MSG_WORKING 1503 /*%< "working" */ diff --git a/lib/isc/include/isc/platform.h.in b/lib/isc/include/isc/platform.h.in index 0531edf..afcd4df 100644 --- a/lib/isc/include/isc/platform.h.in +++ b/lib/isc/include/isc/platform.h.in @@ -1,5 +1,5 @@ /* - * Copyright (C) 2004, 2005, 2007 Internet Systems Consortium, Inc. ("ISC") + * Copyright (C) 2004, 2005, 2007, 2008 Internet Systems Consortium, Inc. ("ISC") * Copyright (C) 1999-2003 Internet Software Consortium. * * Permission to use, copy, modify, and/or distribute this software for any @@ -15,7 +15,7 @@ * PERFORMANCE OF THIS SOFTWARE. */ -/* $Id: platform.h.in,v 1.34.18.9 2007/09/13 05:04:01 each Exp $ */ +/* $Id: platform.h.in,v 1.34.18.11 2008/06/24 23:45:55 tbox Exp $ */ #ifndef ISC_PLATFORM_H #define ISC_PLATFORM_H 1 @@ -140,6 +140,21 @@ */ @ISC_PLATFORM_FIXIN6ISADDR@ +/*! \brief + * Define if the system supports kqueue multiplexing + */ +@ISC_PLATFORM_HAVEKQUEUE@ + +/*! \brief + * Define if the system supports epoll multiplexing + */ +@ISC_PLATFORM_HAVEEPOLL@ + +/*! \brief + * Define if the system supports /dev/poll multiplexing + */ +@ISC_PLATFORM_HAVEDEVPOLL@ + /* *** Printing. ***/ @@ -224,19 +239,19 @@ /* * If the "xadd" operation is available on this architecture, - * ISC_PLATFORM_HAVEXADD will be defined. + * ISC_PLATFORM_HAVEXADD will be defined. */ @ISC_PLATFORM_HAVEXADD@ /* * If the "atomic swap" operation is available on this architecture, - * ISC_PLATFORM_HAVEATOMICSTORE" will be defined. + * ISC_PLATFORM_HAVEATOMICSTORE" will be defined. */ @ISC_PLATFORM_HAVEATOMICSTORE@ /* * If the "compare-and-exchange" operation is available on this architecture, - * ISC_PLATFORM_HAVECMPXCHG will be defined. + * ISC_PLATFORM_HAVECMPXCHG will be defined. */ @ISC_PLATFORM_HAVECMPXCHG@ diff --git a/lib/isc/include/isc/portset.h b/lib/isc/include/isc/portset.h new file mode 100644 index 0000000..6396e5c --- /dev/null +++ b/lib/isc/include/isc/portset.h @@ -0,0 +1,141 @@ +/* + * Copyright (C) 2008 Internet Systems Consortium, Inc. ("ISC") + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH + * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY + * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT, + * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM + * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE + * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR + * PERFORMANCE OF THIS SOFTWARE. + */ + +/* $Id: portset.h,v 1.3.4.1 2008/06/24 03:42:10 marka Exp $ */ + +/*! \file isc/portset.h + * \brief Transport Protocol Port Manipuration Module + * + * This module provides simple utilities to handle a set of transport protocol + * (UDP or TCP) port numbers, e.g., for creating an ACL list. An isc_portset_t + * object is an opaque instance of a port set, for which the user can add or + * remove a specific port or a range of consecutive ports. This object is + * expected to be used as a temporary work space only, and does not protect + * simultaneous access from multiple threads. Therefore it must not be stored + * in a place that can be accessed from multiple threads. + */ + +#ifndef ISC_PORTSET_H +#define ISC_PORTSET_H 1 + +/*** + *** Imports + ***/ + +#include <isc/net.h> + +/*** + *** Functions + ***/ + +ISC_LANG_BEGINDECLS + +isc_result_t +isc_portset_create(isc_mem_t *mctx, isc_portset_t **portsetp); +/*%< + * Create a port set and initialize it as an empty set. + * + * Requires: + *\li 'mctx' to be valid. + *\li 'portsetp' to be non NULL and '*portsetp' to be NULL; + * + * Returns: + *\li #ISC_R_SUCCESS + *\li #ISC_R_NOMEMORY + */ + +void +isc_portset_destroy(isc_mem_t *mctx, isc_portset_t **portsetp); +/*%< + * Destroy a port set. + * + * Requires: + *\li 'mctx' to be valid and must be the same context given when the port set + * was created. + *\li '*portsetp' to be a valid set. + */ + +isc_boolean_t +isc_portset_isset(isc_portset_t *portset, in_port_t port); +/*%< + * Test whether the given port is stored in the portset. + * + * Requires: + *\li 'portset' to be a valid set. + * + * Returns + * \li #ISC_TRUE if the port is found, ISC_FALSE otherwise. + */ + +unsigned int +isc_portset_nports(isc_portset_t *portset); +/*%< + * Provides the number of ports stored in the given portset. + * + * Requires: + *\li 'portset' to be a valid set. + * + * Returns + * \li the number of ports stored in portset. + */ + +void +isc_portset_add(isc_portset_t *portset, in_port_t port); +/*%< + * Add the given port to the portset. The port may or may not be stored in + * the portset. + * + * Requires: + *\li 'portlist' to be valid. + */ + +void +isc_portset_remove(isc_portset_t *portset, in_port_t port); +/*%< + * Remove the given port to the portset. The port may or may not be stored in + * the portset. + * + * Requires: + *\li 'portlist' to be valid. + */ + +void +isc_portset_addrange(isc_portset_t *portset, in_port_t port_lo, + in_port_t port_hi); +/*%< + * Add a subset of [port_lo, port_hi] (inclusive) to the portset. Ports in the + * subset may or may not be stored in portset. + * + * Requires: + *\li 'portlist' to be valid. + *\li port_lo <= port_hi + */ + +void +isc_portset_removerange(isc_portset_t *portset, in_port_t port_lo, + in_port_t port_hi); +/*%< + * Subtract a subset of [port_lo, port_hi] (inclusive) from the portset. Ports + * in the subset may or may not be stored in portset. + * + * Requires: + *\li 'portlist' to be valid. + *\li port_lo <= port_hi + */ + +ISC_LANG_ENDDECLS + +#endif /* ISC_NETADDR_H */ diff --git a/lib/isc/include/isc/resource.h b/lib/isc/include/isc/resource.h index 18cd0ad..8c33c89 100644 --- a/lib/isc/include/isc/resource.h +++ b/lib/isc/include/isc/resource.h @@ -15,7 +15,7 @@ * PERFORMANCE OF THIS SOFTWARE. */ -/* $Id: resource.h,v 1.5.18.2.52.2 2008/07/23 23:48:17 tbox Exp $ */ +/* $Id: resource.h,v 1.5.18.4 2008/08/01 23:45:58 tbox Exp $ */ #ifndef ISC_RESOURCE_H #define ISC_RESOURCE_H 1 @@ -82,16 +82,13 @@ isc_resource_getlimit(isc_resource_t resource, isc_resourcevalue_t *value); */ isc_result_t -isc_resource_curlimit(isc_resource_t resource, isc_resourcevalue_t *value); -/* - * Get the current limit on a resource. - * - * Requires: - * 'resource' is a valid member of the isc_resource_t enumeration. +isc_resource_getcurlimit(isc_resource_t resource, isc_resourcevalue_t *value); +/*%< + * Same as isc_resource_getlimit(), but returns the current (soft) limit. * * Returns: - * ISC_R_SUCCESS Success. - * ISC_R_NOTIMPLEMENTED 'resource' is not a type known by the OS. + *\li #ISC_R_SUCCESS Success. + *\li #ISC_R_NOTIMPLEMENTED 'resource' is not a type known by the OS. */ ISC_LANG_ENDDECLS diff --git a/lib/isc/include/isc/socket.h b/lib/isc/include/isc/socket.h index 951a063..a9a22c8 100644 --- a/lib/isc/include/isc/socket.h +++ b/lib/isc/include/isc/socket.h @@ -15,7 +15,7 @@ * PERFORMANCE OF THIS SOFTWARE. */ -/* $Id: socket.h,v 1.57.18.6.46.4 2008/07/23 23:16:43 marka Exp $ */ +/* $Id: socket.h,v 1.57.18.15 2008/09/04 08:03:08 marka Exp $ */ #ifndef ISC_SOCKET_H #define ISC_SOCKET_H 1 @@ -318,8 +318,53 @@ isc_socket_detach(isc_socket_t **socketp); */ isc_result_t +isc_socket_open(isc_socket_t *sock); +/*%< + * Open a new socket file descriptor of the given socket structure. It simply + * opens a new descriptor; all of the other parameters including the socket + * type are inherited from the existing socket. This function is provided to + * avoid overhead of destroying and creating sockets when many short-lived + * sockets are frequently opened and closed. When the efficiency is not an + * issue, it should be safer to detach the unused socket and re-create a new + * one. This optimization may not be available for some systems, in which + * case this function will return ISC_R_NOTIMPLEMENTED and must not be used. + * + * Requires: + * + * \li there must be no other reference to this socket. + * + * \li 'socket' is a valid and previously closed by isc_socket_close() + * + * Returns: + * Same as isc_socket_create(). + * \li ISC_R_NOTIMPLEMENTED + */ + +isc_result_t +isc_socket_close(isc_socket_t *sock); +/*%< + * Close a socket file descriptor of the given socket structure. This function + * is provided as an alternative to destroying an unused socket when overhead + * destroying/re-creating sockets can be significant, and is expected to be + * used with isc_socket_open(). This optimization may not be available for some + * systems, in which case this function will return ISC_R_NOTIMPLEMENTED and + * must not be used. + * + * Requires: + * + * \li The socket must have a valid descriptor. + * + * \li There must be no other reference to this socket. + * + * \li There must be no pending I/O requests. + * + * Returns: + * \li #ISC_R_NOTIMPLEMENTED + */ + +isc_result_t isc_socket_bind(isc_socket_t *sock, isc_sockaddr_t *addressp, - unsigned int options); + unsigned int options); /*%< * Bind 'socket' to '*addressp'. * @@ -644,8 +689,15 @@ isc_socket_sendto2(isc_socket_t *sock, isc_region_t *region, isc_result_t isc_socketmgr_create(isc_mem_t *mctx, isc_socketmgr_t **managerp); + +isc_result_t +isc_socketmgr_create2(isc_mem_t *mctx, isc_socketmgr_t **managerp, + unsigned int maxsocks); /*%< - * Create a socket manager. + * Create a socket manager. If "maxsocks" is non-zero, it specifies the + * maximum number of sockets that the created manager should handle. + * isc_socketmgr_create() is equivalent of isc_socketmgr_create2() with + * "maxsocks" being zero. * * Notes: * @@ -666,6 +718,23 @@ isc_socketmgr_create(isc_mem_t *mctx, isc_socketmgr_t **managerp); *\li #ISC_R_SUCCESS *\li #ISC_R_NOMEMORY *\li #ISC_R_UNEXPECTED + *\li #ISC_R_NOTIMPLEMENTED + */ + +isc_result_t +isc_socketmgr_getmaxsockets(isc_socketmgr_t *manager, unsigned int *nsockp); +/*%< + * Returns in "*nsockp" the maximum number of sockets this manager may open. + * + * Requires: + * + *\li '*manager' is a valid isc_socketmgr_t. + *\li 'nsockp' is not NULL. + * + * Returns: + * + *\li #ISC_R_SUCCESS + *\li #ISC_R_NOTIMPLEMENTED */ void @@ -738,7 +807,7 @@ isc_socket_cleanunix(isc_sockaddr_t *addr, isc_boolean_t active); isc_result_t isc_socket_permunix(isc_sockaddr_t *sockaddr, isc_uint32_t perm, - isc_uint32_t owner, isc_uint32_t group); + isc_uint32_t owner, isc_uint32_t group); /*%< * Set ownership and file permissions on the UNIX domain socket. * diff --git a/lib/isc/include/isc/timer.h b/lib/isc/include/isc/timer.h index 6e78be9..7a7f614 100644 --- a/lib/isc/include/isc/timer.h +++ b/lib/isc/include/isc/timer.h @@ -15,7 +15,7 @@ * PERFORMANCE OF THIS SOFTWARE. */ -/* $Id: timer.h,v 1.31.18.3.52.2 2008/07/24 23:48:09 tbox Exp $ */ +/* $Id: timer.h,v 1.31.18.5 2008/06/24 23:45:55 tbox Exp $ */ #ifndef ISC_TIMER_H #define ISC_TIMER_H 1 diff --git a/lib/isc/include/isc/types.h b/lib/isc/include/isc/types.h index 35a0be7..b501b2c 100644 --- a/lib/isc/include/isc/types.h +++ b/lib/isc/include/isc/types.h @@ -1,8 +1,8 @@ /* - * Copyright (C) 2004, 2005 Internet Systems Consortium, Inc. ("ISC") + * Copyright (C) 2004, 2005, 2008 Internet Systems Consortium, Inc. ("ISC") * Copyright (C) 1999-2003 Internet Software Consortium. * - * Permission to use, copy, modify, and distribute this software for any + * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * @@ -15,7 +15,7 @@ * PERFORMANCE OF THIS SOFTWARE. */ -/* $Id: types.h,v 1.35.18.2 2005/04/29 00:17:04 marka Exp $ */ +/* $Id: types.h,v 1.35.18.4 2008/06/24 23:45:55 tbox Exp $ */ #ifndef ISC_TYPES_H #define ISC_TYPES_H 1 @@ -65,6 +65,7 @@ typedef struct isc_mempool isc_mempool_t; /*%< Memory Pool */ typedef struct isc_msgcat isc_msgcat_t; /*%< Message Catalog */ typedef struct isc_ondestroy isc_ondestroy_t; /*%< On Destroy */ typedef struct isc_netaddr isc_netaddr_t; /*%< Net Address */ +typedef struct isc_portset isc_portset_t; /*%< Port Set */ typedef struct isc_quota isc_quota_t; /*%< Quota */ typedef struct isc_random isc_random_t; /*%< Random */ typedef struct isc_ratelimiter isc_ratelimiter_t; /*%< Rate Limiter */ diff --git a/lib/isc/mem.c b/lib/isc/mem.c index aec6118..408770d 100644 --- a/lib/isc/mem.c +++ b/lib/isc/mem.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2004-2007 Internet Systems Consortium, Inc. ("ISC") + * Copyright (C) 2004-2008 Internet Systems Consortium, Inc. ("ISC") * Copyright (C) 1997-2003 Internet Software Consortium. * * Permission to use, copy, modify, and/or distribute this software for any @@ -15,7 +15,7 @@ * PERFORMANCE OF THIS SOFTWARE. */ -/* $Id: mem.c,v 1.116.18.18 2007/10/30 23:31:43 marka Exp $ */ +/* $Id: mem.c,v 1.116.18.21 2008/02/07 23:45:56 tbox Exp $ */ /*! \file */ @@ -193,7 +193,7 @@ struct isc_mempool { if ((isc_mem_debugging & (ISC_MEM_DEBUGTRACE | \ ISC_MEM_DEBUGRECORD)) != 0 && \ b != NULL) \ - add_trace_entry(a, b, c, d, e); \ + add_trace_entry(a, b, c, d, e); \ } while (0) #define DELETE_TRACE(a, b, c, d, e) delete_trace_entry(a, b, c, d, e) @@ -314,7 +314,7 @@ delete_trace_entry(isc_mem_t *mctx, const void *ptr, unsigned int size, static inline size_t rmsize(size_t size) { /* - * round down to ALIGNMENT_SIZE + * round down to ALIGNMENT_SIZE */ return (size & (~(ALIGNMENT_SIZE - 1))); } @@ -603,7 +603,7 @@ mem_get(isc_mem_t *ctx, size_t size) { ret = (ctx->memalloc)(ctx->arg, size); if (ret == NULL) - ctx->memalloc_failures++; + ctx->memalloc_failures++; #if ISC_MEM_FILL if (ret != NULL) @@ -705,7 +705,7 @@ isc_mem_createx(size_t init_max_size, size_t target_size, { return (isc_mem_createx2(init_max_size, target_size, memalloc, memfree, arg, ctxp, ISC_MEMFLAG_DEFAULT)); - + } isc_result_t @@ -882,7 +882,7 @@ destroy(isc_mem_t *ctx) { dl != NULL; dl = ISC_LIST_HEAD(ctx->debuglist[i])) { ISC_LIST_UNLINK(ctx->debuglist[i], - dl, link); + dl, link); free(dl); } } @@ -907,7 +907,8 @@ destroy(isc_mem_t *ctx) { for (i = 0; i < ctx->basic_table_count; i++) (ctx->memfree)(ctx->arg, ctx->basic_table[i]); (ctx->memfree)(ctx->arg, ctx->freelists); - (ctx->memfree)(ctx->arg, ctx->basic_table); + if (ctx->basic_table != NULL) + (ctx->memfree)(ctx->arg, ctx->basic_table); } ondest = ctx->ondestroy; @@ -1085,7 +1086,6 @@ isc__mem_get(isc_mem_t *ctx, size_t size FLARG) { ADD_TRACE(ctx, ptr, size, file, line); if (ctx->hi_water != 0U && !ctx->hi_called && ctx->inuse > ctx->hi_water) { - ctx->hi_called = ISC_TRUE; call_water = ISC_TRUE; } if (ctx->inuse > ctx->maxinuse) { @@ -1141,10 +1141,8 @@ isc__mem_put(isc_mem_t *ctx, void *ptr, size_t size FLARG) * when the context was pushed over hi_water but then had * isc_mem_setwater() called with 0 for hi_water and lo_water. */ - if (ctx->hi_called && + if (ctx->hi_called && (ctx->inuse < ctx->lo_water || ctx->lo_water == 0U)) { - ctx->hi_called = ISC_FALSE; - if (ctx->water != NULL) call_water = ISC_TRUE; } @@ -1154,6 +1152,18 @@ isc__mem_put(isc_mem_t *ctx, void *ptr, size_t size FLARG) (ctx->water)(ctx->water_arg, ISC_MEM_LOWATER); } +void +isc_mem_waterack(isc_mem_t *ctx, int flag) { + REQUIRE(VALID_CONTEXT(ctx)); + + MCTXLOCK(ctx, &ctx->lock); + if (flag == ISC_MEM_LOWATER) + ctx->hi_called = ISC_FALSE; + else if (flag == ISC_MEM_HIWATER) + ctx->hi_called = ISC_TRUE; + MCTXUNLOCK(ctx, &ctx->lock); +} + #if ISC_MEM_TRACKLINES static void print_active(isc_mem_t *mctx, FILE *out) { @@ -1169,11 +1179,11 @@ print_active(isc_mem_t *mctx, FILE *out) { "memory allocations:\n")); found = ISC_FALSE; format = isc_msgcat_get(isc_msgcat, ISC_MSGSET_MEM, - ISC_MSG_PTRFILELINE, + ISC_MSG_PTRFILELINE, "\tptr %p size %u file %s line %u\n"); for (i = 0; i <= mctx->max_size; i++) { dl = ISC_LIST_HEAD(mctx->debuglist[i]); - + if (dl != NULL) found = ISC_TRUE; @@ -1371,7 +1381,7 @@ isc__mem_free(isc_mem_t *ctx, void *ptr FLARG) { * when the context was pushed over hi_water but then had * isc_mem_setwater() called with 0 for hi_water and lo_water. */ - if (ctx->hi_called && + if (ctx->hi_called && (ctx->inuse < ctx->lo_water || ctx->lo_water == 0U)) { ctx->hi_called = ISC_FALSE; @@ -1461,7 +1471,7 @@ isc_mem_inuse(isc_mem_t *ctx) { void isc_mem_setwater(isc_mem_t *ctx, isc_mem_water_t water, void *water_arg, - size_t hiwater, size_t lowater) + size_t hiwater, size_t lowater) { isc_boolean_t callwater = ISC_FALSE; isc_mem_water_t oldwater; @@ -1492,7 +1502,7 @@ isc_mem_setwater(isc_mem_t *ctx, isc_mem_water_t water, void *water_arg, ctx->hi_called = ISC_FALSE; } MCTXUNLOCK(ctx, &ctx->lock); - + if (callwater && oldwater != NULL) (oldwater)(oldwater_arg, ISC_MEM_LOWATER); } @@ -1931,7 +1941,7 @@ isc_mem_printallactive(FILE *file) { #endif } -void +void isc_mem_checkdestroyed(FILE *file) { RUNTIME_CHECK(isc_once_do(&once, initialize_action) == ISC_R_SUCCESS); diff --git a/lib/isc/portset.c b/lib/isc/portset.c new file mode 100644 index 0000000..0265c89 --- /dev/null +++ b/lib/isc/portset.c @@ -0,0 +1,143 @@ +/* + * Copyright (C) 2008 Internet Systems Consortium, Inc. ("ISC") + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH + * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY + * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT, + * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM + * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE + * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR + * PERFORMANCE OF THIS SOFTWARE. + */ + +/* $Id: portset.c,v 1.2.4.3 2008/06/24 23:27:11 marka Exp $ */ + +/*! \file */ + +#include <config.h> + +#include <isc/mem.h> +#include <isc/portset.h> +#include <isc/string.h> +#include <isc/types.h> +#include <isc/util.h> + +#define ISC_PORTSET_BUFSIZE (65536 / (sizeof(isc_uint32_t) * 8)) + +/*% + * Internal representation of portset. It's an array of 32-bit integers, each + * bit corresponding to a single port in the ascending order. For example, + * the second most significant bit of buf[0] corresponds to port 1. + */ +struct isc_portset { + unsigned int nports; /*%< number of ports in the set */ + isc_uint32_t buf[ISC_PORTSET_BUFSIZE]; +}; + +static inline isc_boolean_t +portset_isset(isc_portset_t *portset, in_port_t port) { + return (ISC_TF((portset->buf[port >> 5] & (1 << (port & 31))) != 0)); +} + +static inline void +portset_add(isc_portset_t *portset, in_port_t port) { + if (!portset_isset(portset, port)) { + portset->nports++; + portset->buf[port >> 5] |= (1 << (port & 31)); + } +} + +static inline void +portset_remove(isc_portset_t *portset, in_port_t port) { + if (portset_isset(portset, port)) { + portset->nports--; + portset->buf[port >> 5] &= ~(1 << (port & 31)); + } +} + +isc_result_t +isc_portset_create(isc_mem_t *mctx, isc_portset_t **portsetp) { + isc_portset_t *portset; + + REQUIRE(portsetp != NULL && *portsetp == NULL); + + portset = isc_mem_get(mctx, sizeof(*portset)); + if (portset == NULL) + return (ISC_R_NOMEMORY); + + /* Make the set 'empty' by default */ + memset(portset, 0, sizeof(*portset)); + *portsetp = portset; + + return (ISC_R_SUCCESS); +} + +void +isc_portset_destroy(isc_mem_t *mctx, isc_portset_t **portsetp) { + isc_portset_t *portset; + + REQUIRE(portsetp != NULL); + portset = *portsetp; + + isc_mem_put(mctx, portset, sizeof(*portset)); +} + +isc_boolean_t +isc_portset_isset(isc_portset_t *portset, in_port_t port) { + REQUIRE(portset != NULL); + + return (portset_isset(portset, port)); +} + +unsigned int +isc_portset_nports(isc_portset_t *portset) { + REQUIRE(portset != NULL); + + return (portset->nports); +} + +void +isc_portset_add(isc_portset_t *portset, in_port_t port) { + REQUIRE(portset != NULL); + + portset_add(portset, port); +} + +void +isc_portset_remove(isc_portset_t *portset, in_port_t port) { + portset_remove(portset, port); +} + +void +isc_portset_addrange(isc_portset_t *portset, in_port_t port_lo, + in_port_t port_hi) +{ + in_port_t p; + + REQUIRE(portset != NULL); + REQUIRE(port_lo <= port_hi); + + p = port_lo; + do { + portset_add(portset, p); + } while (p++ < port_hi); +} + +void +isc_portset_removerange(isc_portset_t *portset, in_port_t port_lo, + in_port_t port_hi) +{ + in_port_t p; + + REQUIRE(portset != NULL); + REQUIRE(port_lo <= port_hi); + + p = port_lo; + do { + portset_remove(portset, p); + } while (p++ < port_hi); +} diff --git a/lib/isc/print.c b/lib/isc/print.c index 59c528b..191ad24 100644 --- a/lib/isc/print.c +++ b/lib/isc/print.c @@ -1,8 +1,8 @@ /* - * Copyright (C) 2004-2006 Internet Systems Consortium, Inc. ("ISC") + * Copyright (C) 2004-2006, 2008 Internet Systems Consortium, Inc. ("ISC") * Copyright (C) 1999-2001, 2003 Internet Software Consortium. * - * Permission to use, copy, modify, and distribute this software for any + * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * @@ -15,7 +15,7 @@ * PERFORMANCE OF THIS SOFTWARE. */ -/* $Id: print.c,v 1.27.18.3 2006/04/17 18:27:33 explorer Exp $ */ +/* $Id: print.c,v 1.27.18.5 2008/02/18 23:46:01 tbox Exp $ */ /*! \file */ @@ -246,8 +246,24 @@ isc_print_vsnprintf(char *str, size_t size, const char *format, va_list ap) { head = ""; tmpui = tmpi; } - sprintf(buf, "%" ISC_PRINT_QUADFORMAT "u", - tmpui); + if (tmpui <= 0xffffffffU) + sprintf(buf, "%lu", + (unsigned long)tmpui); + else { + unsigned long mid; + unsigned long lo; + unsigned long hi; + lo = tmpui % 1000000000; + tmpui /= 1000000000; + mid = tmpui % 1000000000; + hi = tmpui / 1000000000; + if (hi != 0) + sprintf(buf, "%lu", hi); + else + buf[0] = '\n'; + sprintf(buf + strlen(buf), "%lu", mid); + sprintf(buf + strlen(buf), "%lu", lo); + } goto printint; case 'o': if (q) @@ -256,10 +272,29 @@ isc_print_vsnprintf(char *str, size_t size, const char *format, va_list ap) { tmpui = va_arg(ap, long int); else tmpui = va_arg(ap, int); - sprintf(buf, - alt ? "%#" ISC_PRINT_QUADFORMAT "o" - : "%" ISC_PRINT_QUADFORMAT "o", - tmpui); + if (tmpui <= 0xffffffffU) + sprintf(buf, alt ? "%#lo" : "%lo", + (unsigned long)tmpui); + else { + unsigned long mid; + unsigned long lo; + unsigned long hi; + lo = tmpui % 010000000000; + tmpui /= 010000000000; + mid = tmpui % 010000000000; + hi = tmpui / 010000000000; + if (hi != 0) { + sprintf(buf, + alt ? "%#lo" : "%lo", + hi); + sprintf(buf + strlen(buf), + "%lo", mid); + } else + sprintf(buf, + alt ? "%#lo" : "%lo", + mid); + sprintf(buf + strlen(buf), "%lo", lo); + } goto printint; case 'u': if (q) @@ -268,8 +303,24 @@ isc_print_vsnprintf(char *str, size_t size, const char *format, va_list ap) { tmpui = va_arg(ap, unsigned long int); else tmpui = va_arg(ap, unsigned int); - sprintf(buf, "%" ISC_PRINT_QUADFORMAT "u", - tmpui); + if (tmpui <= 0xffffffffU) + sprintf(buf, "%lu", + (unsigned long)tmpui); + else { + unsigned long mid; + unsigned long lo; + unsigned long hi; + lo = tmpui % 1000000000; + tmpui /= 1000000000; + mid = tmpui % 1000000000; + hi = tmpui / 1000000000; + if (hi != 0) + sprintf(buf, "%lu", hi); + else + buf[0] = '\n'; + sprintf(buf + strlen(buf), "%lu", mid); + sprintf(buf + strlen(buf), "%lu", lo); + } goto printint; case 'x': if (q) @@ -283,8 +334,15 @@ isc_print_vsnprintf(char *str, size_t size, const char *format, va_list ap) { if (precision > 2) precision -= 2; } - sprintf(buf, "%" ISC_PRINT_QUADFORMAT "x", - tmpui); + if (tmpui <= 0xffffffffU) + sprintf(buf, "%lx", + (unsigned long)tmpui); + else { + unsigned long hi = tmpui>>32; + unsigned long lo = tmpui & 0xffffffff; + sprintf(buf, "%lx", hi); + sprintf(buf + strlen(buf), "%lx", lo); + } goto printint; case 'X': if (q) @@ -298,8 +356,15 @@ isc_print_vsnprintf(char *str, size_t size, const char *format, va_list ap) { if (precision > 2) precision -= 2; } - sprintf(buf, "%" ISC_PRINT_QUADFORMAT "X", - tmpui); + if (tmpui <= 0xffffffffU) + sprintf(buf, "%lX", + (unsigned long)tmpui); + else { + unsigned long hi = tmpui>>32; + unsigned long lo = tmpui & 0xffffffff; + sprintf(buf, "%lX", hi); + sprintf(buf + strlen(buf), "%lX", lo); + } goto printint; printint: if (precision != 0 || width != 0) { diff --git a/lib/isc/pthreads/mutex.c b/lib/isc/pthreads/mutex.c index 7716980..afbc861 100644 --- a/lib/isc/pthreads/mutex.c +++ b/lib/isc/pthreads/mutex.c @@ -1,8 +1,8 @@ /* - * Copyright (C) 2004, 2005 Internet Systems Consortium, Inc. ("ISC") + * Copyright (C) 2004, 2005, 2008 Internet Systems Consortium, Inc. ("ISC") * Copyright (C) 2000-2002 Internet Software Consortium. * - * Permission to use, copy, modify, and distribute this software for any + * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * @@ -15,7 +15,7 @@ * PERFORMANCE OF THIS SOFTWARE. */ -/* $Id: mutex.c,v 1.8.18.4 2005/07/12 01:22:32 marka Exp $ */ +/* $Id: mutex.c,v 1.8.18.6 2008/04/04 23:46:02 tbox Exp $ */ /*! \file */ @@ -36,23 +36,23 @@ /*% Operations on timevals; adapted from FreeBSD's sys/time.h */ #define timevalclear(tvp) ((tvp)->tv_sec = (tvp)->tv_usec = 0) #define timevaladd(vvp, uvp) \ - do { \ - (vvp)->tv_sec += (uvp)->tv_sec; \ - (vvp)->tv_usec += (uvp)->tv_usec; \ - if ((vvp)->tv_usec >= 1000000) { \ - (vvp)->tv_sec++; \ - (vvp)->tv_usec -= 1000000; \ - } \ - } while (0) + do { \ + (vvp)->tv_sec += (uvp)->tv_sec; \ + (vvp)->tv_usec += (uvp)->tv_usec; \ + if ((vvp)->tv_usec >= 1000000) { \ + (vvp)->tv_sec++; \ + (vvp)->tv_usec -= 1000000; \ + } \ + } while (0) #define timevalsub(vvp, uvp) \ - do { \ - (vvp)->tv_sec -= (uvp)->tv_sec; \ - (vvp)->tv_usec -= (uvp)->tv_usec; \ - if ((vvp)->tv_usec < 0) { \ - (vvp)->tv_sec--; \ - (vvp)->tv_usec += 1000000; \ - } \ - } while (0) + do { \ + (vvp)->tv_sec -= (uvp)->tv_sec; \ + (vvp)->tv_usec -= (uvp)->tv_usec; \ + if ((vvp)->tv_usec < 0) { \ + (vvp)->tv_sec--; \ + (vvp)->tv_usec += 1000000; \ + } \ + } while (0) /*@}*/ @@ -77,8 +77,11 @@ struct isc_mutexstats { isc_mutexlocker_t lockers[ISC_MUTEX_MAX_LOCKERS]; }; -#define TABLESIZE (8 * 1024) -static isc_mutexstats_t stats[TABLESIZE]; +#ifndef ISC_MUTEX_PROFTABLESIZE +#define ISC_MUTEX_PROFTABLESIZE (16 * 1024) +#endif +static isc_mutexstats_t stats[ISC_MUTEX_PROFTABLESIZE]; +static int stats_next = 0; static isc_boolean_t stats_init = ISC_FALSE; static pthread_mutex_t statslock = PTHREAD_MUTEX_INITIALIZER; @@ -95,21 +98,19 @@ isc_mutex_init_profile(isc_mutex_t *mp, const char *file, int line) { RUNTIME_CHECK(pthread_mutex_lock(&statslock) == 0); - if (stats_init == ISC_FALSE) { - for (i = 0; i < TABLESIZE; i++) { - stats[i].file = NULL; - } + if (stats_init == ISC_FALSE) stats_init = ISC_TRUE; - } - mp->stats = NULL; - for (i = 0; i < TABLESIZE; i++) { - if (stats[i].file == NULL) { - mp->stats = &stats[i]; - break; - } - } - RUNTIME_CHECK(mp->stats != NULL); + /* + * If all statistics entries have been used, give up and trigger an + * assertion failure. There would be no other way to deal with this + * because we'd like to keep record of all locks for the purpose of + * debugging and the number of necessary locks is unpredictable. + * If this failure is triggered while debugging, named should be + * rebuilt with an increased ISC_MUTEX_PROFTABLESIZE. + */ + RUNTIME_CHECK(stats_next < ISC_MUTEX_PROFTABLESIZE); + mp->stats = &stats[stats_next++]; RUNTIME_CHECK(pthread_mutex_unlock(&statslock) == 0); @@ -196,10 +197,9 @@ void isc_mutex_statsprofile(FILE *fp) { isc_mutexlocker_t *locker; int i, j; + fprintf(fp, "Mutex stats (in us)\n"); - for (i = 0; i < TABLESIZE; i++) { - if (stats[i].file == NULL) - continue; + for (i = 0; i < stats_next; i++) { fprintf(fp, "%-12s %4d: %10u %lu.%06lu %lu.%06lu\n", stats[i].file, stats[i].line, stats[i].count, stats[i].locked_total.tv_sec, @@ -236,7 +236,7 @@ isc_mutex_init_errcheck(isc_mutex_t *mp) if (pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK) != 0) return (ISC_R_UNEXPECTED); - + err = pthread_mutex_init(mp, &attr) != 0) if (err == ENOMEM) return (ISC_R_NOMEMORY); @@ -251,6 +251,7 @@ pthread_mutexattr_t isc__mutex_attrs = { }; #endif +#if !(ISC_MUTEX_DEBUG && defined(PTHREAD_MUTEX_ERRORCHECK)) && !ISC_MUTEX_PROFILE isc_result_t isc__mutex_init(isc_mutex_t *mp, const char *file, unsigned int line) { char strbuf[ISC_STRERRORSIZE]; @@ -268,3 +269,4 @@ isc__mutex_init(isc_mutex_t *mp, const char *file, unsigned int line) { } return (result); } +#endif diff --git a/lib/isc/timer.c b/lib/isc/timer.c index d594307..c27281d 100644 --- a/lib/isc/timer.c +++ b/lib/isc/timer.c @@ -15,7 +15,7 @@ * PERFORMANCE OF THIS SOFTWARE. */ -/* $Id: timer.c,v 1.73.18.7.10.3 2008/07/29 18:35:53 jinmei Exp $ */ +/* $Id: timer.c,v 1.73.18.10 2008/08/22 05:59:04 marka Exp $ */ /*! \file */ @@ -225,7 +225,7 @@ schedule(isc_timer_t *timer, isc_time_t *now, isc_boolean_t signal_ok) { "*** POKED TIMER ***"); } } - + if (timer->index == 1 && signal_ok) { XTRACE(isc_msgcat_get(isc_msgcat, ISC_MSGSET_TIMER, ISC_MSG_SIGNALSCHED, diff --git a/lib/isc/unix/app.c b/lib/isc/unix/app.c index b71d766..c119362 100644 --- a/lib/isc/unix/app.c +++ b/lib/isc/unix/app.c @@ -1,8 +1,8 @@ /* - * Copyright (C) 2004, 2005 Internet Systems Consortium, Inc. ("ISC") + * Copyright (C) 2004, 2005, 2008 Internet Systems Consortium, Inc. ("ISC") * Copyright (C) 1999-2003 Internet Software Consortium. * - * Permission to use, copy, modify, and distribute this software for any + * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * @@ -15,7 +15,7 @@ * PERFORMANCE OF THIS SOFTWARE. */ -/* $Id: app.c,v 1.50.18.2.50.1 2008/07/29 04:47:31 each Exp $ */ +/* $Id: app.c,v 1.50.18.8 2008/10/15 03:41:17 marka Exp $ */ /*! \file */ @@ -30,6 +30,9 @@ #include <unistd.h> #include <signal.h> #include <sys/time.h> +#ifdef HAVE_EPOLL +#include <sys/epoll.h> +#endif #include <isc/app.h> #include <isc/boolean.h> @@ -59,11 +62,11 @@ static isc_boolean_t running = ISC_FALSE; /*! * We assume that 'want_shutdown' can be read and written atomically. */ -static isc_boolean_t want_shutdown = ISC_FALSE; +static volatile isc_boolean_t want_shutdown = ISC_FALSE; /* * We assume that 'want_reload' can be read and written atomically. */ -static isc_boolean_t want_reload = ISC_FALSE; +static volatile isc_boolean_t want_reload = ISC_FALSE; static isc_boolean_t blocked = ISC_FALSE; #ifdef ISC_PLATFORM_USETHREADS @@ -87,13 +90,13 @@ static pthread_t main_thread; #ifndef HAVE_SIGWAIT static void exit_action(int arg) { - UNUSED(arg); + UNUSED(arg); want_shutdown = ISC_TRUE; } static void reload_action(int arg) { - UNUSED(arg); + UNUSED(arg); want_reload = ISC_TRUE; } #endif @@ -297,14 +300,13 @@ isc_app_onrun(isc_mem_t *mctx, isc_task_t *task, isc_taskaction_t action, * Event loop for nonthreaded programs. */ static isc_result_t -evloop() { +evloop(void) { isc_result_t result; while (!want_shutdown) { int n; isc_time_t when, now; struct timeval tv, *tvp; - fd_set *readfds, *writefds; - int maxfd; + isc_socketwait_t *swait; isc_boolean_t readytasks; isc_boolean_t call_timer_dispatch = ISC_FALSE; @@ -331,15 +333,15 @@ evloop() { } } - isc__socketmgr_getfdsets(&readfds, &writefds, &maxfd); - n = select(maxfd, readfds, writefds, NULL, tvp); + swait = NULL; + n = isc__socketmgr_waitevents(tvp, &swait); if (n == 0 || call_timer_dispatch) { /* * We call isc__timermgr_dispatch() only when * necessary, in order to reduce overhead. If the * select() call indicates a timeout, we need the - * dispatch. Even if not, if we set the 0-timeout + * dispatch. Even if not, if we set the 0-timeout * for the select() call, we need to check the timer * events. In the 'readytasks' case, there may be no * timeout event actually, but there is no other way @@ -352,8 +354,7 @@ evloop() { isc__timermgr_dispatch(); } if (n > 0) - (void)isc__socketmgr_dispatch(readfds, writefds, - maxfd); + (void)isc__socketmgr_dispatch(swait); (void)isc__taskmgr_dispatch(); if (want_reload) { @@ -423,7 +424,7 @@ isc__nothread_signal_hack(isc_condition_t *cp) { signalled = ISC_TRUE; return (ISC_R_SUCCESS); } - + #endif /* ISC_PLATFORM_USETHREADS */ isc_result_t @@ -434,10 +435,10 @@ isc_app_run(void) { #ifdef ISC_PLATFORM_USETHREADS sigset_t sset; char strbuf[ISC_STRERRORSIZE]; -#endif /* ISC_PLATFORM_USETHREADS */ #ifdef HAVE_SIGWAIT int sig; #endif +#endif /* ISC_PLATFORM_USETHREADS */ #ifdef HAVE_LINUXTHREADS REQUIRE(main_thread == pthread_self()); @@ -676,7 +677,7 @@ isc_app_unblock(void) { REQUIRE(blockedthread == pthread_self()); RUNTIME_CHECK(sigemptyset(&sset) == 0 && - sigaddset(&sset, SIGINT) == 0 && + sigaddset(&sset, SIGINT) == 0 && sigaddset(&sset, SIGTERM) == 0); RUNTIME_CHECK(pthread_sigmask(SIG_BLOCK, &sset, NULL) == 0); #endif /* ISC_PLATFORM_USETHREADS */ diff --git a/lib/isc/unix/include/isc/net.h b/lib/isc/unix/include/isc/net.h index bdd8c14..948e7b1 100644 --- a/lib/isc/unix/include/isc/net.h +++ b/lib/isc/unix/include/isc/net.h @@ -1,8 +1,8 @@ /* - * Copyright (C) 2004, 2005 Internet Systems Consortium, Inc. ("ISC") + * Copyright (C) 2004, 2005, 2008 Internet Systems Consortium, Inc. ("ISC") * Copyright (C) 1999-2003 Internet Software Consortium. * - * Permission to use, copy, modify, and distribute this software for any + * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * @@ -15,7 +15,7 @@ * PERFORMANCE OF THIS SOFTWARE. */ -/* $Id: net.h,v 1.39.18.4 2005/04/27 05:02:37 sra Exp $ */ +/* $Id: net.h,v 1.39.18.6 2008/06/24 23:45:55 tbox Exp $ */ #ifndef ISC_NET_H #define ISC_NET_H 1 @@ -104,7 +104,7 @@ /*% * Required for some pre RFC2133 implementations. * IN6ADDR_ANY_INIT and IN6ADDR_LOOPBACK_INIT were added in - * draft-ietf-ipngwg-bsd-api-04.txt or draft-ietf-ipngwg-bsd-api-05.txt. + * draft-ietf-ipngwg-bsd-api-04.txt or draft-ietf-ipngwg-bsd-api-05.txt. * If 's6_addr' is defined then assume that there is a union and three * levels otherwise assume two levels required. */ @@ -202,7 +202,7 @@ extern const struct in6_addr isc_net_in6addrloop; #ifdef ISC_PLATFORM_FIXIN6ISADDR #undef IN6_IS_ADDR_GEOGRAPHIC -/*! +/*! * \brief * Fix UnixWare 7.1.1's broken IN6_IS_ADDR_* definitions. */ @@ -324,6 +324,23 @@ isc_net_probeunix(void); * Returns whether UNIX domain sockets are supported. */ +isc_result_t +isc_net_getudpportrange(int af, in_port_t *low, in_port_t *high); +/*%< + * Returns system's default range of ephemeral UDP ports, if defined. + * If the range is not available or unknown, ISC_NET_PORTRANGELOW and + * ISC_NET_PORTRANGEHIGH will be returned. + * + * Requires: + * + *\li 'low' and 'high' must be non NULL. + * + * Returns: + * + *\li *low and *high will be the ports specifying the low and high ends of + * the range. + */ + #ifdef ISC_PLATFORM_NEEDNTOP const char * isc_net_ntop(int af, const void *src, char *dst, size_t size); diff --git a/lib/isc/unix/net.c b/lib/isc/unix/net.c index 1de6b32..600ac92 100644 --- a/lib/isc/unix/net.c +++ b/lib/isc/unix/net.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2004, 2005, 2007 Internet Systems Consortium, Inc. ("ISC") + * Copyright (C) 2004, 2005, 2007, 2008 Internet Systems Consortium, Inc. ("ISC") * Copyright (C) 1999-2003 Internet Software Consortium. * * Permission to use, copy, modify, and/or distribute this software for any @@ -15,10 +15,19 @@ * PERFORMANCE OF THIS SOFTWARE. */ -/* $Id: net.c,v 1.29.18.6 2007/09/13 23:46:26 tbox Exp $ */ +/* $Id: net.c,v 1.29.18.9 2008/07/04 05:52:05 each Exp $ */ #include <config.h> +#include <sys/types.h> + +#if defined(HAVE_SYS_SYSCTL_H) +#if defined(HAVE_SYS_PARAM_H) +#include <sys/param.h> +#endif +#include <sys/sysctl.h> +#endif + #include <errno.h> #include <unistd.h> @@ -30,6 +39,59 @@ #include <isc/string.h> #include <isc/util.h> +/*% + * Definitions about UDP port range specification. This is a total mess of + * portability variants: some use sysctl (but the sysctl names vary), some use + * system-specific interfaces, some have the same interface for IPv4 and IPv6, + * some separate them, etc... + */ + +/*% + * The last resort defaults: use all non well known port space + */ +#ifndef ISC_NET_PORTRANGELOW +#define ISC_NET_PORTRANGELOW 1024 +#endif /* ISC_NET_PORTRANGELOW */ +#ifndef ISC_NET_PORTRANGEHIGH +#define ISC_NET_PORTRANGEHIGH 65535 +#endif /* ISC_NET_PORTRANGEHIGH */ + +#ifdef HAVE_SYSCTLBYNAME + +/*% + * sysctl variants + */ +#if defined(__FreeBSD__) || defined(__APPLE__) || defined(__DragonFly__) +#define USE_SYSCTL_PORTRANGE +#define SYSCTL_V4PORTRANGE_LOW "net.inet.ip.portrange.hifirst" +#define SYSCTL_V4PORTRANGE_HIGH "net.inet.ip.portrange.hilast" +#define SYSCTL_V6PORTRANGE_LOW "net.inet.ip.portrange.hifirst" +#define SYSCTL_V6PORTRANGE_HIGH "net.inet.ip.portrange.hilast" +#endif + +#ifdef __NetBSD__ +#define USE_SYSCTL_PORTRANGE +#define SYSCTL_V4PORTRANGE_LOW "net.inet.ip.anonportmin" +#define SYSCTL_V4PORTRANGE_HIGH "net.inet.ip.anonportmax" +#define SYSCTL_V6PORTRANGE_LOW "net.inet6.ip6.anonportmin" +#define SYSCTL_V6PORTRANGE_HIGH "net.inet6.ip6.anonportmax" +#endif + +#else /* !HAVE_SYSCTLBYNAME */ + +#ifdef __OpenBSD__ +#define USE_SYSCTL_PORTRANGE +#define SYSCTL_V4PORTRANGE_LOW { CTL_NET, PF_INET, IPPROTO_IP, \ + IPCTL_IPPORT_HIFIRSTAUTO } +#define SYSCTL_V4PORTRANGE_HIGH { CTL_NET, PF_INET, IPPROTO_IP, \ + IPCTL_IPPORT_HILASTAUTO } +/* Same for IPv6 */ +#define SYSCTL_V6PORTRANGE_LOW SYSCTL_V4PORTRANGE_LOW +#define SYSCTL_V6PORTRANGE_HIGH SYSCTL_V4PORTRANGE_HIGH +#endif + +#endif /* HAVE_SYSCTLBYNAME */ + #if defined(ISC_PLATFORM_HAVEIPV6) # if defined(ISC_PLATFORM_NEEDIN6ADDRANY) const struct in6_addr isc_net_in6addrany = IN6ADDR_ANY_INIT; @@ -338,6 +400,101 @@ isc_net_probe_ipv6pktinfo(void) { return (ipv6pktinfo_result); } +#if defined(USE_SYSCTL_PORTRANGE) +#if defined(HAVE_SYSCTLBYNAME) +static isc_result_t +getudpportrange_sysctl(int af, in_port_t *low, in_port_t *high) { + int port_low, port_high; + size_t portlen; + const char *sysctlname_lowport, *sysctlname_hiport; + + if (af == AF_INET) { + sysctlname_lowport = SYSCTL_V4PORTRANGE_LOW; + sysctlname_hiport = SYSCTL_V4PORTRANGE_HIGH; + } else { + sysctlname_lowport = SYSCTL_V6PORTRANGE_LOW; + sysctlname_hiport = SYSCTL_V6PORTRANGE_HIGH; + } + portlen = sizeof(portlen); + if (sysctlbyname(sysctlname_lowport, &port_low, &portlen, + NULL, 0) < 0) { + return (ISC_R_FAILURE); + } + portlen = sizeof(portlen); + if (sysctlbyname(sysctlname_hiport, &port_high, &portlen, + NULL, 0) < 0) { + return (ISC_R_FAILURE); + } + if ((port_low & ~0xffff) != 0 || (port_high & ~0xffff) != 0) + return (ISC_R_RANGE); + + *low = (in_port_t)port_low; + *high = (in_port_t)port_high; + + return (ISC_R_SUCCESS); +} +#else /* !HAVE_SYSCTLBYNAME */ +static isc_result_t +getudpportrange_sysctl(int af, in_port_t *low, in_port_t *high) { + int mib_lo4[4] = SYSCTL_V4PORTRANGE_LOW; + int mib_hi4[4] = SYSCTL_V4PORTRANGE_HIGH; + int mib_lo6[4] = SYSCTL_V6PORTRANGE_LOW; + int mib_hi6[4] = SYSCTL_V6PORTRANGE_HIGH; + int *mib_lo, *mib_hi, miblen; + int port_low, port_high; + size_t portlen; + + if (af == AF_INET) { + mib_lo = mib_lo4; + mib_hi = mib_hi4; + miblen = sizeof(mib_lo4) / sizeof(mib_lo4[0]); + } else { + mib_lo = mib_lo6; + mib_hi = mib_hi6; + miblen = sizeof(mib_lo6) / sizeof(mib_lo6[0]); + } + + portlen = sizeof(portlen); + if (sysctl(mib_lo, miblen, &port_low, &portlen, NULL, 0) < 0) { + return (ISC_R_FAILURE); + } + + portlen = sizeof(portlen); + if (sysctl(mib_hi, miblen, &port_high, &portlen, NULL, 0) < 0) { + return (ISC_R_FAILURE); + } + + if ((port_low & ~0xffff) != 0 || (port_high & ~0xffff) != 0) + return (ISC_R_RANGE); + + *low = (in_port_t) port_low; + *high = (in_port_t) port_high; + + return (ISC_R_SUCCESS); +} +#endif /* HAVE_SYSCTLBYNAME */ +#endif /* USE_SYSCTL_PORTRANGE */ + +isc_result_t +isc_net_getudpportrange(int af, in_port_t *low, in_port_t *high) { + int result = ISC_R_FAILURE; + + REQUIRE(low != NULL && high != NULL); + +#if defined(USE_SYSCTL_PORTRANGE) + result = getudpportrange_sysctl(af, low, high); +#else + UNUSED(af); +#endif + + if (result != ISC_R_SUCCESS) { + *low = ISC_NET_PORTRANGELOW; + *high = ISC_NET_PORTRANGEHIGH; + } + + return (ISC_R_SUCCESS); /* we currently never fail in this function */ +} + void isc_net_disableipv4(void) { initialize(); diff --git a/lib/isc/unix/resource.c b/lib/isc/unix/resource.c index 0264976..e9bc5fd 100644 --- a/lib/isc/unix/resource.c +++ b/lib/isc/unix/resource.c @@ -15,7 +15,7 @@ * PERFORMANCE OF THIS SOFTWARE. */ -/* $Id: resource.c,v 1.12.944.4 2008/07/28 22:44:46 marka Exp $ */ +/* $Id: resource.c,v 1.12.18.6 2008/08/05 07:17:05 marka Exp $ */ #include <config.h> @@ -32,7 +32,7 @@ #include <linux/fs.h> /* To get the large NR_OPEN. */ #endif -#ifdef __hpux +#if defined(__hpux) && defined(HAVE_SYS_DYNTUNE_H) #include <sys/dyntune.h> #endif @@ -48,13 +48,13 @@ resource2rlim(isc_resource_t resource, int *rlim_resource) { break; case isc_resource_cputime: *rlim_resource = RLIMIT_CPU; - break; + break; case isc_resource_datasize: *rlim_resource = RLIMIT_DATA; - break; + break; case isc_resource_filesize: *rlim_resource = RLIMIT_FSIZE; - break; + break; case isc_resource_lockedmemory: #ifdef RLIMIT_MEMLOCK *rlim_resource = RLIMIT_MEMLOCK; @@ -87,7 +87,7 @@ resource2rlim(isc_resource_t resource, int *rlim_resource) { *rlim_resource = RLIMIT_STACK; break; default: - /* + /* * This test is not very robust if isc_resource_t * changes, but generates a clear assertion message. */ @@ -140,51 +140,6 @@ isc_resource_setlimit(isc_resource_t resource, isc_resourcevalue_t value) { rlim_value = value; } - /* - * The BIND 8 documentation reports: - * - * Note: on some operating systems the server cannot set an - * unlimited value and cannot determine the maximum number of - * open files the kernel can support. On such systems, choosing - * unlimited will cause the server to use the larger of the - * rlim_max for RLIMIT_NOFILE and the value returned by - * sysconf(_SC_OPEN_MAX). If the actual kernel limit is larger - * than this value, use limit files to specify the limit - * explicitly. - * - * The CHANGES for 8.1.2-T3A also mention: - * - * 352. [bug] Because of problems with setting an infinite - * rlim_max for RLIMIT_NOFILE on some systems, previous versions - * of the server implemented "limit files unlimited" by setting - * the limit to the value returned by sysconf(_SC_OPEN_MAX). The - * server will now use RLIM_INFINITY on systems which allow it. - * - * At some point the BIND 8 server stopped using SC_OPEN_MAX for this - * purpose at all, but it isn't clear to me when or why, as my access - * to the CVS archive is limited at the time of this writing. What - * BIND 8 *does* do is to set RLIMIT_NOFILE to either RLIMIT_INFINITY - * on a half dozen operating systems or to FD_SETSIZE on the rest, - * the latter of which is probably fewer than the real limit. (Note - * that libisc's socket module will have problems with any fd over - * FD_SETSIZE. This should be fixed in the socket module, not a - * limitation here. BIND 8's eventlib also has a problem, making - * its RLIMIT_INFINITY setting useless, because it closes and ignores - * any fd over FD_SETSIZE.) - * - * More troubling is the reference to some operating systems not being - * able to set an unlimited value for the number of open files. I'd - * hate to put in code that is really only there to support archaic - * systems that the rest of libisc won't work on anyway. So what this - * extremely verbose comment is here to say is the following: - * - * I'm aware there might be an issue with not limiting the value - * for RLIMIT_NOFILE on some systems, but since I don't know yet - * what those systems are and what the best workaround is (use - * sysconf()? rlim_max from getrlimit()? FD_SETSIZE?) so nothing - * is currently being done to clamp the value for open files. - */ - rl.rlim_cur = rl.rlim_max = rlim_value; unixresult = setrlimit(unixresource, &rl); @@ -215,7 +170,7 @@ isc_resource_setlimit(isc_resource_t resource, isc_resourcevalue_t value) { if (unixresult == 0) return (ISC_R_SUCCESS); } -#elif defined(__hpux) +#elif defined(__hpux) && defined(HAVE_SYS_DYNTUNE_H) if (resource == isc_resource_openfiles && rlim_value == RLIM_INFINITY) { uint64_t maxfiles; if (gettune("maxfiles_lim", &maxfiles) == 0) { @@ -255,7 +210,7 @@ isc_resource_getlimit(isc_resource_t resource, isc_resourcevalue_t *value) { } isc_result_t -isc_resource_curlimit(isc_resource_t resource, isc_resourcevalue_t *value) { +isc_resource_getcurlimit(isc_resource_t resource, isc_resourcevalue_t *value) { int unixresult; int unixresource; struct rlimit rl; diff --git a/lib/isc/unix/socket.c b/lib/isc/unix/socket.c index 1b4da78..3239614 100644 --- a/lib/isc/unix/socket.c +++ b/lib/isc/unix/socket.c @@ -15,7 +15,7 @@ * PERFORMANCE OF THIS SOFTWARE. */ -/* $Id: socket.c,v 1.237.18.29.10.6 2008/07/29 04:47:31 each Exp $ */ +/* $Id: socket.c,v 1.237.18.56 2008/11/12 03:58:36 marka Exp $ */ /*! \file */ @@ -25,9 +25,6 @@ #include <sys/types.h> #include <sys/socket.h> #include <sys/stat.h> -#ifdef ISC_PLATFORM_HAVESYSUNH -#include <sys/un.h> -#endif #include <sys/time.h> #include <sys/uio.h> @@ -58,6 +55,19 @@ #include <isc/thread.h> #include <isc/util.h> +#ifdef ISC_PLATFORM_HAVESYSUNH +#include <sys/un.h> +#endif +#ifdef ISC_PLATFORM_HAVEKQUEUE +#include <sys/event.h> +#endif +#ifdef ISC_PLATFORM_HAVEEPOLL +#include <sys/epoll.h> +#endif +#ifdef ISC_PLATFORM_HAVEDEVPOLL +#include <sys/devpoll.h> +#endif + #include "errno2result.h" #ifndef ISC_PLATFORM_USETHREADS @@ -65,9 +75,46 @@ #endif /* ISC_PLATFORM_USETHREADS */ /*% - * Max number of open sockets. In the vast majority of cases the default size - * of FD_SETSIZE should be fine, and this constant should be increased only - * when absolutely necessary and possible, i.e., the server is exhausting all + * Choose the most preferable multiplex method. + */ +#ifdef ISC_PLATFORM_HAVEKQUEUE +#define USE_KQUEUE +#elif defined (ISC_PLATFORM_HAVEEPOLL) +#define USE_EPOLL +#elif defined (ISC_PLATFORM_HAVEDEVPOLL) +#define USE_DEVPOLL +typedef struct { + unsigned int want_read : 1, + want_write : 1; +} pollinfo_t; +#else +#define USE_SELECT +#endif /* ISC_PLATFORM_HAVEKQUEUE */ + +#ifndef ISC_PLATFORM_USETHREADS +#if defined(USE_KQUEUE) || defined(USE_EPOLL) || defined(USE_DEVPOLL) +struct isc_socketwait { + int nevents; +}; +#elif defined (USE_SELECT) +struct isc_socketwait { + fd_set *readset; + fd_set *writeset; + int nfds; + int maxfd; +}; +#endif /* USE_KQUEUE */ +#endif /* !ISC_PLATFORM_USETHREADS */ + +/*% + * Maximum number of allowable open sockets. This is also the maximum + * allowable socket file descriptor. + * + * Care should be taken before modifying this value for select(): + * The API standard doesn't ensure select() accept more than (the system default + * of) FD_SETSIZE descriptors, and the default size should in fact be fine in + * the vast majority of cases. This constant should therefore be increased only + * when absolutely necessary and possible, i.e., the server is exhausting all * available file descriptors (up to FD_SETSIZE) and the select() function * and FD_xxx macros support larger values than FD_SETSIZE (which may not * always by true, but we keep using some of them to ensure as much @@ -78,18 +125,72 @@ * As a special note, this value shouldn't have to be touched if * this is a build for an authoritative only DNS server. */ - -#ifndef ISC_SOCKET_FDSETSIZE -#define ISC_SOCKET_FDSETSIZE FD_SETSIZE -#endif - +#ifndef ISC_SOCKET_MAXSOCKETS +#if defined(USE_KQUEUE) || defined(USE_EPOLL) || defined(USE_DEVPOLL) +#define ISC_SOCKET_MAXSOCKETS 4096 +#elif defined(USE_SELECT) +#define ISC_SOCKET_MAXSOCKETS FD_SETSIZE +#endif /* USE_KQUEUE... */ +#endif /* ISC_SOCKET_MAXSOCKETS */ + +#ifdef USE_SELECT /*% - * Mac OS X needs a special definition to support larger values in select() + * Mac OS X needs a special definition to support larger values in select(). + * We always define this because a larger value can be specified run-time. */ -#if ISC_SOCKET_FDSETSIZE > FD_SETSIZE #ifdef __APPLE__ #define _DARWIN_UNLIMITED_SELECT #endif /* __APPLE__ */ +#endif /* USE_SELECT */ + +#ifdef ISC_SOCKET_USE_POLLWATCH +/*% + * If this macro is defined, enable workaround for a Solaris /dev/poll kernel + * bug: DP_POLL ioctl could keep sleeping even if socket I/O is possible for + * some of the specified FD. The idea is based on the observation that it's + * likely for a busy server to keep receiving packets. It specifically works + * as follows: the socket watcher is first initialized with the state of + * "poll_idle". While it's in the idle state it keeps sleeping until a socket + * event occurs. When it wakes up for a socket I/O event, it moves to the + * poll_active state, and sets the poll timeout to a short period + * (ISC_SOCKET_POLLWATCH_TIMEOUT msec). If timeout occurs in this state, the + * watcher goes to the poll_checking state with the same timeout period. + * In this state, the watcher tries to detect whether this is a break + * during intermittent events or the kernel bug is triggered. If the next + * polling reports an event within the short period, the previous timeout is + * likely to be a kernel bug, and so the watcher goes back to the active state. + * Otherwise, it moves to the idle state again. + * + * It's not clear whether this is a thread-related bug, but since we've only + * seen this with threads, this workaround is used only when enabling threads. + */ + +typedef enum { poll_idle, poll_active, poll_checking } pollstate_t; + +#ifndef ISC_SOCKET_POLLWATCH_TIMEOUT +#define ISC_SOCKET_POLLWATCH_TIMEOUT 10 +#endif /* ISC_SOCKET_POLLWATCH_TIMEOUT */ +#endif /* ISC_SOCKET_USE_POLLWATCH */ + +/*% + * Size of per-FD lock buckets. + */ +#ifdef ISC_PLATFORM_USETHREADS +#define FDLOCK_COUNT 1024 +#define FDLOCK_ID(fd) ((fd) % FDLOCK_COUNT) +#else +#define FDLOCK_COUNT 1 +#define FDLOCK_ID(fd) 0 +#endif /* ISC_PLATFORM_USETHREADS */ + +/*% + * Maximum number of events communicated with the kernel. There should normally + * be no need for having a large number. + */ +#if defined(USE_KQUEUE) || defined(USE_EPOLL) || defined(USE_DEVPOLL) +#ifndef ISC_SOCKET_MAXEVENTS +#define ISC_SOCKET_MAXEVENTS 64 +#endif #endif /*% @@ -230,22 +331,50 @@ struct isc_socketmgr { unsigned int magic; isc_mem_t *mctx; isc_mutex_t lock; + isc_mutex_t *fdlock; +#ifdef USE_KQUEUE + int kqueue_fd; + int nevents; + struct kevent *events; +#endif /* USE_KQUEUE */ +#ifdef USE_EPOLL + int epoll_fd; + int nevents; + struct epoll_event *events; +#endif /* USE_EPOLL */ +#ifdef USE_DEVPOLL + int devpoll_fd; + int nevents; + struct pollfd *events; +#endif /* USE_DEVPOLL */ +#ifdef USE_SELECT int fd_bufsize; - int fdsize; +#endif /* USE_SELECT */ + unsigned int maxsocks; +#ifdef ISC_PLATFORM_USETHREADS + int pipe_fds[2]; +#endif + + /* Locked by fdlock. */ + isc_socket_t **fds; + int *fdstate; +#ifdef USE_DEVPOLL + pollinfo_t *fdpollinfo; +#endif + /* Locked by manager lock. */ ISC_LIST(isc_socket_t) socklist; +#ifdef USE_SELECT fd_set *read_fds; fd_set *read_fds_copy; fd_set *write_fds; fd_set *write_fds_copy; - isc_socket_t **fds; - int *fdstate; int maxfd; - int reserved; /* unlocked */ +#endif /* USE_SELECT */ + int reserved; /* unlocked */ #ifdef ISC_PLATFORM_USETHREADS isc_thread_t watcher; isc_condition_t shutdown_ok; - int pipe_fds[2]; #else /* ISC_PLATFORM_USETHREADS */ unsigned int refs; #endif /* ISC_PLATFORM_USETHREADS */ @@ -284,8 +413,9 @@ static void build_msghdr_send(isc_socket_t *, isc_socketevent_t *, struct msghdr *, struct iovec *, size_t *); static void build_msghdr_recv(isc_socket_t *, isc_socketevent_t *, struct msghdr *, struct iovec *, size_t *); -static void cleanup_fdsets(isc_socketmgr_t *, isc_mem_t *); -static isc_result_t create_fdsets(isc_socketmgr_t *, isc_mem_t *); +#ifdef ISC_PLATFORM_USETHREADS +static isc_boolean_t process_ctlfd(isc_socketmgr_t *manager); +#endif #define SELECT_POKE_SHUTDOWN (-1) #define SELECT_POKE_NOTHING (-2) @@ -354,9 +484,195 @@ socket_log(isc_socket_t *sock, isc_sockaddr_t *address, } } +#if defined(_AIX) && defined(ISC_NET_BSD44MSGHDR) && \ + defined(USE_CMSG) && defined(IPV6_RECVPKTINFO) +/* + * AIX has a kernel bug where IPV6_RECVPKTINFO gets cleared by + * setting IPV6_V6ONLY. + */ +static void +FIX_IPV6_RECVPKTINFO(isc_socket_t *sock) +{ + char strbuf[ISC_STRERRORSIZE]; + int on = 1; + + if (sock->pf != AF_INET6 || sock->type != isc_sockettype_udp) + return; + + if (setsockopt(sock->fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, + (void *)&on, sizeof(on)) < 0) { + + UNEXPECTED_ERROR(__FILE__, __LINE__, + "setsockopt(%d, IPV6_RECVPKTINFO) " + "%s: %s", sock->fd, + isc_msgcat_get(isc_msgcat, + ISC_MSGSET_GENERAL, + ISC_MSG_FAILED, + "failed"), + strbuf); + } +} +#else +#define FIX_IPV6_RECVPKTINFO(sock) (void)0 +#endif + +static inline isc_result_t +watch_fd(isc_socketmgr_t *manager, int fd, int msg) { + isc_result_t result = ISC_R_SUCCESS; + +#ifdef USE_KQUEUE + struct kevent evchange; + + memset(&evchange, 0, sizeof(evchange)); + if (msg == SELECT_POKE_READ) + evchange.filter = EVFILT_READ; + else + evchange.filter = EVFILT_WRITE; + evchange.flags = EV_ADD; + evchange.ident = fd; + if (kevent(manager->kqueue_fd, &evchange, 1, NULL, 0, NULL) != 0) + result = isc__errno2result(errno); + + return (result); +#elif defined(USE_EPOLL) + struct epoll_event event; + + if (msg == SELECT_POKE_READ) + event.events = EPOLLIN; + else + event.events = EPOLLOUT; + event.data.fd = fd; + if (epoll_ctl(manager->epoll_fd, EPOLL_CTL_ADD, fd, &event) == -1 && + errno != EEXIST) { + result = isc__errno2result(errno); + } + + return (result); +#elif defined(USE_DEVPOLL) + struct pollfd pfd; + int lockid = FDLOCK_ID(fd); + + memset(&pfd, 0, sizeof(pfd)); + if (msg == SELECT_POKE_READ) + pfd.events = POLLIN; + else + pfd.events = POLLOUT; + pfd.fd = fd; + pfd.revents = 0; + LOCK(&manager->fdlock[lockid]); + if (write(manager->devpoll_fd, &pfd, sizeof(pfd)) == -1) + result = isc__errno2result(errno); + else { + if (msg == SELECT_POKE_READ) + manager->fdpollinfo[fd].want_read = 1; + else + manager->fdpollinfo[fd].want_write = 1; + } + UNLOCK(&manager->fdlock[lockid]); + + return (result); +#elif defined(USE_SELECT) + LOCK(&manager->lock); + if (msg == SELECT_POKE_READ) + FD_SET(fd, manager->read_fds); + if (msg == SELECT_POKE_WRITE) + FD_SET(fd, manager->write_fds); + UNLOCK(&manager->lock); + + return (result); +#endif +} + +static inline isc_result_t +unwatch_fd(isc_socketmgr_t *manager, int fd, int msg) { + isc_result_t result = ISC_R_SUCCESS; + +#ifdef USE_KQUEUE + struct kevent evchange; + + memset(&evchange, 0, sizeof(evchange)); + if (msg == SELECT_POKE_READ) + evchange.filter = EVFILT_READ; + else + evchange.filter = EVFILT_WRITE; + evchange.flags = EV_DELETE; + evchange.ident = fd; + if (kevent(manager->kqueue_fd, &evchange, 1, NULL, 0, NULL) != 0) + result = isc__errno2result(errno); + + return (result); +#elif defined(USE_EPOLL) + struct epoll_event event; + + if (msg == SELECT_POKE_READ) + event.events = EPOLLIN; + else + event.events = EPOLLOUT; + event.data.fd = fd; + if (epoll_ctl(manager->epoll_fd, EPOLL_CTL_DEL, fd, &event) == -1 && + errno != ENOENT) { + char strbuf[ISC_STRERRORSIZE]; + isc__strerror(errno, strbuf, sizeof(strbuf)); + UNEXPECTED_ERROR(__FILE__, __LINE__, + "epoll_ctl(DEL), %d: %s", fd, strbuf); + result = ISC_R_UNEXPECTED; + } + return (result); +#elif defined(USE_DEVPOLL) + struct pollfd pfds[2]; + size_t writelen = sizeof(pfds[0]); + int lockid = FDLOCK_ID(fd); + + memset(pfds, 0, sizeof(pfds)); + pfds[0].events = POLLREMOVE; + pfds[0].fd = fd; + + /* + * Canceling read or write polling via /dev/poll is tricky. Since it + * only provides a way of canceling per FD, we may need to re-poll the + * socket for the other operation. + */ + LOCK(&manager->fdlock[lockid]); + if (msg == SELECT_POKE_READ && + manager->fdpollinfo[fd].want_write == 1) { + pfds[1].events = POLLOUT; + pfds[1].fd = fd; + writelen += sizeof(pfds[1]); + } + if (msg == SELECT_POKE_WRITE && + manager->fdpollinfo[fd].want_read == 1) { + pfds[1].events = POLLIN; + pfds[1].fd = fd; + writelen += sizeof(pfds[1]); + } + + if (write(manager->devpoll_fd, pfds, writelen) == -1) + result = isc__errno2result(errno); + else { + if (msg == SELECT_POKE_READ) + manager->fdpollinfo[fd].want_read = 0; + else + manager->fdpollinfo[fd].want_write = 0; + } + UNLOCK(&manager->fdlock[lockid]); + + return (result); +#elif defined(USE_SELECT) + LOCK(&manager->lock); + if (msg == SELECT_POKE_READ) + FD_CLR(fd, manager->read_fds); + else if (msg == SELECT_POKE_WRITE) + FD_CLR(fd, manager->write_fds); + UNLOCK(&manager->lock); + + return (result); +#endif +} + static void wakeup_socket(isc_socketmgr_t *manager, int fd, int msg) { - isc_socket_t *sock; + isc_result_t result; + int lockid = FDLOCK_ID(fd); /* * This is a wakeup on a socket. If the socket is not in the @@ -364,27 +680,54 @@ wakeup_socket(isc_socketmgr_t *manager, int fd, int msg) { * or writes. */ - INSIST(fd >= 0 && fd < manager->fdsize); + INSIST(fd >= 0 && fd < (int)manager->maxsocks); - if (manager->fdstate[fd] == CLOSE_PENDING) { + if (msg == SELECT_POKE_CLOSE) { + /* No one should be updating fdstate, so no need to lock it */ + INSIST(manager->fdstate[fd] == CLOSE_PENDING); manager->fdstate[fd] = CLOSED; - FD_CLR(fd, manager->read_fds); - FD_CLR(fd, manager->write_fds); + (void)unwatch_fd(manager, fd, SELECT_POKE_READ); + (void)unwatch_fd(manager, fd, SELECT_POKE_WRITE); (void)close(fd); return; } - if (manager->fdstate[fd] != MANAGED) - return; - sock = manager->fds[fd]; + LOCK(&manager->fdlock[lockid]); + if (manager->fdstate[fd] == CLOSE_PENDING) { + UNLOCK(&manager->fdlock[lockid]); + /* + * We accept (and ignore) any error from unwatch_fd() as we are + * closing the socket, hoping it doesn't leave dangling state in + * the kernel. + * Note that unwatch_fd() must be called after releasing the + * fdlock; otherwise it could cause deadlock due to a lock order + * reversal. + */ + (void)unwatch_fd(manager, fd, SELECT_POKE_READ); + (void)unwatch_fd(manager, fd, SELECT_POKE_WRITE); + return; + } + if (manager->fdstate[fd] != MANAGED) { + UNLOCK(&manager->fdlock[lockid]); + return; + } + UNLOCK(&manager->fdlock[lockid]); /* * Set requested bit. */ - if (msg == SELECT_POKE_READ) - FD_SET(sock->fd, manager->read_fds); - if (msg == SELECT_POKE_WRITE) - FD_SET(sock->fd, manager->write_fds); + result = watch_fd(manager, fd, msg); + if (result != ISC_R_SUCCESS) { + /* + * XXXJT: what should we do? Ignoring the failure of watching + * a socket will make the application dysfunctional, but there + * seems to be no reasonable recovery process. + */ + isc_log_write(isc_lctx, ISC_LOGCATEGORY_GENERAL, + ISC_LOGMODULE_SOCKET, ISC_LOG_ERROR, + "failed to start watching FD (%d): %s", + fd, isc_result_totext(result)); + } } #ifdef ISC_PLATFORM_USETHREADS @@ -452,7 +795,7 @@ select_readmsg(isc_socketmgr_t *mgr, int *fd, int *msg) { "read() failed " "during watcher poke: %s"), strbuf); - + return; } INSIST(cc == sizeof(buf)); @@ -557,7 +900,7 @@ cmsg_space(ISC_SOCKADDR_LEN_T len) { return ((char *)cmsgp - (char *)msg.msg_control); else return (0); -#endif +#endif } #endif /* USE_CMSG */ @@ -631,7 +974,7 @@ process_cmsg(isc_socket_t *sock, struct msghdr *msg, isc_socketevent_t *dev) { "interface received on ifindex %u", dev->pktinfo.ipi6_ifindex); if (IN6_IS_ADDR_MULTICAST(&pktinfop->ipi6_addr)) - dev->attributes |= ISC_SOCKEVENTATTR_MULTICAST; + dev->attributes |= ISC_SOCKEVENTATTR_MULTICAST; goto next; } #endif @@ -679,7 +1022,7 @@ build_msghdr_send(isc_socket_t *sock, isc_socketevent_t *dev, memset(msg, 0, sizeof(*msg)); - if (sock->type == isc_sockettype_udp) { + if (!sock->connected) { msg->msg_name = (void *)&dev->address.type.sa; msg->msg_namelen = dev->address.length; } else { @@ -964,15 +1307,17 @@ dump_msg(struct msghdr *msg) { unsigned int i; printf("MSGHDR %p\n", msg); - printf("\tname %p, namelen %d\n", msg->msg_name, msg->msg_namelen); - printf("\tiov %p, iovlen %d\n", msg->msg_iov, msg->msg_iovlen); + printf("\tname %p, namelen %ld\n", msg->msg_name, + (long) msg->msg_namelen); + printf("\tiov %p, iovlen %ld\n", msg->msg_iov, + (long) msg->msg_iovlen); for (i = 0; i < (unsigned int)msg->msg_iovlen; i++) - printf("\t\t%d\tbase %p, len %d\n", i, + printf("\t\t%d\tbase %p, len %ld\n", i, msg->msg_iov[i].iov_base, - msg->msg_iov[i].iov_len); + (long) msg->msg_iov[i].iov_len); #ifdef ISC_NET_BSD44MSGHDR - printf("\tcontrol %p, controllen %d\n", msg->msg_control, - msg->msg_controllen); + printf("\tcontrol %p, controllen %ld\n", msg->msg_control, + (long) msg->msg_controllen); #endif } #endif @@ -1014,7 +1359,7 @@ doio_recv(isc_socket_t *sock, isc_socketevent_t *dev) { isc__strerror(recv_errno, strbuf, sizeof(strbuf)); socket_log(sock, NULL, IOEVENT, isc_msgcat, ISC_MSGSET_SOCKET, - ISC_MSG_DOIORECV, + ISC_MSG_DOIORECV, "doio_recv: recvmsg(%d) %d bytes, err %d/%s", sock->fd, cc, recv_errno, strbuf); } @@ -1040,6 +1385,14 @@ doio_recv(isc_socket_t *sock, isc_socketevent_t *dev) { /* HPUX 11.11 can return EADDRNOTAVAIL. */ SOFT_OR_HARD(EADDRNOTAVAIL, ISC_R_ADDRNOTAVAIL); ALWAYS_HARD(ENOBUFS, ISC_R_NORESOURCES); + /* + * HPUX returns EPROTO and EINVAL on receiving some ICMP/ICMPv6 + * errors. + */ +#ifdef EPROTO + SOFT_OR_HARD(EPROTO, ISC_R_HOSTUNREACH); +#endif + SOFT_OR_HARD(EINVAL, ISC_R_HOSTUNREACH); #undef SOFT_OR_HARD #undef ALWAYS_HARD @@ -1062,7 +1415,7 @@ doio_recv(isc_socket_t *sock, isc_socketevent_t *dev) { if (isc_log_wouldlog(isc_lctx, IOEVENT_LEVEL)) { socket_log(sock, &dev->address, IOEVENT, isc_msgcat, ISC_MSGSET_SOCKET, - ISC_MSG_ZEROPORT, + ISC_MSG_ZEROPORT, "dropping source port zero packet"); } return (DOIO_SOFT); @@ -1245,7 +1598,54 @@ doio_send(isc_socket_t *sock, isc_socketevent_t *dev) { * references exist. */ static void +closesocket(isc_socketmgr_t *manager, isc_sockettype_t type, int fd) { + int lockid = FDLOCK_ID(fd); + + UNUSED(type); + + /* + * No one has this socket open, so the watcher doesn't have to be + * poked, and the socket doesn't have to be locked. + */ + LOCK(&manager->fdlock[lockid]); + manager->fds[fd] = NULL; + manager->fdstate[fd] = CLOSE_PENDING; + UNLOCK(&manager->fdlock[lockid]); + select_poke(manager, fd, SELECT_POKE_CLOSE); + + /* + * update manager->maxfd here (XXX: this should be implemented more + * efficiently) + */ +#ifdef USE_SELECT + LOCK(&manager->lock); + if (manager->maxfd == fd) { + int i; + + manager->maxfd = 0; + for (i = fd - 1; i >= 0; i--) { + lockid = FDLOCK_ID(i); + + LOCK(&manager->fdlock[lockid]); + if (manager->fdstate[i] == MANAGED) { + manager->maxfd = i; + UNLOCK(&manager->fdlock[lockid]); + break; + } + UNLOCK(&manager->fdlock[lockid]); + } +#ifdef ISC_PLATFORM_USETHREADS + if (manager->maxfd < manager->pipe_fds[0]) + manager->maxfd = manager->pipe_fds[0]; +#endif + } + UNLOCK(&manager->lock); +#endif /* USE_SELECT */ +} + +static void destroy(isc_socket_t **sockp) { + int fd; isc_socket_t *sock = *sockp; isc_socketmgr_t *manager = sock->manager; @@ -1256,17 +1656,16 @@ destroy(isc_socket_t **sockp) { INSIST(ISC_LIST_EMPTY(sock->recv_list)); INSIST(ISC_LIST_EMPTY(sock->send_list)); INSIST(sock->connect_ev == NULL); - REQUIRE(sock->fd >= 0 && sock->fd < (int)manager->fdsize); + REQUIRE(sock->fd == -1 || sock->fd < (int)manager->maxsocks); + + if (sock->fd >= 0) { + fd = sock->fd; + sock->fd = -1; + closesocket(manager, sock->type, fd); + } LOCK(&manager->lock); - /* - * No one has this socket open, so the watcher doesn't have to be - * poked, and the socket doesn't have to be locked. - */ - manager->fds[sock->fd] = NULL; - manager->fdstate[sock->fd] = CLOSE_PENDING; - select_poke(manager, sock->fd, SELECT_POKE_CLOSE); ISC_LIST_UNLINK(manager->socklist, sock, link); #ifdef ISC_PLATFORM_USETHREADS @@ -1274,10 +1673,6 @@ destroy(isc_socket_t **sockp) { SIGNAL(&manager->shutdown_ok); #endif /* ISC_PLATFORM_USETHREADS */ - /* - * XXX should reset manager->maxfd here - */ - UNLOCK(&manager->lock); free_socket(sockp); @@ -1465,18 +1860,11 @@ clear_bsdcompat(void) { } #endif -/*% - * Create a new 'type' socket managed by 'manager'. Events - * will be posted to 'task' and when dispatched 'action' will be - * called with 'arg' as the arg value. The new socket is returned - * in 'socketp'. - */ -isc_result_t -isc_socket_create(isc_socketmgr_t *manager, int pf, isc_sockettype_t type, - isc_socket_t **socketp) -{ - isc_socket_t *sock = NULL; - isc_result_t result; +static isc_result_t +opensocket(isc_socketmgr_t *manager, isc_socket_t *sock) { + char strbuf[ISC_STRERRORSIZE]; + const char *err = "socket"; + int tries = 0; #if defined(USE_CMSG) || defined(SO_BSDCOMPAT) int on = 1; #endif @@ -1484,38 +1872,27 @@ isc_socket_create(isc_socketmgr_t *manager, int pf, isc_sockettype_t type, ISC_SOCKADDR_LEN_T optlen; int size; #endif - char strbuf[ISC_STRERRORSIZE]; - const char *err = "socket"; - int try = 0; - REQUIRE(VALID_MANAGER(manager)); - REQUIRE(socketp != NULL && *socketp == NULL); - - result = allocate_socket(manager, type, &sock); - if (result != ISC_R_SUCCESS) - return (result); - - sock->pf = pf; again: - switch (type) { + switch (sock->type) { case isc_sockettype_udp: - sock->fd = socket(pf, SOCK_DGRAM, IPPROTO_UDP); + sock->fd = socket(sock->pf, SOCK_DGRAM, IPPROTO_UDP); break; case isc_sockettype_tcp: - sock->fd = socket(pf, SOCK_STREAM, IPPROTO_TCP); + sock->fd = socket(sock->pf, SOCK_STREAM, IPPROTO_TCP); break; case isc_sockettype_unix: - sock->fd = socket(pf, SOCK_STREAM, 0); + sock->fd = socket(sock->pf, SOCK_STREAM, 0); break; } - if (sock->fd == -1 && errno == EINTR && try++ < 42) + if (sock->fd == -1 && errno == EINTR && tries++ < 42) goto again; #ifdef F_DUPFD /* * Leave a space for stdio and TCP to work in. */ - if (manager->reserved != 0 && type == isc_sockettype_udp && + if (manager->reserved != 0 && sock->type == isc_sockettype_udp && sock->fd >= 0 && sock->fd < manager->reserved) { int new, tmp; new = fcntl(sock->fd, F_DUPFD, manager->reserved); @@ -1535,20 +1912,18 @@ isc_socket_create(isc_socketmgr_t *manager, int pf, isc_sockettype_t type, } #endif - if (sock->fd >= (int)manager->fdsize) { + if (sock->fd >= (int)manager->maxsocks) { (void)close(sock->fd); isc_log_iwrite(isc_lctx, ISC_LOGCATEGORY_GENERAL, ISC_LOGMODULE_SOCKET, ISC_LOG_ERROR, isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_TOOMANYFDS, - "%s: too many open file descriptors", "socket"); - free_socket(&sock); + "socket: file descriptor exceeds limit (%d/%u)", + sock->fd, manager->maxsocks); return (ISC_R_NORESOURCES); } - - if (sock->fd < 0) { - free_socket(&sock); + if (sock->fd < 0) { switch (errno) { case EMFILE: case ENFILE: @@ -1580,14 +1955,13 @@ isc_socket_create(isc_socketmgr_t *manager, int pf, isc_sockettype_t type, if (make_nonblock(sock->fd) != ISC_R_SUCCESS) { (void)close(sock->fd); - free_socket(&sock); return (ISC_R_UNEXPECTED); } #ifdef SO_BSDCOMPAT RUNTIME_CHECK(isc_once_do(&bsdcompat_once, clear_bsdcompat) == ISC_R_SUCCESS); - if (type != isc_sockettype_unix && bsdcompat && + if (sock->type != isc_sockettype_unix && bsdcompat && setsockopt(sock->fd, SOL_SOCKET, SO_BSDCOMPAT, (void *)&on, sizeof(on)) < 0) { isc__strerror(errno, strbuf, sizeof(strbuf)); @@ -1601,8 +1975,22 @@ isc_socket_create(isc_socketmgr_t *manager, int pf, isc_sockettype_t type, } #endif +#ifdef SO_NOSIGPIPE + if (setsockopt(sock->fd, SOL_SOCKET, SO_NOSIGPIPE, + (void *)&on, sizeof(on)) < 0) { + isc__strerror(errno, strbuf, sizeof(strbuf)); + UNEXPECTED_ERROR(__FILE__, __LINE__, + "setsockopt(%d, SO_NOSIGPIPE) %s: %s", + sock->fd, + isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL, + ISC_MSG_FAILED, "failed"), + strbuf); + /* Press on... */ + } +#endif + #if defined(USE_CMSG) || defined(SO_RCVBUF) - if (type == isc_sockettype_udp) { + if (sock->type == isc_sockettype_udp) { #if defined(USE_CMSG) #if defined(SO_TIMESTAMP) @@ -1612,7 +2000,7 @@ isc_socket_create(isc_socketmgr_t *manager, int pf, isc_sockettype_t type, isc__strerror(errno, strbuf, sizeof(strbuf)); UNEXPECTED_ERROR(__FILE__, __LINE__, "setsockopt(%d, SO_TIMESTAMP) %s: %s", - sock->fd, + sock->fd, isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL, ISC_MSG_FAILED, @@ -1623,7 +2011,7 @@ isc_socket_create(isc_socketmgr_t *manager, int pf, isc_sockettype_t type, #endif /* SO_TIMESTAMP */ #if defined(ISC_PLATFORM_HAVEIPV6) - if (pf == AF_INET6 && sock->recvcmsgbuflen == 0U) { + if (sock->pf == AF_INET6 && sock->recvcmsgbuflen == 0U) { /* * Warn explicitly because this anomaly can be hidden * in usual operation (and unexpectedly appear later). @@ -1635,7 +2023,7 @@ isc_socket_create(isc_socketmgr_t *manager, int pf, isc_sockettype_t type, #ifdef ISC_PLATFORM_HAVEIN6PKTINFO #ifdef IPV6_RECVPKTINFO /* RFC 3542 */ - if ((pf == AF_INET6) + if ((sock->pf == AF_INET6) && (setsockopt(sock->fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, (void *)&on, sizeof(on)) < 0)) { isc__strerror(errno, strbuf, sizeof(strbuf)); @@ -1650,7 +2038,7 @@ isc_socket_create(isc_socketmgr_t *manager, int pf, isc_sockettype_t type, } #else /* RFC 2292 */ - if ((pf == AF_INET6) + if ((sock->pf == AF_INET6) && (setsockopt(sock->fd, IPPROTO_IPV6, IPV6_PKTINFO, (void *)&on, sizeof(on)) < 0)) { isc__strerror(errno, strbuf, sizeof(strbuf)); @@ -1667,7 +2055,7 @@ isc_socket_create(isc_socketmgr_t *manager, int pf, isc_sockettype_t type, #endif /* ISC_PLATFORM_HAVEIN6PKTINFO */ #ifdef IPV6_USE_MIN_MTU /* RFC 3542, not too common yet*/ /* use minimum MTU */ - if (pf == AF_INET6) { + if (sock->pf == AF_INET6) { (void)setsockopt(sock->fd, IPPROTO_IPV6, IPV6_USE_MIN_MTU, (void *)&on, sizeof(on)); @@ -1676,6 +2064,27 @@ isc_socket_create(isc_socketmgr_t *manager, int pf, isc_sockettype_t type, #endif /* ISC_PLATFORM_HAVEIPV6 */ #endif /* defined(USE_CMSG) */ +#if defined(IP_MTU_DISCOVER) && defined(IP_PMTUDISC_DONT) + /* + * Turn off Path MTU discovery on IPv4/UDP sockets. + */ + if (sock->pf == AF_INET) { + int action = IP_PMTUDISC_DONT; + (void)setsockopt(sock->fd, IPPROTO_IP, IP_MTU_DISCOVER, + &action, sizeof(action)); + } +#endif +#if defined(IP_DONTFRAG) + /* + * Turn off Path MTU discovery on IPv4/UDP sockets. + */ + if (sock->pf == AF_INET) { + int off = 0; + (void)setsockopt(sock->fd, IPPROTO_IP, IP_DONTFRAG, + &off, sizeof(off)); + } +#endif + #if defined(SO_RCVBUF) optlen = sizeof(size); if (getsockopt(sock->fd, SOL_SOCKET, SO_RCVBUF, @@ -1699,22 +2108,61 @@ isc_socket_create(isc_socketmgr_t *manager, int pf, isc_sockettype_t type, } #endif /* defined(USE_CMSG) || defined(SO_RCVBUF) */ + return (ISC_R_SUCCESS); +} + +/*% + * Create a new 'type' socket managed by 'manager'. Events + * will be posted to 'task' and when dispatched 'action' will be + * called with 'arg' as the arg value. The new socket is returned + * in 'socketp'. + */ +isc_result_t +isc_socket_create(isc_socketmgr_t *manager, int pf, isc_sockettype_t type, + isc_socket_t **socketp) +{ + isc_socket_t *sock = NULL; + isc_result_t result; + int lockid; + + REQUIRE(VALID_MANAGER(manager)); + REQUIRE(socketp != NULL && *socketp == NULL); + + result = allocate_socket(manager, type, &sock); + if (result != ISC_R_SUCCESS) + return (result); + + sock->pf = pf; + result = opensocket(manager, sock); + if (result != ISC_R_SUCCESS) { + free_socket(&sock); + return (result); + } + sock->references = 1; *socketp = sock; - LOCK(&manager->lock); - /* * Note we don't have to lock the socket like we normally would because * there are no external references to it yet. */ + lockid = FDLOCK_ID(sock->fd); + LOCK(&manager->fdlock[lockid]); manager->fds[sock->fd] = sock; manager->fdstate[sock->fd] = MANAGED; +#ifdef USE_DEVPOLL + INSIST(sock->manager->fdpollinfo[sock->fd].want_read == 0 && + sock->manager->fdpollinfo[sock->fd].want_write == 0); +#endif + UNLOCK(&manager->fdlock[lockid]); + + LOCK(&manager->lock); ISC_LIST_APPEND(manager->socklist, sock, link); +#ifdef USE_SELECT if (manager->maxfd < sock->fd) manager->maxfd = sock->fd; - +#endif UNLOCK(&manager->lock); socket_log(sock, NULL, CREATION, isc_msgcat, ISC_MSGSET_SOCKET, @@ -1723,6 +2171,48 @@ isc_socket_create(isc_socketmgr_t *manager, int pf, isc_sockettype_t type, return (ISC_R_SUCCESS); } +isc_result_t +isc_socket_open(isc_socket_t *sock) { + isc_result_t result; + + REQUIRE(VALID_SOCKET(sock)); + + LOCK(&sock->lock); + REQUIRE(sock->references == 1); + UNLOCK(&sock->lock); + /* + * We don't need to retain the lock hereafter, since no one else has + * this socket. + */ + REQUIRE(sock->fd == -1); + + result = opensocket(sock->manager, sock); + if (result != ISC_R_SUCCESS) + sock->fd = -1; + + if (result == ISC_R_SUCCESS) { + int lockid = FDLOCK_ID(sock->fd); + + LOCK(&sock->manager->fdlock[lockid]); + sock->manager->fds[sock->fd] = sock; + sock->manager->fdstate[sock->fd] = MANAGED; +#ifdef USE_DEVPOLL + INSIST(sock->manager->fdpollinfo[sock->fd].want_read == 0 && + sock->manager->fdpollinfo[sock->fd].want_write == 0); +#endif + UNLOCK(&sock->manager->fdlock[lockid]); + +#ifdef USE_SELECT + LOCK(&sock->manager->lock); + if (sock->manager->maxfd < sock->fd) + sock->manager->maxfd = sock->fd; + UNLOCK(&sock->manager->lock); +#endif + } + + return (result); +} + /* * Attach to a socket. Caller must explicitly detach when it is done. */ @@ -1764,6 +2254,44 @@ isc_socket_detach(isc_socket_t **socketp) { *socketp = NULL; } +isc_result_t +isc_socket_close(isc_socket_t *sock) { + int fd; + + REQUIRE(VALID_SOCKET(sock)); + + LOCK(&sock->lock); + REQUIRE(sock->references == 1); + UNLOCK(&sock->lock); + /* + * We don't need to retain the lock hereafter, since no one else has + * this socket. + */ + + REQUIRE(sock->fd >= 0 && sock->fd < (int)sock->manager->maxsocks); + + INSIST(!sock->connecting); + INSIST(!sock->pending_recv); + INSIST(!sock->pending_send); + INSIST(!sock->pending_accept); + INSIST(ISC_LIST_EMPTY(sock->recv_list)); + INSIST(ISC_LIST_EMPTY(sock->send_list)); + INSIST(ISC_LIST_EMPTY(sock->accept_list)); + INSIST(sock->connect_ev == NULL); + + fd = sock->fd; + sock->fd = -1; + sock->listener = 0; + sock->connected = 0; + sock->connecting = 0; + sock->bound = 0; + isc_sockaddr_any(&sock->address); + + closesocket(sock->manager, sock->type, fd); + + return (ISC_R_SUCCESS); +} + /* * I/O is possible on a given socket. Schedule an event to this task that * will call an internal function to do the I/O. This will charge the @@ -1993,7 +2521,7 @@ internal_accept(isc_task_t *me, isc_event_t *ev) { */ addrlen = sizeof(dev->newsocket->address.type); - memset(&dev->newsocket->address.type.sa, 0, addrlen); + memset(&dev->newsocket->address.type, 0, addrlen); fd = accept(sock->fd, &dev->newsocket->address.type.sa, (void *)&addrlen); @@ -2070,19 +2598,20 @@ internal_accept(isc_task_t *me, isc_event_t *ev) { UNEXPECTED_ERROR(__FILE__, __LINE__, "internal_accept(): " "accept() returned peer address " - "family %u (expected %u)", + "family %u (expected %u)", dev->newsocket->address. type.sa.sa_family, sock->pf); (void)close(fd); goto soft_error; - } else if (fd >= (int)manager->fdsize) { + } else if (fd >= (int)manager->maxsocks) { isc_log_iwrite(isc_lctx, ISC_LOGCATEGORY_GENERAL, ISC_LOGMODULE_SOCKET, ISC_LOG_ERROR, isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_TOOMANYFDS, - "%s: too many open file descriptors", - "accept"); + "accept: " + "file descriptor exceeds limit (%d/%u)", + fd, manager->maxsocks); (void)close(fd); goto soft_error; } @@ -2116,6 +2645,13 @@ internal_accept(isc_task_t *me, isc_event_t *ev) { * -1 means the new socket didn't happen. */ if (fd != -1) { + int lockid = FDLOCK_ID(fd); + + LOCK(&manager->fdlock[lockid]); + manager->fds[fd] = dev->newsocket; + manager->fdstate[fd] = MANAGED; + UNLOCK(&manager->fdlock[lockid]); + LOCK(&manager->lock); ISC_LIST_APPEND(manager->socklist, dev->newsocket, link); @@ -2128,10 +2664,10 @@ internal_accept(isc_task_t *me, isc_event_t *ev) { */ dev->address = dev->newsocket->address; - manager->fds[fd] = dev->newsocket; - manager->fdstate[fd] = MANAGED; +#ifdef USE_SELECT if (manager->maxfd < fd) manager->maxfd = fd; +#endif socket_log(sock, &dev->newsocket->address, CREATION, isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTEDCXN, @@ -2143,7 +2679,7 @@ internal_accept(isc_task_t *me, isc_event_t *ev) { dev->newsocket->references--; free_socket(&dev->newsocket); } - + /* * Fill in the done event details and send it off. */ @@ -2280,77 +2816,256 @@ internal_send(isc_task_t *me, isc_event_t *ev) { UNLOCK(&sock->lock); } +/* + * Process read/writes on each fd here. Avoid locking + * and unlocking twice if both reads and writes are possible. + */ static void -process_fds(isc_socketmgr_t *manager, int maxfd, - fd_set *readfds, fd_set *writefds) +process_fd(isc_socketmgr_t *manager, int fd, isc_boolean_t readable, + isc_boolean_t writeable) { - int i; isc_socket_t *sock; isc_boolean_t unlock_sock; - - REQUIRE(maxfd <= (int)manager->fdsize); + int lockid = FDLOCK_ID(fd); /* - * Process read/writes on other fds here. Avoid locking - * and unlocking twice if both reads and writes are possible. + * If the socket is going to be closed, don't do more I/O. */ - for (i = 0; i < maxfd; i++) { + LOCK(&manager->fdlock[lockid]); + if (manager->fdstate[fd] == CLOSE_PENDING) { + UNLOCK(&manager->fdlock[lockid]); + + (void)unwatch_fd(manager, fd, SELECT_POKE_READ); + (void)unwatch_fd(manager, fd, SELECT_POKE_WRITE); + return; + } + + sock = manager->fds[fd]; + UNLOCK(&manager->fdlock[lockid]); + unlock_sock = ISC_FALSE; + if (readable) { + if (sock == NULL) { + (void)unwatch_fd(manager, fd, SELECT_POKE_READ); + goto check_write; + } + unlock_sock = ISC_TRUE; + LOCK(&sock->lock); + if (!SOCK_DEAD(sock)) { + if (sock->listener) + dispatch_accept(sock); + else + dispatch_recv(sock); + } + (void)unwatch_fd(manager, fd, SELECT_POKE_READ); + } +check_write: + if (writeable) { + if (sock == NULL) { + (void)unwatch_fd(manager, fd, SELECT_POKE_WRITE); + return; + } + if (!unlock_sock) { + unlock_sock = ISC_TRUE; + LOCK(&sock->lock); + } + if (!SOCK_DEAD(sock)) { + if (sock->connecting) + dispatch_connect(sock); + else + dispatch_send(sock); + } + (void)unwatch_fd(manager, fd, SELECT_POKE_WRITE); + } + if (unlock_sock) + UNLOCK(&sock->lock); +} + +#ifdef USE_KQUEUE +static isc_boolean_t +process_fds(isc_socketmgr_t *manager, struct kevent *events, int nevents) { + int i; + isc_boolean_t readable, writable; + isc_boolean_t done = ISC_FALSE; #ifdef ISC_PLATFORM_USETHREADS - if (i == manager->pipe_fds[0] || i == manager->pipe_fds[1]) + isc_boolean_t have_ctlevent = ISC_FALSE; +#endif + + if (nevents == manager->nevents) { + /* + * This is not an error, but something unexpected. If this + * happens, it may indicate the need for increasing + * ISC_SOCKET_MAXEVENTS. + */ + manager_log(manager, ISC_LOGCATEGORY_GENERAL, + ISC_LOGMODULE_SOCKET, ISC_LOG_INFO, + "maximum number of FD events (%d) received", + nevents); + } + + for (i = 0; i < nevents; i++) { + REQUIRE(events[i].ident < manager->maxsocks); +#ifdef ISC_PLATFORM_USETHREADS + if (events[i].ident == (uintptr_t)manager->pipe_fds[0]) { + have_ctlevent = ISC_TRUE; continue; -#endif /* ISC_PLATFORM_USETHREADS */ + } +#endif + readable = ISC_TF(events[i].filter == EVFILT_READ); + writable = ISC_TF(events[i].filter == EVFILT_WRITE); + process_fd(manager, events[i].ident, readable, writable); + } - if (manager->fdstate[i] == CLOSE_PENDING) { - manager->fdstate[i] = CLOSED; - FD_CLR(i, manager->read_fds); - FD_CLR(i, manager->write_fds); +#ifdef ISC_PLATFORM_USETHREADS + if (have_ctlevent) + done = process_ctlfd(manager); +#endif - (void)close(i); + return (done); +} +#elif defined(USE_EPOLL) +static isc_boolean_t +process_fds(isc_socketmgr_t *manager, struct epoll_event *events, int nevents) { + int i; + isc_boolean_t done = ISC_FALSE; +#ifdef ISC_PLATFORM_USETHREADS + isc_boolean_t have_ctlevent = ISC_FALSE; +#endif + + if (nevents == manager->nevents) { + manager_log(manager, ISC_LOGCATEGORY_GENERAL, + ISC_LOGMODULE_SOCKET, ISC_LOG_INFO, + "maximum number of FD events (%d) received", + nevents); + } + for (i = 0; i < nevents; i++) { + REQUIRE(events[i].data.fd < (int)manager->maxsocks); +#ifdef ISC_PLATFORM_USETHREADS + if (events[i].data.fd == manager->pipe_fds[0]) { + have_ctlevent = ISC_TRUE; continue; } - - sock = manager->fds[i]; - unlock_sock = ISC_FALSE; - if (FD_ISSET(i, readfds)) { - if (sock == NULL) { - FD_CLR(i, manager->read_fds); - goto check_write; - } - unlock_sock = ISC_TRUE; - LOCK(&sock->lock); - if (!SOCK_DEAD(sock)) { - if (sock->listener) - dispatch_accept(sock); - else - dispatch_recv(sock); - } - FD_CLR(i, manager->read_fds); +#endif + if ((events[i].events & EPOLLERR) != 0 || + (events[i].events & EPOLLHUP) != 0) { + /* + * epoll does not set IN/OUT bits on an erroneous + * condition, so we need to try both anyway. This is a + * bit inefficient, but should be okay for such rare + * events. Note also that the read or write attempt + * won't block because we use non-blocking sockets. + */ + events[i].events |= (EPOLLIN | EPOLLOUT); } - check_write: - if (FD_ISSET(i, writefds)) { - if (sock == NULL) { - FD_CLR(i, manager->write_fds); - continue; - } - if (!unlock_sock) { - unlock_sock = ISC_TRUE; - LOCK(&sock->lock); - } - if (!SOCK_DEAD(sock)) { - if (sock->connecting) - dispatch_connect(sock); - else - dispatch_send(sock); - } - FD_CLR(i, manager->write_fds); + process_fd(manager, events[i].data.fd, + (events[i].events & EPOLLIN) != 0, + (events[i].events & EPOLLOUT) != 0); + } + +#ifdef ISC_PLATFORM_USETHREADS + if (have_ctlevent) + done = process_ctlfd(manager); +#endif + + return (done); +} +#elif defined(USE_DEVPOLL) +static isc_boolean_t +process_fds(isc_socketmgr_t *manager, struct pollfd *events, int nevents) { + int i; + isc_boolean_t done = ISC_FALSE; +#ifdef ISC_PLATFORM_USETHREADS + isc_boolean_t have_ctlevent = ISC_FALSE; +#endif + + if (nevents == manager->nevents) { + manager_log(manager, ISC_LOGCATEGORY_GENERAL, + ISC_LOGMODULE_SOCKET, ISC_LOG_INFO, + "maximum number of FD events (%d) received", + nevents); + } + + for (i = 0; i < nevents; i++) { + REQUIRE(events[i].fd < (int)manager->maxsocks); +#ifdef ISC_PLATFORM_USETHREADS + if (events[i].fd == manager->pipe_fds[0]) { + have_ctlevent = ISC_TRUE; + continue; } - if (unlock_sock) - UNLOCK(&sock->lock); +#endif + process_fd(manager, events[i].fd, + (events[i].events & POLLIN) != 0, + (events[i].events & POLLOUT) != 0); + } + +#ifdef ISC_PLATFORM_USETHREADS + if (have_ctlevent) + done = process_ctlfd(manager); +#endif + + return (done); +} +#elif defined(USE_SELECT) +static void +process_fds(isc_socketmgr_t *manager, int maxfd, + fd_set *readfds, fd_set *writefds) +{ + int i; + + REQUIRE(maxfd <= (int)manager->maxsocks); + + for (i = 0; i < maxfd; i++) { +#ifdef ISC_PLATFORM_USETHREADS + if (i == manager->pipe_fds[0] || i == manager->pipe_fds[1]) + continue; +#endif /* ISC_PLATFORM_USETHREADS */ + process_fd(manager, i, FD_ISSET(i, readfds), + FD_ISSET(i, writefds)); } } +#endif #ifdef ISC_PLATFORM_USETHREADS +static isc_boolean_t +process_ctlfd(isc_socketmgr_t *manager) { + int msg, fd; + + for (;;) { + select_readmsg(manager, &fd, &msg); + + manager_log(manager, IOEVENT, + isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET, + ISC_MSG_WATCHERMSG, + "watcher got message %d " + "for socket %d"), msg, fd); + + /* + * Nothing to read? + */ + if (msg == SELECT_POKE_NOTHING) + break; + + /* + * Handle shutdown message. We really should + * jump out of this loop right away, but + * it doesn't matter if we have to do a little + * more work first. + */ + if (msg == SELECT_POKE_SHUTDOWN) + return (ISC_TRUE); + + /* + * This is a wakeup on a socket. Look + * at the event queue for both read and write, + * and decide if we need to watch on it now + * or not. + */ + wakeup_socket(manager, fd, msg); + } + + return (ISC_FALSE); +} + /* * This is the thread that will loop forever, always in a select or poll * call. @@ -2364,98 +3079,116 @@ watcher(void *uap) { isc_boolean_t done; int ctlfd; int cc; - int msg, fd; +#ifdef USE_KQUEUE + const char *fnname = "kevent()"; +#elif defined (USE_EPOLL) + const char *fnname = "epoll_wait()"; +#elif defined(USE_DEVPOLL) + const char *fnname = "ioctl(DP_POLL)"; + struct dvpoll dvp; +#elif defined (USE_SELECT) + const char *fnname = "select()"; int maxfd; +#endif char strbuf[ISC_STRERRORSIZE]; +#ifdef ISC_SOCKET_USE_POLLWATCH + pollstate_t pollstate = poll_idle; +#endif /* * Get the control fd here. This will never change. */ - LOCK(&manager->lock); ctlfd = manager->pipe_fds[0]; - done = ISC_FALSE; while (!done) { do { +#ifdef USE_KQUEUE + cc = kevent(manager->kqueue_fd, NULL, 0, + manager->events, manager->nevents, NULL); +#elif defined(USE_EPOLL) + cc = epoll_wait(manager->epoll_fd, manager->events, + manager->nevents, -1); +#elif defined(USE_DEVPOLL) + dvp.dp_fds = manager->events; + dvp.dp_nfds = manager->nevents; +#ifndef ISC_SOCKET_USE_POLLWATCH + dvp.dp_timeout = -1; +#else + if (pollstate == poll_idle) + dvp.dp_timeout = -1; + else + dvp.dp_timeout = ISC_SOCKET_POLLWATCH_TIMEOUT; +#endif /* ISC_SOCKET_USE_POLLWATCH */ + cc = ioctl(manager->devpoll_fd, DP_POLL, &dvp); +#elif defined(USE_SELECT) + LOCK(&manager->lock); memcpy(manager->read_fds_copy, manager->read_fds, manager->fd_bufsize); memcpy(manager->write_fds_copy, manager->write_fds, manager->fd_bufsize); maxfd = manager->maxfd + 1; - UNLOCK(&manager->lock); cc = select(maxfd, manager->read_fds_copy, manager->write_fds_copy, NULL, NULL); - if (cc < 0) { - if (!SOFT_ERROR(errno)) { - isc__strerror(errno, strbuf, - sizeof(strbuf)); - FATAL_ERROR(__FILE__, __LINE__, - "select() %s: %s", - isc_msgcat_get(isc_msgcat, - ISC_MSGSET_GENERAL, - ISC_MSG_FAILED, - "failed"), - strbuf); - } +#endif /* USE_KQUEUE */ + + if (cc < 0 && !SOFT_ERROR(errno)) { + isc__strerror(errno, strbuf, sizeof(strbuf)); + FATAL_ERROR(__FILE__, __LINE__, + "%s %s: %s", fnname, + isc_msgcat_get(isc_msgcat, + ISC_MSGSET_GENERAL, + ISC_MSG_FAILED, + "failed"), strbuf); } - LOCK(&manager->lock); +#if defined(USE_DEVPOLL) && defined(ISC_SOCKET_USE_POLLWATCH) + if (cc == 0) { + if (pollstate == poll_active) + pollstate = poll_checking; + else if (pollstate == poll_checking) + pollstate = poll_idle; + } else if (cc > 0) { + if (pollstate == poll_checking) { + /* + * XXX: We'd like to use a more + * verbose log level as it's actually an + * unexpected event, but the kernel bug + * reportedly happens pretty frequently + * (and it can also be a false positive) + * so it would be just too noisy. + */ + manager_log(manager, + ISC_LOGCATEGORY_GENERAL, + ISC_LOGMODULE_SOCKET, + ISC_LOG_DEBUG(1), + ISC_LOG_INFO, + "unexpected POLL timeout"); + } + pollstate = poll_active; + } +#endif } while (cc < 0); +#if defined(USE_KQUEUE) || defined (USE_EPOLL) || defined (USE_DEVPOLL) + done = process_fds(manager, manager->events, cc); +#elif defined(USE_SELECT) + process_fds(manager, maxfd, manager->read_fds_copy, + manager->write_fds_copy); /* * Process reads on internal, control fd. */ - if (FD_ISSET(ctlfd, manager->read_fds_copy)) { - for (;;) { - select_readmsg(manager, &fd, &msg); - - manager_log(manager, IOEVENT, - isc_msgcat_get(isc_msgcat, - ISC_MSGSET_SOCKET, - ISC_MSG_WATCHERMSG, - "watcher got message %d"), - msg); - - /* - * Nothing to read? - */ - if (msg == SELECT_POKE_NOTHING) - break; - - /* - * Handle shutdown message. We really should - * jump out of this loop right away, but - * it doesn't matter if we have to do a little - * more work first. - */ - if (msg == SELECT_POKE_SHUTDOWN) { - done = ISC_TRUE; - - break; - } - - /* - * This is a wakeup on a socket. Look - * at the event queue for both read and write, - * and decide if we need to watch on it now - * or not. - */ - wakeup_socket(manager, fd, msg); - } - } - - process_fds(manager, maxfd, manager->read_fds_copy, - manager->write_fds_copy); + if (FD_ISSET(ctlfd, manager->read_fds_copy)) + done = process_ctlfd(manager); +#endif } manager_log(manager, TRACE, isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL, ISC_MSG_EXITING, "watcher exiting")); - UNLOCK(&manager->lock); return ((isc_threadresult_t)0); } #endif /* ISC_PLATFORM_USETHREADS */ @@ -2469,69 +3202,187 @@ isc__socketmgr_setreserved(isc_socketmgr_t *manager, isc_uint32_t reserved) { } /* - * Initialize fdsets in socketmgr structure. + * Create a new socket manager. */ + static isc_result_t -create_fdsets(isc_socketmgr_t *manager, isc_mem_t *mctx) { -#if ISC_SOCKET_FDSETSIZE > FD_SETSIZE - manager->fdsize = ISC_SOCKET_FDSETSIZE; - manager->fd_bufsize = howmany(ISC_SOCKET_FDSETSIZE, NFDBITS) * +setup_watcher(isc_mem_t *mctx, isc_socketmgr_t *manager) { + isc_result_t result; + +#ifdef USE_KQUEUE + manager->nevents = ISC_SOCKET_MAXEVENTS; + manager->events = isc_mem_get(mctx, sizeof(struct kevent) * + manager->nevents); + if (manager->events == NULL) + return (ISC_R_NOMEMORY); + manager->kqueue_fd = kqueue(); + if (manager->kqueue_fd == -1) { + result = isc__errno2result(errno); + isc_mem_put(mctx, manager->events, + sizeof(struct kevent) * manager->nevents); + return (result); + } + +#ifdef ISC_PLATFORM_USETHREADS + result = watch_fd(manager, manager->pipe_fds[0], SELECT_POKE_READ); + if (result != ISC_R_SUCCESS) { + close(manager->kqueue_fd); + isc_mem_put(mctx, manager->events, + sizeof(struct kevent) * manager->nevents); + return (result); + } +#endif /* ISC_PLATFORM_USETHREADS */ +#elif defined(USE_EPOLL) + manager->nevents = ISC_SOCKET_MAXEVENTS; + manager->events = isc_mem_get(mctx, sizeof(struct epoll_event) * + manager->nevents); + if (manager->events == NULL) + return (ISC_R_NOMEMORY); + manager->epoll_fd = epoll_create(manager->nevents); + if (manager->epoll_fd == -1) { + result = isc__errno2result(errno); + isc_mem_put(mctx, manager->events, + sizeof(struct epoll_event) * manager->nevents); + return (result); + } +#ifdef ISC_PLATFORM_USETHREADS + result = watch_fd(manager, manager->pipe_fds[0], SELECT_POKE_READ); + if (result != ISC_R_SUCCESS) { + close(manager->epoll_fd); + isc_mem_put(mctx, manager->events, + sizeof(struct epoll_event) * manager->nevents); + return (result); + } +#endif /* ISC_PLATFORM_USETHREADS */ +#elif defined(USE_DEVPOLL) + /* + * XXXJT: /dev/poll seems to reject large numbers of events, + * so we should be careful about redefining ISC_SOCKET_MAXEVENTS. + */ + manager->nevents = ISC_SOCKET_MAXEVENTS; + manager->events = isc_mem_get(mctx, sizeof(struct pollfd) * + manager->nevents); + if (manager->events == NULL) + return (ISC_R_NOMEMORY); + /* + * Note: fdpollinfo should be able to support all possible FDs, so + * it must have maxsocks entries (not nevents). + */ + manager->fdpollinfo = isc_mem_get(mctx, sizeof(pollinfo_t) * + manager->maxsocks); + if (manager->fdpollinfo == NULL) { + isc_mem_put(mctx, manager->events, + sizeof(pollinfo_t) * manager->maxsocks); + return (ISC_R_NOMEMORY); + } + memset(manager->fdpollinfo, 0, sizeof(pollinfo_t) * manager->maxsocks); + manager->devpoll_fd = open("/dev/poll", O_RDWR); + if (manager->devpoll_fd == -1) { + result = isc__errno2result(errno); + isc_mem_put(mctx, manager->events, + sizeof(struct pollfd) * manager->nevents); + isc_mem_put(mctx, manager->fdpollinfo, + sizeof(pollinfo_t) * manager->maxsocks); + return (result); + } +#ifdef ISC_PLATFORM_USETHREADS + result = watch_fd(manager, manager->pipe_fds[0], SELECT_POKE_READ); + if (result != ISC_R_SUCCESS) { + close(manager->devpoll_fd); + isc_mem_put(mctx, manager->events, + sizeof(struct pollfd) * manager->nevents); + isc_mem_put(mctx, manager->fdpollinfo, + sizeof(pollinfo_t) * manager->maxsocks); + return (result); + } +#endif /* ISC_PLATFORM_USETHREADS */ +#elif defined(USE_SELECT) + UNUSED(result); + +#if ISC_SOCKET_MAXSOCKETS > FD_SETSIZE + /* + * Note: this code should also cover the case of MAXSOCKETS <= + * FD_SETSIZE, but we separate the cases to avoid possible portability + * issues regarding howmany() and the actual representation of fd_set. + */ + manager->fd_bufsize = howmany(manager->maxsocks, NFDBITS) * sizeof(fd_mask); #else - manager->fdsize = FD_SETSIZE; manager->fd_bufsize = sizeof(fd_set); #endif - manager->fds = NULL; - manager->fdstate = NULL; manager->read_fds = NULL; manager->read_fds_copy = NULL; manager->write_fds = NULL; manager->write_fds_copy = NULL; - manager->fds = isc_mem_get(mctx, - manager->fdsize * sizeof(manager->fds[0])); - if (manager->fds == NULL) - goto fail; - - manager->fdstate = isc_mem_get(mctx, manager->fdsize * - sizeof(manager->fdstate[0])); - if (manager->fdstate == NULL) - goto fail; - manager->read_fds = isc_mem_get(mctx, manager->fd_bufsize); - if (manager->read_fds == NULL) - goto fail; - manager->read_fds_copy = isc_mem_get(mctx, manager->fd_bufsize); - if (manager->read_fds_copy == NULL) - goto fail; - manager->write_fds = isc_mem_get(mctx, manager->fd_bufsize); - if (manager->write_fds == NULL) - goto fail; - manager->write_fds_copy = isc_mem_get(mctx, manager->fd_bufsize); - if (manager->write_fds_copy == NULL) - goto fail; + if (manager->read_fds != NULL) + manager->read_fds_copy = isc_mem_get(mctx, manager->fd_bufsize); + if (manager->read_fds_copy != NULL) + manager->write_fds = isc_mem_get(mctx, manager->fd_bufsize); + if (manager->write_fds != NULL) { + manager->write_fds_copy = isc_mem_get(mctx, + manager->fd_bufsize); + } + if (manager->write_fds_copy == NULL) { + if (manager->write_fds != NULL) { + isc_mem_put(mctx, manager->write_fds, + manager->fd_bufsize); + } + if (manager->read_fds_copy != NULL) { + isc_mem_put(mctx, manager->read_fds_copy, + manager->fd_bufsize); + } + if (manager->read_fds != NULL) { + isc_mem_put(mctx, manager->read_fds, + manager->fd_bufsize); + } + return (ISC_R_NOMEMORY); + } + memset(manager->read_fds, 0, manager->fd_bufsize); + memset(manager->write_fds, 0, manager->fd_bufsize); - return (ISC_R_SUCCESS); +#ifdef ISC_PLATFORM_USETHREADS + (void)watch_fd(manager, manager->pipe_fds[0], SELECT_POKE_READ); + manager->maxfd = manager->pipe_fds[0]; +#else /* ISC_PLATFORM_USETHREADS */ + manager->maxfd = 0; +#endif /* ISC_PLATFORM_USETHREADS */ +#endif /* USE_KQUEUE */ - fail: - cleanup_fdsets(manager, mctx); - return (ISC_R_NOMEMORY); + return (ISC_R_SUCCESS); } -/* - * Clean up fdsets in socketmgr structure. - */ static void -cleanup_fdsets(isc_socketmgr_t *manager, isc_mem_t *mctx) { - if (manager->fds != NULL) { - isc_mem_put(mctx, manager->fds, - manager->fdsize * sizeof(manager->fds[0])); - } - if (manager->fdstate != NULL) { - isc_mem_put(mctx, manager->fdstate, - manager->fdsize * sizeof(manager->fdstate[0])); +cleanup_watcher(isc_mem_t *mctx, isc_socketmgr_t *manager) { +#ifdef ISC_PLATFORM_USETHREADS + isc_result_t result; + + result = unwatch_fd(manager, manager->pipe_fds[0], SELECT_POKE_READ); + if (result != ISC_R_SUCCESS) { + UNEXPECTED_ERROR(__FILE__, __LINE__, + "epoll_ctl(DEL) %s", + isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL, + ISC_MSG_FAILED, "failed")); } +#endif /* ISC_PLATFORM_USETHREADS */ + +#ifdef USE_KQUEUE + close(manager->kqueue_fd); + isc_mem_put(mctx, manager->events, + sizeof(struct kevent) * manager->nevents); +#elif defined(USE_EPOLL) + close(manager->epoll_fd); + isc_mem_put(mctx, manager->events, + sizeof(struct epoll_event) * manager->nevents); +#elif defined(USE_DEVPOLL) + close(manager->devpoll_fd); + isc_mem_put(mctx, manager->events, + sizeof(struct pollfd) * manager->nevents); + isc_mem_put(mctx, manager->fdpollinfo, + sizeof(pollinfo_t) * manager->maxsocks); +#elif defined(USE_SELECT) if (manager->read_fds != NULL) isc_mem_put(mctx, manager->read_fds, manager->fd_bufsize); if (manager->read_fds_copy != NULL) @@ -2540,13 +3391,19 @@ cleanup_fdsets(isc_socketmgr_t *manager, isc_mem_t *mctx) { isc_mem_put(mctx, manager->write_fds, manager->fd_bufsize); if (manager->write_fds_copy != NULL) isc_mem_put(mctx, manager->write_fds_copy, manager->fd_bufsize); +#endif /* USE_KQUEUE */ } -/* - * Create a new socket manager. - */ isc_result_t isc_socketmgr_create(isc_mem_t *mctx, isc_socketmgr_t **managerp) { + return (isc_socketmgr_create2(mctx, managerp, 0)); +} + +isc_result_t +isc_socketmgr_create2(isc_mem_t *mctx, isc_socketmgr_t **managerp, + unsigned int maxsocks) +{ + int i; isc_socketmgr_t *manager; #ifdef ISC_PLATFORM_USETHREADS char strbuf[ISC_STRERRORSIZE]; @@ -2557,43 +3414,71 @@ isc_socketmgr_create(isc_mem_t *mctx, isc_socketmgr_t **managerp) { #ifndef ISC_PLATFORM_USETHREADS if (socketmgr != NULL) { + /* Don't allow maxsocks to be updated */ + if (maxsocks > 0 && socketmgr->maxsocks != maxsocks) + return (ISC_R_EXISTS); + socketmgr->refs++; *managerp = socketmgr; return (ISC_R_SUCCESS); } #endif /* ISC_PLATFORM_USETHREADS */ + if (maxsocks == 0) + maxsocks = ISC_SOCKET_MAXSOCKETS; + manager = isc_mem_get(mctx, sizeof(*manager)); if (manager == NULL) return (ISC_R_NOMEMORY); - result = create_fdsets(manager, mctx); - if (result != ISC_R_SUCCESS) { - cleanup_fdsets(manager, mctx); - isc_mem_put(mctx, manager, sizeof(*manager)); - return (result); + /* zero-clear so that necessary cleanup on failure will be easy */ + memset(manager, 0, sizeof(*manager)); + manager->maxsocks = maxsocks; + manager->reserved = 0; + manager->fds = isc_mem_get(mctx, + manager->maxsocks * sizeof(isc_socket_t *)); + if (manager->fds == NULL) { + result = ISC_R_NOMEMORY; + goto free_manager; + } + manager->fdstate = isc_mem_get(mctx, manager->maxsocks * sizeof(int)); + if (manager->fds == NULL) { + result = ISC_R_NOMEMORY; + goto free_manager; } manager->magic = SOCKET_MANAGER_MAGIC; manager->mctx = NULL; - memset(manager->fds, 0, sizeof(manager->fds[0]) * manager->fdsize); + memset(manager->fds, 0, manager->maxsocks * sizeof(isc_socket_t *)); ISC_LIST_INIT(manager->socklist); result = isc_mutex_init(&manager->lock); - if (result != ISC_R_SUCCESS) { - cleanup_fdsets(manager, mctx); - isc_mem_put(mctx, manager, sizeof(*manager)); - return (result); + if (result != ISC_R_SUCCESS) + goto free_manager; + manager->fdlock = isc_mem_get(mctx, FDLOCK_COUNT * sizeof(isc_mutex_t)); + if (manager->fdlock == NULL) { + result = ISC_R_NOMEMORY; + goto cleanup_lock; } + for (i = 0; i < FDLOCK_COUNT; i++) { + result = isc_mutex_init(&manager->fdlock[i]); + if (result != ISC_R_SUCCESS) { + while (--i >= 0) + DESTROYLOCK(&manager->fdlock[i]); + isc_mem_put(mctx, manager->fdlock, + FDLOCK_COUNT * sizeof(isc_mutex_t)); + manager->fdlock = NULL; + goto cleanup_lock; + } + } + #ifdef ISC_PLATFORM_USETHREADS if (isc_condition_init(&manager->shutdown_ok) != ISC_R_SUCCESS) { - cleanup_fdsets(manager, mctx); - DESTROYLOCK(&manager->lock); - isc_mem_put(mctx, manager, sizeof(*manager)); UNEXPECTED_ERROR(__FILE__, __LINE__, "isc_condition_init() %s", isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL, ISC_MSG_FAILED, "failed")); - return (ISC_R_UNEXPECTED); + result = ISC_R_UNEXPECTED; + goto cleanup_lock; } /* @@ -2601,17 +3486,14 @@ isc_socketmgr_create(isc_mem_t *mctx, isc_socketmgr_t **managerp) { * select/poll loop when something internal needs to be done. */ if (pipe(manager->pipe_fds) != 0) { - cleanup_fdsets(manager, mctx); - DESTROYLOCK(&manager->lock); - isc_mem_put(mctx, manager, sizeof(*manager)); isc__strerror(errno, strbuf, sizeof(strbuf)); UNEXPECTED_ERROR(__FILE__, __LINE__, "pipe() %s: %s", isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL, ISC_MSG_FAILED, "failed"), strbuf); - - return (ISC_R_UNEXPECTED); + result = ISC_R_UNEXPECTED; + goto cleanup_condition; } RUNTIME_CHECK(make_nonblock(manager->pipe_fds[0]) == ISC_R_SUCCESS); @@ -2625,33 +3507,23 @@ isc_socketmgr_create(isc_mem_t *mctx, isc_socketmgr_t **managerp) { /* * Set up initial state for the select loop */ - memset(manager->read_fds, 0, manager->fd_bufsize); - memset(manager->write_fds, 0, manager->fd_bufsize); -#ifdef ISC_PLATFORM_USETHREADS - FD_SET(manager->pipe_fds[0], manager->read_fds); - manager->maxfd = manager->pipe_fds[0]; -#else /* ISC_PLATFORM_USETHREADS */ - manager->maxfd = 0; -#endif /* ISC_PLATFORM_USETHREADS */ - manager->reserved = 0; - memset(manager->fdstate, 0, - manager->fdsize * sizeof(manager->fdstate[0])); - + result = setup_watcher(mctx, manager); + if (result != ISC_R_SUCCESS) + goto cleanup; + memset(manager->fdstate, 0, manager->maxsocks * sizeof(int)); #ifdef ISC_PLATFORM_USETHREADS /* * Start up the select/poll thread. */ if (isc_thread_create(watcher, manager, &manager->watcher) != ISC_R_SUCCESS) { - (void)close(manager->pipe_fds[0]); - (void)close(manager->pipe_fds[1]); - DESTROYLOCK(&manager->lock); - isc_mem_put(mctx, manager, sizeof(*manager)); UNEXPECTED_ERROR(__FILE__, __LINE__, "isc_thread_create() %s", isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL, ISC_MSG_FAILED, "failed")); - return (ISC_R_UNEXPECTED); + cleanup_watcher(mctx, manager); + result = ISC_R_UNEXPECTED; + goto cleanup; } #endif /* ISC_PLATFORM_USETHREADS */ isc_mem_attach(mctx, &manager->mctx); @@ -2662,6 +3534,52 @@ isc_socketmgr_create(isc_mem_t *mctx, isc_socketmgr_t **managerp) { *managerp = manager; return (ISC_R_SUCCESS); + +cleanup: +#ifdef ISC_PLATFORM_USETHREADS + (void)close(manager->pipe_fds[0]); + (void)close(manager->pipe_fds[1]); +#endif /* ISC_PLATFORM_USETHREADS */ + +#ifdef ISC_PLATFORM_USETHREADS +cleanup_condition: + (void)isc_condition_destroy(&manager->shutdown_ok); +#endif /* ISC_PLATFORM_USETHREADS */ + + +cleanup_lock: + if (manager->fdlock != NULL) { + for (i = 0; i < FDLOCK_COUNT; i++) + DESTROYLOCK(&manager->fdlock[i]); + } + DESTROYLOCK(&manager->lock); + +free_manager: + if (manager->fdlock != NULL) { + isc_mem_put(mctx, manager->fdlock, + FDLOCK_COUNT * sizeof(isc_mutex_t)); + } + if (manager->fdstate != NULL) { + isc_mem_put(mctx, manager->fdstate, + manager->maxsocks * sizeof(int)); + } + if (manager->fds != NULL) { + isc_mem_put(mctx, manager->fds, + manager->maxsocks * sizeof(isc_socket_t *)); + } + isc_mem_put(mctx, manager, sizeof(*manager)); + + return (result); +} + +isc_result_t +isc_socketmgr_getmaxsockets(isc_socketmgr_t *manager, unsigned int *nsockp) { + REQUIRE(VALID_MANAGER(manager)); + REQUIRE(nsockp != NULL); + + *nsockp = manager->maxsocks; + + return (ISC_R_SUCCESS); } void @@ -2735,18 +3653,30 @@ isc_socketmgr_destroy(isc_socketmgr_t **managerp) { /* * Clean up. */ + cleanup_watcher(manager->mctx, manager); + #ifdef ISC_PLATFORM_USETHREADS (void)close(manager->pipe_fds[0]); (void)close(manager->pipe_fds[1]); (void)isc_condition_destroy(&manager->shutdown_ok); #endif /* ISC_PLATFORM_USETHREADS */ - for (i = 0; i < (int)manager->fdsize; i++) - if (manager->fdstate[i] == CLOSE_PENDING) + for (i = 0; i < (int)manager->maxsocks; i++) + if (manager->fdstate[i] == CLOSE_PENDING) /* no need to lock */ (void)close(i); + isc_mem_put(manager->mctx, manager->fds, + manager->maxsocks * sizeof(isc_socket_t *)); + isc_mem_put(manager->mctx, manager->fdstate, + manager->maxsocks * sizeof(int)); + + if (manager->fdlock != NULL) { + for (i = 0; i < FDLOCK_COUNT; i++) + DESTROYLOCK(&manager->fdlock[i]); + isc_mem_put(manager->mctx, manager->fdlock, + FDLOCK_COUNT * sizeof(isc_mutex_t)); + } DESTROYLOCK(&manager->lock); - cleanup_fdsets(manager, manager->mctx); manager->magic = 0; mctx= manager->mctx; isc_mem_put(mctx, manager, sizeof(*manager)); @@ -2799,7 +3729,7 @@ socket_recv(isc_socket_t *sock, isc_socketevent_t *dev, isc_task_t *task, * Enqueue the request. If the socket was previously not being * watched, poke the watcher to start paying attention to it. */ - if (ISC_LIST_EMPTY(sock->recv_list)) + if (ISC_LIST_EMPTY(sock->recv_list) && !sock->pending_recv) select_poke(sock->manager, sock->fd, SELECT_POKE_READ); ISC_LIST_ENQUEUE(sock->recv_list, dev, ev_link); @@ -2996,7 +3926,8 @@ socket_send(isc_socket_t *sock, isc_socketevent_t *dev, isc_task_t *task, * not being watched, poke the watcher to start * paying attention to it. */ - if (ISC_LIST_EMPTY(sock->send_list)) + if (ISC_LIST_EMPTY(sock->send_list) && + !sock->pending_send) select_poke(sock->manager, sock->fd, SELECT_POKE_WRITE); ISC_LIST_ENQUEUE(sock->send_list, dev, ev_link); @@ -3286,7 +4217,7 @@ isc_socket_permunix(isc_sockaddr_t *sockaddr, isc_uint32_t perm, } else strcpy(path, "."); #endif - + if (chmod(path, perm) < 0) { isc__strerror(errno, strbuf, sizeof(strbuf)); isc_log_write(isc_lctx, ISC_LOGCATEGORY_GENERAL, @@ -3315,7 +4246,7 @@ isc_socket_permunix(isc_sockaddr_t *sockaddr, isc_uint32_t perm, } isc_result_t -isc_socket_bind(isc_socket_t *sock, isc_sockaddr_t *sockaddr, +isc_socket_bind(isc_socket_t *sock, isc_sockaddr_t *sockaddr, unsigned int options) { char strbuf[ISC_STRERRORSIZE]; int on = 1; @@ -3446,7 +4377,7 @@ isc_socket_listen(isc_socket_t *sock, unsigned int backlog) { } /* - * This should try to do agressive accept() XXXMLG + * This should try to do aggressive accept() XXXMLG */ isc_result_t isc_socket_accept(isc_socket_t *sock, @@ -3557,6 +4488,16 @@ isc_socket_connect(isc_socket_t *sock, isc_sockaddr_t *addr, sock->address = *addr; cc = connect(sock->fd, &addr->type.sa, addr->length); if (cc < 0) { + /* + * HP-UX "fails" to connect a UDP socket and sets errno to + * EINPROGRESS if it's non-blocking. We'd rather regard this as + * a success and let the user detect it if it's really an error + * at the time of sending a packet on the socket. + */ + if (sock->type == isc_sockettype_udp && errno == EINPROGRESS) { + cc = 0; + goto success; + } if (SOFT_ERROR(errno) || errno == EINPROGRESS) goto queue; @@ -3598,6 +4539,7 @@ isc_socket_connect(isc_socket_t *sock, isc_sockaddr_t *addr, /* * If connect completed, fire off the done event. */ + success: if (cc == 0) { sock->connected = 1; sock->bound = 1; @@ -3957,37 +4899,107 @@ isc_socket_ipv6only(isc_socket_t *sock, isc_boolean_t yes) { #ifdef IPV6_V6ONLY if (sock->pf == AF_INET6) { - (void)setsockopt(sock->fd, IPPROTO_IPV6, IPV6_V6ONLY, - (void *)&onoff, sizeof(onoff)); + if (setsockopt(sock->fd, IPPROTO_IPV6, IPV6_V6ONLY, + (void *)&onoff, sizeof(int)) < 0) { + char strbuf[ISC_STRERRORSIZE]; + + UNEXPECTED_ERROR(__FILE__, __LINE__, + "setsockopt(%d, IPV6_V6ONLY) " + "%s: %s", sock->fd, + isc_msgcat_get(isc_msgcat, + ISC_MSGSET_GENERAL, + ISC_MSG_FAILED, + "failed"), + strbuf); + } } + FIX_IPV6_RECVPKTINFO(sock); /* AIX */ #endif } #ifndef ISC_PLATFORM_USETHREADS -void -isc__socketmgr_getfdsets(fd_set **readset, fd_set **writeset, int *maxfd) { +/* In our assumed scenario, we can simply use a single static object. */ +static isc_socketwait_t swait_private; + +int +isc__socketmgr_waitevents(struct timeval *tvp, isc_socketwait_t **swaitp) { + int n; +#ifdef USE_KQUEUE + struct timespec ts, *tsp; +#endif +#ifdef USE_EPOLL + int timeout; +#endif +#ifdef USE_DEVPOLL + struct dvpoll dvp; +#endif + + REQUIRE(swaitp != NULL && *swaitp == NULL); + if (socketmgr == NULL) - *maxfd = 0; - else { - /* Prepare duplicates of fd_sets, as select() will modify */ - memcpy(socketmgr->read_fds_copy, socketmgr->read_fds, - socketmgr->fd_bufsize); - memcpy(socketmgr->write_fds_copy, socketmgr->write_fds, - socketmgr->fd_bufsize); - *readset = socketmgr->read_fds_copy; - *writeset = socketmgr->write_fds_copy; - *maxfd = socketmgr->maxfd + 1; - } + return (0); + +#ifdef USE_KQUEUE + if (tvp != NULL) { + ts.tv_sec = tvp->tv_sec; + ts.tv_nsec = tvp->tv_usec * 1000; + tsp = &ts; + } else + tsp = NULL; + swait_private.nevents = kevent(socketmgr->kqueue_fd, NULL, 0, + socketmgr->events, socketmgr->nevents, + tsp); + n = swait_private.nevents; +#elif defined(USE_EPOLL) + if (tvp != NULL) + timeout = tvp->tv_sec * 1000 + (tvp->tv_usec + 999) / 1000; + else + timeout = -1; + swait_private.nevents = epoll_wait(socketmgr->epoll_fd, + socketmgr->events, + socketmgr->nevents, timeout); + n = swait_private.nevents; +#elif defined(USE_DEVPOLL) + dvp.dp_fds = socketmgr->events; + dvp.dp_nfds = socketmgr->nevents; + if (tvp != NULL) { + dvp.dp_timeout = tvp->tv_sec * 1000 + + (tvp->tv_usec + 999) / 1000; + } else + dvp.dp_timeout = -1; + swait_private.nevents = ioctl(socketmgr->devpoll_fd, DP_POLL, &dvp); + n = swait_private.nevents; +#elif defined(USE_SELECT) + memcpy(socketmgr->read_fds_copy, socketmgr->read_fds, + socketmgr->fd_bufsize); + memcpy(socketmgr->write_fds_copy, socketmgr->write_fds, + socketmgr->fd_bufsize); + + swait_private.readset = socketmgr->read_fds_copy; + swait_private.writeset = socketmgr->write_fds_copy; + swait_private.maxfd = socketmgr->maxfd + 1; + + n = select(swait_private.maxfd, swait_private.readset, + swait_private.writeset, NULL, tvp); +#endif + + *swaitp = &swait_private; + return (n); } isc_result_t -isc__socketmgr_dispatch(fd_set *readset, fd_set *writeset, int maxfd) { - isc_socketmgr_t *manager = socketmgr; +isc__socketmgr_dispatch(isc_socketwait_t *swait) { + REQUIRE(swait == &swait_private); - if (manager == NULL) + if (socketmgr == NULL) return (ISC_R_NOTFOUND); - process_fds(manager, maxfd, readset, writeset); +#if defined(USE_KQUEUE) || defined(USE_EPOLL) || defined(USE_DEVPOLL) + (void)process_fds(socketmgr, socketmgr->events, swait->nevents); return (ISC_R_SUCCESS); +#elif defined(USE_SELECT) + process_fds(socketmgr, swait->maxfd, swait->readset, swait->writeset); + return (ISC_R_SUCCESS); +#endif } #endif /* ISC_PLATFORM_USETHREADS */ diff --git a/lib/isc/unix/socket_p.h b/lib/isc/unix/socket_p.h index 4f9cf27..b7da860 100644 --- a/lib/isc/unix/socket_p.h +++ b/lib/isc/unix/socket_p.h @@ -1,8 +1,8 @@ /* - * Copyright (C) 2004, 2005 Internet Systems Consortium, Inc. ("ISC") + * Copyright (C) 2004, 2005, 2008 Internet Systems Consortium, Inc. ("ISC") * Copyright (C) 2000, 2001 Internet Software Consortium. * - * Permission to use, copy, modify, and distribute this software for any + * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * @@ -15,7 +15,7 @@ * PERFORMANCE OF THIS SOFTWARE. */ -/* $Id: socket_p.h,v 1.7.18.2.52.1 2008/07/29 04:47:31 each Exp $ */ +/* $Id: socket_p.h,v 1.7.18.4 2008/06/24 23:45:55 tbox Exp $ */ #ifndef ISC_SOCKET_P_H #define ISC_SOCKET_P_H @@ -26,10 +26,7 @@ #include <sys/select.h> #endif -void -isc__socketmgr_getfdsets(fd_set **readset, fd_set **writeset, int *maxfd); - -isc_result_t -isc__socketmgr_dispatch(fd_set *readset, fd_set *writeset, int maxfd); - +typedef struct isc_socketwait isc_socketwait_t; +int isc__socketmgr_waitevents(struct timeval *, isc_socketwait_t **); +isc_result_t isc__socketmgr_dispatch(isc_socketwait_t *); #endif /* ISC_SOCKET_P_H */ diff --git a/lib/isc/unix/time.c b/lib/isc/unix/time.c index bac24d7..facc12b 100644 --- a/lib/isc/unix/time.c +++ b/lib/isc/unix/time.c @@ -1,8 +1,8 @@ /* - * Copyright (C) 2004, 2005 Internet Systems Consortium, Inc. ("ISC") + * Copyright (C) 2004, 2005, 2008 Internet Systems Consortium, Inc. ("ISC") * Copyright (C) 1998-2001, 2003 Internet Software Consortium. * - * Permission to use, copy, modify, and distribute this software for any + * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * @@ -15,7 +15,7 @@ * PERFORMANCE OF THIS SOFTWARE. */ -/* $Id: time.c,v 1.47.18.2 2005/04/29 00:17:09 marka Exp $ */ +/* $Id: time.c,v 1.47.18.4 2008/02/18 23:46:01 tbox Exp $ */ /*! \file */ @@ -227,7 +227,7 @@ isc_time_nowplusinterval(isc_time_t *t, const isc_interval_t *i) { t->seconds = tv.tv_sec + i->seconds; t->nanoseconds = tv.tv_usec * NS_PER_US + i->nanoseconds; - if (t->nanoseconds > NS_PER_S) { + if (t->nanoseconds >= NS_PER_S) { t->seconds++; t->nanoseconds -= NS_PER_S; } @@ -410,5 +410,5 @@ isc_time_formattimestamp(const isc_time_t *t, char *buf, unsigned int len) { snprintf(buf + flen, len - flen, ".%03u", t->nanoseconds / 1000000); else - snprintf(buf, len, "99-Bad-9999 99:99:99.999"); + snprintf(buf, len, "99-Bad-9999 99:99:99.999"); } |