summary | refs | log | tree | commit | diff | stats
path: root/contrib/bind9/lib/dns/rbtdb.c
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/bind9/lib/dns/rbtdb.c')
-rw-r--r-- contrib/bind9/lib/dns/rbtdb.c 2651
1 files changed, 2237 insertions, 414 deletions
diff --git a/contrib/bind9/lib/dns/rbtdb.c b/contrib/bind9/lib/dns/rbtdb.c
index 462a718..9741c15 100644
--- a/contrib/bind9/lib/dns/rbtdb.c
+++ b/contrib/bind9/lib/dns/rbtdb.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2004-2008 Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (C) 2004-2009 Internet Systems Consortium, Inc. ("ISC")
* Copyright (C) 1999-2003 Internet Software Consortium.
*
* Permission to use, copy, modify, and/or distribute this software for any
@@ -15,7 +15,7 @@
* PERFORMANCE OF THIS SOFTWARE.
*/
-/* $Id: rbtdb.c,v 1.196.18.53 2008/01/31 23:46:05 tbox Exp $ */
+/* $Id: rbtdb.c,v 1.270.12.6 2009/05/06 23:34:30 jinmei Exp $ */
/*! \file */
@@ -25,13 +25,18 @@
#include <config.h>
+/* #define inline */
+
#include <isc/event.h>
+#include <isc/heap.h>
#include <isc/mem.h>
-#include <isc/print.h>
#include <isc/mutex.h>
+#include <isc/platform.h>
+#include <isc/print.h>
#include <isc/random.h>
#include <isc/refcount.h>
#include <isc/rwlock.h>
+#include <isc/serial.h>
#include <isc/string.h>
#include <isc/task.h>
#include <isc/time.h>
@@ -45,12 +50,16 @@
#include <dns/lib.h>
#include <dns/log.h>
#include <dns/masterdump.h>
+#include <dns/nsec.h>
+#include <dns/nsec3.h>
#include <dns/rbt.h>
#include <dns/rdata.h>
#include <dns/rdataset.h>
#include <dns/rdatasetiter.h>
#include <dns/rdataslab.h>
+#include <dns/rdatastruct.h>
#include <dns/result.h>
+#include <dns/stats.h>
#include <dns/view.h>
#include <dns/zone.h>
#include <dns/zonekey.h>
@@ -62,20 +71,20 @@
#endif
#ifdef DNS_RBTDB_VERSION64
-#define RBTDB_MAGIC ISC_MAGIC('R', 'B', 'D', '8')
+#define RBTDB_MAGIC ISC_MAGIC('R', 'B', 'D', '8')
#else
-#define RBTDB_MAGIC ISC_MAGIC('R', 'B', 'D', '4')
+#define RBTDB_MAGIC ISC_MAGIC('R', 'B', 'D', '4')
#endif
/*%
* Note that "impmagic" is not the first four bytes of the struct, so
* ISC_MAGIC_VALID cannot be used.
*/
-#define VALID_RBTDB(rbtdb) ((rbtdb) != NULL && \
+#define VALID_RBTDB(rbtdb) ((rbtdb) != NULL && \
(rbtdb)->common.impmagic == RBTDB_MAGIC)
#ifdef DNS_RBTDB_VERSION64
-typedef isc_uint64_t rbtdb_serial_t;
+typedef isc_uint64_t rbtdb_serial_t;
/*%
* Make casting easier in symbolic debuggers by using different names
* for the 64 bit version.
@@ -84,17 +93,19 @@ typedef isc_uint64_t rbtdb_serial_t;
#define rdatasetheader_t rdatasetheader64_t
#define rbtdb_version_t rbtdb_version64_t
#else
-typedef isc_uint32_t rbtdb_serial_t;
+typedef isc_uint32_t rbtdb_serial_t;
#endif
-typedef isc_uint32_t rbtdb_rdatatype_t;
+typedef isc_uint32_t rbtdb_rdatatype_t;
-#define RBTDB_RDATATYPE_BASE(type) ((dns_rdatatype_t)((type) & 0xFFFF))
-#define RBTDB_RDATATYPE_EXT(type) ((dns_rdatatype_t)((type) >> 16))
-#define RBTDB_RDATATYPE_VALUE(b, e) (((e) << 16) | (b))
+#define RBTDB_RDATATYPE_BASE(type) ((dns_rdatatype_t)((type) & 0xFFFF))
+#define RBTDB_RDATATYPE_EXT(type) ((dns_rdatatype_t)((type) >> 16))
+#define RBTDB_RDATATYPE_VALUE(b, e) ((rbtdb_rdatatype_t)((e) << 16) | (b))
#define RBTDB_RDATATYPE_SIGNSEC \
RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_nsec)
+#define RBTDB_RDATATYPE_SIGNSEC3 \
+ RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_nsec3)
#define RBTDB_RDATATYPE_SIGNS \
RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_ns)
#define RBTDB_RDATATYPE_SIGCNAME \
@@ -119,15 +130,15 @@ typedef isc_uint32_t rbtdb_rdatatype_t;
#endif
#if DNS_RBTDB_USERWLOCK
-#define RBTDB_INITLOCK(l) isc_rwlock_init((l), 0, 0)
-#define RBTDB_DESTROYLOCK(l) isc_rwlock_destroy(l)
-#define RBTDB_LOCK(l, t) RWLOCK((l), (t))
-#define RBTDB_UNLOCK(l, t) RWUNLOCK((l), (t))
+#define RBTDB_INITLOCK(l) isc_rwlock_init((l), 0, 0)
+#define RBTDB_DESTROYLOCK(l) isc_rwlock_destroy(l)
+#define RBTDB_LOCK(l, t) RWLOCK((l), (t))
+#define RBTDB_UNLOCK(l, t) RWUNLOCK((l), (t))
#else
-#define RBTDB_INITLOCK(l) isc_mutex_init(l)
-#define RBTDB_DESTROYLOCK(l) DESTROYLOCK(l)
-#define RBTDB_LOCK(l, t) LOCK(l)
-#define RBTDB_UNLOCK(l, t) UNLOCK(l)
+#define RBTDB_INITLOCK(l) isc_mutex_init(l)
+#define RBTDB_DESTROYLOCK(l) DESTROYLOCK(l)
+#define RBTDB_LOCK(l, t) LOCK(l)
+#define RBTDB_UNLOCK(l, t) UNLOCK(l)
#endif
/*
@@ -152,47 +163,53 @@ typedef isc_uint32_t rbtdb_rdatatype_t;
#if defined(ISC_RWLOCK_USEATOMIC) && defined(DNS_RBT_USEISCREFCOUNT)
typedef isc_rwlock_t nodelock_t;
-#define NODE_INITLOCK(l) isc_rwlock_init((l), 0, 0)
-#define NODE_DESTROYLOCK(l) isc_rwlock_destroy(l)
-#define NODE_LOCK(l, t) RWLOCK((l), (t))
-#define NODE_UNLOCK(l, t) RWUNLOCK((l), (t))
-#define NODE_TRYUPGRADE(l) isc_rwlock_tryupgrade(l)
-
-#define NODE_STRONGLOCK(l) ((void)0)
-#define NODE_STRONGUNLOCK(l) ((void)0)
-#define NODE_WEAKLOCK(l, t) NODE_LOCK(l, t)
-#define NODE_WEAKUNLOCK(l, t) NODE_UNLOCK(l, t)
-#define NODE_WEAKDOWNGRADE(l) isc_rwlock_downgrade(l)
+#define NODE_INITLOCK(l) isc_rwlock_init((l), 0, 0)
+#define NODE_DESTROYLOCK(l) isc_rwlock_destroy(l)
+#define NODE_LOCK(l, t) RWLOCK((l), (t))
+#define NODE_UNLOCK(l, t) RWUNLOCK((l), (t))
+#define NODE_TRYUPGRADE(l) isc_rwlock_tryupgrade(l)
+
+#define NODE_STRONGLOCK(l) ((void)0)
+#define NODE_STRONGUNLOCK(l) ((void)0)
+#define NODE_WEAKLOCK(l, t) NODE_LOCK(l, t)
+#define NODE_WEAKUNLOCK(l, t) NODE_UNLOCK(l, t)
+#define NODE_WEAKDOWNGRADE(l) isc_rwlock_downgrade(l)
#else
typedef isc_mutex_t nodelock_t;
-#define NODE_INITLOCK(l) isc_mutex_init(l)
-#define NODE_DESTROYLOCK(l) DESTROYLOCK(l)
-#define NODE_LOCK(l, t) LOCK(l)
-#define NODE_UNLOCK(l, t) UNLOCK(l)
-#define NODE_TRYUPGRADE(l) ISC_R_SUCCESS
-
-#define NODE_STRONGLOCK(l) LOCK(l)
-#define NODE_STRONGUNLOCK(l) UNLOCK(l)
-#define NODE_WEAKLOCK(l, t) ((void)0)
-#define NODE_WEAKUNLOCK(l, t) ((void)0)
-#define NODE_WEAKDOWNGRADE(l) ((void)0)
+#define NODE_INITLOCK(l) isc_mutex_init(l)
+#define NODE_DESTROYLOCK(l) DESTROYLOCK(l)
+#define NODE_LOCK(l, t) LOCK(l)
+#define NODE_UNLOCK(l, t) UNLOCK(l)
+#define NODE_TRYUPGRADE(l) ISC_R_SUCCESS
+
+#define NODE_STRONGLOCK(l) LOCK(l)
+#define NODE_STRONGUNLOCK(l) UNLOCK(l)
+#define NODE_WEAKLOCK(l, t) ((void)0)
+#define NODE_WEAKUNLOCK(l, t) ((void)0)
+#define NODE_WEAKDOWNGRADE(l) ((void)0)
#endif
-#ifndef DNS_RDATASET_FIXED
-#define DNS_RDATASET_FIXED 1
+/*%
+ * Whether to rate-limit updating the LRU to avoid possible thread contention.
+ * Our performance measurement has shown the cost is marginal, so it's defined
+ * to be 0 by default either with or without threads.
+ */
+#ifndef DNS_RBTDB_LIMITLRUUPDATE
+#define DNS_RBTDB_LIMITLRUUPDATE 0
#endif
/*
- * Allow clients with a virtual time of upto 5 minutes in the past to see
+ * Allow clients with a virtual time of up to 5 minutes in the past to see
* records that would otherwise have expired.
*/
#define RBTDB_VIRTUAL 300
struct noqname {
- dns_name_t name;
- void * nsec;
- void * nsecsig;
+ dns_name_t name;
+ void * neg;
+ void * negsig;
+ dns_rdatatype_t type;
};
typedef struct acachectl acachectl_t;
@@ -201,18 +218,19 @@ typedef struct rdatasetheader {
/*%
* Locked by the owning node's lock.
*/
- rbtdb_serial_t serial;
- dns_ttl_t ttl;
- rbtdb_rdatatype_t type;
- isc_uint16_t attributes;
- dns_trust_t trust;
- struct noqname *noqname;
+ rbtdb_serial_t serial;
+ dns_ttl_t rdh_ttl;
+ rbtdb_rdatatype_t type;
+ isc_uint16_t attributes;
+ dns_trust_t trust;
+ struct noqname *noqname;
+ struct noqname *closest;
/*%<
* We don't use the LIST macros, because the LIST structure has
* both head and tail pointers, and is doubly linked.
*/
- struct rdatasetheader *next;
+ struct rdatasetheader *next;
/*%<
* If this is the top header for an rdataset, 'next' points
* to the top header for the next rdataset (i.e., the next type).
@@ -220,13 +238,13 @@ typedef struct rdatasetheader {
* at this header.
*/
- struct rdatasetheader *down;
+ struct rdatasetheader *down;
/*%<
* Points to the header for the next older version of
* this rdataset.
*/
- isc_uint32_t count;
+ isc_uint32_t count;
/*%<
* Monotonically increased every time this rdataset is bound so that
* it is used as the base of the starting point in DNS responses
@@ -235,27 +253,56 @@ typedef struct rdatasetheader {
* performance reasons.
*/
- acachectl_t *additional_auth;
- acachectl_t *additional_glue;
+ acachectl_t *additional_auth;
+ acachectl_t *additional_glue;
+
+ dns_rbtnode_t *node;
+ isc_stdtime_t last_used;
+ ISC_LINK(struct rdatasetheader) lru_link;
+ /*%<
+ * Used for LRU-based cache management. We should probably make
+ * these cache-DB specific. We might also make it a pointer and
+ * ensure only the top header has a valid link to save memory.
+ * The linked-list is locked by the rbtdb->lrulock.
+ */
+
+ /*
+ * It's possible this should not be here anymore, but instead
+ * referenced from the bucket's heap directly.
+ */
+#if 0
+ isc_heap_t *heap;
+#endif
+ unsigned int heap_index;
+ /*%<
+ * Used for TTL-based cache cleaning.
+ */
+ isc_stdtime_t resign;
} rdatasetheader_t;
-#define RDATASET_ATTR_NONEXISTENT 0x0001
-#define RDATASET_ATTR_STALE 0x0002
-#define RDATASET_ATTR_IGNORE 0x0004
-#define RDATASET_ATTR_RETAIN 0x0008
-#define RDATASET_ATTR_NXDOMAIN 0x0010
+typedef ISC_LIST(rdatasetheader_t) rdatasetheaderlist_t;
+typedef ISC_LIST(dns_rbtnode_t) rbtnodelist_t;
+
+#define RDATASET_ATTR_NONEXISTENT 0x0001
+#define RDATASET_ATTR_STALE 0x0002
+#define RDATASET_ATTR_IGNORE 0x0004
+#define RDATASET_ATTR_RETAIN 0x0008
+#define RDATASET_ATTR_NXDOMAIN 0x0010
+#define RDATASET_ATTR_RESIGN 0x0020
+#define RDATASET_ATTR_STATCOUNT 0x0040
+#define RDATASET_ATTR_OPTOUT 0x0080
typedef struct acache_cbarg {
- dns_rdatasetadditional_t type;
- unsigned int count;
- dns_db_t *db;
- dns_dbnode_t *node;
- rdatasetheader_t *header;
+ dns_rdatasetadditional_t type;
+ unsigned int count;
+ dns_db_t *db;
+ dns_dbnode_t *node;
+ rdatasetheader_t *header;
} acache_cbarg_t;
struct acachectl {
- dns_acacheentry_t *entry;
- acache_cbarg_t *cbarg;
+ dns_acacheentry_t *entry;
+ acache_cbarg_t *cbarg;
};
/*
@@ -266,7 +313,7 @@ struct acachectl {
* expired.
*/
-#undef IGNORE /* WIN32 winbase.h defines this. */
+#undef IGNORE /* WIN32 winbase.h defines this. */
#define EXISTS(header) \
(((header)->attributes & RDATASET_ATTR_NONEXISTENT) == 0)
@@ -278,106 +325,164 @@ struct acachectl {
(((header)->attributes & RDATASET_ATTR_RETAIN) != 0)
#define NXDOMAIN(header) \
(((header)->attributes & RDATASET_ATTR_NXDOMAIN) != 0)
+#define RESIGN(header) \
+ (((header)->attributes & RDATASET_ATTR_RESIGN) != 0)
+#define OPTOUT(header) \
+ (((header)->attributes & RDATASET_ATTR_OPTOUT) != 0)
+
+#define DEFAULT_NODE_LOCK_COUNT 7 /*%< Should be prime. */
-#define DEFAULT_NODE_LOCK_COUNT 7 /*%< Should be prime. */
-#define DEFAULT_CACHE_NODE_LOCK_COUNT 1009 /*%< Should be prime. */
+/*%
+ * Number of buckets for cache DB entries (locks, LRU lists, TTL heaps).
+ * There is a tradeoff issue about configuring this value: if this is too
+ * small, it may cause heavier contention between threads; if this is too large,
+ * LRU purge algorithm won't work well (entries tend to be purged prematurely).
+ * The default value should work well for most environments, but this can
+ * also be configurable at compilation time via the
+ * DNS_RBTDB_CACHE_NODE_LOCK_COUNT variable. This value must be larger than
+ * 1 due to the assumption of overmem_purge().
+ */
+#ifdef DNS_RBTDB_CACHE_NODE_LOCK_COUNT
+#if DNS_RBTDB_CACHE_NODE_LOCK_COUNT <= 1
+#error "DNS_RBTDB_CACHE_NODE_LOCK_COUNT must be larger than 1"
+#else
+#define DEFAULT_CACHE_NODE_LOCK_COUNT DNS_RBTDB_CACHE_NODE_LOCK_COUNT
+#endif
+#else
+#define DEFAULT_CACHE_NODE_LOCK_COUNT 16
+#endif /* DNS_RBTDB_CACHE_NODE_LOCK_COUNT */
typedef struct {
- nodelock_t lock;
+ nodelock_t lock;
/* Protected in the refcount routines. */
- isc_refcount_t references;
+ isc_refcount_t references;
/* Locked by lock. */
- isc_boolean_t exiting;
+ isc_boolean_t exiting;
} rbtdb_nodelock_t;
typedef struct rbtdb_changed {
- dns_rbtnode_t * node;
- isc_boolean_t dirty;
- ISC_LINK(struct rbtdb_changed) link;
+ dns_rbtnode_t * node;
+ isc_boolean_t dirty;
+ ISC_LINK(struct rbtdb_changed) link;
} rbtdb_changed_t;
-typedef ISC_LIST(rbtdb_changed_t) rbtdb_changedlist_t;
+typedef ISC_LIST(rbtdb_changed_t) rbtdb_changedlist_t;
+
+typedef enum {
+ dns_db_insecure,
+ dns_db_partial,
+ dns_db_secure
+} dns_db_secure_t;
typedef struct rbtdb_version {
/* Not locked */
- rbtdb_serial_t serial;
+ rbtdb_serial_t serial;
/*
* Protected in the refcount routines.
* XXXJT: should we change the lock policy based on the refcount
* performance?
*/
- isc_refcount_t references;
+ isc_refcount_t references;
/* Locked by database lock. */
- isc_boolean_t writer;
- isc_boolean_t commit_ok;
- rbtdb_changedlist_t changed_list;
- ISC_LINK(struct rbtdb_version) link;
+ isc_boolean_t writer;
+ isc_boolean_t commit_ok;
+ rbtdb_changedlist_t changed_list;
+ rdatasetheaderlist_t resigned_list;
+ ISC_LINK(struct rbtdb_version) link;
+ dns_db_secure_t secure;
+ isc_boolean_t havensec3;
+ /* NSEC3 parameters */
+ dns_hash_t hash;
+ isc_uint8_t flags;
+ isc_uint16_t iterations;
+ isc_uint8_t salt_length;
+ unsigned char salt[NSEC3_MAX_HASH_LENGTH];
} rbtdb_version_t;
-typedef ISC_LIST(rbtdb_version_t) rbtdb_versionlist_t;
+typedef ISC_LIST(rbtdb_version_t) rbtdb_versionlist_t;
typedef struct {
/* Unlocked. */
- dns_db_t common;
+ dns_db_t common;
#if DNS_RBTDB_USERWLOCK
- isc_rwlock_t lock;
+ isc_rwlock_t lock;
#else
- isc_mutex_t lock;
+ isc_mutex_t lock;
#endif
- isc_rwlock_t tree_lock;
- unsigned int node_lock_count;
- rbtdb_nodelock_t * node_locks;
- dns_rbtnode_t * origin_node;
+ isc_rwlock_t tree_lock;
+ unsigned int node_lock_count;
+ rbtdb_nodelock_t * node_locks;
+ dns_rbtnode_t * origin_node;
+ dns_stats_t * rrsetstats; /* cache DB only */
/* Locked by lock. */
- unsigned int active;
- isc_refcount_t references;
- unsigned int attributes;
- rbtdb_serial_t current_serial;
- rbtdb_serial_t least_serial;
- rbtdb_serial_t next_serial;
- rbtdb_version_t * current_version;
- rbtdb_version_t * future_version;
- rbtdb_versionlist_t open_versions;
- isc_boolean_t overmem;
- isc_task_t * task;
- dns_dbnode_t *soanode;
- dns_dbnode_t *nsnode;
+ unsigned int active;
+ isc_refcount_t references;
+ unsigned int attributes;
+ rbtdb_serial_t current_serial;
+ rbtdb_serial_t least_serial;
+ rbtdb_serial_t next_serial;
+ rbtdb_version_t * current_version;
+ rbtdb_version_t * future_version;
+ rbtdb_versionlist_t open_versions;
+ isc_boolean_t overmem;
+ isc_task_t * task;
+ dns_dbnode_t *soanode;
+ dns_dbnode_t *nsnode;
+
+ /*
+ * This is a linked list used to implement the LRU cache. There will
+ * be node_lock_count linked lists here. Nodes in bucket 1 will be
+ * placed on the linked list rdatasets[1].
+ */
+ rdatasetheaderlist_t *rdatasets;
+
+ /*%
+ * Temporary storage for stale cache nodes and dynamically deleted
+ * nodes that await being cleaned up.
+ */
+ rbtnodelist_t *deadnodes;
+
+ /*
+ * Heaps. Each of these is used for TTL based expiry.
+ */
+ isc_heap_t **heaps;
+
/* Locked by tree_lock. */
- dns_rbt_t * tree;
- isc_boolean_t secure;
+ dns_rbt_t * tree;
+ dns_rbt_t * nsec3;
/* Unlocked */
- unsigned int quantum;
+ unsigned int quantum;
} dns_rbtdb_t;
-#define RBTDB_ATTR_LOADED 0x01
-#define RBTDB_ATTR_LOADING 0x02
+#define RBTDB_ATTR_LOADED 0x01
+#define RBTDB_ATTR_LOADING 0x02
/*%
* Search Context
*/
typedef struct {
- dns_rbtdb_t * rbtdb;
- rbtdb_version_t * rbtversion;
- rbtdb_serial_t serial;
- unsigned int options;
- dns_rbtnodechain_t chain;
- isc_boolean_t copy_name;
- isc_boolean_t need_cleanup;
- isc_boolean_t wild;
- dns_rbtnode_t * zonecut;
- rdatasetheader_t * zonecut_rdataset;
- rdatasetheader_t * zonecut_sigrdataset;
- dns_fixedname_t zonecut_name;
- isc_stdtime_t now;
+ dns_rbtdb_t * rbtdb;
+ rbtdb_version_t * rbtversion;
+ rbtdb_serial_t serial;
+ unsigned int options;
+ dns_rbtnodechain_t chain;
+ isc_boolean_t copy_name;
+ isc_boolean_t need_cleanup;
+ isc_boolean_t wild;
+ dns_rbtnode_t * zonecut;
+ rdatasetheader_t * zonecut_rdataset;
+ rdatasetheader_t * zonecut_sigrdataset;
+ dns_fixedname_t zonecut_name;
+ isc_stdtime_t now;
} rbtdb_search_t;
/*%
* Load Context
*/
typedef struct {
- dns_rbtdb_t * rbtdb;
- isc_stdtime_t now;
+ dns_rbtdb_t * rbtdb;
+ isc_stdtime_t now;
} rbtdb_load_t;
static void rdataset_disassociate(dns_rdataset_t *rdataset);
@@ -388,8 +493,12 @@ static void rdataset_clone(dns_rdataset_t *source, dns_rdataset_t *target);
static unsigned int rdataset_count(dns_rdataset_t *rdataset);
static isc_result_t rdataset_getnoqname(dns_rdataset_t *rdataset,
dns_name_t *name,
- dns_rdataset_t *nsec,
- dns_rdataset_t *nsecsig);
+ dns_rdataset_t *neg,
+ dns_rdataset_t *negsig);
+static isc_result_t rdataset_getclosest(dns_rdataset_t *rdataset,
+ dns_name_t *name,
+ dns_rdataset_t *neg,
+ dns_rdataset_t *negsig);
static isc_result_t rdataset_getadditional(dns_rdataset_t *rdataset,
dns_rdatasetadditional_t type,
dns_rdatatype_t qtype,
@@ -414,6 +523,17 @@ static isc_result_t rdataset_putadditional(dns_acache_t *acache,
dns_rdataset_t *rdataset,
dns_rdatasetadditional_t type,
dns_rdatatype_t qtype);
+static inline isc_boolean_t need_headerupdate(rdatasetheader_t *header,
+ isc_stdtime_t now);
+static void update_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
+ isc_stdtime_t now);
+static void expire_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
+ isc_boolean_t tree_locked);
+static void overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start,
+ isc_stdtime_t now, isc_boolean_t tree_locked);
+static isc_result_t resign_insert(dns_rbtdb_t *rbtdb, int idx,
+ rdatasetheader_t *newheader);
+static void prune_tree(isc_task_t *task, isc_event_t *event);
static dns_rdatasetmethods_t rdataset_methods = {
rdataset_disassociate,
@@ -424,6 +544,8 @@ static dns_rdatasetmethods_t rdataset_methods = {
rdataset_count,
NULL,
rdataset_getnoqname,
+ NULL,
+ rdataset_getclosest,
rdataset_getadditional,
rdataset_setadditional,
rdataset_putadditional
@@ -443,22 +565,22 @@ static dns_rdatasetitermethods_t rdatasetiter_methods = {
};
typedef struct rbtdb_rdatasetiter {
- dns_rdatasetiter_t common;
- rdatasetheader_t * current;
+ dns_rdatasetiter_t common;
+ rdatasetheader_t * current;
} rbtdb_rdatasetiter_t;
-static void dbiterator_destroy(dns_dbiterator_t **iteratorp);
-static isc_result_t dbiterator_first(dns_dbiterator_t *iterator);
-static isc_result_t dbiterator_last(dns_dbiterator_t *iterator);
-static isc_result_t dbiterator_seek(dns_dbiterator_t *iterator,
+static void dbiterator_destroy(dns_dbiterator_t **iteratorp);
+static isc_result_t dbiterator_first(dns_dbiterator_t *iterator);
+static isc_result_t dbiterator_last(dns_dbiterator_t *iterator);
+static isc_result_t dbiterator_seek(dns_dbiterator_t *iterator,
dns_name_t *name);
-static isc_result_t dbiterator_prev(dns_dbiterator_t *iterator);
-static isc_result_t dbiterator_next(dns_dbiterator_t *iterator);
-static isc_result_t dbiterator_current(dns_dbiterator_t *iterator,
+static isc_result_t dbiterator_prev(dns_dbiterator_t *iterator);
+static isc_result_t dbiterator_next(dns_dbiterator_t *iterator);
+static isc_result_t dbiterator_current(dns_dbiterator_t *iterator,
dns_dbnode_t **nodep,
dns_name_t *name);
-static isc_result_t dbiterator_pause(dns_dbiterator_t *iterator);
-static isc_result_t dbiterator_origin(dns_dbiterator_t *iterator,
+static isc_result_t dbiterator_pause(dns_dbiterator_t *iterator);
+static isc_result_t dbiterator_origin(dns_dbiterator_t *iterator,
dns_name_t *name);
static dns_dbiteratormethods_t dbiterator_methods = {
@@ -479,17 +601,21 @@ static dns_dbiteratormethods_t dbiterator_methods = {
* If 'paused' is ISC_TRUE, then the tree lock is not being held.
*/
typedef struct rbtdb_dbiterator {
- dns_dbiterator_t common;
- isc_boolean_t paused;
- isc_boolean_t new_origin;
- isc_rwlocktype_t tree_locked;
- isc_result_t result;
- dns_fixedname_t name;
- dns_fixedname_t origin;
- dns_rbtnodechain_t chain;
- dns_rbtnode_t *node;
- dns_rbtnode_t *deletions[DELETION_BATCH_MAX];
- int delete;
+ dns_dbiterator_t common;
+ isc_boolean_t paused;
+ isc_boolean_t new_origin;
+ isc_rwlocktype_t tree_locked;
+ isc_result_t result;
+ dns_fixedname_t name;
+ dns_fixedname_t origin;
+ dns_rbtnodechain_t chain;
+ dns_rbtnodechain_t nsec3chain;
+ dns_rbtnodechain_t *current;
+ dns_rbtnode_t *node;
+ dns_rbtnode_t *deletions[DELETION_BATCH_MAX];
+ int delete;
+ isc_boolean_t nsec3only;
+ isc_boolean_t nonsec3;
} rbtdb_dbiterator_t;
@@ -498,17 +624,20 @@ typedef struct rbtdb_dbiterator {
static void free_rbtdb(dns_rbtdb_t *rbtdb, isc_boolean_t log,
isc_event_t *event);
+static void overmem(dns_db_t *db, isc_boolean_t overmem);
+static void setnsec3parameters(dns_db_t *db, rbtdb_version_t *version,
+ isc_boolean_t *nsec3createflag);
/*%
* 'init_count' is used to initialize 'newheader->count' which in turn
* is used to determine where in the cycle rrset-order cyclic starts.
- * We don't lock this as we don't care about simultanious updates.
+ * We don't lock this as we don't care about simultaneous updates.
*
* Note:
- * Both init_count and header->count can be ISC_UINT32_MAX.
+ * Both init_count and header->count can be ISC_UINT32_MAX.
* The count on the returned rdataset however can't be as
- * that indicates that the database does not implement cyclic
- * processing.
+ * that indicates that the database does not implement cyclic
+ * processing.
*/
static unsigned int init_count;
@@ -518,12 +647,12 @@ static unsigned int init_count;
* If a routine is going to lock more than one lock in this module, then
* the locking must be done in the following order:
*
- * Tree Lock
+ * Tree Lock
*
- * Node Lock (Only one from the set may be locked at one time by
- * any caller)
+ * Node Lock (Only one from the set may be locked at one time by
+ * any caller)
*
- * Database Lock
+ * Database Lock
*
* Failure to follow this hierarchy can result in deadlock.
*/
@@ -531,11 +660,7 @@ static unsigned int init_count;
/*
* Deleting Nodes
*
- * Currently there is no deletion of nodes from the database, except when
- * the database is being destroyed.
- *
- * If node deletion is added in the future, then for zone databases the node
- * for the origin of the zone MUST NOT be deleted.
+ * For zone databases the node for the origin of the zone MUST NOT be deleted.
*/
@@ -563,6 +688,96 @@ free_rbtdb_callback(isc_task_t *task, isc_event_t *event) {
free_rbtdb(rbtdb, ISC_TRUE, event);
}
+/*%
+ * Adjust the RRset statistics counter that corresponds to 'header'.
+ *
+ * The counter type is derived from the header: NXDOMAIN headers are counted
+ * under the NXDOMAIN attribute with a base type of 0; headers whose base
+ * rdata type is 0 are counted under the NXRRSET attribute using the extended
+ * type; everything else is counted under its base type.  The counter is
+ * incremented when 'increment' is true and decremented otherwise.
+ *
+ * Requires that 'rbtdb' is a cache database.
+ */
+static void
+update_rrsetstats(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
+		  isc_boolean_t increment)
+{
+	dns_rdatastatstype_t attrs = 0;
+	dns_rdatastatstype_t basetype = 0;
+	dns_rdatastatstype_t stattype;
+
+	/* At the moment we count statistics only for cache DB */
+	INSIST(IS_CACHE(rbtdb));
+
+	if (NXDOMAIN(header)) {
+		attrs = DNS_RDATASTATSTYPE_ATTR_NXDOMAIN;
+	} else {
+		basetype = RBTDB_RDATATYPE_BASE(header->type);
+		if (basetype == 0) {
+			/* No base type: count it as NXRRSET of the ext type. */
+			attrs = DNS_RDATASTATSTYPE_ATTR_NXRRSET;
+			basetype = RBTDB_RDATATYPE_EXT(header->type);
+		}
+	}
+	stattype = DNS_RDATASTATSTYPE_VALUE(basetype, attrs);
+
+	if (!increment) {
+		dns_rdatasetstats_decrement(rbtdb->rrsetstats, stattype);
+		return;
+	}
+	dns_rdatasetstats_increment(rbtdb->rrsetstats, stattype);
+}
+
+/*%
+ * Store 'newttl' in 'header' and, for a cache database, tell the TTL heap
+ * of the header's lock bucket that the element's rank has changed.
+ *
+ * Nothing beyond the assignment happens when the database is not a cache,
+ * when the header has a heap_index of 0, when the TTL did not change, or
+ * when no heap exists for the bucket.
+ */
+static void
+set_ttl(dns_rbtdb_t *rbtdb, rdatasetheader_t *header, dns_ttl_t newttl) {
+	dns_ttl_t prevttl = header->rdh_ttl;
+	isc_heap_t *ttlheap;
+	int bucket;
+
+	header->rdh_ttl = newttl;
+
+	/* Heap bookkeeping is only done for cache databases. */
+	if (!IS_CACHE(rbtdb))
+		return;
+
+	/*
+	 * A heap_index of 0 is treated as "not in a heap", and an unchanged
+	 * TTL cannot change the header's position; either way there is
+	 * nothing to adjust.
+	 */
+	if (header->heap_index == 0 || newttl == prevttl)
+		return;
+
+	bucket = header->node->locknum;
+	if (rbtdb->heaps == NULL)
+		return;
+	ttlheap = rbtdb->heaps[bucket];
+	if (ttlheap == NULL)
+		return;
+
+	/*
+	 * NOTE(review): presumably the heap is ordered by ttl_sooner(), so
+	 * a smaller TTL means a higher priority -- confirm where the heap
+	 * is created.
+	 */
+	if (newttl < prevttl)
+		isc_heap_increased(ttlheap, header->heap_index);
+	else
+		isc_heap_decreased(ttlheap, header->heap_index);
+}
+
+/*%
+ * Ranking functions for the heap code.  Each one returns ISC_TRUE when
+ * element 'v1' happens "sooner" than element 'v2', and ISC_FALSE otherwise.
+ *
+ * ttl_sooner() ranks rdataset headers by their 'rdh_ttl' value.
+ */
+static isc_boolean_t
+ttl_sooner(void *v1, void *v2) {
+	rdatasetheader_t *lhs = v1;
+	rdatasetheader_t *rhs = v2;
+
+	return ((lhs->rdh_ttl < rhs->rdh_ttl) ? ISC_TRUE : ISC_FALSE);
+}
+
+/*%
+ * Rank two rdataset headers by their 'resign' time: returns ISC_TRUE when
+ * 'v1' has the strictly smaller value, ISC_FALSE otherwise.
+ */
+static isc_boolean_t
+resign_sooner(void *v1, void *v2) {
+	rdatasetheader_t *lhs = v1;
+	rdatasetheader_t *rhs = v2;
+
+	return ((lhs->resign < rhs->resign) ? ISC_TRUE : ISC_FALSE);
+}
+
+/*%
+ * Record 'index' as the heap index of the rdataset header 'what'.
+ */
+static void
+set_index(void *what, unsigned int index) {
+	((rdatasetheader_t *)what)->heap_index = index;
+}
+
/*%
* Work out how many nodes can be deleted in the time between two
* requests to the nameserver. Smooth the resulting number and use it
@@ -571,7 +786,7 @@ free_rbtdb_callback(isc_task_t *task, isc_event_t *event) {
*/
static unsigned int
adjust_quantum(unsigned int old, isc_time_t *start) {
- unsigned int pps = dns_pps; /* packets per second */
+ unsigned int pps = dns_pps; /* packets per second */
unsigned int interval;
isc_uint64_t usecs;
isc_time_t end;
@@ -581,7 +796,7 @@ adjust_quantum(unsigned int old, isc_time_t *start) {
pps = 100;
isc_time_now(&end);
- interval = 1000000 / pps; /* interval in usec */
+ interval = 1000000 / pps; /* interval in usec */
if (interval == 0)
interval = 1;
usecs = isc_time_microdiff(&end, start);
@@ -619,6 +834,9 @@ free_rbtdb(dns_rbtdb_t *rbtdb, isc_boolean_t log, isc_event_t *event) {
char buf[DNS_NAME_FORMATSIZE];
isc_time_t start;
+ if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
+ overmem((dns_db_t *)rbtdb, (isc_boolean_t)-1);
+
REQUIRE(rbtdb->current_version != NULL || EMPTY(rbtdb->open_versions));
REQUIRE(rbtdb->future_version == NULL);
@@ -633,6 +851,21 @@ free_rbtdb(dns_rbtdb_t *rbtdb, isc_boolean_t log, isc_event_t *event) {
isc_mem_put(rbtdb->common.mctx, rbtdb->current_version,
sizeof(rbtdb_version_t));
}
+
+ /*
+ * We assume the number of remaining dead nodes is reasonably small;
+ * the overhead of unlinking all nodes here should be negligible.
+ */
+ for (i = 0; i < rbtdb->node_lock_count; i++) {
+ dns_rbtnode_t *node;
+
+ node = ISC_LIST_HEAD(rbtdb->deadnodes[i]);
+ while (node != NULL) {
+ ISC_LIST_UNLINK(rbtdb->deadnodes[i], node, deadlink);
+ node = ISC_LIST_HEAD(rbtdb->deadnodes[i]);
+ }
+ }
+
if (event == NULL)
rbtdb->quantum = (rbtdb->task != NULL) ? 100 : 0;
again:
@@ -658,6 +891,30 @@ free_rbtdb(dns_rbtdb_t *rbtdb, isc_boolean_t log, isc_event_t *event) {
}
INSIST(result == ISC_R_SUCCESS && rbtdb->tree == NULL);
}
+
+ if (rbtdb->nsec3 != NULL) {
+ isc_time_now(&start);
+ result = dns_rbt_destroy2(&rbtdb->nsec3, rbtdb->quantum);
+ if (result == ISC_R_QUOTA) {
+ INSIST(rbtdb->task != NULL);
+ if (rbtdb->quantum != 0)
+ rbtdb->quantum = adjust_quantum(rbtdb->quantum,
+ &start);
+ if (event == NULL)
+ event = isc_event_allocate(rbtdb->common.mctx,
+ NULL,
+ DNS_EVENT_FREESTORAGE,
+ free_rbtdb_callback,
+ rbtdb,
+ sizeof(isc_event_t));
+ if (event == NULL)
+ goto again;
+ isc_task_send(rbtdb->task, &event);
+ return;
+ }
+ INSIST(result == ISC_R_SUCCESS && rbtdb->nsec3 == NULL);
+ }
+
if (event != NULL)
isc_event_free(&event);
if (log) {
@@ -676,12 +933,47 @@ free_rbtdb(dns_rbtdb_t *rbtdb, isc_boolean_t log, isc_event_t *event) {
isc_refcount_destroy(&rbtdb->node_locks[i].references);
NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
}
+
+ /*
+ * Clean up LRU / re-signing order lists.
+ */
+ if (rbtdb->rdatasets != NULL) {
+ for (i = 0; i < rbtdb->node_lock_count; i++)
+ INSIST(ISC_LIST_EMPTY(rbtdb->rdatasets[i]));
+ isc_mem_put(rbtdb->common.mctx, rbtdb->rdatasets,
+ rbtdb->node_lock_count *
+ sizeof(rdatasetheaderlist_t));
+ }
+ /*
+ * Clean up dead node buckets.
+ */
+ if (rbtdb->deadnodes != NULL) {
+ for (i = 0; i < rbtdb->node_lock_count; i++)
+ INSIST(ISC_LIST_EMPTY(rbtdb->deadnodes[i]));
+ isc_mem_put(rbtdb->common.mctx, rbtdb->deadnodes,
+ rbtdb->node_lock_count * sizeof(rbtnodelist_t));
+ }
+ /*
+ * Clean up heap objects.
+ */
+ if (rbtdb->heaps != NULL) {
+ for (i = 0; i < rbtdb->node_lock_count; i++)
+ isc_heap_destroy(&rbtdb->heaps[i]);
+ isc_mem_put(rbtdb->common.mctx, rbtdb->heaps,
+ rbtdb->node_lock_count *
+ sizeof(isc_heap_t *));
+ }
+
+ if (rbtdb->rrsetstats != NULL)
+ dns_stats_detach(&rbtdb->rrsetstats);
+
isc_mem_put(rbtdb->common.mctx, rbtdb->node_locks,
rbtdb->node_lock_count * sizeof(rbtdb_nodelock_t));
isc_rwlock_destroy(&rbtdb->tree_lock);
isc_refcount_destroy(&rbtdb->references);
if (rbtdb->task != NULL)
isc_task_detach(&rbtdb->task);
+
RBTDB_DESTROYLOCK(&rbtdb->lock);
rbtdb->common.magic = 0;
rbtdb->common.impmagic = 0;
@@ -788,6 +1080,7 @@ allocate_version(isc_mem_t *mctx, rbtdb_serial_t serial,
version->writer = writer;
version->commit_ok = ISC_FALSE;
ISC_LIST_INIT(version->changed_list);
+ ISC_LIST_INIT(version->resigned_list);
ISC_LINK_INIT(version, link);
return (version);
@@ -803,11 +1096,29 @@ newversion(dns_db_t *db, dns_dbversion_t **versionp) {
REQUIRE(rbtdb->future_version == NULL);
RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
- RUNTIME_CHECK(rbtdb->next_serial != 0); /* XXX Error? */
+ RUNTIME_CHECK(rbtdb->next_serial != 0); /* XXX Error? */
version = allocate_version(rbtdb->common.mctx, rbtdb->next_serial, 1,
ISC_TRUE);
if (version != NULL) {
version->commit_ok = ISC_TRUE;
+ version->secure = rbtdb->current_version->secure;
+ version->havensec3 = rbtdb->current_version->havensec3;
+ if (version->havensec3) {
+ version->flags = rbtdb->current_version->flags;
+ version->iterations =
+ rbtdb->current_version->iterations;
+ version->hash = rbtdb->current_version->hash;
+ version->salt_length =
+ rbtdb->current_version->salt_length;
+ memcpy(version->salt, rbtdb->current_version->salt,
+ version->salt_length);
+ } else {
+ version->flags = 0;
+ version->iterations = 0;
+ version->hash = 0;
+ version->salt_length = 0;
+ memset(version->salt, 0, sizeof(version->salt));
+ }
rbtdb->next_serial++;
rbtdb->future_version = version;
}
@@ -875,7 +1186,7 @@ free_acachearray(isc_mem_t *mctx, rdatasetheader_t *header,
{
unsigned int count;
unsigned int i;
- unsigned char *raw; /* RDATASLAB */
+ unsigned char *raw; /* RDATASLAB */
/*
* The caller must be holding the corresponding node lock.
@@ -903,22 +1214,69 @@ free_noqname(isc_mem_t *mctx, struct noqname **noqname) {
if (dns_name_dynamic(&(*noqname)->name))
dns_name_free(&(*noqname)->name, mctx);
- if ((*noqname)->nsec != NULL)
- isc_mem_put(mctx, (*noqname)->nsec,
- dns_rdataslab_size((*noqname)->nsec, 0));
- if ((*noqname)->nsecsig != NULL)
- isc_mem_put(mctx, (*noqname)->nsecsig,
- dns_rdataslab_size((*noqname)->nsecsig, 0));
+ if ((*noqname)->neg != NULL)
+ isc_mem_put(mctx, (*noqname)->neg,
+ dns_rdataslab_size((*noqname)->neg, 0));
+ if ((*noqname)->negsig != NULL)
+ isc_mem_put(mctx, (*noqname)->negsig,
+ dns_rdataslab_size((*noqname)->negsig, 0));
isc_mem_put(mctx, *noqname, sizeof(**noqname));
*noqname = NULL;
}
static inline void
-free_rdataset(isc_mem_t *mctx, rdatasetheader_t *rdataset) {
+init_rdataset(dns_rbtdb_t *rbtdb, rdatasetheader_t *h) /* Put header 'h' into its initial state: LRU link initialized, heap_index 0. */
+{
+ ISC_LINK_INIT(h, lru_link); /* the header starts out on no LRU list */
+ h->heap_index = 0; /* free_rdataset() skips the heap removal when this is 0 */
+
+#if TRACE_HEADER
+ if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in) /* trace only class-IN cache databases */
+ fprintf(stderr, "initialized header: %p\n", h);
+#else
+ UNUSED(rbtdb); /* 'rbtdb' is only needed for the trace output */
+#endif
+}
+
+/*%
+ * Allocate an rdataset header from 'mctx' and pass it through
+ * init_rdataset().  Returns the new header, or NULL if the allocation
+ * failed.  'rbtdb' is forwarded to init_rdataset() and, when TRACE_HEADER
+ * is enabled, used to restrict the trace output to class-IN cache databases.
+ */
+static inline rdatasetheader_t *
+new_rdataset(dns_rbtdb_t *rbtdb, isc_mem_t *mctx)
+{
+	rdatasetheader_t *header = isc_mem_get(mctx, sizeof(*header));
+
+	if (header != NULL) {
+#if TRACE_HEADER
+		if (IS_CACHE(rbtdb) &&
+		    rbtdb->common.rdclass == dns_rdataclass_in)
+			fprintf(stderr, "allocated header: %p\n", header);
+#endif
+		init_rdataset(rbtdb, header);
+	}
+	return (header);
+}
+
+static inline void
+free_rdataset(dns_rbtdb_t *rbtdb, isc_mem_t *mctx, rdatasetheader_t *rdataset)
+{
unsigned int size;
+ int idx;
+
+ if (EXISTS(rdataset) &&
+ (rdataset->attributes & RDATASET_ATTR_STATCOUNT) != 0) {
+ update_rrsetstats(rbtdb, rdataset, ISC_FALSE);
+ }
+
+ idx = rdataset->node->locknum;
+ if (ISC_LINK_LINKED(rdataset, lru_link))
+ ISC_LIST_UNLINK(rbtdb->rdatasets[idx], rdataset, lru_link);
+ if (rdataset->heap_index != 0)
+ isc_heap_delete(rbtdb->heaps[idx], rdataset->heap_index);
+ rdataset->heap_index = 0;
if (rdataset->noqname != NULL)
free_noqname(mctx, &rdataset->noqname);
+ if (rdataset->closest != NULL)
+ free_noqname(mctx, &rdataset->closest);
free_acachearray(mctx, rdataset, rdataset->additional_auth);
free_acachearray(mctx, rdataset, rdataset->additional_glue);
@@ -964,12 +1322,13 @@ rollback_node(dns_rbtnode_t *node, rbtdb_serial_t serial) {
}
static inline void
-clean_stale_headers(isc_mem_t *mctx, rdatasetheader_t *top) {
+clean_stale_headers(dns_rbtdb_t *rbtdb, isc_mem_t *mctx, rdatasetheader_t *top)
+{
rdatasetheader_t *d, *down_next;
for (d = top->down; d != NULL; d = down_next) {
down_next = d->down;
- free_rdataset(mctx, d);
+ free_rdataset(rbtdb, mctx, d);
}
top->down = NULL;
}
@@ -986,7 +1345,7 @@ clean_cache_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
top_prev = NULL;
for (current = node->data; current != NULL; current = top_next) {
top_next = current->next;
- clean_stale_headers(mctx, current);
+ clean_stale_headers(rbtdb, mctx, current);
/*
* If current is nonexistent or stale, we can clean it up.
*/
@@ -996,7 +1355,7 @@ clean_cache_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
top_prev->next = current->next;
else
node->data = current->next;
- free_rdataset(mctx, current);
+ free_rdataset(rbtdb, mctx, current);
} else
top_prev = current;
}
@@ -1037,7 +1396,7 @@ clean_zone_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
if (down_next != NULL)
down_next->next = dparent;
dparent->down = down_next;
- free_rdataset(mctx, dcurrent);
+ free_rdataset(rbtdb, mctx, dcurrent);
} else
dparent = dcurrent;
}
@@ -1053,7 +1412,7 @@ clean_zone_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
top_prev->next = current->next;
else
node->data = current->next;
- free_rdataset(mctx, current);
+ free_rdataset(rbtdb, mctx, current);
/*
* current no longer exists, so we can
* just continue with the loop.
@@ -1069,7 +1428,7 @@ clean_zone_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
else
node->data = down_next;
down_next->next = top_next;
- free_rdataset(mctx, current);
+ free_rdataset(rbtdb, mctx, current);
current = down_next;
}
}
@@ -1096,7 +1455,7 @@ clean_zone_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
do {
down_next = dcurrent->down;
INSIST(dcurrent->serial <= least_serial);
- free_rdataset(mctx, dcurrent);
+ free_rdataset(rbtdb, mctx, dcurrent);
dcurrent = down_next;
} while (dcurrent != NULL);
dparent->down = NULL;
@@ -1120,7 +1479,7 @@ clean_zone_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
top_prev->next = current->next;
else
node->data = current->next;
- free_rdataset(mctx, current);
+ free_rdataset(rbtdb, mctx, current);
} else
top_prev = current;
}
@@ -1129,6 +1488,49 @@ clean_zone_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
node->dirty = 0;
}
+/*%
+ * Clean up dead nodes. These are nodes which have no references, and
+ * have no data. They are dead but we could not or chose not to delete
+ * them when we deleted all the data at that node because we did not want
+ * to wait for the tree write lock.
+ *
+ * At most 'count' (currently 10) nodes are taken off the dead-node list of
+ * bucket 'bucketnum' per call. Each node is deleted from the NSEC3 tree
+ * or from the main tree according to its nsec3 flag. A failed deletion is
+ * logged at warning level and the loop continues with the next list head.
+ *
+ * The caller must hold a tree write lock and bucketnum'th node (write) lock.
+ */
+static void
+cleanup_dead_nodes(dns_rbtdb_t *rbtdb, int bucketnum) {
+ dns_rbtnode_t *node;
+ isc_result_t result;
+ int count = 10; /* XXXJT: should be adjustable */
+
+ node = ISC_LIST_HEAD(rbtdb->deadnodes[bucketnum]);
+ while (node != NULL && count > 0) {
+ ISC_LIST_UNLINK(rbtdb->deadnodes[bucketnum], node, deadlink);
+
+ /*
+ * Since we're holding a tree write lock, it should be
+ * impossible for this node to be referenced by others.
+ */
+ INSIST(dns_rbtnode_refcurrent(node) == 0 &&
+ node->data == NULL);
+
+ INSIST(!ISC_LINK_LINKED(node, deadlink)); /* sanity check on the unlink above */
+ if (node->nsec3)
+ result = dns_rbt_deletenode(rbtdb->nsec3, node,
+ ISC_FALSE);
+ else
+ result = dns_rbt_deletenode(rbtdb->tree, node,
+ ISC_FALSE);
+ if (result != ISC_R_SUCCESS)
+ isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
+ DNS_LOGMODULE_CACHE, ISC_LOG_WARNING,
+ "cleanup_dead_nodes: "
+ "dns_rbt_deletenode: %s",
+ isc_result_totext(result));
+ node = ISC_LIST_HEAD(rbtdb->deadnodes[bucketnum]); /* next candidate, if any */
+ count--;
+ }
+}
+
/*
* Caller must be holding the node lock if its reference must be protected
* by the lock.
@@ -1139,7 +1541,7 @@ new_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
isc_refcount_t *lockref;
dns_rbtnode_refincrement0(node, &noderefs);
- if (noderefs == 1) { /* this is the first reference to the node */
+ if (noderefs == 1) { /* this is the first reference to the node */
lockref = &rbtdb->node_locks[node->locknum].references;
isc_refcount_increment0(lockref, &lockrefs);
INSIST(lockrefs != 0);
@@ -1148,6 +1550,49 @@ new_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
}
/*
+ * This function is assumed to be called when a node is newly referenced
+ * and can be in the deadnode list. In that case the node must be retrieved
+ * from the list because it is going to be used. In addition, if the caller
+ * happens to hold a write lock on the tree, it's a good chance to purge dead
+ * nodes.
+ * Note: while a new reference is gained in multiple places, there are only very
+ * few cases where the node can be in the deadnode list (only empty nodes can
+ * have been added to the list).
+ *
+ * 'treelocktype' is the type of tree lock the caller currently holds; other
+ * dead nodes in the same lock bucket are purged (via cleanup_dead_nodes())
+ * only when it is isc_rwlocktype_write.
+ *
+ * Locking: the reference is taken under the bucket's strong lock. The weak
+ * read lock is used first to decide whether the dead-node list needs to be
+ * touched at all; the weak write lock is taken only in that case.
+ */
+static inline void
+reactivate_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
+ isc_rwlocktype_t treelocktype)
+{
+ isc_boolean_t need_relock = ISC_FALSE;
+
+ NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
+ new_reference(rbtdb, node);
+
+ NODE_WEAKLOCK(&rbtdb->node_locks[node->locknum].lock,
+ isc_rwlocktype_read);
+ if (ISC_LINK_LINKED(node, deadlink))
+ need_relock = ISC_TRUE; /* this node must be taken off the list */
+ else if (!ISC_LIST_EMPTY(rbtdb->deadnodes[node->locknum]) &&
+ treelocktype == isc_rwlocktype_write)
+ need_relock = ISC_TRUE; /* other dead nodes can be purged now */
+ NODE_WEAKUNLOCK(&rbtdb->node_locks[node->locknum].lock,
+ isc_rwlocktype_read);
+ if (need_relock) {
+ NODE_WEAKLOCK(&rbtdb->node_locks[node->locknum].lock,
+ isc_rwlocktype_write);
+ if (ISC_LINK_LINKED(node, deadlink)) /* re-check: the read lock was dropped above */
+ ISC_LIST_UNLINK(rbtdb->deadnodes[node->locknum],
+ node, deadlink);
+ if (treelocktype == isc_rwlocktype_write)
+ cleanup_dead_nodes(rbtdb, node->locknum);
+ NODE_WEAKUNLOCK(&rbtdb->node_locks[node->locknum].lock,
+ isc_rwlocktype_write);
+ }
+
+ NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
+}
+
+/*
* Caller must be holding the node lock; either the "strong", read or write
* lock. Note that the lock must be held even when node references are
* atomically modified; in that case the decrement operation itself does not
@@ -1160,14 +1605,17 @@ new_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
static isc_boolean_t
decrement_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
rbtdb_serial_t least_serial,
- isc_rwlocktype_t nlock, isc_rwlocktype_t tlock)
+ isc_rwlocktype_t nlock, isc_rwlocktype_t tlock,
+ isc_boolean_t pruning)
{
isc_result_t result;
isc_boolean_t write_locked;
rbtdb_nodelock_t *nodelock;
unsigned int refs, nrefs;
+ int bucket = node->locknum;
+ isc_boolean_t no_reference;
- nodelock = &rbtdb->node_locks[node->locknum];
+ nodelock = &rbtdb->node_locks[bucket];
/* Handle easy and typical case first. */
if (!node->dirty && (node->data != NULL || node->down != NULL)) {
@@ -1226,7 +1674,9 @@ decrement_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
}
/*
- * XXXDCL need to add a deferred delete method for ISC_R_LOCKBUSY.
+ * Attempt to switch to a write lock on the tree. If this fails,
+ * we will add this node to a linked list of nodes in this locking
+ * bucket which we will free later.
*/
if (tlock != isc_rwlocktype_write) {
/*
@@ -1246,6 +1696,7 @@ decrement_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
} else
write_locked = ISC_TRUE;
+ no_reference = ISC_TRUE;
if (write_locked && dns_rbtnode_refcurrent(node) == 0) {
/*
* We can now delete the node if the reference counter is
@@ -1254,26 +1705,97 @@ decrement_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
* current thread locks the tree (e.g., in findnode()).
*/
- if (isc_log_wouldlog(dns_lctx, ISC_LOG_DEBUG(1))) {
- char printname[DNS_NAME_FORMATSIZE];
+ /*
+ * If this node is the only one in the level it's in, deleting
+ * this node may recursively make its parent the only node in
+ * the parent level; if so, and if no one is currently using
+ * the parent node, this is almost the only opportunity to
+ * clean it up. But the recursive cleanup is not that trivial
+ * since the child and parent may be in different lock buckets,
+ * which would cause a lock order reversal problem. To avoid
+ * the trouble, we'll dispatch a separate event for batch
+ * cleaning. We need to check whether we're deleting the node
+ * as a result of pruning to avoid infinite dispatching.
+ * Note: pruning happens only when a task has been set for the
+ * rbtdb. If the user of the rbtdb chooses not to set a task,
+ * it's their responsibility to purge stale leaves (e.g. by
+ * periodic walk-through).
+ */
+ if (!pruning && node->parent != NULL &&
+ node->parent->down == node && node->left == NULL &&
+ node->right == NULL && rbtdb->task != NULL) {
+ isc_event_t *ev;
+ dns_db_t *db;
+
+ ev = isc_event_allocate(rbtdb->common.mctx, NULL,
+ DNS_EVENT_RBTPRUNE,
+ prune_tree, node,
+ sizeof(isc_event_t));
+ if (ev != NULL) {
+ new_reference(rbtdb, node);
+ db = NULL;
+ attach((dns_db_t *)rbtdb, &db);
+ ev->ev_sender = db;
+ isc_task_send(rbtdb->task, &ev);
+ no_reference = ISC_FALSE;
+ } else {
+ /*
+ * XXX: this is a weird situation. We could
+ * ignore this error case, but then the stale
+ * node is unlikely to be purged except via a
+ * rare condition such as manual cleanup. So
+ * we queue it in the deadnodes list, hoping
+ * the memory shortage is temporary and the node
+ * will be deleted later.
+ */
+ isc_log_write(dns_lctx,
+ DNS_LOGCATEGORY_DATABASE,
+ DNS_LOGMODULE_CACHE,
+ ISC_LOG_INFO,
+ "decrement_reference: failed to "
+ "allocate pruning event");
+ INSIST(!ISC_LINK_LINKED(node, deadlink));
+ ISC_LIST_APPEND(rbtdb->deadnodes[bucket], node,
+ deadlink);
+ }
+ } else {
+ if (isc_log_wouldlog(dns_lctx, ISC_LOG_DEBUG(1))) {
+ char printname[DNS_NAME_FORMATSIZE];
+
+ isc_log_write(dns_lctx,
+ DNS_LOGCATEGORY_DATABASE,
+ DNS_LOGMODULE_CACHE,
+ ISC_LOG_DEBUG(1),
+ "decrement_reference: "
+ "delete from rbt: %p %s",
+ node,
+ dns_rbt_formatnodename(node,
+ printname,
+ sizeof(printname)));
+ }
- isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
- DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
- "decrement_reference: "
- "delete from rbt: %p %s",
- node,
- dns_rbt_formatnodename(node, printname,
- sizeof(printname)));
+ INSIST(!ISC_LINK_LINKED(node, deadlink));
+ if (node->nsec3)
+ result = dns_rbt_deletenode(rbtdb->nsec3, node,
+ ISC_FALSE);
+ else
+ result = dns_rbt_deletenode(rbtdb->tree, node,
+ ISC_FALSE);
+ if (result != ISC_R_SUCCESS) {
+ isc_log_write(dns_lctx,
+ DNS_LOGCATEGORY_DATABASE,
+ DNS_LOGMODULE_CACHE,
+ ISC_LOG_WARNING,
+ "decrement_reference: "
+ "dns_rbt_deletenode: %s",
+ isc_result_totext(result));
+ }
}
-
- result = dns_rbt_deletenode(rbtdb->tree, node, ISC_FALSE);
- if (result != ISC_R_SUCCESS)
- isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
- DNS_LOGMODULE_CACHE, ISC_LOG_WARNING,
- "decrement_reference: "
- "dns_rbt_deletenode: %s",
- isc_result_totext(result));
- }
+ } else if (dns_rbtnode_refcurrent(node) == 0) {
+ INSIST(!ISC_LINK_LINKED(node, deadlink));
+ ISC_LIST_APPEND(rbtdb->deadnodes[bucket], node, deadlink);
+ } else
+ no_reference = ISC_FALSE;
/* Restore the lock? */
if (nlock == isc_rwlocktype_read)
@@ -1290,7 +1812,71 @@ decrement_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
if (write_locked)
isc_rwlock_downgrade(&rbtdb->tree_lock);
- return (ISC_TRUE);
+ return (no_reference);
+}
+
+/*
+ * Prune the tree by recursively cleaning-up single leaves. In the worst
+ * case, the number of iterations is the number of tree levels, which is at
+ * most the maximum number of domain name labels, i.e., 127. In practice, this
+ * should be much smaller (only a few times), and even the worst case would be
+ * acceptable for a single event.
+ *
+ * This is the action routine of the DNS_EVENT_RBTPRUNE event allocated in
+ * decrement_reference(). 'event->ev_arg' is the node to start from and
+ * 'event->ev_sender' is the database. The sender took a node reference and
+ * attached to the database before sending the event; both are released
+ * here (the first decrement_reference() call and the final detach()).
+ * 'task' is unused. The event is freed before any pruning is done.
+ *
+ * The tree write lock is held for the whole walk. Only one node lock is
+ * held at a time; it is swapped when the parent lives in another bucket.
+ */
+static void
+prune_tree(isc_task_t *task, isc_event_t *event) {
+ dns_rbtdb_t *rbtdb = event->ev_sender;
+ dns_rbtnode_t *node = event->ev_arg;
+ dns_rbtnode_t *parent;
+ unsigned int locknum;
+
+ UNUSED(task);
+
+ isc_event_free(&event);
+
+ RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
+ locknum = node->locknum;
+ NODE_LOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
+ do {
+ parent = node->parent; /* saved first: the call below may delete 'node' */
+ decrement_reference(rbtdb, node, 0, isc_rwlocktype_write,
+ isc_rwlocktype_write, ISC_TRUE); /* pruning == ISC_TRUE: do not dispatch another prune event */
+
+ if (parent != NULL && parent->down == NULL) {
+ /*
+ * node was the only down child of the parent and has
+ * just been removed. We'll then need to examine the
+ * parent. Keep the lock if possible; otherwise,
+ * release the old lock and acquire one for the parent.
+ */
+ if (parent->locknum != locknum) {
+ NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
+ isc_rwlocktype_write);
+ locknum = parent->locknum;
+ NODE_LOCK(&rbtdb->node_locks[locknum].lock,
+ isc_rwlocktype_write);
+ }
+
+ /*
+ * We need to gain a reference to the node before
+ * decrementing it in the next iteration. In addition,
+ * if the node is in the dead-nodes list, extract it
+ * from the list beforehand as we do in
+ * reactivate_node().
+ */
+ new_reference(rbtdb, parent);
+ if (ISC_LINK_LINKED(parent, deadlink)) {
+ ISC_LIST_UNLINK(rbtdb->deadnodes[locknum],
+ parent, deadlink);
+ }
+ } else
+ parent = NULL; /* nothing more to prune: ends the loop */
+
+ node = parent;
+ } while (node != NULL);
+ NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
+ RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
+
+ detach((dns_db_t **)&rbtdb);
}
static inline void
@@ -1337,17 +1923,20 @@ cleanup_nondirty(rbtdb_version_t *version, rbtdb_changedlist_t *cleanup_list) {
}
}
-static isc_boolean_t
-iszonesecure(dns_db_t *db, dns_dbnode_t *origin) {
+static void
+iszonesecure(dns_db_t *db, rbtdb_version_t *version, dns_dbnode_t *origin) {
dns_rdataset_t keyset;
dns_rdataset_t nsecset, signsecset;
+ dns_rdata_t rdata = DNS_RDATA_INIT;
isc_boolean_t haszonekey = ISC_FALSE;
isc_boolean_t hasnsec = ISC_FALSE;
+ isc_boolean_t hasoptbit = ISC_FALSE;
+ isc_boolean_t nsec3createflag = ISC_FALSE;
isc_result_t result;
dns_rdataset_init(&keyset);
- result = dns_db_findrdataset(db, origin, NULL, dns_rdatatype_dnskey, 0,
- 0, &keyset, NULL);
+ result = dns_db_findrdataset(db, origin, version, dns_rdatatype_dnskey,
+ 0, 0, &keyset, NULL);
if (result == ISC_R_SUCCESS) {
dns_rdata_t keyrdata = DNS_RDATA_INIT;
result = dns_rdataset_first(&keyset);
@@ -1361,21 +1950,153 @@ iszonesecure(dns_db_t *db, dns_dbnode_t *origin) {
}
dns_rdataset_disassociate(&keyset);
}
- if (!haszonekey)
- return (ISC_FALSE);
+ if (!haszonekey) {
+ version->secure = dns_db_insecure;
+ version->havensec3 = ISC_FALSE;
+ return;
+ }
dns_rdataset_init(&nsecset);
dns_rdataset_init(&signsecset);
- result = dns_db_findrdataset(db, origin, NULL, dns_rdatatype_nsec, 0,
- 0, &nsecset, &signsecset);
+ result = dns_db_findrdataset(db, origin, version, dns_rdatatype_nsec,
+ 0, 0, &nsecset, &signsecset);
if (result == ISC_R_SUCCESS) {
if (dns_rdataset_isassociated(&signsecset)) {
hasnsec = ISC_TRUE;
+ result = dns_rdataset_first(&nsecset);
+ if (result == ISC_R_SUCCESS) {
+ dns_rdataset_current(&nsecset, &rdata);
+ hasoptbit = dns_nsec_typepresent(&rdata,
+ dns_rdatatype_opt);
+ }
dns_rdataset_disassociate(&signsecset);
}
dns_rdataset_disassociate(&nsecset);
}
- return (hasnsec);
+
+ setnsec3parameters(db, version, &nsec3createflag);
+
+ /*
+ * Do we have a valid NSEC/NSEC3 chain?
+ */
+ if (version->havensec3 || (hasnsec && !hasoptbit))
+ version->secure = dns_db_secure;
+ /*
+ * Do we have a NSEC/NSEC3 chain under creation?
+ */
+ else if (hasoptbit || nsec3createflag)
+ version->secure = dns_db_partial;
+ else
+ version->secure = dns_db_insecure;
+}
+
+/*%<
+ * Walk the origin node looking for NSEC3PARAM records.
+ * Cache the nsec3 parameters.
+ *
+ * 'version->havensec3' is first cleared. For each rdataset at the origin
+ * node that is visible to 'version' and has type NSEC3PARAM, every rdata
+ * is examined. An rdata is accepted if its hash is DNS_NSEC3_UNKNOWNALG
+ * or is supported, and its flags pass the check below. The accepted
+ * hash, salt, salt length, iterations and flags are copied into 'version'
+ * and 'version->havensec3' is set. The search stops at the first accepted
+ * rdata whose hash is not DNS_NSEC3_UNKNOWNALG.
+ *
+ * '*nsec3createflag' is only ever set to ISC_TRUE here (when a record has
+ * DNS_NSEC3FLAG_CREATE set, and only if RFC5155_STRICT is not defined), so
+ * the caller must initialize it.
+ *
+ * The tree lock and the origin node's lock are taken for reading.
+ */
+static void
+setnsec3parameters(dns_db_t *db, rbtdb_version_t *version,
+ isc_boolean_t *nsec3createflag)
+{
+ dns_rbtnode_t *node;
+ dns_rdata_nsec3param_t nsec3param;
+ dns_rdata_t rdata = DNS_RDATA_INIT;
+ isc_region_t region;
+ isc_result_t result;
+ rdatasetheader_t *header, *header_next;
+ unsigned char *raw; /* RDATASLAB */
+ unsigned int count, length;
+ dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
+
+ RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
+ version->havensec3 = ISC_FALSE;
+ node = rbtdb->origin_node;
+ NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
+ isc_rwlocktype_read);
+ for (header = node->data;
+ header != NULL;
+ header = header_next) {
+ header_next = header->next; /* saved now: 'header' is moved or cleared below */
+ do { /* find the first header in the down chain that 'version' can see */
+ if (header->serial <= version->serial &&
+ !IGNORE(header)) {
+ if (NONEXISTENT(header))
+ header = NULL;
+ break;
+ } else
+ header = header->down;
+ } while (header != NULL);
+
+ if (header != NULL &&
+ header->type == dns_rdatatype_nsec3param) {
+ /*
+ * Find a NSEC3PARAM with a supported algorithm.
+ */
+ raw = (unsigned char *)header + sizeof(*header);
+ count = raw[0] * 256 + raw[1]; /* count: 16-bit big-endian number of rdata items */
+#if DNS_RDATASET_FIXED
+ raw += count * 4 + 2;
+#else
+ raw += 2;
+#endif
+ while (count-- > 0U) {
+ length = raw[0] * 256 + raw[1]; /* 16-bit big-endian rdata length */
+#if DNS_RDATASET_FIXED
+ raw += 4;
+#else
+ raw += 2;
+#endif
+ region.base = raw;
+ region.length = length;
+ raw += length; /* advance to the next item before any 'continue' */
+ dns_rdata_fromregion(&rdata,
+ rbtdb->common.rdclass,
+ dns_rdatatype_nsec3param,
+ &region);
+ result = dns_rdata_tostruct(&rdata,
+ &nsec3param,
+ NULL);
+ INSIST(result == ISC_R_SUCCESS);
+ dns_rdata_reset(&rdata);
+
+ if (nsec3param.hash != DNS_NSEC3_UNKNOWNALG &&
+ !dns_nsec3_supportedhash(nsec3param.hash))
+ continue;
+
+#ifdef RFC5155_STRICT
+ if (nsec3param.flags != 0)
+ continue;
+#else
+ if ((nsec3param.flags & DNS_NSEC3FLAG_CREATE)
+ != 0)
+ *nsec3createflag = ISC_TRUE;
+ if ((nsec3param.flags & ~DNS_NSEC3FLAG_OPTOUT)
+ != 0)
+ continue; /* any flag other than OPTOUT disqualifies the record */
+#endif
+
+ INSIST(nsec3param.salt_length <=
+ sizeof(version->salt));
+ memcpy(version->salt, nsec3param.salt,
+ nsec3param.salt_length);
+ version->hash = nsec3param.hash;
+ version->salt_length = nsec3param.salt_length;
+ version->iterations = nsec3param.iterations;
+ version->flags = nsec3param.flags;
+ version->havensec3 = ISC_TRUE;
+ /*
+ * Look for a better algorithm than the
+ * unknown test algorithm.
+ */
+ if (nsec3param.hash != DNS_NSEC3_UNKNOWNALG)
+ goto unlock;
+ }
+ }
+ }
+ unlock:
+ NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
+ isc_rwlocktype_read);
+ RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
}
static void
@@ -1384,10 +2105,12 @@ closeversion(dns_db_t *db, dns_dbversion_t **versionp, isc_boolean_t commit) {
rbtdb_version_t *version, *cleanup_version, *least_greater;
isc_boolean_t rollback = ISC_FALSE;
rbtdb_changedlist_t cleanup_list;
+ rdatasetheaderlist_t resigned_list;
rbtdb_changed_t *changed, *next_changed;
rbtdb_serial_t serial, least_serial;
dns_rbtnode_t *rbtnode;
unsigned int refs;
+ rdatasetheader_t *header;
isc_boolean_t writer;
REQUIRE(VALID_RBTDB(rbtdb));
@@ -1395,9 +2118,10 @@ closeversion(dns_db_t *db, dns_dbversion_t **versionp, isc_boolean_t commit) {
cleanup_version = NULL;
ISC_LIST_INIT(cleanup_list);
+ ISC_LIST_INIT(resigned_list);
isc_refcount_decrement(&version->references, &refs);
- if (refs > 0) { /* typical and easy case first */
+ if (refs > 0) { /* typical and easy case first */
if (commit) {
RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
INSIST(!version->writer);
@@ -1484,12 +2208,16 @@ closeversion(dns_db_t *db, dns_dbversion_t **versionp, isc_boolean_t commit) {
INSIST(cur_ref == 1);
PREPEND(rbtdb->open_versions,
rbtdb->current_version, link);
+ resigned_list = version->resigned_list;
+ ISC_LIST_INIT(version->resigned_list);
} else {
/*
* We're rolling back this transaction.
*/
cleanup_list = version->changed_list;
ISC_LIST_INIT(version->changed_list);
+ resigned_list = version->resigned_list;
+ ISC_LIST_INIT(version->resigned_list);
rollback = ISC_TRUE;
cleanup_version = version;
rbtdb->future_version = NULL;
@@ -1542,7 +2270,7 @@ closeversion(dns_db_t *db, dns_dbversion_t **versionp, isc_boolean_t commit) {
* Update the zone's secure status.
*/
if (writer && commit && !IS_CACHE(rbtdb))
- rbtdb->secure = iszonesecure(db, rbtdb->origin_node);
+ iszonesecure(db, version, rbtdb->origin_node);
if (cleanup_version != NULL) {
INSIST(EMPTY(cleanup_version->changed_list));
@@ -1550,7 +2278,35 @@ closeversion(dns_db_t *db, dns_dbversion_t **versionp, isc_boolean_t commit) {
sizeof(*cleanup_version));
}
+ /*
+ * Commit/rollback re-signed headers.
+ */
+ for (header = HEAD(resigned_list);
+ header != NULL;
+ header = HEAD(resigned_list)) {
+ ISC_LIST_UNLINK(resigned_list, header, lru_link);
+ if (rollback) {
+ nodelock_t *lock;
+ lock = &rbtdb->node_locks[header->node->locknum].lock;
+ NODE_LOCK(lock, isc_rwlocktype_write);
+ resign_insert(rbtdb, header->node->locknum, header);
+ NODE_UNLOCK(lock, isc_rwlocktype_write);
+ }
+ decrement_reference(rbtdb, header->node, least_serial,
+ isc_rwlocktype_write, isc_rwlocktype_none,
+ ISC_FALSE);
+ }
+
if (!EMPTY(cleanup_list)) {
+ /*
+ * We acquire a tree write lock here in order to make sure
+ * that stale nodes will be removed in decrement_reference().
+ * If we didn't have the lock, those nodes could miss the
+ * chance to be removed until the server stops. The write lock
+ * is expensive, but this event should be rare enough to justify
+ * the cost.
+ */
+ RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
for (changed = HEAD(cleanup_list);
changed != NULL;
changed = next_changed) {
@@ -1561,19 +2317,27 @@ closeversion(dns_db_t *db, dns_dbversion_t **versionp, isc_boolean_t commit) {
lock = &rbtdb->node_locks[rbtnode->locknum].lock;
NODE_LOCK(lock, isc_rwlocktype_write);
+ /*
+ * This is a good opportunity to purge any dead nodes,
+ * so use it.
+ */
+ cleanup_dead_nodes(rbtdb, rbtnode->locknum);
+
if (rollback)
rollback_node(rbtnode, serial);
decrement_reference(rbtdb, rbtnode, least_serial,
isc_rwlocktype_write,
- isc_rwlocktype_none);
+ isc_rwlocktype_write, ISC_FALSE);
+
NODE_UNLOCK(lock, isc_rwlocktype_write);
isc_mem_put(rbtdb->common.mctx, changed,
sizeof(*changed));
}
+ RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
}
- end:
+ end:
*versionp = NULL;
}
@@ -1606,6 +2370,7 @@ add_wildcard_magic(dns_rbtdb_t *rbtdb, dns_name_t *name) {
result = dns_rbt_addnode(rbtdb->tree, &foundname, &node);
if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
return (result);
+ node->nsec3 = 0;
node->find_callback = 1;
node->wild = 1;
return (ISC_R_SUCCESS);
@@ -1623,7 +2388,7 @@ add_empty_wildcards(dns_rbtdb_t *rbtdb, dns_name_t *name) {
l = dns_name_countlabels(&rbtdb->common.origin);
i = l + 1;
while (i < n) {
- dns_rbtnode_t *node = NULL; /* dummy */
+ dns_rbtnode_t *node = NULL; /* dummy */
dns_name_getlabelsequence(name, n - i, i, &foundname);
if (dns_name_iswildcard(&foundname)) {
result = add_wildcard_magic(rbtdb, &foundname);
@@ -1633,6 +2398,7 @@ add_empty_wildcards(dns_rbtdb_t *rbtdb, dns_name_t *name) {
&node);
if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
return (result);
+ node->nsec3 = 0;
}
i++;
}
@@ -1678,6 +2444,7 @@ findnode(dns_db_t *db, dns_name_t *name, isc_boolean_t create,
node->locknum = dns_name_hash(&nodename, ISC_TRUE) %
rbtdb->node_lock_count;
#endif
+ node->nsec3 = 0;
add_empty_wildcards(rbtdb, name);
if (dns_name_iswildcard(name)) {
@@ -1692,6 +2459,60 @@ findnode(dns_db_t *db, dns_name_t *name, isc_boolean_t create,
return (result);
}
}
+ reactivate_node(rbtdb, node, locktype);
+ RWUNLOCK(&rbtdb->tree_lock, locktype);
+
+ *nodep = (dns_dbnode_t *)node;
+
+ return (ISC_R_SUCCESS);
+}
+
+static isc_result_t
+findnsec3node(dns_db_t *db, dns_name_t *name, isc_boolean_t create,
+ dns_dbnode_t **nodep)
+{
+ dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
+ dns_rbtnode_t *node = NULL;
+ dns_name_t nodename;
+ isc_result_t result;
+ isc_rwlocktype_t locktype = isc_rwlocktype_read;
+
+ REQUIRE(VALID_RBTDB(rbtdb));
+
+ dns_name_init(&nodename, NULL);
+ RWLOCK(&rbtdb->tree_lock, locktype);
+ result = dns_rbt_findnode(rbtdb->nsec3, name, NULL, &node, NULL,
+ DNS_RBTFIND_EMPTYDATA, NULL, NULL);
+ if (result != ISC_R_SUCCESS) {
+ RWUNLOCK(&rbtdb->tree_lock, locktype);
+ if (!create) {
+ if (result == DNS_R_PARTIALMATCH)
+ result = ISC_R_NOTFOUND;
+ return (result);
+ }
+ /*
+ * It would be nice to try to upgrade the lock instead of
+ * unlocking then relocking.
+ */
+ locktype = isc_rwlocktype_write;
+ RWLOCK(&rbtdb->tree_lock, locktype);
+ node = NULL;
+ result = dns_rbt_addnode(rbtdb->nsec3, name, &node);
+ if (result == ISC_R_SUCCESS) {
+ dns_rbt_namefromnode(node, &nodename);
+#ifdef DNS_RBT_USEHASH
+ node->locknum = node->hashval % rbtdb->node_lock_count;
+#else
+ node->locknum = dns_name_hash(&nodename, ISC_TRUE) %
+ rbtdb->node_lock_count;
+#endif
+ node->nsec3 = 1U;
+ } else if (result != ISC_R_EXISTS) {
+ RWUNLOCK(&rbtdb->tree_lock, locktype);
+ return (result);
+ }
+ } else
+ INSIST(node->nsec3);
NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
new_reference(rbtdb, node);
NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
@@ -1846,7 +2667,7 @@ bind_rdataset(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
rdatasetheader_t *header, isc_stdtime_t now,
dns_rdataset_t *rdataset)
{
- unsigned char *raw; /* RDATASLAB */
+ unsigned char *raw; /* RDATASLAB */
/*
* Caller must be holding the node reader lock.
@@ -1861,16 +2682,18 @@ bind_rdataset(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
new_reference(rbtdb, node);
- INSIST(rdataset->methods == NULL); /* We must be disassociated. */
+ INSIST(rdataset->methods == NULL); /* We must be disassociated. */
rdataset->methods = &rdataset_methods;
rdataset->rdclass = rbtdb->common.rdclass;
rdataset->type = RBTDB_RDATATYPE_BASE(header->type);
rdataset->covers = RBTDB_RDATATYPE_EXT(header->type);
- rdataset->ttl = header->ttl - now;
+ rdataset->ttl = header->rdh_ttl - now;
rdataset->trust = header->trust;
if (NXDOMAIN(header))
rdataset->attributes |= DNS_RDATASETATTR_NXDOMAIN;
+ if (OPTOUT(header))
+ rdataset->attributes |= DNS_RDATASETATTR_OPTOUT;
rdataset->private1 = rbtdb;
rdataset->private2 = node;
raw = (unsigned char *)header + sizeof(*header);
@@ -1891,6 +2714,18 @@ bind_rdataset(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
rdataset->private6 = header->noqname;
if (rdataset->private6 != NULL)
rdataset->attributes |= DNS_RDATASETATTR_NOQNAME;
+ rdataset->private7 = header->closest;
+ if (rdataset->private7 != NULL)
+ rdataset->attributes |= DNS_RDATASETATTR_CLOSEST;
+
+ /*
+ * Copy out re-signing information.
+ */
+ if (RESIGN(header)) {
+ rdataset->attributes |= DNS_RDATASETATTR_RESIGN;
+ rdataset->resign = header->resign;
+ } else
+ rdataset->resign = 0;
}
static inline isc_result_t
@@ -1954,7 +2789,7 @@ static inline isc_boolean_t
valid_glue(rbtdb_search_t *search, dns_name_t *name, rbtdb_rdatatype_t type,
dns_rbtnode_t *node)
{
- unsigned char *raw; /* RDATASLAB */
+ unsigned char *raw; /* RDATASLAB */
unsigned int count, size;
dns_name_t ns_name;
isc_boolean_t valid = ISC_FALSE;
@@ -2338,10 +3173,55 @@ find_wildcard(rbtdb_search_t *search, dns_rbtnode_t **nodep,
return (result);
}
+static isc_boolean_t
+matchparams(rdatasetheader_t *header, rbtdb_search_t *search)
+{ /*
+ * Return ISC_TRUE if at least one NSEC3 rdata stored in 'header' has the
+ * same hash algorithm, iteration count and salt as the NSEC3 parameters
+ * recorded in search->rbtversion; otherwise return ISC_FALSE.
+ *
+ * 'header' must be an NSEC3 rdataset header (enforced by REQUIRE).
+ */
+ dns_rdata_t rdata = DNS_RDATA_INIT;
+ dns_rdata_nsec3_t nsec3;
+ unsigned char *raw; /* RDATASLAB */
+ unsigned int rdlen, count;
+ isc_region_t region;
+ isc_result_t result;
+
+ REQUIRE(header->type == dns_rdatatype_nsec3);
+
+ raw = (unsigned char *)header + sizeof(*header); /* slab data follows the header */
+ count = raw[0] * 256 + raw[1]; /* count: 16-bit big-endian number of rdata items */
+#if DNS_RDATASET_FIXED
+ raw += count * 4 + 2;
+#else
+ raw += 2;
+#endif
+ while (count-- > 0) {
+ rdlen = raw[0] * 256 + raw[1]; /* 16-bit big-endian rdata length */
+#if DNS_RDATASET_FIXED
+ raw += 4;
+#else
+ raw += 2;
+#endif
+ region.base = raw;
+ region.length = rdlen;
+ dns_rdata_fromregion(&rdata, search->rbtdb->common.rdclass,
+ dns_rdatatype_nsec3, &region);
+ raw += rdlen; /* step to the next item */
+ result = dns_rdata_tostruct(&rdata, &nsec3, NULL);
+ INSIST(result == ISC_R_SUCCESS);
+ if (nsec3.hash == search->rbtversion->hash &&
+ nsec3.iterations == search->rbtversion->iterations &&
+ nsec3.salt_length == search->rbtversion->salt_length &&
+ memcmp(nsec3.salt, search->rbtversion->salt,
+ nsec3.salt_length) == 0)
+ return (ISC_TRUE);
+ dns_rdata_reset(&rdata); /* 'rdata' is reused for the next item */
+ }
+ return (ISC_FALSE);
+}
+
static inline isc_result_t
find_closest_nsec(rbtdb_search_t *search, dns_dbnode_t **nodep,
dns_name_t *foundname, dns_rdataset_t *rdataset,
- dns_rdataset_t *sigrdataset, isc_boolean_t need_sig)
+ dns_rdataset_t *sigrdataset, dns_rbt_t *tree,
+ dns_db_secure_t secure)
{
dns_rbtnode_t *node;
rdatasetheader_t *header, *header_next, *found, *foundsig;
@@ -2349,7 +3229,22 @@ find_closest_nsec(rbtdb_search_t *search, dns_dbnode_t **nodep,
isc_result_t result;
dns_fixedname_t fname, forigin;
dns_name_t *name, *origin;
+ dns_rdatatype_t type;
+ rbtdb_rdatatype_t sigtype;
+ isc_boolean_t wraps;
+ isc_boolean_t need_sig = ISC_TF(secure == dns_db_secure);
+ if (tree == search->rbtdb->nsec3) {
+ type = dns_rdatatype_nsec3;
+ sigtype = RBTDB_RDATATYPE_SIGNSEC3;
+ wraps = ISC_TRUE;
+ } else {
+ type = dns_rdatatype_nsec;
+ sigtype = RBTDB_RDATATYPE_SIGNSEC;
+ wraps = ISC_FALSE;
+ }
+
+ again:
do {
node = NULL;
dns_fixedname_init(&fname);
@@ -2391,12 +3286,11 @@ find_closest_nsec(rbtdb_search_t *search, dns_dbnode_t **nodep,
* active rdataset at this node.
*/
empty_node = ISC_FALSE;
- if (header->type == dns_rdatatype_nsec) {
+ if (header->type == type) {
found = header;
if (foundsig != NULL)
break;
- } else if (header->type ==
- RBTDB_RDATATYPE_SIGNSEC) {
+ } else if (header->type == sigtype) {
foundsig = header;
if (found != NULL)
break;
@@ -2404,11 +3298,19 @@ find_closest_nsec(rbtdb_search_t *search, dns_dbnode_t **nodep,
}
}
if (!empty_node) {
- if (found != NULL &&
- (foundsig != NULL || !need_sig))
+ if (found != NULL && search->rbtversion->havensec3 &&
+ found->type == dns_rdatatype_nsec3 &&
+ !matchparams(found, search)) {
+ empty_node = ISC_TRUE;
+ found = NULL;
+ foundsig = NULL;
+ result = dns_rbtnodechain_prev(&search->chain,
+ NULL, NULL);
+ } else if (found != NULL &&
+ (foundsig != NULL || !need_sig))
{
/*
- * We've found the right NSEC record.
+ * We've found the right NSEC/NSEC3 record.
*
* Note: for this to really be the right
* NSEC record, it's essential that the NSEC
@@ -2465,6 +3367,15 @@ find_closest_nsec(rbtdb_search_t *search, dns_dbnode_t **nodep,
isc_rwlocktype_read);
} while (empty_node && result == ISC_R_SUCCESS);
+ if (result == ISC_R_NOMORE && wraps) {
+ result = dns_rbtnodechain_last(&search->chain, tree,
+ NULL, NULL);
+ if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
+ wraps = ISC_FALSE;
+ goto again;
+ }
+ }
+
/*
* If the result is ISC_R_NOMORE, then we got to the beginning of
* the database and didn't find a NSEC record. This shouldn't
@@ -2497,7 +3408,7 @@ zone_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
isc_boolean_t active;
dns_rbtnodechain_t chain;
nodelock_t *lock;
-
+ dns_rbt_t *tree;
search.rbtdb = (dns_rbtdb_t *)db;
@@ -2540,7 +3451,9 @@ zone_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
* encounter a callback node, zone_zonecut_callback() will search the
* rdatasets at the zone cut for active DNAME or NS rdatasets.
*/
- result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node,
+ tree = (options & DNS_DBFIND_FORCENSEC3) != 0 ? search.rbtdb->nsec3 :
+ search.rbtdb->tree;
+ result = dns_rbt_findnode(tree, name, foundname, &node,
&search.chain, DNS_RBTFIND_EMPTYDATA,
zone_zonecut_callback, &search);
@@ -2578,12 +3491,14 @@ zone_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
* If we're here, then the name does not exist, is not
* beneath a zonecut, and there's no matching wildcard.
*/
- if (search.rbtdb->secure ||
- (search.options & DNS_DBFIND_FORCENSEC) != 0)
+ if ((search.rbtversion->secure == dns_db_secure &&
+ !search.rbtversion->havensec3) ||
+ (search.options & DNS_DBFIND_FORCENSEC) != 0 ||
+ (search.options & DNS_DBFIND_FORCENSEC3) != 0)
{
result = find_closest_nsec(&search, nodep, foundname,
- rdataset, sigrdataset,
- search.rbtdb->secure);
+ rdataset, sigrdataset, tree,
+ search.rbtversion->secure);
if (result == ISC_R_SUCCESS)
result = active ? DNS_R_EMPTYNAME :
DNS_R_NXDOMAIN;
@@ -2704,6 +3619,14 @@ zone_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
break;
}
+
+ /*
+ * If the NSEC3 record doesn't match the chain
+ * we are using behave as if it isn't here.
+ */
+ if (header->type == dns_rdatatype_nsec3 &&
+ !matchparams(header, &search))
+ goto partial_match;
/*
* If we found a type we were looking for,
* remember it.
@@ -2748,14 +3671,16 @@ zone_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
*/
if (!maybe_zonecut && found != NULL)
break;
- } else if (header->type == dns_rdatatype_nsec) {
+ } else if (header->type == dns_rdatatype_nsec &&
+ !search.rbtversion->havensec3) {
/*
* Remember a NSEC rdataset even if we're
* not specifically looking for it, because
* we might need it later.
*/
nsecheader = header;
- } else if (header->type == RBTDB_RDATATYPE_SIGNSEC) {
+ } else if (header->type == RBTDB_RDATATYPE_SIGNSEC &&
+ !search.rbtversion->havensec3) {
/*
* If we need the NSEC rdataset, we'll also
* need its signature.
@@ -2807,7 +3732,8 @@ zone_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
* The desired type doesn't exist.
*/
result = DNS_R_NXRRSET;
- if (search.rbtdb->secure &&
+ if (search.rbtversion->secure == dns_db_secure &&
+ !search.rbtversion->havensec3 &&
(nsecheader == NULL || nsecsig == NULL)) {
/*
* The zone is secure but there's no NSEC,
@@ -2822,7 +3748,8 @@ zone_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
NODE_UNLOCK(lock, isc_rwlocktype_read);
result = find_closest_nsec(&search, nodep, foundname,
rdataset, sigrdataset,
- search.rbtdb->secure);
+ search.rbtdb->tree,
+ search.rbtversion->secure);
if (result == ISC_R_SUCCESS)
result = DNS_R_EMPTYWILD;
goto tree_exit;
@@ -2841,7 +3768,8 @@ zone_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
new_reference(search.rbtdb, node);
*nodep = node;
}
- if (search.rbtdb->secure ||
+ if ((search.rbtversion->secure == dns_db_secure &&
+ !search.rbtversion->havensec3) ||
(search.options & DNS_DBFIND_FORCENSEC) != 0)
{
bind_rdataset(search.rbtdb, node, nsecheader,
@@ -2882,6 +3810,7 @@ zone_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
* validated updates.
*/
if (type == dns_rdatatype_nsec ||
+ type == dns_rdatatype_nsec3 ||
type == dns_rdatatype_key)
result = ISC_R_SUCCESS;
else if (type == dns_rdatatype_any)
@@ -2948,7 +3877,8 @@ zone_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
NODE_LOCK(lock, isc_rwlocktype_read);
decrement_reference(search.rbtdb, node, 0,
- isc_rwlocktype_read, isc_rwlocktype_none);
+ isc_rwlocktype_read, isc_rwlocktype_none,
+ ISC_FALSE);
NODE_UNLOCK(lock, isc_rwlocktype_read);
}
@@ -3010,7 +3940,7 @@ cache_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, void *arg) {
header_prev = NULL;
for (header = node->data; header != NULL; header = header_next) {
header_next = header->next;
- if (header->ttl <= search->now) {
+ if (header->rdh_ttl <= search->now) {
/*
* This rdataset is stale. If no one else is
* using the node, we can clean it up right
@@ -3018,7 +3948,7 @@ cache_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, void *arg) {
* the node as dirty, so it will get cleaned
* up later.
*/
- if ((header->ttl <= search->now - RBTDB_VIRTUAL) &&
+ if ((header->rdh_ttl <= search->now - RBTDB_VIRTUAL) &&
(locktype == isc_rwlocktype_write ||
NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
/*
@@ -3044,13 +3974,16 @@ cache_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, void *arg) {
* stale headers first.
*/
mctx = search->rbtdb->common.mctx;
- clean_stale_headers(mctx, header);
+ clean_stale_headers(search->rbtdb,
+ mctx,
+ header);
if (header_prev != NULL)
header_prev->next =
header->next;
else
node->data = header->next;
- free_rdataset(mctx, header);
+ free_rdataset(search->rbtdb, mctx,
+ header);
} else {
header->attributes |=
RDATASET_ATTR_STALE;
@@ -3079,6 +4012,7 @@ cache_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, void *arg) {
* search->zonecut_rdataset will still be valid later.
*/
new_reference(search->rbtdb, node);
+ INSIST(!ISC_LINK_LINKED(node, deadlink));
search->zonecut = node;
search->zonecut_rdataset = dname_header;
search->zonecut_sigrdataset = sigdname_header;
@@ -3130,7 +4064,7 @@ find_deepest_zonecut(rbtdb_search_t *search, dns_rbtnode_t *node,
header != NULL;
header = header_next) {
header_next = header->next;
- if (header->ttl <= search->now) {
+ if (header->rdh_ttl <= search->now) {
/*
* This rdataset is stale. If no one else is
* using the node, we can clean it up right
@@ -3138,7 +4072,7 @@ find_deepest_zonecut(rbtdb_search_t *search, dns_rbtnode_t *node,
* the node as dirty, so it will get cleaned
* up later.
*/
- if ((header->ttl <= search->now -
+ if ((header->rdh_ttl <= search->now -
RBTDB_VIRTUAL) &&
(locktype == isc_rwlocktype_write ||
NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
@@ -3153,14 +4087,17 @@ find_deepest_zonecut(rbtdb_search_t *search, dns_rbtnode_t *node,
isc_mem_t *m;
m = search->rbtdb->common.mctx;
- clean_stale_headers(m, header);
+ clean_stale_headers(
+ search->rbtdb,
+ m, header);
if (header_prev != NULL)
header_prev->next =
header->next;
else
node->data =
header->next;
- free_rdataset(m, header);
+ free_rdataset(rbtdb, m,
+ header);
} else {
header->attributes |=
RDATASET_ATTR_STALE;
@@ -3229,6 +4166,23 @@ find_deepest_zonecut(rbtdb_search_t *search, dns_rbtnode_t *node,
if (foundsig != NULL)
bind_rdataset(search->rbtdb, node, foundsig,
search->now, sigrdataset);
+ if (need_headerupdate(found, search->now) ||
+ (foundsig != NULL &&
+ need_headerupdate(foundsig, search->now))) {
+ if (locktype != isc_rwlocktype_write) {
+ NODE_UNLOCK(lock, locktype);
+ NODE_LOCK(lock, isc_rwlocktype_write);
+ locktype = isc_rwlocktype_write;
+ }
+ if (need_headerupdate(found, search->now))
+ update_header(search->rbtdb, found,
+ search->now);
+ if (foundsig != NULL &&
+ need_headerupdate(foundsig, search->now)) {
+ update_header(search->rbtdb, foundsig,
+ search->now);
+ }
+ }
}
node_exit:
@@ -3286,7 +4240,7 @@ find_coveringnsec(rbtdb_search_t *search, dns_dbnode_t **nodep,
header != NULL;
header = header_next) {
header_next = header->next;
- if (header->ttl <= now) {
+ if (header->rdh_ttl <= now) {
/*
* This rdataset is stale. If no one else is
* using the node, we can clean it up right
@@ -3294,7 +4248,7 @@ find_coveringnsec(rbtdb_search_t *search, dns_dbnode_t **nodep,
* node as dirty, so it will get cleaned up
* later.
*/
- if ((header->ttl <= now - RBTDB_VIRTUAL) &&
+ if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
(locktype == isc_rwlocktype_write ||
NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
/*
@@ -3308,13 +4262,16 @@ find_coveringnsec(rbtdb_search_t *search, dns_dbnode_t **nodep,
isc_mem_t *m;
m = search->rbtdb->common.mctx;
- clean_stale_headers(m, header);
+ clean_stale_headers(
+ search->rbtdb,
+ m, header);
if (header_prev != NULL)
header_prev->next =
header->next;
else
node->data = header->next;
- free_rdataset(m, header);
+ free_rdataset(search->rbtdb, m,
+ header);
} else {
header->attributes |=
RDATASET_ATTR_STALE;
@@ -3377,6 +4334,7 @@ cache_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
rdatasetheader_t *header, *header_prev, *header_next;
rdatasetheader_t *found, *nsheader;
rdatasetheader_t *foundsig, *nssig, *cnamesig;
+ rdatasetheader_t *update, *updatesig;
rbtdb_rdatatype_t sigtype, negtype;
UNUSED(version);
@@ -3399,6 +4357,8 @@ cache_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
dns_fixedname_init(&search.zonecut_name);
dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
search.now = now;
+ update = NULL;
+ updatesig = NULL;
RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
@@ -3462,14 +4422,14 @@ cache_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
header_prev = NULL;
for (header = node->data; header != NULL; header = header_next) {
header_next = header->next;
- if (header->ttl <= now) {
+ if (header->rdh_ttl <= now) {
/*
* This rdataset is stale. If no one else is using the
* node, we can clean it up right now, otherwise we
* mark it as stale, and the node as dirty, so it will
* get cleaned up later.
*/
- if ((header->ttl <= now - RBTDB_VIRTUAL) &&
+ if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
(locktype == isc_rwlocktype_write ||
NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
/*
@@ -3482,13 +4442,15 @@ cache_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
isc_mem_t *mctx;
mctx = search.rbtdb->common.mctx;
- clean_stale_headers(mctx, header);
+ clean_stale_headers(search.rbtdb, mctx,
+ header);
if (header_prev != NULL)
header_prev->next =
header->next;
else
node->data = header->next;
- free_rdataset(mctx, header);
+ free_rdataset(search.rbtdb, mctx,
+ header);
} else {
header->attributes |=
RDATASET_ATTR_STALE;
@@ -3595,13 +4557,19 @@ cache_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
if (nsheader != NULL) {
if (nodep != NULL) {
new_reference(search.rbtdb, node);
+ INSIST(!ISC_LINK_LINKED(node, deadlink));
*nodep = node;
}
bind_rdataset(search.rbtdb, node, nsheader, search.now,
rdataset);
- if (nssig != NULL)
+ if (need_headerupdate(nsheader, search.now))
+ update = nsheader;
+ if (nssig != NULL) {
bind_rdataset(search.rbtdb, node, nssig,
search.now, sigrdataset);
+ if (need_headerupdate(nssig, search.now))
+ updatesig = nssig;
+ }
result = DNS_R_DELEGATION;
goto node_exit;
}
@@ -3619,6 +4587,7 @@ cache_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
if (nodep != NULL) {
new_reference(search.rbtdb, node);
+ INSIST(!ISC_LINK_LINKED(node, deadlink));
*nodep = node;
}
@@ -3650,12 +4619,28 @@ cache_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
result == DNS_R_NCACHENXRRSET) {
bind_rdataset(search.rbtdb, node, found, search.now,
rdataset);
- if (foundsig != NULL)
+ if (need_headerupdate(found, search.now))
+ update = found;
+ if (foundsig != NULL) {
bind_rdataset(search.rbtdb, node, foundsig, search.now,
sigrdataset);
+ if (need_headerupdate(foundsig, search.now))
+ updatesig = foundsig;
+ }
}
node_exit:
+ if ((update != NULL || updatesig != NULL) &&
+ locktype != isc_rwlocktype_write) {
+ NODE_UNLOCK(lock, locktype);
+ NODE_LOCK(lock, isc_rwlocktype_write);
+ locktype = isc_rwlocktype_write;
+ }
+ if (update != NULL && need_headerupdate(update, search.now))
+ update_header(search.rbtdb, update, search.now);
+ if (updatesig != NULL && need_headerupdate(updatesig, search.now))
+ update_header(search.rbtdb, updatesig, search.now);
+
NODE_UNLOCK(lock, locktype);
tree_exit:
@@ -3671,7 +4656,8 @@ cache_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
NODE_LOCK(lock, isc_rwlocktype_read);
decrement_reference(search.rbtdb, node, 0,
- isc_rwlocktype_read, isc_rwlocktype_none);
+ isc_rwlocktype_read, isc_rwlocktype_none,
+ ISC_FALSE);
NODE_UNLOCK(lock, isc_rwlocktype_read);
}
@@ -3745,14 +4731,14 @@ cache_findzonecut(dns_db_t *db, dns_name_t *name, unsigned int options,
header_prev = NULL;
for (header = node->data; header != NULL; header = header_next) {
header_next = header->next;
- if (header->ttl <= now) {
+ if (header->rdh_ttl <= now) {
/*
* This rdataset is stale. If no one else is using the
* node, we can clean it up right now, otherwise we
* mark it as stale, and the node as dirty, so it will
* get cleaned up later.
*/
- if ((header->ttl <= now - RBTDB_VIRTUAL) &&
+ if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
(locktype == isc_rwlocktype_write ||
NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
/*
@@ -3765,13 +4751,15 @@ cache_findzonecut(dns_db_t *db, dns_name_t *name, unsigned int options,
isc_mem_t *mctx;
mctx = search.rbtdb->common.mctx;
- clean_stale_headers(mctx, header);
+ clean_stale_headers(search.rbtdb, mctx,
+ header);
if (header_prev != NULL)
header_prev->next =
header->next;
else
node->data = header->next;
- free_rdataset(mctx, header);
+ free_rdataset(search.rbtdb, mctx,
+ header);
} else {
header->attributes |=
RDATASET_ATTR_STALE;
@@ -3814,6 +4802,7 @@ cache_findzonecut(dns_db_t *db, dns_name_t *name, unsigned int options,
if (nodep != NULL) {
new_reference(search.rbtdb, node);
+ INSIST(!ISC_LINK_LINKED(node, deadlink));
*nodep = node;
}
@@ -3822,6 +4811,21 @@ cache_findzonecut(dns_db_t *db, dns_name_t *name, unsigned int options,
bind_rdataset(search.rbtdb, node, foundsig, search.now,
sigrdataset);
+ if (need_headerupdate(found, search.now) ||
+ (foundsig != NULL && need_headerupdate(foundsig, search.now))) {
+ if (locktype != isc_rwlocktype_write) {
+ NODE_UNLOCK(lock, locktype);
+ NODE_LOCK(lock, isc_rwlocktype_write);
+ locktype = isc_rwlocktype_write;
+ }
+ if (need_headerupdate(found, search.now))
+ update_header(search.rbtdb, found, search.now);
+ if (foundsig != NULL &&
+ need_headerupdate(foundsig, search.now)) {
+ update_header(search.rbtdb, foundsig, search.now);
+ }
+ }
+
NODE_UNLOCK(lock, locktype);
tree_exit:
@@ -3871,7 +4875,7 @@ detachnode(dns_db_t *db, dns_dbnode_t **targetp) {
NODE_LOCK(&nodelock->lock, isc_rwlocktype_read);
if (decrement_reference(rbtdb, node, 0, isc_rwlocktype_read,
- isc_rwlocktype_none)) {
+ isc_rwlocktype_none, ISC_FALSE)) {
if (isc_refcount_current(&nodelock->references) == 0 &&
nodelock->exiting) {
inactive = ISC_TRUE;
@@ -3938,8 +4942,8 @@ expirenode(dns_db_t *db, dns_dbnode_t *node, isc_stdtime_t now) {
/*
* Note that 'log' can be true IFF rbtdb->overmem is also true.
- * rbtdb->ovemem can currently only be true for cache databases
- * -- hence all of the "overmem cache" log strings.
+ * rbtdb->overmem can currently only be true for cache
+ * databases -- hence all of the "overmem cache" log strings.
*/
log = ISC_TF(isc_log_wouldlog(dns_lctx, level));
if (log)
@@ -3959,7 +4963,7 @@ expirenode(dns_db_t *db, dns_dbnode_t *node, isc_stdtime_t now) {
isc_rwlocktype_write);
for (header = rbtnode->data; header != NULL; header = header->next)
- if (header->ttl <= now - RBTDB_VIRTUAL) {
+ if (header->rdh_ttl <= now - RBTDB_VIRTUAL) {
/*
* We don't check if refcurrent(rbtnode) == 0 and try
* to free like we do in cache_find(), because
@@ -3974,7 +4978,7 @@ expirenode(dns_db_t *db, dns_dbnode_t *node, isc_stdtime_t now) {
printname);
} else if (force_expire) {
if (! RETAIN(header)) {
- header->ttl = 0;
+ set_ttl(rbtdb, header, 0);
header->attributes |= RDATASET_ATTR_STALE;
rbtnode->dirty = 1;
} else if (log) {
@@ -3997,9 +5001,8 @@ static void
overmem(dns_db_t *db, isc_boolean_t overmem) {
dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
- if (IS_CACHE(rbtdb)) {
+ if (IS_CACHE(rbtdb))
rbtdb->overmem = overmem;
- }
}
static void
@@ -4030,11 +5033,13 @@ printnode(dns_db_t *db, dns_dbnode_t *node, FILE *out) {
first = ISC_FALSE;
fprintf(out,
"\tserial = %lu, ttl = %u, "
- "trust = %u, attributes = %u\n",
+ "trust = %u, attributes = %u, "
+ "resign = %u\n",
(unsigned long)current->serial,
- current->ttl,
+ current->rdh_ttl,
current->trust,
- current->attributes);
+ current->attributes,
+ current->resign);
current = current->down;
} while (current != NULL);
}
@@ -4046,8 +5051,7 @@ printnode(dns_db_t *db, dns_dbnode_t *node, FILE *out) {
}
static isc_result_t
-createiterator(dns_db_t *db, isc_boolean_t relative_names,
- dns_dbiterator_t **iteratorp)
+createiterator(dns_db_t *db, unsigned int options, dns_dbiterator_t **iteratorp)
{
dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
rbtdb_dbiterator_t *rbtdbiter;
@@ -4061,7 +5065,8 @@ createiterator(dns_db_t *db, isc_boolean_t relative_names,
rbtdbiter->common.methods = &dbiterator_methods;
rbtdbiter->common.db = NULL;
dns_db_attach(db, &rbtdbiter->common.db);
- rbtdbiter->common.relative_names = relative_names;
+ rbtdbiter->common.relative_names =
+ ISC_TF((options & DNS_DB_RELATIVENAMES) != 0);
rbtdbiter->common.magic = DNS_DBITERATOR_MAGIC;
rbtdbiter->common.cleaning = ISC_FALSE;
rbtdbiter->paused = ISC_TRUE;
@@ -4071,8 +5076,15 @@ createiterator(dns_db_t *db, isc_boolean_t relative_names,
dns_fixedname_init(&rbtdbiter->origin);
rbtdbiter->node = NULL;
rbtdbiter->delete = 0;
+ rbtdbiter->nsec3only = ISC_TF((options & DNS_DB_NSEC3ONLY) != 0);
+ rbtdbiter->nonsec3 = ISC_TF((options & DNS_DB_NONSEC3) != 0);
memset(rbtdbiter->deletions, 0, sizeof(rbtdbiter->deletions));
dns_rbtnodechain_init(&rbtdbiter->chain, db->mctx);
+ dns_rbtnodechain_init(&rbtdbiter->nsec3chain, db->mctx);
+ if (rbtdbiter->nsec3only)
+ rbtdbiter->current = &rbtdbiter->nsec3chain;
+ else
+ rbtdbiter->current = &rbtdbiter->chain;
*iteratorp = (dns_dbiterator_t *)rbtdbiter;
@@ -4204,8 +5216,8 @@ cache_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
for (header = rbtnode->data; header != NULL; header = header_next) {
header_next = header->next;
- if (header->ttl <= now) {
- if ((header->ttl <= now - RBTDB_VIRTUAL) &&
+ if (header->rdh_ttl <= now) {
+ if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
(locktype == isc_rwlocktype_write ||
NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
/*
@@ -4355,19 +5367,15 @@ cname_and_other_data(dns_rbtnode_t *node, rbtdb_serial_t serial) {
* Look for active extant "other data".
*
* "Other data" is any rdataset whose type is not
- * KEY, RRSIG KEY, NSEC, RRSIG NSEC or RRSIG CNAME.
+ * KEY, NSEC, SIG or RRSIG.
*/
rdtype = RBTDB_RDATATYPE_BASE(header->type);
- if (rdtype == dns_rdatatype_rrsig ||
- rdtype == dns_rdatatype_sig)
- rdtype = RBTDB_RDATATYPE_EXT(header->type);
- if (rdtype != dns_rdatatype_nsec &&
- rdtype != dns_rdatatype_key &&
- rdtype != dns_rdatatype_cname) {
+ if (rdtype != dns_rdatatype_key &&
+ rdtype != dns_rdatatype_sig &&
+ rdtype != dns_rdatatype_nsec &&
+ rdtype != dns_rdatatype_rrsig) {
/*
- * We've found a type that isn't
- * NSEC, KEY, CNAME, or one of their
- * signatures. Is it active and extant?
+ * Is it active and extant?
*/
do {
if (header->serial <= serial &&
@@ -4395,6 +5403,16 @@ cname_and_other_data(dns_rbtnode_t *node, rbtdb_serial_t serial) {
}
static isc_result_t
+resign_insert(dns_rbtdb_t *rbtdb, int idx, rdatasetheader_t *newheader) {
+ isc_result_t result;
+
+ INSIST(newheader->heap_index == 0);
+ INSIST(!ISC_LINK_LINKED(newheader, lru_link));
+ result = isc_heap_insert(rbtdb->heaps[idx], newheader);
+ return (result);
+}
+
+static isc_result_t
add(dns_rbtdb_t *rbtdb, dns_rbtnode_t *rbtnode, rbtdb_version_t *rbtversion,
rdatasetheader_t *newheader, unsigned int options, isc_boolean_t loading,
dns_rdataset_t *addedrdataset, isc_stdtime_t now)
@@ -4409,6 +5427,7 @@ add(dns_rbtdb_t *rbtdb, dns_rbtnode_t *rbtnode, rbtdb_version_t *rbtversion,
dns_rdatatype_t rdtype, covers;
rbtdb_rdatatype_t negtype;
dns_trust_t trust;
+ int idx;
/*
* Add an rdatasetheader_t to a node.
@@ -4437,7 +5456,7 @@ add(dns_rbtdb_t *rbtdb, dns_rbtnode_t *rbtnode, rbtdb_version_t *rbtversion,
*/
changed = add_changed(rbtdb, rbtversion, rbtnode);
if (changed == NULL) {
- free_rdataset(rbtdb->common.mctx, newheader);
+ free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
return (ISC_R_NOMEMORY);
}
}
@@ -4466,7 +5485,7 @@ add(dns_rbtdb_t *rbtdb, dns_rbtnode_t *rbtnode, rbtdb_version_t *rbtversion,
for (topheader = rbtnode->data;
topheader != NULL;
topheader = topheader->next) {
- topheader->ttl = 0;
+ set_ttl(rbtdb, topheader, 0);
topheader->attributes |=
RDATASET_ATTR_STALE;
}
@@ -4489,7 +5508,7 @@ add(dns_rbtdb_t *rbtdb, dns_rbtnode_t *rbtnode, rbtdb_version_t *rbtversion,
break;
}
if (topheader != NULL && EXISTS(topheader) &&
- topheader->ttl > now) {
+ topheader->rdh_ttl > now) {
/*
* Found one.
*/
@@ -4498,8 +5517,8 @@ add(dns_rbtdb_t *rbtdb, dns_rbtnode_t *rbtnode, rbtdb_version_t *rbtversion,
* The NXDOMAIN/NODATA(QTYPE=ANY)
* is more trusted.
*/
-
- free_rdataset(rbtdb->common.mctx,
+ free_rdataset(rbtdb,
+ rbtdb->common.mctx,
newheader);
if (addedrdataset != NULL)
bind_rdataset(rbtdb, rbtnode,
@@ -4511,7 +5530,7 @@ add(dns_rbtdb_t *rbtdb, dns_rbtnode_t *rbtnode, rbtdb_version_t *rbtversion,
* The new rdataset is better. Expire the
* NXDOMAIN/NODATA(QTYPE=ANY).
*/
- topheader->ttl = 0;
+ set_ttl(rbtdb, topheader, 0);
topheader->attributes |= RDATASET_ATTR_STALE;
rbtnode->dirty = 1;
topheader = NULL;
@@ -4546,7 +5565,7 @@ add(dns_rbtdb_t *rbtdb, dns_rbtnode_t *rbtnode, rbtdb_version_t *rbtversion,
* Deleting an already non-existent rdataset has no effect.
*/
if (header_nx && newheader_nx) {
- free_rdataset(rbtdb->common.mctx, newheader);
+ free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
return (DNS_R_UNCHANGED);
}
@@ -4555,8 +5574,8 @@ add(dns_rbtdb_t *rbtdb, dns_rbtnode_t *rbtnode, rbtdb_version_t *rbtversion,
* has no effect, provided that the cache data isn't stale.
*/
if (rbtversion == NULL && trust < header->trust &&
- (header->ttl > now || header_nx)) {
- free_rdataset(rbtdb->common.mctx, newheader);
+ (header->rdh_ttl > now || header_nx)) {
+ free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
if (addedrdataset != NULL)
bind_rdataset(rbtdb, rbtnode, header, now,
addedrdataset);
@@ -4582,9 +5601,9 @@ add(dns_rbtdb_t *rbtdb, dns_rbtnode_t *rbtnode, rbtdb_version_t *rbtversion,
if ((options & DNS_DBADD_EXACT) != 0)
flags |= DNS_RDATASLAB_EXACT;
if ((options & DNS_DBADD_EXACTTTL) != 0 &&
- newheader->ttl != header->ttl)
+ newheader->rdh_ttl != header->rdh_ttl)
result = DNS_R_NOTEXACT;
- else if (newheader->ttl != header->ttl)
+ else if (newheader->rdh_ttl != header->rdh_ttl)
flags |= DNS_RDATASLAB_FORCE;
if (result == ISC_R_SUCCESS)
result = dns_rdataslab_merge(
@@ -4604,10 +5623,16 @@ add(dns_rbtdb_t *rbtdb, dns_rbtnode_t *rbtnode, rbtdb_version_t *rbtversion,
* alone. It will get cleaned up when
* clean_zone_node() runs.
*/
- free_rdataset(rbtdb->common.mctx, newheader);
+ free_rdataset(rbtdb, rbtdb->common.mctx,
+ newheader);
newheader = (rdatasetheader_t *)merged;
+ if (loading && RESIGN(newheader) &&
+ RESIGN(header) &&
+ header->resign < newheader->resign)
+ newheader->resign = header->resign;
} else {
- free_rdataset(rbtdb->common.mctx, newheader);
+ free_rdataset(rbtdb, rbtdb->common.mctx,
+ newheader);
return (result);
}
}
@@ -4618,7 +5643,7 @@ add(dns_rbtdb_t *rbtdb, dns_rbtnode_t *rbtnode, rbtdb_version_t *rbtversion,
* Don't lower trust of existing record if the
* update is forced.
*/
- if (IS_CACHE(rbtdb) && header->ttl > now &&
+ if (IS_CACHE(rbtdb) && header->rdh_ttl > now &&
header->type == dns_rdatatype_ns &&
!header_nx && !newheader_nx &&
header->trust >= newheader->trust &&
@@ -4631,20 +5656,25 @@ add(dns_rbtdb_t *rbtdb, dns_rbtnode_t *rbtnode, rbtdb_version_t *rbtversion,
* Honour the new ttl if it is less than the
* older one.
*/
- if (header->ttl > newheader->ttl)
- header->ttl = newheader->ttl;
+ if (header->rdh_ttl > newheader->rdh_ttl)
+ set_ttl(rbtdb, header, newheader->rdh_ttl);
if (header->noqname == NULL &&
newheader->noqname != NULL) {
header->noqname = newheader->noqname;
newheader->noqname = NULL;
}
- free_rdataset(rbtdb->common.mctx, newheader);
+ if (header->closest == NULL &&
+ newheader->closest != NULL) {
+ header->closest = newheader->closest;
+ newheader->closest = NULL;
+ }
+ free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
if (addedrdataset != NULL)
bind_rdataset(rbtdb, rbtnode, header, now,
addedrdataset);
return (ISC_R_SUCCESS);
}
- if (IS_CACHE(rbtdb) && header->ttl > now &&
+ if (IS_CACHE(rbtdb) && header->rdh_ttl > now &&
(header->type == dns_rdatatype_a ||
header->type == dns_rdatatype_aaaa) &&
!header_nx && !newheader_nx &&
@@ -4656,14 +5686,19 @@ add(dns_rbtdb_t *rbtdb, dns_rbtnode_t *rbtnode, rbtdb_version_t *rbtversion,
* Honour the new ttl if it is less than the
* older one.
*/
- if (header->ttl > newheader->ttl)
- header->ttl = newheader->ttl;
+ if (header->rdh_ttl > newheader->rdh_ttl)
+ set_ttl(rbtdb, header, newheader->rdh_ttl);
if (header->noqname == NULL &&
newheader->noqname != NULL) {
header->noqname = newheader->noqname;
newheader->noqname = NULL;
}
- free_rdataset(rbtdb->common.mctx, newheader);
+ if (header->closest == NULL &&
+ newheader->closest != NULL) {
+ header->closest = newheader->closest;
+ newheader->closest = NULL;
+ }
+ free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
if (addedrdataset != NULL)
bind_rdataset(rbtdb, rbtnode, header, now,
addedrdataset);
@@ -4684,7 +5719,7 @@ add(dns_rbtdb_t *rbtdb, dns_rbtnode_t *rbtnode, rbtdb_version_t *rbtversion,
* loading, we MUST clean up 'header' now.
*/
newheader->down = NULL;
- free_rdataset(rbtdb->common.mctx, header);
+ free_rdataset(rbtdb, rbtdb->common.mctx, header);
} else {
newheader->down = topheader;
topheader->next = newheader;
@@ -4692,9 +5727,23 @@ add(dns_rbtdb_t *rbtdb, dns_rbtnode_t *rbtnode, rbtdb_version_t *rbtversion,
if (changed != NULL)
changed->dirty = ISC_TRUE;
if (rbtversion == NULL) {
- header->ttl = 0;
+ set_ttl(rbtdb, header, 0);
header->attributes |= RDATASET_ATTR_STALE;
}
+ idx = newheader->node->locknum;
+ if (IS_CACHE(rbtdb)) {
+ ISC_LIST_PREPEND(rbtdb->rdatasets[idx],
+ newheader, lru_link);
+ /*
+ * XXXMLG We don't check the return value
+ * here. If it fails, we will not do TTL
+ * based expiry on this node. However, we
+ * will do it on the LRU side, so memory
+ * will not leak... for long.
+ */
+ isc_heap_insert(rbtdb->heaps[idx], newheader);
+ } else if (RESIGN(newheader))
+ resign_insert(rbtdb, idx, newheader);
}
} else {
/*
@@ -4706,7 +5755,7 @@ add(dns_rbtdb_t *rbtdb, dns_rbtnode_t *rbtnode, rbtdb_version_t *rbtversion,
* If we're trying to delete the type, don't bother.
*/
if (newheader_nx) {
- free_rdataset(rbtdb->common.mctx, newheader);
+ free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
return (DNS_R_UNCHANGED);
}
@@ -4740,6 +5789,14 @@ add(dns_rbtdb_t *rbtdb, dns_rbtnode_t *rbtnode, rbtdb_version_t *rbtversion,
newheader->down = NULL;
rbtnode->data = newheader;
}
+ idx = newheader->node->locknum;
+ if (IS_CACHE(rbtdb)) {
+ ISC_LIST_PREPEND(rbtdb->rdatasets[idx],
+ newheader, lru_link);
+ isc_heap_insert(rbtdb->heaps[idx], newheader);
+ } else if (RESIGN(newheader)) {
+ resign_insert(rbtdb, idx, newheader);
+ }
}
/*
@@ -4778,15 +5835,15 @@ addnoqname(dns_rbtdb_t *rbtdb, rdatasetheader_t *newheader,
struct noqname *noqname;
isc_mem_t *mctx = rbtdb->common.mctx;
dns_name_t name;
- dns_rdataset_t nsec, nsecsig;
+ dns_rdataset_t neg, negsig;
isc_result_t result;
isc_region_t r;
dns_name_init(&name, NULL);
- dns_rdataset_init(&nsec);
- dns_rdataset_init(&nsecsig);
+ dns_rdataset_init(&neg);
+ dns_rdataset_init(&negsig);
- result = dns_rdataset_getnoqname(rdataset, &name, &nsec, &nsecsig);
+ result = dns_rdataset_getnoqname(rdataset, &name, &neg, &negsig);
RUNTIME_CHECK(result == ISC_R_SUCCESS);
noqname = isc_mem_get(mctx, sizeof(*noqname));
@@ -4795,31 +5852,84 @@ addnoqname(dns_rbtdb_t *rbtdb, rdatasetheader_t *newheader,
goto cleanup;
}
dns_name_init(&noqname->name, NULL);
- noqname->nsec = NULL;
- noqname->nsecsig = NULL;
+ noqname->neg = NULL;
+ noqname->negsig = NULL;
+ noqname->type = neg.type;
result = dns_name_dup(&name, mctx, &noqname->name);
if (result != ISC_R_SUCCESS)
goto cleanup;
- result = dns_rdataslab_fromrdataset(&nsec, mctx, &r, 0);
+ result = dns_rdataslab_fromrdataset(&neg, mctx, &r, 0);
if (result != ISC_R_SUCCESS)
goto cleanup;
- noqname->nsec = r.base;
- result = dns_rdataslab_fromrdataset(&nsecsig, mctx, &r, 0);
+ noqname->neg = r.base;
+ result = dns_rdataslab_fromrdataset(&negsig, mctx, &r, 0);
if (result != ISC_R_SUCCESS)
goto cleanup;
- noqname->nsecsig = r.base;
- dns_rdataset_disassociate(&nsec);
- dns_rdataset_disassociate(&nsecsig);
+ noqname->negsig = r.base;
+ dns_rdataset_disassociate(&neg);
+ dns_rdataset_disassociate(&negsig);
newheader->noqname = noqname;
return (ISC_R_SUCCESS);
cleanup:
- dns_rdataset_disassociate(&nsec);
- dns_rdataset_disassociate(&nsecsig);
+ dns_rdataset_disassociate(&neg);
+ dns_rdataset_disassociate(&negsig);
free_noqname(mctx, &noqname);
return(result);
}
+static inline isc_result_t
+addclosest(dns_rbtdb_t *rbtdb, rdatasetheader_t *newheader,
+ dns_rdataset_t *rdataset)
+{
+ struct noqname *closest;
+ isc_mem_t *mctx = rbtdb->common.mctx;
+ dns_name_t name;
+ dns_rdataset_t neg, negsig;
+ isc_result_t result;
+ isc_region_t r;
+
+ dns_name_init(&name, NULL);
+ dns_rdataset_init(&neg);
+ dns_rdataset_init(&negsig);
+
+ result = dns_rdataset_getclosest(rdataset, &name, &neg, &negsig);
+ RUNTIME_CHECK(result == ISC_R_SUCCESS);
+
+ closest = isc_mem_get(mctx, sizeof(*closest));
+ if (closest == NULL) {
+ result = ISC_R_NOMEMORY;
+ goto cleanup;
+ }
+ dns_name_init(&closest->name, NULL);
+ closest->neg = NULL;
+ closest->negsig = NULL;
+ closest->type = neg.type;
+ result = dns_name_dup(&name, mctx, &closest->name);
+ if (result != ISC_R_SUCCESS)
+ goto cleanup;
+ result = dns_rdataslab_fromrdataset(&neg, mctx, &r, 0);
+ if (result != ISC_R_SUCCESS)
+ goto cleanup;
+ closest->neg = r.base;
+ result = dns_rdataslab_fromrdataset(&negsig, mctx, &r, 0);
+ if (result != ISC_R_SUCCESS)
+ goto cleanup;
+ closest->negsig = r.base;
+ dns_rdataset_disassociate(&neg);
+ dns_rdataset_disassociate(&negsig);
+ newheader->closest = closest;
+ return (ISC_R_SUCCESS);
+
+ cleanup:
+ dns_rdataset_disassociate(&neg);
+ dns_rdataset_disassociate(&negsig);
+ free_noqname(mctx, &closest);
+ return(result);
+}
+
+static dns_dbmethods_t zone_methods;
+
static isc_result_t
addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
isc_stdtime_t now, dns_rdataset_t *rdataset, unsigned int options,
@@ -4830,11 +5940,21 @@ addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
rbtdb_version_t *rbtversion = version;
isc_region_t region;
rdatasetheader_t *newheader;
+ rdatasetheader_t *header;
isc_result_t result;
isc_boolean_t delegating;
+ isc_boolean_t tree_locked = ISC_FALSE;
REQUIRE(VALID_RBTDB(rbtdb));
+ if (rbtdb->common.methods == &zone_methods)
+ REQUIRE(((rbtnode->nsec3 &&
+ (rdataset->type == dns_rdatatype_nsec3 ||
+ rdataset->covers == dns_rdatatype_nsec3)) ||
+ (!rbtnode->nsec3 &&
+ rdataset->type != dns_rdatatype_nsec3 &&
+ rdataset->covers != dns_rdatatype_nsec3)));
+
if (rbtversion == NULL) {
if (now == 0)
isc_stdtime_get(&now);
@@ -4848,26 +5968,48 @@ addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
return (result);
newheader = (rdatasetheader_t *)region.base;
- newheader->ttl = rdataset->ttl + now;
+ init_rdataset(rbtdb, newheader);
+ set_ttl(rbtdb, newheader, rdataset->ttl + now);
newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
rdataset->covers);
newheader->attributes = 0;
newheader->noqname = NULL;
+ newheader->closest = NULL;
newheader->count = init_count++;
newheader->trust = rdataset->trust;
newheader->additional_auth = NULL;
newheader->additional_glue = NULL;
+ newheader->last_used = now;
+ newheader->node = rbtnode;
if (rbtversion != NULL) {
newheader->serial = rbtversion->serial;
now = 0;
+
+ if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
+ newheader->attributes |= RDATASET_ATTR_RESIGN;
+ newheader->resign = rdataset->resign;
+ } else
+ newheader->resign = 0;
} else {
newheader->serial = 1;
+ newheader->resign = 0;
if ((rdataset->attributes & DNS_RDATASETATTR_NXDOMAIN) != 0)
newheader->attributes |= RDATASET_ATTR_NXDOMAIN;
+ if ((rdataset->attributes & DNS_RDATASETATTR_OPTOUT) != 0)
+ newheader->attributes |= RDATASET_ATTR_OPTOUT;
if ((rdataset->attributes & DNS_RDATASETATTR_NOQNAME) != 0) {
result = addnoqname(rbtdb, newheader, rdataset);
if (result != ISC_R_SUCCESS) {
- free_rdataset(rbtdb->common.mctx, newheader);
+ free_rdataset(rbtdb, rbtdb->common.mctx,
+ newheader);
+ return (result);
+ }
+ }
+ if ((rdataset->attributes & DNS_RDATASETATTR_CLOSEST) != 0) {
+ result = addclosest(rbtdb, newheader, rdataset);
+ if (result != ISC_R_SUCCESS) {
+ free_rdataset(rbtdb, rbtdb->common.mctx,
+ newheader);
return (result);
}
}
@@ -4876,18 +6018,54 @@ addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
/*
* If we're adding a delegation type (e.g. NS or DNAME for a zone,
* just DNAME for the cache), then we need to set the callback bit
- * on the node, and to do that we must be holding an exclusive lock
- * on the tree.
+ * on the node.
*/
- if (delegating_type(rbtdb, rbtnode, rdataset->type)) {
+ if (delegating_type(rbtdb, rbtnode, rdataset->type))
delegating = ISC_TRUE;
- RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
- } else
+ else
delegating = ISC_FALSE;
+ /*
+ * If we're adding a delegation type or the DB is a cache in an overmem
+ * state, hold an exclusive lock on the tree. In the latter case
+ * the lock does not necessarily have to be acquired but it will help
+ * purge stale entries more effectively.
+ */
+ if (delegating || (IS_CACHE(rbtdb) && rbtdb->overmem)) {
+ tree_locked = ISC_TRUE;
+ RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
+ }
+
+ if (IS_CACHE(rbtdb) && rbtdb->overmem)
+ overmem_purge(rbtdb, rbtnode->locknum, now, tree_locked);
+
NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
isc_rwlocktype_write);
+ if (rbtdb->rrsetstats != NULL) {
+ newheader->attributes |= RDATASET_ATTR_STATCOUNT;
+ update_rrsetstats(rbtdb, newheader, ISC_TRUE);
+ }
+
+ if (IS_CACHE(rbtdb)) {
+ if (tree_locked)
+ cleanup_dead_nodes(rbtdb, rbtnode->locknum);
+
+ header = isc_heap_element(rbtdb->heaps[rbtnode->locknum], 1);
+ if (header && header->rdh_ttl <= now - RBTDB_VIRTUAL)
+ expire_header(rbtdb, header, tree_locked);
+
+ /*
+ * If we've been holding a write lock on the tree just for
+ * cleaning, we can release it now. However, we still need the
+ * node lock.
+ */
+ if (tree_locked && !delegating) {
+ RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
+ tree_locked = ISC_FALSE;
+ }
+ }
+
result = add(rbtdb, rbtnode, rbtversion, newheader, options, ISC_FALSE,
addedrdataset, now);
if (result == ISC_R_SUCCESS && delegating)
@@ -4896,15 +6074,15 @@ addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
isc_rwlocktype_write);
- if (delegating)
+ if (tree_locked)
RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
/*
* Update the zone's secure status. If version is non-NULL
- * this is defered until closeversion() is called.
+ * this is deferred until closeversion() is called.
*/
if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
- rbtdb->secure = iszonesecure(db, rbtdb->origin_node);
+ iszonesecure(db, version, rbtdb->origin_node);
return (result);
}
@@ -4925,29 +6103,46 @@ subtractrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
REQUIRE(VALID_RBTDB(rbtdb));
+ if (rbtdb->common.methods == &zone_methods)
+ REQUIRE(((rbtnode->nsec3 &&
+ (rdataset->type == dns_rdatatype_nsec3 ||
+ rdataset->covers == dns_rdatatype_nsec3)) ||
+ (!rbtnode->nsec3 &&
+ rdataset->type != dns_rdatatype_nsec3 &&
+ rdataset->covers != dns_rdatatype_nsec3)));
+
result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
&region,
sizeof(rdatasetheader_t));
if (result != ISC_R_SUCCESS)
return (result);
newheader = (rdatasetheader_t *)region.base;
- newheader->ttl = rdataset->ttl;
+ init_rdataset(rbtdb, newheader);
+ set_ttl(rbtdb, newheader, rdataset->ttl);
newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
rdataset->covers);
newheader->attributes = 0;
newheader->serial = rbtversion->serial;
newheader->trust = 0;
newheader->noqname = NULL;
+ newheader->closest = NULL;
newheader->count = init_count++;
newheader->additional_auth = NULL;
newheader->additional_glue = NULL;
+ newheader->last_used = 0;
+ newheader->node = rbtnode;
+ if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
+ newheader->attributes |= RDATASET_ATTR_RESIGN;
+ newheader->resign = rdataset->resign;
+ } else
+ newheader->resign = 0;
NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
isc_rwlocktype_write);
changed = add_changed(rbtdb, rbtversion, rbtnode);
if (changed == NULL) {
- free_rdataset(rbtdb->common.mctx, newheader);
+ free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
isc_rwlocktype_write);
return (ISC_R_NOMEMORY);
@@ -4975,7 +6170,7 @@ subtractrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
result = ISC_R_SUCCESS;
if ((options & DNS_DBSUB_EXACT) != 0) {
flags |= DNS_RDATASLAB_EXACT;
- if (newheader->ttl != header->ttl)
+ if (newheader->rdh_ttl != header->rdh_ttl)
result = DNS_R_NOTEXACT;
}
if (result == ISC_R_SUCCESS)
@@ -4988,8 +6183,9 @@ subtractrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
(dns_rdatatype_t)header->type,
flags, &subresult);
if (result == ISC_R_SUCCESS) {
- free_rdataset(rbtdb->common.mctx, newheader);
+ free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
newheader = (rdatasetheader_t *)subresult;
+ init_rdataset(rbtdb, newheader);
/*
* We have to set the serial since the rdataslab
* subtraction routine copies the reserved portion of
@@ -5008,24 +6204,27 @@ subtractrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
* This subtraction would remove all of the rdata;
* add a nonexistent header instead.
*/
- free_rdataset(rbtdb->common.mctx, newheader);
- newheader = isc_mem_get(rbtdb->common.mctx,
- sizeof(*newheader));
+ free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
+ newheader = new_rdataset(rbtdb, rbtdb->common.mctx);
if (newheader == NULL) {
result = ISC_R_NOMEMORY;
goto unlock;
}
- newheader->ttl = 0;
+ set_ttl(rbtdb, newheader, 0);
newheader->type = topheader->type;
newheader->attributes = RDATASET_ATTR_NONEXISTENT;
newheader->trust = 0;
newheader->serial = rbtversion->serial;
newheader->noqname = NULL;
+ newheader->closest = NULL;
newheader->count = 0;
newheader->additional_auth = NULL;
newheader->additional_glue = NULL;
+ newheader->node = rbtnode;
+ newheader->resign = 0;
+ newheader->last_used = 0;
} else {
- free_rdataset(rbtdb->common.mctx, newheader);
+ free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
goto unlock;
}
@@ -5048,7 +6247,7 @@ subtractrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
* The rdataset doesn't exist, so we don't need to do anything
* to satisfy the deletion request.
*/
- free_rdataset(rbtdb->common.mctx, newheader);
+ free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
if ((options & DNS_DBSUB_EXACT) != 0)
result = DNS_R_NOTEXACT;
else
@@ -5064,10 +6263,10 @@ subtractrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
/*
* Update the zone's secure status. If version is non-NULL
- * this is defered until closeversion() is called.
+ * this is deferred until closeversion() is called.
*/
if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
- rbtdb->secure = iszonesecure(db, rbtdb->origin_node);
+ iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
return (result);
}
@@ -5089,14 +6288,15 @@ deleterdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
if (type == dns_rdatatype_rrsig && covers == 0)
return (ISC_R_NOTIMPLEMENTED);
- newheader = isc_mem_get(rbtdb->common.mctx, sizeof(*newheader));
+ newheader = new_rdataset(rbtdb, rbtdb->common.mctx);
if (newheader == NULL)
return (ISC_R_NOMEMORY);
- newheader->ttl = 0;
+ set_ttl(rbtdb, newheader, 0);
newheader->type = RBTDB_RDATATYPE_VALUE(type, covers);
newheader->attributes = RDATASET_ATTR_NONEXISTENT;
newheader->trust = 0;
newheader->noqname = NULL;
+ newheader->closest = NULL;
newheader->additional_auth = NULL;
newheader->additional_glue = NULL;
if (rbtversion != NULL)
@@ -5104,6 +6304,8 @@ deleterdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
else
newheader->serial = 0;
newheader->count = 0;
+ newheader->last_used = 0;
+ newheader->node = rbtnode;
NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
isc_rwlocktype_write);
@@ -5116,10 +6318,10 @@ deleterdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
/*
* Update the zone's secure status. If version is non-NULL
- * this is defered until closeversion() is called.
+ * this is deferred until closeversion() is called.
*/
if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
- rbtdb->secure = iszonesecure(db, rbtdb->origin_node);
+ iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
return (result);
}
@@ -5147,7 +6349,9 @@ loading_addrdataset(void *arg, dns_name_t *name, dns_rdataset_t *rdataset) {
!IS_CACHE(rbtdb) && !dns_name_equal(name, &rbtdb->common.origin))
return (DNS_R_NOTZONETOP);
- add_empty_wildcards(rbtdb, name);
+ if (rdataset->type != dns_rdatatype_nsec3 &&
+ rdataset->covers != dns_rdatatype_nsec3)
+ add_empty_wildcards(rbtdb, name);
if (dns_name_iswildcard(name)) {
/*
@@ -5155,13 +6359,27 @@ loading_addrdataset(void *arg, dns_name_t *name, dns_rdataset_t *rdataset) {
*/
if (rdataset->type == dns_rdatatype_ns)
return (DNS_R_INVALIDNS);
+ /*
+ * NSEC3 record owners cannot legally be wild cards.
+ */
+ if (rdataset->type == dns_rdatatype_nsec3)
+ return (DNS_R_INVALIDNSEC3);
result = add_wildcard_magic(rbtdb, name);
if (result != ISC_R_SUCCESS)
return (result);
}
node = NULL;
- result = dns_rbt_addnode(rbtdb->tree, name, &node);
+ if (rdataset->type == dns_rdatatype_nsec3 ||
+ rdataset->covers == dns_rdatatype_nsec3) {
+ result = dns_rbt_addnode(rbtdb->nsec3, name, &node);
+ if (result == ISC_R_SUCCESS)
+ node->nsec3 = 1;
+ } else {
+ result = dns_rbt_addnode(rbtdb->tree, name, &node);
+ if (result == ISC_R_SUCCESS)
+ node->nsec3 = 0;
+ }
if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
return (result);
if (result != ISC_R_EXISTS) {
@@ -5182,16 +6400,26 @@ loading_addrdataset(void *arg, dns_name_t *name, dns_rdataset_t *rdataset) {
if (result != ISC_R_SUCCESS)
return (result);
newheader = (rdatasetheader_t *)region.base;
- newheader->ttl = rdataset->ttl + loadctx->now; /* XXX overflow check */
+ init_rdataset(rbtdb, newheader);
+ set_ttl(rbtdb, newheader,
+ rdataset->ttl + loadctx->now); /* XXX overflow check */
newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
rdataset->covers);
newheader->attributes = 0;
newheader->trust = rdataset->trust;
newheader->serial = 1;
newheader->noqname = NULL;
+ newheader->closest = NULL;
newheader->count = init_count++;
newheader->additional_auth = NULL;
newheader->additional_glue = NULL;
+ newheader->last_used = 0;
+ newheader->node = node;
+ if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
+ newheader->attributes |= RDATASET_ATTR_RESIGN;
+ newheader->resign = rdataset->resign;
+ } else
+ newheader->resign = 0;
result = add(rbtdb, node, rbtdb->current_version, newheader,
DNS_DBADD_MERGE, ISC_TRUE, NULL, 0);
@@ -5262,7 +6490,7 @@ endload(dns_db_t *db, dns_dbload_t **dbloadp) {
* zone key, we consider the zone secure.
*/
if (! IS_CACHE(rbtdb))
- rbtdb->secure = iszonesecure(db, rbtdb->origin_node);
+ iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
*dbloadp = NULL;
@@ -5292,7 +6520,7 @@ delete_callback(void *data, void *arg) {
for (current = data; current != NULL; current = next) {
next = current->next;
- free_rdataset(rbtdb->common.mctx, current);
+ free_rdataset(rbtdb, rbtdb->common.mctx, current);
}
}
@@ -5306,12 +6534,28 @@ issecure(dns_db_t *db) {
REQUIRE(VALID_RBTDB(rbtdb));
RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
- secure = rbtdb->secure;
+ secure = ISC_TF(rbtdb->current_version->secure == dns_db_secure);
RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
return (secure);
}
+/*
+ * Report whether the current version of the database is marked as
+ * anything other than dns_db_insecure.  The version's 'secure' state is
+ * sampled while holding the tree lock for reading.
+ */
+static isc_boolean_t
+isdnssec(dns_db_t *db) {
+ dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
+ isc_boolean_t answer;
+
+ REQUIRE(VALID_RBTDB(rbtdb));
+
+ RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
+ if (rbtdb->current_version->secure != dns_db_insecure)
+ answer = ISC_TRUE;
+ else
+ answer = ISC_FALSE;
+ RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
+
+ return (answer);
+}
+
static unsigned int
nodecount(dns_db_t *db) {
dns_rbtdb_t *rbtdb;
@@ -5368,13 +6612,180 @@ getoriginnode(dns_db_t *db, dns_dbnode_t **nodep) {
*nodep = rbtdb->origin_node;
} else {
- INSIST(!IS_CACHE(rbtdb));
+ INSIST(IS_CACHE(rbtdb));
result = ISC_R_NOTFOUND;
}
return (result);
}
+/*
+ * Report the NSEC3 parameters recorded in a version of the database.
+ *
+ * 'version' may be NULL, in which case the current version is used.
+ * Each of 'hash', 'flags', 'iterations', 'salt' and 'salt_length' may be
+ * NULL; only the outputs that were supplied are written.  The salt is
+ * copied only when both 'salt' and 'salt_length' are supplied; on entry
+ * '*salt_length' is the size of the 'salt' buffer, which must be at least
+ * as large as the stored salt.  On return '*salt_length' holds the length
+ * of the stored salt.
+ *
+ * Returns ISC_R_SUCCESS when the version has NSEC3 parameters, otherwise
+ * ISC_R_NOTFOUND (in which case no output is written).
+ */
+static isc_result_t
+getnsec3parameters(dns_db_t *db, dns_dbversion_t *version, dns_hash_t *hash,
+ isc_uint8_t *flags, isc_uint16_t *iterations,
+ unsigned char *salt, size_t *salt_length)
+{
+ dns_rbtdb_t *rbtdb;
+ isc_result_t result = ISC_R_NOTFOUND;
+ rbtdb_version_t *rbtversion = version;
+
+ rbtdb = (dns_rbtdb_t *)db;
+
+ REQUIRE(VALID_RBTDB(rbtdb));
+
+ RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
+
+ if (rbtversion == NULL)
+ rbtversion = rbtdb->current_version;
+
+ if (rbtversion->havensec3) {
+ if (hash != NULL)
+ *hash = rbtversion->hash;
+ if (salt != NULL && salt_length != NULL) {
+ /*
+ * Exactly salt_length bytes are copied, so a buffer of
+ * exactly that size is sufficient.
+ */
+ REQUIRE(*salt_length >= rbtversion->salt_length);
+ memcpy(salt, rbtversion->salt, rbtversion->salt_length);
+ }
+ if (salt_length != NULL)
+ *salt_length = rbtversion->salt_length;
+ if (iterations != NULL)
+ *iterations = rbtversion->iterations;
+ if (flags != NULL)
+ *flags = rbtversion->flags;
+ result = ISC_R_SUCCESS;
+ }
+ RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
+
+ return (result);
+}
+
+/*
+ * Set the re-signing time of the rdataset bound to 'rdataset' in a
+ * non-cache database and keep the per-bucket heap in step with it.
+ *
+ * A 'resign' of 0 removes an entry that is currently in the heap.  A
+ * non-zero 'resign' either repositions the existing heap entry or, when
+ * the header is not in the heap, marks it RDATASET_ATTR_RESIGN and
+ * inserts it.
+ *
+ * Returns ISC_R_SUCCESS, or the result of resign_insert() when an
+ * insertion was attempted.
+ */
+static isc_result_t
+setsigningtime(dns_db_t *db, dns_rdataset_t *rdataset, isc_stdtime_t resign) {
+ dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
+ isc_stdtime_t oldresign;
+ isc_result_t result = ISC_R_SUCCESS;
+ rdatasetheader_t *header;
+
+ REQUIRE(VALID_RBTDB(rbtdb));
+ REQUIRE(!IS_CACHE(rbtdb));
+ REQUIRE(rdataset != NULL);
+
+ /*
+ * private3 is used elsewhere in this file as the raw rdata slab;
+ * stepping back one rdatasetheader_t presumably yields the header
+ * stored immediately in front of that slab.
+ */
+ header = rdataset->private3;
+ header--;
+
+ NODE_LOCK(&rbtdb->node_locks[header->node->locknum].lock,
+ isc_rwlocktype_write);
+
+ oldresign = header->resign;
+ header->resign = resign;
+ if (header->heap_index != 0) {
+ /* Already in the heap: remove it or move it to its new place. */
+ INSIST(RESIGN(header));
+ if (resign == 0) {
+ isc_heap_delete(rbtdb->heaps[header->node->locknum],
+ header->heap_index);
+ header->heap_index = 0;
+ } else if (resign < oldresign)
+ /* Earlier re-sign time than before. */
+ isc_heap_increased(rbtdb->heaps[header->node->locknum],
+ header->heap_index);
+ else
+ /* Same or later re-sign time than before. */
+ isc_heap_decreased(rbtdb->heaps[header->node->locknum],
+ header->heap_index);
+ } else if (resign && header->heap_index == 0) {
+ /* Not in the heap yet and a re-sign time was requested. */
+ header->attributes |= RDATASET_ATTR_RESIGN;
+ result = resign_insert(rbtdb, header->node->locknum, header);
+ }
+ NODE_UNLOCK(&rbtdb->node_locks[header->node->locknum].lock,
+ isc_rwlocktype_write);
+ return (result);
+}
+
+/*
+ * Find the rdataset with the earliest re-signing time.
+ *
+ * The first element of every per-bucket heap is examined and the one
+ * whose 'resign' value compares lowest under isc_serial_lt() is bound to
+ * 'rdataset'.  If 'foundname' is not NULL it receives the full name of
+ * the owning node.
+ *
+ * Returns ISC_R_SUCCESS when a header was found, ISC_R_NOTFOUND when
+ * every heap is empty.
+ */
+static isc_result_t
+getsigningtime(dns_db_t *db, dns_rdataset_t *rdataset,
+ dns_name_t *foundname)
+{
+ dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
+ rdatasetheader_t *header = NULL, *this;
+ unsigned int i;
+ isc_result_t result = ISC_R_NOTFOUND;
+
+ REQUIRE(VALID_RBTDB(rbtdb));
+
+ RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
+
+ /*
+ * Pick the best candidate among the heap tops (element 1 of each
+ * heap).
+ * NOTE(review): the heaps are read here without holding the
+ * corresponding node locks - confirm the database lock is enough.
+ */
+ for (i = 0; i < rbtdb->node_lock_count; i++) {
+ this = isc_heap_element(rbtdb->heaps[i], 1);
+ if (this == NULL)
+ continue;
+ if (header == NULL)
+ header = this;
+ else if (isc_serial_lt(this->resign, header->resign))
+ header = this;
+ }
+
+ if (header == NULL)
+ goto unlock;
+
+ /* Bind the chosen header while holding its node lock for reading. */
+ NODE_LOCK(&rbtdb->node_locks[header->node->locknum].lock,
+ isc_rwlocktype_read);
+
+ bind_rdataset(rbtdb, header->node, header, 0, rdataset);
+
+ if (foundname != NULL)
+ dns_rbt_fullnamefromnode(header->node, foundname);
+
+ NODE_UNLOCK(&rbtdb->node_locks[header->node->locknum].lock,
+ isc_rwlocktype_read);
+
+ result = ISC_R_SUCCESS;
+
+ unlock:
+ RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
+
+ return (result);
+}
+
+/*
+ * Record that 'rdataset' has been re-signed in the writable version
+ * 'version': its header is removed from the re-signing heap and queued
+ * on the version's resigned_list.
+ *
+ * 'version' must be the database's future (writer) version.
+ */
+static void
+resigned(dns_db_t *db, dns_rdataset_t *rdataset, dns_dbversion_t *version)
+{
+ rbtdb_version_t *rbtversion = (rbtdb_version_t *)version;
+ dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
+ dns_rbtnode_t *node;
+ rdatasetheader_t *header;
+
+ REQUIRE(VALID_RBTDB(rbtdb));
+ REQUIRE(rdataset != NULL);
+ REQUIRE(rbtdb->future_version == rbtversion);
+ REQUIRE(rbtversion->writer);
+
+ node = rdataset->private2;
+ /*
+ * private3 is used elsewhere in this file as the raw rdata slab;
+ * stepping back one rdatasetheader_t presumably yields the header
+ * stored immediately in front of that slab.
+ */
+ header = rdataset->private3;
+ header--;
+
+ RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
+ NODE_LOCK(&rbtdb->node_locks[node->locknum].lock,
+ isc_rwlocktype_write);
+ /*
+ * Delete from heap and save to re-signed list so that it can
+ * be restored if we backout of this change.
+ *
+ * NOTE(review): header->heap_index is not checked for being
+ * non-zero before isc_heap_delete() - confirm callers only pass
+ * rdatasets that are currently in the heap.
+ */
+ new_reference(rbtdb, node);
+ isc_heap_delete(rbtdb->heaps[node->locknum], header->heap_index);
+ header->heap_index = 0;
+ ISC_LIST_APPEND(rbtversion->resigned_list, header, lru_link);
+
+ NODE_UNLOCK(&rbtdb->node_locks[node->locknum].lock,
+ isc_rwlocktype_write);
+ RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
+}
+
+/*
+ * Hand back the RRset statistics object attached to a cache database.
+ * Only cache databases are accepted.
+ */
+static dns_stats_t *
+getrrsetstats(dns_db_t *db) {
+ dns_rbtdb_t *rbtdb;
+ dns_stats_t *stats;
+
+ rbtdb = (dns_rbtdb_t *)db;
+ REQUIRE(VALID_RBTDB(rbtdb));
+ REQUIRE(IS_CACHE(rbtdb)); /* current restriction */
+
+ stats = rbtdb->rrsetstats;
+ return (stats);
+}
+
static dns_dbmethods_t zone_methods = {
attach,
detach,
@@ -5403,7 +6814,15 @@ static dns_dbmethods_t zone_methods = {
ispersistent,
overmem,
settask,
- getoriginnode
+ getoriginnode,
+ NULL,
+ getnsec3parameters,
+ findnsec3node,
+ setsigningtime,
+ getsigningtime,
+ resigned,
+ isdnssec,
+ NULL
};
static dns_dbmethods_t cache_methods = {
@@ -5434,7 +6853,15 @@ static dns_dbmethods_t cache_methods = {
ispersistent,
overmem,
settask,
- getoriginnode
+ getoriginnode,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ isdnssec,
+ getrrsetstats
};
isc_result_t
@@ -5451,6 +6878,7 @@ dns_rbtdb_create
isc_result_t result;
int i;
dns_name_t name;
+ isc_boolean_t (*sooner)(void *, void *);
/* Keep the compiler happy. */
UNUSED(argc);
@@ -5483,11 +6911,20 @@ dns_rbtdb_create
if (result != ISC_R_SUCCESS)
goto cleanup_lock;
+ /*
+ * Initialize node_lock_count in a generic way to support future
+ * extension which allows the user to specify this value on creation.
+ * Note that when specified for a cache DB it must be larger than 1
+ * as commented with the definition of DEFAULT_CACHE_NODE_LOCK_COUNT.
+ */
if (rbtdb->node_lock_count == 0) {
if (IS_CACHE(rbtdb))
rbtdb->node_lock_count = DEFAULT_CACHE_NODE_LOCK_COUNT;
else
rbtdb->node_lock_count = DEFAULT_NODE_LOCK_COUNT;
+ } else if (rbtdb->node_lock_count < 2 && IS_CACHE(rbtdb)) {
+ result = ISC_R_RANGE;
+ goto cleanup_tree_lock;
}
INSIST(rbtdb->node_lock_count < (1 << DNS_RBT_LOCKLENGTH));
rbtdb->node_locks = isc_mem_get(mctx, rbtdb->node_lock_count *
@@ -5497,6 +6934,53 @@ dns_rbtdb_create
goto cleanup_tree_lock;
}
+ rbtdb->rrsetstats = NULL;
+ if (IS_CACHE(rbtdb)) {
+ result = dns_rdatasetstats_create(mctx, &rbtdb->rrsetstats);
+ if (result != ISC_R_SUCCESS)
+ goto cleanup_node_locks;
+ rbtdb->rdatasets = isc_mem_get(mctx, rbtdb->node_lock_count *
+ sizeof(rdatasetheaderlist_t));
+ if (rbtdb->rdatasets == NULL) {
+ result = ISC_R_NOMEMORY;
+ goto cleanup_rrsetstats;
+ }
+ for (i = 0; i < (int)rbtdb->node_lock_count; i++)
+ ISC_LIST_INIT(rbtdb->rdatasets[i]);
+ } else
+ rbtdb->rdatasets = NULL;
+
+ /*
+ * Create the heaps.
+ */
+ rbtdb->heaps = isc_mem_get(mctx, rbtdb->node_lock_count *
+ sizeof(isc_heap_t *));
+ if (rbtdb->heaps == NULL) {
+ result = ISC_R_NOMEMORY;
+ goto cleanup_rdatasets;
+ }
+ for (i = 0; i < (int)rbtdb->node_lock_count; i++)
+ rbtdb->heaps[i] = NULL;
+ sooner = IS_CACHE(rbtdb) ? ttl_sooner : resign_sooner;
+ for (i = 0; i < (int)rbtdb->node_lock_count; i++) {
+ result = isc_heap_create(mctx, sooner, set_index, 0,
+ &rbtdb->heaps[i]);
+ if (result != ISC_R_SUCCESS)
+ goto cleanup_heaps;
+ }
+
+ /*
+ * Create deadnode lists.
+ */
+ rbtdb->deadnodes = isc_mem_get(mctx, rbtdb->node_lock_count *
+ sizeof(rbtnodelist_t));
+ if (rbtdb->deadnodes == NULL) {
+ result = ISC_R_NOMEMORY;
+ goto cleanup_heaps;
+ }
+ for (i = 0; i < (int)rbtdb->node_lock_count; i++)
+ ISC_LIST_INIT(rbtdb->deadnodes[i]);
+
rbtdb->active = rbtdb->node_lock_count;
for (i = 0; i < (int)(rbtdb->node_lock_count); i++) {
@@ -5512,7 +6996,7 @@ dns_rbtdb_create
isc_refcount_decrement(&rbtdb->node_locks[i].references, NULL);
isc_refcount_destroy(&rbtdb->node_locks[i].references);
}
- goto cleanup_node_locks;
+ goto cleanup_deadnodes;
}
rbtdb->node_locks[i].exiting = ISC_FALSE;
}
@@ -5525,7 +7009,7 @@ dns_rbtdb_create
isc_mem_attach(mctx, &rbtdb->common.mctx);
/*
- * Must be initalized before free_rbtdb() is called.
+ * Must be initialized before free_rbtdb() is called.
*/
isc_ondestroy_init(&rbtdb->common.ondest);
@@ -5539,13 +7023,20 @@ dns_rbtdb_create
}
/*
- * Make the Red-Black Tree.
+ * Make the Red-Black Trees.
*/
result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->tree);
if (result != ISC_R_SUCCESS) {
free_rbtdb(rbtdb, ISC_FALSE, NULL);
return (result);
}
+
+ result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->nsec3);
+ if (result != ISC_R_SUCCESS) {
+ free_rbtdb(rbtdb, ISC_FALSE, NULL);
+ return (result);
+ }
+
/*
* In order to set the node callback bit correctly in zone databases,
* we need to know if the node has the origin name of the zone.
@@ -5568,6 +7059,7 @@ dns_rbtdb_create
free_rbtdb(rbtdb, ISC_FALSE, NULL);
return (result);
}
+ rbtdb->origin_node->nsec3 = 0;
/*
* We need to give the origin node the right locknum.
*/
@@ -5593,7 +7085,6 @@ dns_rbtdb_create
return (result);
}
rbtdb->attributes = 0;
- rbtdb->secure = ISC_FALSE;
rbtdb->overmem = ISC_FALSE;
rbtdb->task = NULL;
@@ -5610,6 +7101,14 @@ dns_rbtdb_create
free_rbtdb(rbtdb, ISC_FALSE, NULL);
return (ISC_R_NOMEMORY);
}
+ rbtdb->current_version->secure = dns_db_insecure;
+ rbtdb->current_version->havensec3 = ISC_FALSE;
+ rbtdb->current_version->flags = 0;
+ rbtdb->current_version->iterations = 0;
+ rbtdb->current_version->hash = 0;
+ rbtdb->current_version->salt_length = 0;
+ memset(rbtdb->current_version->salt, 0,
+ sizeof(rbtdb->current_version->salt));
rbtdb->future_version = NULL;
ISC_LIST_INIT(rbtdb->open_versions);
/*
@@ -5625,6 +7124,27 @@ dns_rbtdb_create
return (ISC_R_SUCCESS);
+ cleanup_deadnodes:
+ isc_mem_put(mctx, rbtdb->deadnodes,
+ rbtdb->node_lock_count * sizeof(rbtnodelist_t));
+
+ cleanup_heaps:
+ if (rbtdb->heaps != NULL) {
+ for (i = 0 ; i < (int)rbtdb->node_lock_count ; i++)
+ if (rbtdb->heaps[i] != NULL)
+ isc_heap_destroy(&rbtdb->heaps[i]);
+ isc_mem_put(mctx, rbtdb->heaps,
+ rbtdb->node_lock_count * sizeof(isc_heap_t *));
+ }
+
+ cleanup_rdatasets:
+ if (rbtdb->rdatasets != NULL)
+ isc_mem_put(mctx, rbtdb->rdatasets, rbtdb->node_lock_count *
+ sizeof(rdatasetheaderlist_t));
+ cleanup_rrsetstats:
+ if (rbtdb->rrsetstats != NULL)
+ dns_stats_detach(&rbtdb->rrsetstats);
+
cleanup_node_locks:
isc_mem_put(mctx, rbtdb->node_locks,
rbtdb->node_lock_count * sizeof(rbtdb_nodelock_t));
@@ -5655,7 +7175,7 @@ rdataset_disassociate(dns_rdataset_t *rdataset) {
static isc_result_t
rdataset_first(dns_rdataset_t *rdataset) {
- unsigned char *raw = rdataset->private3; /* RDATASLAB */
+ unsigned char *raw = rdataset->private3; /* RDATASLAB */
unsigned int count;
count = raw[0] * 256 + raw[1];
@@ -5691,7 +7211,7 @@ static isc_result_t
rdataset_next(dns_rdataset_t *rdataset) {
unsigned int count;
unsigned int length;
- unsigned char *raw; /* RDATASLAB */
+ unsigned char *raw; /* RDATASLAB */
count = rdataset->privateuint4;
if (count == 0)
@@ -5710,9 +7230,9 @@ rdataset_next(dns_rdataset_t *rdataset) {
raw += length;
#if DNS_RDATASET_FIXED
}
- rdataset->private5 = raw + 4; /* length(2) + order(2) */
+ rdataset->private5 = raw + 4; /* length(2) + order(2) */
#else
- rdataset->private5 = raw + 2; /* length(2) */
+ rdataset->private5 = raw + 2; /* length(2) */
#endif
return (ISC_R_SUCCESS);
@@ -5720,11 +7240,13 @@ rdataset_next(dns_rdataset_t *rdataset) {
static void
rdataset_current(dns_rdataset_t *rdataset, dns_rdata_t *rdata) {
- unsigned char *raw = rdataset->private5; /* RDATASLAB */
+ unsigned char *raw = rdataset->private5; /* RDATASLAB */
#if DNS_RDATASET_FIXED
unsigned int offset;
#endif
+ unsigned int length;
isc_region_t r;
+ unsigned int flags = 0;
REQUIRE(raw != NULL);
@@ -5740,15 +7262,22 @@ rdataset_current(dns_rdataset_t *rdataset, dns_rdata_t *rdata) {
raw += offset;
}
#endif
- r.length = raw[0] * 256 + raw[1];
-
+ length = raw[0] * 256 + raw[1];
#if DNS_RDATASET_FIXED
raw += 4;
#else
raw += 2;
#endif
+ if (rdataset->type == dns_rdatatype_rrsig) {
+ if (*raw & DNS_RDATASLAB_OFFLINE)
+ flags |= DNS_RDATA_OFFLINE;
+ length--;
+ raw++;
+ }
+ r.length = length;
r.base = raw;
dns_rdata_fromregion(rdata, rdataset->rdclass, rdataset->type, &r);
+ rdata->flags |= flags;
}
static void
@@ -5769,7 +7298,7 @@ rdataset_clone(dns_rdataset_t *source, dns_rdataset_t *target) {
static unsigned int
rdataset_count(dns_rdataset_t *rdataset) {
- unsigned char *raw = rdataset->private3; /* RDATASLAB */
+ unsigned char *raw = rdataset->private3; /* RDATASLAB */
unsigned int count;
count = raw[0] * 256 + raw[1];
@@ -5790,37 +7319,85 @@ rdataset_getnoqname(dns_rdataset_t *rdataset, dns_name_t *name,
attachnode(db, node, &cloned_node);
nsec->methods = &rdataset_methods;
nsec->rdclass = db->rdclass;
- nsec->type = dns_rdatatype_nsec;
+ nsec->type = noqname->type;
nsec->covers = 0;
nsec->ttl = rdataset->ttl;
nsec->trust = rdataset->trust;
nsec->private1 = rdataset->private1;
nsec->private2 = rdataset->private2;
- nsec->private3 = noqname->nsec;
+ nsec->private3 = noqname->neg;
nsec->privateuint4 = 0;
nsec->private5 = NULL;
nsec->private6 = NULL;
+ nsec->private7 = NULL;
cloned_node = NULL;
attachnode(db, node, &cloned_node);
nsecsig->methods = &rdataset_methods;
nsecsig->rdclass = db->rdclass;
nsecsig->type = dns_rdatatype_rrsig;
- nsecsig->covers = dns_rdatatype_nsec;
+ nsecsig->covers = noqname->type;
nsecsig->ttl = rdataset->ttl;
nsecsig->trust = rdataset->trust;
nsecsig->private1 = rdataset->private1;
nsecsig->private2 = rdataset->private2;
- nsecsig->private3 = noqname->nsecsig;
+ nsecsig->private3 = noqname->negsig;
nsecsig->privateuint4 = 0;
nsecsig->private5 = NULL;
nsec->private6 = NULL;
+ nsec->private7 = NULL;
dns_name_clone(&noqname->name, name);
return (ISC_R_SUCCESS);
}
+/*
+ * Expose the "closest" proof attached to 'rdataset' (its private7
+ * pointer) as a pair of rdatasets: 'nsec' receives the negative-proof
+ * records and 'nsecsig' the RRSIG rdataset covering them.  'name' is
+ * made to refer to the owner name stored with the proof.
+ *
+ * attachnode() is called once per returned rdataset, presumably so that
+ * each of them holds its own reference on the node.
+ *
+ * Always returns ISC_R_SUCCESS.
+ */
+static isc_result_t
+rdataset_getclosest(dns_rdataset_t *rdataset, dns_name_t *name,
+ dns_rdataset_t *nsec, dns_rdataset_t *nsecsig)
+{
+ dns_db_t *db = rdataset->private1;
+ dns_dbnode_t *node = rdataset->private2;
+ dns_dbnode_t *cloned_node;
+ struct noqname *closest = rdataset->private7;
+
+ cloned_node = NULL;
+ attachnode(db, node, &cloned_node);
+ nsec->methods = &rdataset_methods;
+ nsec->rdclass = db->rdclass;
+ nsec->type = closest->type;
+ nsec->covers = 0;
+ nsec->ttl = rdataset->ttl;
+ nsec->trust = rdataset->trust;
+ nsec->private1 = rdataset->private1;
+ nsec->private2 = rdataset->private2;
+ nsec->private3 = closest->neg;
+ nsec->privateuint4 = 0;
+ nsec->private5 = NULL;
+ nsec->private6 = NULL;
+ nsec->private7 = NULL;
+
+ cloned_node = NULL;
+ attachnode(db, node, &cloned_node);
+ nsecsig->methods = &rdataset_methods;
+ nsecsig->rdclass = db->rdclass;
+ nsecsig->type = dns_rdatatype_rrsig;
+ nsecsig->covers = closest->type;
+ nsecsig->ttl = rdataset->ttl;
+ nsecsig->trust = rdataset->trust;
+ nsecsig->private1 = rdataset->private1;
+ nsecsig->private2 = rdataset->private2;
+ nsecsig->private3 = closest->negsig;
+ nsecsig->privateuint4 = 0;
+ nsecsig->private5 = NULL;
+ /* Clear the signature rdataset's own fields, not nsec's again. */
+ nsecsig->private6 = NULL;
+ nsecsig->private7 = NULL;
+
+ dns_name_clone(&closest->name, name);
+
+ return (ISC_R_SUCCESS);
+}
+
/*
* Rdataset Iterator Methods
*/
@@ -5871,13 +7448,13 @@ rdatasetiter_first(dns_rdatasetiter_t *iterator) {
* record? Or is it too old in the cache?
*
* Note: unlike everywhere else, we
- * check for now > header->ttl instead
- * of now >= header->ttl. This allows
+ * check for now > header->rdh_ttl instead
+ * of now >= header->rdh_ttl. This allows
* ANY and RRSIG queries for 0 TTL
* rdatasets to work.
*/
if (NONEXISTENT(header) ||
- (now != 0 && now > header->ttl))
+ (now != 0 && now > header->rdh_ttl))
header = NULL;
break;
} else
@@ -5953,7 +7530,7 @@ rdatasetiter_next(dns_rdatasetiter_t *iterator) {
*/
if ((header->attributes &
RDATASET_ATTR_NONEXISTENT) != 0 ||
- (now != 0 && now > header->ttl))
+ (now != 0 && now > header->rdh_ttl))
header = NULL;
break;
} else
@@ -6009,9 +7586,7 @@ reference_iter_node(rbtdb_dbiterator_t *rbtdbiter) {
return;
INSIST(rbtdbiter->tree_locked != isc_rwlocktype_none);
- NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
- new_reference(rbtdb, node);
- NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
+ reactivate_node(rbtdb, node, rbtdbiter->tree_locked);
}
static inline void
@@ -6026,7 +7601,7 @@ dereference_iter_node(rbtdb_dbiterator_t *rbtdbiter) {
lock = &rbtdb->node_locks[node->locknum].lock;
NODE_LOCK(lock, isc_rwlocktype_read);
decrement_reference(rbtdb, node, 0, isc_rwlocktype_read,
- rbtdbiter->tree_locked);
+ rbtdbiter->tree_locked, ISC_FALSE);
NODE_UNLOCK(lock, isc_rwlocktype_read);
rbtdbiter->node = NULL;
@@ -6067,7 +7642,7 @@ flush_deletions(rbtdb_dbiterator_t *rbtdbiter) {
NODE_LOCK(lock, isc_rwlocktype_read);
decrement_reference(rbtdb, node, 0,
isc_rwlocktype_read,
- rbtdbiter->tree_locked);
+ rbtdbiter->tree_locked, ISC_FALSE);
NODE_UNLOCK(lock, isc_rwlocktype_read);
}
@@ -6117,6 +7692,7 @@ dbiterator_destroy(dns_dbiterator_t **iteratorp) {
dns_db_detach(&rbtdbiter->common.db);
dns_rbtnodechain_reset(&rbtdbiter->chain);
+ dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
isc_mem_put(db->mctx, rbtdbiter, sizeof(*rbtdbiter));
dns_db_detach(&db);
@@ -6142,12 +7718,25 @@ dbiterator_first(dns_dbiterator_t *iterator) {
name = dns_fixedname_name(&rbtdbiter->name);
origin = dns_fixedname_name(&rbtdbiter->origin);
dns_rbtnodechain_reset(&rbtdbiter->chain);
+ dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
- result = dns_rbtnodechain_first(&rbtdbiter->chain, rbtdb->tree, name,
- origin);
-
+ if (rbtdbiter->nsec3only) {
+ rbtdbiter->current = &rbtdbiter->nsec3chain;
+ result = dns_rbtnodechain_first(rbtdbiter->current,
+ rbtdb->nsec3, name, origin);
+ } else {
+ rbtdbiter->current = &rbtdbiter->chain;
+ result = dns_rbtnodechain_first(rbtdbiter->current,
+ rbtdb->tree, name, origin);
+ if (!rbtdbiter->nonsec3 && result == ISC_R_NOTFOUND) {
+ rbtdbiter->current = &rbtdbiter->nsec3chain;
+ result = dns_rbtnodechain_first(rbtdbiter->current,
+ rbtdb->nsec3, name,
+ origin);
+ }
+ }
if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
- result = dns_rbtnodechain_current(&rbtdbiter->chain, NULL,
+ result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
NULL, &rbtdbiter->node);
if (result == ISC_R_SUCCESS) {
rbtdbiter->new_origin = ISC_TRUE;
@@ -6182,11 +7771,21 @@ dbiterator_last(dns_dbiterator_t *iterator) {
name = dns_fixedname_name(&rbtdbiter->name);
origin = dns_fixedname_name(&rbtdbiter->origin);
dns_rbtnodechain_reset(&rbtdbiter->chain);
+ dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
- result = dns_rbtnodechain_last(&rbtdbiter->chain, rbtdb->tree, name,
- origin);
+ result = ISC_R_NOTFOUND;
+ if (rbtdbiter->nsec3only && !rbtdbiter->nonsec3) {
+ rbtdbiter->current = &rbtdbiter->nsec3chain;
+ result = dns_rbtnodechain_last(rbtdbiter->current,
+ rbtdb->nsec3, name, origin);
+ }
+ if (!rbtdbiter->nsec3only && result == ISC_R_NOTFOUND) {
+ rbtdbiter->current = &rbtdbiter->chain;
+ result = dns_rbtnodechain_last(rbtdbiter->current, rbtdb->tree,
+ name, origin);
+ }
if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
- result = dns_rbtnodechain_current(&rbtdbiter->chain, NULL,
+ result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
NULL, &rbtdbiter->node);
if (result == ISC_R_SUCCESS) {
rbtdbiter->new_origin = ISC_TRUE;
@@ -6210,6 +7809,7 @@ dbiterator_seek(dns_dbiterator_t *iterator, dns_name_t *name) {
dns_name_t *iname, *origin;
if (rbtdbiter->result != ISC_R_SUCCESS &&
+ rbtdbiter->result != ISC_R_NOTFOUND &&
rbtdbiter->result != ISC_R_NOMORE)
return (rbtdbiter->result);
@@ -6221,22 +7821,74 @@ dbiterator_seek(dns_dbiterator_t *iterator, dns_name_t *name) {
iname = dns_fixedname_name(&rbtdbiter->name);
origin = dns_fixedname_name(&rbtdbiter->origin);
dns_rbtnodechain_reset(&rbtdbiter->chain);
+ dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
+
+ if (rbtdbiter->nsec3only) {
+ rbtdbiter->current = &rbtdbiter->nsec3chain;
+ result = dns_rbt_findnode(rbtdb->nsec3, name, NULL,
+ &rbtdbiter->node,
+ rbtdbiter->current,
+ DNS_RBTFIND_EMPTYDATA, NULL, NULL);
+ } else if (rbtdbiter->nonsec3) {
+ rbtdbiter->current = &rbtdbiter->chain;
+ result = dns_rbt_findnode(rbtdb->tree, name, NULL,
+ &rbtdbiter->node,
+ rbtdbiter->current,
+ DNS_RBTFIND_EMPTYDATA, NULL, NULL);
+ } else {
+ /*
+ * Stay on main chain if not found on either chain.
+ */
+ rbtdbiter->current = &rbtdbiter->chain;
+ result = dns_rbt_findnode(rbtdb->tree, name, NULL,
+ &rbtdbiter->node,
+ rbtdbiter->current,
+ DNS_RBTFIND_EMPTYDATA, NULL, NULL);
+ if (result == DNS_R_PARTIALMATCH) {
+ dns_rbtnode_t *node = NULL;
+ result = dns_rbt_findnode(rbtdb->nsec3, name, NULL,
+ &node, &rbtdbiter->nsec3chain,
+ DNS_RBTFIND_EMPTYDATA,
+ NULL, NULL);
+ if (result == ISC_R_SUCCESS) {
+ rbtdbiter->node = node;
+ rbtdbiter->current = &rbtdbiter->nsec3chain;
+ }
+ }
+ }
- result = dns_rbt_findnode(rbtdb->tree, name, NULL, &rbtdbiter->node,
- &rbtdbiter->chain, DNS_RBTFIND_EMPTYDATA,
- NULL, NULL);
+#if 1
if (result == ISC_R_SUCCESS) {
- result = dns_rbtnodechain_current(&rbtdbiter->chain, iname,
+ result = dns_rbtnodechain_current(rbtdbiter->current, iname,
origin, NULL);
if (result == ISC_R_SUCCESS) {
rbtdbiter->new_origin = ISC_TRUE;
reference_iter_node(rbtdbiter);
}
-
- } else if (result == DNS_R_PARTIALMATCH)
+ } else if (result == DNS_R_PARTIALMATCH) {
result = ISC_R_NOTFOUND;
+ rbtdbiter->node = NULL;
+ }
rbtdbiter->result = result;
+#else
+ if (result == ISC_R_SUCCESS || result == DNS_R_PARTIALMATCH) {
+ isc_result_t tresult;
+ tresult = dns_rbtnodechain_current(rbtdbiter->current, iname,
+ origin, NULL);
+ if (tresult == ISC_R_SUCCESS) {
+ rbtdbiter->new_origin = ISC_TRUE;
+ reference_iter_node(rbtdbiter);
+ } else {
+ result = tresult;
+ rbtdbiter->node = NULL;
+ }
+ } else
+ rbtdbiter->node = NULL;
+
+ rbtdbiter->result = (result == DNS_R_PARTIALMATCH) ?
+ ISC_R_SUCCESS : result;
+#endif
return (result);
}
@@ -6246,6 +7898,7 @@ dbiterator_prev(dns_dbiterator_t *iterator) {
isc_result_t result;
rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
dns_name_t *name, *origin;
+ dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
REQUIRE(rbtdbiter->node != NULL);
@@ -6257,13 +7910,23 @@ dbiterator_prev(dns_dbiterator_t *iterator) {
name = dns_fixedname_name(&rbtdbiter->name);
origin = dns_fixedname_name(&rbtdbiter->origin);
- result = dns_rbtnodechain_prev(&rbtdbiter->chain, name, origin);
+ result = dns_rbtnodechain_prev(rbtdbiter->current, name, origin);
+ if (result == ISC_R_NOMORE && !rbtdbiter->nsec3only &&
+ !rbtdbiter->nonsec3 &&
+ &rbtdbiter->nsec3chain == rbtdbiter->current) {
+ rbtdbiter->current = &rbtdbiter->chain;
+ dns_rbtnodechain_reset(rbtdbiter->current);
+ result = dns_rbtnodechain_last(rbtdbiter->current, rbtdb->tree,
+ name, origin);
+ if (result == ISC_R_NOTFOUND)
+ result = ISC_R_NOMORE;
+ }
dereference_iter_node(rbtdbiter);
if (result == DNS_R_NEWORIGIN || result == ISC_R_SUCCESS) {
rbtdbiter->new_origin = ISC_TF(result == DNS_R_NEWORIGIN);
- result = dns_rbtnodechain_current(&rbtdbiter->chain, NULL,
+ result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
NULL, &rbtdbiter->node);
}
@@ -6280,6 +7943,7 @@ dbiterator_next(dns_dbiterator_t *iterator) {
isc_result_t result;
rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
dns_name_t *name, *origin;
+ dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
REQUIRE(rbtdbiter->node != NULL);
@@ -6291,13 +7955,22 @@ dbiterator_next(dns_dbiterator_t *iterator) {
name = dns_fixedname_name(&rbtdbiter->name);
origin = dns_fixedname_name(&rbtdbiter->origin);
- result = dns_rbtnodechain_next(&rbtdbiter->chain, name, origin);
+ result = dns_rbtnodechain_next(rbtdbiter->current, name, origin);
+ if (result == ISC_R_NOMORE && !rbtdbiter->nsec3only &&
+ !rbtdbiter->nonsec3 && &rbtdbiter->chain == rbtdbiter->current) {
+ rbtdbiter->current = &rbtdbiter->nsec3chain;
+ dns_rbtnodechain_reset(rbtdbiter->current);
+ result = dns_rbtnodechain_first(rbtdbiter->current,
+ rbtdb->nsec3, name, origin);
+ if (result == ISC_R_NOTFOUND)
+ result = ISC_R_NOMORE;
+ }
dereference_iter_node(rbtdbiter);
if (result == DNS_R_NEWORIGIN || result == ISC_R_SUCCESS) {
rbtdbiter->new_origin = ISC_TF(result == DNS_R_NEWORIGIN);
- result = dns_rbtnodechain_current(&rbtdbiter->chain, NULL,
+ result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
NULL, &rbtdbiter->node);
}
if (result == ISC_R_SUCCESS)
@@ -6421,7 +8094,7 @@ rdataset_getadditional(dns_rdataset_t *rdataset, dns_rdatasetadditional_t type,
{
dns_rbtdb_t *rbtdb = rdataset->private1;
dns_rbtnode_t *rbtnode = rdataset->private2;
- unsigned char *raw = rdataset->private3; /* RDATASLAB */
+ unsigned char *raw = rdataset->private3; /* RDATASLAB */
unsigned int current_count = rdataset->privateuint4;
unsigned int count;
rdatasetheader_t *header;
@@ -6567,7 +8240,7 @@ rdataset_setadditional(dns_rdataset_t *rdataset, dns_rdatasetadditional_t type,
{
dns_rbtdb_t *rbtdb = rdataset->private1;
dns_rbtnode_t *rbtnode = rdataset->private2;
- unsigned char *raw = rdataset->private3; /* RDATASLAB */
+ unsigned char *raw = rdataset->private3; /* RDATASLAB */
unsigned int current_count = rdataset->privateuint4;
rdatasetheader_t *header;
unsigned int total_count, count;
@@ -6673,7 +8346,7 @@ rdataset_setadditional(dns_rdataset_t *rdataset, dns_rdatasetadditional_t type,
return (ISC_R_SUCCESS);
- fail:
+ fail:
if (newcbarg != NULL) {
if (newentry != NULL) {
acache_cancelentry(rbtdb->common.mctx, newentry,
@@ -6696,7 +8369,7 @@ rdataset_putadditional(dns_acache_t *acache, dns_rdataset_t *rdataset,
{
dns_rbtdb_t *rbtdb = rdataset->private1;
dns_rbtnode_t *rbtnode = rdataset->private2;
- unsigned char *raw = rdataset->private3; /* RDATASLAB */
+ unsigned char *raw = rdataset->private3; /* RDATASLAB */
unsigned int current_count = rdataset->privateuint4;
rdatasetheader_t *header;
nodelock_t *nodelock;
@@ -6705,7 +8378,7 @@ rdataset_putadditional(dns_acache_t *acache, dns_rdataset_t *rdataset,
dns_acacheentry_t *entry;
acache_cbarg_t *cbarg;
- UNUSED(qtype); /* we do not use this value at least for now */
+ UNUSED(qtype); /* we do not use this value at least for now */
UNUSED(acache);
if (type == dns_rdatasetadditional_fromcache)
@@ -6752,9 +8425,159 @@ rdataset_putadditional(dns_acache_t *acache, dns_rdataset_t *rdataset,
NODE_UNLOCK(nodelock, isc_rwlocktype_write);
if (entry != NULL) {
- acache_cancelentry(rbtdb->common.mctx, entry, &cbarg);
+ if (cbarg != NULL)
+ acache_cancelentry(rbtdb->common.mctx, entry, &cbarg);
dns_acache_detachentry(&entry);
}
return (ISC_R_SUCCESS);
}
+
+/*%
+ * Routines for LRU-based cache management.
+ */
+
+/*%
+ * See if a given cache entry that is being reused needs to be updated
+ * in the LRU-list. From the LRU management point of view, this function is
+ * expected to return true for almost all cases. When used with threads,
+ * however, this may cause a non-negligible performance penalty because a
+ * writer lock will have to be acquired before updating the list.
+ * If DNS_RBTDB_LIMITLRUUPDATE is defined to be non-zero at compilation time,
+ * this function returns true only if the entry has not been updated for some
+ * period of time. We differentiate the NS or glue address case and the others
+ * since experiments have shown that the former tends to be accessed relatively
+ * infrequently and the cost of a cache miss is higher (e.g., missing NS
+ * records may cause external queries at a higher level zone, involving more
+ * transactions).
+ *
+ * Returns ISC_FALSE for headers marked NONEXISTENT or STALE, regardless of
+ * DNS_RBTDB_LIMITLRUUPDATE. Otherwise, with the limit enabled, returns
+ * whether 'header->last_used' is at least 60 seconds (NS, or A/AAAA with glue
+ * trust) or 300 seconds (everything else) older than 'now'; with the limit
+ * disabled, always returns ISC_TRUE.
+ *
+ * Caller must hold the node (read or write) lock.
+ */
+static inline isc_boolean_t
+need_headerupdate(rdatasetheader_t *header, isc_stdtime_t now) {
+ /* Nonexistent or stale entries are never refreshed in the LRU list. */
+ if ((header->attributes &
+ (RDATASET_ATTR_NONEXISTENT|RDATASET_ATTR_STALE)) != 0)
+ return (ISC_FALSE);
+
+#if DNS_RBTDB_LIMITLRUUPDATE
+ if (header->type == dns_rdatatype_ns ||
+ (header->trust == dns_trust_glue &&
+ (header->type == dns_rdatatype_a ||
+ header->type == dns_rdatatype_aaaa))) {
+ /*
+ * NS and glue address records are updated if at least 60
+ * seconds have passed since the previous update time.
+ */
+ return (header->last_used + 60 <= now);
+ }
+
+ /* Other records are updated if 5 minutes have passed. */
+ return (header->last_used + 300 <= now);
+#else
+ /* Rate limiting is compiled out: every reuse refreshes the entry. */
+ UNUSED(now);
+
+ return (ISC_TRUE);
+#endif
+}
+
+/*%
+ * Update the timestamp of a given cache entry and move it to the head
+ * of the corresponding LRU list.
+ *
+ * 'rbtdb' must be a cache database (enforced with INSIST), 'header' must
+ * already be linked on an LRU list (also enforced with INSIST), and 'now'
+ * becomes the entry's new 'last_used' value.
+ *
+ * Caller must hold the node (write) lock.
+ *
+ * Note that we do NOT touch the heap here, as the TTL has not changed.
+ */
+static void
+update_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
+ isc_stdtime_t now)
+{
+ INSIST(IS_CACHE(rbtdb));
+
+ /* To be checked: can we really assume this? XXXMLG */
+ INSIST(ISC_LINK_LINKED(header, lru_link));
+
+ /*
+ * The LRU list is selected by the lock number of the node that owns
+ * the header; remove the entry and re-insert it at the head.
+ */
+ ISC_LIST_UNLINK(rbtdb->rdatasets[header->node->locknum],
+ header, lru_link);
+ header->last_used = now;
+ ISC_LIST_PREPEND(rbtdb->rdatasets[header->node->locknum],
+ header, lru_link);
+}
+
+/*%
+ * Purge some expired and/or stale (i.e. unused for some period) cache entries
+ * under an overmem condition. To recover from this condition quickly, up to
+ * 2 entries will be purged. This process is triggered while adding a new
+ * entry, and we specifically avoid purging entries in the same LRU bucket as
+ * the one to which the new entry will belong. Otherwise, we might purge
+ * entries of the same name of different RR types while adding RRsets from a
+ * single response (consider the case where we're adding A and AAAA glue records
+ * of the same NS name).
+ *
+ * 'locknum_start' is the bucket that is skipped, 'now' is the time used for
+ * the expiry test on the heap entry, and 'tree_locked' is passed through to
+ * expire_header().
+ *
+ * Each visited bucket's node lock is taken in write mode for the duration of
+ * the work on that bucket.
+ */
+static void
+overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start,
+ isc_stdtime_t now, isc_boolean_t tree_locked)
+{
+ rdatasetheader_t *header, *header_prev;
+ unsigned int locknum;
+ int purgecount = 2;
+
+ /*
+ * Start at the bucket following 'locknum_start' and wrap around;
+ * stop when we are back at 'locknum_start' (which is never visited)
+ * or when the purge budget is used up.
+ */
+ for (locknum = (locknum_start + 1) % rbtdb->node_lock_count;
+ locknum != locknum_start && purgecount > 0;
+ locknum = (locknum + 1) % rbtdb->node_lock_count) {
+ NODE_LOCK(&rbtdb->node_locks[locknum].lock,
+ isc_rwlocktype_write);
+
+ /*
+ * Look at element 1 of this bucket's heap (presumably the
+ * entry with the earliest expiry -- confirm against the heap
+ * implementation) and expire it if its TTL has passed.
+ */
+ header = isc_heap_element(rbtdb->heaps[locknum], 1);
+ if (header && header->rdh_ttl <= now - RBTDB_VIRTUAL) {
+ expire_header(rbtdb, header, tree_locked);
+ purgecount--;
+ }
+
+ /*
+ * Then walk the bucket's LRU list from its tail, expiring
+ * entries regardless of their TTL until the budget runs out.
+ * The predecessor is saved first because the current entry is
+ * unlinked inside the loop.
+ */
+ for (header = ISC_LIST_TAIL(rbtdb->rdatasets[locknum]);
+ header != NULL && purgecount > 0;
+ header = header_prev) {
+ header_prev = ISC_LIST_PREV(header, lru_link);
+ /*
+ * Unlink the entry at this point to avoid checking it
+ * again even if it's currently used by someone else and
+ * cannot be purged at this moment. This entry won't be
+ * referenced any more (so unlinking is safe) since
+ * expire_header() below resets its TTL to 0.
+ */
+ ISC_LIST_UNLINK(rbtdb->rdatasets[locknum], header,
+ lru_link);
+ expire_header(rbtdb, header, tree_locked);
+ purgecount--;
+ }
+
+ NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
+ isc_rwlocktype_write);
+ }
+}
+
+/*%
+ * Force a cache entry to expire: set its TTL to 0, flag it as stale and
+ * mark its node dirty. If the node currently has no references, it is
+ * cleaned up immediately by taking and then dropping a reference.
+ *
+ * 'tree_locked' selects the tree lock type reported to
+ * decrement_reference(): write if true, none otherwise.
+ */
+static void
+expire_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
+ isc_boolean_t tree_locked)
+{
+ set_ttl(rbtdb, header, 0);
+ header->attributes |= RDATASET_ATTR_STALE;
+ header->node->dirty = 1;
+
+ /*
+ * Caller must hold the node (write) lock.
+ */
+
+ if (dns_rbtnode_refcurrent(header->node) == 0) {
+ /*
+ * If no one else is using the node, we can clean it up now.
+ * We first need to gain a new reference to the node to meet a
+ * requirement of decrement_reference().
+ */
+ new_reference(rbtdb, header->node);
+ decrement_reference(rbtdb, header->node, 0,
+ isc_rwlocktype_write,
+ tree_locked ? isc_rwlocktype_write :
+ isc_rwlocktype_none, ISC_FALSE);
+ }
+}
OpenPOWER on IntegriCloud