diff options
author | dwmalone <dwmalone@FreeBSD.org> | 2000-08-19 08:32:59 +0000 |
---|---|---|
committer | dwmalone <dwmalone@FreeBSD.org> | 2000-08-19 08:32:59 +0000 |
commit | df0e25bf6c3619217f1f2c8b5a35a6e706f2a0b4 (patch) | |
tree | 47f526cc36bae230ba5426a392413b1b46c0d678 /sys/kern | |
parent | 2f92e39a0fe52366609e44e5a1978feb243c8755 (diff) | |
download | FreeBSD-src-df0e25bf6c3619217f1f2c8b5a35a6e706f2a0b4.zip FreeBSD-src-df0e25bf6c3619217f1f2c8b5a35a6e706f2a0b4.tar.gz |
Replace the mbuf external reference counting code with something
that should be better.
The old code counted references to mbuf clusters by using the offset
of the cluster from the start of memory allocated for mbufs and
clusters as an index into an array of chars, which did the reference
counting. If the external storage was not a cluster then reference
counting had to be done by the code using that external storage.
NetBSD's system of linked lists of mbufs was considered, but Alfred
felt it would have locking issues when the kernel was made more
SMP friendly.
The system implemented uses a pool of unions to track external
storage. The union contains an int for counting the references and
a pointer for forming a free list. The reference counts are
incremented and decremented atomically and so should be SMP friendly.
This system can track reference counts for any sort of external
storage.
Access to the reference counting stuff is now through macros defined
in mbuf.h, so it should be easier to make changes to the system in
the future.
The possibility of storing the reference count in one of the
referencing mbufs was considered, but was rejected 'cos it would
often leave extra mbufs allocated. Storing the reference count in
the cluster was also considered, but because the external storage
may not be a cluster this isn't an option.
The size of the pool of reference counters is available in the
stats provided by "netstat -m".
PR: 19866
Submitted by: Bosko Milekic <bmilekic@dsuper.net>
Reviewed by: alfred (glanced at by others on -net)
Diffstat (limited to 'sys/kern')
-rw-r--r-- | sys/kern/uipc_mbuf.c | 84 | ||||
-rw-r--r-- | sys/kern/uipc_mbuf2.c | 2 | ||||
-rw-r--r-- | sys/kern/uipc_syscalls.c | 68 |
3 files changed, 83 insertions, 71 deletions
diff --git a/sys/kern/uipc_mbuf.c b/sys/kern/uipc_mbuf.c index 89ec747..ee0b58c 100644 --- a/sys/kern/uipc_mbuf.c +++ b/sys/kern/uipc_mbuf.c @@ -56,11 +56,11 @@ static void mbinit __P((void *)); SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbinit, NULL) struct mbuf *mbutl; -char *mclrefcnt; struct mbstat mbstat; u_long mbtypes[MT_NTYPES]; struct mbuf *mmbfree; union mcluster *mclfree; +union mext_refcnt *mext_refcnt_free; int max_linkhdr; int max_protohdr; int max_hdr; @@ -95,10 +95,9 @@ TUNABLE_INT_DECL("kern.ipc.nmbufs", NMBCLUSTERS * 4, nmbufs); static void m_reclaim __P((void)); -/* "number of clusters of pages" */ -#define NCL_INIT 1 - +#define NCL_INIT 2 #define NMB_INIT 16 +#define REF_INIT (NMBCLUSTERS * 2) /* ARGSUSED*/ static void @@ -107,7 +106,10 @@ mbinit(dummy) { int s; - mmbfree = NULL; mclfree = NULL; + mmbfree = NULL; + mclfree = NULL; + mext_refcnt_free = NULL; + mbstat.m_msize = MSIZE; mbstat.m_mclbytes = MCLBYTES; mbstat.m_minclsize = MINCLSIZE; @@ -115,6 +117,8 @@ mbinit(dummy) mbstat.m_mhlen = MHLEN; s = splimp(); + if (m_alloc_ref(REF_INIT) == 0) + goto bad; if (m_mballoc(NMB_INIT, M_DONTWAIT) == 0) goto bad; #if MCLBYTES <= PAGE_SIZE @@ -128,7 +132,49 @@ mbinit(dummy) splx(s); return; bad: - panic("mbinit"); + panic("mbinit: failed to initialize mbuf subsystem!"); +} + +/* + * Allocate at least nmb reference count structs and place them + * on the ref cnt free list. + * Must be called at splimp. + */ +int +m_alloc_ref(nmb) + u_int nmb; +{ + caddr_t p; + u_int nbytes; + int i; + + /* + * XXX: + * We don't cap the amount of memory that can be used + * by the reference counters, like we do for mbufs and + * mbuf clusters. The reason is that we don't really expect + * to have to be allocating too many of these guys with m_alloc_ref(), + * and if we are, we're probably not out of the woods anyway, + * so leave this way for now. 
+ */ + + if (mb_map_full) + return (0); + + nbytes = round_page(nmb * sizeof(union mext_refcnt)); + if ((p = (caddr_t)kmem_malloc(mb_map, nbytes, M_NOWAIT)) == NULL) + return (0); + nmb = nbytes / sizeof(union mext_refcnt); + + for (i = 0; i < nmb; i++) { + ((union mext_refcnt *)p)->next_ref = mext_refcnt_free; + mext_refcnt_free = (union mext_refcnt *)p; + p += sizeof(union mext_refcnt); + mbstat.m_refree++; + } + mbstat.m_refcnt += nmb; + + return (1); } /* @@ -363,7 +409,7 @@ m_clalloc_wait(void) * MGET, but avoid getting into another instance of m_clalloc_wait() */ p = NULL; - MCLALLOC(p, M_DONTWAIT); + _MCLALLOC(p, M_DONTWAIT); s = splimp(); if (p != NULL) { /* We waited and got something... */ @@ -624,13 +670,9 @@ m_copym(m, off0, len, wait) n->m_len = min(len, m->m_len - off); if (m->m_flags & M_EXT) { n->m_data = m->m_data + off; - if(!m->m_ext.ext_ref) - mclrefcnt[mtocl(m->m_ext.ext_buf)]++; - else - (*(m->m_ext.ext_ref))(m->m_ext.ext_buf, - m->m_ext.ext_size); n->m_ext = m->m_ext; n->m_flags |= M_EXT; + MEXT_ADD_REF(m); } else bcopy(mtod(m, caddr_t)+off, mtod(n, caddr_t), (unsigned)n->m_len); @@ -671,13 +713,9 @@ m_copypacket(m, how) n->m_len = m->m_len; if (m->m_flags & M_EXT) { n->m_data = m->m_data; - if(!m->m_ext.ext_ref) - mclrefcnt[mtocl(m->m_ext.ext_buf)]++; - else - (*(m->m_ext.ext_ref))(m->m_ext.ext_buf, - m->m_ext.ext_size); n->m_ext = m->m_ext; n->m_flags |= M_EXT; + MEXT_ADD_REF(m); } else { bcopy(mtod(m, char *), mtod(n, char *), n->m_len); } @@ -694,13 +732,9 @@ m_copypacket(m, how) n->m_len = m->m_len; if (m->m_flags & M_EXT) { n->m_data = m->m_data; - if(!m->m_ext.ext_ref) - mclrefcnt[mtocl(m->m_ext.ext_buf)]++; - else - (*(m->m_ext.ext_ref))(m->m_ext.ext_buf, - m->m_ext.ext_size); n->m_ext = m->m_ext; n->m_flags |= M_EXT; + MEXT_ADD_REF(m); } else { bcopy(mtod(m, char *), mtod(n, char *), n->m_len); } @@ -1042,11 +1076,7 @@ extpacket: if (m->m_flags & M_EXT) { n->m_flags |= M_EXT; n->m_ext = m->m_ext; - if(!m->m_ext.ext_ref) - 
mclrefcnt[mtocl(m->m_ext.ext_buf)]++; - else - (*(m->m_ext.ext_ref))(m->m_ext.ext_buf, - m->m_ext.ext_size); + MEXT_ADD_REF(m); m->m_ext.ext_size = 0; /* For Accounting XXXXXX danger */ n->m_data = m->m_data + len; } else { diff --git a/sys/kern/uipc_mbuf2.c b/sys/kern/uipc_mbuf2.c index b39c002..2e6aa1e 100644 --- a/sys/kern/uipc_mbuf2.c +++ b/sys/kern/uipc_mbuf2.c @@ -182,7 +182,7 @@ m_pulldown(m, off, len, offp) else { if (n->m_ext.ext_free) sharedcluster = 1; - else if (mclrefcnt[mtocl(n->m_ext.ext_buf)] > 1) + else if (MEXT_IS_REF(n)) sharedcluster = 1; else sharedcluster = 0; diff --git a/sys/kern/uipc_syscalls.c b/sys/kern/uipc_syscalls.c index 5ba2a24..ed9d691 100644 --- a/sys/kern/uipc_syscalls.c +++ b/sys/kern/uipc_syscalls.c @@ -72,8 +72,7 @@ static void sf_buf_init(void *arg); SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL) static struct sf_buf *sf_buf_alloc(void); -static void sf_buf_ref(caddr_t addr, u_int size); -static void sf_buf_free(caddr_t addr, u_int size); +static void sf_buf_free(caddr_t addr, void *args); static int sendit __P((struct proc *p, int s, struct msghdr *mp, int flags)); static int recvit __P((struct proc *p, int s, struct msghdr *mp, @@ -1354,58 +1353,42 @@ sf_buf_alloc() } SLIST_REMOVE_HEAD(&sf_freelist, free_list); splx(s); - sf->refcnt = 1; return (sf); } #define dtosf(x) (&sf_bufs[((uintptr_t)(x) - (uintptr_t)sf_base) >> PAGE_SHIFT]) -static void -sf_buf_ref(caddr_t addr, u_int size) -{ - struct sf_buf *sf; - - sf = dtosf(addr); - if (sf->refcnt == 0) - panic("sf_buf_ref: referencing a free sf_buf"); - sf->refcnt++; -} /* - * Lose a reference to an sf_buf. When none left, detach mapped page - * and release resources back to the system. + * + * Detatch mapped page and release resources back to the system. * * Must be called at splimp. 
*/ static void -sf_buf_free(caddr_t addr, u_int size) +sf_buf_free(caddr_t addr, void *args) { struct sf_buf *sf; struct vm_page *m; int s; sf = dtosf(addr); - if (sf->refcnt == 0) - panic("sf_buf_free: freeing free sf_buf"); - sf->refcnt--; - if (sf->refcnt == 0) { - pmap_qremove((vm_offset_t)addr, 1); - m = sf->m; - s = splvm(); - vm_page_unwire(m, 0); - /* - * Check for the object going away on us. This can - * happen since we don't hold a reference to it. - * If so, we're responsible for freeing the page. - */ - if (m->wire_count == 0 && m->object == NULL) - vm_page_free(m); - splx(s); - sf->m = NULL; - SLIST_INSERT_HEAD(&sf_freelist, sf, free_list); - if (sf_buf_alloc_want) { - sf_buf_alloc_want = 0; - wakeup(&sf_freelist); - } + pmap_qremove((vm_offset_t)addr, 1); + m = sf->m; + s = splvm(); + vm_page_unwire(m, 0); + /* + * Check for the object going away on us. This can + * happen since we don't hold a reference to it. + * If so, we're responsible for freeing the page. + */ + if (m->wire_count == 0 && m->object == NULL) + vm_page_free(m); + splx(s); + sf->m = NULL; + SLIST_INSERT_HEAD(&sf_freelist, sf, free_list); + if (sf_buf_alloc_want) { + sf_buf_alloc_want = 0; + wakeup(&sf_freelist); } } @@ -1630,12 +1613,11 @@ retry_lookup: error = ENOBUFS; goto done; } - m->m_ext.ext_free = sf_buf_free; - m->m_ext.ext_ref = sf_buf_ref; - m->m_ext.ext_buf = (void *)sf->kva; - m->m_ext.ext_size = PAGE_SIZE; + /* + * Setup external storage for mbuf. + */ + MEXTADD(m, sf->kva, PAGE_SIZE, sf_buf_free, NULL); m->m_data = (char *) sf->kva + pgoff; - m->m_flags |= M_EXT; m->m_pkthdr.len = m->m_len = xfsize; /* * Add the buffer to the socket buffer chain. |