author | Chris Mason <chris.mason@oracle.com> | 2010-06-11 11:17:59 -0700
---|---|---
committer | Andy Grover <andy.grover@oracle.com> | 2010-09-08 18:15:28 -0700
commit | 6fa70da6081bbcf948801fd5ee0be4d222298a43 (patch) |
tree | 519fef6c63abe15d80173ad25c7cd5aae673df55 /net/rds/xlist.h |
parent | 0f4b1c7e89e699f588807a914ec6e6396c851a72 (diff) |
rds: recycle FMRs through lockless lists
FMR allocation and recycling is performance critical and fairly lock
intensive. The current code has a per-connection lock that all
processes bang on, and it becomes a major bottleneck on large systems.
This changes things to use a number of cmpxchg-based lists instead,
allowing us to go through the whole FMR lifecycle without locking inside
RDS.
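As a rough illustration of that lock-free lifecycle, the sketch below shows an allocation path popping a recycled FMR and a free path pushing one back, using the xlist helpers added below. The pool and FMR struct names are hypothetical, not the actual RDS IB code:

```c
/*
 * Hypothetical sketch of the lockless FMR lifecycle; the struct and
 * function names are illustrative, not the patch's own code.
 */
#include <linux/kernel.h>	/* container_of() */
#include "xlist.h"

struct example_fmr {
	struct xlist_head	xlist;
	/* ... hardware FMR state ... */
};

struct example_pool {
	struct xlist_head	clean_list;	/* FMRs ready for reuse */
	struct xlist_head	free_list;	/* FMRs waiting to be flushed */
};

/* Allocation: pop a recycled FMR without taking any pool lock. */
static struct example_fmr *example_get_fmr(struct example_pool *pool)
{
	struct xlist_head *node = xlist_del_head(&pool->clean_list);

	return node ? container_of(node, struct example_fmr, xlist) : NULL;
}

/* Free: push the FMR back for the flush worker, again without a lock.
 * For a single entry, new == tail in xlist_add(). */
static void example_put_fmr(struct example_pool *pool, struct example_fmr *fmr)
{
	xlist_add(&fmr->xlist, &fmr->xlist, &pool->free_list);
}
```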
Zach Brown pointed out that our usage of cmpxchg for xlist removal is
racy if someone manages to remove an FMR struct from the list and add it
back while another CPU still sees the FMR's address at the head of the list.
The second CPU might assume the list hasn't changed, when in fact any
number of operations might have happened in between the deletion and
reinsertion.
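This is the classic ABA problem. The user-space sketch below walks the bad interleaving deterministically, with the two CPUs' steps written out as comments; it is illustrative only and models cmpxchg with GCC's __sync_val_compare_and_swap, not code from the patch:

```c
/* Illustrative sketch of the ABA race in lockless list removal. */
#include <assert.h>
#include <stdio.h>

struct node {
	struct node *next;
};

static struct node *cmpxchg_ptr(struct node **loc, struct node *old,
				struct node *new)
{
	return __sync_val_compare_and_swap(loc, old, new);
}

int main(void)
{
	struct node a, b, c;
	struct node *head;

	/* Initial list: head -> A -> B -> C. */
	a.next = &b;
	b.next = &c;
	c.next = NULL;
	head = &a;

	/* CPU0 starts a removal: it reads cur = A and next = B ... */
	struct node *cpu0_cur = head;
	struct node *cpu0_next = cpu0_cur->next;

	/* ... but before its cmpxchg runs, CPU1 pops A, pops B, and then
	 * pushes A back.  The list is now head -> A -> C, and B belongs
	 * to CPU1 (it may already be reused or freed). */
	head = &b;		/* CPU1 pops A */
	head = &c;		/* CPU1 pops B */
	a.next = &c;		/* CPU1 pushes A back */
	head = &a;

	/* CPU0's cmpxchg still sees A at the head, so it "succeeds" and
	 * installs its stale next pointer, putting B back on a list that
	 * no longer owns it. */
	struct node *check = cmpxchg_ptr(&head, cpu0_cur, cpu0_next);
	assert(check == cpu0_cur);	/* the change went undetected */

	printf("head now points at B (%p), an entry CPU1 already owns\n",
	       (void *)head);
	return 0;
}
```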
This commit maintains a per-CPU count of CPUs that are currently
in xlist removal, and establishes a grace period to make sure that
nobody can see an entry we have just removed from the list.
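A sketch of the grace-period idea in kernel style is below; the symbol names are hypothetical and the real accounting lives in the RDS IB code, which is outside this xlist.h diffstat. Each reader marks a per-CPU flag while it is inside a lockless removal, and the path that wants to recycle removed entries spins until every CPU's flag has been observed clear:

```c
/*
 * Illustrative sketch of a per-CPU "in xlist removal" flag and the
 * grace-period wait; names are hypothetical, not the patch's own.
 */
#include <linux/percpu.h>
#include <linux/preempt.h>
#include <linux/bitops.h>
#include <linux/cpumask.h>
#include <asm/processor.h>	/* cpu_relax() */
#include "xlist.h"

#define XLIST_REMOVAL_BUSY	0

static DEFINE_PER_CPU(unsigned long, xlist_removal_flag);

/* Reader side: bracket the lockless xlist_del_head() call. */
static struct xlist_head *example_del_head(struct xlist_head *head)
{
	struct xlist_head *ret;
	unsigned long *flag;

	preempt_disable();
	flag = this_cpu_ptr(&xlist_removal_flag);
	set_bit(XLIST_REMOVAL_BUSY, flag);

	ret = xlist_del_head(head);

	clear_bit(XLIST_REMOVAL_BUSY, flag);
	preempt_enable();
	return ret;
}

/* Before recycling removed entries, wait until no CPU can still be
 * looking at them through a stale head pointer. */
static void example_wait_for_grace(void)
{
	int cpu;

	for_each_online_cpu(cpu) {
		unsigned long *flag = &per_cpu(xlist_removal_flag, cpu);

		while (test_bit(XLIST_REMOVAL_BUSY, flag))
			cpu_relax();
	}
}
```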
Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'net/rds/xlist.h')
-rw-r--r-- | net/rds/xlist.h | 110
1 file changed, 110 insertions(+), 0 deletions(-)
diff --git a/net/rds/xlist.h b/net/rds/xlist.h
new file mode 100644
index 0000000..8c21aca
--- /dev/null
+++ b/net/rds/xlist.h
@@ -0,0 +1,110 @@
+#ifndef _LINUX_XLIST_H
+#define _LINUX_XLIST_H
+
+#include <linux/stddef.h>
+#include <linux/poison.h>
+#include <linux/prefetch.h>
+#include <asm/system.h>
+
+struct xlist_head {
+	struct xlist_head *next;
+};
+
+/*
+ * XLIST_PTR_TAIL can be used to prevent double insertion. See
+ * xlist_protect()
+ */
+#define XLIST_PTR_TAIL ((struct xlist_head *)0x1)
+
+static inline void xlist_add(struct xlist_head *new, struct xlist_head *tail, struct xlist_head *head)
+{
+	struct xlist_head *cur;
+	struct xlist_head *check;
+
+	while (1) {
+		cur = head->next;
+		tail->next = cur;
+		check = cmpxchg(&head->next, cur, new);
+		if (check == cur)
+			break;
+	}
+}
+
+/*
+ * To avoid duplicate insertion by two CPUs of the same xlist item
+ * you can call xlist_protect.  It will stuff XLIST_PTR_TAIL
+ * into the entry->next pointer with xchg, and only return 1
+ * if there was a NULL there before.
+ *
+ * if xlist_protect returns zero, someone else is busy working
+ * on this entry.  Getting a NULL into the entry in a race
+ * free manner is the caller's job.
+ */
+static inline int xlist_protect(struct xlist_head *entry)
+{
+	struct xlist_head *val;
+
+	val = xchg(&entry->next, XLIST_PTR_TAIL);
+	if (val == NULL)
+		return 1;
+	return 0;
+}
+
+static inline struct xlist_head *xlist_del_head(struct xlist_head *head)
+{
+	struct xlist_head *cur;
+	struct xlist_head *check;
+	struct xlist_head *next;
+
+	while (1) {
+		cur = head->next;
+		if (!cur)
+			goto out;
+
+		if (cur == XLIST_PTR_TAIL) {
+			cur = NULL;
+			goto out;
+		}
+
+		next = cur->next;
+		check = cmpxchg(&head->next, cur, next);
+		if (check == cur)
+			goto out;
+	}
+out:
+	return cur;
+}
+
+static inline struct xlist_head *xlist_del_head_fast(struct xlist_head *head)
+{
+	struct xlist_head *cur;
+
+	cur = head->next;
+	if (!cur || cur == XLIST_PTR_TAIL)
+		return NULL;
+
+	head->next = cur->next;
+	return cur;
+}
+
+static inline void xlist_splice(struct xlist_head *list,
+			struct xlist_head *head)
+{
+	struct xlist_head *cur;
+
+	WARN_ON(head->next);
+	cur = xchg(&list->next, NULL);
+	head->next = cur;
+}
+
+static inline void INIT_XLIST_HEAD(struct xlist_head *list)
+{
+	list->next = NULL;
+}
+
+static inline int xlist_empty(struct xlist_head *head)
+{
+	return head->next == NULL || head->next == XLIST_PTR_TAIL;
+}
+
+#endif
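One usage note on the API above: xlist_splice() atomically steals the whole chain with a single xchg(), so a flush or worker path can move the shared list onto a private head and then walk it with the non-atomic xlist_del_head_fast(). A hedged sketch of that pattern, with hypothetical item and function names:

```c
/*
 * Hypothetical flush-side sketch: drain a shared xlist in one atomic
 * xchg and process the entries on a private list, where the cheaper
 * non-atomic helper is safe to use.
 */
#include <linux/kernel.h>	/* container_of() */
#include "xlist.h"

struct example_item {
	struct xlist_head xlist;
	int payload;
};

static void example_flush(struct xlist_head *shared)
{
	struct xlist_head drain;
	struct xlist_head *node;

	INIT_XLIST_HEAD(&drain);

	/* One atomic xchg detaches every entry currently on 'shared'. */
	xlist_splice(shared, &drain);

	/* 'drain' is private to this function, so xlist_del_head_fast()
	 * (plain loads and stores, no cmpxchg) is safe here. */
	while ((node = xlist_del_head_fast(&drain)) != NULL) {
		struct example_item *item =
			container_of(node, struct example_item, xlist);

		/* ... tear down or recycle 'item' ... */
		item->payload = 0;
	}
}
```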