summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- fs/fcntl.c 9
-rw-r--r-- fs/file.c 34
-rw-r--r-- fs/open.c 8
-rw-r--r-- include/linux/file.h 28
-rw-r--r-- include/linux/init_task.h 10
-rw-r--r-- kernel/fork.c 8
6 files changed, 55 insertions, 42 deletions
diff --git a/fs/fcntl.c b/fs/fcntl.c
index dc4a700..03c7895 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -73,8 +73,8 @@ repeat:
* orig_start..fdt->next_fd
*/
start = orig_start;
- if (start < fdt->next_fd)
- start = fdt->next_fd;
+ if (start < files->next_fd)
+ start = files->next_fd;
newfd = start;
if (start < fdt->max_fdset) {
@@ -102,9 +102,8 @@ repeat:
* we reacquire the fdtable pointer and use it while holding
* the lock, no one can free it during that time.
*/
- fdt = files_fdtable(files);
- if (start <= fdt->next_fd)
- fdt->next_fd = newfd + 1;
+ if (start <= files->next_fd)
+ files->next_fd = newfd + 1;
error = newfd;
diff --git a/fs/file.c b/fs/file.c
index cea7cbe..bbc7433 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -125,7 +125,8 @@ static void free_fdtable_rcu(struct rcu_head *rcu)
kmem_cache_free(files_cachep, fdt->free_files);
return;
}
- if (fdt->max_fdset <= __FD_SETSIZE && fdt->max_fds <= NR_OPEN_DEFAULT) {
+ if (fdt->max_fdset <= EMBEDDED_FD_SET_SIZE &&
+ fdt->max_fds <= NR_OPEN_DEFAULT) {
/*
* The fdtable was embedded
*/
@@ -155,8 +156,9 @@ static void free_fdtable_rcu(struct rcu_head *rcu)
void free_fdtable(struct fdtable *fdt)
{
- if (fdt->free_files || fdt->max_fdset > __FD_SETSIZE ||
- fdt->max_fds > NR_OPEN_DEFAULT)
+ if (fdt->free_files ||
+ fdt->max_fdset > EMBEDDED_FD_SET_SIZE ||
+ fdt->max_fds > NR_OPEN_DEFAULT)
call_rcu(&fdt->rcu, free_fdtable_rcu);
}
@@ -199,7 +201,6 @@ static void copy_fdtable(struct fdtable *nfdt, struct fdtable *fdt)
(nfdt->max_fds - fdt->max_fds) *
sizeof(struct file *));
}
- nfdt->next_fd = fdt->next_fd;
}
/*
@@ -220,11 +221,9 @@ fd_set * alloc_fdset(int num)
void free_fdset(fd_set *array, int num)
{
- int size = num / 8;
-
- if (num <= __FD_SETSIZE) /* Don't free an embedded fdset */
+ if (num <= EMBEDDED_FD_SET_SIZE) /* Don't free an embedded fdset */
return;
- else if (size <= PAGE_SIZE)
+ else if (num <= 8 * PAGE_SIZE)
kfree(array);
else
vfree(array);
@@ -237,22 +236,17 @@ static struct fdtable *alloc_fdtable(int nr)
fd_set *new_openset = NULL, *new_execset = NULL;
struct file **new_fds;
- fdt = kmalloc(sizeof(*fdt), GFP_KERNEL);
+ fdt = kzalloc(sizeof(*fdt), GFP_KERNEL);
if (!fdt)
goto out;
- memset(fdt, 0, sizeof(*fdt));
- nfds = __FD_SETSIZE;
+ nfds = 8 * L1_CACHE_BYTES;
/* Expand to the max in easy steps */
- do {
- if (nfds < (PAGE_SIZE * 8))
- nfds = PAGE_SIZE * 8;
- else {
- nfds = nfds * 2;
- if (nfds > NR_OPEN)
- nfds = NR_OPEN;
- }
- } while (nfds <= nr);
+ while (nfds <= nr) {
+ nfds = nfds * 2;
+ if (nfds > NR_OPEN)
+ nfds = NR_OPEN;
+ }
new_openset = alloc_fdset(nfds);
new_execset = alloc_fdset(nfds);
diff --git a/fs/open.c b/fs/open.c
index 70e0230..1091dad 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -973,7 +973,7 @@ repeat:
fdt = files_fdtable(files);
fd = find_next_zero_bit(fdt->open_fds->fds_bits,
fdt->max_fdset,
- fdt->next_fd);
+ files->next_fd);
/*
* N.B. For clone tasks sharing a files structure, this test
@@ -998,7 +998,7 @@ repeat:
FD_SET(fd, fdt->open_fds);
FD_CLR(fd, fdt->close_on_exec);
- fdt->next_fd = fd + 1;
+ files->next_fd = fd + 1;
#if 1
/* Sanity check */
if (fdt->fd[fd] != NULL) {
@@ -1019,8 +1019,8 @@ static void __put_unused_fd(struct files_struct *files, unsigned int fd)
{
struct fdtable *fdt = files_fdtable(files);
__FD_CLR(fd, fdt->open_fds);
- if (fd < fdt->next_fd)
- fdt->next_fd = fd;
+ if (fd < files->next_fd)
+ files->next_fd = fd;
}
void fastcall put_unused_fd(unsigned int fd)
diff --git a/include/linux/file.h b/include/linux/file.h
index 9901b85..9f7c251 100644
--- a/include/linux/file.h
+++ b/include/linux/file.h
@@ -10,6 +10,7 @@
#include <linux/compiler.h>
#include <linux/spinlock.h>
#include <linux/rcupdate.h>
+#include <linux/types.h>
/*
* The default fd array needs to be at least BITS_PER_LONG,
@@ -17,10 +18,22 @@
*/
#define NR_OPEN_DEFAULT BITS_PER_LONG
+/*
+ * The embedded_fd_set is a small fd_set,
+ * suitable for most tasks (which open <= BITS_PER_LONG files)
+ */
+struct embedded_fd_set {
+ unsigned long fds_bits[1];
+};
+
+/*
+ * More than this number of fds: we use a separately allocated fd_set
+ */
+#define EMBEDDED_FD_SET_SIZE (BITS_PER_BYTE * sizeof(struct embedded_fd_set))
+
struct fdtable {
unsigned int max_fds;
int max_fdset;
- int next_fd;
struct file ** fd; /* current fd array */
fd_set *close_on_exec;
fd_set *open_fds;
@@ -33,13 +46,20 @@ struct fdtable {
* Open file table structure
*/
struct files_struct {
+ /*
+ * read mostly part
+ */
atomic_t count;
struct fdtable *fdt;
struct fdtable fdtab;
- fd_set close_on_exec_init;
- fd_set open_fds_init;
+ /*
+ * written part on a separate cache line in SMP
+ */
+ spinlock_t file_lock ____cacheline_aligned_in_smp;
+ int next_fd;
+ struct embedded_fd_set close_on_exec_init;
+ struct embedded_fd_set open_fds_init;
struct file * fd_array[NR_OPEN_DEFAULT];
- spinlock_t file_lock; /* Protects concurrent writers. Nests inside tsk->alloc_lock */
};
#define files_fdtable(files) (rcu_dereference((files)->fdt))
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index dcfd2ec..92146f3 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -7,11 +7,10 @@
#define INIT_FDTABLE \
{ \
.max_fds = NR_OPEN_DEFAULT, \
- .max_fdset = __FD_SETSIZE, \
- .next_fd = 0, \
+ .max_fdset = EMBEDDED_FD_SET_SIZE, \
.fd = &init_files.fd_array[0], \
- .close_on_exec = &init_files.close_on_exec_init, \
- .open_fds = &init_files.open_fds_init, \
+ .close_on_exec = (fd_set *)&init_files.close_on_exec_init, \
+ .open_fds = (fd_set *)&init_files.open_fds_init, \
.rcu = RCU_HEAD_INIT, \
.free_files = NULL, \
.next = NULL, \
@@ -20,9 +19,10 @@
#define INIT_FILES \
{ \
.count = ATOMIC_INIT(1), \
- .file_lock = SPIN_LOCK_UNLOCKED, \
.fdt = &init_files.fdtab, \
.fdtab = INIT_FDTABLE, \
+ .file_lock = SPIN_LOCK_UNLOCKED, \
+ .next_fd = 0, \
.close_on_exec_init = { { 0, } }, \
.open_fds_init = { { 0, } }, \
.fd_array = { NULL, } \
diff --git a/kernel/fork.c b/kernel/fork.c
index 9bd7b65..c79ae0b 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -607,12 +607,12 @@ static struct files_struct *alloc_files(void)
atomic_set(&newf->count, 1);
spin_lock_init(&newf->file_lock);
+ newf->next_fd = 0;
fdt = &newf->fdtab;
- fdt->next_fd = 0;
fdt->max_fds = NR_OPEN_DEFAULT;
- fdt->max_fdset = __FD_SETSIZE;
- fdt->close_on_exec = &newf->close_on_exec_init;
- fdt->open_fds = &newf->open_fds_init;
+ fdt->max_fdset = EMBEDDED_FD_SET_SIZE;
+ fdt->close_on_exec = (fd_set *)&newf->close_on_exec_init;
+ fdt->open_fds = (fd_set *)&newf->open_fds_init;
fdt->fd = &newf->fd_array[0];
INIT_RCU_HEAD(&fdt->rcu);
fdt->free_files = NULL;
OpenPOWER on IntegriCloud