diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2010-10-23 01:26:47 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2010-10-23 01:26:47 -0700 |
commit | ab34c02afeafd047a831e6fe2a4dd875ce86bee0 (patch) | |
tree | 833e385e8387e3d4d4531ade23c1d92fba8ed075 | |
parent | 7f3883962870dd28b5f2322ac44a9d03640ef448 (diff) | |
parent | 6b81e14e645016597c81e71cd27ee5c57c3a3c36 (diff) | |
download | op-kernel-dev-ab34c02afeafd047a831e6fe2a4dd875ce86bee0.zip op-kernel-dev-ab34c02afeafd047a831e6fe2a4dd875ce86bee0.tar.gz |
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ryusuke/nilfs2
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ryusuke/nilfs2: (36 commits)
nilfs2: eliminate sparse warning - "context imbalance"
nilfs2: eliminate sparse warnings - "symbol not declared"
nilfs2: get rid of bdi from nilfs object
nilfs2: change license of exported header file
nilfs2: add bdev freeze/thaw support
nilfs2: accept 64-bit checkpoint numbers in cp mount option
nilfs2: remove own inode allocator and destructor for metadata files
nilfs2: get rid of back pointer to writable sb instance
nilfs2: get rid of mi_nilfs back pointer to nilfs object
nilfs2: see state of root dentry for mount check of snapshots
nilfs2: use iget for all metadata files
nilfs2: get rid of GCDAT inode
nilfs2: add routines to redirect access to buffers of DAT file
nilfs2: add routines to roll back state of DAT file
nilfs2: add routines to save and restore bmap state
nilfs2: do not allocate nilfs_mdt_info structure to gc-inodes
nilfs2: allow nilfs_clear_inode to clear metadata file inodes
nilfs2: get rid of snapshot mount flag
nilfs2: simplify life cycle management of nilfs object
nilfs2: do not allocate multiple super block instances for a device
...
33 files changed, 1339 insertions, 1257 deletions
diff --git a/fs/nilfs2/Makefile b/fs/nilfs2/Makefile index df3e62c..85c9873 100644 --- a/fs/nilfs2/Makefile +++ b/fs/nilfs2/Makefile @@ -2,4 +2,4 @@ obj-$(CONFIG_NILFS2_FS) += nilfs2.o nilfs2-y := inode.o file.o dir.o super.o namei.o page.o mdt.o \ btnode.o bmap.o btree.o direct.o dat.o recovery.o \ the_nilfs.o segbuf.o segment.o cpfile.o sufile.o \ - ifile.o alloc.o gcinode.o ioctl.o gcdat.o + ifile.o alloc.o gcinode.o ioctl.o diff --git a/fs/nilfs2/bmap.c b/fs/nilfs2/bmap.c index 3dbdc1d..8b782b0 100644 --- a/fs/nilfs2/bmap.c +++ b/fs/nilfs2/bmap.c @@ -533,18 +533,20 @@ void nilfs_bmap_init_gc(struct nilfs_bmap *bmap) nilfs_btree_init_gc(bmap); } -void nilfs_bmap_init_gcdat(struct nilfs_bmap *gcbmap, struct nilfs_bmap *bmap) +void nilfs_bmap_save(const struct nilfs_bmap *bmap, + struct nilfs_bmap_store *store) { - memcpy(gcbmap, bmap, sizeof(*bmap)); - init_rwsem(&gcbmap->b_sem); - lockdep_set_class(&bmap->b_sem, &nilfs_bmap_dat_lock_key); - gcbmap->b_inode = &NILFS_BMAP_I(gcbmap)->vfs_inode; + memcpy(store->data, bmap->b_u.u_data, sizeof(store->data)); + store->last_allocated_key = bmap->b_last_allocated_key; + store->last_allocated_ptr = bmap->b_last_allocated_ptr; + store->state = bmap->b_state; } -void nilfs_bmap_commit_gcdat(struct nilfs_bmap *gcbmap, struct nilfs_bmap *bmap) +void nilfs_bmap_restore(struct nilfs_bmap *bmap, + const struct nilfs_bmap_store *store) { - memcpy(bmap, gcbmap, sizeof(*bmap)); - init_rwsem(&bmap->b_sem); - lockdep_set_class(&bmap->b_sem, &nilfs_bmap_dat_lock_key); - bmap->b_inode = &NILFS_BMAP_I(bmap)->vfs_inode; + memcpy(bmap->b_u.u_data, store->data, sizeof(store->data)); + bmap->b_last_allocated_key = store->last_allocated_key; + bmap->b_last_allocated_ptr = store->last_allocated_ptr; + bmap->b_state = store->state; } diff --git a/fs/nilfs2/bmap.h b/fs/nilfs2/bmap.h index a20569b..bde1c0aa2 100644 --- a/fs/nilfs2/bmap.h +++ b/fs/nilfs2/bmap.h @@ -135,6 +135,12 @@ struct nilfs_bmap { /* state */ #define NILFS_BMAP_DIRTY 0x00000001 +struct nilfs_bmap_store { + __le64 data[NILFS_BMAP_SIZE / sizeof(__le64)]; + __u64 last_allocated_key; + __u64 last_allocated_ptr; + int state; +}; int nilfs_bmap_test_and_clear_dirty(struct nilfs_bmap *); int nilfs_bmap_read(struct nilfs_bmap *, struct nilfs_inode *); @@ -153,9 +159,9 @@ int nilfs_bmap_lookup_at_level(struct nilfs_bmap *, __u64, int, __u64 *); int nilfs_bmap_mark(struct nilfs_bmap *, __u64, int); void nilfs_bmap_init_gc(struct nilfs_bmap *); -void nilfs_bmap_init_gcdat(struct nilfs_bmap *, struct nilfs_bmap *); -void nilfs_bmap_commit_gcdat(struct nilfs_bmap *, struct nilfs_bmap *); +void nilfs_bmap_save(const struct nilfs_bmap *, struct nilfs_bmap_store *); +void nilfs_bmap_restore(struct nilfs_bmap *, const struct nilfs_bmap_store *); static inline int nilfs_bmap_lookup(struct nilfs_bmap *bmap, __u64 key, __u64 *ptr) diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c index f78ab10..5115814c 100644 --- a/fs/nilfs2/btnode.c +++ b/fs/nilfs2/btnode.c @@ -37,15 +37,7 @@ void nilfs_btnode_cache_init_once(struct address_space *btnc) { - memset(btnc, 0, sizeof(*btnc)); - INIT_RADIX_TREE(&btnc->page_tree, GFP_ATOMIC); - spin_lock_init(&btnc->tree_lock); - INIT_LIST_HEAD(&btnc->private_list); - spin_lock_init(&btnc->private_lock); - - spin_lock_init(&btnc->i_mmap_lock); - INIT_RAW_PRIO_TREE_ROOT(&btnc->i_mmap); - INIT_LIST_HEAD(&btnc->i_mmap_nonlinear); + nilfs_mapping_init_once(btnc); } static const struct address_space_operations def_btnode_aops = { @@ -55,12 +47,7 @@ static const struct address_space_operations def_btnode_aops = { void nilfs_btnode_cache_init(struct address_space *btnc, struct backing_dev_info *bdi) { - btnc->host = NULL; /* can safely set to host inode ? */ - btnc->flags = 0; - mapping_set_gfp_mask(btnc, GFP_NOFS); - btnc->assoc_mapping = NULL; - btnc->backing_dev_info = bdi; - btnc->a_ops = &def_btnode_aops; + nilfs_mapping_init(btnc, bdi, &def_btnode_aops); } void nilfs_btnode_cache_clear(struct address_space *btnc) diff --git a/fs/nilfs2/cpfile.c b/fs/nilfs2/cpfile.c index 1873781..5ff15a8 100644 --- a/fs/nilfs2/cpfile.c +++ b/fs/nilfs2/cpfile.c @@ -863,26 +863,19 @@ int nilfs_cpfile_is_snapshot(struct inode *cpfile, __u64 cno) */ int nilfs_cpfile_change_cpmode(struct inode *cpfile, __u64 cno, int mode) { - struct the_nilfs *nilfs; int ret; - nilfs = NILFS_MDT(cpfile)->mi_nilfs; - switch (mode) { case NILFS_CHECKPOINT: - /* - * Check for protecting existing snapshot mounts: - * ns_mount_mutex is used to make this operation atomic and - * exclusive with a new mount job. Though it doesn't cover - * umount, it's enough for the purpose. - */ - if (nilfs_checkpoint_is_mounted(nilfs, cno, 1)) { - /* Current implementation does not have to protect - plain read-only mounts since they are exclusive - with a read/write mount and are protected from the - cleaner. */ + if (nilfs_checkpoint_is_mounted(cpfile->i_sb, cno)) + /* + * Current implementation does not have to protect + * plain read-only mounts since they are exclusive + * with a read/write mount and are protected from the + * cleaner. + */ ret = -EBUSY; - } else + else ret = nilfs_cpfile_clear_snapshot(cpfile, cno); return ret; case NILFS_SNAPSHOT: @@ -933,27 +926,40 @@ int nilfs_cpfile_get_stat(struct inode *cpfile, struct nilfs_cpstat *cpstat) } /** - * nilfs_cpfile_read - read cpfile inode - * @cpfile: cpfile inode - * @raw_inode: on-disk cpfile inode - */ -int nilfs_cpfile_read(struct inode *cpfile, struct nilfs_inode *raw_inode) -{ - return nilfs_read_inode_common(cpfile, raw_inode); -} - -/** - * nilfs_cpfile_new - create cpfile - * @nilfs: nilfs object + * nilfs_cpfile_read - read or get cpfile inode + * @sb: super block instance * @cpsize: size of a checkpoint entry + * @raw_inode: on-disk cpfile inode + * @inodep: buffer to store the inode */ -struct inode *nilfs_cpfile_new(struct the_nilfs *nilfs, size_t cpsize) +int nilfs_cpfile_read(struct super_block *sb, size_t cpsize, + struct nilfs_inode *raw_inode, struct inode **inodep) { struct inode *cpfile; + int err; + + cpfile = nilfs_iget_locked(sb, NULL, NILFS_CPFILE_INO); + if (unlikely(!cpfile)) + return -ENOMEM; + if (!(cpfile->i_state & I_NEW)) + goto out; + + err = nilfs_mdt_init(cpfile, NILFS_MDT_GFP, 0); + if (err) + goto failed; - cpfile = nilfs_mdt_new(nilfs, NULL, NILFS_CPFILE_INO, 0); - if (cpfile) - nilfs_mdt_set_entry_size(cpfile, cpsize, - sizeof(struct nilfs_cpfile_header)); - return cpfile; + nilfs_mdt_set_entry_size(cpfile, cpsize, + sizeof(struct nilfs_cpfile_header)); + + err = nilfs_read_inode_common(cpfile, raw_inode); + if (err) + goto failed; + + unlock_new_inode(cpfile); + out: + *inodep = cpfile; + return 0; + failed: + iget_failed(cpfile); + return err; } diff --git a/fs/nilfs2/cpfile.h b/fs/nilfs2/cpfile.h index bc0809e..a242b9a 100644 --- a/fs/nilfs2/cpfile.h +++ b/fs/nilfs2/cpfile.h @@ -40,7 +40,7 @@ int nilfs_cpfile_get_stat(struct inode *, struct nilfs_cpstat *); ssize_t nilfs_cpfile_get_cpinfo(struct inode *, __u64 *, int, void *, unsigned, size_t); -int nilfs_cpfile_read(struct inode *cpfile, struct nilfs_inode *raw_inode); -struct inode *nilfs_cpfile_new(struct the_nilfs *nilfs, size_t cpsize); +int nilfs_cpfile_read(struct super_block *sb, size_t cpsize, + struct nilfs_inode *raw_inode, struct inode **inodep); #endif /* _NILFS_CPFILE_H */ diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c index 0131467..49c844d 100644 --- a/fs/nilfs2/dat.c +++ b/fs/nilfs2/dat.c @@ -36,6 +36,7 @@ struct nilfs_dat_info { struct nilfs_mdt_info mi; struct nilfs_palloc_cache palloc_cache; + struct nilfs_shadow_map shadow; }; static inline struct nilfs_dat_info *NILFS_DAT_I(struct inode *dat) @@ -102,7 +103,8 @@ void nilfs_dat_abort_alloc(struct inode *dat, struct nilfs_palloc_req *req) nilfs_palloc_abort_alloc_entry(dat, req); } -void nilfs_dat_commit_free(struct inode *dat, struct nilfs_palloc_req *req) +static void nilfs_dat_commit_free(struct inode *dat, + struct nilfs_palloc_req *req) { struct nilfs_dat_entry *entry; void *kaddr; @@ -327,6 +329,23 @@ int nilfs_dat_move(struct inode *dat, __u64 vblocknr, sector_t blocknr) ret = nilfs_palloc_get_entry_block(dat, vblocknr, 0, &entry_bh); if (ret < 0) return ret; + + /* + * The given disk block number (blocknr) is not yet written to + * the device at this point. + * + * To prevent nilfs_dat_translate() from returning the + * uncommited block number, this makes a copy of the entry + * buffer and redirects nilfs_dat_translate() to the copy. + */ + if (!buffer_nilfs_redirected(entry_bh)) { + ret = nilfs_mdt_freeze_buffer(dat, entry_bh); + if (ret) { + brelse(entry_bh); + return ret; + } + } + kaddr = kmap_atomic(entry_bh->b_page, KM_USER0); entry = nilfs_palloc_block_get_entry(dat, vblocknr, entry_bh, kaddr); if (unlikely(entry->de_blocknr == cpu_to_le64(0))) { @@ -371,7 +390,7 @@ int nilfs_dat_move(struct inode *dat, __u64 vblocknr, sector_t blocknr) */ int nilfs_dat_translate(struct inode *dat, __u64 vblocknr, sector_t *blocknrp) { - struct buffer_head *entry_bh; + struct buffer_head *entry_bh, *bh; struct nilfs_dat_entry *entry; sector_t blocknr; void *kaddr; @@ -381,6 +400,15 @@ int nilfs_dat_translate(struct inode *dat, __u64 vblocknr, sector_t *blocknrp) if (ret < 0) return ret; + if (!nilfs_doing_gc() && buffer_nilfs_redirected(entry_bh)) { + bh = nilfs_mdt_get_frozen_buffer(dat, entry_bh); + if (bh) { + WARN_ON(!buffer_uptodate(bh)); + brelse(entry_bh); + entry_bh = bh; + } + } + kaddr = kmap_atomic(entry_bh->b_page, KM_USER0); entry = nilfs_palloc_block_get_entry(dat, vblocknr, entry_bh, kaddr); blocknr = le64_to_cpu(entry->de_blocknr); @@ -436,38 +464,48 @@ ssize_t nilfs_dat_get_vinfo(struct inode *dat, void *buf, unsigned visz, } /** - * nilfs_dat_read - read dat inode - * @dat: dat inode - * @raw_inode: on-disk dat inode - */ -int nilfs_dat_read(struct inode *dat, struct nilfs_inode *raw_inode) -{ - return nilfs_read_inode_common(dat, raw_inode); -} - -/** - * nilfs_dat_new - create dat file - * @nilfs: nilfs object + * nilfs_dat_read - read or get dat inode + * @sb: super block instance * @entry_size: size of a dat entry + * @raw_inode: on-disk dat inode + * @inodep: buffer to store the inode */ -struct inode *nilfs_dat_new(struct the_nilfs *nilfs, size_t entry_size) +int nilfs_dat_read(struct super_block *sb, size_t entry_size, + struct nilfs_inode *raw_inode, struct inode **inodep) { static struct lock_class_key dat_lock_key; struct inode *dat; struct nilfs_dat_info *di; int err; - dat = nilfs_mdt_new(nilfs, NULL, NILFS_DAT_INO, sizeof(*di)); - if (dat) { - err = nilfs_palloc_init_blockgroup(dat, entry_size); - if (unlikely(err)) { - nilfs_mdt_destroy(dat); - return NULL; - } + dat = nilfs_iget_locked(sb, NULL, NILFS_DAT_INO); + if (unlikely(!dat)) + return -ENOMEM; + if (!(dat->i_state & I_NEW)) + goto out; - di = NILFS_DAT_I(dat); - lockdep_set_class(&di->mi.mi_sem, &dat_lock_key); - nilfs_palloc_setup_cache(dat, &di->palloc_cache); - } - return dat; + err = nilfs_mdt_init(dat, NILFS_MDT_GFP, sizeof(*di)); + if (err) + goto failed; + + err = nilfs_palloc_init_blockgroup(dat, entry_size); + if (err) + goto failed; + + di = NILFS_DAT_I(dat); + lockdep_set_class(&di->mi.mi_sem, &dat_lock_key); + nilfs_palloc_setup_cache(dat, &di->palloc_cache); + nilfs_mdt_setup_shadow_map(dat, &di->shadow); + + err = nilfs_read_inode_common(dat, raw_inode); + if (err) + goto failed; + + unlock_new_inode(dat); + out: + *inodep = dat; + return 0; + failed: + iget_failed(dat); + return err; } diff --git a/fs/nilfs2/dat.h b/fs/nilfs2/dat.h index d31c3aa..cbd8e97 100644 --- a/fs/nilfs2/dat.h +++ b/fs/nilfs2/dat.h @@ -53,7 +53,7 @@ int nilfs_dat_freev(struct inode *, __u64 *, size_t); int nilfs_dat_move(struct inode *, __u64, sector_t); ssize_t nilfs_dat_get_vinfo(struct inode *, void *, unsigned, size_t); -int nilfs_dat_read(struct inode *dat, struct nilfs_inode *raw_inode); -struct inode *nilfs_dat_new(struct the_nilfs *nilfs, size_t entry_size); +int nilfs_dat_read(struct super_block *sb, size_t entry_size, + struct nilfs_inode *raw_inode, struct inode **inodep); #endif /* _NILFS_DAT_H */ diff --git a/fs/nilfs2/export.h b/fs/nilfs2/export.h new file mode 100644 index 0000000..a71cc41 --- /dev/null +++ b/fs/nilfs2/export.h @@ -0,0 +1,17 @@ +#ifndef NILFS_EXPORT_H +#define NILFS_EXPORT_H + +#include <linux/exportfs.h> + +extern const struct export_operations nilfs_export_ops; + +struct nilfs_fid { + u64 cno; + u64 ino; + u32 gen; + + u32 parent_gen; + u64 parent_ino; +} __attribute__ ((packed)); + +#endif diff --git a/fs/nilfs2/gcdat.c b/fs/nilfs2/gcdat.c deleted file mode 100644 index 84a45d1..0000000 --- a/fs/nilfs2/gcdat.c +++ /dev/null @@ -1,87 +0,0 @@ -/* - * gcdat.c - NILFS shadow DAT inode for GC - * - * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * - * Written by Seiji Kihara <kihara@osrg.net>, Amagai Yoshiji <amagai@osrg.net>, - * and Ryusuke Konishi <ryusuke@osrg.net>. - * - */ - -#include <linux/buffer_head.h> -#include "nilfs.h" -#include "page.h" -#include "mdt.h" - -int nilfs_init_gcdat_inode(struct the_nilfs *nilfs) -{ - struct inode *dat = nilfs->ns_dat, *gcdat = nilfs->ns_gc_dat; - struct nilfs_inode_info *dii = NILFS_I(dat), *gii = NILFS_I(gcdat); - int err; - - gcdat->i_state = 0; - gcdat->i_blocks = dat->i_blocks; - gii->i_flags = dii->i_flags; - gii->i_state = dii->i_state | (1 << NILFS_I_GCDAT); - gii->i_cno = 0; - nilfs_bmap_init_gcdat(gii->i_bmap, dii->i_bmap); - err = nilfs_copy_dirty_pages(gcdat->i_mapping, dat->i_mapping); - if (unlikely(err)) - return err; - - return nilfs_copy_dirty_pages(&gii->i_btnode_cache, - &dii->i_btnode_cache); -} - -void nilfs_commit_gcdat_inode(struct the_nilfs *nilfs) -{ - struct inode *dat = nilfs->ns_dat, *gcdat = nilfs->ns_gc_dat; - struct nilfs_inode_info *dii = NILFS_I(dat), *gii = NILFS_I(gcdat); - struct address_space *mapping = dat->i_mapping; - struct address_space *gmapping = gcdat->i_mapping; - - down_write(&NILFS_MDT(dat)->mi_sem); - dat->i_blocks = gcdat->i_blocks; - dii->i_flags = gii->i_flags; - dii->i_state = gii->i_state & ~(1 << NILFS_I_GCDAT); - - nilfs_bmap_commit_gcdat(gii->i_bmap, dii->i_bmap); - - nilfs_palloc_clear_cache(dat); - nilfs_palloc_clear_cache(gcdat); - nilfs_clear_dirty_pages(mapping); - nilfs_copy_back_pages(mapping, gmapping); - /* note: mdt dirty flags should be cleared by segctor. */ - - nilfs_clear_dirty_pages(&dii->i_btnode_cache); - nilfs_copy_back_pages(&dii->i_btnode_cache, &gii->i_btnode_cache); - - up_write(&NILFS_MDT(dat)->mi_sem); -} - -void nilfs_clear_gcdat_inode(struct the_nilfs *nilfs) -{ - struct inode *gcdat = nilfs->ns_gc_dat; - struct nilfs_inode_info *gii = NILFS_I(gcdat); - - gcdat->i_state = I_FREEING | I_CLEAR; - gii->i_flags = 0; - - nilfs_palloc_clear_cache(gcdat); - truncate_inode_pages(gcdat->i_mapping, 0); - truncate_inode_pages(&gii->i_btnode_cache, 0); -} diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c index bed3a78..33ad25d 100644 --- a/fs/nilfs2/gcinode.c +++ b/fs/nilfs2/gcinode.c @@ -28,13 +28,6 @@ * gcinodes), and this file provides lookup function of the dummy * inodes and their buffer read function. * - * Since NILFS2 keeps up multiple checkpoints/snapshots across GC, it - * has to treat blocks that belong to a same file but have different - * checkpoint numbers. To avoid interference among generations, dummy - * inodes are managed separately from actual inodes, and their lookup - * function (nilfs_gc_iget) is designed to be specified with a - * checkpoint number argument as well as an inode number. - * * Buffers and pages held by the dummy inodes will be released each * time after they are copied to a new log. Dirty blocks made on the * current generation and the blocks to be moved by GC never overlap @@ -175,125 +168,46 @@ int nilfs_gccache_wait_and_mark_dirty(struct buffer_head *bh) } nilfs_btnode_mark_dirty(bh); } else { - nilfs_mdt_mark_buffer_dirty(bh); + nilfs_mark_buffer_dirty(bh); } return 0; } -/* - * nilfs_init_gccache() - allocate and initialize gc_inode hash table - * @nilfs - the_nilfs - * - * Return Value: On success, 0. - * On error, a negative error code is returned. - */ -int nilfs_init_gccache(struct the_nilfs *nilfs) +int nilfs_init_gcinode(struct inode *inode) { - int loop; - - BUG_ON(nilfs->ns_gc_inodes_h); - - INIT_LIST_HEAD(&nilfs->ns_gc_inodes); - - nilfs->ns_gc_inodes_h = - kmalloc(sizeof(struct hlist_head) * NILFS_GCINODE_HASH_SIZE, - GFP_NOFS); - if (nilfs->ns_gc_inodes_h == NULL) - return -ENOMEM; - - for (loop = 0; loop < NILFS_GCINODE_HASH_SIZE; loop++) - INIT_HLIST_HEAD(&nilfs->ns_gc_inodes_h[loop]); - return 0; -} - -/* - * nilfs_destroy_gccache() - free gc_inode hash table - * @nilfs - the nilfs - */ -void nilfs_destroy_gccache(struct the_nilfs *nilfs) -{ - if (nilfs->ns_gc_inodes_h) { - nilfs_remove_all_gcinode(nilfs); - kfree(nilfs->ns_gc_inodes_h); - nilfs->ns_gc_inodes_h = NULL; - } -} - -static struct inode *alloc_gcinode(struct the_nilfs *nilfs, ino_t ino, - __u64 cno) -{ - struct inode *inode; - struct nilfs_inode_info *ii; - - inode = nilfs_mdt_new_common(nilfs, NULL, ino, GFP_NOFS, 0); - if (!inode) - return NULL; + struct nilfs_inode_info *ii = NILFS_I(inode); + struct the_nilfs *nilfs = NILFS_SB(inode->i_sb)->s_nilfs; - inode->i_op = NULL; - inode->i_fop = NULL; + inode->i_mode = S_IFREG; + mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS); inode->i_mapping->a_ops = &def_gcinode_aops; + inode->i_mapping->backing_dev_info = inode->i_sb->s_bdi; - ii = NILFS_I(inode); - ii->i_cno = cno; ii->i_flags = 0; - ii->i_state = 1 << NILFS_I_GCINODE; - ii->i_bh = NULL; nilfs_bmap_init_gc(ii->i_bmap); - return inode; -} - -static unsigned long ihash(ino_t ino, __u64 cno) -{ - return hash_long((unsigned long)((ino << 2) + cno), - NILFS_GCINODE_HASH_BITS); -} - -/* - * nilfs_gc_iget() - find or create gc inode with specified (ino,cno) - */ -struct inode *nilfs_gc_iget(struct the_nilfs *nilfs, ino_t ino, __u64 cno) -{ - struct hlist_head *head = nilfs->ns_gc_inodes_h + ihash(ino, cno); - struct hlist_node *node; - struct inode *inode; - - hlist_for_each_entry(inode, node, head, i_hash) { - if (inode->i_ino == ino && NILFS_I(inode)->i_cno == cno) - return inode; - } + /* + * Add the inode to GC inode list. Garbage Collection + * is serialized and no two processes manipulate the + * list simultaneously. + */ + igrab(inode); + list_add(&NILFS_I(inode)->i_dirty, &nilfs->ns_gc_inodes); - inode = alloc_gcinode(nilfs, ino, cno); - if (likely(inode)) { - hlist_add_head(&inode->i_hash, head); - list_add(&NILFS_I(inode)->i_dirty, &nilfs->ns_gc_inodes); - } - return inode; -} - -/* - * nilfs_clear_gcinode() - clear and free a gc inode - */ -void nilfs_clear_gcinode(struct inode *inode) -{ - nilfs_mdt_destroy(inode); + return 0; } -/* - * nilfs_remove_all_gcinode() - remove all inodes from the_nilfs +/** + * nilfs_remove_all_gcinodes() - remove all unprocessed gc inodes */ -void nilfs_remove_all_gcinode(struct the_nilfs *nilfs) +void nilfs_remove_all_gcinodes(struct the_nilfs *nilfs) { - struct hlist_head *head = nilfs->ns_gc_inodes_h; - struct hlist_node *node, *n; - struct inode *inode; - int loop; + struct list_head *head = &nilfs->ns_gc_inodes; + struct nilfs_inode_info *ii; - for (loop = 0; loop < NILFS_GCINODE_HASH_SIZE; loop++, head++) { - hlist_for_each_entry_safe(inode, node, n, head, i_hash) { - hlist_del_init(&inode->i_hash); - list_del_init(&NILFS_I(inode)->i_dirty); - nilfs_clear_gcinode(inode); /* might sleep */ - } + while (!list_empty(head)) { + ii = list_first_entry(head, struct nilfs_inode_info, i_dirty); + list_del_init(&ii->i_dirty); + iput(&ii->vfs_inode); } } diff --git a/fs/nilfs2/ifile.c b/fs/nilfs2/ifile.c index 922d9dd..9f8a2da 100644 --- a/fs/nilfs2/ifile.c +++ b/fs/nilfs2/ifile.c @@ -161,25 +161,46 @@ int nilfs_ifile_get_inode_block(struct inode *ifile, ino_t ino, } /** - * nilfs_ifile_new - create inode file - * @sbi: nilfs_sb_info struct + * nilfs_ifile_read - read or get ifile inode + * @sb: super block instance + * @root: root object * @inode_size: size of an inode + * @raw_inode: on-disk ifile inode + * @inodep: buffer to store the inode */ -struct inode *nilfs_ifile_new(struct nilfs_sb_info *sbi, size_t inode_size) +int nilfs_ifile_read(struct super_block *sb, struct nilfs_root *root, + size_t inode_size, struct nilfs_inode *raw_inode, + struct inode **inodep) { struct inode *ifile; int err; - ifile = nilfs_mdt_new(sbi->s_nilfs, sbi->s_super, NILFS_IFILE_INO, - sizeof(struct nilfs_ifile_info)); - if (ifile) { - err = nilfs_palloc_init_blockgroup(ifile, inode_size); - if (unlikely(err)) { - nilfs_mdt_destroy(ifile); - return NULL; - } - nilfs_palloc_setup_cache(ifile, - &NILFS_IFILE_I(ifile)->palloc_cache); - } - return ifile; + ifile = nilfs_iget_locked(sb, root, NILFS_IFILE_INO); + if (unlikely(!ifile)) + return -ENOMEM; + if (!(ifile->i_state & I_NEW)) + goto out; + + err = nilfs_mdt_init(ifile, NILFS_MDT_GFP, + sizeof(struct nilfs_ifile_info)); + if (err) + goto failed; + + err = nilfs_palloc_init_blockgroup(ifile, inode_size); + if (err) + goto failed; + + nilfs_palloc_setup_cache(ifile, &NILFS_IFILE_I(ifile)->palloc_cache); + + err = nilfs_read_inode_common(ifile, raw_inode); + if (err) + goto failed; + + unlock_new_inode(ifile); + out: + *inodep = ifile; + return 0; + failed: + iget_failed(ifile); + return err; } diff --git a/fs/nilfs2/ifile.h b/fs/nilfs2/ifile.h index cbca32e..59b6f2b 100644 --- a/fs/nilfs2/ifile.h +++ b/fs/nilfs2/ifile.h @@ -49,6 +49,8 @@ int nilfs_ifile_create_inode(struct inode *, ino_t *, struct buffer_head **); int nilfs_ifile_delete_inode(struct inode *, ino_t); int nilfs_ifile_get_inode_block(struct inode *, ino_t, struct buffer_head **); -struct inode *nilfs_ifile_new(struct nilfs_sb_info *sbi, size_t inode_size); +int nilfs_ifile_read(struct super_block *sb, struct nilfs_root *root, + size_t inode_size, struct nilfs_inode *raw_inode, + struct inode **inodep); #endif /* _NILFS_IFILE_H */ diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index eccb2f2..71d4bc8 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -34,6 +34,12 @@ #include "cpfile.h" #include "ifile.h" +struct nilfs_iget_args { + u64 ino; + __u64 cno; + struct nilfs_root *root; + int for_gc; +}; /** * nilfs_get_block() - get a file block on the filesystem (callback function) @@ -279,6 +285,7 @@ struct inode *nilfs_new_inode(struct inode *dir, int mode) struct nilfs_sb_info *sbi = NILFS_SB(sb); struct inode *inode; struct nilfs_inode_info *ii; + struct nilfs_root *root; int err = -ENOMEM; ino_t ino; @@ -289,15 +296,17 @@ struct inode *nilfs_new_inode(struct inode *dir, int mode) mapping_set_gfp_mask(inode->i_mapping, mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS); + root = NILFS_I(dir)->i_root; ii = NILFS_I(inode); ii->i_state = 1 << NILFS_I_NEW; + ii->i_root = root; - err = nilfs_ifile_create_inode(sbi->s_ifile, &ino, &ii->i_bh); + err = nilfs_ifile_create_inode(root->ifile, &ino, &ii->i_bh); if (unlikely(err)) goto failed_ifile_create_inode; /* reference count of i_bh inherits from nilfs_mdt_read_block() */ - atomic_inc(&sbi->s_inodes_count); + atomic_inc(&root->inodes_count); inode_init_owner(inode, dir, mode); inode->i_ino = ino; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; @@ -320,7 +329,6 @@ struct inode *nilfs_new_inode(struct inode *dir, int mode) /* ii->i_file_acl = 0; */ /* ii->i_dir_acl = 0; */ ii->i_dir_start_lookup = 0; - ii->i_cno = 0; nilfs_set_inode_flags(inode); spin_lock(&sbi->s_next_gen_lock); inode->i_generation = sbi->s_next_generation++; @@ -350,16 +358,6 @@ struct inode *nilfs_new_inode(struct inode *dir, int mode) return ERR_PTR(err); } -void nilfs_free_inode(struct inode *inode) -{ - struct super_block *sb = inode->i_sb; - struct nilfs_sb_info *sbi = NILFS_SB(sb); - - /* XXX: check error code? Is there any thing I can do? */ - (void) nilfs_ifile_delete_inode(sbi->s_ifile, inode->i_ino); - atomic_dec(&sbi->s_inodes_count); -} - void nilfs_set_inode_flags(struct inode *inode) { unsigned int flags = NILFS_I(inode)->i_flags; @@ -410,7 +408,6 @@ int nilfs_read_inode_common(struct inode *inode, 0 : le32_to_cpu(raw_inode->i_dir_acl); #endif ii->i_dir_start_lookup = 0; - ii->i_cno = 0; inode->i_generation = le32_to_cpu(raw_inode->i_generation); if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || @@ -424,7 +421,8 @@ int nilfs_read_inode_common(struct inode *inode, return 0; } -static int __nilfs_read_inode(struct super_block *sb, unsigned long ino, +static int __nilfs_read_inode(struct super_block *sb, + struct nilfs_root *root, unsigned long ino, struct inode *inode) { struct nilfs_sb_info *sbi = NILFS_SB(sb); @@ -434,11 +432,11 @@ static int __nilfs_read_inode(struct super_block *sb, unsigned long ino, int err; down_read(&NILFS_MDT(dat)->mi_sem); /* XXX */ - err = nilfs_ifile_get_inode_block(sbi->s_ifile, ino, &bh); + err = nilfs_ifile_get_inode_block(root->ifile, ino, &bh); if (unlikely(err)) goto bad_inode; - raw_inode = nilfs_ifile_map_inode(sbi->s_ifile, ino, bh); + raw_inode = nilfs_ifile_map_inode(root->ifile, ino, bh); err = nilfs_read_inode_common(inode, raw_inode); if (err) @@ -461,14 +459,14 @@ static int __nilfs_read_inode(struct super_block *sb, unsigned long ino, inode, inode->i_mode, huge_decode_dev(le64_to_cpu(raw_inode->i_device_code))); } - nilfs_ifile_unmap_inode(sbi->s_ifile, ino, bh); + nilfs_ifile_unmap_inode(root->ifile, ino, bh); brelse(bh); up_read(&NILFS_MDT(dat)->mi_sem); /* XXX */ nilfs_set_inode_flags(inode); return 0; failed_unmap: - nilfs_ifile_unmap_inode(sbi->s_ifile, ino, bh); + nilfs_ifile_unmap_inode(root->ifile, ino, bh); brelse(bh); bad_inode: @@ -476,18 +474,95 @@ static int __nilfs_read_inode(struct super_block *sb, unsigned long ino, return err; } -struct inode *nilfs_iget(struct super_block *sb, unsigned long ino) +static int nilfs_iget_test(struct inode *inode, void *opaque) +{ + struct nilfs_iget_args *args = opaque; + struct nilfs_inode_info *ii; + + if (args->ino != inode->i_ino || args->root != NILFS_I(inode)->i_root) + return 0; + + ii = NILFS_I(inode); + if (!test_bit(NILFS_I_GCINODE, &ii->i_state)) + return !args->for_gc; + + return args->for_gc && args->cno == ii->i_cno; +} + +static int nilfs_iget_set(struct inode *inode, void *opaque) +{ + struct nilfs_iget_args *args = opaque; + + inode->i_ino = args->ino; + if (args->for_gc) { + NILFS_I(inode)->i_state = 1 << NILFS_I_GCINODE; + NILFS_I(inode)->i_cno = args->cno; + NILFS_I(inode)->i_root = NULL; + } else { + if (args->root && args->ino == NILFS_ROOT_INO) + nilfs_get_root(args->root); + NILFS_I(inode)->i_root = args->root; + } + return 0; +} + +struct inode *nilfs_ilookup(struct super_block *sb, struct nilfs_root *root, + unsigned long ino) +{ + struct nilfs_iget_args args = { + .ino = ino, .root = root, .cno = 0, .for_gc = 0 + }; + + return ilookup5(sb, ino, nilfs_iget_test, &args); +} + +struct inode *nilfs_iget_locked(struct super_block *sb, struct nilfs_root *root, + unsigned long ino) +{ + struct nilfs_iget_args args = { + .ino = ino, .root = root, .cno = 0, .for_gc = 0 + }; + + return iget5_locked(sb, ino, nilfs_iget_test, nilfs_iget_set, &args); +} + +struct inode *nilfs_iget(struct super_block *sb, struct nilfs_root *root, + unsigned long ino) { struct inode *inode; int err; - inode = iget_locked(sb, ino); + inode = nilfs_iget_locked(sb, root, ino); if (unlikely(!inode)) return ERR_PTR(-ENOMEM); if (!(inode->i_state & I_NEW)) return inode; - err = __nilfs_read_inode(sb, ino, inode); + err = __nilfs_read_inode(sb, root, ino, inode); + if (unlikely(err)) { + iget_failed(inode); + return ERR_PTR(err); + } + unlock_new_inode(inode); + return inode; +} + +struct inode *nilfs_iget_for_gc(struct super_block *sb, unsigned long ino, + __u64 cno) +{ + struct nilfs_iget_args args = { + .ino = ino, .root = NULL, .cno = cno, .for_gc = 1 + }; + struct inode *inode; + int err; + + inode = iget5_locked(sb, ino, nilfs_iget_test, nilfs_iget_set, &args); + if (unlikely(!inode)) + return ERR_PTR(-ENOMEM); + if (!(inode->i_state & I_NEW)) + return inode; + + err = nilfs_init_gcinode(inode); if (unlikely(err)) { iget_failed(inode); return ERR_PTR(err); @@ -528,21 +603,20 @@ void nilfs_update_inode(struct inode *inode, struct buffer_head *ibh) { ino_t ino = inode->i_ino; struct nilfs_inode_info *ii = NILFS_I(inode); - struct super_block *sb = inode->i_sb; - struct nilfs_sb_info *sbi = NILFS_SB(sb); + struct inode *ifile = ii->i_root->ifile; struct nilfs_inode *raw_inode; - raw_inode = nilfs_ifile_map_inode(sbi->s_ifile, ino, ibh); + raw_inode = nilfs_ifile_map_inode(ifile, ino, ibh); if (test_and_clear_bit(NILFS_I_NEW, &ii->i_state)) - memset(raw_inode, 0, NILFS_MDT(sbi->s_ifile)->mi_entry_size); + memset(raw_inode, 0, NILFS_MDT(ifile)->mi_entry_size); set_bit(NILFS_I_INODE_DIRTY, &ii->i_state); nilfs_write_inode_common(inode, raw_inode, 0); /* XXX: call with has_bmap = 0 is a workaround to avoid deadlock of bmap. This delays update of i_bmap to just before writing */ - nilfs_ifile_unmap_inode(sbi->s_ifile, ino, ibh); + nilfs_ifile_unmap_inode(ifile, ino, ibh); } #define NILFS_MAX_TRUNCATE_BLOCKS 16384 /* 64MB for 4KB block */ @@ -617,6 +691,7 @@ void nilfs_truncate(struct inode *inode) static void nilfs_clear_inode(struct inode *inode) { struct nilfs_inode_info *ii = NILFS_I(inode); + struct nilfs_mdt_info *mdi = NILFS_MDT(inode); /* * Free resources allocated in nilfs_read_inode(), here. @@ -625,10 +700,16 @@ static void nilfs_clear_inode(struct inode *inode) brelse(ii->i_bh); ii->i_bh = NULL; + if (mdi && mdi->mi_palloc_cache) + nilfs_palloc_destroy_cache(inode); + if (test_bit(NILFS_I_BMAP, &ii->i_state)) nilfs_bmap_clear(ii->i_bmap); nilfs_btnode_cache_clear(&ii->i_btnode_cache); + + if (ii->i_root && inode->i_ino == NILFS_ROOT_INO) + nilfs_put_root(ii->i_root); } void nilfs_evict_inode(struct inode *inode) @@ -637,7 +718,7 @@ void nilfs_evict_inode(struct inode *inode) struct super_block *sb = inode->i_sb; struct nilfs_inode_info *ii = NILFS_I(inode); - if (inode->i_nlink || unlikely(is_bad_inode(inode))) { + if (inode->i_nlink || !ii->i_root || unlikely(is_bad_inode(inode))) { if (inode->i_data.nrpages) truncate_inode_pages(&inode->i_data, 0); end_writeback(inode); @@ -649,12 +730,16 @@ void nilfs_evict_inode(struct inode *inode) if (inode->i_data.nrpages) truncate_inode_pages(&inode->i_data, 0); + /* TODO: some of the following operations may fail. */ nilfs_truncate_bmap(ii, 0); nilfs_mark_inode_dirty(inode); end_writeback(inode); + + nilfs_ifile_delete_inode(ii->i_root->ifile, inode->i_ino); + atomic_dec(&ii->i_root->inodes_count); + nilfs_clear_inode(inode); - nilfs_free_inode(inode); - /* nilfs_free_inode() marks inode buffer dirty */ + if (IS_SYNC(inode)) nilfs_set_transaction_flag(NILFS_TI_SYNC); nilfs_transaction_commit(sb); @@ -700,6 +785,17 @@ out_err: return err; } +int nilfs_permission(struct inode *inode, int mask) +{ + struct nilfs_root *root = NILFS_I(inode)->i_root; + + if ((mask & MAY_WRITE) && root && + root->cno != NILFS_CPTREE_CURRENT_CNO) + return -EROFS; /* snapshot is not writable */ + + return generic_permission(inode, mask, NULL); +} + int nilfs_load_inode_block(struct nilfs_sb_info *sbi, struct inode *inode, struct buffer_head **pbh) { @@ -709,8 +805,8 @@ int nilfs_load_inode_block(struct nilfs_sb_info *sbi, struct inode *inode, spin_lock(&sbi->s_inode_lock); if (ii->i_bh == NULL) { spin_unlock(&sbi->s_inode_lock); - err = nilfs_ifile_get_inode_block(sbi->s_ifile, inode->i_ino, - pbh); + err = nilfs_ifile_get_inode_block(ii->i_root->ifile, + inode->i_ino, pbh); if (unlikely(err)) return err; spin_lock(&sbi->s_inode_lock); @@ -790,7 +886,7 @@ int nilfs_mark_inode_dirty(struct inode *inode) } nilfs_update_inode(inode, ibh); nilfs_mdt_mark_buffer_dirty(ibh); - nilfs_mdt_mark_dirty(sbi->s_ifile); + nilfs_mdt_mark_dirty(NILFS_I(inode)->i_root->ifile); brelse(ibh); return 0; } @@ -808,6 +904,7 @@ int nilfs_mark_inode_dirty(struct inode *inode) void nilfs_dirty_inode(struct inode *inode) { struct nilfs_transaction_info ti; + struct nilfs_mdt_info *mdi = NILFS_MDT(inode); if (is_bad_inode(inode)) { nilfs_warning(inode->i_sb, __func__, @@ -815,6 +912,10 @@ void nilfs_dirty_inode(struct inode *inode) dump_stack(); return; } + if (mdi) { + nilfs_mdt_mark_dirty(inode); + return; + } nilfs_transaction_begin(inode->i_sb, &ti, 0); nilfs_mark_inode_dirty(inode); nilfs_transaction_commit(inode->i_sb); /* never fails */ diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index 0442ee3..3e90f86 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c @@ -117,7 +117,7 @@ static int nilfs_ioctl_change_cpmode(struct inode *inode, struct file *filp, if (copy_from_user(&cpmode, argp, sizeof(cpmode))) goto out; - mutex_lock(&nilfs->ns_mount_mutex); + down_read(&inode->i_sb->s_umount); nilfs_transaction_begin(inode->i_sb, &ti, 0); ret = nilfs_cpfile_change_cpmode( @@ -127,7 +127,7 @@ static int nilfs_ioctl_change_cpmode(struct inode *inode, struct file *filp, else nilfs_transaction_commit(inode->i_sb); /* never fails */ - mutex_unlock(&nilfs->ns_mount_mutex); + up_read(&inode->i_sb->s_umount); out: mnt_drop_write(filp->f_path.mnt); return ret; @@ -333,7 +333,7 @@ static int nilfs_ioctl_move_inode_block(struct inode *inode, return 0; } -static int nilfs_ioctl_move_blocks(struct the_nilfs *nilfs, +static int nilfs_ioctl_move_blocks(struct super_block *sb, struct nilfs_argv *argv, void *buf) { size_t nmembs = argv->v_nmembs; @@ -348,7 +348,7 @@ static int nilfs_ioctl_move_blocks(struct the_nilfs *nilfs, for (i = 0, vdesc = buf; i < nmembs; ) { ino = vdesc->vd_ino; cno = vdesc->vd_cno; - inode = nilfs_gc_iget(nilfs, ino, cno); + inode = nilfs_iget_for_gc(sb, ino, cno); if (unlikely(inode == NULL)) { ret = -ENOMEM; goto failed; @@ -356,11 +356,15 @@ static int nilfs_ioctl_move_blocks(struct the_nilfs *nilfs, do { ret = nilfs_ioctl_move_inode_block(inode, vdesc, &buffers); - if (unlikely(ret < 0)) + if (unlikely(ret < 0)) { + iput(inode); goto failed; + } vdesc++; } while (++i < nmembs && vdesc->vd_ino == ino && vdesc->vd_cno == cno); + + iput(inode); /* The inode still remains in GC inode list */ } list_for_each_entry_safe(bh, n, &buffers, b_assoc_buffers) { @@ -566,7 +570,7 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp, } /* - * nilfs_ioctl_move_blocks() will call nilfs_gc_iget(), + * nilfs_ioctl_move_blocks() will call nilfs_iget_for_gc(), * which will operates an inode list without blocking. * To protect the list from concurrent operations, * nilfs_ioctl_move_blocks should be atomic operation. @@ -576,15 +580,16 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp, goto out_free; } - ret = nilfs_ioctl_move_blocks(nilfs, &argv[0], kbufs[0]); + vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); + + ret = nilfs_ioctl_move_blocks(inode->i_sb, &argv[0], kbufs[0]); if (ret < 0) printk(KERN_ERR "NILFS: GC failed during preparation: " "cannot read source blocks: err=%d\n", ret); else ret = nilfs_clean_segments(inode->i_sb, argv, kbufs); - if (ret < 0) - nilfs_remove_all_gcinode(nilfs); + nilfs_remove_all_gcinodes(nilfs); clear_nilfs_gc_running(nilfs); out_free: diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c index d01aff4..39a5b84 100644 --- a/fs/nilfs2/mdt.c +++ b/fs/nilfs2/mdt.c @@ -36,7 +36,6 @@ #define NILFS_MDT_MAX_RA_BLOCKS (16 - 1) -#define INIT_UNUSED_INODE_FIELDS static int nilfs_mdt_insert_new_block(struct inode *inode, unsigned long block, @@ -78,25 +77,11 @@ static int nilfs_mdt_create_block(struct inode *inode, unsigned long block, struct buffer_head *, void *)) { - struct the_nilfs *nilfs = NILFS_MDT(inode)->mi_nilfs; struct super_block *sb = inode->i_sb; struct nilfs_transaction_info ti; struct buffer_head *bh; int err; - if (!sb) { - /* - * Make sure this function is not called from any - * read-only context. - */ - if (!nilfs->ns_writer) { - WARN_ON(1); - err = -EROFS; - goto out; - } - sb = nilfs->ns_writer->s_super; - } - nilfs_transaction_begin(sb, &ti, 0); err = -ENOMEM; @@ -112,7 +97,7 @@ static int nilfs_mdt_create_block(struct inode *inode, unsigned long block, if (buffer_uptodate(bh)) goto failed_bh; - bh->b_bdev = nilfs->ns_bdev; + bh->b_bdev = sb->s_bdev; err = nilfs_mdt_insert_new_block(inode, block, bh, init_block); if (likely(!err)) { get_bh(bh); @@ -129,7 +114,7 @@ static int nilfs_mdt_create_block(struct inode *inode, unsigned long block, err = nilfs_transaction_commit(sb); else nilfs_transaction_abort(sb); - out: + return err; } @@ -167,9 +152,7 @@ nilfs_mdt_submit_block(struct inode *inode, unsigned long blkoff, unlock_buffer(bh); goto failed_bh; } - bh->b_bdev = NILFS_MDT(inode)->mi_nilfs->ns_bdev; - bh->b_blocknr = (sector_t)blknum; - set_buffer_mapped(bh); + map_bh(bh, inode->i_sb, (sector_t)blknum); bh->b_end_io = end_buffer_read_sync; get_bh(bh); @@ -398,35 +381,24 @@ int nilfs_mdt_fetch_dirty(struct inode *inode) static int nilfs_mdt_write_page(struct page *page, struct writeback_control *wbc) { - struct inode *inode = container_of(page->mapping, - struct inode, i_data); - struct super_block *sb = inode->i_sb; - struct the_nilfs *nilfs = NILFS_MDT(inode)->mi_nilfs; - struct nilfs_sb_info *writer = NULL; + struct inode *inode; + struct super_block *sb; int err = 0; redirty_page_for_writepage(wbc, page); unlock_page(page); - if (page->mapping->assoc_mapping) - return 0; /* Do not request flush for shadow page cache */ - if (!sb) { - down_read(&nilfs->ns_writer_sem); - writer = nilfs->ns_writer; - if (!writer) { - up_read(&nilfs->ns_writer_sem); - return -EROFS; - } - sb = writer->s_super; - } + inode = page->mapping->host; + if (!inode) + return 0; + + sb = inode->i_sb; if (wbc->sync_mode == WB_SYNC_ALL) err = nilfs_construct_segment(sb); else if (wbc->for_reclaim) nilfs_flush_segment(sb, inode->i_ino); - if (writer) - up_read(&nilfs->ns_writer_sem); return err; } @@ -439,105 +411,27 @@ static const struct address_space_operations def_mdt_aops = { static const struct inode_operations def_mdt_iops; static const struct file_operations def_mdt_fops; -/* - * NILFS2 uses pseudo inodes for meta data files such as DAT, cpfile, sufile, - * ifile, or gcinodes. This allows the B-tree code and segment constructor - * to treat them like regular files, and this helps to simplify the - * implementation. - * On the other hand, some of the pseudo inodes have an irregular point: - * They don't have valid inode->i_sb pointer because their lifetimes are - * longer than those of the super block structs; they may continue for - * several consecutive mounts/umounts. This would need discussions. - */ -/** - * nilfs_mdt_new_common - allocate a pseudo inode for metadata file - * @nilfs: nilfs object - * @sb: super block instance the metadata file belongs to - * @ino: inode number - * @gfp_mask: gfp mask for data pages - * @objsz: size of the private object attached to inode->i_private - */ -struct inode * -nilfs_mdt_new_common(struct the_nilfs *nilfs, struct super_block *sb, - ino_t ino, gfp_t gfp_mask, size_t objsz) + +int nilfs_mdt_init(struct inode *inode, gfp_t gfp_mask, size_t objsz) { - struct inode *inode = nilfs_alloc_inode_common(nilfs); + struct nilfs_mdt_info *mi; - if (!inode) - return NULL; - else { - struct address_space * const mapping = &inode->i_data; - struct nilfs_mdt_info *mi; - - mi = kzalloc(max(sizeof(*mi), objsz), GFP_NOFS); - if (!mi) { - nilfs_destroy_inode(inode); - return NULL; - } - mi->mi_nilfs = nilfs; - init_rwsem(&mi->mi_sem); - - inode->i_sb = sb; /* sb may be NULL for some meta data files */ - inode->i_blkbits = nilfs->ns_blocksize_bits; - inode->i_flags = 0; - atomic_set(&inode->i_count, 1); - inode->i_nlink = 1; - inode->i_ino = ino; - inode->i_mode = S_IFREG; - inode->i_private = mi; - -#ifdef INIT_UNUSED_INODE_FIELDS - atomic_set(&inode->i_writecount, 0); - inode->i_size = 0; - inode->i_blocks = 0; - inode->i_bytes = 0; - inode->i_generation = 0; -#ifdef CONFIG_QUOTA - memset(&inode->i_dquot, 0, sizeof(inode->i_dquot)); -#endif - inode->i_pipe = NULL; - inode->i_bdev = NULL; - inode->i_cdev = NULL; - inode->i_rdev = 0; -#ifdef CONFIG_SECURITY - inode->i_security = NULL; -#endif - inode->dirtied_when = 0; - - INIT_LIST_HEAD(&inode->i_list); - INIT_LIST_HEAD(&inode->i_sb_list); - inode->i_state = 0; -#endif - - spin_lock_init(&inode->i_lock); - mutex_init(&inode->i_mutex); - init_rwsem(&inode->i_alloc_sem); - - mapping->host = NULL; /* instead of inode */ - mapping->flags = 0; - mapping_set_gfp_mask(mapping, gfp_mask); - mapping->assoc_mapping = NULL; - mapping->backing_dev_info = nilfs->ns_bdi; - - inode->i_mapping = mapping; - } + mi = kzalloc(max(sizeof(*mi), objsz), GFP_NOFS); + if (!mi) + return -ENOMEM; - return inode; -} + init_rwsem(&mi->mi_sem); + inode->i_private = mi; -struct inode *nilfs_mdt_new(struct the_nilfs *nilfs, struct super_block *sb, - ino_t ino, size_t objsz) -{ - struct inode *inode; - - inode = nilfs_mdt_new_common(nilfs, sb, ino, NILFS_MDT_GFP, objsz); - if (!inode) - return NULL; + inode->i_mode = S_IFREG; + mapping_set_gfp_mask(inode->i_mapping, gfp_mask); + inode->i_mapping->backing_dev_info = inode->i_sb->s_bdi; inode->i_op = &def_mdt_iops; inode->i_fop = &def_mdt_fops; inode->i_mapping->a_ops = &def_mdt_aops; - return inode; + + return 0; } void nilfs_mdt_set_entry_size(struct inode *inode, unsigned entry_size, @@ -550,34 +444,159 @@ void nilfs_mdt_set_entry_size(struct inode *inode, unsigned entry_size, mi->mi_first_entry_offset = DIV_ROUND_UP(header_size, entry_size); } -void nilfs_mdt_set_shadow(struct inode *orig, struct inode *shadow) +static const struct address_space_operations shadow_map_aops = { + .sync_page = block_sync_page, +}; + +/** + * nilfs_mdt_setup_shadow_map - setup shadow map and bind it to metadata file + * @inode: inode of the metadata file + * @shadow: shadow mapping + */ +int nilfs_mdt_setup_shadow_map(struct inode *inode, + struct nilfs_shadow_map *shadow) { - shadow->i_mapping->assoc_mapping = orig->i_mapping; - NILFS_I(shadow)->i_btnode_cache.assoc_mapping = - &NILFS_I(orig)->i_btnode_cache; + struct nilfs_mdt_info *mi = NILFS_MDT(inode); + struct backing_dev_info *bdi = inode->i_sb->s_bdi; + + INIT_LIST_HEAD(&shadow->frozen_buffers); + nilfs_mapping_init_once(&shadow->frozen_data); + nilfs_mapping_init(&shadow->frozen_data, bdi, &shadow_map_aops); + nilfs_mapping_init_once(&shadow->frozen_btnodes); + nilfs_mapping_init(&shadow->frozen_btnodes, bdi, &shadow_map_aops); + mi->mi_shadow = shadow; + return 0; } -static void nilfs_mdt_clear(struct inode *inode) +/** + * nilfs_mdt_save_to_shadow_map - copy bmap and dirty pages to shadow map + * @inode: inode of the metadata file + */ +int nilfs_mdt_save_to_shadow_map(struct inode *inode) { + struct nilfs_mdt_info *mi = NILFS_MDT(inode); struct nilfs_inode_info *ii = NILFS_I(inode); + struct nilfs_shadow_map *shadow = mi->mi_shadow; + int ret; - invalidate_mapping_pages(inode->i_mapping, 0, -1); - truncate_inode_pages(inode->i_mapping, 0); + ret = nilfs_copy_dirty_pages(&shadow->frozen_data, inode->i_mapping); + if (ret) + goto out; + + ret = nilfs_copy_dirty_pages(&shadow->frozen_btnodes, + &ii->i_btnode_cache); + if (ret) + goto out; - if (test_bit(NILFS_I_BMAP, &ii->i_state)) - nilfs_bmap_clear(ii->i_bmap); - nilfs_btnode_cache_clear(&ii->i_btnode_cache); + nilfs_bmap_save(ii->i_bmap, &shadow->bmap_store); + out: + return ret; } -void nilfs_mdt_destroy(struct inode *inode) +int nilfs_mdt_freeze_buffer(struct inode *inode, struct buffer_head *bh) { - struct nilfs_mdt_info *mdi = NILFS_MDT(inode); + struct nilfs_shadow_map *shadow = NILFS_MDT(inode)->mi_shadow; + struct buffer_head *bh_frozen; + struct page *page; + int blkbits = inode->i_blkbits; + int ret = -ENOMEM; + + page = grab_cache_page(&shadow->frozen_data, bh->b_page->index); + if (!page) + return ret; + + if (!page_has_buffers(page)) + create_empty_buffers(page, 1 << blkbits, 0); + + bh_frozen = nilfs_page_get_nth_block(page, bh_offset(bh) >> blkbits); + if (bh_frozen) { + if (!buffer_uptodate(bh_frozen)) + nilfs_copy_buffer(bh_frozen, bh); + if (list_empty(&bh_frozen->b_assoc_buffers)) { + list_add_tail(&bh_frozen->b_assoc_buffers, + &shadow->frozen_buffers); + set_buffer_nilfs_redirected(bh); + } else { + brelse(bh_frozen); /* already frozen */ + } + ret = 0; + } + unlock_page(page); + page_cache_release(page); + return ret; +} + +struct buffer_head * +nilfs_mdt_get_frozen_buffer(struct inode *inode, struct buffer_head *bh) +{ + struct nilfs_shadow_map *shadow = NILFS_MDT(inode)->mi_shadow; + struct buffer_head *bh_frozen = NULL; + struct page *page; + int n; + + page = find_lock_page(&shadow->frozen_data, bh->b_page->index); + if (page) { + if (page_has_buffers(page)) { + n = bh_offset(bh) >> inode->i_blkbits; + bh_frozen = nilfs_page_get_nth_block(page, n); + } + unlock_page(page); + page_cache_release(page); + } + return bh_frozen; +} + +static void nilfs_release_frozen_buffers(struct nilfs_shadow_map *shadow) +{ + struct list_head *head = &shadow->frozen_buffers; + struct buffer_head *bh; + + while (!list_empty(head)) { + bh = list_first_entry(head, struct buffer_head, + b_assoc_buffers); + list_del_init(&bh->b_assoc_buffers); + brelse(bh); /* drop ref-count to make it releasable */ + } +} + +/** + * nilfs_mdt_restore_from_shadow_map - restore dirty pages and bmap state + * @inode: inode of the metadata file + */ +void nilfs_mdt_restore_from_shadow_map(struct inode *inode) +{ + struct nilfs_mdt_info *mi = NILFS_MDT(inode); + struct nilfs_inode_info *ii = NILFS_I(inode); + struct nilfs_shadow_map *shadow = mi->mi_shadow; + + down_write(&mi->mi_sem); - if (mdi->mi_palloc_cache) - nilfs_palloc_destroy_cache(inode); - nilfs_mdt_clear(inode); + if (mi->mi_palloc_cache) + nilfs_palloc_clear_cache(inode); + + nilfs_clear_dirty_pages(inode->i_mapping); + nilfs_copy_back_pages(inode->i_mapping, &shadow->frozen_data); + + nilfs_clear_dirty_pages(&ii->i_btnode_cache); + nilfs_copy_back_pages(&ii->i_btnode_cache, &shadow->frozen_btnodes); + + nilfs_bmap_restore(ii->i_bmap, &shadow->bmap_store); + + up_write(&mi->mi_sem); +} + +/** + * nilfs_mdt_clear_shadow_map - truncate pages in shadow map caches + * @inode: inode of the metadata file + */ +void nilfs_mdt_clear_shadow_map(struct inode *inode) +{ + struct nilfs_mdt_info *mi = NILFS_MDT(inode); + struct nilfs_shadow_map *shadow = mi->mi_shadow; - kfree(mdi->mi_bgl); /* kfree(NULL) is safe */ - kfree(mdi); - nilfs_destroy_inode(inode); + down_write(&mi->mi_sem); + nilfs_release_frozen_buffers(shadow); + truncate_inode_pages(&shadow->frozen_data, 0); + truncate_inode_pages(&shadow->frozen_btnodes, 0); + up_write(&mi->mi_sem); } diff --git a/fs/nilfs2/mdt.h b/fs/nilfs2/mdt.h index 6c4bbb0..b13734b 100644 --- a/fs/nilfs2/mdt.h +++ b/fs/nilfs2/mdt.h @@ -28,26 +28,33 @@ #include "nilfs.h" #include "page.h" +struct nilfs_shadow_map { + struct nilfs_bmap_store bmap_store; + struct address_space frozen_data; + struct address_space frozen_btnodes; + struct list_head frozen_buffers; +}; + /** * struct nilfs_mdt_info - on-memory private data of meta data files - * @mi_nilfs: back pointer to the_nilfs struct * @mi_sem: reader/writer semaphore for meta data operations * @mi_bgl: per-blockgroup locking * @mi_entry_size: size of an entry * @mi_first_entry_offset: offset to the first entry * @mi_entries_per_block: number of entries in a block * @mi_palloc_cache: persistent object allocator cache + * @mi_shadow: shadow of bmap and page caches * @mi_blocks_per_group: number of blocks in a group * @mi_blocks_per_desc_block: number of blocks per descriptor block */ struct nilfs_mdt_info { - struct the_nilfs *mi_nilfs; struct rw_semaphore mi_sem; struct blockgroup_lock *mi_bgl; unsigned mi_entry_size; unsigned mi_first_entry_offset; unsigned long mi_entries_per_block; struct nilfs_palloc_cache *mi_palloc_cache; + struct nilfs_shadow_map *mi_shadow; unsigned long mi_blocks_per_group; unsigned long mi_blocks_per_desc_block; }; @@ -59,9 +66,7 @@ static inline struct nilfs_mdt_info *NILFS_MDT(const struct inode *inode) static inline struct the_nilfs *NILFS_I_NILFS(struct inode *inode) { - struct super_block *sb = inode->i_sb; - - return sb ? NILFS_SB(sb)->s_nilfs : NILFS_MDT(inode)->mi_nilfs; + return NILFS_SB(inode->i_sb)->s_nilfs; } /* Default GFP flags using highmem */ @@ -76,14 +81,17 @@ int nilfs_mdt_forget_block(struct inode *, unsigned long); int nilfs_mdt_mark_block_dirty(struct inode *, unsigned long); int nilfs_mdt_fetch_dirty(struct inode *); -struct inode *nilfs_mdt_new(struct the_nilfs *, struct super_block *, ino_t, - size_t); -struct inode *nilfs_mdt_new_common(struct the_nilfs *, struct super_block *, - ino_t, gfp_t, size_t); -void nilfs_mdt_destroy(struct inode *); +int nilfs_mdt_init(struct inode *inode, gfp_t gfp_mask, size_t objsz); void nilfs_mdt_set_entry_size(struct inode *, unsigned, unsigned); -void nilfs_mdt_set_shadow(struct inode *, struct inode *); +int nilfs_mdt_setup_shadow_map(struct inode *inode, + struct nilfs_shadow_map *shadow); +int nilfs_mdt_save_to_shadow_map(struct inode *inode); +void nilfs_mdt_restore_from_shadow_map(struct inode *inode); +void nilfs_mdt_clear_shadow_map(struct inode *inode); +int nilfs_mdt_freeze_buffer(struct inode *inode, struct buffer_head *bh); +struct buffer_head *nilfs_mdt_get_frozen_buffer(struct inode *inode, + struct buffer_head *bh); #define nilfs_mdt_mark_buffer_dirty(bh) nilfs_mark_buffer_dirty(bh) @@ -100,7 +108,7 @@ static inline void nilfs_mdt_clear_dirty(struct inode *inode) static inline __u64 nilfs_mdt_cno(struct inode *inode) { - return NILFS_MDT(inode)->mi_nilfs->ns_cno; + return NILFS_I_NILFS(inode)->ns_cno; } #define nilfs_mdt_bgl_lock(inode, bg) \ diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c index ad6ed2c..185d160 100644 --- a/fs/nilfs2/namei.c +++ b/fs/nilfs2/namei.c @@ -40,7 +40,11 @@ #include <linux/pagemap.h> #include "nilfs.h" +#include "export.h" +#define NILFS_FID_SIZE_NON_CONNECTABLE \ + (offsetof(struct nilfs_fid, parent_gen) / 4) +#define NILFS_FID_SIZE_CONNECTABLE (sizeof(struct nilfs_fid) / 4) static inline int nilfs_add_nondir(struct dentry *dentry, struct inode *inode) { @@ -70,29 +74,13 @@ nilfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) ino = nilfs_inode_by_name(dir, &dentry->d_name); inode = NULL; if (ino) { - inode = nilfs_iget(dir->i_sb, ino); + inode = nilfs_iget(dir->i_sb, NILFS_I(dir)->i_root, ino); if (IS_ERR(inode)) return ERR_CAST(inode); } return d_splice_alias(inode, dentry); } -struct dentry *nilfs_get_parent(struct dentry *child) -{ - unsigned long ino; - struct inode *inode; - struct qstr dotdot = {.name = "..", .len = 2}; - - ino = nilfs_inode_by_name(child->d_inode, &dotdot); - if (!ino) - return ERR_PTR(-ENOENT); - - inode = nilfs_iget(child->d_inode->i_sb, ino); - if (IS_ERR(inode)) - return ERR_CAST(inode); - return d_obtain_alias(inode); -} - /* * By the time this is called, we already have created * the directory cache entry for the new file, but it @@ -468,6 +456,115 @@ out: return err; } +/* + * Export operations + */ +static struct dentry *nilfs_get_parent(struct dentry *child) +{ + unsigned long ino; + struct inode *inode; + struct qstr dotdot = {.name = "..", .len = 2}; + struct nilfs_root *root; + + ino = nilfs_inode_by_name(child->d_inode, &dotdot); + if (!ino) + return ERR_PTR(-ENOENT); + + root = NILFS_I(child->d_inode)->i_root; + + inode = nilfs_iget(child->d_inode->i_sb, root, ino); + if (IS_ERR(inode)) + return ERR_CAST(inode); + + return d_obtain_alias(inode); +} + +static struct dentry *nilfs_get_dentry(struct super_block *sb, u64 cno, + u64 ino, u32 gen) +{ + struct nilfs_root *root; + struct inode *inode; + + if (ino < NILFS_FIRST_INO(sb) && ino != NILFS_ROOT_INO) + return ERR_PTR(-ESTALE); + + root = nilfs_lookup_root(NILFS_SB(sb)->s_nilfs, cno); + if (!root) + return ERR_PTR(-ESTALE); + + inode = nilfs_iget(sb, root, ino); + nilfs_put_root(root); + + if (IS_ERR(inode)) + return ERR_CAST(inode); + if (gen && inode->i_generation != gen) { + iput(inode); + return ERR_PTR(-ESTALE); + } + return d_obtain_alias(inode); +} + +static struct dentry *nilfs_fh_to_dentry(struct super_block *sb, struct fid *fh, + int fh_len, int fh_type) +{ + struct nilfs_fid *fid = (struct nilfs_fid *)fh; + + if ((fh_len != NILFS_FID_SIZE_NON_CONNECTABLE && + fh_len != NILFS_FID_SIZE_CONNECTABLE) || + (fh_type != FILEID_NILFS_WITH_PARENT && + fh_type != FILEID_NILFS_WITHOUT_PARENT)) + return NULL; + + return nilfs_get_dentry(sb, fid->cno, fid->ino, fid->gen); +} + +static struct dentry *nilfs_fh_to_parent(struct super_block *sb, struct fid *fh, + int fh_len, int fh_type) +{ + struct nilfs_fid *fid = (struct nilfs_fid *)fh; + + if (fh_len != NILFS_FID_SIZE_CONNECTABLE || + fh_type != FILEID_NILFS_WITH_PARENT) + return NULL; + + return nilfs_get_dentry(sb, fid->cno, fid->parent_ino, fid->parent_gen); +} + +static int nilfs_encode_fh(struct dentry *dentry, __u32 *fh, int *lenp, + int connectable) +{ + struct nilfs_fid *fid = (struct nilfs_fid *)fh; + struct inode *inode = dentry->d_inode; + struct nilfs_root *root = NILFS_I(inode)->i_root; + int type; + + if (*lenp < NILFS_FID_SIZE_NON_CONNECTABLE || + (connectable && *lenp < NILFS_FID_SIZE_CONNECTABLE)) + return 255; + + fid->cno = root->cno; + fid->ino = inode->i_ino; + fid->gen = inode->i_generation; + + if (connectable && !S_ISDIR(inode->i_mode)) { + struct inode *parent; + + spin_lock(&dentry->d_lock); + parent = dentry->d_parent->d_inode; + fid->parent_ino = parent->i_ino; + fid->parent_gen = parent->i_generation; + spin_unlock(&dentry->d_lock); + + type = FILEID_NILFS_WITH_PARENT; + *lenp = NILFS_FID_SIZE_CONNECTABLE; + } else { + type = FILEID_NILFS_WITHOUT_PARENT; + *lenp = NILFS_FID_SIZE_NON_CONNECTABLE; + } + + return type; +} + const struct inode_operations nilfs_dir_inode_operations = { .create = nilfs_create, .lookup = nilfs_lookup, @@ -491,4 +588,12 @@ const struct inode_operations nilfs_symlink_inode_operations = { .readlink = generic_readlink, .follow_link = page_follow_link_light, .put_link = page_put_link, + .permission = nilfs_permission, +}; + +const struct export_operations nilfs_export_ops = { + .encode_fh = nilfs_encode_fh, + .fh_to_dentry = nilfs_fh_to_dentry, + .fh_to_parent = nilfs_fh_to_parent, + .get_parent = nilfs_get_parent, }; diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h index d3d5404..f7560da 100644 --- a/fs/nilfs2/nilfs.h +++ b/fs/nilfs2/nilfs.h @@ -59,6 +59,7 @@ struct nilfs_inode_info { #endif struct buffer_head *i_bh; /* i_bh contains a new or dirty disk inode */ + struct nilfs_root *i_root; struct inode vfs_inode; }; @@ -100,7 +101,6 @@ enum { NILFS_I_INODE_DIRTY, /* write_inode is requested */ NILFS_I_BMAP, /* has bmap and btnode_cache */ NILFS_I_GCINODE, /* inode for GC, on memory only */ - NILFS_I_GCDAT, /* shadow DAT, on memory only */ }; /* @@ -192,7 +192,7 @@ static inline int nilfs_doing_construction(void) static inline struct inode *nilfs_dat_inode(const struct the_nilfs *nilfs) { - return nilfs_doing_gc() ? nilfs->ns_gc_dat : nilfs->ns_dat; + return nilfs->ns_dat; } /* @@ -200,12 +200,9 @@ static inline struct inode *nilfs_dat_inode(const struct the_nilfs *nilfs) */ #ifdef CONFIG_NILFS_POSIX_ACL #error "NILFS: not yet supported POSIX ACL" -extern int nilfs_permission(struct inode *, int, struct nameidata *); extern int nilfs_acl_chmod(struct inode *); extern int nilfs_init_acl(struct inode *, struct inode *); #else -#define nilfs_permission NULL - static inline int nilfs_acl_chmod(struct inode *inode) { return 0; @@ -247,11 +244,19 @@ extern int nilfs_get_block(struct inode *, sector_t, struct buffer_head *, int); extern void nilfs_set_inode_flags(struct inode *); extern int nilfs_read_inode_common(struct inode *, struct nilfs_inode *); extern void nilfs_write_inode_common(struct inode *, struct nilfs_inode *, int); -extern struct inode *nilfs_iget(struct super_block *, unsigned long); +struct inode *nilfs_ilookup(struct super_block *sb, struct nilfs_root *root, + unsigned long ino); +struct inode *nilfs_iget_locked(struct super_block *sb, struct nilfs_root *root, + unsigned long ino); +struct inode *nilfs_iget(struct super_block *sb, struct nilfs_root *root, + unsigned long ino); +extern struct inode *nilfs_iget_for_gc(struct super_block *sb, + unsigned long ino, __u64 cno); extern void nilfs_update_inode(struct inode *, struct buffer_head *); extern void nilfs_truncate(struct inode *); extern void nilfs_evict_inode(struct inode *); extern int nilfs_setattr(struct dentry *, struct iattr *); +int nilfs_permission(struct inode *inode, int mask); extern int nilfs_load_inode_block(struct nilfs_sb_info *, struct inode *, struct buffer_head **); extern int nilfs_inode_dirty(struct inode *); @@ -260,11 +265,7 @@ extern int nilfs_set_file_dirty(struct nilfs_sb_info *, struct inode *, extern int nilfs_mark_inode_dirty(struct inode *); extern void nilfs_dirty_inode(struct inode *); -/* namei.c */ -extern struct dentry *nilfs_get_parent(struct dentry *); - /* super.c */ -extern struct inode *nilfs_alloc_inode_common(struct the_nilfs *); extern struct inode *nilfs_alloc_inode(struct super_block *); extern void nilfs_destroy_inode(struct inode *); extern void nilfs_error(struct super_block *, const char *, const char *, ...) @@ -283,8 +284,9 @@ extern struct nilfs_super_block **nilfs_prepare_super(struct nilfs_sb_info *, int flip); extern int nilfs_commit_super(struct nilfs_sb_info *, int); extern int nilfs_cleanup_super(struct nilfs_sb_info *); -extern int nilfs_attach_checkpoint(struct nilfs_sb_info *, __u64); -extern void nilfs_detach_checkpoint(struct nilfs_sb_info *); +int nilfs_attach_checkpoint(struct nilfs_sb_info *sbi, __u64 cno, int curr_mnt, + struct nilfs_root **root); +int nilfs_checkpoint_is_mounted(struct super_block *sb, __u64 cno); /* gcinode.c */ int nilfs_gccache_submit_read_data(struct inode *, sector_t, sector_t, __u64, @@ -292,16 +294,8 @@ int nilfs_gccache_submit_read_data(struct inode *, sector_t, sector_t, __u64, int nilfs_gccache_submit_read_node(struct inode *, sector_t, __u64, struct buffer_head **); int nilfs_gccache_wait_and_mark_dirty(struct buffer_head *); -int nilfs_init_gccache(struct the_nilfs *); -void nilfs_destroy_gccache(struct the_nilfs *); -void nilfs_clear_gcinode(struct inode *); -struct inode *nilfs_gc_iget(struct the_nilfs *, ino_t, __u64); -void nilfs_remove_all_gcinode(struct the_nilfs *); - -/* gcdat.c */ -int nilfs_init_gcdat_inode(struct the_nilfs *); -void nilfs_commit_gcdat_inode(struct the_nilfs *); -void nilfs_clear_gcdat_inode(struct the_nilfs *); +int nilfs_init_gcinode(struct inode *inode); +void nilfs_remove_all_gcinodes(struct the_nilfs *nilfs); /* * Inodes and files operations diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c index aab11db..a6c3c2e8 100644 --- a/fs/nilfs2/page.c +++ b/fs/nilfs2/page.c @@ -79,8 +79,8 @@ struct buffer_head *nilfs_grab_buffer(struct inode *inode, { int blkbits = inode->i_blkbits; pgoff_t index = blkoff >> (PAGE_CACHE_SHIFT - blkbits); - struct page *page, *opage; - struct buffer_head *bh, *obh; + struct page *page; + struct buffer_head *bh; page = grab_cache_page(mapping, index); if (unlikely(!page)) @@ -92,30 +92,6 @@ struct buffer_head *nilfs_grab_buffer(struct inode *inode, page_cache_release(page); return NULL; } - if (!buffer_uptodate(bh) && mapping->assoc_mapping != NULL) { - /* - * Shadow page cache uses assoc_mapping to point its original - * page cache. The following code tries the original cache - * if the given cache is a shadow and it didn't hit. - */ - opage = find_lock_page(mapping->assoc_mapping, index); - if (!opage) - return bh; - - obh = __nilfs_get_page_block(opage, blkoff, index, blkbits, - b_state); - if (buffer_uptodate(obh)) { - nilfs_copy_buffer(bh, obh); - if (buffer_dirty(obh)) { - nilfs_mark_buffer_dirty(bh); - if (!buffer_nilfs_node(bh) && NILFS_MDT(inode)) - nilfs_mdt_mark_dirty(inode); - } - } - brelse(obh); - unlock_page(opage); - page_cache_release(opage); - } return bh; } @@ -131,6 +107,7 @@ void nilfs_forget_buffer(struct buffer_head *bh) lock_buffer(bh); clear_buffer_nilfs_volatile(bh); clear_buffer_nilfs_checked(bh); + clear_buffer_nilfs_redirected(bh); clear_buffer_dirty(bh); if (nilfs_page_buffers_clean(page)) __nilfs_clear_page_dirty(page); @@ -483,6 +460,7 @@ void nilfs_clear_dirty_pages(struct address_space *mapping) clear_buffer_dirty(bh); clear_buffer_nilfs_volatile(bh); clear_buffer_nilfs_checked(bh); + clear_buffer_nilfs_redirected(bh); clear_buffer_uptodate(bh); clear_buffer_mapped(bh); unlock_buffer(bh); @@ -513,6 +491,31 @@ unsigned nilfs_page_count_clean_buffers(struct page *page, } return nc; } + +void nilfs_mapping_init_once(struct address_space *mapping) +{ + memset(mapping, 0, sizeof(*mapping)); + INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC); + spin_lock_init(&mapping->tree_lock); + INIT_LIST_HEAD(&mapping->private_list); + spin_lock_init(&mapping->private_lock); + + spin_lock_init(&mapping->i_mmap_lock); + INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap); + INIT_LIST_HEAD(&mapping->i_mmap_nonlinear); +} + +void nilfs_mapping_init(struct address_space *mapping, + struct backing_dev_info *bdi, + const struct address_space_operations *aops) +{ + mapping->host = NULL; + mapping->flags = 0; + mapping_set_gfp_mask(mapping, GFP_NOFS); + mapping->assoc_mapping = NULL; + mapping->backing_dev_info = bdi; + mapping->a_ops = aops; +} /* * NILFS2 needs clear_page_dirty() in the following two cases: diff --git a/fs/nilfs2/page.h b/fs/nilfs2/page.h index f53d8da..fb9e8a8 100644 --- a/fs/nilfs2/page.h +++ b/fs/nilfs2/page.h @@ -35,12 +35,14 @@ enum { BH_NILFS_Node, BH_NILFS_Volatile, BH_NILFS_Checked, + BH_NILFS_Redirected, }; BUFFER_FNS(NILFS_Allocated, nilfs_allocated) /* nilfs private buffers */ BUFFER_FNS(NILFS_Node, nilfs_node) /* nilfs node buffers */ BUFFER_FNS(NILFS_Volatile, nilfs_volatile) BUFFER_FNS(NILFS_Checked, nilfs_checked) /* buffer is verified */ +BUFFER_FNS(NILFS_Redirected, nilfs_redirected) /* redirected to a copy */ void nilfs_mark_buffer_dirty(struct buffer_head *bh); @@ -59,6 +61,10 @@ void nilfs_free_private_page(struct page *); int nilfs_copy_dirty_pages(struct address_space *, struct address_space *); void nilfs_copy_back_pages(struct address_space *, struct address_space *); void nilfs_clear_dirty_pages(struct address_space *); +void nilfs_mapping_init_once(struct address_space *mapping); +void nilfs_mapping_init(struct address_space *mapping, + struct backing_dev_info *bdi, + const struct address_space_operations *aops); unsigned nilfs_page_count_clean_buffers(struct page *, unsigned, unsigned); #define NILFS_PAGE_BUG(page, m, a...) \ diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c index d0c35ef..5d2711c2 100644 --- a/fs/nilfs2/recovery.c +++ b/fs/nilfs2/recovery.c @@ -440,7 +440,6 @@ static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs, segnum[2] = ri->ri_segnum; segnum[3] = ri->ri_nextnum; - nilfs_attach_writer(nilfs, sbi); /* * Releasing the next segment of the latest super root. * The next segment is invalidated by this recovery. @@ -480,7 +479,6 @@ static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs, failed: /* No need to recover sufile because it will be destroyed on error */ - nilfs_detach_writer(nilfs, sbi); return err; } @@ -504,6 +502,7 @@ static int nilfs_recovery_copy_block(struct the_nilfs *nilfs, static int nilfs_recover_dsync_blocks(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, + struct nilfs_root *root, struct list_head *head, unsigned long *nr_salvaged_blocks) { @@ -515,7 +514,7 @@ static int nilfs_recover_dsync_blocks(struct the_nilfs *nilfs, int err = 0, err2 = 0; list_for_each_entry_safe(rb, n, head, list) { - inode = nilfs_iget(sbi->s_super, rb->ino); + inode = nilfs_iget(sbi->s_super, root, rb->ino); if (IS_ERR(inode)) { err = PTR_ERR(inode); inode = NULL; @@ -578,6 +577,7 @@ static int nilfs_recover_dsync_blocks(struct the_nilfs *nilfs, */ static int nilfs_do_roll_forward(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, + struct nilfs_root *root, struct nilfs_recovery_info *ri) { struct buffer_head *bh_sum = NULL; @@ -597,7 +597,6 @@ static int nilfs_do_roll_forward(struct the_nilfs *nilfs, }; int state = RF_INIT_ST; - nilfs_attach_writer(nilfs, sbi); pseg_start = ri->ri_lsegs_start; seg_seq = ri->ri_lsegs_start_seq; segnum = nilfs_get_segnum_of_block(nilfs, pseg_start); @@ -649,7 +648,7 @@ static int nilfs_do_roll_forward(struct the_nilfs *nilfs, goto failed; if (flags & NILFS_SS_LOGEND) { err = nilfs_recover_dsync_blocks( - nilfs, sbi, &dsync_blocks, + nilfs, sbi, root, &dsync_blocks, &nsalvaged_blocks); if (unlikely(err)) goto failed; @@ -688,7 +687,6 @@ static int nilfs_do_roll_forward(struct the_nilfs *nilfs, out: brelse(bh_sum); dispose_recovery_list(&dsync_blocks); - nilfs_detach_writer(nilfs, sbi); return err; confused: @@ -746,19 +744,20 @@ int nilfs_salvage_orphan_logs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, struct nilfs_recovery_info *ri) { + struct nilfs_root *root; int err; if (ri->ri_lsegs_start == 0 || ri->ri_lsegs_end == 0) return 0; - err = nilfs_attach_checkpoint(sbi, ri->ri_cno); + err = nilfs_attach_checkpoint(sbi, ri->ri_cno, true, &root); if (unlikely(err)) { printk(KERN_ERR "NILFS: error loading the latest checkpoint.\n"); return err; } - err = nilfs_do_roll_forward(nilfs, sbi, ri); + err = nilfs_do_roll_forward(nilfs, sbi, root, ri); if (unlikely(err)) goto failed; @@ -770,7 +769,7 @@ int nilfs_salvage_orphan_logs(struct the_nilfs *nilfs, goto failed; } - err = nilfs_attach_segment_constructor(sbi); + err = nilfs_attach_segment_constructor(sbi, root); if (unlikely(err)) goto failed; @@ -788,7 +787,7 @@ int nilfs_salvage_orphan_logs(struct the_nilfs *nilfs, } failed: - nilfs_detach_checkpoint(sbi); + nilfs_put_root(root); return err; } diff --git a/fs/nilfs2/sb.h b/fs/nilfs2/sb.h index 0776ccc..35a0715 100644 --- a/fs/nilfs2/sb.h +++ b/fs/nilfs2/sb.h @@ -42,11 +42,6 @@ struct nilfs_sc_info; * NILFS super-block data in memory */ struct nilfs_sb_info { - /* Snapshot status */ - __u64 s_snapshot_cno; /* Checkpoint number */ - atomic_t s_inodes_count; - atomic_t s_blocks_count; /* Reserved (might be deleted) */ - /* Mount options */ unsigned long s_mount_opt; uid_t s_resuid; @@ -59,8 +54,6 @@ struct nilfs_sb_info { /* Fundamental members */ struct super_block *s_super; /* reverse pointer to super_block */ struct the_nilfs *s_nilfs; - struct list_head s_list; /* list head for nilfs->ns_supers */ - atomic_t s_count; /* reference count */ /* Segment constructor */ struct list_head s_dirty_files; /* dirty files list */ @@ -68,9 +61,6 @@ struct nilfs_sb_info { spinlock_t s_inode_lock; /* Lock for the nilfs inode. It covers s_dirty_files list */ - /* Metadata files */ - struct inode *s_ifile; /* index file inode */ - /* Inode allocator */ spinlock_t s_next_gen_lock; u32 s_next_generation; diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c index 4588fb9..0f83e93 100644 --- a/fs/nilfs2/segbuf.c +++ b/fs/nilfs2/segbuf.c @@ -371,7 +371,8 @@ static int nilfs_segbuf_submit_bio(struct nilfs_segment_buffer *segbuf, struct bio *bio = wi->bio; int err; - if (segbuf->sb_nbio > 0 && bdi_write_congested(wi->nilfs->ns_bdi)) { + if (segbuf->sb_nbio > 0 && + bdi_write_congested(segbuf->sb_super->s_bdi)) { wait_for_completion(&segbuf->sb_bio_event); segbuf->sb_nbio--; if (unlikely(atomic_read(&segbuf->sb_err))) { diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 9fd051a..d926af6 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -191,6 +191,8 @@ int nilfs_transaction_begin(struct super_block *sb, if (ret > 0) return 0; + vfs_check_frozen(sb, SB_FREEZE_WRITE); + sbi = NILFS_SB(sb); nilfs = sbi->s_nilfs; down_read(&nilfs->ns_segctor_sem); @@ -366,8 +368,7 @@ static int nilfs_segctor_reset_segment_buffer(struct nilfs_sc_info *sci) if (nilfs_doing_gc()) flags = NILFS_SS_GC; - err = nilfs_segbuf_reset(segbuf, flags, sci->sc_seg_ctime, - sci->sc_sbi->s_nilfs->ns_cno); + err = nilfs_segbuf_reset(segbuf, flags, sci->sc_seg_ctime, sci->sc_cno); if (unlikely(err)) return err; @@ -440,17 +441,26 @@ static void nilfs_segctor_end_finfo(struct nilfs_sc_info *sci, struct nilfs_finfo *finfo; struct nilfs_inode_info *ii; struct nilfs_segment_buffer *segbuf; + __u64 cno; if (sci->sc_blk_cnt == 0) return; ii = NILFS_I(inode); + + if (test_bit(NILFS_I_GCINODE, &ii->i_state)) + cno = ii->i_cno; + else if (NILFS_ROOT_METADATA_FILE(inode->i_ino)) + cno = 0; + else + cno = sci->sc_cno; + finfo = nilfs_segctor_map_segsum_entry(sci, &sci->sc_finfo_ptr, sizeof(*finfo)); finfo->fi_ino = cpu_to_le64(inode->i_ino); finfo->fi_nblocks = cpu_to_le32(sci->sc_blk_cnt); finfo->fi_ndatablk = cpu_to_le32(sci->sc_datablk_cnt); - finfo->fi_cno = cpu_to_le64(ii->i_cno); + finfo->fi_cno = cpu_to_le64(cno); segbuf = sci->sc_curseg; segbuf->sb_sum.sumbytes = sci->sc_binfo_ptr.offset + @@ -755,12 +765,12 @@ static void nilfs_dispose_list(struct nilfs_sb_info *sbi, } } -static int nilfs_test_metadata_dirty(struct nilfs_sb_info *sbi) +static int nilfs_test_metadata_dirty(struct the_nilfs *nilfs, + struct nilfs_root *root) { - struct the_nilfs *nilfs = sbi->s_nilfs; int ret = 0; - if (nilfs_mdt_fetch_dirty(sbi->s_ifile)) + if (nilfs_mdt_fetch_dirty(root->ifile)) ret++; if (nilfs_mdt_fetch_dirty(nilfs->ns_cpfile)) ret++; @@ -785,7 +795,7 @@ static int nilfs_segctor_confirm(struct nilfs_sc_info *sci) struct nilfs_sb_info *sbi = sci->sc_sbi; int ret = 0; - if (nilfs_test_metadata_dirty(sbi)) + if (nilfs_test_metadata_dirty(sbi->s_nilfs, sci->sc_root)) set_bit(NILFS_SC_DIRTY, &sci->sc_flags); spin_lock(&sbi->s_inode_lock); @@ -801,7 +811,7 @@ static void nilfs_segctor_clear_metadata_dirty(struct nilfs_sc_info *sci) struct nilfs_sb_info *sbi = sci->sc_sbi; struct the_nilfs *nilfs = sbi->s_nilfs; - nilfs_mdt_clear_dirty(sbi->s_ifile); + nilfs_mdt_clear_dirty(sci->sc_root->ifile); nilfs_mdt_clear_dirty(nilfs->ns_cpfile); nilfs_mdt_clear_dirty(nilfs->ns_sufile); nilfs_mdt_clear_dirty(nilfs_dat_inode(nilfs)); @@ -848,9 +858,9 @@ static int nilfs_segctor_fill_in_checkpoint(struct nilfs_sc_info *sci) raw_cp->cp_snapshot_list.ssl_next = 0; raw_cp->cp_snapshot_list.ssl_prev = 0; raw_cp->cp_inodes_count = - cpu_to_le64(atomic_read(&sbi->s_inodes_count)); + cpu_to_le64(atomic_read(&sci->sc_root->inodes_count)); raw_cp->cp_blocks_count = - cpu_to_le64(atomic_read(&sbi->s_blocks_count)); + cpu_to_le64(atomic_read(&sci->sc_root->blocks_count)); raw_cp->cp_nblk_inc = cpu_to_le64(sci->sc_nblk_inc + sci->sc_nblk_this_inc); raw_cp->cp_create = cpu_to_le64(sci->sc_seg_ctime); @@ -861,7 +871,8 @@ static int nilfs_segctor_fill_in_checkpoint(struct nilfs_sc_info *sci) else nilfs_checkpoint_set_minor(raw_cp); - nilfs_write_inode_common(sbi->s_ifile, &raw_cp->cp_ifile_inode, 1); + nilfs_write_inode_common(sci->sc_root->ifile, + &raw_cp->cp_ifile_inode, 1); nilfs_cpfile_put_checkpoint(nilfs->ns_cpfile, nilfs->ns_cno, bh_cp); return 0; @@ -886,13 +897,12 @@ static void nilfs_fill_in_file_bmap(struct inode *ifile, } } -static void nilfs_segctor_fill_in_file_bmap(struct nilfs_sc_info *sci, - struct inode *ifile) +static void nilfs_segctor_fill_in_file_bmap(struct nilfs_sc_info *sci) { struct nilfs_inode_info *ii; list_for_each_entry(ii, &sci->sc_dirty_files, i_dirty) { - nilfs_fill_in_file_bmap(ifile, ii); + nilfs_fill_in_file_bmap(sci->sc_root->ifile, ii); set_bit(NILFS_I_COLLECTED, &ii->i_state); } } @@ -1135,7 +1145,7 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode) sci->sc_stage.flags |= NILFS_CF_IFILE_STARTED; /* Fall through */ case NILFS_ST_IFILE: - err = nilfs_segctor_scan_file(sci, sbi->s_ifile, + err = nilfs_segctor_scan_file(sci, sci->sc_root->ifile, &nilfs_sc_file_ops); if (unlikely(err)) break; @@ -1900,6 +1910,7 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci) set_buffer_uptodate(bh); clear_buffer_dirty(bh); clear_buffer_nilfs_volatile(bh); + clear_buffer_nilfs_redirected(bh); if (bh == segbuf->sb_super_root) { if (bh->b_page != bd_page) { end_page_writeback(bd_page); @@ -1936,11 +1947,9 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci) nilfs_drop_collected_inodes(&sci->sc_dirty_files); - if (nilfs_doing_gc()) { + if (nilfs_doing_gc()) nilfs_drop_collected_inodes(&sci->sc_gc_inodes); - if (update_sr) - nilfs_commit_gcdat_inode(nilfs); - } else + else nilfs->ns_nongc_ctime = sci->sc_seg_ctime; sci->sc_nblk_inc += sci->sc_nblk_this_inc; @@ -1976,7 +1985,7 @@ static int nilfs_segctor_check_in_files(struct nilfs_sc_info *sci, struct nilfs_sb_info *sbi) { struct nilfs_inode_info *ii, *n; - __u64 cno = sbi->s_nilfs->ns_cno; + struct inode *ifile = sci->sc_root->ifile; spin_lock(&sbi->s_inode_lock); retry: @@ -1987,14 +1996,14 @@ static int nilfs_segctor_check_in_files(struct nilfs_sc_info *sci, spin_unlock(&sbi->s_inode_lock); err = nilfs_ifile_get_inode_block( - sbi->s_ifile, ii->vfs_inode.i_ino, &ibh); + ifile, ii->vfs_inode.i_ino, &ibh); if (unlikely(err)) { nilfs_warning(sbi->s_super, __func__, "failed to get inode block.\n"); return err; } nilfs_mdt_mark_buffer_dirty(ibh); - nilfs_mdt_mark_dirty(sbi->s_ifile); + nilfs_mdt_mark_dirty(ifile); spin_lock(&sbi->s_inode_lock); if (likely(!ii->i_bh)) ii->i_bh = ibh; @@ -2002,7 +2011,6 @@ static int nilfs_segctor_check_in_files(struct nilfs_sc_info *sci, brelse(ibh); goto retry; } - ii->i_cno = cno; clear_bit(NILFS_I_QUEUED, &ii->i_state); set_bit(NILFS_I_BUSY, &ii->i_state); @@ -2011,8 +2019,6 @@ static int nilfs_segctor_check_in_files(struct nilfs_sc_info *sci, } spin_unlock(&sbi->s_inode_lock); - NILFS_I(sbi->s_ifile)->i_cno = cno; - return 0; } @@ -2021,19 +2027,13 @@ static void nilfs_segctor_check_out_files(struct nilfs_sc_info *sci, { struct nilfs_transaction_info *ti = current->journal_info; struct nilfs_inode_info *ii, *n; - __u64 cno = sbi->s_nilfs->ns_cno; spin_lock(&sbi->s_inode_lock); list_for_each_entry_safe(ii, n, &sci->sc_dirty_files, i_dirty) { if (!test_and_clear_bit(NILFS_I_UPDATED, &ii->i_state) || - test_bit(NILFS_I_DIRTY, &ii->i_state)) { - /* The current checkpoint number (=nilfs->ns_cno) is - changed between check-in and check-out only if the - super root is written out. So, we can update i_cno - for the inodes that remain in the dirty list. */ - ii->i_cno = cno; + test_bit(NILFS_I_DIRTY, &ii->i_state)) continue; - } + clear_bit(NILFS_I_BUSY, &ii->i_state); brelse(ii->i_bh); ii->i_bh = NULL; @@ -2054,12 +2054,13 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode) int err; sci->sc_stage.scnt = NILFS_ST_INIT; + sci->sc_cno = nilfs->ns_cno; err = nilfs_segctor_check_in_files(sci, sbi); if (unlikely(err)) goto out; - if (nilfs_test_metadata_dirty(sbi)) + if (nilfs_test_metadata_dirty(nilfs, sci->sc_root)) set_bit(NILFS_SC_DIRTY, &sci->sc_flags); if (nilfs_segctor_clean(sci)) @@ -2091,7 +2092,7 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode) goto failed; if (sci->sc_stage.flags & NILFS_CF_IFILE_STARTED) - nilfs_segctor_fill_in_file_bmap(sci, sbi->s_ifile); + nilfs_segctor_fill_in_file_bmap(sci); if (mode == SC_LSEG_SR && sci->sc_stage.scnt >= NILFS_ST_CPFILE) { @@ -2452,9 +2453,8 @@ nilfs_remove_written_gcinodes(struct the_nilfs *nilfs, struct list_head *head) list_for_each_entry_safe(ii, n, head, i_dirty) { if (!test_bit(NILFS_I_UPDATED, &ii->i_state)) continue; - hlist_del_init(&ii->vfs_inode.i_hash); list_del_init(&ii->i_dirty); - nilfs_clear_gcinode(&ii->vfs_inode); + iput(&ii->vfs_inode); } } @@ -2472,13 +2472,15 @@ int nilfs_clean_segments(struct super_block *sb, struct nilfs_argv *argv, nilfs_transaction_lock(sbi, &ti, 1); - err = nilfs_init_gcdat_inode(nilfs); + err = nilfs_mdt_save_to_shadow_map(nilfs->ns_dat); if (unlikely(err)) goto out_unlock; err = nilfs_ioctl_prepare_clean_segments(nilfs, argv, kbufs); - if (unlikely(err)) + if (unlikely(err)) { + nilfs_mdt_restore_from_shadow_map(nilfs->ns_dat); goto out_unlock; + } sci->sc_freesegs = kbufs[4]; sci->sc_nfreesegs = argv[4].v_nmembs; @@ -2510,7 +2512,7 @@ int nilfs_clean_segments(struct super_block *sb, struct nilfs_argv *argv, out_unlock: sci->sc_freesegs = NULL; sci->sc_nfreesegs = 0; - nilfs_clear_gcdat_inode(nilfs); + nilfs_mdt_clear_shadow_map(nilfs->ns_dat); nilfs_transaction_unlock(sbi); return err; } @@ -2672,6 +2674,8 @@ static int nilfs_segctor_start_thread(struct nilfs_sc_info *sci) } static void nilfs_segctor_kill_thread(struct nilfs_sc_info *sci) + __acquires(&sci->sc_state_lock) + __releases(&sci->sc_state_lock) { sci->sc_state |= NILFS_SEGCTOR_QUIT; @@ -2686,7 +2690,8 @@ static void nilfs_segctor_kill_thread(struct nilfs_sc_info *sci) /* * Setup & clean-up functions */ -static struct nilfs_sc_info *nilfs_segctor_new(struct nilfs_sb_info *sbi) +static struct nilfs_sc_info *nilfs_segctor_new(struct nilfs_sb_info *sbi, + struct nilfs_root *root) { struct nilfs_sc_info *sci; @@ -2697,6 +2702,9 @@ static struct nilfs_sc_info *nilfs_segctor_new(struct nilfs_sb_info *sbi) sci->sc_sbi = sbi; sci->sc_super = sbi->s_super; + nilfs_get_root(root); + sci->sc_root = root; + init_waitqueue_head(&sci->sc_wait_request); init_waitqueue_head(&sci->sc_wait_daemon); init_waitqueue_head(&sci->sc_wait_task); @@ -2771,6 +2779,8 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci) WARN_ON(!list_empty(&sci->sc_segbufs)); WARN_ON(!list_empty(&sci->sc_write_logs)); + nilfs_put_root(sci->sc_root); + down_write(&sbi->s_nilfs->ns_segctor_sem); del_timer_sync(&sci->sc_timer); @@ -2780,6 +2790,7 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci) /** * nilfs_attach_segment_constructor - attach a segment constructor * @sbi: nilfs_sb_info + * @root: root object of the current filesystem tree * * nilfs_attach_segment_constructor() allocates a struct nilfs_sc_info, * initializes it, and starts the segment constructor. @@ -2789,9 +2800,9 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci) * * %-ENOMEM - Insufficient memory available. */ -int nilfs_attach_segment_constructor(struct nilfs_sb_info *sbi) +int nilfs_attach_segment_constructor(struct nilfs_sb_info *sbi, + struct nilfs_root *root) { - struct the_nilfs *nilfs = sbi->s_nilfs; int err; if (NILFS_SC(sbi)) { @@ -2803,14 +2814,12 @@ int nilfs_attach_segment_constructor(struct nilfs_sb_info *sbi) nilfs_detach_segment_constructor(sbi); } - sbi->s_sc_info = nilfs_segctor_new(sbi); + sbi->s_sc_info = nilfs_segctor_new(sbi, root); if (!sbi->s_sc_info) return -ENOMEM; - nilfs_attach_writer(nilfs, sbi); err = nilfs_segctor_start_thread(NILFS_SC(sbi)); if (err) { - nilfs_detach_writer(nilfs, sbi); kfree(sbi->s_sc_info); sbi->s_sc_info = NULL; } @@ -2847,5 +2856,4 @@ void nilfs_detach_segment_constructor(struct nilfs_sb_info *sbi) up_write(&nilfs->ns_segctor_sem); nilfs_dispose_list(sbi, &garbage_list, 1); - nilfs_detach_writer(nilfs, sbi); } diff --git a/fs/nilfs2/segment.h b/fs/nilfs2/segment.h index 17c487b..cd8056e 100644 --- a/fs/nilfs2/segment.h +++ b/fs/nilfs2/segment.h @@ -29,6 +29,8 @@ #include <linux/nilfs2_fs.h> #include "sb.h" +struct nilfs_root; + /** * struct nilfs_recovery_info - Recovery information * @ri_need_recovery: Recovery status @@ -87,6 +89,7 @@ struct nilfs_segsum_pointer { * struct nilfs_sc_info - Segment constructor information * @sc_super: Back pointer to super_block struct * @sc_sbi: Back pointer to nilfs_sb_info struct + * @sc_root: root object of the current filesystem tree * @sc_nblk_inc: Block count of current generation * @sc_dirty_files: List of files to be written * @sc_gc_inodes: List of GC inodes having blocks to be written @@ -107,6 +110,7 @@ struct nilfs_segsum_pointer { * @sc_datablk_cnt: Data block count of a file * @sc_nblk_this_inc: Number of blocks included in the current logical segment * @sc_seg_ctime: Creation time + * @sc_cno: checkpoint number of current log * @sc_flags: Internal flags * @sc_state_lock: spinlock for sc_state and so on * @sc_state: Segctord state flags @@ -128,6 +132,7 @@ struct nilfs_segsum_pointer { struct nilfs_sc_info { struct super_block *sc_super; struct nilfs_sb_info *sc_sbi; + struct nilfs_root *sc_root; unsigned long sc_nblk_inc; @@ -156,7 +161,7 @@ struct nilfs_sc_info { unsigned long sc_datablk_cnt; unsigned long sc_nblk_this_inc; time_t sc_seg_ctime; - + __u64 sc_cno; unsigned long sc_flags; spinlock_t sc_state_lock; @@ -230,7 +235,8 @@ extern void nilfs_flush_segment(struct super_block *, ino_t); extern int nilfs_clean_segments(struct super_block *, struct nilfs_argv *, void **); -extern int nilfs_attach_segment_constructor(struct nilfs_sb_info *); +int nilfs_attach_segment_constructor(struct nilfs_sb_info *sbi, + struct nilfs_root *root); extern void nilfs_detach_segment_constructor(struct nilfs_sb_info *); /* recovery.c */ diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c index 3c6cc60..1d6f488 100644 --- a/fs/nilfs2/sufile.c +++ b/fs/nilfs2/sufile.c @@ -505,7 +505,7 @@ int nilfs_sufile_get_stat(struct inode *sufile, struct nilfs_sustat *sustat) { struct buffer_head *header_bh; struct nilfs_sufile_header *header; - struct the_nilfs *nilfs = NILFS_MDT(sufile)->mi_nilfs; + struct the_nilfs *nilfs = NILFS_I_NILFS(sufile); void *kaddr; int ret; @@ -583,7 +583,7 @@ ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum, void *buf, struct nilfs_segment_usage *su; struct nilfs_suinfo *si = buf; size_t susz = NILFS_MDT(sufile)->mi_entry_size; - struct the_nilfs *nilfs = NILFS_MDT(sufile)->mi_nilfs; + struct the_nilfs *nilfs = NILFS_I_NILFS(sufile); void *kaddr; unsigned long nsegs, segusages_per_block; ssize_t n; @@ -635,46 +635,55 @@ ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum, void *buf, } /** - * nilfs_sufile_read - read sufile inode - * @sufile: sufile inode + * nilfs_sufile_read - read or get sufile inode + * @sb: super block instance + * @susize: size of a segment usage entry * @raw_inode: on-disk sufile inode + * @inodep: buffer to store the inode */ -int nilfs_sufile_read(struct inode *sufile, struct nilfs_inode *raw_inode) +int nilfs_sufile_read(struct super_block *sb, size_t susize, + struct nilfs_inode *raw_inode, struct inode **inodep) { - struct nilfs_sufile_info *sui = NILFS_SUI(sufile); + struct inode *sufile; + struct nilfs_sufile_info *sui; struct buffer_head *header_bh; struct nilfs_sufile_header *header; void *kaddr; - int ret; + int err; - ret = nilfs_read_inode_common(sufile, raw_inode); - if (ret < 0) - return ret; + sufile = nilfs_iget_locked(sb, NULL, NILFS_SUFILE_INO); + if (unlikely(!sufile)) + return -ENOMEM; + if (!(sufile->i_state & I_NEW)) + goto out; - ret = nilfs_sufile_get_header_block(sufile, &header_bh); - if (!ret) { - kaddr = kmap_atomic(header_bh->b_page, KM_USER0); - header = kaddr + bh_offset(header_bh); - sui->ncleansegs = le64_to_cpu(header->sh_ncleansegs); - kunmap_atomic(kaddr, KM_USER0); - brelse(header_bh); - } - return ret; -} + err = nilfs_mdt_init(sufile, NILFS_MDT_GFP, sizeof(*sui)); + if (err) + goto failed; -/** - * nilfs_sufile_new - create sufile - * @nilfs: nilfs object - * @susize: size of a segment usage entry - */ -struct inode *nilfs_sufile_new(struct the_nilfs *nilfs, size_t susize) -{ - struct inode *sufile; + nilfs_mdt_set_entry_size(sufile, susize, + sizeof(struct nilfs_sufile_header)); + + err = nilfs_read_inode_common(sufile, raw_inode); + if (err) + goto failed; + + err = nilfs_sufile_get_header_block(sufile, &header_bh); + if (err) + goto failed; - sufile = nilfs_mdt_new(nilfs, NULL, NILFS_SUFILE_INO, - sizeof(struct nilfs_sufile_info)); - if (sufile) - nilfs_mdt_set_entry_size(sufile, susize, - sizeof(struct nilfs_sufile_header)); - return sufile; + sui = NILFS_SUI(sufile); + kaddr = kmap_atomic(header_bh->b_page, KM_USER0); + header = kaddr + bh_offset(header_bh); + sui->ncleansegs = le64_to_cpu(header->sh_ncleansegs); + kunmap_atomic(kaddr, KM_USER0); + brelse(header_bh); + + unlock_new_inode(sufile); + out: + *inodep = sufile; + return 0; + failed: + iget_failed(sufile); + return err; } diff --git a/fs/nilfs2/sufile.h b/fs/nilfs2/sufile.h index 15163b8..a943fba 100644 --- a/fs/nilfs2/sufile.h +++ b/fs/nilfs2/sufile.h @@ -31,7 +31,7 @@ static inline unsigned long nilfs_sufile_get_nsegments(struct inode *sufile) { - return NILFS_MDT(sufile)->mi_nilfs->ns_nsegments; + return NILFS_I_NILFS(sufile)->ns_nsegments; } unsigned long nilfs_sufile_get_ncleansegs(struct inode *sufile); @@ -61,8 +61,8 @@ void nilfs_sufile_do_cancel_free(struct inode *, __u64, struct buffer_head *, void nilfs_sufile_do_set_error(struct inode *, __u64, struct buffer_head *, struct buffer_head *); -int nilfs_sufile_read(struct inode *sufile, struct nilfs_inode *raw_inode); -struct inode *nilfs_sufile_new(struct the_nilfs *nilfs, size_t susize); +int nilfs_sufile_read(struct super_block *sb, size_t susize, + struct nilfs_inode *raw_inode, struct inode **inodep); /** * nilfs_sufile_scrap - make a segment garbage diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index f3b7520..35ae03c 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -48,10 +48,10 @@ #include <linux/vfs.h> #include <linux/writeback.h> #include <linux/kobject.h> -#include <linux/exportfs.h> #include <linux/seq_file.h> #include <linux/mount.h> #include "nilfs.h" +#include "export.h" #include "mdt.h" #include "alloc.h" #include "btree.h" @@ -68,11 +68,12 @@ MODULE_DESCRIPTION("A New Implementation of the Log-structured Filesystem " "(NILFS)"); MODULE_LICENSE("GPL"); -struct kmem_cache *nilfs_inode_cachep; +static struct kmem_cache *nilfs_inode_cachep; struct kmem_cache *nilfs_transaction_cachep; struct kmem_cache *nilfs_segbuf_cachep; struct kmem_cache *nilfs_btree_path_cache; +static int nilfs_setup_super(struct nilfs_sb_info *sbi, int is_mount); static int nilfs_remount(struct super_block *sb, int *flags, char *data); static void nilfs_set_error(struct nilfs_sb_info *sbi) @@ -146,7 +147,7 @@ void nilfs_warning(struct super_block *sb, const char *function, } -struct inode *nilfs_alloc_inode_common(struct the_nilfs *nilfs) +struct inode *nilfs_alloc_inode(struct super_block *sb) { struct nilfs_inode_info *ii; @@ -155,18 +156,20 @@ struct inode *nilfs_alloc_inode_common(struct the_nilfs *nilfs) return NULL; ii->i_bh = NULL; ii->i_state = 0; + ii->i_cno = 0; ii->vfs_inode.i_version = 1; - nilfs_btnode_cache_init(&ii->i_btnode_cache, nilfs->ns_bdi); + nilfs_btnode_cache_init(&ii->i_btnode_cache, sb->s_bdi); return &ii->vfs_inode; } -struct inode *nilfs_alloc_inode(struct super_block *sb) -{ - return nilfs_alloc_inode_common(NILFS_SB(sb)->s_nilfs); -} - void nilfs_destroy_inode(struct inode *inode) { + struct nilfs_mdt_info *mdi = NILFS_MDT(inode); + + if (mdi) { + kfree(mdi->mi_bgl); /* kfree(NULL) is safe */ + kfree(mdi); + } kmem_cache_free(nilfs_inode_cachep, NILFS_I(inode)); } @@ -340,16 +343,15 @@ static void nilfs_put_super(struct super_block *sb) nilfs_cleanup_super(sbi); up_write(&nilfs->ns_sem); } - down_write(&nilfs->ns_super_sem); - if (nilfs->ns_current == sbi) - nilfs->ns_current = NULL; - up_write(&nilfs->ns_super_sem); - nilfs_detach_checkpoint(sbi); - put_nilfs(sbi->s_nilfs); + iput(nilfs->ns_sufile); + iput(nilfs->ns_cpfile); + iput(nilfs->ns_dat); + + destroy_nilfs(nilfs); sbi->s_super = NULL; sb->s_fs_info = NULL; - nilfs_put_sbinfo(sbi); + kfree(sbi); } static int nilfs_sync_fs(struct super_block *sb, int wait) @@ -376,21 +378,22 @@ static int nilfs_sync_fs(struct super_block *sb, int wait) return err; } -int nilfs_attach_checkpoint(struct nilfs_sb_info *sbi, __u64 cno) +int nilfs_attach_checkpoint(struct nilfs_sb_info *sbi, __u64 cno, int curr_mnt, + struct nilfs_root **rootp) { struct the_nilfs *nilfs = sbi->s_nilfs; + struct nilfs_root *root; struct nilfs_checkpoint *raw_cp; struct buffer_head *bh_cp; - int err; + int err = -ENOMEM; - down_write(&nilfs->ns_super_sem); - list_add(&sbi->s_list, &nilfs->ns_supers); - up_write(&nilfs->ns_super_sem); + root = nilfs_find_or_create_root( + nilfs, curr_mnt ? NILFS_CPTREE_CURRENT_CNO : cno); + if (!root) + return err; - err = -ENOMEM; - sbi->s_ifile = nilfs_ifile_new(sbi, nilfs->ns_inode_size); - if (!sbi->s_ifile) - goto delist; + if (root->ifile) + goto reuse; /* already attached checkpoint */ down_read(&nilfs->ns_segctor_sem); err = nilfs_cpfile_get_checkpoint(nilfs->ns_cpfile, cno, 0, &raw_cp, @@ -406,45 +409,64 @@ int nilfs_attach_checkpoint(struct nilfs_sb_info *sbi, __u64 cno) } goto failed; } - err = nilfs_read_inode_common(sbi->s_ifile, &raw_cp->cp_ifile_inode); - if (unlikely(err)) + + err = nilfs_ifile_read(sbi->s_super, root, nilfs->ns_inode_size, + &raw_cp->cp_ifile_inode, &root->ifile); + if (err) goto failed_bh; - atomic_set(&sbi->s_inodes_count, le64_to_cpu(raw_cp->cp_inodes_count)); - atomic_set(&sbi->s_blocks_count, le64_to_cpu(raw_cp->cp_blocks_count)); + + atomic_set(&root->inodes_count, le64_to_cpu(raw_cp->cp_inodes_count)); + atomic_set(&root->blocks_count, le64_to_cpu(raw_cp->cp_blocks_count)); nilfs_cpfile_put_checkpoint(nilfs->ns_cpfile, cno, bh_cp); + + reuse: + *rootp = root; return 0; failed_bh: nilfs_cpfile_put_checkpoint(nilfs->ns_cpfile, cno, bh_cp); failed: - nilfs_mdt_destroy(sbi->s_ifile); - sbi->s_ifile = NULL; + nilfs_put_root(root); + + return err; +} - delist: - down_write(&nilfs->ns_super_sem); - list_del_init(&sbi->s_list); - up_write(&nilfs->ns_super_sem); +static int nilfs_freeze(struct super_block *sb) +{ + struct nilfs_sb_info *sbi = NILFS_SB(sb); + struct the_nilfs *nilfs = sbi->s_nilfs; + int err; + + if (sb->s_flags & MS_RDONLY) + return 0; + /* Mark super block clean */ + down_write(&nilfs->ns_sem); + err = nilfs_cleanup_super(sbi); + up_write(&nilfs->ns_sem); return err; } -void nilfs_detach_checkpoint(struct nilfs_sb_info *sbi) +static int nilfs_unfreeze(struct super_block *sb) { + struct nilfs_sb_info *sbi = NILFS_SB(sb); struct the_nilfs *nilfs = sbi->s_nilfs; - nilfs_mdt_destroy(sbi->s_ifile); - sbi->s_ifile = NULL; - down_write(&nilfs->ns_super_sem); - list_del_init(&sbi->s_list); - up_write(&nilfs->ns_super_sem); + if (sb->s_flags & MS_RDONLY) + return 0; + + down_write(&nilfs->ns_sem); + nilfs_setup_super(sbi, false); + up_write(&nilfs->ns_sem); + return 0; } static int nilfs_statfs(struct dentry *dentry, struct kstatfs *buf) { struct super_block *sb = dentry->d_sb; - struct nilfs_sb_info *sbi = NILFS_SB(sb); - struct the_nilfs *nilfs = sbi->s_nilfs; + struct nilfs_root *root = NILFS_I(dentry->d_inode)->i_root; + struct the_nilfs *nilfs = root->nilfs; u64 id = huge_encode_dev(sb->s_bdev->bd_dev); unsigned long long blocks; unsigned long overhead; @@ -480,7 +502,7 @@ static int nilfs_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_bfree = nfreeblocks; buf->f_bavail = (buf->f_bfree >= nrsvblocks) ? (buf->f_bfree - nrsvblocks) : 0; - buf->f_files = atomic_read(&sbi->s_inodes_count); + buf->f_files = atomic_read(&root->inodes_count); buf->f_ffree = 0; /* nilfs_count_free_inodes(sb); */ buf->f_namelen = NILFS_NAME_LEN; buf->f_fsid.val[0] = (u32)id; @@ -493,12 +515,12 @@ static int nilfs_show_options(struct seq_file *seq, struct vfsmount *vfs) { struct super_block *sb = vfs->mnt_sb; struct nilfs_sb_info *sbi = NILFS_SB(sb); + struct nilfs_root *root = NILFS_I(vfs->mnt_root->d_inode)->i_root; if (!nilfs_test_opt(sbi, BARRIER)) seq_puts(seq, ",nobarrier"); - if (nilfs_test_opt(sbi, SNAPSHOT)) - seq_printf(seq, ",cp=%llu", - (unsigned long long int)sbi->s_snapshot_cno); + if (root->cno != NILFS_CPTREE_CURRENT_CNO) + seq_printf(seq, ",cp=%llu", (unsigned long long)root->cno); if (nilfs_test_opt(sbi, ERRORS_PANIC)) seq_puts(seq, ",errors=panic"); if (nilfs_test_opt(sbi, ERRORS_CONT)) @@ -524,6 +546,8 @@ static const struct super_operations nilfs_sops = { .put_super = nilfs_put_super, /* .write_super = nilfs_write_super, */ .sync_fs = nilfs_sync_fs, + .freeze_fs = nilfs_freeze, + .unfreeze_fs = nilfs_unfreeze, /* .write_super_lockfs */ /* .unlockfs */ .statfs = nilfs_statfs, @@ -532,48 +556,6 @@ static const struct super_operations nilfs_sops = { .show_options = nilfs_show_options }; -static struct inode * -nilfs_nfs_get_inode(struct super_block *sb, u64 ino, u32 generation) -{ - struct inode *inode; - - if (ino < NILFS_FIRST_INO(sb) && ino != NILFS_ROOT_INO && - ino != NILFS_SKETCH_INO) - return ERR_PTR(-ESTALE); - - inode = nilfs_iget(sb, ino); - if (IS_ERR(inode)) - return ERR_CAST(inode); - if (generation && inode->i_generation != generation) { - iput(inode); - return ERR_PTR(-ESTALE); - } - - return inode; -} - -static struct dentry * -nilfs_fh_to_dentry(struct super_block *sb, struct fid *fid, int fh_len, - int fh_type) -{ - return generic_fh_to_dentry(sb, fid, fh_len, fh_type, - nilfs_nfs_get_inode); -} - -static struct dentry * -nilfs_fh_to_parent(struct super_block *sb, struct fid *fid, int fh_len, - int fh_type) -{ - return generic_fh_to_parent(sb, fid, fh_len, fh_type, - nilfs_nfs_get_inode); -} - -static const struct export_operations nilfs_export_ops = { - .fh_to_dentry = nilfs_fh_to_dentry, - .fh_to_parent = nilfs_fh_to_parent, - .get_parent = nilfs_get_parent, -}; - enum { Opt_err_cont, Opt_err_panic, Opt_err_ro, Opt_barrier, Opt_nobarrier, Opt_snapshot, Opt_order, Opt_norecovery, @@ -599,7 +581,6 @@ static int parse_options(char *options, struct super_block *sb, int is_remount) struct nilfs_sb_info *sbi = NILFS_SB(sb); char *p; substring_t args[MAX_OPT_ARGS]; - int option; if (!options) return 1; @@ -637,30 +618,12 @@ static int parse_options(char *options, struct super_block *sb, int is_remount) nilfs_write_opt(sbi, ERROR_MODE, ERRORS_CONT); break; case Opt_snapshot: - if (match_int(&args[0], &option) || option <= 0) - return 0; if (is_remount) { - if (!nilfs_test_opt(sbi, SNAPSHOT)) { - printk(KERN_ERR - "NILFS: cannot change regular " - "mount to snapshot.\n"); - return 0; - } else if (option != sbi->s_snapshot_cno) { - printk(KERN_ERR - "NILFS: cannot remount to a " - "different snapshot.\n"); - return 0; - } - break; - } - if (!(sb->s_flags & MS_RDONLY)) { - printk(KERN_ERR "NILFS: cannot mount snapshot " - "read/write. A read-only option is " - "required.\n"); + printk(KERN_ERR + "NILFS: \"%s\" option is invalid " + "for remount.\n", p); return 0; } - sbi->s_snapshot_cno = option; - nilfs_set_opt(sbi, SNAPSHOT); break; case Opt_norecovery: nilfs_set_opt(sbi, NORECOVERY); @@ -688,7 +651,7 @@ nilfs_set_default_options(struct nilfs_sb_info *sbi, NILFS_MOUNT_ERRORS_RO | NILFS_MOUNT_BARRIER; } -static int nilfs_setup_super(struct nilfs_sb_info *sbi) +static int nilfs_setup_super(struct nilfs_sb_info *sbi, int is_mount) { struct the_nilfs *nilfs = sbi->s_nilfs; struct nilfs_super_block **sbp; @@ -700,6 +663,9 @@ static int nilfs_setup_super(struct nilfs_sb_info *sbi) if (!sbp) return -EIO; + if (!is_mount) + goto skip_mount_setup; + max_mnt_count = le16_to_cpu(sbp[0]->s_max_mnt_count); mnt_count = le16_to_cpu(sbp[0]->s_mnt_count); @@ -716,9 +682,11 @@ static int nilfs_setup_super(struct nilfs_sb_info *sbi) sbp[0]->s_max_mnt_count = cpu_to_le16(NILFS_DFL_MAX_MNT_COUNT); sbp[0]->s_mnt_count = cpu_to_le16(mnt_count + 1); + sbp[0]->s_mtime = cpu_to_le64(get_seconds()); + +skip_mount_setup: sbp[0]->s_state = cpu_to_le16(le16_to_cpu(sbp[0]->s_state) & ~NILFS_VALID_FS); - sbp[0]->s_mtime = cpu_to_le64(get_seconds()); /* synchronize sbp[1] with sbp[0] */ memcpy(sbp[1], sbp[0], nilfs->ns_sbsize); return nilfs_commit_super(sbi, NILFS_SB_COMMIT_ALL); @@ -785,22 +753,156 @@ int nilfs_check_feature_compatibility(struct super_block *sb, return 0; } +static int nilfs_get_root_dentry(struct super_block *sb, + struct nilfs_root *root, + struct dentry **root_dentry) +{ + struct inode *inode; + struct dentry *dentry; + int ret = 0; + + inode = nilfs_iget(sb, root, NILFS_ROOT_INO); + if (IS_ERR(inode)) { + printk(KERN_ERR "NILFS: get root inode failed\n"); + ret = PTR_ERR(inode); + goto out; + } + if (!S_ISDIR(inode->i_mode) || !inode->i_blocks || !inode->i_size) { + iput(inode); + printk(KERN_ERR "NILFS: corrupt root inode.\n"); + ret = -EINVAL; + goto out; + } + + if (root->cno == NILFS_CPTREE_CURRENT_CNO) { + dentry = d_find_alias(inode); + if (!dentry) { + dentry = d_alloc_root(inode); + if (!dentry) { + iput(inode); + ret = -ENOMEM; + goto failed_dentry; + } + } else { + iput(inode); + } + } else { + dentry = d_obtain_alias(inode); + if (IS_ERR(dentry)) { + ret = PTR_ERR(dentry); + goto failed_dentry; + } + } + *root_dentry = dentry; + out: + return ret; + + failed_dentry: + printk(KERN_ERR "NILFS: get root dentry failed\n"); + goto out; +} + +static int nilfs_attach_snapshot(struct super_block *s, __u64 cno, + struct dentry **root_dentry) +{ + struct the_nilfs *nilfs = NILFS_SB(s)->s_nilfs; + struct nilfs_root *root; + int ret; + + down_read(&nilfs->ns_segctor_sem); + ret = nilfs_cpfile_is_snapshot(nilfs->ns_cpfile, cno); + up_read(&nilfs->ns_segctor_sem); + if (ret < 0) { + ret = (ret == -ENOENT) ? -EINVAL : ret; + goto out; + } else if (!ret) { + printk(KERN_ERR "NILFS: The specified checkpoint is " + "not a snapshot (checkpoint number=%llu).\n", + (unsigned long long)cno); + ret = -EINVAL; + goto out; + } + + ret = nilfs_attach_checkpoint(NILFS_SB(s), cno, false, &root); + if (ret) { + printk(KERN_ERR "NILFS: error loading snapshot " + "(checkpoint number=%llu).\n", + (unsigned long long)cno); + goto out; + } + ret = nilfs_get_root_dentry(s, root, root_dentry); + nilfs_put_root(root); + out: + return ret; +} + +static int nilfs_tree_was_touched(struct dentry *root_dentry) +{ + return atomic_read(&root_dentry->d_count) > 1; +} + +/** + * nilfs_try_to_shrink_tree() - try to shrink dentries of a checkpoint + * @root_dentry: root dentry of the tree to be shrunk + * + * This function returns true if the tree was in-use. + */ +static int nilfs_try_to_shrink_tree(struct dentry *root_dentry) +{ + if (have_submounts(root_dentry)) + return true; + shrink_dcache_parent(root_dentry); + return nilfs_tree_was_touched(root_dentry); +} + +int nilfs_checkpoint_is_mounted(struct super_block *sb, __u64 cno) +{ + struct the_nilfs *nilfs = NILFS_SB(sb)->s_nilfs; + struct nilfs_root *root; + struct inode *inode; + struct dentry *dentry; + int ret; + + if (cno < 0 || cno > nilfs->ns_cno) + return false; + + if (cno >= nilfs_last_cno(nilfs)) + return true; /* protect recent checkpoints */ + + ret = false; + root = nilfs_lookup_root(NILFS_SB(sb)->s_nilfs, cno); + if (root) { + inode = nilfs_ilookup(sb, root, NILFS_ROOT_INO); + if (inode) { + dentry = d_find_alias(inode); + if (dentry) { + if (nilfs_tree_was_touched(dentry)) + ret = nilfs_try_to_shrink_tree(dentry); + dput(dentry); + } + iput(inode); + } + nilfs_put_root(root); + } + return ret; +} + /** * nilfs_fill_super() - initialize a super block instance * @sb: super_block * @data: mount options * @silent: silent mode flag - * @nilfs: the_nilfs struct * * This function is called exclusively by nilfs->ns_mount_mutex. * So, the recovery process is protected from other simultaneous mounts. */ static int -nilfs_fill_super(struct super_block *sb, void *data, int silent, - struct the_nilfs *nilfs) +nilfs_fill_super(struct super_block *sb, void *data, int silent) { + struct the_nilfs *nilfs; struct nilfs_sb_info *sbi; - struct inode *root; + struct nilfs_root *fsroot; + struct backing_dev_info *bdi; __u64 cno; int err; @@ -809,19 +911,21 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent, return -ENOMEM; sb->s_fs_info = sbi; + sbi->s_super = sb; - get_nilfs(nilfs); + nilfs = alloc_nilfs(sb->s_bdev); + if (!nilfs) { + err = -ENOMEM; + goto failed_sbi; + } sbi->s_nilfs = nilfs; - sbi->s_super = sb; - atomic_set(&sbi->s_count, 1); err = init_nilfs(nilfs, sbi, (char *)data); if (err) - goto failed_sbi; + goto failed_nilfs; spin_lock_init(&sbi->s_inode_lock); INIT_LIST_HEAD(&sbi->s_dirty_files); - INIT_LIST_HEAD(&sbi->s_list); /* * Following initialization is overlapped because @@ -837,94 +941,59 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent, sb->s_export_op = &nilfs_export_ops; sb->s_root = NULL; sb->s_time_gran = 1; - sb->s_bdi = nilfs->ns_bdi; + + bdi = sb->s_bdev->bd_inode->i_mapping->backing_dev_info; + sb->s_bdi = bdi ? : &default_backing_dev_info; err = load_nilfs(nilfs, sbi); if (err) - goto failed_sbi; + goto failed_nilfs; cno = nilfs_last_cno(nilfs); - - if (sb->s_flags & MS_RDONLY) { - if (nilfs_test_opt(sbi, SNAPSHOT)) { - down_read(&nilfs->ns_segctor_sem); - err = nilfs_cpfile_is_snapshot(nilfs->ns_cpfile, - sbi->s_snapshot_cno); - up_read(&nilfs->ns_segctor_sem); - if (err < 0) { - if (err == -ENOENT) - err = -EINVAL; - goto failed_sbi; - } - if (!err) { - printk(KERN_ERR - "NILFS: The specified checkpoint is " - "not a snapshot " - "(checkpoint number=%llu).\n", - (unsigned long long)sbi->s_snapshot_cno); - err = -EINVAL; - goto failed_sbi; - } - cno = sbi->s_snapshot_cno; - } - } - - err = nilfs_attach_checkpoint(sbi, cno); + err = nilfs_attach_checkpoint(sbi, cno, true, &fsroot); if (err) { - printk(KERN_ERR "NILFS: error loading a checkpoint" - " (checkpoint number=%llu).\n", (unsigned long long)cno); - goto failed_sbi; + printk(KERN_ERR "NILFS: error loading last checkpoint " + "(checkpoint number=%llu).\n", (unsigned long long)cno); + goto failed_unload; } if (!(sb->s_flags & MS_RDONLY)) { - err = nilfs_attach_segment_constructor(sbi); + err = nilfs_attach_segment_constructor(sbi, fsroot); if (err) goto failed_checkpoint; } - root = nilfs_iget(sb, NILFS_ROOT_INO); - if (IS_ERR(root)) { - printk(KERN_ERR "NILFS: get root inode failed\n"); - err = PTR_ERR(root); - goto failed_segctor; - } - if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { - iput(root); - printk(KERN_ERR "NILFS: corrupt root inode.\n"); - err = -EINVAL; - goto failed_segctor; - } - sb->s_root = d_alloc_root(root); - if (!sb->s_root) { - iput(root); - printk(KERN_ERR "NILFS: get root dentry failed\n"); - err = -ENOMEM; + err = nilfs_get_root_dentry(sb, fsroot, &sb->s_root); + if (err) goto failed_segctor; - } + + nilfs_put_root(fsroot); if (!(sb->s_flags & MS_RDONLY)) { down_write(&nilfs->ns_sem); - nilfs_setup_super(sbi); + nilfs_setup_super(sbi, true); up_write(&nilfs->ns_sem); } - down_write(&nilfs->ns_super_sem); - if (!nilfs_test_opt(sbi, SNAPSHOT)) - nilfs->ns_current = sbi; - up_write(&nilfs->ns_super_sem); - return 0; failed_segctor: nilfs_detach_segment_constructor(sbi); failed_checkpoint: - nilfs_detach_checkpoint(sbi); + nilfs_put_root(fsroot); + + failed_unload: + iput(nilfs->ns_sufile); + iput(nilfs->ns_cpfile); + iput(nilfs->ns_dat); + + failed_nilfs: + destroy_nilfs(nilfs); failed_sbi: - put_nilfs(nilfs); sb->s_fs_info = NULL; - nilfs_put_sbinfo(sbi); + kfree(sbi); return err; } @@ -934,13 +1003,10 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data) struct the_nilfs *nilfs = sbi->s_nilfs; unsigned long old_sb_flags; struct nilfs_mount_options old_opts; - int was_snapshot, err; + int err; - down_write(&nilfs->ns_super_sem); old_sb_flags = sb->s_flags; old_opts.mount_opt = sbi->s_mount_opt; - old_opts.snapshot_cno = sbi->s_snapshot_cno; - was_snapshot = nilfs_test_opt(sbi, SNAPSHOT); if (!parse_options(data, sb, 1)) { err = -EINVAL; @@ -949,11 +1015,6 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data) sb->s_flags = (sb->s_flags & ~MS_POSIXACL); err = -EINVAL; - if (was_snapshot && !(*flags & MS_RDONLY)) { - printk(KERN_ERR "NILFS (device %s): cannot remount snapshot " - "read/write.\n", sb->s_id); - goto restore_opts; - } if (!nilfs_valid_fs(nilfs)) { printk(KERN_WARNING "NILFS (device %s): couldn't " @@ -978,6 +1039,7 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data) up_write(&nilfs->ns_sem); } else { __u64 features; + struct nilfs_root *root; /* * Mounting a RDONLY partition read-write, so reread and @@ -999,23 +1061,21 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data) sb->s_flags &= ~MS_RDONLY; - err = nilfs_attach_segment_constructor(sbi); + root = NILFS_I(sb->s_root->d_inode)->i_root; + err = nilfs_attach_segment_constructor(sbi, root); if (err) goto restore_opts; down_write(&nilfs->ns_sem); - nilfs_setup_super(sbi); + nilfs_setup_super(sbi, true); up_write(&nilfs->ns_sem); } out: - up_write(&nilfs->ns_super_sem); return 0; restore_opts: sb->s_flags = old_sb_flags; sbi->s_mount_opt = old_opts.mount_opt; - sbi->s_snapshot_cno = old_opts.snapshot_cno; - up_write(&nilfs->ns_super_sem); return err; } @@ -1035,7 +1095,7 @@ static int nilfs_identify(char *data, struct nilfs_super_data *sd) { char *p, *options = data; substring_t args[MAX_OPT_ARGS]; - int option, token; + int token; int ret = 0; do { @@ -1043,16 +1103,18 @@ static int nilfs_identify(char *data, struct nilfs_super_data *sd) if (p != NULL && *p) { token = match_token(p, tokens, args); if (token == Opt_snapshot) { - if (!(sd->flags & MS_RDONLY)) + if (!(sd->flags & MS_RDONLY)) { ret++; - else { - ret = match_int(&args[0], &option); - if (!ret) { - if (option > 0) - sd->cno = option; - else - ret++; - } + } else { + sd->cno = simple_strtoull(args[0].from, + NULL, 0); + /* + * No need to see the end pointer; + * match_token() has done syntax + * checking. + */ + if (sd->cno == 0) + ret++; } } if (ret) @@ -1069,18 +1131,14 @@ static int nilfs_identify(char *data, struct nilfs_super_data *sd) static int nilfs_set_bdev_super(struct super_block *s, void *data) { - struct nilfs_super_data *sd = data; - - s->s_bdev = sd->bdev; + s->s_bdev = data; s->s_dev = s->s_bdev->bd_dev; return 0; } static int nilfs_test_bdev_super(struct super_block *s, void *data) { - struct nilfs_super_data *sd = data; - - return sd->sbi && s->s_fs_info == (void *)sd->sbi; + return (void *)s->s_bdev == data; } static int @@ -1090,8 +1148,8 @@ nilfs_get_sb(struct file_system_type *fs_type, int flags, struct nilfs_super_data sd; struct super_block *s; fmode_t mode = FMODE_READ; - struct the_nilfs *nilfs; - int err, need_to_close = 1; + struct dentry *root_dentry; + int err, s_new = false; if (!(flags & MS_RDONLY)) mode |= FMODE_WRITE; @@ -1100,12 +1158,6 @@ nilfs_get_sb(struct file_system_type *fs_type, int flags, if (IS_ERR(sd.bdev)) return PTR_ERR(sd.bdev); - /* - * To get mount instance using sget() vfs-routine, NILFS needs - * much more information than normal filesystems to identify mount - * instance. For snapshot mounts, not only a mount type (ro-mount - * or rw-mount) but also a checkpoint number is required. - */ sd.cno = 0; sd.flags = flags; if (nilfs_identify((char *)data, &sd)) { @@ -1113,93 +1165,86 @@ nilfs_get_sb(struct file_system_type *fs_type, int flags, goto failed; } - nilfs = find_or_create_nilfs(sd.bdev); - if (!nilfs) { - err = -ENOMEM; - goto failed; - } - - mutex_lock(&nilfs->ns_mount_mutex); - - if (!sd.cno) { - /* - * Check if an exclusive mount exists or not. - * Snapshot mounts coexist with a current mount - * (i.e. rw-mount or ro-mount), whereas rw-mount and - * ro-mount are mutually exclusive. - */ - down_read(&nilfs->ns_super_sem); - if (nilfs->ns_current && - ((nilfs->ns_current->s_super->s_flags ^ flags) - & MS_RDONLY)) { - up_read(&nilfs->ns_super_sem); - err = -EBUSY; - goto failed_unlock; - } - up_read(&nilfs->ns_super_sem); - } - - /* - * Find existing nilfs_sb_info struct - */ - sd.sbi = nilfs_find_sbinfo(nilfs, !(flags & MS_RDONLY), sd.cno); - /* - * Get super block instance holding the nilfs_sb_info struct. - * A new instance is allocated if no existing mount is present or - * existing instance has been unmounted. + * once the super is inserted into the list by sget, s_umount + * will protect the lockfs code from trying to start a snapshot + * while we are mounting */ - s = sget(fs_type, nilfs_test_bdev_super, nilfs_set_bdev_super, &sd); - if (sd.sbi) - nilfs_put_sbinfo(sd.sbi); - + mutex_lock(&sd.bdev->bd_fsfreeze_mutex); + if (sd.bdev->bd_fsfreeze_count > 0) { + mutex_unlock(&sd.bdev->bd_fsfreeze_mutex); + err = -EBUSY; + goto failed; + } + s = sget(fs_type, nilfs_test_bdev_super, nilfs_set_bdev_super, sd.bdev); + mutex_unlock(&sd.bdev->bd_fsfreeze_mutex); if (IS_ERR(s)) { err = PTR_ERR(s); - goto failed_unlock; + goto failed; } if (!s->s_root) { char b[BDEVNAME_SIZE]; + s_new = true; + /* New superblock instance created */ s->s_flags = flags; s->s_mode = mode; strlcpy(s->s_id, bdevname(sd.bdev, b), sizeof(s->s_id)); sb_set_blocksize(s, block_size(sd.bdev)); - err = nilfs_fill_super(s, data, flags & MS_SILENT ? 1 : 0, - nilfs); + err = nilfs_fill_super(s, data, flags & MS_SILENT ? 1 : 0); if (err) - goto cancel_new; + goto failed_super; s->s_flags |= MS_ACTIVE; - need_to_close = 0; + } else if (!sd.cno) { + int busy = false; + + if (nilfs_tree_was_touched(s->s_root)) { + busy = nilfs_try_to_shrink_tree(s->s_root); + if (busy && (flags ^ s->s_flags) & MS_RDONLY) { + printk(KERN_ERR "NILFS: the device already " + "has a %s mount.\n", + (s->s_flags & MS_RDONLY) ? + "read-only" : "read/write"); + err = -EBUSY; + goto failed_super; + } + } + if (!busy) { + /* + * Try remount to setup mount states if the current + * tree is not mounted and only snapshots use this sb. + */ + err = nilfs_remount(s, &flags, data); + if (err) + goto failed_super; + } } - mutex_unlock(&nilfs->ns_mount_mutex); - put_nilfs(nilfs); - if (need_to_close) + if (sd.cno) { + err = nilfs_attach_snapshot(s, sd.cno, &root_dentry); + if (err) + goto failed_super; + } else { + root_dentry = dget(s->s_root); + } + + if (!s_new) close_bdev_exclusive(sd.bdev, mode); - simple_set_mnt(mnt, s); - return 0; - failed_unlock: - mutex_unlock(&nilfs->ns_mount_mutex); - put_nilfs(nilfs); - failed: - close_bdev_exclusive(sd.bdev, mode); - return err; + mnt->mnt_sb = s; + mnt->mnt_root = root_dentry; + return 0; - cancel_new: - /* Abandoning the newly allocated superblock */ - mutex_unlock(&nilfs->ns_mount_mutex); - put_nilfs(nilfs); + failed_super: deactivate_locked_super(s); - /* - * deactivate_locked_super() invokes close_bdev_exclusive(). - * We must finish all post-cleaning before this call; - * put_nilfs() needs the block device. - */ + + failed: + if (!s_new) + close_bdev_exclusive(sd.bdev, mode); return err; } diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index d277151..0254be2 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c @@ -35,9 +35,6 @@ #include "segbuf.h" -static LIST_HEAD(nilfs_objects); -static DEFINE_SPINLOCK(nilfs_lock); - static int nilfs_valid_sb(struct nilfs_super_block *sbp); void nilfs_set_last_segment(struct the_nilfs *nilfs, @@ -61,16 +58,13 @@ void nilfs_set_last_segment(struct the_nilfs *nilfs, } /** - * alloc_nilfs - allocate the_nilfs structure + * alloc_nilfs - allocate a nilfs object * @bdev: block device to which the_nilfs is related * - * alloc_nilfs() allocates memory for the_nilfs and - * initializes its reference count and locks. - * * Return Value: On success, pointer to the_nilfs is returned. * On error, NULL is returned. */ -static struct the_nilfs *alloc_nilfs(struct block_device *bdev) +struct the_nilfs *alloc_nilfs(struct block_device *bdev) { struct the_nilfs *nilfs; @@ -79,103 +73,38 @@ static struct the_nilfs *alloc_nilfs(struct block_device *bdev) return NULL; nilfs->ns_bdev = bdev; - atomic_set(&nilfs->ns_count, 1); atomic_set(&nilfs->ns_ndirtyblks, 0); init_rwsem(&nilfs->ns_sem); - init_rwsem(&nilfs->ns_super_sem); - mutex_init(&nilfs->ns_mount_mutex); - init_rwsem(&nilfs->ns_writer_sem); - INIT_LIST_HEAD(&nilfs->ns_list); - INIT_LIST_HEAD(&nilfs->ns_supers); + INIT_LIST_HEAD(&nilfs->ns_gc_inodes); spin_lock_init(&nilfs->ns_last_segment_lock); - nilfs->ns_gc_inodes_h = NULL; + nilfs->ns_cptree = RB_ROOT; + spin_lock_init(&nilfs->ns_cptree_lock); init_rwsem(&nilfs->ns_segctor_sem); return nilfs; } /** - * find_or_create_nilfs - find or create nilfs object - * @bdev: block device to which the_nilfs is related - * - * find_nilfs() looks up an existent nilfs object created on the - * device and gets the reference count of the object. If no nilfs object - * is found on the device, a new nilfs object is allocated. - * - * Return Value: On success, pointer to the nilfs object is returned. - * On error, NULL is returned. - */ -struct the_nilfs *find_or_create_nilfs(struct block_device *bdev) -{ - struct the_nilfs *nilfs, *new = NULL; - - retry: - spin_lock(&nilfs_lock); - list_for_each_entry(nilfs, &nilfs_objects, ns_list) { - if (nilfs->ns_bdev == bdev) { - get_nilfs(nilfs); - spin_unlock(&nilfs_lock); - if (new) - put_nilfs(new); - return nilfs; /* existing object */ - } - } - if (new) { - list_add_tail(&new->ns_list, &nilfs_objects); - spin_unlock(&nilfs_lock); - return new; /* new object */ - } - spin_unlock(&nilfs_lock); - - new = alloc_nilfs(bdev); - if (new) - goto retry; - return NULL; /* insufficient memory */ -} - -/** - * put_nilfs - release a reference to the_nilfs - * @nilfs: the_nilfs structure to be released - * - * put_nilfs() decrements a reference counter of the_nilfs. - * If the reference count reaches zero, the_nilfs is freed. + * destroy_nilfs - destroy nilfs object + * @nilfs: nilfs object to be released */ -void put_nilfs(struct the_nilfs *nilfs) +void destroy_nilfs(struct the_nilfs *nilfs) { - spin_lock(&nilfs_lock); - if (!atomic_dec_and_test(&nilfs->ns_count)) { - spin_unlock(&nilfs_lock); - return; - } - list_del_init(&nilfs->ns_list); - spin_unlock(&nilfs_lock); - - /* - * Increment of ns_count never occurs below because the caller - * of get_nilfs() holds at least one reference to the_nilfs. - * Thus its exclusion control is not required here. - */ - might_sleep(); - if (nilfs_loaded(nilfs)) { - nilfs_mdt_destroy(nilfs->ns_sufile); - nilfs_mdt_destroy(nilfs->ns_cpfile); - nilfs_mdt_destroy(nilfs->ns_dat); - nilfs_mdt_destroy(nilfs->ns_gc_dat); - } if (nilfs_init(nilfs)) { - nilfs_destroy_gccache(nilfs); brelse(nilfs->ns_sbh[0]); brelse(nilfs->ns_sbh[1]); } kfree(nilfs); } -static int nilfs_load_super_root(struct the_nilfs *nilfs, sector_t sr_block) +static int nilfs_load_super_root(struct the_nilfs *nilfs, + struct super_block *sb, sector_t sr_block) { struct buffer_head *bh_sr; struct nilfs_super_root *raw_sr; struct nilfs_super_block **sbp = nilfs->ns_sbp; + struct nilfs_inode *rawi; unsigned dat_entry_size, segment_usage_size, checkpoint_size; unsigned inode_size; int err; @@ -192,40 +121,22 @@ static int nilfs_load_super_root(struct the_nilfs *nilfs, sector_t sr_block) inode_size = nilfs->ns_inode_size; - err = -ENOMEM; - nilfs->ns_dat = nilfs_dat_new(nilfs, dat_entry_size); - if (unlikely(!nilfs->ns_dat)) + rawi = (void *)bh_sr->b_data + NILFS_SR_DAT_OFFSET(inode_size); + err = nilfs_dat_read(sb, dat_entry_size, rawi, &nilfs->ns_dat); + if (err) goto failed; - nilfs->ns_gc_dat = nilfs_dat_new(nilfs, dat_entry_size); - if (unlikely(!nilfs->ns_gc_dat)) + rawi = (void *)bh_sr->b_data + NILFS_SR_CPFILE_OFFSET(inode_size); + err = nilfs_cpfile_read(sb, checkpoint_size, rawi, &nilfs->ns_cpfile); + if (err) goto failed_dat; - nilfs->ns_cpfile = nilfs_cpfile_new(nilfs, checkpoint_size); - if (unlikely(!nilfs->ns_cpfile)) - goto failed_gc_dat; - - nilfs->ns_sufile = nilfs_sufile_new(nilfs, segment_usage_size); - if (unlikely(!nilfs->ns_sufile)) + rawi = (void *)bh_sr->b_data + NILFS_SR_SUFILE_OFFSET(inode_size); + err = nilfs_sufile_read(sb, segment_usage_size, rawi, + &nilfs->ns_sufile); + if (err) goto failed_cpfile; - nilfs_mdt_set_shadow(nilfs->ns_dat, nilfs->ns_gc_dat); - - err = nilfs_dat_read(nilfs->ns_dat, (void *)bh_sr->b_data + - NILFS_SR_DAT_OFFSET(inode_size)); - if (unlikely(err)) - goto failed_sufile; - - err = nilfs_cpfile_read(nilfs->ns_cpfile, (void *)bh_sr->b_data + - NILFS_SR_CPFILE_OFFSET(inode_size)); - if (unlikely(err)) - goto failed_sufile; - - err = nilfs_sufile_read(nilfs->ns_sufile, (void *)bh_sr->b_data + - NILFS_SR_SUFILE_OFFSET(inode_size)); - if (unlikely(err)) - goto failed_sufile; - raw_sr = (struct nilfs_super_root *)bh_sr->b_data; nilfs->ns_nongc_ctime = le64_to_cpu(raw_sr->sr_nongc_ctime); @@ -233,17 +144,11 @@ static int nilfs_load_super_root(struct the_nilfs *nilfs, sector_t sr_block) brelse(bh_sr); return err; - failed_sufile: - nilfs_mdt_destroy(nilfs->ns_sufile); - failed_cpfile: - nilfs_mdt_destroy(nilfs->ns_cpfile); - - failed_gc_dat: - nilfs_mdt_destroy(nilfs->ns_gc_dat); + iput(nilfs->ns_cpfile); failed_dat: - nilfs_mdt_destroy(nilfs->ns_dat); + iput(nilfs->ns_dat); goto failed; } @@ -306,15 +211,6 @@ int load_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi) int valid_fs = nilfs_valid_fs(nilfs); int err; - if (nilfs_loaded(nilfs)) { - if (valid_fs || - ((s_flags & MS_RDONLY) && nilfs_test_opt(sbi, NORECOVERY))) - return 0; - printk(KERN_ERR "NILFS: the filesystem is in an incomplete " - "recovery state.\n"); - return -EINVAL; - } - if (!valid_fs) { printk(KERN_WARNING "NILFS warning: mounting unchecked fs\n"); if (s_flags & MS_RDONLY) { @@ -375,7 +271,7 @@ int load_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi) goto scan_error; } - err = nilfs_load_super_root(nilfs, ri.ri_super_root); + err = nilfs_load_super_root(nilfs, sbi->s_super, ri.ri_super_root); if (unlikely(err)) { printk(KERN_ERR "NILFS: error loading super root.\n"); goto failed; @@ -443,10 +339,9 @@ int load_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi) goto failed; failed_unload: - nilfs_mdt_destroy(nilfs->ns_cpfile); - nilfs_mdt_destroy(nilfs->ns_sufile); - nilfs_mdt_destroy(nilfs->ns_dat); - nilfs_mdt_destroy(nilfs->ns_gc_dat); + iput(nilfs->ns_cpfile); + iput(nilfs->ns_sufile); + iput(nilfs->ns_dat); failed: nilfs_clear_recovery_info(&ri); @@ -468,8 +363,8 @@ static unsigned long long nilfs_max_size(unsigned int blkbits) static int nilfs_store_disk_layout(struct the_nilfs *nilfs, struct nilfs_super_block *sbp) { - if (le32_to_cpu(sbp->s_rev_level) != NILFS_CURRENT_REV) { - printk(KERN_ERR "NILFS: revision mismatch " + if (le32_to_cpu(sbp->s_rev_level) < NILFS_MIN_SUPP_REV) { + printk(KERN_ERR "NILFS: unsupported revision " "(superblock rev.=%d.%d, current rev.=%d.%d). " "Please check the version of mkfs.nilfs.\n", le32_to_cpu(sbp->s_rev_level), @@ -631,12 +526,7 @@ static int nilfs_load_super_block(struct the_nilfs *nilfs, * * init_nilfs() performs common initialization per block device (e.g. * reading the super block, getting disk layout information, initializing - * shared fields in the_nilfs). It takes on some portion of the jobs - * typically done by a fill_super() routine. This division arises from - * the nature that multiple NILFS instances may be simultaneously - * mounted on a device. - * For multiple mounts on the same device, only the first mount - * invokes these tasks. + * shared fields in the_nilfs). * * Return Value: On success, 0 is returned. On error, a negative error * code is returned. @@ -645,32 +535,10 @@ int init_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, char *data) { struct super_block *sb = sbi->s_super; struct nilfs_super_block *sbp; - struct backing_dev_info *bdi; int blocksize; int err; down_write(&nilfs->ns_sem); - if (nilfs_init(nilfs)) { - /* Load values from existing the_nilfs */ - sbp = nilfs->ns_sbp[0]; - err = nilfs_store_magic_and_option(sb, sbp, data); - if (err) - goto out; - - err = nilfs_check_feature_compatibility(sb, sbp); - if (err) - goto out; - - blocksize = BLOCK_SIZE << le32_to_cpu(sbp->s_log_block_size); - if (sb->s_blocksize != blocksize && - !sb_set_blocksize(sb, blocksize)) { - printk(KERN_ERR "NILFS: blocksize %d unfit to device\n", - blocksize); - err = -EINVAL; - } - sb->s_maxbytes = nilfs_max_size(sb->s_blocksize_bits); - goto out; - } blocksize = sb_min_blocksize(sb, NILFS_MIN_BLOCK_SIZE); if (!blocksize) { @@ -729,18 +597,10 @@ int init_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, char *data) nilfs->ns_mount_state = le16_to_cpu(sbp->s_state); - bdi = nilfs->ns_bdev->bd_inode->i_mapping->backing_dev_info; - nilfs->ns_bdi = bdi ? : &default_backing_dev_info; - err = nilfs_store_log_cursor(nilfs, sbp); if (err) goto failed_sbh; - /* Initialize gcinode cache */ - err = nilfs_init_gccache(nilfs); - if (err) - goto failed_sbh; - set_nilfs_init(nilfs); err = 0; out: @@ -812,79 +672,92 @@ int nilfs_near_disk_full(struct the_nilfs *nilfs) return ncleansegs <= nilfs->ns_nrsvsegs + nincsegs; } -/** - * nilfs_find_sbinfo - find existing nilfs_sb_info structure - * @nilfs: nilfs object - * @rw_mount: mount type (non-zero value for read/write mount) - * @cno: checkpoint number (zero for read-only mount) - * - * nilfs_find_sbinfo() returns the nilfs_sb_info structure which - * @rw_mount and @cno (in case of snapshots) matched. If no instance - * was found, NULL is returned. Although the super block instance can - * be unmounted after this function returns, the nilfs_sb_info struct - * is kept on memory until nilfs_put_sbinfo() is called. - */ -struct nilfs_sb_info *nilfs_find_sbinfo(struct the_nilfs *nilfs, - int rw_mount, __u64 cno) +struct nilfs_root *nilfs_lookup_root(struct the_nilfs *nilfs, __u64 cno) { - struct nilfs_sb_info *sbi; - - down_read(&nilfs->ns_super_sem); - /* - * The SNAPSHOT flag and sb->s_flags are supposed to be - * protected with nilfs->ns_super_sem. - */ - sbi = nilfs->ns_current; - if (rw_mount) { - if (sbi && !(sbi->s_super->s_flags & MS_RDONLY)) - goto found; /* read/write mount */ - else - goto out; - } else if (cno == 0) { - if (sbi && (sbi->s_super->s_flags & MS_RDONLY)) - goto found; /* read-only mount */ - else - goto out; + struct rb_node *n; + struct nilfs_root *root; + + spin_lock(&nilfs->ns_cptree_lock); + n = nilfs->ns_cptree.rb_node; + while (n) { + root = rb_entry(n, struct nilfs_root, rb_node); + + if (cno < root->cno) { + n = n->rb_left; + } else if (cno > root->cno) { + n = n->rb_right; + } else { + atomic_inc(&root->count); + spin_unlock(&nilfs->ns_cptree_lock); + return root; + } } + spin_unlock(&nilfs->ns_cptree_lock); - list_for_each_entry(sbi, &nilfs->ns_supers, s_list) { - if (nilfs_test_opt(sbi, SNAPSHOT) && - sbi->s_snapshot_cno == cno) - goto found; /* snapshot mount */ - } - out: - up_read(&nilfs->ns_super_sem); return NULL; - - found: - atomic_inc(&sbi->s_count); - up_read(&nilfs->ns_super_sem); - return sbi; } -int nilfs_checkpoint_is_mounted(struct the_nilfs *nilfs, __u64 cno, - int snapshot_mount) +struct nilfs_root * +nilfs_find_or_create_root(struct the_nilfs *nilfs, __u64 cno) { - struct nilfs_sb_info *sbi; - int ret = 0; + struct rb_node **p, *parent; + struct nilfs_root *root, *new; - down_read(&nilfs->ns_super_sem); - if (cno == 0 || cno > nilfs->ns_cno) - goto out_unlock; + root = nilfs_lookup_root(nilfs, cno); + if (root) + return root; - list_for_each_entry(sbi, &nilfs->ns_supers, s_list) { - if (sbi->s_snapshot_cno == cno && - (!snapshot_mount || nilfs_test_opt(sbi, SNAPSHOT))) { - /* exclude read-only mounts */ - ret++; - break; + new = kmalloc(sizeof(*root), GFP_KERNEL); + if (!new) + return NULL; + + spin_lock(&nilfs->ns_cptree_lock); + + p = &nilfs->ns_cptree.rb_node; + parent = NULL; + + while (*p) { + parent = *p; + root = rb_entry(parent, struct nilfs_root, rb_node); + + if (cno < root->cno) { + p = &(*p)->rb_left; + } else if (cno > root->cno) { + p = &(*p)->rb_right; + } else { + atomic_inc(&root->count); + spin_unlock(&nilfs->ns_cptree_lock); + kfree(new); + return root; } } - /* for protecting recent checkpoints */ - if (cno >= nilfs_last_cno(nilfs)) - ret++; - out_unlock: - up_read(&nilfs->ns_super_sem); - return ret; + new->cno = cno; + new->ifile = NULL; + new->nilfs = nilfs; + atomic_set(&new->count, 1); + atomic_set(&new->inodes_count, 0); + atomic_set(&new->blocks_count, 0); + + rb_link_node(&new->rb_node, parent, p); + rb_insert_color(&new->rb_node, &nilfs->ns_cptree); + + spin_unlock(&nilfs->ns_cptree_lock); + + return new; +} + +void nilfs_put_root(struct nilfs_root *root) +{ + if (atomic_dec_and_test(&root->count)) { + struct the_nilfs *nilfs = root->nilfs; + + spin_lock(&nilfs->ns_cptree_lock); + rb_erase(&root->rb_node, &nilfs->ns_cptree); + spin_unlock(&nilfs->ns_cptree_lock); + if (root->ifile) + iput(root->ifile); + + kfree(root); + } } diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h index f785a7b..69226e1 100644 --- a/fs/nilfs2/the_nilfs.h +++ b/fs/nilfs2/the_nilfs.h @@ -26,6 +26,7 @@ #include <linux/types.h> #include <linux/buffer_head.h> +#include <linux/rbtree.h> #include <linux/fs.h> #include <linux/blkdev.h> #include <linux/backing-dev.h> @@ -45,22 +46,13 @@ enum { /** * struct the_nilfs - struct to supervise multiple nilfs mount points * @ns_flags: flags - * @ns_count: reference count - * @ns_list: list head for nilfs_list * @ns_bdev: block device - * @ns_bdi: backing dev info - * @ns_writer: back pointer to writable nilfs_sb_info * @ns_sem: semaphore for shared states - * @ns_super_sem: semaphore for global operations across super block instances - * @ns_mount_mutex: mutex protecting mount process of nilfs - * @ns_writer_sem: semaphore protecting ns_writer attach/detach - * @ns_current: back pointer to current mount * @ns_sbh: buffer heads of on-disk super blocks * @ns_sbp: pointers to super block data * @ns_sbwtime: previous write time of super block * @ns_sbwcount: write count of super block * @ns_sbsize: size of valid data in super block - * @ns_supers: list of nilfs super block structs * @ns_seg_seq: segment sequence counter * @ns_segnum: index number of the latest full segment. * @ns_nextnum: index number of the full segment index to be used next @@ -79,9 +71,9 @@ enum { * @ns_dat: DAT file inode * @ns_cpfile: checkpoint file inode * @ns_sufile: segusage file inode - * @ns_gc_dat: shadow inode of the DAT file inode for GC + * @ns_cptree: rb-tree of all mounted checkpoints (nilfs_root) + * @ns_cptree_lock: lock protecting @ns_cptree * @ns_gc_inodes: dummy inodes to keep live blocks - * @ns_gc_inodes_h: hash list to keep dummy inode holding live blocks * @ns_blocksize_bits: bit length of block size * @ns_blocksize: block size * @ns_nsegments: number of segments in filesystem @@ -95,22 +87,9 @@ enum { */ struct the_nilfs { unsigned long ns_flags; - atomic_t ns_count; - struct list_head ns_list; struct block_device *ns_bdev; - struct backing_dev_info *ns_bdi; - struct nilfs_sb_info *ns_writer; struct rw_semaphore ns_sem; - struct rw_semaphore ns_super_sem; - struct mutex ns_mount_mutex; - struct rw_semaphore ns_writer_sem; - - /* - * components protected by ns_super_sem - */ - struct nilfs_sb_info *ns_current; - struct list_head ns_supers; /* * used for @@ -163,11 +142,13 @@ struct the_nilfs { struct inode *ns_dat; struct inode *ns_cpfile; struct inode *ns_sufile; - struct inode *ns_gc_dat; - /* GC inode list and hash table head */ + /* Checkpoint tree */ + struct rb_root ns_cptree; + spinlock_t ns_cptree_lock; + + /* GC inode list */ struct list_head ns_gc_inodes; - struct hlist_head *ns_gc_inodes_h; /* Disk layout information (static) */ unsigned int ns_blocksize_bits; @@ -182,9 +163,6 @@ struct the_nilfs { u32 ns_crc_seed; }; -#define NILFS_GCINODE_HASH_BITS 8 -#define NILFS_GCINODE_HASH_SIZE (1<<NILFS_GCINODE_HASH_BITS) - #define THE_NILFS_FNS(bit, name) \ static inline void set_nilfs_##name(struct the_nilfs *nilfs) \ { \ @@ -205,6 +183,32 @@ THE_NILFS_FNS(DISCONTINUED, discontinued) THE_NILFS_FNS(GC_RUNNING, gc_running) THE_NILFS_FNS(SB_DIRTY, sb_dirty) +/** + * struct nilfs_root - nilfs root object + * @cno: checkpoint number + * @rb_node: red-black tree node + * @count: refcount of this structure + * @nilfs: nilfs object + * @ifile: inode file + * @root: root inode + * @inodes_count: number of inodes + * @blocks_count: number of blocks (Reserved) + */ +struct nilfs_root { + __u64 cno; + struct rb_node rb_node; + + atomic_t count; + struct the_nilfs *nilfs; + struct inode *ifile; + + atomic_t inodes_count; + atomic_t blocks_count; +}; + +/* Special checkpoint number */ +#define NILFS_CPTREE_CURRENT_CNO 0 + /* Minimum interval of periodical update of superblocks (in seconds) */ #define NILFS_SB_FREQ 10 @@ -221,46 +225,25 @@ static inline int nilfs_sb_will_flip(struct the_nilfs *nilfs) } void nilfs_set_last_segment(struct the_nilfs *, sector_t, u64, __u64); -struct the_nilfs *find_or_create_nilfs(struct block_device *); -void put_nilfs(struct the_nilfs *); +struct the_nilfs *alloc_nilfs(struct block_device *bdev); +void destroy_nilfs(struct the_nilfs *nilfs); int init_nilfs(struct the_nilfs *, struct nilfs_sb_info *, char *); int load_nilfs(struct the_nilfs *, struct nilfs_sb_info *); int nilfs_discard_segments(struct the_nilfs *, __u64 *, size_t); int nilfs_count_free_blocks(struct the_nilfs *, sector_t *); +struct nilfs_root *nilfs_lookup_root(struct the_nilfs *nilfs, __u64 cno); +struct nilfs_root *nilfs_find_or_create_root(struct the_nilfs *nilfs, + __u64 cno); +void nilfs_put_root(struct nilfs_root *root); struct nilfs_sb_info *nilfs_find_sbinfo(struct the_nilfs *, int, __u64); -int nilfs_checkpoint_is_mounted(struct the_nilfs *, __u64, int); int nilfs_near_disk_full(struct the_nilfs *); void nilfs_fall_back_super_block(struct the_nilfs *); void nilfs_swap_super_block(struct the_nilfs *); -static inline void get_nilfs(struct the_nilfs *nilfs) -{ - /* Caller must have at least one reference of the_nilfs. */ - atomic_inc(&nilfs->ns_count); -} - -static inline void -nilfs_attach_writer(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi) -{ - down_write(&nilfs->ns_writer_sem); - nilfs->ns_writer = sbi; - up_write(&nilfs->ns_writer_sem); -} - -static inline void -nilfs_detach_writer(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi) -{ - down_write(&nilfs->ns_writer_sem); - if (sbi == nilfs->ns_writer) - nilfs->ns_writer = NULL; - up_write(&nilfs->ns_writer_sem); -} - -static inline void nilfs_put_sbinfo(struct nilfs_sb_info *sbi) +static inline void nilfs_get_root(struct nilfs_root *root) { - if (atomic_dec_and_test(&sbi->s_count)) - kfree(sbi); + atomic_inc(&root->count); } static inline int nilfs_valid_fs(struct the_nilfs *nilfs) diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index a9cd507..2802898 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -67,6 +67,19 @@ enum fid_type { * 32 bit parent block number, 32 bit parent generation number */ FILEID_UDF_WITH_PARENT = 0x52, + + /* + * 64 bit checkpoint number, 64 bit inode number, + * 32 bit generation number. + */ + FILEID_NILFS_WITHOUT_PARENT = 0x61, + + /* + * 64 bit checkpoint number, 64 bit inode number, + * 32 bit generation number, 32 bit parent generation. + * 64 bit parent inode number. + */ + FILEID_NILFS_WITH_PARENT = 0x62, }; struct fid { diff --git a/include/linux/nilfs2_fs.h b/include/linux/nilfs2_fs.h index f5487b6..227e49d 100644 --- a/include/linux/nilfs2_fs.h +++ b/include/linux/nilfs2_fs.h @@ -4,16 +4,16 @@ * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. * * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; either version 2.1 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. + * GNU Lesser General Public License for more details. * - * You should have received a copy of the GNU General Public License + * You should have received a copy of the GNU Lesser General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * @@ -147,7 +147,6 @@ struct nilfs_super_root { #define NILFS_MOUNT_ERRORS_CONT 0x0010 /* Continue on errors */ #define NILFS_MOUNT_ERRORS_RO 0x0020 /* Remount fs ro on errors */ #define NILFS_MOUNT_ERRORS_PANIC 0x0040 /* Panic on errors */ -#define NILFS_MOUNT_SNAPSHOT 0x0080 /* Snapshot flag */ #define NILFS_MOUNT_BARRIER 0x1000 /* Use block barriers */ #define NILFS_MOUNT_STRICT_ORDER 0x2000 /* Apply strict in-order semantics also for data */ @@ -229,6 +228,7 @@ struct nilfs_super_block { */ #define NILFS_CURRENT_REV 2 /* current major revision */ #define NILFS_MINOR_REV 0 /* minor revision */ +#define NILFS_MIN_SUPP_REV 2 /* minimum supported revision */ /* * Feature set definitions @@ -270,6 +270,14 @@ struct nilfs_super_block { segments */ /* + * We call DAT, cpfile, and sufile root metadata files. Inodes of + * these files are written in super root block instead of ifile, and + * garbage collector doesn't keep any past versions of these files. + */ +#define NILFS_ROOT_METADATA_FILE(ino) \ + ((ino) >= NILFS_DAT_INO && (ino) <= NILFS_SUFILE_INO) + +/* * bytes offset of secondary super block */ #define NILFS_SB2_OFFSET_BYTES(devsize) ((((devsize) >> 12) - 1) << 12) |