From 31d710a7bd42f0d89e30d53bdaad427c5f191d0d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20=C5=BBenczykowski?= Date: Sun, 26 Sep 2010 12:38:28 +0000 Subject: ext3: don't update sb journal_devnum when RO dev MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit An ext3 filesystem on a read-only device, with an external journal which is at a different device number then recorded in the superblock will fail to honor the read-only setting of the device and trigger a superblock update (write). For example: - ext3 on a software raid which is in read-only mode - external journal on a read-write device which has changed device num - attempt to mount with -o journal_dev= - hits BUG_ON(mddev->ro = 1) in md.c Cc: Theodore Ts'o Signed-off-by: Maciej Żenczykowski Signed-off-by: Jan Kara --- fs/ext3/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/ext3') diff --git a/fs/ext3/super.c b/fs/ext3/super.c index acf8695..0416931 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -2290,7 +2290,7 @@ static int ext3_load_journal(struct super_block *sb, EXT3_SB(sb)->s_journal = journal; ext3_clear_journal_err(sb, es); - if (journal_devnum && + if (!really_read_only && journal_devnum && journal_devnum != le32_to_cpu(es->s_journal_dev)) { es->s_journal_dev = cpu_to_le32(journal_devnum); -- cgit v1.1 From 99fbb1e2af5da27d3ee75c2e421712fe9d083fb6 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 9 Nov 2010 10:18:05 -0800 Subject: fs/ext3/super.c: Use printf extension %pV Using %pV reduces the number of printk calls and eliminates any possible message interleaving from other printk calls. Signed-off-by: Joe Perches Signed-off-by: Jan Kara --- fs/ext3/super.c | 56 +++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 37 insertions(+), 19 deletions(-) (limited to 'fs/ext3') diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 0416931..0e9cbc3 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -143,12 +143,16 @@ void ext3_journal_abort_handle(const char *caller, const char *err_fn, void ext3_msg(struct super_block *sb, const char *prefix, const char *fmt, ...) { + struct va_format vaf; va_list args; va_start(args, fmt); - printk("%sEXT3-fs (%s): ", prefix, sb->s_id); - vprintk(fmt, args); - printk("\n"); + + vaf.fmt = fmt; + vaf.va = &args; + + printk("%sEXT3-fs (%s): %pV\n", prefix, sb->s_id, &vaf); + va_end(args); } @@ -195,15 +199,20 @@ static void ext3_handle_error(struct super_block *sb) sb->s_id); } -void ext3_error (struct super_block * sb, const char * function, - const char * fmt, ...) +void ext3_error(struct super_block *sb, const char *function, + const char *fmt, ...) { + struct va_format vaf; va_list args; va_start(args, fmt); - printk(KERN_CRIT "EXT3-fs error (device %s): %s: ",sb->s_id, function); - vprintk(fmt, args); - printk("\n"); + + vaf.fmt = fmt; + vaf.va = &args; + + printk(KERN_CRIT "EXT3-fs error (device %s): %s: %pV\n", + sb->s_id, function, &vaf); + va_end(args); ext3_handle_error(sb); @@ -274,15 +283,20 @@ void __ext3_std_error (struct super_block * sb, const char * function, * case we take the easy way out and panic immediately. */ -void ext3_abort (struct super_block * sb, const char * function, - const char * fmt, ...) +void ext3_abort(struct super_block *sb, const char *function, + const char *fmt, ...) { + struct va_format vaf; va_list args; va_start(args, fmt); - printk(KERN_CRIT "EXT3-fs (%s): error: %s: ", sb->s_id, function); - vprintk(fmt, args); - printk("\n"); + + vaf.fmt = fmt; + vaf.va = &args; + + printk(KERN_CRIT "EXT3-fs (%s): error: %s: %pV\n", + sb->s_id, function, &vaf); + va_end(args); if (test_opt(sb, ERRORS_PANIC)) @@ -300,16 +314,20 @@ void ext3_abort (struct super_block * sb, const char * function, journal_abort(EXT3_SB(sb)->s_journal, -EIO); } -void ext3_warning (struct super_block * sb, const char * function, - const char * fmt, ...) +void ext3_warning(struct super_block *sb, const char *function, + const char *fmt, ...) { + struct va_format vaf; va_list args; va_start(args, fmt); - printk(KERN_WARNING "EXT3-fs (%s): warning: %s: ", - sb->s_id, function); - vprintk(fmt, args); - printk("\n"); + + vaf.fmt = fmt; + vaf.va = &args; + + printk(KERN_WARNING "EXT3-fs (%s): warning: %s: %pV\n", + sb->s_id, function, &vaf); + va_end(args); } -- cgit v1.1 From 2b543edae2d9161ae8dda1d85cbd28ef8a166cc0 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 16 Nov 2010 20:18:12 +0900 Subject: ext3: Add error check in ext3_mkdir() Check return value of ext3_journal_get_write_access, ext3_journal_dirty_metadata and ext3_mark_inode_dirty. Consolidate error path under new label 'out_clear_inode' and adjust bh releasing appropriately. Signed-off-by: Namhyung Kim Signed-off-by: Jan Kara --- fs/ext3/namei.c | 36 ++++++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 14 deletions(-) (limited to 'fs/ext3') diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index bce9dce..03fccc5 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c @@ -1762,7 +1762,7 @@ static int ext3_mkdir(struct inode * dir, struct dentry * dentry, int mode) { handle_t *handle; struct inode * inode; - struct buffer_head * dir_block; + struct buffer_head * dir_block = NULL; struct ext3_dir_entry_2 * de; int err, retries = 0; @@ -1790,15 +1790,14 @@ retry: inode->i_fop = &ext3_dir_operations; inode->i_size = EXT3_I(inode)->i_disksize = inode->i_sb->s_blocksize; dir_block = ext3_bread (handle, inode, 0, 1, &err); - if (!dir_block) { - drop_nlink(inode); /* is this nlink == 0? */ - unlock_new_inode(inode); - ext3_mark_inode_dirty(handle, inode); - iput (inode); - goto out_stop; - } + if (!dir_block) + goto out_clear_inode; + BUFFER_TRACE(dir_block, "get_write_access"); - ext3_journal_get_write_access(handle, dir_block); + err = ext3_journal_get_write_access(handle, dir_block); + if (err) + goto out_clear_inode; + de = (struct ext3_dir_entry_2 *) dir_block->b_data; de->inode = cpu_to_le32(inode->i_ino); de->name_len = 1; @@ -1814,11 +1813,16 @@ retry: ext3_set_de_type(dir->i_sb, de, S_IFDIR); inode->i_nlink = 2; BUFFER_TRACE(dir_block, "call ext3_journal_dirty_metadata"); - ext3_journal_dirty_metadata(handle, dir_block); - brelse (dir_block); - ext3_mark_inode_dirty(handle, inode); - err = ext3_add_entry (handle, dentry, inode); + err = ext3_journal_dirty_metadata(handle, dir_block); + if (err) + goto out_clear_inode; + + err = ext3_mark_inode_dirty(handle, inode); + if (!err) + err = ext3_add_entry (handle, dentry, inode); + if (err) { +out_clear_inode: inode->i_nlink = 0; unlock_new_inode(inode); ext3_mark_inode_dirty(handle, inode); @@ -1827,10 +1831,14 @@ retry: } inc_nlink(dir); ext3_update_dx_flag(dir); - ext3_mark_inode_dirty(handle, dir); + err = ext3_mark_inode_dirty(handle, dir); + if (err) + goto out_clear_inode; + d_instantiate(dentry, inode); unlock_new_inode(inode); out_stop: + brelse(dir_block); ext3_journal_stop(handle); if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries)) goto retry; -- cgit v1.1 From fbcae8e32d73ad6cad9c5721881350c51174d552 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 19 Nov 2010 16:28:35 +0900 Subject: ext3: Add journal error check into ext3_delete_entry() Check return value of ext3_journal_get_write_access() and ext3_journal_dirty_metadata(). Signed-off-by: Namhyung Kim Signed-off-by: Jan Kara --- fs/ext3/namei.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'fs/ext3') diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index 03fccc5..672cea1 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c @@ -1644,8 +1644,13 @@ static int ext3_delete_entry (handle_t *handle, if (!ext3_check_dir_entry("ext3_delete_entry", dir, de, bh, i)) return -EIO; if (de == de_del) { + int err; + BUFFER_TRACE(bh, "get_write_access"); - ext3_journal_get_write_access(handle, bh); + err = ext3_journal_get_write_access(handle, bh); + if (err) + goto journal_error; + if (pde) pde->rec_len = ext3_rec_len_to_disk( ext3_rec_len_from_disk(pde->rec_len) + @@ -1654,7 +1659,12 @@ static int ext3_delete_entry (handle_t *handle, de->inode = 0; dir->i_version++; BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); - ext3_journal_dirty_metadata(handle, bh); + err = ext3_journal_dirty_metadata(handle, bh); + if (err) { +journal_error: + ext3_std_error(dir->i_sb, err); + return err; + } return 0; } i += ext3_rec_len_from_disk(de->rec_len); -- cgit v1.1 From ad692bf3ea035fa5a9d56462cf3df97d9607cced Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 16 Nov 2010 22:57:44 +0000 Subject: ext3: Return error code from generic_check_addressable ext3_fill_super should return the error code that generic_check_accessible returns when an error condition occurs. Signed-off-by: Darrick J. Wong Signed-off-by: Jan Kara --- fs/ext3/super.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'fs/ext3') diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 0e9cbc3..ebc3a9c 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -1859,13 +1859,15 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) goto failed_mount; } - if (generic_check_addressable(sb->s_blocksize_bits, - le32_to_cpu(es->s_blocks_count))) { + err = generic_check_addressable(sb->s_blocksize_bits, + le32_to_cpu(es->s_blocks_count)); + if (err) { ext3_msg(sb, KERN_ERR, "error: filesystem is too large to mount safely"); if (sizeof(sector_t) < 8) ext3_msg(sb, KERN_ERR, "error: CONFIG_LBDAF not enabled"); + ret = err; goto failed_mount; } -- cgit v1.1 From f0cad89f5e8ef8b6d0c065115565524137e44f0b Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sat, 16 Oct 2010 19:36:59 -0400 Subject: ext3: Avoid uninitialized memory references with a corrupted htree directory If the first htree directory is missing '.' or '..' but is otherwise a valid directory, and we do a lookup for '.' or '..', it's possible to dereference an uninitialized memory pointer in ext3_htree_next_block(). Avoid this. We avoid this by moving the special case from ext3_dx_find_entry() to ext3_find_entry(); this also means we can optimize ext3_find_entry() slightly when NFS looks up "..". Thanks to Brad Spengler for pointing a Clang warning that led me to look more closely at this code. The warning was harmless, but it was useful in pointing out code that was too ugly to live. This warning was also reported by Roman Borisov. Signed-off-by: "Theodore Ts'o" Cc: Jan Kara Cc: Brad Spengler Signed-off-by: Jan Kara --- fs/ext3/namei.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) (limited to 'fs/ext3') diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index 672cea1..d093cbb 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c @@ -858,6 +858,7 @@ static struct buffer_head *ext3_find_entry(struct inode *dir, struct buffer_head * bh_use[NAMEI_RA_SIZE]; struct buffer_head * bh, *ret = NULL; unsigned long start, block, b; + const u8 *name = entry->name; int ra_max = 0; /* Number of bh's in the readahead buffer, bh_use[] */ int ra_ptr = 0; /* Current index into readahead @@ -871,6 +872,16 @@ static struct buffer_head *ext3_find_entry(struct inode *dir, namelen = entry->len; if (namelen > EXT3_NAME_LEN) return NULL; + if ((namelen <= 2) && (name[0] == '.') && + (name[1] == '.' || name[1] == 0)) { + /* + * "." or ".." will only be in the first block + * NFS may look up ".."; "." should be handled by the VFS + */ + block = start = 0; + nblocks = 1; + goto restart; + } if (is_dx(dir)) { bh = ext3_dx_find_entry(dir, entry, res_dir, &err); /* @@ -961,9 +972,8 @@ static struct buffer_head * ext3_dx_find_entry(struct inode *dir, struct qstr *entry, struct ext3_dir_entry_2 **res_dir, int *err) { - struct super_block * sb; + struct super_block *sb = dir->i_sb; struct dx_hash_info hinfo; - u32 hash; struct dx_frame frames[2], *frame; struct ext3_dir_entry_2 *de, *top; struct buffer_head *bh; @@ -972,18 +982,8 @@ static struct buffer_head * ext3_dx_find_entry(struct inode *dir, int namelen = entry->len; const u8 *name = entry->name; - sb = dir->i_sb; - /* NFS may look up ".." - look at dx_root directory block */ - if (namelen > 2 || name[0] != '.'|| (namelen == 2 && name[1] != '.')) { - if (!(frame = dx_probe(entry, dir, &hinfo, frames, err))) - return NULL; - } else { - frame = frames; - frame->bh = NULL; /* for dx_release() */ - frame->at = (struct dx_entry *)frames; /* hack for zero entry*/ - dx_set_block(frame->at, 0); /* dx_root block is 0 */ - } - hash = hinfo.hash; + if (!(frame = dx_probe(entry, dir, &hinfo, frames, err))) + return NULL; do { block = dx_get_block(frame->at); if (!(bh = ext3_bread (NULL,dir, block, 0, err))) @@ -1009,7 +1009,7 @@ static struct buffer_head * ext3_dx_find_entry(struct inode *dir, } brelse (bh); /* Check to see if we should continue to search */ - retval = ext3_htree_next_block(dir, hash, frame, + retval = ext3_htree_next_block(dir, hinfo.hash, frame, frames, NULL); if (retval < 0) { ext3_warning(sb, __func__, -- cgit v1.1 From 5026e90b86684bc878e4db0a8cd043fed769719c Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sat, 16 Oct 2010 19:37:00 -0400 Subject: ext3: Use search_dirblock() in ext3_dx_find_entry() Use the search_dirblock() in ext3_dx_find_entry(). It makes the code easier to read, and it takes advantage of common code. It also saves 100 bytes or so of text space. Signed-off-by: "Theodore Ts'o" Cc: Brad Spengler Signed-off-by: Jan Kara --- fs/ext3/namei.c | 33 ++++++++++++--------------------- 1 file changed, 12 insertions(+), 21 deletions(-) (limited to 'fs/ext3') diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index d093cbb..9cc0b2c 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c @@ -975,12 +975,9 @@ static struct buffer_head * ext3_dx_find_entry(struct inode *dir, struct super_block *sb = dir->i_sb; struct dx_hash_info hinfo; struct dx_frame frames[2], *frame; - struct ext3_dir_entry_2 *de, *top; struct buffer_head *bh; unsigned long block; int retval; - int namelen = entry->len; - const u8 *name = entry->name; if (!(frame = dx_probe(entry, dir, &hinfo, frames, err))) return NULL; @@ -988,26 +985,20 @@ static struct buffer_head * ext3_dx_find_entry(struct inode *dir, block = dx_get_block(frame->at); if (!(bh = ext3_bread (NULL,dir, block, 0, err))) goto errout; - de = (struct ext3_dir_entry_2 *) bh->b_data; - top = (struct ext3_dir_entry_2 *) ((char *) de + sb->s_blocksize - - EXT3_DIR_REC_LEN(0)); - for (; de < top; de = ext3_next_entry(de)) { - int off = (block << EXT3_BLOCK_SIZE_BITS(sb)) - + ((char *) de - bh->b_data); - - if (!ext3_check_dir_entry(__func__, dir, de, bh, off)) { - brelse(bh); - *err = ERR_BAD_DX_DIR; - goto errout; - } - if (ext3_match(namelen, name, de)) { - *res_dir = de; - dx_release(frames); - return bh; - } + retval = search_dirblock(bh, dir, entry, + block << EXT3_BLOCK_SIZE_BITS(sb), + res_dir); + if (retval == 1) { + dx_release(frames); + return bh; } - brelse (bh); + brelse(bh); + if (retval == -1) { + *err = ERR_BAD_DX_DIR; + goto errout; + } + /* Check to see if we should continue to search */ retval = ext3_htree_next_block(dir, hinfo.hash, frame, frames, NULL); -- cgit v1.1 From ad1857a0e0cb29313efae3bb69c913b2c3c833a1 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 23 Nov 2010 13:30:33 +0900 Subject: ext3: Add journal error check into ext3_rename() Check return value of ext3_journal_get_write_access() and ext3_journal_dirty_metadata(). Signed-off-by: Namhyung Kim Signed-off-by: Jan Kara --- fs/ext3/namei.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) (limited to 'fs/ext3') diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index 9cc0b2c..e69eed5 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c @@ -2362,7 +2362,9 @@ static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry, goto end_rename; } else { BUFFER_TRACE(new_bh, "get write access"); - ext3_journal_get_write_access(handle, new_bh); + retval = ext3_journal_get_write_access(handle, new_bh); + if (retval) + goto journal_error; new_de->inode = cpu_to_le32(old_inode->i_ino); if (EXT3_HAS_INCOMPAT_FEATURE(new_dir->i_sb, EXT3_FEATURE_INCOMPAT_FILETYPE)) @@ -2371,7 +2373,9 @@ static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry, new_dir->i_ctime = new_dir->i_mtime = CURRENT_TIME_SEC; ext3_mark_inode_dirty(handle, new_dir); BUFFER_TRACE(new_bh, "call ext3_journal_dirty_metadata"); - ext3_journal_dirty_metadata(handle, new_bh); + retval = ext3_journal_dirty_metadata(handle, new_bh); + if (retval) + goto journal_error; brelse(new_bh); new_bh = NULL; } @@ -2420,10 +2424,17 @@ static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry, ext3_update_dx_flag(old_dir); if (dir_bh) { BUFFER_TRACE(dir_bh, "get_write_access"); - ext3_journal_get_write_access(handle, dir_bh); + retval = ext3_journal_get_write_access(handle, dir_bh); + if (retval) + goto journal_error; PARENT_INO(dir_bh->b_data) = cpu_to_le32(new_dir->i_ino); BUFFER_TRACE(dir_bh, "call ext3_journal_dirty_metadata"); - ext3_journal_dirty_metadata(handle, dir_bh); + retval = ext3_journal_dirty_metadata(handle, dir_bh); + if (retval) { +journal_error: + ext3_std_error(new_dir->i_sb, retval); + goto end_rename; + } drop_nlink(old_dir); if (new_inode) { drop_nlink(new_inode); -- cgit v1.1 From b853b96b1dbdc05fc8eae141a595366d8172962b Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Mon, 22 Nov 2010 12:29:17 +0100 Subject: ext3: Add batched discard support for ext3 Walk through allocation groups and trim all free extents. It can be invoked through FITRIM ioctl on the file system. The main idea is to provide a way to trim the whole file system if needed, since some SSD's may suffer from performance loss after the whole device was filled (it does not mean that fs is full!). It search for free extents in allocation groups specified by Byte range start -> start+len. When the free extent is within this range, blocks are marked as used and then trimmed. Afterwards these blocks are marked as free in per-group bitmap. [JK: Fixed up error handling and trimming of a single group] Signed-off-by: Lukas Czerner Reviewed-by: Jan Kara Reviewed-by: Dmitry Monakhov Signed-off-by: Jan Kara --- fs/ext3/balloc.c | 266 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 266 insertions(+) (limited to 'fs/ext3') diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c index b3db226..045995c 100644 --- a/fs/ext3/balloc.c +++ b/fs/ext3/balloc.c @@ -20,6 +20,7 @@ #include #include #include +#include /* * balloc.c contains the blocks allocation and deallocation routines @@ -39,6 +40,21 @@ #define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) +/* + * Calculate the block group number and offset, given a block number + */ +static void ext3_get_group_no_and_offset(struct super_block *sb, + ext3_fsblk_t blocknr, unsigned long *blockgrpp, ext3_grpblk_t *offsetp) +{ + struct ext3_super_block *es = EXT3_SB(sb)->s_es; + + blocknr = blocknr - le32_to_cpu(es->s_first_data_block); + if (offsetp) + *offsetp = blocknr % EXT3_BLOCKS_PER_GROUP(sb); + if (blockgrpp) + *blockgrpp = blocknr / EXT3_BLOCKS_PER_GROUP(sb); +} + /** * ext3_get_group_desc() -- load group descriptor from disk * @sb: super block @@ -1885,3 +1901,253 @@ unsigned long ext3_bg_num_gdb(struct super_block *sb, int group) return ext3_bg_num_gdb_meta(sb,group); } + +/** + * ext3_trim_all_free -- function to trim all free space in alloc. group + * @sb: super block for file system + * @group: allocation group to trim + * @start: first group block to examine + * @max: last group block to examine + * @gdp: allocation group description structure + * @minblocks: minimum extent block count + * + * ext3_trim_all_free walks through group's block bitmap searching for free + * blocks. When the free block is found, it tries to allocate this block and + * consequent free block to get the biggest free extent possible, until it + * reaches any used block. Then issue a TRIM command on this extent and free + * the extent in the block bitmap. This is done until whole group is scanned. + */ +ext3_grpblk_t ext3_trim_all_free(struct super_block *sb, unsigned int group, + ext3_grpblk_t start, ext3_grpblk_t max, + ext3_grpblk_t minblocks) +{ + handle_t *handle; + ext3_grpblk_t next, free_blocks, bit, freed, count = 0; + ext3_fsblk_t discard_block; + struct ext3_sb_info *sbi; + struct buffer_head *gdp_bh, *bitmap_bh = NULL; + struct ext3_group_desc *gdp; + int err = 0, ret = 0; + + /* + * We will update one block bitmap, and one group descriptor + */ + handle = ext3_journal_start_sb(sb, 2); + if (IS_ERR(handle)) + return PTR_ERR(handle); + + bitmap_bh = read_block_bitmap(sb, group); + if (!bitmap_bh) { + err = -EIO; + goto err_out; + } + + BUFFER_TRACE(bitmap_bh, "getting undo access"); + err = ext3_journal_get_undo_access(handle, bitmap_bh); + if (err) + goto err_out; + + gdp = ext3_get_group_desc(sb, group, &gdp_bh); + if (!gdp) { + err = -EIO; + goto err_out; + } + + BUFFER_TRACE(gdp_bh, "get_write_access"); + err = ext3_journal_get_write_access(handle, gdp_bh); + if (err) + goto err_out; + + free_blocks = le16_to_cpu(gdp->bg_free_blocks_count); + sbi = EXT3_SB(sb); + + /* Walk through the whole group */ + while (start < max) { + start = bitmap_search_next_usable_block(start, bitmap_bh, max); + if (start < 0) + break; + next = start; + + /* + * Allocate contiguous free extents by setting bits in the + * block bitmap + */ + while (next < max + && claim_block(sb_bgl_lock(sbi, group), + next, bitmap_bh)) { + next++; + } + + /* We did not claim any blocks */ + if (next == start) + continue; + + discard_block = (ext3_fsblk_t)start + + ext3_group_first_block_no(sb, group); + + /* Update counters */ + spin_lock(sb_bgl_lock(sbi, group)); + le16_add_cpu(&gdp->bg_free_blocks_count, start - next); + spin_unlock(sb_bgl_lock(sbi, group)); + percpu_counter_sub(&sbi->s_freeblocks_counter, next - start); + + /* Do not issue a TRIM on extents smaller than minblocks */ + if ((next - start) < minblocks) + goto free_extent; + + /* Send the TRIM command down to the device */ + err = sb_issue_discard(sb, discard_block, next - start, + GFP_NOFS, 0); + count += (next - start); +free_extent: + freed = 0; + + /* + * Clear bits in the bitmap + */ + for (bit = start; bit < next; bit++) { + BUFFER_TRACE(bitmap_bh, "clear bit"); + if (!ext3_clear_bit_atomic(sb_bgl_lock(sbi, group), + bit, bitmap_bh->b_data)) { + ext3_error(sb, __func__, + "bit already cleared for block "E3FSBLK, + (unsigned long)bit); + BUFFER_TRACE(bitmap_bh, "bit already cleared"); + } else { + freed++; + } + } + + /* Update couters */ + spin_lock(sb_bgl_lock(sbi, group)); + le16_add_cpu(&gdp->bg_free_blocks_count, freed); + spin_unlock(sb_bgl_lock(sbi, group)); + percpu_counter_add(&sbi->s_freeblocks_counter, freed); + + start = next; + if (err < 0) { + if (err != -EOPNOTSUPP) + ext3_warning(sb, __func__, "Discard command " + "returned error %d\n", err); + break; + } + + if (fatal_signal_pending(current)) { + err = -ERESTARTSYS; + break; + } + + cond_resched(); + + /* No more suitable extents */ + if ((free_blocks - count) < minblocks) + break; + } + + /* We dirtied the bitmap block */ + BUFFER_TRACE(bitmap_bh, "dirtied bitmap block"); + ret = ext3_journal_dirty_metadata(handle, bitmap_bh); + if (!err) + err = ret; + + /* And the group descriptor block */ + BUFFER_TRACE(gdp_bh, "dirtied group descriptor block"); + ret = ext3_journal_dirty_metadata(handle, gdp_bh); + if (!err) + err = ret; + + ext3_debug("trimmed %d blocks in the group %d\n", + count, group); + +err_out: + if (err) + count = err; + ext3_journal_stop(handle); + brelse(bitmap_bh); + + return count; +} + +/** + * ext3_trim_fs() -- trim ioctl handle function + * @sb: superblock for filesystem + * @start: First Byte to trim + * @len: number of Bytes to trim from start + * @minlen: minimum extent length in Bytes + * + * ext3_trim_fs goes through all allocation groups containing Bytes from + * start to start+len. For each such a group ext3_trim_all_free function + * is invoked to trim all free space. + */ +int ext3_trim_fs(struct super_block *sb, struct fstrim_range *range) +{ + ext3_grpblk_t last_block, first_block, free_blocks; + unsigned long first_group, last_group; + unsigned long group, ngroups; + struct ext3_group_desc *gdp; + struct ext3_super_block *es = EXT3_SB(sb)->s_es; + uint64_t start, len, minlen, trimmed; + ext3_fsblk_t max_blks = le32_to_cpu(es->s_blocks_count); + int ret = 0; + + start = range->start >> sb->s_blocksize_bits; + len = range->len >> sb->s_blocksize_bits; + minlen = range->minlen >> sb->s_blocksize_bits; + trimmed = 0; + + if (unlikely(minlen > EXT3_BLOCKS_PER_GROUP(sb))) + return -EINVAL; + if (start >= max_blks) + goto out; + if (start < le32_to_cpu(es->s_first_data_block)) { + len -= le32_to_cpu(es->s_first_data_block) - start; + start = le32_to_cpu(es->s_first_data_block); + } + if (start + len > max_blks) + len = max_blks - start; + + ngroups = EXT3_SB(sb)->s_groups_count; + smp_rmb(); + + /* Determine first and last group to examine based on start and len */ + ext3_get_group_no_and_offset(sb, (ext3_fsblk_t) start, + &first_group, &first_block); + ext3_get_group_no_and_offset(sb, (ext3_fsblk_t) (start + len), + &last_group, &last_block); + last_group = (last_group > ngroups - 1) ? ngroups - 1 : last_group; + last_block = EXT3_BLOCKS_PER_GROUP(sb); + + if (first_group > last_group) + return -EINVAL; + + for (group = first_group; group <= last_group; group++) { + gdp = ext3_get_group_desc(sb, group, NULL); + if (!gdp) + break; + + free_blocks = le16_to_cpu(gdp->bg_free_blocks_count); + if (free_blocks < minlen) + continue; + + if (len >= EXT3_BLOCKS_PER_GROUP(sb)) + len -= (EXT3_BLOCKS_PER_GROUP(sb) - first_block); + else + last_block = first_block + len; + + ret = ext3_trim_all_free(sb, group, first_block, + last_block, minlen); + if (ret < 0) + break; + + trimmed += ret; + first_block = 0; + } + + if (ret >= 0) + ret = 0; + +out: + range->len = trimmed * sb->s_blocksize; + + return ret; +} -- cgit v1.1 From 9c52749232b5cef506877ac633ea14083bd17e02 Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Mon, 22 Nov 2010 12:29:18 +0100 Subject: ext3: Add FITRIM handling The ioctl takes fstrim_range structure (defined in include/linux/fs.h) as an argument specifying a range of filesystem to trim and the minimum size of an continguous extent to trim. After the FITRIM is done, the number of bytes passed from the filesystem down the block stack to the device for potential discard is stored in fstrim_range.len. This number is a maximum discard amount from the storage device's perspective, because FITRIM called repeatedly will keep sending the same sectors for discard. fstrim_range.len will report the same potential discard bytes each time, but only sectors which had been written to between the discards would actually be discarded by the storage device. Further, the kernel block layer reserves the right to adjust the discard ranges to fit raid stripe geometry, non-trim capable devices in a LVM setup, etc. These reductions would not be reflected in fstrim_range.len. Thus fstrim_range.len can give the user better insight on how much storage space has potentially been released for wear-leveling, but it needs to be one of only one criteria the userspace tools take into account when trying to optimize calls to FITRIM. Thanks to Greg Freemyer for better commit message. Signed-off-by: Lukas Czerner Signed-off-by: Jan Kara --- fs/ext3/ioctl.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'fs/ext3') diff --git a/fs/ext3/ioctl.c b/fs/ext3/ioctl.c index 8897481..fc080dd 100644 --- a/fs/ext3/ioctl.c +++ b/fs/ext3/ioctl.c @@ -276,7 +276,29 @@ group_add_out: mnt_drop_write(filp->f_path.mnt); return err; } + case FITRIM: { + struct super_block *sb = inode->i_sb; + struct fstrim_range range; + int ret = 0; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (copy_from_user(&range, (struct fstrim_range *)arg, + sizeof(range))) + return -EFAULT; + + ret = ext3_trim_fs(sb, &range); + if (ret < 0) + return ret; + + if (copy_to_user((struct fstrim_range *)arg, &range, + sizeof(range))) + return -EFAULT; + + return 0; + } default: return -ENOTTY; -- cgit v1.1 From 41dc6385bd6cd3366c1b4bede33688521eb21db9 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 25 Nov 2010 01:53:12 +0900 Subject: ext3: Add journal error check in resize.c Check return value of ext3_journal_get_write_access() and ext3_journal_dirty_metadata(). Signed-off-by: Namhyung Kim Signed-off-by: Jan Kara --- fs/ext3/resize.c | 65 ++++++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 51 insertions(+), 14 deletions(-) (limited to 'fs/ext3') diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c index e746d30..108b142 100644 --- a/fs/ext3/resize.c +++ b/fs/ext3/resize.c @@ -249,7 +249,11 @@ static int setup_new_group_blocks(struct super_block *sb, memcpy(gdb->b_data, sbi->s_group_desc[i]->b_data, gdb->b_size); set_buffer_uptodate(gdb); unlock_buffer(gdb); - ext3_journal_dirty_metadata(handle, gdb); + err = ext3_journal_dirty_metadata(handle, gdb); + if (err) { + brelse(gdb); + goto exit_bh; + } ext3_set_bit(bit, bh->b_data); brelse(gdb); } @@ -269,7 +273,11 @@ static int setup_new_group_blocks(struct super_block *sb, err = PTR_ERR(gdb); goto exit_bh; } - ext3_journal_dirty_metadata(handle, gdb); + err = ext3_journal_dirty_metadata(handle, gdb); + if (err) { + brelse(gdb); + goto exit_bh; + } ext3_set_bit(bit, bh->b_data); brelse(gdb); } @@ -295,7 +303,11 @@ static int setup_new_group_blocks(struct super_block *sb, err = PTR_ERR(it); goto exit_bh; } - ext3_journal_dirty_metadata(handle, it); + err = ext3_journal_dirty_metadata(handle, it); + if (err) { + brelse(it); + goto exit_bh; + } brelse(it); ext3_set_bit(bit, bh->b_data); } @@ -306,7 +318,9 @@ static int setup_new_group_blocks(struct super_block *sb, mark_bitmap_end(input->blocks_count, EXT3_BLOCKS_PER_GROUP(sb), bh->b_data); - ext3_journal_dirty_metadata(handle, bh); + err = ext3_journal_dirty_metadata(handle, bh); + if (err) + goto exit_bh; brelse(bh); /* Mark unused entries in inode bitmap used */ @@ -319,7 +333,7 @@ static int setup_new_group_blocks(struct super_block *sb, mark_bitmap_end(EXT3_INODES_PER_GROUP(sb), EXT3_BLOCKS_PER_GROUP(sb), bh->b_data); - ext3_journal_dirty_metadata(handle, bh); + err = ext3_journal_dirty_metadata(handle, bh); exit_bh: brelse(bh); @@ -503,12 +517,19 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, * reserved inode, and will become GDT blocks (primary and backup). */ data[gdb_num % EXT3_ADDR_PER_BLOCK(sb)] = 0; - ext3_journal_dirty_metadata(handle, dind); + err = ext3_journal_dirty_metadata(handle, dind); + if (err) + goto exit_group_desc; brelse(dind); + dind = NULL; inode->i_blocks -= (gdbackups + 1) * sb->s_blocksize >> 9; - ext3_mark_iloc_dirty(handle, inode, &iloc); + err = ext3_mark_iloc_dirty(handle, inode, &iloc); + if (err) + goto exit_group_desc; memset((*primary)->b_data, 0, sb->s_blocksize); - ext3_journal_dirty_metadata(handle, *primary); + err = ext3_journal_dirty_metadata(handle, *primary); + if (err) + goto exit_group_desc; o_group_desc = EXT3_SB(sb)->s_group_desc; memcpy(n_group_desc, o_group_desc, @@ -519,10 +540,14 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, kfree(o_group_desc); le16_add_cpu(&es->s_reserved_gdt_blocks, -1); - ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); + err = ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); + if (err) + goto exit_inode; return 0; +exit_group_desc: + kfree(n_group_desc); exit_inode: //ext3_journal_release_buffer(handle, iloc.bh); brelse(iloc.bh); @@ -706,16 +731,20 @@ static void update_backups(struct super_block *sb, } ext3_debug("update metadata backup %#04lx\n", (unsigned long)bh->b_blocknr); - if ((err = ext3_journal_get_write_access(handle, bh))) + if ((err = ext3_journal_get_write_access(handle, bh))) { + brelse(bh); break; + } lock_buffer(bh); memcpy(bh->b_data, data, size); if (rest) memset(bh->b_data + size, 0, rest); set_buffer_uptodate(bh); unlock_buffer(bh); - ext3_journal_dirty_metadata(handle, bh); + err = ext3_journal_dirty_metadata(handle, bh); brelse(bh); + if (err) + break; } if ((err2 = ext3_journal_stop(handle)) && !err) err = err2; @@ -922,7 +951,9 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input) /* Update the global fs size fields */ sbi->s_groups_count++; - ext3_journal_dirty_metadata(handle, primary); + err = ext3_journal_dirty_metadata(handle, primary); + if (err) + goto exit_journal; /* Update the reserved block counts only once the new group is * active. */ @@ -934,7 +965,7 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input) percpu_counter_add(&sbi->s_freeinodes_counter, EXT3_INODES_PER_GROUP(sb)); - ext3_journal_dirty_metadata(handle, sbi->s_sbh); + err = ext3_journal_dirty_metadata(handle, sbi->s_sbh); exit_journal: mutex_unlock(&sbi->s_resize_lock); @@ -1064,8 +1095,14 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es, goto exit_put; } es->s_blocks_count = cpu_to_le32(o_blocks_count + add); - ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); + err = ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); mutex_unlock(&EXT3_SB(sb)->s_resize_lock); + if (err) { + ext3_warning(sb, __func__, + "error %d on journal dirty metadata", err); + ext3_journal_stop(handle); + goto exit_put; + } ext3_debug("freeing blocks "E3FSBLK" through "E3FSBLK"\n", o_blocks_count, o_blocks_count + add); ext3_free_blocks_sb(handle, sb, o_blocks_count, add, &freed_blocks); -- cgit v1.1 From 156e74312f1ffc0a2639c24c771c5a0e106f0505 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 25 Nov 2010 01:53:13 +0900 Subject: ext3: Add more journal error check Check return value of ext3_journal_get_write_acccess() and ext3_journal_dirty_metadata(). Signed-off-by: Namhyung Kim Signed-off-by: Jan Kara --- fs/ext3/inode.c | 6 ++++-- fs/ext3/namei.c | 4 +++- 2 files changed, 7 insertions(+), 3 deletions(-) (limited to 'fs/ext3') diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index a958061..ae94f6d 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -2145,13 +2145,15 @@ static void ext3_clear_blocks(handle_t *handle, struct inode *inode, if (try_to_extend_transaction(handle, inode)) { if (bh) { BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); - ext3_journal_dirty_metadata(handle, bh); + if (ext3_journal_dirty_metadata(handle, bh)) + return; } ext3_mark_inode_dirty(handle, inode); truncate_restart_transaction(handle, inode); if (bh) { BUFFER_TRACE(bh, "retaking write access"); - ext3_journal_get_write_access(handle, bh); + if (ext3_journal_get_write_access(handle, bh)) + return; } } diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index e69eed5..cc682ab 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c @@ -1598,7 +1598,9 @@ static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry, if (err) goto journal_error; } - ext3_journal_dirty_metadata(handle, frames[0].bh); + err = ext3_journal_dirty_metadata(handle, frames[0].bh); + if (err) + goto journal_error; } de = do_split(handle, dir, &bh, frame, &hinfo, &err); if (!de) -- cgit v1.1 From a4ae3094869f18e26ece25ad175bbe4cd740e60b Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Tue, 7 Dec 2010 11:55:27 -0600 Subject: ext3: speed up file creates by optimizing rec_len functions The addition of 64k block capability in the rec_len_from_disk and rec_len_to_disk functions added a bit of math overhead which slows down file create workloads needlessly when the architecture cannot even support 64k blocks, thanks to page size limits. Similar changes already exist in the ext4 codebase. The directory entry checking can also be optimized a bit by sprinkling in some unlikely() conditions to move the error handling out of line. bonnie++ sequential file creates on a 512MB ramdisk speeds up from about 77,000/s to about 82,000/s, about a 6% improvement. Signed-off-by: Eric Sandeen Signed-off-by: Jan Kara --- fs/ext3/dir.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'fs/ext3') diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c index e2e72c3..34f0a07 100644 --- a/fs/ext3/dir.c +++ b/fs/ext3/dir.c @@ -69,25 +69,26 @@ int ext3_check_dir_entry (const char * function, struct inode * dir, const char * error_msg = NULL; const int rlen = ext3_rec_len_from_disk(de->rec_len); - if (rlen < EXT3_DIR_REC_LEN(1)) + if (unlikely(rlen < EXT3_DIR_REC_LEN(1))) error_msg = "rec_len is smaller than minimal"; - else if (rlen % 4 != 0) + else if (unlikely(rlen % 4 != 0)) error_msg = "rec_len % 4 != 0"; - else if (rlen < EXT3_DIR_REC_LEN(de->name_len)) + else if (unlikely(rlen < EXT3_DIR_REC_LEN(de->name_len))) error_msg = "rec_len is too small for name_len"; - else if (((char *) de - bh->b_data) + rlen > dir->i_sb->s_blocksize) + else if (unlikely((((char *) de - bh->b_data) + rlen > dir->i_sb->s_blocksize))) error_msg = "directory entry across blocks"; - else if (le32_to_cpu(de->inode) > - le32_to_cpu(EXT3_SB(dir->i_sb)->s_es->s_inodes_count)) + else if (unlikely(le32_to_cpu(de->inode) > + le32_to_cpu(EXT3_SB(dir->i_sb)->s_es->s_inodes_count))) error_msg = "inode out of bounds"; - if (error_msg != NULL) + if (unlikely(error_msg != NULL)) ext3_error (dir->i_sb, function, "bad entry in directory #%lu: %s - " "offset=%lu, inode=%lu, rec_len=%d, name_len=%d", dir->i_ino, error_msg, offset, (unsigned long) le32_to_cpu(de->inode), rlen, de->name_len); + return error_msg == NULL ? 1 : 0; } -- cgit v1.1 From 8057b9653923bd762d89ccb730c76cba40ce96f0 Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Thu, 9 Dec 2010 15:39:34 +0100 Subject: ext3: Remove redundant unlikely() IS_ERR() already implies unlikely(), so it can be omitted here. Signed-off-by: Tobias Klauser Signed-off-by: Jan Kara --- fs/ext3/namei.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/ext3') diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index cc682ab..b27ba71 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c @@ -1038,7 +1038,7 @@ static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry, str return ERR_PTR(-EIO); } inode = ext3_iget(dir->i_sb, ino); - if (unlikely(IS_ERR(inode))) { + if (IS_ERR(inode)) { if (PTR_ERR(inode) == -ESTALE) { ext3_error(dir->i_sb, __func__, "deleted inode referenced: %lu", -- cgit v1.1