diff options
Diffstat (limited to 'fs/ntfs/aops.c')
-rw-r--r-- | fs/ntfs/aops.c | 294 |
1 file changed, 170 insertions, 124 deletions
diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c index 78adad7..b6cc8cf 100644 --- a/fs/ntfs/aops.c +++ b/fs/ntfs/aops.c @@ -27,6 +27,7 @@ #include <linux/swap.h> #include <linux/buffer_head.h> #include <linux/writeback.h> +#include <linux/bit_spinlock.h> #include "aops.h" #include "attrib.h" @@ -55,9 +56,8 @@ */ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate) { - static DEFINE_SPINLOCK(page_uptodate_lock); unsigned long flags; - struct buffer_head *tmp; + struct buffer_head *first, *tmp; struct page *page; ntfs_inode *ni; int page_uptodate = 1; @@ -89,11 +89,13 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate) } } else { clear_buffer_uptodate(bh); + SetPageError(page); ntfs_error(ni->vol->sb, "Buffer I/O error, logical block %llu.", (unsigned long long)bh->b_blocknr); - SetPageError(page); } - spin_lock_irqsave(&page_uptodate_lock, flags); + first = page_buffers(page); + local_irq_save(flags); + bit_spin_lock(BH_Uptodate_Lock, &first->b_state); clear_buffer_async_read(bh); unlock_buffer(bh); tmp = bh; @@ -108,7 +110,8 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate) } tmp = tmp->b_this_page; } while (tmp != bh); - spin_unlock_irqrestore(&page_uptodate_lock, flags); + bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); + local_irq_restore(flags); /* * If none of the buffers had errors then we can set the page uptodate, * but we first have to perform the post read mst fixups, if the @@ -141,7 +144,8 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate) unlock_page(page); return; still_busy: - spin_unlock_irqrestore(&page_uptodate_lock, flags); + bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); + local_irq_restore(flags); return; } @@ -185,13 +189,15 @@ static int ntfs_read_block(struct page *page) blocksize_bits = VFS_I(ni)->i_blkbits; blocksize = 1 << blocksize_bits; - if (!page_has_buffers(page)) + if (!page_has_buffers(page)) { 
create_empty_buffers(page, blocksize, 0); - bh = head = page_buffers(page); - if (unlikely(!bh)) { - unlock_page(page); - return -ENOMEM; + if (unlikely(!page_has_buffers(page))) { + unlock_page(page); + return -ENOMEM; + } } + bh = head = page_buffers(page); + BUG_ON(!bh); iblock = (s64)page->index << (PAGE_CACHE_SHIFT - blocksize_bits); read_lock_irqsave(&ni->size_lock, flags); @@ -204,6 +210,7 @@ static int ntfs_read_block(struct page *page) nr = i = 0; do { u8 *kaddr; + int err; if (unlikely(buffer_uptodate(bh))) continue; @@ -211,6 +218,7 @@ static int ntfs_read_block(struct page *page) arr[nr++] = bh; continue; } + err = 0; bh->b_bdev = vol->sb->s_bdev; /* Is the block within the allowed limits? */ if (iblock < lblock) { @@ -252,7 +260,6 @@ lock_retry_remap: goto handle_hole; /* If first try and runlist unmapped, map and retry. */ if (!is_retry && lcn == LCN_RL_NOT_MAPPED) { - int err; is_retry = TRUE; /* * Attempt to map runlist, dropping lock for @@ -263,20 +270,30 @@ lock_retry_remap: if (likely(!err)) goto lock_retry_remap; rl = NULL; - lcn = err; } else if (!rl) up_read(&ni->runlist.lock); + /* + * If buffer is outside the runlist, treat it as a + * hole. This can happen due to concurrent truncate + * for example. + */ + if (err == -ENOENT || lcn == LCN_ENOENT) { + err = 0; + goto handle_hole; + } /* Hard error, zero out region. */ + if (!err) + err = -EIO; bh->b_blocknr = -1; SetPageError(page); ntfs_error(vol->sb, "Failed to read from inode 0x%lx, " "attribute type 0x%x, vcn 0x%llx, " "offset 0x%x because its location on " "disk could not be determined%s " - "(error code %lli).", ni->mft_no, + "(error code %i).", ni->mft_no, ni->type, (unsigned long long)vcn, vcn_ofs, is_retry ? 
" even after " - "retrying" : "", (long long)lcn); + "retrying" : "", err); } /* * Either iblock was outside lblock limits or @@ -289,9 +306,10 @@ handle_hole: handle_zblock: kaddr = kmap_atomic(page, KM_USER0); memset(kaddr + i * blocksize, 0, blocksize); - flush_dcache_page(page); kunmap_atomic(kaddr, KM_USER0); - set_buffer_uptodate(bh); + flush_dcache_page(page); + if (likely(!err)) + set_buffer_uptodate(bh); } while (i++, iblock++, (bh = bh->b_this_page) != head); /* Release the lock if we took it. */ @@ -367,31 +385,38 @@ retry_readpage: return 0; } ni = NTFS_I(page->mapping->host); - + /* + * Only $DATA attributes can be encrypted and only unnamed $DATA + * attributes can be compressed. Index root can have the flags set but + * this means to create compressed/encrypted files, not that the + * attribute is compressed/encrypted. + */ + if (ni->type != AT_INDEX_ROOT) { + /* If attribute is encrypted, deny access, just like NT4. */ + if (NInoEncrypted(ni)) { + BUG_ON(ni->type != AT_DATA); + err = -EACCES; + goto err_out; + } + /* Compressed data streams are handled in compress.c. */ + if (NInoNonResident(ni) && NInoCompressed(ni)) { + BUG_ON(ni->type != AT_DATA); + BUG_ON(ni->name_len); + return ntfs_read_compressed_block(page); + } + } /* NInoNonResident() == NInoIndexAllocPresent() */ if (NInoNonResident(ni)) { - /* - * Only unnamed $DATA attributes can be compressed or - * encrypted. - */ - if (ni->type == AT_DATA && !ni->name_len) { - /* If file is encrypted, deny access, just like NT4. */ - if (NInoEncrypted(ni)) { - err = -EACCES; - goto err_out; - } - /* Compressed data streams are handled in compress.c. */ - if (NInoCompressed(ni)) - return ntfs_read_compressed_block(page); - } - /* Normal data stream. */ + /* Normal, non-resident data stream. */ return ntfs_read_block(page); } /* * Attribute is resident, implying it is not compressed or encrypted. 
* This also means the attribute is smaller than an mft record and * hence smaller than a page, so can simply zero out any pages with - * index above 0. + * index above 0. Note the attribute can actually be marked compressed + * but if it is resident the actual data is not compressed so we are + * ok to ignore the compressed flag here. */ if (unlikely(page->index > 0)) { kaddr = kmap_atomic(page, KM_USER0); @@ -511,19 +536,21 @@ static int ntfs_write_block(struct page *page, struct writeback_control *wbc) BUG_ON(!PageUptodate(page)); create_empty_buffers(page, blocksize, (1 << BH_Uptodate) | (1 << BH_Dirty)); + if (unlikely(!page_has_buffers(page))) { + ntfs_warning(vol->sb, "Error allocating page " + "buffers. Redirtying page so we try " + "again later."); + /* + * Put the page back on mapping->dirty_pages, but leave + * its buffers' dirty state as-is. + */ + redirty_page_for_writepage(wbc, page); + unlock_page(page); + return 0; + } } bh = head = page_buffers(page); - if (unlikely(!bh)) { - ntfs_warning(vol->sb, "Error allocating page buffers. " - "Redirtying page so we try again later."); - /* - * Put the page back on mapping->dirty_pages, but leave its - * buffer's dirty state as-is. - */ - redirty_page_for_writepage(wbc, page); - unlock_page(page); - return 0; - } + BUG_ON(!bh); /* NOTE: Different naming scheme to ntfs_read_block()! */ @@ -670,6 +697,27 @@ lock_retry_remap: } /* It is a hole, need to instantiate it. */ if (lcn == LCN_HOLE) { + u8 *kaddr; + unsigned long *bpos, *bend; + + /* Check if the buffer is zero. */ + kaddr = kmap_atomic(page, KM_USER0); + bpos = (unsigned long *)(kaddr + bh_offset(bh)); + bend = (unsigned long *)((u8*)bpos + blocksize); + do { + if (unlikely(*bpos)) + break; + } while (likely(++bpos < bend)); + kunmap_atomic(kaddr, KM_USER0); + if (bpos == bend) { + /* + * Buffer is zero and sparse, no need to write + * it. + */ + bh->b_blocknr = -1; + clear_buffer_dirty(bh); + continue; + } // TODO: Instantiate the hole. 
// clear_buffer_new(bh); // unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr); @@ -690,20 +738,37 @@ lock_retry_remap: if (likely(!err)) goto lock_retry_remap; rl = NULL; - lcn = err; } else if (!rl) up_read(&ni->runlist.lock); + /* + * If buffer is outside the runlist, truncate has cut it out + * of the runlist. Just clean and clear the buffer and set it + * uptodate so it can get discarded by the VM. + */ + if (err == -ENOENT || lcn == LCN_ENOENT) { + u8 *kaddr; + + bh->b_blocknr = -1; + clear_buffer_dirty(bh); + kaddr = kmap_atomic(page, KM_USER0); + memset(kaddr + bh_offset(bh), 0, blocksize); + kunmap_atomic(kaddr, KM_USER0); + flush_dcache_page(page); + set_buffer_uptodate(bh); + err = 0; + continue; + } /* Failed to map the buffer, even after retrying. */ + if (!err) + err = -EIO; bh->b_blocknr = -1; ntfs_error(vol->sb, "Failed to write to inode 0x%lx, " "attribute type 0x%x, vcn 0x%llx, offset 0x%x " "because its location on disk could not be " - "determined%s (error code %lli).", ni->mft_no, + "determined%s (error code %i).", ni->mft_no, ni->type, (unsigned long long)vcn, vcn_ofs, is_retry ? " even after " - "retrying" : "", (long long)lcn); - if (!err) - err = -EIO; + "retrying" : "", err); break; } while (block++, (bh = bh->b_this_page) != head); @@ -714,7 +779,7 @@ lock_retry_remap: /* For the error case, need to reset bh to the beginning. */ bh = head; - /* Just an optimization, so ->readpage() isn't called later. */ + /* Just an optimization, so ->readpage() is not called later. */ if (unlikely(!PageUptodate(page))) { int uptodate = 1; do { @@ -730,7 +795,6 @@ lock_retry_remap: /* Setup all mapped, dirty buffers for async write i/o. */ do { - get_bh(bh); if (buffer_mapped(bh) && buffer_dirty(bh)) { lock_buffer(bh); if (test_clear_buffer_dirty(bh)) { @@ -768,14 +832,8 @@ lock_retry_remap: BUG_ON(PageWriteback(page)); set_page_writeback(page); /* Keeps try_to_free_buffers() away. */ - unlock_page(page); - /* - * Submit the prepared buffers for i/o. 
Note the page is unlocked, - * and the async write i/o completion handler can end_page_writeback() - * at any time after the *first* submit_bh(). So the buffers can then - * disappear... - */ + /* Submit the prepared buffers for i/o. */ need_end_writeback = TRUE; do { struct buffer_head *next = bh->b_this_page; @@ -783,9 +841,9 @@ lock_retry_remap: submit_bh(WRITE, bh); need_end_writeback = FALSE; } - put_bh(bh); bh = next; } while (bh != head); + unlock_page(page); /* If no i/o was started, need to end_page_writeback(). */ if (unlikely(need_end_writeback)) @@ -860,7 +918,6 @@ static int ntfs_write_mst_block(struct page *page, sync = (wbc->sync_mode == WB_SYNC_ALL); /* Make sure we have mapped buffers. */ - BUG_ON(!page_has_buffers(page)); bh = head = page_buffers(page); BUG_ON(!bh); @@ -1280,38 +1337,42 @@ retry_writepage: ntfs_debug("Write outside i_size - truncated?"); return 0; } + /* + * Only $DATA attributes can be encrypted and only unnamed $DATA + * attributes can be compressed. Index root can have the flags set but + * this means to create compressed/encrypted files, not that the + * attribute is compressed/encrypted. + */ + if (ni->type != AT_INDEX_ROOT) { + /* If file is encrypted, deny access, just like NT4. */ + if (NInoEncrypted(ni)) { + unlock_page(page); + BUG_ON(ni->type != AT_DATA); + ntfs_debug("Denying write access to encrypted " + "file."); + return -EACCES; + } + /* Compressed data streams are handled in compress.c. */ + if (NInoNonResident(ni) && NInoCompressed(ni)) { + BUG_ON(ni->type != AT_DATA); + BUG_ON(ni->name_len); + // TODO: Implement and replace this with + // return ntfs_write_compressed_block(page); + unlock_page(page); + ntfs_error(vi->i_sb, "Writing to compressed files is " + "not supported yet. Sorry."); + return -EOPNOTSUPP; + } + // TODO: Implement and remove this check. + if (NInoNonResident(ni) && NInoSparse(ni)) { + unlock_page(page); + ntfs_error(vi->i_sb, "Writing to sparse files is not " + "supported yet. 
Sorry."); + return -EOPNOTSUPP; + } + } /* NInoNonResident() == NInoIndexAllocPresent() */ if (NInoNonResident(ni)) { - /* - * Only unnamed $DATA attributes can be compressed, encrypted, - * and/or sparse. - */ - if (ni->type == AT_DATA && !ni->name_len) { - /* If file is encrypted, deny access, just like NT4. */ - if (NInoEncrypted(ni)) { - unlock_page(page); - ntfs_debug("Denying write access to encrypted " - "file."); - return -EACCES; - } - /* Compressed data streams are handled in compress.c. */ - if (NInoCompressed(ni)) { - // TODO: Implement and replace this check with - // return ntfs_write_compressed_block(page); - unlock_page(page); - ntfs_error(vi->i_sb, "Writing to compressed " - "files is not supported yet. " - "Sorry."); - return -EOPNOTSUPP; - } - // TODO: Implement and remove this check. - if (NInoSparse(ni)) { - unlock_page(page); - ntfs_error(vi->i_sb, "Writing to sparse files " - "is not supported yet. Sorry."); - return -EOPNOTSUPP; - } - } /* We have to zero every time due to mmap-at-end-of-file. */ if (page->index >= (i_size >> PAGE_CACHE_SHIFT)) { /* The page straddles i_size. */ @@ -1324,14 +1385,16 @@ retry_writepage: /* Handle mst protected attributes. */ if (NInoMstProtected(ni)) return ntfs_write_mst_block(page, wbc); - /* Normal data stream. */ + /* Normal, non-resident data stream. */ return ntfs_write_block(page, wbc); } /* - * Attribute is resident, implying it is not compressed, encrypted, - * sparse, or mst protected. This also means the attribute is smaller - * than an mft record and hence smaller than a page, so can simply - * return error on any pages with index above 0. + * Attribute is resident, implying it is not compressed, encrypted, or + * mst protected. This also means the attribute is smaller than an mft + * record and hence smaller than a page, so can simply return error on + * any pages with index above 0. 
Note the attribute can actually be + * marked compressed but if it is resident the actual data is not + * compressed so we are ok to ignore the compressed flag here. */ BUG_ON(page_has_buffers(page)); BUG_ON(!PageUptodate(page)); @@ -1380,30 +1443,14 @@ retry_writepage: BUG_ON(PageWriteback(page)); set_page_writeback(page); unlock_page(page); - /* - * Here, we don't need to zero the out of bounds area everytime because - * the below memcpy() already takes care of the mmap-at-end-of-file - * requirements. If the file is converted to a non-resident one, then - * the code path use is switched to the non-resident one where the - * zeroing happens on each ntfs_writepage() invocation. - * - * The above also applies nicely when i_size is decreased. - * - * When i_size is increased, the memory between the old and new i_size - * _must_ be zeroed (or overwritten with new data). Otherwise we will - * expose data to userspace/disk which should never have been exposed. - * - * FIXME: Ensure that i_size increases do the zeroing/overwriting and - * if we cannot guarantee that, then enable the zeroing below. If the - * zeroing below is enabled, we MUST move the unlock_page() from above - * to after the kunmap_atomic(), i.e. just before the - * end_page_writeback(). - * UPDATE: ntfs_prepare/commit_write() do the zeroing on i_size - * increases for resident attributes so those are ok. - * TODO: ntfs_truncate(), others? + * Here, we do not need to zero the out of bounds area everytime + * because the below memcpy() already takes care of the + * mmap-at-end-of-file requirements. If the file is converted to a + * non-resident one, then the code path use is switched to the + * non-resident one where the zeroing happens on each ntfs_writepage() + * invocation. 
*/ - attr_len = le32_to_cpu(ctx->attr->data.resident.value_length); i_size = i_size_read(vi); if (unlikely(attr_len > i_size)) { @@ -1681,27 +1728,25 @@ lock_retry_remap: if (likely(!err)) goto lock_retry_remap; rl = NULL; - lcn = err; } else if (!rl) up_read(&ni->runlist.lock); /* * Failed to map the buffer, even after * retrying. */ + if (!err) + err = -EIO; bh->b_blocknr = -1; ntfs_error(vol->sb, "Failed to write to inode " "0x%lx, attribute type 0x%x, " "vcn 0x%llx, offset 0x%x " "because its location on disk " "could not be determined%s " - "(error code %lli).", + "(error code %i).", ni->mft_no, ni->type, (unsigned long long)vcn, vcn_ofs, is_retry ? " even " - "after retrying" : "", - (long long)lcn); - if (!err) - err = -EIO; + "after retrying" : "", err); goto err_out; } /* We now have a successful remap, i.e. lcn >= 0. */ @@ -2357,6 +2402,7 @@ void mark_ntfs_record_dirty(struct page *page, const unsigned int ofs) { buffers_to_free = bh; } bh = head = page_buffers(page); + BUG_ON(!bh); do { bh_ofs = bh_offset(bh); if (bh_ofs + bh_size <= ofs) |