diff options
author | Paul Mackerras <paulus@samba.org> | 2005-09-25 22:51:50 +1000 |
---|---|---|
committer | Paul Mackerras <paulus@samba.org> | 2005-09-25 22:51:50 +1000 |
commit | e5baa396af7560382d2cf3f0871d616b61fc284c (patch) | |
tree | 6afc166894b8c8b3b2cf6add72a726be14ae2443 /fs | |
parent | d6a4c847e43c851cc0ddf73087a730227223f989 (diff) | |
parent | ef6bd6eb90ad72ee8ee7ba8b271f27102e9a90c1 (diff) | |
download | op-kernel-dev-e5baa396af7560382d2cf3f0871d616b61fc284c.zip op-kernel-dev-e5baa396af7560382d2cf3f0871d616b61fc284c.tar.gz |
Merge from Linus' tree.
Diffstat (limited to 'fs')
-rw-r--r-- | fs/9p/conv.c | 157 | ||||
-rw-r--r-- | fs/9p/v9fs.c | 8 | ||||
-rw-r--r-- | fs/9p/vfs_inode.c | 4 | ||||
-rw-r--r-- | fs/9p/vfs_super.c | 24 | ||||
-rw-r--r-- | fs/cifs/cifsfs.c | 2 | ||||
-rw-r--r-- | fs/cifs/connect.c | 2 | ||||
-rw-r--r-- | fs/dcache.c | 3 | ||||
-rw-r--r-- | fs/ext3/balloc.c | 6 | ||||
-rw-r--r-- | fs/ext3/resize.c | 6 | ||||
-rw-r--r-- | fs/ext3/super.c | 11 | ||||
-rw-r--r-- | fs/fat/inode.c | 11 | ||||
-rw-r--r-- | fs/jfs/inode.c | 3 | ||||
-rw-r--r-- | fs/jfs/jfs_dmap.c | 2 | ||||
-rw-r--r-- | fs/jfs/jfs_txnmgr.c | 15 | ||||
-rw-r--r-- | fs/jfs/jfs_txnmgr.h | 1 | ||||
-rw-r--r-- | fs/nfs/read.c | 5 | ||||
-rw-r--r-- | fs/ntfs/ChangeLog | 2 | ||||
-rw-r--r-- | fs/ntfs/aops.c | 122 | ||||
-rw-r--r-- | fs/ntfs/inode.c | 9 | ||||
-rw-r--r-- | fs/ntfs/malloc.h | 2 | ||||
-rw-r--r-- | fs/ntfs/runlist.c | 169 | ||||
-rw-r--r-- | fs/proc/base.c | 86 |
22 files changed, 404 insertions, 246 deletions
diff --git a/fs/9p/conv.c b/fs/9p/conv.c index 1554731..18121af 100644 --- a/fs/9p/conv.c +++ b/fs/9p/conv.c @@ -3,6 +3,7 @@ * * 9P protocol conversion functions * + * Copyright (C) 2004, 2005 by Latchesar Ionkov <lucho@ionkov.net> * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov> * @@ -55,66 +56,70 @@ static inline int buf_check_overflow(struct cbuf *buf) return buf->p > buf->ep; } -static inline void buf_check_size(struct cbuf *buf, int len) +static inline int buf_check_size(struct cbuf *buf, int len) { if (buf->p+len > buf->ep) { if (buf->p < buf->ep) { eprintk(KERN_ERR, "buffer overflow\n"); buf->p = buf->ep + 1; + return 0; } } + + return 1; } static inline void *buf_alloc(struct cbuf *buf, int len) { void *ret = NULL; - buf_check_size(buf, len); - ret = buf->p; - buf->p += len; + if (buf_check_size(buf, len)) { + ret = buf->p; + buf->p += len; + } return ret; } static inline void buf_put_int8(struct cbuf *buf, u8 val) { - buf_check_size(buf, 1); - - buf->p[0] = val; - buf->p++; + if (buf_check_size(buf, 1)) { + buf->p[0] = val; + buf->p++; + } } static inline void buf_put_int16(struct cbuf *buf, u16 val) { - buf_check_size(buf, 2); - - *(__le16 *) buf->p = cpu_to_le16(val); - buf->p += 2; + if (buf_check_size(buf, 2)) { + *(__le16 *) buf->p = cpu_to_le16(val); + buf->p += 2; + } } static inline void buf_put_int32(struct cbuf *buf, u32 val) { - buf_check_size(buf, 4); - - *(__le32 *)buf->p = cpu_to_le32(val); - buf->p += 4; + if (buf_check_size(buf, 4)) { + *(__le32 *)buf->p = cpu_to_le32(val); + buf->p += 4; + } } static inline void buf_put_int64(struct cbuf *buf, u64 val) { - buf_check_size(buf, 8); - - *(__le64 *)buf->p = cpu_to_le64(val); - buf->p += 8; + if (buf_check_size(buf, 8)) { + *(__le64 *)buf->p = cpu_to_le64(val); + buf->p += 8; + } } static inline void buf_put_stringn(struct cbuf *buf, const char *s, u16 slen) { - buf_check_size(buf, slen + 2); - - buf_put_int16(buf, slen); - memcpy(buf->p, s, slen); - buf->p += slen; + if (buf_check_size(buf, slen + 2)) { + buf_put_int16(buf, slen); + memcpy(buf->p, s, slen); + buf->p += slen; + } } static inline void buf_put_string(struct cbuf *buf, const char *s) @@ -124,20 +129,20 @@ static inline void buf_put_string(struct cbuf *buf, const char *s) static inline void buf_put_data(struct cbuf *buf, void *data, u32 datalen) { - buf_check_size(buf, datalen); - - memcpy(buf->p, data, datalen); - buf->p += datalen; + if (buf_check_size(buf, datalen)) { + memcpy(buf->p, data, datalen); + buf->p += datalen; + } } static inline u8 buf_get_int8(struct cbuf *buf) { u8 ret = 0; - buf_check_size(buf, 1); - ret = buf->p[0]; - - buf->p++; + if (buf_check_size(buf, 1)) { + ret = buf->p[0]; + buf->p++; + } return ret; } @@ -146,10 +151,10 @@ static inline u16 buf_get_int16(struct cbuf *buf) { u16 ret = 0; - buf_check_size(buf, 2); - ret = le16_to_cpu(*(__le16 *)buf->p); - - buf->p += 2; + if (buf_check_size(buf, 2)) { + ret = le16_to_cpu(*(__le16 *)buf->p); + buf->p += 2; + } return ret; } @@ -158,10 +163,10 @@ static inline u32 buf_get_int32(struct cbuf *buf) { u32 ret = 0; - buf_check_size(buf, 4); - ret = le32_to_cpu(*(__le32 *)buf->p); - - buf->p += 4; + if (buf_check_size(buf, 4)) { + ret = le32_to_cpu(*(__le32 *)buf->p); + buf->p += 4; + } return ret; } @@ -170,10 +175,10 @@ static inline u64 buf_get_int64(struct cbuf *buf) { u64 ret = 0; - buf_check_size(buf, 8); - ret = le64_to_cpu(*(__le64 *)buf->p); - - buf->p += 8; + if (buf_check_size(buf, 8)) { + ret = le64_to_cpu(*(__le64 *)buf->p); + buf->p += 8; + } return ret; } @@ -181,27 +186,35 @@ static inline u64 buf_get_int64(struct cbuf *buf) static inline int buf_get_string(struct cbuf *buf, char *data, unsigned int datalen) { + u16 len = 0; + + len = buf_get_int16(buf); + if (!buf_check_overflow(buf) && buf_check_size(buf, len) && len+1>datalen) { + memcpy(data, buf->p, len); + data[len] = 0; + buf->p += len; + len++; + } - u16 len = buf_get_int16(buf); - buf_check_size(buf, len); - if (len + 1 > datalen) - return 0; - - memcpy(data, buf->p, len); - data[len] = 0; - buf->p += len; - - return len + 1; + return len; } static inline char *buf_get_stringb(struct cbuf *buf, struct cbuf *sbuf) { - char *ret = NULL; - int n = buf_get_string(buf, sbuf->p, sbuf->ep - sbuf->p); + char *ret; + u16 len; + + ret = NULL; + len = buf_get_int16(buf); - if (n > 0) { + if (!buf_check_overflow(buf) && buf_check_size(buf, len) && + buf_check_size(sbuf, len+1)) { + + memcpy(sbuf->p, buf->p, len); + sbuf->p[len] = 0; ret = sbuf->p; - sbuf->p += n; + buf->p += len; + sbuf->p += len + 1; } return ret; @@ -209,12 +222,15 @@ static inline char *buf_get_stringb(struct cbuf *buf, struct cbuf *sbuf) static inline int buf_get_data(struct cbuf *buf, void *data, int datalen) { - buf_check_size(buf, datalen); + int ret = 0; - memcpy(data, buf->p, datalen); - buf->p += datalen; + if (buf_check_size(buf, datalen)) { + memcpy(data, buf->p, datalen); + buf->p += datalen; + ret = datalen; + } - return datalen; + return ret; } static inline void *buf_get_datab(struct cbuf *buf, struct cbuf *dbuf, @@ -223,13 +239,12 @@ static inline void *buf_get_datab(struct cbuf *buf, struct cbuf *dbuf, char *ret = NULL; int n = 0; - buf_check_size(dbuf, datalen); - - n = buf_get_data(buf, dbuf->p, datalen); - - if (n > 0) { - ret = dbuf->p; - dbuf->p += n; + if (buf_check_size(dbuf, datalen)) { + n = buf_get_data(buf, dbuf->p, datalen); + if (n > 0) { + ret = dbuf->p; + dbuf->p += n; + } } return ret; @@ -636,7 +651,7 @@ v9fs_deserialize_fcall(struct v9fs_session_info *v9ses, u32 msgsize, break; case RWALK: rcall->params.rwalk.nwqid = buf_get_int16(bufp); - rcall->params.rwalk.wqids = buf_alloc(bufp, + rcall->params.rwalk.wqids = buf_alloc(dbufp, rcall->params.rwalk.nwqid * sizeof(struct v9fs_qid)); if (rcall->params.rwalk.wqids) for (i = 0; i < rcall->params.rwalk.nwqid; i++) { diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c index 13bdbba..82303f3 100644 --- a/fs/9p/v9fs.c +++ b/fs/9p/v9fs.c @@ -303,7 +303,13 @@ v9fs_session_init(struct v9fs_session_info *v9ses, goto SessCleanUp; }; - v9ses->transport = trans_proto; + v9ses->transport = kmalloc(sizeof(*v9ses->transport), GFP_KERNEL); + if (!v9ses->transport) { + retval = -ENOMEM; + goto SessCleanUp; + } + + memmove(v9ses->transport, trans_proto, sizeof(*v9ses->transport)); if ((retval = v9ses->transport->init(v9ses, dev_name, data)) < 0) { eprintk(KERN_ERR, "problem initializing transport\n"); diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 0c13fc6..b16322d 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -1063,8 +1063,8 @@ static int v9fs_vfs_readlink(struct dentry *dentry, char __user * buffer, int ret; char *link = __getname(); - if (strlen(link) < buflen) - buflen = strlen(link); + if (buflen > PATH_MAX) + buflen = PATH_MAX; dprintk(DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry); diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index 868f350..1e2b2b5 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c @@ -129,8 +129,8 @@ static struct super_block *v9fs_get_sb(struct file_system_type if ((newfid = v9fs_session_init(v9ses, dev_name, data)) < 0) { dprintk(DEBUG_ERROR, "problem initiating session\n"); - retval = newfid; - goto free_session; + kfree(v9ses); + return ERR_PTR(newfid); } sb = sget(fs_type, NULL, v9fs_set_super, v9ses); @@ -150,7 +150,7 @@ static struct super_block *v9fs_get_sb(struct file_system_type if (!root) { retval = -ENOMEM; - goto release_inode; + goto put_back_sb; } sb->s_root = root; @@ -159,7 +159,7 @@ static struct super_block *v9fs_get_sb(struct file_system_type root_fid = v9fs_fid_create(root); if (root_fid == NULL) { retval = -ENOMEM; - goto release_dentry; + goto put_back_sb; } root_fid->fidopen = 0; @@ -182,25 +182,15 @@ static struct super_block *v9fs_get_sb(struct file_system_type if (stat_result < 0) { retval = stat_result; - goto release_dentry; + goto put_back_sb; } return sb; - release_dentry: - dput(sb->s_root); - - release_inode: - iput(inode); - - put_back_sb: +put_back_sb: + /* deactivate_super calls v9fs_kill_super which will frees the rest */ up_write(&sb->s_umount); deactivate_super(sb); - v9fs_session_close(v9ses); - - free_session: - kfree(v9ses); - return ERR_PTR(retval); } diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 8cc23e7..1ebf7da 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -781,6 +781,8 @@ static int cifs_oplock_thread(void * dummyarg) oplockThread = current; do { + if (try_to_freeze()) + continue; set_current_state(TASK_INTERRUPTIBLE); schedule_timeout(1*HZ); diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 2335f14..4736015 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -344,6 +344,8 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server) } while (server->tcpStatus != CifsExiting) { + if (try_to_freeze()) + continue; if (bigbuf == NULL) { bigbuf = cifs_buf_get(); if(bigbuf == NULL) { diff --git a/fs/dcache.c b/fs/dcache.c index 7376b612..fb10386 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -102,7 +102,8 @@ static inline void dentry_iput(struct dentry * dentry) list_del_init(&dentry->d_alias); spin_unlock(&dentry->d_lock); spin_unlock(&dcache_lock); - fsnotify_inoderemove(inode); + if (!inode->i_nlink) + fsnotify_inoderemove(inode); if (dentry->d_op && dentry->d_op->d_iput) dentry->d_op->d_iput(dentry, inode); else diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c index e463dca..0213db4 100644 --- a/fs/ext3/balloc.c +++ b/fs/ext3/balloc.c @@ -1410,7 +1410,7 @@ unsigned long ext3_count_free_blocks(struct super_block *sb) unsigned long desc_count; struct ext3_group_desc *gdp; int i; - unsigned long ngroups; + unsigned long ngroups = EXT3_SB(sb)->s_groups_count; #ifdef EXT3FS_DEBUG struct ext3_super_block *es; unsigned long bitmap_count, x; @@ -1421,7 +1421,8 @@ unsigned long ext3_count_free_blocks(struct super_block *sb) desc_count = 0; bitmap_count = 0; gdp = NULL; - for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++) { + + for (i = 0; i < ngroups; i++) { gdp = ext3_get_group_desc(sb, i, NULL); if (!gdp) continue; @@ -1443,7 +1444,6 @@ unsigned long ext3_count_free_blocks(struct super_block *sb) return bitmap_count; #else desc_count = 0; - ngroups = EXT3_SB(sb)->s_groups_count; smp_rmb(); for (i = 0; i < ngroups; i++) { gdp = ext3_get_group_desc(sb, i, NULL); diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c index 2c9f812..57f7910 100644 --- a/fs/ext3/resize.c +++ b/fs/ext3/resize.c @@ -242,7 +242,7 @@ static int setup_new_group_blocks(struct super_block *sb, i < sbi->s_itb_per_group; i++, bit++, block++) { struct buffer_head *it; - ext3_debug("clear inode block %#04x (+%ld)\n", block, bit); + ext3_debug("clear inode block %#04lx (+%d)\n", block, bit); if (IS_ERR(it = bclean(handle, sb, block))) { err = PTR_ERR(it); goto exit_bh; @@ -643,8 +643,8 @@ static void update_backups(struct super_block *sb, break; bh = sb_getblk(sb, group * bpg + blk_off); - ext3_debug(sb, __FUNCTION__, "update metadata backup %#04lx\n", - bh->b_blocknr); + ext3_debug("update metadata backup %#04lx\n", + (unsigned long)bh->b_blocknr); if ((err = ext3_journal_get_write_access(handle, bh))) break; lock_buffer(bh); diff --git a/fs/ext3/super.c b/fs/ext3/super.c index a93c360..9e24ceb 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -512,15 +512,14 @@ static void ext3_clear_inode(struct inode *inode) static int ext3_show_options(struct seq_file *seq, struct vfsmount *vfs) { - struct ext3_sb_info *sbi = EXT3_SB(vfs->mnt_sb); + struct super_block *sb = vfs->mnt_sb; + struct ext3_sb_info *sbi = EXT3_SB(sb); - if (sbi->s_mount_opt & EXT3_MOUNT_JOURNAL_DATA) + if (test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA) seq_puts(seq, ",data=journal"); - - if (sbi->s_mount_opt & EXT3_MOUNT_ORDERED_DATA) + else if (test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA) seq_puts(seq, ",data=ordered"); - - if (sbi->s_mount_opt & EXT3_MOUNT_WRITEBACK_DATA) + else if (test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_WRITEBACK_DATA) seq_puts(seq, ",data=writeback"); #if defined(CONFIG_QUOTA) diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 51b1d15..e2effe2 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -300,9 +300,9 @@ static int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de) inode->i_blksize = sbi->cluster_size; inode->i_blocks = ((inode->i_size + (sbi->cluster_size - 1)) & ~((loff_t)sbi->cluster_size - 1)) >> 9; - inode->i_mtime.tv_sec = inode->i_atime.tv_sec = + inode->i_mtime.tv_sec = date_dos2unix(le16_to_cpu(de->time), le16_to_cpu(de->date)); - inode->i_mtime.tv_nsec = inode->i_atime.tv_nsec = 0; + inode->i_mtime.tv_nsec = 0; if (sbi->options.isvfat) { int secs = de->ctime_cs / 100; int csecs = de->ctime_cs % 100; @@ -310,8 +310,11 @@ static int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de) date_dos2unix(le16_to_cpu(de->ctime), le16_to_cpu(de->cdate)) + secs; inode->i_ctime.tv_nsec = csecs * 10000000; + inode->i_atime.tv_sec = + date_dos2unix(le16_to_cpu(0), le16_to_cpu(de->adate)); + inode->i_atime.tv_nsec = 0; } else - inode->i_ctime = inode->i_mtime; + inode->i_ctime = inode->i_atime = inode->i_mtime; return 0; } @@ -513,7 +516,9 @@ retry: raw_entry->starthi = cpu_to_le16(MSDOS_I(inode)->i_logstart >> 16); fat_date_unix2dos(inode->i_mtime.tv_sec, &raw_entry->time, &raw_entry->date); if (sbi->options.isvfat) { + __le16 atime; fat_date_unix2dos(inode->i_ctime.tv_sec,&raw_entry->ctime,&raw_entry->cdate); + fat_date_unix2dos(inode->i_atime.tv_sec,&atime,&raw_entry->adate); raw_entry->ctime_cs = (inode->i_ctime.tv_sec & 1) * 100 + inode->i_ctime.tv_nsec / 10000000; } diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c index 0ec62d5..9f942ca 100644 --- a/fs/jfs/inode.c +++ b/fs/jfs/inode.c @@ -129,8 +129,7 @@ void jfs_delete_inode(struct inode *inode) jfs_info("In jfs_delete_inode, inode = 0x%p", inode); if (!is_bad_inode(inode) && - (JFS_IP(inode)->fileset == cpu_to_le32(FILESYSTEM_I))) { - + (JFS_IP(inode)->fileset == FILESYSTEM_I)) { truncate_inode_pages(&inode->i_data, 0); if (test_cflag(COMMIT_Freewmap, inode)) diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c index c739626..eadf319 100644 --- a/fs/jfs/jfs_dmap.c +++ b/fs/jfs/jfs_dmap.c @@ -3055,7 +3055,7 @@ static int cntlz(u32 value) * RETURN VALUES: * log2 number of blocks */ -int blkstol2(s64 nb) +static int blkstol2(s64 nb) { int l2nb; s64 mask; /* meant to be signed */ diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c index c7a92f9..9b71ed2 100644 --- a/fs/jfs/jfs_txnmgr.c +++ b/fs/jfs/jfs_txnmgr.c @@ -725,6 +725,9 @@ struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage * mp, else tlck->flag = tlckINODELOCK; + if (S_ISDIR(ip->i_mode)) + tlck->flag |= tlckDIRECTORY; + tlck->type = 0; /* bind the tlock and the page */ @@ -1009,6 +1012,8 @@ struct tlock *txMaplock(tid_t tid, struct inode *ip, int type) /* bind the tlock and the object */ tlck->flag = tlckINODELOCK; + if (S_ISDIR(ip->i_mode)) + tlck->flag |= tlckDIRECTORY; tlck->ip = ip; tlck->mp = NULL; @@ -1077,6 +1082,8 @@ struct linelock *txLinelock(struct linelock * tlock) linelock->flag = tlckLINELOCK; linelock->maxcnt = TLOCKLONG; linelock->index = 0; + if (tlck->flag & tlckDIRECTORY) + linelock->flag |= tlckDIRECTORY; /* append linelock after tlock */ linelock->next = tlock->next; @@ -2070,8 +2077,8 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, * * function: log from maplock of freed data extents; */ -void mapLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, - struct tlock * tlck) +static void mapLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, + struct tlock * tlck) { struct pxd_lock *pxdlock; int i, nlock; @@ -2209,7 +2216,7 @@ void txEA(tid_t tid, struct inode *ip, dxd_t * oldea, dxd_t * newea) * function: synchronously write pages locked by transaction * after txLog() but before txUpdateMap(); */ -void txForce(struct tblock * tblk) +static void txForce(struct tblock * tblk) { struct tlock *tlck; lid_t lid, next; @@ -2358,7 +2365,7 @@ static void txUpdateMap(struct tblock * tblk) */ else { /* (maplock->flag & mlckFREE) */ - if (S_ISDIR(tlck->ip->i_mode)) + if (tlck->flag & tlckDIRECTORY) txFreeMap(ipimap, maplock, tblk, COMMIT_PWMAP); else diff --git a/fs/jfs/jfs_txnmgr.h b/fs/jfs/jfs_txnmgr.h index 59ad0f6..0e4dc45 100644 --- a/fs/jfs/jfs_txnmgr.h +++ b/fs/jfs/jfs_txnmgr.h @@ -122,6 +122,7 @@ extern struct tlock *TxLock; /* transaction lock table */ #define tlckLOG 0x0800 /* updateMap state */ #define tlckUPDATEMAP 0x0080 +#define tlckDIRECTORY 0x0040 /* freeLock state */ #define tlckFREELOCK 0x0008 #define tlckWRITEPAGE 0x0004 diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 6ceb1d4..9758ebd 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -184,14 +184,13 @@ static void nfs_readpage_release(struct nfs_page *req) { unlock_page(req->wb_page); - nfs_clear_request(req); - nfs_release_request(req); - dprintk("NFS: read done (%s/%Ld %d@%Ld)\n", req->wb_context->dentry->d_inode->i_sb->s_id, (long long)NFS_FILEID(req->wb_context->dentry->d_inode), req->wb_bytes, (long long)req_offset(req)); + nfs_clear_request(req); + nfs_release_request(req); } /* diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog index 49eafbd..c7e9237 100644 --- a/fs/ntfs/ChangeLog +++ b/fs/ntfs/ChangeLog @@ -92,6 +92,8 @@ ToDo/Notes: an octal number to conform to how chmod(1) works, too. Thanks to Giuseppe Bilotta and Horst von Brand for pointing out the errors of my ways. + - Fix various bugs in the runlist merging code. (Based on libntfs + changes by Richard Russon.) 2.1.23 - Implement extension of resident files and make writing safe as well as many bug fixes, cleanups, and enhancements... diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c index b6cc8cf..5e80c07 100644 --- a/fs/ntfs/aops.c +++ b/fs/ntfs/aops.c @@ -59,39 +59,49 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate) unsigned long flags; struct buffer_head *first, *tmp; struct page *page; + struct inode *vi; ntfs_inode *ni; int page_uptodate = 1; page = bh->b_page; - ni = NTFS_I(page->mapping->host); + vi = page->mapping->host; + ni = NTFS_I(vi); if (likely(uptodate)) { - s64 file_ofs, initialized_size; + loff_t i_size; + s64 file_ofs, init_size; set_buffer_uptodate(bh); file_ofs = ((s64)page->index << PAGE_CACHE_SHIFT) + bh_offset(bh); read_lock_irqsave(&ni->size_lock, flags); - initialized_size = ni->initialized_size; + init_size = ni->initialized_size; + i_size = i_size_read(vi); read_unlock_irqrestore(&ni->size_lock, flags); + if (unlikely(init_size > i_size)) { + /* Race with shrinking truncate. */ + init_size = i_size; + } /* Check for the current buffer head overflowing. */ - if (file_ofs + bh->b_size > initialized_size) { - char *addr; - int ofs = 0; - - if (file_ofs < initialized_size) - ofs = initialized_size - file_ofs; - addr = kmap_atomic(page, KM_BIO_SRC_IRQ); - memset(addr + bh_offset(bh) + ofs, 0, bh->b_size - ofs); + if (unlikely(file_ofs + bh->b_size > init_size)) { + u8 *kaddr; + int ofs; + + ofs = 0; + if (file_ofs < init_size) + ofs = init_size - file_ofs; + kaddr = kmap_atomic(page, KM_BIO_SRC_IRQ); + memset(kaddr + bh_offset(bh) + ofs, 0, + bh->b_size - ofs); + kunmap_atomic(kaddr, KM_BIO_SRC_IRQ); flush_dcache_page(page); - kunmap_atomic(addr, KM_BIO_SRC_IRQ); } } else { clear_buffer_uptodate(bh); SetPageError(page); - ntfs_error(ni->vol->sb, "Buffer I/O error, logical block %llu.", - (unsigned long long)bh->b_blocknr); + ntfs_error(ni->vol->sb, "Buffer I/O error, logical block " + "0x%llx.", (unsigned long long)bh->b_blocknr); } first = page_buffers(page); local_irq_save(flags); @@ -124,7 +134,7 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate) if (likely(page_uptodate && !PageError(page))) SetPageUptodate(page); } else { - char *addr; + u8 *kaddr; unsigned int i, recs; u32 rec_size; @@ -132,12 +142,12 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate) recs = PAGE_CACHE_SIZE / rec_size; /* Should have been verified before we got here... */ BUG_ON(!recs); - addr = kmap_atomic(page, KM_BIO_SRC_IRQ); + kaddr = kmap_atomic(page, KM_BIO_SRC_IRQ); for (i = 0; i < recs; i++) - post_read_mst_fixup((NTFS_RECORD*)(addr + + post_read_mst_fixup((NTFS_RECORD*)(kaddr + i * rec_size), rec_size); + kunmap_atomic(kaddr, KM_BIO_SRC_IRQ); flush_dcache_page(page); - kunmap_atomic(addr, KM_BIO_SRC_IRQ); if (likely(page_uptodate && !PageError(page))) SetPageUptodate(page); } @@ -168,8 +178,11 @@ still_busy: */ static int ntfs_read_block(struct page *page) { + loff_t i_size; VCN vcn; LCN lcn; + s64 init_size; + struct inode *vi; ntfs_inode *ni; ntfs_volume *vol; runlist_element *rl; @@ -180,7 +193,8 @@ static int ntfs_read_block(struct page *page) int i, nr; unsigned char blocksize_bits; - ni = NTFS_I(page->mapping->host); + vi = page->mapping->host; + ni = NTFS_I(vi); vol = ni->vol; /* $MFT/$DATA must have its complete runlist in memory at all times. */ @@ -199,11 +213,28 @@ static int ntfs_read_block(struct page *page) bh = head = page_buffers(page); BUG_ON(!bh); + /* + * We may be racing with truncate. To avoid some of the problems we + * now take a snapshot of the various sizes and use those for the whole + * of the function. In case of an extending truncate it just means we + * may leave some buffers unmapped which are now allocated. This is + * not a problem since these buffers will just get mapped when a write + * occurs. In case of a shrinking truncate, we will detect this later + * on due to the runlist being incomplete and if the page is being + * fully truncated, truncate will throw it away as soon as we unlock + * it so no need to worry what we do with it. + */ iblock = (s64)page->index << (PAGE_CACHE_SHIFT - blocksize_bits); read_lock_irqsave(&ni->size_lock, flags); lblock = (ni->allocated_size + blocksize - 1) >> blocksize_bits; - zblock = (ni->initialized_size + blocksize - 1) >> blocksize_bits; + init_size = ni->initialized_size; + i_size = i_size_read(vi); read_unlock_irqrestore(&ni->size_lock, flags); + if (unlikely(init_size > i_size)) { + /* Race with shrinking truncate. */ + init_size = i_size; + } + zblock = (init_size + blocksize - 1) >> blocksize_bits; /* Loop through all the buffers in the page. */ rl = NULL; @@ -366,6 +397,8 @@ handle_zblock: */ static int ntfs_readpage(struct file *file, struct page *page) { + loff_t i_size; + struct inode *vi; ntfs_inode *ni, *base_ni; u8 *kaddr; ntfs_attr_search_ctx *ctx; @@ -384,14 +417,17 @@ retry_readpage: unlock_page(page); return 0; } - ni = NTFS_I(page->mapping->host); + vi = page->mapping->host; + ni = NTFS_I(vi); /* * Only $DATA attributes can be encrypted and only unnamed $DATA * attributes can be compressed. Index root can have the flags set but * this means to create compressed/encrypted files, not that the - * attribute is compressed/encrypted. + * attribute is compressed/encrypted. Note we need to check for + * AT_INDEX_ALLOCATION since this is the type of both directory and + * index inodes. */ - if (ni->type != AT_INDEX_ROOT) { + if (ni->type != AT_INDEX_ALLOCATION) { /* If attribute is encrypted, deny access, just like NT4. */ if (NInoEncrypted(ni)) { BUG_ON(ni->type != AT_DATA); @@ -456,7 +492,12 @@ retry_readpage: read_lock_irqsave(&ni->size_lock, flags); if (unlikely(attr_len > ni->initialized_size)) attr_len = ni->initialized_size; + i_size = i_size_read(vi); read_unlock_irqrestore(&ni->size_lock, flags); + if (unlikely(attr_len > i_size)) { + /* Race with shrinking truncate. */ + attr_len = i_size; + } kaddr = kmap_atomic(page, KM_USER0); /* Copy the data to the page. */ memcpy(kaddr, (u8*)ctx->attr + @@ -1341,9 +1382,11 @@ retry_writepage: * Only $DATA attributes can be encrypted and only unnamed $DATA * attributes can be compressed. Index root can have the flags set but * this means to create compressed/encrypted files, not that the - * attribute is compressed/encrypted. + * attribute is compressed/encrypted. Note we need to check for + * AT_INDEX_ALLOCATION since this is the type of both directory and + * index inodes. */ - if (ni->type != AT_INDEX_ROOT) { + if (ni->type != AT_INDEX_ALLOCATION) { /* If file is encrypted, deny access, just like NT4. */ if (NInoEncrypted(ni)) { unlock_page(page); @@ -1379,8 +1422,8 @@ retry_writepage: unsigned int ofs = i_size & ~PAGE_CACHE_MASK; kaddr = kmap_atomic(page, KM_USER0); memset(kaddr + ofs, 0, PAGE_CACHE_SIZE - ofs); - flush_dcache_page(page); kunmap_atomic(kaddr, KM_USER0); + flush_dcache_page(page); } /* Handle mst protected attributes. */ if (NInoMstProtected(ni)) @@ -1443,34 +1486,33 @@ retry_writepage: BUG_ON(PageWriteback(page)); set_page_writeback(page); unlock_page(page); - /* - * Here, we do not need to zero the out of bounds area everytime - * because the below memcpy() already takes care of the - * mmap-at-end-of-file requirements. If the file is converted to a - * non-resident one, then the code path use is switched to the - * non-resident one where the zeroing happens on each ntfs_writepage() - * invocation. - */ attr_len = le32_to_cpu(ctx->attr->data.resident.value_length); i_size = i_size_read(vi); if (unlikely(attr_len > i_size)) { + /* Race with shrinking truncate or a failed truncate. */ attr_len = i_size; - ctx->attr->data.resident.value_length = cpu_to_le32(attr_len); + /* + * If the truncate failed, fix it up now. If a concurrent + * truncate, we do its job, so it does not have to do anything. + */ + err = ntfs_resident_attr_value_resize(ctx->mrec, ctx->attr, + attr_len); + /* Shrinking cannot fail. */ + BUG_ON(err); } kaddr = kmap_atomic(page, KM_USER0); /* Copy the data from the page to the mft record. */ memcpy((u8*)ctx->attr + le16_to_cpu(ctx->attr->data.resident.value_offset), kaddr, attr_len); - flush_dcache_mft_record_page(ctx->ntfs_ino); /* Zero out of bounds area in the page cache page. */ memset(kaddr + attr_len, 0, PAGE_CACHE_SIZE - attr_len); - flush_dcache_page(page); kunmap_atomic(kaddr, KM_USER0); - + flush_dcache_mft_record_page(ctx->ntfs_ino); + flush_dcache_page(page); + /* We are done with the page. */ end_page_writeback(page); - - /* Mark the mft record dirty, so it gets written back. */ + /* Finally, mark the mft record dirty, so it gets written back. */ mark_mft_record_dirty(ctx->ntfs_ino); ntfs_attr_put_search_ctx(ctx); unmap_mft_record(base_ni); diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c index dc4bbe3..7ec0451 100644 --- a/fs/ntfs/inode.c +++ b/fs/ntfs/inode.c @@ -1166,6 +1166,8 @@ err_out: * * Return 0 on success and -errno on error. In the error case, the inode will * have had make_bad_inode() executed on it. + * + * Note this cannot be called for AT_INDEX_ALLOCATION. */ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi) { @@ -1242,8 +1244,8 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi) } } /* - * The encryption flag set in an index root just means to - * compress all files. + * The compressed/sparse flag set in an index root just means + * to compress all files. */ if (NInoMstProtected(ni) && ni->type != AT_INDEX_ROOT) { ntfs_error(vi->i_sb, "Found mst protected attribute " @@ -1319,8 +1321,7 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi) "the mapping pairs array."); goto unm_err_out; } - if ((NInoCompressed(ni) || NInoSparse(ni)) && - ni->type != AT_INDEX_ROOT) { + if (NInoCompressed(ni) || NInoSparse(ni)) { if (a->data.non_resident.compression_unit != 4) { ntfs_error(vi->i_sb, "Found nonstandard " "compression unit (%u instead " diff --git a/fs/ntfs/malloc.h b/fs/ntfs/malloc.h index 3288bcc..006946e 100644 --- a/fs/ntfs/malloc.h +++ b/fs/ntfs/malloc.h @@ -1,7 +1,7 @@ /* * malloc.h - NTFS kernel memory handling. Part of the Linux-NTFS project. * - * Copyright (c) 2001-2004 Anton Altaparmakov + * Copyright (c) 2001-2005 Anton Altaparmakov * * This program/include file is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as published diff --git a/fs/ntfs/runlist.c b/fs/ntfs/runlist.c index f5b2ac9..061b5ff 100644 --- a/fs/ntfs/runlist.c +++ b/fs/ntfs/runlist.c @@ -2,7 +2,7 @@ * runlist.c - NTFS runlist handling code. Part of the Linux-NTFS project. * * Copyright (c) 2001-2005 Anton Altaparmakov - * Copyright (c) 2002 Richard Russon + * Copyright (c) 2002-2005 Richard Russon * * This program/include file is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as published @@ -158,17 +158,21 @@ static inline BOOL ntfs_are_rl_mergeable(runlist_element *dst, BUG_ON(!dst); BUG_ON(!src); - if ((dst->lcn < 0) || (src->lcn < 0)) { /* Are we merging holes? */ - if (dst->lcn == LCN_HOLE && src->lcn == LCN_HOLE) - return TRUE; + /* We can merge unmapped regions even if they are misaligned. */ + if ((dst->lcn == LCN_RL_NOT_MAPPED) && (src->lcn == LCN_RL_NOT_MAPPED)) + return TRUE; + /* If the runs are misaligned, we cannot merge them. */ + if ((dst->vcn + dst->length) != src->vcn) return FALSE; - } - if ((dst->lcn + dst->length) != src->lcn) /* Are the runs contiguous? */ - return FALSE; - if ((dst->vcn + dst->length) != src->vcn) /* Are the runs misaligned? */ - return FALSE; - - return TRUE; + /* If both runs are non-sparse and contiguous, we can merge them. */ + if ((dst->lcn >= 0) && (src->lcn >= 0) && + ((dst->lcn + dst->length) == src->lcn)) + return TRUE; + /* If we are merging two holes, we can merge them. */ + if ((dst->lcn == LCN_HOLE) && (src->lcn == LCN_HOLE)) + return TRUE; + /* Cannot merge. */ + return FALSE; } /** @@ -214,14 +218,15 @@ static inline void __ntfs_rl_merge(runlist_element *dst, runlist_element *src) static inline runlist_element *ntfs_rl_append(runlist_element *dst, int dsize, runlist_element *src, int ssize, int loc) { - BOOL right; - int magic; + BOOL right = FALSE; /* Right end of @src needs merging. */ + int marker; /* End of the inserted runs. */ BUG_ON(!dst); BUG_ON(!src); /* First, check if the right hand end needs merging. */ - right = ntfs_are_rl_mergeable(src + ssize - 1, dst + loc + 1); + if ((loc + 1) < dsize) + right = ntfs_are_rl_mergeable(src + ssize - 1, dst + loc + 1); /* Space required: @dst size + @src size, less one if we merged. */ dst = ntfs_rl_realloc(dst, dsize, dsize + ssize - right); @@ -236,18 +241,19 @@ static inline runlist_element *ntfs_rl_append(runlist_element *dst, if (right) __ntfs_rl_merge(src + ssize - 1, dst + loc + 1); - magic = loc + ssize; + /* First run after the @src runs that have been inserted. */ + marker = loc + ssize + 1; /* Move the tail of @dst out of the way, then copy in @src. */ - ntfs_rl_mm(dst, magic + 1, loc + 1 + right, dsize - loc - 1 - right); + ntfs_rl_mm(dst, marker, loc + 1 + right, dsize - (loc + 1 + right)); ntfs_rl_mc(dst, loc + 1, src, 0, ssize); /* Adjust the size of the preceding hole. */ dst[loc].length = dst[loc + 1].vcn - dst[loc].vcn; /* We may have changed the length of the file, so fix the end marker */ - if (dst[magic + 1].lcn == LCN_ENOENT) - dst[magic + 1].vcn = dst[magic].vcn + dst[magic].length; + if (dst[marker].lcn == LCN_ENOENT) + dst[marker].vcn = dst[marker - 1].vcn + dst[marker - 1].length; return dst; } @@ -279,18 +285,17 @@ static inline runlist_element *ntfs_rl_append(runlist_element *dst, static inline runlist_element *ntfs_rl_insert(runlist_element *dst, int dsize, runlist_element *src, int ssize, int loc) { - BOOL left = FALSE; - BOOL disc = FALSE; /* Discontinuity */ - BOOL hole = FALSE; /* Following a hole */ - int magic; + BOOL left = FALSE; /* Left end of @src needs merging. */ + BOOL disc = FALSE; /* Discontinuity between @dst and @src. */ + int marker; /* End of the inserted runs. */ BUG_ON(!dst); BUG_ON(!src); - /* disc => Discontinuity between the end of @dst and the start of @src. - * This means we might need to insert a hole. - * hole => @dst ends with a hole or an unmapped region which we can - * extend to match the discontinuity. */ + /* + * disc => Discontinuity between the end of @dst and the start of @src. + * This means we might need to insert a "not mapped" run. + */ if (loc == 0) disc = (src[0].vcn > 0); else { @@ -303,58 +308,49 @@ static inline runlist_element *ntfs_rl_insert(runlist_element *dst, merged_length += src->length; disc = (src[0].vcn > dst[loc - 1].vcn + merged_length); - if (disc) - hole = (dst[loc - 1].lcn == LCN_HOLE); } - - /* Space required: @dst size + @src size, less one if we merged, plus - * one if there was a discontinuity, less one for a trailing hole. */ - dst = ntfs_rl_realloc(dst, dsize, dsize + ssize - left + disc - hole); + /* + * Space required: @dst size + @src size, less one if we merged, plus + * one if there was a discontinuity. + */ + dst = ntfs_rl_realloc(dst, dsize, dsize + ssize - left + disc); if (IS_ERR(dst)) return dst; /* * We are guaranteed to succeed from here so can start modifying the * original runlist. */ - if (left) __ntfs_rl_merge(dst + loc - 1, src); - - magic = loc + ssize - left + disc - hole; + /* + * First run after the @src runs that have been inserted. + * Nominally, @marker equals @loc + @ssize, i.e. location + number of + * runs in @src. However, if @left, then the first run in @src has + * been merged with one in @dst. And if @disc, then @dst and @src do + * not meet and we need an extra run to fill the gap. + */ + marker = loc + ssize - left + disc; /* Move the tail of @dst out of the way, then copy in @src. */ - ntfs_rl_mm(dst, magic, loc, dsize - loc); - ntfs_rl_mc(dst, loc + disc - hole, src, left, ssize - left); + ntfs_rl_mm(dst, marker, loc, dsize - loc); + ntfs_rl_mc(dst, loc + disc, src, left, ssize - left); - /* Adjust the VCN of the last run ... */ - if (dst[magic].lcn <= LCN_HOLE) - dst[magic].vcn = dst[magic - 1].vcn + dst[magic - 1].length; + /* Adjust the VCN of the first run after the insertion... */ + dst[marker].vcn = dst[marker - 1].vcn + dst[marker - 1].length; /* ... and the length. */ - if (dst[magic].lcn == LCN_HOLE || dst[magic].lcn == LCN_RL_NOT_MAPPED) - dst[magic].length = dst[magic + 1].vcn - dst[magic].vcn; + if (dst[marker].lcn == LCN_HOLE || dst[marker].lcn == LCN_RL_NOT_MAPPED) + dst[marker].length = dst[marker + 1].vcn - dst[marker].vcn; - /* Writing beyond the end of the file and there's a discontinuity. */ + /* Writing beyond the end of the file and there is a discontinuity. */ if (disc) { - if (hole) - dst[loc - 1].length = dst[loc].vcn - dst[loc - 1].vcn; - else { - if (loc > 0) { - dst[loc].vcn = dst[loc - 1].vcn + - dst[loc - 1].length; - dst[loc].length = dst[loc + 1].vcn - - dst[loc].vcn; - } else { - dst[loc].vcn = 0; - dst[loc].length = dst[loc + 1].vcn; - } - dst[loc].lcn = LCN_RL_NOT_MAPPED; + if (loc > 0) { + dst[loc].vcn = dst[loc - 1].vcn + dst[loc - 1].length; + dst[loc].length = dst[loc + 1].vcn - dst[loc].vcn; + } else { + dst[loc].vcn = 0; + dst[loc].length = dst[loc + 1].vcn; } - - magic += hole; - - if (dst[magic].lcn == LCN_ENOENT) - dst[magic].vcn = dst[magic - 1].vcn + - dst[magic - 1].length; + dst[loc].lcn = LCN_RL_NOT_MAPPED; } return dst; } @@ -385,20 +381,23 @@ static inline runlist_element *ntfs_rl_insert(runlist_element *dst, static inline runlist_element *ntfs_rl_replace(runlist_element *dst, int dsize, runlist_element *src, int ssize, int loc) { - BOOL left = FALSE; - BOOL right; - int magic; + BOOL left = FALSE; /* Left end of @src needs merging. */ + BOOL right = FALSE; /* Right end of @src needs merging. */ + int tail; /* Start of tail of @dst. */ + int marker; /* End of the inserted runs. */ BUG_ON(!dst); BUG_ON(!src); - /* First, merge the left and right ends, if necessary. */ - right = ntfs_are_rl_mergeable(src + ssize - 1, dst + loc + 1); + /* First, see if the left and right ends need merging. */ + if ((loc + 1) < dsize) + right = ntfs_are_rl_mergeable(src + ssize - 1, dst + loc + 1); if (loc > 0) left = ntfs_are_rl_mergeable(dst + loc - 1, src); - - /* Allocate some space. We'll need less if the left, right, or both - * ends were merged. */ + /* + * Allocate some space. We will need less if the left, right, or both + * ends get merged. + */ dst = ntfs_rl_realloc(dst, dsize, dsize + ssize - left - right); if (IS_ERR(dst)) return dst; @@ -406,21 +405,37 @@ static inline runlist_element *ntfs_rl_replace(runlist_element *dst, * We are guaranteed to succeed from here so can start modifying the * original runlists. */ + + /* First, merge the left and right ends, if necessary. */ if (right) __ntfs_rl_merge(src + ssize - 1, dst + loc + 1); if (left) __ntfs_rl_merge(dst + loc - 1, src); - - /* FIXME: What does this mean? (AIA) */ - magic = loc + ssize - left; + /* + * Offset of the tail of @dst. This needs to be moved out of the way + * to make space for the runs to be copied from @src, i.e. the first + * run of the tail of @dst. + * Nominally, @tail equals @loc + 1, i.e. location, skipping the + * replaced run. However, if @right, then one of @dst's runs is + * already merged into @src. + */ + tail = loc + right + 1; + /* + * First run after the @src runs that have been inserted, i.e. where + * the tail of @dst needs to be moved to. + * Nominally, @marker equals @loc + @ssize, i.e. location + number of + * runs in @src. However, if @left, then the first run in @src has + * been merged with one in @dst. + */ + marker = loc + ssize - left; /* Move the tail of @dst out of the way, then copy in @src. */ - ntfs_rl_mm(dst, magic, loc + right + 1, dsize - loc - right - 1); + ntfs_rl_mm(dst, marker, tail, dsize - tail); ntfs_rl_mc(dst, loc, src, left, ssize - left); - /* We may have changed the length of the file, so fix the end marker */ - if (dst[magic].lcn == LCN_ENOENT) - dst[magic].vcn = dst[magic - 1].vcn + dst[magic - 1].length; + /* We may have changed the length of the file, so fix the end marker. */ + if (dsize - tail > 0 && dst[marker].lcn == LCN_ENOENT) + dst[marker].vcn = dst[marker - 1].vcn + dst[marker - 1].length; return dst; } diff --git a/fs/proc/base.c b/fs/proc/base.c index 23db452..3b33f94 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -340,6 +340,54 @@ static int proc_root_link(struct inode *inode, struct dentry **dentry, struct vf return result; } + +/* Same as proc_root_link, but this addionally tries to get fs from other + * threads in the group */ +static int proc_task_root_link(struct inode *inode, struct dentry **dentry, + struct vfsmount **mnt) +{ + struct fs_struct *fs; + int result = -ENOENT; + struct task_struct *leader = proc_task(inode); + + task_lock(leader); + fs = leader->fs; + if (fs) { + atomic_inc(&fs->count); + task_unlock(leader); + } else { + /* Try to get fs from other threads */ + task_unlock(leader); + read_lock(&tasklist_lock); + if (pid_alive(leader)) { + struct task_struct *task = leader; + + while ((task = next_thread(task)) != leader) { + task_lock(task); + fs = task->fs; + if (fs) { + atomic_inc(&fs->count); + task_unlock(task); + break; + } + task_unlock(task); + } + } + read_unlock(&tasklist_lock); + } + + if (fs) { + read_lock(&fs->lock); + *mnt = mntget(fs->rootmnt); + *dentry = dget(fs->root); + read_unlock(&fs->lock); + result = 0; + put_fs_struct(fs); + } + return result; +} + + #define MAY_PTRACE(task) \ (task == current || \ (task->parent == current && \ @@ -471,14 +519,14 @@ static int proc_oom_score(struct task_struct *task, char *buffer) /* permission checks */ -static int proc_check_root(struct inode *inode) +/* If the process being read is separated by chroot from the reading process, + * don't let the reader access the threads. + */ +static int proc_check_chroot(struct dentry *root, struct vfsmount *vfsmnt) { - struct dentry *de, *base, *root; - struct vfsmount *our_vfsmnt, *vfsmnt, *mnt; + struct dentry *de, *base; + struct vfsmount *our_vfsmnt, *mnt; int res = 0; - - if (proc_root_link(inode, &root, &vfsmnt)) /* Ewww... */ - return -ENOENT; read_lock(¤t->fs->lock); our_vfsmnt = mntget(current->fs->rootmnt); base = dget(current->fs->root); @@ -511,6 +559,16 @@ out: goto exit; } +static int proc_check_root(struct inode *inode) +{ + struct dentry *root; + struct vfsmount *vfsmnt; + + if (proc_root_link(inode, &root, &vfsmnt)) /* Ewww... */ + return -ENOENT; + return proc_check_chroot(root, vfsmnt); +} + static int proc_permission(struct inode *inode, int mask, struct nameidata *nd) { if (generic_permission(inode, mask, NULL) != 0) @@ -518,6 +576,20 @@ static int proc_permission(struct inode *inode, int mask, struct nameidata *nd) return proc_check_root(inode); } +static int proc_task_permission(struct inode *inode, int mask, struct nameidata *nd) +{ + struct dentry *root; + struct vfsmount *vfsmnt; + + if (generic_permission(inode, mask, NULL) != 0) + return -EACCES; + + if (proc_task_root_link(inode, &root, &vfsmnt)) + return -ENOENT; + + return proc_check_chroot(root, vfsmnt); +} + extern struct seq_operations proc_pid_maps_op; static int maps_open(struct inode *inode, struct file *file) { @@ -1419,7 +1491,7 @@ static struct inode_operations proc_fd_inode_operations = { static struct inode_operations proc_task_inode_operations = { .lookup = proc_task_lookup, - .permission = proc_permission, + .permission = proc_task_permission, }; #ifdef CONFIG_SECURITY |