author    | Anton Altaparmakov <aia21@cantab.net> | 2005-06-23 11:26:22 +0100
committer | Anton Altaparmakov <aia21@cantab.net> | 2005-06-23 11:26:22 +0100
commit    | 3357d4c75f1fb67e7304998c4ad4e9a9fed66fa4 (patch)
tree      | ceba46966a5a1112a05d257d8ecb25ae5eee95e0 /fs
parent    | 364f6c717deef4a3ac4982e670fa9846b43cd060 (diff)
parent    | ee98689be1b054897ff17655008c3048fe88be94 (diff)
Automatic merge with /usr/src/ntfs-2.6.git.
Diffstat (limited to 'fs')
168 files changed, 5449 insertions, 2644 deletions
@@ -741,56 +741,6 @@ config SYSFS Designers of embedded systems may wish to say N here to conserve space. -config DEVFS_FS - bool "/dev file system support (OBSOLETE)" - depends on EXPERIMENTAL - help - This is support for devfs, a virtual file system (like /proc) which - provides the file system interface to device drivers, normally found - in /dev. Devfs does not depend on major and minor number - allocations. Device drivers register entries in /dev which then - appear automatically, which means that the system administrator does - not have to create character and block special device files in the - /dev directory using the mknod command (or MAKEDEV script) anymore. - - This is work in progress. If you want to use this, you *must* read - the material in <file:Documentation/filesystems/devfs/>, especially - the file README there. - - Note that devfs no longer manages /dev/pts! If you are using UNIX98 - ptys, you will also need to mount the /dev/pts filesystem (devpts). - - Note that devfs has been obsoleted by udev, - <http://www.kernel.org/pub/linux/utils/kernel/hotplug/>. - It has been stripped down to a bare minimum and is only provided for - legacy installations that use its naming scheme which is - unfortunately different from the names normal Linux installations - use. - - If unsure, say N. - -config DEVFS_MOUNT - bool "Automatically mount at boot" - depends on DEVFS_FS - help - This option appears if you have CONFIG_DEVFS_FS enabled. Setting - this to 'Y' will make the kernel automatically mount devfs onto /dev - when the system is booted, before the init thread is started. - You can override this with the "devfs=nomount" boot option. - - If unsure, say N. - -config DEVFS_DEBUG - bool "Debug devfs" - depends on DEVFS_FS - help - If you say Y here, then the /dev file system code will generate - debugging messages. See the file - <file:Documentation/filesystems/devfs/boot-options> for more - details. - - If unsure, say N. - config DEVPTS_FS_XATTR bool "/dev/pts Extended Attributes" depends on UNIX98_PTYS @@ -1318,6 +1268,7 @@ config NFS_FS depends on INET select LOCKD select SUNRPC + select NFS_ACL_SUPPORT if NFS_V3_ACL help If you are connected to some other (usually local) Unix computer (using SLIP, PLIP, PPP or Ethernet) and want to mount files residing @@ -1360,6 +1311,16 @@ config NFS_V3 If unsure, say Y. +config NFS_V3_ACL + bool "Provide client support for the NFSv3 ACL protocol extension" + depends on NFS_V3 + help + Implement the NFSv3 ACL protocol extension for manipulating POSIX + Access Control Lists. The server should also be compiled with + the NFSv3 ACL protocol extension; see the CONFIG_NFSD_V3_ACL option. + + If unsure, say N. + config NFS_V4 bool "Provide NFSv4 client support (EXPERIMENTAL)" depends on NFS_FS && EXPERIMENTAL @@ -1403,6 +1364,7 @@ config NFSD select LOCKD select SUNRPC select EXPORTFS + select NFS_ACL_SUPPORT if NFSD_V3_ACL || NFSD_V2_ACL help If you want your Linux box to act as an NFS *server*, so that other computers on your local network which support NFS can access certain @@ -1426,6 +1388,10 @@ config NFSD To compile the NFS server support as a module, choose M here: the module will be called nfsd. If unsure, say N. +config NFSD_V2_ACL + bool + depends on NFSD + config NFSD_V3 bool "Provide NFSv3 server support" depends on NFSD @@ -1433,6 +1399,16 @@ config NFSD_V3 If you would like to include the NFSv3 server as well as the NFSv2 server, say Y here. If unsure, say Y. 
+config NFSD_V3_ACL + bool "Provide server support for the NFSv3 ACL protocol extension" + depends on NFSD_V3 + select NFSD_V2_ACL + help + Implement the NFSv3 ACL protocol extension for manipulating POSIX + Access Control Lists on exported file systems. NFS clients should + be compiled with the NFSv3 ACL protocol extension; see the + CONFIG_NFS_V3_ACL option. If unsure, say N. + config NFSD_V4 bool "Provide NFSv4 server support (EXPERIMENTAL)" depends on NFSD_V3 && EXPERIMENTAL @@ -1477,6 +1453,15 @@ config LOCKD_V4 config EXPORTFS tristate +config NFS_ACL_SUPPORT + tristate + select FS_POSIX_ACL + +config NFS_COMMON + bool + depends on NFSD || NFS_FS + default y + config SUNRPC tristate diff --git a/fs/Makefile b/fs/Makefile index 443f2bc..fc92e59 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -31,6 +31,7 @@ obj-$(CONFIG_BINFMT_FLAT) += binfmt_flat.o obj-$(CONFIG_FS_MBCACHE) += mbcache.o obj-$(CONFIG_FS_POSIX_ACL) += posix_acl.o xattr_acl.o +obj-$(CONFIG_NFS_COMMON) += nfs_common/ obj-$(CONFIG_QUOTA) += dquot.o obj-$(CONFIG_QFMT_V1) += quota_v1.o diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index c7b2b88..9c09641 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -185,6 +185,19 @@ int autofs4_wait(struct autofs_sb_info *,struct dentry *, enum autofs_notify); int autofs4_wait_release(struct autofs_sb_info *,autofs_wqt_t,int); void autofs4_catatonic_mode(struct autofs_sb_info *); +static inline int autofs4_follow_mount(struct vfsmount **mnt, struct dentry **dentry) +{ + int res = 0; + + while (d_mountpoint(*dentry)) { + int followed = follow_down(mnt, dentry); + if (!followed) + break; + res = 1; + } + return res; +} + static inline int simple_positive(struct dentry *dentry) { return dentry->d_inode && !d_unhashed(dentry); diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index 500425e..feb6ac4 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c @@ -56,12 +56,9 @@ static int autofs4_check_mount(struct vfsmount *mnt, struct dentry *dentry) mntget(mnt); dget(dentry); - if (!follow_down(&mnt, &dentry)) + if (!autofs4_follow_mount(&mnt, &dentry)) goto done; - while (d_mountpoint(dentry) && follow_down(&mnt, &dentry)) - ; - /* This is an autofs submount, we can't expire it */ if (is_autofs4_dentry(dentry)) goto done; diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index 3765c04..2a771ec 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -205,7 +205,11 @@ static int autofs4_dir_open(struct inode *inode, struct file *file) struct vfsmount *fp_mnt = mntget(mnt); struct dentry *fp_dentry = dget(dentry); - while (follow_down(&fp_mnt, &fp_dentry) && d_mountpoint(fp_dentry)); + if (!autofs4_follow_mount(&fp_mnt, &fp_dentry)) { + dput(fp_dentry); + mntput(fp_mnt); + return -ENOENT; + } fp = dentry_open(fp_dentry, fp_mnt, file->f_flags); status = PTR_ERR(fp); @@ -302,7 +306,14 @@ static int try_to_fill_dentry(struct dentry *dentry, DPRINTK("expire done status=%d", status); - return 0; + /* + * If the directory still exists the mount request must + * continue otherwise it can't be followed at the right + * time during the walk. 
+ */ + status = d_invalidate(dentry); + if (status != -EBUSY) + return 0; } DPRINTK("dentry=%p %.*s ino=%p", diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c index 5a40d36..fa2348d 100644 --- a/fs/autofs4/waitq.c +++ b/fs/autofs4/waitq.c @@ -191,6 +191,13 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, } if ( !wq ) { + /* Can't wait for an expire if there's no mount */ + if (notify == NFY_NONE && !d_mountpoint(dentry)) { + kfree(name); + up(&sbi->wq_sem); + return -ENOENT; + } + /* Create a new wait queue */ wq = kmalloc(sizeof(struct autofs_wait_queue),GFP_KERNEL); if ( !wq ) { diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c index 009b892..dd9baab 100644 --- a/fs/binfmt_aout.c +++ b/fs/binfmt_aout.c @@ -316,6 +316,7 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs) current->mm->brk = ex.a_bss + (current->mm->start_brk = N_BSSADDR(ex)); current->mm->free_area_cache = current->mm->mmap_base; + current->mm->cached_hole_size = 0; set_mm_counter(current->mm, rss, 0); current->mm->mmap = NULL; diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index c374be5..7976a23 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -775,6 +775,7 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs) change some of these later */ set_mm_counter(current->mm, rss, 0); current->mm->free_area_cache = current->mm->mmap_base; + current->mm->cached_hole_size = 0; retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP), executable_stack); if (retval < 0) { @@ -1125,7 +1126,7 @@ static int dump_write(struct file *file, const void *addr, int nr) return file->f_op->write(file, addr, nr, &file->f_pos) == nr; } -static int dump_seek(struct file *file, off_t off) +static int dump_seek(struct file *file, loff_t off) { if (file->f_op->llseek) { if (file->f_op->llseek(file, off, 0) != off) diff --git a/fs/buffer.c b/fs/buffer.c index 7e9e409..0befa72 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -528,7 +528,7 @@ static void free_more_memory(void) for_each_pgdat(pgdat) { zones = pgdat->node_zonelists[GFP_NOFS&GFP_ZONEMASK].zones; if (*zones) - try_to_free_pages(zones, GFP_NOFS, 0); + try_to_free_pages(zones, GFP_NOFS); } } diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES index 95483ba..dab4774 100644 --- a/fs/cifs/CHANGES +++ b/fs/cifs/CHANGES @@ -6,7 +6,8 @@ kills the cifsd thread (NB: killing the cifs kernel threads is not recommended, unmount and rmmod cifs will kill them when they are no longer needed). Fix readdir to ASCII servers (ie older servers which do not support Unicode) and also require asterik. - +Fix out of memory case in which data could be written one page +off in the page cache. 
Version 1.33 ------------ diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index d00b3bf..78af585 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -96,5 +96,5 @@ extern ssize_t cifs_getxattr(struct dentry *, const char *, void *, size_t); extern ssize_t cifs_listxattr(struct dentry *, char *, size_t); extern int cifs_ioctl (struct inode * inode, struct file * filep, unsigned int command, unsigned long arg); -#define CIFS_VERSION "1.34" +#define CIFS_VERSION "1.35" #endif /* _CIFSFS_H */ diff --git a/fs/cifs/file.c b/fs/cifs/file.c index dde2d25..30ab70c 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -1352,6 +1352,8 @@ static void cifs_copy_cache_pages(struct address_space *mapping, GFP_KERNEL)) { page_cache_release(page); cFYI(1, ("Add page cache failed")); + data += PAGE_CACHE_SIZE; + bytes_read -= PAGE_CACHE_SIZE; continue; } diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index b8b78cb..8d336a9 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -82,12 +82,12 @@ int cifs_get_inode_info_unix(struct inode **pinode, /* get new inode */ if (*pinode == NULL) { *pinode = new_inode(sb); - if(*pinode == NULL) + if (*pinode == NULL) return -ENOMEM; /* Is an i_ino of zero legal? */ /* Are there sanity checks we can use to ensure that the server is really filling in that field? */ - if(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) { + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) { (*pinode)->i_ino = (unsigned long)findData.UniqueId; } /* note ino incremented to unique num in new_inode */ @@ -134,7 +134,7 @@ int cifs_get_inode_info_unix(struct inode **pinode, inode->i_gid = le64_to_cpu(findData.Gid); inode->i_nlink = le64_to_cpu(findData.Nlinks); - if(is_size_safe_to_change(cifsInfo)) { + if (is_size_safe_to_change(cifsInfo)) { /* can not safely change the file size here if the client is writing to it due to potential races */ @@ -162,7 +162,7 @@ int cifs_get_inode_info_unix(struct inode **pinode, if (S_ISREG(inode->i_mode)) { cFYI(1, (" File inode ")); inode->i_op = &cifs_file_inode_ops; - if(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) inode->i_fop = &cifs_file_direct_ops; else inode->i_fop = &cifs_file_ops; @@ -198,17 +198,17 @@ int cifs_get_inode_info(struct inode **pinode, pTcon = cifs_sb->tcon; cFYI(1,("Getting info on %s ", search_path)); - if((pfindData == NULL) && (*pinode != NULL)) { - if(CIFS_I(*pinode)->clientCanCacheRead) { + if ((pfindData == NULL) && (*pinode != NULL)) { + if (CIFS_I(*pinode)->clientCanCacheRead) { cFYI(1,("No need to revalidate cached inode sizes")); return rc; } } /* if file info not passed in then get it from server */ - if(pfindData == NULL) { + if (pfindData == NULL) { buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL); - if(buf == NULL) + if (buf == NULL) return -ENOMEM; pfindData = (FILE_ALL_INFO *)buf; /* could do find first instead but this returns more info */ @@ -268,7 +268,7 @@ int cifs_get_inode_info(struct inode **pinode, IndexNumber field is not guaranteed unique? */ #ifdef CONFIG_CIFS_EXPERIMENTAL - if(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM){ + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM){ int rc1 = 0; __u64 inode_num; @@ -277,7 +277,7 @@ int cifs_get_inode_info(struct inode **pinode, cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); - if(rc1) { + if (rc1) { cFYI(1,("GetSrvInodeNum rc %d", rc1)); /* BB EOPNOSUPP disable SERVER_INUM? */ } else /* do we need cast or hash to ino? 
*/ @@ -355,7 +355,7 @@ int cifs_get_inode_info(struct inode **pinode, if (S_ISREG(inode->i_mode)) { cFYI(1, (" File inode ")); inode->i_op = &cifs_file_inode_ops; - if(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) inode->i_fop = &cifs_file_direct_ops; else inode->i_fop = &cifs_file_ops; @@ -422,7 +422,7 @@ int cifs_unlink(struct inode *inode, struct dentry *direntry) cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); if (!rc) { - if(direntry->d_inode) + if (direntry->d_inode) direntry->d_inode->i_nlink--; } else if (rc == -ENOENT) { d_drop(direntry); @@ -441,7 +441,7 @@ int cifs_unlink(struct inode *inode, struct dentry *direntry) cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); CIFSSMBClose(xid, pTcon, netfid); - if(direntry->d_inode) + if (direntry->d_inode) direntry->d_inode->i_nlink--; } } else if (rc == -EACCES) { @@ -496,7 +496,7 @@ int cifs_unlink(struct inode *inode, struct dentry *direntry) cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); if (!rc) { - if(direntry->d_inode) + if (direntry->d_inode) direntry->d_inode->i_nlink--; } else if (rc == -ETXTBSY) { int oplock = FALSE; @@ -517,14 +517,14 @@ int cifs_unlink(struct inode *inode, struct dentry *direntry) cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); CIFSSMBClose(xid, pTcon, netfid); - if(direntry->d_inode) + if (direntry->d_inode) direntry->d_inode->i_nlink--; } /* BB if rc = -ETXTBUSY goto the rename logic BB */ } } } - if(direntry->d_inode) { + if (direntry->d_inode) { cifsInode = CIFS_I(direntry->d_inode); cifsInode->time = 0; /* will force revalidate to get info when needed */ @@ -582,7 +582,7 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode) if (direntry->d_inode) direntry->d_inode->i_nlink = 2; if (cifs_sb->tcon->ses->capabilities & CAP_UNIX) - if(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { CIFSSMBUnixSetPerms(xid, pTcon, full_path, mode, (__u64)current->euid, diff --git a/fs/coda/psdev.c b/fs/coda/psdev.c index ef001a9..3d1cce3 100644 --- a/fs/coda/psdev.c +++ b/fs/coda/psdev.c @@ -61,7 +61,7 @@ unsigned long coda_timeout = 30; /* .. 
secs, then signals will dequeue */ struct venus_comm coda_comms[MAX_CODADEVS]; -static struct class_simple *coda_psdev_class; +static struct class *coda_psdev_class; /* * Device operations @@ -363,14 +363,14 @@ static int init_coda_psdev(void) CODA_PSDEV_MAJOR); return -EIO; } - coda_psdev_class = class_simple_create(THIS_MODULE, "coda"); + coda_psdev_class = class_create(THIS_MODULE, "coda"); if (IS_ERR(coda_psdev_class)) { err = PTR_ERR(coda_psdev_class); goto out_chrdev; } devfs_mk_dir ("coda"); for (i = 0; i < MAX_CODADEVS; i++) { - class_simple_device_add(coda_psdev_class, MKDEV(CODA_PSDEV_MAJOR,i), + class_device_create(coda_psdev_class, MKDEV(CODA_PSDEV_MAJOR,i), NULL, "cfs%d", i); err = devfs_mk_cdev(MKDEV(CODA_PSDEV_MAJOR, i), S_IFCHR|S_IRUSR|S_IWUSR, "coda/%d", i); @@ -382,8 +382,8 @@ static int init_coda_psdev(void) out_class: for (i = 0; i < MAX_CODADEVS; i++) - class_simple_device_remove(MKDEV(CODA_PSDEV_MAJOR, i)); - class_simple_destroy(coda_psdev_class); + class_device_destroy(coda_psdev_class, MKDEV(CODA_PSDEV_MAJOR, i)); + class_destroy(coda_psdev_class); out_chrdev: unregister_chrdev(CODA_PSDEV_MAJOR, "coda"); out: @@ -425,10 +425,10 @@ static int __init init_coda(void) return 0; out: for (i = 0; i < MAX_CODADEVS; i++) { - class_simple_device_remove(MKDEV(CODA_PSDEV_MAJOR, i)); + class_device_destroy(coda_psdev_class, MKDEV(CODA_PSDEV_MAJOR, i)); devfs_remove("coda/%d", i); } - class_simple_destroy(coda_psdev_class); + class_destroy(coda_psdev_class); devfs_remove("coda"); unregister_chrdev(CODA_PSDEV_MAJOR, "coda"); coda_sysctl_clean(); @@ -447,10 +447,10 @@ static void __exit exit_coda(void) printk("coda: failed to unregister filesystem\n"); } for (i = 0; i < MAX_CODADEVS; i++) { - class_simple_device_remove(MKDEV(CODA_PSDEV_MAJOR, i)); + class_device_destroy(coda_psdev_class, MKDEV(CODA_PSDEV_MAJOR, i)); devfs_remove("coda/%d", i); } - class_simple_destroy(coda_psdev_class); + class_destroy(coda_psdev_class); devfs_remove("coda"); unregister_chrdev(CODA_PSDEV_MAJOR, "coda"); coda_sysctl_clean(); diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c index 548556f..efc97d9 100644 --- a/fs/debugfs/file.c +++ b/fs/debugfs/file.c @@ -45,44 +45,15 @@ struct file_operations debugfs_file_operations = { .open = default_open, }; -#define simple_type(type, format, temptype, strtolfn) \ -static ssize_t read_file_##type(struct file *file, char __user *user_buf, \ - size_t count, loff_t *ppos) \ -{ \ - char buf[32]; \ - type *val = file->private_data; \ - \ - snprintf(buf, sizeof(buf), format "\n", *val); \ - return simple_read_from_buffer(user_buf, count, ppos, buf, strlen(buf));\ -} \ -static ssize_t write_file_##type(struct file *file, const char __user *user_buf,\ - size_t count, loff_t *ppos) \ -{ \ - char *endp; \ - char buf[32]; \ - int buf_size; \ - type *val = file->private_data; \ - temptype tmp; \ - \ - memset(buf, 0x00, sizeof(buf)); \ - buf_size = min(count, (sizeof(buf)-1)); \ - if (copy_from_user(buf, user_buf, buf_size)) \ - return -EFAULT; \ - \ - tmp = strtolfn(buf, &endp, 0); \ - if ((endp == buf) || ((type)tmp != tmp)) \ - return -EINVAL; \ - *val = tmp; \ - return count; \ -} \ -static struct file_operations fops_##type = { \ - .read = read_file_##type, \ - .write = write_file_##type, \ - .open = default_open, \ -}; -simple_type(u8, "%c", unsigned long, simple_strtoul); -simple_type(u16, "%hi", unsigned long, simple_strtoul); -simple_type(u32, "%i", unsigned long, simple_strtoul); +static void debugfs_u8_set(void *data, u64 val) +{ + *(u8 *)data = val; +} +static 
u64 debugfs_u8_get(void *data) +{ + return *(u8 *)data; +} +DEFINE_SIMPLE_ATTRIBUTE(fops_u8, debugfs_u8_get, debugfs_u8_set, "%llu\n"); /** * debugfs_create_u8 - create a file in the debugfs filesystem that is used to read and write a unsigned 8 bit value. @@ -116,6 +87,16 @@ struct dentry *debugfs_create_u8(const char *name, mode_t mode, } EXPORT_SYMBOL_GPL(debugfs_create_u8); +static void debugfs_u16_set(void *data, u64 val) +{ + *(u16 *)data = val; +} +static u64 debugfs_u16_get(void *data) +{ + return *(u16 *)data; +} +DEFINE_SIMPLE_ATTRIBUTE(fops_u16, debugfs_u16_get, debugfs_u16_set, "%llu\n"); + /** * debugfs_create_u16 - create a file in the debugfs filesystem that is used to read and write a unsigned 8 bit value. * @@ -148,6 +129,16 @@ struct dentry *debugfs_create_u16(const char *name, mode_t mode, } EXPORT_SYMBOL_GPL(debugfs_create_u16); +static void debugfs_u32_set(void *data, u64 val) +{ + *(u32 *)data = val; +} +static u64 debugfs_u32_get(void *data) +{ + return *(u32 *)data; +} +DEFINE_SIMPLE_ATTRIBUTE(fops_u32, debugfs_u32_get, debugfs_u32_set, "%llu\n"); + /** * debugfs_create_u32 - create a file in the debugfs filesystem that is used to read and write a unsigned 8 bit value. * @@ -649,6 +649,7 @@ static inline int de_thread(struct task_struct *tsk) } sig->group_exit_task = NULL; sig->notify_count = 0; + sig->real_timer.data = (unsigned long)current; spin_unlock_irq(lock); /* @@ -675,10 +676,8 @@ static inline int de_thread(struct task_struct *tsk) proc_dentry2 = proc_pid_unhash(leader); write_lock_irq(&tasklist_lock); - if (leader->tgid != current->tgid) - BUG(); - if (current->pid == current->tgid) - BUG(); + BUG_ON(leader->tgid != current->tgid); + BUG_ON(current->pid == current->tgid); /* * An exec() starts a new thread group with the * TGID of the previous thread group. Rehash the @@ -726,8 +725,7 @@ static inline int de_thread(struct task_struct *tsk) proc_pid_flush(proc_dentry1); proc_pid_flush(proc_dentry2); - if (exit_state != EXIT_ZOMBIE) - BUG(); + BUG_ON(exit_state != EXIT_ZOMBIE); release_task(leader); } @@ -772,10 +770,8 @@ no_thread_group: kmem_cache_free(sighand_cachep, oldsighand); } - if (!thread_group_empty(current)) - BUG(); - if (!thread_group_leader(current)) - BUG(); + BUG_ON(!thread_group_empty(current)); + BUG_ON(!thread_group_leader(current)); return 0; } diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 2af3338..3a9b6d1 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -122,6 +122,9 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr, start_addr = mm->free_area_cache; + if (len <= mm->cached_hole_size) + start_addr = TASK_UNMAPPED_BASE; + full_search: addr = ALIGN(start_addr, HPAGE_SIZE); diff --git a/fs/isofs/dir.c b/fs/isofs/dir.c index 6030956..7901ac9 100644 --- a/fs/isofs/dir.c +++ b/fs/isofs/dir.c @@ -193,12 +193,17 @@ static int do_isofs_readdir(struct inode *inode, struct file *filp, /* Handle everything else. Do name translation if there is no Rock Ridge NM field. 
*/ - if (sbi->s_unhide == 'n') { - /* Do not report hidden or associated files */ - if (de->flags[-sbi->s_high_sierra] & 5) { - filp->f_pos += de_len; - continue; - } + + /* + * Do not report hidden files if so instructed, or associated + * files unless instructed to do so + */ + if ((sbi->s_hide == 'y' && + (de->flags[-sbi->s_high_sierra] & 1)) || + (sbi->s_showassoc =='n' && + (de->flags[-sbi->s_high_sierra] & 4))) { + filp->f_pos += de_len; + continue; } map = 1; diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index abd7b12..1652de1 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -28,11 +28,6 @@ #define BEQUIET -#ifdef LEAK_CHECK -static int check_malloc; -static int check_bread; -#endif - static int isofs_hashi(struct dentry *parent, struct qstr *qstr); static int isofs_hash(struct dentry *parent, struct qstr *qstr); static int isofs_dentry_cmpi(struct dentry *dentry, struct qstr *a, struct qstr *b); @@ -55,11 +50,6 @@ static void isofs_put_super(struct super_block *sb) } #endif -#ifdef LEAK_CHECK - printk("Outstanding mallocs:%d, outstanding buffers: %d\n", - check_malloc, check_bread); -#endif - kfree(sbi); sb->s_fs_info = NULL; return; @@ -73,7 +63,7 @@ static kmem_cache_t *isofs_inode_cachep; static struct inode *isofs_alloc_inode(struct super_block *sb) { struct iso_inode_info *ei; - ei = (struct iso_inode_info *)kmem_cache_alloc(isofs_inode_cachep, SLAB_KERNEL); + ei = kmem_cache_alloc(isofs_inode_cachep, SLAB_KERNEL); if (!ei) return NULL; return &ei->vfs_inode; @@ -84,9 +74,9 @@ static void isofs_destroy_inode(struct inode *inode) kmem_cache_free(isofs_inode_cachep, ISOFS_I(inode)); } -static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) +static void init_once(void *foo, kmem_cache_t * cachep, unsigned long flags) { - struct iso_inode_info *ei = (struct iso_inode_info *) foo; + struct iso_inode_info *ei = foo; if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == SLAB_CTOR_CONSTRUCTOR) @@ -107,7 +97,8 @@ static int init_inodecache(void) static void destroy_inodecache(void) { if (kmem_cache_destroy(isofs_inode_cachep)) - printk(KERN_INFO "iso_inode_cache: not all structures were freed\n"); + printk(KERN_INFO "iso_inode_cache: not all structures were " + "freed\n"); } static int isofs_remount(struct super_block *sb, int *flags, char *data) @@ -144,7 +135,7 @@ static struct dentry_operations isofs_dentry_ops[] = { { .d_hash = isofs_hashi_ms, .d_compare = isofs_dentry_cmpi_ms, - } + }, #endif }; @@ -153,7 +144,8 @@ struct iso9660_options{ char rock; char joliet; char cruft; - char unhide; + char hide; + char showassoc; char nocompress; unsigned char check; unsigned int blocksize; @@ -219,8 +211,8 @@ isofs_hashi_common(struct dentry *dentry, struct qstr *qstr, int ms) /* * Case insensitive compare of two isofs names. */ -static int -isofs_dentry_cmpi_common(struct dentry *dentry,struct qstr *a,struct qstr *b,int ms) +static int isofs_dentry_cmpi_common(struct dentry *dentry, struct qstr *a, + struct qstr *b, int ms) { int alen, blen; @@ -243,8 +235,8 @@ isofs_dentry_cmpi_common(struct dentry *dentry,struct qstr *a,struct qstr *b,int /* * Case sensitive compare of two isofs names. 
*/ -static int -isofs_dentry_cmp_common(struct dentry *dentry,struct qstr *a,struct qstr *b,int ms) +static int isofs_dentry_cmp_common(struct dentry *dentry, struct qstr *a, + struct qstr *b, int ms) { int alen, blen; @@ -318,13 +310,15 @@ enum { Opt_block, Opt_check_r, Opt_check_s, Opt_cruft, Opt_gid, Opt_ignore, Opt_iocharset, Opt_map_a, Opt_map_n, Opt_map_o, Opt_mode, Opt_nojoliet, Opt_norock, Opt_sb, Opt_session, Opt_uid, Opt_unhide, Opt_utf8, Opt_err, - Opt_nocompress, + Opt_nocompress, Opt_hide, Opt_showassoc, }; static match_table_t tokens = { {Opt_norock, "norock"}, {Opt_nojoliet, "nojoliet"}, {Opt_unhide, "unhide"}, + {Opt_hide, "hide"}, + {Opt_showassoc, "showassoc"}, {Opt_cruft, "cruft"}, {Opt_utf8, "utf8"}, {Opt_iocharset, "iocharset=%s"}, @@ -356,7 +350,7 @@ static match_table_t tokens = { {Opt_err, NULL} }; -static int parse_options(char *options, struct iso9660_options * popt) +static int parse_options(char *options, struct iso9660_options *popt) { char *p; int option; @@ -365,7 +359,8 @@ static int parse_options(char *options, struct iso9660_options * popt) popt->rock = 'y'; popt->joliet = 'y'; popt->cruft = 'n'; - popt->unhide = 'n'; + popt->hide = 'n'; + popt->showassoc = 'n'; popt->check = 'u'; /* unset */ popt->nocompress = 0; popt->blocksize = 1024; @@ -398,8 +393,12 @@ static int parse_options(char *options, struct iso9660_options * popt) case Opt_nojoliet: popt->joliet = 'n'; break; + case Opt_hide: + popt->hide = 'y'; + break; case Opt_unhide: - popt->unhide = 'y'; + case Opt_showassoc: + popt->showassoc = 'y'; break; case Opt_cruft: popt->cruft = 'y'; @@ -493,7 +492,7 @@ static int parse_options(char *options, struct iso9660_options * popt) */ #define WE_OBEY_THE_WRITTEN_STANDARDS 1 -static unsigned int isofs_get_last_session(struct super_block *sb,s32 session ) +static unsigned int isofs_get_last_session(struct super_block *sb, s32 session) { struct cdrom_multisession ms_info; unsigned int vol_desc_start; @@ -518,7 +517,8 @@ static unsigned int isofs_get_last_session(struct super_block *sb,s32 session ) printk(KERN_ERR "Invalid session number or type of track\n"); } i = ioctl_by_bdev(bdev, CDROMMULTISESSION, (unsigned long) &ms_info); - if(session > 0) printk(KERN_ERR "Invalid session number\n"); + if (session > 0) + printk(KERN_ERR "Invalid session number\n"); #if 0 printk("isofs.inode: CDROMMULTISESSION: rc=%d\n",i); if (i==0) { @@ -557,13 +557,13 @@ static int isofs_fill_super(struct super_block *s, void *data, int silent) struct iso9660_options opt; struct isofs_sb_info * sbi; - sbi = kmalloc(sizeof(struct isofs_sb_info), GFP_KERNEL); + sbi = kmalloc(sizeof(*sbi), GFP_KERNEL); if (!sbi) return -ENOMEM; s->s_fs_info = sbi; - memset(sbi, 0, sizeof(struct isofs_sb_info)); + memset(sbi, 0, sizeof(*sbi)); - if (!parse_options((char *) data, &opt)) + if (!parse_options((char *)data, &opt)) goto out_freesbi; /* @@ -792,7 +792,8 @@ root_found: sbi->s_rock = (opt.rock == 'y' ? 
2 : 0); sbi->s_rock_offset = -1; /* initial offset, will guess until SP is found*/ sbi->s_cruft = opt.cruft; - sbi->s_unhide = opt.unhide; + sbi->s_hide = opt.hide; + sbi->s_showassoc = opt.showassoc; sbi->s_uid = opt.uid; sbi->s_gid = opt.gid; sbi->s_utf8 = opt.utf8; @@ -1002,7 +1003,6 @@ int isofs_get_blocks(struct inode *inode, sector_t iblock_s, rv++; } - abort: unlock_kernel(); return rv; @@ -1014,7 +1014,7 @@ abort: static int isofs_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) { - if ( create ) { + if (create) { printk("isofs_get_block: Kernel tries to allocate a block\n"); return -EROFS; } @@ -1061,19 +1061,17 @@ static struct address_space_operations isofs_aops = { static inline void test_and_set_uid(uid_t *p, uid_t value) { - if(value) { + if (value) *p = value; - } } static inline void test_and_set_gid(gid_t *p, gid_t value) { - if(value) { + if (value) *p = value; - } } -static int isofs_read_level3_size(struct inode * inode) +static int isofs_read_level3_size(struct inode *inode) { unsigned long bufsize = ISOFS_BUFFER_SIZE(inode); int high_sierra = ISOFS_SB(inode->i_sb)->s_high_sierra; @@ -1136,7 +1134,7 @@ static int isofs_read_level3_size(struct inode * inode) bh = sb_bread(inode->i_sb, block); if (!bh) goto out_noread; - memcpy((void *) tmpde + slop, bh->b_data, offset); + memcpy((void *)tmpde+slop, bh->b_data, offset); } de = tmpde; } @@ -1150,12 +1148,11 @@ static int isofs_read_level3_size(struct inode * inode) more_entries = de->flags[-high_sierra] & 0x80; i++; - if(i > 100) + if (i > 100) goto out_toomany; - } while(more_entries); + } while (more_entries); out: - if (tmpde) - kfree(tmpde); + kfree(tmpde); if (bh) brelse(bh); return 0; @@ -1179,7 +1176,7 @@ out_toomany: goto out; } -static void isofs_read_inode(struct inode * inode) +static void isofs_read_inode(struct inode *inode) { struct super_block *sb = inode->i_sb; struct isofs_sb_info *sbi = ISOFS_SB(sb); @@ -1249,7 +1246,7 @@ static void isofs_read_inode(struct inode * inode) ei->i_format_parm[2] = 0; ei->i_section_size = isonum_733 (de->size); - if(de->flags[-high_sierra] & 0x80) { + if (de->flags[-high_sierra] & 0x80) { if(isofs_read_level3_size(inode)) goto fail; } else { ei->i_next_section_block = 0; @@ -1336,16 +1333,16 @@ static void isofs_read_inode(struct inode * inode) /* XXX - parse_rock_ridge_inode() had already set i_rdev. 
*/ init_special_inode(inode, inode->i_mode, inode->i_rdev); - out: +out: if (tmpde) kfree(tmpde); if (bh) brelse(bh); return; - out_badread: +out_badread: printk(KERN_WARNING "ISOFS: unable to read i-node block\n"); - fail: +fail: make_bad_inode(inode); goto out; } @@ -1394,11 +1391,8 @@ struct inode *isofs_iget(struct super_block *sb, hashval = (block << sb->s_blocksize_bits) | offset; - inode = iget5_locked(sb, - hashval, - &isofs_iget5_test, - &isofs_iget5_set, - &data); + inode = iget5_locked(sb, hashval, &isofs_iget5_test, + &isofs_iget5_set, &data); if (inode && (inode->i_state & I_NEW)) { sb->s_op->read_inode(inode); @@ -1408,36 +1402,6 @@ struct inode *isofs_iget(struct super_block *sb, return inode; } -#ifdef LEAK_CHECK -#undef malloc -#undef free_s -#undef sb_bread -#undef brelse - -void * leak_check_malloc(unsigned int size){ - void * tmp; - check_malloc++; - tmp = kmalloc(size, GFP_KERNEL); - return tmp; -} - -void leak_check_free_s(void * obj, int size){ - check_malloc--; - return kfree(obj); -} - -struct buffer_head * leak_check_bread(struct super_block *sb, int block){ - check_bread++; - return sb_bread(sb, block); -} - -void leak_check_brelse(struct buffer_head * bh){ - check_bread--; - return brelse(bh); -} - -#endif - static struct super_block *isofs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { diff --git a/fs/isofs/isofs.h b/fs/isofs/isofs.h index 9ce7b51..38c7515 100644 --- a/fs/isofs/isofs.h +++ b/fs/isofs/isofs.h @@ -47,6 +47,8 @@ struct isofs_sb_info { unsigned char s_nosuid; unsigned char s_nodev; unsigned char s_nocompress; + unsigned char s_hide; + unsigned char s_showassoc; mode_t s_mode; gid_t s_gid; diff --git a/fs/isofs/namei.c b/fs/isofs/namei.c index 690edf3..e37e82b 100644 --- a/fs/isofs/namei.c +++ b/fs/isofs/namei.c @@ -131,14 +131,16 @@ isofs_find_entry(struct inode *dir, struct dentry *dentry, } /* - * Skip hidden or associated files unless unhide is set + * Skip hidden or associated files unless hide or showassoc, + * respectively, is set */ match = 0; if (dlen > 0 && - (!(de->flags[-sbi->s_high_sierra] & 5) - || sbi->s_unhide == 'y')) - { - match = (isofs_cmp(dentry,dpnt,dlen) == 0); + (sbi->s_hide =='n' || + (!(de->flags[-sbi->s_high_sierra] & 1))) && + (sbi->s_showassoc =='y' || + (!(de->flags[-sbi->s_high_sierra] & 4)))) { + match = (isofs_cmp(dentry, dpnt, dlen) == 0); } if (match) { isofs_normalize_block_and_offset(de, @@ -146,11 +148,11 @@ isofs_find_entry(struct inode *dir, struct dentry *dentry, &offset_saved); *block_rv = block_saved; *offset_rv = offset_saved; - if (bh) brelse(bh); + brelse(bh); return 1; } } - if (bh) brelse(bh); + brelse(bh); return 0; } diff --git a/fs/isofs/rock.c b/fs/isofs/rock.c index 089e79c..4326cb4 100644 --- a/fs/isofs/rock.c +++ b/fs/isofs/rock.c @@ -13,352 +13,542 @@ #include "isofs.h" #include "rock.h" -/* These functions are designed to read the system areas of a directory record +/* + * These functions are designed to read the system areas of a directory record * and extract relevant information. There are different functions provided * depending upon what information we need at the time. One function fills * out an inode structure, a second one extracts a filename, a third one * returns a symbolic link name, and a fourth one returns the extent number - * for the file. 
*/ - -#define SIG(A,B) ((A) | ((B) << 8)) /* isonum_721() */ - - -/* This is a way of ensuring that we have something in the system - use fields that is compatible with Rock Ridge */ -#define CHECK_SP(FAIL) \ - if(rr->u.SP.magic[0] != 0xbe) FAIL; \ - if(rr->u.SP.magic[1] != 0xef) FAIL; \ - ISOFS_SB(inode->i_sb)->s_rock_offset=rr->u.SP.skip; -/* We define a series of macros because each function must do exactly the - same thing in certain places. We use the macros to ensure that everything - is done correctly */ - -#define CONTINUE_DECLS \ - int cont_extent = 0, cont_offset = 0, cont_size = 0; \ - void *buffer = NULL - -#define CHECK_CE \ - {cont_extent = isonum_733(rr->u.CE.extent); \ - cont_offset = isonum_733(rr->u.CE.offset); \ - cont_size = isonum_733(rr->u.CE.size);} - -#define SETUP_ROCK_RIDGE(DE,CHR,LEN) \ - {LEN= sizeof(struct iso_directory_record) + DE->name_len[0]; \ - if(LEN & 1) LEN++; \ - CHR = ((unsigned char *) DE) + LEN; \ - LEN = *((unsigned char *) DE) - LEN; \ - if (LEN<0) LEN=0; \ - if (ISOFS_SB(inode->i_sb)->s_rock_offset!=-1) \ - { \ - LEN-=ISOFS_SB(inode->i_sb)->s_rock_offset; \ - CHR+=ISOFS_SB(inode->i_sb)->s_rock_offset; \ - if (LEN<0) LEN=0; \ - } \ -} - -#define MAYBE_CONTINUE(LABEL,DEV) \ - {if (buffer) { kfree(buffer); buffer = NULL; } \ - if (cont_extent){ \ - int block, offset, offset1; \ - struct buffer_head * pbh; \ - buffer = kmalloc(cont_size,GFP_KERNEL); \ - if (!buffer) goto out; \ - block = cont_extent; \ - offset = cont_offset; \ - offset1 = 0; \ - pbh = sb_bread(DEV->i_sb, block); \ - if(pbh){ \ - if (offset > pbh->b_size || offset + cont_size > pbh->b_size){ \ - brelse(pbh); \ - goto out; \ - } \ - memcpy(buffer + offset1, pbh->b_data + offset, cont_size - offset1); \ - brelse(pbh); \ - chr = (unsigned char *) buffer; \ - len = cont_size; \ - cont_extent = 0; \ - cont_size = 0; \ - cont_offset = 0; \ - goto LABEL; \ - } \ - printk("Unable to read rock-ridge attributes\n"); \ - }} - -/* return length of name field; 0: not found, -1: to be ignored */ -int get_rock_ridge_filename(struct iso_directory_record * de, - char * retname, struct inode * inode) + * for the file. + */ + +#define SIG(A,B) ((A) | ((B) << 8)) /* isonum_721() */ + +struct rock_state { + void *buffer; + unsigned char *chr; + int len; + int cont_size; + int cont_extent; + int cont_offset; + struct inode *inode; +}; + +/* + * This is a way of ensuring that we have something in the system + * use fields that is compatible with Rock Ridge. Return zero on success. + */ + +static int check_sp(struct rock_ridge *rr, struct inode *inode) { - int len; - unsigned char * chr; - CONTINUE_DECLS; - int retnamlen = 0, truncate=0; - - if (!ISOFS_SB(inode->i_sb)->s_rock) return 0; - *retname = 0; - - SETUP_ROCK_RIDGE(de, chr, len); - repeat: - { - struct rock_ridge * rr; - int sig; - - while (len > 2){ /* There may be one byte for padding somewhere */ - rr = (struct rock_ridge *) chr; - if (rr->len < 3) goto out; /* Something got screwed up here */ - sig = isonum_721(chr); - chr += rr->len; - len -= rr->len; - if (len < 0) goto out; /* corrupted isofs */ - - switch(sig){ - case SIG('R','R'): - if((rr->u.RR.flags[0] & RR_NM) == 0) goto out; - break; - case SIG('S','P'): - CHECK_SP(goto out); - break; - case SIG('C','E'): - CHECK_CE; - break; - case SIG('N','M'): - if (truncate) break; - if (rr->len < 5) break; - /* - * If the flags are 2 or 4, this indicates '.' or '..'. - * We don't want to do anything with this, because it - * screws up the code that calls us. 
We don't really - * care anyways, since we can just use the non-RR - * name. - */ - if (rr->u.NM.flags & 6) { - break; + if (rr->u.SP.magic[0] != 0xbe) + return -1; + if (rr->u.SP.magic[1] != 0xef) + return -1; + ISOFS_SB(inode->i_sb)->s_rock_offset = rr->u.SP.skip; + return 0; +} + +static void setup_rock_ridge(struct iso_directory_record *de, + struct inode *inode, struct rock_state *rs) +{ + rs->len = sizeof(struct iso_directory_record) + de->name_len[0]; + if (rs->len & 1) + (rs->len)++; + rs->chr = (unsigned char *)de + rs->len; + rs->len = *((unsigned char *)de) - rs->len; + if (rs->len < 0) + rs->len = 0; + + if (ISOFS_SB(inode->i_sb)->s_rock_offset != -1) { + rs->len -= ISOFS_SB(inode->i_sb)->s_rock_offset; + rs->chr += ISOFS_SB(inode->i_sb)->s_rock_offset; + if (rs->len < 0) + rs->len = 0; + } +} + +static void init_rock_state(struct rock_state *rs, struct inode *inode) +{ + memset(rs, 0, sizeof(*rs)); + rs->inode = inode; +} + +/* + * Returns 0 if the caller should continue scanning, 1 if the scan must end + * and -ve on error. + */ +static int rock_continue(struct rock_state *rs) +{ + int ret = 1; + int blocksize = 1 << rs->inode->i_blkbits; + const int min_de_size = offsetof(struct rock_ridge, u); + + kfree(rs->buffer); + rs->buffer = NULL; + + if ((unsigned)rs->cont_offset > blocksize - min_de_size || + (unsigned)rs->cont_size > blocksize || + (unsigned)(rs->cont_offset + rs->cont_size) > blocksize) { + printk(KERN_NOTICE "rock: corrupted directory entry. " + "extent=%d, offset=%d, size=%d\n", + rs->cont_extent, rs->cont_offset, rs->cont_size); + ret = -EIO; + goto out; } - if (rr->u.NM.flags & ~1) { - printk("Unsupported NM flag settings (%d)\n",rr->u.NM.flags); - break; + if (rs->cont_extent) { + struct buffer_head *bh; + + rs->buffer = kmalloc(rs->cont_size, GFP_KERNEL); + if (!rs->buffer) { + ret = -ENOMEM; + goto out; + } + ret = -EIO; + bh = sb_bread(rs->inode->i_sb, rs->cont_extent); + if (bh) { + memcpy(rs->buffer, bh->b_data + rs->cont_offset, + rs->cont_size); + put_bh(bh); + rs->chr = rs->buffer; + rs->len = rs->cont_size; + rs->cont_extent = 0; + rs->cont_size = 0; + rs->cont_offset = 0; + return 0; + } + printk("Unable to read rock-ridge attributes\n"); + } +out: + kfree(rs->buffer); + rs->buffer = NULL; + return ret; +} + +/* + * We think there's a record of type `sig' at rs->chr. Parse the signature + * and make sure that there's really room for a record of that type. 
+ */ +static int rock_check_overflow(struct rock_state *rs, int sig) +{ + int len; + + switch (sig) { + case SIG('S', 'P'): + len = sizeof(struct SU_SP_s); + break; + case SIG('C', 'E'): + len = sizeof(struct SU_CE_s); + break; + case SIG('E', 'R'): + len = sizeof(struct SU_ER_s); + break; + case SIG('R', 'R'): + len = sizeof(struct RR_RR_s); + break; + case SIG('P', 'X'): + len = sizeof(struct RR_PX_s); + break; + case SIG('P', 'N'): + len = sizeof(struct RR_PN_s); + break; + case SIG('S', 'L'): + len = sizeof(struct RR_SL_s); + break; + case SIG('N', 'M'): + len = sizeof(struct RR_NM_s); + break; + case SIG('C', 'L'): + len = sizeof(struct RR_CL_s); + break; + case SIG('P', 'L'): + len = sizeof(struct RR_PL_s); + break; + case SIG('T', 'F'): + len = sizeof(struct RR_TF_s); + break; + case SIG('Z', 'F'): + len = sizeof(struct RR_ZF_s); + break; + default: + len = 0; + break; } - if((strlen(retname) + rr->len - 5) >= 254) { - truncate = 1; - break; + len += offsetof(struct rock_ridge, u); + if (len > rs->len) { + printk(KERN_NOTICE "rock: directory entry would overflow " + "storage\n"); + printk(KERN_NOTICE "rock: sig=0x%02x, size=%d, remaining=%d\n", + sig, len, rs->len); + return -EIO; + } + return 0; +} + +/* + * return length of name field; 0: not found, -1: to be ignored + */ +int get_rock_ridge_filename(struct iso_directory_record *de, + char *retname, struct inode *inode) +{ + struct rock_state rs; + struct rock_ridge *rr; + int sig; + int retnamlen = 0; + int truncate = 0; + int ret = 0; + + if (!ISOFS_SB(inode->i_sb)->s_rock) + return 0; + *retname = 0; + + init_rock_state(&rs, inode); + setup_rock_ridge(de, inode, &rs); +repeat: + + while (rs.len > 2) { /* There may be one byte for padding somewhere */ + rr = (struct rock_ridge *)rs.chr; + if (rr->len < 3) + goto out; /* Something got screwed up here */ + sig = isonum_721(rs.chr); + if (rock_check_overflow(&rs, sig)) + goto eio; + rs.chr += rr->len; + rs.len -= rr->len; + if (rs.len < 0) + goto eio; /* corrupted isofs */ + + switch (sig) { + case SIG('R', 'R'): + if ((rr->u.RR.flags[0] & RR_NM) == 0) + goto out; + break; + case SIG('S', 'P'): + if (check_sp(rr, inode)) + goto out; + break; + case SIG('C', 'E'): + rs.cont_extent = isonum_733(rr->u.CE.extent); + rs.cont_offset = isonum_733(rr->u.CE.offset); + rs.cont_size = isonum_733(rr->u.CE.size); + break; + case SIG('N', 'M'): + if (truncate) + break; + if (rr->len < 5) + break; + /* + * If the flags are 2 or 4, this indicates '.' or '..'. + * We don't want to do anything with this, because it + * screws up the code that calls us. We don't really + * care anyways, since we can just use the non-RR + * name. 
+ */ + if (rr->u.NM.flags & 6) + break; + + if (rr->u.NM.flags & ~1) { + printk("Unsupported NM flag settings (%d)\n", + rr->u.NM.flags); + break; + } + if ((strlen(retname) + rr->len - 5) >= 254) { + truncate = 1; + break; + } + strncat(retname, rr->u.NM.name, rr->len - 5); + retnamlen += rr->len - 5; + break; + case SIG('R', 'E'): + kfree(rs.buffer); + return -1; + default: + break; + } } - strncat(retname, rr->u.NM.name, rr->len - 5); - retnamlen += rr->len - 5; - break; - case SIG('R','E'): - if (buffer) kfree(buffer); - return -1; - default: - break; - } - } - } - MAYBE_CONTINUE(repeat,inode); - if (buffer) kfree(buffer); - return retnamlen; /* If 0, this file did not have a NM field */ - out: - if(buffer) kfree(buffer); - return 0; + ret = rock_continue(&rs); + if (ret == 0) + goto repeat; + if (ret == 1) + return retnamlen; /* If 0, this file did not have a NM field */ +out: + kfree(rs.buffer); + return ret; +eio: + ret = -EIO; + goto out; } static int parse_rock_ridge_inode_internal(struct iso_directory_record *de, struct inode *inode, int regard_xa) { - int len; - unsigned char * chr; - int symlink_len = 0; - CONTINUE_DECLS; - - if (!ISOFS_SB(inode->i_sb)->s_rock) return 0; - - SETUP_ROCK_RIDGE(de, chr, len); - if (regard_xa) - { - chr+=14; - len-=14; - if (len<0) len=0; - } - - repeat: - { - int cnt, sig; - struct inode * reloc; - struct rock_ridge * rr; - int rootflag; - - while (len > 2){ /* There may be one byte for padding somewhere */ - rr = (struct rock_ridge *) chr; - if (rr->len < 3) goto out; /* Something got screwed up here */ - sig = isonum_721(chr); - chr += rr->len; - len -= rr->len; - if (len < 0) goto out; /* corrupted isofs */ - - switch(sig){ + int symlink_len = 0; + int cnt, sig; + struct inode *reloc; + struct rock_ridge *rr; + int rootflag; + struct rock_state rs; + int ret = 0; + + if (!ISOFS_SB(inode->i_sb)->s_rock) + return 0; + + init_rock_state(&rs, inode); + setup_rock_ridge(de, inode, &rs); + if (regard_xa) { + rs.chr += 14; + rs.len -= 14; + if (rs.len < 0) + rs.len = 0; + } + +repeat: + while (rs.len > 2) { /* There may be one byte for padding somewhere */ + rr = (struct rock_ridge *)rs.chr; + if (rr->len < 3) + goto out; /* Something got screwed up here */ + sig = isonum_721(rs.chr); + if (rock_check_overflow(&rs, sig)) + goto eio; + rs.chr += rr->len; + rs.len -= rr->len; + if (rs.len < 0) + goto eio; /* corrupted isofs */ + + switch (sig) { #ifndef CONFIG_ZISOFS /* No flag for SF or ZF */ - case SIG('R','R'): - if((rr->u.RR.flags[0] & - (RR_PX | RR_TF | RR_SL | RR_CL)) == 0) goto out; - break; + case SIG('R', 'R'): + if ((rr->u.RR.flags[0] & + (RR_PX | RR_TF | RR_SL | RR_CL)) == 0) + goto out; + break; #endif - case SIG('S','P'): - CHECK_SP(goto out); - break; - case SIG('C','E'): - CHECK_CE; - break; - case SIG('E','R'): - ISOFS_SB(inode->i_sb)->s_rock = 1; - printk(KERN_DEBUG "ISO 9660 Extensions: "); - { int p; - for(p=0;p<rr->u.ER.len_id;p++) printk("%c",rr->u.ER.data[p]); - } - printk("\n"); - break; - case SIG('P','X'): - inode->i_mode = isonum_733(rr->u.PX.mode); - inode->i_nlink = isonum_733(rr->u.PX.n_links); - inode->i_uid = isonum_733(rr->u.PX.uid); - inode->i_gid = isonum_733(rr->u.PX.gid); - break; - case SIG('P','N'): - { int high, low; - high = isonum_733(rr->u.PN.dev_high); - low = isonum_733(rr->u.PN.dev_low); - /* - * The Rock Ridge standard specifies that if sizeof(dev_t) <= 4, - * then the high field is unused, and the device number is completely - * stored in the low field. 
Some writers may ignore this subtlety, - * and as a result we test to see if the entire device number is - * stored in the low field, and use that. - */ - if((low & ~0xff) && high == 0) { - inode->i_rdev = MKDEV(low >> 8, low & 0xff); - } else { - inode->i_rdev = MKDEV(high, low); - } - } - break; - case SIG('T','F'): - /* Some RRIP writers incorrectly place ctime in the TF_CREATE field. - Try to handle this correctly for either case. */ - cnt = 0; /* Rock ridge never appears on a High Sierra disk */ - if(rr->u.TF.flags & TF_CREATE) { - inode->i_ctime.tv_sec = iso_date(rr->u.TF.times[cnt++].time, 0); - inode->i_ctime.tv_nsec = 0; - } - if(rr->u.TF.flags & TF_MODIFY) { - inode->i_mtime.tv_sec = iso_date(rr->u.TF.times[cnt++].time, 0); - inode->i_mtime.tv_nsec = 0; - } - if(rr->u.TF.flags & TF_ACCESS) { - inode->i_atime.tv_sec = iso_date(rr->u.TF.times[cnt++].time, 0); - inode->i_atime.tv_nsec = 0; - } - if(rr->u.TF.flags & TF_ATTRIBUTES) { - inode->i_ctime.tv_sec = iso_date(rr->u.TF.times[cnt++].time, 0); - inode->i_ctime.tv_nsec = 0; - } - break; - case SIG('S','L'): - {int slen; - struct SL_component * slp; - struct SL_component * oldslp; - slen = rr->len - 5; - slp = &rr->u.SL.link; - inode->i_size = symlink_len; - while (slen > 1){ - rootflag = 0; - switch(slp->flags &~1){ - case 0: - inode->i_size += slp->len; - break; - case 2: - inode->i_size += 1; - break; - case 4: - inode->i_size += 2; - break; - case 8: - rootflag = 1; - inode->i_size += 1; - break; - default: - printk("Symlink component flag not implemented\n"); - } - slen -= slp->len + 2; - oldslp = slp; - slp = (struct SL_component *) (((char *) slp) + slp->len + 2); - - if(slen < 2) { - if( ((rr->u.SL.flags & 1) != 0) - && ((oldslp->flags & 1) == 0) ) inode->i_size += 1; - break; - } - - /* - * If this component record isn't continued, then append a '/'. 
- */ - if (!rootflag && (oldslp->flags & 1) == 0) - inode->i_size += 1; - } - } - symlink_len = inode->i_size; - break; - case SIG('R','E'): - printk(KERN_WARNING "Attempt to read inode for relocated directory\n"); - goto out; - case SIG('C','L'): - ISOFS_I(inode)->i_first_extent = isonum_733(rr->u.CL.location); - reloc = isofs_iget(inode->i_sb, ISOFS_I(inode)->i_first_extent, 0); - if (!reloc) - goto out; - inode->i_mode = reloc->i_mode; - inode->i_nlink = reloc->i_nlink; - inode->i_uid = reloc->i_uid; - inode->i_gid = reloc->i_gid; - inode->i_rdev = reloc->i_rdev; - inode->i_size = reloc->i_size; - inode->i_blocks = reloc->i_blocks; - inode->i_atime = reloc->i_atime; - inode->i_ctime = reloc->i_ctime; - inode->i_mtime = reloc->i_mtime; - iput(reloc); - break; + case SIG('S', 'P'): + if (check_sp(rr, inode)) + goto out; + break; + case SIG('C', 'E'): + rs.cont_extent = isonum_733(rr->u.CE.extent); + rs.cont_offset = isonum_733(rr->u.CE.offset); + rs.cont_size = isonum_733(rr->u.CE.size); + break; + case SIG('E', 'R'): + ISOFS_SB(inode->i_sb)->s_rock = 1; + printk(KERN_DEBUG "ISO 9660 Extensions: "); + { + int p; + for (p = 0; p < rr->u.ER.len_id; p++) + printk("%c", rr->u.ER.data[p]); + } + printk("\n"); + break; + case SIG('P', 'X'): + inode->i_mode = isonum_733(rr->u.PX.mode); + inode->i_nlink = isonum_733(rr->u.PX.n_links); + inode->i_uid = isonum_733(rr->u.PX.uid); + inode->i_gid = isonum_733(rr->u.PX.gid); + break; + case SIG('P', 'N'): + { + int high, low; + high = isonum_733(rr->u.PN.dev_high); + low = isonum_733(rr->u.PN.dev_low); + /* + * The Rock Ridge standard specifies that if + * sizeof(dev_t) <= 4, then the high field is + * unused, and the device number is completely + * stored in the low field. Some writers may + * ignore this subtlety, + * and as a result we test to see if the entire + * device number is + * stored in the low field, and use that. + */ + if ((low & ~0xff) && high == 0) { + inode->i_rdev = + MKDEV(low >> 8, low & 0xff); + } else { + inode->i_rdev = + MKDEV(high, low); + } + } + break; + case SIG('T', 'F'): + /* + * Some RRIP writers incorrectly place ctime in the + * TF_CREATE field. Try to handle this correctly for + * either case. + */ + /* Rock ridge never appears on a High Sierra disk */ + cnt = 0; + if (rr->u.TF.flags & TF_CREATE) { + inode->i_ctime.tv_sec = + iso_date(rr->u.TF.times[cnt++].time, + 0); + inode->i_ctime.tv_nsec = 0; + } + if (rr->u.TF.flags & TF_MODIFY) { + inode->i_mtime.tv_sec = + iso_date(rr->u.TF.times[cnt++].time, + 0); + inode->i_mtime.tv_nsec = 0; + } + if (rr->u.TF.flags & TF_ACCESS) { + inode->i_atime.tv_sec = + iso_date(rr->u.TF.times[cnt++].time, + 0); + inode->i_atime.tv_nsec = 0; + } + if (rr->u.TF.flags & TF_ATTRIBUTES) { + inode->i_ctime.tv_sec = + iso_date(rr->u.TF.times[cnt++].time, + 0); + inode->i_ctime.tv_nsec = 0; + } + break; + case SIG('S', 'L'): + { + int slen; + struct SL_component *slp; + struct SL_component *oldslp; + slen = rr->len - 5; + slp = &rr->u.SL.link; + inode->i_size = symlink_len; + while (slen > 1) { + rootflag = 0; + switch (slp->flags & ~1) { + case 0: + inode->i_size += + slp->len; + break; + case 2: + inode->i_size += 1; + break; + case 4: + inode->i_size += 2; + break; + case 8: + rootflag = 1; + inode->i_size += 1; + break; + default: + printk("Symlink component flag " + "not implemented\n"); + } + slen -= slp->len + 2; + oldslp = slp; + slp = (struct SL_component *) + (((char *)slp) + slp->len + 2); + + if (slen < 2) { + if (((rr->u.SL. 
+ flags & 1) != 0) + && + ((oldslp-> + flags & 1) == 0)) + inode->i_size += + 1; + break; + } + + /* + * If this component record isn't + * continued, then append a '/'. + */ + if (!rootflag + && (oldslp->flags & 1) == 0) + inode->i_size += 1; + } + } + symlink_len = inode->i_size; + break; + case SIG('R', 'E'): + printk(KERN_WARNING "Attempt to read inode for " + "relocated directory\n"); + goto out; + case SIG('C', 'L'): + ISOFS_I(inode)->i_first_extent = + isonum_733(rr->u.CL.location); + reloc = + isofs_iget(inode->i_sb, + ISOFS_I(inode)->i_first_extent, + 0); + if (!reloc) + goto out; + inode->i_mode = reloc->i_mode; + inode->i_nlink = reloc->i_nlink; + inode->i_uid = reloc->i_uid; + inode->i_gid = reloc->i_gid; + inode->i_rdev = reloc->i_rdev; + inode->i_size = reloc->i_size; + inode->i_blocks = reloc->i_blocks; + inode->i_atime = reloc->i_atime; + inode->i_ctime = reloc->i_ctime; + inode->i_mtime = reloc->i_mtime; + iput(reloc); + break; #ifdef CONFIG_ZISOFS - case SIG('Z','F'): - if ( !ISOFS_SB(inode->i_sb)->s_nocompress ) { - int algo; - algo = isonum_721(rr->u.ZF.algorithm); - if ( algo == SIG('p','z') ) { - int block_shift = isonum_711(&rr->u.ZF.parms[1]); - if ( block_shift < PAGE_CACHE_SHIFT || block_shift > 17 ) { - printk(KERN_WARNING "isofs: Can't handle ZF block size of 2^%d\n", block_shift); - } else { - /* Note: we don't change i_blocks here */ - ISOFS_I(inode)->i_file_format = isofs_file_compressed; - /* Parameters to compression algorithm (header size, block size) */ - ISOFS_I(inode)->i_format_parm[0] = isonum_711(&rr->u.ZF.parms[0]); - ISOFS_I(inode)->i_format_parm[1] = isonum_711(&rr->u.ZF.parms[1]); - inode->i_size = isonum_733(rr->u.ZF.real_size); - } - } else { - printk(KERN_WARNING "isofs: Unknown ZF compression algorithm: %c%c\n", - rr->u.ZF.algorithm[0], rr->u.ZF.algorithm[1]); - } - } - break; + case SIG('Z', 'F'): { + int algo; + + if (ISOFS_SB(inode->i_sb)->s_nocompress) + break; + algo = isonum_721(rr->u.ZF.algorithm); + if (algo == SIG('p', 'z')) { + int block_shift = + isonum_711(&rr->u.ZF.parms[1]); + if (block_shift < PAGE_CACHE_SHIFT + || block_shift > 17) { + printk(KERN_WARNING "isofs: " + "Can't handle ZF block " + "size of 2^%d\n", + block_shift); + } else { + /* + * Note: we don't change + * i_blocks here + */ + ISOFS_I(inode)->i_file_format = + isofs_file_compressed; + /* + * Parameters to compression + * algorithm (header size, + * block size) + */ + ISOFS_I(inode)->i_format_parm[0] = + isonum_711(&rr->u.ZF.parms[0]); + ISOFS_I(inode)->i_format_parm[1] = + isonum_711(&rr->u.ZF.parms[1]); + inode->i_size = + isonum_733(rr->u.ZF. 
+ real_size); + } + } else { + printk(KERN_WARNING + "isofs: Unknown ZF compression " + "algorithm: %c%c\n", + rr->u.ZF.algorithm[0], + rr->u.ZF.algorithm[1]); + } + break; + } #endif - default: - break; - } - } - } - MAYBE_CONTINUE(repeat,inode); - out: - if(buffer) kfree(buffer); - return 0; + default: + break; + } + } + ret = rock_continue(&rs); + if (ret == 0) + goto repeat; + if (ret == 1) + ret = 0; +out: + kfree(rs.buffer); + return ret; +eio: + ret = -EIO; + goto out; } static char *get_symlink_chunk(char *rpnt, struct rock_ridge *rr, char *plimit) @@ -376,32 +566,32 @@ static char *get_symlink_chunk(char *rpnt, struct rock_ridge *rr, char *plimit) if (slp->len > plimit - rpnt) return NULL; memcpy(rpnt, slp->text, slp->len); - rpnt+=slp->len; + rpnt += slp->len; break; case 2: if (rpnt >= plimit) return NULL; - *rpnt++='.'; + *rpnt++ = '.'; break; case 4: if (2 > plimit - rpnt) return NULL; - *rpnt++='.'; - *rpnt++='.'; + *rpnt++ = '.'; + *rpnt++ = '.'; break; case 8: if (rpnt >= plimit) return NULL; rootflag = 1; - *rpnt++='/'; + *rpnt++ = '/'; break; default: printk("Symlink component flag not implemented (%d)\n", - slp->flags); + slp->flags); } slen -= slp->len + 2; oldslp = slp; - slp = (struct SL_component *) ((char *) slp + slp->len + 2); + slp = (struct SL_component *)((char *)slp + slp->len + 2); if (slen < 2) { /* @@ -412,7 +602,7 @@ static char *get_symlink_chunk(char *rpnt, struct rock_ridge *rr, char *plimit) !(oldslp->flags & 1)) { if (rpnt >= plimit) return NULL; - *rpnt++='/'; + *rpnt++ = '/'; } break; } @@ -423,59 +613,61 @@ static char *get_symlink_chunk(char *rpnt, struct rock_ridge *rr, char *plimit) if (!rootflag && !(oldslp->flags & 1)) { if (rpnt >= plimit) return NULL; - *rpnt++='/'; + *rpnt++ = '/'; } } return rpnt; } -int parse_rock_ridge_inode(struct iso_directory_record * de, - struct inode * inode) +int parse_rock_ridge_inode(struct iso_directory_record *de, struct inode *inode) { - int result=parse_rock_ridge_inode_internal(de,inode,0); - /* if rockridge flag was reset and we didn't look for attributes - * behind eventual XA attributes, have a look there */ - if ((ISOFS_SB(inode->i_sb)->s_rock_offset==-1) - &&(ISOFS_SB(inode->i_sb)->s_rock==2)) - { - result=parse_rock_ridge_inode_internal(de,inode,14); - } - return result; -} + int result = parse_rock_ridge_inode_internal(de, inode, 0); -/* readpage() for symlinks: reads symlink contents into the page and either - makes it uptodate and returns 0 or returns error (-EIO) */ + /* + * if rockridge flag was reset and we didn't look for attributes + * behind eventual XA attributes, have a look there + */ + if ((ISOFS_SB(inode->i_sb)->s_rock_offset == -1) + && (ISOFS_SB(inode->i_sb)->s_rock == 2)) { + result = parse_rock_ridge_inode_internal(de, inode, 14); + } + return result; +} +/* + * readpage() for symlinks: reads symlink contents into the page and either + * makes it uptodate and returns 0 or returns error (-EIO) + */ static int rock_ridge_symlink_readpage(struct file *file, struct page *page) { struct inode *inode = page->mapping->host; - struct iso_inode_info *ei = ISOFS_I(inode); + struct iso_inode_info *ei = ISOFS_I(inode); char *link = kmap(page); unsigned long bufsize = ISOFS_BUFFER_SIZE(inode); struct buffer_head *bh; char *rpnt = link; unsigned char *pnt; - struct iso_directory_record *raw_inode; - CONTINUE_DECLS; + struct iso_directory_record *raw_de; unsigned long block, offset; int sig; - int len; - unsigned char *chr; struct rock_ridge *rr; + struct rock_state rs; + int ret; if 
(!ISOFS_SB(inode->i_sb)->s_rock) goto error; + init_rock_state(&rs, inode); block = ei->i_iget5_block; lock_kernel(); bh = sb_bread(inode->i_sb, block); if (!bh) goto out_noread; - offset = ei->i_iget5_offset; - pnt = (unsigned char *) bh->b_data + offset; + offset = ei->i_iget5_offset; + pnt = (unsigned char *)bh->b_data + offset; - raw_inode = (struct iso_directory_record *) pnt; + raw_de = (struct iso_directory_record *)pnt; /* * If we go past the end of the buffer, there is some sort of error. @@ -483,20 +675,24 @@ static int rock_ridge_symlink_readpage(struct file *file, struct page *page) if (offset + *pnt > bufsize) goto out_bad_span; - /* Now test for possible Rock Ridge extensions which will override - some of these numbers in the inode structure. */ + /* + * Now test for possible Rock Ridge extensions which will override + * some of these numbers in the inode structure. + */ - SETUP_ROCK_RIDGE(raw_inode, chr, len); + setup_rock_ridge(raw_de, inode, &rs); - repeat: - while (len > 2) { /* There may be one byte for padding somewhere */ - rr = (struct rock_ridge *) chr; +repeat: + while (rs.len > 2) { /* There may be one byte for padding somewhere */ + rr = (struct rock_ridge *)rs.chr; if (rr->len < 3) goto out; /* Something got screwed up here */ - sig = isonum_721(chr); - chr += rr->len; - len -= rr->len; - if (len < 0) + sig = isonum_721(rs.chr); + if (rock_check_overflow(&rs, sig)) + goto out; + rs.chr += rr->len; + rs.len -= rr->len; + if (rs.len < 0) goto out; /* corrupted isofs */ switch (sig) { @@ -505,7 +701,8 @@ static int rock_ridge_symlink_readpage(struct file *file, struct page *page) goto out; break; case SIG('S', 'P'): - CHECK_SP(goto out); + if (check_sp(rr, inode)) + goto out; break; case SIG('S', 'L'): rpnt = get_symlink_chunk(rpnt, rr, @@ -515,14 +712,18 @@ static int rock_ridge_symlink_readpage(struct file *file, struct page *page) break; case SIG('C', 'E'): /* This tells is if there is a continuation record */ - CHECK_CE; + rs.cont_extent = isonum_733(rr->u.CE.extent); + rs.cont_offset = isonum_733(rr->u.CE.offset); + rs.cont_size = isonum_733(rr->u.CE.size); default: break; } } - MAYBE_CONTINUE(repeat, inode); - if (buffer) - kfree(buffer); + ret = rock_continue(&rs); + if (ret == 0) + goto repeat; + if (ret < 0) + goto fail; if (rpnt == link) goto fail; @@ -535,19 +736,18 @@ static int rock_ridge_symlink_readpage(struct file *file, struct page *page) return 0; /* error exit from macro */ - out: - if (buffer) - kfree(buffer); +out: + kfree(rs.buffer); goto fail; - out_noread: +out_noread: printk("unable to read i-node block"); goto fail; - out_bad_span: +out_bad_span: printk("symlink spans iso9660 blocks\n"); - fail: +fail: brelse(bh); unlock_kernel(); - error: +error: SetPageError(page); kunmap(page); unlock_page(page); @@ -555,5 +755,5 @@ static int rock_ridge_symlink_readpage(struct file *file, struct page *page) } struct address_space_operations isofs_symlink_aops = { - .readpage = rock_ridge_symlink_readpage + .readpage = rock_ridge_symlink_readpage }; diff --git a/fs/isofs/rock.h b/fs/isofs/rock.h index deaf5c8..ed09e2b 100644 --- a/fs/isofs/rock.h +++ b/fs/isofs/rock.h @@ -1,85 +1,88 @@ -/* These structs are used by the system-use-sharing protocol, in which the - Rock Ridge extensions are embedded. 
It is quite possible that other - extensions are present on the disk, and this is fine as long as they - all use SUSP */ - -struct SU_SP{ - unsigned char magic[2]; - unsigned char skip; -} __attribute__((packed)); - -struct SU_CE{ - char extent[8]; - char offset[8]; - char size[8]; +/* + * These structs are used by the system-use-sharing protocol, in which the + * Rock Ridge extensions are embedded. It is quite possible that other + * extensions are present on the disk, and this is fine as long as they + * all use SUSP + */ + +struct SU_SP_s { + unsigned char magic[2]; + unsigned char skip; +} __attribute__ ((packed)); + +struct SU_CE_s { + char extent[8]; + char offset[8]; + char size[8]; }; -struct SU_ER{ - unsigned char len_id; - unsigned char len_des; - unsigned char len_src; - unsigned char ext_ver; - char data[0]; -} __attribute__((packed)); - -struct RR_RR{ - char flags[1]; -} __attribute__((packed)); - -struct RR_PX{ - char mode[8]; - char n_links[8]; - char uid[8]; - char gid[8]; +struct SU_ER_s { + unsigned char len_id; + unsigned char len_des; + unsigned char len_src; + unsigned char ext_ver; + char data[0]; +} __attribute__ ((packed)); + +struct RR_RR_s { + char flags[1]; +} __attribute__ ((packed)); + +struct RR_PX_s { + char mode[8]; + char n_links[8]; + char uid[8]; + char gid[8]; }; -struct RR_PN{ - char dev_high[8]; - char dev_low[8]; +struct RR_PN_s { + char dev_high[8]; + char dev_low[8]; }; +struct SL_component { + unsigned char flags; + unsigned char len; + char text[0]; +} __attribute__ ((packed)); -struct SL_component{ - unsigned char flags; - unsigned char len; - char text[0]; -} __attribute__((packed)); +struct RR_SL_s { + unsigned char flags; + struct SL_component link; +} __attribute__ ((packed)); -struct RR_SL{ - unsigned char flags; - struct SL_component link; -} __attribute__((packed)); +struct RR_NM_s { + unsigned char flags; + char name[0]; +} __attribute__ ((packed)); -struct RR_NM{ - unsigned char flags; - char name[0]; -} __attribute__((packed)); - -struct RR_CL{ - char location[8]; +struct RR_CL_s { + char location[8]; }; -struct RR_PL{ - char location[8]; +struct RR_PL_s { + char location[8]; }; -struct stamp{ - char time[7]; -} __attribute__((packed)); +struct stamp { + char time[7]; +} __attribute__ ((packed)); -struct RR_TF{ - char flags; - struct stamp times[0]; /* Variable number of these beasts */ -} __attribute__((packed)); +struct RR_TF_s { + char flags; + struct stamp times[0]; /* Variable number of these beasts */ +} __attribute__ ((packed)); /* Linux-specific extension for transparent decompression */ -struct RR_ZF{ - char algorithm[2]; - char parms[2]; - char real_size[8]; +struct RR_ZF_s { + char algorithm[2]; + char parms[2]; + char real_size[8]; }; -/* These are the bits and their meanings for flags in the TF structure. */ +/* + * These are the bits and their meanings for flags in the TF structure. 
+ */ #define TF_CREATE 1 #define TF_MODIFY 2 #define TF_ACCESS 4 @@ -89,31 +92,31 @@ struct RR_ZF{ #define TF_EFFECTIVE 64 #define TF_LONG_FORM 128 -struct rock_ridge{ - char signature[2]; - unsigned char len; - unsigned char version; - union{ - struct SU_SP SP; - struct SU_CE CE; - struct SU_ER ER; - struct RR_RR RR; - struct RR_PX PX; - struct RR_PN PN; - struct RR_SL SL; - struct RR_NM NM; - struct RR_CL CL; - struct RR_PL PL; - struct RR_TF TF; - struct RR_ZF ZF; - } u; +struct rock_ridge { + char signature[2]; + unsigned char len; + unsigned char version; + union { + struct SU_SP_s SP; + struct SU_CE_s CE; + struct SU_ER_s ER; + struct RR_RR_s RR; + struct RR_PX_s PX; + struct RR_PN_s PN; + struct RR_SL_s SL; + struct RR_NM_s NM; + struct RR_CL_s CL; + struct RR_PL_s PL; + struct RR_TF_s TF; + struct RR_ZF_s ZF; + } u; }; -#define RR_PX 1 /* POSIX attributes */ -#define RR_PN 2 /* POSIX devices */ -#define RR_SL 4 /* Symbolic link */ -#define RR_NM 8 /* Alternate Name */ -#define RR_CL 16 /* Child link */ -#define RR_PL 32 /* Parent link */ -#define RR_RE 64 /* Relocation directory */ -#define RR_TF 128 /* Timestamps */ +#define RR_PX 1 /* POSIX attributes */ +#define RR_PN 2 /* POSIX devices */ +#define RR_SL 4 /* Symbolic link */ +#define RR_NM 8 /* Alternate Name */ +#define RR_CL 16 /* Child link */ +#define RR_PL 32 /* Parent link */ +#define RR_RE 64 /* Relocation directory */ +#define RR_TF 128 /* Timestamps */ diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c index 8d2a9ab..30a2bf9 100644 --- a/fs/jfs/acl.c +++ b/fs/jfs/acl.c @@ -70,8 +70,7 @@ static struct posix_acl *jfs_get_acl(struct inode *inode, int type) if (!IS_ERR(acl)) *p_acl = posix_acl_dup(acl); } - if (value) - kfree(value); + kfree(value); return acl; } @@ -112,8 +111,7 @@ static int jfs_set_acl(struct inode *inode, int type, struct posix_acl *acl) } rc = __jfs_setxattr(inode, ea_name, value, size, 0); out: - if (value) - kfree(value); + kfree(value); if (!rc) { if (*p_acl && (*p_acl != JFS_ACL_NOT_CACHED)) diff --git a/fs/jfs/file.c b/fs/jfs/file.c index a87b06f..c2c19c9 100644 --- a/fs/jfs/file.c +++ b/fs/jfs/file.c @@ -1,6 +1,6 @@ /* - * Copyright (c) International Business Machines Corp., 2000-2002 - * Portions Copyright (c) Christoph Hellwig, 2001-2002 + * Copyright (C) International Business Machines Corp., 2000-2002 + * Portions Copyright (C) Christoph Hellwig, 2001-2002 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -19,16 +19,13 @@ #include <linux/fs.h> #include "jfs_incore.h" +#include "jfs_inode.h" #include "jfs_dmap.h" #include "jfs_txnmgr.h" #include "jfs_xattr.h" #include "jfs_acl.h" #include "jfs_debug.h" - -extern int jfs_commit_inode(struct inode *, int); -extern void jfs_truncate(struct inode *); - int jfs_fsync(struct file *file, struct dentry *dentry, int datasync) { struct inode *inode = dentry->d_inode; diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c index 24a6891..2137138 100644 --- a/fs/jfs/inode.c +++ b/fs/jfs/inode.c @@ -23,6 +23,7 @@ #include <linux/pagemap.h> #include <linux/quotaops.h> #include "jfs_incore.h" +#include "jfs_inode.h" #include "jfs_filsys.h" #include "jfs_imap.h" #include "jfs_extent.h" @@ -30,14 +31,6 @@ #include "jfs_debug.h" -extern struct inode_operations jfs_dir_inode_operations; -extern struct inode_operations jfs_file_inode_operations; -extern struct inode_operations jfs_symlink_inode_operations; -extern struct file_operations jfs_dir_operations; -extern struct file_operations 
jfs_file_operations; -struct address_space_operations jfs_aops; -extern int freeZeroLink(struct inode *); - void jfs_read_inode(struct inode *inode) { if (diRead(inode)) { @@ -136,7 +129,7 @@ void jfs_delete_inode(struct inode *inode) jfs_info("In jfs_delete_inode, inode = 0x%p", inode); if (test_cflag(COMMIT_Freewmap, inode)) - freeZeroLink(inode); + jfs_free_zero_link(inode); diFree(inode); diff --git a/fs/jfs/jfs_debug.c b/fs/jfs/jfs_debug.c index 91a0a88..4caea6b 100644 --- a/fs/jfs/jfs_debug.c +++ b/fs/jfs/jfs_debug.c @@ -58,8 +58,6 @@ void dump_mem(char *label, void *data, int length) static struct proc_dir_entry *base; #ifdef CONFIG_JFS_DEBUG -extern read_proc_t jfs_txanchor_read; - static int loglevel_read(char *page, char **start, off_t off, int count, int *eof, void *data) { @@ -97,14 +95,6 @@ static int loglevel_write(struct file *file, const char __user *buffer, } #endif - -#ifdef CONFIG_JFS_STATISTICS -extern read_proc_t jfs_lmstats_read; -extern read_proc_t jfs_txstats_read; -extern read_proc_t jfs_xtstat_read; -extern read_proc_t jfs_mpstat_read; -#endif - static struct { const char *name; read_proc_t *read_fn; diff --git a/fs/jfs/jfs_debug.h b/fs/jfs/jfs_debug.h index a38079a..ddffbbd 100644 --- a/fs/jfs/jfs_debug.h +++ b/fs/jfs/jfs_debug.h @@ -1,6 +1,6 @@ /* - * Copyright (c) International Business Machines Corp., 2000-2002 - * Portions Copyright (c) Christoph Hellwig, 2001-2002 + * Copyright (C) International Business Machines Corp., 2000-2002 + * Portions Copyright (C) Christoph Hellwig, 2001-2002 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -31,7 +31,9 @@ * CONFIG_JFS_DEBUG or CONFIG_JFS_STATISTICS is defined */ #if defined(CONFIG_PROC_FS) && (defined(CONFIG_JFS_DEBUG) || defined(CONFIG_JFS_STATISTICS)) - #define PROC_FS_JFS +#define PROC_FS_JFS +extern void jfs_proc_init(void); +extern void jfs_proc_clean(void); #endif /* @@ -65,8 +67,8 @@ extern int jfsloglevel; -/* dump memory contents */ extern void dump_mem(char *label, void *data, int length); +extern int jfs_txanchor_read(char *, char **, off_t, int, int *, void *); /* information message: e.g., configuration, major event */ #define jfs_info(fmt, arg...) do { \ @@ -110,6 +112,11 @@ extern void dump_mem(char *label, void *data, int length); * ---------- */ #ifdef CONFIG_JFS_STATISTICS +extern int jfs_lmstats_read(char *, char **, off_t, int, int *, void *); +extern int jfs_txstats_read(char *, char **, off_t, int, int *, void *); +extern int jfs_mpstat_read(char *, char **, off_t, int, int *, void *); +extern int jfs_xtstat_read(char *, char **, off_t, int, int *, void *); + #define INCREMENT(x) ((x)++) #define DECREMENT(x) ((x)--) #define HIGHWATERMARK(x,y) ((x) = max((x), (y))) diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c index 69007fd..cced2fe 100644 --- a/fs/jfs/jfs_dmap.c +++ b/fs/jfs/jfs_dmap.c @@ -272,7 +272,6 @@ int dbMount(struct inode *ipbmap) int dbUnmount(struct inode *ipbmap, int mounterror) { struct bmap *bmp = JFS_SBI(ipbmap->i_sb)->bmap; - int i; if (!(mounterror || isReadOnly(ipbmap))) dbSync(ipbmap); @@ -282,14 +281,6 @@ int dbUnmount(struct inode *ipbmap, int mounterror) */ truncate_inode_pages(ipbmap->i_mapping, 0); - /* - * Sanity Check - */ - for (i = 0; i < bmp->db_numag; i++) - if (atomic_read(&bmp->db_active[i])) - printk(KERN_ERR "dbUnmount: db_active[%d] = %d\n", - i, atomic_read(&bmp->db_active[i])); - /* free the memory for the in-memory bmap. 
*/ kfree(bmp); diff --git a/fs/jfs/jfs_dtree.c b/fs/jfs/jfs_dtree.c index ac41f72..8676aee 100644 --- a/fs/jfs/jfs_dtree.c +++ b/fs/jfs/jfs_dtree.c @@ -2931,6 +2931,9 @@ static void add_missing_indices(struct inode *inode, s64 bn) ASSERT(p->header.flag & BT_LEAF); tlck = txLock(tid, inode, mp, tlckDTREE | tlckENTRY); + if (BT_IS_ROOT(mp)) + tlck->type |= tlckBTROOT; + dtlck = (struct dt_lock *) &tlck->lock; stbl = DT_GETSTBL(p); diff --git a/fs/jfs/jfs_extent.c b/fs/jfs/jfs_extent.c index 1953acb..4879603 100644 --- a/fs/jfs/jfs_extent.c +++ b/fs/jfs/jfs_extent.c @@ -19,6 +19,7 @@ #include <linux/fs.h> #include <linux/quotaops.h> #include "jfs_incore.h" +#include "jfs_inode.h" #include "jfs_superblock.h" #include "jfs_dmap.h" #include "jfs_extent.h" @@ -33,12 +34,6 @@ static int extBrealloc(struct inode *, s64, s64, s64 *, s64 *); #endif static s64 extRoundDown(s64 nb); -/* - * external references - */ -extern int jfs_commit_inode(struct inode *, int); - - #define DPD(a) (printk("(a): %d\n",(a))) #define DPC(a) (printk("(a): %c\n",(a))) #define DPL1(a) \ diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c index 7acff2c..971af29 100644 --- a/fs/jfs/jfs_imap.c +++ b/fs/jfs/jfs_imap.c @@ -47,6 +47,7 @@ #include <linux/quotaops.h> #include "jfs_incore.h" +#include "jfs_inode.h" #include "jfs_filsys.h" #include "jfs_dinode.h" #include "jfs_dmap.h" @@ -69,11 +70,6 @@ #define AG_UNLOCK(imap,agno) up(&imap->im_aglock[agno]) /* - * external references - */ -extern struct address_space_operations jfs_aops; - -/* * forward references */ static int diAllocAG(struct inomap *, int, boolean_t, struct inode *); diff --git a/fs/jfs/jfs_inode.c b/fs/jfs/jfs_inode.c index 84f2459..2af5efb 100644 --- a/fs/jfs/jfs_inode.c +++ b/fs/jfs/jfs_inode.c @@ -19,6 +19,7 @@ #include <linux/fs.h> #include <linux/quotaops.h> #include "jfs_incore.h" +#include "jfs_inode.h" #include "jfs_filsys.h" #include "jfs_imap.h" #include "jfs_dinode.h" diff --git a/fs/jfs/jfs_inode.h b/fs/jfs/jfs_inode.h index 3df91fb..b54bac5 100644 --- a/fs/jfs/jfs_inode.h +++ b/fs/jfs/jfs_inode.h @@ -1,5 +1,5 @@ /* - * Copyright (c) International Business Machines Corp., 2000-2001 + * Copyright (C) International Business Machines Corp., 2000-2001 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -19,5 +19,22 @@ #define _H_JFS_INODE extern struct inode *ialloc(struct inode *, umode_t); +extern int jfs_fsync(struct file *, struct dentry *, int); +extern void jfs_read_inode(struct inode *); +extern int jfs_commit_inode(struct inode *, int); +extern int jfs_write_inode(struct inode*, int); +extern void jfs_delete_inode(struct inode *); +extern void jfs_dirty_inode(struct inode *); +extern void jfs_truncate(struct inode *); +extern void jfs_truncate_nolock(struct inode *, loff_t); +extern void jfs_free_zero_link(struct inode *); +extern struct dentry *jfs_get_parent(struct dentry *dentry); +extern struct address_space_operations jfs_aops; +extern struct inode_operations jfs_dir_inode_operations; +extern struct file_operations jfs_dir_operations; +extern struct inode_operations jfs_file_inode_operations; +extern struct file_operations jfs_file_operations; +extern struct inode_operations jfs_symlink_inode_operations; +extern struct dentry_operations jfs_ci_dentry_operations; #endif /* _H_JFS_INODE */ diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c index dfa1200..7c8387e 100644 --- a/fs/jfs/jfs_logmgr.c +++ b/fs/jfs/jfs_logmgr.c @@ -71,6 +71,7 @@ 
#include "jfs_incore.h" #include "jfs_filsys.h" #include "jfs_metapage.h" +#include "jfs_superblock.h" #include "jfs_txnmgr.h" #include "jfs_debug.h" @@ -167,14 +168,6 @@ static struct jfs_log *dummy_log = NULL; static DECLARE_MUTEX(jfs_log_sem); /* - * external references - */ -extern void txLazyUnlock(struct tblock * tblk); -extern int jfs_stop_threads; -extern struct completion jfsIOwait; -extern int jfs_tlocks_low; - -/* * forward references */ static int lmWriteRecord(struct jfs_log * log, struct tblock * tblk, @@ -1624,6 +1617,8 @@ void jfs_flush_journal(struct jfs_log *log, int wait) } } assert(list_empty(&log->cqueue)); + +#ifdef CONFIG_JFS_DEBUG if (!list_empty(&log->synclist)) { struct logsyncblk *lp; @@ -1638,9 +1633,8 @@ void jfs_flush_journal(struct jfs_log *log, int wait) dump_mem("orphan tblock", lp, sizeof(struct tblock)); } -// current->state = TASK_INTERRUPTIBLE; -// schedule(); } +#endif //assert(list_empty(&log->synclist)); clear_bit(log_FLUSH, &log->flag); } diff --git a/fs/jfs/jfs_logmgr.h b/fs/jfs/jfs_logmgr.h index 51291fb..747114c 100644 --- a/fs/jfs/jfs_logmgr.h +++ b/fs/jfs/jfs_logmgr.h @@ -507,6 +507,8 @@ extern int lmLogClose(struct super_block *sb); extern int lmLogShutdown(struct jfs_log * log); extern int lmLogInit(struct jfs_log * log); extern int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize); +extern int lmGroupCommit(struct jfs_log *, struct tblock *); +extern int jfsIOWait(void *); extern void jfs_flush_journal(struct jfs_log * log, int wait); extern void jfs_syncpt(struct jfs_log *log); diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c index 41bf078..6c5485d 100644 --- a/fs/jfs/jfs_metapage.c +++ b/fs/jfs/jfs_metapage.c @@ -198,7 +198,7 @@ static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags) } } -static inline struct metapage *alloc_metapage(int gfp_mask) +static inline struct metapage *alloc_metapage(unsigned int gfp_mask) { return mempool_alloc(metapage_mempool, gfp_mask); } @@ -726,12 +726,12 @@ void force_metapage(struct metapage *mp) page_cache_release(page); } -extern void hold_metapage(struct metapage *mp) +void hold_metapage(struct metapage *mp) { lock_page(mp->page); } -extern void put_metapage(struct metapage *mp) +void put_metapage(struct metapage *mp) { if (mp->count || mp->nohomeok) { /* Someone else will release this */ diff --git a/fs/jfs/jfs_metapage.h b/fs/jfs/jfs_metapage.h index 991e9fb..f0b7d32 100644 --- a/fs/jfs/jfs_metapage.h +++ b/fs/jfs/jfs_metapage.h @@ -1,6 +1,6 @@ /* - * Copyright (c) International Business Machines Corp., 2000-2002 - * Portions Copyright (c) Christoph Hellwig, 2001-2002 + * Copyright (C) International Business Machines Corp., 2000-2002 + * Portions Copyright (C) Christoph Hellwig, 2001-2002 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -58,6 +58,8 @@ struct metapage { #define mark_metapage_dirty(mp) set_bit(META_dirty, &(mp)->flag) /* function prototypes */ +extern int metapage_init(void); +extern void metapage_exit(void); extern struct metapage *__get_metapage(struct inode *inode, unsigned long lblock, unsigned int size, int absolute, unsigned long new); diff --git a/fs/jfs/jfs_superblock.h b/fs/jfs/jfs_superblock.h index ab0566f..fcf781b 100644 --- a/fs/jfs/jfs_superblock.h +++ b/fs/jfs/jfs_superblock.h @@ -109,5 +109,16 @@ struct jfs_superblock { extern int readSuper(struct super_block *, struct buffer_head **); extern int updateSuper(struct super_block *, 
uint); extern void jfs_error(struct super_block *, const char *, ...); +extern int jfs_mount(struct super_block *); +extern int jfs_mount_rw(struct super_block *, int); +extern int jfs_umount(struct super_block *); +extern int jfs_umount_rw(struct super_block *); + +extern int jfs_stop_threads; +extern struct completion jfsIOwait; +extern wait_queue_head_t jfs_IO_thread_wait; +extern wait_queue_head_t jfs_commit_thread_wait; +extern wait_queue_head_t jfs_sync_thread_wait; +extern int jfs_extendfs(struct super_block *, s64, int); #endif /*_H_JFS_SUPERBLOCK */ diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c index e93d01a..8cbaaff 100644 --- a/fs/jfs/jfs_txnmgr.c +++ b/fs/jfs/jfs_txnmgr.c @@ -42,7 +42,6 @@ * hold on to mp+lock thru update of maps */ - #include <linux/fs.h> #include <linux/vmalloc.h> #include <linux/smp_lock.h> @@ -51,6 +50,7 @@ #include <linux/module.h> #include <linux/moduleparam.h> #include "jfs_incore.h" +#include "jfs_inode.h" #include "jfs_filsys.h" #include "jfs_metapage.h" #include "jfs_dinode.h" @@ -109,7 +109,6 @@ static int TxLockHWM; /* High water mark for number of txLocks used */ static int TxLockVHWM; /* Very High water mark */ struct tlock *TxLock; /* transaction lock table */ - /* * transaction management lock */ @@ -149,7 +148,6 @@ static inline void TXN_SLEEP_DROP_LOCK(wait_queue_head_t * event) #define TXN_WAKEUP(event) wake_up_all(event) - /* * statistics */ @@ -161,16 +159,6 @@ static struct { int waitlock; /* 4: # of tlock wait */ } stattx; - -/* - * external references - */ -extern int lmGroupCommit(struct jfs_log *, struct tblock *); -extern int jfs_commit_inode(struct inode *, int); -extern int jfs_stop_threads; - -extern struct completion jfsIOwait; - /* * forward references */ @@ -358,7 +346,6 @@ void txExit(void) TxBlock = NULL; } - /* * NAME: txBegin() * @@ -460,7 +447,6 @@ tid_t txBegin(struct super_block *sb, int flag) return t; } - /* * NAME: txBeginAnon() * @@ -503,7 +489,6 @@ void txBeginAnon(struct super_block *sb) TXN_UNLOCK(); } - /* * txEnd() * @@ -592,7 +577,6 @@ wakeup: TXN_WAKEUP(&TxAnchor.freewait); } - /* * txLock() * @@ -868,7 +852,6 @@ struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage * mp, return NULL; } - /* * NAME: txRelease() * @@ -908,7 +891,6 @@ static void txRelease(struct tblock * tblk) TXN_UNLOCK(); } - /* * NAME: txUnlock() * @@ -996,7 +978,6 @@ static void txUnlock(struct tblock * tblk) } } - /* * txMaplock() * @@ -1069,7 +1050,6 @@ struct tlock *txMaplock(tid_t tid, struct inode *ip, int type) return tlck; } - /* * txLinelock() * @@ -1103,8 +1083,6 @@ struct linelock *txLinelock(struct linelock * tlock) return linelock; } - - /* * transaction commit management * ----------------------------- @@ -1373,7 +1351,6 @@ int txCommit(tid_t tid, /* transaction identifier */ return rc; } - /* * NAME: txLog() * @@ -1437,7 +1414,6 @@ static int txLog(struct jfs_log * log, struct tblock * tblk, struct commit * cd) return rc; } - /* * diLog() * @@ -1465,7 +1441,6 @@ static int diLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, if (tlck->type & tlckENTRY) { /* log after-image for logredo(): */ lrd->type = cpu_to_le16(LOG_REDOPAGE); -// *pxd = mp->cm_pxd; PXDaddress(pxd, mp->index); PXDlength(pxd, mp->logical_size >> tblk->sb->s_blocksize_bits); @@ -1552,7 +1527,6 @@ static int diLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, return rc; } - /* * dataLog() * @@ -1599,7 +1573,6 @@ static int dataLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, return 0; } - /* * 
dtLog() * @@ -1639,7 +1612,6 @@ static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, lrd->log.redopage.type |= cpu_to_le16(LOG_EXTEND); else lrd->log.redopage.type |= cpu_to_le16(LOG_NEW); -// *pxd = mp->cm_pxd; PXDaddress(pxd, mp->index); PXDlength(pxd, mp->logical_size >> tblk->sb->s_blocksize_bits); @@ -1704,7 +1676,6 @@ static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, return; } - /* * xtLog() * @@ -1760,7 +1731,6 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, * applying the after-image to the meta-data page. */ lrd->type = cpu_to_le16(LOG_REDOPAGE); -// *page_pxd = mp->cm_pxd; PXDaddress(page_pxd, mp->index); PXDlength(page_pxd, mp->logical_size >> tblk->sb->s_blocksize_bits); @@ -2093,7 +2063,6 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, return; } - /* * mapLog() * @@ -2180,7 +2149,6 @@ void mapLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, } } - /* * txEA() * @@ -2233,7 +2201,6 @@ void txEA(tid_t tid, struct inode *ip, dxd_t * oldea, dxd_t * newea) } } - /* * txForce() * @@ -2300,7 +2267,6 @@ void txForce(struct tblock * tblk) } } - /* * txUpdateMap() * @@ -2437,7 +2403,6 @@ static void txUpdateMap(struct tblock * tblk) } } - /* * txAllocPMap() * @@ -2509,7 +2474,6 @@ static void txAllocPMap(struct inode *ip, struct maplock * maplock, } } - /* * txFreeMap() * @@ -2611,7 +2575,6 @@ void txFreeMap(struct inode *ip, } } - /* * txFreelock() * @@ -2652,7 +2615,6 @@ void txFreelock(struct inode *ip) TXN_UNLOCK(); } - /* * txAbort() * diff --git a/fs/jfs/jfs_txnmgr.h b/fs/jfs/jfs_txnmgr.h index b71b82c..59ad0f6 100644 --- a/fs/jfs/jfs_txnmgr.h +++ b/fs/jfs/jfs_txnmgr.h @@ -285,34 +285,26 @@ struct commit { /* * external declarations */ -extern struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage *mp, - int flag); - -extern struct tlock *txMaplock(tid_t tid, struct inode *ip, int flag); - -extern int txCommit(tid_t tid, int nip, struct inode **iplist, int flag); - -extern tid_t txBegin(struct super_block *sb, int flag); - -extern void txBeginAnon(struct super_block *sb); - -extern void txEnd(tid_t tid); - -extern void txAbort(tid_t tid, int dirty); - -extern struct linelock *txLinelock(struct linelock * tlock); - -extern void txFreeMap(struct inode *ip, struct maplock * maplock, - struct tblock * tblk, int maptype); - -extern void txEA(tid_t tid, struct inode *ip, dxd_t * oldea, dxd_t * newea); - -extern void txFreelock(struct inode *ip); - -extern int lmLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, - struct tlock * tlck); - -extern void txQuiesce(struct super_block *sb); - -extern void txResume(struct super_block *sb); +extern int jfs_tlocks_low; + +extern int txInit(void); +extern void txExit(void); +extern struct tlock *txLock(tid_t, struct inode *, struct metapage *, int); +extern struct tlock *txMaplock(tid_t, struct inode *, int); +extern int txCommit(tid_t, int, struct inode **, int); +extern tid_t txBegin(struct super_block *, int); +extern void txBeginAnon(struct super_block *); +extern void txEnd(tid_t); +extern void txAbort(tid_t, int); +extern struct linelock *txLinelock(struct linelock *); +extern void txFreeMap(struct inode *, struct maplock *, struct tblock *, int); +extern void txEA(tid_t, struct inode *, dxd_t *, dxd_t *); +extern void txFreelock(struct inode *); +extern int lmLog(struct jfs_log *, struct tblock *, struct lrd *, + struct tlock *); +extern void txQuiesce(struct super_block *); 
+extern void txResume(struct super_block *); +extern void txLazyUnlock(struct tblock *); +extern int jfs_lazycommit(void *); +extern int jfs_sync(void *); #endif /* _H_JFS_TXNMGR */ diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index 8413a36..1cae14e 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c @@ -31,20 +31,9 @@ #include "jfs_acl.h" #include "jfs_debug.h" -extern struct inode_operations jfs_file_inode_operations; -extern struct inode_operations jfs_symlink_inode_operations; -extern struct file_operations jfs_file_operations; -extern struct address_space_operations jfs_aops; - -extern int jfs_fsync(struct file *, struct dentry *, int); -extern void jfs_truncate_nolock(struct inode *, loff_t); -extern int jfs_init_acl(struct inode *, struct inode *); - /* * forward references */ -struct inode_operations jfs_dir_inode_operations; -struct file_operations jfs_dir_operations; struct dentry_operations jfs_ci_dentry_operations; static s64 commitZeroLink(tid_t, struct inode *); @@ -655,7 +644,7 @@ static s64 commitZeroLink(tid_t tid, struct inode *ip) /* - * NAME: freeZeroLink() + * NAME: jfs_free_zero_link() * * FUNCTION: for non-directory, called by iClose(), * free resources of a file from cache and WORKING map @@ -663,15 +652,12 @@ static s64 commitZeroLink(tid_t tid, struct inode *ip) * while associated with a pager object, * * PARAMETER: ip - pointer to inode of file. - * - * RETURN: 0 -ok */ -int freeZeroLink(struct inode *ip) +void jfs_free_zero_link(struct inode *ip) { - int rc = 0; int type; - jfs_info("freeZeroLink: ip = 0x%p", ip); + jfs_info("jfs_free_zero_link: ip = 0x%p", ip); /* return if not reg or symbolic link or if size is * already ok. @@ -684,10 +670,10 @@ int freeZeroLink(struct inode *ip) case S_IFLNK: /* if its contained in inode nothing to do */ if (ip->i_size < IDATASIZE) - return 0; + return; break; default: - return 0; + return; } /* @@ -737,9 +723,7 @@ int freeZeroLink(struct inode *ip) * free xtree/data blocks from working block map; */ if (ip->i_size) - rc = xtTruncate(0, ip, 0, COMMIT_WMAP); - - return rc; + xtTruncate(0, ip, 0, COMMIT_WMAP); } /* diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 5e774ed..810a365 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -28,6 +28,7 @@ #include "jfs_incore.h" #include "jfs_filsys.h" +#include "jfs_inode.h" #include "jfs_metapage.h" #include "jfs_superblock.h" #include "jfs_dmap.h" @@ -62,37 +63,6 @@ module_param(jfsloglevel, int, 0644); MODULE_PARM_DESC(jfsloglevel, "Specify JFS loglevel (0, 1 or 2)"); #endif -/* - * External declarations - */ -extern int jfs_mount(struct super_block *); -extern int jfs_mount_rw(struct super_block *, int); -extern int jfs_umount(struct super_block *); -extern int jfs_umount_rw(struct super_block *); - -extern int jfsIOWait(void *); -extern int jfs_lazycommit(void *); -extern int jfs_sync(void *); - -extern void jfs_read_inode(struct inode *inode); -extern void jfs_dirty_inode(struct inode *inode); -extern void jfs_delete_inode(struct inode *inode); -extern int jfs_write_inode(struct inode *inode, int wait); - -extern struct dentry *jfs_get_parent(struct dentry *dentry); -extern int jfs_extendfs(struct super_block *, s64, int); - -extern struct dentry_operations jfs_ci_dentry_operations; - -#ifdef PROC_FS_JFS /* see jfs_debug.h */ -extern void jfs_proc_init(void); -extern void jfs_proc_clean(void); -#endif - -extern wait_queue_head_t jfs_IO_thread_wait; -extern wait_queue_head_t jfs_commit_thread_wait; -extern wait_queue_head_t jfs_sync_thread_wait; - static void 
jfs_handle_error(struct super_block *sb) { struct jfs_sb_info *sbi = JFS_SBI(sb); @@ -593,11 +563,6 @@ static struct file_system_type jfs_fs_type = { .fs_flags = FS_REQUIRES_DEV, }; -extern int metapage_init(void); -extern int txInit(void); -extern void txExit(void); -extern void metapage_exit(void); - static void init_once(void *foo, kmem_cache_t * cachep, unsigned long flags) { struct jfs_inode_info *jfs_ip = (struct jfs_inode_info *) foo; diff --git a/fs/jfs/symlink.c b/fs/jfs/symlink.c index ef4c07e..287d8d6 100644 --- a/fs/jfs/symlink.c +++ b/fs/jfs/symlink.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Christoph Hellwig, 2001-2002 + * Copyright (C) Christoph Hellwig, 2001-2002 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -19,6 +19,7 @@ #include <linux/fs.h> #include <linux/namei.h> #include "jfs_incore.h" +#include "jfs_inode.h" #include "jfs_xattr.h" static int jfs_follow_link(struct dentry *dentry, struct nameidata *nd) diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c index 7a9ffd5..6016373 100644 --- a/fs/jfs/xattr.c +++ b/fs/jfs/xattr.c @@ -946,8 +946,7 @@ int __jfs_setxattr(struct inode *inode, const char *name, const void *value, out: up_write(&JFS_IP(inode)->xattr_sem); - if (os2name) - kfree(os2name); + kfree(os2name); return rc; } @@ -1042,8 +1041,7 @@ ssize_t __jfs_getxattr(struct inode *inode, const char *name, void *data, out: up_read(&JFS_IP(inode)->xattr_sem); - if (os2name) - kfree(os2name); + kfree(os2name); return size; } @@ -519,6 +519,102 @@ int simple_transaction_release(struct inode *inode, struct file *file) return 0; } +/* Simple attribute files */ + +struct simple_attr { + u64 (*get)(void *); + void (*set)(void *, u64); + char get_buf[24]; /* enough to store a u64 and "\n\0" */ + char set_buf[24]; + void *data; + const char *fmt; /* format for read operation */ + struct semaphore sem; /* protects access to these buffers */ +}; + +/* simple_attr_open is called by an actual attribute open file operation + * to set the attribute specific access operations. 
*/ +int simple_attr_open(struct inode *inode, struct file *file, + u64 (*get)(void *), void (*set)(void *, u64), + const char *fmt) +{ + struct simple_attr *attr; + + attr = kmalloc(sizeof(*attr), GFP_KERNEL); + if (!attr) + return -ENOMEM; + + attr->get = get; + attr->set = set; + attr->data = inode->u.generic_ip; + attr->fmt = fmt; + init_MUTEX(&attr->sem); + + file->private_data = attr; + + return nonseekable_open(inode, file); +} + +int simple_attr_close(struct inode *inode, struct file *file) +{ + kfree(file->private_data); + return 0; +} + +/* read from the buffer that is filled with the get function */ +ssize_t simple_attr_read(struct file *file, char __user *buf, + size_t len, loff_t *ppos) +{ + struct simple_attr *attr; + size_t size; + ssize_t ret; + + attr = file->private_data; + + if (!attr->get) + return -EACCES; + + down(&attr->sem); + if (*ppos) /* continued read */ + size = strlen(attr->get_buf); + else /* first read */ + size = scnprintf(attr->get_buf, sizeof(attr->get_buf), + attr->fmt, + (unsigned long long)attr->get(attr->data)); + + ret = simple_read_from_buffer(buf, len, ppos, attr->get_buf, size); + up(&attr->sem); + return ret; +} + +/* interpret the buffer as a number to call the set function with */ +ssize_t simple_attr_write(struct file *file, const char __user *buf, + size_t len, loff_t *ppos) +{ + struct simple_attr *attr; + u64 val; + size_t size; + ssize_t ret; + + attr = file->private_data; + + if (!attr->set) + return -EACCES; + + down(&attr->sem); + ret = -EFAULT; + size = min(sizeof(attr->set_buf) - 1, len); + if (copy_from_user(attr->set_buf, buf, size)) + goto out; + + ret = len; /* claim we got the whole input */ + attr->set_buf[size] = '\0'; + val = simple_strtol(attr->set_buf, NULL, 0); + attr->set(attr->data, val); +out: + up(&attr->sem); + return ret; +} + EXPORT_SYMBOL(dcache_dir_close); EXPORT_SYMBOL(dcache_dir_lseek); EXPORT_SYMBOL(dcache_dir_open); @@ -547,3 +643,7 @@ EXPORT_SYMBOL(simple_read_from_buffer); EXPORT_SYMBOL(simple_transaction_get); EXPORT_SYMBOL(simple_transaction_read); EXPORT_SYMBOL(simple_transaction_release); +EXPORT_SYMBOL_GPL(simple_attr_open); +EXPORT_SYMBOL_GPL(simple_attr_close); +EXPORT_SYMBOL_GPL(simple_attr_read); +EXPORT_SYMBOL_GPL(simple_attr_write); diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c index ef7103b..006bb9e 100644 --- a/fs/lockd/clntlock.c +++ b/fs/lockd/clntlock.c @@ -31,7 +31,7 @@ static int reclaimer(void *ptr); * This is the representation of a blocked client lock. 
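[A minimal sketch, not part of the patch, of how a caller could wire the simple_attr helpers added to fs/libfs.c above into a file_operations instance for a single u64 attribute. The names foo_counter, foo_get, foo_set, foo_open and foo_attr_fops are illustrative only; the sketch assumes whoever created the attribute inode pointed inode->u.generic_ip at &foo_counter, since simple_attr_open() takes its data pointer from that field.]

#include <linux/fs.h>	/* assumed to carry the simple_attr_* prototypes */
#include <linux/types.h>

/* hypothetical attribute backed by a plain u64 */
static u64 foo_counter;

static u64 foo_get(void *data)
{
	return *(u64 *)data;
}

static void foo_set(void *data, u64 val)
{
	*(u64 *)data = val;
}

static int foo_open(struct inode *inode, struct file *file)
{
	/* "%llu\n" matches the unsigned long long cast in simple_attr_read() */
	return simple_attr_open(inode, file, foo_get, foo_set, "%llu\n");
}

static struct file_operations foo_attr_fops = {
	.open		= foo_open,
	.release	= simple_attr_close,
	.read		= simple_attr_read,
	.write		= simple_attr_write,
};

With this wiring, reads return the current counter formatted through attr->fmt and writes parse a number and pass it to foo_set(), with the per-attr semaphore shown above serializing access to the scratch buffers.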
*/ struct nlm_wait { - struct nlm_wait * b_next; /* linked list */ + struct list_head b_list; /* linked list */ wait_queue_head_t b_wait; /* where to wait on */ struct nlm_host * b_host; struct file_lock * b_lock; /* local file lock */ @@ -39,27 +39,54 @@ struct nlm_wait { u32 b_status; /* grant callback status */ }; -static struct nlm_wait * nlm_blocked; +static LIST_HEAD(nlm_blocked); /* - * Block on a lock + * Queue up a lock for blocking so that the GRANTED request can see it */ -int -nlmclnt_block(struct nlm_host *host, struct file_lock *fl, u32 *statp) +int nlmclnt_prepare_block(struct nlm_rqst *req, struct nlm_host *host, struct file_lock *fl) +{ + struct nlm_wait *block; + + BUG_ON(req->a_block != NULL); + block = kmalloc(sizeof(*block), GFP_KERNEL); + if (block == NULL) + return -ENOMEM; + block->b_host = host; + block->b_lock = fl; + init_waitqueue_head(&block->b_wait); + block->b_status = NLM_LCK_BLOCKED; + + list_add(&block->b_list, &nlm_blocked); + req->a_block = block; + + return 0; +} + +void nlmclnt_finish_block(struct nlm_rqst *req) { - struct nlm_wait block, **head; - int err; - u32 pstate; + struct nlm_wait *block = req->a_block; - block.b_host = host; - block.b_lock = fl; - init_waitqueue_head(&block.b_wait); - block.b_status = NLM_LCK_BLOCKED; - block.b_next = nlm_blocked; - nlm_blocked = █ + if (block == NULL) + return; + req->a_block = NULL; + list_del(&block->b_list); + kfree(block); +} + +/* + * Block on a lock + */ +long nlmclnt_block(struct nlm_rqst *req, long timeout) +{ + struct nlm_wait *block = req->a_block; + long ret; - /* Remember pseudo nsm state */ - pstate = host->h_state; + /* A borken server might ask us to block even if we didn't + * request it. Just say no! + */ + if (!req->a_args.block) + return -EAGAIN; /* Go to sleep waiting for GRANT callback. Some servers seem * to lose callbacks, however, so we're going to poll from @@ -69,28 +96,16 @@ nlmclnt_block(struct nlm_host *host, struct file_lock *fl, u32 *statp) * a 1 minute timeout would do. See the comment before * nlmclnt_lock for an explanation. */ - sleep_on_timeout(&block.b_wait, 30*HZ); - - for (head = &nlm_blocked; *head; head = &(*head)->b_next) { - if (*head == &block) { - *head = block.b_next; - break; - } - } + ret = wait_event_interruptible_timeout(block->b_wait, + block->b_status != NLM_LCK_BLOCKED, + timeout); - if (!signalled()) { - *statp = block.b_status; - return 0; + if (block->b_status != NLM_LCK_BLOCKED) { + req->a_res.status = block->b_status; + block->b_status = NLM_LCK_BLOCKED; } - /* Okay, we were interrupted. Cancel the pending request - * unless the server has rebooted. - */ - if (pstate == host->h_state && (err = nlmclnt_cancel(host, fl)) < 0) - printk(KERN_NOTICE - "lockd: CANCEL call failed (errno %d)\n", -err); - - return -ERESTARTSYS; + return ret; } /* @@ -100,27 +115,23 @@ u32 nlmclnt_grant(struct nlm_lock *lock) { struct nlm_wait *block; + u32 res = nlm_lck_denied; /* * Look up blocked request based on arguments. * Warning: must not use cookie to match it! */ - for (block = nlm_blocked; block; block = block->b_next) { - if (nlm_compare_locks(block->b_lock, &lock->fl)) - break; + list_for_each_entry(block, &nlm_blocked, b_list) { + if (nlm_compare_locks(block->b_lock, &lock->fl)) { + /* Alright, we found a lock. Set the return status + * and wake up the caller + */ + block->b_status = NLM_LCK_GRANTED; + wake_up(&block->b_wait); + res = nlm_granted; + } } - - /* Ooops, no blocked request found. 
*/ - if (block == NULL) - return nlm_lck_denied; - - /* Alright, we found the lock. Set the return status and - * wake up the caller. - */ - block->b_status = NLM_LCK_GRANTED; - wake_up(&block->b_wait); - - return nlm_granted; + return res; } /* @@ -230,7 +241,7 @@ restart: host->h_reclaiming = 0; /* Now, wake up all processes that sleep on a blocked lock */ - for (block = nlm_blocked; block; block = block->b_next) { + list_for_each_entry(block, &nlm_blocked, b_list) { if (block->b_host == host) { block->b_status = NLM_LCK_DENIED_GRACE_PERIOD; wake_up(&block->b_wait); diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index a440761..fd77ed1 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c @@ -21,6 +21,7 @@ #define NLMDBG_FACILITY NLMDBG_CLIENT #define NLMCLNT_GRACE_WAIT (5*HZ) +#define NLMCLNT_POLL_TIMEOUT (30*HZ) static int nlmclnt_test(struct nlm_rqst *, struct file_lock *); static int nlmclnt_lock(struct nlm_rqst *, struct file_lock *); @@ -553,7 +554,8 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl) { struct nlm_host *host = req->a_host; struct nlm_res *resp = &req->a_res; - int status; + long timeout; + int status; if (!host->h_monitored && nsm_monitor(host) < 0) { printk(KERN_NOTICE "lockd: failed to monitor %s\n", @@ -562,15 +564,32 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl) goto out; } - do { - if ((status = nlmclnt_call(req, NLMPROC_LOCK)) >= 0) { - if (resp->status != NLM_LCK_BLOCKED) - break; - status = nlmclnt_block(host, fl, &resp->status); - } + if (req->a_args.block) { + status = nlmclnt_prepare_block(req, host, fl); if (status < 0) goto out; - } while (resp->status == NLM_LCK_BLOCKED && req->a_args.block); + } + for(;;) { + status = nlmclnt_call(req, NLMPROC_LOCK); + if (status < 0) + goto out_unblock; + if (resp->status != NLM_LCK_BLOCKED) + break; + /* Wait on an NLM blocking lock */ + timeout = nlmclnt_block(req, NLMCLNT_POLL_TIMEOUT); + /* Did a reclaimer thread notify us of a server reboot? */ + if (resp->status == NLM_LCK_DENIED_GRACE_PERIOD) + continue; + if (resp->status != NLM_LCK_BLOCKED) + break; + if (timeout >= 0) + continue; + /* We were interrupted. 
Send a CANCEL request to the server + * and exit + */ + status = (int)timeout; + goto out_unblock; + } if (resp->status == NLM_LCK_GRANTED) { fl->fl_u.nfs_fl.state = host->h_state; @@ -579,6 +598,11 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl) do_vfs_lock(fl); } status = nlm_stat_to_errno(resp->status); +out_unblock: + nlmclnt_finish_block(req); + /* Cancel the blocked request if it is still pending */ + if (resp->status == NLM_LCK_BLOCKED) + nlmclnt_cancel(host, fl); out: nlmclnt_release_lockargs(req); return status; diff --git a/fs/lockd/host.c b/fs/lockd/host.c index 52707c5..82c77df 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -189,17 +189,15 @@ nlm_bind_host(struct nlm_host *host) goto forgetit; xprt_set_timeout(&xprt->timeout, 5, nlmsvc_timeout); + xprt->nocong = 1; /* No congestion control for NLM */ + xprt->resvport = 1; /* NLM requires a reserved port */ /* Existing NLM servers accept AUTH_UNIX only */ clnt = rpc_create_client(xprt, host->h_name, &nlm_program, host->h_version, RPC_AUTH_UNIX); - if (IS_ERR(clnt)) { - xprt_destroy(xprt); + if (IS_ERR(clnt)) goto forgetit; - } clnt->cl_autobind = 1; /* turn on pmap queries */ - xprt->nocong = 1; /* No congestion control for NLM */ - xprt->resvport = 1; /* NLM requires a reserved port */ host->h_rpcclnt = clnt; } diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c index 6fc1beb..2d144ab 100644 --- a/fs/lockd/mon.c +++ b/fs/lockd/mon.c @@ -115,20 +115,19 @@ nsm_create(void) xprt = xprt_create_proto(IPPROTO_UDP, &sin, NULL); if (IS_ERR(xprt)) return (struct rpc_clnt *)xprt; + xprt->resvport = 1; /* NSM requires a reserved port */ clnt = rpc_create_client(xprt, "localhost", &nsm_program, SM_VERSION, RPC_AUTH_NULL); if (IS_ERR(clnt)) - goto out_destroy; + goto out_err; clnt->cl_softrtry = 1; clnt->cl_chatty = 1; clnt->cl_oneshot = 1; - xprt->resvport = 1; /* NSM requires a reserved port */ return clnt; -out_destroy: - xprt_destroy(xprt); +out_err: return clnt; } @@ -1548,6 +1548,8 @@ int fcntl_getlk(struct file *filp, struct flock __user *l) if (filp->f_op && filp->f_op->lock) { error = filp->f_op->lock(filp, F_GETLK, &file_lock); + if (file_lock.fl_ops && file_lock.fl_ops->fl_release_private) + file_lock.fl_ops->fl_release_private(&file_lock); if (error < 0) goto out; else @@ -1690,6 +1692,8 @@ int fcntl_getlk64(struct file *filp, struct flock64 __user *l) if (filp->f_op && filp->f_op->lock) { error = filp->f_op->lock(filp, F_GETLK, &file_lock); + if (file_lock.fl_ops && file_lock.fl_ops->fl_release_private) + file_lock.fl_ops->fl_release_private(&file_lock); if (error < 0) goto out; else @@ -1873,6 +1877,8 @@ void locks_remove_flock(struct file *filp) .fl_end = OFFSET_MAX, }; filp->f_op->flock(filp, F_SETLKW, &fl); + if (fl.fl_ops && fl.fl_ops->fl_release_private) + fl.fl_ops->fl_release_private(&fl); } lock_kernel(); diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile index b4baa03..8b3bb71 100644 --- a/fs/nfs/Makefile +++ b/fs/nfs/Makefile @@ -8,6 +8,7 @@ nfs-y := dir.o file.o inode.o nfs2xdr.o pagelist.o \ proc.o read.o symlink.o unlink.o write.o nfs-$(CONFIG_ROOT_NFS) += nfsroot.o mount_clnt.o nfs-$(CONFIG_NFS_V3) += nfs3proc.o nfs3xdr.o +nfs-$(CONFIG_NFS_V3_ACL) += nfs3acl.o nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \ delegation.o idmap.o \ callback.o callback_xdr.o callback_proc.o diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 560d617..f2ca782 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -14,6 +14,7 @@ #include <linux/sunrpc/svc.h> #include <linux/sunrpc/svcsock.h> 
#include <linux/nfs_fs.h> +#include "nfs4_fs.h" #include "callback.h" #define NFSDBG_FACILITY NFSDBG_CALLBACK diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index ece27e4..65f1e19 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -8,6 +8,7 @@ #include <linux/config.h> #include <linux/nfs4.h> #include <linux/nfs_fs.h> +#include "nfs4_fs.h" #include "callback.h" #include "delegation.h" diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index d271df9..7c33b9a 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -10,6 +10,7 @@ #include <linux/sunrpc/svc.h> #include <linux/nfs4.h> #include <linux/nfs_fs.h> +#include "nfs4_fs.h" #include "callback.h" #define CB_OP_TAGLEN_MAXSZ (512) @@ -410,7 +411,6 @@ static int nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *resp xdr_init_decode(&xdr_in, &rqstp->rq_arg, rqstp->rq_arg.head[0].iov_base); p = (uint32_t*)((char *)rqstp->rq_res.head[0].iov_base + rqstp->rq_res.head[0].iov_len); - rqstp->rq_res.head[0].iov_len = PAGE_SIZE; xdr_init_encode(&xdr_out, &rqstp->rq_res, p); decode_compound_hdr_arg(&xdr_in, &hdr_arg); diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 5b9c60f..d7f7eb6 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -16,6 +16,7 @@ #include <linux/nfs_fs.h> #include <linux/nfs_xdr.h> +#include "nfs4_fs.h" #include "delegation.h" static struct nfs_delegation *nfs_alloc_delegation(void) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index ff6155f..b38a57e 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -32,6 +32,7 @@ #include <linux/smp_lock.h> #include <linux/namei.h> +#include "nfs4_fs.h" #include "delegation.h" #define NFS_PARANOIA 1 @@ -50,8 +51,10 @@ static int nfs_mknod(struct inode *, struct dentry *, int, dev_t); static int nfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *); static int nfs_fsync_dir(struct file *, struct dentry *, int); +static loff_t nfs_llseek_dir(struct file *, loff_t, int); struct file_operations nfs_dir_operations = { + .llseek = nfs_llseek_dir, .read = generic_read_dir, .readdir = nfs_readdir, .open = nfs_opendir, @@ -74,6 +77,27 @@ struct inode_operations nfs_dir_inode_operations = { .setattr = nfs_setattr, }; +#ifdef CONFIG_NFS_V3 +struct inode_operations nfs3_dir_inode_operations = { + .create = nfs_create, + .lookup = nfs_lookup, + .link = nfs_link, + .unlink = nfs_unlink, + .symlink = nfs_symlink, + .mkdir = nfs_mkdir, + .rmdir = nfs_rmdir, + .mknod = nfs_mknod, + .rename = nfs_rename, + .permission = nfs_permission, + .getattr = nfs_getattr, + .setattr = nfs_setattr, + .listxattr = nfs3_listxattr, + .getxattr = nfs3_getxattr, + .setxattr = nfs3_setxattr, + .removexattr = nfs3_removexattr, +}; +#endif /* CONFIG_NFS_V3 */ + #ifdef CONFIG_NFS_V4 static struct dentry *nfs_atomic_lookup(struct inode *, struct dentry *, struct nameidata *); @@ -90,6 +114,9 @@ struct inode_operations nfs4_dir_inode_operations = { .permission = nfs_permission, .getattr = nfs_getattr, .setattr = nfs_setattr, + .getxattr = nfs4_getxattr, + .setxattr = nfs4_setxattr, + .listxattr = nfs4_listxattr, }; #endif /* CONFIG_NFS_V4 */ @@ -116,7 +143,8 @@ typedef struct { struct page *page; unsigned long page_index; u32 *ptr; - u64 target; + u64 *dir_cookie; + loff_t current_index; struct nfs_entry *entry; decode_dirent_t decode; int plus; @@ -164,12 +192,10 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page) NFS_FLAGS(inode) |= NFS_INO_INVALID_ATIME; /* Ensure consistent page alignment of the data. 
* Note: assumes we have exclusive access to this mapping either - * throught inode->i_sem or some other mechanism. + * through inode->i_sem or some other mechanism. */ - if (page->index == 0) { - invalidate_inode_pages(inode->i_mapping); - NFS_I(inode)->readdir_timestamp = timestamp; - } + if (page->index == 0) + invalidate_inode_pages2_range(inode->i_mapping, PAGE_CACHE_SIZE, -1); unlock_page(page); return 0; error: @@ -202,22 +228,22 @@ void dir_page_release(nfs_readdir_descriptor_t *desc) /* * Given a pointer to a buffer that has already been filled by a call - * to readdir, find the next entry. + * to readdir, find the next entry with cookie '*desc->dir_cookie'. * * If the end of the buffer has been reached, return -EAGAIN, if not, * return the offset within the buffer of the next entry to be * read. */ static inline -int find_dirent(nfs_readdir_descriptor_t *desc, struct page *page) +int find_dirent(nfs_readdir_descriptor_t *desc) { struct nfs_entry *entry = desc->entry; int loop_count = 0, status; while((status = dir_decode(desc)) == 0) { - dfprintk(VFS, "NFS: found cookie %Lu\n", (long long)entry->cookie); - if (entry->prev_cookie == desc->target) + dfprintk(VFS, "NFS: found cookie %Lu\n", (unsigned long long)entry->cookie); + if (entry->prev_cookie == *desc->dir_cookie) break; if (loop_count++ > 200) { loop_count = 0; @@ -229,8 +255,44 @@ int find_dirent(nfs_readdir_descriptor_t *desc, struct page *page) } /* - * Find the given page, and call find_dirent() in order to try to - * return the next entry. + * Given a pointer to a buffer that has already been filled by a call + * to readdir, find the entry at offset 'desc->file->f_pos'. + * + * If the end of the buffer has been reached, return -EAGAIN, if not, + * return the offset within the buffer of the next entry to be + * read. + */ +static inline +int find_dirent_index(nfs_readdir_descriptor_t *desc) +{ + struct nfs_entry *entry = desc->entry; + int loop_count = 0, + status; + + for(;;) { + status = dir_decode(desc); + if (status) + break; + + dfprintk(VFS, "NFS: found cookie %Lu at index %Ld\n", (unsigned long long)entry->cookie, desc->current_index); + + if (desc->file->f_pos == desc->current_index) { + *desc->dir_cookie = entry->cookie; + break; + } + desc->current_index++; + if (loop_count++ > 200) { + loop_count = 0; + schedule(); + } + } + dfprintk(VFS, "NFS: find_dirent_index() returns %d\n", status); + return status; +} + +/* + * Find the given page, and call find_dirent() or find_dirent_index in + * order to try to return the next entry. */ static inline int find_dirent_page(nfs_readdir_descriptor_t *desc) @@ -253,7 +315,10 @@ int find_dirent_page(nfs_readdir_descriptor_t *desc) /* NOTE: Someone else may have changed the READDIRPLUS flag */ desc->page = page; desc->ptr = kmap(page); /* matching kunmap in nfs_do_filldir */ - status = find_dirent(desc, page); + if (*desc->dir_cookie != 0) + status = find_dirent(desc); + else + status = find_dirent_index(desc); if (status < 0) dir_page_release(desc); out: @@ -268,7 +333,8 @@ int find_dirent_page(nfs_readdir_descriptor_t *desc) * Recurse through the page cache pages, and return a * filled nfs_entry structure of the next directory entry if possible. * - * The target for the search is 'desc->target'. 
+ * The target for the search is '*desc->dir_cookie' if non-0, + * 'desc->file->f_pos' otherwise */ static inline int readdir_search_pagecache(nfs_readdir_descriptor_t *desc) @@ -276,7 +342,16 @@ int readdir_search_pagecache(nfs_readdir_descriptor_t *desc) int loop_count = 0; int res; - dfprintk(VFS, "NFS: readdir_search_pagecache() searching for cookie %Lu\n", (long long)desc->target); + /* Always search-by-index from the beginning of the cache */ + if (*desc->dir_cookie == 0) { + dfprintk(VFS, "NFS: readdir_search_pagecache() searching for offset %Ld\n", (long long)desc->file->f_pos); + desc->page_index = 0; + desc->entry->cookie = desc->entry->prev_cookie = 0; + desc->entry->eof = 0; + desc->current_index = 0; + } else + dfprintk(VFS, "NFS: readdir_search_pagecache() searching for cookie %Lu\n", (unsigned long long)*desc->dir_cookie); + for (;;) { res = find_dirent_page(desc); if (res != -EAGAIN) @@ -313,7 +388,7 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent, int loop_count = 0, res; - dfprintk(VFS, "NFS: nfs_do_filldir() filling starting @ cookie %Lu\n", (long long)desc->target); + dfprintk(VFS, "NFS: nfs_do_filldir() filling starting @ cookie %Lu\n", (long long)entry->cookie); for(;;) { unsigned d_type = DT_UNKNOWN; @@ -333,10 +408,11 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent, } res = filldir(dirent, entry->name, entry->len, - entry->prev_cookie, fileid, d_type); + file->f_pos, fileid, d_type); if (res < 0) break; - file->f_pos = desc->target = entry->cookie; + file->f_pos++; + *desc->dir_cookie = entry->cookie; if (dir_decode(desc) != 0) { desc->page_index ++; break; @@ -349,7 +425,7 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent, dir_page_release(desc); if (dentry != NULL) dput(dentry); - dfprintk(VFS, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n", (long long)desc->target, res); + dfprintk(VFS, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n", (unsigned long long)*desc->dir_cookie, res); return res; } @@ -375,14 +451,14 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent, struct page *page = NULL; int status; - dfprintk(VFS, "NFS: uncached_readdir() searching for cookie %Lu\n", (long long)desc->target); + dfprintk(VFS, "NFS: uncached_readdir() searching for cookie %Lu\n", (unsigned long long)*desc->dir_cookie); page = alloc_page(GFP_HIGHUSER); if (!page) { status = -ENOMEM; goto out; } - desc->error = NFS_PROTO(inode)->readdir(file->f_dentry, cred, desc->target, + desc->error = NFS_PROTO(inode)->readdir(file->f_dentry, cred, *desc->dir_cookie, page, NFS_SERVER(inode)->dtsize, desc->plus); @@ -391,7 +467,7 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent, desc->ptr = kmap(page); /* matching kunmap in nfs_do_filldir */ if (desc->error >= 0) { if ((status = dir_decode(desc)) == 0) - desc->entry->prev_cookie = desc->target; + desc->entry->prev_cookie = *desc->dir_cookie; } else status = -EIO; if (status < 0) @@ -412,8 +488,9 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent, goto out; } -/* The file offset position is now represented as a true offset into the - * page cache as is the case in most of the other filesystems. +/* The file offset position represents the dirent entry number. A + last cookie cache takes care of the common case of reading the + whole directory. 
*/ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) { @@ -435,15 +512,15 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) } /* - * filp->f_pos points to the file offset in the page cache. - * but if the cache has meanwhile been zapped, we need to - * read from the last dirent to revalidate f_pos - * itself. + * filp->f_pos points to the dirent entry number. + * *desc->dir_cookie has the cookie for the next entry. We have + * to either find the entry with the appropriate number or + * revalidate the cookie. */ memset(desc, 0, sizeof(*desc)); desc->file = filp; - desc->target = filp->f_pos; + desc->dir_cookie = &((struct nfs_open_context *)filp->private_data)->dir_cookie; desc->decode = NFS_PROTO(inode)->decode_dirent; desc->plus = NFS_USE_READDIRPLUS(inode); @@ -455,9 +532,10 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) while(!desc->entry->eof) { res = readdir_search_pagecache(desc); + if (res == -EBADCOOKIE) { /* This means either end of directory */ - if (desc->entry->cookie != desc->target) { + if (*desc->dir_cookie && desc->entry->cookie != *desc->dir_cookie) { /* Or that the server has 'lost' a cookie */ res = uncached_readdir(desc, dirent, filldir); if (res >= 0) @@ -490,6 +568,28 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) return 0; } +loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int origin) +{ + down(&filp->f_dentry->d_inode->i_sem); + switch (origin) { + case 1: + offset += filp->f_pos; + case 0: + if (offset >= 0) + break; + default: + offset = -EINVAL; + goto out; + } + if (offset != filp->f_pos) { + filp->f_pos = offset; + ((struct nfs_open_context *)filp->private_data)->dir_cookie = 0; + } +out: + up(&filp->f_dentry->d_inode->i_sem); + return offset; +} + /* * All directory operations under NFS are synchronous, so fsync() * is a dummy operation. 
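[The fs/nfs/dir.c changes above replace the old page-cache-offset f_pos with an entry counter plus a per-open-context server cookie. The sketch below only restates, in condensed form, the dispatch already visible in find_dirent_page() above; find_dirent_dispatch is a made-up name for illustration, while nfs_readdir_descriptor_t, find_dirent() and find_dirent_index() are the ones defined in fs/nfs/dir.c.]

static inline int find_dirent_dispatch(nfs_readdir_descriptor_t *desc)
{
	if (*desc->dir_cookie != 0)
		/* common case: resume straight at the saved NFS cookie */
		return find_dirent(desc);
	/*
	 * llseek() reset the cookie (see nfs_llseek_dir() above): rescan
	 * the cached pages from entry 0 and translate the ordinal
	 * filp->f_pos back into a server cookie.
	 */
	return find_dirent_index(desc);
}

The design point is that NFS directory cookies are opaque server tokens, so a byte-offset f_pos cannot be mapped back to a directory position without rescanning; keeping the cookie in the nfs_open_context and letting f_pos simply count entries confines that rescan to the post-lseek case.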
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 68df803..d6a30c8 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -517,7 +517,7 @@ retry: result = tot_bytes; out: - nfs_end_data_update_defer(inode); + nfs_end_data_update(inode); nfs_writedata_free(wdata); return result; diff --git a/fs/nfs/file.c b/fs/nfs/file.c index f06eee6..5621ba9 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -37,6 +37,7 @@ static int nfs_file_open(struct inode *, struct file *); static int nfs_file_release(struct inode *, struct file *); +static loff_t nfs_file_llseek(struct file *file, loff_t offset, int origin); static int nfs_file_mmap(struct file *, struct vm_area_struct *); static ssize_t nfs_file_sendfile(struct file *, loff_t *, size_t, read_actor_t, void *); static ssize_t nfs_file_read(struct kiocb *, char __user *, size_t, loff_t); @@ -48,7 +49,7 @@ static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl); static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl); struct file_operations nfs_file_operations = { - .llseek = remote_llseek, + .llseek = nfs_file_llseek, .read = do_sync_read, .write = do_sync_write, .aio_read = nfs_file_read, @@ -70,6 +71,18 @@ struct inode_operations nfs_file_inode_operations = { .setattr = nfs_setattr, }; +#ifdef CONFIG_NFS_V3 +struct inode_operations nfs3_file_inode_operations = { + .permission = nfs_permission, + .getattr = nfs_getattr, + .setattr = nfs_setattr, + .listxattr = nfs3_listxattr, + .getxattr = nfs3_getxattr, + .setxattr = nfs3_setxattr, + .removexattr = nfs3_removexattr, +}; +#endif /* CONFIG_NFS_v3 */ + /* Hack for future NFS swap support */ #ifndef IS_SWAPFILE # define IS_SWAPFILE(inode) (0) @@ -114,6 +127,61 @@ nfs_file_release(struct inode *inode, struct file *filp) return NFS_PROTO(inode)->file_release(inode, filp); } +/** + * nfs_revalidate_file - Revalidate the page cache & related metadata + * @inode - pointer to inode struct + * @file - pointer to file + */ +static int nfs_revalidate_file(struct inode *inode, struct file *filp) +{ + int retval = 0; + + if ((NFS_FLAGS(inode) & NFS_INO_REVAL_PAGECACHE) || nfs_attribute_timeout(inode)) + retval = __nfs_revalidate_inode(NFS_SERVER(inode), inode); + nfs_revalidate_mapping(inode, filp->f_mapping); + return 0; +} + +/** + * nfs_revalidate_size - Revalidate the file size + * @inode - pointer to inode struct + * @file - pointer to struct file + * + * Revalidates the file length. This is basically a wrapper around + * nfs_revalidate_inode() that takes into account the fact that we may + * have cached writes (in which case we don't care about the server's + * idea of what the file length is), or O_DIRECT (in which case we + * shouldn't trust the cache). 
+ */ +static int nfs_revalidate_file_size(struct inode *inode, struct file *filp) +{ + struct nfs_server *server = NFS_SERVER(inode); + struct nfs_inode *nfsi = NFS_I(inode); + + if (server->flags & NFS_MOUNT_NOAC) + goto force_reval; + if (filp->f_flags & O_DIRECT) + goto force_reval; + if (nfsi->npages != 0) + return 0; + if (!(NFS_FLAGS(inode) & NFS_INO_REVAL_PAGECACHE) && !nfs_attribute_timeout(inode)) + return 0; +force_reval: + return __nfs_revalidate_inode(server, inode); +} + +static loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin) +{ + /* origin == SEEK_END => we must revalidate the cached file length */ + if (origin == 2) { + struct inode *inode = filp->f_mapping->host; + int retval = nfs_revalidate_file_size(inode, filp); + if (retval < 0) + return (loff_t)retval; + } + return remote_llseek(filp, offset, origin); +} + /* * Flush all dirty pages, and check for write errors. * @@ -158,7 +226,7 @@ nfs_file_read(struct kiocb *iocb, char __user * buf, size_t count, loff_t pos) dentry->d_parent->d_name.name, dentry->d_name.name, (unsigned long) count, (unsigned long) pos); - result = nfs_revalidate_inode(NFS_SERVER(inode), inode); + result = nfs_revalidate_file(inode, iocb->ki_filp); if (!result) result = generic_file_aio_read(iocb, buf, count, pos); return result; @@ -176,7 +244,7 @@ nfs_file_sendfile(struct file *filp, loff_t *ppos, size_t count, dentry->d_parent->d_name.name, dentry->d_name.name, (unsigned long) count, (unsigned long long) *ppos); - res = nfs_revalidate_inode(NFS_SERVER(inode), inode); + res = nfs_revalidate_file(inode, filp); if (!res) res = generic_file_sendfile(filp, ppos, count, actor, target); return res; @@ -192,7 +260,7 @@ nfs_file_mmap(struct file * file, struct vm_area_struct * vma) dfprintk(VFS, "nfs: mmap(%s/%s)\n", dentry->d_parent->d_name.name, dentry->d_name.name); - status = nfs_revalidate_inode(NFS_SERVER(inode), inode); + status = nfs_revalidate_file(inode, file); if (!status) status = generic_file_mmap(file, vma); return status; @@ -281,9 +349,15 @@ nfs_file_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t result = -EBUSY; if (IS_SWAPFILE(inode)) goto out_swapfile; - result = nfs_revalidate_inode(NFS_SERVER(inode), inode); - if (result) - goto out; + /* + * O_APPEND implies that we must revalidate the file length. 
+ */ + if (iocb->ki_filp->f_flags & O_APPEND) { + result = nfs_revalidate_file_size(inode, iocb->ki_filp); + if (result) + goto out; + } + nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping); result = count; if (!count) diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index 87f4f9a..ffb8df9 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -50,6 +50,7 @@ #include <linux/nfs_fs.h> #include <linux/nfs_idmap.h> +#include "nfs4_fs.h" #define IDMAP_HASH_SZ 128 diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index f2317f3..4845911 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -39,6 +39,7 @@ #include <asm/system.h> #include <asm/uaccess.h> +#include "nfs4_fs.h" #include "delegation.h" #define NFSDBG_FACILITY NFSDBG_VFS @@ -63,6 +64,7 @@ static void nfs_clear_inode(struct inode *); static void nfs_umount_begin(struct super_block *); static int nfs_statfs(struct super_block *, struct kstatfs *); static int nfs_show_options(struct seq_file *, struct vfsmount *); +static void nfs_zap_acl_cache(struct inode *); static struct rpc_program nfs_program; @@ -106,6 +108,21 @@ static struct rpc_program nfs_program = { .pipe_dir_name = "/nfs", }; +#ifdef CONFIG_NFS_V3_ACL +static struct rpc_stat nfsacl_rpcstat = { &nfsacl_program }; +static struct rpc_version * nfsacl_version[] = { + [3] = &nfsacl_version3, +}; + +struct rpc_program nfsacl_program = { + .name = "nfsacl", + .number = NFS_ACL_PROGRAM, + .nrvers = sizeof(nfsacl_version) / sizeof(nfsacl_version[0]), + .version = nfsacl_version, + .stats = &nfsacl_rpcstat, +}; +#endif /* CONFIG_NFS_V3_ACL */ + static inline unsigned long nfs_fattr_to_ino_t(struct nfs_fattr *fattr) { @@ -118,7 +135,7 @@ nfs_write_inode(struct inode *inode, int sync) int flags = sync ? FLUSH_WAIT : 0; int ret; - ret = nfs_commit_inode(inode, 0, 0, flags); + ret = nfs_commit_inode(inode, flags); if (ret < 0) return ret; return 0; @@ -140,10 +157,6 @@ nfs_delete_inode(struct inode * inode) clear_inode(inode); } -/* - * For the moment, the only task for the NFS clear_inode method is to - * release the mmap credential - */ static void nfs_clear_inode(struct inode *inode) { @@ -152,6 +165,7 @@ nfs_clear_inode(struct inode *inode) nfs_wb_all(inode); BUG_ON (!list_empty(&nfsi->open_files)); + nfs_zap_acl_cache(inode); cred = nfsi->cache_access.cred; if (cred) put_rpccred(cred); @@ -161,11 +175,13 @@ nfs_clear_inode(struct inode *inode) void nfs_umount_begin(struct super_block *sb) { - struct nfs_server *server = NFS_SB(sb); - struct rpc_clnt *rpc; + struct rpc_clnt *rpc = NFS_SB(sb)->client; /* -EIO all pending I/O */ - if ((rpc = server->client) != NULL) + if (!IS_ERR(rpc)) + rpc_killall_tasks(rpc); + rpc = NFS_SB(sb)->client_acl; + if (!IS_ERR(rpc)) rpc_killall_tasks(rpc); } @@ -366,13 +382,15 @@ nfs_create_client(struct nfs_server *server, const struct nfs_mount_data *data) xprt = xprt_create_proto(tcp ? IPPROTO_TCP : IPPROTO_UDP, &server->addr, &timeparms); if (IS_ERR(xprt)) { - printk(KERN_WARNING "NFS: cannot create RPC transport.\n"); + dprintk("%s: cannot create RPC transport. Error = %ld\n", + __FUNCTION__, PTR_ERR(xprt)); return (struct rpc_clnt *)xprt; } clnt = rpc_create_client(xprt, server->hostname, &nfs_program, server->rpc_ops->version, data->pseudoflavor); if (IS_ERR(clnt)) { - printk(KERN_WARNING "NFS: cannot create RPC client.\n"); + dprintk("%s: cannot create RPC client. 
Error = %ld\n", + __FUNCTION__, PTR_ERR(xprt)); goto out_fail; } @@ -383,7 +401,6 @@ nfs_create_client(struct nfs_server *server, const struct nfs_mount_data *data) return clnt; out_fail: - xprt_destroy(xprt); return clnt; } @@ -427,21 +444,16 @@ nfs_fill_super(struct super_block *sb, struct nfs_mount_data *data, int silent) /* Check NFS protocol revision and initialize RPC op vector * and file handle pool. */ - if (server->flags & NFS_MOUNT_VER3) { #ifdef CONFIG_NFS_V3 + if (server->flags & NFS_MOUNT_VER3) { server->rpc_ops = &nfs_v3_clientops; server->caps |= NFS_CAP_READDIRPLUS; - if (data->version < 4) { - printk(KERN_NOTICE "NFS: NFSv3 not supported by mount program.\n"); - return -EIO; - } -#else - printk(KERN_NOTICE "NFS: NFSv3 not supported.\n"); - return -EIO; -#endif } else { server->rpc_ops = &nfs_v2_clientops; } +#else + server->rpc_ops = &nfs_v2_clientops; +#endif /* Fill in pseudoflavor for mount version < 5 */ if (!(data->flags & NFS_MOUNT_SECFLAVOUR)) @@ -455,17 +467,34 @@ nfs_fill_super(struct super_block *sb, struct nfs_mount_data *data, int silent) return PTR_ERR(server->client); /* RFC 2623, sec 2.3.2 */ if (authflavor != RPC_AUTH_UNIX) { + struct rpc_auth *auth; + server->client_sys = rpc_clone_client(server->client); if (IS_ERR(server->client_sys)) return PTR_ERR(server->client_sys); - if (!rpcauth_create(RPC_AUTH_UNIX, server->client_sys)) - return -ENOMEM; + auth = rpcauth_create(RPC_AUTH_UNIX, server->client_sys); + if (IS_ERR(auth)) + return PTR_ERR(auth); } else { atomic_inc(&server->client->cl_count); server->client_sys = server->client; } - if (server->flags & NFS_MOUNT_VER3) { +#ifdef CONFIG_NFS_V3_ACL + if (!(server->flags & NFS_MOUNT_NOACL)) { + server->client_acl = rpc_bind_new_program(server->client, &nfsacl_program, 3); + /* No errors! Assume that Sun nfsacls are supported */ + if (!IS_ERR(server->client_acl)) + server->caps |= NFS_CAP_ACLS; + } +#else + server->flags &= ~NFS_MOUNT_NOACL; +#endif /* CONFIG_NFS_V3_ACL */ + /* + * The VFS shouldn't apply the umask to mode bits. We will + * do so ourselves when necessary. + */ + sb->s_flags |= MS_POSIXACL; if (server->namelen == 0 || server->namelen > NFS3_MAXNAMLEN) server->namelen = NFS3_MAXNAMLEN; sb->s_time_gran = 1; @@ -549,6 +578,7 @@ static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt) { NFS_MOUNT_NOCTO, ",nocto", "" }, { NFS_MOUNT_NOAC, ",noac", "" }, { NFS_MOUNT_NONLM, ",nolock", ",lock" }, + { NFS_MOUNT_NOACL, ",noacl", "" }, { 0, NULL, NULL } }; struct proc_nfs_info *nfs_infop; @@ -590,9 +620,19 @@ nfs_zap_caches(struct inode *inode) memset(NFS_COOKIEVERF(inode), 0, sizeof(NFS_COOKIEVERF(inode))); if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) - nfsi->flags |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS; + nfsi->flags |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE; else - nfsi->flags |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS; + nfsi->flags |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE; +} + +static void nfs_zap_acl_cache(struct inode *inode) +{ + void (*clear_acl_cache)(struct inode *); + + clear_acl_cache = NFS_PROTO(inode)->clear_acl_cache; + if (clear_acl_cache != NULL) + clear_acl_cache(inode); + NFS_I(inode)->flags &= ~NFS_INO_INVALID_ACL; } /* @@ -689,7 +729,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) /* Why so? 
Because we want revalidate for devices/FIFOs, and * that's precisely what we have in nfs_file_inode_operations. */ - inode->i_op = &nfs_file_inode_operations; + inode->i_op = NFS_SB(sb)->rpc_ops->file_inode_ops; if (S_ISREG(inode->i_mode)) { inode->i_fop = &nfs_file_operations; inode->i_data.a_ops = &nfs_file_aops; @@ -792,7 +832,7 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr) } } if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0) - NFS_FLAGS(inode) |= NFS_INO_INVALID_ACCESS; + NFS_FLAGS(inode) |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; nfs_end_data_update(inode); unlock_kernel(); return error; @@ -851,7 +891,7 @@ struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, struct rp ctx->state = NULL; ctx->lockowner = current->files; ctx->error = 0; - init_waitqueue_head(&ctx->waitq); + ctx->dir_cookie = 0; } return ctx; } @@ -1015,6 +1055,7 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) goto out; } flags = nfsi->flags; + nfsi->flags &= ~NFS_INO_REVAL_PAGECACHE; /* * We may need to keep the attributes marked as invalid if * we raced with nfs_end_attr_update(). @@ -1022,21 +1063,9 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) if (verifier == nfsi->cache_change_attribute) nfsi->flags &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME); /* Do the page cache invalidation */ - if (flags & NFS_INO_INVALID_DATA) { - if (S_ISREG(inode->i_mode)) { - if (filemap_fdatawrite(inode->i_mapping) == 0) - filemap_fdatawait(inode->i_mapping); - nfs_wb_all(inode); - } - nfsi->flags &= ~NFS_INO_INVALID_DATA; - invalidate_inode_pages2(inode->i_mapping); - memset(NFS_COOKIEVERF(inode), 0, sizeof(NFS_COOKIEVERF(inode))); - dfprintk(PAGECACHE, "NFS: (%s/%Ld) data cache invalidated\n", - inode->i_sb->s_id, - (long long)NFS_FILEID(inode)); - /* This ensures we revalidate dentries */ - nfsi->cache_change_attribute++; - } + nfs_revalidate_mapping(inode, inode->i_mapping); + if (flags & NFS_INO_INVALID_ACL) + nfs_zap_acl_cache(inode); dfprintk(PAGECACHE, "NFS: (%s/%Ld) revalidation complete\n", inode->i_sb->s_id, (long long)NFS_FILEID(inode)); @@ -1074,6 +1103,34 @@ int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) } /** + * nfs_revalidate_mapping - Revalidate the pagecache + * @inode - pointer to host inode + * @mapping - pointer to mapping + */ +void nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping) +{ + struct nfs_inode *nfsi = NFS_I(inode); + + if (nfsi->flags & NFS_INO_INVALID_DATA) { + if (S_ISREG(inode->i_mode)) { + if (filemap_fdatawrite(mapping) == 0) + filemap_fdatawait(mapping); + nfs_wb_all(inode); + } + invalidate_inode_pages2(mapping); + nfsi->flags &= ~NFS_INO_INVALID_DATA; + if (S_ISDIR(inode->i_mode)) { + memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf)); + /* This ensures we revalidate child dentries */ + nfsi->cache_change_attribute++; + } + dfprintk(PAGECACHE, "NFS: (%s/%Ld) data cache invalidated\n", + inode->i_sb->s_id, + (long long)NFS_FILEID(inode)); + } +} + +/** * nfs_begin_data_update * @inode - pointer to inode * Declare that a set of operations will update file data on the server @@ -1106,27 +1163,6 @@ void nfs_end_data_update(struct inode *inode) } /** - * nfs_end_data_update_defer - * @inode - pointer to inode - * Declare end of the operations that will update file data - * This will defer marking the inode as needing revalidation - * unless there are no other pending updates. 
- */ -void nfs_end_data_update_defer(struct inode *inode) -{ - struct nfs_inode *nfsi = NFS_I(inode); - - if (atomic_dec_and_test(&nfsi->data_updates)) { - /* Mark the attribute cache for revalidation */ - nfsi->flags |= NFS_INO_INVALID_ATTR; - /* Directories and symlinks: invalidate page cache too */ - if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) - nfsi->flags |= NFS_INO_INVALID_DATA; - nfsi->cache_change_attribute ++; - } -} - -/** * nfs_refresh_inode - verify consistency of the inode attribute cache * @inode - pointer to inode * @fattr - updated attributes @@ -1152,8 +1188,11 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) if ((fattr->valid & NFS_ATTR_PRE_CHANGE) != 0 && nfsi->change_attr == fattr->pre_change_attr) nfsi->change_attr = fattr->change_attr; - if (!data_unstable && nfsi->change_attr != fattr->change_attr) + if (nfsi->change_attr != fattr->change_attr) { nfsi->flags |= NFS_INO_INVALID_ATTR; + if (!data_unstable) + nfsi->flags |= NFS_INO_REVAL_PAGECACHE; + } } if ((fattr->valid & NFS_ATTR_FATTR) == 0) @@ -1176,18 +1215,22 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) } /* Verify a few of the more important attributes */ - if (!data_unstable) { - if (!timespec_equal(&inode->i_mtime, &fattr->mtime) - || cur_size != new_isize) - nfsi->flags |= NFS_INO_INVALID_ATTR; - } else if (S_ISREG(inode->i_mode) && new_isize > cur_size) - nfsi->flags |= NFS_INO_INVALID_ATTR; + if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) { + nfsi->flags |= NFS_INO_INVALID_ATTR; + if (!data_unstable) + nfsi->flags |= NFS_INO_REVAL_PAGECACHE; + } + if (cur_size != new_isize) { + nfsi->flags |= NFS_INO_INVALID_ATTR; + if (nfsi->npages == 0) + nfsi->flags |= NFS_INO_REVAL_PAGECACHE; + } /* Have any file permissions changed? */ if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO) || inode->i_uid != fattr->uid || inode->i_gid != fattr->gid) - nfsi->flags |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS; + nfsi->flags |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL; /* Has the link count changed? */ if (inode->i_nlink != fattr->nlink) @@ -1215,10 +1258,8 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr, unsigned long verifier) { struct nfs_inode *nfsi = NFS_I(inode); - __u64 new_size; - loff_t new_isize; + loff_t cur_isize, new_isize; unsigned int invalid = 0; - loff_t cur_isize; int data_unstable; dfprintk(VFS, "NFS: %s(%s/%ld ct=%d info=0x%x)\n", @@ -1251,61 +1292,56 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr, unsign /* Are we racing with known updates of the metadata on the server? */ data_unstable = ! nfs_verify_change_attribute(inode, verifier); - /* Check if the file size agrees */ - new_size = fattr->size; + /* Check if our cached file size is stale */ new_isize = nfs_size_to_loff_t(fattr->size); cur_isize = i_size_read(inode); - if (cur_isize != new_size) { -#ifdef NFS_DEBUG_VERBOSE - printk(KERN_DEBUG "NFS: isize change on %s/%ld\n", inode->i_sb->s_id, inode->i_ino); -#endif - /* - * If we have pending writebacks, things can get - * messy. - */ - if (S_ISREG(inode->i_mode) && data_unstable) { - if (new_isize > cur_isize) { + if (new_isize != cur_isize) { + /* Do we perhaps have any outstanding writes? */ + if (nfsi->npages == 0) { + /* No, but did we race with nfs_end_data_update()? 
*/ + if (verifier == nfsi->cache_change_attribute) { inode->i_size = new_isize; - invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; + invalid |= NFS_INO_INVALID_DATA; } - } else { + invalid |= NFS_INO_INVALID_ATTR; + } else if (new_isize > cur_isize) { inode->i_size = new_isize; invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; } + dprintk("NFS: isize change on server for file %s/%ld\n", + inode->i_sb->s_id, inode->i_ino); } - /* - * Note: we don't check inode->i_mtime since pipes etc. - * can change this value in VFS without requiring a - * cache revalidation. - */ + /* Check if the mtime agrees */ if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) { memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); -#ifdef NFS_DEBUG_VERBOSE - printk(KERN_DEBUG "NFS: mtime change on %s/%ld\n", inode->i_sb->s_id, inode->i_ino); -#endif + dprintk("NFS: mtime change on server for file %s/%ld\n", + inode->i_sb->s_id, inode->i_ino); if (!data_unstable) invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; } if ((fattr->valid & NFS_ATTR_FATTR_V4) && nfsi->change_attr != fattr->change_attr) { -#ifdef NFS_DEBUG_VERBOSE - printk(KERN_DEBUG "NFS: change_attr change on %s/%ld\n", + dprintk("NFS: change_attr change on server for file %s/%ld\n", inode->i_sb->s_id, inode->i_ino); -#endif nfsi->change_attr = fattr->change_attr; if (!data_unstable) - invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS; + invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; } - memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); + /* If ctime has changed we should definitely clear access+acl caches */ + if (!timespec_equal(&inode->i_ctime, &fattr->ctime)) { + if (!data_unstable) + invalid |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; + memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); + } memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime)); if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO) || inode->i_uid != fattr->uid || inode->i_gid != fattr->gid) - invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS; + invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; inode->i_mode = fattr->mode; inode->i_nlink = fattr->nlink; @@ -1385,74 +1421,95 @@ static struct super_block *nfs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data) { int error; - struct nfs_server *server; + struct nfs_server *server = NULL; struct super_block *s; struct nfs_fh *root; struct nfs_mount_data *data = raw_data; - if (!data) { - printk("nfs_read_super: missing data argument\n"); - return ERR_PTR(-EINVAL); + s = ERR_PTR(-EINVAL); + if (data == NULL) { + dprintk("%s: missing data argument\n", __FUNCTION__); + goto out_err; + } + if (data->version <= 0 || data->version > NFS_MOUNT_VERSION) { + dprintk("%s: bad mount version\n", __FUNCTION__); + goto out_err; } + switch (data->version) { + case 1: + data->namlen = 0; + case 2: + data->bsize = 0; + case 3: + if (data->flags & NFS_MOUNT_VER3) { + dprintk("%s: mount structure version %d does not support NFSv3\n", + __FUNCTION__, + data->version); + goto out_err; + } + data->root.size = NFS2_FHSIZE; + memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE); + case 4: + if (data->flags & NFS_MOUNT_SECFLAVOUR) { + dprintk("%s: mount structure version %d does not support strong security\n", + __FUNCTION__, + data->version); + goto out_err; + } + case 5: + memset(data->context, 0, sizeof(data->context)); + } +#ifndef 
CONFIG_NFS_V3 + /* If NFSv3 is not compiled in, return -EPROTONOSUPPORT */ + s = ERR_PTR(-EPROTONOSUPPORT); + if (data->flags & NFS_MOUNT_VER3) { + dprintk("%s: NFSv3 not compiled into kernel\n", __FUNCTION__); + goto out_err; + } +#endif /* CONFIG_NFS_V3 */ + s = ERR_PTR(-ENOMEM); server = kmalloc(sizeof(struct nfs_server), GFP_KERNEL); if (!server) - return ERR_PTR(-ENOMEM); + goto out_err; memset(server, 0, sizeof(struct nfs_server)); /* Zero out the NFS state stuff */ init_nfsv4_state(server); - - if (data->version != NFS_MOUNT_VERSION) { - printk("nfs warning: mount version %s than kernel\n", - data->version < NFS_MOUNT_VERSION ? "older" : "newer"); - if (data->version < 2) - data->namlen = 0; - if (data->version < 3) - data->bsize = 0; - if (data->version < 4) { - data->flags &= ~NFS_MOUNT_VER3; - data->root.size = NFS2_FHSIZE; - memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE); - } - if (data->version < 5) - data->flags &= ~NFS_MOUNT_SECFLAVOUR; - } + server->client = server->client_sys = server->client_acl = ERR_PTR(-EINVAL); root = &server->fh; if (data->flags & NFS_MOUNT_VER3) root->size = data->root.size; else root->size = NFS2_FHSIZE; + s = ERR_PTR(-EINVAL); if (root->size > sizeof(root->data)) { - printk("nfs_get_sb: invalid root filehandle\n"); - kfree(server); - return ERR_PTR(-EINVAL); + dprintk("%s: invalid root filehandle\n", __FUNCTION__); + goto out_err; } memcpy(root->data, data->root.data, root->size); /* We now require that the mount process passes the remote address */ memcpy(&server->addr, &data->addr, sizeof(server->addr)); if (server->addr.sin_addr.s_addr == INADDR_ANY) { - printk("NFS: mount program didn't pass remote address!\n"); - kfree(server); - return ERR_PTR(-EINVAL); + dprintk("%s: mount program didn't pass remote address!\n", + __FUNCTION__); + goto out_err; } - s = sget(fs_type, nfs_compare_super, nfs_set_super, server); - - if (IS_ERR(s) || s->s_root) { - kfree(server); - return s; + /* Fire up rpciod if not yet running */ + s = ERR_PTR(rpciod_up()); + if (IS_ERR(s)) { + dprintk("%s: couldn't start rpciod! Error = %ld\n", + __FUNCTION__, PTR_ERR(s)); + goto out_err; } - s->s_flags = flags; + s = sget(fs_type, nfs_compare_super, nfs_set_super, server); + if (IS_ERR(s) || s->s_root) + goto out_rpciod_down; - /* Fire up rpciod if not yet running */ - if (rpciod_up() != 0) { - printk(KERN_WARNING "NFS: couldn't start rpciod!\n"); - kfree(server); - return ERR_PTR(-EIO); - } + s->s_flags = flags; error = nfs_fill_super(s, data, flags & MS_VERBOSE ? 
1 : 0); if (error) { @@ -1462,6 +1519,11 @@ static struct super_block *nfs_get_sb(struct file_system_type *fs_type, } s->s_flags |= MS_ACTIVE; return s; +out_rpciod_down: + rpciod_down(); +out_err: + kfree(server); + return s; } static void nfs_kill_super(struct super_block *s) @@ -1470,10 +1532,12 @@ static void nfs_kill_super(struct super_block *s) kill_anon_super(s); - if (server->client != NULL && !IS_ERR(server->client)) + if (!IS_ERR(server->client)) rpc_shutdown_client(server->client); - if (server->client_sys != NULL && !IS_ERR(server->client_sys)) + if (!IS_ERR(server->client_sys)) rpc_shutdown_client(server->client_sys); + if (!IS_ERR(server->client_acl)) + rpc_shutdown_client(server->client_acl); if (!(server->flags & NFS_MOUNT_NONLM)) lockd_down(); /* release rpc.lockd */ @@ -1594,15 +1658,19 @@ static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data, clp = nfs4_get_client(&server->addr.sin_addr); if (!clp) { - printk(KERN_WARNING "NFS: failed to create NFS4 client.\n"); + dprintk("%s: failed to create NFS4 client.\n", __FUNCTION__); return -EIO; } /* Now create transport and client */ authflavour = RPC_AUTH_UNIX; if (data->auth_flavourlen != 0) { - if (data->auth_flavourlen > 1) - printk(KERN_INFO "NFS: cannot yet deal with multiple auth flavours.\n"); + if (data->auth_flavourlen != 1) { + dprintk("%s: Invalid number of RPC auth flavours %d.\n", + __FUNCTION__, data->auth_flavourlen); + err = -EINVAL; + goto out_fail; + } if (copy_from_user(&authflavour, data->auth_flavours, sizeof(authflavour))) { err = -EFAULT; goto out_fail; @@ -1610,21 +1678,22 @@ static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data, } down_write(&clp->cl_sem); - if (clp->cl_rpcclient == NULL) { + if (IS_ERR(clp->cl_rpcclient)) { xprt = xprt_create_proto(proto, &server->addr, &timeparms); if (IS_ERR(xprt)) { up_write(&clp->cl_sem); - printk(KERN_WARNING "NFS: cannot create RPC transport.\n"); err = PTR_ERR(xprt); + dprintk("%s: cannot create RPC transport. Error = %d\n", + __FUNCTION__, err); goto out_fail; } clnt = rpc_create_client(xprt, server->hostname, &nfs_program, server->rpc_ops->version, authflavour); if (IS_ERR(clnt)) { up_write(&clp->cl_sem); - printk(KERN_WARNING "NFS: cannot create RPC client.\n"); - xprt_destroy(xprt); err = PTR_ERR(clnt); + dprintk("%s: cannot create RPC client. Error = %d\n", + __FUNCTION__, err); goto out_fail; } clnt->cl_intr = 1; @@ -1656,21 +1725,26 @@ static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data, clp = NULL; if (IS_ERR(clnt)) { - printk(KERN_WARNING "NFS: cannot create RPC client.\n"); - return PTR_ERR(clnt); + err = PTR_ERR(clnt); + dprintk("%s: cannot create RPC client. 
Error = %d\n", + __FUNCTION__, err); + return err; } server->client = clnt; if (server->nfs4_state->cl_idmap == NULL) { - printk(KERN_WARNING "NFS: failed to create idmapper.\n"); + dprintk("%s: failed to create idmapper.\n", __FUNCTION__); return -ENOMEM; } if (clnt->cl_auth->au_flavor != authflavour) { - if (rpcauth_create(authflavour, clnt) == NULL) { - printk(KERN_WARNING "NFS: couldn't create credcache!\n"); - return -ENOMEM; + struct rpc_auth *auth; + + auth = rpcauth_create(authflavour, clnt); + if (IS_ERR(auth)) { + dprintk("%s: couldn't create credcache!\n", __FUNCTION__); + return PTR_ERR(auth); } } @@ -1730,8 +1804,12 @@ static struct super_block *nfs4_get_sb(struct file_system_type *fs_type, struct nfs4_mount_data *data = raw_data; void *p; - if (!data) { - printk("nfs_read_super: missing data argument\n"); + if (data == NULL) { + dprintk("%s: missing data argument\n", __FUNCTION__); + return ERR_PTR(-EINVAL); + } + if (data->version <= 0 || data->version > NFS4_MOUNT_VERSION) { + dprintk("%s: bad mount version\n", __FUNCTION__); return ERR_PTR(-EINVAL); } @@ -1741,11 +1819,7 @@ static struct super_block *nfs4_get_sb(struct file_system_type *fs_type, memset(server, 0, sizeof(struct nfs_server)); /* Zero out the NFS state stuff */ init_nfsv4_state(server); - - if (data->version != NFS4_MOUNT_VERSION) { - printk("nfs warning: mount version %s than kernel\n", - data->version < NFS4_MOUNT_VERSION ? "older" : "newer"); - } + server->client = server->client_sys = server->client_acl = ERR_PTR(-EINVAL); p = nfs_copy_user_string(NULL, &data->hostname, 256); if (IS_ERR(p)) @@ -1773,11 +1847,20 @@ static struct super_block *nfs4_get_sb(struct file_system_type *fs_type, } if (server->addr.sin_family != AF_INET || server->addr.sin_addr.s_addr == INADDR_ANY) { - printk("NFS: mount program didn't pass remote IP address!\n"); + dprintk("%s: mount program didn't pass remote IP address!\n", + __FUNCTION__); s = ERR_PTR(-EINVAL); goto out_free; } + /* Fire up rpciod if not yet running */ + s = ERR_PTR(rpciod_up()); + if (IS_ERR(s)) { + dprintk("%s: couldn't start rpciod! Error = %ld\n", + __FUNCTION__, PTR_ERR(s)); + goto out_free; + } + s = sget(fs_type, nfs4_compare_super, nfs_set_super, server); if (IS_ERR(s) || s->s_root) @@ -1785,13 +1868,6 @@ static struct super_block *nfs4_get_sb(struct file_system_type *fs_type, s->s_flags = flags; - /* Fire up rpciod if not yet running */ - if (rpciod_up() != 0) { - printk(KERN_WARNING "NFS: couldn't start rpciod!\n"); - s = ERR_PTR(-EIO); - goto out_free; - } - error = nfs4_fill_super(s, data, flags & MS_VERBOSE ? 
1 : 0); if (error) { up_write(&s->s_umount); @@ -1875,6 +1951,13 @@ static struct inode *nfs_alloc_inode(struct super_block *sb) if (!nfsi) return NULL; nfsi->flags = 0; +#ifdef CONFIG_NFS_V3_ACL + nfsi->acl_access = ERR_PTR(-EAGAIN); + nfsi->acl_default = ERR_PTR(-EAGAIN); +#endif +#ifdef CONFIG_NFS_V4 + nfsi->nfs4_acl = NULL; +#endif /* CONFIG_NFS_V4 */ return &nfsi->vfs_inode; } diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c index 9d3ddad..0e82617 100644 --- a/fs/nfs/mount_clnt.c +++ b/fs/nfs/mount_clnt.c @@ -80,9 +80,7 @@ mnt_create(char *hostname, struct sockaddr_in *srvaddr, int version, clnt = rpc_create_client(xprt, hostname, &mnt_program, version, RPC_AUTH_UNIX); - if (IS_ERR(clnt)) { - xprt_destroy(xprt); - } else { + if (!IS_ERR(clnt)) { clnt->cl_softrtry = 1; clnt->cl_chatty = 1; clnt->cl_oneshot = 1; diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c new file mode 100644 index 0000000..ee3536f --- /dev/null +++ b/fs/nfs/nfs3acl.c @@ -0,0 +1,403 @@ +#include <linux/fs.h> +#include <linux/nfs.h> +#include <linux/nfs3.h> +#include <linux/nfs_fs.h> +#include <linux/xattr_acl.h> +#include <linux/nfsacl.h> + +#define NFSDBG_FACILITY NFSDBG_PROC + +ssize_t nfs3_listxattr(struct dentry *dentry, char *buffer, size_t size) +{ + struct inode *inode = dentry->d_inode; + struct posix_acl *acl; + int pos=0, len=0; + +# define output(s) do { \ + if (pos + sizeof(s) <= size) { \ + memcpy(buffer + pos, s, sizeof(s)); \ + pos += sizeof(s); \ + } \ + len += sizeof(s); \ + } while(0) + + acl = nfs3_proc_getacl(inode, ACL_TYPE_ACCESS); + if (IS_ERR(acl)) + return PTR_ERR(acl); + if (acl) { + output("system.posix_acl_access"); + posix_acl_release(acl); + } + + if (S_ISDIR(inode->i_mode)) { + acl = nfs3_proc_getacl(inode, ACL_TYPE_DEFAULT); + if (IS_ERR(acl)) + return PTR_ERR(acl); + if (acl) { + output("system.posix_acl_default"); + posix_acl_release(acl); + } + } + +# undef output + + if (!buffer || len <= size) + return len; + return -ERANGE; +} + +ssize_t nfs3_getxattr(struct dentry *dentry, const char *name, + void *buffer, size_t size) +{ + struct inode *inode = dentry->d_inode; + struct posix_acl *acl; + int type, error = 0; + + if (strcmp(name, XATTR_NAME_ACL_ACCESS) == 0) + type = ACL_TYPE_ACCESS; + else if (strcmp(name, XATTR_NAME_ACL_DEFAULT) == 0) + type = ACL_TYPE_DEFAULT; + else + return -EOPNOTSUPP; + + acl = nfs3_proc_getacl(inode, type); + if (IS_ERR(acl)) + return PTR_ERR(acl); + else if (acl) { + if (type == ACL_TYPE_ACCESS && acl->a_count == 0) + error = -ENODATA; + else + error = posix_acl_to_xattr(acl, buffer, size); + posix_acl_release(acl); + } else + error = -ENODATA; + + return error; +} + +int nfs3_setxattr(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags) +{ + struct inode *inode = dentry->d_inode; + struct posix_acl *acl; + int type, error; + + if (strcmp(name, XATTR_NAME_ACL_ACCESS) == 0) + type = ACL_TYPE_ACCESS; + else if (strcmp(name, XATTR_NAME_ACL_DEFAULT) == 0) + type = ACL_TYPE_DEFAULT; + else + return -EOPNOTSUPP; + + acl = posix_acl_from_xattr(value, size); + if (IS_ERR(acl)) + return PTR_ERR(acl); + error = nfs3_proc_setacl(inode, type, acl); + posix_acl_release(acl); + + return error; +} + +int nfs3_removexattr(struct dentry *dentry, const char *name) +{ + struct inode *inode = dentry->d_inode; + int type; + + if (strcmp(name, XATTR_NAME_ACL_ACCESS) == 0) + type = ACL_TYPE_ACCESS; + else if (strcmp(name, XATTR_NAME_ACL_DEFAULT) == 0) + type = ACL_TYPE_DEFAULT; + else + return -EOPNOTSUPP; + + return 
nfs3_proc_setacl(inode, type, NULL); +} + +static void __nfs3_forget_cached_acls(struct nfs_inode *nfsi) +{ + if (!IS_ERR(nfsi->acl_access)) { + posix_acl_release(nfsi->acl_access); + nfsi->acl_access = ERR_PTR(-EAGAIN); + } + if (!IS_ERR(nfsi->acl_default)) { + posix_acl_release(nfsi->acl_default); + nfsi->acl_default = ERR_PTR(-EAGAIN); + } +} + +void nfs3_forget_cached_acls(struct inode *inode) +{ + dprintk("NFS: nfs3_forget_cached_acls(%s/%ld)\n", inode->i_sb->s_id, + inode->i_ino); + spin_lock(&inode->i_lock); + __nfs3_forget_cached_acls(NFS_I(inode)); + spin_unlock(&inode->i_lock); +} + +static struct posix_acl *nfs3_get_cached_acl(struct inode *inode, int type) +{ + struct nfs_inode *nfsi = NFS_I(inode); + struct posix_acl *acl = ERR_PTR(-EINVAL); + + spin_lock(&inode->i_lock); + switch(type) { + case ACL_TYPE_ACCESS: + acl = nfsi->acl_access; + break; + + case ACL_TYPE_DEFAULT: + acl = nfsi->acl_default; + break; + + default: + goto out; + } + if (IS_ERR(acl)) + acl = ERR_PTR(-EAGAIN); + else + acl = posix_acl_dup(acl); +out: + spin_unlock(&inode->i_lock); + dprintk("NFS: nfs3_get_cached_acl(%s/%ld, %d) = %p\n", inode->i_sb->s_id, + inode->i_ino, type, acl); + return acl; +} + +static void nfs3_cache_acls(struct inode *inode, struct posix_acl *acl, + struct posix_acl *dfacl) +{ + struct nfs_inode *nfsi = NFS_I(inode); + + dprintk("nfs3_cache_acls(%s/%ld, %p, %p)\n", inode->i_sb->s_id, + inode->i_ino, acl, dfacl); + spin_lock(&inode->i_lock); + __nfs3_forget_cached_acls(NFS_I(inode)); + nfsi->acl_access = posix_acl_dup(acl); + nfsi->acl_default = posix_acl_dup(dfacl); + spin_unlock(&inode->i_lock); +} + +struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type) +{ + struct nfs_server *server = NFS_SERVER(inode); + struct nfs_fattr fattr; + struct page *pages[NFSACL_MAXPAGES] = { }; + struct nfs3_getaclargs args = { + .fh = NFS_FH(inode), + /* The xdr layer may allocate pages here. */ + .pages = pages, + }; + struct nfs3_getaclres res = { + .fattr = &fattr, + }; + struct posix_acl *acl; + int status, count; + + if (!nfs_server_capable(inode, NFS_CAP_ACLS)) + return ERR_PTR(-EOPNOTSUPP); + + status = nfs_revalidate_inode(server, inode); + if (status < 0) + return ERR_PTR(status); + acl = nfs3_get_cached_acl(inode, type); + if (acl != ERR_PTR(-EAGAIN)) + return acl; + acl = NULL; + + /* + * Only get the access acl when explicitly requested: We don't + * need it for access decisions, and only some applications use + * it. Applications which request the access acl first are not + * penalized from this optimization. + */ + if (type == ACL_TYPE_ACCESS) + args.mask |= NFS_ACLCNT|NFS_ACL; + if (S_ISDIR(inode->i_mode)) + args.mask |= NFS_DFACLCNT|NFS_DFACL; + if (args.mask == 0) + return NULL; + + dprintk("NFS call getacl\n"); + status = rpc_call(server->client_acl, ACLPROC3_GETACL, + &args, &res, 0); + dprintk("NFS reply getacl: %d\n", status); + + /* pages may have been allocated at the xdr layer. 
*/ + for (count = 0; count < NFSACL_MAXPAGES && args.pages[count]; count++) + __free_page(args.pages[count]); + + switch (status) { + case 0: + status = nfs_refresh_inode(inode, &fattr); + break; + case -EPFNOSUPPORT: + case -EPROTONOSUPPORT: + dprintk("NFS_V3_ACL extension not supported; disabling\n"); + server->caps &= ~NFS_CAP_ACLS; + case -ENOTSUPP: + status = -EOPNOTSUPP; + default: + goto getout; + } + if ((args.mask & res.mask) != args.mask) { + status = -EIO; + goto getout; + } + + if (res.acl_access != NULL) { + if (posix_acl_equiv_mode(res.acl_access, NULL) == 0) { + posix_acl_release(res.acl_access); + res.acl_access = NULL; + } + } + nfs3_cache_acls(inode, res.acl_access, res.acl_default); + + switch(type) { + case ACL_TYPE_ACCESS: + acl = res.acl_access; + res.acl_access = NULL; + break; + + case ACL_TYPE_DEFAULT: + acl = res.acl_default; + res.acl_default = NULL; + } + +getout: + posix_acl_release(res.acl_access); + posix_acl_release(res.acl_default); + + if (status != 0) { + posix_acl_release(acl); + acl = ERR_PTR(status); + } + return acl; +} + +static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl, + struct posix_acl *dfacl) +{ + struct nfs_server *server = NFS_SERVER(inode); + struct nfs_fattr fattr; + struct page *pages[NFSACL_MAXPAGES] = { }; + struct nfs3_setaclargs args = { + .inode = inode, + .mask = NFS_ACL, + .acl_access = acl, + .pages = pages, + }; + int status, count; + + status = -EOPNOTSUPP; + if (!nfs_server_capable(inode, NFS_CAP_ACLS)) + goto out; + + /* We are doing this here, because XDR marshalling can only + return -ENOMEM. */ + status = -ENOSPC; + if (acl != NULL && acl->a_count > NFS_ACL_MAX_ENTRIES) + goto out; + if (dfacl != NULL && dfacl->a_count > NFS_ACL_MAX_ENTRIES) + goto out; + if (S_ISDIR(inode->i_mode)) { + args.mask |= NFS_DFACL; + args.acl_default = dfacl; + } + + dprintk("NFS call setacl\n"); + nfs_begin_data_update(inode); + status = rpc_call(server->client_acl, ACLPROC3_SETACL, + &args, &fattr, 0); + NFS_FLAGS(inode) |= NFS_INO_INVALID_ACCESS; + nfs_end_data_update(inode); + dprintk("NFS reply setacl: %d\n", status); + + /* pages may have been allocated at the xdr layer. 
*/ + for (count = 0; count < NFSACL_MAXPAGES && args.pages[count]; count++) + __free_page(args.pages[count]); + + switch (status) { + case 0: + status = nfs_refresh_inode(inode, &fattr); + break; + case -EPFNOSUPPORT: + case -EPROTONOSUPPORT: + dprintk("NFS_V3_ACL SETACL RPC not supported" + "(will not retry)\n"); + server->caps &= ~NFS_CAP_ACLS; + case -ENOTSUPP: + status = -EOPNOTSUPP; + } +out: + return status; +} + +int nfs3_proc_setacl(struct inode *inode, int type, struct posix_acl *acl) +{ + struct posix_acl *alloc = NULL, *dfacl = NULL; + int status; + + if (S_ISDIR(inode->i_mode)) { + switch(type) { + case ACL_TYPE_ACCESS: + alloc = dfacl = nfs3_proc_getacl(inode, + ACL_TYPE_DEFAULT); + if (IS_ERR(alloc)) + goto fail; + break; + + case ACL_TYPE_DEFAULT: + dfacl = acl; + alloc = acl = nfs3_proc_getacl(inode, + ACL_TYPE_ACCESS); + if (IS_ERR(alloc)) + goto fail; + break; + + default: + return -EINVAL; + } + } else if (type != ACL_TYPE_ACCESS) + return -EINVAL; + + if (acl == NULL) { + alloc = acl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL); + if (IS_ERR(alloc)) + goto fail; + } + status = nfs3_proc_setacls(inode, acl, dfacl); + posix_acl_release(alloc); + return status; + +fail: + return PTR_ERR(alloc); +} + +int nfs3_proc_set_default_acl(struct inode *dir, struct inode *inode, + mode_t mode) +{ + struct posix_acl *dfacl, *acl; + int error = 0; + + dfacl = nfs3_proc_getacl(dir, ACL_TYPE_DEFAULT); + if (IS_ERR(dfacl)) { + error = PTR_ERR(dfacl); + return (error == -EOPNOTSUPP) ? 0 : error; + } + if (!dfacl) + return 0; + acl = posix_acl_clone(dfacl, GFP_KERNEL); + error = -ENOMEM; + if (!acl) + goto out_release_dfacl; + error = posix_acl_create_masq(acl, &mode); + if (error < 0) + goto out_release_acl; + error = nfs3_proc_setacls(inode, acl, S_ISDIR(inode->i_mode) ? 
+ dfacl : NULL); +out_release_acl: + posix_acl_release(acl); +out_release_dfacl: + posix_acl_release(dfacl); + return error; +} diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 3878494d..7851569 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -17,6 +17,7 @@ #include <linux/nfs_page.h> #include <linux/lockd/bind.h> #include <linux/smp_lock.h> +#include <linux/nfs_mount.h> #define NFSDBG_FACILITY NFSDBG_PROC @@ -45,7 +46,7 @@ static inline int nfs3_rpc_call_wrapper(struct rpc_clnt *clnt, u32 proc, void *argp, void *resp, int flags) { struct rpc_message msg = { - .rpc_proc = &nfs3_procedures[proc], + .rpc_proc = &clnt->cl_procinfo[proc], .rpc_argp = argp, .rpc_resp = resp, }; @@ -313,7 +314,8 @@ nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, .fh = &fhandle, .fattr = &fattr }; - int status; + mode_t mode = sattr->ia_mode; + int status; dprintk("NFS call create %s\n", dentry->d_name.name); arg.createmode = NFS3_CREATE_UNCHECKED; @@ -323,6 +325,8 @@ nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, arg.verifier[1] = current->pid; } + sattr->ia_mode &= ~current->fs->umask; + again: dir_attr.valid = 0; fattr.valid = 0; @@ -369,6 +373,9 @@ again: nfs_refresh_inode(dentry->d_inode, &fattr); dprintk("NFS reply setattr (post-create): %d\n", status); } + if (status != 0) + goto out; + status = nfs3_proc_set_default_acl(dir, dentry->d_inode, mode); out: dprintk("NFS reply create: %d\n", status); return status; @@ -538,15 +545,24 @@ nfs3_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr) .fh = &fhandle, .fattr = &fattr }; - int status; + int mode = sattr->ia_mode; + int status; dprintk("NFS call mkdir %s\n", dentry->d_name.name); dir_attr.valid = 0; fattr.valid = 0; + + sattr->ia_mode &= ~current->fs->umask; + status = rpc_call(NFS_CLIENT(dir), NFS3PROC_MKDIR, &arg, &res, 0); nfs_refresh_inode(dir, &dir_attr); - if (status == 0) - status = nfs_instantiate(dentry, &fhandle, &fattr); + if (status != 0) + goto out; + status = nfs_instantiate(dentry, &fhandle, &fattr); + if (status != 0) + goto out; + status = nfs3_proc_set_default_acl(dir, dentry->d_inode, mode); +out: dprintk("NFS reply mkdir: %d\n", status); return status; } @@ -641,6 +657,7 @@ nfs3_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr, .fh = &fh, .fattr = &fattr }; + mode_t mode = sattr->ia_mode; int status; switch (sattr->ia_mode & S_IFMT) { @@ -653,12 +670,20 @@ nfs3_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr, dprintk("NFS call mknod %s %u:%u\n", dentry->d_name.name, MAJOR(rdev), MINOR(rdev)); + + sattr->ia_mode &= ~current->fs->umask; + dir_attr.valid = 0; fattr.valid = 0; status = rpc_call(NFS_CLIENT(dir), NFS3PROC_MKNOD, &arg, &res, 0); nfs_refresh_inode(dir, &dir_attr); - if (status == 0) - status = nfs_instantiate(dentry, &fh, &fattr); + if (status != 0) + goto out; + status = nfs_instantiate(dentry, &fh, &fattr); + if (status != 0) + goto out; + status = nfs3_proc_set_default_acl(dir, dentry->d_inode, mode); +out: dprintk("NFS reply mknod: %d\n", status); return status; } @@ -825,7 +850,8 @@ nfs3_proc_lock(struct file *filp, int cmd, struct file_lock *fl) struct nfs_rpc_ops nfs_v3_clientops = { .version = 3, /* protocol version */ .dentry_ops = &nfs_dentry_operations, - .dir_inode_ops = &nfs_dir_inode_operations, + .dir_inode_ops = &nfs3_dir_inode_operations, + .file_inode_ops = &nfs3_file_inode_operations, .getroot = nfs3_proc_get_root, .getattr = nfs3_proc_getattr, .setattr = 
nfs3_proc_setattr, @@ -856,4 +882,5 @@ struct nfs_rpc_ops nfs_v3_clientops = { .file_open = nfs_open, .file_release = nfs_release, .lock = nfs3_proc_lock, + .clear_acl_cache = nfs3_forget_cached_acls, }; diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index a3593d4..db4a904 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -21,6 +21,7 @@ #include <linux/nfs.h> #include <linux/nfs3.h> #include <linux/nfs_fs.h> +#include <linux/nfsacl.h> #define NFSDBG_FACILITY NFSDBG_XDR @@ -79,6 +80,11 @@ extern int nfs_stat_to_errno(int); #define NFS3_pathconfres_sz (1+NFS3_post_op_attr_sz+6) #define NFS3_commitres_sz (1+NFS3_wcc_data_sz+2) +#define ACL3_getaclargs_sz (NFS3_fh_sz+1) +#define ACL3_setaclargs_sz (NFS3_fh_sz+1+2*(2+5*3)) +#define ACL3_getaclres_sz (1+NFS3_post_op_attr_sz+1+2*(2+5*3)) +#define ACL3_setaclres_sz (1+NFS3_post_op_attr_sz) + /* * Map file type to S_IFMT bits */ @@ -627,6 +633,74 @@ nfs3_xdr_commitargs(struct rpc_rqst *req, u32 *p, struct nfs_writeargs *args) return 0; } +#ifdef CONFIG_NFS_V3_ACL +/* + * Encode GETACL arguments + */ +static int +nfs3_xdr_getaclargs(struct rpc_rqst *req, u32 *p, + struct nfs3_getaclargs *args) +{ + struct rpc_auth *auth = req->rq_task->tk_auth; + unsigned int replen; + + p = xdr_encode_fhandle(p, args->fh); + *p++ = htonl(args->mask); + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + + if (args->mask & (NFS_ACL | NFS_DFACL)) { + /* Inline the page array */ + replen = (RPC_REPHDRSIZE + auth->au_rslack + + ACL3_getaclres_sz) << 2; + xdr_inline_pages(&req->rq_rcv_buf, replen, args->pages, 0, + NFSACL_MAXPAGES << PAGE_SHIFT); + } + return 0; +} + +/* + * Encode SETACL arguments + */ +static int +nfs3_xdr_setaclargs(struct rpc_rqst *req, u32 *p, + struct nfs3_setaclargs *args) +{ + struct xdr_buf *buf = &req->rq_snd_buf; + unsigned int base, len_in_head, len = nfsacl_size( + (args->mask & NFS_ACL) ? args->acl_access : NULL, + (args->mask & NFS_DFACL) ? args->acl_default : NULL); + int count, err; + + p = xdr_encode_fhandle(p, NFS_FH(args->inode)); + *p++ = htonl(args->mask); + base = (char *)p - (char *)buf->head->iov_base; + /* put as much of the acls into head as possible. */ + len_in_head = min_t(unsigned int, buf->head->iov_len - base, len); + len -= len_in_head; + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p + (len_in_head >> 2)); + + for (count = 0; (count << PAGE_SHIFT) < len; count++) { + args->pages[count] = alloc_page(GFP_KERNEL); + if (!args->pages[count]) { + while (count) + __free_page(args->pages[--count]); + return -ENOMEM; + } + } + xdr_encode_pages(buf, args->pages, 0, len); + + err = nfsacl_encode(buf, base, args->inode, + (args->mask & NFS_ACL) ? + args->acl_access : NULL, 1, 0); + if (err > 0) + err = nfsacl_encode(buf, base + err, args->inode, + (args->mask & NFS_DFACL) ? + args->acl_default : NULL, 1, + NFS_ACL_DEFAULT); + return (err > 0) ? 
0 : err; +} +#endif /* CONFIG_NFS_V3_ACL */ + /* * NFS XDR decode functions */ @@ -978,6 +1052,54 @@ nfs3_xdr_commitres(struct rpc_rqst *req, u32 *p, struct nfs_writeres *res) return 0; } +#ifdef CONFIG_NFS_V3_ACL +/* + * Decode GETACL reply + */ +static int +nfs3_xdr_getaclres(struct rpc_rqst *req, u32 *p, + struct nfs3_getaclres *res) +{ + struct xdr_buf *buf = &req->rq_rcv_buf; + int status = ntohl(*p++); + struct posix_acl **acl; + unsigned int *aclcnt; + int err, base; + + if (status != 0) + return -nfs_stat_to_errno(status); + p = xdr_decode_post_op_attr(p, res->fattr); + res->mask = ntohl(*p++); + if (res->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT)) + return -EINVAL; + base = (char *)p - (char *)req->rq_rcv_buf.head->iov_base; + + acl = (res->mask & NFS_ACL) ? &res->acl_access : NULL; + aclcnt = (res->mask & NFS_ACLCNT) ? &res->acl_access_count : NULL; + err = nfsacl_decode(buf, base, aclcnt, acl); + + acl = (res->mask & NFS_DFACL) ? &res->acl_default : NULL; + aclcnt = (res->mask & NFS_DFACLCNT) ? &res->acl_default_count : NULL; + if (err > 0) + err = nfsacl_decode(buf, base + err, aclcnt, acl); + return (err > 0) ? 0 : err; +} + +/* + * Decode setacl reply. + */ +static int +nfs3_xdr_setaclres(struct rpc_rqst *req, u32 *p, struct nfs_fattr *fattr) +{ + int status = ntohl(*p++); + + if (status) + return -nfs_stat_to_errno(status); + xdr_decode_post_op_attr(p, fattr); + return 0; +} +#endif /* CONFIG_NFS_V3_ACL */ + #ifndef MAX # define MAX(a, b) (((a) > (b))? (a) : (b)) #endif @@ -1021,3 +1143,28 @@ struct rpc_version nfs_version3 = { .procs = nfs3_procedures }; +#ifdef CONFIG_NFS_V3_ACL +static struct rpc_procinfo nfs3_acl_procedures[] = { + [ACLPROC3_GETACL] = { + .p_proc = ACLPROC3_GETACL, + .p_encode = (kxdrproc_t) nfs3_xdr_getaclargs, + .p_decode = (kxdrproc_t) nfs3_xdr_getaclres, + .p_bufsiz = MAX(ACL3_getaclargs_sz, ACL3_getaclres_sz) << 2, + .p_timer = 1, + }, + [ACLPROC3_SETACL] = { + .p_proc = ACLPROC3_SETACL, + .p_encode = (kxdrproc_t) nfs3_xdr_setaclargs, + .p_decode = (kxdrproc_t) nfs3_xdr_setaclres, + .p_bufsiz = MAX(ACL3_setaclargs_sz, ACL3_setaclres_sz) << 2, + .p_timer = 0, + }, +}; + +struct rpc_version nfsacl_version3 = { + .number = 3, + .nrprocs = sizeof(nfs3_acl_procedures)/ + sizeof(nfs3_acl_procedures[0]), + .procs = nfs3_acl_procedures, +}; +#endif /* CONFIG_NFS_V3_ACL */ diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h new file mode 100644 index 0000000..ec1a22d --- /dev/null +++ b/fs/nfs/nfs4_fs.h @@ -0,0 +1,253 @@ +/* + * linux/fs/nfs/nfs4_fs.h + * + * Copyright (C) 2005 Trond Myklebust + * + * NFSv4-specific filesystem definitions and declarations + */ + +#ifndef __LINUX_FS_NFS_NFS4_FS_H +#define __LINUX_FS_NFS_NFS4_FS_H + +#ifdef CONFIG_NFS_V4 + +struct idmap; + +/* + * In a seqid-mutating op, this macro controls which error return + * values trigger incrementation of the seqid. + * + * from rfc 3010: + * The client MUST monotonically increment the sequence number for the + * CLOSE, LOCK, LOCKU, OPEN, OPEN_CONFIRM, and OPEN_DOWNGRADE + * operations. This is true even in the event that the previous + * operation that used the sequence number received an error. The only + * exception to this rule is if the previous operation received one of + * the following errors: NFSERR_STALE_CLIENTID, NFSERR_STALE_STATEID, + * NFSERR_BAD_STATEID, NFSERR_BAD_SEQID, NFSERR_BADXDR, + * NFSERR_RESOURCE, NFSERR_NOFILEHANDLE. 
+ * + */ +#define seqid_mutating_err(err) \ +(((err) != NFSERR_STALE_CLIENTID) && \ + ((err) != NFSERR_STALE_STATEID) && \ + ((err) != NFSERR_BAD_STATEID) && \ + ((err) != NFSERR_BAD_SEQID) && \ + ((err) != NFSERR_BAD_XDR) && \ + ((err) != NFSERR_RESOURCE) && \ + ((err) != NFSERR_NOFILEHANDLE)) + +enum nfs4_client_state { + NFS4CLNT_OK = 0, +}; + +/* + * The nfs4_client identifies our client state to the server. + */ +struct nfs4_client { + struct list_head cl_servers; /* Global list of servers */ + struct in_addr cl_addr; /* Server identifier */ + u64 cl_clientid; /* constant */ + nfs4_verifier cl_confirm; + unsigned long cl_state; + + u32 cl_lockowner_id; + + /* + * The following rwsem ensures exclusive access to the server + * while we recover the state following a lease expiration. + */ + struct rw_semaphore cl_sem; + + struct list_head cl_delegations; + struct list_head cl_state_owners; + struct list_head cl_unused; + int cl_nunused; + spinlock_t cl_lock; + atomic_t cl_count; + + struct rpc_clnt * cl_rpcclient; + struct rpc_cred * cl_cred; + + struct list_head cl_superblocks; /* List of nfs_server structs */ + + unsigned long cl_lease_time; + unsigned long cl_last_renewal; + struct work_struct cl_renewd; + struct work_struct cl_recoverd; + + wait_queue_head_t cl_waitq; + struct rpc_wait_queue cl_rpcwaitq; + + /* used for the setclientid verifier */ + struct timespec cl_boot_time; + + /* idmapper */ + struct idmap * cl_idmap; + + /* Our own IP address, as a null-terminated string. + * This is used to generate the clientid, and the callback address. + */ + char cl_ipaddr[16]; + unsigned char cl_id_uniquifier; +}; + +/* + * NFS4 state_owners and lock_owners are simply labels for ordered + * sequences of RPC calls. Their sole purpose is to provide once-only + * semantics by allowing the server to identify replayed requests. + * + * The ->so_sema is held during all state_owner seqid-mutating operations: + * OPEN, OPEN_DOWNGRADE, and CLOSE. Its purpose is to properly serialize + * so_seqid. + */ +struct nfs4_state_owner { + struct list_head so_list; /* per-clientid list of state_owners */ + struct nfs4_client *so_client; + u32 so_id; /* 32-bit identifier, unique */ + struct semaphore so_sema; + u32 so_seqid; /* protected by so_sema */ + atomic_t so_count; + + struct rpc_cred *so_cred; /* Associated cred */ + struct list_head so_states; + struct list_head so_delegations; +}; + +/* + * struct nfs4_state maintains the client-side state for a given + * (state_owner,inode) tuple (OPEN) or state_owner (LOCK). + * + * OPEN: + * In order to know when to OPEN_DOWNGRADE or CLOSE the state on the server, + * we need to know how many files are open for reading or writing on a + * given inode. This information too is stored here. 
+ * + * LOCK: one nfs4_state (LOCK) to hold the lock stateid nfs4_state(OPEN) + */ + +struct nfs4_lock_state { + struct list_head ls_locks; /* Other lock stateids */ + struct nfs4_state * ls_state; /* Pointer to open state */ + fl_owner_t ls_owner; /* POSIX lock owner */ +#define NFS_LOCK_INITIALIZED 1 + int ls_flags; + u32 ls_seqid; + u32 ls_id; + nfs4_stateid ls_stateid; + atomic_t ls_count; +}; + +/* bits for nfs4_state->flags */ +enum { + LK_STATE_IN_USE, + NFS_DELEGATED_STATE, +}; + +struct nfs4_state { + struct list_head open_states; /* List of states for the same state_owner */ + struct list_head inode_states; /* List of states for the same inode */ + struct list_head lock_states; /* List of subservient lock stateids */ + + struct nfs4_state_owner *owner; /* Pointer to the open owner */ + struct inode *inode; /* Pointer to the inode */ + + unsigned long flags; /* Do we hold any locks? */ + struct semaphore lock_sema; /* Serializes file locking operations */ + spinlock_t state_lock; /* Protects the lock_states list */ + + nfs4_stateid stateid; + + unsigned int nreaders; + unsigned int nwriters; + int state; /* State on the server (R,W, or RW) */ + atomic_t count; +}; + + +struct nfs4_exception { + long timeout; + int retry; +}; + +struct nfs4_state_recovery_ops { + int (*recover_open)(struct nfs4_state_owner *, struct nfs4_state *); + int (*recover_lock)(struct nfs4_state *, struct file_lock *); +}; + +extern struct dentry_operations nfs4_dentry_operations; +extern struct inode_operations nfs4_dir_inode_operations; + +/* inode.c */ +extern ssize_t nfs4_getxattr(struct dentry *, const char *, void *, size_t); +extern int nfs4_setxattr(struct dentry *, const char *, const void *, size_t, int); +extern ssize_t nfs4_listxattr(struct dentry *, char *, size_t); + + +/* nfs4proc.c */ +extern int nfs4_map_errors(int err); +extern int nfs4_proc_setclientid(struct nfs4_client *, u32, unsigned short); +extern int nfs4_proc_setclientid_confirm(struct nfs4_client *); +extern int nfs4_proc_async_renew(struct nfs4_client *); +extern int nfs4_proc_renew(struct nfs4_client *); +extern int nfs4_do_close(struct inode *inode, struct nfs4_state *state, mode_t mode); +extern struct inode *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *); +extern int nfs4_open_revalidate(struct inode *, struct dentry *, int); + +extern struct nfs4_state_recovery_ops nfs4_reboot_recovery_ops; +extern struct nfs4_state_recovery_ops nfs4_network_partition_recovery_ops; + +extern const u32 nfs4_fattr_bitmap[2]; +extern const u32 nfs4_statfs_bitmap[2]; +extern const u32 nfs4_pathconf_bitmap[2]; +extern const u32 nfs4_fsinfo_bitmap[2]; + +/* nfs4renewd.c */ +extern void nfs4_schedule_state_renewal(struct nfs4_client *); +extern void nfs4_renewd_prepare_shutdown(struct nfs_server *); +extern void nfs4_kill_renewd(struct nfs4_client *); +extern void nfs4_renew_state(void *); + +/* nfs4state.c */ +extern void init_nfsv4_state(struct nfs_server *); +extern void destroy_nfsv4_state(struct nfs_server *); +extern struct nfs4_client *nfs4_get_client(struct in_addr *); +extern void nfs4_put_client(struct nfs4_client *clp); +extern int nfs4_init_client(struct nfs4_client *clp); +extern struct nfs4_client *nfs4_find_client(struct in_addr *); +extern u32 nfs4_alloc_lockowner_id(struct nfs4_client *); + +extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *); +extern void nfs4_put_state_owner(struct nfs4_state_owner *); +extern void nfs4_drop_state_owner(struct nfs4_state_owner *); 
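The nfs4_state comment above notes that the client counts how many openers hold the state for reading and for writing, so it can tell whether releasing one opener should CLOSE the state on the server or merely OPEN_DOWNGRADE it. A small self-contained illustration of that bookkeeping, using made-up demo_* names rather than the kernel structures:

#include <stdio.h>

enum demo_action { KEEP, DOWNGRADE_TO_READ, DOWNGRADE_TO_WRITE, CLOSE };

struct demo_open_state {
	unsigned int nreaders;	/* openers holding the state for reading */
	unsigned int nwriters;	/* openers holding the state for writing */
};

/* One opener drops its (read, write) share: decide what to tell the server. */
static enum demo_action demo_release(struct demo_open_state *s, int had_read, int had_write)
{
	if (had_read && s->nreaders)
		s->nreaders--;
	if (had_write && s->nwriters)
		s->nwriters--;
	if (s->nreaders == 0 && s->nwriters == 0)
		return CLOSE;			/* last opener gone: CLOSE */
	if (had_write && s->nwriters == 0)
		return DOWNGRADE_TO_READ;	/* nobody writes any more */
	if (had_read && s->nreaders == 0)
		return DOWNGRADE_TO_WRITE;	/* nobody reads any more */
	return KEEP;				/* share mode unchanged */
}

int main(void)
{
	struct demo_open_state s = { .nreaders = 2, .nwriters = 1 };

	printf("%d\n", demo_release(&s, 1, 0));	/* KEEP (0) */
	printf("%d\n", demo_release(&s, 0, 1));	/* DOWNGRADE_TO_READ (1) */
	printf("%d\n", demo_release(&s, 1, 0));	/* CLOSE (3) */
	return 0;
}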
+extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *); +extern void nfs4_put_open_state(struct nfs4_state *); +extern void nfs4_close_state(struct nfs4_state *, mode_t); +extern struct nfs4_state *nfs4_find_state(struct inode *, struct rpc_cred *, mode_t mode); +extern void nfs4_increment_seqid(int status, struct nfs4_state_owner *sp); +extern void nfs4_schedule_state_recovery(struct nfs4_client *); +extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl); +extern void nfs4_increment_lock_seqid(int status, struct nfs4_lock_state *ls); +extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t); + +extern const nfs4_stateid zero_stateid; + +/* nfs4xdr.c */ +extern uint32_t *nfs4_decode_dirent(uint32_t *p, struct nfs_entry *entry, int plus); +extern struct rpc_procinfo nfs4_procedures[]; + +struct nfs4_mount_data; + +/* callback_xdr.c */ +extern struct svc_version nfs4_callback_version1; + +#else + +#define init_nfsv4_state(server) do { } while (0) +#define destroy_nfsv4_state(server) do { } while (0) +#define nfs4_put_state_owner(inode, owner) do { } while (0) +#define nfs4_put_open_state(state) do { } while (0) +#define nfs4_close_state(a, b) do { } while (0) + +#endif /* CONFIG_NFS_V4 */ +#endif /* __LINUX_FS_NFS_NFS4_FS.H */ diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 1d5cb3e..1b76f80 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -48,6 +48,7 @@ #include <linux/smp_lock.h> #include <linux/namei.h> +#include "nfs4_fs.h" #include "delegation.h" #define NFSDBG_FACILITY NFSDBG_PROC @@ -62,8 +63,6 @@ static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struc extern u32 *nfs4_decode_dirent(u32 *p, struct nfs_entry *entry, int plus); extern struct rpc_procinfo nfs4_procedures[]; -extern nfs4_stateid zero_stateid; - /* Prevent leaks of NFSv4 errors into userland */ int nfs4_map_errors(int err) { @@ -104,7 +103,7 @@ const u32 nfs4_statfs_bitmap[2] = { | FATTR4_WORD1_SPACE_TOTAL }; -u32 nfs4_pathconf_bitmap[2] = { +const u32 nfs4_pathconf_bitmap[2] = { FATTR4_WORD0_MAXLINK | FATTR4_WORD0_MAXNAME, 0 @@ -124,7 +123,7 @@ static void nfs4_setup_readdir(u64 cookie, u32 *verifier, struct dentry *dentry, BUG_ON(readdir->count < 80); if (cookie > 2) { - readdir->cookie = (cookie > 2) ? 
cookie : 0; + readdir->cookie = cookie; memcpy(&readdir->verifier, verifier, sizeof(readdir->verifier)); return; } @@ -270,14 +269,9 @@ static int nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *sta int err; do { err = _nfs4_open_reclaim(sp, state); - switch (err) { - case 0: - case -NFS4ERR_STALE_CLIENTID: - case -NFS4ERR_STALE_STATEID: - case -NFS4ERR_EXPIRED: - return err; - } - err = nfs4_handle_exception(server, err, &exception); + if (err != -NFS4ERR_DELAY) + break; + nfs4_handle_exception(server, err, &exception); } while (exception.retry); return err; } @@ -509,6 +503,20 @@ out_stale: goto out_nodeleg; } +static inline int nfs4_do_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state, struct dentry *dentry) +{ + struct nfs_server *server = NFS_SERVER(dentry->d_inode); + struct nfs4_exception exception = { }; + int err; + + do { + err = _nfs4_open_expired(sp, state, dentry); + if (err == -NFS4ERR_DELAY) + nfs4_handle_exception(server, err, &exception); + } while (exception.retry); + return err; +} + static int nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state) { struct nfs_inode *nfsi = NFS_I(state->inode); @@ -521,7 +529,7 @@ static int nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *sta continue; get_nfs_open_context(ctx); spin_unlock(&state->inode->i_lock); - status = _nfs4_open_expired(sp, state, ctx->dentry); + status = nfs4_do_open_expired(sp, state, ctx->dentry); put_nfs_open_context(ctx); return status; } @@ -748,11 +756,10 @@ static int _nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr, fattr->valid = 0; - if (state != NULL) + if (state != NULL) { msg.rpc_cred = state->owner->so_cred; - if (sattr->ia_valid & ATTR_SIZE) - nfs4_copy_stateid(&arg.stateid, state, NULL); - else + nfs4_copy_stateid(&arg.stateid, state, current->files); + } else memcpy(&arg.stateid, &zero_stateid, sizeof(arg.stateid)); return rpc_call_sync(server->client, &msg, 0); @@ -1116,47 +1123,31 @@ static int nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, struct iattr *sattr) { - struct inode * inode = dentry->d_inode; - int size_change = sattr->ia_valid & ATTR_SIZE; - struct nfs4_state *state = NULL; - int need_iput = 0; + struct rpc_cred *cred; + struct inode *inode = dentry->d_inode; + struct nfs4_state *state; int status; fattr->valid = 0; - if (size_change) { - struct rpc_cred *cred = rpcauth_lookupcred(NFS_SERVER(inode)->client->cl_auth, 0); - if (IS_ERR(cred)) - return PTR_ERR(cred); + cred = rpcauth_lookupcred(NFS_SERVER(inode)->client->cl_auth, 0); + if (IS_ERR(cred)) + return PTR_ERR(cred); + /* Search for an existing WRITE delegation first */ + state = nfs4_open_delegated(inode, FMODE_WRITE, cred); + if (!IS_ERR(state)) { + /* NB: nfs4_open_delegated() bumps the inode->i_count */ + iput(inode); + } else { + /* Search for an existing open(O_WRITE) stateid */ state = nfs4_find_state(inode, cred, FMODE_WRITE); - if (state == NULL) { - state = nfs4_open_delegated(dentry->d_inode, - FMODE_WRITE, cred); - if (IS_ERR(state)) - state = nfs4_do_open(dentry->d_parent->d_inode, - dentry, FMODE_WRITE, - NULL, cred); - need_iput = 1; - } - put_rpccred(cred); - if (IS_ERR(state)) - return PTR_ERR(state); - - if (state->inode != inode) { - printk(KERN_WARNING "nfs: raced in setattr (%p != %p), returning -EIO\n", inode, state->inode); - status = -EIO; - goto out; - } } + status = nfs4_do_setattr(NFS_SERVER(inode), fattr, NFS_FH(inode), sattr, state); -out: - if (state) { - inode = state->inode; + if (state 
!= NULL) nfs4_close_state(state, FMODE_WRITE); - if (need_iput) - iput(inode); - } + put_rpccred(cred); return status; } @@ -1731,6 +1722,10 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, }; int status; + dprintk("%s: dentry = %s/%s, cookie = %Lu\n", __FUNCTION__, + dentry->d_parent->d_name.name, + dentry->d_name.name, + (unsigned long long)cookie); lock_kernel(); nfs4_setup_readdir(cookie, NFS_COOKIEVERF(dir), dentry, &args); res.pgbase = args.pgbase; @@ -1738,6 +1733,7 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, if (status == 0) memcpy(NFS_COOKIEVERF(dir), res.verifier.data, NFS4_VERIFIER_SIZE); unlock_kernel(); + dprintk("%s: returns %d\n", __FUNCTION__, status); return status; } @@ -2163,6 +2159,193 @@ nfs4_proc_file_release(struct inode *inode, struct file *filp) return 0; } +static inline int nfs4_server_supports_acls(struct nfs_server *server) +{ + return (server->caps & NFS_CAP_ACLS) + && (server->acl_bitmask & ACL4_SUPPORT_ALLOW_ACL) + && (server->acl_bitmask & ACL4_SUPPORT_DENY_ACL); +} + +/* Assuming that XATTR_SIZE_MAX is a multiple of PAGE_CACHE_SIZE, and that + * it's OK to put sizeof(void) * (XATTR_SIZE_MAX/PAGE_CACHE_SIZE) bytes on + * the stack. + */ +#define NFS4ACL_MAXPAGES (XATTR_SIZE_MAX >> PAGE_CACHE_SHIFT) + +static void buf_to_pages(const void *buf, size_t buflen, + struct page **pages, unsigned int *pgbase) +{ + const void *p = buf; + + *pgbase = offset_in_page(buf); + p -= *pgbase; + while (p < buf + buflen) { + *(pages++) = virt_to_page(p); + p += PAGE_CACHE_SIZE; + } +} + +struct nfs4_cached_acl { + int cached; + size_t len; + char data[0]; +}; + +static void nfs4_set_cached_acl(struct inode *inode, struct nfs4_cached_acl *acl) +{ + struct nfs_inode *nfsi = NFS_I(inode); + + spin_lock(&inode->i_lock); + kfree(nfsi->nfs4_acl); + nfsi->nfs4_acl = acl; + spin_unlock(&inode->i_lock); +} + +static void nfs4_zap_acl_attr(struct inode *inode) +{ + nfs4_set_cached_acl(inode, NULL); +} + +static inline ssize_t nfs4_read_cached_acl(struct inode *inode, char *buf, size_t buflen) +{ + struct nfs_inode *nfsi = NFS_I(inode); + struct nfs4_cached_acl *acl; + int ret = -ENOENT; + + spin_lock(&inode->i_lock); + acl = nfsi->nfs4_acl; + if (acl == NULL) + goto out; + if (buf == NULL) /* user is just asking for length */ + goto out_len; + if (acl->cached == 0) + goto out; + ret = -ERANGE; /* see getxattr(2) man page */ + if (acl->len > buflen) + goto out; + memcpy(buf, acl->data, acl->len); +out_len: + ret = acl->len; +out: + spin_unlock(&inode->i_lock); + return ret; +} + +static void nfs4_write_cached_acl(struct inode *inode, const char *buf, size_t acl_len) +{ + struct nfs4_cached_acl *acl; + + if (buf && acl_len <= PAGE_SIZE) { + acl = kmalloc(sizeof(*acl) + acl_len, GFP_KERNEL); + if (acl == NULL) + goto out; + acl->cached = 1; + memcpy(acl->data, buf, acl_len); + } else { + acl = kmalloc(sizeof(*acl), GFP_KERNEL); + if (acl == NULL) + goto out; + acl->cached = 0; + } + acl->len = acl_len; +out: + nfs4_set_cached_acl(inode, acl); +} + +static inline ssize_t nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t buflen) +{ + struct page *pages[NFS4ACL_MAXPAGES]; + struct nfs_getaclargs args = { + .fh = NFS_FH(inode), + .acl_pages = pages, + .acl_len = buflen, + }; + size_t resp_len = buflen; + void *resp_buf; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GETACL], + .rpc_argp = &args, + .rpc_resp = &resp_len, + }; + struct page *localpage = NULL; + int ret; + + if (buflen < 
PAGE_SIZE) { + /* As long as we're doing a round trip to the server anyway, + * let's be prepared for a page of acl data. */ + localpage = alloc_page(GFP_KERNEL); + resp_buf = page_address(localpage); + if (localpage == NULL) + return -ENOMEM; + args.acl_pages[0] = localpage; + args.acl_pgbase = 0; + args.acl_len = PAGE_SIZE; + } else { + resp_buf = buf; + buf_to_pages(buf, buflen, args.acl_pages, &args.acl_pgbase); + } + ret = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); + if (ret) + goto out_free; + if (resp_len > args.acl_len) + nfs4_write_cached_acl(inode, NULL, resp_len); + else + nfs4_write_cached_acl(inode, resp_buf, resp_len); + if (buf) { + ret = -ERANGE; + if (resp_len > buflen) + goto out_free; + if (localpage) + memcpy(buf, resp_buf, resp_len); + } + ret = resp_len; +out_free: + if (localpage) + __free_page(localpage); + return ret; +} + +static ssize_t nfs4_proc_get_acl(struct inode *inode, void *buf, size_t buflen) +{ + struct nfs_server *server = NFS_SERVER(inode); + int ret; + + if (!nfs4_server_supports_acls(server)) + return -EOPNOTSUPP; + ret = nfs_revalidate_inode(server, inode); + if (ret < 0) + return ret; + ret = nfs4_read_cached_acl(inode, buf, buflen); + if (ret != -ENOENT) + return ret; + return nfs4_get_acl_uncached(inode, buf, buflen); +} + +static int nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen) +{ + struct nfs_server *server = NFS_SERVER(inode); + struct page *pages[NFS4ACL_MAXPAGES]; + struct nfs_setaclargs arg = { + .fh = NFS_FH(inode), + .acl_pages = pages, + .acl_len = buflen, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETACL], + .rpc_argp = &arg, + .rpc_resp = NULL, + }; + int ret; + + if (!nfs4_server_supports_acls(server)) + return -EOPNOTSUPP; + buf_to_pages(buf, buflen, arg.acl_pages, &arg.acl_pgbase); + ret = rpc_call_sync(NFS_SERVER(inode)->client, &msg, 0); + if (ret == 0) + nfs4_write_cached_acl(inode, buf, buflen); + return ret; +} + static int nfs4_async_handle_error(struct rpc_task *task, struct nfs_server *server) { @@ -2448,14 +2631,11 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock down_read(&clp->cl_sem); nlo.clientid = clp->cl_clientid; down(&state->lock_sema); - lsp = nfs4_find_lock_state(state, request->fl_owner); - if (lsp) - nlo.id = lsp->ls_id; - else { - spin_lock(&clp->cl_lock); - nlo.id = nfs4_alloc_lockowner_id(clp); - spin_unlock(&clp->cl_lock); - } + status = nfs4_set_lock_state(state, request); + if (status != 0) + goto out; + lsp = request->fl_u.nfs4_fl.owner; + nlo.id = lsp->ls_id; arg.u.lockt = &nlo; status = rpc_call_sync(server->client, &msg, 0); if (!status) { @@ -2476,8 +2656,7 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock request->fl_pid = 0; status = 0; } - if (lsp) - nfs4_put_lock_state(lsp); +out: up(&state->lock_sema); up_read(&clp->cl_sem); return status; @@ -2537,28 +2716,26 @@ static int _nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock }; struct nfs4_lock_state *lsp; struct nfs_locku_opargs luargs; - int status = 0; + int status; down_read(&clp->cl_sem); down(&state->lock_sema); - lsp = nfs4_find_lock_state(state, request->fl_owner); - if (!lsp) + status = nfs4_set_lock_state(state, request); + if (status != 0) goto out; + lsp = request->fl_u.nfs4_fl.owner; /* We might have lost the locks! 
*/ - if ((lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0) { - luargs.seqid = lsp->ls_seqid; - memcpy(&luargs.stateid, &lsp->ls_stateid, sizeof(luargs.stateid)); - arg.u.locku = &luargs; - status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR); - nfs4_increment_lock_seqid(status, lsp); - } + if ((lsp->ls_flags & NFS_LOCK_INITIALIZED) == 0) + goto out; + luargs.seqid = lsp->ls_seqid; + memcpy(&luargs.stateid, &lsp->ls_stateid, sizeof(luargs.stateid)); + arg.u.locku = &luargs; + status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR); + nfs4_increment_lock_seqid(status, lsp); - if (status == 0) { + if (status == 0) memcpy(&lsp->ls_stateid, &res.u.stateid, sizeof(lsp->ls_stateid)); - nfs4_notify_unlck(state, request, lsp); - } - nfs4_put_lock_state(lsp); out: up(&state->lock_sema); if (status == 0) @@ -2584,7 +2761,7 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *r { struct inode *inode = state->inode; struct nfs_server *server = NFS_SERVER(inode); - struct nfs4_lock_state *lsp; + struct nfs4_lock_state *lsp = request->fl_u.nfs4_fl.owner; struct nfs_lockargs arg = { .fh = NFS_FH(inode), .type = nfs4_lck_type(cmd, request), @@ -2606,9 +2783,6 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *r }; int status; - lsp = nfs4_get_lock_state(state, request->fl_owner); - if (lsp == NULL) - return -ENOMEM; if (!(lsp->ls_flags & NFS_LOCK_INITIALIZED)) { struct nfs4_state_owner *owner = state->owner; struct nfs_open_to_lock otl = { @@ -2630,38 +2804,57 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *r * seqid mutating errors */ nfs4_increment_seqid(status, owner); up(&owner->so_sema); + if (status == 0) { + lsp->ls_flags |= NFS_LOCK_INITIALIZED; + lsp->ls_seqid++; + } } else { struct nfs_exist_lock el = { .seqid = lsp->ls_seqid, }; memcpy(&el.stateid, &lsp->ls_stateid, sizeof(el.stateid)); largs.u.exist_lock = ⪙ - largs.new_lock_owner = 0; arg.u.lock = &largs; status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR); + /* increment seqid on success, and * seqid mutating errors*/ + nfs4_increment_lock_seqid(status, lsp); } - /* increment seqid on success, and * seqid mutating errors*/ - nfs4_increment_lock_seqid(status, lsp); /* save the returned stateid. 
*/ - if (status == 0) { + if (status == 0) memcpy(&lsp->ls_stateid, &res.u.stateid, sizeof(nfs4_stateid)); - lsp->ls_flags |= NFS_LOCK_INITIALIZED; - if (!reclaim) - nfs4_notify_setlk(state, request, lsp); - } else if (status == -NFS4ERR_DENIED) + else if (status == -NFS4ERR_DENIED) status = -EAGAIN; - nfs4_put_lock_state(lsp); return status; } static int nfs4_lock_reclaim(struct nfs4_state *state, struct file_lock *request) { - return _nfs4_do_setlk(state, F_SETLK, request, 1); + struct nfs_server *server = NFS_SERVER(state->inode); + struct nfs4_exception exception = { }; + int err; + + do { + err = _nfs4_do_setlk(state, F_SETLK, request, 1); + if (err != -NFS4ERR_DELAY) + break; + nfs4_handle_exception(server, err, &exception); + } while (exception.retry); + return err; } static int nfs4_lock_expired(struct nfs4_state *state, struct file_lock *request) { - return _nfs4_do_setlk(state, F_SETLK, request, 0); + struct nfs_server *server = NFS_SERVER(state->inode); + struct nfs4_exception exception = { }; + int err; + + do { + err = _nfs4_do_setlk(state, F_SETLK, request, 0); + if (err != -NFS4ERR_DELAY) + break; + nfs4_handle_exception(server, err, &exception); + } while (exception.retry); + return err; } static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request) @@ -2671,7 +2864,9 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock down_read(&clp->cl_sem); down(&state->lock_sema); - status = _nfs4_do_setlk(state, cmd, request, 0); + status = nfs4_set_lock_state(state, request); + if (status == 0) + status = _nfs4_do_setlk(state, cmd, request, 0); up(&state->lock_sema); if (status == 0) { /* Note: we always want to sleep here! */ @@ -2729,10 +2924,53 @@ nfs4_proc_lock(struct file *filp, int cmd, struct file_lock *request) if (signalled()) break; } while(status < 0); - return status; } + +#define XATTR_NAME_NFSV4_ACL "system.nfs4_acl" + +int nfs4_setxattr(struct dentry *dentry, const char *key, const void *buf, + size_t buflen, int flags) +{ + struct inode *inode = dentry->d_inode; + + if (strcmp(key, XATTR_NAME_NFSV4_ACL) != 0) + return -EOPNOTSUPP; + + if (!S_ISREG(inode->i_mode) && + (!S_ISDIR(inode->i_mode) || inode->i_mode & S_ISVTX)) + return -EPERM; + + return nfs4_proc_set_acl(inode, buf, buflen); +} + +/* The getxattr man page suggests returning -ENODATA for unknown attributes, + * and that's what we'll do for e.g. user attributes that haven't been set. + * But we'll follow ext2/ext3's lead by returning -EOPNOTSUPP for unsupported + * attributes in kernel-managed attribute namespaces. 
*/ +ssize_t nfs4_getxattr(struct dentry *dentry, const char *key, void *buf, + size_t buflen) +{ + struct inode *inode = dentry->d_inode; + + if (strcmp(key, XATTR_NAME_NFSV4_ACL) != 0) + return -EOPNOTSUPP; + + return nfs4_proc_get_acl(inode, buf, buflen); +} + +ssize_t nfs4_listxattr(struct dentry *dentry, char *buf, size_t buflen) +{ + size_t len = strlen(XATTR_NAME_NFSV4_ACL) + 1; + + if (buf && buflen < len) + return -ERANGE; + if (buf) + memcpy(buf, XATTR_NAME_NFSV4_ACL, len); + return len; +} + struct nfs4_state_recovery_ops nfs4_reboot_recovery_ops = { .recover_open = nfs4_open_reclaim, .recover_lock = nfs4_lock_reclaim, @@ -2743,10 +2981,20 @@ struct nfs4_state_recovery_ops nfs4_network_partition_recovery_ops = { .recover_lock = nfs4_lock_expired, }; +static struct inode_operations nfs4_file_inode_operations = { + .permission = nfs_permission, + .getattr = nfs_getattr, + .setattr = nfs_setattr, + .getxattr = nfs4_getxattr, + .setxattr = nfs4_setxattr, + .listxattr = nfs4_listxattr, +}; + struct nfs_rpc_ops nfs_v4_clientops = { .version = 4, /* protocol version */ .dentry_ops = &nfs4_dentry_operations, .dir_inode_ops = &nfs4_dir_inode_operations, + .file_inode_ops = &nfs4_file_inode_operations, .getroot = nfs4_proc_get_root, .getattr = nfs4_proc_getattr, .setattr = nfs4_proc_setattr, @@ -2777,6 +3025,7 @@ struct nfs_rpc_ops nfs_v4_clientops = { .file_open = nfs4_proc_file_open, .file_release = nfs4_proc_file_release, .lock = nfs4_proc_lock, + .clear_acl_cache = nfs4_zap_acl_attr, }; /* diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c index 667e06f..a300162 100644 --- a/fs/nfs/nfs4renewd.c +++ b/fs/nfs/nfs4renewd.c @@ -53,6 +53,7 @@ #include <linux/nfs.h> #include <linux/nfs4.h> #include <linux/nfs_fs.h> +#include "nfs4_fs.h" #define NFSDBG_FACILITY NFSDBG_PROC diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 231cebc..afe587d 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -46,24 +46,18 @@ #include <linux/workqueue.h> #include <linux/bitops.h> +#include "nfs4_fs.h" #include "callback.h" #include "delegation.h" #define OPENOWNER_POOL_SIZE 8 -static DEFINE_SPINLOCK(state_spinlock); - -nfs4_stateid zero_stateid; - -#if 0 -nfs4_stateid one_stateid = - { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; -#endif +const nfs4_stateid zero_stateid; +static DEFINE_SPINLOCK(state_spinlock); static LIST_HEAD(nfs4_clientid_list); static void nfs4_recover_state(void *); -extern void nfs4_renew_state(void *); void init_nfsv4_state(struct nfs_server *server) @@ -116,6 +110,7 @@ nfs4_alloc_client(struct in_addr *addr) INIT_LIST_HEAD(&clp->cl_superblocks); init_waitqueue_head(&clp->cl_waitq); rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS4 client"); + clp->cl_rpcclient = ERR_PTR(-EINVAL); clp->cl_boot_time = CURRENT_TIME; clp->cl_state = 1 << NFS4CLNT_OK; return clp; @@ -137,7 +132,7 @@ nfs4_free_client(struct nfs4_client *clp) if (clp->cl_cred) put_rpccred(clp->cl_cred); nfs_idmap_delete(clp); - if (clp->cl_rpcclient) + if (!IS_ERR(clp->cl_rpcclient)) rpc_shutdown_client(clp->cl_rpcclient); kfree(clp); nfs_callback_down(); @@ -365,7 +360,7 @@ nfs4_alloc_open_state(void) atomic_set(&state->count, 1); INIT_LIST_HEAD(&state->lock_states); init_MUTEX(&state->lock_sema); - rwlock_init(&state->state_lock); + spin_lock_init(&state->state_lock); return state; } @@ -547,16 +542,6 @@ __nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner) return NULL; } -struct nfs4_lock_state * -nfs4_find_lock_state(struct nfs4_state 
*state, fl_owner_t fl_owner) -{ - struct nfs4_lock_state *lsp; - read_lock(&state->state_lock); - lsp = __nfs4_find_lock_state(state, fl_owner); - read_unlock(&state->state_lock); - return lsp; -} - /* * Return a compatible lock_state. If no initialized lock_state structure * exists, return an uninitialized one. @@ -573,14 +558,13 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f return NULL; lsp->ls_flags = 0; lsp->ls_seqid = 0; /* arbitrary */ - lsp->ls_id = -1; memset(lsp->ls_stateid.data, 0, sizeof(lsp->ls_stateid.data)); atomic_set(&lsp->ls_count, 1); lsp->ls_owner = fl_owner; - INIT_LIST_HEAD(&lsp->ls_locks); spin_lock(&clp->cl_lock); lsp->ls_id = nfs4_alloc_lockowner_id(clp); spin_unlock(&clp->cl_lock); + INIT_LIST_HEAD(&lsp->ls_locks); return lsp; } @@ -590,121 +574,112 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f * * The caller must be holding state->lock_sema and clp->cl_sem */ -struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_owner_t owner) +static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_owner_t owner) { - struct nfs4_lock_state * lsp; + struct nfs4_lock_state *lsp, *new = NULL; - lsp = nfs4_find_lock_state(state, owner); - if (lsp == NULL) - lsp = nfs4_alloc_lock_state(state, owner); + for(;;) { + spin_lock(&state->state_lock); + lsp = __nfs4_find_lock_state(state, owner); + if (lsp != NULL) + break; + if (new != NULL) { + new->ls_state = state; + list_add(&new->ls_locks, &state->lock_states); + set_bit(LK_STATE_IN_USE, &state->flags); + lsp = new; + new = NULL; + break; + } + spin_unlock(&state->state_lock); + new = nfs4_alloc_lock_state(state, owner); + if (new == NULL) + return NULL; + } + spin_unlock(&state->state_lock); + kfree(new); return lsp; } /* - * Byte-range lock aware utility to initialize the stateid of read/write - * requests. + * Release reference to lock_state, and free it if we see that + * it is no longer in use */ -void -nfs4_copy_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_owner_t fl_owner) +static void nfs4_put_lock_state(struct nfs4_lock_state *lsp) { - if (test_bit(LK_STATE_IN_USE, &state->flags)) { - struct nfs4_lock_state *lsp; + struct nfs4_state *state; - lsp = nfs4_find_lock_state(state, fl_owner); - if (lsp) { - memcpy(dst, &lsp->ls_stateid, sizeof(*dst)); - nfs4_put_lock_state(lsp); - return; - } - } - memcpy(dst, &state->stateid, sizeof(*dst)); + if (lsp == NULL) + return; + state = lsp->ls_state; + if (!atomic_dec_and_lock(&lsp->ls_count, &state->state_lock)) + return; + list_del(&lsp->ls_locks); + if (list_empty(&state->lock_states)) + clear_bit(LK_STATE_IN_USE, &state->flags); + spin_unlock(&state->state_lock); + kfree(lsp); } -/* -* Called with state->lock_sema and clp->cl_sem held. -*/ -void nfs4_increment_lock_seqid(int status, struct nfs4_lock_state *lsp) +static void nfs4_fl_copy_lock(struct file_lock *dst, struct file_lock *src) { - if (status == NFS_OK || seqid_mutating_err(-status)) - lsp->ls_seqid++; -} + struct nfs4_lock_state *lsp = src->fl_u.nfs4_fl.owner; -/* -* Check to see if the request lock (type FL_UNLK) effects the fl lock. 
-* -* fl and request must have the same posix owner -* -* return: -* 0 -> fl not effected by request -* 1 -> fl consumed by request -*/ + dst->fl_u.nfs4_fl.owner = lsp; + atomic_inc(&lsp->ls_count); +} -static int -nfs4_check_unlock(struct file_lock *fl, struct file_lock *request) +static void nfs4_fl_release_lock(struct file_lock *fl) { - if (fl->fl_start >= request->fl_start && fl->fl_end <= request->fl_end) - return 1; - return 0; + nfs4_put_lock_state(fl->fl_u.nfs4_fl.owner); } -/* - * Post an initialized lock_state on the state->lock_states list. - */ -void nfs4_notify_setlk(struct nfs4_state *state, struct file_lock *request, struct nfs4_lock_state *lsp) +static struct file_lock_operations nfs4_fl_lock_ops = { + .fl_copy_lock = nfs4_fl_copy_lock, + .fl_release_private = nfs4_fl_release_lock, +}; + +int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl) { - if (!list_empty(&lsp->ls_locks)) - return; - atomic_inc(&lsp->ls_count); - write_lock(&state->state_lock); - list_add(&lsp->ls_locks, &state->lock_states); - set_bit(LK_STATE_IN_USE, &state->flags); - write_unlock(&state->state_lock); + struct nfs4_lock_state *lsp; + + if (fl->fl_ops != NULL) + return 0; + lsp = nfs4_get_lock_state(state, fl->fl_owner); + if (lsp == NULL) + return -ENOMEM; + fl->fl_u.nfs4_fl.owner = lsp; + fl->fl_ops = &nfs4_fl_lock_ops; + return 0; } -/* - * to decide to 'reap' lock state: - * 1) search i_flock for file_locks with fl.lock_state = to ls. - * 2) determine if unlock will consume found lock. - * if so, reap - * - * else, don't reap. - * +/* + * Byte-range lock aware utility to initialize the stateid of read/write + * requests. */ -void -nfs4_notify_unlck(struct nfs4_state *state, struct file_lock *request, struct nfs4_lock_state *lsp) +void nfs4_copy_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_owner_t fl_owner) { - struct inode *inode = state->inode; - struct file_lock *fl; + struct nfs4_lock_state *lsp; - for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { - if (!(fl->fl_flags & FL_POSIX)) - continue; - if (fl->fl_owner != lsp->ls_owner) - continue; - /* Exit if we find at least one lock which is not consumed */ - if (nfs4_check_unlock(fl,request) == 0) - return; - } + memcpy(dst, &state->stateid, sizeof(*dst)); + if (test_bit(LK_STATE_IN_USE, &state->flags) == 0) + return; - write_lock(&state->state_lock); - list_del_init(&lsp->ls_locks); - if (list_empty(&state->lock_states)) - clear_bit(LK_STATE_IN_USE, &state->flags); - write_unlock(&state->state_lock); + spin_lock(&state->state_lock); + lsp = __nfs4_find_lock_state(state, fl_owner); + if (lsp != NULL && (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0) + memcpy(dst, &lsp->ls_stateid, sizeof(*dst)); + spin_unlock(&state->state_lock); nfs4_put_lock_state(lsp); } /* - * Release reference to lock_state, and free it if we see that - * it is no longer in use - */ -void -nfs4_put_lock_state(struct nfs4_lock_state *lsp) +* Called with state->lock_sema and clp->cl_sem held. 
+*/ +void nfs4_increment_lock_seqid(int status, struct nfs4_lock_state *lsp) { - if (!atomic_dec_and_test(&lsp->ls_count)) - return; - BUG_ON (!list_empty(&lsp->ls_locks)); - kfree(lsp); + if (status == NFS_OK || seqid_mutating_err(-status)) + lsp->ls_seqid++; } /* diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 5f4de05..6c564ef 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -51,6 +51,7 @@ #include <linux/nfs4.h> #include <linux/nfs_fs.h> #include <linux/nfs_idmap.h> +#include "nfs4_fs.h" #define NFSDBG_FACILITY NFSDBG_XDR @@ -82,12 +83,16 @@ static int nfs_stat_to_errno(int); #define encode_getfh_maxsz (op_encode_hdr_maxsz) #define decode_getfh_maxsz (op_decode_hdr_maxsz + 1 + \ ((3+NFS4_FHSIZE) >> 2)) -#define encode_getattr_maxsz (op_encode_hdr_maxsz + 3) +#define nfs4_fattr_bitmap_maxsz 3 +#define encode_getattr_maxsz (op_encode_hdr_maxsz + nfs4_fattr_bitmap_maxsz) #define nfs4_name_maxsz (1 + ((3 + NFS4_MAXNAMLEN) >> 2)) #define nfs4_path_maxsz (1 + ((3 + NFS4_MAXPATHLEN) >> 2)) -#define nfs4_fattr_bitmap_maxsz (36 + 2 * nfs4_name_maxsz) -#define decode_getattr_maxsz (op_decode_hdr_maxsz + 3 + \ - nfs4_fattr_bitmap_maxsz) +/* This is based on getfattr, which uses the most attributes: */ +#define nfs4_fattr_value_maxsz (1 + (1 + 2 + 2 + 4 + 2 + 1 + 1 + 2 + 2 + \ + 3 + 3 + 3 + 2 * nfs4_name_maxsz)) +#define nfs4_fattr_maxsz (nfs4_fattr_bitmap_maxsz + \ + nfs4_fattr_value_maxsz) +#define decode_getattr_maxsz (op_decode_hdr_maxsz + nfs4_fattr_maxsz) #define encode_savefh_maxsz (op_encode_hdr_maxsz) #define decode_savefh_maxsz (op_decode_hdr_maxsz) #define encode_fsinfo_maxsz (op_encode_hdr_maxsz + 2) @@ -122,11 +127,11 @@ static int nfs_stat_to_errno(int); #define encode_symlink_maxsz (op_encode_hdr_maxsz + \ 1 + nfs4_name_maxsz + \ nfs4_path_maxsz + \ - nfs4_fattr_bitmap_maxsz) + nfs4_fattr_maxsz) #define decode_symlink_maxsz (op_decode_hdr_maxsz + 8) #define encode_create_maxsz (op_encode_hdr_maxsz + \ 2 + nfs4_name_maxsz + \ - nfs4_fattr_bitmap_maxsz) + nfs4_fattr_maxsz) #define decode_create_maxsz (op_decode_hdr_maxsz + 8) #define encode_delegreturn_maxsz (op_encode_hdr_maxsz + 4) #define decode_delegreturn_maxsz (op_decode_hdr_maxsz) @@ -205,7 +210,7 @@ static int nfs_stat_to_errno(int); #define NFS4_enc_setattr_sz (compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ op_encode_hdr_maxsz + 4 + \ - nfs4_fattr_bitmap_maxsz + \ + nfs4_fattr_maxsz + \ encode_getattr_maxsz) #define NFS4_dec_setattr_sz (compound_decode_hdr_maxsz + \ decode_putfh_maxsz + \ @@ -360,6 +365,20 @@ static int nfs_stat_to_errno(int); encode_delegreturn_maxsz) #define NFS4_dec_delegreturn_sz (compound_decode_hdr_maxsz + \ decode_delegreturn_maxsz) +#define NFS4_enc_getacl_sz (compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + encode_getattr_maxsz) +#define NFS4_dec_getacl_sz (compound_decode_hdr_maxsz + \ + decode_putfh_maxsz + \ + op_decode_hdr_maxsz + \ + nfs4_fattr_bitmap_maxsz + 1) +#define NFS4_enc_setacl_sz (compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + op_encode_hdr_maxsz + 4 + \ + nfs4_fattr_bitmap_maxsz + 1) +#define NFS4_dec_setacl_sz (compound_decode_hdr_maxsz + \ + decode_putfh_maxsz + \ + op_decode_hdr_maxsz + nfs4_fattr_bitmap_maxsz) static struct { unsigned int mode; @@ -459,7 +478,7 @@ static int encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const s * In the worst-case, this would be * 12(bitmap) + 4(attrlen) + 8(size) + 4(mode) + 4(atime) + 4(mtime) * = 36 bytes, plus any contribution from variable-length fields - * such as owner/group/acl's. 
+ * such as owner/group. */ len = 16; @@ -660,8 +679,6 @@ static int encode_getattr_two(struct xdr_stream *xdr, uint32_t bm0, uint32_t bm1 static int encode_getfattr(struct xdr_stream *xdr, const u32* bitmask) { - extern u32 nfs4_fattr_bitmap[]; - return encode_getattr_two(xdr, bitmask[0] & nfs4_fattr_bitmap[0], bitmask[1] & nfs4_fattr_bitmap[1]); @@ -669,8 +686,6 @@ static int encode_getfattr(struct xdr_stream *xdr, const u32* bitmask) static int encode_fsinfo(struct xdr_stream *xdr, const u32* bitmask) { - extern u32 nfs4_fsinfo_bitmap[]; - return encode_getattr_two(xdr, bitmask[0] & nfs4_fsinfo_bitmap[0], bitmask[1] & nfs4_fsinfo_bitmap[1]); } @@ -969,7 +984,6 @@ static int encode_putrootfh(struct xdr_stream *xdr) static void encode_stateid(struct xdr_stream *xdr, const struct nfs_open_context *ctx) { - extern nfs4_stateid zero_stateid; nfs4_stateid stateid; uint32_t *p; @@ -1000,6 +1014,10 @@ static int encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args) static int encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg *readdir, struct rpc_rqst *req) { struct rpc_auth *auth = req->rq_task->tk_auth; + uint32_t attrs[2] = { + FATTR4_WORD0_RDATTR_ERROR|FATTR4_WORD0_FILEID, + FATTR4_WORD1_MOUNTED_ON_FILEID, + }; int replen; uint32_t *p; @@ -1010,13 +1028,20 @@ static int encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg WRITE32(readdir->count >> 1); /* We're not doing readdirplus */ WRITE32(readdir->count); WRITE32(2); - if (readdir->bitmask[1] & FATTR4_WORD1_MOUNTED_ON_FILEID) { - WRITE32(0); - WRITE32(FATTR4_WORD1_MOUNTED_ON_FILEID); - } else { - WRITE32(FATTR4_WORD0_FILEID); - WRITE32(0); - } + /* Switch to mounted_on_fileid if the server supports it */ + if (readdir->bitmask[1] & FATTR4_WORD1_MOUNTED_ON_FILEID) + attrs[0] &= ~FATTR4_WORD0_FILEID; + else + attrs[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID; + WRITE32(attrs[0] & readdir->bitmask[0]); + WRITE32(attrs[1] & readdir->bitmask[1]); + dprintk("%s: cookie = %Lu, verifier = 0x%x%x, bitmap = 0x%x%x\n", + __FUNCTION__, + (unsigned long long)readdir->cookie, + ((u32 *)readdir->verifier.data)[0], + ((u32 *)readdir->verifier.data)[1], + attrs[0] & readdir->bitmask[0], + attrs[1] & readdir->bitmask[1]); /* set up reply kvec * toplevel_status + taglen + rescount + OP_PUTFH + status @@ -1025,6 +1050,9 @@ static int encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg replen = (RPC_REPHDRSIZE + auth->au_rslack + 9) << 2; xdr_inline_pages(&req->rq_rcv_buf, replen, readdir->pages, readdir->pgbase, readdir->count); + dprintk("%s: inlined page args = (%u, %p, %u, %u)\n", + __FUNCTION__, replen, readdir->pages, + readdir->pgbase, readdir->count); return 0; } @@ -1089,6 +1117,25 @@ static int encode_renew(struct xdr_stream *xdr, const struct nfs4_client *client } static int +encode_setacl(struct xdr_stream *xdr, struct nfs_setaclargs *arg) +{ + uint32_t *p; + + RESERVE_SPACE(4+sizeof(zero_stateid.data)); + WRITE32(OP_SETATTR); + WRITEMEM(zero_stateid.data, sizeof(zero_stateid.data)); + RESERVE_SPACE(2*4); + WRITE32(1); + WRITE32(FATTR4_WORD0_ACL); + if (arg->acl_len % 4) + return -EINVAL; + RESERVE_SPACE(4); + WRITE32(arg->acl_len); + xdr_write_pages(xdr, arg->acl_pages, arg->acl_pgbase, arg->acl_len); + return 0; +} + +static int encode_savefh(struct xdr_stream *xdr) { uint32_t *p; @@ -1632,6 +1679,34 @@ out: } /* + * Encode a GETACL request + */ +static int +nfs4_xdr_enc_getacl(struct rpc_rqst *req, uint32_t *p, + struct nfs_getaclargs *args) +{ + struct xdr_stream xdr; + struct 
rpc_auth *auth = req->rq_task->tk_auth; + struct compound_hdr hdr = { + .nops = 2, + }; + int replen, status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + status = encode_putfh(&xdr, args->fh); + if (status) + goto out; + status = encode_getattr_two(&xdr, FATTR4_WORD0_ACL, 0); + /* set up reply buffer: */ + replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS4_dec_getacl_sz) << 2; + xdr_inline_pages(&req->rq_rcv_buf, replen, + args->acl_pages, args->acl_pgbase, args->acl_len); +out: + return status; +} + +/* * Encode a WRITE request */ static int nfs4_xdr_enc_write(struct rpc_rqst *req, uint32_t *p, struct nfs_writeargs *args) @@ -1697,7 +1772,6 @@ static int nfs4_xdr_enc_fsinfo(struct rpc_rqst *req, uint32_t *p, struct nfs4_fs */ static int nfs4_xdr_enc_pathconf(struct rpc_rqst *req, uint32_t *p, const struct nfs4_pathconf_arg *args) { - extern u32 nfs4_pathconf_bitmap[2]; struct xdr_stream xdr; struct compound_hdr hdr = { .nops = 2, @@ -1718,7 +1792,6 @@ static int nfs4_xdr_enc_pathconf(struct rpc_rqst *req, uint32_t *p, const struct */ static int nfs4_xdr_enc_statfs(struct rpc_rqst *req, uint32_t *p, const struct nfs4_statfs_arg *args) { - extern u32 nfs4_statfs_bitmap[]; struct xdr_stream xdr; struct compound_hdr hdr = { .nops = 2, @@ -3003,6 +3076,11 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n return status; READ_BUF(8); COPYMEM(readdir->verifier.data, 8); + dprintk("%s: verifier = 0x%x%x\n", + __FUNCTION__, + ((u32 *)readdir->verifier.data)[0], + ((u32 *)readdir->verifier.data)[1]); + hdrlen = (char *) p - (char *) iov->iov_base; recvd = rcvbuf->len - hdrlen; @@ -3017,12 +3095,14 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n for (nr = 0; *p++; nr++) { if (p + 3 > end) goto short_pkt; + dprintk("cookie = %Lu, ", *((unsigned long long *)p)); p += 2; /* cookie */ len = ntohl(*p++); /* filename length */ if (len > NFS4_MAXNAMLEN) { printk(KERN_WARNING "NFS: giant filename in readdir (len 0x%x)\n", len); goto err_unmap; } + dprintk("filename = %*s\n", len, (char *)p); p += XDR_QUADLEN(len); if (p + 1 > end) goto short_pkt; @@ -3042,6 +3122,7 @@ out: kunmap_atomic(kaddr, KM_USER0); return 0; short_pkt: + dprintk("%s: short packet at entry %d\n", __FUNCTION__, nr); entry[0] = entry[1] = 0; /* truncate listing ? */ if (!nr) { @@ -3127,6 +3208,47 @@ static int decode_renew(struct xdr_stream *xdr) return decode_op_hdr(xdr, OP_RENEW); } +static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req, + size_t *acl_len) +{ + uint32_t *savep; + uint32_t attrlen, + bitmap[2] = {0}; + struct kvec *iov = req->rq_rcv_buf.head; + int status; + + *acl_len = 0; + if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0) + goto out; + if ((status = decode_attr_bitmap(xdr, bitmap)) != 0) + goto out; + if ((status = decode_attr_length(xdr, &attrlen, &savep)) != 0) + goto out; + + if (unlikely(bitmap[0] & (FATTR4_WORD0_ACL - 1U))) + return -EIO; + if (likely(bitmap[0] & FATTR4_WORD0_ACL)) { + int hdrlen, recvd; + + /* We ignore &savep and don't do consistency checks on + * the attr length. Let userspace figure it out.... 
*/ + hdrlen = (u8 *)xdr->p - (u8 *)iov->iov_base; + recvd = req->rq_rcv_buf.len - hdrlen; + if (attrlen > recvd) { + printk(KERN_WARNING "NFS: server cheating in getattr" + " acl reply: attrlen %u > recvd %u\n", + attrlen, recvd); + return -EINVAL; + } + if (attrlen <= *acl_len) + xdr_read_pages(xdr, attrlen); + *acl_len = attrlen; + } + +out: + return status; +} + static int decode_savefh(struct xdr_stream *xdr) { @@ -3418,6 +3540,71 @@ out: } +/* + * Encode an SETACL request + */ +static int +nfs4_xdr_enc_setacl(struct rpc_rqst *req, uint32_t *p, struct nfs_setaclargs *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .nops = 2, + }; + int status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + status = encode_putfh(&xdr, args->fh); + if (status) + goto out; + status = encode_setacl(&xdr, args); +out: + return status; +} +/* + * Decode SETACL response + */ +static int +nfs4_xdr_dec_setacl(struct rpc_rqst *rqstp, uint32_t *p, void *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_putfh(&xdr); + if (status) + goto out; + status = decode_setattr(&xdr, res); +out: + return status; +} + +/* + * Decode GETACL response + */ +static int +nfs4_xdr_dec_getacl(struct rpc_rqst *rqstp, uint32_t *p, size_t *acl_len) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_putfh(&xdr); + if (status) + goto out; + status = decode_getacl(&xdr, rqstp, acl_len); + +out: + return status; +} /* * Decode CLOSE response @@ -3895,6 +4082,12 @@ uint32_t *nfs4_decode_dirent(uint32_t *p, struct nfs_entry *entry, int plus) } len = XDR_QUADLEN(ntohl(*p++)); /* attribute buffer length */ if (len > 0) { + if (bitmap[0] & FATTR4_WORD0_RDATTR_ERROR) { + bitmap[0] &= ~FATTR4_WORD0_RDATTR_ERROR; + /* Ignore the return value of rdattr_error for now */ + p++; + len--; + } if (bitmap[0] == 0 && bitmap[1] == FATTR4_WORD1_MOUNTED_ON_FILEID) xdr_decode_hyper(p, &entry->ino); else if (bitmap[0] == FATTR4_WORD0_FILEID) @@ -3934,6 +4127,8 @@ static struct { { NFS4ERR_DQUOT, EDQUOT }, { NFS4ERR_STALE, ESTALE }, { NFS4ERR_BADHANDLE, EBADHANDLE }, + { NFS4ERR_BADOWNER, EINVAL }, + { NFS4ERR_BADNAME, EINVAL }, { NFS4ERR_BAD_COOKIE, EBADCOOKIE }, { NFS4ERR_NOTSUPP, ENOTSUPP }, { NFS4ERR_TOOSMALL, ETOOSMALL }, @@ -4019,6 +4214,8 @@ struct rpc_procinfo nfs4_procedures[] = { PROC(READDIR, enc_readdir, dec_readdir), PROC(SERVER_CAPS, enc_server_caps, dec_server_caps), PROC(DELEGRETURN, enc_delegreturn, dec_delegreturn), + PROC(GETACL, enc_getacl, dec_getacl), + PROC(SETACL, enc_setacl, dec_setacl), }; struct rpc_version nfs_version4 = { diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c index fd5bc59..1b272a1 100644 --- a/fs/nfs/nfsroot.c +++ b/fs/nfs/nfsroot.c @@ -124,6 +124,7 @@ enum { Opt_soft, Opt_hard, Opt_intr, Opt_nointr, Opt_posix, Opt_noposix, Opt_cto, Opt_nocto, Opt_ac, Opt_noac, Opt_lock, Opt_nolock, Opt_v2, Opt_v3, Opt_udp, Opt_tcp, + Opt_acl, Opt_noacl, /* Error token */ Opt_err }; @@ -158,6 +159,8 @@ static match_table_t __initdata tokens = { {Opt_udp, "udp"}, {Opt_tcp, "proto=tcp"}, {Opt_tcp, "tcp"}, + {Opt_acl, "acl"}, + {Opt_noacl, "noacl"}, {Opt_err, NULL} }; @@ -266,6 +269,12 @@ static int __init root_nfs_parse(char *name, char *buf) case Opt_tcp: nfs_data.flags 
|= NFS_MOUNT_TCP; break; + case Opt_acl: + nfs_data.flags &= ~NFS_MOUNT_NOACL; + break; + case Opt_noacl: + nfs_data.flags |= NFS_MOUNT_NOACL; + break; default : return 0; } diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 4f1ba72..d53857b 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -107,11 +107,38 @@ void nfs_unlock_request(struct nfs_page *req) smp_mb__before_clear_bit(); clear_bit(PG_BUSY, &req->wb_flags); smp_mb__after_clear_bit(); - wake_up_all(&req->wb_context->waitq); + wake_up_bit(&req->wb_flags, PG_BUSY); nfs_release_request(req); } /** + * nfs_set_page_writeback_locked - Lock a request for writeback + * @req: + */ +int nfs_set_page_writeback_locked(struct nfs_page *req) +{ + struct nfs_inode *nfsi = NFS_I(req->wb_context->dentry->d_inode); + + if (!nfs_lock_request(req)) + return 0; + radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_WRITEBACK); + return 1; +} + +/** + * nfs_clear_page_writeback - Unlock request and wake up sleepers + */ +void nfs_clear_page_writeback(struct nfs_page *req) +{ + struct nfs_inode *nfsi = NFS_I(req->wb_context->dentry->d_inode); + + spin_lock(&nfsi->req_lock); + radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_WRITEBACK); + spin_unlock(&nfsi->req_lock); + nfs_unlock_request(req); +} + +/** * nfs_clear_request - Free up all resources allocated to the request * @req: * @@ -150,34 +177,15 @@ nfs_release_request(struct nfs_page *req) nfs_page_free(req); } -/** - * nfs_list_add_request - Insert a request into a sorted list - * @req: request - * @head: head of list into which to insert the request. - * - * Note that the wb_list is sorted by page index in order to facilitate - * coalescing of requests. - * We use an insertion sort that is optimized for the case of appended - * writes. 
- */ -void -nfs_list_add_request(struct nfs_page *req, struct list_head *head) +static int nfs_wait_bit_interruptible(void *word) { - struct list_head *pos; + int ret = 0; -#ifdef NFS_PARANOIA - if (!list_empty(&req->wb_list)) { - printk(KERN_ERR "NFS: Add to list failed!\n"); - BUG(); - } -#endif - list_for_each_prev(pos, head) { - struct nfs_page *p = nfs_list_entry(pos); - if (p->wb_index < req->wb_index) - break; - } - list_add(&req->wb_list, pos); - req->wb_list_head = head; + if (signal_pending(current)) + ret = -ERESTARTSYS; + else + schedule(); + return ret; } /** @@ -190,12 +198,22 @@ nfs_list_add_request(struct nfs_page *req, struct list_head *head) int nfs_wait_on_request(struct nfs_page *req) { - struct inode *inode = req->wb_context->dentry->d_inode; - struct rpc_clnt *clnt = NFS_CLIENT(inode); - - if (!NFS_WBACK_BUSY(req)) - return 0; - return nfs_wait_event(clnt, req->wb_context->waitq, !NFS_WBACK_BUSY(req)); + struct rpc_clnt *clnt = NFS_CLIENT(req->wb_context->dentry->d_inode); + sigset_t oldmask; + int ret = 0; + + if (!test_bit(PG_BUSY, &req->wb_flags)) + goto out; + /* + * Note: the call to rpc_clnt_sigmask() suffices to ensure that we + * are not interrupted if intr flag is not set + */ + rpc_clnt_sigmask(clnt, &oldmask); + ret = out_of_line_wait_on_bit(&req->wb_flags, PG_BUSY, + nfs_wait_bit_interruptible, TASK_INTERRUPTIBLE); + rpc_clnt_sigunmask(clnt, &oldmask); +out: + return ret; } /** @@ -243,6 +261,62 @@ nfs_coalesce_requests(struct list_head *head, struct list_head *dst, return npages; } +#define NFS_SCAN_MAXENTRIES 16 +/** + * nfs_scan_lock_dirty - Scan the radix tree for dirty requests + * @nfsi: NFS inode + * @dst: Destination list + * @idx_start: lower bound of page->index to scan + * @npages: idx_start + npages sets the upper bound to scan. + * + * Moves elements from one of the inode request lists. + * If the number of requests is set to 0, the entire address_space + * starting at index idx_start, is scanned. + * The requests are *not* checked to ensure that they form a contiguous set. 
+ * You must be holding the inode's req_lock when calling this function + */ +int +nfs_scan_lock_dirty(struct nfs_inode *nfsi, struct list_head *dst, + unsigned long idx_start, unsigned int npages) +{ + struct nfs_page *pgvec[NFS_SCAN_MAXENTRIES]; + struct nfs_page *req; + unsigned long idx_end; + int found, i; + int res; + + res = 0; + if (npages == 0) + idx_end = ~0; + else + idx_end = idx_start + npages - 1; + + for (;;) { + found = radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, + (void **)&pgvec[0], idx_start, NFS_SCAN_MAXENTRIES, + NFS_PAGE_TAG_DIRTY); + if (found <= 0) + break; + for (i = 0; i < found; i++) { + req = pgvec[i]; + if (req->wb_index > idx_end) + goto out; + + idx_start = req->wb_index + 1; + + if (nfs_set_page_writeback_locked(req)) { + radix_tree_tag_clear(&nfsi->nfs_page_tree, + req->wb_index, NFS_PAGE_TAG_DIRTY); + nfs_list_remove_request(req); + nfs_list_add_request(req, dst); + res++; + } + } + } +out: + return res; +} + /** * nfs_scan_list - Scan a list for matching requests * @head: One of the NFS inode request lists @@ -280,7 +354,7 @@ nfs_scan_list(struct list_head *head, struct list_head *dst, if (req->wb_index > idx_end) break; - if (!nfs_lock_request(req)) + if (!nfs_set_page_writeback_locked(req)) continue; nfs_list_remove_request(req); nfs_list_add_request(req, dst); diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index d31b4d6..cedf636 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -622,6 +622,7 @@ struct nfs_rpc_ops nfs_v2_clientops = { .version = 2, /* protocol version */ .dentry_ops = &nfs_dentry_operations, .dir_inode_ops = &nfs_dir_inode_operations, + .file_inode_ops = &nfs_file_inode_operations, .getroot = nfs_proc_get_root, .getattr = nfs_proc_getattr, .setattr = nfs_proc_setattr, diff --git a/fs/nfs/read.c b/fs/nfs/read.c index a0042fb..6f866b8aa 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -173,7 +173,6 @@ static int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, if (len < PAGE_CACHE_SIZE) memclear_highpage_flush(page, len, PAGE_CACHE_SIZE - len); - nfs_lock_request(new); nfs_list_add_request(new, &one_request); nfs_pagein_one(&one_request, inode); return 0; @@ -185,7 +184,6 @@ static void nfs_readpage_release(struct nfs_page *req) nfs_clear_request(req); nfs_release_request(req); - nfs_unlock_request(req); dprintk("NFS: read done (%s/%Ld %d@%Ld)\n", req->wb_context->dentry->d_inode->i_sb->s_id, @@ -553,7 +551,6 @@ readpage_async_filler(void *data, struct page *page) } if (len < PAGE_CACHE_SIZE) memclear_highpage_flush(page, len, PAGE_CACHE_SIZE - len); - nfs_lock_request(new); nfs_list_add_request(new, desc->head); return 0; } diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 6f7a4af..5130eda 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -220,7 +220,7 @@ static int nfs_writepage_sync(struct nfs_open_context *ctx, struct inode *inode, ClearPageError(page); io_error: - nfs_end_data_update_defer(inode); + nfs_end_data_update(inode); nfs_writedata_free(wdata); return written ? 
written : result; } @@ -352,7 +352,7 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) if (err < 0) goto out; } - err = nfs_commit_inode(inode, 0, 0, wb_priority(wbc)); + err = nfs_commit_inode(inode, wb_priority(wbc)); if (err > 0) { wbc->nr_to_write -= err; err = 0; @@ -401,7 +401,7 @@ static void nfs_inode_remove_request(struct nfs_page *req) nfsi->npages--; if (!nfsi->npages) { spin_unlock(&nfsi->req_lock); - nfs_end_data_update_defer(inode); + nfs_end_data_update(inode); iput(inode); } else spin_unlock(&nfsi->req_lock); @@ -446,6 +446,8 @@ nfs_mark_request_dirty(struct nfs_page *req) struct nfs_inode *nfsi = NFS_I(inode); spin_lock(&nfsi->req_lock); + radix_tree_tag_set(&nfsi->nfs_page_tree, + req->wb_index, NFS_PAGE_TAG_DIRTY); nfs_list_add_request(req, &nfsi->dirty); nfsi->ndirty++; spin_unlock(&nfsi->req_lock); @@ -503,13 +505,12 @@ nfs_wait_on_requests(struct inode *inode, unsigned long idx_start, unsigned int spin_lock(&nfsi->req_lock); next = idx_start; - while (radix_tree_gang_lookup(&nfsi->nfs_page_tree, (void **)&req, next, 1)) { + while (radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, (void **)&req, next, 1, NFS_PAGE_TAG_WRITEBACK)) { if (req->wb_index > idx_end) break; next = req->wb_index + 1; - if (!NFS_WBACK_BUSY(req)) - continue; + BUG_ON(!NFS_WBACK_BUSY(req)); atomic_inc(&req->wb_count); spin_unlock(&nfsi->req_lock); @@ -538,12 +539,15 @@ static int nfs_scan_dirty(struct inode *inode, struct list_head *dst, unsigned long idx_start, unsigned int npages) { struct nfs_inode *nfsi = NFS_I(inode); - int res; - res = nfs_scan_list(&nfsi->dirty, dst, idx_start, npages); - nfsi->ndirty -= res; - sub_page_state(nr_dirty,res); - if ((nfsi->ndirty == 0) != list_empty(&nfsi->dirty)) - printk(KERN_ERR "NFS: desynchronized value of nfs_i.ndirty.\n"); + int res = 0; + + if (nfsi->ndirty != 0) { + res = nfs_scan_lock_dirty(nfsi, dst, idx_start, npages); + nfsi->ndirty -= res; + sub_page_state(nr_dirty,res); + if ((nfsi->ndirty == 0) != list_empty(&nfsi->dirty)) + printk(KERN_ERR "NFS: desynchronized value of nfs_i.ndirty.\n"); + } return res; } @@ -562,11 +566,14 @@ static int nfs_scan_commit(struct inode *inode, struct list_head *dst, unsigned long idx_start, unsigned int npages) { struct nfs_inode *nfsi = NFS_I(inode); - int res; - res = nfs_scan_list(&nfsi->commit, dst, idx_start, npages); - nfsi->ncommit -= res; - if ((nfsi->ncommit == 0) != list_empty(&nfsi->commit)) - printk(KERN_ERR "NFS: desynchronized value of nfs_i.ncommit.\n"); + int res = 0; + + if (nfsi->ncommit != 0) { + res = nfs_scan_list(&nfsi->commit, dst, idx_start, npages); + nfsi->ncommit -= res; + if ((nfsi->ncommit == 0) != list_empty(&nfsi->commit)) + printk(KERN_ERR "NFS: desynchronized value of nfs_i.ncommit.\n"); + } return res; } #endif @@ -750,7 +757,7 @@ int nfs_updatepage(struct file *file, struct page *page, * is entirely in cache, it may be more efficient to avoid * fragmenting write requests. 
*/ - if (PageUptodate(page) && inode->i_flock == NULL) { + if (PageUptodate(page) && inode->i_flock == NULL && !(file->f_mode & O_SYNC)) { loff_t end_offs = i_size_read(inode) - 1; unsigned long end_index = end_offs >> PAGE_CACHE_SHIFT; @@ -821,7 +828,7 @@ out: #else nfs_inode_remove_request(req); #endif - nfs_unlock_request(req); + nfs_clear_page_writeback(req); } static inline int flush_task_priority(int how) @@ -952,7 +959,7 @@ out_bad: nfs_writedata_free(data); } nfs_mark_request_dirty(req); - nfs_unlock_request(req); + nfs_clear_page_writeback(req); return -ENOMEM; } @@ -1002,7 +1009,7 @@ static int nfs_flush_one(struct list_head *head, struct inode *inode, int how) struct nfs_page *req = nfs_list_entry(head->next); nfs_list_remove_request(req); nfs_mark_request_dirty(req); - nfs_unlock_request(req); + nfs_clear_page_writeback(req); } return -ENOMEM; } @@ -1029,7 +1036,7 @@ nfs_flush_list(struct list_head *head, int wpages, int how) req = nfs_list_entry(head->next); nfs_list_remove_request(req); nfs_mark_request_dirty(req); - nfs_unlock_request(req); + nfs_clear_page_writeback(req); } return error; } @@ -1121,7 +1128,7 @@ static void nfs_writeback_done_full(struct nfs_write_data *data, int status) nfs_inode_remove_request(req); #endif next: - nfs_unlock_request(req); + nfs_clear_page_writeback(req); } } @@ -1210,36 +1217,24 @@ static void nfs_commit_rpcsetup(struct list_head *head, struct nfs_write_data *data, int how) { struct rpc_task *task = &data->task; - struct nfs_page *first, *last; + struct nfs_page *first; struct inode *inode; - loff_t start, end, len; /* Set up the RPC argument and reply structs * NB: take care not to mess about with data->commit et al. */ list_splice_init(head, &data->pages); first = nfs_list_entry(data->pages.next); - last = nfs_list_entry(data->pages.prev); inode = first->wb_context->dentry->d_inode; - /* - * Determine the offset range of requests in the COMMIT call. - * We rely on the fact that data->pages is an ordered list... 
- */ - start = req_offset(first); - end = req_offset(last) + last->wb_bytes; - len = end - start; - /* If 'len' is not a 32-bit quantity, pass '0' in the COMMIT call */ - if (end >= i_size_read(inode) || len < 0 || len > (~((u32)0) >> 1)) - len = 0; - data->inode = inode; data->cred = first->wb_context->cred; data->args.fh = NFS_FH(data->inode); - data->args.offset = start; - data->args.count = len; - data->res.count = len; + /* Note: we always request a commit of the entire inode */ + data->args.offset = 0; + data->args.count = 0; + data->res.count = 0; data->res.fattr = &data->fattr; data->res.verf = &data->verf; @@ -1278,7 +1273,7 @@ nfs_commit_list(struct list_head *head, int how) req = nfs_list_entry(head->next); nfs_list_remove_request(req); nfs_mark_request_commit(req); - nfs_unlock_request(req); + nfs_clear_page_writeback(req); } return -ENOMEM; } @@ -1324,7 +1319,7 @@ nfs_commit_done(struct rpc_task *task) dprintk(" mismatch\n"); nfs_mark_request_dirty(req); next: - nfs_unlock_request(req); + nfs_clear_page_writeback(req); res++; } sub_page_state(nr_unstable,res); @@ -1342,16 +1337,23 @@ static int nfs_flush_inode(struct inode *inode, unsigned long idx_start, spin_lock(&nfsi->req_lock); res = nfs_scan_dirty(inode, &head, idx_start, npages); spin_unlock(&nfsi->req_lock); - if (res) - error = nfs_flush_list(&head, NFS_SERVER(inode)->wpages, how); + if (res) { + struct nfs_server *server = NFS_SERVER(inode); + + /* For single writes, FLUSH_STABLE is more efficient */ + if (res == nfsi->npages && nfsi->npages <= server->wpages) { + if (res > 1 || nfs_list_entry(head.next)->wb_bytes <= server->wsize) + how |= FLUSH_STABLE; + } + error = nfs_flush_list(&head, server->wpages, how); + } if (error < 0) return error; return res; } #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) -int nfs_commit_inode(struct inode *inode, unsigned long idx_start, - unsigned int npages, int how) +int nfs_commit_inode(struct inode *inode, int how) { struct nfs_inode *nfsi = NFS_I(inode); LIST_HEAD(head); @@ -1359,15 +1361,13 @@ int nfs_commit_inode(struct inode *inode, unsigned long idx_start, error = 0; spin_lock(&nfsi->req_lock); - res = nfs_scan_commit(inode, &head, idx_start, npages); + res = nfs_scan_commit(inode, &head, 0, 0); + spin_unlock(&nfsi->req_lock); if (res) { - res += nfs_scan_commit(inode, &head, 0, 0); - spin_unlock(&nfsi->req_lock); error = nfs_commit_list(&head, how); - } else - spin_unlock(&nfsi->req_lock); - if (error < 0) - return error; + if (error < 0) + return error; + } return res; } #endif @@ -1389,7 +1389,7 @@ int nfs_sync_inode(struct inode *inode, unsigned long idx_start, error = nfs_flush_inode(inode, idx_start, npages, how); #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) if (error == 0) - error = nfs_commit_inode(inode, idx_start, npages, how); + error = nfs_commit_inode(inode, how); #endif } while (error > 0); return error; diff --git a/fs/nfs_common/Makefile b/fs/nfs_common/Makefile new file mode 100644 index 0000000..f689ed8 --- /dev/null +++ b/fs/nfs_common/Makefile @@ -0,0 +1,7 @@ +# +# Makefile for Linux filesystem routines that are shared by client and server. 
+# + +obj-$(CONFIG_NFS_ACL_SUPPORT) += nfs_acl.o + +nfs_acl-objs := nfsacl.o diff --git a/fs/nfs_common/nfsacl.c b/fs/nfs_common/nfsacl.c new file mode 100644 index 0000000..18c58c3 --- /dev/null +++ b/fs/nfs_common/nfsacl.c @@ -0,0 +1,257 @@ +/* + * fs/nfs_common/nfsacl.c + * + * Copyright (C) 2002-2003 Andreas Gruenbacher <agruen@suse.de> + */ + +/* + * The Solaris nfsacl protocol represents some ACLs slightly differently + * than POSIX 1003.1e draft 17 does (and we do): + * + * - Minimal ACLs always have an ACL_MASK entry, so they have + * four instead of three entries. + * - The ACL_MASK entry in such minimal ACLs always has the same + * permissions as the ACL_GROUP_OBJ entry. (In extended ACLs + * the ACL_MASK and ACL_GROUP_OBJ entries may differ.) + * - The identifier fields of the ACL_USER_OBJ and ACL_GROUP_OBJ + * entries contain the identifiers of the owner and owning group. + * (In POSIX ACLs we always set them to ACL_UNDEFINED_ID). + * - ACL entries in the kernel are kept sorted in ascending order + * of (e_tag, e_id). Solaris ACLs are unsorted. + */ + +#include <linux/module.h> +#include <linux/fs.h> +#include <linux/sunrpc/xdr.h> +#include <linux/nfsacl.h> +#include <linux/nfs3.h> +#include <linux/sort.h> + +MODULE_LICENSE("GPL"); + +EXPORT_SYMBOL(nfsacl_encode); +EXPORT_SYMBOL(nfsacl_decode); + +struct nfsacl_encode_desc { + struct xdr_array2_desc desc; + unsigned int count; + struct posix_acl *acl; + int typeflag; + uid_t uid; + gid_t gid; +}; + +static int +xdr_nfsace_encode(struct xdr_array2_desc *desc, void *elem) +{ + struct nfsacl_encode_desc *nfsacl_desc = + (struct nfsacl_encode_desc *) desc; + u32 *p = (u32 *) elem; + + if (nfsacl_desc->count < nfsacl_desc->acl->a_count) { + struct posix_acl_entry *entry = + &nfsacl_desc->acl->a_entries[nfsacl_desc->count++]; + + *p++ = htonl(entry->e_tag | nfsacl_desc->typeflag); + switch(entry->e_tag) { + case ACL_USER_OBJ: + *p++ = htonl(nfsacl_desc->uid); + break; + case ACL_GROUP_OBJ: + *p++ = htonl(nfsacl_desc->gid); + break; + case ACL_USER: + case ACL_GROUP: + *p++ = htonl(entry->e_id); + break; + default: /* Solaris depends on that! */ + *p++ = 0; + break; + } + *p++ = htonl(entry->e_perm & S_IRWXO); + } else { + const struct posix_acl_entry *pa, *pe; + int group_obj_perm = ACL_READ|ACL_WRITE|ACL_EXECUTE; + + FOREACH_ACL_ENTRY(pa, nfsacl_desc->acl, pe) { + if (pa->e_tag == ACL_GROUP_OBJ) { + group_obj_perm = pa->e_perm & S_IRWXO; + break; + } + } + /* fake up ACL_MASK entry */ + *p++ = htonl(ACL_MASK | nfsacl_desc->typeflag); + *p++ = htonl(0); + *p++ = htonl(group_obj_perm); + } + + return 0; +} + +unsigned int +nfsacl_encode(struct xdr_buf *buf, unsigned int base, struct inode *inode, + struct posix_acl *acl, int encode_entries, int typeflag) +{ + int entries = (acl && acl->a_count) ? max_t(int, acl->a_count, 4) : 0; + struct nfsacl_encode_desc nfsacl_desc = { + .desc = { + .elem_size = 12, + .array_len = encode_entries ? 
entries : 0, + .xcode = xdr_nfsace_encode, + }, + .acl = acl, + .typeflag = typeflag, + .uid = inode->i_uid, + .gid = inode->i_gid, + }; + int err; + + if (entries > NFS_ACL_MAX_ENTRIES || + xdr_encode_word(buf, base, entries)) + return -EINVAL; + err = xdr_encode_array2(buf, base + 4, &nfsacl_desc.desc); + if (!err) + err = 8 + nfsacl_desc.desc.elem_size * + nfsacl_desc.desc.array_len; + return err; +} + +struct nfsacl_decode_desc { + struct xdr_array2_desc desc; + unsigned int count; + struct posix_acl *acl; +}; + +static int +xdr_nfsace_decode(struct xdr_array2_desc *desc, void *elem) +{ + struct nfsacl_decode_desc *nfsacl_desc = + (struct nfsacl_decode_desc *) desc; + u32 *p = (u32 *) elem; + struct posix_acl_entry *entry; + + if (!nfsacl_desc->acl) { + if (desc->array_len > NFS_ACL_MAX_ENTRIES) + return -EINVAL; + nfsacl_desc->acl = posix_acl_alloc(desc->array_len, GFP_KERNEL); + if (!nfsacl_desc->acl) + return -ENOMEM; + nfsacl_desc->count = 0; + } + + entry = &nfsacl_desc->acl->a_entries[nfsacl_desc->count++]; + entry->e_tag = ntohl(*p++) & ~NFS_ACL_DEFAULT; + entry->e_id = ntohl(*p++); + entry->e_perm = ntohl(*p++); + + switch(entry->e_tag) { + case ACL_USER_OBJ: + case ACL_USER: + case ACL_GROUP_OBJ: + case ACL_GROUP: + case ACL_OTHER: + if (entry->e_perm & ~S_IRWXO) + return -EINVAL; + break; + case ACL_MASK: + /* Solaris sometimes sets additonal bits in the mask */ + entry->e_perm &= S_IRWXO; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int +cmp_acl_entry(const void *x, const void *y) +{ + const struct posix_acl_entry *a = x, *b = y; + + if (a->e_tag != b->e_tag) + return a->e_tag - b->e_tag; + else if (a->e_id > b->e_id) + return 1; + else if (a->e_id < b->e_id) + return -1; + else + return 0; +} + +/* + * Convert from a Solaris ACL to a POSIX 1003.1e draft 17 ACL. + */ +static int +posix_acl_from_nfsacl(struct posix_acl *acl) +{ + struct posix_acl_entry *pa, *pe, + *group_obj = NULL, *mask = NULL; + + if (!acl) + return 0; + + sort(acl->a_entries, acl->a_count, sizeof(struct posix_acl_entry), + cmp_acl_entry, NULL); + + /* Clear undefined identifier fields and find the ACL_GROUP_OBJ + and ACL_MASK entries. */ + FOREACH_ACL_ENTRY(pa, acl, pe) { + switch(pa->e_tag) { + case ACL_USER_OBJ: + pa->e_id = ACL_UNDEFINED_ID; + break; + case ACL_GROUP_OBJ: + pa->e_id = ACL_UNDEFINED_ID; + group_obj = pa; + break; + case ACL_MASK: + mask = pa; + /* fall through */ + case ACL_OTHER: + pa->e_id = ACL_UNDEFINED_ID; + break; + } + } + if (acl->a_count == 4 && group_obj && mask && + mask->e_perm == group_obj->e_perm) { + /* remove bogus ACL_MASK entry */ + memmove(mask, mask+1, (3 - (mask - acl->a_entries)) * + sizeof(struct posix_acl_entry)); + acl->a_count = 3; + } + return 0; +} + +unsigned int +nfsacl_decode(struct xdr_buf *buf, unsigned int base, unsigned int *aclcnt, + struct posix_acl **pacl) +{ + struct nfsacl_decode_desc nfsacl_desc = { + .desc = { + .elem_size = 12, + .xcode = pacl ? 
xdr_nfsace_decode : NULL, + }, + }; + u32 entries; + int err; + + if (xdr_decode_word(buf, base, &entries) || + entries > NFS_ACL_MAX_ENTRIES) + return -EINVAL; + err = xdr_decode_array2(buf, base + 4, &nfsacl_desc.desc); + if (err) + return err; + if (pacl) { + if (entries != nfsacl_desc.desc.array_len || + posix_acl_from_nfsacl(nfsacl_desc.acl) != 0) { + posix_acl_release(nfsacl_desc.acl); + return -EINVAL; + } + *pacl = nfsacl_desc.acl; + } + if (aclcnt) + *aclcnt = entries; + return 8 + nfsacl_desc.desc.elem_size * + nfsacl_desc.desc.array_len; +} diff --git a/fs/nfsd/Makefile b/fs/nfsd/Makefile index b8680a2..9f043f4 100644 --- a/fs/nfsd/Makefile +++ b/fs/nfsd/Makefile @@ -6,7 +6,9 @@ obj-$(CONFIG_NFSD) += nfsd.o nfsd-y := nfssvc.o nfsctl.o nfsproc.o nfsfh.o vfs.o \ export.o auth.o lockd.o nfscache.o nfsxdr.o stats.o +nfsd-$(CONFIG_NFSD_V2_ACL) += nfs2acl.o nfsd-$(CONFIG_NFSD_V3) += nfs3proc.o nfs3xdr.o +nfsd-$(CONFIG_NFSD_V3_ACL) += nfs3acl.o nfsd-$(CONFIG_NFSD_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o \ nfs4acl.o nfs4callback.o nfsd-objs := $(nfsd-y) diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c new file mode 100644 index 0000000..7cbf068 --- /dev/null +++ b/fs/nfsd/nfs2acl.c @@ -0,0 +1,336 @@ +/* + * linux/fs/nfsd/nfsacl.c + * + * Process version 2 NFSACL requests. + * + * Copyright (C) 2002-2003 Andreas Gruenbacher <agruen@suse.de> + */ + +#include <linux/sunrpc/svc.h> +#include <linux/nfs.h> +#include <linux/nfsd/nfsd.h> +#include <linux/nfsd/cache.h> +#include <linux/nfsd/xdr.h> +#include <linux/nfsd/xdr3.h> +#include <linux/posix_acl.h> +#include <linux/nfsacl.h> + +#define NFSDDBG_FACILITY NFSDDBG_PROC +#define RETURN_STATUS(st) { resp->status = (st); return (st); } + +/* + * NULL call. + */ +static int +nfsacld_proc_null(struct svc_rqst *rqstp, void *argp, void *resp) +{ + return nfs_ok; +} + +/* + * Get the Access and/or Default ACL of a file. + */ +static int nfsacld_proc_getacl(struct svc_rqst * rqstp, + struct nfsd3_getaclargs *argp, struct nfsd3_getaclres *resp) +{ + svc_fh *fh; + struct posix_acl *acl; + int nfserr = 0; + + dprintk("nfsd: GETACL(2acl) %s\n", SVCFH_fmt(&argp->fh)); + + fh = fh_copy(&resp->fh, &argp->fh); + if ((nfserr = fh_verify(rqstp, &resp->fh, 0, MAY_NOP))) + RETURN_STATUS(nfserr_inval); + + if (argp->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT)) + RETURN_STATUS(nfserr_inval); + resp->mask = argp->mask; + + if (resp->mask & (NFS_ACL|NFS_ACLCNT)) { + acl = nfsd_get_posix_acl(fh, ACL_TYPE_ACCESS); + if (IS_ERR(acl)) { + int err = PTR_ERR(acl); + + if (err == -ENODATA || err == -EOPNOTSUPP) + acl = NULL; + else { + nfserr = nfserrno(err); + goto fail; + } + } + if (acl == NULL) { + /* Solaris returns the inode's minimum ACL. */ + + struct inode *inode = fh->fh_dentry->d_inode; + acl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL); + } + resp->acl_access = acl; + } + if (resp->mask & (NFS_DFACL|NFS_DFACLCNT)) { + /* Check how Solaris handles requests for the Default ACL + of a non-directory! */ + + acl = nfsd_get_posix_acl(fh, ACL_TYPE_DEFAULT); + if (IS_ERR(acl)) { + int err = PTR_ERR(acl); + + if (err == -ENODATA || err == -EOPNOTSUPP) + acl = NULL; + else { + nfserr = nfserrno(err); + goto fail; + } + } + resp->acl_default = acl; + } + + /* resp->acl_{access,default} are released in nfssvc_release_getacl. */ + RETURN_STATUS(0); + +fail: + posix_acl_release(resp->acl_access); + posix_acl_release(resp->acl_default); + RETURN_STATUS(nfserr); +} + +/* + * Set the Access and/or Default ACL of a file. 
+ */ +static int nfsacld_proc_setacl(struct svc_rqst * rqstp, + struct nfsd3_setaclargs *argp, + struct nfsd_attrstat *resp) +{ + svc_fh *fh; + int nfserr = 0; + + dprintk("nfsd: SETACL(2acl) %s\n", SVCFH_fmt(&argp->fh)); + + fh = fh_copy(&resp->fh, &argp->fh); + nfserr = fh_verify(rqstp, &resp->fh, 0, MAY_NOP); + + if (!nfserr) { + nfserr = nfserrno( nfsd_set_posix_acl( + fh, ACL_TYPE_ACCESS, argp->acl_access) ); + } + if (!nfserr) { + nfserr = nfserrno( nfsd_set_posix_acl( + fh, ACL_TYPE_DEFAULT, argp->acl_default) ); + } + + /* argp->acl_{access,default} may have been allocated in + nfssvc_decode_setaclargs. */ + posix_acl_release(argp->acl_access); + posix_acl_release(argp->acl_default); + return nfserr; +} + +/* + * Check file attributes + */ +static int nfsacld_proc_getattr(struct svc_rqst * rqstp, + struct nfsd_fhandle *argp, struct nfsd_attrstat *resp) +{ + dprintk("nfsd: GETATTR %s\n", SVCFH_fmt(&argp->fh)); + + fh_copy(&resp->fh, &argp->fh); + return fh_verify(rqstp, &resp->fh, 0, MAY_NOP); +} + +/* + * Check file access + */ +static int nfsacld_proc_access(struct svc_rqst *rqstp, struct nfsd3_accessargs *argp, + struct nfsd3_accessres *resp) +{ + int nfserr; + + dprintk("nfsd: ACCESS(2acl) %s 0x%x\n", + SVCFH_fmt(&argp->fh), + argp->access); + + fh_copy(&resp->fh, &argp->fh); + resp->access = argp->access; + nfserr = nfsd_access(rqstp, &resp->fh, &resp->access, NULL); + return nfserr; +} + +/* + * XDR decode functions + */ +static int nfsaclsvc_decode_getaclargs(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_getaclargs *argp) +{ + if (!(p = nfs2svc_decode_fh(p, &argp->fh))) + return 0; + argp->mask = ntohl(*p); p++; + + return xdr_argsize_check(rqstp, p); +} + + +static int nfsaclsvc_decode_setaclargs(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_setaclargs *argp) +{ + struct kvec *head = rqstp->rq_arg.head; + unsigned int base; + int n; + + if (!(p = nfs2svc_decode_fh(p, &argp->fh))) + return 0; + argp->mask = ntohl(*p++); + if (argp->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT) || + !xdr_argsize_check(rqstp, p)) + return 0; + + base = (char *)p - (char *)head->iov_base; + n = nfsacl_decode(&rqstp->rq_arg, base, NULL, + (argp->mask & NFS_ACL) ? + &argp->acl_access : NULL); + if (n > 0) + n = nfsacl_decode(&rqstp->rq_arg, base + n, NULL, + (argp->mask & NFS_DFACL) ? + &argp->acl_default : NULL); + return (n > 0); +} + +static int nfsaclsvc_decode_fhandleargs(struct svc_rqst *rqstp, u32 *p, + struct nfsd_fhandle *argp) +{ + if (!(p = nfs2svc_decode_fh(p, &argp->fh))) + return 0; + return xdr_argsize_check(rqstp, p); +} + +static int nfsaclsvc_decode_accessargs(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_accessargs *argp) +{ + if (!(p = nfs2svc_decode_fh(p, &argp->fh))) + return 0; + argp->access = ntohl(*p++); + + return xdr_argsize_check(rqstp, p); +} + +/* + * XDR encode functions + */ + +/* GETACL */ +static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_getaclres *resp) +{ + struct dentry *dentry = resp->fh.fh_dentry; + struct inode *inode = dentry->d_inode; + int w = nfsacl_size( + (resp->mask & NFS_ACL) ? resp->acl_access : NULL, + (resp->mask & NFS_DFACL) ? 
resp->acl_default : NULL); + struct kvec *head = rqstp->rq_res.head; + unsigned int base; + int n; + + if (dentry == NULL || dentry->d_inode == NULL) + return 0; + inode = dentry->d_inode; + + p = nfs2svc_encode_fattr(rqstp, p, &resp->fh); + *p++ = htonl(resp->mask); + if (!xdr_ressize_check(rqstp, p)) + return 0; + base = (char *)p - (char *)head->iov_base; + + rqstp->rq_res.page_len = w; + while (w > 0) { + if (!svc_take_res_page(rqstp)) + return 0; + w -= PAGE_SIZE; + } + + n = nfsacl_encode(&rqstp->rq_res, base, inode, + resp->acl_access, + resp->mask & NFS_ACL, 0); + if (n > 0) + n = nfsacl_encode(&rqstp->rq_res, base + n, inode, + resp->acl_default, + resp->mask & NFS_DFACL, + NFS_ACL_DEFAULT); + if (n <= 0) + return 0; + return 1; +} + +static int nfsaclsvc_encode_attrstatres(struct svc_rqst *rqstp, u32 *p, + struct nfsd_attrstat *resp) +{ + p = nfs2svc_encode_fattr(rqstp, p, &resp->fh); + return xdr_ressize_check(rqstp, p); +} + +/* ACCESS */ +static int nfsaclsvc_encode_accessres(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_accessres *resp) +{ + p = nfs2svc_encode_fattr(rqstp, p, &resp->fh); + *p++ = htonl(resp->access); + return xdr_ressize_check(rqstp, p); +} + +/* + * XDR release functions + */ +static int nfsaclsvc_release_getacl(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_getaclres *resp) +{ + fh_put(&resp->fh); + posix_acl_release(resp->acl_access); + posix_acl_release(resp->acl_default); + return 1; +} + +static int nfsaclsvc_release_fhandle(struct svc_rqst *rqstp, u32 *p, + struct nfsd_fhandle *resp) +{ + fh_put(&resp->fh); + return 1; +} + +#define nfsaclsvc_decode_voidargs NULL +#define nfsaclsvc_encode_voidres NULL +#define nfsaclsvc_release_void NULL +#define nfsd3_fhandleargs nfsd_fhandle +#define nfsd3_attrstatres nfsd_attrstat +#define nfsd3_voidres nfsd3_voidargs +struct nfsd3_voidargs { int dummy; }; + +#define PROC(name, argt, rest, relt, cache, respsize) \ + { (svc_procfunc) nfsacld_proc_##name, \ + (kxdrproc_t) nfsaclsvc_decode_##argt##args, \ + (kxdrproc_t) nfsaclsvc_encode_##rest##res, \ + (kxdrproc_t) nfsaclsvc_release_##relt, \ + sizeof(struct nfsd3_##argt##args), \ + sizeof(struct nfsd3_##rest##res), \ + 0, \ + cache, \ + respsize, \ + } + +#define ST 1 /* status*/ +#define AT 21 /* attributes */ +#define pAT (1+AT) /* post attributes - conditional */ +#define ACL (1+NFS_ACL_MAX_ENTRIES*3) /* Access Control List */ + +static struct svc_procedure nfsd_acl_procedures2[] = { + PROC(null, void, void, void, RC_NOCACHE, ST), + PROC(getacl, getacl, getacl, getacl, RC_NOCACHE, ST+1+2*(1+ACL)), + PROC(setacl, setacl, attrstat, fhandle, RC_NOCACHE, ST+AT), + PROC(getattr, fhandle, attrstat, fhandle, RC_NOCACHE, ST+AT), + PROC(access, access, access, fhandle, RC_NOCACHE, ST+AT+1), +}; + +struct svc_version nfsd_acl_version2 = { + .vs_vers = 2, + .vs_nproc = 5, + .vs_proc = nfsd_acl_procedures2, + .vs_dispatch = nfsd_dispatch, + .vs_xdrsize = NFS3_SVC_XDRSIZE, +}; diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c new file mode 100644 index 0000000..64ba405 --- /dev/null +++ b/fs/nfsd/nfs3acl.c @@ -0,0 +1,267 @@ +/* + * linux/fs/nfsd/nfs3acl.c + * + * Process version 3 NFSACL requests. + * + * Copyright (C) 2002-2003 Andreas Gruenbacher <agruen@suse.de> + */ + +#include <linux/sunrpc/svc.h> +#include <linux/nfs3.h> +#include <linux/nfsd/nfsd.h> +#include <linux/nfsd/cache.h> +#include <linux/nfsd/xdr3.h> +#include <linux/posix_acl.h> +#include <linux/nfsacl.h> + +#define RETURN_STATUS(st) { resp->status = (st); return (st); } + +/* + * NULL call. 
+ */ +static int +nfsd3_proc_null(struct svc_rqst *rqstp, void *argp, void *resp) +{ + return nfs_ok; +} + +/* + * Get the Access and/or Default ACL of a file. + */ +static int nfsd3_proc_getacl(struct svc_rqst * rqstp, + struct nfsd3_getaclargs *argp, struct nfsd3_getaclres *resp) +{ + svc_fh *fh; + struct posix_acl *acl; + int nfserr = 0; + + fh = fh_copy(&resp->fh, &argp->fh); + if ((nfserr = fh_verify(rqstp, &resp->fh, 0, MAY_NOP))) + RETURN_STATUS(nfserr_inval); + + if (argp->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT)) + RETURN_STATUS(nfserr_inval); + resp->mask = argp->mask; + + if (resp->mask & (NFS_ACL|NFS_ACLCNT)) { + acl = nfsd_get_posix_acl(fh, ACL_TYPE_ACCESS); + if (IS_ERR(acl)) { + int err = PTR_ERR(acl); + + if (err == -ENODATA || err == -EOPNOTSUPP) + acl = NULL; + else { + nfserr = nfserrno(err); + goto fail; + } + } + if (acl == NULL) { + /* Solaris returns the inode's minimum ACL. */ + + struct inode *inode = fh->fh_dentry->d_inode; + acl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL); + } + resp->acl_access = acl; + } + if (resp->mask & (NFS_DFACL|NFS_DFACLCNT)) { + /* Check how Solaris handles requests for the Default ACL + of a non-directory! */ + + acl = nfsd_get_posix_acl(fh, ACL_TYPE_DEFAULT); + if (IS_ERR(acl)) { + int err = PTR_ERR(acl); + + if (err == -ENODATA || err == -EOPNOTSUPP) + acl = NULL; + else { + nfserr = nfserrno(err); + goto fail; + } + } + resp->acl_default = acl; + } + + /* resp->acl_{access,default} are released in nfs3svc_release_getacl. */ + RETURN_STATUS(0); + +fail: + posix_acl_release(resp->acl_access); + posix_acl_release(resp->acl_default); + RETURN_STATUS(nfserr); +} + +/* + * Set the Access and/or Default ACL of a file. + */ +static int nfsd3_proc_setacl(struct svc_rqst * rqstp, + struct nfsd3_setaclargs *argp, + struct nfsd3_attrstat *resp) +{ + svc_fh *fh; + int nfserr = 0; + + fh = fh_copy(&resp->fh, &argp->fh); + nfserr = fh_verify(rqstp, &resp->fh, 0, MAY_NOP); + + if (!nfserr) { + nfserr = nfserrno( nfsd_set_posix_acl( + fh, ACL_TYPE_ACCESS, argp->acl_access) ); + } + if (!nfserr) { + nfserr = nfserrno( nfsd_set_posix_acl( + fh, ACL_TYPE_DEFAULT, argp->acl_default) ); + } + + /* argp->acl_{access,default} may have been allocated in + nfs3svc_decode_setaclargs. */ + posix_acl_release(argp->acl_access); + posix_acl_release(argp->acl_default); + RETURN_STATUS(nfserr); +} + +/* + * XDR decode functions + */ +static int nfs3svc_decode_getaclargs(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_getaclargs *args) +{ + if (!(p = nfs3svc_decode_fh(p, &args->fh))) + return 0; + args->mask = ntohl(*p); p++; + + return xdr_argsize_check(rqstp, p); +} + + +static int nfs3svc_decode_setaclargs(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_setaclargs *args) +{ + struct kvec *head = rqstp->rq_arg.head; + unsigned int base; + int n; + + if (!(p = nfs3svc_decode_fh(p, &args->fh))) + return 0; + args->mask = ntohl(*p++); + if (args->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT) || + !xdr_argsize_check(rqstp, p)) + return 0; + + base = (char *)p - (char *)head->iov_base; + n = nfsacl_decode(&rqstp->rq_arg, base, NULL, + (args->mask & NFS_ACL) ? + &args->acl_access : NULL); + if (n > 0) + n = nfsacl_decode(&rqstp->rq_arg, base + n, NULL, + (args->mask & NFS_DFACL) ? 
+ &args->acl_default : NULL); + return (n > 0); +} + +/* + * XDR encode functions + */ + +/* GETACL */ +static int nfs3svc_encode_getaclres(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_getaclres *resp) +{ + struct dentry *dentry = resp->fh.fh_dentry; + + p = nfs3svc_encode_post_op_attr(rqstp, p, &resp->fh); + if (resp->status == 0 && dentry && dentry->d_inode) { + struct inode *inode = dentry->d_inode; + int w = nfsacl_size( + (resp->mask & NFS_ACL) ? resp->acl_access : NULL, + (resp->mask & NFS_DFACL) ? resp->acl_default : NULL); + struct kvec *head = rqstp->rq_res.head; + unsigned int base; + int n; + + *p++ = htonl(resp->mask); + if (!xdr_ressize_check(rqstp, p)) + return 0; + base = (char *)p - (char *)head->iov_base; + + rqstp->rq_res.page_len = w; + while (w > 0) { + if (!svc_take_res_page(rqstp)) + return 0; + w -= PAGE_SIZE; + } + + n = nfsacl_encode(&rqstp->rq_res, base, inode, + resp->acl_access, + resp->mask & NFS_ACL, 0); + if (n > 0) + n = nfsacl_encode(&rqstp->rq_res, base + n, inode, + resp->acl_default, + resp->mask & NFS_DFACL, + NFS_ACL_DEFAULT); + if (n <= 0) + return 0; + } else + if (!xdr_ressize_check(rqstp, p)) + return 0; + + return 1; +} + +/* SETACL */ +static int nfs3svc_encode_setaclres(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_attrstat *resp) +{ + p = nfs3svc_encode_post_op_attr(rqstp, p, &resp->fh); + + return xdr_ressize_check(rqstp, p); +} + +/* + * XDR release functions + */ +static int nfs3svc_release_getacl(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_getaclres *resp) +{ + fh_put(&resp->fh); + posix_acl_release(resp->acl_access); + posix_acl_release(resp->acl_default); + return 1; +} + +#define nfs3svc_decode_voidargs NULL +#define nfs3svc_release_void NULL +#define nfsd3_setaclres nfsd3_attrstat +#define nfsd3_voidres nfsd3_voidargs +struct nfsd3_voidargs { int dummy; }; + +#define PROC(name, argt, rest, relt, cache, respsize) \ + { (svc_procfunc) nfsd3_proc_##name, \ + (kxdrproc_t) nfs3svc_decode_##argt##args, \ + (kxdrproc_t) nfs3svc_encode_##rest##res, \ + (kxdrproc_t) nfs3svc_release_##relt, \ + sizeof(struct nfsd3_##argt##args), \ + sizeof(struct nfsd3_##rest##res), \ + 0, \ + cache, \ + respsize, \ + } + +#define ST 1 /* status*/ +#define AT 21 /* attributes */ +#define pAT (1+AT) /* post attributes - conditional */ +#define ACL (1+NFS_ACL_MAX_ENTRIES*3) /* Access Control List */ + +static struct svc_procedure nfsd_acl_procedures3[] = { + PROC(null, void, void, void, RC_NOCACHE, ST), + PROC(getacl, getacl, getacl, getacl, RC_NOCACHE, ST+1+2*(1+ACL)), + PROC(setacl, setacl, setacl, fhandle, RC_NOCACHE, ST+pAT), +}; + +struct svc_version nfsd_acl_version3 = { + .vs_vers = 3, + .vs_nproc = 3, + .vs_proc = nfsd_acl_procedures3, + .vs_dispatch = nfsd_dispatch, + .vs_xdrsize = NFS3_SVC_XDRSIZE, +}; + diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 11f8068..e0e134d 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -71,6 +71,12 @@ decode_fh(u32 *p, struct svc_fh *fhp) return p + XDR_QUADLEN(size); } +/* Helper function for NFSv3 ACL code */ +u32 *nfs3svc_decode_fh(u32 *p, struct svc_fh *fhp) +{ + return decode_fh(p, fhp); +} + static inline u32 * encode_fh(u32 *p, struct svc_fh *fhp) { @@ -233,6 +239,13 @@ encode_post_op_attr(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp) return p; } +/* Helper for NFSv3 ACLs */ +u32 * +nfs3svc_encode_post_op_attr(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp) +{ + return encode_post_op_attr(rqstp, p, fhp); +} + /* * Enocde weak cache consistency data */ diff --git 
a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 1a55dfc..634465e 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -430,7 +430,7 @@ nfsd4_probe_callback(struct nfs4_client *clp) clnt = rpc_create_client(xprt, hostname, program, 1, RPC_AUTH_UNIX); if (IS_ERR(clnt)) { dprintk("NFSD: couldn't create callback client\n"); - goto out_xprt; + goto out_err; } clnt->cl_intr = 0; clnt->cl_softrtry = 1; @@ -465,8 +465,6 @@ out_rpciod: out_clnt: rpc_shutdown_client(clnt); goto out_err; -out_xprt: - xprt_destroy(xprt); out_err: dprintk("NFSD: warning: no callback path to client %.*s\n", (int)clp->cl_name.len, clp->cl_name.data); diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index 757f9d2..0aa1b96 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -591,6 +591,7 @@ nfserrno (int errno) { nfserr_dropit, -ENOMEM }, { nfserr_badname, -ESRCH }, { nfserr_io, -ETXTBSY }, + { nfserr_notsupp, -EOPNOTSUPP }, { -1, -EIO } }; int i; diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 02ded7c..904df60 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -31,6 +31,7 @@ #include <linux/nfsd/stats.h> #include <linux/nfsd/cache.h> #include <linux/lockd/bind.h> +#include <linux/nfsacl.h> #define NFSDDBG_FACILITY NFSDDBG_SVC @@ -362,6 +363,32 @@ nfsd_dispatch(struct svc_rqst *rqstp, u32 *statp) return 1; } +#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) +static struct svc_stat nfsd_acl_svcstats; +static struct svc_version * nfsd_acl_version[] = { + [2] = &nfsd_acl_version2, + [3] = &nfsd_acl_version3, +}; + +#define NFSD_ACL_NRVERS (sizeof(nfsd_acl_version)/sizeof(nfsd_acl_version[0])) +static struct svc_program nfsd_acl_program = { + .pg_prog = NFS_ACL_PROGRAM, + .pg_nvers = NFSD_ACL_NRVERS, + .pg_vers = nfsd_acl_version, + .pg_name = "nfsd", + .pg_class = "nfsd", + .pg_stats = &nfsd_acl_svcstats, +}; + +static struct svc_stat nfsd_acl_svcstats = { + .program = &nfsd_acl_program, +}; + +#define nfsd_acl_program_p &nfsd_acl_program +#else +#define nfsd_acl_program_p NULL +#endif /* defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) */ + extern struct svc_version nfsd_version2, nfsd_version3, nfsd_version4; static struct svc_version * nfsd_version[] = { @@ -376,6 +403,7 @@ static struct svc_version * nfsd_version[] = { #define NFSD_NRVERS (sizeof(nfsd_version)/sizeof(nfsd_version[0])) struct svc_program nfsd_program = { + .pg_next = nfsd_acl_program_p, .pg_prog = NFS_PROGRAM, /* program number */ .pg_nvers = NFSD_NRVERS, /* nr of entries in nfsd_version */ .pg_vers = nfsd_version, /* version table */ diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c index 948b082..b45999f 100644 --- a/fs/nfsd/nfsxdr.c +++ b/fs/nfsd/nfsxdr.c @@ -49,6 +49,12 @@ decode_fh(u32 *p, struct svc_fh *fhp) return p + (NFS_FHSIZE >> 2); } +/* Helper function for NFSv2 ACL code */ +u32 *nfs2svc_decode_fh(u32 *p, struct svc_fh *fhp) +{ + return decode_fh(p, fhp); +} + static inline u32 * encode_fh(u32 *p, struct svc_fh *fhp) { @@ -190,6 +196,11 @@ encode_fattr(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp) return p; } +/* Helper function for NFSv2 ACL code */ +u32 *nfs2svc_encode_fattr(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp) +{ + return encode_fattr(rqstp, p, fhp); +} /* * XDR decode functions diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index e3e9d21..ae3940d 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -46,8 +46,9 @@ #include <linux/nfsd/nfsfh.h> #include <linux/quotaops.h> #include <linux/dnotify.h> -#ifdef CONFIG_NFSD_V4 +#include <linux/xattr_acl.h> #include 
<linux/posix_acl.h> +#ifdef CONFIG_NFSD_V4 #include <linux/posix_acl_xattr.h> #include <linux/xattr_acl.h> #include <linux/xattr.h> @@ -1857,3 +1858,107 @@ nfsd_racache_init(int cache_size) nfsdstats.ra_size = cache_size; return 0; } + +#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) +struct posix_acl * +nfsd_get_posix_acl(struct svc_fh *fhp, int type) +{ + struct inode *inode = fhp->fh_dentry->d_inode; + char *name; + void *value = NULL; + ssize_t size; + struct posix_acl *acl; + + if (!IS_POSIXACL(inode) || !inode->i_op || !inode->i_op->getxattr) + return ERR_PTR(-EOPNOTSUPP); + switch(type) { + case ACL_TYPE_ACCESS: + name = XATTR_NAME_ACL_ACCESS; + break; + case ACL_TYPE_DEFAULT: + name = XATTR_NAME_ACL_DEFAULT; + break; + default: + return ERR_PTR(-EOPNOTSUPP); + } + + size = inode->i_op->getxattr(fhp->fh_dentry, name, NULL, 0); + + if (size < 0) { + acl = ERR_PTR(size); + goto getout; + } else if (size > 0) { + value = kmalloc(size, GFP_KERNEL); + if (!value) { + acl = ERR_PTR(-ENOMEM); + goto getout; + } + size = inode->i_op->getxattr(fhp->fh_dentry, name, value, size); + if (size < 0) { + acl = ERR_PTR(size); + goto getout; + } + } + acl = posix_acl_from_xattr(value, size); + +getout: + kfree(value); + return acl; +} + +int +nfsd_set_posix_acl(struct svc_fh *fhp, int type, struct posix_acl *acl) +{ + struct inode *inode = fhp->fh_dentry->d_inode; + char *name; + void *value = NULL; + size_t size; + int error; + + if (!IS_POSIXACL(inode) || !inode->i_op || + !inode->i_op->setxattr || !inode->i_op->removexattr) + return -EOPNOTSUPP; + switch(type) { + case ACL_TYPE_ACCESS: + name = XATTR_NAME_ACL_ACCESS; + break; + case ACL_TYPE_DEFAULT: + name = XATTR_NAME_ACL_DEFAULT; + break; + default: + return -EOPNOTSUPP; + } + + if (acl && acl->a_count) { + size = xattr_acl_size(acl->a_count); + value = kmalloc(size, GFP_KERNEL); + if (!value) + return -ENOMEM; + size = posix_acl_to_xattr(acl, value, size); + if (size < 0) { + error = size; + goto getout; + } + } else + size = 0; + + if (!fhp->fh_locked) + fh_lock(fhp); /* unlocking is done automatically */ + if (size) + error = inode->i_op->setxattr(fhp->fh_dentry, name, + value, size, 0); + else { + if (!S_ISDIR(inode->i_mode) && type == ACL_TYPE_DEFAULT) + error = 0; + else { + error = inode->i_op->removexattr(fhp->fh_dentry, name); + if (error == -ENODATA) + error = 0; + } + } + +getout: + kfree(value); + return error; +} +#endif /* defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) */ diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index a60a3b3..63a9fbf 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c @@ -219,6 +219,19 @@ static struct file_operations fragmentation_file_operations = { .release = seq_release, }; +extern struct seq_operations zoneinfo_op; +static int zoneinfo_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &zoneinfo_op); +} + +static struct file_operations proc_zoneinfo_file_operations = { + .open = zoneinfo_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + static int version_read_proc(char *page, char **start, off_t off, int count, int *eof, void *data) { @@ -589,6 +602,7 @@ void __init proc_misc_init(void) create_seq_entry("slabinfo",S_IWUSR|S_IRUGO,&proc_slabinfo_operations); create_seq_entry("buddyinfo",S_IRUGO, &fragmentation_file_operations); create_seq_entry("vmstat",S_IRUGO, &proc_vmstat_file_operations); + create_seq_entry("zoneinfo",S_IRUGO, &proc_zoneinfo_file_operations); create_seq_entry("diskstats", 0, 
&proc_diskstats_operations); #ifdef CONFIG_MODULES create_seq_entry("modules", 0, &proc_modules_operations); @@ -835,6 +835,7 @@ do_kern_mount(const char *fstype, int flags, const char *name, void *data) mnt->mnt_parent = mnt; mnt->mnt_namespace = current->namespace; up_write(&sb->s_umount); + free_secdata(secdata); put_filesystem(type); return mnt; out_sb: diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c index d4aaa88..78899ee 100644 --- a/fs/sysfs/bin.c +++ b/fs/sysfs/bin.c @@ -25,7 +25,7 @@ fill_read(struct dentry *dentry, char *buffer, loff_t off, size_t count) struct kobject * kobj = to_kobj(dentry->d_parent); if (!attr->read) - return -EINVAL; + return -EIO; return attr->read(kobj, buffer, off, count); } @@ -71,7 +71,7 @@ flush_write(struct dentry *dentry, char *buffer, loff_t offset, size_t count) struct kobject *kobj = to_kobj(dentry->d_parent); if (!attr->write) - return -EINVAL; + return -EIO; return attr->write(kobj, buffer, offset, count); } diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index fe19821..37d7a68 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -101,18 +101,19 @@ static int create_dir(struct kobject * k, struct dentry * p, down(&p->d_inode->i_sem); *d = sysfs_get_dentry(p,n); if (!IS_ERR(*d)) { - error = sysfs_create(*d, mode, init_dir); + error = sysfs_make_dirent(p->d_fsdata, *d, k, mode, SYSFS_DIR); if (!error) { - error = sysfs_make_dirent(p->d_fsdata, *d, k, mode, - SYSFS_DIR); + error = sysfs_create(*d, mode, init_dir); if (!error) { p->d_inode->i_nlink++; (*d)->d_op = &sysfs_dentry_ops; d_rehash(*d); } } - if (error && (error != -EEXIST)) + if (error && (error != -EEXIST)) { + sysfs_put((*d)->d_fsdata); d_drop(*d); + } dput(*d); } else error = PTR_ERR(*d); @@ -171,17 +172,19 @@ static int sysfs_attach_attr(struct sysfs_dirent * sd, struct dentry * dentry) init = init_file; } + dentry->d_fsdata = sysfs_get(sd); + sd->s_dentry = dentry; error = sysfs_create(dentry, (attr->mode & S_IALLUGO) | S_IFREG, init); - if (error) + if (error) { + sysfs_put(sd); return error; + } if (bin_attr) { dentry->d_inode->i_size = bin_attr->size; dentry->d_inode->i_fop = &bin_fops; } dentry->d_op = &sysfs_dentry_ops; - dentry->d_fsdata = sysfs_get(sd); - sd->s_dentry = dentry; d_rehash(dentry); return 0; @@ -191,13 +194,15 @@ static int sysfs_attach_link(struct sysfs_dirent * sd, struct dentry * dentry) { int err = 0; + dentry->d_fsdata = sysfs_get(sd); + sd->s_dentry = dentry; err = sysfs_create(dentry, S_IFLNK|S_IRWXUGO, init_symlink); if (!err) { dentry->d_op = &sysfs_dentry_ops; - dentry->d_fsdata = sysfs_get(sd); - sd->s_dentry = dentry; d_rehash(dentry); - } + } else + sysfs_put(sd); + return err; } @@ -228,6 +233,7 @@ static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry, struct inode_operations sysfs_dir_inode_operations = { .lookup = sysfs_lookup, + .setattr = sysfs_setattr, }; static void remove_dir(struct dentry * d) diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index 3642080..849aac1 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -23,7 +23,7 @@ subsys_attr_show(struct kobject * kobj, struct attribute * attr, char * page) { struct subsystem * s = to_subsys(kobj); struct subsys_attribute * sattr = to_sattr(attr); - ssize_t ret = 0; + ssize_t ret = -EIO; if (sattr->show) ret = sattr->show(s,page); @@ -36,7 +36,7 @@ subsys_attr_store(struct kobject * kobj, struct attribute * attr, { struct subsystem * s = to_subsys(kobj); struct subsys_attribute * sattr = to_sattr(attr); - ssize_t ret = 0; + ssize_t ret = -EIO; if (sattr->store) ret = 
sattr->store(s,page,count); @@ -182,7 +182,7 @@ fill_write_buffer(struct sysfs_buffer * buffer, const char __user * buf, size_t return -ENOMEM; if (count >= PAGE_SIZE) - count = PAGE_SIZE - 1; + count = PAGE_SIZE; error = copy_from_user(buffer->page,buf,count); buffer->needs_read_fill = 1; return error ? -EFAULT : count; } diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c index aff7b2d..565cac1 100644 --- a/fs/sysfs/inode.c +++ b/fs/sysfs/inode.c @@ -26,18 +26,107 @@ static struct backing_dev_info sysfs_backing_dev_info = { .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK, }; -struct inode * sysfs_new_inode(mode_t mode) +static struct inode_operations sysfs_inode_operations ={ + .setattr = sysfs_setattr, +}; + +int sysfs_setattr(struct dentry * dentry, struct iattr * iattr) +{ + struct inode * inode = dentry->d_inode; + struct sysfs_dirent * sd = dentry->d_fsdata; + struct iattr * sd_iattr; + unsigned int ia_valid = iattr->ia_valid; + int error; + + if (!sd) + return -EINVAL; + + sd_iattr = sd->s_iattr; + + error = inode_change_ok(inode, iattr); + if (error) + return error; + + error = inode_setattr(inode, iattr); + if (error) + return error; + + if (!sd_iattr) { + /* setting attributes for the first time, allocate now */ + sd_iattr = kmalloc(sizeof(struct iattr), GFP_KERNEL); + if (!sd_iattr) + return -ENOMEM; + /* assign default attributes */ + memset(sd_iattr, 0, sizeof(struct iattr)); + sd_iattr->ia_mode = sd->s_mode; + sd_iattr->ia_uid = 0; + sd_iattr->ia_gid = 0; + sd_iattr->ia_atime = sd_iattr->ia_mtime = sd_iattr->ia_ctime = CURRENT_TIME; + sd->s_iattr = sd_iattr; + } + + /* attributes were changed at least once in the past */ + + if (ia_valid & ATTR_UID) + sd_iattr->ia_uid = iattr->ia_uid; + if (ia_valid & ATTR_GID) + sd_iattr->ia_gid = iattr->ia_gid; + if (ia_valid & ATTR_ATIME) + sd_iattr->ia_atime = timespec_trunc(iattr->ia_atime, + inode->i_sb->s_time_gran); + if (ia_valid & ATTR_MTIME) + sd_iattr->ia_mtime = timespec_trunc(iattr->ia_mtime, + inode->i_sb->s_time_gran); + if (ia_valid & ATTR_CTIME) + sd_iattr->ia_ctime = timespec_trunc(iattr->ia_ctime, + inode->i_sb->s_time_gran); + if (ia_valid & ATTR_MODE) { + umode_t mode = iattr->ia_mode; + + if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID)) + mode &= ~S_ISGID; + sd_iattr->ia_mode = mode; + } + + return error; +} + +static inline void set_default_inode_attr(struct inode * inode, mode_t mode) +{ + inode->i_mode = mode; + inode->i_uid = 0; + inode->i_gid = 0; + inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; +} + +static inline void set_inode_attr(struct inode * inode, struct iattr * iattr) +{ + inode->i_mode = iattr->ia_mode; + inode->i_uid = iattr->ia_uid; + inode->i_gid = iattr->ia_gid; + inode->i_atime = iattr->ia_atime; + inode->i_mtime = iattr->ia_mtime; + inode->i_ctime = iattr->ia_ctime; +} + +struct inode * sysfs_new_inode(mode_t mode, struct sysfs_dirent * sd) { struct inode * inode = new_inode(sysfs_sb); if (inode) { - inode->i_mode = mode; - inode->i_uid = 0; - inode->i_gid = 0; inode->i_blksize = PAGE_CACHE_SIZE; inode->i_blocks = 0; - inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; inode->i_mapping->a_ops = &sysfs_aops; inode->i_mapping->backing_dev_info = &sysfs_backing_dev_info; + inode->i_op = &sysfs_inode_operations; + + if (sd->s_iattr) { + /* sysfs_dirent has non-default attributes + * get them for the new inode from persistent copy + * in sysfs_dirent + */ + set_inode_attr(inode, sd->s_iattr); + } else + set_default_inode_attr(inode, mode); } return inode; } @@
-48,7 +137,8 @@ int sysfs_create(struct dentry * dentry, int mode, int (*init)(struct inode *)) struct inode * inode = NULL; if (dentry) { if (!dentry->d_inode) { - if ((inode = sysfs_new_inode(mode))) { + struct sysfs_dirent * sd = dentry->d_fsdata; + if ((inode = sysfs_new_inode(mode, sd))) { if (dentry->d_parent && dentry->d_parent->d_inode) { struct inode *p_inode = dentry->d_parent->d_inode; p_inode->i_mtime = p_inode->i_ctime = CURRENT_TIME; diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c index 5c805bb..f1117e8 100644 --- a/fs/sysfs/mount.c +++ b/fs/sysfs/mount.c @@ -28,6 +28,7 @@ static struct sysfs_dirent sysfs_root = { .s_children = LIST_HEAD_INIT(sysfs_root.s_children), .s_element = NULL, .s_type = SYSFS_ROOT, + .s_iattr = NULL, }; static int sysfs_fill_super(struct super_block *sb, void *data, int silent) @@ -42,7 +43,8 @@ static int sysfs_fill_super(struct super_block *sb, void *data, int silent) sb->s_time_gran = 1; sysfs_sb = sb; - inode = sysfs_new_inode(S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO); + inode = sysfs_new_inode(S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO, + &sysfs_root); if (inode) { inode->i_op = &sysfs_dir_inode_operations; inode->i_fop = &sysfs_dir_operations; diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c index dfdf701..fae57c8 100644 --- a/fs/sysfs/symlink.c +++ b/fs/sysfs/symlink.c @@ -43,7 +43,7 @@ static void fill_object_path(struct kobject * kobj, char * buffer, int length) } } -static int sysfs_add_link(struct dentry * parent, char * name, struct kobject * target) +static int sysfs_add_link(struct dentry * parent, const char * name, struct kobject * target) { struct sysfs_dirent * parent_sd = parent->d_fsdata; struct sysfs_symlink * sl; @@ -79,7 +79,7 @@ exit1: * @target: object we're pointing to. * @name: name of the symlink. */ -int sysfs_create_link(struct kobject * kobj, struct kobject * target, char * name) +int sysfs_create_link(struct kobject * kobj, struct kobject * target, const char * name) { struct dentry * dentry = kobj->dentry; int error = 0; @@ -99,13 +99,13 @@ int sysfs_create_link(struct kobject * kobj, struct kobject * target, char * nam * @name: name of the symlink to remove. 
*/ -void sysfs_remove_link(struct kobject * kobj, char * name) +void sysfs_remove_link(struct kobject * kobj, const char * name) { sysfs_hash_and_remove(kobj->dentry,name); } static int sysfs_get_target_path(struct kobject * kobj, struct kobject * target, - char *path) + char *path) { char * s; int depth, size; diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h index a8a24a0..29da6f5 100644 --- a/fs/sysfs/sysfs.h +++ b/fs/sysfs/sysfs.h @@ -2,7 +2,7 @@ extern struct vfsmount * sysfs_mount; extern kmem_cache_t *sysfs_dir_cachep; -extern struct inode * sysfs_new_inode(mode_t mode); +extern struct inode * sysfs_new_inode(mode_t mode, struct sysfs_dirent *); extern int sysfs_create(struct dentry *, int mode, int (*init)(struct inode *)); extern int sysfs_make_dirent(struct sysfs_dirent *, struct dentry *, void *, @@ -17,6 +17,7 @@ extern void sysfs_remove_subdir(struct dentry *); extern const unsigned char * sysfs_get_name(struct sysfs_dirent *sd); extern void sysfs_drop_dentry(struct sysfs_dirent *sd, struct dentry *parent); +extern int sysfs_setattr(struct dentry *dentry, struct iattr *iattr); extern struct rw_semaphore sysfs_rename_sem; extern struct super_block * sysfs_sb; @@ -75,6 +76,7 @@ static inline void release_sysfs_dirent(struct sysfs_dirent * sd) kobject_put(sl->target_kobj); kfree(sl); } + kfree(sd->s_iattr); kmem_cache_free(sysfs_dir_cachep, sd); } diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index 997963e5..c60e694 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -61,12 +61,13 @@ * File wide globals */ -STATIC kmem_cache_t *pagebuf_cache; +STATIC kmem_cache_t *pagebuf_zone; STATIC kmem_shaker_t pagebuf_shake; -STATIC int pagebuf_daemon_wakeup(int, unsigned int); +STATIC int xfsbufd_wakeup(int, unsigned int); STATIC void pagebuf_delwri_queue(xfs_buf_t *, int); -STATIC struct workqueue_struct *pagebuf_logio_workqueue; -STATIC struct workqueue_struct *pagebuf_dataio_workqueue; + +STATIC struct workqueue_struct *xfslogd_workqueue; +STATIC struct workqueue_struct *xfsdatad_workqueue; /* * Pagebuf debugging @@ -123,9 +124,9 @@ ktrace_t *pagebuf_trace_buf; #define pagebuf_allocate(flags) \ - kmem_zone_alloc(pagebuf_cache, pb_to_km(flags)) + kmem_zone_alloc(pagebuf_zone, pb_to_km(flags)) #define pagebuf_deallocate(pb) \ - kmem_zone_free(pagebuf_cache, (pb)); + kmem_zone_free(pagebuf_zone, (pb)); /* * Page Region interfaces. @@ -425,7 +426,7 @@ _pagebuf_lookup_pages( __FUNCTION__, gfp_mask); XFS_STATS_INC(pb_page_retries); - pagebuf_daemon_wakeup(0, gfp_mask); + xfsbufd_wakeup(0, gfp_mask); blk_congestion_wait(WRITE, HZ/50); goto retry; } @@ -1136,8 +1137,8 @@ pagebuf_iodone( if ((pb->pb_iodone) || (pb->pb_flags & PBF_ASYNC)) { if (schedule) { INIT_WORK(&pb->pb_iodone_work, pagebuf_iodone_work, pb); - queue_work(dataio ? pagebuf_dataio_workqueue : - pagebuf_logio_workqueue, &pb->pb_iodone_work); + queue_work(dataio ? 
xfsdatad_workqueue : + xfslogd_workqueue, &pb->pb_iodone_work); } else { pagebuf_iodone_work(pb); } @@ -1562,16 +1563,6 @@ xfs_free_buftarg( kmem_free(btp, sizeof(*btp)); } -void -xfs_incore_relse( - xfs_buftarg_t *btp, - int delwri_only, - int wait) -{ - invalidate_bdev(btp->pbr_bdev, 1); - truncate_inode_pages(btp->pbr_mapping, 0LL); -} - STATIC int xfs_setsize_buftarg_flags( xfs_buftarg_t *btp, @@ -1742,27 +1733,27 @@ pagebuf_runall_queues( } /* Defines for pagebuf daemon */ -STATIC DECLARE_COMPLETION(pagebuf_daemon_done); -STATIC struct task_struct *pagebuf_daemon_task; -STATIC int pagebuf_daemon_active; -STATIC int force_flush; -STATIC int force_sleep; +STATIC DECLARE_COMPLETION(xfsbufd_done); +STATIC struct task_struct *xfsbufd_task; +STATIC int xfsbufd_active; +STATIC int xfsbufd_force_flush; +STATIC int xfsbufd_force_sleep; STATIC int -pagebuf_daemon_wakeup( +xfsbufd_wakeup( int priority, unsigned int mask) { - if (force_sleep) + if (xfsbufd_force_sleep) return 0; - force_flush = 1; + xfsbufd_force_flush = 1; barrier(); - wake_up_process(pagebuf_daemon_task); + wake_up_process(xfsbufd_task); return 0; } STATIC int -pagebuf_daemon( +xfsbufd( void *data) { struct list_head tmp; @@ -1774,17 +1765,17 @@ pagebuf_daemon( daemonize("xfsbufd"); current->flags |= PF_MEMALLOC; - pagebuf_daemon_task = current; - pagebuf_daemon_active = 1; + xfsbufd_task = current; + xfsbufd_active = 1; barrier(); INIT_LIST_HEAD(&tmp); do { if (unlikely(current->flags & PF_FREEZE)) { - force_sleep = 1; + xfsbufd_force_sleep = 1; refrigerator(PF_FREEZE); } else { - force_sleep = 0; + xfsbufd_force_sleep = 0; } set_current_state(TASK_INTERRUPTIBLE); @@ -1797,7 +1788,7 @@ pagebuf_daemon( ASSERT(pb->pb_flags & PBF_DELWRI); if (!pagebuf_ispin(pb) && !pagebuf_cond_lock(pb)) { - if (!force_flush && + if (!xfsbufd_force_flush && time_before(jiffies, pb->pb_queuetime + age)) { pagebuf_unlock(pb); @@ -1824,10 +1815,10 @@ pagebuf_daemon( if (as_list_len > 0) purge_addresses(); - force_flush = 0; - } while (pagebuf_daemon_active); + xfsbufd_force_flush = 0; + } while (xfsbufd_active); - complete_and_exit(&pagebuf_daemon_done, 0); + complete_and_exit(&xfsbufd_done, 0); } /* @@ -1844,8 +1835,8 @@ xfs_flush_buftarg( xfs_buf_t *pb, *n; int pincount = 0; - pagebuf_runall_queues(pagebuf_dataio_workqueue); - pagebuf_runall_queues(pagebuf_logio_workqueue); + pagebuf_runall_queues(xfsdatad_workqueue); + pagebuf_runall_queues(xfslogd_workqueue); INIT_LIST_HEAD(&tmp); spin_lock(&pbd_delwrite_lock); @@ -1898,43 +1889,43 @@ xfs_flush_buftarg( } STATIC int -pagebuf_daemon_start(void) +xfs_buf_daemons_start(void) { - int rval; + int error = -ENOMEM; - pagebuf_logio_workqueue = create_workqueue("xfslogd"); - if (!pagebuf_logio_workqueue) - return -ENOMEM; + xfslogd_workqueue = create_workqueue("xfslogd"); + if (!xfslogd_workqueue) + goto out; - pagebuf_dataio_workqueue = create_workqueue("xfsdatad"); - if (!pagebuf_dataio_workqueue) { - destroy_workqueue(pagebuf_logio_workqueue); - return -ENOMEM; - } + xfsdatad_workqueue = create_workqueue("xfsdatad"); + if (!xfsdatad_workqueue) + goto out_destroy_xfslogd_workqueue; - rval = kernel_thread(pagebuf_daemon, NULL, CLONE_FS|CLONE_FILES); - if (rval < 0) { - destroy_workqueue(pagebuf_logio_workqueue); - destroy_workqueue(pagebuf_dataio_workqueue); - } + error = kernel_thread(xfsbufd, NULL, CLONE_FS|CLONE_FILES); + if (error < 0) + goto out_destroy_xfsdatad_workqueue; + return 0; - return rval; + out_destroy_xfsdatad_workqueue: + destroy_workqueue(xfsdatad_workqueue); + 
out_destroy_xfslogd_workqueue: + destroy_workqueue(xfslogd_workqueue); + out: + return error; } /* - * pagebuf_daemon_stop - * * Note: do not mark as __exit, it is called from pagebuf_terminate. */ STATIC void -pagebuf_daemon_stop(void) +xfs_buf_daemons_stop(void) { - pagebuf_daemon_active = 0; + xfsbufd_active = 0; barrier(); - wait_for_completion(&pagebuf_daemon_done); + wait_for_completion(&xfsbufd_done); - destroy_workqueue(pagebuf_logio_workqueue); - destroy_workqueue(pagebuf_dataio_workqueue); + destroy_workqueue(xfslogd_workqueue); + destroy_workqueue(xfsdatad_workqueue); } /* @@ -1944,27 +1935,37 @@ pagebuf_daemon_stop(void) int __init pagebuf_init(void) { - pagebuf_cache = kmem_cache_create("xfs_buf_t", sizeof(xfs_buf_t), 0, - SLAB_HWCACHE_ALIGN, NULL, NULL); - if (pagebuf_cache == NULL) { - printk("XFS: couldn't init xfs_buf_t cache\n"); - pagebuf_terminate(); - return -ENOMEM; - } + int error = -ENOMEM; + + pagebuf_zone = kmem_zone_init(sizeof(xfs_buf_t), "xfs_buf"); + if (!pagebuf_zone) + goto out; #ifdef PAGEBUF_TRACE pagebuf_trace_buf = ktrace_alloc(PAGEBUF_TRACE_SIZE, KM_SLEEP); #endif - pagebuf_daemon_start(); + error = xfs_buf_daemons_start(); + if (error) + goto out_free_buf_zone; - pagebuf_shake = kmem_shake_register(pagebuf_daemon_wakeup); - if (pagebuf_shake == NULL) { - pagebuf_terminate(); - return -ENOMEM; + pagebuf_shake = kmem_shake_register(xfsbufd_wakeup); + if (!pagebuf_shake) { + error = -ENOMEM; + goto out_stop_daemons; } return 0; + + out_stop_daemons: + xfs_buf_daemons_stop(); + out_free_buf_zone: +#ifdef PAGEBUF_TRACE + ktrace_free(pagebuf_trace_buf); +#endif + kmem_zone_destroy(pagebuf_zone); + out: + return error; } @@ -1976,12 +1977,12 @@ pagebuf_init(void) void pagebuf_terminate(void) { - pagebuf_daemon_stop(); + xfs_buf_daemons_stop(); #ifdef PAGEBUF_TRACE ktrace_free(pagebuf_trace_buf); #endif - kmem_zone_destroy(pagebuf_cache); + kmem_zone_destroy(pagebuf_zone); kmem_shake_deregister(pagebuf_shake); } diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h index 74deed8..3f8f69a 100644 --- a/fs/xfs/linux-2.6/xfs_buf.h +++ b/fs/xfs/linux-2.6/xfs_buf.h @@ -576,7 +576,6 @@ extern xfs_buftarg_t *xfs_alloc_buftarg(struct block_device *, int); extern void xfs_free_buftarg(xfs_buftarg_t *, int); extern void xfs_wait_buftarg(xfs_buftarg_t *); extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int); -extern void xfs_incore_relse(xfs_buftarg_t *, int, int); extern int xfs_flush_buftarg(xfs_buftarg_t *, int); #define xfs_getsize_buftarg(buftarg) \ diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index 24fa3b1..f1ce432 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c @@ -57,7 +57,9 @@ #include <linux/smp_lock.h> static struct vm_operations_struct linvfs_file_vm_ops; - +#ifdef CONFIG_XFS_DMAPI +static struct vm_operations_struct linvfs_dmapi_file_vm_ops; +#endif STATIC inline ssize_t __linvfs_read( @@ -388,6 +390,14 @@ done: return -error; } +#ifdef CONFIG_XFS_DMAPI +STATIC void +linvfs_mmap_close( + struct vm_area_struct *vma) +{ + xfs_dm_mm_put(vma); +} +#endif /* CONFIG_XFS_DMAPI */ STATIC int linvfs_file_mmap( @@ -399,16 +409,19 @@ linvfs_file_mmap( vattr_t va = { .va_mask = XFS_AT_UPDATIME }; int error; + vma->vm_ops = &linvfs_file_vm_ops; + if (vp->v_vfsp->vfs_flag & VFS_DMI) { xfs_mount_t *mp = XFS_VFSTOM(vp->v_vfsp); error = -XFS_SEND_MMAP(mp, vma, 0); if (error) return error; +#ifdef CONFIG_XFS_DMAPI + vma->vm_ops = &linvfs_dmapi_file_vm_ops; +#endif } - vma->vm_ops = 
&linvfs_file_vm_ops; - VOP_SETATTR(vp, &va, XFS_AT_UPDATIME, NULL, error); if (!error) vn_revalidate(vp); /* update Linux inode flags */ @@ -609,7 +622,15 @@ struct file_operations linvfs_dir_operations = { static struct vm_operations_struct linvfs_file_vm_ops = { .nopage = filemap_nopage, .populate = filemap_populate, +}; + +#ifdef CONFIG_XFS_DMAPI +static struct vm_operations_struct linvfs_dmapi_file_vm_ops = { + .close = linvfs_mmap_close, + .nopage = filemap_nopage, + .populate = filemap_populate, #ifdef HAVE_VMOP_MPROTECT .mprotect = linvfs_mprotect, #endif }; +#endif /* CONFIG_XFS_DMAPI */ diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c index 69809ee..05a447e 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.c +++ b/fs/xfs/linux-2.6/xfs_ioctl.c @@ -1174,7 +1174,8 @@ xfs_ioc_xattr( switch (cmd) { case XFS_IOC_FSGETXATTR: { - va.va_mask = XFS_AT_XFLAGS|XFS_AT_EXTSIZE|XFS_AT_NEXTENTS; + va.va_mask = XFS_AT_XFLAGS | XFS_AT_EXTSIZE | \ + XFS_AT_NEXTENTS | XFS_AT_PROJID; VOP_GETATTR(vp, &va, 0, NULL, error); if (error) return -error; @@ -1182,6 +1183,7 @@ xfs_ioc_xattr( fa.fsx_xflags = va.va_xflags; fa.fsx_extsize = va.va_extsize; fa.fsx_nextents = va.va_nextents; + fa.fsx_projid = va.va_projid; if (copy_to_user(arg, &fa, sizeof(fa))) return -XFS_ERROR(EFAULT); @@ -1196,9 +1198,10 @@ xfs_ioc_xattr( if (filp->f_flags & (O_NDELAY|O_NONBLOCK)) attr_flags |= ATTR_NONBLOCK; - va.va_mask = XFS_AT_XFLAGS | XFS_AT_EXTSIZE; + va.va_mask = XFS_AT_XFLAGS | XFS_AT_EXTSIZE | XFS_AT_PROJID; va.va_xflags = fa.fsx_xflags; va.va_extsize = fa.fsx_extsize; + va.va_projid = fa.fsx_projid; VOP_SETATTR(vp, &va, attr_flags, NULL, error); if (!error) @@ -1207,7 +1210,8 @@ xfs_ioc_xattr( } case XFS_IOC_FSGETXATTRA: { - va.va_mask = XFS_AT_XFLAGS|XFS_AT_EXTSIZE|XFS_AT_ANEXTENTS; + va.va_mask = XFS_AT_XFLAGS | XFS_AT_EXTSIZE | \ + XFS_AT_ANEXTENTS | XFS_AT_PROJID; VOP_GETATTR(vp, &va, 0, NULL, error); if (error) return -error; @@ -1215,6 +1219,7 @@ xfs_ioc_xattr( fa.fsx_xflags = va.va_xflags; fa.fsx_extsize = va.va_extsize; fa.fsx_nextents = va.va_anextents; + fa.fsx_projid = va.va_projid; if (copy_to_user(arg, &fa, sizeof(fa))) return -XFS_ERROR(EFAULT); diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h index 71bb410..42dc5e4 100644 --- a/fs/xfs/linux-2.6/xfs_linux.h +++ b/fs/xfs/linux-2.6/xfs_linux.h @@ -145,10 +145,10 @@ static inline void set_buffer_unwritten_io(struct buffer_head *bh) #define xfs_inherit_nosymlinks xfs_params.inherit_nosym.val #define xfs_rotorstep xfs_params.rotorstep.val -#ifndef __smp_processor_id -#define __smp_processor_id() smp_processor_id() +#ifndef raw_smp_processor_id +#define raw_smp_processor_id() smp_processor_id() #endif -#define current_cpu() __smp_processor_id() +#define current_cpu() raw_smp_processor_id() #define current_pid() (current->pid) #define current_fsuid(cred) (current->fsuid) #define current_fsgid(cred) (current->fsgid) @@ -230,8 +230,10 @@ static inline void set_buffer_unwritten_io(struct buffer_head *bh) * field (see the QCMD macro in quota.h). These macros help keep the * code portable - they are not visible from the syscall interface. 
*/ -#define Q_XSETGQLIM XQM_CMD(0x8) /* set groups disk limits */ -#define Q_XGETGQUOTA XQM_CMD(0x9) /* get groups disk limits */ +#define Q_XSETGQLIM XQM_CMD(8) /* set groups disk limits */ +#define Q_XGETGQUOTA XQM_CMD(9) /* get groups disk limits */ +#define Q_XSETPQLIM XQM_CMD(10) /* set projects disk limits */ +#define Q_XGETPQUOTA XQM_CMD(11) /* get projects disk limits */ /* IRIX uses a dynamic sizing algorithm (ndquot = 200 + numprocs*2) */ /* we may well need to fine-tune this if it ever becomes an issue. */ diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c index aa9daae..acab58c 100644 --- a/fs/xfs/linux-2.6/xfs_lrw.c +++ b/fs/xfs/linux-2.6/xfs_lrw.c @@ -209,30 +209,6 @@ unlock: return (-status); } -/* - * xfs_inval_cached_pages - * - * This routine is responsible for keeping direct I/O and buffered I/O - * somewhat coherent. From here we make sure that we're at least - * temporarily holding the inode I/O lock exclusively and then call - * the page cache to flush and invalidate any cached pages. If there - * are no cached pages this routine will be very quick. - */ -void -xfs_inval_cached_pages( - vnode_t *vp, - xfs_iocore_t *io, - xfs_off_t offset, - int write, - int relock) -{ - if (VN_CACHED(vp)) { - xfs_inval_cached_trace(io, offset, -1, ctooff(offtoct(offset)), -1); - VOP_FLUSHINVAL_PAGES(vp, ctooff(offtoct(offset)), -1, FI_REMAPF_LOCKED); - } - -} - ssize_t /* bytes read, or (-) error */ xfs_read( bhv_desc_t *bdp, @@ -304,10 +280,11 @@ xfs_read( if (DM_EVENT_ENABLED(vp->v_vfsp, ip, DM_EVENT_READ) && !(ioflags & IO_INVIS)) { vrwlock_t locktype = VRWLOCK_READ; + int dmflags = FILP_DELAY_FLAG(file) | DM_SEM_FLAG_RD(ioflags); ret = -XFS_SEND_DATA(mp, DM_EVENT_READ, BHV_TO_VNODE(bdp), *offset, size, - FILP_DELAY_FLAG(file), &locktype); + dmflags, &locktype); if (ret) { xfs_iunlock(ip, XFS_IOLOCK_SHARED); goto unlock_isem; @@ -867,11 +844,15 @@ retry: !(ioflags & IO_INVIS)) { xfs_rwunlock(bdp, locktype); + if (need_isem) + up(&inode->i_sem); error = XFS_SEND_NAMESP(xip->i_mount, DM_EVENT_NOSPACE, vp, DM_RIGHT_NULL, vp, DM_RIGHT_NULL, NULL, NULL, 0, 0, 0); /* Delay flag intentionally unused */ if (error) - goto out_unlock_isem; + goto out_nounlocks; + if (need_isem) + down(&inode->i_sem); xfs_rwlock(bdp, locktype); pos = xip->i_d.di_size; ret = 0; @@ -986,6 +967,7 @@ retry: out_unlock_isem: if (need_isem) up(&inode->i_sem); + out_nounlocks: return -error; } diff --git a/fs/xfs/linux-2.6/xfs_lrw.h b/fs/xfs/linux-2.6/xfs_lrw.h index d723e35..f197a72 100644 --- a/fs/xfs/linux-2.6/xfs_lrw.h +++ b/fs/xfs/linux-2.6/xfs_lrw.h @@ -94,8 +94,6 @@ extern int xfs_bdstrat_cb(struct xfs_buf *); extern int xfs_zero_eof(struct vnode *, struct xfs_iocore *, xfs_off_t, xfs_fsize_t, xfs_fsize_t); -extern void xfs_inval_cached_pages(struct vnode *, struct xfs_iocore *, - xfs_off_t, int, int); extern ssize_t xfs_read(struct bhv_desc *, struct kiocb *, const struct iovec *, unsigned int, loff_t *, int, struct cred *); diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 455e2b2..5fe9af3 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -590,8 +590,10 @@ linvfs_sync_super( int error; int flags = SYNC_FSDATA; - if (wait) - flags |= SYNC_WAIT; + if (unlikely(sb->s_frozen == SB_FREEZE_WRITE)) + flags = SYNC_QUIESCE; + else + flags = SYNC_FSDATA | (wait ? 
SYNC_WAIT : 0); VFS_SYNC(vfsp, flags, NULL, error); sb->s_dirt = 0; @@ -701,7 +703,8 @@ linvfs_getxquota( struct vfs *vfsp = LINVFS_GET_VFS(sb); int error, getmode; - getmode = (type == GRPQUOTA) ? Q_XGETGQUOTA : Q_XGETQUOTA; + getmode = (type == USRQUOTA) ? Q_XGETQUOTA : + ((type == GRPQUOTA) ? Q_XGETGQUOTA : Q_XGETPQUOTA); VFS_QUOTACTL(vfsp, getmode, id, (caddr_t)fdq, error); return -error; } @@ -716,7 +719,8 @@ linvfs_setxquota( struct vfs *vfsp = LINVFS_GET_VFS(sb); int error, setmode; - setmode = (type == GRPQUOTA) ? Q_XSETGQLIM : Q_XSETQLIM; + setmode = (type == USRQUOTA) ? Q_XSETQLIM : + ((type == GRPQUOTA) ? Q_XSETGQLIM : Q_XSETPQLIM); VFS_QUOTACTL(vfsp, setmode, id, (caddr_t)fdq, error); return -error; } diff --git a/fs/xfs/linux-2.6/xfs_vfs.h b/fs/xfs/linux-2.6/xfs_vfs.h index 7649399..7ee1f71 100644 --- a/fs/xfs/linux-2.6/xfs_vfs.h +++ b/fs/xfs/linux-2.6/xfs_vfs.h @@ -107,6 +107,7 @@ typedef enum { #define SYNC_FSDATA 0x0020 /* flush fs data (e.g. superblocks) */ #define SYNC_REFCACHE 0x0040 /* prune some of the nfs ref cache */ #define SYNC_REMOUNT 0x0080 /* remount readonly, no dummy LRs */ +#define SYNC_QUIESCE 0x0100 /* quiesce filesystem for a snapshot */ typedef int (*vfs_mount_t)(bhv_desc_t *, struct xfs_mount_args *, struct cred *); diff --git a/fs/xfs/linux-2.6/xfs_vnode.c b/fs/xfs/linux-2.6/xfs_vnode.c index a832d16..250cad5 100644 --- a/fs/xfs/linux-2.6/xfs_vnode.c +++ b/fs/xfs/linux-2.6/xfs_vnode.c @@ -411,13 +411,13 @@ vn_remove( /* 0 */ (void *)(__psint_t)(vk), \ /* 1 */ (void *)(s), \ /* 2 */ (void *)(__psint_t) line, \ -/* 3 */ (void *)(vn_count(vp)), \ +/* 3 */ (void *)(__psint_t)(vn_count(vp)), \ /* 4 */ (void *)(ra), \ /* 5 */ (void *)(__psunsigned_t)(vp)->v_flag, \ /* 6 */ (void *)(__psint_t)current_cpu(), \ /* 7 */ (void *)(__psint_t)current_pid(), \ /* 8 */ (void *)__return_address, \ -/* 9 */ 0, 0, 0, 0, 0, 0, 0) +/* 9 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL) /* * Vnode tracing code. */ diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h index 00466c3..a6e57c6 100644 --- a/fs/xfs/linux-2.6/xfs_vnode.h +++ b/fs/xfs/linux-2.6/xfs_vnode.h @@ -426,7 +426,7 @@ typedef struct vattr { u_long va_extsize; /* file extent size */ u_long va_nextents; /* number of extents in file */ u_long va_anextents; /* number of attr extents in file */ - int va_projid; /* project id */ + prid_t va_projid; /* project id */ } vattr_t; /* diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c index 740d20d..46ce1e3 100644 --- a/fs/xfs/quota/xfs_dquot.c +++ b/fs/xfs/quota/xfs_dquot.c @@ -101,7 +101,7 @@ int xfs_dqerror_mod = 33; * is the d_id field. The idea is to fill in the entire q_core * when we read in the on disk dquot. */ -xfs_dquot_t * +STATIC xfs_dquot_t * xfs_qm_dqinit( xfs_mount_t *mp, xfs_dqid_t id, @@ -286,7 +286,9 @@ xfs_qm_adjust_dqlimits( * We also return 0 as the values of the timers in Q_GETQUOTA calls, when * enforcement's off. * In contrast, warnings are a little different in that they don't - * 'automatically' get started when limits get exceeded. + * 'automatically' get started when limits get exceeded. They do + * get reset to zero, however, when we find the count to be under + * the soft limit (they are only ever set non-zero via userspace).
*/ void xfs_qm_adjust_dqtimers( @@ -315,6 +317,8 @@ xfs_qm_adjust_dqtimers( INT_GET(d->d_blk_hardlimit, ARCH_CONVERT)))) { INT_SET(d->d_btimer, ARCH_CONVERT, get_seconds() + XFS_QI_BTIMELIMIT(mp)); + } else { + d->d_bwarns = 0; } } else { if ((!d->d_blk_softlimit || @@ -336,6 +340,8 @@ xfs_qm_adjust_dqtimers( INT_GET(d->d_ino_hardlimit, ARCH_CONVERT)))) { INT_SET(d->d_itimer, ARCH_CONVERT, get_seconds() + XFS_QI_ITIMELIMIT(mp)); + } else { + d->d_iwarns = 0; } } else { if ((!d->d_ino_softlimit || @@ -357,6 +363,8 @@ xfs_qm_adjust_dqtimers( INT_GET(d->d_rtb_hardlimit, ARCH_CONVERT)))) { INT_SET(d->d_rtbtimer, ARCH_CONVERT, get_seconds() + XFS_QI_RTBTIMELIMIT(mp)); + } else { + d->d_rtbwarns = 0; } } else { if ((!d->d_rtb_softlimit || @@ -371,68 +379,6 @@ xfs_qm_adjust_dqtimers( } /* - * Increment or reset warnings of a given dquot. - */ -int -xfs_qm_dqwarn( - xfs_disk_dquot_t *d, - uint flags) -{ - int warned; - - /* - * root's limits are not real limits. - */ - if (!d->d_id) - return (0); - - warned = 0; - if (INT_GET(d->d_blk_softlimit, ARCH_CONVERT) && - (INT_GET(d->d_bcount, ARCH_CONVERT) >= - INT_GET(d->d_blk_softlimit, ARCH_CONVERT))) { - if (flags & XFS_QMOPT_DOWARN) { - INT_MOD(d->d_bwarns, ARCH_CONVERT, +1); - warned++; - } - } else { - if (!d->d_blk_softlimit || - (INT_GET(d->d_bcount, ARCH_CONVERT) < - INT_GET(d->d_blk_softlimit, ARCH_CONVERT))) { - d->d_bwarns = 0; - } - } - - if (INT_GET(d->d_ino_softlimit, ARCH_CONVERT) > 0 && - (INT_GET(d->d_icount, ARCH_CONVERT) >= - INT_GET(d->d_ino_softlimit, ARCH_CONVERT))) { - if (flags & XFS_QMOPT_DOWARN) { - INT_MOD(d->d_iwarns, ARCH_CONVERT, +1); - warned++; - } - } else { - if (!d->d_ino_softlimit || - (INT_GET(d->d_icount, ARCH_CONVERT) < - INT_GET(d->d_ino_softlimit, ARCH_CONVERT))) { - d->d_iwarns = 0; - } - } -#ifdef QUOTADEBUG - if (INT_GET(d->d_iwarns, ARCH_CONVERT)) - cmn_err(CE_DEBUG, - "--------@@Inode warnings running : %Lu >= %Lu", - INT_GET(d->d_icount, ARCH_CONVERT), - INT_GET(d->d_ino_softlimit, ARCH_CONVERT)); - if (INT_GET(d->d_bwarns, ARCH_CONVERT)) - cmn_err(CE_DEBUG, - "--------@@Blks warnings running : %Lu >= %Lu", - INT_GET(d->d_bcount, ARCH_CONVERT), - INT_GET(d->d_blk_softlimit, ARCH_CONVERT)); -#endif - return (warned); -} - - -/* * initialize a buffer full of dquots and log the whole thing */ STATIC void @@ -461,9 +407,9 @@ xfs_qm_init_dquot_blk( for (i = 0; i < XFS_QM_DQPERBLK(mp); i++, d++, curid++) xfs_qm_dqinit_core(curid, type, d); xfs_trans_dquot_buf(tp, bp, - type & XFS_DQ_USER ? - XFS_BLI_UDQUOT_BUF : - XFS_BLI_GDQUOT_BUF); + (type & XFS_DQ_USER ? XFS_BLI_UDQUOT_BUF : + ((type & XFS_DQ_PROJ) ? XFS_BLI_PDQUOT_BUF : + XFS_BLI_GDQUOT_BUF))); xfs_trans_log_buf(tp, bp, 0, BBTOB(XFS_QI_DQCHUNKLEN(mp)) - 1); } @@ -544,8 +490,7 @@ xfs_qm_dqalloc( * the entire thing. */ xfs_qm_init_dquot_blk(tp, mp, INT_GET(dqp->q_core.d_id, ARCH_CONVERT), - dqp->dq_flags & (XFS_DQ_USER|XFS_DQ_GROUP), - bp); + dqp->dq_flags & XFS_DQ_ALLTYPES, bp); if ((error = xfs_bmap_finish(&tp, &flist, firstblock, &committed))) { goto error1; @@ -675,8 +620,7 @@ xfs_qm_dqtobp( /* * A simple sanity check in case we got a corrupted dquot... 
*/ - if (xfs_qm_dqcheck(ddq, id, - dqp->dq_flags & (XFS_DQ_USER|XFS_DQ_GROUP), + if (xfs_qm_dqcheck(ddq, id, dqp->dq_flags & XFS_DQ_ALLTYPES, flags & (XFS_QMOPT_DQREPAIR|XFS_QMOPT_DOWARN), "dqtobp")) { if (!(flags & XFS_QMOPT_DQREPAIR)) { @@ -953,8 +897,8 @@ int xfs_qm_dqget( xfs_mount_t *mp, xfs_inode_t *ip, /* locked inode (optional) */ - xfs_dqid_t id, /* gid or uid, depending on type */ - uint type, /* UDQUOT or GDQUOT */ + xfs_dqid_t id, /* uid/projid/gid depending on type */ + uint type, /* XFS_DQ_USER/XFS_DQ_PROJ/XFS_DQ_GROUP */ uint flags, /* DQALLOC, DQSUSER, DQREPAIR, DOWARN */ xfs_dquot_t **O_dqpp) /* OUT : locked incore dquot */ { @@ -965,6 +909,7 @@ xfs_qm_dqget( ASSERT(XFS_IS_QUOTA_RUNNING(mp)); if ((! XFS_IS_UQUOTA_ON(mp) && type == XFS_DQ_USER) || + (! XFS_IS_PQUOTA_ON(mp) && type == XFS_DQ_PROJ) || (! XFS_IS_GQUOTA_ON(mp) && type == XFS_DQ_GROUP)) { return (ESRCH); } @@ -983,7 +928,9 @@ xfs_qm_dqget( again: #ifdef DEBUG - ASSERT(type == XFS_DQ_USER || type == XFS_DQ_GROUP); + ASSERT(type == XFS_DQ_USER || + type == XFS_DQ_PROJ || + type == XFS_DQ_GROUP); if (ip) { ASSERT(XFS_ISLOCKED_INODE_EXCL(ip)); if (type == XFS_DQ_USER) @@ -1306,8 +1253,8 @@ xfs_qm_dqflush( return (error); } - if (xfs_qm_dqcheck(&dqp->q_core, INT_GET(ddqp->d_id, ARCH_CONVERT), 0, XFS_QMOPT_DOWARN, - "dqflush (incore copy)")) { + if (xfs_qm_dqcheck(&dqp->q_core, INT_GET(ddqp->d_id, ARCH_CONVERT), + 0, XFS_QMOPT_DOWARN, "dqflush (incore copy)")) { xfs_force_shutdown(dqp->q_mount, XFS_CORRUPT_INCORE); return XFS_ERROR(EIO); } @@ -1459,7 +1406,8 @@ xfs_dqlock2( { if (d1 && d2) { ASSERT(d1 != d2); - if (INT_GET(d1->q_core.d_id, ARCH_CONVERT) > INT_GET(d2->q_core.d_id, ARCH_CONVERT)) { + if (INT_GET(d1->q_core.d_id, ARCH_CONVERT) > + INT_GET(d2->q_core.d_id, ARCH_CONVERT)) { xfs_dqlock(d2); xfs_dqlock(d1); } else { @@ -1582,8 +1530,7 @@ xfs_qm_dqprint(xfs_dquot_t *dqp) cmn_err(CE_DEBUG, "-----------KERNEL DQUOT----------------"); cmn_err(CE_DEBUG, "---- dquotID = %d", (int)INT_GET(dqp->q_core.d_id, ARCH_CONVERT)); - cmn_err(CE_DEBUG, "---- type = %s", - XFS_QM_ISUDQ(dqp) ? 
"USR" : "GRP"); + cmn_err(CE_DEBUG, "---- type = %s", DQFLAGTO_TYPESTR(dqp)); cmn_err(CE_DEBUG, "---- fs = 0x%p", dqp->q_mount); cmn_err(CE_DEBUG, "---- blkno = 0x%x", (int) dqp->q_blkno); cmn_err(CE_DEBUG, "---- boffset = 0x%x", (int) dqp->q_bufoffset); diff --git a/fs/xfs/quota/xfs_dquot.h b/fs/xfs/quota/xfs_dquot.h index 0c3fe31..3917510 100644 --- a/fs/xfs/quota/xfs_dquot.h +++ b/fs/xfs/quota/xfs_dquot.h @@ -114,25 +114,18 @@ typedef struct xfs_dquot { #define XFS_DQHOLD(dqp) ((dqp)->q_nrefs++) /* - * Quota Accounting flags + * Quota Accounting/Enforcement flags */ -#define XFS_ALL_QUOTA_ACCT (XFS_UQUOTA_ACCT | XFS_GQUOTA_ACCT) -#define XFS_ALL_QUOTA_ENFD (XFS_UQUOTA_ENFD | XFS_GQUOTA_ENFD) -#define XFS_ALL_QUOTA_CHKD (XFS_UQUOTA_CHKD | XFS_GQUOTA_CHKD) -#define XFS_ALL_QUOTA_ACTV (XFS_UQUOTA_ACTIVE | XFS_GQUOTA_ACTIVE) -#define XFS_ALL_QUOTA_ACCT_ENFD (XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD|\ - XFS_GQUOTA_ACCT|XFS_GQUOTA_ENFD) +#define XFS_ALL_QUOTA_ACCT \ + (XFS_UQUOTA_ACCT | XFS_GQUOTA_ACCT | XFS_PQUOTA_ACCT) +#define XFS_ALL_QUOTA_ENFD (XFS_UQUOTA_ENFD | XFS_OQUOTA_ENFD) +#define XFS_ALL_QUOTA_CHKD (XFS_UQUOTA_CHKD | XFS_OQUOTA_CHKD) -#define XFS_IS_QUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_ALL_QUOTA_ACCT) -#define XFS_IS_UQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_UQUOTA_ACCT) -#define XFS_IS_GQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_GQUOTA_ACCT) - -/* - * Quota Limit Enforcement flags - */ +#define XFS_IS_QUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_ALL_QUOTA_ACCT) #define XFS_IS_QUOTA_ENFORCED(mp) ((mp)->m_qflags & XFS_ALL_QUOTA_ENFD) -#define XFS_IS_UQUOTA_ENFORCED(mp) ((mp)->m_qflags & XFS_UQUOTA_ENFD) -#define XFS_IS_GQUOTA_ENFORCED(mp) ((mp)->m_qflags & XFS_GQUOTA_ENFD) +#define XFS_IS_UQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_UQUOTA_ACCT) +#define XFS_IS_PQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_PQUOTA_ACCT) +#define XFS_IS_GQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_GQUOTA_ACCT) #ifdef DEBUG static inline int @@ -167,6 +160,8 @@ XFS_DQ_IS_LOCKED(xfs_dquot_t *dqp) #define XFS_DQ_IS_ON_FREELIST(dqp) ((dqp)->dq_flnext != (dqp)) #define XFS_DQ_IS_DIRTY(dqp) ((dqp)->dq_flags & XFS_DQ_DIRTY) #define XFS_QM_ISUDQ(dqp) ((dqp)->dq_flags & XFS_DQ_USER) +#define XFS_QM_ISPDQ(dqp) ((dqp)->dq_flags & XFS_DQ_PROJ) +#define XFS_QM_ISGDQ(dqp) ((dqp)->dq_flags & XFS_DQ_GROUP) #define XFS_DQ_TO_QINF(dqp) ((dqp)->q_mount->m_quotainfo) #define XFS_DQ_TO_QIP(dqp) (XFS_QM_ISUDQ(dqp) ? \ XFS_DQ_TO_QINF(dqp)->qi_uquotaip : \ @@ -174,7 +169,7 @@ XFS_DQ_IS_LOCKED(xfs_dquot_t *dqp) #define XFS_IS_THIS_QUOTA_OFF(d) (! (XFS_QM_ISUDQ(d) ? 
\ (XFS_IS_UQUOTA_ON((d)->q_mount)) : \ - (XFS_IS_GQUOTA_ON((d)->q_mount)))) + (XFS_IS_OQUOTA_ON((d)->q_mount)))) #ifdef XFS_DQUOT_TRACE /* @@ -211,7 +206,6 @@ extern void xfs_qm_adjust_dqtimers(xfs_mount_t *, xfs_disk_dquot_t *); extern void xfs_qm_adjust_dqlimits(xfs_mount_t *, xfs_disk_dquot_t *); -extern int xfs_qm_dqwarn(xfs_disk_dquot_t *, uint); extern int xfs_qm_dqget(xfs_mount_t *, xfs_inode_t *, xfs_dqid_t, uint, uint, xfs_dquot_t **); extern void xfs_qm_dqput(xfs_dquot_t *); diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c index a5425ee..f5271b7 100644 --- a/fs/xfs/quota/xfs_dquot_item.c +++ b/fs/xfs/quota/xfs_dquot_item.c @@ -428,7 +428,7 @@ xfs_qm_dquot_logitem_committing( /* * This is the ops vector for dquots */ -struct xfs_item_ops xfs_dquot_item_ops = { +STATIC struct xfs_item_ops xfs_dquot_item_ops = { .iop_size = (uint(*)(xfs_log_item_t*))xfs_qm_dquot_logitem_size, .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*)) xfs_qm_dquot_logitem_format, @@ -646,7 +646,7 @@ xfs_qm_qoffend_logitem_committing(xfs_qoff_logitem_t *qip, xfs_lsn_t commit_lsn) return; } -struct xfs_item_ops xfs_qm_qoffend_logitem_ops = { +STATIC struct xfs_item_ops xfs_qm_qoffend_logitem_ops = { .iop_size = (uint(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_size, .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*)) xfs_qm_qoff_logitem_format, @@ -669,7 +669,7 @@ struct xfs_item_ops xfs_qm_qoffend_logitem_ops = { /* * This is the ops vector shared by all quotaoff-start log items. */ -struct xfs_item_ops xfs_qm_qoff_logitem_ops = { +STATIC struct xfs_item_ops xfs_qm_qoff_logitem_ops = { .iop_size = (uint(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_size, .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*)) xfs_qm_qoff_logitem_format, diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c index 89f2cd6..f665ca8f 100644 --- a/fs/xfs/quota/xfs_qm.c +++ b/fs/xfs/quota/xfs_qm.c @@ -81,12 +81,18 @@ struct xfs_qm *xfs_Gqm; kmem_zone_t *qm_dqzone; kmem_zone_t *qm_dqtrxzone; -kmem_shaker_t xfs_qm_shaker; +STATIC kmem_shaker_t xfs_qm_shaker; STATIC void xfs_qm_list_init(xfs_dqlist_t *, char *, int); STATIC void xfs_qm_list_destroy(xfs_dqlist_t *); +STATIC void xfs_qm_freelist_init(xfs_frlist_t *); +STATIC void xfs_qm_freelist_destroy(xfs_frlist_t *); +STATIC int xfs_qm_mplist_nowait(xfs_mount_t *); +STATIC int xfs_qm_dqhashlock_nowait(xfs_dquot_t *); + STATIC int xfs_qm_init_quotainos(xfs_mount_t *); +STATIC int xfs_qm_init_quotainfo(xfs_mount_t *); STATIC int xfs_qm_shake(int, unsigned int); #ifdef DEBUG @@ -184,7 +190,7 @@ xfs_Gqm_init(void) /* * Destroy the global quota manager when its reference count goes to zero. */ -void +STATIC void xfs_qm_destroy( struct xfs_qm *xqm) { @@ -304,9 +310,9 @@ xfs_qm_mount_quotainit( uint flags) { /* - * User or group quotas has to be on. + * User, projects or group quotas has to be on. */ - ASSERT(flags & (XFSMNT_UQUOTA | XFSMNT_GQUOTA)); + ASSERT(flags & (XFSMNT_UQUOTA | XFSMNT_PQUOTA | XFSMNT_GQUOTA)); /* * Initialize the flags in the mount structure. 
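The xfs_qm_mount_quotainit() changes here and in the hunk that follows translate the XFSMNT_* mount flags into m_qflags, with group and project quota treated as alternatives that share one XFS_OQUOTA_ENFD enforcement bit. A compressed sketch of that mapping, using made-up flag values rather than the kernel's:

/* Illustrative flag values only; the kernel defines its own. */
#define MNT_UQUOTA     0x01
#define MNT_UQUOTAENF  0x02
#define MNT_GQUOTA     0x04
#define MNT_GQUOTAENF  0x08
#define MNT_PQUOTA     0x10
#define MNT_PQUOTAENF  0x20

#define Q_UQUOTA_ACCT  0x0001
#define Q_UQUOTA_ENFD  0x0002
#define Q_GQUOTA_ACCT  0x0004
#define Q_PQUOTA_ACCT  0x0008
#define Q_OQUOTA_ENFD  0x0010   /* one bit shared by group and project */

static unsigned int quota_mount_flags(unsigned int mntflags)
{
        unsigned int qflags = 0;

        if (mntflags & MNT_UQUOTA) {
                qflags |= Q_UQUOTA_ACCT;
                if (mntflags & MNT_UQUOTAENF)
                        qflags |= Q_UQUOTA_ENFD;
        }
        if (mntflags & MNT_GQUOTA) {
                qflags |= Q_GQUOTA_ACCT;
                if (mntflags & MNT_GQUOTAENF)
                        qflags |= Q_OQUOTA_ENFD;
        } else if (mntflags & MNT_PQUOTA) {
                qflags |= Q_PQUOTA_ACCT;
                if (mntflags & MNT_PQUOTAENF)
                        qflags |= Q_OQUOTA_ENFD;
        }
        return qflags;
}

The else-if keeps group and project accounting mutually exclusive, which matches the single "other quota" inode the two types end up sharing on disk in this patch.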
From this point @@ -324,7 +330,11 @@ xfs_qm_mount_quotainit( if (flags & XFSMNT_GQUOTA) { mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE); if (flags & XFSMNT_GQUOTAENF) - mp->m_qflags |= XFS_GQUOTA_ENFD; + mp->m_qflags |= XFS_OQUOTA_ENFD; + } else if (flags & XFSMNT_PQUOTA) { + mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE); + if (flags & XFSMNT_PQUOTAENF) + mp->m_qflags |= XFS_OQUOTA_ENFD; } } @@ -357,11 +367,11 @@ xfs_qm_mount_quotas( /* * If a file system had quotas running earlier, but decided to - * mount without -o quota/uquota/gquota options, revoke the + * mount without -o uquota/pquota/gquota options, revoke the * quotachecked license, and bail out. */ if (! XFS_IS_QUOTA_ON(mp) && - (mp->m_sb.sb_qflags & (XFS_UQUOTA_ACCT|XFS_GQUOTA_ACCT))) { + (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_ACCT)) { mp->m_qflags = 0; goto write_changes; } @@ -509,7 +519,7 @@ out: * Flush all dquots of the given file system to disk. The dquots are * _not_ purged from memory here, just their data written to disk. */ -int +STATIC int xfs_qm_dqflush_all( xfs_mount_t *mp, int flags) @@ -613,7 +623,7 @@ xfs_qm_detach_gdquots( STATIC int xfs_qm_dqpurge_int( xfs_mount_t *mp, - uint flags) /* QUOTAOFF/UMOUNTING/UQUOTA/GQUOTA */ + uint flags) /* QUOTAOFF/UMOUNTING/UQUOTA/PQUOTA/GQUOTA */ { xfs_dquot_t *dqp; uint dqtype; @@ -625,6 +635,7 @@ xfs_qm_dqpurge_int( return (0); dqtype = (flags & XFS_QMOPT_UQUOTA) ? XFS_DQ_USER : 0; + dqtype |= (flags & XFS_QMOPT_PQUOTA) ? XFS_DQ_PROJ : 0; dqtype |= (flags & XFS_QMOPT_GQUOTA) ? XFS_DQ_GROUP : 0; xfs_qm_mplist_lock(mp); @@ -734,11 +745,11 @@ xfs_qm_dqattach_one( /* * udqhint is the i_udquot field in inode, and is non-NULL only - * when the type arg is XFS_DQ_GROUP. Its purpose is to save a + * when the type arg is group/project. Its purpose is to save a * lookup by dqid (xfs_qm_dqget) by caching a group dquot inside * the user dquot. */ - ASSERT(!udqhint || type == XFS_DQ_GROUP); + ASSERT(!udqhint || type == XFS_DQ_GROUP || type == XFS_DQ_PROJ); if (udqhint && !dolock) xfs_dqlock(udqhint); @@ -897,8 +908,8 @@ xfs_qm_dqattach_grouphint( /* - * Given a locked inode, attach dquot(s) to it, taking UQUOTAON / GQUOTAON - * in to account. + * Given a locked inode, attach dquot(s) to it, taking U/G/P-QUOTAON + * into account. * If XFS_QMOPT_DQALLOC, the dquot(s) will be allocated if needed. * If XFS_QMOPT_DQLOCK, the dquot(s) will be returned locked. This option pretty * much made this code a complete mess, but it has been pretty useful. @@ -937,8 +948,13 @@ xfs_qm_dqattach( nquotas++; } ASSERT(XFS_ISLOCKED_INODE_EXCL(ip)); - if (XFS_IS_GQUOTA_ON(mp)) { - error = xfs_qm_dqattach_one(ip, ip->i_d.di_gid, XFS_DQ_GROUP, + if (XFS_IS_OQUOTA_ON(mp)) { + error = XFS_IS_GQUOTA_ON(mp) ? 
+ xfs_qm_dqattach_one(ip, ip->i_d.di_gid, XFS_DQ_GROUP, + flags & XFS_QMOPT_DQALLOC, + flags & XFS_QMOPT_DQLOCK, + ip->i_udquot, &ip->i_gdquot) : + xfs_qm_dqattach_one(ip, ip->i_d.di_projid, XFS_DQ_PROJ, flags & XFS_QMOPT_DQALLOC, flags & XFS_QMOPT_DQLOCK, ip->i_udquot, &ip->i_gdquot); @@ -989,7 +1005,7 @@ xfs_qm_dqattach( } if (XFS_IS_UQUOTA_ON(mp)) ASSERT(ip->i_udquot); - if (XFS_IS_GQUOTA_ON(mp)) + if (XFS_IS_OQUOTA_ON(mp)) ASSERT(ip->i_gdquot); } #endif @@ -1018,13 +1034,13 @@ xfs_qm_dqdetach( ASSERT(ip->i_ino != ip->i_mount->m_sb.sb_uquotino); ASSERT(ip->i_ino != ip->i_mount->m_sb.sb_gquotino); - if (ip->i_udquot) - xfs_dqtrace_entry_ino(ip->i_udquot, "DQDETTACH", ip); if (ip->i_udquot) { + xfs_dqtrace_entry_ino(ip->i_udquot, "DQDETTACH", ip); xfs_qm_dqrele(ip->i_udquot); ip->i_udquot = NULL; } if (ip->i_gdquot) { + xfs_dqtrace_entry_ino(ip->i_gdquot, "DQDETTACH", ip); xfs_qm_dqrele(ip->i_gdquot); ip->i_gdquot = NULL; } @@ -1149,7 +1165,7 @@ xfs_qm_sync( * This initializes all the quota information that's kept in the * mount structure */ -int +STATIC int xfs_qm_init_quotainfo( xfs_mount_t *mp) { @@ -1202,8 +1218,9 @@ xfs_qm_init_quotainfo( * and group quotas, at least not at this point. */ error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)0, - (XFS_IS_UQUOTA_RUNNING(mp)) ? - XFS_DQ_USER : XFS_DQ_GROUP, + XFS_IS_UQUOTA_RUNNING(mp) ? XFS_DQ_USER : + (XFS_IS_GQUOTA_RUNNING(mp) ? XFS_DQ_GROUP : + XFS_DQ_PROJ), XFS_QMOPT_DQSUSER|XFS_QMOPT_DOWARN, &dqp); if (! error) { @@ -1234,6 +1251,10 @@ xfs_qm_init_quotainfo( INT_GET(ddqp->d_iwarns, ARCH_CONVERT) ? INT_GET(ddqp->d_iwarns, ARCH_CONVERT) : XFS_QM_IWARNLIMIT; + qinf->qi_rtbwarnlimit = + INT_GET(ddqp->d_rtbwarns, ARCH_CONVERT) ? + INT_GET(ddqp->d_rtbwarns, ARCH_CONVERT) : + XFS_QM_RTBWARNLIMIT; qinf->qi_bhardlimit = INT_GET(ddqp->d_blk_hardlimit, ARCH_CONVERT); qinf->qi_bsoftlimit = @@ -1259,6 +1280,7 @@ xfs_qm_init_quotainfo( qinf->qi_rtbtimelimit = XFS_QM_RTBTIMELIMIT; qinf->qi_bwarnlimit = XFS_QM_BWARNLIMIT; qinf->qi_iwarnlimit = XFS_QM_IWARNLIMIT; + qinf->qi_rtbwarnlimit = XFS_QM_RTBWARNLIMIT; } return (0); @@ -1366,13 +1388,20 @@ xfs_qm_dqget_noattach( ASSERT(udqp); } - if (XFS_IS_GQUOTA_ON(mp)) { + if (XFS_IS_OQUOTA_ON(mp)) { ASSERT(ip->i_gdquot == NULL); if (udqp) xfs_dqunlock(udqp); - if ((error = xfs_qm_dqget(mp, ip, ip->i_d.di_gid, XFS_DQ_GROUP, - XFS_QMOPT_DQALLOC|XFS_QMOPT_DOWARN, - &gdqp))) { + error = XFS_IS_GQUOTA_ON(mp) ? + xfs_qm_dqget(mp, ip, + ip->i_d.di_gid, XFS_DQ_GROUP, + XFS_QMOPT_DQALLOC|XFS_QMOPT_DOWARN, + &gdqp) : + xfs_qm_dqget(mp, ip, + ip->i_d.di_projid, XFS_DQ_PROJ, + XFS_QMOPT_DQALLOC|XFS_QMOPT_DOWARN, + &gdqp); + if (error) { if (udqp) xfs_qm_dqrele(udqp); ASSERT(error != ESRCH); @@ -1521,8 +1550,10 @@ xfs_qm_reset_dqcounts( INT_SET(ddq->d_rtbcount, ARCH_CONVERT, 0ULL); INT_SET(ddq->d_btimer, ARCH_CONVERT, (time_t)0); INT_SET(ddq->d_itimer, ARCH_CONVERT, (time_t)0); + INT_SET(ddq->d_rtbtimer, ARCH_CONVERT, (time_t)0); INT_SET(ddq->d_bwarns, ARCH_CONVERT, 0UL); INT_SET(ddq->d_iwarns, ARCH_CONVERT, 0UL); + INT_SET(ddq->d_rtbwarns, ARCH_CONVERT, 0UL); ddq = (xfs_disk_dquot_t *) ((xfs_dqblk_t *)ddq + 1); } @@ -1541,11 +1572,14 @@ xfs_qm_dqiter_bufs( int error; int notcommitted; int incr; + int type; ASSERT(blkcnt > 0); notcommitted = 0; incr = (blkcnt > XFS_QM_MAX_DQCLUSTER_LOGSZ) ? XFS_QM_MAX_DQCLUSTER_LOGSZ : blkcnt; + type = flags & XFS_QMOPT_UQUOTA ? XFS_DQ_USER : + (flags & XFS_QMOPT_PQUOTA ? 
XFS_DQ_PROJ : XFS_DQ_GROUP); error = 0; /* @@ -1564,9 +1598,7 @@ xfs_qm_dqiter_bufs( if (error) break; - (void) xfs_qm_reset_dqcounts(mp, bp, firstid, - flags & XFS_QMOPT_UQUOTA ? - XFS_DQ_USER : XFS_DQ_GROUP); + (void) xfs_qm_reset_dqcounts(mp, bp, firstid, type); xfs_bdwrite(mp, bp); /* * goto the next block. @@ -1578,7 +1610,7 @@ xfs_qm_dqiter_bufs( } /* - * Iterate over all allocated USR/GRP dquots in the system, calling a + * Iterate over all allocated USR/GRP/PRJ dquots in the system, calling a * caller supplied function for every chunk of dquots that we find. */ STATIC int @@ -1849,7 +1881,7 @@ xfs_qm_dqusage_adjust( xfs_qm_quotacheck_dqadjust(udqp, nblks, rtblks); xfs_qm_dqput(udqp); } - if (XFS_IS_GQUOTA_ON(mp)) { + if (XFS_IS_OQUOTA_ON(mp)) { ASSERT(gdqp); xfs_qm_quotacheck_dqadjust(gdqp, nblks, rtblks); xfs_qm_dqput(gdqp); @@ -1898,7 +1930,7 @@ xfs_qm_quotacheck( cmn_err(CE_NOTE, "XFS quotacheck %s: Please wait.", mp->m_fsname); /* - * First we go thru all the dquots on disk, USR and GRP, and reset + * First we go thru all the dquots on disk, USR and GRP/PRJ, and reset * their counters to zero. We need a clean slate. * We don't log our changes till later. */ @@ -1909,9 +1941,10 @@ xfs_qm_quotacheck( } if ((gip = XFS_QI_GQIP(mp))) { - if ((error = xfs_qm_dqiterate(mp, gip, XFS_QMOPT_GQUOTA))) + if ((error = xfs_qm_dqiterate(mp, gip, XFS_IS_GQUOTA_ON(mp) ? + XFS_QMOPT_GQUOTA : XFS_QMOPT_PQUOTA))) goto error_return; - flags |= XFS_GQUOTA_CHKD; + flags |= XFS_OQUOTA_CHKD; } do { @@ -1938,7 +1971,7 @@ xfs_qm_quotacheck( if (error) { xfs_qm_dqpurge_all(mp, XFS_QMOPT_UQUOTA|XFS_QMOPT_GQUOTA| - XFS_QMOPT_QUOTAOFF); + XFS_QMOPT_PQUOTA|XFS_QMOPT_QUOTAOFF); goto error_return; } /* @@ -1961,7 +1994,7 @@ xfs_qm_quotacheck( * quotachecked status, since we won't be doing accounting for * that type anymore. */ - mp->m_qflags &= ~(XFS_GQUOTA_CHKD | XFS_UQUOTA_CHKD); + mp->m_qflags &= ~(XFS_OQUOTA_CHKD | XFS_UQUOTA_CHKD); mp->m_qflags |= flags; XQM_LIST_PRINT(&(XFS_QI_MPL_LIST(mp)), MPL_NEXT, "++++ Mp list +++"); @@ -2013,7 +2046,7 @@ xfs_qm_init_quotainos( 0, 0, &uip, 0))) return XFS_ERROR(error); } - if (XFS_IS_GQUOTA_ON(mp) && + if (XFS_IS_OQUOTA_ON(mp) && mp->m_sb.sb_gquotino != NULLFSINO) { ASSERT(mp->m_sb.sb_gquotino > 0); if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, @@ -2043,10 +2076,12 @@ xfs_qm_init_quotainos( flags &= ~XFS_QMOPT_SBVERSION; } - if (XFS_IS_GQUOTA_ON(mp) && gip == NULL) { - if ((error = xfs_qm_qino_alloc(mp, &gip, - sbflags | XFS_SB_GQUOTINO, - flags | XFS_QMOPT_GQUOTA))) { + if (XFS_IS_OQUOTA_ON(mp) && gip == NULL) { + flags |= (XFS_IS_GQUOTA_ON(mp) ? 
+ XFS_QMOPT_GQUOTA : XFS_QMOPT_PQUOTA); + error = xfs_qm_qino_alloc(mp, &gip, + sbflags | XFS_SB_GQUOTINO, flags); + if (error) { if (uip) VN_RELE(XFS_ITOV(uip)); @@ -2452,6 +2487,7 @@ xfs_qm_vop_dqalloc( xfs_inode_t *ip, uid_t uid, gid_t gid, + prid_t prid, uint flags, xfs_dquot_t **O_udqpp, xfs_dquot_t **O_gdqpp) @@ -2483,8 +2519,7 @@ xfs_qm_vop_dqalloc( } uq = gq = NULL; - if ((flags & XFS_QMOPT_UQUOTA) && - XFS_IS_UQUOTA_ON(mp)) { + if ((flags & XFS_QMOPT_UQUOTA) && XFS_IS_UQUOTA_ON(mp)) { if (ip->i_d.di_uid != uid) { /* * What we need is the dquot that has this uid, and @@ -2522,8 +2557,7 @@ xfs_qm_vop_dqalloc( xfs_dqunlock(uq); } } - if ((flags & XFS_QMOPT_GQUOTA) && - XFS_IS_GQUOTA_ON(mp)) { + if ((flags & XFS_QMOPT_GQUOTA) && XFS_IS_GQUOTA_ON(mp)) { if (ip->i_d.di_gid != gid) { xfs_iunlock(ip, lockflags); if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)gid, @@ -2546,6 +2580,29 @@ xfs_qm_vop_dqalloc( XFS_DQHOLD(gq); xfs_dqunlock(gq); } + } else if ((flags & XFS_QMOPT_PQUOTA) && XFS_IS_PQUOTA_ON(mp)) { + if (ip->i_d.di_projid != prid) { + xfs_iunlock(ip, lockflags); + if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)prid, + XFS_DQ_PROJ, + XFS_QMOPT_DQALLOC | + XFS_QMOPT_DOWARN, + &gq))) { + if (uq) + xfs_qm_dqrele(uq); + ASSERT(error != ENOENT); + return (error); + } + xfs_dqunlock(gq); + lockflags = XFS_ILOCK_SHARED; + xfs_ilock(ip, lockflags); + } else { + ASSERT(ip->i_gdquot); + gq = ip->i_gdquot; + xfs_dqlock(gq); + XFS_DQHOLD(gq); + xfs_dqunlock(gq); + } } if (uq) xfs_dqtrace_entry_ino(uq, "DQALLOC", ip); @@ -2574,6 +2631,9 @@ xfs_qm_vop_chown( xfs_dquot_t *newdq) { xfs_dquot_t *prevdq; + uint bfield = XFS_IS_REALTIME_INODE(ip) ? + XFS_TRANS_DQ_RTBCOUNT : XFS_TRANS_DQ_BCOUNT; + ASSERT(XFS_ISLOCKED_INODE_EXCL(ip)); ASSERT(XFS_IS_QUOTA_RUNNING(ip->i_mount)); @@ -2582,20 +2642,12 @@ xfs_qm_vop_chown( ASSERT(prevdq); ASSERT(prevdq != newdq); - xfs_trans_mod_dquot(tp, prevdq, - XFS_TRANS_DQ_BCOUNT, - -(ip->i_d.di_nblocks)); - xfs_trans_mod_dquot(tp, prevdq, - XFS_TRANS_DQ_ICOUNT, - -1); + xfs_trans_mod_dquot(tp, prevdq, bfield, -(ip->i_d.di_nblocks)); + xfs_trans_mod_dquot(tp, prevdq, XFS_TRANS_DQ_ICOUNT, -1); /* the sparkling new dquot */ - xfs_trans_mod_dquot(tp, newdq, - XFS_TRANS_DQ_BCOUNT, - ip->i_d.di_nblocks); - xfs_trans_mod_dquot(tp, newdq, - XFS_TRANS_DQ_ICOUNT, - 1); + xfs_trans_mod_dquot(tp, newdq, bfield, ip->i_d.di_nblocks); + xfs_trans_mod_dquot(tp, newdq, XFS_TRANS_DQ_ICOUNT, 1); /* * Take an extra reference, because the inode @@ -2611,7 +2663,7 @@ xfs_qm_vop_chown( } /* - * Quota reservations for setattr(AT_UID|AT_GID). + * Quota reservations for setattr(AT_UID|AT_GID|AT_PROJID). */ int xfs_qm_vop_chown_reserve( @@ -2623,7 +2675,7 @@ xfs_qm_vop_chown_reserve( { int error; xfs_mount_t *mp; - uint delblks; + uint delblks, blkflags; xfs_dquot_t *unresudq, *unresgdq, *delblksudq, *delblksgdq; ASSERT(XFS_ISLOCKED_INODE(ip)); @@ -2632,6 +2684,8 @@ xfs_qm_vop_chown_reserve( delblks = ip->i_delayed_blks; delblksudq = delblksgdq = unresudq = unresgdq = NULL; + blkflags = XFS_IS_REALTIME_INODE(ip) ? 
+ XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS; if (XFS_IS_UQUOTA_ON(mp) && udqp && ip->i_d.di_uid != (uid_t)INT_GET(udqp->q_core.d_id, ARCH_CONVERT)) { @@ -2646,18 +2700,22 @@ xfs_qm_vop_chown_reserve( unresudq = ip->i_udquot; } } - if (XFS_IS_GQUOTA_ON(ip->i_mount) && gdqp && - ip->i_d.di_gid != INT_GET(gdqp->q_core.d_id, ARCH_CONVERT)) { - delblksgdq = gdqp; - if (delblks) { - ASSERT(ip->i_gdquot); - unresgdq = ip->i_gdquot; + if (XFS_IS_OQUOTA_ON(ip->i_mount) && gdqp) { + if ((XFS_IS_GQUOTA_ON(ip->i_mount) && ip->i_d.di_gid != + INT_GET(gdqp->q_core.d_id, ARCH_CONVERT)) || + (XFS_IS_PQUOTA_ON(ip->i_mount) && ip->i_d.di_projid != + INT_GET(gdqp->q_core.d_id, ARCH_CONVERT))) { + delblksgdq = gdqp; + if (delblks) { + ASSERT(ip->i_gdquot); + unresgdq = ip->i_gdquot; + } } } if ((error = xfs_trans_reserve_quota_bydquots(tp, ip->i_mount, delblksudq, delblksgdq, ip->i_d.di_nblocks, 1, - flags | XFS_QMOPT_RES_REGBLKS))) + flags | blkflags))) return (error); /* @@ -2674,11 +2732,11 @@ xfs_qm_vop_chown_reserve( ASSERT(unresudq || unresgdq); if ((error = xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount, delblksudq, delblksgdq, (xfs_qcnt_t)delblks, 0, - flags | XFS_QMOPT_RES_REGBLKS))) + flags | blkflags))) return (error); xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount, unresudq, unresgdq, -((xfs_qcnt_t)delblks), 0, - XFS_QMOPT_RES_REGBLKS); + blkflags); } return (0); @@ -2751,7 +2809,7 @@ xfs_qm_vop_dqattach_and_dqmod_newinode( } /* ------------- list stuff -----------------*/ -void +STATIC void xfs_qm_freelist_init(xfs_frlist_t *ql) { ql->qh_next = ql->qh_prev = (xfs_dquot_t *) ql; @@ -2760,7 +2818,7 @@ xfs_qm_freelist_init(xfs_frlist_t *ql) ql->qh_nelems = 0; } -void +STATIC void xfs_qm_freelist_destroy(xfs_frlist_t *ql) { xfs_dquot_t *dqp, *nextdqp; @@ -2786,7 +2844,7 @@ xfs_qm_freelist_destroy(xfs_frlist_t *ql) ASSERT(ql->qh_nelems == 0); } -void +STATIC void xfs_qm_freelist_insert(xfs_frlist_t *ql, xfs_dquot_t *dq) { dq->dq_flnext = ql->qh_next; @@ -2816,7 +2874,7 @@ xfs_qm_freelist_append(xfs_frlist_t *ql, xfs_dquot_t *dq) xfs_qm_freelist_insert((xfs_frlist_t *)ql->qh_prev, dq); } -int +STATIC int xfs_qm_dqhashlock_nowait( xfs_dquot_t *dqp) { @@ -2836,7 +2894,7 @@ xfs_qm_freelist_lock_nowait( return (locked); } -int +STATIC int xfs_qm_mplist_nowait( xfs_mount_t *mp) { diff --git a/fs/xfs/quota/xfs_qm.h b/fs/xfs/quota/xfs_qm.h index dcf1a7a..b03eecf 100644 --- a/fs/xfs/quota/xfs_qm.h +++ b/fs/xfs/quota/xfs_qm.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved. + * Copyright (c) 2000-2005 Silicon Graphics, Inc. All Rights Reserved. 
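In the xfs_qm_vop_chown() hunk above, the per-inode block transfer now picks the realtime or regular block counter once, via a bfield variable, instead of always charging regular blocks. A simplified sketch of that transfer between two accounting records; the struct is hypothetical and stands in for a pair of dquots:

struct acct {
        long long bcount;    /* regular blocks  */
        long long rtbcount;  /* realtime blocks */
        long long icount;    /* inodes          */
};

/* Move an inode's usage from the old owner's record to the new one,
 * charging the realtime counter for realtime files. */
static void chown_transfer(struct acct *olddq, struct acct *newdq,
                           long long nblocks, int is_realtime)
{
        long long *oldfield = is_realtime ? &olddq->rtbcount : &olddq->bcount;
        long long *newfield = is_realtime ? &newdq->rtbcount : &newdq->bcount;

        *oldfield -= nblocks;
        olddq->icount -= 1;
        *newfield += nblocks;
        newdq->icount += 1;
}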
* * This program is free software; you can redistribute it and/or modify it * under the terms of version 2 of the GNU General Public License as @@ -133,8 +133,9 @@ typedef struct xfs_quotainfo { time_t qi_btimelimit; /* limit for blks timer */ time_t qi_itimelimit; /* limit for inodes timer */ time_t qi_rtbtimelimit;/* limit for rt blks timer */ - xfs_qwarncnt_t qi_bwarnlimit; /* limit for num warnings */ - xfs_qwarncnt_t qi_iwarnlimit; /* limit for num warnings */ + xfs_qwarncnt_t qi_bwarnlimit; /* limit for blks warnings */ + xfs_qwarncnt_t qi_iwarnlimit; /* limit for inodes warnings */ + xfs_qwarncnt_t qi_rtbwarnlimit;/* limit for rt blks warnings */ mutex_t qi_quotaofflock;/* to serialize quotaoff */ xfs_filblks_t qi_dqchunklen; /* # BBs in a chunk of dqs */ uint qi_dqperchunk; /* # ondisk dqs in above chunk */ @@ -176,6 +177,7 @@ typedef struct xfs_dquot_acct { #define XFS_QM_BWARNLIMIT 5 #define XFS_QM_IWARNLIMIT 5 +#define XFS_QM_RTBWARNLIMIT 5 #define XFS_QM_LOCK(xqm) (mutex_lock(&xqm##_lock, PINOD)) #define XFS_QM_UNLOCK(xqm) (mutex_unlock(&xqm##_lock)) @@ -184,7 +186,6 @@ typedef struct xfs_dquot_acct { extern void xfs_mount_reset_sbqflags(xfs_mount_t *); -extern int xfs_qm_init_quotainfo(xfs_mount_t *); extern void xfs_qm_destroy_quotainfo(xfs_mount_t *); extern int xfs_qm_mount_quotas(xfs_mount_t *, int); extern void xfs_qm_mount_quotainit(xfs_mount_t *, uint); @@ -203,7 +204,7 @@ extern void xfs_qm_dqrele_all_inodes(xfs_mount_t *, uint); /* vop stuff */ extern int xfs_qm_vop_dqalloc(xfs_mount_t *, xfs_inode_t *, - uid_t, gid_t, uint, + uid_t, gid_t, prid_t, uint, xfs_dquot_t **, xfs_dquot_t **); extern void xfs_qm_vop_dqattach_and_dqmod_newinode( xfs_trans_t *, xfs_inode_t *, @@ -215,14 +216,9 @@ extern int xfs_qm_vop_chown_reserve(xfs_trans_t *, xfs_inode_t *, xfs_dquot_t *, xfs_dquot_t *, uint); /* list stuff */ -extern void xfs_qm_freelist_init(xfs_frlist_t *); -extern void xfs_qm_freelist_destroy(xfs_frlist_t *); -extern void xfs_qm_freelist_insert(xfs_frlist_t *, xfs_dquot_t *); extern void xfs_qm_freelist_append(xfs_frlist_t *, xfs_dquot_t *); extern void xfs_qm_freelist_unlink(xfs_dquot_t *); extern int xfs_qm_freelist_lock_nowait(xfs_qm_t *); -extern int xfs_qm_mplist_nowait(xfs_mount_t *); -extern int xfs_qm_dqhashlock_nowait(xfs_dquot_t *); /* system call interface */ extern int xfs_qm_quotactl(bhv_desc_t *, int, int, xfs_caddr_t); diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/quota/xfs_qm_bhv.c index be67d9c..dc3c37a 100644 --- a/fs/xfs/quota/xfs_qm_bhv.c +++ b/fs/xfs/quota/xfs_qm_bhv.c @@ -71,10 +71,13 @@ #define MNTOPT_NOQUOTA "noquota" /* no quotas */ #define MNTOPT_USRQUOTA "usrquota" /* user quota enabled */ #define MNTOPT_GRPQUOTA "grpquota" /* group quota enabled */ +#define MNTOPT_PRJQUOTA "prjquota" /* project quota enabled */ #define MNTOPT_UQUOTA "uquota" /* user quota (IRIX variant) */ #define MNTOPT_GQUOTA "gquota" /* group quota (IRIX variant) */ +#define MNTOPT_PQUOTA "pquota" /* project quota (IRIX variant) */ #define MNTOPT_UQUOTANOENF "uqnoenforce"/* user quota limit enforcement */ #define MNTOPT_GQUOTANOENF "gqnoenforce"/* group quota limit enforcement */ +#define MNTOPT_PQUOTANOENF "pqnoenforce"/* project quota limit enforcement */ #define MNTOPT_QUOTANOENF "qnoenforce" /* same as uqnoenforce */ STATIC int @@ -109,6 +112,14 @@ xfs_qm_parseargs( args->flags |= XFSMNT_UQUOTA; args->flags &= ~XFSMNT_UQUOTAENF; referenced = 1; + } else if (!strcmp(this_char, MNTOPT_PQUOTA) || + !strcmp(this_char, MNTOPT_PRJQUOTA)) { + args->flags |= XFSMNT_PQUOTA | 
XFSMNT_PQUOTAENF; + referenced = 1; + } else if (!strcmp(this_char, MNTOPT_PQUOTANOENF)) { + args->flags |= XFSMNT_PQUOTA; + args->flags &= ~XFSMNT_PQUOTAENF; + referenced = 1; } else if (!strcmp(this_char, MNTOPT_GQUOTA) || !strcmp(this_char, MNTOPT_GRPQUOTA)) { args->flags |= XFSMNT_GQUOTA | XFSMNT_GQUOTAENF; @@ -127,6 +138,12 @@ xfs_qm_parseargs( *this_char++ = ','; } + if ((args->flags & XFSMNT_GQUOTA) && (args->flags & XFSMNT_PQUOTA)) { + cmn_err(CE_WARN, + "XFS: cannot mount with both project and group quota"); + return XFS_ERROR(EINVAL); + } + PVFS_PARSEARGS(BHV_NEXT(bhv), options, args, update, error); if (!error && !referenced) bhv_remove_vfsops(bhvtovfs(bhv), VFS_POSITION_QM); @@ -148,13 +165,19 @@ xfs_qm_showargs( seq_puts(m, "," MNTOPT_UQUOTANOENF); } + if (mp->m_qflags & XFS_PQUOTA_ACCT) { + (mp->m_qflags & XFS_OQUOTA_ENFD) ? + seq_puts(m, "," MNTOPT_PRJQUOTA) : + seq_puts(m, "," MNTOPT_PQUOTANOENF); + } + if (mp->m_qflags & XFS_GQUOTA_ACCT) { - (mp->m_qflags & XFS_GQUOTA_ENFD) ? + (mp->m_qflags & XFS_OQUOTA_ENFD) ? seq_puts(m, "," MNTOPT_GRPQUOTA) : seq_puts(m, "," MNTOPT_GQUOTANOENF); } - if (!(mp->m_qflags & (XFS_UQUOTA_ACCT|XFS_GQUOTA_ACCT))) + if (!(mp->m_qflags & XFS_ALL_QUOTA_ACCT)) seq_puts(m, "," MNTOPT_NOQUOTA); PVFS_SHOWARGS(BHV_NEXT(bhv), m, error); @@ -171,7 +194,7 @@ xfs_qm_mount( struct xfs_mount *mp = XFS_VFSTOM(vfsp); int error; - if (args->flags & (XFSMNT_UQUOTA | XFSMNT_GQUOTA)) + if (args->flags & (XFSMNT_UQUOTA | XFSMNT_GQUOTA | XFSMNT_PQUOTA)) xfs_qm_mount_quotainit(mp, args->flags); PVFS_MOUNT(BHV_NEXT(bhv), args, cr, error); return error; @@ -255,16 +278,17 @@ xfs_qm_newmount( uint *quotaflags) { uint quotaondisk; - uint uquotaondisk = 0, gquotaondisk = 0; + uint uquotaondisk = 0, gquotaondisk = 0, pquotaondisk = 0; *quotaflags = 0; *needquotamount = B_FALSE; quotaondisk = XFS_SB_VERSION_HASQUOTA(&mp->m_sb) && - mp->m_sb.sb_qflags & (XFS_UQUOTA_ACCT|XFS_GQUOTA_ACCT); + (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_ACCT); if (quotaondisk) { uquotaondisk = mp->m_sb.sb_qflags & XFS_UQUOTA_ACCT; + pquotaondisk = mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT; gquotaondisk = mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT; } @@ -277,13 +301,16 @@ xfs_qm_newmount( if (((uquotaondisk && !XFS_IS_UQUOTA_ON(mp)) || (!uquotaondisk && XFS_IS_UQUOTA_ON(mp)) || + (pquotaondisk && !XFS_IS_PQUOTA_ON(mp)) || + (!pquotaondisk && XFS_IS_PQUOTA_ON(mp)) || (gquotaondisk && !XFS_IS_GQUOTA_ON(mp)) || - (!gquotaondisk && XFS_IS_GQUOTA_ON(mp))) && + (!gquotaondisk && XFS_IS_OQUOTA_ON(mp))) && xfs_dev_is_read_only(mp, "changing quota state")) { cmn_err(CE_WARN, - "XFS: please mount with%s%s%s.", + "XFS: please mount with%s%s%s%s.", (!quotaondisk ? "out quota" : ""), (uquotaondisk ? " usrquota" : ""), + (pquotaondisk ? " prjquota" : ""), (gquotaondisk ? " grpquota" : "")); return XFS_ERROR(EPERM); } @@ -359,7 +386,7 @@ xfs_qm_dqrele_null( } -struct xfs_qmops xfs_qmcore_xfs = { +STATIC struct xfs_qmops xfs_qmcore_xfs = { .xfs_qminit = xfs_qm_newmount, .xfs_qmdone = xfs_qm_unmount_quotadestroy, .xfs_qmmount = xfs_qm_endmount, diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c index 229f5b5..68e9896 100644 --- a/fs/xfs/quota/xfs_qm_syscalls.c +++ b/fs/xfs/quota/xfs_qm_syscalls.c @@ -118,40 +118,41 @@ xfs_qm_quotactl( * The following commands are valid even when quotaoff. */ switch (cmd) { + case Q_XQUOTARM: /* - * truncate quota files. quota must be off. + * Truncate quota files. quota must be off. 
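The xfs_qm_parseargs() hunks above accept prjquota, pquota and pqnoenforce and reject a mount that asks for both group and project quota. A small standalone sketch of that parse-and-validate step, assuming only the option names shown in the patch:

#include <stdio.h>
#include <string.h>

enum { F_UQUOTA = 1, F_GQUOTA = 2, F_PQUOTA = 4 };

/* Parse a comma-separated option string into quota flags.
 * Returns -1 if group and project quota are both requested. */
static int parse_quota_opts(char *options, unsigned int *flags)
{
        char *opt = strtok(options, ",");

        *flags = 0;
        while (opt) {
                if (!strcmp(opt, "usrquota") || !strcmp(opt, "uquota"))
                        *flags |= F_UQUOTA;
                else if (!strcmp(opt, "grpquota") || !strcmp(opt, "gquota"))
                        *flags |= F_GQUOTA;
                else if (!strcmp(opt, "prjquota") || !strcmp(opt, "pquota"))
                        *flags |= F_PQUOTA;
                opt = strtok(NULL, ",");
        }
        if ((*flags & F_GQUOTA) && (*flags & F_PQUOTA)) {
                fprintf(stderr,
                        "cannot mount with both project and group quota\n");
                return -1;
        }
        return 0;
}

Called with a writable copy of "usrquota,prjquota" this sets F_UQUOTA|F_PQUOTA; "grpquota,pquota" fails, mirroring the new EINVAL path.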
*/ - case Q_XQUOTARM: if (XFS_IS_QUOTA_ON(mp) || addr == NULL) return XFS_ERROR(EINVAL); if (vfsp->vfs_flag & VFS_RDONLY) return XFS_ERROR(EROFS); return (xfs_qm_scall_trunc_qfiles(mp, xfs_qm_import_qtype_flags(*(uint *)addr))); + + case Q_XGETQSTAT: /* * Get quota status information. */ - case Q_XGETQSTAT: return (xfs_qm_scall_getqstat(mp, (fs_quota_stat_t *)addr)); + case Q_XQUOTAON: /* - * QUOTAON for root f/s and quota enforcement on others.. - * Quota accounting for non-root f/s's must be turned on - * at mount time. + * QUOTAON - enabling quota enforcement. + * Quota accounting must be turned on at mount time. */ - case Q_XQUOTAON: if (addr == NULL) return XFS_ERROR(EINVAL); if (vfsp->vfs_flag & VFS_RDONLY) return XFS_ERROR(EROFS); return (xfs_qm_scall_quotaon(mp, xfs_qm_import_flags(*(uint *)addr))); - case Q_XQUOTAOFF: + + case Q_XQUOTAOFF: if (vfsp->vfs_flag & VFS_RDONLY) return XFS_ERROR(EROFS); break; - default: + default: break; } @@ -159,7 +160,7 @@ xfs_qm_quotactl( return XFS_ERROR(ESRCH); switch (cmd) { - case Q_XQUOTAOFF: + case Q_XQUOTAOFF: if (vfsp->vfs_flag & VFS_RDONLY) return XFS_ERROR(EROFS); error = xfs_qm_scall_quotaoff(mp, @@ -167,42 +168,39 @@ xfs_qm_quotactl( B_FALSE); break; - /* - * Defaults to XFS_GETUQUOTA. - */ - case Q_XGETQUOTA: + case Q_XGETQUOTA: error = xfs_qm_scall_getquota(mp, (xfs_dqid_t)id, XFS_DQ_USER, (fs_disk_quota_t *)addr); break; - /* - * Set limits, both hard and soft. Defaults to Q_SETUQLIM. - */ - case Q_XSETQLIM: + case Q_XGETGQUOTA: + error = xfs_qm_scall_getquota(mp, (xfs_dqid_t)id, XFS_DQ_GROUP, + (fs_disk_quota_t *)addr); + break; + case Q_XGETPQUOTA: + error = xfs_qm_scall_getquota(mp, (xfs_dqid_t)id, XFS_DQ_PROJ, + (fs_disk_quota_t *)addr); + break; + + case Q_XSETQLIM: if (vfsp->vfs_flag & VFS_RDONLY) return XFS_ERROR(EROFS); error = xfs_qm_scall_setqlim(mp, (xfs_dqid_t)id, XFS_DQ_USER, (fs_disk_quota_t *)addr); break; - - case Q_XSETGQLIM: + case Q_XSETGQLIM: if (vfsp->vfs_flag & VFS_RDONLY) return XFS_ERROR(EROFS); error = xfs_qm_scall_setqlim(mp, (xfs_dqid_t)id, XFS_DQ_GROUP, (fs_disk_quota_t *)addr); break; - - - case Q_XGETGQUOTA: - error = xfs_qm_scall_getquota(mp, (xfs_dqid_t)id, XFS_DQ_GROUP, - (fs_disk_quota_t *)addr); + case Q_XSETPQLIM: + if (vfsp->vfs_flag & VFS_RDONLY) + return XFS_ERROR(EROFS); + error = xfs_qm_scall_setqlim(mp, (xfs_dqid_t)id, XFS_DQ_PROJ, + (fs_disk_quota_t *)addr); break; - /* - * Quotas are entirely undefined after quotaoff in XFS quotas. - * For instance, there's no way to set limits when quotaoff. - */ - - default: + default: error = XFS_ERROR(EINVAL); break; } @@ -286,8 +284,12 @@ xfs_qm_scall_quotaoff( } if (flags & XFS_GQUOTA_ACCT) { dqtype |= XFS_QMOPT_GQUOTA; - flags |= (XFS_GQUOTA_CHKD | XFS_GQUOTA_ENFD); + flags |= (XFS_OQUOTA_CHKD | XFS_OQUOTA_ENFD); inactivate_flags |= XFS_GQUOTA_ACTIVE; + } else if (flags & XFS_PQUOTA_ACCT) { + dqtype |= XFS_QMOPT_PQUOTA; + flags |= (XFS_OQUOTA_CHKD | XFS_OQUOTA_ENFD); + inactivate_flags |= XFS_PQUOTA_ACTIVE; } /* @@ -364,7 +366,8 @@ xfs_qm_scall_quotaoff( /* * If quotas is completely disabled, close shop. 
*/ - if ((flags & XFS_MOUNT_QUOTA_ALL) == XFS_MOUNT_QUOTA_ALL) { + if (((flags & XFS_MOUNT_QUOTA_ALL) == XFS_MOUNT_QUOTA_SET1) || + ((flags & XFS_MOUNT_QUOTA_ALL) == XFS_MOUNT_QUOTA_SET2)) { mutex_unlock(&(XFS_QI_QOFFLOCK(mp))); xfs_qm_destroy_quotainfo(mp); return (0); @@ -378,7 +381,7 @@ xfs_qm_scall_quotaoff( XFS_PURGE_INODE(XFS_QI_UQIP(mp)); XFS_QI_UQIP(mp) = NULL; } - if ((dqtype & XFS_QMOPT_GQUOTA) && XFS_QI_GQIP(mp)) { + if ((dqtype & (XFS_QMOPT_GQUOTA|XFS_QMOPT_PQUOTA)) && XFS_QI_GQIP(mp)) { XFS_PURGE_INODE(XFS_QI_GQIP(mp)); XFS_QI_GQIP(mp) = NULL; } @@ -411,7 +414,8 @@ xfs_qm_scall_trunc_qfiles( } } - if ((flags & XFS_DQ_GROUP) && mp->m_sb.sb_gquotino != NULLFSINO) { + if ((flags & (XFS_DQ_GROUP|XFS_DQ_PROJ)) && + mp->m_sb.sb_gquotino != NULLFSINO) { error = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, 0, 0, &qip, 0); if (! error) { (void) xfs_truncate_file(mp, qip); @@ -434,7 +438,7 @@ xfs_qm_scall_quotaon( uint flags) { int error; - unsigned long s; + unsigned long s; uint qf; uint accflags; __int64_t sbflags; @@ -468,9 +472,13 @@ xfs_qm_scall_quotaon( (mp->m_sb.sb_qflags & XFS_UQUOTA_ACCT) == 0 && (flags & XFS_UQUOTA_ENFD)) || + ((flags & XFS_PQUOTA_ACCT) == 0 && + (mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT) == 0 && + (flags & XFS_OQUOTA_ENFD)) + || ((flags & XFS_GQUOTA_ACCT) == 0 && (mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT) == 0 && - (flags & XFS_GQUOTA_ENFD))) { + (flags & XFS_OQUOTA_ENFD))) { qdprintk("Can't enforce without acct, flags=%x sbflags=%x\n", flags, mp->m_sb.sb_qflags); return XFS_ERROR(EINVAL); @@ -504,6 +512,10 @@ xfs_qm_scall_quotaon( */ if (((mp->m_sb.sb_qflags & XFS_UQUOTA_ACCT) != (mp->m_qflags & XFS_UQUOTA_ACCT)) || + ((mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT) != + (mp->m_qflags & XFS_PQUOTA_ACCT)) || + ((mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT) != + (mp->m_qflags & XFS_GQUOTA_ACCT)) || (flags & XFS_ALL_QUOTA_ENFD) == 0) return (0); @@ -521,7 +533,6 @@ xfs_qm_scall_quotaon( } - /* * Return quota status information, such as uquota-off, enforcements, etc. */ @@ -606,7 +617,8 @@ xfs_qm_scall_setqlim( if (!capable(CAP_SYS_ADMIN)) return XFS_ERROR(EPERM); - if ((newlim->d_fieldmask & (FS_DQ_LIMIT_MASK|FS_DQ_TIMER_MASK)) == 0) + if ((newlim->d_fieldmask & + (FS_DQ_LIMIT_MASK|FS_DQ_TIMER_MASK|FS_DQ_WARNS_MASK)) == 0) return (0); tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SETQLIM); @@ -691,12 +703,23 @@ xfs_qm_scall_setqlim( qdprintk("ihard %Ld < isoft %Ld\n", hard, soft); } + /* + * Update warnings counter(s) if requested + */ + if (newlim->d_fieldmask & FS_DQ_BWARNS) + INT_SET(ddq->d_bwarns, ARCH_CONVERT, newlim->d_bwarns); + if (newlim->d_fieldmask & FS_DQ_IWARNS) + INT_SET(ddq->d_iwarns, ARCH_CONVERT, newlim->d_iwarns); + if (newlim->d_fieldmask & FS_DQ_RTBWARNS) + INT_SET(ddq->d_rtbwarns, ARCH_CONVERT, newlim->d_rtbwarns); + if (id == 0) { /* * Timelimits for the super user set the relative time * the other users can be over quota for this file system. * If it is zero a default is used. Ditto for the default - * soft and hard limit values (already done, above). + * soft and hard limit values (already done, above), and + * for warnings. 
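The xfs_qm_scall_setqlim() changes around this point copy any requested warning limits into the dquot and, for id 0, also into the per-filesystem defaults, the same convention already used for timers. A compact sketch of that convention; the names and field-mask bits are illustrative only:

#define SET_BWARNS  0x1
#define SET_IWARNS  0x2

struct fs_defaults {
        unsigned int bwarnlimit;   /* default warning limit for blocks */
        unsigned int iwarnlimit;   /* default warning limit for inodes */
};

struct dq {
        unsigned int id;
        unsigned int bwarns;
        unsigned int iwarns;
};

/* Apply new warning values to a dquot; for id 0 they also become
 * the filesystem-wide defaults, as in the setqlim convention. */
static void set_warn_limits(struct dq *d, struct fs_defaults *def,
                            unsigned int mask,
                            unsigned int bwarns, unsigned int iwarns)
{
        if (mask & SET_BWARNS)
                d->bwarns = bwarns;
        if (mask & SET_IWARNS)
                d->iwarns = iwarns;

        if (d->id == 0) {               /* root dquot carries the defaults */
                if (mask & SET_BWARNS)
                        def->bwarnlimit = bwarns;
                if (mask & SET_IWARNS)
                        def->iwarnlimit = iwarns;
        }
}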
*/ if (newlim->d_fieldmask & FS_DQ_BTIMER) { mp->m_quotainfo->qi_btimelimit = newlim->d_btimer; @@ -710,7 +733,13 @@ xfs_qm_scall_setqlim( mp->m_quotainfo->qi_rtbtimelimit = newlim->d_rtbtimer; INT_SET(ddq->d_rtbtimer, ARCH_CONVERT, newlim->d_rtbtimer); } - } else /* if (XFS_IS_QUOTA_ENFORCED(mp)) */ { + if (newlim->d_fieldmask & FS_DQ_BWARNS) + mp->m_quotainfo->qi_bwarnlimit = newlim->d_bwarns; + if (newlim->d_fieldmask & FS_DQ_IWARNS) + mp->m_quotainfo->qi_iwarnlimit = newlim->d_iwarns; + if (newlim->d_fieldmask & FS_DQ_RTBWARNS) + mp->m_quotainfo->qi_rtbwarnlimit = newlim->d_rtbwarns; + } else { /* * If the user is now over quota, start the timelimit. * The user will not be 'warned'. @@ -776,9 +805,9 @@ xfs_qm_log_quotaoff_end( xfs_qoff_logitem_t *startqoff, uint flags) { - xfs_trans_t *tp; + xfs_trans_t *tp; int error; - xfs_qoff_logitem_t *qoffi; + xfs_qoff_logitem_t *qoffi; tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QUOTAOFF_END); @@ -928,18 +957,26 @@ xfs_qm_export_dquot( STATIC uint xfs_qm_import_qtype_flags( - uint uflags) + uint uflags) { + uint oflags = 0; + /* - * Can't be both at the same time. + * Can't be more than one, or none. */ if (((uflags & (XFS_GROUP_QUOTA | XFS_USER_QUOTA)) == - (XFS_GROUP_QUOTA | XFS_USER_QUOTA)) || - ((uflags & (XFS_GROUP_QUOTA | XFS_USER_QUOTA)) == 0)) + (XFS_GROUP_QUOTA | XFS_USER_QUOTA)) || + ((uflags & (XFS_GROUP_QUOTA | XFS_PROJ_QUOTA)) == + (XFS_GROUP_QUOTA | XFS_PROJ_QUOTA)) || + ((uflags & (XFS_USER_QUOTA | XFS_PROJ_QUOTA)) == + (XFS_USER_QUOTA | XFS_PROJ_QUOTA)) || + ((uflags & (XFS_GROUP_QUOTA|XFS_USER_QUOTA|XFS_PROJ_QUOTA)) == 0)) return (0); - return (uflags & XFS_USER_QUOTA) ? - XFS_DQ_USER : XFS_DQ_GROUP; + oflags |= (uflags & XFS_USER_QUOTA) ? XFS_DQ_USER : 0; + oflags |= (uflags & XFS_PROJ_QUOTA) ? XFS_DQ_PROJ : 0; + oflags |= (uflags & XFS_GROUP_QUOTA) ? XFS_DQ_GROUP: 0; + return oflags; } STATIC uint @@ -947,14 +984,19 @@ xfs_qm_export_qtype_flags( uint flags) { /* - * Can't be both at the same time. + * Can't be more than one, or none. */ - ASSERT((flags & (XFS_GROUP_QUOTA | XFS_USER_QUOTA)) != - (XFS_GROUP_QUOTA | XFS_USER_QUOTA)); - ASSERT((flags & (XFS_GROUP_QUOTA | XFS_USER_QUOTA)) != 0); + ASSERT((flags & (XFS_PROJ_QUOTA | XFS_USER_QUOTA)) != + (XFS_PROJ_QUOTA | XFS_USER_QUOTA)); + ASSERT((flags & (XFS_PROJ_QUOTA | XFS_GROUP_QUOTA)) != + (XFS_PROJ_QUOTA | XFS_GROUP_QUOTA)); + ASSERT((flags & (XFS_USER_QUOTA | XFS_GROUP_QUOTA)) != + (XFS_USER_QUOTA | XFS_GROUP_QUOTA)); + ASSERT((flags & (XFS_PROJ_QUOTA|XFS_USER_QUOTA|XFS_GROUP_QUOTA)) != 0); return (flags & XFS_DQ_USER) ? - XFS_USER_QUOTA : XFS_GROUP_QUOTA; + XFS_USER_QUOTA : (flags & XFS_DQ_PROJ) ? 
+ XFS_PROJ_QUOTA : XFS_GROUP_QUOTA; } STATIC uint @@ -965,12 +1007,14 @@ xfs_qm_import_flags( if (uflags & XFS_QUOTA_UDQ_ACCT) flags |= XFS_UQUOTA_ACCT; + if (uflags & XFS_QUOTA_PDQ_ACCT) + flags |= XFS_PQUOTA_ACCT; if (uflags & XFS_QUOTA_GDQ_ACCT) flags |= XFS_GQUOTA_ACCT; if (uflags & XFS_QUOTA_UDQ_ENFD) flags |= XFS_UQUOTA_ENFD; - if (uflags & XFS_QUOTA_GDQ_ENFD) - flags |= XFS_GQUOTA_ENFD; + if (uflags & (XFS_QUOTA_PDQ_ENFD|XFS_QUOTA_GDQ_ENFD)) + flags |= XFS_OQUOTA_ENFD; return (flags); } @@ -984,12 +1028,16 @@ xfs_qm_export_flags( uflags = 0; if (flags & XFS_UQUOTA_ACCT) uflags |= XFS_QUOTA_UDQ_ACCT; + if (flags & XFS_PQUOTA_ACCT) + uflags |= XFS_QUOTA_PDQ_ACCT; if (flags & XFS_GQUOTA_ACCT) uflags |= XFS_QUOTA_GDQ_ACCT; if (flags & XFS_UQUOTA_ENFD) uflags |= XFS_QUOTA_UDQ_ENFD; - if (flags & XFS_GQUOTA_ENFD) - uflags |= XFS_QUOTA_GDQ_ENFD; + if (flags & (XFS_OQUOTA_ENFD)) { + uflags |= (flags & XFS_GQUOTA_ACCT) ? + XFS_QUOTA_GDQ_ENFD : XFS_QUOTA_PDQ_ENFD; + } return (uflags); } @@ -1070,7 +1118,7 @@ again: xfs_qm_dqrele(ip->i_udquot); ip->i_udquot = NULL; } - if ((flags & XFS_GQUOTA_ACCT) && ip->i_gdquot) { + if (flags & (XFS_PQUOTA_ACCT|XFS_GQUOTA_ACCT) && ip->i_gdquot) { xfs_qm_dqrele(ip->i_gdquot); ip->i_gdquot = NULL; } @@ -1160,7 +1208,6 @@ xfs_qm_dqtest_print( { cmn_err(CE_DEBUG, "-----------DQTEST DQUOT----------------"); cmn_err(CE_DEBUG, "---- dquot ID = %d", d->d_id); - cmn_err(CE_DEBUG, "---- type = %s", XFS_QM_ISUDQ(d)? "USR" : "GRP"); cmn_err(CE_DEBUG, "---- fs = 0x%p", d->q_mount); cmn_err(CE_DEBUG, "---- bcount = %Lu (0x%x)", d->d_bcount, (int)d->d_bcount); @@ -1231,7 +1278,7 @@ xfs_dqtest_cmp2( #ifdef QUOTADEBUG if (!err) { cmn_err(CE_DEBUG, "%d [%s] [0x%p] qchecked", - d->d_id, XFS_QM_ISUDQ(d) ? "USR" : "GRP", d->q_mount); + d->d_id, DQFLAGTO_TYPESTR(d), d->q_mount); } #endif return (err); @@ -1287,6 +1334,7 @@ STATIC void xfs_qm_internalqcheck_get_dquots( xfs_mount_t *mp, xfs_dqid_t uid, + xfs_dqid_t projid, xfs_dqid_t gid, xfs_dqtest_t **ud, xfs_dqtest_t **gd) @@ -1295,6 +1343,8 @@ xfs_qm_internalqcheck_get_dquots( xfs_qm_internalqcheck_dqget(mp, uid, XFS_DQ_USER, ud); if (XFS_IS_GQUOTA_ON(mp)) xfs_qm_internalqcheck_dqget(mp, gid, XFS_DQ_GROUP, gd); + else if (XFS_IS_PQUOTA_ON(mp)) + xfs_qm_internalqcheck_dqget(mp, projid, XFS_DQ_PROJ, gd); } @@ -1362,13 +1412,14 @@ xfs_qm_internalqcheck_adjust( } xfs_qm_internalqcheck_get_dquots(mp, (xfs_dqid_t) ip->i_d.di_uid, + (xfs_dqid_t) ip->i_d.di_projid, (xfs_dqid_t) ip->i_d.di_gid, &ud, &gd); if (XFS_IS_UQUOTA_ON(mp)) { ASSERT(ud); xfs_qm_internalqcheck_dqadjust(ip, ud); } - if (XFS_IS_GQUOTA_ON(mp)) { + if (XFS_IS_OQUOTA_ON(mp)) { ASSERT(gd); xfs_qm_internalqcheck_dqadjust(ip, gd); } diff --git a/fs/xfs/quota/xfs_quota_priv.h b/fs/xfs/quota/xfs_quota_priv.h index 414b600..bf413e7 100644 --- a/fs/xfs/quota/xfs_quota_priv.h +++ b/fs/xfs/quota/xfs_quota_priv.h @@ -56,6 +56,7 @@ #define XFS_QI_RTBTIMELIMIT(mp) ((mp)->m_quotainfo->qi_rtbtimelimit) #define XFS_QI_ITIMELIMIT(mp) ((mp)->m_quotainfo->qi_itimelimit) #define XFS_QI_BWARNLIMIT(mp) ((mp)->m_quotainfo->qi_bwarnlimit) +#define XFS_QI_RTBWARNLIMIT(mp) ((mp)->m_quotainfo->qi_rtbwarnlimit) #define XFS_QI_IWARNLIMIT(mp) ((mp)->m_quotainfo->qi_iwarnlimit) #define XFS_QI_QOFFLOCK(mp) ((mp)->m_quotainfo->qi_quotaofflock) @@ -102,7 +103,8 @@ static inline int XQMISLCKD(struct xfs_dqhash *h) (xfs_Gqm->qm_grp_dqhtable + \ XFS_DQ_HASHVAL(mp, id))) #define XFS_IS_DQTYPE_ON(mp, type) (type == XFS_DQ_USER ? 
\ - XFS_IS_UQUOTA_ON(mp):XFS_IS_GQUOTA_ON(mp)) + XFS_IS_UQUOTA_ON(mp) : \ + XFS_IS_OQUOTA_ON(mp)) #define XFS_IS_DQUOT_UNINITIALIZED(dqp) ( \ !dqp->q_core.d_blk_hardlimit && \ !dqp->q_core.d_blk_softlimit && \ @@ -177,16 +179,11 @@ for ((dqp) = (qlist)->qh_next; (dqp) != (xfs_dquot_t *)(qlist); \ (!((dqp)->q_core.d_id)) #define XFS_PURGE_INODE(ip) \ - { \ - vmap_t dqvmap; \ - vnode_t *dqvp; \ - dqvp = XFS_ITOV(ip); \ - VMAP(dqvp, dqvmap); \ - VN_RELE(dqvp); \ - } + IRELE(ip); #define DQFLAGTO_TYPESTR(d) (((d)->dq_flags & XFS_DQ_USER) ? "USR" : \ - (((d)->dq_flags & XFS_DQ_GROUP) ? "GRP" : "???")) + (((d)->dq_flags & XFS_DQ_GROUP) ? "GRP" : \ + (((d)->dq_flags & XFS_DQ_PROJ) ? "PRJ":"???"))) #define DQFLAGTO_DIRTYSTR(d) (XFS_DQ_IS_DIRTY(d) ? "DIRTY" : "NOTDIRTY") #endif /* __XFS_QUOTA_PRIV_H__ */ diff --git a/fs/xfs/quota/xfs_trans_dquot.c b/fs/xfs/quota/xfs_trans_dquot.c index 149b2a1..3b99daf 100644 --- a/fs/xfs/quota/xfs_trans_dquot.c +++ b/fs/xfs/quota/xfs_trans_dquot.c @@ -187,7 +187,7 @@ xfs_trans_dup_dqinfo( /* * Wrap around mod_dquot to account for both user and group quotas. */ -void +STATIC void xfs_trans_mod_dquot_byino( xfs_trans_t *tp, xfs_inode_t *ip, @@ -207,12 +207,10 @@ xfs_trans_mod_dquot_byino( if (tp->t_dqinfo == NULL) xfs_trans_alloc_dqinfo(tp); - if (XFS_IS_UQUOTA_ON(mp) && ip->i_udquot) { + if (XFS_IS_UQUOTA_ON(mp) && ip->i_udquot) (void) xfs_trans_mod_dquot(tp, ip->i_udquot, field, delta); - } - if (XFS_IS_GQUOTA_ON(mp) && ip->i_gdquot) { + if (XFS_IS_OQUOTA_ON(mp) && ip->i_gdquot) (void) xfs_trans_mod_dquot(tp, ip->i_gdquot, field, delta); - } } STATIC xfs_dqtrx_t * @@ -368,7 +366,7 @@ xfs_trans_dqlockedjoin( * Unreserve just the reservations done by this transaction. * dquot is still left locked at exit. */ -void +STATIC void xfs_trans_apply_dquot_deltas( xfs_trans_t *tp) { @@ -499,7 +497,7 @@ xfs_trans_apply_dquot_deltas( * Adjust the RT reservation. 
*/ if (qtrx->qt_rtblk_res != 0) { - if (qtrx->qt_blk_res != qtrx->qt_blk_res_used) { + if (qtrx->qt_rtblk_res != qtrx->qt_rtblk_res_used) { if (qtrx->qt_rtblk_res > qtrx->qt_rtblk_res_used) dqp->q_res_rtbcount -= (xfs_qcnt_t) @@ -532,12 +530,6 @@ xfs_trans_apply_dquot_deltas( (xfs_qcnt_t)qtrx->qt_icount_delta; } - -#ifdef QUOTADEBUG - if (qtrx->qt_rtblk_res != 0) - cmn_err(CE_DEBUG, "RT res %d for 0x%p\n", - (int) qtrx->qt_rtblk_res, dqp); -#endif ASSERT(dqp->q_res_bcount >= INT_GET(dqp->q_core.d_bcount, ARCH_CONVERT)); ASSERT(dqp->q_res_icount >= @@ -638,7 +630,10 @@ xfs_trans_dqresv( int error; xfs_qcnt_t hardlimit; xfs_qcnt_t softlimit; - time_t btimer; + time_t timer; + xfs_qwarncnt_t warns; + xfs_qwarncnt_t warnlimit; + xfs_qcnt_t count; xfs_qcnt_t *resbcountp; xfs_quotainfo_t *q = mp->m_quotainfo; @@ -653,7 +648,9 @@ xfs_trans_dqresv( softlimit = INT_GET(dqp->q_core.d_blk_softlimit, ARCH_CONVERT); if (!softlimit) softlimit = q->qi_bsoftlimit; - btimer = INT_GET(dqp->q_core.d_btimer, ARCH_CONVERT); + timer = INT_GET(dqp->q_core.d_btimer, ARCH_CONVERT); + warns = INT_GET(dqp->q_core.d_bwarns, ARCH_CONVERT); + warnlimit = XFS_QI_BWARNLIMIT(dqp->q_mount); resbcountp = &dqp->q_res_bcount; } else { ASSERT(flags & XFS_TRANS_DQ_RES_RTBLKS); @@ -663,7 +660,9 @@ xfs_trans_dqresv( softlimit = INT_GET(dqp->q_core.d_rtb_softlimit, ARCH_CONVERT); if (!softlimit) softlimit = q->qi_rtbsoftlimit; - btimer = INT_GET(dqp->q_core.d_rtbtimer, ARCH_CONVERT); + timer = INT_GET(dqp->q_core.d_rtbtimer, ARCH_CONVERT); + warns = INT_GET(dqp->q_core.d_rtbwarns, ARCH_CONVERT); + warnlimit = XFS_QI_RTBWARNLIMIT(dqp->q_mount); resbcountp = &dqp->q_res_rtbcount; } error = 0; @@ -693,37 +692,36 @@ xfs_trans_dqresv( * If timer or warnings has expired, * return EDQUOT */ - if ((btimer != 0 && get_seconds() > btimer) || - (dqp->q_core.d_bwarns && - INT_GET(dqp->q_core.d_bwarns, ARCH_CONVERT) >= - XFS_QI_BWARNLIMIT(dqp->q_mount))) { + if ((timer != 0 && get_seconds() > timer) || + (warns != 0 && warns >= warnlimit)) { error = EDQUOT; goto error_return; } } } if (ninos > 0) { - hardlimit = INT_GET(dqp->q_core.d_ino_hardlimit, ARCH_CONVERT); + count = INT_GET(dqp->q_core.d_icount, ARCH_CONVERT); + timer = INT_GET(dqp->q_core.d_itimer, ARCH_CONVERT); + warns = INT_GET(dqp->q_core.d_iwarns, ARCH_CONVERT); + warnlimit = XFS_QI_IWARNLIMIT(dqp->q_mount); + hardlimit = INT_GET(dqp->q_core.d_ino_hardlimit, + ARCH_CONVERT); if (!hardlimit) hardlimit = q->qi_ihardlimit; - softlimit = INT_GET(dqp->q_core.d_ino_softlimit, ARCH_CONVERT); + softlimit = INT_GET(dqp->q_core.d_ino_softlimit, + ARCH_CONVERT); if (!softlimit) softlimit = q->qi_isoftlimit; - if (hardlimit > 0ULL && - INT_GET(dqp->q_core.d_icount, ARCH_CONVERT) >= hardlimit) { + if (hardlimit > 0ULL && count >= hardlimit) { error = EDQUOT; goto error_return; - } else if (softlimit > 0ULL && - INT_GET(dqp->q_core.d_icount, ARCH_CONVERT) >= softlimit) { + } else if (softlimit > 0ULL && count >= softlimit) { /* * If timer or warnings has expired, * return EDQUOT */ - if ((dqp->q_core.d_itimer && - get_seconds() > INT_GET(dqp->q_core.d_itimer, ARCH_CONVERT)) || - (dqp->q_core.d_iwarns && - INT_GET(dqp->q_core.d_iwarns, ARCH_CONVERT) >= - XFS_QI_IWARNLIMIT(dqp->q_mount))) { + if ((timer != 0 && get_seconds() > timer) || + (warns != 0 && warns >= warnlimit)) { error = EDQUOT; goto error_return; } diff --git a/fs/xfs/support/debug.c b/fs/xfs/support/debug.c index 7d6e1f3..4ed7b69 100644 --- a/fs/xfs/support/debug.c +++ b/fs/xfs/support/debug.c @@ -36,7 +36,6 @@ #include 
<linux/sched.h> #include <linux/kernel.h> -int doass = 1; static char message[256]; /* keep it off the stack */ static DEFINE_SPINLOCK(xfs_err_lock); diff --git a/fs/xfs/support/debug.h b/fs/xfs/support/debug.h index 40b0f4c..c5b9365 100644 --- a/fs/xfs/support/debug.h +++ b/fs/xfs/support/debug.h @@ -50,16 +50,11 @@ extern void cmn_err(int, char *, ...); #endif #ifdef DEBUG -# ifdef lint -# define ASSERT(EX) ((void)0) /* avoid "constant in conditional" babble */ -# else -# define ASSERT(EX) ((!doass||(EX))?((void)0):assfail(#EX, __FILE__, __LINE__)) -# endif /* lint */ +# define ASSERT(EX) ((EX) ? ((void)0) : assfail(#EX, __FILE__, __LINE__)) #else # define ASSERT(x) ((void)0) #endif -extern int doass; /* dynamically turn off asserts */ extern void assfail(char *, char *, int); #ifdef DEBUG extern unsigned long random(void); diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index 36603db..dcfe197 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c @@ -59,7 +59,7 @@ #define XFSA_FIXUP_BNO_OK 1 #define XFSA_FIXUP_CNT_OK 2 -int +STATIC int xfs_alloc_search_busy(xfs_trans_t *tp, xfs_agnumber_t agno, xfs_agblock_t bno, @@ -2562,7 +2562,7 @@ xfs_alloc_clear_busy(xfs_trans_t *tp, /* * returns non-zero if any of (agno,bno):len is in a busy list */ -int +STATIC int xfs_alloc_search_busy(xfs_trans_t *tp, xfs_agnumber_t agno, xfs_agblock_t bno, diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c index ee8b590..a41ad3a 100644 --- a/fs/xfs/xfs_attr.c +++ b/fs/xfs/xfs_attr.c @@ -71,6 +71,11 @@ * Provide the external interfaces to manage attribute lists. */ +#define ATTR_SYSCOUNT 2 +STATIC struct attrnames posix_acl_access; +STATIC struct attrnames posix_acl_default; +STATIC struct attrnames *attr_system_names[ATTR_SYSCOUNT]; + /*======================================================================== * Function prototypes for the kernel. *========================================================================*/ @@ -83,6 +88,7 @@ STATIC int xfs_attr_shortform_addname(xfs_da_args_t *args); /* * Internal routines when attribute list is one block. */ +STATIC int xfs_attr_leaf_get(xfs_da_args_t *args); STATIC int xfs_attr_leaf_addname(xfs_da_args_t *args); STATIC int xfs_attr_leaf_removename(xfs_da_args_t *args); STATIC int xfs_attr_leaf_list(xfs_attr_list_context_t *context); @@ -90,6 +96,7 @@ STATIC int xfs_attr_leaf_list(xfs_attr_list_context_t *context); /* * Internal routines when attribute list is more than one block. */ +STATIC int xfs_attr_node_get(xfs_da_args_t *args); STATIC int xfs_attr_node_addname(xfs_da_args_t *args); STATIC int xfs_attr_node_removename(xfs_da_args_t *args); STATIC int xfs_attr_node_list(xfs_attr_list_context_t *context); @@ -1102,7 +1109,7 @@ xfs_attr_leaf_removename(xfs_da_args_t *args) * This leaf block cannot have a "remote" value, we only call this routine * if bmap_one_block() says there is only one block (ie: no remote blks). */ -int +STATIC int xfs_attr_leaf_get(xfs_da_args_t *args) { xfs_dabuf_t *bp; @@ -1707,7 +1714,7 @@ xfs_attr_refillstate(xfs_da_state_t *state) * block, ie: both true Btree attr lists and for single-leaf-blocks with * "remote" values taking up more blocks. 
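The xfs_trans_dqresv() rework above pulls the per-resource timer, warning count and warning limit into locals so that blocks, realtime blocks and inodes share one check: over the hard limit always fails, over the soft limit fails only once the grace timer has expired or the warning count has reached its limit. A user-space sketch of that shared check, with simplified types:

#include <time.h>

/* Returns nonzero (think EDQUOT) if reserving 'nblks' more units
 * should be refused under the shared soft/hard limit shape. */
static int would_exceed(unsigned long long count, unsigned long long nblks,
                        unsigned long long softlimit,
                        unsigned long long hardlimit,
                        time_t timer, unsigned int warns,
                        unsigned int warnlimit)
{
        unsigned long long total = count + nblks;

        if (hardlimit > 0 && total >= hardlimit)
                return 1;
        if (softlimit > 0 && total >= softlimit) {
                /* the soft limit only bites once the grace timer has
                 * expired or the allowed warnings are used up */
                if ((timer != 0 && time(NULL) > timer) ||
                    (warns != 0 && warns >= warnlimit))
                        return 1;
        }
        return 0;
}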
*/ -int +STATIC int xfs_attr_node_get(xfs_da_args_t *args) { xfs_da_state_t *state; @@ -2398,7 +2405,7 @@ posix_acl_default_exists( return xfs_acl_vhasacl_default(vp); } -struct attrnames posix_acl_access = { +STATIC struct attrnames posix_acl_access = { .attr_name = "posix_acl_access", .attr_namelen = sizeof("posix_acl_access") - 1, .attr_get = posix_acl_access_get, @@ -2407,7 +2414,7 @@ struct attrnames posix_acl_access = { .attr_exists = posix_acl_access_exists, }; -struct attrnames posix_acl_default = { +STATIC struct attrnames posix_acl_default = { .attr_name = "posix_acl_default", .attr_namelen = sizeof("posix_acl_default") - 1, .attr_get = posix_acl_default_get, @@ -2416,7 +2423,7 @@ struct attrnames posix_acl_default = { .attr_exists = posix_acl_default_exists, }; -struct attrnames *attr_system_names[] = +STATIC struct attrnames *attr_system_names[] = { &posix_acl_access, &posix_acl_default }; diff --git a/fs/xfs/xfs_attr.h b/fs/xfs/xfs_attr.h index 67cd0f5..45ab1c5 100644 --- a/fs/xfs/xfs_attr.h +++ b/fs/xfs/xfs_attr.h @@ -76,11 +76,6 @@ extern struct attrnames attr_system; extern struct attrnames attr_trusted; extern struct attrnames *attr_namespaces[ATTR_NAMECOUNT]; -#define ATTR_SYSCOUNT 2 -extern struct attrnames posix_acl_access; -extern struct attrnames posix_acl_default; -extern struct attrnames *attr_system_names[ATTR_SYSCOUNT]; - extern attrnames_t *attr_lookup_namespace(char *, attrnames_t **, int); extern int attr_generic_list(struct vnode *, void *, size_t, int, ssize_t *); @@ -184,8 +179,6 @@ int xfs_attr_list(bhv_desc_t *, char *, int, int, struct attrlist_cursor_kern *, struct cred *); int xfs_attr_inactive(struct xfs_inode *dp); -int xfs_attr_node_get(struct xfs_da_args *); -int xfs_attr_leaf_get(struct xfs_da_args *); int xfs_attr_shortform_getvalue(struct xfs_da_args *); int xfs_attr_fetch(struct xfs_inode *, char *, int, char *, int *, int, struct cred *); diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c index b11256e..1cdd574 100644 --- a/fs/xfs/xfs_attr_leaf.c +++ b/fs/xfs/xfs_attr_leaf.c @@ -79,6 +79,8 @@ /* * Routines used for growing the Btree. */ +STATIC int xfs_attr_leaf_create(xfs_da_args_t *args, xfs_dablk_t which_block, + xfs_dabuf_t **bpp); STATIC int xfs_attr_leaf_add_work(xfs_dabuf_t *leaf_buffer, xfs_da_args_t *args, int freemap_index); STATIC void xfs_attr_leaf_compact(xfs_trans_t *trans, xfs_dabuf_t *leaf_buffer); @@ -92,6 +94,16 @@ STATIC int xfs_attr_leaf_figure_balance(xfs_da_state_t *state, int *number_usedbytes_in_blk1); /* + * Routines used for shrinking the Btree. + */ +STATIC int xfs_attr_node_inactive(xfs_trans_t **trans, xfs_inode_t *dp, + xfs_dabuf_t *bp, int level); +STATIC int xfs_attr_leaf_inactive(xfs_trans_t **trans, xfs_inode_t *dp, + xfs_dabuf_t *bp); +STATIC int xfs_attr_leaf_freextent(xfs_trans_t **trans, xfs_inode_t *dp, + xfs_dablk_t blkno, int blkcnt); + +/* * Utility routines. */ STATIC void xfs_attr_leaf_moveents(xfs_attr_leafblock_t *src_leaf, @@ -99,6 +111,10 @@ STATIC void xfs_attr_leaf_moveents(xfs_attr_leafblock_t *src_leaf, xfs_attr_leafblock_t *dst_leaf, int dst_start, int move_count, xfs_mount_t *mp); +STATIC int xfs_attr_leaf_entsize(xfs_attr_leafblock_t *leaf, int index); +STATIC int xfs_attr_put_listent(xfs_attr_list_context_t *context, + attrnames_t *, char *name, int namelen, + int valuelen); /*======================================================================== @@ -774,7 +790,7 @@ out: * Create the initial contents of a leaf attribute list * or a leaf in a node attribute list. 
*/ -int +STATIC int xfs_attr_leaf_create(xfs_da_args_t *args, xfs_dablk_t blkno, xfs_dabuf_t **bpp) { xfs_attr_leafblock_t *leaf; @@ -2209,7 +2225,7 @@ xfs_attr_leaf_lasthash(xfs_dabuf_t *bp, int *count) * Calculate the number of bytes used to store the indicated attribute * (whether local or remote only calculate bytes in this block). */ -int +STATIC int xfs_attr_leaf_entsize(xfs_attr_leafblock_t *leaf, int index) { xfs_attr_leaf_name_local_t *name_loc; @@ -2380,7 +2396,7 @@ xfs_attr_leaf_list_int(xfs_dabuf_t *bp, xfs_attr_list_context_t *context) * we may be reading them directly out of a user buffer. */ /*ARGSUSED*/ -int +STATIC int xfs_attr_put_listent(xfs_attr_list_context_t *context, attrnames_t *namesp, char *name, int namelen, int valuelen) { @@ -2740,7 +2756,7 @@ xfs_attr_root_inactive(xfs_trans_t **trans, xfs_inode_t *dp) * Recurse (gasp!) through the attribute nodes until we find leaves. * We're doing a depth-first traversal in order to invalidate everything. */ -int +STATIC int xfs_attr_node_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp, int level) { @@ -2849,7 +2865,7 @@ xfs_attr_node_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp, * Note that we must release the lock on the buffer so that we are not * caught holding something that the logging code wants to flush to disk. */ -int +STATIC int xfs_attr_leaf_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp) { xfs_attr_leafblock_t *leaf; @@ -2934,7 +2950,7 @@ xfs_attr_leaf_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp) * Look at all the extents for this logical region, * invalidate any buffers that are incore/in transactions. */ -int +STATIC int xfs_attr_leaf_freextent(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dablk_t blkno, int blkcnt) { diff --git a/fs/xfs/xfs_attr_leaf.h b/fs/xfs/xfs_attr_leaf.h index b1480e0..0a4cfad 100644 --- a/fs/xfs/xfs_attr_leaf.h +++ b/fs/xfs/xfs_attr_leaf.h @@ -261,8 +261,6 @@ int xfs_attr_leaf_flipflags(xfs_da_args_t *args); /* * Routines used for growing the Btree. */ -int xfs_attr_leaf_create(struct xfs_da_args *args, xfs_dablk_t which_block, - struct xfs_dabuf **bpp); int xfs_attr_leaf_split(struct xfs_da_state *state, struct xfs_da_state_blk *oldblk, struct xfs_da_state_blk *newblk); @@ -284,12 +282,6 @@ void xfs_attr_leaf_unbalance(struct xfs_da_state *state, struct xfs_da_state_blk *drop_blk, struct xfs_da_state_blk *save_blk); int xfs_attr_root_inactive(struct xfs_trans **trans, struct xfs_inode *dp); -int xfs_attr_node_inactive(struct xfs_trans **trans, struct xfs_inode *dp, - struct xfs_dabuf *bp, int level); -int xfs_attr_leaf_inactive(struct xfs_trans **trans, struct xfs_inode *dp, - struct xfs_dabuf *bp); -int xfs_attr_leaf_freextent(struct xfs_trans **trans, struct xfs_inode *dp, - xfs_dablk_t blkno, int blkcnt); /* * Utility routines. 
@@ -299,10 +291,6 @@ int xfs_attr_leaf_order(struct xfs_dabuf *leaf1_bp, struct xfs_dabuf *leaf2_bp); int xfs_attr_leaf_newentsize(struct xfs_da_args *args, int blocksize, int *local); -int xfs_attr_leaf_entsize(struct xfs_attr_leafblock *leaf, int index); -int xfs_attr_put_listent(struct xfs_attr_list_context *context, - struct attrnames *, char *name, int namelen, - int valuelen); int xfs_attr_rolltrans(struct xfs_trans **transp, struct xfs_inode *dp); #endif /* __XFS_ATTR_LEAF_H__ */ diff --git a/fs/xfs/xfs_bit.c b/fs/xfs/xfs_bit.c index a20a6c3..76c9ad3 100644 --- a/fs/xfs/xfs_bit.c +++ b/fs/xfs/xfs_bit.c @@ -45,7 +45,7 @@ /* * Index of high bit number in byte, -1 for none set, 0..7 otherwise. */ -const char xfs_highbit[256] = { +STATIC const char xfs_highbit[256] = { -1, 0, 1, 1, 2, 2, 2, 2, /* 00 .. 07 */ 3, 3, 3, 3, 3, 3, 3, 3, /* 08 .. 0f */ 4, 4, 4, 4, 4, 4, 4, 4, /* 10 .. 17 */ diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index de31624..6f5d283 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -301,6 +301,19 @@ xfs_bmap_search_extents( xfs_bmbt_irec_t *gotp, /* out: extent entry found */ xfs_bmbt_irec_t *prevp); /* out: previous extent entry found */ +/* + * Check the last inode extent to determine whether this allocation will result + * in blocks being allocated at the end of the file. When we allocate new data + * blocks at the end of the file which do not start at the previous data block, + * we will try to align the new blocks at stripe unit boundaries. + */ +STATIC int /* error */ +xfs_bmap_isaeof( + xfs_inode_t *ip, /* incore inode pointer */ + xfs_fileoff_t off, /* file offset in fsblocks */ + int whichfork, /* data or attribute fork */ + char *aeof); /* return value */ + #ifdef XFS_BMAP_TRACE /* * Add a bmap trace buffer entry. Base routine for the others. 
@@ -4532,18 +4545,17 @@ xfs_bmapi( xfs_extlen_t alen; /* allocated extent length */ xfs_fileoff_t aoff; /* allocated file offset */ xfs_bmalloca_t bma; /* args for xfs_bmap_alloc */ - char contig; /* allocation must be one extent */ xfs_btree_cur_t *cur; /* bmap btree cursor */ - char delay; /* this request is for delayed alloc */ xfs_fileoff_t end; /* end of mapped file region */ int eof; /* we've hit the end of extent list */ + char contig; /* allocation must be one extent */ + char delay; /* this request is for delayed alloc */ + char exact; /* don't do all of wasdelayed extent */ xfs_bmbt_rec_t *ep; /* extent list entry pointer */ int error; /* error return */ - char exact; /* don't do all of wasdelayed extent */ xfs_bmbt_irec_t got; /* current extent list record */ xfs_ifork_t *ifp; /* inode fork pointer */ xfs_extlen_t indlen; /* indirect blocks length */ - char inhole; /* current location is hole in file */ xfs_extnum_t lastx; /* last useful extent number */ int logflags; /* flags for transaction logging */ xfs_extlen_t minleft; /* min blocks left after allocation */ @@ -4554,13 +4566,15 @@ xfs_bmapi( xfs_extnum_t nextents; /* number of extents in file */ xfs_fileoff_t obno; /* old block number (offset) */ xfs_bmbt_irec_t prev; /* previous extent list record */ - char stateless; /* ignore state flag set */ int tmp_logflags; /* temp flags holder */ + int whichfork; /* data or attr fork */ + char inhole; /* current location is hole in file */ + char stateless; /* ignore state flag set */ char trim; /* output trimmed to match range */ char userdata; /* allocating non-metadata */ char wasdelay; /* old extent was delayed */ - int whichfork; /* data or attr fork */ char wr; /* this is a write request */ + char rt; /* this is a realtime file */ char rsvd; /* OK to allocate reserved blocks */ #ifdef DEBUG xfs_fileoff_t orig_bno; /* original block number value */ @@ -4590,6 +4604,7 @@ xfs_bmapi( } if (XFS_FORCED_SHUTDOWN(mp)) return XFS_ERROR(EIO); + rt = XFS_IS_REALTIME_INODE(ip); ifp = XFS_IFORK_PTR(ip, whichfork); ASSERT(ifp->if_ext_max == XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t)); @@ -4694,9 +4709,16 @@ xfs_bmapi( } minlen = contig ? alen : 1; if (delay) { - indlen = (xfs_extlen_t) - xfs_bmap_worst_indlen(ip, alen); - ASSERT(indlen > 0); + xfs_extlen_t extsz = 0; + + /* Figure out the extent size, adjust alen */ + if (rt) { + if (!(extsz = ip->i_d.di_extsize)) + extsz = mp->m_sb.sb_rextsize; + alen = roundup(alen, extsz); + extsz = alen / mp->m_sb.sb_rextsize; + } + /* * Make a transaction-less quota reservation for * delayed allocation blocks. This number gets @@ -4704,8 +4726,10 @@ xfs_bmapi( * We return EDQUOT if we haven't allocated * blks already inside this loop; */ - if (XFS_TRANS_RESERVE_BLKQUOTA( - mp, NULL, ip, (long)alen)) { + if (XFS_TRANS_RESERVE_QUOTA_NBLKS( + mp, NULL, ip, (long)alen, 0, + rt ? XFS_QMOPT_RES_RTBLKS : + XFS_QMOPT_RES_REGBLKS)) { if (n == 0) { *nmap = 0; ASSERT(cur == NULL); @@ -4718,40 +4742,34 @@ xfs_bmapi( * Split changing sb for alen and indlen since * they could be coming from different places. 
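The delayed-allocation path in xfs_bmapi() above now rounds the requested length up to the realtime extent size and converts it to a count of realtime extents before the free-extent counter is adjusted. A tiny standalone example of that arithmetic, with made-up numbers:

#include <stdio.h>

/* Round a block count up to a multiple of the rt extent size,
 * then express it as a number of realtime extents. */
static unsigned long long to_rtextents(unsigned long long alen,
                                       unsigned long long rextsize,
                                       unsigned long long *rounded)
{
        *rounded = ((alen + rextsize - 1) / rextsize) * rextsize;
        return *rounded / rextsize;
}

int main(void)
{
        unsigned long long rounded;
        unsigned long long rext = to_rtextents(37, 16, &rounded);

        /* 37 blocks with a 16-block rt extent size -> 48 blocks, 3 extents */
        printf("rounded=%llu extents=%llu\n", rounded, rext);
        return 0;
}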
*/ - if (ip->i_d.di_flags & XFS_DIFLAG_REALTIME) { - xfs_extlen_t extsz; - xfs_extlen_t ralen; - if (!(extsz = ip->i_d.di_extsize)) - extsz = mp->m_sb.sb_rextsize; - ralen = roundup(alen, extsz); - ralen = ralen / mp->m_sb.sb_rextsize; - if (xfs_mod_incore_sb(mp, - XFS_SBS_FREXTENTS, - -(ralen), rsvd)) { - if (XFS_IS_QUOTA_ON(ip->i_mount)) - XFS_TRANS_UNRESERVE_BLKQUOTA( - mp, NULL, ip, - (long)alen); - break; - } - } else { - if (xfs_mod_incore_sb(mp, - XFS_SBS_FDBLOCKS, - -(alen), rsvd)) { - if (XFS_IS_QUOTA_ON(ip->i_mount)) - XFS_TRANS_UNRESERVE_BLKQUOTA( - mp, NULL, ip, - (long)alen); - break; - } - } + indlen = (xfs_extlen_t) + xfs_bmap_worst_indlen(ip, alen); + ASSERT(indlen > 0); - if (xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS, - -(indlen), rsvd)) { - XFS_TRANS_UNRESERVE_BLKQUOTA( - mp, NULL, ip, (long)alen); + if (rt) + error = xfs_mod_incore_sb(mp, + XFS_SBS_FREXTENTS, + -(extsz), rsvd); + else + error = xfs_mod_incore_sb(mp, + XFS_SBS_FDBLOCKS, + -(alen), rsvd); + if (!error) + error = xfs_mod_incore_sb(mp, + XFS_SBS_FDBLOCKS, + -(indlen), rsvd); + + if (error) { + if (XFS_IS_QUOTA_ON(ip->i_mount)) + /* unreserve the blocks now */ + XFS_TRANS_UNRESERVE_QUOTA_NBLKS( + mp, NULL, ip, + (long)alen, 0, rt ? + XFS_QMOPT_RES_RTBLKS : + XFS_QMOPT_RES_REGBLKS); break; } + ip->i_delayed_blks += alen; abno = NULLSTARTBLOCK(indlen); } else { @@ -5376,13 +5394,24 @@ xfs_bunmapi( } if (wasdel) { ASSERT(STARTBLOCKVAL(del.br_startblock) > 0); - xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS, - (int)del.br_blockcount, rsvd); - /* Unreserve our quota space */ - XFS_TRANS_RESERVE_QUOTA_NBLKS( - mp, NULL, ip, -((long)del.br_blockcount), 0, - isrt ? XFS_QMOPT_RES_RTBLKS : + /* Update realtim/data freespace, unreserve quota */ + if (isrt) { + xfs_filblks_t rtexts; + + rtexts = XFS_FSB_TO_B(mp, del.br_blockcount); + do_div(rtexts, mp->m_sb.sb_rextsize); + xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS, + (int)rtexts, rsvd); + XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, NULL, ip, + -((long)del.br_blockcount), 0, + XFS_QMOPT_RES_RTBLKS); + } else { + xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS, + (int)del.br_blockcount, rsvd); + XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, NULL, ip, + -((long)del.br_blockcount), 0, XFS_QMOPT_RES_REGBLKS); + } ip->i_delayed_blks -= del.br_blockcount; if (cur) cur->bc_private.b.flags |= @@ -5714,7 +5743,7 @@ unlock_and_return: * blocks at the end of the file which do not start at the previous data block, * we will try to align the new blocks at stripe unit boundaries. */ -int /* error */ +STATIC int /* error */ xfs_bmap_isaeof( xfs_inode_t *ip, /* incore inode pointer */ xfs_fileoff_t off, /* file offset in fsblocks */ diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h index f1bc22f..e6d22ec 100644 --- a/fs/xfs/xfs_bmap.h +++ b/fs/xfs/xfs_bmap.h @@ -332,19 +332,6 @@ xfs_getbmap( int iflags); /* interface flags */ /* - * Check the last inode extent to determine whether this allocation will result - * in blocks being allocated at the end of the file. When we allocate new data - * blocks at the end of the file which do not start at the previous data block, - * we will try to align the new blocks at stripe unit boundaries. - */ -int -xfs_bmap_isaeof( - struct xfs_inode *ip, - xfs_fileoff_t off, - int whichfork, - char *aeof); - -/* * Check if the endoff is outside the last extent. 
If so the caller will grow * the allocation to a stripe unit boundary */ diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c index 163305a..09c4135 100644 --- a/fs/xfs/xfs_bmap_btree.c +++ b/fs/xfs/xfs_bmap_btree.c @@ -2331,20 +2331,6 @@ xfs_bmbt_lookup_ge( return xfs_bmbt_lookup(cur, XFS_LOOKUP_GE, stat); } -int /* error */ -xfs_bmbt_lookup_le( - xfs_btree_cur_t *cur, - xfs_fileoff_t off, - xfs_fsblock_t bno, - xfs_filblks_t len, - int *stat) /* success/failure */ -{ - cur->bc_rec.b.br_startoff = off; - cur->bc_rec.b.br_startblock = bno; - cur->bc_rec.b.br_blockcount = len; - return xfs_bmbt_lookup(cur, XFS_LOOKUP_LE, stat); -} - /* * Give the bmap btree a new root block. Copy the old broot contents * down into a real block and make the broot point to it. diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/xfs_bmap_btree.h index 843ff12..0a40cf1 100644 --- a/fs/xfs/xfs_bmap_btree.h +++ b/fs/xfs/xfs_bmap_btree.h @@ -580,14 +580,6 @@ xfs_bmbt_lookup_ge( xfs_filblks_t, int *); -int -xfs_bmbt_lookup_le( - struct xfs_btree_cur *, - xfs_fileoff_t, - xfs_fsblock_t, - xfs_filblks_t, - int *); - /* * Give the bmap btree a new root block. Copy the old broot contents * down into a real block and make the broot point to it. diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c index 9dd22dd..0cc63d6 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c @@ -90,6 +90,16 @@ xfs_btree_maxrecs( */ /* + * Retrieve the block pointer from the cursor at the given level. + * This may be a bmap btree root or from a buffer. + */ +STATIC xfs_btree_block_t * /* generic btree block pointer */ +xfs_btree_get_block( + xfs_btree_cur_t *cur, /* btree cursor */ + int level, /* level in btree */ + struct xfs_buf **bpp); /* buffer containing the block */ + +/* * Checking routine: return maxrecs for the block. */ STATIC int /* number of records fitting in block */ @@ -497,7 +507,7 @@ xfs_btree_firstrec( * Retrieve the block pointer from the cursor at the given level. * This may be a bmap btree root or from a buffer. */ -xfs_btree_block_t * /* generic btree block pointer */ +STATIC xfs_btree_block_t * /* generic btree block pointer */ xfs_btree_get_block( xfs_btree_cur_t *cur, /* btree cursor */ int level, /* level in btree */ diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h index 93872bb..09b4e15 100644 --- a/fs/xfs/xfs_btree.h +++ b/fs/xfs/xfs_btree.h @@ -325,16 +325,6 @@ xfs_btree_firstrec( int level); /* level to change */ /* - * Retrieve the block pointer from the cursor at the given level. - * This may be a bmap btree root or from a buffer. - */ -xfs_btree_block_t * /* generic btree block pointer */ -xfs_btree_get_block( - xfs_btree_cur_t *cur, /* btree cursor */ - int level, /* level in btree */ - struct xfs_buf **bpp); /* buffer containing the block */ - -/* * Get a buffer for the block, return it with no data read. * Long-form addressing. */ diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 9ab0039..30b8285 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -172,7 +172,7 @@ STATIC void xfs_buf_do_callbacks(xfs_buf_t *bp, xfs_log_item_t *lip); * * If the XFS_BLI_STALE flag has been set, then log nothing. */ -uint +STATIC uint xfs_buf_item_size( xfs_buf_log_item_t *bip) { @@ -240,7 +240,7 @@ xfs_buf_item_size( * format structure, and the rest point to contiguous chunks * within the buffer. */ -void +STATIC void xfs_buf_item_format( xfs_buf_log_item_t *bip, xfs_log_iovec_t *log_vector) @@ -365,7 +365,7 @@ xfs_buf_item_format( * item in memory so it cannot be written out. 
Simply call bpin() * on the buffer to do this. */ -void +STATIC void xfs_buf_item_pin( xfs_buf_log_item_t *bip) { @@ -391,7 +391,7 @@ xfs_buf_item_pin( * If the XFS_BLI_STALE flag is set and we are the last reference, * then free up the buf log item and unlock the buffer. */ -void +STATIC void xfs_buf_item_unpin( xfs_buf_log_item_t *bip, int stale) @@ -446,7 +446,7 @@ xfs_buf_item_unpin( * so we need to free the item's descriptor (that points to the item) * in the transaction. */ -void +STATIC void xfs_buf_item_unpin_remove( xfs_buf_log_item_t *bip, xfs_trans_t *tp) @@ -493,7 +493,7 @@ xfs_buf_item_unpin_remove( * the lock right away, return 0. If we can get the lock, pull the * buffer from the free list, mark it busy, and return 1. */ -uint +STATIC uint xfs_buf_item_trylock( xfs_buf_log_item_t *bip) { @@ -537,7 +537,7 @@ xfs_buf_item_trylock( * This is for support of xfs_trans_bhold(). Make sure the * XFS_BLI_HOLD field is cleared if we don't free the item. */ -void +STATIC void xfs_buf_item_unlock( xfs_buf_log_item_t *bip) { @@ -635,7 +635,7 @@ xfs_buf_item_unlock( * by returning the original lsn of that transaction here rather than * the current one. */ -xfs_lsn_t +STATIC xfs_lsn_t xfs_buf_item_committed( xfs_buf_log_item_t *bip, xfs_lsn_t lsn) @@ -654,7 +654,7 @@ xfs_buf_item_committed( * and have aborted this transaction, we'll trap this buffer when it tries to * get written out. */ -void +STATIC void xfs_buf_item_abort( xfs_buf_log_item_t *bip) { @@ -674,7 +674,7 @@ xfs_buf_item_abort( * B_DELWRI set, then get it going out to disk with a call to bawrite(). * If not, then just release the buffer. */ -void +STATIC void xfs_buf_item_push( xfs_buf_log_item_t *bip) { @@ -693,7 +693,7 @@ xfs_buf_item_push( } /* ARGSUSED */ -void +STATIC void xfs_buf_item_committing(xfs_buf_log_item_t *bip, xfs_lsn_t commit_lsn) { } @@ -701,7 +701,7 @@ xfs_buf_item_committing(xfs_buf_log_item_t *bip, xfs_lsn_t commit_lsn) /* * This is the ops vector shared by all buf log items. */ -struct xfs_item_ops xfs_buf_item_ops = { +STATIC struct xfs_item_ops xfs_buf_item_ops = { .iop_size = (uint(*)(xfs_log_item_t*))xfs_buf_item_size, .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*)) xfs_buf_item_format, diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h index 5f1b0c9..01aed5f 100644 --- a/fs/xfs/xfs_buf_item.h +++ b/fs/xfs/xfs_buf_item.h @@ -80,7 +80,7 @@ typedef struct xfs_buf_log_format_t { * user or group dquots and may require special recovery handling. */ #define XFS_BLI_UDQUOT_BUF 0x4 -/* #define XFS_BLI_PDQUOT_BUF 0x8 */ +#define XFS_BLI_PDQUOT_BUF 0x8 #define XFS_BLI_GDQUOT_BUF 0x10 #define XFS_BLI_CHUNK 128 diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c index d7fe288..8b792dd 100644 --- a/fs/xfs/xfs_da_btree.c +++ b/fs/xfs/xfs_da_btree.c @@ -113,7 +113,10 @@ STATIC void xfs_da_node_unbalance(xfs_da_state_t *state, STATIC uint xfs_da_node_lasthash(xfs_dabuf_t *bp, int *count); STATIC int xfs_da_node_order(xfs_dabuf_t *node1_bp, xfs_dabuf_t *node2_bp); STATIC xfs_dabuf_t *xfs_da_buf_make(int nbuf, xfs_buf_t **bps, inst_t *ra); - +STATIC int xfs_da_blk_unlink(xfs_da_state_t *state, + xfs_da_state_blk_t *drop_blk, + xfs_da_state_blk_t *save_blk); +STATIC void xfs_da_state_kill_altpath(xfs_da_state_t *state); /*======================================================================== * Routines used for growing the Btree. @@ -1424,7 +1427,7 @@ xfs_da_node_lasthash(xfs_dabuf_t *bp, int *count) /* * Unlink a block from a doubly linked list of blocks. 
*/ -int /* error */ +STATIC int /* error */ xfs_da_blk_unlink(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk, xfs_da_state_blk_t *save_blk) { @@ -2381,7 +2384,7 @@ xfs_da_state_alloc(void) /* * Kill the altpath contents of a da-state structure. */ -void +STATIC void xfs_da_state_kill_altpath(xfs_da_state_t *state) { int i; diff --git a/fs/xfs/xfs_da_btree.h b/fs/xfs/xfs_da_btree.h index 9fc699d..3a9b9e8 100644 --- a/fs/xfs/xfs_da_btree.h +++ b/fs/xfs/xfs_da_btree.h @@ -296,8 +296,6 @@ int xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path, /* * Utility routines. */ -int xfs_da_blk_unlink(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk, - xfs_da_state_blk_t *save_blk); int xfs_da_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk, xfs_da_state_blk_t *new_blk); @@ -320,7 +318,6 @@ uint xfs_da_hashname(uchar_t *name_string, int name_length); uint xfs_da_log2_roundup(uint i); xfs_da_state_t *xfs_da_state_alloc(void); void xfs_da_state_free(xfs_da_state_t *state); -void xfs_da_state_kill_altpath(xfs_da_state_t *state); void xfs_da_buf_done(xfs_dabuf_t *dabuf); void xfs_da_log_buf(struct xfs_trans *tp, xfs_dabuf_t *dabuf, uint first, diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c index 63abdc2..681be5c 100644 --- a/fs/xfs/xfs_dfrag.c +++ b/fs/xfs/xfs_dfrag.c @@ -180,9 +180,10 @@ xfs_swapext( goto error0; } - if (VN_CACHED(tvp) != 0) - xfs_inval_cached_pages(XFS_ITOV(tip), &(tip->i_iocore), - (xfs_off_t)0, 0, 0); + if (VN_CACHED(tvp) != 0) { + xfs_inval_cached_trace(&tip->i_iocore, 0, -1, 0, -1); + VOP_FLUSHINVAL_PAGES(tvp, 0, -1, FI_REMAPF_LOCKED); + } /* Verify O_DIRECT for ftmp */ if (VN_CACHED(tvp) != 0) { diff --git a/fs/xfs/xfs_dir2_data.c b/fs/xfs/xfs_dir2_data.c index db9887a..a0aa0e4 100644 --- a/fs/xfs/xfs_dir2_data.c +++ b/fs/xfs/xfs_dir2_data.c @@ -304,7 +304,7 @@ xfs_dir2_data_freeinsert( /* * Remove a bestfree entry from the table. */ -void +STATIC void xfs_dir2_data_freeremove( xfs_dir2_data_t *d, /* data block pointer */ xfs_dir2_data_free_t *dfp, /* bestfree entry pointer */ diff --git a/fs/xfs/xfs_dir2_data.h b/fs/xfs/xfs_dir2_data.h index 3f02294..476cac9 100644 --- a/fs/xfs/xfs_dir2_data.h +++ b/fs/xfs/xfs_dir2_data.h @@ -193,10 +193,6 @@ extern xfs_dir2_data_free_t * xfs_dir2_data_unused_t *dup, int *loghead); extern void - xfs_dir2_data_freeremove(xfs_dir2_data_t *d, - xfs_dir2_data_free_t *dfp, int *loghead); - -extern void xfs_dir2_data_freescan(struct xfs_mount *mp, xfs_dir2_data_t *d, int *loghead, char *aendp); diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c index 262d1e8..056f528 100644 --- a/fs/xfs/xfs_dir2_leaf.c +++ b/fs/xfs/xfs_dir2_leaf.c @@ -77,6 +77,10 @@ static void xfs_dir2_leaf_check(xfs_inode_t *dp, xfs_dabuf_t *bp); #endif static int xfs_dir2_leaf_lookup_int(xfs_da_args_t *args, xfs_dabuf_t **lbpp, int *indexp, xfs_dabuf_t **dbpp); +static void xfs_dir2_leaf_log_bests(struct xfs_trans *tp, struct xfs_dabuf *bp, + int first, int last); +static void xfs_dir2_leaf_log_tail(struct xfs_trans *tp, struct xfs_dabuf *bp); + /* * Convert a block form directory to a leaf form directory. @@ -1214,7 +1218,7 @@ xfs_dir2_leaf_init( /* * Log the bests entries indicated from a leaf1 block. */ -void +static void xfs_dir2_leaf_log_bests( xfs_trans_t *tp, /* transaction pointer */ xfs_dabuf_t *bp, /* leaf buffer */ @@ -1278,7 +1282,7 @@ xfs_dir2_leaf_log_header( /* * Log the tail of the leaf1 block. 
*/ -void +STATIC void xfs_dir2_leaf_log_tail( xfs_trans_t *tp, /* transaction pointer */ xfs_dabuf_t *bp) /* leaf buffer */ diff --git a/fs/xfs/xfs_dir2_leaf.h b/fs/xfs/xfs_dir2_leaf.h index 7f20eee..3303cd6 100644 --- a/fs/xfs/xfs_dir2_leaf.h +++ b/fs/xfs/xfs_dir2_leaf.h @@ -330,15 +330,8 @@ extern void int first, int last); extern void - xfs_dir2_leaf_log_bests(struct xfs_trans *tp, struct xfs_dabuf *bp, - int first, int last); - -extern void xfs_dir2_leaf_log_header(struct xfs_trans *tp, struct xfs_dabuf *bp); -extern void - xfs_dir2_leaf_log_tail(struct xfs_trans *tp, struct xfs_dabuf *bp); - extern int xfs_dir2_leaf_lookup(struct xfs_da_args *args); diff --git a/fs/xfs/xfs_dir_leaf.c b/fs/xfs/xfs_dir_leaf.c index 617018d..c2ea617 100644 --- a/fs/xfs/xfs_dir_leaf.c +++ b/fs/xfs/xfs_dir_leaf.c @@ -91,6 +91,10 @@ STATIC int xfs_dir_leaf_figure_balance(xfs_da_state_t *state, int *number_entries_in_blk1, int *number_namebytes_in_blk1); +STATIC int xfs_dir_leaf_create(struct xfs_da_args *args, + xfs_dablk_t which_block, + struct xfs_dabuf **bpp); + /* * Utility routines. */ @@ -781,7 +785,7 @@ xfs_dir_leaf_to_node(xfs_da_args_t *args) * Create the initial contents of a leaf directory * or a leaf in a node directory. */ -int +STATIC int xfs_dir_leaf_create(xfs_da_args_t *args, xfs_dablk_t blkno, xfs_dabuf_t **bpp) { xfs_dir_leafblock_t *leaf; diff --git a/fs/xfs/xfs_dir_leaf.h b/fs/xfs/xfs_dir_leaf.h index 00d68d3..dd423ce 100644 --- a/fs/xfs/xfs_dir_leaf.h +++ b/fs/xfs/xfs_dir_leaf.h @@ -202,8 +202,6 @@ int xfs_dir_leaf_to_shortform(struct xfs_da_args *args); /* * Routines used for growing the Btree. */ -int xfs_dir_leaf_create(struct xfs_da_args *args, xfs_dablk_t which_block, - struct xfs_dabuf **bpp); int xfs_dir_leaf_split(struct xfs_da_state *state, struct xfs_da_state_blk *oldblk, struct xfs_da_state_blk *newblk); diff --git a/fs/xfs/xfs_dmapi.h b/fs/xfs/xfs_dmapi.h index 55ae3e6..55c17ad 100644 --- a/fs/xfs/xfs_dmapi.h +++ b/fs/xfs/xfs_dmapi.h @@ -166,27 +166,32 @@ typedef enum { #define DM_FLAGS_NDELAY 0x001 /* return EAGAIN after dm_pending() */ #define DM_FLAGS_UNWANTED 0x002 /* event not in fsys dm_eventset_t */ #define DM_FLAGS_ISEM 0x004 /* thread holds i_sem */ -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) -#if LINUX_VERSION_CODE > KERNEL_VERSION(2,4,21) -/* i_alloc_sem was added in 2.4.22-pre1 */ #define DM_FLAGS_IALLOCSEM_RD 0x010 /* thread holds i_alloc_sem rd */ #define DM_FLAGS_IALLOCSEM_WR 0x020 /* thread holds i_alloc_sem wr */ -#endif -#endif /* * Based on IO_ISDIRECT, decide which i_ flag is set. */ -#ifdef DM_FLAGS_IALLOCSEM_RD +#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,0) +#define DM_SEM_FLAG_RD(ioflags) (((ioflags) & IO_ISDIRECT) ? \ + DM_FLAGS_ISEM : 0) +#define DM_SEM_FLAG_WR (DM_FLAGS_IALLOCSEM_WR | DM_FLAGS_ISEM) +#endif + +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)) && \ + (LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,22)) #define DM_SEM_FLAG_RD(ioflags) (((ioflags) & IO_ISDIRECT) ? \ DM_FLAGS_IALLOCSEM_RD : DM_FLAGS_ISEM) #define DM_SEM_FLAG_WR (DM_FLAGS_IALLOCSEM_WR | DM_FLAGS_ISEM) -#else +#endif + +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,4,21) #define DM_SEM_FLAG_RD(ioflags) (((ioflags) & IO_ISDIRECT) ? \ 0 : DM_FLAGS_ISEM) #define DM_SEM_FLAG_WR (DM_FLAGS_ISEM) #endif + /* * Macros to turn caller specified delay/block flags into * dm_send_xxxx_event flag DM_FLAGS_NDELAY. 
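One pattern repeats across the attr, btree, buf_item, da_btree, dir2 and dir_leaf hunks above: a function that used to be exported through a header is made STATIC, its prototype is dropped from the .h file, and where it is called before its definition a STATIC forward declaration is added near the top of the .c file instead. A minimal sketch of the shape of that change, assuming STATIC expands to static as in ordinary XFS builds; the function names are hypothetical:

#include <stdio.h>

#define STATIC static	/* assumption: matches the non-debug XFS definition */

/* forward declaration replaces the prototype removed from the header */
STATIC int helper(int x);

int api_entry(int x)
{
	return helper(x) + 1;
}

/* the definition gains STATIC and is no longer visible to other files */
STATIC int helper(int x)
{
	return x * 2;
}

int main(void)
{
	printf("%d\n", api_entry(20));	/* 41 */
	return 0;
}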
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c index bbe1dea..dcd3fdd 100644 --- a/fs/xfs/xfs_error.c +++ b/fs/xfs/xfs_error.c @@ -280,7 +280,7 @@ xfs_error_report( } } -void +STATIC void xfs_hex_dump(void *p, int length) { __uint8_t *uip = (__uint8_t*)p; diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h index 6bc0535..52ee2b9 100644 --- a/fs/xfs/xfs_error.h +++ b/fs/xfs/xfs_error.h @@ -73,9 +73,6 @@ xfs_corruption_error( int linenum, inst_t *ra); -extern void -xfs_hex_dump(void *p, int length); - #define XFS_ERROR_REPORT(e, lvl, mp) \ xfs_error_report(e, lvl, mp, __FILE__, __LINE__, __return_address) #define XFS_CORRUPTION_ERROR(e, lvl, mp, mem) \ diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index 5eafd5b..db7cbd1 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c @@ -59,6 +59,18 @@ STATIC void xfs_efi_item_abort(xfs_efi_log_item_t *); STATIC void xfs_efd_item_abort(xfs_efd_log_item_t *); +void +xfs_efi_item_free(xfs_efi_log_item_t *efip) +{ + int nexts = efip->efi_format.efi_nextents; + + if (nexts > XFS_EFI_MAX_FAST_EXTENTS) { + kmem_free(efip, sizeof(xfs_efi_log_item_t) + + (nexts - 1) * sizeof(xfs_extent_t)); + } else { + kmem_zone_free(xfs_efi_zone, efip); + } +} /* * This returns the number of iovecs needed to log the given efi item. @@ -120,8 +132,6 @@ xfs_efi_item_pin(xfs_efi_log_item_t *efip) STATIC void xfs_efi_item_unpin(xfs_efi_log_item_t *efip, int stale) { - int nexts; - int size; xfs_mount_t *mp; SPLDECL(s); @@ -132,21 +142,11 @@ xfs_efi_item_unpin(xfs_efi_log_item_t *efip, int stale) * xfs_trans_delete_ail() drops the AIL lock. */ xfs_trans_delete_ail(mp, (xfs_log_item_t *)efip, s); - - nexts = efip->efi_format.efi_nextents; - if (nexts > XFS_EFI_MAX_FAST_EXTENTS) { - size = sizeof(xfs_efi_log_item_t); - size += (nexts - 1) * sizeof(xfs_extent_t); - kmem_free(efip, size); - } else { - kmem_zone_free(xfs_efi_zone, efip); - } + xfs_efi_item_free(efip); } else { efip->efi_flags |= XFS_EFI_COMMITTED; AIL_UNLOCK(mp, s); } - - return; } /* @@ -159,8 +159,6 @@ xfs_efi_item_unpin(xfs_efi_log_item_t *efip, int stale) STATIC void xfs_efi_item_unpin_remove(xfs_efi_log_item_t *efip, xfs_trans_t *tp) { - int nexts; - int size; xfs_mount_t *mp; xfs_log_item_desc_t *lidp; SPLDECL(s); @@ -178,23 +176,11 @@ xfs_efi_item_unpin_remove(xfs_efi_log_item_t *efip, xfs_trans_t *tp) * xfs_trans_delete_ail() drops the AIL lock. */ xfs_trans_delete_ail(mp, (xfs_log_item_t *)efip, s); - /* - * now free the item itself - */ - nexts = efip->efi_format.efi_nextents; - if (nexts > XFS_EFI_MAX_FAST_EXTENTS) { - size = sizeof(xfs_efi_log_item_t); - size += (nexts - 1) * sizeof(xfs_extent_t); - kmem_free(efip, size); - } else { - kmem_zone_free(xfs_efi_zone, efip); - } + xfs_efi_item_free(efip); } else { efip->efi_flags |= XFS_EFI_COMMITTED; AIL_UNLOCK(mp, s); } - - return; } /* @@ -245,18 +231,7 @@ xfs_efi_item_committed(xfs_efi_log_item_t *efip, xfs_lsn_t lsn) STATIC void xfs_efi_item_abort(xfs_efi_log_item_t *efip) { - int nexts; - int size; - - nexts = efip->efi_format.efi_nextents; - if (nexts > XFS_EFI_MAX_FAST_EXTENTS) { - size = sizeof(xfs_efi_log_item_t); - size += (nexts - 1) * sizeof(xfs_extent_t); - kmem_free(efip, size); - } else { - kmem_zone_free(xfs_efi_zone, efip); - } - return; + xfs_efi_item_free(efip); } /* @@ -288,7 +263,7 @@ xfs_efi_item_committing(xfs_efi_log_item_t *efip, xfs_lsn_t lsn) /* * This is the ops vector shared by all efi log items. 
*/ -struct xfs_item_ops xfs_efi_item_ops = { +STATIC struct xfs_item_ops xfs_efi_item_ops = { .iop_size = (uint(*)(xfs_log_item_t*))xfs_efi_item_size, .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*)) xfs_efi_item_format, @@ -355,8 +330,6 @@ xfs_efi_release(xfs_efi_log_item_t *efip, { xfs_mount_t *mp; int extents_left; - uint size; - int nexts; SPLDECL(s); mp = efip->efi_item.li_mountp; @@ -372,20 +345,10 @@ xfs_efi_release(xfs_efi_log_item_t *efip, * xfs_trans_delete_ail() drops the AIL lock. */ xfs_trans_delete_ail(mp, (xfs_log_item_t *)efip, s); + xfs_efi_item_free(efip); } else { AIL_UNLOCK(mp, s); } - - if (extents_left == 0) { - nexts = efip->efi_format.efi_nextents; - if (nexts > XFS_EFI_MAX_FAST_EXTENTS) { - size = sizeof(xfs_efi_log_item_t); - size += (nexts - 1) * sizeof(xfs_extent_t); - kmem_free(efip, size); - } else { - kmem_zone_free(xfs_efi_zone, efip); - } - } } /* @@ -398,8 +361,6 @@ STATIC void xfs_efi_cancel( xfs_efi_log_item_t *efip) { - int nexts; - int size; xfs_mount_t *mp; SPLDECL(s); @@ -410,26 +371,25 @@ xfs_efi_cancel( * xfs_trans_delete_ail() drops the AIL lock. */ xfs_trans_delete_ail(mp, (xfs_log_item_t *)efip, s); - - nexts = efip->efi_format.efi_nextents; - if (nexts > XFS_EFI_MAX_FAST_EXTENTS) { - size = sizeof(xfs_efi_log_item_t); - size += (nexts - 1) * sizeof(xfs_extent_t); - kmem_free(efip, size); - } else { - kmem_zone_free(xfs_efi_zone, efip); - } + xfs_efi_item_free(efip); } else { efip->efi_flags |= XFS_EFI_CANCELED; AIL_UNLOCK(mp, s); } - - return; } +STATIC void +xfs_efd_item_free(xfs_efd_log_item_t *efdp) +{ + int nexts = efdp->efd_format.efd_nextents; - - + if (nexts > XFS_EFD_MAX_FAST_EXTENTS) { + kmem_free(efdp, sizeof(xfs_efd_log_item_t) + + (nexts - 1) * sizeof(xfs_extent_t)); + } else { + kmem_zone_free(xfs_efd_zone, efdp); + } +} /* * This returns the number of iovecs needed to log the given efd item. @@ -533,9 +493,6 @@ xfs_efd_item_unlock(xfs_efd_log_item_t *efdp) STATIC xfs_lsn_t xfs_efd_item_committed(xfs_efd_log_item_t *efdp, xfs_lsn_t lsn) { - uint size; - int nexts; - /* * If we got a log I/O error, it's always the case that the LR with the * EFI got unpinned and freed before the EFD got aborted. @@ -543,15 +500,7 @@ xfs_efd_item_committed(xfs_efd_log_item_t *efdp, xfs_lsn_t lsn) if ((efdp->efd_item.li_flags & XFS_LI_ABORTED) == 0) xfs_efi_release(efdp->efd_efip, efdp->efd_format.efd_nextents); - nexts = efdp->efd_format.efd_nextents; - if (nexts > XFS_EFD_MAX_FAST_EXTENTS) { - size = sizeof(xfs_efd_log_item_t); - size += (nexts - 1) * sizeof(xfs_extent_t); - kmem_free(efdp, size); - } else { - kmem_zone_free(xfs_efd_zone, efdp); - } - + xfs_efd_item_free(efdp); return (xfs_lsn_t)-1; } @@ -565,9 +514,6 @@ xfs_efd_item_committed(xfs_efd_log_item_t *efdp, xfs_lsn_t lsn) STATIC void xfs_efd_item_abort(xfs_efd_log_item_t *efdp) { - int nexts; - int size; - /* * If we got a log I/O error, it's always the case that the LR with the * EFI got unpinned and freed before the EFD got aborted. 
So don't @@ -576,15 +522,7 @@ xfs_efd_item_abort(xfs_efd_log_item_t *efdp) if ((efdp->efd_item.li_flags & XFS_LI_ABORTED) == 0) xfs_efi_cancel(efdp->efd_efip); - nexts = efdp->efd_format.efd_nextents; - if (nexts > XFS_EFD_MAX_FAST_EXTENTS) { - size = sizeof(xfs_efd_log_item_t); - size += (nexts - 1) * sizeof(xfs_extent_t); - kmem_free(efdp, size); - } else { - kmem_zone_free(xfs_efd_zone, efdp); - } - return; + xfs_efd_item_free(efdp); } /* @@ -615,7 +553,7 @@ xfs_efd_item_committing(xfs_efd_log_item_t *efip, xfs_lsn_t lsn) /* * This is the ops vector shared by all efd log items. */ -struct xfs_item_ops xfs_efd_item_ops = { +STATIC struct xfs_item_ops xfs_efd_item_ops = { .iop_size = (uint(*)(xfs_log_item_t*))xfs_efd_item_size, .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*)) xfs_efd_item_format, diff --git a/fs/xfs/xfs_extfree_item.h b/fs/xfs/xfs_extfree_item.h index 7122d61..d433bac 100644 --- a/fs/xfs/xfs_extfree_item.h +++ b/fs/xfs/xfs_extfree_item.h @@ -118,6 +118,8 @@ xfs_efi_log_item_t *xfs_efi_init(struct xfs_mount *, uint); xfs_efd_log_item_t *xfs_efd_init(struct xfs_mount *, xfs_efi_log_item_t *, uint); +void xfs_efi_item_free(xfs_efi_log_item_t *); + #endif /* __KERNEL__ */ #endif /* __XFS_EXTFREE_ITEM_H__ */ diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h index 6ee8443..095af0a 100644 --- a/fs/xfs/xfs_fs.h +++ b/fs/xfs/xfs_fs.h @@ -60,7 +60,8 @@ struct fsxattr { __u32 fsx_xflags; /* xflags field value (get/set) */ __u32 fsx_extsize; /* extsize field value (get/set)*/ __u32 fsx_nextents; /* nextents field value (get) */ - unsigned char fsx_pad[16]; + __u32 fsx_projid; /* project identifier (get/set) */ + unsigned char fsx_pad[12]; }; #endif diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index 2121305..ca535d6 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -559,32 +559,6 @@ xfs_reserve_blocks( return(0); } -void -xfs_fs_log_dummy(xfs_mount_t *mp) -{ - xfs_trans_t *tp; - xfs_inode_t *ip; - - - tp = _xfs_trans_alloc(mp, XFS_TRANS_DUMMY1); - atomic_inc(&mp->m_active_trans); - if (xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0)) { - xfs_trans_cancel(tp, 0); - return; - } - - ip = mp->m_rootip; - xfs_ilock(ip, XFS_ILOCK_EXCL); - - xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); - xfs_trans_ihold(tp, ip); - xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - xfs_trans_set_sync(tp); - xfs_trans_commit(tp, 0, NULL); - - xfs_iunlock(ip, XFS_ILOCK_EXCL); -} - int xfs_fs_goingdown( xfs_mount_t *mp, diff --git a/fs/xfs/xfs_ialloc_btree.h b/fs/xfs/xfs_ialloc_btree.h index 803c4d1..44be188 100644 --- a/fs/xfs/xfs_ialloc_btree.h +++ b/fs/xfs/xfs_ialloc_btree.h @@ -100,9 +100,13 @@ xfs_inofree_t xfs_inobt_mask(int i); #endif #if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INOBT_IS_FREE) int xfs_inobt_is_free(xfs_inobt_rec_t *rp, int i); -#define XFS_INOBT_IS_FREE(rp,i) xfs_inobt_is_free(rp,i) +#define XFS_INOBT_IS_FREE(rp,i) xfs_inobt_is_free(rp,i) +#define XFS_INOBT_IS_FREE_DISK(rp,i) xfs_inobt_is_free_disk(rp,i) #else -#define XFS_INOBT_IS_FREE(rp,i) (((rp)->ir_free & XFS_INOBT_MASK(i)) != 0) +#define XFS_INOBT_IS_FREE(rp,i) \ + (((rp)->ir_free & XFS_INOBT_MASK(i)) != 0) +#define XFS_INOBT_IS_FREE_DISK(rp,i) \ + ((INT_GET((rp)->ir_free, ARCH_CONVERT) & XFS_INOBT_MASK(i)) != 0) #endif #if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INOBT_SET_FREE) void xfs_inobt_set_free(xfs_inobt_rec_t *rp, int i); diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index bc8c8c7..34bdf59 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -146,51 +146,6 @@ 
xfs_inobp_check( #endif /* - * called from bwrite on xfs inode buffers - */ -void -xfs_inobp_bwcheck(xfs_buf_t *bp) -{ - xfs_mount_t *mp; - int i; - int j; - xfs_dinode_t *dip; - - ASSERT(XFS_BUF_FSPRIVATE3(bp, void *) != NULL); - - mp = XFS_BUF_FSPRIVATE3(bp, xfs_mount_t *); - - - j = mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog; - - for (i = 0; i < j; i++) { - dip = (xfs_dinode_t *) xfs_buf_offset(bp, - i * mp->m_sb.sb_inodesize); - if (INT_GET(dip->di_core.di_magic, ARCH_CONVERT) != XFS_DINODE_MAGIC) { - cmn_err(CE_WARN, -"Bad magic # 0x%x in XFS inode buffer 0x%Lx, starting blockno %Ld, offset 0x%x", - INT_GET(dip->di_core.di_magic, ARCH_CONVERT), - (__uint64_t)(__psunsigned_t) bp, - (__int64_t) XFS_BUF_ADDR(bp), - xfs_buf_offset(bp, i * mp->m_sb.sb_inodesize)); - xfs_fs_cmn_err(CE_WARN, mp, - "corrupt, unmount and run xfs_repair"); - } - if (!dip->di_next_unlinked) { - cmn_err(CE_WARN, -"Bad next_unlinked field (0) in XFS inode buffer 0x%p, starting blockno %Ld, offset 0x%x", - (__uint64_t)(__psunsigned_t) bp, - (__int64_t) XFS_BUF_ADDR(bp), - xfs_buf_offset(bp, i * mp->m_sb.sb_inodesize)); - xfs_fs_cmn_err(CE_WARN, mp, - "corrupt, unmount and run xfs_repair"); - } - } - - return; -} - -/* * This routine is called to map an inode number within a file * system to the buffer containing the on-disk version of the * inode. It returns a pointer to the buffer containing the @@ -203,7 +158,7 @@ xfs_inobp_bwcheck(xfs_buf_t *bp) * Use xfs_imap() to determine the size and location of the * buffer to read from disk. */ -int +STATIC int xfs_inotobp( xfs_mount_t *mp, xfs_trans_t *tp, @@ -1247,26 +1202,32 @@ xfs_ialloc( case S_IFREG: case S_IFDIR: if (unlikely(pip->i_d.di_flags & XFS_DIFLAG_ANY)) { - if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT) { - if ((mode & S_IFMT) == S_IFDIR) { - ip->i_d.di_flags |= XFS_DIFLAG_RTINHERIT; - } else { - ip->i_d.di_flags |= XFS_DIFLAG_REALTIME; + uint di_flags = 0; + + if ((mode & S_IFMT) == S_IFDIR) { + if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT) + di_flags |= XFS_DIFLAG_RTINHERIT; + } else { + if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT) { + di_flags |= XFS_DIFLAG_REALTIME; ip->i_iocore.io_flags |= XFS_IOCORE_RT; } } if ((pip->i_d.di_flags & XFS_DIFLAG_NOATIME) && xfs_inherit_noatime) - ip->i_d.di_flags |= XFS_DIFLAG_NOATIME; + di_flags |= XFS_DIFLAG_NOATIME; if ((pip->i_d.di_flags & XFS_DIFLAG_NODUMP) && xfs_inherit_nodump) - ip->i_d.di_flags |= XFS_DIFLAG_NODUMP; + di_flags |= XFS_DIFLAG_NODUMP; if ((pip->i_d.di_flags & XFS_DIFLAG_SYNC) && xfs_inherit_sync) - ip->i_d.di_flags |= XFS_DIFLAG_SYNC; + di_flags |= XFS_DIFLAG_SYNC; if ((pip->i_d.di_flags & XFS_DIFLAG_NOSYMLINKS) && xfs_inherit_nosymlinks) - ip->i_d.di_flags |= XFS_DIFLAG_NOSYMLINKS; + di_flags |= XFS_DIFLAG_NOSYMLINKS; + if (pip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) + di_flags |= XFS_DIFLAG_PROJINHERIT; + ip->i_d.di_flags |= di_flags; } /* FALLTHROUGH */ case S_IFLNK: @@ -2156,7 +2117,7 @@ static __inline__ int xfs_inode_clean(xfs_inode_t *ip) (ip->i_update_core == 0)); } -void +STATIC void xfs_ifree_cluster( xfs_inode_t *free_ip, xfs_trans_t *tp, @@ -2875,7 +2836,7 @@ xfs_iunpin( * be subsequently pinned once someone is waiting for it to be * unpinned. */ -void +STATIC void xfs_iunpin_wait( xfs_inode_t *ip) { @@ -3601,107 +3562,43 @@ corrupt_out: /* - * Flush all inactive inodes in mp. Return true if no user references - * were found, false otherwise. + * Flush all inactive inodes in mp. 
*/ -int +void xfs_iflush_all( - xfs_mount_t *mp, - int flag) + xfs_mount_t *mp) { - int busy; - int done; - int purged; xfs_inode_t *ip; - vmap_t vmap; vnode_t *vp; - busy = done = 0; - while (!done) { - purged = 0; - XFS_MOUNT_ILOCK(mp); - ip = mp->m_inodes; - if (ip == NULL) { - break; - } - do { - /* Make sure we skip markers inserted by sync */ - if (ip->i_mount == NULL) { - ip = ip->i_mnext; - continue; - } - - /* - * It's up to our caller to purge the root - * and quota vnodes later. - */ - vp = XFS_ITOV_NULL(ip); - - if (!vp) { - XFS_MOUNT_IUNLOCK(mp); - xfs_finish_reclaim(ip, 0, XFS_IFLUSH_ASYNC); - purged = 1; - break; - } + again: + XFS_MOUNT_ILOCK(mp); + ip = mp->m_inodes; + if (ip == NULL) + goto out; - if (vn_count(vp) != 0) { - if (vn_count(vp) == 1 && - (ip == mp->m_rootip || - (mp->m_quotainfo && - (ip->i_ino == mp->m_sb.sb_uquotino || - ip->i_ino == mp->m_sb.sb_gquotino)))) { + do { + /* Make sure we skip markers inserted by sync */ + if (ip->i_mount == NULL) { + ip = ip->i_mnext; + continue; + } - ip = ip->i_mnext; - continue; - } - if (!(flag & XFS_FLUSH_ALL)) { - busy = 1; - done = 1; - break; - } - /* - * Ignore busy inodes but continue flushing - * others. - */ - ip = ip->i_mnext; - continue; - } - /* - * Sample vp mapping while holding mp locked on MP - * systems, so we don't purge a reclaimed or - * nonexistent vnode. We break from the loop - * since we know that we modify - * it by pulling ourselves from it in xfs_reclaim() - * called via vn_purge() below. Set ip to the next - * entry in the list anyway so we'll know below - * whether we reached the end or not. - */ - VMAP(vp, vmap); + vp = XFS_ITOV_NULL(ip); + if (!vp) { XFS_MOUNT_IUNLOCK(mp); + xfs_finish_reclaim(ip, 0, XFS_IFLUSH_ASYNC); + goto again; + } - vn_purge(vp, &vmap); + ASSERT(vn_count(vp) == 0); - purged = 1; - break; - } while (ip != mp->m_inodes); - /* - * We need to distinguish between when we exit the loop - * after a purge and when we simply hit the end of the - * list. We can't use the (ip == mp->m_inodes) test, - * because when we purge an inode at the start of the list - * the next inode on the list becomes mp->m_inodes. That - * would cause such a test to bail out early. The purged - * variable tells us how we got out of the loop. - */ - if (!purged) { - done = 1; - } - } + ip = ip->i_mnext; + } while (ip != mp->m_inodes); + out: XFS_MOUNT_IUNLOCK(mp); - return !busy; } - /* * xfs_iaccess: check accessibility of inode for mode. */ diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 37e1c31..54d9e54 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -412,11 +412,6 @@ void xfs_ifork_next_set(xfs_inode_t *ip, int w, int n); #define XFS_IFLUSH_DELWRI 5 /* - * Flags for xfs_iflush_all. - */ -#define XFS_FLUSH_ALL 0x1 - -/* * Flags for xfs_itruncate_start(). */ #define XFS_ITRUNC_DEFINITE 0x1 @@ -487,8 +482,6 @@ int xfs_finish_reclaim_all(struct xfs_mount *, int); /* * xfs_inode.c prototypes. 
*/ -int xfs_inotobp(struct xfs_mount *, struct xfs_trans *, xfs_ino_t, - xfs_dinode_t **, struct xfs_buf **, int *); int xfs_itobp(struct xfs_mount *, struct xfs_trans *, xfs_inode_t *, xfs_dinode_t **, struct xfs_buf **, xfs_daddr_t); @@ -522,7 +515,7 @@ void xfs_ipin(xfs_inode_t *); void xfs_iunpin(xfs_inode_t *); int xfs_iextents_copy(xfs_inode_t *, xfs_bmbt_rec_t *, int); int xfs_iflush(xfs_inode_t *, uint); -int xfs_iflush_all(struct xfs_mount *, int); +void xfs_iflush_all(struct xfs_mount *); int xfs_iaccess(xfs_inode_t *, mode_t, cred_t *); uint xfs_iroundup(uint); void xfs_ichgtime(xfs_inode_t *, int); diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 768cb18..0eed30f 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -910,7 +910,7 @@ xfs_inode_item_committing( /* * This is the ops vector shared by all buf log items. */ -struct xfs_item_ops xfs_inode_item_ops = { +STATIC struct xfs_item_ops xfs_inode_item_ops = { .iop_size = (uint(*)(xfs_log_item_t*))xfs_inode_item_size, .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*)) xfs_inode_item_format, diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 469e1a7..2edd676 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -385,15 +385,15 @@ xfs_iomap_write_direct( int nimaps, maps; int error; int bmapi_flag; + int quota_flag; int rt; xfs_trans_t *tp; xfs_bmbt_irec_t imap[XFS_WRITE_IMAPS], *imapp; xfs_bmap_free_t free_list; int aeof; - xfs_filblks_t datablocks; + xfs_filblks_t datablocks, qblocks, resblks; int committed; int numrtextents; - uint resblks; /* * Make sure that the dquots are there. This doesn't hold @@ -419,7 +419,6 @@ xfs_iomap_write_direct( xfs_fileoff_t map_last_fsb; map_last_fsb = ret_imap->br_blockcount + ret_imap->br_startoff; - if (map_last_fsb < last_fsb) { last_fsb = map_last_fsb; count_fsb = last_fsb - offset_fsb; @@ -428,56 +427,47 @@ xfs_iomap_write_direct( } /* - * determine if reserving space on - * the data or realtime partition. + * Determine if reserving space on the data or realtime partition. */ if ((rt = XFS_IS_REALTIME_INODE(ip))) { - int sbrtextsize, iprtextsize; + xfs_extlen_t extsz; - sbrtextsize = mp->m_sb.sb_rextsize; - iprtextsize = - ip->i_d.di_extsize ? ip->i_d.di_extsize : sbrtextsize; - numrtextents = (count_fsb + iprtextsize - 1); - do_div(numrtextents, sbrtextsize); + if (!(extsz = ip->i_d.di_extsize)) + extsz = mp->m_sb.sb_rextsize; + numrtextents = qblocks = (count_fsb + extsz - 1); + do_div(numrtextents, mp->m_sb.sb_rextsize); + quota_flag = XFS_QMOPT_RES_RTBLKS; datablocks = 0; } else { - datablocks = count_fsb; + datablocks = qblocks = count_fsb; + quota_flag = XFS_QMOPT_RES_REGBLKS; numrtextents = 0; } /* - * allocate and setup the transaction + * Allocate and setup the transaction */ xfs_iunlock(ip, XFS_ILOCK_EXCL); tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT); - resblks = XFS_DIOSTRAT_SPACE_RES(mp, datablocks); - error = xfs_trans_reserve(tp, resblks, XFS_WRITE_LOG_RES(mp), numrtextents, XFS_TRANS_PERM_LOG_RES, XFS_WRITE_LOG_COUNT); /* - * check for running out of space + * Check for running out of space, note: need lock to return */ if (error) - /* - * Free the transaction structure. - */ xfs_trans_cancel(tp, 0); - xfs_ilock(ip, XFS_ILOCK_EXCL); - if (error) - goto error_out; /* Don't return in above if .. 
trans .., - need lock to return */ + goto error_out; - if (XFS_TRANS_RESERVE_BLKQUOTA(mp, tp, ip, resblks)) { + if (XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip, qblocks, 0, quota_flag)) { error = (EDQUOT); goto error1; } - nimaps = 1; bmapi_flag = XFS_BMAPI_WRITE; xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); @@ -487,31 +477,29 @@ xfs_iomap_write_direct( bmapi_flag |= XFS_BMAPI_PREALLOC; /* - * issue the bmapi() call to allocate the blocks + * Issue the bmapi() call to allocate the blocks */ XFS_BMAP_INIT(&free_list, &firstfsb); + nimaps = 1; imapp = &imap[0]; error = xfs_bmapi(tp, ip, offset_fsb, count_fsb, bmapi_flag, &firstfsb, 0, imapp, &nimaps, &free_list); - if (error) { + if (error) goto error0; - } /* - * complete the transaction + * Complete the transaction */ - error = xfs_bmap_finish(&tp, &free_list, firstfsb, &committed); - if (error) { + if (error) goto error0; - } - error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL); - if (error) { + if (error) goto error_out; - } - /* copy any maps to caller's array and return any error. */ + /* + * Copy any maps to caller's array and return any error. + */ if (nimaps == 0) { error = (ENOSPC); goto error_out; @@ -530,10 +518,11 @@ xfs_iomap_write_direct( } return 0; - error0: /* Cancel bmap, unlock inode, and cancel trans */ +error0: /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */ xfs_bmap_cancel(&free_list); + XFS_TRANS_UNRESERVE_QUOTA_NBLKS(mp, tp, ip, qblocks, 0, quota_flag); - error1: /* Just cancel transaction */ +error1: /* Just cancel transaction */ xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); *nmaps = 0; /* nothing set-up here */ diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 092d5fb..1cd2ac1 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -134,7 +134,7 @@ STATIC void xlog_verify_tail_lsn(xlog_t *log, xlog_in_core_t *iclog, #define xlog_verify_tail_lsn(a,b,c) #endif -int xlog_iclogs_empty(xlog_t *log); +STATIC int xlog_iclogs_empty(xlog_t *log); #ifdef DEBUG int xlog_do_error = 0; @@ -1857,7 +1857,7 @@ xlog_write(xfs_mount_t * mp, * * State Change: DIRTY -> ACTIVE */ -void +STATIC void xlog_state_clean_log(xlog_t *log) { xlog_in_core_t *iclog; @@ -3542,7 +3542,7 @@ xfs_log_force_umount( return (retval); } -int +STATIC int xlog_iclogs_empty(xlog_t *log) { xlog_in_core_t *iclog; diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index c31e3ce..1a1d452 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -535,7 +535,6 @@ typedef struct log { /* common routines */ extern xfs_lsn_t xlog_assign_tail_lsn(struct xfs_mount *mp); -extern int xlog_find_head(xlog_t *log, xfs_daddr_t *head_blk); extern int xlog_find_tail(xlog_t *log, xfs_daddr_t *head_blk, xfs_daddr_t *tail_blk, @@ -548,7 +547,6 @@ extern void xlog_recover_process_iunlinks(xlog_t *log); extern struct xfs_buf *xlog_get_bp(xlog_t *, int); extern void xlog_put_bp(struct xfs_buf *); extern int xlog_bread(xlog_t *, xfs_daddr_t, int, struct xfs_buf *); -extern xfs_caddr_t xlog_align(xlog_t *, xfs_daddr_t, int, struct xfs_buf *); /* iclog tracing */ #define XLOG_TRACE_GRAB_FLUSH 1 diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 9824b5b..0aac28d 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -148,7 +148,7 @@ xlog_bread( * The buffer is kept locked across the write and is returned locked. * This can only be used for synchronous log writes. 
*/ -int +STATIC int xlog_bwrite( xlog_t *log, xfs_daddr_t blk_no, @@ -179,7 +179,7 @@ xlog_bwrite( return error; } -xfs_caddr_t +STATIC xfs_caddr_t xlog_align( xlog_t *log, xfs_daddr_t blk_no, @@ -528,7 +528,7 @@ out: * * Return: zero if normal, non-zero if error. */ -int +STATIC int xlog_find_head( xlog_t *log, xfs_daddr_t *return_head_blk) @@ -1964,7 +1964,8 @@ xlog_recover_do_reg_buffer( * probably a good thing to do for other buf types also. */ error = 0; - if (buf_f->blf_flags & (XFS_BLI_UDQUOT_BUF|XFS_BLI_GDQUOT_BUF)) { + if (buf_f->blf_flags & + (XFS_BLI_UDQUOT_BUF|XFS_BLI_PDQUOT_BUF|XFS_BLI_GDQUOT_BUF)) { error = xfs_qm_dqcheck((xfs_disk_dquot_t *) item->ri_buf[i].i_addr, -1, 0, XFS_QMOPT_DOWARN, @@ -2030,6 +2031,7 @@ xfs_qm_dqcheck( } if (INT_GET(ddq->d_flags, ARCH_CONVERT) != XFS_DQ_USER && + INT_GET(ddq->d_flags, ARCH_CONVERT) != XFS_DQ_PROJ && INT_GET(ddq->d_flags, ARCH_CONVERT) != XFS_DQ_GROUP) { if (flags & XFS_QMOPT_DOWARN) cmn_err(CE_ALERT, @@ -2135,6 +2137,8 @@ xlog_recover_do_dquot_buffer( type = 0; if (buf_f->blf_flags & XFS_BLI_UDQUOT_BUF) type |= XFS_DQ_USER; + if (buf_f->blf_flags & XFS_BLI_PDQUOT_BUF) + type |= XFS_DQ_PROJ; if (buf_f->blf_flags & XFS_BLI_GDQUOT_BUF) type |= XFS_DQ_GROUP; /* @@ -2247,7 +2251,8 @@ xlog_recover_do_buffer_trans( error = 0; if (flags & XFS_BLI_INODE_BUF) { error = xlog_recover_do_inode_buffer(mp, item, bp, buf_f); - } else if (flags & (XFS_BLI_UDQUOT_BUF | XFS_BLI_GDQUOT_BUF)) { + } else if (flags & + (XFS_BLI_UDQUOT_BUF|XFS_BLI_PDQUOT_BUF|XFS_BLI_GDQUOT_BUF)) { xlog_recover_do_dquot_buffer(mp, log, item, bp, buf_f); } else { xlog_recover_do_reg_buffer(mp, item, bp, buf_f); @@ -2619,7 +2624,7 @@ xlog_recover_do_dquot_trans( * This type of quotas was turned off, so ignore this record. */ type = INT_GET(recddq->d_flags, ARCH_CONVERT) & - (XFS_DQ_USER | XFS_DQ_GROUP); + (XFS_DQ_USER | XFS_DQ_PROJ | XFS_DQ_GROUP); ASSERT(type); if (log->l_quotaoffs_flag & type) return (0); @@ -2742,7 +2747,6 @@ xlog_recover_do_efd_trans( xfs_efi_log_item_t *efip = NULL; xfs_log_item_t *lip; int gen; - int nexts; __uint64_t efi_id; SPLDECL(s); @@ -2777,22 +2781,15 @@ xlog_recover_do_efd_trans( } lip = xfs_trans_next_ail(mp, lip, &gen, NULL); } - if (lip == NULL) { - AIL_UNLOCK(mp, s); - } /* * If we found it, then free it up. If it wasn't there, it * must have been overwritten in the log. Oh well. 
*/ if (lip != NULL) { - nexts = efip->efi_format.efi_nextents; - if (nexts > XFS_EFI_MAX_FAST_EXTENTS) { - kmem_free(lip, sizeof(xfs_efi_log_item_t) + - ((nexts - 1) * sizeof(xfs_extent_t))); - } else { - kmem_zone_free(xfs_efi_zone, efip); - } + xfs_efi_item_free(efip); + } else { + AIL_UNLOCK(mp, s); } } diff --git a/fs/xfs/xfs_macros.c b/fs/xfs/xfs_macros.c index ce4f46c..698c2cd 100644 --- a/fs/xfs/xfs_macros.c +++ b/fs/xfs/xfs_macros.c @@ -1658,6 +1658,11 @@ xfs_inobt_is_free(xfs_inobt_rec_t *rp, int i) { return XFS_INOBT_IS_FREE(rp, i); } +int +xfs_inobt_is_free_disk(xfs_inobt_rec_t *rp, int i) +{ + return XFS_INOBT_IS_FREE_DISK(rp, i); +} #endif #if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_INOBT_IS_LAST_REC) diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 2ec967d..82e1646 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -64,6 +64,7 @@ STATIC void xfs_mount_log_sbunit(xfs_mount_t *, __int64_t); STATIC int xfs_uuid_mount(xfs_mount_t *); STATIC void xfs_uuid_unmount(xfs_mount_t *mp); +STATIC void xfs_unmountfs_wait(xfs_mount_t *); static struct { short offset; @@ -555,7 +556,7 @@ xfs_readsb(xfs_mount_t *mp) * fields from the superblock associated with the given * mount structure */ -void +STATIC void xfs_mount_common(xfs_mount_t *mp, xfs_sb_t *sbp) { int i; @@ -1081,7 +1082,7 @@ xfs_unmountfs(xfs_mount_t *mp, struct cred *cr) int64_t fsid; #endif - xfs_iflush_all(mp, XFS_FLUSH_ALL); + xfs_iflush_all(mp); XFS_QM_DQPURGEALL(mp, XFS_QMOPT_UQUOTA | XFS_QMOPT_GQUOTA | XFS_QMOPT_UMOUNTING); @@ -1111,15 +1112,6 @@ xfs_unmountfs(xfs_mount_t *mp, struct cred *cr) */ ASSERT(mp->m_inodes == NULL); - /* - * We may have bufs that are in the process of getting written still. - * We must wait for the I/O completion of those. The sync flag here - * does a two pass iteration thru the bufcache. 
- */ - if (XFS_FORCED_SHUTDOWN(mp)) { - xfs_incore_relse(mp->m_ddev_targp, 0, 1); /* synchronous */ - } - xfs_unmountfs_close(mp, cr); if ((mp->m_flags & XFS_MOUNT_NOUUID) == 0) xfs_uuid_unmount(mp); @@ -1146,7 +1138,7 @@ xfs_unmountfs_close(xfs_mount_t *mp, struct cred *cr) xfs_free_buftarg(mp->m_ddev_targp, 0); } -void +STATIC void xfs_unmountfs_wait(xfs_mount_t *mp) { if (mp->m_logdev_targp != mp->m_ddev_targp) diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 30dd08f..5affba3 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -141,7 +141,7 @@ typedef int (*xfs_dqattach_t)(struct xfs_inode *, uint); typedef void (*xfs_dqdetach_t)(struct xfs_inode *); typedef int (*xfs_dqpurgeall_t)(struct xfs_mount *, uint); typedef int (*xfs_dqvopalloc_t)(struct xfs_mount *, - struct xfs_inode *, uid_t, gid_t, uint, + struct xfs_inode *, uid_t, gid_t, prid_t, uint, struct xfs_dquot **, struct xfs_dquot **); typedef void (*xfs_dqvopcreate_t)(struct xfs_trans *, struct xfs_inode *, struct xfs_dquot *, struct xfs_dquot *); @@ -185,8 +185,8 @@ typedef struct xfs_qmops { (*(mp)->m_qm_ops.xfs_dqdetach)(ip) #define XFS_QM_DQPURGEALL(mp, fl) \ (*(mp)->m_qm_ops.xfs_dqpurgeall)(mp, fl) -#define XFS_QM_DQVOPALLOC(mp, ip, uid, gid, fl, dq1, dq2) \ - (*(mp)->m_qm_ops.xfs_dqvopalloc)(mp, ip, uid, gid, fl, dq1, dq2) +#define XFS_QM_DQVOPALLOC(mp, ip, uid, gid, prid, fl, dq1, dq2) \ + (*(mp)->m_qm_ops.xfs_dqvopalloc)(mp, ip, uid, gid, prid, fl, dq1, dq2) #define XFS_QM_DQVOPCREATE(mp, tp, ip, dq1, dq2) \ (*(mp)->m_qm_ops.xfs_dqvopcreate)(tp, ip, dq1, dq2) #define XFS_QM_DQVOPRENAME(mp, ip) \ @@ -544,7 +544,6 @@ extern void xfs_mount_free(xfs_mount_t *mp, int remove_bhv); extern int xfs_mountfs(struct vfs *, xfs_mount_t *mp, int); extern int xfs_unmountfs(xfs_mount_t *, struct cred *); -extern void xfs_unmountfs_wait(xfs_mount_t *); extern void xfs_unmountfs_close(xfs_mount_t *, struct cred *); extern int xfs_unmountfs_writesb(xfs_mount_t *); extern int xfs_unmount_flush(xfs_mount_t *, int); diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h index 703ec4e..7134576 100644 --- a/fs/xfs/xfs_quota.h +++ b/fs/xfs/xfs_quota.h @@ -96,7 +96,7 @@ typedef struct xfs_dqblk { * flags for q_flags field in the dquot. */ #define XFS_DQ_USER 0x0001 /* a user quota */ -/* #define XFS_DQ_PROJ 0x0002 -- project quota (IRIX) */ +#define XFS_DQ_PROJ 0x0002 /* project quota */ #define XFS_DQ_GROUP 0x0004 /* a group quota */ #define XFS_DQ_FLOCKED 0x0008 /* flush lock taken */ #define XFS_DQ_DIRTY 0x0010 /* dquot is dirty */ @@ -104,6 +104,8 @@ typedef struct xfs_dqblk { #define XFS_DQ_INACTIVE 0x0040 /* dq off mplist & hashlist */ #define XFS_DQ_MARKER 0x0080 /* sentinel */ +#define XFS_DQ_ALLTYPES (XFS_DQ_USER|XFS_DQ_PROJ|XFS_DQ_GROUP) + /* * In the worst case, when both user and group quotas are on, * we can have a max of three dquots changing in a single transaction. 
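With XFS_DQ_PROJ promoted to a real dquot type, code that used to test for user-or-group now masks against all three types, which is what the new XFS_DQ_ALLTYPES is for (compare the earlier xfs_qm_dqcheck hunk, which now also accepts XFS_DQ_PROJ). A small user-space sketch of that flag arithmetic — the constants are copied from the header above, while the validation helper is illustrative rather than the kernel's:

#include <stdio.h>

#define XFS_DQ_USER	0x0001	/* a user quota */
#define XFS_DQ_PROJ	0x0002	/* project quota */
#define XFS_DQ_GROUP	0x0004	/* a group quota */
#define XFS_DQ_ALLTYPES	(XFS_DQ_USER | XFS_DQ_PROJ | XFS_DQ_GROUP)

/* Accept exactly one of the three quota types, reject anything else. */
static int dq_type_is_valid(unsigned int flags)
{
	unsigned int type = flags & XFS_DQ_ALLTYPES;

	return type == XFS_DQ_USER || type == XFS_DQ_PROJ ||
	       type == XFS_DQ_GROUP;
}

int main(void)
{
	printf("%d %d %d\n",
	       dq_type_is_valid(XFS_DQ_PROJ),			/* 1 */
	       dq_type_is_valid(XFS_DQ_USER | XFS_DQ_PROJ),	/* 0: two types */
	       dq_type_is_valid(0x0010));			/* 0: dirty flag only */
	return 0;
}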
@@ -124,7 +126,7 @@ typedef struct xfs_dqblk { typedef struct xfs_dq_logformat { __uint16_t qlf_type; /* dquot log item type */ __uint16_t qlf_size; /* size of this item */ - xfs_dqid_t qlf_id; /* usr/grp id number : 32 bits */ + xfs_dqid_t qlf_id; /* usr/grp/proj id : 32 bits */ __int64_t qlf_blkno; /* blkno of dquot buffer */ __int32_t qlf_len; /* len of dquot buffer */ __uint32_t qlf_boffset; /* off of dquot in buffer */ @@ -152,9 +154,9 @@ typedef struct xfs_qoff_logformat { #define XFS_UQUOTA_ACCT 0x0001 /* user quota accounting ON */ #define XFS_UQUOTA_ENFD 0x0002 /* user quota limits enforced */ #define XFS_UQUOTA_CHKD 0x0004 /* quotacheck run on usr quotas */ -#define XFS_PQUOTA_ACCT 0x0008 /* (IRIX) project quota accounting ON */ -#define XFS_GQUOTA_ENFD 0x0010 /* group quota limits enforced */ -#define XFS_GQUOTA_CHKD 0x0020 /* quotacheck run on grp quotas */ +#define XFS_PQUOTA_ACCT 0x0008 /* project quota accounting ON */ +#define XFS_OQUOTA_ENFD 0x0010 /* other (grp/prj) quota limits enforced */ +#define XFS_OQUOTA_CHKD 0x0020 /* quotacheck run on other (grp/prj) quotas */ #define XFS_GQUOTA_ACCT 0x0040 /* group quota accounting ON */ /* @@ -162,17 +164,22 @@ typedef struct xfs_qoff_logformat { * are in the process of getting turned off. These flags are in m_qflags but * never in sb_qflags. */ -#define XFS_UQUOTA_ACTIVE 0x0080 /* uquotas are being turned off */ -#define XFS_GQUOTA_ACTIVE 0x0100 /* gquotas are being turned off */ +#define XFS_UQUOTA_ACTIVE 0x0100 /* uquotas are being turned off */ +#define XFS_PQUOTA_ACTIVE 0x0200 /* pquotas are being turned off */ +#define XFS_GQUOTA_ACTIVE 0x0400 /* gquotas are being turned off */ /* * Checking XFS_IS_*QUOTA_ON() while holding any inode lock guarantees * quota will be not be switched off as long as that inode lock is held. */ #define XFS_IS_QUOTA_ON(mp) ((mp)->m_qflags & (XFS_UQUOTA_ACTIVE | \ - XFS_GQUOTA_ACTIVE)) + XFS_GQUOTA_ACTIVE | \ + XFS_PQUOTA_ACTIVE)) +#define XFS_IS_OQUOTA_ON(mp) ((mp)->m_qflags & (XFS_GQUOTA_ACTIVE | \ + XFS_PQUOTA_ACTIVE)) #define XFS_IS_UQUOTA_ON(mp) ((mp)->m_qflags & XFS_UQUOTA_ACTIVE) #define XFS_IS_GQUOTA_ON(mp) ((mp)->m_qflags & XFS_GQUOTA_ACTIVE) +#define XFS_IS_PQUOTA_ON(mp) ((mp)->m_qflags & XFS_PQUOTA_ACTIVE) /* * Flags to tell various functions what to do. Not all of these are meaningful @@ -182,7 +189,7 @@ typedef struct xfs_qoff_logformat { #define XFS_QMOPT_DQLOCK 0x0000001 /* dqlock */ #define XFS_QMOPT_DQALLOC 0x0000002 /* alloc dquot ondisk if needed */ #define XFS_QMOPT_UQUOTA 0x0000004 /* user dquot requested */ -#define XFS_QMOPT_GQUOTA 0x0000008 /* group dquot requested */ +#define XFS_QMOPT_PQUOTA 0x0000008 /* project dquot requested */ #define XFS_QMOPT_FORCE_RES 0x0000010 /* ignore quota limits */ #define XFS_QMOPT_DQSUSER 0x0000020 /* don't cache super users dquot */ #define XFS_QMOPT_SBVERSION 0x0000040 /* change superblock version num */ @@ -192,6 +199,7 @@ typedef struct xfs_qoff_logformat { #define XFS_QMOPT_DOWARN 0x0000400 /* increase warning cnt if necessary */ #define XFS_QMOPT_ILOCKED 0x0000800 /* inode is already locked (excl) */ #define XFS_QMOPT_DQREPAIR 0x0001000 /* repair dquot, if damaged. 
*/ +#define XFS_QMOPT_GQUOTA 0x0002000 /* group dquot requested */ /* * flags to xfs_trans_mod_dquot to indicate which field needs to be @@ -231,7 +239,8 @@ typedef struct xfs_qoff_logformat { #define XFS_TRANS_DQ_DELRTBCOUNT XFS_QMOPT_DELRTBCOUNT -#define XFS_QMOPT_QUOTALL (XFS_QMOPT_UQUOTA|XFS_QMOPT_GQUOTA) +#define XFS_QMOPT_QUOTALL \ + (XFS_QMOPT_UQUOTA | XFS_QMOPT_PQUOTA | XFS_QMOPT_GQUOTA) #define XFS_QMOPT_RESBLK_MASK (XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_RES_RTBLKS) #ifdef __KERNEL__ @@ -246,21 +255,33 @@ typedef struct xfs_qoff_logformat { */ #define XFS_NOT_DQATTACHED(mp, ip) ((XFS_IS_UQUOTA_ON(mp) &&\ (ip)->i_udquot == NULL) || \ - (XFS_IS_GQUOTA_ON(mp) && \ + (XFS_IS_OQUOTA_ON(mp) && \ (ip)->i_gdquot == NULL)) -#define XFS_QM_NEED_QUOTACHECK(mp) ((XFS_IS_UQUOTA_ON(mp) && \ - (mp->m_sb.sb_qflags & \ - XFS_UQUOTA_CHKD) == 0) || \ - (XFS_IS_GQUOTA_ON(mp) && \ - (mp->m_sb.sb_qflags & \ - XFS_GQUOTA_CHKD) == 0)) +#define XFS_QM_NEED_QUOTACHECK(mp) \ + ((XFS_IS_UQUOTA_ON(mp) && \ + (mp->m_sb.sb_qflags & XFS_UQUOTA_CHKD) == 0) || \ + (XFS_IS_GQUOTA_ON(mp) && \ + ((mp->m_sb.sb_qflags & XFS_OQUOTA_CHKD) == 0 || \ + (mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT))) || \ + (XFS_IS_PQUOTA_ON(mp) && \ + ((mp->m_sb.sb_qflags & XFS_OQUOTA_CHKD) == 0 || \ + (mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT)))) + +#define XFS_MOUNT_QUOTA_SET1 (XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD|\ + XFS_UQUOTA_CHKD|XFS_PQUOTA_ACCT|\ + XFS_OQUOTA_ENFD|XFS_OQUOTA_CHKD) + +#define XFS_MOUNT_QUOTA_SET2 (XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD|\ + XFS_UQUOTA_CHKD|XFS_GQUOTA_ACCT|\ + XFS_OQUOTA_ENFD|XFS_OQUOTA_CHKD) #define XFS_MOUNT_QUOTA_ALL (XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD|\ - XFS_UQUOTA_CHKD|XFS_GQUOTA_ACCT|\ - XFS_GQUOTA_ENFD|XFS_GQUOTA_CHKD) + XFS_UQUOTA_CHKD|XFS_PQUOTA_ACCT|\ + XFS_OQUOTA_ENFD|XFS_OQUOTA_CHKD|\ + XFS_GQUOTA_ACCT) #define XFS_MOUNT_QUOTA_MASK (XFS_MOUNT_QUOTA_ALL | XFS_UQUOTA_ACTIVE | \ - XFS_GQUOTA_ACTIVE) + XFS_GQUOTA_ACTIVE | XFS_PQUOTA_ACTIVE) /* @@ -331,15 +352,8 @@ typedef struct xfs_dqtrxops { #define XFS_TRANS_UNRESERVE_AND_MOD_DQUOTS(mp, tp) \ XFS_DQTRXOP_VOID(mp, tp, qo_unreserve_and_mod_dquots) -#define XFS_TRANS_RESERVE_BLKQUOTA(mp, tp, ip, nblks) \ - XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip, nblks, 0, \ - XFS_QMOPT_RES_REGBLKS) -#define XFS_TRANS_RESERVE_BLKQUOTA_FORCE(mp, tp, ip, nblks) \ - XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip, nblks, 0, \ - XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES) -#define XFS_TRANS_UNRESERVE_BLKQUOTA(mp, tp, ip, nblks) \ - XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip, -(nblks), 0, \ - XFS_QMOPT_RES_REGBLKS) +#define XFS_TRANS_UNRESERVE_QUOTA_NBLKS(mp, tp, ip, nblks, ninos, flags) \ + XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip, -(nblks), -(ninos), flags) #define XFS_TRANS_RESERVE_QUOTA(mp, tp, ud, gd, nb, ni, f) \ XFS_TRANS_RESERVE_QUOTA_BYDQUOTS(mp, tp, ud, gd, nb, ni, \ f | XFS_QMOPT_RES_REGBLKS) diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c index cb13f9a..23b48ac 100644 --- a/fs/xfs/xfs_rename.c +++ b/fs/xfs/xfs_rename.c @@ -234,9 +234,6 @@ xfs_lock_for_rename( return 0; } - -int rename_which_error_return = 0; - /* * xfs_rename */ @@ -316,7 +313,6 @@ xfs_rename( &num_inodes); if (error) { - rename_which_error_return = __LINE__; /* * We have nothing locked, no inode references, and * no transaction, so just get out. 
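The quota flag rework in the xfs_quota.h hunks above folds group and project quota into a shared "other" quota: enforcement and quotacheck state move to the OQUOTA_ENFD/OQUOTA_CHKD bits while accounting keeps distinct GQUOTA/PQUOTA bits, and XFS_IS_OQUOTA_ON() answers for either kind. A compact sketch of those predicates over m_qflags, using the ACTIVE values from the header; the mount structure here is a stand-in, not xfs_mount_t:

#include <stdio.h>

#define XFS_UQUOTA_ACTIVE	0x0100	/* uquotas are being turned off */
#define XFS_PQUOTA_ACTIVE	0x0200	/* pquotas are being turned off */
#define XFS_GQUOTA_ACTIVE	0x0400	/* gquotas are being turned off */

struct mount_stub { unsigned int m_qflags; };	/* stand-in for xfs_mount_t */

#define XFS_IS_UQUOTA_ON(mp)	((mp)->m_qflags & XFS_UQUOTA_ACTIVE)
#define XFS_IS_PQUOTA_ON(mp)	((mp)->m_qflags & XFS_PQUOTA_ACTIVE)
#define XFS_IS_GQUOTA_ON(mp)	((mp)->m_qflags & XFS_GQUOTA_ACTIVE)
#define XFS_IS_OQUOTA_ON(mp)	((mp)->m_qflags & \
				 (XFS_GQUOTA_ACTIVE | XFS_PQUOTA_ACTIVE))

int main(void)
{
	struct mount_stub mp = { .m_qflags = XFS_UQUOTA_ACTIVE |
					     XFS_PQUOTA_ACTIVE };

	/* project quota alone is enough to make the "other" quota active */
	printf("uquota=%d gquota=%d oquota=%d\n",
	       !!XFS_IS_UQUOTA_ON(&mp), !!XFS_IS_GQUOTA_ON(&mp),
	       !!XFS_IS_OQUOTA_ON(&mp));
	return 0;
}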
@@ -332,7 +328,6 @@ xfs_rename( */ if (target_ip == NULL && (src_dp != target_dp) && target_dp->i_d.di_nlink >= XFS_MAXLINK) { - rename_which_error_return = __LINE__; error = XFS_ERROR(EMLINK); xfs_rename_unlock4(inodes, XFS_ILOCK_SHARED); goto rele_return; @@ -359,7 +354,6 @@ xfs_rename( XFS_TRANS_PERM_LOG_RES, XFS_RENAME_LOG_COUNT); } if (error) { - rename_which_error_return = __LINE__; xfs_trans_cancel(tp, 0); goto rele_return; } @@ -369,7 +363,6 @@ xfs_rename( */ if ((error = XFS_QM_DQVOPRENAME(mp, inodes))) { xfs_trans_cancel(tp, cancel_flags); - rename_which_error_return = __LINE__; goto rele_return; } @@ -413,7 +406,6 @@ xfs_rename( if (spaceres == 0 && (error = XFS_DIR_CANENTER(mp, tp, target_dp, target_name, target_namelen))) { - rename_which_error_return = __LINE__; goto error_return; } /* @@ -425,11 +417,9 @@ xfs_rename( target_namelen, src_ip->i_ino, &first_block, &free_list, spaceres); if (error == ENOSPC) { - rename_which_error_return = __LINE__; goto error_return; } if (error) { - rename_which_error_return = __LINE__; goto abort_return; } xfs_ichgtime(target_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); @@ -437,7 +427,6 @@ xfs_rename( if (new_parent && src_is_directory) { error = xfs_bumplink(tp, target_dp); if (error) { - rename_which_error_return = __LINE__; goto abort_return; } } @@ -455,7 +444,6 @@ xfs_rename( if (!(XFS_DIR_ISEMPTY(target_ip->i_mount, target_ip)) || (target_ip->i_d.di_nlink > 2)) { error = XFS_ERROR(EEXIST); - rename_which_error_return = __LINE__; goto error_return; } } @@ -473,7 +461,6 @@ xfs_rename( target_namelen, src_ip->i_ino, &first_block, &free_list, spaceres); if (error) { - rename_which_error_return = __LINE__; goto abort_return; } xfs_ichgtime(target_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); @@ -484,7 +471,6 @@ xfs_rename( */ error = xfs_droplink(tp, target_ip); if (error) { - rename_which_error_return = __LINE__; goto abort_return; } target_ip_dropped = 1; @@ -495,7 +481,6 @@ xfs_rename( */ error = xfs_droplink(tp, target_ip); if (error) { - rename_which_error_return = __LINE__; goto abort_return; } } @@ -519,7 +504,6 @@ xfs_rename( &free_list, spaceres); ASSERT(error != EEXIST); if (error) { - rename_which_error_return = __LINE__; goto abort_return; } xfs_ichgtime(src_ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); @@ -550,7 +534,6 @@ xfs_rename( */ error = xfs_droplink(tp, src_dp); if (error) { - rename_which_error_return = __LINE__; goto abort_return; } } @@ -558,7 +541,6 @@ xfs_rename( error = XFS_DIR_REMOVENAME(mp, tp, src_dp, src_name, src_namelen, src_ip->i_ino, &first_block, &free_list, spaceres); if (error) { - rename_which_error_return = __LINE__; goto abort_return; } xfs_ichgtime(src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 3db0e22..06dfca5 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -332,25 +332,6 @@ undo_blocks: /* - * This is called to set the a callback to be called when the given - * transaction is committed to disk. The transaction pointer and the - * argument pointer will be passed to the callback routine. - * - * Only one callback can be associated with any single transaction. - */ -void -xfs_trans_callback( - xfs_trans_t *tp, - xfs_trans_callback_t callback, - void *arg) -{ - ASSERT(tp->t_callback == NULL); - tp->t_callback = callback; - tp->t_callarg = arg; -} - - -/* * Record the indicated change to the given field for application * to the file system's superblock when the transaction commits. * For now, just store the change in the transaction structure. 
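The comment above on recording superblock field changes describes a defer-and-apply scheme: each change is only stored in the transaction, and the accumulated deltas are folded into the superblock when the transaction commits. A toy illustration of that record-now, apply-at-commit idea — the field names and both helpers are stand-ins, not the xfs_trans internals:

#include <stdio.h>

/* Toy stand-ins: accumulate per-transaction deltas, apply them at commit. */
struct sb_stub   { long icount; long fdblocks; };
struct tx_deltas { long icount_delta; long fdblocks_delta; };

static void tx_mod_sb(struct tx_deltas *tx, long dicount, long dfdblocks)
{
	/* record only; nothing touches the superblock yet */
	tx->icount_delta   += dicount;
	tx->fdblocks_delta += dfdblocks;
}

static void tx_commit(struct sb_stub *sb, struct tx_deltas *tx)
{
	/* fold the accumulated deltas in, in one place, at commit time */
	sb->icount   += tx->icount_delta;
	sb->fdblocks += tx->fdblocks_delta;
	tx->icount_delta = tx->fdblocks_delta = 0;
}

int main(void)
{
	struct sb_stub sb = { .icount = 100, .fdblocks = 5000 };
	struct tx_deltas tx = { 0, 0 };

	tx_mod_sb(&tx, +1, -8);		/* new inode chunk, eight blocks used */
	tx_mod_sb(&tx,  0, -4);		/* four more blocks used */
	tx_commit(&sb, &tx);

	printf("icount=%ld fdblocks=%ld\n", sb.icount, sb.fdblocks);
	return 0;
}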
@@ -551,7 +532,7 @@ xfs_trans_apply_sb_deltas(
  *
  * This is done efficiently with a single call to xfs_mod_incore_sb_batch().
  */
-void
+STATIC void
 xfs_trans_unreserve_and_mod_sb(
 	xfs_trans_t	*tp)
 {
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index bd37ccb..ec541d6 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -987,8 +987,6 @@ xfs_trans_t	*_xfs_trans_alloc(struct xfs_mount *, uint);
 xfs_trans_t	*xfs_trans_dup(xfs_trans_t *);
 int		xfs_trans_reserve(xfs_trans_t *, uint, uint, uint,
 				  uint, uint);
-void		xfs_trans_callback(xfs_trans_t *,
-				   void (*)(xfs_trans_t *, void *), void *);
 void		xfs_trans_mod_sb(xfs_trans_t *, uint, long);
 struct xfs_buf	*xfs_trans_get_buf(xfs_trans_t *, struct xfs_buftarg *,
 				   xfs_daddr_t, int, uint);
@@ -1010,7 +1008,6 @@ int		xfs_trans_iget(struct xfs_mount *, xfs_trans_t *,
 			       xfs_ino_t , uint, uint, struct xfs_inode **);
 void		xfs_trans_ijoin(xfs_trans_t *, struct xfs_inode *, uint);
 void		xfs_trans_ihold(xfs_trans_t *, struct xfs_inode *);
-void		xfs_trans_ihold_release(xfs_trans_t *, struct xfs_inode *);
 void		xfs_trans_log_buf(xfs_trans_t *, struct xfs_buf *, uint, uint);
 void		xfs_trans_log_inode(xfs_trans_t *, struct xfs_inode *, uint);
 struct xfs_efi_log_item	*xfs_trans_get_efi(xfs_trans_t *, uint);
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index a9682b9..144da7a 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -976,6 +976,7 @@ xfs_trans_dquot_buf(
 	ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
 	ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
 	ASSERT(type == XFS_BLI_UDQUOT_BUF ||
+	       type == XFS_BLI_PDQUOT_BUF ||
 	       type == XFS_BLI_GDQUOT_BUF);
 
 	bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c
index e2c3706..7e7631c 100644
--- a/fs/xfs/xfs_trans_inode.c
+++ b/fs/xfs/xfs_trans_inode.c
@@ -253,24 +253,6 @@ xfs_trans_ihold(
 	ip->i_itemp->ili_flags |= XFS_ILI_HOLD;
 }
 
-/*
- * Cancel the previous inode hold request made on this inode
- * for this transaction.
- */
-/*ARGSUSED*/
-void
-xfs_trans_ihold_release(
-	xfs_trans_t	*tp,
-	xfs_inode_t	*ip)
-{
-	ASSERT(ip->i_transp == tp);
-	ASSERT(ip->i_itemp != NULL);
-	ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE));
-	ASSERT(ip->i_itemp->ili_flags & XFS_ILI_HOLD);
-
-	ip->i_itemp->ili_flags &= ~XFS_ILI_HOLD;
-}
-
 /*
  * This is called to mark the fields indicated in fieldmask as needing
diff --git a/fs/xfs/xfs_types.h b/fs/xfs/xfs_types.h
index e4bf711..16f5371 100644
--- a/fs/xfs/xfs_types.h
+++ b/fs/xfs/xfs_types.h
@@ -55,7 +55,7 @@ typedef signed long long int	__int64_t;
 typedef unsigned long long int	__uint64_t;
 
 typedef enum { B_FALSE,B_TRUE }	boolean_t;
-typedef __int64_t	prid_t;		/* project ID */
+typedef __uint32_t	prid_t;		/* project ID */
 typedef __uint32_t	inst_t;		/* an instruction */
 
 typedef __s64		xfs_off_t;	/* <file offset> type */
diff --git a/fs/xfs/xfs_utils.c b/fs/xfs/xfs_utils.c
index d1f8146..11351f0 100644
--- a/fs/xfs/xfs_utils.c
+++ b/fs/xfs/xfs_utils.c
@@ -428,7 +428,7 @@ xfs_truncate_file(
 		if (ip->i_ino != mp->m_sb.sb_uquotino)
 			ASSERT(ip->i_udquot);
 	}
-	if (XFS_IS_GQUOTA_ON(mp)) {
+	if (XFS_IS_OQUOTA_ON(mp)) {
 		if (ip->i_ino != mp->m_sb.sb_gquotino)
 			ASSERT(ip->i_gdquot);
 	}
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c
index b537366..42bcc02 100644
--- a/fs/xfs/xfs_vfsops.c
+++ b/fs/xfs/xfs_vfsops.c
@@ -368,16 +368,6 @@ xfs_finish_flags(
 	}
 
 	/*
-	 * disallow mount attempts with (IRIX) project quota enabled
-	 */
-	if (XFS_SB_VERSION_HASQUOTA(&mp->m_sb) &&
-	    (mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT)) {
-		cmn_err(CE_WARN,
-			"XFS: cannot mount a filesystem with IRIX project quota enabled");
-		return XFS_ERROR(ENOSYS);
-	}
-
-	/*
 	 * check for shared mount.
 	 */
 	if (ap->flags & XFSMNT_SHARED) {
@@ -622,7 +612,34 @@ out:
 	return XFS_ERROR(error);
 }
 
-#define REMOUNT_READONLY_FLAGS	(SYNC_REMOUNT|SYNC_ATTR|SYNC_WAIT)
+STATIC int
+xfs_quiesce_fs(
+	xfs_mount_t		*mp)
+{
+	int			count = 0, pincount;
+
+	xfs_refcache_purge_mp(mp);
+	xfs_flush_buftarg(mp->m_ddev_targp, 0);
+	xfs_finish_reclaim_all(mp, 0);
+
+	/* This loop must run at least twice.
+	 * The first instance of the loop will flush
+	 * most meta data but that will generate more
+	 * meta data (typically directory updates).
+	 * Which then must be flushed and logged before
+	 * we can write the unmount record.
+	 */
+	do {
+		xfs_syncsub(mp, SYNC_REMOUNT|SYNC_ATTR|SYNC_WAIT, 0, NULL);
+		pincount = xfs_flush_buftarg(mp->m_ddev_targp, 1);
+		if (!pincount) {
+			delay(50);
+			count++;
+		}
+	} while (count < 2);
+
+	return 0;
+}
 
 STATIC int
 xfs_mntupdate(
@@ -632,8 +649,7 @@ xfs_mntupdate(
 {
 	struct vfs	*vfsp = bhvtovfs(bdp);
 	xfs_mount_t	*mp = XFS_BHVTOM(bdp);
-	int		pincount, error;
-	int		count = 0;
+	int		error;
 
 	if (args->flags & XFSMNT_NOATIME)
 		mp->m_flags |= XFS_MOUNT_NOATIME;
@@ -645,25 +661,7 @@ xfs_mntupdate(
 	}
 
 	if (*flags & MS_RDONLY) {
-		xfs_refcache_purge_mp(mp);
-		xfs_flush_buftarg(mp->m_ddev_targp, 0);
-		xfs_finish_reclaim_all(mp, 0);
-
-		/* This loop must run at least twice.
-		 * The first instance of the loop will flush
-		 * most meta data but that will generate more
-		 * meta data (typically directory updates).
-		 * Which then must be flushed and logged before
-		 * we can write the unmount record.
-		 */
-		do {
-			VFS_SYNC(vfsp, REMOUNT_READONLY_FLAGS, NULL, error);
-			pincount = xfs_flush_buftarg(mp->m_ddev_targp, 1);
-			if (!pincount) {
-				delay(50);
-				count++;
-			}
-		} while (count < 2);
+		xfs_quiesce_fs(mp);
 
 		/* Ok now write out an unmount record */
 		xfs_log_unmount_write(mp);
@@ -879,10 +877,12 @@ xfs_sync(
 	int		flags,
 	cred_t		*credp)
 {
-	xfs_mount_t	*mp;
+	xfs_mount_t	*mp = XFS_BHVTOM(bdp);
 
-	mp = XFS_BHVTOM(bdp);
-	return (xfs_syncsub(mp, flags, 0, NULL));
+	if (unlikely(flags == SYNC_QUIESCE))
+		return xfs_quiesce_fs(mp);
+	else
+		return xfs_syncsub(mp, flags, 0, NULL);
 }
 
 /*
@@ -1681,7 +1681,7 @@ suffix_strtoul(const char *cp, char **endp, unsigned int base)
 	return simple_strtoul(cp, endp, base) << shift_left_factor;
 }
 
-int
+STATIC int
 xfs_parseargs(
 	struct bhv_desc		*bhv,
 	char			*options,
@@ -1867,7 +1867,7 @@ printk("XFS: irixsgid is now a sysctl(2) variable, option is deprecated.\n");
 	return 0;
 }
 
-int
+STATIC int
 xfs_showargs(
 	struct bhv_desc		*bhv,
 	struct seq_file		*m)
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 25a5266..1377c86 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved.
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc. All Rights Reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of version 2 of the GNU General Public License as
@@ -351,21 +358,28 @@ xfs_setattr(
 	 * If the IDs do change before we take the ilock, we're covered
 	 * because the i_*dquot fields will get updated anyway.
 	 */
-	if (XFS_IS_QUOTA_ON(mp) && (mask & (XFS_AT_UID|XFS_AT_GID))) {
+	if (XFS_IS_QUOTA_ON(mp) &&
+	    (mask & (XFS_AT_UID|XFS_AT_GID|XFS_AT_PROJID))) {
 		uint	qflags = 0;
 
-		if (mask & XFS_AT_UID) {
+		if ((mask & XFS_AT_UID) && XFS_IS_UQUOTA_ON(mp)) {
 			uid = vap->va_uid;
 			qflags |= XFS_QMOPT_UQUOTA;
 		} else {
 			uid = ip->i_d.di_uid;
 		}
-		if (mask & XFS_AT_GID) {
+		if ((mask & XFS_AT_GID) && XFS_IS_GQUOTA_ON(mp)) {
 			gid = vap->va_gid;
 			qflags |= XFS_QMOPT_GQUOTA;
 		} else {
 			gid = ip->i_d.di_gid;
 		}
+		if ((mask & XFS_AT_PROJID) && XFS_IS_PQUOTA_ON(mp)) {
+			projid = vap->va_projid;
+			qflags |= XFS_QMOPT_PQUOTA;
+		} else {
+			projid = ip->i_d.di_projid;
+		}
 		/*
 		 * We take a reference when we initialize udqp and gdqp,
 		 * so it is important that we never blindly double trip on
@@ -373,7 +380,8 @@ xfs_setattr(
 		 */
 		ASSERT(udqp == NULL);
 		ASSERT(gdqp == NULL);
-		code = XFS_QM_DQVOPALLOC(mp, ip, uid,gid, qflags, &udqp, &gdqp);
+		code = XFS_QM_DQVOPALLOC(mp, ip, uid, gid, projid, qflags,
+					 &udqp, &gdqp);
 		if (code)
 			return (code);
 	}
@@ -499,8 +507,6 @@ xfs_setattr(
 		 * that the group ID supplied to the chown() function
 		 * shall be equal to either the group ID or one of the
 		 * supplementary group IDs of the calling process.
-		 *
-		 * XXX: How does restricted_chown affect projid?
 		 */
 		if (restricted_chown &&
 		    (iuid != uid || (igid != gid &&
@@ -510,10 +516,11 @@ xfs_setattr(
 			goto error_return;
 		}
 		/*
-		 * Do a quota reservation only if uid or gid is actually
+		 * Do a quota reservation only if uid/projid/gid is actually
 		 * going to change.
		 */
 		if ((XFS_IS_UQUOTA_ON(mp) && iuid != uid) ||
+		    (XFS_IS_PQUOTA_ON(mp) && iprojid != projid) ||
 		    (XFS_IS_GQUOTA_ON(mp) && igid != gid)) {
 			ASSERT(tp);
 			code = XFS_QM_DQVOPCHOWNRESV(mp, tp, ip, udqp, gdqp,
@@ -774,6 +781,7 @@ xfs_setattr(
 		}
 		if (igid != gid) {
 			if (XFS_IS_GQUOTA_ON(mp)) {
+				ASSERT(!XFS_IS_PQUOTA_ON(mp));
 				ASSERT(mask & XFS_AT_GID);
 				ASSERT(gdqp);
 				olddquot2 = XFS_QM_DQVOPCHOWN(mp, tp, ip,
@@ -782,6 +790,13 @@ xfs_setattr(
 			ip->i_d.di_gid = gid;
 		}
 		if (iprojid != projid) {
+			if (XFS_IS_PQUOTA_ON(mp)) {
+				ASSERT(!XFS_IS_GQUOTA_ON(mp));
+				ASSERT(mask & XFS_AT_PROJID);
+				ASSERT(gdqp);
+				olddquot2 = XFS_QM_DQVOPCHOWN(mp, tp, ip,
+							&ip->i_gdquot, gdqp);
+			}
 			ip->i_d.di_projid = projid;
 			/*
 			 * We may have to rev the inode as well as
@@ -843,6 +858,8 @@ xfs_setattr(
 				di_flags |= XFS_DIFLAG_NOATIME;
 			if (vap->va_xflags & XFS_XFLAG_NODUMP)
 				di_flags |= XFS_DIFLAG_NODUMP;
+			if (vap->va_xflags & XFS_XFLAG_PROJINHERIT)
+				di_flags |= XFS_DIFLAG_PROJINHERIT;
 			if ((ip->i_d.di_mode & S_IFMT) == S_IFDIR) {
 				if (vap->va_xflags & XFS_XFLAG_RTINHERIT)
 					di_flags |= XFS_DIFLAG_RTINHERIT;
@@ -1898,7 +1915,9 @@ xfs_create(
 	/* Return through std_return after this point. */
 
 	udqp = gdqp = NULL;
-	if (vap->va_mask & XFS_AT_PROJID)
+	if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
+		prid = dp->i_d.di_projid;
+	else if (vap->va_mask & XFS_AT_PROJID)
 		prid = (xfs_prid_t)vap->va_projid;
 	else
 		prid = (xfs_prid_t)dfltprid;
@@ -1907,7 +1926,7 @@ xfs_create(
 	 * Make sure that we have allocated dquot(s) on disk.
 	 */
 	error = XFS_QM_DQVOPALLOC(mp, dp,
-			current_fsuid(credp), current_fsgid(credp),
+			current_fsuid(credp), current_fsgid(credp), prid,
 			XFS_QMOPT_QUOTALL|XFS_QMOPT_INHERIT, &udqp, &gdqp);
 	if (error)
 		goto std_return;
@@ -2604,17 +2623,7 @@ xfs_link(
 	if (src_vp->v_type == VDIR)
 		return XFS_ERROR(EPERM);
 
-	/*
-	 * For now, manually find the XFS behavior descriptor for
-	 * the source vnode. If it doesn't exist then something
-	 * is wrong and we should just return an error.
-	 * Eventually we need to figure out how link is going to
-	 * work in the face of stacked vnodes.
-	 */
 	src_bdp = vn_bhv_lookup_unlocked(VN_BHV_HEAD(src_vp), &xfs_vnodeops);
-	if (src_bdp == NULL) {
-		return XFS_ERROR(EXDEV);
-	}
 	sip = XFS_BHVTOI(src_bdp);
 	tdp = XFS_BHVTOI(target_dir_bdp);
 	mp = tdp->i_mount;
@@ -2681,6 +2690,17 @@ xfs_link(
 		goto error_return;
 	}
 
+	/*
+	 * If we are using project inheritance, we only allow hard link
+	 * creation in our tree when the project IDs are the same; else
+	 * the tree quota mechanism could be circumvented.
+	 */
+	if (unlikely((tdp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
+		     (tdp->i_d.di_projid != sip->i_d.di_projid))) {
+		error = XFS_ERROR(EPERM);
+		goto error_return;
+	}
+
 	if (resblks == 0 &&
 	    (error = XFS_DIR_CANENTER(mp, tp, tdp, target_name,
 			target_namelen)))
@@ -2803,7 +2823,9 @@ xfs_mkdir(
 	mp = dp->i_mount;
 	udqp = gdqp = NULL;
 
-	if (vap->va_mask & XFS_AT_PROJID)
+	if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
+		prid = dp->i_d.di_projid;
+	else if (vap->va_mask & XFS_AT_PROJID)
 		prid = (xfs_prid_t)vap->va_projid;
 	else
 		prid = (xfs_prid_t)dfltprid;
@@ -2812,7 +2834,7 @@ xfs_mkdir(
 	 * Make sure that we have allocated dquot(s) on disk.
 	 */
 	error = XFS_QM_DQVOPALLOC(mp, dp,
-			current_fsuid(credp), current_fsgid(credp),
+			current_fsuid(credp), current_fsgid(credp), prid,
 			XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp);
 	if (error)
 		goto std_return;
@@ -3357,7 +3379,9 @@ xfs_symlink(
 	/* Return through std_return after this point.
	 */
 	udqp = gdqp = NULL;
-	if (vap->va_mask & XFS_AT_PROJID)
+	if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
+		prid = dp->i_d.di_projid;
+	else if (vap->va_mask & XFS_AT_PROJID)
 		prid = (xfs_prid_t)vap->va_projid;
 	else
 		prid = (xfs_prid_t)dfltprid;
@@ -3366,7 +3390,7 @@ xfs_symlink(
 	 * Make sure that we have allocated dquot(s) on disk.
 	 */
 	error = XFS_QM_DQVOPALLOC(mp, dp,
-			current_fsuid(credp), current_fsgid(credp),
+			current_fsuid(credp), current_fsgid(credp), prid,
 			XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp);
 	if (error)
 		goto std_return;
@@ -4028,7 +4052,7 @@ xfs_finish_reclaim_all(xfs_mount_t *mp, int noblock)
  *	errno on error
  *
  */
-int
+STATIC int
 xfs_alloc_file_space(
 	xfs_inode_t		*ip,
 	xfs_off_t		offset,
@@ -4151,9 +4175,8 @@ retry:
 			break;
 		}
 		xfs_ilock(ip, XFS_ILOCK_EXCL);
-		error = XFS_TRANS_RESERVE_QUOTA_BYDQUOTS(mp, tp,
-				ip->i_udquot, ip->i_gdquot, resblks, 0, rt ?
-				XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS);
+		error = XFS_TRANS_RESERVE_QUOTA(mp, tp,
+				ip->i_udquot, ip->i_gdquot, resblks, 0, 0);
 		if (error)
 			goto error1;
 
@@ -4305,6 +4328,7 @@ xfs_free_file_space(
 	xfs_off_t		len,
 	int			attr_flags)
 {
+	vnode_t			*vp;
 	int			committed;
 	int			done;
 	xfs_off_t		end_dmi_offset;
@@ -4325,9 +4349,11 @@ xfs_free_file_space(
 	xfs_trans_t		*tp;
 	int			need_iolock = 1;
 
-	vn_trace_entry(XFS_ITOV(ip), __FUNCTION__, (inst_t *)__return_address);
+	vp = XFS_ITOV(ip);
 	mp = ip->i_mount;
 
+	vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address);
+
 	if ((error = XFS_QM_DQATTACH(mp, ip, 0)))
 		return error;
 
@@ -4344,7 +4370,7 @@ xfs_free_file_space(
 	    DM_EVENT_ENABLED(XFS_MTOVFS(mp), ip, DM_EVENT_WRITE)) {
 		if (end_dmi_offset > ip->i_d.di_size)
 			end_dmi_offset = ip->i_d.di_size;
-		error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, XFS_ITOV(ip),
+		error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, vp,
 				offset, end_dmi_offset - offset,
 				AT_DELAY_FLAG(attr_flags), NULL);
 		if (error)
@@ -4363,7 +4389,14 @@ xfs_free_file_space(
 	ioffset = offset & ~(rounding - 1);
 	if (ilen & (rounding - 1))
 		ilen = (ilen + rounding) & ~(rounding - 1);
-	xfs_inval_cached_pages(XFS_ITOV(ip), &(ip->i_iocore), ioffset, 0, 0);
+
+	if (VN_CACHED(vp) != 0) {
+		xfs_inval_cached_trace(&ip->i_iocore, ioffset, -1,
+				ctooff(offtoct(ioffset)), -1);
+		VOP_FLUSHINVAL_PAGES(vp, ctooff(offtoct(ioffset)),
+				-1, FI_REMAPF_LOCKED);
+	}
+
 	/*
 	 * Need to zero the stuff we're not freeing, on disk.
 	 * If its a realtime file & can't use unwritten extents then we
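
The xfs_link() hunk above refuses a hard link into a directory marked XFS_DIFLAG_PROJINHERIT unless source and target share a project ID, returning EPERM so a file cannot be linked out from under its tree (project) quota. The stand-alone user-space sketch below models only that rule; the DEMO_PROJINHERIT flag value, the demo_inode structure and the may_link() helper are invented for illustration and are not part of the patch.

/*
 * Minimal sketch of the project-inheritance hard-link rule.
 * DEMO_PROJINHERIT, demo_inode and may_link() are hypothetical names;
 * only the check itself mirrors the xfs_link() change.
 */
#include <errno.h>
#include <stdio.h>

#define DEMO_PROJINHERIT	0x1	/* stand-in for XFS_DIFLAG_PROJINHERIT */

struct demo_inode {
	unsigned int	di_flags;	/* inode flags */
	unsigned int	di_projid;	/* project ID */
};

/* Return 0 if a hard link may be created in target_dir, -EPERM otherwise. */
static int may_link(const struct demo_inode *target_dir,
		    const struct demo_inode *src)
{
	if ((target_dir->di_flags & DEMO_PROJINHERIT) &&
	    target_dir->di_projid != src->di_projid)
		return -EPERM;	/* linking would escape the tree quota */
	return 0;
}

int main(void)
{
	struct demo_inode dir   = { DEMO_PROJINHERIT, 42 };
	struct demo_inode same  = { 0, 42 };
	struct demo_inode other = { 0,  7 };

	printf("same project : %d\n", may_link(&dir, &same));	/* 0 */
	printf("other project: %d\n", may_link(&dir, &other));	/* -EPERM (-1 on Linux) */
	return 0;
}

In the patch itself the equivalent check sits just before the XFS_DIR_CANENTER test in xfs_link(), so a project-ID mismatch simply takes the existing error_return path.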